From 9f480faec58cd6197a007ea1dcac6b7c3daf1139 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 22 Nov 2017 11:51:39 -0800 Subject: crypto: chacha20 - Fix keystream alignment for chacha20_block() When chacha20_block() outputs the keystream block, it uses 'u32' stores directly. However, the callers (crypto/chacha20_generic.c and drivers/char/random.c) declare the keystream buffer as a 'u8' array, which is not guaranteed to have the needed alignment. Fix it by having both callers declare the keystream as a 'u32' array. For now this is preferable to switching over to the unaligned access macros because chacha20_block() is only being used in cases where we can easily control the alignment (stack buffers). Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- lib/chacha20.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/chacha20.c b/lib/chacha20.c index 250ceed9ec9a..29d3801dee24 100644 --- a/lib/chacha20.c +++ b/lib/chacha20.c @@ -21,7 +21,7 @@ static inline u32 rotl32(u32 v, u8 n) return (v << n) | (v >> (sizeof(v) * 8 - n)); } -extern void chacha20_block(u32 *state, void *stream) +void chacha20_block(u32 *state, u32 *stream) { u32 x[16], *out = stream; int i; -- cgit v1.2.3 From b393e8b33efd2ee08576ceddc10c2b4bfb3b5435 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 9 Oct 2017 10:20:44 -0700 Subject: percpu: READ_ONCE() now implies smp_read_barrier_depends() Because READ_ONCE() now implies smp_read_barrier_depends(), this commit removes the now-redundant smp_read_barrier_depends() following the READ_ONCE() in __ref_is_percpu(). Signed-off-by: Paul E. McKenney Acked-by: Tejun Heo Cc: Christoph Lameter --- include/linux/percpu-refcount.h | 6 +++--- lib/percpu-refcount.c | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 6658d9ee5257..864d167a1073 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -139,12 +139,12 @@ static inline bool __ref_is_percpu(struct percpu_ref *ref, * when using it as a pointer, __PERCPU_REF_ATOMIC may be set in * between contaminating the pointer value, meaning that * READ_ONCE() is required when fetching it. + * + * The smp_read_barrier_depends() implied by READ_ONCE() pairs + * with smp_store_release() in __percpu_ref_switch_to_percpu(). */ percpu_ptr = READ_ONCE(ref->percpu_count_ptr); - /* paired with smp_store_release() in __percpu_ref_switch_to_percpu() */ - smp_read_barrier_depends(); - /* * Theoretically, the following could test just ATOMIC; however, * then we'd have to mask off DEAD separately as DEAD may be diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index fe03c6d52761..30e7dd88148b 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -197,10 +197,10 @@ static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref) atomic_long_add(PERCPU_COUNT_BIAS, &ref->count); /* - * Restore per-cpu operation. smp_store_release() is paired with - * smp_read_barrier_depends() in __ref_is_percpu() and guarantees - * that the zeroing is visible to all percpu accesses which can see - * the following __PERCPU_REF_ATOMIC clearing. + * Restore per-cpu operation. smp_store_release() is paired + * with READ_ONCE() in __ref_is_percpu() and guarantees that the + * zeroing is visible to all percpu accesses which can see the + * following __PERCPU_REF_ATOMIC clearing. */ for_each_possible_cpu(cpu) *per_cpu_ptr(percpu_count, cpu) = 0; -- cgit v1.2.3 From 516df050615e4b0fd2dd0448cb5a807208a3837a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 9 Oct 2017 11:39:57 -0700 Subject: lib/assoc_array: Remove smp_read_barrier_depends() Now that smp_read_barrier_depends() is implied by READ_ONCE(), the several smp_read_barrier_depends() calls may be removed from lib/assoc_array.c. This commit makes this change and marks the READ_ONCE() calls that head address dependencies. Signed-off-by: Paul E. McKenney Cc: Jonathan Corbet Cc: Mark Rutland Cc: Alexander Kuleshov Cc: David Howells --- lib/assoc_array.c | 37 ++++++++++++------------------------- 1 file changed, 12 insertions(+), 25 deletions(-) (limited to 'lib') diff --git a/lib/assoc_array.c b/lib/assoc_array.c index b77d51da8c73..c6659cb37033 100644 --- a/lib/assoc_array.c +++ b/lib/assoc_array.c @@ -38,12 +38,10 @@ begin_node: if (assoc_array_ptr_is_shortcut(cursor)) { /* Descend through a shortcut */ shortcut = assoc_array_ptr_to_shortcut(cursor); - smp_read_barrier_depends(); - cursor = READ_ONCE(shortcut->next_node); + cursor = READ_ONCE(shortcut->next_node); /* Address dependency. */ } node = assoc_array_ptr_to_node(cursor); - smp_read_barrier_depends(); slot = 0; /* We perform two passes of each node. @@ -55,15 +53,12 @@ begin_node: */ has_meta = 0; for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { - ptr = READ_ONCE(node->slots[slot]); + ptr = READ_ONCE(node->slots[slot]); /* Address dependency. */ has_meta |= (unsigned long)ptr; if (ptr && assoc_array_ptr_is_leaf(ptr)) { - /* We need a barrier between the read of the pointer - * and dereferencing the pointer - but only if we are - * actually going to dereference it. + /* We need a barrier between the read of the pointer, + * which is supplied by the above READ_ONCE(). */ - smp_read_barrier_depends(); - /* Invoke the callback */ ret = iterator(assoc_array_ptr_to_leaf(ptr), iterator_data); @@ -86,10 +81,8 @@ begin_node: continue_node: node = assoc_array_ptr_to_node(cursor); - smp_read_barrier_depends(); - for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { - ptr = READ_ONCE(node->slots[slot]); + ptr = READ_ONCE(node->slots[slot]); /* Address dependency. */ if (assoc_array_ptr_is_meta(ptr)) { cursor = ptr; goto begin_node; @@ -98,16 +91,15 @@ continue_node: finished_node: /* Move up to the parent (may need to skip back over a shortcut) */ - parent = READ_ONCE(node->back_pointer); + parent = READ_ONCE(node->back_pointer); /* Address dependency. */ slot = node->parent_slot; if (parent == stop) return 0; if (assoc_array_ptr_is_shortcut(parent)) { shortcut = assoc_array_ptr_to_shortcut(parent); - smp_read_barrier_depends(); cursor = parent; - parent = READ_ONCE(shortcut->back_pointer); + parent = READ_ONCE(shortcut->back_pointer); /* Address dependency. */ slot = shortcut->parent_slot; if (parent == stop) return 0; @@ -147,7 +139,7 @@ int assoc_array_iterate(const struct assoc_array *array, void *iterator_data), void *iterator_data) { - struct assoc_array_ptr *root = READ_ONCE(array->root); + struct assoc_array_ptr *root = READ_ONCE(array->root); /* Address dependency. */ if (!root) return 0; @@ -194,7 +186,7 @@ assoc_array_walk(const struct assoc_array *array, pr_devel("-->%s()\n", __func__); - cursor = READ_ONCE(array->root); + cursor = READ_ONCE(array->root); /* Address dependency. */ if (!cursor) return assoc_array_walk_tree_empty; @@ -216,11 +208,9 @@ jumped: consider_node: node = assoc_array_ptr_to_node(cursor); - smp_read_barrier_depends(); - slot = segments >> (level & ASSOC_ARRAY_KEY_CHUNK_MASK); slot &= ASSOC_ARRAY_FAN_MASK; - ptr = READ_ONCE(node->slots[slot]); + ptr = READ_ONCE(node->slots[slot]); /* Address dependency. */ pr_devel("consider slot %x [ix=%d type=%lu]\n", slot, level, (unsigned long)ptr & 3); @@ -254,7 +244,6 @@ consider_node: cursor = ptr; follow_shortcut: shortcut = assoc_array_ptr_to_shortcut(cursor); - smp_read_barrier_depends(); pr_devel("shortcut to %d\n", shortcut->skip_to_level); sc_level = level + ASSOC_ARRAY_LEVEL_STEP; BUG_ON(sc_level > shortcut->skip_to_level); @@ -294,7 +283,7 @@ follow_shortcut: } while (sc_level < shortcut->skip_to_level); /* The shortcut matches the leaf's index to this point. */ - cursor = READ_ONCE(shortcut->next_node); + cursor = READ_ONCE(shortcut->next_node); /* Address dependency. */ if (((level ^ sc_level) & ~ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) { level = sc_level; goto jumped; @@ -331,20 +320,18 @@ void *assoc_array_find(const struct assoc_array *array, return NULL; node = result.terminal_node.node; - smp_read_barrier_depends(); /* If the target key is available to us, it's has to be pointed to by * the terminal node. */ for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { - ptr = READ_ONCE(node->slots[slot]); + ptr = READ_ONCE(node->slots[slot]); /* Address dependency. */ if (ptr && assoc_array_ptr_is_leaf(ptr)) { /* We need a barrier between the read of the pointer * and dereferencing the pointer - but only if we are * actually going to dereference it. */ leaf = assoc_array_ptr_to_leaf(ptr); - smp_read_barrier_depends(); if (ops->compare_object(leaf, index_key)) return (void *)leaf; } -- cgit v1.2.3 From d9d16e16a3f82e88fec99222c9c7e26563b05688 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 7 Nov 2017 17:30:05 +0100 Subject: kobject: add SPDX identifiers to all kobject files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the kobject files files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman --- include/linux/kobject.h | 1 + include/linux/kobject_ns.h | 1 + lib/kobject.c | 1 + lib/kobject_uevent.c | 1 + samples/kobject/kobject-example.c | 1 + samples/kobject/kset-example.c | 1 + 6 files changed, 6 insertions(+) (limited to 'lib') diff --git a/include/linux/kobject.h b/include/linux/kobject.h index e0a6205caa71..ac59b67f131e 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * kobject.h - generic kernel object infrastructure. * diff --git a/include/linux/kobject_ns.h b/include/linux/kobject_ns.h index df32d2508290..7dd22132cc13 100644 --- a/include/linux/kobject_ns.h +++ b/include/linux/kobject_ns.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Kernel object name space definitions * * Copyright (c) 2002-2003 Patrick Mochel diff --git a/lib/kobject.c b/lib/kobject.c index 763d70a18941..450e771d0ac7 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * kobject.c - library routines for handling generic kernel objects * diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index c3e84edc47c9..bbedaf359a1e 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * kernel userspace event delivery * diff --git a/samples/kobject/kobject-example.c b/samples/kobject/kobject-example.c index 2e0740f06cd7..6cc8bb36a4e8 100644 --- a/samples/kobject/kobject-example.c +++ b/samples/kobject/kobject-example.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Sample kobject implementation * diff --git a/samples/kobject/kset-example.c b/samples/kobject/kset-example.c index a55bff52bde3..58543eca7f16 100644 --- a/samples/kobject/kset-example.c +++ b/samples/kobject/kset-example.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Sample kset and ktype implementation * -- cgit v1.2.3 From 045c5f75b77177ad19fe747e33aae2249a12e827 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 7 Nov 2017 17:30:06 +0100 Subject: kobject: Remove redundant license text Now that the SPDX tag is in all kobject files, that identifies the license in a specific and legally-defined manner. So the extra GPL text wording can be removed as it is no longer needed at all. This is done on a quest to remove the 700+ different ways that files in the kernel describe the GPL license text. And there's unneeded stuff like the address (sometimes incorrect) for the FSF which is never needed. No copyright headers or other non-license-description text was removed. Signed-off-by: Greg Kroah-Hartman --- include/linux/kobject.h | 2 -- include/linux/kobject_ns.h | 2 -- lib/kobject.c | 3 --- lib/kobject_uevent.c | 2 -- samples/kobject/kobject-example.c | 3 --- samples/kobject/kset-example.c | 3 --- 6 files changed, 15 deletions(-) (limited to 'lib') diff --git a/include/linux/kobject.h b/include/linux/kobject.h index ac59b67f131e..7f6f93c3df9c 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -7,8 +7,6 @@ * Copyright (c) 2006-2008 Greg Kroah-Hartman * Copyright (c) 2006-2008 Novell Inc. * - * This file is released under the GPLv2. - * * Please read Documentation/kobject.txt before using the kobject * interface, ESPECIALLY the parts about reference counts and object * destructors. diff --git a/include/linux/kobject_ns.h b/include/linux/kobject_ns.h index 7dd22132cc13..069aa2ebef90 100644 --- a/include/linux/kobject_ns.h +++ b/include/linux/kobject_ns.h @@ -8,8 +8,6 @@ * * Split from kobject.h by David Howells (dhowells@redhat.com) * - * This file is released under the GPLv2. - * * Please read Documentation/kobject.txt before using the kobject * interface, ESPECIALLY the parts about reference counts and object * destructors. diff --git a/lib/kobject.c b/lib/kobject.c index 450e771d0ac7..fad12300d264 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -6,9 +6,6 @@ * Copyright (c) 2006-2007 Greg Kroah-Hartman * Copyright (c) 2006-2007 Novell Inc. * - * This file is released under the GPLv2. - * - * * Please see the file Documentation/kobject.txt for critical information * about using the kobject interface. */ diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index bbedaf359a1e..cc7e2c46273b 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -6,8 +6,6 @@ * Copyright (C) 2004 Novell, Inc. All rights reserved. * Copyright (C) 2004 IBM, Inc. All rights reserved. * - * Licensed under the GNU GPL v2. - * * Authors: * Robert Love * Kay Sievers diff --git a/samples/kobject/kobject-example.c b/samples/kobject/kobject-example.c index 6cc8bb36a4e8..9e383fdbaa00 100644 --- a/samples/kobject/kobject-example.c +++ b/samples/kobject/kobject-example.c @@ -4,9 +4,6 @@ * * Copyright (C) 2004-2007 Greg Kroah-Hartman * Copyright (C) 2007 Novell Inc. - * - * Released under the GPL version 2 only. - * */ #include #include diff --git a/samples/kobject/kset-example.c b/samples/kobject/kset-example.c index 58543eca7f16..401328fd687d 100644 --- a/samples/kobject/kset-example.c +++ b/samples/kobject/kset-example.c @@ -4,9 +4,6 @@ * * Copyright (C) 2004-2007 Greg Kroah-Hartman * Copyright (C) 2007 Novell Inc. - * - * Released under the GPL version 2 only. - * */ #include #include -- cgit v1.2.3 From a0e94598e6b6c0d1df6a5fa14eb7c767ca817a20 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 9 Dec 2017 17:24:24 +0100 Subject: Fix misannotated out-of-line _copy_to_user() Destination is a kernel pointer and source - a userland one in _copy_from_user(); _copy_to_user() is the other way round. Fixes: d597580d37377 ("generic ...copy_..._user primitives") Signed-off-by: Christophe Leroy Signed-off-by: Al Viro --- lib/usercopy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/usercopy.c b/lib/usercopy.c index 15e2e6fb060e..3744b2a8e591 100644 --- a/lib/usercopy.c +++ b/lib/usercopy.c @@ -20,7 +20,7 @@ EXPORT_SYMBOL(_copy_from_user); #endif #ifndef INLINE_COPY_TO_USER -unsigned long _copy_to_user(void *to, const void __user *from, unsigned long n) +unsigned long _copy_to_user(void __user *to, const void *from, unsigned long n) { might_fault(); if (likely(access_ok(VERIFY_WRITE, to, n))) { -- cgit v1.2.3 From 97a6ec4ac021f7fbec05c15a3aa0c4aaf0461af5 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 4 Dec 2017 10:31:41 -0800 Subject: rhashtable: Change rhashtable_walk_start to return void Most callers of rhashtable_walk_start don't care about a resize event which is indicated by a return value of -EAGAIN. So calls to rhashtable_walk_start are wrapped wih code to ignore -EAGAIN. Something like this is common: ret = rhashtable_walk_start(rhiter); if (ret && ret != -EAGAIN) goto out; Since zero and -EAGAIN are the only possible return values from the function this check is pointless. The condition never evaluates to true. This patch changes rhashtable_walk_start to return void. This simplifies code for the callers that ignore -EAGAIN. For the few cases where the caller cares about the resize event, particularly where the table can be walked in mulitple parts for netlink or seq file dump, the function rhashtable_walk_start_check has been added that returns -EAGAIN on a resize event. Signed-off-by: Tom Herbert Acked-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 6 +--- .../net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c | 7 ++--- fs/gfs2/glock.c | 7 ++--- include/linux/rhashtable.h | 8 ++++- include/net/sctp/sctp.h | 2 +- lib/rhashtable.c | 10 +++++-- lib/test_rhashtable.c | 6 +--- net/ipv6/ila/ila_xlat.c | 4 +-- net/ipv6/seg6.c | 4 +-- net/mac80211/mesh_pathtbl.c | 34 +++++++--------------- net/netfilter/nft_set_hash.c | 10 ++----- net/netlink/af_netlink.c | 5 ++-- net/netlink/diag.c | 8 ++--- net/sctp/proc.c | 6 +--- net/sctp/socket.c | 19 +++--------- net/tipc/socket.c | 6 ++-- 16 files changed, 48 insertions(+), 94 deletions(-) (limited to 'lib') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 9807214da206..2ae5ed151369 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -1412,11 +1412,7 @@ bnxt_tc_flow_stats_batch_prep(struct bnxt *bp, void *flow_node; int rc, i; - rc = rhashtable_walk_start(iter); - if (rc && rc != -EAGAIN) { - i = 0; - goto done; - } + rhashtable_walk_start(iter); rc = 0; for (i = 0; i < BNXT_FLOW_STATS_BATCH_MAX; i++) { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index a12b894f135d..9b9f3f99b39d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -763,9 +763,7 @@ static void ch_flower_stats_handler(struct work_struct *work) rhashtable_walk_enter(&adap->flower_tbl, &iter); do { - flower_entry = ERR_PTR(rhashtable_walk_start(&iter)); - if (IS_ERR(flower_entry)) - goto walk_stop; + rhashtable_walk_start(&iter); while ((flower_entry = rhashtable_walk_next(&iter)) && !IS_ERR(flower_entry)) { @@ -784,8 +782,9 @@ static void ch_flower_stats_handler(struct work_struct *work) spin_unlock(&flower_entry->lock); } } -walk_stop: + rhashtable_walk_stop(&iter); + } while (flower_entry == ERR_PTR(-EAGAIN)); rhashtable_walk_exit(&iter); mod_timer(&adap->flower_stats_timer, jiffies + STATS_CHECK_PERIOD); diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 11066d8647d2..90af87ff29ba 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1549,16 +1549,13 @@ static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp) rhashtable_walk_enter(&gl_hash_table, &iter); do { - gl = ERR_PTR(rhashtable_walk_start(&iter)); - if (IS_ERR(gl)) - goto walk_stop; + rhashtable_walk_start(&iter); while ((gl = rhashtable_walk_next(&iter)) && !IS_ERR(gl)) if (gl->gl_name.ln_sbd == sdp && lockref_get_not_dead(&gl->gl_lockref)) examiner(gl); -walk_stop: rhashtable_walk_stop(&iter); } while (cond_resched(), gl == ERR_PTR(-EAGAIN)); @@ -1947,7 +1944,7 @@ static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos) loff_t n = *pos; rhashtable_walk_enter(&gl_hash_table, &gi->hti); - if (rhashtable_walk_start(&gi->hti) != 0) + if (rhashtable_walk_start_check(&gi->hti) != 0) return NULL; do { diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 361c08e35dbc..13ccc483738d 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -378,7 +378,13 @@ void *rhashtable_insert_slow(struct rhashtable *ht, const void *key, void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter); void rhashtable_walk_exit(struct rhashtable_iter *iter); -int rhashtable_walk_start(struct rhashtable_iter *iter) __acquires(RCU); +int rhashtable_walk_start_check(struct rhashtable_iter *iter) __acquires(RCU); + +static inline void rhashtable_walk_start(struct rhashtable_iter *iter) +{ + (void)rhashtable_walk_start_check(iter); +} + void *rhashtable_walk_next(struct rhashtable_iter *iter); void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU); diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 906a9c0efa71..6f79415f6634 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -116,7 +116,7 @@ extern struct percpu_counter sctp_sockets_allocated; int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *); struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *); -int sctp_transport_walk_start(struct rhashtable_iter *iter); +void sctp_transport_walk_start(struct rhashtable_iter *iter); void sctp_transport_walk_stop(struct rhashtable_iter *iter); struct sctp_transport *sctp_transport_get_next(struct net *net, struct rhashtable_iter *iter); diff --git a/lib/rhashtable.c b/lib/rhashtable.c index ddd7dde87c3c..1935e86ed477 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -732,7 +732,7 @@ void rhashtable_walk_exit(struct rhashtable_iter *iter) EXPORT_SYMBOL_GPL(rhashtable_walk_exit); /** - * rhashtable_walk_start - Start a hash table walk + * rhashtable_walk_start_check - Start a hash table walk * @iter: Hash table iterator * * Start a hash table walk at the current iterator position. Note that we take @@ -744,8 +744,12 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_exit); * Returns -EAGAIN if resize event occured. Note that the iterator * will rewind back to the beginning and you may use it immediately * by calling rhashtable_walk_next. + * + * rhashtable_walk_start is defined as an inline variant that returns + * void. This is preferred in cases where the caller would ignore + * resize events and always continue. */ -int rhashtable_walk_start(struct rhashtable_iter *iter) +int rhashtable_walk_start_check(struct rhashtable_iter *iter) __acquires(RCU) { struct rhashtable *ht = iter->ht; @@ -764,7 +768,7 @@ int rhashtable_walk_start(struct rhashtable_iter *iter) return 0; } -EXPORT_SYMBOL_GPL(rhashtable_walk_start); +EXPORT_SYMBOL_GPL(rhashtable_walk_start_check); /** * rhashtable_walk_next - Return the next object and advance the iterator diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index 8e83cbdc049c..76d3667fdea2 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -162,11 +162,7 @@ static void test_bucket_stats(struct rhashtable *ht, unsigned int entries) return; } - err = rhashtable_walk_start(&hti); - if (err && err != -EAGAIN) { - pr_warn("Test failed: iterator failed: %d\n", err); - return; - } + rhashtable_walk_start(&hti); while ((pos = rhashtable_walk_next(&hti))) { if (PTR_ERR(pos) == -EAGAIN) { diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index 6eb5e68f112a..44c39c5f0638 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -512,9 +512,7 @@ static int ila_nl_dump(struct sk_buff *skb, struct netlink_callback *cb) struct ila_map *ila; int ret; - ret = rhashtable_walk_start(rhiter); - if (ret && ret != -EAGAIN) - goto done; + rhashtable_walk_start(rhiter); for (;;) { ila = rhashtable_walk_next(rhiter); diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index c81407770956..7f5621d09571 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -306,9 +306,7 @@ static int seg6_genl_dumphmac(struct sk_buff *skb, struct netlink_callback *cb) struct seg6_hmac_info *hinfo; int ret; - ret = rhashtable_walk_start(iter); - if (ret && ret != -EAGAIN) - goto done; + rhashtable_walk_start(iter); for (;;) { hinfo = rhashtable_walk_next(iter); diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 86c8dfef56a4..a5125624a76d 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -257,9 +257,7 @@ __mesh_path_lookup_by_idx(struct mesh_table *tbl, int idx) if (ret) return NULL; - ret = rhashtable_walk_start(&iter); - if (ret && ret != -EAGAIN) - goto err; + rhashtable_walk_start(&iter); while ((mpath = rhashtable_walk_next(&iter))) { if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN) @@ -269,7 +267,6 @@ __mesh_path_lookup_by_idx(struct mesh_table *tbl, int idx) if (i++ == idx) break; } -err: rhashtable_walk_stop(&iter); rhashtable_walk_exit(&iter); @@ -513,9 +510,7 @@ void mesh_plink_broken(struct sta_info *sta) if (ret) return; - ret = rhashtable_walk_start(&iter); - if (ret && ret != -EAGAIN) - goto out; + rhashtable_walk_start(&iter); while ((mpath = rhashtable_walk_next(&iter))) { if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN) @@ -535,7 +530,6 @@ void mesh_plink_broken(struct sta_info *sta) WLAN_REASON_MESH_PATH_DEST_UNREACHABLE, bcast); } } -out: rhashtable_walk_stop(&iter); rhashtable_walk_exit(&iter); } @@ -584,9 +578,7 @@ void mesh_path_flush_by_nexthop(struct sta_info *sta) if (ret) return; - ret = rhashtable_walk_start(&iter); - if (ret && ret != -EAGAIN) - goto out; + rhashtable_walk_start(&iter); while ((mpath = rhashtable_walk_next(&iter))) { if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN) @@ -597,7 +589,7 @@ void mesh_path_flush_by_nexthop(struct sta_info *sta) if (rcu_access_pointer(mpath->next_hop) == sta) __mesh_path_del(tbl, mpath); } -out: + rhashtable_walk_stop(&iter); rhashtable_walk_exit(&iter); } @@ -614,9 +606,7 @@ static void mpp_flush_by_proxy(struct ieee80211_sub_if_data *sdata, if (ret) return; - ret = rhashtable_walk_start(&iter); - if (ret && ret != -EAGAIN) - goto out; + rhashtable_walk_start(&iter); while ((mpath = rhashtable_walk_next(&iter))) { if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN) @@ -627,7 +617,7 @@ static void mpp_flush_by_proxy(struct ieee80211_sub_if_data *sdata, if (ether_addr_equal(mpath->mpp, proxy)) __mesh_path_del(tbl, mpath); } -out: + rhashtable_walk_stop(&iter); rhashtable_walk_exit(&iter); } @@ -642,9 +632,7 @@ static void table_flush_by_iface(struct mesh_table *tbl) if (ret) return; - ret = rhashtable_walk_start(&iter); - if (ret && ret != -EAGAIN) - goto out; + rhashtable_walk_start(&iter); while ((mpath = rhashtable_walk_next(&iter))) { if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN) @@ -653,7 +641,7 @@ static void table_flush_by_iface(struct mesh_table *tbl) break; __mesh_path_del(tbl, mpath); } -out: + rhashtable_walk_stop(&iter); rhashtable_walk_exit(&iter); } @@ -873,9 +861,7 @@ void mesh_path_tbl_expire(struct ieee80211_sub_if_data *sdata, if (ret) return; - ret = rhashtable_walk_start(&iter); - if (ret && ret != -EAGAIN) - goto out; + rhashtable_walk_start(&iter); while ((mpath = rhashtable_walk_next(&iter))) { if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN) @@ -887,7 +873,7 @@ void mesh_path_tbl_expire(struct ieee80211_sub_if_data *sdata, time_after(jiffies, mpath->exp_time + MESH_PATH_EXPIRE)) __mesh_path_del(tbl, mpath); } -out: + rhashtable_walk_stop(&iter); rhashtable_walk_exit(&iter); } diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index f8166c1d5430..3f1624ee056f 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -251,11 +251,7 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set, if (err) return; - err = rhashtable_walk_start(&hti); - if (err && err != -EAGAIN) { - iter->err = err; - goto out; - } + rhashtable_walk_start(&hti); while ((he = rhashtable_walk_next(&hti))) { if (IS_ERR(he)) { @@ -306,9 +302,7 @@ static void nft_rhash_gc(struct work_struct *work) if (err) goto schedule; - err = rhashtable_walk_start(&hti); - if (err && err != -EAGAIN) - goto out; + rhashtable_walk_start(&hti); while ((he = rhashtable_walk_next(&hti))) { if (IS_ERR(he)) { diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index b9e0ee4e22f5..ab325d4d6fef 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2478,8 +2478,9 @@ static int netlink_walk_start(struct nl_seq_iter *iter) return err; } - err = rhashtable_walk_start(&iter->hti); - return err == -EAGAIN ? 0 : err; + rhashtable_walk_start(&iter->hti); + + return 0; } static void netlink_walk_stop(struct nl_seq_iter *iter) diff --git a/net/netlink/diag.c b/net/netlink/diag.c index 8faa20b4d457..7dda33b9b784 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -115,11 +115,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, if (!s_num) rhashtable_walk_enter(&tbl->hash, hti); - ret = rhashtable_walk_start(hti); - if (ret == -EAGAIN) - ret = 0; - if (ret) - goto stop; + rhashtable_walk_start(hti); while ((nlsk = rhashtable_walk_next(hti))) { if (IS_ERR(nlsk)) { @@ -146,8 +142,8 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, } } -stop: rhashtable_walk_stop(hti); + if (ret) goto done; diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 26b4be6b4172..4545bc2aff84 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -288,12 +288,8 @@ struct sctp_ht_iter { static void *sctp_transport_seq_start(struct seq_file *seq, loff_t *pos) { struct sctp_ht_iter *iter = seq->private; - int err = sctp_transport_walk_start(&iter->hti); - if (err) { - iter->start_fail = 1; - return ERR_PTR(err); - } + sctp_transport_walk_start(&iter->hti); iter->start_fail = 0; return sctp_transport_get_idx(seq_file_net(seq), &iter->hti, *pos); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index eb17a911aa29..3e55daa37e66 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4676,20 +4676,11 @@ int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, EXPORT_SYMBOL_GPL(sctp_get_sctp_info); /* use callback to avoid exporting the core structure */ -int sctp_transport_walk_start(struct rhashtable_iter *iter) +void sctp_transport_walk_start(struct rhashtable_iter *iter) { - int err; - rhltable_walk_enter(&sctp_transport_hashtable, iter); - err = rhashtable_walk_start(iter); - if (err && err != -EAGAIN) { - rhashtable_walk_stop(iter); - rhashtable_walk_exit(iter); - return err; - } - - return 0; + rhashtable_walk_start(iter); } void sctp_transport_walk_stop(struct rhashtable_iter *iter) @@ -4780,12 +4771,10 @@ int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), struct net *net, int *pos, void *p) { struct rhashtable_iter hti; struct sctp_transport *tsp; - int ret; + int ret = 0; again: - ret = sctp_transport_walk_start(&hti); - if (ret) - return ret; + sctp_transport_walk_start(&hti); tsp = sctp_transport_get_idx(net, &hti, *pos + 1); for (; !IS_ERR_OR_NULL(tsp); tsp = sctp_transport_get_next(net, &hti)) { diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 5d18c0caa92b..22c4fd8a9dfe 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2640,9 +2640,7 @@ void tipc_sk_reinit(struct net *net) rhashtable_walk_enter(&tn->sk_rht, &iter); do { - tsk = ERR_PTR(rhashtable_walk_start(&iter)); - if (IS_ERR(tsk)) - goto walk_stop; + rhashtable_walk_start(&iter); while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) { spin_lock_bh(&tsk->sk.sk_lock.slock); @@ -2651,7 +2649,7 @@ void tipc_sk_reinit(struct net *net) msg_set_orignode(msg, tn->own_addr); spin_unlock_bh(&tsk->sk.sk_lock.slock); } -walk_stop: + rhashtable_walk_stop(&iter); } while (tsk == ERR_PTR(-EAGAIN)); } -- cgit v1.2.3 From 2db54b475ae918d274bfc276416c384ba95e9f94 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 4 Dec 2017 10:31:42 -0800 Subject: rhashtable: Add rhastable_walk_peek This function is like rhashtable_walk_next except that it only returns the current element in the inter and does not advance the iter. This patch also creates __rhashtable_walk_find_next. It finds the next element in the table when the entry cached in iter is NULL or at the end of a slot. __rhashtable_walk_find_next is called from rhashtable_walk_next and rhastable_walk_peek. end_of_table is an added field to the iter structure. This indicates that the end of table was reached (walker.tbl being NULL is not a sufficient condition for end of table). Signed-off-by: Tom Herbert Acked-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 2 + lib/rhashtable.c | 103 ++++++++++++++++++++++++++++++++++++++------- 2 files changed, 89 insertions(+), 16 deletions(-) (limited to 'lib') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 13ccc483738d..542b1b265ac4 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -207,6 +207,7 @@ struct rhashtable_iter { struct rhashtable_walker walker; unsigned int slot; unsigned int skip; + bool end_of_table; }; static inline unsigned long rht_marker(const struct rhashtable *ht, u32 hash) @@ -386,6 +387,7 @@ static inline void rhashtable_walk_start(struct rhashtable_iter *iter) } void *rhashtable_walk_next(struct rhashtable_iter *iter); +void *rhashtable_walk_peek(struct rhashtable_iter *iter); void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU); void rhashtable_free_and_destroy(struct rhashtable *ht, diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 1935e86ed477..6fc52d82efe6 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -707,6 +707,7 @@ void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter) iter->p = NULL; iter->slot = 0; iter->skip = 0; + iter->end_of_table = 0; spin_lock(&ht->lock); iter->walker.tbl = @@ -761,7 +762,7 @@ int rhashtable_walk_start_check(struct rhashtable_iter *iter) list_del(&iter->walker.list); spin_unlock(&ht->lock); - if (!iter->walker.tbl) { + if (!iter->walker.tbl && !iter->end_of_table) { iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht); return -EAGAIN; } @@ -771,18 +772,16 @@ int rhashtable_walk_start_check(struct rhashtable_iter *iter) EXPORT_SYMBOL_GPL(rhashtable_walk_start_check); /** - * rhashtable_walk_next - Return the next object and advance the iterator + * __rhashtable_walk_find_next - Find the next element in a table (or the first + * one in case of a new walk). + * * @iter: Hash table iterator * - * Note that you must call rhashtable_walk_stop when you are finished - * with the walk. + * Returns the found object or NULL when the end of the table is reached. * - * Returns the next object or NULL when the end of the table is reached. - * - * Returns -EAGAIN if resize event occured. Note that the iterator - * will rewind back to the beginning and you may continue to use it. + * Returns -EAGAIN if resize event occurred. */ -void *rhashtable_walk_next(struct rhashtable_iter *iter) +static void *__rhashtable_walk_find_next(struct rhashtable_iter *iter) { struct bucket_table *tbl = iter->walker.tbl; struct rhlist_head *list = iter->list; @@ -790,13 +789,8 @@ void *rhashtable_walk_next(struct rhashtable_iter *iter) struct rhash_head *p = iter->p; bool rhlist = ht->rhlist; - if (p) { - if (!rhlist || !(list = rcu_dereference(list->next))) { - p = rcu_dereference(p->next); - list = container_of(p, struct rhlist_head, rhead); - } - goto next; - } + if (!tbl) + return NULL; for (; iter->slot < tbl->size; iter->slot++) { int skip = iter->skip; @@ -840,12 +834,89 @@ next: iter->slot = 0; iter->skip = 0; return ERR_PTR(-EAGAIN); + } else { + iter->end_of_table = true; } return NULL; } + +/** + * rhashtable_walk_next - Return the next object and advance the iterator + * @iter: Hash table iterator + * + * Note that you must call rhashtable_walk_stop when you are finished + * with the walk. + * + * Returns the next object or NULL when the end of the table is reached. + * + * Returns -EAGAIN if resize event occurred. Note that the iterator + * will rewind back to the beginning and you may continue to use it. + */ +void *rhashtable_walk_next(struct rhashtable_iter *iter) +{ + struct rhlist_head *list = iter->list; + struct rhashtable *ht = iter->ht; + struct rhash_head *p = iter->p; + bool rhlist = ht->rhlist; + + if (p) { + if (!rhlist || !(list = rcu_dereference(list->next))) { + p = rcu_dereference(p->next); + list = container_of(p, struct rhlist_head, rhead); + } + if (!rht_is_a_nulls(p)) { + iter->skip++; + iter->p = p; + iter->list = list; + return rht_obj(ht, rhlist ? &list->rhead : p); + } + + /* At the end of this slot, switch to next one and then find + * next entry from that point. + */ + iter->skip = 0; + iter->slot++; + } + + return __rhashtable_walk_find_next(iter); +} EXPORT_SYMBOL_GPL(rhashtable_walk_next); +/** + * rhashtable_walk_peek - Return the next object but don't advance the iterator + * @iter: Hash table iterator + * + * Returns the next object or NULL when the end of the table is reached. + * + * Returns -EAGAIN if resize event occurred. Note that the iterator + * will rewind back to the beginning and you may continue to use it. + */ +void *rhashtable_walk_peek(struct rhashtable_iter *iter) +{ + struct rhlist_head *list = iter->list; + struct rhashtable *ht = iter->ht; + struct rhash_head *p = iter->p; + + if (p) + return rht_obj(ht, ht->rhlist ? &list->rhead : p); + + /* No object found in current iter, find next one in the table. */ + + if (iter->skip) { + /* A nonzero skip value points to the next entry in the table + * beyond that last one that was found. Decrement skip so + * we find the current value. __rhashtable_walk_find_next + * will restore the original value of skip assuming that + * the table hasn't changed. + */ + iter->skip--; + } + + return __rhashtable_walk_find_next(iter); +} +EXPORT_SYMBOL_GPL(rhashtable_walk_peek); + /** * rhashtable_walk_stop - Finish a hash table walk * @iter: Hash table iterator -- cgit v1.2.3 From 92f36cca5773cbaa78c46ccf49503964a52da294 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 4 Dec 2017 10:31:44 -0800 Subject: spinlock: Add library function to allocate spinlock buckets array Add two new library functions: alloc_bucket_spinlocks and free_bucket_spinlocks. These are used to allocate and free an array of spinlocks that are useful as locks for hash buckets. The interface specifies the maximum number of spinlocks in the array as well as a CPU multiplier to derive the number of spinlocks to allocate. The number allocated is rounded up to a power of two to make the array amenable to hash lookup. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/spinlock.h | 6 ++++++ lib/Makefile | 2 +- lib/bucket_locks.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+), 1 deletion(-) create mode 100644 lib/bucket_locks.c (limited to 'lib') diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index a39186194cd6..10fd28b118ee 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -414,4 +414,10 @@ extern int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock); #define atomic_dec_and_lock(atomic, lock) \ __cond_lock(lock, _atomic_dec_and_lock(atomic, lock)) +int alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *lock_mask, + size_t max_size, unsigned int cpu_mult, + gfp_t gfp); + +void free_bucket_spinlocks(spinlock_t *locks); + #endif /* __LINUX_SPINLOCK_H */ diff --git a/lib/Makefile b/lib/Makefile index d11c48ec8ffd..a6c8529dd9b2 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -39,7 +39,7 @@ obj-y += bcd.o div64.o sort.o parser.o debug_locks.o random32.o \ gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \ bsearch.o find_bit.o llist.o memweight.o kfifo.o \ percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o \ - once.o refcount.o usercopy.o errseq.o + once.o refcount.o usercopy.o errseq.o bucket_locks.o obj-$(CONFIG_STRING_SELFTEST) += test_string.o obj-y += string_helpers.o obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o diff --git a/lib/bucket_locks.c b/lib/bucket_locks.c new file mode 100644 index 000000000000..266a97c5708b --- /dev/null +++ b/lib/bucket_locks.c @@ -0,0 +1,54 @@ +#include +#include +#include +#include +#include + +/* Allocate an array of spinlocks to be accessed by a hash. Two arguments + * indicate the number of elements to allocate in the array. max_size + * gives the maximum number of elements to allocate. cpu_mult gives + * the number of locks per CPU to allocate. The size is rounded up + * to a power of 2 to be suitable as a hash table. + */ + +int alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *locks_mask, + size_t max_size, unsigned int cpu_mult, gfp_t gfp) +{ + spinlock_t *tlocks = NULL; + unsigned int i, size; +#if defined(CONFIG_PROVE_LOCKING) + unsigned int nr_pcpus = 2; +#else + unsigned int nr_pcpus = num_possible_cpus(); +#endif + + if (cpu_mult) { + nr_pcpus = min_t(unsigned int, nr_pcpus, 64UL); + size = min_t(unsigned int, nr_pcpus * cpu_mult, max_size); + } else { + size = max_size; + } + + if (sizeof(spinlock_t) != 0) { + if (gfpflags_allow_blocking(gfp)) + tlocks = kvmalloc(size * sizeof(spinlock_t), gfp); + else + tlocks = kmalloc_array(size, sizeof(spinlock_t), gfp); + if (!tlocks) + return -ENOMEM; + for (i = 0; i < size; i++) + spin_lock_init(&tlocks[i]); + } + + *locks = tlocks; + *locks_mask = size - 1; + + return 0; +} +EXPORT_SYMBOL(alloc_bucket_spinlocks); + +void free_bucket_spinlocks(spinlock_t *locks) +{ + kvfree(locks); +} +EXPORT_SYMBOL(free_bucket_spinlocks); -- cgit v1.2.3 From 64e0cd0d3540dbbdf6661943025409e6b31d5178 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 4 Dec 2017 10:31:45 -0800 Subject: rhashtable: Call library function alloc_bucket_locks To allocate the array of bucket locks for the hash table we now call library function alloc_bucket_spinlocks. This function is based on the old alloc_bucket_locks in rhashtable and should produce the same effect. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- lib/rhashtable.c | 47 ++++++++--------------------------------------- 1 file changed, 8 insertions(+), 39 deletions(-) (limited to 'lib') diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 6fc52d82efe6..3825c30aaa36 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -65,42 +65,6 @@ EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held); #define ASSERT_RHT_MUTEX(HT) #endif - -static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl, - gfp_t gfp) -{ - unsigned int i, size; -#if defined(CONFIG_PROVE_LOCKING) - unsigned int nr_pcpus = 2; -#else - unsigned int nr_pcpus = num_possible_cpus(); -#endif - - nr_pcpus = min_t(unsigned int, nr_pcpus, 64UL); - size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul); - - /* Never allocate more than 0.5 locks per bucket */ - size = min_t(unsigned int, size, tbl->size >> 1); - - if (tbl->nest) - size = min(size, 1U << tbl->nest); - - if (sizeof(spinlock_t) != 0) { - if (gfpflags_allow_blocking(gfp)) - tbl->locks = kvmalloc(size * sizeof(spinlock_t), gfp); - else - tbl->locks = kmalloc_array(size, sizeof(spinlock_t), - gfp); - if (!tbl->locks) - return -ENOMEM; - for (i = 0; i < size; i++) - spin_lock_init(&tbl->locks[i]); - } - tbl->locks_mask = size - 1; - - return 0; -} - static void nested_table_free(union nested_table *ntbl, unsigned int size) { const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); @@ -140,7 +104,7 @@ static void bucket_table_free(const struct bucket_table *tbl) if (tbl->nest) nested_bucket_table_free(tbl); - kvfree(tbl->locks); + free_bucket_spinlocks(tbl->locks); kvfree(tbl); } @@ -207,7 +171,7 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, gfp_t gfp) { struct bucket_table *tbl = NULL; - size_t size; + size_t size, max_locks; int i; size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]); @@ -227,7 +191,12 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, tbl->size = size; - if (alloc_bucket_locks(ht, tbl, gfp) < 0) { + max_locks = size >> 1; + if (tbl->nest) + max_locks = min_t(size_t, max_locks, 1U << tbl->nest); + + if (alloc_bucket_spinlocks(&tbl->locks, &tbl->locks_mask, max_locks, + ht->p.locks_mul, gfp) < 0) { bucket_table_free(tbl); return NULL; } -- cgit v1.2.3 From 0f7cda2b824bb2afe0d75716a8664117fa03f5e0 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 1 Dec 2017 12:10:00 -0800 Subject: Kconfig: Make STRICT_DEVMEM default-y on x86 and arm64 Distros have been shipping with CONFIG_STRICT_DEVMEM=y for years now. It is probably time to flip this default for x86 and arm64. Signed-off-by: Kees Cook Acked-by: Laura Abbott Cc: Andrew Morton Cc: Linus Torvalds Cc: Mark Rutland Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: kernel-hardening@lists.openwall.com Link: http://lkml.kernel.org/r/20171201201000.GA44539@beast Signed-off-by: Ingo Molnar --- lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 947d3e2ed5c2..39b123d04a36 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1985,7 +1985,7 @@ config STRICT_DEVMEM bool "Filter access to /dev/mem" depends on MMU && DEVMEM depends on ARCH_HAS_DEVMEM_IS_ALLOWED - default y if TILE || PPC + default y if TILE || PPC || X86 || ARM64 ---help--- If this option is disabled, you allow userspace (root) access to all of memory, including kernel and userspace memory. Accidental -- cgit v1.2.3 From d063623b6ae7c326d21a2713dd38cab15d794c1c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 29 Nov 2017 12:32:42 -0800 Subject: Documentation: add UUID/GUID to kernel-api Update kernel-doc notation in lib/uuid.c and then add UUID/GUID function interfaces to kernel-api. Signed-off-by: Randy Dunlap [jc: tweaked the uuid_is_valid() kerneldoc] Signed-off-by: Jonathan Corbet --- Documentation/core-api/kernel-api.rst | 6 ++++++ lib/uuid.c | 34 +++++++++++++++++----------------- 2 files changed, 23 insertions(+), 17 deletions(-) (limited to 'lib') diff --git a/Documentation/core-api/kernel-api.rst b/Documentation/core-api/kernel-api.rst index 5d2d5f83579c..e7fadf02c511 100644 --- a/Documentation/core-api/kernel-api.rst +++ b/Documentation/core-api/kernel-api.rst @@ -148,6 +148,12 @@ Sorting .. kernel-doc:: lib/list_sort.c :export: +UUID/GUID +--------- + +.. kernel-doc:: lib/uuid.c + :export: + Memory Management in Linux ========================== diff --git a/lib/uuid.c b/lib/uuid.c index 680b9fb9ba09..2290b9f001a9 100644 --- a/lib/uuid.c +++ b/lib/uuid.c @@ -29,15 +29,14 @@ EXPORT_SYMBOL(uuid_null); const u8 guid_index[16] = {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15}; const u8 uuid_index[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; -/*************************************************************** +/** + * generate_random_uuid - generate a random UUID + * @uuid: where to put the generated UUID + * * Random UUID interface * - * Used here for a Boot ID, but can be useful for other kernel - * drivers. - ***************************************************************/ - -/* - * Generate random UUID + * Used to create a Boot ID or a filesystem UUID/GUID, but can be + * useful for other kernel drivers. */ void generate_random_uuid(unsigned char uuid[16]) { @@ -73,16 +72,17 @@ void uuid_gen(uuid_t *bu) EXPORT_SYMBOL_GPL(uuid_gen); /** - * uuid_is_valid - checks if UUID string valid - * @uuid: UUID string to check - * - * Description: - * It checks if the UUID string is following the format: - * xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx - * where x is a hex digit. - * - * Return: true if input is valid UUID string. - */ + * uuid_is_valid - checks if a UUID string is valid + * @uuid: UUID string to check + * + * Description: + * It checks if the UUID string is following the format: + * xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + * + * where x is a hex digit. + * + * Return: true if input is valid UUID string. + */ bool uuid_is_valid(const char *uuid) { unsigned int i; -- cgit v1.2.3 From b3ed23213eab1e08be594ad44b4237588c58af09 Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Wed, 20 Dec 2017 08:17:15 +1100 Subject: doc: convert printk-formats.txt to rst Documentation/printk-formats.txt is a candidate for conversion to ReStructuredText format. Some effort has already been made to do this conversion even thought the suffix is currently .txt Changes required to complete conversion - Move printk-formats.txt to core-api/printk-formats.rst - Add entry to Documentation/core-api/index.rst - Remove entry from Documentation/00-INDEX - Fix minor grammatical errors. - Order heading adornments as suggested by rst docs. - Use 'Passed by reference' uniformly. - Update pointer documentation around %px specifier. - Fix erroneous double backticks (to commas). - Remove extraneous double backticks (suggested by Jonathan Corbet). - Simplify documentation for kobject. Signed-off-by: Tobin C. Harding [jc: downcased "kernel"] Signed-off-by: Jonathan Corbet --- Documentation/00-INDEX | 2 - Documentation/core-api/index.rst | 1 + Documentation/core-api/printk-formats.rst | 492 ++++++++++++++++++++++++++++++ Documentation/printk-formats.txt | 483 ----------------------------- lib/vsprintf.c | 3 +- 5 files changed, 495 insertions(+), 486 deletions(-) create mode 100644 Documentation/core-api/printk-formats.rst delete mode 100644 Documentation/printk-formats.txt (limited to 'lib') diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX index 3bec49c33bbb..7023bfaec21c 100644 --- a/Documentation/00-INDEX +++ b/Documentation/00-INDEX @@ -346,8 +346,6 @@ prctl/ - directory with info on the priveledge control subsystem preempt-locking.txt - info on locking under a preemptive kernel. -printk-formats.txt - - how to get printk format specifiers right process/ - how to work with the mainline kernel development process. pps/ diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst index d4d54b05d6c5..d55ee6b006ed 100644 --- a/Documentation/core-api/index.rst +++ b/Documentation/core-api/index.rst @@ -22,6 +22,7 @@ Core utilities flexible-arrays librs genalloc + printk-formats Interfaces for kernel debugging =============================== diff --git a/Documentation/core-api/printk-formats.rst b/Documentation/core-api/printk-formats.rst new file mode 100644 index 000000000000..258b46435320 --- /dev/null +++ b/Documentation/core-api/printk-formats.rst @@ -0,0 +1,492 @@ +========================================= +How to get printk format specifiers right +========================================= + +:Author: Randy Dunlap +:Author: Andrew Murray + + +Integer types +============= + +:: + + If variable is of Type, use printk format specifier: + ------------------------------------------------------------ + int %d or %x + unsigned int %u or %x + long %ld or %lx + unsigned long %lu or %lx + long long %lld or %llx + unsigned long long %llu or %llx + size_t %zu or %zx + ssize_t %zd or %zx + s32 %d or %x + u32 %u or %x + s64 %lld or %llx + u64 %llu or %llx + + +If is dependent on a config option for its size (e.g., sector_t, +blkcnt_t) or is architecture-dependent for its size (e.g., tcflag_t), use a +format specifier of its largest possible type and explicitly cast to it. + +Example:: + + printk("test: sector number/total blocks: %llu/%llu\n", + (unsigned long long)sector, (unsigned long long)blockcount); + +Reminder: sizeof() returns type size_t. + +The kernel's printf does not support %n. Floating point formats (%e, %f, +%g, %a) are also not recognized, for obvious reasons. Use of any +unsupported specifier or length qualifier results in a WARN and early +return from vsnprintf(). + +Pointer types +============= + +A raw pointer value may be printed with %p which will hash the address +before printing. The kernel also supports extended specifiers for printing +pointers of different types. + +Plain Pointers +-------------- + +:: + + %p abcdef12 or 00000000abcdef12 + +Pointers printed without a specifier extension (i.e unadorned %p) are +hashed to prevent leaking information about the kernel memory layout. This +has the added benefit of providing a unique identifier. On 64-bit machines +the first 32 bits are zeroed. If you *really* want the address see %px +below. + +Symbols/Function Pointers +------------------------- + +:: + + %pF versatile_init+0x0/0x110 + %pf versatile_init + %pS versatile_init+0x0/0x110 + %pSR versatile_init+0x9/0x110 + (with __builtin_extract_return_addr() translation) + %ps versatile_init + %pB prev_fn_of_versatile_init+0x88/0x88 + + +The ``F`` and ``f`` specifiers are for printing function pointers, +for example, f->func, &gettimeofday. They have the same result as +``S`` and ``s`` specifiers. But they do an extra conversion on +ia64, ppc64 and parisc64 architectures where the function pointers +are actually function descriptors. + +The ``S`` and ``s`` specifiers can be used for printing symbols +from direct addresses, for example, __builtin_return_address(0), +(void *)regs->ip. They result in the symbol name with (S) or +without (s) offsets. If KALLSYMS are disabled then the symbol +address is printed instead. + +The ``B`` specifier results in the symbol name with offsets and should be +used when printing stack backtraces. The specifier takes into +consideration the effect of compiler optimisations which may occur +when tail-calls are used and marked with the noreturn GCC attribute. + +Examples:: + + printk("Going to call: %pF\n", gettimeofday); + printk("Going to call: %pF\n", p->func); + printk("%s: called from %pS\n", __func__, (void *)_RET_IP_); + printk("%s: called from %pS\n", __func__, + (void *)__builtin_return_address(0)); + printk("Faulted at %pS\n", (void *)regs->ip); + printk(" %s%pB\n", (reliable ? "" : "? "), (void *)*stack); + +Kernel Pointers +--------------- + +:: + + %pK 01234567 or 0123456789abcdef + +For printing kernel pointers which should be hidden from unprivileged +users. The behaviour of %pK depends on the kptr_restrict sysctl - see +Documentation/sysctl/kernel.txt for more details. + +Unmodified Addresses +-------------------- + +:: + + %px 01234567 or 0123456789abcdef + +For printing pointers when you *really* want to print the address. Please +consider whether or not you are leaking sensitive information about the +kernel memory layout before printing pointers with %px. %px is functionally +equivalent to %lx (or %lu). %px is preferred because it is more uniquely +grep'able. If in the future we need to modify the way the kernel handles +printing pointers we will be better equipped to find the call sites. + +Struct Resources +---------------- + +:: + + %pr [mem 0x60000000-0x6fffffff flags 0x2200] or + [mem 0x0000000060000000-0x000000006fffffff flags 0x2200] + %pR [mem 0x60000000-0x6fffffff pref] or + [mem 0x0000000060000000-0x000000006fffffff pref] + +For printing struct resources. The ``R`` and ``r`` specifiers result in a +printed resource with (R) or without (r) a decoded flags member. + +Passed by reference. + +Physical address types phys_addr_t +---------------------------------- + +:: + + %pa[p] 0x01234567 or 0x0123456789abcdef + +For printing a phys_addr_t type (and its derivatives, such as +resource_size_t) which can vary based on build options, regardless of the +width of the CPU data path. + +Passed by reference. + +DMA address types dma_addr_t +---------------------------- + +:: + + %pad 0x01234567 or 0x0123456789abcdef + +For printing a dma_addr_t type which can vary based on build options, +regardless of the width of the CPU data path. + +Passed by reference. + +Raw buffer as an escaped string +------------------------------- + +:: + + %*pE[achnops] + +For printing raw buffer as an escaped string. For the following buffer:: + + 1b 62 20 5c 43 07 22 90 0d 5d + +A few examples show how the conversion would be done (excluding surrounding +quotes):: + + %*pE "\eb \C\a"\220\r]" + %*pEhp "\x1bb \C\x07"\x90\x0d]" + %*pEa "\e\142\040\\\103\a\042\220\r\135" + +The conversion rules are applied according to an optional combination +of flags (see :c:func:`string_escape_mem` kernel documentation for the +details): + + - a - ESCAPE_ANY + - c - ESCAPE_SPECIAL + - h - ESCAPE_HEX + - n - ESCAPE_NULL + - o - ESCAPE_OCTAL + - p - ESCAPE_NP + - s - ESCAPE_SPACE + +By default ESCAPE_ANY_NP is used. + +ESCAPE_ANY_NP is the sane choice for many cases, in particularly for +printing SSIDs. + +If field width is omitted then 1 byte only will be escaped. + +Raw buffer as a hex string +-------------------------- + +:: + + %*ph 00 01 02 ... 3f + %*phC 00:01:02: ... :3f + %*phD 00-01-02- ... -3f + %*phN 000102 ... 3f + +For printing small buffers (up to 64 bytes long) as a hex string with a +certain separator. For larger buffers consider using +:c:func:`print_hex_dump`. + +MAC/FDDI addresses +------------------ + +:: + + %pM 00:01:02:03:04:05 + %pMR 05:04:03:02:01:00 + %pMF 00-01-02-03-04-05 + %pm 000102030405 + %pmR 050403020100 + +For printing 6-byte MAC/FDDI addresses in hex notation. The ``M`` and ``m`` +specifiers result in a printed address with (M) or without (m) byte +separators. The default byte separator is the colon (:). + +Where FDDI addresses are concerned the ``F`` specifier can be used after +the ``M`` specifier to use dash (-) separators instead of the default +separator. + +For Bluetooth addresses the ``R`` specifier shall be used after the ``M`` +specifier to use reversed byte order suitable for visual interpretation +of Bluetooth addresses which are in the little endian order. + +Passed by reference. + +IPv4 addresses +-------------- + +:: + + %pI4 1.2.3.4 + %pi4 001.002.003.004 + %p[Ii]4[hnbl] + +For printing IPv4 dot-separated decimal addresses. The ``I4`` and ``i4`` +specifiers result in a printed address with (i4) or without (I4) leading +zeros. + +The additional ``h``, ``n``, ``b``, and ``l`` specifiers are used to specify +host, network, big or little endian order addresses respectively. Where +no specifier is provided the default network/big endian order is used. + +Passed by reference. + +IPv6 addresses +-------------- + +:: + + %pI6 0001:0002:0003:0004:0005:0006:0007:0008 + %pi6 00010002000300040005000600070008 + %pI6c 1:2:3:4:5:6:7:8 + +For printing IPv6 network-order 16-bit hex addresses. The ``I6`` and ``i6`` +specifiers result in a printed address with (I6) or without (i6) +colon-separators. Leading zeros are always used. + +The additional ``c`` specifier can be used with the ``I`` specifier to +print a compressed IPv6 address as described by +http://tools.ietf.org/html/rfc5952 + +Passed by reference. + +IPv4/IPv6 addresses (generic, with port, flowinfo, scope) +--------------------------------------------------------- + +:: + + %pIS 1.2.3.4 or 0001:0002:0003:0004:0005:0006:0007:0008 + %piS 001.002.003.004 or 00010002000300040005000600070008 + %pISc 1.2.3.4 or 1:2:3:4:5:6:7:8 + %pISpc 1.2.3.4:12345 or [1:2:3:4:5:6:7:8]:12345 + %p[Ii]S[pfschnbl] + +For printing an IP address without the need to distinguish whether it's of +type AF_INET or AF_INET6. A pointer to a valid struct sockaddr, +specified through ``IS`` or ``iS``, can be passed to this format specifier. + +The additional ``p``, ``f``, and ``s`` specifiers are used to specify port +(IPv4, IPv6), flowinfo (IPv6) and scope (IPv6). Ports have a ``:`` prefix, +flowinfo a ``/`` and scope a ``%``, each followed by the actual value. + +In case of an IPv6 address the compressed IPv6 address as described by +http://tools.ietf.org/html/rfc5952 is being used if the additional +specifier ``c`` is given. The IPv6 address is surrounded by ``[``, ``]`` in +case of additional specifiers ``p``, ``f`` or ``s`` as suggested by +https://tools.ietf.org/html/draft-ietf-6man-text-addr-representation-07 + +In case of IPv4 addresses, the additional ``h``, ``n``, ``b``, and ``l`` +specifiers can be used as well and are ignored in case of an IPv6 +address. + +Passed by reference. + +Further examples:: + + %pISfc 1.2.3.4 or [1:2:3:4:5:6:7:8]/123456789 + %pISsc 1.2.3.4 or [1:2:3:4:5:6:7:8]%1234567890 + %pISpfc 1.2.3.4:12345 or [1:2:3:4:5:6:7:8]:12345/123456789 + +UUID/GUID addresses +------------------- + +:: + + %pUb 00010203-0405-0607-0809-0a0b0c0d0e0f + %pUB 00010203-0405-0607-0809-0A0B0C0D0E0F + %pUl 03020100-0504-0706-0809-0a0b0c0e0e0f + %pUL 03020100-0504-0706-0809-0A0B0C0E0E0F + +For printing 16-byte UUID/GUIDs addresses. The additional ``l``, ``L``, +``b`` and ``B`` specifiers are used to specify a little endian order in +lower (l) or upper case (L) hex notation - and big endian order in lower (b) +or upper case (B) hex notation. + +Where no additional specifiers are used the default big endian +order with lower case hex notation will be printed. + +Passed by reference. + +dentry names +------------ + +:: + + %pd{,2,3,4} + %pD{,2,3,4} + +For printing dentry name; if we race with :c:func:`d_move`, the name might +be a mix of old and new ones, but it won't oops. %pd dentry is a safer +equivalent of %s dentry->d_name.name we used to use, %pd prints ``n`` +last components. %pD does the same thing for struct file. + +Passed by reference. + +block_device names +------------------ + +:: + + %pg sda, sda1 or loop0p1 + +For printing name of block_device pointers. + +struct va_format +---------------- + +:: + + %pV + +For printing struct va_format structures. These contain a format string +and va_list as follows:: + + struct va_format { + const char *fmt; + va_list *va; + }; + +Implements a "recursive vsnprintf". + +Do not use this feature without some mechanism to verify the +correctness of the format string and va_list arguments. + +Passed by reference. + +kobjects +-------- + +:: + + %pOF[fnpPcCF] + + +For printing kobject based structs (device nodes). Default behaviour is +equivalent to %pOFf. + + - f - device node full_name + - n - device node name + - p - device node phandle + - P - device node path spec (name + @unit) + - F - device node flags + - c - major compatible string + - C - full compatible string + +The separator when using multiple arguments is ':' + +Examples:: + + %pOF /foo/bar@0 - Node full name + %pOFf /foo/bar@0 - Same as above + %pOFfp /foo/bar@0:10 - Node full name + phandle + %pOFfcF /foo/bar@0:foo,device:--P- - Node full name + + major compatible string + + node flags + D - dynamic + d - detached + P - Populated + B - Populated bus + +Passed by reference. + +struct clk +---------- + +:: + + %pC pll1 + %pCn pll1 + %pCr 1560000000 + +For printing struct clk structures. %pC and %pCn print the name +(Common Clock Framework) or address (legacy clock framework) of the +structure; %pCr prints the current clock rate. + +Passed by reference. + +bitmap and its derivatives such as cpumask and nodemask +------------------------------------------------------- + +:: + + %*pb 0779 + %*pbl 0,3-6,8-10 + +For printing bitmap and its derivatives such as cpumask and nodemask, +%*pb outputs the bitmap with field width as the number of bits and %*pbl +output the bitmap as range list with field width as the number of bits. + +Passed by reference. + +Flags bitfields such as page flags, gfp_flags +--------------------------------------------- + +:: + + %pGp referenced|uptodate|lru|active|private + %pGg GFP_USER|GFP_DMA32|GFP_NOWARN + %pGv read|exec|mayread|maywrite|mayexec|denywrite + +For printing flags bitfields as a collection of symbolic constants that +would construct the value. The type of flags is given by the third +character. Currently supported are [p]age flags, [v]ma_flags (both +expect ``unsigned long *``) and [g]fp_flags (expects ``gfp_t *``). The flag +names and print order depends on the particular type. + +Note that this format should not be used directly in the +:c:func:`TP_printk()` part of a tracepoint. Instead, use the show_*_flags() +functions from . + +Passed by reference. + +Network device features +----------------------- + +:: + + %pNF 0x000000000000c000 + +For printing netdev_features_t. + +Passed by reference. + +Thanks +====== + +If you add other %p extensions, please extend with +one or more test cases, if at all feasible. + +Thank you for your cooperation and attention. diff --git a/Documentation/printk-formats.txt b/Documentation/printk-formats.txt deleted file mode 100644 index aa0a776c817a..000000000000 --- a/Documentation/printk-formats.txt +++ /dev/null @@ -1,483 +0,0 @@ -========================================= -How to get printk format specifiers right -========================================= - -:Author: Randy Dunlap -:Author: Andrew Murray - -Integer types -============= - -:: - - If variable is of Type, use printk format specifier: - ------------------------------------------------------------ - int %d or %x - unsigned int %u or %x - long %ld or %lx - unsigned long %lu or %lx - long long %lld or %llx - unsigned long long %llu or %llx - size_t %zu or %zx - ssize_t %zd or %zx - s32 %d or %x - u32 %u or %x - s64 %lld or %llx - u64 %llu or %llx - -If is dependent on a config option for its size (e.g., ``sector_t``, -``blkcnt_t``) or is architecture-dependent for its size (e.g., ``tcflag_t``), -use a format specifier of its largest possible type and explicitly cast to it. - -Example:: - - printk("test: sector number/total blocks: %llu/%llu\n", - (unsigned long long)sector, (unsigned long long)blockcount); - -Reminder: ``sizeof()`` result is of type ``size_t``. - -The kernel's printf does not support ``%n``. For obvious reasons, floating -point formats (``%e, %f, %g, %a``) are also not recognized. Use of any -unsupported specifier or length qualifier results in a WARN and early -return from vsnprintf. - -Raw pointer value SHOULD be printed with %p. The kernel supports -the following extended format specifiers for pointer types: - -Pointer Types -============= - -Pointers printed without a specifier extension (i.e unadorned %p) are -hashed to give a unique identifier without leaking kernel addresses to user -space. On 64 bit machines the first 32 bits are zeroed. If you _really_ -want the address see %px below. - -:: - - %p abcdef12 or 00000000abcdef12 - -Symbols/Function Pointers -========================= - -:: - - %pF versatile_init+0x0/0x110 - %pf versatile_init - %pS versatile_init+0x0/0x110 - %pSR versatile_init+0x9/0x110 - (with __builtin_extract_return_addr() translation) - %ps versatile_init - %pB prev_fn_of_versatile_init+0x88/0x88 - -The ``F`` and ``f`` specifiers are for printing function pointers, -for example, f->func, &gettimeofday. They have the same result as -``S`` and ``s`` specifiers. But they do an extra conversion on -ia64, ppc64 and parisc64 architectures where the function pointers -are actually function descriptors. - -The ``S`` and ``s`` specifiers can be used for printing symbols -from direct addresses, for example, __builtin_return_address(0), -(void *)regs->ip. They result in the symbol name with (``S``) or -without (``s``) offsets. If KALLSYMS are disabled then the symbol -address is printed instead. - -The ``B`` specifier results in the symbol name with offsets and should be -used when printing stack backtraces. The specifier takes into -consideration the effect of compiler optimisations which may occur -when tail-call``s are used and marked with the noreturn GCC attribute. - -Examples:: - - printk("Going to call: %pF\n", gettimeofday); - printk("Going to call: %pF\n", p->func); - printk("%s: called from %pS\n", __func__, (void *)_RET_IP_); - printk("%s: called from %pS\n", __func__, - (void *)__builtin_return_address(0)); - printk("Faulted at %pS\n", (void *)regs->ip); - printk(" %s%pB\n", (reliable ? "" : "? "), (void *)*stack); - -Kernel Pointers -=============== - -:: - - %pK 01234567 or 0123456789abcdef - -For printing kernel pointers which should be hidden from unprivileged -users. The behaviour of ``%pK`` depends on the ``kptr_restrict sysctl`` - see -Documentation/sysctl/kernel.txt for more details. - -Unmodified Addresses -==================== - -:: - - %px 01234567 or 0123456789abcdef - -For printing pointers when you _really_ want to print the address. Please -consider whether or not you are leaking sensitive information about the -Kernel layout in memory before printing pointers with %px. %px is -functionally equivalent to %lx. %px is preferred to %lx because it is more -uniquely grep'able. If, in the future, we need to modify the way the Kernel -handles printing pointers it will be nice to be able to find the call -sites. - -Struct Resources -================ - -:: - - %pr [mem 0x60000000-0x6fffffff flags 0x2200] or - [mem 0x0000000060000000-0x000000006fffffff flags 0x2200] - %pR [mem 0x60000000-0x6fffffff pref] or - [mem 0x0000000060000000-0x000000006fffffff pref] - -For printing struct resources. The ``R`` and ``r`` specifiers result in a -printed resource with (``R``) or without (``r``) a decoded flags member. -Passed by reference. - -Physical addresses types ``phys_addr_t`` -======================================== - -:: - - %pa[p] 0x01234567 or 0x0123456789abcdef - -For printing a ``phys_addr_t`` type (and its derivatives, such as -``resource_size_t``) which can vary based on build options, regardless of -the width of the CPU data path. Passed by reference. - -DMA addresses types ``dma_addr_t`` -================================== - -:: - - %pad 0x01234567 or 0x0123456789abcdef - -For printing a ``dma_addr_t`` type which can vary based on build options, -regardless of the width of the CPU data path. Passed by reference. - -Raw buffer as an escaped string -=============================== - -:: - - %*pE[achnops] - -For printing raw buffer as an escaped string. For the following buffer:: - - 1b 62 20 5c 43 07 22 90 0d 5d - -few examples show how the conversion would be done (the result string -without surrounding quotes):: - - %*pE "\eb \C\a"\220\r]" - %*pEhp "\x1bb \C\x07"\x90\x0d]" - %*pEa "\e\142\040\\\103\a\042\220\r\135" - -The conversion rules are applied according to an optional combination -of flags (see :c:func:`string_escape_mem` kernel documentation for the -details): - - - ``a`` - ESCAPE_ANY - - ``c`` - ESCAPE_SPECIAL - - ``h`` - ESCAPE_HEX - - ``n`` - ESCAPE_NULL - - ``o`` - ESCAPE_OCTAL - - ``p`` - ESCAPE_NP - - ``s`` - ESCAPE_SPACE - -By default ESCAPE_ANY_NP is used. - -ESCAPE_ANY_NP is the sane choice for many cases, in particularly for -printing SSIDs. - -If field width is omitted the 1 byte only will be escaped. - -Raw buffer as a hex string -========================== - -:: - - %*ph 00 01 02 ... 3f - %*phC 00:01:02: ... :3f - %*phD 00-01-02- ... -3f - %*phN 000102 ... 3f - -For printing a small buffers (up to 64 bytes long) as a hex string with -certain separator. For the larger buffers consider to use -:c:func:`print_hex_dump`. - -MAC/FDDI addresses -================== - -:: - - %pM 00:01:02:03:04:05 - %pMR 05:04:03:02:01:00 - %pMF 00-01-02-03-04-05 - %pm 000102030405 - %pmR 050403020100 - -For printing 6-byte MAC/FDDI addresses in hex notation. The ``M`` and ``m`` -specifiers result in a printed address with (``M``) or without (``m``) byte -separators. The default byte separator is the colon (``:``). - -Where FDDI addresses are concerned the ``F`` specifier can be used after -the ``M`` specifier to use dash (``-``) separators instead of the default -separator. - -For Bluetooth addresses the ``R`` specifier shall be used after the ``M`` -specifier to use reversed byte order suitable for visual interpretation -of Bluetooth addresses which are in the little endian order. - -Passed by reference. - -IPv4 addresses -============== - -:: - - %pI4 1.2.3.4 - %pi4 001.002.003.004 - %p[Ii]4[hnbl] - -For printing IPv4 dot-separated decimal addresses. The ``I4`` and ``i4`` -specifiers result in a printed address with (``i4``) or without (``I4``) -leading zeros. - -The additional ``h``, ``n``, ``b``, and ``l`` specifiers are used to specify -host, network, big or little endian order addresses respectively. Where -no specifier is provided the default network/big endian order is used. - -Passed by reference. - -IPv6 addresses -============== - -:: - - %pI6 0001:0002:0003:0004:0005:0006:0007:0008 - %pi6 00010002000300040005000600070008 - %pI6c 1:2:3:4:5:6:7:8 - -For printing IPv6 network-order 16-bit hex addresses. The ``I6`` and ``i6`` -specifiers result in a printed address with (``I6``) or without (``i6``) -colon-separators. Leading zeros are always used. - -The additional ``c`` specifier can be used with the ``I`` specifier to -print a compressed IPv6 address as described by -http://tools.ietf.org/html/rfc5952 - -Passed by reference. - -IPv4/IPv6 addresses (generic, with port, flowinfo, scope) -========================================================= - -:: - - %pIS 1.2.3.4 or 0001:0002:0003:0004:0005:0006:0007:0008 - %piS 001.002.003.004 or 00010002000300040005000600070008 - %pISc 1.2.3.4 or 1:2:3:4:5:6:7:8 - %pISpc 1.2.3.4:12345 or [1:2:3:4:5:6:7:8]:12345 - %p[Ii]S[pfschnbl] - -For printing an IP address without the need to distinguish whether it``s -of type AF_INET or AF_INET6, a pointer to a valid ``struct sockaddr``, -specified through ``IS`` or ``iS``, can be passed to this format specifier. - -The additional ``p``, ``f``, and ``s`` specifiers are used to specify port -(IPv4, IPv6), flowinfo (IPv6) and scope (IPv6). Ports have a ``:`` prefix, -flowinfo a ``/`` and scope a ``%``, each followed by the actual value. - -In case of an IPv6 address the compressed IPv6 address as described by -http://tools.ietf.org/html/rfc5952 is being used if the additional -specifier ``c`` is given. The IPv6 address is surrounded by ``[``, ``]`` in -case of additional specifiers ``p``, ``f`` or ``s`` as suggested by -https://tools.ietf.org/html/draft-ietf-6man-text-addr-representation-07 - -In case of IPv4 addresses, the additional ``h``, ``n``, ``b``, and ``l`` -specifiers can be used as well and are ignored in case of an IPv6 -address. - -Passed by reference. - -Further examples:: - - %pISfc 1.2.3.4 or [1:2:3:4:5:6:7:8]/123456789 - %pISsc 1.2.3.4 or [1:2:3:4:5:6:7:8]%1234567890 - %pISpfc 1.2.3.4:12345 or [1:2:3:4:5:6:7:8]:12345/123456789 - -UUID/GUID addresses -=================== - -:: - - %pUb 00010203-0405-0607-0809-0a0b0c0d0e0f - %pUB 00010203-0405-0607-0809-0A0B0C0D0E0F - %pUl 03020100-0504-0706-0809-0a0b0c0e0e0f - %pUL 03020100-0504-0706-0809-0A0B0C0E0E0F - -For printing 16-byte UUID/GUIDs addresses. The additional 'l', 'L', -'b' and 'B' specifiers are used to specify a little endian order in -lower ('l') or upper case ('L') hex characters - and big endian order -in lower ('b') or upper case ('B') hex characters. - -Where no additional specifiers are used the default big endian -order with lower case hex characters will be printed. - -Passed by reference. - -dentry names -============ - -:: - - %pd{,2,3,4} - %pD{,2,3,4} - -For printing dentry name; if we race with :c:func:`d_move`, the name might be -a mix of old and new ones, but it won't oops. ``%pd`` dentry is a safer -equivalent of ``%s`` ``dentry->d_name.name`` we used to use, ``%pd`` prints -``n`` last components. ``%pD`` does the same thing for struct file. - -Passed by reference. - -block_device names -================== - -:: - - %pg sda, sda1 or loop0p1 - -For printing name of block_device pointers. - -struct va_format -================ - -:: - - %pV - -For printing struct va_format structures. These contain a format string -and va_list as follows:: - - struct va_format { - const char *fmt; - va_list *va; - }; - -Implements a "recursive vsnprintf". - -Do not use this feature without some mechanism to verify the -correctness of the format string and va_list arguments. - -Passed by reference. - -kobjects -======== - -:: - - %pO - - Base specifier for kobject based structs. Must be followed with - character for specific type of kobject as listed below: - - Device tree nodes: - - %pOF[fnpPcCF] - - For printing device tree nodes. The optional arguments are: - f device node full_name - n device node name - p device node phandle - P device node path spec (name + @unit) - F device node flags - c major compatible string - C full compatible string - Without any arguments prints full_name (same as %pOFf) - The separator when using multiple arguments is ':' - - Examples: - - %pOF /foo/bar@0 - Node full name - %pOFf /foo/bar@0 - Same as above - %pOFfp /foo/bar@0:10 - Node full name + phandle - %pOFfcF /foo/bar@0:foo,device:--P- - Node full name + - major compatible string + - node flags - D - dynamic - d - detached - P - Populated - B - Populated bus - - Passed by reference. - - -struct clk -========== - -:: - - %pC pll1 - %pCn pll1 - %pCr 1560000000 - -For printing struct clk structures. ``%pC`` and ``%pCn`` print the name -(Common Clock Framework) or address (legacy clock framework) of the -structure; ``%pCr`` prints the current clock rate. - -Passed by reference. - -bitmap and its derivatives such as cpumask and nodemask -======================================================= - -:: - - %*pb 0779 - %*pbl 0,3-6,8-10 - -For printing bitmap and its derivatives such as cpumask and nodemask, -``%*pb`` output the bitmap with field width as the number of bits and ``%*pbl`` -output the bitmap as range list with field width as the number of bits. - -Passed by reference. - -Flags bitfields such as page flags, gfp_flags -============================================= - -:: - - %pGp referenced|uptodate|lru|active|private - %pGg GFP_USER|GFP_DMA32|GFP_NOWARN - %pGv read|exec|mayread|maywrite|mayexec|denywrite - -For printing flags bitfields as a collection of symbolic constants that -would construct the value. The type of flags is given by the third -character. Currently supported are [p]age flags, [v]ma_flags (both -expect ``unsigned long *``) and [g]fp_flags (expects ``gfp_t *``). The flag -names and print order depends on the particular type. - -Note that this format should not be used directly in :c:func:`TP_printk()` part -of a tracepoint. Instead, use the ``show_*_flags()`` functions from -. - -Passed by reference. - -Network device features -======================= - -:: - - %pNF 0x000000000000c000 - -For printing netdev_features_t. - -Passed by reference. - -If you add other ``%p`` extensions, please extend lib/test_printf.c with -one or more test cases, if at all feasible. - - -Thank you for your cooperation and attention. diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 01c3957b2de6..6ec0844ab5d1 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1834,7 +1834,8 @@ static char *ptr_to_id(char *buf, char *end, void *ptr, struct printf_spec spec) * * - 'x' For printing the address. Equivalent to "%lx". * - * ** Please update also Documentation/printk-formats.txt when making changes ** + * ** When making changes please also update: + * Documentation/core-api/printk-formats.rst * * Note: The difference between 'S' and 'F' is that on ia64 and ppc64 * function pointers are really function descriptors, which contain a -- cgit v1.2.3 From 27e7c0e813aa5c626ac7e68250bdac82205057b1 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 21 Dec 2017 12:39:45 -0700 Subject: vsprintf: Fix a dangling documentation reference A reference to printk-formats.txt didn't get updated when the file moved; fix that. Signed-off-by: Jonathan Corbet --- lib/vsprintf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 6ec0844ab5d1..2b18135446dc 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -2195,7 +2195,7 @@ set_precision(struct printf_spec *spec, int prec) * - ``%n`` is unsupported * - ``%p*`` is handled by pointer() * - * See pointer() or Documentation/printk-formats.txt for more + * See pointer() or Documentation/core-api/printk-formats.rst for more * extensive description. * * **Please update the documentation in both places when making changes** -- cgit v1.2.3 From 4e5dff41be7b5201c1c47ceb3a2a8d698516bc2b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 14 Nov 2017 10:24:58 -0700 Subject: blk-mq: improve heavily contended tag case Even with a number of waitqueues, we can get into a situation where we are heavily contended on the waitqueue lock. I got a report on spc1 where we're spending seconds doing this. Arguably the use case is nasty, I reproduce it with one device and 1000 threads banging on the device. But that doesn't mean we shouldn't be handling it better. What ends up happening is that a thread will fail to get a tag, add itself to the waitqueue, and subsequently get woken up when a tag is freed - only to find itself going back to sleep on the waitqueue. Instead of waking all threads, use an exclusive wait and wake up our sbitmap batch count instead. This seems to work well for me (massive improvement for this use case), and it survives basic testing. But I haven't fully verified it yet. An additional improvement is running the queue and checking for a new tag BEFORE needing to add ourselves to the waitqueue. Signed-off-by: Jens Axboe --- block/blk-mq-tag.c | 13 +++++++------ lib/sbitmap.c | 2 +- 2 files changed, 8 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index c81b40ecd3f1..336dde07b230 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -134,12 +134,6 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data) ws = bt_wait_ptr(bt, data->hctx); drop_ctx = data->ctx == NULL; do { - prepare_to_wait(&ws->wait, &wait, TASK_UNINTERRUPTIBLE); - - tag = __blk_mq_get_tag(data, bt); - if (tag != -1) - break; - /* * We're out of tags on this hardware queue, kick any * pending IO submits before going to sleep waiting for @@ -155,6 +149,13 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data) if (tag != -1) break; + prepare_to_wait_exclusive(&ws->wait, &wait, + TASK_UNINTERRUPTIBLE); + + tag = __blk_mq_get_tag(data, bt); + if (tag != -1) + break; + if (data->ctx) blk_mq_put_ctx(data->ctx); diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 80aa8d5463fa..42b5ca0acf93 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -462,7 +462,7 @@ static void sbq_wake_up(struct sbitmap_queue *sbq) */ atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wait_cnt + wake_batch); sbq_index_atomic_inc(&sbq->wake_index); - wake_up(&ws->wait); + wake_up_nr(&ws->wait, wake_batch); } } -- cgit v1.2.3 From 14ebc28e07e68ff412aa42f7d8b67969e2f63d00 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 22 Dec 2017 06:32:16 -0800 Subject: errseq: Add to documentation tree - Move errseq.rst into core-api - Add errseq to the core-api index - Promote the header to a more prominent header type, otherwise we get three entries in the table of contents. - Reformat the table to look nicer and be a little more proportional in terms of horizontal width per bit (the SF bit is still disproportionately large, but there's no way to fix that). - Include errseq kernel-doc in the errseq.rst - Neaten some kernel-doc markup Signed-off-by: Matthew Wilcox Reviewed-by: Jeff Layton Reviewed-by: Randy Dunlap Signed-off-by: Jonathan Corbet --- Documentation/core-api/errseq.rst | 159 ++++++++++++++++++++++++++++++++++++++ Documentation/core-api/index.rst | 1 + Documentation/errseq.rst | 149 ----------------------------------- include/linux/errseq.h | 2 +- lib/errseq.c | 37 +++++---- 5 files changed, 182 insertions(+), 166 deletions(-) create mode 100644 Documentation/core-api/errseq.rst delete mode 100644 Documentation/errseq.rst (limited to 'lib') diff --git a/Documentation/core-api/errseq.rst b/Documentation/core-api/errseq.rst new file mode 100644 index 000000000000..ff332e272405 --- /dev/null +++ b/Documentation/core-api/errseq.rst @@ -0,0 +1,159 @@ +===================== +The errseq_t datatype +===================== + +An errseq_t is a way of recording errors in one place, and allowing any +number of "subscribers" to tell whether it has changed since a previous +point where it was sampled. + +The initial use case for this is tracking errors for file +synchronization syscalls (fsync, fdatasync, msync and sync_file_range), +but it may be usable in other situations. + +It's implemented as an unsigned 32-bit value. The low order bits are +designated to hold an error code (between 1 and MAX_ERRNO). The upper bits +are used as a counter. This is done with atomics instead of locking so that +these functions can be called from any context. + +Note that there is a risk of collisions if new errors are being recorded +frequently, since we have so few bits to use as a counter. + +To mitigate this, the bit between the error value and counter is used as +a flag to tell whether the value has been sampled since a new value was +recorded. That allows us to avoid bumping the counter if no one has +sampled it since the last time an error was recorded. + +Thus we end up with a value that looks something like this: + ++--------------------------------------+----+------------------------+ +| 31..13 | 12 | 11..0 | ++--------------------------------------+----+------------------------+ +| counter | SF | errno | ++--------------------------------------+----+------------------------+ + +The general idea is for "watchers" to sample an errseq_t value and keep +it as a running cursor. That value can later be used to tell whether +any new errors have occurred since that sampling was done, and atomically +record the state at the time that it was checked. This allows us to +record errors in one place, and then have a number of "watchers" that +can tell whether the value has changed since they last checked it. + +A new errseq_t should always be zeroed out. An errseq_t value of all zeroes +is the special (but common) case where there has never been an error. An all +zero value thus serves as the "epoch" if one wishes to know whether there +has ever been an error set since it was first initialized. + +API usage +========= + +Let me tell you a story about a worker drone. Now, he's a good worker +overall, but the company is a little...management heavy. He has to +report to 77 supervisors today, and tomorrow the "big boss" is coming in +from out of town and he's sure to test the poor fellow too. + +They're all handing him work to do -- so much he can't keep track of who +handed him what, but that's not really a big problem. The supervisors +just want to know when he's finished all of the work they've handed him so +far and whether he made any mistakes since they last asked. + +He might have made the mistake on work they didn't actually hand him, +but he can't keep track of things at that level of detail, all he can +remember is the most recent mistake that he made. + +Here's our worker_drone representation:: + + struct worker_drone { + errseq_t wd_err; /* for recording errors */ + }; + +Every day, the worker_drone starts out with a blank slate:: + + struct worker_drone wd; + + wd.wd_err = (errseq_t)0; + +The supervisors come in and get an initial read for the day. They +don't care about anything that happened before their watch begins:: + + struct supervisor { + errseq_t s_wd_err; /* private "cursor" for wd_err */ + spinlock_t s_wd_err_lock; /* protects s_wd_err */ + } + + struct supervisor su; + + su.s_wd_err = errseq_sample(&wd.wd_err); + spin_lock_init(&su.s_wd_err_lock); + +Now they start handing him tasks to do. Every few minutes they ask him to +finish up all of the work they've handed him so far. Then they ask him +whether he made any mistakes on any of it:: + + spin_lock(&su.su_wd_err_lock); + err = errseq_check_and_advance(&wd.wd_err, &su.s_wd_err); + spin_unlock(&su.su_wd_err_lock); + +Up to this point, that just keeps returning 0. + +Now, the owners of this company are quite miserly and have given him +substandard equipment with which to do his job. Occasionally it +glitches and he makes a mistake. He sighs a heavy sigh, and marks it +down:: + + errseq_set(&wd.wd_err, -EIO); + +...and then gets back to work. The supervisors eventually poll again +and they each get the error when they next check. Subsequent calls will +return 0, until another error is recorded, at which point it's reported +to each of them once. + +Note that the supervisors can't tell how many mistakes he made, only +whether one was made since they last checked, and the latest value +recorded. + +Occasionally the big boss comes in for a spot check and asks the worker +to do a one-off job for him. He's not really watching the worker +full-time like the supervisors, but he does need to know whether a +mistake occurred while his job was processing. + +He can just sample the current errseq_t in the worker, and then use that +to tell whether an error has occurred later:: + + errseq_t since = errseq_sample(&wd.wd_err); + /* submit some work and wait for it to complete */ + err = errseq_check(&wd.wd_err, since); + +Since he's just going to discard "since" after that point, he doesn't +need to advance it here. He also doesn't need any locking since it's +not usable by anyone else. + +Serializing errseq_t cursor updates +=================================== + +Note that the errseq_t API does not protect the errseq_t cursor during a +check_and_advance_operation. Only the canonical error code is handled +atomically. In a situation where more than one task might be using the +same errseq_t cursor at the same time, it's important to serialize +updates to that cursor. + +If that's not done, then it's possible for the cursor to go backward +in which case the same error could be reported more than once. + +Because of this, it's often advantageous to first do an errseq_check to +see if anything has changed, and only later do an +errseq_check_and_advance after taking the lock. e.g.:: + + if (errseq_check(&wd.wd_err, READ_ONCE(su.s_wd_err)) { + /* su.s_wd_err is protected by s_wd_err_lock */ + spin_lock(&su.s_wd_err_lock); + err = errseq_check_and_advance(&wd.wd_err, &su.s_wd_err); + spin_unlock(&su.s_wd_err_lock); + } + +That avoids the spinlock in the common case where nothing has changed +since the last time it was checked. + +Functions +========= + +.. kernel-doc:: lib/errseq.c diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst index d55ee6b006ed..1b1fd01990b5 100644 --- a/Documentation/core-api/index.rst +++ b/Documentation/core-api/index.rst @@ -22,6 +22,7 @@ Core utilities flexible-arrays librs genalloc + errseq printk-formats Interfaces for kernel debugging diff --git a/Documentation/errseq.rst b/Documentation/errseq.rst deleted file mode 100644 index 4c29bd5afbc5..000000000000 --- a/Documentation/errseq.rst +++ /dev/null @@ -1,149 +0,0 @@ -The errseq_t datatype -===================== -An errseq_t is a way of recording errors in one place, and allowing any -number of "subscribers" to tell whether it has changed since a previous -point where it was sampled. - -The initial use case for this is tracking errors for file -synchronization syscalls (fsync, fdatasync, msync and sync_file_range), -but it may be usable in other situations. - -It's implemented as an unsigned 32-bit value. The low order bits are -designated to hold an error code (between 1 and MAX_ERRNO). The upper bits -are used as a counter. This is done with atomics instead of locking so that -these functions can be called from any context. - -Note that there is a risk of collisions if new errors are being recorded -frequently, since we have so few bits to use as a counter. - -To mitigate this, the bit between the error value and counter is used as -a flag to tell whether the value has been sampled since a new value was -recorded. That allows us to avoid bumping the counter if no one has -sampled it since the last time an error was recorded. - -Thus we end up with a value that looks something like this:: - - bit: 31..13 12 11..0 - +-----------------+----+----------------+ - | counter | SF | errno | - +-----------------+----+----------------+ - -The general idea is for "watchers" to sample an errseq_t value and keep -it as a running cursor. That value can later be used to tell whether -any new errors have occurred since that sampling was done, and atomically -record the state at the time that it was checked. This allows us to -record errors in one place, and then have a number of "watchers" that -can tell whether the value has changed since they last checked it. - -A new errseq_t should always be zeroed out. An errseq_t value of all zeroes -is the special (but common) case where there has never been an error. An all -zero value thus serves as the "epoch" if one wishes to know whether there -has ever been an error set since it was first initialized. - -API usage -========= -Let me tell you a story about a worker drone. Now, he's a good worker -overall, but the company is a little...management heavy. He has to -report to 77 supervisors today, and tomorrow the "big boss" is coming in -from out of town and he's sure to test the poor fellow too. - -They're all handing him work to do -- so much he can't keep track of who -handed him what, but that's not really a big problem. The supervisors -just want to know when he's finished all of the work they've handed him so -far and whether he made any mistakes since they last asked. - -He might have made the mistake on work they didn't actually hand him, -but he can't keep track of things at that level of detail, all he can -remember is the most recent mistake that he made. - -Here's our worker_drone representation:: - - struct worker_drone { - errseq_t wd_err; /* for recording errors */ - }; - -Every day, the worker_drone starts out with a blank slate:: - - struct worker_drone wd; - - wd.wd_err = (errseq_t)0; - -The supervisors come in and get an initial read for the day. They -don't care about anything that happened before their watch begins:: - - struct supervisor { - errseq_t s_wd_err; /* private "cursor" for wd_err */ - spinlock_t s_wd_err_lock; /* protects s_wd_err */ - } - - struct supervisor su; - - su.s_wd_err = errseq_sample(&wd.wd_err); - spin_lock_init(&su.s_wd_err_lock); - -Now they start handing him tasks to do. Every few minutes they ask him to -finish up all of the work they've handed him so far. Then they ask him -whether he made any mistakes on any of it:: - - spin_lock(&su.su_wd_err_lock); - err = errseq_check_and_advance(&wd.wd_err, &su.s_wd_err); - spin_unlock(&su.su_wd_err_lock); - -Up to this point, that just keeps returning 0. - -Now, the owners of this company are quite miserly and have given him -substandard equipment with which to do his job. Occasionally it -glitches and he makes a mistake. He sighs a heavy sigh, and marks it -down:: - - errseq_set(&wd.wd_err, -EIO); - -...and then gets back to work. The supervisors eventually poll again -and they each get the error when they next check. Subsequent calls will -return 0, until another error is recorded, at which point it's reported -to each of them once. - -Note that the supervisors can't tell how many mistakes he made, only -whether one was made since they last checked, and the latest value -recorded. - -Occasionally the big boss comes in for a spot check and asks the worker -to do a one-off job for him. He's not really watching the worker -full-time like the supervisors, but he does need to know whether a -mistake occurred while his job was processing. - -He can just sample the current errseq_t in the worker, and then use that -to tell whether an error has occurred later:: - - errseq_t since = errseq_sample(&wd.wd_err); - /* submit some work and wait for it to complete */ - err = errseq_check(&wd.wd_err, since); - -Since he's just going to discard "since" after that point, he doesn't -need to advance it here. He also doesn't need any locking since it's -not usable by anyone else. - -Serializing errseq_t cursor updates -=================================== -Note that the errseq_t API does not protect the errseq_t cursor during a -check_and_advance_operation. Only the canonical error code is handled -atomically. In a situation where more than one task might be using the -same errseq_t cursor at the same time, it's important to serialize -updates to that cursor. - -If that's not done, then it's possible for the cursor to go backward -in which case the same error could be reported more than once. - -Because of this, it's often advantageous to first do an errseq_check to -see if anything has changed, and only later do an -errseq_check_and_advance after taking the lock. e.g.:: - - if (errseq_check(&wd.wd_err, READ_ONCE(su.s_wd_err)) { - /* su.s_wd_err is protected by s_wd_err_lock */ - spin_lock(&su.s_wd_err_lock); - err = errseq_check_and_advance(&wd.wd_err, &su.s_wd_err); - spin_unlock(&su.s_wd_err_lock); - } - -That avoids the spinlock in the common case where nothing has changed -since the last time it was checked. diff --git a/include/linux/errseq.h b/include/linux/errseq.h index 6ffae9c5052d..fc2777770768 100644 --- a/include/linux/errseq.h +++ b/include/linux/errseq.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * See Documentation/errseq.rst and lib/errseq.c + * See Documentation/core-api/errseq.rst and lib/errseq.c */ #ifndef _LINUX_ERRSEQ_H #define _LINUX_ERRSEQ_H diff --git a/lib/errseq.c b/lib/errseq.c index 79cc66897db4..df782418b333 100644 --- a/lib/errseq.c +++ b/lib/errseq.c @@ -46,14 +46,14 @@ * @eseq: errseq_t field that should be set * @err: error to set (must be between -1 and -MAX_ERRNO) * - * This function sets the error in *eseq, and increments the sequence counter + * This function sets the error in @eseq, and increments the sequence counter * if the last sequence was sampled at some point in the past. * * Any error set will always overwrite an existing error. * - * We do return the latest value here, primarily for debugging purposes. The - * return value should not be used as a previously sampled value in later calls - * as it will not have the SEEN flag set. + * Return: The previous value, primarily for debugging purposes. The + * return value should not be used as a previously sampled value in later + * calls as it will not have the SEEN flag set. */ errseq_t errseq_set(errseq_t *eseq, int err) { @@ -108,11 +108,13 @@ errseq_t errseq_set(errseq_t *eseq, int err) EXPORT_SYMBOL(errseq_set); /** - * errseq_sample - grab current errseq_t value - * @eseq: pointer to errseq_t to be sampled + * errseq_sample() - Grab current errseq_t value. + * @eseq: Pointer to errseq_t to be sampled. * * This function allows callers to sample an errseq_t value, marking it as * "seen" if required. + * + * Return: The current errseq value. */ errseq_t errseq_sample(errseq_t *eseq) { @@ -134,15 +136,15 @@ errseq_t errseq_sample(errseq_t *eseq) EXPORT_SYMBOL(errseq_sample); /** - * errseq_check - has an error occurred since a particular sample point? - * @eseq: pointer to errseq_t value to be checked - * @since: previously-sampled errseq_t from which to check + * errseq_check() - Has an error occurred since a particular sample point? + * @eseq: Pointer to errseq_t value to be checked. + * @since: Previously-sampled errseq_t from which to check. * - * Grab the value that eseq points to, and see if it has changed "since" - * the given value was sampled. The "since" value is not advanced, so there + * Grab the value that eseq points to, and see if it has changed @since + * the given value was sampled. The @since value is not advanced, so there * is no need to mark the value as seen. * - * Returns the latest error set in the errseq_t or 0 if it hasn't changed. + * Return: The latest error set in the errseq_t or 0 if it hasn't changed. */ int errseq_check(errseq_t *eseq, errseq_t since) { @@ -155,11 +157,11 @@ int errseq_check(errseq_t *eseq, errseq_t since) EXPORT_SYMBOL(errseq_check); /** - * errseq_check_and_advance - check an errseq_t and advance to current value - * @eseq: pointer to value being checked and reported - * @since: pointer to previously-sampled errseq_t to check against and advance + * errseq_check_and_advance() - Check an errseq_t and advance to current value. + * @eseq: Pointer to value being checked and reported. + * @since: Pointer to previously-sampled errseq_t to check against and advance. * - * Grab the eseq value, and see whether it matches the value that "since" + * Grab the eseq value, and see whether it matches the value that @since * points to. If it does, then just return 0. * * If it doesn't, then the value has changed. Set the "seen" flag, and try to @@ -170,6 +172,9 @@ EXPORT_SYMBOL(errseq_check); * value. The caller must provide that if necessary. Because of this, callers * may want to do a lockless errseq_check before taking the lock and calling * this. + * + * Return: Negative errno if one has been stored, or 0 if no new error has + * occurred. */ int errseq_check_and_advance(errseq_t *eseq, errseq_t *since) { -- cgit v1.2.3 From d202d47b5e62cbd54d2cfffbe01471a7690dc652 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 11 Dec 2017 21:50:24 +0900 Subject: lib: do not use print_symbol() print_symbol() is a very old API that has been obsoleted by %pS format specifier in a normal printk() call. Replace print_symbol() with a direct printk("%pS") call. Link: http://lkml.kernel.org/r/20171211125025.2270-13-sergey.senozhatsky@gmail.com To: Andrew Morton To: Russell King To: Catalin Marinas To: Mark Salter To: Tony Luck To: David Howells To: Yoshinori Sato To: Guan Xuetao To: Borislav Petkov To: Greg Kroah-Hartman To: Thomas Gleixner To: Peter Zijlstra To: Vineet Gupta To: Fengguang Wu Cc: Steven Rostedt Cc: Petr Mladek Cc: LKML Cc: linux-arm-kernel@lists.infradead.org Cc: linux-c6x-dev@linux-c6x.org Cc: linux-ia64@vger.kernel.org Cc: linux-am33-list@redhat.com Cc: linux-sh@vger.kernel.org Cc: linux-edac@vger.kernel.org Cc: x86@kernel.org Cc: linux-snps-arc@lists.infradead.org Signed-off-by: Sergey Senozhatsky [pmladek@suse.com: updated commit message] Signed-off-by: Petr Mladek --- lib/smp_processor_id.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c index 835cc6df2776..85925aaa4fff 100644 --- a/lib/smp_processor_id.c +++ b/lib/smp_processor_id.c @@ -5,7 +5,6 @@ * DEBUG_PREEMPT variant of smp_processor_id(). */ #include -#include #include notrace static unsigned int check_preemption_disabled(const char *what1, @@ -43,7 +42,7 @@ notrace static unsigned int check_preemption_disabled(const char *what1, printk(KERN_ERR "BUG: using %s%s() in preemptible [%08x] code: %s/%d\n", what1, what2, preempt_count() - 1, current->comm, current->pid); - print_symbol("caller is %s\n", (long)__builtin_return_address(0)); + printk("caller is %pS\n", __builtin_return_address(0)); dump_stack(); out_enable: -- cgit v1.2.3 From e80a0af4759a164214f02da157a3800753ce135f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 5 Jan 2018 08:26:46 -0800 Subject: lib/scatterlist: Introduce sgl_alloc() and sgl_free() Many kernel drivers contain code that allocates and frees both a scatterlist and the pages that populate that scatterlist. Introduce functions in lib/scatterlist.c that perform these tasks instead of duplicating this functionality in multiple drivers. Only include these functions in the build if CONFIG_SGL_ALLOC=y to avoid that the kernel size increases if this functionality is not used. Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/scatterlist.h | 10 +++++ lib/Kconfig | 4 ++ lib/scatterlist.c | 105 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 119 insertions(+) (limited to 'lib') diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index b7c83254c566..b8a7c1d1dbe3 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -276,6 +276,16 @@ int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, unsigned int n_pages, unsigned int offset, unsigned long size, gfp_t gfp_mask); +#ifdef CONFIG_SGL_ALLOC +struct scatterlist *sgl_alloc_order(unsigned long long length, + unsigned int order, bool chainable, + gfp_t gfp, unsigned int *nent_p); +struct scatterlist *sgl_alloc(unsigned long long length, gfp_t gfp, + unsigned int *nent_p); +void sgl_free_order(struct scatterlist *sgl, int order); +void sgl_free(struct scatterlist *sgl); +#endif /* CONFIG_SGL_ALLOC */ + size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf, size_t buflen, off_t skip, bool to_buffer); diff --git a/lib/Kconfig b/lib/Kconfig index c5e84fbcb30b..4dd5c11366f9 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -409,6 +409,10 @@ config HAS_DMA depends on !NO_DMA default y +config SGL_ALLOC + bool + default n + config DMA_NOOP_OPS bool depends on HAS_DMA && (!64BIT || ARCH_DMA_ADDR_T_64BIT) diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 7c1c55f7daaa..9afc9b432083 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -474,6 +474,111 @@ int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, } EXPORT_SYMBOL(sg_alloc_table_from_pages); +#ifdef CONFIG_SGL_ALLOC + +/** + * sgl_alloc_order - allocate a scatterlist and its pages + * @length: Length in bytes of the scatterlist. Must be at least one + * @order: Second argument for alloc_pages() + * @chainable: Whether or not to allocate an extra element in the scatterlist + * for scatterlist chaining purposes + * @gfp: Memory allocation flags + * @nent_p: [out] Number of entries in the scatterlist that have pages + * + * Returns: A pointer to an initialized scatterlist or %NULL upon failure. + */ +struct scatterlist *sgl_alloc_order(unsigned long long length, + unsigned int order, bool chainable, + gfp_t gfp, unsigned int *nent_p) +{ + struct scatterlist *sgl, *sg; + struct page *page; + unsigned int nent, nalloc; + u32 elem_len; + + nent = round_up(length, PAGE_SIZE << order) >> (PAGE_SHIFT + order); + /* Check for integer overflow */ + if (length > (nent << (PAGE_SHIFT + order))) + return NULL; + nalloc = nent; + if (chainable) { + /* Check for integer overflow */ + if (nalloc + 1 < nalloc) + return NULL; + nalloc++; + } + sgl = kmalloc_array(nalloc, sizeof(struct scatterlist), + (gfp & ~GFP_DMA) | __GFP_ZERO); + if (!sgl) + return NULL; + + sg_init_table(sgl, nent); + sg = sgl; + while (length) { + elem_len = min_t(u64, length, PAGE_SIZE << order); + page = alloc_pages(gfp, order); + if (!page) { + sgl_free(sgl); + return NULL; + } + + sg_set_page(sg, page, elem_len, 0); + length -= elem_len; + sg = sg_next(sg); + } + WARN_ON_ONCE(sg); + if (nent_p) + *nent_p = nent; + return sgl; +} +EXPORT_SYMBOL(sgl_alloc_order); + +/** + * sgl_alloc - allocate a scatterlist and its pages + * @length: Length in bytes of the scatterlist + * @gfp: Memory allocation flags + * @nent_p: [out] Number of entries in the scatterlist + * + * Returns: A pointer to an initialized scatterlist or %NULL upon failure. + */ +struct scatterlist *sgl_alloc(unsigned long long length, gfp_t gfp, + unsigned int *nent_p) +{ + return sgl_alloc_order(length, 0, false, gfp, nent_p); +} +EXPORT_SYMBOL(sgl_alloc); + +/** + * sgl_free_order - free a scatterlist and its pages + * @sgl: Scatterlist with one or more elements + * @order: Second argument for __free_pages() + */ +void sgl_free_order(struct scatterlist *sgl, int order) +{ + struct scatterlist *sg; + struct page *page; + + for (sg = sgl; sg; sg = sg_next(sg)) { + page = sg_page(sg); + if (page) + __free_pages(page, order); + } + kfree(sgl); +} +EXPORT_SYMBOL(sgl_free_order); + +/** + * sgl_free - free a scatterlist and its pages + * @sgl: Scatterlist with one or more elements + */ +void sgl_free(struct scatterlist *sgl) +{ + sgl_free_order(sgl, 0); +} +EXPORT_SYMBOL(sgl_free); + +#endif /* CONFIG_SGL_ALLOC */ + void __sg_page_iter_start(struct sg_page_iter *piter, struct scatterlist *sglist, unsigned int nents, unsigned long pgoffset) -- cgit v1.2.3 From 0d85adb5fbd33daf81276d6fa0f990136eb4bf29 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 3 Jan 2018 15:39:58 -0800 Subject: lib/crc-ccitt: Add CCITT-FALSE CRC16 variant In support of a soon to be published MFD driver using serdev to talk to a supervisory processor that uses the CCITT-FALSE CRC16 variant in it's protocol, this patch was tested successfully on an i.MX6 ARM platform. Link: http://lkml.kernel.org/r/20170413142932.27287-1-andrew.smirnov@gmail.com Signed-off-by: Andrey Vostrikov Signed-off-by: Andrey Smirnov Tested-by: Chris Healy Signed-off-by: Andrew Morton Signed-off-by: Lee Jones --- include/linux/crc-ccitt.h | 7 ++++++ lib/crc-ccitt.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 64 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/include/linux/crc-ccitt.h b/include/linux/crc-ccitt.h index cd4f420231ba..72c92c396bb8 100644 --- a/include/linux/crc-ccitt.h +++ b/include/linux/crc-ccitt.h @@ -5,12 +5,19 @@ #include extern u16 const crc_ccitt_table[256]; +extern u16 const crc_ccitt_false_table[256]; extern u16 crc_ccitt(u16 crc, const u8 *buffer, size_t len); +extern u16 crc_ccitt_false(u16 crc, const u8 *buffer, size_t len); static inline u16 crc_ccitt_byte(u16 crc, const u8 c) { return (crc >> 8) ^ crc_ccitt_table[(crc ^ c) & 0xff]; } +static inline u16 crc_ccitt_false_byte(u16 crc, const u8 c) +{ + return (crc << 8) ^ crc_ccitt_false_table[(crc >> 8) ^ c]; +} + #endif /* _LINUX_CRC_CCITT_H */ diff --git a/lib/crc-ccitt.c b/lib/crc-ccitt.c index 7f6dd68d2d09..d873b34039ff 100644 --- a/lib/crc-ccitt.c +++ b/lib/crc-ccitt.c @@ -51,8 +51,49 @@ u16 const crc_ccitt_table[256] = { }; EXPORT_SYMBOL(crc_ccitt_table); +/* + * Similar table to calculate CRC16 variant known as CRC-CCITT-FALSE + * Reflected bits order, does not augment final value. + */ +u16 const crc_ccitt_false_table[256] = { + 0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50A5, 0x60C6, 0x70E7, + 0x8108, 0x9129, 0xA14A, 0xB16B, 0xC18C, 0xD1AD, 0xE1CE, 0xF1EF, + 0x1231, 0x0210, 0x3273, 0x2252, 0x52B5, 0x4294, 0x72F7, 0x62D6, + 0x9339, 0x8318, 0xB37B, 0xA35A, 0xD3BD, 0xC39C, 0xF3FF, 0xE3DE, + 0x2462, 0x3443, 0x0420, 0x1401, 0x64E6, 0x74C7, 0x44A4, 0x5485, + 0xA56A, 0xB54B, 0x8528, 0x9509, 0xE5EE, 0xF5CF, 0xC5AC, 0xD58D, + 0x3653, 0x2672, 0x1611, 0x0630, 0x76D7, 0x66F6, 0x5695, 0x46B4, + 0xB75B, 0xA77A, 0x9719, 0x8738, 0xF7DF, 0xE7FE, 0xD79D, 0xC7BC, + 0x48C4, 0x58E5, 0x6886, 0x78A7, 0x0840, 0x1861, 0x2802, 0x3823, + 0xC9CC, 0xD9ED, 0xE98E, 0xF9AF, 0x8948, 0x9969, 0xA90A, 0xB92B, + 0x5AF5, 0x4AD4, 0x7AB7, 0x6A96, 0x1A71, 0x0A50, 0x3A33, 0x2A12, + 0xDBFD, 0xCBDC, 0xFBBF, 0xEB9E, 0x9B79, 0x8B58, 0xBB3B, 0xAB1A, + 0x6CA6, 0x7C87, 0x4CE4, 0x5CC5, 0x2C22, 0x3C03, 0x0C60, 0x1C41, + 0xEDAE, 0xFD8F, 0xCDEC, 0xDDCD, 0xAD2A, 0xBD0B, 0x8D68, 0x9D49, + 0x7E97, 0x6EB6, 0x5ED5, 0x4EF4, 0x3E13, 0x2E32, 0x1E51, 0x0E70, + 0xFF9F, 0xEFBE, 0xDFDD, 0xCFFC, 0xBF1B, 0xAF3A, 0x9F59, 0x8F78, + 0x9188, 0x81A9, 0xB1CA, 0xA1EB, 0xD10C, 0xC12D, 0xF14E, 0xE16F, + 0x1080, 0x00A1, 0x30C2, 0x20E3, 0x5004, 0x4025, 0x7046, 0x6067, + 0x83B9, 0x9398, 0xA3FB, 0xB3DA, 0xC33D, 0xD31C, 0xE37F, 0xF35E, + 0x02B1, 0x1290, 0x22F3, 0x32D2, 0x4235, 0x5214, 0x6277, 0x7256, + 0xB5EA, 0xA5CB, 0x95A8, 0x8589, 0xF56E, 0xE54F, 0xD52C, 0xC50D, + 0x34E2, 0x24C3, 0x14A0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405, + 0xA7DB, 0xB7FA, 0x8799, 0x97B8, 0xE75F, 0xF77E, 0xC71D, 0xD73C, + 0x26D3, 0x36F2, 0x0691, 0x16B0, 0x6657, 0x7676, 0x4615, 0x5634, + 0xD94C, 0xC96D, 0xF90E, 0xE92F, 0x99C8, 0x89E9, 0xB98A, 0xA9AB, + 0x5844, 0x4865, 0x7806, 0x6827, 0x18C0, 0x08E1, 0x3882, 0x28A3, + 0xCB7D, 0xDB5C, 0xEB3F, 0xFB1E, 0x8BF9, 0x9BD8, 0xABBB, 0xBB9A, + 0x4A75, 0x5A54, 0x6A37, 0x7A16, 0x0AF1, 0x1AD0, 0x2AB3, 0x3A92, + 0xFD2E, 0xED0F, 0xDD6C, 0xCD4D, 0xBDAA, 0xAD8B, 0x9DE8, 0x8DC9, + 0x7C26, 0x6C07, 0x5C64, 0x4C45, 0x3CA2, 0x2C83, 0x1CE0, 0x0CC1, + 0xEF1F, 0xFF3E, 0xCF5D, 0xDF7C, 0xAF9B, 0xBFBA, 0x8FD9, 0x9FF8, + 0x6E17, 0x7E36, 0x4E55, 0x5E74, 0x2E93, 0x3EB2, 0x0ED1, 0x1EF0 +}; +EXPORT_SYMBOL(crc_ccitt_false_table); + /** - * crc_ccitt - recompute the CRC for the data buffer + * crc_ccitt - recompute the CRC (CRC-CCITT variant) for the data + * buffer * @crc: previous CRC value * @buffer: data pointer * @len: number of bytes in the buffer @@ -65,5 +106,20 @@ u16 crc_ccitt(u16 crc, u8 const *buffer, size_t len) } EXPORT_SYMBOL(crc_ccitt); +/** + * crc_ccitt_false - recompute the CRC (CRC-CCITT-FALSE variant) + * for the data buffer + * @crc: previous CRC value + * @buffer: data pointer + * @len: number of bytes in the buffer + */ +u16 crc_ccitt_false(u16 crc, u8 const *buffer, size_t len) +{ + while (len--) + crc = crc_ccitt_false_byte(crc, *buffer++); + return crc; +} +EXPORT_SYMBOL(crc_ccitt_false); + MODULE_DESCRIPTION("CRC-CCITT calculations"); MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 04b8eb7a4ccd9ef9343e2720ccf2a5db8cfe2f67 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 6 Dec 2017 13:36:49 +0900 Subject: symbol lookup: introduce dereference_symbol_descriptor() dereference_symbol_descriptor() invokes appropriate ARCH specific function descriptor dereference callbacks: - dereference_kernel_function_descriptor() if the pointer is a kernel symbol; - dereference_module_function_descriptor() if the pointer is a module symbol. This is the last step needed to make '%pS/%ps' smart enough to handle function descriptor dereference on affected ARCHs and to retire '%pF/%pf'. To refresh it: Some architectures (ia64, ppc64, parisc64) use an indirect pointer for C function pointers - the function pointer points to a function descriptor and we need to dereference it to get the actual function pointer. Function descriptors live in .opd elf section and all affected ARCHs (ia64, ppc64, parisc64) handle it properly for kernel and modules. So we, technically, can decide if the dereference is needed by simply looking at the pointer: if it belongs to .opd section then we need to dereference it. The kernel and modules have their own .opd sections, obviously, that's why we need to split dereference_function_descriptor() and use separate kernel and module dereference arch callbacks. Link: http://lkml.kernel.org/r/20171206043649.GB15885@jagdpanzerIV Cc: Fenghua Yu Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: James Bottomley Cc: Andrew Morton Cc: Jessica Yu Cc: Steven Rostedt Cc: linux-ia64@vger.kernel.org Cc: linux-parisc@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Sergey Senozhatsky Tested-by: Tony Luck #ia64 Tested-by: Santosh Sivaraj #powerpc Tested-by: Helge Deller #parisc64 Signed-off-by: Petr Mladek --- Documentation/printk-formats.txt | 35 +++++++++----------------- include/linux/kallsyms.h | 54 ++++++++++++++++++++++++++++++++++++++++ kernel/kallsyms.c | 35 -------------------------- lib/vsprintf.c | 5 ++-- 4 files changed, 68 insertions(+), 61 deletions(-) (limited to 'lib') diff --git a/Documentation/printk-formats.txt b/Documentation/printk-formats.txt index 361789df51ec..58c44cce90b6 100644 --- a/Documentation/printk-formats.txt +++ b/Documentation/printk-formats.txt @@ -50,42 +50,31 @@ Symbols/Function Pointers :: + %pS versatile_init+0x0/0x110 + %ps versatile_init %pF versatile_init+0x0/0x110 %pf versatile_init - %pS versatile_init+0x0/0x110 %pSR versatile_init+0x9/0x110 (with __builtin_extract_return_addr() translation) - %ps versatile_init %pB prev_fn_of_versatile_init+0x88/0x88 -The ``F`` and ``f`` specifiers are for printing function pointers, -for example, f->func, &gettimeofday. They have the same result as -``S`` and ``s`` specifiers. But they do an extra conversion on -ia64, ppc64 and parisc64 architectures where the function pointers -are actually function descriptors. +The ``S`` and ``s`` specifiers are used for printing a pointer in symbolic +format. They result in the symbol name with (``S``) or without (``s``) +offsets. If KALLSYMS are disabled then the symbol address is printed instead. -The ``S`` and ``s`` specifiers can be used for printing symbols -from direct addresses, for example, __builtin_return_address(0), -(void *)regs->ip. They result in the symbol name with (``S``) or -without (``s``) offsets. If KALLSYMS are disabled then the symbol -address is printed instead. +Note, that the ``F`` and ``f`` specifiers are identical to ``S`` (``s``) +and thus deprecated. We have ``F`` and ``f`` because on ia64, ppc64 and +parisc64 function pointers are indirect and, in fact, are function +descriptors, which require additional dereferencing before we can lookup +the symbol. As of now, ``S`` and ``s`` perform dereferencing on those +platforms (when needed), so ``F`` and ``f`` exist for compatibility +reasons only. The ``B`` specifier results in the symbol name with offsets and should be used when printing stack backtraces. The specifier takes into consideration the effect of compiler optimisations which may occur when tail-call``s are used and marked with the noreturn GCC attribute. -Examples:: - - printk("Going to call: %pF\n", gettimeofday); - printk("Going to call: %pF\n", p->func); - printk("%s: called from %pS\n", __func__, (void *)_RET_IP_); - printk("%s: called from %pS\n", __func__, - (void *)__builtin_return_address(0)); - printk("Faulted at %pS\n", (void *)regs->ip); - printk(" %s%pB\n", (reliable ? "" : "? "), (void *)*stack); - - Kernel Pointers =============== diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 708f337d780b..e4f2e5a65f14 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -9,6 +9,10 @@ #include #include #include +#include +#include + +#include #define KSYM_NAME_LEN 128 #define KSYM_SYMBOL_LEN (sizeof("%s+%#lx/%#lx [%s]") + (KSYM_NAME_LEN - 1) + \ @@ -22,6 +26,56 @@ struct module; +static inline int is_kernel_inittext(unsigned long addr) +{ + if (addr >= (unsigned long)_sinittext + && addr <= (unsigned long)_einittext) + return 1; + return 0; +} + +static inline int is_kernel_text(unsigned long addr) +{ + if ((addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) || + arch_is_kernel_text(addr)) + return 1; + return in_gate_area_no_mm(addr); +} + +static inline int is_kernel(unsigned long addr) +{ + if (addr >= (unsigned long)_stext && addr <= (unsigned long)_end) + return 1; + return in_gate_area_no_mm(addr); +} + +static inline int is_ksym_addr(unsigned long addr) +{ + if (IS_ENABLED(CONFIG_KALLSYMS_ALL)) + return is_kernel(addr); + + return is_kernel_text(addr) || is_kernel_inittext(addr); +} + +static inline void *dereference_symbol_descriptor(void *ptr) +{ +#ifdef HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR + struct module *mod; + + ptr = dereference_kernel_function_descriptor(ptr); + if (is_ksym_addr((unsigned long)ptr)) + return ptr; + + preempt_disable(); + mod = __module_address((unsigned long)ptr); + preempt_enable(); + + if (mod) + ptr = dereference_module_function_descriptor(mod, ptr); +#endif + return ptr; +} + #ifdef CONFIG_KALLSYMS /* Lookup the address for a symbol. Returns 0 if not found. */ unsigned long kallsyms_lookup_name(const char *name); diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 531ffa984bc2..0e4c0922908a 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -12,7 +12,6 @@ * compression (see scripts/kallsyms.c for a more complete description) */ #include -#include #include #include #include @@ -20,15 +19,12 @@ #include #include #include /* for cond_resched */ -#include #include #include #include #include #include -#include - /* * These will be re-linked against their real values * during the second link stage. @@ -52,37 +48,6 @@ extern const u16 kallsyms_token_index[] __weak; extern const unsigned long kallsyms_markers[] __weak; -static inline int is_kernel_inittext(unsigned long addr) -{ - if (addr >= (unsigned long)_sinittext - && addr <= (unsigned long)_einittext) - return 1; - return 0; -} - -static inline int is_kernel_text(unsigned long addr) -{ - if ((addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) || - arch_is_kernel_text(addr)) - return 1; - return in_gate_area_no_mm(addr); -} - -static inline int is_kernel(unsigned long addr) -{ - if (addr >= (unsigned long)_stext && addr <= (unsigned long)_end) - return 1; - return in_gate_area_no_mm(addr); -} - -static int is_ksym_addr(unsigned long addr) -{ - if (IS_ENABLED(CONFIG_KALLSYMS_ALL)) - return is_kernel(addr); - - return is_kernel_text(addr) || is_kernel_inittext(addr); -} - /* * Expand a compressed symbol data into the resulting uncompressed string, * if uncompressed string is too long (>= maxlen), it will be truncated, diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 1746bae94d41..16e2eefb0f79 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -40,7 +40,6 @@ #include "../mm/internal.h" /* For the trace_print_flags arrays */ #include /* for PAGE_SIZE */ -#include /* for dereference_function_descriptor() */ #include /* cpu_to_le16 */ #include @@ -1723,10 +1722,10 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, switch (*fmt) { case 'F': case 'f': - ptr = dereference_function_descriptor(ptr); - /* Fallthrough */ case 'S': case 's': + ptr = dereference_symbol_descriptor(ptr); + /* Fallthrough */ case 'B': return symbol_string(buf, end, ptr, spec, fmt); case 'R': -- cgit v1.2.3 From b6b996b6cdeecf7e1646c87422e04e446ddce124 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 19 Dec 2017 10:15:07 -0800 Subject: treewide: Use DEVICE_ATTR_RW Convert DEVICE_ATTR uses to DEVICE_ATTR_RW where possible. Done with perl script: $ git grep -w --name-only DEVICE_ATTR | \ xargs perl -i -e 'local $/; while (<>) { s/\bDEVICE_ATTR\s*\(\s*(\w+)\s*,\s*\(?(\s*S_IRUGO\s*\|\s*S_IWUSR|\s*S_IWUSR\s*\|\s*S_IRUGO\s*|\s*0644\s*)\)?\s*,\s*\1_show\s*,\s*\1_store\s*\)/DEVICE_ATTR_RW(\1)/g; print;}' Signed-off-by: Joe Perches Acked-by: Felipe Balbi Acked-by: Andy Shevchenko Acked-by: Bartlomiej Zolnierkiewicz Acked-by: Zhang Rui Acked-by: Jarkko Nikula Acked-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/topology.c | 3 +-- arch/tile/kernel/sysfs.c | 2 +- drivers/gpu/drm/i915/i915_sysfs.c | 6 ++--- drivers/platform/x86/compal-laptop.c | 18 +++++---------- drivers/s390/cio/device.c | 2 +- drivers/scsi/lpfc/lpfc_attr.c | 43 ++++++++++++------------------------ drivers/thermal/thermal_sysfs.c | 9 ++++---- drivers/tty/serial/sh-sci.c | 2 +- drivers/usb/phy/phy-tahvo.c | 2 +- drivers/video/fbdev/auo_k190x.c | 4 ++-- drivers/video/fbdev/w100fb.c | 4 ++-- lib/test_firmware.c | 14 +++++------- lib/test_kmod.c | 14 +++++------- sound/soc/omap/mcbsp.c | 4 ++-- 14 files changed, 48 insertions(+), 79 deletions(-) (limited to 'lib') diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 4d5b65e527b5..4b6e0397f66d 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -404,8 +404,7 @@ out: put_online_cpus(); return rc ? rc : count; } -static DEVICE_ATTR(dispatching, 0644, dispatching_show, - dispatching_store); +static DEVICE_ATTR_RW(dispatching); static ssize_t cpu_polarization_show(struct device *dev, struct device_attribute *attr, char *buf) diff --git a/arch/tile/kernel/sysfs.c b/arch/tile/kernel/sysfs.c index 825867c53853..af5024f0fb5a 100644 --- a/arch/tile/kernel/sysfs.c +++ b/arch/tile/kernel/sysfs.c @@ -184,7 +184,7 @@ static ssize_t hv_stats_store(struct device *dev, return n < 0 ? n : count; } -static DEVICE_ATTR(hv_stats, 0644, hv_stats_show, hv_stats_store); +static DEVICE_ATTR_RW(hv_stats); static int hv_stats_device_add(struct device *dev, struct subsys_interface *sif) { diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 791759f632e1..3fce362ca921 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -436,9 +436,9 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, static DEVICE_ATTR(gt_act_freq_mhz, S_IRUGO, gt_act_freq_mhz_show, NULL); static DEVICE_ATTR(gt_cur_freq_mhz, S_IRUGO, gt_cur_freq_mhz_show, NULL); -static DEVICE_ATTR(gt_boost_freq_mhz, S_IRUGO | S_IWUSR, gt_boost_freq_mhz_show, gt_boost_freq_mhz_store); -static DEVICE_ATTR(gt_max_freq_mhz, S_IRUGO | S_IWUSR, gt_max_freq_mhz_show, gt_max_freq_mhz_store); -static DEVICE_ATTR(gt_min_freq_mhz, S_IRUGO | S_IWUSR, gt_min_freq_mhz_show, gt_min_freq_mhz_store); +static DEVICE_ATTR_RW(gt_boost_freq_mhz); +static DEVICE_ATTR_RW(gt_max_freq_mhz); +static DEVICE_ATTR_RW(gt_min_freq_mhz); static DEVICE_ATTR(vlv_rpe_freq_mhz, S_IRUGO, vlv_rpe_freq_mhz_show, NULL); diff --git a/drivers/platform/x86/compal-laptop.c b/drivers/platform/x86/compal-laptop.c index 6bcb750e1865..4f9bc72f0584 100644 --- a/drivers/platform/x86/compal-laptop.c +++ b/drivers/platform/x86/compal-laptop.c @@ -679,18 +679,12 @@ static int bat_writeable_property(struct power_supply *psy, /* ============== */ /* Driver Globals */ /* ============== */ -static DEVICE_ATTR(wake_up_pme, - 0644, wake_up_pme_show, wake_up_pme_store); -static DEVICE_ATTR(wake_up_modem, - 0644, wake_up_modem_show, wake_up_modem_store); -static DEVICE_ATTR(wake_up_lan, - 0644, wake_up_lan_show, wake_up_lan_store); -static DEVICE_ATTR(wake_up_wlan, - 0644, wake_up_wlan_show, wake_up_wlan_store); -static DEVICE_ATTR(wake_up_key, - 0644, wake_up_key_show, wake_up_key_store); -static DEVICE_ATTR(wake_up_mouse, - 0644, wake_up_mouse_show, wake_up_mouse_store); +static DEVICE_ATTR_RW(wake_up_pme); +static DEVICE_ATTR_RW(wake_up_modem); +static DEVICE_ATTR_RW(wake_up_lan); +static DEVICE_ATTR_RW(wake_up_wlan); +static DEVICE_ATTR_RW(wake_up_key); +static DEVICE_ATTR_RW(wake_up_mouse); static DEVICE_ATTR(fan1_input, S_IRUGO, fan_show, NULL); static DEVICE_ATTR(temp1_input, S_IRUGO, temp_cpu, NULL); diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 75a245f38e2e..6eefb67b31f3 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -600,7 +600,7 @@ static ssize_t vpm_show(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR(devtype, 0444, devtype_show, NULL); static DEVICE_ATTR(cutype, 0444, cutype_show, NULL); static DEVICE_ATTR(modalias, 0444, modalias_show, NULL); -static DEVICE_ATTR(online, 0644, online_show, online_store); +static DEVICE_ATTR_RW(online); static DEVICE_ATTR(availability, 0444, available_show, NULL); static DEVICE_ATTR(logging, 0200, NULL, initiate_logging); static DEVICE_ATTR(vpm, 0444, vpm_show, NULL); diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 82f6e219ee34..c9f54de2aefd 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -2485,8 +2485,7 @@ lpfc_soft_wwpn_store(struct device *dev, struct device_attribute *attr, "reinit adapter - %d\n", stat2); return (stat1 || stat2) ? -EIO : count; } -static DEVICE_ATTR(lpfc_soft_wwpn, S_IRUGO | S_IWUSR, - lpfc_soft_wwpn_show, lpfc_soft_wwpn_store); +static DEVICE_ATTR_RW(lpfc_soft_wwpn); /** * lpfc_soft_wwnn_show - Return the cfg soft ww node name for the adapter @@ -2549,8 +2548,7 @@ lpfc_soft_wwnn_store(struct device *dev, struct device_attribute *attr, return count; } -static DEVICE_ATTR(lpfc_soft_wwnn, S_IRUGO | S_IWUSR, - lpfc_soft_wwnn_show, lpfc_soft_wwnn_store); +static DEVICE_ATTR_RW(lpfc_soft_wwnn); /** * lpfc_oas_tgt_show - Return wwpn of target whose luns maybe enabled for @@ -3068,8 +3066,7 @@ MODULE_PARM_DESC(lpfc_poll, "FCP ring polling mode control:" " 1 - poll with interrupts enabled" " 3 - poll and disable FCP ring interrupts"); -static DEVICE_ATTR(lpfc_poll, S_IRUGO | S_IWUSR, - lpfc_poll_show, lpfc_poll_store); +static DEVICE_ATTR_RW(lpfc_poll); int lpfc_no_hba_reset_cnt; unsigned long lpfc_no_hba_reset[MAX_HBAS_NO_RESET] = { @@ -3302,8 +3299,7 @@ lpfc_nodev_tmo_set(struct lpfc_vport *vport, int val) lpfc_vport_param_store(nodev_tmo) -static DEVICE_ATTR(lpfc_nodev_tmo, S_IRUGO | S_IWUSR, - lpfc_nodev_tmo_show, lpfc_nodev_tmo_store); +static DEVICE_ATTR_RW(lpfc_nodev_tmo); /* # lpfc_devloss_tmo: If set, it will hold all I/O errors on devices that @@ -3352,8 +3348,7 @@ lpfc_devloss_tmo_set(struct lpfc_vport *vport, int val) } lpfc_vport_param_store(devloss_tmo) -static DEVICE_ATTR(lpfc_devloss_tmo, S_IRUGO | S_IWUSR, - lpfc_devloss_tmo_show, lpfc_devloss_tmo_store); +static DEVICE_ATTR_RW(lpfc_devloss_tmo); /* * lpfc_suppress_rsp: Enable suppress rsp feature is firmware supports it @@ -3545,8 +3540,7 @@ lpfc_restrict_login_set(struct lpfc_vport *vport, int val) return 0; } lpfc_vport_param_store(restrict_login); -static DEVICE_ATTR(lpfc_restrict_login, S_IRUGO | S_IWUSR, - lpfc_restrict_login_show, lpfc_restrict_login_store); +static DEVICE_ATTR_RW(lpfc_restrict_login); /* # Some disk devices have a "select ID" or "select Target" capability. @@ -3660,8 +3654,7 @@ lpfc_topology_store(struct device *dev, struct device_attribute *attr, } lpfc_param_show(topology) -static DEVICE_ATTR(lpfc_topology, S_IRUGO | S_IWUSR, - lpfc_topology_show, lpfc_topology_store); +static DEVICE_ATTR_RW(lpfc_topology); /** * lpfc_static_vport_show: Read callback function for @@ -3919,8 +3912,7 @@ lpfc_stat_data_ctrl_show(struct device *dev, struct device_attribute *attr, /* * Sysfs attribute to control the statistical data collection. */ -static DEVICE_ATTR(lpfc_stat_data_ctrl, S_IRUGO | S_IWUSR, - lpfc_stat_data_ctrl_show, lpfc_stat_data_ctrl_store); +static DEVICE_ATTR_RW(lpfc_stat_data_ctrl); /* * lpfc_drvr_stat_data: sysfs attr to get driver statistical data. @@ -4159,8 +4151,7 @@ lpfc_link_speed_init(struct lpfc_hba *phba, int val) return -EINVAL; } -static DEVICE_ATTR(lpfc_link_speed, S_IRUGO | S_IWUSR, - lpfc_link_speed_show, lpfc_link_speed_store); +static DEVICE_ATTR_RW(lpfc_link_speed); /* # lpfc_aer_support: Support PCIe device Advanced Error Reporting (AER) @@ -4253,8 +4244,7 @@ lpfc_aer_support_store(struct device *dev, struct device_attribute *attr, return rc; } -static DEVICE_ATTR(lpfc_aer_support, S_IRUGO | S_IWUSR, - lpfc_aer_support_show, lpfc_aer_support_store); +static DEVICE_ATTR_RW(lpfc_aer_support); /** * lpfc_aer_cleanup_state - Clean up aer state to the aer enabled device @@ -4401,8 +4391,7 @@ LPFC_ATTR(sriov_nr_virtfn, LPFC_DEF_VFN_PER_PFN, 0, LPFC_MAX_VFN_PER_PFN, "Enable PCIe device SR-IOV virtual fn"); lpfc_param_show(sriov_nr_virtfn) -static DEVICE_ATTR(lpfc_sriov_nr_virtfn, S_IRUGO | S_IWUSR, - lpfc_sriov_nr_virtfn_show, lpfc_sriov_nr_virtfn_store); +static DEVICE_ATTR_RW(lpfc_sriov_nr_virtfn); /** * lpfc_request_firmware_store - Request for Linux generic firmware upgrade @@ -4576,8 +4565,7 @@ lpfc_fcp_imax_init(struct lpfc_hba *phba, int val) return 0; } -static DEVICE_ATTR(lpfc_fcp_imax, S_IRUGO | S_IWUSR, - lpfc_fcp_imax_show, lpfc_fcp_imax_store); +static DEVICE_ATTR_RW(lpfc_fcp_imax); /* * lpfc_auto_imax: Controls Auto-interrupt coalescing values support. @@ -4737,8 +4725,7 @@ lpfc_fcp_cpu_map_init(struct lpfc_hba *phba, int val) return 0; } -static DEVICE_ATTR(lpfc_fcp_cpu_map, S_IRUGO | S_IWUSR, - lpfc_fcp_cpu_map_show, lpfc_fcp_cpu_map_store); +static DEVICE_ATTR_RW(lpfc_fcp_cpu_map); /* # lpfc_fcp_class: Determines FC class to use for the FCP protocol. @@ -4824,9 +4811,7 @@ lpfc_max_scsicmpl_time_set(struct lpfc_vport *vport, int val) return 0; } lpfc_vport_param_store(max_scsicmpl_time); -static DEVICE_ATTR(lpfc_max_scsicmpl_time, S_IRUGO | S_IWUSR, - lpfc_max_scsicmpl_time_show, - lpfc_max_scsicmpl_time_store); +static DEVICE_ATTR_RW(lpfc_max_scsicmpl_time); /* # lpfc_ack0: Use ACK0, instead of ACK1 for class 2 acknowledgement. Value diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c index fb80c96d8f73..c008af7fb480 100644 --- a/drivers/thermal/thermal_sysfs.c +++ b/drivers/thermal/thermal_sysfs.c @@ -398,14 +398,13 @@ create_s32_tzp_attr(offset); */ static DEVICE_ATTR(type, 0444, type_show, NULL); static DEVICE_ATTR(temp, 0444, temp_show, NULL); -static DEVICE_ATTR(policy, S_IRUGO | S_IWUSR, policy_show, policy_store); +static DEVICE_ATTR_RW(policy); static DEVICE_ATTR(available_policies, S_IRUGO, available_policies_show, NULL); -static DEVICE_ATTR(sustainable_power, S_IWUSR | S_IRUGO, sustainable_power_show, - sustainable_power_store); +static DEVICE_ATTR_RW(sustainable_power); /* These thermal zone device attributes are created based on conditions */ -static DEVICE_ATTR(mode, 0644, mode_show, mode_store); -static DEVICE_ATTR(passive, S_IRUGO | S_IWUSR, passive_show, passive_store); +static DEVICE_ATTR_RW(mode); +static DEVICE_ATTR_RW(passive); /* These attributes are unconditionally added to a thermal zone */ static struct attribute *thermal_zone_dev_attrs[] = { diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index d9f399c4e90c..7257c078e155 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -1144,7 +1144,7 @@ static ssize_t rx_fifo_timeout_store(struct device *dev, return count; } -static DEVICE_ATTR(rx_fifo_timeout, 0644, rx_fifo_timeout_show, rx_fifo_timeout_store); +static DEVICE_ATTR_RW(rx_fifo_timeout); #ifdef CONFIG_SERIAL_SH_SCI_DMA diff --git a/drivers/usb/phy/phy-tahvo.c b/drivers/usb/phy/phy-tahvo.c index b3ce42edb373..7f7c5c82420d 100644 --- a/drivers/usb/phy/phy-tahvo.c +++ b/drivers/usb/phy/phy-tahvo.c @@ -310,7 +310,7 @@ static ssize_t otg_mode_store(struct device *device, return r; } -static DEVICE_ATTR(otg_mode, 0644, otg_mode_show, otg_mode_store); +static DEVICE_ATTR_RW(otg_mode); static struct attribute *tahvo_attributes[] = { &dev_attr_vbus.attr, diff --git a/drivers/video/fbdev/auo_k190x.c b/drivers/video/fbdev/auo_k190x.c index 0d06038324e0..1e383c547633 100644 --- a/drivers/video/fbdev/auo_k190x.c +++ b/drivers/video/fbdev/auo_k190x.c @@ -708,8 +708,8 @@ static ssize_t temp_show(struct device *dev, struct device_attribute *attr, return sprintf(buf, "%d\n", temp); } -static DEVICE_ATTR(update_mode, 0644, update_mode_show, update_mode_store); -static DEVICE_ATTR(flash, 0644, flash_show, flash_store); +static DEVICE_ATTR_RW(update_mode); +static DEVICE_ATTR_RW(flash); static DEVICE_ATTR(temp, 0644, temp_show, NULL); static struct attribute *auok190x_attributes[] = { diff --git a/drivers/video/fbdev/w100fb.c b/drivers/video/fbdev/w100fb.c index d570e19a2864..035ff6e02894 100644 --- a/drivers/video/fbdev/w100fb.c +++ b/drivers/video/fbdev/w100fb.c @@ -110,7 +110,7 @@ static ssize_t flip_store(struct device *dev, struct device_attribute *attr, con return count; } -static DEVICE_ATTR(flip, 0644, flip_show, flip_store); +static DEVICE_ATTR_RW(flip); static ssize_t w100fb_reg_read(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { @@ -166,7 +166,7 @@ static ssize_t fastpllclk_store(struct device *dev, struct device_attribute *att return count; } -static DEVICE_ATTR(fastpllclk, 0644, fastpllclk_show, fastpllclk_store); +static DEVICE_ATTR_RW(fastpllclk); /* * Some touchscreens need hsync information from the video driver to diff --git a/lib/test_firmware.c b/lib/test_firmware.c index 64a4c76cba2b..964784dc1602 100644 --- a/lib/test_firmware.c +++ b/lib/test_firmware.c @@ -359,7 +359,7 @@ static ssize_t config_name_show(struct device *dev, { return config_test_show_str(buf, test_fw_config->name); } -static DEVICE_ATTR(config_name, 0644, config_name_show, config_name_store); +static DEVICE_ATTR_RW(config_name); static ssize_t config_num_requests_store(struct device *dev, struct device_attribute *attr, @@ -388,8 +388,7 @@ static ssize_t config_num_requests_show(struct device *dev, { return test_dev_config_show_u8(buf, test_fw_config->num_requests); } -static DEVICE_ATTR(config_num_requests, 0644, config_num_requests_show, - config_num_requests_store); +static DEVICE_ATTR_RW(config_num_requests); static ssize_t config_sync_direct_store(struct device *dev, struct device_attribute *attr, @@ -411,8 +410,7 @@ static ssize_t config_sync_direct_show(struct device *dev, { return test_dev_config_show_bool(buf, test_fw_config->sync_direct); } -static DEVICE_ATTR(config_sync_direct, 0644, config_sync_direct_show, - config_sync_direct_store); +static DEVICE_ATTR_RW(config_sync_direct); static ssize_t config_send_uevent_store(struct device *dev, struct device_attribute *attr, @@ -428,8 +426,7 @@ static ssize_t config_send_uevent_show(struct device *dev, { return test_dev_config_show_bool(buf, test_fw_config->send_uevent); } -static DEVICE_ATTR(config_send_uevent, 0644, config_send_uevent_show, - config_send_uevent_store); +static DEVICE_ATTR_RW(config_send_uevent); static ssize_t config_read_fw_idx_store(struct device *dev, struct device_attribute *attr, @@ -445,8 +442,7 @@ static ssize_t config_read_fw_idx_show(struct device *dev, { return test_dev_config_show_u8(buf, test_fw_config->read_fw_idx); } -static DEVICE_ATTR(config_read_fw_idx, 0644, config_read_fw_idx_show, - config_read_fw_idx_store); +static DEVICE_ATTR_RW(config_read_fw_idx); static ssize_t trigger_request_store(struct device *dev, diff --git a/lib/test_kmod.c b/lib/test_kmod.c index 337f408b4de6..e372b97eee13 100644 --- a/lib/test_kmod.c +++ b/lib/test_kmod.c @@ -694,8 +694,7 @@ static ssize_t config_test_driver_show(struct device *dev, return config_test_show_str(&test_dev->config_mutex, buf, config->test_driver); } -static DEVICE_ATTR(config_test_driver, 0644, config_test_driver_show, - config_test_driver_store); +static DEVICE_ATTR_RW(config_test_driver); static ssize_t config_test_fs_store(struct device *dev, struct device_attribute *attr, @@ -726,8 +725,7 @@ static ssize_t config_test_fs_show(struct device *dev, return config_test_show_str(&test_dev->config_mutex, buf, config->test_fs); } -static DEVICE_ATTR(config_test_fs, 0644, config_test_fs_show, - config_test_fs_store); +static DEVICE_ATTR_RW(config_test_fs); static int trigger_config_run_type(struct kmod_test_device *test_dev, enum kmod_test_case test_case, @@ -1012,8 +1010,7 @@ static ssize_t config_num_threads_show(struct device *dev, return test_dev_config_show_int(test_dev, buf, config->num_threads); } -static DEVICE_ATTR(config_num_threads, 0644, config_num_threads_show, - config_num_threads_store); +static DEVICE_ATTR_RW(config_num_threads); static ssize_t config_test_case_store(struct device *dev, struct device_attribute *attr, @@ -1037,8 +1034,7 @@ static ssize_t config_test_case_show(struct device *dev, return test_dev_config_show_uint(test_dev, buf, config->test_case); } -static DEVICE_ATTR(config_test_case, 0644, config_test_case_show, - config_test_case_store); +static DEVICE_ATTR_RW(config_test_case); static ssize_t test_result_show(struct device *dev, struct device_attribute *attr, @@ -1049,7 +1045,7 @@ static ssize_t test_result_show(struct device *dev, return test_dev_config_show_int(test_dev, buf, config->test_result); } -static DEVICE_ATTR(test_result, 0644, test_result_show, test_result_store); +static DEVICE_ATTR_RW(test_result); #define TEST_KMOD_DEV_ATTR(name) &dev_attr_##name.attr diff --git a/sound/soc/omap/mcbsp.c b/sound/soc/omap/mcbsp.c index 7a54e3083203..79d4dc785e5c 100644 --- a/sound/soc/omap/mcbsp.c +++ b/sound/soc/omap/mcbsp.c @@ -854,7 +854,7 @@ unlock: return size; } -static DEVICE_ATTR(dma_op_mode, 0644, dma_op_mode_show, dma_op_mode_store); +static DEVICE_ATTR_RW(dma_op_mode); static const struct attribute *additional_attrs[] = { &dev_attr_max_tx_thres.attr, @@ -923,7 +923,7 @@ out: return size; } -static DEVICE_ATTR(st_taps, 0644, st_taps_show, st_taps_store); +static DEVICE_ATTR_RW(st_taps); static const struct attribute *sidetone_attrs[] = { &dev_attr_st_taps.attr, -- cgit v1.2.3 From ea8c64ace86647260ec4255f483e5844d62af2df Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 10 Jan 2018 16:21:13 +0100 Subject: dma-mapping: move swiotlb arch helpers to a new header phys_to_dma, dma_to_phys and dma_capable are helpers published by architecture code for use of swiotlb and xen-swiotlb only. Drivers are not supposed to use these directly, but use the DMA API instead. Move these to a new asm/dma-direct.h helper, included by a linux/dma-direct.h wrapper that provides the default linear mapping unless the architecture wants to override it. In the MIPS case the existing dma-coherent.h is reused for now as untangling it will take a bit of work. Signed-off-by: Christoph Hellwig Acked-by: Robin Murphy --- MAINTAINERS | 1 + arch/Kconfig | 4 +++ arch/arm/Kconfig | 1 + arch/arm/include/asm/dma-direct.h | 36 ++++++++++++++++++++++ arch/arm/include/asm/dma-mapping.h | 31 ------------------- arch/arm64/include/asm/dma-mapping.h | 22 ------------- arch/arm64/mm/dma-mapping.c | 2 +- arch/ia64/include/asm/dma-mapping.h | 18 ----------- arch/mips/Kconfig | 2 ++ arch/mips/include/asm/dma-direct.h | 1 + arch/mips/include/asm/dma-mapping.h | 8 ----- .../include/asm/mach-cavium-octeon/dma-coherence.h | 8 +++++ arch/mips/include/asm/mach-generic/dma-coherence.h | 12 -------- .../include/asm/mach-loongson64/dma-coherence.h | 8 +++++ arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/dma-direct.h | 29 +++++++++++++++++ arch/powerpc/include/asm/dma-mapping.h | 25 --------------- arch/tile/include/asm/dma-mapping.h | 18 ----------- arch/unicore32/include/asm/dma-mapping.h | 18 ----------- arch/x86/Kconfig | 1 + arch/x86/include/asm/dma-direct.h | 30 ++++++++++++++++++ arch/x86/include/asm/dma-mapping.h | 26 ---------------- arch/x86/kernel/amd_gart_64.c | 1 + arch/x86/kernel/pci-dma.c | 2 +- arch/x86/kernel/pci-nommu.c | 2 +- arch/x86/kernel/pci-swiotlb.c | 2 +- arch/x86/mm/mem_encrypt.c | 2 +- arch/x86/pci/sta2x11-fixup.c | 1 + arch/xtensa/include/asm/dma-mapping.h | 10 ------ drivers/crypto/marvell/cesa.c | 1 + drivers/mtd/nand/qcom_nandc.c | 1 + drivers/xen/swiotlb-xen.c | 2 +- include/linux/dma-direct.h | 32 +++++++++++++++++++ lib/swiotlb.c | 2 +- 34 files changed, 165 insertions(+), 195 deletions(-) create mode 100644 arch/arm/include/asm/dma-direct.h create mode 100644 arch/mips/include/asm/dma-direct.h create mode 100644 arch/powerpc/include/asm/dma-direct.h create mode 100644 arch/x86/include/asm/dma-direct.h create mode 100644 include/linux/dma-direct.h (limited to 'lib') diff --git a/MAINTAINERS b/MAINTAINERS index 95c3fa1f520f..d2cfdcce1db5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4338,6 +4338,7 @@ F: lib/dma-noop.c F: lib/dma-virt.c F: drivers/base/dma-mapping.c F: drivers/base/dma-coherent.c +F: include/linux/dma-direct.h F: include/linux/dma-mapping.h DME1737 HARDWARE MONITOR DRIVER diff --git a/arch/Kconfig b/arch/Kconfig index 400b9e1b2f27..3edf118ad777 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -938,6 +938,10 @@ config STRICT_MODULE_RWX and non-text memory will be made non-executable. This provides protection against certain security exploits (e.g. writing to text) +# select if the architecture provides an asm/dma-direct.h header +config ARCH_HAS_PHYS_TO_DMA + bool + config ARCH_HAS_REFCOUNT bool help diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 51c8df561077..00d889a37965 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -8,6 +8,7 @@ config ARM select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_SET_MEMORY + select ARCH_HAS_PHYS_TO_DMA select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL select ARCH_HAS_STRICT_MODULE_RWX if MMU select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST diff --git a/arch/arm/include/asm/dma-direct.h b/arch/arm/include/asm/dma-direct.h new file mode 100644 index 000000000000..5b0a8a421894 --- /dev/null +++ b/arch/arm/include/asm/dma-direct.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ASM_ARM_DMA_DIRECT_H +#define ASM_ARM_DMA_DIRECT_H 1 + +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + unsigned int offset = paddr & ~PAGE_MASK; + return pfn_to_dma(dev, __phys_to_pfn(paddr)) + offset; +} + +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr) +{ + unsigned int offset = dev_addr & ~PAGE_MASK; + return __pfn_to_phys(dma_to_pfn(dev, dev_addr)) + offset; +} + +static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) +{ + u64 limit, mask; + + if (!dev->dma_mask) + return 0; + + mask = *dev->dma_mask; + + limit = (mask + 1) & ~mask; + if (limit && size > limit) + return 0; + + if ((addr | (addr + size - 1)) & ~mask) + return 0; + + return 1; +} + +#endif /* ASM_ARM_DMA_DIRECT_H */ diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index daf837423a76..5fb1b7fbdfbe 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -109,37 +109,6 @@ static inline bool is_device_dma_coherent(struct device *dev) return dev->archdata.dma_coherent; } -static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) -{ - unsigned int offset = paddr & ~PAGE_MASK; - return pfn_to_dma(dev, __phys_to_pfn(paddr)) + offset; -} - -static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr) -{ - unsigned int offset = dev_addr & ~PAGE_MASK; - return __pfn_to_phys(dma_to_pfn(dev, dev_addr)) + offset; -} - -static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) -{ - u64 limit, mask; - - if (!dev->dma_mask) - return 0; - - mask = *dev->dma_mask; - - limit = (mask + 1) & ~mask; - if (limit && size > limit) - return 0; - - if ((addr | (addr + size - 1)) & ~mask) - return 0; - - return 1; -} - static inline void dma_mark_clean(void *addr, size_t size) { } /** diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h index eada887a93bf..400fa67d3b5a 100644 --- a/arch/arm64/include/asm/dma-mapping.h +++ b/arch/arm64/include/asm/dma-mapping.h @@ -50,28 +50,6 @@ static inline bool is_device_dma_coherent(struct device *dev) return dev->archdata.dma_coherent; } -static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) -{ - dma_addr_t dev_addr = (dma_addr_t)paddr; - - return dev_addr - ((dma_addr_t)dev->dma_pfn_offset << PAGE_SHIFT); -} - -static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr) -{ - phys_addr_t paddr = (phys_addr_t)dev_addr; - - return paddr + ((phys_addr_t)dev->dma_pfn_offset << PAGE_SHIFT); -} - -static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) -{ - if (!dev->dma_mask) - return false; - - return addr + size - 1 <= *dev->dma_mask; -} - static inline void dma_mark_clean(void *addr, size_t size) { } diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index b45c5bcaeccb..f3a637b98487 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h index c1bab526a046..eabee56d995c 100644 --- a/arch/ia64/include/asm/dma-mapping.h +++ b/arch/ia64/include/asm/dma-mapping.h @@ -27,22 +27,4 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) return platform_dma_get_ops(NULL); } -static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) -{ - if (!dev->dma_mask) - return 0; - - return addr + size - 1 <= *dev->dma_mask; -} - -static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) -{ - return paddr; -} - -static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) -{ - return daddr; -} - #endif /* _ASM_IA64_DMA_MAPPING_H */ diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 350a990fc719..4b0c26b2e9b7 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -429,6 +429,7 @@ config MACH_LOONGSON32 config MACH_LOONGSON64 bool "Loongson-2/3 family of machines" + select ARCH_HAS_PHYS_TO_DMA select SYS_SUPPORTS_ZBOOT help This enables the support of Loongson-2/3 family of machines. @@ -877,6 +878,7 @@ config MIKROTIK_RB532 config CAVIUM_OCTEON_SOC bool "Cavium Networks Octeon SoC based boards" select CEVT_R4K + select ARCH_HAS_PHYS_TO_DMA select ARCH_PHYS_ADDR_T_64BIT select DMA_COHERENT select SYS_SUPPORTS_64BIT_KERNEL diff --git a/arch/mips/include/asm/dma-direct.h b/arch/mips/include/asm/dma-direct.h new file mode 100644 index 000000000000..f32f15530aba --- /dev/null +++ b/arch/mips/include/asm/dma-direct.h @@ -0,0 +1 @@ +#include diff --git a/arch/mips/include/asm/dma-mapping.h b/arch/mips/include/asm/dma-mapping.h index 5c334ac15945..676c14cfc580 100644 --- a/arch/mips/include/asm/dma-mapping.h +++ b/arch/mips/include/asm/dma-mapping.h @@ -17,14 +17,6 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) return mips_dma_map_ops; } -static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) -{ - if (!dev->dma_mask) - return false; - - return addr + size - 1 <= *dev->dma_mask; -} - static inline void dma_mark_clean(void *addr, size_t size) {} #define arch_setup_dma_ops arch_setup_dma_ops diff --git a/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h b/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h index 9110988b92a1..138edf6b5b48 100644 --- a/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h +++ b/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h @@ -61,6 +61,14 @@ static inline void plat_post_dma_flush(struct device *dev) { } +static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) +{ + if (!dev->dma_mask) + return false; + + return addr + size - 1 <= *dev->dma_mask; +} + dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr); phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr); diff --git a/arch/mips/include/asm/mach-generic/dma-coherence.h b/arch/mips/include/asm/mach-generic/dma-coherence.h index 61addb1677e9..8ad7a40ca786 100644 --- a/arch/mips/include/asm/mach-generic/dma-coherence.h +++ b/arch/mips/include/asm/mach-generic/dma-coherence.h @@ -70,16 +70,4 @@ static inline void plat_post_dma_flush(struct device *dev) } #endif -#ifdef CONFIG_SWIOTLB -static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) -{ - return paddr; -} - -static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) -{ - return daddr; -} -#endif - #endif /* __ASM_MACH_GENERIC_DMA_COHERENCE_H */ diff --git a/arch/mips/include/asm/mach-loongson64/dma-coherence.h b/arch/mips/include/asm/mach-loongson64/dma-coherence.h index 1602a9e9e8c2..b1b575f5c6c1 100644 --- a/arch/mips/include/asm/mach-loongson64/dma-coherence.h +++ b/arch/mips/include/asm/mach-loongson64/dma-coherence.h @@ -17,6 +17,14 @@ struct device; +static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) +{ + if (!dev->dma_mask) + return false; + + return addr + size - 1 <= *dev->dma_mask; +} + extern dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr); extern phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr); static inline dma_addr_t plat_map_dma_mem(struct device *dev, void *addr, diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index c51e6ce42e7a..887285eb684a 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -139,6 +139,7 @@ config PPC select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL + select ARCH_HAS_PHYS_TO_DMA select ARCH_HAS_PMEM_API if PPC64 select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE select ARCH_HAS_SG_CHAIN diff --git a/arch/powerpc/include/asm/dma-direct.h b/arch/powerpc/include/asm/dma-direct.h new file mode 100644 index 000000000000..a5b59c765426 --- /dev/null +++ b/arch/powerpc/include/asm/dma-direct.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ASM_POWERPC_DMA_DIRECT_H +#define ASM_POWERPC_DMA_DIRECT_H 1 + +static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) +{ +#ifdef CONFIG_SWIOTLB + struct dev_archdata *sd = &dev->archdata; + + if (sd->max_direct_dma_addr && addr + size > sd->max_direct_dma_addr) + return false; +#endif + + if (!dev->dma_mask) + return false; + + return addr + size - 1 <= *dev->dma_mask; +} + +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + return paddr + get_dma_offset(dev); +} + +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) +{ + return daddr - get_dma_offset(dev); +} +#endif /* ASM_POWERPC_DMA_DIRECT_H */ diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index 592c7f418aa0..f6ab51205a85 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -112,31 +112,6 @@ extern int dma_set_mask(struct device *dev, u64 dma_mask); extern u64 __dma_get_required_mask(struct device *dev); -static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) -{ -#ifdef CONFIG_SWIOTLB - struct dev_archdata *sd = &dev->archdata; - - if (sd->max_direct_dma_addr && addr + size > sd->max_direct_dma_addr) - return false; -#endif - - if (!dev->dma_mask) - return false; - - return addr + size - 1 <= *dev->dma_mask; -} - -static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) -{ - return paddr + get_dma_offset(dev); -} - -static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) -{ - return daddr - get_dma_offset(dev); -} - #define ARCH_HAS_DMA_MMAP_COHERENT #endif /* __KERNEL__ */ diff --git a/arch/tile/include/asm/dma-mapping.h b/arch/tile/include/asm/dma-mapping.h index 97ad62878290..75b8aaa4e70b 100644 --- a/arch/tile/include/asm/dma-mapping.h +++ b/arch/tile/include/asm/dma-mapping.h @@ -44,26 +44,8 @@ static inline void set_dma_offset(struct device *dev, dma_addr_t off) dev->archdata.dma_offset = off; } -static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) -{ - return paddr; -} - -static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) -{ - return daddr; -} - static inline void dma_mark_clean(void *addr, size_t size) {} -static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) -{ - if (!dev->dma_mask) - return 0; - - return addr + size - 1 <= *dev->dma_mask; -} - #define HAVE_ARCH_DMA_SET_MASK 1 int dma_set_mask(struct device *dev, u64 mask); diff --git a/arch/unicore32/include/asm/dma-mapping.h b/arch/unicore32/include/asm/dma-mapping.h index ac608c2f6af6..5cb250bf2d8c 100644 --- a/arch/unicore32/include/asm/dma-mapping.h +++ b/arch/unicore32/include/asm/dma-mapping.h @@ -25,24 +25,6 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) return &swiotlb_dma_map_ops; } -static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) -{ - if (dev && dev->dma_mask) - return addr + size - 1 <= *dev->dma_mask; - - return 1; -} - -static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) -{ - return paddr; -} - -static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) -{ - return daddr; -} - static inline void dma_mark_clean(void *addr, size_t size) {} #endif /* __KERNEL__ */ diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d4fc98c50378..f6f4328103c0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -54,6 +54,7 @@ config X86 select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_KCOV if X86_64 + select ARCH_HAS_PHYS_TO_DMA select ARCH_HAS_PMEM_API if X86_64 # Causing hangs/crashes, see the commit that added this change for details. select ARCH_HAS_REFCOUNT diff --git a/arch/x86/include/asm/dma-direct.h b/arch/x86/include/asm/dma-direct.h new file mode 100644 index 000000000000..1295bc622ebe --- /dev/null +++ b/arch/x86/include/asm/dma-direct.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ASM_X86_DMA_DIRECT_H +#define ASM_X86_DMA_DIRECT_H 1 + +#include + +#ifdef CONFIG_X86_DMA_REMAP /* Platform code defines bridge-specific code */ +bool dma_capable(struct device *dev, dma_addr_t addr, size_t size); +dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr); +phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr); +#else +static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) +{ + if (!dev->dma_mask) + return 0; + + return addr + size - 1 <= *dev->dma_mask; +} + +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + return __sme_set(paddr); +} + +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) +{ + return __sme_clr(daddr); +} +#endif /* CONFIG_X86_DMA_REMAP */ +#endif /* ASM_X86_DMA_DIRECT_H */ diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 0350d99bb8fd..dfdc9357a349 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -12,7 +12,6 @@ #include #include #include -#include #ifdef CONFIG_ISA # define ISA_DMA_BIT_MASK DMA_BIT_MASK(24) @@ -42,31 +41,6 @@ extern void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_addr, unsigned long attrs); -#ifdef CONFIG_X86_DMA_REMAP /* Platform code defines bridge-specific code */ -extern bool dma_capable(struct device *dev, dma_addr_t addr, size_t size); -extern dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr); -extern phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr); -#else - -static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) -{ - if (!dev->dma_mask) - return 0; - - return addr + size - 1 <= *dev->dma_mask; -} - -static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) -{ - return __sme_set(paddr); -} - -static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) -{ - return __sme_clr(daddr); -} -#endif /* CONFIG_X86_DMA_REMAP */ - static inline unsigned long dma_alloc_coherent_mask(struct device *dev, gfp_t gfp) { diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index cc0e8bc0ea3f..ecd486cb06ab 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 599d7462eccc..8439e6de6156 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -#include +#include #include #include #include diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index b0caae27e1b7..618285e475c6 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Fallback functions when the main IOMMU code is not compiled in. This code is roughly equivalent to i386. */ -#include +#include #include #include #include diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 53bd05ea90d8..9d3e35c33d94 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index 391b13402e40..09532c935da0 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c index 53d600217973..75577c1490c4 100644 --- a/arch/x86/pci/sta2x11-fixup.c +++ b/arch/x86/pci/sta2x11-fixup.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #define STA2X11_SWIOTLB_SIZE (4*1024*1024) diff --git a/arch/xtensa/include/asm/dma-mapping.h b/arch/xtensa/include/asm/dma-mapping.h index 153bf2370988..44098800dad7 100644 --- a/arch/xtensa/include/asm/dma-mapping.h +++ b/arch/xtensa/include/asm/dma-mapping.h @@ -23,14 +23,4 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) return &xtensa_dma_map_ops; } -static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) -{ - return (dma_addr_t)paddr; -} - -static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) -{ - return (phys_addr_t)daddr; -} - #endif /* _XTENSA_DMA_MAPPING_H */ diff --git a/drivers/crypto/marvell/cesa.c b/drivers/crypto/marvell/cesa.c index 293832488cc9..3a0c40081ffb 100644 --- a/drivers/crypto/marvell/cesa.c +++ b/drivers/crypto/marvell/cesa.c @@ -24,6 +24,7 @@ #include #include #include +#include /* XXX: drivers shall never use this directly! */ #include #include #include diff --git a/drivers/mtd/nand/qcom_nandc.c b/drivers/mtd/nand/qcom_nandc.c index 2656c1ac5646..411cdfd12a85 100644 --- a/drivers/mtd/nand/qcom_nandc.c +++ b/drivers/mtd/nand/qcom_nandc.c @@ -23,6 +23,7 @@ #include #include #include +#include /* XXX: drivers shall never use this directly! */ /* NANDc reg offsets */ #define NAND_FLASH_CMD 0x00 diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 82fc54f8eb77..5bb72d3f8337 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -36,7 +36,7 @@ #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt #include -#include +#include #include #include #include diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h new file mode 100644 index 000000000000..2cc1b6558944 --- /dev/null +++ b/include/linux/dma-direct.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_DMA_DIRECT_H +#define _LINUX_DMA_DIRECT_H 1 + +#include + +#ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA +#include +#else +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + dma_addr_t dev_addr = (dma_addr_t)paddr; + + return dev_addr - ((dma_addr_t)dev->dma_pfn_offset << PAGE_SHIFT); +} + +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr) +{ + phys_addr_t paddr = (phys_addr_t)dev_addr; + + return paddr + ((phys_addr_t)dev->dma_pfn_offset << PAGE_SHIFT); +} + +static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) +{ + if (!dev->dma_mask) + return false; + + return addr + size - 1 <= *dev->dma_mask; +} +#endif /* !CONFIG_ARCH_HAS_PHYS_TO_DMA */ +#endif /* _LINUX_DMA_DIRECT_H */ diff --git a/lib/swiotlb.c b/lib/swiotlb.c index cea19aaf303c..6583f3512386 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -18,7 +18,7 @@ */ #include -#include +#include #include #include #include -- cgit v1.2.3 From 7660b1fb367eb3723b48d3980451fc4f25a05021 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 31 Dec 2017 18:02:45 -0600 Subject: crypto: chacha20 - use rol32() macro from bitops.h For chacha20_block(), use the existing 32-bit left-rotate function instead of defining one ourselves. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- lib/chacha20.c | 69 +++++++++++++++++++++++++++------------------------------- 1 file changed, 32 insertions(+), 37 deletions(-) (limited to 'lib') diff --git a/lib/chacha20.c b/lib/chacha20.c index 29d3801dee24..c1cc50fb68c9 100644 --- a/lib/chacha20.c +++ b/lib/chacha20.c @@ -16,11 +16,6 @@ #include #include -static inline u32 rotl32(u32 v, u8 n) -{ - return (v << n) | (v >> (sizeof(v) * 8 - n)); -} - void chacha20_block(u32 *state, u32 *stream) { u32 x[16], *out = stream; @@ -30,45 +25,45 @@ void chacha20_block(u32 *state, u32 *stream) x[i] = state[i]; for (i = 0; i < 20; i += 2) { - x[0] += x[4]; x[12] = rotl32(x[12] ^ x[0], 16); - x[1] += x[5]; x[13] = rotl32(x[13] ^ x[1], 16); - x[2] += x[6]; x[14] = rotl32(x[14] ^ x[2], 16); - x[3] += x[7]; x[15] = rotl32(x[15] ^ x[3], 16); + x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 16); + x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 16); + x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 16); + x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 16); - x[8] += x[12]; x[4] = rotl32(x[4] ^ x[8], 12); - x[9] += x[13]; x[5] = rotl32(x[5] ^ x[9], 12); - x[10] += x[14]; x[6] = rotl32(x[6] ^ x[10], 12); - x[11] += x[15]; x[7] = rotl32(x[7] ^ x[11], 12); + x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 12); + x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 12); + x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 12); + x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 12); - x[0] += x[4]; x[12] = rotl32(x[12] ^ x[0], 8); - x[1] += x[5]; x[13] = rotl32(x[13] ^ x[1], 8); - x[2] += x[6]; x[14] = rotl32(x[14] ^ x[2], 8); - x[3] += x[7]; x[15] = rotl32(x[15] ^ x[3], 8); + x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 8); + x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 8); + x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 8); + x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 8); - x[8] += x[12]; x[4] = rotl32(x[4] ^ x[8], 7); - x[9] += x[13]; x[5] = rotl32(x[5] ^ x[9], 7); - x[10] += x[14]; x[6] = rotl32(x[6] ^ x[10], 7); - x[11] += x[15]; x[7] = rotl32(x[7] ^ x[11], 7); + x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 7); + x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 7); + x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 7); + x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 7); - x[0] += x[5]; x[15] = rotl32(x[15] ^ x[0], 16); - x[1] += x[6]; x[12] = rotl32(x[12] ^ x[1], 16); - x[2] += x[7]; x[13] = rotl32(x[13] ^ x[2], 16); - x[3] += x[4]; x[14] = rotl32(x[14] ^ x[3], 16); + x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 16); + x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 16); + x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 16); + x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 16); - x[10] += x[15]; x[5] = rotl32(x[5] ^ x[10], 12); - x[11] += x[12]; x[6] = rotl32(x[6] ^ x[11], 12); - x[8] += x[13]; x[7] = rotl32(x[7] ^ x[8], 12); - x[9] += x[14]; x[4] = rotl32(x[4] ^ x[9], 12); + x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 12); + x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 12); + x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 12); + x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 12); - x[0] += x[5]; x[15] = rotl32(x[15] ^ x[0], 8); - x[1] += x[6]; x[12] = rotl32(x[12] ^ x[1], 8); - x[2] += x[7]; x[13] = rotl32(x[13] ^ x[2], 8); - x[3] += x[4]; x[14] = rotl32(x[14] ^ x[3], 8); + x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 8); + x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 8); + x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 8); + x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 8); - x[10] += x[15]; x[5] = rotl32(x[5] ^ x[10], 7); - x[11] += x[12]; x[6] = rotl32(x[6] ^ x[11], 7); - x[8] += x[13]; x[7] = rotl32(x[7] ^ x[8], 7); - x[9] += x[14]; x[4] = rotl32(x[4] ^ x[9], 7); + x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 7); + x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 7); + x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 7); + x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 7); } for (i = 0; i < ARRAY_SIZE(x); i++) -- cgit v1.2.3 From 540adea3809f61115d2a1ea4ed6e627613452ba1 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 13 Jan 2018 02:55:03 +0900 Subject: error-injection: Separate error-injection from kprobe Since error-injection framework is not limited to be used by kprobes, nor bpf. Other kernel subsystems can use it freely for checking safeness of error-injection, e.g. livepatch, ftrace etc. So this separate error-injection framework from kprobes. Some differences has been made: - "kprobe" word is removed from any APIs/structures. - BPF_ALLOW_ERROR_INJECTION() is renamed to ALLOW_ERROR_INJECTION() since it is not limited for BPF too. - CONFIG_FUNCTION_ERROR_INJECTION is the config item of this feature. It is automatically enabled if the arch supports error injection feature for kprobe or ftrace etc. Signed-off-by: Masami Hiramatsu Reviewed-by: Josef Bacik Signed-off-by: Alexei Starovoitov --- arch/Kconfig | 2 +- arch/x86/Kconfig | 2 +- arch/x86/include/asm/error-injection.h | 13 ++ arch/x86/kernel/kprobes/core.c | 14 --- arch/x86/lib/Makefile | 1 + arch/x86/lib/error-inject.c | 19 +++ fs/btrfs/disk-io.c | 4 +- fs/btrfs/free-space-cache.c | 4 +- include/asm-generic/error-injection.h | 20 ++++ include/asm-generic/vmlinux.lds.h | 14 +-- include/linux/bpf.h | 11 -- include/linux/error-injection.h | 21 ++++ include/linux/kprobes.h | 1 - include/linux/module.h | 6 +- kernel/kprobes.c | 163 ------------------------- kernel/module.c | 8 +- kernel/trace/Kconfig | 2 +- kernel/trace/bpf_trace.c | 4 +- kernel/trace/trace_kprobe.c | 3 +- lib/Kconfig.debug | 4 + lib/Makefile | 1 + lib/error-inject.c | 213 +++++++++++++++++++++++++++++++++ 22 files changed, 317 insertions(+), 213 deletions(-) create mode 100644 arch/x86/include/asm/error-injection.h create mode 100644 arch/x86/lib/error-inject.c create mode 100644 include/asm-generic/error-injection.h create mode 100644 include/linux/error-injection.h create mode 100644 lib/error-inject.c (limited to 'lib') diff --git a/arch/Kconfig b/arch/Kconfig index d3f4aaf9cb7a..97376accfb14 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -196,7 +196,7 @@ config HAVE_OPTPROBES config HAVE_KPROBES_ON_FTRACE bool -config HAVE_KPROBE_OVERRIDE +config HAVE_FUNCTION_ERROR_INJECTION bool config HAVE_NMI diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 45dc6233f2b9..366b19cb79b7 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -154,7 +154,7 @@ config X86 select HAVE_KERNEL_XZ select HAVE_KPROBES select HAVE_KPROBES_ON_FTRACE - select HAVE_KPROBE_OVERRIDE + select HAVE_FUNCTION_ERROR_INJECTION select HAVE_KRETPROBES select HAVE_KVM select HAVE_LIVEPATCH if X86_64 diff --git a/arch/x86/include/asm/error-injection.h b/arch/x86/include/asm/error-injection.h new file mode 100644 index 000000000000..47b7a1296245 --- /dev/null +++ b/arch/x86/include/asm/error-injection.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_ERROR_INJECTION_H +#define _ASM_ERROR_INJECTION_H + +#include +#include +#include +#include + +asmlinkage void just_return_func(void); +void override_function_with_return(struct pt_regs *regs); + +#endif /* _ASM_ERROR_INJECTION_H */ diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index b02a377d5905..bd36f3c33cd0 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -1183,17 +1183,3 @@ int arch_trampoline_kprobe(struct kprobe *p) { return 0; } - -asmlinkage void override_func(void); -asm( - ".type override_func, @function\n" - "override_func:\n" - " ret\n" - ".size override_func, .-override_func\n" -); - -void arch_kprobe_override_function(struct pt_regs *regs) -{ - regs->ip = (unsigned long)&override_func; -} -NOKPROBE_SYMBOL(arch_kprobe_override_function); diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 7b181b61170e..171377b83be1 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -26,6 +26,7 @@ lib-y += memcpy_$(BITS).o lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o +lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o diff --git a/arch/x86/lib/error-inject.c b/arch/x86/lib/error-inject.c new file mode 100644 index 000000000000..7b881d03d0dd --- /dev/null +++ b/arch/x86/lib/error-inject.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include + +asmlinkage void just_return_func(void); + +asm( + ".type just_return_func, @function\n" + "just_return_func:\n" + " ret\n" + ".size just_return_func, .-just_return_func\n" +); + +void override_function_with_return(struct pt_regs *regs) +{ + regs->ip = (unsigned long)&just_return_func; +} +NOKPROBE_SYMBOL(override_function_with_return); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5da18ebc9222..9798e21ebe9d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include "ctree.h" #include "disk-io.h" @@ -3124,7 +3124,7 @@ recovery_tree_root: goto fail_block_groups; goto retry_root_backup; } -BPF_ALLOW_ERROR_INJECTION(open_ctree); +ALLOW_ERROR_INJECTION(open_ctree); static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) { diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index fb1382893bfc..ef847699031a 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include "ctree.h" #include "free-space-cache.h" #include "transaction.h" @@ -333,7 +333,7 @@ static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode, return 0; } -BPF_ALLOW_ERROR_INJECTION(io_ctl_init); +ALLOW_ERROR_INJECTION(io_ctl_init); static void io_ctl_free(struct btrfs_io_ctl *io_ctl) { diff --git a/include/asm-generic/error-injection.h b/include/asm-generic/error-injection.h new file mode 100644 index 000000000000..08352c9d9f97 --- /dev/null +++ b/include/asm-generic/error-injection.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_GENERIC_ERROR_INJECTION_H +#define _ASM_GENERIC_ERROR_INJECTION_H + +#if defined(__KERNEL__) && !defined(__ASSEMBLY__) +#ifdef CONFIG_FUNCTION_ERROR_INJECTION +/* + * Whitelist ganerating macro. Specify functions which can be + * error-injectable using this macro. + */ +#define ALLOW_ERROR_INJECTION(fname) \ +static unsigned long __used \ + __attribute__((__section__("_error_injection_whitelist"))) \ + _eil_addr_##fname = (unsigned long)fname; +#else +#define ALLOW_ERROR_INJECTION(fname) +#endif +#endif + +#endif /* _ASM_GENERIC_ERROR_INJECTION_H */ diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index a2e8582d094a..f2068cca5206 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -136,13 +136,13 @@ #define KPROBE_BLACKLIST() #endif -#ifdef CONFIG_BPF_KPROBE_OVERRIDE -#define ERROR_INJECT_LIST() . = ALIGN(8); \ - VMLINUX_SYMBOL(__start_kprobe_error_inject_list) = .; \ - KEEP(*(_kprobe_error_inject_list)) \ - VMLINUX_SYMBOL(__stop_kprobe_error_inject_list) = .; +#ifdef CONFIG_FUNCTION_ERROR_INJECTION +#define ERROR_INJECT_WHITELIST() . = ALIGN(8); \ + VMLINUX_SYMBOL(__start_error_injection_whitelist) = .;\ + KEEP(*(_error_injection_whitelist)) \ + VMLINUX_SYMBOL(__stop_error_injection_whitelist) = .; #else -#define ERROR_INJECT_LIST() +#define ERROR_INJECT_WHITELIST() #endif #ifdef CONFIG_EVENT_TRACING @@ -573,7 +573,7 @@ FTRACE_EVENTS() \ TRACE_SYSCALLS() \ KPROBE_BLACKLIST() \ - ERROR_INJECT_LIST() \ + ERROR_INJECT_WHITELIST() \ MEM_DISCARD(init.rodata) \ CLK_OF_TABLES() \ RESERVEDMEM_OF_TABLES() \ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 44f26f6df8fc..3496977203a3 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -613,15 +613,4 @@ extern const struct bpf_func_proto bpf_sock_map_update_proto; void bpf_user_rnd_init_once(void); u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); -#if defined(__KERNEL__) && !defined(__ASSEMBLY__) -#ifdef CONFIG_BPF_KPROBE_OVERRIDE -#define BPF_ALLOW_ERROR_INJECTION(fname) \ -static unsigned long __used \ - __attribute__((__section__("_kprobe_error_inject_list"))) \ - _eil_addr_##fname = (unsigned long)fname; -#else -#define BPF_ALLOW_ERROR_INJECTION(fname) -#endif -#endif - #endif /* _LINUX_BPF_H */ diff --git a/include/linux/error-injection.h b/include/linux/error-injection.h new file mode 100644 index 000000000000..130a67c50dac --- /dev/null +++ b/include/linux/error-injection.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_ERROR_INJECTION_H +#define _LINUX_ERROR_INJECTION_H + +#ifdef CONFIG_FUNCTION_ERROR_INJECTION + +#include + +extern bool within_error_injection_list(unsigned long addr); + +#else /* !CONFIG_FUNCTION_ERROR_INJECTION */ + +#include +static inline bool within_error_injection_list(unsigned long addr) +{ + return false; +} + +#endif + +#endif /* _LINUX_ERROR_INJECTION_H */ diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 963fd364f3d6..9440a2fc8893 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -271,7 +271,6 @@ extern bool arch_kprobe_on_func_entry(unsigned long offset); extern bool kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset); extern bool within_kprobe_blacklist(unsigned long addr); -extern bool within_kprobe_error_injection_list(unsigned long addr); struct kprobe_insn_cache { struct mutex mutex; diff --git a/include/linux/module.h b/include/linux/module.h index 548fa09fa806..792e51d83bda 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -476,9 +476,9 @@ struct module { unsigned int num_ctors; #endif -#ifdef CONFIG_BPF_KPROBE_OVERRIDE - unsigned int num_kprobe_ei_funcs; - unsigned long *kprobe_ei_funcs; +#ifdef CONFIG_FUNCTION_ERROR_INJECTION + unsigned int num_ei_funcs; + unsigned long *ei_funcs; #endif } ____cacheline_aligned __randomize_layout; #ifndef MODULE_ARCH_INIT diff --git a/kernel/kprobes.c b/kernel/kprobes.c index b4aab48ad258..da2ccf142358 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -83,16 +83,6 @@ static raw_spinlock_t *kretprobe_table_lock_ptr(unsigned long hash) return &(kretprobe_table_locks[hash].lock); } -/* List of symbols that can be overriden for error injection. */ -static LIST_HEAD(kprobe_error_injection_list); -static DEFINE_MUTEX(kprobe_ei_mutex); -struct kprobe_ei_entry { - struct list_head list; - unsigned long start_addr; - unsigned long end_addr; - void *priv; -}; - /* Blacklist -- list of struct kprobe_blacklist_entry */ static LIST_HEAD(kprobe_blacklist); @@ -1404,17 +1394,6 @@ bool within_kprobe_blacklist(unsigned long addr) return false; } -bool within_kprobe_error_injection_list(unsigned long addr) -{ - struct kprobe_ei_entry *ent; - - list_for_each_entry(ent, &kprobe_error_injection_list, list) { - if (addr >= ent->start_addr && addr < ent->end_addr) - return true; - } - return false; -} - /* * If we have a symbol_name argument, look it up and add the offset field * to it. This way, we can specify a relative address to a symbol. @@ -2189,86 +2168,6 @@ static int __init populate_kprobe_blacklist(unsigned long *start, return 0; } -#ifdef CONFIG_BPF_KPROBE_OVERRIDE -/* Markers of the _kprobe_error_inject_list section */ -extern unsigned long __start_kprobe_error_inject_list[]; -extern unsigned long __stop_kprobe_error_inject_list[]; - -/* - * Lookup and populate the kprobe_error_injection_list. - * - * For safety reasons we only allow certain functions to be overriden with - * bpf_error_injection, so we need to populate the list of the symbols that have - * been marked as safe for overriding. - */ -static void populate_kprobe_error_injection_list(unsigned long *start, - unsigned long *end, - void *priv) -{ - unsigned long *iter; - struct kprobe_ei_entry *ent; - unsigned long entry, offset = 0, size = 0; - - mutex_lock(&kprobe_ei_mutex); - for (iter = start; iter < end; iter++) { - entry = arch_deref_entry_point((void *)*iter); - - if (!kernel_text_address(entry) || - !kallsyms_lookup_size_offset(entry, &size, &offset)) { - pr_err("Failed to find error inject entry at %p\n", - (void *)entry); - continue; - } - - ent = kmalloc(sizeof(*ent), GFP_KERNEL); - if (!ent) - break; - ent->start_addr = entry; - ent->end_addr = entry + size; - ent->priv = priv; - INIT_LIST_HEAD(&ent->list); - list_add_tail(&ent->list, &kprobe_error_injection_list); - } - mutex_unlock(&kprobe_ei_mutex); -} - -static void __init populate_kernel_kprobe_ei_list(void) -{ - populate_kprobe_error_injection_list(__start_kprobe_error_inject_list, - __stop_kprobe_error_inject_list, - NULL); -} - -static void module_load_kprobe_ei_list(struct module *mod) -{ - if (!mod->num_kprobe_ei_funcs) - return; - populate_kprobe_error_injection_list(mod->kprobe_ei_funcs, - mod->kprobe_ei_funcs + - mod->num_kprobe_ei_funcs, mod); -} - -static void module_unload_kprobe_ei_list(struct module *mod) -{ - struct kprobe_ei_entry *ent, *n; - if (!mod->num_kprobe_ei_funcs) - return; - - mutex_lock(&kprobe_ei_mutex); - list_for_each_entry_safe(ent, n, &kprobe_error_injection_list, list) { - if (ent->priv == mod) { - list_del_init(&ent->list); - kfree(ent); - } - } - mutex_unlock(&kprobe_ei_mutex); -} -#else -static inline void __init populate_kernel_kprobe_ei_list(void) {} -static inline void module_load_kprobe_ei_list(struct module *m) {} -static inline void module_unload_kprobe_ei_list(struct module *m) {} -#endif - /* Module notifier call back, checking kprobes on the module */ static int kprobes_module_callback(struct notifier_block *nb, unsigned long val, void *data) @@ -2279,11 +2178,6 @@ static int kprobes_module_callback(struct notifier_block *nb, unsigned int i; int checkcore = (val == MODULE_STATE_GOING); - if (val == MODULE_STATE_COMING) - module_load_kprobe_ei_list(mod); - else if (val == MODULE_STATE_GOING) - module_unload_kprobe_ei_list(mod); - if (val != MODULE_STATE_GOING && val != MODULE_STATE_LIVE) return NOTIFY_DONE; @@ -2346,8 +2240,6 @@ static int __init init_kprobes(void) pr_err("Please take care of using kprobes.\n"); } - populate_kernel_kprobe_ei_list(); - if (kretprobe_blacklist_size) { /* lookup the function address from its name */ for (i = 0; kretprobe_blacklist[i].name != NULL; i++) { @@ -2515,56 +2407,6 @@ static const struct file_operations debugfs_kprobe_blacklist_ops = { .release = seq_release, }; -/* - * kprobes/error_injection_list -- shows which functions can be overriden for - * error injection. - * */ -static void *kprobe_ei_seq_start(struct seq_file *m, loff_t *pos) -{ - mutex_lock(&kprobe_ei_mutex); - return seq_list_start(&kprobe_error_injection_list, *pos); -} - -static void kprobe_ei_seq_stop(struct seq_file *m, void *v) -{ - mutex_unlock(&kprobe_ei_mutex); -} - -static void *kprobe_ei_seq_next(struct seq_file *m, void *v, loff_t *pos) -{ - return seq_list_next(v, &kprobe_error_injection_list, pos); -} - -static int kprobe_ei_seq_show(struct seq_file *m, void *v) -{ - char buffer[KSYM_SYMBOL_LEN]; - struct kprobe_ei_entry *ent = - list_entry(v, struct kprobe_ei_entry, list); - - sprint_symbol(buffer, ent->start_addr); - seq_printf(m, "%s\n", buffer); - return 0; -} - -static const struct seq_operations kprobe_ei_seq_ops = { - .start = kprobe_ei_seq_start, - .next = kprobe_ei_seq_next, - .stop = kprobe_ei_seq_stop, - .show = kprobe_ei_seq_show, -}; - -static int kprobe_ei_open(struct inode *inode, struct file *filp) -{ - return seq_open(filp, &kprobe_ei_seq_ops); -} - -static const struct file_operations debugfs_kprobe_ei_ops = { - .open = kprobe_ei_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - static void arm_all_kprobes(void) { struct hlist_head *head; @@ -2706,11 +2548,6 @@ static int __init debugfs_kprobe_init(void) if (!file) goto error; - file = debugfs_create_file("error_injection_list", 0444, dir, NULL, - &debugfs_kprobe_ei_ops); - if (!file) - goto error; - return 0; error: diff --git a/kernel/module.c b/kernel/module.c index bd695bfdc5c4..601494d4b7ea 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3118,10 +3118,10 @@ static int find_module_sections(struct module *mod, struct load_info *info) sizeof(*mod->ftrace_callsites), &mod->num_ftrace_callsites); #endif -#ifdef CONFIG_BPF_KPROBE_OVERRIDE - mod->kprobe_ei_funcs = section_objs(info, "_kprobe_error_inject_list", - sizeof(*mod->kprobe_ei_funcs), - &mod->num_kprobe_ei_funcs); +#ifdef CONFIG_FUNCTION_ERROR_INJECTION + mod->ei_funcs = section_objs(info, "_error_injection_whitelist", + sizeof(*mod->ei_funcs), + &mod->num_ei_funcs); #endif mod->extable = section_objs(info, "__ex_table", sizeof(*mod->extable), &mod->num_exentries); diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 6400e1bf97c5..7114c885a78a 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -533,7 +533,7 @@ config FUNCTION_PROFILER config BPF_KPROBE_OVERRIDE bool "Enable BPF programs to override a kprobed function" depends on BPF_EVENTS - depends on HAVE_KPROBE_OVERRIDE + depends on FUNCTION_ERROR_INJECTION default n help Allows BPF to override the execution of a probed function and diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 24ed6363e00f..f274468cbc45 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include "trace_probe.h" #include "trace.h" @@ -84,7 +84,7 @@ EXPORT_SYMBOL_GPL(trace_call_bpf); BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc) { regs_set_return_value(regs, rc); - arch_kprobe_override_function(regs); + override_function_with_return(regs); return 0; } diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index b8c90441bc87..1fad24acd444 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "trace_probe.h" @@ -107,7 +108,7 @@ bool trace_kprobe_error_injectable(struct trace_event_call *call) } else { addr = (unsigned long)tk->rp.kp.addr; } - return within_kprobe_error_injection_list(addr); + return within_error_injection_list(addr); } static int register_kprobe_event(struct trace_kprobe *tk); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 9d5b78aad4c5..2a33efdd1fea 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1500,6 +1500,10 @@ config FAULT_INJECTION Provide fault-injection framework. For more details, see Documentation/fault-injection/. +config FUNCTION_ERROR_INJECTION + def_bool y + depends on HAVE_FUNCTION_ERROR_INJECTION && KPROBES + config FAILSLAB bool "Fault-injection capability for kmalloc" depends on FAULT_INJECTION diff --git a/lib/Makefile b/lib/Makefile index a6c8529dd9b2..75ec13778cd8 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -149,6 +149,7 @@ obj-$(CONFIG_NETDEV_NOTIFIER_ERROR_INJECT) += netdev-notifier-error-inject.o obj-$(CONFIG_MEMORY_NOTIFIER_ERROR_INJECT) += memory-notifier-error-inject.o obj-$(CONFIG_OF_RECONFIG_NOTIFIER_ERROR_INJECT) += \ of-reconfig-notifier-error-inject.o +obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o lib-$(CONFIG_GENERIC_BUG) += bug.o diff --git a/lib/error-inject.c b/lib/error-inject.c new file mode 100644 index 000000000000..bccadcf3c981 --- /dev/null +++ b/lib/error-inject.c @@ -0,0 +1,213 @@ +// SPDX-License-Identifier: GPL-2.0 +// error-inject.c: Function-level error injection table +#include +#include +#include +#include +#include +#include +#include +#include + +/* Whitelist of symbols that can be overridden for error injection. */ +static LIST_HEAD(error_injection_list); +static DEFINE_MUTEX(ei_mutex); +struct ei_entry { + struct list_head list; + unsigned long start_addr; + unsigned long end_addr; + void *priv; +}; + +bool within_error_injection_list(unsigned long addr) +{ + struct ei_entry *ent; + bool ret = false; + + mutex_lock(&ei_mutex); + list_for_each_entry(ent, &error_injection_list, list) { + if (addr >= ent->start_addr && addr < ent->end_addr) { + ret = true; + break; + } + } + mutex_unlock(&ei_mutex); + return ret; +} + +/* + * Lookup and populate the error_injection_list. + * + * For safety reasons we only allow certain functions to be overridden with + * bpf_error_injection, so we need to populate the list of the symbols that have + * been marked as safe for overriding. + */ +static void populate_error_injection_list(unsigned long *start, + unsigned long *end, void *priv) +{ + unsigned long *iter; + struct ei_entry *ent; + unsigned long entry, offset = 0, size = 0; + + mutex_lock(&ei_mutex); + for (iter = start; iter < end; iter++) { + entry = arch_deref_entry_point((void *)*iter); + + if (!kernel_text_address(entry) || + !kallsyms_lookup_size_offset(entry, &size, &offset)) { + pr_err("Failed to find error inject entry at %p\n", + (void *)entry); + continue; + } + + ent = kmalloc(sizeof(*ent), GFP_KERNEL); + if (!ent) + break; + ent->start_addr = entry; + ent->end_addr = entry + size; + ent->priv = priv; + INIT_LIST_HEAD(&ent->list); + list_add_tail(&ent->list, &error_injection_list); + } + mutex_unlock(&ei_mutex); +} + +/* Markers of the _error_inject_whitelist section */ +extern unsigned long __start_error_injection_whitelist[]; +extern unsigned long __stop_error_injection_whitelist[]; + +static void __init populate_kernel_ei_list(void) +{ + populate_error_injection_list(__start_error_injection_whitelist, + __stop_error_injection_whitelist, + NULL); +} + +#ifdef CONFIG_MODULES +static void module_load_ei_list(struct module *mod) +{ + if (!mod->num_ei_funcs) + return; + + populate_error_injection_list(mod->ei_funcs, + mod->ei_funcs + mod->num_ei_funcs, mod); +} + +static void module_unload_ei_list(struct module *mod) +{ + struct ei_entry *ent, *n; + + if (!mod->num_ei_funcs) + return; + + mutex_lock(&ei_mutex); + list_for_each_entry_safe(ent, n, &error_injection_list, list) { + if (ent->priv == mod) { + list_del_init(&ent->list); + kfree(ent); + } + } + mutex_unlock(&ei_mutex); +} + +/* Module notifier call back, checking error injection table on the module */ +static int ei_module_callback(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct module *mod = data; + + if (val == MODULE_STATE_COMING) + module_load_ei_list(mod); + else if (val == MODULE_STATE_GOING) + module_unload_ei_list(mod); + + return NOTIFY_DONE; +} + +static struct notifier_block ei_module_nb = { + .notifier_call = ei_module_callback, + .priority = 0 +}; + +static __init int module_ei_init(void) +{ + return register_module_notifier(&ei_module_nb); +} +#else /* !CONFIG_MODULES */ +#define module_ei_init() (0) +#endif + +/* + * error_injection/whitelist -- shows which functions can be overridden for + * error injection. + */ +static void *ei_seq_start(struct seq_file *m, loff_t *pos) +{ + mutex_lock(&ei_mutex); + return seq_list_start(&error_injection_list, *pos); +} + +static void ei_seq_stop(struct seq_file *m, void *v) +{ + mutex_unlock(&ei_mutex); +} + +static void *ei_seq_next(struct seq_file *m, void *v, loff_t *pos) +{ + return seq_list_next(v, &error_injection_list, pos); +} + +static int ei_seq_show(struct seq_file *m, void *v) +{ + struct ei_entry *ent = list_entry(v, struct ei_entry, list); + + seq_printf(m, "%pf\n", (void *)ent->start_addr); + return 0; +} + +static const struct seq_operations ei_seq_ops = { + .start = ei_seq_start, + .next = ei_seq_next, + .stop = ei_seq_stop, + .show = ei_seq_show, +}; + +static int ei_open(struct inode *inode, struct file *filp) +{ + return seq_open(filp, &ei_seq_ops); +} + +static const struct file_operations debugfs_ei_ops = { + .open = ei_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int __init ei_debugfs_init(void) +{ + struct dentry *dir, *file; + + dir = debugfs_create_dir("error_injection", NULL); + if (!dir) + return -ENOMEM; + + file = debugfs_create_file("list", 0444, dir, NULL, &debugfs_ei_ops); + if (!file) { + debugfs_remove(dir); + return -ENOMEM; + } + + return 0; +} + +static int __init init_error_injection(void) +{ + populate_kernel_ei_list(); + + if (!module_ei_init()) + ei_debugfs_init(); + + return 0; +} +late_initcall(init_error_injection); -- cgit v1.2.3 From 663faf9f7beeaca4ad0176bb96c776eed9dad0c5 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 13 Jan 2018 02:55:33 +0900 Subject: error-injection: Add injectable error types Add injectable error types for each error-injectable function. One motivation of error injection test is to find software flaws, mistakes or mis-handlings of expectable errors. If we find such flaws by the test, that is a program bug, so we need to fix it. But if the tester miss input the error (e.g. just return success code without processing anything), it causes unexpected behavior even if the caller is correctly programmed to handle any errors. That is not what we want to test by error injection. To clarify what type of errors the caller must expect for each injectable function, this introduces injectable error types: - EI_ETYPE_NULL : means the function will return NULL if it fails. No ERR_PTR, just a NULL. - EI_ETYPE_ERRNO : means the function will return -ERRNO if it fails. - EI_ETYPE_ERRNO_NULL : means the function will return -ERRNO (ERR_PTR) or NULL. ALLOW_ERROR_INJECTION() macro is expanded to get one of NULL, ERRNO, ERRNO_NULL to record the error type for each function. e.g. ALLOW_ERROR_INJECTION(open_ctree, ERRNO) This error types are shown in debugfs as below. ==== / # cat /sys/kernel/debug/error_injection/list open_ctree [btrfs] ERRNO io_ctl_init [btrfs] ERRNO ==== Signed-off-by: Masami Hiramatsu Reviewed-by: Josef Bacik Signed-off-by: Alexei Starovoitov --- fs/btrfs/disk-io.c | 2 +- fs/btrfs/free-space-cache.c | 2 +- include/asm-generic/error-injection.h | 23 +++++++++++++++---- include/asm-generic/vmlinux.lds.h | 2 +- include/linux/error-injection.h | 6 +++++ include/linux/module.h | 3 ++- lib/error-inject.c | 43 +++++++++++++++++++++++++++++------ 7 files changed, 66 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9798e21ebe9d..83e2349e1362 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3124,7 +3124,7 @@ recovery_tree_root: goto fail_block_groups; goto retry_root_backup; } -ALLOW_ERROR_INJECTION(open_ctree); +ALLOW_ERROR_INJECTION(open_ctree, ERRNO); static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) { diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index ef847699031a..586bb06472bb 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -333,7 +333,7 @@ static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode, return 0; } -ALLOW_ERROR_INJECTION(io_ctl_init); +ALLOW_ERROR_INJECTION(io_ctl_init, ERRNO); static void io_ctl_free(struct btrfs_io_ctl *io_ctl) { diff --git a/include/asm-generic/error-injection.h b/include/asm-generic/error-injection.h index 08352c9d9f97..296c65442f00 100644 --- a/include/asm-generic/error-injection.h +++ b/include/asm-generic/error-injection.h @@ -3,17 +3,32 @@ #define _ASM_GENERIC_ERROR_INJECTION_H #if defined(__KERNEL__) && !defined(__ASSEMBLY__) +enum { + EI_ETYPE_NONE, /* Dummy value for undefined case */ + EI_ETYPE_NULL, /* Return NULL if failure */ + EI_ETYPE_ERRNO, /* Return -ERRNO if failure */ + EI_ETYPE_ERRNO_NULL, /* Return -ERRNO or NULL if failure */ +}; + +struct error_injection_entry { + unsigned long addr; + int etype; +}; + #ifdef CONFIG_FUNCTION_ERROR_INJECTION /* * Whitelist ganerating macro. Specify functions which can be * error-injectable using this macro. */ -#define ALLOW_ERROR_INJECTION(fname) \ -static unsigned long __used \ +#define ALLOW_ERROR_INJECTION(fname, _etype) \ +static struct error_injection_entry __used \ __attribute__((__section__("_error_injection_whitelist"))) \ - _eil_addr_##fname = (unsigned long)fname; + _eil_addr_##fname = { \ + .addr = (unsigned long)fname, \ + .etype = EI_ETYPE_##_etype, \ + }; #else -#define ALLOW_ERROR_INJECTION(fname) +#define ALLOW_ERROR_INJECTION(fname, _etype) #endif #endif diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index f2068cca5206..ebe544e048cd 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -137,7 +137,7 @@ #endif #ifdef CONFIG_FUNCTION_ERROR_INJECTION -#define ERROR_INJECT_WHITELIST() . = ALIGN(8); \ +#define ERROR_INJECT_WHITELIST() STRUCT_ALIGN(); \ VMLINUX_SYMBOL(__start_error_injection_whitelist) = .;\ KEEP(*(_error_injection_whitelist)) \ VMLINUX_SYMBOL(__stop_error_injection_whitelist) = .; diff --git a/include/linux/error-injection.h b/include/linux/error-injection.h index 130a67c50dac..280c61ecbf20 100644 --- a/include/linux/error-injection.h +++ b/include/linux/error-injection.h @@ -7,6 +7,7 @@ #include extern bool within_error_injection_list(unsigned long addr); +extern int get_injectable_error_type(unsigned long addr); #else /* !CONFIG_FUNCTION_ERROR_INJECTION */ @@ -16,6 +17,11 @@ static inline bool within_error_injection_list(unsigned long addr) return false; } +static inline int get_injectable_error_type(unsigned long addr) +{ + return EI_ETYPE_NONE; +} + #endif #endif /* _LINUX_ERROR_INJECTION_H */ diff --git a/include/linux/module.h b/include/linux/module.h index 792e51d83bda..9642d3116718 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -477,8 +478,8 @@ struct module { #endif #ifdef CONFIG_FUNCTION_ERROR_INJECTION + struct error_injection_entry *ei_funcs; unsigned int num_ei_funcs; - unsigned long *ei_funcs; #endif } ____cacheline_aligned __randomize_layout; #ifndef MODULE_ARCH_INIT diff --git a/lib/error-inject.c b/lib/error-inject.c index bccadcf3c981..c0d4600f4896 100644 --- a/lib/error-inject.c +++ b/lib/error-inject.c @@ -16,6 +16,7 @@ struct ei_entry { struct list_head list; unsigned long start_addr; unsigned long end_addr; + int etype; void *priv; }; @@ -35,6 +36,17 @@ bool within_error_injection_list(unsigned long addr) return ret; } +int get_injectable_error_type(unsigned long addr) +{ + struct ei_entry *ent; + + list_for_each_entry(ent, &error_injection_list, list) { + if (addr >= ent->start_addr && addr < ent->end_addr) + return ent->etype; + } + return EI_ETYPE_NONE; +} + /* * Lookup and populate the error_injection_list. * @@ -42,16 +54,17 @@ bool within_error_injection_list(unsigned long addr) * bpf_error_injection, so we need to populate the list of the symbols that have * been marked as safe for overriding. */ -static void populate_error_injection_list(unsigned long *start, - unsigned long *end, void *priv) +static void populate_error_injection_list(struct error_injection_entry *start, + struct error_injection_entry *end, + void *priv) { - unsigned long *iter; + struct error_injection_entry *iter; struct ei_entry *ent; unsigned long entry, offset = 0, size = 0; mutex_lock(&ei_mutex); for (iter = start; iter < end; iter++) { - entry = arch_deref_entry_point((void *)*iter); + entry = arch_deref_entry_point((void *)iter->addr); if (!kernel_text_address(entry) || !kallsyms_lookup_size_offset(entry, &size, &offset)) { @@ -65,6 +78,7 @@ static void populate_error_injection_list(unsigned long *start, break; ent->start_addr = entry; ent->end_addr = entry + size; + ent->etype = iter->etype; ent->priv = priv; INIT_LIST_HEAD(&ent->list); list_add_tail(&ent->list, &error_injection_list); @@ -73,8 +87,8 @@ static void populate_error_injection_list(unsigned long *start, } /* Markers of the _error_inject_whitelist section */ -extern unsigned long __start_error_injection_whitelist[]; -extern unsigned long __stop_error_injection_whitelist[]; +extern struct error_injection_entry __start_error_injection_whitelist[]; +extern struct error_injection_entry __stop_error_injection_whitelist[]; static void __init populate_kernel_ei_list(void) { @@ -157,11 +171,26 @@ static void *ei_seq_next(struct seq_file *m, void *v, loff_t *pos) return seq_list_next(v, &error_injection_list, pos); } +static const char *error_type_string(int etype) +{ + switch (etype) { + case EI_ETYPE_NULL: + return "NULL"; + case EI_ETYPE_ERRNO: + return "ERRNO"; + case EI_ETYPE_ERRNO_NULL: + return "ERRNO_NULL"; + default: + return "(unknown)"; + } +} + static int ei_seq_show(struct seq_file *m, void *v) { struct ei_entry *ent = list_entry(v, struct ei_entry, list); - seq_printf(m, "%pf\n", (void *)ent->start_addr); + seq_printf(m, "%pf\t%s\n", (void *)ent->start_addr, + error_type_string(ent->etype)); return 0; } -- cgit v1.2.3 From 4b1a29a7f5425d32640b34b8a755f34e02f64d0f Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 13 Jan 2018 02:56:03 +0900 Subject: error-injection: Support fault injection framework Support in-kernel fault-injection framework via debugfs. This allows you to inject a conditional error to specified function using debugfs interfaces. Here is the result of test script described in Documentation/fault-injection/fault-injection.txt =========== # ./test_fail_function.sh 1+0 records in 1+0 records out 1048576 bytes (1.0 MB, 1.0 MiB) copied, 0.0227404 s, 46.1 MB/s btrfs-progs v4.4 See http://btrfs.wiki.kernel.org for more information. Label: (null) UUID: bfa96010-12e9-4360-aed0-42eec7af5798 Node size: 16384 Sector size: 4096 Filesystem size: 1001.00MiB Block group profiles: Data: single 8.00MiB Metadata: DUP 58.00MiB System: DUP 12.00MiB SSD detected: no Incompat features: extref, skinny-metadata Number of devices: 1 Devices: ID SIZE PATH 1 1001.00MiB /dev/loop2 mount: mount /dev/loop2 on /opt/tmpmnt failed: Cannot allocate memory SUCCESS! =========== Signed-off-by: Masami Hiramatsu Reviewed-by: Josef Bacik Signed-off-by: Alexei Starovoitov --- Documentation/fault-injection/fault-injection.txt | 68 +++++ kernel/Makefile | 1 + kernel/fail_function.c | 349 ++++++++++++++++++++++ lib/Kconfig.debug | 10 + 4 files changed, 428 insertions(+) create mode 100644 kernel/fail_function.c (limited to 'lib') diff --git a/Documentation/fault-injection/fault-injection.txt b/Documentation/fault-injection/fault-injection.txt index 918972babcd8..f4a32463ca48 100644 --- a/Documentation/fault-injection/fault-injection.txt +++ b/Documentation/fault-injection/fault-injection.txt @@ -30,6 +30,12 @@ o fail_mmc_request injects MMC data errors on devices permitted by setting debugfs entries under /sys/kernel/debug/mmc0/fail_mmc_request +o fail_function + + injects error return on specific functions, which are marked by + ALLOW_ERROR_INJECTION() macro, by setting debugfs entries + under /sys/kernel/debug/fail_function. No boot option supported. + Configure fault-injection capabilities behavior ----------------------------------------------- @@ -123,6 +129,29 @@ configuration of fault-injection capabilities. default is 'N', setting it to 'Y' will disable failure injections when dealing with private (address space) futexes. +- /sys/kernel/debug/fail_function/inject: + + Format: { 'function-name' | '!function-name' | '' } + specifies the target function of error injection by name. + If the function name leads '!' prefix, given function is + removed from injection list. If nothing specified ('') + injection list is cleared. + +- /sys/kernel/debug/fail_function/injectable: + + (read only) shows error injectable functions and what type of + error values can be specified. The error type will be one of + below; + - NULL: retval must be 0. + - ERRNO: retval must be -1 to -MAX_ERRNO (-4096). + - ERR_NULL: retval must be 0 or -1 to -MAX_ERRNO (-4096). + +- /sys/kernel/debug/fail_function//retval: + + specifies the "error" return value to inject to the given + function for given function. This will be created when + user specifies new injection entry. + o Boot option In order to inject faults while debugfs is not available (early boot time), @@ -268,6 +297,45 @@ trap "echo 0 > /sys/kernel/debug/$FAILTYPE/probability" SIGINT SIGTERM EXIT echo "Injecting errors into the module $module... (interrupt to stop)" sleep 1000000 +------------------------------------------------------------------------------ + +o Inject open_ctree error while btrfs mount + +#!/bin/bash + +rm -f testfile.img +dd if=/dev/zero of=testfile.img bs=1M seek=1000 count=1 +DEVICE=$(losetup --show -f testfile.img) +mkfs.btrfs -f $DEVICE +mkdir -p tmpmnt + +FAILTYPE=fail_function +FAILFUNC=open_ctree +echo $FAILFUNC > /sys/kernel/debug/$FAILTYPE/inject +echo -12 > /sys/kernel/debug/$FAILTYPE/$FAILFUNC/retval +echo N > /sys/kernel/debug/$FAILTYPE/task-filter +echo 100 > /sys/kernel/debug/$FAILTYPE/probability +echo 0 > /sys/kernel/debug/$FAILTYPE/interval +echo -1 > /sys/kernel/debug/$FAILTYPE/times +echo 0 > /sys/kernel/debug/$FAILTYPE/space +echo 1 > /sys/kernel/debug/$FAILTYPE/verbose + +mount -t btrfs $DEVICE tmpmnt +if [ $? -ne 0 ] +then + echo "SUCCESS!" +else + echo "FAILED!" + umount tmpmnt +fi + +echo > /sys/kernel/debug/$FAILTYPE/inject + +rmdir tmpmnt +losetup -d $DEVICE +rm testfile.img + + Tool to run command with failslab or fail_page_alloc ---------------------------------------------------- In order to make it easier to accomplish the tasks mentioned above, we can use diff --git a/kernel/Makefile b/kernel/Makefile index 172d151d429c..f85ae5dfa474 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -81,6 +81,7 @@ obj-$(CONFIG_AUDIT_TREE) += audit_tree.o obj-$(CONFIG_GCOV_KERNEL) += gcov/ obj-$(CONFIG_KCOV) += kcov.o obj-$(CONFIG_KPROBES) += kprobes.o +obj-$(CONFIG_FAIL_FUNCTION) += fail_function.o obj-$(CONFIG_KGDB) += debug/ obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o diff --git a/kernel/fail_function.c b/kernel/fail_function.c new file mode 100644 index 000000000000..21b0122cb39c --- /dev/null +++ b/kernel/fail_function.c @@ -0,0 +1,349 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fail_function.c: Function-based error injection + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int fei_kprobe_handler(struct kprobe *kp, struct pt_regs *regs); + +struct fei_attr { + struct list_head list; + struct kprobe kp; + unsigned long retval; +}; +static DEFINE_MUTEX(fei_lock); +static LIST_HEAD(fei_attr_list); +static DECLARE_FAULT_ATTR(fei_fault_attr); +static struct dentry *fei_debugfs_dir; + +static unsigned long adjust_error_retval(unsigned long addr, unsigned long retv) +{ + switch (get_injectable_error_type(addr)) { + case EI_ETYPE_NULL: + if (retv != 0) + return 0; + break; + case EI_ETYPE_ERRNO: + if (retv < (unsigned long)-MAX_ERRNO) + return (unsigned long)-EINVAL; + break; + case EI_ETYPE_ERRNO_NULL: + if (retv != 0 && retv < (unsigned long)-MAX_ERRNO) + return (unsigned long)-EINVAL; + break; + } + + return retv; +} + +static struct fei_attr *fei_attr_new(const char *sym, unsigned long addr) +{ + struct fei_attr *attr; + + attr = kzalloc(sizeof(*attr), GFP_KERNEL); + if (attr) { + attr->kp.symbol_name = kstrdup(sym, GFP_KERNEL); + if (!attr->kp.symbol_name) { + kfree(attr); + return NULL; + } + attr->kp.pre_handler = fei_kprobe_handler; + attr->retval = adjust_error_retval(addr, 0); + INIT_LIST_HEAD(&attr->list); + } + return attr; +} + +static void fei_attr_free(struct fei_attr *attr) +{ + if (attr) { + kfree(attr->kp.symbol_name); + kfree(attr); + } +} + +static struct fei_attr *fei_attr_lookup(const char *sym) +{ + struct fei_attr *attr; + + list_for_each_entry(attr, &fei_attr_list, list) { + if (!strcmp(attr->kp.symbol_name, sym)) + return attr; + } + + return NULL; +} + +static bool fei_attr_is_valid(struct fei_attr *_attr) +{ + struct fei_attr *attr; + + list_for_each_entry(attr, &fei_attr_list, list) { + if (attr == _attr) + return true; + } + + return false; +} + +static int fei_retval_set(void *data, u64 val) +{ + struct fei_attr *attr = data; + unsigned long retv = (unsigned long)val; + int err = 0; + + mutex_lock(&fei_lock); + /* + * Since this operation can be done after retval file is removed, + * It is safer to check the attr is still valid before accessing + * its member. + */ + if (!fei_attr_is_valid(attr)) { + err = -ENOENT; + goto out; + } + + if (attr->kp.addr) { + if (adjust_error_retval((unsigned long)attr->kp.addr, + val) != retv) + err = -EINVAL; + } + if (!err) + attr->retval = val; +out: + mutex_unlock(&fei_lock); + + return err; +} + +static int fei_retval_get(void *data, u64 *val) +{ + struct fei_attr *attr = data; + int err = 0; + + mutex_lock(&fei_lock); + /* Here we also validate @attr to ensure it still exists. */ + if (!fei_attr_is_valid(attr)) + err = -ENOENT; + else + *val = attr->retval; + mutex_unlock(&fei_lock); + + return err; +} +DEFINE_DEBUGFS_ATTRIBUTE(fei_retval_ops, fei_retval_get, fei_retval_set, + "%llx\n"); + +static int fei_debugfs_add_attr(struct fei_attr *attr) +{ + struct dentry *dir; + + dir = debugfs_create_dir(attr->kp.symbol_name, fei_debugfs_dir); + if (!dir) + return -ENOMEM; + + if (!debugfs_create_file("retval", 0600, dir, attr, &fei_retval_ops)) { + debugfs_remove_recursive(dir); + return -ENOMEM; + } + + return 0; +} + +static void fei_debugfs_remove_attr(struct fei_attr *attr) +{ + struct dentry *dir; + + dir = debugfs_lookup(attr->kp.symbol_name, fei_debugfs_dir); + if (dir) + debugfs_remove_recursive(dir); +} + +static int fei_kprobe_handler(struct kprobe *kp, struct pt_regs *regs) +{ + struct fei_attr *attr = container_of(kp, struct fei_attr, kp); + + if (should_fail(&fei_fault_attr, 1)) { + regs_set_return_value(regs, attr->retval); + override_function_with_return(regs); + /* Kprobe specific fixup */ + reset_current_kprobe(); + preempt_enable_no_resched(); + return 1; + } + + return 0; +} +NOKPROBE_SYMBOL(fei_kprobe_handler) + +static void *fei_seq_start(struct seq_file *m, loff_t *pos) +{ + mutex_lock(&fei_lock); + return seq_list_start(&fei_attr_list, *pos); +} + +static void fei_seq_stop(struct seq_file *m, void *v) +{ + mutex_unlock(&fei_lock); +} + +static void *fei_seq_next(struct seq_file *m, void *v, loff_t *pos) +{ + return seq_list_next(v, &fei_attr_list, pos); +} + +static int fei_seq_show(struct seq_file *m, void *v) +{ + struct fei_attr *attr = list_entry(v, struct fei_attr, list); + + seq_printf(m, "%pf\n", attr->kp.addr); + return 0; +} + +static const struct seq_operations fei_seq_ops = { + .start = fei_seq_start, + .next = fei_seq_next, + .stop = fei_seq_stop, + .show = fei_seq_show, +}; + +static int fei_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &fei_seq_ops); +} + +static void fei_attr_remove(struct fei_attr *attr) +{ + fei_debugfs_remove_attr(attr); + unregister_kprobe(&attr->kp); + list_del(&attr->list); + fei_attr_free(attr); +} + +static void fei_attr_remove_all(void) +{ + struct fei_attr *attr, *n; + + list_for_each_entry_safe(attr, n, &fei_attr_list, list) { + fei_attr_remove(attr); + } +} + +static ssize_t fei_write(struct file *file, const char __user *buffer, + size_t count, loff_t *ppos) +{ + struct fei_attr *attr; + unsigned long addr; + char *buf, *sym; + int ret; + + /* cut off if it is too long */ + if (count > KSYM_NAME_LEN) + count = KSYM_NAME_LEN; + buf = kmalloc(sizeof(char) * (count + 1), GFP_KERNEL); + if (!buf) + return -ENOMEM; + + if (copy_from_user(buf, buffer, count)) { + ret = -EFAULT; + goto out; + } + buf[count] = '\0'; + sym = strstrip(buf); + + mutex_lock(&fei_lock); + + /* Writing just spaces will remove all injection points */ + if (sym[0] == '\0') { + fei_attr_remove_all(); + ret = count; + goto out; + } + /* Writing !function will remove one injection point */ + if (sym[0] == '!') { + attr = fei_attr_lookup(sym + 1); + if (!attr) { + ret = -ENOENT; + goto out; + } + fei_attr_remove(attr); + ret = count; + goto out; + } + + addr = kallsyms_lookup_name(sym); + if (!addr) { + ret = -EINVAL; + goto out; + } + if (!within_error_injection_list(addr)) { + ret = -ERANGE; + goto out; + } + if (fei_attr_lookup(sym)) { + ret = -EBUSY; + goto out; + } + attr = fei_attr_new(sym, addr); + if (!attr) { + ret = -ENOMEM; + goto out; + } + + ret = register_kprobe(&attr->kp); + if (!ret) + ret = fei_debugfs_add_attr(attr); + if (ret < 0) + fei_attr_remove(attr); + else { + list_add_tail(&attr->list, &fei_attr_list); + ret = count; + } +out: + kfree(buf); + mutex_unlock(&fei_lock); + return ret; +} + +static const struct file_operations fei_ops = { + .open = fei_open, + .read = seq_read, + .write = fei_write, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int __init fei_debugfs_init(void) +{ + struct dentry *dir; + + dir = fault_create_debugfs_attr("fail_function", NULL, + &fei_fault_attr); + if (IS_ERR(dir)) + return PTR_ERR(dir); + + /* injectable attribute is just a symlink of error_inject/list */ + if (!debugfs_create_symlink("injectable", dir, + "../error_injection/list")) + goto error; + + if (!debugfs_create_file("inject", 0600, dir, NULL, &fei_ops)) + goto error; + + fei_debugfs_dir = dir; + + return 0; +error: + debugfs_remove_recursive(dir); + return -ENOMEM; +} + +late_initcall(fei_debugfs_init); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 2a33efdd1fea..890d4766cef3 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1551,6 +1551,16 @@ config FAIL_FUTEX help Provide fault-injection capability for futexes. +config FAIL_FUNCTION + bool "Fault-injection capability for functions" + depends on FAULT_INJECTION_DEBUG_FS && FUNCTION_ERROR_INJECTION + help + Provide function-based fault-injection capability. + This will allow you to override a specific function with a return + with given return value. As a result, function caller will see + an error value and have to handle it. This is useful to test the + error handling in various subsystems. + config FAULT_INJECTION_DEBUG_FS bool "Debugfs entries for fault-injection capabilities" depends on FAULT_INJECTION && SYSFS && DEBUG_FS -- cgit v1.2.3 From 002e67454f61bb67d8071ac4d0cacb86a01d18e0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 9 Jan 2018 16:30:23 +0100 Subject: dma-direct: rename dma_noop to dma_direct The trivial direct mapping implementation already does a virtual to physical translation which isn't strictly a noop, and will soon learn to do non-direct but linear physical to dma translations through the device offset and a few small tricks. Rename it to a better fitting name. Signed-off-by: Christoph Hellwig Reviewed-by: Vladimir Murzin --- MAINTAINERS | 2 +- arch/arm/Kconfig | 2 +- arch/arm/include/asm/dma-mapping.h | 2 +- arch/arm/mm/dma-mapping-nommu.c | 8 ++--- arch/m32r/Kconfig | 2 +- arch/riscv/Kconfig | 2 +- arch/s390/Kconfig | 2 +- include/asm-generic/dma-mapping.h | 2 +- include/linux/dma-mapping.h | 2 +- lib/Kconfig | 2 +- lib/Makefile | 2 +- lib/dma-direct.c | 63 +++++++++++++++++++++++++++++++++++ lib/dma-noop.c | 68 -------------------------------------- 13 files changed, 77 insertions(+), 82 deletions(-) create mode 100644 lib/dma-direct.c delete mode 100644 lib/dma-noop.c (limited to 'lib') diff --git a/MAINTAINERS b/MAINTAINERS index 234e642e7149..2d54e636d625 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4334,7 +4334,7 @@ T: git git://git.infradead.org/users/hch/dma-mapping.git W: http://git.infradead.org/users/hch/dma-mapping.git S: Supported F: lib/dma-debug.c -F: lib/dma-noop.c +F: lib/dma-direct.c F: lib/dma-virt.c F: drivers/base/dma-mapping.c F: drivers/base/dma-coherent.c diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 00d889a37965..430a0aa710d6 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -25,7 +25,7 @@ config ARM select CLONE_BACKWARDS select CPU_PM if (SUSPEND || CPU_IDLE) select DCACHE_WORD_ACCESS if HAVE_EFFICIENT_UNALIGNED_ACCESS - select DMA_NOOP_OPS if !MMU + select DMA_DIRECT_OPS if !MMU select EDAC_SUPPORT select EDAC_ATOMIC_SCRUB select GENERIC_ALLOCATOR diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index e5d9020c9ee1..8436f6ade57d 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -18,7 +18,7 @@ extern const struct dma_map_ops arm_coherent_dma_ops; static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { - return IS_ENABLED(CONFIG_MMU) ? &arm_dma_ops : &dma_noop_ops; + return IS_ENABLED(CONFIG_MMU) ? &arm_dma_ops : &dma_direct_ops; } #ifdef __arch_page_to_dma diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c index 6db5fc26d154..4d8042521e89 100644 --- a/arch/arm/mm/dma-mapping-nommu.c +++ b/arch/arm/mm/dma-mapping-nommu.c @@ -22,7 +22,7 @@ #include "dma.h" /* - * dma_noop_ops is used if + * dma_direct_ops is used if * - MMU/MPU is off * - cpu is v7m w/o cache support * - device is coherent @@ -39,7 +39,7 @@ static void *arm_nommu_dma_alloc(struct device *dev, size_t size, unsigned long attrs) { - const struct dma_map_ops *ops = &dma_noop_ops; + const struct dma_map_ops *ops = &dma_direct_ops; void *ret; /* @@ -70,7 +70,7 @@ static void arm_nommu_dma_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs) { - const struct dma_map_ops *ops = &dma_noop_ops; + const struct dma_map_ops *ops = &dma_direct_ops; if (attrs & DMA_ATTR_NON_CONSISTENT) { ops->free(dev, size, cpu_addr, dma_addr, attrs); @@ -213,7 +213,7 @@ EXPORT_SYMBOL(arm_nommu_dma_ops); static const struct dma_map_ops *arm_nommu_get_dma_map_ops(bool coherent) { - return coherent ? &dma_noop_ops : &arm_nommu_dma_ops; + return coherent ? &dma_direct_ops : &arm_nommu_dma_ops; } void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig index 498398d915c1..dd84ee194579 100644 --- a/arch/m32r/Kconfig +++ b/arch/m32r/Kconfig @@ -19,7 +19,7 @@ config M32R select MODULES_USE_ELF_RELA select HAVE_DEBUG_STACKOVERFLOW select CPU_NO_EFFICIENT_FFS - select DMA_NOOP_OPS + select DMA_DIRECT_OPS select ARCH_NO_COHERENT_DMA_MMAP if !MMU config SBUS diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 2c6adf12713a..865e14f50c14 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -83,7 +83,7 @@ config PGTABLE_LEVELS config HAVE_KPROBES def_bool n -config DMA_NOOP_OPS +config DMA_DIRECT_OPS def_bool y menu "Platform type" diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 829c67986db7..9376637229c9 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -140,7 +140,7 @@ config S390 select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_API_DEBUG select HAVE_DMA_CONTIGUOUS - select DMA_NOOP_OPS + select DMA_DIRECT_OPS select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_EFFICIENT_UNALIGNED_ACCESS diff --git a/include/asm-generic/dma-mapping.h b/include/asm-generic/dma-mapping.h index 164031531d85..880a292d792f 100644 --- a/include/asm-generic/dma-mapping.h +++ b/include/asm-generic/dma-mapping.h @@ -4,7 +4,7 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { - return &dma_noop_ops; + return &dma_direct_ops; } #endif /* _ASM_GENERIC_DMA_MAPPING_H */ diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 46542ad9d709..34fe8463d10e 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -136,7 +136,7 @@ struct dma_map_ops { int is_phys; }; -extern const struct dma_map_ops dma_noop_ops; +extern const struct dma_map_ops dma_direct_ops; extern const struct dma_map_ops dma_virt_ops; #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1)) diff --git a/lib/Kconfig b/lib/Kconfig index c5e84fbcb30b..9d3d649c9dc9 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -409,7 +409,7 @@ config HAS_DMA depends on !NO_DMA default y -config DMA_NOOP_OPS +config DMA_DIRECT_OPS bool depends on HAS_DMA && (!64BIT || ARCH_DMA_ADDR_T_64BIT) default n diff --git a/lib/Makefile b/lib/Makefile index d11c48ec8ffd..749851abe85a 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -28,7 +28,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o -lib-$(CONFIG_DMA_NOOP_OPS) += dma-noop.o +lib-$(CONFIG_DMA_DIRECT_OPS) += dma-direct.o lib-$(CONFIG_DMA_VIRT_OPS) += dma-virt.o lib-y += kobject.o klist.o diff --git a/lib/dma-direct.c b/lib/dma-direct.c new file mode 100644 index 000000000000..0ec3262a3148 --- /dev/null +++ b/lib/dma-direct.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * lib/dma-noop.c + * + * DMA operations that map to physical addresses without flushing memory. + */ +#include +#include +#include +#include +#include + +static void *dma_direct_alloc(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) +{ + void *ret; + + ret = (void *)__get_free_pages(gfp, get_order(size)); + if (ret) + *dma_handle = virt_to_phys(ret) - PFN_PHYS(dev->dma_pfn_offset); + + return ret; +} + +static void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t dma_addr, unsigned long attrs) +{ + free_pages((unsigned long)cpu_addr, get_order(size)); +} + +static dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + return page_to_phys(page) + offset - PFN_PHYS(dev->dma_pfn_offset); +} + +static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, unsigned long attrs) +{ + int i; + struct scatterlist *sg; + + for_each_sg(sgl, sg, nents, i) { + dma_addr_t offset = PFN_PHYS(dev->dma_pfn_offset); + void *va; + + BUG_ON(!sg_page(sg)); + va = sg_virt(sg); + sg_dma_address(sg) = (dma_addr_t)virt_to_phys(va) - offset; + sg_dma_len(sg) = sg->length; + } + + return nents; +} + +const struct dma_map_ops dma_direct_ops = { + .alloc = dma_direct_alloc, + .free = dma_direct_free, + .map_page = dma_direct_map_page, + .map_sg = dma_direct_map_sg, +}; +EXPORT_SYMBOL(dma_direct_ops); diff --git a/lib/dma-noop.c b/lib/dma-noop.c deleted file mode 100644 index a10185b0c2d4..000000000000 --- a/lib/dma-noop.c +++ /dev/null @@ -1,68 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * lib/dma-noop.c - * - * DMA operations that map to physical addresses without flushing memory. - */ -#include -#include -#include -#include -#include - -static void *dma_noop_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, - unsigned long attrs) -{ - void *ret; - - ret = (void *)__get_free_pages(gfp, get_order(size)); - if (ret) - *dma_handle = virt_to_phys(ret) - PFN_PHYS(dev->dma_pfn_offset); - - return ret; -} - -static void dma_noop_free(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_addr, - unsigned long attrs) -{ - free_pages((unsigned long)cpu_addr, get_order(size)); -} - -static dma_addr_t dma_noop_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - return page_to_phys(page) + offset - PFN_PHYS(dev->dma_pfn_offset); -} - -static int dma_noop_map_sg(struct device *dev, struct scatterlist *sgl, int nents, - enum dma_data_direction dir, - unsigned long attrs) -{ - int i; - struct scatterlist *sg; - - for_each_sg(sgl, sg, nents, i) { - dma_addr_t offset = PFN_PHYS(dev->dma_pfn_offset); - void *va; - - BUG_ON(!sg_page(sg)); - va = sg_virt(sg); - sg_dma_address(sg) = (dma_addr_t)virt_to_phys(va) - offset; - sg_dma_len(sg) = sg->length; - } - - return nents; -} - -const struct dma_map_ops dma_noop_ops = { - .alloc = dma_noop_alloc, - .free = dma_noop_free, - .map_page = dma_noop_map_page, - .map_sg = dma_noop_map_sg, -}; - -EXPORT_SYMBOL(dma_noop_ops); -- cgit v1.2.3 From 2e86a04780c6ebb64581013054f8260d040c2c9e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 22 Dec 2017 11:29:51 +0100 Subject: dma-direct: use phys_to_dma This means it uses whatever linear remapping scheme that the architecture provides is used in the generic dma_direct ops. Signed-off-by: Christoph Hellwig Reviewed-by: Vladimir Murzin --- lib/dma-direct.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/dma-direct.c b/lib/dma-direct.c index 0ec3262a3148..12ea9653781b 100644 --- a/lib/dma-direct.c +++ b/lib/dma-direct.c @@ -1,12 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 /* - * lib/dma-noop.c - * - * DMA operations that map to physical addresses without flushing memory. + * DMA operations that map physical memory directly without using an IOMMU or + * flushing caches. */ #include #include -#include +#include #include #include @@ -17,7 +16,7 @@ static void *dma_direct_alloc(struct device *dev, size_t size, ret = (void *)__get_free_pages(gfp, get_order(size)); if (ret) - *dma_handle = virt_to_phys(ret) - PFN_PHYS(dev->dma_pfn_offset); + *dma_handle = phys_to_dma(dev, virt_to_phys(ret)); return ret; } @@ -32,7 +31,7 @@ static dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs) { - return page_to_phys(page) + offset - PFN_PHYS(dev->dma_pfn_offset); + return phys_to_dma(dev, page_to_phys(page)) + offset; } static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, @@ -42,12 +41,9 @@ static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, struct scatterlist *sg; for_each_sg(sgl, sg, nents, i) { - dma_addr_t offset = PFN_PHYS(dev->dma_pfn_offset); - void *va; - BUG_ON(!sg_page(sg)); - va = sg_virt(sg); - sg_dma_address(sg) = (dma_addr_t)virt_to_phys(va) - offset; + + sg_dma_address(sg) = phys_to_dma(dev, sg_phys(sg)); sg_dma_len(sg) = sg->length; } -- cgit v1.2.3 From 2797596992898034256a6c40168d357ccf0ba0b8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 9 Jan 2018 16:30:47 +0100 Subject: dma-direct: add dma address sanity checks Roughly based on the x86 pci-nommu implementation. Signed-off-by: Christoph Hellwig --- lib/dma-direct.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/dma-direct.c b/lib/dma-direct.c index 12ea9653781b..32fd4d9e4c47 100644 --- a/lib/dma-direct.c +++ b/lib/dma-direct.c @@ -9,6 +9,23 @@ #include #include +#define DIRECT_MAPPING_ERROR 0 + +static bool +check_addr(struct device *dev, dma_addr_t dma_addr, size_t size, + const char *caller) +{ + if (unlikely(dev && !dma_capable(dev, dma_addr, size))) { + if (*dev->dma_mask >= DMA_BIT_MASK(32)) { + dev_err(dev, + "%s: overflow %pad+%zu of device mask %llx\n", + caller, &dma_addr, size, *dev->dma_mask); + } + return false; + } + return true; +} + static void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { @@ -31,7 +48,11 @@ static dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs) { - return phys_to_dma(dev, page_to_phys(page)) + offset; + dma_addr_t dma_addr = phys_to_dma(dev, page_to_phys(page)) + offset; + + if (!check_addr(dev, dma_addr, size, __func__)) + return DIRECT_MAPPING_ERROR; + return dma_addr; } static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, @@ -44,16 +65,24 @@ static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, BUG_ON(!sg_page(sg)); sg_dma_address(sg) = phys_to_dma(dev, sg_phys(sg)); + if (!check_addr(dev, sg_dma_address(sg), sg->length, __func__)) + return 0; sg_dma_len(sg) = sg->length; } return nents; } +static int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return dma_addr == DIRECT_MAPPING_ERROR; +} + const struct dma_map_ops dma_direct_ops = { .alloc = dma_direct_alloc, .free = dma_direct_free, .map_page = dma_direct_map_page, .map_sg = dma_direct_map_sg, + .mapping_error = dma_direct_mapping_error, }; EXPORT_SYMBOL(dma_direct_ops); -- cgit v1.2.3 From 080321d3b3139b3b3ec0da31a7887eebd50b5979 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 22 Dec 2017 11:51:44 +0100 Subject: dma-direct: add support for CMA allocation Try the CMA allocator for coherent allocations if supported. Roughly modelled after the x86 code. Signed-off-by: Christoph Hellwig --- lib/dma-direct.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/dma-direct.c b/lib/dma-direct.c index 32fd4d9e4c47..a9ae98be7af3 100644 --- a/lib/dma-direct.c +++ b/lib/dma-direct.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #define DIRECT_MAPPING_ERROR 0 @@ -29,19 +30,30 @@ check_addr(struct device *dev, dma_addr_t dma_addr, size_t size, static void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { - void *ret; + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + int page_order = get_order(size); + struct page *page = NULL; - ret = (void *)__get_free_pages(gfp, get_order(size)); - if (ret) - *dma_handle = phys_to_dma(dev, virt_to_phys(ret)); + /* CMA can be used only in the context which permits sleeping */ + if (gfpflags_allow_blocking(gfp)) + page = dma_alloc_from_contiguous(dev, count, page_order, gfp); + if (!page) + page = alloc_pages(gfp, page_order); + if (!page) + return NULL; - return ret; + *dma_handle = phys_to_dma(dev, page_to_phys(page)); + memset(page_address(page), 0, size); + return page_address(page); } static void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs) { - free_pages((unsigned long)cpu_addr, get_order(size)); + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + + if (!dma_release_from_contiguous(dev, virt_to_page(cpu_addr), count)) + free_pages((unsigned long)cpu_addr, get_order(size)); } static dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, -- cgit v1.2.3 From 21f237e4d08511eb662b59d8ff1b64be68e0c2bc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 22 Dec 2017 11:55:23 +0100 Subject: dma-direct: use node local allocations for coherent memory To preserve the x86 behavior. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy --- lib/dma-direct.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/dma-direct.c b/lib/dma-direct.c index a9ae98be7af3..f04a424f91fa 100644 --- a/lib/dma-direct.c +++ b/lib/dma-direct.c @@ -38,7 +38,7 @@ static void *dma_direct_alloc(struct device *dev, size_t size, if (gfpflags_allow_blocking(gfp)) page = dma_alloc_from_contiguous(dev, count, page_order, gfp); if (!page) - page = alloc_pages(gfp, page_order); + page = alloc_pages_node(dev_to_node(dev), gfp, page_order); if (!page) return NULL; -- cgit v1.2.3 From c61e9637340e1d0672c6f0c0b11aa81d7262c49d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 9 Jan 2018 23:39:03 +0100 Subject: dma-direct: add support for allocation from ZONE_DMA and ZONE_DMA32 This allows to dip into zones for lower memory if they are available. If one of the zones is not available the corresponding GFP_* flag will evaluate to 0 so they won't change anything. We provide an arch tunable for those architectures that do not use GFP_DMA for the lowest 24-bits, given that there are a few. Roughly based on the x86 code. Signed-off-by: Christoph Hellwig --- lib/dma-direct.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'lib') diff --git a/lib/dma-direct.c b/lib/dma-direct.c index f04a424f91fa..8f76032ebc3c 100644 --- a/lib/dma-direct.c +++ b/lib/dma-direct.c @@ -12,6 +12,14 @@ #define DIRECT_MAPPING_ERROR 0 +/* + * Most architectures use ZONE_DMA for the first 16 Megabytes, but + * some use it for entirely different regions: + */ +#ifndef ARCH_ZONE_DMA_BITS +#define ARCH_ZONE_DMA_BITS 24 +#endif + static bool check_addr(struct device *dev, dma_addr_t dma_addr, size_t size, const char *caller) @@ -34,6 +42,12 @@ static void *dma_direct_alloc(struct device *dev, size_t size, int page_order = get_order(size); struct page *page = NULL; + /* GFP_DMA32 and GFP_DMA are no ops without the corresponding zones: */ + if (dev->coherent_dma_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS)) + gfp |= GFP_DMA; + if (dev->coherent_dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA)) + gfp |= GFP_DMA32; + /* CMA can be used only in the context which permits sleeping */ if (gfpflags_allow_blocking(gfp)) page = dma_alloc_from_contiguous(dev, count, page_order, gfp); -- cgit v1.2.3 From 95f183916d4b0bc1943684948ecdd2469f1aa978 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 9 Jan 2018 23:40:57 +0100 Subject: dma-direct: retry allocations using GFP_DMA for small masks If an attempt to allocate memory succeeded, but isn't inside the supported DMA mask, retry the allocation with GFP_DMA set as a last resort. Based on the x86 code, but an off by one error in what is now dma_coherent_ok has been fixed vs the x86 code. Signed-off-by: Christoph Hellwig --- lib/dma-direct.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/dma-direct.c b/lib/dma-direct.c index 8f76032ebc3c..4e43c2bb7f5f 100644 --- a/lib/dma-direct.c +++ b/lib/dma-direct.c @@ -35,6 +35,11 @@ check_addr(struct device *dev, dma_addr_t dma_addr, size_t size, return true; } +static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) +{ + return phys_to_dma(dev, phys) + size - 1 <= dev->coherent_dma_mask; +} + static void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { @@ -48,11 +53,29 @@ static void *dma_direct_alloc(struct device *dev, size_t size, if (dev->coherent_dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA)) gfp |= GFP_DMA32; +again: /* CMA can be used only in the context which permits sleeping */ - if (gfpflags_allow_blocking(gfp)) + if (gfpflags_allow_blocking(gfp)) { page = dma_alloc_from_contiguous(dev, count, page_order, gfp); + if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { + dma_release_from_contiguous(dev, page, count); + page = NULL; + } + } if (!page) page = alloc_pages_node(dev_to_node(dev), gfp, page_order); + + if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { + __free_pages(page, page_order); + page = NULL; + + if (dev->coherent_dma_mask < DMA_BIT_MASK(32) && + !(gfp & GFP_DMA)) { + gfp = (gfp & ~GFP_DMA32) | GFP_DMA; + goto again; + } + } + if (!page) return NULL; -- cgit v1.2.3 From 19dca8c0efa30e0a45e79f237060d0f307045752 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 23 Dec 2017 13:46:06 +0100 Subject: dma-direct: make dma_direct_{alloc,free} available to other implementations So that they don't need to indirect through the operation vector. Signed-off-by: Christoph Hellwig Reviewed-by: Vladimir Murzin --- arch/arm/mm/dma-mapping-nommu.c | 9 +++------ include/linux/dma-direct.h | 5 +++++ lib/dma-direct.c | 6 +++--- 3 files changed, 11 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c index 4d8042521e89..619f24a42d09 100644 --- a/arch/arm/mm/dma-mapping-nommu.c +++ b/arch/arm/mm/dma-mapping-nommu.c @@ -11,7 +11,7 @@ #include #include -#include +#include #include #include @@ -39,7 +39,6 @@ static void *arm_nommu_dma_alloc(struct device *dev, size_t size, unsigned long attrs) { - const struct dma_map_ops *ops = &dma_direct_ops; void *ret; /* @@ -48,7 +47,7 @@ static void *arm_nommu_dma_alloc(struct device *dev, size_t size, */ if (attrs & DMA_ATTR_NON_CONSISTENT) - return ops->alloc(dev, size, dma_handle, gfp, attrs); + return dma_direct_alloc(dev, size, dma_handle, gfp, attrs); ret = dma_alloc_from_global_coherent(size, dma_handle); @@ -70,10 +69,8 @@ static void arm_nommu_dma_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs) { - const struct dma_map_ops *ops = &dma_direct_ops; - if (attrs & DMA_ATTR_NON_CONSISTENT) { - ops->free(dev, size, cpu_addr, dma_addr, attrs); + dma_direct_free(dev, size, cpu_addr, dma_addr, attrs); } else { int ret = dma_release_from_global_coherent(get_order(size), cpu_addr); diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 10e924b7cba7..4788bf0bf683 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -38,4 +38,9 @@ static inline void dma_mark_clean(void *addr, size_t size) } #endif /* CONFIG_ARCH_HAS_DMA_MARK_CLEAN */ +void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t gfp, unsigned long attrs); +void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t dma_addr, unsigned long attrs); + #endif /* _LINUX_DMA_DIRECT_H */ diff --git a/lib/dma-direct.c b/lib/dma-direct.c index 4e43c2bb7f5f..784a68dfdbe3 100644 --- a/lib/dma-direct.c +++ b/lib/dma-direct.c @@ -40,8 +40,8 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) return phys_to_dma(dev, phys) + size - 1 <= dev->coherent_dma_mask; } -static void *dma_direct_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) +void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t gfp, unsigned long attrs) { unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; int page_order = get_order(size); @@ -84,7 +84,7 @@ again: return page_address(page); } -static void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, +void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs) { unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; -- cgit v1.2.3 From 1a9777a8a01fb88659a3dda48080c95c34cab7d3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 24 Dec 2017 15:04:32 +0100 Subject: dma-direct: reject too small dma masks Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy --- include/linux/dma-direct.h | 1 + lib/dma-direct.c | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+) (limited to 'lib') diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 4788bf0bf683..bcdb1a3e4b1f 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -42,5 +42,6 @@ void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs); +int dma_direct_supported(struct device *dev, u64 mask); #endif /* _LINUX_DMA_DIRECT_H */ diff --git a/lib/dma-direct.c b/lib/dma-direct.c index 784a68dfdbe3..40b1f92f2214 100644 --- a/lib/dma-direct.c +++ b/lib/dma-direct.c @@ -122,6 +122,24 @@ static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, return nents; } +int dma_direct_supported(struct device *dev, u64 mask) +{ +#ifdef CONFIG_ZONE_DMA + if (mask < DMA_BIT_MASK(ARCH_ZONE_DMA_BITS)) + return 0; +#else + /* + * Because 32-bit DMA masks are so common we expect every architecture + * to be able to satisfy them - either by not supporting more physical + * memory, or by providing a ZONE_DMA32. If neither is the case, the + * architecture needs to use an IOMMU instead of the direct mapping. + */ + if (mask < DMA_BIT_MASK(32)) + return 0; +#endif + return 1; +} + static int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr) { return dma_addr == DIRECT_MAPPING_ERROR; @@ -132,6 +150,7 @@ const struct dma_map_ops dma_direct_ops = { .free = dma_direct_free, .map_page = dma_direct_map_page, .map_sg = dma_direct_map_sg, + .dma_supported = dma_direct_supported, .mapping_error = dma_direct_mapping_error, }; EXPORT_SYMBOL(dma_direct_ops); -- cgit v1.2.3 From d0bc0c2a31c95002d37c3cc511ffdcab851b3256 Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 4 Jan 2018 14:24:19 +0100 Subject: swiotlb: suppress warning when __GFP_NOWARN is set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TTM tries to allocate coherent memory in chunks of 2MB first to improve TLB efficiency and falls back to allocating 4K pages if that fails. Suppress the warning when the 2MB allocations fails since there is a valid fall back path. Signed-off-by: Christian König Reported-by: Mike Galbraith Acked-by: Konrad Rzeszutek Wilk Bug: https://bugs.freedesktop.org/show_bug.cgi?id=104082 CC: stable@vger.kernel.org Signed-off-by: Christoph Hellwig --- lib/swiotlb.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 6583f3512386..125c1062119f 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -586,7 +586,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, not_found: spin_unlock_irqrestore(&io_tlb_lock, flags); - if (printk_ratelimit()) + if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes)\n", size); return SWIOTLB_MAP_ERROR; found: @@ -713,6 +713,7 @@ void * swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags) { + bool warn = !(flags & __GFP_NOWARN); dma_addr_t dev_addr; void *ret; int order = get_order(size); @@ -738,8 +739,8 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, * GFP_DMA memory; fall back on map_single(), which * will grab memory from the lowest available address range. */ - phys_addr_t paddr = map_single(hwdev, 0, size, - DMA_FROM_DEVICE, 0); + phys_addr_t paddr = map_single(hwdev, 0, size, DMA_FROM_DEVICE, + warn ? 0 : DMA_ATTR_NO_WARN); if (paddr == SWIOTLB_MAP_ERROR) goto err_warn; @@ -769,9 +770,11 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, return ret; err_warn: - pr_warn("swiotlb: coherent allocation failed for device %s size=%zu\n", - dev_name(hwdev), size); - dump_stack(); + if (warn && printk_ratelimit()) { + pr_warn("swiotlb: coherent allocation failed for device %s size=%zu\n", + dev_name(hwdev), size); + dump_stack(); + } return NULL; } -- cgit v1.2.3 From 7f2c8bbd321f18e4ccfd262748bd58fb7d4bb1db Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 23 Dec 2017 14:14:54 +0100 Subject: swiotlb: rename swiotlb_free to swiotlb_exit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christoph Hellwig Acked-by: Christian König Reviewed-by: Konrad Rzeszutek Wilk --- arch/powerpc/kernel/dma-swiotlb.c | 2 +- arch/x86/kernel/pci-swiotlb.c | 2 +- include/linux/swiotlb.h | 4 ++-- lib/swiotlb.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index 506ac4fafac5..88f3963ca30f 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -121,7 +121,7 @@ static int __init check_swiotlb_enabled(void) if (ppc_swiotlb_enable) swiotlb_print_info(); else - swiotlb_free(); + swiotlb_exit(); return 0; } diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 0d77603c2f50..0ee0f8f34251 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -120,7 +120,7 @@ void __init pci_swiotlb_late_init(void) { /* An IOMMU turned us off. */ if (!swiotlb) - swiotlb_free(); + swiotlb_exit(); else { printk(KERN_INFO "PCI-DMA: " "Using software bounce buffering for IO (SWIOTLB)\n"); diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 24ed817082ee..606375e35d87 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -115,10 +115,10 @@ extern int swiotlb_dma_supported(struct device *hwdev, u64 mask); #ifdef CONFIG_SWIOTLB -extern void __init swiotlb_free(void); +extern void __init swiotlb_exit(void); unsigned int swiotlb_max_segment(void); #else -static inline void swiotlb_free(void) { } +static inline void swiotlb_exit(void) { } static inline unsigned int swiotlb_max_segment(void) { return 0; } #endif diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 125c1062119f..cf5311908fa9 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -417,7 +417,7 @@ cleanup2: return -ENOMEM; } -void __init swiotlb_free(void) +void __init swiotlb_exit(void) { if (!io_tlb_orig_addr) return; -- cgit v1.2.3 From 251533eb35b22f9e87a440b14c6f20e2626397b3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 9 Jan 2018 16:44:16 +0100 Subject: swiotlb: add common swiotlb_map_ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently all architectures that want to use swiotlb have to implement their own dma_map_ops instances. Provide a generic one based on the x86 implementation which first calls into dma_direct to try a full blown direct mapping implementation (including e.g. CMA) before falling back allocating from the swiotlb buffer. Signed-off-by: Christoph Hellwig Acked-by: Christian König --- include/linux/swiotlb.h | 8 ++++++++ lib/swiotlb.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) (limited to 'lib') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 606375e35d87..5b1f2a00491c 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -66,6 +66,12 @@ extern void swiotlb_tbl_sync_single(struct device *hwdev, enum dma_sync_target target); /* Accessory functions. */ + +void *swiotlb_alloc(struct device *hwdev, size_t size, dma_addr_t *dma_handle, + gfp_t flags, unsigned long attrs); +void swiotlb_free(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_addr, unsigned long attrs); + extern void *swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags); @@ -126,4 +132,6 @@ extern void swiotlb_print_info(void); extern int is_swiotlb_buffer(phys_addr_t paddr); extern void swiotlb_set_max_segment(unsigned int); +extern const struct dma_map_ops swiotlb_dma_ops; + #endif /* __LINUX_SWIOTLB_H */ diff --git a/lib/swiotlb.c b/lib/swiotlb.c index cf5311908fa9..0fae2f45c3c0 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -1087,3 +1087,46 @@ swiotlb_dma_supported(struct device *hwdev, u64 mask) return swiotlb_phys_to_dma(hwdev, io_tlb_end - 1) <= mask; } EXPORT_SYMBOL(swiotlb_dma_supported); + +#ifdef CONFIG_DMA_DIRECT_OPS +void *swiotlb_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t gfp, unsigned long attrs) +{ + void *vaddr; + + /* + * Don't print a warning when the first allocation attempt fails. + * swiotlb_alloc_coherent() will print a warning when the DMA memory + * allocation ultimately failed. + */ + gfp |= __GFP_NOWARN; + + vaddr = dma_direct_alloc(dev, size, dma_handle, gfp, attrs); + if (!vaddr) + vaddr = swiotlb_alloc_coherent(dev, size, dma_handle, gfp); + return vaddr; +} + +void swiotlb_free(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_addr, unsigned long attrs) +{ + if (is_swiotlb_buffer(dma_to_phys(dev, dma_addr))) + swiotlb_free_coherent(dev, size, vaddr, dma_addr); + else + dma_direct_free(dev, size, vaddr, dma_addr, attrs); +} + +const struct dma_map_ops swiotlb_dma_ops = { + .mapping_error = swiotlb_dma_mapping_error, + .alloc = swiotlb_alloc, + .free = swiotlb_free, + .sync_single_for_cpu = swiotlb_sync_single_for_cpu, + .sync_single_for_device = swiotlb_sync_single_for_device, + .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, + .sync_sg_for_device = swiotlb_sync_sg_for_device, + .map_sg = swiotlb_map_sg_attrs, + .unmap_sg = swiotlb_unmap_sg_attrs, + .map_page = swiotlb_map_page, + .unmap_page = swiotlb_unmap_page, +}; +#endif /* CONFIG_DMA_DIRECT_OPS */ -- cgit v1.2.3 From aaf796dc6e84e809e4c791b6517326b26312c972 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 24 Dec 2017 15:07:34 +0100 Subject: swiotlb: wire up ->dma_supported in swiotlb_dma_ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To properly reject too small DMA masks based on the addressability of the bounce buffer. Signed-off-by: Christoph Hellwig Acked-by: Christian König --- lib/swiotlb.c | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 0fae2f45c3c0..539fd1099ba9 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -1128,5 +1128,6 @@ const struct dma_map_ops swiotlb_dma_ops = { .unmap_sg = swiotlb_unmap_sg_attrs, .map_page = swiotlb_map_page, .unmap_page = swiotlb_unmap_page, + .dma_supported = swiotlb_dma_supported, }; #endif /* CONFIG_DMA_DIRECT_OPS */ -- cgit v1.2.3 From a25381aa3ae60a2e028c95f1318649b13cbbce30 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 23 Dec 2017 20:56:02 +0100 Subject: swiotlb: refactor coherent buffer freeing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Factor out a new swiotlb_free_buffer helper that checks if an address is allocated from the swiotlb bounce buffer, and if yes frees it. This allows to simplify the swiotlb_free implemenation that uses dma_direct_free to free the non-bounce buffer allocations. Signed-off-by: Christoph Hellwig Acked-by: Christian König --- lib/swiotlb.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) (limited to 'lib') diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 539fd1099ba9..1a147f1354a1 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -780,22 +780,31 @@ err_warn: } EXPORT_SYMBOL(swiotlb_alloc_coherent); +static bool swiotlb_free_buffer(struct device *dev, size_t size, + dma_addr_t dma_addr) +{ + phys_addr_t phys_addr = dma_to_phys(dev, dma_addr); + + WARN_ON_ONCE(irqs_disabled()); + + if (!is_swiotlb_buffer(phys_addr)) + return false; + + /* + * DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single. + * DMA_ATTR_SKIP_CPU_SYNC is optional. + */ + swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); + return true; +} + void swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, dma_addr_t dev_addr) { - phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); - - WARN_ON(irqs_disabled()); - if (!is_swiotlb_buffer(paddr)) + if (!swiotlb_free_buffer(hwdev, size, dev_addr)) free_pages((unsigned long)vaddr, get_order(size)); - else - /* - * DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single. - * DMA_ATTR_SKIP_CPU_SYNC is optional. - */ - swiotlb_tbl_unmap_single(hwdev, paddr, size, DMA_TO_DEVICE, - DMA_ATTR_SKIP_CPU_SYNC); } EXPORT_SYMBOL(swiotlb_free_coherent); @@ -1110,9 +1119,7 @@ void *swiotlb_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, void swiotlb_free(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_addr, unsigned long attrs) { - if (is_swiotlb_buffer(dma_to_phys(dev, dma_addr))) - swiotlb_free_coherent(dev, size, vaddr, dma_addr); - else + if (!swiotlb_free_buffer(dev, size, dma_addr)) dma_direct_free(dev, size, vaddr, dma_addr, attrs); } -- cgit v1.2.3 From 0176adb004065d6815a8e67946752df4cd947c5b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 9 Jan 2018 22:15:30 +0100 Subject: swiotlb: refactor coherent buffer allocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Factor out a new swiotlb_alloc_buffer helper that allocates DMA coherent memory from the swiotlb bounce buffer. This allows to simplify the swiotlb_alloc implemenation that uses dma_direct_alloc to try to allocate a reachable buffer first. Signed-off-by: Christoph Hellwig Acked-by: Christian König --- lib/swiotlb.c | 122 +++++++++++++++++++++++++++++++--------------------------- 1 file changed, 65 insertions(+), 57 deletions(-) (limited to 'lib') diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 1a147f1354a1..0039b7c5e690 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -709,75 +709,79 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr, } EXPORT_SYMBOL_GPL(swiotlb_tbl_sync_single); -void * -swiotlb_alloc_coherent(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t flags) +static inline bool dma_coherent_ok(struct device *dev, dma_addr_t addr, + size_t size) { - bool warn = !(flags & __GFP_NOWARN); - dma_addr_t dev_addr; - void *ret; - int order = get_order(size); - u64 dma_mask = DMA_BIT_MASK(32); + u64 mask = DMA_BIT_MASK(32); - if (hwdev && hwdev->coherent_dma_mask) - dma_mask = hwdev->coherent_dma_mask; + if (dev && dev->coherent_dma_mask) + mask = dev->coherent_dma_mask; + return addr + size - 1 <= mask; +} - ret = (void *)__get_free_pages(flags, order); - if (ret) { - dev_addr = swiotlb_virt_to_bus(hwdev, ret); - if (dev_addr + size - 1 > dma_mask) { - /* - * The allocated memory isn't reachable by the device. - */ - free_pages((unsigned long) ret, order); - ret = NULL; - } - } - if (!ret) { - /* - * We are either out of memory or the device can't DMA to - * GFP_DMA memory; fall back on map_single(), which - * will grab memory from the lowest available address range. - */ - phys_addr_t paddr = map_single(hwdev, 0, size, DMA_FROM_DEVICE, - warn ? 0 : DMA_ATTR_NO_WARN); - if (paddr == SWIOTLB_MAP_ERROR) - goto err_warn; +static void * +swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle, + unsigned long attrs) +{ + phys_addr_t phys_addr; - ret = phys_to_virt(paddr); - dev_addr = swiotlb_phys_to_dma(hwdev, paddr); + if (swiotlb_force == SWIOTLB_NO_FORCE) + goto out_warn; - /* Confirm address can be DMA'd by device */ - if (dev_addr + size - 1 > dma_mask) { - printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n", - (unsigned long long)dma_mask, - (unsigned long long)dev_addr); + phys_addr = swiotlb_tbl_map_single(dev, + swiotlb_phys_to_dma(dev, io_tlb_start), + 0, size, DMA_FROM_DEVICE, 0); + if (phys_addr == SWIOTLB_MAP_ERROR) + goto out_warn; - /* - * DMA_TO_DEVICE to avoid memcpy in unmap_single. - * The DMA_ATTR_SKIP_CPU_SYNC is optional. - */ - swiotlb_tbl_unmap_single(hwdev, paddr, - size, DMA_TO_DEVICE, - DMA_ATTR_SKIP_CPU_SYNC); - goto err_warn; - } - } + *dma_handle = swiotlb_phys_to_dma(dev, phys_addr); + if (dma_coherent_ok(dev, *dma_handle, size)) + goto out_unmap; - *dma_handle = dev_addr; - memset(ret, 0, size); + memset(phys_to_virt(phys_addr), 0, size); + return phys_to_virt(phys_addr); - return ret; +out_unmap: + dev_warn(dev, "hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n", + (unsigned long long)(dev ? dev->coherent_dma_mask : 0), + (unsigned long long)*dma_handle); -err_warn: - if (warn && printk_ratelimit()) { - pr_warn("swiotlb: coherent allocation failed for device %s size=%zu\n", - dev_name(hwdev), size); + /* + * DMA_TO_DEVICE to avoid memcpy in unmap_single. + * DMA_ATTR_SKIP_CPU_SYNC is optional. + */ + swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); +out_warn: + if ((attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) { + dev_warn(dev, + "swiotlb: coherent allocation failed, size=%zu\n", + size); dump_stack(); } - return NULL; } + +void * +swiotlb_alloc_coherent(struct device *hwdev, size_t size, + dma_addr_t *dma_handle, gfp_t flags) +{ + int order = get_order(size); + unsigned long attrs = (flags & __GFP_NOWARN) ? DMA_ATTR_NO_WARN : 0; + void *ret; + + ret = (void *)__get_free_pages(flags, order); + if (ret) { + *dma_handle = swiotlb_virt_to_bus(hwdev, ret); + if (dma_coherent_ok(hwdev, *dma_handle, size)) { + memset(ret, 0, size); + return ret; + } + free_pages((unsigned long)ret, order); + } + + return swiotlb_alloc_buffer(hwdev, size, dma_handle, attrs); +} EXPORT_SYMBOL(swiotlb_alloc_coherent); static bool swiotlb_free_buffer(struct device *dev, size_t size, @@ -1103,6 +1107,10 @@ void *swiotlb_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, { void *vaddr; + /* temporary workaround: */ + if (gfp & __GFP_NOWARN) + attrs |= DMA_ATTR_NO_WARN; + /* * Don't print a warning when the first allocation attempt fails. * swiotlb_alloc_coherent() will print a warning when the DMA memory @@ -1112,7 +1120,7 @@ void *swiotlb_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, vaddr = dma_direct_alloc(dev, size, dma_handle, gfp, attrs); if (!vaddr) - vaddr = swiotlb_alloc_coherent(dev, size, dma_handle, gfp); + vaddr = swiotlb_alloc_buffer(dev, size, dma_handle, attrs); return vaddr; } -- cgit v1.2.3 From 4bd89ed39b2ab8dc4ac4b6c59b07d420b0213bec Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 24 Dec 2017 14:25:05 +0100 Subject: swiotlb: remove various exports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All these symbols are only used by arch dma_ops implementations or xen-swiotlb. None of which can be modular. Signed-off-by: Christoph Hellwig Acked-by: Christian König --- lib/swiotlb.c | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'lib') diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 0039b7c5e690..c43ec2271469 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -605,7 +605,6 @@ found: return tlb_addr; } -EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single); /* * Allocates bounce buffer and returns its kernel virtual address. @@ -675,7 +674,6 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, } spin_unlock_irqrestore(&io_tlb_lock, flags); } -EXPORT_SYMBOL_GPL(swiotlb_tbl_unmap_single); void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr, size_t size, enum dma_data_direction dir, @@ -707,7 +705,6 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr, BUG(); } } -EXPORT_SYMBOL_GPL(swiotlb_tbl_sync_single); static inline bool dma_coherent_ok(struct device *dev, dma_addr_t addr, size_t size) @@ -884,7 +881,6 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, return swiotlb_phys_to_dma(dev, io_tlb_overflow_buffer); } -EXPORT_SYMBOL_GPL(swiotlb_map_page); /* * Unmap a single streaming mode DMA translation. The dma_addr and size must @@ -925,7 +921,6 @@ void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, { unmap_single(hwdev, dev_addr, size, dir, attrs); } -EXPORT_SYMBOL_GPL(swiotlb_unmap_page); /* * Make physical memory consistent for a single streaming mode DMA translation @@ -963,7 +958,6 @@ swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr, { swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU); } -EXPORT_SYMBOL(swiotlb_sync_single_for_cpu); void swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, @@ -971,7 +965,6 @@ swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, { swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE); } -EXPORT_SYMBOL(swiotlb_sync_single_for_device); /* * Map a set of buffers described by scatterlist in streaming mode for DMA. @@ -1023,7 +1016,6 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, } return nelems; } -EXPORT_SYMBOL(swiotlb_map_sg_attrs); /* * Unmap a set of streaming mode DMA translations. Again, cpu read rules @@ -1043,7 +1035,6 @@ swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir, attrs); } -EXPORT_SYMBOL(swiotlb_unmap_sg_attrs); /* * Make physical memory consistent for a set of streaming mode DMA translations @@ -1071,7 +1062,6 @@ swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, { swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU); } -EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu); void swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, @@ -1079,14 +1069,12 @@ swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, { swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE); } -EXPORT_SYMBOL(swiotlb_sync_sg_for_device); int swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr) { return (dma_addr == swiotlb_phys_to_dma(hwdev, io_tlb_overflow_buffer)); } -EXPORT_SYMBOL(swiotlb_dma_mapping_error); /* * Return whether the given device DMA address mask can be supported @@ -1099,7 +1087,6 @@ swiotlb_dma_supported(struct device *hwdev, u64 mask) { return swiotlb_phys_to_dma(hwdev, io_tlb_end - 1) <= mask; } -EXPORT_SYMBOL(swiotlb_dma_supported); #ifdef CONFIG_DMA_DIRECT_OPS void *swiotlb_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, -- cgit v1.2.3 From 8c7a8d1c4b9c30a2be3b31a2e6af1cefd45574eb Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 19 Jan 2018 11:00:54 -0800 Subject: lib/scatterlist: Fix chaining support in sgl_alloc_order() This patch avoids that workloads with large block sizes (megabytes) can trigger the following call stack with the ib_srpt driver (that driver is the only driver that chains scatterlists allocated by sgl_alloc_order()): BUG: Bad page state in process kworker/0:1H pfn:2423a78 page:fffffb03d08e9e00 count:-3 mapcount:0 mapping: (null) index:0x0 flags: 0x57ffffc0000000() raw: 0057ffffc0000000 0000000000000000 0000000000000000 fffffffdffffffff raw: dead000000000100 dead000000000200 0000000000000000 0000000000000000 page dumped because: nonzero _count CPU: 0 PID: 733 Comm: kworker/0:1H Tainted: G I 4.15.0-rc7.bart+ #1 Hardware name: HP ProLiant DL380 G7, BIOS P67 08/16/2015 Workqueue: ib-comp-wq ib_cq_poll_work [ib_core] Call Trace: dump_stack+0x5c/0x83 bad_page+0xf5/0x10f get_page_from_freelist+0xa46/0x11b0 __alloc_pages_nodemask+0x103/0x290 sgl_alloc_order+0x101/0x180 target_alloc_sgl+0x2c/0x40 [target_core_mod] srpt_alloc_rw_ctxs+0x173/0x2d0 [ib_srpt] srpt_handle_new_iu+0x61e/0x7f0 [ib_srpt] __ib_process_cq+0x55/0xa0 [ib_core] ib_cq_poll_work+0x1b/0x60 [ib_core] process_one_work+0x141/0x340 worker_thread+0x47/0x3e0 kthread+0xf5/0x130 ret_from_fork+0x1f/0x30 Fixes: e80a0af4759a ("lib/scatterlist: Introduce sgl_alloc() and sgl_free()") Reported-by: Laurence Oberman Tested-by: Laurence Oberman Signed-off-by: Bart Van Assche Cc: Nicholas A. Bellinger Cc: Laurence Oberman Signed-off-by: Jens Axboe --- drivers/target/target_core_transport.c | 2 +- include/linux/scatterlist.h | 1 + lib/scatterlist.c | 32 +++++++++++++++++++++++++++----- 3 files changed, 29 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index a001ba711cca..c03a78ee26cd 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2300,7 +2300,7 @@ queue_full: void target_free_sgl(struct scatterlist *sgl, int nents) { - sgl_free(sgl); + sgl_free_n_order(sgl, nents, 0); } EXPORT_SYMBOL(target_free_sgl); diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index b8a7c1d1dbe3..22b2131bcdcd 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -282,6 +282,7 @@ struct scatterlist *sgl_alloc_order(unsigned long long length, gfp_t gfp, unsigned int *nent_p); struct scatterlist *sgl_alloc(unsigned long long length, gfp_t gfp, unsigned int *nent_p); +void sgl_free_n_order(struct scatterlist *sgl, int nents, int order); void sgl_free_order(struct scatterlist *sgl, int order); void sgl_free(struct scatterlist *sgl); #endif /* CONFIG_SGL_ALLOC */ diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 9afc9b432083..53728d391d3a 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -512,7 +512,7 @@ struct scatterlist *sgl_alloc_order(unsigned long long length, if (!sgl) return NULL; - sg_init_table(sgl, nent); + sg_init_table(sgl, nalloc); sg = sgl; while (length) { elem_len = min_t(u64, length, PAGE_SIZE << order); @@ -526,7 +526,7 @@ struct scatterlist *sgl_alloc_order(unsigned long long length, length -= elem_len; sg = sg_next(sg); } - WARN_ON_ONCE(sg); + WARN_ONCE(length, "length = %lld\n", length); if (nent_p) *nent_p = nent; return sgl; @@ -549,22 +549,44 @@ struct scatterlist *sgl_alloc(unsigned long long length, gfp_t gfp, EXPORT_SYMBOL(sgl_alloc); /** - * sgl_free_order - free a scatterlist and its pages + * sgl_free_n_order - free a scatterlist and its pages * @sgl: Scatterlist with one or more elements + * @nents: Maximum number of elements to free * @order: Second argument for __free_pages() + * + * Notes: + * - If several scatterlists have been chained and each chain element is + * freed separately then it's essential to set nents correctly to avoid that a + * page would get freed twice. + * - All pages in a chained scatterlist can be freed at once by setting @nents + * to a high number. */ -void sgl_free_order(struct scatterlist *sgl, int order) +void sgl_free_n_order(struct scatterlist *sgl, int nents, int order) { struct scatterlist *sg; struct page *page; + int i; - for (sg = sgl; sg; sg = sg_next(sg)) { + for_each_sg(sgl, sg, nents, i) { + if (!sg) + break; page = sg_page(sg); if (page) __free_pages(page, order); } kfree(sgl); } +EXPORT_SYMBOL(sgl_free_n_order); + +/** + * sgl_free_order - free a scatterlist and its pages + * @sgl: Scatterlist with one or more elements + * @order: Second argument for __free_pages() + */ +void sgl_free_order(struct scatterlist *sgl, int order) +{ + sgl_free_n_order(sgl, INT_MAX, order); +} EXPORT_SYMBOL(sgl_free_order); /** -- cgit v1.2.3 From fcd1c9177195489c40198d2769649439dd88505b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 20 Jan 2018 01:24:31 +0100 Subject: bpf: add couple of test cases for signed extended imms Add a couple of test cases for interpreter and JIT that are related to an issue we faced some time ago in Cilium [1], which is fixed in LLVM with commit e53750e1e086 ("bpf: fix bug on silently truncating 64-bit immediate"). Test cases were run-time checking kernel to behave as intended which should also provide some guidance for current or new JITs in case they should trip over this. Added for cBPF and eBPF. [1] https://github.com/cilium/cilium/pull/2162 Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- lib/test_bpf.c | 104 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) (limited to 'lib') diff --git a/lib/test_bpf.c b/lib/test_bpf.c index f369889e521d..e3938e395cba 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -6109,6 +6109,110 @@ static struct bpf_test tests[] = { { { ETH_HLEN, 42 } }, .fill_helper = bpf_fill_ld_abs_vlan_push_pop2, }, + /* Checking interpreter vs JIT wrt signed extended imms. */ + { + "JNE signed compare, test 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R1, 0xfefbbc12), + BPF_ALU32_IMM(BPF_MOV, R3, 0xffff0000), + BPF_MOV64_REG(R2, R1), + BPF_ALU64_REG(BPF_AND, R2, R3), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_JMP_IMM(BPF_JNE, R2, -17104896, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JNE signed compare, test 2", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R1, 0xfefbbc12), + BPF_ALU32_IMM(BPF_MOV, R3, 0xffff0000), + BPF_MOV64_REG(R2, R1), + BPF_ALU64_REG(BPF_AND, R2, R3), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_JMP_IMM(BPF_JNE, R2, 0xfefb0000, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JNE signed compare, test 3", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R1, 0xfefbbc12), + BPF_ALU32_IMM(BPF_MOV, R3, 0xffff0000), + BPF_ALU32_IMM(BPF_MOV, R4, 0xfefb0000), + BPF_MOV64_REG(R2, R1), + BPF_ALU64_REG(BPF_AND, R2, R3), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_JMP_REG(BPF_JNE, R2, R4, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "JNE signed compare, test 4", + .u.insns_int = { + BPF_LD_IMM64(R1, -17104896), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_JMP_IMM(BPF_JNE, R1, -17104896, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "JNE signed compare, test 5", + .u.insns_int = { + BPF_LD_IMM64(R1, 0xfefb0000), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_JMP_IMM(BPF_JNE, R1, 0xfefb0000, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JNE signed compare, test 6", + .u.insns_int = { + BPF_LD_IMM64(R1, 0x7efb0000), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_JMP_IMM(BPF_JNE, R1, 0x7efb0000, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "JNE signed compare, test 7", + .u.insns = { + BPF_STMT(BPF_LD | BPF_IMM, 0xffff0000), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_LD | BPF_IMM, 0xfefbbc12), + BPF_STMT(BPF_ALU | BPF_AND | BPF_X, 0), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0xfefb0000, 1, 0), + BPF_STMT(BPF_RET | BPF_K, 1), + BPF_STMT(BPF_RET | BPF_K, 2), + }, + CLASSIC | FLAG_NO_DATA, + {}, + { { 0, 2 } }, + }, }; static struct net_device dev; -- cgit v1.2.3 From 76f8ab1bd1cb0be3233ab07790c3108d986aeabf Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 11 Jan 2018 11:13:45 +0000 Subject: test_firmware: make local symbol test_fw_config static Fixes the following sparse warnings: lib/test_firmware.c:99:20: warning: symbol 'test_fw_config' was not declared. Should it be static? Signed-off-by: Wei Yongjun Signed-off-by: Greg Kroah-Hartman --- lib/test_firmware.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/test_firmware.c b/lib/test_firmware.c index 964784dc1602..1e907dd3b4fe 100644 --- a/lib/test_firmware.c +++ b/lib/test_firmware.c @@ -96,7 +96,7 @@ struct test_config { struct device *device); }; -struct test_config *test_fw_config; +static struct test_config *test_fw_config; static ssize_t test_fw_misc_read(struct file *f, char __user *buf, size_t size, loff_t *offset) -- cgit v1.2.3 From a5e1923356505e46476c2fb518559b7a4d9d25b1 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 11 Jan 2018 11:12:55 +0000 Subject: test_firmware: fix missing unlock on error in config_num_requests_store() Add the missing unlock before return from function config_num_requests_store() in the error handling case. Fixes: c92316bf8e94 ("test_firmware: add batched firmware tests") Cc: stable Signed-off-by: Wei Yongjun Signed-off-by: Greg Kroah-Hartman --- lib/test_firmware.c | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/test_firmware.c b/lib/test_firmware.c index 1e907dd3b4fe..078a61480573 100644 --- a/lib/test_firmware.c +++ b/lib/test_firmware.c @@ -371,6 +371,7 @@ static ssize_t config_num_requests_store(struct device *dev, if (test_fw_config->reqs) { pr_err("Must call release_all_firmware prior to changing config\n"); rc = -EINVAL; + mutex_unlock(&test_fw_mutex); goto out; } mutex_unlock(&test_fw_mutex); -- cgit v1.2.3 From 172856eac7cfaaeabad6a282d3c79d687ad69fc0 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 22 Jan 2018 14:27:11 -0800 Subject: kobject: Export kobj_ns_grab_current() and kobj_ns_drop() Make it possible to call these two functions from a kernel module. Note: despite their name, these two functions can be used meaningfully independent of kobjects. A later patch will add calls to these functions from the SRP driver because this patch series modifies the SRP driver such that it can hold a reference to a namespace that can last longer than the lifetime of the process through which the namespace reference was obtained. Signed-off-by: Bart Van Assche Acked-by: Greg Kroah-Hartman Signed-off-by: Doug Ledford --- lib/kobject.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib') diff --git a/lib/kobject.c b/lib/kobject.c index 763d70a18941..06b849eee0ca 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -1039,6 +1039,7 @@ void *kobj_ns_grab_current(enum kobj_ns_type type) return ns; } +EXPORT_SYMBOL_GPL(kobj_ns_grab_current); const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk) { @@ -1074,3 +1075,4 @@ void kobj_ns_drop(enum kobj_ns_type type, void *ns) kobj_ns_ops_tbl[type]->drop_ns(ns); spin_unlock(&kobj_ns_type_lock); } +EXPORT_SYMBOL_GPL(kobj_ns_drop); -- cgit v1.2.3 From 841a915d20c7b22fc4f36f12368daf94d9f8cb10 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 28 Dec 2017 20:40:25 -0500 Subject: vsprintf: Do not have bprintf dereference pointers When trace_printk() was introduced, it was discussed that making it be as low overhead as possible, that the processing of the format string should be delayed until it is read. That is, a "trace_printk()" should not convert the %d into numbers and so on, but instead, save the fmt string and all the args in the buffer at the time of recording. When the trace_printk() data is read, it would then parse the format string and do the conversions of the saved arguments in the tracing buffer. The code to perform this was added to vsprintf where vbin_printf() would save the arguments of a specified format string in a buffer, then bstr_printf() could be used to convert the buffer with the same format string into the final output, as if vsprintf() was called in one go. The issue arises when dereferenced pointers are used. The problem is that something like %*pbl which reads a bitmask, will save the pointer to the bitmask in the buffer. Then the reading of the buffer via bstr_printf() will then look at the pointer to process the final output. Obviously the value of that pointer could have changed since the time it was recorded to the time the buffer is read. Worse yet, the bitmask could be unmapped, and the reading of the trace buffer could actually cause a kernel oops. Another problem is that user space tools such as perf and trace-cmd do not have access to the contents of these pointers, and they become useless when the tracing buffer is extracted. Instead of having vbin_printf() simply save the pointer in the buffer for later processing, have it perform the formatting at the time bin_printf() is called. This will fix the issue of dereferencing pointers at a later time, and has the extra benefit of having user space tools understand these values. Since perf and trace-cmd already can handle %p[sSfF] via saving kallsyms, their pointers are saved and not processed during vbin_printf(). If they were converted, it would break perf and trace-cmd, as they would not know how to deal with the conversion. Link: http://lkml.kernel.org/r/20171228204025.14a71d8f@gandalf.local.home Reported-by: Thomas Gleixner Signed-off-by: Steven Rostedt (VMware) --- lib/vsprintf.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 69 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 01c3957b2de6..c0c3542d92fa 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -2516,29 +2516,34 @@ int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args) { struct printf_spec spec = {0}; char *str, *end; + int width; str = (char *)bin_buf; end = (char *)(bin_buf + size); #define save_arg(type) \ -do { \ +({ \ + unsigned long long value; \ if (sizeof(type) == 8) { \ - unsigned long long value; \ + unsigned long long val8; \ str = PTR_ALIGN(str, sizeof(u32)); \ - value = va_arg(args, unsigned long long); \ + val8 = va_arg(args, unsigned long long); \ if (str + sizeof(type) <= end) { \ - *(u32 *)str = *(u32 *)&value; \ - *(u32 *)(str + 4) = *((u32 *)&value + 1); \ + *(u32 *)str = *(u32 *)&val8; \ + *(u32 *)(str + 4) = *((u32 *)&val8 + 1); \ } \ + value = val8; \ } else { \ - unsigned long value; \ + unsigned int val4; \ str = PTR_ALIGN(str, sizeof(type)); \ - value = va_arg(args, int); \ + val4 = va_arg(args, int); \ if (str + sizeof(type) <= end) \ - *(typeof(type) *)str = (type)value; \ + *(typeof(type) *)str = (type)(long)val4; \ + value = (unsigned long long)val4; \ } \ str += sizeof(type); \ -} while (0) + value; \ +}) while (*fmt) { int read = format_decode(fmt, &spec); @@ -2554,7 +2559,10 @@ do { \ case FORMAT_TYPE_WIDTH: case FORMAT_TYPE_PRECISION: - save_arg(int); + width = (int)save_arg(int); + /* Pointers may require the width */ + if (*fmt == 'p') + set_field_width(&spec, width); break; case FORMAT_TYPE_CHAR: @@ -2576,7 +2584,27 @@ do { \ } case FORMAT_TYPE_PTR: - save_arg(void *); + /* Dereferenced pointers must be done now */ + switch (*fmt) { + /* Dereference of functions is still OK */ + case 'S': + case 's': + case 'F': + case 'f': + save_arg(void *); + break; + default: + if (!isalnum(*fmt)) { + save_arg(void *); + break; + } + str = pointer(fmt, str, end, va_arg(args, void *), + spec); + if (str + 1 < end) + *str++ = '\0'; + else + end[-1] = '\0'; /* Must be nul terminated */ + } /* skip all alphanumeric pointer suffixes */ while (isalnum(*fmt)) fmt++; @@ -2728,11 +2756,39 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) break; } - case FORMAT_TYPE_PTR: - str = pointer(fmt, str, end, get_arg(void *), spec); + case FORMAT_TYPE_PTR: { + bool process = false; + int copy, len; + /* Non function dereferences were already done */ + switch (*fmt) { + case 'S': + case 's': + case 'F': + case 'f': + process = true; + break; + default: + if (!isalnum(*fmt)) { + process = true; + break; + } + /* Pointer dereference was already processed */ + if (str < end) { + len = copy = strlen(args); + if (copy > end - str) + copy = end - str; + memcpy(str, args, copy); + str += len; + args += len; + } + } + if (process) + str = pointer(fmt, str, end, get_arg(void *), spec); + while (isalnum(*fmt)) fmt++; break; + } case FORMAT_TYPE_PERCENT_CHAR: if (str < end) -- cgit v1.2.3 From 7328c8f48d1895b3fec98b0b319cfb856b4c4fa1 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 26 Jan 2018 11:45:16 -0600 Subject: PCI: Add SPDX GPL-2.0 when no license was specified b24413180f56 ("License cleanup: add SPDX GPL-2.0 license identifier to files with no license") added SPDX GPL-2.0 to several PCI files that previously contained no license information. Add SPDX GPL-2.0 to all other PCI files that did not contain any license information and hence were under the default GPL version 2 license of the kernel. Signed-off-by: Bjorn Helgaas Reviewed-by: Greg Kroah-Hartman --- drivers/pci/Kconfig | 1 + drivers/pci/access.c | 1 + drivers/pci/ats.c | 1 + drivers/pci/bus.c | 1 + drivers/pci/dwc/Kconfig | 2 ++ drivers/pci/endpoint/Kconfig | 1 + drivers/pci/endpoint/Makefile | 1 + drivers/pci/endpoint/functions/Kconfig | 1 + drivers/pci/endpoint/functions/Makefile | 1 + drivers/pci/host-bridge.c | 1 + drivers/pci/host/Kconfig | 2 ++ drivers/pci/host/pci-v3-semi.c | 1 + drivers/pci/hotplug/Kconfig | 1 + drivers/pci/iov.c | 1 + drivers/pci/msi.c | 1 + drivers/pci/pci-acpi.c | 1 + drivers/pci/pci.c | 1 + drivers/pci/pcie/Kconfig | 1 + drivers/pci/pcie/aer/Kconfig | 1 + drivers/pci/pcie/aer/Kconfig.debug | 1 + drivers/pci/pcie/portdrv_bus.c | 1 + drivers/pci/probe.c | 1 + drivers/pci/remove.c | 1 + drivers/pci/rom.c | 1 + drivers/pci/search.c | 1 + drivers/pci/setup-bus.c | 1 + drivers/pci/setup-irq.c | 1 + drivers/pci/slot.c | 1 + drivers/pci/switch/Kconfig | 2 ++ drivers/pci/switch/Makefile | 1 + drivers/pci/vpd.c | 1 + drivers/pci/xen-pcifront.c | 1 + lib/pci_iomap.c | 1 + 33 files changed, 36 insertions(+) (limited to 'lib') diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index bda151788f3f..949aa74206cd 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # PCI configuration # diff --git a/drivers/pci/access.c b/drivers/pci/access.c index 913d6722ece9..5a64da3fb033 100644 --- a/drivers/pci/access.c +++ b/drivers/pci/access.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include #include #include diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c index ad8ddbbbf245..6ad80a1fd5a7 100644 --- a/drivers/pci/ats.c +++ b/drivers/pci/ats.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * drivers/pci/ats.c * diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index bc56cf19afd3..30a4d33038bf 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * drivers/pci/bus.c * diff --git a/drivers/pci/dwc/Kconfig b/drivers/pci/dwc/Kconfig index 113e09440f85..fe1cb56b71f1 100644 --- a/drivers/pci/dwc/Kconfig +++ b/drivers/pci/dwc/Kconfig @@ -1,3 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 + menu "DesignWare PCI Core Support" config PCIE_DW diff --git a/drivers/pci/endpoint/Kconfig b/drivers/pci/endpoint/Kconfig index c09623ca8c3b..d1e7e4199432 100644 --- a/drivers/pci/endpoint/Kconfig +++ b/drivers/pci/endpoint/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # PCI Endpoint Support # diff --git a/drivers/pci/endpoint/Makefile b/drivers/pci/endpoint/Makefile index 1041f80a4645..95b2fe47e3b0 100644 --- a/drivers/pci/endpoint/Makefile +++ b/drivers/pci/endpoint/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for PCI Endpoint Support # diff --git a/drivers/pci/endpoint/functions/Kconfig b/drivers/pci/endpoint/functions/Kconfig index 2942066607e0..8820d0f7ec77 100644 --- a/drivers/pci/endpoint/functions/Kconfig +++ b/drivers/pci/endpoint/functions/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # PCI Endpoint Functions # diff --git a/drivers/pci/endpoint/functions/Makefile b/drivers/pci/endpoint/functions/Makefile index 6d94a4801838..d6fafff080e2 100644 --- a/drivers/pci/endpoint/functions/Makefile +++ b/drivers/pci/endpoint/functions/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for PCI Endpoint Functions # diff --git a/drivers/pci/host-bridge.c b/drivers/pci/host-bridge.c index add66236215c..ac8d81268296 100644 --- a/drivers/pci/host-bridge.c +++ b/drivers/pci/host-bridge.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * host bridge related code */ diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig index 38d12980db0f..a4ed7484d127 100644 --- a/drivers/pci/host/Kconfig +++ b/drivers/pci/host/Kconfig @@ -1,3 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 + menu "PCI host controller drivers" depends on PCI diff --git a/drivers/pci/host/pci-v3-semi.c b/drivers/pci/host/pci-v3-semi.c index 02f6e1e3a421..7fef64869d19 100644 --- a/drivers/pci/host/pci-v3-semi.c +++ b/drivers/pci/host/pci-v3-semi.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Support for V3 Semiconductor PCI Local Bus to PCI Bridge * Copyright (C) 2017 Linus Walleij diff --git a/drivers/pci/hotplug/Kconfig b/drivers/pci/hotplug/Kconfig index aadce45a9b4a..a8f21d051e0c 100644 --- a/drivers/pci/hotplug/Kconfig +++ b/drivers/pci/hotplug/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # PCI Hotplug support # diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 6bacb8995e96..c533325e8d86 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * drivers/pci/iov.c * diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index e06607167858..7f7547d200e4 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * File: msi.c * Purpose: PCI Message Signaled Interrupt (MSI) diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 4708eb9df71b..3b82bb86a203 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * File: pci-acpi.c * Purpose: Provide PCI support in ACPI diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 4a7c6864fdf4..50e716b3e2b8 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * PCI Bus Services, see include/linux/pci.h for further explanation. * diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig index ac53edbc9613..6d75a2eb6ecb 100644 --- a/drivers/pci/pcie/Kconfig +++ b/drivers/pci/pcie/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # PCI Express Port Bus Configuration # diff --git a/drivers/pci/pcie/aer/Kconfig b/drivers/pci/pcie/aer/Kconfig index 7d1437b01fdd..5a64eb3d6c7a 100644 --- a/drivers/pci/pcie/aer/Kconfig +++ b/drivers/pci/pcie/aer/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # PCI Express Root Port Device AER Configuration # diff --git a/drivers/pci/pcie/aer/Kconfig.debug b/drivers/pci/pcie/aer/Kconfig.debug index 9142949734f5..67e02174b65b 100644 --- a/drivers/pci/pcie/aer/Kconfig.debug +++ b/drivers/pci/pcie/aer/Kconfig.debug @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # PCI Express Root Port Device AER Debug Configuration # diff --git a/drivers/pci/pcie/portdrv_bus.c b/drivers/pci/pcie/portdrv_bus.c index 87e79a6ffb5a..f0fba552a0e2 100644 --- a/drivers/pci/pcie/portdrv_bus.c +++ b/drivers/pci/pcie/portdrv_bus.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * File: portdrv_bus.c * Purpose: PCI Express Port Bus Driver's Bus Overloading Functions diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 14e0ea1ff38b..3ea2d610d607 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * probe.c - PCI detection and setup code */ diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c index 2fa0dbde36b7..6f072eae4f7a 100644 --- a/drivers/pci/remove.c +++ b/drivers/pci/remove.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include #include #include diff --git a/drivers/pci/rom.c b/drivers/pci/rom.c index 1f5e6af96c83..9ba3fa841eb0 100644 --- a/drivers/pci/rom.c +++ b/drivers/pci/rom.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * drivers/pci/rom.c * diff --git a/drivers/pci/search.c b/drivers/pci/search.c index 4c6044ad7368..bc1e023f1353 100644 --- a/drivers/pci/search.c +++ b/drivers/pci/search.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * PCI searching functions. * diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index b1ad466199ad..0a26648f1712 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * drivers/pci/setup-bus.c * diff --git a/drivers/pci/setup-irq.c b/drivers/pci/setup-irq.c index 86106c44ce94..9c8d81b3cf82 100644 --- a/drivers/pci/setup-irq.c +++ b/drivers/pci/setup-irq.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * drivers/pci/setup-irq.c * diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c index e42909524dee..d10f556dc03e 100644 --- a/drivers/pci/slot.c +++ b/drivers/pci/slot.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * drivers/pci/slot.c * Copyright (C) 2006 Matthew Wilcox diff --git a/drivers/pci/switch/Kconfig b/drivers/pci/switch/Kconfig index 4c49648e0646..aee28a5bb98f 100644 --- a/drivers/pci/switch/Kconfig +++ b/drivers/pci/switch/Kconfig @@ -1,3 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 + menu "PCI switch controller drivers" depends on PCI diff --git a/drivers/pci/switch/Makefile b/drivers/pci/switch/Makefile index 37d8cfb03f3f..acd56d3b4a35 100644 --- a/drivers/pci/switch/Makefile +++ b/drivers/pci/switch/Makefile @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_PCI_SW_SWITCHTEC) += switchtec.o diff --git a/drivers/pci/vpd.c b/drivers/pci/vpd.c index 39b79070335d..70fba57d6103 100644 --- a/drivers/pci/vpd.c +++ b/drivers/pci/vpd.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * File: vpd.c * Purpose: Provide PCI VPD support diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c index 8fc2e9532575..82fe8526ac90 100644 --- a/drivers/pci/xen-pcifront.c +++ b/drivers/pci/xen-pcifront.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Xen PCI Frontend. * diff --git a/lib/pci_iomap.c b/lib/pci_iomap.c index c10fba461454..2d3eb1cb73b8 100644 --- a/lib/pci_iomap.c +++ b/lib/pci_iomap.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Implement the default iomap interfaces * -- cgit v1.2.3 From 21ccaf21497b72f42133182716a42dbf573d314b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 26 Jan 2018 23:33:48 +0100 Subject: bpf: add further test cases around div/mod and others Update selftests to relfect recent changes and add various new test cases. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- lib/test_bpf.c | 8 +- tools/testing/selftests/bpf/test_verifier.c | 343 ++++++++++++++++++++++++++-- 2 files changed, 336 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/test_bpf.c b/lib/test_bpf.c index e3938e395cba..4cd9ea9b3449 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -2003,10 +2003,14 @@ static struct bpf_test tests[] = { { { 4, 0 }, { 5, 10 } } }, { - "INT: DIV by zero", + /* This one doesn't go through verifier, but is just raw insn + * as opposed to cBPF tests from here. Thus div by 0 tests are + * done in test_verifier in BPF kselftests. + */ + "INT: DIV by -1", .u.insns_int = { BPF_ALU64_REG(BPF_MOV, R6, R1), - BPF_ALU64_IMM(BPF_MOV, R7, 0), + BPF_ALU64_IMM(BPF_MOV, R7, -1), BPF_LD_ABS(BPF_B, 3), BPF_ALU32_REG(BPF_DIV, R0, R7), BPF_EXIT_INSN(), diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 9e7075b268be..697bd83de295 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -111,7 +112,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, - .retval = 0, + .retval = 42, }, { "DIV32 by 0, zero check 2", @@ -123,7 +124,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, - .retval = 0, + .retval = 42, }, { "DIV64 by 0, zero check", @@ -135,7 +136,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, - .retval = 0, + .retval = 42, }, { "MOD32 by 0, zero check 1", @@ -147,7 +148,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, - .retval = 0, + .retval = 42, }, { "MOD32 by 0, zero check 2", @@ -159,7 +160,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, - .retval = 0, + .retval = 42, }, { "MOD64 by 0, zero check", @@ -171,13 +172,245 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, + .retval = 42, + }, + { + "DIV32 by 0, zero check ok, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_MOV32_IMM(BPF_REG_1, 2), + BPF_MOV32_IMM(BPF_REG_2, 16), + BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 8, + }, + { + "DIV32 by 0, zero check 1, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, + }, + { + "DIV32 by 0, zero check 2, cls", + .insns = { + BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, + }, + { + "DIV64 by 0, zero check, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, + }, + { + "MOD32 by 0, zero check ok, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_MOV32_IMM(BPF_REG_1, 3), + BPF_MOV32_IMM(BPF_REG_2, 5), + BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 2, + }, + { + "MOD32 by 0, zero check 1, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, + }, + { + "MOD32 by 0, zero check 2, cls", + .insns = { + BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, + }, + { + "MOD64 by 0, zero check 1, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_0, 2), + BPF_ALU64_REG(BPF_MOD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 2, + }, + { + "MOD64 by 0, zero check 2, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_0, -1), + BPF_ALU64_REG(BPF_MOD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = -1, + }, + /* Just make sure that JITs used udiv/umod as otherwise we get + * an exception from INT_MIN/-1 overflow similarly as with div + * by zero. + */ + { + "DIV32 overflow, check 1", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, -1), + BPF_MOV32_IMM(BPF_REG_0, INT_MIN), + BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, + }, + { + "DIV32 overflow, check 2", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, INT_MIN), + BPF_ALU32_IMM(BPF_DIV, BPF_REG_0, -1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, + }, + { + "DIV64 overflow, check 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, -1), + BPF_LD_IMM64(BPF_REG_0, LLONG_MIN), + BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, + }, + { + "DIV64 overflow, check 2", + .insns = { + BPF_LD_IMM64(BPF_REG_0, LLONG_MIN), + BPF_ALU64_IMM(BPF_DIV, BPF_REG_0, -1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, .retval = 0, }, + { + "MOD32 overflow, check 1", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, -1), + BPF_MOV32_IMM(BPF_REG_0, INT_MIN), + BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = INT_MIN, + }, + { + "MOD32 overflow, check 2", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, INT_MIN), + BPF_ALU32_IMM(BPF_MOD, BPF_REG_0, -1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = INT_MIN, + }, + { + "MOD64 overflow, check 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, -1), + BPF_LD_IMM64(BPF_REG_2, LLONG_MIN), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), + BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_3, BPF_REG_2, 1), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, + }, + { + "MOD64 overflow, check 2", + .insns = { + BPF_LD_IMM64(BPF_REG_2, LLONG_MIN), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), + BPF_ALU64_IMM(BPF_MOD, BPF_REG_2, -1), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_3, BPF_REG_2, 1), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, + }, + { + "xor32 zero extend check", + .insns = { + BPF_MOV32_IMM(BPF_REG_2, -1), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 32), + BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 0xffff), + BPF_ALU32_REG(BPF_XOR, BPF_REG_2, BPF_REG_2), + BPF_MOV32_IMM(BPF_REG_0, 2), + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0, 1), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, + }, { "empty prog", .insns = { }, - .errstr = "last insn is not an exit or jmp", + .errstr = "unknown opcode 00", .result = REJECT, }, { @@ -374,7 +607,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "BPF_ARSH not supported for 32 bit ALU", + .errstr = "unknown opcode c4", }, { "arsh32 on reg", @@ -385,7 +618,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "BPF_ARSH not supported for 32 bit ALU", + .errstr = "unknown opcode cc", }, { "arsh64 on imm", @@ -501,7 +734,7 @@ static struct bpf_test tests[] = { BPF_RAW_INSN(BPF_JMP | BPF_CALL | BPF_X, 0, 0, 0, 0), BPF_EXIT_INSN(), }, - .errstr = "BPF_CALL uses reserved", + .errstr = "unknown opcode 8d", .result = REJECT, }, { @@ -691,7 +924,7 @@ static struct bpf_test tests[] = { BPF_RAW_INSN(0, 0, 0, 0, 0), BPF_EXIT_INSN(), }, - .errstr = "invalid BPF_LD_IMM", + .errstr = "unknown opcode 00", .result = REJECT, }, { @@ -709,7 +942,7 @@ static struct bpf_test tests[] = { BPF_RAW_INSN(-1, 0, 0, 0, 0), BPF_EXIT_INSN(), }, - .errstr = "invalid BPF_ALU opcode f0", + .errstr = "unknown opcode ff", .result = REJECT, }, { @@ -718,7 +951,7 @@ static struct bpf_test tests[] = { BPF_RAW_INSN(-1, -1, -1, -1, -1), BPF_EXIT_INSN(), }, - .errstr = "invalid BPF_ALU opcode f0", + .errstr = "unknown opcode ff", .result = REJECT, }, { @@ -7543,7 +7776,7 @@ static struct bpf_test tests[] = { }, BPF_EXIT_INSN(), }, - .errstr = "BPF_END uses reserved fields", + .errstr = "unknown opcode d7", .result = REJECT, }, { @@ -8963,6 +9196,90 @@ static struct bpf_test tests[] = { .result = ACCEPT, .retval = 1, }, + { + "calls: div by 0 in subprog", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(BPF_REG_2, 0), + BPF_MOV32_IMM(BPF_REG_3, 1), + BPF_ALU32_REG(BPF_DIV, BPF_REG_3, BPF_REG_2), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, + }, + { + "calls: multiple ret types in subprog 1", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "R0 invalid mem access 'inv'", + }, + { + "calls: multiple ret types in subprog 2", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 9), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, + offsetof(struct __sk_buff, data)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 64), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map1 = { 16 }, + .result = REJECT, + .errstr = "R0 min value is outside of the array range", + }, { "calls: overlapping caller/callee", .insns = { -- cgit v1.2.3 From 1a3241ff10d038ecd096d03380327f2a0b5840a6 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 1 Feb 2018 21:00:50 +0300 Subject: lib/strscpy: Shut up KASAN false-positives in strscpy() strscpy() performs the word-at-a-time optimistic reads. So it may may access the memory past the end of the object, which is perfectly fine since strscpy() doesn't use that (past-the-end) data and makes sure the optimistic read won't cross a page boundary. Use new read_word_at_a_time() to shut up the KASAN. Note that this potentially could hide some bugs. In example bellow, stscpy() will copy more than we should (1-3 extra uninitialized bytes): char dst[8]; char *src; src = kmalloc(5, GFP_KERNEL); memset(src, 0xff, 5); strscpy(dst, src, 8); Signed-off-by: Andrey Ryabinin Signed-off-by: Linus Torvalds --- lib/string.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/string.c b/lib/string.c index 64a9e33f1daa..2c0900a5d51a 100644 --- a/lib/string.c +++ b/lib/string.c @@ -203,7 +203,7 @@ ssize_t strscpy(char *dest, const char *src, size_t count) while (max >= sizeof(unsigned long)) { unsigned long c, data; - c = *(unsigned long *)(src+res); + c = read_word_at_a_time(src+res); if (has_zero(c, &data, &constants)) { data = prep_zero_mask(c, data, &constants); data = create_zero_mask(data); -- cgit v1.2.3 From e0371b8be7b058e6418dbf38d7185887db1f8885 Mon Sep 17 00:00:00 2001 From: Ulf Magnusson Date: Wed, 31 Jan 2018 10:34:25 +0100 Subject: lib/Kconfig.debug: Remove blank help text Blank help texts are probably either a typo, a Kconfig misunderstanding, or some kind of half-committing to adding a help text (in which case a TODO comment would be clearer, if the help text really can't be added right away). Best to remove them, IMO. Signed-off-by: Ulf Magnusson Acked-by: Randy Dunlap Signed-off-by: Masahiro Yamada --- lib/Kconfig.debug | 1 - 1 file changed, 1 deletion(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 64d7c19d3167..3f307692d5af 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -351,7 +351,6 @@ config SECTION_MISMATCH_WARN_ONLY # config ARCH_WANT_FRAME_POINTERS bool - help config FRAME_POINTER bool "Compile the kernel with frame pointers" -- cgit v1.2.3 From 09584b406742413ac4c8d7e030374d4daa045b69 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 2 Feb 2018 22:37:15 -0800 Subject: bpf: fix selftests/bpf test_kmod.sh failure when CONFIG_BPF_JIT_ALWAYS_ON=y With CONFIG_BPF_JIT_ALWAYS_ON is defined in the config file, tools/testing/selftests/bpf/test_kmod.sh failed like below: [root@localhost bpf]# ./test_kmod.sh sysctl: setting key "net.core.bpf_jit_enable": Invalid argument [ JIT enabled:0 hardened:0 ] [ 132.175681] test_bpf: #297 BPF_MAXINSNS: Jump, gap, jump, ... FAIL to prog_create err=-524 len=4096 [ 132.458834] test_bpf: Summary: 348 PASSED, 1 FAILED, [340/340 JIT'ed] [ JIT enabled:1 hardened:0 ] [ 133.456025] test_bpf: #297 BPF_MAXINSNS: Jump, gap, jump, ... FAIL to prog_create err=-524 len=4096 [ 133.730935] test_bpf: Summary: 348 PASSED, 1 FAILED, [340/340 JIT'ed] [ JIT enabled:1 hardened:1 ] [ 134.769730] test_bpf: #297 BPF_MAXINSNS: Jump, gap, jump, ... FAIL to prog_create err=-524 len=4096 [ 135.050864] test_bpf: Summary: 348 PASSED, 1 FAILED, [340/340 JIT'ed] [ JIT enabled:1 hardened:2 ] [ 136.442882] test_bpf: #297 BPF_MAXINSNS: Jump, gap, jump, ... FAIL to prog_create err=-524 len=4096 [ 136.821810] test_bpf: Summary: 348 PASSED, 1 FAILED, [340/340 JIT'ed] [root@localhost bpf]# The test_kmod.sh load/remove test_bpf.ko multiple times with different settings for sysctl net.core.bpf_jit_{enable,harden}. The failed test #297 of test_bpf.ko is designed such that JIT always fails. Commit 290af86629b2 (bpf: introduce BPF_JIT_ALWAYS_ON config) introduced the following tightening logic: ... if (!bpf_prog_is_dev_bound(fp->aux)) { fp = bpf_int_jit_compile(fp); #ifdef CONFIG_BPF_JIT_ALWAYS_ON if (!fp->jited) { *err = -ENOTSUPP; return fp; } #endif ... With this logic, Test #297 always gets return value -ENOTSUPP when CONFIG_BPF_JIT_ALWAYS_ON is defined, causing the test failure. This patch fixed the failure by marking Test #297 as expected failure when CONFIG_BPF_JIT_ALWAYS_ON is defined. Fixes: 290af86629b2 (bpf: introduce BPF_JIT_ALWAYS_ON config) Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann --- lib/test_bpf.c | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 4cd9ea9b3449..b4e22345963f 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -83,6 +83,7 @@ struct bpf_test { __u32 result; } test[MAX_SUBTESTS]; int (*fill_helper)(struct bpf_test *self); + int expected_errcode; /* used when FLAG_EXPECTED_FAIL is set in the aux */ __u8 frag_data[MAX_DATA]; int stack_depth; /* for eBPF only, since tests don't call verifier */ }; @@ -2026,7 +2027,9 @@ static struct bpf_test tests[] = { }, CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, { }, - { } + { }, + .fill_helper = NULL, + .expected_errcode = -EINVAL, }, { "check: div_k_0", @@ -2036,7 +2039,9 @@ static struct bpf_test tests[] = { }, CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, { }, - { } + { }, + .fill_helper = NULL, + .expected_errcode = -EINVAL, }, { "check: unknown insn", @@ -2047,7 +2052,9 @@ static struct bpf_test tests[] = { }, CLASSIC | FLAG_EXPECTED_FAIL, { }, - { } + { }, + .fill_helper = NULL, + .expected_errcode = -EINVAL, }, { "check: out of range spill/fill", @@ -2057,7 +2064,9 @@ static struct bpf_test tests[] = { }, CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, { }, - { } + { }, + .fill_helper = NULL, + .expected_errcode = -EINVAL, }, { "JUMPS + HOLES", @@ -2149,6 +2158,8 @@ static struct bpf_test tests[] = { CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, { }, { }, + .fill_helper = NULL, + .expected_errcode = -EINVAL, }, { "check: LDX + RET X", @@ -2159,6 +2170,8 @@ static struct bpf_test tests[] = { CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, { }, { }, + .fill_helper = NULL, + .expected_errcode = -EINVAL, }, { /* Mainly checking JIT here. */ "M[]: alt STX + LDX", @@ -2333,6 +2346,8 @@ static struct bpf_test tests[] = { CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, { }, { }, + .fill_helper = NULL, + .expected_errcode = -EINVAL, }, { /* Passes checker but fails during runtime. */ "LD [SKF_AD_OFF-1]", @@ -5395,6 +5410,7 @@ static struct bpf_test tests[] = { { }, { }, .fill_helper = bpf_fill_maxinsns4, + .expected_errcode = -EINVAL, }, { /* Mainly checking JIT here. */ "BPF_MAXINSNS: Very long jump", @@ -5450,10 +5466,15 @@ static struct bpf_test tests[] = { { "BPF_MAXINSNS: Jump, gap, jump, ...", { }, +#ifdef CONFIG_BPF_JIT_ALWAYS_ON + CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, +#else CLASSIC | FLAG_NO_DATA, +#endif { }, { { 0, 0xababcbac } }, .fill_helper = bpf_fill_maxinsns11, + .expected_errcode = -ENOTSUPP, }, { "BPF_MAXINSNS: ld_abs+get_processor_id", @@ -6344,7 +6365,7 @@ static struct bpf_prog *generate_filter(int which, int *err) *err = bpf_prog_create(&fp, &fprog); if (tests[which].aux & FLAG_EXPECTED_FAIL) { - if (*err == -EINVAL) { + if (*err == tests[which].expected_errcode) { pr_cont("PASS\n"); /* Verifier rejected filter as expected. */ *err = 0; -- cgit v1.2.3 From 234a4624efe5629a777b4c00dbdf41dd8b7332db Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 28 Nov 2017 09:56:36 -0500 Subject: idr: Delete idr_replace_ext function Changing idr_replace's 'id' argument to 'unsigned long' works for all callers. Callers which passed a negative ID now get -ENOENT instead of -EINVAL. No callers relied on this error value. Signed-off-by: Matthew Wilcox --- include/linux/idr.h | 3 +-- lib/idr.c | 15 +++------------ net/sched/act_api.c | 2 +- net/sched/cls_basic.c | 2 +- net/sched/cls_bpf.c | 2 +- net/sched/cls_flower.c | 2 +- net/sched/cls_u32.c | 2 +- 7 files changed, 9 insertions(+), 19 deletions(-) (limited to 'lib') diff --git a/include/linux/idr.h b/include/linux/idr.h index 118987a17ada..f1299c4dc45f 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -136,8 +136,7 @@ int idr_for_each(const struct idr *, int (*fn)(int id, void *p, void *data), void *data); void *idr_get_next(struct idr *, int *nextid); void *idr_get_next_ext(struct idr *idr, unsigned long *nextid); -void *idr_replace(struct idr *, void *, int id); -void *idr_replace_ext(struct idr *idr, void *ptr, unsigned long id); +void *idr_replace(struct idr *, void *, unsigned long id); void idr_destroy(struct idr *); static inline void *idr_remove(struct idr *idr, unsigned long id) diff --git a/lib/idr.c b/lib/idr.c index 2593ce513a18..577bfd4fe5c2 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -147,18 +147,9 @@ EXPORT_SYMBOL(idr_get_next_ext); * the one being replaced!). * * Returns: the old value on success. %-ENOENT indicates that @id was not - * found. %-EINVAL indicates that @id or @ptr were not valid. + * found. %-EINVAL indicates that @ptr was not valid. */ -void *idr_replace(struct idr *idr, void *ptr, int id) -{ - if (id < 0) - return ERR_PTR(-EINVAL); - - return idr_replace_ext(idr, ptr, id); -} -EXPORT_SYMBOL(idr_replace); - -void *idr_replace_ext(struct idr *idr, void *ptr, unsigned long id) +void *idr_replace(struct idr *idr, void *ptr, unsigned long id) { struct radix_tree_node *node; void __rcu **slot = NULL; @@ -175,7 +166,7 @@ void *idr_replace_ext(struct idr *idr, void *ptr, unsigned long id) return entry; } -EXPORT_SYMBOL(idr_replace_ext); +EXPORT_SYMBOL(idr_replace); /** * DOC: IDA description diff --git a/net/sched/act_api.c b/net/sched/act_api.c index be5b2b455371..1572466be031 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -348,7 +348,7 @@ void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a) struct tcf_idrinfo *idrinfo = tn->idrinfo; spin_lock_bh(&idrinfo->lock); - idr_replace_ext(&idrinfo->action_idr, a, a->tcfa_index); + idr_replace(&idrinfo->action_idr, a, a->tcfa_index); spin_unlock_bh(&idrinfo->lock); } EXPORT_SYMBOL(tcf_idr_insert); diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 121dff0a1763..588c635f195e 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -235,7 +235,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, *arg = fnew; if (fold) { - idr_replace_ext(&head->handle_idr, fnew, fnew->handle); + idr_replace(&head->handle_idr, fnew, fnew->handle); list_replace_rcu(&fold->link, &fnew->link); tcf_unbind_filter(tp, &fold->res); tcf_exts_get_net(&fold->exts); diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index e45137e3f567..8cb3a33b1afd 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -526,7 +526,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, prog->gen_flags |= TCA_CLS_FLAGS_NOT_IN_HW; if (oldprog) { - idr_replace_ext(&head->handle_idr, prog, handle); + idr_replace(&head->handle_idr, prog, handle); list_replace_rcu(&oldprog->link, &prog->link); tcf_unbind_filter(tp, &oldprog->res); tcf_exts_get_net(&oldprog->exts); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index e098e8ab76c9..0d1b0c11de34 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -967,7 +967,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, if (fold) { fnew->handle = handle; - idr_replace_ext(&head->handle_idr, fnew, fnew->handle); + idr_replace(&head->handle_idr, fnew, fnew->handle); list_replace_rcu(&fold->list, &fnew->list); tcf_unbind_filter(tp, &fold->res); tcf_exts_get_net(&fold->exts); diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index bd55ed783cb1..5b256da985b1 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -848,7 +848,7 @@ static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c, if (pins->handle == n->handle) break; - idr_replace_ext(&ht->handle_idr, n, n->handle); + idr_replace(&ht->handle_idr, n, n->handle); RCU_INIT_POINTER(n->next, pins->next); rcu_assign_pointer(*ins, n); } -- cgit v1.2.3 From e096f6a762bc54d0e5d44ba8b196e70b58e04367 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 28 Nov 2017 10:14:27 -0500 Subject: idr: Add idr_alloc_u32 helper All current users of idr_alloc_ext() actually want to allocate a u32 and idr_alloc_u32() fits their needs better. Like idr_get_next(), it uses a 'nextid' argument which serves as both a pointer to the start ID and the assigned ID (instead of a separate minimum and pointer-to-assigned-ID argument). It uses a 'max' argument rather than 'end' because the semantics that idr_alloc has for 'end' don't work well for unsigned types. Since idr_alloc_u32() returns an errno instead of the allocated ID, mark it as __must_check to help callers use it correctly. Include copious kernel-doc. Chris Mi has promised to contribute test-cases for idr_alloc_u32. Signed-off-by: Matthew Wilcox --- include/linux/idr.h | 2 ++ lib/idr.c | 31 +++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) (limited to 'lib') diff --git a/include/linux/idr.h b/include/linux/idr.h index 7867d6117535..561a4cbabca6 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -131,6 +131,8 @@ static inline int idr_alloc_ext(struct idr *idr, void *ptr, return idr_alloc_cmn(idr, ptr, index, start, end, gfp, true); } +int __must_check idr_alloc_u32(struct idr *, void *ptr, u32 *nextid, + unsigned long max, gfp_t); int idr_alloc_cyclic(struct idr *, void *entry, int start, int end, gfp_t); int idr_for_each(const struct idr *, int (*fn)(int id, void *p, void *data), void *data); diff --git a/lib/idr.c b/lib/idr.c index 577bfd4fe5c2..6d4ec2c2982b 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -7,6 +7,37 @@ DEFINE_PER_CPU(struct ida_bitmap *, ida_bitmap); static DEFINE_SPINLOCK(simple_ida_lock); +/** + * idr_alloc_u32() - Allocate an ID. + * @idr: IDR handle. + * @ptr: Pointer to be associated with the new ID. + * @nextid: Pointer to an ID. + * @max: The maximum ID to allocate (inclusive). + * @gfp: Memory allocation flags. + * + * Allocates an unused ID in the range specified by @nextid and @max. + * Note that @max is inclusive whereas the @end parameter to idr_alloc() + * is exclusive. + * + * The caller should provide their own locking to ensure that two + * concurrent modifications to the IDR are not possible. Read-only + * accesses to the IDR may be done under the RCU read lock or may + * exclude simultaneous writers. + * + * Return: 0 if an ID was allocated, -ENOMEM if memory allocation failed, + * or -ENOSPC if no free IDs could be found. If an error occurred, + * @nextid is unchanged. + */ +int idr_alloc_u32(struct idr *idr, void *ptr, u32 *nextid, + unsigned long max, gfp_t gfp) +{ + unsigned long tmp = *nextid; + int ret = idr_alloc_ext(idr, ptr, &tmp, tmp, max + 1, gfp); + *nextid = tmp; + return ret; +} +EXPORT_SYMBOL_GPL(idr_alloc_u32); + int idr_alloc_cmn(struct idr *idr, void *ptr, unsigned long *index, unsigned long start, unsigned long end, gfp_t gfp, bool ext) -- cgit v1.2.3 From 460488c58ca8b9167463ac22ec9a2e33db351962 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 28 Nov 2017 15:16:24 -0500 Subject: idr: Remove idr_alloc_ext It has no more users, so remove it. Move idr_alloc() back into idr.c, move the guts of idr_alloc_cmn() into idr_alloc_u32(), remove the wrappers around idr_get_free_cmn() and rename it to idr_get_free(). While there is now no interface to allocate IDs larger than a u32, the IDR internals remain ready to handle a larger ID should a need arise. These changes make it possible to provide the guarantee that, if the nextid pointer points into the object, the object's ID will be initialised before a concurrent lookup can find the object. Signed-off-by: Matthew Wilcox --- include/linux/idr.h | 51 +------------- include/linux/radix-tree.h | 17 +---- lib/idr.c | 128 +++++++++++++++++++++++------------- lib/radix-tree.c | 3 +- tools/testing/radix-tree/idr-test.c | 17 +++++ 5 files changed, 105 insertions(+), 111 deletions(-) (limited to 'lib') diff --git a/include/linux/idr.h b/include/linux/idr.h index 561a4cbabca6..fa2a04b984df 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -15,7 +15,6 @@ #include #include #include -#include struct idr { struct radix_tree_root idr_rt; @@ -82,55 +81,7 @@ static inline void idr_set_cursor(struct idr *idr, unsigned int val) void idr_preload(gfp_t gfp_mask); -int idr_alloc_cmn(struct idr *idr, void *ptr, unsigned long *index, - unsigned long start, unsigned long end, gfp_t gfp, - bool ext); - -/** - * idr_alloc - allocate an id - * @idr: idr handle - * @ptr: pointer to be associated with the new id - * @start: the minimum id (inclusive) - * @end: the maximum id (exclusive) - * @gfp: memory allocation flags - * - * Allocates an unused ID in the range [start, end). Returns -ENOSPC - * if there are no unused IDs in that range. - * - * Note that @end is treated as max when <= 0. This is to always allow - * using @start + N as @end as long as N is inside integer range. - * - * Simultaneous modifications to the @idr are not allowed and should be - * prevented by the user, usually with a lock. idr_alloc() may be called - * concurrently with read-only accesses to the @idr, such as idr_find() and - * idr_for_each_entry(). - */ -static inline int idr_alloc(struct idr *idr, void *ptr, - int start, int end, gfp_t gfp) -{ - unsigned long id; - int ret; - - if (WARN_ON_ONCE(start < 0)) - return -EINVAL; - - ret = idr_alloc_cmn(idr, ptr, &id, start, end, gfp, false); - - if (ret) - return ret; - - return id; -} - -static inline int idr_alloc_ext(struct idr *idr, void *ptr, - unsigned long *index, - unsigned long start, - unsigned long end, - gfp_t gfp) -{ - return idr_alloc_cmn(idr, ptr, index, start, end, gfp, true); -} - +int idr_alloc(struct idr *, void *, int start, int end, gfp_t); int __must_check idr_alloc_u32(struct idr *, void *ptr, u32 *nextid, unsigned long max, gfp_t); int idr_alloc_cyclic(struct idr *, void *entry, int start, int end, gfp_t); diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 23a9c89c7ad9..fc55ff31eca7 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -356,24 +356,9 @@ int radix_tree_split(struct radix_tree_root *, unsigned long index, int radix_tree_join(struct radix_tree_root *, unsigned long index, unsigned new_order, void *); -void __rcu **idr_get_free_cmn(struct radix_tree_root *root, +void __rcu **idr_get_free(struct radix_tree_root *root, struct radix_tree_iter *iter, gfp_t gfp, unsigned long max); -static inline void __rcu **idr_get_free(struct radix_tree_root *root, - struct radix_tree_iter *iter, - gfp_t gfp, - int end) -{ - return idr_get_free_cmn(root, iter, gfp, end > 0 ? end - 1 : INT_MAX); -} - -static inline void __rcu **idr_get_free_ext(struct radix_tree_root *root, - struct radix_tree_iter *iter, - gfp_t gfp, - unsigned long end) -{ - return idr_get_free_cmn(root, iter, gfp, end - 1); -} enum { RADIX_TREE_ITER_TAG_MASK = 0x0f, /* tag index in lower nybble */ diff --git a/lib/idr.c b/lib/idr.c index 6d4ec2c2982b..c3f16307b908 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -17,7 +18,9 @@ static DEFINE_SPINLOCK(simple_ida_lock); * * Allocates an unused ID in the range specified by @nextid and @max. * Note that @max is inclusive whereas the @end parameter to idr_alloc() - * is exclusive. + * is exclusive. The new ID is assigned to @nextid before the pointer + * is inserted into the IDR, so if @nextid points into the object pointed + * to by @ptr, a concurrent lookup will not find an uninitialised ID. * * The caller should provide their own locking to ensure that two * concurrent modifications to the IDR are not possible. Read-only @@ -30,67 +33,104 @@ static DEFINE_SPINLOCK(simple_ida_lock); */ int idr_alloc_u32(struct idr *idr, void *ptr, u32 *nextid, unsigned long max, gfp_t gfp) -{ - unsigned long tmp = *nextid; - int ret = idr_alloc_ext(idr, ptr, &tmp, tmp, max + 1, gfp); - *nextid = tmp; - return ret; -} -EXPORT_SYMBOL_GPL(idr_alloc_u32); - -int idr_alloc_cmn(struct idr *idr, void *ptr, unsigned long *index, - unsigned long start, unsigned long end, gfp_t gfp, - bool ext) { struct radix_tree_iter iter; void __rcu **slot; if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr))) return -EINVAL; + if (WARN_ON_ONCE(!(idr->idr_rt.gfp_mask & ROOT_IS_IDR))) + idr->idr_rt.gfp_mask |= IDR_RT_MARKER; - radix_tree_iter_init(&iter, start); - if (ext) - slot = idr_get_free_ext(&idr->idr_rt, &iter, gfp, end); - else - slot = idr_get_free(&idr->idr_rt, &iter, gfp, end); + radix_tree_iter_init(&iter, *nextid); + slot = idr_get_free(&idr->idr_rt, &iter, gfp, max); if (IS_ERR(slot)) return PTR_ERR(slot); + *nextid = iter.index; + /* there is a memory barrier inside radix_tree_iter_replace() */ radix_tree_iter_replace(&idr->idr_rt, &iter, slot, ptr); radix_tree_iter_tag_clear(&idr->idr_rt, &iter, IDR_FREE); - if (index) - *index = iter.index; return 0; } -EXPORT_SYMBOL_GPL(idr_alloc_cmn); +EXPORT_SYMBOL_GPL(idr_alloc_u32); /** - * idr_alloc_cyclic - allocate new idr entry in a cyclical fashion - * @idr: idr handle - * @ptr: pointer to be associated with the new id - * @start: the minimum id (inclusive) - * @end: the maximum id (exclusive) - * @gfp: memory allocation flags - * - * Allocates an ID larger than the last ID allocated if one is available. - * If not, it will attempt to allocate the smallest ID that is larger or - * equal to @start. + * idr_alloc() - Allocate an ID. + * @idr: IDR handle. + * @ptr: Pointer to be associated with the new ID. + * @start: The minimum ID (inclusive). + * @end: The maximum ID (exclusive). + * @gfp: Memory allocation flags. + * + * Allocates an unused ID in the range specified by @start and @end. If + * @end is <= 0, it is treated as one larger than %INT_MAX. This allows + * callers to use @start + N as @end as long as N is within integer range. + * + * The caller should provide their own locking to ensure that two + * concurrent modifications to the IDR are not possible. Read-only + * accesses to the IDR may be done under the RCU read lock or may + * exclude simultaneous writers. + * + * Return: The newly allocated ID, -ENOMEM if memory allocation failed, + * or -ENOSPC if no free IDs could be found. */ -int idr_alloc_cyclic(struct idr *idr, void *ptr, int start, int end, gfp_t gfp) +int idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp) { - int id, curr = idr->idr_next; + u32 id = start; + int ret; + + if (WARN_ON_ONCE(start < 0)) + return -EINVAL; - if (curr < start) - curr = start; + ret = idr_alloc_u32(idr, ptr, &id, end > 0 ? end - 1 : INT_MAX, gfp); + if (ret) + return ret; - id = idr_alloc(idr, ptr, curr, end, gfp); - if ((id == -ENOSPC) && (curr > start)) - id = idr_alloc(idr, ptr, start, curr, gfp); + return id; +} +EXPORT_SYMBOL_GPL(idr_alloc); - if (id >= 0) - idr->idr_next = id + 1U; +/** + * idr_alloc_cyclic() - Allocate an ID cyclically. + * @idr: IDR handle. + * @ptr: Pointer to be associated with the new ID. + * @start: The minimum ID (inclusive). + * @end: The maximum ID (exclusive). + * @gfp: Memory allocation flags. + * + * Allocates an unused ID in the range specified by @nextid and @end. If + * @end is <= 0, it is treated as one larger than %INT_MAX. This allows + * callers to use @start + N as @end as long as N is within integer range. + * The search for an unused ID will start at the last ID allocated and will + * wrap around to @start if no free IDs are found before reaching @end. + * + * The caller should provide their own locking to ensure that two + * concurrent modifications to the IDR are not possible. Read-only + * accesses to the IDR may be done under the RCU read lock or may + * exclude simultaneous writers. + * + * Return: The newly allocated ID, -ENOMEM if memory allocation failed, + * or -ENOSPC if no free IDs could be found. + */ +int idr_alloc_cyclic(struct idr *idr, void *ptr, int start, int end, gfp_t gfp) +{ + u32 id = idr->idr_next; + int err, max = end > 0 ? end - 1 : INT_MAX; + if ((int)id < start) + id = start; + + err = idr_alloc_u32(idr, ptr, &id, max, gfp); + if ((err == -ENOSPC) && (id > start)) { + id = start; + err = idr_alloc_u32(idr, ptr, &id, max, gfp); + } + if (err) + return err; + + idr->idr_next = id + 1; return id; } EXPORT_SYMBOL(idr_alloc_cyclic); @@ -167,10 +207,10 @@ void *idr_get_next_ext(struct idr *idr, unsigned long *nextid) EXPORT_SYMBOL(idr_get_next_ext); /** - * idr_replace - replace pointer for given id - * @idr: idr handle - * @ptr: New pointer to associate with the ID - * @id: Lookup key + * idr_replace() - replace pointer for given ID. + * @idr: IDR handle. + * @ptr: New pointer to associate with the ID. + * @id: ID to change. * * Replace the pointer registered with an ID and return the old value. * This function can be called under the RCU read lock concurrently with @@ -257,7 +297,7 @@ EXPORT_SYMBOL(idr_replace); * bitmap, which is excessive. */ -#define IDA_MAX (0x80000000U / IDA_BITMAP_BITS) +#define IDA_MAX (0x80000000U / IDA_BITMAP_BITS - 1) /** * ida_get_new_above - allocate new ID above or equal to a start id diff --git a/lib/radix-tree.c b/lib/radix-tree.c index c8d55565fafa..0a7ae3288a24 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -2135,7 +2136,7 @@ int ida_pre_get(struct ida *ida, gfp_t gfp) } EXPORT_SYMBOL(ida_pre_get); -void __rcu **idr_get_free_cmn(struct radix_tree_root *root, +void __rcu **idr_get_free(struct radix_tree_root *root, struct radix_tree_iter *iter, gfp_t gfp, unsigned long max) { diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index 892ef8855b02..36437ade429c 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -215,6 +215,23 @@ void idr_checks(void) assert(idr_is_empty(&idr)); + idr_set_cursor(&idr, INT_MAX - 3UL); + for (i = INT_MAX - 3UL; i < INT_MAX + 3UL; i++) { + struct item *item; + unsigned int id; + if (i <= INT_MAX) + item = item_create(i, 0); + else + item = item_create(i - INT_MAX - 1, 0); + + id = idr_alloc_cyclic(&idr, item, 0, 0, GFP_KERNEL); + assert(id == item->index); + } + + idr_for_each(&idr, item_idr_free, &idr); + idr_destroy(&idr); + assert(idr_is_empty(&idr)); + for (i = 1; i < 10000; i++) { struct item *item = item_create(i, 0); assert(idr_alloc(&idr, item, 1, 20000, GFP_KERNEL) == i); -- cgit v1.2.3 From 7a4575778f4db109b8b78e6dba03271096793f88 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 28 Nov 2017 15:39:51 -0500 Subject: idr: Rename idr_for_each_entry_ext Most places in the kernel that we need to distinguish functions by the type of their arguments, we use '_ul' as a suffix for the unsigned long variant, not '_ext'. Also add kernel-doc. Signed-off-by: Matthew Wilcox --- include/linux/idr.h | 52 ++++++++++++++++++++++++++++++++-------------------- lib/idr.c | 30 ++++++++++++++++++++---------- net/sched/act_api.c | 6 +++--- 3 files changed, 55 insertions(+), 33 deletions(-) (limited to 'lib') diff --git a/include/linux/idr.h b/include/linux/idr.h index fa2a04b984df..95a82cf2ed02 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -88,7 +88,7 @@ int idr_alloc_cyclic(struct idr *, void *entry, int start, int end, gfp_t); int idr_for_each(const struct idr *, int (*fn)(int id, void *p, void *data), void *data); void *idr_get_next(struct idr *, int *nextid); -void *idr_get_next_ext(struct idr *idr, unsigned long *nextid); +void *idr_get_next_ul(struct idr *, unsigned long *nextid); void *idr_replace(struct idr *, void *, unsigned long id); void idr_destroy(struct idr *); @@ -121,16 +121,18 @@ static inline void idr_preload_end(void) } /** - * idr_find - return pointer for given id - * @idr: idr handle - * @id: lookup key + * idr_find() - Return pointer for given ID. + * @idr: IDR handle. + * @id: Pointer ID. * - * Return the pointer given the id it has been registered with. A %NULL - * return indicates that @id is not valid or you passed %NULL in - * idr_get_new(). + * Looks up the pointer associated with this ID. A %NULL pointer may + * indicate that @id is not allocated or that the %NULL pointer was + * associated with this ID. * * This function can be called under rcu_read_lock(), given that the leaf * pointers lifetimes are correctly managed. + * + * Return: The pointer associated with this ID. */ static inline void *idr_find(const struct idr *idr, unsigned long id) { @@ -138,28 +140,38 @@ static inline void *idr_find(const struct idr *idr, unsigned long id) } /** - * idr_for_each_entry - iterate over an idr's elements of a given type - * @idr: idr handle - * @entry: the type * to use as cursor - * @id: id entry's key + * idr_for_each_entry() - Iterate over an IDR's elements of a given type. + * @idr: IDR handle. + * @entry: The type * to use as cursor + * @id: Entry ID. * * @entry and @id do not need to be initialized before the loop, and - * after normal terminatinon @entry is left with the value NULL. This + * after normal termination @entry is left with the value NULL. This * is convenient for a "not found" value. */ #define idr_for_each_entry(idr, entry, id) \ for (id = 0; ((entry) = idr_get_next(idr, &(id))) != NULL; ++id) -#define idr_for_each_entry_ext(idr, entry, id) \ - for (id = 0; ((entry) = idr_get_next_ext(idr, &(id))) != NULL; ++id) /** - * idr_for_each_entry_continue - continue iteration over an idr's elements of a given type - * @idr: idr handle - * @entry: the type * to use as cursor - * @id: id entry's key + * idr_for_each_entry_ul() - Iterate over an IDR's elements of a given type. + * @idr: IDR handle. + * @entry: The type * to use as cursor. + * @id: Entry ID. + * + * @entry and @id do not need to be initialized before the loop, and + * after normal termination @entry is left with the value NULL. This + * is convenient for a "not found" value. + */ +#define idr_for_each_entry_ul(idr, entry, id) \ + for (id = 0; ((entry) = idr_get_next_ul(idr, &(id))) != NULL; ++id) + +/** + * idr_for_each_entry_continue() - Continue iteration over an IDR's elements of a given type + * @idr: IDR handle. + * @entry: The type * to use as a cursor. + * @id: Entry ID. * - * Continue to iterate over list of given type, continuing after - * the current position. + * Continue to iterate over entries, continuing after the current position. */ #define idr_for_each_entry_continue(idr, entry, id) \ for ((entry) = idr_get_next((idr), &(id)); \ diff --git a/lib/idr.c b/lib/idr.c index c3f16307b908..3df44d528b68 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -136,13 +136,13 @@ int idr_alloc_cyclic(struct idr *idr, void *ptr, int start, int end, gfp_t gfp) EXPORT_SYMBOL(idr_alloc_cyclic); /** - * idr_for_each - iterate through all stored pointers - * @idr: idr handle - * @fn: function to be called for each pointer - * @data: data passed to callback function + * idr_for_each() - Iterate through all stored pointers. + * @idr: IDR handle. + * @fn: Function to be called for each pointer. + * @data: Data passed to callback function. * * The callback function will be called for each entry in @idr, passing - * the id, the pointer and the data pointer passed to this function. + * the ID, the entry and @data. * * If @fn returns anything other than %0, the iteration stops and that * value is returned from this function. @@ -169,9 +169,9 @@ int idr_for_each(const struct idr *idr, EXPORT_SYMBOL(idr_for_each); /** - * idr_get_next - Find next populated entry - * @idr: idr handle - * @nextid: Pointer to lowest possible ID to return + * idr_get_next() - Find next populated entry. + * @idr: IDR handle. + * @nextid: Pointer to an ID. * * Returns the next populated entry in the tree with an ID greater than * or equal to the value pointed to by @nextid. On exit, @nextid is updated @@ -192,7 +192,17 @@ void *idr_get_next(struct idr *idr, int *nextid) } EXPORT_SYMBOL(idr_get_next); -void *idr_get_next_ext(struct idr *idr, unsigned long *nextid) +/** + * idr_get_next_ul() - Find next populated entry. + * @idr: IDR handle. + * @nextid: Pointer to an ID. + * + * Returns the next populated entry in the tree with an ID greater than + * or equal to the value pointed to by @nextid. On exit, @nextid is updated + * to the ID of the found value. To use in a loop, the value pointed to by + * nextid must be incremented by the user. + */ +void *idr_get_next_ul(struct idr *idr, unsigned long *nextid) { struct radix_tree_iter iter; void __rcu **slot; @@ -204,7 +214,7 @@ void *idr_get_next_ext(struct idr *idr, unsigned long *nextid) *nextid = iter.index; return rcu_dereference_raw(*slot); } -EXPORT_SYMBOL(idr_get_next_ext); +EXPORT_SYMBOL(idr_get_next_ul); /** * idr_replace() - replace pointer for given ID. diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 9b6916a92423..eba6682727dd 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -124,7 +124,7 @@ static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, s_i = cb->args[0]; - idr_for_each_entry_ext(idr, p, id) { + idr_for_each_entry_ul(idr, p, id) { index++; if (index < s_i) continue; @@ -181,7 +181,7 @@ static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, if (nla_put_string(skb, TCA_KIND, ops->kind)) goto nla_put_failure; - idr_for_each_entry_ext(idr, p, id) { + idr_for_each_entry_ul(idr, p, id) { ret = __tcf_idr_release(p, false, true); if (ret == ACT_P_DELETED) { module_put(ops->owner); @@ -351,7 +351,7 @@ void tcf_idrinfo_destroy(const struct tc_action_ops *ops, int ret; unsigned long id = 1; - idr_for_each_entry_ext(idr, p, id) { + idr_for_each_entry_ul(idr, p, id) { ret = __tcf_idr_release(p, false, true); if (ret == ACT_P_DELETED) module_put(ops->owner); -- cgit v1.2.3 From 72fd6c7be701d80eef34da305a6294c61520fe13 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 28 Nov 2017 15:50:12 -0500 Subject: idr: Warn if old iterators see large IDs Now that the IDR can be used to store large IDs, it is possible somebody might only partially convert their old code and use the iterators which can only handle IDs up to INT_MAX. It's probably unwise to show them a truncated ID, so settle for spewing warnings to dmesg, and terminating the iteration. Signed-off-by: Matthew Wilcox --- lib/idr.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/idr.c b/lib/idr.c index 3df44d528b68..b47055efceb0 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -159,7 +159,11 @@ int idr_for_each(const struct idr *idr, void __rcu **slot; radix_tree_for_each_slot(slot, &idr->idr_rt, &iter, 0) { - int ret = fn(iter.index, rcu_dereference_raw(*slot), data); + int ret; + + if (WARN_ON_ONCE(iter.index > INT_MAX)) + break; + ret = fn(iter.index, rcu_dereference_raw(*slot), data); if (ret) return ret; } @@ -187,6 +191,9 @@ void *idr_get_next(struct idr *idr, int *nextid) if (!slot) return NULL; + if (WARN_ON_ONCE(iter.index > INT_MAX)) + return NULL; + *nextid = iter.index; return rcu_dereference_raw(*slot); } -- cgit v1.2.3 From 6ce711f2750031d12cec91384ac5cfa0a485b60a Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 30 Nov 2017 13:45:11 -0500 Subject: idr: Make 1-based IDRs more efficient About 20% of the IDR users in the kernel want the allocated IDs to start at 1. The implementation currently searches all the way down the left hand side of the tree, finds no free ID other than ID 0, walks all the way back up, and then all the way down again. This patch 'rebases' the ID so we fill the entire radix tree, rather than leave a gap at 0. Chris Wilson says: "I did the quick hack of allocating index 0 of the idr and that eradicated idr_get_free() from being at the top of the profiles for the many-object stress tests. This improvement will be much appreciated." Signed-off-by: Matthew Wilcox --- include/linux/idr.h | 66 +++++++++++++++++++--------------- lib/idr.c | 70 ++++++++++++++++++++++++++++++++----- tools/testing/radix-tree/idr-test.c | 7 ++-- 3 files changed, 103 insertions(+), 40 deletions(-) (limited to 'lib') diff --git a/include/linux/idr.h b/include/linux/idr.h index 95a82cf2ed02..86b38df6e121 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -18,6 +18,7 @@ struct idr { struct radix_tree_root idr_rt; + unsigned int idr_base; unsigned int idr_next; }; @@ -30,12 +31,20 @@ struct idr { /* Set the IDR flag and the IDR_FREE tag */ #define IDR_RT_MARKER ((__force gfp_t)(3 << __GFP_BITS_SHIFT)) -#define IDR_INIT \ -{ \ - .idr_rt = RADIX_TREE_INIT(IDR_RT_MARKER) \ +#define IDR_INIT_BASE(base) { \ + .idr_rt = RADIX_TREE_INIT(IDR_RT_MARKER), \ + .idr_base = (base), \ + .idr_next = 0, \ } #define DEFINE_IDR(name) struct idr name = IDR_INIT +/** + * IDR_INIT() - Initialise an IDR. + * + * A freshly-initialised IDR contains no IDs. + */ +#define IDR_INIT IDR_INIT_BASE(0) + /** * idr_get_cursor - Return the current position of the cyclic allocator * @idr: idr handle @@ -81,10 +90,12 @@ static inline void idr_set_cursor(struct idr *idr, unsigned int val) void idr_preload(gfp_t gfp_mask); -int idr_alloc(struct idr *, void *, int start, int end, gfp_t); -int __must_check idr_alloc_u32(struct idr *, void *ptr, u32 *nextid, +int idr_alloc(struct idr *, void *ptr, int start, int end, gfp_t); +int __must_check idr_alloc_u32(struct idr *, void *ptr, u32 *id, unsigned long max, gfp_t); -int idr_alloc_cyclic(struct idr *, void *entry, int start, int end, gfp_t); +int idr_alloc_cyclic(struct idr *, void *ptr, int start, int end, gfp_t); +void *idr_remove(struct idr *, unsigned long id); +void *idr_find(const struct idr *, unsigned long id); int idr_for_each(const struct idr *, int (*fn)(int id, void *p, void *data), void *data); void *idr_get_next(struct idr *, int *nextid); @@ -92,15 +103,31 @@ void *idr_get_next_ul(struct idr *, unsigned long *nextid); void *idr_replace(struct idr *, void *, unsigned long id); void idr_destroy(struct idr *); -static inline void *idr_remove(struct idr *idr, unsigned long id) +/** + * idr_init_base() - Initialise an IDR. + * @idr: IDR handle. + * @base: The base value for the IDR. + * + * This variation of idr_init() creates an IDR which will allocate IDs + * starting at %base. + */ +static inline void idr_init_base(struct idr *idr, int base) { - return radix_tree_delete_item(&idr->idr_rt, id, NULL); + INIT_RADIX_TREE(&idr->idr_rt, IDR_RT_MARKER); + idr->idr_base = base; + idr->idr_next = 0; } +/** + * idr_init() - Initialise an IDR. + * @idr: IDR handle. + * + * Initialise a dynamically allocated IDR. To initialise a + * statically allocated IDR, use DEFINE_IDR(). + */ static inline void idr_init(struct idr *idr) { - INIT_RADIX_TREE(&idr->idr_rt, IDR_RT_MARKER); - idr->idr_next = 0; + idr_init_base(idr, 0); } static inline bool idr_is_empty(const struct idr *idr) @@ -120,25 +147,6 @@ static inline void idr_preload_end(void) preempt_enable(); } -/** - * idr_find() - Return pointer for given ID. - * @idr: IDR handle. - * @id: Pointer ID. - * - * Looks up the pointer associated with this ID. A %NULL pointer may - * indicate that @id is not allocated or that the %NULL pointer was - * associated with this ID. - * - * This function can be called under rcu_read_lock(), given that the leaf - * pointers lifetimes are correctly managed. - * - * Return: The pointer associated with this ID. - */ -static inline void *idr_find(const struct idr *idr, unsigned long id) -{ - return radix_tree_lookup(&idr->idr_rt, id); -} - /** * idr_for_each_entry() - Iterate over an IDR's elements of a given type. * @idr: IDR handle. diff --git a/lib/idr.c b/lib/idr.c index b47055efceb0..c98d77fcf393 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -36,18 +36,21 @@ int idr_alloc_u32(struct idr *idr, void *ptr, u32 *nextid, { struct radix_tree_iter iter; void __rcu **slot; + int base = idr->idr_base; + int id = *nextid; if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr))) return -EINVAL; if (WARN_ON_ONCE(!(idr->idr_rt.gfp_mask & ROOT_IS_IDR))) idr->idr_rt.gfp_mask |= IDR_RT_MARKER; - radix_tree_iter_init(&iter, *nextid); - slot = idr_get_free(&idr->idr_rt, &iter, gfp, max); + id = (id < base) ? 0 : id - base; + radix_tree_iter_init(&iter, id); + slot = idr_get_free(&idr->idr_rt, &iter, gfp, max - base); if (IS_ERR(slot)) return PTR_ERR(slot); - *nextid = iter.index; + *nextid = iter.index + base; /* there is a memory barrier inside radix_tree_iter_replace() */ radix_tree_iter_replace(&idr->idr_rt, &iter, slot, ptr); radix_tree_iter_tag_clear(&idr->idr_rt, &iter, IDR_FREE); @@ -135,6 +138,46 @@ int idr_alloc_cyclic(struct idr *idr, void *ptr, int start, int end, gfp_t gfp) } EXPORT_SYMBOL(idr_alloc_cyclic); +/** + * idr_remove() - Remove an ID from the IDR. + * @idr: IDR handle. + * @id: Pointer ID. + * + * Removes this ID from the IDR. If the ID was not previously in the IDR, + * this function returns %NULL. + * + * Since this function modifies the IDR, the caller should provide their + * own locking to ensure that concurrent modification of the same IDR is + * not possible. + * + * Return: The pointer formerly associated with this ID. + */ +void *idr_remove(struct idr *idr, unsigned long id) +{ + return radix_tree_delete_item(&idr->idr_rt, id - idr->idr_base, NULL); +} +EXPORT_SYMBOL_GPL(idr_remove); + +/** + * idr_find() - Return pointer for given ID. + * @idr: IDR handle. + * @id: Pointer ID. + * + * Looks up the pointer associated with this ID. A %NULL pointer may + * indicate that @id is not allocated or that the %NULL pointer was + * associated with this ID. + * + * This function can be called under rcu_read_lock(), given that the leaf + * pointers lifetimes are correctly managed. + * + * Return: The pointer associated with this ID. + */ +void *idr_find(const struct idr *idr, unsigned long id) +{ + return radix_tree_lookup(&idr->idr_rt, id - idr->idr_base); +} +EXPORT_SYMBOL_GPL(idr_find); + /** * idr_for_each() - Iterate through all stored pointers. * @idr: IDR handle. @@ -157,13 +200,14 @@ int idr_for_each(const struct idr *idr, { struct radix_tree_iter iter; void __rcu **slot; + int base = idr->idr_base; radix_tree_for_each_slot(slot, &idr->idr_rt, &iter, 0) { int ret; if (WARN_ON_ONCE(iter.index > INT_MAX)) break; - ret = fn(iter.index, rcu_dereference_raw(*slot), data); + ret = fn(iter.index + base, rcu_dereference_raw(*slot), data); if (ret) return ret; } @@ -186,15 +230,19 @@ void *idr_get_next(struct idr *idr, int *nextid) { struct radix_tree_iter iter; void __rcu **slot; + int base = idr->idr_base; + int id = *nextid; - slot = radix_tree_iter_find(&idr->idr_rt, &iter, *nextid); + id = (id < base) ? 0 : id - base; + slot = radix_tree_iter_find(&idr->idr_rt, &iter, id); if (!slot) return NULL; + id = iter.index + base; - if (WARN_ON_ONCE(iter.index > INT_MAX)) + if (WARN_ON_ONCE(id > INT_MAX)) return NULL; - *nextid = iter.index; + *nextid = id; return rcu_dereference_raw(*slot); } EXPORT_SYMBOL(idr_get_next); @@ -213,12 +261,15 @@ void *idr_get_next_ul(struct idr *idr, unsigned long *nextid) { struct radix_tree_iter iter; void __rcu **slot; + unsigned long base = idr->idr_base; + unsigned long id = *nextid; - slot = radix_tree_iter_find(&idr->idr_rt, &iter, *nextid); + id = (id < base) ? 0 : id - base; + slot = radix_tree_iter_find(&idr->idr_rt, &iter, id); if (!slot) return NULL; - *nextid = iter.index; + *nextid = iter.index + base; return rcu_dereference_raw(*slot); } EXPORT_SYMBOL(idr_get_next_ul); @@ -245,6 +296,7 @@ void *idr_replace(struct idr *idr, void *ptr, unsigned long id) if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr))) return ERR_PTR(-EINVAL); + id -= idr->idr_base; entry = __radix_tree_lookup(&idr->idr_rt, id, &node, &slot); if (!slot || radix_tree_tag_get(&idr->idr_rt, id, IDR_FREE)) diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index 36437ade429c..44ef9eba5a7a 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -153,11 +153,12 @@ void idr_nowait_test(void) idr_destroy(&idr); } -void idr_get_next_test(void) +void idr_get_next_test(int base) { unsigned long i; int nextid; DEFINE_IDR(idr); + idr_init_base(&idr, base); int indices[] = {4, 7, 9, 15, 65, 128, 1000, 99999, 0}; @@ -244,7 +245,9 @@ void idr_checks(void) idr_alloc_test(); idr_null_test(); idr_nowait_test(); - idr_get_next_test(); + idr_get_next_test(0); + idr_get_next_test(1); + idr_get_next_test(4); } /* -- cgit v1.2.3 From 00a14294bb33af533f7ac002fb20623fdd8ea0d7 Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Tue, 6 Feb 2018 15:36:16 -0800 Subject: kasan: add tests for alloca poisoning Link: http://lkml.kernel.org/r/20171204191735.132544-5-paullawrence@google.com Signed-off-by: Greg Hackmann Signed-off-by: Paul Lawrence Acked-by: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Masahiro Yamada Cc: Matthias Kaehlcke Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_kasan.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'lib') diff --git a/lib/test_kasan.c b/lib/test_kasan.c index ef1a3ac1397e..2724f86c4cef 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -472,6 +472,26 @@ static noinline void __init use_after_scope_test(void) p[1023] = 1; } +static noinline void __init kasan_alloca_oob_left(void) +{ + volatile int i = 10; + char alloca_array[i]; + char *p = alloca_array - 1; + + pr_info("out-of-bounds to left on alloca\n"); + *(volatile char *)p; +} + +static noinline void __init kasan_alloca_oob_right(void) +{ + volatile int i = 10; + char alloca_array[i]; + char *p = alloca_array + i; + + pr_info("out-of-bounds to right on alloca\n"); + *(volatile char *)p; +} + static int __init kmalloc_tests_init(void) { /* @@ -502,6 +522,8 @@ static int __init kmalloc_tests_init(void) memcg_accounted_kmem_cache(); kasan_stack_oob(); kasan_global_oob(); + kasan_alloca_oob_left(); + kasan_alloca_oob_right(); ksize_unpoisons_memory(); copy_user_test(); use_after_scope_test(); -- cgit v1.2.3 From 47adccce3e8a31d315f47183ab1185862b2fc5d4 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Tue, 6 Feb 2018 15:36:23 -0800 Subject: kasan: detect invalid frees for large objects Patch series "kasan: detect invalid frees". KASAN detects double-frees, but does not detect invalid-frees (when a pointer into a middle of heap object is passed to free). We recently had a very unpleasant case in crypto code which freed an inner object inside of a heap allocation. This left unnoticed during free, but totally corrupted heap and later lead to a bunch of random crashes all over kernel code. Detect invalid frees. This patch (of 5): Detect frees of pointers into middle of large heap objects. I dropped const from kasan_kfree_large() because it starts propagating through a bunch of functions in kasan_report.c, slab/slub nearest_obj(), all of their local variables, fixup_red_left(), etc. Link: http://lkml.kernel.org/r/1b45b4fe1d20fc0de1329aab674c1dd973fee723.1514378558.git.dvyukov@google.com Signed-off-by: Dmitry Vyukov Cc: Andrey Ryabinin a Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 4 ++-- lib/test_kasan.c | 33 +++++++++++++++++++++++++++++++++ mm/kasan/kasan.c | 12 +++++------- mm/kasan/kasan.h | 3 +-- mm/kasan/report.c | 3 +-- mm/slub.c | 4 ++-- 6 files changed, 44 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index e3eb834c9a35..fc9e642533f8 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -56,7 +56,7 @@ void kasan_poison_object_data(struct kmem_cache *cache, void *object); void kasan_init_slab_obj(struct kmem_cache *cache, const void *object); void kasan_kmalloc_large(const void *ptr, size_t size, gfp_t flags); -void kasan_kfree_large(const void *ptr); +void kasan_kfree_large(void *ptr); void kasan_poison_kfree(void *ptr); void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size, gfp_t flags); @@ -108,7 +108,7 @@ static inline void kasan_init_slab_obj(struct kmem_cache *cache, const void *object) {} static inline void kasan_kmalloc_large(void *ptr, size_t size, gfp_t flags) {} -static inline void kasan_kfree_large(const void *ptr) {} +static inline void kasan_kfree_large(void *ptr) {} static inline void kasan_poison_kfree(void *ptr) {} static inline void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size, gfp_t flags) {} diff --git a/lib/test_kasan.c b/lib/test_kasan.c index 2724f86c4cef..e9c5d765be66 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -94,6 +94,37 @@ static noinline void __init kmalloc_pagealloc_oob_right(void) ptr[size] = 0; kfree(ptr); } + +static noinline void __init kmalloc_pagealloc_uaf(void) +{ + char *ptr; + size_t size = KMALLOC_MAX_CACHE_SIZE + 10; + + pr_info("kmalloc pagealloc allocation: use-after-free\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + kfree(ptr); + ptr[0] = 0; +} + +static noinline void __init kmalloc_pagealloc_invalid_free(void) +{ + char *ptr; + size_t size = KMALLOC_MAX_CACHE_SIZE + 10; + + pr_info("kmalloc pagealloc allocation: invalid-free\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + kfree(ptr + 1); +} #endif static noinline void __init kmalloc_large_oob_right(void) @@ -505,6 +536,8 @@ static int __init kmalloc_tests_init(void) kmalloc_node_oob_right(); #ifdef CONFIG_SLUB kmalloc_pagealloc_oob_right(); + kmalloc_pagealloc_uaf(); + kmalloc_pagealloc_invalid_free(); #endif kmalloc_large_oob_right(); kmalloc_oob_krealloc_more(); diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 8aaee42fcfab..ecb64fda79e6 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -511,8 +511,7 @@ bool kasan_slab_free(struct kmem_cache *cache, void *object) shadow_byte = READ_ONCE(*(s8 *)kasan_mem_to_shadow(object)); if (shadow_byte < 0 || shadow_byte >= KASAN_SHADOW_SCALE_SIZE) { - kasan_report_double_free(cache, object, - __builtin_return_address(1)); + kasan_report_invalid_free(object, __builtin_return_address(1)); return true; } @@ -602,12 +601,11 @@ void kasan_poison_kfree(void *ptr) kasan_poison_slab_free(page->slab_cache, ptr); } -void kasan_kfree_large(const void *ptr) +void kasan_kfree_large(void *ptr) { - struct page *page = virt_to_page(ptr); - - kasan_poison_shadow(ptr, PAGE_SIZE << compound_order(page), - KASAN_FREE_PAGE); + if (ptr != page_address(virt_to_head_page(ptr))) + kasan_report_invalid_free(ptr, __builtin_return_address(1)); + /* The object will be poisoned by page_alloc. */ } int kasan_module_alloc(void *addr, size_t size) diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 9a768dd71c51..bf353a18c908 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -107,8 +107,7 @@ static inline const void *kasan_shadow_to_mem(const void *shadow_addr) void kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip); -void kasan_report_double_free(struct kmem_cache *cache, void *object, - void *ip); +void kasan_report_invalid_free(void *object, void *ip); #if defined(CONFIG_SLAB) || defined(CONFIG_SLUB) void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache); diff --git a/mm/kasan/report.c b/mm/kasan/report.c index eff12e040498..55916ad21722 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -326,8 +326,7 @@ static void print_shadow_for_address(const void *addr) } } -void kasan_report_double_free(struct kmem_cache *cache, void *object, - void *ip) +void kasan_report_invalid_free(void *object, void *ip) { unsigned long flags; diff --git a/mm/slub.c b/mm/slub.c index cc71176c6eef..b54f8787c674 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1356,7 +1356,7 @@ static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags) kasan_kmalloc_large(ptr, size, flags); } -static inline void kfree_hook(const void *x) +static inline void kfree_hook(void *x) { kmemleak_free(x); kasan_kfree_large(x); @@ -3910,7 +3910,7 @@ void kfree(const void *x) page = virt_to_head_page(x); if (unlikely(!PageSlab(page))) { BUG_ON(!PageCompound(page)); - kfree_hook(x); + kfree_hook(object); __free_pages(page, compound_order(page)); return; } -- cgit v1.2.3 From b1d5728939ebe01a773a75a72e7161408ec9805e Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Tue, 6 Feb 2018 15:36:37 -0800 Subject: kasan: detect invalid frees Detect frees of pointers into middle of heap objects. Link: http://lkml.kernel.org/r/cb569193190356beb018a03bb8d6fbae67e7adbc.1514378558.git.dvyukov@google.com Signed-off-by: Dmitry Vyukov Cc: Andrey Ryabinin a Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_kasan.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ mm/kasan/kasan.c | 6 ++++++ 2 files changed, 56 insertions(+) (limited to 'lib') diff --git a/lib/test_kasan.c b/lib/test_kasan.c index e9c5d765be66..a808d81b409d 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -523,6 +523,54 @@ static noinline void __init kasan_alloca_oob_right(void) *(volatile char *)p; } +static noinline void __init kmem_cache_double_free(void) +{ + char *p; + size_t size = 200; + struct kmem_cache *cache; + + cache = kmem_cache_create("test_cache", size, 0, 0, NULL); + if (!cache) { + pr_err("Cache allocation failed\n"); + return; + } + pr_info("double-free on heap object\n"); + p = kmem_cache_alloc(cache, GFP_KERNEL); + if (!p) { + pr_err("Allocation failed\n"); + kmem_cache_destroy(cache); + return; + } + + kmem_cache_free(cache, p); + kmem_cache_free(cache, p); + kmem_cache_destroy(cache); +} + +static noinline void __init kmem_cache_invalid_free(void) +{ + char *p; + size_t size = 200; + struct kmem_cache *cache; + + cache = kmem_cache_create("test_cache", size, 0, SLAB_TYPESAFE_BY_RCU, + NULL); + if (!cache) { + pr_err("Cache allocation failed\n"); + return; + } + pr_info("invalid-free of heap object\n"); + p = kmem_cache_alloc(cache, GFP_KERNEL); + if (!p) { + pr_err("Allocation failed\n"); + kmem_cache_destroy(cache); + return; + } + + kmem_cache_free(cache, p + 1); + kmem_cache_destroy(cache); +} + static int __init kmalloc_tests_init(void) { /* @@ -560,6 +608,8 @@ static int __init kmalloc_tests_init(void) ksize_unpoisons_memory(); copy_user_test(); use_after_scope_test(); + kmem_cache_double_free(); + kmem_cache_invalid_free(); kasan_restore_multi_shot(multishot); diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 578843fab5dc..3fb497d4fbf8 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -495,6 +495,12 @@ static bool __kasan_slab_free(struct kmem_cache *cache, void *object, s8 shadow_byte; unsigned long rounded_up_size; + if (unlikely(nearest_obj(cache, virt_to_head_page(object), object) != + object)) { + kasan_report_invalid_free(object, ip); + return true; + } + /* RCU slabs could be legally used after free within the RCU period */ if (unlikely(cache->flags & SLAB_TYPESAFE_BY_RCU)) return false; -- cgit v1.2.3 From 48c232395431c23d35cf3b4c5a090bd793316578 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 6 Feb 2018 15:36:48 -0800 Subject: kasan: remove redundant initialization of variable 'real_size' Variable real_size is initialized with a value that is never read, it is re-assigned a new value later on, hence the initialization is redundant and can be removed. Cleans up clang warning: lib/test_kasan.c:422:21: warning: Value stored to 'real_size' during its initialization is never read Link: http://lkml.kernel.org/r/20180206144950.32457-1-colin.king@canonical.com Signed-off-by: Colin Ian King Acked-by: Andrey Ryabinin Reviewed-by: Andrew Morton Cc: Alexander Potapenko Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_kasan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/test_kasan.c b/lib/test_kasan.c index a808d81b409d..98854a64b014 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -419,7 +419,7 @@ static noinline void __init kasan_stack_oob(void) static noinline void __init ksize_unpoisons_memory(void) { char *ptr; - size_t size = 123, real_size = size; + size_t size = 123, real_size; pr_info("ksize() unpoisons the whole allocated chunk\n"); ptr = kmalloc(size, GFP_KERNEL); -- cgit v1.2.3 From c724f193619c896621bf5818d71ce77437f49a06 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Tue, 6 Feb 2018 15:38:02 -0800 Subject: bitmap: new bitmap_copy_safe and bitmap_{from,to}_arr32 This patchset replaces bitmap_{to,from}_u32array with more simple and standard looking copy-like functions. bitmap_from_u32array() takes 4 arguments (bitmap_to_u32array is similar): - unsigned long *bitmap, which is destination; - unsigned int nbits, the length of destination bitmap, in bits; - const u32 *buf, the source; and - unsigned int nwords, the length of source buffer in ints. In description to the function it is detailed like: * copy min(nbits, 32*nwords) bits from @buf to @bitmap, remaining * bits between nword and nbits in @bitmap (if any) are cleared. Having two size arguments looks unneeded and potentially dangerous. It is unneeded because normally user of copy-like function should take care of the size of destination and make it big enough to fit source data. And it is dangerous because function may hide possible error if user doesn't provide big enough bitmap, and data becomes silently dropped. That's why all copy-like functions have 1 argument for size of copying data, and I don't see any reason to make bitmap_from_u32array() different. One exception that comes in mind is strncpy() which also provides size of destination in arguments, but it's strongly argued by the possibility of taking broken strings in source. This is not the case of bitmap_{from,to}_u32array(). There is no many real users of bitmap_{from,to}_u32array(), and they all very clearly provide size of destination matched with the size of source, so additional functionality is not used in fact. Like this: bitmap_from_u32array(to->link_modes.supported, __ETHTOOL_LINK_MODE_MASK_NBITS, link_usettings.link_modes.supported, __ETHTOOL_LINK_MODE_MASK_NU32); Where: #define __ETHTOOL_LINK_MODE_MASK_NU32 \ DIV_ROUND_UP(__ETHTOOL_LINK_MODE_MASK_NBITS, 32) In this patch, bitmap_copy_safe and bitmap_{from,to}_arr32 are introduced. 'Safe' in bitmap_copy_safe() stands for clearing unused bits in bitmap beyond last bit till the end of last word. It is useful for hardening API when bitmap is assumed to be exposed to userspace. bitmap_{from,to}_arr32 functions are replacements for bitmap_{from,to}_u32array. They don't take unneeded nwords argument, and so simpler in implementation and understanding. This patch suggests optimization for 32-bit systems - aliasing bitmap_{from,to}_arr32 to bitmap_copy_safe. Other possible optimization is aliasing 64-bit LE bitmap_{from,to}_arr32 to more generic function(s). But I didn't end up with the function that would be helpful by itself, and can be used to alias 64-bit LE bitmap_{from,to}_arr32, like bitmap_copy_safe() does. So I preferred to leave things as is. The following patch switches kernel to new API and introduces test for it. Discussion is here: https://lkml.org/lkml/2017/11/15/592 [ynorov@caviumnetworks.com: rename bitmap_copy_safe to bitmap_copy_clear_tail] Link: http://lkml.kernel.org/r/20180201172508.5739-3-ynorov@caviumnetworks.com Link: http://lkml.kernel.org/r/20171228150019.27953-1-ynorov@caviumnetworks.com Signed-off-by: Yury Norov Cc: Ben Hutchings Cc: David Decotigny , Cc: David S. Miller , Cc: Geert Uytterhoeven Cc: Matthew Wilcox Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 31 ++++++++++++++++++++++++++++ lib/bitmap.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+) (limited to 'lib') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 3489253e38fc..dac9dff90350 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -66,6 +66,8 @@ * bitmap_allocate_region(bitmap, pos, order) Allocate specified bit region * bitmap_from_u32array(dst, nbits, buf, nwords) *dst = *buf (nwords 32b words) * bitmap_to_u32array(buf, nwords, src, nbits) *buf = *dst (nwords 32b words) + * bitmap_from_arr32(dst, buf, nbits) Copy nbits from u32[] buf to dst + * bitmap_to_arr32(buf, src, nbits) Copy nbits from buf to u32[] dst * */ @@ -228,6 +230,35 @@ static inline void bitmap_copy(unsigned long *dst, const unsigned long *src, } } +/* + * Copy bitmap and clear tail bits in last word. + */ +static inline void bitmap_copy_clear_tail(unsigned long *dst, + const unsigned long *src, unsigned int nbits) +{ + bitmap_copy(dst, src, nbits); + if (nbits % BITS_PER_LONG) + dst[nbits / BITS_PER_LONG] &= BITMAP_LAST_WORD_MASK(nbits); +} + +/* + * On 32-bit systems bitmaps are represented as u32 arrays internally, and + * therefore conversion is not needed when copying data from/to arrays of u32. + */ +#if BITS_PER_LONG == 64 +extern void bitmap_from_arr32(unsigned long *bitmap, const u32 *buf, + unsigned int nbits); +extern void bitmap_to_arr32(u32 *buf, const unsigned long *bitmap, + unsigned int nbits); +#else +#define bitmap_from_arr32(bitmap, buf, nbits) \ + bitmap_copy_clear_tail((unsigned long *) (bitmap), \ + (const unsigned long *) (buf), (nbits)) +#define bitmap_to_arr32(buf, bitmap, nbits) \ + bitmap_copy_clear_tail((unsigned long *) (buf), \ + (const unsigned long *) (bitmap), (nbits)) +#endif + static inline int bitmap_and(unsigned long *dst, const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { diff --git a/lib/bitmap.c b/lib/bitmap.c index d8f0c094b18e..47fe6441562c 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -1214,3 +1214,59 @@ void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int n } EXPORT_SYMBOL(bitmap_copy_le); #endif + +#if BITS_PER_LONG == 64 +/** + * bitmap_from_arr32 - copy the contents of u32 array of bits to bitmap + * @bitmap: array of unsigned longs, the destination bitmap + * @buf: array of u32 (in host byte order), the source bitmap + * @nbits: number of bits in @bitmap + */ +void bitmap_from_arr32(unsigned long *bitmap, const u32 *buf, + unsigned int nbits) +{ + unsigned int i, halfwords; + + if (!nbits) + return; + + halfwords = DIV_ROUND_UP(nbits, 32); + for (i = 0; i < halfwords; i++) { + bitmap[i/2] = (unsigned long) buf[i]; + if (++i < halfwords) + bitmap[i/2] |= ((unsigned long) buf[i]) << 32; + } + + /* Clear tail bits in last word beyond nbits. */ + if (nbits % BITS_PER_LONG) + bitmap[(halfwords - 1) / 2] &= BITMAP_LAST_WORD_MASK(nbits); +} +EXPORT_SYMBOL(bitmap_from_arr32); + +/** + * bitmap_to_arr32 - copy the contents of bitmap to a u32 array of bits + * @buf: array of u32 (in host byte order), the dest bitmap + * @bitmap: array of unsigned longs, the source bitmap + * @nbits: number of bits in @bitmap + */ +void bitmap_to_arr32(u32 *buf, const unsigned long *bitmap, unsigned int nbits) +{ + unsigned int i, halfwords; + + if (!nbits) + return; + + halfwords = DIV_ROUND_UP(nbits, 32); + for (i = 0; i < halfwords; i++) { + buf[i] = (u32) (bitmap[i/2] & UINT_MAX); + if (++i < halfwords) + buf[i] = (u32) (bitmap[i/2] >> 32); + } + + /* Clear tail bits in last element of array beyond nbits. */ + if (nbits % BITS_PER_LONG) + buf[halfwords - 1] &= (u32) (UINT_MAX >> ((-nbits) & 31)); +} +EXPORT_SYMBOL(bitmap_to_arr32); + +#endif -- cgit v1.2.3 From 3aa56885e51683a19c8aa71739fd279b3f501cd7 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Tue, 6 Feb 2018 15:38:06 -0800 Subject: bitmap: replace bitmap_{from,to}_u32array with bitmap_{from,to}_arr32 over the kernel. Additionally to it: * __check_eq_bitmap() now takes single nbits argument. * __check_eq_u32_array is not used in new test but may be used in future. So I don't remove it here, but annotate as __used. Tested on arm64 and 32-bit BE mips. [arnd@arndb.de: perf: arm_dsu_pmu: convert to bitmap_from_arr32] Link: http://lkml.kernel.org/r/20180201172508.5739-2-ynorov@caviumnetworks.com [ynorov@caviumnetworks.com: fix net/core/ethtool.c] Link: http://lkml.kernel.org/r/20180205071747.4ekxtsbgxkj5b2fz@yury-thinkpad Link: http://lkml.kernel.org/r/20171228150019.27953-2-ynorov@caviumnetworks.com Signed-off-by: Yury Norov Signed-off-by: Arnd Bergmann Cc: Ben Hutchings Cc: David Decotigny , Cc: David S. Miller , Cc: Geert Uytterhoeven Cc: Matthew Wilcox Cc: Rasmus Villemoes Cc: Heiner Kallweit Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/kernel/perf_event.c | 5 +- drivers/perf/arm_dsu_pmu.c | 6 +- include/linux/bitmap.h | 11 +-- lib/bitmap.c | 87 ----------------- lib/test_bitmap.c | 206 +++++++---------------------------------- net/core/ethtool.c | 53 +++++------ 6 files changed, 57 insertions(+), 311 deletions(-) (limited to 'lib') diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 3affca3dd96a..75b220ba73a3 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -925,9 +925,8 @@ static void __armv8pmu_probe_pmu(void *info) pmceid[0] = read_sysreg(pmceid0_el0); pmceid[1] = read_sysreg(pmceid1_el0); - bitmap_from_u32array(cpu_pmu->pmceid_bitmap, - ARMV8_PMUV3_MAX_COMMON_EVENTS, pmceid, - ARRAY_SIZE(pmceid)); + bitmap_from_arr32(cpu_pmu->pmceid_bitmap, + pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS); } static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu) diff --git a/drivers/perf/arm_dsu_pmu.c b/drivers/perf/arm_dsu_pmu.c index 93c50e377507..38f2cc2a6c74 100644 --- a/drivers/perf/arm_dsu_pmu.c +++ b/drivers/perf/arm_dsu_pmu.c @@ -658,10 +658,8 @@ static void dsu_pmu_probe_pmu(struct dsu_pmu *dsu_pmu) return; cpmceid[0] = __dsu_pmu_read_pmceid(0); cpmceid[1] = __dsu_pmu_read_pmceid(1); - bitmap_from_u32array(dsu_pmu->cpmceid_bitmap, - DSU_PMU_MAX_COMMON_EVENTS, - cpmceid, - ARRAY_SIZE(cpmceid)); + bitmap_from_arr32(dsu_pmu->cpmceid_bitmap, cpmceid, + DSU_PMU_MAX_COMMON_EVENTS); } static void dsu_pmu_set_active_cpu(int cpu, struct dsu_pmu *dsu_pmu) diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index dac9dff90350..e43533ec7660 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -64,8 +64,6 @@ * bitmap_find_free_region(bitmap, bits, order) Find and allocate bit region * bitmap_release_region(bitmap, pos, order) Free specified bit region * bitmap_allocate_region(bitmap, pos, order) Allocate specified bit region - * bitmap_from_u32array(dst, nbits, buf, nwords) *dst = *buf (nwords 32b words) - * bitmap_to_u32array(buf, nwords, src, nbits) *buf = *dst (nwords 32b words) * bitmap_from_arr32(dst, buf, nbits) Copy nbits from u32[] buf to dst * bitmap_to_arr32(buf, src, nbits) Copy nbits from buf to u32[] dst * @@ -176,14 +174,7 @@ extern void bitmap_fold(unsigned long *dst, const unsigned long *orig, extern int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order); extern void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order); extern int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order); -extern unsigned int bitmap_from_u32array(unsigned long *bitmap, - unsigned int nbits, - const u32 *buf, - unsigned int nwords); -extern unsigned int bitmap_to_u32array(u32 *buf, - unsigned int nwords, - const unsigned long *bitmap, - unsigned int nbits); + #ifdef __BIG_ENDIAN extern void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits); #else diff --git a/lib/bitmap.c b/lib/bitmap.c index 47fe6441562c..9e498c77ed0e 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -1105,93 +1105,6 @@ int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order) } EXPORT_SYMBOL(bitmap_allocate_region); -/** - * bitmap_from_u32array - copy the contents of a u32 array of bits to bitmap - * @bitmap: array of unsigned longs, the destination bitmap, non NULL - * @nbits: number of bits in @bitmap - * @buf: array of u32 (in host byte order), the source bitmap, non NULL - * @nwords: number of u32 words in @buf - * - * copy min(nbits, 32*nwords) bits from @buf to @bitmap, remaining - * bits between nword and nbits in @bitmap (if any) are cleared. In - * last word of @bitmap, the bits beyond nbits (if any) are kept - * unchanged. - * - * Return the number of bits effectively copied. - */ -unsigned int -bitmap_from_u32array(unsigned long *bitmap, unsigned int nbits, - const u32 *buf, unsigned int nwords) -{ - unsigned int dst_idx, src_idx; - - for (src_idx = dst_idx = 0; dst_idx < BITS_TO_LONGS(nbits); ++dst_idx) { - unsigned long part = 0; - - if (src_idx < nwords) - part = buf[src_idx++]; - -#if BITS_PER_LONG == 64 - if (src_idx < nwords) - part |= ((unsigned long) buf[src_idx++]) << 32; -#endif - - if (dst_idx < nbits/BITS_PER_LONG) - bitmap[dst_idx] = part; - else { - unsigned long mask = BITMAP_LAST_WORD_MASK(nbits); - - bitmap[dst_idx] = (bitmap[dst_idx] & ~mask) - | (part & mask); - } - } - - return min_t(unsigned int, nbits, 32*nwords); -} -EXPORT_SYMBOL(bitmap_from_u32array); - -/** - * bitmap_to_u32array - copy the contents of bitmap to a u32 array of bits - * @buf: array of u32 (in host byte order), the dest bitmap, non NULL - * @nwords: number of u32 words in @buf - * @bitmap: array of unsigned longs, the source bitmap, non NULL - * @nbits: number of bits in @bitmap - * - * copy min(nbits, 32*nwords) bits from @bitmap to @buf. Remaining - * bits after nbits in @buf (if any) are cleared. - * - * Return the number of bits effectively copied. - */ -unsigned int -bitmap_to_u32array(u32 *buf, unsigned int nwords, - const unsigned long *bitmap, unsigned int nbits) -{ - unsigned int dst_idx = 0, src_idx = 0; - - while (dst_idx < nwords) { - unsigned long part = 0; - - if (src_idx < BITS_TO_LONGS(nbits)) { - part = bitmap[src_idx]; - if (src_idx >= nbits/BITS_PER_LONG) - part &= BITMAP_LAST_WORD_MASK(nbits); - src_idx++; - } - - buf[dst_idx++] = part & 0xffffffffUL; - -#if BITS_PER_LONG == 64 - if (dst_idx < nwords) { - part >>= 32; - buf[dst_idx++] = part & 0xffffffffUL; - } -#endif - } - - return min_t(unsigned int, nbits, 32*nwords); -} -EXPORT_SYMBOL(bitmap_to_u32array); - /** * bitmap_copy_le - copy a bitmap, putting the bits into little-endian order. * @dst: destination buffer diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index aa1f2669bdd5..de7ef2996a07 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -23,7 +23,7 @@ __check_eq_uint(const char *srcfile, unsigned int line, const unsigned int exp_uint, unsigned int x) { if (exp_uint != x) { - pr_warn("[%s:%u] expected %u, got %u\n", + pr_err("[%s:%u] expected %u, got %u\n", srcfile, line, exp_uint, x); return false; } @@ -33,19 +33,13 @@ __check_eq_uint(const char *srcfile, unsigned int line, static bool __init __check_eq_bitmap(const char *srcfile, unsigned int line, - const unsigned long *exp_bmap, unsigned int exp_nbits, - const unsigned long *bmap, unsigned int nbits) + const unsigned long *exp_bmap, const unsigned long *bmap, + unsigned int nbits) { - if (exp_nbits != nbits) { - pr_warn("[%s:%u] bitmap length mismatch: expected %u, got %u\n", - srcfile, line, exp_nbits, nbits); - return false; - } - if (!bitmap_equal(exp_bmap, bmap, nbits)) { pr_warn("[%s:%u] bitmaps contents differ: expected \"%*pbl\", got \"%*pbl\"\n", srcfile, line, - exp_nbits, exp_bmap, nbits, bmap); + nbits, exp_bmap, nbits, bmap); return false; } return true; @@ -66,6 +60,10 @@ __check_eq_pbl(const char *srcfile, unsigned int line, return true; } +static bool __init +__check_eq_u32_array(const char *srcfile, unsigned int line, + const u32 *exp_arr, unsigned int exp_len, + const u32 *arr, unsigned int len) __used; static bool __init __check_eq_u32_array(const char *srcfile, unsigned int line, const u32 *exp_arr, unsigned int exp_len, @@ -255,171 +253,29 @@ static void __init test_bitmap_parselist(void) } } -static void __init test_bitmap_u32_array_conversions(void) +static void __init test_bitmap_arr32(void) { - DECLARE_BITMAP(bmap1, 1024); - DECLARE_BITMAP(bmap2, 1024); - u32 exp_arr[32], arr[32]; - unsigned nbits; - - for (nbits = 0 ; nbits < 257 ; ++nbits) { - const unsigned int used_u32s = DIV_ROUND_UP(nbits, 32); - unsigned int i, rv; - - bitmap_zero(bmap1, nbits); - bitmap_set(bmap1, nbits, 1024 - nbits); /* garbage */ - - memset(arr, 0xff, sizeof(arr)); - rv = bitmap_to_u32array(arr, used_u32s, bmap1, nbits); - expect_eq_uint(nbits, rv); - - memset(exp_arr, 0xff, sizeof(exp_arr)); - memset(exp_arr, 0, used_u32s*sizeof(*exp_arr)); - expect_eq_u32_array(exp_arr, 32, arr, 32); - - bitmap_fill(bmap2, 1024); - rv = bitmap_from_u32array(bmap2, nbits, arr, used_u32s); - expect_eq_uint(nbits, rv); - expect_eq_bitmap(bmap1, 1024, bmap2, 1024); - - for (i = 0 ; i < nbits ; ++i) { - /* - * test conversion bitmap -> u32[] - */ - - bitmap_zero(bmap1, 1024); - __set_bit(i, bmap1); - bitmap_set(bmap1, nbits, 1024 - nbits); /* garbage */ - - memset(arr, 0xff, sizeof(arr)); - rv = bitmap_to_u32array(arr, used_u32s, bmap1, nbits); - expect_eq_uint(nbits, rv); - - /* 1st used u32 words contain expected bit set, the - * remaining words are left unchanged (0xff) - */ - memset(exp_arr, 0xff, sizeof(exp_arr)); - memset(exp_arr, 0, used_u32s*sizeof(*exp_arr)); - exp_arr[i/32] = (1U<<(i%32)); - expect_eq_u32_array(exp_arr, 32, arr, 32); - - - /* same, with longer array to fill - */ - memset(arr, 0xff, sizeof(arr)); - rv = bitmap_to_u32array(arr, 32, bmap1, nbits); - expect_eq_uint(nbits, rv); - - /* 1st used u32 words contain expected bit set, the - * remaining words are all 0s - */ - memset(exp_arr, 0, sizeof(exp_arr)); - exp_arr[i/32] = (1U<<(i%32)); - expect_eq_u32_array(exp_arr, 32, arr, 32); - - /* - * test conversion u32[] -> bitmap - */ - - /* the 1st nbits of bmap2 are identical to - * bmap1, the remaining bits of bmap2 are left - * unchanged (all 1s) - */ - bitmap_fill(bmap2, 1024); - rv = bitmap_from_u32array(bmap2, nbits, - exp_arr, used_u32s); - expect_eq_uint(nbits, rv); - - expect_eq_bitmap(bmap1, 1024, bmap2, 1024); - - /* same, with more bits to fill - */ - memset(arr, 0xff, sizeof(arr)); /* garbage */ - memset(arr, 0, used_u32s*sizeof(u32)); - arr[i/32] = (1U<<(i%32)); - - bitmap_fill(bmap2, 1024); - rv = bitmap_from_u32array(bmap2, 1024, arr, used_u32s); - expect_eq_uint(used_u32s*32, rv); - - /* the 1st nbits of bmap2 are identical to - * bmap1, the remaining bits of bmap2 are cleared - */ - bitmap_zero(bmap1, 1024); - __set_bit(i, bmap1); - expect_eq_bitmap(bmap1, 1024, bmap2, 1024); - - - /* - * test short conversion bitmap -> u32[] (1 - * word too short) - */ - if (used_u32s > 1) { - bitmap_zero(bmap1, 1024); - __set_bit(i, bmap1); - bitmap_set(bmap1, nbits, - 1024 - nbits); /* garbage */ - memset(arr, 0xff, sizeof(arr)); - - rv = bitmap_to_u32array(arr, used_u32s - 1, - bmap1, nbits); - expect_eq_uint((used_u32s - 1)*32, rv); - - /* 1st used u32 words contain expected - * bit set, the remaining words are - * left unchanged (0xff) - */ - memset(exp_arr, 0xff, sizeof(exp_arr)); - memset(exp_arr, 0, - (used_u32s-1)*sizeof(*exp_arr)); - if ((i/32) < (used_u32s - 1)) - exp_arr[i/32] = (1U<<(i%32)); - expect_eq_u32_array(exp_arr, 32, arr, 32); - } - - /* - * test short conversion u32[] -> bitmap (3 - * bits too short) - */ - if (nbits > 3) { - memset(arr, 0xff, sizeof(arr)); /* garbage */ - memset(arr, 0, used_u32s*sizeof(*arr)); - arr[i/32] = (1U<<(i%32)); - - bitmap_zero(bmap1, 1024); - rv = bitmap_from_u32array(bmap1, nbits - 3, - arr, used_u32s); - expect_eq_uint(nbits - 3, rv); - - /* we are expecting the bit < nbits - - * 3 (none otherwise), and the rest of - * bmap1 unchanged (0-filled) - */ - bitmap_zero(bmap2, 1024); - if (i < nbits - 3) - __set_bit(i, bmap2); - expect_eq_bitmap(bmap2, 1024, bmap1, 1024); - - /* do the same with bmap1 initially - * 1-filled - */ - - bitmap_fill(bmap1, 1024); - rv = bitmap_from_u32array(bmap1, nbits - 3, - arr, used_u32s); - expect_eq_uint(nbits - 3, rv); - - /* we are expecting the bit < nbits - - * 3 (none otherwise), and the rest of - * bmap1 unchanged (1-filled) - */ - bitmap_zero(bmap2, 1024); - if (i < nbits - 3) - __set_bit(i, bmap2); - bitmap_set(bmap2, nbits-3, 1024 - nbits + 3); - expect_eq_bitmap(bmap2, 1024, bmap1, 1024); - } - } + unsigned int nbits, next_bit, len = sizeof(exp) * 8; + u32 arr[sizeof(exp) / 4]; + DECLARE_BITMAP(bmap2, len); + + memset(arr, 0xa5, sizeof(arr)); + + for (nbits = 0; nbits < len; ++nbits) { + bitmap_to_arr32(arr, exp, nbits); + bitmap_from_arr32(bmap2, arr, nbits); + expect_eq_bitmap(bmap2, exp, nbits); + + next_bit = find_next_bit(bmap2, + round_up(nbits, BITS_PER_LONG), nbits); + if (next_bit < round_up(nbits, BITS_PER_LONG)) + pr_err("bitmap_copy_arr32(nbits == %d:" + " tail is not safely cleared: %d\n", + nbits, next_bit); + + if (nbits < len - 32) + expect_eq_uint(arr[DIV_ROUND_UP(nbits, 32)], + 0xa5a5a5a5); } } @@ -454,7 +310,7 @@ static void noinline __init test_mem_optimisations(void) static int __init test_bitmap_init(void) { test_zero_fill_copy(); - test_bitmap_u32_array_conversions(); + test_bitmap_arr32(); test_bitmap_parselist(); test_mem_optimisations(); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 107b122c8969..494e6a5d7306 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -616,18 +616,15 @@ static int load_link_ksettings_from_user(struct ethtool_link_ksettings *to, return -EFAULT; memcpy(&to->base, &link_usettings.base, sizeof(to->base)); - bitmap_from_u32array(to->link_modes.supported, - __ETHTOOL_LINK_MODE_MASK_NBITS, - link_usettings.link_modes.supported, - __ETHTOOL_LINK_MODE_MASK_NU32); - bitmap_from_u32array(to->link_modes.advertising, - __ETHTOOL_LINK_MODE_MASK_NBITS, - link_usettings.link_modes.advertising, - __ETHTOOL_LINK_MODE_MASK_NU32); - bitmap_from_u32array(to->link_modes.lp_advertising, - __ETHTOOL_LINK_MODE_MASK_NBITS, - link_usettings.link_modes.lp_advertising, - __ETHTOOL_LINK_MODE_MASK_NU32); + bitmap_from_arr32(to->link_modes.supported, + link_usettings.link_modes.supported, + __ETHTOOL_LINK_MODE_MASK_NBITS); + bitmap_from_arr32(to->link_modes.advertising, + link_usettings.link_modes.advertising, + __ETHTOOL_LINK_MODE_MASK_NBITS); + bitmap_from_arr32(to->link_modes.lp_advertising, + link_usettings.link_modes.lp_advertising, + __ETHTOOL_LINK_MODE_MASK_NBITS); return 0; } @@ -643,18 +640,15 @@ store_link_ksettings_for_user(void __user *to, struct ethtool_link_usettings link_usettings; memcpy(&link_usettings.base, &from->base, sizeof(link_usettings)); - bitmap_to_u32array(link_usettings.link_modes.supported, - __ETHTOOL_LINK_MODE_MASK_NU32, - from->link_modes.supported, - __ETHTOOL_LINK_MODE_MASK_NBITS); - bitmap_to_u32array(link_usettings.link_modes.advertising, - __ETHTOOL_LINK_MODE_MASK_NU32, - from->link_modes.advertising, - __ETHTOOL_LINK_MODE_MASK_NBITS); - bitmap_to_u32array(link_usettings.link_modes.lp_advertising, - __ETHTOOL_LINK_MODE_MASK_NU32, - from->link_modes.lp_advertising, - __ETHTOOL_LINK_MODE_MASK_NBITS); + bitmap_to_arr32(link_usettings.link_modes.supported, + from->link_modes.supported, + __ETHTOOL_LINK_MODE_MASK_NBITS); + bitmap_to_arr32(link_usettings.link_modes.advertising, + from->link_modes.advertising, + __ETHTOOL_LINK_MODE_MASK_NBITS); + bitmap_to_arr32(link_usettings.link_modes.lp_advertising, + from->link_modes.lp_advertising, + __ETHTOOL_LINK_MODE_MASK_NBITS); if (copy_to_user(to, &link_usettings, sizeof(link_usettings))) return -EFAULT; @@ -2358,10 +2352,8 @@ static int ethtool_get_per_queue_coalesce(struct net_device *dev, useraddr += sizeof(*per_queue_opt); - bitmap_from_u32array(queue_mask, - MAX_NUM_QUEUE, - per_queue_opt->queue_mask, - DIV_ROUND_UP(MAX_NUM_QUEUE, 32)); + bitmap_from_arr32(queue_mask, per_queue_opt->queue_mask, + MAX_NUM_QUEUE); for_each_set_bit(bit, queue_mask, MAX_NUM_QUEUE) { struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE }; @@ -2393,10 +2385,7 @@ static int ethtool_set_per_queue_coalesce(struct net_device *dev, useraddr += sizeof(*per_queue_opt); - bitmap_from_u32array(queue_mask, - MAX_NUM_QUEUE, - per_queue_opt->queue_mask, - DIV_ROUND_UP(MAX_NUM_QUEUE, 32)); + bitmap_from_arr32(queue_mask, per_queue_opt->queue_mask, MAX_NUM_QUEUE); n_queue = bitmap_weight(queue_mask, MAX_NUM_QUEUE); tmp = backup = kmalloc_array(n_queue, sizeof(*backup), GFP_KERNEL); if (!backup) -- cgit v1.2.3 From ee3527bd5e48e6c892abfdcb36969c1eb2fd4a6e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 6 Feb 2018 15:38:10 -0800 Subject: lib/test_bitmap.c: add bitmap_zero()/bitmap_clear() test cases Explicitly test bitmap_zero() and bitmap_clear() functions. Link: http://lkml.kernel.org/r/20180109172430.87452-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Reviewed-by: Yury Norov Cc: Rasmus Villemoes Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_bitmap.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'lib') diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index de7ef2996a07..9734af711816 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -105,6 +105,35 @@ __check_eq_u32_array(const char *srcfile, unsigned int line, #define expect_eq_pbl(...) __expect_eq(pbl, ##__VA_ARGS__) #define expect_eq_u32_array(...) __expect_eq(u32_array, ##__VA_ARGS__) +static void __init test_zero_clear(void) +{ + DECLARE_BITMAP(bmap, 1024); + + /* Known way to set all bits */ + memset(bmap, 0xff, 128); + + expect_eq_pbl("0-22", bmap, 23); + expect_eq_pbl("0-1023", bmap, 1024); + + /* single-word bitmaps */ + bitmap_clear(bmap, 0, 9); + expect_eq_pbl("9-1023", bmap, 1024); + + bitmap_zero(bmap, 35); + expect_eq_pbl("64-1023", bmap, 1024); + + /* cross boundaries operations */ + bitmap_clear(bmap, 79, 19); + expect_eq_pbl("64-78,98-1023", bmap, 1024); + + bitmap_zero(bmap, 115); + expect_eq_pbl("128-1023", bmap, 1024); + + /* Zeroing entire area */ + bitmap_zero(bmap, 1024); + expect_eq_pbl("", bmap, 1024); +} + static void __init test_zero_fill_copy(void) { DECLARE_BITMAP(bmap1, 1024); @@ -309,6 +338,7 @@ static void noinline __init test_mem_optimisations(void) static int __init test_bitmap_init(void) { + test_zero_clear(); test_zero_fill_copy(); test_bitmap_arr32(); test_bitmap_parselist(); -- cgit v1.2.3 From 978f369c5c4777c32e686ecff5aaa5b677afc564 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 6 Feb 2018 15:38:13 -0800 Subject: lib/test_bitmap.c: add bitmap_fill()/bitmap_set() test cases Explicitly test bitmap_fill() and bitmap_set() functions. For bitmap_fill() we expect a consistent behaviour as in bitmap_zero(), i.e. the trailing bits will be set up to unsigned long boundary. Link: http://lkml.kernel.org/r/20180109172430.87452-2-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Reviewed-by: Yury Norov Cc: Randy Dunlap Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_bitmap.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'lib') diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index 9734af711816..6889fcc0e1f4 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -134,6 +134,35 @@ static void __init test_zero_clear(void) expect_eq_pbl("", bmap, 1024); } +static void __init test_fill_set(void) +{ + DECLARE_BITMAP(bmap, 1024); + + /* Known way to clear all bits */ + memset(bmap, 0x00, 128); + + expect_eq_pbl("", bmap, 23); + expect_eq_pbl("", bmap, 1024); + + /* single-word bitmaps */ + bitmap_set(bmap, 0, 9); + expect_eq_pbl("0-8", bmap, 1024); + + bitmap_fill(bmap, 35); + expect_eq_pbl("0-63", bmap, 1024); + + /* cross boundaries operations */ + bitmap_set(bmap, 79, 19); + expect_eq_pbl("0-63,79-97", bmap, 1024); + + bitmap_fill(bmap, 115); + expect_eq_pbl("0-127", bmap, 1024); + + /* Zeroing entire area */ + bitmap_fill(bmap, 1024); + expect_eq_pbl("0-1023", bmap, 1024); +} + static void __init test_zero_fill_copy(void) { DECLARE_BITMAP(bmap1, 1024); @@ -339,6 +368,7 @@ static void noinline __init test_mem_optimisations(void) static int __init test_bitmap_init(void) { test_zero_clear(); + test_fill_set(); test_zero_fill_copy(); test_bitmap_arr32(); test_bitmap_parselist(); -- cgit v1.2.3 From fe81814c3e091adde489e9d7ac1179340845e396 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 6 Feb 2018 15:38:17 -0800 Subject: lib/test_bitmap.c: clean up test_zero_fill_copy() test case and rename Since we have separate explicit test cases for bitmap_zero() / bitmap_clear() and bitmap_fill() / bitmap_set(), clean up test_zero_fill_copy() to only test bitmap_copy() functionality and thus rename a function to reflect the changes. While here, replace bitmap_fill() by bitmap_set() with proper values. Link: http://lkml.kernel.org/r/20180109172430.87452-3-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Reviewed-by: Yury Norov Cc: Randy Dunlap Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_bitmap.c | 29 +++++------------------------ 1 file changed, 5 insertions(+), 24 deletions(-) (limited to 'lib') diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index 6889fcc0e1f4..b3f235baa05d 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -163,7 +163,7 @@ static void __init test_fill_set(void) expect_eq_pbl("0-1023", bmap, 1024); } -static void __init test_zero_fill_copy(void) +static void __init test_copy(void) { DECLARE_BITMAP(bmap1, 1024); DECLARE_BITMAP(bmap2, 1024); @@ -172,36 +172,20 @@ static void __init test_zero_fill_copy(void) bitmap_zero(bmap2, 1024); /* single-word bitmaps */ - expect_eq_pbl("", bmap1, 23); - - bitmap_fill(bmap1, 19); - expect_eq_pbl("0-18", bmap1, 1024); - + bitmap_set(bmap1, 0, 19); bitmap_copy(bmap2, bmap1, 23); expect_eq_pbl("0-18", bmap2, 1024); - bitmap_fill(bmap2, 23); - expect_eq_pbl("0-22", bmap2, 1024); - + bitmap_set(bmap2, 0, 23); bitmap_copy(bmap2, bmap1, 23); expect_eq_pbl("0-18", bmap2, 1024); - bitmap_zero(bmap1, 23); - expect_eq_pbl("", bmap1, 1024); - /* multi-word bitmaps */ - bitmap_zero(bmap1, 1024); - expect_eq_pbl("", bmap1, 1024); - - bitmap_fill(bmap1, 109); - expect_eq_pbl("0-108", bmap1, 1024); - + bitmap_set(bmap1, 0, 109); bitmap_copy(bmap2, bmap1, 1024); expect_eq_pbl("0-108", bmap2, 1024); bitmap_fill(bmap2, 1024); - expect_eq_pbl("0-1023", bmap2, 1024); - bitmap_copy(bmap2, bmap1, 1024); expect_eq_pbl("0-108", bmap2, 1024); @@ -216,9 +200,6 @@ static void __init test_zero_fill_copy(void) bitmap_fill(bmap2, 1024); bitmap_copy(bmap2, bmap1, 97); /* ... but aligned on word length */ expect_eq_pbl("0-108,128-1023", bmap2, 1024); - - bitmap_zero(bmap2, 97); /* ... but 0-padded til word length */ - expect_eq_pbl("128-1023", bmap2, 1024); } #define PARSE_TIME 0x1 @@ -369,7 +350,7 @@ static int __init test_bitmap_init(void) { test_zero_clear(); test_fill_set(); - test_zero_fill_copy(); + test_copy(); test_bitmap_arr32(); test_bitmap_parselist(); test_mem_optimisations(); -- cgit v1.2.3 From a571b272ab0f82399e8b2ede8c95d153d76a3534 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Tue, 6 Feb 2018 15:38:24 -0800 Subject: lib/stackdepot.c: use a non-instrumented version of memcmp() stackdepot used to call memcmp(), which compiler tools normally instrument, therefore every lookup used to unnecessarily call instrumented code. This is somewhat ok in the case of KASAN, but under KMSAN a lot of time was spent in the instrumentation. Link: http://lkml.kernel.org/r/20171117172149.69562-1-glider@google.com Signed-off-by: Alexander Potapenko Cc: Andrey Ryabinin Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/stackdepot.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/stackdepot.c b/lib/stackdepot.c index f87d138e9672..e513459a5601 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -163,6 +163,21 @@ static inline u32 hash_stack(unsigned long *entries, unsigned int size) STACK_HASH_SEED); } +/* Use our own, non-instrumented version of memcmp(). + * + * We actually don't care about the order, just the equality. + */ +static inline +int stackdepot_memcmp(const unsigned long *u1, const unsigned long *u2, + unsigned int n) +{ + for ( ; n-- ; u1++, u2++) { + if (*u1 != *u2) + return 1; + } + return 0; +} + /* Find a stack that is equal to the one stored in entries in the hash */ static inline struct stack_record *find_stack(struct stack_record *bucket, unsigned long *entries, int size, @@ -173,10 +188,8 @@ static inline struct stack_record *find_stack(struct stack_record *bucket, for (found = bucket; found; found = found->next) { if (found->hash == hash && found->size == size && - !memcmp(entries, found->entries, - size * sizeof(unsigned long))) { + !stackdepot_memcmp(entries, found->entries, size)) return found; - } } return NULL; } -- cgit v1.2.3 From dceeb3e7fd5cdafb6b8f70321fc4d994c95c3554 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Tue, 6 Feb 2018 15:38:27 -0800 Subject: lib/test_find_bit.c: rename to find_bit_benchmark.c As suggested in review comments, rename test_find_bit.c to find_bit_benchmark.c. Link: http://lkml.kernel.org/r/20171124143040.a44jvhmnaiyedg2i@yury-thinkpad Signed-off-by: Yury Norov Tested-by: Geert Uytterhoeven Cc: Alexey Dobriyan Cc: Clement Courbet Cc: Matthew Wilcox Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 2 +- lib/Makefile | 2 +- lib/find_bit_benchmark.c | 144 +++++++++++++++++++++++++++++++++++++++++++++++ lib/test_find_bit.c | 144 ----------------------------------------------- 4 files changed, 146 insertions(+), 146 deletions(-) create mode 100644 lib/find_bit_benchmark.c delete mode 100644 lib/test_find_bit.c (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 64d7c19d3167..cb9dd85a8356 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1841,7 +1841,7 @@ config TEST_BPF If unsure, say N. -config TEST_FIND_BIT +config FIND_BIT_BENCHMARK tristate "Test find_bit functions" default n help diff --git a/lib/Makefile b/lib/Makefile index 7adb066692b3..a90d4fcd748f 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -46,8 +46,8 @@ obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o obj-y += hexdump.o obj-$(CONFIG_TEST_HEXDUMP) += test_hexdump.o obj-y += kstrtox.o +obj-$(CONFIG_FIND_BIT_BENCHMARK) += find_bit_benchmark.o obj-$(CONFIG_TEST_BPF) += test_bpf.o -obj-$(CONFIG_TEST_FIND_BIT) += test_find_bit.o obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o obj-$(CONFIG_TEST_SYSCTL) += test_sysctl.o obj-$(CONFIG_TEST_HASH) += test_hash.o test_siphash.o diff --git a/lib/find_bit_benchmark.c b/lib/find_bit_benchmark.c new file mode 100644 index 000000000000..f4394a36f9aa --- /dev/null +++ b/lib/find_bit_benchmark.c @@ -0,0 +1,144 @@ +/* + * Test for find_*_bit functions. + * + * Copyright (c) 2017 Cavium. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +/* + * find_bit functions are widely used in kernel, so the successful boot + * is good enough test for correctness. + * + * This test is focused on performance of traversing bitmaps. Two typical + * scenarios are reproduced: + * - randomly filled bitmap with approximately equal number of set and + * cleared bits; + * - sparse bitmap with few set bits at random positions. + */ + +#include +#include +#include +#include +#include +#include + +#define BITMAP_LEN (4096UL * 8 * 10) +#define SPARSE 500 + +static DECLARE_BITMAP(bitmap, BITMAP_LEN) __initdata; + +/* + * This is Schlemiel the Painter's algorithm. It should be called after + * all other tests for the same bitmap because it sets all bits of bitmap to 1. + */ +static int __init test_find_first_bit(void *bitmap, unsigned long len) +{ + unsigned long i, cnt; + cycles_t cycles; + + cycles = get_cycles(); + for (cnt = i = 0; i < len; cnt++) { + i = find_first_bit(bitmap, len); + __clear_bit(i, bitmap); + } + cycles = get_cycles() - cycles; + pr_err("find_first_bit:\t\t%llu cycles,\t%ld iterations\n", + (u64)cycles, cnt); + + return 0; +} + +static int __init test_find_next_bit(const void *bitmap, unsigned long len) +{ + unsigned long i, cnt; + cycles_t cycles; + + cycles = get_cycles(); + for (cnt = i = 0; i < BITMAP_LEN; cnt++) + i = find_next_bit(bitmap, BITMAP_LEN, i) + 1; + cycles = get_cycles() - cycles; + pr_err("find_next_bit:\t\t%llu cycles,\t%ld iterations\n", + (u64)cycles, cnt); + + return 0; +} + +static int __init test_find_next_zero_bit(const void *bitmap, unsigned long len) +{ + unsigned long i, cnt; + cycles_t cycles; + + cycles = get_cycles(); + for (cnt = i = 0; i < BITMAP_LEN; cnt++) + i = find_next_zero_bit(bitmap, len, i) + 1; + cycles = get_cycles() - cycles; + pr_err("find_next_zero_bit:\t%llu cycles,\t%ld iterations\n", + (u64)cycles, cnt); + + return 0; +} + +static int __init test_find_last_bit(const void *bitmap, unsigned long len) +{ + unsigned long l, cnt = 0; + cycles_t cycles; + + cycles = get_cycles(); + do { + cnt++; + l = find_last_bit(bitmap, len); + if (l >= len) + break; + len = l; + } while (len); + cycles = get_cycles() - cycles; + pr_err("find_last_bit:\t\t%llu cycles,\t%ld iterations\n", + (u64)cycles, cnt); + + return 0; +} + +static int __init find_bit_test(void) +{ + unsigned long nbits = BITMAP_LEN / SPARSE; + + pr_err("\nStart testing find_bit() with random-filled bitmap\n"); + + get_random_bytes(bitmap, sizeof(bitmap)); + + test_find_next_bit(bitmap, BITMAP_LEN); + test_find_next_zero_bit(bitmap, BITMAP_LEN); + test_find_last_bit(bitmap, BITMAP_LEN); + test_find_first_bit(bitmap, BITMAP_LEN); + + pr_err("\nStart testing find_bit() with sparse bitmap\n"); + + bitmap_zero(bitmap, BITMAP_LEN); + + while (nbits--) + __set_bit(prandom_u32() % BITMAP_LEN, bitmap); + + test_find_next_bit(bitmap, BITMAP_LEN); + test_find_next_zero_bit(bitmap, BITMAP_LEN); + test_find_last_bit(bitmap, BITMAP_LEN); + test_find_first_bit(bitmap, BITMAP_LEN); + + return 0; +} +module_init(find_bit_test); + +static void __exit test_find_bit_cleanup(void) +{ +} +module_exit(test_find_bit_cleanup); + +MODULE_LICENSE("GPL"); diff --git a/lib/test_find_bit.c b/lib/test_find_bit.c deleted file mode 100644 index f4394a36f9aa..000000000000 --- a/lib/test_find_bit.c +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Test for find_*_bit functions. - * - * Copyright (c) 2017 Cavium. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - */ - -/* - * find_bit functions are widely used in kernel, so the successful boot - * is good enough test for correctness. - * - * This test is focused on performance of traversing bitmaps. Two typical - * scenarios are reproduced: - * - randomly filled bitmap with approximately equal number of set and - * cleared bits; - * - sparse bitmap with few set bits at random positions. - */ - -#include -#include -#include -#include -#include -#include - -#define BITMAP_LEN (4096UL * 8 * 10) -#define SPARSE 500 - -static DECLARE_BITMAP(bitmap, BITMAP_LEN) __initdata; - -/* - * This is Schlemiel the Painter's algorithm. It should be called after - * all other tests for the same bitmap because it sets all bits of bitmap to 1. - */ -static int __init test_find_first_bit(void *bitmap, unsigned long len) -{ - unsigned long i, cnt; - cycles_t cycles; - - cycles = get_cycles(); - for (cnt = i = 0; i < len; cnt++) { - i = find_first_bit(bitmap, len); - __clear_bit(i, bitmap); - } - cycles = get_cycles() - cycles; - pr_err("find_first_bit:\t\t%llu cycles,\t%ld iterations\n", - (u64)cycles, cnt); - - return 0; -} - -static int __init test_find_next_bit(const void *bitmap, unsigned long len) -{ - unsigned long i, cnt; - cycles_t cycles; - - cycles = get_cycles(); - for (cnt = i = 0; i < BITMAP_LEN; cnt++) - i = find_next_bit(bitmap, BITMAP_LEN, i) + 1; - cycles = get_cycles() - cycles; - pr_err("find_next_bit:\t\t%llu cycles,\t%ld iterations\n", - (u64)cycles, cnt); - - return 0; -} - -static int __init test_find_next_zero_bit(const void *bitmap, unsigned long len) -{ - unsigned long i, cnt; - cycles_t cycles; - - cycles = get_cycles(); - for (cnt = i = 0; i < BITMAP_LEN; cnt++) - i = find_next_zero_bit(bitmap, len, i) + 1; - cycles = get_cycles() - cycles; - pr_err("find_next_zero_bit:\t%llu cycles,\t%ld iterations\n", - (u64)cycles, cnt); - - return 0; -} - -static int __init test_find_last_bit(const void *bitmap, unsigned long len) -{ - unsigned long l, cnt = 0; - cycles_t cycles; - - cycles = get_cycles(); - do { - cnt++; - l = find_last_bit(bitmap, len); - if (l >= len) - break; - len = l; - } while (len); - cycles = get_cycles() - cycles; - pr_err("find_last_bit:\t\t%llu cycles,\t%ld iterations\n", - (u64)cycles, cnt); - - return 0; -} - -static int __init find_bit_test(void) -{ - unsigned long nbits = BITMAP_LEN / SPARSE; - - pr_err("\nStart testing find_bit() with random-filled bitmap\n"); - - get_random_bytes(bitmap, sizeof(bitmap)); - - test_find_next_bit(bitmap, BITMAP_LEN); - test_find_next_zero_bit(bitmap, BITMAP_LEN); - test_find_last_bit(bitmap, BITMAP_LEN); - test_find_first_bit(bitmap, BITMAP_LEN); - - pr_err("\nStart testing find_bit() with sparse bitmap\n"); - - bitmap_zero(bitmap, BITMAP_LEN); - - while (nbits--) - __set_bit(prandom_u32() % BITMAP_LEN, bitmap); - - test_find_next_bit(bitmap, BITMAP_LEN); - test_find_next_zero_bit(bitmap, BITMAP_LEN); - test_find_last_bit(bitmap, BITMAP_LEN); - test_find_first_bit(bitmap, BITMAP_LEN); - - return 0; -} -module_init(find_bit_test); - -static void __exit test_find_bit_cleanup(void) -{ -} -module_exit(test_find_bit_cleanup); - -MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 15ff67bf85c6c02ab7d850deea0199516e8f16a0 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Tue, 6 Feb 2018 15:38:31 -0800 Subject: lib/find_bit_benchmark.c: improvements As suggested in review comments: * printk: align numbers using whitespaces instead of tabs; * return error value from init() to avoid calling rmmod if testing again; * use ktime_get instead of get_cycles as some arches don't support it; The output in dmesg (on QEMU arm64): [ 38.823430] Start testing find_bit() with random-filled bitmap [ 38.845358] find_next_bit: 20138448 ns, 163968 iterations [ 38.856217] find_next_zero_bit: 10615328 ns, 163713 iterations [ 38.863564] find_last_bit: 7111888 ns, 163967 iterations [ 40.944796] find_first_bit: 2081007216 ns, 163968 iterations [ 40.944975] [ 40.944975] Start testing find_bit() with sparse bitmap [ 40.945268] find_next_bit: 73216 ns, 656 iterations [ 40.967858] find_next_zero_bit: 22461008 ns, 327025 iterations [ 40.968047] find_last_bit: 62320 ns, 656 iterations [ 40.978060] find_first_bit: 9889360 ns, 656 iterations Link: http://lkml.kernel.org/r/20171124143040.a44jvhmnaiyedg2i@yury-thinkpad Signed-off-by: Yury Norov Tested-by: Geert Uytterhoeven Cc: Alexey Dobriyan Cc: Clement Courbet Cc: Matthew Wilcox Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/find_bit_benchmark.c | 47 +++++++++++++++++++++-------------------------- 1 file changed, 21 insertions(+), 26 deletions(-) (limited to 'lib') diff --git a/lib/find_bit_benchmark.c b/lib/find_bit_benchmark.c index f4394a36f9aa..67b19233c28f 100644 --- a/lib/find_bit_benchmark.c +++ b/lib/find_bit_benchmark.c @@ -43,16 +43,15 @@ static DECLARE_BITMAP(bitmap, BITMAP_LEN) __initdata; static int __init test_find_first_bit(void *bitmap, unsigned long len) { unsigned long i, cnt; - cycles_t cycles; + ktime_t time; - cycles = get_cycles(); + time = ktime_get(); for (cnt = i = 0; i < len; cnt++) { i = find_first_bit(bitmap, len); __clear_bit(i, bitmap); } - cycles = get_cycles() - cycles; - pr_err("find_first_bit:\t\t%llu cycles,\t%ld iterations\n", - (u64)cycles, cnt); + time = ktime_get() - time; + pr_err("find_first_bit: %18llu ns, %6ld iterations\n", time, cnt); return 0; } @@ -60,14 +59,13 @@ static int __init test_find_first_bit(void *bitmap, unsigned long len) static int __init test_find_next_bit(const void *bitmap, unsigned long len) { unsigned long i, cnt; - cycles_t cycles; + ktime_t time; - cycles = get_cycles(); + time = ktime_get(); for (cnt = i = 0; i < BITMAP_LEN; cnt++) i = find_next_bit(bitmap, BITMAP_LEN, i) + 1; - cycles = get_cycles() - cycles; - pr_err("find_next_bit:\t\t%llu cycles,\t%ld iterations\n", - (u64)cycles, cnt); + time = ktime_get() - time; + pr_err("find_next_bit: %18llu ns, %6ld iterations\n", time, cnt); return 0; } @@ -75,14 +73,13 @@ static int __init test_find_next_bit(const void *bitmap, unsigned long len) static int __init test_find_next_zero_bit(const void *bitmap, unsigned long len) { unsigned long i, cnt; - cycles_t cycles; + ktime_t time; - cycles = get_cycles(); + time = ktime_get(); for (cnt = i = 0; i < BITMAP_LEN; cnt++) i = find_next_zero_bit(bitmap, len, i) + 1; - cycles = get_cycles() - cycles; - pr_err("find_next_zero_bit:\t%llu cycles,\t%ld iterations\n", - (u64)cycles, cnt); + time = ktime_get() - time; + pr_err("find_next_zero_bit: %18llu ns, %6ld iterations\n", time, cnt); return 0; } @@ -90,9 +87,9 @@ static int __init test_find_next_zero_bit(const void *bitmap, unsigned long len) static int __init test_find_last_bit(const void *bitmap, unsigned long len) { unsigned long l, cnt = 0; - cycles_t cycles; + ktime_t time; - cycles = get_cycles(); + time = ktime_get(); do { cnt++; l = find_last_bit(bitmap, len); @@ -100,9 +97,8 @@ static int __init test_find_last_bit(const void *bitmap, unsigned long len) break; len = l; } while (len); - cycles = get_cycles() - cycles; - pr_err("find_last_bit:\t\t%llu cycles,\t%ld iterations\n", - (u64)cycles, cnt); + time = ktime_get() - time; + pr_err("find_last_bit: %18llu ns, %6ld iterations\n", time, cnt); return 0; } @@ -132,13 +128,12 @@ static int __init find_bit_test(void) test_find_last_bit(bitmap, BITMAP_LEN); test_find_first_bit(bitmap, BITMAP_LEN); - return 0; + /* + * Everything is OK. Return error just to let user run benchmark + * again without annoying rmmod. + */ + return -EINVAL; } module_init(find_bit_test); -static void __exit test_find_bit_cleanup(void) -{ -} -module_exit(test_find_bit_cleanup); - MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 0ade34c37012ea5c516d9aa4d19a56e9f40a55ed Mon Sep 17 00:00:00 2001 From: Clement Courbet Date: Tue, 6 Feb 2018 15:38:34 -0800 Subject: lib: optimize cpumask_next_and() We've measured that we spend ~0.6% of sys cpu time in cpumask_next_and(). It's essentially a joined iteration in search for a non-zero bit, which is currently implemented as a lookup join (find a nonzero bit on the lhs, lookup the rhs to see if it's set there). Implement a direct join (find a nonzero bit on the incrementally built join). Also add generic bitmap benchmarks in the new `test_find_bit` module for new function (see `find_next_and_bit` in [2] and [3] below). For cpumask_next_and, direct benchmarking shows that it's 1.17x to 14x faster with a geometric mean of 2.1 on 32 CPUs [1]. No impact on memory usage. Note that on Arm, the new pure-C implementation still outperforms the old one that uses a mix of C and asm (`find_next_bit`) [3]. [1] Approximate benchmark code: ``` unsigned long src1p[nr_cpumask_longs] = {pattern1}; unsigned long src2p[nr_cpumask_longs] = {pattern2}; for (/*a bunch of repetitions*/) { for (int n = -1; n <= nr_cpu_ids; ++n) { asm volatile("" : "+rm"(src1p)); // prevent any optimization asm volatile("" : "+rm"(src2p)); unsigned long result = cpumask_next_and(n, src1p, src2p); asm volatile("" : "+rm"(result)); } } ``` Results: pattern1 pattern2 time_before/time_after 0x0000ffff 0x0000ffff 1.65 0x0000ffff 0x00005555 2.24 0x0000ffff 0x00001111 2.94 0x0000ffff 0x00000000 14.0 0x00005555 0x0000ffff 1.67 0x00005555 0x00005555 1.71 0x00005555 0x00001111 1.90 0x00005555 0x00000000 6.58 0x00001111 0x0000ffff 1.46 0x00001111 0x00005555 1.49 0x00001111 0x00001111 1.45 0x00001111 0x00000000 3.10 0x00000000 0x0000ffff 1.18 0x00000000 0x00005555 1.18 0x00000000 0x00001111 1.17 0x00000000 0x00000000 1.25 ----------------------------- geo.mean 2.06 [2] test_find_next_bit, X86 (skylake) [ 3913.477422] Start testing find_bit() with random-filled bitmap [ 3913.477847] find_next_bit: 160868 cycles, 16484 iterations [ 3913.477933] find_next_zero_bit: 169542 cycles, 16285 iterations [ 3913.478036] find_last_bit: 201638 cycles, 16483 iterations [ 3913.480214] find_first_bit: 4353244 cycles, 16484 iterations [ 3913.480216] Start testing find_next_and_bit() with random-filled bitmap [ 3913.481074] find_next_and_bit: 89604 cycles, 8216 iterations [ 3913.481075] Start testing find_bit() with sparse bitmap [ 3913.481078] find_next_bit: 2536 cycles, 66 iterations [ 3913.481252] find_next_zero_bit: 344404 cycles, 32703 iterations [ 3913.481255] find_last_bit: 2006 cycles, 66 iterations [ 3913.481265] find_first_bit: 17488 cycles, 66 iterations [ 3913.481266] Start testing find_next_and_bit() with sparse bitmap [ 3913.481272] find_next_and_bit: 764 cycles, 1 iterations [3] test_find_next_bit, arm (v7 odroid XU3). [ 267.206928] Start testing find_bit() with random-filled bitmap [ 267.214752] find_next_bit: 4474 cycles, 16419 iterations [ 267.221850] find_next_zero_bit: 5976 cycles, 16350 iterations [ 267.229294] find_last_bit: 4209 cycles, 16419 iterations [ 267.279131] find_first_bit: 1032991 cycles, 16420 iterations [ 267.286265] Start testing find_next_and_bit() with random-filled bitmap [ 267.302386] find_next_and_bit: 2290 cycles, 8140 iterations [ 267.309422] Start testing find_bit() with sparse bitmap [ 267.316054] find_next_bit: 191 cycles, 66 iterations [ 267.322726] find_next_zero_bit: 8758 cycles, 32703 iterations [ 267.329803] find_last_bit: 84 cycles, 66 iterations [ 267.336169] find_first_bit: 4118 cycles, 66 iterations [ 267.342627] Start testing find_next_and_bit() with sparse bitmap [ 267.356919] find_next_and_bit: 91 cycles, 1 iterations [courbet@google.com: v6] Link: http://lkml.kernel.org/r/20171129095715.23430-1-courbet@google.com [geert@linux-m68k.org: m68k/bitops: always include ] Link: http://lkml.kernel.org/r/1512556816-28627-1-git-send-email-geert@linux-m68k.org Link: http://lkml.kernel.org/r/20171128131334.23491-1-courbet@google.com Signed-off-by: Clement Courbet Signed-off-by: Geert Uytterhoeven Cc: Yury Norov Cc: Geert Uytterhoeven Cc: Alexey Dobriyan Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/include/asm/bitops.h | 1 + arch/m68k/include/asm/bitops.h | 3 +- arch/unicore32/include/asm/bitops.h | 2 ++ include/asm-generic/bitops/find.h | 20 +++++++++++ include/linux/bitmap.h | 6 +++- lib/cpumask.c | 9 ++--- lib/find_bit.c | 59 ++++++++++++++++++++++++--------- lib/find_bit_benchmark.c | 25 +++++++++++++- tools/include/asm-generic/bitops/find.h | 16 +++++++++ tools/lib/find_bit.c | 39 ++++++++++++++++------ 10 files changed, 147 insertions(+), 33 deletions(-) (limited to 'lib') diff --git a/arch/arm/include/asm/bitops.h b/arch/arm/include/asm/bitops.h index ce5ee762ed66..4cab9bb823fb 100644 --- a/arch/arm/include/asm/bitops.h +++ b/arch/arm/include/asm/bitops.h @@ -338,6 +338,7 @@ static inline int find_next_bit_le(const void *p, int size, int offset) #endif +#include #include /* diff --git a/arch/m68k/include/asm/bitops.h b/arch/m68k/include/asm/bitops.h index dda58cfe8c22..93b47b1f6fb4 100644 --- a/arch/m68k/include/asm/bitops.h +++ b/arch/m68k/include/asm/bitops.h @@ -311,7 +311,6 @@ static inline int bfchg_mem_test_and_change_bit(int nr, * functions. */ #if defined(CONFIG_CPU_HAS_NO_BITFIELDS) -#include #include #else @@ -441,6 +440,8 @@ static inline unsigned long ffz(unsigned long word) #endif +#include + #ifdef __KERNEL__ #if defined(CONFIG_CPU_HAS_NO_BITFIELDS) diff --git a/arch/unicore32/include/asm/bitops.h b/arch/unicore32/include/asm/bitops.h index 401f597bc38c..c0cbdbe17168 100644 --- a/arch/unicore32/include/asm/bitops.h +++ b/arch/unicore32/include/asm/bitops.h @@ -44,4 +44,6 @@ static inline int fls(int x) #define find_first_bit find_first_bit #define find_first_zero_bit find_first_zero_bit +#include + #endif /* __UNICORE_BITOPS_H__ */ diff --git a/include/asm-generic/bitops/find.h b/include/asm-generic/bitops/find.h index 1ba611e16fa0..8a1ee10014de 100644 --- a/include/asm-generic/bitops/find.h +++ b/include/asm-generic/bitops/find.h @@ -16,6 +16,22 @@ extern unsigned long find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset); #endif +#ifndef find_next_and_bit +/** + * find_next_and_bit - find the next set bit in both memory regions + * @addr1: The first address to base the search on + * @addr2: The second address to base the search on + * @offset: The bitnumber to start searching at + * @size: The bitmap size in bits + * + * Returns the bit number for the next set bit + * If no bits are set, returns @size. + */ +extern unsigned long find_next_and_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long size, + unsigned long offset); +#endif + #ifndef find_next_zero_bit /** * find_next_zero_bit - find the next cleared bit in a memory region @@ -55,8 +71,12 @@ extern unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size); #else /* CONFIG_GENERIC_FIND_FIRST_BIT */ +#ifndef find_first_bit #define find_first_bit(addr, size) find_next_bit((addr), (size), 0) +#endif +#ifndef find_first_zero_bit #define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0) +#endif #endif /* CONFIG_GENERIC_FIND_FIRST_BIT */ diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index d9bf699e0e7a..5f11fbdc27f8 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -88,8 +88,12 @@ * test_and_change_bit(bit, addr) Change bit and return old value * find_first_zero_bit(addr, nbits) Position first zero bit in *addr * find_first_bit(addr, nbits) Position first set bit in *addr - * find_next_zero_bit(addr, nbits, bit) Position next zero bit in *addr >= bit + * find_next_zero_bit(addr, nbits, bit) + * Position next zero bit in *addr >= bit * find_next_bit(addr, nbits, bit) Position next set bit in *addr >= bit + * find_next_and_bit(addr1, addr2, nbits, bit) + * Same as find_next_bit, but in + * (*addr1 & *addr2) * */ diff --git a/lib/cpumask.c b/lib/cpumask.c index 35fe142ebb5e..beca6244671a 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -33,10 +33,11 @@ EXPORT_SYMBOL(cpumask_next); int cpumask_next_and(int n, const struct cpumask *src1p, const struct cpumask *src2p) { - while ((n = cpumask_next(n, src1p)) < nr_cpu_ids) - if (cpumask_test_cpu(n, src2p)) - break; - return n; + /* -1 is a legal arg here. */ + if (n != -1) + cpumask_check(n); + return find_next_and_bit(cpumask_bits(src1p), cpumask_bits(src2p), + nr_cpumask_bits, n + 1); } EXPORT_SYMBOL(cpumask_next_and); diff --git a/lib/find_bit.c b/lib/find_bit.c index 6ed74f78380c..ee3df93ba69a 100644 --- a/lib/find_bit.c +++ b/lib/find_bit.c @@ -21,22 +21,29 @@ #include #include -#if !defined(find_next_bit) || !defined(find_next_zero_bit) +#if !defined(find_next_bit) || !defined(find_next_zero_bit) || \ + !defined(find_next_and_bit) /* - * This is a common helper function for find_next_bit and - * find_next_zero_bit. The difference is the "invert" argument, which - * is XORed with each fetched word before searching it for one bits. + * This is a common helper function for find_next_bit, find_next_zero_bit, and + * find_next_and_bit. The differences are: + * - The "invert" argument, which is XORed with each fetched word before + * searching it for one bits. + * - The optional "addr2", which is anded with "addr1" if present. */ -static unsigned long _find_next_bit(const unsigned long *addr, - unsigned long nbits, unsigned long start, unsigned long invert) +static inline unsigned long _find_next_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long nbits, + unsigned long start, unsigned long invert) { unsigned long tmp; if (unlikely(start >= nbits)) return nbits; - tmp = addr[start / BITS_PER_LONG] ^ invert; + tmp = addr1[start / BITS_PER_LONG]; + if (addr2) + tmp &= addr2[start / BITS_PER_LONG]; + tmp ^= invert; /* Handle 1st word. */ tmp &= BITMAP_FIRST_WORD_MASK(start); @@ -47,7 +54,10 @@ static unsigned long _find_next_bit(const unsigned long *addr, if (start >= nbits) return nbits; - tmp = addr[start / BITS_PER_LONG] ^ invert; + tmp = addr1[start / BITS_PER_LONG]; + if (addr2) + tmp &= addr2[start / BITS_PER_LONG]; + tmp ^= invert; } return min(start + __ffs(tmp), nbits); @@ -61,7 +71,7 @@ static unsigned long _find_next_bit(const unsigned long *addr, unsigned long find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { - return _find_next_bit(addr, size, offset, 0UL); + return _find_next_bit(addr, NULL, size, offset, 0UL); } EXPORT_SYMBOL(find_next_bit); #endif @@ -70,11 +80,21 @@ EXPORT_SYMBOL(find_next_bit); unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { - return _find_next_bit(addr, size, offset, ~0UL); + return _find_next_bit(addr, NULL, size, offset, ~0UL); } EXPORT_SYMBOL(find_next_zero_bit); #endif +#if !defined(find_next_and_bit) +unsigned long find_next_and_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long size, + unsigned long offset) +{ + return _find_next_bit(addr1, addr2, size, offset, 0UL); +} +EXPORT_SYMBOL(find_next_and_bit); +#endif + #ifndef find_first_bit /* * Find the first set bit in a memory region. @@ -146,15 +166,19 @@ static inline unsigned long ext2_swab(const unsigned long y) } #if !defined(find_next_bit_le) || !defined(find_next_zero_bit_le) -static unsigned long _find_next_bit_le(const unsigned long *addr, - unsigned long nbits, unsigned long start, unsigned long invert) +static inline unsigned long _find_next_bit_le(const unsigned long *addr1, + const unsigned long *addr2, unsigned long nbits, + unsigned long start, unsigned long invert) { unsigned long tmp; if (unlikely(start >= nbits)) return nbits; - tmp = addr[start / BITS_PER_LONG] ^ invert; + tmp = addr1[start / BITS_PER_LONG]; + if (addr2) + tmp &= addr2[start / BITS_PER_LONG]; + tmp ^= invert; /* Handle 1st word. */ tmp &= ext2_swab(BITMAP_FIRST_WORD_MASK(start)); @@ -165,7 +189,10 @@ static unsigned long _find_next_bit_le(const unsigned long *addr, if (start >= nbits) return nbits; - tmp = addr[start / BITS_PER_LONG] ^ invert; + tmp = addr1[start / BITS_PER_LONG]; + if (addr2) + tmp &= addr2[start / BITS_PER_LONG]; + tmp ^= invert; } return min(start + __ffs(ext2_swab(tmp)), nbits); @@ -176,7 +203,7 @@ static unsigned long _find_next_bit_le(const unsigned long *addr, unsigned long find_next_zero_bit_le(const void *addr, unsigned long size, unsigned long offset) { - return _find_next_bit_le(addr, size, offset, ~0UL); + return _find_next_bit_le(addr, NULL, size, offset, ~0UL); } EXPORT_SYMBOL(find_next_zero_bit_le); #endif @@ -185,7 +212,7 @@ EXPORT_SYMBOL(find_next_zero_bit_le); unsigned long find_next_bit_le(const void *addr, unsigned long size, unsigned long offset) { - return _find_next_bit_le(addr, size, offset, 0UL); + return _find_next_bit_le(addr, NULL, size, offset, 0UL); } EXPORT_SYMBOL(find_next_bit_le); #endif diff --git a/lib/find_bit_benchmark.c b/lib/find_bit_benchmark.c index 67b19233c28f..5985a25e6cbc 100644 --- a/lib/find_bit_benchmark.c +++ b/lib/find_bit_benchmark.c @@ -35,6 +35,7 @@ #define SPARSE 500 static DECLARE_BITMAP(bitmap, BITMAP_LEN) __initdata; +static DECLARE_BITMAP(bitmap2, BITMAP_LEN) __initdata; /* * This is Schlemiel the Painter's algorithm. It should be called after @@ -103,6 +104,22 @@ static int __init test_find_last_bit(const void *bitmap, unsigned long len) return 0; } +static int __init test_find_next_and_bit(const void *bitmap, + const void *bitmap2, unsigned long len) +{ + unsigned long i, cnt; + cycles_t cycles; + + cycles = get_cycles(); + for (cnt = i = 0; i < BITMAP_LEN; cnt++) + i = find_next_and_bit(bitmap, bitmap2, BITMAP_LEN, i+1); + cycles = get_cycles() - cycles; + pr_err("find_next_and_bit:\t\t%llu cycles, %ld iterations\n", + (u64)cycles, cnt); + + return 0; +} + static int __init find_bit_test(void) { unsigned long nbits = BITMAP_LEN / SPARSE; @@ -110,23 +127,29 @@ static int __init find_bit_test(void) pr_err("\nStart testing find_bit() with random-filled bitmap\n"); get_random_bytes(bitmap, sizeof(bitmap)); + get_random_bytes(bitmap2, sizeof(bitmap2)); test_find_next_bit(bitmap, BITMAP_LEN); test_find_next_zero_bit(bitmap, BITMAP_LEN); test_find_last_bit(bitmap, BITMAP_LEN); test_find_first_bit(bitmap, BITMAP_LEN); + test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN); pr_err("\nStart testing find_bit() with sparse bitmap\n"); bitmap_zero(bitmap, BITMAP_LEN); + bitmap_zero(bitmap2, BITMAP_LEN); - while (nbits--) + while (nbits--) { __set_bit(prandom_u32() % BITMAP_LEN, bitmap); + __set_bit(prandom_u32() % BITMAP_LEN, bitmap2); + } test_find_next_bit(bitmap, BITMAP_LEN); test_find_next_zero_bit(bitmap, BITMAP_LEN); test_find_last_bit(bitmap, BITMAP_LEN); test_find_first_bit(bitmap, BITMAP_LEN); + test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN); /* * Everything is OK. Return error just to let user run benchmark diff --git a/tools/include/asm-generic/bitops/find.h b/tools/include/asm-generic/bitops/find.h index 9311fadaaab2..16ed1982cb34 100644 --- a/tools/include/asm-generic/bitops/find.h +++ b/tools/include/asm-generic/bitops/find.h @@ -16,6 +16,22 @@ extern unsigned long find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset); #endif +#ifndef find_next_and_bit +/** + * find_next_and_bit - find the next set bit in both memory regions + * @addr1: The first address to base the search on + * @addr2: The second address to base the search on + * @offset: The bitnumber to start searching at + * @size: The bitmap size in bits + * + * Returns the bit number for the next set bit + * If no bits are set, returns @size. + */ +extern unsigned long find_next_and_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long size, + unsigned long offset); +#endif + #ifndef find_next_zero_bit /** diff --git a/tools/lib/find_bit.c b/tools/lib/find_bit.c index 42c15f906aac..a88bd507091e 100644 --- a/tools/lib/find_bit.c +++ b/tools/lib/find_bit.c @@ -22,22 +22,29 @@ #include #include -#if !defined(find_next_bit) +#if !defined(find_next_bit) || !defined(find_next_zero_bit) || \ + !defined(find_next_and_bit) /* - * This is a common helper function for find_next_bit and - * find_next_zero_bit. The difference is the "invert" argument, which - * is XORed with each fetched word before searching it for one bits. + * This is a common helper function for find_next_bit, find_next_zero_bit, and + * find_next_and_bit. The differences are: + * - The "invert" argument, which is XORed with each fetched word before + * searching it for one bits. + * - The optional "addr2", which is anded with "addr1" if present. */ -static unsigned long _find_next_bit(const unsigned long *addr, - unsigned long nbits, unsigned long start, unsigned long invert) +static inline unsigned long _find_next_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long nbits, + unsigned long start, unsigned long invert) { unsigned long tmp; if (unlikely(start >= nbits)) return nbits; - tmp = addr[start / BITS_PER_LONG] ^ invert; + tmp = addr1[start / BITS_PER_LONG]; + if (addr2) + tmp &= addr2[start / BITS_PER_LONG]; + tmp ^= invert; /* Handle 1st word. */ tmp &= BITMAP_FIRST_WORD_MASK(start); @@ -48,7 +55,10 @@ static unsigned long _find_next_bit(const unsigned long *addr, if (start >= nbits) return nbits; - tmp = addr[start / BITS_PER_LONG] ^ invert; + tmp = addr1[start / BITS_PER_LONG]; + if (addr2) + tmp &= addr2[start / BITS_PER_LONG]; + tmp ^= invert; } return min(start + __ffs(tmp), nbits); @@ -62,7 +72,7 @@ static unsigned long _find_next_bit(const unsigned long *addr, unsigned long find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { - return _find_next_bit(addr, size, offset, 0UL); + return _find_next_bit(addr, NULL, size, offset, 0UL); } #endif @@ -104,6 +114,15 @@ unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { - return _find_next_bit(addr, size, offset, ~0UL); + return _find_next_bit(addr, NULL, size, offset, ~0UL); +} +#endif + +#ifndef find_next_and_bit +unsigned long find_next_and_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long size, + unsigned long offset) +{ + return _find_next_bit(addr1, addr2, size, offset, 0UL); } #endif -- cgit v1.2.3 From d3deafaa8b5c14cd1a001d0be675fc1e242dce42 Mon Sep 17 00:00:00 2001 From: Vincent Legoll Date: Tue, 6 Feb 2018 15:38:38 -0800 Subject: lib/: make RUNTIME_TESTS a menuconfig to ease disabling it all No need to get into the submenu to disable all related config entries. This makes it easier to disable all RUNTIME_TESTS config options without entering the submenu. It will also enable one to see that en/dis-abled state from the outside menu. This is only intended to change menuconfig UI, not change the config dependencies. Link: http://lkml.kernel.org/r/20171209162742.7363-1-vincent.legoll@gmail.com Signed-off-by: Vincent Legoll Cc: Ingo Molnar Cc: Byungchul Park Cc: Peter Zijlstra Cc: "Paul E. McKenney" Cc: Josh Poimboeuf Cc: Geert Uytterhoeven Cc: Randy Dunlap Cc: "Luis R. Rodriguez" Cc: Nicholas Piggin Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index cb9dd85a8356..1a1423923bcf 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1641,7 +1641,10 @@ config DMA_API_DEBUG If unsure, say N. -menu "Runtime Testing" +menuconfig RUNTIME_TESTING_MENU + bool "Runtime Testing" + +if RUNTIME_TESTING_MENU config LKDTM tristate "Linux Kernel Dump Test Tool Module" @@ -1929,7 +1932,7 @@ config TEST_DEBUG_VIRTUAL If unsure, say N. -endmenu # runtime tests +endif # RUNTIME_TESTING_MENU config MEMTEST bool "Memtest" -- cgit v1.2.3 From 92fc7cb8ae4d021cf7740e4ad0ced9fa9e07dae0 Mon Sep 17 00:00:00 2001 From: Pravin Shedge Date: Tue, 6 Feb 2018 15:38:42 -0800 Subject: lib/test_sort.c: add module unload support test_sort.c performs array-based and linked list sort test. Code allows to compile either as a loadable modules or builtin into the kernel. Current code is not allow to unload the test_sort.ko module after successful completion. This patch adds support to unload the "test_sort.ko" module by adding module_exit support. Previous patch was implemented auto unload support by returning -EAGAIN from module_init() function on successful case, but this approach is not ideal. The auto-unload might seem like a nice optimization, but it encourages inconsistent behaviour. And behaviour that is different from all other normal modules. Link: http://lkml.kernel.org/r/1513967133-6843-1-git-send-email-pravin.shedge4linux@gmail.com Signed-off-by: Pravin Shedge Cc: Kostenzer Felix Cc: Andy Shevchenko Cc: Geert Uytterhoeven Cc: Paul Gortmaker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_sort.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'lib') diff --git a/lib/test_sort.c b/lib/test_sort.c index d389c1cc2f6c..385c0ed5202f 100644 --- a/lib/test_sort.c +++ b/lib/test_sort.c @@ -39,5 +39,11 @@ exit: return err; } +static void __exit test_sort_exit(void) +{ +} + module_init(test_sort_init); +module_exit(test_sort_exit); + MODULE_LICENSE("GPL"); -- cgit v1.2.3 From b8fe1120b4ba342b4f156d24e952d6e686b20298 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 6 Feb 2018 15:40:38 -0800 Subject: lib/ubsan.c: s/missaligned/misaligned/ A vist from the spelling fairy. Cc: David Laight Cc: Andrey Ryabinin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/ubsan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/ubsan.c b/lib/ubsan.c index fb0409df1bcf..1e2328fa002d 100644 --- a/lib/ubsan.c +++ b/lib/ubsan.c @@ -281,7 +281,7 @@ static void handle_null_ptr_deref(struct type_mismatch_data *data) ubsan_epilogue(&flags); } -static void handle_missaligned_access(struct type_mismatch_data *data, +static void handle_misaligned_access(struct type_mismatch_data *data, unsigned long ptr) { unsigned long flags; @@ -322,7 +322,7 @@ void __ubsan_handle_type_mismatch(struct type_mismatch_data *data, if (!ptr) handle_null_ptr_deref(data); else if (data->alignment && !IS_ALIGNED(ptr, data->alignment)) - handle_missaligned_access(data, ptr); + handle_misaligned_access(data, ptr); else handle_object_size_mismatch(data, ptr); } -- cgit v1.2.3 From 42440c1f9911b4b7b8ba3dc4e90c1197bc561211 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Tue, 6 Feb 2018 15:40:42 -0800 Subject: lib/ubsan: add type mismatch handler for new GCC/Clang UBSAN=y fails to build with new GCC/clang: arch/x86/kernel/head64.o: In function `sanitize_boot_params': arch/x86/include/asm/bootparam_utils.h:37: undefined reference to `__ubsan_handle_type_mismatch_v1' because Clang and GCC 8 slightly changed ABI for 'type mismatch' errors. Compiler now uses new __ubsan_handle_type_mismatch_v1() function with slightly modified 'struct type_mismatch_data'. Let's add new 'struct type_mismatch_data_common' which is independent from compiler's layout of 'struct type_mismatch_data'. And make __ubsan_handle_type_mismatch[_v1]() functions transform compiler-dependent type mismatch data to our internal representation. This way, we can support both old and new compilers with minimal amount of change. Link: http://lkml.kernel.org/r/20180119152853.16806-1-aryabinin@virtuozzo.com Signed-off-by: Andrey Ryabinin Reported-by: Sodagudi Prasad Cc: [4.5+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/ubsan.c | 48 ++++++++++++++++++++++++++++++++++++++---------- lib/ubsan.h | 14 ++++++++++++++ 2 files changed, 52 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/ubsan.c b/lib/ubsan.c index 1e2328fa002d..50d1d5c25deb 100644 --- a/lib/ubsan.c +++ b/lib/ubsan.c @@ -265,14 +265,14 @@ void __ubsan_handle_divrem_overflow(struct overflow_data *data, } EXPORT_SYMBOL(__ubsan_handle_divrem_overflow); -static void handle_null_ptr_deref(struct type_mismatch_data *data) +static void handle_null_ptr_deref(struct type_mismatch_data_common *data) { unsigned long flags; - if (suppress_report(&data->location)) + if (suppress_report(data->location)) return; - ubsan_prologue(&data->location, &flags); + ubsan_prologue(data->location, &flags); pr_err("%s null pointer of type %s\n", type_check_kinds[data->type_check_kind], @@ -281,15 +281,15 @@ static void handle_null_ptr_deref(struct type_mismatch_data *data) ubsan_epilogue(&flags); } -static void handle_misaligned_access(struct type_mismatch_data *data, +static void handle_misaligned_access(struct type_mismatch_data_common *data, unsigned long ptr) { unsigned long flags; - if (suppress_report(&data->location)) + if (suppress_report(data->location)) return; - ubsan_prologue(&data->location, &flags); + ubsan_prologue(data->location, &flags); pr_err("%s misaligned address %p for type %s\n", type_check_kinds[data->type_check_kind], @@ -299,15 +299,15 @@ static void handle_misaligned_access(struct type_mismatch_data *data, ubsan_epilogue(&flags); } -static void handle_object_size_mismatch(struct type_mismatch_data *data, +static void handle_object_size_mismatch(struct type_mismatch_data_common *data, unsigned long ptr) { unsigned long flags; - if (suppress_report(&data->location)) + if (suppress_report(data->location)) return; - ubsan_prologue(&data->location, &flags); + ubsan_prologue(data->location, &flags); pr_err("%s address %p with insufficient space\n", type_check_kinds[data->type_check_kind], (void *) ptr); @@ -315,7 +315,7 @@ static void handle_object_size_mismatch(struct type_mismatch_data *data, ubsan_epilogue(&flags); } -void __ubsan_handle_type_mismatch(struct type_mismatch_data *data, +static void ubsan_type_mismatch_common(struct type_mismatch_data_common *data, unsigned long ptr) { @@ -326,8 +326,36 @@ void __ubsan_handle_type_mismatch(struct type_mismatch_data *data, else handle_object_size_mismatch(data, ptr); } + +void __ubsan_handle_type_mismatch(struct type_mismatch_data *data, + unsigned long ptr) +{ + struct type_mismatch_data_common common_data = { + .location = &data->location, + .type = data->type, + .alignment = data->alignment, + .type_check_kind = data->type_check_kind + }; + + ubsan_type_mismatch_common(&common_data, ptr); +} EXPORT_SYMBOL(__ubsan_handle_type_mismatch); +void __ubsan_handle_type_mismatch_v1(struct type_mismatch_data_v1 *data, + unsigned long ptr) +{ + + struct type_mismatch_data_common common_data = { + .location = &data->location, + .type = data->type, + .alignment = 1UL << data->log_alignment, + .type_check_kind = data->type_check_kind + }; + + ubsan_type_mismatch_common(&common_data, ptr); +} +EXPORT_SYMBOL(__ubsan_handle_type_mismatch_v1); + void __ubsan_handle_nonnull_return(struct nonnull_return_data *data) { unsigned long flags; diff --git a/lib/ubsan.h b/lib/ubsan.h index 88f23557edbe..7e30b26497e0 100644 --- a/lib/ubsan.h +++ b/lib/ubsan.h @@ -37,6 +37,20 @@ struct type_mismatch_data { unsigned char type_check_kind; }; +struct type_mismatch_data_v1 { + struct source_location location; + struct type_descriptor *type; + unsigned char log_alignment; + unsigned char type_check_kind; +}; + +struct type_mismatch_data_common { + struct source_location *location; + struct type_descriptor *type; + unsigned long alignment; + unsigned char type_check_kind; +}; + struct nonnull_arg_data { struct source_location location; struct source_location attr_location; -- cgit v1.2.3 From bac7a1fff7926fb9891a18fe33650884b0e13e41 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Tue, 6 Feb 2018 15:40:45 -0800 Subject: lib/ubsan: remove returns-nonnull-attribute checks Similarly to type mismatch checks, new GCC 8.x and Clang also changed for ABI for returns_nonnull checks. While we can update our code to conform the new ABI it's more reasonable to just remove it. Because it's just dead code, we don't have any single user of returns_nonnull attribute in the whole kernel. And AFAIU the advantage that this attribute could bring would be mitigated by -fno-delete-null-pointer-checks cflag that we use to build the kernel. So it's unlikely we will have a lot of returns_nonnull attribute in future. So let's just remove the code, it has no use. [aryabinin@virtuozzo.com: fix warning] Link: http://lkml.kernel.org/r/20180122165711.11510-1-aryabinin@virtuozzo.com Link: http://lkml.kernel.org/r/20180119152853.16806-2-aryabinin@virtuozzo.com Signed-off-by: Andrey Ryabinin Cc: Sodagudi Prasad Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/ubsan.c | 24 ------------------------ lib/ubsan.h | 5 ----- scripts/Makefile.ubsan | 1 - 3 files changed, 30 deletions(-) (limited to 'lib') diff --git a/lib/ubsan.c b/lib/ubsan.c index 50d1d5c25deb..59fee96c29a0 100644 --- a/lib/ubsan.c +++ b/lib/ubsan.c @@ -141,11 +141,6 @@ static void val_to_string(char *str, size_t size, struct type_descriptor *type, } } -static bool location_is_valid(struct source_location *loc) -{ - return loc->file_name != NULL; -} - static DEFINE_SPINLOCK(report_lock); static void ubsan_prologue(struct source_location *location, @@ -356,25 +351,6 @@ void __ubsan_handle_type_mismatch_v1(struct type_mismatch_data_v1 *data, } EXPORT_SYMBOL(__ubsan_handle_type_mismatch_v1); -void __ubsan_handle_nonnull_return(struct nonnull_return_data *data) -{ - unsigned long flags; - - if (suppress_report(&data->location)) - return; - - ubsan_prologue(&data->location, &flags); - - pr_err("null pointer returned from function declared to never return null\n"); - - if (location_is_valid(&data->attr_location)) - print_source_location("returns_nonnull attribute specified in", - &data->attr_location); - - ubsan_epilogue(&flags); -} -EXPORT_SYMBOL(__ubsan_handle_nonnull_return); - void __ubsan_handle_vla_bound_not_positive(struct vla_bound_data *data, unsigned long bound) { diff --git a/lib/ubsan.h b/lib/ubsan.h index 7e30b26497e0..f4d8d0bd4016 100644 --- a/lib/ubsan.h +++ b/lib/ubsan.h @@ -57,11 +57,6 @@ struct nonnull_arg_data { int arg_index; }; -struct nonnull_return_data { - struct source_location location; - struct source_location attr_location; -}; - struct vla_bound_data { struct source_location location; struct type_descriptor *type; diff --git a/scripts/Makefile.ubsan b/scripts/Makefile.ubsan index 8fd4d44fbcd1..b593b36ccff8 100644 --- a/scripts/Makefile.ubsan +++ b/scripts/Makefile.ubsan @@ -7,7 +7,6 @@ ifdef CONFIG_UBSAN CFLAGS_UBSAN += $(call cc-option, -fsanitize=signed-integer-overflow) CFLAGS_UBSAN += $(call cc-option, -fsanitize=bounds) CFLAGS_UBSAN += $(call cc-option, -fsanitize=object-size) - CFLAGS_UBSAN += $(call cc-option, -fsanitize=returns-nonnull-attribute) CFLAGS_UBSAN += $(call cc-option, -fsanitize=bool) CFLAGS_UBSAN += $(call cc-option, -fsanitize=enum) -- cgit v1.2.3 From e7c52b84fb18f08ce49b6067ae6285aca79084a8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 6 Feb 2018 15:41:41 -0800 Subject: kasan: rework Kconfig settings We get a lot of very large stack frames using gcc-7.0.1 with the default -fsanitize-address-use-after-scope --param asan-stack=1 options, which can easily cause an overflow of the kernel stack, e.g. drivers/gpu/drm/i915/gvt/handlers.c:2434:1: warning: the frame size of 46176 bytes is larger than 3072 bytes drivers/net/wireless/ralink/rt2x00/rt2800lib.c:5650:1: warning: the frame size of 23632 bytes is larger than 3072 bytes lib/atomic64_test.c:250:1: warning: the frame size of 11200 bytes is larger than 3072 bytes drivers/gpu/drm/i915/gvt/handlers.c:2621:1: warning: the frame size of 9208 bytes is larger than 3072 bytes drivers/media/dvb-frontends/stv090x.c:3431:1: warning: the frame size of 6816 bytes is larger than 3072 bytes fs/fscache/stats.c:287:1: warning: the frame size of 6536 bytes is larger than 3072 bytes To reduce this risk, -fsanitize-address-use-after-scope is now split out into a separate CONFIG_KASAN_EXTRA Kconfig option, leading to stack frames that are smaller than 2 kilobytes most of the time on x86_64. An earlier version of this patch also prevented combining KASAN_EXTRA with KASAN_INLINE, but that is no longer necessary with gcc-7.0.1. All patches to get the frame size below 2048 bytes with CONFIG_KASAN=y and CONFIG_KASAN_EXTRA=n have been merged by maintainers now, so we can bring back that default now. KASAN_EXTRA=y still causes lots of warnings but now defaults to !COMPILE_TEST to disable it in allmodconfig, and it remains disabled in all other defconfigs since it is a new option. I arbitrarily raise the warning limit for KASAN_EXTRA to 3072 to reduce the noise, but an allmodconfig kernel still has around 50 warnings on gcc-7. I experimented a bit more with smaller stack frames and have another follow-up series that reduces the warning limit for 64-bit architectures to 1280 bytes (without CONFIG_KASAN). With earlier versions of this patch series, I also had patches to address the warnings we get with KASAN and/or KASAN_EXTRA, using a "noinline_if_stackbloat" annotation. That annotation now got replaced with a gcc-8 bugfix (see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81715) and a workaround for older compilers, which means that KASAN_EXTRA is now just as bad as before and will lead to an instant stack overflow in a few extreme cases. This reverts parts of commit 3f181b4d8652 ("lib/Kconfig.debug: disable -Wframe-larger-than warnings with KASAN=y"). Two patches in linux-next should be merged first to avoid introducing warnings in an allmodconfig build: 3cd890dbe2a4 ("media: dvb-frontends: fix i2c access helpers for KASAN") 16c3ada89cff ("media: r820t: fix r820t_write_reg for KASAN") Do we really need to backport this? I think we do: without this patch, enabling KASAN will lead to unavoidable kernel stack overflow in certain device drivers when built with gcc-7 or higher on linux-4.10+ or any version that contains a backport of commit c5caf21ab0cf8. Most people are probably still on older compilers, but it will get worse over time as they upgrade their distros. The warnings we get on kernels older than this should all be for code that uses dangerously large stack frames, though most of them do not cause an actual stack overflow by themselves.The asan-stack option was added in linux-4.0, and commit 3f181b4d8652 ("lib/Kconfig.debug: disable -Wframe-larger-than warnings with KASAN=y") effectively turned off the warning for allmodconfig kernels, so I would like to see this fix backported to any kernels later than 4.0. I have done dozens of fixes for individual functions with stack frames larger than 2048 bytes with asan-stack, and I plan to make sure that all those fixes make it into the stable kernels as well (most are already there). Part of the complication here is that asan-stack (from 4.0) was originally assumed to always require much larger stacks, but that turned out to be a combination of multiple gcc bugs that we have now worked around and fixed, but sanitize-address-use-after-scope (from v4.10) has a much higher inherent stack usage and also suffers from at least three other problems that we have analyzed but not yet fixed upstream, each of them makes the stack usage more severe than it should be. Link: http://lkml.kernel.org/r/20171221134744.2295529-1-arnd@arndb.de Signed-off-by: Arnd Bergmann Acked-by: Andrey Ryabinin Cc: Mauro Carvalho Chehab Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Andrey Konovalov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 2 +- lib/Kconfig.kasan | 11 +++++++++++ scripts/Makefile.kasan | 2 ++ 3 files changed, 14 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 1a1423923bcf..b66c264d4194 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -217,7 +217,7 @@ config ENABLE_MUST_CHECK config FRAME_WARN int "Warn for stack frames larger than (needs gcc 4.4)" range 0 8192 - default 0 if KASAN + default 3072 if KASAN_EXTRA default 2048 if GCC_PLUGIN_LATENT_ENTROPY default 1280 if (!64BIT && PARISC) default 1024 if (!64BIT && !PARISC) diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index bd38aab05929..3d35d062970d 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -20,6 +20,17 @@ config KASAN Currently CONFIG_KASAN doesn't work with CONFIG_DEBUG_SLAB (the resulting kernel does not boot). +config KASAN_EXTRA + bool "KAsan: extra checks" + depends on KASAN && DEBUG_KERNEL && !COMPILE_TEST + help + This enables further checks in the kernel address sanitizer, for now + it only includes the address-use-after-scope check that can lead + to excessive kernel stack usage, frame size warnings and longer + compile time. + https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 has more + + choice prompt "Instrumentation type" depends on KASAN diff --git a/scripts/Makefile.kasan b/scripts/Makefile.kasan index 3fb382c69ff6..69552a39951d 100644 --- a/scripts/Makefile.kasan +++ b/scripts/Makefile.kasan @@ -38,7 +38,9 @@ else endif +ifdef CONFIG_KASAN_EXTRA CFLAGS_KASAN += $(call cc-option, -fsanitize-address-use-after-scope) +endif CFLAGS_KASAN_NOSANITIZE := -fno-builtin -- cgit v1.2.3