From 17d9ddc72fb8bba0d4f67868c9c612e472a594a9 Mon Sep 17 00:00:00 2001 From: "Pallipadi, Venkatesh" Date: Wed, 10 Feb 2010 15:23:44 -0800 Subject: rbtree: Add support for augmented rbtrees Add support for augmented rbtrees in core rbtree code. This will be used in subsequent patches, in x86 PAT code, which needs interval trees to efficiently keep track of PAT ranges. Signed-off-by: Venkatesh Pallipadi LKML-Reference: <20100210232343.GA11465@linux-os.sc.intel.com> Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- lib/rbtree.c | 48 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/rbtree.c b/lib/rbtree.c index e2aa3be29858..15e10b1afdd2 100644 --- a/lib/rbtree.c +++ b/lib/rbtree.c @@ -44,6 +44,11 @@ static void __rb_rotate_left(struct rb_node *node, struct rb_root *root) else root->rb_node = right; rb_set_parent(node, right); + + if (root->augment_cb) { + root->augment_cb(node); + root->augment_cb(right); + } } static void __rb_rotate_right(struct rb_node *node, struct rb_root *root) @@ -67,12 +72,20 @@ static void __rb_rotate_right(struct rb_node *node, struct rb_root *root) else root->rb_node = left; rb_set_parent(node, left); + + if (root->augment_cb) { + root->augment_cb(node); + root->augment_cb(left); + } } void rb_insert_color(struct rb_node *node, struct rb_root *root) { struct rb_node *parent, *gparent; + if (root->augment_cb) + root->augment_cb(node); + while ((parent = rb_parent(node)) && rb_is_red(parent)) { gparent = rb_parent(parent); @@ -227,12 +240,15 @@ void rb_erase(struct rb_node *node, struct rb_root *root) else { struct rb_node *old = node, *left; + int old_parent_cb = 0; + int successor_parent_cb = 0; node = node->rb_right; while ((left = node->rb_left) != NULL) node = left; if (rb_parent(old)) { + old_parent_cb = 1; if (rb_parent(old)->rb_left == old) rb_parent(old)->rb_left = node; else @@ -247,8 +263,10 @@ void rb_erase(struct rb_node *node, struct rb_root *root) if (parent == old) { parent = node; } else { + successor_parent_cb = 1; if (child) rb_set_parent(child, parent); + parent->rb_left = child; node->rb_right = old->rb_right; @@ -259,6 +277,24 @@ void rb_erase(struct rb_node *node, struct rb_root *root) node->rb_left = old->rb_left; rb_set_parent(old->rb_left, node); + if (root->augment_cb) { + /* + * Here, three different nodes can have new children. + * The parent of the successor node that was selected + * to replace the node to be erased. + * The node that is getting erased and is now replaced + * by its successor. + * The parent of the node getting erased-replaced. + */ + if (successor_parent_cb) + root->augment_cb(parent); + + root->augment_cb(node); + + if (old_parent_cb) + root->augment_cb(rb_parent(old)); + } + goto color; } @@ -267,15 +303,19 @@ void rb_erase(struct rb_node *node, struct rb_root *root) if (child) rb_set_parent(child, parent); - if (parent) - { + + if (parent) { if (parent->rb_left == node) parent->rb_left = child; else parent->rb_right = child; - } - else + + if (root->augment_cb) + root->augment_cb(parent); + + } else { root->rb_node = child; + } color: if (color == RB_BLACK) -- cgit v1.2.3 From 4d1ee80f3a7df7fe9cdec26e651e6201c45b10d4 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 29 Jan 2010 20:59:17 +0000 Subject: idr: export idr_get_next() idr_get_next() was accidentally not exported when added. It is about to be used by mtdcore, which may be built as a module. Signed-off-by: Ben Hutchings Acked-by: KAMEZAWA Hiroyuki Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- lib/idr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/idr.c b/lib/idr.c index 1cac726c44bc..21f9266d1e41 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -621,7 +621,7 @@ void *idr_get_next(struct idr *idp, int *nextidp) } return NULL; } - +EXPORT_SYMBOL(idr_get_next); /** -- cgit v1.2.3 From 86a8938078a8bb518c5376de493e348c7490d506 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Wed, 24 Feb 2010 10:54:24 +0100 Subject: lib: Add self-test for atomic64_t This patch adds self-test on boot code for atomic64_t. This has been used to test the later changes in this patchset. Signed-off-by: Luca Barbieri LKML-Reference: <1267005265-27958-4-git-send-email-luca@luca-barbieri.com> Signed-off-by: H. Peter Anvin --- lib/Kconfig.debug | 7 +++ lib/Makefile | 2 + lib/atomic64_test.c | 158 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 167 insertions(+) create mode 100644 lib/atomic64_test.c (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 25c3ed594c54..3676c517a073 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1054,6 +1054,13 @@ config DMA_API_DEBUG This option causes a performance degredation. Use only if you want to debug device drivers. If unsure, say N. +config ATOMIC64_SELFTEST + bool "Perform an atomic64_t self-test at boot" + help + Enable this option to test the atomic64_t functions at boot. + + If unsure, say N. + source "samples/Kconfig" source "lib/Kconfig.kgdb" diff --git a/lib/Makefile b/lib/Makefile index 347ad8db29d3..4af4786fe281 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -99,6 +99,8 @@ obj-$(CONFIG_GENERIC_CSUM) += checksum.o obj-$(CONFIG_GENERIC_ATOMIC64) += atomic64.o +obj-$(CONFIG_ATOMIC64_SELFTEST) += atomic64_test.o + hostprogs-y := gen_crc32table clean-files := crc32table.h diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c new file mode 100644 index 000000000000..4ff649e46bad --- /dev/null +++ b/lib/atomic64_test.c @@ -0,0 +1,158 @@ +/* + * Testsuite for atomic64_t functions + * + * Copyright © 2010 Luca Barbieri + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ +#include +#include + +#define INIT(c) do { atomic64_set(&v, c); r = c; } while (0) +static __init int test_atomic64(void) +{ + long long v0 = 0xaaa31337c001d00dLL; + long long v1 = 0xdeadbeefdeafcafeLL; + long long v2 = 0xfaceabadf00df001LL; + long long onestwos = 0x1111111122222222LL; + long long one = 1LL; + + atomic64_t v = ATOMIC64_INIT(v0); + long long r = v0; + BUG_ON(v.counter != r); + + atomic64_set(&v, v1); + r = v1; + BUG_ON(v.counter != r); + BUG_ON(atomic64_read(&v) != r); + + INIT(v0); + atomic64_add(onestwos, &v); + r += onestwos; + BUG_ON(v.counter != r); + + INIT(v0); + atomic64_add(-one, &v); + r += -one; + BUG_ON(v.counter != r); + + INIT(v0); + r += onestwos; + BUG_ON(atomic64_add_return(onestwos, &v) != r); + BUG_ON(v.counter != r); + + INIT(v0); + r += -one; + BUG_ON(atomic64_add_return(-one, &v) != r); + BUG_ON(v.counter != r); + + INIT(v0); + atomic64_sub(onestwos, &v); + r -= onestwos; + BUG_ON(v.counter != r); + + INIT(v0); + atomic64_sub(-one, &v); + r -= -one; + BUG_ON(v.counter != r); + + INIT(v0); + r -= onestwos; + BUG_ON(atomic64_sub_return(onestwos, &v) != r); + BUG_ON(v.counter != r); + + INIT(v0); + r -= -one; + BUG_ON(atomic64_sub_return(-one, &v) != r); + BUG_ON(v.counter != r); + + INIT(v0); + atomic64_inc(&v); + r += one; + BUG_ON(v.counter != r); + + INIT(v0); + r += one; + BUG_ON(atomic64_inc_return(&v) != r); + BUG_ON(v.counter != r); + + INIT(v0); + atomic64_dec(&v); + r -= one; + BUG_ON(v.counter != r); + + INIT(v0); + r -= one; + BUG_ON(atomic64_dec_return(&v) != r); + BUG_ON(v.counter != r); + + INIT(v0); + BUG_ON(atomic64_xchg(&v, v1) != v0); + r = v1; + BUG_ON(v.counter != r); + + INIT(v0); + BUG_ON(atomic64_cmpxchg(&v, v0, v1) != v0); + r = v1; + BUG_ON(v.counter != r); + + INIT(v0); + BUG_ON(atomic64_cmpxchg(&v, v2, v1) != v0); + BUG_ON(v.counter != r); + + INIT(v0); + BUG_ON(!atomic64_add_unless(&v, one, v0)); + BUG_ON(v.counter != r); + + INIT(v0); + BUG_ON(atomic64_add_unless(&v, one, v1)); + r += one; + BUG_ON(v.counter != r); + + INIT(onestwos); + BUG_ON(atomic64_dec_if_positive(&v) != (onestwos - 1)); + r -= one; + BUG_ON(v.counter != r); + + INIT(0); + BUG_ON(atomic64_dec_if_positive(&v) != -one); + BUG_ON(v.counter != r); + + INIT(-one); + BUG_ON(atomic64_dec_if_positive(&v) != (-one - one)); + BUG_ON(v.counter != r); + + INIT(onestwos); + BUG_ON(atomic64_inc_not_zero(&v)); + r += one; + BUG_ON(v.counter != r); + + INIT(0); + BUG_ON(!atomic64_inc_not_zero(&v)); + BUG_ON(v.counter != r); + + INIT(-one); + BUG_ON(atomic64_inc_not_zero(&v)); + r += one; + BUG_ON(v.counter != r); + +#ifdef CONFIG_X86 + printk(KERN_INFO "atomic64 test passed for %s+ platform %s CX8 and %s SSE\n", +#ifdef CONFIG_X86_CMPXCHG64 + "586", +#else + "386", +#endif + boot_cpu_has(X86_FEATURE_CX8) ? "with" : "without", + boot_cpu_has(X86_FEATURE_XMM) ? "with" : "without"); +#else + printk(KERN_INFO "atomic64 test passed\n"); +#endif + + return 0; +} + +core_initcall(test_atomic64); -- cgit v1.2.3 From 8f4f202b335144bf5be5c9e5b1bc9477ecdae958 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Fri, 26 Feb 2010 12:22:40 +0100 Subject: lib: Only test atomic64_dec_if_positive on archs having it Currently atomic64_dec_if_positive() is only supported by PowerPC, MIPS and x86-32. Signed-off-by: Luca Barbieri LKML-Reference: <1267183361-20775-1-git-send-email-luca@luca-barbieri.com> Signed-off-by: H. Peter Anvin --- lib/atomic64_test.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib') diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c index 4ff649e46bad..0effcacbebda 100644 --- a/lib/atomic64_test.c +++ b/lib/atomic64_test.c @@ -112,6 +112,7 @@ static __init int test_atomic64(void) r += one; BUG_ON(v.counter != r); +#if defined(CONFIG_X86_32) || defined(CONFIG_MIPS) || defined(CONFIG_PPC) || defined(_ASM_GENERIC_ATOMIC64_H) INIT(onestwos); BUG_ON(atomic64_dec_if_positive(&v) != (onestwos - 1)); r -= one; @@ -124,6 +125,9 @@ static __init int test_atomic64(void) INIT(-one); BUG_ON(atomic64_dec_if_positive(&v) != (-one - one)); BUG_ON(v.counter != r); +#else +#warning Please implement atomic64_dec_if_positive for your architecture, and add it to the IF above +#endif INIT(onestwos); BUG_ON(atomic64_inc_not_zero(&v)); -- cgit v1.2.3 From d7f6de1e9c4a12e11ba7186c70f0f40caa76f590 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Fri, 26 Feb 2010 12:22:41 +0100 Subject: x86: Implement atomic[64]_dec_if_positive() Add support for atomic_dec_if_positive(), and atomic64_dec_if_positive() for x86-64. atomic64_dec_if_positive() for x86-32 was already implemented in a previous patch. Signed-off-by: Luca Barbieri LKML-Reference: <1267183361-20775-2-git-send-email-luca@luca-barbieri.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/atomic.h | 23 +++++++++++++++++++++++ arch/x86/include/asm/atomic64_64.h | 23 +++++++++++++++++++++++ lib/atomic64_test.c | 2 +- 3 files changed, 47 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 8f8217b9bdac..706c69492c14 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -246,6 +246,29 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) +/* + * atomic_dec_if_positive - decrement by 1 if old value positive + * @v: pointer of type atomic_t + * + * The function returns the old value of *v minus 1, even if + * the atomic variable, v, was not decremented. + */ +static inline int atomic_dec_if_positive(atomic_t *v) +{ + int c, old, dec; + c = atomic_read(v); + for (;;) { + dec = c - 1; + if (unlikely(dec < 0)) + break; + old = atomic_cmpxchg((v), c, dec); + if (likely(old == c)) + break; + c = old; + } + return dec; +} + /** * atomic_inc_short - increment of a short integer * @v: pointer to type int diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 51c5b4056929..4d6e2cd6c88c 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -221,4 +221,27 @@ static inline int atomic64_add_unless(atomic64_t *v, long a, long u) #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) +/* + * atomic64_dec_if_positive - decrement by 1 if old value positive + * @v: pointer of type atomic_t + * + * The function returns the old value of *v minus 1, even if + * the atomic variable, v, was not decremented. + */ +static inline long atomic64_dec_if_positive(atomic64_t *v) +{ + long c, old, dec; + c = atomic64_read(v); + for (;;) { + dec = c - 1; + if (unlikely(dec < 0)) + break; + old = atomic64_cmpxchg((v), c, dec); + if (likely(old == c)) + break; + c = old; + } + return dec; +} + #endif /* _ASM_X86_ATOMIC64_64_H */ diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c index 0effcacbebda..58efdabb3845 100644 --- a/lib/atomic64_test.c +++ b/lib/atomic64_test.c @@ -112,7 +112,7 @@ static __init int test_atomic64(void) r += one; BUG_ON(v.counter != r); -#if defined(CONFIG_X86_32) || defined(CONFIG_MIPS) || defined(CONFIG_PPC) || defined(_ASM_GENERIC_ATOMIC64_H) +#if defined(CONFIG_X86) || defined(CONFIG_MIPS) || defined(CONFIG_PPC) || defined(_ASM_GENERIC_ATOMIC64_H) INIT(onestwos); BUG_ON(atomic64_dec_if_positive(&v) != (onestwos - 1)); r -= one; -- cgit v1.2.3 From 9efbcd590243045111670c171a951923b877b57d Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Mon, 1 Mar 2010 19:55:45 +0100 Subject: lib: Fix atomic64_add_unless test atomic64_add_unless must return 1 if it perfomed the add and 0 otherwise. The test assumed the opposite convention. Reported-by: H. Peter Anvin Signed-off-by: Luca Barbieri LKML-Reference: <1267469749-11878-2-git-send-email-luca@luca-barbieri.com> Signed-off-by: H. Peter Anvin --- lib/atomic64_test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c index 58efdabb3845..ee8e6de8b413 100644 --- a/lib/atomic64_test.c +++ b/lib/atomic64_test.c @@ -104,11 +104,11 @@ static __init int test_atomic64(void) BUG_ON(v.counter != r); INIT(v0); - BUG_ON(!atomic64_add_unless(&v, one, v0)); + BUG_ON(atomic64_add_unless(&v, one, v0)); BUG_ON(v.counter != r); INIT(v0); - BUG_ON(atomic64_add_unless(&v, one, v1)); + BUG_ON(!atomic64_add_unless(&v, one, v1)); r += one; BUG_ON(v.counter != r); -- cgit v1.2.3 From 97577896f6b9c056fa0a5e9f6a608110cb3dcd33 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Mon, 1 Mar 2010 19:55:47 +0100 Subject: lib: Fix atomic64_add_unless return value convention atomic64_add_unless must return 1 if it perfomed the add and 0 otherwise. The generic implementation did the opposite thing. Reported-by: H. Peter Anvin Confirmed-by: Paul Mackerras Signed-off-by: Luca Barbieri LKML-Reference: <1267469749-11878-4-git-send-email-luca@luca-barbieri.com> Signed-off-by: H. Peter Anvin --- lib/atomic64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/atomic64.c b/lib/atomic64.c index 8bee16ec7524..a21c12bc727c 100644 --- a/lib/atomic64.c +++ b/lib/atomic64.c @@ -162,12 +162,12 @@ int atomic64_add_unless(atomic64_t *v, long long a, long long u) { unsigned long flags; spinlock_t *lock = lock_addr(v); - int ret = 1; + int ret = 0; spin_lock_irqsave(lock, flags); if (v->counter != u) { v->counter += a; - ret = 0; + ret = 1; } spin_unlock_irqrestore(lock, flags); return ret; -- cgit v1.2.3 From 25a304f277ad70166eeae25a4958d2049005c33a Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Mon, 1 Mar 2010 19:55:48 +0100 Subject: lib: Fix atomic64_inc_not_zero test atomic64_inc_not_zero must return 1 if it perfomed the add and 0 otherwise. The test assumed the opposite convention. Signed-off-by: Luca Barbieri LKML-Reference: <1267469749-11878-5-git-send-email-luca@luca-barbieri.com> Signed-off-by: H. Peter Anvin --- lib/atomic64_test.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c index ee8e6de8b413..f7bb706c9c3a 100644 --- a/lib/atomic64_test.c +++ b/lib/atomic64_test.c @@ -130,16 +130,16 @@ static __init int test_atomic64(void) #endif INIT(onestwos); - BUG_ON(atomic64_inc_not_zero(&v)); + BUG_ON(!atomic64_inc_not_zero(&v)); r += one; BUG_ON(v.counter != r); INIT(0); - BUG_ON(!atomic64_inc_not_zero(&v)); + BUG_ON(atomic64_inc_not_zero(&v)); BUG_ON(v.counter != r); INIT(-one); - BUG_ON(atomic64_inc_not_zero(&v)); + BUG_ON(!atomic64_inc_not_zero(&v)); r += one; BUG_ON(v.counter != r); -- cgit v1.2.3 From a5c9161f27c3e1ae6c0094d262f03a7e98262181 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 1 Mar 2010 11:49:23 -0800 Subject: x86, atomic64: In selftest, distinguish x86-64 from 586+ The x86-64 implementation of the atomics is totally different from the i586+ implementation, which makes it quite confusing to call it "586+". Also fix indentation, and add "i" for "i386" and "i586" as used elsewhere in the kernel. Signed-off-by: H. Peter Anvin Cc: Luca Barbieri LKML-Reference: <1267005265-27958-4-git-send-email-luca@luca-barbieri.com> --- lib/atomic64_test.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c index f7bb706c9c3a..65e482caf5e9 100644 --- a/lib/atomic64_test.c +++ b/lib/atomic64_test.c @@ -144,14 +144,16 @@ static __init int test_atomic64(void) BUG_ON(v.counter != r); #ifdef CONFIG_X86 - printk(KERN_INFO "atomic64 test passed for %s+ platform %s CX8 and %s SSE\n", -#ifdef CONFIG_X86_CMPXCHG64 - "586", + printk(KERN_INFO "atomic64 test passed for %s platform %s CX8 and %s SSE\n", +#ifdef CONFIG_X86_64 + "x86-64", +#elif defined(CONFIG_X86_CMPXCHG64) + "i586+", #else - "386", + "i386+", #endif - boot_cpu_has(X86_FEATURE_CX8) ? "with" : "without", - boot_cpu_has(X86_FEATURE_XMM) ? "with" : "without"); + boot_cpu_has(X86_FEATURE_CX8) ? "with" : "without", + boot_cpu_has(X86_FEATURE_XMM) ? "with" : "without"); #else printk(KERN_INFO "atomic64 test passed\n"); #endif -- cgit v1.2.3 From 1fb2f77c037624601fd214fb7c29faa84cd7bdd7 Mon Sep 17 00:00:00 2001 From: Henrik Kretzschmar Date: Fri, 26 Mar 2010 20:38:35 +0100 Subject: debugobjects: Section mismatch cleanup This patch marks two functions, which only get called at initialization, as __init. Here is also interesting, that modpost doesn't catch here the right function name. WARNING: lib/built-in.o(.text+0x585f): Section mismatch in reference from the function T.506() to the variable .init.data:obj The function T.506() references the variable __initdata obj. This is often because T.506 lacks a __initdata annotation or the annotation of obj is wrong. Signed-off-by: Henrik Kretzschmar LKML-Reference: <1269632315-19403-1-git-send-email-henne@nachtwindheim.de> Signed-off-by: Thomas Gleixner --- lib/debugobjects.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/debugobjects.c b/lib/debugobjects.c index a9a8996d286a..c4ecd3ce7fd4 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -773,7 +773,7 @@ static int __init fixup_free(void *addr, enum debug_obj_state state) } } -static int +static int __init check_results(void *addr, enum debug_obj_state state, int fixups, int warnings) { struct debug_bucket *db; @@ -916,7 +916,7 @@ void __init debug_objects_early_init(void) /* * Convert the statically allocated objects to dynamic ones: */ -static int debug_objects_replace_static_objects(void) +static int __init debug_objects_replace_static_objects(void) { struct debug_bucket *db = obj_hash; struct hlist_node *node, *tmp; -- cgit v1.2.3 From 1527bc8b928dd1399c3d3467dd47d9ede210978a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 1 Feb 2010 15:03:07 +0100 Subject: bitops: Optimize hweight() by making use of compile-time evaluation Rename the extisting runtime hweight() implementations to __arch_hweight(), rename the compile-time versions to __const_hweight() and then have hweight() pick between them. Suggested-by: H. Peter Anvin Signed-off-by: Peter Zijlstra LKML-Reference: <20100318111929.GB11152@aftab> Acked-by: H. Peter Anvin LKML-Reference: <1265028224.24455.154.camel@laptop> Signed-off-by: H. Peter Anvin --- arch/alpha/include/asm/bitops.h | 18 +++++++------ arch/ia64/include/asm/bitops.h | 11 ++++---- arch/sparc/include/asm/bitops_64.h | 11 ++++---- include/asm-generic/bitops/arch_hweight.h | 11 ++++++++ include/asm-generic/bitops/const_hweight.h | 42 ++++++++++++++++++++++++++++++ include/asm-generic/bitops/hweight.h | 8 ++---- include/linux/bitops.h | 25 ------------------ lib/hweight.c | 19 +++++++------- 8 files changed, 87 insertions(+), 58 deletions(-) create mode 100644 include/asm-generic/bitops/arch_hweight.h create mode 100644 include/asm-generic/bitops/const_hweight.h (limited to 'lib') diff --git a/arch/alpha/include/asm/bitops.h b/arch/alpha/include/asm/bitops.h index 15f3ae25c511..296da1d5ed57 100644 --- a/arch/alpha/include/asm/bitops.h +++ b/arch/alpha/include/asm/bitops.h @@ -405,29 +405,31 @@ static inline int fls(int x) #if defined(CONFIG_ALPHA_EV6) && defined(CONFIG_ALPHA_EV67) /* Whee. EV67 can calculate it directly. */ -static inline unsigned long hweight64(unsigned long w) +static inline unsigned long __arch_hweight64(unsigned long w) { return __kernel_ctpop(w); } -static inline unsigned int hweight32(unsigned int w) +static inline unsigned int __arch_weight32(unsigned int w) { - return hweight64(w); + return __arch_hweight64(w); } -static inline unsigned int hweight16(unsigned int w) +static inline unsigned int __arch_hweight16(unsigned int w) { - return hweight64(w & 0xffff); + return __arch_hweight64(w & 0xffff); } -static inline unsigned int hweight8(unsigned int w) +static inline unsigned int __arch_hweight8(unsigned int w) { - return hweight64(w & 0xff); + return __arch_hweight64(w & 0xff); } #else -#include +#include #endif +#include + #endif /* __KERNEL__ */ #include diff --git a/arch/ia64/include/asm/bitops.h b/arch/ia64/include/asm/bitops.h index 6ebc229a1c51..9da3df6f1a52 100644 --- a/arch/ia64/include/asm/bitops.h +++ b/arch/ia64/include/asm/bitops.h @@ -437,17 +437,18 @@ __fls (unsigned long x) * hweightN: returns the hamming weight (i.e. the number * of bits set) of a N-bit word */ -static __inline__ unsigned long -hweight64 (unsigned long x) +static __inline__ unsigned long __arch_hweight64(unsigned long x) { unsigned long result; result = ia64_popcnt(x); return result; } -#define hweight32(x) (unsigned int) hweight64((x) & 0xfffffffful) -#define hweight16(x) (unsigned int) hweight64((x) & 0xfffful) -#define hweight8(x) (unsigned int) hweight64((x) & 0xfful) +#define __arch_hweight32(x) ((unsigned int) __arch_hweight64((x) & 0xfffffffful)) +#define __arch_hweight16(x) ((unsigned int) __arch_hweight64((x) & 0xfffful)) +#define __arch_hweight8(x) ((unsigned int) __arch_hweight64((x) & 0xfful)) + +#include #endif /* __KERNEL__ */ diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h index e72ac9cdfb98..766121a67a24 100644 --- a/arch/sparc/include/asm/bitops_64.h +++ b/arch/sparc/include/asm/bitops_64.h @@ -44,7 +44,7 @@ extern void change_bit(unsigned long nr, volatile unsigned long *addr); #ifdef ULTRA_HAS_POPULATION_COUNT -static inline unsigned int hweight64(unsigned long w) +static inline unsigned int __arch_hweight64(unsigned long w) { unsigned int res; @@ -52,7 +52,7 @@ static inline unsigned int hweight64(unsigned long w) return res; } -static inline unsigned int hweight32(unsigned int w) +static inline unsigned int __arch_hweight32(unsigned int w) { unsigned int res; @@ -60,7 +60,7 @@ static inline unsigned int hweight32(unsigned int w) return res; } -static inline unsigned int hweight16(unsigned int w) +static inline unsigned int __arch_hweight16(unsigned int w) { unsigned int res; @@ -68,7 +68,7 @@ static inline unsigned int hweight16(unsigned int w) return res; } -static inline unsigned int hweight8(unsigned int w) +static inline unsigned int __arch_hweight8(unsigned int w) { unsigned int res; @@ -78,9 +78,10 @@ static inline unsigned int hweight8(unsigned int w) #else -#include +#include #endif +#include #include #endif /* __KERNEL__ */ diff --git a/include/asm-generic/bitops/arch_hweight.h b/include/asm-generic/bitops/arch_hweight.h new file mode 100644 index 000000000000..3a7be842cdce --- /dev/null +++ b/include/asm-generic/bitops/arch_hweight.h @@ -0,0 +1,11 @@ +#ifndef _ASM_GENERIC_BITOPS_ARCH_HWEIGHT_H_ +#define _ASM_GENERIC_BITOPS_ARCH_HWEIGHT_H_ + +#include + +extern unsigned int __arch_hweight32(unsigned int w); +extern unsigned int __arch_hweight16(unsigned int w); +extern unsigned int __arch_hweight8(unsigned int w); +extern unsigned long __arch_hweight64(__u64 w); + +#endif /* _ASM_GENERIC_BITOPS_HWEIGHT_H_ */ diff --git a/include/asm-generic/bitops/const_hweight.h b/include/asm-generic/bitops/const_hweight.h new file mode 100644 index 000000000000..fa2a50b7ee66 --- /dev/null +++ b/include/asm-generic/bitops/const_hweight.h @@ -0,0 +1,42 @@ +#ifndef _ASM_GENERIC_BITOPS_CONST_HWEIGHT_H_ +#define _ASM_GENERIC_BITOPS_CONST_HWEIGHT_H_ + +/* + * Compile time versions of __arch_hweightN() + */ +#define __const_hweight8(w) \ + ( (!!((w) & (1ULL << 0))) + \ + (!!((w) & (1ULL << 1))) + \ + (!!((w) & (1ULL << 2))) + \ + (!!((w) & (1ULL << 3))) + \ + (!!((w) & (1ULL << 4))) + \ + (!!((w) & (1ULL << 5))) + \ + (!!((w) & (1ULL << 6))) + \ + (!!((w) & (1ULL << 7))) ) + +#define __const_hweight16(w) (__const_hweight8(w) + __const_hweight8((w) >> 8 )) +#define __const_hweight32(w) (__const_hweight16(w) + __const_hweight16((w) >> 16)) +#define __const_hweight64(w) (__const_hweight32(w) + __const_hweight32((w) >> 32)) + +/* + * Generic interface. + */ +#define hweight8(w) (__builtin_constant_p(w) ? __const_hweight8(w) : __arch_hweight8(w)) +#define hweight16(w) (__builtin_constant_p(w) ? __const_hweight16(w) : __arch_hweight16(w)) +#define hweight32(w) (__builtin_constant_p(w) ? __const_hweight32(w) : __arch_hweight32(w)) +#define hweight64(w) (__builtin_constant_p(w) ? __const_hweight64(w) : __arch_hweight64(w)) + +/* + * Interface for known constant arguments + */ +#define HWEIGHT8(w) (BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + __const_hweight8(w)) +#define HWEIGHT16(w) (BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + __const_hweight16(w)) +#define HWEIGHT32(w) (BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + __const_hweight32(w)) +#define HWEIGHT64(w) (BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + __const_hweight64(w)) + +/* + * Type invariant interface to the compile time constant hweight functions. + */ +#define HWEIGHT(w) HWEIGHT64((u64)w) + +#endif /* _ASM_GENERIC_BITOPS_CONST_HWEIGHT_H_ */ diff --git a/include/asm-generic/bitops/hweight.h b/include/asm-generic/bitops/hweight.h index fbbc383771da..a94d6519c7ed 100644 --- a/include/asm-generic/bitops/hweight.h +++ b/include/asm-generic/bitops/hweight.h @@ -1,11 +1,7 @@ #ifndef _ASM_GENERIC_BITOPS_HWEIGHT_H_ #define _ASM_GENERIC_BITOPS_HWEIGHT_H_ -#include - -extern unsigned int hweight32(unsigned int w); -extern unsigned int hweight16(unsigned int w); -extern unsigned int hweight8(unsigned int w); -extern unsigned long hweight64(__u64 w); +#include +#include #endif /* _ASM_GENERIC_BITOPS_HWEIGHT_H_ */ diff --git a/include/linux/bitops.h b/include/linux/bitops.h index b79389879238..c55d5bc4ee58 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -47,31 +47,6 @@ static inline unsigned long hweight_long(unsigned long w) return sizeof(w) == 4 ? hweight32(w) : hweight64(w); } -/* - * Clearly slow versions of the hweightN() functions, their benefit is - * of course compile time evaluation of constant arguments. - */ -#define HWEIGHT8(w) \ - ( BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + \ - (!!((w) & (1ULL << 0))) + \ - (!!((w) & (1ULL << 1))) + \ - (!!((w) & (1ULL << 2))) + \ - (!!((w) & (1ULL << 3))) + \ - (!!((w) & (1ULL << 4))) + \ - (!!((w) & (1ULL << 5))) + \ - (!!((w) & (1ULL << 6))) + \ - (!!((w) & (1ULL << 7))) ) - -#define HWEIGHT16(w) (HWEIGHT8(w) + HWEIGHT8((w) >> 8)) -#define HWEIGHT32(w) (HWEIGHT16(w) + HWEIGHT16((w) >> 16)) -#define HWEIGHT64(w) (HWEIGHT32(w) + HWEIGHT32((w) >> 32)) - -/* - * Type invariant version that simply casts things to the - * largest type. - */ -#define HWEIGHT(w) HWEIGHT64((u64)(w)) - /** * rol32 - rotate a 32-bit value left * @word: value to rotate diff --git a/lib/hweight.c b/lib/hweight.c index 63ee4eb1228d..a6927e76840f 100644 --- a/lib/hweight.c +++ b/lib/hweight.c @@ -9,7 +9,7 @@ * The Hamming Weight of a number is the total number of bits set in it. */ -unsigned int hweight32(unsigned int w) +unsigned int __arch_hweight32(unsigned int w) { #ifdef ARCH_HAS_FAST_MULTIPLIER w -= (w >> 1) & 0x55555555; @@ -24,29 +24,30 @@ unsigned int hweight32(unsigned int w) return (res + (res >> 16)) & 0x000000FF; #endif } -EXPORT_SYMBOL(hweight32); +EXPORT_SYMBOL(__arch_hweight32); -unsigned int hweight16(unsigned int w) +unsigned int __arch_hweight16(unsigned int w) { unsigned int res = w - ((w >> 1) & 0x5555); res = (res & 0x3333) + ((res >> 2) & 0x3333); res = (res + (res >> 4)) & 0x0F0F; return (res + (res >> 8)) & 0x00FF; } -EXPORT_SYMBOL(hweight16); +EXPORT_SYMBOL(__arch_hweight16); -unsigned int hweight8(unsigned int w) +unsigned int __arch_hweight8(unsigned int w) { unsigned int res = w - ((w >> 1) & 0x55); res = (res & 0x33) + ((res >> 2) & 0x33); return (res + (res >> 4)) & 0x0F; } -EXPORT_SYMBOL(hweight8); +EXPORT_SYMBOL(__arch_hweight8); -unsigned long hweight64(__u64 w) +unsigned long __arch_hweight64(__u64 w) { #if BITS_PER_LONG == 32 - return hweight32((unsigned int)(w >> 32)) + hweight32((unsigned int)w); + return __arch_hweight32((unsigned int)(w >> 32)) + + __arch_hweight32((unsigned int)w); #elif BITS_PER_LONG == 64 #ifdef ARCH_HAS_FAST_MULTIPLIER w -= (w >> 1) & 0x5555555555555555ul; @@ -63,4 +64,4 @@ unsigned long hweight64(__u64 w) #endif #endif } -EXPORT_SYMBOL(hweight64); +EXPORT_SYMBOL(__arch_hweight64); -- cgit v1.2.3 From d61931d89be506372d01a90d1755f6d0a9fafe2d Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 5 Mar 2010 17:34:46 +0100 Subject: x86: Add optimized popcnt variants Add support for the hardware version of the Hamming weight function, popcnt, present in CPUs which advertize it under CPUID, Function 0x0000_0001_ECX[23]. On CPUs which don't support it, we fallback to the default lib/hweight.c sw versions. A synthetic benchmark comparing popcnt with __sw_hweight64 showed almost a 3x speedup on a F10h machine. Signed-off-by: Borislav Petkov LKML-Reference: <20100318112015.GC11152@aftab> Signed-off-by: H. Peter Anvin --- arch/x86/Kconfig | 5 +++ arch/x86/include/asm/alternative.h | 9 +++-- arch/x86/include/asm/arch_hweight.h | 59 +++++++++++++++++++++++++++++++ arch/x86/include/asm/bitops.h | 4 ++- include/asm-generic/bitops/arch_hweight.h | 22 +++++++++--- lib/Makefile | 3 ++ lib/hweight.c | 20 +++++------ scripts/Makefile.lib | 4 +++ 8 files changed, 108 insertions(+), 18 deletions(-) create mode 100644 arch/x86/include/asm/arch_hweight.h (limited to 'lib') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0eacb1ffb421..89d8c54cdd37 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -238,6 +238,11 @@ config X86_32_LAZY_GS def_bool y depends on X86_32 && !CC_STACKPROTECTOR +config ARCH_HWEIGHT_CFLAGS + string + default "-fcall-saved-ecx -fcall-saved-edx" if X86_32 + default "-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" if X86_64 + config KTIME_SCALAR def_bool X86_32 source "init/Kconfig" diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index b09ec55650b3..67dae51e7fd0 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -39,9 +39,6 @@ #define LOCK_PREFIX "" #endif -/* This must be included *after* the definition of LOCK_PREFIX */ -#include - struct alt_instr { u8 *instr; /* original instruction */ u8 *replacement; @@ -95,6 +92,12 @@ static inline int alternatives_text_reserved(void *start, void *end) "663:\n\t" newinstr "\n664:\n" /* replacement */ \ ".previous" +/* + * This must be included *after* the definition of ALTERNATIVE due to + * + */ +#include + /* * Alternative instructions for different CPU types or capabilities. * diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h new file mode 100644 index 000000000000..d1fc3c219ae6 --- /dev/null +++ b/arch/x86/include/asm/arch_hweight.h @@ -0,0 +1,59 @@ +#ifndef _ASM_X86_HWEIGHT_H +#define _ASM_X86_HWEIGHT_H + +#ifdef CONFIG_64BIT +/* popcnt %rdi, %rax */ +#define POPCNT ".byte 0xf3,0x48,0x0f,0xb8,0xc7" +#define REG_IN "D" +#define REG_OUT "a" +#else +/* popcnt %eax, %eax */ +#define POPCNT ".byte 0xf3,0x0f,0xb8,0xc0" +#define REG_IN "a" +#define REG_OUT "a" +#endif + +/* + * __sw_hweightXX are called from within the alternatives below + * and callee-clobbered registers need to be taken care of. See + * ARCH_HWEIGHT_CFLAGS in for the respective + * compiler switches. + */ +static inline unsigned int __arch_hweight32(unsigned int w) +{ + unsigned int res = 0; + + asm (ALTERNATIVE("call __sw_hweight32", POPCNT, X86_FEATURE_POPCNT) + : "="REG_OUT (res) + : REG_IN (w)); + + return res; +} + +static inline unsigned int __arch_hweight16(unsigned int w) +{ + return __arch_hweight32(w & 0xffff); +} + +static inline unsigned int __arch_hweight8(unsigned int w) +{ + return __arch_hweight32(w & 0xff); +} + +static inline unsigned long __arch_hweight64(__u64 w) +{ + unsigned long res = 0; + +#ifdef CONFIG_X86_32 + return __arch_hweight32((u32)w) + + __arch_hweight32((u32)(w >> 32)); +#else + asm (ALTERNATIVE("call __sw_hweight64", POPCNT, X86_FEATURE_POPCNT) + : "="REG_OUT (res) + : REG_IN (w)); +#endif /* CONFIG_X86_32 */ + + return res; +} + +#endif diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 02b47a603fc8..545776efeb16 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -444,7 +444,9 @@ static inline int fls(int x) #define ARCH_HAS_FAST_MULTIPLIER 1 -#include +#include + +#include #endif /* __KERNEL__ */ diff --git a/include/asm-generic/bitops/arch_hweight.h b/include/asm-generic/bitops/arch_hweight.h index 3a7be842cdce..9a81c1e9436c 100644 --- a/include/asm-generic/bitops/arch_hweight.h +++ b/include/asm-generic/bitops/arch_hweight.h @@ -3,9 +3,23 @@ #include -extern unsigned int __arch_hweight32(unsigned int w); -extern unsigned int __arch_hweight16(unsigned int w); -extern unsigned int __arch_hweight8(unsigned int w); -extern unsigned long __arch_hweight64(__u64 w); +inline unsigned int __arch_hweight32(unsigned int w) +{ + return __sw_hweight32(w); +} +inline unsigned int __arch_hweight16(unsigned int w) +{ + return __sw_hweight16(w); +} + +inline unsigned int __arch_hweight8(unsigned int w) +{ + return __sw_hweight8(w); +} + +inline unsigned long __arch_hweight64(__u64 w) +{ + return __sw_hweight64(w); +} #endif /* _ASM_GENERIC_BITOPS_HWEIGHT_H_ */ diff --git a/lib/Makefile b/lib/Makefile index 2e152aed7198..abe63a8ad143 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -39,7 +39,10 @@ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o lib-$(CONFIG_GENERIC_FIND_FIRST_BIT) += find_next_bit.o lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o obj-$(CONFIG_GENERIC_FIND_LAST_BIT) += find_last_bit.o + +CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS)) obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o + obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o obj-$(CONFIG_BTREE) += btree.o obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o diff --git a/lib/hweight.c b/lib/hweight.c index a6927e76840f..3c79d50814cf 100644 --- a/lib/hweight.c +++ b/lib/hweight.c @@ -9,7 +9,7 @@ * The Hamming Weight of a number is the total number of bits set in it. */ -unsigned int __arch_hweight32(unsigned int w) +unsigned int __sw_hweight32(unsigned int w) { #ifdef ARCH_HAS_FAST_MULTIPLIER w -= (w >> 1) & 0x55555555; @@ -24,30 +24,30 @@ unsigned int __arch_hweight32(unsigned int w) return (res + (res >> 16)) & 0x000000FF; #endif } -EXPORT_SYMBOL(__arch_hweight32); +EXPORT_SYMBOL(__sw_hweight32); -unsigned int __arch_hweight16(unsigned int w) +unsigned int __sw_hweight16(unsigned int w) { unsigned int res = w - ((w >> 1) & 0x5555); res = (res & 0x3333) + ((res >> 2) & 0x3333); res = (res + (res >> 4)) & 0x0F0F; return (res + (res >> 8)) & 0x00FF; } -EXPORT_SYMBOL(__arch_hweight16); +EXPORT_SYMBOL(__sw_hweight16); -unsigned int __arch_hweight8(unsigned int w) +unsigned int __sw_hweight8(unsigned int w) { unsigned int res = w - ((w >> 1) & 0x55); res = (res & 0x33) + ((res >> 2) & 0x33); return (res + (res >> 4)) & 0x0F; } -EXPORT_SYMBOL(__arch_hweight8); +EXPORT_SYMBOL(__sw_hweight8); -unsigned long __arch_hweight64(__u64 w) +unsigned long __sw_hweight64(__u64 w) { #if BITS_PER_LONG == 32 - return __arch_hweight32((unsigned int)(w >> 32)) + - __arch_hweight32((unsigned int)w); + return __sw_hweight32((unsigned int)(w >> 32)) + + __sw_hweight32((unsigned int)w); #elif BITS_PER_LONG == 64 #ifdef ARCH_HAS_FAST_MULTIPLIER w -= (w >> 1) & 0x5555555555555555ul; @@ -64,4 +64,4 @@ unsigned long __arch_hweight64(__u64 w) #endif #endif } -EXPORT_SYMBOL(__arch_hweight64); +EXPORT_SYMBOL(__sw_hweight64); diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index f9bdf264473d..cbcd654215e6 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -245,3 +245,7 @@ quiet_cmd_lzo = LZO $@ cmd_lzo = (cat $(filter-out FORCE,$^) | \ lzop -9 && $(call size_append, $(filter-out FORCE,$^))) > $@ || \ (rm -f $@ ; false) + +# misc stuff +# --------------------------------------------------------------------------- +quote:=" -- cgit v1.2.3 From 2b3fc35f6919344e3cf722dde8308f47235c0b70 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 20 Apr 2010 16:23:07 +0800 Subject: rcu: optionally leave lockdep enabled after RCU lockdep splat There is no need to disable lockdep after an RCU lockdep splat, so remove the debug_lockdeps_off() from lockdep_rcu_dereference(). To avoid repeated lockdep splats, use a static variable in the inlined rcu_dereference_check() and rcu_dereference_protected() macros so that a given instance splats only once, but so that multiple instances can be detected per boot. This is controlled by a new config variable CONFIG_PROVE_RCU_REPEATEDLY, which is disabled by default. This provides the normal lockdep behavior by default, but permits people who want to find multiple RCU-lockdep splats per boot to easily do so. Requested-by: Eric Paris Signed-off-by: Lai Jiangshan Tested-by: Eric Paris Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 15 +++++++++++---- kernel/lockdep.c | 3 +++ lib/Kconfig.debug | 12 ++++++++++++ 3 files changed, 26 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index db266bbed23f..4dca2752cfde 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -192,6 +192,15 @@ static inline int rcu_read_lock_sched_held(void) extern int rcu_my_thread_group_empty(void); +#define __do_rcu_dereference_check(c) \ + do { \ + static bool __warned; \ + if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \ + __warned = true; \ + lockdep_rcu_dereference(__FILE__, __LINE__); \ + } \ + } while (0) + /** * rcu_dereference_check - rcu_dereference with debug checking * @p: The pointer to read, prior to dereferencing @@ -221,8 +230,7 @@ extern int rcu_my_thread_group_empty(void); */ #define rcu_dereference_check(p, c) \ ({ \ - if (debug_lockdep_rcu_enabled() && !(c)) \ - lockdep_rcu_dereference(__FILE__, __LINE__); \ + __do_rcu_dereference_check(c); \ rcu_dereference_raw(p); \ }) @@ -239,8 +247,7 @@ extern int rcu_my_thread_group_empty(void); */ #define rcu_dereference_protected(p, c) \ ({ \ - if (debug_lockdep_rcu_enabled() && !(c)) \ - lockdep_rcu_dereference(__FILE__, __LINE__); \ + __do_rcu_dereference_check(c); \ (p); \ }) diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 2594e1ce41cb..3a756ba8d5d8 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -3801,8 +3801,11 @@ void lockdep_rcu_dereference(const char *file, const int line) { struct task_struct *curr = current; +#ifndef CONFIG_PROVE_RCU_REPEATEDLY if (!debug_locks_off()) return; +#endif /* #ifdef CONFIG_PROVE_RCU_REPEATEDLY */ + /* Note: the following can be executed concurrently, so be careful. */ printk("\n===================================================\n"); printk( "[ INFO: suspicious rcu_dereference_check() usage. ]\n"); printk( "---------------------------------------------------\n"); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 935248bdbc47..94090b4bb7d2 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -512,6 +512,18 @@ config PROVE_RCU Say N if you are unsure. +config PROVE_RCU_REPEATEDLY + bool "RCU debugging: don't disable PROVE_RCU on first splat" + depends on PROVE_RCU + default n + help + By itself, PROVE_RCU will disable checking upon issuing the + first warning (or "splat"). This feature prevents such + disabling, allowing multiple RCU-lockdep warnings to be printed + on a single reboot. + + Say N if you are unsure. + config LOCKDEP bool depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT -- cgit v1.2.3 From 55ec936ff4e57cc626db336a7bf33b267390e9b4 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 13 Apr 2010 12:22:33 -0700 Subject: rcu: enable CPU_STALL_VERBOSE by default The CPU_STALL_VERBOSE kernel configuration parameter was added to 2.6.34 to identify any preempted/blocked tasks that were preventing the current grace period from completing when running preemptible RCU. As is conventional for new configurations parameters, this defaulted disabled. It is now time to enable it by default. Signed-off-by: Paul E. McKenney --- lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 94090b4bb7d2..930a9e5eae08 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -805,7 +805,7 @@ config RCU_CPU_STALL_DETECTOR config RCU_CPU_STALL_VERBOSE bool "Print additional per-task information for RCU_CPU_STALL_DETECTOR" depends on RCU_CPU_STALL_DETECTOR && TREE_PREEMPT_RCU - default n + default y help This option causes RCU to printk detailed per-task information for any tasks that are stalling the current RCU grace period. -- cgit v1.2.3 From a5d8e467f83f6672104f276223a88e3b50cbd375 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Sat, 17 Apr 2010 08:48:38 -0400 Subject: Debugobjects transition check Implement a basic state machine checker in the debugobjects. This state machine checker detects races and inconsistencies within the "active" life of a debugobject. The checker only keeps track of the current state; all the state machine logic is kept at the object instance level. The checker works by adding a supplementary "unsigned int astate" field to the debug_obj structure. It keeps track of the current "active state" of the object. The only constraints that are imposed on the states by the debugobjects system is that: - activation of an object sets the current active state to 0, - deactivation of an object expects the current active state to be 0. For the rest of the states, the state mapping is determined by the specific object instance. Therefore, the logic keeping track of the state machine is within the specialized instance, without any need to know about it at the debugobject level. The current object active state is changed by calling: debug_object_active_state(addr, descr, expect, next) where "expect" is the expected state and "next" is the next state to move to if the expected state is found. A warning is generated if the expected is not found. Signed-off-by: Mathieu Desnoyers Reviewed-by: Thomas Gleixner Acked-by: David S. Miller CC: "Paul E. McKenney" CC: akpm@linux-foundation.org CC: mingo@elte.hu CC: laijs@cn.fujitsu.com CC: dipankar@in.ibm.com CC: josh@joshtriplett.org CC: dvhltc@us.ibm.com CC: niv@us.ibm.com CC: peterz@infradead.org CC: rostedt@goodmis.org CC: Valdis.Kletnieks@vt.edu CC: dhowells@redhat.com CC: eric.dumazet@gmail.com CC: Alexey Dobriyan Signed-off-by: Paul E. McKenney --- include/linux/debugobjects.h | 11 +++++++++ lib/debugobjects.c | 59 +++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 67 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/include/linux/debugobjects.h b/include/linux/debugobjects.h index 8c243aaa86a7..597692f1fc8d 100644 --- a/include/linux/debugobjects.h +++ b/include/linux/debugobjects.h @@ -20,12 +20,14 @@ struct debug_obj_descr; * struct debug_obj - representaion of an tracked object * @node: hlist node to link the object into the tracker list * @state: tracked object state + * @astate: current active state * @object: pointer to the real object * @descr: pointer to an object type specific debug description structure */ struct debug_obj { struct hlist_node node; enum debug_obj_state state; + unsigned int astate; void *object; struct debug_obj_descr *descr; }; @@ -60,6 +62,15 @@ extern void debug_object_deactivate(void *addr, struct debug_obj_descr *descr); extern void debug_object_destroy (void *addr, struct debug_obj_descr *descr); extern void debug_object_free (void *addr, struct debug_obj_descr *descr); +/* + * Active state: + * - Set at 0 upon initialization. + * - Must return to 0 before deactivation. + */ +extern void +debug_object_active_state(void *addr, struct debug_obj_descr *descr, + unsigned int expect, unsigned int next); + extern void debug_objects_early_init(void); extern void debug_objects_mem_init(void); #else diff --git a/lib/debugobjects.c b/lib/debugobjects.c index b862b30369ff..076464fd2072 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -141,6 +141,7 @@ alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr) obj->object = addr; obj->descr = descr; obj->state = ODEBUG_STATE_NONE; + obj->astate = 0; hlist_del(&obj->node); hlist_add_head(&obj->node, &b->list); @@ -252,8 +253,10 @@ static void debug_print_object(struct debug_obj *obj, char *msg) if (limit < 5 && obj->descr != descr_test) { limit++; - WARN(1, KERN_ERR "ODEBUG: %s %s object type: %s\n", msg, - obj_states[obj->state], obj->descr->name); + WARN(1, KERN_ERR "ODEBUG: %s %s (active state %u) " + "object type: %s\n", + msg, obj_states[obj->state], obj->astate, + obj->descr->name); } debug_objects_warnings++; } @@ -447,7 +450,10 @@ void debug_object_deactivate(void *addr, struct debug_obj_descr *descr) case ODEBUG_STATE_INIT: case ODEBUG_STATE_INACTIVE: case ODEBUG_STATE_ACTIVE: - obj->state = ODEBUG_STATE_INACTIVE; + if (!obj->astate) + obj->state = ODEBUG_STATE_INACTIVE; + else + debug_print_object(obj, "deactivate"); break; case ODEBUG_STATE_DESTROYED: @@ -553,6 +559,53 @@ out_unlock: raw_spin_unlock_irqrestore(&db->lock, flags); } +/** + * debug_object_active_state - debug checks object usage state machine + * @addr: address of the object + * @descr: pointer to an object specific debug description structure + * @expect: expected state + * @next: state to move to if expected state is found + */ +void +debug_object_active_state(void *addr, struct debug_obj_descr *descr, + unsigned int expect, unsigned int next) +{ + struct debug_bucket *db; + struct debug_obj *obj; + unsigned long flags; + + if (!debug_objects_enabled) + return; + + db = get_bucket((unsigned long) addr); + + raw_spin_lock_irqsave(&db->lock, flags); + + obj = lookup_object(addr, db); + if (obj) { + switch (obj->state) { + case ODEBUG_STATE_ACTIVE: + if (obj->astate == expect) + obj->astate = next; + else + debug_print_object(obj, "active_state"); + break; + + default: + debug_print_object(obj, "active_state"); + break; + } + } else { + struct debug_obj o = { .object = addr, + .state = ODEBUG_STATE_NOTAVAILABLE, + .descr = descr }; + + debug_print_object(&o, "active_state"); + } + + raw_spin_unlock_irqrestore(&db->lock, flags); +} + #ifdef CONFIG_DEBUG_OBJECTS_FREE static void __debug_check_no_obj_freed(const void *address, unsigned long size) { -- cgit v1.2.3 From 91af70814105f4c05e6e11b51c3269907b71794b Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Wed, 12 May 2010 11:38:45 +0100 Subject: rwsem: Test for no active locks in __rwsem_do_wake undo code If there are no active threasd using a semaphore, it is always correct to unqueue blocked threads. This seems to be what was intended in the undo code. What was done instead, was to look for a sem count of zero - this is an impossible situation, given that at least one thread is known to be queued on the semaphore. The code might be correct as written, but it's hard to reason about and it's not what was intended (otherwise the goto out would have been unconditional). Go for checking the active count - the alternative is not worth the headache. Signed-off-by: Michel Lespinasse Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- lib/rwsem.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/rwsem.c b/lib/rwsem.c index 3e3365e5665e..ceba8e28807a 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -136,9 +136,10 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading) out: return sem; - /* undo the change to count, but check for a transition 1->0 */ + /* undo the change to the active count, but check for a transition + * 1->0 */ undo: - if (rwsem_atomic_update(-RWSEM_ACTIVE_BIAS, sem) != 0) + if (rwsem_atomic_update(-RWSEM_ACTIVE_BIAS, sem) & RWSEM_ACTIVE_MASK) goto out; goto try_again; } -- cgit v1.2.3 From 43aa7ac736a4e21aae4882bd8f7c67403aed45b8 Mon Sep 17 00:00:00 2001 From: "kirjanov@gmail.com" Date: Sat, 15 May 2010 12:32:34 -0400 Subject: lib/btree: fix possible NULL pointer dereference mempool_alloc() can return null in atomic case. Signed-off-by: Denis Kirjanov Cc: Joern Engel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/btree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/btree.c b/lib/btree.c index 41859a820218..c9c6f0351526 100644 --- a/lib/btree.c +++ b/lib/btree.c @@ -95,7 +95,8 @@ static unsigned long *btree_node_alloc(struct btree_head *head, gfp_t gfp) unsigned long *node; node = mempool_alloc(head->mempool, gfp); - memset(node, 0, NODESIZE); + if (likely(node)) + memset(node, 0, NODESIZE); return node; } -- cgit v1.2.3 From b2be05273a1744d175bf4b67f6665637bb9ac7a8 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sat, 3 Apr 2010 19:34:56 +0100 Subject: panic: Allow warnings to set different taint flags WARN() is used in some places to report firmware or hardware bugs that are then worked-around. These bugs do not affect the stability of the kernel and should not set the flag for TAINT_WARN. To allow for this, add WARN_TAINT() and WARN_TAINT_ONCE() macros that take a taint number as argument. Architectures that implement warnings using trap instructions instead of calls to warn_slowpath_*() now implement __WARN_TAINT(taint) instead of __WARN(). Signed-off-by: Ben Hutchings Acked-by: Helge Deller Tested-by: Paul Mundt Signed-off-by: David Woodhouse --- Documentation/oops-tracing.txt | 1 + arch/parisc/include/asm/bug.h | 8 ++++---- arch/powerpc/include/asm/bug.h | 6 +++--- arch/s390/include/asm/bug.h | 8 ++++---- arch/sh/include/asm/bug.h | 4 ++-- include/asm-generic/bug.h | 34 ++++++++++++++++++++++++++++++++-- kernel/panic.c | 24 ++++++++++++++++++++---- lib/bug.c | 2 +- 8 files changed, 67 insertions(+), 20 deletions(-) (limited to 'lib') diff --git a/Documentation/oops-tracing.txt b/Documentation/oops-tracing.txt index c10c022b911c..069fab3ea4d6 100644 --- a/Documentation/oops-tracing.txt +++ b/Documentation/oops-tracing.txt @@ -256,6 +256,7 @@ characters, each representing a particular tainted value. 9: 'A' if the ACPI table has been overridden. 10: 'W' if a warning has previously been issued by the kernel. + (Though some warnings may set more specific taint flags.) 11: 'C' if a staging driver has been loaded. diff --git a/arch/parisc/include/asm/bug.h b/arch/parisc/include/asm/bug.h index 75e46c557a16..72cfdb0cfdd1 100644 --- a/arch/parisc/include/asm/bug.h +++ b/arch/parisc/include/asm/bug.h @@ -44,7 +44,7 @@ #endif #ifdef CONFIG_DEBUG_BUGVERBOSE -#define __WARN() \ +#define __WARN_TAINT(taint) \ do { \ asm volatile("\n" \ "1:\t" PARISC_BUG_BREAK_ASM "\n" \ @@ -54,11 +54,11 @@ "\t.org 2b+%c3\n" \ "\t.popsection" \ : : "i" (__FILE__), "i" (__LINE__), \ - "i" (BUGFLAG_WARNING), \ + "i" (BUGFLAG_TAINT(taint)), \ "i" (sizeof(struct bug_entry)) ); \ } while(0) #else -#define __WARN() \ +#define __WARN_TAINT(taint) \ do { \ asm volatile("\n" \ "1:\t" PARISC_BUG_BREAK_ASM "\n" \ @@ -67,7 +67,7 @@ "\t.short %c0\n" \ "\t.org 2b+%c1\n" \ "\t.popsection" \ - : : "i" (BUGFLAG_WARNING), \ + : : "i" (BUGFLAG_TAINT(taint)), \ "i" (sizeof(struct bug_entry)) ); \ } while(0) #endif diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index 2c15212e1700..065c590c991d 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -85,12 +85,12 @@ } \ } while (0) -#define __WARN() do { \ +#define __WARN_TAINT(taint) do { \ __asm__ __volatile__( \ "1: twi 31,0,0\n" \ _EMIT_BUG_ENTRY \ : : "i" (__FILE__), "i" (__LINE__), \ - "i" (BUGFLAG_WARNING), \ + "i" (BUGFLAG_TAINT(taint)), \ "i" (sizeof(struct bug_entry))); \ } while (0) @@ -104,7 +104,7 @@ "1: "PPC_TLNEI" %4,0\n" \ _EMIT_BUG_ENTRY \ : : "i" (__FILE__), "i" (__LINE__), \ - "i" (BUGFLAG_WARNING), \ + "i" (BUGFLAG_TAINT(TAINT_WARN)), \ "i" (sizeof(struct bug_entry)), \ "r" (__ret_warn_on)); \ } \ diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h index 9beeb9db9b23..bf90d1fd97a5 100644 --- a/arch/s390/include/asm/bug.h +++ b/arch/s390/include/asm/bug.h @@ -46,18 +46,18 @@ unreachable(); \ } while (0) -#define __WARN() do { \ - __EMIT_BUG(BUGFLAG_WARNING); \ +#define __WARN_TAINT(taint) do { \ + __EMIT_BUG(BUGFLAG_TAINT(taint)); \ } while (0) #define WARN_ON(x) ({ \ int __ret_warn_on = !!(x); \ if (__builtin_constant_p(__ret_warn_on)) { \ if (__ret_warn_on) \ - __EMIT_BUG(BUGFLAG_WARNING); \ + __WARN(); \ } else { \ if (unlikely(__ret_warn_on)) \ - __EMIT_BUG(BUGFLAG_WARNING); \ + __WARN(); \ } \ unlikely(__ret_warn_on); \ }) diff --git a/arch/sh/include/asm/bug.h b/arch/sh/include/asm/bug.h index d02c01b3e6b9..6323f864d111 100644 --- a/arch/sh/include/asm/bug.h +++ b/arch/sh/include/asm/bug.h @@ -48,7 +48,7 @@ do { \ "i" (sizeof(struct bug_entry))); \ } while (0) -#define __WARN() \ +#define __WARN_TAINT(taint) \ do { \ __asm__ __volatile__ ( \ "1:\t.short %O0\n" \ @@ -57,7 +57,7 @@ do { \ : "n" (TRAPA_BUG_OPCODE), \ "i" (__FILE__), \ "i" (__LINE__), \ - "i" (BUGFLAG_WARNING), \ + "i" (BUGFLAG_TAINT(taint)), \ "i" (sizeof(struct bug_entry))); \ } while (0) diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h index 18c435d7c082..c2c9ba032d46 100644 --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h @@ -25,7 +25,10 @@ struct bug_entry { }; #endif /* __ASSEMBLY__ */ -#define BUGFLAG_WARNING (1<<0) +#define BUGFLAG_WARNING (1 << 0) +#define BUGFLAG_TAINT(taint) (BUGFLAG_WARNING | ((taint) << 8)) +#define BUG_GET_TAINT(bug) ((bug)->flags >> 8) + #endif /* CONFIG_GENERIC_BUG */ /* @@ -56,17 +59,25 @@ struct bug_entry { * appear at runtime. Use the versions with printk format strings * to provide better diagnostics. */ -#ifndef __WARN +#ifndef __WARN_TAINT #ifndef __ASSEMBLY__ extern void warn_slowpath_fmt(const char *file, const int line, const char *fmt, ...) __attribute__((format(printf, 3, 4))); +extern void warn_slowpath_fmt_taint(const char *file, const int line, + unsigned taint, const char *fmt, ...) + __attribute__((format(printf, 4, 5))); extern void warn_slowpath_null(const char *file, const int line); #define WANT_WARN_ON_SLOWPATH #endif #define __WARN() warn_slowpath_null(__FILE__, __LINE__) #define __WARN_printf(arg...) warn_slowpath_fmt(__FILE__, __LINE__, arg) +#define __WARN_printf_taint(taint, arg...) \ + warn_slowpath_fmt_taint(__FILE__, __LINE__, taint, arg) #else +#define __WARN() __WARN_TAINT(TAINT_WARN) #define __WARN_printf(arg...) do { printk(arg); __WARN(); } while (0) +#define __WARN_printf_taint(taint, arg...) \ + do { printk(arg); __WARN_TAINT(taint); } while (0) #endif #ifndef WARN_ON @@ -87,6 +98,13 @@ extern void warn_slowpath_null(const char *file, const int line); }) #endif +#define WARN_TAINT(condition, taint, format...) ({ \ + int __ret_warn_on = !!(condition); \ + if (unlikely(__ret_warn_on)) \ + __WARN_printf_taint(taint, format); \ + unlikely(__ret_warn_on); \ +}) + #else /* !CONFIG_BUG */ #ifndef HAVE_ARCH_BUG #define BUG() do {} while(0) @@ -110,6 +128,8 @@ extern void warn_slowpath_null(const char *file, const int line); }) #endif +#define WARN_TAINT(condition, taint, format...) WARN_ON(condition) + #endif #define WARN_ON_ONCE(condition) ({ \ @@ -132,6 +152,16 @@ extern void warn_slowpath_null(const char *file, const int line); unlikely(__ret_warn_once); \ }) +#define WARN_TAINT_ONCE(condition, taint, format...) ({ \ + static bool __warned; \ + int __ret_warn_once = !!(condition); \ + \ + if (unlikely(__ret_warn_once)) \ + if (WARN_TAINT(!__warned, taint, format)) \ + __warned = true; \ + unlikely(__ret_warn_once); \ +}) + #define WARN_ON_RATELIMIT(condition, state) \ WARN_ON((condition) && __ratelimit(state)) diff --git a/kernel/panic.c b/kernel/panic.c index 13d966b4c14a..8b821bce66e6 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -365,7 +365,8 @@ struct slowpath_args { va_list args; }; -static void warn_slowpath_common(const char *file, int line, void *caller, struct slowpath_args *args) +static void warn_slowpath_common(const char *file, int line, void *caller, + unsigned taint, struct slowpath_args *args) { const char *board; @@ -381,7 +382,7 @@ static void warn_slowpath_common(const char *file, int line, void *caller, struc print_modules(); dump_stack(); print_oops_end_marker(); - add_taint(TAINT_WARN); + add_taint(taint); } void warn_slowpath_fmt(const char *file, int line, const char *fmt, ...) @@ -390,14 +391,29 @@ void warn_slowpath_fmt(const char *file, int line, const char *fmt, ...) args.fmt = fmt; va_start(args.args, fmt); - warn_slowpath_common(file, line, __builtin_return_address(0), &args); + warn_slowpath_common(file, line, __builtin_return_address(0), + TAINT_WARN, &args); va_end(args.args); } EXPORT_SYMBOL(warn_slowpath_fmt); +void warn_slowpath_fmt_taint(const char *file, int line, + unsigned taint, const char *fmt, ...) +{ + struct slowpath_args args; + + args.fmt = fmt; + va_start(args.args, fmt); + warn_slowpath_common(file, line, __builtin_return_address(0), + taint, &args); + va_end(args.args); +} +EXPORT_SYMBOL(warn_slowpath_fmt_taint); + void warn_slowpath_null(const char *file, int line) { - warn_slowpath_common(file, line, __builtin_return_address(0), NULL); + warn_slowpath_common(file, line, __builtin_return_address(0), + TAINT_WARN, NULL); } EXPORT_SYMBOL(warn_slowpath_null); #endif diff --git a/lib/bug.c b/lib/bug.c index 300e41afbf97..f13daf435211 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -165,7 +165,7 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) (void *)bugaddr); show_regs(regs); - add_taint(TAINT_WARN); + add_taint(BUG_GET_TAINT(bug)); return BUG_TRAP_TYPE_WARN; } -- cgit v1.2.3 From fab1c23242528771a955c475ef23d99156a71a7f Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Tue, 18 May 2010 14:35:18 +0800 Subject: Unified UUID/GUID definition There are many different UUID/GUID definitions in kernel, such as that in EFI, many file systems, some drivers, etc. Every kernel components need UUID/GUID has its own definition. This patch provides a unified definition for UUID/GUID. UUID is defined via typedef. This makes that UUID appears more like a preliminary type, and makes the data type explicit (comparing with implicit "u8 uuid[16]"). The binary representation of UUID/GUID can be little-endian (used by EFI, etc) or big-endian (defined by RFC4122), so both is defined. Signed-off-by: Huang Ying Signed-off-by: Andi Kleen Signed-off-by: Len Brown --- include/linux/uuid.h | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++ lib/Makefile | 2 +- lib/uuid.c | 53 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 124 insertions(+), 1 deletion(-) create mode 100644 include/linux/uuid.h create mode 100644 lib/uuid.c (limited to 'lib') diff --git a/include/linux/uuid.h b/include/linux/uuid.h new file mode 100644 index 000000000000..5b7efbfcee4e --- /dev/null +++ b/include/linux/uuid.h @@ -0,0 +1,70 @@ +/* + * UUID/GUID definition + * + * Copyright (C) 2010, Intel Corp. + * Huang Ying + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation; + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _LINUX_UUID_H_ +#define _LINUX_UUID_H_ + +#include +#include + +typedef struct { + __u8 b[16]; +} uuid_le; + +typedef struct { + __u8 b[16]; +} uuid_be; + +#define UUID_LE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ +((uuid_le) \ +{{ (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \ + (b) & 0xff, ((b) >> 8) & 0xff, \ + (c) & 0xff, ((c) >> 8) & 0xff, \ + (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }}) + +#define UUID_BE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ +((uuid_be) \ +{{ ((a) >> 24) & 0xff, ((a) >> 16) & 0xff, ((a) >> 8) & 0xff, (a) & 0xff, \ + ((b) >> 8) & 0xff, (b) & 0xff, \ + ((c) >> 8) & 0xff, (c) & 0xff, \ + (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }}) + +#define NULL_UUID_LE \ + UUID_LE(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00) + +#define NULL_UUID_BE \ + UUID_BE(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00) + +static inline int uuid_le_cmp(const uuid_le u1, const uuid_le u2) +{ + return memcmp(&u1, &u2, sizeof(uuid_le)); +} + +static inline int uuid_be_cmp(const uuid_be u1, const uuid_be u2) +{ + return memcmp(&u1, &u2, sizeof(uuid_be)); +} + +extern void uuid_le_gen(uuid_le *u); +extern void uuid_be_gen(uuid_be *u); + +#endif diff --git a/lib/Makefile b/lib/Makefile index 0d4015205c64..f3eb6e8766be 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -21,7 +21,7 @@ lib-y += kobject.o kref.o klist.o obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ - string_helpers.o gcd.o lcm.o list_sort.o + string_helpers.o gcd.o lcm.o list_sort.o uuid.o ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG diff --git a/lib/uuid.c b/lib/uuid.c new file mode 100644 index 000000000000..8fadd7cef46c --- /dev/null +++ b/lib/uuid.c @@ -0,0 +1,53 @@ +/* + * Unified UUID/GUID definition + * + * Copyright (C) 2009, Intel Corp. + * Huang Ying + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation; + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include + +static void __uuid_gen_common(__u8 b[16]) +{ + int i; + u32 r; + + for (i = 0; i < 4; i++) { + r = random32(); + memcpy(b + i * 4, &r, 4); + } + /* reversion 0b10 */ + b[8] = (b[8] & 0x3F) | 0x80; +} + +void uuid_le_gen(uuid_le *lu) +{ + __uuid_gen_common(lu->b); + /* version 4 : random generation */ + lu->b[7] = (lu->b[7] & 0x0F) | 0x40; +} +EXPORT_SYMBOL_GPL(uuid_le_gen); + +void uuid_be_gen(uuid_be *bu) +{ + __uuid_gen_common(bu->b); + /* version 4 : random generation */ + bu->b[6] = (bu->b[6] & 0x0F) | 0x40; +} +EXPORT_SYMBOL_GPL(uuid_be_gen); -- cgit v1.2.3 From dcc7871128e99458ca86186b7bc8bf27ff0c47b5 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Thu, 20 May 2010 21:04:21 -0500 Subject: kgdb: core changes to support kdb These are the minimum changes to the kgdb core in order to enable an API to connect a new front end (kdb) to the debug core. This patch introduces the dbg_kdb_mode variable controls where the user level I/O is routed. It will be routed to the gdbstub (kgdb) or to the kdb front end which is a simple shell available over the kgdboc connection. You can switch back and forth between kdb or the gdb stub mode of operation dynamically. From gdb stub mode you can blindly type "$3#33", or from the kdb mode you can enter "kgdb" to switch to the gdb stub. The logic in the debug core depends on kdb to look for the typical gdb connection sequences and return immediately with KGDB_PASS_EVENT if a gdb serial command sequence is detected. That should allow a reasonably seamless transition between kdb -> gdb without leaving the kernel exception state. The two gdb serial queries that kdb is responsible for detecting are the "?" and "qSupported" packets. CC: Ingo Molnar Signed-off-by: Jason Wessel Acked-by: Martin Hicks --- arch/arm/kernel/kgdb.c | 5 +++ arch/mips/kernel/kgdb.c | 5 +++ arch/powerpc/kernel/kgdb.c | 5 +++ arch/x86/kernel/kgdb.c | 5 +++ include/linux/kgdb.h | 11 ++++- kernel/debug/debug_core.c | 107 ++++++++++++++++++++++++++++++++++++++------- kernel/debug/debug_core.h | 24 ++++++++++ kernel/debug/gdbstub.c | 36 +++++++++++++++ lib/Kconfig.kgdb | 8 +++- 9 files changed, 186 insertions(+), 20 deletions(-) (limited to 'lib') diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c index a5b846b9895d..c868a8864117 100644 --- a/arch/arm/kernel/kgdb.c +++ b/arch/arm/kernel/kgdb.c @@ -98,6 +98,11 @@ sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task) gdb_regs[_CPSR] = thread_regs->ARM_cpsr; } +void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) +{ + regs->ARM_pc = pc; +} + static int compiled_break; int kgdb_arch_handle_exception(int exception_vector, int signo, diff --git a/arch/mips/kernel/kgdb.c b/arch/mips/kernel/kgdb.c index 50c9bb880667..6ed4c83c869b 100644 --- a/arch/mips/kernel/kgdb.c +++ b/arch/mips/kernel/kgdb.c @@ -180,6 +180,11 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) *(ptr++) = regs->cp0_epc; } +void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) +{ + regs->cp0_epc = pc; +} + /* * Calls linux_debug_hook before the kernel dies. If KGDB is enabled, * then try to fall into the debugger diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 41bada0298c8..c81e3de1306e 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -309,6 +309,11 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) (unsigned long)(((void *)gdb_regs) + NUMREGBYTES)); } +void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) +{ + regs->nip = pc; +} + /* * This function does PowerPC specific procesing for interfacing to gdb. */ diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index b2258ca91003..f95a2c0b915c 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -690,6 +690,11 @@ unsigned long kgdb_arch_pc(int exception, struct pt_regs *regs) return instruction_pointer(regs); } +void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip) +{ + regs->ip = ip; +} + struct kgdb_arch arch_kgdb_ops = { /* Breakpoint instruction: */ .gdb_bpt_instr = { 0xcc }, diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h index 4830142ec339..5b37df00000d 100644 --- a/include/linux/kgdb.h +++ b/include/linux/kgdb.h @@ -16,10 +16,12 @@ #include #include #include - #include +#ifdef CONFIG_HAVE_ARCH_KGDB #include +#endif +#ifdef CONFIG_KGDB struct pt_regs; /** @@ -262,6 +264,7 @@ extern struct kgdb_arch arch_kgdb_ops; extern unsigned long __weak kgdb_arch_pc(int exception, struct pt_regs *regs); +extern void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc); extern int kgdb_register_io_module(struct kgdb_io *local_kgdb_io_ops); extern void kgdb_unregister_io_module(struct kgdb_io *local_kgdb_io_ops); extern struct kgdb_io *dbg_io_ops; @@ -279,5 +282,9 @@ extern int kgdb_nmicallback(int cpu, void *regs); extern int kgdb_single_step; extern atomic_t kgdb_active; - +#define in_dbg_master() \ + (raw_smp_processor_id() == atomic_read(&kgdb_active)) +#else /* ! CONFIG_KGDB */ +#define in_dbg_master() (0) +#endif /* ! CONFIG_KGDB */ #endif /* _KGDB_H_ */ diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index 7e03969330bc..6e1fa829fdeb 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -77,6 +78,11 @@ static DEFINE_SPINLOCK(kgdb_registration_lock); static int kgdb_con_registered; /* determine if kgdb console output should be used */ static int kgdb_use_con; +/* Next cpu to become the master debug core */ +int dbg_switch_cpu; + +/* Use kdb or gdbserver mode */ +static int dbg_kdb_mode = 1; static int __init opt_kgdb_con(char *str) { @@ -100,6 +106,7 @@ static struct kgdb_bkpt kgdb_break[KGDB_MAX_BREAKPOINTS] = { * The CPU# of the active CPU, or -1 if none: */ atomic_t kgdb_active = ATOMIC_INIT(-1); +EXPORT_SYMBOL_GPL(kgdb_active); /* * We use NR_CPUs not PERCPU, in case kgdb is used to debug early @@ -301,7 +308,7 @@ int dbg_set_sw_break(unsigned long addr) return 0; } -static int kgdb_deactivate_sw_breakpoints(void) +int dbg_deactivate_sw_breakpoints(void) { unsigned long addr; int error; @@ -395,8 +402,14 @@ static int kgdb_io_ready(int print_wait) return 1; if (atomic_read(&kgdb_setting_breakpoint)) return 1; - if (print_wait) + if (print_wait) { +#ifdef CONFIG_KGDB_KDB + if (!dbg_kdb_mode) + printk(KERN_CRIT "KGDB: waiting... or $3#33 for KDB\n"); +#else printk(KERN_CRIT "KGDB: Waiting for remote debugger\n"); +#endif + } return 1; } @@ -410,7 +423,7 @@ static int kgdb_reenter_check(struct kgdb_state *ks) /* Panic on recursive debugger calls: */ exception_level++; addr = kgdb_arch_pc(ks->ex_vector, ks->linux_regs); - kgdb_deactivate_sw_breakpoints(); + dbg_deactivate_sw_breakpoints(); /* * If the break point removed ok at the place exception @@ -443,11 +456,24 @@ static int kgdb_reenter_check(struct kgdb_state *ks) return 1; } +static void dbg_cpu_switch(int cpu, int next_cpu) +{ + /* Mark the cpu we are switching away from as a slave when it + * holds the kgdb_active token. This must be done so that the + * that all the cpus wait in for the debug core will not enter + * again as the master. */ + if (cpu == atomic_read(&kgdb_active)) { + kgdb_info[cpu].exception_state |= DCPU_IS_SLAVE; + kgdb_info[cpu].exception_state &= ~DCPU_WANT_MASTER; + } + kgdb_info[next_cpu].exception_state |= DCPU_NEXT_MASTER; +} + static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs) { unsigned long flags; int sstep_tries = 100; - int error = 0; + int error; int i, cpu; int trace_on = 0; acquirelock: @@ -460,6 +486,8 @@ acquirelock: cpu = ks->cpu; kgdb_info[cpu].debuggerinfo = regs; kgdb_info[cpu].task = current; + kgdb_info[cpu].ret_state = 0; + kgdb_info[cpu].irq_depth = hardirq_count() >> HARDIRQ_SHIFT; /* * Make sure the above info reaches the primary CPU before * our cpu_in_kgdb[] flag setting does: @@ -471,7 +499,11 @@ acquirelock: * master cpu and acquire the kgdb_active lock: */ while (1) { - if (kgdb_info[cpu].exception_state & DCPU_WANT_MASTER) { +cpu_loop: + if (kgdb_info[cpu].exception_state & DCPU_NEXT_MASTER) { + kgdb_info[cpu].exception_state &= ~DCPU_NEXT_MASTER; + goto cpu_master_loop; + } else if (kgdb_info[cpu].exception_state & DCPU_WANT_MASTER) { if (atomic_cmpxchg(&kgdb_active, -1, cpu) == cpu) break; } else if (kgdb_info[cpu].exception_state & DCPU_IS_SLAVE) { @@ -513,7 +545,7 @@ return_normal: } if (!kgdb_io_ready(1)) { - error = 1; + kgdb_info[cpu].ret_state = 1; goto kgdb_restore; /* No I/O connection, resume the system */ } @@ -548,7 +580,7 @@ return_normal: * Wait for the other CPUs to be notified and be waiting for us: */ for_each_online_cpu(i) { - while (!atomic_read(&cpu_in_kgdb[i])) + while (kgdb_do_roundup && !atomic_read(&cpu_in_kgdb[i])) cpu_relax(); } @@ -557,7 +589,7 @@ return_normal: * in the debugger and all secondary CPUs are quiescent */ kgdb_post_primary_code(ks->linux_regs, ks->ex_vector, ks->err_code); - kgdb_deactivate_sw_breakpoints(); + dbg_deactivate_sw_breakpoints(); kgdb_single_step = 0; kgdb_contthread = current; exception_level = 0; @@ -565,8 +597,26 @@ return_normal: if (trace_on) tracing_off(); - /* Talk to debugger with gdbserial protocol */ - error = gdb_serial_stub(ks); + while (1) { +cpu_master_loop: + if (dbg_kdb_mode) { + kgdb_connected = 1; + error = kdb_stub(ks); + } else { + error = gdb_serial_stub(ks); + } + + if (error == DBG_PASS_EVENT) { + dbg_kdb_mode = !dbg_kdb_mode; + kgdb_connected = 0; + } else if (error == DBG_SWITCH_CPU_EVENT) { + dbg_cpu_switch(cpu, dbg_switch_cpu); + goto cpu_loop; + } else { + kgdb_info[cpu].ret_state = error; + break; + } + } /* Call the I/O driver's post_exception routine */ if (dbg_io_ops->post_exception) @@ -578,11 +628,16 @@ return_normal: for (i = NR_CPUS-1; i >= 0; i--) atomic_dec(&passive_cpu_wait[i]); /* - * Wait till all the CPUs have quit - * from the debugger. + * Wait till all the CPUs have quit from the debugger, + * but allow a CPU that hit an exception and is + * waiting to become the master to remain in the debug + * core. */ for_each_online_cpu(i) { - while (atomic_read(&cpu_in_kgdb[i])) + while (kgdb_do_roundup && + atomic_read(&cpu_in_kgdb[i]) && + !(kgdb_info[i].exception_state & + DCPU_WANT_MASTER)) cpu_relax(); } } @@ -603,7 +658,7 @@ kgdb_restore: clocksource_touch_watchdog(); local_irq_restore(flags); - return error; + return kgdb_info[cpu].ret_state; } /* @@ -632,7 +687,8 @@ kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs) return 0; /* Ouch, double exception ! */ kgdb_info[ks->cpu].exception_state |= DCPU_WANT_MASTER; ret = kgdb_cpu_enter(ks, regs); - kgdb_info[ks->cpu].exception_state &= ~DCPU_WANT_MASTER; + kgdb_info[ks->cpu].exception_state &= ~(DCPU_WANT_MASTER | + DCPU_IS_SLAVE); return ret; } @@ -665,7 +721,7 @@ static void kgdb_console_write(struct console *co, const char *s, /* If we're debugging, or KGDB has not connected, don't try * and print. */ - if (!kgdb_connected || atomic_read(&kgdb_active) != -1) + if (!kgdb_connected || atomic_read(&kgdb_active) != -1 || dbg_kdb_mode) return; local_irq_save(flags); @@ -687,8 +743,14 @@ static void sysrq_handle_dbg(int key, struct tty_struct *tty) printk(KERN_CRIT "ERROR: No KGDB I/O module available\n"); return; } - if (!kgdb_connected) + if (!kgdb_connected) { +#ifdef CONFIG_KGDB_KDB + if (!dbg_kdb_mode) + printk(KERN_CRIT "KGDB or $3#33 for KDB\n"); +#else printk(KERN_CRIT "Entering KGDB\n"); +#endif + } kgdb_breakpoint(); } @@ -817,6 +879,16 @@ void kgdb_unregister_io_module(struct kgdb_io *old_dbg_io_ops) } EXPORT_SYMBOL_GPL(kgdb_unregister_io_module); +int dbg_io_get_char(void) +{ + int ret = dbg_io_ops->read_char(); + if (!dbg_kdb_mode) + return ret; + if (ret == 127) + return 8; + return ret; +} + /** * kgdb_breakpoint - generate breakpoint exception * @@ -839,6 +911,7 @@ static int __init opt_kgdb_wait(char *str) { kgdb_break_asap = 1; + kdb_init(KDB_INIT_EARLY); if (kgdb_io_module_registered) kgdb_initial_breakpoint(); diff --git a/kernel/debug/debug_core.h b/kernel/debug/debug_core.h index db554f9be51d..44cf3de8cf9e 100644 --- a/kernel/debug/debug_core.h +++ b/kernel/debug/debug_core.h @@ -38,6 +38,8 @@ struct debuggerinfo_struct { void *debuggerinfo; struct task_struct *task; int exception_state; + int ret_state; + int irq_depth; }; extern struct debuggerinfo_struct kgdb_info[]; @@ -47,9 +49,31 @@ extern int dbg_remove_all_break(void); extern int dbg_set_sw_break(unsigned long addr); extern int dbg_remove_sw_break(unsigned long addr); extern int dbg_activate_sw_breakpoints(void); +extern int dbg_deactivate_sw_breakpoints(void); + +/* polled character access to i/o module */ +extern int dbg_io_get_char(void); + +/* stub return value for switching between the gdbstub and kdb */ +#define DBG_PASS_EVENT -12345 +/* Switch from one cpu to another */ +#define DBG_SWITCH_CPU_EVENT -123456 +extern int dbg_switch_cpu; /* gdbstub interface functions */ extern int gdb_serial_stub(struct kgdb_state *ks); extern void gdbstub_msg_write(const char *s, int len); +/* gdbstub functions used for kdb <-> gdbstub transition */ +extern int gdbstub_state(struct kgdb_state *ks, char *cmd); + +#ifdef CONFIG_KGDB_KDB +extern int kdb_stub(struct kgdb_state *ks); +#else /* ! CONFIG_KGDB_KDB */ +static inline int kdb_stub(struct kgdb_state *ks) +{ + return DBG_PASS_EVENT; +} +#endif /* CONFIG_KGDB_KDB */ + #endif /* _DEBUG_CORE_H_ */ diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c index ccdf0929f12d..188203a19657 100644 --- a/kernel/debug/gdbstub.c +++ b/kernel/debug/gdbstub.c @@ -887,6 +887,13 @@ int gdb_serial_stub(struct kgdb_state *ks) case 'Z': /* Break point set */ gdb_cmd_break(ks); break; +#ifdef CONFIG_KGDB_KDB + case '3': /* Escape into back into kdb */ + if (remcom_in_buffer[1] == '\0') { + gdb_cmd_detachkill(ks); + return DBG_PASS_EVENT; + } +#endif case 'C': /* Exception passing */ tmp = gdb_cmd_exception_pass(ks); if (tmp > 0) @@ -932,3 +939,32 @@ kgdb_exit: error = 1; return error; } + +int gdbstub_state(struct kgdb_state *ks, char *cmd) +{ + int error; + + switch (cmd[0]) { + case 'e': + error = kgdb_arch_handle_exception(ks->ex_vector, + ks->signo, + ks->err_code, + remcom_in_buffer, + remcom_out_buffer, + ks->linux_regs); + return error; + case 's': + case 'c': + strcpy(remcom_in_buffer, cmd); + return 0; + case '?': + gdb_cmd_status(ks); + break; + case '\0': + strcpy(remcom_out_buffer, ""); + break; + } + dbg_io_ops->write_char('+'); + put_packet(remcom_out_buffer); + return 0; +} diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb index 9b5d1d7f2ef7..78de43a5e902 100644 --- a/lib/Kconfig.kgdb +++ b/lib/Kconfig.kgdb @@ -3,7 +3,7 @@ config HAVE_ARCH_KGDB bool menuconfig KGDB - bool "KGDB: kernel debugging with remote gdb" + bool "KGDB: kernel debugger" depends on HAVE_ARCH_KGDB depends on DEBUG_KERNEL && EXPERIMENTAL help @@ -57,4 +57,10 @@ config KGDB_TESTS_BOOT_STRING information about other strings you could use beyond the default of V1F100. +config KGDB_KDB + bool "KGDB_KDB: include kdb frontend for kgdb" + default n + help + KDB frontend for kernel + endif # KGDB -- cgit v1.2.3 From ada64e4c98eb5f04a9ca223c5ff9e7ac22ce6404 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Thu, 20 May 2010 21:04:24 -0500 Subject: kgdboc,keyboard: Keyboard driver for kdb with kgdb This patch adds in the kdb PS/2 keyboard driver. This was mostly a direct port from the original kdb where I cleaned up the code against checkpatch.pl and added the glue to stitch it into kgdb. This patch also enables early kdb debug via kgdbwait and the keyboard. All the access to configure kdb using either a serial console or the keyboard is done via kgdboc. If you want to use only the keyboard and want to break in early you would add to your kernel command arguments: kgdboc=kbd kgdbwait If you wanted serial and or the keyboard access you could use: kgdboc=kbd,ttyS0 You can also configure kgdboc as a kernel module or at run time with the sysfs where you can activate and deactivate kgdb. Turn it on: echo kbd,ttyS0 > /sys/module/kgdboc/parameters/kgdboc Turn it off: echo "" > /sys/module/kgdboc/parameters/kgdboc Signed-off-by: Jason Wessel Reviewed-by: Dmitry Torokhov --- Documentation/kernel-parameters.txt | 8 +- drivers/serial/kgdboc.c | 61 +++++++++-- kernel/debug/kdb/Makefile | 1 + kernel/debug/kdb/kdb_keyboard.c | 212 ++++++++++++++++++++++++++++++++++++ lib/Kconfig.kgdb | 7 ++ 5 files changed, 279 insertions(+), 10 deletions(-) create mode 100644 kernel/debug/kdb/kdb_keyboard.c (limited to 'lib') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index b12bacd252fc..3845e3a84a52 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1121,9 +1121,11 @@ and is between 256 and 4096 characters. It is defined in the file zone if it does not. kgdboc= [HW] kgdb over consoles. - Requires a tty driver that supports console polling. - (only serial supported for now) - Format: [,baud] + Requires a tty driver that supports console polling, + or a supported polling keyboard driver (non-usb). + Serial only format: [,baud] + keyboard only format: kbd + keyboard and serial format: kbd,[,baud] kmac= [MIPS] korina ethernet MAC address. Configure the RouterBoard 532 series on-chip diff --git a/drivers/serial/kgdboc.c b/drivers/serial/kgdboc.c index eadc1ab6bbce..ecef6e1a599a 100644 --- a/drivers/serial/kgdboc.c +++ b/drivers/serial/kgdboc.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #define MAX_CONFIG_LEN 40 @@ -32,6 +33,40 @@ static struct kparam_string kps = { static struct tty_driver *kgdb_tty_driver; static int kgdb_tty_line; +#ifdef CONFIG_KDB_KEYBOARD +static int kgdboc_register_kbd(char **cptr) +{ + if (strncmp(*cptr, "kbd", 3) == 0) { + if (kdb_poll_idx < KDB_POLL_FUNC_MAX) { + kdb_poll_funcs[kdb_poll_idx] = kdb_get_kbd_char; + kdb_poll_idx++; + if (cptr[0][3] == ',') + *cptr += 4; + else + return 1; + } + } + return 0; +} + +static void kgdboc_unregister_kbd(void) +{ + int i; + + for (i = 0; i < kdb_poll_idx; i++) { + if (kdb_poll_funcs[i] == kdb_get_kbd_char) { + kdb_poll_idx--; + kdb_poll_funcs[i] = kdb_poll_funcs[kdb_poll_idx]; + kdb_poll_funcs[kdb_poll_idx] = NULL; + i--; + } + } +} +#else /* ! CONFIG_KDB_KEYBOARD */ +#define kgdboc_register_kbd(x) 0 +#define kgdboc_unregister_kbd() +#endif /* ! CONFIG_KDB_KEYBOARD */ + static int kgdboc_option_setup(char *opt) { if (strlen(opt) > MAX_CONFIG_LEN) { @@ -45,25 +80,38 @@ static int kgdboc_option_setup(char *opt) __setup("kgdboc=", kgdboc_option_setup); +static void cleanup_kgdboc(void) +{ + kgdboc_unregister_kbd(); + if (configured == 1) + kgdb_unregister_io_module(&kgdboc_io_ops); +} + static int configure_kgdboc(void) { struct tty_driver *p; int tty_line = 0; int err; + char *cptr = config; err = kgdboc_option_setup(config); if (err || !strlen(config) || isspace(config[0])) goto noconfig; err = -ENODEV; + kgdb_tty_driver = NULL; + + if (kgdboc_register_kbd(&cptr)) + goto do_register; - p = tty_find_polling_driver(config, &tty_line); + p = tty_find_polling_driver(cptr, &tty_line); if (!p) goto noconfig; kgdb_tty_driver = p; kgdb_tty_line = tty_line; +do_register: err = kgdb_register_io_module(&kgdboc_io_ops); if (err) goto noconfig; @@ -75,6 +123,7 @@ static int configure_kgdboc(void) noconfig: config[0] = 0; configured = 0; + cleanup_kgdboc(); return err; } @@ -88,20 +137,18 @@ static int __init init_kgdboc(void) return configure_kgdboc(); } -static void cleanup_kgdboc(void) -{ - if (configured == 1) - kgdb_unregister_io_module(&kgdboc_io_ops); -} - static int kgdboc_get_char(void) { + if (!kgdb_tty_driver) + return -1; return kgdb_tty_driver->ops->poll_get_char(kgdb_tty_driver, kgdb_tty_line); } static void kgdboc_put_char(u8 chr) { + if (!kgdb_tty_driver) + return; kgdb_tty_driver->ops->poll_put_char(kgdb_tty_driver, kgdb_tty_line, chr); } diff --git a/kernel/debug/kdb/Makefile b/kernel/debug/kdb/Makefile index d1e925eddbcd..d4fc58f4b88d 100644 --- a/kernel/debug/kdb/Makefile +++ b/kernel/debug/kdb/Makefile @@ -8,6 +8,7 @@ CCVERSION := $(shell $(CC) -v 2>&1 | sed -ne '$$p') obj-y := kdb_io.o kdb_main.o kdb_support.o kdb_bt.o gen-kdb_cmds.o kdb_bp.o kdb_debugger.o +obj-$(CONFIG_KDB_KEYBOARD) += kdb_keyboard.o clean-files := gen-kdb_cmds.c diff --git a/kernel/debug/kdb/kdb_keyboard.c b/kernel/debug/kdb/kdb_keyboard.c new file mode 100644 index 000000000000..4bca634975c0 --- /dev/null +++ b/kernel/debug/kdb/kdb_keyboard.c @@ -0,0 +1,212 @@ +/* + * Kernel Debugger Architecture Dependent Console I/O handler + * + * This file is subject to the terms and conditions of the GNU General Public + * License. + * + * Copyright (c) 1999-2006 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2009 Wind River Systems, Inc. All Rights Reserved. + */ + +#include +#include +#include +#include +#include + +/* Keyboard Controller Registers on normal PCs. */ + +#define KBD_STATUS_REG 0x64 /* Status register (R) */ +#define KBD_DATA_REG 0x60 /* Keyboard data register (R/W) */ + +/* Status Register Bits */ + +#define KBD_STAT_OBF 0x01 /* Keyboard output buffer full */ +#define KBD_STAT_MOUSE_OBF 0x20 /* Mouse output buffer full */ + +static int kbd_exists; + +/* + * Check if the keyboard controller has a keypress for us. + * Some parts (Enter Release, LED change) are still blocking polled here, + * but hopefully they are all short. + */ +int kdb_get_kbd_char(void) +{ + int scancode, scanstatus; + static int shift_lock; /* CAPS LOCK state (0-off, 1-on) */ + static int shift_key; /* Shift next keypress */ + static int ctrl_key; + u_short keychar; + + if (KDB_FLAG(NO_I8042) || KDB_FLAG(NO_VT_CONSOLE) || + (inb(KBD_STATUS_REG) == 0xff && inb(KBD_DATA_REG) == 0xff)) { + kbd_exists = 0; + return -1; + } + kbd_exists = 1; + + if ((inb(KBD_STATUS_REG) & KBD_STAT_OBF) == 0) + return -1; + + /* + * Fetch the scancode + */ + scancode = inb(KBD_DATA_REG); + scanstatus = inb(KBD_STATUS_REG); + + /* + * Ignore mouse events. + */ + if (scanstatus & KBD_STAT_MOUSE_OBF) + return -1; + + /* + * Ignore release, trigger on make + * (except for shift keys, where we want to + * keep the shift state so long as the key is + * held down). + */ + + if (((scancode&0x7f) == 0x2a) || ((scancode&0x7f) == 0x36)) { + /* + * Next key may use shift table + */ + if ((scancode & 0x80) == 0) + shift_key = 1; + else + shift_key = 0; + return -1; + } + + if ((scancode&0x7f) == 0x1d) { + /* + * Left ctrl key + */ + if ((scancode & 0x80) == 0) + ctrl_key = 1; + else + ctrl_key = 0; + return -1; + } + + if ((scancode & 0x80) != 0) + return -1; + + scancode &= 0x7f; + + /* + * Translate scancode + */ + + if (scancode == 0x3a) { + /* + * Toggle caps lock + */ + shift_lock ^= 1; + +#ifdef KDB_BLINK_LED + kdb_toggleled(0x4); +#endif + return -1; + } + + if (scancode == 0x0e) { + /* + * Backspace + */ + return 8; + } + + /* Special Key */ + switch (scancode) { + case 0xF: /* Tab */ + return 9; + case 0x53: /* Del */ + return 4; + case 0x47: /* Home */ + return 1; + case 0x4F: /* End */ + return 5; + case 0x4B: /* Left */ + return 2; + case 0x48: /* Up */ + return 16; + case 0x50: /* Down */ + return 14; + case 0x4D: /* Right */ + return 6; + } + + if (scancode == 0xe0) + return -1; + + /* + * For Japanese 86/106 keyboards + * See comment in drivers/char/pc_keyb.c. + * - Masahiro Adegawa + */ + if (scancode == 0x73) + scancode = 0x59; + else if (scancode == 0x7d) + scancode = 0x7c; + + if (!shift_lock && !shift_key && !ctrl_key) { + keychar = plain_map[scancode]; + } else if ((shift_lock || shift_key) && key_maps[1]) { + keychar = key_maps[1][scancode]; + } else if (ctrl_key && key_maps[4]) { + keychar = key_maps[4][scancode]; + } else { + keychar = 0x0020; + kdb_printf("Unknown state/scancode (%d)\n", scancode); + } + keychar &= 0x0fff; + if (keychar == '\t') + keychar = ' '; + switch (KTYP(keychar)) { + case KT_LETTER: + case KT_LATIN: + if (isprint(keychar)) + break; /* printable characters */ + /* drop through */ + case KT_SPEC: + if (keychar == K_ENTER) + break; + /* drop through */ + default: + return -1; /* ignore unprintables */ + } + + if ((scancode & 0x7f) == 0x1c) { + /* + * enter key. All done. Absorb the release scancode. + */ + while ((inb(KBD_STATUS_REG) & KBD_STAT_OBF) == 0) + ; + + /* + * Fetch the scancode + */ + scancode = inb(KBD_DATA_REG); + scanstatus = inb(KBD_STATUS_REG); + + while (scanstatus & KBD_STAT_MOUSE_OBF) { + scancode = inb(KBD_DATA_REG); + scanstatus = inb(KBD_STATUS_REG); + } + + if (scancode != 0x9c) { + /* + * Wasn't an enter-release, why not? + */ + kdb_printf("kdb: expected enter got 0x%x status 0x%x\n", + scancode, scanstatus); + } + + return 13; + } + + return keychar & 0xff; +} +EXPORT_SYMBOL_GPL(kdb_get_kbd_char); diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb index 78de43a5e902..ee8ae7132f20 100644 --- a/lib/Kconfig.kgdb +++ b/lib/Kconfig.kgdb @@ -63,4 +63,11 @@ config KGDB_KDB help KDB frontend for kernel +config KDB_KEYBOARD + bool "KGDB_KDB: keyboard as input device" + depends on VT && KGDB_KDB + default n + help + KDB can use a PS/2 type keyboard for an input device + endif # KGDB -- cgit v1.2.3 From f503b5ae53cb557ac351a668fcac1baab1cef0db Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Thu, 20 May 2010 21:04:25 -0500 Subject: x86,kgdb: Add low level debug hook The only way the debugger can handle a trap in inside rcu_lock, notify_die, or atomic_notifier_call_chain without a triple fault is to have a low level "first opportunity handler" in the int3 exception handler. Generally this will be something the vast majority of folks will not need, but for those who need it, it is added as a kernel .config option called KGDB_LOW_LEVEL_TRAP. CC: Ingo Molnar CC: Thomas Gleixner CC: H. Peter Anvin CC: x86@kernel.org Signed-off-by: Jason Wessel --- arch/x86/include/asm/kgdb.h | 3 +++ arch/x86/kernel/kgdb.c | 22 +++++++++++++++++++++- arch/x86/kernel/traps.c | 6 ++++++ include/linux/kgdb.h | 1 + kernel/debug/debug_core.c | 2 +- lib/Kconfig.kgdb | 9 +++++++++ 6 files changed, 41 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/arch/x86/include/asm/kgdb.h b/arch/x86/include/asm/kgdb.h index e6c6c808489f..006da3687cdc 100644 --- a/arch/x86/include/asm/kgdb.h +++ b/arch/x86/include/asm/kgdb.h @@ -76,4 +76,7 @@ static inline void arch_kgdb_breakpoint(void) #define BREAK_INSTR_SIZE 1 #define CACHE_FLUSH_IS_SAFE 1 +extern int kgdb_ll_trap(int cmd, const char *str, + struct pt_regs *regs, long err, int trap, int sig); + #endif /* _ASM_X86_KGDB_H */ diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index acba57169938..95b89d4cb8f1 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -538,7 +538,7 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) return NOTIFY_DONE; } - if (kgdb_handle_exception(args->trapnr, args->signr, args->err, regs)) + if (kgdb_handle_exception(args->trapnr, args->signr, cmd, regs)) return NOTIFY_DONE; /* Must touch watchdog before return to normal operation */ @@ -546,6 +546,26 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) return NOTIFY_STOP; } +#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP +int kgdb_ll_trap(int cmd, const char *str, + struct pt_regs *regs, long err, int trap, int sig) +{ + struct die_args args = { + .regs = regs, + .str = str, + .err = err, + .trapnr = trap, + .signr = sig, + + }; + + if (!kgdb_io_module_registered) + return NOTIFY_DONE; + + return __kgdb_notify(&args, cmd); +} +#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ + static int kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr) { diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 02cfb9b8f5b1..7eaad4c5110a 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -451,6 +452,11 @@ void restart_nmi(void) /* May run on IST stack. */ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) { +#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP + if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) + == NOTIFY_STOP) + return; +#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ #ifdef CONFIG_KPROBES if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h index 406f6f9286f3..19d1b29a2694 100644 --- a/include/linux/kgdb.h +++ b/include/linux/kgdb.h @@ -60,6 +60,7 @@ struct uart_port; void kgdb_breakpoint(void); extern int kgdb_connected; +extern int kgdb_io_module_registered; extern atomic_t kgdb_setting_breakpoint; extern atomic_t kgdb_cpu_doing_single_step; diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index 88a83a225374..375e42f0baf0 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -66,7 +66,7 @@ int kgdb_connected; EXPORT_SYMBOL_GPL(kgdb_connected); /* All the KGDB handlers are installed */ -static int kgdb_io_module_registered; +int kgdb_io_module_registered; /* Guard for recursive entry */ static int exception_level; diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb index ee8ae7132f20..c56ccb4ad292 100644 --- a/lib/Kconfig.kgdb +++ b/lib/Kconfig.kgdb @@ -57,6 +57,15 @@ config KGDB_TESTS_BOOT_STRING information about other strings you could use beyond the default of V1F100. +config KGDB_LOW_LEVEL_TRAP + bool "KGDB: Allow debugging with traps in notifiers" + depends on X86 + default n + help + This will add an extra call back to kgdb for the breakpoint + exception handler on which will will allow kgdb to step + through a notify handler. + config KGDB_KDB bool "KGDB_KDB: include kdb frontend for kgdb" default n -- cgit v1.2.3 From 5dd11d5d47d248850c58292513f0e164ba98b01e Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Thu, 20 May 2010 21:04:26 -0500 Subject: mips,kgdb: kdb low level trap catch and stack trace The only way the debugger can handle a trap in inside rcu_lock, notify_die, or atomic_notifier_call_chain without a recursive fault is to have a low level "first opportunity handler" do_trap_or_bp() handler. Generally this will be something the vast majority of folks will not need, but for those who need it, it is added as a kernel .config option called KGDB_LOW_LEVEL_TRAP. Also added was a die notification for oops such that kdb can catch an oops for analysis. There appeared to be no obvious way to pass the struct pt_regs from the original exception back to the stack back tracer, so a special case was added to show_stack() for when kdb is active because you generally desire to generally look at the back trace of the original exception. Signed-off-by: Jason Wessel Acked-by: Ralf Baechle --- arch/mips/include/asm/kgdb.h | 2 ++ arch/mips/kernel/kgdb.c | 22 +++++++++++++++++++++- arch/mips/kernel/traps.c | 13 +++++++++++++ lib/Kconfig.kgdb | 2 +- 4 files changed, 37 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/arch/mips/include/asm/kgdb.h b/arch/mips/include/asm/kgdb.h index 48223b09396c..19002d605ac4 100644 --- a/arch/mips/include/asm/kgdb.h +++ b/arch/mips/include/asm/kgdb.h @@ -38,6 +38,8 @@ extern int kgdb_early_setup; extern void *saved_vectors[32]; extern void handle_exception(struct pt_regs *regs); extern void breakinst(void); +extern int kgdb_ll_trap(int cmd, const char *str, + struct pt_regs *regs, long err, int trap, int sig); #endif /* __KERNEL__ */ diff --git a/arch/mips/kernel/kgdb.c b/arch/mips/kernel/kgdb.c index 6ed4c83c869b..9b78ff6e9b84 100644 --- a/arch/mips/kernel/kgdb.c +++ b/arch/mips/kernel/kgdb.c @@ -203,7 +203,7 @@ static int kgdb_mips_notify(struct notifier_block *self, unsigned long cmd, if (atomic_read(&kgdb_active) != -1) kgdb_nmicallback(smp_processor_id(), regs); - if (kgdb_handle_exception(trap, compute_signal(trap), 0, regs)) + if (kgdb_handle_exception(trap, compute_signal(trap), cmd, regs)) return NOTIFY_DONE; if (atomic_read(&kgdb_setting_breakpoint)) @@ -217,6 +217,26 @@ static int kgdb_mips_notify(struct notifier_block *self, unsigned long cmd, return NOTIFY_STOP; } +#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP +int kgdb_ll_trap(int cmd, const char *str, + struct pt_regs *regs, long err, int trap, int sig) +{ + struct die_args args = { + .regs = regs, + .str = str, + .err = err, + .trapnr = trap, + .signr = sig, + + }; + + if (!kgdb_io_module_registered) + return NOTIFY_DONE; + + return kgdb_mips_notify(NULL, cmd, &args); +} +#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ + static struct notifier_block kgdb_notifier = { .notifier_call = kgdb_mips_notify, }; diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index d612c6dcb746..950bde8813fc 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -185,6 +186,11 @@ void show_stack(struct task_struct *task, unsigned long *sp) regs.regs[29] = task->thread.reg29; regs.regs[31] = 0; regs.cp0_epc = task->thread.reg31; +#ifdef CONFIG_KGDB_KDB + } else if (atomic_read(&kgdb_active) != -1 && + kdb_current_regs) { + memcpy(®s, kdb_current_regs, sizeof(regs)); +#endif /* CONFIG_KGDB_KDB */ } else { prepare_frametrace(®s); } @@ -360,6 +366,8 @@ void __noreturn die(const char * str, struct pt_regs * regs) unsigned long dvpret = dvpe(); #endif /* CONFIG_MIPS_MT_SMTC */ + notify_die(DIE_OOPS, str, (struct pt_regs *)regs, SIGSEGV, 0, 0); + console_verbose(); spin_lock_irq(&die_lock); bust_spinlocks(1); @@ -704,6 +712,11 @@ static void do_trap_or_bp(struct pt_regs *regs, unsigned int code, siginfo_t info; char b[40]; +#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP + if (kgdb_ll_trap(DIE_TRAP, str, regs, code, 0, 0) == NOTIFY_STOP) + return; +#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ + if (notify_die(DIE_TRAP, str, regs, code, 0, 0) == NOTIFY_STOP) return; diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb index c56ccb4ad292..43cb93fa2651 100644 --- a/lib/Kconfig.kgdb +++ b/lib/Kconfig.kgdb @@ -59,7 +59,7 @@ config KGDB_TESTS_BOOT_STRING config KGDB_LOW_LEVEL_TRAP bool "KGDB: Allow debugging with traps in notifiers" - depends on X86 + depends on X86 || MIPS default n help This will add an extra call back to kgdb for the breakpoint -- cgit v1.2.3 From db1afffab0b5d9f6d31f8f4bea44c9cb3bc59351 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 16 Mar 2010 15:14:51 +1100 Subject: kref: remove kref_set Of the three uses of kref_set in the kernel: One really should be kref_put as the code is letting go of a reference, Two really should be kref_init because the kref is being initialised. This suggests that making kref_set available encourages bad code. So fix the three uses and remove kref_set completely. Signed-off-by: NeilBrown Acked-by: Mimi Zohar Acked-by: Serge Hallyn Signed-off-by: Greg Kroah-Hartman --- include/linux/kref.h | 1 - kernel/user_namespace.c | 4 ++-- lib/kref.c | 15 ++------------- security/integrity/ima/ima_iint.c | 4 ++-- 4 files changed, 6 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/include/linux/kref.h b/include/linux/kref.h index baf4b9e4b194..6cc38fc07ab7 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -21,7 +21,6 @@ struct kref { atomic_t refcount; }; -void kref_set(struct kref *kref, int num); void kref_init(struct kref *kref); void kref_get(struct kref *kref); int kref_put(struct kref *kref, void (*release) (struct kref *kref)); diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 076c7c8215b0..b2d70d38dff4 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -54,8 +54,8 @@ int create_user_ns(struct cred *new) #endif /* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */ - /* alloc_uid() incremented the userns refcount. Just set it to 1 */ - kref_set(&ns->kref, 1); + /* root_user holds a reference to ns, our reference can be dropped */ + put_user_ns(ns); return 0; } diff --git a/lib/kref.c b/lib/kref.c index 6d19f690380b..d3d227a08a4b 100644 --- a/lib/kref.c +++ b/lib/kref.c @@ -15,24 +15,14 @@ #include #include -/** - * kref_set - initialize object and set refcount to requested number. - * @kref: object in question. - * @num: initial reference counter - */ -void kref_set(struct kref *kref, int num) -{ - atomic_set(&kref->refcount, num); - smp_mb(); -} - /** * kref_init - initialize object. * @kref: object in question. */ void kref_init(struct kref *kref) { - kref_set(kref, 1); + atomic_set(&kref->refcount, 1); + smp_mb(); } /** @@ -72,7 +62,6 @@ int kref_put(struct kref *kref, void (*release)(struct kref *kref)) return 0; } -EXPORT_SYMBOL(kref_set); EXPORT_SYMBOL(kref_init); EXPORT_SYMBOL(kref_get); EXPORT_SYMBOL(kref_put); diff --git a/security/integrity/ima/ima_iint.c b/security/integrity/ima/ima_iint.c index 2dc2d6594145..7625b85c2274 100644 --- a/security/integrity/ima/ima_iint.c +++ b/security/integrity/ima/ima_iint.c @@ -94,7 +94,7 @@ void iint_free(struct kref *kref) iint->opencount); iint->opencount = 0; } - kref_set(&iint->refcount, 1); + kref_init(&iint->refcount); kmem_cache_free(iint_cache, iint); } @@ -133,7 +133,7 @@ static void init_once(void *foo) iint->readcount = 0; iint->writecount = 0; iint->opencount = 0; - kref_set(&iint->refcount, 1); + kref_init(&iint->refcount); } static int __init ima_iintcache_init(void) -- cgit v1.2.3 From bc451f2058238013e1cdf4acd443c01734d332f0 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 30 Mar 2010 11:31:25 -0700 Subject: kobj: Add basic infrastructure for dealing with namespaces. Move complete knowledge of namespaces into the kobject layer so we can use that information when reporting kobjects to userspace. Signed-off-by: Eric W. Biederman Signed-off-by: Greg Kroah-Hartman --- drivers/base/class.c | 9 +++++ drivers/base/core.c | 77 +++++++++++++++++++++++++++++------- include/linux/device.h | 3 ++ include/linux/kobject.h | 26 ++++++++++++ lib/kobject.c | 103 ++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 204 insertions(+), 14 deletions(-) (limited to 'lib') diff --git a/drivers/base/class.c b/drivers/base/class.c index 9c6a0d6408e7..8e231d05b400 100644 --- a/drivers/base/class.c +++ b/drivers/base/class.c @@ -63,6 +63,14 @@ static void class_release(struct kobject *kobj) kfree(cp); } +static const struct kobj_ns_type_operations *class_child_ns_type(struct kobject *kobj) +{ + struct class_private *cp = to_class(kobj); + struct class *class = cp->class; + + return class->ns_type; +} + static const struct sysfs_ops class_sysfs_ops = { .show = class_attr_show, .store = class_attr_store, @@ -71,6 +79,7 @@ static const struct sysfs_ops class_sysfs_ops = { static struct kobj_type class_ktype = { .sysfs_ops = &class_sysfs_ops, .release = class_release, + .child_ns_type = class_child_ns_type, }; /* Hotplug events for classes go to the class class_subsys */ diff --git a/drivers/base/core.c b/drivers/base/core.c index 356dd011b8f9..f0699918e2f6 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -131,9 +131,21 @@ static void device_release(struct kobject *kobj) kfree(p); } +static const void *device_namespace(struct kobject *kobj) +{ + struct device *dev = to_dev(kobj); + const void *ns = NULL; + + if (dev->class && dev->class->ns_type) + ns = dev->class->namespace(dev); + + return ns; +} + static struct kobj_type device_ktype = { .release = device_release, .sysfs_ops = &dev_sysfs_ops, + .namespace = device_namespace, }; @@ -595,11 +607,59 @@ static struct kobject *virtual_device_parent(struct device *dev) return virtual_dir; } -static struct kobject *get_device_parent(struct device *dev, - struct device *parent) +struct class_dir { + struct kobject kobj; + struct class *class; +}; + +#define to_class_dir(obj) container_of(obj, struct class_dir, kobj) + +static void class_dir_release(struct kobject *kobj) +{ + struct class_dir *dir = to_class_dir(kobj); + kfree(dir); +} + +static const +struct kobj_ns_type_operations *class_dir_child_ns_type(struct kobject *kobj) { + struct class_dir *dir = to_class_dir(kobj); + return dir->class->ns_type; +} + +static struct kobj_type class_dir_ktype = { + .release = class_dir_release, + .sysfs_ops = &kobj_sysfs_ops, + .child_ns_type = class_dir_child_ns_type +}; + +static struct kobject * +class_dir_create_and_add(struct class *class, struct kobject *parent_kobj) +{ + struct class_dir *dir; int retval; + dir = kzalloc(sizeof(*dir), GFP_KERNEL); + if (!dir) + return NULL; + + dir->class = class; + kobject_init(&dir->kobj, &class_dir_ktype); + + dir->kobj.kset = &class->p->class_dirs; + + retval = kobject_add(&dir->kobj, parent_kobj, "%s", class->name); + if (retval < 0) { + kobject_put(&dir->kobj); + return NULL; + } + return &dir->kobj; +} + + +static struct kobject *get_device_parent(struct device *dev, + struct device *parent) +{ if (dev->class) { static DEFINE_MUTEX(gdp_mutex); struct kobject *kobj = NULL; @@ -634,18 +694,7 @@ static struct kobject *get_device_parent(struct device *dev, } /* or create a new class-directory at the parent device */ - k = kobject_create(); - if (!k) { - mutex_unlock(&gdp_mutex); - return NULL; - } - k->kset = &dev->class->p->class_dirs; - retval = kobject_add(k, parent_kobj, "%s", dev->class->name); - if (retval < 0) { - mutex_unlock(&gdp_mutex); - kobject_put(k); - return NULL; - } + k = class_dir_create_and_add(dev->class, parent_kobj); /* do not emit an uevent for this simple "glue" directory */ mutex_unlock(&gdp_mutex); return k; diff --git a/include/linux/device.h b/include/linux/device.h index 6f9619190aaf..7bb9f426f3e6 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -202,6 +202,9 @@ struct class { int (*suspend)(struct device *dev, pm_message_t state); int (*resume)(struct device *dev); + const struct kobj_ns_type_operations *ns_type; + const void *(*namespace)(struct device *dev); + const struct dev_pm_ops *pm; struct class_private *p; diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 3950d3c2850d..d9456f69904f 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -108,6 +108,8 @@ struct kobj_type { void (*release)(struct kobject *kobj); const struct sysfs_ops *sysfs_ops; struct attribute **default_attrs; + const struct kobj_ns_type_operations *(*child_ns_type)(struct kobject *kobj); + const void *(*namespace)(struct kobject *kobj); }; struct kobj_uevent_env { @@ -134,6 +136,30 @@ struct kobj_attribute { extern const struct sysfs_ops kobj_sysfs_ops; +enum kobj_ns_type { + KOBJ_NS_TYPE_NONE = 0, + KOBJ_NS_TYPES +}; + +struct sock; +struct kobj_ns_type_operations { + enum kobj_ns_type type; + const void *(*current_ns)(void); + const void *(*netlink_ns)(struct sock *sk); + const void *(*initial_ns)(void); +}; + +int kobj_ns_type_register(const struct kobj_ns_type_operations *ops); +int kobj_ns_type_registered(enum kobj_ns_type type); +const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent); +const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj); + +const void *kobj_ns_current(enum kobj_ns_type type); +const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk); +const void *kobj_ns_initial(enum kobj_ns_type type); +void kobj_ns_exit(enum kobj_ns_type type, const void *ns); + + /** * struct kset - a set of kobjects of a specific type, belonging to a specific subsystem. * diff --git a/lib/kobject.c b/lib/kobject.c index 8115eb1bbf4d..bbb2bb40ee1f 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -850,6 +850,109 @@ struct kset *kset_create_and_add(const char *name, } EXPORT_SYMBOL_GPL(kset_create_and_add); + +static DEFINE_SPINLOCK(kobj_ns_type_lock); +static const struct kobj_ns_type_operations *kobj_ns_ops_tbl[KOBJ_NS_TYPES]; + +int kobj_ns_type_register(const struct kobj_ns_type_operations *ops) +{ + enum kobj_ns_type type = ops->type; + int error; + + spin_lock(&kobj_ns_type_lock); + + error = -EINVAL; + if (type >= KOBJ_NS_TYPES) + goto out; + + error = -EINVAL; + if (type <= KOBJ_NS_TYPE_NONE) + goto out; + + error = -EBUSY; + if (kobj_ns_ops_tbl[type]) + goto out; + + error = 0; + kobj_ns_ops_tbl[type] = ops; + +out: + spin_unlock(&kobj_ns_type_lock); + return error; +} + +int kobj_ns_type_registered(enum kobj_ns_type type) +{ + int registered = 0; + + spin_lock(&kobj_ns_type_lock); + if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES)) + registered = kobj_ns_ops_tbl[type] != NULL; + spin_unlock(&kobj_ns_type_lock); + + return registered; +} + +const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent) +{ + const struct kobj_ns_type_operations *ops = NULL; + + if (parent && parent->ktype->child_ns_type) + ops = parent->ktype->child_ns_type(parent); + + return ops; +} + +const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj) +{ + return kobj_child_ns_ops(kobj->parent); +} + + +const void *kobj_ns_current(enum kobj_ns_type type) +{ + const void *ns = NULL; + + spin_lock(&kobj_ns_type_lock); + if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) && + kobj_ns_ops_tbl[type]) + ns = kobj_ns_ops_tbl[type]->current_ns(); + spin_unlock(&kobj_ns_type_lock); + + return ns; +} + +const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk) +{ + const void *ns = NULL; + + spin_lock(&kobj_ns_type_lock); + if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) && + kobj_ns_ops_tbl[type]) + ns = kobj_ns_ops_tbl[type]->netlink_ns(sk); + spin_unlock(&kobj_ns_type_lock); + + return ns; +} + +const void *kobj_ns_initial(enum kobj_ns_type type) +{ + const void *ns = NULL; + + spin_lock(&kobj_ns_type_lock); + if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) && + kobj_ns_ops_tbl[type]) + ns = kobj_ns_ops_tbl[type]->initial_ns(); + spin_unlock(&kobj_ns_type_lock); + + return ns; +} + +void kobj_ns_exit(enum kobj_ns_type type, const void *ns) +{ +} + + EXPORT_SYMBOL(kobject_get); EXPORT_SYMBOL(kobject_put); EXPORT_SYMBOL(kobject_del); -- cgit v1.2.3 From 3ff195b011d7decf501a4d55aeed312731094796 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 30 Mar 2010 11:31:26 -0700 Subject: sysfs: Implement sysfs tagged directory support. The problem. When implementing a network namespace I need to be able to have multiple network devices with the same name. Currently this is a problem for /sys/class/net/*, /sys/devices/virtual/net/*, and potentially a few other directories of the form /sys/ ... /net/*. What this patch does is to add an additional tag field to the sysfs dirent structure. For directories that should show different contents depending on the context such as /sys/class/net/, and /sys/devices/virtual/net/ this tag field is used to specify the context in which those directories should be visible. Effectively this is the same as creating multiple distinct directories with the same name but internally to sysfs the result is nicer. I am calling the concept of a single directory that looks like multiple directories all at the same path in the filesystem tagged directories. For the networking namespace the set of directories whose contents I need to filter with tags can depend on the presence or absence of hotplug hardware or which modules are currently loaded. Which means I need a simple race free way to setup those directories as tagged. To achieve a reace free design all tagged directories are created and managed by sysfs itself. Users of this interface: - define a type in the sysfs_tag_type enumeration. - call sysfs_register_ns_types with the type and it's operations - sysfs_exit_ns when an individual tag is no longer valid - Implement mount_ns() which returns the ns of the calling process so we can attach it to a sysfs superblock. - Implement ktype.namespace() which returns the ns of a syfs kobject. Everything else is left up to sysfs and the driver layer. For the network namespace mount_ns and namespace() are essentially one line functions, and look to remain that. Tags are currently represented a const void * pointers as that is both generic, prevides enough information for equality comparisons, and is trivial to create for current users, as it is just the existing namespace pointer. The work needed in sysfs is more extensive. At each directory or symlink creating I need to check if the directory it is being created in is a tagged directory and if so generate the appropriate tag to place on the sysfs_dirent. Likewise at each symlink or directory removal I need to check if the sysfs directory it is being removed from is a tagged directory and if so figure out which tag goes along with the name I am deleting. Currently only directories which hold kobjects, and symlinks are supported. There is not enough information in the current file attribute interfaces to give us anything to discriminate on which makes it useless, and there are no potential users which makes it an uninteresting problem to solve. Signed-off-by: Eric W. Biederman Signed-off-by: Benjamin Thery Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpiolib.c | 2 +- drivers/md/bitmap.c | 4 +- drivers/md/md.c | 6 +-- fs/sysfs/bin.c | 2 +- fs/sysfs/dir.c | 112 ++++++++++++++++++++++++++++++++++++++----------- fs/sysfs/file.c | 17 ++++---- fs/sysfs/group.c | 6 +-- fs/sysfs/inode.c | 4 +- fs/sysfs/mount.c | 33 ++++++++++++++- fs/sysfs/symlink.c | 15 +++++-- fs/sysfs/sysfs.h | 20 +++++++-- include/linux/sysfs.h | 10 +++++ lib/kobject.c | 1 + 13 files changed, 181 insertions(+), 51 deletions(-) (limited to 'lib') diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index eb0c3fe44b29..cae1b8c5b08c 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -399,7 +399,7 @@ static int gpio_setup_irq(struct gpio_desc *desc, struct device *dev, goto free_id; } - pdesc->value_sd = sysfs_get_dirent(dev->kobj.sd, "value"); + pdesc->value_sd = sysfs_get_dirent(dev->kobj.sd, NULL, "value"); if (!pdesc->value_sd) { ret = -ENODEV; goto free_id; diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 26ac8aad0b19..f084249295d9 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -1678,9 +1678,9 @@ int bitmap_create(mddev_t *mddev) bitmap->mddev = mddev; - bm = sysfs_get_dirent(mddev->kobj.sd, "bitmap"); + bm = sysfs_get_dirent(mddev->kobj.sd, NULL, "bitmap"); if (bm) { - bitmap->sysfs_can_clear = sysfs_get_dirent(bm, "can_clear"); + bitmap->sysfs_can_clear = sysfs_get_dirent(bm, NULL, "can_clear"); sysfs_put(bm); } else bitmap->sysfs_can_clear = NULL; diff --git a/drivers/md/md.c b/drivers/md/md.c index cefd63daff31..a9fd491796ac 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1766,7 +1766,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) kobject_del(&rdev->kobj); goto fail; } - rdev->sysfs_state = sysfs_get_dirent(rdev->kobj.sd, "state"); + rdev->sysfs_state = sysfs_get_dirent(rdev->kobj.sd, NULL, "state"); list_add_rcu(&rdev->same_set, &mddev->disks); bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk); @@ -4189,7 +4189,7 @@ static int md_alloc(dev_t dev, char *name) mutex_unlock(&disks_mutex); if (!error) { kobject_uevent(&mddev->kobj, KOBJ_ADD); - mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, "array_state"); + mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, NULL, "array_state"); } mddev_put(mddev); return error; @@ -4398,7 +4398,7 @@ static int do_md_run(mddev_t * mddev) printk(KERN_WARNING "md: cannot register extra attributes for %s\n", mdname(mddev)); - mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action"); + mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, NULL, "sync_action"); } else if (mddev->ro == 2) /* auto-readonly not meaningful */ mddev->ro = 0; diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c index e9d293593e52..806b277453f9 100644 --- a/fs/sysfs/bin.c +++ b/fs/sysfs/bin.c @@ -501,7 +501,7 @@ int sysfs_create_bin_file(struct kobject *kobj, void sysfs_remove_bin_file(struct kobject *kobj, const struct bin_attribute *attr) { - sysfs_hash_and_remove(kobj->sd, attr->attr.name); + sysfs_hash_and_remove(kobj->sd, NULL, attr->attr.name); } EXPORT_SYMBOL_GPL(sysfs_create_bin_file); diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 590717861c7a..b2b83067ccc8 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -380,9 +380,15 @@ int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) { struct sysfs_inode_attrs *ps_iattr; - if (sysfs_find_dirent(acxt->parent_sd, sd->s_name)) + if (sysfs_find_dirent(acxt->parent_sd, sd->s_ns, sd->s_name)) return -EEXIST; + if (sysfs_ns_type(acxt->parent_sd) && !sd->s_ns) { + WARN(1, KERN_WARNING "sysfs: ns required in '%s' for '%s'\n", + acxt->parent_sd->s_name, sd->s_name); + return -EINVAL; + } + sd->s_parent = sysfs_get(acxt->parent_sd); sysfs_link_sibling(sd); @@ -533,13 +539,17 @@ void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt) * Pointer to sysfs_dirent if found, NULL if not. */ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, + const void *ns, const unsigned char *name) { struct sysfs_dirent *sd; - for (sd = parent_sd->s_dir.children; sd; sd = sd->s_sibling) + for (sd = parent_sd->s_dir.children; sd; sd = sd->s_sibling) { + if (sd->s_ns != ns) + continue; if (!strcmp(sd->s_name, name)) return sd; + } return NULL; } @@ -558,12 +568,13 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, * Pointer to sysfs_dirent if found, NULL if not. */ struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, + const void *ns, const unsigned char *name) { struct sysfs_dirent *sd; mutex_lock(&sysfs_mutex); - sd = sysfs_find_dirent(parent_sd, name); + sd = sysfs_find_dirent(parent_sd, ns, name); sysfs_get(sd); mutex_unlock(&sysfs_mutex); @@ -572,7 +583,8 @@ struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, EXPORT_SYMBOL_GPL(sysfs_get_dirent); static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd, - const char *name, struct sysfs_dirent **p_sd) + enum kobj_ns_type type, const void *ns, const char *name, + struct sysfs_dirent **p_sd) { umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO; struct sysfs_addrm_cxt acxt; @@ -583,6 +595,9 @@ static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd, sd = sysfs_new_dirent(name, mode, SYSFS_DIR); if (!sd) return -ENOMEM; + + sd->s_flags |= (type << SYSFS_NS_TYPE_SHIFT); + sd->s_ns = ns; sd->s_dir.kobj = kobj; /* link in */ @@ -601,7 +616,25 @@ static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd, int sysfs_create_subdir(struct kobject *kobj, const char *name, struct sysfs_dirent **p_sd) { - return create_dir(kobj, kobj->sd, name, p_sd); + return create_dir(kobj, kobj->sd, + KOBJ_NS_TYPE_NONE, NULL, name, p_sd); +} + +static enum kobj_ns_type sysfs_read_ns_type(struct kobject *kobj) +{ + const struct kobj_ns_type_operations *ops; + enum kobj_ns_type type; + + ops = kobj_child_ns_ops(kobj); + if (!ops) + return KOBJ_NS_TYPE_NONE; + + type = ops->type; + BUG_ON(type <= KOBJ_NS_TYPE_NONE); + BUG_ON(type >= KOBJ_NS_TYPES); + BUG_ON(!kobj_ns_type_registered(type)); + + return type; } /** @@ -610,7 +643,9 @@ int sysfs_create_subdir(struct kobject *kobj, const char *name, */ int sysfs_create_dir(struct kobject * kobj) { + enum kobj_ns_type type; struct sysfs_dirent *parent_sd, *sd; + const void *ns = NULL; int error = 0; BUG_ON(!kobj); @@ -620,7 +655,11 @@ int sysfs_create_dir(struct kobject * kobj) else parent_sd = &sysfs_root; - error = create_dir(kobj, parent_sd, kobject_name(kobj), &sd); + if (sysfs_ns_type(parent_sd)) + ns = kobj->ktype->namespace(kobj); + type = sysfs_read_ns_type(kobj); + + error = create_dir(kobj, parent_sd, type, ns, kobject_name(kobj), &sd); if (!error) kobj->sd = sd; return error; @@ -630,13 +669,19 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct dentry *ret = NULL; - struct sysfs_dirent *parent_sd = dentry->d_parent->d_fsdata; + struct dentry *parent = dentry->d_parent; + struct sysfs_dirent *parent_sd = parent->d_fsdata; struct sysfs_dirent *sd; struct inode *inode; + enum kobj_ns_type type; + const void *ns; mutex_lock(&sysfs_mutex); - sd = sysfs_find_dirent(parent_sd, dentry->d_name.name); + type = sysfs_ns_type(parent_sd); + ns = sysfs_info(dir->i_sb)->ns[type]; + + sd = sysfs_find_dirent(parent_sd, ns, dentry->d_name.name); /* no such entry */ if (!sd) { @@ -735,7 +780,8 @@ void sysfs_remove_dir(struct kobject * kobj) } int sysfs_rename(struct sysfs_dirent *sd, - struct sysfs_dirent *new_parent_sd, const char *new_name) + struct sysfs_dirent *new_parent_sd, const void *new_ns, + const char *new_name) { const char *dup_name = NULL; int error; @@ -743,12 +789,12 @@ int sysfs_rename(struct sysfs_dirent *sd, mutex_lock(&sysfs_mutex); error = 0; - if ((sd->s_parent == new_parent_sd) && + if ((sd->s_parent == new_parent_sd) && (sd->s_ns == new_ns) && (strcmp(sd->s_name, new_name) == 0)) goto out; /* nothing to rename */ error = -EEXIST; - if (sysfs_find_dirent(new_parent_sd, new_name)) + if (sysfs_find_dirent(new_parent_sd, new_ns, new_name)) goto out; /* rename sysfs_dirent */ @@ -770,6 +816,7 @@ int sysfs_rename(struct sysfs_dirent *sd, sd->s_parent = new_parent_sd; sysfs_link_sibling(sd); } + sd->s_ns = new_ns; error = 0; out: @@ -780,19 +827,28 @@ int sysfs_rename(struct sysfs_dirent *sd, int sysfs_rename_dir(struct kobject *kobj, const char *new_name) { - return sysfs_rename(kobj->sd, kobj->sd->s_parent, new_name); + struct sysfs_dirent *parent_sd = kobj->sd->s_parent; + const void *new_ns = NULL; + + if (sysfs_ns_type(parent_sd)) + new_ns = kobj->ktype->namespace(kobj); + + return sysfs_rename(kobj->sd, parent_sd, new_ns, new_name); } int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj) { struct sysfs_dirent *sd = kobj->sd; struct sysfs_dirent *new_parent_sd; + const void *new_ns = NULL; BUG_ON(!sd->s_parent); + if (sysfs_ns_type(sd->s_parent)) + new_ns = kobj->ktype->namespace(kobj); new_parent_sd = new_parent_kobj && new_parent_kobj->sd ? new_parent_kobj->sd : &sysfs_root; - return sysfs_rename(sd, new_parent_sd, sd->s_name); + return sysfs_rename(sd, new_parent_sd, new_ns, sd->s_name); } /* Relationship between s_mode and the DT_xxx types */ @@ -807,32 +863,35 @@ static int sysfs_dir_release(struct inode *inode, struct file *filp) return 0; } -static struct sysfs_dirent *sysfs_dir_pos(struct sysfs_dirent *parent_sd, - ino_t ino, struct sysfs_dirent *pos) +static struct sysfs_dirent *sysfs_dir_pos(const void *ns, + struct sysfs_dirent *parent_sd, ino_t ino, struct sysfs_dirent *pos) { if (pos) { int valid = !(pos->s_flags & SYSFS_FLAG_REMOVED) && pos->s_parent == parent_sd && ino == pos->s_ino; sysfs_put(pos); - if (valid) - return pos; + if (!valid) + pos = NULL; } - pos = NULL; - if ((ino > 1) && (ino < INT_MAX)) { + if (!pos && (ino > 1) && (ino < INT_MAX)) { pos = parent_sd->s_dir.children; while (pos && (ino > pos->s_ino)) pos = pos->s_sibling; } + while (pos && pos->s_ns != ns) + pos = pos->s_sibling; return pos; } -static struct sysfs_dirent *sysfs_dir_next_pos(struct sysfs_dirent *parent_sd, - ino_t ino, struct sysfs_dirent *pos) +static struct sysfs_dirent *sysfs_dir_next_pos(const void *ns, + struct sysfs_dirent *parent_sd, ino_t ino, struct sysfs_dirent *pos) { - pos = sysfs_dir_pos(parent_sd, ino, pos); + pos = sysfs_dir_pos(ns, parent_sd, ino, pos); if (pos) pos = pos->s_sibling; + while (pos && pos->s_ns != ns) + pos = pos->s_sibling; return pos; } @@ -841,8 +900,13 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) struct dentry *dentry = filp->f_path.dentry; struct sysfs_dirent * parent_sd = dentry->d_fsdata; struct sysfs_dirent *pos = filp->private_data; + enum kobj_ns_type type; + const void *ns; ino_t ino; + type = sysfs_ns_type(parent_sd); + ns = sysfs_info(dentry->d_sb)->ns[type]; + if (filp->f_pos == 0) { ino = parent_sd->s_ino; if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) == 0) @@ -857,9 +921,9 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) filp->f_pos++; } mutex_lock(&sysfs_mutex); - for (pos = sysfs_dir_pos(parent_sd, filp->f_pos, pos); + for (pos = sysfs_dir_pos(ns, parent_sd, filp->f_pos, pos); pos; - pos = sysfs_dir_next_pos(parent_sd, filp->f_pos, pos)) { + pos = sysfs_dir_next_pos(ns, parent_sd, filp->f_pos, pos)) { const char * name; unsigned int type; int len, ret; diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index e222b2582746..1beaa739d0a6 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -478,9 +478,12 @@ void sysfs_notify(struct kobject *k, const char *dir, const char *attr) mutex_lock(&sysfs_mutex); if (sd && dir) - sd = sysfs_find_dirent(sd, dir); + /* Only directories are tagged, so no need to pass + * a tag explicitly. + */ + sd = sysfs_find_dirent(sd, NULL, dir); if (sd && attr) - sd = sysfs_find_dirent(sd, attr); + sd = sysfs_find_dirent(sd, NULL, attr); if (sd) sysfs_notify_dirent(sd); @@ -569,7 +572,7 @@ int sysfs_add_file_to_group(struct kobject *kobj, int error; if (group) - dir_sd = sysfs_get_dirent(kobj->sd, group); + dir_sd = sysfs_get_dirent(kobj->sd, NULL, group); else dir_sd = sysfs_get(kobj->sd); @@ -599,7 +602,7 @@ int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode) mutex_lock(&sysfs_mutex); rc = -ENOENT; - sd = sysfs_find_dirent(kobj->sd, attr->name); + sd = sysfs_find_dirent(kobj->sd, NULL, attr->name); if (!sd) goto out; @@ -624,7 +627,7 @@ EXPORT_SYMBOL_GPL(sysfs_chmod_file); void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr) { - sysfs_hash_and_remove(kobj->sd, attr->name); + sysfs_hash_and_remove(kobj->sd, NULL, attr->name); } void sysfs_remove_files(struct kobject * kobj, const struct attribute **ptr) @@ -646,11 +649,11 @@ void sysfs_remove_file_from_group(struct kobject *kobj, struct sysfs_dirent *dir_sd; if (group) - dir_sd = sysfs_get_dirent(kobj->sd, group); + dir_sd = sysfs_get_dirent(kobj->sd, NULL, group); else dir_sd = sysfs_get(kobj->sd); if (dir_sd) { - sysfs_hash_and_remove(dir_sd, attr->name); + sysfs_hash_and_remove(dir_sd, NULL, attr->name); sysfs_put(dir_sd); } } diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index fe611949a7f7..23c1e598792a 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -23,7 +23,7 @@ static void remove_files(struct sysfs_dirent *dir_sd, struct kobject *kobj, int i; for (i = 0, attr = grp->attrs; *attr; i++, attr++) - sysfs_hash_and_remove(dir_sd, (*attr)->name); + sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name); } static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj, @@ -39,7 +39,7 @@ static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj, * visibility. Do this by first removing then * re-adding (if required) the file */ if (update) - sysfs_hash_and_remove(dir_sd, (*attr)->name); + sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name); if (grp->is_visible) { mode = grp->is_visible(kobj, *attr, i); if (!mode) @@ -132,7 +132,7 @@ void sysfs_remove_group(struct kobject * kobj, struct sysfs_dirent *sd; if (grp->name) { - sd = sysfs_get_dirent(dir_sd, grp->name); + sd = sysfs_get_dirent(dir_sd, NULL, grp->name); if (!sd) { WARN(!sd, KERN_WARNING "sysfs group %p not found for " "kobject '%s'\n", grp, kobject_name(kobj)); diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index a4a0a9419711..cf2bad1462ea 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -324,7 +324,7 @@ void sysfs_delete_inode(struct inode *inode) sysfs_put(sd); } -int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name) +int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const void *ns, const char *name) { struct sysfs_addrm_cxt acxt; struct sysfs_dirent *sd; @@ -334,7 +334,7 @@ int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name) sysfs_addrm_start(&acxt, dir_sd); - sd = sysfs_find_dirent(dir_sd, name); + sd = sysfs_find_dirent(dir_sd, ns, name); if (sd) sysfs_remove_one(&acxt, sd); diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 50e4fb6a7403..1afa32ba242c 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -35,7 +35,7 @@ static const struct super_operations sysfs_ops = { struct sysfs_dirent sysfs_root = { .s_name = "", .s_count = ATOMIC_INIT(1), - .s_flags = SYSFS_DIR, + .s_flags = SYSFS_DIR | (KOBJ_NS_TYPE_NONE << SYSFS_NS_TYPE_SHIFT), .s_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO, .s_ino = 1, }; @@ -76,7 +76,13 @@ static int sysfs_test_super(struct super_block *sb, void *data) { struct sysfs_super_info *sb_info = sysfs_info(sb); struct sysfs_super_info *info = data; + enum kobj_ns_type type; int found = 1; + + for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) { + if (sb_info->ns[type] != info->ns[type]) + found = 0; + } return found; } @@ -93,6 +99,7 @@ static int sysfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { struct sysfs_super_info *info; + enum kobj_ns_type type; struct super_block *sb; int error; @@ -100,6 +107,10 @@ static int sysfs_get_sb(struct file_system_type *fs_type, info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) goto out; + + for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) + info->ns[type] = kobj_ns_current(type); + sb = sget(fs_type, sysfs_test_super, sysfs_set_super, info); if (IS_ERR(sb) || sb->s_fs_info != info) kfree(info); @@ -137,6 +148,26 @@ static struct file_system_type sysfs_fs_type = { .kill_sb = sysfs_kill_sb, }; +void sysfs_exit_ns(enum kobj_ns_type type, const void *ns) +{ + struct super_block *sb; + + mutex_lock(&sysfs_mutex); + spin_lock(&sb_lock); + list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) { + struct sysfs_super_info *info = sysfs_info(sb); + /* Ignore superblocks that are in the process of unmounting */ + if (sb->s_count <= S_BIAS) + continue; + /* Ignore superblocks with the wrong ns */ + if (info->ns[type] != ns) + continue; + info->ns[type] = NULL; + } + spin_unlock(&sb_lock); + mutex_unlock(&sysfs_mutex); +} + int __init sysfs_init(void) { int err = -ENOMEM; diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index 942f239a2132..b6ebdaa00f37 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c @@ -58,6 +58,8 @@ static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target, if (!sd) goto out_put; + if (sysfs_ns_type(parent_sd)) + sd->s_ns = target->ktype->namespace(target); sd->s_symlink.target_sd = target_sd; target_sd = NULL; /* reference is now owned by the symlink */ @@ -121,7 +123,7 @@ void sysfs_remove_link(struct kobject * kobj, const char * name) else parent_sd = kobj->sd; - sysfs_hash_and_remove(parent_sd, name); + sysfs_hash_and_remove(parent_sd, NULL, name); } /** @@ -137,6 +139,7 @@ int sysfs_rename_link(struct kobject *kobj, struct kobject *targ, const char *old, const char *new) { struct sysfs_dirent *parent_sd, *sd = NULL; + const void *old_ns = NULL, *new_ns = NULL; int result; if (!kobj) @@ -144,8 +147,11 @@ int sysfs_rename_link(struct kobject *kobj, struct kobject *targ, else parent_sd = kobj->sd; + if (targ->sd) + old_ns = targ->sd->s_ns; + result = -ENOENT; - sd = sysfs_get_dirent(parent_sd, old); + sd = sysfs_get_dirent(parent_sd, old_ns, old); if (!sd) goto out; @@ -155,7 +161,10 @@ int sysfs_rename_link(struct kobject *kobj, struct kobject *targ, if (sd->s_symlink.target_sd->s_dir.kobj != targ) goto out; - result = sysfs_rename(sd, parent_sd, new); + if (sysfs_ns_type(parent_sd)) + new_ns = targ->ktype->namespace(targ); + + result = sysfs_rename(sd, parent_sd, new_ns, new); out: sysfs_put(sd); diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index 030a39dbb02c..93847d54c2e3 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -58,6 +58,7 @@ struct sysfs_dirent { struct sysfs_dirent *s_sibling; const char *s_name; + const void *s_ns; union { struct sysfs_elem_dir s_dir; struct sysfs_elem_symlink s_symlink; @@ -81,14 +82,22 @@ struct sysfs_dirent { #define SYSFS_COPY_NAME (SYSFS_DIR | SYSFS_KOBJ_LINK) #define SYSFS_ACTIVE_REF (SYSFS_KOBJ_ATTR | SYSFS_KOBJ_BIN_ATTR) -#define SYSFS_FLAG_MASK ~SYSFS_TYPE_MASK -#define SYSFS_FLAG_REMOVED 0x0200 +#define SYSFS_NS_TYPE_MASK 0xff00 +#define SYSFS_NS_TYPE_SHIFT 8 + +#define SYSFS_FLAG_MASK ~(SYSFS_NS_TYPE_MASK|SYSFS_TYPE_MASK) +#define SYSFS_FLAG_REMOVED 0x020000 static inline unsigned int sysfs_type(struct sysfs_dirent *sd) { return sd->s_flags & SYSFS_TYPE_MASK; } +static inline enum kobj_ns_type sysfs_ns_type(struct sysfs_dirent *sd) +{ + return (sd->s_flags & SYSFS_NS_TYPE_MASK) >> SYSFS_NS_TYPE_SHIFT; +} + #ifdef CONFIG_DEBUG_LOCK_ALLOC #define sysfs_dirent_init_lockdep(sd) \ do { \ @@ -115,6 +124,7 @@ struct sysfs_addrm_cxt { * mount.c */ struct sysfs_super_info { + const void *ns[KOBJ_NS_TYPES]; }; #define sysfs_info(SB) ((struct sysfs_super_info *)(SB->s_fs_info)) extern struct sysfs_dirent sysfs_root; @@ -140,8 +150,10 @@ void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd); void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt); struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, + const void *ns, const unsigned char *name); struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, + const void *ns, const unsigned char *name); struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type); @@ -152,7 +164,7 @@ int sysfs_create_subdir(struct kobject *kobj, const char *name, void sysfs_remove_subdir(struct sysfs_dirent *sd); int sysfs_rename(struct sysfs_dirent *sd, - struct sysfs_dirent *new_parent_sd, const char *new_name); + struct sysfs_dirent *new_parent_sd, const void *ns, const char *new_name); static inline struct sysfs_dirent *__sysfs_get(struct sysfs_dirent *sd) { @@ -182,7 +194,7 @@ int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); -int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name); +int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const void *ns, const char *name); int sysfs_inode_init(void); /* diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index f0496b3d1811..1885d21b0c80 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -20,6 +20,7 @@ struct kobject; struct module; +enum kobj_ns_type; /* FIXME * The *owner field is no longer used. @@ -168,10 +169,14 @@ void sysfs_remove_file_from_group(struct kobject *kobj, void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr); void sysfs_notify_dirent(struct sysfs_dirent *sd); struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, + const void *ns, const unsigned char *name); struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd); void sysfs_put(struct sysfs_dirent *sd); void sysfs_printk_last_file(void); + +void sysfs_exit_ns(enum kobj_ns_type type, const void *tag); + int __must_check sysfs_init(void); #else /* CONFIG_SYSFS */ @@ -301,6 +306,7 @@ static inline void sysfs_notify_dirent(struct sysfs_dirent *sd) } static inline struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, + const void *ns, const unsigned char *name) { return NULL; @@ -313,6 +319,10 @@ static inline void sysfs_put(struct sysfs_dirent *sd) { } +static inline void sysfs_exit_ns(enum kobj_ns_type type, const void *tag) +{ +} + static inline int __must_check sysfs_init(void) { return 0; diff --git a/lib/kobject.c b/lib/kobject.c index bbb2bb40ee1f..b2c6d1f56e65 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -950,6 +950,7 @@ const void *kobj_ns_initial(enum kobj_ns_type type) void kobj_ns_exit(enum kobj_ns_type type, const void *ns) { + sysfs_exit_ns(type, ns); } -- cgit v1.2.3 From be867b194a3ae3c680c29521287ae49b4d44d420 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Mon, 3 May 2010 16:23:15 -0500 Subject: sysfs: Comment sysfs directory tagging logic Add some in-line comments to explain the new infrastructure, which was introduced to support sysfs directory tagging with namespaces. I think an overall description someplace might be good too, but it didn't really seem to fit into Documentation/filesystems/sysfs.txt, which appears more geared toward users, rather than maintainers, of sysfs. (Tejun, please let me know if I can make anything clearer or failed altogether to comment something that should be commented.) Signed-off-by: Serge E. Hallyn Cc: Eric W. Biederman Cc: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/dir.c | 8 ++++++++ fs/sysfs/sysfs.h | 13 ++++++++++++- include/linux/kobject.h | 11 +++++++++++ include/linux/sysfs.h | 1 + lib/kobject.c | 11 +++++++++++ 5 files changed, 43 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index a63eb4ba7867..7e54bac8c4b0 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -614,6 +614,14 @@ int sysfs_create_subdir(struct kobject *kobj, const char *name, KOBJ_NS_TYPE_NONE, NULL, name, p_sd); } +/** + * sysfs_read_ns_type: return associated ns_type + * @kobj: the kobject being queried + * + * Each kobject can be tagged with exactly one namespace type + * (i.e. network or user). Return the ns_type associated with + * this object if any + */ static enum kobj_ns_type sysfs_read_ns_type(struct kobject *kobj) { const struct kobj_ns_type_operations *ops; diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index 93847d54c2e3..6a13105b5594 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -58,7 +58,7 @@ struct sysfs_dirent { struct sysfs_dirent *s_sibling; const char *s_name; - const void *s_ns; + const void *s_ns; /* namespace tag */ union { struct sysfs_elem_dir s_dir; struct sysfs_elem_symlink s_symlink; @@ -82,6 +82,7 @@ struct sysfs_dirent { #define SYSFS_COPY_NAME (SYSFS_DIR | SYSFS_KOBJ_LINK) #define SYSFS_ACTIVE_REF (SYSFS_KOBJ_ATTR | SYSFS_KOBJ_BIN_ATTR) +/* identify any namespace tag on sysfs_dirents */ #define SYSFS_NS_TYPE_MASK 0xff00 #define SYSFS_NS_TYPE_SHIFT 8 @@ -93,6 +94,10 @@ static inline unsigned int sysfs_type(struct sysfs_dirent *sd) return sd->s_flags & SYSFS_TYPE_MASK; } +/* + * Return any namespace tags on this dirent. + * enum kobj_ns_type is defined in linux/kobject.h + */ static inline enum kobj_ns_type sysfs_ns_type(struct sysfs_dirent *sd) { return (sd->s_flags & SYSFS_NS_TYPE_MASK) >> SYSFS_NS_TYPE_SHIFT; @@ -123,6 +128,12 @@ struct sysfs_addrm_cxt { /* * mount.c */ + +/* + * Each sb is associated with a set of namespace tags (i.e. + * the network namespace of the task which mounted this sysfs + * instance). + */ struct sysfs_super_info { const void *ns[KOBJ_NS_TYPES]; }; diff --git a/include/linux/kobject.h b/include/linux/kobject.h index d9456f69904f..b60d2dfe4e69 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -136,12 +136,23 @@ struct kobj_attribute { extern const struct sysfs_ops kobj_sysfs_ops; +/* + * Namespace types which are used to tag kobjects and sysfs entries. + * Network namespace will likely be the first. + */ enum kobj_ns_type { KOBJ_NS_TYPE_NONE = 0, KOBJ_NS_TYPES }; struct sock; + +/* + * Callbacks so sysfs can determine namespaces + * @current_ns: return calling task's namespace + * @netlink_ns: return namespace to which a sock belongs (right?) + * @initial_ns: return the initial namespace (i.e. init_net_ns) + */ struct kobj_ns_type_operations { enum kobj_ns_type type; const void *(*current_ns)(void); diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 976c4664b216..47f1c95b5298 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -178,6 +178,7 @@ struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd); void sysfs_put(struct sysfs_dirent *sd); void sysfs_printk_last_file(void); +/* Called to clear a ns tag when it is no longer valid */ void sysfs_exit_ns(enum kobj_ns_type type, const void *tag); int __must_check sysfs_init(void); diff --git a/lib/kobject.c b/lib/kobject.c index b2c6d1f56e65..f07c57252e82 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -948,6 +948,17 @@ const void *kobj_ns_initial(enum kobj_ns_type type) return ns; } +/* + * kobj_ns_exit - invalidate a namespace tag + * + * @type: the namespace type (i.e. KOBJ_NS_TYPE_NET) + * @ns: the actual namespace being invalidated + * + * This is called when a tag is no longer valid. For instance, + * when a network namespace exits, it uses this helper to + * make sure no sb's sysfs_info points to the now-invalidated + * netns. + */ void kobj_ns_exit(enum kobj_ns_type type, const void *ns) { sysfs_exit_ns(type, ns); -- cgit v1.2.3 From 07e98962fa778b9782c8845dfcb06a84cc050744 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 4 May 2010 17:36:44 -0700 Subject: kobject: Send hotplug events in all network namespaces Open a copy of the uevent kernel socket in each network namespace so we can send uevents in all network namespaces. Signed-off-by: Eric W. Biederman Acked-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- lib/kobject_uevent.c | 68 +++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 60 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 7b48d44ced6e..9084f2550c2a 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -24,13 +24,19 @@ #include #include #include +#include u64 uevent_seqnum; char uevent_helper[UEVENT_HELPER_PATH_LEN] = CONFIG_UEVENT_HELPER_PATH; static DEFINE_SPINLOCK(sequence_lock); -#if defined(CONFIG_NET) -static struct sock *uevent_sock; +#ifdef CONFIG_NET +struct uevent_sock { + struct list_head list; + struct sock *sk; +}; +static LIST_HEAD(uevent_sock_list); +static DEFINE_MUTEX(uevent_sock_mutex); #endif /* the strings here must match the enum in include/linux/kobject.h */ @@ -100,6 +106,9 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, u64 seq; int i = 0; int retval = 0; +#ifdef CONFIG_NET + struct uevent_sock *ue_sk; +#endif pr_debug("kobject: '%s' (%p): %s\n", kobject_name(kobj), kobj, __func__); @@ -211,7 +220,9 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, #if defined(CONFIG_NET) /* send netlink message */ - if (uevent_sock) { + mutex_lock(&uevent_sock_mutex); + list_for_each_entry(ue_sk, &uevent_sock_list, list) { + struct sock *uevent_sock = ue_sk->sk; struct sk_buff *skb; size_t len; @@ -241,6 +252,7 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, } else retval = -ENOMEM; } + mutex_unlock(&uevent_sock_mutex); #endif /* call uevent_helper, usually only enabled during early boot */ @@ -320,18 +332,58 @@ int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...) EXPORT_SYMBOL_GPL(add_uevent_var); #if defined(CONFIG_NET) -static int __init kobject_uevent_init(void) +static int uevent_net_init(struct net *net) { - uevent_sock = netlink_kernel_create(&init_net, NETLINK_KOBJECT_UEVENT, - 1, NULL, NULL, THIS_MODULE); - if (!uevent_sock) { + struct uevent_sock *ue_sk; + + ue_sk = kzalloc(sizeof(*ue_sk), GFP_KERNEL); + if (!ue_sk) + return -ENOMEM; + + ue_sk->sk = netlink_kernel_create(net, NETLINK_KOBJECT_UEVENT, + 1, NULL, NULL, THIS_MODULE); + if (!ue_sk->sk) { printk(KERN_ERR "kobject_uevent: unable to create netlink socket!\n"); return -ENODEV; } - netlink_set_nonroot(NETLINK_KOBJECT_UEVENT, NL_NONROOT_RECV); + mutex_lock(&uevent_sock_mutex); + list_add_tail(&ue_sk->list, &uevent_sock_list); + mutex_unlock(&uevent_sock_mutex); return 0; } +static void uevent_net_exit(struct net *net) +{ + struct uevent_sock *ue_sk; + + mutex_lock(&uevent_sock_mutex); + list_for_each_entry(ue_sk, &uevent_sock_list, list) { + if (sock_net(ue_sk->sk) == net) + goto found; + } + mutex_unlock(&uevent_sock_mutex); + return; + +found: + list_del(&ue_sk->list); + mutex_unlock(&uevent_sock_mutex); + + netlink_kernel_release(ue_sk->sk); + kfree(ue_sk); +} + +static struct pernet_operations uevent_net_ops = { + .init = uevent_net_init, + .exit = uevent_net_exit, +}; + +static int __init kobject_uevent_init(void) +{ + netlink_set_nonroot(NETLINK_KOBJECT_UEVENT, NL_NONROOT_RECV); + return register_pernet_subsys(&uevent_net_ops); +} + + postcore_initcall(kobject_uevent_init); #endif -- cgit v1.2.3 From 5f71a29629b4717445f8b7f5fb8f50c2d262b68e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 4 May 2010 17:36:47 -0700 Subject: kobj: Send hotplug events in the proper namespace. Utilize netlink_broacast_filtered to allow sending hotplug events in the proper namespace. Signed-off-by: Eric W. Biederman Acked-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- lib/kobject_uevent.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 9084f2550c2a..239c8e83fc28 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -83,6 +83,22 @@ out: return ret; } +static int kobj_bcast_filter(struct sock *dsk, struct sk_buff *skb, void *data) +{ + struct kobject *kobj = data; + const struct kobj_ns_type_operations *ops; + + ops = kobj_ns_ops(kobj); + if (ops) { + const void *sock_ns, *ns; + ns = kobj->ktype->namespace(kobj); + sock_ns = ops->netlink_ns(dsk); + return sock_ns != ns; + } + + return 0; +} + /** * kobject_uevent_env - send an uevent with environmental data * @@ -244,8 +260,10 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, } NETLINK_CB(skb).dst_group = 1; - retval = netlink_broadcast(uevent_sock, skb, 0, 1, - GFP_KERNEL); + retval = netlink_broadcast_filtered(uevent_sock, skb, + 0, 1, GFP_KERNEL, + kobj_bcast_filter, + kobj); /* ENOBUFS should be handled in userspace */ if (retval == -ENOBUFS) retval = 0; -- cgit v1.2.3 From 417daa1e8f893fbac88fd395340ba7779fd3926c Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 4 May 2010 17:36:48 -0700 Subject: hotplug: netns aware uevent_helper It only makes sense for uevent_helper to get events in the intial namespaces. It's invocation is not per namespace and it is not clear how we could make it's invocation namespace aware. Signed-off-by: Eric W. Biederman Acked-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- lib/kobject_uevent.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 239c8e83fc28..59c15511d58a 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -19,7 +19,7 @@ #include #include #include - +#include #include #include #include @@ -99,6 +99,21 @@ static int kobj_bcast_filter(struct sock *dsk, struct sk_buff *skb, void *data) return 0; } +static int kobj_usermode_filter(struct kobject *kobj) +{ + const struct kobj_ns_type_operations *ops; + + ops = kobj_ns_ops(kobj); + if (ops) { + const void *init_ns, *ns; + ns = kobj->ktype->namespace(kobj); + init_ns = ops->initial_ns(); + return ns != init_ns; + } + + return 0; +} + /** * kobject_uevent_env - send an uevent with environmental data * @@ -274,7 +289,7 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, #endif /* call uevent_helper, usually only enabled during early boot */ - if (uevent_helper[0]) { + if (uevent_helper[0] && !kobj_usermode_filter(kobj)) { char *argv [3]; argv [0] = uevent_helper; -- cgit v1.2.3 From 0dbdd1bfe0b83798763540111118b78b70792f00 Mon Sep 17 00:00:00 2001 From: Peter Huewe Date: Mon, 24 May 2010 12:13:20 -0700 Subject: lib/atomic64_test: fix missing include of linux/kernel.h Fix a build-failure (http://kisskb.ellerman.id.au/kisskb/buildresult/2601239/) by adding the missing include file (linux/kernel.h) for printk and KERN_INFO. Signed-off-by: Peter Huewe LKML-Reference: <201005241913.o4OJDKdf010884@imap1.linux-foundation.org> Cc: Luca Barbieri Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: H. Peter Anvin --- lib/atomic64_test.c | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c index 65e482caf5e9..9087d71537dd 100644 --- a/lib/atomic64_test.c +++ b/lib/atomic64_test.c @@ -9,6 +9,7 @@ * (at your option) any later version. */ #include +#include #include #define INIT(c) do { atomic64_set(&v, c); r = c; } while (0) -- cgit v1.2.3 From 4be929be34f9bdeffa40d815d32d7d60d2c7f03b Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 24 May 2010 14:33:03 -0700 Subject: kernel-wide: replace USHORT_MAX, SHORT_MAX and SHORT_MIN with USHRT_MAX, SHRT_MAX and SHRT_MIN - C99 knows about USHRT_MAX/SHRT_MAX/SHRT_MIN, not USHORT_MAX/SHORT_MAX/SHORT_MIN. - Make SHRT_MIN of type s16, not int, for consistency. [akpm@linux-foundation.org: fix drivers/dma/timb_dma.c] [akpm@linux-foundation.org: fix security/keys/keyring.c] Signed-off-by: Alexey Dobriyan Acked-by: WANG Cong Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/dma/timb_dma.c | 2 +- drivers/scsi/fcoe/fcoe.c | 2 +- drivers/scsi/mpt2sas/mpt2sas_base.c | 2 +- drivers/scsi/mpt2sas/mpt2sas_config.c | 2 +- drivers/vhost/vhost.c | 2 +- fs/nfs/super.c | 4 ++-- fs/nfsd/nfsctl.c | 4 ++-- fs/ocfs2/blockcheck.c | 4 ++-- include/linux/kernel.h | 6 +++--- include/linux/sched.h | 2 +- include/net/ip.h | 6 +++--- include/net/ipv6.h | 6 +++--- ipc/msg.c | 12 ++++++------ ipc/util.c | 4 ++-- lib/vsprintf.c | 2 +- net/9p/protocol.c | 2 +- net/dccp/options.c | 2 +- net/ipv4/udp.c | 8 ++++---- net/mac80211/sta_info.c | 2 +- net/sunrpc/rpcb_clnt.c | 2 +- security/keys/keyring.c | 6 +++--- 21 files changed, 41 insertions(+), 41 deletions(-) (limited to 'lib') diff --git a/drivers/dma/timb_dma.c b/drivers/dma/timb_dma.c index 0172fa3c7a2b..a1bf77c1993f 100644 --- a/drivers/dma/timb_dma.c +++ b/drivers/dma/timb_dma.c @@ -188,7 +188,7 @@ static void __td_unmap_descs(struct timb_dma_desc *td_desc, bool single) static int td_fill_desc(struct timb_dma_chan *td_chan, u8 *dma_desc, struct scatterlist *sg, bool last) { - if (sg_dma_len(sg) > USHORT_MAX) { + if (sg_dma_len(sg) > USHRT_MAX) { dev_err(chan2dev(&td_chan->chan), "Too big sg element\n"); return -EINVAL; } diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index 9276121db1ef..44a07593de56 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -688,7 +688,7 @@ static int fcoe_shost_config(struct fc_lport *lport, struct device *dev) } if (!lport->vport) - fc_host_max_npiv_vports(lport->host) = USHORT_MAX; + fc_host_max_npiv_vports(lport->host) = USHRT_MAX; snprintf(fc_host_symbolic_name(lport->host), FC_SYMBOLIC_NAME_SIZE, "%s v%s over %s", FCOE_NAME, FCOE_VERSION, diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.c b/drivers/scsi/mpt2sas/mpt2sas_base.c index b830d61684dd..0ec1ed389c20 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_base.c +++ b/drivers/scsi/mpt2sas/mpt2sas_base.c @@ -3757,7 +3757,7 @@ _base_reset_handler(struct MPT2SAS_ADAPTER *ioc, int reset_phase) if (ioc->config_cmds.status & MPT2_CMD_PENDING) { ioc->config_cmds.status |= MPT2_CMD_RESET; mpt2sas_base_free_smid(ioc, ioc->config_cmds.smid); - ioc->config_cmds.smid = USHORT_MAX; + ioc->config_cmds.smid = USHRT_MAX; complete(&ioc->config_cmds.done); } break; diff --git a/drivers/scsi/mpt2sas/mpt2sas_config.c b/drivers/scsi/mpt2sas/mpt2sas_config.c index e762dd3e2fcb..c65442982d7b 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_config.c +++ b/drivers/scsi/mpt2sas/mpt2sas_config.c @@ -258,7 +258,7 @@ mpt2sas_config_done(struct MPT2SAS_ADAPTER *ioc, u16 smid, u8 msix_index, #ifdef CONFIG_SCSI_MPT2SAS_LOGGING _config_display_some_debug(ioc, smid, "config_done", mpi_reply); #endif - ioc->config_cmds.smid = USHORT_MAX; + ioc->config_cmds.smid = USHRT_MAX; complete(&ioc->config_cmds.done); return 1; } diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 750effe0f98b..c6fb8e968f21 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -806,7 +806,7 @@ static unsigned get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq, count = indirect->len / sizeof desc; /* Buffers are chained via a 16 bit next field, so * we can have at most 2^16 of these. */ - if (count > USHORT_MAX + 1) { + if (count > USHRT_MAX + 1) { vq_err(vq, "Indirect buffer length too big: %d\n", indirect->len); return -E2BIG; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 2f8b1157daa2..04214fc5c304 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1060,7 +1060,7 @@ static int nfs_parse_mount_options(char *raw, goto out_nomem; rc = strict_strtoul(string, 10, &option); kfree(string); - if (rc != 0 || option > USHORT_MAX) + if (rc != 0 || option > USHRT_MAX) goto out_invalid_value; mnt->nfs_server.port = option; break; @@ -1181,7 +1181,7 @@ static int nfs_parse_mount_options(char *raw, goto out_nomem; rc = strict_strtoul(string, 10, &option); kfree(string); - if (rc != 0 || option > USHORT_MAX) + if (rc != 0 || option > USHRT_MAX) goto out_invalid_value; mnt->mount_server.port = option; break; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index bc3194ea01f5..508941c23af7 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -998,7 +998,7 @@ static ssize_t __write_ports_addxprt(char *buf) if (sscanf(buf, "%15s %4u", transport, &port) != 2) return -EINVAL; - if (port < 1 || port > USHORT_MAX) + if (port < 1 || port > USHRT_MAX) return -EINVAL; err = nfsd_create_serv(); @@ -1040,7 +1040,7 @@ static ssize_t __write_ports_delxprt(char *buf) if (sscanf(&buf[1], "%15s %4u", transport, &port) != 2) return -EINVAL; - if (port < 1 || port > USHORT_MAX || nfsd_serv == NULL) + if (port < 1 || port > USHRT_MAX || nfsd_serv == NULL) return -EINVAL; xprt = svc_find_xprt(nfsd_serv, transport, AF_UNSPEC, port); diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c index b7428c5d0d3b..ec6d12339593 100644 --- a/fs/ocfs2/blockcheck.c +++ b/fs/ocfs2/blockcheck.c @@ -403,7 +403,7 @@ void ocfs2_block_check_compute(void *data, size_t blocksize, * No ecc'd ocfs2 structure is larger than 4K, so ecc will be no * larger than 16 bits. */ - BUG_ON(ecc > USHORT_MAX); + BUG_ON(ecc > USHRT_MAX); bc->bc_crc32e = cpu_to_le32(crc); bc->bc_ecc = cpu_to_le16((u16)ecc); @@ -508,7 +508,7 @@ void ocfs2_block_check_compute_bhs(struct buffer_head **bhs, int nr, * No ecc'd ocfs2 structure is larger than 4K, so ecc will be no * larger than 16 bits. */ - BUG_ON(ecc > USHORT_MAX); + BUG_ON(ecc > USHRT_MAX); bc->bc_crc32e = cpu_to_le32(crc); bc->bc_ecc = cpu_to_le16((u16)ecc); diff --git a/include/linux/kernel.h b/include/linux/kernel.h index cc5e3ffe9fce..a2e7c32e17e7 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -24,9 +24,9 @@ extern const char linux_banner[]; extern const char linux_proc_banner[]; -#define USHORT_MAX ((u16)(~0U)) -#define SHORT_MAX ((s16)(USHORT_MAX>>1)) -#define SHORT_MIN (-SHORT_MAX - 1) +#define USHRT_MAX ((u16)(~0U)) +#define SHRT_MAX ((s16)(USHRT_MAX>>1)) +#define SHRT_MIN ((s16)(-SHRT_MAX - 1)) #define INT_MAX ((int)(~0U>>1)) #define INT_MIN (-INT_MAX - 1) #define UINT_MAX (~0U) diff --git a/include/linux/sched.h b/include/linux/sched.h index 415b8f8a3f45..c0151ffd3541 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -384,7 +384,7 @@ struct user_namespace; * 1-3 now and depends on arch. We use "5" as safe margin, here. */ #define MAPCOUNT_ELF_CORE_MARGIN (5) -#define DEFAULT_MAX_MAP_COUNT (USHORT_MAX - MAPCOUNT_ELF_CORE_MARGIN) +#define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN) extern int sysctl_max_map_count; diff --git a/include/net/ip.h b/include/net/ip.h index 63548f0a44b1..452f229c380a 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -358,11 +358,11 @@ enum ip_defrag_users { IP_DEFRAG_LOCAL_DELIVER, IP_DEFRAG_CALL_RA_CHAIN, IP_DEFRAG_CONNTRACK_IN, - __IP_DEFRAG_CONNTRACK_IN_END = IP_DEFRAG_CONNTRACK_IN + USHORT_MAX, + __IP_DEFRAG_CONNTRACK_IN_END = IP_DEFRAG_CONNTRACK_IN + USHRT_MAX, IP_DEFRAG_CONNTRACK_OUT, - __IP_DEFRAG_CONNTRACK_OUT_END = IP_DEFRAG_CONNTRACK_OUT + USHORT_MAX, + __IP_DEFRAG_CONNTRACK_OUT_END = IP_DEFRAG_CONNTRACK_OUT + USHRT_MAX, IP_DEFRAG_CONNTRACK_BRIDGE_IN, - __IP_DEFRAG_CONNTRACK_BRIDGE_IN = IP_DEFRAG_CONNTRACK_BRIDGE_IN + USHORT_MAX, + __IP_DEFRAG_CONNTRACK_BRIDGE_IN = IP_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX, IP_DEFRAG_VS_IN, IP_DEFRAG_VS_OUT, IP_DEFRAG_VS_FWD diff --git a/include/net/ipv6.h b/include/net/ipv6.h index eba5cc00325a..2600b69757b8 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -354,11 +354,11 @@ struct inet_frag_queue; enum ip6_defrag_users { IP6_DEFRAG_LOCAL_DELIVER, IP6_DEFRAG_CONNTRACK_IN, - __IP6_DEFRAG_CONNTRACK_IN = IP6_DEFRAG_CONNTRACK_IN + USHORT_MAX, + __IP6_DEFRAG_CONNTRACK_IN = IP6_DEFRAG_CONNTRACK_IN + USHRT_MAX, IP6_DEFRAG_CONNTRACK_OUT, - __IP6_DEFRAG_CONNTRACK_OUT = IP6_DEFRAG_CONNTRACK_OUT + USHORT_MAX, + __IP6_DEFRAG_CONNTRACK_OUT = IP6_DEFRAG_CONNTRACK_OUT + USHRT_MAX, IP6_DEFRAG_CONNTRACK_BRIDGE_IN, - __IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHORT_MAX, + __IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX, }; struct ip6_create_arg { diff --git a/ipc/msg.c b/ipc/msg.c index 9547cb7ac313..747b65507a91 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -345,19 +345,19 @@ copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version) out.msg_rtime = in->msg_rtime; out.msg_ctime = in->msg_ctime; - if (in->msg_cbytes > USHORT_MAX) - out.msg_cbytes = USHORT_MAX; + if (in->msg_cbytes > USHRT_MAX) + out.msg_cbytes = USHRT_MAX; else out.msg_cbytes = in->msg_cbytes; out.msg_lcbytes = in->msg_cbytes; - if (in->msg_qnum > USHORT_MAX) - out.msg_qnum = USHORT_MAX; + if (in->msg_qnum > USHRT_MAX) + out.msg_qnum = USHRT_MAX; else out.msg_qnum = in->msg_qnum; - if (in->msg_qbytes > USHORT_MAX) - out.msg_qbytes = USHORT_MAX; + if (in->msg_qbytes > USHRT_MAX) + out.msg_qbytes = USHRT_MAX; else out.msg_qbytes = in->msg_qbytes; out.msg_lqbytes = in->msg_qbytes; diff --git a/ipc/util.c b/ipc/util.c index 79ce84e890f7..69a0cc13d966 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -124,8 +124,8 @@ void ipc_init_ids(struct ipc_ids *ids) ids->seq = 0; { int seq_limit = INT_MAX/SEQ_MULTIPLIER; - if (seq_limit > USHORT_MAX) - ids->seq_max = USHORT_MAX; + if (seq_limit > USHRT_MAX) + ids->seq_max = USHRT_MAX; else ids->seq_max = seq_limit; } diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 46d34b0b74a8..20c95121d8a1 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1980,7 +1980,7 @@ int vsscanf(const char *buf, const char *fmt, va_list args) { char *s = (char *)va_arg(args, char *); if (field_width == -1) - field_width = SHORT_MAX; + field_width = SHRT_MAX; /* first, skip leading white space in buffer */ str = skip_spaces(str); diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 77d3aab4036b..149f82160130 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -394,7 +394,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, const char *sptr = va_arg(ap, const char *); int16_t len = 0; if (sptr) - len = MIN(strlen(sptr), USHORT_MAX); + len = MIN(strlen(sptr), USHRT_MAX); errcode = p9pdu_writef(pdu, proto_version, "w", len); diff --git a/net/dccp/options.c b/net/dccp/options.c index 1b08cae9c65b..07395f861d35 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -296,7 +296,7 @@ static inline u8 dccp_ndp_len(const u64 ndp) { if (likely(ndp <= 0xFF)) return 1; - return likely(ndp <= USHORT_MAX) ? 2 : (ndp <= UINT_MAX ? 4 : 6); + return likely(ndp <= USHRT_MAX) ? 2 : (ndp <= UINT_MAX ? 4 : 6); } int dccp_insert_option(struct sock *sk, struct sk_buff *skb, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 9de6a698f91d..baeec29fe0f1 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1686,8 +1686,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, return -ENOPROTOOPT; if (val != 0 && val < 8) /* Illegal coverage: use default (8) */ val = 8; - else if (val > USHORT_MAX) - val = USHORT_MAX; + else if (val > USHRT_MAX) + val = USHRT_MAX; up->pcslen = val; up->pcflag |= UDPLITE_SEND_CC; break; @@ -1700,8 +1700,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, return -ENOPROTOOPT; if (val != 0 && val < 8) /* Avoid silly minimal values. */ val = 8; - else if (val > USHORT_MAX) - val = USHORT_MAX; + else if (val > USHRT_MAX) + val = USHRT_MAX; up->pcrlen = val; up->pcflag |= UDPLITE_RECV_CC; break; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 730197591ab5..ba9360a475b0 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -259,7 +259,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, skb_queue_head_init(&sta->tx_filtered); for (i = 0; i < NUM_RX_DATA_QUEUES; i++) - sta->last_seq_ctrl[i] = cpu_to_le16(USHORT_MAX); + sta->last_seq_ctrl[i] = cpu_to_le16(USHRT_MAX); #ifdef CONFIG_MAC80211_VERBOSE_DEBUG printk(KERN_DEBUG "%s: Allocated STA %pM\n", diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 121105355f60..dac219a56ae1 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -783,7 +783,7 @@ static int rpcb_dec_getport(struct rpc_rqst *req, __be32 *p, port = ntohl(*p); dprintk("RPC: %5u PMAP_%s result: %lu\n", task->tk_pid, task->tk_msg.rpc_proc->p_name, port); - if (unlikely(port > USHORT_MAX)) + if (unlikely(port > USHRT_MAX)) return -EIO; rpcb->r_port = port; diff --git a/security/keys/keyring.c b/security/keys/keyring.c index ef03a82a0135..d37f713e73ce 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -669,7 +669,7 @@ static void keyring_unlink_rcu_disposal(struct rcu_head *rcu) struct keyring_list *klist = container_of(rcu, struct keyring_list, rcu); - if (klist->delkey != USHORT_MAX) + if (klist->delkey != USHRT_MAX) key_put(klist->keys[klist->delkey]); kfree(klist); } @@ -746,7 +746,7 @@ int __key_link_begin(struct key *keyring, const struct key_type *type, max += klist->maxkeys; ret = -ENFILE; - if (max > USHORT_MAX - 1) + if (max > USHRT_MAX - 1) goto error_quota; size = sizeof(*klist) + sizeof(struct key *) * max; if (size > PAGE_SIZE) @@ -763,7 +763,7 @@ int __key_link_begin(struct key *keyring, const struct key_type *type, sizeof(struct key *) * klist->nkeys); nklist->delkey = klist->nkeys; nklist->nkeys = klist->nkeys + 1; - klist->delkey = USHORT_MAX; + klist->delkey = USHRT_MAX; } else { nklist->nkeys = 1; nklist->delkey = 0; -- cgit v1.2.3 From cf3b429b03e827c718030f42e7e3ceaca980475e Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 24 May 2010 14:33:16 -0700 Subject: vsprintf.c: use noinline_for_stack Mark static functions with noinline_for_stack Before: akpm:/usr/src/25> objdump -d lib/vsprintf.o | perl scripts/checkstack.pl 0x00000e82 pointer [vsprintf.o]: 344 0x0000198c pointer [vsprintf.o]: 344 0x000025d6 scnprintf [vsprintf.o]: 216 0x00002648 scnprintf [vsprintf.o]: 216 0x00002565 snprintf [vsprintf.o]: 208 0x0000267c sprintf [vsprintf.o]: 208 0x000030a3 bprintf [vsprintf.o]: 208 0x00003b1e sscanf [vsprintf.o]: 208 0x00000608 number [vsprintf.o]: 136 0x00000937 number [vsprintf.o]: 136 After: akpm:/usr/src/25> objdump -d lib/vsprintf.o | perl scripts/checkstack.pl 0x00000a7c symbol_string [vsprintf.o]: 248 0x00000ae8 symbol_string [vsprintf.o]: 248 0x00002310 scnprintf [vsprintf.o]: 216 0x00002382 scnprintf [vsprintf.o]: 216 0x0000229f snprintf [vsprintf.o]: 208 0x000023b6 sprintf [vsprintf.o]: 208 0x00002ddd bprintf [vsprintf.o]: 208 0x00003858 sscanf [vsprintf.o]: 208 0x00000625 number [vsprintf.o]: 136 0x00000954 number [vsprintf.o]: 136 Signed-off-by: Joe Perches Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 67 ++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 42 insertions(+), 25 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 20c95121d8a1..b8a2f549ab0e 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -267,7 +267,8 @@ int strict_strtoll(const char *cp, unsigned int base, long long *res) } EXPORT_SYMBOL(strict_strtoll); -static int skip_atoi(const char **s) +static noinline_for_stack +int skip_atoi(const char **s) { int i = 0; @@ -287,7 +288,8 @@ static int skip_atoi(const char **s) /* Formats correctly any integer in [0,99999]. * Outputs from one to five digits depending on input. * On i386 gcc 4.1.2 -O2: ~250 bytes of code. */ -static char *put_dec_trunc(char *buf, unsigned q) +static noinline_for_stack +char *put_dec_trunc(char *buf, unsigned q) { unsigned d3, d2, d1, d0; d1 = (q>>4) & 0xf; @@ -324,7 +326,8 @@ static char *put_dec_trunc(char *buf, unsigned q) return buf; } /* Same with if's removed. Always emits five digits */ -static char *put_dec_full(char *buf, unsigned q) +static noinline_for_stack +char *put_dec_full(char *buf, unsigned q) { /* BTW, if q is in [0,9999], 8-bit ints will be enough, */ /* but anyway, gcc produces better code with full-sized ints */ @@ -366,7 +369,8 @@ static char *put_dec_full(char *buf, unsigned q) return buf; } /* No inlining helps gcc to use registers better */ -static noinline char *put_dec(char *buf, unsigned long long num) +static noinline_for_stack +char *put_dec(char *buf, unsigned long long num) { while (1) { unsigned rem; @@ -417,8 +421,9 @@ struct printf_spec { s16 precision; /* # of digits/chars */ }; -static char *number(char *buf, char *end, unsigned long long num, - struct printf_spec spec) +static noinline_for_stack +char *number(char *buf, char *end, unsigned long long num, + struct printf_spec spec) { /* we are called with base 8, 10 or 16, only, thus don't need "G..." */ static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */ @@ -537,7 +542,8 @@ static char *number(char *buf, char *end, unsigned long long num, return buf; } -static char *string(char *buf, char *end, const char *s, struct printf_spec spec) +static noinline_for_stack +char *string(char *buf, char *end, const char *s, struct printf_spec spec) { int len, i; @@ -567,8 +573,9 @@ static char *string(char *buf, char *end, const char *s, struct printf_spec spec return buf; } -static char *symbol_string(char *buf, char *end, void *ptr, - struct printf_spec spec, char ext) +static noinline_for_stack +char *symbol_string(char *buf, char *end, void *ptr, + struct printf_spec spec, char ext) { unsigned long value = (unsigned long) ptr; #ifdef CONFIG_KALLSYMS @@ -588,8 +595,9 @@ static char *symbol_string(char *buf, char *end, void *ptr, #endif } -static char *resource_string(char *buf, char *end, struct resource *res, - struct printf_spec spec, const char *fmt) +static noinline_for_stack +char *resource_string(char *buf, char *end, struct resource *res, + struct printf_spec spec, const char *fmt) { #ifndef IO_RSRC_PRINTK_SIZE #define IO_RSRC_PRINTK_SIZE 6 @@ -690,8 +698,9 @@ static char *resource_string(char *buf, char *end, struct resource *res, return string(buf, end, sym, spec); } -static char *mac_address_string(char *buf, char *end, u8 *addr, - struct printf_spec spec, const char *fmt) +static noinline_for_stack +char *mac_address_string(char *buf, char *end, u8 *addr, + struct printf_spec spec, const char *fmt) { char mac_addr[sizeof("xx:xx:xx:xx:xx:xx")]; char *p = mac_addr; @@ -714,7 +723,8 @@ static char *mac_address_string(char *buf, char *end, u8 *addr, return string(buf, end, mac_addr, spec); } -static char *ip4_string(char *p, const u8 *addr, const char *fmt) +static noinline_for_stack +char *ip4_string(char *p, const u8 *addr, const char *fmt) { int i; bool leading_zeros = (fmt[0] == 'i'); @@ -763,7 +773,8 @@ static char *ip4_string(char *p, const u8 *addr, const char *fmt) return p; } -static char *ip6_compressed_string(char *p, const char *addr) +static noinline_for_stack +char *ip6_compressed_string(char *p, const char *addr) { int i, j, range; unsigned char zerolength[8]; @@ -843,7 +854,8 @@ static char *ip6_compressed_string(char *p, const char *addr) return p; } -static char *ip6_string(char *p, const char *addr, const char *fmt) +static noinline_for_stack +char *ip6_string(char *p, const char *addr, const char *fmt) { int i; @@ -858,8 +870,9 @@ static char *ip6_string(char *p, const char *addr, const char *fmt) return p; } -static char *ip6_addr_string(char *buf, char *end, const u8 *addr, - struct printf_spec spec, const char *fmt) +static noinline_for_stack +char *ip6_addr_string(char *buf, char *end, const u8 *addr, + struct printf_spec spec, const char *fmt) { char ip6_addr[sizeof("xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255")]; @@ -871,8 +884,9 @@ static char *ip6_addr_string(char *buf, char *end, const u8 *addr, return string(buf, end, ip6_addr, spec); } -static char *ip4_addr_string(char *buf, char *end, const u8 *addr, - struct printf_spec spec, const char *fmt) +static noinline_for_stack +char *ip4_addr_string(char *buf, char *end, const u8 *addr, + struct printf_spec spec, const char *fmt) { char ip4_addr[sizeof("255.255.255.255")]; @@ -881,8 +895,9 @@ static char *ip4_addr_string(char *buf, char *end, const u8 *addr, return string(buf, end, ip4_addr, spec); } -static char *uuid_string(char *buf, char *end, const u8 *addr, - struct printf_spec spec, const char *fmt) +static noinline_for_stack +char *uuid_string(char *buf, char *end, const u8 *addr, + struct printf_spec spec, const char *fmt) { char uuid[sizeof("xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx")]; char *p = uuid; @@ -970,8 +985,9 @@ static char *uuid_string(char *buf, char *end, const u8 *addr, * function pointers are really function descriptors, which contain a * pointer to the real address. */ -static char *pointer(const char *fmt, char *buf, char *end, void *ptr, - struct printf_spec spec) +static noinline_for_stack +char *pointer(const char *fmt, char *buf, char *end, void *ptr, + struct printf_spec spec) { if (!ptr) return string(buf, end, "(null)", spec); @@ -1040,7 +1056,8 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr, * @precision: precision of a number * @qualifier: qualifier of a number (long, size_t, ...) */ -static int format_decode(const char *fmt, struct printf_spec *spec) +static noinline_for_stack +int format_decode(const char *fmt, struct printf_spec *spec) { const char *start = fmt; -- cgit v1.2.3 From ea46c8f774f295c45fac48101d54be347d3d453b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 24 May 2010 14:33:21 -0700 Subject: dynamic_debug: small cleanup in ddebug_proc_write() This doesn't change behavior at all. In the original code, if nwords was zero then ddebug_parse_query() would return -EINVAL, now we just do it earlier. Signed-off-by: Dan Carpenter Acked-by: Jason Baron Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/dynamic_debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index d6b8b9b1abfe..3df8eb17a607 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -456,7 +456,7 @@ static ssize_t ddebug_proc_write(struct file *file, const char __user *ubuf, __func__, (int)len); nwords = ddebug_tokenize(tmpbuf, words, MAXWORDS); - if (nwords < 0) + if (nwords <= 0) return -EINVAL; if (ddebug_parse_query(words, nwords-1, &query)) return -EINVAL; -- cgit v1.2.3 From 2b2f68b5383ea107295d7f1483256866e2daa1e3 Mon Sep 17 00:00:00 2001 From: Florian Ragwitz Date: Mon, 24 May 2010 14:33:21 -0700 Subject: DYNAMIC_DEBUG: fix documentation errors [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Florian Ragwitz Cc: Jason Baron Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/dynamic_debug.h | 2 +- lib/Kconfig.debug | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index f8c2e1767500..b3cd4de9432b 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@ -28,7 +28,7 @@ struct _ddebug { /* * The flags field controls the behaviour at the callsite. * The bits here are changed dynamically when the user - * writes commands to /dynamic_debug/ddebug + * writes commands to /dynamic_debug/control */ #define _DPRINTK_FLAGS_PRINT (1<<0) /* printk() a message using the format */ #define _DPRINTK_FLAGS_DEFAULT 0 diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index d85be90d5888..231208948363 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1039,10 +1039,10 @@ config DYNAMIC_DEBUG Usage: - Dynamic debugging is controlled via the 'dynamic_debug/ddebug' file, + Dynamic debugging is controlled via the 'dynamic_debug/control' file, which is contained in the 'debugfs' filesystem. Thus, the debugfs filesystem must first be mounted before making use of this feature. - We refer the control file as: /dynamic_debug/ddebug. This + We refer the control file as: /dynamic_debug/control. This file contains a list of the debug statements that can be enabled. The format for each line of the file is: @@ -1057,7 +1057,7 @@ config DYNAMIC_DEBUG From a live system: - nullarbor:~ # cat /dynamic_debug/ddebug + nullarbor:~ # cat /dynamic_debug/control # filename:lineno [module]function flags format fs/aio.c:222 [aio]__put_ioctx - "__put_ioctx:\040freeing\040%p\012" fs/aio.c:248 [aio]ioctx_alloc - "ENOMEM:\040nr_events\040too\040high\012" @@ -1067,23 +1067,23 @@ config DYNAMIC_DEBUG // enable the message at line 1603 of file svcsock.c nullarbor:~ # echo -n 'file svcsock.c line 1603 +p' > - /dynamic_debug/ddebug + /dynamic_debug/control // enable all the messages in file svcsock.c nullarbor:~ # echo -n 'file svcsock.c +p' > - /dynamic_debug/ddebug + /dynamic_debug/control // enable all the messages in the NFS server module nullarbor:~ # echo -n 'module nfsd +p' > - /dynamic_debug/ddebug + /dynamic_debug/control // enable all 12 messages in the function svc_process() nullarbor:~ # echo -n 'func svc_process +p' > - /dynamic_debug/ddebug + /dynamic_debug/control // disable all 12 messages in the function svc_process() nullarbor:~ # echo -n 'func svc_process -p' > - /dynamic_debug/ddebug + /dynamic_debug/control See Documentation/dynamic-debug-howto.txt for additional information. -- cgit v1.2.3 From db0fd97c270f1e80321f7ae55234643ca0978c54 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 24 May 2010 14:33:22 -0700 Subject: lib/hexdump.c: reduce stack variable size and cleanups Reduce char linebuf[200] to the actual size required., which is 32 * 3 + 2 + 32 + 1, ie: linebuf[131]. Change examples to use bool true not int 1. Align multiline argument indentation to open parenthesis. Use temporary for ptr[j] so trigraph fits on single line. Convert printk ptr from %*p, (int)(2 * sizeof(void *)) to %p as %p uses the same calculation for size. Signed-off-by: Joe Perches Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/hexdump.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) (limited to 'lib') diff --git a/lib/hexdump.c b/lib/hexdump.c index 39af2560f765..1bd6a9779774 100644 --- a/lib/hexdump.c +++ b/lib/hexdump.c @@ -34,7 +34,7 @@ EXPORT_SYMBOL(hex_asc); * * E.g.: * hex_dump_to_buffer(frame->data, frame->len, 16, 1, - * linebuf, sizeof(linebuf), 1); + * linebuf, sizeof(linebuf), true); * * example output buffer: * 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f @ABCDEFGHIJKLMNO @@ -65,8 +65,8 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize, for (j = 0; j < ngroups; j++) lx += scnprintf(linebuf + lx, linebuflen - lx, - "%s%16.16llx", j ? " " : "", - (unsigned long long)*(ptr8 + j)); + "%s%16.16llx", j ? " " : "", + (unsigned long long)*(ptr8 + j)); ascii_column = 17 * ngroups + 2; break; } @@ -77,7 +77,7 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize, for (j = 0; j < ngroups; j++) lx += scnprintf(linebuf + lx, linebuflen - lx, - "%s%8.8x", j ? " " : "", *(ptr4 + j)); + "%s%8.8x", j ? " " : "", *(ptr4 + j)); ascii_column = 9 * ngroups + 2; break; } @@ -88,7 +88,7 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize, for (j = 0; j < ngroups; j++) lx += scnprintf(linebuf + lx, linebuflen - lx, - "%s%4.4x", j ? " " : "", *(ptr2 + j)); + "%s%4.4x", j ? " " : "", *(ptr2 + j)); ascii_column = 5 * ngroups + 2; break; } @@ -111,9 +111,10 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize, while (lx < (linebuflen - 1) && lx < (ascii_column - 1)) linebuf[lx++] = ' '; - for (j = 0; (j < len) && (lx + 2) < linebuflen; j++) - linebuf[lx++] = (isascii(ptr[j]) && isprint(ptr[j])) ? ptr[j] - : '.'; + for (j = 0; (j < len) && (lx + 2) < linebuflen; j++) { + ch = ptr[j]; + linebuf[lx++] = (isascii(ch) && isprint(ch)) ? ch : '.'; + } nil: linebuf[lx++] = '\0'; } @@ -143,7 +144,7 @@ EXPORT_SYMBOL(hex_dump_to_buffer); * * E.g.: * print_hex_dump(KERN_DEBUG, "raw data: ", DUMP_PREFIX_ADDRESS, - * 16, 1, frame->data, frame->len, 1); + * 16, 1, frame->data, frame->len, true); * * Example output using %DUMP_PREFIX_OFFSET and 1-byte mode: * 0009ab42: 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f @ABCDEFGHIJKLMNO @@ -151,12 +152,12 @@ EXPORT_SYMBOL(hex_dump_to_buffer); * ffffffff88089af0: 73727170 77767574 7b7a7978 7f7e7d7c pqrstuvwxyz{|}~. */ void print_hex_dump(const char *level, const char *prefix_str, int prefix_type, - int rowsize, int groupsize, - const void *buf, size_t len, bool ascii) + int rowsize, int groupsize, + const void *buf, size_t len, bool ascii) { const u8 *ptr = buf; int i, linelen, remaining = len; - unsigned char linebuf[200]; + unsigned char linebuf[32 * 3 + 2 + 32 + 1]; if (rowsize != 16 && rowsize != 32) rowsize = 16; @@ -164,13 +165,14 @@ void print_hex_dump(const char *level, const char *prefix_str, int prefix_type, for (i = 0; i < len; i += rowsize) { linelen = min(remaining, rowsize); remaining -= rowsize; + hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize, - linebuf, sizeof(linebuf), ascii); + linebuf, sizeof(linebuf), ascii); switch (prefix_type) { case DUMP_PREFIX_ADDRESS: - printk("%s%s%*p: %s\n", level, prefix_str, - (int)(2 * sizeof(void *)), ptr + i, linebuf); + printk("%s%s%p: %s\n", + level, prefix_str, ptr + i, linebuf); break; case DUMP_PREFIX_OFFSET: printk("%s%s%.8x: %s\n", level, prefix_str, i, linebuf); @@ -196,9 +198,9 @@ EXPORT_SYMBOL(print_hex_dump); * rowsize of 16, groupsize of 1, and ASCII output included. */ void print_hex_dump_bytes(const char *prefix_str, int prefix_type, - const void *buf, size_t len) + const void *buf, size_t len) { print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, 16, 1, - buf, len, 1); + buf, len, true); } EXPORT_SYMBOL(print_hex_dump_bytes); -- cgit v1.2.3 From 903788892ea0fc7fcaf7e8e5fac9a77379fc215b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 24 May 2010 14:33:23 -0700 Subject: lib: introduce common method to convert hex digits hex_to_bin() is a little method which converts hex digit to its actual value. There are plenty of places where such functionality is needed. [akpm@linux-foundation.org: use tolower(), saving 3 bytes, test the more common case first - it's quicker] [akpm@linux-foundation.org: relocate tolower to make it even faster! (Joe)] Signed-off-by: Andy Shevchenko Cc: Tilman Schmidt Cc: Duncan Sands Cc: Eric W. Biederman Cc: Greg Kroah-Hartman Cc: "Richard Russon (FlatCap)" Cc: John W. Linville Cc: Len Brown Cc: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 2 ++ lib/hexdump.c | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+) (limited to 'lib') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 05f332afc9e0..8317ec4b9f3b 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -375,6 +375,8 @@ static inline char *pack_hex_byte(char *buf, u8 byte) return buf; } +extern int hex_to_bin(char ch); + #ifndef pr_fmt #define pr_fmt(fmt) fmt #endif diff --git a/lib/hexdump.c b/lib/hexdump.c index 1bd6a9779774..5d7a4802c562 100644 --- a/lib/hexdump.c +++ b/lib/hexdump.c @@ -15,6 +15,24 @@ const char hex_asc[] = "0123456789abcdef"; EXPORT_SYMBOL(hex_asc); +/** + * hex_to_bin - convert a hex digit to its real value + * @ch: ascii character represents hex digit + * + * hex_to_bin() converts one hex digit to its actual value or -1 in case of bad + * input. + */ +int hex_to_bin(char ch) +{ + if ((ch >= '0') && (ch <= '9')) + return ch - '0'; + ch = tolower(ch); + if ((ch >= 'a') && (ch <= 'f')) + return ch - 'a' + 10; + return -1; +} +EXPORT_SYMBOL(hex_to_bin); + /** * hex_dump_to_buffer - convert a blob of data to "hex ASCII" in memory * @buf: data blob to dump -- cgit v1.2.3 From 836e2af92503f1642dbc3c3281ec68ec1dd39d2e Mon Sep 17 00:00:00 2001 From: Joakim Tjernlund Date: Mon, 24 May 2010 14:33:31 -0700 Subject: crc32: major optimization Precompute more crc32 values(0xcc00, 0xcc0000 and 0xcc000000) into tables. This increases the table size from 1KB to 4KB but the performance benfit makes it worth it: 28% faster on MPC8321, 266 MHz 2x faster on Core 2 Duo, 3.1GHz [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Joakim Tjernlund Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/crc32.c | 24 +++++++++++++++--------- lib/gen_crc32table.c | 47 ++++++++++++++++++++++++++++++++--------------- 2 files changed, 47 insertions(+), 24 deletions(-) (limited to 'lib') diff --git a/lib/crc32.c b/lib/crc32.c index bc5b936e9142..4855995fcde9 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -48,12 +48,20 @@ MODULE_LICENSE("GPL"); #if CRC_LE_BITS == 8 || CRC_BE_BITS == 8 static inline u32 -crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 *tab) +crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256]) { # ifdef __LITTLE_ENDIAN -# define DO_CRC(x) crc = tab[(crc ^ (x)) & 255 ] ^ (crc >> 8) +# define DO_CRC(x) crc = tab[0][(crc ^ (x)) & 255] ^ (crc >> 8) +# define DO_CRC4 crc = tab[3][(crc) & 255] ^ \ + tab[2][(crc >> 8) & 255] ^ \ + tab[1][(crc >> 16) & 255] ^ \ + tab[0][(crc >> 24) & 255] # else -# define DO_CRC(x) crc = tab[((crc >> 24) ^ (x)) & 255] ^ (crc << 8) +# define DO_CRC(x) crc = tab[0][((crc >> 24) ^ (x)) & 255] ^ (crc << 8) +# define DO_CRC4 crc = tab[0][(crc) & 255] ^ \ + tab[1][(crc >> 8) & 255] ^ \ + tab[2][(crc >> 16) & 255] ^ \ + tab[3][(crc >> 24) & 255] # endif const u32 *b; size_t rem_len; @@ -70,10 +78,7 @@ crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 *tab) b = (const u32 *)buf; for (--b; len; --len) { crc ^= *++b; /* use pre increment for speed */ - DO_CRC(0); - DO_CRC(0); - DO_CRC(0); - DO_CRC(0); + DO_CRC4; } len = rem_len; /* And the last few bytes */ @@ -85,6 +90,7 @@ crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 *tab) } return crc; #undef DO_CRC +#undef DO_CRC4 } #endif /** @@ -117,7 +123,7 @@ u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) { # if CRC_LE_BITS == 8 - const u32 *tab = crc32table_le; + const u32 (*tab)[] = crc32table_le; crc = __cpu_to_le32(crc); crc = crc32_body(crc, p, len, tab); @@ -174,7 +180,7 @@ u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) { # if CRC_BE_BITS == 8 - const u32 *tab = crc32table_be; + const u32 (*tab)[] = crc32table_be; crc = __cpu_to_be32(crc); crc = crc32_body(crc, p, len, tab); diff --git a/lib/gen_crc32table.c b/lib/gen_crc32table.c index bea5d97df991..85d0e412a04f 100644 --- a/lib/gen_crc32table.c +++ b/lib/gen_crc32table.c @@ -7,8 +7,8 @@ #define LE_TABLE_SIZE (1 << CRC_LE_BITS) #define BE_TABLE_SIZE (1 << CRC_BE_BITS) -static uint32_t crc32table_le[LE_TABLE_SIZE]; -static uint32_t crc32table_be[BE_TABLE_SIZE]; +static uint32_t crc32table_le[4][LE_TABLE_SIZE]; +static uint32_t crc32table_be[4][BE_TABLE_SIZE]; /** * crc32init_le() - allocate and initialize LE table data @@ -22,12 +22,19 @@ static void crc32init_le(void) unsigned i, j; uint32_t crc = 1; - crc32table_le[0] = 0; + crc32table_le[0][0] = 0; for (i = 1 << (CRC_LE_BITS - 1); i; i >>= 1) { crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0); for (j = 0; j < LE_TABLE_SIZE; j += 2 * i) - crc32table_le[i + j] = crc ^ crc32table_le[j]; + crc32table_le[0][i + j] = crc ^ crc32table_le[0][j]; + } + for (i = 0; i < LE_TABLE_SIZE; i++) { + crc = crc32table_le[0][i]; + for (j = 1; j < 4; j++) { + crc = crc32table_le[0][crc & 0xff] ^ (crc >> 8); + crc32table_le[j][i] = crc; + } } } @@ -39,25 +46,35 @@ static void crc32init_be(void) unsigned i, j; uint32_t crc = 0x80000000; - crc32table_be[0] = 0; + crc32table_be[0][0] = 0; for (i = 1; i < BE_TABLE_SIZE; i <<= 1) { crc = (crc << 1) ^ ((crc & 0x80000000) ? CRCPOLY_BE : 0); for (j = 0; j < i; j++) - crc32table_be[i + j] = crc ^ crc32table_be[j]; + crc32table_be[0][i + j] = crc ^ crc32table_be[0][j]; + } + for (i = 0; i < BE_TABLE_SIZE; i++) { + crc = crc32table_be[0][i]; + for (j = 1; j < 4; j++) { + crc = crc32table_be[0][(crc >> 24) & 0xff] ^ (crc << 8); + crc32table_be[j][i] = crc; + } } } -static void output_table(uint32_t table[], int len, char *trans) +static void output_table(uint32_t table[4][256], int len, char *trans) { - int i; + int i, j; - for (i = 0; i < len - 1; i++) { - if (i % ENTRIES_PER_LINE == 0) - printf("\n"); - printf("%s(0x%8.8xL), ", trans, table[i]); + for (j = 0 ; j < 4; j++) { + printf("{"); + for (i = 0; i < len - 1; i++) { + if (i % ENTRIES_PER_LINE == 0) + printf("\n"); + printf("%s(0x%8.8xL), ", trans, table[j][i]); + } + printf("%s(0x%8.8xL)},\n", trans, table[j][len - 1]); } - printf("%s(0x%8.8xL)\n", trans, table[len - 1]); } int main(int argc, char** argv) @@ -66,14 +83,14 @@ int main(int argc, char** argv) if (CRC_LE_BITS > 1) { crc32init_le(); - printf("static const u32 crc32table_le[] = {"); + printf("static const u32 crc32table_le[4][256] = {"); output_table(crc32table_le, LE_TABLE_SIZE, "tole"); printf("};\n"); } if (CRC_BE_BITS > 1) { crc32init_be(); - printf("static const u32 crc32table_be[] = {"); + printf("static const u32 crc32table_be[4][256] = {"); output_table(crc32table_be, BE_TABLE_SIZE, "tobe"); printf("};\n"); } -- cgit v1.2.3 From 4762bbc1a3a1f22095278b74dd1b8cee04858641 Mon Sep 17 00:00:00 2001 From: Joakim Tjernlund Date: Mon, 24 May 2010 14:33:31 -0700 Subject: crc32: use __BYTE_ORDER macro for endian detection. Since crc32.c contains a nifty test program that can be executed in user space, make sure endian detection works reliably in user space too. Signed-off-by: Joakim Tjernlund Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/crc32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/crc32.c b/lib/crc32.c index 4855995fcde9..3087ed899ee3 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -50,7 +50,7 @@ MODULE_LICENSE("GPL"); static inline u32 crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256]) { -# ifdef __LITTLE_ENDIAN +# if __BYTE_ORDER == __LITTLE_ENDIAN # define DO_CRC(x) crc = tab[0][(crc ^ (x)) & 255] ^ (crc >> 8) # define DO_CRC4 crc = tab[3][(crc) & 255] ^ \ tab[2][(crc >> 8) & 255] ^ \ -- cgit v1.2.3 From 0d2daf5cc858bd9305bae187310a1dabaad0a2db Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 25 May 2010 23:43:03 -0700 Subject: revert "crc32: use __BYTE_ORDER macro for endian detection" It doesn't work on big-endian - those architectures don't define __LITTLE_ENDIAN. Cc: Joakim Tjernlund Reported-by: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/crc32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/crc32.c b/lib/crc32.c index 3087ed899ee3..4855995fcde9 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -50,7 +50,7 @@ MODULE_LICENSE("GPL"); static inline u32 crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256]) { -# if __BYTE_ORDER == __LITTLE_ENDIAN +# ifdef __LITTLE_ENDIAN # define DO_CRC(x) crc = tab[0][(crc ^ (x)) & 255] ^ (crc >> 8) # define DO_CRC4 crc = tab[3][(crc) & 255] ^ \ tab[2][(crc >> 8) & 255] ^ \ -- cgit v1.2.3 From 0ac0c0d0f837c499afd02a802f9cf52d3027fa3b Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Wed, 26 May 2010 14:42:51 -0700 Subject: cpusets: randomize node rotor used in cpuset_mem_spread_node() Some workloads that create a large number of small files tend to assign too many pages to node 0 (multi-node systems). Part of the reason is that the rotor (in cpuset_mem_spread_node()) used to assign nodes starts at node 0 for newly created tasks. This patch changes the rotor to be initialized to a random node number of the cpuset. [akpm@linux-foundation.org: fix layout] [Lee.Schermerhorn@hp.com: Define stub numa_random() for !NUMA configuration] Signed-off-by: Jack Steiner Signed-off-by: Lee Schermerhorn Cc: Christoph Lameter Cc: Pekka Enberg Cc: Paul Menage Cc: Jack Steiner Cc: Robin Holt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/numa.c | 17 +++++++++++++++++ include/linux/bitmap.h | 1 + include/linux/nodemask.h | 8 ++++++++ kernel/fork.c | 4 ++++ lib/bitmap.c | 2 +- 5 files changed, 31 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 550df481accd..10c27bb1e95f 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -2,6 +2,7 @@ #include #include #include +#include #ifdef CONFIG_DEBUG_PER_CPU_MAPS # define DBG(x...) printk(KERN_DEBUG x) @@ -65,3 +66,19 @@ const struct cpumask *cpumask_of_node(int node) } EXPORT_SYMBOL(cpumask_of_node); #endif + +/* + * Return the bit number of a random bit set in the nodemask. + * (returns -1 if nodemask is empty) + */ +int __node_random(const nodemask_t *maskp) +{ + int w, bit = -1; + + w = nodes_weight(*maskp); + if (w) + bit = bitmap_ord_to_pos(maskp->bits, + get_random_int() % w, MAX_NUMNODES); + return bit; +} +EXPORT_SYMBOL(__node_random); diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index daf8c480c786..6fb2720882fc 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -141,6 +141,7 @@ extern int bitmap_find_free_region(unsigned long *bitmap, int bits, int order); extern void bitmap_release_region(unsigned long *bitmap, int pos, int order); extern int bitmap_allocate_region(unsigned long *bitmap, int pos, int order); extern void bitmap_copy_le(void *dst, const unsigned long *src, int nbits); +extern int bitmap_ord_to_pos(const unsigned long *bitmap, int n, int bits); #define BITMAP_LAST_WORD_MASK(nbits) \ ( \ diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index dba35e413371..8a8f1d09c133 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -66,6 +66,8 @@ * int num_online_nodes() Number of online Nodes * int num_possible_nodes() Number of all possible Nodes * + * int node_random(mask) Random node with set bit in mask + * * int node_online(node) Is some node online? * int node_possible(node) Is some node possible? * @@ -430,6 +432,10 @@ static inline void node_set_offline(int nid) node_clear_state(nid, N_ONLINE); nr_online_nodes = num_node_state(N_ONLINE); } + +#define node_random(mask) __node_random(&(mask)) +extern int __node_random(const nodemask_t *maskp); + #else static inline int node_state(int node, enum node_states state) @@ -460,6 +466,8 @@ static inline int num_node_state(enum node_states state) #define node_set_online(node) node_set_state((node), N_ONLINE) #define node_set_offline(node) node_clear_state((node), N_ONLINE) + +static inline int node_random(const nodemask_t mask) { return 0; } #endif #define node_online_map node_states[N_ONLINE] diff --git a/kernel/fork.c b/kernel/fork.c index 4d57d9e3a6e9..2e9cc3139ec6 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1079,6 +1079,10 @@ static struct task_struct *copy_process(unsigned long clone_flags, } mpol_fix_fork_child_flag(p); #endif +#ifdef CONFIG_CPUSETS + p->cpuset_mem_spread_rotor = node_random(p->mems_allowed); + p->cpuset_slab_spread_rotor = node_random(p->mems_allowed); +#endif #ifdef CONFIG_TRACE_IRQFLAGS p->irq_events = 0; #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW diff --git a/lib/bitmap.c b/lib/bitmap.c index ffb78c916ccd..d7137e7e06e8 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -672,7 +672,7 @@ static int bitmap_pos_to_ord(const unsigned long *buf, int pos, int bits) * * The bit positions 0 through @bits are valid positions in @buf. */ -static int bitmap_ord_to_pos(const unsigned long *buf, int ord, int bits) +int bitmap_ord_to_pos(const unsigned long *buf, int ord, int bits) { int pos = 0; -- cgit v1.2.3 From c9d221f86e43d9fb16260fe18a8cd6767f36c8a5 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Wed, 26 May 2010 14:43:36 -0700 Subject: fault-injection: add CPU notifier error injection module I used this module to test the series of modification to the cpu notifiers code. Example1: inject CPU offline error (-1 == -EPERM) # modprobe cpu-notifier-error-inject cpu_down_prepare_error=-1 # echo 0 > /sys/devices/system/cpu/cpu1/online bash: echo: write error: Operation not permitted Example2: inject CPU online error (-2 == -ENOENT) # modprobe cpu-notifier-error-inject cpu_up_prepare_error=-2 # echo 1 > /sys/devices/system/cpu/cpu1/online bash: echo: write error: No such file or directory [akpm@linux-foundation.org: fix Kconfig help text] Signed-off-by: Akinobu Mita Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 12 ++++++++ lib/Makefile | 1 + lib/cpu-notifier-error-inject.c | 63 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 76 insertions(+) create mode 100644 lib/cpu-notifier-error-inject.c (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 231208948363..e722e9d62221 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -898,6 +898,18 @@ config LKDTM Documentation on how to use the module can be found in Documentation/fault-injection/provoke-crashes.txt +config CPU_NOTIFIER_ERROR_INJECT + tristate "CPU notifier error injection module" + depends on HOTPLUG_CPU && DEBUG_KERNEL + help + This option provides a kernel module that can be used to test + the error handling of the cpu notifiers + + To compile this code as a module, choose M here: the module will + be called cpu-notifier-error-inject. + + If unsure, say N. + config FAULT_INJECTION bool "Fault-injection framework" depends on DEBUG_KERNEL diff --git a/lib/Makefile b/lib/Makefile index 9e6d3c29d73a..c8567a59d316 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -85,6 +85,7 @@ obj-$(CONFIG_AUDIT_GENERIC) += audit.o obj-$(CONFIG_SWIOTLB) += swiotlb.o obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o +obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o lib-$(CONFIG_GENERIC_BUG) += bug.o diff --git a/lib/cpu-notifier-error-inject.c b/lib/cpu-notifier-error-inject.c new file mode 100644 index 000000000000..4dc20321b0d5 --- /dev/null +++ b/lib/cpu-notifier-error-inject.c @@ -0,0 +1,63 @@ +#include +#include +#include +#include + +static int priority; +static int cpu_up_prepare_error; +static int cpu_down_prepare_error; + +module_param(priority, int, 0); +MODULE_PARM_DESC(priority, "specify cpu notifier priority"); + +module_param(cpu_up_prepare_error, int, 0644); +MODULE_PARM_DESC(cpu_up_prepare_error, + "specify error code to inject CPU_UP_PREPARE action"); + +module_param(cpu_down_prepare_error, int, 0644); +MODULE_PARM_DESC(cpu_down_prepare_error, + "specify error code to inject CPU_DOWN_PREPARE action"); + +static int err_inject_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + int err = 0; + + switch (action) { + case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: + err = cpu_up_prepare_error; + break; + case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: + err = cpu_down_prepare_error; + break; + } + if (err) + printk(KERN_INFO "Injecting error (%d) at cpu notifier\n", err); + + return notifier_from_errno(err); +} + +static struct notifier_block err_inject_cpu_notifier = { + .notifier_call = err_inject_cpu_callback, +}; + +static int err_inject_init(void) +{ + err_inject_cpu_notifier.priority = priority; + + return register_hotcpu_notifier(&err_inject_cpu_notifier); +} + +static void err_inject_exit(void) +{ + unregister_hotcpu_notifier(&err_inject_cpu_notifier); +} + +module_init(err_inject_init); +module_exit(err_inject_exit); + +MODULE_DESCRIPTION("CPU notifier error injection module"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Akinobu Mita "); -- cgit v1.2.3 From 2dcb22b346be7b7b7e630a8970d69cf3f1111ec1 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 26 May 2010 14:43:38 -0700 Subject: idr: fix backtrack logic in idr_remove_all Currently idr_remove_all will fail with a use after free error if idr::layers is bigger than 2, which on 32 bit systems corresponds to items more than 1024. This is due to stepping back too many levels during backtracking. For simplicity let's assume that IDR_BITS=1 -> we have 2 nodes at each level below the root node and each leaf node stores two IDs. (In reality for 32 bit systems IDR_BITS=5, with 32 nodes at each sub-root level and 32 IDs in each leaf node). The sequence of freeing the nodes at the moment is as follows: layer 1 -> a(7) 2 -> b(3) c(5) 3 -> d(1) e(2) f(4) g(6) Until step 4 things go fine, but then node c is freed, whereas node g should be freed first. Since node c contains the pointer to node g we'll have a use after free error at step 6. How many levels we step back after visiting the leaf nodes is currently determined by the msb of the id we are currently visiting: Step 1. node d with IDs 0,1 is freed, current ID is advanced to 2. msb of the current ID bit 1. This means we need to step back 1 level to node b and take the next sibling, node e. 2-3. node e with IDs 2,3 is freed, current ID is 4, msb is bit 2. This means we need to step back 2 levels to node a, freeing node b on the way. 4-5. node f with IDs 4,5 is freed, current ID is 6, msb is still bit 2. This means we again need to step back 2 levels to node a and free c on the way. 6. We should visit node g, but its pointer is not available as node c was freed. The fix changes how we determine the number of levels to step back. Instead of deducting this merely from the msb of the current ID, we should really check if advancing the ID causes an overflow to a bit position corresponding to a given layer. In the above example overflow from bit 0 to bit 1 should mean stepping back 1 level. Overflow from bit 1 to bit 2 should mean stepping back 2 levels and so on. The fix was tested with IDs up to 1 << 20, which corresponds to 4 layers on 32 bit systems. Signed-off-by: Imre Deak Reviewed-by: Tejun Heo Cc: Eric Paris Cc: "Paul E. McKenney" Cc: [2.6.34.1] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/idr.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/idr.c b/lib/idr.c index 422a9d5069cc..c1a206901761 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -445,6 +445,7 @@ EXPORT_SYMBOL(idr_remove); void idr_remove_all(struct idr *idp) { int n, id, max; + int bt_mask; struct idr_layer *p; struct idr_layer *pa[MAX_LEVEL]; struct idr_layer **paa = &pa[0]; @@ -462,8 +463,10 @@ void idr_remove_all(struct idr *idp) p = p->ary[(id >> n) & IDR_MASK]; } + bt_mask = id; id += 1 << n; - while (n < fls(id)) { + /* Get the highest bit that the above add changed from 0->1. */ + while (n < fls(id ^ bt_mask)) { if (p) free_layer(p); n += IDR_BITS; -- cgit v1.2.3 From 5960164fde9bc2f2a99e751d3393faea316e7e36 Mon Sep 17 00:00:00 2001 From: Joe Eykholt Date: Wed, 26 May 2010 14:44:13 -0700 Subject: lib/random32: export pseudo-random number generator for modules This patch moves the definition of struct rnd_state and the inline __seed() function to linux/random.h. It renames the static __random32() function to prandom32() and exports it for use in modules. prandom32() is useful as a privately-seeded pseudo random number generator that can give the same result every time it is initialized. For FCoE FC-BB-6 VN2VN mode self-selected unique FC address generation, we need an pseudo-random number generator seeded with the 64-bit world-wide port name. A truly random generator or one seeded with randomness won't do because the same sequence of numbers should be generated each time we boot or the link comes up. A prandom32_seed() inline function is added to the header file. It is inlined not for speed, but so the function won't be expanded in the base kernel, but only in the module that uses it. Signed-off-by: Joe Eykholt Acked-by: Matt Mackall Cc: Theodore Ts'o Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/random.h | 28 ++++++++++++++++++++++++++++ lib/random32.c | 38 +++++++++++++++++--------------------- 2 files changed, 45 insertions(+), 21 deletions(-) (limited to 'lib') diff --git a/include/linux/random.h b/include/linux/random.h index 25d02fe5c9b5..fb7ab9de5f36 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -40,6 +40,10 @@ struct rand_pool_info { __u32 buf[0]; }; +struct rnd_state { + __u32 s1, s2, s3; +}; + /* Exported functions */ #ifdef __KERNEL__ @@ -74,6 +78,30 @@ unsigned long randomize_range(unsigned long start, unsigned long end, unsigned l u32 random32(void); void srandom32(u32 seed); +u32 prandom32(struct rnd_state *); + +/* + * Handle minimum values for seeds + */ +static inline u32 __seed(u32 x, u32 m) +{ + return (x < m) ? x + m : x; +} + +/** + * prandom32_seed - set seed for prandom32(). + * @state: pointer to state structure to receive the seed. + * @seed: arbitrary 64-bit value to use as a seed. + */ +static inline void prandom32_seed(struct rnd_state *state, u64 seed) +{ + u32 i = (seed >> 32) ^ (seed << 10) ^ seed; + + state->s1 = __seed(i, 1); + state->s2 = __seed(i, 7); + state->s3 = __seed(i, 15); +} + #endif /* __KERNEL___ */ #endif /* _LINUX_RANDOM_H */ diff --git a/lib/random32.c b/lib/random32.c index 217d5c4b666d..870dc3fc0f0f 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -39,13 +39,16 @@ #include #include -struct rnd_state { - u32 s1, s2, s3; -}; - static DEFINE_PER_CPU(struct rnd_state, net_rand_state); -static u32 __random32(struct rnd_state *state) +/** + * prandom32 - seeded pseudo-random number generator. + * @state: pointer to state structure holding seeded state. + * + * This is used for pseudo-randomness with no outside seeding. + * For more random results, use random32(). + */ +u32 prandom32(struct rnd_state *state) { #define TAUSWORTHE(s,a,b,c,d) ((s&c)<>b) @@ -55,14 +58,7 @@ static u32 __random32(struct rnd_state *state) return (state->s1 ^ state->s2 ^ state->s3); } - -/* - * Handle minimum values for seeds - */ -static inline u32 __seed(u32 x, u32 m) -{ - return (x < m) ? x + m : x; -} +EXPORT_SYMBOL(prandom32); /** * random32 - pseudo random number generator @@ -75,7 +71,7 @@ u32 random32(void) { unsigned long r; struct rnd_state *state = &get_cpu_var(net_rand_state); - r = __random32(state); + r = prandom32(state); put_cpu_var(state); return r; } @@ -118,12 +114,12 @@ static int __init random32_init(void) state->s3 = __seed(LCG(state->s2), 15); /* "warm it up" */ - __random32(state); - __random32(state); - __random32(state); - __random32(state); - __random32(state); - __random32(state); + prandom32(state); + prandom32(state); + prandom32(state); + prandom32(state); + prandom32(state); + prandom32(state); } return 0; } @@ -147,7 +143,7 @@ static int __init random32_reseed(void) state->s3 = __seed(seeds[2], 15); /* mix it in */ - __random32(state); + prandom32(state); } return 0; } -- cgit v1.2.3 From 38388301b7b9d2921b58cfa1cd9b14c02d508c63 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 26 May 2010 14:44:18 -0700 Subject: swiotlb: remove unnecessary swiotlb_sync_single_range_* swiotlb_sync_single_range_for_cpu and swiotlb_sync_single_range_for_device are unnecessary because swiotlb_sync_single_for_cpu and swiotlb_sync_single_for_device can be used instead. Signed-off-by: FUJITA Tomonori Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swiotlb.h | 10 ---------- lib/swiotlb.c | 31 ------------------------------- 2 files changed, 41 deletions(-) (limited to 'lib') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index febedcf67c7e..81a4e213c6cf 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -73,16 +73,6 @@ extern void swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, int nelems, enum dma_data_direction dir); -extern void -swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr, - unsigned long offset, size_t size, - enum dma_data_direction dir); - -extern void -swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr, - unsigned long offset, size_t size, - enum dma_data_direction dir); - extern int swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr); diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 5fddf720da73..a009055140ec 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -756,37 +756,6 @@ swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, } EXPORT_SYMBOL(swiotlb_sync_single_for_device); -/* - * Same as above, but for a sub-range of the mapping. - */ -static void -swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr, - unsigned long offset, size_t size, - int dir, int target) -{ - swiotlb_sync_single(hwdev, dev_addr + offset, size, dir, target); -} - -void -swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr, - unsigned long offset, size_t size, - enum dma_data_direction dir) -{ - swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir, - SYNC_FOR_CPU); -} -EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_cpu); - -void -swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr, - unsigned long offset, size_t size, - enum dma_data_direction dir) -{ - swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir, - SYNC_FOR_DEVICE); -} -EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device); - /* * Map a set of buffers described by scatterlist in streaming mode for DMA. * This is the scatter-gather version of the above swiotlb_map_page -- cgit v1.2.3 From edcd1d843adf09d1742d49ae04fa51bb63ddd1c3 Mon Sep 17 00:00:00 2001 From: Cesar Eduardo Barros Date: Wed, 26 May 2010 14:44:27 -0700 Subject: radix-tree: fix radix_tree_prev_hole() underflow case radix_tree_prev_hole() used LONG_MAX to detect underflow; however, ULONG_MAX is clearly what was intended, both here and by its only user (count_history_pages at mm/readahead.c). Reviewed-by: Wu Fengguang Signed-off-by: Cesar Eduardo Barros Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 2a087e0f9863..05da38bcc298 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -656,7 +656,7 @@ EXPORT_SYMBOL(radix_tree_next_hole); * * Returns: the index of the hole if found, otherwise returns an index * outside of the set specified (in which case 'index - return >= max_scan' - * will be true). In rare cases of wrap-around, LONG_MAX will be returned. + * will be true). In rare cases of wrap-around, ULONG_MAX will be returned. * * radix_tree_next_hole may be called under rcu_read_lock. However, like * radix_tree_gang_lookup, this will not atomically search a snapshot of @@ -674,7 +674,7 @@ unsigned long radix_tree_prev_hole(struct radix_tree_root *root, if (!radix_tree_lookup(root, index)) break; index--; - if (index == LONG_MAX) + if (index == ULONG_MAX) break; } -- cgit v1.2.3 From 35926ff5fba8245bd1c6ac04155048f6f89232b1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 30 May 2010 09:00:03 -0700 Subject: Revert "cpusets: randomize node rotor used in cpuset_mem_spread_node()" This reverts commit 0ac0c0d0f837c499afd02a802f9cf52d3027fa3b, which caused cross-architecture build problems for all the wrong reasons. IA64 already added its own version of __node_random(), but the fact is, there is nothing architectural about the function, and the original commit was just badly done. Revert it, since no fix is forthcoming. Requested-by: Stephen Rothwell Signed-off-by: Linus Torvalds --- arch/x86/mm/numa.c | 17 ----------------- include/linux/bitmap.h | 1 - include/linux/nodemask.h | 8 -------- kernel/fork.c | 4 ---- lib/bitmap.c | 2 +- 5 files changed, 1 insertion(+), 31 deletions(-) (limited to 'lib') diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 10c27bb1e95f..550df481accd 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -2,7 +2,6 @@ #include #include #include -#include #ifdef CONFIG_DEBUG_PER_CPU_MAPS # define DBG(x...) printk(KERN_DEBUG x) @@ -66,19 +65,3 @@ const struct cpumask *cpumask_of_node(int node) } EXPORT_SYMBOL(cpumask_of_node); #endif - -/* - * Return the bit number of a random bit set in the nodemask. - * (returns -1 if nodemask is empty) - */ -int __node_random(const nodemask_t *maskp) -{ - int w, bit = -1; - - w = nodes_weight(*maskp); - if (w) - bit = bitmap_ord_to_pos(maskp->bits, - get_random_int() % w, MAX_NUMNODES); - return bit; -} -EXPORT_SYMBOL(__node_random); diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 6fb2720882fc..daf8c480c786 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -141,7 +141,6 @@ extern int bitmap_find_free_region(unsigned long *bitmap, int bits, int order); extern void bitmap_release_region(unsigned long *bitmap, int pos, int order); extern int bitmap_allocate_region(unsigned long *bitmap, int pos, int order); extern void bitmap_copy_le(void *dst, const unsigned long *src, int nbits); -extern int bitmap_ord_to_pos(const unsigned long *bitmap, int n, int bits); #define BITMAP_LAST_WORD_MASK(nbits) \ ( \ diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index 8a8f1d09c133..dba35e413371 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -66,8 +66,6 @@ * int num_online_nodes() Number of online Nodes * int num_possible_nodes() Number of all possible Nodes * - * int node_random(mask) Random node with set bit in mask - * * int node_online(node) Is some node online? * int node_possible(node) Is some node possible? * @@ -432,10 +430,6 @@ static inline void node_set_offline(int nid) node_clear_state(nid, N_ONLINE); nr_online_nodes = num_node_state(N_ONLINE); } - -#define node_random(mask) __node_random(&(mask)) -extern int __node_random(const nodemask_t *maskp); - #else static inline int node_state(int node, enum node_states state) @@ -466,8 +460,6 @@ static inline int num_node_state(enum node_states state) #define node_set_online(node) node_set_state((node), N_ONLINE) #define node_set_offline(node) node_clear_state((node), N_ONLINE) - -static inline int node_random(const nodemask_t mask) { return 0; } #endif #define node_online_map node_states[N_ONLINE] diff --git a/kernel/fork.c b/kernel/fork.c index bf9fef6d1bfe..b6cce14ba047 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1086,10 +1086,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, } mpol_fix_fork_child_flag(p); #endif -#ifdef CONFIG_CPUSETS - p->cpuset_mem_spread_rotor = node_random(p->mems_allowed); - p->cpuset_slab_spread_rotor = node_random(p->mems_allowed); -#endif #ifdef CONFIG_TRACE_IRQFLAGS p->irq_events = 0; #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW diff --git a/lib/bitmap.c b/lib/bitmap.c index d7137e7e06e8..ffb78c916ccd 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -672,7 +672,7 @@ static int bitmap_pos_to_ord(const unsigned long *buf, int pos, int bits) * * The bit positions 0 through @bits are valid positions in @buf. */ -int bitmap_ord_to_pos(const unsigned long *buf, int ord, int bits) +static int bitmap_ord_to_pos(const unsigned long *buf, int ord, int bits) { int pos = 0; -- cgit v1.2.3