From b6dd08652e2b70e73661c4975ae46398066c06f8 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 23 Aug 2010 10:33:19 +1000 Subject: radix-tree: clear all tags in radix_tree_node_rcu_free Commit f446daaea9d4a420d16c606f755f3689dcb2d0ce ("mm: implement writeback livelock avoidance using page tagging") introduced a new radix tree tag, increasing the number of tags in each node from 2 to 3. It did not, however, fix up the code in radix_tree_node_rcu_free() that cleans up after radix_tree_shrink() and hence could leave stray tags set in the new tag array. The result is that the livelock avoidance code added in the the above commit would hit stale tags when doing tag based lookups, resulting in livelocks when trying to traverse the tree. Fix this problem in radix_tree_node_rcu_free() so it doesn't happen again in the future by using a loop to walk all the tags up to RADIX_TREE_MAX_TAGS to clear the stray tags radix_tree_shrink() leaves behind. Signed-off-by: Dave Chinner Acked-by: Nick Piggin Acked-by: Jan Kara --- lib/radix-tree.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index e907858498a6..1014171dd1c5 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -174,14 +174,16 @@ static void radix_tree_node_rcu_free(struct rcu_head *head) { struct radix_tree_node *node = container_of(head, struct radix_tree_node, rcu_head); + int i; /* * must only free zeroed nodes into the slab. radix_tree_shrink * can leave us with a non-NULL entry in the first slot, so clear * that here to make sure. */ - tag_clear(node, 0, 0); - tag_clear(node, 1, 0); + for (i = 0; i < RADIX_TREE_MAX_TAGS; i++) + tag_clear(node, i, 0); + node->slots[0] = NULL; node->count = 0; -- cgit v1.2.3 From 144dcfc01221e1a79fa47ca897df7d5e3ab298e6 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 23 Aug 2010 10:33:53 +1000 Subject: radix-tree: radix_tree_range_tag_if_tagged() can set incorrect tags Commit ebf8aa44beed48cd17893a83d92a4403e5f9d9e2 ("radix-tree: omplement function radix_tree_range_tag_if_tagged") does not safely set tags on on intermediate tree nodes. The code walks down the tree setting tags before it has fully resolved the path to the leaf under the assumption there will be a leaf slot with the tag set in the range it is searching. Unfortunately, this is not a valid assumption - we can abort after setting a tag on an intermediate node if we overrun the number of tags we are allowed to set in a batch, or stop scanning because we we have passed the last scan index before we reach a leaf slot with the tag we are searching for set. As a result, we can leave the function with tags set on intemediate nodes which can be tripped over later by tag-based lookups. The result of these stale tags is that lookup may end prematurely or livelock because the lookup cannot make progress. The fix for the problem involves reocrding the traversal path we take to the leaf nodes, and only propagating the tags back up the tree once the tag is set in the leaf node slot. We are already recording the path for efficient traversal, so there is no additional overhead to do the intermediately node tag setting in this manner. This fixes a radix tree lookup livelock triggered by the new writeback sync livelock avoidance code introduced in commit f446daaea9d4a420d16c606f755f3689dcb2d0ce ("mm: implement writeback livelock avoidance using page tagging"). Signed-off-by: Dave Chinner Acked-by: Jan Kara --- lib/radix-tree.c | 57 +++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 44 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 1014171dd1c5..e0ee8cb37e41 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -625,6 +625,13 @@ EXPORT_SYMBOL(radix_tree_tag_get); * also settag. The function stops either after tagging nr_to_tag items or * after reaching last_index. * + * The tags must be set from the leaf level only and propagated back up the + * path to the root. We must do this so that we resolve the full path before + * setting any tags on intermediate nodes. If we set tags as we descend, then + * we can get to the leaf node and find that the index that has the iftag + * set is outside the range we are scanning. This reults in dangling tags and + * can lead to problems with later tag operations (e.g. livelocks on lookups). + * * The function returns number of leaves where the tag was set and sets * *first_indexp to the first unscanned index. */ @@ -633,9 +640,13 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, unsigned long nr_to_tag, unsigned int iftag, unsigned int settag) { - unsigned int height = root->height, shift; - unsigned long tagged = 0, index = *first_indexp; - struct radix_tree_node *open_slots[height], *slot; + unsigned int height = root->height; + struct radix_tree_path path[height]; + struct radix_tree_path *pathp = path; + struct radix_tree_node *slot; + unsigned int shift; + unsigned long tagged = 0; + unsigned long index = *first_indexp; last_index = min(last_index, radix_tree_maxindex(height)); if (index > last_index) @@ -655,6 +666,13 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, shift = (height - 1) * RADIX_TREE_MAP_SHIFT; slot = radix_tree_indirect_to_ptr(root->rnode); + /* + * we fill the path from (root->height - 2) to 0, leaving the index at + * (root->height - 1) as a terminator. Zero the node in the terminator + * so that we can use this to end walk loops back up the path. + */ + path[height - 1].node = NULL; + for (;;) { int offset; @@ -663,17 +681,30 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, goto next; if (!tag_get(slot, iftag, offset)) goto next; + if (height > 1) { + /* Go down one level */ + height--; + shift -= RADIX_TREE_MAP_SHIFT; + path[height - 1].node = slot; + path[height - 1].offset = offset; + slot = slot->slots[offset]; + continue; + } + + /* tag the leaf */ + tagged++; tag_set(slot, settag, offset); - if (height == 1) { - tagged++; - goto next; + + /* walk back up the path tagging interior nodes */ + pathp = &path[0]; + while (pathp->node) { + /* stop if we find a node with the tag already set */ + if (tag_get(pathp->node, settag, pathp->offset)) + break; + tag_set(pathp->node, settag, pathp->offset); + pathp++; } - /* Go down one level */ - height--; - shift -= RADIX_TREE_MAP_SHIFT; - open_slots[height] = slot; - slot = slot->slots[offset]; - continue; + next: /* Go to next item at level determined by 'shift' */ index = ((index >> shift) + 1) << shift; @@ -687,7 +718,7 @@ next: * last_index is guaranteed to be in the tree, what * we do below cannot wander astray. */ - slot = open_slots[height]; + slot = path[height - 1].node; height++; shift += RADIX_TREE_MAP_SHIFT; } -- cgit v1.2.3 From f6e6e7799ebcad40fa15e4015beca2d776554302 Mon Sep 17 00:00:00 2001 From: Xiaotian Feng Date: Fri, 13 Aug 2010 18:58:10 +0800 Subject: kobject_uevent: fix typo in comments s/ending/sending, s/kobject_uevent()/kobject_uevent_env() in the comments. Signed-off-by: Xiaotian Feng Signed-off-by: Greg Kroah-Hartman --- lib/kobject_uevent.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index b93579504dfa..70af0a7f97c0 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -123,7 +123,7 @@ static int kobj_usermode_filter(struct kobject *kobj) * @kobj: struct kobject that the action is happening to * @envp_ext: pointer to environmental data * - * Returns 0 if kobject_uevent() is completed with success or the + * Returns 0 if kobject_uevent_env() is completed with success or the * corresponding error when it fails. */ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, @@ -317,7 +317,7 @@ exit: EXPORT_SYMBOL_GPL(kobject_uevent_env); /** - * kobject_uevent - notify userspace by ending an uevent + * kobject_uevent - notify userspace by sending an uevent * * @action: action that is happening * @kobj: struct kobject that the action is happening to -- cgit v1.2.3 From 7c44ece988a3419e51c833cf14be186a22224dd1 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 30 Aug 2010 17:35:52 +1000 Subject: Move .gitignore from drivers/md to lib/raid6 Another missing bit of the raid6 -> /lib move. Reported-by: Andreas Schwab Signed-off-by: NeilBrown --- drivers/md/.gitignore | 4 ---- lib/raid6/.gitignore | 4 ++++ 2 files changed, 4 insertions(+), 4 deletions(-) delete mode 100644 drivers/md/.gitignore create mode 100644 lib/raid6/.gitignore (limited to 'lib') diff --git a/drivers/md/.gitignore b/drivers/md/.gitignore deleted file mode 100644 index a7afec6b19c6..000000000000 --- a/drivers/md/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -mktables -raid6altivec*.c -raid6int*.c -raid6tables.c diff --git a/lib/raid6/.gitignore b/lib/raid6/.gitignore new file mode 100644 index 000000000000..162becacf97c --- /dev/null +++ b/lib/raid6/.gitignore @@ -0,0 +1,4 @@ +mktables +altivec*.c +int*.c +tables.c -- cgit v1.2.3 From edce6820a9fdda85521211cb334a183e34cc455e Mon Sep 17 00:00:00 2001 From: Jeffrey Carlyle Date: Mon, 30 Aug 2010 19:55:09 +0200 Subject: scatterlist: prevent invalid free when alloc fails When alloc fails, free_table is being called. Depending on the number of bytes requested, we determine if we are going to call _get_free_page() or kmalloc(). When alloc fails, our math is wrong (due to sg_size - 1), and the last buffer is wrongfully assumed to have been allocated by kmalloc. Hence, kfree gets called and a panic occurs. Signed-off-by: Jeffrey Carlyle Signed-off-by: Olusanya Soyannwo Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- lib/scatterlist.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/scatterlist.c b/lib/scatterlist.c index a5ec42868f99..4ceb05d772ae 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -248,8 +248,18 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents, left -= sg_size; sg = alloc_fn(alloc_size, gfp_mask); - if (unlikely(!sg)) - return -ENOMEM; + if (unlikely(!sg)) { + /* + * Adjust entry count to reflect that the last + * entry of the previous table won't be used for + * linkage. Without this, sg_kfree() may get + * confused. + */ + if (prv) + table->nents = ++table->orig_nents; + + return -ENOMEM; + } sg_init_table(sg, alloc_size); table->nents = table->orig_nents += sg_size; -- cgit v1.2.3 From f015ac3edd84ad72f88e08a4d83c56c360aae404 Mon Sep 17 00:00:00 2001 From: Don Mullis Date: Thu, 30 Sep 2010 15:15:32 -0700 Subject: lib/list_sort: do not pass bad pointers to cmp callback If the original list is a POT in length, the first callback from line 73 will pass a==b both pointing to the original list_head. This is dangerous because the 'list_sort()' user can use 'container_of()' and accesses the "containing" object, which does not necessary exist for the list head. So the user can access RAM which does not belong to him. If this is a write access, we can end up with memory corruption. Signed-off-by: Don Mullis Tested-by: Artem Bityutskiy Signed-off-by: Artem Bityutskiy Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/list_sort.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/list_sort.c b/lib/list_sort.c index 4b5cb794c38b..a7616fa3162e 100644 --- a/lib/list_sort.c +++ b/lib/list_sort.c @@ -70,7 +70,7 @@ static void merge_and_restore_back_links(void *priv, * element comparison is needed, so the client's cmp() * routine can invoke cond_resched() periodically. */ - (*cmp)(priv, tail, tail); + (*cmp)(priv, tail->next, tail->next); tail->next->prev = tail; tail = tail->next; -- cgit v1.2.3 From 5336377d6225959624146629ce3fc88ee8ecda3d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 5 Oct 2010 11:29:27 -0700 Subject: modules: Fix module_bug_list list corruption race With all the recent module loading cleanups, we've minimized the code that sits under module_mutex, fixing various deadlocks and making it possible to do most of the module loading in parallel. However, that whole conversion totally missed the rather obscure code that adds a new module to the list for BUG() handling. That code was doubly obscure because (a) the code itself lives in lib/bugs.c (for dubious reasons) and (b) it gets called from the architecture-specific "module_finalize()" rather than from generic code. Calling it from arch-specific code makes no sense what-so-ever to begin with, and is now actively wrong since that code isn't protected by the module loading lock any more. So this commit moves the "module_bug_{finalize,cleanup}()" calls away from the arch-specific code, and into the generic code - and in the process protects it with the module_mutex so that the list operations are now safe. Future fixups: - move the module list handling code into kernel/module.c where it belongs. - get rid of 'module_bug_list' and just use the regular list of modules (called 'modules' - imagine that) that we already create and maintain for other reasons. Reported-and-tested-by: Thomas Gleixner Cc: Rusty Russell Cc: Adrian Bunk Cc: Andrew Morton Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- arch/avr32/kernel/module.c | 3 +-- arch/h8300/kernel/module.c | 3 +-- arch/mn10300/kernel/module.c | 3 +-- arch/parisc/kernel/module.c | 3 +-- arch/powerpc/kernel/module.c | 5 ----- arch/s390/kernel/module.c | 3 +-- arch/sh/kernel/module.c | 2 -- arch/x86/kernel/module.c | 3 +-- include/linux/module.h | 5 ++--- kernel/module.c | 4 ++++ lib/bug.c | 6 ++---- 11 files changed, 14 insertions(+), 26 deletions(-) (limited to 'lib') diff --git a/arch/avr32/kernel/module.c b/arch/avr32/kernel/module.c index 98f94d041d9c..a727f54d64d6 100644 --- a/arch/avr32/kernel/module.c +++ b/arch/avr32/kernel/module.c @@ -314,10 +314,9 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, vfree(module->arch.syminfo); module->arch.syminfo = NULL; - return module_bug_finalize(hdr, sechdrs, module); + return 0; } void module_arch_cleanup(struct module *module) { - module_bug_cleanup(module); } diff --git a/arch/h8300/kernel/module.c b/arch/h8300/kernel/module.c index 0865e291c20d..db4953dc4e1b 100644 --- a/arch/h8300/kernel/module.c +++ b/arch/h8300/kernel/module.c @@ -112,10 +112,9 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { - return module_bug_finalize(hdr, sechdrs, me); + return 0; } void module_arch_cleanup(struct module *mod) { - module_bug_cleanup(mod); } diff --git a/arch/mn10300/kernel/module.c b/arch/mn10300/kernel/module.c index 6aea7fd76993..196a111e2e29 100644 --- a/arch/mn10300/kernel/module.c +++ b/arch/mn10300/kernel/module.c @@ -206,7 +206,7 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { - return module_bug_finalize(hdr, sechdrs, me); + return 0; } /* @@ -214,5 +214,4 @@ int module_finalize(const Elf_Ehdr *hdr, */ void module_arch_cleanup(struct module *mod) { - module_bug_cleanup(mod); } diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index 159a2b81e90c..6e81bb596e5b 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -941,11 +941,10 @@ int module_finalize(const Elf_Ehdr *hdr, nsyms = newptr - (Elf_Sym *)symhdr->sh_addr; DEBUGP("NEW num_symtab %lu\n", nsyms); symhdr->sh_size = nsyms * sizeof(Elf_Sym); - return module_bug_finalize(hdr, sechdrs, me); + return 0; } void module_arch_cleanup(struct module *mod) { deregister_unwind_table(mod); - module_bug_cleanup(mod); } diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index 477c663e0140..4ef93ae2235f 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -65,10 +65,6 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sect; int err; - err = module_bug_finalize(hdr, sechdrs, me); - if (err) - return err; - /* Apply feature fixups */ sect = find_section(hdr, sechdrs, "__ftr_fixup"); if (sect != NULL) @@ -101,5 +97,4 @@ int module_finalize(const Elf_Ehdr *hdr, void module_arch_cleanup(struct module *mod) { - module_bug_cleanup(mod); } diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 22cfd634c355..f7167ee4604c 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -407,10 +407,9 @@ int module_finalize(const Elf_Ehdr *hdr, { vfree(me->arch.syminfo); me->arch.syminfo = NULL; - return module_bug_finalize(hdr, sechdrs, me); + return 0; } void module_arch_cleanup(struct module *mod) { - module_bug_cleanup(mod); } diff --git a/arch/sh/kernel/module.c b/arch/sh/kernel/module.c index 43adddfe4c04..ae0be697a89e 100644 --- a/arch/sh/kernel/module.c +++ b/arch/sh/kernel/module.c @@ -149,13 +149,11 @@ int module_finalize(const Elf_Ehdr *hdr, int ret = 0; ret |= module_dwarf_finalize(hdr, sechdrs, me); - ret |= module_bug_finalize(hdr, sechdrs, me); return ret; } void module_arch_cleanup(struct module *mod) { - module_bug_cleanup(mod); module_dwarf_cleanup(mod); } diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index e0bc186d7501..1c355c550960 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -239,11 +239,10 @@ int module_finalize(const Elf_Ehdr *hdr, apply_paravirt(pseg, pseg + para->sh_size); } - return module_bug_finalize(hdr, sechdrs, me); + return 0; } void module_arch_cleanup(struct module *mod) { alternatives_smp_module_del(mod); - module_bug_cleanup(mod); } diff --git a/include/linux/module.h b/include/linux/module.h index 8a6b9fdc7ffa..aace066bad8f 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -686,17 +686,16 @@ extern int module_sysfs_initialized; #ifdef CONFIG_GENERIC_BUG -int module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *, +void module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *, struct module *); void module_bug_cleanup(struct module *); #else /* !CONFIG_GENERIC_BUG */ -static inline int module_bug_finalize(const Elf_Ehdr *hdr, +static inline void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod) { - return 0; } static inline void module_bug_cleanup(struct module *mod) {} #endif /* CONFIG_GENERIC_BUG */ diff --git a/kernel/module.c b/kernel/module.c index d0b5f8db11b4..ccd641991842 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1537,6 +1537,7 @@ static int __unlink_module(void *_mod) { struct module *mod = _mod; list_del(&mod->list); + module_bug_cleanup(mod); return 0; } @@ -2625,6 +2626,7 @@ static struct module *load_module(void __user *umod, if (err < 0) goto ddebug; + module_bug_finalize(info.hdr, info.sechdrs, mod); list_add_rcu(&mod->list, &modules); mutex_unlock(&module_mutex); @@ -2650,6 +2652,8 @@ static struct module *load_module(void __user *umod, mutex_lock(&module_mutex); /* Unlink carefully: kallsyms could be walking list. */ list_del_rcu(&mod->list); + module_bug_cleanup(mod); + ddebug: if (!mod->taints) dynamic_debug_remove(info.debug); diff --git a/lib/bug.c b/lib/bug.c index 7cdfad88128f..19552096d16b 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -72,8 +72,8 @@ static const struct bug_entry *module_find_bug(unsigned long bugaddr) return NULL; } -int module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, - struct module *mod) +void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, + struct module *mod) { char *secstrings; unsigned int i; @@ -97,8 +97,6 @@ int module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, * could potentially lead to deadlock and thus be counter-productive. */ list_add(&mod->bug_list, &module_bug_list); - - return 0; } void module_bug_cleanup(struct module *mod) -- cgit v1.2.3