-rw-r--r--   arch/i386/Kconfig               |    8
-rw-r--r--   arch/i386/kernel/cpu/common.c   |    8
-rw-r--r--   arch/i386/mm/pageattr.c         |   82
-rw-r--r--   include/asm-i386/cacheflush.h   |    5
-rw-r--r--   include/linux/mm.h              |    8
-rw-r--r--   include/linux/slab.h            |    2
-rw-r--r--   mm/page_alloc.c                 |   15
-rw-r--r--   mm/slab.c                       |  218
8 files changed, 267 insertions, 79 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index ed5254e36eeb..3d78369616c0 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -1339,6 +1339,14 @@ config DEBUG_SPINLOCK
 	  best used in conjunction with the NMI watchdog so that spinlock
 	  deadlocks are also debuggable.
 
+config DEBUG_PAGEALLOC
+	bool "Page alloc debugging"
+	depends on DEBUG_KERNEL
+	help
+	  Unmap pages from the kernel linear mapping after free_pages().
+	  This results in a large slowdown, but helps to find certain types
+	  of memory corruptions.
+
 config DEBUG_HIGHMEM
 	bool "Highmem debugging"
 	depends on DEBUG_KERNEL && HIGHMEM
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index a114c2ab7f83..5e579ede103c 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -430,6 +430,14 @@ void __init early_cpu_init(void)
 	rise_init_cpu();
 	nexgen_init_cpu();
 	umc_init_cpu();
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+	/* pse is not compatible with on-the-fly unmapping,
+	 * disable it even if the cpus claim to support it.
+	 */
+	clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
+	disable_pse = 1;
+#endif
 }
 
 /*
  * cpu_init() initializes state that is per-CPU. Some data is already
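The Kconfig help text above is the whole mechanism in one sentence: once a page is freed it disappears from the kernel's linear mapping, so a late access faults immediately instead of silently corrupting whoever owns the page next. PSE has to go because 4 MB mappings cannot be punched out one 4 KB page at a time. A minimal userspace sketch of the same idea (not the kernel code; mprotect() stands in for the pte manipulation that change_page_attr() does below, and the helper names are made up):

#include <sys/mman.h>
#include <unistd.h>

static long pagesz;

static void *debug_alloc_page(void)
{
	/* one anonymous page, readable and writable */
	return mmap(NULL, pagesz, PROT_READ | PROT_WRITE,
		    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
}

static void debug_free_page(void *p)
{
	/* keep the address range reserved but revoke all access:
	 * a stale pointer now faults at the offending instruction */
	mprotect(p, pagesz, PROT_NONE);
}

int main(void)
{
	char *p;

	pagesz = sysconf(_SC_PAGESIZE);
	p = debug_alloc_page();
	if (p == MAP_FAILED)
		return 1;
	p[0] = 'x';		/* fine */
	debug_free_page(p);
	p[0] = 'y';		/* use-after-free: SIGSEGV right here */
	return 0;
}

The "large slowdown" the help text warns about is the price of this precision: every free now costs a page-table update plus a TLB flush.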
diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c
index 7bb9f7ebe469..51b777c42d53 100644
--- a/arch/i386/mm/pageattr.c
+++ b/arch/i386/mm/pageattr.c
@@ -13,6 +13,10 @@
 #include <asm/processor.h>
 #include <asm/tlbflush.h>
 
+static spinlock_t cpa_lock = SPIN_LOCK_UNLOCKED;
+static struct list_head df_list = LIST_HEAD_INIT(df_list);
+
+
 static inline pte_t *lookup_address(unsigned long address)
 {
 	pgd_t *pgd = pgd_offset_k(address);
@@ -31,10 +35,15 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot)
 {
 	int i;
 	unsigned long addr;
-	struct page *base = alloc_pages(GFP_KERNEL, 0);
+	struct page *base;
 	pte_t *pbase;
+
+	spin_unlock_irq(&cpa_lock);
+	base = alloc_pages(GFP_KERNEL, 0);
+	spin_lock_irq(&cpa_lock);
 	if (!base)
 		return NULL;
+
 	address = __pa(address);
 	addr = address & LARGE_PAGE_MASK;
 	pbase = (pte_t *)page_address(base);
@@ -87,7 +96,7 @@ static inline void revert_page(struct page *kpte_page, unsigned long address)
 }
 
 static int
-__change_page_attr(struct page *page, pgprot_t prot, struct page **oldpage)
+__change_page_attr(struct page *page, pgprot_t prot)
 {
 	pte_t *kpte;
 	unsigned long address;
@@ -123,7 +132,7 @@ __change_page_attr(struct page *page, pgprot_t prot, struct page **oldpage)
 	}
 
 	if (cpu_has_pse && (atomic_read(&kpte_page->count) == 1)) {
-		*oldpage = kpte_page;
+		list_add(&kpte_page->list, &df_list);
 		revert_page(kpte_page, address);
 	}
 	return 0;
@@ -134,12 +143,6 @@ static inline void flush_map(void)
 	on_each_cpu(flush_kernel_map, NULL, 1, 1);
 }
 
-struct deferred_page {
-	struct deferred_page *next;
-	struct page *fpage;
-};
-static struct deferred_page *df_list; /* protected by init_mm.mmap_sem */
-
 /*
  * Change the page attributes of an page in the linear mapping.
  *
@@ -156,47 +159,54 @@ static struct deferred_page *df_list; /* protected by init_mm.mmap_sem */
 int change_page_attr(struct page *page, int numpages, pgprot_t prot)
 {
 	int err = 0;
-	struct page *fpage;
 	int i;
+	unsigned long flags;
 
-	down_write(&init_mm.mmap_sem);
+	spin_lock_irqsave(&cpa_lock, flags);
 	for (i = 0; i < numpages; i++, page++) {
-		fpage = NULL;
-		err = __change_page_attr(page, prot, &fpage);
+		err = __change_page_attr(page, prot);
 		if (err)
 			break;
-		if (fpage) {
-			struct deferred_page *df;
-			df = kmalloc(sizeof(struct deferred_page), GFP_KERNEL);
-			if (!df) {
-				flush_map();
-				__free_page(fpage);
-			} else {
-				df->next = df_list;
-				df->fpage = fpage;
-				df_list = df;
-			}
-		}
 	}
-	up_write(&init_mm.mmap_sem);
+	spin_unlock_irqrestore(&cpa_lock, flags);
 	return err;
 }
 
 void global_flush_tlb(void)
 {
-	struct deferred_page *df, *next_df;
+	LIST_HEAD(l);
+	struct list_head* n;
 
-	down_read(&init_mm.mmap_sem);
-	df = xchg(&df_list, NULL);
-	up_read(&init_mm.mmap_sem);
+	BUG_ON(irqs_disabled());
+
+	spin_lock_irq(&cpa_lock);
+	list_splice_init(&df_list, &l);
+	spin_unlock_irq(&cpa_lock);
 	flush_map();
-	for (; df; df = next_df) {
-		next_df = df->next;
-		if (df->fpage)
-			__free_page(df->fpage);
-		kfree(df);
-	}
+	n = l.next;
+	while (n != &l) {
+		struct page *pg = list_entry(n, struct page, list);
+		n = n->next;
+		__free_page(pg);
+	}
 }
 
+#ifdef CONFIG_DEBUG_PAGEALLOC
+void kernel_map_pages(struct page *page, int numpages, int enable)
+{
+	if (PageHighMem(page))
+		return;
+	/* the return value is ignored - the calls cannot fail,
+	 * large pages are disabled at boot time.
+	 */
+	change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));
+	/* we should perform an IPI and flush all tlbs,
+	 * but that can deadlock->flush only current cpu.
+	 */
+	__flush_tlb_all();
+}
+EXPORT_SYMBOL(kernel_map_pages);
+#endif
+
 EXPORT_SYMBOL(change_page_attr);
 EXPORT_SYMBOL(global_flush_tlb);
diff --git a/include/asm-i386/cacheflush.h b/include/asm-i386/cacheflush.h
index adc632b97542..d3ce5511dbbc 100644
--- a/include/asm-i386/cacheflush.h
+++ b/include/asm-i386/cacheflush.h
@@ -17,4 +17,9 @@ void global_flush_tlb(void);
 int change_page_attr(struct page *page, int numpages, pgprot_t prot);
 
+#ifdef CONFIG_DEBUG_PAGEALLOC
+/* internal debugging function */
+void kernel_map_pages(struct page *page, int numpages, int enable);
+#endif
+
 #endif /* _I386_CACHEFLUSH_H */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 492bc8aeb053..4d183974fd36 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -609,5 +609,13 @@ extern struct page * follow_page(struct mm_struct *mm, unsigned long address,
 		int write);
 extern int remap_page_range(struct vm_area_struct *vma, unsigned long from,
 		unsigned long to, unsigned long size, pgprot_t prot);
+
+#ifndef CONFIG_DEBUG_PAGEALLOC
+static inline void
+kernel_map_pages(struct page *page, int numpages, int enable)
+{
+}
+#endif
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 843c8d638d29..9f8bccba4ad3 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -114,6 +114,8 @@ extern kmem_cache_t *signal_cachep;
 extern kmem_cache_t *sighand_cachep;
 extern kmem_cache_t *bio_cachep;
 
+void ptrinfo(unsigned long addr);
+
 #endif /* __KERNEL__ */
 
 #endif /* _LINUX_SLAB_H */
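Two pageattr.c changes work in concert here. split_large_page() drops cpa_lock around alloc_pages() because sleeping allocations cannot run under a spinlock with interrupts off, and the kmalloc'd deferred_page bookkeeping is gone entirely: struct page's own list member threads the to-be-freed pages onto df_list. global_flush_tlb() then splices the whole backlog out in O(1), pays for one global flush, and frees the pages with the lock dropped. A compilable sketch of that splice-then-process shape, with userspace types and a pthread mutex standing in for the spinlock (list helpers modeled on the kernel's, simplified to assume the destination list starts empty, which is how global_flush_tlb() uses it):

#include <pthread.h>

struct list_head { struct list_head *next, *prev; };

static struct list_head df_list = { &df_list, &df_list };
static pthread_mutex_t cpa_lock = PTHREAD_MUTEX_INITIALIZER;

/* move every node from 'from' onto the empty list 'to' in O(1),
 * leaving 'from' empty */
static void list_splice_init(struct list_head *from, struct list_head *to)
{
	if (from->next == from)
		return;
	to->next = from->next;
	to->prev = from->prev;
	from->next->prev = to;
	from->prev->next = to;
	from->next = from->prev = from;
}

void drain_deferred(void (*expensive_barrier)(void),
		    void (*release)(struct list_head *))
{
	struct list_head local = { &local, &local };
	struct list_head *n;

	/* grab the whole backlog while holding the lock... */
	pthread_mutex_lock(&cpa_lock);
	list_splice_init(&df_list, &local);
	pthread_mutex_unlock(&cpa_lock);

	/* ...pay the expensive barrier (the TLB flush) once... */
	expensive_barrier();

	/* ...then release each entry with the lock dropped */
	n = local.next;
	while (n != &local) {
		struct list_head *cur = n;
		n = n->next;
		release(cur);
	}
}

The BUG_ON(irqs_disabled()) in global_flush_tlb() documents the constraint this dance exists for: flush_map() sends IPIs and must not run with interrupts off, which is exactly why the backlog is spliced out first instead of being processed under cpa_lock.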
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index dfd254c2c94b..741866b59d7d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -32,6 +32,8 @@
 #include <linux/sysctl.h>
 #include <linux/cpu.h>
 
+#include <asm/tlbflush.h>
+
 DECLARE_BITMAP(node_online_map, MAX_NUMNODES);
 DECLARE_BITMAP(memblk_online_map, MAX_NR_MEMBLKS);
 struct pglist_data *pgdat_list;
@@ -265,6 +267,7 @@ void __free_pages_ok(struct page *page, unsigned int order)
 	mod_page_state(pgfree, 1 << order);
 	free_pages_check(__FUNCTION__, page);
 	list_add(&page->list, &list);
+	kernel_map_pages(page, 1<<order, 0);
 	free_pages_bulk(page_zone(page), 1, &list, order);
 }
 
@@ -440,6 +443,7 @@ static void free_hot_cold_page(struct page *page, int cold)
 	struct per_cpu_pages *pcp;
 	unsigned long flags;
 
+	kernel_map_pages(page, 1, 0);
 	inc_page_state(pgfree);
 	free_pages_check(__FUNCTION__, page);
 	pcp = &zone->pageset[get_cpu()].pcp[cold];
@@ -556,7 +560,7 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
 		    (!wait && z->free_pages >= z->pages_high)) {
 			page = buffered_rmqueue(z, order, cold);
 			if (page)
-				return page;
+				goto got_pg;
 		}
 		min += z->pages_low * sysctl_lower_zone_protection;
 	}
@@ -579,7 +583,7 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
 		    (!wait && z->free_pages >= z->pages_high)) {
 			page = buffered_rmqueue(z, order, cold);
 			if (page)
-				return page;
+				goto got_pg;
 		}
 		min += local_min * sysctl_lower_zone_protection;
 	}
@@ -594,7 +598,7 @@ rebalance:
 
 			page = buffered_rmqueue(z, order, cold);
 			if (page)
-				return page;
+				goto got_pg;
 		}
 		goto nopage;
 	}
@@ -622,7 +626,7 @@ rebalance:
 		    (!wait && z->free_pages >= z->pages_high)) {
 			page = buffered_rmqueue(z, order, cold);
 			if (page)
-				return page;
+				goto got_pg;
 		}
 		min += z->pages_low * sysctl_lower_zone_protection;
 	}
@@ -653,6 +657,9 @@ nopage:
 			current->comm, order, gfp_mask);
 	}
 	return NULL;
+got_pg:
+	kernel_map_pages(page, 1 << order, 1);
+	return page;
 }
 
 /*
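The __alloc_pages() change is purely structural: all four success returns funnel through one got_pg label, so the page is remapped into the linear mapping on every path that can hand it out, and the free paths unmap symmetrically. A small sketch of that single-success-exit shape (the helper names are made up, standing in for buffered_rmqueue() and the kernel_map_pages() hook):

#include <stdlib.h>

/* stand-ins for buffered_rmqueue() and kernel_map_pages(..., 1) */
static void *try_zone(int i) { return i == 2 ? malloc(4096) : NULL; }
static void *try_reclaim(void) { return NULL; }
static void map_into_linear_mapping(void *page) { (void)page; }

void *alloc_with_hook(void)
{
	void *page;
	int i;

	for (i = 0; i < 3; i++) {
		page = try_zone(i);
		if (page)
			goto got_pg;	/* was: return page */
	}
	page = try_reclaim();
	if (page)
		goto got_pg;
	return NULL;		/* the nopage path stays hook-free */

got_pg:
	/* runs on every success path; cannot be missed when a new
	 * allocation path is added later */
	map_into_linear_mapping(page);
	return page;
}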
diff --git a/mm/slab.c b/mm/slab.c
index cad1bb2e8ebb..afb8d8415999 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -89,7 +89,11 @@
 #include <linux/notifier.h>
 #include <linux/kallsyms.h>
 #include <linux/cpu.h>
+#include <linux/sysctl.h>
+
 #include <asm/uaccess.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
 
 /*
  * DEBUG	- 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL,
@@ -351,6 +355,34 @@ struct kmem_cache_s {
 #define POISON_AFTER	0x6b	/* for use-after-free poisoning */
 #define POISON_END	0xa5	/* end-byte of poisoning */
 
+static inline int obj_dbghead(kmem_cache_t *cachep)
+{
+	if (cachep->flags & SLAB_RED_ZONE)
+		return BYTES_PER_WORD;
+	return 0;
+}
+
+static inline int obj_dbglen(kmem_cache_t *cachep)
+{
+	int len = 0;
+
+	if (cachep->flags & SLAB_RED_ZONE) {
+		len += 2*BYTES_PER_WORD;
+	}
+	if (cachep->flags & SLAB_STORE_USER) {
+		len += BYTES_PER_WORD;
+	}
+	return len;
+}
+#else
+static inline int obj_dbghead(kmem_cache_t *cachep)
+{
+	return 0;
+}
+static inline int obj_dbglen(kmem_cache_t *cachep)
+{
+	return 0;
+}
 #endif
 
 /*
@@ -765,16 +797,45 @@ static inline void kmem_freepages (kmem_cache_t *cachep, void *addr)
 }
 
 #if DEBUG
-static void poison_obj(kmem_cache_t *cachep, void *addr, unsigned char val)
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+static void store_stackinfo(kmem_cache_t *cachep, unsigned long *addr, unsigned long caller)
 {
-	int size = cachep->objsize;
-	if (cachep->flags & SLAB_RED_ZONE) {
-		addr += BYTES_PER_WORD;
-		size -= 2*BYTES_PER_WORD;
-	}
-	if (cachep->flags & SLAB_STORE_USER) {
-		size -= BYTES_PER_WORD;
+	int size = cachep->objsize-obj_dbglen(cachep);
+
+	addr = (unsigned long *)&((char*)addr)[obj_dbghead(cachep)];
+
+	if (size < 5*sizeof(unsigned long))
+		return;
+
+	*addr++=0x12345678;
+	*addr++=caller;
+	*addr++=smp_processor_id();
+	size -= 3*sizeof(unsigned long);
+	{
+		unsigned long *sptr = &caller;
+		unsigned long svalue;
+
+		while (((long) sptr & (THREAD_SIZE-1)) != 0) {
+			svalue = *sptr++;
+			if (kernel_text_address(svalue)) {
+				*addr++=svalue;
+				size -= sizeof(unsigned long);
+				if (size <= sizeof(unsigned long))
+					break;
+			}
+		}
+	}
+	*addr++=0x87654321;
+}
+#endif
+
+static void poison_obj(kmem_cache_t *cachep, void *addr, unsigned char val)
+{
+	int size = cachep->objsize-obj_dbglen(cachep);
+	addr = &((char*)addr)[obj_dbghead(cachep)];
+
 	memset(addr, val, size);
 	*(unsigned char *)(addr+size-1) = POISON_END;
 }
@@ -796,15 +857,11 @@ static void *scan_poisoned_obj(unsigned char* addr, unsigned int size)
 
 static void check_poison_obj(kmem_cache_t *cachep, void *addr)
 {
-	int size = cachep->objsize;
 	void *end;
-	if (cachep->flags & SLAB_RED_ZONE) {
-		addr += BYTES_PER_WORD;
-		size -= 2*BYTES_PER_WORD;
-	}
-	if (cachep->flags & SLAB_STORE_USER) {
-		size -= BYTES_PER_WORD;
-	}
+	int size = cachep->objsize-obj_dbglen(cachep);
+
+	addr = &((char*)addr)[obj_dbghead(cachep)];
+
 	end = scan_poisoned_obj(addr, size);
 	if (end) {
 		int s;
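store_stackinfo() is the interesting new debug payload. A poison pattern is useless on an object whose page is about to be unmapped anyway, so the dead object itself records who freed it: an opening magic word, the caller, the CPU, then every word found on the stack that looks like a kernel text address, which amounts to a conservative backtrace, and a closing magic word. A userspace approximation of that scan (the linker-provided text range stands in for kernel_text_address(), and a fixed depth bounds the walk instead of THREAD_SIZE):

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

extern char __executable_start[], _etext[];	/* provided by GNU ld */

static int looks_like_code(uintptr_t v)
{
	return v >= (uintptr_t)__executable_start && v < (uintptr_t)_etext;
}

static size_t conservative_backtrace(uintptr_t *out, size_t max)
{
	uintptr_t anchor;		/* any local yields a stack address */
	uintptr_t *sptr = &anchor;
	size_t n = 0;
	int depth;

	/* walk toward older frames, keeping words that point into text */
	for (depth = 0; depth < 1024 && n < max; depth++) {
		uintptr_t v = *sptr++;
		if (looks_like_code(v))	/* probably a return address */
			out[n++] = v;
	}
	return n;
}

int main(void)
{
	uintptr_t trace[16];
	size_t i, n = conservative_backtrace(trace, 16);

	for (i = 0; i < n; i++)
		printf("  [%zu] %#lx\n", i, (unsigned long)trace[i]);
	return 0;
}

The scan is conservative in the garbage-collector sense: saved registers or integers that happen to fall in the text range are recorded too, so the output is a superset of the real call chain, which is an acceptable trade for a fingerprint written into memory that is about to vanish.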
@@ -858,8 +915,16 @@ static void slab_destroy (kmem_cache_t *cachep, struct slab *slabp)
 			void *objp = slabp->s_mem + cachep->objsize * i;
 			int objlen = cachep->objsize;
 
-			if (cachep->flags & SLAB_POISON)
+			if (cachep->flags & SLAB_POISON) {
+#ifdef CONFIG_DEBUG_PAGEALLOC
+				if ((cachep->objsize%PAGE_SIZE)==0 && OFF_SLAB(cachep))
+					kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE,1);
+				else
+					check_poison_obj(cachep, objp);
+#else
 				check_poison_obj(cachep, objp);
+#endif
+			}
 			if (cachep->flags & SLAB_STORE_USER)
 				objlen -= BYTES_PER_WORD;
@@ -952,6 +1017,10 @@ kmem_cache_create (const char *name, size_t size, size_t offset,
 	}
 
 #if FORCED_DEBUG
+#ifdef CONFIG_DEBUG_PAGEALLOC
+	if (size < PAGE_SIZE-3*BYTES_PER_WORD && size > 128)
+		size = PAGE_SIZE-3*BYTES_PER_WORD;
+#endif
 	/*
 	 * Enable redzoning and last user accounting, except
 	 * - for caches with forced alignment: redzoning would violate the
@@ -1404,6 +1473,8 @@ static void cache_init_objs (kmem_cache_t * cachep,
 				slab_error(cachep, "constructor overwrote the"
 							" start of an object");
 		}
+		if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep) && cachep->flags & SLAB_POISON)
+			kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE, 0);
 #else
 		if (cachep->ctor)
 			cachep->ctor(objp, cachep, ctor_flags);
@@ -1584,25 +1655,28 @@ static inline void *cache_free_debugcheck (kmem_cache_t * cachep, void * objp, v
 		 * caller can perform a verify of its state (debugging).
 		 * Called without the cache-lock held.
 		 */
-		if (cachep->flags & SLAB_RED_ZONE) {
-			cachep->ctor(objp+BYTES_PER_WORD,
+		cachep->ctor(objp+obj_dbghead(cachep),
 					cachep, SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY);
-		} else {
-			cachep->ctor(objp, cachep, SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY);
-		}
 	}
 	if (cachep->flags & SLAB_POISON && cachep->dtor) {
 		/* we want to cache poison the object,
 		 * call the destruction callback
 		 */
-		if (cachep->flags & SLAB_RED_ZONE)
-			cachep->dtor(objp+BYTES_PER_WORD, cachep, 0);
-		else
-			cachep->dtor(objp, cachep, 0);
+		cachep->dtor(objp+obj_dbghead(cachep), cachep, 0);
 	}
-	if (cachep->flags & SLAB_POISON)
+	if (cachep->flags & SLAB_POISON) {
+#ifdef CONFIG_DEBUG_PAGEALLOC
+		if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) {
+			store_stackinfo(cachep, objp, POISON_AFTER);
+			kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE, 0);
+		} else {
+			poison_obj(cachep, objp, POISON_AFTER);
+		}
+#else
 		poison_obj(cachep, objp, POISON_AFTER);
 #endif
+	}
+#endif
 	return objp;
 }
@@ -1617,6 +1691,7 @@ static inline void check_slabp(kmem_cache_t *cachep, struct slab *slabp)
 	for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) {
 		entries++;
 		BUG_ON(entries > cachep->num);
+		BUG_ON(i < 0 || i >= cachep->num);
 	}
 	BUG_ON(entries != cachep->num - slabp->inuse);
 #endif
@@ -1746,9 +1821,16 @@ cache_alloc_debugcheck_after(kmem_cache_t *cachep,
 	if (!objp)
 		return objp;
-	if (cachep->flags & SLAB_POISON) {
+ 	if (cachep->flags & SLAB_POISON) {
+#ifdef CONFIG_DEBUG_PAGEALLOC
+		if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep))
+			kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE, 1);
+		else
+			check_poison_obj(cachep, objp);
+#else
 		check_poison_obj(cachep, objp);
-		poison_obj(cachep, objp, POISON_BEFORE);
+#endif
+		poison_obj(cachep, objp, POISON_BEFORE);
 	}
 	if (cachep->flags & SLAB_STORE_USER) {
 		objlen -= BYTES_PER_WORD;
@@ -2085,16 +2167,7 @@ free_percpu(const void *objp)
 
 unsigned int kmem_cache_size(kmem_cache_t *cachep)
 {
-	unsigned int objlen = cachep->objsize;
-
-#if DEBUG
-	if (cachep->flags & SLAB_RED_ZONE)
-		objlen -= 2*BYTES_PER_WORD;
-	if (cachep->flags & SLAB_STORE_USER)
-		objlen -= BYTES_PER_WORD;
-#endif
-
-	return objlen;
+	return cachep->objsize-obj_dbglen(cachep);
 }
 
 kmem_cache_t * kmem_find_general_cachep (size_t size, int gfpflags)
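All the red-zone arithmetic the deleted lines above repeated by hand now lives in obj_dbghead()/obj_dbglen(): with SLAB_RED_ZONE the payload is bracketed by one guard word on each side, SLAB_STORE_USER appends one more word for the caller, and kmem_cache_size() collapses to objsize minus obj_dbglen(). A small sketch of that layout accounting (BYTES_PER_WORD assumed to equal sizeof(void *), with head and tail split out where obj_dbglen() returns their sum):

#include <stdio.h>

#define BYTES_PER_WORD	sizeof(void *)

struct layout {
	size_t head;	/* bytes before the payload: leading redzone */
	size_t tail;	/* bytes after it: trailing redzone + last user */
};

static struct layout debug_layout(int red_zone, int store_user)
{
	struct layout l = { 0, 0 };

	if (red_zone) {
		l.head = BYTES_PER_WORD;	/* what obj_dbghead() returns */
		l.tail = BYTES_PER_WORD;	/* the second guard word */
	}
	if (store_user)
		l.tail += BYTES_PER_WORD;	/* caller address slot */
	return l;
}

int main(void)
{
	size_t objsize = 128;
	struct layout l = debug_layout(1, 1);

	/* the kmem_cache_size() calculation: objsize minus all debug bytes */
	printf("usable payload: %zu bytes\n", objsize - l.head - l.tail);
	return 0;
}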
printk("points into object no %d, starting at %p, len %d.\n", + objnr, objp, c->objsize); + if (objnr >= c->num) { + printk("Bad obj number.\n"); + } else { + kernel_map_pages(virt_to_page(objp), c->objsize/PAGE_SIZE, 1); + + printk("redzone: %lxh/%lxh/%lxh.\n", + ((unsigned long*)objp)[0], + ((unsigned long*)(objp+c->objsize))[-2], + ((unsigned long*)(objp+c->objsize))[-1]); + } + spin_unlock_irqrestore(&c->spinlock, flags); + + } +} |
