-rw-r--r--  arch/i386/Kconfig                 8
-rw-r--r--  arch/i386/kernel/cpu/common.c     8
-rw-r--r--  arch/i386/mm/pageattr.c          82
-rw-r--r--  include/asm-i386/cacheflush.h     5
-rw-r--r--  include/linux/mm.h                8
-rw-r--r--  include/linux/slab.h              2
-rw-r--r--  mm/page_alloc.c                  15
-rw-r--r--  mm/slab.c                       218
8 files changed, 267 insertions(+), 79 deletions(-)
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index ed5254e36eeb..3d78369616c0 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -1339,6 +1339,14 @@ config DEBUG_SPINLOCK
best used in conjunction with the NMI watchdog so that spinlock
deadlocks are also debuggable.
+config DEBUG_PAGEALLOC
+ bool "Page alloc debugging"
+ depends on DEBUG_KERNEL
+ help
+ Unmap pages from the kernel linear mapping after free_pages().
+ This results in a large slowdown, but helps to find certain types
+ of memory corruptions.
+
config DEBUG_HIGHMEM
bool "Highmem debugging"
depends on DEBUG_KERNEL && HIGHMEM
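
The help text above states the mechanism only briefly: once freed pages are unmapped from the linear mapping, any stale pointer dereference faults immediately instead of silently corrupting reused memory. A rough userspace analogue of the same idea is sketched below; mmap()/mprotect() stand in for the kernel's PTE manipulation, and the debug_alloc/debug_free names are made up for illustration.

    /* Userspace sketch of the DEBUG_PAGEALLOC idea: back an allocation with
     * mmap(), and on "free" revoke all access with mprotect() instead of
     * reusing the pages, so any stale pointer faults immediately.
     * Illustration only - not the kernel implementation. */
    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>

    static void *debug_alloc(size_t len)
    {
            return mmap(NULL, len, PROT_READ | PROT_WRITE,
                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    }

    static void debug_free(void *p, size_t len)
    {
            /* Keep the mapping but drop all permissions: later accesses fault. */
            mprotect(p, len, PROT_NONE);
    }

    int main(void)
    {
            char *p = debug_alloc(4096);

            if (p == MAP_FAILED)
                    return 1;
            strcpy(p, "ok while mapped");
            printf("%s\n", p);
            debug_free(p, 4096);
            /* p[0] = 'x';  <- would now die with SIGSEGV instead of
             *                 silently corrupting reused memory. */
            return 0;
    }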
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index a114c2ab7f83..5e579ede103c 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -430,6 +430,14 @@ void __init early_cpu_init(void)
rise_init_cpu();
nexgen_init_cpu();
umc_init_cpu();
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ /* pse is not compatible with on-the-fly unmapping,
+ * disable it even if the cpus claim to support it.
+ */
+ clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
+ disable_pse = 1;
+#endif
}
/*
* cpu_init() initializes state that is per-CPU. Some data is already
diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c
index 7bb9f7ebe469..51b777c42d53 100644
--- a/arch/i386/mm/pageattr.c
+++ b/arch/i386/mm/pageattr.c
@@ -13,6 +13,10 @@
#include <asm/processor.h>
#include <asm/tlbflush.h>
+static spinlock_t cpa_lock = SPIN_LOCK_UNLOCKED;
+static struct list_head df_list = LIST_HEAD_INIT(df_list);
+
+
static inline pte_t *lookup_address(unsigned long address)
{
pgd_t *pgd = pgd_offset_k(address);
@@ -31,10 +35,15 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot)
{
int i;
unsigned long addr;
- struct page *base = alloc_pages(GFP_KERNEL, 0);
+ struct page *base;
pte_t *pbase;
+
+ spin_unlock_irq(&cpa_lock);
+ base = alloc_pages(GFP_KERNEL, 0);
+ spin_lock_irq(&cpa_lock);
if (!base)
return NULL;
+
address = __pa(address);
addr = address & LARGE_PAGE_MASK;
pbase = (pte_t *)page_address(base);
@@ -87,7 +96,7 @@ static inline void revert_page(struct page *kpte_page, unsigned long address)
}
static int
-__change_page_attr(struct page *page, pgprot_t prot, struct page **oldpage)
+__change_page_attr(struct page *page, pgprot_t prot)
{
pte_t *kpte;
unsigned long address;
@@ -123,7 +132,7 @@ __change_page_attr(struct page *page, pgprot_t prot, struct page **oldpage)
}
if (cpu_has_pse && (atomic_read(&kpte_page->count) == 1)) {
- *oldpage = kpte_page;
+ list_add(&kpte_page->list, &df_list);
revert_page(kpte_page, address);
}
return 0;
@@ -134,12 +143,6 @@ static inline void flush_map(void)
on_each_cpu(flush_kernel_map, NULL, 1, 1);
}
-struct deferred_page {
- struct deferred_page *next;
- struct page *fpage;
-};
-static struct deferred_page *df_list; /* protected by init_mm.mmap_sem */
-
/*
* Change the page attributes of an page in the linear mapping.
*
@@ -156,47 +159,54 @@ static struct deferred_page *df_list; /* protected by init_mm.mmap_sem */
int change_page_attr(struct page *page, int numpages, pgprot_t prot)
{
int err = 0;
- struct page *fpage;
int i;
+ unsigned long flags;
- down_write(&init_mm.mmap_sem);
+ spin_lock_irqsave(&cpa_lock, flags);
for (i = 0; i < numpages; i++, page++) {
- fpage = NULL;
- err = __change_page_attr(page, prot, &fpage);
+ err = __change_page_attr(page, prot);
if (err)
break;
- if (fpage) {
- struct deferred_page *df;
- df = kmalloc(sizeof(struct deferred_page), GFP_KERNEL);
- if (!df) {
- flush_map();
- __free_page(fpage);
- } else {
- df->next = df_list;
- df->fpage = fpage;
- df_list = df;
- }
- }
}
- up_write(&init_mm.mmap_sem);
+ spin_unlock_irqrestore(&cpa_lock, flags);
return err;
}
void global_flush_tlb(void)
{
- struct deferred_page *df, *next_df;
+ LIST_HEAD(l);
+ struct list_head* n;
- down_read(&init_mm.mmap_sem);
- df = xchg(&df_list, NULL);
- up_read(&init_mm.mmap_sem);
+ BUG_ON(irqs_disabled());
+
+ spin_lock_irq(&cpa_lock);
+ list_splice_init(&df_list, &l);
+ spin_unlock_irq(&cpa_lock);
flush_map();
- for (; df; df = next_df) {
- next_df = df->next;
- if (df->fpage)
- __free_page(df->fpage);
- kfree(df);
- }
+ n = l.next;
+ while (n != &l) {
+ struct page *pg = list_entry(n, struct page, list);
+ n = n->next;
+ __free_page(pg);
+ }
}
+#ifdef CONFIG_DEBUG_PAGEALLOC
+void kernel_map_pages(struct page *page, int numpages, int enable)
+{
+ if (PageHighMem(page))
+ return;
+ /* the return value is ignored - the calls cannot fail,
+ * large pages are disabled at boot time.
+ */
+ change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));
+ /* we should perform an IPI and flush all tlbs,
+ * but that can deadlock->flush only current cpu.
+ */
+ __flush_tlb_all();
+}
+EXPORT_SYMBOL(kernel_map_pages);
+#endif
+
EXPORT_SYMBOL(change_page_attr);
EXPORT_SYMBOL(global_flush_tlb);
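
The pageattr.c rework above drops the kmalloc()-backed deferred_page records and instead chains the to-be-freed pte pages on df_list under cpa_lock; global_flush_tlb() then detaches the whole list while holding the lock and does the TLB flush and the frees with the lock dropped. A minimal sketch of that detach-under-lock, free-outside-lock pattern follows, with a pthread mutex and a plain singly linked list standing in for cpa_lock and struct page (illustrative names only, not the kernel API).

    /* Sketch of the "splice under the lock, free outside the lock" pattern
     * used by global_flush_tlb(); the mutex and node type are stand-ins. */
    #include <pthread.h>
    #include <stdlib.h>

    struct deferred {
            struct deferred *next;
    };

    static pthread_mutex_t cpa_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct deferred *df_list;        /* protected by cpa_lock */

    static void defer_free(struct deferred *d)
    {
            pthread_mutex_lock(&cpa_lock);
            d->next = df_list;
            df_list = d;
            pthread_mutex_unlock(&cpa_lock);
    }

    static void flush_and_free(void)
    {
            struct deferred *local, *next;

            /* Detach the whole list while holding the lock... */
            pthread_mutex_lock(&cpa_lock);
            local = df_list;
            df_list = NULL;
            pthread_mutex_unlock(&cpa_lock);

            /* ...then do the slow work (the TLB flush in the kernel) and
             * the frees without the lock held. */
            for (; local; local = next) {
                    next = local->next;
                    free(local);
            }
    }

    int main(void)
    {
            defer_free(malloc(sizeof(struct deferred)));
            defer_free(malloc(sizeof(struct deferred)));
            flush_and_free();
            return 0;
    }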
diff --git a/include/asm-i386/cacheflush.h b/include/asm-i386/cacheflush.h
index adc632b97542..d3ce5511dbbc 100644
--- a/include/asm-i386/cacheflush.h
+++ b/include/asm-i386/cacheflush.h
@@ -17,4 +17,9 @@
void global_flush_tlb(void);
int change_page_attr(struct page *page, int numpages, pgprot_t prot);
+#ifdef CONFIG_DEBUG_PAGEALLOC
+/* internal debugging function */
+void kernel_map_pages(struct page *page, int numpages, int enable);
+#endif
+
#endif /* _I386_CACHEFLUSH_H */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 492bc8aeb053..4d183974fd36 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -609,5 +609,13 @@ extern struct page * follow_page(struct mm_struct *mm, unsigned long address,
int write);
extern int remap_page_range(struct vm_area_struct *vma, unsigned long from,
unsigned long to, unsigned long size, pgprot_t prot);
+
+#ifndef CONFIG_DEBUG_PAGEALLOC
+static inline void
+kernel_map_pages(struct page *page, int numpages, int enable)
+{
+}
+#endif
+
#endif /* __KERNEL__ */
#endif /* _LINUX_MM_H */
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 843c8d638d29..9f8bccba4ad3 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -114,6 +114,8 @@ extern kmem_cache_t *signal_cachep;
extern kmem_cache_t *sighand_cachep;
extern kmem_cache_t *bio_cachep;
+void ptrinfo(unsigned long addr);
+
#endif /* __KERNEL__ */
#endif /* _LINUX_SLAB_H */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index dfd254c2c94b..741866b59d7d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -32,6 +32,8 @@
#include <linux/sysctl.h>
#include <linux/cpu.h>
+#include <asm/tlbflush.h>
+
DECLARE_BITMAP(node_online_map, MAX_NUMNODES);
DECLARE_BITMAP(memblk_online_map, MAX_NR_MEMBLKS);
struct pglist_data *pgdat_list;
@@ -265,6 +267,7 @@ void __free_pages_ok(struct page *page, unsigned int order)
mod_page_state(pgfree, 1 << order);
free_pages_check(__FUNCTION__, page);
list_add(&page->list, &list);
+ kernel_map_pages(page, 1<<order, 0);
free_pages_bulk(page_zone(page), 1, &list, order);
}
@@ -440,6 +443,7 @@ static void free_hot_cold_page(struct page *page, int cold)
struct per_cpu_pages *pcp;
unsigned long flags;
+ kernel_map_pages(page, 1, 0);
inc_page_state(pgfree);
free_pages_check(__FUNCTION__, page);
pcp = &zone->pageset[get_cpu()].pcp[cold];
@@ -556,7 +560,7 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
(!wait && z->free_pages >= z->pages_high)) {
page = buffered_rmqueue(z, order, cold);
if (page)
- return page;
+ goto got_pg;
}
min += z->pages_low * sysctl_lower_zone_protection;
}
@@ -579,7 +583,7 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
(!wait && z->free_pages >= z->pages_high)) {
page = buffered_rmqueue(z, order, cold);
if (page)
- return page;
+ goto got_pg;
}
min += local_min * sysctl_lower_zone_protection;
}
@@ -594,7 +598,7 @@ rebalance:
page = buffered_rmqueue(z, order, cold);
if (page)
- return page;
+ goto got_pg;
}
goto nopage;
}
@@ -622,7 +626,7 @@ rebalance:
(!wait && z->free_pages >= z->pages_high)) {
page = buffered_rmqueue(z, order, cold);
if (page)
- return page;
+ goto got_pg;
}
min += z->pages_low * sysctl_lower_zone_protection;
}
@@ -653,6 +657,9 @@ nopage:
current->comm, order, gfp_mask);
}
return NULL;
+got_pg:
+ kernel_map_pages(page, 1 << order, 1);
+ return page;
}
/*
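
The four early "return page" statements in __alloc_pages() become "goto got_pg" so that every successful allocation leaves through a single exit where kernel_map_pages(page, 1 << order, 1) can remap the pages. A generic sketch of that single-success-exit shape is below; all function names in it are placeholders, not kernel APIs.

    /* Generic shape of the __alloc_pages() change: success paths jump to
     * one label so post-processing (remapping the pages) happens exactly
     * once.  Placeholder helpers, for illustration only. */
    #include <stddef.h>

    static void *try_fast_path(void) { return NULL; }
    static void *try_slow_path(void) { return NULL; }
    static void map_into_kernel(void *page, int enable) { (void)page; (void)enable; }

    void *alloc_with_hook(void)
    {
            void *page;

            page = try_fast_path();
            if (page)
                    goto got_pg;
            page = try_slow_path();
            if (page)
                    goto got_pg;
            return NULL;

    got_pg:
            /* kernel_map_pages(page, 1 << order, 1) in the real patch */
            map_into_kernel(page, 1);
            return page;
    }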
diff --git a/mm/slab.c b/mm/slab.c
index cad1bb2e8ebb..afb8d8415999 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -89,7 +89,11 @@
#include <linux/notifier.h>
#include <linux/kallsyms.h>
#include <linux/cpu.h>
+#include <linux/sysctl.h>
+
#include <asm/uaccess.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
/*
* DEBUG - 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL,
@@ -351,6 +355,34 @@ struct kmem_cache_s {
#define POISON_AFTER 0x6b /* for use-after-free poisoning */
#define POISON_END 0xa5 /* end-byte of poisoning */
+static inline int obj_dbghead(kmem_cache_t *cachep)
+{
+ if (cachep->flags & SLAB_RED_ZONE)
+ return BYTES_PER_WORD;
+ return 0;
+}
+
+static inline int obj_dbglen(kmem_cache_t *cachep)
+{
+ int len = 0;
+
+ if (cachep->flags & SLAB_RED_ZONE) {
+ len += 2*BYTES_PER_WORD;
+ }
+ if (cachep->flags & SLAB_STORE_USER) {
+ len += BYTES_PER_WORD;
+ }
+ return len;
+}
+#else
+static inline int obj_dbghead(kmem_cache_t *cachep)
+{
+ return 0;
+}
+static inline int obj_dbglen(kmem_cache_t *cachep)
+{
+ return 0;
+}
#endif
/*
@@ -765,16 +797,45 @@ static inline void kmem_freepages (kmem_cache_t *cachep, void *addr)
}
#if DEBUG
-static void poison_obj(kmem_cache_t *cachep, void *addr, unsigned char val)
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+static void store_stackinfo(kmem_cache_t *cachep, unsigned long *addr, unsigned long caller)
{
- int size = cachep->objsize;
- if (cachep->flags & SLAB_RED_ZONE) {
- addr += BYTES_PER_WORD;
- size -= 2*BYTES_PER_WORD;
- }
- if (cachep->flags & SLAB_STORE_USER) {
- size -= BYTES_PER_WORD;
+ int size = cachep->objsize-obj_dbglen(cachep);
+
+ addr = (unsigned long *)&((char*)addr)[obj_dbghead(cachep)];
+
+ if (size < 5*sizeof(unsigned long))
+ return;
+
+ *addr++=0x12345678;
+ *addr++=caller;
+ *addr++=smp_processor_id();
+ size -= 3*sizeof(unsigned long);
+ {
+ unsigned long *sptr = &caller;
+ unsigned long svalue;
+
+ while (((long) sptr & (THREAD_SIZE-1)) != 0) {
+ svalue = *sptr++;
+ if (kernel_text_address(svalue)) {
+ *addr++=svalue;
+ size -= sizeof(unsigned long);
+ if (size <= sizeof(unsigned long))
+ break;
+ }
+ }
+
}
+ *addr++=0x87654321;
+}
+#endif
+
+static void poison_obj(kmem_cache_t *cachep, void *addr, unsigned char val)
+{
+ int size = cachep->objsize-obj_dbglen(cachep);
+ addr = &((char*)addr)[obj_dbghead(cachep)];
+
memset(addr, val, size);
*(unsigned char *)(addr+size-1) = POISON_END;
}
@@ -796,15 +857,11 @@ static void *scan_poisoned_obj(unsigned char* addr, unsigned int size)
static void check_poison_obj(kmem_cache_t *cachep, void *addr)
{
- int size = cachep->objsize;
void *end;
- if (cachep->flags & SLAB_RED_ZONE) {
- addr += BYTES_PER_WORD;
- size -= 2*BYTES_PER_WORD;
- }
- if (cachep->flags & SLAB_STORE_USER) {
- size -= BYTES_PER_WORD;
- }
+ int size = cachep->objsize-obj_dbglen(cachep);
+
+ addr = &((char*)addr)[obj_dbghead(cachep)];
+
end = scan_poisoned_obj(addr, size);
if (end) {
int s;
@@ -858,8 +915,16 @@ static void slab_destroy (kmem_cache_t *cachep, struct slab *slabp)
void *objp = slabp->s_mem + cachep->objsize * i;
int objlen = cachep->objsize;
- if (cachep->flags & SLAB_POISON)
+ if (cachep->flags & SLAB_POISON) {
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ if ((cachep->objsize%PAGE_SIZE)==0 && OFF_SLAB(cachep))
+ kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE,1);
+ else
+ check_poison_obj(cachep, objp);
+#else
check_poison_obj(cachep, objp);
+#endif
+ }
if (cachep->flags & SLAB_STORE_USER)
objlen -= BYTES_PER_WORD;
@@ -952,6 +1017,10 @@ kmem_cache_create (const char *name, size_t size, size_t offset,
}
#if FORCED_DEBUG
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ if (size < PAGE_SIZE-3*BYTES_PER_WORD && size > 128)
+ size = PAGE_SIZE-3*BYTES_PER_WORD;
+#endif
/*
* Enable redzoning and last user accounting, except
* - for caches with forced alignment: redzoning would violate the
@@ -1404,6 +1473,8 @@ static void cache_init_objs (kmem_cache_t * cachep,
slab_error(cachep, "constructor overwrote the"
" start of an object");
}
+ if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep) && cachep->flags & SLAB_POISON)
+ kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE, 0);
#else
if (cachep->ctor)
cachep->ctor(objp, cachep, ctor_flags);
@@ -1584,25 +1655,28 @@ static inline void *cache_free_debugcheck (kmem_cache_t * cachep, void * objp, v
* caller can perform a verify of its state (debugging).
* Called without the cache-lock held.
*/
- if (cachep->flags & SLAB_RED_ZONE) {
- cachep->ctor(objp+BYTES_PER_WORD,
+ cachep->ctor(objp+obj_dbghead(cachep),
cachep, SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY);
- } else {
- cachep->ctor(objp, cachep, SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY);
- }
}
if (cachep->flags & SLAB_POISON && cachep->dtor) {
/* we want to cache poison the object,
* call the destruction callback
*/
- if (cachep->flags & SLAB_RED_ZONE)
- cachep->dtor(objp+BYTES_PER_WORD, cachep, 0);
- else
- cachep->dtor(objp, cachep, 0);
+ cachep->dtor(objp+obj_dbghead(cachep), cachep, 0);
}
- if (cachep->flags & SLAB_POISON)
+ if (cachep->flags & SLAB_POISON) {
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) {
+ store_stackinfo(cachep, objp, POISON_AFTER);
+ kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE, 0);
+ } else {
+ poison_obj(cachep, objp, POISON_AFTER);
+ }
+#else
poison_obj(cachep, objp, POISON_AFTER);
#endif
+ }
+#endif
return objp;
}
@@ -1617,6 +1691,7 @@ static inline void check_slabp(kmem_cache_t *cachep, struct slab *slabp)
for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) {
entries++;
BUG_ON(entries > cachep->num);
+ BUG_ON(i < 0 || i >= cachep->num);
}
BUG_ON(entries != cachep->num - slabp->inuse);
#endif
@@ -1746,9 +1821,16 @@ cache_alloc_debugcheck_after(kmem_cache_t *cachep,
if (!objp)
return objp;
- if (cachep->flags & SLAB_POISON) {
+ if (cachep->flags & SLAB_POISON) {
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep))
+ kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE, 1);
+ else
+ check_poison_obj(cachep, objp);
+#else
check_poison_obj(cachep, objp);
- poison_obj(cachep, objp, POISON_BEFORE);
+#endif
+ poison_obj(cachep, objp, POISON_BEFORE);
}
if (cachep->flags & SLAB_STORE_USER) {
objlen -= BYTES_PER_WORD;
@@ -2085,16 +2167,7 @@ free_percpu(const void *objp)
unsigned int kmem_cache_size(kmem_cache_t *cachep)
{
- unsigned int objlen = cachep->objsize;
-
-#if DEBUG
- if (cachep->flags & SLAB_RED_ZONE)
- objlen -= 2*BYTES_PER_WORD;
- if (cachep->flags & SLAB_STORE_USER)
- objlen -= BYTES_PER_WORD;
-#endif
-
- return objlen;
+ return cachep->objsize-obj_dbglen(cachep);
}
kmem_cache_t * kmem_find_general_cachep (size_t size, int gfpflags)
@@ -2626,3 +2699,70 @@ unsigned int ksize(const void *objp)
return size;
}
+void ptrinfo(unsigned long addr)
+{
+ struct page *page;
+
+ printk("Dumping data about address %p.\n", (void*)addr);
+ if (!virt_addr_valid((void*)addr)) {
+ printk("virt addr invalid.\n");
+ return;
+ }
+ do {
+ pgd_t *pgd = pgd_offset_k(addr);
+ pmd_t *pmd;
+ if (pgd_none(*pgd)) {
+ printk("No pgd.\n");
+ break;
+ }
+ pmd = pmd_offset(pgd, addr);
+ if (pmd_none(*pmd)) {
+ printk("No pmd.\n");
+ break;
+ }
+#ifdef CONFIG_X86
+ if (pmd_large(*pmd)) {
+ printk("Large page.\n");
+ break;
+ }
+#endif
+ printk("normal page, pte_val 0x%llx\n",
+ (unsigned long long)pte_val(*pte_offset_kernel(pmd, addr)));
+ } while(0);
+
+ page = virt_to_page((void*)addr);
+ printk("struct page at %p, flags %lxh.\n", page, page->flags);
+ if (PageSlab(page)) {
+ kmem_cache_t *c;
+ struct slab *s;
+ unsigned long flags;
+ int objnr;
+ void *objp;
+
+ c = GET_PAGE_CACHE(page);
+ printk("belongs to cache %s.\n",c->name);
+
+ spin_lock_irqsave(&c->spinlock, flags);
+ s = GET_PAGE_SLAB(page);
+ printk("slabp %p with %d inuse objects (from %d).\n",
+ s, s->inuse, c->num);
+ check_slabp(c,s);
+
+ objnr = (addr-(unsigned long)s->s_mem)/c->objsize;
+ objp = s->s_mem+c->objsize*objnr;
+ printk("points into object no %d, starting at %p, len %d.\n",
+ objnr, objp, c->objsize);
+ if (objnr >= c->num) {
+ printk("Bad obj number.\n");
+ } else {
+ kernel_map_pages(virt_to_page(objp), c->objsize/PAGE_SIZE, 1);
+
+ printk("redzone: %lxh/%lxh/%lxh.\n",
+ ((unsigned long*)objp)[0],
+ ((unsigned long*)(objp+c->objsize))[-2],
+ ((unsigned long*)(objp+c->objsize))[-1]);
+ }
+ spin_unlock_irqrestore(&c->spinlock, flags);
+
+ }
+}