| author | Russell King <rmk@flint.arm.linux.org.uk> | 2003-01-10 21:41:11 +0000 |
|---|---|---|
| committer | Russell King <rmk@flint.arm.linux.org.uk> | 2003-01-10 21:41:11 +0000 |
| commit | 9804476ca07d30f7771dfc51fe3e2bd22d910817 | |
| tree | 0fe12038c525b5f6241ba278c4b71d60c701cea2 | |
| parent | 28faab9908dcfabe654f07035f8c23cb0c1386d6 | |
[ARM] Fix consistent_alloc()
The old consistent memory allocator, which sat behind
dma_alloc_coherent() and pci_alloc_consistent(), was completely unable
to handle allocations from interrupt context because we traditionally
used ioremap, which in turn:

- allocates memory using GFP_KERNEL for the vm_struct and the page
  tables themselves;
- calls get_vm_area, which uses write_lock, and is therefore unsafe
  to call from interrupt context.
In order to address this, a new consistent_alloc() that avoids the
above issues has been implemented. Essentially, we set aside a section
of the kernel VM space and pre-allocate page tables to cover this
area, then allocate "consistent" memory within this region.

The handling of the allocation is designed to be generic; it should
be possible to switch vmalloc(), ioremap() and module_alloc() over to
it without too much hassle, but that would clearly be a 2.7 thing at
this stage.
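The diff below implements this with a small first-fit allocator, vm_region_alloc(), which walks a sorted list of busy regions inside a fixed 2MB window and hands out the first gap that fits, all under a spinlock so it is interrupt-safe. As a rough illustration of that search, here is a minimal user-space sketch; `struct region`, `region_alloc()` and the `busy` list head are hypothetical stand-ins for the patch's `struct vm_region`, `vm_region_alloc()` and `consistent_head`:

```c
#include <stdlib.h>

#define CONSISTENT_BASE	0xffc00000UL	/* window base, as in the patch */
#define CONSISTENT_END	0xffe00000UL	/* window end (a 2MB window) */

/* Hypothetical stand-in for the patch's struct vm_region. */
struct region {
	unsigned long start, end;	/* allocated range: [start, end) */
	struct region *next;		/* list kept sorted by start */
};

static struct region *busy;		/* sorted list of busy regions */

/*
 * First-fit search, mirroring vm_region_alloc(): walk the sorted busy
 * list and take the first gap big enough for 'size' bytes.
 */
static struct region *region_alloc(size_t size)
{
	unsigned long addr = CONSISTENT_BASE;
	struct region **link = &busy, *c, *new;

	for (c = busy; c; link = &c->next, c = c->next) {
		if (addr + size <= c->start)
			break;		/* the gap before 'c' is big enough */
		addr = c->end;		/* otherwise skip past 'c' */
	}
	/* reject wrap-around and window overflow, as the patch does */
	if (addr + size < addr || addr + size > CONSISTENT_END)
		return NULL;

	new = malloc(sizeof(*new));
	if (!new)
		return NULL;
	new->start = addr;
	new->end = addr + size;
	new->next = c;			/* insert before 'c', keeping order */
	*link = new;
	return new;
}
```

In the patch itself the same walk runs over a kernel `list_head` with `consistent_lock` held and interrupts disabled, which is what makes the allocation path safe to call from interrupt context.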
| -rw-r--r-- | arch/arm/mm/consistent.c | 348 |
|---|---|---|
| -rw-r--r-- | drivers/video/sa1100fb.c | 2 |
| -rw-r--r-- | include/asm-arm/dma-mapping.h | 6 |
| -rw-r--r-- | include/asm-arm/pci.h | 4 |

4 files changed, 285 insertions, 75 deletions
```diff
diff --git a/arch/arm/mm/consistent.c b/arch/arm/mm/consistent.c
index 5bf50fc12ba0..897669e0e5bf 100644
--- a/arch/arm/mm/consistent.c
+++ b/arch/arm/mm/consistent.c
@@ -1,48 +1,156 @@
 /*
  * linux/arch/arm/mm/consistent.c
  *
- * Copyright (C) 2000 Russell King
+ * Copyright (C) 2000-2002 Russell King
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  *
- * Dynamic DMA mapping support.
+ * DMA uncached mapping support.
  */
 #include <linux/config.h>
 #include <linux/types.h>
 #include <linux/mm.h>
+#include <linux/slab.h>
 #include <linux/string.h>
-#include <linux/vmalloc.h>
 #include <linux/interrupt.h>
 #include <linux/errno.h>
-#include <linux/pci.h>
+#include <linux/list.h>
 #include <linux/init.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
 
 #include <asm/io.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+
+#define CONSISTENT_BASE	(0xffc00000)
+#define CONSISTENT_END	(0xffe00000)
+#define CONSISTENT_OFFSET(x)	(((unsigned long)(x) - CONSISTENT_BASE) >> PAGE_SHIFT)
 
 /*
- * This allocates one page of cache-coherent memory space and returns
- * both the virtual and a "dma" address to that space.
+ * This is the page table (2MB) covering uncached, DMA consistent allocations
+ */
+static pte_t *consistent_pte;
+static spinlock_t consistent_lock = SPIN_LOCK_UNLOCKED;
+
+/*
+ * VM region handling support.
  *
- * We should allow this function to be called from interrupt context.
- * However, we call ioremap, which needs to fiddle around with various
- * things (like the vmlist_lock, and allocating page tables).  These
- * things aren't interrupt safe (yet).
+ * This should become something generic, handling VM region allocations for
+ * vmalloc and similar (ioremap, module space, etc).
  *
- * Note that this does *not* zero the allocated area!
+ * I envisage vmalloc()'s supporting vm_struct becoming:
+ *
+ *  struct vm_struct {
+ *    struct vm_region	region;
+ *    unsigned long	flags;
+ *    struct page	**pages;
+ *    unsigned int	nr_pages;
+ *    unsigned long	phys_addr;
+ *  };
+ *
+ * get_vm_area() would then call vm_region_alloc with an appropriate
+ * struct vm_region head (eg):
+ *
+ *  struct vm_region vmalloc_head = {
+ *	.vm_list	= LIST_HEAD_INIT(vmalloc_head.vm_list),
+ *	.vm_start	= VMALLOC_START,
+ *	.vm_end		= VMALLOC_END,
+ *  };
+ *
+ * However, vmalloc_head.vm_start is variable (typically, it is dependent on
+ * the amount of RAM found at boot time.)  I would imagine that get_vm_area()
+ * would have to initialise this each time prior to calling vm_region_alloc().
  */
-void *consistent_alloc(int gfp, size_t size, dma_addr_t *dma_handle)
+struct vm_region {
+	struct list_head	vm_list;
+	unsigned long		vm_start;
+	unsigned long		vm_end;
+};
+
+static struct vm_region consistent_head = {
+	.vm_list	= LIST_HEAD_INIT(consistent_head.vm_list),
+	.vm_start	= CONSISTENT_BASE,
+	.vm_end		= CONSISTENT_END,
+};
+
+#if 0
+static void vm_region_dump(struct vm_region *head, char *fn)
 {
-	struct page *page, *end, *free;
-	unsigned long order;
-	void *ret;
+	struct vm_region *c;
 
-	/* FIXME */
-	if (in_interrupt())
-		BUG();
+	printk("Consistent Allocation Map (%s):\n", fn);
+	list_for_each_entry(c, &head->list, vm_list) {
+		printk(" %p: %08lx - %08lx (0x%08x)\n", c,
+		       c->vm_start, c->vm_end, c->vm_end - c->vm_start);
+	}
+}
+#else
+#define vm_region_dump(head,fn)	do { } while(0)
+#endif
+
+static int vm_region_alloc(struct vm_region *head, struct vm_region *new, size_t size)
+{
+	unsigned long addr = head->vm_start, end = head->vm_end - size;
+	struct vm_region *c;
+
+	list_for_each_entry(c, &head->vm_list, vm_list) {
+		if ((addr + size) < addr)
+			goto out;
+		if ((addr + size) <= c->vm_start)
+			goto found;
+		addr = c->vm_end;
+		if (addr > end)
+			goto out;
+	}
+
+ found:
+	/*
+	 * Insert this entry _before_ the one we found.
+	 */
+	list_add_tail(&new->vm_list, &c->vm_list);
+	new->vm_start = addr;
+	new->vm_end = addr + size;
+
+	return 0;
+
+ out:
+	return -ENOMEM;
+}
+
+static struct vm_region *vm_region_find(struct vm_region *head, unsigned long addr)
+{
+	struct vm_region *c;
+
+	list_for_each_entry(c, &head->vm_list, vm_list) {
+		if (c->vm_start == addr)
+			goto out;
+	}
+	c = NULL;
+ out:
+	return c;
+}
+
+/*
+ * This allocates one page of cache-coherent memory space and returns
+ * both the virtual and a "dma" address to that space.
+ */
+void *consistent_alloc(int gfp, size_t size, dma_addr_t *handle,
+		       unsigned long cache_flags)
+{
+	struct page *page;
+	struct vm_region *c;
+	unsigned long order, flags;
+	void *ret = NULL;
+
+	if (!consistent_pte) {
+		printk(KERN_ERR "consistent_alloc: not initialised\n");
+		dump_stack();
+		return NULL;
+	}
 
 	size = PAGE_ALIGN(size);
 	order = get_order(size);
@@ -51,75 +159,177 @@ void *consistent_alloc(int gfp, size_t size, dma_addr_t *dma_handle)
 	if (!page)
 		goto no_page;
 
-	*dma_handle = page_to_bus(page);
-	ret = __ioremap(page_to_pfn(page) << PAGE_SHIFT, size, 0,
-			PAGE_SIZE << order);
-	if (!ret)
+	/*
+	 * Invalidate any data that might be lurking in the
+	 * kernel direct-mapped region.
+	 */
+	{
+		unsigned long kaddr = (unsigned long)page_address(page);
+		invalidate_dcache_range(kaddr, kaddr + size);
+	}
+
+	/*
+	 * Our housekeeping doesn't need to come from DMA,
+	 * but it must not come from highmem.
+	 */
+	c = kmalloc(sizeof(struct vm_region),
+		    gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
+	if (!c)
 		goto no_remap;
 
-#if 0 /* ioremap_does_flush_cache_all */
-	{
-		void *virt = page_address(page);
+	spin_lock_irqsave(&consistent_lock, flags);
+	vm_region_dump(&consistent_head, "before alloc");
 
+	/*
+	 * Attempt to allocate a virtual address in the
+	 * consistent mapping region.
+	 */
+	if (!vm_region_alloc(&consistent_head, c, size)) {
+		pte_t *pte = consistent_pte + CONSISTENT_OFFSET(c->vm_start);
+		struct page *end = page + (1 << order);
+		pgprot_t prot = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
+					 L_PTE_DIRTY | L_PTE_WRITE |
+					 cache_flags);
 		/*
-		 * we need to ensure that there are no cachelines in use, or
-		 * worse dirty in this area.  Really, we don't need to do
-		 * this since __ioremap does a flush_cache_all() anyway.
--rmk
+		 * Set the "dma handle"
 		 */
-		invalidate_dcache_range(virt, virt + size);
-	}
-#endif
+		*handle = page_to_bus(page);
 
-	/*
-	 * free wasted pages.  We skip the first page since we know
-	 * that it will have count = 1 and won't require freeing.
-	 * We also mark the pages in use as reserved so that
-	 * remap_page_range works.
-	 */
-	free = page + (size >> PAGE_SHIFT);
-	end  = page + (1 << order);
+		do {
+			BUG_ON(!pte_none(*pte));
 
-	for (; page < end; page++) {
-		set_page_count(page, 1);
-		if (page >= free)
-			__free_page(page);
-		else
+			set_page_count(page, 1);
 			SetPageReserved(page);
+			set_pte(pte, mk_pte(page, prot));
+			page++;
+			pte++;
+		} while (size -= PAGE_SIZE);
+
+		/*
+		 * Free the otherwise unused pages.
+		 */
+		while (page < end) {
+			set_page_count(page, 1);
+			__free_page(page);
+			page++;
+		}
+
+		ret = (void *)c->vm_start;
 	}
 
-	return ret;
-no_remap:
-	__free_pages(page, order);
-no_page:
-	return NULL;
+	vm_region_dump(&consistent_head, "after alloc");
+	spin_unlock_irqrestore(&consistent_lock, flags);
+
+ no_remap:
+	if (ret == NULL) {
+		kfree(c);
+		__free_pages(page, order);
+	}
+ no_page:
+	return ret;
 }
 
 /*
- * free a page as defined by the above mapping.  We expressly forbid
- * calling this from interrupt context.
+ * free a page as defined by the above mapping.
  */
 void consistent_free(void *vaddr, size_t size, dma_addr_t handle)
 {
-	struct page *page, *end;
+	struct vm_region *c;
+	unsigned long flags;
+	pte_t *ptep;
 
-	if (in_interrupt())
-		BUG();
-
-	/*
-	 * More messing around with the MM internals.  This is
-	 * sick, but then so is remap_page_range().
-	 */
 	size = PAGE_ALIGN(size);
 
-	page = virt_to_page(bus_to_virt(handle));
-	end = page + (size >> PAGE_SHIFT);
+	spin_lock_irqsave(&consistent_lock, flags);
+	vm_region_dump(&consistent_head, "before free");
 
-	for (; page < end; page++)
-		ClearPageReserved(page);
+	c = vm_region_find(&consistent_head, (unsigned long)vaddr);
+	if (!c)
+		goto no_area;
 
-	__iounmap(vaddr);
+	if ((c->vm_end - c->vm_start) != size) {
+		printk(KERN_ERR "consistent_free: wrong size (%ld != %d)\n",
+		       c->vm_end - c->vm_start, size);
+		dump_stack();
+		size = c->vm_end - c->vm_start;
+	}
+
+	ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start);
+	do {
+		pte_t pte = ptep_get_and_clear(ptep);
+		unsigned long pfn;
+
+		ptep++;
+
+		if (!pte_none(pte) && pte_present(pte)) {
+			pfn = pte_pfn(pte);
+
+			if (pfn_valid(pfn)) {
+				struct page *page = pfn_to_page(pfn);
+				ClearPageReserved(page);
+
+				__free_page(page);
+				continue;
+			}
+		}
+
+		printk(KERN_CRIT "consistent_free: bad page in kernel page "
+		       "table\n");
+	} while (size -= PAGE_SIZE);
+
+	flush_tlb_kernel_range(c->vm_start, c->vm_end);
+
+	list_del(&c->vm_list);
+
+	vm_region_dump(&consistent_head, "after free");
+	spin_unlock_irqrestore(&consistent_lock, flags);
+
+	kfree(c);
+	return;
+
+ no_area:
+	spin_unlock_irqrestore(&consistent_lock, flags);
+	printk(KERN_ERR "consistent_free: trying to free "
+	       "invalid area: %p\n", vaddr);
+	dump_stack();
 }
 
 /*
+ * Initialise the consistent memory allocation.
+ */
+static int __init consistent_init(void)
+{
+	pgd_t *pgd;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	do {
+		pgd = pgd_offset(&init_mm, CONSISTENT_BASE);
+		pmd = pmd_alloc(&init_mm, pgd, CONSISTENT_BASE);
+		if (!pmd) {
+			printk(KERN_ERR "consistent_init: out of pmd tables\n");
+			return -ENOMEM;
+		}
+		if (!pmd_none(*pmd)) {
+			printk(KERN_ERR "consistent_init: PMD already allocated\n");
+			return -EINVAL;
+		}
+		pte = pte_alloc_kernel(&init_mm, pmd, CONSISTENT_BASE);
+		if (!pte) {
+			printk(KERN_ERR "consistent_init: out of pte tables\n");
+			return -ENOMEM;
+		}
+
+		consistent_pte = pte;
+	} while (0);
+
+	return 0;
+}
+
+core_initcall(consistent_init);
+
+/*
  * make an area consistent.
  */
 void consistent_sync(void *vaddr, size_t size, int direction)
@@ -128,16 +338,16 @@ void consistent_sync(void *vaddr, size_t size, int direction)
 	unsigned long end   = start + size;
 
 	switch (direction) {
-	case PCI_DMA_NONE:
-		BUG();
-	case PCI_DMA_FROMDEVICE:	/* invalidate only */
+	case DMA_FROM_DEVICE:		/* invalidate only */
 		invalidate_dcache_range(start, end);
 		break;
-	case PCI_DMA_TODEVICE:		/* writeback only */
+	case DMA_TO_DEVICE:		/* writeback only */
 		clean_dcache_range(start, end);
 		break;
-	case PCI_DMA_BIDIRECTIONAL:	/* writeback and invalidate */
+	case DMA_BIDIRECTIONAL:		/* writeback and invalidate */
 		flush_dcache_range(start, end);
 		break;
+	default:
+		BUG();
 	}
 }
diff --git a/drivers/video/sa1100fb.c b/drivers/video/sa1100fb.c
index 6f59dfe9a3e7..8a562c320c50 100644
--- a/drivers/video/sa1100fb.c
+++ b/drivers/video/sa1100fb.c
@@ -1642,7 +1642,7 @@ static int __init sa1100fb_map_video_memory(struct sa1100fb_info *fbi)
 	 */
 	fbi->map_size = PAGE_ALIGN(fbi->fb.fix.smem_len + PAGE_SIZE);
 	fbi->map_cpu = consistent_alloc(GFP_KERNEL, fbi->map_size,
-					&fbi->map_dma);
+					&fbi->map_dma, PTE_BUFFERABLE);
 
 	if (fbi->map_cpu) {
 		fbi->fb.screen_base = fbi->map_cpu + PAGE_SIZE;
diff --git a/include/asm-arm/dma-mapping.h b/include/asm-arm/dma-mapping.h
index dd85f46611e7..e3d1dbb7bebf 100644
--- a/include/asm-arm/dma-mapping.h
+++ b/include/asm-arm/dma-mapping.h
@@ -14,7 +14,7 @@
  * devices.  This is the "generic" version.  The PCI specific version
  * is in pci.h
  */
-extern void *consistent_alloc(int gfp, size_t size, dma_addr_t *handle);
+extern void *consistent_alloc(int gfp, size_t size, dma_addr_t *handle, unsigned long flags);
 extern void consistent_free(void *vaddr, size_t size, dma_addr_t handle);
 extern void consistent_sync(void *kaddr, size_t size, int rw);
 
@@ -84,12 +84,12 @@ static inline int dma_is_consistent(dma_addr_t handle)
 static inline void *
 dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle)
 {
-	int gfp = GFP_KERNEL;
+	int gfp = GFP_ATOMIC;
 
 	if (dev == NULL || dmadev_is_sa1111(dev) || *dev->dma_mask != 0xffffffff)
 		gfp |= GFP_DMA;
 
-	return consistent_alloc(gfp, size, handle);
+	return consistent_alloc(gfp, size, handle, 0);
 }
 
 /**
diff --git a/include/asm-arm/pci.h b/include/asm-arm/pci.h
index 7760592d853f..7c690a057a20 100644
--- a/include/asm-arm/pci.h
+++ b/include/asm-arm/pci.h
@@ -40,13 +40,13 @@ static inline void pcibios_penalize_isa_irq(int irq)
 static inline void *
 pci_alloc_consistent(struct pci_dev *hwdev, size_t size, dma_addr_t *handle)
 {
-	int gfp = GFP_KERNEL;
+	int gfp = GFP_ATOMIC;
 
 	if (hwdev == NULL || pcidev_is_sa1111(hwdev) || hwdev->dma_mask != 0xffffffff)
 		gfp |= GFP_DMA;
 
-	return consistent_alloc(gfp, size, handle);
+	return consistent_alloc(gfp, size, handle, 0);
 }
 
 static inline void
```
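For callers, the visible changes are the extra page-protection argument to consistent_alloc() and the switch to GFP_ATOMIC in the dma_alloc_coherent()/pci_alloc_consistent() wrappers. A minimal sketch of driver usage after this patch; the function name, device pointer and buffer size are made up for illustration:

```c
#include <linux/mm.h>
#include <linux/errno.h>
#include <linux/dma-mapping.h>

/* Illustrative globals; a real driver would keep these in its state. */
static void *cpu_buf;
static dma_addr_t bus_addr;

static int my_driver_setup_buffer(struct device *dev)
{
	/*
	 * dma_alloc_coherent() now passes GFP_ATOMIC down to the new
	 * consistent_alloc(), so this no longer BUGs when reached from
	 * interrupt context.
	 */
	cpu_buf = dma_alloc_coherent(dev, PAGE_SIZE, &bus_addr);
	if (!cpu_buf)
		return -ENOMEM;
	return 0;
}
```

Callers with special mapping requirements can invoke consistent_alloc() directly and choose the PTE cache flags themselves, as sa1100fb now does with PTE_BUFFERABLE for its framebuffer.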
