| author | Andi Kleen <ak@suse.de> | 2004-12-31 22:02:07 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@evo.osdl.org> | 2004-12-31 22:02:07 -0800 |
| commit | 2e20fb912502903364394dc861574b995e0a95f6 (patch) | |
| tree | 310478c53248f9db6c25b938991f06ed21ab3882 /include | |
| parent | 63e6b85474d3baedca6f90cd490f4734eb48dc62 (diff) | |
[PATCH] convert x86_64 to 4 level page tables
Converted to true 4 levels. The address space per process is expanded to
47 bits now; the supported physical address space is 46 bits.
Lmbench fork/exit numbers are down a few percent because it has to walk
more page tables, but some planned future optimizations will hopefully
recover that.
See Documentation/x86_64/mm.txt for more details on the memory map.
Signed-off-by: Andi Kleen <ak@suse.de>
Converted to pud_t by Nick Piggin.
Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
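For orientation, the new layout splits a virtual address into four 9-bit table indices plus a 12-bit page offset, matching the shifts this patch defines (PGDIR_SHIFT 39, PUD_SHIFT 30, PMD_SHIFT 21, PAGE_SHIFT 12). A minimal standalone sketch of that decomposition (plain userspace C with the constants copied from the patch, not kernel code; the example address is arbitrary):

```c
#include <stdio.h>

/* Shifts used by the new x86-64 4-level layout (values from this patch). */
#define PAGE_SHIFT  12
#define PMD_SHIFT   21
#define PUD_SHIFT   30
#define PGDIR_SHIFT 39
#define PTRS        512UL   /* 9 index bits per level */

int main(void)
{
	unsigned long addr = 0x00007f1234567abcUL;   /* arbitrary user address */

	unsigned long pgd = (addr >> PGDIR_SHIFT) & (PTRS - 1);  /* level 4 index */
	unsigned long pud = (addr >> PUD_SHIFT)   & (PTRS - 1);  /* level 3 index */
	unsigned long pmd = (addr >> PMD_SHIFT)   & (PTRS - 1);  /* level 2 index */
	unsigned long pte = (addr >> PAGE_SHIFT)  & (PTRS - 1);  /* level 1 index */
	unsigned long off = addr & ((1UL << PAGE_SHIFT) - 1);    /* byte in page  */

	printf("pgd=%lu pud=%lu pmd=%lu pte=%lu offset=0x%lx\n",
	       pgd, pud, pmd, pte, off);
	return 0;
}
```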
Diffstat (limited to 'include')
| -rw-r--r-- | include/asm-x86_64/e820.h | 3 |
| -rw-r--r-- | include/asm-x86_64/mmu_context.h | 5 |
| -rw-r--r-- | include/asm-x86_64/page.h | 12 |
| -rw-r--r-- | include/asm-x86_64/pda.h | 1 |
| -rw-r--r-- | include/asm-x86_64/pgalloc.h | 38 |
| -rw-r--r-- | include/asm-x86_64/pgtable.h | 142 |
| -rw-r--r-- | include/asm-x86_64/processor.h | 4 |
7 files changed, 100 insertions, 105 deletions
```diff
diff --git a/include/asm-x86_64/e820.h b/include/asm-x86_64/e820.h
index ad24c50d4967..5b376e42b153 100644
--- a/include/asm-x86_64/e820.h
+++ b/include/asm-x86_64/e820.h
@@ -26,9 +26,6 @@
 #define LOWMEMSIZE()	(0x9f000)
 
-#define MAXMEM		(120UL * 1024 * 1024 * 1024 * 1024)	/* 120TB */
-
-
 #ifndef __ASSEMBLY__
 struct e820entry {
 	u64 addr;	/* start of memory segment */
diff --git a/include/asm-x86_64/mmu_context.h b/include/asm-x86_64/mmu_context.h
index 8f80f157035e..673126cf2744 100644
--- a/include/asm-x86_64/mmu_context.h
+++ b/include/asm-x86_64/mmu_context.h
@@ -40,10 +40,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 		write_pda(active_mm, next);
 #endif
 		set_bit(cpu, &next->cpu_vm_mask);
-		/* Re-load page tables */
-		*read_pda(level4_pgt) = __pa(next->pgd) | _PAGE_TABLE;
-		__flush_tlb();
-
+		asm volatile("movq %0,%%cr3" :: "r" (__pa(next->pgd)) : "memory");
 		if (unlikely(next->context.ldt != prev->context.ldt))
 			load_LDT_nolock(&next->context, cpu);
 	}
diff --git a/include/asm-x86_64/page.h b/include/asm-x86_64/page.h
index b45e80c76653..1930336708a9 100644
--- a/include/asm-x86_64/page.h
+++ b/include/asm-x86_64/page.h
@@ -43,22 +43,22 @@ void copy_page(void *, void *);
  */
 typedef struct { unsigned long pte; } pte_t;
 typedef struct { unsigned long pmd; } pmd_t;
+typedef struct { unsigned long pud; } pud_t;
 typedef struct { unsigned long pgd; } pgd_t;
-typedef struct { unsigned long pml4; } pml4_t;
 #define PTE_MASK	PHYSICAL_PAGE_MASK
 
 typedef struct { unsigned long pgprot; } pgprot_t;
 
 #define pte_val(x)	((x).pte)
 #define pmd_val(x)	((x).pmd)
+#define pud_val(x)	((x).pud)
 #define pgd_val(x)	((x).pgd)
-#define pml4_val(x)	((x).pml4)
 #define pgprot_val(x)	((x).pgprot)
 
 #define __pte(x) ((pte_t) { (x) } )
 #define __pmd(x) ((pmd_t) { (x) } )
+#define __pud(x) ((pud_t) { (x) } )
 #define __pgd(x) ((pgd_t) { (x) } )
-#define __pml4(x) ((pml4_t) { (x) } )
 #define __pgprot(x)	((pgprot_t) { (x) } )
 
 extern unsigned long vm_stack_flags, vm_stack_flags32;
@@ -67,19 +67,19 @@ extern unsigned long vm_force_exec32;
 #define __START_KERNEL		0xffffffff80100000UL
 #define __START_KERNEL_map	0xffffffff80000000UL
-#define __PAGE_OFFSET		0x0000010000000000UL	/* 1 << 40 */
+#define __PAGE_OFFSET		0xffff810000000000UL
 #else
 #define __START_KERNEL		0xffffffff80100000
 #define __START_KERNEL_map	0xffffffff80000000
-#define __PAGE_OFFSET		0x0000010000000000	/* 1 << 40 */
+#define __PAGE_OFFSET		0xffff810000000000
 #endif /* !__ASSEMBLY__ */
 
 /* to align the pointer to the (next) page boundary */
 #define PAGE_ALIGN(addr)	(((addr)+PAGE_SIZE-1)&PAGE_MASK)
 
 /* See Documentation/x86_64/mm.txt for a description of the memory map. */
-#define __PHYSICAL_MASK_SHIFT	40
+#define __PHYSICAL_MASK_SHIFT	46
 #define __PHYSICAL_MASK		((1UL << __PHYSICAL_MASK_SHIFT) - 1)
 #define __VIRTUAL_MASK_SHIFT	48
 #define __VIRTUAL_MASK		((1UL << __VIRTUAL_MASK_SHIFT) - 1)
diff --git a/include/asm-x86_64/pda.h b/include/asm-x86_64/pda.h
index 90ec48f28e2f..36b766cfc4d5 100644
--- a/include/asm-x86_64/pda.h
+++ b/include/asm-x86_64/pda.h
@@ -17,7 +17,6 @@ struct x8664_pda {
 	int irqcount;		/* Irq nesting counter. Starts with -1 */
 	int cpunumber;		/* Logical CPU number */
 	char *irqstackptr;	/* top of irqstack */
-	unsigned long volatile *level4_pgt;	/* Per CPU top level page table */
 	unsigned int __softirq_pending;
 	unsigned int __nmi_count;	/* number of NMI on this CPUs */
 	struct mm_struct *active_mm;
diff --git a/include/asm-x86_64/pgalloc.h b/include/asm-x86_64/pgalloc.h
index bebcaab3375c..cc0b2d52b1bb 100644
--- a/include/asm-x86_64/pgalloc.h
+++ b/include/asm-x86_64/pgalloc.h
@@ -9,8 +9,10 @@
 #define pmd_populate_kernel(mm, pmd, pte) \
 		set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte)))
-#define pgd_populate(mm, pgd, pmd) \
-		set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pmd)))
+#define pud_populate(mm, pud, pmd) \
+		set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd)))
+#define pgd_populate(mm, pgd, pud) \
+		set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pud)))
 
 static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte)
 {
@@ -33,12 +35,37 @@ static inline pmd_t *pmd_alloc_one (struct mm_struct *mm, unsigned long addr)
 	return (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
 }
 
-static inline pgd_t *pgd_alloc (struct mm_struct *mm)
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return (pgd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
+	return (pud_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
 }
 
-static inline void pgd_free (pgd_t *pgd)
+static inline void pud_free (pud_t *pud)
+{
+	BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
+	free_page((unsigned long)pud);
+}
+
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+	unsigned boundary;
+	pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
+	if (!pgd)
+		return NULL;
+	/*
+	 * Copy kernel pointers in from init.
+	 * Could keep a freelist or slab cache of those because the kernel
+	 * part never changes.
+	 */
+	boundary = pgd_index(__PAGE_OFFSET);
+	memset(pgd, 0, boundary * sizeof(pgd_t));
+	memcpy(pgd + boundary,
+	       init_level4_pgt + boundary,
+	       (PTRS_PER_PGD - boundary) * sizeof(pgd_t));
+	return pgd;
+}
+
+static inline void pgd_free(pgd_t *pgd)
 {
 	BUG_ON((unsigned long)pgd & (PAGE_SIZE-1));
 	free_page((unsigned long)pgd);
@@ -73,5 +100,6 @@ extern inline void pte_free(struct page *pte)
 
 #define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte))
 #define __pmd_free_tlb(tlb,x)   pmd_free(x)
+#define __pud_free_tlb(tlb,x)   pud_free(x)
 
 #endif /* _X86_64_PGALLOC_H */
diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h
index 101710b001ab..85d03356f111 100644
--- a/include/asm-x86_64/pgtable.h
+++ b/include/asm-x86_64/pgtable.h
@@ -1,17 +1,9 @@
 #ifndef _X86_64_PGTABLE_H
 #define _X86_64_PGTABLE_H
 
-#include <asm-generic/4level-fixup.h>
-
 /*
  * This file contains the functions and defines necessary to modify and use
  * the x86-64 page table tree.
- *
- * x86-64 has a 4 level table setup. Generic linux MM only supports
- * three levels. The fourth level is currently a single static page that
- * is shared by everybody and just contains a pointer to the current
- * three level page setup on the beginning and some kernel mappings at
- * the end. For more details see Documentation/x86_64/mm.txt
 */
 #include <asm/processor.h>
 #include <asm/fixmap.h>
@@ -19,15 +11,14 @@
 #include <linux/threads.h>
 #include <asm/pda.h>
 
-extern pgd_t level3_kernel_pgt[512];
-extern pgd_t level3_physmem_pgt[512];
-extern pgd_t level3_ident_pgt[512];
+extern pud_t level3_kernel_pgt[512];
+extern pud_t level3_physmem_pgt[512];
+extern pud_t level3_ident_pgt[512];
 extern pmd_t level2_kernel_pgt[512];
-extern pml4_t init_level4_pgt[];
-extern pgd_t boot_vmalloc_pgt[];
+extern pgd_t init_level4_pgt[];
 extern unsigned long __supported_pte_mask;
 
-#define swapper_pg_dir NULL
+#define swapper_pg_dir init_level4_pgt
 
 extern void paging_init(void);
 extern void clear_kernel_mapping(unsigned long addr, unsigned long size);
@@ -41,16 +32,19 @@ extern unsigned long pgkern_mask;
 extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
 #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
 
-#define PML4_SHIFT	39
-#define PTRS_PER_PML4	512
-
 /*
  * PGDIR_SHIFT determines what a top-level page table entry can map
 */
-#define PGDIR_SHIFT	30
+#define PGDIR_SHIFT	39
 #define PTRS_PER_PGD	512
 
 /*
+ * 3rd level page
+ */
+#define PUD_SHIFT	30
+#define PTRS_PER_PUD	512
+
+/*
  * PMD_SHIFT determines the size of the area a middle-level
  * page table can map
  */
@@ -66,14 +60,13 @@ extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
 	printk("%s:%d: bad pte %p(%016lx).\n", __FILE__, __LINE__, &(e), pte_val(e))
 #define pmd_ERROR(e) \
 	printk("%s:%d: bad pmd %p(%016lx).\n", __FILE__, __LINE__, &(e), pmd_val(e))
+#define pud_ERROR(e) \
+	printk("%s:%d: bad pud %p(%016lx).\n", __FILE__, __LINE__, &(e), pud_val(e))
 #define pgd_ERROR(e) \
 	printk("%s:%d: bad pgd %p(%016lx).\n", __FILE__, __LINE__, &(e), pgd_val(e))
-
-#define pml4_none(x)	(!pml4_val(x))
 #define pgd_none(x)	(!pgd_val(x))
-
-extern inline int pgd_present(pgd_t pgd) { return !pgd_none(pgd); }
+#define pud_none(x)	(!pud_val(x))
 
 static inline void set_pte(pte_t *dst, pte_t val)
 {
@@ -85,6 +78,16 @@ static inline void set_pmd(pmd_t *dst, pmd_t val)
 	pmd_val(*dst) = pmd_val(val);
 }
 
+static inline void set_pud(pud_t *dst, pud_t val)
+{
+	pud_val(*dst) = pud_val(val);
+}
+
+extern inline void pud_clear (pud_t *pud)
+{
+	set_pud(pud, __pud(0));
+}
+
 static inline void set_pgd(pgd_t *dst, pgd_t val)
 {
 	pgd_val(*dst) = pgd_val(val);
@@ -95,45 +98,30 @@ extern inline void pgd_clear (pgd_t * pgd)
 	set_pgd(pgd, __pgd(0));
 }
 
-static inline void set_pml4(pml4_t *dst, pml4_t val)
-{
-	pml4_val(*dst) = pml4_val(val);
-}
-
-#define pgd_page(pgd) \
-((unsigned long) __va(pgd_val(pgd) & PHYSICAL_PAGE_MASK))
+#define pud_page(pud) \
+((unsigned long) __va(pud_val(pud) & PHYSICAL_PAGE_MASK))
 
 #define ptep_get_and_clear(xp)	__pte(xchg(&(xp)->pte, 0))
 #define pte_same(a, b)		((a).pte == (b).pte)
 
-#define PML4_SIZE	(1UL << PML4_SHIFT)
-#define PML4_MASK	(~(PML4_SIZE-1))
 #define PMD_SIZE	(1UL << PMD_SHIFT)
 #define PMD_MASK	(~(PMD_SIZE-1))
+#define PUD_SIZE	(1UL << PUD_SHIFT)
+#define PUD_MASK	(~(PUD_SIZE-1))
 #define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE-1))
 
 #define USER_PTRS_PER_PGD	(TASK_SIZE/PGDIR_SIZE)
 #define FIRST_USER_PGD_NR	0
 
-#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
-#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
-
-#define TWOLEVEL_PGDIR_SHIFT	20
-#define BOOT_USER_L4_PTRS 1
-#define BOOT_KERNEL_L4_PTRS 511	/* But we will do it in 4rd level */
-
-
-
 #ifndef __ASSEMBLY__
-#define VMALLOC_START	 0xffffff0000000000UL
-#define VMALLOC_END	 0xffffff7fffffffffUL
-#define MODULES_VADDR	 0xffffffffa0000000UL
-#define MODULES_END	 0xffffffffafffffffUL
+#define MAXMEM		 0x3fffffffffffUL
+#define VMALLOC_START	 0xffffc20000000000UL
+#define VMALLOC_END	 0xffffe1ffffffffffUL
+#define MODULES_VADDR	 0xffffffff88000000
+#define MODULES_END	 0xfffffffffff00000
 #define MODULES_LEN   (MODULES_END - MODULES_VADDR)
-#define IOMAP_START	 0xfffffe8000000000UL
 
 #define _PAGE_BIT_PRESENT	0
 #define _PAGE_BIT_RW		1
 #define _PAGE_BIT_USER		2
@@ -224,6 +212,14 @@ static inline unsigned long pgd_bad(pgd_t pgd)
 	return val & ~(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED);
 }
 
+static inline unsigned long pud_bad(pud_t pud)
+{
+	unsigned long val = pud_val(pud);
+	val &= ~PTE_MASK;
+	val &= ~(_PAGE_USER | _PAGE_DIRTY);
+	return val & ~(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED);
+}
+
 #define pte_none(x)	(!pte_val(x))
 #define pte_present(x)	(pte_val(x) & (_PAGE_PRESENT | _PAGE_PROTNONE))
 #define pte_clear(xp)	do { set_pte(xp, __pte(0)); } while (0)
@@ -302,54 +298,32 @@ static inline int pmd_large(pmd_t pte) {
 
 /*
  * Level 4 access.
- * Never use these in the common code.
 */
-#define pml4_page(pml4) ((unsigned long) __va(pml4_val(pml4) & PTE_MASK))
-#define pml4_index(address) ((address >> PML4_SHIFT) & (PTRS_PER_PML4-1))
-#define pml4_offset_k(address) (init_level4_pgt + pml4_index(address))
-#define pml4_present(pml4) (pml4_val(pml4) & _PAGE_PRESENT)
-#define mk_kernel_pml4(address) ((pml4_t){ (address) | _KERNPG_TABLE })
-#define level3_offset_k(dir, address) ((pgd_t *) pml4_page(*(dir)) + pgd_index(address))
-
-/* PGD - Level3 access */
-/* to find an entry in a page-table-directory. */
+#define pgd_page(pgd) ((unsigned long) __va((unsigned long)pgd_val(pgd) & PTE_MASK))
 #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
-static inline pgd_t *__pgd_offset_k(pgd_t *pgd, unsigned long address)
-{
-	return pgd + pgd_index(address);
-}
+#define pgd_offset(mm, addr) ((mm)->pgd + pgd_index(addr))
+#define pgd_offset_k(address) (init_level4_pgt + pgd_index(address))
+#define pgd_present(pgd) (pgd_val(pgd) & _PAGE_PRESENT)
+#define mk_kernel_pgd(address) ((pgd_t){ (address) | _KERNPG_TABLE })
 
-/* Find correct pgd via the hidden fourth level page level: */
-
-/* This accesses the reference page table of the boot cpu.
-   Other CPUs get synced lazily via the page fault handler. */
-static inline pgd_t *pgd_offset_k(unsigned long address)
-{
-	unsigned long addr;
-
-	addr = pml4_val(init_level4_pgt[pml4_index(address)]);
-	addr &= PHYSICAL_PAGE_MASK;
-	return __pgd_offset_k((pgd_t *)__va(addr), address);
-}
-
-/* Access the pgd of the page table as seen by the current CPU. */
-static inline pgd_t *current_pgd_offset_k(unsigned long address)
-{
-	unsigned long addr;
-
-	addr = read_pda(level4_pgt)[pml4_index(address)];
-	addr &= PHYSICAL_PAGE_MASK;
-	return __pgd_offset_k((pgd_t *)__va(addr), address);
-}
+/* PUD - Level3 access */
+/* to find an entry in a page-table-directory. */
+#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
+#define pud_offset(pgd, address) ((pud_t *) pgd_page(*(pgd)) + pud_index(address))
+#define pud_offset_k(pgd, addr) pud_offset(pgd, addr)
+#define pud_present(pud) (pud_val(pud) & _PAGE_PRESENT)
 
-#define pgd_offset(mm, address) ((mm)->pgd+pgd_index(address))
+static inline pud_t *__pud_offset_k(pud_t *pud, unsigned long address)
+{
+	return pud + pud_index(address);
+}
 
 /* PMD  - Level 2 access */
 #define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK))
 #define pmd_page(pmd)		(pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
 
 #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
-#define pmd_offset(dir, address) ((pmd_t *) pgd_page(*(dir)) + \
+#define pmd_offset(dir, address) ((pmd_t *) pud_page(*(dir)) + \
 			pmd_index(address))
 #define pmd_none(x)	(!pmd_val(x))
 #define pmd_present(x)	(pmd_val(x) & _PAGE_PRESENT)
diff --git a/include/asm-x86_64/processor.h b/include/asm-x86_64/processor.h
index 36c04fb3c0d0..4bbb228dfc27 100644
--- a/include/asm-x86_64/processor.h
+++ b/include/asm-x86_64/processor.h
@@ -159,9 +159,9 @@ static inline void clear_in_cr4 (unsigned long mask)
 
 
 /*
- * User space process size: 512GB - 1GB (default).
+ * User space process size. 47bits.
  */
-#define TASK_SIZE	(0x0000007fc0000000UL)
+#define TASK_SIZE	(0x800000000000)
 
 /* This decides where the kernel will search for a free chunk of vm
  * space during mmap's.
```
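With pgtable.h converted, a lookup now descends through a real pud level instead of the old hidden pml4 helpers. A hedged sketch of how code would walk all four levels with the accessors from this patch; lookup_pte() is a made-up illustrative helper, not something the patch adds, and pte_offset_kernel is the pre-existing level-1 accessor:

```c
#include <asm/pgtable.h>

/* Hypothetical helper: resolve a kernel virtual address to its pte by
 * walking pgd -> pud -> pmd -> pte with the macros from this patch. */
static pte_t *lookup_pte(unsigned long address)
{
	pgd_t *pgd = pgd_offset_k(address);      /* level 4: init_level4_pgt */
	pud_t *pud;
	pmd_t *pmd;

	if (pgd_none(*pgd))
		return NULL;
	pud = pud_offset(pgd, address);          /* level 3: the new pud     */
	if (pud_none(*pud))
		return NULL;
	pmd = pmd_offset(pud, address);          /* level 2: now takes a pud */
	if (pmd_none(*pmd))
		return NULL;
	return pte_offset_kernel(pmd, address);  /* level 1                  */
}
```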
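The new pgd_alloc() gives every mm its own full 512-entry top level: entries below pgd_index(__PAGE_OFFSET) are zeroed and the rest are copied from init_level4_pgt, which is why switch_mm() can now simply load %cr3 with __pa(next->pgd) instead of patching a shared per-CPU level-4 page. A quick standalone check of where that boundary falls with the new __PAGE_OFFSET (plain C repeating the pgd_index() arithmetic, not kernel code):

```c
#include <stdio.h>

int main(void)
{
	unsigned long page_offset  = 0xffff810000000000UL; /* new __PAGE_OFFSET */
	unsigned long pgdir_shift  = 39;                   /* new PGDIR_SHIFT   */
	unsigned long ptrs_per_pgd = 512;

	/* Same arithmetic as pgd_index() in the patch. */
	unsigned long boundary = (page_offset >> pgdir_shift) & (ptrs_per_pgd - 1);

	printf("entries zeroed for user space: pgd[0..%lu]\n", boundary - 1);
	printf("kernel entries copied:         pgd[%lu..%lu]\n",
	       boundary, ptrs_per_pgd - 1);
	return 0;
}
```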
