From 86c0baf123e474b6eb404798926ecf62b426bf3a Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Wed, 2 May 2007 19:27:05 +0200 Subject: [PATCH] i386: Change sysenter_setup to __cpuinit & improve __INIT, __INITDATA Change sysenter_setup to __cpuinit. Change __INIT & __INITDATA to be cpu hotplug aware. Resolve MODPOST warnings similar to: WARNING: vmlinux - Section mismatch: reference to .init.text:sysenter_setup from .text between 'identify_cpu' (at offset 0xc040a380) and 'detect_ht' and WARNING: vmlinux - Section mismatch: reference to .init.data:vsyscall_int80_end from .text between 'sysenter_setup' (at offset 0xc041a269) and 'enable_sep_cpu' WARNING: vmlinux - Section mismatch: reference to .init.data:vsyscall_int80_start from .text between 'sysenter_setup' (at offset 0xc041a26e) and 'enable_sep_cpu' WARNING: vmlinux - Section mismatch: reference to .init.data:vsyscall_sysenter_end from .text between 'sysenter_setup' (at offset 0xc041a275) and 'enable_sep_cpu' WARNING: vmlinux - Section mismatch: reference to .init.data:vsyscall_sysenter_start from .text between 'sysenter_setup' (at offset 0xc041a27a) and 'enable_sep_cpu' Signed-off-by: Prarit Bhargava Signed-off-by: Andi Kleen --- include/linux/init.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/init.h b/include/linux/init.h index e290a010e3f2..9abf120ec9f8 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -52,9 +52,14 @@ #endif /* For assembly routines */ +#ifdef CONFIG_HOTPLUG_CPU +#define __INIT .section ".text","ax" +#define __INITDATA .section ".data","aw" +#else #define __INIT .section ".init.text","ax" -#define __FINIT .previous #define __INITDATA .section ".init.data","aw" +#endif +#define __FINIT .previous #ifndef __ASSEMBLY__ /* -- cgit v1.2.3 From 973efae21beb2feda138f152ed06d4204774d93c Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:06 +0200 Subject: [PATCH] i386: clean up mach_reboot_fixups The reboot_fixups stuff seems to be a bit of a mess, specifically the header is in linux/ when its a purely i386-specific piece of code. I'm not sure why it has its config option; its only currently needed for "geode-gx1/cs5530a", so perhaps whatever config option controls that hardware should enable this? Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen --- arch/i386/kernel/reboot.c | 6 +++++- arch/i386/kernel/reboot_fixups.c | 2 +- include/asm-i386/reboot_fixups.h | 6 ++++++ include/linux/reboot_fixups.h | 10 ---------- 4 files changed, 12 insertions(+), 12 deletions(-) create mode 100644 include/asm-i386/reboot_fixups.h delete mode 100644 include/linux/reboot_fixups.h (limited to 'include/linux') diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c index 3514b4153f7f..8b5ff6e15412 100644 --- a/arch/i386/kernel/reboot.c +++ b/arch/i386/kernel/reboot.c @@ -17,7 +17,7 @@ #include #include #include "mach_reboot.h" -#include +#include /* * Power off function, if any @@ -316,6 +316,10 @@ void machine_shutdown(void) #endif } +void __attribute__((weak)) mach_reboot_fixups(void) +{ +} + void machine_emergency_restart(void) { if (!reboot_thru_bios) { diff --git a/arch/i386/kernel/reboot_fixups.c b/arch/i386/kernel/reboot_fixups.c index 99aab41a05b0..2d78d918340f 100644 --- a/arch/i386/kernel/reboot_fixups.c +++ b/arch/i386/kernel/reboot_fixups.c @@ -10,7 +10,7 @@ #include #include -#include +#include static void cs5530a_warm_reset(struct pci_dev *dev) { diff --git a/include/asm-i386/reboot_fixups.h b/include/asm-i386/reboot_fixups.h new file mode 100644 index 000000000000..0cb7d87c2b68 --- /dev/null +++ b/include/asm-i386/reboot_fixups.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_REBOOT_FIXUPS_H +#define _LINUX_REBOOT_FIXUPS_H + +extern void mach_reboot_fixups(void); + +#endif /* _LINUX_REBOOT_FIXUPS_H */ diff --git a/include/linux/reboot_fixups.h b/include/linux/reboot_fixups.h deleted file mode 100644 index 480ea2d489d8..000000000000 --- a/include/linux/reboot_fixups.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef _LINUX_REBOOT_FIXUPS_H -#define _LINUX_REBOOT_FIXUPS_H - -#ifdef CONFIG_X86_REBOOTFIXUPS -extern void mach_reboot_fixups(void); -#else -#define mach_reboot_fixups() ((void)(0)) -#endif - -#endif /* _LINUX_REBOOT_FIXUPS_H */ -- cgit v1.2.3 From 184c44d2049c4db7ef6ec65794546954da2c6a0e Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 2 May 2007 19:27:08 +0200 Subject: [PATCH] x86-64: fix x86_64-mm-sched-clock-share Fix for the following patch. Provide dummy cpufreq functions when CPUFREQ is not compiled in. Cc: Andi Kleen Cc: Dave Jones Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen --- include/linux/cpufreq.h | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 0899e2cdcdd1..cb9b2ec8849c 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -32,7 +32,15 @@ * CPUFREQ NOTIFIER INTERFACE * *********************************************************************/ +#ifdef CONFIG_CPU_FREQ int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list); +#else +static inline int cpufreq_register_notifier(struct notifier_block *nb, + unsigned int list) +{ + return 0; +} +#endif int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list); #define CPUFREQ_TRANSITION_NOTIFIER (0) @@ -261,17 +269,22 @@ int cpufreq_set_policy(struct cpufreq_policy *policy); int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu); int cpufreq_update_policy(unsigned int cpu); -/* query the current CPU frequency (in kHz). If zero, cpufreq couldn't detect it */ -unsigned int cpufreq_get(unsigned int cpu); -/* query the last known CPU freq (in kHz). If zero, cpufreq couldn't detect it */ +/* + * query the last known CPU freq (in kHz). If zero, cpufreq couldn't detect it + */ #ifdef CONFIG_CPU_FREQ unsigned int cpufreq_quick_get(unsigned int cpu); +unsigned int cpufreq_get(unsigned int cpu); #else static inline unsigned int cpufreq_quick_get(unsigned int cpu) { return 0; } +static inline unsigned int cpufreq_get(unsigned int cpu) +{ + return 0; +} #endif -- cgit v1.2.3 From e073ae1b34d5600ffc550407625dcb2d4cf46c6e Mon Sep 17 00:00:00 2001 From: Ravikiran G Thirumalai Date: Wed, 2 May 2007 19:27:08 +0200 Subject: [PATCH] x86-64: Set HASHDIST_DEFAULT to 1 for x86_64 NUMA Enable system hashtable memory to be distributed among nodes on x86_64 NUMA Forcing the kernel to use node interleaved vmalloc instead of bootmem for the system hashtable memory (alloc_large_system_hash) reduces the memory imbalance on node 0 by around 40MB on a 8 node x86_64 NUMA box: Before the following patch, on bootup of a 8 node box: Node 0 MemTotal: 3407488 kB Node 0 MemFree: 3206296 kB Node 0 MemUsed: 201192 kB Node 0 Active: 7012 kB Node 0 Inactive: 512 kB Node 0 Dirty: 0 kB Node 0 Writeback: 0 kB Node 0 FilePages: 1912 kB Node 0 Mapped: 420 kB Node 0 AnonPages: 5612 kB Node 0 PageTables: 468 kB Node 0 NFS_Unstable: 0 kB Node 0 Bounce: 0 kB Node 0 Slab: 5408 kB Node 0 SReclaimable: 644 kB Node 0 SUnreclaim: 4764 kB After the patch (or using hashdist=1 on the kernel command line): Node 0 MemTotal: 3407488 kB Node 0 MemFree: 3247608 kB Node 0 MemUsed: 159880 kB Node 0 Active: 3012 kB Node 0 Inactive: 616 kB Node 0 Dirty: 0 kB Node 0 Writeback: 0 kB Node 0 FilePages: 2424 kB Node 0 Mapped: 380 kB Node 0 AnonPages: 1200 kB Node 0 PageTables: 396 kB Node 0 NFS_Unstable: 0 kB Node 0 Bounce: 0 kB Node 0 Slab: 6304 kB Node 0 SReclaimable: 1596 kB Node 0 SUnreclaim: 4708 kB I guess it is a good idea to keep HASHDIST_DEFAULT "on" for x86_64 NUMA since x86_64 has no dearth of vmalloc space? Or maybe enable hash distribution for all 64bit NUMA arches? The following patch does it only for x86_64. I ran a HPC MPI benchmark -- 'Ansys wingsolid', which takes up quite a bit of memory and uses up tlb entries. This was on a 4 way, 2 socket Tyan AMD box (non vsmp), with 8G total memory (4G pernode). The results with and without hash distribution are: 1. Vanilla - runtime of 1188.000s 2. With hashdist=1 runtime of 1154.000s Oprofile output for the duration of run is: 1. Vanilla: PU: AMD64 processors, speed 2411.16 MHz (estimated) Counted L1_AND_L2_DTLB_MISSES events (L1 and L2 DTLB misses) with a unit mask of 0x00 (No unit mask) count 500 samples % app name symbol name 163054 6.5513 libansys1.so MultiFront::decompose(int, int, Elemset *, int *, int, int, int) 162061 6.5114 libansys3.so blockSaxpy6L_fd 162042 6.5107 libansys3.so blockInnerProduct6L_fd 156286 6.2794 libansys3.so maxb33_ 87879 3.5309 libansys1.so elmatrixmultpcg_ 84857 3.4095 libansys4.so saxpy_pcg 58637 2.3560 libansys4.so .st4560 46612 1.8728 libansys4.so .st4282 43043 1.7294 vmlinux-t copy_user_generic_string 41326 1.6604 libansys3.so blockSaxpyBackSolve6L_fd 41288 1.6589 libansys3.so blockInnerProductBackSolve6L_fd 2. With hashdist=1 CPU: AMD64 processors, speed 2411.13 MHz (estimated) Counted L1_AND_L2_DTLB_MISSES events (L1 and L2 DTLB misses) with a unit mask of 0x00 (No unit mask) count 500 samples % app name symbol name 162993 6.9814 libansys1.so MultiFront::decompose(int, int, Elemset *, int *, int, int, int) 160799 6.8874 libansys3.so blockInnerProduct6L_fd 160459 6.8729 libansys3.so blockSaxpy6L_fd 156018 6.6826 libansys3.so maxb33_ 84700 3.6279 libansys4.so saxpy_pcg 83434 3.5737 libansys1.so elmatrixmultpcg_ 58074 2.4875 libansys4.so .st4560 46000 1.9703 libansys4.so .st4282 41166 1.7632 libansys3.so blockSaxpyBackSolve6L_fd 41033 1.7575 libansys3.so blockInnerProductBackSolve6L_fd 35762 1.5318 libansys1.so inner_product_sub 35591 1.5245 libansys1.so inner_product_sub2 28259 1.2104 libansys4.so addVectors Signed-off-by: Pravin B. Shelar Signed-off-by: Ravikiran Thirumalai Signed-off-by: Shai Fultheim Signed-off-by: Andi Kleen Acked-by: Christoph Lameter Cc: Andi Kleen Signed-off-by: Andrew Morton --- include/linux/bootmem.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 81c07cd18643..0365ec9fc0c9 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -122,9 +122,9 @@ extern void *alloc_large_system_hash(const char *tablename, #define HASH_EARLY 0x00000001 /* Allocating during early boot? */ /* Only NUMA needs hash distribution. - * IA64 is known to have sufficient vmalloc space. + * IA64 and x86_64 have sufficient vmalloc space. */ -#if defined(CONFIG_NUMA) && defined(CONFIG_IA64) +#if defined(CONFIG_NUMA) && (defined(CONFIG_IA64) || defined(CONFIG_X86_64)) #define HASHDIST_DEFAULT 1 #else #define HASHDIST_DEFAULT 0 -- cgit v1.2.3 From 79e030114a8d97a1dcd593ab84fb986f8c91c536 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 2 May 2007 19:27:09 +0200 Subject: [PATCH] i386: Allow i386 crash kernels to handle x86_64 dumps The specific case I am encountering is kdump under Xen with a 64 bit hypervisor and 32 bit kernel/userspace. The dump created is 64 bit due to the hypervisor but the dump kernel is 32 bit for maximum compatibility. It's possibly less likely to be useful in a purely native scenario but I see no reason to disallow it. [akpm@linux-foundation.org: build fix] Signed-off-by: Ian Campbell Signed-off-by: Andi Kleen Acked-by: Vivek Goyal Cc: Horms Cc: Magnus Damm Cc: "Eric W. Biederman" Cc: Andi Kleen Signed-off-by: Andrew Morton --- fs/proc/vmcore.c | 2 +- include/asm-i386/kexec.h | 3 +++ include/linux/crash_dump.h | 8 ++++++++ 3 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index d96050728c43..523e1098ae88 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -514,7 +514,7 @@ static int __init parse_crash_elf64_headers(void) /* Do some basic Verification. */ if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 || (ehdr.e_type != ET_CORE) || - !elf_check_arch(&ehdr) || + !vmcore_elf_check_arch(&ehdr) || ehdr.e_ident[EI_CLASS] != ELFCLASS64 || ehdr.e_ident[EI_VERSION] != EV_CURRENT || ehdr.e_version != EV_CURRENT || diff --git a/include/asm-i386/kexec.h b/include/asm-i386/kexec.h index c5b4ab95bdc2..bcb5b21de2d2 100644 --- a/include/asm-i386/kexec.h +++ b/include/asm-i386/kexec.h @@ -42,6 +42,9 @@ /* The native architecture */ #define KEXEC_ARCH KEXEC_ARCH_386 +/* We can also handle crash dumps from 64 bit kernel. */ +#define vmcore_elf_check_arch_cross(x) ((x)->e_machine == EM_X86_64) + #define MAX_NOTE_BYTES 1024 /* CPU does not save ss and esp on stack if execution is already diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 32503657f14f..22c7ac5cd80c 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -14,5 +14,13 @@ extern ssize_t copy_oldmem_page(unsigned long, char *, size_t, extern const struct file_operations proc_vmcore_operations; extern struct proc_dir_entry *proc_vmcore; +/* Architecture code defines this if there are other possible ELF + * machine types, e.g. on bi-arch capable hardware. */ +#ifndef vmcore_elf_check_arch_cross +#define vmcore_elf_check_arch_cross(x) 0 +#endif + +#define vmcore_elf_check_arch(x) (elf_check_arch(x) || vmcore_elf_check_arch_cross(x)) + #endif /* CONFIG_CRASH_DUMP */ #endif /* LINUX_CRASHDUMP_H */ -- cgit v1.2.3 From 6fb14755a676282a4e6caa05a08c92db8e45cfff Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 2 May 2007 19:27:10 +0200 Subject: [PATCH] x86: tighten kernel image page access rights On x86-64, kernel memory freed after init can be entirely unmapped instead of just getting 'poisoned' by overwriting with a debug pattern. On i386 and x86-64 (under CONFIG_DEBUG_RODATA), kernel text and bug table can also be write-protected. Compared to the first version, this one prevents re-creating deleted mappings in the kernel image range on x86-64, if those got removed previously. This, together with the original changes, prevents temporarily having inconsistent mappings when cacheability attributes are being changed on such pages (e.g. from AGP code). While on i386 such duplicate mappings don't exist, the same change is done there, too, both for consistency and because checking pte_present() before using various other pte_XXX functions is a requirement anyway. At once, i386 code gets adjusted to use pte_huge() instead of open coding this. AK: split out cpa() changes Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen --- arch/i386/kernel/vmlinux.lds.S | 4 ++-- arch/i386/mm/init.c | 25 +++++++++++++++++++------ arch/x86_64/kernel/head.S | 1 - arch/x86_64/kernel/vmlinux.lds.S | 5 +++-- arch/x86_64/mm/init.c | 25 ++++++++++++++++--------- include/linux/poison.h | 3 --- 6 files changed, 40 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index 6f38f818380b..f4ec72231835 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S @@ -61,8 +61,6 @@ SECTIONS __stop___ex_table = .; } - RODATA - BUG_TABLE . = ALIGN(4); @@ -72,6 +70,8 @@ SECTIONS __tracedata_end = .; } + RODATA + /* writeable */ . = ALIGN(4096); .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */ diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index 23be1b0aafa4..bd5ef3718504 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -751,13 +752,25 @@ static int noinline do_test_wp_bit(void) void mark_rodata_ro(void) { - unsigned long addr = (unsigned long)__start_rodata; + unsigned long start = PFN_ALIGN(_text); + unsigned long size = PFN_ALIGN(_etext) - start; - for (; addr < (unsigned long)__end_rodata; addr += PAGE_SIZE) - change_page_attr(virt_to_page(addr), 1, PAGE_KERNEL_RO); +#ifdef CONFIG_HOTPLUG_CPU + /* It must still be possible to apply SMP alternatives. */ + if (num_possible_cpus() <= 1) +#endif + { + change_page_attr(virt_to_page(start), + size >> PAGE_SHIFT, PAGE_KERNEL_RX); + printk("Write protecting the kernel text: %luk\n", size >> 10); + } - printk("Write protecting the kernel read-only data: %uk\n", - (__end_rodata - __start_rodata) >> 10); + start += size; + size = (unsigned long)__end_rodata - start; + change_page_attr(virt_to_page(start), + size >> PAGE_SHIFT, PAGE_KERNEL_RO); + printk("Write protecting the kernel read-only data: %luk\n", + size >> 10); /* * change_page_attr() requires a global_flush_tlb() call after it. @@ -781,7 +794,7 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) __free_page(page); totalram_pages++; } - printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10); + printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); } void free_initmem(void) diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S index 36aa98a6d15c..fd9fdfdd143e 100644 --- a/arch/x86_64/kernel/head.S +++ b/arch/x86_64/kernel/head.S @@ -280,7 +280,6 @@ early_idt_ripmsg: .balign PAGE_SIZE ENTRY(stext) -ENTRY(_stext) #define NEXT_PAGE(name) \ .balign PAGE_SIZE; \ diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index 5176ecf006ee..3bdeb88d28f4 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S @@ -29,6 +29,7 @@ SECTIONS .text : AT(ADDR(.text) - LOAD_OFFSET) { /* First the code that has to be first for bootstrapping */ *(.bootstrap.text) + _stext = .; /* Then all the functions that are "hot" in profiles, to group them onto the same hugetlb entry */ #include "functionlist" @@ -50,10 +51,10 @@ SECTIONS __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) } __stop___ex_table = .; - RODATA - BUG_TABLE + RODATA + . = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */ /* Data */ .data : AT(ADDR(.data) - LOAD_OFFSET) { diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index 69e22d3c9238..e3134bc9a4fc 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -563,21 +564,23 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) if (begin >= end) return; - printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10); + printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); for (addr = begin; addr < end; addr += PAGE_SIZE) { struct page *page = pfn_to_page(addr >> PAGE_SHIFT); ClearPageReserved(page); init_page_count(page); memset(page_address(page), POISON_FREE_INITMEM, PAGE_SIZE); + if (addr >= __START_KERNEL_map) + change_page_attr_addr(addr, 1, __pgprot(0)); __free_page(page); totalram_pages++; } + if (addr > __START_KERNEL_map) + global_flush_tlb(); } void free_initmem(void) { - memset(__initdata_begin, POISON_FREE_INITDATA, - __initdata_end - __initdata_begin); free_init_pages("unused kernel memory", __pa_symbol(&__init_begin), __pa_symbol(&__init_end)); @@ -587,14 +590,18 @@ void free_initmem(void) void mark_rodata_ro(void) { - unsigned long addr = (unsigned long)__va(__pa_symbol(&__start_rodata)); - unsigned long end = (unsigned long)__va(__pa_symbol(&__end_rodata)); + unsigned long start = PFN_ALIGN(__va(__pa_symbol(&_stext))), size; - for (; addr < end; addr += PAGE_SIZE) - change_page_attr_addr(addr, 1, PAGE_KERNEL_RO); +#ifdef CONFIG_HOTPLUG_CPU + /* It must still be possible to apply SMP alternatives. */ + if (num_possible_cpus() > 1) + start = PFN_ALIGN(__va(__pa_symbol(&_etext))); +#endif + size = (unsigned long)__va(__pa_symbol(&__end_rodata)) - start; + change_page_attr_addr(start, size >> PAGE_SHIFT, PAGE_KERNEL_RO); - printk ("Write protecting the kernel read-only data: %luk\n", - (__end_rodata - __start_rodata) >> 10); + printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", + size >> 10); /* * change_page_attr_addr() requires a global_flush_tlb() call after it. diff --git a/include/linux/poison.h b/include/linux/poison.h index 3e628f990fdf..89580b764959 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -26,9 +26,6 @@ /********** arch/$ARCH/mm/init.c **********/ #define POISON_FREE_INITMEM 0xcc -/********** arch/x86_64/mm/init.c **********/ -#define POISON_FREE_INITDATA 0xba - /********** arch/ia64/hp/common/sba_iommu.c **********/ /* * arch/ia64/hp/common/sba_iommu.c uses a 16-byte poison string with a -- cgit v1.2.3 From b00742d399513a4100c24cc2accefdc1bb1e0b15 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:11 +0200 Subject: [PATCH] x86-64: Account for module percpu space separately from kernel percpu Rather than using a single constant PERCPU_ENOUGH_ROOM, compute it as the sum of kernel_percpu + PERCPU_MODULE_RESERVE. This is now common to all architectures; if an architecture wants to set PERCPU_ENOUGH_ROOM to something special, then it may do so (ia64 is the only one which does). Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Rusty Russell Cc: Eric W. Biederman Cc: Andi Kleen --- include/asm-alpha/percpu.h | 14 -------------- include/asm-sparc64/percpu.h | 10 ---------- include/asm-x86_64/percpu.h | 10 ---------- include/linux/percpu.h | 9 ++++++++- kernel/module.c | 2 +- 5 files changed, 9 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/include/asm-alpha/percpu.h b/include/asm-alpha/percpu.h index 651ebb141b24..48348fe34c19 100644 --- a/include/asm-alpha/percpu.h +++ b/include/asm-alpha/percpu.h @@ -1,20 +1,6 @@ #ifndef __ALPHA_PERCPU_H #define __ALPHA_PERCPU_H -/* - * Increase the per cpu area for Alpha so that - * modules using percpu area can load. - */ -#ifdef CONFIG_MODULES -# define PERCPU_MODULE_RESERVE 8192 -#else -# define PERCPU_MODULE_RESERVE 0 -#endif - -#define PERCPU_ENOUGH_ROOM \ - (ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES) + \ - PERCPU_MODULE_RESERVE) - #include #endif /* __ALPHA_PERCPU_H */ diff --git a/include/asm-sparc64/percpu.h b/include/asm-sparc64/percpu.h index 0d3df76aa47f..ced8cbde046d 100644 --- a/include/asm-sparc64/percpu.h +++ b/include/asm-sparc64/percpu.h @@ -5,16 +5,6 @@ #ifdef CONFIG_SMP -#ifdef CONFIG_MODULES -# define PERCPU_MODULE_RESERVE 8192 -#else -# define PERCPU_MODULE_RESERVE 0 -#endif - -#define PERCPU_ENOUGH_ROOM \ - (ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES) + \ - PERCPU_MODULE_RESERVE) - extern void setup_per_cpu_areas(void); extern unsigned long __per_cpu_base; diff --git a/include/asm-x86_64/percpu.h b/include/asm-x86_64/percpu.h index 5ed0ef340842..c6fbb67eac90 100644 --- a/include/asm-x86_64/percpu.h +++ b/include/asm-x86_64/percpu.h @@ -11,16 +11,6 @@ #include -#ifdef CONFIG_MODULES -# define PERCPU_MODULE_RESERVE 8192 -#else -# define PERCPU_MODULE_RESERVE 0 -#endif - -#define PERCPU_ENOUGH_ROOM \ - (ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES) + \ - PERCPU_MODULE_RESERVE) - #define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset) #define __my_cpu_offset() read_pda(data_offset) diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 600e3d387ffc..b72be2f79e6a 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -11,9 +11,16 @@ /* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */ #ifndef PERCPU_ENOUGH_ROOM -#define PERCPU_ENOUGH_ROOM 32768 +#ifdef CONFIG_MODULES +#define PERCPU_MODULE_RESERVE 8192 +#else +#define PERCPU_MODULE_RESERVE 0 #endif +#define PERCPU_ENOUGH_ROOM \ + (__per_cpu_end - __per_cpu_start + PERCPU_MODULE_RESERVE) +#endif /* PERCPU_ENOUGH_ROOM */ + /* * Must be an lvalue. Since @var must be a simple identifier, * we force a syntax error here if it isn't. diff --git a/kernel/module.c b/kernel/module.c index 9da5af668a20..cf49ca25fcce 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -430,7 +430,7 @@ static int percpu_modinit(void) pcpu_size = kmalloc(sizeof(pcpu_size[0]) * pcpu_num_allocated, GFP_KERNEL); /* Static in-kernel percpu data (used). */ - pcpu_size[0] = -ALIGN(__per_cpu_end-__per_cpu_start, SMP_CACHE_BYTES); + pcpu_size[0] = -(__per_cpu_end-__per_cpu_start); /* Free room. */ pcpu_size[1] = PERCPU_ENOUGH_ROOM + pcpu_size[0]; if (pcpu_size[1] < 0) { -- cgit v1.2.3 From d4f7a2c18e59e0304a1c733589ce14fc02fec1bd Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:12 +0200 Subject: [PATCH] i386: Relocate VDSO ELF headers to match mapped location with COMPAT_VDSO Some versions of libc can't deal with a VDSO which doesn't have its ELF headers matching its mapped address. COMPAT_VDSO maps the VDSO at a specific system-wide fixed address. Previously this was all done at build time, on the grounds that the fixed VDSO address is always at the top of the address space. However, a hypervisor may reserve some of that address space, pushing the fixmap address down. This patch does the adjustment dynamically at runtime, depending on the runtime location of the VDSO fixmap. [ Patch has been through several hands: Jan Beulich wrote the orignal version; Zach reworked it, and Jeremy converted it to relocate phdrs as well as sections. ] Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Zachary Amsden Cc: "Jan Beulich" Cc: Eric W. Biederman Cc: Andi Kleen Cc: Ingo Molnar Cc: Roland McGrath --- arch/i386/kernel/entry.S | 4 -- arch/i386/kernel/sysenter.c | 158 +++++++++++++++++++++++++++++++++++++++++--- arch/i386/mm/pgtable.c | 6 -- include/asm-i386/elf.h | 28 +++----- include/asm-i386/fixmap.h | 8 +-- include/linux/elf.h | 17 +++++ 6 files changed, 178 insertions(+), 43 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index c61c6b67e856..e901952dff37 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -305,16 +305,12 @@ sysenter_past_esp: pushl $(__USER_CS) CFI_ADJUST_CFA_OFFSET 4 /*CFI_REL_OFFSET cs, 0*/ -#ifndef CONFIG_COMPAT_VDSO /* * Push current_thread_info()->sysenter_return to the stack. * A tiny bit of offset fixup is necessary - 4*4 means the 4 words * pushed above; +8 corresponds to copy_thread's esp0 setting. */ pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) -#else - pushl $SYSENTER_RETURN -#endif CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET eip, 0 diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c index 13ca54a85a1c..e5a958379ac9 100644 --- a/arch/i386/kernel/sysenter.c +++ b/arch/i386/kernel/sysenter.c @@ -22,6 +22,7 @@ #include #include #include +#include /* * Should the kernel map a VDSO page into processes and pass its @@ -46,6 +47,129 @@ __setup("vdso=", vdso_setup); extern asmlinkage void sysenter_entry(void); +#ifdef CONFIG_COMPAT_VDSO +static __init void reloc_symtab(Elf32_Ehdr *ehdr, + unsigned offset, unsigned size) +{ + Elf32_Sym *sym = (void *)ehdr + offset; + unsigned nsym = size / sizeof(*sym); + unsigned i; + + for(i = 0; i < nsym; i++, sym++) { + if (sym->st_shndx == SHN_UNDEF || + sym->st_shndx == SHN_ABS) + continue; /* skip */ + + if (sym->st_shndx > SHN_LORESERVE) { + printk(KERN_INFO "VDSO: unexpected st_shndx %x\n", + sym->st_shndx); + continue; + } + + switch(ELF_ST_TYPE(sym->st_info)) { + case STT_OBJECT: + case STT_FUNC: + case STT_SECTION: + case STT_FILE: + sym->st_value += VDSO_HIGH_BASE; + } + } +} + +static __init void reloc_dyn(Elf32_Ehdr *ehdr, unsigned offset) +{ + Elf32_Dyn *dyn = (void *)ehdr + offset; + + for(; dyn->d_tag != DT_NULL; dyn++) + switch(dyn->d_tag) { + case DT_PLTGOT: + case DT_HASH: + case DT_STRTAB: + case DT_SYMTAB: + case DT_RELA: + case DT_INIT: + case DT_FINI: + case DT_REL: + case DT_DEBUG: + case DT_JMPREL: + case DT_VERSYM: + case DT_VERDEF: + case DT_VERNEED: + case DT_ADDRRNGLO ... DT_ADDRRNGHI: + /* definitely pointers needing relocation */ + dyn->d_un.d_ptr += VDSO_HIGH_BASE; + break; + + case DT_ENCODING ... OLD_DT_LOOS-1: + case DT_LOOS ... DT_HIOS-1: + /* Tags above DT_ENCODING are pointers if + they're even */ + if (dyn->d_tag >= DT_ENCODING && + (dyn->d_tag & 1) == 0) + dyn->d_un.d_ptr += VDSO_HIGH_BASE; + break; + + case DT_VERDEFNUM: + case DT_VERNEEDNUM: + case DT_FLAGS_1: + case DT_RELACOUNT: + case DT_RELCOUNT: + case DT_VALRNGLO ... DT_VALRNGHI: + /* definitely not pointers */ + break; + + case OLD_DT_LOOS ... DT_LOOS-1: + case DT_HIOS ... DT_VALRNGLO-1: + default: + if (dyn->d_tag > DT_ENCODING) + printk(KERN_INFO "VDSO: unexpected DT_tag %x\n", + dyn->d_tag); + break; + } +} + +static __init void relocate_vdso(Elf32_Ehdr *ehdr) +{ + Elf32_Phdr *phdr; + Elf32_Shdr *shdr; + int i; + + BUG_ON(memcmp(ehdr->e_ident, ELFMAG, 4) != 0 || + !elf_check_arch(ehdr) || + ehdr->e_type != ET_DYN); + + ehdr->e_entry += VDSO_HIGH_BASE; + + /* rebase phdrs */ + phdr = (void *)ehdr + ehdr->e_phoff; + for (i = 0; i < ehdr->e_phnum; i++) { + phdr[i].p_vaddr += VDSO_HIGH_BASE; + + /* relocate dynamic stuff */ + if (phdr[i].p_type == PT_DYNAMIC) + reloc_dyn(ehdr, phdr[i].p_offset); + } + + /* rebase sections */ + shdr = (void *)ehdr + ehdr->e_shoff; + for(i = 0; i < ehdr->e_shnum; i++) { + if (!(shdr[i].sh_flags & SHF_ALLOC)) + continue; + + shdr[i].sh_addr += VDSO_HIGH_BASE; + + if (shdr[i].sh_type == SHT_SYMTAB || + shdr[i].sh_type == SHT_DYNSYM) + reloc_symtab(ehdr, shdr[i].sh_offset, + shdr[i].sh_size); + } +} +#else +static inline void relocate_vdso(Elf32_Ehdr *ehdr) +{ +} +#endif /* COMPAT_VDSO */ + void enable_sep_cpu(void) { int cpu = get_cpu(); @@ -75,6 +199,9 @@ static struct page *syscall_pages[1]; int __init sysenter_setup(void) { void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); + const void *vsyscall; + size_t vsyscall_len; + syscall_pages[0] = virt_to_page(syscall_page); #ifdef CONFIG_COMPAT_VDSO @@ -83,23 +210,23 @@ int __init sysenter_setup(void) #endif if (!boot_cpu_has(X86_FEATURE_SEP)) { - memcpy(syscall_page, - &vsyscall_int80_start, - &vsyscall_int80_end - &vsyscall_int80_start); - return 0; + vsyscall = &vsyscall_int80_start; + vsyscall_len = &vsyscall_int80_end - &vsyscall_int80_start; + } else { + vsyscall = &vsyscall_sysenter_start; + vsyscall_len = &vsyscall_sysenter_end - &vsyscall_sysenter_start; } - memcpy(syscall_page, - &vsyscall_sysenter_start, - &vsyscall_sysenter_end - &vsyscall_sysenter_start); + memcpy(syscall_page, vsyscall, vsyscall_len); + relocate_vdso(syscall_page); return 0; } -#ifndef CONFIG_COMPAT_VDSO /* Defined in vsyscall-sysenter.S */ extern void SYSENTER_RETURN; +#ifdef __HAVE_ARCH_GATE_AREA /* Setup a VMA at program startup for the vsyscall page */ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) { @@ -159,4 +286,17 @@ int in_gate_area_no_task(unsigned long addr) { return 0; } -#endif +#else /* !__HAVE_ARCH_GATE_AREA */ +int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) +{ + /* + * If not creating userspace VMA, simply set vdso to point to + * fixmap page. + */ + current->mm->context.vdso = (void *)VDSO_HIGH_BASE; + current_thread_info()->sysenter_return = + (void *)VDSO_SYM(&SYSENTER_RETURN); + + return 0; +} +#endif /* __HAVE_ARCH_GATE_AREA */ diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c index fa0cfbd551e1..99c09edc3dbb 100644 --- a/arch/i386/mm/pgtable.c +++ b/arch/i386/mm/pgtable.c @@ -144,10 +144,8 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) } static int fixmaps; -#ifndef CONFIG_COMPAT_VDSO unsigned long __FIXADDR_TOP = 0xfffff000; EXPORT_SYMBOL(__FIXADDR_TOP); -#endif void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags) { @@ -173,12 +171,8 @@ void reserve_top_address(unsigned long reserve) BUG_ON(fixmaps > 0); printk(KERN_INFO "Reserving virtual address space above 0x%08x\n", (int)-reserve); -#ifdef CONFIG_COMPAT_VDSO - BUG_ON(reserve != 0); -#else __FIXADDR_TOP = -reserve - PAGE_SIZE; __VMALLOC_RESERVE += reserve; -#endif } pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) diff --git a/include/asm-i386/elf.h b/include/asm-i386/elf.h index 952b3ee3c9bb..d304ab4161ff 100644 --- a/include/asm-i386/elf.h +++ b/include/asm-i386/elf.h @@ -133,39 +133,31 @@ extern int dump_task_extended_fpu (struct task_struct *, struct user_fxsr_struct #define ELF_CORE_COPY_XFPREGS(tsk, elf_xfpregs) dump_task_extended_fpu(tsk, elf_xfpregs) #define VDSO_HIGH_BASE (__fix_to_virt(FIX_VDSO)) -#define VDSO_BASE ((unsigned long)current->mm->context.vdso) - -#ifdef CONFIG_COMPAT_VDSO -# define VDSO_COMPAT_BASE VDSO_HIGH_BASE -# define VDSO_PRELINK VDSO_HIGH_BASE -#else -# define VDSO_COMPAT_BASE VDSO_BASE -# define VDSO_PRELINK 0 -#endif +#define VDSO_CURRENT_BASE ((unsigned long)current->mm->context.vdso) +#define VDSO_PRELINK 0 #define VDSO_SYM(x) \ - (VDSO_COMPAT_BASE + (unsigned long)(x) - VDSO_PRELINK) + (VDSO_CURRENT_BASE + (unsigned long)(x) - VDSO_PRELINK) #define VDSO_HIGH_EHDR ((const struct elfhdr *) VDSO_HIGH_BASE) -#define VDSO_EHDR ((const struct elfhdr *) VDSO_COMPAT_BASE) +#define VDSO_EHDR ((const struct elfhdr *) VDSO_CURRENT_BASE) extern void __kernel_vsyscall; #define VDSO_ENTRY VDSO_SYM(&__kernel_vsyscall) -#ifndef CONFIG_COMPAT_VDSO -#define ARCH_HAS_SETUP_ADDITIONAL_PAGES struct linux_binprm; + +#define ARCH_HAS_SETUP_ADDITIONAL_PAGES extern int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack); -#endif extern unsigned int vdso_enabled; -#define ARCH_DLINFO \ -do if (vdso_enabled) { \ - NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY); \ - NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_COMPAT_BASE); \ +#define ARCH_DLINFO \ +do if (vdso_enabled) { \ + NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY); \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE); \ } while (0) #endif diff --git a/include/asm-i386/fixmap.h b/include/asm-i386/fixmap.h index 3e9f610c35df..e5651b2a585a 100644 --- a/include/asm-i386/fixmap.h +++ b/include/asm-i386/fixmap.h @@ -19,13 +19,9 @@ * Leave one empty page between vmalloc'ed areas and * the start of the fixmap. */ -#ifndef CONFIG_COMPAT_VDSO extern unsigned long __FIXADDR_TOP; -#else -#define __FIXADDR_TOP 0xfffff000 -#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO) -#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1) -#endif +#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO) +#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1) #ifndef __ASSEMBLY__ #include diff --git a/include/linux/elf.h b/include/linux/elf.h index 60713e6ea297..8b17ffe222c4 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -83,6 +83,23 @@ typedef __s64 Elf64_Sxword; #define DT_DEBUG 21 #define DT_TEXTREL 22 #define DT_JMPREL 23 +#define DT_ENCODING 32 +#define OLD_DT_LOOS 0x60000000 +#define DT_LOOS 0x6000000d +#define DT_HIOS 0x6ffff000 +#define DT_VALRNGLO 0x6ffffd00 +#define DT_VALRNGHI 0x6ffffdff +#define DT_ADDRRNGLO 0x6ffffe00 +#define DT_ADDRRNGHI 0x6ffffeff +#define DT_VERSYM 0x6ffffff0 +#define DT_RELACOUNT 0x6ffffff9 +#define DT_RELCOUNT 0x6ffffffa +#define DT_FLAGS_1 0x6ffffffb +#define DT_VERDEF 0x6ffffffc +#define DT_VERDEFNUM 0x6ffffffd +#define DT_VERNEED 0x6ffffffe +#define DT_VERNEEDNUM 0x6fffffff +#define OLD_DT_HIOS 0x6fffffff #define DT_LOPROC 0x70000000 #define DT_HIPROC 0x7fffffff -- cgit v1.2.3 From ce6234b5298902aaec831a67d5f8d9bd2ef5a488 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:15 +0200 Subject: [PATCH] i386: PARAVIRT: add kmap_atomic_pte for mapping highpte pages Xen and VMI both have special requirements when mapping a highmem pte page into the kernel address space. These can be dealt with by adding a new kmap_atomic_pte() function for mapping highptes, and hooking it into the paravirt_ops infrastructure. Xen specifically wants to map the pte page RO, so this patch exposes a helper function, kmap_atomic_prot, which maps the page with the specified page protections. This also adds a kmap_flush_unused() function to clear out the cached kmap mappings. Xen needs this to clear out any potential stray RW mappings of pages which will become part of a pagetable. [ Zach - vmi.c will need some attention after this patch. It wasn't immediately obvious to me what needs to be done. ] Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Zachary Amsden --- arch/i386/kernel/paravirt.c | 5 +++++ arch/i386/mm/highmem.c | 9 +++++++-- include/asm-i386/highmem.h | 6 ++++++ include/asm-i386/paravirt.h | 15 +++++++++++++++ include/asm-i386/pgtable.h | 4 ++-- include/linux/highmem.h | 6 ++++++ mm/highmem.c | 9 +++++++++ 7 files changed, 50 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c index 13f41b5c887a..596f382c641c 100644 --- a/arch/i386/kernel/paravirt.c +++ b/arch/i386/kernel/paravirt.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -316,6 +317,10 @@ struct paravirt_ops paravirt_ops = { .ptep_get_and_clear = native_ptep_get_and_clear, +#ifdef CONFIG_HIGHPTE + .kmap_atomic_pte = kmap_atomic, +#endif + #ifdef CONFIG_X86_PAE .set_pte_atomic = native_set_pte_atomic, .set_pte_present = native_set_pte_present, diff --git a/arch/i386/mm/highmem.c b/arch/i386/mm/highmem.c index ac70d09df7ee..a1a21abf742e 100644 --- a/arch/i386/mm/highmem.c +++ b/arch/i386/mm/highmem.c @@ -26,7 +26,7 @@ void kunmap(struct page *page) * However when holding an atomic kmap is is not legal to sleep, so atomic * kmaps are appropriate for short, tight code paths only. */ -void *kmap_atomic(struct page *page, enum km_type type) +void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) { enum fixed_addresses idx; unsigned long vaddr; @@ -41,12 +41,17 @@ void *kmap_atomic(struct page *page, enum km_type type) return page_address(page); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - set_pte(kmap_pte-idx, mk_pte(page, kmap_prot)); + set_pte(kmap_pte-idx, mk_pte(page, prot)); arch_flush_lazy_mmu_mode(); return (void*) vaddr; } +void *kmap_atomic(struct page *page, enum km_type type) +{ + return kmap_atomic_prot(page, type, kmap_prot); +} + void kunmap_atomic(void *kvaddr, enum km_type type) { unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; diff --git a/include/asm-i386/highmem.h b/include/asm-i386/highmem.h index e9a34ebc25d5..13cdcd66fff2 100644 --- a/include/asm-i386/highmem.h +++ b/include/asm-i386/highmem.h @@ -24,6 +24,7 @@ #include #include #include +#include /* declarations for highmem.c */ extern unsigned long highstart_pfn, highend_pfn; @@ -67,11 +68,16 @@ extern void FASTCALL(kunmap_high(struct page *page)); void *kmap(struct page *page); void kunmap(struct page *page); +void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot); void *kmap_atomic(struct page *page, enum km_type type); void kunmap_atomic(void *kvaddr, enum km_type type); void *kmap_atomic_pfn(unsigned long pfn, enum km_type type); struct page *kmap_atomic_to_page(void *ptr); +#ifndef CONFIG_PARAVIRT +#define kmap_atomic_pte(page, type) kmap_atomic(page, type) +#endif + #define flush_cache_kmaps() do { } while (0) #endif /* __KERNEL__ */ diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 10f44af76b49..5048b41428fa 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -16,7 +16,9 @@ #ifndef __ASSEMBLY__ #include #include +#include +struct page; struct thread_struct; struct Xgt_desc_struct; struct tss_struct; @@ -187,6 +189,10 @@ struct paravirt_ops pte_t (*ptep_get_and_clear)(pte_t *ptep); +#ifdef CONFIG_HIGHPTE + void *(*kmap_atomic_pte)(struct page *page, enum km_type type); +#endif + #ifdef CONFIG_X86_PAE void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); void (*set_pte_present)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); @@ -884,6 +890,15 @@ static inline void paravirt_release_pd(unsigned pfn) PVOP_VCALL1(release_pd, pfn); } +#ifdef CONFIG_HIGHPTE +static inline void *kmap_atomic_pte(struct page *page, enum km_type type) +{ + unsigned long ret; + ret = PVOP_CALL2(unsigned long, kmap_atomic_pte, page, type); + return (void *)ret; +} +#endif + static inline void pte_update(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index 6599f2aa91b7..befc697821e5 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -476,9 +476,9 @@ extern pte_t *lookup_address(unsigned long address); #if defined(CONFIG_HIGHPTE) #define pte_offset_map(dir, address) \ - ((pte_t *)kmap_atomic(pmd_page(*(dir)),KM_PTE0) + pte_index(address)) + ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + pte_index(address)) #define pte_offset_map_nested(dir, address) \ - ((pte_t *)kmap_atomic(pmd_page(*(dir)),KM_PTE1) + pte_index(address)) + ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE1) + pte_index(address)) #define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0) #define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1) #else diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 645d440807c2..bca8e2dfa355 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -27,6 +27,8 @@ static inline void flush_kernel_dcache_page(struct page *page) unsigned int nr_free_highpages(void); extern unsigned long totalhigh_pages; +void kmap_flush_unused(void); + #else /* CONFIG_HIGHMEM */ static inline unsigned int nr_free_highpages(void) { return 0; } @@ -44,9 +46,13 @@ static inline void *kmap(struct page *page) #define kmap_atomic(page, idx) \ ({ pagefault_disable(); page_address(page); }) +#define kmap_atomic_prot(page, idx, prot) kmap_atomic(page, idx) + #define kunmap_atomic(addr, idx) do { pagefault_enable(); } while (0) #define kmap_atomic_pfn(pfn, idx) kmap_atomic(pfn_to_page(pfn), (idx)) #define kmap_atomic_to_page(ptr) virt_to_page(ptr) + +#define kmap_flush_unused() do {} while(0) #endif #endif /* CONFIG_HIGHMEM */ diff --git a/mm/highmem.c b/mm/highmem.c index 51e1c1995fec..be8f8d36a8b9 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -99,6 +99,15 @@ static void flush_all_zero_pkmaps(void) flush_tlb_kernel_range(PKMAP_ADDR(0), PKMAP_ADDR(LAST_PKMAP)); } +/* Flush all unused kmap mappings in order to remove stray + mappings. */ +void kmap_flush_unused(void) +{ + spin_lock(&kmap_lock); + flush_all_zero_pkmaps(); + spin_unlock(&kmap_lock); +} + static inline unsigned long map_new_virtual(struct page *page) { unsigned long vaddr; -- cgit v1.2.3 From 03df4f6ee997589a84d5f9492c6419183724c710 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:17 +0200 Subject: [PATCH] i386: Clean up ELF note generation Three cleanups: 1: ELF notes are never mapped, so there's no need to have any access flags in their phdr. 2: When generating them from asm, tell the assembler to use a SHT_NOTE section type. There doesn't seem to be a way to do this from C. 3: Use ANSI rather than traditional cpp behaviour to stringify the macro argument. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Eric W. Biederman --- arch/i386/kernel/vmlinux.lds.S | 2 +- include/asm-generic/vmlinux.lds.h | 2 +- include/linux/elfnote.h | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index 568caca87715..23e8614edeee 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S @@ -30,7 +30,7 @@ jiffies = jiffies_64; PHDRS { text PT_LOAD FLAGS(5); /* R_E */ data PT_LOAD FLAGS(7); /* RWE */ - note PT_NOTE FLAGS(4); /* R__ */ + note PT_NOTE FLAGS(0); /* ___ */ } SECTIONS { diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 9fcc8d9fbb14..f3806a74c478 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -208,7 +208,7 @@ } #define NOTES \ - .notes : { *(.note.*) } :note + .notes : { *(.note.*) } :note #define INITCALLS \ *(.initcall0.init) \ diff --git a/include/linux/elfnote.h b/include/linux/elfnote.h index 67396db141e8..9a1e0674e56c 100644 --- a/include/linux/elfnote.h +++ b/include/linux/elfnote.h @@ -39,12 +39,12 @@ * ELFNOTE(XYZCo, 12, .long, 0xdeadbeef) */ #define ELFNOTE(name, type, desctype, descdata) \ -.pushsection .note.name ; \ +.pushsection .note.name, "",@note ; \ .align 4 ; \ .long 2f - 1f /* namesz */ ; \ .long 4f - 3f /* descsz */ ; \ .long type ; \ -1:.asciz "name" ; \ +1:.asciz #name ; \ 2:.align 4 ; \ 3:desctype descdata ; \ 4:.align 4 ; \ -- cgit v1.2.3