summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndi Kleen <ak@suse.de>2003-03-24 19:54:54 -0800
committerLinus Torvalds <torvalds@home.transmeta.com>2003-03-24 19:54:54 -0800
commitb4f6270ea3fec60e9b02f4bdf36f2ae7298cd024 (patch)
treeaef7bd1a413f18119415c05b49cbc50d4a84c917
parent5563e77b38c287f8d88b6a3298b411b7db24f901 (diff)
[PATCH] x86-64 updates
Lots of x86-64 updates. Merge with 2.4 and NUMA works now. Also reenabled the preemptive kernel. And some other bug fixes. IOMMU disabled by default now because it has problems. - Add more CONFIG options for device driver debugging and iommu force/debug. (don't enable iommu force currently) - Some S3/ACPI fixes/cleanups from Pavel. - Set MSG_COMPAT_* in msg_flags for networking 32bit emulation. This unfortunately still doesn't fix the fd passing problems. - Sync PCI IOMMU code with 2.4 (minor fixes, flush less often) - Really fix UP compilation (Pavel) - Reenable preempt - Fix CONFIG_DISCONTIGMEM bootup and enable. Still needs more tuning. - Fix some bugs in topology discovery and clean code up. - Don't put unwind tables into object files - Some kernel debugging hooks - Move CPU detection into early real mode code to better interact with vesafb consoles - Initialize mode in real mode character output - New 32bit FPU signal save/restore - Various fixes in FPU handling in ptrace - Fix security holes in ptrace (32bit and 64bit) - Fix serial ioctl (including security hole) - Add bluetooth ioctls to 32bit emu (from sparc64) - Correctly enable si_val in queued signals in 32bit emulation - Rework SEM_STAT emulation. LTP still fails unfortunately. - Fix error case in msg* emulation - Fix debug register access from ptrace (Michal Ludvig, me) - Fix handling of NULL arguments in 32bit execve - Fix some error cases for 32bit readv/writev (LTP still complains) - Remove rate control from unimplemented syscall warnings - Fix error message for missing aperture - Turn some APIC printks into Dprintk to make the bootup more quiet - Some fixes for no APIC (probably still broken), add disableapic option (untested) - Sync K8 MCE handler with 2.4. Should work a lot better now. - Remove never used KDB hooks - Fix buffer overflow in command line copying - Merge from i386: use separate status word for lazy FPU state - Don't force the IOMMU for dma masks < 4GB. 
- Print backtrace in Sysrq-T (from Andrea) - Merge from i386: fix FPU race in fork. - Disable NX mode by default for now - Rewrite dump_pagetable - Fix off by one bug in ioremap (i386 merge) - Merge from i386: handle VIA pci bridge bugs - Disable NUMA ACPI support (no SRAT support yet) - Fix aio 32bit emulation - Increase 32bit address space to nearly 4GB - Add exit_group syscall - Fix TLS setting in clone (Ulrich Drepper)
-rw-r--r--arch/x86_64/Kconfig33
-rw-r--r--arch/x86_64/Makefile4
-rw-r--r--arch/x86_64/boot/compressed/misc.c4
-rw-r--r--arch/x86_64/boot/setup.S76
-rw-r--r--arch/x86_64/ia32/fpu32.c68
-rw-r--r--arch/x86_64/ia32/ia32_ioctl.c49
-rw-r--r--arch/x86_64/ia32/ia32_signal.c21
-rw-r--r--arch/x86_64/ia32/ipc32.c96
-rw-r--r--arch/x86_64/ia32/ptrace32.c92
-rw-r--r--arch/x86_64/ia32/sys_ia32.c136
-rw-r--r--arch/x86_64/kernel/Makefile3
-rw-r--r--arch/x86_64/kernel/aperture.c3
-rw-r--r--arch/x86_64/kernel/apic.c9
-rw-r--r--arch/x86_64/kernel/bluesmoke.c362
-rw-r--r--arch/x86_64/kernel/entry.S17
-rw-r--r--arch/x86_64/kernel/head.S12
-rw-r--r--arch/x86_64/kernel/head64.c3
-rw-r--r--arch/x86_64/kernel/i387.c17
-rw-r--r--arch/x86_64/kernel/nmi.c20
-rw-r--r--arch/x86_64/kernel/pci-gart.c93
-rw-r--r--arch/x86_64/kernel/process.c15
-rw-r--r--arch/x86_64/kernel/ptrace.c17
-rw-r--r--arch/x86_64/kernel/reboot.c1
-rw-r--r--arch/x86_64/kernel/setup.c5
-rw-r--r--arch/x86_64/kernel/setup64.c6
-rw-r--r--arch/x86_64/kernel/signal.c4
-rw-r--r--arch/x86_64/kernel/smp.c8
-rw-r--r--arch/x86_64/kernel/smpboot.c4
-rw-r--r--arch/x86_64/kernel/suspend.c2
-rw-r--r--arch/x86_64/kernel/traps.c46
-rw-r--r--arch/x86_64/mm/fault.c52
-rw-r--r--arch/x86_64/mm/ioremap.c2
-rw-r--r--arch/x86_64/mm/k8topology.c39
-rw-r--r--arch/x86_64/mm/numa.c21
-rw-r--r--arch/x86_64/pci/irq.c9
-rw-r--r--include/asm-x86_64/apic.h2
-rw-r--r--include/asm-x86_64/debugreg.h2
-rw-r--r--include/asm-x86_64/desc.h1
-rw-r--r--include/asm-x86_64/hdreg.h2
-rw-r--r--include/asm-x86_64/i387.h38
-rw-r--r--include/asm-x86_64/kdebug.h16
-rw-r--r--include/asm-x86_64/numa.h6
-rw-r--r--include/asm-x86_64/pci.h25
-rw-r--r--include/asm-x86_64/pgtable.h1
-rw-r--r--include/asm-x86_64/processor.h6
-rw-r--r--include/asm-x86_64/proto.h3
-rw-r--r--include/asm-x86_64/suspend.h2
-rw-r--r--include/asm-x86_64/system.h29
-rw-r--r--include/asm-x86_64/thread_info.h16
-rw-r--r--include/asm-x86_64/unistd.h4
50 files changed, 968 insertions, 534 deletions
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 68402665d277..f7309aa8daea 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -179,9 +179,7 @@ config SMP
If you don't know what to do here, say N.
-# broken currently
config PREEMPT
- depends on NOT_WORKING
bool "Preemptible Kernel"
---help---
This option reduces the latency of the kernel when reacting to
@@ -200,7 +198,7 @@ config PREEMPT
# someone write a better help text please.
config K8_NUMA
bool "K8 NUMA support"
- depends on SMP && NOT_WORKING
+ depends on SMP
help
Enable NUMA (Non Unified Memory Architecture) support for
AMD Opteron Multiprocessor systems. The kernel will try to allocate
@@ -590,10 +588,8 @@ config DEBUG_SLAB
allocation as well as poisoning memory on free to catch use of freed
memory.
-# bool ' Memory mapped I/O debugging' CONFIG_DEBUG_IOVIRT
config MAGIC_SYSRQ
bool "Magic SysRq key"
- depends on DEBUG_KERNEL
help
If you say Y here, you will have some control over the system even
if the system crashes for example during kernel debugging (e.g., you
@@ -639,14 +635,37 @@ config KALLSYMS
config FRAME_POINTER
bool "Compile the kernel with frame pointers"
- depends on DEBUG_KERNEL
help
Compile the kernel with frame pointers. This may help for some
debugging with external debuggers. Note the standard oops backtracer
- doesn't make use of it and the x86-64 kernel doesn't ensure an consistent
+ doesn't make use of this and the x86-64 kernel doesn't ensure an consistent
frame pointer through inline assembly (semaphores etc.)
Normally you should say N.
+config IOMMU_DEBUG
+ bool "Force IOMMU to on"
+ help
+ Force the IOMMU to on even when you have less than 4GB of memory and add
+ debugging code.
+ Can be disabled at boot time with iommu=noforce.
+
+config IOMMU_LEAK
+ bool "IOMMU leak tracing"
+ depends on DEBUG_KERNEL
+ help
+ Add a simple leak tracer to the IOMMU code. This is useful when you
+ are debugging a buggy device driver that leaks IOMMU mappings.
+
+config MCE_DEBUG
+ bool "K8 Machine check debugging mode"
+ default y
+ help
+ Turn on all Machine Check debugging for device driver problems.
+ This can cause panics, but is useful to find device driver problems.
+
+#config X86_REMOTE_DEBUG
+# bool "kgdb debugging stub"
+
endmenu
source "security/Kconfig"
diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile
index 6434c4a2366c..290803ace1a3 100644
--- a/arch/x86_64/Makefile
+++ b/arch/x86_64/Makefile
@@ -47,6 +47,10 @@ CFLAGS += -fno-reorder-blocks
# should lower this a lot and see how much .text is saves
CFLAGS += -finline-limit=2000
#CFLAGS += -g
+# don't enable this when you use kgdb:
+ifneq ($(CONFIG_X86_REMOTE_DEBUG),y)
+CFLAGS += -fno-asynchronous-unwind-tables
+endif
head-y := arch/x86_64/kernel/head.o arch/x86_64/kernel/head64.o arch/x86_64/kernel/init_task.o
diff --git a/arch/x86_64/boot/compressed/misc.c b/arch/x86_64/boot/compressed/misc.c
index 780b43933eb6..67fc7400becb 100644
--- a/arch/x86_64/boot/compressed/misc.c
+++ b/arch/x86_64/boot/compressed/misc.c
@@ -274,7 +274,7 @@ static void error(char *x)
puts(x);
puts("\n\n -- System halted");
- while(1); /* Halt */
+ while(1);
}
void setup_normal_output_buffer(void)
@@ -429,8 +429,6 @@ int decompress_kernel(struct moveparams *mv, void *rmode)
else setup_output_buffer_if_we_run_high(mv);
makecrc();
- puts("Checking CPU type...");
- check_cpu();
puts(".\nDecompressing Linux...");
gunzip();
puts("done.\nBooting the kernel.\n");
diff --git a/arch/x86_64/boot/setup.S b/arch/x86_64/boot/setup.S
index 36d8e5e8be73..1e10ed979dc2 100644
--- a/arch/x86_64/boot/setup.S
+++ b/arch/x86_64/boot/setup.S
@@ -42,6 +42,7 @@
* if CX/DX have been changed in the e801 call and if so use AX/BX .
* Michael Miller, April 2001 <michaelm@mjmm.org>
*
+ * Added long mode checking and SSE force. March 2003, Andi Kleen.
*/
#include <linux/config.h>
@@ -200,10 +201,10 @@ fin: ret
prtsp2: call prtspc # Print double space
prtspc: movb $0x20, %al # Print single space (note: fall-thru)
-# Part of above routine, this one just prints ascii al
-prtchr: pushw %ax
+prtchr:
+ pushw %ax
pushw %cx
- xorb %bh, %bh
+ movw $0007,%bx
movw $0x01, %cx
movb $0x0e, %ah
int $0x10
@@ -280,6 +281,75 @@ good_sig:
loader_panic_mess: .string "Wrong loader, giving up..."
loader_ok:
+ /* check for long mode. */
+ /* we have to do this before the VESA setup, otherwise the user
+ can't see the error message. */
+
+ pushw %ds
+ movw %cs,%ax
+ movw %ax,%ds
+
+ /* minimum CPUID flags for x86-64 */
+ /* see http://www.x86-64.org/lists/discuss/msg02971.html */
+#define SSE_MASK ((1<<25)|(1<<26))
+#define REQUIRED_MASK1 ((1<<0)|(1<<3)|(1<<4)|(1<<5)|(1<<6)|(1<<8)|(1<<11)| \
+ (1<<13)|(1<<15)|(1<<24)|(1<<29))
+
+ pushfl /* standard way to check for cpuid */
+ popl %eax
+ movl %eax,%ebx
+ xorl $0x200000,%eax
+ pushl %eax
+ popfl
+ pushfl
+ popl %eax
+ cmpl %eax,%ebx
+ jz no_longmode /* cpu has no cpuid */
+ movl $0x80000000,%eax
+ cpuid
+ cmpl $0x80000001,%eax
+ jb no_longmode /* no extended cpuid */
+ xor %di,%di
+ cmpl $0x68747541,%ebx /* AuthenticAMD */
+ jnz noamd
+ cmpl $0x69746e65,%edx
+ jnz noamd
+ cmpl $0x444d4163,%ecx
+ jnz noamd
+ mov $1,%di /* cpu is from AMD */
+noamd:
+ movl $0x80000001,%eax
+ cpuid
+ andl $REQUIRED_MASK1,%edx
+ xorl $REQUIRED_MASK1,%edx
+ jnz no_longmode
+sse_test:
+ movl $1,%eax
+ cpuid
+ andl $SSE_MASK,%edx
+ cmpl $SSE_MASK,%edx
+ je sse_ok
+ test %di,%di
+ jz no_longmode /* only try to force SSE on AMD */
+ movl $0xc0010015,%ecx /* HWCR */
+ rdmsr
+ btr $15,%eax /* enable SSE */
+ wrmsr
+ xor %di,%di /* don't loop */
+ jmp sse_test /* try again */
+no_longmode:
+ call beep
+ lea long_mode_panic,%si
+ call prtstr
+no_longmode_loop:
+ jmp no_longmode_loop
+long_mode_panic:
+ .string "Your CPU does not support long mode. Use a 32bit distribution."
+ .byte 0
+
+sse_ok:
+ popw %ds
+
# Get memory size (extended mem, kB)
xorl %eax, %eax
diff --git a/arch/x86_64/ia32/fpu32.c b/arch/x86_64/ia32/fpu32.c
index 09878eab6571..2094e76b9093 100644
--- a/arch/x86_64/ia32/fpu32.c
+++ b/arch/x86_64/ia32/fpu32.c
@@ -77,17 +77,20 @@ static inline int convert_fxsr_from_user(struct i387_fxsave_struct *fxsave,
struct _fpxreg *to;
struct _fpreg *from;
int i;
- int err;
- __u32 v;
+ u32 v;
+ int err = 0;
- err = __get_user(fxsave->cwd, &buf->cw);
- err |= __get_user(fxsave->swd, &buf->sw);
- err |= __get_user(fxsave->twd, &buf->tag);
+#define G(num,val) err |= __get_user(val, num + (u32 *)buf)
+ G(0, fxsave->cwd);
+ G(1, fxsave->swd);
+ G(2, fxsave->twd);
fxsave->twd = twd_i387_to_fxsr(fxsave->twd);
- err |= __get_user(fxsave->rip, &buf->ipoff);
- err |= __get_user(fxsave->rdp, &buf->dataoff);
- err |= __get_user(v, &buf->cssel);
- fxsave->fop = v >> 16;
+ G(3, fxsave->rip);
+ G(4, v);
+ fxsave->fop = v>>16; /* cs ignored */
+ G(5, fxsave->rdp);
+ /* 6: ds ignored */
+#undef G
if (err)
return -1;
@@ -109,21 +112,29 @@ static inline int convert_fxsr_to_user(struct _fpstate_ia32 *buf,
struct _fpreg *to;
struct _fpxreg *from;
int i;
- u32 ds;
- int err;
-
- err = __put_user((unsigned long)fxsave->cwd | 0xffff0000, &buf->cw);
- err |= __put_user((unsigned long)fxsave->swd | 0xffff0000, &buf->sw);
- err |= __put_user((u32)fxsave->rip, &buf->ipoff);
- err |= __put_user((u32)(regs->cs | ((u32)fxsave->fop << 16)),
- &buf->cssel);
- err |= __put_user((u32)twd_fxsr_to_i387(fxsave), &buf->tag);
- err |= __put_user((u32)fxsave->rdp, &buf->dataoff);
- if (tsk == current)
- asm("movl %%ds,%0 " : "=r" (ds));
- else /* ptrace. task has stopped. */
+ u16 cs,ds;
+ int err = 0;
+
+ if (tsk == current) {
+ /* should be actually ds/cs at fpu exception time,
+ but that information is not available in 64bit mode. */
+ asm("movw %%ds,%0 " : "=r" (ds));
+ asm("movw %%cs,%0 " : "=r" (cs));
+ } else { /* ptrace. task has stopped. */
ds = tsk->thread.ds;
- err |= __put_user(ds, &buf->datasel);
+ cs = regs->cs;
+ }
+
+#define P(num,val) err |= __put_user(val, num + (u32 *)buf)
+ P(0, (u32)fxsave->cwd | 0xffff0000);
+ P(1, (u32)fxsave->swd | 0xffff0000);
+ P(2, twd_fxsr_to_i387(fxsave));
+ P(3, (u32)fxsave->rip);
+ P(4, cs | ((u32)fxsave->fop) << 16);
+ P(5, fxsave->rdp);
+ P(6, 0xffff0000 | ds);
+#undef P
+
if (err)
return -1;
@@ -144,9 +155,9 @@ int restore_i387_ia32(struct task_struct *tsk, struct _fpstate_ia32 *buf, int fs
&buf->_fxsr_env[0],
sizeof(struct i387_fxsave_struct)))
return -1;
- }
tsk->thread.i387.fxsave.mxcsr &= 0xffbf;
- current->used_math = 1;
+ tsk->used_math = 1;
+ }
return convert_fxsr_from_user(&tsk->thread.i387.fxsave, buf);
}
@@ -157,12 +168,11 @@ int save_i387_ia32(struct task_struct *tsk,
{
int err = 0;
- if (!tsk->used_math)
- return 0;
- tsk->used_math = 0;
- unlazy_fpu(tsk);
+ init_fpu(tsk);
if (convert_fxsr_to_user(buf, &tsk->thread.i387.fxsave, regs, tsk))
return -1;
+ if (fsave)
+ return 0;
err |= __put_user(tsk->thread.i387.fxsave.swd, &buf->status);
if (fsave)
return err ? -1 : 1;
diff --git a/arch/x86_64/ia32/ia32_ioctl.c b/arch/x86_64/ia32/ia32_ioctl.c
index 9da5296018a5..dc2a7167524f 100644
--- a/arch/x86_64/ia32/ia32_ioctl.c
+++ b/arch/x86_64/ia32/ia32_ioctl.c
@@ -39,6 +39,7 @@
#include <linux/cdrom.h>
#include <linux/loop.h>
#include <linux/auto_fs.h>
+#include <linux/auto_fs4.h>
#include <linux/devfs_fs.h>
#include <linux/tty.h>
#include <linux/vt_kern.h>
@@ -60,6 +61,8 @@
#include <linux/if_tun.h>
#include <linux/dirent.h>
#include <linux/ctype.h>
+#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/rfcomm.h>
#if defined(CONFIG_BLK_DEV_LVM) || defined(CONFIG_BLK_DEV_LVM_MODULE)
/* Ugh. This header really is not clean */
#define min min
@@ -2906,35 +2909,28 @@ static int serial_struct_ioctl(unsigned fd, unsigned cmd, void *ptr)
{
typedef struct serial_struct SS;
struct serial_struct32 *ss32 = ptr;
- int err = 0;
+ int err;
struct serial_struct ss;
mm_segment_t oldseg = get_fs();
- set_fs(KERNEL_DS);
if (cmd == TIOCSSERIAL) {
- err = -EFAULT;
if (copy_from_user(&ss, ss32, sizeof(struct serial_struct32)))
- goto out;
+ return -EFAULT;
memmove(&ss.iomem_reg_shift, ((char*)&ss.iomem_base)+4,
sizeof(SS)-offsetof(SS,iomem_reg_shift));
ss.iomem_base = (void *)((unsigned long)ss.iomem_base & 0xffffffff);
}
- if (!err)
+ set_fs(KERNEL_DS);
err = sys_ioctl(fd,cmd,(unsigned long)(&ss));
+ set_fs(oldseg);
if (cmd == TIOCGSERIAL && err >= 0) {
- __u32 base;
if (__copy_to_user(ss32,&ss,offsetof(SS,iomem_base)) ||
- __copy_to_user(&ss32->iomem_reg_shift,
- &ss.iomem_reg_shift,
- sizeof(SS) - offsetof(SS, iomem_reg_shift)))
- err = -EFAULT;
- if (ss.iomem_base > (unsigned char *)0xffffffff)
- base = -1;
- else
- base = (unsigned long)ss.iomem_base;
- err |= __put_user(base, &ss32->iomem_base);
+ __put_user((unsigned long)ss.iomem_base >> 32 ?
+ 0xffffffff : (unsigned)(unsigned long)ss.iomem_base,
+ &ss32->iomem_base) ||
+ __put_user(ss.iomem_reg_shift, &ss32->iomem_reg_shift) ||
+ __put_user(ss.port_high, &ss32->port_high))
+ return -EFAULT;
}
- out:
- set_fs(oldseg);
return err;
}
@@ -3045,7 +3041,14 @@ static int do_blkgetsize64(unsigned int fd, unsigned int cmd,
return sys_ioctl(fd, BLKGETSIZE64, arg);
}
+/* Bluetooth ioctls */
+#define HCIUARTSETPROTO _IOW('U', 200, int)
+#define HCIUARTGETPROTO _IOR('U', 201, int)
+#define BNEPCONNADD _IOW('B', 200, int)
+#define BNEPCONNDEL _IOW('B', 201, int)
+#define BNEPGETCONNLIST _IOR('B', 210, int)
+#define BNEPGETCONNINFO _IOR('B', 211, int)
struct usbdevfs_ctrltransfer32 {
__u8 bRequestType;
@@ -4093,6 +4096,7 @@ COMPATIBLE_IOCTL(AUTOFS_IOC_FAIL)
COMPATIBLE_IOCTL(AUTOFS_IOC_CATATONIC)
COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOVER)
COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE)
+COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE_MULTI)
/* DEVFS */
COMPATIBLE_IOCTL(DEVFSDIOC_GET_PROTO_REV)
COMPATIBLE_IOCTL(DEVFSDIOC_SET_EVENT_MASK)
@@ -4200,6 +4204,17 @@ COMPATIBLE_IOCTL(HCISETLINKMODE)
COMPATIBLE_IOCTL(HCISETACLMTU)
COMPATIBLE_IOCTL(HCISETSCOMTU)
COMPATIBLE_IOCTL(HCIINQUIRY)
+COMPATIBLE_IOCTL(HCIUARTSETPROTO)
+COMPATIBLE_IOCTL(HCIUARTGETPROTO)
+COMPATIBLE_IOCTL(RFCOMMCREATEDEV)
+COMPATIBLE_IOCTL(RFCOMMRELEASEDEV)
+COMPATIBLE_IOCTL(RFCOMMGETDEVLIST)
+COMPATIBLE_IOCTL(RFCOMMGETDEVINFO)
+COMPATIBLE_IOCTL(RFCOMMSTEALDLC)
+COMPATIBLE_IOCTL(BNEPCONNADD)
+COMPATIBLE_IOCTL(BNEPCONNDEL)
+COMPATIBLE_IOCTL(BNEPGETCONNLIST)
+COMPATIBLE_IOCTL(BNEPGETCONNINFO)
/* Misc. */
COMPATIBLE_IOCTL(0x41545900) /* ATYIO_CLKR */
COMPATIBLE_IOCTL(0x41545901) /* ATYIO_CLKW */
diff --git a/arch/x86_64/ia32/ia32_signal.c b/arch/x86_64/ia32/ia32_signal.c
index 48203b32bad0..7f9a75a465ee 100644
--- a/arch/x86_64/ia32/ia32_signal.c
+++ b/arch/x86_64/ia32/ia32_signal.c
@@ -47,9 +47,16 @@ static int ia32_copy_siginfo_to_user(siginfo_t32 *to, siginfo_t *from)
{
if (!access_ok (VERIFY_WRITE, to, sizeof(siginfo_t)))
return -EFAULT;
- if (from->si_code < 0)
- return __copy_to_user(to, from, sizeof(siginfo_t));
- else {
+ if (from->si_code < 0) {
+ /* the only field that's different is the alignment
+ of the pointer in sigval_t. Move that 4 bytes down including
+ padding. */
+ memmove(&((siginfo_t32 *)&from)->si_int,
+ &from->si_int,
+ sizeof(siginfo_t) - offsetof(siginfo_t, si_int));
+ /* last 4 bytes stay the same */
+ return __copy_to_user(to, from, sizeof(siginfo_t32));
+ } else {
int err;
/* If you change siginfo_t structure, please be sure
@@ -59,7 +66,7 @@ static int ia32_copy_siginfo_to_user(siginfo_t32 *to, siginfo_t *from)
3 ints plus the relevant union member. */
err = __put_user(from->si_signo, &to->si_signo);
err |= __put_user(from->si_errno, &to->si_errno);
- err |= __put_user((short)from->si_code, &to->si_code);
+ err |= __put_user(from->si_code, &to->si_code);
/* First 32bits of unions are always present. */
err |= __put_user(from->si_pid, &to->si_pid);
switch (from->si_code >> 16) {
@@ -108,6 +115,7 @@ sys32_sigaltstack(const stack_ia32_t *uss_ptr, stack_ia32_t *uoss_ptr,
mm_segment_t seg;
if (uss_ptr) {
u32 ptr;
+ memset(&uss,0,sizeof(stack_t));
if (!access_ok(VERIFY_READ,uss_ptr,sizeof(stack_ia32_t)) ||
__get_user(ptr, &uss_ptr->ss_sp) ||
__get_user(uss.ss_flags, &uss_ptr->ss_flags) ||
@@ -340,8 +348,11 @@ ia32_setup_sigcontext(struct sigcontext_ia32 *sc, struct _fpstate_ia32 *fpstate,
tmp = save_i387_ia32(current, fpstate, regs, 0);
if (tmp < 0)
err = -EFAULT;
- else
+ else {
+ current->used_math = 0;
+ stts();
err |= __put_user((u32)(u64)(tmp ? fpstate : NULL), &sc->fpstate);
+ }
/* non-iBCS2 extensions.. */
err |= __put_user(mask, &sc->oldmask);
diff --git a/arch/x86_64/ia32/ipc32.c b/arch/x86_64/ia32/ipc32.c
index d8d4fed117f5..07e6a33b47f3 100644
--- a/arch/x86_64/ia32/ipc32.c
+++ b/arch/x86_64/ia32/ipc32.c
@@ -187,12 +187,58 @@ ipc_parse_version32 (int *cmd)
}
}
+static int put_semid(void *user_semid, struct semid64_ds *s, int version)
+{
+ int err2;
+ switch (version) {
+ case IPC_64: {
+ struct semid64_ds32 *usp64 = (struct semid64_ds32 *) user_semid;
+
+ if (!access_ok(VERIFY_WRITE, usp64, sizeof(*usp64))) {
+ err2 = -EFAULT;
+ break;
+ }
+ err2 = __put_user(s->sem_perm.key, &usp64->sem_perm.key);
+ err2 |= __put_user(s->sem_perm.uid, &usp64->sem_perm.uid);
+ err2 |= __put_user(s->sem_perm.gid, &usp64->sem_perm.gid);
+ err2 |= __put_user(s->sem_perm.cuid, &usp64->sem_perm.cuid);
+ err2 |= __put_user(s->sem_perm.cgid, &usp64->sem_perm.cgid);
+ err2 |= __put_user(s->sem_perm.mode, &usp64->sem_perm.mode);
+ err2 |= __put_user(s->sem_perm.seq, &usp64->sem_perm.seq);
+ err2 |= __put_user(s->sem_otime, &usp64->sem_otime);
+ err2 |= __put_user(s->sem_ctime, &usp64->sem_ctime);
+ err2 |= __put_user(s->sem_nsems, &usp64->sem_nsems);
+ break;
+ }
+ default: {
+ struct semid_ds32 *usp32 = (struct semid_ds32 *) user_semid;
+
+ if (!access_ok(VERIFY_WRITE, usp32, sizeof(*usp32))) {
+ err2 = -EFAULT;
+ break;
+ }
+ err2 = __put_user(s->sem_perm.key, &usp32->sem_perm.key);
+ err2 |= __put_user(s->sem_perm.uid, &usp32->sem_perm.uid);
+ err2 |= __put_user(s->sem_perm.gid, &usp32->sem_perm.gid);
+ err2 |= __put_user(s->sem_perm.cuid, &usp32->sem_perm.cuid);
+ err2 |= __put_user(s->sem_perm.cgid, &usp32->sem_perm.cgid);
+ err2 |= __put_user(s->sem_perm.mode, &usp32->sem_perm.mode);
+ err2 |= __put_user(s->sem_perm.seq, &usp32->sem_perm.seq);
+ err2 |= __put_user(s->sem_otime, &usp32->sem_otime);
+ err2 |= __put_user(s->sem_ctime, &usp32->sem_ctime);
+ err2 |= __put_user(s->sem_nsems, &usp32->sem_nsems);
+ break;
+ }
+ }
+ return err2;
+}
+
static int
semctl32 (int first, int second, int third, void *uptr)
{
union semun fourth;
u32 pad;
- int err = 0, err2;
+ int err;
struct semid64_ds s;
mm_segment_t old_fs;
int version = ipc_parse_version32(&third);
@@ -225,46 +271,10 @@ semctl32 (int first, int second, int third, void *uptr)
fourth.__pad = &s;
old_fs = get_fs();
set_fs(KERNEL_DS);
- err = sys_semctl(first, second|IPC_64, third, fourth);
+ err = sys_semctl(first, second, third|IPC_64, fourth);
set_fs(old_fs);
-
- if (version == IPC_64) {
- struct semid64_ds32 *usp64 = (struct semid64_ds32 *) A(pad);
-
- if (!access_ok(VERIFY_WRITE, usp64, sizeof(*usp64))) {
- err = -EFAULT;
- break;
- }
- err2 = __put_user(s.sem_perm.key, &usp64->sem_perm.key);
- err2 |= __put_user(s.sem_perm.uid, &usp64->sem_perm.uid);
- err2 |= __put_user(s.sem_perm.gid, &usp64->sem_perm.gid);
- err2 |= __put_user(s.sem_perm.cuid, &usp64->sem_perm.cuid);
- err2 |= __put_user(s.sem_perm.cgid, &usp64->sem_perm.cgid);
- err2 |= __put_user(s.sem_perm.mode, &usp64->sem_perm.mode);
- err2 |= __put_user(s.sem_perm.seq, &usp64->sem_perm.seq);
- err2 |= __put_user(s.sem_otime, &usp64->sem_otime);
- err2 |= __put_user(s.sem_ctime, &usp64->sem_ctime);
- err2 |= __put_user(s.sem_nsems, &usp64->sem_nsems);
- } else {
- struct semid_ds32 *usp32 = (struct semid_ds32 *) A(pad);
-
- if (!access_ok(VERIFY_WRITE, usp32, sizeof(*usp32))) {
- err = -EFAULT;
- break;
- }
- err2 = __put_user(s.sem_perm.key, &usp32->sem_perm.key);
- err2 |= __put_user(s.sem_perm.uid, &usp32->sem_perm.uid);
- err2 |= __put_user(s.sem_perm.gid, &usp32->sem_perm.gid);
- err2 |= __put_user(s.sem_perm.cuid, &usp32->sem_perm.cuid);
- err2 |= __put_user(s.sem_perm.cgid, &usp32->sem_perm.cgid);
- err2 |= __put_user(s.sem_perm.mode, &usp32->sem_perm.mode);
- err2 |= __put_user(s.sem_perm.seq, &usp32->sem_perm.seq);
- err2 |= __put_user(s.sem_otime, &usp32->sem_otime);
- err2 |= __put_user(s.sem_ctime, &usp32->sem_ctime);
- err2 |= __put_user(s.sem_nsems, &usp32->sem_nsems);
- }
- if (err2)
- err = -EFAULT;
+ if (!err)
+ err = put_semid((void *)A(pad), &s, version);
break;
default:
err = -EINVAL;
@@ -343,6 +353,7 @@ out:
return err;
}
+
static int
msgctl32 (int first, int second, void *uptr)
{
@@ -387,7 +398,6 @@ msgctl32 (int first, int second, void *uptr)
set_fs(KERNEL_DS);
err = sys_msgctl(first, second|IPC_64, (void *) &m64);
set_fs(old_fs);
-
if (version == IPC_64) {
if (!access_ok(VERIFY_WRITE, up64, sizeof(*up64))) {
err = -EFAULT;
@@ -608,7 +618,9 @@ shmctl32 (int first, int second, void *uptr)
if (err2)
err = -EFAULT;
break;
-
+ default:
+ err = -EINVAL;
+ break;
}
return err;
}
diff --git a/arch/x86_64/ia32/ptrace32.c b/arch/x86_64/ia32/ptrace32.c
index 9edf9f083938..2f5ee3786dd5 100644
--- a/arch/x86_64/ia32/ptrace32.c
+++ b/arch/x86_64/ia32/ptrace32.c
@@ -8,7 +8,7 @@
* This allows to access 64bit processes too; but there is no way to see the extended
* register contents.
*
- * $Id: ptrace32.c,v 1.12 2002/03/24 13:02:02 ak Exp $
+ * $Id: ptrace32.c,v 1.16 2003/03/14 16:06:35 ak Exp $
*/
#include <linux/kernel.h>
@@ -22,11 +22,9 @@
#include <asm/errno.h>
#include <asm/debugreg.h>
#include <asm/i387.h>
-#include <asm/desc.h>
-#include <asm/ldt.h>
#include <asm/fpu32.h>
-#include <linux/mm.h>
#include <linux/ptrace.h>
+#include <linux/mm.h>
#define R32(l,q) \
case offsetof(struct user32, regs.l): stack[offsetof(struct pt_regs, q)/8] = val; break
@@ -39,29 +37,26 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 val)
switch (regno) {
case offsetof(struct user32, regs.fs):
if (val && (val & 3) != 3) return -EIO;
- child->thread.fs = val;
+ child->thread.fs = val & 0xffff;
break;
case offsetof(struct user32, regs.gs):
if (val && (val & 3) != 3) return -EIO;
- child->thread.gs = val;
+ child->thread.gs = val & 0xffff;
break;
case offsetof(struct user32, regs.ds):
if (val && (val & 3) != 3) return -EIO;
- child->thread.ds = val;
+ child->thread.ds = val & 0xffff;
break;
case offsetof(struct user32, regs.es):
- if (val && (val & 3) != 3) return -EIO;
- child->thread.es = val;
+ child->thread.es = val & 0xffff;
break;
-
case offsetof(struct user32, regs.ss):
if ((val & 3) != 3) return -EIO;
- stack[offsetof(struct pt_regs, ss)/8] = val;
+ stack[offsetof(struct pt_regs, ss)/8] = val & 0xffff;
break;
-
case offsetof(struct user32, regs.cs):
if ((val & 3) != 3) return -EIO;
- stack[offsetof(struct pt_regs, cs)/8] = val;
+ stack[offsetof(struct pt_regs, cs)/8] = val & 0xffff;
break;
R32(ebx, rbx);
@@ -79,8 +74,16 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 val)
stack[offsetof(struct pt_regs, eflags)/8] = val & 0x44dd5;
break;
- case offsetof(struct user32, u_debugreg[0]) ... offsetof(struct user32, u_debugreg[6]):
- child->thread.debugreg[(regno-offsetof(struct user32, u_debugreg[0]))/4] = val;
+ case offsetof(struct user32, u_debugreg[4]):
+ case offsetof(struct user32, u_debugreg[5]):
+ return -EIO;
+
+ case offsetof(struct user32, u_debugreg[0]) ...
+ offsetof(struct user32, u_debugreg[3]):
+ case offsetof(struct user32, u_debugreg[6]):
+ child->thread.debugreg
+ [(regno-offsetof(struct user32, u_debugreg[0]))/4]
+ = val;
break;
case offsetof(struct user32, u_debugreg[7]):
@@ -170,11 +173,19 @@ static struct task_struct *find_target(int request, int pid, int *err)
if (child)
get_task_struct(child);
read_unlock(&tasklist_lock);
- *err = ptrace_check_attach(child,0);
- if (*err == 0)
+ if (child) {
+ *err = -EPERM;
+ if (child->pid == 1)
+ goto out;
+ *err = ptrace_check_attach(child, request == PTRACE_KILL);
+ if (*err < 0)
+ goto out;
return child;
+ }
+ out:
put_task_struct(child);
return NULL;
+
}
extern asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, unsigned long data);
@@ -187,6 +198,9 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
__u32 val;
switch (request) {
+ default:
+ return sys_ptrace(request, pid, addr, data);
+
case PTRACE_PEEKTEXT:
case PTRACE_PEEKDATA:
case PTRACE_POKEDATA:
@@ -201,9 +215,6 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
case PTRACE_GETFPXREGS:
break;
- default:
- ret = sys_ptrace(request, pid, addr, data);
- return ret;
}
child = find_target(request, pid, &ret);
@@ -261,7 +272,6 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
ret = -EIO;
break;
}
- empty_fpu(child);
ret = 0;
for ( i = 0; i <= 16*4; i += sizeof(u32) ) {
ret |= __get_user(tmp, (u32 *) (unsigned long) data);
@@ -271,33 +281,47 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
break;
}
- case PTRACE_SETFPREGS:
- empty_fpu(child);
+ case PTRACE_GETFPREGS:
+ ret = -EIO;
+ if (!access_ok(VERIFY_READ, (void *)(u64)data,
+ sizeof(struct user_i387_struct)))
+ break;
save_i387_ia32(child, (void *)(u64)data, childregs, 1);
ret = 0;
break;
- case PTRACE_GETFPREGS:
- empty_fpu(child);
- restore_i387_ia32(child, (void *)(u64)data, 1);
+ case PTRACE_SETFPREGS:
+ ret = -EIO;
+ if (!access_ok(VERIFY_WRITE, (void *)(u64)data,
+ sizeof(struct user_i387_struct)))
+ break;
ret = 0;
+ /* don't check EFAULT to be bug-to-bug compatible to i386 */
+ restore_i387_ia32(child, (void *)(u64)data, 1);
break;
case PTRACE_GETFPXREGS: {
struct user32_fxsr_struct *u = (void *)(u64)data;
- empty_fpu(child);
- ret = copy_to_user(u, &child->thread.i387.fxsave, sizeof(*u));
- ret |= __put_user(childregs->cs, &u->fcs);
- ret |= __put_user(child->thread.ds, &u->fos);
- if (ret)
+ init_fpu(child);
+ ret = -EIO;
+ if (!access_ok(VERIFY_WRITE, u, sizeof(*u)))
+ break;
ret = -EFAULT;
+ if (__copy_to_user(u, &child->thread.i387.fxsave, sizeof(*u)))
+ break;
+ ret = __put_user(childregs->cs, &u->fcs);
+ ret |= __put_user(child->thread.ds, &u->fos);
break;
}
case PTRACE_SETFPXREGS: {
struct user32_fxsr_struct *u = (void *)(u64)data;
- empty_fpu(child);
- /* no error checking to be bug to bug compatible with i386 */
- copy_from_user(&child->thread.i387.fxsave, u, sizeof(*u));
+ unlazy_fpu(child);
+ ret = -EIO;
+ if (!access_ok(VERIFY_READ, u, sizeof(*u)))
+ break;
+ /* no checking to be bug-to-bug compatible with i386 */
+ __copy_from_user(&child->thread.i387.fxsave, u, sizeof(*u));
+ child->used_math = 1;
child->thread.i387.fxsave.mxcsr &= 0xffbf;
ret = 0;
break;
diff --git a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c
index caa82b63cdfa..9ae80ec57f65 100644
--- a/arch/x86_64/ia32/sys_ia32.c
+++ b/arch/x86_64/ia32/sys_ia32.c
@@ -57,6 +57,7 @@
#include <linux/binfmts.h>
#include <linux/init.h>
#include <linux/aio_abi.h>
+#include <linux/aio.h>
#include <linux/compat.h>
#include <linux/vfs.h>
#include <linux/ptrace.h>
@@ -74,6 +75,7 @@
#define A(__x) ((unsigned long)(__x))
#define AA(__x) ((unsigned long)(__x))
+#define u32_to_ptr(x) ((void *)(u64)(x))
#define ROUND_UP(x,a) ((__typeof__(x))(((unsigned long)(x) + ((a) - 1)) & ~((a) - 1)))
#define NAME_OFFSET(de) ((int) ((de)->d_name - (char *) (de)))
@@ -738,7 +740,7 @@ asmlinkage ssize_t sys_readv(unsigned long,const struct iovec *,unsigned long);
asmlinkage ssize_t sys_writev(unsigned long,const struct iovec *,unsigned long);
static struct iovec *
-get_compat_iovec(struct compat_iovec *iov32, struct iovec *iov_buf, u32 count, int type, int *errp)
+get_compat_iovec(struct compat_iovec *iov32, struct iovec *iov_buf, u32 *count, int type, int *errp)
{
int i;
u32 buf, len;
@@ -747,15 +749,18 @@ get_compat_iovec(struct compat_iovec *iov32, struct iovec *iov_buf, u32 count, i
/* Get the "struct iovec" from user memory */
- if (!count)
+ *errp = 0;
+ if (!*count)
return 0;
- if (count > UIO_MAXIOV)
+ *errp = -EINVAL;
+ if (*count > UIO_MAXIOV)
return(struct iovec *)0;
- if(verify_area(VERIFY_READ, iov32, sizeof(struct compat_iovec)*count))
+ *errp = -EFAULT;
+ if(verify_area(VERIFY_READ, iov32, sizeof(struct compat_iovec)*(*count)))
return(struct iovec *)0;
- if (count > UIO_FASTIOV) {
+ if (*count > UIO_FASTIOV) {
*errp = -ENOMEM;
- iov = kmalloc(count*sizeof(struct iovec), GFP_KERNEL);
+ iov = kmalloc(*count*sizeof(struct iovec), GFP_KERNEL);
if (!iov)
return((struct iovec *)0);
} else
@@ -763,14 +768,19 @@ get_compat_iovec(struct compat_iovec *iov32, struct iovec *iov_buf, u32 count, i
ivp = iov;
totlen = 0;
- for (i = 0; i < count; i++) {
+ for (i = 0; i < *count; i++) {
*errp = __get_user(len, &iov32->iov_len) |
__get_user(buf, &iov32->iov_base);
if (*errp)
goto error;
*errp = verify_area(type, (void *)A(buf), len);
- if (*errp)
+ if (*errp) {
+ if (i > 0) {
+ *count = i;
+ break;
+ }
goto error;
+ }
/* SuS checks: */
*errp = -EINVAL;
if ((int)len < 0)
@@ -799,7 +809,7 @@ sys32_readv(int fd, struct compat_iovec *vector, u32 count)
int ret;
mm_segment_t old_fs = get_fs();
- if ((iov = get_compat_iovec(vector, iovstack, count, VERIFY_WRITE, &ret)) == NULL)
+ if ((iov = get_compat_iovec(vector, iovstack, &count, VERIFY_WRITE, &ret)) == NULL)
return ret;
set_fs(KERNEL_DS);
ret = sys_readv(fd, iov, count);
@@ -817,7 +827,7 @@ sys32_writev(int fd, struct compat_iovec *vector, u32 count)
int ret;
mm_segment_t old_fs = get_fs();
- if ((iov = get_compat_iovec(vector, iovstack, count, VERIFY_READ, &ret)) == NULL)
+ if ((iov = get_compat_iovec(vector, iovstack, &count, VERIFY_READ, &ret)) == NULL)
return ret;
set_fs(KERNEL_DS);
ret = sys_writev(fd, iov, count);
@@ -1672,21 +1682,26 @@ static int nargs(u32 src, char **dst)
return cnt;
}
-long sys32_execve(char *name, u32 argv, u32 envp, struct pt_regs regs)
+asmlinkage long sys32_execve(char *name, u32 argv, u32 envp, struct pt_regs regs)
{
mm_segment_t oldseg;
- char **buf;
- int na,ne;
+ char **buf = NULL;
+ int na = 0,ne = 0;
int ret;
- unsigned sz;
+ unsigned sz = 0;
+ if (argv) {
na = nargs(argv, NULL);
if (na < 0)
return -EFAULT;
+ }
+ if (envp) {
ne = nargs(envp, NULL);
if (ne < 0)
return -EFAULT;
+ }
+ if (argv || envp) {
sz = (na+ne)*sizeof(void *);
if (sz > PAGE_SIZE)
buf = vmalloc(sz);
@@ -1694,14 +1709,19 @@ long sys32_execve(char *name, u32 argv, u32 envp, struct pt_regs regs)
buf = kmalloc(sz, GFP_KERNEL);
if (!buf)
return -ENOMEM;
+ }
+ if (argv) {
ret = nargs(argv, buf);
if (ret < 0)
goto free;
+ }
+ if (envp) {
ret = nargs(envp, buf + na);
if (ret < 0)
goto free;
+ }
name = getname(name);
ret = PTR_ERR(name);
@@ -1710,7 +1730,7 @@ long sys32_execve(char *name, u32 argv, u32 envp, struct pt_regs regs)
oldseg = get_fs();
set_fs(KERNEL_DS);
- ret = do_execve(name, buf, buf+na, &regs);
+ ret = do_execve(name, argv ? buf : NULL, envp ? buf+na : NULL, &regs);
set_fs(oldseg);
if (ret == 0)
@@ -1719,10 +1739,12 @@ long sys32_execve(char *name, u32 argv, u32 envp, struct pt_regs regs)
putname(name);
free:
+ if (argv || envp) {
if (sz > PAGE_SIZE)
vfree(buf);
else
kfree(buf);
+ }
return ret;
}
@@ -2012,12 +2034,8 @@ long asmlinkage sys32_nfsservctl(int cmd, void *notused, void *notused2)
long sys32_module_warning(void)
{
- static long warn_time = -(60*HZ);
- if (time_before(warn_time + 60*HZ,jiffies) && strcmp(current->comm,"klogd")) {
printk(KERN_INFO "%s: 32bit 2.4.x modutils not supported on 64bit kernel\n",
current->comm);
- warn_time = jiffies;
- }
return -ENOSYS ;
}
@@ -2055,6 +2073,7 @@ long sys32_sched_getaffinity(pid_t pid, unsigned int len,
return err;
}
+
extern long sys_io_setup(unsigned nr_reqs, aio_context_t *ctx);
long sys32_io_setup(unsigned nr_reqs, u32 *ctx32p)
@@ -2071,48 +2090,47 @@ long sys32_io_setup(unsigned nr_reqs, u32 *ctx32p)
return ret;
}
-extern asmlinkage long sys_io_submit(aio_context_t ctx_id, long nr,
- struct iocb **iocbpp);
-
-long sys32_io_submit(aio_context_t ctx_id, unsigned long nr,
+asmlinkage long sys32_io_submit(aio_context_t ctx_id, int nr,
u32 *iocbpp)
{
- mm_segment_t oldfs = get_fs();
- int k, err = 0;
- struct iocb **iocb64;
- if (nr > 128)
+ struct kioctx *ctx;
+ long ret = 0;
+ int i;
+
+ if (unlikely(nr < 0))
+ return -EINVAL;
+
+ if (unlikely(!access_ok(VERIFY_READ, iocbpp, (nr*sizeof(*iocbpp)))))
+ return -EFAULT;
+
+ ctx = lookup_ioctx(ctx_id);
+ if (unlikely(!ctx)) {
+ pr_debug("EINVAL: io_submit: invalid context id\n");
return -EINVAL;
- iocb64 = kmalloc(sizeof(struct iocb *) * nr, GFP_KERNEL);
- if (!iocb64)
- return -ENOMEM;
- for (k = 0; k < nr && !err; k++) {
- u64 val1, val2;
- u32 iocb32;
- struct iocb *iocb;
- err = get_user(iocb32, (u32 *)(u64)iocbpp[k]);
- iocb64[k] = iocb = (void *)(u64)iocb32;
-
- if (get_user(val1, &iocb->aio_buf) ||
- get_user(val2, &iocb->aio_nbytes))
- err = -EFAULT;
- else if (!val1) /* should check cmd */
- ;
- else if (verify_area(VERIFY_WRITE, (void*)val1, val2))
- err = -EFAULT;
-
- /* paranoia check - remove it when you are sure they
- are not pointers */
- if (get_user(val1, &iocb->aio_reserved2) || val1 ||
- get_user(val2, &iocb->aio_reserved2) || val2)
- err = -EFAULT;
}
- if (!err) {
- set_fs(KERNEL_DS);
- err = sys_io_submit(ctx_id, nr, iocb64);
- set_fs(oldfs);
+
+ for (i=0; i<nr; i++) {
+ u32 p32;
+ struct iocb *user_iocb, tmp;
+
+ if (unlikely(__get_user(p32, iocbpp + i))) {
+ ret = -EFAULT;
+ break;
}
- kfree(iocb64);
- return err;
+ user_iocb = u32_to_ptr(p32);
+
+ if (unlikely(copy_from_user(&tmp, user_iocb, sizeof(tmp)))) {
+ ret = -EFAULT;
+ break;
+ }
+
+ ret = io_submit_one(ctx, user_iocb, &tmp);
+ if (ret)
+ break;
+ }
+
+ put_ioctx(ctx);
+ return i ? i : ret;
}
extern asmlinkage long sys_io_getevents(aio_context_t ctx_id,
@@ -2140,7 +2158,7 @@ asmlinkage long sys32_io_getevents(aio_context_t ctx_id,
set_fs(KERNEL_DS);
ret = sys_io_getevents(ctx_id,min_nr,nr,events,timeout ? &t : NULL);
set_fs(oldfs);
- if (timeout && put_compat_timespec(&t, timeout))
+ if (!ret && timeout && put_compat_timespec(&t, timeout))
return -EFAULT;
return ret;
}
@@ -2172,12 +2190,8 @@ asmlinkage long sys32_open(const char * filename, int flags, int mode)
long sys32_vm86_warning(void)
{
- static long warn_time = -(60*HZ);
- if (time_before(warn_time + 60*HZ,jiffies)) {
printk(KERN_INFO "%s: vm86 mode not supported on 64 bit kernel\n",
current->comm);
- warn_time = jiffies;
- }
return -ENOSYS ;
}
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index 4b4ef37305d0..ce5d09400fcc 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -10,14 +10,13 @@ obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \
setup64.o bluesmoke.o bootflag.o e820.o reboot.o
obj-$(CONFIG_MTRR) += mtrr/
+obj-$(CONFIG_ACPI) += acpi/
obj-$(CONFIG_X86_MSR) += msr.o
obj-$(CONFIG_X86_CPUID) += cpuid.o
obj-$(CONFIG_SMP) += smp.o smpboot.o trampoline.o
obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
obj-$(CONFIG_X86_IO_APIC) += io_apic.o mpparse.o
obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend.o suspend_asm.o
-obj-$(CONFIG_ACPI) += acpi.o
-obj-$(CONFIG_ACPI_SLEEP) += wakeup.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o
obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o
diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c
index 24d72222e5cc..871394fa7547 100644
--- a/arch/x86_64/kernel/aperture.c
+++ b/arch/x86_64/kernel/aperture.c
@@ -105,7 +105,8 @@ void __init iommu_hole_init(void)
if (!fix && !fallback_aper_force)
return;
- printk("Your BIOS is broken and doesn't leave a aperture memory hole\n");
+ printk("Your BIOS doesn't leave an aperture memory hole\n");
+ printk("Please enable the IOMMU option in the BIOS setup\n");
aper_alloc = allocate_aperture();
if (!aper_alloc)
return;
diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
index ccbf35791611..53573cfec231 100644
--- a/arch/x86_64/kernel/apic.c
+++ b/arch/x86_64/kernel/apic.c
@@ -408,7 +408,7 @@ void __init setup_local_APIC (void)
if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
apic_write(APIC_ESR, 0);
value = apic_read(APIC_ESR);
- printk("ESR value before enabling vector: %08x\n", value);
+ Dprintk("ESR value before enabling vector: %08x\n", value);
value = ERROR_APIC_VECTOR; // enables sending errors
apic_write_around(APIC_LVTERR, value);
@@ -418,7 +418,7 @@ void __init setup_local_APIC (void)
if (maxlvt > 3)
apic_write(APIC_ESR, 0);
value = apic_read(APIC_ESR);
- printk("ESR value after enabling vector: %08x\n", value);
+ Dprintk("ESR value after enabling vector: %08x\n", value);
} else {
if (esr_disable)
/*
@@ -1080,9 +1080,10 @@ int __init APIC_init_uniprocessor (void)
if (nmi_watchdog == NMI_LOCAL_APIC)
check_nmi_watchdog();
#ifdef CONFIG_X86_IO_APIC
- if (smp_found_config)
- if (!skip_ioapic_setup && nr_ioapics)
+ if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
setup_IO_APIC();
+ else
+ nr_ioapics = 0;
#endif
setup_boot_APIC_clock();
diff --git a/arch/x86_64/kernel/bluesmoke.c b/arch/x86_64/kernel/bluesmoke.c
index 831e848a7ace..71fca07f8c52 100644
--- a/arch/x86_64/kernel/bluesmoke.c
+++ b/arch/x86_64/kernel/bluesmoke.c
@@ -1,65 +1,87 @@
/*
- * arch/x86_64/kernel/bluesmoke.c - x86-64 Machine Check Exception Reporting
- *
-
-RED-PEN: need to add power management to restore after S3 wakeup.
-
+ * Machine check handler.
+ * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
+ * Rest from unknown author(s).
*/
-
+#include <linux/config.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/kernel.h>
-#include <linux/jiffies.h>
-#include <linux/smp.h>
-#include <linux/config.h>
-#include <linux/irq.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
#include <asm/processor.h>
-#include <asm/system.h>
#include <asm/msr.h>
-#include <asm/apic.h>
-#include <asm/pgtable.h>
-#include <asm/tlbflush.h>
+#include <asm/kdebug.h>
+#include <linux/pci.h>
+#include <linux/timer.h>
-#ifdef CONFIG_X86_MCE
+static int mce_disabled __initdata;
+static unsigned long mce_cpus;
-static int mce_disabled __initdata = 0;
+/*
+ * Machine Check Handler For PII/PIII/K7
+ */
static int banks;
+static unsigned long ignored_banks, disabled_banks;
+/* Machine Check on everything dubious. This is a good setting
+ for device driver testing. */
+#define K8_DRIVER_DEBUG ((1<<13)-1)
+/* Report RAM errors and Hyper Transport Problems, but ignore Device
+ aborts and GART errors. */
+#define K8_NORMAL_OP 0xff
-/*
- * Machine Check Handler For Hammer
- */
+#ifdef CONFIG_MCE_DEBUG
+static u32 k8_nb_flags __initdata = K8_DRIVER_DEBUG;
+#else
+static u32 k8_nb_flags __initdata = K8_NORMAL_OP;
+#endif
-static void hammer_machine_check(struct pt_regs * regs, long error_code)
+static void generic_machine_check(struct pt_regs * regs, long error_code)
{
int recover=1;
u32 alow, ahigh, high, low;
u32 mcgstl, mcgsth;
int i;
+ preempt_disable();
+
rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
if(mcgstl&(1<<0)) /* Recoverable ? */
recover=0;
printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", smp_processor_id(), mcgsth, mcgstl);
- preempt_disable();
- for (i=0;i<banks;i++) {
+
+ if (regs && (mcgstl & 2))
+ printk(KERN_EMERG "RIP <%02lx>:%016lx RSP %016lx\n",
+ regs->cs, regs->rip, regs->rsp);
+
+ for(i=0;i<banks;i++)
+ {
+ if ((1UL<<i) & ignored_banks)
+ continue;
+
rdmsr(MSR_IA32_MC0_STATUS+i*4,low, high);
- if(high&(1<<31)) {
+ if(high&(1<<31))
+ {
if(high&(1<<29))
recover|=1;
if(high&(1<<25))
recover|=2;
printk(KERN_EMERG "Bank %d: %08x%08x", i, high, low);
high&=~(1<<31);
- if(high&(1<<27)) {
+ if(high&(1<<27))
+ {
rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
- printk("[%08x%08x]", ahigh, alow);
+ printk("[%08x%08x]", alow, ahigh);
}
- if(high&(1<<26)) {
+ if(high&(1<<26))
+ {
rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
- printk(" at %08x%08x", ahigh, alow);
+ printk(" at %08x%08x",
+ ahigh, alow);
}
printk("\n");
/* Clear it */
@@ -68,7 +90,6 @@ static void hammer_machine_check(struct pt_regs * regs, long error_code)
wmb();
}
}
- preempt_enable();
if(recover&2)
panic("CPU context corrupt");
@@ -77,16 +98,13 @@ static void hammer_machine_check(struct pt_regs * regs, long error_code)
printk(KERN_EMERG "Attempting to continue.\n");
mcgstl&=~(1<<2);
wrmsr(MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
-}
-
-/*
- * Handle unconfigured int18 (should never happen)
- */
+ preempt_enable();
+}
-static void unexpected_machine_check(struct pt_regs * regs, long error_code)
+static void unexpected_machine_check(struct pt_regs *regs, long error_code)
{
- printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", smp_processor_id());
+ printk("unexpected machine check %lx\n", error_code);
}
/*
@@ -95,56 +113,194 @@ static void unexpected_machine_check(struct pt_regs * regs, long error_code)
static void (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check;
-asmlinkage void do_machine_check(struct pt_regs * regs, long error_code)
+void do_machine_check(struct pt_regs * regs, long error_code)
{
+ notify_die(DIE_NMI, "machine check", regs, error_code, 255, SIGKILL);
machine_check_vector(regs, error_code);
}
+/*
+ * K8 machine check.
+ */
-#ifdef CONFIG_X86_MCE_NONFATAL
-static struct timer_list mce_timer;
-static int timerset = 0;
-
-#define MCE_RATE 15*HZ /* timer rate is 15s */
+static struct pci_dev *find_k8_nb(void)
+{
+ struct pci_dev *dev;
+ int cpu = smp_processor_id();
+ pci_for_each_dev(dev) {
+ if (dev->bus->number==0 && PCI_FUNC(dev->devfn)==3 &&
+ PCI_SLOT(dev->devfn) == (24+cpu))
+ return dev;
+ }
+ return NULL;
+}
-static void mce_checkregs (void *info)
+static void check_k8_nb(void)
{
- u32 low, high;
- int i;
+ struct pci_dev *nb;
+ u32 statuslow, statushigh;
+ nb = find_k8_nb();
+ if (nb == NULL)
+ return;
- for (i=0; i<banks; i++) {
- rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
+ pci_read_config_dword(nb, 0x48, &statuslow);
+ pci_read_config_dword(nb, 0x4c, &statushigh);
+ if (!(statushigh & (1<<31)))
+ return;
+ printk(KERN_ERR "Northbridge status %08x%08x\n",
+ statushigh,statuslow);
+ if (statuslow & 0x10)
+ printk(KERN_ERR "GART error %d\n", statuslow & 0xf);
+ if (statushigh & (1<<31))
+ printk(KERN_ERR "Lost an northbridge error\n");
+ if (statushigh & (1<<25))
+ printk(KERN_EMERG "NB status: unrecoverable\n");
+ if (statushigh & (1<<26)) {
+ u32 addrhigh, addrlow;
+ pci_read_config_dword(nb, 0x54, &addrhigh);
+ pci_read_config_dword(nb, 0x50, &addrlow);
+ printk(KERN_ERR "NB error address %08x%08x\n", addrhigh,addrlow);
+ }
+ if (statushigh & (1<<29))
+ printk(KERN_EMERG "Error uncorrected\n");
+ statushigh &= ~(1<<31);
+ pci_write_config_dword(nb, 0x4c, statushigh);
+}
- if ((low | high) != 0) {
- printk (KERN_EMERG "MCE: The hardware reports a non fatal, correctable incident occurred on CPU %d.\n", smp_processor_id());
- printk (KERN_EMERG "Bank %d: %08x%08x\n", i, high, low);
+static void k8_machine_check(struct pt_regs * regs, long error_code)
+{
+ u64 status, nbstatus;
- /* Scrub the error so we don't pick it up in MCE_RATE seconds time. */
- wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
+ preempt_disable();
- /* Serialize */
- wmb();
+ rdmsrl(MSR_IA32_MCG_STATUS, status);
+ if ((status & (1<<2)) == 0) {
+ if (!regs)
+ check_k8_nb();
+ return;
}
+ if (status & 1)
+ printk(KERN_EMERG "MCG_STATUS: unrecoverable\n");
+
+ rdmsrl(MSR_IA32_MC0_STATUS+4*4, nbstatus);
+ if ((nbstatus & (1UL<<63)) == 0)
+ goto others;
+
+ printk(KERN_EMERG "Northbridge Machine Check %s %016lx %lx\n",
+ regs ? "exception" : "timer",
+ (unsigned long)nbstatus, error_code);
+ if (nbstatus & (1UL<<62))
+ printk(KERN_EMERG "Lost at least one NB error condition\n");
+ if (nbstatus & (1UL<<61))
+ printk(KERN_EMERG "Uncorrectable condition\n");
+ if (nbstatus & (1UL<<57))
+ printk(KERN_EMERG "Unrecoverable condition\n");
+
+ check_k8_nb();
+
+ if (nbstatus & (1UL<<58)) {
+ u64 adr;
+ rdmsrl(MSR_IA32_MC0_ADDR+4*4, adr);
+ printk(KERN_EMERG "Address: %016lx\n", (unsigned long)adr);
}
-}
+
+ wrmsrl(MSR_IA32_MC0_STATUS+4*4, 0);
+ wrmsrl(MSR_IA32_MCG_STATUS, 0);
+
+ if (regs && (status & (1<<1)))
+ printk(KERN_EMERG "MCE at EIP %lx ESP %lx\n", regs->rip, regs->rsp);
+ others:
+ generic_machine_check(regs, error_code);
-static void mce_timerfunc (unsigned long data)
+ preempt_enable();
+}
+
+static struct timer_list mcheck_timer;
+int mcheck_interval = 30*HZ;
+
+#ifndef CONFIG_SMP
+static void mcheck_timer_handler(unsigned long data)
{
- on_each_cpu (mce_checkregs, NULL, 1, 1);
+ k8_machine_check(NULL,0);
+ mcheck_timer.expires = jiffies + mcheck_interval;
+ add_timer(&mcheck_timer);
+}
+#else
- /* Refresh the timer. */
- mce_timer.expires = jiffies + MCE_RATE;
- add_timer (&mce_timer);
+/* SMP needs a process context trampoline because smp_call_function cannot be
+ called from interrupt context. */
+
+static void mcheck_timer_other(void *data)
+{
+ k8_machine_check(NULL, 0);
+}
+
+static void mcheck_timer_dist(void *data)
+{
+ smp_call_function(mcheck_timer_other,0,0,0);
+ k8_machine_check(NULL, 0);
+ mcheck_timer.expires = jiffies + mcheck_interval;
+ add_timer(&mcheck_timer);
+}
+
+static void mcheck_timer_handler(unsigned long data)
+{
+ static DECLARE_WORK(mcheck_work, mcheck_timer_dist, NULL);
+ schedule_work(&mcheck_work);
}
#endif
+static int nok8 __initdata;
+
+static void __init k8_mcheck_init(struct cpuinfo_x86 *c)
+{
+ u64 cap;
+ int i;
+ struct pci_dev *nb;
+
+ if (!test_bit(X86_FEATURE_MCE, &c->x86_capability) ||
+ !test_bit(X86_FEATURE_MCA, &c->x86_capability))
+ return;
+
+ rdmsrl(MSR_IA32_MCG_CAP, cap);
+ banks = cap&0xff;
+ machine_check_vector = k8_machine_check;
+ for (i = 0; i < banks; i++) {
+ u64 val = ((1UL<<i) & disabled_banks) ? 0 : ~0UL;
+ wrmsrl(MSR_IA32_MC0_CTL+4*i, val);
+ wrmsrl(MSR_IA32_MC0_STATUS+4*i,0);
+ }
+
+ nb = find_k8_nb();
+ if (nb != NULL) {
+ u32 reg, reg2;
+ pci_read_config_dword(nb, 0x40, &reg);
+ pci_write_config_dword(nb, 0x40, k8_nb_flags);
+ pci_read_config_dword(nb, 0x44, &reg2);
+ pci_write_config_dword(nb, 0x44, reg2);
+ printk(KERN_INFO "Machine Check for K8 Northbridge %d enabled (%x,%x)\n",
+ nb->devfn, reg, reg2);
+ ignored_banks |= (1UL<<4);
+ }
+
+ set_in_cr4(X86_CR4_MCE);
+
+ if (mcheck_interval && (smp_processor_id() == 0)) {
+ init_timer(&mcheck_timer);
+ mcheck_timer.function = (void (*)(unsigned long))mcheck_timer_handler;
+ mcheck_timer.expires = jiffies + mcheck_interval;
+ add_timer(&mcheck_timer);
+ }
+
+ printk(KERN_INFO "Machine Check Reporting enabled for CPU#%d\n", smp_processor_id());
+}
/*
- * Set up machine check reporting for processors with Intel style MCE
+ * Set up machine check reporting for Intel processors
*/
-static void __init hammer_mcheck_init(struct cpuinfo_x86 *c)
+static void __init generic_mcheck_init(struct cpuinfo_x86 *c)
{
u32 l, h;
int i;
@@ -154,33 +310,36 @@ static void __init hammer_mcheck_init(struct cpuinfo_x86 *c)
* Check for MCE support
*/
- if( !test_bit(X86_FEATURE_MCE, c->x86_capability) )
+ if( !test_bit(X86_FEATURE_MCE, &c->x86_capability) )
return;
- /* Check for PPro style MCA */
- if( !test_bit(X86_FEATURE_MCA, c->x86_capability) )
+ /*
+ * Check for PPro style MCA
+ */
+
+ if( !test_bit(X86_FEATURE_MCA, &c->x86_capability) )
return;
/* Ok machine check is available */
- machine_check_vector = hammer_machine_check;
+
+ machine_check_vector = generic_machine_check;
wmb();
if(done==0)
- printk(KERN_INFO "Machine check architecture supported.\n");
+ printk(KERN_INFO "Intel machine check architecture supported.\n");
rdmsr(MSR_IA32_MCG_CAP, l, h);
- if(l&(1<<8)) /* Control register present ? */
+ if(l&(1<<8))
wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
banks = l&0xff;
- for(i=0; i<banks; i++)
- wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
-
- for(i=0; i<banks; i++)
+ for(i=0;i<banks;i++)
+ {
+ u32 val = ((1UL<<i) & disabled_banks) ? 0 : ~0;
+ wrmsr(MSR_IA32_MC0_CTL+4*i, val, val);
wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
-
+ }
set_in_cr4(X86_CR4_MCE);
- printk(KERN_INFO "Machine check reporting enabled on CPU#%d.\n", smp_processor_id());
-
+ printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", smp_processor_id());
done=1;
}
@@ -190,30 +349,22 @@ static void __init hammer_mcheck_init(struct cpuinfo_x86 *c)
void __init mcheck_init(struct cpuinfo_x86 *c)
{
+ if (test_and_set_bit(smp_processor_id(), &mce_cpus))
+ return;
if(mce_disabled==1)
return;
- switch(c->x86_vendor)
- {
+ switch(c->x86_vendor) {
case X86_VENDOR_AMD:
- hammer_mcheck_init(c);
-#ifdef CONFIG_X86_MCE_NONFATAL
- if (timerset == 0) {
- /* Set the timer to check for non-fatal
- errors every MCE_RATE seconds */
- init_timer (&mce_timer);
- mce_timer.expires = jiffies + MCE_RATE;
- mce_timer.data = 0;
- mce_timer.function = &mce_timerfunc;
- add_timer (&mce_timer);
- timerset = 1;
- printk(KERN_INFO "Machine check exception polling timer started.\n");
- }
-#endif
+ if (c->x86 == 15 && !nok8) {
+ k8_mcheck_init(c);
break;
-
+ }
+ /* FALL THROUGH */
default:
+ case X86_VENDOR_INTEL:
+ generic_mcheck_init(c);
break;
}
}
@@ -224,16 +375,33 @@ static int __init mcheck_disable(char *str)
return 0;
}
+
+/* mce=off disable machine check
+ mce=nok8 disable k8 specific features
+ mce=disable<NUMBER> disable bank NUMBER
+ mce=enable<NUMBER> enable bank number
+ mce=device Enable device driver test reporting in NB
+ mce=NUMBER mcheck timer interval number seconds.
+ Can be also comma separated in a single mce= */
static int __init mcheck_enable(char *str)
{
- mce_disabled = -1;
+ char *p;
+ while ((p = strsep(&str,",")) != NULL) {
+ if (isdigit(*p))
+ mcheck_interval = simple_strtol(p,NULL,0) * HZ;
+ else if (!strcmp(p,"off"))
+ mce_disabled = 1;
+ else if (!strncmp(p,"enable",6))
+ disabled_banks &= ~(1<<simple_strtol(p+6,NULL,0));
+ else if (!strncmp(p,"disable",7))
+ disabled_banks |= (1<<simple_strtol(p+7,NULL,0));
+ else if (!strcmp(p,"nok8"))
+ nok8 = 1;
+ else if (!strcmp(p,"device"))
+ k8_nb_flags = K8_DRIVER_DEBUG;
+ }
return 0;
}
__setup("nomce", mcheck_disable);
__setup("mce", mcheck_enable);
-
-#else
-asmlinkage void do_machine_check(struct pt_regs * regs, long error_code) {}
-void __init mcheck_init(struct cpuinfo_x86 *c) {}
-#endif
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 1386a4a47346..0934b664e5d5 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -359,8 +359,13 @@ ENTRY(stub_rt_sigreturn)
/* 0(%rsp): interrupt number */
.macro interrupt func
cld
+#ifdef CONFIG_X86_REMOTE_DEBUG
+ SAVE_ALL
+ movq %rsp,%rdi
+#else
SAVE_ARGS
leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
+#endif
testl $3,CS(%rdi)
je 1f
swapgs
@@ -667,18 +672,6 @@ ENTRY(execve)
ret
ENTRY(page_fault)
-#ifdef CONFIG_KDB
- pushq %rcx
- pushq %rdx
- pushq %rax
- movl $473,%ecx
- rdmsr
- andl $0xfffffffe,%eax /* Disable last branch recording */
- wrmsr
- popq %rax
- popq %rdx
- popq %rcx
-#endif
errorentry do_page_fault
ENTRY(coprocessor_error)
diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S
index 14feb08d2f9d..1dbe5bdc4956 100644
--- a/arch/x86_64/kernel/head.S
+++ b/arch/x86_64/kernel/head.S
@@ -194,7 +194,7 @@ ENTRY(no_long_mode)
jmp 1b
.org 0xf00
-pGDT32:
+ENTRY(pGDT32):
.word gdt32_end-gdt_table32
.long gdt_table32-__START_KERNEL_map
@@ -307,6 +307,15 @@ ENTRY(level3_physmem_pgt)
.quad 0x0000000000105007 /* -> level2_kernel_pgt (so that __va works even before pagetable_init) */
.org 0xb000
+ENTRY(wakeup_level4_pgt)
+ .quad 0x0000000000102007 /* -> level3_ident_pgt */
+ .fill 255,8,0
+ .quad 0x000000000010a007
+ .fill 254,8,0
+ /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
+ .quad 0x0000000000103007 /* -> level3_kernel_pgt */
+
+.org 0xc000
.data
.align 16
@@ -371,3 +380,4 @@ ENTRY(idt_table)
.quad 0
.quad 0
.endr
+
diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c
index 150f56506d0b..f910a88a2237 100644
--- a/arch/x86_64/kernel/head64.c
+++ b/arch/x86_64/kernel/head64.c
@@ -15,6 +15,7 @@
#include <asm/processor.h>
#include <asm/proto.h>
#include <asm/smp.h>
+#include <asm/bootsetup.h>
/* Don't add a printk in there. printk relies on the PDA which is not initialized
yet. */
@@ -51,7 +52,7 @@ static void __init copy_bootdata(char *real_mode_data)
printk("old bootloader convention, maybe loadlin?\n");
}
command_line = (char *) ((u64)(new_data));
- memcpy(saved_command_line, command_line, 2048);
+ memcpy(saved_command_line, command_line, COMMAND_LINE_SIZE);
printk("Bootdata ok (command line is %s)\n", saved_command_line);
}
diff --git a/arch/x86_64/kernel/i387.c b/arch/x86_64/kernel/i387.c
index f0eac767249f..1d6780258027 100644
--- a/arch/x86_64/kernel/i387.c
+++ b/arch/x86_64/kernel/i387.c
@@ -42,7 +42,7 @@ void __init fpu_init(void)
/* clean state in init */
stts();
- clear_thread_flag(TIF_USEDFPU);
+ current_thread_info()->status = 0;
current->used_math = 0;
}
@@ -51,13 +51,12 @@ void __init fpu_init(void)
* so initialize it and set the mxcsr to its default.
* remeber the current task has used the FPU.
*/
-void init_fpu(void)
+void init_fpu(struct task_struct *child)
{
- struct task_struct *me = current;
- memset(&me->thread.i387.fxsave, 0, sizeof(struct i387_fxsave_struct));
- me->thread.i387.fxsave.cwd = 0x37f;
- me->thread.i387.fxsave.mxcsr = 0x1f80;
- me->used_math = 1;
+ memset(&child->thread.i387.fxsave, 0, sizeof(struct i387_fxsave_struct));
+ child->thread.i387.fxsave.cwd = 0x37f;
+ child->thread.i387.fxsave.mxcsr = 0x1f80;
+ child->used_math = 1;
}
/*
@@ -81,7 +80,7 @@ int save_i387(struct _fpstate *buf)
if (!tsk->used_math)
return 0;
tsk->used_math = 0; /* trigger finit */
- if (test_thread_flag(TIF_USEDFPU)) {
+ if (tsk->thread_info->status & TS_USEDFPU) {
err = save_i387_checking((struct i387_fxsave_struct *)buf);
if (err) return err;
stts();
@@ -99,7 +98,7 @@ int save_i387(struct _fpstate *buf)
int get_fpregs(struct user_i387_struct *buf, struct task_struct *tsk)
{
- empty_fpu(tsk);
+ init_fpu(tsk);
return __copy_to_user((void *)buf, &tsk->thread.i387.fxsave,
sizeof(struct user_i387_struct)) ? -EFAULT : 0;
}
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index 0d749d4e8fef..ae5c5f55440a 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -25,13 +25,15 @@
#include <asm/mpspec.h>
#include <asm/nmi.h>
#include <asm/msr.h>
+#include <asm/proto.h>
+#include <asm/kdebug.h>
extern void default_do_nmi(struct pt_regs *);
unsigned int nmi_watchdog = NMI_LOCAL_APIC;
static unsigned int nmi_hz = HZ;
unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */
-extern void show_registers(struct pt_regs *regs);
+int nmi_watchdog_disabled;
#define K7_EVNTSEL_ENABLE (1 << 22)
#define K7_EVNTSEL_INT (1 << 20)
@@ -251,15 +253,13 @@ void touch_nmi_watchdog (void)
alert_counter[i] = 0;
}
-void nmi_watchdog_tick (struct pt_regs * regs)
+void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason)
{
+ int sum, cpu = safe_smp_processor_id();
+
+ if (nmi_watchdog_disabled)
+ return;
- /*
- * Since current_thread_info()-> is always on the stack, and we
- * always switch the stack NMI-atomically, it's safe to use
- * smp_processor_id().
- */
- int sum, cpu = smp_processor_id();
sum = read_pda(apic_timer_irqs);
if (last_irq_sums[cpu] == sum) {
@@ -269,6 +269,10 @@ void nmi_watchdog_tick (struct pt_regs * regs)
*/
alert_counter[cpu]++;
if (alert_counter[cpu] == 5*nmi_hz) {
+ if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_BAD) {
+ alert_counter[cpu] = 0;
+ return;
+ }
spin_lock(&nmi_print_lock);
/*
* We are in trouble anyway, lets at least try
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c
index d7c72abc5164..0b956ff1e7aa 100644
--- a/arch/x86_64/kernel/pci-gart.c
+++ b/arch/x86_64/kernel/pci-gart.c
@@ -8,7 +8,7 @@
* See Documentation/DMA-mapping.txt for the interface specification.
*
* Copyright 2002 Andi Kleen, SuSE Labs.
- * $Id: pci-gart.c,v 1.12 2002/09/19 19:25:32 ak Exp $
+ * $Id: pci-gart.c,v 1.20 2003/03/12 08:23:29 ak Exp $
*/
/*
@@ -19,9 +19,12 @@ agpgart_be
possible future tuning:
fast path for sg streaming mappings
- more intelligent flush strategy - flush only a single NB?
+ more intelligent flush strategy - flush only a single NB? flush only when
+ gart area fills up and alloc_iommu wraps.
+ don't flush on allocation - need to unmap the gart area first to avoid prefetches
+ by the CPU
move boundary between IOMMU and AGP in GART dynamically
- could use exact fit in the gart in alloc_consistent, not order of two.
+
*/
#include <linux/config.h>
@@ -49,7 +52,11 @@ u32 *iommu_gatt_base; /* Remapping table */
int no_iommu;
static int no_agp;
+#ifdef CONFIG_IOMMU_DEBUG
int force_mmu = 1;
+#else
+int force_mmu = 0;
+#endif
extern int fallback_aper_order;
extern int fallback_aper_force;
@@ -58,10 +65,9 @@ extern int fallback_aper_force;
static spinlock_t iommu_bitmap_lock = SPIN_LOCK_UNLOCKED;
static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */
-#define GPTE_MASK 0xfffffff000
#define GPTE_VALID 1
#define GPTE_COHERENT 2
-#define GPTE_ENCODE(x,flag) (((x) & 0xfffffff0) | ((x) >> 28) | GPTE_VALID | (flag))
+#define GPTE_ENCODE(x) (((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT)
#define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28))
#define for_all_nb(dev) \
@@ -72,7 +78,6 @@ static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */
#define EMERGENCY_PAGES 32 /* = 128KB */
#ifdef CONFIG_AGP
-extern int agp_amdk8_init(void);
extern int agp_init(void);
#define AGPEXTERN extern
#else
@@ -130,7 +135,7 @@ void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
{
void *memory;
int gfp = GFP_ATOMIC;
- int order, i;
+ int i;
unsigned long iommu_page;
if (hwdev == NULL || hwdev->dma_mask < 0xffffffff || no_iommu)
@@ -140,15 +145,15 @@ void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
* First try to allocate continuous and use directly if already
* in lowmem.
*/
- order = get_order(size);
- memory = (void *)__get_free_pages(gfp, order);
+ size = round_up(size, PAGE_SIZE);
+ memory = (void *)__get_free_pages(gfp, get_order(size));
if (memory == NULL) {
return NULL;
} else {
int high = (unsigned long)virt_to_bus(memory) + size
>= 0xffffffff;
int mmu = high;
- if (force_mmu)
+ if (force_mmu && !(gfp & GFP_DMA))
mmu = 1;
if (no_iommu) {
if (high) goto error;
@@ -161,19 +166,21 @@ void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
}
}
- iommu_page = alloc_iommu(1<<order);
+ size >>= PAGE_SHIFT;
+
+ iommu_page = alloc_iommu(size);
if (iommu_page == -1)
goto error;
/* Fill in the GATT, allocating pages as needed. */
- for (i = 0; i < 1<<order; i++) {
+ for (i = 0; i < size; i++) {
unsigned long phys_mem;
void *mem = memory + i*PAGE_SIZE;
if (i > 0)
atomic_inc(&virt_to_page(mem)->count);
phys_mem = virt_to_phys(mem);
- BUG_ON(phys_mem & ~PTE_MASK);
- iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem,GPTE_COHERENT);
+ BUG_ON(phys_mem & ~PHYSICAL_PAGE_MASK);
+ iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem);
}
flush_gart();
@@ -181,7 +188,7 @@ void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
return memory;
error:
- free_pages((unsigned long)memory, order);
+ free_pages((unsigned long)memory, get_order(size));
return NULL;
}
@@ -193,30 +200,32 @@ void pci_free_consistent(struct pci_dev *hwdev, size_t size,
void *vaddr, dma_addr_t bus)
{
u64 pte;
- int order = get_order(size);
unsigned long iommu_page;
int i;
+ size = round_up(size, PAGE_SIZE);
if (bus < iommu_bus_base || bus > iommu_bus_base + iommu_size) {
- free_pages((unsigned long)vaddr, order);
+ free_pages((unsigned long)vaddr, get_order(size));
return;
}
+ size >>= PAGE_SHIFT;
iommu_page = (bus - iommu_bus_base) / PAGE_SIZE;
- for (i = 0; i < 1<<order; i++) {
+ for (i = 0; i < size; i++) {
pte = iommu_gatt_base[iommu_page + i];
BUG_ON((pte & GPTE_VALID) == 0);
iommu_gatt_base[iommu_page + i] = 0;
free_page((unsigned long) __va(GPTE_DECODE(pte)));
}
flush_gart();
- free_iommu(iommu_page, 1<<order);
+ free_iommu(iommu_page, size);
}
#ifdef CONFIG_IOMMU_LEAK
/* Debugging aid for drivers that don't free their IOMMU tables */
static void **iommu_leak_tab;
static int leak_trace;
-int iommu_leak_dumppages = 20;
+int iommu_leak_pages = 20;
+extern unsigned long printk_address(unsigned long);
void dump_leak(void)
{
int i;
@@ -224,10 +233,13 @@ void dump_leak(void)
if (dump || !iommu_leak_tab) return;
dump = 1;
show_stack(NULL);
- printk("Dumping %d pages from end of IOMMU:\n", iommu_leak_dumppages);
- for (i = 0; i < iommu_leak_dumppages; i++)
- printk("[%lu: %lx] ",
- iommu_pages-i,(unsigned long) iommu_leak_tab[iommu_pages-i]);
+ /* Very crude. dump some from the end of the table too */
+ printk("Dumping %d pages from end of IOMMU:\n", iommu_leak_pages);
+ for (i = 0; i < iommu_leak_pages; i+=2) {
+ printk("%lu: ", iommu_pages-i);
+ printk_address((unsigned long) iommu_leak_tab[iommu_pages-i]);
+ printk("%c", (i+1)%2 == 0 ? '\n' : ' ');
+ }
printk("\n");
}
#endif
@@ -275,7 +287,8 @@ static inline int need_iommu(struct pci_dev *dev, unsigned long addr, size_t siz
return mmu;
}
-dma_addr_t pci_map_single(struct pci_dev *dev, void *addr, size_t size,int dir)
+dma_addr_t __pci_map_single(struct pci_dev *dev, void *addr, size_t size,
+ int dir, int flush)
{
unsigned long iommu_page;
unsigned long phys_mem, bus;
@@ -297,13 +310,18 @@ dma_addr_t pci_map_single(struct pci_dev *dev, void *addr, size_t size,int dir)
phys_mem &= PAGE_MASK;
for (i = 0; i < npages; i++, phys_mem += PAGE_SIZE) {
- BUG_ON(phys_mem & ~PTE_MASK);
+ BUG_ON(phys_mem & ~PHYSICAL_PAGE_MASK);
/*
* Set coherent mapping here to avoid needing to flush
* the caches on mapping.
*/
- iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem, GPTE_COHERENT);
+ iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem);
+
+#ifdef CONFIG_IOMMU_DEBUG
+ /* paranoia check */
+ BUG_ON(GPTE_DECODE(iommu_gatt_base[iommu_page+i]) != phys_mem);
+#endif
#ifdef CONFIG_IOMMU_LEAK
/* XXX need eventually caller of pci_map_sg */
@@ -311,6 +329,7 @@ dma_addr_t pci_map_single(struct pci_dev *dev, void *addr, size_t size,int dir)
iommu_leak_tab[iommu_page + i] = __builtin_return_address(0);
#endif
}
+ if (flush)
flush_gart();
bus = iommu_bus_base + iommu_page*PAGE_SIZE;
@@ -341,7 +360,7 @@ void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr,
free_iommu(iommu_page, npages);
}
-EXPORT_SYMBOL(pci_map_single);
+EXPORT_SYMBOL(__pci_map_single);
EXPORT_SYMBOL(pci_unmap_single);
static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
@@ -438,8 +457,6 @@ static __init int init_k8_gatt(agp_kern_info *info)
}
flush_gart();
- global_flush_tlb();
-
printk("PCI-DMA: aperture base @ %x size %u KB\n", aper_base, aper_size>>10);
return 0;
@@ -530,8 +547,10 @@ void __init pci_iommu_init(void)
off don't use the IOMMU
leak turn on simple iommu leak tracing (only when CONFIG_IOMMU_LEAK is on)
memaper[=order] allocate an own aperture over RAM with size 32MB^order.
+ noforce don't force IOMMU usage. Should be fastest.
+ force Force IOMMU and turn on unmap debugging.
*/
-__init int iommu_setup(char *opt, char **end)
+__init int iommu_setup(char *opt)
{
int arg;
char *p = opt;
@@ -552,17 +571,21 @@ __init int iommu_setup(char *opt, char **end)
fallback_aper_order = arg;
}
#ifdef CONFIG_IOMMU_LEAK
- if (!memcmp(p,"leak", 4))
+ if (!memcmp(p,"leak", 4)) {
leak_trace = 1;
+ p += 4;
+ if (*p == '=') ++p;
+ if (isdigit(*p) && get_option(&p, &arg))
+ iommu_leak_pages = arg;
+ } else
#endif
if (isdigit(*p) && get_option(&p, &arg))
iommu_size = arg;
do {
- if (*p == ' ' || *p == 0) {
- *end = p;
+ if (*p == ' ' || *p == 0)
return 0;
- }
} while (*p++ != ',');
}
+ return 1;
}
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index bdc7b7115ae6..36df3cac213e 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -192,6 +192,8 @@ void show_regs(struct pt_regs * regs)
fs,fsindex,gs,gsindex,shadowgs);
printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
+
+ show_trace(&regs->rsp);
}
extern void load_gs_index(unsigned);
@@ -260,6 +262,14 @@ static inline u32 read_32bit_tls(struct task_struct *t, int tls)
(((u32)desc->base2) << 24);
}
+/*
+ * This gets called before we allocate a new thread and copy
+ * the current task into it.
+ */
+void prepare_to_copy(struct task_struct *tsk)
+{
+ unlazy_fpu(tsk);
+}
int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
unsigned long unused,
@@ -294,9 +304,6 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
asm("movl %%es,%0" : "=m" (p->thread.es));
asm("movl %%ds,%0" : "=m" (p->thread.ds));
- unlazy_fpu(me);
- p->thread.i387 = me->thread.i387;
-
if (unlikely(me->thread.io_bitmap_ptr != NULL)) {
p->thread.io_bitmap_ptr = kmalloc((IO_BITMAP_SIZE+1)*4, GFP_KERNEL);
if (!p->thread.io_bitmap_ptr)
@@ -314,7 +321,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
err = ia32_child_tls(p, childregs);
else
#endif
- err = do_arch_prctl(p, ARCH_SET_FS, childregs->r10);
+ err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
if (err)
goto out;
}
diff --git a/arch/x86_64/kernel/ptrace.c b/arch/x86_64/kernel/ptrace.c
index 24e9032ec7fe..20a66a609ecb 100644
--- a/arch/x86_64/kernel/ptrace.c
+++ b/arch/x86_64/kernel/ptrace.c
@@ -240,8 +240,8 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
unsigned long tmp;
ret = -EIO;
- if ((addr & 3) || addr < 0 ||
- addr > sizeof(struct user) - 3)
+ if ((addr & 7) || addr < 0 ||
+ addr > sizeof(struct user) - 7)
break;
tmp = 0; /* Default return condition */
@@ -250,7 +250,7 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
if(addr >= (long) &dummy->u_debugreg[0] &&
addr <= (long) &dummy->u_debugreg[7]){
addr -= (long) &dummy->u_debugreg[0];
- addr = addr >> 2;
+ addr = addr >> 3;
tmp = child->thread.debugreg[addr];
}
ret = put_user(tmp,(unsigned long *) data);
@@ -268,8 +268,8 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
ret = -EIO;
- if ((addr & 3) || addr < 0 ||
- addr > sizeof(struct user) - 3)
+ if ((addr & 7) || addr < 0 ||
+ addr > sizeof(struct user) - 7)
break;
if (addr < sizeof(struct user_regs_struct)) {
@@ -290,6 +290,11 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
if(addr < (long) &dummy->u_debugreg[4] &&
((unsigned long) data) >= TASK_SIZE-3) break;
+ if (addr == (long) &dummy->u_debugreg[6]) {
+ if (data >> 32)
+ goto out_tsk;
+ }
+
if(addr == (long) &dummy->u_debugreg[7]) {
data &= ~DR_CONTROL_RESERVED;
for(i=0; i<4; i++)
@@ -298,7 +303,7 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
}
addr -= (long) &dummy->u_debugreg;
- addr = addr >> 2;
+ addr = addr >> 3;
child->thread.debugreg[addr] = data;
ret = 0;
}
diff --git a/arch/x86_64/kernel/reboot.c b/arch/x86_64/kernel/reboot.c
index a260d52f756c..a56ae47b7124 100644
--- a/arch/x86_64/kernel/reboot.c
+++ b/arch/x86_64/kernel/reboot.c
@@ -101,7 +101,6 @@ void machine_restart(char * __unused)
* Stop all CPUs and turn off local APICs and the IO-APIC, so
* other OSs see a clean IRQ state.
*/
- if (notify_die(DIE_STOP,"cpustop",0,0) != NOTIFY_BAD)
smp_send_stop();
disable_IO_APIC();
#endif
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index feab8634021f..3161229f13ca 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -197,12 +197,15 @@ static __init void parse_cmdline_early (char ** cmdline_p)
if (!memcmp(from, "acpi=off", 8))
acpi_disabled = 1;
+ if (!memcmp(from, "disableapic", 11))
+ disable_apic = 1;
+
if (!memcmp(from, "mem=", 4))
parse_memopt(from+4, &from);
#ifdef CONFIG_GART_IOMMU
if (!memcmp(from,"iommu=",6)) {
- iommu_setup(from+6, &from);
+ iommu_setup(from+6);
}
#endif
diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c
index 56685a77b847..02d8ab2a7d3a 100644
--- a/arch/x86_64/kernel/setup64.c
+++ b/arch/x86_64/kernel/setup64.c
@@ -40,7 +40,7 @@ struct desc_ptr idt_descr = { 256 * 16, (unsigned long) idt_table };
char boot_cpu_stack[IRQSTACKSIZE] __cacheline_aligned;
unsigned long __supported_pte_mask = ~0UL;
-static int do_not_nx = 0;
+static int do_not_nx = 1;
static int __init nonx_setup(char *str)
{
@@ -98,6 +98,8 @@ void pda_init(int cpu)
pda->cpudata_offset = 0;
pda->kernelstack =
(unsigned long)stack_thread_info() - PDA_STACKOFFSET + THREAD_SIZE;
+ pda->active_mm = &init_mm;
+ pda->mmu_state = 0;
if (cpu == 0) {
/* others are initialized in smpboot.c */
@@ -121,8 +123,6 @@ void pda_init(int cpu)
asm volatile("movq %0,%%cr3" :: "r" (__pa(level4)));
pda->irqstackptr += IRQSTACKSIZE-64;
- pda->active_mm = &init_mm;
- pda->mmu_state = 0;
}
#define EXCEPTION_STK_ORDER 0 /* >= N_EXCEPTION_STACKS*EXCEPTION_STKSZ */
diff --git a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c
index 8b5c9c27a16f..c40b2b293eb2 100644
--- a/arch/x86_64/kernel/signal.c
+++ b/arch/x86_64/kernel/signal.c
@@ -225,6 +225,7 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size)
rsp = regs->rsp - 128;
/* This is the X/Open sanctioned signal stack switching. */
+ /* RED-PEN: redzone on that stack? */
if (ka->sa.sa_flags & SA_ONSTACK) {
if (sas_ss_flags(rsp) == 0)
rsp = current->sas_ss_sp + current->sas_ss_size;
@@ -433,7 +434,8 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset)
* have been cleared if the watchpoint triggered
* inside the kernel.
*/
- __asm__("movq %0,%%db7" : : "r" (current->thread.debugreg[7]));
+ if (current->thread.debugreg[7])
+ asm volatile("movq %0,%%db7" : : "r" (current->thread.debugreg[7]));
/* Whee! Actually deliver the signal. */
handle_signal(signr, &info, oldset, regs);
diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c
index c5d1957b1f9f..3ed8c38f37a6 100644
--- a/arch/x86_64/kernel/smp.c
+++ b/arch/x86_64/kernel/smp.c
@@ -485,16 +485,16 @@ asmlinkage void smp_call_function_interrupt(void)
}
}
-
/* Slow. Should be only used for debugging. */
int slow_smp_processor_id(void)
{
int stack_location;
unsigned long sp = (unsigned long)&stack_location;
- int cpu;
- unsigned long mask;
+ int offset = 0, cpu;
+
+ for (offset = 0; (cpu_online_map >> offset); offset = cpu + 1) {
+ cpu = ffz(~(cpu_online_map >> offset));
- for_each_cpu(cpu, mask) {
if (sp >= (u64)cpu_pda[cpu].irqstackptr - IRQSTACKSIZE &&
sp <= (u64)cpu_pda[cpu].irqstackptr)
return cpu;
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index f6449d6194c5..3a4881757981 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -318,8 +318,6 @@ void __init smp_callin(void)
*/
smp_store_cpu_info(cpuid);
- notify_die(DIE_CPUINIT, "cpuinit", NULL, 0);
-
local_irq_disable();
/*
@@ -898,6 +896,8 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
*/
if (!skip_ioapic_setup && nr_ioapics)
setup_IO_APIC();
+ else
+ nr_ioapics = 0;
setup_boot_APIC_clock();
diff --git a/arch/x86_64/kernel/suspend.c b/arch/x86_64/kernel/suspend.c
index a6c6823ce744..52f04905116b 100644
--- a/arch/x86_64/kernel/suspend.c
+++ b/arch/x86_64/kernel/suspend.c
@@ -28,7 +28,7 @@
#include <asm/io.h>
#include <asm/proto.h>
-static struct saved_context saved_context;
+struct saved_context saved_context;
unsigned long saved_context_eax, saved_context_ebx, saved_context_ecx, saved_context_edx;
unsigned long saved_context_esp, saved_context_ebp, saved_context_esi, saved_context_edi;
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 5c4c728449b9..1ff1231e50ce 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -110,7 +110,7 @@ int printk_address(unsigned long address)
}
#endif
-static inline unsigned long *in_exception_stack(int cpu, unsigned long stack)
+unsigned long *in_exception_stack(int cpu, unsigned long stack)
{
int k;
for (k = 0; k < N_EXCEPTION_STACKS; k++) {
@@ -249,7 +249,7 @@ void show_stack(unsigned long * rsp)
void dump_stack(void)
{
unsigned long dummy;
- show_stack(&dummy);
+ show_trace(&dummy);
}
void show_registers(struct pt_regs *regs)
@@ -344,7 +344,6 @@ void die(const char * str, struct pt_regs * regs, long err)
show_registers(regs);
bust_spinlocks(0);
spin_unlock_irq(&die_lock);
- notify_die(DIE_OOPS, (char *)str, regs, err);
do_exit(SIGSEGV);
}
@@ -419,6 +418,8 @@ static void do_trap(int trapnr, int signr, char *str,
#define DO_ERROR(trapnr, signr, str, name) \
asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
{ \
+ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) == NOTIFY_BAD) \
+ return; \
do_trap(trapnr, signr, str, regs, error_code, NULL); \
}
@@ -430,10 +431,13 @@ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
info.si_errno = 0; \
info.si_code = sicode; \
info.si_addr = (void *)siaddr; \
+ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) == NOTIFY_BAD) \
+ return; \
do_trap(trapnr, signr, str, regs, error_code, &info); \
}
DO_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->rip)
+DO_ERROR( 3, SIGTRAP, "int3", int3);
DO_ERROR( 4, SIGSEGV, "overflow", overflow)
DO_ERROR( 5, SIGSEGV, "bounds", bounds)
DO_ERROR_INFO( 6, SIGILL, "invalid operand", invalid_op, ILL_ILLOPN, regs->rip)
@@ -446,13 +450,6 @@ DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, get_cr2())
DO_ERROR(18, SIGSEGV, "reserved", reserved)
-asmlinkage void do_int3(struct pt_regs * regs, long error_code)
-{
- if (notify_die(DIE_INT3, "int3", regs, error_code) == NOTIFY_BAD)
- return;
- do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
-}
-
extern void dump_pagetable(unsigned long);
asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
@@ -493,6 +490,8 @@ asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
regs->rip = fixup->fixup;
return;
}
+ notify_die(DIE_GPF, "general protection fault", regs, error_code,
+ 13, SIGSEGV);
die("general protection fault", regs, error_code);
}
}
@@ -537,14 +536,14 @@ asmlinkage void default_do_nmi(struct pt_regs * regs)
* so it must be the NMI watchdog.
*/
if (nmi_watchdog) {
- nmi_watchdog_tick(regs);
+ nmi_watchdog_tick(regs,reason);
return;
}
#endif
unknown_nmi_error(reason, regs);
return;
}
- if (notify_die(DIE_NMI, "nmi", regs, reason) == NOTIFY_BAD)
+ if (notify_die(DIE_NMI, "nmi", regs, reason, 0, SIGINT) == NOTIFY_BAD)
return;
if (reason & 0x80)
mem_parity_error(reason, regs);
@@ -569,6 +568,7 @@ asmlinkage void do_debug(struct pt_regs * regs, long error_code)
#ifdef CONFIG_CHECKING
{
+ /* RED-PEN interaction with debugger - could destroy gs */
unsigned long gs;
struct x8664_pda *pda = cpu_pda + safe_smp_processor_id();
rdmsrl(MSR_GS_BASE, gs);
@@ -583,9 +583,6 @@ asmlinkage void do_debug(struct pt_regs * regs, long error_code)
conditional_sti(regs);
- if (notify_die(DIE_DEBUG, "debug", regs, error_code) == NOTIFY_BAD)
- return;
-
/* Mask out spurious debug traps due to lazy DR7 setting */
if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
if (!tsk->thread.debugreg[7]) {
@@ -618,17 +615,22 @@ asmlinkage void do_debug(struct pt_regs * regs, long error_code)
info.si_signo = SIGTRAP;
info.si_errno = 0;
info.si_code = TRAP_BRKPT;
- info.si_addr = ((regs->cs & 3) == 0) ? (void *)tsk->thread.rip :
- (void *)regs->rip;
+ if ((regs->cs & 3) == 0)
+ goto clear_dr7;
+
+ info.si_addr = (void *)regs->rip;
force_sig_info(SIGTRAP, &info, tsk);
clear_dr7:
- asm("movq %0,%%db7"::"r"(0UL));
+ asm volatile("movq %0,%%db7"::"r"(0UL));
+ notify_die(DIE_DEBUG, "debug", regs, error_code, 1, SIGTRAP);
return;
clear_TF_reenable:
set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
clear_TF:
+ /* RED-PEN could cause spurious errors */
+ if (notify_die(DIE_DEBUG, "debug2", regs, error_code, 1, SIGTRAP) != NOTIFY_BAD)
regs->eflags &= ~TF_MASK;
return;
}
@@ -775,9 +777,9 @@ asmlinkage void math_state_restore(void)
clts(); /* Allow maths ops (or we recurse) */
if (!me->used_math)
- init_fpu();
+ init_fpu(me);
restore_fpu_checking(&me->thread.i387.fxsave);
- set_thread_flag(TIF_USEDFPU);
+ me->thread_info->status |= TS_USEDFPU;
}
asmlinkage void math_emulate(void)
@@ -787,7 +789,7 @@ asmlinkage void math_emulate(void)
void do_call_debug(struct pt_regs *regs)
{
- notify_die(DIE_CALL, "debug call", regs, 0);
+ notify_die(DIE_CALL, "debug call", regs, 0, 255, SIGINT);
}
void __init trap_init(void)
@@ -819,8 +821,6 @@ void __init trap_init(void)
set_intr_gate(KDB_VECTOR, call_debug);
- notify_die(DIE_TRAPINIT, "traps initialized", 0, 0);
-
/*
* Should be a barrier for any external CPU state.
*/
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index d588c9d07812..49119f0d18cc 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -57,29 +57,41 @@ void bust_spinlocks(int yes)
}
}
+static int bad_address(void *p)
+{
+ unsigned long dummy;
+ return __get_user(dummy, (unsigned long *)p);
+}
+
void dump_pagetable(unsigned long address)
{
- static char *name[] = { "PML4", "PGD", "PDE", "PTE" };
- int i, shift;
- unsigned long page;
-
- shift = 9+9+9+12;
- address &= ~0xFFFF000000000000UL;
- asm("movq %%cr3,%0" : "=r" (page));
- for (i = 0; i < 4; i++) {
- unsigned long *padr = (unsigned long *) __va(page);
- padr += (address >> shift) & 0x1FFU;
- if (__get_user(page, padr)) {
- printk("%s: bad %p\n", name[i], padr);
- break;
- }
- printk("%s: %016lx ", name[i], page);
- if ((page & (1 | (1<<7))) != 1) /* Not present or 2MB page */
- break;
- page &= ~0xFFFUL;
- shift -= (i == 0) ? 12 : 9;
- }
+ pml4_t *pml4;
+ asm("movq %%cr3,%0" : "=r" (pml4));
+
+ pml4 = __va((unsigned long)pml4 & PHYSICAL_PAGE_MASK);
+ pml4 += pml4_index(address);
+ printk("PML4 %lx ", pml4_val(*pml4));
+ if (bad_address(pml4)) goto bad;
+ if (!pml4_present(*pml4)) goto ret;
+
+ pgd_t *pgd = __pgd_offset_k((pgd_t *)pml4_page(*pml4), address);
+ if (bad_address(pgd)) goto bad;
+ printk("PGD %lx ", pgd_val(*pgd));
+ if (!pgd_present(*pgd)) goto ret;
+
+ pmd_t *pmd = pmd_offset(pgd, address);
+ if (bad_address(pmd)) goto bad;
+ printk("PMD %lx ", pmd_val(*pmd));
+ if (!pmd_present(*pmd)) goto ret;
+
+ pte_t *pte = pte_offset_kernel(pmd, address);
+ if (bad_address(pte)) goto bad;
+ printk("PTE %lx", pte_val(*pte));
+ret:
printk("\n");
+ return;
+bad:
+ printk("BAD\n");
}
int page_fault_trace;
diff --git a/arch/x86_64/mm/ioremap.c b/arch/x86_64/mm/ioremap.c
index f0503c3badc2..ed7cc711392a 100644
--- a/arch/x86_64/mm/ioremap.c
+++ b/arch/x86_64/mm/ioremap.c
@@ -150,7 +150,7 @@ void * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flag
*/
offset = phys_addr & ~PAGE_MASK;
phys_addr &= PAGE_MASK;
- size = PAGE_ALIGN(last_addr) - phys_addr;
+ size = PAGE_ALIGN(last_addr+1) - phys_addr;
/*
* Ok, go for it..
diff --git a/arch/x86_64/mm/k8topology.c b/arch/x86_64/mm/k8topology.c
index ed2da1470216..7457842d38f5 100644
--- a/arch/x86_64/mm/k8topology.c
+++ b/arch/x86_64/mm/k8topology.c
@@ -21,7 +21,7 @@
#include <asm/pci-direct.h>
#include <asm/numa.h>
-static int find_northbridge(void)
+static __init int find_northbridge(void)
{
int num;
@@ -45,7 +45,8 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
{
unsigned long prevbase;
struct node nodes[MAXNODE];
- int nodeid, numnodes, maxnode, i, nb;
+ int nodeid, i, nb;
+ int found = 0;
nb = find_northbridge();
if (nb < 0)
@@ -53,12 +54,13 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
printk(KERN_INFO "Scanning NUMA topology in Northbridge %d\n", nb);
- numnodes = (read_pci_config(0, nb, 0, 0x60 ) >> 4) & 3;
+ numnodes = (1 << ((read_pci_config(0, nb, 0, 0x60 ) >> 4) & 3));
+
+ printk(KERN_INFO "Assuming %d nodes\n", numnodes - 1);
memset(&nodes,0,sizeof(nodes));
prevbase = 0;
- maxnode = -1;
- for (i = 0; i < MAXNODE; i++) {
+ for (i = 0; i < numnodes; i++) {
unsigned long base,limit;
base = read_pci_config(0, nb, 1, 0x40 + i*8);
@@ -66,18 +68,16 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
nodeid = limit & 3;
if (!limit) {
- printk(KERN_INFO "Skipping node entry %d (base %lx)\n", i, base);
- continue;
+ printk(KERN_ERR "Skipping node entry %d (base %lx)\n", i, base);
+ return -1;
}
if ((base >> 8) & 3 || (limit >> 8) & 3) {
printk(KERN_ERR "Node %d using interleaving mode %lx/%lx\n",
nodeid, (base>>8)&3, (limit>>8) & 3);
return -1;
}
- if (nodeid > maxnode)
- maxnode = nodeid;
if ((1UL << nodeid) & nodes_present) {
- printk("Node %d already present. Skipping\n", nodeid);
+ printk(KERN_INFO "Node %d already present. Skipping\n", nodeid);
continue;
}
@@ -98,17 +98,19 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
base = start;
if (limit > end)
limit = end;
- if (limit == base)
+ if (limit == base) {
+ printk(KERN_ERR "Empty node %d\n", nodeid);
continue;
+ }
if (limit < base) {
- printk(KERN_INFO"Node %d bogus settings %lx-%lx. Ignored.\n",
+ printk(KERN_ERR "Node %d bogus settings %lx-%lx.\n",
nodeid, base, limit);
- continue;
+ return -1;
}
/* Could sort here, but pun for now. Should not happen anyroads. */
if (prevbase > base) {
- printk(KERN_INFO "Node map not sorted %lx,%lx\n",
+ printk(KERN_ERR "Node map not sorted %lx,%lx\n",
prevbase,base);
return -1;
}
@@ -116,23 +118,26 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
printk(KERN_INFO "Node %d MemBase %016lx Limit %016lx\n",
nodeid, base, limit);
+ found++;
+
nodes[nodeid].start = base;
nodes[nodeid].end = limit;
prevbase = base;
}
- if (maxnode <= 0)
+ if (!found)
return -1;
- memnode_shift = compute_hash_shift(nodes,maxnode,end);
+ memnode_shift = compute_hash_shift(nodes);
if (memnode_shift < 0) {
printk(KERN_ERR "No NUMA node hash function found. Contact maintainer\n");
return -1;
}
printk(KERN_INFO "Using node hash shift of %d\n", memnode_shift);
- early_for_all_nodes(i) {
+ for (i = 0; i < numnodes; i++) {
+ if (nodes[i].start != nodes[i].end)
setup_node_bootmem(i, nodes[i].start, nodes[i].end);
}
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c
index 8135efbf522d..b117644e24cc 100644
--- a/arch/x86_64/mm/numa.c
+++ b/arch/x86_64/mm/numa.c
@@ -26,11 +26,10 @@ u8 memnodemap[NODEMAPSIZE];
static int numa_off __initdata;
unsigned long nodes_present;
-int maxnode;
static int emunodes __initdata;
-int compute_hash_shift(struct node *nodes, int numnodes, u64 maxmem)
+int __init compute_hash_shift(struct node *nodes)
{
int i;
int shift = 24;
@@ -39,12 +38,16 @@ int compute_hash_shift(struct node *nodes, int numnodes, u64 maxmem)
/* When in doubt use brute force. */
while (shift < 48) {
memset(memnodemap,0xff,sizeof(*memnodemap) * NODEMAPSIZE);
- early_for_all_nodes (i) {
+ for (i = 0; i < numnodes; i++) {
+ if (nodes[i].start == nodes[i].end)
+ continue;
for (addr = nodes[i].start;
addr < nodes[i].end;
addr += (1UL << shift)) {
- if (memnodemap[addr >> shift] != 0xff) {
- printk("node %d shift %d addr %Lx conflict %d\n",
+ if (memnodemap[addr >> shift] != 0xff &&
+ memnodemap[addr >> shift] != i) {
+ printk(KERN_INFO
+ "node %d shift %d addr %Lx conflict %d\n",
i, shift, addr, memnodemap[addr>>shift]);
goto next;
}
@@ -101,9 +104,8 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long en
reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size);
reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, bootmap_pages<<PAGE_SHIFT);
-
- if (nodeid > maxnode)
- maxnode = nodeid;
+ if (nodeid + 1 > numnodes)
+ numnodes = nodeid + 1;
nodes_present |= (1UL << nodeid);
}
@@ -151,6 +153,7 @@ int __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
int i;
if (emunodes > MAXNODE)
emunodes = MAXNODE;
+ memset(&nodes, 0, sizeof(nodes));
printk(KERN_INFO "Faking %d nodes of size %ld MB\n", emunodes, nodesize>>20);
for (i = 0; i < emunodes; i++) {
unsigned long end = (i+1)*nodesize;
@@ -160,7 +163,7 @@ int __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
nodes[i].end = end;
setup_node_bootmem(i, nodes[i].start, nodes[i].end);
}
- memnode_shift = compute_hash_shift(nodes, emunodes, nodes[i-1].end);
+ memnode_shift = compute_hash_shift(nodes);
return 0;
}
diff --git a/arch/x86_64/pci/irq.c b/arch/x86_64/pci/irq.c
index 9162d3e791d9..baf6d641d453 100644
--- a/arch/x86_64/pci/irq.c
+++ b/arch/x86_64/pci/irq.c
@@ -618,11 +618,20 @@ void pcibios_penalize_isa_irq(int irq)
int pirq_enable_irq(struct pci_dev *dev)
{
u8 pin;
+ extern int interrupt_line_quirk;
pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) {
+ /* With IDE legacy devices the IRQ lookup failure is not a problem.. */
+ if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE && !(dev->class & 0x5))
+ return 0;
+
printk(KERN_WARNING "PCI: No IRQ known for interrupt pin %c of device %s.\n",
'A' + pin - 1, dev->slot_name);
}
+ /* VIA bridges use interrupt line for apic/pci steering across
+ the V-Link */
+ else if (interrupt_line_quirk)
+ pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq);
return 0;
}
diff --git a/include/asm-x86_64/apic.h b/include/asm-x86_64/apic.h
index e15d95de1f69..bce8e2252907 100644
--- a/include/asm-x86_64/apic.h
+++ b/include/asm-x86_64/apic.h
@@ -75,7 +75,7 @@ extern void smp_local_timer_interrupt (struct pt_regs * regs);
extern void setup_boot_APIC_clock (void);
extern void setup_secondary_APIC_clock (void);
extern void setup_apic_nmi_watchdog (void);
-extern inline void nmi_watchdog_tick (struct pt_regs * regs);
+extern void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason);
extern int APIC_init_uniprocessor (void);
extern void disable_APIC_timer(void);
extern void enable_APIC_timer(void);
diff --git a/include/asm-x86_64/debugreg.h b/include/asm-x86_64/debugreg.h
index 81c8f097e4d6..2c4fe65e69ac 100644
--- a/include/asm-x86_64/debugreg.h
+++ b/include/asm-x86_64/debugreg.h
@@ -58,7 +58,7 @@
We can slow the instruction pipeline for instructions coming via the
gdt or the ldt if we want to. I am not sure why this is an advantage */
-#define DR_CONTROL_RESERVED (0xFC00) /* Reserved by Intel */
+#define DR_CONTROL_RESERVED (0xFFFFFFFFFC00) /* Reserved by Intel */
#define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */
#define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */
diff --git a/include/asm-x86_64/desc.h b/include/asm-x86_64/desc.h
index 33d997b78ecc..c871c9e5aecb 100644
--- a/include/asm-x86_64/desc.h
+++ b/include/asm-x86_64/desc.h
@@ -8,6 +8,7 @@
#ifndef __ASSEMBLY__
#include <asm/segment.h>
+#include <asm/mmu.h>
// 8 byte segment descriptor
struct desc_struct {
diff --git a/include/asm-x86_64/hdreg.h b/include/asm-x86_64/hdreg.h
index 6ca37a9778de..52a2ed303fcf 100644
--- a/include/asm-x86_64/hdreg.h
+++ b/include/asm-x86_64/hdreg.h
@@ -7,6 +7,4 @@
#ifndef __ASMx86_64_HDREG_H
#define __ASMx86_64_HDREG_H
-typedef unsigned long ide_ioreg_t;
-
#endif /* __ASMx86_64_HDREG_H */
diff --git a/include/asm-x86_64/i387.h b/include/asm-x86_64/i387.h
index 09e18af42ae9..e5bc31f19001 100644
--- a/include/asm-x86_64/i387.h
+++ b/include/asm-x86_64/i387.h
@@ -19,15 +19,15 @@
#include <asm/thread_info.h>
extern void fpu_init(void);
-extern void init_fpu(void);
-int save_i387(struct _fpstate *buf);
+extern void init_fpu(struct task_struct *child);
+extern int save_i387(struct _fpstate *buf);
static inline int need_signal_i387(struct task_struct *me)
{
if (!me->used_math)
return 0;
me->used_math = 0;
- if (me->thread_info->flags & _TIF_USEDFPU)
+ if (me->thread_info->status & TS_USEDFPU)
return 0;
return 1;
}
@@ -39,14 +39,14 @@ static inline int need_signal_i387(struct task_struct *me)
#define kernel_fpu_end() stts()
#define unlazy_fpu(tsk) do { \
- if ((tsk)->thread_info->flags & _TIF_USEDFPU) \
+ if ((tsk)->thread_info->status & TS_USEDFPU) \
save_init_fpu(tsk); \
} while (0)
#define clear_fpu(tsk) do { \
- if ((tsk)->thread_info->flags & _TIF_USEDFPU) { \
+ if ((tsk)->thread_info->status & TS_USEDFPU) { \
asm volatile("fwait"); \
- (tsk)->thread_info->flags &= ~_TIF_USEDFPU; \
+ (tsk)->thread_info->status &= ~TS_USEDFPU; \
stts(); \
} \
} while (0)
@@ -114,11 +114,11 @@ static inline int save_i387_checking(struct i387_fxsave_struct *fx)
static inline void kernel_fpu_begin(void)
{
- struct task_struct *me = current;
- if (test_tsk_thread_flag(me,TIF_USEDFPU)) {
- asm volatile("fxsave %0 ; fnclex"
- : "=m" (me->thread.i387.fxsave));
- clear_tsk_thread_flag(me, TIF_USEDFPU);
+ struct thread_info *me = current_thread_info();
+ if (me->status & TS_USEDFPU) {
+ asm volatile("rex64 ; fxsave %0 ; fnclex"
+ : "=m" (me->task->thread.i387.fxsave));
+ me->status &= ~TS_USEDFPU;
return;
}
clts();
@@ -128,7 +128,7 @@ static inline void save_init_fpu( struct task_struct *tsk )
{
asm volatile( "fxsave %0 ; fnclex"
: "=m" (tsk->thread.i387.fxsave));
- tsk->thread_info->flags &= ~TIF_USEDFPU;
+ tsk->thread_info->status &= ~TS_USEDFPU;
stts();
}
@@ -140,18 +140,4 @@ static inline int restore_i387(struct _fpstate *buf)
return restore_fpu_checking((struct i387_fxsave_struct *)buf);
}
-
-static inline void empty_fpu(struct task_struct *child)
-{
- if (!child->used_math) {
- /* Simulate an empty FPU. */
- memset(&child->thread.i387.fxsave,0,sizeof(struct i387_fxsave_struct));
- child->thread.i387.fxsave.cwd = 0x037f;
- child->thread.i387.fxsave.swd = 0;
- child->thread.i387.fxsave.twd = 0;
- child->thread.i387.fxsave.mxcsr = 0x1f80;
- }
- child->used_math = 1;
-}
-
#endif /* __ASM_X86_64_I387_H */
diff --git a/include/asm-x86_64/kdebug.h b/include/asm-x86_64/kdebug.h
index 745262828a27..316d01ac6eef 100644
--- a/include/asm-x86_64/kdebug.h
+++ b/include/asm-x86_64/kdebug.h
@@ -9,8 +9,13 @@ struct die_args {
struct pt_regs *regs;
const char *str;
long err;
+ int trapnr;
+ int signr;
};
+/* Note - you should never unregister because that can race with NMIs.
+ If you really want to do it first unregister - then synchronize_kernel - then free.
+ */
extern struct notifier_block *die_chain;
/* Grossly misnamed. */
@@ -21,15 +26,16 @@ enum die_val {
DIE_PANIC,
DIE_NMI,
DIE_DIE,
+ DIE_NMIWATCHDOG,
+ DIE_KERNELDEBUG,
+ DIE_TRAP,
+ DIE_GPF,
DIE_CALL,
- DIE_CPUINIT, /* not really a die, but .. */
- DIE_TRAPINIT, /* not really a die, but .. */
- DIE_STOP,
};
-static inline int notify_die(enum die_val val,char *str,struct pt_regs *regs,long err)
+static inline int notify_die(enum die_val val,char *str,struct pt_regs *regs,long err,int trap, int sig)
{
- struct die_args args = { regs: regs, str: str, err: err };
+ struct die_args args = { .regs=regs, .str=str, .err=err, .trapnr=trap,.signr=sig };
return notifier_call_chain(&die_chain, val, &args);
}
diff --git a/include/asm-x86_64/numa.h b/include/asm-x86_64/numa.h
index 7686e4dfd9f4..340e71cfc538 100644
--- a/include/asm-x86_64/numa.h
+++ b/include/asm-x86_64/numa.h
@@ -8,13 +8,11 @@ struct node {
u64 start,end;
};
-#define for_all_nodes(x) for ((x) = 0; (x) <= maxnode; (x)++) \
+#define for_all_nodes(x) for ((x) = 0; (x) < numnodes; (x)++) \
if ((1UL << (x)) & nodes_present)
-#define early_for_all_nodes(n) \
- for (n=0; n<MAXNODE;n++) if (nodes[n].start!=nodes[n].end)
-extern int compute_hash_shift(struct node *nodes, int numnodes, u64 maxmem);
+extern int compute_hash_shift(struct node *nodes);
extern unsigned long nodes_present;
#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
diff --git a/include/asm-x86_64/pci.h b/include/asm-x86_64/pci.h
index 74594c63e38c..d94c514a06ab 100644
--- a/include/asm-x86_64/pci.h
+++ b/include/asm-x86_64/pci.h
@@ -44,8 +44,7 @@ int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq);
struct pci_dev;
-extern int iommu_setup(char *opt, char **end);
-
+extern int iommu_setup(char *opt);
extern void pci_iommu_init(void);
/* Allocate and map kernel buffer using consistent mode DMA for a device.
@@ -77,10 +76,11 @@ extern void pci_free_consistent(struct pci_dev *hwdev, size_t size,
* Once the device is given the dma address, the device owns this memory
* until either pci_unmap_single or pci_dma_sync_single is performed.
*/
-extern dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr,
- size_t size, int direction);
+extern dma_addr_t __pci_map_single(struct pci_dev *hwdev, void *ptr,
+ size_t size, int direction, int flush);
+
-extern void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t addr,
+void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t addr,
size_t size, int direction);
/*
@@ -118,12 +118,16 @@ static inline void pci_dma_sync_sg(struct pci_dev *hwdev,
BUG_ON(direction == PCI_DMA_NONE);
}
-#define PCI_DMA_BUS_IS_PHYS 0
+/* The PCI address space does equal the physical memory
+ * address space. The networking and block device layers use
+ * this boolean for bounce buffer decisions.
+ */
+#define PCI_DMA_BUS_IS_PHYS (0)
#else
-static inline dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr,
- size_t size, int direction)
+static inline dma_addr_t __pci_map_single(struct pci_dev *hwdev, void *ptr,
+ size_t size, int direction, int flush)
{
dma_addr_t addr;
@@ -210,6 +214,11 @@ extern int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg,
extern void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg,
int nents, int direction);
+static inline dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr,
+ size_t size, int direction)
+{
+ return __pci_map_single(hwdev,ptr,size,direction,1);
+}
#define pci_unmap_page pci_unmap_single
diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h
index 7aa012007c7b..4d31ca2fef05 100644
--- a/include/asm-x86_64/pgtable.h
+++ b/include/asm-x86_64/pgtable.h
@@ -283,6 +283,7 @@ static inline int pmd_large(pmd_t pte) {
#define pml4_page(pml4) ((unsigned long) __va(pml4_val(pml4) & PTE_MASK))
#define pml4_index(address) ((address >> PML4_SHIFT) & (PTRS_PER_PML4-1))
#define pml4_offset_k(address) (init_level4_pgt + pml4_index(address))
+#define pml4_present(pml4) (pml4_val(pml4) & _PAGE_PRESENT)
#define mk_kernel_pml4(address) ((pml4_t){ (address) | _KERNPG_TABLE })
#define level3_offset_k(dir, address) ((pgd_t *) pml4_page(*(dir)) + pgd_index(address))
diff --git a/include/asm-x86_64/processor.h b/include/asm-x86_64/processor.h
index 705ccf54580c..49d3c6ee414d 100644
--- a/include/asm-x86_64/processor.h
+++ b/include/asm-x86_64/processor.h
@@ -269,7 +269,7 @@ struct mm_struct;
extern void release_thread(struct task_struct *);
/* Prepare to copy thread state - unlazy all lazy status */
-#define prepare_to_copy(tsk) do { } while (0)
+extern void prepare_to_copy(struct task_struct *tsk);
/*
* create a kernel thread without removing it from tasklists
@@ -308,8 +308,8 @@ extern inline void sync_core(void)
#define ARCH_HAS_PREFETCHW
#define ARCH_HAS_SPINLOCK_PREFETCH
-#define prefetch(x) __builtin_prefetch((x),0)
-#define prefetchw(x) __builtin_prefetch((x),1)
+#define prefetch(x) __builtin_prefetch((x),0,1)
+#define prefetchw(x) __builtin_prefetch((x),1,1)
#define spin_lock_prefetch(x) prefetchw(x)
#define cpu_relax() rep_nop()
diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h
index 785d812fe2e8..2ced019ad2ed 100644
--- a/include/asm-x86_64/proto.h
+++ b/include/asm-x86_64/proto.h
@@ -6,6 +6,7 @@
/* misc architecture specific prototypes */
struct cpuinfo_x86;
+struct pt_regs;
extern void get_cpu_vendor(struct cpuinfo_x86*);
extern void start_kernel(void);
@@ -41,6 +42,8 @@ extern void free_bootmem_generic(unsigned long phys, unsigned len);
extern unsigned long end_pfn_map;
extern void show_stack(unsigned long * rsp);
+extern void show_trace(unsigned long * rsp);
+extern void show_registers(struct pt_regs *regs);
extern void exception_table_check(void);
diff --git a/include/asm-x86_64/suspend.h b/include/asm-x86_64/suspend.h
index bccd85bb8dec..cc193889eefa 100644
--- a/include/asm-x86_64/suspend.h
+++ b/include/asm-x86_64/suspend.h
@@ -11,7 +11,7 @@ arch_prepare_suspend(void)
{
}
-/* image of the saved processor state */
+/* Image of the saved processor state. If you touch this, fix acpi_wakeup.S. */
struct saved_context {
u16 ds, es, fs, gs, ss;
unsigned long gs_base, gs_kernel_base, fs_base;
diff --git a/include/asm-x86_64/system.h b/include/asm-x86_64/system.h
index 439c801f015b..3d3846fc9e04 100644
--- a/include/asm-x86_64/system.h
+++ b/include/asm-x86_64/system.h
@@ -22,18 +22,18 @@
struct save_context_frame {
unsigned long rbp;
unsigned long rbx;
+ unsigned long r11;
+ unsigned long r10;
+ unsigned long r9;
+ unsigned long r8;
unsigned long rcx;
unsigned long rdx;
- unsigned long rsi;
- unsigned long rdi;
unsigned long r15;
unsigned long r14;
unsigned long r13;
unsigned long r12;
- unsigned long r11;
- unsigned long r10;
- unsigned long r9;
- unsigned long r8;
+ unsigned long rdi;
+ unsigned long rsi;
};
/* frame pointer must be last for get_wchan */
@@ -43,19 +43,20 @@ struct save_context_frame {
rbp needs to be always explicitely saved because gcc cannot clobber the
frame pointer and the scheduler is compiled with frame pointers. -AK */
#define SAVE_CONTEXT \
- __PUSH(r8) __PUSH(r9) __PUSH(r10) __PUSH(r11) __PUSH(r12) __PUSH(r13) \
- __PUSH(r14) __PUSH(r15) \
- __PUSH(rdi) __PUSH(rsi) \
- __PUSH(rdx) __PUSH(rcx) \
+ __PUSH(rsi) __PUSH(rdi) \
+ __PUSH(r12) __PUSH(r13) __PUSH(r14) __PUSH(r15) \
+ __PUSH(rdx) __PUSH(rcx) __PUSH(r8) __PUSH(r9) __PUSH(r10) __PUSH(r11) \
__PUSH(rbx) __PUSH(rbp)
#define RESTORE_CONTEXT \
__POP(rbp) __POP(rbx) \
- __POP(rcx) __POP(rdx) \
- __POP(rsi) __POP(rdi) \
- __POP(r15) __POP(r14) __POP(r13) __POP(r12) __POP(r11) __POP(r10) \
- __POP(r9) __POP(r8)
+ __POP(r11) __POP(r10) __POP(r9) __POP(r8) __POP(rcx) __POP(rdx) \
+ __POP(r15) __POP(r14) __POP(r13) __POP(r12) \
+ __POP(rdi) __POP(rsi)
/* RED-PEN: pipeline stall on ret because it is not predicted */
+/* RED-PEN: the register saving could be optimized */
+/* frame pointer must be last for get_wchan */
+
#define switch_to(prev,next,last) \
asm volatile(SAVE_CONTEXT \
"movq %%rsp,%[prevrsp]\n\t" \
diff --git a/include/asm-x86_64/thread_info.h b/include/asm-x86_64/thread_info.h
index f841e925bef0..11e8d21989c7 100644
--- a/include/asm-x86_64/thread_info.h
+++ b/include/asm-x86_64/thread_info.h
@@ -27,6 +27,7 @@ struct thread_info {
struct task_struct *task; /* main task structure */
struct exec_domain *exec_domain; /* execution domain */
__u32 flags; /* low level flags */
+ __u32 status; /* thread synchronous flags */
__u32 cpu; /* current CPU */
int preempt_count;
@@ -100,16 +101,14 @@ static inline struct thread_info *stack_thread_info(void)
#define TIF_SIGPENDING 2 /* signal pending */
#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/
-#define TIF_USEDFPU 16 /* FPU was used by this task this quantum */
-#define TIF_POLLING_NRFLAG 17 /* true if poll_idle() is polling TIF_NEED_RESCHED */
-#define TIF_IA32 18 /* 32bit process */
+#define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */
+#define TIF_IA32 17 /* 32bit process */
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
#define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP)
#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
-#define _TIF_USEDFPU (1<<TIF_USEDFPU)
#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
#define _TIF_IA32 (1<<TIF_IA32)
@@ -118,6 +117,15 @@ static inline struct thread_info *stack_thread_info(void)
#define PREEMPT_ACTIVE 0x4000000
+/*
+ * Thread-synchronous status.
+ *
+ * This is different from the flags in that nobody else
+ * ever touches our thread-synchronous status, so we don't
+ * have to worry about atomic accesses.
+ */
+#define TS_USEDFPU 0x0001 /* FPU was used by this task this quantum (SMP) */
+
#endif /* __KERNEL__ */
#endif /* _ASM_THREAD_INFO_H */
diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h
index 2c2d2e3fd5f8..7702073edc1c 100644
--- a/include/asm-x86_64/unistd.h
+++ b/include/asm-x86_64/unistd.h
@@ -520,8 +520,10 @@ __SYSCALL(__NR_clock_gettime, sys_clock_gettime)
__SYSCALL(__NR_clock_getres, sys_clock_getres)
#define __NR_clock_nanosleep 230
__SYSCALL(__NR_clock_nanosleep, sys_clock_nanosleep)
+#define __NR_exit_group 231
+__SYSCALL(__NR_exit_group, sys_exit_group)
-#define __NR_syscall_max __NR_clock_nanosleep
+#define __NR_syscall_max __NR_exit_group
#ifndef __NO_STUBS
/* user-visible error numbers are in the range -1 - -4095 */