From fb4291acc48afdbe1c77854bd3ea6ff89c62d155 Mon Sep 17 00:00:00 2001 From: David Mosberger Date: Wed, 1 Jan 2003 21:21:08 -0800 Subject: ia64: Add missing include of kernel/config.h. --- include/asm-ia64/intrinsics.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/asm-ia64/intrinsics.h b/include/asm-ia64/intrinsics.h index d2977f600f80..5ff113fda5e7 100644 --- a/include/asm-ia64/intrinsics.h +++ b/include/asm-ia64/intrinsics.h @@ -8,6 +8,8 @@ * David Mosberger-Tang */ +#include <linux/config.h> + /* * Force an unresolved reference if someone tries to use * ia64_fetch_and_add() with a bad value. -- cgit v1.2.3 From ef222347a8c9069ed5068b9ceafea224b0762c00 Mon Sep 17 00:00:00 2001 From: David Mosberger Date: Tue, 14 Jan 2003 05:57:35 -0800 Subject: ia64: Various updates: ia32 subsystem fix, tracing-support for mmu-context switching, etc. --- Documentation/ia64/README | 20 +++--- arch/ia64/ia32/sys_ia32.c | 4 ++ arch/ia64/kernel/acpi.c | 22 ++++++ arch/ia64/kernel/ivt.S | 154 ++++++++++++++++++++++------------------- arch/ia64/kernel/pal.S | 17 +++-- arch/ia64/kernel/process.c | 2 +- arch/ia64/kernel/ptrace.c | 24 +++---- arch/ia64/vmlinux.lds.S | 4 -- include/asm-ia64/mmu_context.h | 34 +++++++++ include/asm-ia64/spinlock.h | 23 ++++++ include/asm-ia64/tlbflush.h | 7 +- 11 files changed, 201 insertions(+), 110 deletions(-) diff --git a/Documentation/ia64/README b/Documentation/ia64/README index 7163ae745151..aa17f2154cba 100644 --- a/Documentation/ia64/README +++ b/Documentation/ia64/README @@ -4,40 +4,40 @@ platform. This document provides information specific to IA-64 ONLY, to get additional information about the Linux kernel also read the original Linux README provided with the kernel. - + INSTALLING the kernel: - IA-64 kernel installation is the same as the other platforms, see original README for details. - - + + SOFTWARE REQUIREMENTS Compiling and running this kernel requires an IA-64 compliant GCC compiler. And various software packages also compiled with an IA-64 compliant GCC compiler. - + CONFIGURING the kernel: Configuration is the same, see original README for details. - - + + COMPILING the kernel: - Compiling this kernel doesn't differ from other platform so read the original README for details BUT make sure you have an IA-64 compliant GCC compiler. - + IA-64 SPECIFICS - General issues: - + o Hardly any performance tuning has been done. Obvious targets include the library routines (IP checksum, etc.). Less obvious targets include making sure we don't flush the TLB needlessly, etc. - + o SMP locks cleanup/optimization - + o IA32 support. Currently experimental. It mostly works.
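[Editor's note: the first patch above adds a missing configuration include to intrinsics.h. As a minimal illustrative sketch -- not taken from any patch in this series, and with EXAMPLE_MB() a hypothetical macro -- this is the failure mode such an include prevents: in 2.4/2.5-era kernels the CONFIG_* symbols came from <linux/config.h>, so a header that tests one of them without that include silently compiles the !CONFIG_* variant.]

	#include <linux/config.h>	/* provides CONFIG_SMP et al. in kernels of this era */

	#ifdef CONFIG_SMP
	# define EXAMPLE_MB()	asm volatile ("mf" ::: "memory")	/* ia64 memory fence */
	#else
	# define EXAMPLE_MB()	asm volatile ("" ::: "memory")		/* compiler barrier suffices on UP */
	#endif

[Without the include, CONFIG_SMP may be undefined at this point even in an SMP build, and the UP branch is chosen with no diagnostic.]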
diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index a7ca7341a90a..7376af3a4991 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -2010,6 +2010,10 @@ semctl32 (int first, int second, int third, void *uptr) else fourth.__pad = (void *)A(pad); switch (third) { + default: + err = -EINVAL; + break; + case IPC_INFO: case IPC_RMID: case IPC_SET: diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index fa757e40fa2f..8c12822dde38 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -888,4 +888,26 @@ acpi_irq_to_vector (u32 irq) return gsi_to_vector(irq); } +int __init +acpi_register_irq (u32 gsi, u32 polarity, u32 trigger) +{ + int vector = 0; + u32 irq_base; + char *iosapic_address; + + if (acpi_madt->flags.pcat_compat && (gsi < 16)) + return isa_irq_to_vector(gsi); + + if (!iosapic_register_intr) + return 0; + + /* Find the IOSAPIC */ + if (!acpi_find_iosapic(gsi, &irq_base, &iosapic_address)) { + /* Turn it on */ + vector = iosapic_register_intr (gsi, polarity, trigger, + irq_base, iosapic_address); + } + return vector; +} + #endif /* CONFIG_ACPI_BOOT */ diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S index 5dc8e49c345b..110564c1ed8b 100644 --- a/arch/ia64/kernel/ivt.S +++ b/arch/ia64/kernel/ivt.S @@ -192,7 +192,7 @@ ENTRY(vhpt_miss) rfi END(vhpt_miss) - .align 1024 + .org ia64_ivt+0x400 ///////////////////////////////////////////////////////////////////////////////////////// // 0x0400 Entry 1 (size 64 bundles) ITLB (21) ENTRY(itlb_miss) @@ -206,7 +206,7 @@ ENTRY(itlb_miss) mov r16=cr.ifa // get virtual address mov r29=b0 // save b0 mov r31=pr // save predicates -itlb_fault: +.itlb_fault: mov r17=cr.iha // get virtual address of L3 PTE movl r30=1f // load nested fault continuation point ;; @@ -230,7 +230,7 @@ itlb_fault: rfi END(itlb_miss) - .align 1024 + .org ia64_ivt+0x0800 ///////////////////////////////////////////////////////////////////////////////////////// // 0x0800 Entry 2 (size 64 bundles) DTLB (9,48) ENTRY(dtlb_miss) @@ -268,7 +268,7 @@ dtlb_fault: rfi END(dtlb_miss) - .align 1024 + .org ia64_ivt+0x0c00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19) ENTRY(alt_itlb_miss) @@ -288,7 +288,7 @@ ENTRY(alt_itlb_miss) ;; (p8) mov cr.iha=r17 (p8) mov r29=b0 // save b0 -(p8) br.cond.dptk itlb_fault +(p8) br.cond.dptk .itlb_fault #endif extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl and r19=r19,r16 // clear ed, reserved bits, and PTE control bits @@ -306,7 +306,7 @@ ENTRY(alt_itlb_miss) rfi END(alt_itlb_miss) - .align 1024 + .org ia64_ivt+0x1000 ///////////////////////////////////////////////////////////////////////////////////////// // 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46) ENTRY(alt_dtlb_miss) @@ -379,7 +379,7 @@ ENTRY(page_fault) br.call.sptk.many b6=ia64_do_page_fault // ignore return address END(page_fault) - .align 1024 + .org ia64_ivt+0x1400 ///////////////////////////////////////////////////////////////////////////////////////// // 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45) ENTRY(nested_dtlb_miss) @@ -440,7 +440,7 @@ ENTRY(nested_dtlb_miss) br.sptk.many b0 // return to continuation point END(nested_dtlb_miss) - .align 1024 + .org ia64_ivt+0x1800 ///////////////////////////////////////////////////////////////////////////////////////// // 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24) ENTRY(ikey_miss) @@ -448,7 +448,7 @@ ENTRY(ikey_miss) FAULT(6) END(ikey_miss) - .align 
1024 + .org ia64_ivt+0x1c00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) ENTRY(dkey_miss) @@ -456,7 +456,7 @@ ENTRY(dkey_miss) FAULT(7) END(dkey_miss) - .align 1024 + .org ia64_ivt+0x2000 ///////////////////////////////////////////////////////////////////////////////////////// // 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54) ENTRY(dirty_bit) @@ -512,7 +512,7 @@ ENTRY(dirty_bit) rfi END(idirty_bit) - .align 1024 + .org ia64_ivt+0x2400 ///////////////////////////////////////////////////////////////////////////////////////// // 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27) ENTRY(iaccess_bit) @@ -571,7 +571,7 @@ ENTRY(iaccess_bit) rfi END(iaccess_bit) - .align 1024 + .org ia64_ivt+0x2800 ///////////////////////////////////////////////////////////////////////////////////////// // 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55) ENTRY(daccess_bit) @@ -618,7 +618,7 @@ ENTRY(daccess_bit) rfi END(daccess_bit) - .align 1024 + .org ia64_ivt+0x2c00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x2c00 Entry 11 (size 64 bundles) Break instruction (33) ENTRY(break_fault) @@ -690,7 +690,7 @@ ENTRY(break_fault) // NOT REACHED END(break_fault) -ENTRY(demine_args) +ENTRY_MIN_ALIGN(demine_args) alloc r2=ar.pfs,8,0,0,0 tnat.nz p8,p0=in0 tnat.nz p9,p0=in1 @@ -719,7 +719,7 @@ ENTRY(demine_args) br.ret.sptk.many rp END(demine_args) - .align 1024 + .org ia64_ivt+0x3000 ///////////////////////////////////////////////////////////////////////////////////////// // 0x3000 Entry 12 (size 64 bundles) External Interrupt (4) ENTRY(interrupt) @@ -746,19 +746,19 @@ ENTRY(interrupt) br.call.sptk.many b6=ia64_handle_irq END(interrupt) - .align 1024 + .org ia64_ivt+0x3400 ///////////////////////////////////////////////////////////////////////////////////////// // 0x3400 Entry 13 (size 64 bundles) Reserved DBG_FAULT(13) FAULT(13) - .align 1024 + .org ia64_ivt+0x3800 ///////////////////////////////////////////////////////////////////////////////////////// // 0x3800 Entry 14 (size 64 bundles) Reserved DBG_FAULT(14) FAULT(14) - .align 1024 + .org ia64_ivt+0x3c00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x3c00 Entry 15 (size 64 bundles) Reserved DBG_FAULT(15) @@ -803,7 +803,7 @@ ENTRY(dispatch_illegal_op_fault) br.sptk.many ia64_leave_kernel END(dispatch_illegal_op_fault) - .align 1024 + .org ia64_ivt+0x4000 ///////////////////////////////////////////////////////////////////////////////////////// // 0x4000 Entry 16 (size 64 bundles) Reserved DBG_FAULT(16) @@ -893,7 +893,7 @@ END(dispatch_to_ia32_handler) #endif /* CONFIG_IA32_SUPPORT */ - .align 1024 + .org ia64_ivt+0x4400 ///////////////////////////////////////////////////////////////////////////////////////// // 0x4400 Entry 17 (size 64 bundles) Reserved DBG_FAULT(17) @@ -925,7 +925,7 @@ ENTRY(non_syscall) br.call.sptk.many b6=ia64_bad_break // avoid WAW on CFM and ignore return addr END(non_syscall) - .align 1024 + .org ia64_ivt+0x4800 ///////////////////////////////////////////////////////////////////////////////////////// // 0x4800 Entry 18 (size 64 bundles) Reserved DBG_FAULT(18) @@ -959,7 +959,7 @@ ENTRY(dispatch_unaligned_handler) br.sptk.many ia64_prepare_handle_unaligned END(dispatch_unaligned_handler) - .align 1024 + .org ia64_ivt+0x4c00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x4c00 Entry 19 (size 64
bundles) Reserved DBG_FAULT(19) @@ -1005,7 +1005,7 @@ END(dispatch_to_fault_handler) // --- End of long entries, Beginning of short entries // - .align 1024 + .org ia64_ivt+0x5000 ///////////////////////////////////////////////////////////////////////////////////////// // 0x5000 Entry 20 (size 16 bundles) Page Not Present (10,22,49) ENTRY(page_not_present) @@ -1025,7 +1025,7 @@ ENTRY(page_not_present) br.sptk.many page_fault END(page_not_present) - .align 256 + .org ia64_ivt+0x5100 ///////////////////////////////////////////////////////////////////////////////////////// // 0x5100 Entry 21 (size 16 bundles) Key Permission (13,25,52) ENTRY(key_permission) @@ -1038,7 +1038,7 @@ ENTRY(key_permission) br.sptk.many page_fault END(key_permission) - .align 256 + .org ia64_ivt+0x5200 ///////////////////////////////////////////////////////////////////////////////////////// // 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26) ENTRY(iaccess_rights) @@ -1051,7 +1051,7 @@ ENTRY(iaccess_rights) br.sptk.many page_fault END(iaccess_rights) - .align 256 + .org ia64_ivt+0x5300 ///////////////////////////////////////////////////////////////////////////////////////// // 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53) ENTRY(daccess_rights) @@ -1064,7 +1064,7 @@ ENTRY(daccess_rights) br.sptk.many page_fault END(daccess_rights) - .align 256 + .org ia64_ivt+0x5400 ///////////////////////////////////////////////////////////////////////////////////////// // 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39) ENTRY(general_exception) @@ -1079,7 +1079,7 @@ ENTRY(general_exception) br.sptk.many dispatch_to_fault_handler END(general_exception) - .align 256 + .org ia64_ivt+0x5500 ///////////////////////////////////////////////////////////////////////////////////////// // 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35) ENTRY(disabled_fp_reg) @@ -1092,7 +1092,7 @@ ENTRY(disabled_fp_reg) br.sptk.many dispatch_to_fault_handler END(disabled_fp_reg) - .align 256 + .org ia64_ivt+0x5600 ///////////////////////////////////////////////////////////////////////////////////////// // 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50) ENTRY(nat_consumption) @@ -1100,7 +1100,7 @@ ENTRY(nat_consumption) FAULT(26) END(nat_consumption) - .align 256 + .org ia64_ivt+0x5700 ///////////////////////////////////////////////////////////////////////////////////////// // 0x5700 Entry 27 (size 16 bundles) Speculation (40) ENTRY(speculation_vector) @@ -1137,13 +1137,13 @@ ENTRY(speculation_vector) rfi // and go back END(speculation_vector) - .align 256 + .org ia64_ivt+0x5800 ///////////////////////////////////////////////////////////////////////////////////////// // 0x5800 Entry 28 (size 16 bundles) Reserved DBG_FAULT(28) FAULT(28) - .align 256 + .org ia64_ivt+0x5900 ///////////////////////////////////////////////////////////////////////////////////////// // 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56) ENTRY(debug_vector) @@ -1151,7 +1151,7 @@ ENTRY(debug_vector) FAULT(29) END(debug_vector) - .align 256 + .org ia64_ivt+0x5a00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57) ENTRY(unaligned_access) @@ -1162,91 +1162,103 @@ ENTRY(unaligned_access) br.sptk.many dispatch_unaligned_handler END(unaligned_access) - .align 256 + .org ia64_ivt+0x5b00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference
(57) +ENTRY(unsupported_data_reference) DBG_FAULT(31) FAULT(31) +END(unsupported_data_reference) - .align 256 + .org ia64_ivt+0x5c00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x5c00 Entry 32 (size 16 bundles) Floating-Point Fault (64) +ENTRY(floating_point_fault) DBG_FAULT(32) FAULT(32) +END(floating_point_fault) - .align 256 + .org ia64_ivt+0x5d00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66) +ENTRY(floating_point_trap) DBG_FAULT(33) FAULT(33) +END(floating_point_trap) - .align 256 + .org ia64_ivt+0x5e00 ///////////////////////////////////////////////////////////////////////////////////////// -// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Tranfer Trap (66) +// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66) +ENTRY(lower_privilege_trap) DBG_FAULT(34) FAULT(34) +END(lower_privilege_trap) - .align 256 + .org ia64_ivt+0x5f00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68) +ENTRY(taken_branch_trap) DBG_FAULT(35) FAULT(35) +END(taken_branch_trap) - .align 256 + .org ia64_ivt+0x6000 ///////////////////////////////////////////////////////////////////////////////////////// // 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69) +ENTRY(single_step_trap) DBG_FAULT(36) FAULT(36) +END(single_step_trap) - .align 256 + .org ia64_ivt+0x6100 ///////////////////////////////////////////////////////////////////////////////////////// // 0x6100 Entry 37 (size 16 bundles) Reserved DBG_FAULT(37) FAULT(37) - .align 256 + .org ia64_ivt+0x6200 ///////////////////////////////////////////////////////////////////////////////////////// // 0x6200 Entry 38 (size 16 bundles) Reserved DBG_FAULT(38) FAULT(38) - .align 256 + .org ia64_ivt+0x6300 ///////////////////////////////////////////////////////////////////////////////////////// // 0x6300 Entry 39 (size 16 bundles) Reserved DBG_FAULT(39) FAULT(39) - .align 256 + .org ia64_ivt+0x6400 ///////////////////////////////////////////////////////////////////////////////////////// // 0x6400 Entry 40 (size 16 bundles) Reserved DBG_FAULT(40) FAULT(40) - .align 256 + .org ia64_ivt+0x6500 ///////////////////////////////////////////////////////////////////////////////////////// // 0x6500 Entry 41 (size 16 bundles) Reserved DBG_FAULT(41) FAULT(41) - .align 256 + .org ia64_ivt+0x6600 ///////////////////////////////////////////////////////////////////////////////////////// // 0x6600 Entry 42 (size 16 bundles) Reserved DBG_FAULT(42) FAULT(42) - .align 256 + .org ia64_ivt+0x6700 ///////////////////////////////////////////////////////////////////////////////////////// // 0x6700 Entry 43 (size 16 bundles) Reserved DBG_FAULT(43) FAULT(43) - .align 256 + .org ia64_ivt+0x6800 ///////////////////////////////////////////////////////////////////////////////////////// // 0x6800 Entry 44 (size 16 bundles) Reserved DBG_FAULT(44) FAULT(44) - .align 256 + .org ia64_ivt+0x6900 ///////////////////////////////////////////////////////////////////////////////////////// // 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception (17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77) ENTRY(ia32_exception) @@ -1254,7 +1266,7 @@ ENTRY(ia32_exception) FAULT(45) END(ia32_exception) - .align 256 + .org ia64_ivt+0x6a00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept
(30,31,59,70,71) ENTRY(ia32_intercept) @@ -1284,7 +1296,7 @@ ENTRY(ia32_intercept) FAULT(46) END(ia32_intercept) - .align 256 + .org ia64_ivt+0x6b00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt (74) ENTRY(ia32_interrupt) @@ -1297,121 +1309,121 @@ ENTRY(ia32_interrupt) #endif END(ia32_interrupt) - .align 256 + .org ia64_ivt+0x6c00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x6c00 Entry 48 (size 16 bundles) Reserved DBG_FAULT(48) FAULT(48) - .align 256 + .org ia64_ivt+0x6d00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x6d00 Entry 49 (size 16 bundles) Reserved DBG_FAULT(49) FAULT(49) - .align 256 + .org ia64_ivt+0x6e00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x6e00 Entry 50 (size 16 bundles) Reserved DBG_FAULT(50) FAULT(50) - .align 256 + .org ia64_ivt+0x6f00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x6f00 Entry 51 (size 16 bundles) Reserved DBG_FAULT(51) FAULT(51) - .align 256 + .org ia64_ivt+0x7000 ///////////////////////////////////////////////////////////////////////////////////////// // 0x7000 Entry 52 (size 16 bundles) Reserved DBG_FAULT(52) FAULT(52) - .align 256 + .org ia64_ivt+0x7100 ///////////////////////////////////////////////////////////////////////////////////////// // 0x7100 Entry 53 (size 16 bundles) Reserved DBG_FAULT(53) FAULT(53) - .align 256 + .org ia64_ivt+0x7200 ///////////////////////////////////////////////////////////////////////////////////////// // 0x7200 Entry 54 (size 16 bundles) Reserved DBG_FAULT(54) FAULT(54) - .align 256 + .org ia64_ivt+0x7300 ///////////////////////////////////////////////////////////////////////////////////////// // 0x7300 Entry 55 (size 16 bundles) Reserved DBG_FAULT(55) FAULT(55) - .align 256 + .org ia64_ivt+0x7400 ///////////////////////////////////////////////////////////////////////////////////////// // 0x7400 Entry 56 (size 16 bundles) Reserved DBG_FAULT(56) FAULT(56) - .align 256 + .org ia64_ivt+0x7500 ///////////////////////////////////////////////////////////////////////////////////////// // 0x7500 Entry 57 (size 16 bundles) Reserved DBG_FAULT(57) FAULT(57) - .align 256 + .org ia64_ivt+0x7600 ///////////////////////////////////////////////////////////////////////////////////////// // 0x7600 Entry 58 (size 16 bundles) Reserved DBG_FAULT(58) FAULT(58) - .align 256 + .org ia64_ivt+0x7700 ///////////////////////////////////////////////////////////////////////////////////////// // 0x7700 Entry 59 (size 16 bundles) Reserved DBG_FAULT(59) FAULT(59) - .align 256 + .org ia64_ivt+0x7800 ///////////////////////////////////////////////////////////////////////////////////////// // 0x7800 Entry 60 (size 16 bundles) Reserved DBG_FAULT(60) FAULT(60) - .align 256 + .org ia64_ivt+0x7900 ///////////////////////////////////////////////////////////////////////////////////////// // 0x7900 Entry 61 (size 16 bundles) Reserved DBG_FAULT(61) FAULT(61) - .align 256 + .org ia64_ivt+0x7a00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x7a00 Entry 62 (size 16 bundles) Reserved DBG_FAULT(62) FAULT(62) - .align 256 + .org ia64_ivt+0x7b00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x7b00 Entry 63 (size 16 bundles) Reserved DBG_FAULT(63) FAULT(63) - .align 256 + .org ia64_ivt+0x7c00
///////////////////////////////////////////////////////////////////////////////////////// // 0x7c00 Entry 64 (size 16 bundles) Reserved DBG_FAULT(64) FAULT(64) - .align 256 + .org ia64_ivt+0x7d00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x7d00 Entry 65 (size 16 bundles) Reserved DBG_FAULT(65) FAULT(65) - .align 256 + .org ia64_ivt+0x7e00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x7e00 Entry 66 (size 16 bundles) Reserved DBG_FAULT(66) FAULT(66) - .align 256 + .org ia64_ivt+0x7f00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x7f00 Entry 67 (size 16 bundles) Reserved DBG_FAULT(67) diff --git a/arch/ia64/kernel/pal.S b/arch/ia64/kernel/pal.S index ae0a0305d86e..d44041380e87 100644 --- a/arch/ia64/kernel/pal.S +++ b/arch/ia64/kernel/pal.S @@ -4,7 +4,7 @@ * * Copyright (C) 1999 Don Dugger * Copyright (C) 1999 Walt Drummond - * Copyright (C) 1999-2001 Hewlett-Packard Co + * Copyright (C) 1999-2001, 2003 Hewlett-Packard Co * David Mosberger * Stephane Eranian * @@ -114,7 +114,7 @@ GLOBAL_ENTRY(ia64_pal_call_stacked) ;; rsm psr.i mov b7 = loc2 - ;; + ;; br.call.sptk.many rp=b7 // now make the call .ret0: mov psr.l = loc3 mov ar.pfs = loc1 @@ -131,15 +131,15 @@ END(ia64_pal_call_stacked) * in0 Index of PAL service * in2 - in3 Remaning PAL arguments * - * PSR_DB, PSR_LP, PSR_TB, PSR_ID, PSR_DA are never set by the kernel. + * PSR_LP, PSR_TB, PSR_ID, PSR_DA are never set by the kernel. * So we don't need to clear them. */ -#define PAL_PSR_BITS_TO_CLEAR \ - (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT | \ - IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \ +#define PAL_PSR_BITS_TO_CLEAR \ + (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_DB | IA64_PSR_RT | \ + IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \ IA64_PSR_DFL | IA64_PSR_DFH) -#define PAL_PSR_BITS_TO_SET \ +#define PAL_PSR_BITS_TO_SET \ (IA64_PSR_BN) @@ -161,7 +161,7 @@ GLOBAL_ENTRY(ia64_pal_call_phys_static) ;; mov loc3 = psr // save psr adds r8 = 1f-1b,r8 // calculate return address for call - ;; + ;; mov loc4=ar.rsc // save RSE configuration dep.z loc2=loc2,0,61 // convert pal entry point to physical dep.z r8=r8,0,61 // convert rp to physical @@ -275,7 +275,6 @@ END(ia64_save_scratch_fpregs) * Inputs: * in0 Address of stack storage for fp regs */ - GLOBAL_ENTRY(ia64_load_scratch_fpregs) alloc r3=ar.pfs,1,0,0,0 add r2=16,in0 diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index f26b3fe2d28e..a360aa1ecd00 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -96,7 +96,7 @@ show_regs (struct pt_regs *regs) { unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri; - printk("\nPid: %d, comm: %20s\n", current->pid, current->comm); + printk("\nPid: %d, CPU %d, comm: %20s\n", current->pid, smp_processor_id(), current->comm); printk("psr : %016lx ifs : %016lx ip : [<%016lx>] %s\n", regs->cr_ipsr, regs->cr_ifs, ip, print_tainted()); print_symbol("ip is at %s\n", ip); diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 880c22a1e961..b5270c92677d 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -833,21 +833,19 @@ access_uarea (struct task_struct *child, unsigned long addr, unsigned long *data return -1; } #ifdef CONFIG_PERFMON - /* - * Check if debug registers are used - * by perfmon. This test must be done once we know that we can - * do the operation, i.e.
the arguments are all valid, but before - * we start modifying the state. + /* + * Check if debug registers are used by perfmon. This test must be done + * once we know that we can do the operation, i.e. the arguments are all + * valid, but before we start modifying the state. * - * Perfmon needs to keep a count of how many processes are - * trying to modify the debug registers for system wide monitoring - * sessions. + * Perfmon needs to keep a count of how many processes are trying to + * modify the debug registers for system wide monitoring sessions. * - * We also include read access here, because they may cause - * the PMU-installed debug register state (dbr[], ibr[]) to - * be reset. The two arrays are also used by perfmon, but - * we do not use IA64_THREAD_DBG_VALID. The registers are restored - * by the PMU context switch code. + * We also include read access here, because they may cause the + * PMU-installed debug register state (dbr[], ibr[]) to be reset. The two + * arrays are also used by perfmon, but we do not use + * IA64_THREAD_DBG_VALID. The registers are restored by the PMU context + * switch code. */ if (pfm_use_debug_registers(child)) return -1; #endif diff --git a/arch/ia64/vmlinux.lds.S b/arch/ia64/vmlinux.lds.S index 69ba842dd76d..d832df2affc3 100644 --- a/arch/ia64/vmlinux.lds.S +++ b/arch/ia64/vmlinux.lds.S @@ -142,10 +142,6 @@ SECTIONS .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - PAGE_OFFSET) { *(.data.cacheline_aligned) } - /* Kernel symbol names for modules: */ - .kstrtab : AT(ADDR(.kstrtab) - PAGE_OFFSET) - { *(.kstrtab) } - /* Per-cpu data: */ . = ALIGN(PERCPU_PAGE_SIZE); __phys_per_cpu_start = .; diff --git a/include/asm-ia64/mmu_context.h b/include/asm-ia64/mmu_context.h index 9316945bd04f..46fdd9e8c938 100644 --- a/include/asm-ia64/mmu_context.h +++ b/include/asm-ia64/mmu_context.h @@ -28,6 +28,36 @@ #include +#define MMU_CONTEXT_DEBUG 0 + +#if MMU_CONTEXT_DEBUG + +#include + +extern struct mmu_trace_entry { + char op; + u8 cpu; + u32 context; + void *mm; +} mmu_tbuf[1024]; + +extern volatile int mmu_tbuf_index; + +# define MMU_TRACE(_op,_cpu,_mm,_ctx) \ +do { \ + int i = __sync_fetch_and_add(&mmu_tbuf_index, 1) % ARRAY_SIZE(mmu_tbuf); \ + struct mmu_trace_entry e; \ + e.op = (_op); \ + e.cpu = (_cpu); \ + e.mm = (_mm); \ + e.context = (_ctx); \ + mmu_tbuf[i] = e; \ +} while (0) + +#else +# define MMU_TRACE(op,cpu,mm,ctx) do { ; } while (0) +#endif + struct ia64_ctx { spinlock_t lock; unsigned int next; /* next context number to use */ @@ -91,6 +121,7 @@ get_mmu_context (struct mm_struct *mm) static inline int init_new_context (struct task_struct *p, struct mm_struct *mm) { + MMU_TRACE('N', smp_processor_id(), mm, 0); mm->context = 0; return 0; } @@ -99,6 +130,7 @@ static inline void destroy_context (struct mm_struct *mm) { /* Nothing to do. 
*/ + MMU_TRACE('D', smp_processor_id(), mm, mm->context); } static inline void @@ -138,7 +170,9 @@ activate_context (struct mm_struct *mm) do { context = get_mmu_context(mm); + MMU_TRACE('A', smp_processor_id(), mm, context); reload_context(context); + MMU_TRACE('a', smp_processor_id(), mm, context); /* in the unlikely event of a TLB-flush by another thread, redo the load: */ } while (unlikely(context != mm->context)); } diff --git a/include/asm-ia64/spinlock.h b/include/asm-ia64/spinlock.h index 297e6b09e30d..46c473d80d44 100644 --- a/include/asm-ia64/spinlock.h +++ b/include/asm-ia64/spinlock.h @@ -74,6 +74,27 @@ typedef struct { #define SPIN_LOCK_UNLOCKED (spinlock_t) { 0 } #define spin_lock_init(x) ((x)->lock = 0) +#define DEBUG_SPIN_LOCK 0 + +#if DEBUG_SPIN_LOCK + +#include + +#define _raw_spin_lock(x) \ +do { \ + unsigned long _timeout = 1000000000; \ + volatile unsigned int _old = 0, _new = 1, *_ptr = &((x)->lock); \ + do { \ + if (_timeout-- == 0) { \ + extern void dump_stack (void); \ + printk("kernel DEADLOCK at %s:%d?\n", __FILE__, __LINE__); \ + dump_stack(); \ + } \ + } while (__sync_val_compare_and_swap(_ptr, _old, _new) != _old); \ +} while (0) + +#else + /* * Streamlined test_and_set_bit(0, (x)). We use test-and-test-and-set * rather than a simple xchg to avoid writing the cache-line when @@ -95,6 +116,8 @@ typedef struct { ";;\n" \ :: "r"(&(x)->lock) : "ar.ccv", "p7", "r2", "r29", "memory") +#endif /* !DEBUG_SPIN_LOCK */ + #define spin_is_locked(x) ((x)->lock != 0) #define _raw_spin_unlock(x) do { barrier(); ((spinlock_t *) x)->lock = 0; } while (0) #define _raw_spin_trylock(x) (cmpxchg_acq(&(x)->lock, 0, 1) == 0) diff --git a/include/asm-ia64/tlbflush.h b/include/asm-ia64/tlbflush.h index 21ca04115809..dd49222e8f08 100644 --- a/include/asm-ia64/tlbflush.h +++ b/include/asm-ia64/tlbflush.h @@ -47,19 +47,22 @@ local_finish_flush_tlb_mm (struct mm_struct *mm) static inline void flush_tlb_mm (struct mm_struct *mm) { + MMU_TRACE('F', smp_processor_id(), mm, mm->context); if (!mm) - return; + goto out; mm->context = 0; if (atomic_read(&mm->mm_users) == 0) - return; /* happens as a result of exit_mmap() */ + goto out; /* happens as a result of exit_mmap() */ #ifdef CONFIG_SMP smp_flush_tlb_mm(mm); #else local_finish_flush_tlb_mm(mm); #endif + out: + MMU_TRACE('f', smp_processor_id(), mm, mm->context); } extern void flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long end); -- cgit v1.2.3 From 50d5299def95cea69daf36eeb95e3d1183c9e6fa Mon Sep 17 00:00:00 2001 From: David Mosberger Date: Tue, 14 Jan 2003 06:14:22 -0800 Subject: ia64: Light-weight system call support (aka, "fsyscalls"). This does not (yet) accelerate normal system calls, but it puts the infrastructure in place and lets you write fsyscall-handlers to your heart's content. A null system call (such as getpid()) can now run in as little as 35 cycles!
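[Editor's note: as orientation for the patch below, here is a C paraphrase -- illustrative only, not code from the patch -- of the table dispatch that the new gate-page entry point syscall_via_epc (arch/ia64/kernel/gate.S) performs in assembly. The function name dispatch_fsyscall is invented, and the error return is simplified: the real code returns with r8=ENOSYS and r10=-1, per the ia64 syscall convention.]

	#include <errno.h>

	typedef long (*fsyscall_handler_t) (void);
	extern fsyscall_handler_t fsyscall_table[];	/* built in fsys.S, below */

	long
	dispatch_fsyscall (unsigned long num)
	{
		unsigned long idx = num - 1024;	/* ia64 syscall numbers start at 1024 */

		/* one unsigned compare catches both num < 1024 and num > 1024+255: */
		if (idx > 255)
			return -ENOSYS;
		/* entries without a hand-tuned handler hold fsys_fallback_syscall,
		   which re-enters the kernel via the ordinary break-based path */
		return (*fsyscall_table[idx]) ();
	}

[The rest of the patch -- the pKStk/pUStk renaming, the on_ustack flag in minstate.h, and the PSR rules spelled out in fsys.txt -- exists to make it safe to run such handlers at privilege level 0 while still on the user-level stacks.]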
--- Documentation/ia64/fsys.txt | 219 ++++++++++++++++++++++++++++++ arch/ia64/Kconfig | 3 + arch/ia64/kernel/Makefile | 1 + arch/ia64/kernel/entry.S | 34 +++-- arch/ia64/kernel/entry.h | 4 +- arch/ia64/kernel/fsys.S | 291 ++++++++++++++++++++++++++++++++++++++++ arch/ia64/kernel/gate.S | 81 ++++++++++- arch/ia64/kernel/head.S | 9 +- arch/ia64/kernel/minstate.h | 93 ++++++------- arch/ia64/kernel/traps.c | 17 +++ arch/ia64/kernel/unaligned.c | 14 +- arch/ia64/tools/print_offsets.c | 4 +- include/asm-ia64/asmmacro.h | 7 +- include/asm-ia64/elf.h | 18 ++- include/asm-ia64/processor.h | 8 +- include/asm-ia64/ptrace.h | 2 + 16 files changed, 720 insertions(+), 85 deletions(-) create mode 100644 Documentation/ia64/fsys.txt create mode 100644 arch/ia64/kernel/fsys.S diff --git a/Documentation/ia64/fsys.txt b/Documentation/ia64/fsys.txt new file mode 100644 index 000000000000..9070a4ea91ab --- /dev/null +++ b/Documentation/ia64/fsys.txt @@ -0,0 +1,219 @@ +-*-Mode: outline-*- + + Light-weight System Calls for IA-64 + ----------------------------------- + + Started: 13-Jan-2003 + Last update: 14-Jan-2003 + + David Mosberger-Tang + + +Using the "epc" instruction effectively introduces a new mode of +execution to the ia64 linux kernel. We call this mode the +"fsys-mode". To recap, the normal states of execution are: + + - kernel mode: + Both the register stack and the memory stack have been + switched over to kernel memory. The user-level state + is saved in a pt-regs structure at the top of the kernel + memory stack. + + - user mode: + Both the register stack and the memory stack are in + user land. The user-level state is contained in the + CPU registers. + + - bank 0 interruption-handling mode: + This is the non-interruptible state that all + interruption-handlers start executing in. The user-level + state remains in the CPU registers and some kernel state may + be stored in bank 0 of registers r16-r31. + +Fsys-mode has the following special properties: + + - execution is at privilege level 0 (most-privileged) + + - CPU registers may contain a mixture of user-level and kernel-level + state (it is the responsibility of the kernel to ensure that no + security-sensitive kernel-level state is leaked back to + user-level) + + - execution is interruptible and preemptible (an fsys-mode handler + can disable interrupts and avoid all other interruption-sources + to avoid preemption) + + - neither the memory nor the register stack can be trusted while + in fsys-mode (they point to the user-level stacks, which may + be invalid) + +In summary, fsys-mode is much more similar to running in user-mode +than it is to running in kernel-mode. Of course, given that the +privilege level is at level 0, this means that fsys-mode requires some +care (see below). + + +* How to tell fsys-mode + +Linux operates in fsys-mode when (a) the privilege level is 0 (most +privileged) and (b) the stacks have NOT been switched to kernel memory +yet. For convenience, the header file <asm/ptrace.h> provides +three macros: + + user_mode(regs) + user_stack(regs) + fsys_mode(regs) + +The "regs" argument is a pointer to a pt_regs structure. user_mode() +returns TRUE if the CPU state pointed to by "regs" was executing in +user mode (privilege level 3). user_stack() returns TRUE if the state +pointed to by "regs" was executing on the user-level stack(s). +Finally, fsys_mode() returns TRUE if the CPU state pointed to by +"regs" was executing in fsys-mode. The fsys_mode() macro corresponds
exactly to the expression: + + !user_mode(regs) && user_stack(regs) + +* How to write an fsyscall handler + +The file arch/ia64/kernel/fsys.S contains a table of fsyscall-handlers +(fsyscall_table). This table contains one entry for each system call. +By default, a system call is handled by fsys_fallback_syscall(). This +routine takes care of entering (full) kernel mode and calling the +normal Linux system call handler. For performance-critical system +calls, it is possible to write a hand-tuned fsyscall_handler. For +example, fsys.S contains fsys_getpid(), which is a hand-tuned version +of the getpid() system call. + +The entry and exit-state of an fsyscall handler is as follows: + +** Machine state on entry to fsyscall handler: + + - r11 = saved ar.pfs (a user-level value) + - r15 = system call number + - r16 = "current" task pointer (in normal kernel-mode, this is in r13) + - r32-r39 = system call arguments + - b6 = return address (a user-level value) + - ar.pfs = previous frame-state (a user-level value) + - PSR.be = cleared to zero (i.e., little-endian byte order is in effect) + - all other registers may contain values passed in from user-mode + +** Required machine state on exit from fsyscall handler: + + - r11 = saved ar.pfs (as passed into the fsyscall handler) + - r15 = system call number (as passed into the fsyscall handler) + - r32-r39 = system call arguments (as passed into the fsyscall handler) + - b6 = return address (as passed into the fsyscall handler) + - ar.pfs = previous frame-state (as passed into the fsyscall handler) + +Fsyscall handlers can execute with very little overhead, but with that +speed comes a set of restrictions: + + o Fsyscall-handlers MUST check for any pending work in the flags + member of the thread-info structure and if any of the + TIF_ALLWORK_MASK flags are set, the handler needs to fall back on + doing a full system call (by calling fsys_fallback_syscall). + + o Fsyscall-handlers MUST preserve incoming arguments (r32-r39, r11, + r15, b6, and ar.pfs) because they will be needed in case of a + system call restart. Of course, all "preserved" registers also + must be preserved, in accordance to the normal calling conventions. + + o Fsyscall-handlers MUST check argument registers for containing a + NaT value before using them in any way that could trigger a + NaT-consumption fault. If a system call argument is found to + contain a NaT value, an fsyscall-handler may return immediately + with r8=EINVAL, r10=-1. + + o Fsyscall-handlers MUST NOT use the "alloc" instruction or perform + any other operation that would trigger mandatory RSE + (register-stack engine) traffic. + + o Fsyscall-handlers MUST NOT write to any stacked registers because + it is not safe to assume that user-level called a handler with the + proper number of arguments. + + o Fsyscall-handlers need to be careful when accessing per-CPU variables: + unless proper safe-guards are taken (e.g., interruptions are avoided), + execution may be pre-empted and resumed on another CPU at any given + time. + + o Fsyscall-handlers must be careful not to leak sensitive kernel + information back to user-level. In particular, before returning to + user-level, care needs to be taken to clear any scratch registers + that could contain sensitive information (note that the current + task pointer is not considered sensitive: it's already exposed + through ar.k6).
+ +The above restrictions may seem draconian, but remember that it's +possible to trade off some of the restrictions by paying a slightly +higher overhead. For example, if an fsyscall-handler could benefit +from the shadow register bank, it could temporarily disable PSR.i and +PSR.ic, switch to bank 0 (bsw.0) and then use the shadow registers as +needed. In other words, following the above rules yields extremely +fast system call execution (while fully preserving system call +semantics), but there is also a lot of flexibility in handling more +complicated cases. + +* PSR Handling + +The "epc" instruction doesn't change the contents of PSR at all. This +is in contrast to a regular interruption, which clears almost all +bits. Because of that, some care needs to be taken to ensure things +work as expected. The following discussion describes how each PSR bit +is handled. + +PSR.be Cleared when entering fsys-mode. A srlz.d instruction is used + to ensure the CPU is in little-endian mode before the first + load/store instruction is executed. PSR.be is normally NOT + restored upon return from an fsys-mode handler. In other + words, user-level code must not rely on PSR.be being preserved + across a system call. +PSR.up Unchanged. +PSR.ac Unchanged. +PSR.mfl Unchanged. Note: fsys-mode handlers must not write-registers! +PSR.mfh Unchanged. Note: fsys-mode handlers must not write-registers! +PSR.ic Unchanged. Note: fsys-mode handlers can clear the bit, if needed. +PSR.i Unchanged. Note: fsys-mode handlers can clear the bit, if needed. +PSR.pk Unchanged. +PSR.dt Unchanged. +PSR.dfl Unchanged. Note: fsys-mode handlers must not write-registers! +PSR.dfh Unchanged. Note: fsys-mode handlers must not write-registers! +PSR.sp Unchanged. +PSR.pp Unchanged. +PSR.di Unchanged. +PSR.si Unchanged. +PSR.db Unchanged. The kernel prevents user-level from setting a hardware + breakpoint that triggers at any privilege level other than 3 (user-mode). +PSR.lp Unchanged. +PSR.tb Lazy redirect. If a taken-branch trap occurs while in + fsys-mode, the trap-handler modifies the saved machine state + such that execution resumes in the gate page at + syscall_via_break(), with privilege level 3. Note: the + taken branch would occur on the branch invoking the + fsyscall-handler, at which point, by definition, a syscall + restart is still safe. If the system call number is invalid, + the fsys-mode handler will return directly to user-level. This + return will trigger a taken-branch trap, but since the trap is + taken _after_ restoring the privilege level, the CPU has already + left fsys-mode, so no special treatment is needed. +PSR.rt Unchanged. +PSR.cpl Cleared to 0. +PSR.is Unchanged (guaranteed to be 0 on entry to the gate page). +PSR.mc Unchanged. +PSR.it Unchanged (guaranteed to be 1). +PSR.id Unchanged. Note: the ia64 linux kernel never sets this bit. +PSR.da Unchanged. Note: the ia64 linux kernel never sets this bit. +PSR.dd Unchanged. Note: the ia64 linux kernel never sets this bit. +PSR.ss Lazy redirect. If set, "epc" will cause a Single Step Trap to + be taken. The trap handler then modifies the saved machine + state such that execution resumes in the gate page at + syscall_via_break(), with privilege level 3. +PSR.ri Unchanged. +PSR.ed Unchanged. Note: This bit could only have an effect if an fsys-mode + handler performed a speculative load that gets NaTted. If so, this + would be the normal & expected behavior, so no special treatment is + needed. +PSR.bn Unchanged. 
Note: fsys-mode handlers may clear the bit, if needed. + Doing so requires clearing PSR.i and PSR.ic as well. +PSR.ia Unchanged. Note: the ia64 linux kernel never sets this bit. diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 1a2933160766..80418538df5b 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -806,6 +806,9 @@ source "arch/ia64/hp/sim/Kconfig" menu "Kernel hacking" +config FSYS + bool "Light-weight system-call support (via epc)" + choice prompt "Physical memory granularity" default IA64_GRANULE_64MB diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 6cc51e574f66..79b2fccee5b1 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -12,6 +12,7 @@ obj-y := acpi.o entry.o gate.o efi.o efi_stub.o ia64_ksyms.o \ semaphore.o setup.o \ signal.o sys_ia64.o traps.o time.o unaligned.o unwind.o +obj-$(CONFIG_FSYS) += fsys.o obj-$(CONFIG_IOSAPIC) += iosapic.o obj-$(CONFIG_IA64_PALINFO) += palinfo.o obj-$(CONFIG_EFI_VARS) += efivars.o diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index bb294e803465..32cd477cf415 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -3,7 +3,7 @@ * * Kernel entry points. * - * Copyright (C) 1998-2002 Hewlett-Packard Co + * Copyright (C) 1998-2003 Hewlett-Packard Co * David Mosberger-Tang * Copyright (C) 1999 VA Linux Systems * Copyright (C) 1999 Walt Drummond @@ -22,8 +22,8 @@ /* * Global (preserved) predicate usage on syscall entry/exit path: * - * pKern: See entry.h. - * pUser: See entry.h. + * pKStk: See entry.h. + * pUStk: See entry.h. * pSys: See entry.h. * pNonSys: !pSys */ @@ -63,7 +63,7 @@ ENTRY(ia64_execve) sxt4 r8=r8 // return 64-bit result ;; stf.spill [sp]=f0 -(p6) cmp.ne pKern,pUser=r0,r0 // a successful execve() lands us in user-mode... +(p6) cmp.ne pKStk,pUStk=r0,r0 // a successful execve() lands us in user-mode... mov rp=loc0 (p6) mov ar.pfs=r0 // clear ar.pfs on success (p7) br.ret.sptk.many rp @@ -193,7 +193,7 @@ GLOBAL_ENTRY(ia64_switch_to) ;; (p6) srlz.d ld8 sp=[r21] // load kernel stack pointer of new task - mov IA64_KR(CURRENT)=r20 // update "current" application register + mov IA64_KR(CURRENT)=in0 // update "current" application register mov r8=r13 // return pointer to previously running task mov r13=in0 // set "current" pointer ;; @@ -569,11 +569,12 @@ END(ia64_ret_from_syscall) // fall through GLOBAL_ENTRY(ia64_leave_kernel) PT_REGS_UNWIND_INFO(0) - // work.need_resched etc. mustn't get changed by this CPU before it returns to userspace: -(pUser) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUser -(pUser) rsm psr.i + // work.need_resched etc. mustn't get changed by this CPU before it returns to + // user- or fsys-mode: +(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk +(pUStk) rsm psr.i ;; -(pUser) adds r17=TI_FLAGS+IA64_TASK_SIZE,r13 +(pUStk) adds r17=TI_FLAGS+IA64_TASK_SIZE,r13 ;; .work_processed: (p6) ld4 r18=[r17] // load current_thread_info()->flags @@ -635,9 +636,9 @@ GLOBAL_ENTRY(ia64_leave_kernel) ;; srlz.i // ensure interruption collection is off mov b7=r15 + bsw.0 // switch back to bank 0 (no stop bit required beforehand...) 
;; - bsw.0 // switch back to bank 0 - ;; +(pUStk) mov r18=IA64_KR(CURRENT) // Itanium 2: 12 cycle read latency adds r16=16,r12 adds r17=24,r12 ;; @@ -665,16 +666,21 @@ GLOBAL_ENTRY(ia64_leave_kernel) ;; ld8.fill r12=[r16],16 ld8.fill r13=[r17],16 +(pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18 ;; ld8.fill r14=[r16] ld8.fill r15=[r17] +(pUStk) mov r17=1 + ;; +(pUStk) st1 [r18]=r17 // restore current->thread.on_ustack shr.u r18=r19,16 // get byte size of existing "dirty" partition ;; mov r16=ar.bsp // get existing backing store pointer movl r17=THIS_CPU(ia64_phys_stacked_size_p8) ;; ld4 r17=[r17] // r17 = cpu_data->phys_stacked_size_p8 -(pKern) br.cond.dpnt skip_rbs_switch +(pKStk) br.cond.dpnt skip_rbs_switch + /* * Restore user backing store. * @@ -788,12 +794,12 @@ rse_clear_invalid: skip_rbs_switch: mov b6=rB6 mov ar.pfs=rARPFS -(pUser) mov ar.bspstore=rARBSPSTORE +(pUStk) mov ar.bspstore=rARBSPSTORE (p9) mov cr.ifs=rCRIFS mov cr.ipsr=rCRIPSR mov cr.iip=rCRIIP ;; -(pUser) mov ar.rnat=rARRNAT // must happen with RSE in lazy mode +(pUStk) mov ar.rnat=rARRNAT // must happen with RSE in lazy mode mov ar.rsc=rARRSC mov ar.unat=rARUNAT mov pr=rARPR,-1 diff --git a/arch/ia64/kernel/entry.h b/arch/ia64/kernel/entry.h index fb80a0461569..b7db22533114 100644 --- a/arch/ia64/kernel/entry.h +++ b/arch/ia64/kernel/entry.h @@ -4,8 +4,8 @@ * Preserved registers that are shared between code in ivt.S and entry.S. Be * careful not to step on these! */ -#define pKern p2 /* will leave_kernel return to kernel-mode? */ -#define pUser p3 /* will leave_kernel return to user-mode? */ +#define pKStk p2 /* will leave_kernel return to kernel-stacks? */ +#define pUStk p3 /* will leave_kernel return to user-stacks? */ #define pSys p4 /* are we processing a (synchronous) system call? */ #define pNonSys p5 /* complement of pSys */ diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S new file mode 100644 index 000000000000..3cc875a3f399 --- /dev/null +++ b/arch/ia64/kernel/fsys.S @@ -0,0 +1,291 @@ +/* + * This file contains the light-weight system call handlers (fsyscall-handlers). 
+ * + * Copyright (C) 2003 Hewlett-Packard Co + * David Mosberger-Tang + */ + +#include +#include +#include +#include + +ENTRY(fsys_ni_syscall) + mov r8=ENOSYS + mov r10=-1 + br.ret.sptk.many b6 +END(fsys_ni_syscall) + +ENTRY(fsys_getpid) + add r9=TI_FLAGS+IA64_TASK_SIZE,r16 + ;; + ld4 r9=[r9] + add r8=IA64_TASK_TGID_OFFSET,r16 + ;; + and r9=TIF_ALLWORK_MASK,r9 + ld4 r8=[r8] + ;; + cmp.ne p8,p0=0,r9 +(p8) br.spnt.many fsys_fallback_syscall + br.ret.sptk.many b6 +END(fsys_getpid) + + .rodata + .align 8 + .globl fsyscall_table +fsyscall_table: + data8 fsys_ni_syscall + data8 fsys_fallback_syscall // exit // 1025 + data8 fsys_fallback_syscall // read + data8 fsys_fallback_syscall // write + data8 fsys_fallback_syscall // open + data8 fsys_fallback_syscall // close + data8 fsys_fallback_syscall // creat // 1030 + data8 fsys_fallback_syscall // link + data8 fsys_fallback_syscall // unlink + data8 fsys_fallback_syscall // execve + data8 fsys_fallback_syscall // chdir + data8 fsys_fallback_syscall // fchdir // 1035 + data8 fsys_fallback_syscall // utimes + data8 fsys_fallback_syscall // mknod + data8 fsys_fallback_syscall // chmod + data8 fsys_fallback_syscall // chown + data8 fsys_fallback_syscall // lseek // 1040 + data8 fsys_getpid + data8 fsys_fallback_syscall // getppid + data8 fsys_fallback_syscall // mount + data8 fsys_fallback_syscall // umount + data8 fsys_fallback_syscall // setuid // 1045 + data8 fsys_fallback_syscall // getuid + data8 fsys_fallback_syscall // geteuid + data8 fsys_fallback_syscall // ptrace + data8 fsys_fallback_syscall // access + data8 fsys_fallback_syscall // sync // 1050 + data8 fsys_fallback_syscall // fsync + data8 fsys_fallback_syscall // fdatasync + data8 fsys_fallback_syscall // kill + data8 fsys_fallback_syscall // rename + data8 fsys_fallback_syscall // mkdir // 1055 + data8 fsys_fallback_syscall // rmdir + data8 fsys_fallback_syscall // dup + data8 fsys_fallback_syscall // pipe + data8 fsys_fallback_syscall // times + data8 fsys_fallback_syscall // brk // 1060 + data8 fsys_fallback_syscall // setgid + data8 fsys_fallback_syscall // getgid + data8 fsys_fallback_syscall // getegid + data8 fsys_fallback_syscall // acct + data8 fsys_fallback_syscall // ioctl // 1065 + data8 fsys_fallback_syscall // fcntl + data8 fsys_fallback_syscall // umask + data8 fsys_fallback_syscall // chroot + data8 fsys_fallback_syscall // ustat + data8 fsys_fallback_syscall // dup2 // 1070 + data8 fsys_fallback_syscall // setreuid + data8 fsys_fallback_syscall // setregid + data8 fsys_fallback_syscall // getresuid + data8 fsys_fallback_syscall // setresuid + data8 fsys_fallback_syscall // getresgid // 1075 + data8 fsys_fallback_syscall // setresgid + data8 fsys_fallback_syscall // getgroups + data8 fsys_fallback_syscall // setgroups + data8 fsys_fallback_syscall // getpgid + data8 fsys_fallback_syscall // setpgid // 1080 + data8 fsys_fallback_syscall // setsid + data8 fsys_fallback_syscall // getsid + data8 fsys_fallback_syscall // sethostname + data8 fsys_fallback_syscall // setrlimit + data8 fsys_fallback_syscall // getrlimit // 1085 + data8 fsys_fallback_syscall // getrusage + data8 fsys_fallback_syscall // gettimeofday + data8 fsys_fallback_syscall // settimeofday + data8 fsys_fallback_syscall // select + data8 fsys_fallback_syscall // poll // 1090 + data8 fsys_fallback_syscall // symlink + data8 fsys_fallback_syscall // readlink + data8 fsys_fallback_syscall // uselib + data8 fsys_fallback_syscall // swapon + data8 fsys_fallback_syscall // swapoff // 1095 + data8 
fsys_fallback_syscall // reboot + data8 fsys_fallback_syscall // truncate + data8 fsys_fallback_syscall // ftruncate + data8 fsys_fallback_syscall // fchmod + data8 fsys_fallback_syscall // fchown // 1100 + data8 fsys_fallback_syscall // getpriority + data8 fsys_fallback_syscall // setpriority + data8 fsys_fallback_syscall // statfs + data8 fsys_fallback_syscall // fstatfs + data8 fsys_fallback_syscall // gettid // 1105 + data8 fsys_fallback_syscall // semget + data8 fsys_fallback_syscall // semop + data8 fsys_fallback_syscall // semctl + data8 fsys_fallback_syscall // msgget + data8 fsys_fallback_syscall // msgsnd // 1110 + data8 fsys_fallback_syscall // msgrcv + data8 fsys_fallback_syscall // msgctl + data8 fsys_fallback_syscall // shmget + data8 fsys_fallback_syscall // shmat + data8 fsys_fallback_syscall // shmdt // 1115 + data8 fsys_fallback_syscall // shmctl + data8 fsys_fallback_syscall // syslog + data8 fsys_fallback_syscall // setitimer + data8 fsys_fallback_syscall // getitimer + data8 fsys_fallback_syscall // 1120 + data8 fsys_fallback_syscall + data8 fsys_fallback_syscall + data8 fsys_fallback_syscall // vhangup + data8 fsys_fallback_syscall // lchown + data8 fsys_fallback_syscall // remap_file_pages // 1125 + data8 fsys_fallback_syscall // wait4 + data8 fsys_fallback_syscall // sysinfo + data8 fsys_fallback_syscall // clone + data8 fsys_fallback_syscall // setdomainname + data8 fsys_fallback_syscall // newuname // 1130 + data8 fsys_fallback_syscall // adjtimex + data8 fsys_fallback_syscall + data8 fsys_fallback_syscall // init_module + data8 fsys_fallback_syscall // delete_module + data8 fsys_fallback_syscall // 1135 + data8 fsys_fallback_syscall + data8 fsys_fallback_syscall // quotactl + data8 fsys_fallback_syscall // bdflush + data8 fsys_fallback_syscall // sysfs + data8 fsys_fallback_syscall // personality // 1140 + data8 fsys_fallback_syscall // afs_syscall + data8 fsys_fallback_syscall // setfsuid + data8 fsys_fallback_syscall // setfsgid + data8 fsys_fallback_syscall // getdents + data8 fsys_fallback_syscall // flock // 1145 + data8 fsys_fallback_syscall // readv + data8 fsys_fallback_syscall // writev + data8 fsys_fallback_syscall // pread64 + data8 fsys_fallback_syscall // pwrite64 + data8 fsys_fallback_syscall // sysctl // 1150 + data8 fsys_fallback_syscall // mmap + data8 fsys_fallback_syscall // munmap + data8 fsys_fallback_syscall // mlock + data8 fsys_fallback_syscall // mlockall + data8 fsys_fallback_syscall // mprotect // 1155 + data8 fsys_fallback_syscall // mremap + data8 fsys_fallback_syscall // msync + data8 fsys_fallback_syscall // munlock + data8 fsys_fallback_syscall // munlockall + data8 fsys_fallback_syscall // sched_getparam // 1160 + data8 fsys_fallback_syscall // sched_setparam + data8 fsys_fallback_syscall // sched_getscheduler + data8 fsys_fallback_syscall // sched_setscheduler + data8 fsys_fallback_syscall // sched_yield + data8 fsys_fallback_syscall // sched_get_priority_max // 1165 + data8 fsys_fallback_syscall // sched_get_priority_min + data8 fsys_fallback_syscall // sched_rr_get_interval + data8 fsys_fallback_syscall // nanosleep + data8 fsys_fallback_syscall // nfsservctl + data8 fsys_fallback_syscall // prctl // 1170 + data8 fsys_fallback_syscall // getpagesize + data8 fsys_fallback_syscall // mmap2 + data8 fsys_fallback_syscall // pciconfig_read + data8 fsys_fallback_syscall // pciconfig_write + data8 fsys_fallback_syscall // perfmonctl // 1175 + data8 fsys_fallback_syscall // sigaltstack + data8 fsys_fallback_syscall // rt_sigaction + 
data8 fsys_fallback_syscall // rt_sigpending + data8 fsys_fallback_syscall // rt_sigprocmask + data8 fsys_fallback_syscall // rt_sigqueueinfo // 1180 + data8 fsys_fallback_syscall // rt_sigreturn + data8 fsys_fallback_syscall // rt_sigsuspend + data8 fsys_fallback_syscall // rt_sigtimedwait + data8 fsys_fallback_syscall // getcwd + data8 fsys_fallback_syscall // capget // 1185 + data8 fsys_fallback_syscall // capset + data8 fsys_fallback_syscall // sendfile + data8 fsys_fallback_syscall + data8 fsys_fallback_syscall + data8 fsys_fallback_syscall // socket // 1190 + data8 fsys_fallback_syscall // bind + data8 fsys_fallback_syscall // connect + data8 fsys_fallback_syscall // listen + data8 fsys_fallback_syscall // accept + data8 fsys_fallback_syscall // getsockname // 1195 + data8 fsys_fallback_syscall // getpeername + data8 fsys_fallback_syscall // socketpair + data8 fsys_fallback_syscall // send + data8 fsys_fallback_syscall // sendto + data8 fsys_fallback_syscall // recv // 1200 + data8 fsys_fallback_syscall // recvfrom + data8 fsys_fallback_syscall // shutdown + data8 fsys_fallback_syscall // setsockopt + data8 fsys_fallback_syscall // getsockopt + data8 fsys_fallback_syscall // sendmsg // 1205 + data8 fsys_fallback_syscall // recvmsg + data8 fsys_fallback_syscall // pivot_root + data8 fsys_fallback_syscall // mincore + data8 fsys_fallback_syscall // madvise + data8 fsys_fallback_syscall // newstat // 1210 + data8 fsys_fallback_syscall // newlstat + data8 fsys_fallback_syscall // newfstat + data8 fsys_fallback_syscall // clone2 + data8 fsys_fallback_syscall // getdents64 + data8 fsys_fallback_syscall // getunwind // 1215 + data8 fsys_fallback_syscall // readahead + data8 fsys_fallback_syscall // setxattr + data8 fsys_fallback_syscall // lsetxattr + data8 fsys_fallback_syscall // fsetxattr + data8 fsys_fallback_syscall // getxattr // 1220 + data8 fsys_fallback_syscall // lgetxattr + data8 fsys_fallback_syscall // fgetxattr + data8 fsys_fallback_syscall // listxattr + data8 fsys_fallback_syscall // llistxattr + data8 fsys_fallback_syscall // flistxattr // 1225 + data8 fsys_fallback_syscall // removexattr + data8 fsys_fallback_syscall // lremovexattr + data8 fsys_fallback_syscall // fremovexattr + data8 fsys_fallback_syscall // tkill + data8 fsys_fallback_syscall // futex // 1230 + data8 fsys_fallback_syscall // sched_setaffinity + data8 fsys_fallback_syscall // sched_getaffinity + data8 fsys_fallback_syscall // set_tid_address + data8 fsys_fallback_syscall // alloc_hugepages + data8 fsys_fallback_syscall // free_hugepages // 1235 + data8 fsys_fallback_syscall // exit_group + data8 fsys_fallback_syscall // lookup_dcookie + data8 fsys_fallback_syscall // io_setup + data8 fsys_fallback_syscall // io_destroy + data8 fsys_fallback_syscall // io_getevents // 1240 + data8 fsys_fallback_syscall // io_submit + data8 fsys_fallback_syscall // io_cancel + data8 fsys_fallback_syscall // epoll_create + data8 fsys_fallback_syscall // epoll_ctl + data8 fsys_fallback_syscall // epoll_wait // 1245 + data8 fsys_fallback_syscall // restart_syscall + data8 fsys_fallback_syscall // semtimedop + data8 fsys_fallback_syscall + data8 fsys_fallback_syscall + data8 fsys_fallback_syscall // 1250 + data8 fsys_fallback_syscall + data8 fsys_fallback_syscall + data8 fsys_fallback_syscall + data8 fsys_fallback_syscall + data8 fsys_fallback_syscall // 1255 + data8 fsys_fallback_syscall + data8 fsys_fallback_syscall + data8 fsys_fallback_syscall + data8 fsys_fallback_syscall + data8 fsys_fallback_syscall // 1260 + data8 
fsys_fallback_syscall + data8 fsys_fallback_syscall + data8 fsys_fallback_syscall + data8 fsys_fallback_syscall + data8 fsys_fallback_syscall // 1265 + data8 fsys_fallback_syscall + data8 fsys_fallback_syscall + data8 fsys_fallback_syscall + data8 fsys_fallback_syscall + data8 fsys_fallback_syscall // 1270 + data8 fsys_fallback_syscall + data8 fsys_fallback_syscall + data8 fsys_fallback_syscall + data8 fsys_fallback_syscall + data8 fsys_fallback_syscall // 1275 + data8 fsys_fallback_syscall + data8 fsys_fallback_syscall + data8 fsys_fallback_syscall diff --git a/arch/ia64/kernel/gate.S b/arch/ia64/kernel/gate.S index 1e34ad720b3e..70b5fcf68e96 100644 --- a/arch/ia64/kernel/gate.S +++ b/arch/ia64/kernel/gate.S @@ -2,7 +2,7 @@ * This file contains the code that gets mapped at the upper end of each task's text * region. For now, it contains the signal trampoline code only. * - * Copyright (C) 1999-2002 Hewlett-Packard Co + * Copyright (C) 1999-2003 Hewlett-Packard Co * David Mosberger-Tang */ @@ -14,6 +14,85 @@ #include .section .text.gate, "ax" +.start_gate: + + +#if CONFIG_FSYS + +#include + +/* + * On entry: + * r11 = saved ar.pfs + * r15 = system call # + * b0 = saved return address + * b6 = return address + * On exit: + * r11 = saved ar.pfs + * r15 = system call # + * b0 = saved return address + * all other "scratch" registers: undefined + * all "preserved" registers: same as on entry + */ +GLOBAL_ENTRY(syscall_via_epc) + .prologue + .altrp b6 + .body +{ + /* + * Note: the kernel cannot assume that the first two instructions in this + * bundle get executed. The remaining code must be safe even if + * they do not get executed. + */ + adds r17=-1024,r15 + mov r10=0 // default to successful syscall execution + epc +} + ;; + rsm psr.be + movl r18=fsyscall_table + + mov r16=IA64_KR(CURRENT) + mov r19=255 + ;; + shladd r18=r17,3,r18 + cmp.geu p6,p0=r19,r17 // (syscall > 0 && syscall <= 1024+255)? + ;; + srlz.d // ensure little-endian byteorder is in effect +(p6) ld8 r18=[r18] + ;; +(p6) mov b7=r18 +(p6) br.sptk.many b7 + + mov r10=-1 + mov r8=ENOSYS + br.ret.sptk.many b6 +END(syscall_via_epc) + +GLOBAL_ENTRY(syscall_via_break) + .prologue + .altrp b6 + .body + break 0x100000 + br.ret.sptk.many b6 +END(syscall_via_break) + +GLOBAL_ENTRY(fsys_fallback_syscall) + /* + * It would be better/fsyser to do the SAVE_MIN magic directly here, but for now + * we simply fall back on doing a system-call via break. Good enough + * to get started. (Note: we have to do this through the gate page again, since + * the br.ret will switch us back to user-level privilege.) + * + * XXX Move this back to fsys.S after changing it over to avoid break 0x100000. + */ + movl r2=(syscall_via_break - .start_gate) + GATE_ADDR + ;; + mov b7=r2 + br.ret.sptk.many b7 +END(fsys_fallback_syscall) + +#endif /* CONFIG_FSYS */ # define ARG0_OFF (16 + IA64_SIGFRAME_ARG0_OFFSET) # define ARG1_OFF (16 + IA64_SIGFRAME_ARG1_OFFSET) diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S index 55917acf140e..28dd4e66f7c1 100644 --- a/arch/ia64/kernel/head.S +++ b/arch/ia64/kernel/head.S @@ -5,7 +5,7 @@ * to set up the kernel's global pointer and jump to the kernel * entry point. 
* - * Copyright (C) 1998-2001 Hewlett-Packard Co + * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co * David Mosberger-Tang * Stephane Eranian * Copyright (C) 1999 VA Linux Systems @@ -143,17 +143,14 @@ start_ap: movl r2=init_thread_union cmp.eq isBP,isAP=r0,r0 #endif - ;; - extr r3=r2,0,61 // r3 == phys addr of task struct mov r16=KERNEL_TR_PAGE_NUM ;; // load the "current" pointer (r13) and ar.k6 with the current task - mov r13=r2 - mov IA64_KR(CURRENT)=r3 // Physical address - + mov IA64_KR(CURRENT)=r2 // virtual address // initialize k4 to a safe value (64-128MB is mapped by TR_KERNEL) mov IA64_KR(CURRENT_STACK)=r16 + mov r13=r2 /* * Reserve space at the top of the stack for "struct pt_regs". Kernel threads * don't store interesting values in that structure, but the space still needs diff --git a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h index 81aebc4a2924..ed1d91daafac 100644 --- a/arch/ia64/kernel/minstate.h +++ b/arch/ia64/kernel/minstate.h @@ -30,25 +30,23 @@ * on interrupts. */ #define MINSTATE_START_SAVE_MIN_VIRT \ -(pUser) mov ar.rsc=0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \ - dep r1=-1,r1,61,3; /* r1 = current (virtual) */ \ +(pUStk) mov ar.rsc=0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \ ;; \ -(pUser) mov.m rARRNAT=ar.rnat; \ -(pUser) addl rKRBS=IA64_RBS_OFFSET,r1; /* compute base of RBS */ \ -(pKern) mov r1=sp; /* get sp */ \ +(pUStk) mov.m rARRNAT=ar.rnat; \ +(pUStk) addl rKRBS=IA64_RBS_OFFSET,r1; /* compute base of RBS */ \ +(pKStk) mov r1=sp; /* get sp */ \ ;; \ -(pUser) lfetch.fault.excl.nt1 [rKRBS]; \ -(pUser) mov rARBSPSTORE=ar.bspstore; /* save ar.bspstore */ \ -(pUser) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \ +(pUStk) lfetch.fault.excl.nt1 [rKRBS]; \ +(pUStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \ +(pUStk) mov rARBSPSTORE=ar.bspstore; /* save ar.bspstore */ \ ;; \ -(pUser) mov ar.bspstore=rKRBS; /* switch to kernel RBS */ \ -(pKern) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \ +(pUStk) mov ar.bspstore=rKRBS; /* switch to kernel RBS */ \ +(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \ ;; \ -(pUser) mov r18=ar.bsp; \ -(pUser) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \ +(pUStk) mov r18=ar.bsp; \ +(pUStk) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \ #define MINSTATE_END_SAVE_MIN_VIRT \ - or r13=r13,r14; /* make `current' a kernel virtual address */ \ bsw.1; /* switch back to bank 1 (must be last in insn group) */ \ ;; @@ -57,21 +55,21 @@ * go virtual and dont want to destroy the iip or ipsr. 
*/ #define MINSTATE_START_SAVE_MIN_PHYS \ -(pKern) movl sp=ia64_init_stack+IA64_STK_OFFSET-IA64_PT_REGS_SIZE; \ -(pUser) mov ar.rsc=0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \ -(pUser) addl rKRBS=IA64_RBS_OFFSET,r1; /* compute base of register backing store */ \ +(pKStk) movl sp=ia64_init_stack+IA64_STK_OFFSET-IA64_PT_REGS_SIZE; \ +(pUStk) mov ar.rsc=0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \ +(pUStk) addl rKRBS=IA64_RBS_OFFSET,r1; /* compute base of register backing store */ \ ;; \ -(pUser) mov rARRNAT=ar.rnat; \ -(pKern) dep r1=0,sp,61,3; /* compute physical addr of sp */ \ -(pUser) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \ -(pUser) mov rARBSPSTORE=ar.bspstore; /* save ar.bspstore */ \ -(pUser) dep rKRBS=-1,rKRBS,61,3; /* compute kernel virtual addr of RBS */\ +(pUStk) mov rARRNAT=ar.rnat; \ +(pKStk) dep r1=0,sp,61,3; /* compute physical addr of sp */ \ +(pUStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \ +(pUStk) mov rARBSPSTORE=ar.bspstore; /* save ar.bspstore */ \ +(pUStk) dep rKRBS=-1,rKRBS,61,3; /* compute kernel virtual addr of RBS */\ ;; \ -(pKern) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \ -(pUser) mov ar.bspstore=rKRBS; /* switch to kernel RBS */ \ +(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \ +(pUStk) mov ar.bspstore=rKRBS; /* switch to kernel RBS */ \ ;; \ -(pUser) mov r18=ar.bsp; \ -(pUser) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \ +(pUStk) mov r18=ar.bsp; \ +(pUStk) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \ #define MINSTATE_END_SAVE_MIN_PHYS \ or r12=r12,r14; /* make sp a kernel virtual address */ \ @@ -79,11 +77,13 @@ ;; #ifdef MINSTATE_VIRT +# define MINSTATE_GET_CURRENT(reg) mov reg=IA64_KR(CURRENT) # define MINSTATE_START_SAVE_MIN MINSTATE_START_SAVE_MIN_VIRT # define MINSTATE_END_SAVE_MIN MINSTATE_END_SAVE_MIN_VIRT #endif #ifdef MINSTATE_PHYS +# define MINSTATE_GET_CURRENT(reg) mov reg=IA64_KR(CURRENT);; dep reg=0,reg,61,3 # define MINSTATE_START_SAVE_MIN MINSTATE_START_SAVE_MIN_PHYS # define MINSTATE_END_SAVE_MIN MINSTATE_END_SAVE_MIN_PHYS #endif @@ -110,23 +110,26 @@ * we can pass interruption state as arguments to a handler. */ #define DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA) \ - mov rARRSC=ar.rsc; \ - mov rARPFS=ar.pfs; \ - mov rR1=r1; \ - mov rARUNAT=ar.unat; \ - mov rCRIPSR=cr.ipsr; \ - mov rB6=b6; /* rB6 = branch reg 6 */ \ - mov rCRIIP=cr.iip; \ - mov r1=IA64_KR(CURRENT); /* r1 = current (physical) */ \ - COVER; \ - ;; \ - invala; \ - extr.u r16=rCRIPSR,32,2; /* extract psr.cpl */ \ - ;; \ - cmp.eq pKern,pUser=r0,r16; /* are we in kernel mode already? (psr.cpl==0) */ \ + mov rARRSC=ar.rsc; /* M */ \ + mov rARUNAT=ar.unat; /* M */ \ + mov rR1=r1; /* A */ \ + MINSTATE_GET_CURRENT(r1); /* M (or M;;I) */ \ + mov rCRIPSR=cr.ipsr; /* M */ \ + mov rARPFS=ar.pfs; /* I */ \ + mov rCRIIP=cr.iip; /* M */ \ + mov rB6=b6; /* I */ /* rB6 = branch reg 6 */ \ + COVER; /* B;; (or nothing) */ \ + ;; \ + adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r1; \ + ;; \ + ld1 r17=[r16]; /* load current->thread.on_ustack flag */ \ + st1 [r16]=r0; /* clear current->thread.on_ustack flag */ \ /* switch from user to kernel RBS: */ \ ;; \ + invala; /* M */ \ SAVE_IFS; \ + cmp.eq pKStk,pUStk=r0,r17; /* are we in kernel mode already? 
(psr.cpl==0) */ \ + ;; \ MINSTATE_START_SAVE_MIN \ add r17=L1_CACHE_BYTES,r1 /* really: biggest cache-line size */ \ ;; \ @@ -138,23 +141,23 @@ ;; \ lfetch.fault.excl.nt1 [r17]; \ adds r17=8,r1; /* initialize second base pointer */ \ -(pKern) mov r18=r0; /* make sure r18 isn't NaT */ \ +(pKStk) mov r18=r0; /* make sure r18 isn't NaT */ \ ;; \ st8 [r17]=rCRIIP,16; /* save cr.iip */ \ st8 [r16]=rCRIFS,16; /* save cr.ifs */ \ -(pUser) sub r18=r18,rKRBS; /* r18=RSE.ndirty*8 */ \ +(pUStk) sub r18=r18,rKRBS; /* r18=RSE.ndirty*8 */ \ ;; \ st8 [r17]=rARUNAT,16; /* save ar.unat */ \ st8 [r16]=rARPFS,16; /* save ar.pfs */ \ shl r18=r18,16; /* compute ar.rsc to be used for "loadrs" */ \ ;; \ st8 [r17]=rARRSC,16; /* save ar.rsc */ \ -(pUser) st8 [r16]=rARRNAT,16; /* save ar.rnat */ \ -(pKern) adds r16=16,r16; /* skip over ar_rnat field */ \ +(pUStk) st8 [r16]=rARRNAT,16; /* save ar.rnat */ \ +(pKStk) adds r16=16,r16; /* skip over ar_rnat field */ \ ;; /* avoid RAW on r16 & r17 */ \ -(pUser) st8 [r17]=rARBSPSTORE,16; /* save ar.bspstore */ \ +(pUStk) st8 [r17]=rARBSPSTORE,16; /* save ar.bspstore */ \ st8 [r16]=rARPR,16; /* save predicates */ \ -(pKern) adds r17=16,r17; /* skip over ar_bspstore field */ \ +(pKStk) adds r17=16,r17; /* skip over ar_bspstore field */ \ ;; \ st8 [r17]=rB6,16; /* save b6 */ \ st8 [r16]=r18,16; /* save ar.rsc value for "loadrs" */ \ diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index f6612529150c..b440a937d405 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -524,6 +524,23 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, case 29: /* Debug */ case 35: /* Taken Branch Trap */ case 36: /* Single Step Trap */ + if (fsys_mode(regs)) { + extern char syscall_via_break[], __start_gate_section[]; + /* + * Got a trap in fsys-mode: Taken Branch Trap and Single Step trap + * need special handling; Debug trap is not supposed to happen. + */ + if (unlikely(vector == 29)) { + die("Got debug trap in fsys-mode---not supposed to happen!", + regs, 0); + return; + } + /* re-do the system call via break 0x100000: */ + regs->cr_iip = GATE_ADDR + (syscall_via_break - __start_gate_section); + ia64_psr(regs)->ri = 0; + ia64_psr(regs)->cpl = 3; + return; + } switch (vector) { case 29: siginfo.si_code = TRAP_HWBKPT; diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c index 55719fce87e9..8c6097011ab5 100644 --- a/arch/ia64/kernel/unaligned.c +++ b/arch/ia64/kernel/unaligned.c @@ -331,12 +331,8 @@ set_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long val, int nat) return; } - /* - * Avoid using user_mode() here: with "epc", we cannot use the privilege level to - * infer whether the interrupt task was running on the kernel backing store. - */ - if (regs->r12 >= TASK_SIZE) { - DPRINT("ignoring kernel write to r%lu; register isn't on the RBS!", r1); + if (!user_stack(regs)) { + DPRINT("ignoring kernel write to r%lu; register isn't on the kernel RBS!", r1); return; } @@ -406,11 +402,7 @@ get_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long *val, int *na return; } - /* - * Avoid using user_mode() here: with "epc", we cannot use the privilege level to - * infer whether the interrupt task was running on the kernel backing store. 
- */ - if (regs->r12 >= TASK_SIZE) { + if (!user_stack(regs)) { DPRINT("ignoring kernel read of r%lu; register isn't on the RBS!", r1); goto fail; } diff --git a/arch/ia64/tools/print_offsets.c b/arch/ia64/tools/print_offsets.c index 20fc8ee4f32f..4c72ea2608a6 100644 --- a/arch/ia64/tools/print_offsets.c +++ b/arch/ia64/tools/print_offsets.c @@ -1,7 +1,7 @@ /* * Utility to generate asm-ia64/offsets.h. * - * Copyright (C) 1999-2002 Hewlett-Packard Co + * Copyright (C) 1999-2003 Hewlett-Packard Co * David Mosberger-Tang * * Note that this file has dual use: when building the kernel @@ -53,7 +53,9 @@ tab[] = { "UNW_FRAME_INFO_SIZE", sizeof (struct unw_frame_info) }, { "", 0 }, /* spacer */ { "IA64_TASK_THREAD_KSP_OFFSET", offsetof (struct task_struct, thread.ksp) }, + { "IA64_TASK_THREAD_ON_USTACK_OFFSET", offsetof (struct task_struct, thread.on_ustack) }, { "IA64_TASK_PID_OFFSET", offsetof (struct task_struct, pid) }, + { "IA64_TASK_TGID_OFFSET", offsetof (struct task_struct, tgid) }, { "IA64_PT_REGS_CR_IPSR_OFFSET", offsetof (struct pt_regs, cr_ipsr) }, { "IA64_PT_REGS_CR_IIP_OFFSET", offsetof (struct pt_regs, cr_iip) }, { "IA64_PT_REGS_CR_IFS_OFFSET", offsetof (struct pt_regs, cr_ifs) }, diff --git a/include/asm-ia64/asmmacro.h b/include/asm-ia64/asmmacro.h index 010c6fc11a91..a37ac45b0430 100644 --- a/include/asm-ia64/asmmacro.h +++ b/include/asm-ia64/asmmacro.h @@ -2,7 +2,7 @@ #define _ASM_IA64_ASMMACRO_H /* - * Copyright (C) 2000-2001 Hewlett-Packard Co + * Copyright (C) 2000-2001, 2003 Hewlett-Packard Co * David Mosberger-Tang */ @@ -11,6 +11,11 @@ .proc name; \ name: +#define ENTRY_MIN_ALIGN(name) \ + .align 16; \ + .proc name; \ +name: + #define GLOBAL_ENTRY(name) \ .global name; \ ENTRY(name) diff --git a/include/asm-ia64/elf.h b/include/asm-ia64/elf.h index 6cc69c3299f6..a00f0dccc22f 100644 --- a/include/asm-ia64/elf.h +++ b/include/asm-ia64/elf.h @@ -4,10 +4,12 @@ /* * ELF-specific definitions. * - * Copyright (C) 1998, 1999, 2002 Hewlett-Packard Co + * Copyright (C) 1998-1999, 2002-2003 Hewlett-Packard Co * David Mosberger-Tang */ +#include + #include #include @@ -88,6 +90,11 @@ extern void ia64_elf_core_copy_regs (struct pt_regs *src, elf_gregset_t dst); relevant until we have real hardware to play with... */ #define ELF_PLATFORM 0 +/* + * This should go into linux/elf.h... 
+ */ +#define AT_SYSINFO 32 + #ifdef __KERNEL__ struct elf64_hdr; extern void ia64_set_personality (struct elf64_hdr *elf_ex, int ibcs2_interpreter); @@ -99,7 +106,14 @@ extern int dump_task_fpu (struct task_struct *, elf_fpregset_t *); #define ELF_CORE_COPY_TASK_REGS(tsk, elf_gregs) dump_task_regs(tsk, elf_gregs) #define ELF_CORE_COPY_FPREGS(tsk, elf_fpregs) dump_task_fpu(tsk, elf_fpregs) - +#ifdef CONFIG_FSYS +#define ARCH_DLINFO \ +do { \ + extern int syscall_via_epc; \ + NEW_AUX_ENT(AT_SYSINFO, syscall_via_epc); \ +} while (0) #endif +#endif /* __KERNEL__ */ + #endif /* _ASM_IA64_ELF_H */ diff --git a/include/asm-ia64/processor.h b/include/asm-ia64/processor.h index b4a59a5173cb..83dbb132e4e5 100644 --- a/include/asm-ia64/processor.h +++ b/include/asm-ia64/processor.h @@ -2,7 +2,7 @@ #define _ASM_IA64_PROCESSOR_H /* - * Copyright (C) 1998-2002 Hewlett-Packard Co + * Copyright (C) 1998-2003 Hewlett-Packard Co * David Mosberger-Tang * Stephane Eranian * Copyright (C) 1999 Asit Mallick @@ -223,7 +223,10 @@ typedef struct { struct siginfo; struct thread_struct { - __u64 flags; /* various thread flags (see IA64_THREAD_*) */ + __u32 flags; /* various thread flags (see IA64_THREAD_*) */ + /* writing on_ustack is performance-critical, so it's worth spending 8 bits on it... */ + __u8 on_ustack; /* executing on user-stacks? */ + __u8 pad[3]; __u64 ksp; /* kernel stack pointer */ __u64 map_base; /* base address for get_unmapped_area() */ __u64 task_size; /* limit for task size */ @@ -277,6 +280,7 @@ struct thread_struct { #define INIT_THREAD { \ .flags = 0, \ + .on_ustack = 0, \ .ksp = 0, \ .map_base = DEFAULT_MAP_BASE, \ .task_size = DEFAULT_TASK_SIZE, \ diff --git a/include/asm-ia64/ptrace.h b/include/asm-ia64/ptrace.h index eb33ef4579f7..775d1e875eff 100644 --- a/include/asm-ia64/ptrace.h +++ b/include/asm-ia64/ptrace.h @@ -218,6 +218,8 @@ struct switch_stack { # define ia64_task_regs(t) (((struct pt_regs *) ((char *) (t) + IA64_STK_OFFSET)) - 1) # define ia64_psr(regs) ((struct ia64_psr *) &(regs)->cr_ipsr) # define user_mode(regs) (((struct ia64_psr *) &(regs)->cr_ipsr)->cpl != 0) +# define user_stack(regs) (current->thread.on_ustack != 0) +# define fsys_mode(regs) (!user_mode(regs) && user_stack(regs)) struct task_struct; /* forward decl */ -- cgit v1.2.3 From 25ed62673b8cae31df97757e74ab7d9e8ed379a7 Mon Sep 17 00:00:00 2001 From: David Mosberger Date: Wed, 15 Jan 2003 00:25:02 -0800 Subject: ia64: Make asynchronous signal delivery work properly during fsys-mode execution. Add workaround for McKinley Erratum 7. --- Documentation/ia64/fsys.txt | 49 +++++++++++++++++++++++++++----------------- arch/ia64/kernel/fsys.S | 2 ++ arch/ia64/kernel/gate.S | 2 ++ arch/ia64/kernel/process.c | 9 +++++++- arch/ia64/kernel/traps.c | 38 ++++++++++++++++++++++------------ arch/ia64/kernel/unaligned.c | 4 ++-- include/asm-ia64/asmmacro.h | 11 ++++++++++ include/asm-ia64/ptrace.h | 11 +++++++--- 8 files changed, 88 insertions(+), 38 deletions(-) diff --git a/Documentation/ia64/fsys.txt b/Documentation/ia64/fsys.txt index 9070a4ea91ab..9a41823af7e4 100644 --- a/Documentation/ia64/fsys.txt +++ b/Documentation/ia64/fsys.txt @@ -4,7 +4,7 @@ ----------------------------------- Started: 13-Jan-2002 - Last update: 14-Jan-2002 + Last update: 15-Jan-2002 David Mosberger-Tang @@ -14,23 +14,22 @@ execution to the ia64 linux kernel. We call this mode the "fsys-mode". To recap, the normal states of execution are: - kernel mode: - Both the register stack and the kernel stack have been - switched over to the kernel stack. 
The user-level state - is saved in a pt-regs structure at the top of the kernel - memory stack. + Both the register stack and the memory stack have been + switched over to kernel memory. The user-level state is saved + in a pt-regs structure at the top of the kernel memory stack. - user mode: Both the register stack and the kernel stack are in - user land. The user-level state is contained in the + user memory. The user-level state is contained in the CPU registers. - bank 0 interruption-handling mode: - This is the non-interruptible state in that all - interruption-handlers start executing in. The user-level + This is the non-interruptible state which all + interruption-handlers start execution in. The user-level state remains in the CPU registers and some kernel state may be stored in bank 0 of registers r16-r31. -Fsys-mode has the following special properties: +In contrast, fsys-mode has the following special properties: - execution is at privilege level 0 (most-privileged) @@ -61,18 +60,19 @@ yet. For convenience, the header file provides three macros: user_mode(regs) - user_stack(regs) - fsys_mode(regs) + user_stack(task,regs) + fsys_mode(task,regs) -The "regs" argument is a pointer to a pt_regs structure. user_mode() -returns TRUE if the CPU state pointed to by "regs" was executing in -user mode (privilege level 3). user_stack() returns TRUE if the state -pointed to by "regs" was executing on the user-level stack(s). -Finally, fsys_mode() returns TRUE if the CPU state pointed to by -"regs" was executing in fsys-mode. The fsys_mode() macro corresponds -exactly to the expression: +The "regs" argument is a pointer to a pt_regs structure. The "task" +argument is a pointer to the task structure to which the "regs" +pointer belongs. user_mode() returns TRUE if the CPU state pointed +to by "regs" was executing in user mode (privilege level 3). +user_stack() returns TRUE if the state pointed to by "regs" was +executing on the user-level stack(s). Finally, fsys_mode() returns +TRUE if the CPU state pointed to by "regs" was executing in fsys-mode. +The fsys_mode() macro is equivalent to the expression: - !user_mode(regs) && user_stack(regs) + !user_mode(regs) && user_stack(task,regs) * How to write an fsyscall handler @@ -155,6 +155,17 @@ fast system call execution (while fully preserving system call semantics), but there is also a lot of flexibility in handling more complicated cases. +* Signal handling + +The delivery of (asynchronous) signals must be delayed until fsys-mode +is exited. This is accomplished with the help of the lower-privilege +transfer trap: arch/ia64/kernel/process.c:do_notify_resume_user() +checks whether the interrupted task was in fsys-mode and, if so, sets +PSR.lp and returns immediately. When fsys-mode is exited via the +"br.ret" instruction that lowers the privilege level, a trap will +occur. The trap handler clears PSR.lp again and returns immediately. +The kernel exit path then checks for and delivers any pending signals. + * PSR Handling The "epc" instruction doesn't change the contents of PSR at all.
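(Aside on the macro interface described above: for reference, the two new predicates condense to the sketch below. The authoritative definitions are in the include/asm-ia64/ptrace.h hunk of this patch; the sketch assumes, as elsewhere in this series, that a task structure and its kernel stack share a single IA64_STK_OFFSET-sized allocation, with the pt_regs structure placed at its very top.)

	/* pt_regs sits at the very top of the kernel stack only when the
	   interruption arrived while the task was still running on its
	   user-level stacks: */
	#define user_stack(task,regs) \
		((long) (regs) - (long) (task) == IA64_STK_OFFSET - sizeof(struct pt_regs))

	/* fsys-mode: privilege level 0, but register/memory stacks still in user land: */
	#define fsys_mode(task,regs)	(!user_mode(regs) && user_stack(task, regs))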
This diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S index 3cc875a3f399..a2eaa95ad8ed 100644 --- a/arch/ia64/kernel/fsys.S +++ b/arch/ia64/kernel/fsys.S @@ -13,6 +13,7 @@ ENTRY(fsys_ni_syscall) mov r8=ENOSYS mov r10=-1 + MCKINLEY_E7_WORKAROUND br.ret.sptk.many b6 END(fsys_ni_syscall) @@ -27,6 +28,7 @@ ENTRY(fsys_getpid) ;; cmp.ne p8,p0=0,r9 (p8) br.spnt.many fsys_fallback_syscall + MCKINLEY_E7_WORKAROUND br.ret.sptk.many b6 END(fsys_getpid) diff --git a/arch/ia64/kernel/gate.S b/arch/ia64/kernel/gate.S index 70b5fcf68e96..4941a9da4779 100644 --- a/arch/ia64/kernel/gate.S +++ b/arch/ia64/kernel/gate.S @@ -66,6 +66,7 @@ GLOBAL_ENTRY(syscall_via_epc) mov r10=-1 mov r8=ENOSYS + MCKINLEY_E7_WORKAROUND br.ret.sptk.many b6 END(syscall_via_epc) @@ -88,6 +89,7 @@ GLOBAL_ENTRY(fsys_fallback_syscall) */ movl r2=(syscall_via_break - .start_gate) + GATE_ADDR ;; + MCKINLEY_E7_WORKAROUND mov b7=r2 br.ret.sptk.many b7 END(fsys_fallback_syscall) diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index a360aa1ecd00..43d2f15ac0fb 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -1,7 +1,7 @@ /* * Architecture-specific setup. * - * Copyright (C) 1998-2002 Hewlett-Packard Co + * Copyright (C) 1998-2003 Hewlett-Packard Co * David Mosberger-Tang */ #define __KERNEL_SYSCALLS__ /* see */ @@ -144,6 +144,13 @@ show_regs (struct pt_regs *regs) void do_notify_resume_user (sigset_t *oldset, struct sigscratch *scr, long in_syscall) { + if (fsys_mode(current, &scr->pt)) { + /* defer signal-handling etc. until we return to privilege-level 0. */ + if (!ia64_psr(&scr->pt)->lp) + ia64_psr(&scr->pt)->lp = 1; + return; + } + #ifdef CONFIG_PERFMON if (current->thread.pfm_ovfl_block_reset) pfm_ovfl_block_reset(); diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index b440a937d405..43b568f83209 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -1,7 +1,7 @@ /* * Architecture-specific trap handling. * - * Copyright (C) 1998-2002 Hewlett-Packard Co + * Copyright (C) 1998-2003 Hewlett-Packard Co * David Mosberger-Tang * * 05/12/00 grao : added isr in siginfo for SIGFPE @@ -524,7 +524,7 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, case 29: /* Debug */ case 35: /* Taken Branch Trap */ case 36: /* Single Step Trap */ - if (fsys_mode(regs)) { + if (fsys_mode(current, regs)) { extern char syscall_via_break[], __start_gate_section[]; /* * Got a trap in fsys-mode: Taken Branch Trap and Single Step trap @@ -580,19 +580,31 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, } return; - case 34: /* Unimplemented Instruction Address Trap */ - if (user_mode(regs)) { - siginfo.si_signo = SIGILL; - siginfo.si_code = ILL_BADIADDR; - siginfo.si_errno = 0; - siginfo.si_flags = 0; - siginfo.si_isr = 0; - siginfo.si_imm = 0; - siginfo.si_addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri); - force_sig_info(SIGILL, &siginfo, current); + case 34: + if (isr & 0x2) { + /* Lower-Privilege Transfer Trap */ + /* + * Just clear PSR.lp and then return immediately: all the + * interesting work (e.g., signal delivery is done in the kernel + * exit path). + */ + ia64_psr(regs)->lp = 0; return; + } else { + /* Unimplemented Instr. 
Address Trap */ + if (user_mode(regs)) { + siginfo.si_signo = SIGILL; + siginfo.si_code = ILL_BADIADDR; + siginfo.si_errno = 0; + siginfo.si_flags = 0; + siginfo.si_isr = 0; + siginfo.si_imm = 0; + siginfo.si_addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri); + force_sig_info(SIGILL, &siginfo, current); + return; + } + sprintf(buf, "Unimplemented Instruction Address fault"); } - sprintf(buf, "Unimplemented Instruction Address fault"); break; case 45: diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c index 8c6097011ab5..e0719a28e034 100644 --- a/arch/ia64/kernel/unaligned.c +++ b/arch/ia64/kernel/unaligned.c @@ -331,7 +331,7 @@ set_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long val, int nat) return; } - if (!user_stack(regs)) { + if (!user_stack(current, regs)) { DPRINT("ignoring kernel write to r%lu; register isn't on the kernel RBS!", r1); return; } @@ -402,7 +402,7 @@ get_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long *val, int *na return; } - if (!user_stack(regs)) { + if (!user_stack(current, regs)) { DPRINT("ignoring kernel read of r%lu; register isn't on the RBS!", r1); goto fail; } diff --git a/include/asm-ia64/asmmacro.h b/include/asm-ia64/asmmacro.h index a37ac45b0430..afcc2e468493 100644 --- a/include/asm-ia64/asmmacro.h +++ b/include/asm-ia64/asmmacro.h @@ -6,6 +6,8 @@ * David Mosberger-Tang */ +#include + #define ENTRY(name) \ .align 32; \ .proc name; \ @@ -57,4 +59,13 @@ name: 99: x #endif +#ifdef CONFIG_MCKINLEY +/* workaround for Itanium 2 Errata 7: */ +# define MCKINLEY_E7_WORKAROUND \ + br.call.sptk.many b7=1f;; \ +1: +#else +# define MCKINLEY_E7_WORKAROUND +#endif + #endif /* _ASM_IA64_ASMMACRO_H */ diff --git a/include/asm-ia64/ptrace.h b/include/asm-ia64/ptrace.h index 775d1e875eff..f64f222d1a5c 100644 --- a/include/asm-ia64/ptrace.h +++ b/include/asm-ia64/ptrace.h @@ -2,7 +2,7 @@ #define _ASM_IA64_PTRACE_H /* - * Copyright (C) 1998-2002 Hewlett-Packard Co + * Copyright (C) 1998-2003 Hewlett-Packard Co * David Mosberger-Tang * Stephane Eranian * @@ -218,8 +218,13 @@ struct switch_stack { # define ia64_task_regs(t) (((struct pt_regs *) ((char *) (t) + IA64_STK_OFFSET)) - 1) # define ia64_psr(regs) ((struct ia64_psr *) &(regs)->cr_ipsr) # define user_mode(regs) (((struct ia64_psr *) &(regs)->cr_ipsr)->cpl != 0) -# define user_stack(regs) (current->thread.on_ustack != 0) -# define fsys_mode(regs) (!user_mode(regs) && user_stack(regs)) +# define user_stack(task,regs) ((long) regs - (long) task == IA64_STK_OFFSET - sizeof(*regs)) +# define fsys_mode(task,regs) \ + ({ \ + struct task_struct *_task = (task); \ + struct pt_regs *_regs = (regs); \ + !user_mode(_regs) && user_stack(_task, _regs); \ + }) struct task_struct; /* forward decl */ -- cgit v1.2.3 From 5d5c8bf5fae8d13031d7edc63171128941ea5a9f Mon Sep 17 00:00:00 2001 From: David Mosberger Date: Wed, 15 Jan 2003 00:26:31 -0800 Subject: ia64: Fix some typos.
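The typos in question are missing "0x" prefixes on the .org directives that place the interruption-vector entries in ivt.S. Those offsets are fixed hexadecimal values from the architectural IVT layout, so Entry 13, for example, must be placed with

	.org ia64_ivt+0x3400	// architectural offset 0x3400 (13312 bytes)

whereas the old form ".org ia64_ivt+3400" names decimal 3400 (0xd48) and therefore the wrong offset. The hunks below apply the same correction to every affected entry from 0x3400 through 0x7f00, plus a corrected include in intrinsics.h.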
--- arch/ia64/kernel/ivt.S | 108 +++++++++++++++++++++--------------------- include/asm-ia64/intrinsics.h | 4 +- 2 files changed, 56 insertions(+), 56 deletions(-) diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S index 110564c1ed8b..8a0f5d87df44 100644 --- a/arch/ia64/kernel/ivt.S +++ b/arch/ia64/kernel/ivt.S @@ -746,19 +746,19 @@ ENTRY(interrupt) br.call.sptk.many b6=ia64_handle_irq END(interrupt) - .org ia64_ivt+3400 + .org ia64_ivt+0x3400 ///////////////////////////////////////////////////////////////////////////////////////// // 0x3400 Entry 13 (size 64 bundles) Reserved DBG_FAULT(13) FAULT(13) - .org ia64_ivt+3800 + .org ia64_ivt+0x3800 ///////////////////////////////////////////////////////////////////////////////////////// // 0x3800 Entry 14 (size 64 bundles) Reserved DBG_FAULT(14) FAULT(14) - .org ia64_ivt+3c00 + .org ia64_ivt+0x3c00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x3c00 Entry 15 (size 64 bundles) Reserved DBG_FAULT(15) @@ -893,7 +893,7 @@ END(dispatch_to_ia32_handler) #endif /* CONFIG_IA32_SUPPORT */ - .org ia64_ivt+4400 + .org ia64_ivt+0x4400 ///////////////////////////////////////////////////////////////////////////////////////// // 0x4400 Entry 17 (size 64 bundles) Reserved DBG_FAULT(17) @@ -925,7 +925,7 @@ ENTRY(non_syscall) br.call.sptk.many b6=ia64_bad_break // avoid WAW on CFM and ignore return addr END(non_syscall) - .org ia64_ivt+4800 + .org ia64_ivt+0x4800 ///////////////////////////////////////////////////////////////////////////////////////// // 0x4800 Entry 18 (size 64 bundles) Reserved DBG_FAULT(18) @@ -959,7 +959,7 @@ ENTRY(dispatch_unaligned_handler) br.sptk.many ia64_prepare_handle_unaligned END(dispatch_unaligned_handler) - .org ia64_ivt+4c00 + .org ia64_ivt+0x4c00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x4c00 Entry 19 (size 64 bundles) Reserved DBG_FAULT(19) @@ -1005,7 +1005,7 @@ END(dispatch_to_fault_handler) // --- End of long entries, Beginning of short entries // - .org ia64_ivt+5000 + .org ia64_ivt+0x5000 ///////////////////////////////////////////////////////////////////////////////////////// // 0x5000 Entry 20 (size 16 bundles) Page Not Present (10,22,49) ENTRY(page_not_present) @@ -1025,7 +1025,7 @@ ENTRY(page_not_present) br.sptk.many page_fault END(page_not_present) - .org ia64_ivt+5100 + .org ia64_ivt+0x5100 ///////////////////////////////////////////////////////////////////////////////////////// // 0x5100 Entry 21 (size 16 bundles) Key Permission (13,25,52) ENTRY(key_permission) @@ -1038,7 +1038,7 @@ ENTRY(key_permission) br.sptk.many page_fault END(key_permission) - .org ia64_ivt+5200 + .org ia64_ivt+0x5200 ///////////////////////////////////////////////////////////////////////////////////////// // 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26) ENTRY(iaccess_rights) @@ -1051,7 +1051,7 @@ ENTRY(iaccess_rights) br.sptk.many page_fault END(iaccess_rights) - .org ia64_ivt+5300 + .org ia64_ivt+0x5300 ///////////////////////////////////////////////////////////////////////////////////////// // 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53) ENTRY(daccess_rights) @@ -1064,7 +1064,7 @@ ENTRY(daccess_rights) br.sptk.many page_fault END(daccess_rights) - .org ia64_ivt+5400 + .org ia64_ivt+0x5400 ///////////////////////////////////////////////////////////////////////////////////////// // 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39) ENTRY(general_exception) @@ -1079,7 +1079,7 @@ 
ENTRY(general_exception) br.sptk.many dispatch_to_fault_handler END(general_exception) - .org ia64_ivt+5500 + .org ia64_ivt+0x5500 ///////////////////////////////////////////////////////////////////////////////////////// // 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35) ENTRY(disabled_fp_reg) @@ -1092,7 +1092,7 @@ ENTRY(disabled_fp_reg) br.sptk.many dispatch_to_fault_handler END(disabled_fp_reg) - .org ia64_ivt+5600 + .org ia64_ivt+0x5600 ///////////////////////////////////////////////////////////////////////////////////////// // 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50) ENTRY(nat_consumption) @@ -1100,7 +1100,7 @@ ENTRY(nat_consumption) FAULT(26) END(nat_consumption) - .org ia64_ivt+5700 + .org ia64_ivt+0x5700 ///////////////////////////////////////////////////////////////////////////////////////// // 0x5700 Entry 27 (size 16 bundles) Speculation (40) ENTRY(speculation_vector) @@ -1137,13 +1137,13 @@ ENTRY(speculation_vector) rfi // and go back END(speculation_vector) - .org ia64_ivt+5800 + .org ia64_ivt+0x5800 ///////////////////////////////////////////////////////////////////////////////////////// // 0x5800 Entry 28 (size 16 bundles) Reserved DBG_FAULT(28) FAULT(28) - .org ia64_ivt+5900 + .org ia64_ivt+0x5900 ///////////////////////////////////////////////////////////////////////////////////////// // 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56) ENTRY(debug_vector) @@ -1151,7 +1151,7 @@ ENTRY(debug_vector) FAULT(29) END(debug_vector) - .org ia64_ivt+5a00 + .org ia64_ivt+0x5a00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57) ENTRY(unaligned_access) @@ -1162,7 +1162,7 @@ ENTRY(unaligned_access) br.sptk.many dispatch_unaligned_handler END(unaligned_access) - .org ia64_ivt+5b00 + .org ia64_ivt+0x5b00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57) ENTRY(unsupported_data_reference) @@ -1170,7 +1170,7 @@ ENTRY(unsupported_data_reference) FAULT(31) END(unsupported_data_reference) - .org ia64_ivt+5c00 + .org ia64_ivt+0x5c00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x5c00 Entry 32 (size 16 bundles) Floating-Point Fault (64) ENTRY(floating_point_fault) @@ -1178,7 +1178,7 @@ ENTRY(floating_point_fault) FAULT(32) END(floating_point_fault) - .org ia64_ivt+5d00 + .org ia64_ivt+0x5d00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66) ENTRY(floating_point_trap) @@ -1186,7 +1186,7 @@ ENTRY(floating_point_trap) FAULT(33) END(floating_point_trap) - .org ia64_ivt+5e00 + .org ia64_ivt+0x5e00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66) ENTRY(lower_privilege_trap) @@ -1194,7 +1194,7 @@ ENTRY(lower_privilege_trap) FAULT(34) END(lower_privilege_trap) - .org ia64_ivt+5f00 + .org ia64_ivt+0x5f00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68) ENTRY(taken_branch_trap) @@ -1202,7 +1202,7 @@ ENTRY(taken_branch_trap) FAULT(35) END(taken_branch_trap) - .org ia64_ivt+6000 + .org ia64_ivt+0x6000 ///////////////////////////////////////////////////////////////////////////////////////// // 0x6000 Entry 36 
(size 16 bundles) Single Step Trap (69) ENTRY(single_step_trap) @@ -1210,55 +1210,55 @@ ENTRY(single_step_trap) FAULT(36) END(single_step_trap) - .org ia64_ivt+6100 + .org ia64_ivt+0x6100 ///////////////////////////////////////////////////////////////////////////////////////// // 0x6100 Entry 37 (size 16 bundles) Reserved DBG_FAULT(37) FAULT(37) - .org ia64_ivt+6200 + .org ia64_ivt+0x6200 ///////////////////////////////////////////////////////////////////////////////////////// // 0x6200 Entry 38 (size 16 bundles) Reserved DBG_FAULT(38) FAULT(38) - .org ia64_ivt+6300 + .org ia64_ivt+0x6300 ///////////////////////////////////////////////////////////////////////////////////////// // 0x6300 Entry 39 (size 16 bundles) Reserved DBG_FAULT(39) FAULT(39) - .org ia64_ivt+6400 + .org ia64_ivt+0x6400 ///////////////////////////////////////////////////////////////////////////////////////// // 0x6400 Entry 40 (size 16 bundles) Reserved DBG_FAULT(40) FAULT(40) - .org ia64_ivt+6500 + .org ia64_ivt+0x6500 ///////////////////////////////////////////////////////////////////////////////////////// // 0x6500 Entry 41 (size 16 bundles) Reserved DBG_FAULT(41) FAULT(41) - .org ia64_ivt+6600 + .org ia64_ivt+0x6600 ///////////////////////////////////////////////////////////////////////////////////////// // 0x6600 Entry 42 (size 16 bundles) Reserved DBG_FAULT(42) FAULT(42) - .org ia64_ivt+6700 + .org ia64_ivt+0x6700 ///////////////////////////////////////////////////////////////////////////////////////// // 0x6700 Entry 43 (size 16 bundles) Reserved DBG_FAULT(43) FAULT(43) - .org ia64_ivt+6800 + .org ia64_ivt+0x6800 ///////////////////////////////////////////////////////////////////////////////////////// // 0x6800 Entry 44 (size 16 bundles) Reserved DBG_FAULT(44) FAULT(44) - .org ia64_ivt+6900 + .org ia64_ivt+0x6900 ///////////////////////////////////////////////////////////////////////////////////////// // 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception (17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77) ENTRY(ia32_exception) @@ -1266,7 +1266,7 @@ ENTRY(ia32_exception) FAULT(45) END(ia32_exception) - .org ia64_ivt+6a00 + .org ia64_ivt+0x6a00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71) ENTRY(ia32_intercept) @@ -1296,7 +1296,7 @@ ENTRY(ia32_intercept) FAULT(46) END(ia32_intercept) - .org ia64_ivt+6b00 + .org ia64_ivt+0x6b00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt (74) ENTRY(ia32_interrupt) @@ -1309,121 +1309,121 @@ ENTRY(ia32_interrupt) #endif END(ia32_interrupt) - .org ia64_ivt+6c00 + .org ia64_ivt+0x6c00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x6c00 Entry 48 (size 16 bundles) Reserved DBG_FAULT(48) FAULT(48) - .org ia64_ivt+6d00 + .org ia64_ivt+0x6d00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x6d00 Entry 49 (size 16 bundles) Reserved DBG_FAULT(49) FAULT(49) - .org ia64_ivt+6e00 + .org ia64_ivt+0x6e00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x6e00 Entry 50 (size 16 bundles) Reserved DBG_FAULT(50) FAULT(50) - .org ia64_ivt+6f00 + .org ia64_ivt+0x6f00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x6f00 Entry 51 (size 16 bundles) Reserved DBG_FAULT(51) FAULT(51) - .org ia64_ivt+7000 + .org 
ia64_ivt+0x7000 ///////////////////////////////////////////////////////////////////////////////////////// // 0x7000 Entry 52 (size 16 bundles) Reserved DBG_FAULT(52) FAULT(52) - .org ia64_ivt+7100 + .org ia64_ivt+0x7100 ///////////////////////////////////////////////////////////////////////////////////////// // 0x7100 Entry 53 (size 16 bundles) Reserved DBG_FAULT(53) FAULT(53) - .org ia64_ivt+7200 + .org ia64_ivt+0x7200 ///////////////////////////////////////////////////////////////////////////////////////// // 0x7200 Entry 54 (size 16 bundles) Reserved DBG_FAULT(54) FAULT(54) - .org ia64_ivt+7300 + .org ia64_ivt+0x7300 ///////////////////////////////////////////////////////////////////////////////////////// // 0x7300 Entry 55 (size 16 bundles) Reserved DBG_FAULT(55) FAULT(55) - .org ia64_ivt+7400 + .org ia64_ivt+0x7400 ///////////////////////////////////////////////////////////////////////////////////////// // 0x7400 Entry 56 (size 16 bundles) Reserved DBG_FAULT(56) FAULT(56) - .org ia64_ivt+7500 + .org ia64_ivt+0x7500 ///////////////////////////////////////////////////////////////////////////////////////// // 0x7500 Entry 57 (size 16 bundles) Reserved DBG_FAULT(57) FAULT(57) - .org ia64_ivt+7600 + .org ia64_ivt+0x7600 ///////////////////////////////////////////////////////////////////////////////////////// // 0x7600 Entry 58 (size 16 bundles) Reserved DBG_FAULT(58) FAULT(58) - .org ia64_ivt+7700 + .org ia64_ivt+0x7700 ///////////////////////////////////////////////////////////////////////////////////////// // 0x7700 Entry 59 (size 16 bundles) Reserved DBG_FAULT(59) FAULT(59) - .org ia64_ivt+7800 + .org ia64_ivt+0x7800 ///////////////////////////////////////////////////////////////////////////////////////// // 0x7800 Entry 60 (size 16 bundles) Reserved DBG_FAULT(60) FAULT(60) - .org ia64_ivt+7900 + .org ia64_ivt+0x7900 ///////////////////////////////////////////////////////////////////////////////////////// // 0x7900 Entry 61 (size 16 bundles) Reserved DBG_FAULT(61) FAULT(61) - .org ia64_ivt+7a00 + .org ia64_ivt+0x7a00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x7a00 Entry 62 (size 16 bundles) Reserved DBG_FAULT(62) FAULT(62) - .org ia64_ivt+7b00 + .org ia64_ivt+0x7b00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x7b00 Entry 63 (size 16 bundles) Reserved DBG_FAULT(63) FAULT(63) - .org ia64_ivt+7c00 + .org ia64_ivt+0x7c00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x7c00 Entry 64 (size 16 bundles) Reserved DBG_FAULT(64) FAULT(64) - .org ia64_ivt+7d00 + .org ia64_ivt+0x7d00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x7d00 Entry 65 (size 16 bundles) Reserved DBG_FAULT(65) FAULT(65) - .org ia64_ivt+7e00 + .org ia64_ivt+0x7e00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x7e00 Entry 66 (size 16 bundles) Reserved DBG_FAULT(66) FAULT(66) - .org ia64_ivt+7f00 + .org ia64_ivt+0x7f00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x7f00 Entry 67 (size 16 bundles) Reserved DBG_FAULT(67) diff --git a/include/asm-ia64/intrinsics.h b/include/asm-ia64/intrinsics.h index 5ff113fda5e7..b9a3c5e049c9 100644 --- a/include/asm-ia64/intrinsics.h +++ b/include/asm-ia64/intrinsics.h @@ -4,11 +4,11 @@ /* * Compiler-dependent intrinsics. 
* - * Copyright (C) 2002 Hewlett-Packard Co + * Copyright (C) 2002-2003 Hewlett-Packard Co * David Mosberger-Tang */ -#include +#include /* * Force an unresolved reference if someone tries to use -- cgit v1.2.3 From 2135c38f1c4a7d5ccc25ade7d11cc68826d73469 Mon Sep 17 00:00:00 2001 From: David Mosberger Date: Wed, 15 Jan 2003 02:04:33 -0800 Subject: ia64: Correct erratum number (caught by Asit Mallick). --- arch/ia64/kernel/fsys.S | 4 ++-- arch/ia64/kernel/gate.S | 4 ++-- include/asm-ia64/asmmacro.h | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S index a2eaa95ad8ed..c395ba0723a3 100644 --- a/arch/ia64/kernel/fsys.S +++ b/arch/ia64/kernel/fsys.S @@ -13,7 +13,7 @@ ENTRY(fsys_ni_syscall) mov r8=ENOSYS mov r10=-1 - MCKINLEY_E7_WORKAROUND + MCKINLEY_E9_WORKAROUND br.ret.sptk.many b6 END(fsys_ni_syscall) @@ -28,7 +28,7 @@ ENTRY(fsys_getpid) ;; cmp.ne p8,p0=0,r9 (p8) br.spnt.many fsys_fallback_syscall - MCKINLEY_E7_WORKAROUND + MCKINLEY_E9_WORKAROUND br.ret.sptk.many b6 END(fsys_getpid) diff --git a/arch/ia64/kernel/gate.S b/arch/ia64/kernel/gate.S index 4941a9da4779..9f2cb601b67f 100644 --- a/arch/ia64/kernel/gate.S +++ b/arch/ia64/kernel/gate.S @@ -66,7 +66,7 @@ GLOBAL_ENTRY(syscall_via_epc) mov r10=-1 mov r8=ENOSYS - MCKINLEY_E7_WORKAROUND + MCKINLEY_E9_WORKAROUND br.ret.sptk.many b6 END(syscall_via_epc) @@ -89,7 +89,7 @@ GLOBAL_ENTRY(fsys_fallback_syscall) */ movl r2=(syscall_via_break - .start_gate) + GATE_ADDR ;; - MCKINLEY_E7_WORKAROUND + MCKINLEY_E9_WORKAROUND mov b7=r2 br.ret.sptk.many b7 END(fsys_fallback_syscall) diff --git a/include/asm-ia64/asmmacro.h b/include/asm-ia64/asmmacro.h index afcc2e468493..41b061ac1d24 100644 --- a/include/asm-ia64/asmmacro.h +++ b/include/asm-ia64/asmmacro.h @@ -60,12 +60,12 @@ name: #endif #ifdef CONFIG_MCKINLEY -/* workaround for Itanium 2 Errata 7: */ -# define MCKINLEY_E7_WORKAROUND \ +/* workaround for Itanium 2 Errata 9: */ +# define MCKINLEY_E9_WORKAROUND \ br.call.sptk.many b7=1f;; \ 1: #else -# define MCKINLEY_E7_WORKAROUND +# define MCKINLEY_E9_WORKAROUND #endif #endif /* _ASM_IA64_ASMMACRO_H */ -- cgit v1.2.3 From cc735c7851854a590818eed08a774b621c37bed3 Mon Sep 17 00:00:00 2001 From: David Mosberger Date: Thu, 16 Jan 2003 19:53:25 -0800 Subject: ia64: Fix ia64_fls() so it works for all possible 64-bit values. Reported by Dan Magenheimer (note: the bug didn't affect the existing kernel, since the possible values passed to the routine were always "safe"). --- include/asm-ia64/bitops.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/asm-ia64/bitops.h b/include/asm-ia64/bitops.h index 325bec339c3b..bec48081bba7 100644 --- a/include/asm-ia64/bitops.h +++ b/include/asm-ia64/bitops.h @@ -2,7 +2,7 @@ #define _ASM_IA64_BITOPS_H /* - * Copyright (C) 1998-2002 Hewlett-Packard Co + * Copyright (C) 1998-2003 Hewlett-Packard Co * David Mosberger-Tang * * 02/06/02 find_next_bit() and find_first_bit() added from Erich Focht's ia64 O(1) @@ -320,7 +320,7 @@ __ffs (unsigned long x) static inline unsigned long ia64_fls (unsigned long x) { - double d = x; + long double d = x; long exp; __asm__ ("getf.exp %0=%1" : "=r"(exp) : "f"(d)); -- cgit v1.2.3 From 4dbfeac2d857468bf0fccd9f5993abe1e4510514 Mon Sep 17 00:00:00 2001 From: David Mosberger Date: Fri, 17 Jan 2003 03:18:59 -0800 Subject: ia64: Add unwcheck.sh script contributed by Harish Patil. It checks the unwind info for consistency (well, just the obvious stuff, but it's a start). 
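The invariant the script checks is plain bundle arithmetic: IA-64 code is laid out in 16-byte bundles of three instruction slots each, so a code range [lo,hi) contains (hi - lo)/16*3 slots, and the unwind regions that "readelf -u" reports for that range (its "rlen=" fields) must account for exactly that many. A 0x40-byte range, for instance, holds 4 bundles and hence 12 slots. Per code range, the check amounts to the following C sketch (report_mismatch is a hypothetical stand-in for the script's error printout):

	/* per-range invariant verified by unwcheck.sh: */
	unsigned long slots = (hi - lo) / 16 * 3;	/* 3 slots per 16-byte bundle */
	if (sum_rlen != slots)				/* sum of all rlen= fields */
		report_mismatch(lo, hi, sum_rlen, slots);	/* hypothetical helper */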
Fix the couple of bugs that this script uncovered (and work around one false positive). --- arch/ia64/Makefile | 3 ++ arch/ia64/ia32/ia32_entry.S | 7 +++ arch/ia64/kernel/entry.S | 39 +++++++-------- arch/ia64/kernel/gate.S | 24 ++++++---- arch/ia64/lib/memcpy_mck.S | 6 +-- arch/ia64/lib/memset.S | 6 +-- arch/ia64/scripts/unwcheck.sh | 109 ++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 160 insertions(+), 34 deletions(-) create mode 100755 arch/ia64/scripts/unwcheck.sh diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile index ee87c6516dda..bbe9e62d19f2 100644 --- a/arch/ia64/Makefile +++ b/arch/ia64/Makefile @@ -58,6 +58,9 @@ all compressed: vmlinux.gz vmlinux.gz: vmlinux $(call makeboot,vmlinux.gz) +check: vmlinux + arch/ia64/scripts/unwcheck.sh vmlinux + archmrproper: archclean: $(Q)$(MAKE) -f scripts/Makefile.clean obj=arch/ia64/boot diff --git a/arch/ia64/ia32/ia32_entry.S b/arch/ia64/ia32/ia32_entry.S index 8a3445be6962..d4f3067636f5 100644 --- a/arch/ia64/ia32/ia32_entry.S +++ b/arch/ia64/ia32/ia32_entry.S @@ -95,12 +95,19 @@ END(sys32_sigsuspend) GLOBAL_ENTRY(ia32_ret_from_clone) PT_REGS_UNWIND_INFO(0) #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT) +{ /* + * Some versions of gas generate bad unwind info if the first instruction of a + * procedure doesn't go into the first slot of a bundle. This is a workaround. + */ + nop.m 0 + nop.i 0 /* * We need to call schedule_tail() to complete the scheduling process. * Called by ia64_switch_to after do_fork()->copy_thread(). r8 contains the * address of the previously executing task. */ br.call.sptk.many rp=ia64_invoke_schedule_tail +} .ret1: #endif adds r2=TI_FLAGS+IA64_TASK_SIZE,r13 diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 32cd477cf415..bc0a07752e57 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -507,7 +507,14 @@ END(invoke_syscall_trace) GLOBAL_ENTRY(ia64_trace_syscall) PT_REGS_UNWIND_INFO(0) +{ /* + * Some versions of gas generate bad unwind info if the first instruction of a + * procedure doesn't go into the first slot of a bundle. This is a workaround. + */ + nop.m 0 + nop.i 0 br.call.sptk.many rp=invoke_syscall_trace // give parent a chance to catch syscall args +} .ret6: br.call.sptk.many rp=b6 // do the syscall strace_check_retval: cmp.lt p6,p0=r8,r0 // syscall failed? @@ -537,12 +544,19 @@ END(ia64_trace_syscall) GLOBAL_ENTRY(ia64_ret_from_clone) PT_REGS_UNWIND_INFO(0) +{ /* + * Some versions of gas generate bad unwind info if the first instruction of a + * procedure doesn't go into the first slot of a bundle. This is a workaround. + */ + nop.m 0 + nop.i 0 /* * We need to call schedule_tail() to complete the scheduling process. * Called by ia64_switch_to() after do_fork()->copy_thread(). r8 contains the * address of the previously executing task. */ br.call.sptk.many rp=ia64_invoke_schedule_tail +} .ret8: adds r2=TI_FLAGS+IA64_TASK_SIZE,r13 ;; @@ -716,21 +730,9 @@ dont_preserve_current_frame: shr.u loc1=r18,9 // RNaTslots <= dirtySize / (64*8) + 1 sub r17=r17,r18 // r17 = (physStackedSize + 8) - dirtySize ;; -#if 1 - .align 32 // see comment below about gas bug... -#endif mov ar.rsc=r19 // load ar.rsc to be used for "loadrs" shladd in0=loc1,3,r17 mov in1=0 -#if 0 - // gas-2.12.90 is unable to generate a stop bit after .align, which is bad, - // because alloc must be at the beginning of an insn-group. 
- .align 32 -#else - nop 0 - nop 0 - nop 0 -#endif ;; rse_clear_invalid: #ifdef CONFIG_ITANIUM @@ -969,17 +971,16 @@ ENTRY(sys_rt_sigreturn) END(sys_rt_sigreturn) GLOBAL_ENTRY(ia64_prepare_handle_unaligned) - // - // r16 = fake ar.pfs, we simply need to make sure - // privilege is still 0 - // - mov r16=r0 .prologue + /* + * r16 = fake ar.pfs, we simply need to make sure privilege is still 0 + */ + mov r16=r0 DO_SAVE_SWITCH_STACK - br.call.sptk.many rp=ia64_handle_unaligned // stack frame setup in ivt + br.call.sptk.many rp=ia64_handle_unaligned // stack frame setup in ivt .ret21: .body DO_LOAD_SWITCH_STACK - br.cond.sptk.many rp // goes to ia64_leave_kernel + br.cond.sptk.many rp // goes to ia64_leave_kernel END(ia64_prepare_handle_unaligned) // diff --git a/arch/ia64/kernel/gate.S b/arch/ia64/kernel/gate.S index 9f2cb601b67f..302fc06fb04b 100644 --- a/arch/ia64/kernel/gate.S +++ b/arch/ia64/kernel/gate.S @@ -144,15 +144,18 @@ END(fsys_fallback_syscall) * call stack. */ +#define SIGTRAMP_SAVES \ + .unwabi @svr4, 's' // mark this as a sigtramp handler (saves scratch regs) \ + .savesp ar.unat, UNAT_OFF+SIGCONTEXT_OFF \ + .savesp ar.fpsr, FPSR_OFF+SIGCONTEXT_OFF \ + .savesp pr, PR_OFF+SIGCONTEXT_OFF \ + .savesp rp, RP_OFF+SIGCONTEXT_OFF \ + .vframesp SP_OFF+SIGCONTEXT_OFF + GLOBAL_ENTRY(ia64_sigtramp) // describe the state that is active when we get here: .prologue - .unwabi @svr4, 's' // mark this as a sigtramp handler (saves scratch regs) - .savesp ar.unat, UNAT_OFF+SIGCONTEXT_OFF - .savesp ar.fpsr, FPSR_OFF+SIGCONTEXT_OFF - .savesp pr, PR_OFF+SIGCONTEXT_OFF - .savesp rp, RP_OFF+SIGCONTEXT_OFF - .vframesp SP_OFF+SIGCONTEXT_OFF + SIGTRAMP_SAVES .body .label_state 1 @@ -237,10 +240,11 @@ back_from_restore_rbs: ldf.fill f14=[base0],32 ldf.fill f15=[base1],32 mov r15=__NR_rt_sigreturn + .restore sp // pop .prologue break __BREAK_SYSCALL - .body - .copy_state 1 + .prologue + SIGTRAMP_SAVES setup_rbs: mov ar.rsc=0 // put RSE into enforced lazy mode ;; @@ -252,6 +256,7 @@ setup_rbs: ;; .spillsp ar.rnat, RNAT_OFF+SIGCONTEXT_OFF st8 [r14]=r16 // save sc_ar_rnat + .body adds r14=(LOADRS_OFF+SIGCONTEXT_OFF),sp mov.m r16=ar.bsp // sc_loadrs <- (new bsp - new bspstore) << 16 @@ -263,10 +268,11 @@ setup_rbs: ;; st8 [r14]=r15 // save sc_loadrs mov ar.rsc=0xf // set RSE into eager mode, pl 3 + .restore sp // pop .prologue br.cond.sptk back_from_setup_rbs .prologue - .copy_state 1 + SIGTRAMP_SAVES .spillsp ar.rnat, RNAT_OFF+SIGCONTEXT_OFF .body restore_rbs: diff --git a/arch/ia64/lib/memcpy_mck.S b/arch/ia64/lib/memcpy_mck.S index 5a1450973bb0..2e7559eeb304 100644 --- a/arch/ia64/lib/memcpy_mck.S +++ b/arch/ia64/lib/memcpy_mck.S @@ -159,7 +159,7 @@ GLOBAL_ENTRY(__copy_user) mov ar.ec=2 (p10) br.dpnt.few .aligned_src_tail ;; - .align 32 +// .align 32 1: EX(.ex_handler, (p16) ld8 r34=[src0],16) EK(.ex_handler, (p16) ld8 r38=[src1],16) @@ -316,7 +316,7 @@ EK(.ex_handler, (p[D]) st8 [dst1] = t15, 4*8) (p7) mov ar.lc = r21 (p8) mov ar.lc = r0 ;; - .align 32 +// .align 32 1: lfetch.fault [src_pre_mem], 128 lfetch.fault.excl [dst_pre_mem], 128 br.cloop.dptk.few 1b @@ -522,7 +522,7 @@ EK(.ex_handler, (p17) st8 [dst1]=r39,8); \ shrp r21=r22,r38,shift; /* speculative work */ \ br.sptk.few .unaligned_src_tail /* branch out of jump table */ \ ;; - .align 32 +// .align 32 .jump_table: COPYU(8) // unaligned cases .jmp1: diff --git a/arch/ia64/lib/memset.S b/arch/ia64/lib/memset.S index 655ff4cb179d..350663bcb9ca 100644 --- a/arch/ia64/lib/memset.S +++ b/arch/ia64/lib/memset.S @@ -125,7 +125,7 @@ GLOBAL_ENTRY(memset) 
(p_zr) br.cond.dptk.many .l1b // Jump to use stf.spill ;; } - .align 32 // -------------------------- // L1A: store ahead into cache lines; fill later +// .align 32 // -------------------------- // L1A: store ahead into cache lines; fill later { .mmi and tmp = -(LINE_SIZE), cnt // compute end of range mov ptr9 = ptr1 // used for prefetching @@ -194,7 +194,7 @@ GLOBAL_ENTRY(memset) br.cond.dpnt.many .move_bytes_from_alignment // Branch no. 3 ;; } - .align 32 +// .align 32 .l1b: // ------------------------------------ // L1B: store ahead into cache lines; fill later { .mmi and tmp = -(LINE_SIZE), cnt // compute end of range @@ -261,7 +261,7 @@ GLOBAL_ENTRY(memset) and cnt = 0x1f, cnt // compute the remaining cnt mov.i ar.lc = loopcnt ;; } - .align 32 +// .align 32 .l2: // ------------------------------------ // L2A: store 32B in 2 cycles { .mmb stf8 [ptr1] = fvalue, 8 diff --git a/arch/ia64/scripts/unwcheck.sh b/arch/ia64/scripts/unwcheck.sh new file mode 100755 index 000000000000..cf61dd42cba2 --- /dev/null +++ b/arch/ia64/scripts/unwcheck.sh @@ -0,0 +1,109 @@ +#!/bin/sh +# Usage: unwcheck.sh +# Pre-requisite: readelf [from Gnu binutils package] +# Purpose: Check the following invariant +# For each code range in the input binary: +# Sum[ lengths of unwind regions] = Number of slots in code range. +# Author : Harish Patil +# First version: January 2002 +# Modified : 2/13/2002 +# Modified : 3/15/2002: duplicate detection +readelf -u $1 | gawk '\ + function todec(hexstr){ + dec = 0; + l = length(hexstr); + for (i = 1; i <= l; i++) + { + c = substr(hexstr, i, 1); + if (c == "A") + dec = dec*16 + 10; + else if (c == "B") + dec = dec*16 + 11; + else if (c == "C") + dec = dec*16 + 12; + else if (c == "D") + dec = dec*16 + 13; + else if (c == "E") + dec = dec*16 + 14; + else if (c == "F") + dec = dec*16 + 15; + else + dec = dec*16 + c; + } + return dec; + } + BEGIN { first = 1; sum_rlen = 0; no_slots = 0; errors=0; no_code_ranges=0; } + { + if (NF==5 && $3=="info") + { + no_code_ranges += 1; + if (first == 0) + { + if (sum_rlen != no_slots) + { + print full_code_range; + print " ", "lo = ", lo, " hi =", hi; + print " ", "sum_rlen = ", sum_rlen, "no_slots = " no_slots; + print " "," ", "*******ERROR ***********"; + print " "," ", "sum_rlen:", sum_rlen, " != no_slots:" no_slots; + errors += 1; + } + sum_rlen = 0; + } + full_code_range = $0; + code_range = $2; + gsub("..$", "", code_range); + gsub("^.", "", code_range); + split(code_range, addr, "-"); + lo = toupper(addr[1]); + + code_range_lo[no_code_ranges] = addr[1]; + occurs[addr[1]] += 1; + full_range[addr[1]] = $0; + + gsub("0X.[0]*", "", lo); + hi = toupper(addr[2]); + gsub("0X.[0]*", "", hi); + no_slots = (todec(hi) - todec(lo))/ 16*3 + first = 0; + } + if (index($0,"rlen") > 0 ) + { + rlen_str = substr($0, index($0,"rlen")); + rlen = rlen_str; + gsub("rlen=", "", rlen); + gsub(")", "", rlen); + sum_rlen = sum_rlen + rlen; + } + } + END { + if (first == 0) + { + if (sum_rlen != no_slots) + { + print "code_range=", code_range; + print " ", "lo = ", lo, " hi =", hi; + print " ", "sum_rlen = ", sum_rlen, "no_slots = " no_slots; + print " "," ", "*******ERROR ***********"; + print " "," ", "sum_rlen:", sum_rlen, " != no_slots:" no_slots; + errors += 1; + } + } + no_duplicates = 0; + for (i=1; i<=no_code_ranges; i++) + { + cr = code_range_lo[i]; + if (reported_cr[cr]==1) continue; + if ( occurs[cr] > 1) + { + reported_cr[cr] = 1; + print "Code range low ", code_range_lo[i], ":", full_range[cr], " occurs: ", occurs[cr], " times."; + print " "; + 
no_duplicates++; + } + } + print "======================================" + print "Total errors:", errors, "/", no_code_ranges, " duplicates:", no_duplicates; + print "======================================" + } + ' -- cgit v1.2.3 From c2b2b9a24aaa3314ed013e6cb75c81c393834856 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 17 Jan 2003 03:22:15 -0800 Subject: [PATCH] irq cleanups Cleanup the irq handling macros. --- include/asm-ia64/system.h | 81 ++++++++++++++++++++--------------------------- 1 file changed, 35 insertions(+), 46 deletions(-) diff --git a/include/asm-ia64/system.h b/include/asm-ia64/system.h index d09f11cb14ec..ad40abfb7e91 100644 --- a/include/asm-ia64/system.h +++ b/include/asm-ia64/system.h @@ -117,62 +117,51 @@ ia64_insn_group_barrier (void) */ /* For spinlocks etc */ +/* clearing psr.i is implicitly serialized (visible by next insn) */ +/* setting psr.i requires data serialization */ +#define __local_irq_save(x) __asm__ __volatile__ ("mov %0=psr;;" \ + "rsm psr.i;;" \ + : "=r" (x) :: "memory") +#define __local_irq_disable() __asm__ __volatile__ (";; rsm psr.i;;" ::: "memory") +#define __local_irq_restore(x) __asm__ __volatile__ ("cmp.ne p6,p7=%0,r0;;" \ + "(p6) ssm psr.i;" \ + "(p7) rsm psr.i;;" \ + "(p6) srlz.d" \ + :: "r" ((x) & IA64_PSR_I) \ + : "p6", "p7", "memory") + #ifdef CONFIG_IA64_DEBUG_IRQ extern unsigned long last_cli_ip; -# define local_irq_save(x) \ -do { \ - unsigned long ip, psr; \ - \ - __asm__ __volatile__ ("mov %0=psr;; rsm psr.i;;" : "=r" (psr) :: "memory"); \ - if (psr & (1UL << 14)) { \ - __asm__ ("mov %0=ip" : "=r"(ip)); \ - last_cli_ip = ip; \ - } \ - (x) = psr; \ -} while (0) +# define __save_ip() __asm__ ("mov %0=ip" : "=r" (last_cli_ip)) -# define local_irq_disable() \ -do { \ - unsigned long ip, psr; \ - \ - __asm__ __volatile__ ("mov %0=psr;; rsm psr.i;;" : "=r" (psr) :: "memory"); \ - if (psr & (1UL << 14)) { \ - __asm__ ("mov %0=ip" : "=r"(ip)); \ - last_cli_ip = ip; \ - } \ +# define local_irq_save(x) \ +do { \ + unsigned long psr; \ + \ + __local_irq_save(psr); \ + if (psr & IA64_PSR_I) \ + __save_ip(); \ + (x) = psr; \ } while (0) -# define local_irq_restore(x) \ -do { \ - unsigned long ip, old_psr, psr = (x); \ - \ - __asm__ __volatile__ ("mov %0=psr;" \ - "cmp.ne p6,p7=%1,r0;;" \ - "(p6) ssm psr.i;" \ - "(p7) rsm psr.i;;" \ - "(p6) srlz.d" \ - : "=r" (old_psr) : "r"((psr) & IA64_PSR_I) \ - : "p6", "p7", "memory"); \ - if ((old_psr & IA64_PSR_I) && !(psr & IA64_PSR_I)) { \ - __asm__ ("mov %0=ip" : "=r"(ip)); \ - last_cli_ip = ip; \ - } \ +# define local_irq_disable() do { unsigned long x; local_irq_save(x); } while (0) + +# define local_irq_restore(x) \ +do { \ + unsigned long old_psr, psr = (x); \ + \ + local_save_flags(old_psr); \ + __local_irq_restore(psr); \ + if ((old_psr & IA64_PSR_I) && !(psr & IA64_PSR_I)) \ + __save_ip(); \ } while (0) #else /* !CONFIG_IA64_DEBUG_IRQ */ - /* clearing of psr.i is implicitly serialized (visible by next insn) */ -# define local_irq_save(x) __asm__ __volatile__ ("mov %0=psr;; rsm psr.i;;" \ - : "=r" (x) :: "memory") -# define local_irq_disable() __asm__ __volatile__ (";; rsm psr.i;;" ::: "memory") -/* (potentially) setting psr.i requires data serialization: */ -# define local_irq_restore(x) __asm__ __volatile__ ("cmp.ne p6,p7=%0,r0;;" \ - "(p6) ssm psr.i;" \ - "(p7) rsm psr.i;;" \ - "srlz.d" \ - :: "r"((x) & IA64_PSR_I) \ - : "p6", "p7", "memory") +# define local_irq_save(x) __local_irq_save(x) +# define local_irq_disable() __local_irq_disable() +# define local_irq_restore(x) 
__local_irq_restore(x) #endif /* !CONFIG_IA64_DEBUG_IRQ */ #define local_irq_enable() __asm__ __volatile__ (";; ssm psr.i;; srlz.d" ::: "memory") -- cgit v1.2.3 From 3b801be53992c8945232379f9a9201308413619c Mon Sep 17 00:00:00 2001 From: David Mosberger Date: Fri, 17 Jan 2003 03:31:16 -0800 Subject: ia64: Fix Makefiles so that "make clean" removes the files generated in the tools directory. Patch by Yu, Fenghua. --- arch/ia64/Makefile | 1 + arch/ia64/tools/Makefile | 9 +-------- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile index bbe9e62d19f2..7d93d20e3ee7 100644 --- a/arch/ia64/Makefile +++ b/arch/ia64/Makefile @@ -64,6 +64,7 @@ check: vmlinux archmrproper: archclean: $(Q)$(MAKE) -f scripts/Makefile.clean obj=arch/ia64/boot + $(Q)$(MAKE) -f scripts/Makefile.clean obj=arch/ia64/tools CLEAN_FILES += include/asm-ia64/offsets.h vmlinux.gz bootloader diff --git a/arch/ia64/tools/Makefile b/arch/ia64/tools/Makefile index e8dc9c8f40a3..ce1fe06d9d97 100644 --- a/arch/ia64/tools/Makefile +++ b/arch/ia64/tools/Makefile @@ -4,14 +4,7 @@ TARGET = include/asm-ia64/offsets.h src = $(obj) -all: - -fastdep: - -mrproper: clean - -clean: - rm -f $(obj)/print_offsets.s $(obj)/print_offsets $(obj)/offsets.h +clean-files := print_offsets.s print_offsets offsets.h $(TARGET): $(obj)/offsets.h @if ! cmp -s $(obj)/offsets.h ${TARGET}; then \ -- cgit v1.2.3 From 742429288a13d06ede9a530eb40027e8b39927ca Mon Sep 17 00:00:00 2001 From: Seth Rohit Date: Fri, 17 Jan 2003 06:32:05 -0800 Subject: [PATCH] ia64: Update to hugetlb Please find attached a patch that brings in the support of hugetlb inline with the ia32 tree. This removes the syscall interface and gets the hugetlbfs support (using mmap and shmat). I might be sending you couple of more small updates a little later. At least wanted to get this out first. --- arch/ia64/kernel/efi.c | 28 --- arch/ia64/kernel/entry.S | 4 +- arch/ia64/kernel/sys_ia64.c | 94 ++------ arch/ia64/mm/hugetlbpage.c | 537 +++++++++++++++++--------------------------- arch/ia64/mm/init.c | 32 --- 5 files changed, 216 insertions(+), 479 deletions(-) diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index bd74d72cb924..5a5c69720006 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -33,15 +33,6 @@ #define EFI_DEBUG 0 -#ifdef CONFIG_HUGETLB_PAGE - -/* By default at total of 512MB is reserved huge pages. 
*/ -#define HTLBZONE_SIZE_DEFAULT 0x20000000 - -unsigned long htlbzone_pages = (HTLBZONE_SIZE_DEFAULT >> HPAGE_SHIFT); - -#endif - extern efi_status_t efi_call_phys (void *, ...); struct efi efi; @@ -497,25 +488,6 @@ efi_init (void) ++cp; } } -#ifdef CONFIG_HUGETLB_PAGE - /* Just duplicating the above algo for lpzone start */ - for (cp = saved_command_line; *cp; ) { - if (memcmp(cp, "lpmem=", 6) == 0) { - cp += 6; - htlbzone_pages = memparse(cp, &end); - htlbzone_pages = (htlbzone_pages >> HPAGE_SHIFT); - if (end != cp) - break; - cp = end; - } else { - while (*cp != ' ' && *cp) - ++cp; - while (*cp == ' ') - ++cp; - } - } - printk("Total HugeTLB_Page memory pages requested 0x%lx \n", htlbzone_pages); -#endif if (mem_limit != ~0UL) printk("Ignoring memory above %luMB\n", mem_limit >> 20); diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index bc0a07752e57..5ed695e62d4d 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1242,8 +1242,8 @@ sys_call_table: data8 sys_sched_setaffinity data8 sys_sched_getaffinity data8 sys_set_tid_address - data8 sys_alloc_hugepages - data8 sys_free_hugepages // 1235 + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1235 data8 sys_exit_group data8 sys_lookup_dcookie data8 sys_io_setup diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c index c89bef6a61f1..6f748d36bdaf 100644 --- a/arch/ia64/kernel/sys_ia64.c +++ b/arch/ia64/kernel/sys_ia64.c @@ -21,12 +21,6 @@ #include #include -#ifdef CONFIG_HUGETLB_PAGE -# define SHMLBA_HPAGE HPAGE_SIZE -# define COLOR_HALIGN(addr) (((addr) + SHMLBA_HPAGE - 1) & ~(SHMLBA_HPAGE - 1)) -# define TASK_HPAGE_BASE ((REGION_HPAGE << REGION_SHIFT) | HPAGE_SIZE) -#endif - unsigned long arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) @@ -37,6 +31,20 @@ arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len if (len > RGN_MAP_LIMIT) return -ENOMEM; + +#ifdef CONFIG_HUGETLB_PAGE +#define COLOR_HALIGN(addr) ((addr + HPAGE_SIZE - 1) & ~(HPAGE_SIZE - 1)) +#define TASK_HPAGE_BASE ((REGION_HPAGE << REGION_SHIFT) | HPAGE_SIZE) + if (filp && is_file_hugepages(filp)) { + if ((REGION_NUMBER(addr) != REGION_HPAGE) || (addr & (HPAGE_SIZE -1))) + addr = TASK_HPAGE_BASE; + addr = COLOR_HALIGN(addr); + } + else { + if (REGION_NUMBER(addr) == REGION_HPAGE) + addr = 0; + } +#endif if (!addr) addr = TASK_UNMAPPED_BASE; @@ -244,80 +252,6 @@ sys_mmap (unsigned long addr, unsigned long len, int prot, int flags, int fd, lo return addr; } -#ifdef CONFIG_HUGETLB_PAGE - -asmlinkage unsigned long -sys_alloc_hugepages (int key, unsigned long addr, size_t len, int prot, int flag) -{ - struct mm_struct *mm = current->mm; - long retval; - extern int alloc_hugetlb_pages (int, unsigned long, unsigned long, int, int); - - if ((key < 0) || (len & (HPAGE_SIZE - 1))) - return -EINVAL; - - if (addr && ((REGION_NUMBER(addr) != REGION_HPAGE) || (addr & (HPAGE_SIZE - 1)))) - addr = TASK_HPAGE_BASE; - - if (!addr) - addr = TASK_HPAGE_BASE; - down_write(&mm->mmap_sem); - { - retval = arch_get_unmapped_area(NULL, COLOR_HALIGN(addr), len, 0, 0); - if (retval != -ENOMEM) - retval = alloc_hugetlb_pages(key, retval, len, prot, flag); - } - up_write(&mm->mmap_sem); - - if (IS_ERR((void *) retval)) - return retval; - - force_successful_syscall_return(); - return retval; -} - -asmlinkage int -sys_free_hugepages (unsigned long addr) -{ - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - extern int 
free_hugepages(struct vm_area_struct *); - int retval; - - down_write(&mm->mmap_sem); - { - vma = find_vma(mm, addr); - if (!vma || !is_vm_hugetlb_page(vma) || (vma->vm_start != addr)) - retval = -EINVAL; - goto out; - - spin_lock(&mm->page_table_lock); - { - retval = free_hugepages(vma); - } - spin_unlock(&mm->page_table_lock); - } -out: - up_write(&mm->mmap_sem); - return retval; -} - -#else /* !CONFIG_HUGETLB_PAGE */ - -asmlinkage unsigned long -sys_alloc_hugepages (int key, size_t addr, unsigned long len, int prot, int flag) -{ - return -ENOSYS; -} - -asmlinkage unsigned long -sys_free_hugepages (unsigned long addr) -{ - return -ENOSYS; -} - -#endif /* !CONFIG_HUGETLB_PAGE */ - asmlinkage unsigned long ia64_mremap (unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags, unsigned long new_addr) diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c index ff6a67975c83..eabf53b0a881 100644 --- a/arch/ia64/mm/hugetlbpage.c +++ b/arch/ia64/mm/hugetlbpage.c @@ -12,89 +12,42 @@ #include #include #include - #include #include #include #include -static struct vm_operations_struct hugetlb_vm_ops; -struct list_head htlbpage_freelist; -spinlock_t htlbpage_lock = SPIN_LOCK_UNLOCKED; -extern long htlbpagemem; +#include -static void zap_hugetlb_resources (struct vm_area_struct *); +static long htlbpagemem; +int htlbpage_max; +static long htlbzone_pages; -#define MAX_ID 32 -struct htlbpagekey { - struct inode *in; - int key; -} htlbpagek[MAX_ID]; +struct vm_operations_struct hugetlb_vm_ops; +static LIST_HEAD(htlbpage_freelist); +static spinlock_t htlbpage_lock = SPIN_LOCK_UNLOCKED; -static struct inode * -find_key_inode(int key) +static struct page *alloc_hugetlb_page(void) { int i; - - for (i = 0; i < MAX_ID; i++) { - if (htlbpagek[i].key == key) - return (htlbpagek[i].in); - } - return NULL; -} - -static struct page * -alloc_hugetlb_page (void) -{ - struct list_head *curr, *head; struct page *page; spin_lock(&htlbpage_lock); - - head = &htlbpage_freelist; - curr = head->next; - - if (curr == head) { + if (list_empty(&htlbpage_freelist)) { spin_unlock(&htlbpage_lock); return NULL; } - page = list_entry(curr, struct page, list); - list_del(curr); + + page = list_entry(htlbpage_freelist.next, struct page, list); + list_del(&page->list); htlbpagemem--; spin_unlock(&htlbpage_lock); set_page_count(page, 1); - memset(page_address(page), 0, HPAGE_SIZE); + for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); ++i) + clear_highpage(&page[i]); return page; } -static void -free_hugetlb_page (struct page *page) -{ - spin_lock(&htlbpage_lock); - if ((page->mapping != NULL) && (page_count(page) == 2)) { - struct inode *inode = page->mapping->host; - int i; - - ClearPageDirty(page); - remove_from_page_cache(page); - set_page_count(page, 1); - if ((inode->i_size -= HPAGE_SIZE) == 0) { - for (i = 0; i < MAX_ID; i++) - if (htlbpagek[i].key == inode->i_ino) { - htlbpagek[i].key = 0; - htlbpagek[i].in = NULL; - break; - } - kfree(inode); - } - } - if (put_page_testzero(page)) { - list_add(&page->list, &htlbpage_freelist); - htlbpagemem++; - } - spin_unlock(&htlbpage_lock); -} - static pte_t * huge_pte_alloc (struct mm_struct *mm, unsigned long addr) { @@ -144,63 +97,8 @@ set_huge_pte (struct mm_struct *mm, struct vm_area_struct *vma, return; } -static int -anon_get_hugetlb_page (struct mm_struct *mm, struct vm_area_struct *vma, - int write_access, pte_t * page_table) -{ - struct page *page; - - page = alloc_hugetlb_page(); - if (page == NULL) - return -1; - set_huge_pte(mm, vma, page, 
page_table, write_access); - return 1; -} - -static int -make_hugetlb_pages_present (unsigned long addr, unsigned long end, int flags) -{ - int write; - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - pte_t *pte; - - vma = find_vma(mm, addr); - if (!vma) - goto out_error1; - - write = (vma->vm_flags & VM_WRITE) != 0; - if ((vma->vm_end - vma->vm_start) & (HPAGE_SIZE - 1)) - goto out_error1; - spin_lock(&mm->page_table_lock); - do { - pte = huge_pte_alloc(mm, addr); - if ((pte) && (pte_none(*pte))) { - if (anon_get_hugetlb_page(mm, vma, write ? VM_WRITE : VM_READ, pte) == -1) - goto out_error; - } else - goto out_error; - addr += HPAGE_SIZE; - } while (addr < end); - spin_unlock(&mm->page_table_lock); - vma->vm_flags |= (VM_HUGETLB | VM_RESERVED); - if (flags & MAP_PRIVATE) - vma->vm_flags |= VM_DONTCOPY; - vma->vm_ops = &hugetlb_vm_ops; - return 0; -out_error: - if (addr > vma->vm_start) { - vma->vm_end = addr; - zap_hugetlb_resources(vma); - vma->vm_end = end; - } - spin_unlock(&mm->page_table_lock); -out_error1: - return -1; -} - -int -copy_hugetlb_page_range (struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma) +int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, + struct vm_area_struct *vma) { pte_t *src_pte, *dst_pte, entry; struct page *ptepage; @@ -220,15 +118,14 @@ copy_hugetlb_page_range (struct mm_struct *dst, struct mm_struct *src, struct vm addr += HPAGE_SIZE; } return 0; - - nomem: +nomem: return -ENOMEM; } int -follow_hugetlb_page (struct mm_struct *mm, struct vm_area_struct *vma, - struct page **pages, struct vm_area_struct **vmas, - unsigned long *st, int *length, int i) +follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, + struct page **pages, struct vm_area_struct **vmas, + unsigned long *st, int *length, int i) { pte_t *ptep, pte; unsigned long start = *st; @@ -252,8 +149,8 @@ back1: i++; len--; start += PAGE_SIZE; - if (((start & HPAGE_MASK) == pstart) && len - && (start < vma->vm_end)) + if (((start & HPAGE_MASK) == pstart) && len && + (start < vma->vm_end)) goto back1; } while (len && start < vma->vm_end); *length = len; @@ -261,238 +158,149 @@ back1: return i; } -static void -zap_hugetlb_resources (struct vm_area_struct *mpnt) +void free_huge_page(struct page *page) { - struct mm_struct *mm = mpnt->vm_mm; - unsigned long len, addr, end; - pte_t *ptep; - struct page *page; + BUG_ON(page_count(page)); + BUG_ON(page->mapping); - addr = mpnt->vm_start; - end = mpnt->vm_end; - len = end - addr; - do { - ptep = huge_pte_offset(mm, addr); - page = pte_page(*ptep); - pte_clear(ptep); - free_hugetlb_page(page); - addr += HPAGE_SIZE; - } while (addr < end); - mm->rss -= (len >> PAGE_SHIFT); - mpnt->vm_ops = NULL; - flush_tlb_range(mpnt, end - len, end); -} + INIT_LIST_HEAD(&page->list); -static void -unlink_vma (struct vm_area_struct *mpnt) -{ - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - - vma = mm->mmap; - if (vma == mpnt) { - mm->mmap = vma->vm_next; - } else { - while (vma->vm_next != mpnt) { - vma = vma->vm_next; - } - vma->vm_next = mpnt->vm_next; - } - rb_erase(&mpnt->vm_rb, &mm->mm_rb); - mm->mmap_cache = NULL; - mm->map_count--; + spin_lock(&htlbpage_lock); + list_add(&page->list, &htlbpage_freelist); + htlbpagemem++; + spin_unlock(&htlbpage_lock); } -int -free_hugepages (struct vm_area_struct *mpnt) +void huge_page_release(struct page *page) { - unlink_vma(mpnt); - zap_hugetlb_resources(mpnt); - kmem_cache_free(vm_area_cachep, mpnt); - return 1; + if 
(!put_page_testzero(page)) + return; + + free_huge_page(page); } -static struct inode * -set_new_inode (unsigned long len, int prot, int flag, int key) +void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - struct inode *inode; - int i; + struct mm_struct *mm = vma->vm_mm; + unsigned long address; + pte_t *pte; + struct page *page; - for (i = 0; i < MAX_ID; i++) { - if (htlbpagek[i].key == 0) - break; - } - if (i == MAX_ID) - return NULL; - inode = kmalloc(sizeof (struct inode), GFP_ATOMIC); - if (inode == NULL) - return NULL; + BUG_ON(start & (HPAGE_SIZE - 1)); + BUG_ON(end & (HPAGE_SIZE - 1)); - inode_init_once(inode); - atomic_inc(&inode->i_writecount); - inode->i_mapping = &inode->i_data; - inode->i_mapping->host = inode; - inode->i_ino = (unsigned long) key; - - htlbpagek[i].key = key; - htlbpagek[i].in = inode; - inode->i_uid = current->fsuid; - inode->i_gid = current->fsgid; - inode->i_mode = prot; - inode->i_size = len; - return inode; + spin_lock(&htlbpage_lock); + spin_unlock(&htlbpage_lock); + for (address = start; address < end; address += HPAGE_SIZE) { + pte = huge_pte_offset(mm, address); + if (pte_none(*pte)) + continue; + page = pte_page(*pte); + huge_page_release(page); + pte_clear(pte); + } + mm->rss -= (end - start) >> PAGE_SHIFT; + flush_tlb_range(vma, start, end); } -static int -check_size_prot (struct inode *inode, unsigned long len, int prot, int flag) +void zap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigned long length) { - if (inode->i_uid != current->fsuid) - return -1; - if (inode->i_gid != current->fsgid) - return -1; - if (inode->i_size != len) - return -1; - return 0; + struct mm_struct *mm = vma->vm_mm; + spin_lock(&mm->page_table_lock); + unmap_hugepage_range(vma, start, start + length); + spin_unlock(&mm->page_table_lock); } -int -alloc_shared_hugetlb_pages (int key, unsigned long addr, unsigned long len, int prot, int flag) +int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma) { struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - struct inode *inode; - struct address_space *mapping; - struct page *page; - int idx; - int retval = -ENOMEM; - int newalloc = 0; + unsigned long addr; + int ret = 0; -try_again: - spin_lock(&htlbpage_lock); - inode = find_key_inode(key); - if (inode == NULL) { - if (!capable(CAP_SYS_ADMIN)) { - if (!in_group_p(0)) { - retval = -EPERM; - goto out_err; - } - } - if (!(flag & IPC_CREAT)) { - retval = -ENOENT; - goto out_err; - } - inode = set_new_inode(len, prot, flag, key); - if (inode == NULL) - goto out_err; - newalloc = 1; - } else { - if (check_size_prot(inode, len, prot, flag) < 0) { - retval = -EINVAL; - goto out_err; - } - else if (atomic_read(&inode->i_writecount)) { - spin_unlock(&htlbpage_lock); - goto try_again; - } - } - spin_unlock(&htlbpage_lock); - mapping = inode->i_mapping; - - addr = do_mmap_pgoff(NULL, addr, len, (unsigned long) prot, - MAP_NORESERVE|MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, 0); - if (IS_ERR((void *) addr)) - goto freeinode; - - vma = find_vma(mm, addr); - if (!vma) { - retval = -EINVAL; - goto freeinode; - } + BUG_ON(vma->vm_start & ~HPAGE_MASK); + BUG_ON(vma->vm_end & ~HPAGE_MASK); spin_lock(&mm->page_table_lock); - do { + for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) { + unsigned long idx; pte_t *pte = huge_pte_alloc(mm, addr); - if ((pte) && (pte_none(*pte))) { - idx = (addr - vma->vm_start) >> HPAGE_SHIFT; - page = find_get_page(mapping, idx); - if (page == 
NULL) { - page = alloc_hugetlb_page(); - if (page == NULL) - goto out; - add_to_page_cache(page, mapping, idx); - } - set_huge_pte(mm, vma, page, pte, - (vma->vm_flags & VM_WRITE)); - } else - goto out; - addr += HPAGE_SIZE; - } while (addr < vma->vm_end); - retval = 0; - vma->vm_flags |= (VM_HUGETLB | VM_RESERVED); - vma->vm_ops = &hugetlb_vm_ops; - spin_unlock(&mm->page_table_lock); - spin_lock(&htlbpage_lock); - atomic_set(&inode->i_writecount, 0); - spin_unlock(&htlbpage_lock); - return retval; -out: - if (addr > vma->vm_start) { - unsigned long raddr = vma->vm_end; - vma->vm_end = addr; - zap_hugetlb_resources(vma); - vma->vm_end = raddr; - } - spin_unlock(&mm->page_table_lock); - do_munmap(mm, vma->vm_start, len); - if (newalloc) - goto freeinode; - return retval; + struct page *page; -out_err: - spin_unlock(&htlbpage_lock); -freeinode: - if (newalloc) { - for (idx = 0; idx < MAX_ID; idx++) - if (htlbpagek[idx].key == inode->i_ino) { - htlbpagek[idx].key = 0; - htlbpagek[idx].in = NULL; - break; + if (!pte) { + ret = -ENOMEM; + goto out; + } + if (!pte_none(*pte)) + continue; + + idx = ((addr - vma->vm_start) >> HPAGE_SHIFT) + + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT)); + page = find_get_page(mapping, idx); + if (!page) { + page = alloc_hugetlb_page(); + if (!page) { + ret = -ENOMEM; + goto out; } - kfree(inode); + add_to_page_cache(page, mapping, idx); + unlock_page(page); + } + set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE); } - return retval; +out: + spin_unlock(&mm->page_table_lock); + return ret; } -static int -alloc_private_hugetlb_pages (int key, unsigned long addr, unsigned long len, int prot, int flag) +void update_and_free_page(struct page *page) { - if (!capable(CAP_SYS_ADMIN)) { - if (!in_group_p(0)) - return -EPERM; - } - addr = do_mmap_pgoff(NULL, addr, len, prot, - MAP_NORESERVE | MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS, 0); - if (IS_ERR((void *) addr)) - return -ENOMEM; - if (make_hugetlb_pages_present(addr, (addr + len), flag) < 0) { - do_munmap(current->mm, addr, len); - return -ENOMEM; + int j; + struct page *map; + + map = page; + htlbzone_pages--; + for (j = 0; j < (HPAGE_SIZE / PAGE_SIZE); j++) { + map->flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced | + 1 << PG_dirty | 1 << PG_active | 1 << PG_reserved | + 1 << PG_private | 1<< PG_writeback); + set_page_count(map, 0); + map++; } - return 0; + set_page_count(page, 1); + __free_pages(page, HUGETLB_PAGE_ORDER); } -int -alloc_hugetlb_pages (int key, unsigned long addr, unsigned long len, int prot, int flag) +int try_to_free_low(int count) { - if (key > 0) - return alloc_shared_hugetlb_pages(key, addr, len, prot, flag); - else - return alloc_private_hugetlb_pages(key, addr, len, prot, flag); + struct list_head *p; + struct page *page, *map; + + map = NULL; + spin_lock(&htlbpage_lock); + list_for_each(p, &htlbpage_freelist) { + if (map) { + list_del(&map->list); + update_and_free_page(map); + htlbpagemem--; + map = NULL; + if (++count == 0) + break; + } + page = list_entry(p, struct page, list); + if ((page_zone(page))->name[0] != 'H') // Look for non-Highmem + map = page; + } + if (map) { + list_del(&map->list); + update_and_free_page(map); + htlbpagemem--; + count++; + } + spin_unlock(&htlbpage_lock); + return count; } -int -set_hugetlb_mem_size (int count) +int set_hugetlb_mem_size(int count) { int j, lcount; struct page *page, *map; @@ -503,7 +311,10 @@ set_hugetlb_mem_size (int count) lcount = count; else lcount = count - htlbzone_pages; - if (lcount > 0) { /*Increase the mem 
size. */ + + if (lcount == 0) + return (int)htlbzone_pages; + if (lcount > 0) { /* Increase the mem size. */ while (lcount--) { page = alloc_pages(__GFP_HIGHMEM, HUGETLB_PAGE_ORDER); if (page == NULL) @@ -521,27 +332,79 @@ set_hugetlb_mem_size (int count) } return (int) htlbzone_pages; } - /*Shrink the memory size. */ + /* Shrink the memory size. */ + lcount = try_to_free_low(lcount); while (lcount++) { page = alloc_hugetlb_page(); if (page == NULL) break; spin_lock(&htlbpage_lock); - htlbzone_pages--; + update_and_free_page(page); spin_unlock(&htlbpage_lock); - map = page; - for (j = 0; j < (HPAGE_SIZE / PAGE_SIZE); j++) { - map->flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced | - 1 << PG_dirty | 1 << PG_active | 1 << PG_reserved | - 1 << PG_private | 1<< PG_writeback); - map++; - } - set_page_count(page, 1); - __free_pages(page, HUGETLB_PAGE_ORDER); } return (int) htlbzone_pages; } -static struct vm_operations_struct hugetlb_vm_ops = { - .close = zap_hugetlb_resources +int hugetlb_sysctl_handler(ctl_table *table, int write, struct file *file, void *buffer, size_t *length) +{ + proc_dointvec(table, write, file, buffer, length); + htlbpage_max = set_hugetlb_mem_size(htlbpage_max); + return 0; +} + +static int __init hugetlb_setup(char *s) +{ + if (sscanf(s, "%d", &htlbpage_max) <= 0) + htlbpage_max = 0; + return 1; +} +__setup("hugepages=", hugetlb_setup); + +static int __init hugetlb_init(void) +{ + int i, j; + struct page *page; + + for (i = 0; i < htlbpage_max; ++i) { + page = alloc_pages(__GFP_HIGHMEM, HUGETLB_PAGE_ORDER); + if (!page) + break; + for (j = 0; j < HPAGE_SIZE/PAGE_SIZE; ++j) + SetPageReserved(&page[j]); + spin_lock(&htlbpage_lock); + list_add(&page->list, &htlbpage_freelist); + spin_unlock(&htlbpage_lock); + } + htlbpage_max = htlbpagemem = htlbzone_pages = i; + printk("Total HugeTLB memory allocated, %ld\n", htlbpagemem); + return 0; +} +module_init(hugetlb_init); + +int hugetlb_report_meminfo(char *buf) +{ + return sprintf(buf, + "HugePages_Total: %5lu\n" + "HugePages_Free: %5lu\n" + "Hugepagesize: %5lu kB\n", + htlbzone_pages, + htlbpagemem, + HPAGE_SIZE/1024); +} + +int is_hugepage_mem_enough(size_t size) +{ + if (size > (htlbpagemem << HPAGE_SHIFT)) + return 0; + return 1; +} + +static struct page *hugetlb_nopage(struct vm_area_struct * area, unsigned long address, int unused) +{ + BUG(); + return NULL; +} + +struct vm_operations_struct hugetlb_vm_ops = { + .nopage = hugetlb_nopage, }; diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 34fb97e2002f..50aa604eb25a 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -342,13 +342,6 @@ ia64_mmu_init (void *my_cpu_data) * Set up the page tables. 
*/ -#ifdef CONFIG_HUGETLB_PAGE -long htlbpagemem; -int htlbpage_max; -extern long htlbzone_pages; -extern struct list_head htlbpage_freelist; -#endif - #ifdef CONFIG_DISCONTIGMEM void paging_init (void) @@ -462,29 +455,4 @@ mem_init (void) #ifdef CONFIG_IA32_SUPPORT ia32_gdt_init(); #endif -#ifdef CONFIG_HUGETLB_PAGE - { - long i; - int j; - struct page *page, *map; - - if ((htlbzone_pages << (HPAGE_SHIFT - PAGE_SHIFT)) >= max_low_pfn) - htlbzone_pages = (max_low_pfn >> ((HPAGE_SHIFT - PAGE_SHIFT) + 1)); - INIT_LIST_HEAD(&htlbpage_freelist); - for (i = 0; i < htlbzone_pages; i++) { - page = alloc_pages(__GFP_HIGHMEM, HUGETLB_PAGE_ORDER); - if (!page) - break; - map = page; - for (j = 0; j < (HPAGE_SIZE/PAGE_SIZE); j++) { - SetPageReserved(map); - map++; - } - list_add(&page->list, &htlbpage_freelist); - } - printk("Total Huge_TLB_Page memory pages allocated %ld \n", i); - htlbzone_pages = htlbpagemem = i; - htlbpage_max = (int)i; - } -#endif } -- cgit v1.2.3 From 51792fe4e62f1dcca206504898cd3295e6cd6113 Mon Sep 17 00:00:00 2001 From: David Mosberger Date: Fri, 17 Jan 2003 07:10:43 -0800 Subject: Remove last vestiges of hugepage system calls (they have been replaced by hugetlbfs). --- include/asm-ia64/unistd.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/asm-ia64/unistd.h b/include/asm-ia64/unistd.h index ea993f51f702..5517083e6b67 100644 --- a/include/asm-ia64/unistd.h +++ b/include/asm-ia64/unistd.h @@ -4,7 +4,7 @@ /* * IA-64 Linux syscall numbers and inline-functions. * - * Copyright (C) 1998-2002 Hewlett-Packard Co + * Copyright (C) 1998-2003 Hewlett-Packard Co * David Mosberger-Tang */ @@ -223,8 +223,8 @@ #define __NR_sched_setaffinity 1231 #define __NR_sched_getaffinity 1232 #define __NR_set_tid_address 1233 -#define __NR_alloc_hugepages 1234 -#define __NR_free_hugepages 1235 +/* 1234 available for reuse */ +/* 1235 available for reuse */ #define __NR_exit_group 1236 #define __NR_lookup_dcookie 1237 #define __NR_io_setup 1238 -- cgit v1.2.3 From 3b0ca9310f1dd4dc55a064ac998098a8d73baddb Mon Sep 17 00:00:00 2001 From: Stéphane Eranian Date: Fri, 17 Jan 2003 07:11:15 -0800 Subject: [PATCH] ia64: perfmon update Here is the patch. It is rather big because it includes some renaming and cleanups. This patch brings 2.5 in line with 2.4.20: perfmon-1.3. It adds: - idle-task exclusion - less ctxsw overhead in system-wide mode - cleanup of most of the inline asm - no further use of PAL to determine PMU features - temporary hooks for custom overflow handlers (VTUNE/Oprofile) - renamed perfmon init functions Thanks. 
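For the new overflow-handler hooks, usage is expected to look roughly like the sketch below. This is an illustrative sketch only, not code from this patch: the handler signature mirrors the indirect call made in pfm_interrupt_handler(), the descriptor type and its .handler field follow pfm_install_alternate_syswide_subsystem(), and the header location as well as all the my_* names are assumptions.

	/*
	 * Hypothetical subsystem (an Oprofile-style profiler, say) claiming
	 * the PMU overflow interrupt via the hooks exported by this patch.
	 */
	#include <linux/module.h>
	#include <linux/init.h>
	#include <asm/perfmon.h>	/* assumed home of pfm_intr_handler_desc_t */

	static void
	my_ovfl_handler (int irq, void *arg, struct pt_regs *regs)
	{
		/* runs instead of perfmon's default overflow handling */
	}

	static pfm_intr_handler_desc_t my_desc = {
		.handler = my_ovfl_handler,
	};

	static int __init
	my_prof_init (void)
	{
		/* reserves a system-wide session on all online CPUs;
		 * returns -EBUSY if another handler is already installed */
		return pfm_install_alternate_syswide_subsystem(&my_desc);
	}

	static void __exit
	my_prof_exit (void)
	{
		pfm_remove_alternate_syswide_subsystem(&my_desc);
	}

	module_init(my_prof_init);
	module_exit(my_prof_exit);

Note that only one alternate handler can be live at a time, and the remove call takes the same descriptor precisely so that a subsystem cannot unhook someone else's handler.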
--- arch/ia64/kernel/ia64_ksyms.c | 6 +- arch/ia64/kernel/irq_ia64.c | 2 +- arch/ia64/kernel/perfmon.c | 762 ++++++++++++++++++++++-------------- arch/ia64/kernel/perfmon_generic.h | 31 +- arch/ia64/kernel/perfmon_itanium.h | 17 +- arch/ia64/kernel/perfmon_mckinley.h | 17 +- arch/ia64/kernel/process.c | 18 +- arch/ia64/kernel/smpboot.c | 4 +- include/asm-ia64/perfmon.h | 37 +- include/asm-ia64/system.h | 4 +- 10 files changed, 583 insertions(+), 315 deletions(-) diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c index a240db18945e..029ab1f72fdc 100644 --- a/arch/ia64/kernel/ia64_ksyms.c +++ b/arch/ia64/kernel/ia64_ksyms.c @@ -142,4 +142,8 @@ EXPORT_SYMBOL(efi_dir); EXPORT_SYMBOL(ia64_mv); #endif EXPORT_SYMBOL(machvec_noop); - +#ifdef CONFIG_PERFMON +#include +EXPORT_SYMBOL(pfm_install_alternate_syswide_subsystem); +EXPORT_SYMBOL(pfm_remove_alternate_syswide_subsystem); +#endif diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index 0462d4080e33..d158a7cf8aa5 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -178,7 +178,7 @@ init_IRQ (void) register_percpu_irq(IA64_IPI_VECTOR, &ipi_irqaction); #endif #ifdef CONFIG_PERFMON - perfmon_init_percpu(); + pfm_init_percpu(); #endif platform_irq_init(); } diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 7ec39ed57d52..847930466f72 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include @@ -56,8 +55,8 @@ /* * Reset register flags */ -#define PFM_RELOAD_LONG_RESET 1 -#define PFM_RELOAD_SHORT_RESET 2 +#define PFM_PMD_LONG_RESET 1 +#define PFM_PMD_SHORT_RESET 2 /* * Misc macros and definitions @@ -83,8 +82,10 @@ #define PFM_REG_CONFIG (0x4<<4|PFM_REG_IMPL) /* refine configuration */ #define PFM_REG_BUFFER (0x5<<4|PFM_REG_IMPL) /* PMD used as buffer */ +#define PMC_IS_LAST(i) (pmu_conf.pmc_desc[i].type & PFM_REG_END) +#define PMD_IS_LAST(i) (pmu_conf.pmd_desc[i].type & PFM_REG_END) -#define PFM_IS_DISABLED() pmu_conf.pfm_is_disabled +#define PFM_IS_DISABLED() pmu_conf.disabled #define PMC_OVFL_NOTIFY(ctx, i) ((ctx)->ctx_soft_pmds[i].flags & PFM_REGFL_OVFL_NOTIFY) #define PFM_FL_INHERIT_MASK (PFM_FL_INHERIT_NONE|PFM_FL_INHERIT_ONCE|PFM_FL_INHERIT_ALL) @@ -102,7 +103,6 @@ #define PMD_PMD_DEP(i) pmu_conf.pmd_desc[i].dep_pmd[0] #define PMC_PMD_DEP(i) pmu_conf.pmc_desc[i].dep_pmd[0] - /* k assume unsigned */ #define IBR_IS_IMPL(k) (kctx_soft_pmds[i].val + (ia64_get_pmd(i) & pmu_conf.perf_ovfl_val); + return ctx->ctx_soft_pmds[i].val + (ia64_get_pmd(i) & pmu_conf.ovfl_val); } static inline void pfm_write_soft_counter(pfm_context_t *ctx, int i, unsigned long val) { - ctx->ctx_soft_pmds[i].val = val & ~pmu_conf.perf_ovfl_val; + ctx->ctx_soft_pmds[i].val = val & ~pmu_conf.ovfl_val; /* * writing to unimplemented part is ignore, so we do not need to * mask off top part */ - ia64_set_pmd(i, val & pmu_conf.perf_ovfl_val); -} - -/* - * finds the number of PM(C|D) registers given - * the bitvector returned by PAL - */ -static unsigned long __init -find_num_pm_regs(long *buffer) -{ - int i=3; /* 4 words/per bitvector */ - - /* start from the most significant word */ - while (i>=0 && buffer[i] == 0 ) i--; - if (i< 0) { - printk(KERN_ERR "perfmon: No bit set in pm_buffer\n"); - return 0; - } - return 1+ ia64_fls(buffer[i]) + 64 * i; + ia64_set_pmd(i, val & pmu_conf.ovfl_val); } - /* * Generates a unique (per CPU) timestamp */ @@ -875,6 +898,120 @@ error_kmalloc: return -ENOMEM; 
} +static int +pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned long cpu_mask) +{ + unsigned long m, undo_mask; + unsigned int n, i; + + /* + * validy checks on cpu_mask have been done upstream + */ + LOCK_PFS(); + + if (is_syswide) { + /* + * cannot mix system wide and per-task sessions + */ + if (pfm_sessions.pfs_task_sessions > 0UL) { + DBprintk(("system wide not possible, %u conflicting task_sessions\n", + pfm_sessions.pfs_task_sessions)); + goto abort; + } + + m = cpu_mask; undo_mask = 0UL; n = 0; + DBprintk(("cpu_mask=0x%lx\n", cpu_mask)); + for(i=0; m; i++, m>>=1) { + + if ((m & 0x1) == 0UL) continue; + + if (pfm_sessions.pfs_sys_session[i]) goto undo; + + DBprintk(("reserving CPU%d currently on CPU%d\n", i, smp_processor_id())); + + pfm_sessions.pfs_sys_session[i] = task; + undo_mask |= 1UL << i; + n++; + } + pfm_sessions.pfs_sys_sessions += n; + } else { + if (pfm_sessions.pfs_sys_sessions) goto abort; + pfm_sessions.pfs_task_sessions++; + } + DBprintk(("task_sessions=%u sys_session[%d]=%d", + pfm_sessions.pfs_task_sessions, + smp_processor_id(), pfm_sessions.pfs_sys_session[smp_processor_id()] ? 1 : 0)); + UNLOCK_PFS(); + return 0; +undo: + DBprintk(("system wide not possible, conflicting session [%d] on CPU%d\n", + pfm_sessions.pfs_sys_session[i]->pid, i)); + + for(i=0; undo_mask; i++, undo_mask >>=1) { + pfm_sessions.pfs_sys_session[i] = NULL; + } +abort: + UNLOCK_PFS(); + + return -EBUSY; + +} + +static int +pfm_unreserve_session(struct task_struct *task, int is_syswide, unsigned long cpu_mask) +{ + pfm_context_t *ctx; + unsigned long m; + unsigned int n, i; + + ctx = task ? task->thread.pfm_context : NULL; + + /* + * validy checks on cpu_mask have been done upstream + */ + LOCK_PFS(); + + DBprintk(("[%d] sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu_mask=0x%lx\n", + task->pid, + pfm_sessions.pfs_sys_sessions, + pfm_sessions.pfs_task_sessions, + pfm_sessions.pfs_sys_use_dbregs, + is_syswide, + cpu_mask)); + + + if (is_syswide) { + m = cpu_mask; n = 0; + for(i=0; m; i++, m>>=1) { + if ((m & 0x1) == 0UL) continue; + pfm_sessions.pfs_sys_session[i] = NULL; + n++; + } + /* + * would not work with perfmon+more than one bit in cpu_mask + */ + if (ctx && ctx->ctx_fl_using_dbreg) { + if (pfm_sessions.pfs_sys_use_dbregs == 0) { + printk("perfmon: invalid release for [%d] sys_use_dbregs=0\n", task->pid); + } else { + pfm_sessions.pfs_sys_use_dbregs--; + } + } + pfm_sessions.pfs_sys_sessions -= n; + + DBprintk(("CPU%d sys_sessions=%u\n", + smp_processor_id(), pfm_sessions.pfs_sys_sessions)); + } else { + pfm_sessions.pfs_task_sessions--; + DBprintk(("[%d] task_sessions=%u\n", + task->pid, pfm_sessions.pfs_task_sessions)); + } + + UNLOCK_PFS(); + + return 0; +} + /* * XXX: do something better here */ @@ -891,6 +1028,7 @@ pfm_bad_permissions(struct task_struct *task) static int pfx_is_sane(struct task_struct *task, pfarg_context_t *pfx) { + unsigned long smpl_pmds = pfx->ctx_smpl_regs[0]; int ctx_flags; int cpu; @@ -957,6 +1095,11 @@ pfx_is_sane(struct task_struct *task, pfarg_context_t *pfx) } #endif } + /* verify validity of smpl_regs */ + if ((smpl_pmds & pmu_conf.impl_pmds[0]) != smpl_pmds) { + DBprintk(("invalid smpl_regs 0x%lx\n", smpl_pmds)); + return -EINVAL; + } /* probably more to add here */ return 0; @@ -968,7 +1111,7 @@ pfm_context_create(struct task_struct *task, pfm_context_t *ctx, void *req, int { pfarg_context_t tmp; void *uaddr = NULL; - int ret, cpu = 0; + int ret; int ctx_flags; pid_t notify_pid; @@ -987,40 +1130,8 @@ 
pfm_context_create(struct task_struct *task, pfm_context_t *ctx, void *req, int ctx_flags = tmp.ctx_flags; - ret = -EBUSY; - - LOCK_PFS(); - - if (ctx_flags & PFM_FL_SYSTEM_WIDE) { - - /* at this point, we know there is at least one bit set */ - cpu = ffz(~tmp.ctx_cpu_mask); - - DBprintk(("requesting CPU%d currently on CPU%d\n",cpu, smp_processor_id())); - - if (pfm_sessions.pfs_task_sessions > 0) { - DBprintk(("system wide not possible, task_sessions=%ld\n", pfm_sessions.pfs_task_sessions)); - goto abort; - } - - if (pfm_sessions.pfs_sys_session[cpu]) { - DBprintk(("system wide not possible, conflicting session [%d] on CPU%d\n",pfm_sessions.pfs_sys_session[cpu]->pid, cpu)); - goto abort; - } - pfm_sessions.pfs_sys_session[cpu] = task; - /* - * count the number of system wide sessions - */ - pfm_sessions.pfs_sys_sessions++; - - } else if (pfm_sessions.pfs_sys_sessions == 0) { - pfm_sessions.pfs_task_sessions++; - } else { - /* no per-process monitoring while there is a system wide session */ - goto abort; - } - - UNLOCK_PFS(); + ret = pfm_reserve_session(task, ctx_flags & PFM_FL_SYSTEM_WIDE, tmp.ctx_cpu_mask); + if (ret) goto abort; ret = -ENOMEM; @@ -1103,6 +1214,7 @@ pfm_context_create(struct task_struct *task, pfm_context_t *ctx, void *req, int ctx->ctx_fl_inherit = ctx_flags & PFM_FL_INHERIT_MASK; ctx->ctx_fl_block = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0; ctx->ctx_fl_system = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0; + ctx->ctx_fl_excl_idle = (ctx_flags & PFM_FL_EXCL_IDLE) ? 1: 0; ctx->ctx_fl_frozen = 0; /* * setting this flag to 0 here means, that the creator or the task that the @@ -1113,7 +1225,7 @@ pfm_context_create(struct task_struct *task, pfm_context_t *ctx, void *req, int ctx->ctx_fl_protected = 0; /* for system wide mode only (only 1 bit set) */ - ctx->ctx_cpu = cpu; + ctx->ctx_cpu = ffz(~tmp.ctx_cpu_mask); atomic_set(&ctx->ctx_last_cpu,-1); /* SMP only, means no CPU */ @@ -1131,9 +1243,9 @@ pfm_context_create(struct task_struct *task, pfm_context_t *ctx, void *req, int DBprintk(("context=%p, pid=%d notify_task=%p\n", (void *)ctx, task->pid, ctx->ctx_notify_task)); - DBprintk(("context=%p, pid=%d flags=0x%x inherit=%d block=%d system=%d\n", + DBprintk(("context=%p, pid=%d flags=0x%x inherit=%d block=%d system=%d excl_idle=%d\n", (void *)ctx, task->pid, ctx_flags, ctx->ctx_fl_inherit, - ctx->ctx_fl_block, ctx->ctx_fl_system)); + ctx->ctx_fl_block, ctx->ctx_fl_system, ctx->ctx_fl_excl_idle)); /* * when no notification is required, we can make this visible at the last moment @@ -1146,8 +1258,8 @@ pfm_context_create(struct task_struct *task, pfm_context_t *ctx, void *req, int */ if (ctx->ctx_fl_system) { ctx->ctx_saved_cpus_allowed = task->cpus_allowed; - set_cpus_allowed(task, 1UL << cpu); - DBprintk(("[%d] rescheduled allowed=0x%lx\n", task->pid,task->cpus_allowed)); + set_cpus_allowed(task, tmp.ctx_cpu_mask); + DBprintk(("[%d] rescheduled allowed=0x%lx\n", task->pid, task->cpus_allowed)); } return 0; @@ -1155,20 +1267,8 @@ pfm_context_create(struct task_struct *task, pfm_context_t *ctx, void *req, int buffer_error: pfm_context_free(ctx); error: - /* - * undo session reservation - */ - LOCK_PFS(); - - if (ctx_flags & PFM_FL_SYSTEM_WIDE) { - pfm_sessions.pfs_sys_session[cpu] = NULL; - pfm_sessions.pfs_sys_sessions--; - } else { - pfm_sessions.pfs_task_sessions--; - } + pfm_unreserve_session(task, ctx_flags & PFM_FL_SYSTEM_WIDE , tmp.ctx_cpu_mask); abort: - UNLOCK_PFS(); - /* make sure we don't leave anything behind */ task->thread.pfm_context = NULL; @@ -1200,9 +1300,7 @@ 
pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int flag) unsigned long mask = ovfl_regs[0]; unsigned long reset_others = 0UL; unsigned long val; - int i, is_long_reset = (flag & PFM_RELOAD_LONG_RESET); - - DBprintk(("masks=0x%lx\n", mask)); + int i, is_long_reset = (flag == PFM_PMD_LONG_RESET); /* * now restore reset value on sampling overflowed counters @@ -1213,7 +1311,7 @@ pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int flag) val = pfm_new_counter_value(ctx->ctx_soft_pmds + i, is_long_reset); reset_others |= ctx->ctx_soft_pmds[i].reset_pmds[0]; - DBprintk(("[%d] %s reset soft_pmd[%d]=%lx\n", current->pid, + DBprintk_ovfl(("[%d] %s reset soft_pmd[%d]=%lx\n", current->pid, is_long_reset ? "long" : "short", i, val)); /* upper part is ignored on rval */ @@ -1235,7 +1333,7 @@ pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int flag) } else { ia64_set_pmd(i, val); } - DBprintk(("[%d] %s reset_others pmd[%d]=%lx\n", current->pid, + DBprintk_ovfl(("[%d] %s reset_others pmd[%d]=%lx\n", current->pid, is_long_reset ? "long" : "short", i, val)); } ia64_srlz_d(); @@ -1246,7 +1344,7 @@ pfm_write_pmcs(struct task_struct *task, pfm_context_t *ctx, void *arg, int coun { struct thread_struct *th = &task->thread; pfarg_reg_t tmp, *req = (pfarg_reg_t *)arg; - unsigned long value; + unsigned long value, reset_pmds; unsigned int cnum, reg_flags, flags; int i; int ret = -EINVAL; @@ -1262,10 +1360,11 @@ pfm_write_pmcs(struct task_struct *task, pfm_context_t *ctx, void *arg, int coun if (__copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT; - cnum = tmp.reg_num; - reg_flags = tmp.reg_flags; - value = tmp.reg_value; - flags = 0; + cnum = tmp.reg_num; + reg_flags = tmp.reg_flags; + value = tmp.reg_value; + reset_pmds = tmp.reg_reset_pmds[0]; + flags = 0; /* * we reject all non implemented PMC as well @@ -1283,6 +1382,8 @@ pfm_write_pmcs(struct task_struct *task, pfm_context_t *ctx, void *arg, int coun * any other configuration is rejected. 
*/ if (PMC_IS_MONITOR(cnum) || PMC_IS_COUNTING(cnum)) { + DBprintk(("pmc[%u].pm=%ld\n", cnum, PMC_PM(cnum, value))); + if (ctx->ctx_fl_system ^ PMC_PM(cnum, value)) { DBprintk(("pmc_pm=%ld fl_system=%d\n", PMC_PM(cnum, value), ctx->ctx_fl_system)); goto error; @@ -1310,6 +1411,11 @@ pfm_write_pmcs(struct task_struct *task, pfm_context_t *ctx, void *arg, int coun if (reg_flags & PFM_REGFL_RANDOM) flags |= PFM_REGFL_RANDOM; + /* verify validity of reset_pmds */ + if ((reset_pmds & pmu_conf.impl_pmds[0]) != reset_pmds) { + DBprintk(("invalid reset_pmds 0x%lx for pmc%u\n", reset_pmds, cnum)); + goto error; + } } else if (reg_flags & (PFM_REGFL_OVFL_NOTIFY|PFM_REGFL_RANDOM)) { DBprintk(("cannot set ovfl_notify or random on pmc%u\n", cnum)); goto error; @@ -1348,13 +1454,10 @@ pfm_write_pmcs(struct task_struct *task, pfm_context_t *ctx, void *arg, int coun ctx->ctx_soft_pmds[cnum].flags = flags; if (PMC_IS_COUNTING(cnum)) { - /* - * copy reset vector - */ - ctx->ctx_soft_pmds[cnum].reset_pmds[0] = tmp.reg_reset_pmds[0]; - ctx->ctx_soft_pmds[cnum].reset_pmds[1] = tmp.reg_reset_pmds[1]; - ctx->ctx_soft_pmds[cnum].reset_pmds[2] = tmp.reg_reset_pmds[2]; - ctx->ctx_soft_pmds[cnum].reset_pmds[3] = tmp.reg_reset_pmds[3]; + ctx->ctx_soft_pmds[cnum].reset_pmds[0] = reset_pmds; + + /* mark all PMDS to be accessed as used */ + CTX_USED_PMD(ctx, reset_pmds); } /* @@ -1397,7 +1500,7 @@ pfm_write_pmds(struct task_struct *task, pfm_context_t *ctx, void *arg, int coun unsigned long value, hw_value; unsigned int cnum; int i; - int ret; + int ret = 0; /* we don't quite support this right now */ if (task != current) return -EINVAL; @@ -1448,9 +1551,9 @@ pfm_write_pmds(struct task_struct *task, pfm_context_t *ctx, void *arg, int coun /* update virtualized (64bits) counter */ if (PMD_IS_COUNTING(cnum)) { ctx->ctx_soft_pmds[cnum].lval = value; - ctx->ctx_soft_pmds[cnum].val = value & ~pmu_conf.perf_ovfl_val; + ctx->ctx_soft_pmds[cnum].val = value & ~pmu_conf.ovfl_val; - hw_value = value & pmu_conf.perf_ovfl_val; + hw_value = value & pmu_conf.ovfl_val; ctx->ctx_soft_pmds[cnum].long_reset = tmp.reg_long_reset; ctx->ctx_soft_pmds[cnum].short_reset = tmp.reg_short_reset; @@ -1478,7 +1581,7 @@ pfm_write_pmds(struct task_struct *task, pfm_context_t *ctx, void *arg, int coun ctx->ctx_soft_pmds[cnum].val, ctx->ctx_soft_pmds[cnum].short_reset, ctx->ctx_soft_pmds[cnum].long_reset, - ia64_get_pmd(cnum) & pmu_conf.perf_ovfl_val, + ia64_get_pmd(cnum) & pmu_conf.ovfl_val, PMC_OVFL_NOTIFY(ctx, cnum) ? 
'Y':'N', ctx->ctx_used_pmds[0], ctx->ctx_soft_pmds[cnum].reset_pmds[0])); @@ -1504,15 +1607,18 @@ abort_mission: return ret; } - static int pfm_read_pmds(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) { struct thread_struct *th = &task->thread; - unsigned long val = 0UL; + unsigned long val, lval; pfarg_reg_t *req = (pfarg_reg_t *)arg; unsigned int cnum, reg_flags = 0; - int i, ret = -EINVAL; + int i, ret = 0; + +#if __GNUC__ < 3 + int foo; +#endif if (!CTX_IS_ENABLED(ctx)) return -EINVAL; @@ -1528,9 +1634,16 @@ pfm_read_pmds(struct task_struct *task, pfm_context_t *ctx, void *arg, int count DBprintk(("ctx_last_cpu=%d for [%d]\n", atomic_read(&ctx->ctx_last_cpu), task->pid)); for (i = 0; i < count; i++, req++) { - +#if __GNUC__ < 3 + foo = __get_user(cnum, &req->reg_num); + if (foo) return -EFAULT; + foo = __get_user(reg_flags, &req->reg_flags); + if (foo) return -EFAULT; +#else if (__get_user(cnum, &req->reg_num)) return -EFAULT; if (__get_user(reg_flags, &req->reg_flags)) return -EFAULT; +#endif + lval = 0UL; if (!PMD_IS_IMPL(cnum)) goto abort_mission; /* @@ -1578,9 +1691,10 @@ pfm_read_pmds(struct task_struct *task, pfm_context_t *ctx, void *arg, int count /* * XXX: need to check for overflow */ - - val &= pmu_conf.perf_ovfl_val; + val &= pmu_conf.ovfl_val; val += ctx->ctx_soft_pmds[cnum].val; + + lval = ctx->ctx_soft_pmds[cnum].lval; } /* @@ -1592,10 +1706,11 @@ pfm_read_pmds(struct task_struct *task, pfm_context_t *ctx, void *arg, int count val = v; } - PFM_REG_RETFLAG_SET(reg_flags, 0); + PFM_REG_RETFLAG_SET(reg_flags, ret); DBprintk(("read pmd[%u] ret=%d value=0x%lx pmc=0x%lx\n", - cnum, ret, val, ia64_get_pmc(cnum))); + cnum, ret, val, ia64_get_pmc(cnum))); + /* * update register return value, abort all if problem during copy. * we only modify the reg_flags field. no check mode is fine because @@ -1604,16 +1719,19 @@ pfm_read_pmds(struct task_struct *task, pfm_context_t *ctx, void *arg, int count if (__put_user(cnum, &req->reg_num)) return -EFAULT; if (__put_user(val, &req->reg_value)) return -EFAULT; if (__put_user(reg_flags, &req->reg_flags)) return -EFAULT; + if (__put_user(lval, &req->reg_last_reset_value)) return -EFAULT; } return 0; abort_mission: PFM_REG_RETFLAG_SET(reg_flags, PFM_REG_RETFL_EINVAL); + /* + * XXX: if this fails, we stick with the original failure, flag not updated! 
+ */ + __put_user(reg_flags, &req->reg_flags); - if (__put_user(reg_flags, &req->reg_flags)) ret = -EFAULT; - - return ret; + return -EINVAL; } #ifdef PFM_PMU_USES_DBR @@ -1655,7 +1773,7 @@ pfm_use_debug_registers(struct task_struct *task) else pfm_sessions.pfs_ptrace_use_dbregs++; - DBprintk(("ptrace_use_dbregs=%lu sys_use_dbregs=%lu by [%d] ret = %d\n", + DBprintk(("ptrace_use_dbregs=%u sys_use_dbregs=%u by [%d] ret = %d\n", pfm_sessions.pfs_ptrace_use_dbregs, pfm_sessions.pfs_sys_use_dbregs, task->pid, ret)); @@ -1673,7 +1791,6 @@ pfm_use_debug_registers(struct task_struct *task) * perfmormance monitoring, so we only decrement the number * of "ptraced" debug register users to keep the count up to date */ - int pfm_release_debug_registers(struct task_struct *task) { @@ -1702,6 +1819,7 @@ pfm_use_debug_registers(struct task_struct *task) { return 0; } + int pfm_release_debug_registers(struct task_struct *task) { @@ -1721,9 +1839,12 @@ pfm_restart(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, if (!CTX_IS_ENABLED(ctx)) return -EINVAL; if (task == current) { - DBprintk(("restarting self %d frozen=%d \n", current->pid, ctx->ctx_fl_frozen)); + DBprintk(("restarting self %d frozen=%d ovfl_regs=0x%lx\n", + task->pid, + ctx->ctx_fl_frozen, + ctx->ctx_ovfl_regs[0])); - pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_RELOAD_LONG_RESET); + pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_PMD_LONG_RESET); ctx->ctx_ovfl_regs[0] = 0UL; @@ -1806,18 +1927,18 @@ pfm_stop(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, ia64_set_dcr(ia64_get_dcr() & ~IA64_DCR_PP); /* stop monitoring */ - __asm__ __volatile__ ("rsm psr.pp;;"::: "memory"); + pfm_clear_psr_pp(); ia64_srlz_i(); - __get_cpu_var(pfm_dcr_pp) = 0; + PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP); ia64_psr(regs)->pp = 0; } else { /* stop monitoring */ - __asm__ __volatile__ ("rum psr.up;;"::: "memory"); + pfm_clear_psr_up(); ia64_srlz_i(); @@ -1979,14 +2100,9 @@ pfm_write_ibr_dbr(int mode, struct task_struct *task, void *arg, int count, stru int i, ret = 0; /* - * for range restriction: psr.db must be cleared or the - * the PMU will ignore the debug registers. - * - * XXX: may need more in system wide mode, - * no task can have this bit set? + * we do not need to check for ipsr.db because we do clear ibr.x, dbr.r, and dbr.w + * ensuring that no real breakpoint can be installed via this call. 
*/ - if (ia64_psr(regs)->db == 1) return -EINVAL; - first_time = ctx->ctx_fl_using_dbreg == 0; @@ -2055,7 +2171,6 @@ pfm_write_ibr_dbr(int mode, struct task_struct *task, void *arg, int count, stru * Now install the values into the registers */ for (i = 0; i < count; i++, req++) { - if (__copy_from_user(&tmp, req, sizeof(tmp))) goto abort_mission; @@ -2145,7 +2260,7 @@ abort_mission: * XXX: for now we can only come here on EINVAL */ PFM_REG_RETFLAG_SET(tmp.dbreg_flags, PFM_REG_RETFL_EINVAL); - __put_user(tmp.dbreg_flags, &req->dbreg_flags); + if (__put_user(tmp.dbreg_flags, &req->dbreg_flags)) ret = -EFAULT; } return ret; } @@ -2215,13 +2330,13 @@ pfm_start(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, if (ctx->ctx_fl_system) { - __get_cpu_var(pfm_dcr_pp) = 1; + PFM_CPUINFO_SET(PFM_CPUINFO_DCR_PP); /* set user level psr.pp */ ia64_psr(regs)->pp = 1; /* start monitoring at kernel level */ - __asm__ __volatile__ ("ssm psr.pp;;"::: "memory"); + pfm_set_psr_pp(); /* enable dcr pp */ ia64_set_dcr(ia64_get_dcr()|IA64_DCR_PP); @@ -2237,7 +2352,7 @@ pfm_start(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, ia64_psr(regs)->up = 1; /* start monitoring at kernel level */ - __asm__ __volatile__ ("sum psr.up;;"::: "memory"); + pfm_set_psr_up(); ia64_srlz_i(); } @@ -2264,11 +2379,12 @@ pfm_enable(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, ia64_psr(regs)->up = 0; /* just to make sure! */ /* make sure monitoring is stopped */ - __asm__ __volatile__ ("rsm psr.pp;;"::: "memory"); + pfm_clear_psr_pp(); ia64_srlz_i(); - __get_cpu_var(pfm_dcr_pp) = 0; - __get_cpu_var(pfm_syst_wide) = 1; + PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP); + PFM_CPUINFO_SET(PFM_CPUINFO_SYST_WIDE); + if (ctx->ctx_fl_excl_idle) PFM_CPUINFO_SET(PFM_CPUINFO_EXCL_IDLE); } else { /* * needed in case the task was a passive task during @@ -2279,7 +2395,7 @@ pfm_enable(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, ia64_psr(regs)->up = 0; /* make sure monitoring is stopped */ - __asm__ __volatile__ ("rum psr.up;;"::: "memory"); + pfm_clear_psr_up(); ia64_srlz_i(); DBprintk(("clearing psr.sp for [%d]\n", current->pid)); @@ -2331,6 +2447,7 @@ pfm_get_pmc_reset(struct task_struct *task, pfm_context_t *ctx, void *arg, int c abort_mission: PFM_REG_RETFLAG_SET(tmp.reg_flags, PFM_REG_RETFL_EINVAL); if (__copy_to_user(req, &tmp, sizeof(tmp))) ret = -EFAULT; + return ret; } @@ -2532,7 +2649,7 @@ non_blocking: * use the local reference */ - pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_RELOAD_LONG_RESET); + pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_PMD_LONG_RESET); ctx->ctx_ovfl_regs[0] = 0UL; @@ -2591,19 +2708,11 @@ pfm_record_sample(struct task_struct *task, pfm_context_t *ctx, unsigned long ov h->pid = current->pid; h->cpu = smp_processor_id(); h->last_reset_value = ovfl_mask ? ctx->ctx_soft_pmds[ffz(~ovfl_mask)].lval : 0UL; - /* - * where did the fault happen - */ - h->ip = regs ? regs->cr_iip | ((regs->cr_ipsr >> 41) & 0x3): 0x0UL; - - /* - * which registers overflowed - */ - h->regs = ovfl_mask; + h->ip = regs ? regs->cr_iip | ((regs->cr_ipsr >> 41) & 0x3): 0x0UL; + h->regs = ovfl_mask; /* which registers overflowed */ /* guaranteed to monotonically increase on each cpu */ h->stamp = pfm_get_stamp(); - h->period = 0UL; /* not yet used */ /* position for first pmd */ e = (unsigned long *)(h+1); @@ -2724,7 +2833,7 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str * pfm_read_pmds(). 
*/ old_val = ctx->ctx_soft_pmds[i].val; - ctx->ctx_soft_pmds[i].val += 1 + pmu_conf.perf_ovfl_val; + ctx->ctx_soft_pmds[i].val += 1 + pmu_conf.ovfl_val; /* * check for overflow condition @@ -2739,9 +2848,7 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str } DBprintk_ovfl(("soft_pmd[%d].val=0x%lx old_val=0x%lx pmd=0x%lx ovfl_pmds=0x%lx ovfl_notify=0x%lx\n", i, ctx->ctx_soft_pmds[i].val, old_val, - ia64_get_pmd(i) & pmu_conf.perf_ovfl_val, ovfl_pmds, ovfl_notify)); - - + ia64_get_pmd(i) & pmu_conf.ovfl_val, ovfl_pmds, ovfl_notify)); } /* @@ -2776,7 +2883,7 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str */ if (ovfl_notify == 0UL) { if (ovfl_pmds) - pfm_reset_regs(ctx, &ovfl_pmds, PFM_RELOAD_SHORT_RESET); + pfm_reset_regs(ctx, &ovfl_pmds, PFM_PMD_SHORT_RESET); return 0x0; } @@ -2924,7 +3031,7 @@ lost_notify: /* XXX: more to do here, to convert to non-blocking (reset values) } static void -perfmon_interrupt (int irq, void *arg, struct pt_regs *regs) +pfm_interrupt_handler(int irq, void *arg, struct pt_regs *regs) { u64 pmc0; struct task_struct *task; @@ -2932,6 +3039,14 @@ perfmon_interrupt (int irq, void *arg, struct pt_regs *regs) pfm_stats[smp_processor_id()].pfm_ovfl_intr_count++; + /* + * if an alternate handler is registered, just bypass the default one + */ + if (pfm_alternate_intr_handler) { + (*pfm_alternate_intr_handler->handler)(irq, arg, regs); + return; + } + /* * srlz.d done before arriving here * @@ -2994,14 +3109,13 @@ perfmon_interrupt (int irq, void *arg, struct pt_regs *regs) /* for debug only */ static int -perfmon_proc_info(char *page) +pfm_proc_info(char *page) { char *p = page; int i; - p += sprintf(p, "enabled : %s\n", pmu_conf.pfm_is_disabled ? "No": "Yes"); p += sprintf(p, "fastctxsw : %s\n", pfm_sysctl.fastctxsw > 0 ? "Yes": "No"); - p += sprintf(p, "ovfl_mask : 0x%lx\n", pmu_conf.perf_ovfl_val); + p += sprintf(p, "ovfl_mask : 0x%lx\n", pmu_conf.ovfl_val); for(i=0; i < NR_CPUS; i++) { if (cpu_is_online(i) == 0) continue; @@ -3009,16 +3123,18 @@ perfmon_proc_info(char *page) p += sprintf(p, "CPU%-2d spurious intrs : %lu\n", i, pfm_stats[i].pfm_spurious_ovfl_intr_count); p += sprintf(p, "CPU%-2d recorded samples : %lu\n", i, pfm_stats[i].pfm_recorded_samples_count); p += sprintf(p, "CPU%-2d smpl buffer full : %lu\n", i, pfm_stats[i].pfm_full_smpl_buffer_count); + p += sprintf(p, "CPU%-2d syst_wide : %d\n", i, per_cpu(pfm_syst_info, i) & PFM_CPUINFO_SYST_WIDE ? 1 : 0); + p += sprintf(p, "CPU%-2d dcr_pp : %d\n", i, per_cpu(pfm_syst_info, i) & PFM_CPUINFO_DCR_PP ? 1 : 0); + p += sprintf(p, "CPU%-2d exclude idle : %d\n", i, per_cpu(pfm_syst_info, i) & PFM_CPUINFO_EXCL_IDLE ? 1 : 0); p += sprintf(p, "CPU%-2d owner : %d\n", i, pmu_owners[i].owner ? 
pmu_owners[i].owner->pid: -1); - p += sprintf(p, "CPU%-2d syst_wide : %d\n", i, per_cpu(pfm_syst_wide, i)); - p += sprintf(p, "CPU%-2d dcr_pp : %d\n", i, per_cpu(pfm_dcr_pp, i)); } LOCK_PFS(); - p += sprintf(p, "proc_sessions : %lu\n" - "sys_sessions : %lu\n" - "sys_use_dbregs : %lu\n" - "ptrace_use_dbregs : %lu\n", + + p += sprintf(p, "proc_sessions : %u\n" + "sys_sessions : %u\n" + "sys_use_dbregs : %u\n" + "ptrace_use_dbregs : %u\n", pfm_sessions.pfs_task_sessions, pfm_sessions.pfs_sys_sessions, pfm_sessions.pfs_sys_use_dbregs, @@ -3033,7 +3149,7 @@ perfmon_proc_info(char *page) static int perfmon_read_entry(char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = perfmon_proc_info(page); + int len = pfm_proc_info(page); if (len <= off+count) *eof = 1; @@ -3046,17 +3162,57 @@ perfmon_read_entry(char *page, char **start, off_t off, int count, int *eof, voi return len; } +/* + * we come here as soon as PFM_CPUINFO_SYST_WIDE is set. This happens + * during pfm_enable() hence before pfm_start(). We cannot assume monitoring + * is active or inactive based on mode. We must rely on the value in + * cpu_data(i)->pfm_syst_info + */ void -pfm_syst_wide_update_task(struct task_struct *task, int mode) +pfm_syst_wide_update_task(struct task_struct *task, unsigned long info, int is_ctxswin) { - struct pt_regs *regs = (struct pt_regs *)((unsigned long) task + IA64_STK_OFFSET); + struct pt_regs *regs; + unsigned long dcr; + unsigned long dcr_pp; - regs--; + dcr_pp = info & PFM_CPUINFO_DCR_PP ? 1 : 0; /* - * propagate the value of the dcr_pp bit to the psr + * pid 0 is guaranteed to be the idle task. There is one such task with pid 0 + * on every CPU, so we can rely on the pid to identify the idle task. + */ + if ((info & PFM_CPUINFO_EXCL_IDLE) == 0 || task->pid) { + regs = (struct pt_regs *)((unsigned long) task + IA64_STK_OFFSET); + regs--; + ia64_psr(regs)->pp = is_ctxswin ? dcr_pp : 0; + return; + } + /* + * if monitoring has started */ - ia64_psr(regs)->pp = mode ? __get_cpu_var(pfm_dcr_pp) : 0; + if (dcr_pp) { + dcr = ia64_get_dcr(); + /* + * context switching in? + */ + if (is_ctxswin) { + /* mask monitoring for the idle task */ + ia64_set_dcr(dcr & ~IA64_DCR_PP); + pfm_clear_psr_pp(); + ia64_srlz_i(); + return; + } + /* + * context switching out + * restore monitoring for next task + * + * Due to inlining this odd if-then-else construction generates + * better code. + */ + ia64_set_dcr(dcr |IA64_DCR_PP); + pfm_set_psr_pp(); + ia64_srlz_i(); + } } void @@ -3067,11 +3223,10 @@ pfm_save_regs (struct task_struct *task) ctx = task->thread.pfm_context; - /* * save current PSR: needed because we modify it */ - __asm__ __volatile__ ("mov %0=psr;;": "=r"(psr) :: "memory"); + psr = pfm_get_psr(); /* * stop monitoring: @@ -3369,7 +3524,7 @@ pfm_load_regs (struct task_struct *task) */ mask = pfm_sysctl.fastctxsw || ctx->ctx_fl_protected ? ctx->ctx_used_pmds[0] : ctx->ctx_reload_pmds[0]; for (i=0; mask; i++, mask>>=1) { - if (mask & 0x1) ia64_set_pmd(i, t->pmd[i] & pmu_conf.perf_ovfl_val); + if (mask & 0x1) ia64_set_pmd(i, t->pmd[i] & pmu_conf.ovfl_val); } /* @@ -3419,7 +3574,7 @@ pfm_reset_pmu(struct task_struct *task) int i; if (task != current) { - printk("perfmon: invalid task in ia64_reset_pmu()\n"); + printk("perfmon: invalid task in pfm_reset_pmu()\n"); return; } @@ -3428,6 +3583,7 @@ pfm_reset_pmu(struct task_struct *task) /* * install reset values for PMC. 
We skip PMC0 (done above) + * XX: good up to 64 PMCS */ for (i=1; (pmu_conf.pmc_desc[i].type & PFM_REG_END) == 0; i++) { if ((pmu_conf.pmc_desc[i].type & PFM_REG_IMPL) == 0) continue; @@ -3444,7 +3600,7 @@ pfm_reset_pmu(struct task_struct *task) /* * clear reset values for PMD. - * XXX: good up to 64 PMDS. Suppose that zero is a valid value. + * XXX: good up to 64 PMDS. */ for (i=0; (pmu_conf.pmd_desc[i].type & PFM_REG_END) == 0; i++) { if ((pmu_conf.pmd_desc[i].type & PFM_REG_IMPL) == 0) continue; @@ -3477,13 +3633,13 @@ pfm_reset_pmu(struct task_struct *task) * * We never directly restore PMC0 so we do not include it in the mask. */ - ctx->ctx_reload_pmcs[0] = pmu_conf.impl_regs[0] & ~0x1; + ctx->ctx_reload_pmcs[0] = pmu_conf.impl_pmcs[0] & ~0x1; /* * We must include all the PMD in this mask to avoid picking * up stale value and leak information, especially directly * at the user level when psr.sp=0 */ - ctx->ctx_reload_pmds[0] = pmu_conf.impl_regs[4]; + ctx->ctx_reload_pmds[0] = pmu_conf.impl_pmds[0]; /* * Keep track of the pmds we want to sample @@ -3493,7 +3649,7 @@ pfm_reset_pmu(struct task_struct *task) * * We ignore the unimplemented pmds specified by the user */ - ctx->ctx_used_pmds[0] = ctx->ctx_smpl_regs[0] & pmu_conf.impl_regs[4]; + ctx->ctx_used_pmds[0] = ctx->ctx_smpl_regs[0]; ctx->ctx_used_pmcs[0] = 1; /* always save/restore PMC[0] */ /* @@ -3547,16 +3703,17 @@ pfm_flush_regs (struct task_struct *task) ia64_set_dcr(ia64_get_dcr() & ~IA64_DCR_PP); /* stop monitoring */ - __asm__ __volatile__ ("rsm psr.pp;;"::: "memory"); + pfm_clear_psr_pp(); ia64_srlz_i(); - __get_cpu_var(pfm_syst_wide) = 0; - __get_cpu_var(pfm_dcr_pp) = 0; + PFM_CPUINFO_CLEAR(PFM_CPUINFO_SYST_WIDE); + PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP); + PFM_CPUINFO_CLEAR(PFM_CPUINFO_EXCL_IDLE); } else { /* stop monitoring */ - __asm__ __volatile__ ("rum psr.up;;"::: "memory"); + pfm_clear_psr_up(); ia64_srlz_i(); @@ -3622,10 +3779,14 @@ pfm_flush_regs (struct task_struct *task) val = ia64_get_pmd(i); if (PMD_IS_COUNTING(i)) { - DBprintk(("[%d] pmd[%d] soft_pmd=0x%lx hw_pmd=0x%lx\n", task->pid, i, ctx->ctx_soft_pmds[i].val, val & pmu_conf.perf_ovfl_val)); + DBprintk(("[%d] pmd[%d] soft_pmd=0x%lx hw_pmd=0x%lx\n", + task->pid, + i, + ctx->ctx_soft_pmds[i].val, + val & pmu_conf.ovfl_val)); /* collect latest results */ - ctx->ctx_soft_pmds[i].val += val & pmu_conf.perf_ovfl_val; + ctx->ctx_soft_pmds[i].val += val & pmu_conf.ovfl_val; /* * now everything is in ctx_soft_pmds[] and we need @@ -3638,7 +3799,7 @@ pfm_flush_regs (struct task_struct *task) * take care of overflow inline */ if (pmc0 & (1UL << i)) { - ctx->ctx_soft_pmds[i].val += 1 + pmu_conf.perf_ovfl_val; + ctx->ctx_soft_pmds[i].val += 1 + pmu_conf.ovfl_val; DBprintk(("[%d] pmd[%d] overflowed soft_pmd=0x%lx\n", task->pid, i, ctx->ctx_soft_pmds[i].val)); } @@ -3771,8 +3932,8 @@ pfm_inherit(struct task_struct *task, struct pt_regs *regs) m = nctx->ctx_used_pmds[0] >> PMU_FIRST_COUNTER; for(i = PMU_FIRST_COUNTER ; m ; m>>=1, i++) { if ((m & 0x1) && pmu_conf.pmd_desc[i].type == PFM_REG_COUNTING) { - nctx->ctx_soft_pmds[i].val = nctx->ctx_soft_pmds[i].lval & ~pmu_conf.perf_ovfl_val; - thread->pmd[i] = nctx->ctx_soft_pmds[i].lval & pmu_conf.perf_ovfl_val; + nctx->ctx_soft_pmds[i].val = nctx->ctx_soft_pmds[i].lval & ~pmu_conf.ovfl_val; + thread->pmd[i] = nctx->ctx_soft_pmds[i].lval & pmu_conf.ovfl_val; } else { thread->pmd[i] = 0UL; /* reset to initial state */ } @@ -3939,30 +4100,14 @@ pfm_context_exit(struct task_struct *task) UNLOCK_CTX(ctx); - LOCK_PFS(); + 
pfm_unreserve_session(task, ctx->ctx_fl_system, 1UL << ctx->ctx_cpu); if (ctx->ctx_fl_system) { - - pfm_sessions.pfs_sys_session[ctx->ctx_cpu] = NULL; - pfm_sessions.pfs_sys_sessions--; - DBprintk(("freeing syswide session on CPU%ld\n", ctx->ctx_cpu)); - - /* update perfmon debug register usage counter */ - if (ctx->ctx_fl_using_dbreg) { - if (pfm_sessions.pfs_sys_use_dbregs == 0) { - printk("perfmon: invalid release for [%d] sys_use_dbregs=0\n", task->pid); - } else - pfm_sessions.pfs_sys_use_dbregs--; - } - /* * remove any CPU pinning */ set_cpus_allowed(task, ctx->ctx_saved_cpus_allowed); - } else { - pfm_sessions.pfs_task_sessions--; - } - UNLOCK_PFS(); + } pfm_context_free(ctx); /* @@ -3990,8 +4135,7 @@ pfm_cleanup_smpl_buf(struct task_struct *task) * Walk through the list and free the sampling buffer and psb */ while (psb) { - DBprintk(("[%d] freeing smpl @%p size %ld\n", - current->pid, psb->psb_hdr, psb->psb_size)); + DBprintk(("[%d] freeing smpl @%p size %ld\n", current->pid, psb->psb_hdr, psb->psb_size)); pfm_rvfree(psb->psb_hdr, psb->psb_size); tmp = psb->psb_next; @@ -4095,16 +4239,16 @@ pfm_cleanup_notifiers(struct task_struct *task) if (ctx && ctx->ctx_notify_task == task) { DBprintk(("trying for notifier [%d] in [%d]\n", task->pid, p->pid)); /* - * the spinlock is required to take care of a race condition with - * the send_sig_info() call. We must make sure that either the - * send_sig_info() completes using a valid task, or the - * notify_task is cleared before the send_sig_info() can pick up a - * stale value. Note that by the time this function is executed - * the 'task' is already detached from the tasklist. The problem - * is that the notifiers have a direct pointer to it. It is okay - * to send a signal to a task in this stage, it simply will have - * no effect. But it is better than sending to a completely - * destroyed task or worse to a new task using the same + * the spinlock is required to take care of a race condition + * with the send_sig_info() call. We must make sure that + * either the send_sig_info() completes using a valid task, + * or the notify_task is cleared before the send_sig_info() + * can pick up a stale value. Note that by the time this + * function is executed the 'task' is already detached from the + * tasklist. The problem is that the notifiers have a direct + * pointer to it. It is okay to send a signal to a task in this + * stage, it simply will have no effect. But it is better than sending + * to a completely destroyed task or worse to a new task using the same * task_struct address. */ LOCK_CTX(ctx); @@ -4123,87 +4267,131 @@ pfm_cleanup_notifiers(struct task_struct *task) } static struct irqaction perfmon_irqaction = { - .handler = perfmon_interrupt, + .handler = pfm_interrupt_handler, .flags = SA_INTERRUPT, .name = "perfmon" }; +int +pfm_install_alternate_syswide_subsystem(pfm_intr_handler_desc_t *hdl) +{ + int ret; + + /* some sanity checks */ + if (hdl == NULL || hdl->handler == NULL) return -EINVAL; + + /* do the easy test first */ + if (pfm_alternate_intr_handler) return -EBUSY; + + /* reserve our session */ + ret = pfm_reserve_session(NULL, 1, cpu_online_map); + if (ret) return ret; + + if (pfm_alternate_intr_handler) { + printk("perfmon: install_alternate, intr_handler not NULL after reserve\n"); + return -EINVAL; + } + + pfm_alternate_intr_handler = hdl; + + return 0; +} + +int +pfm_remove_alternate_syswide_subsystem(pfm_intr_handler_desc_t *hdl) +{ + if (hdl == NULL) return -EINVAL; + + /* cannot remove someone else's handler! 
*/ + if (pfm_alternate_intr_handler != hdl) return -EINVAL; + + pfm_alternate_intr_handler = NULL; + + /* + * XXX: assume cpu_online_map has not changed since reservation + */ + pfm_unreserve_session(NULL, 1, cpu_online_map); + + return 0; +} /* * perfmon initialization routine, called from the initcall() table */ int __init -perfmon_init (void) +pfm_init(void) { - pal_perf_mon_info_u_t pm_info; - s64 status; + unsigned int n, n_counters, i; - pmu_conf.pfm_is_disabled = 1; + pmu_conf.disabled = 1; - printk("perfmon: version %u.%u (sampling format v%u.%u) IRQ %u\n", + printk("perfmon: version %u.%u IRQ %u\n", PFM_VERSION_MAJ, PFM_VERSION_MIN, - PFM_SMPL_VERSION_MAJ, - PFM_SMPL_VERSION_MIN, IA64_PERFMON_VECTOR); - if ((status=ia64_pal_perf_mon_info(pmu_conf.impl_regs, &pm_info)) != 0) { - printk("perfmon: PAL call failed (%ld), perfmon disabled\n", status); - return -1; - } - - pmu_conf.perf_ovfl_val = (1UL << pm_info.pal_perf_mon_info_s.width) - 1; /* - * XXX: use the pfm_*_desc tables instead and simply verify with PAL + * compute the number of implemented PMD/PMC from the + * description tables */ - pmu_conf.max_counters = pm_info.pal_perf_mon_info_s.generic; - pmu_conf.num_pmcs = find_num_pm_regs(pmu_conf.impl_regs); - pmu_conf.num_pmds = find_num_pm_regs(&pmu_conf.impl_regs[4]); - - printk("perfmon: %u bits counters\n", pm_info.pal_perf_mon_info_s.width); + n = 0; + for (i=0; PMC_IS_LAST(i) == 0; i++) { + if (PMC_IS_IMPL(i) == 0) continue; + pmu_conf.impl_pmcs[i>>6] |= 1UL << (i&63); + n++; + } + pmu_conf.num_pmcs = n; + + n = 0; n_counters = 0; + for (i=0; PMD_IS_LAST(i) == 0; i++) { + if (PMD_IS_IMPL(i) == 0) continue; + pmu_conf.impl_pmds[i>>6] |= 1UL << (i&63); + n++; + if (PMD_IS_COUNTING(i)) n_counters++; + } + pmu_conf.num_pmds = n; + pmu_conf.num_counters = n_counters; - printk("perfmon: %lu PMC/PMD pairs, %lu PMCs, %lu PMDs\n", - pmu_conf.max_counters, pmu_conf.num_pmcs, pmu_conf.num_pmds); + printk("perfmon: %u PMCs, %u PMDs, %u counters (%lu bits)\n", + pmu_conf.num_pmcs, + pmu_conf.num_pmds, + pmu_conf.num_counters, + ffz(pmu_conf.ovfl_val)); /* sanity check */ if (pmu_conf.num_pmds >= IA64_NUM_PMD_REGS || pmu_conf.num_pmcs >= IA64_NUM_PMC_REGS) { - printk(KERN_ERR "perfmon: not enough pmc/pmd, perfmon is DISABLED\n"); - return -1; /* no need to continue anyway */ - } - - if (ia64_pal_debug_info(&pmu_conf.num_ibrs, &pmu_conf.num_dbrs)) { - printk(KERN_WARNING "perfmon: unable to get number of debug registers\n"); - pmu_conf.num_ibrs = pmu_conf.num_dbrs = 0; + printk(KERN_ERR "perfmon: not enough pmc/pmd, perfmon disabled\n"); + return -1; } - /* PAL reports the number of pairs */ - pmu_conf.num_ibrs <<=1; - pmu_conf.num_dbrs <<=1; - - /* - * setup the register configuration descriptions for the CPU - */ - pmu_conf.pmc_desc = pfm_pmc_desc; - pmu_conf.pmd_desc = pfm_pmd_desc; - - /* we are all set */ - pmu_conf.pfm_is_disabled = 0; /* * for now here for debug purposes */ perfmon_dir = create_proc_read_entry ("perfmon", 0, 0, perfmon_read_entry, NULL); + if (perfmon_dir == NULL) { + printk(KERN_ERR "perfmon: cannot create /proc entry, perfmon disabled\n"); + return -1; + } + /* + * create /proc/perfmon + */ pfm_sysctl_header = register_sysctl_table(pfm_sysctl_root, 0); + /* + * initialize all our spinlocks + */ spin_lock_init(&pfm_sessions.pfs_lock); + /* we are all set */ + pmu_conf.disabled = 0; + return 0; } - -__initcall(perfmon_init); +__initcall(pfm_init); void -perfmon_init_percpu (void) +pfm_init_percpu(void) { int i; @@ -4222,17 +4410,17 @@ perfmon_init_percpu (void) * 
* On McKinley, this code is ineffective until PMC4 is initialized. */ - for (i=1; (pfm_pmc_desc[i].type & PFM_REG_END) == 0; i++) { - if ((pfm_pmc_desc[i].type & PFM_REG_IMPL) == 0) continue; - ia64_set_pmc(i, pfm_pmc_desc[i].default_value); + for (i=1; PMC_IS_LAST(i) == 0; i++) { + if (PMC_IS_IMPL(i) == 0) continue; + ia64_set_pmc(i, PMC_DFL_VAL(i)); } - for (i=0; (pfm_pmd_desc[i].type & PFM_REG_END) == 0; i++) { - if ((pfm_pmd_desc[i].type & PFM_REG_IMPL) == 0) continue; + + for (i=0; PMD_IS_LAST(i) == 0; i++) { + if (PMD_IS_IMPL(i) == 0) continue; ia64_set_pmd(i, 0UL); } ia64_set_pmc(0,1UL); ia64_srlz_d(); - } #else /* !CONFIG_PERFMON */ diff --git a/arch/ia64/kernel/perfmon_generic.h b/arch/ia64/kernel/perfmon_generic.h index 6abd23864073..7c41aa84f8c1 100644 --- a/arch/ia64/kernel/perfmon_generic.h +++ b/arch/ia64/kernel/perfmon_generic.h @@ -1,10 +1,17 @@ +/* + * This file contains the architected PMU register description tables + * and pmc checker used by perfmon.c. + * + * Copyright (C) 2002 Hewlett Packard Co + * Stephane Eranian + */ #define RDEP(x) (1UL<<(x)) -#if defined(CONFIG_ITANIUM) || defined(CONFIG_MCKINLEY) -#error "This file should only be used when CONFIG_ITANIUM and CONFIG_MCKINLEY are not defined" +#if defined(CONFIG_ITANIUM) || defined (CONFIG_MCKINLEY) +#error "This file should not be used when CONFIG_ITANIUM or CONFIG_MCKINLEY is defined" #endif -static pfm_reg_desc_t pmc_desc[PMU_MAX_PMCS]={ +static pfm_reg_desc_t pfm_gen_pmc_desc[PMU_MAX_PMCS]={ /* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, /* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, /* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, @@ -13,10 +20,10 @@ static pfm_reg_desc_t pmc_desc[PMU_MAX_PMCS]={ /* pmc5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, /* pmc6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, /* pmc7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, - { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ + { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ }; -static pfm_reg_desc_t pmd_desc[PMU_MAX_PMDS]={ +static pfm_reg_desc_t pfm_gen_pmd_desc[PMU_MAX_PMDS]={ /* pmd0 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* pmd1 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* pmd2 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, @@ -25,5 +32,17 @@ static pfm_reg_desc_t pmd_desc[PMU_MAX_PMDS]={ /* pmd5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}}, /* pmd6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}}, /* pmd7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}}, - { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ + { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ +}; + +/* + * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
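+ * (pfm_init() walks these tables up to the PFM_REG_END marker and, for
+ *  each implemented register i, sets bit (i & 63) of 64-bit mask word
+ *  (i >> 6); a hypothetical register index of 70, say, would land in
+ *  word 1, bit 6, so indices above 63 simply spill into the next word.)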
+ */ +static pmu_config_t pmu_conf={ + disabled: 1, + ovfl_val: (1UL << 32) - 1, + num_ibrs: 8, + num_dbrs: 8, + pmd_desc: pfm_gen_pmd_desc, + pmc_desc: pfm_gen_pmc_desc }; diff --git a/arch/ia64/kernel/perfmon_itanium.h b/arch/ia64/kernel/perfmon_itanium.h index 88928ebe005c..40dbcda09944 100644 --- a/arch/ia64/kernel/perfmon_itanium.h +++ b/arch/ia64/kernel/perfmon_itanium.h @@ -15,7 +15,7 @@ static int pfm_ita_pmc_check(struct task_struct *task, unsigned int cnum, unsigned long *val, struct pt_regs *regs); static int pfm_write_ibr_dbr(int mode, struct task_struct *task, void *arg, int count, struct pt_regs *regs); -static pfm_reg_desc_t pfm_pmc_desc[PMU_MAX_PMCS]={ +static pfm_reg_desc_t pfm_ita_pmc_desc[PMU_MAX_PMCS]={ /* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, /* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, /* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, @@ -33,7 +33,7 @@ static pfm_reg_desc_t pfm_pmc_desc[PMU_MAX_PMCS]={ { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ }; -static pfm_reg_desc_t pfm_pmd_desc[PMU_MAX_PMDS]={ +static pfm_reg_desc_t pfm_ita_pmd_desc[PMU_MAX_PMDS]={ /* pmd0 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}}, /* pmd1 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}}, /* pmd2 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, @@ -55,6 +55,19 @@ static pfm_reg_desc_t pfm_pmd_desc[PMU_MAX_PMDS]={ { PFM_REG_END , 0, 0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ }; +/* + * impl_pmcs, impl_pmds are computed at runtime to minimize errors! 
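+ * (ovfl_val below doubles as the implemented-width mask: with
+ *  ovfl_val = 2^w - 1, ffz(ovfl_val) recovers the width w that pfm_init()
+ *  prints, and every hardware overflow adds ovfl_val + 1 == 2^w to the
+ *  64-bit soft counter kept in ctx_soft_pmds[].val.)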
+ */ +static pmu_config_t pmu_conf={ + disabled: 1, + ovfl_val: (1UL << 32) - 1, + num_ibrs: 8, + num_dbrs: 8, + pmd_desc: pfm_ita_pmd_desc, + pmc_desc: pfm_ita_pmc_desc +}; + + static int pfm_ita_pmc_check(struct task_struct *task, unsigned int cnum, unsigned long *val, struct pt_regs *regs) { diff --git a/arch/ia64/kernel/perfmon_mckinley.h b/arch/ia64/kernel/perfmon_mckinley.h index 27c4f03291cd..1ef6ce728620 100644 --- a/arch/ia64/kernel/perfmon_mckinley.h +++ b/arch/ia64/kernel/perfmon_mckinley.h @@ -16,7 +16,7 @@ static int pfm_mck_reserved(struct task_struct *task, unsigned int cnum, unsigne static int pfm_mck_pmc_check(struct task_struct *task, unsigned int cnum, unsigned long *val, struct pt_regs *regs); static int pfm_write_ibr_dbr(int mode, struct task_struct *task, void *arg, int count, struct pt_regs *regs); -static pfm_reg_desc_t pfm_pmc_desc[PMU_MAX_PMCS]={ +static pfm_reg_desc_t pfm_mck_pmc_desc[PMU_MAX_PMCS]={ /* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, /* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, /* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, @@ -36,7 +36,7 @@ static pfm_reg_desc_t pfm_pmc_desc[PMU_MAX_PMCS]={ { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ }; -static pfm_reg_desc_t pfm_pmd_desc[PMU_MAX_PMDS]={ +static pfm_reg_desc_t pfm_mck_pmd_desc[PMU_MAX_PMDS]={ /* pmd0 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}}, /* pmd1 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}}, /* pmd2 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, @@ -58,6 +58,19 @@ static pfm_reg_desc_t pfm_pmd_desc[PMU_MAX_PMDS]={ { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ }; +/* + * impl_pmcs, impl_pmds are computed at runtime to minimize errors! 
+ */ +static pmu_config_t pmu_conf={ + disabled: 1, + ovfl_val: (1UL << 47) - 1, + num_ibrs: 8, + num_dbrs: 8, + pmd_desc: pfm_mck_pmd_desc, + pmc_desc: pfm_mck_pmc_desc +}; + + /* * PMC reserved fields must have their power-up values preserved */ diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 43d2f15ac0fb..2fbeb865b15f 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -205,6 +205,10 @@ cpu_idle (void *unused) void ia64_save_extra (struct task_struct *task) { +#ifdef CONFIG_PERFMON + unsigned long info; +#endif + if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0) ia64_save_debug_regs(&task->thread.dbr[0]); @@ -212,8 +216,9 @@ ia64_save_extra (struct task_struct *task) if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0) pfm_save_regs(task); - if (__get_cpu_var(pfm_syst_wide)) - pfm_syst_wide_update_task(task, 0); + info = __get_cpu_var(pfm_syst_info); + if (info & PFM_CPUINFO_SYST_WIDE) + pfm_syst_wide_update_task(task, info, 0); #endif #ifdef CONFIG_IA32_SUPPORT @@ -225,6 +230,10 @@ ia64_save_extra (struct task_struct *task) void ia64_load_extra (struct task_struct *task) { +#ifdef CONFIG_PERFMON + unsigned long info; +#endif + if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0) ia64_load_debug_regs(&task->thread.dbr[0]); @@ -232,8 +241,9 @@ ia64_load_extra (struct task_struct *task) if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0) pfm_load_regs(task); - if (__get_cpu_var(pfm_syst_wide)) - pfm_syst_wide_update_task(task, 1); + info = __get_cpu_var(pfm_syst_info); + if (info & PFM_CPUINFO_SYST_WIDE) + pfm_syst_wide_update_task(task, info, 1); #endif #ifdef CONFIG_IA32_SUPPORT diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index e1c9a5ead71b..67dbfb5e711f 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -265,7 +265,7 @@ smp_callin (void) extern void ia64_init_itm(void); #ifdef CONFIG_PERFMON - extern void perfmon_init_percpu(void); + extern void pfm_init_percpu(void); #endif cpuid = smp_processor_id(); @@ -300,7 +300,7 @@ smp_callin (void) #endif #ifdef CONFIG_PERFMON - perfmon_init_percpu(); + pfm_init_percpu(); #endif local_irq_enable(); diff --git a/include/asm-ia64/perfmon.h b/include/asm-ia64/perfmon.h index c95e8d5fd3b7..b1c2eef06cd9 100644 --- a/include/asm-ia64/perfmon.h +++ b/include/asm-ia64/perfmon.h @@ -40,6 +40,7 @@ #define PFM_FL_INHERIT_ALL 0x02 /* always clone pfm_context across fork() */ #define PFM_FL_NOTIFY_BLOCK 0x04 /* block task on user level notifications */ #define PFM_FL_SYSTEM_WIDE 0x08 /* create a system wide context */ +#define PFM_FL_EXCL_IDLE 0x20 /* exclude idle task from system wide session */ /* * PMC flags @@ -86,11 +87,12 @@ typedef struct { unsigned long reg_long_reset; /* reset after sampling buffer overflow (large) */ unsigned long reg_short_reset;/* reset after counter overflow (small) */ - unsigned long reg_reset_pmds[4]; /* which other counters to reset on overflow */ - unsigned long reg_random_seed; /* seed value when randomization is used */ - unsigned long reg_random_mask; /* bitmask used to limit random value */ + unsigned long reg_reset_pmds[4]; /* which other counters to reset on overflow */ + unsigned long reg_random_seed; /* seed value when randomization is used */ + unsigned long reg_random_mask; /* bitmask used to limit random value */ + unsigned long reg_last_reset_value;/* last value used to reset the PMD (PFM_READ_PMDS) */ - unsigned long reserved[14]; /* for future use */ + unsigned long reserved[13]; /* for future use */ 
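+	/* note: one word moved from reserved[] into reg_last_reset_value
+	   above, so the user-visible size of pfarg_reg_t is unchanged */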
} pfarg_reg_t; typedef struct { @@ -123,7 +125,7 @@ typedef struct { * Define the version numbers for both perfmon as a whole and the sampling buffer format. */ #define PFM_VERSION_MAJ 1U -#define PFM_VERSION_MIN 1U +#define PFM_VERSION_MIN 3U #define PFM_VERSION (((PFM_VERSION_MAJ&0xffff)<<16)|(PFM_VERSION_MIN & 0xffff)) #define PFM_SMPL_VERSION_MAJ 1U @@ -156,13 +158,17 @@ typedef struct { unsigned long stamp; /* timestamp */ unsigned long ip; /* where the overflow interrupt happened */ unsigned long regs; /* bitmask of which registers overflowed */ - unsigned long period; /* unused */ + unsigned long reserved; /* unused */ } perfmon_smpl_entry_t; extern int perfmonctl(pid_t pid, int cmd, void *arg, int narg); #ifdef __KERNEL__ +typedef struct { + void (*handler)(int irq, void *arg, struct pt_regs *regs); +} pfm_intr_handler_desc_t; + extern void pfm_save_regs (struct task_struct *); extern void pfm_load_regs (struct task_struct *); @@ -174,9 +180,24 @@ extern void pfm_cleanup_owners (struct task_struct *); extern int pfm_use_debug_registers(struct task_struct *); extern int pfm_release_debug_registers(struct task_struct *); extern int pfm_cleanup_smpl_buf(struct task_struct *); -extern void pfm_syst_wide_update_task(struct task_struct *, int); +extern void pfm_syst_wide_update_task(struct task_struct *, unsigned long info, int is_ctxswin); extern void pfm_ovfl_block_reset(void); -extern void perfmon_init_percpu(void); +extern void pfm_init_percpu(void); + +/* + * hooks to allow VTune/Prospect to cooperate with perfmon. + * (reserved for system wide monitoring modules only) + */ +extern int pfm_install_alternate_syswide_subsystem(pfm_intr_handler_desc_t *h); +extern int pfm_remove_alternate_syswide_subsystem(pfm_intr_handler_desc_t *h); + +/* + * describe the content of the local_cpu_data->pfm_syst_info field + */ +#define PFM_CPUINFO_SYST_WIDE 0x1 /* if set a system wide session exists */ +#define PFM_CPUINFO_DCR_PP 0x2 /* if set the system wide session has started */ +#define PFM_CPUINFO_EXCL_IDLE 0x4 /* the system wide session excludes the idle task */ + #endif /* __KERNEL__ */ diff --git a/include/asm-ia64/system.h b/include/asm-ia64/system.h index ad40abfb7e91..2308826038c9 100644 --- a/include/asm-ia64/system.h +++ b/include/asm-ia64/system.h @@ -205,8 +205,8 @@ extern void ia64_save_extra (struct task_struct *task); extern void ia64_load_extra (struct task_struct *task); #ifdef CONFIG_PERFMON - DECLARE_PER_CPU(int, pfm_syst_wide); -# define PERFMON_IS_SYSWIDE() (get_cpu_var(pfm_syst_wide) != 0) + DECLARE_PER_CPU(unsigned long, pfm_syst_info); +# define PERFMON_IS_SYSWIDE() (get_cpu_var(pfm_syst_info) & 0x1) #else # define PERFMON_IS_SYSWIDE() (0) #endif -- cgit v1.2.3 From db19f36acf9443370f18830b0e0604725b6f5922 Mon Sep 17 00:00:00 2001 From: Kochi Takayoshi Date: Fri, 17 Jan 2003 07:17:02 -0800 Subject: [PATCH] ia64: skip _PRT entry for non-existent IOSAPICs On some machines that support I/O hot-plugging, one or more IO SAPICs can appear after boot as the result of a hot-plug event. Even in that case, ACPI _PRT entries for devices behind those IO SAPICs can already exist at boot time, for future use. Currently iosapic.c gives up parsing _PRT entries once one of them hits such a non-existent IO SAPIC. This patch fixes the problem in the 2.5 ia64 bk tree. For 2.4, we don't have this problem now.
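The fix itself is the one-word change below: a _PRT entry whose GSI has no IOSAPIC yet is skipped instead of terminating the whole walk. Schematically (a sketch of the loop shape only; the iterator and helper names here are illustrative, not the actual identifiers in iosapic.c):

	list_for_each(node, &prt_entry_list) {		/* hypothetical list head */
		gsi = entry_gsi(node);			/* hypothetical accessor */
		index = find_iosapic(gsi);
		if (index < 0) {
			printk(KERN_WARNING"IOSAPIC: GSI 0x%x has no IOSAPIC!\n", gsi);
			continue;	/* was: return -- all later entries were lost */
		}
		/* ... program the route from iosapic_lists[index] ... */
	}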
--- arch/ia64/kernel/iosapic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index d8ffef34209f..a5d82b937b26 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -752,7 +752,7 @@ iosapic_parse_prt (void) if (index < 0) { printk(KERN_WARNING"IOSAPIC: GSI 0x%x has no IOSAPIC!\n", gsi); - return; + continue; } addr = iosapic_lists[index].addr; gsi_base = iosapic_lists[index].gsi_base; -- cgit v1.2.3 From 8f14820c02bb2fbfec7e978a44cd774c88a4c142 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 17 Jan 2003 07:23:16 -0800 Subject: [PATCH] ia64: fix typo in ia32_support.c Happened to notice the attached redundancy. --- arch/ia64/ia32/ia32_support.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/ia64/ia32/ia32_support.c b/arch/ia64/ia32/ia32_support.c index 9d0d71ef57d9..1280c806e43b 100644 --- a/arch/ia64/ia32/ia32_support.c +++ b/arch/ia64/ia32/ia32_support.c @@ -95,8 +95,6 @@ ia32_load_state (struct task_struct *t) struct pt_regs *regs = ia64_task_regs(t); int nr = smp_processor_id(); /* LDT and TSS depend on CPU number: */ - nr = smp_processor_id(); - eflag = t->thread.eflag; fsr = t->thread.fsr; fcr = t->thread.fcr; -- cgit v1.2.3 From 887b478a6e8a2d100828ef9e5f7abc9307376d13 Mon Sep 17 00:00:00 2001 From: David Mosberger Date: Fri, 17 Jan 2003 07:40:03 -0800 Subject: ia64: Don't risk running past the end of the unwind-table. Based on a patch by Suresh Siddha. --- arch/ia64/kernel/unwind.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c index 5d5f584d4562..916abca4864a 100644 --- a/arch/ia64/kernel/unwind.c +++ b/arch/ia64/kernel/unwind.c @@ -1997,16 +1997,18 @@ unw_create_gate_table (void) { extern char __start_gate_section[], __stop_gate_section[]; unsigned long *lp, start, end, segbase = unw.kernel_table.segment_base; - const struct unw_table_entry *entry, *first; + const struct unw_table_entry *entry, *first, *unw_table_end; + extern int ia64_unw_end; size_t info_size, size; char *info; start = (unsigned long) __start_gate_section - segbase; end = (unsigned long) __stop_gate_section - segbase; + unw_table_end = (struct unw_table_entry *) &ia64_unw_end; size = 0; first = lookup(&unw.kernel_table, start); - for (entry = first; entry->start_offset < end; ++entry) + for (entry = first; entry < unw_table_end && entry->start_offset < end; ++entry) size += 3*8 + 8 + 8*UNW_LENGTH(*(u64 *) (segbase + entry->info_offset)); size += 8; /* reserve space for "end of table" marker */ @@ -2021,7 +2023,7 @@ unw_create_gate_table (void) lp = unw.gate_table; info = (char *) unw.gate_table + size; - for (entry = first; entry->start_offset < end; ++entry, lp += 3) { + for (entry = first; entry < unw_table_end && entry->start_offset < end; ++entry, lp += 3) { info_size = 8 + 8*UNW_LENGTH(*(u64 *) (segbase + entry->info_offset)); info -= info_size; memcpy(info, (char *) segbase + entry->info_offset, info_size); -- cgit v1.2.3 From 98a3d3b1f9b82e88d4e4a338b20532c628b347e5 Mon Sep 17 00:00:00 2001 From: Daniel Jacobowitz Date: Sat, 18 Jan 2003 08:21:37 -0500 Subject: Tweak has_stopped_jobs for use with debugging --- kernel/exit.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/kernel/exit.c b/kernel/exit.c index 743ed76ed243..03801540a5e6 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -203,6 +203,17 @@ static inline int has_stopped_jobs(int pgrp) for_each_task_pid(pgrp, PIDTYPE_PGID, p, l, pid) { if 
(p->state != TASK_STOPPED) continue; + + /* If p is stopped by a debugger on a signal that won't + stop it, then don't count p as stopped. This isn't + perfect but it's a good approximation. */ + if (unlikely (p->ptrace) + && p->exit_code != SIGSTOP + && p->exit_code != SIGTSTP + && p->exit_code != SIGTTOU + && p->exit_code != SIGTTIN) + continue; + retval = 1; break; } -- cgit v1.2.3 From 1669ce53e2ff7b49a60d0230866d3faee5f45573 Mon Sep 17 00:00:00 2001 From: Daniel Jacobowitz Date: Sat, 18 Jan 2003 10:40:18 -0500 Subject: Add PTRACE_GETSIGINFO and PTRACE_SETSIGINFO These new ptrace commands allow a debugger to control signals more precisely; for instance, store a signal and deliver it later, as if it had come from the original outside process or in response to the same faulting memory access. --- include/linux/ptrace.h | 2 ++ include/linux/sched.h | 1 + kernel/ptrace.c | 23 +++++++++++++++++++++++ kernel/signal.c | 8 +++++++- 4 files changed, 33 insertions(+), 1 deletion(-) diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index c6de3a4ea70a..b56bbe7ca800 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -26,6 +26,8 @@ /* 0x4200-0x4300 are reserved for architecture-independent additions. */ #define PTRACE_SETOPTIONS 0x4200 #define PTRACE_GETEVENTMSG 0x4201 +#define PTRACE_GETSIGINFO 0x4202 +#define PTRACE_SETSIGINFO 0x4203 /* options set using PTRACE_SETOPTIONS */ #define PTRACE_O_TRACESYSGOOD 0x00000001 diff --git a/include/linux/sched.h b/include/linux/sched.h index 15a951d2d27e..a325e5a8c645 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -400,6 +400,7 @@ struct task_struct { struct backing_dev_info *backing_dev_info; unsigned long ptrace_message; + siginfo_t *last_siginfo; /* For ptrace use. */ }; extern void __put_task_struct(struct task_struct *tsk); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index a16dfb90d412..9f3769bfdc7e 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -286,6 +286,23 @@ static int ptrace_setoptions(struct task_struct *child, long data) return 0; } +static int ptrace_getsiginfo(struct task_struct *child, long data) +{ + if (child->last_siginfo == NULL) + return -EINVAL; + return copy_siginfo_to_user ((siginfo_t *) data, child->last_siginfo); +} + +static int ptrace_setsiginfo(struct task_struct *child, long data) +{ + if (child->last_siginfo == NULL) + return -EINVAL; + if (copy_from_user (child->last_siginfo, (siginfo_t *) data, + sizeof (siginfo_t)) != 0) + return -EFAULT; + return 0; +} + int ptrace_request(struct task_struct *child, long request, long addr, long data) { @@ -301,6 +318,12 @@ int ptrace_request(struct task_struct *child, long request, case PTRACE_GETEVENTMSG: ret = put_user(child->ptrace_message, (unsigned long *) data); break; + case PTRACE_GETSIGINFO: + ret = ptrace_getsiginfo(child, data); + break; + case PTRACE_SETSIGINFO: + ret = ptrace_setsiginfo(child, data); + break; default: break; } diff --git a/kernel/signal.c b/kernel/signal.c index 7c485d01a4b0..b683402178ec 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1244,10 +1244,13 @@ int get_signal_to_deliver(siginfo_t *info, struct pt_regs *regs) if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) { /* Let the debugger run. */ current->exit_code = signr; + current->last_siginfo = info; set_current_state(TASK_STOPPED); notify_parent(current, SIGCHLD); schedule(); + current->last_siginfo = NULL; + /* We're back. Did the debugger cancel the sig? 
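	   (Note: current->last_siginfo is only non-NULL inside this stop --
	   it is set just before entering TASK_STOPPED and cleared again
	   right after -- which is why PTRACE_GETSIGINFO and
	   PTRACE_SETSIGINFO return -EINVAL unless the child is stopped
	   at a signal.)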
*/ signr = current->exit_code; if (signr == 0) @@ -1258,7 +1261,10 @@ int get_signal_to_deliver(siginfo_t *info, struct pt_regs *regs) if (signr == SIGSTOP) continue; - /* Update the siginfo structure. Is this good? */ + /* Update the siginfo structure if the signal has + changed. If the debugger wanted something + specific in the siginfo structure then it should + have updated *info via PTRACE_SETSIGINFO. */ if (signr != info->si_signo) { info->si_signo = signr; info->si_errno = 0; -- cgit v1.2.3 From 9f799613466bcbe184441220342f39529fba0d3d Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 21 Jan 2003 06:51:42 -0800 Subject: [PATCH] ia64: [COMPAT] Eliminate the rest of the __kernel_..._t32 typedefs --- arch/ia64/ia32/sys_ia32.c | 16 ++++++++-------- include/asm-ia64/compat.h | 7 +++++++ include/asm-ia64/ia32.h | 13 +------------ 3 files changed, 16 insertions(+), 20 deletions(-) diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index 0ef2af595c8a..e095592424d0 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -1849,10 +1849,10 @@ struct ipc_perm32 { struct ipc64_perm32 { key_t key; - __kernel_uid32_t32 uid; - __kernel_gid32_t32 gid; - __kernel_uid32_t32 cuid; - __kernel_gid32_t32 cgid; + compat_uid32_t uid; + compat_gid32_t gid; + compat_uid32_t cuid; + compat_gid32_t cgid; compat_mode_t mode; unsigned short __pad1; unsigned short seq; @@ -1895,8 +1895,8 @@ struct msqid_ds32 { unsigned short msg_cbytes; unsigned short msg_qnum; unsigned short msg_qbytes; - __kernel_ipc_pid_t32 msg_lspid; - __kernel_ipc_pid_t32 msg_lrpid; + compat_ipc_pid_t msg_lspid; + compat_ipc_pid_t msg_lrpid; }; struct msqid64_ds32 { @@ -1922,8 +1922,8 @@ struct shmid_ds32 { compat_time_t shm_atime; compat_time_t shm_dtime; compat_time_t shm_ctime; - __kernel_ipc_pid_t32 shm_cpid; - __kernel_ipc_pid_t32 shm_lpid; + compat_ipc_pid_t shm_cpid; + compat_ipc_pid_t shm_lpid; unsigned short shm_nattch; }; diff --git a/include/asm-ia64/compat.h b/include/asm-ia64/compat.h index 69bf8481be81..ce02e2fa66c4 100644 --- a/include/asm-ia64/compat.h +++ b/include/asm-ia64/compat.h @@ -14,11 +14,18 @@ typedef s32 compat_clock_t; typedef s32 compat_pid_t; typedef u16 compat_uid_t; typedef u16 compat_gid_t; +typedef u32 compat_uid32_t; +typedef u32 compat_gid32_t; typedef u16 compat_mode_t; typedef u32 compat_ino_t; typedef u16 compat_dev_t; typedef s32 compat_off_t; +typedef s64 compat_loff_t; typedef u16 compat_nlink_t; +typedef u16 compat_ipc_pid_t; +typedef s32 compat_daddr_t; +typedef u32 compat_caddr_t; +typedef __kernel_fsid_t compat_fsid_t; struct compat_timespec { compat_time_t tv_sec; diff --git a/include/asm-ia64/ia32.h b/include/asm-ia64/ia32.h index bb2f802fe3dc..35604ea3b199 100644 --- a/include/asm-ia64/ia32.h +++ b/include/asm-ia64/ia32.h @@ -12,17 +12,6 @@ * 32 bit structures for IA32 support. 
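 * (The __kernel_*_t32 typedefs removed below live on as compat_*
 *  equivalents in <asm-ia64/compat.h>: e.g. __kernel_ipc_pid_t32 is now
 *  compat_ipc_pid_t and __kernel_fsid_t32 is now compat_fsid_t.)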
*/ -/* 32bit compatibility types */ -typedef unsigned short __kernel_ipc_pid_t32; -typedef unsigned int __kernel_uid32_t32; -typedef unsigned int __kernel_gid32_t32; -typedef unsigned short __kernel_umode_t32; -typedef short __kernel_nlink_t32; -typedef int __kernel_daddr_t32; -typedef unsigned int __kernel_caddr_t32; -typedef long __kernel_loff_t32; -typedef __kernel_fsid_t __kernel_fsid_t32; - #define IA32_PAGE_SHIFT 12 /* 4KB pages */ #define IA32_PAGE_SIZE (1UL << IA32_PAGE_SHIFT) #define IA32_PAGE_MASK (~(IA32_PAGE_SIZE - 1)) @@ -222,7 +211,7 @@ struct statfs32 { int f_bavail; int f_files; int f_ffree; - __kernel_fsid_t32 f_fsid; + compat_fsid_t f_fsid; int f_namelen; /* SunOS ignores this field. */ int f_spare[6]; }; -- cgit v1.2.3 From 7f7ac8513e3ad7fd340d7b6d0e15230d2c2052e4 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 21 Jan 2003 06:53:28 -0800 Subject: [PATCH] ia64: [COMPAT] {get,put}_compat_timspec 5/8 --- arch/ia64/ia32/ia32_signal.c | 11 ++++------- arch/ia64/ia32/sys_ia32.c | 2 +- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/arch/ia64/ia32/ia32_signal.c b/arch/ia64/ia32/ia32_signal.c index d85b58933635..ad8589a188a4 100644 --- a/arch/ia64/ia32/ia32_signal.c +++ b/arch/ia64/ia32/ia32_signal.c @@ -607,14 +607,11 @@ sys32_rt_sigtimedwait (sigset32_t *uthese, siginfo_t32 *uinfo, if (copy_from_user(&s.sig, uthese, sizeof(sigset32_t))) return -EFAULT; - if (uts) { - ret = get_user(t.tv_sec, &uts->tv_sec); - ret |= get_user(t.tv_nsec, &uts->tv_nsec); - if (ret) - return -EFAULT; - } + if (uts && get_compat_timespec(&t, uts)) + return -EFAULT; set_fs(KERNEL_DS); - ret = sys_rt_sigtimedwait(&s, &info, &t, sigsetsize); + ret = sys_rt_sigtimedwait(&s, uinfo ? &info : NULL, uts ? &t : NULL, + sigsetsize); set_fs(old_fs); if (ret >= 0 && uinfo) { if (copy_siginfo_to_user32(uinfo, &info)) diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index e095592424d0..081720d9cf45 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -3540,7 +3540,7 @@ sys32_sched_rr_get_interval (pid_t pid, struct compat_timespec *interval) set_fs(KERNEL_DS); ret = sys_sched_rr_get_interval(pid, &t); set_fs(old_fs); - if (put_user (t.tv_sec, &interval->tv_sec) || put_user (t.tv_nsec, &interval->tv_nsec)) + if (put_compat_timespec(&t, interval)) return -EFAULT; return ret; } -- cgit v1.2.3 From cf3e791e3c33cf6e2aa528bef2d3f2d1960d14b8 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 21 Jan 2003 06:54:12 -0800 Subject: [PATCH] ia64: [COMPAT] compat_{old_}sigset_t --- arch/ia64/ia32/ia32_signal.c | 26 +++++++++++++------------- include/asm-ia64/compat.h | 7 +++++++ include/asm-ia64/ia32.h | 14 ++------------ 3 files changed, 22 insertions(+), 25 deletions(-) diff --git a/arch/ia64/ia32/ia32_signal.c b/arch/ia64/ia32/ia32_signal.c index ad8589a188a4..1b701f290573 100644 --- a/arch/ia64/ia32/ia32_signal.c +++ b/arch/ia64/ia32/ia32_signal.c @@ -56,7 +56,7 @@ struct sigframe_ia32 int sig; struct sigcontext_ia32 sc; struct _fpstate_ia32 fpstate; - unsigned int extramask[_IA32_NSIG_WORDS-1]; + unsigned int extramask[_COMPAT_NSIG_WORDS-1]; char retcode[8]; }; @@ -463,7 +463,7 @@ sigact_set_handler (struct k_sigaction *sa, unsigned int handler, unsigned int r } asmlinkage long -ia32_rt_sigsuspend (sigset32_t *uset, unsigned int sigsetsize, struct sigscratch *scr) +ia32_rt_sigsuspend (compat_sigset_t *uset, unsigned int sigsetsize, struct sigscratch *scr) { extern long ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall); 
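	/* copying a compat_sigset_t straight into sigset_t.sig, as the
	   sys32_rt_* paths in this patch do, relies on ia64 running
	   little-endian: two consecutive 32-bit sigset words overlay one
	   64-bit sigset word with every bit in the right place (an
	   observation about the layout, not a guarantee stated by the
	   interface itself) */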
sigset_t oldset, set; @@ -504,7 +504,7 @@ ia32_rt_sigsuspend (sigset32_t *uset, unsigned int sigsetsize, struct sigscratch asmlinkage long ia32_sigsuspend (unsigned int mask, struct sigscratch *scr) { - return ia32_rt_sigsuspend((sigset32_t *)&mask, sizeof(mask), scr); + return ia32_rt_sigsuspend((compat_sigset_t *)&mask, sizeof(mask), scr); } asmlinkage long @@ -530,14 +530,14 @@ sys32_rt_sigaction (int sig, struct sigaction32 *act, int ret; /* XXX: Don't preclude handling different sized sigset_t's. */ - if (sigsetsize != sizeof(sigset32_t)) + if (sigsetsize != sizeof(compat_sigset_t)) return -EINVAL; if (act) { ret = get_user(handler, &act->sa_handler); ret |= get_user(new_ka.sa.sa_flags, &act->sa_flags); ret |= get_user(restorer, &act->sa_restorer); - ret |= copy_from_user(&new_ka.sa.sa_mask, &act->sa_mask, sizeof(sigset32_t)); + ret |= copy_from_user(&new_ka.sa.sa_mask, &act->sa_mask, sizeof(compat_sigset_t)); if (ret) return -EFAULT; @@ -550,7 +550,7 @@ sys32_rt_sigaction (int sig, struct sigaction32 *act, ret = put_user(IA32_SA_HANDLER(&old_ka), &oact->sa_handler); ret |= put_user(old_ka.sa.sa_flags, &oact->sa_flags); ret |= put_user(IA32_SA_RESTORER(&old_ka), &oact->sa_restorer); - ret |= copy_to_user(&oact->sa_mask, &old_ka.sa.sa_mask, sizeof(sigset32_t)); + ret |= copy_to_user(&oact->sa_mask, &old_ka.sa.sa_mask, sizeof(compat_sigset_t)); } return ret; } @@ -560,7 +560,7 @@ extern asmlinkage long sys_rt_sigprocmask (int how, sigset_t *set, sigset_t *ose size_t sigsetsize); asmlinkage long -sys32_rt_sigprocmask (int how, sigset32_t *set, sigset32_t *oset, unsigned int sigsetsize) +sys32_rt_sigprocmask (int how, compat_sigset_t *set, compat_sigset_t *oset, unsigned int sigsetsize) { mm_segment_t old_fs = get_fs(); sigset_t s; @@ -589,11 +589,11 @@ sys32_rt_sigprocmask (int how, sigset32_t *set, sigset32_t *oset, unsigned int s asmlinkage long sys32_sigprocmask (int how, unsigned int *set, unsigned int *oset) { - return sys32_rt_sigprocmask(how, (sigset32_t *) set, (sigset32_t *) oset, sizeof(*set)); + return sys32_rt_sigprocmask(how, (compat_sigset_t *) set, (compat_sigset_t *) oset, sizeof(*set)); } asmlinkage long -sys32_rt_sigtimedwait (sigset32_t *uthese, siginfo_t32 *uinfo, +sys32_rt_sigtimedwait (compat_sigset_t *uthese, siginfo_t32 *uinfo, struct compat_timespec *uts, unsigned int sigsetsize) { extern asmlinkage long sys_rt_sigtimedwait (const sigset_t *, siginfo_t *, @@ -605,7 +605,7 @@ sys32_rt_sigtimedwait (sigset32_t *uthese, siginfo_t32 *uinfo, sigset_t s; int ret; - if (copy_from_user(&s.sig, uthese, sizeof(sigset32_t))) + if (copy_from_user(&s.sig, uthese, sizeof(compat_sigset_t))) return -EFAULT; if (uts && get_compat_timespec(&t, uts)) return -EFAULT; @@ -645,7 +645,7 @@ sys32_sigaction (int sig, struct old_sigaction32 *act, struct old_sigaction32 *o int ret; if (act) { - old_sigset32_t mask; + compat_old_sigset_t mask; ret = get_user(handler, &act->sa_handler); ret |= get_user(new_ka.sa.sa_flags, &act->sa_flags); @@ -863,7 +863,7 @@ setup_frame_ia32 (int sig, struct k_sigaction *ka, sigset_t *set, struct pt_regs err |= setup_sigcontext_ia32(&frame->sc, &frame->fpstate, regs, set->sig[0]); - if (_IA32_NSIG_WORDS > 1) + if (_COMPAT_NSIG_WORDS > 1) err |= __copy_to_user(frame->extramask, (char *) &set->sig + 4, sizeof(frame->extramask)); @@ -1008,7 +1008,7 @@ sys32_sigreturn (int arg0, int arg1, int arg2, int arg3, int arg4, int arg5, int goto badframe; if (__get_user(set.sig[0], &frame->sc.oldmask) - || (_IA32_NSIG_WORDS > 1 && __copy_from_user((char *) &set.sig + 4, 
&frame->extramask, + || (_COMPAT_NSIG_WORDS > 1 && __copy_from_user((char *) &set.sig + 4, &frame->extramask, sizeof(frame->extramask)))) goto badframe; diff --git a/include/asm-ia64/compat.h b/include/asm-ia64/compat.h index ce02e2fa66c4..d9b875dff673 100644 --- a/include/asm-ia64/compat.h +++ b/include/asm-ia64/compat.h @@ -68,4 +68,11 @@ struct compat_flock { compat_pid_t l_pid; }; +typedef u32 compat_old_sigset_t; /* at least 32 bits */ + +#define _COMPAT_NSIG 64 +#define _COMPAT_NSIG_BPW 32 + +typedef u32 compat_sigset_word; + #endif /* _ASM_IA64_COMPAT_H */ diff --git a/include/asm-ia64/ia32.h b/include/asm-ia64/ia32.h index 35604ea3b199..94791af38caa 100644 --- a/include/asm-ia64/ia32.h +++ b/include/asm-ia64/ia32.h @@ -132,10 +132,6 @@ struct ia32_user_fxsr_struct { }; /* signal.h */ -#define _IA32_NSIG 64 -#define _IA32_NSIG_BPW 32 -#define _IA32_NSIG_WORDS (_IA32_NSIG / _IA32_NSIG_BPW) - #define IA32_SET_SA_HANDLER(ka,handler,restorer) \ ((ka)->sa.sa_handler = (__sighandler_t) \ (((unsigned long)(restorer) << 32) \ @@ -143,23 +139,17 @@ struct ia32_user_fxsr_struct { #define IA32_SA_HANDLER(ka) ((unsigned long) (ka)->sa.sa_handler & 0xffffffff) #define IA32_SA_RESTORER(ka) ((unsigned long) (ka)->sa.sa_handler >> 32) -typedef struct { - unsigned int sig[_IA32_NSIG_WORDS]; -} sigset32_t; - struct sigaction32 { unsigned int sa_handler; /* Really a pointer, but need to deal with 32 bits */ unsigned int sa_flags; unsigned int sa_restorer; /* Another 32 bit pointer */ - sigset32_t sa_mask; /* A 32 bit mask */ + compat_sigset_t sa_mask; /* A 32 bit mask */ }; -typedef unsigned int old_sigset32_t; /* at least 32 bits */ - struct old_sigaction32 { unsigned int sa_handler; /* Really a pointer, but need to deal with 32 bits */ - old_sigset32_t sa_mask; /* A 32 bit mask */ + compat_old_sigset_t sa_mask; /* A 32 bit mask */ unsigned int sa_flags; unsigned int sa_restorer; /* Another 32 bit pointer */ }; -- cgit v1.2.3 From 0bd6a6d025da4ddcb96c29ca4ef9cc48832f541a Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 21 Jan 2003 06:54:45 -0800 Subject: [PATCH] ia64: [COMPAT] compat_sys_sigpending and compat_sys_sigprocmask --- arch/ia64/ia32/ia32_entry.S | 4 ++-- arch/ia64/ia32/ia32_signal.c | 6 ------ arch/ia64/ia32/sys_ia32.c | 6 ------ 3 files changed, 2 insertions(+), 14 deletions(-) diff --git a/arch/ia64/ia32/ia32_entry.S b/arch/ia64/ia32/ia32_entry.S index d4f3067636f5..5f0fb53908b9 100644 --- a/arch/ia64/ia32/ia32_entry.S +++ b/arch/ia64/ia32/ia32_entry.S @@ -271,7 +271,7 @@ ia32_syscall_table: data8 sys_setreuid /* 16-bit version */ /* 70 */ data8 sys_setregid /* 16-bit version */ data8 sys32_sigsuspend - data8 sys32_sigpending + data8 compat_sys_sigpending data8 sys_sethostname data8 sys32_setrlimit /* 75 */ data8 sys32_old_getrlimit @@ -324,7 +324,7 @@ ia32_syscall_table: data8 sys32_modify_ldt data8 sys32_ni_syscall /* adjtimex */ data8 sys32_mprotect /* 125 */ - data8 sys32_sigprocmask + data8 compat_sys_sigprocmask data8 sys32_ni_syscall /* create_module */ data8 sys32_ni_syscall /* init_module */ data8 sys32_ni_syscall /* delete_module */ diff --git a/arch/ia64/ia32/ia32_signal.c b/arch/ia64/ia32/ia32_signal.c index 1b701f290573..f2d006240df2 100644 --- a/arch/ia64/ia32/ia32_signal.c +++ b/arch/ia64/ia32/ia32_signal.c @@ -586,12 +586,6 @@ sys32_rt_sigprocmask (int how, compat_sigset_t *set, compat_sigset_t *oset, unsi return 0; } -asmlinkage long -sys32_sigprocmask (int how, unsigned int *set, unsigned int *oset) -{ - return sys32_rt_sigprocmask(how, (compat_sigset_t *) 
set, (compat_sigset_t *) oset, sizeof(*set)); -} - asmlinkage long sys32_rt_sigtimedwait (compat_sigset_t *uthese, siginfo_t32 *uinfo, struct compat_timespec *uts, unsigned int sigsetsize) diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index 081720d9cf45..17ebca0407aa 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -3479,12 +3479,6 @@ sys32_fstat64 (unsigned int fd, struct stat64 *statbuf) return ret; } -asmlinkage long -sys32_sigpending (unsigned int *set) -{ - return do_sigpending(set, sizeof(*set)); -} - struct sysinfo32 { s32 uptime; u32 loads[3]; -- cgit v1.2.3 From 68f9116b7c3cf853b79bfbdd363c6044c5e6477a Mon Sep 17 00:00:00 2001 From: David Mosberger Date: Tue, 21 Jan 2003 07:01:31 -0800 Subject: ia64: asm-ia64/system.h: Remove include of . --- include/asm-ia64/system.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/asm-ia64/system.h b/include/asm-ia64/system.h index 8f4757e7b637..296d494c58e4 100644 --- a/include/asm-ia64/system.h +++ b/include/asm-ia64/system.h @@ -26,7 +26,6 @@ #ifndef __ASSEMBLY__ -#include #include #include -- cgit v1.2.3 From 67d8f3d01e7a68f1840074489060f986c28d8a70 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 21 Jan 2003 07:20:30 -0800 Subject: [PATCH] ia64: [COMPAT] compat_sys_[f]statfs --- arch/ia64/ia32/ia32_entry.S | 4 ++-- arch/ia64/ia32/sys_ia32.c | 55 --------------------------------------------- include/asm-ia64/compat.h | 23 ++++++++++++++----- include/asm-ia64/ia32.h | 13 ----------- 4 files changed, 20 insertions(+), 75 deletions(-) diff --git a/arch/ia64/ia32/ia32_entry.S b/arch/ia64/ia32/ia32_entry.S index 5f0fb53908b9..87ff1d57bcd2 100644 --- a/arch/ia64/ia32/ia32_entry.S +++ b/arch/ia64/ia32/ia32_entry.S @@ -297,8 +297,8 @@ ia32_syscall_table: data8 sys_getpriority data8 sys_setpriority data8 sys32_ni_syscall /* old profil syscall holder */ - data8 sys32_statfs - data8 sys32_fstatfs /* 100 */ + data8 compat_sys_statfs + data8 compat_sys_fstatfs /* 100 */ data8 sys32_ioperm data8 sys32_socketcall data8 sys_syslog diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index 17ebca0407aa..48c0bbb7d524 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -609,61 +609,6 @@ sys32_pipe (int *fd) return retval; } -static inline int -put_statfs (struct statfs32 *ubuf, struct statfs *kbuf) -{ - int err; - - if (!access_ok(VERIFY_WRITE, ubuf, sizeof(*ubuf))) - return -EFAULT; - - err = __put_user(kbuf->f_type, &ubuf->f_type); - err |= __put_user(kbuf->f_bsize, &ubuf->f_bsize); - err |= __put_user(kbuf->f_blocks, &ubuf->f_blocks); - err |= __put_user(kbuf->f_bfree, &ubuf->f_bfree); - err |= __put_user(kbuf->f_bavail, &ubuf->f_bavail); - err |= __put_user(kbuf->f_files, &ubuf->f_files); - err |= __put_user(kbuf->f_ffree, &ubuf->f_ffree); - err |= __put_user(kbuf->f_namelen, &ubuf->f_namelen); - err |= __put_user(kbuf->f_fsid.val[0], &ubuf->f_fsid.val[0]); - err |= __put_user(kbuf->f_fsid.val[1], &ubuf->f_fsid.val[1]); - return err; -} - -extern asmlinkage long sys_statfs(const char * path, struct statfs * buf); - -asmlinkage long -sys32_statfs (const char *path, struct statfs32 *buf) -{ - int ret; - struct statfs s; - mm_segment_t old_fs = get_fs(); - - set_fs(KERNEL_DS); - ret = sys_statfs(path, &s); - set_fs(old_fs); - if (put_statfs(buf, &s)) - return -EFAULT; - return ret; -} - -extern asmlinkage long sys_fstatfs(unsigned int fd, struct statfs * buf); - -asmlinkage long -sys32_fstatfs (unsigned int fd, struct statfs32 *buf) -{ - int ret; - struct 
statfs s; - mm_segment_t old_fs = get_fs(); - - set_fs(KERNEL_DS); - ret = sys_fstatfs(fd, &s); - set_fs(old_fs); - if (put_statfs(buf, &s)) - return -EFAULT; - return ret; -} - static inline long get_tv32 (struct timeval *o, struct compat_timeval *i) { diff --git a/include/asm-ia64/compat.h b/include/asm-ia64/compat.h index d9b875dff673..4dbbbf499800 100644 --- a/include/asm-ia64/compat.h +++ b/include/asm-ia64/compat.h @@ -61,11 +61,24 @@ struct compat_stat { }; struct compat_flock { - short l_type; - short l_whence; - compat_off_t l_start; - compat_off_t l_len; - compat_pid_t l_pid; + short l_type; + short l_whence; + compat_off_t l_start; + compat_off_t l_len; + compat_pid_t l_pid; +}; + +struct compat_statfs { + int f_type; + int f_bsize; + int f_blocks; + int f_bfree; + int f_bavail; + int f_files; + int f_ffree; + compat_fsid_t f_fsid; + int f_namelen; /* SunOS ignores this field. */ + int f_spare[6]; }; typedef u32 compat_old_sigset_t; /* at least 32 bits */ diff --git a/include/asm-ia64/ia32.h b/include/asm-ia64/ia32.h index 94791af38caa..4824df582b8a 100644 --- a/include/asm-ia64/ia32.h +++ b/include/asm-ia64/ia32.h @@ -193,19 +193,6 @@ struct stat64 { unsigned int st_ino_hi; }; -struct statfs32 { - int f_type; - int f_bsize; - int f_blocks; - int f_bfree; - int f_bavail; - int f_files; - int f_ffree; - compat_fsid_t f_fsid; - int f_namelen; /* SunOS ignores this field. */ - int f_spare[6]; -}; - typedef union sigval32 { int sival_int; unsigned int sival_ptr; -- cgit v1.2.3 From 2dc3864fe95c37d7775c95b023cfd48b8ce630fe Mon Sep 17 00:00:00 2001 From: David Mosberger Date: Thu, 23 Jan 2003 19:56:56 -0800 Subject: ia64: Sync up with 2.5.59. Add light-weight version of set_tid_address() system call. --- Documentation/ia64/fsys.txt | 3 +- arch/ia64/Makefile | 25 +++++++++------ arch/ia64/hp/zx1/hpzx1_misc.c | 32 +++++++++---------- arch/ia64/ia32/sys_ia32.c | 6 ++-- arch/ia64/kernel/acpi.c | 20 ++++++------ arch/ia64/kernel/fsys.S | 52 ++++++++++++++++++++++++++++-- arch/ia64/kernel/process.c | 2 ++ arch/ia64/kernel/traps.c | 2 ++ arch/ia64/kernel/unaligned.c | 13 ++++---- arch/ia64/mm/extable.c | 70 ++++++++++------------------------------- arch/ia64/tools/print_offsets.c | 1 + arch/ia64/vmlinux.lds.S | 31 ++++++++++++------ include/asm-ia64/asmmacro.h | 16 +++++----- include/asm-ia64/mmu_context.h | 5 ++- include/asm-ia64/ptrace.h | 10 +++--- include/asm-ia64/system.h | 3 +- include/asm-ia64/tlb.h | 8 ++--- include/asm-ia64/uaccess.h | 38 ++++++++++------------ 18 files changed, 184 insertions(+), 153 deletions(-) diff --git a/Documentation/ia64/fsys.txt b/Documentation/ia64/fsys.txt index 9a41823af7e4..21dde92d0699 100644 --- a/Documentation/ia64/fsys.txt +++ b/Documentation/ia64/fsys.txt @@ -4,7 +4,7 @@ ----------------------------------- Started: 13-Jan-2002 - Last update: 15-Jan-2002 + Last update: 24-Jan-2002 David Mosberger-Tang @@ -89,6 +89,7 @@ The entry and exit-state of an fsyscall handler is as follows: ** Machine state on entry to fsyscall handler: + - r10 = 0 - r11 = saved ar.pfs (a user-level value) - r15 = system call number - r16 = "current" task pointer (in normal kernel-mode, this is in r13) diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile index 7d93d20e3ee7..e2beb8cee8ca 100644 --- a/arch/ia64/Makefile +++ b/arch/ia64/Makefile @@ -5,7 +5,7 @@ # License. See the file "COPYING" in the main directory of this archive # for more details. 
# -# Copyright (C) 1998-2002 by David Mosberger-Tang +# Copyright (C) 1998-2003 by David Mosberger-Tang # NM := $(CROSS_COMPILE)nm -B @@ -48,30 +48,37 @@ drivers-$(CONFIG_IA64_HP_SIM) += arch/ia64/hp/sim/ drivers-$(CONFIG_IA64_HP_ZX1) += arch/ia64/hp/common/ arch/ia64/hp/zx1/ drivers-$(CONFIG_IA64_SGI_SN) += arch/ia64/sn/fakeprom/ -makeboot =$(Q)$(MAKE) -f scripts/Makefile.build obj=arch/ia64/boot $(1) -maketool =$(Q)$(MAKE) -f scripts/Makefile.build obj=arch/ia64/tools $(1) +boot := arch/ia64/boot +tools := arch/ia64/tools .PHONY: boot compressed archclean archmrproper include/asm-ia64/offsets.h -all compressed: vmlinux.gz +all: vmlinux + +compressed: vmlinux.gz vmlinux.gz: vmlinux - $(call makeboot,vmlinux.gz) + $(Q)$(MAKE) $(build)=$(boot) vmlinux.gz check: vmlinux arch/ia64/scripts/unwcheck.sh vmlinux archmrproper: archclean: - $(Q)$(MAKE) -f scripts/Makefile.clean obj=arch/ia64/boot - $(Q)$(MAKE) -f scripts/Makefile.clean obj=arch/ia64/tools + $(Q)$(MAKE) $(clean)=$(boot) + $(Q)$(MAKE) $(clean)=$(tools) CLEAN_FILES += include/asm-ia64/offsets.h vmlinux.gz bootloader prepare: include/asm-ia64/offsets.h boot: lib/lib.a vmlinux - $(call makeboot,$@) + $(Q)$(MAKE) $(build)=$(boot) $@ include/asm-ia64/offsets.h: include/asm include/linux/version.h include/config/MARKER - $(call maketool,$@) + $(Q)$(MAKE) $(build)=$(tools) $@ + +define archhelp + echo ' compressed - Build compressed kernel image' + echo ' boot - Build vmlinux and bootloader for Ski simulator' +endef diff --git a/arch/ia64/hp/zx1/hpzx1_misc.c b/arch/ia64/hp/zx1/hpzx1_misc.c index a1c0e2585469..bf6faa991038 100644 --- a/arch/ia64/hp/zx1/hpzx1_misc.c +++ b/arch/ia64/hp/zx1/hpzx1_misc.c @@ -1,9 +1,9 @@ /* * Misc. support for HP zx1 chipset support * - * Copyright (C) 2002 Hewlett-Packard Co - * Copyright (C) 2002 Alex Williamson - * Copyright (C) 2002 Bjorn Helgaas + * Copyright (C) 2002-2003 Hewlett-Packard Co + * Alex Williamson + * Bjorn Helgaas */ @@ -17,7 +17,7 @@ #include #include -extern acpi_status acpi_evaluate_integer (acpi_handle, acpi_string, acpi_object_list *, +extern acpi_status acpi_evaluate_integer (acpi_handle, acpi_string, struct acpi_object_list *, unsigned long *); #define PFX "hpzx1: " @@ -190,31 +190,31 @@ hpzx1_fake_pci_dev(char *name, unsigned int busnum, unsigned long addr, unsigned hpzx1_devices++; } -typedef struct { +struct acpi_hp_vendor_long { u8 guid_id; u8 guid[16]; u8 csr_base[8]; u8 csr_length[8]; -} acpi_hp_vendor_long; +}; #define HP_CCSR_LENGTH 0x21 #define HP_CCSR_TYPE 0x2 #define HP_CCSR_GUID EFI_GUID(0x69e9adf9, 0x924f, 0xab5f, \ 0xf6, 0x4a, 0x24, 0xd2, 0x01, 0x37, 0x0e, 0xad) -extern acpi_status acpi_get_crs(acpi_handle, acpi_buffer *); -extern acpi_resource *acpi_get_crs_next(acpi_buffer *, int *); -extern acpi_resource_data *acpi_get_crs_type(acpi_buffer *, int *, int); -extern void acpi_dispose_crs(acpi_buffer *); +extern acpi_status acpi_get_crs(acpi_handle, struct acpi_buffer *); +extern struct acpi_resource *acpi_get_crs_next(struct acpi_buffer *, int *); +extern union acpi_resource_data *acpi_get_crs_type(struct acpi_buffer *, int *, int); +extern void acpi_dispose_crs(struct acpi_buffer *); static acpi_status hp_csr_space(acpi_handle obj, u64 *csr_base, u64 *csr_length) { int i, offset = 0; acpi_status status; - acpi_buffer buf; - acpi_resource_vendor *res; - acpi_hp_vendor_long *hp_res; + struct acpi_buffer buf; + struct acpi_resource_vendor *res; + struct acpi_hp_vendor_long *hp_res; efi_guid_t vendor_guid; *csr_base = 0; @@ -226,14 +226,14 @@ hp_csr_space(acpi_handle obj, u64 
*csr_base, u64 *csr_length) return status; } - res = (acpi_resource_vendor *)acpi_get_crs_type(&buf, &offset, ACPI_RSTYPE_VENDOR); + res = (struct acpi_resource_vendor *)acpi_get_crs_type(&buf, &offset, ACPI_RSTYPE_VENDOR); if (!res) { printk(KERN_ERR PFX "Failed to find config space for device\n"); acpi_dispose_crs(&buf); return AE_NOT_FOUND; } - hp_res = (acpi_hp_vendor_long *)(res->reserved); + hp_res = (struct acpi_hp_vendor_long *)(res->reserved); if (res->length != HP_CCSR_LENGTH || hp_res->guid_id != HP_CCSR_TYPE) { printk(KERN_ERR PFX "Unknown Vendor data\n"); @@ -288,7 +288,7 @@ hpzx1_lba_probe(acpi_handle obj, u32 depth, void *context, void **ret) { u64 csr_base = 0, csr_length = 0; acpi_status status; - NATIVE_UINT busnum; + acpi_native_uint busnum; char *name = context; char fullname[32]; diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index 48c0bbb7d524..c51bcbb5084c 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -6,7 +6,7 @@ * Copyright (C) 1999 Arun Sharma * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) - * Copyright (C) 2000-2002 Hewlett-Packard Co + * Copyright (C) 2000-2003 Hewlett-Packard Co * David Mosberger-Tang * * These routines maintain argument size conversion between 32bit and 64bit @@ -2348,7 +2348,7 @@ shmctl32 (int first, int second, void *uptr) static long semtimedop32(int semid, struct sembuf *tsems, int nsems, - const struct timespec32 *timeout32) + const struct compat_timespec *timeout32) { struct timespec t; if (get_user (t.tv_sec, &timeout32->tv_sec) || @@ -2371,7 +2371,7 @@ sys32_ipc (u32 call, int first, int second, int third, u32 ptr, u32 fifth) return sys_semtimedop(first, (struct sembuf *)AA(ptr), second, NULL); case SEMTIMEDOP: return semtimedop32(first, (struct sembuf *)AA(ptr), second, - (const struct timespec32 *)AA(fifth)); + (const struct compat_timespec *)AA(fifth)); case SEMGET: return sys_semget(first, second, third); case SEMCTL: diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 8c12822dde38..eaf9c9917121 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -128,7 +128,7 @@ acpi_get_sysname (void) * with a list of acpi_resource structures. 
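 * Typical call sequence (hp_csr_space() in arch/ia64/hp/zx1/hpzx1_misc.c
 * is a real user): acpi_get_crs() fills the buffer, acpi_get_crs_next()
 * or acpi_get_crs_type() walks it, and acpi_dispose_crs() releases it.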
*/ acpi_status -acpi_get_crs (acpi_handle obj, acpi_buffer *buf) +acpi_get_crs (acpi_handle obj, struct acpi_buffer *buf) { acpi_status result; buf->length = 0; @@ -144,10 +144,10 @@ acpi_get_crs (acpi_handle obj, acpi_buffer *buf) return acpi_get_current_resources(obj, buf); } -acpi_resource * -acpi_get_crs_next (acpi_buffer *buf, int *offset) +struct acpi_resource * +acpi_get_crs_next (struct acpi_buffer *buf, int *offset) { - acpi_resource *res; + struct acpi_resource *res; if (*offset >= buf->length) return NULL; @@ -157,11 +157,11 @@ acpi_get_crs_next (acpi_buffer *buf, int *offset) return res; } -acpi_resource_data * -acpi_get_crs_type (acpi_buffer *buf, int *offset, int type) +union acpi_resource_data * +acpi_get_crs_type (struct acpi_buffer *buf, int *offset, int type) { for (;;) { - acpi_resource *res = acpi_get_crs_next(buf, offset); + struct acpi_resource *res = acpi_get_crs_next(buf, offset); if (!res) return NULL; if (res->id == type) @@ -170,7 +170,7 @@ acpi_get_crs_type (acpi_buffer *buf, int *offset, int type) } void -acpi_dispose_crs (acpi_buffer *buf) +acpi_dispose_crs (struct acpi_buffer *buf) { kfree(buf->pointer); } @@ -638,7 +638,7 @@ static int __init acpi_parse_fadt (unsigned long phys_addr, unsigned long size) { struct acpi_table_header *fadt_header; - fadt_descriptor_rev2 *fadt; + struct fadt_descriptor_rev2 *fadt; u32 sci_irq, gsi_base; char *iosapic_address; @@ -649,7 +649,7 @@ acpi_parse_fadt (unsigned long phys_addr, unsigned long size) if (fadt_header->revision != 3) return -ENODEV; /* Only deal with ACPI 2.0 FADT */ - fadt = (fadt_descriptor_rev2 *) fadt_header; + fadt = (struct fadt_descriptor_rev2 *) fadt_header; if (!(fadt->iapc_boot_arch & BAF_8042_KEYBOARD_CONTROLLER)) acpi_kbd_controller_present = 0; diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S index c395ba0723a3..65c479e62d15 100644 --- a/arch/ia64/kernel/fsys.S +++ b/arch/ia64/kernel/fsys.S @@ -10,6 +10,28 @@ #include #include +/* + * See Documentation/ia64/fsys.txt for details on fsyscalls. + * + * On entry to an fsyscall handler: + * r10 = 0 (i.e., defaults to "successful syscall return") + * r11 = saved ar.pfs (a user-level value) + * r15 = system call number + * r16 = "current" task pointer (in normal kernel-mode, this is in r13) + * r32-r39 = system call arguments + * b6 = return address (a user-level value) + * ar.pfs = previous frame-state (a user-level value) + * PSR.be = cleared to zero (i.e., little-endian byte order is in effect) + * all other registers may contain values passed in from user-mode + * + * On return from an fsyscall handler: + * r11 = saved ar.pfs (as passed into the fsyscall handler) + * r15 = system call number (as passed into the fsyscall handler) + * r32-r39 = system call arguments (as passed into the fsyscall handler) + * b6 = return address (as passed into the fsyscall handler) + * ar.pfs = previous frame-state (as passed into the fsyscall handler) + */ + ENTRY(fsys_ni_syscall) mov r8=ENOSYS mov r10=-1 @@ -32,6 +54,30 @@ ENTRY(fsys_getpid) br.ret.sptk.many b6 END(fsys_getpid) +ENTRY(fsys_set_tid_address) + add r9=TI_FLAGS+IA64_TASK_SIZE,r16 + ;; + ld4 r9=[r9] + tnat.z p6,p7=r32 // check argument register for being NaT + ;; + and r9=TIF_ALLWORK_MASK,r9 + add r8=IA64_TASK_PID_OFFSET,r16 + add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16 + ;; + ld4 r8=[r8] + cmp.ne p8,p0=0,r9 + mov r17=-1 + ;; +(p6) st8 [r18]=r32 +(p7) st8 [r18]=r17 +(p8) br.spnt.many fsys_fallback_syscall + ;; + mov r17=0 // don't leak kernel bits... + mov r18=0 // don't leak kernel bits... 
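+	// (tnat.z above sets p6 for a normal argument and p7 for a NaT, so
+	//  the predicated st8s store either the user pointer or -1 into
+	//  current->clear_child_tid; p8, derived from TIF_ALLWORK_MASK,
+	//  punts to fsys_fallback_syscall when any work is pending)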
+ MCKINLEY_E9_WORKAROUND + br.ret.sptk.many b6 +END(fsys_set_tid_address) + .rodata .align 8 .globl fsyscall_table @@ -245,9 +291,9 @@ fsyscall_table: data8 fsys_fallback_syscall // futex // 1230 data8 fsys_fallback_syscall // sched_setaffinity data8 fsys_fallback_syscall // sched_getaffinity - data8 fsys_fallback_syscall // set_tid_address - data8 fsys_fallback_syscall // alloc_hugepages - data8 fsys_fallback_syscall // free_hugepages // 1235 + data8 fsys_set_tid_address // set_tid_address + data8 fsys_fallback_syscall // unused + data8 fsys_fallback_syscall // unused // 1235 data8 fsys_fallback_syscall // exit_group data8 fsys_fallback_syscall // lookup_dcookie data8 fsys_fallback_syscall // io_setup diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 2fbeb865b15f..ed63bbaa9069 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -144,12 +144,14 @@ show_regs (struct pt_regs *regs) void do_notify_resume_user (sigset_t *oldset, struct sigscratch *scr, long in_syscall) { +#ifdef CONFIG_FSYS if (fsys_mode(current, &scr->pt)) { /* defer signal-handling etc. until we return to privilege-level 0. */ if (!ia64_psr(&scr->pt)->lp) ia64_psr(&scr->pt)->lp = 1; return; } +#endif #ifdef CONFIG_PERFMON if (current->thread.pfm_ovfl_block_reset) diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index 43b568f83209..c853cd530f55 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -524,6 +524,7 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, case 29: /* Debug */ case 35: /* Taken Branch Trap */ case 36: /* Single Step Trap */ +#ifdef CONFIG_FSYS if (fsys_mode(current, regs)) { extern char syscall_via_break[], __start_gate_section[]; /* @@ -541,6 +542,7 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, ia64_psr(regs)->cpl = 3; return; } +#endif switch (vector) { case 29: siginfo.si_code = TRAP_HWBKPT; diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c index e0719a28e034..cb366a3bd3a7 100644 --- a/arch/ia64/kernel/unaligned.c +++ b/arch/ia64/kernel/unaligned.c @@ -1294,12 +1294,12 @@ within_logging_rate_limit (void) void ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs) { - struct exception_fixup fix = { 0 }; struct ia64_psr *ipsr = ia64_psr(regs); mm_segment_t old_fs = get_fs(); unsigned long bundle[2]; unsigned long opcode; struct siginfo si; + const struct exception_table_entry *eh = NULL; union { unsigned long l; load_store_t insn; @@ -1317,10 +1317,9 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs) * user-level unaligned accesses. Otherwise, a clever program could trick this * handler into reading an arbitrary kernel addresses... */ - if (!user_mode(regs)) { - fix = SEARCH_EXCEPTION_TABLE(regs); - } - if (user_mode(regs) || fix.cont) { + if (!user_mode(regs)) + eh = SEARCH_EXCEPTION_TABLE(regs); + if (user_mode(regs) || eh) { if ((current->thread.flags & IA64_THREAD_UAC_SIGBUS) != 0) goto force_sigbus; @@ -1486,8 +1485,8 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs) failure: /* something went wrong... 
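	   (under the reworked extable scheme, eh is the encoded fixup
	   returned by search_extable(): handle_exception() sets r8 to
	   -EFAULT, clears r9 if bit 2 of the fixup is set, and resumes at
	   the continuation IP, with the low two bits selecting the
	   bundle slot)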
*/ if (!user_mode(regs)) { - if (fix.cont) { - handle_exception(regs, fix); + if (eh) { + handle_exception(regs, eh); goto done; } die_if_kernel("error during unaligned kernel access\n", regs, ret); diff --git a/arch/ia64/mm/extable.c b/arch/ia64/mm/extable.c index 3d11a8ad6451..104cfa54639d 100644 --- a/arch/ia64/mm/extable.c +++ b/arch/ia64/mm/extable.c @@ -10,22 +10,21 @@ #include #include -extern const struct exception_table_entry __start___ex_table[]; -extern const struct exception_table_entry __stop___ex_table[]; - -static inline const struct exception_table_entry * -search_one_table (const struct exception_table_entry *first, - const struct exception_table_entry *last, - unsigned long ip, unsigned long gp) +const struct exception_table_entry * +search_extable (const struct exception_table_entry *first, + const struct exception_table_entry *last, + unsigned long ip) { - while (first <= last) { - const struct exception_table_entry *mid; - long diff; + const struct exception_table_entry *mid; + unsigned long mid_ip; + long diff, base = (long) first; + while (first <= last) { mid = &first[(last - first)/2]; - diff = (mid->addr + gp) - ip; + mid_ip = base + mid->addr; + diff = mid_ip - ip; if (diff == 0) - return mid; + return (void *) ((long) base + mid->cont); else if (diff < 0) first = mid + 1; else @@ -34,50 +33,13 @@ search_one_table (const struct exception_table_entry *first, return 0; } -#ifndef CONFIG_MODULES -register unsigned long main_gp __asm__("gp"); -#endif - -struct exception_fixup -search_exception_table (unsigned long addr) -{ - const struct exception_table_entry *entry; - struct exception_fixup fix = { 0 }; - -#ifndef CONFIG_MODULES - /* There is only the kernel to search. */ - entry = search_one_table(__start___ex_table, __stop___ex_table - 1, addr, main_gp); - if (entry) - fix.cont = entry->cont + main_gp; - return fix; -#else - struct archdata *archdata; - struct module *mp; - - /* The kernel is the last "module" -- no need to treat it special. 
*/ - for (mp = module_list; mp; mp = mp->next) { - if (!mp->ex_table_start) - continue; - archdata = (struct archdata *) mp->archdata_start; - if (!archdata) - continue; - entry = search_one_table(mp->ex_table_start, mp->ex_table_end - 1, - addr, (unsigned long) archdata->gp); - if (entry) { - fix.cont = entry->cont + (unsigned long) archdata->gp; - return fix; - } - } -#endif - return fix; -} - void -handle_exception (struct pt_regs *regs, struct exception_fixup fix) +handle_exception (struct pt_regs *regs, const struct exception_table_entry *e) { + long fix = (long) e; regs->r8 = -EFAULT; - if (fix.cont & 4) + if (fix & 4) regs->r9 = 0; - regs->cr_iip = (long) fix.cont & ~0xf; - ia64_psr(regs)->ri = fix.cont & 0x3; /* set continuation slot number */ + regs->cr_iip = fix & ~0xf; + ia64_psr(regs)->ri = fix & 0x3; /* set continuation slot number */ } diff --git a/arch/ia64/tools/print_offsets.c b/arch/ia64/tools/print_offsets.c index 4c72ea2608a6..2b32267fc8e4 100644 --- a/arch/ia64/tools/print_offsets.c +++ b/arch/ia64/tools/print_offsets.c @@ -56,6 +56,7 @@ tab[] = { "IA64_TASK_THREAD_ON_USTACK_OFFSET", offsetof (struct task_struct, thread.on_ustack) }, { "IA64_TASK_PID_OFFSET", offsetof (struct task_struct, pid) }, { "IA64_TASK_TGID_OFFSET", offsetof (struct task_struct, tgid) }, + { "IA64_TASK_CLEAR_CHILD_TID_OFFSET",offsetof (struct task_struct, clear_child_tid) }, { "IA64_PT_REGS_CR_IPSR_OFFSET", offsetof (struct pt_regs, cr_ipsr) }, { "IA64_PT_REGS_CR_IIP_OFFSET", offsetof (struct pt_regs, cr_iip) }, { "IA64_PT_REGS_CR_IFS_OFFSET", offsetof (struct pt_regs, cr_ifs) }, diff --git a/arch/ia64/vmlinux.lds.S b/arch/ia64/vmlinux.lds.S index 011cef5d1831..d6312ee7c729 100644 --- a/arch/ia64/vmlinux.lds.S +++ b/arch/ia64/vmlinux.lds.S @@ -6,7 +6,7 @@ #define LOAD_OFFSET PAGE_OFFSET #include - + OUTPUT_FORMAT("elf64-ia64-little") OUTPUT_ARCH(ia64) ENTRY(phys_start) @@ -29,9 +29,29 @@ SECTIONS _text = .; _stext = .; - .text : AT(ADDR(.text) - PAGE_OFFSET) + + .text.ivt : AT(ADDR(.text.ivt) - PAGE_OFFSET) { *(.text.ivt) + } + + /* + * Due to a linker bug (still present as of binutils 2.13.90.0.10), + * the exception table must come before any code that uses the + * uaccess.h macros; otherwise, the linker will silently truncate negative @secrel() + * values to 0!! Just love it when bugs like these sneak in... + */ + /* Exception table */ + . = ALIGN(16); + __ex_table : AT(ADDR(__ex_table) - PAGE_OFFSET) + { + __start___ex_table = .; + *(__ex_table) + __stop___ex_table = .; + } + + .text : AT(ADDR(.text) - PAGE_OFFSET) + { *(.text) } .text2 : AT(ADDR(.text2) - PAGE_OFFSET) @@ -47,13 +67,6 @@ SECTIONS /* Global data */ _data = .; - /* Exception table */ - . = ALIGN(16); - __start___ex_table = .; - __ex_table : AT(ADDR(__ex_table) - PAGE_OFFSET) - { *(__ex_table) } - __stop___ex_table = .; - #if defined(CONFIG_IA64_GENERIC) /* Machine Vector */ . = ALIGN(16); diff --git a/include/asm-ia64/asmmacro.h b/include/asm-ia64/asmmacro.h index 41b061ac1d24..556ef09977b4 100644 --- a/include/asm-ia64/asmmacro.h +++ b/include/asm-ia64/asmmacro.h @@ -44,18 +44,18 @@ name: .previous #if __GNUC__ >= 3 -# define EX(y,x...) \ - .xdata4 "__ex_table", @gprel(99f), @gprel(y); \ +# define EX(y,x...) \ + .xdata4 "__ex_table", @secrel(99f), @secrel(y); \ [99:] x -# define EXCLR(y,x...) \ - .xdata4 "__ex_table", @gprel(99f), @gprel(y)+4; \ +# define EXCLR(y,x...) \ + .xdata4 "__ex_table", @secrel(99f), @secrel(y)+4; \ [99:] x #else -# define EX(y,x...) 
\ - .xdata4 "__ex_table", @gprel(99f), @gprel(y); \ +# define EX(y,x...) \ + .xdata4 "__ex_table", @secrel(99f), @secrel(y); \ 99: x -# define EXCLR(y,x...) \ - .xdata4 "__ex_table", @gprel(99f), @gprel(y)+4; \ +# define EXCLR(y,x...) \ + .xdata4 "__ex_table", @secrel(99f), @secrel(y)+4; \ 99: x #endif diff --git a/include/asm-ia64/mmu_context.h b/include/asm-ia64/mmu_context.h index ebcc17669dbb..a614a1dbbb61 100644 --- a/include/asm-ia64/mmu_context.h +++ b/include/asm-ia64/mmu_context.h @@ -177,7 +177,10 @@ activate_context (struct mm_struct *mm) } while (unlikely(context != mm->context)); } -#define deactivate_mm(tsk,mm) do { } while (0) +#define deactivate_mm(tsk,mm) \ +do { \ + MMU_TRACE('d', smp_processor_id(), mm, mm->context); \ +} while (0) /* * Switch from address space PREV to address space NEXT. diff --git a/include/asm-ia64/ptrace.h b/include/asm-ia64/ptrace.h index f64f222d1a5c..92966a334ce3 100644 --- a/include/asm-ia64/ptrace.h +++ b/include/asm-ia64/ptrace.h @@ -219,11 +219,11 @@ struct switch_stack { # define ia64_psr(regs) ((struct ia64_psr *) &(regs)->cr_ipsr) # define user_mode(regs) (((struct ia64_psr *) &(regs)->cr_ipsr)->cpl != 0) # define user_stack(task,regs) ((long) regs - (long) task == IA64_STK_OFFSET - sizeof(*regs)) -# define fsys_mode(task,regs) \ - ({ \ - struct task_struct *_task = (task); \ - struct pt_regs *_regs = (regs); \ - !user_mode(regs) && user_stack(task, regs); \ +# define fsys_mode(task,regs) \ + ({ \ + struct task_struct *_task = (task); \ + struct pt_regs *_regs = (regs); \ + !user_mode(_regs) && user_stack(_task, _regs); \ }) struct task_struct; /* forward decl */ diff --git a/include/asm-ia64/system.h b/include/asm-ia64/system.h index 296d494c58e4..357758463c87 100644 --- a/include/asm-ia64/system.h +++ b/include/asm-ia64/system.h @@ -7,7 +7,7 @@ * on information published in the Processor Abstraction Layer * and the System Abstraction Layer manual. * - * Copyright (C) 1998-2002 Hewlett-Packard Co + * Copyright (C) 1998-2003 Hewlett-Packard Co * David Mosberger-Tang * Copyright (C) 1999 Asit Mallick * Copyright (C) 1999 Don Dugger @@ -17,6 +17,7 @@ #include #include #include +#include #define KERNEL_START (PAGE_OFFSET + 68*1024*1024) diff --git a/include/asm-ia64/tlb.h b/include/asm-ia64/tlb.h index 9370ac9bc997..2edbcee407e7 100644 --- a/include/asm-ia64/tlb.h +++ b/include/asm-ia64/tlb.h @@ -1,7 +1,7 @@ #ifndef _ASM_IA64_TLB_H #define _ASM_IA64_TLB_H /* - * Copyright (C) 2002 Hewlett-Packard Co + * Copyright (C) 2002-2003 Hewlett-Packard Co * David Mosberger-Tang * * This file was derived from asm-generic/tlb.h. @@ -70,8 +70,7 @@ extern struct mmu_gather mmu_gathers[NR_CPUS]; * freed pages that where gathered up to this point. */ static inline void -ia64_tlb_flush_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) +ia64_tlb_flush_mmu (struct mmu_gather *tlb, unsigned long start, unsigned long end) { unsigned int nr; @@ -197,8 +196,7 @@ tlb_remove_page (struct mmu_gather *tlb, struct page *page) * PTE, not just those pointing to (normal) physical memory. 
*/ static inline void -__tlb_remove_tlb_entry(struct mmu_gather *tlb, - pte_t *ptep, unsigned long address) +__tlb_remove_tlb_entry (struct mmu_gather *tlb, pte_t *ptep, unsigned long address) { if (tlb->start_addr == ~0UL) tlb->start_addr = address; diff --git a/include/asm-ia64/uaccess.h b/include/asm-ia64/uaccess.h index afe47af459bc..54345fbbd630 100644 --- a/include/asm-ia64/uaccess.h +++ b/include/asm-ia64/uaccess.h @@ -26,7 +26,7 @@ * associated and, if so, sets r8 to -EFAULT and clears r9 to 0 and * then resumes execution at the continuation point. * - * Copyright (C) 1998, 1999, 2001-2002 Hewlett-Packard Co + * Copyright (C) 1998, 1999, 2001-2003 Hewlett-Packard Co * David Mosberger-Tang */ @@ -140,25 +140,25 @@ asm (".section \"__ex_table\", \"a\"\n\t.previous"); #define __get_user_64(addr) \ asm ("\n"_LL"\tld8 %0=%2%P2\t// %0 and %1 get overwritten by exception handler\n" \ - "\t.xdata4 \"__ex_table\", @gprel(1b), @gprel(1f)+4\n" \ + "\t.xdata4 \"__ex_table\", @secrel(1b), @secrel(1f)+4\n" \ _LL \ : "=r"(__gu_val), "=r"(__gu_err) : "m"(__m(addr)), "1"(__gu_err)); #define __get_user_32(addr) \ asm ("\n"_LL"\tld4 %0=%2%P2\t// %0 and %1 get overwritten by exception handler\n" \ - "\t.xdata4 \"__ex_table\", @gprel(1b), @gprel(1f)+4\n" \ + "\t.xdata4 \"__ex_table\", @secrel(1b), @secrel(1f)+4\n" \ _LL \ : "=r"(__gu_val), "=r"(__gu_err) : "m"(__m(addr)), "1"(__gu_err)); #define __get_user_16(addr) \ asm ("\n"_LL"\tld2 %0=%2%P2\t// %0 and %1 get overwritten by exception handler\n" \ - "\t.xdata4 \"__ex_table\", @gprel(1b), @gprel(1f)+4\n" \ + "\t.xdata4 \"__ex_table\", @secrel(1b), @secrel(1f)+4\n" \ _LL \ : "=r"(__gu_val), "=r"(__gu_err) : "m"(__m(addr)), "1"(__gu_err)); #define __get_user_8(addr) \ asm ("\n"_LL"\tld1 %0=%2%P2\t// %0 and %1 get overwritten by exception handler\n" \ - "\t.xdata4 \"__ex_table\", @gprel(1b), @gprel(1f)+4\n" \ + "\t.xdata4 \"__ex_table\", @secrel(1b), @secrel(1f)+4\n" \ _LL \ : "=r"(__gu_val), "=r"(__gu_err) : "m"(__m(addr)), "1"(__gu_err)); @@ -202,28 +202,28 @@ extern void __put_user_unknown (void); #define __put_user_64(x,addr) \ asm volatile ( \ "\n"_LL"\tst8 %1=%r2%P1\t// %0 gets overwritten by exception handler\n" \ - "\t.xdata4 \"__ex_table\", @gprel(1b), @gprel(1f)\n" \ + "\t.xdata4 \"__ex_table\", @secrel(1b), @secrel(1f)\n" \ _LL \ : "=r"(__pu_err) : "m"(__m(addr)), "rO"(x), "0"(__pu_err)) #define __put_user_32(x,addr) \ asm volatile ( \ "\n"_LL"\tst4 %1=%r2%P1\t// %0 gets overwritten by exception handler\n" \ - "\t.xdata4 \"__ex_table\", @gprel(1b), @gprel(1f)\n" \ + "\t.xdata4 \"__ex_table\", @secrel(1b), @secrel(1f)\n" \ _LL \ : "=r"(__pu_err) : "m"(__m(addr)), "rO"(x), "0"(__pu_err)) #define __put_user_16(x,addr) \ asm volatile ( \ "\n"_LL"\tst2 %1=%r2%P1\t// %0 gets overwritten by exception handler\n" \ - "\t.xdata4 \"__ex_table\", @gprel(1b), @gprel(1f)\n" \ + "\t.xdata4 \"__ex_table\", @secrel(1b), @secrel(1f)\n" \ _LL \ : "=r"(__pu_err) : "m"(__m(addr)), "rO"(x), "0"(__pu_err)) #define __put_user_8(x,addr) \ asm volatile ( \ "\n"_LL"\tst1 %1=%r2%P1\t// %0 gets overwritten by exception handler\n" \ - "\t.xdata4 \"__ex_table\", @gprel(1b), @gprel(1f)\n" \ + "\t.xdata4 \"__ex_table\", @secrel(1b), @secrel(1f)\n" \ _LL \ : "=r"(__pu_err) : "m"(__m(addr)), "rO"(x), "0"(__pu_err)) @@ -314,26 +314,22 @@ struct exception_table_entry { int cont; /* gp-relative continuation address; if bit 2 is set, r9 is set to 0 */ }; -struct exception_fixup { - unsigned long cont; /* continuation point (bit 2: clear r9 if set) */ -}; - -extern struct 
exception_fixup search_exception_table (unsigned long addr); -extern void handle_exception (struct pt_regs *regs, struct exception_fixup fixup); +extern void handle_exception (struct pt_regs *regs, const struct exception_table_entry *e); +extern const struct exception_table_entry *search_exception_tables (unsigned long addr); #ifdef GAS_HAS_LOCAL_TAGS -#define SEARCH_EXCEPTION_TABLE(regs) search_exception_table(regs->cr_iip + ia64_psr(regs)->ri); +# define SEARCH_EXCEPTION_TABLE(regs) search_exception_tables(regs->cr_iip + ia64_psr(regs)->ri) #else -#define SEARCH_EXCEPTION_TABLE(regs) search_exception_table(regs->cr_iip); +# define SEARCH_EXCEPTION_TABLE(regs) search_exception_tables(regs->cr_iip) #endif static inline int done_with_exception (struct pt_regs *regs) { - struct exception_fixup fix; - fix = SEARCH_EXCEPTION_TABLE(regs); - if (fix.cont) { - handle_exception(regs, fix); + const struct exception_table_entry *e; + e = SEARCH_EXCEPTION_TABLE(regs); + if (e) { + handle_exception(regs, e); return 1; } return 0; -- cgit v1.2.3 From 76f0944e03a562c61c11d3f220c3fb4da3230fcc Mon Sep 17 00:00:00 2001 From: David Mosberger Date: Thu, 23 Jan 2003 21:33:03 -0800 Subject: ia64: More vmlinux.lds.S cleanups. --- arch/ia64/vmlinux.lds.S | 58 +++++++++++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 24 deletions(-) diff --git a/arch/ia64/vmlinux.lds.S b/arch/ia64/vmlinux.lds.S index d6312ee7c729..d800f59edc99 100644 --- a/arch/ia64/vmlinux.lds.S +++ b/arch/ia64/vmlinux.lds.S @@ -70,20 +70,24 @@ SECTIONS #if defined(CONFIG_IA64_GENERIC) /* Machine Vector */ . = ALIGN(16); - machvec_start = .; .machvec : AT(ADDR(.machvec) - PAGE_OFFSET) - { *(.machvec) } - machvec_end = .; + { + machvec_start = .; + *(.machvec) + machvec_end = .; + } #endif /* Unwind info & table: */ . = ALIGN(8); .IA_64.unwind_info : AT(ADDR(.IA_64.unwind_info) - PAGE_OFFSET) { *(.IA_64.unwind_info*) } - ia64_unw_start = .; .IA_64.unwind : AT(ADDR(.IA_64.unwind) - PAGE_OFFSET) - { *(.IA_64.unwind*) } - ia64_unw_end = .; + { + ia64_unw_start = .; + *(.IA_64.unwind*) + ia64_unw_end = .; + } RODATA @@ -100,32 +104,38 @@ SECTIONS .init.data : AT(ADDR(.init.data) - PAGE_OFFSET) { *(.init.data) } - __initramfs_start = .; .init.ramfs : AT(ADDR(.init.ramfs) - PAGE_OFFSET) - { *(.init.ramfs) } - __initramfs_end = .; + { + __initramfs_start = .; + *(.init.ramfs) + __initramfs_end = .; + } . = ALIGN(16); - __setup_start = .; .init.setup : AT(ADDR(.init.setup) - PAGE_OFFSET) - { *(.init.setup) } - __setup_end = .; - __start___param = .; + { + __setup_start = .; + *(.init.setup) + __setup_end = .; + } __param : AT(ADDR(__param) - PAGE_OFFSET) - { *(__param) } - __stop___param = .; - __initcall_start = .; + { + __start___param = .; + *(__param) + __stop___param = .; + } .initcall.init : AT(ADDR(.initcall.init) - PAGE_OFFSET) { - *(.initcall1.init) - *(.initcall2.init) - *(.initcall3.init) - *(.initcall4.init) - *(.initcall5.init) - *(.initcall6.init) - *(.initcall7.init) + __initcall_start = .; + *(.initcall1.init) + *(.initcall2.init) + *(.initcall3.init) + *(.initcall4.init) + *(.initcall5.init) + *(.initcall6.init) + *(.initcall7.init) + __initcall_end = .; } - __initcall_end = .; . = ALIGN(PAGE_SIZE); __init_end = .; -- cgit v1.2.3 From a79b0f25aab84a8ca491188454e39df51f329afe Mon Sep 17 00:00:00 2001 From: David Mosberger Date: Fri, 24 Jan 2003 02:39:20 -0800 Subject: ia64: Switch over to using place-relative ("ip"-relative) entries in the exception table. 
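With this change each 32-bit field of an exception-table entry holds the distance from the field itself to its target ("target - &field"), so entries stay valid wherever the enclosing object is loaded and no longer need a gp value to decode. Below is a minimal C sketch of the decode step (illustration only; the helper names are made up here, and the real code is in the mm/extable.c hunk that follows):

	struct exception_table_entry {
		int addr;	/* place-relative address of the faulting bundle */
		int cont;	/* place-relative continuation point; bit 2 set => clear r9 */
	};

	/* Decode a place-relative field by adding back the field's own address. */
	static inline unsigned long
	ex_to_ip (const struct exception_table_entry *e)
	{
		return (unsigned long) &e->addr + e->addr;
	}

	static inline unsigned long
	ex_to_cont (const struct exception_table_entry *e)
	{
		return (unsigned long) &e->cont + e->cont;
	}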
--- arch/ia64/mm/extable.c | 9 +++++---- arch/ia64/vmlinux.lds.S | 30 ++++++++++-------------------- include/asm-ia64/asmmacro.h | 16 ++++++++-------- include/asm-ia64/uaccess.h | 16 ++++++++-------- 4 files changed, 31 insertions(+), 40 deletions(-) diff --git a/arch/ia64/mm/extable.c b/arch/ia64/mm/extable.c index 104cfa54639d..898824e69a1a 100644 --- a/arch/ia64/mm/extable.c +++ b/arch/ia64/mm/extable.c @@ -17,14 +17,14 @@ search_extable (const struct exception_table_entry *first, { const struct exception_table_entry *mid; unsigned long mid_ip; - long diff, base = (long) first; + long diff; while (first <= last) { mid = &first[(last - first)/2]; - mid_ip = base + mid->addr; + mid_ip = (u64) &mid->addr + mid->addr; diff = mid_ip - ip; if (diff == 0) - return (void *) ((long) base + mid->cont); + return mid; else if (diff < 0) first = mid + 1; else @@ -36,7 +36,8 @@ search_extable (const struct exception_table_entry *first, void handle_exception (struct pt_regs *regs, const struct exception_table_entry *e) { - long fix = (long) e; + long fix = (u64) &e->cont + e->cont; + regs->r8 = -EFAULT; if (fix & 4) regs->r9 = 0; diff --git a/arch/ia64/vmlinux.lds.S b/arch/ia64/vmlinux.lds.S index d800f59edc99..aa2937b01067 100644 --- a/arch/ia64/vmlinux.lds.S +++ b/arch/ia64/vmlinux.lds.S @@ -30,28 +30,9 @@ SECTIONS _text = .; _stext = .; - .text.ivt : AT(ADDR(.text.ivt) - PAGE_OFFSET) - { - *(.text.ivt) - } - - /* - * Due to a linker bug (still present as of binutils 2.13.90.0.10), - * the exception table must come before any code that uses the - * uaccess.h macros; otherwise, the linker will silently truncate negative @secrel() - * values to 0!! Just love it when bugs like these sneak in... - */ - /* Exception table */ - . = ALIGN(16); - __ex_table : AT(ADDR(__ex_table) - PAGE_OFFSET) - { - __start___ex_table = .; - *(__ex_table) - __stop___ex_table = .; - } - .text : AT(ADDR(.text) - PAGE_OFFSET) { + *(.text.ivt) *(.text) } .text2 : AT(ADDR(.text2) - PAGE_OFFSET) @@ -64,6 +45,15 @@ SECTIONS /* Read-only data */ + /* Exception table */ + . = ALIGN(16); + __ex_table : AT(ADDR(__ex_table) - PAGE_OFFSET) + { + __start___ex_table = .; + *(__ex_table) + __stop___ex_table = .; + } + /* Global data */ _data = .; diff --git a/include/asm-ia64/asmmacro.h b/include/asm-ia64/asmmacro.h index 556ef09977b4..b1fcb60b7ba7 100644 --- a/include/asm-ia64/asmmacro.h +++ b/include/asm-ia64/asmmacro.h @@ -44,18 +44,18 @@ name: .previous #if __GNUC__ >= 3 -# define EX(y,x...) \ - .xdata4 "__ex_table", @secrel(99f), @secrel(y); \ +# define EX(y,x...) \ + .xdata4 "__ex_table", 99f-., y-.; \ [99:] x -# define EXCLR(y,x...) \ - .xdata4 "__ex_table", @secrel(99f), @secrel(y)+4; \ +# define EXCLR(y,x...) \ + .xdata4 "__ex_table", 99f-., y-.+4; \ [99:] x #else -# define EX(y,x...) \ - .xdata4 "__ex_table", @secrel(99f), @secrel(y); \ +# define EX(y,x...) \ + .xdata4 "__ex_table", 99f-., y-.; \ 99: x -# define EXCLR(y,x...) \ - .xdata4 "__ex_table", @secrel(99f), @secrel(y)+4; \ +# define EXCLR(y,x...) 
\ + .xdata4 "__ex_table", 99f-., y-.+4; \ 99: x #endif diff --git a/include/asm-ia64/uaccess.h b/include/asm-ia64/uaccess.h index 54345fbbd630..53268bee2f2e 100644 --- a/include/asm-ia64/uaccess.h +++ b/include/asm-ia64/uaccess.h @@ -140,25 +140,25 @@ asm (".section \"__ex_table\", \"a\"\n\t.previous"); #define __get_user_64(addr) \ asm ("\n"_LL"\tld8 %0=%2%P2\t// %0 and %1 get overwritten by exception handler\n" \ - "\t.xdata4 \"__ex_table\", @secrel(1b), @secrel(1f)+4\n" \ + "\t.xdata4 \"__ex_table\", 1b-., 1f-.+4\n" \ _LL \ : "=r"(__gu_val), "=r"(__gu_err) : "m"(__m(addr)), "1"(__gu_err)); #define __get_user_32(addr) \ asm ("\n"_LL"\tld4 %0=%2%P2\t// %0 and %1 get overwritten by exception handler\n" \ - "\t.xdata4 \"__ex_table\", @secrel(1b), @secrel(1f)+4\n" \ + "\t.xdata4 \"__ex_table\", 1b-., 1f-.+4\n" \ _LL \ : "=r"(__gu_val), "=r"(__gu_err) : "m"(__m(addr)), "1"(__gu_err)); #define __get_user_16(addr) \ asm ("\n"_LL"\tld2 %0=%2%P2\t// %0 and %1 get overwritten by exception handler\n" \ - "\t.xdata4 \"__ex_table\", @secrel(1b), @secrel(1f)+4\n" \ + "\t.xdata4 \"__ex_table\", 1b-., 1f-.+4\n" \ _LL \ : "=r"(__gu_val), "=r"(__gu_err) : "m"(__m(addr)), "1"(__gu_err)); #define __get_user_8(addr) \ asm ("\n"_LL"\tld1 %0=%2%P2\t// %0 and %1 get overwritten by exception handler\n" \ - "\t.xdata4 \"__ex_table\", @secrel(1b), @secrel(1f)+4\n" \ + "\t.xdata4 \"__ex_table\", 1b-., 1f-.+4\n" \ _LL \ : "=r"(__gu_val), "=r"(__gu_err) : "m"(__m(addr)), "1"(__gu_err)); @@ -202,28 +202,28 @@ extern void __put_user_unknown (void); #define __put_user_64(x,addr) \ asm volatile ( \ "\n"_LL"\tst8 %1=%r2%P1\t// %0 gets overwritten by exception handler\n" \ - "\t.xdata4 \"__ex_table\", @secrel(1b), @secrel(1f)\n" \ + "\t.xdata4 \"__ex_table\", 1b-., 1f-.\n" \ _LL \ : "=r"(__pu_err) : "m"(__m(addr)), "rO"(x), "0"(__pu_err)) #define __put_user_32(x,addr) \ asm volatile ( \ "\n"_LL"\tst4 %1=%r2%P1\t// %0 gets overwritten by exception handler\n" \ - "\t.xdata4 \"__ex_table\", @secrel(1b), @secrel(1f)\n" \ + "\t.xdata4 \"__ex_table\", 1b-., 1f-.\n" \ _LL \ : "=r"(__pu_err) : "m"(__m(addr)), "rO"(x), "0"(__pu_err)) #define __put_user_16(x,addr) \ asm volatile ( \ "\n"_LL"\tst2 %1=%r2%P1\t// %0 gets overwritten by exception handler\n" \ - "\t.xdata4 \"__ex_table\", @secrel(1b), @secrel(1f)\n" \ + "\t.xdata4 \"__ex_table\", 1b-., 1f-.\n" \ _LL \ : "=r"(__pu_err) : "m"(__m(addr)), "rO"(x), "0"(__pu_err)) #define __put_user_8(x,addr) \ asm volatile ( \ "\n"_LL"\tst1 %1=%r2%P1\t// %0 gets overwritten by exception handler\n" \ - "\t.xdata4 \"__ex_table\", @secrel(1b), @secrel(1f)\n" \ + "\t.xdata4 \"__ex_table\", 1b-., 1f-.\n" \ _LL \ : "=r"(__pu_err) : "m"(__m(addr)), "rO"(x), "0"(__pu_err)) -- cgit v1.2.3 From fd89d145972346c7cd167c9383e7fe34cfc60169 Mon Sep 17 00:00:00 2001 From: David Mosberger Date: Fri, 24 Jan 2003 03:45:41 -0800 Subject: ia64: Check for acceptable version of gas before trying to build the kernel. Old gas versions will result in buggy kernels that will bugcheck all over the place (usually mount() is the first one to fail). 
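For reference, the probe below works like this: scripts/check-gas assembles a two-line file whose .xdata4 directive stores the self-relative expression "1b-." into .data, then uses objdump -r to check that the stored word carries a relocation against .text; a buggy gas silently resolves the negative difference to zero instead. The C fragment below is an illustration only (not part of the patch, and the predicate name is made up) of why such truncation is fatal:

	/*
	 * If gas truncated the self-relative word to 0, the place-relative
	 * decode yields the address of the entry itself -- a location inside
	 * __ex_table that can never equal a faulting text address -- so every
	 * exception-table lookup misses and the fault ends in a bugcheck.
	 */
	struct exception_table_entry { int addr; int cont; };

	static inline int
	entry_was_truncated (const struct exception_table_entry *e)
	{
		return (unsigned long) &e->addr + e->addr == (unsigned long) &e->addr;	/* i.e., addr == 0 */
	}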
--- arch/ia64/Makefile | 10 ++++++++ arch/ia64/ia32/binfmt_elf32.c | 1 - arch/ia64/kernel/traps.c | 2 +- arch/ia64/scripts/check-gas | 11 +++++++++ arch/ia64/scripts/check-gas-asm.S | 2 ++ include/asm-ia64/uaccess.h | 50 ++++++++++++++++----------------- 6 files changed, 44 insertions(+), 32 deletions(-) create mode 100755 arch/ia64/scripts/check-gas create mode 100644 arch/ia64/scripts/check-gas-asm.S diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile index e2beb8cee8ca..558ba2b91f3d 100644 --- a/arch/ia64/Makefile +++ b/arch/ia64/Makefile @@ -23,6 +23,16 @@ CFLAGS_KERNEL := -mconstant-gp GCC_VERSION=$(shell $(CC) -v 2>&1 | fgrep 'gcc version' | cut -f3 -d' ' | cut -f1 -d'.') +GAS_STATUS=$(shell arch/ia64/scripts/check-gas $(CC)) + +ifeq ($(GAS_STATUS),buggy) +$(error Sorry, you need a newer version of the assembler, one that is built from \ + a source-tree that post-dates 18-Dec-2002. You can find a pre-compiled \ + static binary of such an assembler at: \ + \ + ftp://ftp.hpl.hp.com/pub/linux-ia64/gas-030124.tar.gz) +endif + ifneq ($(GCC_VERSION),2) cflags-y += -frename-registers --param max-inline-insns=5000 endif diff --git a/arch/ia64/ia32/binfmt_elf32.c b/arch/ia64/ia32/binfmt_elf32.c index fbcbe4a2c74a..790682a0e88c 100644 --- a/arch/ia64/ia32/binfmt_elf32.c +++ b/arch/ia64/ia32/binfmt_elf32.c @@ -44,7 +44,6 @@ extern void put_dirty_page (struct task_struct * tsk, struct page *page, unsigne static void elf32_set_personality (void); -#define ELF_PLAT_INIT(_r) ia64_elf32_init(_r) #define setup_arg_pages(bprm) ia32_setup_arg_pages(bprm) #define elf_map elf32_map diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index c853cd530f55..8b4fa9e68404 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -142,7 +142,7 @@ ia64_bad_break (unsigned long break_num, struct pt_regs *regs) switch (break_num) { case 0: /* unknown error (used by GCC for __builtin_abort()) */ - die_if_kernel("bad break", regs, break_num); + die_if_kernel("bugcheck!", regs, break_num); sig = SIGILL; code = ILL_ILLOPC; break; diff --git a/arch/ia64/scripts/check-gas b/arch/ia64/scripts/check-gas new file mode 100755 index 000000000000..ef652caf82ae --- /dev/null +++ b/arch/ia64/scripts/check-gas @@ -0,0 +1,11 @@ +#!/bin/sh +dir=$(dirname $0) +CC=$1 +$CC -c $dir/check-gas-asm.S +res=$(objdump -r --section .data check-gas-asm.o | fgrep 00004 | tr -s ' ' |cut -f3 -d' ') +if [ $res != ".text" ]; then + echo buggy +else + echo good +fi +exit 0 diff --git a/arch/ia64/scripts/check-gas-asm.S b/arch/ia64/scripts/check-gas-asm.S new file mode 100644 index 000000000000..010e1d227e5d --- /dev/null +++ b/arch/ia64/scripts/check-gas-asm.S @@ -0,0 +1,2 @@ +[1:] nop 0 + .xdata4 ".data", 0, 1b-. diff --git a/include/asm-ia64/uaccess.h b/include/asm-ia64/uaccess.h index 53268bee2f2e..b4bfc4ca9739 100644 --- a/include/asm-ia64/uaccess.h +++ b/include/asm-ia64/uaccess.h @@ -128,38 +128,28 @@ struct __large_struct { unsigned long buf[100]; }; /* We need to declare the __ex_table section before we can use it in .xdata.
*/ asm (".section \"__ex_table\", \"a\"\n\t.previous"); -#if __GNUC__ >= 3 -# define GAS_HAS_LOCAL_TAGS /* define if gas supports local tags a la [1:] */ -#endif - -#ifdef GAS_HAS_LOCAL_TAGS -# define _LL "[1:]" -#else -# define _LL "1:" -#endif - #define __get_user_64(addr) \ - asm ("\n"_LL"\tld8 %0=%2%P2\t// %0 and %1 get overwritten by exception handler\n" \ - "\t.xdata4 \"__ex_table\", 1b-., 1f-.+4\n" \ - _LL \ + asm ("\n[1:]\tld8 %0=%2%P2\t// %0 and %1 get overwritten by exception handler\n" \ + "\t.xdata4 \"__ex_table\", 1b-., 1f-.+4\n" \ + "[1:]" \ : "=r"(__gu_val), "=r"(__gu_err) : "m"(__m(addr)), "1"(__gu_err)); #define __get_user_32(addr) \ - asm ("\n"_LL"\tld4 %0=%2%P2\t// %0 and %1 get overwritten by exception handler\n" \ - "\t.xdata4 \"__ex_table\", 1b-., 1f-.+4\n" \ - _LL \ + asm ("\n[1:]\tld4 %0=%2%P2\t// %0 and %1 get overwritten by exception handler\n" \ + "\t.xdata4 \"__ex_table\", 1b-., 1f-.+4\n" \ + "[1:]" \ : "=r"(__gu_val), "=r"(__gu_err) : "m"(__m(addr)), "1"(__gu_err)); #define __get_user_16(addr) \ - asm ("\n"_LL"\tld2 %0=%2%P2\t// %0 and %1 get overwritten by exception handler\n" \ - "\t.xdata4 \"__ex_table\", 1b-., 1f-.+4\n" \ - _LL \ + asm ("\n[1:]\tld2 %0=%2%P2\t// %0 and %1 get overwritten by exception handler\n" \ + "\t.xdata4 \"__ex_table\", 1b-., 1f-.+4\n" \ + "[1:]" \ : "=r"(__gu_val), "=r"(__gu_err) : "m"(__m(addr)), "1"(__gu_err)); #define __get_user_8(addr) \ - asm ("\n"_LL"\tld1 %0=%2%P2\t// %0 and %1 get overwritten by exception handler\n" \ - "\t.xdata4 \"__ex_table\", 1b-., 1f-.+4\n" \ - _LL \ + asm ("\n[1:]\tld1 %0=%2%P2\t// %0 and %1 get overwritten by exception handler\n" \ + "\t.xdata4 \"__ex_table\", 1b-., 1f-.+4\n" \ + "[1:]" \ : "=r"(__gu_val), "=r"(__gu_err) : "m"(__m(addr)), "1"(__gu_err)); extern void __put_user_unknown (void); @@ -201,30 +191,30 @@ extern void __put_user_unknown (void); */ #define __put_user_64(x,addr) \ asm volatile ( \ - "\n"_LL"\tst8 %1=%r2%P1\t// %0 gets overwritten by exception handler\n" \ + "\n[1:]\tst8 %1=%r2%P1\t// %0 gets overwritten by exception handler\n" \ "\t.xdata4 \"__ex_table\", 1b-., 1f-.\n" \ - _LL \ + "[1:]" \ : "=r"(__pu_err) : "m"(__m(addr)), "rO"(x), "0"(__pu_err)) #define __put_user_32(x,addr) \ asm volatile ( \ - "\n"_LL"\tst4 %1=%r2%P1\t// %0 gets overwritten by exception handler\n" \ + "\n[1:]\tst4 %1=%r2%P1\t// %0 gets overwritten by exception handler\n" \ "\t.xdata4 \"__ex_table\", 1b-., 1f-.\n" \ - _LL \ + "[1:]" \ : "=r"(__pu_err) : "m"(__m(addr)), "rO"(x), "0"(__pu_err)) #define __put_user_16(x,addr) \ asm volatile ( \ - "\n"_LL"\tst2 %1=%r2%P1\t// %0 gets overwritten by exception handler\n" \ + "\n[1:]\tst2 %1=%r2%P1\t// %0 gets overwritten by exception handler\n" \ "\t.xdata4 \"__ex_table\", 1b-., 1f-.\n" \ - _LL \ + "[1:]" \ : "=r"(__pu_err) : "m"(__m(addr)), "rO"(x), "0"(__pu_err)) #define __put_user_8(x,addr) \ asm volatile ( \ - "\n"_LL"\tst1 %1=%r2%P1\t// %0 gets overwritten by exception handler\n" \ + "\n[1:]\tst1 %1=%r2%P1\t// %0 gets overwritten by exception handler\n" \ "\t.xdata4 \"__ex_table\", 1b-., 1f-.\n" \ - _LL \ + "[1:]" \ : "=r"(__pu_err) : "m"(__m(addr)), "rO"(x), "0"(__pu_err)) /* -- cgit v1.2.3 From 466a43d5dab45f691fcd793efe57a47313ba98a3 Mon Sep 17 00:00:00 2001 From: David Mosberger Date: Sun, 26 Jan 2003 18:57:56 -0800 Subject: ia64: Fix typo. 
--- Documentation/ia64/fsys.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/ia64/fsys.txt b/Documentation/ia64/fsys.txt index 21dde92d0699..e258efdbea0b 100644 --- a/Documentation/ia64/fsys.txt +++ b/Documentation/ia64/fsys.txt @@ -3,8 +3,8 @@ Light-weight System Calls for IA-64 ----------------------------------- - Started: 13-Jan-2002 - Last update: 24-Jan-2002 + Started: 13-Jan-2003 + Last update: 24-Jan-2003 David Mosberger-Tang -- cgit v1.2.3 From a7f215bba8f45264d4ac3e77b265bb2a8c420ca1 Mon Sep 17 00:00:00 2001 From: Stéphane Eranian Date: Mon, 27 Jan 2003 19:35:44 -0800 Subject: [PATCH] ia64: fix PSR bug in perfmon code and switch to C99 initializers Please apply this small patch to your 2.5.59. It fixes the psr problem reported by the NEC guy and also cleans up the structure initializations in the model specific files. --- arch/ia64/kernel/perfmon.c | 9 +++++++-- arch/ia64/kernel/perfmon_generic.h | 12 ++++++------ arch/ia64/kernel/perfmon_itanium.h | 12 ++++++------ arch/ia64/kernel/perfmon_mckinley.h | 12 ++++++------ 4 files changed, 25 insertions(+), 20 deletions(-) diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 847930466f72..7989ff206488 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -3972,6 +3972,11 @@ pfm_inherit(struct task_struct *task, struct pt_regs *regs) sema_init(&nctx->ctx_restart_sem, 0); /* reset this semaphore to locked */ + /* + * propagate kernel psr in new context (used for first ctxsw in + */ + nctx->ctx_saved_psr = pfm_get_psr(); + /* link with new task */ thread->pfm_context = nctx; @@ -4268,8 +4273,8 @@ pfm_cleanup_notifiers(struct task_struct *task) static struct irqaction perfmon_irqaction = { .handler = pfm_interrupt_handler, - .flags = SA_INTERRUPT, - .name = "perfmon" + .flags = SA_INTERRUPT, + .name = "perfmon" }; int diff --git a/arch/ia64/kernel/perfmon_generic.h b/arch/ia64/kernel/perfmon_generic.h index 7c41aa84f8c1..72fb62692318 100644 --- a/arch/ia64/kernel/perfmon_generic.h +++ b/arch/ia64/kernel/perfmon_generic.h @@ -39,10 +39,10 @@ static pfm_reg_desc_t pmd_gen_desc[PMU_MAX_PMDS]={ * impl_pmcs, impl_pmds are computed at runtime to minimize errors! */ static pmu_config_t pmu_conf={ - disabled: 1, - ovfl_val: (1UL << 32) - 1, - num_ibrs: 8, - num_dbrs: 8, - pmd_desc: pfm_gen_pmd_desc, - pmc_desc: pfm_gen_pmc_desc + .disabled = 1, + .ovfl_val = (1UL << 32) - 1, + .num_ibrs = 8, + .num_dbrs = 8, + .pmd_desc = pfm_gen_pmd_desc, + .pmc_desc = pfm_gen_pmc_desc }; diff --git a/arch/ia64/kernel/perfmon_itanium.h b/arch/ia64/kernel/perfmon_itanium.h index 40dbcda09944..b0f9a8876961 100644 --- a/arch/ia64/kernel/perfmon_itanium.h +++ b/arch/ia64/kernel/perfmon_itanium.h @@ -59,12 +59,12 @@ static pfm_reg_desc_t pfm_ita_pmd_desc[PMU_MAX_PMDS]={ * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
*/ static pmu_config_t pmu_conf={ - disabled: 1, - ovfl_val: (1UL << 32) - 1, - num_ibrs: 8, - num_dbrs: 8, - pmd_desc: pfm_ita_pmd_desc, - pmc_desc: pfm_ita_pmc_desc + .disabled = 1, + .ovfl_val = (1UL << 32) - 1, + .num_ibrs = 8, + .num_dbrs = 8, + .pmd_desc = pfm_ita_pmd_desc, + .pmc_desc = pfm_ita_pmc_desc }; diff --git a/arch/ia64/kernel/perfmon_mckinley.h b/arch/ia64/kernel/perfmon_mckinley.h index 1ef6ce728620..eb761290b6bf 100644 --- a/arch/ia64/kernel/perfmon_mckinley.h +++ b/arch/ia64/kernel/perfmon_mckinley.h @@ -62,12 +62,12 @@ static pfm_reg_desc_t pfm_mck_pmd_desc[PMU_MAX_PMDS]={ * impl_pmcs, impl_pmds are computed at runtime to minimize errors! */ static pmu_config_t pmu_conf={ - disabled: 1, - ovfl_val: (1UL << 47) - 1, - num_ibrs: 8, - num_dbrs: 8, - pmd_desc: pfm_mck_pmd_desc, - pmc_desc: pfm_mck_pmc_desc + .disabled = 1, + .ovfl_val = (1UL << 47) - 1, + .num_ibrs = 8, + .num_dbrs = 8, + .pmd_desc = pfm_mck_pmd_desc, + .pmc_desc = pfm_mck_pmc_desc }; -- cgit v1.2.3 From 236d11b092d7d4ef3d558d7cc61b73842fb25df5 Mon Sep 17 00:00:00 2001 From: Arun Sharma Date: Mon, 27 Jan 2003 21:42:16 -0800 Subject: [PATCH] ia64: make hugetlb support work again --- arch/ia64/kernel/sys_ia64.c | 1 + arch/ia64/mm/hugetlbpage.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c index f58284887cf3..66a2e57e50f3 100644 --- a/arch/ia64/kernel/sys_ia64.c +++ b/arch/ia64/kernel/sys_ia64.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c index eabf53b0a881..a20df7ccb198 100644 --- a/arch/ia64/mm/hugetlbpage.c +++ b/arch/ia64/mm/hugetlbpage.c @@ -242,7 +242,7 @@ int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma) ret = -ENOMEM; goto out; } - add_to_page_cache(page, mapping, idx); + add_to_page_cache(page, mapping, idx, GFP_ATOMIC); unlock_page(page); } set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE); -- cgit v1.2.3 From 03c1d82243514e6da25cb371795e6ab1a20cf0ff Mon Sep 17 00:00:00 2001 From: Stéphane Eranian Date: Mon, 27 Jan 2003 23:52:35 -0800 Subject: [PATCH] ia64: fix return type of sys_perfmonctl() --- arch/ia64/kernel/perfmon.c | 9 +++++++-- include/asm-ia64/perfmon.h | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 7989ff206488..ffe9a0685245 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -2517,7 +2517,7 @@ check_task_state(struct task_struct *task) return ret; } -asmlinkage int +asmlinkage long sys_perfmonctl (pid_t pid, int cmd, void *arg, int count, long arg5, long arg6, long arg7, long arg8, long stack) { @@ -3977,6 +3977,11 @@ pfm_inherit(struct task_struct *task, struct pt_regs *regs) */ nctx->ctx_saved_psr = pfm_get_psr(); + /* + * propagate kernel psr in new context (used for first ctxsw in + */ + nctx->ctx_saved_psr = pfm_get_psr(); + /* link with new task */ thread->pfm_context = nctx; @@ -4430,7 +4435,7 @@ pfm_init_percpu(void) #else /* !CONFIG_PERFMON */ -asmlinkage int +asmlinkage long sys_perfmonctl (int pid, int cmd, void *req, int count, long arg5, long arg6, long arg7, long arg8, long stack) { diff --git a/include/asm-ia64/perfmon.h b/include/asm-ia64/perfmon.h index b1c2eef06cd9..110d439d45a2 100644 --- a/include/asm-ia64/perfmon.h +++ b/include/asm-ia64/perfmon.h @@ -161,7 +161,7 @@ typedef struct { unsigned long reserved; /* unused */ } perfmon_smpl_entry_t; 
-extern int perfmonctl(pid_t pid, int cmd, void *arg, int narg); +extern long perfmonctl(pid_t pid, int cmd, void *arg, int narg); #ifdef __KERNEL__ -- cgit v1.2.3 From a58d36df8a5c2f6dbaac980026c202d9ee2ef317 Mon Sep 17 00:00:00 2001 From: David Mosberger Date: Tue, 28 Jan 2003 22:36:14 -0800 Subject: ia64: Fix ARCH_DLINFO. --- include/asm-ia64/elf.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/asm-ia64/elf.h b/include/asm-ia64/elf.h index a00f0dccc22f..8fb191e7712e 100644 --- a/include/asm-ia64/elf.h +++ b/include/asm-ia64/elf.h @@ -107,10 +107,10 @@ extern int dump_task_fpu (struct task_struct *, elf_fpregset_t *); #define ELF_CORE_COPY_FPREGS(tsk, elf_fpregs) dump_task_fpu(tsk, elf_fpregs) #ifdef CONFIG_FSYS -#define ARCH_DLINFO \ -do { \ - extern int syscall_via_epc; \ - NEW_AUX_ENT(AT_SYSINFO, syscall_via_epc); \ +#define ARCH_DLINFO \ +do { \ + extern char syscall_via_epc[], __start_gate_section[]; \ + NEW_AUX_ENT(AT_SYSINFO, GATE_ADDR + (syscall_via_epc - __start_gate_section)); \ } while (0) #endif -- cgit v1.2.3 From 880700ed7a032e4860beb3086bbe2c10c97fc68b Mon Sep 17 00:00:00 2001 From: David Mosberger Date: Wed, 29 Jan 2003 06:16:56 -0800 Subject: ia64: Add light-weight version of getppid(). Detect at boottime whether the McKinley Erratum 9 workaround is needed and, if not, patch the workaround bundles with NOPs. --- arch/ia64/kernel/fsys.S | 53 +++++++++++++++++++++++++++++---- arch/ia64/kernel/setup.c | 33 +++++++++++++++++++++++-- arch/ia64/tools/print_offsets.c | 8 ++++--- arch/ia64/vmlinux.lds.S | 7 ++++++ include/asm-ia64/asmmacro.h | 14 ++++++++++- include/asm-ia64/bugs.h | 16 ++++++------- 6 files changed, 112 insertions(+), 19 deletions(-) diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S index 65c479e62d15..200d66891b1c 100644 --- a/arch/ia64/kernel/fsys.S +++ b/arch/ia64/kernel/fsys.S @@ -46,7 +46,7 @@ ENTRY(fsys_getpid) add r8=IA64_TASK_TGID_OFFSET,r16 ;; and r9=TIF_ALLWORK_MASK,r9 - ld4 r8=[r8] + ld4 r8=[r8] // r8 = current->tgid ;; cmp.ne p8,p0=0,r9 (p8) br.spnt.many fsys_fallback_syscall @@ -54,6 +54,51 @@ ENTRY(fsys_getpid) br.ret.sptk.many b6 END(fsys_getpid) +ENTRY(fsys_getppid) + add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16 + ;; + ld8 r17=[r17] // r17 = current->group_leader + add r9=TI_FLAGS+IA64_TASK_SIZE,r16 + ;; + + ld4 r9=[r9] + add r17=IA64_TASK_REAL_PARENT_OFFSET,r17 // r17 = &current->group_leader->real_parent + ;; + and r9=TIF_ALLWORK_MASK,r9 + +1: ld8 r18=[r17] // r18 = current->group_leader->real_parent + ;; + cmp.ne p8,p0=0,r9 + add r8=IA64_TASK_TGID_OFFSET,r18 // r8 = &current->group_leader->real_parent->tgid + ;; + + /* + * The .acq is needed to ensure that the read of tgid has returned its data before + * we re-check "real_parent". + */ + ld4.acq r8=[r8] // r8 = current->group_leader->real_parent->tgid +#ifdef CONFIG_SMP + /* + * Re-read current->group_leader->real_parent. + */ + ld8 r19=[r17] // r19 = current->group_leader->real_parent +(p8) br.spnt.many fsys_fallback_syscall + ;; + cmp.ne p6,p0=r18,r19 // did real_parent change? + mov r19=0 // i must not leak kernel bits... +(p6) br.cond.spnt.few 1b // yes -> redo the read of tgid and the check + ;; + mov r17=0 // i must not leak kernel bits... + mov r18=0 // i must not leak kernel bits... +#else + mov r17=0 // i must not leak kernel bits... + mov r18=0 // i must not leak kernel bits... + mov r19=0 // i must not leak kernel bits...
+#endif + MCKINLEY_E9_WORKAROUND + br.ret.sptk.many b6 +END(fsys_getppid) + ENTRY(fsys_set_tid_address) add r9=TI_FLAGS+IA64_TASK_SIZE,r16 ;; @@ -72,8 +117,8 @@ ENTRY(fsys_set_tid_address) (p7) st8 [r18]=r17 (p8) br.spnt.many fsys_fallback_syscall ;; - mov r17=0 // don't leak kernel bits... - mov r18=0 // don't leak kernel bits... + mov r17=0 // i must not leak kernel bits... + mov r18=0 // i must not leak kernel bits... MCKINLEY_E9_WORKAROUND br.ret.sptk.many b6 END(fsys_set_tid_address) @@ -100,7 +145,7 @@ fsyscall_table: data8 fsys_fallback_syscall // chown data8 fsys_fallback_syscall // lseek // 1040 data8 fsys_getpid - data8 fsys_fallback_syscall // getppid + data8 fsys_getppid // getppid data8 fsys_fallback_syscall // mount data8 fsys_fallback_syscall // umount data8 fsys_fallback_syscall // setuid // 1045 diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 65b0489b2d47..6e21b7e7c268 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -1,7 +1,7 @@ /* * Architecture-specific setup. * - * Copyright (C) 1998-2001 Hewlett-Packard Co + * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co * David Mosberger-Tang * Stephane Eranian * Copyright (C) 2000, Rohit Seth @@ -748,10 +748,39 @@ cpu_init (void) } if (ia64_pal_rse_info(&num_phys_stacked, 0) != 0) { - printk ("cpu_init: PAL RSE info failed, assuming 96 physical stacked regs\n"); + printk(KERN_WARNING"cpu_init: PAL RSE info failed; assuming 96 physical " + "stacked regs\n"); num_phys_stacked = 96; } /* size of physical stacked register partition plus 8 bytes: */ __get_cpu_var(ia64_phys_stacked_size_p8) = num_phys_stacked*8 + 8; platform_cpu_init(); } + +void +check_bugs (void) +{ + extern int __start___mckinley_e9_bundles[]; + extern int __end___mckinley_e9_bundles[]; + u64 *bundle; + int *wp; + + if (local_cpu_data->family == 0x1f && local_cpu_data->model == 0) + printk(KERN_INFO"check_bugs: leaving McKinley Errata 9 workaround enabled\n"); + else { + printk(KERN_INFO"check_bugs: McKinley Errata 9 workaround not needed; " + "disabling it\n"); + for (wp = __start___mckinley_e9_bundles; wp < __end___mckinley_e9_bundles; ++wp) { + bundle = (u64 *) ((char *) wp + *wp); + /* install a bundle of NOPs: */ + bundle[0] = 0x0000000100000000; + bundle[1] = 0x0004000000000200; + ia64_fc(bundle); + } + ia64_insn_group_barrier(); + ia64_sync_i(); + ia64_insn_group_barrier(); + ia64_srlz_i(); + ia64_insn_group_barrier(); + } +} diff --git a/arch/ia64/tools/print_offsets.c b/arch/ia64/tools/print_offsets.c index 2b32267fc8e4..5f52857e3dd0 100644 --- a/arch/ia64/tools/print_offsets.c +++ b/arch/ia64/tools/print_offsets.c @@ -52,11 +52,13 @@ tab[] = { "SIGFRAME_SIZE", sizeof (struct sigframe) }, { "UNW_FRAME_INFO_SIZE", sizeof (struct unw_frame_info) }, { "", 0 }, /* spacer */ - { "IA64_TASK_THREAD_KSP_OFFSET", offsetof (struct task_struct, thread.ksp) }, - { "IA64_TASK_THREAD_ON_USTACK_OFFSET", offsetof (struct task_struct, thread.on_ustack) }, + { "IA64_TASK_CLEAR_CHILD_TID_OFFSET",offsetof (struct task_struct, clear_child_tid) }, + { "IA64_TASK_GROUP_LEADER_OFFSET", offsetof (struct task_struct, group_leader) }, { "IA64_TASK_PID_OFFSET", offsetof (struct task_struct, pid) }, + { "IA64_TASK_REAL_PARENT_OFFSET", offsetof (struct task_struct, real_parent) }, { "IA64_TASK_TGID_OFFSET", offsetof (struct task_struct, tgid) }, - { "IA64_TASK_CLEAR_CHILD_TID_OFFSET",offsetof (struct task_struct, clear_child_tid) }, + { "IA64_TASK_THREAD_KSP_OFFSET", offsetof (struct task_struct, thread.ksp) }, + { 
"IA64_TASK_THREAD_ON_USTACK_OFFSET", offsetof (struct task_struct, thread.on_ustack) }, { "IA64_PT_REGS_CR_IPSR_OFFSET", offsetof (struct pt_regs, cr_ipsr) }, { "IA64_PT_REGS_CR_IIP_OFFSET", offsetof (struct pt_regs, cr_iip) }, { "IA64_PT_REGS_CR_IFS_OFFSET", offsetof (struct pt_regs, cr_ifs) }, diff --git a/arch/ia64/vmlinux.lds.S b/arch/ia64/vmlinux.lds.S index aa2937b01067..1389f87f513c 100644 --- a/arch/ia64/vmlinux.lds.S +++ b/arch/ia64/vmlinux.lds.S @@ -54,6 +54,13 @@ SECTIONS __stop___ex_table = .; } + __mckinley_e9_bundles : AT(ADDR(__mckinley_e9_bundles) - PAGE_OFFSET) + { + __start___mckinley_e9_bundles = .; + *(__mckinley_e9_bundles) + __end___mckinley_e9_bundles = .; + } + /* Global data */ _data = .; diff --git a/include/asm-ia64/asmmacro.h b/include/asm-ia64/asmmacro.h index b1fcb60b7ba7..d3c3a0998f47 100644 --- a/include/asm-ia64/asmmacro.h +++ b/include/asm-ia64/asmmacro.h @@ -59,10 +59,22 @@ name: 99: x #endif -#ifdef CONFIG_MCKINLEY +/* + * For now, we always put in the McKinley E9 workaround. On CPUs that don't need it, + * we'll patch out the work-around bundles with NOPs, so their impact is minimal. + */ +#define DO_MCKINLEY_E9_WORKAROUND +#ifdef DO_MCKINLEY_E9_WORKAROUND + .section "__mckinley_e9_bundles", "a" + .previous /* workaround for Itanium 2 Errata 9: */ # define MCKINLEY_E9_WORKAROUND \ + .xdata4 "__mckinley_e9_bundles", 1f-.; \ +1:{ .mib; \ + nop.m 0; \ + nop.i 0; \ br.call.sptk.many b7=1f;; \ + }; \ 1: #else # define MCKINLEY_E9_WORKAROUND diff --git a/include/asm-ia64/bugs.h b/include/asm-ia64/bugs.h index c74d2261ccc3..e94677a3b22f 100644 --- a/include/asm-ia64/bugs.h +++ b/include/asm-ia64/bugs.h @@ -4,16 +4,14 @@ * Needs: * void check_bugs(void); * - * Copyright (C) 1998, 1999 Hewlett-Packard Co - * Copyright (C) 1998, 1999 David Mosberger-Tang + * Copyright (C) 1998, 1999, 2003 Hewlett-Packard Co + * David Mosberger-Tang */ +#ifndef _ASM_IA64_BUGS_H +#define _ASM_IA64_BUGS_H #include -/* - * I don't know of any ia-64 bugs yet.. 
- */ -static void -check_bugs (void) -{ -} +extern void check_bugs (void); + +#endif /* _ASM_IA64_BUGS_H */ -- cgit v1.2.3 From 82ae989b12fc7863465e00a7c5a50673613362c4 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 3 Feb 2003 22:53:24 -0600 Subject: [PATCH] qlogic fix Linus's current BK tree needs the following build fix: --- drivers/scsi/qlogicfas.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/qlogicfas.c b/drivers/scsi/qlogicfas.c index ec1d109ae7a2..fcd8db40018c 100644 --- a/drivers/scsi/qlogicfas.c +++ b/drivers/scsi/qlogicfas.c @@ -348,7 +348,7 @@ static void ql_icmd(Scsi_Cmnd * cmd) /**/ outb(qlcfg5, qbase + 5); /* select timer */ outb(qlcfg9 & 7, qbase + 9); /* prescaler */ /* outb(0x99, qbase + 5); */ - outb(cmd->target, qbase + 4); + outb(cmd->device->id, qbase + 4); for (i = 0; i < cmd->cmd_len; i++) outb(cmd->cmnd[i], qbase + 2); @@ -573,7 +573,7 @@ static int qlogicfas_command(Scsi_Cmnd * cmd) * Non-irq version */ - if (cmd->target == qinitid) + if (cmd->device->id == qinitid) return (DID_BAD_TARGET << 16); ql_icmd(cmd); if ((k = ql_wai())) @@ -590,7 +590,7 @@ static int qlogicfas_command(Scsi_Cmnd * cmd) int qlogicfas_queuecommand(Scsi_Cmnd * cmd, void (*done) (Scsi_Cmnd *)) { - if (cmd->target == qinitid) { + if (cmd->device->id == qinitid) { cmd->result = DID_BAD_TARGET << 16; done(cmd); return 0; -- cgit v1.2.3 From ed12fbdddca29f5e8f5c2cbf75c909a8992aa28d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 4 Feb 2003 02:40:21 -0600 Subject: [PATCH] remove __scsi_add_host now that scsi_add_host accepts a NULL dev argument we don't need it anymore. --- drivers/scsi/hosts.c | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index 6ca0adf6706d..7e474fe3ccf3 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -285,7 +285,15 @@ int scsi_remove_host(struct Scsi_Host *shost) return 0; } -int __scsi_add_host(struct Scsi_Host *shost) +/** + * scsi_add_host - add a scsi host + * @shost: scsi host pointer to add + * @dev: a struct device of type scsi class + * + * Return value: + * 0 on success / != 0 for error + **/ +int scsi_add_host(struct Scsi_Host *shost, struct device *dev) { Scsi_Host_Template *sht = shost->hostt; struct scsi_device *sdev; @@ -294,6 +302,11 @@ int __scsi_add_host(struct Scsi_Host *shost) printk(KERN_INFO "scsi%d : %s\n", shost->host_no, sht->info ?
sht->info(shost) : sht->name); + if (dev) { + dev->class_data = shost; + shost->host_gendev = dev; + } + scsi_scan_host(shost); list_for_each_entry (sdev, &shost->my_devices, siblings) { @@ -305,23 +318,6 @@ int __scsi_add_host(struct Scsi_Host *shost) return saved_error; } -/** - * scsi_add_host - add a scsi host - * @shost: scsi host pointer to add - * @dev: a struct device of type scsi class - * - * Return value: - * 0 on success / != 0 for error - **/ -int scsi_add_host(struct Scsi_Host *shost, struct device *dev) -{ - if (dev) { - dev->class_data = shost; - shost->host_gendev = dev; - } - return __scsi_add_host(shost); -} - /** * scsi_unregister - unregister a scsi host * @shost: scsi host to be unregistered @@ -523,7 +519,7 @@ int scsi_register_host(Scsi_Host_Template *shost_tp) */ list_for_each_entry(shost, &scsi_host_list, sh_list) if (shost->hostt == shost_tp) - if (__scsi_add_host(shost)) + if (scsi_add_host(shost, NULL)) goto out_of_space; return 0; -- cgit v1.2.3 From 2995a98103285a91a599ef06d1e419557b30dc56 Mon Sep 17 00:00:00 2001 From: David Mosberger Date: Tue, 4 Feb 2003 01:08:00 -0800 Subject: ia64: Use printk severity-levels where appropriate. Triggered by analysis done by Philipp Marek. --- arch/ia64/dig/setup.c | 4 +-- arch/ia64/hp/common/sba_iommu.c | 28 +++++++-------- arch/ia64/hp/sim/simeth.c | 20 ++++++----- arch/ia64/hp/sim/simscsi.c | 9 ++--- arch/ia64/hp/sim/simserial.c | 40 ++++++++++----------- arch/ia64/ia32/ia32_ioctl.c | 2 +- arch/ia64/kernel/acpi.c | 32 ++++++++--------- arch/ia64/kernel/brl_emu.c | 2 +- arch/ia64/kernel/efi.c | 22 ++++++------ arch/ia64/kernel/iosapic.c | 40 ++++++++++----------- arch/ia64/kernel/irq.c | 10 +++--- arch/ia64/kernel/machvec.c | 2 +- arch/ia64/kernel/mca.c | 26 +++++++------- arch/ia64/kernel/palinfo.c | 37 ++++++++++--------- arch/ia64/kernel/perfmon.c | 72 +++++++++++++++++++------------------ arch/ia64/kernel/perfmon_mckinley.h | 4 +-- arch/ia64/kernel/sal.c | 16 ++++----- arch/ia64/kernel/setup.c | 19 +++++----- arch/ia64/kernel/smpboot.c | 24 ++++++------- arch/ia64/kernel/time.c | 21 ++++++----- arch/ia64/kernel/traps.c | 22 +++++++----- arch/ia64/kernel/unwind.c | 17 ++++----- arch/ia64/lib/swiotlb.c | 2 +- arch/ia64/mm/discontig.c | 2 +- arch/ia64/mm/fault.c | 2 +- arch/ia64/mm/init.c | 10 +++--- 26 files changed, 249 insertions(+), 236 deletions(-) diff --git a/arch/ia64/dig/setup.c b/arch/ia64/dig/setup.c index 66455679daf4..d58003f1ad02 100644 --- a/arch/ia64/dig/setup.c +++ b/arch/ia64/dig/setup.c @@ -3,7 +3,7 @@ * * Copyright (C) 1999 Intel Corp. 
* Copyright (C) 1999, 2001 Hewlett-Packard Co - * Copyright (C) 1999, 2001 David Mosberger-Tang + * Copyright (C) 1999, 2001, 2003 David Mosberger-Tang * Copyright (C) 1999 VA Linux Systems * Copyright (C) 1999 Walt Drummond * Copyright (C) 1999 Vijay Chander @@ -56,7 +56,7 @@ dig_setup (char **cmdline_p) if (!ia64_boot_param->console_info.num_rows || !ia64_boot_param->console_info.num_cols) { - printk("dig_setup: warning: invalid screen-info, guessing 80x25\n"); + printk(KERN_WARNING "dig_setup: warning: invalid screen-info, guessing 80x25\n"); orig_x = 0; orig_y = 0; num_cols = 80; diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index eea0618b1e7a..7b1af476ded8 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -288,20 +288,19 @@ sba_dump_pdir_entry(struct ioc *ioc, char *msg, uint pide) unsigned long *rptr = (unsigned long *) &(ioc->res_map[(pide >>3) & ~(sizeof(unsigned long) - 1)]); uint rcnt; - /* printk(KERN_DEBUG "SBA: %s rp %p bit %d rval 0x%lx\n", */ - printk("SBA: %s rp %p bit %d rval 0x%lx\n", + printk(KERN_DEBUG "SBA: %s rp %p bit %d rval 0x%lx\n", msg, rptr, pide & (BITS_PER_LONG - 1), *rptr); rcnt = 0; while (rcnt < BITS_PER_LONG) { - printk("%s %2d %p %016Lx\n", - (rcnt == (pide & (BITS_PER_LONG - 1))) - ? " -->" : " ", - rcnt, ptr, *ptr ); + printk(KERN_DEBUG "%s %2d %p %016Lx\n", + (rcnt == (pide & (BITS_PER_LONG - 1))) + ? " -->" : " ", + rcnt, ptr, *ptr ); rcnt++; ptr++; } - printk("%s", msg); + printk(KERN_DEBUG "%s", msg); } @@ -363,11 +362,9 @@ static void sba_dump_sg(struct ioc *ioc, struct scatterlist *startsg, int nents) { while (nents-- > 0) { - printk(" %d : DMA %08lx/%05x CPU %p\n", - nents, - (unsigned long) sba_sg_iova(startsg), - sba_sg_iova_len(startsg), - sba_sg_address(startsg)); + printk(KERN_DEBUG " %d : DMA %08lx/%05x CPU %p\n", nents, + (unsigned long) sba_sg_iova(startsg), sba_sg_iova_len(startsg), + sba_sg_address(startsg)); startsg++; } } @@ -1451,9 +1448,10 @@ sba_common_init(struct sba_device *sba_dev) sba_dev->ioc[i].res_map; } else { u64 reserved_iov; - + /* Yet another 1.x hack */ - printk("zx1 1.x: Starting resource hint offset into IOV space to avoid initial zero value IOVA\n"); + printk(KERN_DEBUG "zx1 1.x: Starting resource hint offset into " + "IOV space to avoid initial zero value IOVA\n"); sba_dev->ioc[i].res_hint = (unsigned long *) &(sba_dev->ioc[i].res_map[L1_CACHE_BYTES]); @@ -1632,7 +1630,7 @@ void __init sba_init(void) device->slot_name, hpa); if ((hw_rev & 0xFF) < 0x20) { - printk("%s: SBA rev less than 2.0 not supported", DRIVER_NAME); + printk(KERN_INFO "%s: SBA rev less than 2.0 not supported", DRIVER_NAME); return; } diff --git a/arch/ia64/hp/sim/simeth.c b/arch/ia64/hp/sim/simeth.c index b03af9dca130..d54ad634096b 100644 --- a/arch/ia64/hp/sim/simeth.c +++ b/arch/ia64/hp/sim/simeth.c @@ -1,7 +1,7 @@ /* * Simulated Ethernet Driver * - * Copyright (C) 1999-2001 Hewlett-Packard Co + * Copyright (C) 1999-2001, 2003 Hewlett-Packard Co * Stephane Eranian */ #include @@ -116,7 +116,7 @@ simeth_probe (void) { int r; - printk("simeth: v%s\n", simeth_version); + printk(KERN_INFO "simeth: v%s\n", simeth_version); r = simeth_probe1(); @@ -235,7 +235,8 @@ simeth_probe1(void) /* Fill in the fields of the device structure with ethernet-generic values. 
*/ ether_setup(dev); - printk("%s: hosteth=%s simfd=%d, HwAddr", dev->name, simeth_device, local->simfd); + printk(KERN_INFO "%s: hosteth=%s simfd=%d, HwAddr", + dev->name, simeth_device, local->simfd); for(i = 0; i < ETH_ALEN; i++) { printk(" %2.2x", dev->dev_addr[i]); } @@ -251,7 +252,7 @@ static int simeth_open(struct net_device *dev) { if (request_irq(dev->irq, simeth_interrupt, 0, "simeth", dev)) { - printk ("simeth: unable to get IRQ %d.\n", dev->irq); + printk(KERN_WARNING "simeth: unable to get IRQ %d.\n", dev->irq); return -EAGAIN; } @@ -312,11 +313,12 @@ simeth_device_event(struct notifier_block *this,unsigned long event, void *ptr) if (strcmp(dev->name, ifa->ifa_label) == 0) break; } if ( ifa == NULL ) { - printk("simeth_open: can't find device %s's ifa\n", dev->name); + printk(KERN_ERR "simeth_open: can't find device %s's ifa\n", dev->name); return NOTIFY_DONE; } - printk("simeth_device_event: %s ipaddr=0x%x\n", dev->name, htonl(ifa->ifa_local)); + printk(KERN_INFO "simeth_device_event: %s ipaddr=0x%x\n", + dev->name, htonl(ifa->ifa_local)); /* * XXX Fix me @@ -330,7 +332,8 @@ simeth_device_event(struct notifier_block *this,unsigned long event, void *ptr) netdev_attach(local->simfd, dev->irq, htonl(ifa->ifa_local)): netdev_detach(local->simfd); - printk("simeth: netdev_attach/detach: event=%s ->%d\n", event == NETDEV_UP ? "attach":"detach", r); + printk(KERN_INFO "simeth: netdev_attach/detach: event=%s ->%d\n", + event == NETDEV_UP ? "attach":"detach", r); return NOTIFY_DONE; } @@ -460,7 +463,8 @@ simeth_rx(struct net_device *dev) */ len = netdev_read(local->simfd, skb->data, SIMETH_FRAME_SIZE); if ( len == 0 ) { - if ( simeth_debug > 0 ) printk(KERN_WARNING "%s: count=%d netdev_read=0\n", dev->name, SIMETH_RECV_MAX-rcv_count); + if ( simeth_debug > 0 ) printk(KERN_WARNING "%s: count=%d netdev_read=0\n", + dev->name, SIMETH_RECV_MAX-rcv_count); break; } #if 0 diff --git a/arch/ia64/hp/sim/simscsi.c b/arch/ia64/hp/sim/simscsi.c index 8ca6c3fec006..c85bd586d2ab 100644 --- a/arch/ia64/hp/sim/simscsi.c +++ b/arch/ia64/hp/sim/simscsi.c @@ -1,7 +1,7 @@ /* * Simulated SCSI driver. * - * Copyright (C) 1999, 2001-2002 Hewlett-Packard Co + * Copyright (C) 1999, 2001-2003 Hewlett-Packard Co * David Mosberger-Tang * Stephane Eranian * @@ -87,7 +87,8 @@ simscsi_setup (char *s) { /* XXX Fix me we may need to strcpy() ? */ if (strlen(s) > MAX_ROOT_LEN) { - printk("simscsi_setup: prefix too long---using default %s\n", simscsi_root); + printk(KERN_ERR "simscsi_setup: prefix too long---using default %s\n", + simscsi_root); } simscsi_root = s; return 1; @@ -354,7 +355,7 @@ simscsi_queuecommand (Scsi_Cmnd *sc, void (*done)(Scsi_Cmnd *)) break; case START_STOP: - printk("START_STOP\n"); + printk(KERN_ERR "START_STOP\n"); break; default: @@ -380,7 +381,7 @@ simscsi_queuecommand (Scsi_Cmnd *sc, void (*done)(Scsi_Cmnd *)) int simscsi_host_reset (Scsi_Cmnd *sc) { - printk ("simscsi_host_reset: not implemented\n"); + printk(KERN_ERR "simscsi_host_reset: not implemented\n"); return 0; } diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c index 184fa96bad1d..2ca3be2678a8 100644 --- a/arch/ia64/hp/sim/simserial.c +++ b/arch/ia64/hp/sim/simserial.c @@ -7,7 +7,7 @@ * case means sys_sim.c console (goes via the simulator). The code hereafter * is completely leveraged from the serial.c driver. 
* - * Copyright (C) 1999-2000, 2002 Hewlett-Packard Co + * Copyright (C) 1999-2000, 2002-2003 Hewlett-Packard Co * Stephane Eranian * David Mosberger-Tang * @@ -195,7 +195,7 @@ static void rs_interrupt_single(int irq, void *dev_id, struct pt_regs * regs) */ info = IRQ_ports[irq]; if (!info || !info->tty) { - printk("simrs_interrupt_single: info|tty=0 info=%p problem\n", info); + printk(KERN_INFO "simrs_interrupt_single: info|tty=0 info=%p problem\n", info); return; } /* @@ -219,13 +219,13 @@ static DECLARE_TASK_QUEUE(tq_serial); /* used to be at the top of the file */ static void do_serial_bh(void) { run_task_queue(&tq_serial); - printk("do_serial_bh: called\n"); + printk(KERN_ERR "do_serial_bh: called\n"); } #endif static void do_softint(void *private_) { - printk("simserial: do_softint called\n"); + printk(KERN_ERR "simserial: do_softint called\n"); } static void rs_put_char(struct tty_struct *tty, unsigned char ch) @@ -439,7 +439,7 @@ static void rs_throttle(struct tty_struct * tty) { if (I_IXOFF(tty)) rs_send_xchar(tty, STOP_CHAR(tty)); - printk("simrs_throttle called\n"); + printk(KERN_INFO "simrs_throttle called\n"); } static void rs_unthrottle(struct tty_struct * tty) @@ -452,7 +452,7 @@ static void rs_unthrottle(struct tty_struct * tty) else rs_send_xchar(tty, START_CHAR(tty)); } - printk("simrs_unthrottle called\n"); + printk(KERN_INFO "simrs_unthrottle called\n"); } /* @@ -474,29 +474,29 @@ static int rs_ioctl(struct tty_struct *tty, struct file * file, switch (cmd) { case TIOCMGET: - printk("rs_ioctl: TIOCMGET called\n"); + printk(KERN_INFO "rs_ioctl: TIOCMGET called\n"); return -EINVAL; case TIOCMBIS: case TIOCMBIC: case TIOCMSET: - printk("rs_ioctl: TIOCMBIS/BIC/SET called\n"); + printk(KERN_INFO "rs_ioctl: TIOCMBIS/BIC/SET called\n"); return -EINVAL; case TIOCGSERIAL: - printk("simrs_ioctl TIOCGSERIAL called\n"); + printk(KERN_INFO "simrs_ioctl TIOCGSERIAL called\n"); return 0; case TIOCSSERIAL: - printk("simrs_ioctl TIOCSSERIAL called\n"); + printk(KERN_INFO "simrs_ioctl TIOCSSERIAL called\n"); return 0; case TIOCSERCONFIG: - printk("rs_ioctl: TIOCSERCONFIG called\n"); + printk(KERN_INFO "rs_ioctl: TIOCSERCONFIG called\n"); return -EINVAL; case TIOCSERGETLSR: /* Get line status register */ - printk("rs_ioctl: TIOCSERGETLSR called\n"); + printk(KERN_INFO "rs_ioctl: TIOCSERGETLSR called\n"); return -EINVAL; case TIOCSERGSTRUCT: - printk("rs_ioctl: TIOCSERGSTRUCT called\n"); + printk(KERN_INFO "rs_ioctl: TIOCSERGSTRUCT called\n"); #if 0 if (copy_to_user((struct async_struct *) arg, info, sizeof(struct async_struct))) @@ -511,7 +511,7 @@ static int rs_ioctl(struct tty_struct *tty, struct file * file, * Caller should use TIOCGICOUNT to see which one it was */ case TIOCMIWAIT: - printk("rs_ioctl: TIOCMIWAIT: called\n"); + printk(KERN_INFO "rs_ioctl: TIOCMIWAIT: called\n"); return 0; /* * Get counter of input serial line interrupts (DCD,RI,DSR,CTS) @@ -520,13 +520,13 @@ static int rs_ioctl(struct tty_struct *tty, struct file * file, * RI where only 0->1 is counted. 
*/ case TIOCGICOUNT: - printk("rs_ioctl: TIOCGICOUNT called\n"); + printk(KERN_INFO "rs_ioctl: TIOCGICOUNT called\n"); return 0; case TIOCSERGWILD: case TIOCSERSWILD: /* "setserial -W" is called in Debian boot */ - printk ("TIOCSER?WILD ioctl obsolete, ignored.\n"); + printk (KERN_INFO "TIOCSER?WILD ioctl obsolete, ignored.\n"); return 0; default: @@ -596,7 +596,7 @@ static void shutdown(struct async_struct * info) IRQ_T(info), "serial", NULL); if (retval) - printk("serial shutdown: request_irq: error %d" + printk(KERN_ERR "serial shutdown: request_irq: error %d" " Couldn't reacquire IRQ.\n", retval); } else free_irq(state->irq, NULL); @@ -654,12 +654,12 @@ static void rs_close(struct tty_struct *tty, struct file * filp) * one, we've got real problems, since it means the * serial port won't be shutdown. */ - printk("rs_close: bad serial port count; tty->count is 1, " + printk(KERN_ERR "rs_close: bad serial port count; tty->count is 1, " "state->count is %d\n", state->count); state->count = 1; } if (--state->count < 0) { - printk("rs_close: bad serial port count for ttys%d: %d\n", + printk(KERN_ERR "rs_close: bad serial port count for ttys%d: %d\n", info->line, state->count); state->count = 0; } @@ -1013,7 +1013,7 @@ done: static inline void show_serial_version(void) { printk(KERN_INFO "%s version %s with", serial_name, serial_version); - printk(" no serial options enabled\n"); + printk(KERN_INFO " no serial options enabled\n"); } /* diff --git a/arch/ia64/ia32/ia32_ioctl.c b/arch/ia64/ia32/ia32_ioctl.c index f525f2453950..b25d9c5c6f88 100644 --- a/arch/ia64/ia32/ia32_ioctl.c +++ b/arch/ia64/ia32/ia32_ioctl.c @@ -510,6 +510,6 @@ sys32_ioctl (unsigned int fd, unsigned int cmd, unsigned int arg) return(sg_ioctl_trans(fd, cmd, arg)); } - printk("%x:unimplemented IA32 ioctl system call\n", cmd); + printk(KERN_ERR "%x:unimplemented IA32 ioctl system call\n", cmd); return -EINVAL; } diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index eaf9c9917121..b73b8e12161c 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -3,7 +3,7 @@ * * Copyright (C) 1999 VA Linux Systems * Copyright (C) 1999,2000 Walt Drummond - * Copyright (C) 2000, 2002 Hewlett-Packard Co. + * Copyright (C) 2000, 2002-2003 Hewlett-Packard Co. * David Mosberger-Tang * Copyright (C) 2000 Intel Corp. * Copyright (C) 2000,2001 J.I. 
Lee @@ -75,20 +75,20 @@ acpi_get_sysname (void) rsdp_phys = acpi_find_rsdp(); if (!rsdp_phys) { - printk("ACPI 2.0 RSDP not found, default to \"dig\"\n"); + printk(KERN_ERR "ACPI 2.0 RSDP not found, default to \"dig\"\n"); return "dig"; } rsdp = (struct acpi20_table_rsdp *) __va(rsdp_phys); if (strncmp(rsdp->signature, RSDP_SIG, sizeof(RSDP_SIG) - 1)) { - printk("ACPI 2.0 RSDP signature incorrect, default to \"dig\"\n"); + printk(KERN_ERR "ACPI 2.0 RSDP signature incorrect, default to \"dig\"\n"); return "dig"; } xsdt = (struct acpi_table_xsdt *) __va(rsdp->xsdt_address); hdr = &xsdt->header; if (strncmp(hdr->signature, XSDT_SIG, sizeof(XSDT_SIG) - 1)) { - printk("ACPI 2.0 XSDT signature incorrect, default to \"dig\"\n"); + printk(KERN_ERR "ACPI 2.0 XSDT signature incorrect, default to \"dig\"\n"); return "dig"; } @@ -199,7 +199,7 @@ acpi_request_vector (u32 int_type) /* correctable platform error interrupt */ vector = platform_intr_list[int_type]; } else - printk("acpi_request_vector(): invalid interrupt type\n"); + printk(KERN_ERR "acpi_request_vector(): invalid interrupt type\n"); return vector; } @@ -249,7 +249,7 @@ acpi_parse_lsapic (acpi_table_entry_header *header) acpi_table_print_madt_entry(header); - printk("CPU %d (0x%04x)", total_cpus, (lsapic->id << 8) | lsapic->eid); + printk(KERN_INFO "CPU %d (0x%04x)", total_cpus, (lsapic->id << 8) | lsapic->eid); if (lsapic->flags.enabled) { available_cpus++; @@ -478,8 +478,8 @@ acpi_numa_slit_init (struct acpi_table_slit *slit) len = sizeof(struct acpi_table_header) + 8 + slit->localities * slit->localities; if (slit->header.length != len) { - printk("ACPI 2.0 SLIT: size mismatch: %d expected, %d actual\n", - len, slit->header.length); + printk(KERN_ERR "ACPI 2.0 SLIT: size mismatch: %d expected, %d actual\n", + len, slit->header.length); memset(numa_slit, 10, sizeof(numa_slit)); return; } @@ -514,8 +514,8 @@ acpi_numa_memory_affinity_init (struct acpi_table_memory_affinity *ma) size = (size << 32) | ma->length_lo; if (num_memblks >= NR_MEMBLKS) { - printk("Too many mem chunks in SRAT. Ignoring %ld MBytes at %lx\n", - size/(1024*1024), paddr); + printk(KERN_ERR "Too many mem chunks in SRAT. Ignoring %ld MBytes at %lx\n", + size/(1024*1024), paddr); return; } @@ -545,8 +545,8 @@ acpi_numa_memory_affinity_init (struct acpi_table_memory_affinity *ma) if (min_hole_size) { if (min_hole_size > size) { - printk("Too huge memory hole. Ignoring %ld MBytes at %lx\n", - size/(1024*1024), paddr); + printk(KERN_ERR "Too huge memory hole. Ignoring %ld MBytes at %lx\n", + size/(1024*1024), paddr); return; } } @@ -605,8 +605,8 @@ acpi_numa_arch_fixup(void) for (i = 0; i < srat_num_cpus; i++) node_cpuid[i].nid = pxm_to_nid_map[node_cpuid[i].nid]; - printk("Number of logical nodes in system = %d\n", numnodes); - printk("Number of memory chunks in system = %d\n", num_memblks); + printk(KERN_INFO "Number of logical nodes in system = %d\n", numnodes); + printk(KERN_INFO "Number of memory chunks in system = %d\n", num_memblks); if (!slit_table) return; memset(numa_slit, -1, sizeof(numa_slit)); @@ -806,7 +806,7 @@ acpi_boot_init (char *cmdline) #ifdef CONFIG_SMP if (available_cpus == 0) { - printk("ACPI: Found 0 CPUS; assuming 1\n"); + printk(KERN_INFO "ACPI: Found 0 CPUS; assuming 1\n"); available_cpus = 1; /* We've got at least one of these, no? 
*/ } smp_boot_data.cpu_count = total_cpus; @@ -817,7 +817,7 @@ acpi_boot_init (char *cmdline) #endif #endif /* Make boot-up look pretty */ - printk("%d CPUs available, %d CPUs total\n", available_cpus, total_cpus); + printk(KERN_INFO "%d CPUs available, %d CPUs total\n", available_cpus, total_cpus); return 0; } diff --git a/arch/ia64/kernel/brl_emu.c b/arch/ia64/kernel/brl_emu.c index 6101686da2a9..541addcbe50c 100644 --- a/arch/ia64/kernel/brl_emu.c +++ b/arch/ia64/kernel/brl_emu.c @@ -195,7 +195,7 @@ ia64_emulate_brl (struct pt_regs *regs, unsigned long ar_ec) /* * The target address contains unimplemented bits. */ - printk("Woah! Unimplemented Instruction Address Trap!\n"); + printk(KERN_DEBUG "Woah! Unimplemented Instruction Address Trap!\n"); siginfo.si_signo = SIGILL; siginfo.si_errno = 0; siginfo.si_flags = 0; diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index 5a5c69720006..78c8c05096c8 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -5,7 +5,7 @@ * * Copyright (C) 1999 VA Linux Systems * Copyright (C) 1999 Walt Drummond - * Copyright (C) 1999-2002 Hewlett-Packard Co. + * Copyright (C) 1999-2003 Hewlett-Packard Co. * David Mosberger-Tang * Stephane Eranian * @@ -365,7 +365,7 @@ efi_memmap_walk (efi_freemem_callback_t callback, void *arg) prev_valid = 1; } else { if (curr.start < prev.start) - printk("Oops: EFI memory table not ordered!\n"); + printk(KERN_ERR "Oops: EFI memory table not ordered!\n"); if (prev.end == curr.start) { /* merge two consecutive memory ranges */ @@ -437,7 +437,8 @@ efi_map_pal_code (void) * dedicated ITR for the PAL code. */ if ((vaddr & mask) == (KERNEL_START & mask)) { - printk("%s: no need to install ITR for PAL code\n", __FUNCTION__); + printk(KERN_INFO "%s: no need to install ITR for PAL code\n", + __FUNCTION__); continue; } @@ -445,7 +446,7 @@ efi_map_pal_code (void) panic("Woah! PAL code size bigger than a granule!"); mask = ~((1 << IA64_GRANULE_SHIFT) - 1); - printk("CPU %d: mapping PAL code [0x%lx-0x%lx) into [0x%lx-0x%lx)\n", + printk(KERN_INFO "CPU %d: mapping PAL code [0x%lx-0x%lx) into [0x%lx-0x%lx)\n", smp_processor_id(), md->phys_addr, md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), vaddr & mask, (vaddr & mask) + IA64_GRANULE_SIZE); @@ -489,7 +490,7 @@ efi_init (void) } } if (mem_limit != ~0UL) - printk("Ignoring memory above %luMB\n", mem_limit >> 20); + printk(KERN_INFO "Ignoring memory above %luMB\n", mem_limit >> 20); efi.systab = __va(ia64_boot_param->efi_systab); @@ -501,7 +502,7 @@ efi_init (void) if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) panic("Woah! 
EFI system table signature incorrect\n"); if ((efi.systab->hdr.revision ^ EFI_SYSTEM_TABLE_REVISION) >> 16 != 0) - printk("Warning: EFI system table major version mismatch: " + printk(KERN_WARNING "Warning: EFI system table major version mismatch: " "got %d.%02d, expected %d.%02d\n", efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff, EFI_SYSTEM_TABLE_REVISION >> 16, EFI_SYSTEM_TABLE_REVISION & 0xffff); @@ -516,7 +517,7 @@ efi_init (void) vendor[i] = '\0'; } - printk("EFI v%u.%.02u by %s:", + printk(KERN_INFO "EFI v%u.%.02u by %s:", efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff, vendor); for (i = 0; i < efi.systab->nr_tables; i++) { @@ -608,7 +609,7 @@ efi_enter_virtual_mode (void) | _PAGE_PL_0 | _PAGE_AR_RW)); #else - printk("EFI_MEMORY_WC mapping\n"); + printk(KERN_INFO "EFI_MEMORY_WC mapping\n"); md->virt_addr = (u64) ioremap(md->phys_addr, 0); #endif } else if (md->attribute & EFI_MEMORY_WT) { @@ -618,7 +619,7 @@ efi_enter_virtual_mode (void) | _PAGE_PL_0 | _PAGE_AR_RW)); #else - printk("EFI_MEMORY_WT mapping\n"); + printk(KERN_INFO "EFI_MEMORY_WT mapping\n"); md->virt_addr = (u64) ioremap(md->phys_addr, 0); #endif } @@ -630,7 +631,8 @@ efi_enter_virtual_mode (void) efi_desc_size, ia64_boot_param->efi_memdesc_version, ia64_boot_param->efi_memmap); if (status != EFI_SUCCESS) { - printk("Warning: unable to switch EFI into virtual mode (status=%lu)\n", status); + printk(KERN_WARNING "warning: unable to switch EFI into virtual mode " + "(status=%lu)\n", status); return; } diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index a5d82b937b26..6e38d3909e7a 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -4,7 +4,7 @@ * Copyright (C) 1999 Intel Corp. * Copyright (C) 1999 Asit Mallick * Copyright (C) 2000-2002 J.I. Lee - * Copyright (C) 1999-2000, 2002 Hewlett-Packard Co. + * Copyright (C) 1999-2000, 2002-2003 Hewlett-Packard Co. 
* David Mosberger-Tang * Copyright (C) 1999 VA Linux Systems * Copyright (C) 1999,2000 Walt Drummond @@ -433,7 +433,7 @@ iosapic_reassign_vector (int vector) || iosapic_intr_info[vector].polarity || iosapic_intr_info[vector].trigger) { new_vector = ia64_alloc_vector(); - printk("Reassigning vector %d to %d\n", vector, new_vector); + printk(KERN_INFO "Reassigning vector %d to %d\n", vector, new_vector); memcpy(&iosapic_intr_info[new_vector], &iosapic_intr_info[vector], sizeof(struct iosapic_intr_info)); memset(&iosapic_intr_info[vector], 0, sizeof(struct iosapic_intr_info)); @@ -468,17 +468,17 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery, #else if (iosapic_address) { if (iosapic_intr_info[vector].addr && (iosapic_intr_info[vector].addr != iosapic_address)) - printk("WARN: register_intr: diff IOSAPIC ADDRESS for GSI 0x%x, vector %d\n", - gsi, vector); + printk(KERN_WARNING "warning: register_intr: diff IOSAPIC ADDRESS for " + "GSI 0x%x, vector %d\n", gsi, vector); iosapic_intr_info[vector].addr = iosapic_address; if (iosapic_intr_info[vector].gsi_base && (iosapic_intr_info[vector].gsi_base != gsi_base)) { - printk("WARN: register_intr: diff GSI base 0x%x for GSI 0x%x, vector %d\n", - gsi_base, gsi, vector); + printk(KERN_WARNING "warning: register_intr: diff GSI base 0x%x for " + "GSI 0x%x, vector %d\n", gsi_base, gsi, vector); } iosapic_intr_info[vector].gsi_base = gsi_base; } else if (!iosapic_intr_info[vector].addr) - printk("WARN: register_intr: invalid override for GSI 0x%x, vector %d\n", - gsi, vector); + printk(KERN_WARNING "warning: register_intr: invalid override for GSI 0x%x, " + "vector %d\n", gsi, vector); #endif if (edge_triggered) { iosapic_intr_info[vector].trigger = IOSAPIC_EDGE; @@ -491,9 +491,8 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery, idesc = irq_desc(vector); if (idesc->handler != irq_type) { if (idesc->handler != &no_irq_type) - printk("%s: changing vector %d from %s to %s\n", - __FUNCTION__, vector, idesc->handler->typename, - irq_type->typename); + printk(KERN_WARNING "%s: changing vector %d from %s to %s\n", + __FUNCTION__, vector, idesc->handler->typename, irq_type->typename); idesc->handler = irq_type; } } @@ -518,7 +517,7 @@ iosapic_register_intr (unsigned int gsi, register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY, polarity, edge_triggered, gsi_base, iosapic_address); - printk("GSI 0x%x(%s,%s) -> CPU 0x%04x vector %d\n", + printk(KERN_INFO "GSI 0x%x(%s,%s) -> CPU 0x%04x vector %d\n", gsi, (polarity ? "high" : "low"), (edge_triggered ? "edge" : "level"), dest, vector); @@ -560,14 +559,14 @@ iosapic_register_platform_intr (u32 int_type, unsigned int gsi, delivery = IOSAPIC_LOWEST_PRIORITY; break; default: - printk("iosapic_register_platform_irq(): invalid int type\n"); + printk(KERN_ERR "iosapic_register_platform_irq(): invalid int type\n"); return -1; } register_intr(gsi, vector, delivery, polarity, edge_triggered, gsi_base, iosapic_address); - printk("PLATFORM int 0x%x: GSI 0x%x(%s,%s) -> CPU 0x%04x vector %d\n", + printk(KERN_INFO "PLATFORM int 0x%x: GSI 0x%x(%s,%s) -> CPU 0x%04x vector %d\n", int_type, gsi, (polarity ? "high" : "low"), (edge_triggered ? 
"edge" : "level"), dest, vector); @@ -594,7 +593,7 @@ iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi, index = find_iosapic(gsi); if (index < 0) { - printk("ISA: No corresponding IOSAPIC found : ISA IRQ %u -> GSI 0x%x\n", + printk(KERN_ERR "ISA: No corresponding IOSAPIC found : ISA IRQ %u -> GSI 0x%x\n", isa_irq, gsi); return; } @@ -634,7 +633,7 @@ iosapic_init (unsigned long phys_addr, unsigned int gsi_base, int pcat_compat) * Disable the compatibility mode interrupts (8259 style), needs IN/OUT support * enabled. */ - printk("%s: Disabling PC-AT compatible 8259 interrupts\n", __FUNCTION__); + printk(KERN_INFO "%s: Disabling PC-AT compatible 8259 interrupts\n", __FUNCTION__); outb(0xff, 0xA1); outb(0xff, 0x21); } @@ -655,7 +654,7 @@ iosapic_init (unsigned long phys_addr, unsigned int gsi_base, int pcat_compat) iosapic_lists[num_iosapic].num_rte = num_rte; num_iosapic++; - printk(KERN_INFO" IOSAPIC v%x.%x, address 0x%lx, GSIs 0x%x-0x%x\n", + printk(KERN_INFO " IOSAPIC v%x.%x, address 0x%lx, GSIs 0x%x-0x%x\n", (ver & 0xf0) >> 4, (ver & 0x0f), phys_addr, gsi_base, gsi_base + num_rte - 1); if ((gsi_base == 0) && pcat_compat) { @@ -692,7 +691,7 @@ fixup_vector (int vector, unsigned int gsi, const char *pci_id) idesc = irq_desc(vector); if (idesc->handler != irq_type) { if (idesc->handler != &no_irq_type) - printk("IOSAPIC: changing vector %d from %s to %s\n", + printk(KERN_INFO "IOSAPIC: changing vector %d from %s to %s\n", vector, idesc->handler->typename, irq_type->typename); idesc->handler = irq_type; } @@ -723,7 +722,8 @@ fixup_vector (int vector, unsigned int gsi, const char *pci_id) #endif set_rte(vector, dest); - printk("IOSAPIC: %s -> GSI 0x%x -> CPU 0x%04x vector %d\n", pci_id, gsi, dest, vector); + printk(KERN_INFO "IOSAPIC: %s -> GSI 0x%x -> CPU 0x%04x vector %d\n", + pci_id, gsi, dest, vector); } void __init @@ -751,7 +751,7 @@ iosapic_parse_prt (void) index = find_iosapic(gsi); if (index < 0) { - printk(KERN_WARNING"IOSAPIC: GSI 0x%x has no IOSAPIC!\n", gsi); + printk(KERN_WARNING "IOSAPIC: GSI 0x%x has no IOSAPIC!\n", gsi); continue; } addr = iosapic_lists[index].addr; diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c index a1fcd948efad..91ac80986823 100644 --- a/arch/ia64/kernel/irq.c +++ b/arch/ia64/kernel/irq.c @@ -108,7 +108,7 @@ static void ack_none(unsigned int irq) * a generic callback i think. */ #if CONFIG_X86 - printk("unexpected IRQ trap at vector %02x\n", irq); + printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq); #ifdef CONFIG_X86_LOCAL_APIC /* * Currently unexpected vectors happen only on SMP and APIC. 
@@ -122,7 +122,7 @@ static void ack_none(unsigned int irq) #endif #endif #if CONFIG_IA64 - printk("Unexpected irq vector 0x%x on CPU %u!\n", irq, smp_processor_id()); + printk(KERN_ERR "Unexpected irq vector 0x%x on CPU %u!\n", irq, smp_processor_id()); #endif } @@ -317,7 +317,7 @@ void enable_irq(unsigned int irq) desc->depth--; break; case 0: - printk("enable_irq(%u) unbalanced from %p\n", + printk(KERN_ERR "enable_irq(%u) unbalanced from %p\n", irq, (void *) __builtin_return_address(0)); } spin_unlock_irqrestore(&desc->lock, flags); @@ -466,7 +466,7 @@ int request_irq(unsigned int irq, */ if (irqflags & SA_SHIRQ) { if (!dev_id) - printk("Bad boy: %s called us without a dev_id!\n", devname); + printk(KERN_ERR "Bad boy: %s called us without a dev_id!\n", devname); } #endif @@ -547,7 +547,7 @@ void free_irq(unsigned int irq, void *dev_id) kfree(action); return; } - printk("Trying to free free IRQ%d\n",irq); + printk(KERN_ERR "Trying to free free IRQ%d\n",irq); spin_unlock_irqrestore(&desc->lock,flags); return; } diff --git a/arch/ia64/kernel/machvec.c b/arch/ia64/kernel/machvec.c index f1be3eb676d4..def11142fa5a 100644 --- a/arch/ia64/kernel/machvec.c +++ b/arch/ia64/kernel/machvec.c @@ -47,7 +47,7 @@ machvec_init (const char *name) panic("generic kernel failed to find machine vector for platform %s!", name); } ia64_mv = *mv; - printk("booting generic kernel on platform %s\n", name); + printk(KERN_INFO "booting generic kernel on platform %s\n", name); } #endif /* CONFIG_IA64_GENERIC */ diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 129239fa3d3d..d45e4cc9d0c9 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -231,7 +231,7 @@ ia64_mca_register_cpev (int cpev) { /* Register the CPE interrupt vector with SAL */ if (ia64_sal_mc_set_params(SAL_MC_PARAM_CPE_INT, SAL_MC_PARAM_MECHANISM_INT, cpev, 0, 0)) { - printk("ia64_mca_platform_init: failed to register Corrected " + printk(KERN_ERR "ia64_mca_platform_init: failed to register Corrected " "Platform Error interrupt vector with SAL.\n"); return; } @@ -398,7 +398,7 @@ ia64_mca_init(void) IA64_MCA_RENDEZ_TIMEOUT, 0))) { - printk("ia64_mca_init: Failed to register rendezvous interrupt " + printk(KERN_ERR "ia64_mca_init: Failed to register rendezvous interrupt " "with SAL. rc = %ld\n", rc); return; } @@ -409,8 +409,8 @@ ia64_mca_init(void) IA64_MCA_WAKEUP_VECTOR, 0, 0))) { - printk("ia64_mca_init: Failed to register wakeup interrupt with SAL. rc = %ld\n", - rc); + printk(KERN_ERR "ia64_mca_init: Failed to register wakeup interrupt with SAL. " + "rc = %ld\n", rc); return; } @@ -430,8 +430,8 @@ ia64_mca_init(void) ia64_mc_info.imi_mca_handler_size, 0, 0, 0))) { - printk("ia64_mca_init: Failed to register os mca handler with SAL. rc = %ld\n", - rc); + printk(KERN_ERR "ia64_mca_init: Failed to register os mca handler with SAL. " + "rc = %ld\n", rc); return; } @@ -459,8 +459,8 @@ ia64_mca_init(void) __pa(ia64_get_gp()), ia64_mc_info.imi_slave_init_handler_size))) { - printk("ia64_mca_init: Failed to register m/s init handlers with SAL. rc = %ld\n", - rc); + printk(KERN_ERR "ia64_mca_init: Failed to register m/s init handlers with SAL. 
" + "rc = %ld\n", rc); return; } @@ -495,7 +495,8 @@ ia64_mca_init(void) } ia64_mca_register_cpev(cpev); } else - printk("ia64_mca_init: Failed to get routed CPEI vector from ACPI.\n"); + printk(KERN_ERR + "ia64_mca_init: Failed to get routed CPEI vector from ACPI.\n"); } /* Initialize the areas set aside by the OS to buffer the @@ -511,7 +512,7 @@ ia64_mca_init(void) mca_test(); #endif /* #if defined(MCA_TEST) */ - printk("Mca related initialization done\n"); + printk(KERN_INFO "Mca related initialization done\n"); /* commented out because this is done elsewhere */ #if 0 @@ -807,7 +808,7 @@ ia64_init_handler (struct pt_regs *regs) sal_log_processor_info_t *proc_ptr; ia64_err_rec_t *plog_ptr; - printk("Entered OS INIT handler\n"); + printk(KERN_INFO "Entered OS INIT handler\n"); /* Get the INIT processor log */ if (!ia64_log_get(SAL_INFO_TYPE_INIT, (prfunc_t)printk)) @@ -1736,8 +1737,7 @@ ia64_log_processor_info_print(sal_log_record_header_t *lh, prfunc_t prfunc) /* * Now process processor device error record section */ - ia64_log_proc_dev_err_info_print((sal_log_processor_info_t *)slsh, - printk); + ia64_log_proc_dev_err_info_print((sal_log_processor_info_t *)slsh, printk); } IA64_MCA_DEBUG("ia64_mca_log_print: " diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c index a0d6a97ca2e6..19b44cf837b8 100644 --- a/arch/ia64/kernel/palinfo.c +++ b/arch/ia64/kernel/palinfo.c @@ -6,7 +6,7 @@ * Intel IA-64 Architecture Software Developer's Manual v1.0. * * - * Copyright (C) 2000-2001 Hewlett-Packard Co + * Copyright (C) 2000-2001, 2003 Hewlett-Packard Co * Stephane Eranian * * 05/26/2000 S.Eranian initial release @@ -225,15 +225,12 @@ cache_info(char *page) int i,j, k; s64 status; - if ((status=ia64_pal_cache_summary(&levels, &unique_caches)) != 0) { - printk("ia64_pal_cache_summary=%ld\n", status); - return 0; + if ((status = ia64_pal_cache_summary(&levels, &unique_caches)) != 0) { + printk(KERN_ERR "ia64_pal_cache_summary=%ld\n", status); + return 0; } - p += sprintf(p, "Cache levels : %ld\n" \ - "Unique caches : %ld\n\n", - levels, - unique_caches); + p += sprintf(p, "Cache levels : %ld\nUnique caches : %ld\n\n", levels, unique_caches); for (i=0; i < levels; i++) { @@ -308,8 +305,8 @@ vm_info(char *page) int i, j; s64 status; - if ((status=ia64_pal_vm_summary(&vm_info_1, &vm_info_2)) !=0) { - printk("ia64_pal_vm_summary=%ld\n", status); + if ((status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2)) !=0) { + printk(KERN_ERR "ia64_pal_vm_summary=%ld\n", status); return 0; } @@ -339,8 +336,8 @@ vm_info(char *page) } p += sprintf(p, "\n"); - if ((status=ia64_pal_vm_page_size(&tr_pages, &vw_pages)) !=0) { - printk("ia64_pal_vm_page_size=%ld\n", status); + if ((status = ia64_pal_vm_page_size(&tr_pages, &vw_pages)) !=0) { + printk(KERN_ERR "ia64_pal_vm_page_size=%ld\n", status); return 0; } @@ -360,7 +357,7 @@ vm_info(char *page) p = bitvector_process(p, vw_pages); if ((status=ia64_get_ptce(&ptce)) != 0) { - printk("ia64_get_ptce=%ld\n",status); + printk(KERN_ERR "ia64_get_ptce=%ld\n", status); return 0; } @@ -710,8 +707,8 @@ tr_info(char *page) u64 rv2:32; } *rid_reg; - if ((status=ia64_pal_vm_summary(&vm_info_1, &vm_info_2)) !=0) { - printk("ia64_pal_vm_summary=%ld\n", status); + if ((status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2)) !=0) { + printk(KERN_ERR "ia64_pal_vm_summary=%ld\n", status); return 0; } max[0] = vm_info_1.pal_vm_info_1_s.max_itr_entry+1; @@ -722,7 +719,8 @@ tr_info(char *page) status = ia64_pal_tr_read(j, i, tr_buffer, &tr_valid); if (status != 0) { - 
printk("palinfo: pal call failed on tr[%d:%d]=%ld\n", i, j, status); + printk(KERN_ERR "palinfo: pal call failed on tr[%d:%d]=%ld\n", + i, j, status); continue; } @@ -841,7 +839,7 @@ palinfo_smp_call(void *info) { palinfo_smp_data_t *data = (palinfo_smp_data_t *)info; if (data == NULL) { - printk("%s palinfo: data pointer is NULL\n", KERN_ERR); + printk(KERN_ERR "palinfo: data pointer is NULL\n"); data->ret = 0; /* no output */ return; } @@ -868,7 +866,8 @@ int palinfo_handle_smp(pal_func_cpu_u_t *f, char *page) /* will send IPI to other CPU and wait for completion of remote call */ if ((ret=smp_call_function_single(f->req_cpu, palinfo_smp_call, &ptr, 0, 1))) { - printk("palinfo: remote CPU call from %d to %d on function %d: error %d\n", smp_processor_id(), f->req_cpu, f->func_id, ret); + printk(KERN_ERR "palinfo: remote CPU call from %d to %d on function %d: " + "error %d\n", smp_processor_id(), f->req_cpu, f->func_id, ret); return 0; } return ptr.ret; @@ -877,7 +876,7 @@ int palinfo_handle_smp(pal_func_cpu_u_t *f, char *page) static int palinfo_handle_smp(pal_func_cpu_u_t *f, char *page) { - printk("palinfo: should not be called with non SMP kernel\n"); + printk(KERN_ERR "palinfo: should not be called with non SMP kernel\n"); return 0; } #endif /* CONFIG_SMP */ diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index ffe9a0685245..dc6d8f997891 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -585,7 +585,7 @@ pfm_vm_close(struct vm_area_struct *vma) pfm_smpl_buffer_desc_t *psb = (pfm_smpl_buffer_desc_t *)vma->vm_private_data; if (psb == NULL) { - printk("perfmon: psb is null in [%d]\n", current->pid); + printk(KERN_DEBUG "perfmon: psb is null in [%d]\n", current->pid); return; } /* @@ -650,7 +650,7 @@ pfm_remove_smpl_mapping(struct task_struct *task) * some sanity checks first */ if (ctx == NULL || task->mm == NULL || ctx->ctx_smpl_vaddr == 0 || ctx->ctx_psb == NULL) { - printk("perfmon: invalid context mm=%p\n", task->mm); + printk(KERN_DEBUG "perfmon: invalid context mm=%p\n", task->mm); return -1; } psb = ctx->ctx_psb; @@ -661,11 +661,11 @@ pfm_remove_smpl_mapping(struct task_struct *task) up_write(&task->mm->mmap_sem); if (r !=0) { - printk("perfmon: pid %d unable to unmap sampling buffer @0x%lx size=%ld\n", - task->pid, ctx->ctx_smpl_vaddr, psb->psb_size); + printk(KERN_DEBUG "perfmon: pid %d unable to unmap sampling buffer " + "@0x%lx size=%ld\n", task->pid, ctx->ctx_smpl_vaddr, psb->psb_size); } - DBprintk(("[%d] do_unmap(0x%lx, %ld)=%d refcnt=%lu psb_flags=0x%x\n", + DBprintk(("[%d] do_unmap(0x%lx, %ld)=%d refcnt=%lu psb_flags=0x%x\n", task->pid, ctx->ctx_smpl_vaddr, psb->psb_size, r, psb->psb_refcnt, psb->psb_flags)); return 0; @@ -700,7 +700,7 @@ pfm_remap_buffer(struct vm_area_struct *vma, unsigned long buf, unsigned long ad page = pfm_kvirt_to_pa(buf); if (remap_page_range(vma, addr, page, PAGE_SIZE, PAGE_READONLY)) return -ENOMEM; - + addr += PAGE_SIZE; buf += PAGE_SIZE; size -= PAGE_SIZE; @@ -857,7 +857,7 @@ pfm_smpl_buffer_alloc(pfm_context_t *ctx, unsigned long *which_pmds, unsigned lo vma->vm_end = vma->vm_start + size; DBprintk(("entries=%ld aligned size=%ld, unmapped @0x%lx\n", entries, size, vma->vm_start)); - + /* can only be applied to current, need to have the mm semaphore held when called */ if (pfm_remap_buffer(vma, (unsigned long)smpl_buf, vma->vm_start, size)) { DBprintk(("Can't remap buffer\n")); @@ -978,7 +978,7 @@ pfm_unreserve_session(struct task_struct *task, int is_syswide, unsigned long cp 
pfm_sessions.pfs_sys_use_dbregs, is_syswide, cpu_mask)); - + if (is_syswide) { m = cpu_mask; n = 0; @@ -992,7 +992,8 @@ pfm_unreserve_session(struct task_struct *task, int is_syswide, unsigned long cp */ if (ctx && ctx->ctx_fl_using_dbreg) { if (pfm_sessions.pfs_sys_use_dbregs == 0) { - printk("perfmon: invalid release for [%d] sys_use_dbregs=0\n", task->pid); + printk(KERN_DEBUG "perfmon: invalid release for [%d] " + "sys_use_dbregs=0\n", task->pid); } else { pfm_sessions.pfs_sys_use_dbregs--; } @@ -1798,7 +1799,8 @@ pfm_release_debug_registers(struct task_struct *task) LOCK_PFS(); if (pfm_sessions.pfs_ptrace_use_dbregs == 0) { - printk("perfmon: invalid release for [%d] ptrace_use_dbregs=0\n", task->pid); + printk(KERN_DEBUG "perfmon: invalid release for [%d] ptrace_use_dbregs=0\n", + task->pid); ret = -1; } else { pfm_sessions.pfs_ptrace_use_dbregs--; @@ -2060,7 +2062,7 @@ pfm_debug(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, pfm_sysctl.debug = mode == 0 ? 0 : 1; - printk("perfmon debugging %s\n", pfm_sysctl.debug ? "on" : "off"); + printk(KERN_INFO "perfmon debugging %s\n", pfm_sysctl.debug ? "on" : "off"); return 0; } @@ -2324,7 +2326,7 @@ pfm_start(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, current)); if (PMU_OWNER() != task) { - printk("perfmon: pfm_start task [%d] not pmu owner\n", task->pid); + printk(KERN_DEBUG "perfmon: pfm_start task [%d] not pmu owner\n", task->pid); return -EINVAL; } @@ -2345,7 +2347,8 @@ pfm_start(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, } else { if ((task->thread.flags & IA64_THREAD_PM_VALID) == 0) { - printk("perfmon: pfm_start task flag not set for [%d]\n", task->pid); + printk(KERN_DEBUG "perfmon: pfm_start task flag not set for [%d]\n", + task->pid); return -EINVAL; } /* set user level psr.up */ @@ -2620,7 +2623,7 @@ pfm_ovfl_block_reset(void) * do some sanity checks first */ if (!ctx) { - printk("perfmon: [%d] has no PFM context\n", current->pid); + printk(KERN_DEBUG "perfmon: [%d] has no PFM context\n", current->pid); return; } @@ -2792,16 +2795,16 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str * Don't think this could happen given upfront tests */ if ((t->flags & IA64_THREAD_PM_VALID) == 0 && ctx->ctx_fl_system == 0) { - printk("perfmon: Spurious overflow interrupt: process %d not using perfmon\n", - task->pid); + printk(KERN_DEBUG "perfmon: Spurious overflow interrupt: process %d not " + "using perfmon\n", task->pid); return 0x1; } /* * sanity test. 
Should never happen */ if ((pmc0 & 0x1) == 0) { - printk("perfmon: pid %d pmc0=0x%lx assumption error for freeze bit\n", - task->pid, pmc0); + printk(KERN_DEBUG "perfmon: pid %d pmc0=0x%lx assumption error for freeze bit\n", + task->pid, pmc0); return 0x0; } @@ -2966,8 +2969,8 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str * this call is safe in an interrupt handler, so does read_lock() on tasklist_lock */ ret = send_sig_info(SIGPROF, &si, ctx->ctx_notify_task); - if (ret != 0) - printk("send_sig_info(process %d, SIGPROF)=%d\n", + if (ret != 0) + printk(KERN_DEBUG "send_sig_info(process %d, SIGPROF)=%d\n", ctx->ctx_notify_task->pid, ret); /* * now undo the protections in order @@ -3066,8 +3069,8 @@ pfm_interrupt_handler(int irq, void *arg, struct pt_regs *regs) /* sanity check */ if (!ctx) { - printk("perfmon: Spurious overflow interrupt: process %d has no PFM context\n", - task->pid); + printk(KERN_DEBUG "perfmon: Spurious overflow interrupt: process %d has " + "no PFM context\n", task->pid); return; } #ifdef CONFIG_SMP @@ -3424,7 +3427,8 @@ must_wait_saving: /* will send IPI to other CPU and wait for completion of remote call */ if ((ret=smp_call_function_single(cpu, pfm_handle_fetch_regs, &arg, 0, 1))) { - printk("perfmon: remote CPU call from %d to %d error %d\n", smp_processor_id(), cpu, ret); + printk(KERN_ERR "perfmon: remote CPU call from %d to %d error %d\n", + smp_processor_id(), cpu, ret); return; } /* @@ -3763,8 +3767,9 @@ pfm_flush_regs (struct task_struct *task) * */ - if (atomic_read(&ctx->ctx_last_cpu) != smp_processor_id()) - printk("perfmon: [%d] last_cpu=%d\n", task->pid, atomic_read(&ctx->ctx_last_cpu)); + if (atomic_read(&ctx->ctx_last_cpu) != smp_processor_id()) + printk(KERN_DEBUG "perfmon: [%d] last_cpu=%d\n", + task->pid, atomic_read(&ctx->ctx_last_cpu)); /* * we save all the used pmds @@ -4138,7 +4143,7 @@ pfm_cleanup_smpl_buf(struct task_struct *task) pfm_smpl_buffer_desc_t *tmp, *psb = task->thread.pfm_smpl_buf_list; if (psb == NULL) { - printk("perfmon: psb is null in [%d]\n", current->pid); + printk(KERN_DEBUG "perfmon: psb is null in [%d]\n", current->pid); return -1; } /* @@ -4298,7 +4303,8 @@ pfm_install_alternate_syswide_subsystem(pfm_intr_handler_desc_t *hdl) if (ret) return ret; if (pfm_alternate_intr_handler) { - printk("perfmon: install_alternate, intr_handler not NULL after reserve\n"); + printk(KERN_DEBUG "perfmon: install_alternate, intr_handler not NULL " + "after reserve\n"); return -EINVAL; } @@ -4335,10 +4341,8 @@ pfm_init(void) pmu_conf.disabled = 1; - printk("perfmon: version %u.%u IRQ %u\n", - PFM_VERSION_MAJ, - PFM_VERSION_MIN, - IA64_PERFMON_VECTOR); + printk(KERN_INFO "perfmon: version %u.%u IRQ %u\n", PFM_VERSION_MAJ, PFM_VERSION_MIN, + IA64_PERFMON_VECTOR); /* * compute the number of implemented PMD/PMC from the @@ -4362,8 +4366,8 @@ pfm_init(void) pmu_conf.num_pmds = n; pmu_conf.num_counters = n_counters; - printk("perfmon: %u PMCs, %u PMDs, %u counters (%lu bits)\n", - pmu_conf.num_pmcs, + printk(KERN_INFO "perfmon: %u PMCs, %u PMDs, %u counters (%lu bits)\n", + pmu_conf.num_pmcs, pmu_conf.num_pmds, pmu_conf.num_counters, ffz(pmu_conf.ovfl_val)); @@ -4380,7 +4384,7 @@ pfm_init(void) perfmon_dir = create_proc_read_entry ("perfmon", 0, 0, perfmon_read_entry, NULL); if (perfmon_dir == NULL) { printk(KERN_ERR "perfmon: cannot create /proc entry, perfmon disabled\n"); - return -1; + return -1; } /* diff --git a/arch/ia64/kernel/perfmon_mckinley.h b/arch/ia64/kernel/perfmon_mckinley.h index 
eb761290b6bf..c4e8a12b5c52 100644 --- a/arch/ia64/kernel/perfmon_mckinley.h +++ b/arch/ia64/kernel/perfmon_mckinley.h @@ -163,12 +163,12 @@ pfm_mck_pmc_check(struct task_struct *task, unsigned int cnum, unsigned long *va * i-side events in L1D and L2 caches */ if (check_case1) { - ret = ((val13 >> 45) & 0xf) == 0 + ret = ((val13 >> 45) & 0xf) == 0 && ((val8 & 0x1) == 0) && ((((val14>>1) & 0x3) == 0x2 || ((val14>>1) & 0x3) == 0x0) ||(((val14>>4) & 0x3) == 0x2 || ((val14>>4) & 0x3) == 0x0)); - if (ret) printk("perfmon: failure check_case1\n"); + if (ret) printk(KERN_DEBUG "perfmon: failure check_case1\n"); } return ret ? -EINVAL : 0; diff --git a/arch/ia64/kernel/sal.c b/arch/ia64/kernel/sal.c index 462f58836dc8..ffd5e361b46e 100644 --- a/arch/ia64/kernel/sal.c +++ b/arch/ia64/kernel/sal.c @@ -1,7 +1,7 @@ /* * System Abstraction Layer (SAL) interface routines. * - * Copyright (C) 1998, 1999, 2001 Hewlett-Packard Co + * Copyright (C) 1998, 1999, 2001, 2003 Hewlett-Packard Co * David Mosberger-Tang * Copyright (C) 1999 VA Linux Systems * Copyright (C) 1999 Walt Drummond @@ -96,17 +96,17 @@ ia64_sal_init (struct ia64_sal_systab *systab) int i; if (!systab) { - printk("Hmm, no SAL System Table.\n"); + printk(KERN_WARNING "Hmm, no SAL System Table.\n"); return; } if (strncmp(systab->signature, "SST_", 4) != 0) - printk("bad signature in system table!"); + printk(KERN_ERR "bad signature in system table!"); /* * revisions are coded in BCD, so %x does the job for us */ - printk("SAL v%x.%02x: oem=%.32s, product=%.32s\n", + printk(KERN_INFO "SAL v%x.%02x: oem=%.32s, product=%.32s\n", systab->sal_rev_major, systab->sal_rev_minor, systab->oem_id, systab->product_id); @@ -121,7 +121,7 @@ ia64_sal_init (struct ia64_sal_systab *systab) switch (*p) { case SAL_DESC_ENTRY_POINT: ep = (struct ia64_sal_desc_entry_point *) p; - printk("SAL: entry: pal_proc=0x%lx, sal_proc=0x%lx\n", + printk(KERN_INFO "SAL: entry: pal_proc=0x%lx, sal_proc=0x%lx\n", ep->pal_proc, ep->sal_proc); ia64_pal_handler_init(__va(ep->pal_proc)); ia64_sal_handler_init(__va(ep->sal_proc), __va(ep->gp)); @@ -139,12 +139,12 @@ ia64_sal_init (struct ia64_sal_systab *systab) switch (ap->mechanism) { case IA64_SAL_AP_EXTERNAL_INT: ap_wakeup_vector = ap->vector; - printk("SAL: AP wakeup using external interrupt " + printk(KERN_INFO "SAL: AP wakeup using external interrupt " "vector 0x%lx\n", ap_wakeup_vector); break; default: - printk("SAL: AP wakeup mechanism unsupported!\n"); + printk(KERN_ERR "SAL: AP wakeup mechanism unsupported!\n"); break; } break; @@ -154,7 +154,7 @@ ia64_sal_init (struct ia64_sal_systab *systab) { struct ia64_sal_desc_platform_feature *pf = (void *) p; sal_platform_features = pf->feature_mask; - printk("SAL: Platform features "); + printk(KERN_INFO "SAL: Platform features "); if (pf->feature_mask & IA64_SAL_PLATFORM_FEATURE_BUS_LOCK) printk("BusLock "); diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 6e21b7e7c268..c8a439ca44f7 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -171,7 +171,7 @@ filter_rsvd_memory (unsigned long start, unsigned long end, void *arg) #if IGNORE_PFN0 if (start == PAGE_OFFSET) { - printk("warning: skipping physical page 0\n"); + printk(KERN_WARNING "warning: skipping physical page 0\n"); start += PAGE_SIZE; if (start >= end) return 0; } @@ -341,7 +341,7 @@ find_memory (void) initrd_start = (unsigned long)__va(ia64_boot_param->initrd_start); initrd_end = initrd_start+ia64_boot_param->initrd_size; - printk("Initial ramdisk at: 0x%lx (%lu bytes)\n", + 
printk(KERN_INFO "Initial ramdisk at: 0x%lx (%lu bytes)\n", initrd_start, ia64_boot_param->initrd_size); } #endif @@ -409,8 +409,9 @@ setup_arch (char **cmdline_p) ia64_set_kr(IA64_KR_IO_BASE, phys_iobase); else { phys_iobase = ia64_get_kr(IA64_KR_IO_BASE); - printk("No I/O port range found in EFI memory map, falling back to AR.KR0\n"); - printk("I/O port base = 0x%lx\n", phys_iobase); + printk(KERN_INFO "No I/O port range found in EFI memory map, falling back " + "to AR.KR0\n"); + printk(KERN_INFO "I/O port base = 0x%lx\n", phys_iobase); } ia64_iobase = (unsigned long) ioremap(phys_iobase, 0); @@ -615,7 +616,7 @@ identify_cpu (struct cpuinfo_ia64 *c) impl_va_msb = vm2.pal_vm_info_2_s.impl_va_msb; phys_addr_size = vm1.pal_vm_info_1_s.phys_add_size; } - printk("CPU %d: %lu virtual and %lu physical address bits\n", + printk(KERN_INFO "CPU %d: %lu virtual and %lu physical address bits\n", smp_processor_id(), impl_va_msb + 1, phys_addr_size); c->unimpl_va_mask = ~((7L<<61) | ((1L << (impl_va_msb + 1)) - 1)); c->unimpl_pa_mask = ~((1L<<63) | ((1L << phys_addr_size) - 1)); @@ -738,7 +739,7 @@ cpu_init (void) if (ia64_pal_vm_summary(NULL, &vmi) == 0) max_ctx = (1U << (vmi.pal_vm_info_2_s.rid_size - 3)) - 1; else { - printk("cpu_init: PAL VM summary failed, assuming 18 RID bits\n"); + printk(KERN_WARNING "cpu_init: PAL VM summary failed, assuming 18 RID bits\n"); max_ctx = (1U << 15) - 1; /* use architected minimum */ } while (max_ctx < ia64_ctx.max_ctx) { @@ -748,7 +749,7 @@ cpu_init (void) } if (ia64_pal_rse_info(&num_phys_stacked, 0) != 0) { - printk(KERN_WARNING"cpu_init: PAL RSE info failed; assuming 96 physical " + printk(KERN_WARNING "cpu_init: PAL RSE info failed; assuming 96 physical " "stacked regs\n"); num_phys_stacked = 96; } @@ -766,9 +767,9 @@ check_bugs (void) int *wp; if (local_cpu_data->family == 0x1f && local_cpu_data->model == 0) - printk(KERN_INFO"check_bugs: leaving McKinley Errata 9 workaround enabled\n"); + printk(KERN_INFO "check_bugs: leaving McKinley Errata 9 workaround enabled\n"); else { - printk(KERN_INFO"check_bugs: McKinley Errata 9 workaround not needed; " + printk(KERN_INFO "check_bugs: McKinley Errata 9 workaround not needed; " "disabling it\n"); for (wp = __start___mckinley_e9_bundles; wp < __end___mckinley_e9_bundles; ++wp) { bundle = (u64 *) ((char *) wp + *wp); diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 3e9aadadeeb7..15d820d88c64 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -1,7 +1,7 @@ /* * SMP boot-related support * - * Copyright (C) 1998-2002 Hewlett-Packard Co + * Copyright (C) 1998-2003 Hewlett-Packard Co * David Mosberger-Tang * * 01/05/16 Rohit Seth Moved SMP booting functions from smp.c to here. 
@@ -204,7 +204,7 @@ ia64_sync_itc (unsigned int master) go[MASTER] = 1; if (smp_call_function_single(master, sync_master, NULL, 1, 0) < 0) { - printk("sync_itc: failed to get attention of CPU %u!\n", master); + printk(KERN_ERR "sync_itc: failed to get attention of CPU %u!\n", master); return; } @@ -244,8 +244,8 @@ ia64_sync_itc (unsigned int master) t[i].rt, t[i].master, t[i].diff, t[i].lat); #endif - printk("CPU %d: synchronized ITC with CPU %u (last diff %ld cycles, maxerr %lu cycles)\n", - smp_processor_id(), master, delta, rt); + printk(KERN_INFO "CPU %d: synchronized ITC with CPU %u (last diff %ld cycles, " + "maxerr %lu cycles)\n", smp_processor_id(), master, delta, rt); } /* @@ -272,7 +272,8 @@ smp_callin (void) phys_id = hard_smp_processor_id(); if (test_and_set_bit(cpuid, &cpu_online_map)) { - printk("huh, phys CPU#0x%x, CPU#0x%x already present??\n", phys_id, cpuid); + printk(KERN_ERR "huh, phys CPU#0x%x, CPU#0x%x already present??\n", + phys_id, cpuid); BUG(); } @@ -380,9 +381,7 @@ do_boot_cpu (int sapicid, int cpu) if (test_bit(cpu, &cpu_callin_map)) { /* number CPUs logically, starting from 1 (BSP is 0) */ - printk("CPU%d: ", cpu); - /*print_cpu_info(&cpu_data[cpu]); */ - printk("CPU has booted.\n"); + printk(KERN_INFO "CPU%d: CPU has booted.\n", cpu); } else { printk(KERN_ERR "Processor 0x%x/0x%x is stuck.\n", cpu, sapicid); ia64_cpu_to_sapicid[cpu] = -1; @@ -399,7 +398,7 @@ smp_tune_scheduling (void) { cache_decay_ticks = 10; /* XXX base this on PAL info and cache-bandwidth estimate */ - printk("task migration cache decay timeout: %ld msecs.\n", + printk(KERN_INFO "task migration cache decay timeout: %ld msecs.\n", (cache_decay_ticks + 1) * 1000 / HZ); } @@ -491,7 +490,7 @@ smp_prepare_cpus (unsigned int max_cpus) local_cpu_data->loops_per_jiffy = loops_per_jiffy; ia64_cpu_to_sapicid[0] = boot_cpu_id; - printk("Boot processor id 0x%x/0x%x\n", 0, boot_cpu_id); + printk(KERN_INFO "Boot processor id 0x%x/0x%x\n", 0, boot_cpu_id); current_thread_info()->cpu = 0; smp_tune_scheduling(); @@ -526,7 +525,7 @@ smp_cpus_done (unsigned int dummy) if (cpu_online(cpu)) bogosum += cpu_data(cpu)->loops_per_jiffy; - printk(KERN_INFO"Total of %d processors activated (%lu.%02lu BogoMIPS).\n", + printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n", num_online_cpus(), bogosum/(500000/HZ), (bogosum/(5000/HZ))%100); } @@ -571,5 +570,6 @@ init_smp_config(void) sal_ret = ia64_sal_set_vectors(SAL_VECTOR_OS_BOOT_RENDEZ, __pa(ap_startup->fp), __pa(ap_startup->gp), 0, 0, 0, 0); if (sal_ret < 0) - printk("SMP: Can't set SAL AP Boot Rendezvous: %s\n", ia64_sal_strerror(sal_ret)); + printk(KERN_ERR "SMP: Can't set SAL AP Boot Rendezvous: %s\n", + ia64_sal_strerror(sal_ret)); } diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 4e6c6268703d..ea60135a293b 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -1,7 +1,7 @@ /* * linux/arch/ia64/kernel/time.c * - * Copyright (C) 1998-2002 Hewlett-Packard Co + * Copyright (C) 1998-2003 Hewlett-Packard Co * Stephane Eranian * David Mosberger * Copyright (C) 1999 Don Dugger @@ -76,10 +76,8 @@ gettimeoffset (void) now = ia64_get_itc(); if ((long) (now - last_tick) < 0) { -# if 1 - printk("CPU %d: now < last_tick (now=0x%lx,last_tick=0x%lx)!\n", + printk(KERN_ERR "CPU %d: now < last_tick (now=0x%lx,last_tick=0x%lx)!\n", smp_processor_id(), now, last_tick); -# endif return last_time_offset; } elapsed_cycles = now - last_tick; @@ -158,7 +156,7 @@ timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) 
new_itm = local_cpu_data->itm_next; if (!time_after(ia64_get_itc(), new_itm)) - printk("Oops: timer tick before it's due (itc=%lx,itm=%lx)\n", + printk(KERN_ERR "Oops: timer tick before it's due (itc=%lx,itm=%lx)\n", ia64_get_itc(), new_itm); while (1) { @@ -247,21 +245,22 @@ ia64_init_itm (void) */ status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM, &platform_base_freq, &drift); if (status != 0) { - printk("SAL_FREQ_BASE_PLATFORM failed: %s\n", ia64_sal_strerror(status)); + printk(KERN_ERR "SAL_FREQ_BASE_PLATFORM failed: %s\n", ia64_sal_strerror(status)); } else { status = ia64_pal_freq_ratios(&proc_ratio, 0, &itc_ratio); if (status != 0) - printk("PAL_FREQ_RATIOS failed with status=%ld\n", status); + printk(KERN_ERR "PAL_FREQ_RATIOS failed with status=%ld\n", status); } if (status != 0) { /* invent "random" values */ - printk("SAL/PAL failed to obtain frequency info---inventing reasonably values\n"); + printk(KERN_ERR + "SAL/PAL failed to obtain frequency info---inventing reasonably values\n"); platform_base_freq = 100000000; itc_ratio.num = 3; itc_ratio.den = 1; } if (platform_base_freq < 40000000) { - printk("Platform base frequency %lu bogus---resetting to 75MHz!\n", + printk(KERN_ERR "Platform base frequency %lu bogus---resetting to 75MHz!\n", platform_base_freq); platform_base_freq = 75000000; } @@ -272,8 +271,8 @@ ia64_init_itm (void) itc_freq = (platform_base_freq*itc_ratio.num)/itc_ratio.den; local_cpu_data->itm_delta = (itc_freq + HZ/2) / HZ; - printk("CPU %d: base freq=%lu.%03luMHz, ITC ratio=%lu/%lu, ITC freq=%lu.%03luMHz\n", - smp_processor_id(), + printk(KERN_INFO "CPU %d: base freq=%lu.%03luMHz, ITC ratio=%lu/%lu, " + "ITC freq=%lu.%03luMHz\n", smp_processor_id(), platform_base_freq / 1000000, (platform_base_freq / 1000) % 1000, itc_ratio.num, itc_ratio.den, itc_freq / 1000000, (itc_freq / 1000) % 1000); diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index 8b4fa9e68404..bb8e11fc0a13 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -57,7 +57,8 @@ trap_init (void) major = fpswa_interface->revision >> 16; minor = fpswa_interface->revision & 0xffff; } - printk("fpswa interface at %lx (rev %d.%d)\n", ia64_boot_param->fpswa, major, minor); + printk(KERN_INFO "fpswa interface at %lx (rev %d.%d)\n", + ia64_boot_param->fpswa, major, minor); } /* @@ -222,7 +223,7 @@ ia64_ni_syscall (unsigned long arg0, unsigned long arg1, unsigned long arg2, uns { struct pt_regs *regs = (struct pt_regs *) &stack; - printk("%s(%d): <sc%ld(%lx,%lx,%lx,%lx)>\n", current->comm, current->pid, + printk(KERN_DEBUG "%s(%d): <sc%ld(%lx,%lx,%lx,%lx)>\n", current->comm, current->pid, regs->r15, arg0, arg1, arg2, arg3); return -ENOSYS; } @@ -346,7 +347,7 @@ handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr) /* emulation was successful */ ia64_increment_ip(regs); } else if (exception == -1) { - printk("handle_fpu_swa: fp_emulate() returned -1\n"); + printk(KERN_ERR "handle_fpu_swa: fp_emulate() returned -1\n"); return -1; } else { /* is next instruction a trap? */ @@ -369,7 +370,7 @@ handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr) } } else { if (exception == -1) { - printk("handle_fpu_swa: fp_emulate() returned -1\n"); + printk(KERN_ERR "handle_fpu_swa: fp_emulate() returned -1\n"); return -1; } else if (exception != 0) { /* raise exception */ @@ -467,7 +468,9 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, ? 
" (RSE access)" : " (data access)") : ""); if (code == 8) { # ifdef CONFIG_IA64_PRINT_HAZARDS - printk("%016lx:possible hazard, pr = %016lx\n", regs->cr_iip, regs->pr); + printk("%s[%d]: possible hazard @ ip=%016lx (pr = %016lx)\n", + current->comm, current->pid, regs->cr_iip + ia64_psr(regs)->ri, + regs->pr); # endif return; } @@ -614,8 +617,9 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, if (ia32_exception(regs, isr) == 0) return; #endif - printk("Unexpected IA-32 exception (Trap 45)\n"); - printk(" iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx\n", regs->cr_iip, ifa, isr); + printk(KERN_ERR "Unexpected IA-32 exception (Trap 45)\n"); + printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx\n", + regs->cr_iip, ifa, isr); force_sig(SIGSEGV, current); break; @@ -624,8 +628,8 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, if (ia32_intercept(regs, isr) == 0) return; #endif - printk("Unexpected IA-32 intercept trap (Trap 46)\n"); - printk(" iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx, iim - 0x%lx\n", + printk(KERN_ERR "Unexpected IA-32 intercept trap (Trap 46)\n"); + printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx, iim - 0x%lx\n", regs->cr_iip, ifa, isr, iim); force_sig(SIGSEGV, current); return; diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c index 916abca4864a..e54bf65985a6 100644 --- a/arch/ia64/kernel/unwind.c +++ b/arch/ia64/kernel/unwind.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 1999-2002 Hewlett-Packard Co + * Copyright (C) 1999-2003 Hewlett-Packard Co * David Mosberger-Tang */ /* @@ -532,7 +532,7 @@ push (struct unw_state_record *sr) rs = alloc_reg_state(); if (!rs) { - printk("unwind: cannot stack reg state!\n"); + printk(KERN_ERR "unwind: cannot stack reg state!\n"); return; } memcpy(rs, &sr->curr, sizeof(*rs)); @@ -545,7 +545,7 @@ pop (struct unw_state_record *sr) struct unw_reg_state *rs = sr->curr.next; if (!rs) { - printk("unwind: stack underflow!\n"); + printk(KERN_ERR "unwind: stack underflow!\n"); return; } memcpy(&sr->curr, rs, sizeof(*rs)); @@ -561,7 +561,7 @@ dup_state_stack (struct unw_reg_state *rs) while (rs) { copy = alloc_reg_state(); if (!copy) { - printk ("unwind.dup_state_stack: out of memory\n"); + printk(KERN_ERR "unwind.dup_state_stack: out of memory\n"); return NULL; } memcpy(copy, rs, sizeof(*copy)); @@ -951,7 +951,7 @@ desc_copy_state (unw_word label, struct unw_state_record *sr) return; } } - printk("unwind: failed to find state labeled 0x%lx\n", label); + printk(KERN_ERR "unwind: failed to find state labeled 0x%lx\n", label); } static inline void @@ -961,7 +961,7 @@ desc_label_state (unw_word label, struct unw_state_record *sr) ls = alloc_labeled_state(); if (!ls) { - printk("unwind.desc_label_state(): out of memory\n"); + printk(KERN_ERR "unwind.desc_label_state(): out of memory\n"); return; } ls->label = label; @@ -1055,7 +1055,8 @@ desc_spill_sprel_p (unsigned char qp, unw_word t, unsigned char abreg, unw_word r->val = 4*spoff; } -#define UNW_DEC_BAD_CODE(code) printk("unwind: unknown code 0x%02x\n", code); +#define UNW_DEC_BAD_CODE(code) printk(KERN_ERR "unwind: unknown code 0x%02x\n", \ + code); /* * region headers: @@ -2015,7 +2016,7 @@ unw_create_gate_table (void) unw.gate_table = alloc_bootmem(size); if (!unw.gate_table) { unw.gate_table_size = 0; - printk("unwind: unable to create unwind data for gate page!\n"); + printk(KERN_ERR "unwind: unable to create unwind data for gate page!\n"); return; } unw.gate_table_size = size; diff --git a/arch/ia64/lib/swiotlb.c 
b/arch/ia64/lib/swiotlb.c index d06543fafbf8..f390da81d018 100644 --- a/arch/ia64/lib/swiotlb.c +++ b/arch/ia64/lib/swiotlb.c @@ -113,7 +113,7 @@ swiotlb_init (void) io_tlb_index = 0; io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(char *)); - printk("Placing software IO TLB between 0x%p - 0x%p\n", + printk(KERN_INFO "Placing software IO TLB between 0x%p - 0x%p\n", (void *) io_tlb_start, (void *) io_tlb_end); } diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 3e2e0b879ad8..f855cb69db21 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -215,7 +215,7 @@ discontig_mem_init(void) int node; if (numnodes == 0) { - printk("node info missing!\n"); + printk(KERN_ERR "node info missing!\n"); numnodes = 1; } diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index 45b20d7eb71c..c1e982f74e32 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -196,7 +196,7 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re yield(); goto survive; } - printk("VM: killing process %s\n", current->comm); + printk(KERN_CRIT "VM: killing process %s\n", current->comm); if (user_mode(regs)) do_exit(SIGKILL); goto no_context; diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 4ea4a55d3035..49114de81896 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -234,7 +234,7 @@ put_gate_page (struct page *page, unsigned long address) pte_t *pte; if (!PageReserved(page)) - printk("put_gate_page: gate page at 0x%p not in reserved memory\n", + printk(KERN_ERR "put_gate_page: gate page at 0x%p not in reserved memory\n", page_address(page)); pgd = pgd_offset_k(address); /* note: this is NOT pgd_offset()! */ @@ -431,10 +431,10 @@ mem_init (void) datasize = (unsigned long) &_edata - (unsigned long) &_etext; initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; - printk("Memory: %luk/%luk available (%luk code, %luk reserved, %luk data, %luk init)\n", - (unsigned long) nr_free_pages() << (PAGE_SHIFT - 10), - num_physpages << (PAGE_SHIFT - 10), codesize >> 10, reserved_pages << (PAGE_SHIFT - 10), - datasize >> 10, initsize >> 10); + printk(KERN_INFO "Memory: %luk/%luk available (%luk code, %luk reserved, " + "%luk data, %luk init)\n", (unsigned long) nr_free_pages() << (PAGE_SHIFT - 10), + num_physpages << (PAGE_SHIFT - 10), codesize >> 10, + reserved_pages << (PAGE_SHIFT - 10), datasize >> 10, initsize >> 10); /* * Allow for enough (cached) page table pages so that we can map the entire memory -- cgit v1.2.3 From 19250aed187d5bfa2eb5ff7806728448fdaeb4d8 Mon Sep 17 00:00:00 2001 From: David Mosberger Date: Tue, 4 Feb 2003 01:37:59 -0800 Subject: ia64: Fix potential perfmon deadlock. Patch by Stephane Eranian. 
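The lazy, IPI-based scheme used on SMP can deadlock: pfm_fetch_regs() sends an IPI via smp_call_function_single() and then busy-waits on the ctx_saving_in_progress/ctx_is_busy flags, while the target CPU may be sitting in the low-level context switch with interrupts masked (which the new scheduler does), so the IPI is never taken. The fix removes pfm_fetch_regs()/pfm_handle_fetch_regs() and both flags; on SMP, pfm_save_regs() now eagerly saves the used PMDs and pmc0 at switch-out, drops PMU ownership, and sets ctx_last_cpu to -1 so that the next pfm_load_regs() does a full reload.

In rough outline (a sketch of the removed pattern, not the kernel source itself; cpu_b stands for whichever CPU last held the context):

	/* CPU A: wants task T's PMU state, which still lives on CPU B */
	smp_call_function_single(cpu_b, pfm_handle_fetch_regs, &arg, 0, 1);
	/* blocks until B runs the handler... */
	while (atomic_read(&ctx->ctx_saving_in_progress))
		;	/* ...then busy-waits if the save was in flight */

	/* CPU B: in the low-level context switch with interrupts masked,
	 * so the IPI is never delivered and CPU A spins forever. */
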
--- arch/ia64/kernel/perfmon.c | 299 ++++++++++----------------------------------- 1 file changed, 62 insertions(+), 237 deletions(-) diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index dc6d8f997891..4a63c27ac26d 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -267,8 +267,6 @@ typedef struct pfm_context { unsigned long ctx_saved_cpus_allowed; /* copy of the task cpus_allowed (system wide) */ unsigned int ctx_cpu; /* CPU used by system wide session */ - atomic_t ctx_saving_in_progress; /* flag indicating actual save in progress */ - atomic_t ctx_is_busy; /* context accessed by overflow handler */ atomic_t ctx_last_cpu; /* CPU id of current or last CPU used */ } pfm_context_t; @@ -439,9 +437,6 @@ static struct { * forward declarations */ static void pfm_reset_pmu(struct task_struct *); -#ifdef CONFIG_SMP -static void pfm_fetch_regs(int cpu, struct task_struct *task, pfm_context_t *ctx); -#endif static void pfm_lazy_save_regs (struct task_struct *ta); #if defined(CONFIG_ITANIUM) @@ -490,6 +485,19 @@ pfm_set_psr_l(unsigned long val) __asm__ __volatile__ ("mov psr.l=%0;; srlz.i;;"::"r"(val): "memory"); } +static inline void +pfm_freeze_pmu(void) +{ + ia64_set_pmc(0,1UL); + ia64_srlz_d(); +} + +static inline void +pfm_unfreeze_pmu(void) +{ + ia64_set_pmc(0,0UL); + ia64_srlz_d(); +} static inline unsigned long pfm_read_soft_counter(pfm_context_t *ctx, int i) @@ -1230,10 +1238,6 @@ pfm_context_create(struct task_struct *task, pfm_context_t *ctx, void *req, int atomic_set(&ctx->ctx_last_cpu,-1); /* SMP only, means no CPU */ - /* may be redudant with memset() but at least it's easier to remember */ - atomic_set(&ctx->ctx_saving_in_progress, 0); - atomic_set(&ctx->ctx_is_busy, 0); - sema_init(&ctx->ctx_restart_sem, 0); /* init this semaphore to locked */ if (__copy_to_user(req, &tmp, sizeof(tmp))) { @@ -1667,25 +1671,6 @@ pfm_read_pmds(struct task_struct *task, pfm_context_t *ctx, void *arg, int count val = ia64_get_pmd(cnum); DBprintk(("reading pmd[%u]=0x%lx from hw\n", cnum, val)); } else { -#ifdef CONFIG_SMP - int cpu; - /* - * for SMP system, the context may still be live on another - * CPU so we need to fetch it before proceeding with the read - * This call we only be made once for the whole loop because - * of ctx_last_cpu becoming == -1. - * - * We cannot reuse ctx_last_cpu as it may change before we get to the - * actual IPI call. In this case, we will do the call for nothing but - * there is no way around it. The receiving side will simply do nothing. 
- */ - cpu = atomic_read(&ctx->ctx_last_cpu); - if (cpu != -1) { - DBprintk(("must fetch on CPU%d for [%d]\n", cpu, task->pid)); - pfm_fetch_regs(cpu, task, ctx); - } -#endif - /* context has been saved */ val = th->pmd[cnum]; } if (PMD_IS_COUNTING(cnum)) { @@ -1862,8 +1847,7 @@ pfm_restart(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, } /* simply unfreeze */ - ia64_set_pmc(0, 0); - ia64_srlz_d(); + pfm_unfreeze_pmu(); return 0; } @@ -2416,8 +2400,7 @@ pfm_enable(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, atomic_set(&ctx->ctx_last_cpu, smp_processor_id()); /* simply unfreeze */ - ia64_set_pmc(0, 0); - ia64_srlz_d(); + pfm_unfreeze_pmu(); return 0; } @@ -2665,8 +2648,7 @@ non_blocking: ctx->ctx_psb->psb_index = 0; } - ia64_set_pmc(0, 0); - ia64_srlz_d(); + pfm_unfreeze_pmu(); /* state restored, can go back to work (user mode) */ } @@ -3073,19 +3055,6 @@ pfm_interrupt_handler(int irq, void *arg, struct pt_regs *regs) "no PFM context\n", task->pid); return; } -#ifdef CONFIG_SMP - /* - * Because an IPI has higher priority than the PMU overflow interrupt, it is - * possible that the handler be interrupted by a request from another CPU to fetch - * the PMU state of the currently active context. The task may have just been - * migrated to another CPU which is trying to restore the context. If there was - * a pending overflow interrupt when the task left this CPU, it is possible for - * the handler to get interrupt by the IPI. In which case, we fetch request - * MUST be postponed until the interrupt handler is done. The ctx_is_busy - * flag indicates such a condition. The other CPU must busy wait until it's cleared. - */ - atomic_set(&ctx->ctx_is_busy, 1); -#endif /* * assume PMC[0].fr = 1 at this point @@ -3099,12 +3068,6 @@ pfm_interrupt_handler(int irq, void *arg, struct pt_regs *regs) ia64_set_pmc(0, pmc0); ia64_srlz_d(); -#ifdef CONFIG_SMP - /* - * announce that we are doing with the context - */ - atomic_set(&ctx->ctx_is_busy, 0); -#endif } else { pfm_stats[smp_processor_id()].pfm_spurious_ovfl_intr_count++; } @@ -3222,10 +3185,13 @@ void pfm_save_regs (struct task_struct *task) { pfm_context_t *ctx; + unsigned long mask; u64 psr; + int i; ctx = task->thread.pfm_context; + /* * save current PSR: needed because we modify it */ @@ -3238,129 +3204,61 @@ pfm_save_regs (struct task_struct *task) * We do not need to set psr.sp because, it is irrelevant in kernel. * It will be restored from ipsr when going back to user level */ - __asm__ __volatile__ ("rum psr.up;;"::: "memory"); + pfm_clear_psr_up(); ia64_srlz_i(); ctx->ctx_saved_psr = psr; - //ctx->ctx_last_cpu = smp_processor_id(); - -} - -static void -pfm_lazy_save_regs (struct task_struct *task) -{ - pfm_context_t *ctx; - struct thread_struct *t; - unsigned long mask; - int i; - - DBprintk(("on [%d] by [%d]\n", task->pid, current->pid)); - - t = &task->thread; - ctx = task->thread.pfm_context; - #ifdef CONFIG_SMP - /* - * announce we are saving this PMU state - * This will cause other CPU, to wait until we're done - * before using the context.h + /* + * We do not use a lazy scheme in SMP because + * of the new scheduler which masks interrupts + * during low-level context switch. So we save + * all the PMD register we use and restore on + * ctxsw in. * - * must be an atomic operation + * release ownership of this PMU. + * must be done before we save the registers. 
*/ - atomic_set(&ctx->ctx_saving_in_progress, 1); - - /* - * if owner is NULL, it means that the other CPU won the race - * and the IPI has caused the context to be saved in pfm_handle_fectch_regs() - * instead of here. We have nothing to do - * - * note that this is safe, because the other CPU NEVER modifies saving_in_progress. - */ - if (PMU_OWNER() == NULL) goto do_nothing; -#endif + SET_PMU_OWNER(NULL); /* - * do not own the PMU + * save PMDs */ - SET_PMU_OWNER(NULL); - ia64_srlz_d(); - /* - * XXX needs further optimization. - * Also must take holes into account - */ mask = ctx->ctx_used_pmds[0]; for (i=0; mask; i++, mask>>=1) { - if (mask & 0x1) t->pmd[i] =ia64_get_pmd(i); + if (mask & 0x1) task->thread.pmd[i] =ia64_get_pmd(i); } - /* save pmc0 */ - t->pmc[0] = ia64_get_pmc(0); + /* + * save pmc0 + */ + task->thread.pmc[0] = ia64_get_pmc(0); - /* not owned by this CPU */ + /* + * force a full reload + */ atomic_set(&ctx->ctx_last_cpu, -1); - -#ifdef CONFIG_SMP -do_nothing: #endif - /* - * declare we are done saving this context - * - * must be an atomic operation - */ - atomic_set(&ctx->ctx_saving_in_progress,0); - } -#ifdef CONFIG_SMP -/* - * Handles request coming from other CPUs - */ -static void -pfm_handle_fetch_regs(void *info) +static void +pfm_lazy_save_regs (struct task_struct *task) { - pfm_smp_ipi_arg_t *arg = info; - struct thread_struct *t; pfm_context_t *ctx; + struct thread_struct *t; unsigned long mask; int i; - ctx = arg->task->thread.pfm_context; - t = &arg->task->thread; - - DBprintk(("task=%d owner=%d saving=%d\n", - arg->task->pid, - PMU_OWNER() ? PMU_OWNER()->pid: -1, - atomic_read(&ctx->ctx_saving_in_progress))); - - /* must wait until not busy before retrying whole request */ - if (atomic_read(&ctx->ctx_is_busy)) { - arg->retval = 2; - return; - } - - /* must wait if saving was interrupted */ - if (atomic_read(&ctx->ctx_saving_in_progress)) { - arg->retval = 1; - return; - } - - /* can proceed, done with context */ - if (PMU_OWNER() != arg->task) { - arg->retval = 0; - return; - } + DBprintk(("on [%d] by [%d]\n", task->pid, current->pid)); - DBprintk(("saving state for [%d] used_pmcs=0x%lx reload_pmcs=0x%lx used_pmds=0x%lx\n", - arg->task->pid, - ctx->ctx_used_pmcs[0], - ctx->ctx_reload_pmcs[0], - ctx->ctx_used_pmds[0])); + t = &task->thread; + ctx = task->thread.pfm_context; /* - * XXX: will be replaced with pure assembly call + * do not own the PMU */ SET_PMU_OWNER(NULL); @@ -3368,10 +3266,11 @@ pfm_handle_fetch_regs(void *info) /* * XXX needs further optimization. + * Also must take holes into account */ mask = ctx->ctx_used_pmds[0]; for (i=0; mask; i++, mask>>=1) { - if (mask & 0x1) t->pmd[i] = ia64_get_pmd(i); + if (mask & 0x1) t->pmd[i] =ia64_get_pmd(i); } /* save pmc0 */ @@ -3379,67 +3278,7 @@ pfm_handle_fetch_regs(void *info) /* not owned by this CPU */ atomic_set(&ctx->ctx_last_cpu, -1); - - /* can proceed */ - arg->retval = 0; -} - -/* - * Function call to fetch PMU state from another CPU identified by 'cpu'. - * If the context is being saved on the remote CPU, then we busy wait until - * the saving is done and then we return. In this case, non IPI is sent. - * Otherwise, we send an IPI to the remote CPU, potentially interrupting - * pfm_lazy_save_regs() over there. - * - * If the retval==1, then it means that we interrupted remote save and that we must - * wait until the saving is over before proceeding. - * Otherwise, we did the saving on the remote CPU, and it was done by the time we got there. - * in either case, we can proceed. 
- */ -static void -pfm_fetch_regs(int cpu, struct task_struct *task, pfm_context_t *ctx) -{ - pfm_smp_ipi_arg_t arg; - int ret; - - arg.task = task; - arg.retval = -1; - - if (atomic_read(&ctx->ctx_is_busy)) { -must_wait_busy: - while (atomic_read(&ctx->ctx_is_busy)); - } - - if (atomic_read(&ctx->ctx_saving_in_progress)) { - DBprintk(("no IPI, must wait for [%d] to be saved on [%d]\n", task->pid, cpu)); -must_wait_saving: - /* busy wait */ - while (atomic_read(&ctx->ctx_saving_in_progress)); - DBprintk(("done saving for [%d] on [%d]\n", task->pid, cpu)); - return; - } - DBprintk(("calling CPU %d from CPU %d\n", cpu, smp_processor_id())); - - if (cpu == -1) { - printk("refusing to use -1 for [%d]\n", task->pid); - return; - } - - /* will send IPI to other CPU and wait for completion of remote call */ - if ((ret=smp_call_function_single(cpu, pfm_handle_fetch_regs, &arg, 0, 1))) { - printk(KERN_ERR "perfmon: remote CPU call from %d to %d error %d\n", - smp_processor_id(), cpu, ret); - return; - } - /* - * we must wait until saving is over on the other CPU - * This is the case, where we interrupted the saving which started just at the time we sent the - * IPI. - */ - if (arg.retval == 1) goto must_wait_saving; - if (arg.retval == 2) goto must_wait_busy; } -#endif /* CONFIG_SMP */ void pfm_load_regs (struct task_struct *task) @@ -3450,14 +3289,16 @@ pfm_load_regs (struct task_struct *task) unsigned long mask; u64 psr; int i; -#ifdef CONFIG_SMP - int cpu; -#endif owner = PMU_OWNER(); ctx = task->thread.pfm_context; t = &task->thread; + if (ctx == NULL) { + printk("perfmon: pfm_load_regs: null ctx for [%d]\n", task->pid); + return; + } + /* * we restore ALL the debug registers to avoid picking up * stale state. @@ -3483,6 +3324,7 @@ pfm_load_regs (struct task_struct *task) /* * if we were the last user, then nothing to do except restore psr + * this path cannot be used in SMP */ if (owner == task) { if (atomic_read(&ctx->ctx_last_cpu) != smp_processor_id()) @@ -3490,32 +3332,19 @@ pfm_load_regs (struct task_struct *task) atomic_read(&ctx->ctx_last_cpu), task->pid)); psr = ctx->ctx_saved_psr; - __asm__ __volatile__ ("mov psr.l=%0;; srlz.i;;"::"r"(psr): "memory"); + pfm_set_psr_l(psr); return; } - DBprintk(("load_regs: must reload for [%d] owner=%d\n", - task->pid, owner ? owner->pid : -1 )); + /* * someone else is still using the PMU, first push it out and * then we'll be able to install our stuff ! + * + * not possible in SMP */ if (owner) pfm_lazy_save_regs(owner); -#ifdef CONFIG_SMP - /* - * check if context on another CPU (-1 means saved) - * We MUST use the variable, as last_cpu may change behind our - * back. If it changes to -1 (not on a CPU anymore), then in cpu - * we have the last CPU the context was on. We may be sending the - * IPI for nothing, but we have no way of verifying this. - */ - cpu = atomic_read(&ctx->ctx_last_cpu); - if (cpu != -1) { - pfm_fetch_regs(cpu, task, ctx); - } -#endif - /* * To avoid leaking information to the user level when psr.sp=0, * we must reload ALL implemented pmds (even the ones we don't use). 
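Taken together, the hunks above leave a much simpler contract: on SMP, pfm_save_regs() now saves the PMU state eagerly at context-switch time and marks the context as off-CPU, so pfm_load_regs() never has to chase registers on a remote CPU via IPI. A condensed outline of that save path, using only helpers visible in this patch (pfm_clear_psr_up, SET_PMU_OWNER, ia64_get_pmd, ia64_get_pmc); the PSR capture is elided and this is a sketch of the control flow, not the literal function body:

	/* Outline of the eager SMP save path this patch establishes. */
	static void pfm_save_regs_outline(struct task_struct *task)
	{
		pfm_context_t *ctx = task->thread.pfm_context;
		unsigned long mask;
		int i;

		pfm_clear_psr_up();		/* stop monitoring (psr.up = 0) */
		ia64_srlz_i();
		SET_PMU_OWNER(NULL);		/* release ownership before saving */

		/* save every PMD the context uses, plus pmc0 */
		mask = ctx->ctx_used_pmds[0];
		for (i = 0; mask; i++, mask >>= 1)
			if (mask & 0x1)
				task->thread.pmd[i] = ia64_get_pmd(i);
		task->thread.pmc[0] = ia64_get_pmc(0);

		/* mark the context off-CPU: forces a full reload next time */
		atomic_set(&ctx->ctx_last_cpu, -1);
	}
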
@@ -3552,8 +3381,7 @@ pfm_load_regs (struct task_struct *task) * fl_frozen==1 when we are in blocking mode waiting for restart */ if (ctx->ctx_fl_frozen == 0) { - ia64_set_pmc(0, 0); - ia64_srlz_d(); + pfm_unfreeze_pmu(); } atomic_set(&ctx->ctx_last_cpu, smp_processor_id()); @@ -3563,8 +3391,7 @@ pfm_load_regs (struct task_struct *task) * restore the psr we changed in pfm_save_regs() */ psr = ctx->ctx_saved_psr; - __asm__ __volatile__ ("mov psr.l=%0;; srlz.i;;"::"r"(psr): "memory"); - + pfm_set_psr_l(psr); } /* @@ -3583,7 +3410,7 @@ pfm_reset_pmu(struct task_struct *task) } /* Let's make sure the PMU is frozen */ - ia64_set_pmc(0,1); + pfm_freeze_pmu(); /* * install reset values for PMC. We skip PMC0 (done above) @@ -3750,8 +3577,7 @@ pfm_flush_regs (struct task_struct *task) * This destroys the overflow information. This is required to make sure * next process does not start with monitoring on if not requested */ - ia64_set_pmc(0, 1); - ia64_srlz_d(); + pfm_freeze_pmu(); /* * We don't need to restore psr, because we are on our way out @@ -4433,8 +4259,7 @@ pfm_init_percpu(void) if (PMD_IS_IMPL(i) == 0) continue; ia64_set_pmd(i, 0UL); } - ia64_set_pmc(0,1UL); - ia64_srlz_d(); + pfm_freeze_pmu(); } #else /* !CONFIG_PERFMON */ -- cgit v1.2.3 From 21cd9dc6470fea34911ed05a1014baba5f85ba2a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 4 Feb 2003 04:38:47 -0600 Subject: [PATCH] fixes and cleanups for the new command allocation code On Tue, Feb 04, 2003 at 12:33:23PM -0600, James Bottomley wrote: > I agree with this. It is a guarantee the mid-layer makes to the LLD > (and there are some LLDs with static issue queues for which this is a > hard requirement). I think (once the dust has settled and we've agreed > which field holds the current queue depth) what's needed is a check in > the scsi_request_fn() to see if we're over the LLD's current depth for > the device and plug the queue and exit if we are. The next returning > command will unplug and send. > > This way of doing things means that we're free to prep as many commands > as we can, but we guarantee only to have the correct number outstanding > to the LLD. Okay, here's a new versin of the patch. 
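In code terms, the guarantee discussed above reduces to a short check in the prep function (see the scsi_lib.c hunk further down); sketched here with the field names this patch settles on:

	/* scsi_prep_fn() outline: never prep past the LLD's current depth. */
	if (SDpnt->device_busy >= SDpnt->queue_depth)
		return BLKPREP_DEFER;	/* queue stays plugged; a completing
					 * command will unplug and retry */
	SCpnt = scsi_get_command(SDpnt, GFP_ATOMIC);
	if (unlikely(!SCpnt))
		return BLKPREP_DEFER;	/* command-block shortage: also defer */

So the mid-layer is free to prep as many requests as it likes, but only queue_depth commands are ever outstanding at the LLD.
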
Changes: * throttel on number of inflight command blocks * rename scsi_cmnd->new_queue_depth to scsi_cmnd->queue_depth * remove scsi_do_cmd * serialize pool handling --- drivers/scsi/53c700.c | 6 +- drivers/scsi/aacraid/linit.c | 2 +- drivers/scsi/advansys.c | 19 - drivers/scsi/aic7xxx_old.c | 2 +- drivers/scsi/aic7xxx_old/aic7xxx_proc.c | 2 +- drivers/scsi/cpqfcTSinit.c | 2 +- drivers/scsi/eata.c | 8 +- drivers/scsi/gdth.c | 4 +- drivers/scsi/gdth_proc.c | 4 +- drivers/scsi/hosts.c | 38 +- drivers/scsi/hosts.h | 4 + drivers/scsi/qla1280.c | 2 +- drivers/scsi/scsi.c | 737 ++++++++------------------------ drivers/scsi/scsi.h | 69 +-- drivers/scsi/scsi_lib.c | 166 +------ drivers/scsi/scsi_scan.c | 3 - drivers/scsi/scsi_syms.c | 6 - drivers/scsi/scsi_sysfs.c | 6 +- drivers/scsi/sg.c | 4 +- drivers/scsi/u14-34f.c | 8 +- 20 files changed, 235 insertions(+), 857 deletions(-) diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c index 62f7f53f1448..dbe934810790 100644 --- a/drivers/scsi/53c700.c +++ b/drivers/scsi/53c700.c @@ -1718,10 +1718,10 @@ NCR_700_proc_directory_info(char *proc_buf, char **startp, hostdata = (struct NCR_700_Host_Parameters *)host->hostdata[0]; len += sprintf(&buf[len], "Total commands outstanding: %d\n", hostdata->command_slot_count); len += sprintf(&buf[len],"\ -Target Depth Active Next Tag\n\ -====== ===== ====== ========\n"); +Target Active Next Tag\n\ +====== ====== ========\n"); list_for_each_entry(SDp, &host->my_devices, siblings) { - len += sprintf(&buf[len]," %2d:%2d %4d %4d %4d\n", SDp->id, SDp->lun, SDp->current_queue_depth, NCR_700_get_depth(SDp), SDp->current_tag); + len += sprintf(&buf[len]," %2d:%2d %4d %4d\n", SDp->id, SDp->lun, NCR_700_get_depth(SDp), SDp->current_tag); } if((len -= offset) <= 0) return 0; diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index 242b38f9dfe0..6915d91dfbe7 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -536,7 +536,7 @@ static int aac_slave_configure(Scsi_Device * dev ) dprintk((KERN_DEBUG "(scsi%d:%d:%d:%d) Tagged Queue depth %2d, " "%s\n", dev->host->host_no, dev->channel, - dev->id, dev->lun, dev->new_queue_depth, + dev->id, dev->lun, dev->queue_depth, dev->online ? "OnLine" : "OffLine")); return 0; } diff --git a/drivers/scsi/advansys.c b/drivers/scsi/advansys.c index 9b092e2de182..067baacdf75b 100644 --- a/drivers/scsi/advansys.c +++ b/drivers/scsi/advansys.c @@ -8417,25 +8417,6 @@ asc_prt_driver_conf(struct Scsi_Host *shp, char *cp, int cplen) chip_scsi_id = boardp->dvc_var.adv_dvc_var.chip_scsi_id; } - if (boardp->flags & ASC_SELECT_QUEUE_DEPTHS) { - len = asc_prt_line(cp, leftlen, " queue_depth:"); - ASC_PRT_NEXT(); - for (i = 0; i <= ADV_MAX_TID; i++) { - if ((chip_scsi_id == i) || - ((boardp->init_tidmask & ADV_TID_TO_TIDMASK(i)) == 0)) { - continue; - } - if (boardp->device[i] == NULL) { - continue; - } - len = asc_prt_line(cp, leftlen, " %X:%d", - i, boardp->device[i]->current_queue_depth); - ASC_PRT_NEXT(); - } - len = asc_prt_line(cp, leftlen, "\n"); - ASC_PRT_NEXT(); - } - return totlen; } diff --git a/drivers/scsi/aic7xxx_old.c b/drivers/scsi/aic7xxx_old.c index 827accc86a3e..cba61d8d1744 100644 --- a/drivers/scsi/aic7xxx_old.c +++ b/drivers/scsi/aic7xxx_old.c @@ -4068,7 +4068,7 @@ aic7xxx_handle_seqint(struct aic7xxx_host *p, unsigned char intstat) * normal. 
*/ scsi_adjust_queue_depth(scb->cmd->device, MSG_SIMPLE_TAG, - scb->cmd->device->new_queue_depth); + scb->cmd->device->queue_depth); scb->tag_action = MSG_SIMPLE_Q_TAG; scb->hscb->control &= ~SCB_TAG_TYPE; scb->hscb->control |= MSG_SIMPLE_Q_TAG; diff --git a/drivers/scsi/aic7xxx_old/aic7xxx_proc.c b/drivers/scsi/aic7xxx_old/aic7xxx_proc.c index 4a438674fc41..2dda94d3d900 100644 --- a/drivers/scsi/aic7xxx_old/aic7xxx_proc.c +++ b/drivers/scsi/aic7xxx_old/aic7xxx_proc.c @@ -313,7 +313,7 @@ aic7xxx_proc_info ( char *buffer, char **start, off_t offset, int length, p->user[tindex].options); if(sdptr->simple_tags) { - size += sprintf(BLS, " Tagged Command Queueing Enabled, Ordered Tags %s, Depth %d/%d\n", sdptr->ordered_tags ? "Enabled" : "Disabled", sdptr->new_queue_depth, aic_dev->max_q_depth); + size += sprintf(BLS, " Tagged Command Queueing Enabled, Ordered Tags %s, Depth %d/%d\n", sdptr->ordered_tags ? "Enabled" : "Disabled", sdptr->queue_depth, aic_dev->max_q_depth); } if(aic_dev->barrier_total) size += sprintf(BLS, " Total transfers %ld:\n (%ld/%ld/%ld/%ld reads/writes/REQ_BARRIER/Ordered Tags)\n", diff --git a/drivers/scsi/cpqfcTSinit.c b/drivers/scsi/cpqfcTSinit.c index 843ead6b75cf..3528bdd5b4db 100644 --- a/drivers/scsi/cpqfcTSinit.c +++ b/drivers/scsi/cpqfcTSinit.c @@ -1604,7 +1604,7 @@ return -ENOTSUPP; scsi_cdb[0] = RELEASE; - SCpnt = scsi_getset_command(ScsiDev, GFP_KERNEL); + SCpnt = scsi_get_command(ScsiDev, GFP_KERNEL); { CPQFC_DECLARE_COMPLETION(wait); diff --git a/drivers/scsi/eata.c b/drivers/scsi/eata.c index 4bba896bbebd..322ebd0be0eb 100644 --- a/drivers/scsi/eata.c +++ b/drivers/scsi/eata.c @@ -895,7 +895,7 @@ static int eata2x_slave_configure(Scsi_Device *dev) { tag_suffix = ""; } - if (TLDEV(dev->type) && linked_comm && dev->new_queue_depth > 2) + if (TLDEV(dev->type) && linked_comm && dev->queue_depth > 2) link_suffix = ", sorted"; else if (TLDEV(dev->type)) link_suffix = ", unsorted"; @@ -904,7 +904,7 @@ static int eata2x_slave_configure(Scsi_Device *dev) { printk("%s: scsi%d, channel %d, id %d, lun %d, cmds/lun %d%s%s.\n", BN(j), host->host_no, dev->channel, dev->id, dev->lun, - dev->new_queue_depth, link_suffix, tag_suffix); + dev->queue_depth, link_suffix, tag_suffix); return FALSE; } @@ -1699,7 +1699,7 @@ static int eata2x_queuecommand(Scsi_Cmnd *SCpnt, void (*done)(Scsi_Cmnd *)) { /* Map DMA buffers and SG list */ map_dma(i, j); - if (linked_comm && SCpnt->device->new_queue_depth > 2 + if (linked_comm && SCpnt->device->queue_depth > 2 && TLDEV(SCpnt->device->type)) { HD(j)->cp_stat[i] = READY; flush_dev(SCpnt->device, SCpnt->request->sector, j, FALSE); @@ -2207,7 +2207,7 @@ static void ihdlr(int irq, unsigned int j) { sync_dma(i, j); - if (linked_comm && SCpnt->device->new_queue_depth > 2 + if (linked_comm && SCpnt->device->queue_depth > 2 && TLDEV(SCpnt->device->type)) flush_dev(SCpnt->device, SCpnt->request->sector, j, TRUE); diff --git a/drivers/scsi/gdth.c b/drivers/scsi/gdth.c index 3c3135c3bcdf..4bb5cd8b38d9 100644 --- a/drivers/scsi/gdth.c +++ b/drivers/scsi/gdth.c @@ -4637,7 +4637,7 @@ static void gdth_flush(int hanum) #if LINUX_VERSION_CODE >= 0x020322 sdev = scsi_get_host_dev(gdth_ctr_tab[hanum]); - scp = scsi_getset_command(sdev, GFP_KERNEL); + scp = scsi_get_command(sdev, GFP_KERNEL); scp->cmd_len = 12; scp->use_sg = 0; #else @@ -4711,7 +4711,7 @@ void gdth_halt(void) memset(cmnd, 0xff, MAX_COMMAND_SIZE); #if LINUX_VERSION_CODE >= 0x020322 sdev = scsi_get_host_dev(gdth_ctr_tab[hanum]); - scp = scsi_getset_command(sdev, GFP_KERNEL); + scp = 
scsi_get_command(sdev, GFP_KERNEL); scp->cmd_len = 12; scp->use_sg = 0; #else diff --git a/drivers/scsi/gdth_proc.c b/drivers/scsi/gdth_proc.c index 9b943a37ce0e..f1afbacf0906 100644 --- a/drivers/scsi/gdth_proc.c +++ b/drivers/scsi/gdth_proc.c @@ -48,7 +48,7 @@ static int gdth_set_info(char *buffer,int length,int vh,int hanum,int busnum) #if LINUX_VERSION_CODE >= 0x020322 sdev = scsi_get_host_dev(gdth_ctr_vtab[vh]); - scp = scsi_getset_command(sdev, GFP_KERNEL); + scp = scsi_get_command(sdev, GFP_KERNEL); if (!scp) return -ENOMEM; scp->cmd_len = 12; @@ -712,7 +712,7 @@ static int gdth_get_info(char *buffer,char **start,off_t offset, #if LINUX_VERSION_CODE >= 0x020322 sdev = scsi_get_host_dev(gdth_ctr_vtab[vh]); - scp = scsi_getset_command(sdev, GFP_KERNEL); + scp = scsi_get_command(sdev, GFP_KERNEL); if (!scp) return -ENOMEM; scp->cmd_len = 12; diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index 7e474fe3ccf3..5891be92ec51 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -342,17 +342,8 @@ void scsi_unregister(struct Scsi_Host *shost) } shost->hostt->present--; - - /* Cleanup proc */ scsi_proc_host_rm(shost); - - while (!list_empty(&shost->free_list)) { - struct scsi_cmnd *cmd; - cmd = list_entry(shost->free_list.next,struct scsi_cmnd,list); - list_del_init(&cmd->list); - kmem_cache_free(scsi_core->scsi_cmd_cache, cmd); - } - + scsi_destroy_command_freelist(shost); kfree(shost); } @@ -373,8 +364,7 @@ extern int blk_nohighio; struct Scsi_Host * scsi_register(Scsi_Host_Template *shost_tp, int xtr_bytes) { struct Scsi_Host *shost, *shost_scr; - struct scsi_cmnd *cmd = NULL; - int gfp_mask; + int gfp_mask, rval; DECLARE_COMPLETION(sem); /* Check to see if this host has any error handling facilities */ @@ -441,7 +431,7 @@ struct Scsi_Host * scsi_register(Scsi_Host_Template *shost_tp, int xtr_bytes) shost->unchecked_isa_dma = shost_tp->unchecked_isa_dma; shost->use_clustering = shost_tp->use_clustering; if (!blk_nohighio) - shost->highmem_io = shost_tp->highmem_io; + shost->highmem_io = shost_tp->highmem_io; shost->max_sectors = shost_tp->max_sectors; shost->use_blk_tcq = shost_tp->use_blk_tcq; @@ -463,16 +453,9 @@ struct Scsi_Host * scsi_register(Scsi_Host_Template *shost_tp, int xtr_bytes) found: spin_unlock(&scsi_host_list_lock); - spin_lock_init(&shost->free_list_lock); - INIT_LIST_HEAD(&shost->free_list); - - /* Get one backup command for this host. 
*/ - cmd = scsi_get_command(shost, GFP_KERNEL); - if (cmd) - list_add(&cmd->list, &shost->free_list); - else - printk(KERN_NOTICE "The system is running low in memory.\n"); - + rval = scsi_setup_command_freelist(shost); + if (rval) + goto fail; scsi_proc_host_add(shost); shost->eh_notify = &sem; @@ -483,10 +466,15 @@ found: */ wait_for_completion(&sem); shost->eh_notify = NULL; - shost->hostt->present++; - return shost; + +fail: + spin_lock(&scsi_host_list_lock); + list_del(&shost->sh_list); + spin_unlock(&scsi_host_list_lock); + kfree(shost); + return NULL; } /** diff --git a/drivers/scsi/hosts.h b/drivers/scsi/hosts.h index d276316390ca..f812838f01a8 100644 --- a/drivers/scsi/hosts.h +++ b/drivers/scsi/hosts.h @@ -29,6 +29,9 @@ #include #include +struct scsi_host_cmd_pool; + + /* It is senseless to set SG_ALL any higher than this - the performance * does not get any better, and it wastes memory */ @@ -375,6 +378,7 @@ struct Scsi_Host struct list_head sh_list; struct list_head my_devices; + struct scsi_host_cmd_pool *cmd_pool; spinlock_t free_list_lock; struct list_head free_list; /* backup store of cmd structs */ diff --git a/drivers/scsi/qla1280.c b/drivers/scsi/qla1280.c index 90c83fe5330b..cb759e19c945 100644 --- a/drivers/scsi/qla1280.c +++ b/drivers/scsi/qla1280.c @@ -1820,7 +1820,7 @@ qla1280_slave_configure(Scsi_Device * device) /* device->queue_depth = 20; */ printk(KERN_INFO "scsi(%li:%d:%d:%d): Enabled tagged queuing, " "queue depth %d.\n", p->host_no, device->channel, - device->id, device->lun, device->new_queue_depth); + device->id, device->lun, device->queue_depth); } else { scsi_adjust_queue_depth(device, 0 /* TCQ off */, 3); } diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index ef553600e24f..31a93e8343cb 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -145,7 +145,6 @@ LIST_HEAD(scsi_dev_info_list); * Function prototypes. */ extern void scsi_times_out(Scsi_Cmnd * SCpnt); -void scsi_build_commandblocks(Scsi_Device * SDpnt); #ifdef MODULE MODULE_PARM(scsi_logging_level, "i"); @@ -195,14 +194,6 @@ static void scsi_wait_done(Scsi_Cmnd * SCpnt) complete(req->waiting); } -/* - * This lock protects the freelist for all devices on the system. - * We could make this finer grained by having a single lock per - * device if it is ever found that there is excessive contention - * on this lock. - */ -static spinlock_t device_request_lock = SPIN_LOCK_UNLOCKED; - /* * Function: scsi_allocate_request * @@ -272,231 +263,6 @@ void scsi_release_request(Scsi_Request * req) kfree(req); } -/* - * FIXME(eric) - this is not at all optimal. Given that - * single lun devices are rare and usually slow - * (i.e. CD changers), this is good enough for now, but - * we may want to come back and optimize this later. - * - * Scan through all of the devices attached to this - * host, and see if any are active or not. If so, - * we need to defer this command. - * - * We really need a busy counter per device. This would - * allow us to more easily figure out whether we should - * do anything here or not. - */ -static int check_all_luns(struct Scsi_Host *shost, struct scsi_device *myself) -{ - struct scsi_device *sdev; - - list_for_each_entry(sdev, &myself->same_target_siblings, - same_target_siblings) { - if (atomic_read(&sdev->device_active)) - return 1; - } - - return 0; -} - -/* - * Function: scsi_allocate_device - * - * Purpose: Allocate a command descriptor. 
- * - * Arguments: device - device for which we want a command descriptor - * wait - 1 if we should wait in the event that none - * are available. - * interruptible - 1 if we should unblock and return NULL - * in the event that we must wait, and a signal - * arrives. - * - * Lock status: No locks assumed to be held. This function is SMP-safe. - * - * Returns: Pointer to command descriptor. - * - * Notes: Prior to the new queue code, this function was not SMP-safe. - * - * If the wait flag is true, and we are waiting for a free - * command block, this function will interrupt and return - * NULL in the event that a signal arrives that needs to - * be handled. - * - * This function is deprecated, and drivers should be - * rewritten to use Scsi_Request instead of Scsi_Cmnd. - */ -struct scsi_cmnd *scsi_allocate_device(struct scsi_device *sdev, int wait) -{ - DECLARE_WAITQUEUE(wq, current); - struct Scsi_Host *shost = sdev->host; - struct scsi_cmnd *scmnd; - unsigned long flags; - - spin_lock_irqsave(&device_request_lock, flags); - while (1) { - if (sdev->device_blocked) - goto busy; - if (sdev->single_lun && check_all_luns(shost, sdev)) - goto busy; - - /* - * Now we can check for a free command block for this device. - */ - for (scmnd = sdev->device_queue; scmnd; scmnd = scmnd->next) - if (!scmnd->request) - goto found; - -busy: - if (!wait) - goto fail; - - /* - * We need to wait for a free commandblock. We need to - * insert ourselves into the list before we release the - * lock. This way if a block were released the same - * microsecond that we released the lock, the call - * to schedule() wouldn't block (well, it might switch, - * but the current task will still be schedulable. - */ - add_wait_queue(&sdev->scpnt_wait, &wq); - set_current_state(TASK_UNINTERRUPTIBLE); - - spin_unlock_irqrestore(&device_request_lock, flags); - schedule(); - spin_lock_irqsave(&device_request_lock, flags); - - remove_wait_queue(&sdev->scpnt_wait, &wq); - set_current_state(TASK_RUNNING); - } - -found: - scmnd->request = NULL; - atomic_inc(&scmnd->device->host->host_active); - atomic_inc(&scmnd->device->device_active); - - scmnd->buffer = NULL; - scmnd->bufflen = 0; - scmnd->request_buffer = NULL; - scmnd->request_bufflen = 0; - - scmnd->use_sg = 0; /* Reset the scatter-gather flag */ - scmnd->old_use_sg = 0; - scmnd->transfersize = 0; /* No default transfer size */ - scmnd->cmd_len = 0; - - scmnd->sc_data_direction = SCSI_DATA_UNKNOWN; - scmnd->sc_request = NULL; - scmnd->sc_magic = SCSI_CMND_MAGIC; - - scmnd->result = 0; - scmnd->underflow = 0; /* Do not flag underflow conditions */ - scmnd->old_underflow = 0; - scmnd->resid = 0; - scmnd->state = SCSI_STATE_INITIALIZING; - scmnd->owner = SCSI_OWNER_HIGHLEVEL; - - spin_unlock_irqrestore(&device_request_lock, flags); - - SCSI_LOG_MLQUEUE(5, printk("Activating command for device %d (%d)\n", - scmnd->device->id, - atomic_read(&scmnd->device->host->host_active))); - - return scmnd; - -fail: - spin_unlock_irqrestore(&device_request_lock, flags); - return NULL; -} - -inline void __scsi_release_command(Scsi_Cmnd * SCpnt) -{ - unsigned long flags; - Scsi_Device * SDpnt; - int alloc_cmd = 0; - - spin_lock_irqsave(&device_request_lock, flags); - - SDpnt = SCpnt->device; - - SCpnt->request = NULL; - SCpnt->state = SCSI_STATE_UNUSED; - SCpnt->owner = SCSI_OWNER_NOBODY; - atomic_dec(&SCpnt->device->host->host_active); - atomic_dec(&SDpnt->device_active); - - SCSI_LOG_MLQUEUE(5, printk("Deactivating command for device %d (active=%d, failed=%d)\n", - SCpnt->device->id, - 
atomic_read(&SCpnt->device->host->host_active), - SCpnt->device->host->host_failed)); - - if(SDpnt->current_queue_depth > SDpnt->new_queue_depth) { - Scsi_Cmnd *prev, *next; - /* - * Release the command block and decrement the queue - * depth. - */ - for(prev = NULL, next = SDpnt->device_queue; - next != SCpnt; - prev = next, next = next->next) ; - if(prev == NULL) - SDpnt->device_queue = next->next; - else - prev->next = next->next; - kfree((char *)SCpnt); - SDpnt->current_queue_depth--; - } else if(SDpnt->current_queue_depth < SDpnt->new_queue_depth) { - alloc_cmd = 1; - SDpnt->current_queue_depth++; - } - spin_unlock_irqrestore(&device_request_lock, flags); - - /* - * Wake up anyone waiting for this device. Do this after we - * have released the lock, as they will need it as soon as - * they wake up. - */ - wake_up(&SDpnt->scpnt_wait); - - /* - * We are happy to release command blocks in the scope of the - * device_request_lock since that's nice and quick, but allocation - * can take more time so do it outside that scope instead. - */ - if(alloc_cmd) { - Scsi_Cmnd *newSCpnt; - - newSCpnt = kmalloc(sizeof(Scsi_Cmnd), GFP_ATOMIC | - (SDpnt->host->unchecked_isa_dma ? - GFP_DMA : 0)); - if(newSCpnt) { - memset(newSCpnt, 0, sizeof(Scsi_Cmnd)); - init_timer(&newSCpnt->eh_timeout); - newSCpnt->device = SDpnt; - newSCpnt->request = NULL; - newSCpnt->use_sg = 0; - newSCpnt->old_use_sg = 0; - newSCpnt->old_cmd_len = 0; - newSCpnt->underflow = 0; - newSCpnt->old_underflow = 0; - newSCpnt->transfersize = 0; - newSCpnt->resid = 0; - newSCpnt->serial_number = 0; - newSCpnt->serial_number_at_timeout = 0; - newSCpnt->host_scribble = NULL; - newSCpnt->state = SCSI_STATE_UNUSED; - newSCpnt->owner = SCSI_OWNER_NOBODY; - spin_lock_irqsave(&device_request_lock, flags); - newSCpnt->next = SDpnt->device_queue; - SDpnt->device_queue = newSCpnt; - spin_unlock_irqrestore(&device_request_lock, flags); - } else { - spin_lock_irqsave(&device_request_lock, flags); - SDpnt->current_queue_depth--; - spin_unlock_irqrestore(&device_request_lock, flags); - } - } -} - /* * Function: scsi_mlqueue_insert() * @@ -516,7 +282,7 @@ inline void __scsi_release_command(Scsi_Cmnd * SCpnt) * Notes: This could be called either from an interrupt context or a * normal process context. 
*/ -int scsi_mlqueue_insert(Scsi_Cmnd * cmd, int reason) +static int scsi_mlqueue_insert(Scsi_Cmnd * cmd, int reason) { struct Scsi_Host *host = cmd->device->host; struct scsi_device *device = cmd->device; @@ -578,43 +344,179 @@ int scsi_mlqueue_insert(Scsi_Cmnd * cmd, int reason) return 0; } +struct scsi_host_cmd_pool { + kmem_cache_t *slab; + unsigned int users; + char *name; + unsigned int slab_flags; + unsigned int gfp_mask; +}; + +static struct scsi_host_cmd_pool scsi_cmd_pool = { + .name = "scsi_cmd_cache", + .slab_flags = SLAB_HWCACHE_ALIGN, +}; + +static struct scsi_host_cmd_pool scsi_cmd_dma_pool = { + .name = "scsi_cmd_cache(DMA)", + .slab_flags = SLAB_HWCACHE_ALIGN|SLAB_CACHE_DMA, + .gfp_mask = __GFP_DMA, +}; + +static DECLARE_MUTEX(host_cmd_pool_mutex); + +static struct scsi_cmnd *__scsi_get_command(struct Scsi_Host *shost, + int gfp_mask) +{ + struct scsi_cmnd *cmd; + + cmd = kmem_cache_alloc(shost->cmd_pool->slab, + gfp_mask | shost->cmd_pool->gfp_mask); + + if (unlikely(!cmd)) { + unsigned long flags; + + spin_lock_irqsave(&shost->free_list_lock, flags); + if (likely(!list_empty(&shost->free_list))) { + cmd = list_entry(shost->free_list.next, + struct scsi_cmnd, list); + list_del_init(&cmd->list); + } + spin_unlock_irqrestore(&shost->free_list_lock, flags); + } + + return cmd; +} + +/* + * Function: scsi_get_command() + * + * Purpose: Allocate and setup a scsi command block + * + * Arguments: dev - parent scsi device + * gfp_mask- allocator flags + * + * Returns: The allocated scsi command structure. + */ +struct scsi_cmnd *scsi_get_command(struct scsi_device *dev, int gfp_mask) +{ + struct scsi_cmnd *cmd = __scsi_get_command(dev->host, gfp_mask); + + if (likely(cmd)) { + memset(cmd, 0, sizeof(*cmd)); + cmd->device = dev; + cmd->state = SCSI_STATE_UNUSED; + cmd->owner = SCSI_OWNER_NOBODY; + init_timer(&cmd->eh_timeout); + INIT_LIST_HEAD(&cmd->list); + } + + return cmd; +} + +/* + * Function: scsi_put_command() + * + * Purpose: Free a scsi command block + * + * Arguments: cmd - command block to free + * + * Returns: Nothing. + * + * Notes: The command must not belong to any lists. + */ +void scsi_put_command(struct scsi_cmnd *cmd) +{ + struct Scsi_Host *shost = cmd->device->host; + unsigned long flags; + + spin_lock_irqsave(&shost->free_list_lock, flags); + if (unlikely(list_empty(&shost->free_list))) { + list_add(&cmd->list, &shost->free_list); + cmd = NULL; + } + spin_unlock_irqrestore(&shost->free_list_lock, flags); + + if (likely(cmd)) + kmem_cache_free(shost->cmd_pool->slab, cmd); +} + +/* + * Function: scsi_setup_command_freelist() + * + * Purpose: Setup the command freelist for a scsi host. + * + * Arguments: shost - host to allocate the freelist for. + * + * Returns: Nothing. + */ +int scsi_setup_command_freelist(struct Scsi_Host *shost) +{ + struct scsi_host_cmd_pool *pool; + struct scsi_cmnd *cmd; + + spin_lock_init(&shost->free_list_lock); + INIT_LIST_HEAD(&shost->free_list); + + /* + * Select a command slab for this host and create it if not + * yet existant. + */ + down(&host_cmd_pool_mutex); + pool = (shost->unchecked_isa_dma ? &scsi_cmd_dma_pool : &scsi_cmd_pool); + if (!pool->users) { + pool->slab = kmem_cache_create(pool->name, + sizeof(struct scsi_cmnd), 0, + pool->slab_flags, NULL, NULL); + if (!pool->slab) + goto fail; + } + + pool->users++; + shost->cmd_pool = pool; + up(&host_cmd_pool_mutex); + + /* + * Get one backup command for this host. 
+ */ + cmd = kmem_cache_alloc(shost->cmd_pool->slab, + GFP_KERNEL | shost->cmd_pool->gfp_mask); + if (!cmd) + goto fail2; + list_add(&cmd->list, &shost->free_list); + return 0; + + fail2: + if (!--pool->users) + kmem_cache_destroy(pool->slab); + return -ENOMEM; + fail: + up(&host_cmd_pool_mutex); + return -ENOMEM; + +} + /* - * Function: scsi_release_command - * - * Purpose: Release a command block. - * - * Arguments: SCpnt - command block we are releasing. - * - * Notes: The command block can no longer be used by the caller once - * this funciton is called. This is in effect the inverse - * of scsi_allocate_device. Note that we also must perform - * a couple of additional tasks. We must first wake up any - * processes that might have blocked waiting for a command - * block, and secondly we must hit the queue handler function - * to make sure that the device is busy. Note - there is an - * option to not do this - there were instances where we could - * recurse too deeply and blow the stack if this happened - * when we were indirectly called from the request function - * itself. - * - * The idea is that a lot of the mid-level internals gunk - * gets hidden in this function. Upper level drivers don't - * have any chickens to wave in the air to get things to - * work reliably. - * - * This function is deprecated, and drivers should be - * rewritten to use Scsi_Request instead of Scsi_Cmnd. + * Function: scsi_destroy_command_freelist() + * + * Purpose: Release the command freelist for a scsi host. + * + * Arguments: shost - host that's freelist is going to be destroyed */ -void scsi_release_command(Scsi_Cmnd * SCpnt) +void scsi_destroy_command_freelist(struct Scsi_Host *shost) { - __scsi_release_command(SCpnt); - /* - * Finally, hit the queue request function to make sure that - * the device is actually busy if there are requests present. - * This won't block - if the device cannot take any more, life - * will go on. - */ - scsi_queue_next_request(SCpnt->device->request_queue, NULL); + while (!list_empty(&shost->free_list)) { + struct scsi_cmnd *cmd; + + cmd = list_entry(shost->free_list.next, struct scsi_cmnd, list); + list_del_init(&cmd->list); + kmem_cache_free(shost->cmd_pool->slab, cmd); + } + + down(&host_cmd_pool_mutex); + if (!--shost->cmd_pool->users) + kmem_cache_destroy(shost->cmd_pool->slab); + up(&host_cmd_pool_mutex); } /* @@ -746,13 +648,6 @@ int scsi_dispatch_cmd(Scsi_Cmnd * SCpnt) return rtn; } -/* - * scsi_do_cmd sends all the commands out to the low-level driver. It - * handles the specifics required for each low level driver - ie queued - * or non queued. It also prevents conflicts when different high level - * drivers go for the same host at the same time. - */ - void scsi_wait_req (Scsi_Request * SRpnt, const void *cmnd , void *buffer, unsigned bufflen, int timeout, int retries) @@ -960,121 +855,6 @@ void scsi_init_cmd_from_req(Scsi_Cmnd * SCpnt, Scsi_Request * SRpnt) SCSI_LOG_MLQUEUE(3, printk("Leaving scsi_init_cmd_from_req()\n")); } -/* - * Function: scsi_do_cmd - * - * Purpose: Queue a SCSI command - * - * Arguments: SCpnt - command descriptor. - * cmnd - actual SCSI command to be performed. - * buffer - data buffer. - * bufflen - size of data buffer. - * done - completion function to be run. - * timeout - how long to let it run before timeout. - * retries - number of retries we allow. - * - * Lock status: With the new queueing code, this is SMP-safe, and no locks - * need be held upon entry. The old queueing code the lock was - * assumed to be held upon entry. 
- * - * Returns: Nothing. - * - * Notes: Prior to the new queue code, this function was not SMP-safe. - * Also, this function is now only used for queueing requests - * for things like ioctls and character device requests - this - * is because we essentially just inject a request into the - * queue for the device. Normal block device handling manipulates - * the queue directly. - */ -void scsi_do_cmd(Scsi_Cmnd * SCpnt, const void *cmnd, - void *buffer, unsigned bufflen, void (*done) (Scsi_Cmnd *), - int timeout, int retries) -{ - struct Scsi_Host *host = SCpnt->device->host; - - ASSERT_LOCK(host->host_lock, 0); - - SCpnt->pid = scsi_pid++; - SCpnt->owner = SCSI_OWNER_MIDLEVEL; - - SCSI_LOG_MLQUEUE(4, - { - int i; - int size = COMMAND_SIZE(((const unsigned char *)cmnd)[0]); - printk("scsi_do_cmd (host = %d, channel = %d target = %d, " - "buffer =%p, bufflen = %d, done = %p, timeout = %d, " - "retries = %d)\n" - "command : ", host->host_no, SCpnt->device->channel, - SCpnt->device->id, buffer, - bufflen, done, timeout, retries); - for (i = 0; i < size; ++i) - printk("%02x ", ((unsigned char *) cmnd)[i]); - printk("\n"); - }); - - if (!host) { - panic("Invalid or not present host.\n"); - } - /* - * We must prevent reentrancy to the lowlevel host driver. This prevents - * it - we enter a loop until the host we want to talk to is not busy. - * Race conditions are prevented, as interrupts are disabled in between the - * time we check for the host being not busy, and the time we mark it busy - * ourselves. - */ - - - /* - * Our own function scsi_done (which marks the host as not busy, disables - * the timeout counter, etc) will be called by us or by the - * scsi_hosts[host].queuecommand() function needs to also call - * the completion function for the high level driver. - */ - - memcpy((void *) SCpnt->data_cmnd, (const void *) cmnd, - sizeof(SCpnt->data_cmnd)); - SCpnt->reset_chain = NULL; - SCpnt->serial_number = 0; - SCpnt->serial_number_at_timeout = 0; - SCpnt->bufflen = bufflen; - SCpnt->buffer = buffer; - SCpnt->flags = 0; - SCpnt->retries = 0; - SCpnt->allowed = retries; - SCpnt->done = done; - SCpnt->timeout_per_command = timeout; - - memcpy((void *) SCpnt->cmnd, (const void *) cmnd, - sizeof(SCpnt->cmnd)); - /* Zero the sense buffer. Some host adapters automatically request - * sense on error. 0 is not a valid sense code. - */ - memset((void *) SCpnt->sense_buffer, 0, sizeof SCpnt->sense_buffer); - SCpnt->request_buffer = buffer; - SCpnt->request_bufflen = bufflen; - SCpnt->old_use_sg = SCpnt->use_sg; - if (SCpnt->cmd_len == 0) - SCpnt->cmd_len = COMMAND_SIZE(SCpnt->cmnd[0]); - SCpnt->old_cmd_len = SCpnt->cmd_len; - SCpnt->sc_old_data_direction = SCpnt->sc_data_direction; - SCpnt->old_underflow = SCpnt->underflow; - - /* Start the timer ticking. */ - - SCpnt->internal_timeout = NORMAL_TIMEOUT; - SCpnt->abort_reason = 0; - SCpnt->result = 0; - - /* - * At this point, we merely set up the command, stick it in the normal - * request queue, and return. Eventually that request will come to the - * top of the list, and will be dispatched. - */ - scsi_insert_special_cmd(SCpnt, 0); - - SCSI_LOG_MLQUEUE(3, printk("Leaving scsi_do_cmd()\n")); -} - /** * scsi_done - Mark this command as done * @SCpnt: The SCSI Command which we think we've completed. @@ -1339,94 +1119,6 @@ void scsi_finish_command(Scsi_Cmnd * SCpnt) SCpnt->done(SCpnt); } -/* - * Function: scsi_release_commandblocks() - * - * Purpose: Release command blocks associated with a device. 
- * - * Arguments: SDpnt - device - * - * Returns: Nothing - * - * Lock status: No locking assumed or required. - * - * Notes: - */ -void scsi_release_commandblocks(Scsi_Device * SDpnt) -{ - Scsi_Cmnd *SCpnt, *SCnext; - unsigned long flags; - - spin_lock_irqsave(&device_request_lock, flags); - for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCnext) { - SDpnt->device_queue = SCnext = SCpnt->next; - kfree((char *) SCpnt); - } - SDpnt->current_queue_depth = 0; - SDpnt->new_queue_depth = 0; - spin_unlock_irqrestore(&device_request_lock, flags); -} - -/* - * Function: scsi_build_commandblocks() - * - * Purpose: Allocate command blocks associated with a device. - * - * Arguments: SDpnt - device - * - * Returns: Nothing - * - * Lock status: No locking assumed or required. - * - * Notes: We really only allocate one command here. We will allocate - * more commands as needed once the device goes into real use. - */ -void scsi_build_commandblocks(Scsi_Device * SDpnt) -{ - unsigned long flags; - Scsi_Cmnd *SCpnt; - - if (SDpnt->current_queue_depth != 0) - return; - - SCpnt = (Scsi_Cmnd *) kmalloc(sizeof(Scsi_Cmnd), GFP_ATOMIC | - (SDpnt->host->unchecked_isa_dma ? GFP_DMA : 0)); - if (NULL == SCpnt) { - /* - * Since we don't currently have *any* command blocks on this - * device, go ahead and try an atomic allocation... - */ - SCpnt = (Scsi_Cmnd *) kmalloc(sizeof(Scsi_Cmnd), GFP_ATOMIC | - (SDpnt->host->unchecked_isa_dma ? GFP_DMA : 0)); - if (NULL == SCpnt) - return; /* Oops, we aren't going anywhere for now */ - } - - memset(SCpnt, 0, sizeof(Scsi_Cmnd)); - init_timer(&SCpnt->eh_timeout); - SCpnt->device = SDpnt; - SCpnt->request = NULL; - SCpnt->use_sg = 0; - SCpnt->old_use_sg = 0; - SCpnt->old_cmd_len = 0; - SCpnt->underflow = 0; - SCpnt->old_underflow = 0; - SCpnt->transfersize = 0; - SCpnt->resid = 0; - SCpnt->serial_number = 0; - SCpnt->serial_number_at_timeout = 0; - SCpnt->host_scribble = NULL; - SCpnt->state = SCSI_STATE_UNUSED; - SCpnt->owner = SCSI_OWNER_NOBODY; - spin_lock_irqsave(&device_request_lock, flags); - if(SDpnt->new_queue_depth == 0) - SDpnt->new_queue_depth = 1; - SDpnt->current_queue_depth++; - SCpnt->next = SDpnt->device_queue; - SDpnt->device_queue = SCpnt; - spin_unlock_irqrestore(&device_request_lock, flags); -} - /* * Function: scsi_adjust_queue_depth() * @@ -1448,28 +1140,10 @@ void scsi_build_commandblocks(Scsi_Device * SDpnt) * the right thing depending on whether or not the device is * currently active and whether or not it even has the * command blocks built yet. - * - * If cmdblocks != 0 then we are a live device. We just set the - * new_queue_depth variable and when the scsi completion handler - * notices that current_queue_depth != new_queue_depth it will - * work to rectify the situation. If new_queue_depth is less than - * current_queue_depth, then it will free the completed command - * instead of putting it back on the free list and dec - * current_queue_depth. Otherwise it will try to allocate a new - * command block for the device and put it on the free list along - * with the command that is being - * completed. Obviously, if the device isn't doing anything then - * neither is this code, so it will bring the devices queue depth - * back into line when the device is actually being used. 
This - * keeps us from needing to fire off a kernel thread or some such - * nonsense (this routine can be called from interrupt code, so - * handling allocations here would be tricky and risky, making - * a kernel thread a much safer way to go if we wanted to handle - * the work immediately instead of letting it get done a little - * at a time in the completion handler). */ void scsi_adjust_queue_depth(Scsi_Device *SDpnt, int tagged, int tags) { + static spinlock_t device_request_lock = SPIN_LOCK_UNLOCKED; unsigned long flags; /* @@ -1486,7 +1160,7 @@ void scsi_adjust_queue_depth(Scsi_Device *SDpnt, int tagged, int tags) return; spin_lock_irqsave(&device_request_lock, flags); - SDpnt->new_queue_depth = tags; + SDpnt->queue_depth = tags; switch(tagged) { case MSG_ORDERED_TAG: SDpnt->ordered_tags = 1; @@ -1503,15 +1177,9 @@ void scsi_adjust_queue_depth(Scsi_Device *SDpnt, int tagged, int tags) SDpnt->channel, SDpnt->id, SDpnt->lun); case 0: SDpnt->ordered_tags = SDpnt->simple_tags = 0; - SDpnt->new_queue_depth = tags; + SDpnt->queue_depth = tags; break; } - /* TODO FIXME This is a hack and MUST go eventually. - This fixes a problem in scsi_scan.c::scsi_alloc_sdev() - else we cannot ever have ANY SCSI devices. - */ - SDpnt->current_queue_depth = 1; - spin_unlock_irqrestore(&device_request_lock, flags); } @@ -1860,28 +1528,6 @@ void scsi_device_put(struct scsi_device *sdev) */ int scsi_slave_attach(struct scsi_device *sdev) { - /* all this code is now handled elsewhere - if (sdev->attached++ == 0) { - scsi_build_commandblocks(sdev); - if (sdev->current_queue_depth == 0) { - printk(KERN_ERR "scsi: Allocation failure during" - " attach, some SCSI devices might not be" - " configured\n"); - return -ENOMEM; - } - if (sdev->host->hostt->slave_configure != NULL) { - if (sdev->host->hostt->slave_configure(sdev) != 0) { - printk(KERN_INFO "scsi: failed low level driver" - " attach, some SCSI device might not be" - " configured\n"); - scsi_release_commandblocks(sdev); - return -ENOMEM; - } - } else if (sdev->host->cmd_per_lun != 0) - scsi_adjust_queue_depth(sdev, 0, - sdev->host->cmd_per_lun); - } - */ sdev->attached++; return 0; } @@ -1898,11 +1544,6 @@ int scsi_slave_attach(struct scsi_device *sdev) */ void scsi_slave_detach(struct scsi_device *sdev) { - /* - if (--sdev->attached == 0) { - scsi_release_commandblocks(sdev); - } - */ sdev->attached--; } /* @@ -1952,18 +1593,16 @@ int scsi_unregister_device(struct Scsi_Device_Template *tpnt) { Scsi_Device *SDpnt; struct Scsi_Host *shpnt; - struct list_head spnt, *prev_spnt; - /* * Next, detach the devices from the driver. */ - for (shpnt = scsi_host_get_next(NULL); shpnt; shpnt = scsi_host_get_next(shpnt)) { list_for_each_entry(SDpnt, &shpnt->my_devices, siblings) (*tpnt->detach) (SDpnt); } + /* * Extract the template from the linked list. */ @@ -1972,11 +1611,6 @@ int scsi_unregister_device(struct Scsi_Device_Template *tpnt) up_write(&scsi_devicelist_mutex); scsi_upper_driver_unregister(tpnt); - - /* - * Final cleanup for the driver is done in the driver sources in the - * cleanup function. 
- */ return 0; } @@ -2019,18 +1653,9 @@ __setup("scsi_default_dev_flags=", setup_scsi_default_dev_flags); #endif +/* FIXME(hch): add proper error handling */ static int __init init_scsi(void) { - printk(KERN_INFO "SCSI subsystem driver " REVISION "\n"); - - scsi_core = kmalloc(sizeof(*scsi_core), GFP_KERNEL); - if (!scsi_core) - goto out_no_mem; - memset(scsi_core, 0, sizeof(*scsi_core)); - - if (scsi_create_cmdcache(scsi_core)) - goto out_no_mem; - scsi_init_queue(); scsi_init_procfs(); devfs_mk_dir(NULL, "scsi", NULL); @@ -2039,10 +1664,6 @@ static int __init init_scsi(void) scsi_sysfs_register(); open_softirq(SCSI_SOFTIRQ, scsi_softirq, NULL); return 0; - -out_no_mem: - printk(KERN_CRIT "Couldn't load SCSI Core -- out of memory!\n"); - return -ENOMEM; } static void __exit exit_scsi(void) @@ -2052,12 +1673,6 @@ static void __exit exit_scsi(void) devfs_remove("scsi"); scsi_exit_procfs(); scsi_exit_queue(); - - scsi_destroy_cmdcache(scsi_core); - - if (scsi_core) - kfree(scsi_core); - scsi_core = NULL; } subsys_initcall(init_scsi); diff --git a/drivers/scsi/scsi.h b/drivers/scsi/scsi.h index 0e47f156821e..cdf310ed8600 100644 --- a/drivers/scsi/scsi.h +++ b/drivers/scsi/scsi.h @@ -446,8 +446,10 @@ extern void scsi_exit_queue(void); * Prototypes for functions in scsi.c */ extern int scsi_dispatch_cmd(Scsi_Cmnd * SCpnt); -extern void scsi_release_commandblocks(Scsi_Device * SDpnt); -extern void scsi_build_commandblocks(Scsi_Device * SDpnt); +extern int scsi_setup_command_freelist(struct Scsi_Host *shost); +extern void scsi_destroy_command_freelist(struct Scsi_Host *shost); +extern struct scsi_cmnd *scsi_get_command(struct scsi_device *dev, int flags); +extern void scsi_put_command(struct scsi_cmnd *cmd); extern void scsi_adjust_queue_depth(Scsi_Device *, int, int); extern int scsi_track_queue_full(Scsi_Device *, int); extern int scsi_slave_attach(struct scsi_device *); @@ -457,14 +459,6 @@ extern void scsi_device_put(struct scsi_device *); extern void scsi_done(Scsi_Cmnd * SCpnt); extern void scsi_finish_command(Scsi_Cmnd *); extern int scsi_retry_command(Scsi_Cmnd *); -extern Scsi_Cmnd *scsi_allocate_device(Scsi_Device *, int); -extern void __scsi_release_command(Scsi_Cmnd *); -extern void scsi_release_command(Scsi_Cmnd *); -extern void scsi_do_cmd(Scsi_Cmnd *, const void *cmnd, - void *buffer, unsigned bufflen, - void (*done) (struct scsi_cmnd *), - int timeout, int retries); -extern int scsi_mlqueue_insert(struct scsi_cmnd *, int); extern int scsi_attach_device(struct scsi_device *); extern void scsi_detach_device(struct scsi_device *); extern int scsi_get_device_flags(unsigned char *vendor, unsigned char *model); @@ -582,8 +576,7 @@ struct scsi_device { struct list_head busy_cmnds; /* list of Scsi_Cmnd structs in use */ Scsi_Cmnd *device_queue; /* queue of SCSI Command structures */ Scsi_Cmnd *current_cmnd; /* currently active command */ - unsigned short current_queue_depth;/* How deep of a queue we have */ - unsigned short new_queue_depth; /* How deep of a queue we want */ + unsigned short queue_depth; /* How deep of a queue we want */ unsigned short last_queue_full_depth; /* These two are used by */ unsigned short last_queue_full_count; /* scsi_track_queue_full() */ unsigned long last_queue_full_time;/* don't let QUEUE_FULLs on the same @@ -770,15 +763,6 @@ struct scsi_cmnd { unsigned volatile char internal_timeout; struct scsi_cmnd *bh_next; /* To enumerate the commands waiting to be processed. */ - -/* OBSOLETE, please do not use -- obosolete stuff. 
*/ -/* Use cmd->device->{id, channel, lun} instead */ -/* unsigned int target; */ -/* unsigned int lun; */ -/* unsigned int channel; */ -/* OBSOLETE, use cmd->device->host instead */ -/* struct Scsi_Host *host; */ - unsigned char cmd_len; unsigned char old_cmd_len; unsigned char sc_data_direction; @@ -995,45 +979,4 @@ extern void scsi_device_unregister(struct scsi_device *); extern int scsi_sysfs_register(void); extern void scsi_sysfs_unregister(void); -/* -------------------------------------------------- */ -/* data decl: */ - -/* All the SCSI Core specific global data, etc, - should go in here. -*/ - -struct scsi_core_data { - kmem_cache_t *scsi_cmd_cache; - kmem_cache_t *scsi_cmd_dma_cache; -}; - -extern struct scsi_core_data *scsi_core; - -/* -------------------------------------------------- */ -/* fn decl: */ - -int scsi_create_cmdcache(struct scsi_core_data *scsi_core); -int scsi_destroy_cmdcache(struct scsi_core_data *scsi_core); - -struct scsi_cmnd * scsi_get_command(struct Scsi_Host *host, int alloc_flags); -void scsi_put_command(struct scsi_cmnd *cmd); -void scsi_setup_command(struct scsi_device *dev, struct scsi_cmnd *cmd); - -/* -------------------------------------------------- */ -/* inline funcs: */ - -/* scsi_getset_command: allocate, set and return a command struct, - when the device is known. -*/ -static inline struct scsi_cmnd *scsi_getset_command(struct scsi_device *dev, - int flags) -{ - struct scsi_cmnd *cmd; - - if (!dev) return NULL; - if (!dev->host) return NULL; - scsi_setup_command(dev, (cmd = scsi_get_command(dev->host, flags))); - return cmd; -} - -#endif +#endif /* _SCSI_H */ diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 975d51f830fa..7a005d8d6af5 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -35,7 +35,6 @@ struct scsi_host_sg_pool scsi_sg_pools[SG_MEMPOOL_NR] = { }; #undef SP -struct scsi_core_data *scsi_core; /* * Function: scsi_insert_special_cmd() @@ -814,9 +813,10 @@ int scsi_prep_fn(struct request_queue *q, struct request *req) SCpnt = (Scsi_Cmnd *) req->special; SRpnt = (Scsi_Request *) req->special; - if( SRpnt->sr_magic == SCSI_REQ_MAGIC ) { - SCpnt = scsi_getset_command(SRpnt->sr_device, - GFP_ATOMIC); + if (SRpnt->sr_magic == SCSI_REQ_MAGIC) { + if (SDpnt->device_busy >= SDpnt->queue_depth) + return BLKPREP_DEFER; + SCpnt = scsi_get_command(SRpnt->sr_device, GFP_ATOMIC); if (!SCpnt) return BLKPREP_DEFER; scsi_init_cmd_from_req(SCpnt, SRpnt); @@ -826,16 +826,14 @@ int scsi_prep_fn(struct request_queue *q, struct request *req) /* * Now try and find a command block that we can use. 
*/ - if (req->special) { - SCpnt = (Scsi_Cmnd *) req->special; - } else { - SCpnt = scsi_getset_command(SDpnt, GFP_ATOMIC); - } - /* - * if command allocation failure, wait a bit - */ - if (unlikely(!SCpnt)) - return BLKPREP_DEFER; + if (!req->special) { + if (SDpnt->device_busy >= SDpnt->queue_depth) + return BLKPREP_DEFER; + SCpnt = scsi_get_command(SDpnt, GFP_ATOMIC); + if (unlikely(!SCpnt)) + return BLKPREP_DEFER; + } else + SCpnt = req->special; /* pull a tag out of the request if we have one */ SCpnt->tag = req->tag; @@ -1195,143 +1193,3 @@ void __exit scsi_exit_queue(void) kmem_cache_destroy(sgp->slab); } } - -/* -------------------------------------------------- */ - -int scsi_create_cmdcache(struct scsi_core_data *scsi_core) -{ - if (!scsi_core) - return -EFAULT; - - scsi_core->scsi_cmd_cache - = kmem_cache_create("scsi_cmd_cache", - sizeof(struct scsi_cmnd), 0, - SLAB_NO_REAP|SLAB_HWCACHE_ALIGN,NULL,NULL); - if (!scsi_core->scsi_cmd_cache) - return -ENOMEM; - - scsi_core->scsi_cmd_dma_cache - = kmem_cache_create("scsi_cmd_cache(DMA)", - sizeof(struct scsi_cmnd), 0, - SLAB_NO_REAP|SLAB_HWCACHE_ALIGN - |SLAB_CACHE_DMA, - NULL,NULL); - if (!scsi_core->scsi_cmd_dma_cache) { - scsi_destroy_cmdcache(scsi_core); - return -ENOMEM; - } - return 0; -} /* end scsi_create_cmdcache() */ - -/* -------------------------------------------------- */ - -int scsi_destroy_cmdcache(struct scsi_core_data *scsi_core) -{ - if (!scsi_core) - return -EFAULT; - - if (scsi_core->scsi_cmd_cache && - kmem_cache_destroy(scsi_core->scsi_cmd_cache)) { - goto bail; - } else { - scsi_core->scsi_cmd_cache = NULL; - } - - if (scsi_core->scsi_cmd_dma_cache && - kmem_cache_destroy(scsi_core->scsi_cmd_dma_cache)) { - goto bail; - } else { - scsi_core->scsi_cmd_dma_cache = NULL; - } - - return 0; -bail: - printk(KERN_CRIT "Failed to free scsi command cache" - " -- memory leak\n"); - return -EFAULT; -} /* end scsi_destroy_cmdcache() */ - -/* -------------------------------------------------- */ - -struct scsi_cmnd * scsi_get_command(struct Scsi_Host *host, int alloc_flags) -{ - unsigned long flags; - struct scsi_cmnd *cmd = NULL; - - if (!host) - return NULL; - - if (host->unchecked_isa_dma) { - cmd = kmem_cache_alloc(scsi_core->scsi_cmd_dma_cache, - alloc_flags); - } else { - cmd = kmem_cache_alloc(scsi_core->scsi_cmd_cache, alloc_flags); - } - - if (!cmd) { - spin_lock_irqsave(&host->free_list_lock, flags); - if (!list_empty(&host->free_list)) { - cmd = list_entry(host->free_list.next, - struct scsi_cmnd, list); - list_del_init(&cmd->list); - } - spin_unlock_irqrestore(&host->free_list_lock, flags); - } - - return cmd; -} /* end scsi_get_command() */ - -/* -------------------------------------------------- */ -/* scsi_put_command: free a scsi_cmnd struct. - Note: the command must not belong to any lists! -*/ -void scsi_put_command(struct scsi_cmnd *cmd) -{ - unsigned long flags; - struct Scsi_Host *host; - - if (!cmd) - return; - - if (!cmd->device || !cmd->device->host) { - printk(KERN_NOTICE "Trying to free a command which" - " doesn't belong to scsi core?!\n"); - /* Memory leak, but let the system survive for now -- - they'll get it eventually! 
*/ - return; - } - - host = cmd->device->host; - - spin_lock_irqsave(&host->free_list_lock, flags); - if (list_empty(&host->free_list)) { - list_add(&cmd->list, &host->free_list); - cmd = NULL; - } - spin_unlock_irqrestore(&host->free_list_lock, flags); - - if (cmd) { - if (host->unchecked_isa_dma) - kmem_cache_free(scsi_core->scsi_cmd_dma_cache, cmd); - else - kmem_cache_free(scsi_core->scsi_cmd_cache, cmd); - } -} /* end scsi_put_command() */ - -/* -------------------------------------------------- */ -/* scsi_setup_command: This will do post-alloc init of the command. - We want to do as little as possible here. -*/ -void scsi_setup_command(struct scsi_device *dev, struct scsi_cmnd *cmd) -{ - if (!cmd) - return; - memset(cmd, 0, sizeof(*cmd)); - cmd->device = dev; - cmd->state = SCSI_STATE_UNUSED; - cmd->owner = SCSI_OWNER_NOBODY; - init_timer(&cmd->eh_timeout); - INIT_LIST_HEAD(&cmd->list); -} /* end scsi_setup_command() */ - -/* -------------------------------------------------- */ diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index ea5ab1d4d900..ba43ec82d8e7 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -471,9 +471,6 @@ static struct scsi_device *scsi_alloc_sdev(struct Scsi_Host *shost, sdev->request_queue->queuedata = sdev; scsi_adjust_queue_depth(sdev, 0, sdev->host->cmd_per_lun); - if (sdev->current_queue_depth == 0) { - goto out_bail; - } init_waitqueue_head(&sdev->scpnt_wait); if (shost->hostt->slave_alloc) diff --git a/drivers/scsi/scsi_syms.c b/drivers/scsi/scsi_syms.c index 1b2ee3cb436b..8ea20af06dc6 100644 --- a/drivers/scsi/scsi_syms.c +++ b/drivers/scsi/scsi_syms.c @@ -39,7 +39,6 @@ EXPORT_SYMBOL(scsi_unregister); EXPORT_SYMBOL(scsicam_bios_param); EXPORT_SYMBOL(scsi_partsize); EXPORT_SYMBOL(scsi_bios_ptable); -EXPORT_SYMBOL(scsi_do_cmd); EXPORT_SYMBOL(scsi_ioctl); EXPORT_SYMBOL(print_command); EXPORT_SYMBOL(print_sense); @@ -112,8 +111,3 @@ EXPORT_SYMBOL(scsi_delete_timer); * sysfs support */ EXPORT_SYMBOL(shost_devclass); - -EXPORT_SYMBOL(scsi_get_command); -EXPORT_SYMBOL(scsi_put_command); -EXPORT_SYMBOL(scsi_setup_command); - diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 6e5f81efff8c..e44f54b6baaf 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -210,8 +210,7 @@ static int scsi_sdev_check_buf_bit(const char *buf) * Create the actual show/store functions and data structures. 
*/ sdev_rd_attr (device_blocked, "%d\n"); -sdev_rd_attr (current_queue_depth, "%d\n"); -sdev_rd_attr (new_queue_depth, "%d\n"); +sdev_rd_attr (queue_depth, "%d\n"); sdev_rd_attr (type, "%d\n"); sdev_rd_attr (scsi_level, "%d\n"); sdev_rd_attr (access_count, "%d\n"); @@ -222,8 +221,7 @@ sdev_rw_attr_bit (online); static struct device_attribute * const sdev_attrs[] = { &dev_attr_device_blocked, - &dev_attr_current_queue_depth, - &dev_attr_new_queue_depth, + &dev_attr_queue_depth, &dev_attr_type, &dev_attr_scsi_level, &dev_attr_access_count, diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 5a7e5a1952e1..31cbabe903fe 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -842,7 +842,7 @@ sg_ioctl(struct inode *inode, struct file *filp, __put_user((int) sdp->device->type, &sg_idp->scsi_type); __put_user((short) sdp->device->host->cmd_per_lun, &sg_idp->h_cmd_per_lun); - __put_user((short) sdp->device->new_queue_depth, + __put_user((short) sdp->device->queue_depth, &sg_idp->d_queue_depth); __put_user(0, &sg_idp->unused[0]); __put_user(0, &sg_idp->unused[1]); @@ -2982,7 +2982,7 @@ sg_proc_dev_info(char *buffer, int *len, off_t * begin, off_t offset, int size) scsidp->host->host_no, scsidp->channel, scsidp->id, scsidp->lun, (int) scsidp->type, (int) scsidp->access_count, - (int) scsidp->new_queue_depth, + (int) scsidp->queue_depth, (int) scsidp->device_busy, (int) scsidp->online); else diff --git a/drivers/scsi/u14-34f.c b/drivers/scsi/u14-34f.c index f02555afeda7..81b9f1cb2e73 100644 --- a/drivers/scsi/u14-34f.c +++ b/drivers/scsi/u14-34f.c @@ -671,7 +671,7 @@ static int u14_34f_slave_configure(Scsi_Device *dev) { tag_suffix = ""; } - if (TLDEV(dev->type) && linked_comm && dev->new_queue_depth > 2) + if (TLDEV(dev->type) && linked_comm && dev->queue_depth > 2) link_suffix = ", sorted"; else if (TLDEV(dev->type)) link_suffix = ", unsorted"; @@ -680,7 +680,7 @@ static int u14_34f_slave_configure(Scsi_Device *dev) { printk("%s: scsi%d, channel %d, id %d, lun %d, cmds/lun %d%s%s.\n", BN(j), host->host_no, dev->channel, dev->id, dev->lun, - dev->new_queue_depth, link_suffix, tag_suffix); + dev->queue_depth, link_suffix, tag_suffix); return FALSE; } @@ -1264,7 +1264,7 @@ static int u14_34f_queuecommand(Scsi_Cmnd *SCpnt, void (*done)(Scsi_Cmnd *)) { /* Map DMA buffers and SG list */ map_dma(i, j); - if (linked_comm && SCpnt->device->new_queue_depth > 2 + if (linked_comm && SCpnt->device->queue_depth > 2 && TLDEV(SCpnt->device->type)) { HD(j)->cp_stat[i] = READY; flush_dev(SCpnt->device, SCpnt->request->sector, j, FALSE); @@ -1761,7 +1761,7 @@ static void ihdlr(int irq, unsigned int j) { sync_dma(i, j); - if (linked_comm && SCpnt->device->new_queue_depth > 2 + if (linked_comm && SCpnt->device->queue_depth > 2 && TLDEV(SCpnt->device->type)) flush_dev(SCpnt->device, SCpnt->request->sector, j, TRUE); -- cgit v1.2.3 From 762cc267d645943264363ce957c226976909b491 Mon Sep 17 00:00:00 2001 From: Daniel Jacobowitz Date: Tue, 4 Feb 2003 07:05:49 -0500 Subject: Use force_sig_specific to send SIGSTOP to newly-created CLONE_PTRACE processes. 
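The change is a one-liner; the intent, as the helper's name suggests, is that the SIGSTOP is queued to exactly the newly created task rather than going through group-wide signal routing, so a traced CLONE_PTRACE child reliably stops before it runs. In outline:

	/* Before: generic delivery, subject to normal signal routing */
	send_sig(SIGSTOP, p, 1);
	/* After: pin the stop to this specific, newly forked task */
	force_sig_specific(SIGSTOP, p);
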
--- kernel/fork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/fork.c b/kernel/fork.c index 0ba9e64b5821..347f957b080e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1036,7 +1036,7 @@ struct task_struct *do_fork(unsigned long clone_flags, } if (p->ptrace & PT_PTRACED) - send_sig(SIGSTOP, p, 1); + force_sig_specific(SIGSTOP, p); wake_up_forked_process(p); /* do this last */ ++total_forks; -- cgit v1.2.3 From b0bb3273bff99d843e4bd9a4716e3e66677464e8 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Tue, 4 Feb 2003 21:44:43 -0600 Subject: [PATCH] Re: [CHECKER] 112 potential memory leaks in 2.5.48 On Wed, 5 Feb 2003, Rik van Riel wrote: > On Tue, 4 Feb 2003, Andy Chou wrote: Thanks for the checker output. First patch below... > > [BUG] > > u1/acc/linux/2.5.48/drivers/scsi/sr_ioctl.c:188:sr_do_ioctl: > > ERROR:LEAK:85:188:Memory leak [Allocated from: > > /u1/acc/linux/2.5.48/drivers/scsi/sr_ioctl.c:85:scsi_allocate_request] > > Bug indeed, I've created a patch to fix the possible leak of > a scsi request, but can't figure out the bounce buffer logic... The patch below fixes the scsi request leak. I'm not sure how the bounce buffer thing is supposed to work (Christoph? James?) so I'm not touching that at the moment. Linus, could you please apply this patch (against today's bk tree) ? thank you, Rik -- Bravely reimplemented by the knights who say "NIH". http://www.surriel.com/ http://guru.conectiva.com/ Current spamtrap: october@surriel.com ===== drivers/scsi/sr_ioctl.c 1.27 vs edited ===== --- drivers/scsi/sr_ioctl.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/sr_ioctl.c b/drivers/scsi/sr_ioctl.c index 86c080eeabeb..38f1363defbc 100644 --- a/drivers/scsi/sr_ioctl.c +++ b/drivers/scsi/sr_ioctl.c @@ -99,7 +99,7 @@ int sr_do_ioctl(Scsi_CD *cd, struct cdrom_generic_command *cgc) if (bounce_buffer == NULL) { printk("SCSI DMA pool exhausted."); err = -ENOMEM; - goto out; + goto out_free; } memcpy(bounce_buffer, cgc->buffer, cgc->buflen); cgc->buffer = bounce_buffer; @@ -107,7 +107,7 @@ int sr_do_ioctl(Scsi_CD *cd, struct cdrom_generic_command *cgc) retry: if (!scsi_block_when_processing_errors(SDev)) { err = -ENODEV; - goto out; + goto out_free; } scsi_wait_req(SRpnt, cgc->cmd, cgc->buffer, cgc->buflen, @@ -179,6 +179,7 @@ int sr_do_ioctl(Scsi_CD *cd, struct cdrom_generic_command *cgc) memcpy(cgc->sense, SRpnt->sr_sense_buffer, sizeof(*cgc->sense)); /* Wake up a process waiting for device */ + out_free: scsi_release_request(SRpnt); SRpnt = NULL; out: -- cgit v1.2.3 From c36fa9713a6bf91c575f3cfe92d65a55041b6d11 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Tue, 4 Feb 2003 22:14:57 -0600 Subject: Correct compiler warnings with use of likely() on pointers --- drivers/scsi/scsi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 31a93e8343cb..912639238794 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -402,7 +402,7 @@ struct scsi_cmnd *scsi_get_command(struct scsi_device *dev, int gfp_mask) { struct scsi_cmnd *cmd = __scsi_get_command(dev->host, gfp_mask); - if (likely(cmd)) { + if (likely(cmd != NULL)) { memset(cmd, 0, sizeof(*cmd)); cmd->device = dev; cmd->state = SCSI_STATE_UNUSED; @@ -437,7 +437,7 @@ void scsi_put_command(struct scsi_cmnd *cmd) } spin_unlock_irqrestore(&shost->free_list_lock, flags); - if (likely(cmd)) + if (likely(cmd != NULL)) kmem_cache_free(shost->cmd_pool->slab, cmd); } -- cgit v1.2.3 From 
79e992197377df6dd1fc3d0266bcd76b9bcc3774 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Tue, 4 Feb 2003 22:21:15 -0600 Subject: Fix sr_ioctl.c bounce buffer usage Make sure all DMAs come from kmalloc'd memory with the correct GFP_ flags --- drivers/scsi/sr_ioctl.c | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/drivers/scsi/sr_ioctl.c b/drivers/scsi/sr_ioctl.c index 38f1363defbc..0ac9325b90c9 100644 --- a/drivers/scsi/sr_ioctl.c +++ b/drivers/scsi/sr_ioctl.c @@ -80,30 +80,16 @@ int sr_do_ioctl(Scsi_CD *cd, struct cdrom_generic_command *cgc) struct scsi_device *SDev; struct request *req; int result, err = 0, retries = 0; - char *bounce_buffer; SDev = cd->device; SRpnt = scsi_allocate_request(SDev); if (!SRpnt) { - printk("Unable to allocate SCSI request in sr_do_ioctl"); + printk(KERN_ERR "Unable to allocate SCSI request in sr_do_ioctl"); err = -ENOMEM; goto out; } SRpnt->sr_data_direction = cgc->data_direction; - /* use ISA DMA buffer if necessary */ - SRpnt->sr_request->buffer = cgc->buffer; - if (cgc->buffer && SRpnt->sr_host->unchecked_isa_dma && - (virt_to_phys(cgc->buffer) + cgc->buflen - 1 > ISA_DMA_THRESHOLD)) { - bounce_buffer = (char *) kmalloc(cgc->buflen, GFP_DMA); - if (bounce_buffer == NULL) { - printk("SCSI DMA pool exhausted."); - err = -ENOMEM; - goto out_free; - } - memcpy(bounce_buffer, cgc->buffer, cgc->buflen); - cgc->buffer = bounce_buffer; - } retry: if (!scsi_block_when_processing_errors(SDev)) { err = -ENODEV; @@ -276,11 +262,15 @@ int sr_get_last_session(struct cdrom_device_info *cdi, return 0; } +/* primitive to determine whether we need to have GFP_DMA set based on + * the status of the unchecked_isa_dma flag in the host structure */ +#define SR_GFP_DMA(cd) (((cd)->device->host->unchecked_isa_dma) ? 
GFP_DMA : 0) + int sr_get_mcn(struct cdrom_device_info *cdi, struct cdrom_mcn *mcn) { Scsi_CD *cd = cdi->handle; struct cdrom_generic_command cgc; - char buffer[32]; + char *buffer = kmalloc(32, GFP_KERNEL | SR_GFP_DMA(cd)); int result; memset(&cgc, 0, sizeof(struct cdrom_generic_command)); @@ -297,6 +287,7 @@ int sr_get_mcn(struct cdrom_device_info *cdi, struct cdrom_mcn *mcn) memcpy(mcn->medium_catalog_number, buffer + 9, 13); mcn->medium_catalog_number[13] = 0; + kfree(buffer); return result; } @@ -338,7 +329,7 @@ int sr_audio_ioctl(struct cdrom_device_info *cdi, unsigned int cmd, void *arg) Scsi_CD *cd = cdi->handle; struct cdrom_generic_command cgc; int result; - unsigned char buffer[32]; + unsigned char *buffer = kmalloc(32, GFP_KERNEL | SR_GFP_DMA(cd)); memset(&cgc, 0, sizeof(struct cdrom_generic_command)); cgc.timeout = IOCTL_TIMEOUT; @@ -409,7 +400,7 @@ int sr_audio_ioctl(struct cdrom_device_info *cdi, unsigned int cmd, void *arg) } default: - return -EINVAL; + result = -EINVAL; } #if 0 @@ -417,6 +408,7 @@ int sr_audio_ioctl(struct cdrom_device_info *cdi, unsigned int cmd, void *arg) printk("DEBUG: sr_audio: result for ioctl %x: %x\n", cmd, result); #endif + kfree(buffer); return result; } @@ -528,7 +520,7 @@ int sr_is_xa(Scsi_CD *cd) if (!xa_test) return 0; - raw_sector = (unsigned char *) kmalloc(2048, GFP_DMA | GFP_KERNEL); + raw_sector = (unsigned char *) kmalloc(2048, GFP_KERNEL | SR_GFP_DMA(cd)); if (!raw_sector) return -ENOMEM; if (0 == sr_read_sector(cd, cd->ms_offset + 16, -- cgit v1.2.3 From 267033f6bfed8ef87d0d3e71d5cf43c18c0b388d Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Tue, 4 Feb 2003 22:33:31 -0600 Subject: move queue_depth check from scsi_prep_fn to scsi_request_fn --- drivers/scsi/scsi_lib.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 7a005d8d6af5..fc2fd1a076e0 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -814,8 +814,6 @@ int scsi_prep_fn(struct request_queue *q, struct request *req) SRpnt = (Scsi_Request *) req->special; if (SRpnt->sr_magic == SCSI_REQ_MAGIC) { - if (SDpnt->device_busy >= SDpnt->queue_depth) - return BLKPREP_DEFER; SCpnt = scsi_get_command(SRpnt->sr_device, GFP_ATOMIC); if (!SCpnt) return BLKPREP_DEFER; @@ -827,8 +825,6 @@ int scsi_prep_fn(struct request_queue *q, struct request *req) * Now try and find a command block that we can use. */ if (!req->special) { - if (SDpnt->device_busy >= SDpnt->queue_depth) - return BLKPREP_DEFER; SCpnt = scsi_get_command(SDpnt, GFP_ATOMIC); if (unlikely(!SCpnt)) return BLKPREP_DEFER; @@ -951,6 +947,9 @@ void scsi_request_fn(request_queue_t * q) */ req = elv_next_request(q); + if (SDpnt->device_busy >= SDpnt->queue_depth) + break; + if(SHpnt->host_busy == 0 && SHpnt->host_blocked) { /* unblock after host_blocked iterates to zero */ if(--SHpnt->host_blocked == 0) { -- cgit v1.2.3 From 9b300b142931e8addfcc63ddad4bfd16a227d722 Mon Sep 17 00:00:00 2001 From: Patrick Mansfield Date: Wed, 5 Feb 2003 08:02:48 -0600 Subject: [PATCH] add back single_lun support On Wed, Feb 05, 2003 at 05:14:00PM -0600, James Bottomley wrote: > I don't see device_active getting set anywhere. > > shouldn't we just dump device_active in favour of a non-zero check of > device_busy (it's all done under the queue lock, anyway). > > James OK - once more. This patch against the current scsi-misc-2.5 adds back the check for the single_lun case and removes the unused device_active field. 
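The gate itself is tiny; as a standalone illustration (this sketch is not the patch -- the struct and helper names here are invented, and the real helper, check_all_luns() in the diff below, walks the same_target_siblings list under the queue lock rather than an array):

#include <stdio.h>

/* toy stand-ins for the two scsi_device fields the check relies on */
struct scsi_device_model {
	int single_lun;		/* target can run one active command at a time */
	int device_busy;	/* commands outstanding on this LUN */
};

/* return 1 if any LUN sharing the target is busy */
static int any_sibling_busy(const struct scsi_device_model *luns, int n)
{
	int i;

	for (i = 0; i < n; i++)
		if (luns[i].device_busy)
			return 1;
	return 0;
}

int main(void)
{
	struct scsi_device_model luns[2] = { { 1, 0 }, { 1, 3 } };

	/* the request function defers dispatch while a sibling LUN is active */
	if (luns[0].single_lun && any_sibling_busy(luns, 2))
		printf("defer: sibling LUN busy\n");
	return 0;
}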
I compiled and booted with this applied but don't have any devices (i.e. CD ROM changer) for testing. --- drivers/scsi/scsi.h | 1 - drivers/scsi/scsi_lib.c | 19 +++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi.h b/drivers/scsi/scsi.h index cdf310ed8600..b5a77c9254a9 100644 --- a/drivers/scsi/scsi.h +++ b/drivers/scsi/scsi.h @@ -570,7 +570,6 @@ struct scsi_device { device is busy */ struct Scsi_Host *host; request_queue_t *request_queue; - atomic_t device_active; /* commands checked out for device */ volatile unsigned short device_busy; /* commands actually active on low-level */ struct list_head free_cmnds; /* list of available Scsi_Cmnd structs */ struct list_head busy_cmnds; /* list of Scsi_Cmnd structs in use */ diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index fc2fd1a076e0..a286e2248a50 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -787,6 +787,22 @@ out: return ret; } +/* + * The target associated with myself can only handle one active command at + * a time. Scan through all of the luns on the same target as myself, + * return 1 if any are active. + */ +static int check_all_luns(struct scsi_device *myself) +{ + struct scsi_device *sdev; + + list_for_each_entry(sdev, &myself->same_target_siblings, + same_target_siblings) + if (sdev->device_busy) + return 1; + return 0; +} + int scsi_prep_fn(struct request_queue *q, struct request *req) { struct Scsi_Device_Template *STpnt; @@ -950,6 +966,9 @@ void scsi_request_fn(request_queue_t * q) if (SDpnt->device_busy >= SDpnt->queue_depth) break; + if (SDpnt->single_lun && check_all_luns(SDpnt)) + break; + if(SHpnt->host_busy == 0 && SHpnt->host_blocked) { /* unblock after host_blocked iterates to zero */ if(--SHpnt->host_blocked == 0) { -- cgit v1.2.3 From 07285c80460e11c0f780246872d4c85b5d36fb5c Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 5 Feb 2003 16:56:18 -0800 Subject: [PATCH] BTTV build fix Patch from Gerd Knorr bttv requires CONFIG_SOUND. --- drivers/media/video/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/video/Kconfig b/drivers/media/video/Kconfig index 5bd4f31d40db..b33d753c00f4 100644 --- a/drivers/media/video/Kconfig +++ b/drivers/media/video/Kconfig @@ -19,7 +19,7 @@ comment "Video Adapters" config VIDEO_BT848 tristate "BT848 Video For Linux" - depends on VIDEO_DEV && PCI && I2C_ALGOBIT + depends on VIDEO_DEV && PCI && I2C_ALGOBIT && SOUND ---help--- Support for BT848 based frame grabber/overlay boards. This includes the Miro, Hauppauge and STB boards. Please read the material in -- cgit v1.2.3 From c5070032bb8db845535ac8c85d45884138a08a4f Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 5 Feb 2003 16:56:24 -0800 Subject: [PATCH] reiserfs v3 readpages support Patch from Chris Mason The patch below is against 2.5.59, various forms have been floating around for a while, and Andrew recently included this fixed version in 2.5.55-mm. The end result is faster reads and writes for reiserfs. This adds reiserfs support for readpages, along with a support func in fs/mpage.c to deal with the reiserfs_get_block call sending back up to date buffers with packed tails copied into them. Most of the changes are to reiserfs_writepage, which still had many 2.4isms in the way it started io, dealt with errors and handled the bh state bits. 
I've also added an optimization so it only starts transactions when we need to copy a packed tail into the btree or fill a hole, instead of any time reiserfs_writepage hits an unmapped buffer. --- fs/mpage.c | 63 ++++++++++++- fs/reiserfs/inode.c | 251 ++++++++++++++++++++++++++++++++++++---------------- 2 files changed, 238 insertions(+), 76 deletions(-) diff --git a/fs/mpage.c b/fs/mpage.c index a44993cd7927..846ca1aca1bc 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -116,6 +116,49 @@ mpage_alloc(struct block_device *bdev, return bio; } +/* + * support function for mpage_readpages. The fs supplied get_block might + * return an up to date buffer. This is used to map that buffer into + * the page, which allows readpage to avoid triggering a duplicate call + * to get_block. + * + * The idea is to avoid adding buffers to pages that don't already have + * them. So when the buffer is up to date and the page size == block size, + * this marks the page up to date instead of adding new buffers. + */ +static void +map_buffer_to_page(struct page *page, struct buffer_head *bh, int page_block) +{ + struct inode *inode = page->mapping->host; + struct buffer_head *page_bh, *head; + int block = 0; + + if (!page_has_buffers(page)) { + /* + * don't make any buffers if there is only one buffer on + * the page and the page just needs to be set up to date + */ + if (inode->i_blkbits == PAGE_CACHE_SHIFT && + buffer_uptodate(bh)) { + SetPageUptodate(page); + return; + } + create_empty_buffers(page, 1 << inode->i_blkbits, 0); + } + head = page_buffers(page); + page_bh = head; + do { + if (block == page_block) { + page_bh->b_state = bh->b_state; + page_bh->b_bdev = bh->b_bdev; + page_bh->b_blocknr = bh->b_blocknr; + break; + } + page_bh = page_bh->b_this_page; + block++; + } while (page_bh != head); +} + /** * mpage_readpages - populate an address space with some pages, and * start reads against them. @@ -186,6 +229,7 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, block_in_file = page->index << (PAGE_CACHE_SHIFT - blkbits); last_block = (inode->i_size + blocksize - 1) >> blkbits; + bh.b_page = page; for (page_block = 0; page_block < blocks_per_page; page_block++, block_in_file++) { bh.b_state = 0; @@ -200,6 +244,17 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, first_hole = page_block; continue; } + + /* some filesystems will copy data into the page during + * the get_block call, in which case we don't want to + * read it again. 
map_buffer_to_page copies the data + * we just collected from get_block into the page's buffers + * so readpage doesn't have to repeat the get_block call + */ + if (buffer_uptodate(&bh)) { + map_buffer_to_page(page, &bh, page_block); + goto confused; + } if (first_hole != blocks_per_page) goto confused; /* hole -> non-hole */ @@ -256,7 +311,10 @@ out: confused: if (bio) bio = mpage_bio_submit(READ, bio); - block_read_full_page(page, get_block); + if (!PageUptodate(page)) + block_read_full_page(page, get_block); + else + unlock_page(page); goto out; } @@ -344,6 +402,7 @@ mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block, sector_t boundary_block = 0; struct block_device *boundary_bdev = NULL; int length; + struct buffer_head map_bh; if (page_has_buffers(page)) { struct buffer_head *head = page_buffers(page); @@ -401,8 +460,8 @@ mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block, BUG_ON(!PageUptodate(page)); block_in_file = page->index << (PAGE_CACHE_SHIFT - blkbits); last_block = (inode->i_size - 1) >> blkbits; + map_bh.b_page = page; for (page_block = 0; page_block < blocks_per_page; ) { - struct buffer_head map_bh; map_bh.b_state = 0; if (get_block(inode, block_in_file, &map_bh, 1)) diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 817c5c465d19..a9e5003b3589 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include /* args for the create parameter of reiserfs_get_block */ #define GET_BLOCK_NO_CREATE 0 /* don't create new blocks or convert tails */ @@ -262,7 +264,10 @@ research: blocknr = get_block_num(ind_item, path.pos_in_item) ; ret = 0 ; if (blocknr) { - map_bh(bh_result, inode->i_sb, blocknr); + map_bh(bh_result, inode->i_sb, blocknr); + if (path.pos_in_item == ((ih_item_len(ih) / UNFM_P_SIZE) - 1)) { + set_buffer_boundary(bh_result); + } } else // We do not return -ENOENT if there is a hole but page is uptodate, because it means // That there is some MMAPED data associated with it that is yet to be written to disk. @@ -286,7 +291,7 @@ research: return -ENOENT; } - /* if we've got a direct item, and the buffer was uptodate, + /* if we've got a direct item, and the buffer or page was uptodate, ** we don't want to pull data off disk again. skip to the ** end, where we map the buffer and return */ @@ -367,7 +372,9 @@ research: finished: pathrelse (&path); - /* I _really_ doubt that you want it. Chris? */ + /* this buffer has valid data, but isn't valid for io. 
mapping it to + * block #0 tells the rest of reiserfs it just has a tail in it + */ map_bh(bh_result, inode->i_sb, 0); set_buffer_uptodate (bh_result); return 0; @@ -842,6 +849,12 @@ int reiserfs_get_block (struct inode * inode, sector_t block, return retval; } +static int +reiserfs_readpages(struct file *file, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) +{ + return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block); +} // // BAD: new directories have stat data of new type and all other items @@ -1809,13 +1822,19 @@ static int map_block_for_writepage(struct inode *inode, int use_get_block = 0 ; int bytes_copied = 0 ; int copy_size ; + int trans_running = 0; + + /* catch places below that try to log something without starting a trans */ + th.t_trans_id = 0; + + if (!buffer_uptodate(bh_result)) { + buffer_error(); + return -EIO; + } kmap(bh_result->b_page) ; start_over: reiserfs_write_lock(inode->i_sb); - journal_begin(&th, inode->i_sb, jbegin_count) ; - reiserfs_update_inode_transaction(inode) ; - make_cpu_key(&key, inode, byte_offset, TYPE_ANY, 3) ; research: @@ -1841,7 +1860,6 @@ research: goto out ; } set_block_dev_mapped(bh_result, get_block_num(item,pos_in_item),inode); - set_buffer_uptodate(bh_result); } else if (is_direct_le_ih(ih)) { char *p ; p = page_address(bh_result->b_page) ; @@ -1850,7 +1868,20 @@ research: fs_gen = get_generation(inode->i_sb) ; copy_item_head(&tmp_ih, ih) ; + + if (!trans_running) { + /* vs-3050 is gone, no need to drop the path */ + journal_begin(&th, inode->i_sb, jbegin_count) ; + reiserfs_update_inode_transaction(inode) ; + trans_running = 1; + if (fs_changed(fs_gen, inode->i_sb) && item_moved(&tmp_ih, &path)) { + reiserfs_restore_prepared_buffer(inode->i_sb, bh) ; + goto research; + } + } + reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ; + if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) { reiserfs_restore_prepared_buffer(inode->i_sb, bh) ; goto research; @@ -1861,7 +1892,6 @@ research: journal_mark_dirty(&th, inode->i_sb, bh) ; bytes_copied += copy_size ; set_block_dev_mapped(bh_result, 0, inode); - set_buffer_uptodate(bh_result); /* are there still bytes left? */ if (bytes_copied < bh_result->b_size && @@ -1878,7 +1908,10 @@ research: out: pathrelse(&path) ; - journal_end(&th, inode->i_sb, jbegin_count) ; + if (trans_running) { + journal_end(&th, inode->i_sb, jbegin_count) ; + trans_running = 0; + } reiserfs_write_unlock(inode->i_sb); /* this is where we fill in holes in the file. */ @@ -1894,49 +1927,77 @@ out: } } kunmap(bh_result->b_page) ; + + if (!retval && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) { + /* we've copied data from the page into the direct item, so the + * buffer in the page is now clean, mark it to reflect that. + */ + lock_buffer(bh_result); + clear_buffer_dirty(bh_result); + unlock_buffer(bh_result); + } return retval ; } -/* helper func to get a buffer head ready for writepage to send to -** ll_rw_block -*/ -static inline void submit_bh_for_writepage(struct buffer_head **bhp, int nr) { - struct buffer_head *bh ; - int i; - for(i = 0 ; i < nr ; i++) { - bh = bhp[i] ; - lock_buffer(bh) ; - mark_buffer_async_write(bh) ; - /* submit_bh doesn't care if the buffer is dirty, but nobody - ** later on in the call chain will be cleaning it. So, we - ** clean the buffer here, it still gets written either way. 
- */ - clear_buffer_dirty(bh) ; - set_buffer_uptodate(bh) ; - submit_bh(WRITE, bh) ; +/* + * does the right thing for deciding when to lock a buffer and + * mark it for io during a writepage. make sure the buffer is + * dirty before sending it here though. + */ +static void lock_buffer_for_writepage(struct page *page, + struct writeback_control *wbc, + struct buffer_head *bh) +{ + if (wbc->sync_mode != WB_SYNC_NONE) { + lock_buffer(bh); + } else { + if (test_set_buffer_locked(bh)) { + __set_page_dirty_nobuffers(page); + return; + } + } + if (test_clear_buffer_dirty(bh)) { + if (!buffer_uptodate(bh)) + buffer_error(); + mark_buffer_async_write(bh); + } else { + unlock_buffer(bh); } } +/* + * mason@suse.com: updated in 2.5.54 to follow the same general io + * start/recovery path as __block_write_full_page, along with special + * code to handle reiserfs tails. + */ static int reiserfs_write_full_page(struct page *page, struct writeback_control *wbc) { struct inode *inode = page->mapping->host ; unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT ; - unsigned last_offset = PAGE_CACHE_SIZE; int error = 0; unsigned long block ; - unsigned cur_offset = 0 ; - struct buffer_head *head, *bh ; + struct buffer_head *head, *bh; int partial = 0 ; - struct buffer_head *arr[PAGE_CACHE_SIZE/512] ; - int nr = 0 ; + int nr = 0; - if (!page_has_buffers(page)) - block_prepare_write(page, 0, 0, NULL) ; + /* The page dirty bit is cleared before writepage is called, which + * means we have to tell create_empty_buffers to make dirty buffers + * The page really should be up to date at this point, so tossing + * in the BH_Uptodate is just a sanity check. + */ + if (!page_has_buffers(page)) { + if (!PageUptodate(page)) + buffer_error(); + create_empty_buffers(page, inode->i_sb->s_blocksize, + (1 << BH_Dirty) | (1 << BH_Uptodate)); + } + head = page_buffers(page) ; /* last page in the file, zero out any contents past the ** last byte in the file */ if (page->index >= end_index) { char *kaddr; + unsigned last_offset; last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1) ; /* no file contents in this page */ @@ -1949,66 +2010,107 @@ static int reiserfs_write_full_page(struct page *page, struct writeback_control flush_dcache_page(page) ; kunmap_atomic(kaddr, KM_USER0) ; } - head = page_buffers(page) ; bh = head ; block = page->index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits) ; do { - /* if this offset in the page is outside the file */ - if (cur_offset >= last_offset) { - if (!buffer_uptodate(bh)) - partial = 1 ; - } else { - /* fast path, buffer mapped to an unformatted node */ + get_bh(bh); + if (buffer_dirty(bh)) { if (buffer_mapped(bh) && bh->b_blocknr != 0) { - arr[nr++] = bh ; + /* buffer mapped to an unformatted node */ + lock_buffer_for_writepage(page, wbc, bh); } else { - /* buffer not mapped yet, or points to a direct item. - ** search and dirty or log - */ + /* not mapped yet, or it points to a direct item, search + * the btree for the mapping info, and log any direct + * items found + */ if ((error = map_block_for_writepage(inode, bh, block))) { goto fail ; } - /* map_block_for_writepage either found an unformatted node - ** and mapped it for us, or it found a direct item - ** and logged the changes. 
- */ - if (buffer_mapped(bh) && bh->b_blocknr != 0) { - arr[nr++] = bh ; - } + if (buffer_mapped(bh) && bh->b_blocknr != 0) { + lock_buffer_for_writepage(page, wbc, bh); + } } } - bh = bh->b_this_page ; - cur_offset += bh->b_size ; - block++ ; + bh = bh->b_this_page; + block++; } while(bh != head) ; - if (!partial) - SetPageUptodate(page) ; BUG_ON(PageWriteback(page)); SetPageWriteback(page); unlock_page(page); - /* if this page only had a direct item, it is very possible for - ** nr == 0 without there being any kind of error. - */ - if (nr) { - submit_bh_for_writepage(arr, nr) ; - } else { - end_page_writeback(page) ; + /* + * since any buffer might be the only dirty buffer on the page, + * the first submit_bh can bring the page out of writeback. + * be careful with the buffers. + */ + do { + struct buffer_head *next = bh->b_this_page; + if (buffer_async_write(bh)) { + submit_bh(WRITE, bh); + nr++; + } + put_bh(bh); + bh = next; + } while(bh != head); + + error = 0; +done: + if (nr == 0) { + /* + * if this page only had a direct item, it is very possible for + * no io to be required without there being an error. Or, + * someone else could have locked them and sent them down the + * pipe without locking the page + */ + do { + if (!buffer_uptodate(bh)) { + partial = 1; + break; + } + } while(bh != head); + if (!partial) + SetPageUptodate(page); + end_page_writeback(page); } - - return 0 ; + return error; fail: - if (nr) { - SetPageWriteback(page); - unlock_page(page); - submit_bh_for_writepage(arr, nr) ; - } else { - unlock_page(page) ; - } - ClearPageUptodate(page) ; - return error ; + /* catches various errors, we need to make sure any valid dirty blocks + * get to the media. The page is currently locked and not marked for + * writeback + */ + ClearPageUptodate(page); + bh = head; + do { + get_bh(bh); + if (buffer_mapped(bh) && buffer_dirty(bh) && bh->b_blocknr) { + lock_buffer(bh); + mark_buffer_async_write(bh); + } else { + /* + * clear any dirty bits that might have come from getting + * attached to a dirty page + */ + clear_buffer_dirty(bh); + } + bh = bh->b_this_page; + } while(bh != head); + SetPageError(page); + BUG_ON(PageWriteback(page)); + SetPageWriteback(page); + unlock_page(page); + do { + struct buffer_head *next = bh->b_this_page; + if (buffer_async_write(bh)) { + clear_buffer_dirty(bh); + submit_bh(WRITE, bh); + nr++; + } + put_bh(bh); + bh = next; + } while(bh != head); + goto done; } @@ -2115,6 +2217,7 @@ static int reiserfs_releasepage(struct page *page, int unused_gfp_flags) struct address_space_operations reiserfs_address_space_operations = { .writepage = reiserfs_writepage, .readpage = reiserfs_readpage, + .readpages = reiserfs_readpages, .releasepage = reiserfs_releasepage, .sync_page = block_sync_page, .prepare_write = reiserfs_prepare_write, -- cgit v1.2.3 From 00c8e791cba1bb88db8a8fd73106c28fdbab5716 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 5 Feb 2003 16:56:31 -0800 Subject: [PATCH] self-unplugging request queues The patch teaches a queue to unplug itself: a) if it has four requests OR b) if it has had plugged requests for 3 milliseconds. These numbers may need to be tuned, although doing so doesn't seem to make much difference. 10 msecs works OK, so HZ=100 machines will be fine. Instrumentation shows that about 5-10% of requests were started due to the three millisecond timeout (during a kernel compile). That's somewhat significant. It means that the kernel is leaving stuff in the queue, plugged, for too long.
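As a standalone illustration of the jiffies arithmetic above (this is not part of the patch; the constants mirror the blk_queue_make_request() defaults in the diff below, and HZ=100 is an assumption):

#include <stdio.h>

#define HZ 100	/* assumed tick rate; one jiffy == 10 ms */

int main(void)
{
	int unplug_thresh = 4;				/* unplug after this many requests */
	unsigned long unplug_delay = 3 * HZ / 1000;	/* 3 ms in jiffies; 0 at HZ=100 */

	if (unplug_delay == 0)
		unplug_delay = 1;	/* clamp so the timer is never zero-length */

	printf("unplug after %d requests or %lu jiffies (%lu ms)\n",
	       unplug_thresh, unplug_delay, unplug_delay * 1000 / HZ);
	return 0;
}

With HZ=100 the 3 ms delay rounds down to zero jiffies and is clamped to one jiffy, i.e. 10 ms -- which is why "10 msecs works OK" above means HZ=100 machines need no special handling.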
This testing was with a uniprocessor preemptible kernel, which is particularly vulnerable to unplug latency (submit some IO, get preempted before the unplug). This patch permits the removal of a lot of rather lame unplugging in page reclaim and in the writeback code, which kicks the queues (globally!) every four megabytes to get writeback underway. This patch doesn't use blk_run_queues(). It is able to kick just the particular queue. The patch is not expected to make much difference really, except for AIO. AIO needs a blk_run_queues() in its io_submit() call. For each request. This means that AIO has to disable plugging altogether, unless something like this patch does it for it. It means that AIO will unplug *all* queues in the machine for every io_submit(). Even against a socket! This patch was tested by disabling blk_run_queues() completely. The system ran OK. The 3 milliseconds may be too long. It's OK for the heavy writeback code, but AIO may want less. Or maybe AIO really wants zero (ie: disable plugging). If that is so, we need new code paths by which AIO can communicate the "immediate unplug" information - a global unplug is not good. To minimise unplug latency due to user CPU load, this patch gives keventd `nice -10'. This is of course completely arbitrary. Really, I think keventd should be SCHED_RR/MAX_RT_PRIO-1, as it has been in -aa kernels for ages. --- drivers/block/ll_rw_blk.c | 40 ++++++++++++++++++++++++++++++++++++++++ include/linux/blkdev.h | 10 ++++++++++ kernel/workqueue.c | 1 + 3 files changed, 51 insertions(+) diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index f8a4e7a81f4b..e13d0bbca144 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -27,6 +27,8 @@ #include #include +static void blk_unplug_work(void *data); + /* * For the allocated request tables */ @@ -237,6 +239,14 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn) blk_queue_hardsect_size(q, 512); blk_queue_dma_alignment(q, 511); + q->unplug_thresh = 4; /* hmm */ + q->unplug_delay = (3 * HZ) / 1000; /* 3 milliseconds */ + if (q->unplug_delay == 0) + q->unplug_delay = 1; + + init_timer(&q->unplug_timer); + INIT_WORK(&q->unplug_work, blk_unplug_work, q); + /* * by default assume old behaviour and bounce for any highmem page */ @@ -960,6 +970,7 @@ void blk_plug_device(request_queue_t *q) if (!blk_queue_plugged(q)) { spin_lock(&blk_plug_lock); list_add_tail(&q->plug_list, &blk_plug_list); + mod_timer(&q->unplug_timer, jiffies + q->unplug_delay); spin_unlock(&blk_plug_lock); } } @@ -974,6 +985,7 @@ int blk_remove_plug(request_queue_t *q) if (blk_queue_plugged(q)) { spin_lock(&blk_plug_lock); list_del_init(&q->plug_list); + del_timer(&q->unplug_timer); spin_unlock(&blk_plug_lock); return 1; } @@ -992,6 +1004,8 @@ static inline void __generic_unplug_device(request_queue_t *q) if (test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags)) return; + del_timer(&q->unplug_timer); + /* * was plugged, fire request_fn if queue has stuff to do */ @@ -1020,6 +1034,18 @@ void generic_unplug_device(void *data) spin_unlock_irq(q->queue_lock); } +static void blk_unplug_work(void *data) +{ + generic_unplug_device(data); +} + +static void blk_unplug_timeout(unsigned long data) +{ + request_queue_t *q = (request_queue_t *)data; + + schedule_work(&q->unplug_work); +} + /** * blk_start_queue - restart a previously stopped queue * @q: The &request_queue_t in question @@ -1164,6 +1190,9 @@ void blk_cleanup_queue(request_queue_t * q) count -= __blk_cleanup_queue(&q->rq[READ]); count 
-= __blk_cleanup_queue(&q->rq[WRITE]); + del_timer_sync(&q->unplug_timer); + flush_scheduled_work(); + if (count) printk("blk_cleanup_queue: leaked requests (%d)\n", count); @@ -1269,6 +1298,9 @@ int blk_init_queue(request_queue_t *q, request_fn_proc *rfn, spinlock_t *lock) blk_queue_make_request(q, __make_request); blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE); + q->unplug_timer.function = blk_unplug_timeout; + q->unplug_timer.data = (unsigned long)q; + blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS); blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS); @@ -1811,7 +1843,15 @@ get_rq: out: if (freereq) __blk_put_request(q, freereq); + + if (blk_queue_plugged(q)) { + int nr_queued = (queue_nr_requests - q->rq[0].count) + + (queue_nr_requests - q->rq[1].count); + if (nr_queued == q->unplug_thresh) + __generic_unplug_device(q); + } spin_unlock_irq(q->queue_lock); + return 0; end_io: diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c599ea36233b..82766b7e60b0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -4,6 +4,8 @@ #include #include #include +#include +#include #include #include #include @@ -188,6 +190,14 @@ struct request_queue unplug_fn *unplug_fn; merge_bvec_fn *merge_bvec_fn; + /* + * Auto-unplugging state + */ + struct timer_list unplug_timer; + int unplug_thresh; /* After this many requests */ + unsigned long unplug_delay; /* After this many jiffies */ + struct work_struct unplug_work; + struct backing_dev_info backing_dev_info; /* diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 156583c7dbf7..fb10d360c436 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -177,6 +177,7 @@ static int worker_thread(void *__startup) current->flags |= PF_IOTHREAD; cwq->thread = current; + set_user_nice(current, -10); set_cpus_allowed(current, 1UL << cpu); spin_lock_irq(¤t->sig->siglock); -- cgit v1.2.3 From 418f398e98f5b86bd2e2febd0b04c4b41dfd46c1 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 5 Feb 2003 16:56:38 -0800 Subject: [PATCH] Remove most of the blk_run_queues() calls We don't need these with self-unplugging queues. The patch also contains a couple of microopts suggested by Andrea: we don't need to run sync_page() if the page just came unlocked. --- fs/buffer.c | 7 +++---- fs/fs-writeback.c | 1 - fs/mpage.c | 2 -- mm/filemap.c | 10 ++++++---- mm/page-writeback.c | 2 -- mm/readahead.c | 4 +--- mm/vmscan.c | 1 - 7 files changed, 10 insertions(+), 17 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index 3fc9e47c5a0a..bf6ae714c730 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -127,9 +127,10 @@ void __wait_on_buffer(struct buffer_head * bh) get_bh(bh); do { prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); - blk_run_queues(); - if (buffer_locked(bh)) + if (buffer_locked(bh)) { + blk_run_queues(); io_schedule(); + } } while (buffer_locked(bh)); put_bh(bh); finish_wait(wqh, &wait); @@ -959,8 +960,6 @@ no_grow: * the reserve list is empty, we're sure there are * async buffer heads in use. 
*/ - blk_run_queues(); - free_more_memory(); goto try_again; } diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index d3db0faa9abe..1814f7a9b5ce 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -334,7 +334,6 @@ writeback_inodes(struct writeback_control *wbc) } spin_unlock(&sb_lock); spin_unlock(&inode_lock); - blk_run_queues(); } /* diff --git a/fs/mpage.c b/fs/mpage.c index 846ca1aca1bc..3460144c1894 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -618,7 +618,6 @@ mpage_writepages(struct address_space *mapping, int (*writepage)(struct page *page, struct writeback_control *wbc); if (wbc->nonblocking && bdi_write_congested(bdi)) { - blk_run_queues(); wbc->encountered_congestion = 1; return 0; } @@ -673,7 +672,6 @@ mpage_writepages(struct address_space *mapping, if (ret || (--(wbc->nr_to_write) <= 0)) done = 1; if (wbc->nonblocking && bdi_write_congested(bdi)) { - blk_run_queues(); wbc->encountered_congestion = 1; done = 1; } diff --git a/mm/filemap.c b/mm/filemap.c index 185abe3c596c..a9659f20f74a 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -259,9 +259,10 @@ void wait_on_page_bit(struct page *page, int bit_nr) do { prepare_to_wait(waitqueue, &wait, TASK_UNINTERRUPTIBLE); - sync_page(page); - if (test_bit(bit_nr, &page->flags)) + if (test_bit(bit_nr, &page->flags)) { + sync_page(page); io_schedule(); + } } while (test_bit(bit_nr, &page->flags)); finish_wait(waitqueue, &wait); } @@ -326,9 +327,10 @@ void __lock_page(struct page *page) while (TestSetPageLocked(page)) { prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); - sync_page(page); - if (PageLocked(page)) + if (PageLocked(page)) { + sync_page(page); io_schedule(); + } } finish_wait(wqh, &wait); } diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 0710834252dc..1ca41b80a581 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -237,7 +237,6 @@ static void background_writeout(unsigned long _min_pages) break; } } - blk_run_queues(); } /* @@ -308,7 +307,6 @@ static void wb_kupdate(unsigned long arg) } nr_to_write -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; } - blk_run_queues(); if (time_before(next_jif, jiffies + HZ)) next_jif = jiffies + HZ; mod_timer(&wb_timer, next_jif); diff --git a/mm/readahead.c b/mm/readahead.c index 77bd1ff6c630..5e20db90cab9 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -236,10 +236,8 @@ __do_page_cache_readahead(struct address_space *mapping, struct file *filp, * uptodate then the caller will launch readpage again, and * will then handle the error. */ - if (ret) { + if (ret) read_pages(mapping, filp, &page_pool, ret); - blk_run_queues(); - } BUG_ON(!list_empty(&page_pool)); out: return ret; diff --git a/mm/vmscan.c b/mm/vmscan.c index 0fc00047b27c..4b40c5483268 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -957,7 +957,6 @@ int kswapd(void *p) finish_wait(&pgdat->kswapd_wait, &wait); get_page_state(&ps); balance_pgdat(pgdat, 0, &ps); - blk_run_queues(); } } -- cgit v1.2.3 From 7260b084c870dd02fa9813ac83fa0cc5a15f0d18 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 5 Feb 2003 16:56:44 -0800 Subject: [PATCH] Updated Documentation/kernel-parameters.txt Patch from Petr Baudis This patch (against 2.5.59) updates Documentation/kernel-parameters.txt to the (more-or-less; I certainly missed some parameters) current state of the kernel. Note also that I will probably send out another update after a few further kernel releases..
--- Documentation/kernel-parameters.txt | 57 +++++++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index aa52836233f2..bd3af2be8cbe 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1,4 +1,4 @@ -November 2002 Kernel Parameters v2.5.49 +February 2003 Kernel Parameters v2.5.59 ~~~~~~~~~~~~~~~~~ The following is a consolidated list of the kernel parameters as implemented @@ -60,6 +60,7 @@ restrictions referred to are that the relevant option is valid if: V4L Video For Linux support is enabled. VGA The VGA console has been enabled. VT Virtual terminal support is enabled. + WDT Watchdog support is enabled. XT IBM PC/XT MFM hard disk support is enabled. In addition, the following text indicates that the option: @@ -98,6 +99,9 @@ running once the system is up. advansys= [HW,SCSI] See header of drivers/scsi/advansys.c. + advwdt= [HW,WDT] Advantech WDT + Format: , + aedsp16= [HW,OSS] Audio Excel DSP 16 Format: ,,,,, See also header of sound/oss/aedsp16.c. @@ -111,6 +115,9 @@ running once the system is up. aic7xxx= [HW,SCSI] See Documentation/scsi/aic7xxx.txt. + aic79xx= [HW,SCSI] + See Documentation/scsi/aic79xx.txt. + allowdma0 [ISAPNP] AM53C974= [HW,SCSI] @@ -230,20 +237,12 @@ running once the system is up. cs89x0_media= [HW,NET] Format: { rj45 | aui | bnc } - - ctc= [HW,NET] - See drivers/s390/net/ctcmain.c, comment before function - ctc_setup(). cyclades= [HW,SERIAL] Cyclades multi-serial port adapter. dasd= [HW,NET] See header of drivers/s390/block/dasd_devmap.c. - dasd_discipline= - [HW,NET] - See header of drivers/s390/block/dasd.c. - db9= [HW,JOY] db9_2= db9_3= @@ -254,9 +253,6 @@ running once the system is up. Format: [,] See also Documentation/networking/decnet.txt. - decr_overclock= [PPC] - decr_overclock_proc0= - devfs= [DEVFS] See Documentation/filesystems/devfs/boot-options. @@ -305,6 +301,9 @@ running once the system is up. This option is obsoleted by the "netdev=" option, which has equivalent usage. See its documentation for details. + eurwdt= [HW,WDT] Eurotech CPU-1220/1410 onboard watchdog. + Format: [,] + fd_mcs= [HW,SCSI] See header of drivers/scsi/fd_mcs.c. @@ -350,7 +349,7 @@ running once the system is up. hisax= [HW,ISDN] See Documentation/isdn/README.HiSax. - hugepages= [HW,IA-32] Maximal number of HugeTLB pages + hugepages= [HW,IA-32,IA-64] Maximal number of HugeTLB pages. i8042_direct [HW] Non-translated mode i8042_dumbkbd @@ -394,6 +393,10 @@ running once the system is up. inttest= [IA64] + io7= [HW] IO7 for Marvel based alpha systems + See comment before marvel_specify_io7 in + arch/alpha/kernel/core_marvel.c. + ip= [IP_PNP] See Documentation/nfsroot.txt. @@ -495,6 +498,7 @@ running once the system is up. mdacon= [MDA] Format: , + Specifies range of consoles to be captured by the MDA. mem=exactmap [KNL,BOOT,IA-32] Enable setting of an exact E820 memory map, as specified by the user. @@ -576,6 +580,8 @@ running once the system is up. nodisconnect [HW,SCSI,M68K] Disables SCSI disconnects. + noexec [IA-64] + nofxsr [BUGS=IA-32] nohighio [BUGS=IA-32] Disable highmem block I/O. @@ -599,7 +605,9 @@ running once the system is up. noresume [SWSUSP] Disables resume and restore original swap space. - no-scroll [VGA] + no-scroll [VGA] Disables scrollback. + This is required for the Braillex ib80-piezo Braille + reader made by F.H. Papenmeier (Germany). 
nosbagart [IA-64] @@ -809,6 +817,9 @@ running once the system is up. See a comment before function sbpcd_setup() in drivers/cdrom/sbpcd.c. + sc1200wdt= [HW,WDT] SC1200 WDT (watchdog) driver + Format: [,[,]] + scsi_debug_*= [SCSI] See drivers/scsi/scsi_debug.c. @@ -997,9 +1008,6 @@ running once the system is up. spia_pedr= spia_peddr= - spread_lpevents= - [PPC] - sscape= [HW,OSS] Format: ,,,, @@ -1009,6 +1017,19 @@ running once the system is up. st0x= [HW,SCSI] See header of drivers/scsi/seagate.c. + sti= [HW] + Format: + Set the STI (builtin display/keyboard on the HP-PARISC + machines) console (graphic card) which should be used + as the initial boot-console. + See also comment in drivers/video/console/sticore.c. + + sti_font= [HW] + See comment in drivers/video/console/sticore.c. + + stifb= [HW] + Format: bpp:[:[:...]] + stram_swap= [HW,M68k] swiotlb= [IA-64] Number of I/O TLB slabs @@ -1079,7 +1100,7 @@ running once the system is up. wd7000= [HW,SCSI] See header of drivers/scsi/wd7000.c. - wdt= [HW] Watchdog + wdt= [WDT] Watchdog See Documentation/watchdog.txt. xd= [HW,XT] Original XT pre-IDE (RLL encoded) disks. -- cgit v1.2.3 From b573296a46a508552d0909e7b2183c631f43fb51 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 5 Feb 2003 16:56:51 -0800 Subject: [PATCH] JBD Documentation Patch from Roger Gammans Adds lots of API documentation to the JBD layer. --- Documentation/DocBook/journal-api.tmpl | 39 ++++-- fs/jbd/journal.c | 215 +++++++++++++++++++++++---------- fs/jbd/recovery.c | 29 +++-- fs/jbd/transaction.c | 183 +++++++++++++++++++--------- include/linux/jbd.h | 183 ++++++++++++++++++++-------- 5 files changed, 458 insertions(+), 191 deletions(-) diff --git a/Documentation/DocBook/journal-api.tmpl b/Documentation/DocBook/journal-api.tmpl index e57c8593eabd..ece95d2eb843 100644 --- a/Documentation/DocBook/journal-api.tmpl +++ b/Documentation/DocBook/journal-api.tmpl @@ -141,17 +141,14 @@ you have done so you need to call journal_dirty_{meta,}data(). Or if you've asked for access to a buffer you now know is no longer required to be pushed back on the device you can call journal_forget() in much the same way as you might have used bforget() in the past. - - - A journal_flush() may be called at any time to commit and checkpoint all your transactions. - + Then at umount time, in your put_super() (2.4) or write_super() (2.5) you can then call journal_destroy() to clean up your in-core journal object. @@ -168,8 +165,8 @@ on another journal. Since transactions can't be nested/batched across differing journals, and another filesystem other than yours (say ext3) may be modified in a later syscall. - + The second case to bear in mind is that journal_start() can block if there isn't enough space in the journal for your transaction (based on the passed nblocks param) - when it blocks it merely(!) needs to @@ -180,10 +177,14 @@ were semaphores and include them in your semaphore ordering rules to prevent deadlocks. Note that journal_extend() has similar blocking behaviour to journal_start() so you can deadlock here just as easily as on journal_start(). - -Try to reserve the right number of blocks the first time. ;-). + +Try to reserve the right number of blocks the first time. ;-). This will +be the maximum number of blocks you are going to touch in this transaction. +I advise having a look at at least ext3_jbd.h to see the basis on which +ext3 makes these decisions. + Another wriggle to watch out for is your on-disk block allocation strategy. Why?
Because, if you undo a delete, you need to ensure you haven't reused any @@ -211,6 +212,30 @@ The opportunities for abuse and DOS attacks with this should be obvious, if you allow unprivileged userspace to trigger codepaths containing these calls. + + +A new feature of jbd since 2.5.25 is commit callbacks: with the new +journal_callback_set() function you can now ask the journalling layer +to call you back when the transaction is finally committed to disk, so that +you can do some of your own management. The key to this is the journal_callback +struct, which maintains the internal callback information but you can +extend it like this: + + + struct myfs_callback_s { + // Data structure element required by jbd. + struct journal_callback for_jbd; + // Stuff for myfs allocated together. + myfs_inode* i_committed; + + } + + + +This would be useful if you needed to know when data was committed to a +particular inode. + + diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index d2f5935ef972..3117885c3f6e 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -732,14 +732,21 @@ fail: * need to set up all of the mapping information to tell the journaling * system where the journal blocks are. * - * journal_init_dev creates a journal which maps a fixed contiguous - * range of blocks on an arbitrary block device. - * - * journal_init_inode creates a journal which maps an on-disk inode as - * the journal. The inode must exist already, must support bmap() and - * must have all data blocks preallocated. */ +/** + * journal_t * journal_init_dev() - creates and initialises a journal structure + * @bdev: Block device on which to create the journal + * @fs_dev: Device which holds the journalled filesystem for this journal. + * @start: Block nr at which the journal starts. + * @len: Length of the journal in blocks. + * @blocksize: blocksize of journalling device + * @returns: a newly created journal_t * + * + * journal_init_dev creates a journal which maps a fixed contiguous + * range of blocks on an arbitrary block device. + * + */ journal_t * journal_init_dev(struct block_device *bdev, struct block_device *fs_dev, int start, int len, int blocksize) @@ -763,7 +770,15 @@ journal_t * journal_init_dev(struct block_device *bdev, return journal; } - + +/** + * journal_t * journal_init_inode () - creates a journal which maps to an inode. + * @inode: An inode to create the journal in + * + * journal_init_inode creates a journal which maps an on-disk inode as + * the journal. The inode must exist already, must support bmap() and + * must have all data blocks preallocated. + */ journal_t * journal_init_inode (struct inode *inode) { struct buffer_head *bh; @@ -852,12 +867,15 @@ static int journal_reset (journal_t *journal) return 0; } -/* +/** + * int journal_create() - Initialise the new journal file + * @journal: Journal to create. This structure must have been initialised + * * Given a journal_t structure which tells us which disk blocks we can * use, create a new journal superblock and initialise all of the - * journal fields from scratch. */ - -int journal_create (journal_t *journal) + * journal fields from scratch. + **/ +int journal_create(journal_t *journal) { unsigned long blocknr; struct buffer_head *bh; @@ -920,11 +938,14 @@ int journal_create (journal_t *journal) return journal_reset(journal); } -/* +/** + * void journal_update_superblock() - Update journal sb on disk. + * @journal: The journal to update. + * @wait: Set to '0' if you don't want to wait for IO completion.
+ * * Update a journal's dynamic superblock fields and write it to disk, * optionally waiting for the IO to complete. -*/ - + */ void journal_update_superblock(journal_t *journal, int wait) { journal_superblock_t *sb = journal->j_superblock; @@ -1040,12 +1061,14 @@ static int load_superblock(journal_t *journal) } -/* +/** + * int journal_load() - Read journal from disk. + * @journal: Journal to act on. + * * Given a journal_t structure which tells us which disk blocks contain * a journal, read the journal from disk to initialise the in-memory * structures. */ - int journal_load(journal_t *journal) { int err; @@ -1090,11 +1113,13 @@ recovery_error: return -EIO; } -/* +/** + * void journal_destroy() - Release a journal_t structure. + * @journal: Journal to act on. +* * Release a journal_t structure once it is no longer in use by the * journaled object. */ - void journal_destroy (journal_t *journal) { /* Wait for the commit thread to wake up and die. */ @@ -1131,8 +1156,12 @@ void journal_destroy (journal_t *journal) } -/* Published API: Check whether the journal uses all of a given set of - * features. Return true (non-zero) if it does. */ +/** + *int journal_check_used_features () - Check if features specified are used. + * + * Check whether the journal uses all of a given set of + * features. Return true (non-zero) if it does. + **/ int journal_check_used_features (journal_t *journal, unsigned long compat, unsigned long ro, unsigned long incompat) @@ -1154,7 +1183,10 @@ int journal_check_used_features (journal_t *journal, unsigned long compat, return 0; } -/* Published API: Check whether the journaling code supports the use of +/** + * int journal_check_available_features() - Check feature set in journalling layer + * + * Check whether the journaling code supports the use of * all of a given set of features on this journal. Return true * (non-zero) if it can. */ @@ -1183,8 +1215,13 @@ int journal_check_available_features (journal_t *journal, unsigned long compat, return 0; } -/* Published API: Mark a given journal feature as present on the - * superblock. Returns true if the requested features could be set. */ +/** + * int journal_set_features () - Mark a given journal feature in the superblock + * + * Mark a given journal feature as present on the + * superblock. Returns true if the requested features could be set. + * + */ int journal_set_features (journal_t *journal, unsigned long compat, unsigned long ro, unsigned long incompat) @@ -1210,12 +1247,12 @@ int journal_set_features (journal_t *journal, unsigned long compat, } -/* - * Published API: +/** + * int journal_update_format () - Update on-disk journal structure. + * * Given an initialised but unloaded journal struct, poke about in the * on-disk structure to update it to the most recent supported version. */ - int journal_update_format (journal_t *journal) { journal_superblock_t *sb; @@ -1265,7 +1302,10 @@ static int journal_convert_superblock_v1(journal_t *journal, } -/* +/** + * int journal_flush () - Flush journal + * @journal: Journal to act on. + * * Flush all data for a given journal to disk and empty the journal. * Filesystems can use this when remounting readonly to ensure that * recovery does not need to happen on remount. @@ -1319,12 +1359,16 @@ int journal_flush (journal_t *journal) return err; } -/* +/** + * int journal_wipe() - Wipe journal contents + * @journal: Journal to act on. + * @write: flag (see below) + * * Wipe out all of the contents of a journal, safely. 
This will produce * a warning if the journal contains any valid recovery information. * Must be called between journal_init_*() and journal_load(). * - * If (write) is non-zero, then we wipe out the journal on disk; otherwise + * If 'write' is non-zero, then we wipe out the journal on disk; otherwise * we merely suppress recovery. */ @@ -1373,43 +1417,11 @@ const char * journal_dev_name(journal_t *journal) } /* - * journal_abort: perform a complete, immediate shutdown of the ENTIRE - * journal (not of a single transaction). This operation cannot be - * undone without closing and reopening the journal. - * - * The journal_abort function is intended to support higher level error - * recovery mechanisms such as the ext2/ext3 remount-readonly error - * mode. - * - * Journal abort has very specific semantics. Any existing dirty, - * unjournaled buffers in the main filesystem will still be written to - * disk by bdflush, but the journaling mechanism will be suspended - * immediately and no further transaction commits will be honoured. - * - * Any dirty, journaled buffers will be written back to disk without - * hitting the journal. Atomicity cannot be guaranteed on an aborted - * filesystem, but we _do_ attempt to leave as much data as possible - * behind for fsck to use for cleanup. - * - * Any attempt to get a new transaction handle on a journal which is in - * ABORT state will just result in an -EROFS error return. A - * journal_stop on an existing handle will return -EIO if we have - * entered abort state during the update. + * Journal abort has very specific semantics, which we describe + * below. - * - * Recursive transactions are not disturbed by journal abort until the - * final journal_stop, which will receive the -EIO error. - * - * Finally, the journal_abort call allows the caller to supply an errno - * which will be recored (if possible) in the journal superblock. This - * allows a client to record failure conditions in the middle of a - * transaction without having to complete the transaction to record the - * failure to disk. ext3_error, for example, now uses this - * functionality. - * - * Errors which originate from within the journaling layer will NOT - * supply an errno; a null errno implies that absolutely no further - * writes are done to the journal (unless there are any already in - * progress). + * Two internal functions, which provide abort to the jbd layer + * itself, are here. */ /* Quick version for internal journal use (doesn't lock the journal). @@ -1447,7 +1459,52 @@ void __journal_abort_soft (journal_t *journal, int errno) journal_update_superblock(journal, 1); } -/* Full version for external use */ +/** + * void journal_abort () - Shutdown the journal immediately. + * @journal: the journal to shutdown. + * @errno: an error number to record in the journal indicating + * the reason for the shutdown. + * + * Perform a complete, immediate shutdown of the ENTIRE + * journal (not of a single transaction). This operation cannot be + * undone without closing and reopening the journal. + * + * The journal_abort function is intended to support higher level error + * recovery mechanisms such as the ext2/ext3 remount-readonly error + * mode. + * + * Journal abort has very specific semantics. Any existing dirty, + * unjournaled buffers in the main filesystem will still be written to + * disk by bdflush, but the journaling mechanism will be suspended + * immediately and no further transaction commits will be honoured.
+ * + * Any dirty, journaled buffers will be written back to disk without + * hitting the journal. Atomicity cannot be guaranteed on an aborted + * filesystem, but we _do_ attempt to leave as much data as possible + * behind for fsck to use for cleanup. + * + * Any attempt to get a new transaction handle on a journal which is in + * ABORT state will just result in an -EROFS error return. A + * journal_stop on an existing handle will return -EIO if we have + * entered abort state during the update. + * + * Recursive transactions are not disturbed by journal abort until the + * final journal_stop, which will receive the -EIO error. + * + * Finally, the journal_abort call allows the caller to supply an errno + * which will be recorded (if possible) in the journal superblock. This + * allows a client to record failure conditions in the middle of a + * transaction without having to complete the transaction to record the + * failure to disk. ext3_error, for example, now uses this + * functionality. + * + * Errors which originate from within the journaling layer will NOT + * supply an errno; a null errno implies that absolutely no further + * writes are done to the journal (unless there are any already in + * progress). + * + */ + void journal_abort (journal_t *journal, int errno) { lock_journal(journal); @@ -1455,6 +1512,17 @@ void journal_abort (journal_t *journal, int errno) unlock_journal(journal); } +/** + * int journal_errno () - returns the journal's error state. + * @journal: journal to examine. + * + * This is the errno number set with journal_abort(), the last + * time the journal was mounted - if the journal was stopped + * without calling abort this will be 0. + * + * If the journal has been aborted on this mount, -EROFS will + * be returned. + */ int journal_errno (journal_t *journal) { int err; @@ -1468,6 +1536,14 @@ int journal_errno (journal_t *journal) return err; } + + +/** + * int journal_clear_err () - clears the journal's error state + * + * An error must be cleared or Acked to take a FS out of readonly + * mode. + */ int journal_clear_err (journal_t *journal) { int err = 0; @@ -1481,6 +1557,13 @@ int journal_clear_err (journal_t *journal) return err; } + +/** + * void journal_ack_err() - Ack journal err. + * + * An error must be cleared or Acked to take a FS out of readonly + * mode. + */ void journal_ack_err (journal_t *journal) { lock_journal(journal); diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c index e6a96d3c30ce..f82d7f3cc507 100644 --- a/fs/jbd/recovery.c +++ b/fs/jbd/recovery.c @@ -206,20 +206,22 @@ do { \ var -= ((journal)->j_last - (journal)->j_first); \ } while (0) -/* - * journal_recover - * +/** + * int journal_recover(journal_t *journal) - recovers an on-disk journal + * @journal: the journal to recover + * * The primary function for recovering the log contents when mounting a * journaled device. - * + */ +int journal_recover(journal_t *journal) +{ +/* * Recovery is done in three passes. In the first pass, we look for the * end of the log. In the second, we assemble the list of revoke * blocks. In the third and final pass, we replay any un-revoked blocks * in the log.
*/ -int journal_recover(journal_t *journal) -{ int err; journal_superblock_t * sb; @@ -263,20 +265,23 @@ int journal_recover(journal_t *journal) return err; } -/* - * journal_skip_recovery - * +/** + * int journal_skip_recovery() - Start journal and wipe existing records + * @journal: journal to startup + * * Locate any valid recovery information from the journal and set up the * journal structures in memory to ignore it (presumably because the * caller has evidence that it is out of date). - * + * This function doesn't appear to be exported. + */ +int journal_skip_recovery(journal_t *journal) +{ +/* * We perform one pass over the journal to allow us to tell the user how * much recovery information is being erased, and to let us initialise * the journal transaction sequence numbers to the next unused ID. */ -int journal_skip_recovery(journal_t *journal) -{ int err; journal_superblock_t * sb; diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 597562cf47fe..14ca5228e9d6 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -222,19 +222,20 @@ static handle_t *new_handle(int nblocks) return handle; } -/* - * Obtain a new handle. +/** + * handle_t *journal_start() - Obtain a new handle. + * @journal: Journal to start transaction on. + * @nblocks: number of block buffers we might modify * * We make sure that the transaction can guarantee at least nblocks of * modified buffers in the log. We block until the log can guarantee * that much space. * - * This function is visible to journal users (like ext2fs), so is not + * This function is visible to journal users (like ext3fs), so is not * called with the journal already locked. * * Return a pointer to a newly allocated handle, or NULL on failure */ - handle_t *journal_start(journal_t *journal, int nblocks) { handle_t *handle = journal_current_handle(); @@ -324,7 +325,11 @@ fail_unlock: return ret; } -/* +/** + * handle_t *journal_try_start() - Don't block, but try and get a handle + * @journal: Journal to start transaction on. + * @nblocks: number of block buffers we might modify + * * Try to start a handle, but non-blockingly. If we weren't able * to, return an ERR_PTR value. */ @@ -368,16 +373,18 @@ handle_t *journal_try_start(journal_t *journal, int nblocks) return handle; } -/* - * journal_extend: extend buffer credits. - * +/** + * int journal_extend() - extend buffer credits. + * @handle: handle to 'extend' + * @nblocks: nr blocks to try to extend by. + * * Some transactions, such as large extends and truncates, can be done * atomically all at once or in several stages. The operation requests * a credit for a number of buffer modifications in advance, but can * extend its credit if it needs more. * * journal_extend tries to give the running handle more buffer credits. - * It does not guarantee that allocation: this is a best-effort only. + * It does not guarantee that allocation - this is a best-effort only. * The calling process MUST be able to deal cleanly with a failure to * extend here. * @@ -386,7 +393,6 @@ handle_t *journal_try_start(journal_t *journal, int nblocks) * return code < 0 implies an error * return code > 0 implies normal transaction-full status. */ - int journal_extend (handle_t *handle, int nblocks) { transaction_t *transaction = handle->h_transaction; @@ -435,8 +441,12 @@ error_out: } -/* - * journal_restart: restart a handle for a multi-transaction filesystem +/** + * int journal_restart() - restart a handle.
+ * @handle: handle to restart + * @nblocks: nr credits requested + * + * Restart a handle for a multi-transaction filesystem * operation. * * If the journal_extend() call above fails to grant new buffer credits @@ -478,8 +488,9 @@ int journal_restart(handle_t *handle, int nblocks) } -/* - * Barrier operation: establish a transaction barrier. +/** + * void journal_lock_updates () - establish a transaction barrier. + * @journal: Journal to establish a barrier on. * * This locks out any further updates from being started, and blocks * until all existing updates have completed, returning only once the @@ -487,7 +498,6 @@ int journal_restart(handle_t *handle, int nblocks) * * The journal lock should not be held on entry. */ - void journal_lock_updates (journal_t *journal) { lock_journal(journal); @@ -515,12 +525,14 @@ void journal_lock_updates (journal_t *journal) down(&journal->j_barrier); } -/* +/** + * void journal_unlock_updates (journal_t* journal) - release barrier + * @journal: Journal to release the barrier on. + * * Release a transaction barrier obtained with journal_lock_updates(). * * Should be called without the journal lock held. */ - void journal_unlock_updates (journal_t *journal) { lock_journal(journal); @@ -566,9 +578,6 @@ static void jbd_unexpected_dirty_buffer(struct journal_head *jh) } /* - * journal_get_write_access: notify intent to modify a buffer for metadata - * (not data) update. - * * If the buffer is already part of the current transaction, then there * is nothing we need to do. If it is already part of a prior * transaction which we are still committing to disk, then we need to @@ -577,7 +586,6 @@ static void jbd_unexpected_dirty_buffer(struct journal_head *jh) * the handle's metadata buffer credits (unless the buffer is already * part of the transaction, that is). * - * Returns an error code or 0 on success. */ static int @@ -786,6 +794,17 @@ out_unlocked: return error; } +/** + * int journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update. + * @handle: transaction to add buffer modifications to + * @bh: bh to be used for metadata writes + * + * Returns an error code or 0 on success. + * + * In full data journalling mode the buffer may be of type BJ_AsyncData, + * because we're write()ing a buffer which is also part of a shared mapping. + */ + int journal_get_write_access (handle_t *handle, struct buffer_head *bh) { transaction_t *transaction = handle->h_transaction; @@ -816,6 +835,13 @@ int journal_get_write_access (handle_t *handle, struct buffer_head *bh) * There is no lock ranking violation: it was a newly created, * unlocked buffer beforehand. */ +/** + * int journal_get_create_access () - notify intent to use newly created bh + * @handle: transaction to add the new buffer to + * @bh: new buffer. + * + * Call this if you create a new bh. + */ int journal_get_create_access (handle_t *handle, struct buffer_head *bh) { transaction_t *transaction = handle->h_transaction; @@ -875,13 +901,14 @@ out: -/* - * journal_get_undo_access: Notify intent to modify metadata with non- - * rewindable consequences - * +/** + * int journal_get_undo_access() - Notify intent to modify metadata with non-rewindable consequences + * @handle: transaction + * @bh: buffer to undo + * + * Sometimes there is a need to distinguish between metadata which has * been committed to disk and that which has not.
The ext3fs code uses - * this for freeing and allocating space: we have to make sure that we + * this for freeing and allocating space, we have to make sure that we * do not reuse freed space until the deallocation has been committed, * since if we overwrote that space we would make the delete * un-rewindable in case of a crash. @@ -893,13 +920,12 @@ out: * as we know that the buffer has definitely been committed to disk. * * We never need to know which transaction the committed data is part - * of: buffers touched here are guaranteed to be dirtied later and so + * of, buffers touched here are guaranteed to be dirtied later and so * will be committed to a new transaction in due course, at which point * we can discard the old committed data pointer. * * Returns error number or 0 on success. */ - int journal_get_undo_access (handle_t *handle, struct buffer_head *bh) { journal_t *journal = handle->h_transaction->t_journal; @@ -942,21 +968,23 @@ out: return err; } -/* - * journal_dirty_data: mark a buffer as containing dirty data which - * needs to be flushed before we can commit the current transaction. - * +/** + * int journal_dirty_data() - mark a buffer as containing dirty data which needs to be flushed before we can commit the current transaction. + * @handle: transaction + * @bh: bufferhead to mark + * * The buffer is placed on the transaction's data list and is marked as * belonging to the transaction. * * Returns error number or 0 on success. - * + */ +int journal_dirty_data (handle_t *handle, struct buffer_head *bh) +{ +/* * journal_dirty_data() can be called via page_launder->ext3_writepage * by kswapd. So it cannot block. Happily, there's nothing here * which needs lock_journal if `async' is set. */ -int journal_dirty_data (handle_t *handle, struct buffer_head *bh) -{ journal_t *journal = handle->h_transaction->t_journal; int need_brelse = 0; struct journal_head *jh; @@ -1097,24 +1125,28 @@ no_journal: return 0; } -/* - * journal_dirty_metadata: mark a buffer as containing dirty metadata - * which needs to be journaled as part of the current transaction. +/** + * int journal_dirty_metadata() - mark a buffer as containing dirty metadata + * @handle: transaction to add buffer to. + * @bh: buffer to mark + * + * mark dirty metadata which needs to be journaled as part of the current transaction. * * The buffer is placed on the transaction's metadata list and is marked * as belonging to the transaction. * + * Returns error number or 0 on success. + */ +int journal_dirty_metadata (handle_t *handle, struct buffer_head *bh) +{ +/* * Special care needs to be taken if the buffer already belongs to the * current committing transaction (in which case we should have frozen * data present for that commit). In that case, we don't relink the * buffer: that only gets done when the old transaction finally * completes its commit. * - * Returns error number or 0 on success. */ - -int journal_dirty_metadata (handle_t *handle, struct buffer_head *bh) -{ transaction_t *transaction = handle->h_transaction; journal_t *journal = transaction->t_journal; struct journal_head *jh = bh2jh(bh); @@ -1199,9 +1231,12 @@ void journal_release_buffer (handle_t *handle, struct buffer_head *bh) } #endif -/* - * journal_forget: bforget() for potentially-journaled buffers. We can - * only do the bforget if there are no commits pending against the +/** + * void journal_forget() - bforget() for potentially-journaled buffers. 
+ * @handle: transaction handle + * @bh: bh to 'forget' + * + * We can only do the bforget if there are no commits pending against the * buffer. If the buffer is dirty in the current running transaction we * can safely unlink it. * @@ -1213,7 +1248,6 @@ void journal_release_buffer (handle_t *handle, struct buffer_head *bh) * Allow this call even if the handle has aborted --- it may be part of * the caller's cleanup after an abort. */ - void journal_forget (handle_t *handle, struct buffer_head *bh) { transaction_t *transaction = handle->h_transaction; @@ -1352,8 +1386,14 @@ out: } #endif -/* - * Register a callback function for this handle. The function will be +/** + * void journal_callback_set() - Register a callback function for this handle. + * @handle: handle to attach the callback to. + * @func: function to callback. + * @jcb: structure with additional information required by func(), and + * some space for jbd internal information. + * + * The function will be * called when the transaction that this handle is part of has been * committed to disk with the original callback data struct and the * error status of the journal as parameters. There is no guarantee of @@ -1374,7 +1414,11 @@ void journal_callback_set(handle_t *handle, jcb->jcb_func = func; } -/* + +/** + * int journal_stop() - complete a transaction + * @handle: transaction to complete. + * * All done for a particular handle. * * There is not much action needed here. We just return any remaining @@ -1387,7 +1431,6 @@ void journal_callback_set(handle_t *handle, * return -EIO if a journal_abort has been executed since the * transaction began. */ - int journal_stop(handle_t *handle) { transaction_t *transaction = handle->h_transaction; @@ -1473,8 +1516,10 @@ int journal_stop(handle_t *handle) return err; } -/* - * For synchronous operations: force any uncommitted trasnactions +/** + * int journal_force_commit() - force any uncommitted transactions + * @journal: journal to force + * + * For synchronous operations: force any uncommitted transactions * to disk. May seem kludgy, but it reuses all the handle batching * code in a very simple manner. */ @@ -1667,6 +1712,26 @@ out: return 0; } + +/** + * int journal_try_to_free_buffers() - try to free page buffers. + * @journal: journal for operation + * @page: page to try and free + * @gfp_mask: 'IO' mode for try_to_free_buffers() + * + * + * For all the buffers on this page, + * if they are fully written out ordered data, move them onto BUF_CLEAN + * so try_to_free_buffers() can reap them. + * + * This function returns non-zero if we wish try_to_free_buffers() + * to be called. We do this if the page is releasable by try_to_free_buffers(). + * We also do it if the page has locked or dirty buffers and the caller wants + * us to perform sync or async writeout. + */ +int journal_try_to_free_buffers(journal_t *journal, + struct page *page, int unused_gfp_mask) +{ /* * journal_try_to_free_buffers(). Try to remove all this page's buffers * from the journal. @@ -1689,9 +1754,6 @@ out: * cannot happen because we never reallocate freed data as metadata * while the data is part of a transaction. Yes? */ -int journal_try_to_free_buffers(journal_t *journal, - struct page *page, int unused_gfp_mask) -{ struct buffer_head *head; struct buffer_head *bh; int ret = 0; @@ -1886,8 +1948,15 @@ zap_buffer: return may_free; } -/* - * Return non-zero if the page's buffers were successfully reaped +/** + * int journal_invalidatepage() + * @journal: journal to use for flush...
+ * @page: page to flush + * @offset: length of page to invalidate. + * + * Reap page buffers containing data after offset in page. + * + * Return non-zero if the page's buffers were successfully reaped. */ int journal_invalidatepage(journal_t *journal, struct page *page, diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 47a20ce63fa8..2236641f5593 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -63,7 +63,38 @@ extern void * __jbd_kmalloc (const char *where, size_t size, int flags, int retr #define JFS_MIN_JOURNAL_BLOCKS 1024 #ifdef __KERNEL__ + +/** + * typedef handle_t - The handle_t type represents a single atomic update being performed by some process. + * + * All filesystem modifications made by the process go + * through this handle. Recursive operations (such as quota operations) + * are gathered into a single update. + * + * The buffer credits field is used to account for journaled buffers + * being modified by the running process. To ensure that there is + * enough log space for all outstanding operations, we need to limit the + * number of outstanding buffers possible at any time. When the + * operation completes, any buffer credits not used are credited back to + * the transaction, so that at all times we know how many buffers the + * outstanding updates on a transaction might possibly touch. + * + * This is an opaque datatype. + **/ typedef struct handle_s handle_t; /* Atomic operation type */ + + +/** + * typedef journal_t - The journal_t maintains all of the journaling state information for a single filesystem. + * + * journal_t is linked to from the fs superblock structure. + * + * We use the journal_t to keep track of all outstanding transaction + * activity on the filesystem, and to manage the state of the log + * writing process. + * + * This is an opaque datatype. + **/ typedef struct journal_s journal_t; /* Journal control structure */ #endif @@ -252,6 +283,20 @@ static inline struct journal_head *bh2jh(struct buffer_head *bh) } #define HAVE_JOURNAL_CALLBACK_STATUS +/** + * struct journal_callback - Base structure for callback information. + * @jcb_list: list information for other callbacks attached to the same handle. + * @jcb_func: Function to call with this callback structure. + * + * This struct is a 'seed' structure for use with your own callback + * structs. If you are using callbacks you must allocate one of these + * or another struct of your own definition which has this struct + * as its first element and pass it to journal_callback_set(). + * + * This is used internally by jbd to maintain callback information. + * + * See journal_callback_set for more information. + **/ struct journal_callback { struct list_head jcb_list; void (*jcb_func)(struct journal_callback *jcb, int error); @@ -260,18 +305,21 @@ struct journal_callback { struct jbd_revoke_table_s; -/* The handle_t type represents a single atomic update being performed - * by some process. All filesystem modifications made by the process go - * through this handle. Recursive operations (such as quota operations) - * are gathered into a single update. - * - * The buffer credits field is used to account for journaled buffers - * being modified by the running process. To ensure that there is - * enough log space for all outstanding operations, we need to limit the - * number of outstanding buffers possible at any time.
When the - * operation completes, any buffer credits not used are credited back to - * the transaction, so that at all times we know how many buffers the - * outstanding updates on a transaction might possibly touch. */ +/** + * struct handle_s - The handle_s type is the concrete type associated with handle_t. + * @h_transaction: Which compound transaction is this update a part of? + * @h_buffer_credits: Number of remaining buffers we are allowed to dirty. + * @h_ref: Reference count on this handle + * @h_jcb: List of application registered callbacks for this handle. + * @h_err: Field for caller's use to track errors through large fs operations + * @h_sync: flag for sync-on-close + * @h_jdata: flag to force data journaling + * @h_aborted: flag indicating fatal error on handle + **/ + +/* Docbook can't yet cope with the bit fields, but we will leave the documentation + * in so it can be fixed later. + */ struct handle_s { @@ -284,8 +332,8 @@ struct handle_s /* Reference count on this handle */ int h_ref; - /* Field for caller's use to track errors through large fs - operations */ + /* Field for caller's use to track errors through large fs */ + /* operations */ int h_err; /* List of application registered callbacks for this handle. @@ -412,21 +460,58 @@ struct transaction_s struct list_head t_jcb; }; - -/* The journal_t maintains all of the journaling state information for a - * single filesystem. It is linked to from the fs superblock structure. - * - * We use the journal_t to keep track of all outstanding transaction - * activity on the filesystem, and to manage the state of the log - * writing process. */ +/** + * struct journal_s - The journal_s type is the concrete type associated with journal_t. + * @j_flags: General journaling state flags + * @j_errno: Is there an outstanding uncleared error on the journal (from a prior abort)? + * @j_sb_buffer: First part of superblock buffer + * @j_superblock: Second part of superblock buffer + * @j_format_version: Version of the superblock format + * @j_barrier_count: Number of processes waiting to create a barrier lock + * @j_barrier: The barrier lock itself + * @j_running_transaction: The current running transaction. + * @j_committing_transaction: the transaction we are pushing to disk + * @j_checkpoint_transactions: a linked circular list of all transactions waiting for checkpointing + * @j_wait_transaction_locked: Wait queue for waiting for a locked transaction to start committing, or for a barrier lock to be released + * @j_wait_logspace: Wait queue for waiting for checkpointing to complete + * @j_wait_done_commit: Wait queue for waiting for commit to complete + * @j_wait_checkpoint: Wait queue to trigger checkpointing + * @j_wait_commit: Wait queue to trigger commit + * @j_wait_updates: Wait queue to wait for updates to complete + * @j_checkpoint_sem: Semaphore for locking against concurrent checkpoints + * @j_sem: The main journal lock, used by lock_journal() + * @j_head: Journal head - identifies the first unused block in the journal + * @j_tail: Journal tail - identifies the oldest still-used block in the journal. + * @j_free: Journal free - how many free blocks are there in the journal? + * @j_first: The block number of the first usable block + * @j_last: The block number one beyond the last usable block + * @j_dev: Device where we store the journal + * @j_blocksize: blocksize for the location where we store the journal.
+ * @j_blk_offset: starting block offset into the device where we store the journal + * @j_fs_dev: Device which holds the client fs. For internal journal this will be equal to j_dev + * @j_maxlen: Total maximum capacity of the journal region on disk. + * @j_inode: Optional inode where we store the journal. If present, all journal block numbers are mapped into this inode via bmap(). + * @j_tail_sequence: Sequence number of the oldest transaction in the log + * @j_transaction_sequence: Sequence number of the next transaction to grant + * @j_commit_sequence: Sequence number of the most recently committed transaction + * @j_commit_request: Sequence number of the most recent transaction wanting commit + * @j_uuid: Uuid of client object. + * @j_task: Pointer to the current commit thread for this journal + * @j_max_transaction_buffers: Maximum number of metadata buffers to allow in a single compound commit transaction + * @j_commit_interval: What is the maximum transaction lifetime before we begin a commit? + * @j_commit_timer: The timer used to wakeup the commit thread + * @j_commit_timer_active: Timer flag + * @j_all_journals: Link all journals together - system-wide + * @j_revoke: The revoke table - maintains the list of revoked blocks in the current transaction. + **/ struct journal_s { /* General journaling state flags */ unsigned long j_flags; - /* Is there an outstanding uncleared error on the journal (from - * a prior abort)? */ + /* Is there an outstanding uncleared error on the journal (from */ + /* a prior abort)? */ int j_errno; /* The superblock buffer */ @@ -448,13 +533,13 @@ struct journal_s /* ... the transaction we are pushing to disk ... */ transaction_t * j_committing_transaction; - /* ... and a linked circular list of all transactions waiting - * for checkpointing. */ + /* ... and a linked circular list of all transactions waiting */ + /* for checkpointing. */ /* Protected by journal_datalist_lock */ transaction_t * j_checkpoint_transactions; - /* Wait queue for waiting for a locked transaction to start - committing, or for a barrier lock to be released */ + /* Wait queue for waiting for a locked transaction to start */ + /* committing, or for a barrier lock to be released */ wait_queue_head_t j_wait_transaction_locked; /* Wait queue for waiting for checkpointing to complete */ @@ -481,33 +566,33 @@ struct journal_s /* Journal head: identifies the first unused block in the journal. */ unsigned long j_head; - /* Journal tail: identifies the oldest still-used block in the - * journal. */ + /* Journal tail: identifies the oldest still-used block in the */ + /* journal. */ unsigned long j_tail; /* Journal free: how many free blocks are there in the journal? */ unsigned long j_free; - /* Journal start and end: the block numbers of the first usable - * block and one beyond the last usable block in the journal. */ + /* Journal start and end: the block numbers of the first usable */ + /* block and one beyond the last usable block in the journal. */ unsigned long j_first, j_last; - /* Device, blocksize and starting block offset for the location - * where we store the journal. */ + /* Device, blocksize and starting block offset for the location */ + /* where we store the journal. */ struct block_device * j_dev; int j_blocksize; unsigned int j_blk_offset; - /* Device which holds the client fs. For internal journal this - * will be equal to j_dev. */ + /* Device which holds the client fs. For internal journal this */ + /* will be equal to j_dev.
*/ struct block_device * j_fs_dev; /* Total maximum capacity of the journal region on disk. */ unsigned int j_maxlen; - /* Optional inode where we store the journal. If present, all - * journal block numbers are mapped into this inode via - * bmap(). */ + /* Optional inode where we store the journal. If present, all */ + /* journal block numbers are mapped into this inode via */ + /* bmap(). */ struct inode * j_inode; /* Sequence number of the oldest transaction in the log */ @@ -519,23 +604,23 @@ struct journal_s /* Sequence number of the most recent transaction wanting commit */ tid_t j_commit_request; - /* Journal uuid: identifies the object (filesystem, LVM volume - * etc) backed by this journal. This will eventually be - * replaced by an array of uuids, allowing us to index multiple - * devices within a single journal and to perform atomic updates - * across them. */ + /* Journal uuid: identifies the object (filesystem, LVM volume */ + /* etc) backed by this journal. This will eventually be */ + /* replaced by an array of uuids, allowing us to index multiple */ + /* devices within a single journal and to perform atomic updates */ + /* across them. */ __u8 j_uuid[16]; /* Pointer to the current commit thread for this journal */ struct task_struct * j_task; - /* Maximum number of metadata buffers to allow in a single - * compound commit transaction */ + /* Maximum number of metadata buffers to allow in a single */ + /* compound commit transaction */ int j_max_transaction_buffers; - /* What is the maximum transaction lifetime before we begin a - * commit? */ + /* What is the maximum transaction lifetime before we begin a */ + /* commit? */ unsigned long j_commit_interval; /* The timer used to wakeup the commit thread: */ @@ -545,8 +630,8 @@ struct journal_s /* Link all journals together - system-wide */ struct list_head j_all_journals; - /* The revoke table: maintains the list of revoked blocks in the - current transaction. */ + /* The revoke table: maintains the list of revoked blocks in the */ + /* current transaction. */ struct jbd_revoke_table_s *j_revoke; }; -- cgit v1.2.3 From 0b3166209169b1c552ab0684305b75810d5e292e Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 5 Feb 2003 16:56:58 -0800 Subject: [PATCH] Restore LSM hook calls to sendfile Patch from "Stephen D. Smalley" This patch restores the LSM hook calls in sendfile to 2.5.59. The hook was previously added as of 2.5.29 but the hook calls in sendfile were subsequently lost as a result of the sendfile rewrite as of 2.5.30. --- fs/read_write.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/read_write.c b/fs/read_write.c index 04904f7fa206..bcdf304115e6 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -535,6 +535,10 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, if (retval) goto fput_in; + retval = security_file_permission (in_file, MAY_READ); + if (retval) + goto fput_in; + /* * Get output file, and verify that it is ok.. 
*/ @@ -556,6 +560,10 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, if (retval) goto fput_out; + retval = security_file_permission (out_file, MAY_WRITE); + if (retval) + goto fput_out; + if (!ppos) ppos = &in_file->f_pos; -- cgit v1.2.3 From 50d49a05cad3675a8a846ed128df8880f1ef3955 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 5 Feb 2003 16:57:04 -0800 Subject: [PATCH] Fix SMP race between __sync_single_inode and Patch from Mikulas Patocka there's an SMP race condition between __sync_single_inode (or __sync_one on 2.4.20) and __mark_inode_dirty. __mark_inode_dirty doesn't take the inode spinlock. As we know -- unless you take a spinlock or use a barrier, the processor can change the order of instructions. CPU 1 modifies the inode (but the modifications are in a cpu-local buffer and do not go to the bus) calls __mark_inode_dirty it sees I_DIRTY and exits immediately CPU 2 takes the spinlock calls __sync_single_inode inode->i_state &= ~I_DIRTY writes the inode (but does not see the modifications by CPU 1 yet) CPU 1 flushes its write buffer to the bus the inode is already written, clean; the modifications done by CPU 1 are lost The easiest fix would be to move the test inside the spinlock in __mark_inode_dirty; if you do not want to suffer the performance loss, use the attached patches that use memory barriers to ensure the ordering of reads and writes. --- fs/fs-writeback.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 1814f7a9b5ce..ad8ef0487ad2 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -61,6 +61,12 @@ void __mark_inode_dirty(struct inode *inode, int flags) sb->s_op->dirty_inode(inode); } + /* + * make sure that changes are seen by all cpus before we test i_state + * -- mikulas + */ + smp_mb(); + /* avoid the locking if we can */ if ((inode->i_state & flags) == flags) return; @@ -137,6 +143,12 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc) inode->i_state |= I_LOCK; inode->i_state &= ~I_DIRTY; + /* + * smp_rmb(); note: if you remove write_lock below, you must add this. + * mark_inode_dirty doesn't take spinlock, make sure that inode is not + * read speculatively by this cpu before &= ~I_DIRTY -- mikulas + */ + write_lock(&mapping->page_lock); if (wait || !wbc->for_kupdate || list_empty(&mapping->io_pages)) list_splice_init(&mapping->dirty_pages, &mapping->io_pages); -- cgit v1.2.3 From 08f16f8ff05d9de37ea187a3bde79806c64b82e2 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 5 Feb 2003 16:57:11 -0800 Subject: [PATCH] ia32 IRQ distribution rework Patch from "Kamble, Nitin A" Hello All, We were looking at the performance impact of the IRQ routing from the 2.5.52 Linux kernel. This email includes some of our findings about the way the interrupts are getting moved in the 2.5.52 kernel. Also there is discussion and a patch for a new implementation. Let me know what you think at nitin.a.kamble@intel.com Current implementation: ====================== We have found that the existing implementation works well on IA32 SMP systems with a light load of interrupts. Also we noticed that it is not working that well under heavy interrupt load conditions on these SMP systems. The observations are: * Interrupt load of each IRQ is getting balanced on CPUs independent of the load of other IRQs. Also the current implementation moves the IRQs randomly. This works well when the interrupt load is light. But we start seeing imbalance of interrupt load with the existence of multiple heavy interrupt sources.
Frequently multiple heavily loaded IRQs get moved to a single CPU while other CPUs stay very lightly loaded. To achieve a good interrupt load balance, it is important to consider the load of all the interrupts together. This further can be explained with an example of 4 CPUs and 4 heavy interrupt sources. With the existing random movement approach, the chance of each of these heavy interrupt sources moving to separate CPUs is: (4/4)*(3/4)*(2/4)*(1/4) = 3/32. It means 29/32, i.e. about 90.6% of the time, the situation is, some CPUs are very lightly loaded and some are loaded with multiple heavy interrupts (the small stand-alone program after the throughput numbers below checks this arithmetic). This causes the interrupt load imbalance and results in less performance. In the case of 2 CPUs and 2 heavily loaded interrupt sources, this imbalance happens 1/2 = 50% of the time. This issue becomes more and more severe with an increasing number of heavy interrupt sources. * Another interesting observation is: We cannot see the imbalance of the interrupt load from /proc/interrupts. (/proc/interrupts shows the cumulative load of interrupts on all CPUs.) If the interrupt load is imbalanced and this imbalance is getting rotated among CPUs continuously, then /proc/interrupts will still show that the interrupt load is going to processors very evenly. Currently at the frequency (HZ/50) at which IRQs are moved across CPUs, it is not possible to see any interrupt load imbalance happening. * We have also found that, in certain cases, the static IRQ binding performs better than the existing kernel distribution of interrupt load. The reason is, in well-balanced interrupt load situations, these interrupts are unnecessarily getting frequently moved across CPUs. This adds extra overhead; it also takes away the CPU cache warmth benefits. This came out from the performance measurements done on a 4-way HT (8 logical processors) Pentium 4 Xeon system running 8 copies of netperf. The 4 NICs in the system taking different IRQs generated sizable interrupt load with the help of connected clients. Here the netperf transactions/sec throughput numbers observed are: IRQs nicely manually bound to CPUs: 56.20K The current kernel implementation of IRQ movement: 50.05K ----------------------- The static binding of IRQs has performed 12.28% better than the current IRQ movement implemented in the kernel. * The current implementation does not distinguish siblings on HT (Hyper-Threading(tm)) enabled CPUs. It will be beneficial to balance the interrupt load with respect to processor packages first, and then among logical CPUs inside processor packages. For example, if we have 2 heavy interrupt sources and 2 processor packages (4 logical CPUs), assigning the two heavy interrupt sources to different processor packages is better: it will use different execution resources from the different processor packages. New revised implementation: ========================== We also have been working on a new implementation. The following points are the main focus. * At any moment heavily loaded IRQs are distributed to different CPUs to achieve as much balance as possible. * Lightly loaded interrupt sources are excluded from the load balancing, as they do not cause considerable imbalance. * When the heavy interrupt sources are balanced, they are not moved around. This also helps in keeping the CPU caches warm. * It has been made HT aware. While distributing the load, the load on the processor package to which the logical CPUs belong is also considered.
* In situations with few (fewer than num_cpus) heavy interrupt sources, it is not possible to balance them evenly. In such a case the existing code has been reused to move the interrupts. The randomness from the original code has been removed. * The time interval for redistribution has been made flexible. It varies as the system interrupt load changes. * A new kernel_thread is introduced to do the load balancing calculations for all the interrupt sources. It keeps the balance maps ready for the interrupt handlers, keeping the overhead in the interrupt handling to a minimum. * It allows the disabling of the IRQ distribution from the boot loader command line, if anybody wants to do it for any reason. * The algorithm also takes into account the static binding of interrupts to CPUs that the user imposes from the /proc/irq/{n}/smp_affinity interface. Throughput numbers with the netperf setup for the new implementation: Current kernel IRQ balance implementation: 50.02K transactions/sec The new IRQ balance implementation: 56.01K transactions/sec --------------------- A performance improvement of 11.9% is observed on the P4 Xeon. The new IRQ balance implementation also shows a small performance improvement on P6 (Pentium II, III) systems. On a P6 system the netperf throughput numbers are: Current kernel IRQ balance implementation: 36.96K transactions/sec The new IRQ balance implementation: 37.65K transactions/sec --------------------- Here a performance improvement of about 2% is observed on the P6 system. --------------------- Andrew Theurer did some testing of this patch on a quad P4: I got a chance to run the NetBench benchmark with your patch on the 2.5.54-mjb2 kernel. NetBench measures SMB/CIFS performance by using several SMB clients (in this case 44 Windows 2000 systems), sending SMB requests to a Linux server running Samba 2.2.3a+sendfile. The result is throughput, in Mbps. Generally the network traffic on the server is 60% recv, 40% tx. I believe we have very similar systems. Mine is a 4 x 1.6 GHz, 1 MB L3 P4 Xeon with 4 GB DDR memory (3.2 GB/sec I believe). The chipset is "Summit". I also have more than one Intel e1000 adapter. I decided to run a few configurations, first with just one adapter, with and without HT support in the kernel (acpi=off), then add another adapter and test again with/without HT. Here are the results: 4P, no HT, 1 x e1000, no kirq: 1214 Mbps, 4% idle 4P, no HT, 1 x e1000, kirq: 1223 Mbps, 4% idle, +0.74% I suppose we didn't see much of an improvement here because we never run into the situation where more than one interrupt with a high rate is routed to a single CPU on irq_balance. 4P, HT, 1 x e1000, no kirq: 1214 Mbps, 25% idle 4P, HT, 1 x e1000, kirq: 1220 Mbps, 30% idle, +0.49% Again, not much of a difference just yet, but lots of idle time. We may have reached the limit at which one logical CPU can process interrupts for an e1000 adapter. There are other things I can probably do to help this, like int delay, and NAPI, which I will get to eventually. 4P, HT, 2 x e1000, no kirq: 1269 Mbps, 23% idle 4P, HT, 2 x e1000, kirq: 1329 Mbps, 18% idle +4.7% OK, almost 5% better! Probably has to do with a couple of things: the fact that your code does not route two different interrupts to the same core/different logical cpus (quite obvious by looking at /proc/interrupts), and that more than one interrupt does not go to the same cpu if possible. I suspect irq_balance did some of those [bad] things some of the time, and we observed a bottleneck in int processing that was lower than with kirq.
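As a side note, the random-placement arithmetic earlier in this mail can be checked with a small stand-alone program. This is an illustration only, not part of the patch; it assumes K heavy IRQs are placed independently and uniformly at random on N CPUs, as the argument above supposes, and prints the probability that they all land on distinct CPUs:

#include <stdio.h>

/*
 * Probability that K independently and uniformly placed heavy IRQs
 * land on K distinct CPUs out of N: N/N * (N-1)/N * ... * (N-K+1)/N.
 */
static double prob_all_distinct(int cpus, int irqs)
{
	double p = 1.0;
	int i;

	for (i = 0; i < irqs; i++)
		p *= (double)(cpus - i) / (double)cpus;
	return p;
}

int main(void)
{
	/* 4 CPUs, 4 heavy IRQs: 24/256 = 3/32, so ~90.6% of the time imbalanced */
	printf("4 CPUs, 4 IRQs: %.4f\n", prob_all_distinct(4, 4));
	/* 2 CPUs, 2 heavy IRQs: 1/2, so 50% of the time imbalanced */
	printf("2 CPUs, 2 IRQs: %.4f\n", prob_all_distinct(2, 2));
	return 0;
}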
I don't think all of the idle time is because of an int processing bottleneck. I'm just not sure what it is yet :) Hopefully something will become obvious to me... Overall I like the way it works, and I believe it can be tweaked to work with NUMA when necessary. I hope to have access to a specweb system on a NUMA box soon, so we can verify that. --- Documentation/kernel-parameters.txt | 2 + arch/i386/kernel/io_apic.c | 370 +++++++++++++++++++++++++++++++++--- 2 files changed, 341 insertions(+), 31 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index bd3af2be8cbe..dd5dcfe400d4 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -351,6 +351,8 @@ running once the system is up. hugepages= [HW,IA-32,IA-64] Maximal number of HugeTLB pages. + noirqbalance [IA-32,SMP,KNL] Disable kernel irq balancing + i8042_direct [HW] Non-translated mode i8042_dumbkbd i8042_noaux diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 9d1a99a6ffe9..1edcbcb07b9c 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -207,19 +207,34 @@ static void set_ioapic_affinity (unsigned int irq, unsigned long mask) spin_unlock_irqrestore(&ioapic_lock, flags); } -#if CONFIG_SMP - -typedef struct { - unsigned int cpu; - unsigned long timestamp; -} ____cacheline_aligned irq_balance_t; - -static irq_balance_t irq_balance[NR_IRQS] __cacheline_aligned - = { [ 0 ... NR_IRQS-1 ] = { 0, 0 } }; +#if defined(CONFIG_SMP) +# include /* kernel_thread() */ +# include /* kstat */ +# include /* kmalloc() */ +# include /* time_after() */ + +# if CONFIG_BALANCED_IRQ_DEBUG +# define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__, __LINE__); printk(x); } while (0) +# define Dprintk(x...) do { TDprintk(x); } while (0) +# else +# define TDprintk(x...) +# define Dprintk(x...) +# endif extern unsigned long irq_affinity [NR_IRQS]; +unsigned long __cacheline_aligned irq_balance_mask [NR_IRQS]; +static int irqbalance_disabled __initdata = 0; +static int physical_balance = 0; -#endif +struct irq_cpu_info { + unsigned long * last_irq; + unsigned long * irq_delta; + unsigned long irq; +} irq_cpu_data[NR_CPUS]; + +#define CPU_IRQ(cpu) (irq_cpu_data[cpu].irq) +#define LAST_CPU_IRQ(cpu,irq) (irq_cpu_data[cpu].last_irq[irq]) +#define IRQ_DELTA(cpu,irq) (irq_cpu_data[cpu].irq_delta[irq]) #define IDLE_ENOUGH(cpu,now) \ (idle_cpu(cpu) && ((now) - irq_stat[(cpu)].idle_timestamp > 1)) @@ -227,10 +242,224 @@ extern unsigned long irq_affinity [NR_IRQS]; #define IRQ_ALLOWED(cpu,allowed_mask) \ ((1 << cpu) & (allowed_mask)) -#if CONFIG_SMP +#define CPU_TO_PACKAGEINDEX(i) \ + ((physical_balance && i > cpu_sibling_map[i]) ? cpu_sibling_map[i] : i) + +#define MAX_BALANCED_IRQ_INTERVAL (5*HZ) +#define MIN_BALANCED_IRQ_INTERVAL (HZ/2) +#define BALANCED_IRQ_MORE_DELTA (HZ/10) +#define BALANCED_IRQ_LESS_DELTA (HZ) + +long balanced_irq_interval = MAX_BALANCED_IRQ_INTERVAL; + +static inline void balance_irq(int cpu, int irq); + +static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold) +{ + int i, j; + Dprintk("Rotating IRQs among CPUs.\n"); + for (i = 0; i < NR_CPUS; i++) { + for (j = 0; cpu_online(i) && (j < NR_IRQS); j++) { + if (!irq_desc[j].action) + continue; + /* Is it a significant load ?
*/ + if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i),j) < useful_load_threshold) + continue; + balance_irq(i, j); + } + } + balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, + balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); + return; +} + +static void do_irq_balance(void) +{ + int i, j; + unsigned long max_cpu_irq = 0, min_cpu_irq = (~0); + unsigned long move_this_load = 0; + int max_loaded = 0, min_loaded = 0; + unsigned long useful_load_threshold = balanced_irq_interval + 10; + int selected_irq; + int tmp_loaded, first_attempt = 1; + unsigned long tmp_cpu_irq; + unsigned long imbalance = 0; + unsigned long allowed_mask; + unsigned long target_cpu_mask; + + for (i = 0; i < NR_CPUS; i++) { + int package_index; + CPU_IRQ(i) = 0; + if (!cpu_online(i)) + continue; + package_index = CPU_TO_PACKAGEINDEX(i); + for (j = 0; j < NR_IRQS; j++) { + unsigned long value_now, delta; + /* Is this an active IRQ? */ + if (!irq_desc[j].action) + continue; + if ( package_index == i ) + IRQ_DELTA(package_index,j) = 0; + /* Determine the total count per processor per IRQ */ + value_now = (unsigned long) kstat_cpu(i).irqs[j]; + + /* Determine the activity per processor per IRQ */ + delta = value_now - LAST_CPU_IRQ(i,j); + + /* Update last_cpu_irq[][] for the next time */ + LAST_CPU_IRQ(i,j) = value_now; + + /* Ignore IRQs whose rate is less than the clock */ + if (delta < useful_load_threshold) + continue; + /* update the load for the processor or package total */ + IRQ_DELTA(package_index,j) += delta; + + /* Keep track of the higher numbered sibling as well */ + if (i != package_index) + CPU_IRQ(i) += delta; + /* + * We have sibling A and sibling B in the package + * + * cpu_irq[A] = load for cpu A + load for cpu B + * cpu_irq[B] = load for cpu B + */ + CPU_IRQ(package_index) += delta; + } + } + /* Find the least loaded processor package */ + for (i = 0; i < NR_CPUS; i++) { + if (!cpu_online(i)) + continue; + if (physical_balance && i > cpu_sibling_map[i]) + continue; + if (min_cpu_irq > CPU_IRQ(i)) { + min_cpu_irq = CPU_IRQ(i); + min_loaded = i; + } + } + max_cpu_irq = ULONG_MAX; + +tryanothercpu: + /* Look for heaviest loaded processor. + * We may come back to get the next heaviest loaded processor. + * Skip processors with trivial loads. + */ + tmp_cpu_irq = 0; + tmp_loaded = -1; + for (i = 0; i < NR_CPUS; i++) { + if (!cpu_online(i)) + continue; + if (physical_balance && i > cpu_sibling_map[i]) + continue; + if (max_cpu_irq <= CPU_IRQ(i)) + continue; + if (tmp_cpu_irq < CPU_IRQ(i)) { + tmp_cpu_irq = CPU_IRQ(i); + tmp_loaded = i; + } + } + + if (tmp_loaded == -1) { + /* In the case of a small number of heavy interrupt sources, + * we end up loading some of the cpus too much. We use Ingo's original + * approach to rotate them around. + */ + if (!first_attempt && imbalance >= useful_load_threshold) { + rotate_irqs_among_cpus(useful_load_threshold); + return; + } + goto not_worth_the_effort; + } + + first_attempt = 0; /* heaviest search */ + max_cpu_irq = tmp_cpu_irq; /* load */ + max_loaded = tmp_loaded; /* processor */ + imbalance = (max_cpu_irq - min_cpu_irq) / 2; + + Dprintk("max_loaded cpu = %d\n", max_loaded); + Dprintk("min_loaded cpu = %d\n", min_loaded); + Dprintk("max_cpu_irq load = %ld\n", max_cpu_irq); + Dprintk("min_cpu_irq load = %ld\n", min_cpu_irq); + Dprintk("load imbalance = %lu\n", imbalance); + + /* if imbalance is less than approx 10% of max load, then + * observe diminishing returns action.
- quit + */ + if (imbalance < (max_cpu_irq >> 3)) { + Dprintk("Imbalance too trivial\n"); + goto not_worth_the_effort; + } + +tryanotherirq: + /* if we select an IRQ to move that can't go where we want, then + * see if there is another one to try. + */ + move_this_load = 0; + selected_irq = -1; + for (j = 0; j < NR_IRQS; j++) { + /* Is this an active IRQ? */ + if (!irq_desc[j].action) + continue; + if (imbalance <= IRQ_DELTA(max_loaded,j)) + continue; + /* Try to find the IRQ that is closest to the imbalance + * without going over. + */ + if (move_this_load < IRQ_DELTA(max_loaded,j)) { + move_this_load = IRQ_DELTA(max_loaded,j); + selected_irq = j; + } + } + if (selected_irq == -1) { + goto tryanothercpu; + } - #define IRQ_BALANCE_INTERVAL (HZ/50) + imbalance = move_this_load; + /* For the physical_balance case, we accumulated both load + * values in one of the siblings' cpu_irq[], + * to use the same code for physical and logical processors + * as much as possible. + * + * NOTE: the cpu_irq[] array holds the sum of the load for + * sibling A and sibling B in the slot for the lowest numbered + * sibling (A), _AND_ the load for sibling B in the slot for + * the higher numbered sibling. + * + * We seek the least loaded sibling by making the comparison + * (A+B)/2 vs B + */ + if (physical_balance && (CPU_IRQ(min_loaded) >> 1) > CPU_IRQ(cpu_sibling_map[min_loaded])) + min_loaded = cpu_sibling_map[min_loaded]; + + allowed_mask = cpu_online_map & irq_affinity[selected_irq]; + target_cpu_mask = 1 << min_loaded; + + if (target_cpu_mask & allowed_mask) { + irq_desc_t *desc = irq_desc + selected_irq; + Dprintk("irq = %d moved to cpu = %d\n", selected_irq, min_loaded); + /* mark for change destination */ + spin_lock(&desc->lock); + irq_balance_mask[selected_irq] = target_cpu_mask; + spin_unlock(&desc->lock); + /* Since we made a change, come back sooner to + * check for more variation. + */ + balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, + balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); + return; + } + goto tryanotherirq; + +not_worth_the_effort: + /* if we did not find an IRQ to move, then adjust the time interval upward */ + balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL, + balanced_irq_interval + BALANCED_IRQ_MORE_DELTA); + Dprintk("IRQ worth rotating not found\n"); + return; +} + static unsigned long move(int curr_cpu, unsigned long allowed_mask, unsigned long now, int direction) { int search_idle = 1; @@ -257,34 +486,113 @@ inside: return cpu; } -static inline void balance_irq(int irq) +static inline void balance_irq (int cpu, int irq) { - irq_balance_t *entry = irq_balance + irq; unsigned long now = jiffies; - + unsigned long allowed_mask; + unsigned int new_cpu; + if (no_balance_irq) return; - if (unlikely(time_after(now, entry->timestamp + IRQ_BALANCE_INTERVAL))) { - unsigned long allowed_mask; - unsigned int new_cpu; - int random_number; + allowed_mask = cpu_online_map & irq_affinity[irq]; + new_cpu = move(cpu, allowed_mask, now, 1); + if (cpu != new_cpu) { + irq_desc_t *desc = irq_desc + irq; + spin_lock(&desc->lock); + irq_balance_mask[irq] = cpu_to_logical_apicid(new_cpu); + spin_unlock(&desc->lock); + } +} - rdtscl(random_number); - random_number &= 1; +int balanced_irq(void *unused) +{ + int i; + unsigned long prev_balance_time = jiffies; + long time_remaining = balanced_irq_interval; + daemonize(); + sigfillset(&current->blocked); + sprintf(current->comm, "kirqd"); + + /* push everything to CPU 0 to give us a starting point.
*/ + for (i = 0 ; i < NR_IRQS ; i++) + irq_balance_mask[i] = 1 << 0; + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + time_remaining = schedule_timeout(time_remaining); + if (time_after(jiffies, prev_balance_time+balanced_irq_interval)) { + Dprintk("balanced_irq: calling do_irq_balance() %lu\n", jiffies); + do_irq_balance(); + prev_balance_time = jiffies; + time_remaining = balanced_irq_interval; + } + } +} - allowed_mask = cpu_online_map & irq_affinity[irq]; - entry->timestamp = now; - new_cpu = move(entry->cpu, allowed_mask, now, random_number); - if (entry->cpu != new_cpu) { - entry->cpu = new_cpu; - set_ioapic_affinity(irq, cpu_to_logical_apicid(new_cpu)); +static int __init balanced_irq_init(void) +{ + int i; + struct cpuinfo_x86 *c; + c = &boot_cpu_data; + if (irqbalance_disabled) + return 0; + /* Enable physical balance only if more than 1 physical processor is present */ + if (smp_num_siblings > 1 && cpu_online_map >> 2) + physical_balance = 1; + + for (i = 0; i < NR_CPUS; i++) { + if (!cpu_online(i)) + continue; + irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); + irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); + if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) { + printk(KERN_ERR "balanced_irq_init: out of memory"); + goto failed; } + memset(irq_cpu_data[i].irq_delta,0,sizeof(unsigned long) * NR_IRQS); + memset(irq_cpu_data[i].last_irq,0,sizeof(unsigned long) * NR_IRQS); + } + + printk(KERN_INFO "Starting balanced_irq\n"); + if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0) + return 0; + else + printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq"); +failed: + for (i = 0; i < NR_CPUS; i++) { + if(irq_cpu_data[i].irq_delta) + kfree(irq_cpu_data[i].irq_delta); + if(irq_cpu_data[i].last_irq) + kfree(irq_cpu_data[i].last_irq); + } + return 0; +} + +static int __init irqbalance_disable(char *str) +{ + irqbalance_disabled = 1; + return 0; +} + +__setup("noirqbalance", irqbalance_disable); + +static void set_ioapic_affinity (unsigned int irq, unsigned long mask); + +static inline void move_irq(int irq) +{ + /* note - we hold the desc->lock */ + if (unlikely(irq_balance_mask[irq])) { + set_ioapic_affinity(irq, irq_balance_mask[irq]); + irq_balance_mask[irq] = 0; } } + +__initcall(balanced_irq_init); + #else /* !SMP */ -static inline void balance_irq(int irq) { } -#endif +static inline void move_irq(int irq) { } +#endif /* defined(CONFIG_SMP) */ + /* * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to @@ -1307,7 +1615,7 @@ static unsigned int startup_edge_ioapic_irq(unsigned int irq) */ static void ack_edge_ioapic_irq(unsigned int irq) { - balance_irq(irq); + move_irq(irq); if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED)) == (IRQ_PENDING | IRQ_DISABLED)) mask_IO_APIC_irq(irq); @@ -1347,7 +1655,7 @@ static void end_level_ioapic_irq (unsigned int irq) unsigned long v; int i; - balance_irq(irq); + move_irq(irq); /* * It appears there is an erratum which affects at least version 0x11 * of I/O APIC (that's the 82093AA and cores integrated into various -- cgit v1.2.3 From f93fcfa9e8a17bb8ef6a631ace1a14b02091e08f Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 5 Feb 2003 16:57:17 -0800 Subject: [PATCH] Fix futexes in huge pages Using a futex in a large page causes a kernel lockup in __pin_page() - because __pin_page's page revalidation uses follow_page(), and follow_page() doesn't work for hugepages. 
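In outline, the fix teaches follow_page() to notice a huge-page mapping and hand back the right constituent 4k page. A minimal sketch of the constituent-page arithmetic (huge_subpage is a hypothetical name used only for illustration; it assumes the kernel's struct page, HPAGE_MASK and PAGE_SHIFT definitions, and mirrors the follow_huge_pmd() hunk added by the follow-up patch further below):

/*
 * Sketch only: given the head page of a huge page and a virtual
 * address inside it, index to the constituent 4k page.  The offset
 * within the huge page, expressed in units of 4k pages, is added to
 * the head struct page pointer.
 */
static struct page *huge_subpage(struct page *head, unsigned long address)
{
	return head + ((address & ~HPAGE_MASK) >> PAGE_SHIFT);
}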
The patch fixes up follow_page() to return the appropriate 4k page for hugepages. This incurs a vma lookup for each follow_page(), which is considerable overhead in some situations. We only _need_ to do this if the architecture cannot determine a page's hugeness from the contents of the PMD. So this patch is a "reference" implementation for, say, PPC BAT-based hugepages. --- arch/i386/mm/hugetlbpage.c | 29 +++++++++++++++++++++++++++++ include/linux/hugetlb.h | 18 ++++++++++++++++-- include/linux/sched.h | 4 +++- mm/memory.c | 5 +++++ mm/mmap.c | 2 +- 5 files changed, 54 insertions(+), 4 deletions(-) diff --git a/arch/i386/mm/hugetlbpage.c b/arch/i386/mm/hugetlbpage.c index 2dc1534f420d..9d7de7b0fb00 100644 --- a/arch/i386/mm/hugetlbpage.c +++ b/arch/i386/mm/hugetlbpage.c @@ -150,6 +150,35 @@ back1: return i; } +struct page * +follow_huge_addr(struct mm_struct *mm, + struct vm_area_struct *vma, unsigned long address, int write) +{ + unsigned long start = address; + int length = 1; + int nr; + struct page *page; + + nr = follow_hugetlb_page(mm, vma, &page, NULL, &start, &length, 0); + if (nr == 1) + return page; + return NULL; +} + +/* + * If virtual address `addr' lies within a huge page, return its controlling + * VMA, else NULL. + */ +struct vm_area_struct *hugepage_vma(struct mm_struct *mm, unsigned long addr) +{ + if (mm->used_hugetlb) { + struct vm_area_struct *vma = find_vma(mm, addr); + if (vma && is_vm_hugetlb_page(vma)) + return vma; + } + return NULL; +} + void free_huge_page(struct page *page) { BUG_ON(page_count(page)); diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 325d91ba012a..6b2f606c08f7 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -20,16 +20,28 @@ int hugetlb_prefault(struct address_space *, struct vm_area_struct *); void huge_page_release(struct page *); int hugetlb_report_meminfo(char *); int is_hugepage_mem_enough(size_t); - +struct page *follow_huge_addr(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long address, int write); +struct vm_area_struct *hugepage_vma(struct mm_struct *mm, + unsigned long address); extern int htlbpage_max; +static inline void +mark_mm_hugetlb(struct mm_struct *mm, struct vm_area_struct *vma) +{ + if (is_vm_hugetlb_page(vma)) + mm->used_hugetlb = 1; +} + #else /* !CONFIG_HUGETLB_PAGE */ + static inline int is_vm_hugetlb_page(struct vm_area_struct *vma) { return 0; } -#define follow_hugetlb_page(m,v,p,vs,a,b,i) ({ BUG(); 0; }) +#define follow_hugetlb_page(m,v,p,vs,a,b,i) ({ BUG(); 0; }) +#define follow_huge_addr(mm, vma, addr, write) 0 #define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; }) #define hugetlb_prefault(mapping, vma) ({ BUG(); 0; }) #define zap_hugepage_range(vma, start, len) BUG() @@ -37,6 +49,8 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma) #define huge_page_release(page) BUG() #define is_hugepage_mem_enough(size) 0 #define hugetlb_report_meminfo(buf) 0 +#define hugepage_vma(mm, addr) 0 +#define mark_mm_hugetlb(mm, vma) do { } while (0) #endif /* !CONFIG_HUGETLB_PAGE */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 3a1367bacd1c..648d4d3ace3c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -201,7 +201,9 @@ struct mm_struct { unsigned long swap_address; unsigned dumpable:1; - +#ifdef CONFIG_HUGETLB_PAGE + int used_hugetlb; +#endif /* Architecture-specific MM context */ mm_context_t context; diff --git a/mm/memory.c b/mm/memory.c index 63b9032c0620..f058e4cfc639 100644 --- a/mm/memory.c +++ b/mm/memory.c @@
-607,6 +607,11 @@ follow_page(struct mm_struct *mm, unsigned long address, int write) pmd_t *pmd; pte_t *ptep, pte; unsigned long pfn; + struct vm_area_struct *vma; + + vma = hugepage_vma(mm, address); + if (vma) + return follow_huge_addr(mm, vma, address, write); pgd = pgd_offset(mm, address); if (pgd_none(*pgd) || pgd_bad(*pgd)) diff --git a/mm/mmap.c b/mm/mmap.c index 61d0dc32646a..d3b14b17da38 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -362,6 +362,7 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma, if (mapping) up(&mapping->i_shared_sem); + mark_mm_hugetlb(mm, vma); mm->map_count++; validate_mm(mm); } @@ -1423,7 +1424,6 @@ void exit_mmap(struct mm_struct *mm) kmem_cache_free(vm_area_cachep, vma); vma = next; } - } /* Insert vm structure into process list sorted by address -- cgit v1.2.3 From 1f1921fc15dc2408ab3900d036cffcf0d732801f Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 5 Feb 2003 16:57:23 -0800 Subject: [PATCH] Optimise follow_page() for page-table-based hugepages ia32 and others can determine a page's hugeness by inspecting the pmd's value directly. No need to perform a VMA lookup against the user's virtual address. This patch ifdef's away the VMA-based implementation of hugepage-aware-follow_page for ia32 and replaces it with a pmd-based implementation. The intent is that architectures will implement one or the other. So the architecture either: 1: Implements hugepage_vma()/follow_huge_addr(), and stubs out pmd_huge()/follow_huge_pmd() or 2: Implements pmd_huge()/follow_huge_pmd(), and stubs out hugepage_vma()/follow_huge_addr() --- arch/i386/mm/hugetlbpage.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/hugetlb.h | 6 ++++++ mm/memory.c | 6 +++++- 3 files changed, 56 insertions(+), 1 deletion(-) diff --git a/arch/i386/mm/hugetlbpage.c b/arch/i386/mm/hugetlbpage.c index 9d7de7b0fb00..106dcdd8dcf2 100644 --- a/arch/i386/mm/hugetlbpage.c +++ b/arch/i386/mm/hugetlbpage.c @@ -150,6 +150,7 @@ back1: return i; } +#if 0 /* This is just for testing */ struct page * follow_huge_addr(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write) @@ -179,6 +180,50 @@ struct vm_area_struct *hugepage_vma(struct mm_struct *mm, unsigned long addr) return NULL; } +int pmd_huge(pmd_t pmd) +{ + return 0; +} + +struct page * +follow_huge_pmd(struct mm_struct *mm, unsigned long address, + pmd_t *pmd, int write) +{ + return NULL; +} + +#else + +struct page * +follow_huge_addr(struct mm_struct *mm, + struct vm_area_struct *vma, unsigned long address, int write) +{ + return NULL; +} + +struct vm_area_struct *hugepage_vma(struct mm_struct *mm, unsigned long addr) +{ + return NULL; +} + +int pmd_huge(pmd_t pmd) +{ + return !!(pmd_val(pmd) & _PAGE_PSE); +} + +struct page * +follow_huge_pmd(struct mm_struct *mm, unsigned long address, + pmd_t *pmd, int write) +{ + struct page *page; + + page = pte_page(*(pte_t *)pmd); + if (page) + page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT); + return page; +} +#endif + void free_huge_page(struct page *page) { BUG_ON(page_count(page)); diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 6b2f606c08f7..b51d51d05190 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -24,6 +24,10 @@ struct page *follow_huge_addr(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write); struct vm_area_struct *hugepage_vma(struct mm_struct *mm, unsigned long address); +struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, + pmd_t *pmd, 
int write); +int pmd_huge(pmd_t pmd); + extern int htlbpage_max; static inline void @@ -51,6 +55,8 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma) #define hugetlb_report_meminfo(buf) 0 #define hugepage_vma(mm, addr) 0 #define mark_mm_hugetlb(mm, vma) do { } while (0) +#define follow_huge_pmd(mm, addr, pmd, write) 0 +#define pmd_huge(x) 0 #endif /* !CONFIG_HUGETLB_PAGE */ diff --git a/mm/memory.c b/mm/memory.c index f058e4cfc639..e390e0bf4aea 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -618,7 +618,11 @@ follow_page(struct mm_struct *mm, unsigned long address, int write) goto out; pmd = pmd_offset(pgd, address); - if (pmd_none(*pmd) || pmd_bad(*pmd)) + if (pmd_none(*pmd)) + goto out; + if (pmd_huge(*pmd)) + return follow_huge_pmd(mm, address, pmd, write); + if (pmd_bad(*pmd)) goto out; ptep = pte_offset_map(pmd, address); -- cgit v1.2.3 From 87afb5f6cf3e400b9d3deb0f30cba16805cb43f4 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 5 Feb 2003 16:57:30 -0800 Subject: [PATCH] default_idle micro-optimisation Patch from rwhron@earthlink.net Micro-optimization of default_idle from -aa. current_cpu_data.hlt_works_ok is only false for some old 386/486 pcs. --- arch/i386/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index fbcfc1f2c2bd..f9934af388ed 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -86,7 +86,7 @@ void enable_hlt(void) */ void default_idle(void) { - if (current_cpu_data.hlt_works_ok && !hlt_counter) { + if (!hlt_counter && current_cpu_data.hlt_works_ok) { local_irq_disable(); if (!need_resched()) safe_halt(); -- cgit v1.2.3 From a1329fe850dc3ffeb2dd5b17dc949df5a5328db2 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 5 Feb 2003 16:57:36 -0800 Subject: [PATCH] loop inefficiency fix Patch from Hugh Dickins The loop driver's loop over elements of bi_io_vec is in lo_send and lo_receive: iterating that same transfer bi_vcnt times at the level above is, er, excessive. (And no need to increment bi_idx here.) --- drivers/block/loop.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 20e8fbdfcecc..86d653d168df 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -350,15 +350,10 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio) int ret; pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset; - - do { - if (bio_rw(bio) == WRITE) - ret = lo_send(lo, bio, lo->lo_blocksize, pos); - else - ret = lo_receive(lo, bio, lo->lo_blocksize, pos); - - } while (++bio->bi_idx < bio->bi_vcnt); - + if (bio_rw(bio) == WRITE) + ret = lo_send(lo, bio, lo->lo_blocksize, pos); + else + ret = lo_receive(lo, bio, lo->lo_blocksize, pos); return ret; } -- cgit v1.2.3 From afcde6ef0d970ad1e1a1bc0506d6dae924abaab1 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 5 Feb 2003 16:57:41 -0800 Subject: [PATCH] pte_chain_alloc fixes There are several places in which the return value from pte_chain_alloc() is not being checked, and one place in which a GFP_KERNEL allocation is happening inside a spinlock.
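The fix applies one rule throughout: do the sleeping allocation before taking page_table_lock, and bail out on NULL. A minimal userspace sketch of that ordering, with a pthread mutex standing in for page_table_lock and malloc() for pte_chain_alloc() (names are illustrative only, not kernel code):

	#include <pthread.h>
	#include <stdlib.h>

	static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

	struct pte_chain { void *next; };	/* stand-in type */

	static int install_entry(void)
	{
		/* GFP_KERNEL may sleep: allocate before taking the lock */
		struct pte_chain *chain = malloc(sizeof(*chain));

		if (!chain)
			return -1;	/* -ENOMEM in the kernel */

		pthread_mutex_lock(&table_lock);
		/* ... link the new reverse mapping through chain ... */
		pthread_mutex_unlock(&table_lock);

		free(chain);	/* pte_chain_free() if it went unused */
		return 0;
	}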
--- fs/exec.c | 3 +++ mm/fremap.c | 7 +++++-- mm/memory.c | 66 ++++++++++++++++++++++++++++++++++++++----------------------- 3 files changed, 49 insertions(+), 27 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 8be3fa7c0ff2..028fbda85a71 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -300,6 +300,8 @@ void put_dirty_page(struct task_struct * tsk, struct page *page, unsigned long a pgd = pgd_offset(tsk->mm, address); pte_chain = pte_chain_alloc(GFP_KERNEL); + if (!pte_chain) + goto out_sig; spin_lock(&tsk->mm->page_table_lock); pmd = pmd_alloc(tsk->mm, pgd, address); if (!pmd) @@ -325,6 +327,7 @@ void put_dirty_page(struct task_struct * tsk, struct page *page, unsigned long a return; out: spin_unlock(&tsk->mm->page_table_lock); +out_sig: __free_page(page); force_sig(SIGKILL, tsk); pte_chain_free(pte_chain); diff --git a/mm/fremap.c b/mm/fremap.c index a8caa89e312c..cde86cec65b5 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -53,8 +53,11 @@ int install_page(struct mm_struct *mm, struct vm_area_struct *vma, pte_t *pte, entry; pgd_t *pgd; pmd_t *pmd; - struct pte_chain *pte_chain = NULL; + struct pte_chain *pte_chain; + pte_chain = pte_chain_alloc(GFP_KERNEL); + if (!pte_chain) + goto err; pgd = pgd_offset(mm, addr); spin_lock(&mm->page_table_lock); @@ -62,7 +65,6 @@ int install_page(struct mm_struct *mm, struct vm_area_struct *vma, if (!pmd) goto err_unlock; - pte_chain = pte_chain_alloc(GFP_KERNEL); pte = pte_alloc_map(mm, pmd, addr); if (!pte) goto err_unlock; @@ -87,6 +89,7 @@ int install_page(struct mm_struct *mm, struct vm_area_struct *vma, err_unlock: spin_unlock(&mm->page_table_lock); pte_chain_free(pte_chain); +err: return err; } diff --git a/mm/memory.c b/mm/memory.c index e390e0bf4aea..53759b45bc85 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -935,9 +935,19 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma, struct page *old_page, *new_page; unsigned long pfn = pte_pfn(pte); struct pte_chain *pte_chain = NULL; + int ret; - if (!pfn_valid(pfn)) - goto bad_wp_page; + if (unlikely(!pfn_valid(pfn))) { + /* + * This should really halt the system so it can be debugged or + * at least the kernel stops what it's doing before it corrupts + * data, but for the moment just pretend this is OOM. 
+ */ + pte_unmap(page_table); + printk(KERN_ERR "do_wp_page: bogus page at address %08lx\n", + address); + goto oom; + } old_page = pfn_to_page(pfn); if (!TestSetPageLocked(old_page)) { @@ -945,10 +955,11 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma, unlock_page(old_page); if (reuse) { flush_cache_page(vma, address); - establish_pte(vma, address, page_table, pte_mkyoung(pte_mkdirty(pte_mkwrite(pte)))); + establish_pte(vma, address, page_table, + pte_mkyoung(pte_mkdirty(pte_mkwrite(pte)))); pte_unmap(page_table); - spin_unlock(&mm->page_table_lock); - return VM_FAULT_MINOR; + ret = VM_FAULT_MINOR; + goto out; } } pte_unmap(page_table); @@ -959,11 +970,13 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma, page_cache_get(old_page); spin_unlock(&mm->page_table_lock); + pte_chain = pte_chain_alloc(GFP_KERNEL); + if (!pte_chain) + goto no_mem; new_page = alloc_page(GFP_HIGHUSER); if (!new_page) goto no_mem; copy_cow_page(old_page,new_page,address); - pte_chain = pte_chain_alloc(GFP_KERNEL); /* * Re-check the pte - we dropped the lock @@ -982,25 +995,19 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma, new_page = old_page; } pte_unmap(page_table); - spin_unlock(&mm->page_table_lock); page_cache_release(new_page); page_cache_release(old_page); - pte_chain_free(pte_chain); - return VM_FAULT_MINOR; + ret = VM_FAULT_MINOR; + goto out; -bad_wp_page: - pte_unmap(page_table); - spin_unlock(&mm->page_table_lock); - printk(KERN_ERR "do_wp_page: bogus page at address %08lx\n", address); - /* - * This should really halt the system so it can be debugged or - * at least the kernel stops what it's doing before it corrupts - * data, but for the moment just pretend this is OOM. - */ - return VM_FAULT_OOM; no_mem: page_cache_release(old_page); - return VM_FAULT_OOM; +oom: + ret = VM_FAULT_OOM; +out: + spin_unlock(&mm->page_table_lock); + pte_chain_free(pte_chain); + return ret; } static void vmtruncate_list(struct list_head *head, unsigned long pgoff) @@ -1295,6 +1302,7 @@ do_no_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page * new_page; pte_t entry; struct pte_chain *pte_chain; + int ret; if (!vma->vm_ops || !vma->vm_ops->nopage) return do_anonymous_page(mm, vma, page_table, @@ -1310,6 +1318,10 @@ do_no_page(struct mm_struct *mm, struct vm_area_struct *vma, if (new_page == NOPAGE_OOM) return VM_FAULT_OOM; + pte_chain = pte_chain_alloc(GFP_KERNEL); + if (!pte_chain) + goto oom; + /* * Should we do an early C-O-W break? 
*/ @@ -1317,7 +1329,7 @@ do_no_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page * page = alloc_page(GFP_HIGHUSER); if (!page) { page_cache_release(new_page); - return VM_FAULT_OOM; + goto oom; } copy_user_highpage(page, new_page, address); page_cache_release(new_page); @@ -1325,7 +1337,6 @@ do_no_page(struct mm_struct *mm, struct vm_area_struct *vma, new_page = page; } - pte_chain = pte_chain_alloc(GFP_KERNEL); spin_lock(&mm->page_table_lock); page_table = pte_offset_map(pmd, address); @@ -1355,15 +1366,20 @@ do_no_page(struct mm_struct *mm, struct vm_area_struct *vma, pte_unmap(page_table); page_cache_release(new_page); spin_unlock(&mm->page_table_lock); - pte_chain_free(pte_chain); - return VM_FAULT_MINOR; + ret = VM_FAULT_MINOR; + goto out; } /* no need to invalidate: a not-present page shouldn't be cached */ update_mmu_cache(vma, address, entry); spin_unlock(&mm->page_table_lock); + ret = VM_FAULT_MAJOR; + goto out; +oom: + ret = VM_FAULT_OOM; +out: pte_chain_free(pte_chain); - return VM_FAULT_MAJOR; + return ret; } /* -- cgit v1.2.3 From 6725839b5daa05f79f74d3088d7ff380eb77504e Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 5 Feb 2003 16:57:48 -0800 Subject: [PATCH] give hugetlbfs a set_page_dirty a_op Seems that nobody has tested direct IO into hugetlb pages yet. The VFS gets upset about running set_page_dirty() against a non-uptodate page. So give hugetlbfs inodes a private no-op ->set_page_dirty() to isolate them from all that. --- fs/hugetlbfs/inode.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index bb8bf302da95..1022c9ce54bd 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -450,10 +450,19 @@ static int hugetlbfs_symlink(struct inode * dir, struct dentry *dentry, const ch return error; } +/* + * For direct-IO reads into hugetlb pages + */ +int hugetlbfs_set_page_dirty(struct page *page) +{ + return 0; +} + static struct address_space_operations hugetlbfs_aops = { .readpage = hugetlbfs_readpage, .prepare_write = hugetlbfs_prepare_write, - .commit_write = hugetlbfs_commit_write + .commit_write = hugetlbfs_commit_write, + .set_page_dirty = hugetlbfs_set_page_dirty, }; struct file_operations hugetlbfs_file_operations = { -- cgit v1.2.3 From eefb08ee7da81e1548ffd5b664682dc5b229ddc2 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 5 Feb 2003 16:57:54 -0800 Subject: [PATCH] Infrastructure for correct hugepage refcounting We currently have a problem when things like ptrace, futexes and direct-io try to pin user pages. If the user's address is in a huge page we're elevating the refcount of a constituent 4k page, not the head page of the high-order allocation unit. To solve this, a generic way of handling higher-order pages has been implemented: - A higher-order page is called a "compound page". Chose this because "huge page", "large page", "super page", etc all seem to mean different things to different people. - The first (controlling) 4k page of a compound page is referred to as the "head" page. - The remaining pages are tail pages. All pages have PG_compound set. All pages have their lru.next pointing at the head page (even the head page has this). The head page's lru.prev, if non-zero, holds the address of the compound page's put_page() function. The order of the allocation is stored in the first tail page's lru.prev. This is only for debug at present. This usage means that zero-order pages may not be compound.
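The layout is easier to see in miniature. A runnable sketch using a mock struct page that carries only the two overloaded list pointers (not the real kernel structure):

	#include <assert.h>
	#include <stdio.h>

	struct page { void *lru_next, *lru_prev; };	/* mock */

	static void prep_compound(struct page *p, int order)
	{
		int i, nr = 1 << order;

		p[0].lru_prev = NULL;			/* head: destructor slot */
		p[1].lru_prev = (void *)(long)order;	/* first tail: order, debug only */
		for (i = 0; i < nr; i++)
			p[i].lru_next = &p[0];		/* everyone points at the head */
	}

	int main(void)
	{
		struct page compound[4];	/* one order-2 allocation unit */

		prep_compound(compound, 2);
		assert(compound[3].lru_next == &compound[0]);
		printf("order stored in tail 1: %ld\n", (long)compound[1].lru_prev);
		return 0;
	}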
The above relationships are established for _all_ higher-order pages in the page allocator. Which has some cost, but not much - another atomic op during fork(), mainly. This functionality is only enabled if CONFIG_HUGETLB_PAGE, although it could be turned on permanently. There's a little extra cost in get_page/put_page. These changes do not preclude adding compound pages to the LRU in the future - we can add a new page flag to the head page and then move all the additional data to the first tail page's lru.next, lru.prev, list.next, list.prev, index, private, etc. --- include/linux/mm.h | 35 ++++++++++++++++++++++-- include/linux/page-flags.h | 7 ++++- mm/page_alloc.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 105 insertions(+), 3 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index d2b99c852301..c68771c27d88 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -208,24 +208,55 @@ struct page { * Also, many kernel routines increase the page count before a critical * routine so they can be sure the page doesn't go away from under them. */ -#define get_page(p) atomic_inc(&(p)->count) -#define __put_page(p) atomic_dec(&(p)->count) #define put_page_testzero(p) \ ({ \ BUG_ON(page_count(page) == 0); \ atomic_dec_and_test(&(p)->count); \ }) + #define page_count(p) atomic_read(&(p)->count) #define set_page_count(p,v) atomic_set(&(p)->count, v) +#define __put_page(p) atomic_dec(&(p)->count) extern void FASTCALL(__page_cache_release(struct page *)); +#ifdef CONFIG_HUGETLB_PAGE + +static inline void get_page(struct page *page) +{ + if (PageCompound(page)) + page = (struct page *)page->lru.next; + atomic_inc(&page->count); +} + static inline void put_page(struct page *page) { + if (PageCompound(page)) { + page = (struct page *)page->lru.next; + if (page->lru.prev) { /* destructor? */ + (*(void (*)(struct page *))page->lru.prev)(page); + return; + } + } if (!PageReserved(page) && put_page_testzero(page)) __page_cache_release(page); } +#else /* CONFIG_HUGETLB_PAGE */ + +static inline void get_page(struct page *page) +{ + atomic_inc(&page->count); +} + +static inline void put_page(struct page *page) +{ + if (!PageReserved(page) && put_page_testzero(page)) + __page_cache_release(page); +} + +#endif /* CONFIG_HUGETLB_PAGE */ + /* * Multiple processes may "see" the same page. E.g. for untouched * mappings of /dev/null, all processes see the same page full of diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 0327a8421c9d..5c3bded564d8 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -72,7 +72,8 @@ #define PG_direct 16 /* ->pte_chain points directly at pte */ #define PG_mappedtodisk 17 /* Has blocks allocated on-disk */ -#define PG_reclaim 18 /* To be recalimed asap */ +#define PG_reclaim 18 /* To be reclaimed asap */ +#define PG_compound 19 /* Part of a compound page */ /* * Global page accounting. One instance per CPU. Only unsigned longs are @@ -251,6 +252,10 @@ extern void get_full_page_state(struct page_state *ret); #define ClearPageReclaim(page) clear_bit(PG_reclaim, &(page)->flags) #define TestClearPageReclaim(page) test_and_clear_bit(PG_reclaim, &(page)->flags) +#define PageCompound(page) test_bit(PG_compound, &(page)->flags) +#define SetPageCompound(page) set_bit(PG_compound, &(page)->flags) +#define ClearPageCompound(page) clear_bit(PG_compound, &(page)->flags) + /* * The PageSwapCache predicate doesn't use a PG_flag at this time, * but it may again do so one day. 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ef7d20c61e46..15df1737f072 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -85,6 +85,62 @@ static void bad_page(const char *function, struct page *page) page->mapping = NULL; } +#ifndef CONFIG_HUGETLB_PAGE +#define prep_compound_page(page, order) do { } while (0) +#define destroy_compound_page(page, order) do { } while (0) +#else +/* + * Higher-order pages are called "compound pages". They are structured thusly: + * + * The first PAGE_SIZE page is called the "head page". + * + * The remaining PAGE_SIZE pages are called "tail pages". + * + * All pages have PG_compound set. All pages have their lru.next pointing at + * the head page (even the head page has this). + * + * The head page's lru.prev, if non-zero, holds the address of the compound + * page's put_page() function. + * + * The order of the allocation is stored in the first tail page's lru.prev. + * This is only for debug at present. This usage means that zero-order pages + * may not be compound. + */ +static void prep_compound_page(struct page *page, int order) +{ + int i; + int nr_pages = 1 << order; + + page->lru.prev = NULL; + page[1].lru.prev = (void *)order; + for (i = 0; i < nr_pages; i++) { + struct page *p = page + i; + + SetPageCompound(p); + p->lru.next = (void *)page; + } +} + +static void destroy_compound_page(struct page *page, int order) +{ + int i; + int nr_pages = 1 << order; + + if (page[1].lru.prev != (void *)order) + bad_page(__FUNCTION__, page); + + for (i = 0; i < nr_pages; i++) { + struct page *p = page + i; + + if (!PageCompound(p)) + bad_page(__FUNCTION__, page); + if (p->lru.next != (void *)page) + bad_page(__FUNCTION__, page); + ClearPageCompound(p); + } +} +#endif /* CONFIG_HUGETLB_PAGE */ + /* * Freeing function for a buddy system allocator. * @@ -114,6 +170,8 @@ static inline void __free_pages_bulk (struct page *page, struct page *base, { unsigned long page_idx, index; + if (order) + destroy_compound_page(page, order); page_idx = page - base; if (page_idx & ~mask) BUG(); @@ -409,6 +467,12 @@ void free_cold_page(struct page *page) free_hot_cold_page(page, 1); } +/* + * Really, prep_compound_page() should be called from __rmqueue_bulk(). But + * we cheat by calling it from here, in the order > 0 path. Saves a branch + * or two. + */ + static struct page *buffered_rmqueue(struct zone *zone, int order, int cold) { unsigned long flags; @@ -435,6 +499,8 @@ static struct page *buffered_rmqueue(struct zone *zone, int order, int cold) spin_lock_irqsave(&zone->lock, flags); page = __rmqueue(zone, order); spin_unlock_irqrestore(&zone->lock, flags); + if (order && page) + prep_compound_page(page, order); } if (page != NULL) { -- cgit v1.2.3 From b3a656b6d36622e628974bad5cf19c006c395efe Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 5 Feb 2003 16:58:00 -0800 Subject: [PATCH] convert hugetlb code to use compound pages The odd thing about hugetlb is that it maintains its own freelist of pages. And it has to do that, else it would trivially run out of pages due to buddy fragmentation. So we don't want callers of put_page() to be passing those pages to __free_pages_ok() on the final put(). So hugetlb installs a destructor in the compound pages to point at free_huge_page(), which knows how to put these pages back onto the free list. Also, don't mark hugepages as all PageReserved any more. That's preventing callers from doing proper refcounting. Any code which does a user pagetable walk and hits part of a hugepage will now handle it transparently.
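The put_page() dispatch this buys can be sketched in a few lines. Mock types, not the kernel's; the real hookup is the page->lru.prev assignment in alloc_hugetlb_page() below:

	#include <stdio.h>

	struct page {				/* mock, not the kernel's */
		struct page *head;		/* lru.next in the real layout */
		void (*dtor)(struct page *);	/* lru.prev on the head page */
	};

	static void free_huge_page_stub(struct page *head)
	{
		printf("head %p goes back on htlbpage_freelist\n", (void *)head);
	}

	static void put_page(struct page *page)
	{
		struct page *head = page->head;

		if (head->dtor) {		/* compound page with a destructor */
			head->dtor(head);	/* not __free_pages_ok() */
			return;
		}
		/* ordinary refcount-release path runs here */
	}

	int main(void)
	{
		struct page hp[2] = {
			{ &hp[0], free_huge_page_stub },	/* head */
			{ &hp[0], NULL },			/* tail */
		};

		put_page(&hp[1]);	/* a tail put still reaches the destructor */
		return 0;
	}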
--- arch/i386/mm/hugetlbpage.c | 22 ++++++++++------------ arch/ia64/mm/hugetlbpage.c | 8 ++------ arch/sparc64/mm/hugetlbpage.c | 7 +------ arch/x86_64/mm/hugetlbpage.c | 11 ++++------- 4 files changed, 17 insertions(+), 31 deletions(-) diff --git a/arch/i386/mm/hugetlbpage.c b/arch/i386/mm/hugetlbpage.c index 106dcdd8dcf2..8a4d7a782dd2 100644 --- a/arch/i386/mm/hugetlbpage.c +++ b/arch/i386/mm/hugetlbpage.c @@ -46,6 +46,7 @@ static struct page *alloc_hugetlb_page(void) htlbpagemem--; spin_unlock(&htlbpage_lock); set_page_count(page, 1); + page->lru.prev = (void *)huge_page_release; for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); ++i) clear_highpage(&page[i]); return page; @@ -134,6 +135,7 @@ back1: page = pte_page(pte); if (pages) { page += ((start & ~HPAGE_MASK) >> PAGE_SHIFT); + get_page(page); pages[i] = page; } if (vmas) @@ -218,8 +220,10 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, struct page *page; page = pte_page(*(pte_t *)pmd); - if (page) + if (page) { page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT); + get_page(page); + } return page; } #endif @@ -372,8 +376,8 @@ int try_to_free_low(int count) int set_hugetlb_mem_size(int count) { - int j, lcount; - struct page *page, *map; + int lcount; + struct page *page; extern long htlbzone_pages; extern struct list_head htlbpage_freelist; @@ -389,11 +393,6 @@ int set_hugetlb_mem_size(int count) page = alloc_pages(__GFP_HIGHMEM, HUGETLB_PAGE_ORDER); if (page == NULL) break; - map = page; - for (j = 0; j < (HPAGE_SIZE / PAGE_SIZE); j++) { - SetPageReserved(map); - map++; - } spin_lock(&htlbpage_lock); list_add(&page->list, &htlbpage_freelist); htlbpagemem++; @@ -415,7 +414,8 @@ int set_hugetlb_mem_size(int count) return (int) htlbzone_pages; } -int hugetlb_sysctl_handler(ctl_table *table, int write, struct file *file, void *buffer, size_t *length) +int hugetlb_sysctl_handler(ctl_table *table, int write, + struct file *file, void *buffer, size_t *length) { proc_dointvec(table, write, file, buffer, length); htlbpage_max = set_hugetlb_mem_size(htlbpage_max); @@ -432,15 +432,13 @@ __setup("hugepages=", hugetlb_setup); static int __init hugetlb_init(void) { - int i, j; + int i; struct page *page; for (i = 0; i < htlbpage_max; ++i) { page = alloc_pages(__GFP_HIGHMEM, HUGETLB_PAGE_ORDER); if (!page) break; - for (j = 0; j < HPAGE_SIZE/PAGE_SIZE; ++j) - SetPageReserved(&page[j]); spin_lock(&htlbpage_lock); list_add(&page->list, &htlbpage_freelist); spin_unlock(&htlbpage_lock); diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c index 36900fb71acc..cb3b7ff44f7c 100644 --- a/arch/ia64/mm/hugetlbpage.c +++ b/arch/ia64/mm/hugetlbpage.c @@ -227,6 +227,7 @@ back1: page = pte_page(pte); if (pages) { page += ((start & ~HPAGE_MASK) >> PAGE_SHIFT); + get_page(page); pages[i] = page; } if (vmas) @@ -303,11 +304,6 @@ set_hugetlb_mem_size (int count) page = alloc_pages(__GFP_HIGHMEM, HUGETLB_PAGE_ORDER); if (page == NULL) break; - map = page; - for (j = 0; j < (HPAGE_SIZE / PAGE_SIZE); j++) { - SetPageReserved(map); - map++; - } spin_lock(&htlbpage_lock); list_add(&page->list, &htlbpage_freelist); htlbpagemem++; @@ -327,7 +323,7 @@ set_hugetlb_mem_size (int count) map = page; for (j = 0; j < (HPAGE_SIZE / PAGE_SIZE); j++) { map->flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced | - 1 << PG_dirty | 1 << PG_active | 1 << PG_reserved | + 1 << PG_dirty | 1 << PG_active | 1 << PG_private | 1<< PG_writeback); map++; } diff --git a/arch/sparc64/mm/hugetlbpage.c b/arch/sparc64/mm/hugetlbpage.c index 98045169d9c1..53c698aa6777 
100644 --- a/arch/sparc64/mm/hugetlbpage.c +++ b/arch/sparc64/mm/hugetlbpage.c @@ -288,6 +288,7 @@ back1: page = pte_page(pte); if (pages) { page += ((start & ~HPAGE_MASK) >> PAGE_SHIFT); + get_page(page); pages[i] = page; } if (vmas) @@ -584,11 +585,6 @@ int set_hugetlb_mem_size(int count) page = alloc_pages(GFP_ATOMIC, HUGETLB_PAGE_ORDER); if (page == NULL) break; - map = page; - for (j = 0; j < (HPAGE_SIZE / PAGE_SIZE); j++) { - SetPageReserved(map); - map++; - } spin_lock(&htlbpage_lock); list_add(&page->list, &htlbpage_freelist); htlbpagemem++; @@ -613,7 +609,6 @@ int set_hugetlb_mem_size(int count) map->flags &= ~(1UL << PG_locked | 1UL << PG_error | 1UL << PG_referenced | 1UL << PG_dirty | 1UL << PG_active | - 1UL << PG_reserved | 1UL << PG_private | 1UL << PG_writeback); set_page_count(page, 0); map++; diff --git a/arch/x86_64/mm/hugetlbpage.c b/arch/x86_64/mm/hugetlbpage.c index d4515d57af34..7cf2b510ce03 100644 --- a/arch/x86_64/mm/hugetlbpage.c +++ b/arch/x86_64/mm/hugetlbpage.c @@ -134,6 +134,7 @@ back1: page = pte_page(pte); if (pages) { page += ((start & ~HPAGE_MASK) >> PAGE_SHIFT); + get_page(page); pages[i] = page; } if (vmas) @@ -263,11 +264,6 @@ int set_hugetlb_mem_size(int count) page = alloc_pages(__GFP_HIGHMEM, HUGETLB_PAGE_ORDER); if (page == NULL) break; - map = page; - for (j = 0; j < (HPAGE_SIZE / PAGE_SIZE); j++) { - SetPageReserved(map); - map++; - } spin_lock(&htlbpage_lock); list_add(&page->list, &htlbpage_freelist); htlbpagemem++; @@ -286,8 +282,9 @@ int set_hugetlb_mem_size(int count) spin_unlock(&htlbpage_lock); map = page; for (j = 0; j < (HPAGE_SIZE / PAGE_SIZE); j++) { - map->flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced | - 1 << PG_dirty | 1 << PG_active | 1 << PG_reserved | + map->flags &= ~(1 << PG_locked | 1 << PG_error | + 1 << PG_referenced | + 1 << PG_dirty | 1 << PG_active | 1 << PG_private | 1<< PG_writeback); set_page_count(map, 0); map++; -- cgit v1.2.3 From 8ca8cd5b98cfc3fff80b01c1e9ab1c50872d7aa7 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 5 Feb 2003 16:58:06 -0800 Subject: [PATCH] get_unmapped_area for hugetlbfs Having to specify the mapping address is a pain. Give hugetlbfs files a file_operations.get_unmapped_area(). The implementation is in hugetlbfs rather than in arch code because it's probably common to several architectures. If the architecture has special needs it can define HAVE_ARCH_HUGETLB_UNMAPPED_AREA and go it alone. Just like HAVE_ARCH_UNMAPPED_AREA. 
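The heart of the generic search is rounding every candidate address up to a huge page boundary with ALIGN(). A standalone illustration of that step (4MB huge pages assumed, as on ia32 without PAE):

	#include <stdio.h>

	#define HPAGE_SIZE	(4UL << 20)		/* assumed: ia32 4MB pages */
	#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))

	int main(void)
	{
		unsigned long hint = 0x40001000;	/* a merely 4k-aligned hint */

		/* the helper rounds it up before testing it against the VMA list */
		printf("%#lx -> %#lx\n", hint, ALIGN(hint, HPAGE_SIZE));
		return 0;
	}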
--- fs/hugetlbfs/inode.c | 46 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 1022c9ce54bd..65d32c207254 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -73,6 +73,47 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) return ret; } +/* + * Called under down_write(mmap_sem), page_table_lock is not held + */ + +#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA +unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags); +#else +static unsigned long +hugetlb_get_unmapped_area(struct file *file, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + + if (len & ~HPAGE_MASK) + return -EINVAL; + if (len > TASK_SIZE) + return -ENOMEM; + + if (addr) { + addr = ALIGN(addr, HPAGE_SIZE); + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && + (!vma || addr + len <= vma->vm_start)) + return addr; + } + + addr = ALIGN(mm->free_area_cache, HPAGE_SIZE); + + for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { + /* At this point: (!vma || addr < vma->vm_end). */ + if (TASK_SIZE - len < addr) + return -ENOMEM; + if (!vma || addr + len <= vma->vm_start) + return addr; + addr = ALIGN(vma->vm_end, HPAGE_SIZE); + } +} +#endif + /* * Read a page. Again trivial. If it didn't already exist * in the page cache, it is zero-filled. @@ -466,8 +507,9 @@ static struct address_space_operations hugetlbfs_aops = { }; struct file_operations hugetlbfs_file_operations = { - .mmap = hugetlbfs_file_mmap, - .fsync = simple_sync_file, + .mmap = hugetlbfs_file_mmap, + .fsync = simple_sync_file, + .get_unmapped_area = hugetlb_get_unmapped_area, }; static struct inode_operations hugetlbfs_dir_inode_operations = { -- cgit v1.2.3 From 136963d15f2923d7a53f1d9a5cdd22093b93e7fb Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 5 Feb 2003 16:58:12 -0800 Subject: [PATCH] hugetlbfs: fix truncate - Opening a hugetlbfs file O_TRUNC calls the generic vmtruncate() function and nukes the kernel. Give S_ISREG hugetlbfs files an inode_operations, and hence a setattr which knows how to handle these files. - Don't permit the user to truncate hugetlbfs files to sizes which are not a multiple of HPAGE_SIZE. - We don't support expanding in ftruncate(), so remove that code. --- fs/hugetlbfs/inode.c | 39 ++++++++++++++++----------------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 65d32c207254..3c6593928838 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -34,6 +34,7 @@ static struct super_operations hugetlbfs_ops; static struct address_space_operations hugetlbfs_aops; struct file_operations hugetlbfs_file_operations; static struct inode_operations hugetlbfs_dir_inode_operations; +static struct inode_operations hugetlbfs_inode_operations; static struct backing_dev_info hugetlbfs_backing_dev_info = { .ra_pages = 0, /* No readahead */ @@ -326,44 +327,29 @@ static void hugetlb_vmtruncate_list(struct list_head *list, unsigned long pgoff) } } +/* + * Expanding truncates are not allowed.
+ */ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset) { unsigned long pgoff; struct address_space *mapping = inode->i_mapping; - unsigned long limit; - pgoff = (offset + HPAGE_SIZE - 1) >> HPAGE_SHIFT; + if (offset > inode->i_size) + return -EINVAL; - if (inode->i_size < offset) - goto do_expand; + BUG_ON(offset & ~HPAGE_MASK); + pgoff = offset >> HPAGE_SHIFT; inode->i_size = offset; down(&mapping->i_shared_sem); - if (list_empty(&mapping->i_mmap) && list_empty(&mapping->i_mmap_shared)) - goto out_unlock; if (!list_empty(&mapping->i_mmap)) hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff); if (!list_empty(&mapping->i_mmap_shared)) hugetlb_vmtruncate_list(&mapping->i_mmap_shared, pgoff); - -out_unlock: up(&mapping->i_shared_sem); truncate_hugepages(mapping, offset); return 0; - -do_expand: - limit = current->rlim[RLIMIT_FSIZE].rlim_cur; - if (limit != RLIM_INFINITY && offset > limit) - goto out_sig; - if (offset > inode->i_sb->s_maxbytes) - goto out; - inode->i_size = offset; - return 0; - -out_sig: - send_sig(SIGXFSZ, current, 0); -out: - return -EFBIG; } static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr) @@ -390,7 +376,9 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr) goto out; if (ia_valid & ATTR_SIZE) { - error = hugetlb_vmtruncate(inode, attr->ia_size); + error = -EINVAL; + if (!(attr->ia_size & ~HPAGE_MASK)) + error = hugetlb_vmtruncate(inode, attr->ia_size); if (error) goto out; attr->ia_valid &= ~ATTR_SIZE; @@ -425,6 +413,7 @@ hugetlbfs_get_inode(struct super_block *sb, int mode, dev_t dev) init_special_inode(inode, mode, dev); break; case S_IFREG: + inode->i_op = &hugetlbfs_inode_operations; inode->i_fop = &hugetlbfs_file_operations; break; case S_IFDIR: @@ -525,6 +514,10 @@ static struct inode_operations hugetlbfs_dir_inode_operations = { .setattr = hugetlbfs_setattr, }; +static struct inode_operations hugetlbfs_inode_operations = { + .setattr = hugetlbfs_setattr, +}; + static struct super_operations hugetlbfs_ops = { .statfs = simple_statfs, .drop_inode = hugetlbfs_drop_inode, -- cgit v1.2.3 From 05732657630a045b2f6a917162187d5c4de93dc0 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 5 Feb 2003 16:58:19 -0800 Subject: [PATCH] hugetlbfs i_size fixes We're expanding hugetlbfs i_size in the wrong place. If someone attempts to mmap more pages than are available, i_size is updated to reflect the attempted mapping size. So set i_size only when pages are successfully added to the mapping. i_size handling at truncate time is still a bit wrong - if the mapping has pages at (say) page offset 100-200 and the mapping is truncated to (say) page offset 50, i_size should be set to zero. But it is instead set to 50*HPAGE_SIZE. That's harmless.
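Note the (loff_t) cast in the new i_size computation. On a 32-bit build it is what keeps the multiply from wrapping once the file passes 4GB, as this sketch shows (4MB huge pages assumed; the first printf wraps only where unsigned long is 32 bits):

	#include <stdio.h>

	#define HPAGE_SIZE (4UL << 20)	/* assumed 4MB huge pages */

	int main(void)
	{
		unsigned long idx = 1024;	/* huge page index at the 4GB mark */

		/* 32-bit arithmetic: (idx + 1) * HPAGE_SIZE wraps to 4MB */
		printf("unsigned long: %lu\n", (idx + 1) * HPAGE_SIZE);

		/* the patch's form: widen before multiplying */
		printf("loff_t-style:  %lld\n", (long long)(idx + 1) * HPAGE_SIZE);
		return 0;
	}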
--- arch/i386/mm/hugetlbpage.c | 5 +++++ arch/x86_64/mm/hugetlbpage.c | 6 ++++++ fs/hugetlbfs/inode.c | 5 ----- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/arch/i386/mm/hugetlbpage.c b/arch/i386/mm/hugetlbpage.c index 8a4d7a782dd2..24ef8785ad12 100644 --- a/arch/i386/mm/hugetlbpage.c +++ b/arch/i386/mm/hugetlbpage.c @@ -284,6 +284,7 @@ void zap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigne int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma) { struct mm_struct *mm = current->mm; + struct inode *inode = mapping->host; unsigned long addr; int ret = 0; @@ -307,6 +308,7 @@ int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma) + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT)); page = find_get_page(mapping, idx); if (!page) { + loff_t i_size; page = alloc_hugetlb_page(); if (!page) { ret = -ENOMEM; @@ -318,6 +320,9 @@ int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma) free_huge_page(page); goto out; } + i_size = (loff_t)(idx + 1) * HPAGE_SIZE; + if (i_size > inode->i_size) + inode->i_size = i_size; } set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE); } diff --git a/arch/x86_64/mm/hugetlbpage.c b/arch/x86_64/mm/hugetlbpage.c index 7cf2b510ce03..6cde6a4c0518 100644 --- a/arch/x86_64/mm/hugetlbpage.c +++ b/arch/x86_64/mm/hugetlbpage.c @@ -205,6 +205,7 @@ void zap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigne int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma) { struct mm_struct *mm = current->mm; + struct inode *inode = mapping->host; unsigned long addr; int ret = 0; @@ -228,6 +229,8 @@ int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma) + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT)); page = find_get_page(mapping, idx); if (!page) { + loff_t i_size; + page = alloc_hugetlb_page(); if (!page) { ret = -ENOMEM; @@ -239,6 +242,9 @@ int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma) free_huge_page(page); goto out; } + i_size = (loff_t)(idx + 1) * HPAGE_SIZE; + if (i_size > inode->i_size) + inode->i_size = i_size; } set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE); } diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 3c6593928838..ca1027875766 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -45,7 +45,6 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) { struct inode *inode =file->f_dentry->d_inode; struct address_space *mapping = inode->i_mapping; - size_t len; int ret; if (!capable(CAP_IPC_LOCK)) @@ -66,10 +65,6 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) vma->vm_flags |= VM_HUGETLB | VM_RESERVED; vma->vm_ops = &hugetlb_vm_ops; ret = hugetlb_prefault(mapping, vma); - len = (vma->vm_end - vma->vm_start) + (vma->vm_pgoff << PAGE_SHIFT); - if (inode->i_size < len) - inode->i_size = len; - up(&inode->i_sem); return ret; } -- cgit v1.2.3 From 3cc33271a3a6c898ccada71b799934ceac363932 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 5 Feb 2003 16:58:26 -0800 Subject: [PATCH] hugetlbfs cleanups - Remove quota code. - Remove extraneous copy-n-paste code from truncate: that's only for physically-backed filesystems. - Whitespace changes.
--- fs/hugetlbfs/inode.c | 97 +++++++++------------------------------------------- 1 file changed, 17 insertions(+), 80 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index ca1027875766..698cb1ff85ef 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -120,12 +120,14 @@ static int hugetlbfs_readpage(struct file *file, struct page * page) return -EINVAL; } -static int hugetlbfs_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to) +static int hugetlbfs_prepare_write(struct file *file, + struct page *page, unsigned offset, unsigned to) { return -EINVAL; } -static int hugetlbfs_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to) +static int hugetlbfs_commit_write(struct file *file, + struct page *page, unsigned offset, unsigned to) { return -EINVAL; } @@ -140,28 +142,8 @@ void huge_pagevec_release(struct pagevec *pvec) pagevec_reinit(pvec); } -void truncate_partial_hugepage(struct page *page, unsigned partial) +void truncate_huge_page(struct page *page) { - int i; - const unsigned piece = partial & (PAGE_SIZE - 1); - const unsigned tailstart = PAGE_SIZE - piece; - const unsigned whole_pages = partial / PAGE_SIZE; - const unsigned last_page_offset = HPAGE_SIZE/PAGE_SIZE - whole_pages; - - for (i = HPAGE_SIZE/PAGE_SIZE - 1; i >= last_page_offset; ++i) - memclear_highpage_flush(&page[i], 0, PAGE_SIZE); - - if (!piece) - return; - - memclear_highpage_flush(&page[last_page_offset - 1], tailstart, piece); -} - -void truncate_huge_page(struct address_space *mapping, struct page *page) -{ - if (page->mapping != mapping) - return; - clear_page_dirty(page); ClearPageUptodate(page); remove_from_page_cache(page); @@ -170,52 +152,13 @@ void truncate_huge_page(struct address_space *mapping, struct page *page) void truncate_hugepages(struct address_space *mapping, loff_t lstart) { - const pgoff_t start = (lstart + HPAGE_SIZE - 1) >> HPAGE_SHIFT; - const unsigned partial = lstart & (HPAGE_SIZE - 1); + const pgoff_t start = lstart >> HPAGE_SHIFT; struct pagevec pvec; pgoff_t next; int i; pagevec_init(&pvec, 0); next = start; - - while (pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { - for (i = 0; i < pagevec_count(&pvec); ++i) { - struct page *page = pvec.pages[i]; - pgoff_t page_index = page->index; - - if (page_index > next) - next = page_index; - - ++next; - - if (TestSetPageLocked(page)) - continue; - - if (PageWriteback(page)) { - unlock_page(page); - continue; - } - - truncate_huge_page(mapping, page); - unlock_page(page); - } - huge_pagevec_release(&pvec); - cond_resched(); - } - - if (partial) { - struct page *page = find_lock_page(mapping, start - 1); - if (page) { - wait_on_page_writeback(page); - truncate_partial_hugepage(page, partial); - unlock_page(page); - huge_page_release(page); - } - } - - next = start; - while (1) { if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { if (next == start) @@ -228,11 +171,10 @@ void truncate_hugepages(struct address_space *mapping, loff_t lstart) struct page *page = pvec.pages[i]; lock_page(page); - wait_on_page_writeback(page); if (page->index > next) next = page->index; ++next; - truncate_huge_page(mapping, page); + truncate_huge_page(page); unlock_page(page); } huge_pagevec_release(&pvec); @@ -363,13 +305,6 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr) error = security_inode_setattr(dentry, attr); if (error) goto out; - - if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || - (ia_valid & ATTR_GID && attr->ia_gid != 
inode->i_gid)) - error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0; - if (error) - goto out; - if (ia_valid & ATTR_SIZE) { error = -EINVAL; if (!(attr->ia_size & ~HPAGE_MASK)) @@ -388,8 +323,8 @@ out: return error; } -static struct inode * -hugetlbfs_get_inode(struct super_block *sb, int mode, dev_t dev) +static struct inode *hugetlbfs_get_inode(struct super_block *sb, + int mode, dev_t dev) { struct inode * inode = new_inode(sb); @@ -401,7 +336,7 @@ hugetlbfs_get_inode(struct super_block *sb, int mode, dev_t dev) inode->i_blocks = 0; inode->i_rdev = NODEV; inode->i_mapping->a_ops = &hugetlbfs_aops; - inode->i_mapping->backing_dev_info = &hugetlbfs_backing_dev_info; + inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; switch (mode & S_IFMT) { default: @@ -430,8 +365,8 @@ hugetlbfs_get_inode(struct super_block *sb, int mode, dev_t dev) * File creation. Allocate an inode, and we're done.. */ /* SMP-safe */ -static int -hugetlbfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) +static int hugetlbfs_mknod(struct inode *dir, + struct dentry *dentry, int mode, dev_t dev) { struct inode * inode = hugetlbfs_get_inode(dir->i_sb, mode, dev); int error = -ENOSPC; @@ -444,7 +379,7 @@ hugetlbfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) return error; } -static int hugetlbfs_mkdir(struct inode * dir, struct dentry * dentry, int mode) +static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) { int retval = hugetlbfs_mknod(dir, dentry, mode | S_IFDIR, 0); if (!retval) @@ -457,7 +392,8 @@ static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, int mode) return hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0); } -static int hugetlbfs_symlink(struct inode * dir, struct dentry *dentry, const char * symname) +static int hugetlbfs_symlink(struct inode *dir, + struct dentry *dentry, const char *symname) { struct inode *inode; int error = -ENOSPC; @@ -518,7 +454,8 @@ static struct super_operations hugetlbfs_ops = { .drop_inode = hugetlbfs_drop_inode, }; -static int hugetlbfs_fill_super(struct super_block * sb, void * data, int silent) +static int +hugetlbfs_fill_super(struct super_block * sb, void * data, int silent) { struct inode * inode; struct dentry * root; -- cgit v1.2.3 From 08a1cc4eb53dd834ddfcda3d70b0def5ed15f747 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 5 Feb 2003 16:58:32 -0800 Subject: [PATCH] Give all architectures a hugetlb_nopage(). If someone maps a hugetlbfs file, then truncates it, then references the part of the mapping outside the truncation point, they take a pagefault and we end up hitting hugetlb_nopage(). We want to prevent this from ever happening. This patch just makes sure that all architectures have a goes-BUG hugetlb_nopage() to trap it. 
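The triggering sequence, seen from userspace, is just map-truncate-touch. A sketch of it (mount point, sizes and the omitted error checking are illustrative; with the follow-up fix later in this series the touch ends in a SIGBUS on the truncated range rather than a kernel BUG):

	#include <fcntl.h>
	#include <sys/mman.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("/mnt/huge/f", O_CREAT | O_RDWR, 0600);
		size_t len = 4UL << 20;			/* one 4MB huge page */
		char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
			       MAP_SHARED, fd, 0);

		ftruncate(fd, 0);	/* zaps the page under the mapping */
		p[0] = 1;		/* faults outside i_size */
		return 0;
	}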
--- arch/i386/mm/hugetlbpage.c | 10 ++++++++-- arch/ia64/mm/hugetlbpage.c | 11 +++++++++-- arch/sparc64/mm/hugetlbpage.c | 8 ++++++++ arch/x86_64/mm/hugetlbpage.c | 4 ++-- 4 files changed, 27 insertions(+), 6 deletions(-) diff --git a/arch/i386/mm/hugetlbpage.c b/arch/i386/mm/hugetlbpage.c index 24ef8785ad12..792dfd1876e6 100644 --- a/arch/i386/mm/hugetlbpage.c +++ b/arch/i386/mm/hugetlbpage.c @@ -26,7 +26,6 @@ static long htlbpagemem; int htlbpage_max; static long htlbzone_pages; -struct vm_operations_struct hugetlb_vm_ops; static LIST_HEAD(htlbpage_freelist); static spinlock_t htlbpage_lock = SPIN_LOCK_UNLOCKED; @@ -472,7 +471,14 @@ int is_hugepage_mem_enough(size_t size) return 1; } -static struct page *hugetlb_nopage(struct vm_area_struct * area, unsigned long address, int unused) +/* + * We cannot handle pagefaults against hugetlb pages at all. They cause + * handle_mm_fault() to try to instantiate regular-sized pages in the + * hugegpage VMA. do_page_fault() is supposed to trap this, so BUG is we get + * this far. + */ +static struct page * +hugetlb_nopage(struct vm_area_struct *vma, unsigned long address, int unused) { BUG(); return NULL; diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c index cb3b7ff44f7c..37bcf76e2b63 100644 --- a/arch/ia64/mm/hugetlbpage.c +++ b/arch/ia64/mm/hugetlbpage.c @@ -18,7 +18,6 @@ #include #include -static struct vm_operations_struct hugetlb_vm_ops; struct list_head htlbpage_freelist; spinlock_t htlbpage_lock = SPIN_LOCK_UNLOCKED; extern long htlbpagemem; @@ -333,6 +332,14 @@ set_hugetlb_mem_size (int count) return (int) htlbzone_pages; } +static struct page * +hugetlb_nopage(struct vm_area_struct *vma, unsigned long address, int unused) +{ + BUG(); + return NULL; +} + static struct vm_operations_struct hugetlb_vm_ops = { - .close = zap_hugetlb_resources + .nopage = hugetlb_nopage, + .close = zap_hugetlb_resources, }; diff --git a/arch/sparc64/mm/hugetlbpage.c b/arch/sparc64/mm/hugetlbpage.c index 53c698aa6777..c137cb8c9d56 100644 --- a/arch/sparc64/mm/hugetlbpage.c +++ b/arch/sparc64/mm/hugetlbpage.c @@ -619,6 +619,14 @@ int set_hugetlb_mem_size(int count) return (int) htlbzone_pages; } +static struct page * +hugetlb_nopage(struct vm_area_struct *vma, unsigned long address, int unused) +{ + BUG(); + return NULL; +} + static struct vm_operations_struct hugetlb_vm_ops = { + .nopage = hugetlb_nopage, .close = zap_hugetlb_resources, }; diff --git a/arch/x86_64/mm/hugetlbpage.c b/arch/x86_64/mm/hugetlbpage.c index 6cde6a4c0518..f8e146193dc6 100644 --- a/arch/x86_64/mm/hugetlbpage.c +++ b/arch/x86_64/mm/hugetlbpage.c @@ -25,7 +25,6 @@ static long htlbpagemem; int htlbpage_max; static long htlbzone_pages; -struct vm_operations_struct hugetlb_vm_ops; static LIST_HEAD(htlbpage_freelist); static spinlock_t htlbpage_lock = SPIN_LOCK_UNLOCKED; @@ -349,7 +348,8 @@ int hugetlb_report_meminfo(char *buf) HPAGE_SIZE/1024); } -static struct page * hugetlb_nopage(struct vm_area_struct * area, unsigned long address, int unused) +static struct page * +hugetlb_nopage(struct vm_area_struct *vma, unsigned long address, int unused) { BUG(); return NULL; -- cgit v1.2.3 From 8b5111ec625859a3a56f04598fd85e89622228a5 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 5 Feb 2003 16:58:38 -0800 Subject: [PATCH] Fix hugetlbfs faults If the underlying mapping was truncated and someone references the now-unmapped memory the kernel will enter handle_mm_fault() and will start instantiating PAGE_SIZE pte's inside the hugepage VMA. 
Everything goes generally pear-shaped. So trap this in handle_mm_fault(). It adds no overhead to non-hugepage builds. Another possible fix would be to not unmap the huge pages at all in truncate - just anonymise them. But I think we want full ftruncate semantics for hugepages for management purposes. --- mm/memory.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/memory.c b/mm/memory.c index 53759b45bc85..1bef6a16f7a4 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1447,6 +1447,10 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma, pgd = pgd_offset(mm, address); inc_page_state(pgfault); + + if (is_vm_hugetlb_page(vma)) + return VM_FAULT_SIGBUS; /* mapping truncation does this. */ + /* * We need the page table lock to synchronize with kswapd * and the SMP-safe atomic PTE updates. -- cgit v1.2.3 From a20d5200d36f3b1604b91c1a73a9f31f91a1bc2c Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 5 Feb 2003 16:58:44 -0800 Subject: [PATCH] ia32 hugetlb cleanup - whitespace - remove unneeded spinlocking no-op. --- arch/i386/mm/hugetlbpage.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/i386/mm/hugetlbpage.c b/arch/i386/mm/hugetlbpage.c index 792dfd1876e6..243d844a1a79 100644 --- a/arch/i386/mm/hugetlbpage.c +++ b/arch/i386/mm/hugetlbpage.c @@ -248,7 +248,8 @@ void huge_page_release(struct page *page) free_huge_page(page); } -void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) +void unmap_hugepage_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) { struct mm_struct *mm = vma->vm_mm; unsigned long address; @@ -258,8 +259,6 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsig BUG_ON(start & (HPAGE_SIZE - 1)); BUG_ON(end & (HPAGE_SIZE - 1)); - spin_lock(&htlbpage_lock); - spin_unlock(&htlbpage_lock); for (address = start; address < end; address += HPAGE_SIZE) { pte = huge_pte_offset(mm, address); if (pte_none(*pte)) @@ -272,7 +271,9 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsig flush_tlb_range(vma, start, end); } -void zap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigned long length) +void +zap_hugepage_range(struct vm_area_struct *vma, + unsigned long start, unsigned long length) { struct mm_struct *mm = vma->vm_mm; spin_lock(&mm->page_table_lock); @@ -477,8 +478,8 @@ int is_hugepage_mem_enough(size_t size) * hugegpage VMA. do_page_fault() is supposed to trap this, so BUG is we get * this far. */ -static struct page * -hugetlb_nopage(struct vm_area_struct *vma, unsigned long address, int unused) +static struct page *hugetlb_nopage(struct vm_area_struct *vma, + unsigned long address, int unused) { BUG(); return NULL; -- cgit v1.2.3 From 8a1335e97ac2598f8f5ea8f18c7d8eeb4906e841 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 5 Feb 2003 16:58:51 -0800 Subject: [PATCH] Fix hugetlb_vmtruncate_list() This function is quite wrong - has an "=" where it should have a "-" and confuses PAGE_SIZE and HPAGE_SIZE in its address and file offset arithmetic. 
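For concreteness, these are the unit conversions the fixed version performs, with one worked case (shifts assume ia32: PAGE_SHIFT 12, HPAGE_SHIFT 22):

	#include <stdio.h>

	#define PAGE_SHIFT	12
	#define HPAGE_SHIFT	22

	int main(void)
	{
		unsigned long vm_pgoff = 2048;	/* VMA file offset, PAGE_SIZE units */
		unsigned long h_pgoff = 5;	/* truncation point, HPAGE_SIZE units */

		unsigned long h_vm_pgoff = vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT);
		unsigned long v_offset = (h_pgoff - h_vm_pgoff) << HPAGE_SHIFT;

		/* VMA begins at huge page 2; the zap starts 3 huge pages into it */
		printf("h_vm_pgoff=%lu v_offset=%#lx\n", h_vm_pgoff, v_offset);
		return 0;
	}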
--- fs/hugetlbfs/inode.c | 46 ++++++++++++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 698cb1ff85ef..5ce105bd3d1e 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -238,29 +238,47 @@ static void hugetlbfs_drop_inode(struct inode *inode) hugetlbfs_forget_inode(inode); } -static void hugetlb_vmtruncate_list(struct list_head *list, unsigned long pgoff) +/* + * h_pgoff is in HPAGE_SIZE units. + * vma->vm_pgoff is in PAGE_SIZE units. + */ +static void +hugetlb_vmtruncate_list(struct list_head *list, unsigned long h_pgoff) { - unsigned long start, end, length, delta; struct vm_area_struct *vma; list_for_each_entry(vma, list, shared) { - start = vma->vm_start; - end = vma->vm_end; - length = end - start; - - if (vma->vm_pgoff >= pgoff) { - zap_hugepage_range(vma, start, length); + unsigned long h_vm_pgoff; + unsigned long v_length; + unsigned long h_length; + unsigned long v_offset; + + h_vm_pgoff = vma->vm_pgoff << (HPAGE_SHIFT - PAGE_SHIFT); + v_length = vma->vm_end - vma->vm_start; + h_length = v_length >> HPAGE_SHIFT; + v_offset = (h_pgoff - h_vm_pgoff) << HPAGE_SHIFT; + + /* + * Is this VMA fully outside the truncation point? + */ + if (h_vm_pgoff >= h_pgoff) { + zap_hugepage_range(vma, vma->vm_start, v_length); continue; } - length >>= PAGE_SHIFT; - delta = pgoff = vma->vm_pgoff; - if (delta >= length) + /* + * Is this VMA fully inside the truncation point? + */ + if (h_vm_pgoff + (v_length >> HPAGE_SHIFT) <= h_pgoff) continue; - start += delta << PAGE_SHIFT; - length = (length - delta) << PAGE_SHIFT; - zap_hugepage_range(vma, start, length); + /* + * The VMA straddles the truncation point. v_offset is the + * offset (in bytes) into the VMA where the point lies. + */ + zap_hugepage_range(vma, + vma->vm_start + v_offset, + v_length - v_offset); } } -- cgit v1.2.3 From df79ea4004dd472d22b1ae21bb51f6b4ec3a312e Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 5 Feb 2003 16:58:57 -0800 Subject: [PATCH] hugetlb mremap fix If you attempt to perform a relocating 4k-aligned mremap and the new address for the map lands on top of a hugepage VMA, do_mremap() will attempt to perform a 4k-aligned unmap inside the hugetlb VMA. The hugetlb layer goes BUG. Fix that by trapping the poorly-aligned unmap attempt in do_munmap(). do_mremap() will then fall through without having done anything to the place where it tests for a hugetlb VMA. It would be neater to perform these checks on entry to do_mremap(), but that would incur another VMA lookup. Also, if you attempt to perform a 4k-aligned and/or sized munmap() inside a hugepage VMA the same BUG happens. This patch fixes that too. This all means that an mremap attempt against a hugetlb area will fail, but only after having unmapped the source pages. That's a bit messy, but supporting hugetlb mremap doesn't seem worth it, and completely disallowing it will add overhead to normal mremaps.
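The new do_munmap() test is a pure mask check. Extracted as a standalone predicate (HPAGE_SIZE assumed 4MB):

	#include <stdio.h>

	#define HPAGE_SIZE	(4UL << 20)	/* assumed */
	#define HPAGE_MASK	(~(HPAGE_SIZE - 1))

	static int hugetlb_unmap_ok(unsigned long start, size_t len)
	{
		/* mirrors the check added to do_munmap() */
		return !((start & ~HPAGE_MASK) || (len & ~HPAGE_MASK));
	}

	int main(void)
	{
		printf("%d\n", hugetlb_unmap_ok(0x40000000, 4UL << 20));	/* 1 */
		printf("%d\n", hugetlb_unmap_ok(0x40001000, 4096));		/* 0 -> -EINVAL */
		return 0;
	}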
--- include/linux/hugetlb.h | 4 ++++ mm/mmap.c | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index b51d51d05190..370411eaaba2 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -58,6 +58,10 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma) #define follow_huge_pmd(mm, addr, pmd, write) 0 #define pmd_huge(x) 0 +#ifndef HPAGE_MASK +#define HPAGE_MASK 0 /* Keep the compiler happy */ +#endif + #endif /* !CONFIG_HUGETLB_PAGE */ #ifdef CONFIG_HUGETLBFS diff --git a/mm/mmap.c b/mm/mmap.c index d3b14b17da38..af3d4a272ad7 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1223,6 +1223,11 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len) return 0; /* we have start < mpnt->vm_end */ + if (is_vm_hugetlb_page(mpnt)) { + if ((start & ~HPAGE_MASK) || (len & ~HPAGE_MASK)) + return -EINVAL; + } + /* if it doesn't overlap, we have nothing.. */ end = start + len; if (mpnt->vm_start >= end) -- cgit v1.2.3 From 32738fbfa01aa3aebe6ace9b4976473123e10fea Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 5 Feb 2003 16:59:04 -0800 Subject: [PATCH] mm/mremap.c whitespace cleanup - Not everyone uses 160-column xterms. - Coding style consistency --- mm/mremap.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/mm/mremap.c b/mm/mremap.c index c94c9da3f4b5..8336eaf69911 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -24,9 +24,9 @@ static pte_t *get_one_pte_map_nested(struct mm_struct *mm, unsigned long addr) { - pgd_t * pgd; - pmd_t * pmd; - pte_t * pte = NULL; + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte = NULL; pgd = pgd_offset(mm, addr); if (pgd_none(*pgd)) @@ -73,8 +73,8 @@ static inline int page_table_present(struct mm_struct *mm, unsigned long addr) static inline pte_t *alloc_one_pte_map(struct mm_struct *mm, unsigned long addr) { - pmd_t * pmd; - pte_t * pte = NULL; + pmd_t *pmd; + pte_t *pte = NULL; pmd = pmd_alloc(mm, pgd_offset(mm, addr), addr); if (pmd) @@ -88,7 +88,7 @@ copy_one_pte(struct mm_struct *mm, pte_t *src, pte_t *dst, { int error = 0; pte_t pte; - struct page * page = NULL; + struct page *page = NULL; if (pte_present(*src)) page = pte_page(*src); @@ -183,12 +183,12 @@ oops_we_failed: return -1; } -static unsigned long move_vma(struct vm_area_struct * vma, +static unsigned long move_vma(struct vm_area_struct *vma, unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long new_addr) { - struct mm_struct * mm = vma->vm_mm; - struct vm_area_struct * new_vma, * next, * prev; + struct mm_struct *mm = vma->vm_mm; + struct vm_area_struct *new_vma, *next, *prev; int allocated_vma; int split = 0; @@ -196,14 +196,16 @@ static unsigned long move_vma(struct vm_area_struct * vma, next = find_vma_prev(mm, new_addr, &prev); if (next) { if (prev && prev->vm_end == new_addr && - can_vma_merge(prev, vma->vm_flags) && !vma->vm_file && !(vma->vm_flags & VM_SHARED)) { + can_vma_merge(prev, vma->vm_flags) && !vma->vm_file && + !(vma->vm_flags & VM_SHARED)) { spin_lock(&mm->page_table_lock); prev->vm_end = new_addr + new_len; spin_unlock(&mm->page_table_lock); new_vma = prev; if (next != prev->vm_next) BUG(); - if (prev->vm_end == next->vm_start && can_vma_merge(next, prev->vm_flags)) { + if (prev->vm_end == next->vm_start && + can_vma_merge(next, prev->vm_flags)) { spin_lock(&mm->page_table_lock); prev->vm_end = next->vm_end; __vma_unlink(mm, next, prev); @@ -214,7 +216,8 @@ static unsigned long move_vma(struct vm_area_struct * vma, 
kmem_cache_free(vm_area_cachep, next); } } else if (next->vm_start == new_addr + new_len && - can_vma_merge(next, vma->vm_flags) && !vma->vm_file && !(vma->vm_flags & VM_SHARED)) { + can_vma_merge(next, vma->vm_flags) && + !vma->vm_file && !(vma->vm_flags & VM_SHARED)) { spin_lock(&mm->page_table_lock); next->vm_start = new_addr; spin_unlock(&mm->page_table_lock); @@ -223,7 +226,8 @@ static unsigned long move_vma(struct vm_area_struct * vma, } else { prev = find_vma(mm, new_addr-1); if (prev && prev->vm_end == new_addr && - can_vma_merge(prev, vma->vm_flags) && !vma->vm_file && !(vma->vm_flags & VM_SHARED)) { + can_vma_merge(prev, vma->vm_flags) && !vma->vm_file && + !(vma->vm_flags & VM_SHARED)) { spin_lock(&mm->page_table_lock); prev->vm_end = new_addr + new_len; spin_unlock(&mm->page_table_lock); @@ -249,7 +253,7 @@ static unsigned long move_vma(struct vm_area_struct * vma, INIT_LIST_HEAD(&new_vma->shared); new_vma->vm_start = new_addr; new_vma->vm_end = new_addr+new_len; - new_vma->vm_pgoff += (addr - vma->vm_start) >> PAGE_SHIFT; + new_vma->vm_pgoff += (addr-vma->vm_start) >> PAGE_SHIFT; if (new_vma->vm_file) get_file(new_vma->vm_file); if (new_vma->vm_ops && new_vma->vm_ops->open) @@ -428,7 +432,8 @@ unsigned long do_mremap(unsigned long addr, if (vma->vm_flags & VM_SHARED) map_flags |= MAP_SHARED; - new_addr = get_unmapped_area(vma->vm_file, 0, new_len, vma->vm_pgoff, map_flags); + new_addr = get_unmapped_area(vma->vm_file, 0, new_len, + vma->vm_pgoff, map_flags); ret = new_addr; if (new_addr & ~PAGE_MASK) goto out; -- cgit v1.2.3 From ecd2d2201d475af0f3aa822ba991463da33bf54f Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 5 Feb 2003 16:59:11 -0800 Subject: [PATCH] spinlock debugging on uniprocessors Patch from Manfred Spraul This enables spinlock debugging on uniprocessor builds, under CONFIG_DEBUG_SPINLOCK. The reason I want this is that one day we'll need to pull out the debugging support from the timer code which detects uninitialised timers. And once that has gone, uniprocessor developers and testers have no way of detecting uninitialised timers - there will be mysterious deadlocks on SMP machines. And there will surely be more uninitialised timers. The patch also removes the last pieces of the support for including <asm/spinlock.h> directly. Doesn't work since (IIRC) 2.3.x --- include/linux/spinlock.h | 122 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 112 insertions(+), 10 deletions(-) diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 403033961628..a289a20a2484 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -37,30 +37,120 @@ #ifdef CONFIG_SMP #include -/* - * !CONFIG_SMP and spin_lock_init not previously defined - * (e.g.
by including include/asm/spinlock.h) - */ -#elif !defined(spin_lock_init) +#else -#ifndef CONFIG_PREEMPT +#if !defined(CONFIG_PREEMPT) && !defined(CONFIG_DEBUG_SPINLOCK) # define atomic_dec_and_lock(atomic,lock) atomic_dec_and_test(atomic) # define ATOMIC_DEC_AND_LOCK #endif +#ifdef CONFIG_DEBUG_SPINLOCK + +#define SPINLOCK_MAGIC 0x1D244B3C +typedef struct { + unsigned long magic; + volatile unsigned long lock; + volatile unsigned int babble; + const char *module; + char *owner; + int oline; +} spinlock_t; +#define SPIN_LOCK_UNLOCKED (spinlock_t) { SPINLOCK_MAGIC, 0, 10, __FILE__ , NULL, 0} + +#define spin_lock_init(x) \ + do { \ + (x)->magic = SPINLOCK_MAGIC; \ + (x)->lock = 0; \ + (x)->babble = 5; \ + (x)->module = __FILE__; \ + (x)->owner = NULL; \ + (x)->oline = 0; \ + } while (0) + +#define CHECK_LOCK(x) \ + do { \ + if ((x)->magic != SPINLOCK_MAGIC) { \ + printk(KERN_ERR "%s:%d: spin_is_locked on uninitialized spinlock %p.\n", \ + __FILE__, __LINE__, (x)); \ + } \ + } while(0) + +#define _raw_spin_lock(x) \ + do { \ + CHECK_LOCK(x); \ + if ((x)->lock&&(x)->babble) { \ + printk("%s:%d: spin_lock(%s:%p) already locked by %s/%d\n", \ + __FILE__,__LINE__, (x)->module, \ + (x), (x)->owner, (x)->oline); \ + (x)->babble--; \ + } \ + (x)->lock = 1; \ + (x)->owner = __FILE__; \ + (x)->oline = __LINE__; \ + } while (0) + +/* without debugging, spin_is_locked on UP always says + * FALSE. --> printk if already locked. */ +#define spin_is_locked(x) \ + ({ \ + CHECK_LOCK(x); \ + if ((x)->lock&&(x)->babble) { \ + printk("%s:%d: spin_is_locked(%s:%p) already locked by %s/%d\n", \ + __FILE__,__LINE__, (x)->module, \ + (x), (x)->owner, (x)->oline); \ + (x)->babble--; \ + } \ + 0; \ + }) + +/* without debugging, spin_trylock on UP always says + * TRUE. --> printk if already locked. */ +#define _raw_spin_trylock(x) \ + ({ \ + CHECK_LOCK(x); \ + if ((x)->lock&&(x)->babble) { \ + printk("%s:%d: spin_trylock(%s:%p) already locked by %s/%d\n", \ + __FILE__,__LINE__, (x)->module, \ + (x), (x)->owner, (x)->oline); \ + (x)->babble--; \ + } \ + (x)->lock = 1; \ + (x)->owner = __FILE__; \ + (x)->oline = __LINE__; \ + 1; \ + }) + +#define spin_unlock_wait(x) \ + do { \ + CHECK_LOCK(x); \ + if ((x)->lock&&(x)->babble) { \ + printk("%s:%d: spin_unlock_wait(%s:%p) owned by %s/%d\n", \ + __FILE__,__LINE__, (x)->module, (x), \ + (x)->owner, (x)->oline); \ + (x)->babble--; \ + }\ + } while (0) + +#define _raw_spin_unlock(x) \ + do { \ + CHECK_LOCK(x); \ + if (!(x)->lock&&(x)->babble) { \ + printk("%s:%d: spin_unlock(%s:%p) not locked\n", \ + __FILE__,__LINE__, (x)->module, (x));\ + (x)->babble--; \ + } \ + (x)->lock = 0; \ + } while (0) +#else /* * gcc versions before ~2.95 have a nasty bug with empty initializers. 
*/ #if (__GNUC__ > 2) typedef struct { } spinlock_t; - typedef struct { } rwlock_t; #define SPIN_LOCK_UNLOCKED (spinlock_t) { } - #define RW_LOCK_UNLOCKED (rwlock_t) { } #else typedef struct { int gcc_is_buggy; } spinlock_t; - typedef struct { int gcc_is_buggy; } rwlock_t; #define SPIN_LOCK_UNLOCKED (spinlock_t) { 0 } - #define RW_LOCK_UNLOCKED (rwlock_t) { 0 } #endif /* @@ -72,6 +162,18 @@ #define _raw_spin_trylock(lock) ((void)(lock), 1) #define spin_unlock_wait(lock) do { (void)(lock); } while(0) #define _raw_spin_unlock(lock) do { (void)(lock); } while(0) +#endif /* CONFIG_DEBUG_SPINLOCK */ + +/* RW spinlocks: No debug version */ + +#if (__GNUC__ > 2) + typedef struct { } rwlock_t; + #define RW_LOCK_UNLOCKED (rwlock_t) { } +#else + typedef struct { int gcc_is_buggy; } rwlock_t; + #define RW_LOCK_UNLOCKED (rwlock_t) { 0 } +#endif + #define rwlock_init(lock) do { (void)(lock); } while(0) #define _raw_read_lock(lock) do { (void)(lock); } while(0) #define _raw_read_unlock(lock) do { (void)(lock); } while(0) -- cgit v1.2.3 From 4f1cb3ff0f25323a228c77dea55ca93b8dde31d0 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 5 Feb 2003 16:59:17 -0800 Subject: [PATCH] CPU Hotplug mm/slab.c CPU_UP_CANCELED fix Patch from Manfred Spraul. Fixes a bug which was exposed by Zwane's hotplug CPU work. The cache_cache.array pointer is initially given a temp bootstrap area, which is later converted over to the final value after the CPU is brought up. But if slab is enhanced to permit cancellation of a CPU bringup, this pointer ends up pointing at stale memory. So reinitialise it by hand when kmem_cache_init() is run. --- mm/slab.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slab.c b/mm/slab.c index 87f623dde759..af0886e3c7a5 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -439,7 +439,6 @@ struct arraycache_init initarray_generic __initdata = { { 0, BOOT_CPUCACHE_ENTRI static kmem_cache_t cache_cache = { .lists = LIST3_INIT(cache_cache.lists), /* Allow for boot cpu != 0 */ - .array = { [0 ... NR_CPUS-1] = &initarray_cache.cache }, .batchcount = 1, .limit = BOOT_CPUCACHE_ENTRIES, .objsize = sizeof(kmem_cache_t), @@ -611,6 +610,7 @@ void __init kmem_cache_init(void) init_MUTEX(&cache_chain_sem); INIT_LIST_HEAD(&cache_chain); list_add(&cache_cache.next, &cache_chain); + cache_cache.array[smp_processor_id()] = &initarray_cache.cache; cache_estimate(0, cache_cache.objsize, 0, &left_over, &cache_cache.num); -- cgit v1.2.3 From 9a3e1a9676060802c78d70332a18da28b8e1e480 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 5 Feb 2003 16:59:24 -0800 Subject: [PATCH] Fix signed use of i_blocks in ext3 truncate Patch from "Stephen C. Tweedie" Fix "h_buffer_credits<0" assert failure during truncate. The bug occurs when the "i_blocks" count in the file's inode overflows past 2^31. That works fine most of the time, because i_blocks is an unsigned long, and should go up to 2^32; but there's a place in truncate where ext3 calculates the size of the next transaction chunk for the delete, and that mistakenly uses a signed long instead. Because the huge i_blocks gets cast to a negative value, ext3 does not reserve enough credits for the transaction and the above error results. This is usually only possible on filesystems corrupted for other reasons, but it is reproducible if you create a single, non-sparse file larger than 1TB on ext3 and then try to delete it. 
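For concreteness, a minimal userspace sketch of the failure (not part of the patch; it assumes a 32-bit long, as on i386, and stands in for the kernel types):

#include <stdio.h>

/* With a 32-bit long, an i_blocks value past 2^31 turns negative when
 * copied into a signed variable, so comparing it against a plain
 * (signed) 64 never clamps it and the credit calculation goes wrong. */
int main(void)
{
	unsigned long i_blocks = 0x80000001UL;	/* >1TB file: block count past 2^31 */
	long old_needed = i_blocks;		/* the old signed copy */
	unsigned long new_needed = i_blocks;	/* the fixed unsigned copy */

	if (old_needed > 64)			/* false: old_needed is negative */
		old_needed = 64;
	if (new_needed > 64U)			/* the patch's U-suffixed limit */
		new_needed = 64U;

	printf("signed:   %ld\n", old_needed);	/* negative, never clamped */
	printf("unsigned: %lu\n", new_needed);	/* 64, clamped as intended */
	return 0;
}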
--- fs/ext3/inode.c | 47 +++++++++++++++++++++++++++++++---------------- include/linux/ext3_jbd.h | 6 +++--- 2 files changed, 34 insertions(+), 19 deletions(-) diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index ccdb52c9cc77..24897acf33da 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -99,6 +99,34 @@ int ext3_forget(handle_t *handle, int is_metadata, return err; } +/* + * Work out how many blocks we need to progress with the next chunk of a + * truncate transaction. + */ + +static unsigned long blocks_for_truncate(struct inode *inode) +{ + unsigned long needed; + + needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9); + + /* Give ourselves just enough room to cope with inodes in which + * i_blocks is corrupt: we've seen disk corruptions in the past + * which resulted in random data in an inode which looked enough + * like a regular file for ext3 to try to delete it. Things + * will go a bit crazy if that happens, but at least we should + * try not to panic the whole kernel. */ + if (needed < 2) + needed = 2; + + /* But we need to bound the transaction so we don't overflow the + * journal. */ + if (needed > EXT3_MAX_TRANS_DATA) + needed = EXT3_MAX_TRANS_DATA; + + return EXT3_DATA_TRANS_BLOCKS + needed; +} + /* * Truncate transactions can be complex and absolutely huge. So we need to * be able to restart the transaction at a conventient checkpoint to make @@ -112,14 +140,9 @@ int ext3_forget(handle_t *handle, int is_metadata, static handle_t *start_transaction(struct inode *inode) { - long needed; handle_t *result; - needed = inode->i_blocks; - if (needed > EXT3_MAX_TRANS_DATA) - needed = EXT3_MAX_TRANS_DATA; - - result = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS + needed); + result = ext3_journal_start(inode, blocks_for_truncate(inode)); if (!IS_ERR(result)) return result; @@ -135,14 +158,9 @@ static handle_t *start_transaction(struct inode *inode) */ static int try_to_extend_transaction(handle_t *handle, struct inode *inode) { - long needed; - if (handle->h_buffer_credits > EXT3_RESERVE_TRANS_BLOCKS) return 0; - needed = inode->i_blocks; - if (needed > EXT3_MAX_TRANS_DATA) - needed = EXT3_MAX_TRANS_DATA; - if (!ext3_journal_extend(handle, EXT3_RESERVE_TRANS_BLOCKS + needed)) + if (!ext3_journal_extend(handle, blocks_for_truncate(inode))) return 0; return 1; } @@ -154,11 +172,8 @@ static int try_to_extend_transaction(handle_t *handle, struct inode *inode) */ static int ext3_journal_test_restart(handle_t *handle, struct inode *inode) { - long needed = inode->i_blocks; - if (needed > EXT3_MAX_TRANS_DATA) - needed = EXT3_MAX_TRANS_DATA; jbd_debug(2, "restarting handle %p\n", handle); - return ext3_journal_restart(handle, EXT3_DATA_TRANS_BLOCKS + needed); + return ext3_journal_restart(handle, blocks_for_truncate(inode)); } /* diff --git a/include/linux/ext3_jbd.h b/include/linux/ext3_jbd.h index 13508f6053b9..7ac910d15863 100644 --- a/include/linux/ext3_jbd.h +++ b/include/linux/ext3_jbd.h @@ -28,7 +28,7 @@ * indirection blocks, the group and superblock summaries, and the data * block to complete the transaction. */ -#define EXT3_SINGLEDATA_TRANS_BLOCKS 8 +#define EXT3_SINGLEDATA_TRANS_BLOCKS 8U /* Extended attributes may touch two data buffers, two bitmap buffers, * and two group and summaries. */ @@ -58,7 +58,7 @@ extern int ext3_writepage_trans_blocks(struct inode *inode); * start off at the maximum transaction size and grow the transaction * optimistically as we go. 
*/ -#define EXT3_MAX_TRANS_DATA 64 +#define EXT3_MAX_TRANS_DATA 64U /* We break up a large truncate or write transaction once the handle's * buffer credits gets this low, we need either to extend the @@ -67,7 +67,7 @@ extern int ext3_writepage_trans_blocks(struct inode *inode); * one block, plus two quota updates. Quota allocations are not * needed. */ -#define EXT3_RESERVE_TRANS_BLOCKS 12 +#define EXT3_RESERVE_TRANS_BLOCKS 12U #define EXT3_INDEX_EXTRA_TRANS_BLOCKS 8 -- cgit v1.2.3 From 4a69c79bc4f360b3acaa87274a324cdcc8accad7 Mon Sep 17 00:00:00 2001 From: Vojtech Pavlik Date: Thu, 6 Feb 2003 17:50:20 +0100 Subject: x86-64: Minor fixes to make the kernel compile and remove warnings. --- arch/x86_64/kernel/apic.c | 2 +- arch/x86_64/kernel/time.c | 4 ++++ drivers/block/floppy.c | 14 +++++++------- drivers/i2c/i2c-proc.c | 4 ++-- drivers/ide/pci/amd74xx.c | 2 +- drivers/usb/input/pid.c | 4 ++-- drivers/usb/media/usbvideo.c | 4 ++-- drivers/usb/media/vicam.c | 2 +- drivers/video/vesafb.c | 8 ++++++++ fs/xfs/linux/xfs_aops.c | 2 +- include/asm-x86_64/compat.h | 7 +++++++ include/asm-x86_64/dma-mapping.h | 6 ++++++ include/asm-x86_64/proto.h | 2 ++ 13 files changed, 44 insertions(+), 17 deletions(-) create mode 100644 include/asm-x86_64/dma-mapping.h diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index d7efa0f8370a..9241f4043fbc 100644 --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c @@ -895,7 +895,7 @@ int setup_profiling_timer(unsigned int multiplier) * value into /proc/profile. */ -inline void smp_local_timer_interrupt(struct pt_regs *regs) +void smp_local_timer_interrupt(struct pt_regs *regs) { int cpu = smp_processor_id(); diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 8fe37d578416..32f8e0b2bf18 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -30,6 +30,10 @@ u64 jiffies_64; spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED; +extern int using_apic_timer; +extern void smp_local_timer_interrupt(struct pt_regs * regs); + + unsigned int cpu_khz; /* TSC clocks / usec, not used here */ unsigned long hpet_period; /* fsecs / HPET clock */ unsigned long hpet_tick; /* HPET clocks / interrupt */ diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 8cbc0ed3d14d..a56db0bc9444 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -2298,7 +2298,7 @@ static inline void end_request(struct request *req, int uptodate) if (end_that_request_first(req, uptodate, current_count_sectors)) return; add_disk_randomness(req->rq_disk); - floppy_off((int)req->rq_disk->private_data); + floppy_off((long)req->rq_disk->private_data); blkdev_dequeue_request(req); end_that_request_last(req); @@ -2631,7 +2631,7 @@ static int make_raw_rw_request(void) return 0; } - set_fdc((int)current_req->rq_disk->private_data); + set_fdc((long)current_req->rq_disk->private_data); raw_cmd = &default_raw_cmd; raw_cmd->flags = FD_RAW_SPIN | FD_RAW_NEED_DISK | FD_RAW_NEED_DISK | @@ -2923,7 +2923,7 @@ static void redo_fd_request(void) } current_req = req; } - drive = (int)current_req->rq_disk->private_data; + drive = (long)current_req->rq_disk->private_data; set_fdc(drive); reschedule_timeout(current_reqD, "redo fd request", 0); @@ -3302,7 +3302,7 @@ static int raw_cmd_ioctl(int cmd, void *param) static int invalidate_drive(struct block_device *bdev) { /* invalidate the buffer track to force a reread */ - set_bit((int)bdev->bd_disk->private_data, &fake_change); + set_bit((long)bdev->bd_disk->private_data, &fake_change); process_fd_request(); 
check_disk_change(bdev); return 0; @@ -3797,7 +3797,7 @@ static int floppy_open(struct inode * inode, struct file * filp) */ static int check_floppy_change(struct gendisk *disk) { - int drive = (int)disk->private_data; + int drive = (long)disk->private_data; if (UTESTF(FD_DISK_CHANGED) || UTESTF(FD_VERIFY)) return 1; @@ -3900,7 +3900,7 @@ static int floppy_read_block_0(struct gendisk *disk) * geometry formats */ static int floppy_revalidate(struct gendisk *disk) { - int drive=(int)disk->private_data; + int drive=(long)disk->private_data; #define NO_GEOM (!current_type[drive] && !ITYPE(UDRS->fd_device)) int cf; int res = 0; @@ -4358,7 +4358,7 @@ int __init floppy_init(void) if (fdc_state[FDC(drive)].version == FDC_NONE) continue; /* to be cleaned up... */ - disks[drive]->private_data = (void*)drive; + disks[drive]->private_data = (void*)(long)drive; disks[drive]->queue = &floppy_queue; add_disk(disks[drive]); } diff --git a/drivers/i2c/i2c-proc.c b/drivers/i2c/i2c-proc.c index 3b97e96de669..ab462d034e22 100644 --- a/drivers/i2c/i2c-proc.c +++ b/drivers/i2c/i2c-proc.c @@ -39,7 +39,7 @@ static int i2c_create_name(char **name, const char *prefix, struct i2c_adapter *adapter, int addr); static int i2c_parse_reals(int *nrels, void *buffer, int bufsize, long *results, int magnitude); -static int i2c_write_reals(int nrels, void *buffer, int *bufsize, +static int i2c_write_reals(int nrels, void *buffer, size_t *bufsize, long *results, int magnitude); static int i2c_proc_chips(ctl_table * ctl, int write, struct file *filp, void *buffer, @@ -514,7 +514,7 @@ int i2c_parse_reals(int *nrels, void *buffer, int bufsize, return 0; } -int i2c_write_reals(int nrels, void *buffer, int *bufsize, +int i2c_write_reals(int nrels, void *buffer, size_t *bufsize, long *results, int magnitude) { #define BUFLEN 20 diff --git a/drivers/ide/pci/amd74xx.c b/drivers/ide/pci/amd74xx.c index b3ac87d756ef..71d41408d033 100644 --- a/drivers/ide/pci/amd74xx.c +++ b/drivers/ide/pci/amd74xx.c @@ -82,7 +82,7 @@ static char *amd_dma[] = { "MWDMA16", "UDMA33", "UDMA66", "UDMA100" }; #include #include -static int amd_base; +static long amd_base; static struct pci_dev *bmide_dev; extern int (*amd74xx_display_info)(char *, char **, off_t, int); /* ide-proc.c */ diff --git a/drivers/usb/input/pid.c b/drivers/usb/input/pid.c index ad838ccede15..bca8d5ce81b5 100644 --- a/drivers/usb/input/pid.c +++ b/drivers/usb/input/pid.c @@ -176,7 +176,7 @@ static int hid_pid_upload_effect(struct input_dev *dev, struct hid_ff_pid* pid_private = (struct hid_ff_pid*)(dev->private); int ret; int is_update; - int flags=0; + unsigned long flags = 0; dev_dbg(&pid_private->hid->dev->dev, "upload effect called: effect_type=%x\n",effect->type); /* Check this effect type is supported by this device */ @@ -192,7 +192,7 @@ static int hid_pid_upload_effect(struct input_dev *dev, int id=0; // Spinlock so we don`t get a race condition when choosing IDs - spin_lock_irqsave(&pid_private->lock,flags); + spin_lock_irqsave(&pid_private->lock, flags); while(id < FF_EFFECTS_MAX) if (!test_and_set_bit(FF_PID_FLAGS_USED, &pid_private->effects[id++].flags)) diff --git a/drivers/usb/media/usbvideo.c b/drivers/usb/media/usbvideo.c index 4c81b621ff76..873019ccd845 100644 --- a/drivers/usb/media/usbvideo.c +++ b/drivers/usb/media/usbvideo.c @@ -61,7 +61,7 @@ static int usbvideo_v4l_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg); static int usbvideo_v4l_mmap(struct file *file, struct vm_area_struct *vma); static int usbvideo_v4l_open(struct 
inode *inode, struct file *file); -static int usbvideo_v4l_read(struct file *file, char *buf, +static ssize_t usbvideo_v4l_read(struct file *file, char *buf, size_t count, loff_t *ppos); static int usbvideo_v4l_close(struct inode *inode, struct file *file); @@ -1641,7 +1641,7 @@ static int usbvideo_v4l_ioctl(struct inode *inode, struct file *file, * 20-Oct-2000 Created. * 01-Nov-2000 Added mutex (uvd->lock). */ -static int usbvideo_v4l_read(struct file *file, char *buf, +static ssize_t usbvideo_v4l_read(struct file *file, char *buf, size_t count, loff_t *ppos) { struct uvd *uvd = file->private_data; diff --git a/drivers/usb/media/vicam.c b/drivers/usb/media/vicam.c index 44acb135ba96..831d932ea9e5 100644 --- a/drivers/usb/media/vicam.c +++ b/drivers/usb/media/vicam.c @@ -988,7 +988,7 @@ read_frame(struct vicam_camera *cam, int framenum) up(&cam->cam_lock); } -static int +static ssize_t vicam_read( struct file *file, char *buf, size_t count, loff_t *ppos ) { struct vicam_camera *cam = file->private_data; diff --git a/drivers/video/vesafb.c b/drivers/video/vesafb.c index 1f64653ece3d..9b9f7cc44bf3 100644 --- a/drivers/video/vesafb.c +++ b/drivers/video/vesafb.c @@ -62,6 +62,7 @@ static void (*pmi_pal)(void); static int vesafb_pan_display(struct fb_var_screeninfo *var, struct fb_info *info) { +#ifdef __i386__ int offset; if (!ypan) @@ -83,11 +84,13 @@ static int vesafb_pan_display(struct fb_var_screeninfo *var, "c" (offset), /* ECX */ "d" (offset >> 16), /* EDX */ "D" (&pmi_start)); /* EDI */ +#endif return 0; } static void vesa_setpalette(int regno, unsigned red, unsigned green, unsigned blue) { +#ifdef __i386__ struct { u_char blue, green, red, pad; } entry; if (pmi_setpal) { @@ -111,6 +114,7 @@ static void vesa_setpalette(int regno, unsigned red, unsigned green, unsigned bl outb_p(green >> 10, dac_val); outb_p(blue >> 10, dac_val); } +#endif } static int vesafb_setcolreg(unsigned regno, unsigned red, unsigned green, @@ -225,6 +229,10 @@ int __init vesafb_init(void) vesafb_fix.visual = (vesafb_defined.bits_per_pixel == 8) ? 
FB_VISUAL_PSEUDOCOLOR : FB_VISUAL_TRUECOLOR; +#ifndef __i386__ + screen_info.vesapm_seg = 0; +#endif + if (!request_mem_region(vesafb_fix.smem_start, vesafb_fix.smem_len, "vesafb")) { printk(KERN_WARNING "vesafb: abort, cannot reserve video memory at 0x%lx\n", diff --git a/fs/xfs/linux/xfs_aops.c b/fs/xfs/linux/xfs_aops.c index 9398993ec4d4..5505483ca88c 100644 --- a/fs/xfs/linux/xfs_aops.c +++ b/fs/xfs/linux/xfs_aops.c @@ -50,7 +50,7 @@ map_blocks( if (((flags & (PBF_DIRECT|PBF_SYNC)) == PBF_DIRECT) && (offset >= inode->i_size)) - count = max(count, XFS_WRITE_IO_LOG); + count = max_t(ssize_t, count, XFS_WRITE_IO_LOG); retry: VOP_BMAP(vp, offset, count, flags, pbmapp, &nmaps, error); if (flags & PBF_WRITE) { diff --git a/include/asm-x86_64/compat.h b/include/asm-x86_64/compat.h index 5307fdeb598c..0763ad9d8db8 100644 --- a/include/asm-x86_64/compat.h +++ b/include/asm-x86_64/compat.h @@ -81,4 +81,11 @@ struct compat_statfs { int f_spare[6]; }; +typedef u32 compat_old_sigset_t; /* at least 32 bits */ + +#define _COMPAT_NSIG 64 +#define _COMPAT_NSIG_BPW 32 + +typedef u32 compat_sigset_word; + #endif /* _ASM_X86_64_COMPAT_H */ diff --git a/include/asm-x86_64/dma-mapping.h b/include/asm-x86_64/dma-mapping.h new file mode 100644 index 000000000000..48ada1b2956f --- /dev/null +++ b/include/asm-x86_64/dma-mapping.h @@ -0,0 +1,6 @@ +#ifndef _ASM_X8664_DMA_MAPPING_H +#define _ASM_X8664_DMA_MAPPING_H + +#include + +#endif diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h index f58ac42093e1..2d56397b90e1 100644 --- a/include/asm-x86_64/proto.h +++ b/include/asm-x86_64/proto.h @@ -1,6 +1,8 @@ #ifndef _ASM_X8664_PROTO_H #define _ASM_X8664_PROTO_H 1 +#include + /* misc architecture specific prototypes */ struct cpuinfo_x86; -- cgit v1.2.3 From a2dd146402912b8dfb2ff07168d7391bd8fd3ed1 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 5 Feb 2003 17:18:16 -0800 Subject: [PATCH] quota memleak The Stanford Checker found a memleak. --- fs/quota_v2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/quota_v2.c b/fs/quota_v2.c index 64811521d0ce..c051de09c559 100644 --- a/fs/quota_v2.c +++ b/fs/quota_v2.c @@ -306,6 +306,7 @@ static uint find_free_dqentry(struct dquot *dquot, int *err) blk = get_free_dqblk(filp, info); if ((int)blk < 0) { *err = blk; + freedqbuf(buf); return 0; } memset(buf, 0, V2_DQBLKSIZE); -- cgit v1.2.3 From 32dbc81b9a35a593a31db56506048945d7ecfd7d Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Wed, 5 Feb 2003 17:54:00 -0800 Subject: ACPI: Enable compilation w/o cpufreq --- drivers/acpi/processor.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/acpi/processor.c b/drivers/acpi/processor.c index 3558ad0a80b9..38960418e6d9 100644 --- a/drivers/acpi/processor.c +++ b/drivers/acpi/processor.c @@ -1040,6 +1040,7 @@ acpi_processor_apply_limit ( if (!pr->flags.limit) return_VALUE(-ENODEV); +#ifdef CONFIG_CPU_FREQ if (pr->flags.performance) { px = pr->performance_platform_limit; if (pr->limit.user.px > px) @@ -1058,6 +1059,7 @@ acpi_processor_apply_limit ( } else if (pr->performance_platform_limit) { ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Platform limit event detected. 
Consider using ACPI P-States CPUfreq driver\n")); } +#endif if (pr->flags.throttling) { if (pr->limit.user.tx > tx) -- cgit v1.2.3 From d30a24be358d7bc9ae41aae6f0f9dbe7465393cf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 5 Feb 2003 21:20:54 -0600 Subject: [SCSI] Remove host_active It isn't used anywhere anymore --- drivers/scsi/hosts.c | 1 - drivers/scsi/hosts.h | 1 - drivers/scsi/scsi.c | 10 +++++----- drivers/scsi/scsi_error.c | 15 +++++++-------- drivers/scsi/scsi_proc.c | 3 +-- 5 files changed, 13 insertions(+), 17 deletions(-) diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index 5891be92ec51..ca7185175eb2 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -392,7 +392,6 @@ struct Scsi_Host * scsi_register(Scsi_Host_Template *shost_tp, int xtr_bytes) spin_lock_init(&shost->default_lock); scsi_assign_lock(shost, &shost->default_lock); - atomic_set(&shost->host_active,0); INIT_LIST_HEAD(&shost->my_devices); init_waitqueue_head(&shost->host_wait); diff --git a/drivers/scsi/hosts.h b/drivers/scsi/hosts.h index f812838f01a8..6d26e62b7e52 100644 --- a/drivers/scsi/hosts.h +++ b/drivers/scsi/hosts.h @@ -396,7 +396,6 @@ struct Scsi_Host unsigned int eh_kill:1; /* set when killing the eh thread */ wait_queue_head_t host_wait; Scsi_Host_Template * hostt; - atomic_t host_active; /* commands checked out */ volatile unsigned short host_busy; /* commands actually active on low-level */ volatile unsigned short host_failed; /* commands that failed. */ diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 912639238794..cc8ff7cf49be 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -994,11 +994,11 @@ static void scsi_softirq(struct softirq_action *h) * Here we have a fatal error of some sort. * Turn it over to the error handler. */ - SCSI_LOG_MLCOMPLETE(3, printk("Command failed %p %x active=%d busy=%d failed=%d\n", - SCpnt, SCpnt->result, - atomic_read(&SCpnt->device->host->host_active), - SCpnt->device->host->host_busy, - SCpnt->device->host->host_failed)); + SCSI_LOG_MLCOMPLETE(3, + printk("Command failed %p %x busy=%d failed=%d\n", + SCpnt, SCpnt->result, + SCpnt->device->host->host_busy, + SCpnt->device->host->host_failed)); /* * Dump the sense information too. 
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index bef0602ef98a..dfe139a63e1e 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -131,23 +131,22 @@ int scsi_delete_timer(Scsi_Cmnd *scmd) **/ void scsi_times_out(Scsi_Cmnd *scmd) { + struct Scsi_Host *shost = scmd->device->host; + /* Set the serial_number_at_timeout to the current serial_number */ scmd->serial_number_at_timeout = scmd->serial_number; scsi_eh_eflags_set(scmd, SCSI_EH_CMD_TIMEOUT | SCSI_EH_CMD_ERR); - if( scmd->device->host->eh_wait == NULL ) { + if (unlikely(shost->eh_wait == NULL)) { panic("Error handler thread not present at %p %p %s %d", - scmd, scmd->device->host, __FILE__, __LINE__); + scmd, shost, __FILE__, __LINE__); } - scsi_host_failed_inc_and_test(scmd->device->host); + scsi_host_failed_inc_and_test(shost); - SCSI_LOG_TIMEOUT(3, printk("Command timed out active=%d busy=%d " - " failed=%d\n", - atomic_read(&scmd->device->host->host_active), - scmd->device->host->host_busy, - scmd->device->host->host_failed)); + SCSI_LOG_TIMEOUT(3, printk("Command timed out busy=%d failed=%d\n", + shost->host_busy, shost->host_failed)); } /** diff --git a/drivers/scsi/scsi_proc.c b/drivers/scsi/scsi_proc.c index bbc31dcfe191..173cce3c39ba 100644 --- a/drivers/scsi/scsi_proc.c +++ b/drivers/scsi/scsi_proc.c @@ -345,10 +345,9 @@ static void scsi_dump_status(int level) i = 0; for (shpnt = scsi_host_get_next(NULL); shpnt; shpnt = scsi_host_get_next(shpnt)) { - printk(KERN_INFO " %d %d %d : %d %d\n", + printk(KERN_INFO " %d %d : %d %d\n", shpnt->host_failed, shpnt->host_busy, - atomic_read(&shpnt->host_active), shpnt->host_blocked, shpnt->host_self_blocked); } -- cgit v1.2.3 From f8646d2045f474b2be3f6ff29c7856fe21b01a0f Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 5 Feb 2003 21:25:30 -0600 Subject: [PATCH] [patch, 2.5] scsi_qla1280.c free on error path From: Marcus Alanen Remove check_region in favour of request_region. Free resources properly on error path. Horribly subtle ioremap/iounmap lurks here I think, in qla1280_pci_config(), which the below patch should take care of. I'm wondering if there couldn't / shouldn't be a better way to allocate resources. Obviously lots of drivers have broken error paths. Is this even necessary? 
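For reference, a standalone sketch of the cascading-unwind idiom the patch adopts (the grab_*/undo_* helpers are hypothetical stand-ins, not the qla1280 or kernel API; each grab returns 0 on success):

#include <stdio.h>

static int grab_irq(void)    { return 0; }
static int grab_region(void) { return 0; }
static int grab_fw(void)     { return -1; }	/* simulate the late failure */
static void undo_region(void) { puts("release region"); }
static void undo_irq(void)    { puts("free irq"); }

/* Resources are released in reverse order of acquisition; each error
 * label falls through to the undo steps for everything grabbed
 * before the step that failed. */
static int device_init(void)
{
	if (grab_irq())
		goto err;
	if (grab_region())
		goto err_irq;
	if (grab_fw())
		goto err_region;
	return 0;

 err_region:
	undo_region();
 err_irq:
	undo_irq();
 err:
	return -1;
}

int main(void)
{
	printf("device_init() = %d\n", device_init());
	return 0;
}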
Marcus # # create_patch: qla1280_release_on_error_path-2002-12-08-A.patch # Date: Sun Dec 8 22:32:33 EET 2002 # --- drivers/scsi/qla1280.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/qla1280.c b/drivers/scsi/qla1280.c index cb759e19c945..5e53c8b6c7b1 100644 --- a/drivers/scsi/qla1280.c +++ b/drivers/scsi/qla1280.c @@ -866,19 +866,17 @@ qla1280_do_device_init(struct pci_dev *pdev, "qla1280", ha)) { printk("qla1280 : Failed to reserve interrupt %d already " "in use\n", host->irq); - goto error_mem_alloced; + goto error_unmap; } #if !MEMORY_MAPPED_IO /* Register the I/O space with Linux */ - if (check_region(host->io_port, 0xff)) { + if (!request_region(host->io_port, 0xff, "qla1280")) { printk("qla1280 : Failed to reserve i/o region 0x%04lx-0x%04lx" " already in use\n", host->io_port, host->io_port + 0xff); - free_irq(host->irq, ha); - goto error_mem_alloced; + goto error_irq; } - request_region(host->io_port, 0xff, "qla1280"); #endif reg = ha->iobase; @@ -886,7 +884,7 @@ qla1280_do_device_init(struct pci_dev *pdev, /* load the F/W, read paramaters, and init the H/W */ if (qla1280_initialize_adapter(ha)) { printk(KERN_INFO "qla1x160:Failed to initialize adapter\n"); - goto error_mem_alloced; + goto error_region; } /* set our host ID (need to do something about our two IDs) */ @@ -894,6 +892,21 @@ qla1280_do_device_init(struct pci_dev *pdev, return host; + error_region: +#if !MEMORY_MAPPED_IO + release_region(host->io_port, 0xff); +#endif + + error_irq: + free_irq(host->irq, ha); + + error_unmap: +#if MEMORY_MAPPED_IO + if (ha->mmpbase) + iounmap((void *)(((unsigned long) ha->mmpbase) & PAGE_MASK)); +#endif + + error_mem_alloced: qla1280_mem_free(ha); -- cgit v1.2.3 From baaf76ddcc29ded9797daad83533354c1e96858c Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 5 Feb 2003 21:25:59 -0600 Subject: [PATCH] 2.5.59 add two help texts to drivers_scsi_Kconfig From: Steven Cole Here are some help texts from 2.4.21-pre3 Configure.help which are needed in 2.5.59 drivers/scsi/Kconfig. Steven --- drivers/scsi/Kconfig | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 5668037c38be..f07f2da4c629 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -176,6 +176,9 @@ menu "SCSI low-level drivers" config SGIWD93_SCSI tristate "SGI WD93C93 SCSI Driver" depends on SGI_IP22 && SCSI + help + If you have a Western Digital WD93 SCSI controller on + an SGI MIPS system, say Y. Otherwise, say N. config SCSI_DECNCR tristate "DEC NCR53C94 Scsi Driver" @@ -1342,6 +1345,10 @@ config SCSI_QLOGIC_FC config SCSI_QLOGIC_FC_FIRMWARE bool "Include loadable firmware in driver" depends on SCSI_QLOGIC_FC + help + Say Y to include ISP2100 Fabric Initiator/Target Firmware, with + expanded LUN addressing and FcTape (FCP-2) support, in the + Qlogic QLA 1280 driver. This is required on some platforms. config SCSI_QLOGIC_1280 tristate "Qlogic QLA 1280 SCSI support" -- cgit v1.2.3 From 78ef52ec6ffe0a0dd90797666ed282607fbaf43c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 5 Feb 2003 21:31:08 -0600 Subject: [PATCH] coding style updates for scsi_lib.c I just couldn't see the mess anymore.. Nuke the ifdefs and use sane variable names. Some more small nitpicks but no behaviour changes at all. 
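For orientation before the large diff: scsi_lib.c serves scatterlists from five mempools sized in powers of two, and scsi_alloc_sgtable() maps a command's segment count to a pool index (visible in the hunks below). A standalone sketch of that lookup; the MAX_PHYS_SEGMENTS value here is an assumption for illustration:

#include <stdio.h>

#define MAX_PHYS_SEGMENTS 128	/* assumed value, for the sketch only */

/* Mirror of the switch in scsi_alloc_sgtable(): the pools hold 8, 16,
 * 32, 64 and MAX_PHYS_SEGMENTS entries; index -1 means no pool fits
 * and the allocation must fail. */
static int sglist_len(int use_sg)
{
	if (use_sg < 1 || use_sg > MAX_PHYS_SEGMENTS)
		return -1;
	if (use_sg <= 8)
		return 0;
	if (use_sg <= 16)
		return 1;
	if (use_sg <= 32)
		return 2;
	if (use_sg <= 64)
		return 3;
	return 4;
}

int main(void)
{
	int n;

	for (n = 1; n <= 256; n <<= 1)
		printf("use_sg=%3d -> pool index %d\n", n, sglist_len(n));
	return 0;
}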
--- drivers/scsi/scsi_lib.c | 591 +++++++++++++++++++++++------------------------- 1 file changed, 284 insertions(+), 307 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index a286e2248a50..eb62a113d6f7 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -41,8 +41,8 @@ struct scsi_host_sg_pool scsi_sg_pools[SG_MEMPOOL_NR] = { * * Purpose: Insert pre-formed command into request queue. * - * Arguments: SCpnt - command that is ready to be queued. - * at_head - boolean. True if we should insert at head + * Arguments: cmd - command that is ready to be queued. + * at_head - boolean. True if we should insert at head * of queue, false if we should insert at tail. * * Lock status: Assumed that lock is not held upon entry. @@ -56,10 +56,10 @@ struct scsi_host_sg_pool scsi_sg_pools[SG_MEMPOOL_NR] = { * for now), and then call the queue request function to actually * process it. */ -int scsi_insert_special_cmd(Scsi_Cmnd * SCpnt, int at_head) +int scsi_insert_special_cmd(struct scsi_cmnd *cmd, int at_head) { - blk_insert_request(SCpnt->device->request_queue, SCpnt->request, - at_head, SCpnt); + blk_insert_request(cmd->device->request_queue, cmd->request, + at_head, cmd); return 0; } @@ -68,8 +68,8 @@ int scsi_insert_special_cmd(Scsi_Cmnd * SCpnt, int at_head) * * Purpose: Insert pre-formed request into request queue. * - * Arguments: SRpnt - request that is ready to be queued. - * at_head - boolean. True if we should insert at head + * Arguments: sreq - request that is ready to be queued. + * at_head - boolean. True if we should insert at head * of queue, false if we should insert at tail. * * Lock status: Assumed that lock is not held upon entry. @@ -83,24 +83,24 @@ int scsi_insert_special_cmd(Scsi_Cmnd * SCpnt, int at_head) * for now), and then call the queue request function to actually * process it. */ -int scsi_insert_special_req(Scsi_Request * SRpnt, int at_head) +int scsi_insert_special_req(struct scsi_request *sreq, int at_head) { - /* This is used to insert SRpnt specials. Because users of - * this function are apt to reuse requests with no modification, - * we have to sanitise the request flags here + /* + * Because users of this function are apt to reuse requests with no + * modification, we have to sanitise the request flags here */ - SRpnt->sr_request->flags &= ~REQ_DONTPREP; - blk_insert_request(SRpnt->sr_device->request_queue, SRpnt->sr_request, - at_head, SRpnt); + sreq->sr_request->flags &= ~REQ_DONTPREP; + blk_insert_request(sreq->sr_device->request_queue, sreq->sr_request, + at_head, sreq); return 0; } /* * Function: scsi_init_cmd_errh() * - * Purpose: Initialize SCpnt fields related to error handling. + * Purpose: Initialize cmd fields related to error handling. * - * Arguments: SCpnt - command that is ready to be queued. + * Arguments: cmd - command that is ready to be queued. * * Returns: Nothing * @@ -108,21 +108,20 @@ int scsi_insert_special_req(Scsi_Request * SRpnt, int at_head) * fields related to error handling. Typically this will * be called once for each command, as required. 
*/ -static int scsi_init_cmd_errh(Scsi_Cmnd * SCpnt) +static int scsi_init_cmd_errh(struct scsi_cmnd *cmd) { - SCpnt->owner = SCSI_OWNER_MIDLEVEL; - SCpnt->reset_chain = NULL; - SCpnt->serial_number = 0; - SCpnt->serial_number_at_timeout = 0; - SCpnt->flags = 0; - SCpnt->retries = 0; - - SCpnt->abort_reason = 0; + cmd->owner = SCSI_OWNER_MIDLEVEL; + cmd->reset_chain = NULL; + cmd->serial_number = 0; + cmd->serial_number_at_timeout = 0; + cmd->flags = 0; + cmd->retries = 0; + cmd->abort_reason = 0; - memset((void *) SCpnt->sense_buffer, 0, sizeof SCpnt->sense_buffer); + memset(cmd->sense_buffer, 0, sizeof cmd->sense_buffer); - if (SCpnt->cmd_len == 0) - SCpnt->cmd_len = COMMAND_SIZE(SCpnt->cmnd[0]); + if (cmd->cmd_len == 0) + cmd->cmd_len = COMMAND_SIZE(cmd->cmnd[0]); /* * We need saved copies of a number of fields - this is because @@ -131,19 +130,16 @@ static int scsi_init_cmd_errh(Scsi_Cmnd * SCpnt) * we will need to restore these values prior to running the actual * command. */ - SCpnt->old_use_sg = SCpnt->use_sg; - SCpnt->old_cmd_len = SCpnt->cmd_len; - SCpnt->sc_old_data_direction = SCpnt->sc_data_direction; - SCpnt->old_underflow = SCpnt->underflow; - memcpy((void *) SCpnt->data_cmnd, - (const void *) SCpnt->cmnd, sizeof(SCpnt->cmnd)); - SCpnt->buffer = SCpnt->request_buffer; - SCpnt->bufflen = SCpnt->request_bufflen; - - SCpnt->reset_chain = NULL; - - SCpnt->internal_timeout = NORMAL_TIMEOUT; - SCpnt->abort_reason = 0; + cmd->old_use_sg = cmd->use_sg; + cmd->old_cmd_len = cmd->cmd_len; + cmd->sc_old_data_direction = cmd->sc_data_direction; + cmd->old_underflow = cmd->underflow; + memcpy(cmd->data_cmnd, cmd->cmnd, sizeof(cmd->cmnd)); + cmd->buffer = cmd->request_buffer; + cmd->bufflen = cmd->request_bufflen; + cmd->reset_chain = NULL; + cmd->internal_timeout = NORMAL_TIMEOUT; + cmd->abort_reason = 0; return 1; } @@ -153,23 +149,22 @@ static int scsi_init_cmd_errh(Scsi_Cmnd * SCpnt) * * Purpose: Restore the command state for a retry * - * Arguments: SCpnt - command to be restored + * Arguments: cmd - command to be restored * * Returns: Nothing * * Notes: Immediately prior to retrying a command, we need * to restore certain fields that we saved above. */ -void scsi_setup_cmd_retry(Scsi_Cmnd *SCpnt) +void scsi_setup_cmd_retry(struct scsi_cmnd *cmd) { - memcpy((void *) SCpnt->cmnd, (void *) SCpnt->data_cmnd, - sizeof(SCpnt->data_cmnd)); - SCpnt->request_buffer = SCpnt->buffer; - SCpnt->request_bufflen = SCpnt->bufflen; - SCpnt->use_sg = SCpnt->old_use_sg; - SCpnt->cmd_len = SCpnt->old_cmd_len; - SCpnt->sc_data_direction = SCpnt->sc_old_data_direction; - SCpnt->underflow = SCpnt->old_underflow; + memcpy(cmd->cmnd, cmd->data_cmnd, sizeof(cmd->data_cmnd)); + cmd->request_buffer = cmd->buffer; + cmd->request_bufflen = cmd->bufflen; + cmd->use_sg = cmd->old_use_sg; + cmd->cmd_len = cmd->old_cmd_len; + cmd->sc_data_direction = cmd->sc_old_data_direction; + cmd->underflow = cmd->old_underflow; } /* @@ -177,7 +172,7 @@ void scsi_setup_cmd_retry(Scsi_Cmnd *SCpnt) * * Purpose: Handle post-processing of completed commands. * - * Arguments: SCpnt - command that may need to be requeued. + * Arguments: cmd - command that may need to be requeued. * * Returns: Nothing * @@ -187,7 +182,7 @@ void scsi_setup_cmd_retry(Scsi_Cmnd *SCpnt) * that a medium error occurred, and the sectors after * the bad block need to be re-read. 
* - * If SCpnt is NULL, it means that the previous command + * If cmd is NULL, it means that the previous command * was completely finished, and we should simply start * a new command, if possible. * @@ -208,17 +203,17 @@ void scsi_setup_cmd_retry(Scsi_Cmnd *SCpnt) * permutations grows as 2**N, and if too many more special cases * get added, we start to get screwed. */ -void scsi_queue_next_request(request_queue_t * q, Scsi_Cmnd * SCpnt) +void scsi_queue_next_request(request_queue_t *q, struct scsi_cmnd *cmd) { - int all_clear; + struct scsi_device *sdev, *sdev2; + struct Scsi_Host *shost; unsigned long flags; - Scsi_Device *SDpnt, *SDpnt2; - struct Scsi_Host *SHpnt; + int all_clear; ASSERT_LOCK(q->queue_lock, 0); spin_lock_irqsave(q->queue_lock, flags); - if (SCpnt != NULL) { + if (cmd != NULL) { /* * For some reason, we are not done with this request. @@ -226,16 +221,18 @@ void scsi_queue_next_request(request_queue_t * q, Scsi_Cmnd * SCpnt) * in which case we need to request the blocks that come after * the bad sector. */ - SCpnt->request->special = (void *) SCpnt; - if(blk_rq_tagged(SCpnt->request)) - blk_queue_end_tag(q, SCpnt->request); - /* set REQ_SPECIAL - we have a command + cmd->request->special = cmd; + if (blk_rq_tagged(cmd->request)) + blk_queue_end_tag(q, cmd->request); + + /* + * set REQ_SPECIAL - we have a command * clear REQ_DONTPREP - we assume the sg table has been * nuked so we need to set it up again. */ - SCpnt->request->flags |= REQ_SPECIAL; - SCpnt->request->flags &= ~REQ_DONTPREP; - __elv_add_request(q, SCpnt->request, 0, 0); + cmd->request->flags |= REQ_SPECIAL; + cmd->request->flags &= ~REQ_DONTPREP; + __elv_add_request(q, cmd->request, 0, 0); } /* @@ -243,8 +240,8 @@ void scsi_queue_next_request(request_queue_t * q, Scsi_Cmnd * SCpnt) */ __blk_run_queue(q); - SDpnt = (Scsi_Device *) q->queuedata; - SHpnt = SDpnt->host; + sdev = q->queuedata; + shost = sdev->host; /* * If this is a single-lun device, and we are currently finished @@ -253,15 +250,15 @@ void scsi_queue_next_request(request_queue_t * q, Scsi_Cmnd * SCpnt) * with special case code, then spin off separate versions and * use function pointers to pick the right one. */ - if (SDpnt->single_lun && blk_queue_empty(q) && SDpnt->device_busy ==0 && - !SHpnt->host_blocked && !SHpnt->host_self_blocked && - !((SHpnt->can_queue > 0) && (SHpnt->host_busy >= - SHpnt->can_queue))) { - list_for_each_entry(SDpnt2, &SDpnt->same_target_siblings, + if (sdev->single_lun && blk_queue_empty(q) && sdev->device_busy ==0 && + !shost->host_blocked && !shost->host_self_blocked && + !((shost->can_queue > 0) && (shost->host_busy >= + shost->can_queue))) { + list_for_each_entry(sdev2, &sdev->same_target_siblings, same_target_siblings) { - if (!SDpnt2->device_blocked && - !blk_queue_empty(SDpnt2->request_queue)) { - __blk_run_queue(SDpnt2->request_queue); + if (!sdev2->device_blocked && + !blk_queue_empty(sdev2->request_queue)) { + __blk_run_queue(sdev2->request_queue); break; } } @@ -276,22 +273,21 @@ void scsi_queue_next_request(request_queue_t * q, Scsi_Cmnd * SCpnt) * other device might have become starved along the way. 
*/ all_clear = 1; - if (SHpnt->some_device_starved) { - list_for_each_entry(SDpnt, &SHpnt->my_devices, siblings) { - if ((SHpnt->can_queue > 0 && (SHpnt->host_busy >= SHpnt->can_queue)) - || (SHpnt->host_blocked) - || (SHpnt->host_self_blocked)) { + if (shost->some_device_starved) { + list_for_each_entry(sdev, &shost->my_devices, siblings) { + if (shost->can_queue > 0 && + shost->host_busy >= shost->can_queue) break; - } - if (SDpnt->device_blocked || !SDpnt->starved) { + if (shost->host_blocked || shost->host_self_blocked) + break; + if (sdev->device_blocked || !sdev->starved) continue; - } - __blk_run_queue(SDpnt->request_queue); + __blk_run_queue(sdev->request_queue); all_clear = 0; } - if (SDpnt == NULL && all_clear) { - SHpnt->some_device_starved = 0; - } + + if (sdev == NULL && all_clear) + shost->some_device_starved = 0; } spin_unlock_irqrestore(q->queue_lock, flags); } @@ -302,7 +298,7 @@ void scsi_queue_next_request(request_queue_t * q, Scsi_Cmnd * SCpnt) * Purpose: Post-processing of completed commands called from interrupt * handler or a bottom-half handler. * - * Arguments: SCpnt - command that is complete. + * Arguments: cmd - command that is complete. * uptodate - 1 if I/O indicates success, 0 for I/O error. * sectors - number of sectors we want to mark. * requeue - indicates whether we should requeue leftovers. @@ -319,13 +315,11 @@ void scsi_queue_next_request(request_queue_t * q, Scsi_Cmnd * SCpnt) * We are guaranteeing that the request queue will be goosed * at some point during this call. */ -static Scsi_Cmnd *scsi_end_request(Scsi_Cmnd * SCpnt, - int uptodate, - int sectors, - int requeue) +static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int uptodate, + int sectors, int requeue) { - request_queue_t *q = SCpnt->device->request_queue; - struct request *req = SCpnt->request; + request_queue_t *q = cmd->device->request_queue; + struct request *req = cmd->request; unsigned long flags; ASSERT_LOCK(q->queue_lock, 0); @@ -335,15 +329,14 @@ static Scsi_Cmnd *scsi_end_request(Scsi_Cmnd * SCpnt, * to queue the remainder of them. */ if (end_that_request_first(req, uptodate, sectors)) { - if (!requeue) - return SCpnt; - - /* - * Bleah. Leftovers again. Stick the leftovers in - * the front of the queue, and goose the queue again. - */ - scsi_queue_next_request(q, SCpnt); - return SCpnt; + if (requeue) { + /* + * Bleah. Leftovers again. Stick the leftovers in + * the front of the queue, and goose the queue again. + */ + scsi_queue_next_request(q, cmd); + } + return cmd; } add_disk_randomness(req->rq_disk); @@ -358,39 +351,39 @@ static Scsi_Cmnd *scsi_end_request(Scsi_Cmnd * SCpnt, * This will goose the queue request function at the end, so we don't * need to worry about launching another command. */ - scsi_put_command(SCpnt); + scsi_put_command(cmd); scsi_queue_next_request(q, NULL); return NULL; } -static struct scatterlist *scsi_alloc_sgtable(Scsi_Cmnd *SCpnt, int gfp_mask) +static struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, int gfp_mask) { struct scsi_host_sg_pool *sgp; struct scatterlist *sgl; - BUG_ON(!SCpnt->use_sg); + BUG_ON(!cmd->use_sg); - switch (SCpnt->use_sg) { + switch (cmd->use_sg) { case 1 ... 8: - SCpnt->sglist_len = 0; + cmd->sglist_len = 0; break; case 9 ... 16: - SCpnt->sglist_len = 1; + cmd->sglist_len = 1; break; case 17 ... 32: - SCpnt->sglist_len = 2; + cmd->sglist_len = 2; break; case 33 ... 64: - SCpnt->sglist_len = 3; + cmd->sglist_len = 3; break; case 65 ... 
MAX_PHYS_SEGMENTS: - SCpnt->sglist_len = 4; + cmd->sglist_len = 4; break; default: return NULL; } - sgp = scsi_sg_pools + SCpnt->sglist_len; + sgp = scsi_sg_pools + cmd->sglist_len; sgl = mempool_alloc(sgp->pool, gfp_mask); if (sgl) memset(sgl, 0, sgp->size); @@ -407,13 +400,12 @@ static void scsi_free_sgtable(struct scatterlist *sgl, int index) mempool_free(sgl, sgp->pool); } - /* * Function: scsi_release_buffers() * * Purpose: Completion processing for block device I/O requests. * - * Arguments: SCpnt - command that we are bailing. + * Arguments: cmd - command that we are bailing. * * Lock status: Assumed that no lock is held upon entry. * @@ -425,28 +417,28 @@ static void scsi_free_sgtable(struct scatterlist *sgl, int index) * the scatter-gather table, and potentially any bounce * buffers. */ -static void scsi_release_buffers(Scsi_Cmnd * SCpnt) +static void scsi_release_buffers(struct scsi_cmnd *cmd) { - struct request *req = SCpnt->request; + struct request *req = cmd->request; - ASSERT_LOCK(SCpnt->device->host->host_lock, 0); + ASSERT_LOCK(cmd->device->host->host_lock, 0); /* * Free up any indirection buffers we allocated for DMA purposes. */ - if (SCpnt->use_sg) - scsi_free_sgtable(SCpnt->request_buffer, SCpnt->sglist_len); - else if (SCpnt->request_buffer != req->buffer) - kfree(SCpnt->request_buffer); + if (cmd->use_sg) + scsi_free_sgtable(cmd->request_buffer, cmd->sglist_len); + else if (cmd->request_buffer != req->buffer) + kfree(cmd->request_buffer); /* * Zero these out. They now point to freed memory, and it is * dangerous to hang onto the pointers. */ - SCpnt->buffer = NULL; - SCpnt->bufflen = 0; - SCpnt->request_buffer = NULL; - SCpnt->request_bufflen = 0; + cmd->buffer = NULL; + cmd->bufflen = 0; + cmd->request_buffer = NULL; + cmd->request_bufflen = 0; } /* @@ -477,7 +469,7 @@ static struct Scsi_Device_Template *scsi_get_request_dev(struct request *req) * * Purpose: Completion processing for block device I/O requests. * - * Arguments: SCpnt - command that is finished. + * Arguments: cmd - command that is finished. * * Lock status: Assumed that no lock is held upon entry. * @@ -489,13 +481,13 @@ static struct Scsi_Device_Template *scsi_get_request_dev(struct request *req) * (the normal case for most drivers), we don't need * the logic to deal with cleaning up afterwards. */ -void scsi_io_completion(Scsi_Cmnd * SCpnt, int good_sectors, +void scsi_io_completion(struct scsi_cmnd *cmd, int good_sectors, int block_sectors) { - int result = SCpnt->result; - int this_count = SCpnt->bufflen >> 9; - request_queue_t *q = SCpnt->device->request_queue; - struct request *req = SCpnt->request; + int result = cmd->result; + int this_count = cmd->bufflen >> 9; + request_queue_t *q = cmd->device->request_queue; + struct request *req = cmd->request; int clear_errors = 1; /* @@ -518,44 +510,43 @@ void scsi_io_completion(Scsi_Cmnd * SCpnt, int good_sectors, * For the case of a READ, we need to copy the data out of the * bounce buffer and into the real buffer. 
*/ - if (SCpnt->use_sg) - scsi_free_sgtable(SCpnt->buffer, SCpnt->sglist_len); - else if (SCpnt->buffer != req->buffer) { + if (cmd->use_sg) + scsi_free_sgtable(cmd->buffer, cmd->sglist_len); + else if (cmd->buffer != req->buffer) { if (rq_data_dir(req) == READ) { unsigned long flags; char *to = bio_kmap_irq(req->bio, &flags); - memcpy(to, SCpnt->buffer, SCpnt->bufflen); + memcpy(to, cmd->buffer, cmd->bufflen); bio_kunmap_irq(to, &flags); } - kfree(SCpnt->buffer); + kfree(cmd->buffer); } if (blk_pc_request(req)) { /* SG_IO ioctl from block level */ req->errors = (driver_byte(result) & DRIVER_SENSE) ? (CHECK_CONDITION << 1) : (result & 0xff); - if (!result) - req->data_len -= SCpnt->bufflen; - else { + if (result) { clear_errors = 0; - if (SCpnt->sense_buffer[0] & 0x70) { - int len = 8 + SCpnt->sense_buffer[7]; + if (cmd->sense_buffer[0] & 0x70) { + int len = 8 + cmd->sense_buffer[7]; if (len > SCSI_SENSE_BUFFERSIZE) len = SCSI_SENSE_BUFFERSIZE; - memcpy(req->sense, SCpnt->sense_buffer, len); + memcpy(req->sense, cmd->sense_buffer, len); req->sense_len = len; } - } + } else + req->data_len -= cmd->bufflen; } /* * Zero these out. They now point to freed memory, and it is * dangerous to hang onto the pointers. */ - SCpnt->buffer = NULL; - SCpnt->bufflen = 0; - SCpnt->request_buffer = NULL; - SCpnt->request_bufflen = 0; + cmd->buffer = NULL; + cmd->bufflen = 0; + cmd->request_buffer = NULL; + cmd->request_bufflen = 0; /* * Next deal with any sectors which we were able to correctly @@ -564,7 +555,7 @@ void scsi_io_completion(Scsi_Cmnd * SCpnt, int good_sectors, if (good_sectors >= 0) { SCSI_LOG_HLCOMPLETE(1, printk("%ld sectors total, %d sectors done.\n", req->nr_sectors, good_sectors)); - SCSI_LOG_HLCOMPLETE(1, printk("use_sg is %d\n ", SCpnt->use_sg)); + SCSI_LOG_HLCOMPLETE(1, printk("use_sg is %d\n ", cmd->use_sg)); if (clear_errors) req->errors = 0; @@ -579,13 +570,13 @@ void scsi_io_completion(Scsi_Cmnd * SCpnt, int good_sectors, * requeueing right here - we will requeue down below * when we handle the bad sectors. */ - SCpnt = scsi_end_request(SCpnt, 1, good_sectors, result == 0); + cmd = scsi_end_request(cmd, 1, good_sectors, result == 0); /* * If the command completed without error, then either finish off the * rest of the command, or start a new one. */ - if (result == 0 || SCpnt == NULL ) { + if (result == 0 || cmd == NULL ) { return; } } @@ -601,28 +592,28 @@ void scsi_io_completion(Scsi_Cmnd * SCpnt, int good_sectors, * Not yet implemented. A read will fail after being remapped, * a write will call the strategy routine again. */ - if (SCpnt->device->remap) { + if (cmd->device->remap) { result = 0; } #endif } - if ((SCpnt->sense_buffer[0] & 0x7f) == 0x70) { + if ((cmd->sense_buffer[0] & 0x7f) == 0x70) { /* * If the device is in the process of becoming ready, * retry. */ - if (SCpnt->sense_buffer[12] == 0x04 && - SCpnt->sense_buffer[13] == 0x01) { - scsi_queue_next_request(q, SCpnt); + if (cmd->sense_buffer[12] == 0x04 && + cmd->sense_buffer[13] == 0x01) { + scsi_queue_next_request(q, cmd); return; } - if ((SCpnt->sense_buffer[2] & 0xf) == UNIT_ATTENTION) { - if (SCpnt->device->removable) { + if ((cmd->sense_buffer[2] & 0xf) == UNIT_ATTENTION) { + if (cmd->device->removable) { /* detected disc change. set a bit * and quietly refuse further access. 
*/ - SCpnt->device->changed = 1; - SCpnt = scsi_end_request(SCpnt, 0, + cmd->device->changed = 1; + cmd = scsi_end_request(cmd, 0, this_count, 1); return; } else { @@ -632,7 +623,7 @@ void scsi_io_completion(Scsi_Cmnd * SCpnt, int good_sectors, * media change, so we just retry the * request and see what happens. */ - scsi_queue_next_request(q, SCpnt); + scsi_queue_next_request(q, cmd); return; } } @@ -644,35 +635,35 @@ void scsi_io_completion(Scsi_Cmnd * SCpnt, int good_sectors, * past the end of the disk. */ - switch (SCpnt->sense_buffer[2]) { + switch (cmd->sense_buffer[2]) { case ILLEGAL_REQUEST: - if (SCpnt->device->ten) { - SCpnt->device->ten = 0; + if (cmd->device->ten) { + cmd->device->ten = 0; /* * This will cause a retry with a 6-byte * command. */ - scsi_queue_next_request(q, SCpnt); + scsi_queue_next_request(q, cmd); result = 0; } else { - SCpnt = scsi_end_request(SCpnt, 0, this_count, 1); + cmd = scsi_end_request(cmd, 0, this_count, 1); return; } break; case NOT_READY: printk(KERN_INFO "Device %s not ready.\n", req->rq_disk ? req->rq_disk->disk_name : ""); - SCpnt = scsi_end_request(SCpnt, 0, this_count, 1); + cmd = scsi_end_request(cmd, 0, this_count, 1); return; break; case MEDIUM_ERROR: case VOLUME_OVERFLOW: printk("scsi%d: ERROR on channel %d, id %d, lun %d, CDB: ", - SCpnt->device->host->host_no, (int) SCpnt->device->channel, - (int) SCpnt->device->id, (int) SCpnt->device->lun); - print_command(SCpnt->data_cmnd); - print_sense("sd", SCpnt); - SCpnt = scsi_end_request(SCpnt, 0, block_sectors, 1); + cmd->device->host->host_no, (int) cmd->device->channel, + (int) cmd->device->id, (int) cmd->device->lun); + print_command(cmd->data_cmnd); + print_sense("sd", cmd); + cmd = scsi_end_request(cmd, 0, block_sectors, 1); return; default: break; @@ -684,28 +675,28 @@ void scsi_io_completion(Scsi_Cmnd * SCpnt, int good_sectors, * recovery reasons. Just retry the request * and see what happens. */ - scsi_queue_next_request(q, SCpnt); + scsi_queue_next_request(q, cmd); return; } if (result) { - struct Scsi_Device_Template *STpnt; + struct Scsi_Device_Template *sdt; - STpnt = scsi_get_request_dev(SCpnt->request); + sdt = scsi_get_request_dev(cmd->request); printk("SCSI %s error : host %d channel %d id %d lun %d return code = %x\n", - (STpnt ? STpnt->name : "device"), - SCpnt->device->host->host_no, - SCpnt->device->channel, - SCpnt->device->id, - SCpnt->device->lun, result); + (sdt ? sdt->name : "device"), + cmd->device->host->host_no, + cmd->device->channel, + cmd->device->id, + cmd->device->lun, result); if (driver_byte(result) & DRIVER_SENSE) - print_sense("sd", SCpnt); + print_sense("sd", cmd); /* * Mark a single buffer as not uptodate. Queue the remainder. * We sometimes get this cruft in the event that a medium error * isn't properly reported. */ - SCpnt = scsi_end_request(SCpnt, 0, req->current_nr_sectors, 1); + cmd = scsi_end_request(cmd, 0, req->current_nr_sectors, 1); return; } } @@ -715,26 +706,26 @@ void scsi_io_completion(Scsi_Cmnd * SCpnt, int good_sectors, * * Purpose: SCSI I/O initialize function. 
* - * Arguments: SCpnt - Command descriptor we wish to initialize + * Arguments: cmd - Command descriptor we wish to initialize * * Returns: 0 on success * BLKPREP_DEFER if the failure is retryable * BLKPREP_KILL if the failure is fatal */ -static int scsi_init_io(Scsi_Cmnd *SCpnt) +static int scsi_init_io(struct scsi_cmnd *cmd) { - struct request *req = SCpnt->request; + struct request *req = cmd->request; struct scatterlist *sgpnt; - int count, ret = 0; + int count; /* * if this is a rq->data based REQ_BLOCK_PC, setup for a non-sg xfer */ if ((req->flags & REQ_BLOCK_PC) && !req->bio) { - SCpnt->request_bufflen = req->data_len; - SCpnt->request_buffer = req->data; + cmd->request_bufflen = req->data_len; + cmd->request_buffer = req->data; req->buffer = req->data; - SCpnt->use_sg = 0; + cmd->use_sg = 0; return 0; } @@ -743,48 +734,45 @@ static int scsi_init_io(Scsi_Cmnd *SCpnt) * but now we do (it makes highmem I/O easier to support without * kmapping pages) */ - SCpnt->use_sg = req->nr_phys_segments; + cmd->use_sg = req->nr_phys_segments; /* * if sg table allocation fails, requeue request later. */ - sgpnt = scsi_alloc_sgtable(SCpnt, GFP_ATOMIC); + sgpnt = scsi_alloc_sgtable(cmd, GFP_ATOMIC); if (unlikely(!sgpnt)) { req->flags |= REQ_SPECIAL; - ret = BLKPREP_DEFER; - goto out; + return BLKPREP_DEFER; } - SCpnt->request_buffer = (char *) sgpnt; - SCpnt->request_bufflen = req->nr_sectors << 9; + cmd->request_buffer = (char *) sgpnt; + cmd->request_bufflen = req->nr_sectors << 9; if (blk_pc_request(req)) - SCpnt->request_bufflen = req->data_len; + cmd->request_bufflen = req->data_len; req->buffer = NULL; /* * Next, walk the list, and fill in the addresses and sizes of * each segment. */ - count = blk_rq_map_sg(req->q, req, SCpnt->request_buffer); + count = blk_rq_map_sg(req->q, req, cmd->request_buffer); /* * mapped well, send it off */ - if (count <= SCpnt->use_sg) { - SCpnt->use_sg = count; + if (likely(count <= cmd->use_sg)) { + cmd->use_sg = count; return 0; } printk(KERN_ERR "Incorrect number of segments after building list\n"); - printk(KERN_ERR "counted %d, received %d\n", count, SCpnt->use_sg); + printk(KERN_ERR "counted %d, received %d\n", count, cmd->use_sg); printk(KERN_ERR "req nr_sec %lu, cur_nr_sec %u\n", req->nr_sectors, req->current_nr_sectors); /* release the command and kill it */ - scsi_put_command(SCpnt); - ret = BLKPREP_KILL; -out: - return ret; + scsi_put_command(cmd); + return BLKPREP_KILL; } /* @@ -805,60 +793,53 @@ static int check_all_luns(struct scsi_device *myself) int scsi_prep_fn(struct request_queue *q, struct request *req) { - struct Scsi_Device_Template *STpnt; - Scsi_Cmnd *SCpnt; - Scsi_Device *SDpnt; - - SDpnt = (Scsi_Device *) q->queuedata; - BUG_ON(!SDpnt); + struct Scsi_Device_Template *sdt; + struct scsi_device *sdev = q->queuedata; + struct scsi_cmnd *cmd; /* * Find the actual device driver associated with this command. * The SPECIAL requests are things like character device or * ioctls, which did not originate from ll_rw_blk. Note that - * the special field is also used to indicate the SCpnt for + * the special field is also used to indicate the cmd for * the remainder of a partially fulfilled request that can * come up when there is a medium error. We have to treat * these two cases differently. We differentiate by looking * at request->cmd, as this tells us the real story. 
*/ if (req->flags & REQ_SPECIAL) { - Scsi_Request *SRpnt; + struct scsi_request *sreq = req->special; - STpnt = NULL; - SCpnt = (Scsi_Cmnd *) req->special; - SRpnt = (Scsi_Request *) req->special; - - if (SRpnt->sr_magic == SCSI_REQ_MAGIC) { - SCpnt = scsi_get_command(SRpnt->sr_device, GFP_ATOMIC); - if (!SCpnt) + if (sreq->sr_magic == SCSI_REQ_MAGIC) { + cmd = scsi_get_command(sreq->sr_device, GFP_ATOMIC); + if (unlikely(!cmd)) return BLKPREP_DEFER; - scsi_init_cmd_from_req(SCpnt, SRpnt); - } - + scsi_init_cmd_from_req(cmd, sreq); + } else + cmd = req->special; } else if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) { /* * Now try and find a command block that we can use. */ if (!req->special) { - SCpnt = scsi_get_command(SDpnt, GFP_ATOMIC); - if (unlikely(!SCpnt)) + cmd = scsi_get_command(sdev, GFP_ATOMIC); + if (unlikely(!cmd)) return BLKPREP_DEFER; } else - SCpnt = req->special; + cmd = req->special; /* pull a tag out of the request if we have one */ - SCpnt->tag = req->tag; + cmd->tag = req->tag; } else { blk_dump_rq_flags(req, "SCSI bad req"); return BLKPREP_KILL; } /* note the overloading of req->special. When the tag - * is active it always means SCpnt. If the tag goes + * is active it always means cmd. If the tag goes * back for re-queueing, it may be reset */ - req->special = SCpnt; - SCpnt->request = req; + req->special = cmd; + cmd->request = req; /* * FIXME: drop the lock here because the functions below @@ -867,7 +848,6 @@ int scsi_prep_fn(struct request_queue *q, struct request *req) * lock. We hope REQ_STARTED prevents anything untoward from * happening now. */ - if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) { int ret; @@ -883,27 +863,30 @@ int scsi_prep_fn(struct request_queue *q, struct request *req) * some kinds of consistency checking may cause the * request to be rejected immediately. */ - STpnt = scsi_get_request_dev(req); - BUG_ON(!STpnt); + sdt = scsi_get_request_dev(req); + BUG_ON(!sdt); /* * This sets up the scatter-gather table (allocating if * required). */ - if ((ret = scsi_init_io(SCpnt))) - /* BLKPREP_KILL return also releases the command */ + ret = scsi_init_io(cmd); + if (ret) /* BLKPREP_KILL return also releases the command */ return ret; /* * Initialize the actual SCSI command for this request. */ - if (!STpnt->init_command(SCpnt)) { - scsi_release_buffers(SCpnt); - scsi_put_command(SCpnt); + if (unlikely(!sdt->init_command(cmd))) { + scsi_release_buffers(cmd); + scsi_put_command(cmd); return BLKPREP_KILL; } } - /* The request is now prepped, no need to come back here */ + + /* + * The request is now prepped, no need to come back here + */ req->flags |= REQ_DONTPREP; return BLKPREP_OK; } @@ -911,48 +894,34 @@ int scsi_prep_fn(struct request_queue *q, struct request *req) /* * Function: scsi_request_fn() * - * Purpose: Generic version of request function for SCSI hosts. + * Purpose: Main strategy routine for SCSI. * * Arguments: q - Pointer to actual queue. * * Returns: Nothing * * Lock status: IO request lock assumed to be held when called. - * - * Notes: The theory is that this function is something which individual - * drivers could also supply if they wished to. The problem - * is that we have 30 some odd low-level drivers in the kernel - * tree already, and it would be most difficult to retrofit - * this crap into all of them. Thus this function has the job - * of acting as a generic queue manager for all of those existing - * drivers. 
*/ -void scsi_request_fn(request_queue_t * q) +void scsi_request_fn(request_queue_t *q) { + struct scsi_device *sdev = q->queuedata; + struct Scsi_Host *shost = sdev->host; + struct scsi_cmnd *cmd; struct request *req; - Scsi_Cmnd *SCpnt; - Scsi_Device *SDpnt; - struct Scsi_Host *SHpnt; ASSERT_LOCK(q->queue_lock, 1); - SDpnt = (Scsi_Device *) q->queuedata; - if (!SDpnt) { - panic("Missing device"); - } - SHpnt = SDpnt->host; - /* * To start with, we keep looping until the queue is empty, or until * the host is no longer able to accept any more requests. */ - while (1 == 1) { + for (;;) { /* * Check this again - each time we loop through we will have * released the lock and grabbed it again, so each time * we need to check to see if the queue is plugged or not. */ - if (SHpnt->in_recovery || blk_queue_plugged(q)) + if (shost->in_recovery || blk_queue_plugged(q)) return; /* @@ -963,39 +932,43 @@ void scsi_request_fn(request_queue_t * q) */ req = elv_next_request(q); - if (SDpnt->device_busy >= SDpnt->queue_depth) + if (sdev->device_busy >= sdev->queue_depth) break; - if (SDpnt->single_lun && check_all_luns(SDpnt)) + if (sdev->single_lun && check_all_luns(sdev)) break; - if(SHpnt->host_busy == 0 && SHpnt->host_blocked) { + if (shost->host_busy == 0 && shost->host_blocked) { /* unblock after host_blocked iterates to zero */ - if(--SHpnt->host_blocked == 0) { - SCSI_LOG_MLQUEUE(3, printk("scsi%d unblocking host at zero depth\n", SHpnt->host_no)); + if (--shost->host_blocked == 0) { + SCSI_LOG_MLQUEUE(3, + printk("scsi%d unblocking host at zero depth\n", + shost->host_no)); } else { blk_plug_device(q); break; } } - if(SDpnt->device_busy == 0 && SDpnt->device_blocked) { + + if (sdev->device_busy == 0 && sdev->device_blocked) { /* unblock after device_blocked iterates to zero */ - if(--SDpnt->device_blocked == 0) { - SCSI_LOG_MLQUEUE(3, printk("scsi%d (%d:%d) unblocking device at zero depth\n", SHpnt->host_no, SDpnt->id, SDpnt->lun)); + if (--sdev->device_blocked == 0) { + SCSI_LOG_MLQUEUE(3, + printk("scsi%d (%d:%d) unblocking device at zero depth\n", + shost->host_no, sdev->id, sdev->lun)); } else { blk_plug_device(q); break; } } + /* * If the device cannot accept another request, then quit. */ - if (SDpnt->device_blocked) { + if (sdev->device_blocked) break; - } - if ((SHpnt->can_queue > 0 && (SHpnt->host_busy >= SHpnt->can_queue)) - || (SHpnt->host_blocked) - || (SHpnt->host_self_blocked)) { + if ((shost->can_queue > 0 && shost->host_busy >= shost->can_queue) || + shost->host_blocked || shost->host_self_blocked) { /* * If we are unable to process any commands at all for * this device, then we consider it to be starved. @@ -1004,14 +977,13 @@ void scsi_request_fn(request_queue_t * q) * little help getting it started again * once the host isn't quite so busy. */ - if (SDpnt->device_busy == 0) { - SDpnt->starved = 1; - SHpnt->some_device_starved = 1; + if (sdev->device_busy == 0) { + sdev->starved = 1; + shost->some_device_starved = 1; } break; - } else { - SDpnt->starved = 0; - } + } else + sdev->starved = 0; /* * If we couldn't find a request that could be queued, then we @@ -1020,21 +992,22 @@ void scsi_request_fn(request_queue_t * q) if (blk_queue_empty(q)) break; - if(!req) { + if (!req) { /* If the device is busy, a returning I/O * will restart the queue. 
Otherwise, we have * to plug the queue */ - if(SDpnt->device_busy == 0) + if(sdev->device_busy == 0) blk_plug_device(q); break; } - SCpnt = (struct scsi_cmnd *)req->special; + cmd = req->special; - /* Should be impossible for a correctly prepared request + /* + * Should be impossible for a correctly prepared request * please mail the stack trace to linux-scsi@vger.kernel.org */ - BUG_ON(!SCpnt); + BUG_ON(!cmd); /* * Finally, before we release the lock, we copy the @@ -1044,27 +1017,27 @@ void scsi_request_fn(request_queue_t * q) * reason to search the list, because all of the * commands in this queue are for the same device. */ - if(!(blk_queue_tagged(q) && (blk_queue_start_tag(q, req) == 0))) + if (!(blk_queue_tagged(q) && (blk_queue_start_tag(q, req) == 0))) blkdev_dequeue_request(req); /* * Now bump the usage count for both the host and the * device. */ - SHpnt->host_busy++; - SDpnt->device_busy++; + shost->host_busy++; + sdev->device_busy++; spin_unlock_irq(q->queue_lock); /* * Finally, initialize any error handling parameters, and set up * the timers for timeouts. */ - scsi_init_cmd_errh(SCpnt); + scsi_init_cmd_errh(cmd); /* * Dispatch the command to the low-level driver. */ - scsi_dispatch_cmd(SCpnt); + scsi_dispatch_cmd(cmd); /* * Now we need to grab the lock again. We are about to mess @@ -1080,7 +1053,7 @@ void scsi_request_fn(request_queue_t * q) * Purpose: Utility function used by low-level drivers to prevent further * commands from being queued to the device. * - * Arguments: SHpnt - Host in question + * Arguments: shost - Host in question * * Returns: Nothing * @@ -1090,9 +1063,9 @@ void scsi_request_fn(request_queue_t * q) * get unblocked other than the low-level driver calling * scsi_unblock_requests(). */ -void scsi_block_requests(struct Scsi_Host * SHpnt) +void scsi_block_requests(struct Scsi_Host *shost) { - SHpnt->host_self_blocked = 1; + shost->host_self_blocked = 1; } /* @@ -1101,7 +1074,7 @@ void scsi_block_requests(struct Scsi_Host * SHpnt) * Purpose: Utility function used by low-level drivers to allow further * commands from being queued to the device. * - * Arguments: SHpnt - Host in question + * Arguments: shost - Host in question * * Returns: Nothing * @@ -1115,14 +1088,17 @@ void scsi_block_requests(struct Scsi_Host * SHpnt) * internals of the scsi mid-layer won't require wholesale * changes to drivers that use this feature. */ -void scsi_unblock_requests(struct Scsi_Host * SHpnt) +void scsi_unblock_requests(struct Scsi_Host *shost) { - Scsi_Device *SDloop; + struct scsi_device *sdev; - SHpnt->host_self_blocked = 0; - /* Now that we are unblocked, try to start the queues. */ - list_for_each_entry(SDloop, &SHpnt->my_devices, siblings) - scsi_queue_next_request(SDloop->request_queue, NULL); + shost->host_self_blocked = 0; + + /* + * Now that we are unblocked, try to start the queues. + */ + list_for_each_entry(sdev, &shost->my_devices, siblings) + scsi_queue_next_request(sdev->request_queue, NULL); } /* @@ -1131,7 +1107,7 @@ void scsi_unblock_requests(struct Scsi_Host * SHpnt) * Purpose: Utility function used by low-level drivers to report that * they have observed a bus reset on the bus being handled. * - * Arguments: SHpnt - Host in question + * Arguments: shost - Host in question * channel - channel on which reset was observed. * * Returns: Nothing @@ -1146,13 +1122,14 @@ void scsi_unblock_requests(struct Scsi_Host * SHpnt) * The main purpose of this is to make sure that a CHECK_CONDITION * is properly treated. 
 */
-void scsi_report_bus_reset(struct Scsi_Host * SHpnt, int channel)
+void scsi_report_bus_reset(struct Scsi_Host *shost, int channel)
 {
-	Scsi_Device *SDloop;
-	list_for_each_entry(SDloop, &SHpnt->my_devices, siblings) {
-		if (channel == SDloop->channel) {
-			SDloop->was_reset = 1;
-			SDloop->expecting_cc_ua = 1;
+	struct scsi_device *sdev;
+
+	list_for_each_entry(sdev, &shost->my_devices, siblings) {
+		if (channel == sdev->channel) {
+			sdev->was_reset = 1;
+			sdev->expecting_cc_ua = 1;
 		}
 	}
 }
@@ -1166,11 +1143,11 @@
 * The details of the implementation remain to be settled, however the
 * stubs are here now so that the actual drivers will properly compile.
 */
-void scsi_register_blocked_host(struct Scsi_Host * SHpnt)
+void scsi_register_blocked_host(struct Scsi_Host * shost)
 {
 }

-void scsi_deregister_blocked_host(struct Scsi_Host * SHpnt)
+void scsi_deregister_blocked_host(struct Scsi_Host * shost)
 {
 }
-- cgit v1.2.3


From 9d252c210314ce22a68f7019a2bc33b05df7f738 Mon Sep 17 00:00:00 2001
From: David Jeffery
Date: Wed, 5 Feb 2003 18:23:29 -0800
Subject: [PATCH] ips driver 1/4: fix struct length and remove dead code

This small patch fixes the length of the IPS_ENQ struct.  It was too
short, which can cause the adapter to write beyond the end of the
struct during driver initialization and corrupt part of memory.
---
 drivers/scsi/ips.c | 7 -------
 drivers/scsi/ips.h | 2 +-
 2 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/drivers/scsi/ips.c b/drivers/scsi/ips.c
index f355f3d2bf0a..d2f3efbb4fc3 100644
--- a/drivers/scsi/ips.c
+++ b/drivers/scsi/ips.c
@@ -1155,13 +1155,6 @@ ips_queue(Scsi_Cmnd *SC, void (*done) (Scsi_Cmnd *)) {
 	ips_next(ha, IPS_INTR_IORL);

-	/* If We were using the CD Boot Flash Buffer, Restore the Old Values */
-	if ( ips_FlashData == ha->ioctl_data ) {
-		ha->ioctl_data = ha->flash_data;
-		ha->ioctl_order = ha->flash_order;
-		ha->ioctl_datasize = ha->flash_datasize;
-		ips_FlashDataInUse = 0;
-	}
 	return (0);
 }

diff --git a/drivers/scsi/ips.h b/drivers/scsi/ips.h
index 2735b6150e96..6da69d0c8995 100644
--- a/drivers/scsi/ips.h
+++ b/drivers/scsi/ips.h
@@ -714,7 +714,7 @@ typedef struct {
 	uint16_t usConfigUpdateCount;
 	uint8_t ucBlkFlag;
 	uint8_t reserved;
-	uint16_t usAddrDeadDisk[IPS_MAX_CHANNELS * IPS_MAX_TARGETS];
+	uint16_t usAddrDeadDisk[IPS_MAX_CHANNELS * (IPS_MAX_TARGETS + 1)];
 } IPS_ENQ, *PIPS_ENQ;

 typedef struct {
-- cgit v1.2.3


From 836f40cbc2742795df8af5d0ec356e7ebff8b4f0 Mon Sep 17 00:00:00 2001
From: David Jeffery
Date: Wed, 5 Feb 2003 18:23:35 -0800
Subject: [PATCH] ips driver 2/4: initialization reordering

This large patch reworks much of the adapter initialization code.  It
splits the scsi initialization code from the pci initialization.  It
adds support for working with some future cards.  It also removes the
use of multiple pci_driver registrations and instead does its own
adapter ordering.
---
 drivers/scsi/ips.c | 472 ++++++++++++++++++++++++++++++++++++-----------------
 drivers/scsi/ips.h | 148 ++++++++++-------
 2 files changed, 408 insertions(+), 212 deletions(-)

diff --git a/drivers/scsi/ips.c b/drivers/scsi/ips.c
index d2f3efbb4fc3..52942481a758 100644
--- a/drivers/scsi/ips.c
+++ b/drivers/scsi/ips.c
@@ -6,7 +6,7 @@
 /* David Jeffery, Adaptec, Inc. */
 /* */
 /* Copyright (C) 2000 IBM Corporation */
-/* Copyright (C) 2002 Adaptec, Inc. */
+/* Copyright (C) 2002,2003 Adaptec, Inc.
*/ /* */ /* This program is free software; you can redistribute it and/or modify */ /* it under the terms of the GNU General Public License as published by */ @@ -127,6 +127,9 @@ /* - Get rid on IOCTL_NEW_COMMAND code */ /* - Add Extended DCDB Commands for Tape Support in 5I */ /* 5.10.12 - use pci_dma interfaces, update for 2.5 kernel changes */ +/* 5.10.15 - remove unused code (sem, macros, etc.) */ +/* 5.30.00 - use __devexit_p() */ +/* 6.00.00 - Add 6x Adapters and Battery Flash */ /*****************************************************************************/ /* @@ -191,14 +194,16 @@ /* * DRIVER_VER */ -#define IPS_VERSION_HIGH "5.10" -#define IPS_VERSION_LOW ".13-BETA " +#define IPS_VERSION_HIGH "5.99" +#define IPS_VERSION_LOW ".00-BETA" + #if !defined(__i386__) && !defined(__ia64__) #error "This driver has only been tested on the x86/ia64 platforms" #endif #if LINUX_VERSION_CODE <= LinuxVersionCode(2,5,0) + #include "sd.h" #define IPS_SG_ADDRESS(sg) ((sg)->address) #define IPS_LOCK_SAVE(lock,flags) spin_lock_irqsave(&io_request_lock,flags) #define IPS_UNLOCK_RESTORE(lock,flags) spin_unlock_irqrestore(&io_request_lock,flags) @@ -241,34 +246,23 @@ static int ips_reset_timeout = 60 * 5; static int ips_force_memio = 1; /* Always use Memory Mapped I/O */ static int ips_force_i2o = 1; /* Always use I2O command delivery */ static int ips_ioctlsize = IPS_IOCTL_SIZE; /* Size of the ioctl buffer */ -static int ips_cd_boot = 0; /* Booting from ServeRAID Manager CD */ +static int ips_cd_boot = 0; /* Booting from Manager CD */ static char *ips_FlashData = NULL; /* CD Boot - Flash Data Buffer */ -static long ips_FlashDataInUse = 0; /* CD Boot - Flash Data In Use Flag */ +static long ips_FlashDataInUse = 0; /* CD Boot - Flash Data In Use Flag */ static uint32_t MaxLiteCmds = 32; /* Max Active Cmds for a Lite Adapter */ +static Scsi_Host_Template ips_driver_template = IPS; IPS_DEFINE_COMPAT_TABLE( Compatable ); /* Version Compatability Table */ - /* This table describes any / all ServeRAID Adapters */ + /* This table describes all ServeRAID Adapters */ static struct pci_device_id ips_pci_table[] __devinitdata = { { 0x1014, 0x002E, PCI_ANY_ID, PCI_ANY_ID, 0, 0 }, { 0x1014, 0x01BD, PCI_ANY_ID, PCI_ANY_ID, 0, 0 }, + { 0x9005, 0x0250, PCI_ANY_ID, PCI_ANY_ID, 0, 0 }, { 0, } }; - /* This table describes only Sarasota ( ServeRAID 5i ) Adapters */ - static struct pci_device_id ips_pci_table_5i[] __devinitdata = { - { 0x1014, 0x01BD, PCI_ANY_ID, 0x259, 0, 0 }, - { 0x1014, 0x01BD, PCI_ANY_ID, 0x258, 0, 0 }, - { 0, } - }; - - /* This table describes all i960 Adapters */ - static struct pci_device_id ips_pci_table_i960[] __devinitdata = { - { 0x1014, 0x01BD, PCI_ANY_ID, PCI_ANY_ID, 0, 0 }, - { 0, } - }; - MODULE_DEVICE_TABLE( pci, ips_pci_table ); static char ips_hot_plug_name[] = "ips"; @@ -283,27 +277,13 @@ IPS_DEFINE_COMPAT_TABLE( Compatable ); /* Version Compatability Ta .remove = __devexit_p(ips_remove_device), }; - struct pci_driver ips_pci_driver_5i = { - .name = ips_hot_plug_name, - .id_table = ips_pci_table_5i, - .probe = ips_insert_device, - .remove = __devexit_p(ips_remove_device), - }; - - struct pci_driver ips_pci_driver_i960 = { - .name = ips_hot_plug_name, - .id_table = ips_pci_table_i960, - .probe = ips_insert_device, - .remove = __devexit_p(ips_remove_device), - }; - /* * Necessary forward function protoypes */ static int ips_halt(struct notifier_block *nb, ulong event, void *buf); -#define MAX_ADAPTER_NAME 11 +#define MAX_ADAPTER_NAME 15 static char ips_adapter_name[][30] = { "ServeRAID", @@ 
-318,9 +298,12 @@ static char ips_adapter_name[][30] = { "ServeRAID 4Mx", "ServeRAID 4Lx", "ServeRAID 5i", - "ServeRAID 5i" + "ServeRAID 5i", + "ServeRAID 00", + "ServeRAID 00" }; + static struct notifier_block ips_notifier = { ips_halt, NULL, 0 }; @@ -390,9 +373,6 @@ int ips_release(struct Scsi_Host *); int ips_eh_abort(Scsi_Cmnd *); int ips_eh_reset(Scsi_Cmnd *); int ips_queue(Scsi_Cmnd *, void (*) (Scsi_Cmnd *)); -int ips_biosparam(struct scsi_device *, struct block_device *, - sector_t, int *); -int ips_slave_configure(Scsi_Device *); const char * ips_info(struct Scsi_Host *); void do_ipsintr(int, void *, struct pt_regs *); static int ips_hainit(ips_ha_t *); @@ -441,7 +421,6 @@ static int ips_flash_firmware(ips_ha_t *, ips_passthru_t *, ips_scb_t *); static void ips_free_flash_copperhead(ips_ha_t *ha); static void ips_get_bios_version(ips_ha_t *, int); static void ips_identify_controller(ips_ha_t *); -//static void ips_select_queue_depth(struct Scsi_Host *, Scsi_Device *); static void ips_chkstatus(ips_ha_t *, IPS_STATUS *); static void ips_enable_int_copperhead(ips_ha_t *); static void ips_enable_int_copperhead_memio(ips_ha_t *); @@ -489,11 +468,11 @@ static void copy_mem_info(IPS_INFOSTR *, char *, int); static int copy_info(IPS_INFOSTR *, char *, ...); static int ips_get_version_info(ips_ha_t *ha, IPS_VERSION_DATA *Buffer, int intr ); static void ips_version_check(ips_ha_t *ha, int intr); -static int ips_abort_init(ips_ha_t *ha, struct Scsi_Host *sh, int index); +static int ips_abort_init(ips_ha_t *ha, int index); static int ips_init_phase2( int index ); static int ips_init_phase1( struct pci_dev *pci_dev, int *indexPtr ); - +static int ips_register_scsi(int index); /*--------------------------------------------------------------------------*/ /* Exported Functions */ /*--------------------------------------------------------------------------*/ @@ -562,6 +541,7 @@ __setup("ips=", ips_setup); /****************************************************************************/ int ips_detect(Scsi_Host_Template *SHT) { + int i; METHOD_TRACE("ips_detect", 1); @@ -570,36 +550,26 @@ ips_detect(Scsi_Host_Template *SHT) { ips_setup(ips); #endif - /* If Booting from the ServeRAID Manager CD, Allocate a large Flash */ - /* Buffer ( so we won't need to allocate one for each adapter ). */ + /* If Booting from the Manager CD, Allocate a large Flash */ + /* Buffer ( so we won't need to allocate one for each adapter ). */ if ( ips_cd_boot ) { - ips_FlashData = ( char * ) __get_free_pages( GFP_KERNEL, 7 ); + ips_FlashData = ( char * ) __get_free_pages( GFP_ATOMIC, 7 ); if (ips_FlashData == NULL) { /* The validity of this pointer is checked in ips_make_passthru() before it is used */ printk( KERN_WARNING "ERROR: Can't Allocate Large Buffer for Flashing\n" ); } } + if (!pci_present()) + return (0); SHT->proc_info = ips_proc_info; SHT->proc_name = "ips"; - #if LINUX_VERSION_CODE < LinuxVersionCode(2,5,0) - spin_unlock_irq(&io_request_lock); - #endif - /* By definition, a Sarasota ( 5i ) Adapter MUST be enumerated first or the */ - /* server may not boot properly. The adapters must be enumerated in exactly */ - /* the same order as ServeRAID BIOS for the machine to come up properly. 
*/ - - pci_module_init(&ips_pci_driver_5i); /* Ask for 5i Adapters First */ - if (ips_num_controllers) /* If there is a 5i Adapter */ - pci_module_init(&ips_pci_driver_i960); /* Get all i960's next */ - pci_module_init(&ips_pci_driver); /* Get all remaining Adapters */ - /* ( in normal BUS order ) */ - #if LINUX_VERSION_CODE < LinuxVersionCode(2,5,0) - spin_lock_irq(&io_request_lock); - #endif - if (ips_num_controllers > 0) - register_reboot_notifier(&ips_notifier); + for(i = 0; i < ips_num_controllers; i++){ + if ( ips_register_scsi(i) ) + ips_free(ips_ha[i]); + ips_released_controllers++; + } return (ips_num_controllers); } @@ -614,7 +584,7 @@ static void ips_setup_funclist(ips_ha_t *ha){ /* * Setup Functions */ - if (IPS_IS_MORPHEUS(ha)) { + if (IPS_IS_MORPHEUS(ha) || IPS_IS_MARCO(ha)) { /* morpheus / marco / sebring */ ha->func.isintr = ips_isintr_morpheus; ha->func.isinit = ips_isinit_morpheus; @@ -734,10 +704,6 @@ ips_release(struct Scsi_Host *sh) { ips_released_controllers++; - if (ips_num_controllers == ips_released_controllers){ - unregister_reboot_notifier(&ips_notifier); - pci_unregister_driver(&ips_pci_driver); - } return (FALSE); } @@ -1167,7 +1133,7 @@ ips_queue(Scsi_Cmnd *SC, void (*done) (Scsi_Cmnd *)) { /* Set bios geometry for the controller */ /* */ /****************************************************************************/ -int +static int ips_biosparam(struct scsi_device *sdev, struct block_device *bdev, sector_t capacity, int geom[]) { ips_ha_t *ha; @@ -1301,6 +1267,12 @@ do_ipsintr(int irq, void *dev_id, struct pt_regs *regs) { if (!ha) return; host = ips_sh[ha->host_num]; + /* interrupt during initialization */ + if(!host){ + (*ha->func.intr)(ha); + return; + } + IPS_LOCK_SAVE(host->host_lock, cpu_flags); if (!ha->active) { @@ -2238,6 +2210,17 @@ ips_identify_controller(ips_ha_t *ha) { } break; + + case IPS_DEVICEID_MARCO: + switch (ha->subdevice_id) { + case IPS_SUBDEVICEID_6M: + ha->ad_type = IPS_ADTYPE_SERVERAID6M; + break; + case IPS_SUBDEVICEID_6I: + ha->ad_type = IPS_ADTYPE_SERVERAID6I; + break; + } + break; } } @@ -2470,6 +2453,10 @@ ips_hainit(ips_ha_t *ha) { return (0); } + /* If there are Logical Drives and a Reset Occurred, then an EraseStripeLock is Needed */ + if ( (ha->conf->ucLogDriveCount > 0) && (ha->requires_esl == 1) ) + ips_clear_adapter(ha, IPS_INTR_IORL); + /* set limits on SID, LUN, BUS */ ha->ntargets = IPS_MAX_TARGETS + 1; ha->nlun = 1; @@ -4946,6 +4933,13 @@ ips_init_morpheus(ips_ha_t *ha) { writel(Oimr, ha->mem_ptr + IPS_REG_I960_OIMR); /* if we get here then everything went OK */ + + /* Since we did a RESET, an EraseStripeLock may be needed */ + if (Post == 0xEF10) { + if ( (Config == 0x000F) || (Config == 0x0009) ) + ha->requires_esl = 1; + } + return (1); } @@ -6598,21 +6592,186 @@ static int ips_get_version_info(ips_ha_t *ha, IPS_VERSION_DATA *Buffer, int intr return( rc ); } - - -static Scsi_Host_Template driver_template = IPS; -#include "scsi_module.c" - -static int ips_abort_init(ips_ha_t *ha, struct Scsi_Host *sh, int index){ +/****************************************************************************/ +/* */ +/* Routine Name: ips_abort_init */ +/* */ +/* Routine Description: */ +/* cleanup routine for a failed adapter initialization */ +/****************************************************************************/ +static int ips_abort_init(ips_ha_t *ha, int index){ ha->active = 0; ips_free(ha); - scsi_unregister(sh); ips_ha[index] = 0; ips_sh[index] = 0; return -1; } 
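
The ordering helpers that follow are the heart of the reorganization: rather than registering three separate pci_drivers just to force ServeRAID 5i adapters to probe first, the driver now probes everything once and then sorts ips_ha[] itself. The rotation scheme is easier to see in isolation, so here is a minimal user-space sketch of the same stable partition-by-rotation idea; the single-character class codes and the sample array are made up for illustration, standing in for the IPS_ADTYPE_* values and the probed adapter list:

#include <stdio.h>

/* Hypothetical sample data: 'M' = 6M-class, 'N' = 4Lx/4Mx-class,
 * 'S' = 5i/6i-class, 'A' = older adapters. */
static char ctrl[] = { 'S', 'A', 'M', 'N', 'S', 'A' };
#define NCTRL ((int)(sizeof(ctrl) / sizeof(ctrl[0])))

/*
 * Same move as ips_shift_controllers(): rotate the entry at 'high'
 * down into slot 'low', shifting everything in between up by one.
 * (The real helper also renumbers ha->host_num as it goes.)
 */
static void shift(int low, int high)
{
	char sav = ctrl[high];
	int i;

	for (i = high; i > low; i--)
		ctrl[i] = ctrl[i - 1];
	ctrl[low] = sav;
}

int main(void)
{
	/* BIOS-style order list: element 0 holds the entry count,
	 * as in nvram->adapter_order[]. */
	char order[] = { 4, 'M', 'N', 'S', 'A' };
	int i, j, pos = 0;

	for (i = 1; i <= order[0]; i++) {
		for (j = pos; j < NCTRL; j++) {
			if (ctrl[j] == order[i]) {
				shift(pos, j);
				pos++;
			}
		}
	}
	printf("%.*s\n", NCTRL, ctrl);	/* prints "MNSSAA" */
	return 0;
}

Because each class scan starts at 'pos' and takes matches left to right, adapters keep their bus-probe order within a class, which preserves the BIOS enumeration the old three-driver trick was approximating.
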
+/****************************************************************************/ +/* */ +/* Routine Name: ips_shift_controllers */ +/* */ +/* Routine Description: */ +/* helper function for ordering adapters */ +/****************************************************************************/ +static void +ips_shift_controllers(int lowindex, int highindex){ + ips_ha_t *ha_sav = ips_ha[highindex]; + struct Scsi_Host *sh_sav = ips_sh[highindex]; + int i; + + for ( i = highindex; i > lowindex; i--){ + ips_ha[i] = ips_ha[i - 1]; + ips_sh[i] = ips_sh[i - 1]; + ips_ha[i]->host_num = i; + } + ha_sav->host_num = lowindex; + ips_ha[lowindex] = ha_sav; + ips_sh[lowindex] = sh_sav; +} + +/****************************************************************************/ +/* */ +/* Routine Name: ips_order_controllers */ +/* */ +/* Routine Description: */ +/* place controllers is the "proper" boot order */ +/****************************************************************************/ +static void +ips_order_controllers(void){ + int i, j, tmp, position = 0; + IPS_NVRAM_P5 *nvram; + if(!ips_ha[0]) + return; + nvram = ips_ha[0]->nvram; + + if(nvram->adapter_order[0]){ + for(i = 1; i <= nvram->adapter_order[0]; i++){ + for(j = position; j < ips_num_controllers; j++){ + switch(ips_ha[j]->ad_type){ + case IPS_ADTYPE_SERVERAID6M: + if(nvram->adapter_order[i] == 'M'){ + ips_shift_controllers(position, j); + position++; + } + break; + case IPS_ADTYPE_SERVERAID4L: + case IPS_ADTYPE_SERVERAID4M: + case IPS_ADTYPE_SERVERAID4MX: + case IPS_ADTYPE_SERVERAID4LX: + if(nvram->adapter_order[i] == 'N'){ + ips_shift_controllers(position, j); + position++; + } + break; + case IPS_ADTYPE_SERVERAID6I: + case IPS_ADTYPE_SERVERAID5I2: + case IPS_ADTYPE_SERVERAID5I1: + if(nvram->adapter_order[i] == 'S'){ + ips_shift_controllers(position, j); + position++; + } + break; + case IPS_ADTYPE_SERVERAID: + case IPS_ADTYPE_SERVERAID2: + case IPS_ADTYPE_NAVAJO: + case IPS_ADTYPE_KIOWA: + case IPS_ADTYPE_SERVERAID3L: + case IPS_ADTYPE_SERVERAID3: + case IPS_ADTYPE_SERVERAID4H: + if(nvram->adapter_order[i] == 'A'){ + ips_shift_controllers(position, j); + position++; + } + break; + default: + } + } + } + /* if adapter_order[0], then ordering is complete */ + return; + } + /* old bios, use older ordering */ + tmp = 0; + for(i = position; i < ips_num_controllers; i++){ + if (ips_ha[i]->ad_type == IPS_ADTYPE_SERVERAID5I2 || + ips_ha[i]->ad_type == IPS_ADTYPE_SERVERAID5I1){ + ips_shift_controllers(position, i); + position++; + tmp = 1; + } + } + /* if there were no 5I cards, then don't do any extra ordering */ + if (!tmp) + return; + for(i = position; i < ips_num_controllers; i++){ + if (ips_ha[i]->ad_type == IPS_ADTYPE_SERVERAID4L || + ips_ha[i]->ad_type == IPS_ADTYPE_SERVERAID4M || + ips_ha[i]->ad_type == IPS_ADTYPE_SERVERAID4LX || + ips_ha[i]->ad_type == IPS_ADTYPE_SERVERAID4MX){ + ips_shift_controllers(position, i); + position++; + } + } + + return; +} +/****************************************************************************/ +/* */ +/* Routine Name: ips_register_scsi */ +/* */ +/* Routine Description: */ +/* perform any registration and setup with the scsi layer */ +/****************************************************************************/ +static int +ips_register_scsi( int index){ + struct Scsi_Host *sh; + ips_ha_t *ha, *oldha; + sh = scsi_register(&ips_driver_template, sizeof(ips_ha_t)); + if(!sh) { + printk(KERN_WARNING "Unable to register controller with SCSI subsystem\n" ); + return -1; + } + oldha = ips_ha[index]; + ha = 
IPS_HA(sh); + memcpy(ha, oldha, sizeof(ips_ha_t)); + free_irq(oldha->irq, oldha); + /* Install the interrupt handler with the new ha */ + if (request_irq(ha->irq, do_ipsintr, SA_SHIRQ, ips_name, ha)) { + printk(KERN_WARNING "Unable to install interrupt handler\n" ); + scsi_unregister(sh); + return -1; + } + + kfree(oldha); + ips_sh[index] = sh; + ips_ha[index] = ha; + scsi_set_pci_device(sh, ha->pcidev); + + /* Store away needed values for later use */ + sh->io_port = ha->io_addr; + sh->n_io_port = ha->io_addr ? 255 : 0; + sh->unique_id = (ha->io_addr) ? ha->io_addr : ha->mem_addr; + sh->irq = ha->irq; + sh->sg_tablesize = sh->hostt->sg_tablesize; + sh->can_queue = sh->hostt->can_queue; + sh->cmd_per_lun = sh->hostt->cmd_per_lun; + sh->unchecked_isa_dma = sh->hostt->unchecked_isa_dma; + sh->use_clustering = sh->hostt->use_clustering; + +#if LINUX_VERSION_CODE >= LinuxVersionCode(2,4,7) + sh->max_sectors = 128; +#endif + + sh->max_id = ha->ntargets; + sh->max_lun = ha->nlun; + sh->max_channel = ha->nbus - 1; + sh->can_queue = ha->max_cmds-1; + + return 0; +} + /*---------------------------------------------------------------------------*/ /* Routine Name: ips_remove_device */ /* */ @@ -6637,6 +6796,43 @@ static void __devexit ips_remove_device(struct pci_dev *pci_dev) } } +/****************************************************************************/ +/* */ +/* Routine Name: ips_module_init */ +/* */ +/* Routine Description: */ +/* function called on module load */ +/****************************************************************************/ +static int __init +ips_module_init(void){ + if( pci_module_init(&ips_pci_driver) < 0 ) + return -ENODEV; + ips_driver_template.module = THIS_MODULE; + ips_order_controllers(); + if( scsi_register_host(&ips_driver_template) ){ + pci_unregister_driver(&ips_pci_driver); + return -ENODEV; + } + + return 0; +} + +/****************************************************************************/ +/* */ +/* Routine Name: ips_module_exit */ +/* */ +/* Routine Description: */ +/* function called on module unload */ +/****************************************************************************/ +static void __exit +ips_module_exit(void){ + scsi_unregister_host(&ips_driver_template); + pci_unregister_driver(&ips_pci_driver); + unregister_reboot_notifier(&ips_notifier); +} + +module_init(ips_module_init); +module_exit(ips_module_exit); /*---------------------------------------------------------------------------*/ /* Routine Name: ips_insert_device */ @@ -6679,7 +6875,6 @@ static int __devinit ips_insert_device(struct pci_dev *pci_dev, const struct pci /*---------------------------------------------------------------------------*/ static int ips_init_phase1( struct pci_dev *pci_dev, int *indexPtr ) { - struct Scsi_Host *sh; ips_ha_t *ha; uint32_t io_addr; uint32_t mem_addr; @@ -6770,35 +6965,47 @@ static int ips_init_phase1( struct pci_dev *pci_dev, int *indexPtr ) subdevice_id = pci_dev->subsystem_device; /* found a controller */ - sh = scsi_register(&driver_template, sizeof(ips_ha_t)); -#if LINUX_VERSION_CODE > LinuxVersionCode(2,5,0) - pci_set_dma_mask(pci_dev, (u64)0xffffffff); - scsi_set_pci_device(sh, pci_dev); -#endif - if (sh == NULL) { - printk(KERN_WARNING "Unable to register controller with SCSI subsystem\n" ); + ha = kmalloc(sizeof(ips_ha_t), GFP_KERNEL); + if (ha == NULL) { + printk(KERN_WARNING "Unable to allocate temporary ha struct\n" ); return -1; } - ha = IPS_HA(sh); memset(ha, 0, sizeof(ips_ha_t)); - ips_sh[index] = sh; + ips_sh[index] = NULL; 
ips_ha[index] = ha; ha->active = 1; + /* Store info in HA structure */ + ha->irq = irq; + ha->io_addr = io_addr; + ha->io_len = io_len; + ha->mem_addr = mem_addr; + ha->mem_len = mem_len; + ha->mem_ptr = mem_ptr; + ha->ioremap_ptr = ioremap_ptr; + ha->host_num = ( uint32_t) index; + ha->revision_id = revision_id; + ha->slot_num = PCI_SLOT(pci_dev->devfn); + ha->device_id = pci_dev->device; + ha->subdevice_id = subdevice_id; + ha->pcidev = pci_dev; + + pci_set_dma_mask(ha->pcidev, (u64)0xffffffff); + ha->enq = kmalloc(sizeof(IPS_ENQ), GFP_KERNEL); if (!ha->enq) { printk(KERN_WARNING "Unable to allocate host inquiry structure\n" ); - return ips_abort_init(ha, sh, index); + return ips_abort_init(ha, index); } ha->adapt = pci_alloc_consistent(pci_dev, sizeof(IPS_ADAPTER) + sizeof(IPS_IO_CMD), &dma_address); if (!ha->adapt) { printk(KERN_WARNING "Unable to allocate host adapt & dummy structures\n"); - return ips_abort_init(ha, sh, index); + return ips_abort_init(ha, index); } ha->adapt->hw_status_start = dma_address; ha->dummy = (void *)(ha->adapt + 1); @@ -6807,21 +7014,21 @@ static int ips_init_phase1( struct pci_dev *pci_dev, int *indexPtr ) if (!ha->conf) { printk(KERN_WARNING "Unable to allocate host conf structure\n" ); - return ips_abort_init(ha, sh, index); + return ips_abort_init(ha, index); } ha->nvram = kmalloc(sizeof(IPS_NVRAM_P5), GFP_KERNEL); if (!ha->nvram) { printk(KERN_WARNING "Unable to allocate host NVRAM structure\n" ); - return ips_abort_init(ha, sh, index); + return ips_abort_init(ha, index); } ha->subsys = kmalloc(sizeof(IPS_SUBSYS), GFP_KERNEL); if (!ha->subsys) { printk(KERN_WARNING "Unable to allocate host subsystem structure\n" ); - return ips_abort_init(ha, sh, index); + return ips_abort_init(ha, index); } for (count = PAGE_SIZE, ha->ioctl_order = 0; @@ -6838,49 +7045,19 @@ static int ips_init_phase1( struct pci_dev *pci_dev, int *indexPtr ) ha->ioctl_datasize = 0; } - /* Store away needed values for later use */ - sh->io_port = io_addr; - sh->n_io_port = io_addr ? 255 : 0; - sh->unique_id = (io_addr) ? 
io_addr : mem_addr; - sh->irq = irq; - //sh->select_queue_depths = ips_select_queue_depth; - sh->sg_tablesize = sh->hostt->sg_tablesize; - sh->can_queue = sh->hostt->can_queue; - sh->cmd_per_lun = sh->hostt->cmd_per_lun; - sh->unchecked_isa_dma = sh->hostt->unchecked_isa_dma; - sh->use_clustering = sh->hostt->use_clustering; - -#if LINUX_VERSION_CODE >= LinuxVersionCode(2,4,7) - sh->max_sectors = 128; -#endif - - /* Store info in HA structure */ - ha->irq = irq; - ha->io_addr = io_addr; - ha->io_len = io_len; - ha->mem_addr = mem_addr; - ha->mem_len = mem_len; - ha->mem_ptr = mem_ptr; - ha->ioremap_ptr = ioremap_ptr; - ha->host_num = ( uint32_t) index; - ha->revision_id = revision_id; - ha->slot_num = PCI_SLOT(pci_dev->devfn); - ha->device_id = pci_dev->device; - ha->subdevice_id = subdevice_id; - ha->pcidev = pci_dev; - /* * Setup Functions */ ips_setup_funclist(ha); - if ( IPS_IS_MORPHEUS( ha ) ) { + if ( ( IPS_IS_MORPHEUS( ha ) ) || ( IPS_IS_MARCO( ha ) ) ) { /* If Morpheus appears dead, reset it */ IsDead = readl( ha->mem_ptr + IPS_REG_I960_MSG1 ); if ( IsDead == 0xDEADBEEF ) { ips_reset_morpheus( ha ); } } + /* * Initialize the card if it isn't already */ @@ -6891,31 +7068,14 @@ static int ips_init_phase1( struct pci_dev *pci_dev, int *indexPtr ) * Initialization failed */ printk(KERN_WARNING "Unable to initialize controller\n" ); - return ips_abort_init(ha, sh, index); + return ips_abort_init(ha, index); } } - /* Install the interrupt handler */ - if (request_irq(irq, do_ipsintr, SA_SHIRQ, ips_name, ha)) { - printk(KERN_WARNING "Unable to install interrupt handler\n" ); - return ips_abort_init(ha, sh, index); - } - - /* - * Allocate a temporary SCB for initialization - */ - ha->max_cmds = 1; - if (!ips_allocatescbs(ha)) { - printk(KERN_WARNING "Unable to allocate a CCB\n" ); - free_irq(ha->irq, ha); - return ips_abort_init(ha, sh, index); - } - *indexPtr = index; return SUCCESS; } - /*---------------------------------------------------------------------------*/ /* Routine Name: ips_init_phase2 */ /* */ @@ -6927,24 +7087,36 @@ static int ips_init_phase1( struct pci_dev *pci_dev, int *indexPtr ) /*---------------------------------------------------------------------------*/ static int ips_init_phase2( int index ) { - struct Scsi_Host *sh; ips_ha_t *ha; ha = ips_ha[index]; - sh = ips_sh[index]; METHOD_TRACE("ips_init_phase2", 1); if (!ha->active) { - scsi_unregister(sh); ips_ha[index] = NULL; - ips_sh[index] = NULL; - return -1;; + return -1; + } + + /* Install the interrupt handler */ + if (request_irq(ha->irq, do_ipsintr, SA_SHIRQ, ips_name, ha)) { + printk(KERN_WARNING "Unable to install interrupt handler\n" ); + return ips_abort_init(ha, index); + } + + /* + * Allocate a temporary SCB for initialization + */ + ha->max_cmds = 1; + if (!ips_allocatescbs(ha)) { + printk(KERN_WARNING "Unable to allocate a CCB\n" ); + free_irq(ha->irq, ha); + return ips_abort_init(ha, index); } if (!ips_hainit(ha)) { printk(KERN_WARNING "Unable to initialize controller\n" ); free_irq(ha->irq, ha); - return ips_abort_init(ha, sh, index); + return ips_abort_init(ha, index); } /* Free the temporary SCB */ ips_deallocatescbs(ha, 1); @@ -6953,20 +7125,16 @@ static int ips_init_phase2( int index ) if (!ips_allocatescbs(ha)) { printk(KERN_WARNING "Unable to allocate CCBs\n" ); free_irq(ha->irq, ha); - return ips_abort_init(ha, sh, index); + return ips_abort_init(ha, index); } - /* finish setting values */ - sh->max_id = ha->ntargets; - sh->max_lun = ha->nlun; - sh->max_channel = ha->nbus - 1; - sh->can_queue = 
ha->max_cmds-1; - return SUCCESS; } +#if LINUX_VERSION_CODE >= LinuxVersionCode(2,4,9) MODULE_LICENSE("GPL"); +#endif /* * Overrides for Emacs so that we almost follow Linus's tabbing style. diff --git a/drivers/scsi/ips.h b/drivers/scsi/ips.h index 6da69d0c8995..dbd21f197d59 100644 --- a/drivers/scsi/ips.h +++ b/drivers/scsi/ips.h @@ -59,8 +59,6 @@ extern int ips_eh_abort(Scsi_Cmnd *); extern int ips_eh_reset(Scsi_Cmnd *); extern int ips_queue(Scsi_Cmnd *, void (*) (Scsi_Cmnd *)); - extern int ips_biosparam(struct scsi_device *, struct block_device *, - sector_t, int *); extern int ips_slave_configure(Scsi_Device *); extern const char * ips_info(struct Scsi_Host *); extern void do_ips(int, void *, struct pt_regs *); @@ -81,18 +79,32 @@ (ha->revision_id >= IPS_REVID_CLARINETP1) && \ (ha->revision_id <= IPS_REVID_CLARINETP3)) ? 1 : 0) #define IPS_IS_MORPHEUS(ha) (ha->device_id == IPS_DEVICEID_MORPHEUS) + #define IPS_IS_MARCO(ha) (ha->device_id == IPS_DEVICEID_MARCO) #define IPS_USE_I2O_DELIVER(ha) ((IPS_IS_MORPHEUS(ha) || \ (IPS_IS_TROMBONE(ha) && \ (ips_force_i2o))) ? 1 : 0) - #define IPS_USE_I2O_STATUS(ha) (IPS_IS_MORPHEUS(ha)) #define IPS_USE_MEMIO(ha) ((IPS_IS_MORPHEUS(ha) || \ ((IPS_IS_TROMBONE(ha) || IPS_IS_CLARINET(ha)) && \ (ips_force_memio))) ? 1 : 0) + #if LINUX_VERSION_CODE < LinuxVersionCode(2,4,4) + #define pci_set_dma_mask(dev,mask) (1) + #define scsi_set_pci_device(sh,dev) (0) + #endif + + #if LINUX_VERSION_CODE < LinuxVersionCode(2,5,0) + #define scsi_register_host(x) scsi_register_module(MODULE_SCSI_HA,x) + #define scsi_unregister_host(x) scsi_unregister_module(MODULE_SCSI_HA,x) + #endif + #ifndef MDELAY #define MDELAY mdelay #endif - + + #ifndef min + #define min(x,y) ((x) < (y) ? x : y) + #endif + /* * Adapter address map equates */ @@ -152,7 +164,7 @@ #define IPS_CMD_DCDB 0x04 #define IPS_CMD_DCDB_SG 0x84 #define IPS_CMD_EXTENDED_DCDB 0x95 - #define IPS_CMD_EXTENDED_DCDB_SG 0x96 + #define IPS_CMD_EXTENDED_DCDB_SG 0x96 #define IPS_CMD_CONFIG_SYNC 0x58 #define IPS_CMD_ERROR_TABLE 0x17 #define IPS_CMD_DOWNLOAD 0x20 @@ -166,6 +178,7 @@ #define IPS_CSL 0xFF #define IPS_POCL 0x30 #define IPS_NORM_STATE 0x00 + #define IPS_MAX_ADAPTER_TYPES 3 #define IPS_MAX_ADAPTERS 16 #define IPS_MAX_IOCTL 1 #define IPS_MAX_IOCTL_QUEUE 8 @@ -188,15 +201,19 @@ #define IPS_INTR_IORL 1 #define IPS_FFDC 99 #define IPS_ADAPTER_ID 0xF - #define IPS_VENDORID 0x1014 + #define IPS_VENDORID_IBM 0x1014 + #define IPS_VENDORID_ADAPTEC 0x9005 #define IPS_DEVICEID_COPPERHEAD 0x002E #define IPS_DEVICEID_MORPHEUS 0x01BD + #define IPS_DEVICEID_MARCO 0x0250 #define IPS_SUBDEVICEID_4M 0x01BE #define IPS_SUBDEVICEID_4L 0x01BF #define IPS_SUBDEVICEID_4MX 0x0208 #define IPS_SUBDEVICEID_4LX 0x020E #define IPS_SUBDEVICEID_5I2 0x0259 #define IPS_SUBDEVICEID_5I1 0x0258 + #define IPS_SUBDEVICEID_6M 0x0279 + #define IPS_SUBDEVICEID_6I 0x028C #define IPS_IOCTL_SIZE 8192 #define IPS_STATUS_SIZE 4 #define IPS_STATUS_Q_SIZE (IPS_MAX_CMDS+1) * IPS_STATUS_SIZE @@ -279,6 +296,8 @@ #define IPS_ADTYPE_SERVERAID4LX 0x0B #define IPS_ADTYPE_SERVERAID5I2 0x0C #define IPS_ADTYPE_SERVERAID5I1 0x0D + #define IPS_ADTYPE_SERVERAID6M 0x0E + #define IPS_ADTYPE_SERVERAID6I 0x0F /* * Adapter Command/Status Packet Definitions @@ -349,6 +368,12 @@ #define IPS_SCSI_MP3_Removeable 0x04 #define IPS_SCSI_MP3_AllocateSurface 0x08 + /* + * HA Flags + */ + + #define IPS_HA_ENH_SG 0x1 + /* * SCB Flags */ @@ -387,34 +412,31 @@ * Scsi_Host Template */ #if LINUX_VERSION_CODE < LinuxVersionCode(2,5,0) -#define IPS{ \ - .module = NULL, \ - .proc_info = NULL, 
\ - .name = NULL, \ + static void ips_select_queue_depth(struct Scsi_Host *, Scsi_Device *); + static int ips_biosparam(Disk *disk, kdev_t dev, int geom[]); +#define IPS { \ .detect = ips_detect, \ .release = ips_release, \ .info = ips_info, \ - .command = NULL, \ .queuecommand = ips_queue, \ - .eh_strategy_handler = NULL, \ .eh_abort_handler = ips_eh_abort, \ - .eh_device_reset_handler = NULL, \ - .eh_bus_reset_handler = NULL, \ .eh_host_reset_handler = ips_eh_reset, \ - .abort = NULL, \ - .reset = NULL, \ - .slave_attach = NULL, \ .bios_param = ips_biosparam,\ + .select_queue_depths = ips_select_queue_depth, \ .can_queue = 0, \ .this_id = -1, \ .sg_tablesize = IPS_MAX_SG, \ - .cmd_per_lun = 3, \ + .cmd_per_lun = 16, \ .present = 0, \ .unchecked_isa_dma = 0, \ .use_clustering = ENABLE_CLUSTERING,\ - .use_new_eh_code = 1 \ + .use_new_eh_code = 1, \ + IPS_HIGHMEM_IO \ } #else + static int ips_biosparam(struct scsi_device *sdev, struct block_device *bdev, + sector_t capacity, int geom[]); + int ips_slave_configure(Scsi_Device *SDptr); #define IPS { \ .detect = ips_detect, \ .release = ips_release, \ @@ -790,7 +812,8 @@ typedef struct { uint8_t ReservedForOS2[8]; uint8_t bios_high[4]; /* Adapter's Flashed BIOS Version */ uint8_t bios_low[4]; - uint8_t Filler[76]; + uint8_t adapter_order[16]; /* BIOS Telling us the Sort Order */ + uint8_t Filler[60]; } IPS_NVRAM_P5, *PIPS_NVRAM_P5; /*--------------------------------------------------------------------------*/ @@ -1050,7 +1073,6 @@ typedef struct ips_ha { uint8_t slot_num; /* PCI Slot Number */ uint16_t subdevice_id; /* Subsystem device ID */ uint8_t ioctl_order; /* Number of pages in ioctl */ - uint8_t reserved2; /* Empty */ uint8_t bios_version[8]; /* BIOS Revision */ uint32_t mem_addr; /* Memory mapped address */ uint32_t io_len; /* Size of IO Address */ @@ -1060,8 +1082,9 @@ typedef struct ips_ha { ips_hw_func_t func; /* hw function pointers */ struct pci_dev *pcidev; /* PCI device handle */ char *flash_data; /* Save Area for flash data */ - u8 flash_order; /* Save Area for flash size order */ - u32 flash_datasize; /* Save Area for flash data size */ + u8 flash_order; /* Save Area for flash size order */ + u32 flash_datasize; /* Save Area for flash data size */ + uint8_t requires_esl; /* Requires an EraseStripeLock */ } ips_ha_t; typedef void (*ips_scb_callback) (ips_ha_t *, struct ips_scb *); @@ -1148,58 +1171,63 @@ typedef struct { #define IPS_VER_MAJOR 5 #define IPS_VER_MAJOR_STRING "5" -#define IPS_VER_MINOR 10 -#define IPS_VER_MINOR_STRING "10" -#define IPS_VER_BUILD_STRING "13" -#define IPS_VER_STRING "5.10.13-BETA" -#define IPS_LEGALCOPYRIGHT_STRING "(C) Copyright IBM Corp. 1994, 2002. All Rights Reserved." -#define IPS_NT_LEGALCOPYRIGHT_STRING "(C) Copyright IBM Corp. 1994, 2002." 
+#define IPS_VER_MINOR 99 +#define IPS_VER_MINOR_STRING "99" +#define IPS_VER_BUILD 00 +#define IPS_VER_BUILD_STRING "00" +#define IPS_VER_STRING "5.99.00" /* Version numbers for various adapters */ -#define IPS_VER_SERVERAID1 "2.25.01" -#define IPS_VER_SERVERAID2 "2.88.13" -#define IPS_VER_NAVAJO "2.88.13" -#define IPS_VER_SERVERAID3 "5.10.01" -#define IPS_VER_SERVERAID4H "5.10.01" -#define IPS_VER_SERVERAID4MLx "5.10.01" -#define IPS_VER_SARASOTA "5.10.05" +#define IPS_VER_SERVERAID1 "2.25.01" +#define IPS_VER_SERVERAID2 "2.88.13" +#define IPS_VER_NAVAJO "2.88.13" +#define IPS_VER_SERVERAID3 "5.11.05" +#define IPS_VER_SERVERAID4H "5.11.05" +#define IPS_VER_SERVERAID4MLx "5.11.05" +#define IPS_VER_SARASOTA "5.11.05" +#define IPS_VER_MARCO "0.00.00" +#define IPS_VER_SEBRING "0.00.00" /* Compatability IDs for various adapters */ -#define IPS_COMPAT_UNKNOWN "" -#define IPS_COMPAT_CURRENT "SA510" -#define IPS_COMPAT_SERVERAID1 "2.25.01" -#define IPS_COMPAT_SERVERAID2 "2.88.13" -#define IPS_COMPAT_NAVAJO "2.88.13" -#define IPS_COMPAT_KIOWA "2.88.13" -#define IPS_COMPAT_SERVERAID3H "5.10.01" -#define IPS_COMPAT_SERVERAID3L "5.10.01" -#define IPS_COMPAT_SERVERAID4H "5.10.01" +#define IPS_COMPAT_UNKNOWN "" +#define IPS_COMPAT_CURRENT "SB610" +#define IPS_COMPAT_SERVERAID1 "2.25.01" +#define IPS_COMPAT_SERVERAID2 "2.88.13" +#define IPS_COMPAT_NAVAJO "2.88.13" +#define IPS_COMPAT_KIOWA "2.88.13" +#define IPS_COMPAT_SERVERAID3H "SA510" +#define IPS_COMPAT_SERVERAID3L "SA510" +#define IPS_COMPAT_SERVERAID4H "SA510" #define IPS_COMPAT_SERVERAID4M "SA510" #define IPS_COMPAT_SERVERAID4L "SA510" #define IPS_COMPAT_SERVERAID4Mx "SA510" #define IPS_COMPAT_SERVERAID4Lx "SA510" #define IPS_COMPAT_SARASOTA "SA510" -#define IPS_COMPAT_BIOS "SA510" +#define IPS_COMPAT_MARCO "SA000" +#define IPS_COMPAT_SEBRING "SA000" +#define IPS_COMPAT_BIOS "SA510" -#define IPS_COMPAT_MAX_ADAPTER_TYPE 14 -#define IPS_COMPAT_ID_LENGTH 8 +#define IPS_COMPAT_MAX_ADAPTER_TYPE 16 +#define IPS_COMPAT_ID_LENGTH 8 #define IPS_DEFINE_COMPAT_TABLE(tablename) \ char tablename[IPS_COMPAT_MAX_ADAPTER_TYPE] [IPS_COMPAT_ID_LENGTH] = { \ - IPS_COMPAT_UNKNOWN, \ - IPS_COMPAT_SERVERAID1, \ - IPS_COMPAT_SERVERAID2, \ - IPS_COMPAT_NAVAJO, \ - IPS_COMPAT_KIOWA, \ - IPS_COMPAT_SERVERAID3H, \ - IPS_COMPAT_SERVERAID3L, \ - IPS_COMPAT_SERVERAID4H, \ - IPS_COMPAT_SERVERAID4M, \ - IPS_COMPAT_SERVERAID4L, \ + IPS_COMPAT_UNKNOWN, \ + IPS_COMPAT_SERVERAID1, \ + IPS_COMPAT_SERVERAID2, \ + IPS_COMPAT_NAVAJO, \ + IPS_COMPAT_KIOWA, \ + IPS_COMPAT_SERVERAID3H, \ + IPS_COMPAT_SERVERAID3L, \ + IPS_COMPAT_SERVERAID4H, \ + IPS_COMPAT_SERVERAID4M, \ + IPS_COMPAT_SERVERAID4L, \ IPS_COMPAT_SERVERAID4Mx, \ IPS_COMPAT_SERVERAID4Lx, \ - IPS_COMPAT_SARASOTA, \ - IPS_COMPAT_SARASOTA \ + IPS_COMPAT_SARASOTA, /* one-channel variety of SARASOTA */ \ + IPS_COMPAT_SARASOTA, /* two-channel variety of SARASOTA */ \ + IPS_COMPAT_MARCO, \ + IPS_COMPAT_SEBRING \ } -- cgit v1.2.3 From d31bb16c6a187b32a5f19f72c4100c16ac76d31b Mon Sep 17 00:00:00 2001 From: David Jeffery Date: Wed, 5 Feb 2003 18:23:41 -0800 Subject: [PATCH] ips driver 3/4: 64bit dma addressing This large patch adds support for using 64bit addressing. Special thanks goes to Mike Anderson who did the initial versions of this patch. 
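
Before the diff itself, the shape of the change: every scatter-gather element the driver builds can now be written in one of two layouts, chosen per adapter by IPS_USE_ENH_SGLIST(), and a bus address is split into 32-bit halves on the way out. The stand-alone sketch below shows just that split; the struct layouts are assumed for illustration (the real definitions, along with the pci_dma_lo32()/pci_dma_hi32() helpers, live in ips.h), and the byte-swapping via cpu_to_le32() is omitted for brevity:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Assumed layouts of the two S/G element formats. */
struct ips_std_sg {		/* classic element: 32-bit address only */
	uint32_t address;
	uint32_t length;
};

struct ips_enh_sg {		/* enhanced element: carries the high dword */
	uint32_t address_lo;
	uint32_t address_hi;
	uint32_t length;
};

/* Stand-ins for the pci_dma_lo32()/pci_dma_hi32() helpers. */
static uint32_t dma_lo32(uint64_t a) { return (uint32_t)a; }
static uint32_t dma_hi32(uint64_t a) { return (uint32_t)(a >> 32); }

int main(void)
{
	uint64_t busaddr = 0x00000001fffff000ULL;	/* sits above 4GB */

	struct ips_std_sg s = { dma_lo32(busaddr), 4096 };
	struct ips_enh_sg e = { dma_lo32(busaddr), dma_hi32(busaddr), 4096 };

	/* The classic element silently truncates the high dword ... */
	printf("std: addr 0x%08" PRIx32 " len %" PRIu32 "\n",
	       s.address, s.length);
	/* ... while the enhanced element preserves the full address. */
	printf("enh: addr 0x%08" PRIx32 "%08" PRIx32 " len %" PRIu32 "\n",
	       e.address_hi, e.address_lo, e.length);
	return 0;
}

ips_fill_scb_sg_single() below is then the one place that knows about both layouts; callers just check its return value for the -1 that signals a transfer had to be broken up at ha->max_xfer.
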
--- drivers/scsi/ips.c | 598 ++++++++++++++++++++++++++--------------------------- drivers/scsi/ips.h | 59 +++++- 2 files changed, 343 insertions(+), 314 deletions(-) diff --git a/drivers/scsi/ips.c b/drivers/scsi/ips.c index 52942481a758..8fa145c54c4c 100644 --- a/drivers/scsi/ips.c +++ b/drivers/scsi/ips.c @@ -461,6 +461,8 @@ static int ips_is_passthru(Scsi_Cmnd *); static int ips_make_passthru(ips_ha_t *, Scsi_Cmnd *, ips_scb_t *, int); static int ips_usrcmd(ips_ha_t *, ips_passthru_t *, ips_scb_t *); static void ips_cleanup_passthru(ips_ha_t *, ips_scb_t *); +static void ips_scmd_buf_write(Scsi_Cmnd *scmd, void *data, unsigned int count); +static void ips_scmd_buf_read(Scsi_Cmnd *scmd, void *data, unsigned int count); int ips_proc_info(char *, char **, off_t, int, int, int); static int ips_host_info(ips_ha_t *, char *, off_t, int); @@ -553,7 +555,7 @@ ips_detect(Scsi_Host_Template *SHT) { /* If Booting from the Manager CD, Allocate a large Flash */ /* Buffer ( so we won't need to allocate one for each adapter ). */ if ( ips_cd_boot ) { - ips_FlashData = ( char * ) __get_free_pages( GFP_ATOMIC, 7 ); + ips_FlashData = ( char * ) __get_free_pages( IPS_INIT_GFP, 7 ); if (ips_FlashData == NULL) { /* The validity of this pointer is checked in ips_make_passthru() before it is used */ printk( KERN_WARNING "ERROR: Can't Allocate Large Buffer for Flashing\n" ); @@ -1542,6 +1544,39 @@ ips_is_passthru(Scsi_Cmnd *SC) { return 0; } +/****************************************************************************/ +/* */ +/* Routine Name: ips_alloc_passthru_buffer */ +/* */ +/* Routine Description: */ +/* allocate a buffer large enough for the ioctl data if the ioctl buffer */ +/* is too small or doesn't exist */ +/****************************************************************************/ +static int +ips_alloc_passthru_buffer(ips_ha_t *ha, int length){ + void *bigger_buf; + int count; + int order; + + if(ha->ioctl_data && length <= (PAGE_SIZE << ha->ioctl_order)) + return 0; + /* there is no buffer or it's not big enough, allocate a new one */ + for (count = PAGE_SIZE, order = 0; + count < length; + order++, count <<= 1); + bigger_buf = (void *) __get_free_pages(IPS_ATOMIC_GFP, order); + if (bigger_buf) { + /* free the old memory */ + free_pages((unsigned long) ha->ioctl_data, ha->ioctl_order); + /* use the new memory */ + ha->ioctl_data = (char *) bigger_buf; + ha->ioctl_order = order; + } else { + return -1; + } + return 0; +} + /****************************************************************************/ /* */ /* Routine Name: ips_make_passthru */ @@ -1554,73 +1589,41 @@ ips_is_passthru(Scsi_Cmnd *SC) { static int ips_make_passthru(ips_ha_t *ha, Scsi_Cmnd *SC, ips_scb_t *scb, int intr) { ips_passthru_t *pt; - char *buffer; int length = 0; METHOD_TRACE("ips_make_passthru", 1); if(!SC->use_sg){ - buffer = SC->request_buffer; length = SC->request_bufflen; }else{ struct scatterlist *sg = SC->request_buffer; int i; for(i = 0; i < SC->use_sg; i++) length += sg[i].length; - - if (length < sizeof(ips_passthru_t)) { - /* wrong size */ - DEBUG_VAR(1, "(%s%d) Passthru structure wrong size", - ips_name, ha->host_num); - return (IPS_FAILURE); - }else if(!ha->ioctl_data || length > (PAGE_SIZE << ha->ioctl_order)){ - void *bigger_buf; - int count; - int order; - /* try to allocate a bigger buffer */ - for (count = PAGE_SIZE, order = 0; - count < length; - order++, count <<= 1); - bigger_buf = (void *) __get_free_pages(GFP_ATOMIC, order); - if (bigger_buf) { - /* free the old memory */ - free_pages((unsigned 
long) ha->ioctl_data, ha->ioctl_order); - /* use the new memory */ - ha->ioctl_data = (char *) bigger_buf; - ha->ioctl_order = order; - ha->ioctl_datasize = count; - } else { - pt = (ips_passthru_t*)IPS_SG_ADDRESS(sg); - pt->BasicStatus = 0x0B; - pt->ExtendedStatus = 0x00; - SC->result = DID_ERROR << 16; - return (IPS_FAILURE); - } - } - ha->ioctl_datasize = length; - length = 0; - for(i = 0; i < SC->use_sg; i++){ - memcpy(&ha->ioctl_data[length], IPS_SG_ADDRESS(&sg[i]), sg[i].length); - length += sg[i].length; - } - pt = (ips_passthru_t *)ha->ioctl_data; - buffer = ha->ioctl_data; - } - if (!length || !buffer) { - /* no data */ - DEBUG_VAR(1, "(%s%d) No passthru structure", - ips_name, ha->host_num); - - return (IPS_FAILURE); } if (length < sizeof(ips_passthru_t)) { /* wrong size */ DEBUG_VAR(1, "(%s%d) Passthru structure wrong size", - ips_name, ha->host_num); - + ips_name, ha->host_num); return (IPS_FAILURE); } - pt = (ips_passthru_t*) buffer; + if(ips_alloc_passthru_buffer(ha, length)){ + /* allocation failure! If ha->ioctl_data exists, use it to return + some error codes. Return a failed command to the scsi layer. */ + if(ha->ioctl_data){ + pt = (ips_passthru_t *)ha->ioctl_data; + ips_scmd_buf_read(SC, pt, sizeof(ips_passthru_t)); + pt->BasicStatus = 0x0B; + pt->ExtendedStatus = 0x00; + ips_scmd_buf_write(SC, pt, sizeof(ips_passthru_t)); + } + return IPS_FAILURE; + } + ha->ioctl_datasize = length; + + ips_scmd_buf_read(SC, ha->ioctl_data, ha->ioctl_datasize); + pt = (ips_passthru_t *)ha->ioctl_data; + /* * Some notes about the passthru interface used * @@ -1629,26 +1632,14 @@ ips_make_passthru(ips_ha_t *ha, Scsi_Cmnd *SC, ips_scb_t *scb, int intr) { * packet we received from the sg driver. In this * case the CmdBSize field of the pt structure is * used for the size of the buffer. - * - * IF the scsi op_code == 0x81 then we assume that - * we will need our own buffer and we will copy the - * data to/from the user buffer passed in the scsi - * command. The data address resides at offset 4 - * in the scsi command. The length of the data resides - * at offset 8 in the scsi command. 
*/ switch (pt->CoppCmd) { case IPS_NUMCTRLS: - memcpy(buffer + sizeof(ips_passthru_t), + memcpy(ha->ioctl_data + sizeof(ips_passthru_t), &ips_num_controllers, sizeof(int)); - SC->result = DID_OK << 16; - - return (IPS_SUCCESS_IMM); - - case IPS_CTRLINFO: - memcpy(buffer + sizeof(ips_passthru_t), - ha, sizeof(ips_ha_t)); + ips_scmd_buf_write(SC, ha->ioctl_data, + sizeof(ips_passthru_t) + sizeof(int)); SC->result = DID_OK << 16; return (IPS_SUCCESS_IMM); @@ -1678,8 +1669,8 @@ ips_make_passthru(ips_ha_t *ha, Scsi_Cmnd *SC, ips_scb_t *scb, int intr) { } /* end switch */ - return (IPS_FAILURE); - } + return (IPS_FAILURE); +} /****************************************************************************/ /* Routine Name: ips_flash_copperhead */ @@ -1717,7 +1708,7 @@ ips_flash_copperhead(ips_ha_t *ha, ips_passthru_t *pt, ips_scb_t *scb){ pt->CoppCP.cmd.flashfw.count; for (count = PAGE_SIZE, ha->flash_order = 0; count < datasize; ha->flash_order++, count <<= 1); - ha->flash_data = (char *)__get_free_pages(GFP_ATOMIC, ha->flash_order); + ha->flash_data = (char *)__get_free_pages(IPS_ATOMIC_GFP, ha->flash_order); ha->flash_datasize = 0; }else return IPS_FAILURE; @@ -1793,6 +1784,48 @@ error: return IPS_FAILURE; } +/****************************************************************************/ +/* */ +/* Routine Name: ips_fill_scb_sg_single */ +/* */ +/* Routine Description: */ +/* Fill in a single scb sg_list element from an address */ +/* return a -1 if a breakup occured */ +/****************************************************************************/ +static inline int ips_fill_scb_sg_single(ips_ha_t *ha, dma_addr_t busaddr, + ips_scb_t *scb, int indx, unsigned int e_len) +{ + + int ret_val = 0; + + if ( (scb->data_len + e_len) > ha->max_xfer) { + e_len = ha->max_xfer - scb->data_len; + scb->breakup = indx; + ++scb->sg_break; + ret_val = -1; + } else { + scb->breakup = 0; + scb->sg_break = 0; + } + if (IPS_USE_ENH_SGLIST(ha)) { + scb->sg_list.enh_list[indx].address_lo = + cpu_to_le32(pci_dma_lo32(busaddr)); + scb->sg_list.enh_list[indx].address_hi = + cpu_to_le32(pci_dma_hi32(busaddr)); + scb->sg_list.enh_list[indx].length = + cpu_to_le32(e_len); + } else { + scb->sg_list.std_list[indx].address = + cpu_to_le32(pci_dma_lo32(busaddr)); + scb->sg_list.std_list[indx].length = + cpu_to_le32(e_len); + } + + ++scb->sg_len; + scb->data_len += e_len; + return ret_val; +} + /****************************************************************************/ /* Routine Name: ips_flash_firmware */ /* Routine Description: */ @@ -1800,7 +1833,7 @@ error: /****************************************************************************/ static int ips_flash_firmware(ips_ha_t * ha, ips_passthru_t *pt, ips_scb_t *scb){ - IPS_SG_LIST *sg_list; + IPS_SG_LIST sg_list; uint32_t cmd_busaddr; if(pt->CoppCP.cmd.flashfw.type == IPS_FW_IMAGE && @@ -1815,12 +1848,12 @@ ips_flash_firmware(ips_ha_t * ha, ips_passthru_t *pt, ips_scb_t *scb){ return IPS_FAILURE; } /* Save the S/G list pointer so it doesn't get clobbered */ - sg_list = scb->sg_list; + sg_list.list = scb->sg_list.list; cmd_busaddr = scb->scb_busaddr; /* copy in the CP */ memcpy(&scb->cmd, &pt->CoppCP.cmd, sizeof(IPS_IOCTL_CMD)); /* FIX stuff that might be wrong */ - scb->sg_list = sg_list; + scb->sg_list.list = sg_list.list; scb->scb_busaddr = cmd_busaddr; scb->bus = scb->scsi_cmd->device->channel; scb->target_id = scb->scsi_cmd->device->id; @@ -1837,7 +1870,7 @@ ips_flash_firmware(ips_ha_t * ha, ips_passthru_t *pt, ips_scb_t *scb){ IPS_DMA_DIR(scb)); scb->flags |= 
IPS_SCB_MAP_SINGLE; scb->cmd.flashfw.command_id = IPS_COMMAND_ID(ha, scb); - scb->cmd.flashfw.buffer_addr = scb->data_busaddr; + scb->cmd.flashfw.buffer_addr = cpu_to_le32(scb->data_busaddr); if (pt->TimeOut) scb->timeout = pt->TimeOut; scb->scsi_cmd->result = DID_OK <<16; @@ -1869,7 +1902,7 @@ ips_free_flash_copperhead(ips_ha_t *ha){ /****************************************************************************/ static int ips_usrcmd(ips_ha_t *ha, ips_passthru_t *pt, ips_scb_t *scb) { - IPS_SG_LIST *sg_list; + IPS_SG_LIST sg_list; uint32_t cmd_busaddr; METHOD_TRACE("ips_usrcmd", 1); @@ -1878,14 +1911,14 @@ ips_usrcmd(ips_ha_t *ha, ips_passthru_t *pt, ips_scb_t *scb) { return (0); /* Save the S/G list pointer so it doesn't get clobbered */ - sg_list = scb->sg_list; + sg_list.list = scb->sg_list.list; cmd_busaddr = scb->scb_busaddr; /* copy in the CP */ memcpy(&scb->cmd, &pt->CoppCP.cmd, sizeof(IPS_IOCTL_CMD)); memcpy(&scb->dcdb, &pt->CoppCP.dcdb, sizeof(IPS_DCDB_TABLE)); /* FIX stuff that might be wrong */ - scb->sg_list = sg_list; + scb->sg_list.list = sg_list.list; scb->scb_busaddr = cmd_busaddr; scb->bus = scb->scsi_cmd->device->channel; scb->target_id = scb->scsi_cmd->device->id; @@ -1905,23 +1938,13 @@ ips_usrcmd(ips_ha_t *ha, ips_passthru_t *pt, ips_scb_t *scb) { return (0); if (pt->CmdBSize) { - if(!scb->scsi_cmd->use_sg){ - scb->data_len = pt->CmdBSize; - scb->data_busaddr = pci_map_single(ha->pcidev, - scb->scsi_cmd->request_buffer + - sizeof(ips_passthru_t), - pt->CmdBSize, - IPS_DMA_DIR(scb)); - scb->flags |= IPS_SCB_MAP_SINGLE; - } else { - scb->data_len = pt->CmdBSize; - scb->data_busaddr = pci_map_single(ha->pcidev, - ha->ioctl_data + - sizeof(ips_passthru_t), - pt->CmdBSize, - IPS_DMA_DIR(scb)); - scb->flags |= IPS_SCB_MAP_SINGLE; - } + scb->data_len = pt->CmdBSize; + scb->data_busaddr = pci_map_single(ha->pcidev, + ha->ioctl_data + + sizeof(ips_passthru_t), + pt->CmdBSize, + IPS_DMA_DIR(scb)); + scb->flags |= IPS_SCB_MAP_SINGLE; } else { scb->data_busaddr = 0L; } @@ -1978,10 +2001,7 @@ ips_cleanup_passthru(ips_ha_t *ha, ips_scb_t *scb) { return ; } - if(!scb->scsi_cmd->use_sg) - pt = (ips_passthru_t *) scb->scsi_cmd->request_buffer; - else - pt = (ips_passthru_t *) ha->ioctl_data; + pt = (ips_passthru_t *) ha->ioctl_data; /* Copy data back to the user */ if (scb->cmd.dcdb.op_code == IPS_CMD_DCDB) /* Copy DCDB Back to Caller's Area */ @@ -1996,14 +2016,7 @@ ips_cleanup_passthru(ips_ha_t *ha, ips_scb_t *scb) { scb->cmd.flashfw.op_code == IPS_CMD_RW_BIOSFW)) ips_free_flash_copperhead(ha); - if(scb->scsi_cmd->use_sg){ - int i, length = 0; - struct scatterlist *sg = scb->scsi_cmd->request_buffer; - for(i = 0; i < scb->scsi_cmd->use_sg; i++){ - memcpy(IPS_SG_ADDRESS(&sg[i]), &ha->ioctl_data[length], sg[i].length); - length += sg[i].length; - } - } + ips_scmd_buf_write(scb->scsi_cmd, ha->ioctl_data, ha->ioctl_datasize); } /****************************************************************************/ @@ -2331,7 +2344,7 @@ ips_get_bios_version(ips_ha_t *ha, int intr) { } else { /* Morpheus Family - Send Command to the card */ - buffer = kmalloc(0x1000, GFP_ATOMIC); + buffer = kmalloc(0x1000, IPS_ATOMIC_GFP); if (!buffer) return; @@ -2685,68 +2698,20 @@ ips_next(ips_ha_t *ha, int intr) { scb->sg_count = pci_map_sg(ha->pcidev, sg, SC->use_sg, scsi_to_pci_dma_dir(SC->sc_data_direction)); scb->flags |= IPS_SCB_MAP_SG; - if (scb->sg_count == 1) { - if (sg_dma_len(sg) > ha->max_xfer) { - scb->breakup = 1; - scb->data_len = ha->max_xfer; - } else - scb->data_len = sg_dma_len(sg); - - 
scb->dcdb.transfer_length = scb->data_len; - scb->data_busaddr = sg_dma_address(sg); - scb->sg_len = 0; - } else { - /* Check for the first Element being bigger than MAX_XFER */ - if (sg_dma_len(&sg[0]) > ha->max_xfer) { - scb->sg_list[0].address = cpu_to_le32(sg_dma_address(&sg[0])); - scb->sg_list[0].length = ha->max_xfer; - scb->data_len = ha->max_xfer; - scb->breakup = 0; - scb->sg_break=1; - scb->sg_len = 1; - } else { - for (i = 0; i < scb->sg_count; i++) { - scb->sg_list[i].address = cpu_to_le32(sg_dma_address(&sg[i])); - scb->sg_list[i].length = cpu_to_le32(sg_dma_len(&sg[i])); - - if (scb->data_len + sg_dma_len(&sg[i]) > ha->max_xfer) { - /* - * Data Breakup required - */ - scb->breakup = i; - break; - } - - scb->data_len += sg_dma_len(&sg[i]); - } - - if (!scb->breakup) - scb->sg_len = scb->sg_count; - else - scb->sg_len = scb->breakup; - } - - scb->dcdb.transfer_length = scb->data_len; - scb->data_busaddr = scb->sg_busaddr; + for (i = 0; i < scb->sg_count; i++) { + if ( ips_fill_scb_sg_single(ha, sg_dma_address(&sg[i]), + scb, i, sg_dma_len(&sg[i])) < 0) + break; } + scb->dcdb.transfer_length = scb->data_len; } else { if (SC->request_bufflen) { - if (SC->request_bufflen > ha->max_xfer) { - /* - * Data breakup required - */ - scb->breakup = 1; - scb->data_len = ha->max_xfer; - } else { - scb->data_len = SC->request_bufflen; - } - - scb->dcdb.transfer_length = scb->data_len; scb->data_busaddr = pci_map_single(ha->pcidev, SC->request_buffer, - scb->data_len, + SC->request_bufflen, scsi_to_pci_dma_dir(SC->sc_data_direction)); scb->flags |= IPS_SCB_MAP_SINGLE; - scb->sg_len = 0; + ips_fill_scb_sg_single(ha, scb->data_busaddr, scb, 0, SC->request_bufflen); + scb->dcdb.transfer_length = scb->data_len; } else { scb->data_busaddr = 0L; scb->sg_len = 0; @@ -3299,118 +3264,41 @@ ips_done(ips_ha_t *ha, ips_scb_t *scb) { */ if ((scb->breakup) || (scb->sg_break)) { /* we had a data breakup */ - uint8_t bk_save; - - bk_save = scb->breakup; - scb->breakup = 0; - mod_timer(&scb->scsi_cmd->eh_timeout, jiffies + 120 * HZ); + scb->data_len = 0; if (scb->sg_count) { /* S/G request */ struct scatterlist *sg; - int i; + int ips_sg_index = 0; + int sg_dma_index; sg = scb->scsi_cmd->request_buffer; - if (scb->sg_count == 1) { - if (sg_dma_len(sg) - (bk_save * ha->max_xfer) > ha->max_xfer) { - /* Further breakup required */ - scb->data_len = ha->max_xfer; - scb->data_busaddr = sg_dma_address(sg) + (bk_save * ha->max_xfer); - scb->breakup = bk_save + 1; - } else { - scb->data_len = sg_dma_len(sg) - (bk_save * ha->max_xfer); - scb->data_busaddr = sg_dma_address(sg) + (bk_save * ha->max_xfer); - } - - scb->dcdb.transfer_length = scb->data_len; - scb->sg_len = 0; - } else { - /* We're here because there was MORE than one s/g unit. */ - /* bk_save points to which sg unit to look at */ - /* sg_break points to how far through this unit we are */ - /* NOTE: We will not move from one sg to another here, */ - /* just finish the one we are in. 
Not the most */ - /* efficient, but it keeps it from getting too hacky */ - - /* IF sg_break is non-zero, then just work on this current sg piece, */ - /* pointed to by bk_save */ - if (scb->sg_break) { - scb->sg_len = 1; - scb->sg_list[0].address = sg_dma_address(&sg[bk_save]) - + ha->max_xfer*scb->sg_break; - if (ha->max_xfer > sg_dma_len(&sg[bk_save]) - ha->max_xfer * scb->sg_break) - scb->sg_list[0].length = sg_dma_len(&sg[bk_save]) - ha->max_xfer * scb->sg_break; - else - scb->sg_list[0].length = ha->max_xfer; - scb->sg_break++; /* MUST GO HERE for math below to work */ - scb->data_len = scb->sg_list[0].length;; - - if (sg_dma_len(&sg[bk_save]) <= ha->max_xfer * scb->sg_break ) { - scb->sg_break = 0; /* No more work in this unit */ - if (( bk_save + 1 ) >= scb->sg_count) - scb->breakup = 0; - else - scb->breakup = bk_save + 1; - } - } else { - /* ( sg_break == 0 ), so this is our first look at a new sg piece */ - if (sg_dma_len(&sg[bk_save]) > ha->max_xfer) { - scb->sg_list[0].address = sg_dma_address(&sg[bk_save]); - scb->sg_list[0].length = ha->max_xfer; - scb->breakup = bk_save; - scb->sg_break = 1; - scb->data_len = ha->max_xfer; - scb->sg_len = 1; - } else { - /* OK, the next sg is a short one, so loop until full */ - scb->data_len = 0; - scb->sg_len = 0; - scb->sg_break = 0; - /* We're only doing full units here */ - for (i = bk_save; i < scb->sg_count; i++) { - scb->sg_list[i - bk_save].address = sg_dma_address(&sg[i]); - scb->sg_list[i - bk_save].length = cpu_to_le32(sg_dma_len(&sg[i])); - if (scb->data_len + sg_dma_len(&sg[i]) > ha->max_xfer) { - scb->breakup = i; /* sneaky, if not more work, than breakup is 0 */ - break; - } - scb->data_len += sg_dma_len(&sg[i]); - scb->sg_len++; /* only if we didn't get too big */ - } - } - } - - /* Also, we need to be sure we don't queue work ( breakup != 0 ) - if no more sg units for next time */ - scb->dcdb.transfer_length = scb->data_len; - scb->data_busaddr = scb->sg_busaddr; - } - - } else { - /* Non S/G Request */ - pci_unmap_single(ha->pcidev, scb->data_busaddr, scb->data_len, - IPS_DMA_DIR(scb)); - if ((scb->scsi_cmd->request_bufflen - (bk_save * ha->max_xfer)) > ha->max_xfer) { - /* Further breakup required */ - scb->data_len = ha->max_xfer; - scb->data_busaddr = pci_map_single(ha->pcidev, - scb->scsi_cmd->request_buffer + - (bk_save * ha->max_xfer), - scb->data_len, IPS_DMA_DIR(scb)); - scb->breakup = bk_save + 1; - } else { - scb->data_len = scb->scsi_cmd->request_bufflen - (bk_save * ha->max_xfer); - scb->data_busaddr = pci_map_single(ha->pcidev, - scb->scsi_cmd->request_buffer + - (bk_save * ha->max_xfer), - scb->data_len, IPS_DMA_DIR(scb)); - } + /* Spin forward to last dma chunk */ + sg_dma_index = scb->breakup; - scb->dcdb.transfer_length = scb->data_len; - scb->sg_len = 0; - } + /* Take care of possible partial on last chunk*/ + ips_fill_scb_sg_single(ha, sg_dma_address(&sg[sg_dma_index]), + scb, ips_sg_index++, + sg_dma_len(&sg[sg_dma_index])); + for (; sg_dma_index < scb->sg_count; sg_dma_index++) { + if ( ips_fill_scb_sg_single(ha, sg_dma_address(&sg[sg_dma_index]), + scb, ips_sg_index++, + sg_dma_len(&sg[sg_dma_index])) < 0) + break; + + } + + } else { + /* Non S/G Request */ + (void) ips_fill_scb_sg_single(ha, + scb->data_busaddr + (scb->sg_break * ha->max_xfer), + scb, 0, + scb->scsi_cmd->request_bufflen - (scb->sg_break * ha->max_xfer)); + } + + scb->dcdb.transfer_length = scb->data_len; scb->dcdb.cmd_attribute |= ips_command_direction[scb->scsi_cmd->cmnd[0]]; if (!scb->dcdb.cmd_attribute & 0x3) @@ -3614,6 +3502,68 
@@ ips_send_wait(ips_ha_t *ha, ips_scb_t *scb, int timeout, int intr) { return (ret); } +/****************************************************************************/ +/* */ +/* Routine Name: ips_scmd_buf_write */ +/* */ +/* Routine Description: */ +/* Write data to Scsi_Cmnd request_buffer at proper offsets */ +/****************************************************************************/ +static void ips_scmd_buf_write(Scsi_Cmnd *scmd, void *data, unsigned + int count) +{ + if (scmd->use_sg) { + int i; + unsigned int min_cnt, xfer_cnt; + char *cdata = (char *)data; + struct scatterlist *sg = scmd->request_buffer; + for (i = 0, xfer_cnt = 0; + (i < scmd->use_sg) && (xfer_cnt < count); i++){ + if(!IPS_SG_ADDRESS(&sg[i])) + return; + min_cnt = min( count - xfer_cnt, sg[i].length); + memcpy(IPS_SG_ADDRESS(&sg[i]), &cdata[xfer_cnt], + min_cnt); + xfer_cnt += min_cnt; + } + + } else { + unsigned int min_cnt = min(count, scmd->request_bufflen); + memcpy(scmd->request_buffer, data, min_cnt); + } +} + +/****************************************************************************/ +/* */ +/* Routine Name: ips_scmd_buf_read */ +/* */ +/* Routine Description: */ +/* Copy data from a Scsi_Cmnd to a new, linear buffer */ +/****************************************************************************/ +static void ips_scmd_buf_read(Scsi_Cmnd *scmd, void *data, unsigned + int count) +{ + if (scmd->use_sg) { + int i; + unsigned int min_cnt, xfer_cnt; + char *cdata = (char *)data; + struct scatterlist *sg = scmd->request_buffer; + for (i = 0, xfer_cnt = 0; + (i < scmd->use_sg) && (xfer_cnt < count); i++){ + if(!IPS_SG_ADDRESS(&sg[i])) + return; + min_cnt = min( count - xfer_cnt, sg[i].length); + memcpy(&cdata[xfer_cnt],IPS_SG_ADDRESS(&sg[i]), + min_cnt); + xfer_cnt += min_cnt; + } + + } else { + unsigned int min_cnt = min(count, scmd->request_bufflen); + memcpy(data, scmd->request_buffer, min_cnt); + } +} + /****************************************************************************/ /* */ /* Routine Name: ips_send_cmd */ @@ -3690,7 +3640,7 @@ ips_send_cmd(ips_ha_t *ha, ips_scb_t *scb) { strncpy(inquiry.ProductId, "SERVERAID ", 16); strncpy(inquiry.ProductRevisionLevel, "1.00", 4); - memcpy(scb->scsi_cmd->request_buffer, &inquiry, scb->scsi_cmd->request_bufflen); + ips_scmd_buf_write(scb->scsi_cmd, &inquiry, sizeof(inquiry)); scb->scsi_cmd->result = DID_OK << 16; } @@ -3720,15 +3670,19 @@ ips_send_cmd(ips_ha_t *ha, ips_scb_t *scb) { if (!scb->sg_len) { scb->cmd.basic_io.op_code = (scb->scsi_cmd->cmnd[0] == READ_6) ? IPS_CMD_READ : IPS_CMD_WRITE; + scb->cmd.basic_io.enhanced_sg = 0; + scb->cmd.basic_io.sg_addr = cpu_to_le32(scb->data_busaddr); } else { scb->cmd.basic_io.op_code = (scb->scsi_cmd->cmnd[0] == READ_6) ? IPS_CMD_READ_SG : IPS_CMD_WRITE_SG; + scb->cmd.basic_io.enhanced_sg = IPS_USE_ENH_SGLIST(ha) ? 
0xFF : 0; + scb->cmd.basic_io.sg_addr = cpu_to_le32(scb->sg_busaddr); } + scb->cmd.basic_io.segment_4G = 0; scb->cmd.basic_io.command_id = IPS_COMMAND_ID(ha, scb); scb->cmd.basic_io.log_drv = scb->target_id; scb->cmd.basic_io.sg_count = scb->sg_len; - scb->cmd.basic_io.sg_addr = cpu_to_le32(scb->data_busaddr); if (scb->cmd.basic_io.lba) scb->cmd.basic_io.lba = cpu_to_le32(le32_to_cpu(scb->cmd.basic_io.lba) + @@ -3743,7 +3697,6 @@ ips_send_cmd(ips_ha_t *ha, ips_scb_t *scb) { if (le16_to_cpu(scb->cmd.basic_io.sector_count) == 0) scb->cmd.basic_io.sector_count = cpu_to_le16(256); - scb->cmd.basic_io.reserved = 0; ret = IPS_SUCCESS; break; @@ -3752,15 +3705,19 @@ ips_send_cmd(ips_ha_t *ha, ips_scb_t *scb) { if (!scb->sg_len) { scb->cmd.basic_io.op_code = (scb->scsi_cmd->cmnd[0] == READ_10) ? IPS_CMD_READ : IPS_CMD_WRITE; + scb->cmd.basic_io.enhanced_sg = 0; + scb->cmd.basic_io.sg_addr = cpu_to_le32(scb->data_busaddr); } else { scb->cmd.basic_io.op_code = (scb->scsi_cmd->cmnd[0] == READ_10) ? IPS_CMD_READ_SG : IPS_CMD_WRITE_SG; + scb->cmd.basic_io.enhanced_sg = IPS_USE_ENH_SGLIST(ha) ? 0xFF : 0; + scb->cmd.basic_io.sg_addr = cpu_to_le32(scb->sg_busaddr); } + scb->cmd.basic_io.segment_4G = 0; scb->cmd.basic_io.command_id = IPS_COMMAND_ID(ha, scb); scb->cmd.basic_io.log_drv = scb->target_id; scb->cmd.basic_io.sg_count = scb->sg_len; - scb->cmd.basic_io.sg_addr = cpu_to_le32(scb->data_busaddr); if (scb->cmd.basic_io.lba) scb->cmd.basic_io.lba = cpu_to_le32(le32_to_cpu(scb->cmd.basic_io.lba) + @@ -3773,7 +3730,6 @@ ips_send_cmd(ips_ha_t *ha, ips_scb_t *scb) { scb->cmd.basic_io.sector_count = cpu_to_le16(scb->data_len / IPS_BLKSIZE); - scb->cmd.basic_io.reserved = 0; if (cpu_to_le16(scb->cmd.basic_io.sector_count) == 0) { /* @@ -3795,6 +3751,8 @@ ips_send_cmd(ips_ha_t *ha, ips_scb_t *scb) { case MODE_SENSE: scb->cmd.basic_io.op_code = IPS_CMD_ENQUIRY; scb->cmd.basic_io.command_id = IPS_COMMAND_ID(ha, scb); + scb->cmd.basic_io.segment_4G = 0; + scb->cmd.basic_io.enhanced_sg = 0; scb->data_len = sizeof(*ha->enq); scb->data_busaddr = pci_map_single(ha->pcidev, ha->enq, scb->data_len, IPS_DMA_DIR(scb)); @@ -3853,10 +3811,6 @@ ips_send_cmd(ips_ha_t *ha, ips_scb_t *scb) { /* setup DCDB */ if (scb->bus > 0) { - if (!scb->sg_len) - scb->cmd.dcdb.op_code = IPS_CMD_DCDB; - else - scb->cmd.dcdb.op_code = IPS_CMD_DCDB_SG; /* If we already know the Device is Not there, no need to attempt a Command */ /* This also protects an NT FailOver Controller from getting CDB's sent to it */ @@ -3872,15 +3826,19 @@ ips_send_cmd(ips_ha_t *ha, ips_scb_t *scb) { (unsigned long)scb); scb->cmd.dcdb.reserved = 0; scb->cmd.dcdb.reserved2 = 0; - scb->cmd.dcdb.reserved3 = 0; + scb->cmd.dcdb.reserved3 = 0; + scb->cmd.dcdb.segment_4G = 0; + scb->cmd.dcdb.enhanced_sg = 0; TimeOut = scb->scsi_cmd->timeout_per_command; if (ha->subsys->param[4] & 0x00100000) { /* If NEW Tape DCDB is Supported */ - if (!scb->sg_len) + if (!scb->sg_len) { scb->cmd.dcdb.op_code = IPS_CMD_EXTENDED_DCDB; - else + } else { scb->cmd.dcdb.op_code = IPS_CMD_EXTENDED_DCDB_SG; + scb->cmd.dcdb.enhanced_sg = IPS_USE_ENH_SGLIST(ha) ? 
0xFF : 0; + } tapeDCDB = (IPS_DCDB_TABLE_TAPE *) &scb->dcdb; /* Use Same Data Area as Old DCDB Struct */ tapeDCDB->device_address = ((scb->bus - 1) << 4) | scb->target_id; @@ -3899,13 +3857,23 @@ ips_send_cmd(ips_ha_t *ha, ips_scb_t *scb) { tapeDCDB->cdb_length = scb->scsi_cmd->cmd_len; tapeDCDB->reserved_for_LUN = 0; tapeDCDB->transfer_length = scb->data_len; - tapeDCDB->buffer_pointer = cpu_to_le32(scb->data_busaddr); + if(scb->cmd.dcdb.op_code == IPS_CMD_EXTENDED_DCDB_SG) + tapeDCDB->buffer_pointer = cpu_to_le32(scb->sg_busaddr); + else + tapeDCDB->buffer_pointer = cpu_to_le32(scb->data_busaddr); tapeDCDB->sg_count = scb->sg_len; tapeDCDB->sense_length = sizeof(tapeDCDB->sense_info); tapeDCDB->scsi_status = 0; tapeDCDB->reserved = 0; memcpy(tapeDCDB->scsi_cdb, scb->scsi_cmd->cmnd, scb->scsi_cmd->cmd_len); } else { + if (!scb->sg_len) { + scb->cmd.dcdb.op_code = IPS_CMD_DCDB; + } else { + scb->cmd.dcdb.op_code = IPS_CMD_DCDB_SG; + scb->cmd.dcdb.enhanced_sg = IPS_USE_ENH_SGLIST(ha) ? 0xFF : 0; + } + scb->dcdb.device_address = ((scb->bus - 1) << 4) | scb->target_id; scb->dcdb.cmd_attribute |= IPS_DISCONNECT_ALLOWED; @@ -3921,7 +3889,10 @@ ips_send_cmd(ips_ha_t *ha, ips_scb_t *scb) { scb->dcdb.transfer_length = scb->data_len; if ( scb->dcdb.cmd_attribute & IPS_TRANSFER64K ) scb->dcdb.transfer_length = 0; - scb->dcdb.buffer_pointer = cpu_to_le32(scb->data_busaddr); + if(scb->cmd.dcdb.op_code == IPS_CMD_DCDB_SG) + scb->dcdb.buffer_pointer = cpu_to_le32(scb->sg_busaddr); + else + scb->dcdb.buffer_pointer = cpu_to_le32(scb->data_busaddr); scb->dcdb.cdb_length = scb->scsi_cmd->cmd_len; scb->dcdb.sense_length = sizeof(scb->dcdb.sense_info); scb->dcdb.sg_count = scb->sg_len; @@ -4146,7 +4117,7 @@ ips_inquiry(ips_ha_t *ha, ips_scb_t *scb) { strncpy(inquiry.ProductId, "SERVERAID ", 16); strncpy(inquiry.ProductRevisionLevel, "1.00", 4); - memcpy(scb->scsi_cmd->request_buffer, &inquiry, scb->scsi_cmd->request_bufflen); + ips_scmd_buf_write(scb->scsi_cmd, &inquiry, sizeof(inquiry)); return (1); } @@ -4162,17 +4133,17 @@ ips_inquiry(ips_ha_t *ha, ips_scb_t *scb) { /****************************************************************************/ static int ips_rdcap(ips_ha_t *ha, ips_scb_t *scb) { - IPS_SCSI_CAPACITY *cap; + IPS_SCSI_CAPACITY cap; METHOD_TRACE("ips_rdcap", 1); if (scb->scsi_cmd->bufflen < 8) return (0); - cap = (IPS_SCSI_CAPACITY *) scb->scsi_cmd->request_buffer; + cap.lba = cpu_to_be32(le32_to_cpu(ha->adapt->logical_drive_info.drive_info[scb->target_id].sector_count) - 1); + cap.len = cpu_to_be32((uint32_t) IPS_BLKSIZE); - cap->lba = cpu_to_be32(le32_to_cpu(ha->adapt->logical_drive_info.drive_info[scb->target_id].sector_count) - 1); - cap->len = cpu_to_be32((uint32_t) IPS_BLKSIZE); + ips_scmd_buf_write(scb->scsi_cmd, &cap, sizeof(cap)); return (1); } @@ -4250,7 +4221,7 @@ ips_msense(ips_ha_t *ha, ips_scb_t *scb) { return (0); } /* end switch */ - memcpy(scb->scsi_cmd->request_buffer, &mdata, scb->scsi_cmd->request_bufflen); + ips_scmd_buf_write(scb->scsi_cmd, &mdata, sizeof(mdata)); return (1); } @@ -4277,7 +4248,7 @@ ips_reqsen(ips_ha_t *ha, ips_scb_t *scb) { reqsen.AdditionalSenseCode = IPS_SCSI_REQSEN_NO_SENSE; reqsen.AdditionalSenseCodeQual = IPS_SCSI_REQSEN_NO_SENSE; - memcpy(scb->scsi_cmd->request_buffer, &reqsen, scb->scsi_cmd->request_bufflen); + ips_scmd_buf_write(scb->scsi_cmd, &reqsen, sizeof(reqsen)); return (1); } @@ -4356,8 +4327,8 @@ ips_free(ips_ha_t *ha) { static int ips_deallocatescbs(ips_ha_t *ha, int cmds) { if (ha->scbs) { - 
pci_free_consistent(ha->pcidev,sizeof(IPS_SG_LIST) * IPS_MAX_SG * - cmds, ha->scbs->sg_list, ha->scbs->sg_busaddr); + pci_free_consistent(ha->pcidev, IPS_SGLIST_SIZE(ha) * IPS_MAX_SG * + cmds, ha->scbs->sg_list.list, ha->scbs->sg_busaddr); pci_free_consistent(ha->pcidev, sizeof(ips_scb_t) * cmds, ha->scbs, ha->scbs->scb_busaddr); ha->scbs = NULL; @@ -4377,7 +4348,7 @@ return 1; static int ips_allocatescbs(ips_ha_t *ha) { ips_scb_t *scb_p; - IPS_SG_LIST* ips_sg; + IPS_SG_LIST ips_sg; int i; dma_addr_t command_dma, sg_dma; @@ -4388,9 +4359,9 @@ ips_allocatescbs(ips_ha_t *ha) { &command_dma); if (ha->scbs == NULL) return 0; - ips_sg = pci_alloc_consistent(ha->pcidev, sizeof(IPS_SG_LIST) * IPS_MAX_SG * - ha->max_cmds, &sg_dma); - if(ips_sg == NULL){ + ips_sg.list = pci_alloc_consistent(ha->pcidev, IPS_SGLIST_SIZE(ha) * IPS_MAX_SG * + ha->max_cmds, &sg_dma); + if(ips_sg.list == NULL){ pci_free_consistent(ha->pcidev,ha->max_cmds * sizeof(ips_scb_t),ha->scbs, command_dma); return 0; } @@ -4401,8 +4372,13 @@ ips_allocatescbs(ips_ha_t *ha) { scb_p = &ha->scbs[i]; scb_p->scb_busaddr = command_dma + sizeof(ips_scb_t) * i; /* set up S/G list */ - scb_p->sg_list = ips_sg + i * IPS_MAX_SG; - scb_p->sg_busaddr = sg_dma + sizeof(IPS_SG_LIST) * IPS_MAX_SG * i; + if (IPS_USE_ENH_SGLIST(ha)) { + scb_p->sg_list.enh_list = ips_sg.enh_list + i * IPS_MAX_SG; + scb_p->sg_busaddr = sg_dma + IPS_SGLIST_SIZE(ha) * IPS_MAX_SG * i; + } else { + scb_p->sg_list.std_list = ips_sg.std_list + i * IPS_MAX_SG; + scb_p->sg_busaddr = sg_dma + IPS_SGLIST_SIZE(ha) * IPS_MAX_SG * i; + } /* add to the free list */ if (i < ha->max_cmds - 1) { @@ -4426,14 +4402,14 @@ ips_allocatescbs(ips_ha_t *ha) { /****************************************************************************/ static void ips_init_scb(ips_ha_t *ha, ips_scb_t *scb) { - IPS_SG_LIST *sg_list; + IPS_SG_LIST sg_list; uint32_t cmd_busaddr, sg_busaddr; METHOD_TRACE("ips_init_scb", 1); if (scb == NULL) return ; - sg_list = scb->sg_list; + sg_list.list = scb->sg_list.list; cmd_busaddr = scb->scb_busaddr; sg_busaddr = scb->sg_busaddr; /* zero fill */ @@ -4449,7 +4425,7 @@ ips_init_scb(ips_ha_t *ha, ips_scb_t *scb) { /* set bus address of scb */ scb->scb_busaddr = cmd_busaddr; scb->sg_busaddr = sg_busaddr; - scb->sg_list = sg_list; + scb->sg_list.list = sg_list.list; /* Neptune Fix */ scb->cmd.basic_io.cccr = cpu_to_le32((uint32_t) IPS_BIT_ILE); @@ -4503,8 +4479,7 @@ ips_freescb(ips_ha_t *ha, ips_scb_t *scb) { METHOD_TRACE("ips_freescb", 1); if(scb->flags & IPS_SCB_MAP_SG) - pci_unmap_sg(ha->pcidev, scb->scsi_cmd->request_buffer, - scb->scsi_cmd->use_sg, + pci_unmap_sg(ha->pcidev,scb->scsi_cmd->request_buffer, scb->scsi_cmd->use_sg, IPS_DMA_DIR(scb)); else if(scb->flags & IPS_SCB_MAP_SINGLE) pci_unmap_single(ha->pcidev, scb->data_busaddr, scb->data_len, @@ -5620,7 +5595,6 @@ ips_read_adapter_status(ips_ha_t *ha, int intr) { scb->cmd.basic_io.lba = 0; scb->cmd.basic_io.sector_count = 0; scb->cmd.basic_io.log_drv = 0; - scb->cmd.basic_io.reserved = 0; scb->data_len = sizeof(*ha->enq); scb->data_busaddr = pci_map_single(ha->pcidev, ha->enq, scb->data_len, IPS_DMA_DIR(scb)); @@ -5665,7 +5639,6 @@ ips_read_subsystem_parameters(ips_ha_t *ha, int intr) { scb->cmd.basic_io.lba = 0; scb->cmd.basic_io.sector_count = 0; scb->cmd.basic_io.log_drv = 0; - scb->cmd.basic_io.reserved = 0; scb->data_len = sizeof(*ha->subsys); scb->data_busaddr = pci_map_single(ha->pcidev, ha->subsys, scb->data_len, IPS_DMA_DIR(scb)); @@ -6992,9 +6965,20 @@ static int ips_init_phase1( struct pci_dev *pci_dev, 
int *indexPtr ) ha->subdevice_id = subdevice_id; ha->pcidev = pci_dev; - pci_set_dma_mask(ha->pcidev, (u64)0xffffffff); + /* + * Set the pci_dev's dma_mask. Not all adapters support 64bit + * addressing so don't enable it if the adapter can't support + * it! Also, don't use 64bit addressing if dma addresses + * are guaranteed to be < 4G. + */ + if ( IPS_ENABLE_DMA64 && IPS_HAS_ENH_SGLIST(ha) && + !pci_set_dma_mask(ha->pcidev, (u64)0xffffffffffffffff)) { + (ha)->flags |= IPS_HA_ENH_SG; + } else { + pci_set_dma_mask(ha->pcidev, (u64)0xffffffff); + } - ha->enq = kmalloc(sizeof(IPS_ENQ), GFP_KERNEL); + ha->enq = kmalloc(sizeof(IPS_ENQ), IPS_INIT_GFP); if (!ha->enq) { printk(KERN_WARNING "Unable to allocate host inquiry structure\n" ); @@ -7010,21 +6994,21 @@ static int ips_init_phase1( struct pci_dev *pci_dev, int *indexPtr ) ha->adapt->hw_status_start = dma_address; ha->dummy = (void *)(ha->adapt + 1); - ha->conf = kmalloc(sizeof(IPS_CONF), GFP_KERNEL); + ha->conf = kmalloc(sizeof(IPS_CONF), IPS_INIT_GFP); if (!ha->conf) { printk(KERN_WARNING "Unable to allocate host conf structure\n" ); return ips_abort_init(ha, index); } - ha->nvram = kmalloc(sizeof(IPS_NVRAM_P5), GFP_KERNEL); + ha->nvram = kmalloc(sizeof(IPS_NVRAM_P5), IPS_INIT_GFP); if (!ha->nvram) { printk(KERN_WARNING "Unable to allocate host NVRAM structure\n" ); return ips_abort_init(ha, index); } - ha->subsys = kmalloc(sizeof(IPS_SUBSYS), GFP_KERNEL); + ha->subsys = kmalloc(sizeof(IPS_SUBSYS), IPS_INIT_GFP); if (!ha->subsys) { printk(KERN_WARNING "Unable to allocate host subsystem structure\n" ); @@ -7035,7 +7019,7 @@ static int ips_init_phase1( struct pci_dev *pci_dev, int *indexPtr ) count < ips_ioctlsize; ha->ioctl_order++, count <<= 1); - ha->ioctl_data = (char *) __get_free_pages(GFP_KERNEL, ha->ioctl_order); + ha->ioctl_data = (char *) __get_free_pages(IPS_INIT_GFP, ha->ioctl_order); ha->ioctl_datasize = count; if (!ha->ioctl_data) { diff --git a/drivers/scsi/ips.h b/drivers/scsi/ips.h index dbd21f197d59..86df629eab10 100644 --- a/drivers/scsi/ips.h +++ b/drivers/scsi/ips.h @@ -59,9 +59,7 @@ extern int ips_eh_abort(Scsi_Cmnd *); extern int ips_eh_reset(Scsi_Cmnd *); extern int ips_queue(Scsi_Cmnd *, void (*) (Scsi_Cmnd *)); - extern int ips_slave_configure(Scsi_Device *); extern const char * ips_info(struct Scsi_Host *); - extern void do_ips(int, void *, struct pt_regs *); /* * Some handy macros @@ -70,6 +68,13 @@ #define LinuxVersionCode(x,y,z) (((x)<<16)+((y)<<8)+(z)) #endif + #if LINUX_VERSION_CODE >= LinuxVersionCode(2,4,20) || defined CONFIG_HIGHIO + #define IPS_HIGHIO + #define IPS_HIGHMEM_IO .highmem_io = 1, + #else + #define IPS_HIGHMEM_IO + #endif + #define IPS_HA(x) ((ips_ha_t *) x->hostdata) #define IPS_COMMAND_ID(ha, scb) (int) (scb - ha->scbs) #define IPS_IS_TROMBONE(ha) (((ha->device_id == IPS_DEVICEID_COPPERHEAD) && \ @@ -87,6 +92,11 @@ ((IPS_IS_TROMBONE(ha) || IPS_IS_CLARINET(ha)) && \ (ips_force_memio))) ? 1 : 0) + #define IPS_HAS_ENH_SGLIST(ha) (IPS_IS_MORPHEUS(ha) || IPS_IS_MARCO(ha)) + #define IPS_USE_ENH_SGLIST(ha) ((ha)->flags & IPS_HA_ENH_SG) + #define IPS_SGLIST_SIZE(ha) (IPS_USE_ENH_SGLIST(ha) ? \ + sizeof(IPS_ENH_SG_LIST) : sizeof(IPS_STD_SG_LIST)) + #if LINUX_VERSION_CODE < LinuxVersionCode(2,4,4) #define pci_set_dma_mask(dev,mask) (1) #define scsi_set_pci_device(sh,dev) (0) @@ -105,6 +115,24 @@ #define min(x,y) ((x) < (y) ? 
x : y) #endif + #define pci_dma_lo32(a) (a & 0xffffffff) + + #if (BITS_PER_LONG > 32) || (defined CONFIG_HIGHMEM64G && defined IPS_HIGHIO) + #define IPS_ENABLE_DMA64 (1) + #define pci_dma_hi32(a) (a >> 32) + #else + #define IPS_ENABLE_DMA64 (0) + #define pci_dma_hi32(a) (0) + #endif + + #if defined(__ia64__) + #define IPS_ATOMIC_GFP (GFP_DMA | GFP_ATOMIC) + #define IPS_INIT_GFP GFP_DMA + #else + #define IPS_ATOMIC_GFP GFP_ATOMIC + #define IPS_INIT_GFP GFP_KERNEL + #endif + /* * Adapter address map equates */ @@ -458,7 +486,7 @@ #endif /* - * IBM PCI Raid Command Formats + * Raid Command Formats */ typedef struct { uint8_t op_code; @@ -468,7 +496,8 @@ typedef struct { uint32_t lba; uint32_t sg_addr; uint16_t sector_count; - uint16_t reserved; + uint8_t segment_4G; + uint8_t enhanced_sg; uint32_t ccsar; uint32_t cccr; } IPS_IO_CMD, *PIPS_IO_CMD; @@ -519,7 +548,9 @@ typedef struct { uint16_t reserved; uint32_t reserved2; uint32_t dcdb_address; - uint32_t reserved3; + uint16_t reserved3; + uint8_t segment_4G; + uint8_t enhanced_sg; uint32_t ccsar; uint32_t cccr; } IPS_DCDB_CMD, *PIPS_DCDB_CMD; @@ -963,7 +994,20 @@ typedef struct { typedef struct ips_sglist { uint32_t address; uint32_t length; -} IPS_SG_LIST, *PIPS_SG_LIST; +} IPS_STD_SG_LIST; + +typedef struct ips_enh_sglist { + uint32_t address_lo; + uint32_t address_hi; + uint32_t length; + uint32_t reserved; +} IPS_ENH_SG_LIST; + +typedef union { + void *list; + IPS_STD_SG_LIST *std_list; + IPS_ENH_SG_LIST *enh_list; +} IPS_SG_LIST; typedef struct _IPS_INFOSTR { char *buffer; @@ -1063,6 +1107,7 @@ typedef struct ips_ha { char *ioctl_data; /* IOCTL data area */ uint32_t ioctl_datasize; /* IOCTL data size */ uint32_t cmd_in_progress; /* Current command in progress*/ + int flags; /* */ uint8_t waitflag; /* are we waiting for cmd */ uint8_t active; int ioctl_reset; /* IOCTL Requested Reset Flag */ @@ -1110,7 +1155,7 @@ typedef struct ips_scb { uint32_t sg_len; uint32_t flags; uint32_t op_code; - IPS_SG_LIST *sg_list; + IPS_SG_LIST sg_list; Scsi_Cmnd *scsi_cmd; struct ips_scb *q_next; ips_scb_callback callback; -- cgit v1.2.3 From 44a5a59c0b5d34ff01c685be87894f24132a8328 Mon Sep 17 00:00:00 2001 From: David Jeffery Date: Wed, 5 Feb 2003 18:23:48 -0800 Subject: [PATCH] ips driver 4/4: error messages This small patch does 2 things. It reworks the firmware/driver versioning messages to make them more understandable, and it fixes one case where the 64bit addressing changes caused error/success to not be properly reported to the serveraid tools. 
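[Editorial aside, not part of the original posting: the reporting breakage fixed here traces back to the enhanced scatter/gather list introduced in the previous patch. As a hedged sketch of how a single S/G entry is filled in either format - the driver's real helper, ips_fill_scb_sg_single(), is called in the earlier hunks but its body is not shown in this archive - using only the IPS_* types and macros from those hunks:

/* Hypothetical illustration, not the driver's actual helper: an
 * enhanced S/G entry carries a 64-bit bus address split with
 * pci_dma_hi32()/pci_dma_lo32(), while a standard entry keeps only
 * the low 32 bits. Error handling and the max_xfer clamp that the
 * real helper enforces are omitted here. */
static void fill_one_sg_entry(ips_ha_t *ha, ips_scb_t *scb, int i,
                              dma_addr_t busaddr, uint32_t length)
{
        if (IPS_USE_ENH_SGLIST(ha)) {
                IPS_ENH_SG_LIST *sg = &scb->sg_list.enh_list[i];

                sg->address_lo = cpu_to_le32(pci_dma_lo32(busaddr));
                sg->address_hi = cpu_to_le32(pci_dma_hi32(busaddr));
                sg->length = cpu_to_le32(length);
                sg->reserved = 0;
        } else {
                IPS_STD_SG_LIST *sg = &scb->sg_list.std_list[i];

                sg->address = cpu_to_le32(pci_dma_lo32(busaddr));
                sg->length = cpu_to_le32(length);
        }
        scb->data_len += length;        /* running total for the DCDB */
        scb->sg_len = i + 1;            /* entries filled so far */
}

The same entry-fill logic serves both list formats, which is why the adapter-side choice collapses to the single IPS_USE_ENH_SGLIST(ha) flag set at init time.]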
--- drivers/scsi/ips.c | 43 +++++++++++++++++++++++++++---------------- drivers/scsi/ips.h | 1 + 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/drivers/scsi/ips.c b/drivers/scsi/ips.c index 8fa145c54c4c..40c270c0e823 100644 --- a/drivers/scsi/ips.c +++ b/drivers/scsi/ips.c @@ -1445,7 +1445,8 @@ ips_info(struct Scsi_Host *SH) { bp = &buffer[0]; memset(bp, 0, sizeof(buffer)); - sprintf(bp, "%s%s%s", "IBM PCI ServeRAID ", IPS_VERSION_HIGH, IPS_VERSION_LOW ); + sprintf(bp, "%s%s%s Build %d", "IBM PCI ServeRAID ", + IPS_VERSION_HIGH, IPS_VERSION_LOW, IPS_BUILD_IDENT ); if (ha->ad_type > 0 && ha->ad_type <= MAX_ADAPTER_NAME) { @@ -1590,6 +1591,7 @@ static int ips_make_passthru(ips_ha_t *ha, Scsi_Cmnd *SC, ips_scb_t *scb, int intr) { ips_passthru_t *pt; int length = 0; + int ret; METHOD_TRACE("ips_make_passthru", 1); @@ -1656,9 +1658,11 @@ ips_make_passthru(ips_ha_t *ha, Scsi_Cmnd *SC, ips_scb_t *scb, int intr) { } if(ha->device_id == IPS_DEVICEID_COPPERHEAD && - pt->CoppCP.cmd.flashfw.op_code == IPS_CMD_RW_BIOSFW) - return ips_flash_copperhead(ha, pt, scb); - + pt->CoppCP.cmd.flashfw.op_code == IPS_CMD_RW_BIOSFW) { + ret = ips_flash_copperhead(ha, pt, scb); + ips_scmd_buf_write(SC, ha->ioctl_data, sizeof(ips_passthru_t)); + return ret; + } if (ips_usrcmd(ha, pt, scb)) return (IPS_SUCCESS); else @@ -2082,6 +2086,9 @@ ips_host_info(ips_ha_t *ha, char *ptr, off_t offset, int len) { copy_info(&info, "\tDriver Version : %s%s\n", IPS_VERSION_HIGH, IPS_VERSION_LOW); + copy_info(&info, "\tDriver Build : %d\n", + IPS_BUILD_IDENT); + copy_info(&info, "\tMax Physical Devices : %d\n", ha->enq->ucMaxPhysicalDevices); copy_info(&info, "\tMax Active Commands : %d\n", @@ -6464,6 +6471,8 @@ static void ips_version_check(ips_ha_t *ha, int intr) { uint8_t BiosVersion[ IPS_COMPAT_ID_LENGTH + 1]; int MatchError; int rc; + char BiosString[10]; + char FirmwareString[10]; METHOD_TRACE("ips_version_check", 1); @@ -6496,28 +6505,30 @@ static void ips_version_check(ips_ha_t *ha, int intr) { MatchError = 0; if (strncmp(FirmwareVersion, Compatable[ ha->nvram->adapter_type ], IPS_COMPAT_ID_LENGTH) != 0) - { - if (ips_cd_boot == 0) - printk(KERN_WARNING "Warning: Adapter %d Firmware Compatible Version is %s, but should be %s\n", - ha->host_num, FirmwareVersion, Compatable[ ha->nvram->adapter_type ]); MatchError = 1; - } if (strncmp(BiosVersion, IPS_COMPAT_BIOS, IPS_COMPAT_ID_LENGTH) != 0) - { - if (ips_cd_boot == 0) - printk(KERN_WARNING "Warning: Adapter %d BIOS Compatible Version is %s, but should be %s\n", - ha->host_num, BiosVersion, IPS_COMPAT_BIOS); MatchError = 1; - } ha->nvram->versioning = 1; /* Indicate the Driver Supports Versioning */ if (MatchError) { ha->nvram->version_mismatch = 1; - if (ips_cd_boot == 0) - printk(KERN_WARNING "Warning ! ! ! ServeRAID Version Mismatch\n"); + if (ips_cd_boot == 0) + { + strncpy(&BiosString[0], ha->nvram->bios_high, 4); + strncpy(&BiosString[4], ha->nvram->bios_low, 4); + BiosString[8] = 0; + + strncpy(&FirmwareString[0], ha->enq->CodeBlkVersion, 8); + FirmwareString[8] = 0; + + printk(KERN_WARNING "Warning ! ! ! 
ServeRAID Version Mismatch\n"); + printk(KERN_WARNING "Bios = %s, Firmware = %s, Device Driver = %s%s\n", + BiosString, FirmwareString, IPS_VERSION_HIGH, IPS_VERSION_LOW ); + printk(KERN_WARNING "These levels should match to avoid possible compatibility problems.\n" ); + } } else { diff --git a/drivers/scsi/ips.h b/drivers/scsi/ips.h index 86df629eab10..3f6fc724d330 100644 --- a/drivers/scsi/ips.h +++ b/drivers/scsi/ips.h @@ -1221,6 +1221,7 @@ typedef struct { #define IPS_VER_BUILD 00 #define IPS_VER_BUILD_STRING "00" #define IPS_VER_STRING "5.99.00" +#define IPS_BUILD_IDENT 1132 /* Version numbers for various adapters */ #define IPS_VER_SERVERAID1 "2.25.01" -- cgit v1.2.3 From ebf5ebe31d2cd1e0f13e5b65deb0b4af7afd9dc1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 5 Feb 2003 20:49:30 -0800 Subject: [PATCH] signal-fixes-2.5.59-A4

This is the current threading patchset, which has accumulated over the past two weeks. It consists of a big set of changes from Roland to make threaded signals work. There were still tons of testcases and boundary conditions (mostly in the signal/exit/ptrace area) that we did not handle correctly.

Roland's thread-signal semantics/behavior/ptrace fixes:

- fix signal delivery race with do_exit() => signals are re-queued to the 'process' if do_exit() finds pending unhandled ones. This prevents signals from getting lost upon thread-sys_exit().

- a non-main thread has died on one processor and gone to TASK_ZOMBIE, but before it has gotten to release_task, a sys_wait4 on the other processor reaps it. It's only because it's ptraced that this gets through eligible_child. Somewhere in there the main thread is also dying, so it reparents the child thread to hit that case. This means that there is a race where P might be totally invalid.

- forget_original_parent is not doing the right thing when the group leader dies, i.e. reparenting threads to init when there is a zombie group leader. Perhaps it doesn't matter for any practical purpose without ptrace, though it makes for ppid=1 for each thread in core dumps, which looks funny. Incidentally, SIGCHLD here really should be p->exit_signal.

- one of the gdb tests makes a questionable assumption about what kill will do when it has some threads stopped by ptrace and others running.

exit races:

1. Processor A is in sys_wait4 case TASK_STOPPED considering task P. Processor B is about to resume P and then switch to it. While A is inside that case block, B starts running P and it clears P->exit_code, or takes a pending fatal signal and sets it to a new value. Depending on the interleaving, the possible failure modes are:
   a. A gets to its put_user after B has cleared P->exit_code => returns with WIFSTOPPED, WSTOPSIG==0
   b. A gets to its put_user after B has set P->exit_code anew => returns with e.g. WIFSTOPPED, WSTOPSIG==SIGKILL
   A can spend an arbitrarily long time in that case block, because there's getrusage and put_user that can take page faults, and write_lock'ing of the tasklist_lock that can block. But even if it's short, the race is there in principle.

2. This is new with NPTL, i.e. CLONE_THREAD. Two processors A and B are both in sys_wait4 case TASK_STOPPED considering task P. Both get through their tests and fetches of P->exit_code before either gets to P->exit_code = 0. => two threads return the same pid from waitpid. In other interleavings where one processor gets to its put_user after the other has cleared P->exit_code, it's like case 1(a).

3. SMP races with stop/cont signals. First, take:
	kill(pid, SIGSTOP);
	kill(pid, SIGCONT);
   or:
	kill(pid, SIGSTOP);
	kill(pid, SIGKILL);
   It's possible for this to leave the process stopped with a pending SIGCONT/SIGKILL. That's a state that should never be possible. Moreover, kill(pid, SIGKILL) without any repetition should always be enough to kill a process. (Likewise, SIGCONT, when you know it's sequenced after the last stop signal, must be sufficient to resume a process.)

4. Take:
	kill(pid, SIGKILL);	// or any fatal signal
	kill(pid, SIGCONT);	// or SIGKILL
   It's possible for this to cause pid to be reaped with status 0 instead of its true termination status. The equivalent scenario happens when the process being killed is in an _exit call or a trap-induced fatal signal before the kills.

Plus I've done stability fixes for bugs that popped up during beta-testing, and minor tidying of Roland's changes:

- a rare tasklist corruption during exec, causing some very spurious and colorful crashes.

- a copy_process()-related dereference of an already freed thread structure if hit with a SIGKILL at the wrong moment.

- SMP spinlock deadlocks in the signal code.

This patchset has been tested quite well in the 2.4 backport of the threading changes - and I've done some stress-testing on 2.5.59 SMP as well, and did an x86 UP test-compile and test-boot.
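To make cases 3 and 4 concrete, a minimal userspace check looks like the sketch below (an illustrative test written for this description, not part of the patchset): on a fixed kernel the single SIGKILL must always win, so waitpid() reports a kill and never hangs on a leftover stop.

/* Illustrative test only: SIGSTOP followed immediately by SIGKILL
 * must still kill the child. With the race present, the child can be
 * left stopped with the SIGKILL pending, and waitpid() blocks forever. */
#include <signal.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t pid = fork();
	int status;

	if (pid < 0) {
		perror("fork");
		return 1;
	}
	if (pid == 0)			/* child: spin until killed */
		for (;;)
			pause();

	kill(pid, SIGSTOP);		/* stop the whole process */
	kill(pid, SIGKILL);		/* one SIGKILL must be enough */

	if (waitpid(pid, &status, 0) != pid) {
		perror("waitpid");
		return 1;
	}
	if (WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL) {
		printf("ok: child killed by SIGKILL\n");
		return 0;
	}
	printf("BAD: child not killed, status 0x%x\n", status);
	return 1;
}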
--- fs/exec.c | 6 +- include/linux/sched.h | 10 +- kernel/exit.c | 148 +++++-- kernel/fork.c | 24 +- kernel/signal.c | 1058 +++++++++++++++++++++++++++++-------------------- kernel/suspend.c | 3 +- 6 files changed, 779 insertions(+), 470 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 028fbda85a71..0b41239937b7 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -587,7 +587,7 @@ static inline int de_thread(struct signal_struct *oldsig) return -EAGAIN; } oldsig->group_exit = 1; - __broadcast_thread_group(current, SIGKILL); + zap_other_threads(current); /* * Account for the thread group leader hanging around: @@ -659,7 +659,8 @@ static inline int de_thread(struct signal_struct *oldsig) current->ptrace = ptrace; __ptrace_link(current, parent); } - + + list_del(&current->tasks); list_add_tail(&current->tasks, &init_task.tasks); current->exit_signal = SIGCHLD; state = leader->state; @@ -680,6 +681,7 @@ out: newsig->group_exit = 0; newsig->group_exit_code = 0; newsig->group_exit_task = NULL; + newsig->group_stop_count = 0; memcpy(newsig->action, current->sig->action, sizeof(newsig->action)); init_sigpending(&newsig->shared_pending); diff --git a/include/linux/sched.h b/include/linux/sched.h index 648d4d3ace3c..d41f7a24fc14 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -235,6 +235,9 @@ struct signal_struct { int group_exit; int group_exit_code; struct task_struct *group_exit_task; + + /* thread group stop support, overloads group_exit_code too */ + int group_stop_count; }; /* @@ -508,7 +511,6 @@ extern int in_egroup_p(gid_t); extern void proc_caches_init(void); extern void flush_signals(struct task_struct *); extern void flush_signal_handlers(struct task_struct *); -extern void sig_exit(int, int, struct siginfo *); extern int dequeue_signal(sigset_t *mask, siginfo_t *info); extern void block_all_signals(int (*notifier)(void *priv), void *priv, sigset_t *mask); @@ -525,7 +527,7 @@ extern void do_notify_parent(struct task_struct *, int); extern void force_sig(int, struct task_struct *); extern void force_sig_specific(int, struct task_struct *); extern int send_sig(int, struct task_struct *, int); -extern int __broadcast_thread_group(struct task_struct *p, int sig);
+extern void zap_other_threads(struct task_struct *p); extern int kill_pg(pid_t, int, int); extern int kill_sl(pid_t, int, int); extern int kill_proc(pid_t, int, int); @@ -590,6 +592,8 @@ extern void exit_files(struct task_struct *); extern void exit_sighand(struct task_struct *); extern void __exit_sighand(struct task_struct *); +extern NORET_TYPE void do_group_exit(int); + extern void reparent_to_init(void); extern void daemonize(void); extern task_t *child_reaper; @@ -762,6 +766,8 @@ static inline void cond_resched_lock(spinlock_t * lock) extern FASTCALL(void recalc_sigpending_tsk(struct task_struct *t)); extern void recalc_sigpending(void); +extern void signal_wake_up(struct task_struct *t, int resume_stopped); + /* * Wrappers for p->thread_info->cpu access. No-op on UP. */ diff --git a/kernel/exit.c b/kernel/exit.c index 057c562f62b1..25281033be8d 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -647,7 +647,7 @@ fake_volatile: exit_namespace(tsk); exit_thread(); - if (current->leader) + if (tsk->leader) disassociate_ctty(1); module_put(tsk->thread_info->exec_domain->module); @@ -657,8 +657,31 @@ fake_volatile: tsk->exit_code = code; exit_notify(); preempt_disable(); - if (current->exit_signal == -1) - release_task(current); + if (signal_pending(tsk) && !tsk->sig->group_exit + && !thread_group_empty(tsk)) { + /* + * This occurs when there was a race between our exit + * syscall and a group signal choosing us as the one to + * wake up. It could be that we are the only thread + * alerted to check for pending signals, but another thread + * should be woken now to take the signal since we will not. + * Now we'll wake all the threads in the group just to make + * sure someone gets all the pending signals. + */ + struct task_struct *t; + read_lock(&tasklist_lock); + spin_lock_irq(&tsk->sig->siglock); + for (t = next_thread(tsk); t != tsk; t = next_thread(t)) + if (!signal_pending(t) && !(t->flags & PF_EXITING)) { + recalc_sigpending_tsk(t); + if (signal_pending(t)) + signal_wake_up(t, 0); + } + spin_unlock_irq(&tsk->sig->siglock); + read_unlock(&tasklist_lock); + } + if (tsk->exit_signal == -1) + release_task(tsk); schedule(); BUG(); /* @@ -710,31 +733,44 @@ task_t *next_thread(task_t *p) } /* - * this kills every thread in the thread group. Note that any externally - * wait4()-ing process will get the correct exit code - even if this - * thread is not the thread group leader. + * Take down every thread in the group. This is called by fatal signals + * as well as by sys_exit_group (below). */ -asmlinkage long sys_exit_group(int error_code) +NORET_TYPE void +do_group_exit(int exit_code) { - unsigned int exit_code = (error_code & 0xff) << 8; - - if (!thread_group_empty(current)) { - struct signal_struct *sig = current->sig; + BUG_ON(exit_code & 0x80); /* core dumps don't get here */ + if (current->sig->group_exit) + exit_code = current->sig->group_exit_code; + else if (!thread_group_empty(current)) { + struct signal_struct *const sig = current->sig; + read_lock(&tasklist_lock); spin_lock_irq(&sig->siglock); - if (sig->group_exit) { - spin_unlock_irq(&sig->siglock); - - /* another thread was faster: */ - do_exit(sig->group_exit_code); - } + if (sig->group_exit) + /* Another thread got here before we took the lock. 
*/ + exit_code = sig->group_exit_code; + else { sig->group_exit = 1; sig->group_exit_code = exit_code; - __broadcast_thread_group(current, SIGKILL); + zap_other_threads(current); + } spin_unlock_irq(&sig->siglock); + read_unlock(&tasklist_lock); } do_exit(exit_code); + /* NOTREACHED */ +} + +/* + * this kills every thread in the thread group. Note that any externally + * wait4()-ing process will get the correct exit code - even if this + * thread is not the thread group leader. + */ +asmlinkage long sys_exit_group(int error_code) +{ + do_group_exit((error_code & 0xff) << 8); } static int eligible_child(pid_t pid, int options, task_t *p) @@ -800,6 +836,8 @@ repeat: int ret; list_for_each(_p,&tsk->children) { + int exit_code; + p = list_entry(_p,struct task_struct,sibling); ret = eligible_child(pid, options, p); @@ -813,20 +851,69 @@ repeat: continue; if (!(options & WUNTRACED) && !(p->ptrace & PT_PTRACED)) continue; + if (ret == 2 && !(p->ptrace & PT_PTRACED) && + p->sig && p->sig->group_stop_count > 0) + /* + * A group stop is in progress and + * we are the group leader. We won't + * report until all threads have + * stopped. + */ + continue; read_unlock(&tasklist_lock); /* move to end of parent's list to avoid starvation */ write_lock_irq(&tasklist_lock); remove_parent(p); add_parent(p, p->parent); + + /* + * This uses xchg to be atomic with + * the thread resuming and setting it. + * It must also be done with the write + * lock held to prevent a race with the + * TASK_ZOMBIE case (below). + */ + exit_code = xchg(&p->exit_code, 0); + if (unlikely(p->state > TASK_STOPPED)) { + /* + * The task resumed and then died. + * Let the next iteration catch it + * in TASK_ZOMBIE. Note that + * exit_code might already be zero + * here if it resumed and did + * _exit(0). The task itself is + * dead and won't touch exit_code + * again; other processors in + * this function are locked out. + */ + p->exit_code = exit_code; + exit_code = 0; + } + if (unlikely(exit_code == 0)) { + /* + * Another thread in this function + * got to it first, or it resumed, + * or it resumed and then died. + */ + write_unlock_irq(&tasklist_lock); + continue; + } + /* + * Make sure this doesn't get reaped out from + * under us while we are examining it below. + * We don't want to keep holding onto the + * tasklist_lock while we call getrusage and + * possibly take page faults for user memory. + */ + get_task_struct(p); write_unlock_irq(&tasklist_lock); retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; if (!retval && stat_addr) - retval = put_user((p->exit_code << 8) | 0x7f, stat_addr); - if (!retval) { - p->exit_code = 0; + retval = put_user((exit_code << 8) | 0x7f, stat_addr); + if (!retval) retval = p->pid; - } + put_task_struct(p); goto end_wait4; case TASK_ZOMBIE: /* @@ -841,6 +928,13 @@ repeat: state = xchg(&p->state, TASK_DEAD); if (state != TASK_ZOMBIE) continue; + if (unlikely(p->exit_signal == -1)) + /* + * This can only happen in a race with + * a ptraced thread dying on another + * processor. + */ + continue; read_unlock(&tasklist_lock); retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; @@ -857,11 +951,17 @@ repeat: retval = p->pid; if (p->real_parent != p->parent) { write_lock_irq(&tasklist_lock); + /* Double-check with lock held. 
*/ + if (p->real_parent != p->parent) { __ptrace_unlink(p); - do_notify_parent(p, SIGCHLD); + do_notify_parent( + p, p->exit_signal); p->state = TASK_ZOMBIE; + p = NULL; + } write_unlock_irq(&tasklist_lock); - } else + } + if (p != NULL) release_task(p); goto end_wait4; default: diff --git a/kernel/fork.c b/kernel/fork.c index 4fc3fcd5dacb..c042b5a8eaec 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -680,6 +680,7 @@ static inline int copy_sighand(unsigned long clone_flags, struct task_struct * t sig->group_exit = 0; sig->group_exit_code = 0; sig->group_exit_task = NULL; + sig->group_stop_count = 0; memcpy(sig->action, current->sig->action, sizeof(sig->action)); sig->curr_target = NULL; init_sigpending(&sig->shared_pending); @@ -801,7 +802,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, spin_lock_init(&p->alloc_lock); spin_lock_init(&p->switch_lock); - clear_tsk_thread_flag(p,TIF_SIGPENDING); + clear_tsk_thread_flag(p, TIF_SIGPENDING); init_sigpending(&p->pending); p->it_real_value = p->it_virt_value = p->it_prof_value = 0; @@ -910,6 +911,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, */ if (sigismember(&current->pending.signal, SIGKILL)) { write_unlock_irq(&tasklist_lock); + retval = -EINTR; goto bad_fork_cleanup_namespace; } @@ -934,6 +936,17 @@ static struct task_struct *copy_process(unsigned long clone_flags, } p->tgid = current->tgid; p->group_leader = current->group_leader; + + if (current->sig->group_stop_count > 0) { + /* + * There is an all-stop in progress for the group. + * We ourselves will stop as soon as we check signals. + * Make the new thread part of that group stop too. + */ + current->sig->group_stop_count++; + set_tsk_thread_flag(p, TIF_SIGPENDING); + } + spin_unlock(&current->sig->siglock); } @@ -1036,8 +1049,13 @@ struct task_struct *do_fork(unsigned long clone_flags, init_completion(&vfork); } - if (p->ptrace & PT_PTRACED) - send_sig(SIGSTOP, p, 1); + if (p->ptrace & PT_PTRACED) { + /* + * We'll start up with an immediate SIGSTOP.
+ */ + sigaddset(&p->pending.signal, SIGSTOP); + set_tsk_thread_flag(p, TIF_SIGPENDING); + } wake_up_forked_process(p); /* do this last */ ++total_forks; diff --git a/kernel/signal.c b/kernel/signal.c index 7c485d01a4b0..809ea104b63f 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -55,7 +55,7 @@ int max_queued_signals = 1024; | SIGALRM | load-balance | kill-all | | SIGTERM | load-balance | kill-all | | SIGCHLD | load-balance | ignore | -| SIGCONT | specific | continue-all | +| SIGCONT | load-balance | ignore | | SIGSTOP | n/a | stop-all | | SIGTSTP | load-balance | stop-all | | SIGTTIN | load-balance | stop-all | @@ -98,26 +98,11 @@ int max_queued_signals = 1024; #endif #if SIGRTMIN > BITS_PER_LONG -#define M(sig) (1ULL << (sig)) +#define M(sig) (1ULL << ((sig)-1)) #else -#define M(sig) (1UL << (sig)) +#define M(sig) (1UL << ((sig)-1)) #endif -#define T(sig, mask) (M(sig) & mask) - -#define SIG_USER_SPECIFIC_MASK (\ - M(SIGILL) | M(SIGTRAP) | M(SIGABRT) | M(SIGBUS) | \ - M(SIGFPE) | M(SIGSEGV) | M(SIGPIPE) | M(SIGXFSZ) | \ - M(SIGPROF) | M(SIGSYS) | M_SIGSTKFLT | M(SIGCONT) | \ - M_SIGEMT ) - -#define SIG_USER_LOAD_BALANCE_MASK (\ - M(SIGHUP) | M(SIGINT) | M(SIGQUIT) | M(SIGUSR1) | \ - M(SIGUSR2) | M(SIGALRM) | M(SIGTERM) | M(SIGCHLD) | \ - M(SIGURG) | M(SIGVTALRM) | M(SIGPOLL) | M(SIGWINCH) | \ - M(SIGPWR) | M(SIGTSTP) | M(SIGTTIN) | M(SIGTTOU) ) - -#define SIG_KERNEL_SPECIFIC_MASK (\ - M(SIGCHLD) | M(SIGURG) | M(SIGWINCH) ) +#define T(sig, mask) (M(sig) & (mask)) #define SIG_KERNEL_BROADCAST_MASK (\ M(SIGHUP) | M(SIGINT) | M(SIGQUIT) | M(SIGILL) | \ @@ -132,34 +117,37 @@ int max_queued_signals = 1024; #define SIG_KERNEL_ONLY_MASK (\ M(SIGKILL) | M(SIGSTOP) ) +#define SIG_KERNEL_STOP_MASK (\ + M(SIGSTOP) | M(SIGTSTP) | M(SIGTTIN) | M(SIGTTOU) ) + #define SIG_KERNEL_COREDUMP_MASK (\ M(SIGQUIT) | M(SIGILL) | M(SIGTRAP) | M(SIGABRT) | \ M(SIGFPE) | M(SIGSEGV) | M(SIGBUS) | M(SIGSYS) | \ M(SIGXCPU) | M(SIGXFSZ) | M_SIGEMT ) -#define sig_user_specific(sig) \ - (((sig) < SIGRTMIN) && T(sig, SIG_USER_SPECIFIC_MASK)) -#define sig_user_load_balance(sig) \ - (((sig) >= SIGRTMIN) || T(sig, SIG_USER_LOAD_BALANCE_MASK)) -#define sig_kernel_specific(sig) \ - (((sig) < SIGRTMIN) && T(sig, SIG_KERNEL_SPECIFIC_MASK)) -#define sig_kernel_broadcast(sig) \ - (((sig) >= SIGRTMIN) || T(sig, SIG_KERNEL_BROADCAST_MASK)) +#define SIG_KERNEL_IGNORE_MASK (\ + M(SIGCONT) | M(SIGCHLD) | M(SIGWINCH) | M(SIGURG) ) + #define sig_kernel_only(sig) \ (((sig) < SIGRTMIN) && T(sig, SIG_KERNEL_ONLY_MASK)) #define sig_kernel_coredump(sig) \ (((sig) < SIGRTMIN) && T(sig, SIG_KERNEL_COREDUMP_MASK)) +#define sig_kernel_ignore(sig) \ + (((sig) < SIGRTMIN) && T(sig, SIG_KERNEL_IGNORE_MASK)) +#define sig_kernel_stop(sig) \ + (((sig) < SIGRTMIN) && T(sig, SIG_KERNEL_STOP_MASK)) -#define sig_user_defined(t, sig) \ - (((t)->sig->action[(sig)-1].sa.sa_handler != SIG_DFL) && \ - ((t)->sig->action[(sig)-1].sa.sa_handler != SIG_IGN)) +#define sig_user_defined(t, signr) \ + (((t)->sig->action[(signr)-1].sa.sa_handler != SIG_DFL) && \ + ((t)->sig->action[(signr)-1].sa.sa_handler != SIG_IGN)) -#define sig_ignored(t, sig) \ - (((sig) != SIGCHLD) && \ - ((t)->sig->action[(sig)-1].sa.sa_handler == SIG_IGN)) +#define sig_ignored(t, signr) \ + (!((t)->ptrace & PT_PTRACED) && \ + (t)->sig->action[(signr)-1].sa.sa_handler == SIG_IGN) -static int -__send_sig_info(int sig, struct siginfo *info, struct task_struct *p); +#define sig_fatal(t, signr) \ + (!T(signr, SIG_KERNEL_IGNORE_MASK|SIG_KERNEL_STOP_MASK) && \ + 
(t)->sig->action[(signr)-1].sa.sa_handler == SIG_DFL) /* * Re-calculate pending state from the set of locally pending @@ -193,9 +181,10 @@ static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked) #define PENDING(p,b) has_pending_signals(&(p)->signal, (b)) -void recalc_sigpending_tsk(struct task_struct *t) +inline void recalc_sigpending_tsk(struct task_struct *t) { - if (PENDING(&t->pending, &t->blocked) || + if (t->sig->group_stop_count > 0 || + PENDING(&t->pending, &t->blocked) || PENDING(&t->sig->shared_pending, &t->blocked)) set_tsk_thread_flag(t, TIF_SIGPENDING); else @@ -204,11 +193,7 @@ void recalc_sigpending_tsk(struct task_struct *t) void recalc_sigpending(void) { - if (PENDING(&current->pending, &current->blocked) || - PENDING(&current->sig->shared_pending, &current->blocked)) - set_thread_flag(TIF_SIGPENDING); - else - clear_thread_flag(TIF_SIGPENDING); + recalc_sigpending_tsk(current); } /* Given the mask, find the first available signal that should be serviced. */ @@ -337,23 +322,6 @@ flush_signal_handlers(struct task_struct *t) } } -/* - * sig_exit - cause the current task to exit due to a signal. - */ - -void -sig_exit(int sig, int exit_code, struct siginfo *info) -{ - sigaddset(&current->pending.signal, sig); - recalc_sigpending(); - current->flags |= PF_SIGNALED; - - if (current->sig->group_exit) - exit_code = current->sig->group_exit_code; - - do_exit(exit_code); - /* NOTREACHED */ -} /* Notify the system that a driver wants to block all signals for this * process, and wants to be notified if any signals at all were to be @@ -473,32 +441,74 @@ static int __dequeue_signal(struct sigpending *pending, sigset_t *mask, */ int dequeue_signal(sigset_t *mask, siginfo_t *info) { + int signr = __dequeue_signal(&current->pending, mask, info); + if (!signr) + signr = __dequeue_signal(&current->sig->shared_pending, + mask, info); + return signr; +} + +/* + * Tell a process that it has a new active signal.. + * + * NOTE! we rely on the previous spin_lock to + * lock interrupts for us! We can only be called with + * "siglock" held, and the local interrupt must + * have been disabled when that got acquired! + * + * No need to set need_resched since signal event passing + * goes through ->blocked + */ +inline void signal_wake_up(struct task_struct *t, int resume) +{ + set_tsk_thread_flag(t,TIF_SIGPENDING); + + /* + * If the task is running on a different CPU + * force a reschedule on the other CPU to make + * it notice the new signal quickly. + * + * The code below is a tad loose and might occasionally + * kick the wrong CPU if we catch the process in the + * process of changing - but no harm is done by that + * other than doing an extra (lightweight) IPI interrupt. + */ + if (t->state == TASK_RUNNING) + kick_if_running(t); /* + * If resume is set, we want to wake it up in the TASK_STOPPED case. + * We don't check for TASK_STOPPED because there is a race with it + * executing another processor and just now entering stopped state. + * By calling wake_up_process any time resume is set, we ensure + * the process will wake up and handle its stop or death signal.
*/ - if (current->sig->shared_pending.head) { - int signr = __dequeue_signal(&current->sig->shared_pending, mask, info); - if (signr) - __send_sig_info(signr, info, current); + if ((t->state & TASK_INTERRUPTIBLE) || + (resume && t->state < TASK_ZOMBIE)) { + wake_up_process(t); + return; } - return __dequeue_signal(&current->pending, mask, info); } -static int rm_from_queue(int sig, struct sigpending *s) +/* + * Remove signals in mask from the pending set and queue. + * Returns 1 if any signals were found. + * + * All callers must be holding the siglock. + */ +static int rm_from_queue(unsigned long mask, struct sigpending *s) { struct sigqueue *q, **pp; - if (!sigismember(&s->signal, sig)) + if (!sigtestsetmask(&s->signal, mask)) return 0; - sigdelset(&s->signal, sig); + sigdelsetmask(&s->signal, mask); pp = &s->head; while ((q = *pp) != NULL) { - if (q->info.si_signo == sig) { + if (q->info.si_signo < SIGRTMIN && + (mask & sigmask (q->info.si_signo))) { if ((*pp = q->next) == NULL) s->tail = pp; kmem_cache_free(sigqueue_cachep,q); @@ -510,112 +520,101 @@ static int rm_from_queue(int sig, struct sigpending *s) return 1; } -/* - * Remove signal sig from t->pending. - * Returns 1 if sig was found. - * - * All callers must be holding the siglock. - */ -static int rm_sig_from_queue(int sig, struct task_struct *t) -{ - return rm_from_queue(sig, &t->pending); -} /* * Bad permissions for sending the signal */ -static inline int bad_signal(int sig, struct siginfo *info, struct task_struct *t) +static inline int check_kill_permission(int sig, struct siginfo *info, + struct task_struct *t) { - return (!info || ((unsigned long)info != 1 && + int error = -EINVAL; + if (sig < 0 || sig > _NSIG) + return error; + error = -EPERM; + if ((!info || ((unsigned long)info != 1 && (unsigned long)info != 2 && SI_FROMUSER(info))) && ((sig != SIGCONT) || (current->session != t->session)) && (current->euid ^ t->suid) && (current->euid ^ t->uid) && (current->uid ^ t->suid) && (current->uid ^ t->uid) - && !capable(CAP_KILL); + && !capable(CAP_KILL)) + return error; + return security_task_kill(t, info, sig); } +/* forward decl */ +static void do_notify_parent_cldstop(struct task_struct *tsk, + struct task_struct *parent); + /* - * Signal type: - * < 0 : global action (kill - spread to all non-blocked threads) - * = 0 : ignored - * > 0 : wake up. + * Handle magic process-wide effects of stop/continue signals, and SIGKILL. + * Unlike the signal actions, these happen immediately at signal-generation + * time regardless of blocking, ignoring, or handling. This does the + * actual continuing for SIGCONT, but not the actual stopping for stop + * signals. The process stop is done as a signal action for SIG_DFL. */ -static int signal_type(int sig, struct signal_struct *signals) +static void handle_stop_signal(int sig, struct task_struct *p) { - unsigned long handler; - - if (!signals) - return 0; - - handler = (unsigned long) signals->action[sig-1].sa.sa_handler; - if (handler > 1) - return 1; - - /* "Ignore" handler.. Illogical, but that has an implicit handler for SIGCHLD */ - if (handler == 1) - return sig == SIGCHLD; - - /* Default handler. Normally lethal, but.. */ - switch (sig) { - - /* Ignored */ - case SIGCONT: case SIGWINCH: - case SIGCHLD: case SIGURG: - return 0; - - /* Implicit behaviour */ - case SIGTSTP: case SIGTTIN: case SIGTTOU: - return 1; + struct task_struct *t; - /* Implicit actions (kill or do special stuff) */ - default: - return -1; + if (sig_kernel_stop(sig)) { + /* + * This is a stop signal.
Remove SIGCONT from all queues. + */ + rm_from_queue(sigmask(SIGCONT), &p->sig->shared_pending); + t = p; + do { + rm_from_queue(sigmask(SIGCONT), &t->pending); + t = next_thread(t); + } while (t != p); } -} - - -/* - * Determine whether a signal should be posted or not. - * - * Signals with SIG_IGN can be ignored, except for the - * special case of a SIGCHLD. - * - * Some signals with SIG_DFL default to a non-action. + else if (sig == SIGCONT) { + /* + * Remove all stop signals from all queues, + * and wake all threads. */ -static int ignored_signal(int sig, struct task_struct *t) -{ - /* Don't ignore traced or blocked signals */ - if ((t->ptrace & PT_PTRACED) || sigismember(&t->blocked, sig)) - return 0; - - return signal_type(sig, t->sig) == 0; -} - -/* - * Handle TASK_STOPPED cases etc implicit behaviour - * of certain magical signals. - * - * SIGKILL gets spread out to every thread. + if (unlikely(p->sig->group_stop_count > 0)) { + /* + * There was a group stop in progress. We'll + * pretend it finished before we got here. We are + * obliged to report it to the parent: if the + * SIGSTOP happened "after" this SIGCONT, then it + * would have cleared this pending SIGCONT. If it + * happened "before" this SIGCONT, then the parent + * got the SIGCHLD about the stop finishing before + * the continue happened. We do the notification + * now, and it's as if the stop had finished and + * the SIGCHLD was pending on entry to this kill. + */ + p->sig->group_stop_count = 0; + if (p->ptrace & PT_PTRACED) + do_notify_parent_cldstop(p, p->parent); + else + do_notify_parent_cldstop( + p->group_leader, + p->group_leader->real_parent); + } + rm_from_queue(SIG_KERNEL_STOP_MASK, &p->sig->shared_pending); + t = p; + do { + rm_from_queue(SIG_KERNEL_STOP_MASK, &t->pending); + if (t->state == TASK_STOPPED) { + /* + * If there is a handler for SIGCONT, we + * must make sure that no thread returns to + * user mode before we post the signal, in + * case it was the only thread eligible to + * run the signal handler--then it must not + * do anything between resuming and running + * the handler. With the TIF_SIGPENDING flag + * set, the thread will pause and acquire the + * siglock that we hold now and until we've + * queued the pending signal. */ -static void handle_stop_signal(int sig, struct task_struct *t) -{ - switch (sig) { - case SIGKILL: case SIGCONT: - /* Wake up the process if stopped. */ - if (t->state == TASK_STOPPED) - wake_up_process(t); - t->exit_code = 0; - rm_sig_from_queue(SIGSTOP, t); - rm_sig_from_queue(SIGTSTP, t); - rm_sig_from_queue(SIGTTOU, t); - rm_sig_from_queue(SIGTTIN, t); - break; - - case SIGSTOP: case SIGTSTP: - case SIGTTIN: case SIGTTOU: - /* If we're stopping again, cancel SIGCONT */ - rm_sig_from_queue(SIGCONT, t); - break; + if (sig_user_defined(p, SIGCONT)) + set_tsk_thread_flag(t, TIF_SIGPENDING); + wake_up_process(t); + } + t = next_thread(t); + } while (t != p); } } @@ -678,51 +677,12 @@ out_set: return 0; } -/* - * Tell a process that it has a new active signal.. - * - * NOTE! we rely on the previous spin_lock to - * lock interrupts for us! We can only be called with - * "siglock" held, and the local interrupt must - * have been disabled when that got acquired! 
- * - * No need to set need_resched since signal event passing - * goes through ->blocked - */ -inline void signal_wake_up(struct task_struct *t) -{ - set_tsk_thread_flag(t,TIF_SIGPENDING); - - /* - * If the task is running on a different CPU - * force a reschedule on the other CPU to make - * it notice the new signal quickly. - * - * The code below is a tad loose and might occasionally - * kick the wrong CPU if we catch the process in the - * process of changing - but no harm is done by that - * other than doing an extra (lightweight) IPI interrupt. - */ - if (t->state == TASK_RUNNING) - kick_if_running(t); - if (t->state & TASK_INTERRUPTIBLE) { - wake_up_process(t); - return; - } -} - -static int deliver_signal(int sig, struct siginfo *info, struct task_struct *t) -{ - int retval = send_signal(sig, info, &t->pending); - - if (!retval && !sigismember(&t->blocked, sig)) - signal_wake_up(t); +#define LEGACY_QUEUE(sigptr, sig) \ + (((sig) < SIGRTMIN) && sigismember(&(sigptr)->signal, (sig))) - return retval; -} static int -specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t, int shared) +specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t) { int ret; @@ -732,49 +692,21 @@ specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t, int if (!spin_is_locked(&t->sig->siglock)) BUG(); #endif - ret = -EINVAL; - if (sig < 0 || sig > _NSIG) - goto out; - /* The somewhat baroque permissions check... */ - ret = -EPERM; - if (bad_signal(sig, info, t)) - goto out; - ret = security_task_kill(t, info, sig); - if (ret) - goto out; - - /* The null signal is a permissions and process existence probe. - No signal is actually delivered. Same goes for zombies. */ - ret = 0; - if (!sig || !t->sig) - goto out; - handle_stop_signal(sig, t); - - /* Optimize away the signal, if it's a signal that can be - handled immediately (ie non-blocked and untraced) and - that is ignored (either explicitly or by default). */ - - if (ignored_signal(sig, t)) - goto out; - -#define LEGACY_QUEUE(sigptr, sig) \ - (((sig) < SIGRTMIN) && sigismember(&(sigptr)->signal, (sig))) + /* Short-circuit ignored signals. */ + if (sig_ignored(t, sig)) + return 0; - if (!shared) { /* Support queueing exactly one non-rt signal, so that we can get more detailed information about the cause of the signal. 
*/ if (LEGACY_QUEUE(&t->pending, sig)) - goto out; + return 0; + + ret = send_signal(sig, info, &t->pending); + if (!ret && !sigismember(&t->blocked, sig)) + signal_wake_up(t, sig == SIGKILL); - ret = deliver_signal(sig, info, t); - } else { - if (LEGACY_QUEUE(&t->sig->shared_pending, sig)) - goto out; - ret = send_signal(sig, info, &t->sig->shared_pending); - } -out: return ret; } @@ -794,26 +726,12 @@ force_sig_info(int sig, struct siginfo *info, struct task_struct *t) t->sig->action[sig-1].sa.sa_handler = SIG_DFL; sigdelset(&t->blocked, sig); recalc_sigpending_tsk(t); - ret = __send_sig_info(sig, info, t); + ret = specific_send_sig_info(sig, info, t); spin_unlock_irqrestore(&t->sig->siglock, flags); return ret; } -static int -__specific_force_sig_info(int sig, struct task_struct *t) -{ - if (!t->sig) - return -ESRCH; - - if (t->sig->action[sig-1].sa.sa_handler == SIG_IGN) - t->sig->action[sig-1].sa.sa_handler = SIG_DFL; - sigdelset(&t->blocked, sig); - recalc_sigpending_tsk(t); - - return specific_send_sig_info(sig, (void *)2, t, 0); -} - void force_sig_specific(int sig, struct task_struct *t) { @@ -824,157 +742,182 @@ force_sig_specific(int sig, struct task_struct *t) t->sig->action[sig-1].sa.sa_handler = SIG_DFL; sigdelset(&t->blocked, sig); recalc_sigpending_tsk(t); - specific_send_sig_info(sig, (void *)2, t, 0); + specific_send_sig_info(sig, (void *)2, t); spin_unlock_irqrestore(&t->sig->siglock, flags); } -#define can_take_signal(p, sig) \ - (((unsigned long) p->sig->action[sig-1].sa.sa_handler > 1) && \ - !sigismember(&p->blocked, sig) && (task_curr(p) || !signal_pending(p))) +/* + * Test if P wants to take SIG. After we've checked all threads with this, + * it's equivalent to finding no threads not blocking SIG. Any threads not + * blocking SIG were ruled out because they are not running and already + * have pending signals. Such threads will dequeue from the shared queue + * as soon as they're available, so putting the signal on the shared queue + * will be equivalent to sending it to one such thread. + */ +#define wants_signal(sig, p) (!sigismember(&(p)->blocked, sig) \ + && (p)->state < TASK_STOPPED \ + && !((p)->flags & PF_EXITING) \ + && (task_curr(p) || !signal_pending(p))) -static inline -int load_balance_thread_group(struct task_struct *p, int sig, - struct siginfo *info) +static inline int +__group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) { - struct task_struct *tmp; + struct task_struct *t; int ret; +#if CONFIG_SMP + if (!spin_is_locked(&p->sig->siglock)) + BUG(); +#endif + handle_stop_signal(sig, p); + + /* Short-circuit ignored signals. */ + if (sig_ignored(p, sig)) + return 0; + + if (LEGACY_QUEUE(&p->sig->shared_pending, sig)) + /* This is a non-RT signal and we already have one queued. */ + return 0; + /* - * if the specified thread is not blocking this signal - * then deliver it. + * Put this signal on the shared-pending queue, or fail with EAGAIN. + * We always use the shared queue for process-wide signals, + * to avoid several races. */ - if (can_take_signal(p, sig)) - return specific_send_sig_info(sig, info, p, 0); + ret = send_signal(sig, info, &p->sig->shared_pending); + if (unlikely(ret)) + return ret; /* + * Now find a thread we can wake up to take the signal off the queue. + * + * If the main thread wants the signal, it gets first crack. + * Probably the least surprising to the average bear. 
+ */ + if (p->state < TASK_ZOMBIE && + (sig_kernel_only(sig) || wants_signal(sig, p))) + t = p; + else if (thread_group_empty(p)) + /* + * There is just one thread and it does not need to be woken. + * It will dequeue unblocked signals before it runs again. + */ + return 0; + else { + /* * Otherwise try to find a suitable thread. - * If no such thread is found then deliver to - * the original thread. */ - - tmp = p->sig->curr_target; - - if (!tmp || tmp->tgid != p->tgid) + t = p->sig->curr_target; + if (t == NULL) /* restart balancing at this thread */ - p->sig->curr_target = p; - - else for (;;) { - if (thread_group_empty(p)) - BUG(); - if (!tmp || tmp->tgid != p->tgid) - BUG(); + t = p->sig->curr_target = p; + BUG_ON(t->tgid != p->tgid); + while (!wants_signal(sig, t)) { + t = next_thread(t); + if (t == p->sig->curr_target) /* - * Do not send signals that are ignored or blocked, - * or to not-running threads that are overworked: + * No thread needs to be woken. + * Any eligible threads will see + * the signal in the queue soon. */ - if (!can_take_signal(tmp, sig)) { - tmp = next_thread(tmp); - p->sig->curr_target = tmp; - if (tmp == p) - break; - continue; + return 0; } - ret = specific_send_sig_info(sig, info, tmp, 0); - return ret; + p->sig->curr_target = t; } + /* - * No suitable thread was found - put the signal - * into the shared-pending queue. + * Found a killable thread. If the signal will be fatal, + * then start taking the whole group down immediately. */ - return specific_send_sig_info(sig, info, p, 1); -} - -int __broadcast_thread_group(struct task_struct *p, int sig) -{ - struct task_struct *tmp; - struct list_head *l; - struct pid *pid; - int err = 0; - - for_each_task_pid(p->tgid, PIDTYPE_TGID, tmp, l, pid) - err = __specific_force_sig_info(sig, tmp); - - return err; -} + if (sig_fatal(p, sig) && !p->sig->group_exit && + !sigismember(&t->real_blocked, sig) && + (sig == SIGKILL || !(t->ptrace & PT_PTRACED))) { + /* + * This signal will be fatal to the whole group. + */ + if (!sig_kernel_coredump(sig)) { + /* + * Start a group exit and wake everybody up. + * This way we don't have other threads + * running and doing things after a slower + * thread has the fatal signal pending. + */ + p->sig->group_exit = 1; + p->sig->group_exit_code = sig; + p->sig->group_stop_count = 0; + t = p; + do { + sigaddset(&t->pending.signal, SIGKILL); + signal_wake_up(t, 1); + t = next_thread(t); + } while (t != p); + return 0; + } -struct task_struct * find_unblocked_thread(struct task_struct *p, int signr) -{ - struct task_struct *tmp; - struct list_head *l; - struct pid *pid; + /* + * There will be a core dump. We make all threads other + * than the chosen one go into a group stop so that nothing + * happens until it gets scheduled, takes the signal off + * the shared queue, and does the core dump. This is a + * little more complicated than strictly necessary, but it + * keeps the signal state that winds up in the core dump + * unchanged from the death state, e.g. which thread had + * the core-dump signal unblocked. 
+ */ + rm_from_queue(SIG_KERNEL_STOP_MASK, &t->pending); + rm_from_queue(SIG_KERNEL_STOP_MASK, &p->sig->shared_pending); + p->sig->group_stop_count = 0; + p->sig->group_exit_task = t; + t = p; + do { + p->sig->group_stop_count++; + signal_wake_up(t, 0); + t = next_thread(t); + } while (t != p); + wake_up_process(p->sig->group_exit_task); + return 0; + } - for_each_task_pid(p->tgid, PIDTYPE_TGID, tmp, l, pid) - if (!sigismember(&tmp->blocked, signr)) - return tmp; - return NULL; + /* + * The signal is already in the shared-pending queue. + * Tell the chosen thread to wake up and dequeue it. + */ + signal_wake_up(t, sig == SIGKILL); + return 0; } -static int -__send_sig_info(int sig, struct siginfo *info, struct task_struct *p) +/* + * Nuke all other threads in the group. + */ +void zap_other_threads(struct task_struct *p) { struct task_struct *t; - int ret = 0; - -#if CONFIG_SMP - if (!spin_is_locked(&p->sig->siglock)) - BUG(); -#endif - /* not a thread group - normal signal behavior */ - if (thread_group_empty(p) || !sig) - goto out_send; - - if (sig_user_defined(p, sig)) { - if (sig_user_specific(sig)) - goto out_send; - if (sig_user_load_balance(sig)) { - ret = load_balance_thread_group(p, sig, info); - goto out_unlock; - } - /* must not happen */ - BUG(); - } - /* optimize away ignored signals: */ - if (sig_ignored(p, sig)) - goto out_unlock; + p->sig->group_stop_count = 0; - if (sig_kernel_specific(sig) || - ((p->ptrace & PT_PTRACED) && !sig_kernel_only(sig))) - goto out_send; + if (thread_group_empty(p)) + return; - /* Does any of the threads unblock the signal? */ - t = find_unblocked_thread(p, sig); - if (!t) { - ret = specific_send_sig_info(sig, info, p, 1); - goto out_unlock; - } - if (sigismember(&t->real_blocked,sig)) { - ret = specific_send_sig_info(sig, info, t, 0); - goto out_unlock; + for (t = next_thread(p); t != p; t = next_thread(t)) { + sigaddset(&t->pending.signal, SIGKILL); + rm_from_queue(SIG_KERNEL_STOP_MASK, &t->pending); + signal_wake_up(t, 1); } - if (sig_kernel_broadcast(sig) || sig_kernel_coredump(sig)) { - ret = __broadcast_thread_group(p, sig); - goto out_unlock; - } - - /* must not happen */ - BUG(); -out_send: - ret = specific_send_sig_info(sig, info, p, 0); -out_unlock: - return ret; } int -send_sig_info(int sig, struct siginfo *info, struct task_struct *p) +group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) { unsigned long flags; int ret; - spin_lock_irqsave(&p->sig->siglock, flags); - ret = __send_sig_info(sig, info, p); - spin_unlock_irqrestore(&p->sig->siglock, flags); + ret = check_kill_permission(sig, info, p); + if (!ret && sig && p->sig) { + spin_lock_irqsave(&p->sig->siglock, flags); + ret = __group_send_sig_info(sig, info, p); + spin_unlock_irqrestore(&p->sig->siglock, flags); + } return ret; } @@ -995,7 +938,7 @@ int __kill_pg_info(int sig, struct siginfo *info, pid_t pgrp) return -EINVAL; for_each_task_pid(pgrp, PIDTYPE_PGID, p, l, pid) { - err = send_sig_info(sig, info, p); + err = group_send_sig_info(sig, info, p); if (retval) retval = err; } @@ -1037,7 +980,7 @@ kill_sl_info(int sig, struct siginfo *info, pid_t sid) for_each_task_pid(sid, PIDTYPE_SID, p, l, pid) { if (!p->leader) continue; - err = send_sig_info(sig, info, p); + err = group_send_sig_info(sig, info, p); if (retval) retval = err; } @@ -1056,7 +999,7 @@ kill_proc_info(int sig, struct siginfo *info, pid_t pid) p = find_task_by_pid(pid); error = -ESRCH; if (p) - error = send_sig_info(sig, info, p); + error = group_send_sig_info(sig, info, p); 
read_unlock(&tasklist_lock); return error; } @@ -1079,8 +1022,8 @@ static int kill_something_info(int sig, struct siginfo *info, int pid) read_lock(&tasklist_lock); for_each_process(p) { - if (p->pid > 1 && p != current) { - int err = send_sig_info(sig, info, p); + if (p->pid > 1 && p->tgid != current->tgid) { + int err = group_send_sig_info(sig, info, p); ++count; if (err != -EPERM) retval = err; @@ -1099,6 +1042,22 @@ static int kill_something_info(int sig, struct siginfo *info, int pid) * These are for backward compatibility with the rest of the kernel source. */ +int +send_sig_info(int sig, struct siginfo *info, struct task_struct *p) +{ + /* XXX should nix these interfaces and update the kernel */ + if (T(sig, SIG_KERNEL_BROADCAST_MASK)) + /* XXX do callers really always hold the tasklist_lock?? */ + return group_send_sig_info(sig, info, p); + else { + int error; + spin_lock_irq(&p->sig->siglock); + error = specific_send_sig_info(sig, info, p); + spin_unlock_irq(&p->sig->siglock); + return error; + } +} + int send_sig(int sig, struct task_struct *p, int priv) { @@ -1133,9 +1092,10 @@ kill_proc(pid_t pid, int sig, int priv) * Joy. Or not. Pthread wants us to wake up every thread * in our parent group. */ -static inline void __wake_up_parent(struct task_struct *p) +static inline void __wake_up_parent(struct task_struct *p, + struct task_struct *parent) { - struct task_struct *parent = p->parent, *tsk = parent; + struct task_struct *tsk = parent; /* * Fortunately this is not necessary for thread groups: @@ -1162,6 +1122,7 @@ void do_notify_parent(struct task_struct *tsk, int sig) struct siginfo info; unsigned long flags; int why, status; + struct signal_struct *psig; if (sig == -1) BUG(); @@ -1200,10 +1161,34 @@ void do_notify_parent(struct task_struct *tsk, int sig) info.si_code = why; info.si_status = status; - spin_lock_irqsave(&tsk->parent->sig->siglock, flags); - __send_sig_info(sig, &info, tsk->parent); - __wake_up_parent(tsk); - spin_unlock_irqrestore(&tsk->parent->sig->siglock, flags); + psig = tsk->parent->sig; + spin_lock_irqsave(&psig->siglock, flags); + if (sig == SIGCHLD && tsk->state != TASK_STOPPED && + (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN || + (psig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT))) { + /* + * We are exiting and our parent doesn't care. POSIX.1 + * defines special semantics for setting SIGCHLD to SIG_IGN + * or setting the SA_NOCLDWAIT flag: we should be reaped + * automatically and not left for our parent's wait4 call. + * Rather than having the parent do it as a magic kind of + * signal handler, we just set this to tell do_exit that we + * can be cleaned up without becoming a zombie. Note that + * we still call __wake_up_parent in this case, because a + * blocked sys_wait4 might now return -ECHILD. + * + * Whether we send SIGCHLD or not for SA_NOCLDWAIT + * is implementation-defined: we do (if you don't want + * it, just use SIG_IGN instead). 
+ */ + tsk->exit_signal = -1; + if (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN) + sig = 0; + } + if (sig > 0 && sig <= _NSIG) + __group_send_sig_info(sig, &info, tsk->parent); + __wake_up_parent(tsk, tsk->parent); + spin_unlock_irqrestore(&psig->siglock, flags); } @@ -1224,6 +1209,149 @@ notify_parent(struct task_struct *tsk, int sig) } } +static void +do_notify_parent_cldstop(struct task_struct *tsk, struct task_struct *parent) +{ + struct siginfo info; + unsigned long flags; + + info.si_signo = SIGCHLD; + info.si_errno = 0; + info.si_pid = tsk->pid; + info.si_uid = tsk->uid; + + /* FIXME: find out whether or not this is supposed to be c*time. */ + info.si_utime = tsk->utime; + info.si_stime = tsk->stime; + + info.si_status = tsk->exit_code & 0x7f; + info.si_code = CLD_STOPPED; + + spin_lock_irqsave(&parent->sig->siglock, flags); + if (parent->sig->action[SIGCHLD-1].sa.sa_handler != SIG_IGN && + !(parent->sig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP)) + __group_send_sig_info(SIGCHLD, &info, parent); + /* + * Even if SIGCHLD is not generated, we must wake up wait4 calls. + */ + __wake_up_parent(tsk, parent); + spin_unlock_irqrestore(&parent->sig->siglock, flags); +} + +static void +finish_stop(int stop_count) +{ + /* + * If there are no other threads in the group, or if there is + * a group stop in progress and we are the last to stop, + * report to the parent. When ptraced, every thread reports itself. + */ + if (stop_count < 0 || (current->ptrace & PT_PTRACED)) { + read_lock(&tasklist_lock); + do_notify_parent_cldstop(current, current->parent); + read_unlock(&tasklist_lock); + } + else if (stop_count == 0) { + read_lock(&tasklist_lock); + do_notify_parent_cldstop(current->group_leader, + current->group_leader->real_parent); + read_unlock(&tasklist_lock); + } + + schedule(); + /* + * Now we don't run again until continued. + */ + current->exit_code = 0; +} + +/* + * This performs the stopping for SIGSTOP and other stop signals. + * We have to stop all threads in the thread group. + */ +static void +do_signal_stop(int signr) +{ + struct signal_struct *sig = current->sig; + int stop_count = -1; + + if (sig->group_stop_count > 0) { + /* + * There is a group stop in progress. We don't need to + * start another one. + */ + spin_lock_irq(&sig->siglock); + if (unlikely(sig->group_stop_count == 0)) { + BUG_ON(!sig->group_exit); + spin_unlock_irq(&sig->siglock); + return; + } + signr = sig->group_exit_code; + stop_count = --sig->group_stop_count; + current->exit_code = signr; + set_current_state(TASK_STOPPED); + spin_unlock_irq(&sig->siglock); + } + else if (thread_group_empty(current)) { + /* + * No locks needed in this case. + */ + current->exit_code = signr; + set_current_state(TASK_STOPPED); + } + else { + /* + * There is no group stop already in progress. + * We must initiate one now. + */ + struct task_struct *t; + read_lock(&tasklist_lock); + spin_lock_irq(&sig->siglock); + + if (unlikely(sig->group_exit)) { + /* + * There is a group exit in progress now. + * We'll just ignore the stop and process the + * associated fatal signal. + */ + spin_unlock_irq(&sig->siglock); + read_unlock(&tasklist_lock); + return; + } + + if (sig->group_stop_count == 0) { + sig->group_exit_code = signr; + stop_count = 0; + for (t = next_thread(current); t != current; + t = next_thread(t)) + /* + * Setting state to TASK_STOPPED for a group + * stop is always done with the siglock held, + * so this check has no races. 
+				 */
+				if (t->state < TASK_STOPPED) {
+					stop_count++;
+					signal_wake_up(t, 0);
+				}
+			sig->group_stop_count = stop_count;
+		}
+		else {
+			/* A race with another thread while unlocked. */
+			signr = sig->group_exit_code;
+			stop_count = --sig->group_stop_count;
+		}
+
+		current->exit_code = signr;
+		set_current_state(TASK_STOPPED);
+
+		spin_unlock_irq(&sig->siglock);
+		read_unlock(&tasklist_lock);
+	}
+
+	finish_stop(stop_count);
+}
+
+
 #ifndef HAVE_ARCH_GET_SIGNAL_TO_DELIVER
 
 int get_signal_to_deliver(siginfo_t *info, struct pt_regs *regs)
@@ -1235,6 +1363,28 @@ int get_signal_to_deliver(siginfo_t *info, struct pt_regs *regs)
 		struct k_sigaction *ka;
 
 		spin_lock_irq(&current->sig->siglock);
+		if (unlikely(current->sig->group_stop_count > 0)) {
+			int stop_count;
+			if (current->sig->group_exit_task == current) {
+				/*
+				 * Group stop is so we can do a core dump.
+				 */
+				current->sig->group_exit_task = NULL;
+				goto dequeue;
+			}
+			/*
+			 * There is a group stop in progress.  We stop
+			 * without any associated signal being in our queue.
+			 */
+			stop_count = --current->sig->group_stop_count;
+			signr = current->sig->group_exit_code;
+			current->exit_code = signr;
+			set_current_state(TASK_STOPPED);
+			spin_unlock_irq(&current->sig->siglock);
+			finish_stop(stop_count);
+			continue;
+		}
+	dequeue:
 		signr = dequeue_signal(mask, info);
 
 		spin_unlock_irq(&current->sig->siglock);
@@ -1242,6 +1392,16 @@
 			break;
 
 		if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) {
+			/*
+			 * If there is a group stop in progress,
+			 * we must participate in the bookkeeping.
+			 */
+			if (current->sig->group_stop_count > 0) {
+				spin_lock_irq(&current->sig->siglock);
+				--current->sig->group_stop_count;
+				spin_unlock_irq(&current->sig->siglock);
+			}
+
 			/* Let the debugger run.  */
 			current->exit_code = signr;
 			set_current_state(TASK_STOPPED);
@@ -1254,10 +1414,6 @@
 			continue;
 
 		current->exit_code = 0;
-		/* The debugger continued.  Ignore SIGSTOP.  */
-		if (signr == SIGSTOP)
-			continue;
-
 		/* Update the siginfo structure.  Is this good?  */
 		if (signr != info->si_signo) {
 			info->si_signo = signr;
@@ -1269,61 +1425,69 @@ int get_signal_to_deliver(siginfo_t *info, struct pt_regs *regs)
 
 		/* If the (new) signal is now blocked, requeue it.  */
 		if (sigismember(&current->blocked, signr)) {
-			send_sig_info(signr, info, current);
+			spin_lock_irq(&current->sig->siglock);
+			specific_send_sig_info(signr, info, current);
+			spin_unlock_irq(&current->sig->siglock);
 			continue;
 		}
 	}
 
 		ka = &current->sig->action[signr-1];
-		if (ka->sa.sa_handler == SIG_IGN) {
-			if (signr != SIGCHLD)
-				continue;
-			/* Check for SIGCHLD: it's special.  */
-			while (sys_wait4(-1, NULL, WNOHANG, NULL) > 0)
-				/* nothing */;
+		if (ka->sa.sa_handler == SIG_IGN) /* Do nothing.  */
 			continue;
-		}
+		if (ka->sa.sa_handler != SIG_DFL) /* Run the handler.  */
+			return signr;
 
-		if (ka->sa.sa_handler == SIG_DFL) {
-			int exit_code = signr;
+		/*
+		 * Now we are doing the default action for this signal.
+		 */
+		if (sig_kernel_ignore(signr)) /* Default is nothing. */
+			continue;
 
 		/* Init gets no signals it doesn't want.  */
 		if (current->pid == 1)
 			continue;
 
-		switch (signr) {
-		case SIGCONT: case SIGCHLD: case SIGWINCH: case SIGURG:
-			continue;
-
-		case SIGTSTP: case SIGTTIN: case SIGTTOU:
-			if (is_orphaned_pgrp(current->pgrp))
-				continue;
-			/* FALLTHRU */
-
-		case SIGSTOP: {
-			struct signal_struct *sig;
-			set_current_state(TASK_STOPPED);
-			current->exit_code = signr;
-			sig = current->parent->sig;
-			if (sig && !(sig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP))
-				notify_parent(current, SIGCHLD);
-			schedule();
+		if (sig_kernel_stop(signr)) {
+			/*
+			 * The default action is to stop all threads in
+			 * the thread group.  The job control signals
+			 * do nothing in an orphaned pgrp, but SIGSTOP
+			 * always works.
+			 */
+			if (signr == SIGSTOP ||
+			    !is_orphaned_pgrp(current->pgrp))
+				do_signal_stop(signr);
 			continue;
 		}
 
-		case SIGQUIT: case SIGILL: case SIGTRAP:
-		case SIGABRT: case SIGFPE: case SIGSEGV:
-		case SIGBUS: case SIGSYS: case SIGXCPU: case SIGXFSZ:
-			if (do_coredump(signr, exit_code, regs))
-				exit_code |= 0x80;
-			/* FALLTHRU */
-
-		default:
-			sig_exit(signr, exit_code, info);
+		/*
+		 * Anything else is fatal, maybe with a core dump.
+		 */
+		current->flags |= PF_SIGNALED;
+		if (sig_kernel_coredump(signr) &&
+		    do_coredump(signr, signr, regs)) {
+			/*
+			 * That killed all other threads in the group and
+			 * synchronized with their demise, so there can't
+			 * be any more left to kill now.  The group_exit
+			 * flags are set by do_coredump.  Note that
+			 * thread_group_empty won't always be true yet,
+			 * because those threads were blocked in __exit_mm
+			 * and we just let them go to finish dying.
+			 */
+			const int code = signr | 0x80;
+			BUG_ON(!current->sig->group_exit);
+			BUG_ON(current->sig->group_exit_code != code);
+			do_exit(code); /* NOTREACHED */
 		}
-		}
-		return signr;
+
+		/*
+		 * Death signals, no core dump.
+		 */
+		do_group_exit(signr);
+		/* NOTREACHED */
 	}
 	return 0;
 }
@@ -1435,12 +1599,17 @@ long do_sigpending(void *set, unsigned long sigsetsize)
 		goto out;
 
 	spin_lock_irq(&current->sig->siglock);
-	sigandsets(&pending, &current->blocked, &current->pending.signal);
+	sigorsets(&pending, &current->pending.signal,
+		  &current->sig->shared_pending.signal);
 	spin_unlock_irq(&current->sig->siglock);
 
+	/* Outside the lock because only this thread touches it.  */
+	sigandsets(&pending, &current->blocked, &pending);
+
 	error = -EFAULT;
 	if (!copy_to_user(set, &pending, sigsetsize))
 		error = 0;
+
 out:
 	return error;
 }
@@ -1628,9 +1797,17 @@ sys_tkill(int pid, int sig)
 	p = find_task_by_pid(pid);
 	error = -ESRCH;
 	if (p) {
-		spin_lock_irq(&p->sig->siglock);
-		error = specific_send_sig_info(sig, &info, p, 0);
-		spin_unlock_irq(&p->sig->siglock);
+		error = check_kill_permission(sig, &info, p);
+		/*
+		 * The null signal is a permissions and process existence
+		 * probe.  No signal is actually delivered.
+		 */
+		if (!error && sig && p->sig) {
+			spin_lock_irq(&p->sig->siglock);
+			handle_stop_signal(sig, p);
+			error = specific_send_sig_info(sig, &info, p);
+			spin_unlock_irq(&p->sig->siglock);
+		}
 	}
 	read_unlock(&tasklist_lock);
 	return error;
@@ -1664,7 +1841,17 @@ do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact)
 
 	k = &current->sig->action[sig-1];
 
+	read_lock(&tasklist_lock);
 	spin_lock_irq(&current->sig->siglock);
+	if (signal_pending(current)) {
+		/*
+		 * If there might be a fatal signal pending on multiple
+		 * threads, make sure we take it before changing the action.
+		 */
+		spin_unlock_irq(&current->sig->siglock);
+		read_unlock(&tasklist_lock);
+		return -ERESTARTSYS;
+	}
 
 	if (oact)
 		*oact = *k;
@@ -1683,25 +1870,22 @@
 	 * pending and whose default action is to ignore the signal
 	 * (for example, SIGCHLD), shall cause the pending signal to
 	 * be discarded, whether or not it is blocked"
-	 *
-	 * Note the silly behaviour of SIGCHLD: SIG_IGN means that the
-	 * signal isn't actually ignored, but does automatic child
-	 * reaping, while SIG_DFL is explicitly said by POSIX to force
-	 * the signal to be ignored.
 	 */
-		if (k->sa.sa_handler == SIG_IGN
-		    || (k->sa.sa_handler == SIG_DFL
-			&& (sig == SIGCONT ||
-			    sig == SIGCHLD ||
-			    sig == SIGWINCH ||
-			    sig == SIGURG))) {
-			if (rm_sig_from_queue(sig, current))
-				recalc_sigpending();
+		if (k->sa.sa_handler == SIG_IGN ||
+		    (k->sa.sa_handler == SIG_DFL && sig_kernel_ignore(sig))) {
+			struct task_struct *t = current;
+			rm_from_queue(sigmask(sig), &t->sig->shared_pending);
+			do {
+				rm_from_queue(sigmask(sig), &t->pending);
+				recalc_sigpending_tsk(t);
+				t = next_thread(t);
+			} while (t != current);
 		}
 	}
-
 	spin_unlock_irq(&current->sig->siglock);
+	read_unlock(&tasklist_lock);
+
 	return 0;
 }
diff --git a/kernel/suspend.c b/kernel/suspend.c
index 3c55c284b537..40efb8d02db6 100644
--- a/kernel/suspend.c
+++ b/kernel/suspend.c
@@ -65,7 +65,6 @@
 #include 
 #include 
 
-extern void signal_wake_up(struct task_struct *t);
 extern int sys_sync(void);
 
 unsigned char software_suspend_enabled = 0;
@@ -220,7 +219,7 @@ int freeze_processes(void)
 				   without locking */
 				p->flags |= PF_FREEZE;
 				spin_lock_irqsave(&p->sig->siglock, flags);
-				signal_wake_up(p);
+				signal_wake_up(p, 0);
 				spin_unlock_irqrestore(&p->sig->siglock, flags);
 				todo++;
 			} while_each_thread(g, p);
-- cgit v1.2.3

From 3fa327f8a756c99e76fc5f95459fc85a45bcafc5 Mon Sep 17 00:00:00 2001
From: Mark Haverkamp
Date: Wed, 5 Feb 2003 20:51:17 -0800
Subject: [PATCH] fix megaraid driver compile error

This accesses the host element through device, since host has been
removed from struct scsi_cmnd.
---
 drivers/scsi/megaraid.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c
index 88e808cb3ae1..bb942cd73b6e 100644
--- a/drivers/scsi/megaraid.c
+++ b/drivers/scsi/megaraid.c
@@ -4515,7 +4515,7 @@ static int megadev_ioctl (struct inode *inode, struct file *filep,
 	if(scsicmd == NULL) return -ENOMEM;
 
 	memset(scsicmd, 0, sizeof(Scsi_Cmnd));
-	scsicmd->host = shpnt;
+	scsicmd->device->host = shpnt;
 
 	if( outlen || inlen ) {
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0)
@@ -4652,7 +4652,7 @@ static int megadev_ioctl (struct inode *inode, struct file *filep,
 	if(scsicmd == NULL) return -ENOMEM;
 
 	memset(scsicmd, 0, sizeof(Scsi_Cmnd));
-	scsicmd->host = shpnt;
+	scsicmd->device->host = shpnt;
 
 	if (outlen || inlen) {
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0)
-- cgit v1.2.3

From ea4e0b5bc6a86a370d5b7089af4d9aa901cecf9c Mon Sep 17 00:00:00 2001
From: Matthew Dobson
Date: Wed, 5 Feb 2003 22:55:32 -0800
Subject: [PATCH] Broken CLEAR_BITMAP() macro

The CLEAR_BITMAP() macro in include/linux/types.h is broken and doesn't
round the bitmap size to the proper 'long' boundary.

This fixes it by creating a macro BITS_TO_LONGS that just rounds a number
of bits up to the closest number of unsigned longs.  This makes the
DECLARE_BITMAP and CLEAR_BITMAP macros more readable and fixes the bug.
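For illustration only (not part of the patch), a minimal standalone
sketch of the arithmetic; BITS_PER_LONG is defined locally here rather
than coming from the kernel headers, and bits = 64 is just a convenient
example value:

    #include <stdio.h>

    #define BITS_PER_LONG ((int) (8 * sizeof(unsigned long)))
    #define BITS_TO_LONGS(bits) \
            (((bits) + BITS_PER_LONG - 1) / BITS_PER_LONG)

    int main(void)
    {
            int bits = 64;  /* a bitmap that fits exactly one 64-bit long */

            /* Bytes occupied by DECLARE_BITMAP(name, bits), i.e. by
               unsigned long name[BITS_TO_LONGS(bits)]: */
            int array = BITS_TO_LONGS(bits) * (int) sizeof(unsigned long);

            /* Bytes cleared by the old CLEAR_BITMAP(): rounded up in bits,
               then converted to bytes -- not a whole number of longs: */
            int old = (bits + BITS_PER_LONG - 1) / 8;

            /* Bytes cleared by the fixed CLEAR_BITMAP(): */
            int fixed = BITS_TO_LONGS(bits) * (int) sizeof(unsigned long);

            /* With 64-bit longs this prints "array=8 old=15 fixed=8":
               the old macro memsets 7 bytes past the end of the array. */
            printf("array=%d old=%d fixed=%d\n", array, old, fixed);
            return 0;
    }

In general the old expression clears at least as many bytes as the array
holds and can run up to sizeof(unsigned long)-1 bytes past its end, while
the BITS_TO_LONGS form always matches the allocation exactly.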
--- include/linux/types.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/linux/types.h b/include/linux/types.h index 94ceb057eb64..f1c0ce5eb845 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -4,10 +4,12 @@ #ifdef __KERNEL__ #include +#define BITS_TO_LONGS(bits) \ + (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG) #define DECLARE_BITMAP(name,bits) \ - unsigned long name[((bits)+BITS_PER_LONG-1)/BITS_PER_LONG] + unsigned long name[BITS_TO_LONGS(bits)] #define CLEAR_BITMAP(name,bits) \ - memset(name, 0, ((bits)+BITS_PER_LONG-1)/8) + memset(name, 0, BITS_TO_LONGS(bits)*sizeof(unsigned long)) #endif #include -- cgit v1.2.3 From 477c16ff9201e948366081cf944518d666093e0b Mon Sep 17 00:00:00 2001 From: Steven Cole Date: Wed, 5 Feb 2003 23:19:05 -0800 Subject: [PATCH] Spelling fixes OK, here is the diff against 2.5.59-bk2, now up to 880 lines due to an additional misspelling which crept in the -bk2 snapshot. Fixes 'seperate' -> 'separate' and 'definate' -> 'definite'. Kernal codrs cna't spel. --- Documentation/SubmittingDrivers | 2 +- Documentation/networking/bonding.txt | 4 ++-- Documentation/s390/s390dbf.txt | 2 +- Documentation/scsi/ibmmca.txt | 4 ++-- Documentation/usb/hiddev.txt | 2 +- arch/cris/lib/old_checksum.c | 2 +- arch/m68k/atari/hades-pci.c | 2 +- arch/m68k/math-emu/fp_decode.h | 4 ++-- arch/m68k/math-emu/fp_scan.S | 4 ++-- arch/parisc/kernel/irq.c | 2 +- drivers/block/cpqarray.c | 2 +- drivers/char/drm/i830_dma.c | 2 +- drivers/char/ip2main.c | 2 +- drivers/char/n_hdlc.c | 2 +- drivers/char/rio/cmdpkt.h | 2 +- drivers/char/synclink.c | 2 +- drivers/ide/pci/pdc202xx_new.c | 2 +- drivers/ide/pci/pdc202xx_old.c | 2 +- drivers/isdn/hardware/eicon/io.h | 2 +- drivers/media/video/zr36120.c | 2 +- drivers/mtd/maps/elan-104nc.c | 2 +- drivers/net/fealnx.c | 2 +- drivers/net/hamachi.c | 2 +- drivers/net/sis900.c | 2 +- drivers/net/sk98lin/skgeinit.c | 2 +- drivers/net/skfp/h/supern_2.h | 2 +- drivers/net/skfp/smt.c | 2 +- drivers/net/skfp/smtdef.c | 2 +- drivers/net/tg3.c | 2 +- drivers/net/tokenring/madgemc.c | 4 ++-- drivers/net/tokenring/smctr_firmware.h | 2 +- drivers/net/tokenring/tmsisa.c | 2 +- drivers/net/tokenring/tmspci.c | 2 +- drivers/net/wan/lmc/lmc_ioctl.h | 2 +- drivers/parisc/ccio-dma.c | 2 +- drivers/parisc/ccio-rm-dma.c | 2 +- drivers/parisc/sba_iommu.c | 2 +- drivers/s390/char/sclp_tty.c | 6 +++--- drivers/s390/char/sclp_tty.h | 4 ++-- drivers/s390/char/tape_char.c | 2 +- drivers/scsi/aacraid/aacraid.h | 4 ++-- drivers/scsi/aic7xxx_old.c | 2 +- drivers/scsi/qla1280.c | 2 +- drivers/scsi/qlogicfc.c | 2 +- drivers/scsi/sim710.c | 4 ++-- drivers/usb/serial/usb-serial.c | 2 +- drivers/video/riva/fbdev.c | 2 +- drivers/video/skeletonfb.c | 4 ++-- fs/binfmt_elf.c | 2 +- fs/binfmt_flat.c | 2 +- fs/jfs/jfs_txnmgr.c | 2 +- fs/nfs/nfs4proc.c | 4 ++-- fs/xfs/xfs_bmap.c | 2 +- include/asm-ia64/sn/sv.h | 2 +- include/asm-m68k/mac_psc.h | 2 +- include/asm-mips/ng1hw.h | 2 +- include/asm-sparc/ide.h | 2 +- include/asm-sparc64/ide.h | 2 +- net/8021q/vlan.h | 2 +- sound/core/seq/seq_device.c | 2 +- sound/oss/i810_audio.c | 6 +++--- sound/oss/trident.c | 2 +- sound/pci/ali5451/ali5451.c | 4 ++-- 63 files changed, 78 insertions(+), 78 deletions(-) diff --git a/Documentation/SubmittingDrivers b/Documentation/SubmittingDrivers index da3b38ae7ac4..c127eff47bd7 100644 --- a/Documentation/SubmittingDrivers +++ b/Documentation/SubmittingDrivers @@ -62,7 +62,7 @@ Code: Please use the Linux style of code formatting as documented in Documentation/CodingStyle. 
If you have sections of code that need to be in other formats, for example because they are shared with a windows driver kit and you want to - maintain them just once seperate them out nicely and note + maintain them just once separate them out nicely and note this fact. Portability: Pointers are not always 32bits, not all computers are little diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt index 11d3360afea3..18d144a0e10d 100644 --- a/Documentation/networking/bonding.txt +++ b/Documentation/networking/bonding.txt @@ -258,7 +258,7 @@ arp_ip_target Specifies the ip addresses to use when arp_interval is > 0. These are the targets of the ARP request sent to determine the health of the link to the targets. Specify these values in ddd.ddd.ddd.ddd format. - Multiple ip adresses must be seperated by a comma. At least one ip + Multiple ip adresses must be separated by a comma. At least one ip address needs to be given for ARP monitoring to work. The maximum number of targets that can be specified is set at 16. @@ -309,7 +309,7 @@ in a High Availability setup to have several targets to monitor. In the case of just one target, the target itself may go down or have a problem making it unresponsive to ARP requests. Having an additional target (or several) would increase the reliability of the ARP monitoring. -Multiple ARP targets must be seperated by commas as follows: +Multiple ARP targets must be separated by commas as follows: # example options for ARP monitoring with three targets alias bond0 bonding diff --git a/Documentation/s390/s390dbf.txt b/Documentation/s390/s390dbf.txt index d7ae2d1fde81..f8b7a5b89036 100644 --- a/Documentation/s390/s390dbf.txt +++ b/Documentation/s390/s390dbf.txt @@ -8,7 +8,7 @@ Description: ------------ The goal of this feature is to provide a kernel debug logging API where log records can be stored efficiently in memory, where each component -(e.g. device drivers) can have one seperate debug log. +(e.g. device drivers) can have one separate debug log. One purpose of this is to inspect the debug logs after a production system crash in order to analyze the reason for the crash. If the system still runs but only a subcomponent which uses dbf failes, diff --git a/Documentation/scsi/ibmmca.txt b/Documentation/scsi/ibmmca.txt index 3272344a5641..912d10209c1c 100644 --- a/Documentation/scsi/ibmmca.txt +++ b/Documentation/scsi/ibmmca.txt @@ -346,7 +346,7 @@ This table is quite informative for interested users. It shows the load of commands on the subsystem and wether you are running the bypassed (software) or integrated (hardware) SCSI-command set (see below). The - amount of accesses is shown. Read, write, modeselect is shown seperately + amount of accesses is shown. Read, write, modeselect is shown separately in order to help debugging problems with CD-ROMs or tapedrives. The following table shows the list of 15 logical device numbers, that are @@ -943,7 +943,7 @@ 4 To do ------- - - IBM SCSI-2 F/W external SCSI bus support in seperate mode! + - IBM SCSI-2 F/W external SCSI bus support in separate mode! - It seems that the handling of bad disks is really bad - non-existent, in fact. However, a low-level driver cannot help much, if such things happen. 
diff --git a/Documentation/usb/hiddev.txt b/Documentation/usb/hiddev.txt index 470840b5be5f..26c5dde778d1 100644 --- a/Documentation/usb/hiddev.txt +++ b/Documentation/usb/hiddev.txt @@ -9,7 +9,7 @@ examples for this are power devices (especially uninterruptable power supplies) and monitor control on higher end monitors. To support these disparite requirements, the Linux USB system provides -HID events to two seperate interfaces: +HID events to two separate interfaces: * the input subsystem, which converts HID events into normal input device interfaces (such as keyboard, mouse and joystick) and a normalised event interface - see Documentation/input/input.txt diff --git a/arch/cris/lib/old_checksum.c b/arch/cris/lib/old_checksum.c index 52be69146e30..5d4d9bc4b057 100644 --- a/arch/cris/lib/old_checksum.c +++ b/arch/cris/lib/old_checksum.c @@ -75,7 +75,7 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) sum += *((unsigned short *)buff)++; } if(endMarker - buff > 0) { - sum += *buff; /* add extra byte seperately */ + sum += *buff; /* add extra byte separately */ } BITOFF; return(sum); diff --git a/arch/m68k/atari/hades-pci.c b/arch/m68k/atari/hades-pci.c index 4fdd61b2e4f2..6bd0adba8e92 100644 --- a/arch/m68k/atari/hades-pci.c +++ b/arch/m68k/atari/hades-pci.c @@ -375,7 +375,7 @@ struct pci_bus_info * __init init_hades_pci(void) memset(bus, 0, sizeof(struct pci_bus_info)); /* - * Claim resources. The m68k has no seperate I/O space, both + * Claim resources. The m68k has no separate I/O space, both * PCI memory space and PCI I/O space are in memory space. Therefore * the I/O resources are requested in memory space as well. */ diff --git a/arch/m68k/math-emu/fp_decode.h b/arch/m68k/math-emu/fp_decode.h index 259785f34660..759679d9ab96 100644 --- a/arch/m68k/math-emu/fp_decode.h +++ b/arch/m68k/math-emu/fp_decode.h @@ -68,7 +68,7 @@ do_no_pc_mode=0 do_fscc=0 | first decoding of the instr type -| this seperates the conditional instr +| this separates the conditional instr .macro fp_decode_cond_instr_type bfextu %d2{#8,#2},%d0 jmp ([0f:w,%pc,%d0*4]) @@ -80,7 +80,7 @@ do_fscc=0 .endm | second decoding of the instr type -| this seperates most move instr +| this separates most move instr .macro fp_decode_move_instr_type bfextu %d2{#16,#3},%d0 jmp ([0f:w,%pc,%d0*4]) diff --git a/arch/m68k/math-emu/fp_scan.S b/arch/m68k/math-emu/fp_scan.S index 97b73c1de572..1177ebad37fb 100644 --- a/arch/m68k/math-emu/fp_scan.S +++ b/arch/m68k/math-emu/fp_scan.S @@ -74,13 +74,13 @@ fp_scan: | first two instruction words are kept in %d2 getuser.l (%a0)+,%d2,fp_err_ua1,%a0 fp_put_pc %a0 -fp_decode_cond: | seperate conditional instr +fp_decode_cond: | separate conditional instr fp_decode_cond_instr_type .long fp_decode_move, fp_fscc .long fp_fbccw, fp_fbccl -fp_decode_move: | seperate move instr +fp_decode_move: | separate move instr fp_decode_move_instr_type .long fp_fgen_fp, fp_ill diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c index 4abe55cdd129..15d5126d579d 100644 --- a/arch/parisc/kernel/irq.c +++ b/arch/parisc/kernel/irq.c @@ -373,7 +373,7 @@ txn_alloc_data(int virt_irq, unsigned int bits_wide) /* XXX FIXME : bits_wide indicates how wide the transaction ** data is allowed to be...we may need a different virt_irq ** if this one won't work. Another reason to index virtual - ** irq's into a table which can manage CPU/IRQ bit seperately. + ** irq's into a table which can manage CPU/IRQ bit separately. 
*/ if (IRQ_OFFSET(virt_irq) > (1 << (bits_wide -1))) { diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c index a5ca0a6811a9..3a4bfd88b362 100644 --- a/drivers/block/cpqarray.c +++ b/drivers/block/cpqarray.c @@ -607,7 +607,7 @@ static void *remap_pci_mem(ulong base, ulong size) #ifndef MODULE #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,13) /* - * Config string is a comma seperated set of i/o addresses of EISA cards. + * Config string is a comma separated set of i/o addresses of EISA cards. */ static int cpqarray_setup(char *str) { diff --git a/drivers/char/drm/i830_dma.c b/drivers/char/drm/i830_dma.c index d29e21cbf432..dbffc3ced84a 100644 --- a/drivers/char/drm/i830_dma.c +++ b/drivers/char/drm/i830_dma.c @@ -454,7 +454,7 @@ static int i830_dma_initialize(drm_device_t *dev, DRM_DEBUG("pitch_bits %x\n", init->pitch_bits); dev_priv->cpp = init->cpp; - /* We are using seperate values as placeholders for mechanisms for + /* We are using separate values as placeholders for mechanisms for * private backbuffer/depthbuffer usage. */ diff --git a/drivers/char/ip2main.c b/drivers/char/ip2main.c index 1fbc0eb7fb79..244399a6ae04 100644 --- a/drivers/char/ip2main.c +++ b/drivers/char/ip2main.c @@ -1466,7 +1466,7 @@ static void do_status(void *p) } #ifdef NEVER_HAPPENS_AS_SETUP_XXX // and can't work because we don't know the_char - // as the_char is reported on a seperate path + // as the_char is reported on a separate path // The intelligent board does this stuff as setup { char brkf = TTY_NORMAL; diff --git a/drivers/char/n_hdlc.c b/drivers/char/n_hdlc.c index d060056f86c9..947c077bf9fb 100644 --- a/drivers/char/n_hdlc.c +++ b/drivers/char/n_hdlc.c @@ -35,7 +35,7 @@ * callback directly to avoid fragmenting or concatenating * multiple frames into a single receive callback. * - * The HDLC line discipline queues the receive frames in seperate + * The HDLC line discipline queues the receive frames in separate * buffers so complete receive frames can be returned by the * tty read calls. * diff --git a/drivers/char/rio/cmdpkt.h b/drivers/char/rio/cmdpkt.h index 6db1009a2f7d..46befd354f20 100644 --- a/drivers/char/rio/cmdpkt.h +++ b/drivers/char/rio/cmdpkt.h @@ -41,7 +41,7 @@ static char *_cmdpkt_h_sccs_ = "@(#)cmdpkt.h 1.2"; /* ** overlays for the data area of a packet. Used in both directions ** (to build a packet to send, and to interpret a packet that arrives) -** and is very inconvenient for MIPS, so they appear as two seperate +** and is very inconvenient for MIPS, so they appear as two separate ** structures - those used for modifying/reading packets on the card ** and those for modifying/reading packets in real memory, which have an _M ** suffix. diff --git a/drivers/char/synclink.c b/drivers/char/synclink.c index 4f1aee0d8055..65a83c982a44 100644 --- a/drivers/char/synclink.c +++ b/drivers/char/synclink.c @@ -6098,7 +6098,7 @@ void usc_reset( struct mgsl_struct *info ) /* * Program the Bus Configuration Register (BCR) * - * <15> 0 Don't use seperate address + * <15> 0 Don't use separate address * <14..6> 0 reserved * <5..4> 00 IAckmode = Default, don't care * <3> 1 Bus Request Totem Pole output diff --git a/drivers/ide/pci/pdc202xx_new.c b/drivers/ide/pci/pdc202xx_new.c index 92184361e03b..4bac440c8588 100644 --- a/drivers/ide/pci/pdc202xx_new.c +++ b/drivers/ide/pci/pdc202xx_new.c @@ -339,7 +339,7 @@ static int config_chipset_for_dma (ide_drive_t *drive) * fall back to U33 mode. The BIOS INT 13 hooks turn * the clock on then off for each read/write issued. 
I don't * do that here because it would require modifying the - * kernel, seperating the fop routines from the kernel or + * kernel, separating the fop routines from the kernel or * somehow hooking the fops calls. It may also be possible to * leave the 66Mhz clock on and readjust the timing * parameters. diff --git a/drivers/ide/pci/pdc202xx_old.c b/drivers/ide/pci/pdc202xx_old.c index 577c739aca78..d156ba268590 100644 --- a/drivers/ide/pci/pdc202xx_old.c +++ b/drivers/ide/pci/pdc202xx_old.c @@ -400,7 +400,7 @@ static int config_chipset_for_dma (ide_drive_t *drive) * fall back to U33 mode. The BIOS INT 13 hooks turn * the clock on then off for each read/write issued. I don't * do that here because it would require modifying the - * kernel, seperating the fop routines from the kernel or + * kernel, separating the fop routines from the kernel or * somehow hooking the fops calls. It may also be possible to * leave the 66Mhz clock on and readjust the timing * parameters. diff --git a/drivers/isdn/hardware/eicon/io.h b/drivers/isdn/hardware/eicon/io.h index c34c94b21d1d..532b3b4e87da 100644 --- a/drivers/isdn/hardware/eicon/io.h +++ b/drivers/isdn/hardware/eicon/io.h @@ -174,7 +174,7 @@ struct _ISDN_ADAPTER { word assign; /* list of pending ASSIGNs */ word head; /* head of request queue */ word tail; /* tail of request queue */ - ADAPTER a ; /* not a seperate structure */ + ADAPTER a ; /* not a separate structure */ void (* out)(ADAPTER * a) ; byte (* dpc)(ADAPTER * a) ; byte (* tst_irq)(ADAPTER * a) ; diff --git a/drivers/media/video/zr36120.c b/drivers/media/video/zr36120.c index 7c4c9a82dc58..cee51710427d 100644 --- a/drivers/media/video/zr36120.c +++ b/drivers/media/video/zr36120.c @@ -86,7 +86,7 @@ static struct zoran zorans[ZORAN_MAX]; * 0x28 and 0x2C. How you do that is left as an exercise * to the impatient reader :) */ -#define T 1 /* to seperate the bools from the ints */ +#define T 1 /* to separate the bools from the ints */ #define F 0 static struct tvcard tvcards[] = { /* reported working by */ diff --git a/drivers/mtd/maps/elan-104nc.c b/drivers/mtd/maps/elan-104nc.c index 0776fd053243..1d6123ef4cf9 100644 --- a/drivers/mtd/maps/elan-104nc.c +++ b/drivers/mtd/maps/elan-104nc.c @@ -27,7 +27,7 @@ The flash is accessed as follows: 16 bit I/O port (0x22) for some sort of paging. -The single flash device is divided into 3 partition which appear as seperate +The single flash device is divided into 3 partition which appear as separate MTD devices. Linux thinks that the I/O port is used by the PIC and hence check_region() will diff --git a/drivers/net/fealnx.c b/drivers/net/fealnx.c index 92117594cde6..867641422de1 100644 --- a/drivers/net/fealnx.c +++ b/drivers/net/fealnx.c @@ -1563,7 +1563,7 @@ static void intr_handler(int irq, void *dev_instance, struct pt_regs *rgs) } -/* This routine is logically part of the interrupt handler, but seperated +/* This routine is logically part of the interrupt handler, but separated for clarity and better register allocation. */ static int netdev_rx(struct net_device *dev) { diff --git a/drivers/net/hamachi.c b/drivers/net/hamachi.c index 03817dae12dd..1b05e18e1a12 100644 --- a/drivers/net/hamachi.c +++ b/drivers/net/hamachi.c @@ -1468,7 +1468,7 @@ static void hamachi_interrupt(int irq, void *dev_instance, struct pt_regs *rgs) spin_unlock(&hmp->lock); } -/* This routine is logically part of the interrupt handler, but seperated +/* This routine is logically part of the interrupt handler, but separated for clarity and better register allocation. 
*/ static int hamachi_rx(struct net_device *dev) { diff --git a/drivers/net/sis900.c b/drivers/net/sis900.c index 19c3476417d8..f23a4d026b70 100644 --- a/drivers/net/sis900.c +++ b/drivers/net/sis900.c @@ -750,7 +750,7 @@ static u16 __devinit read_eeprom(long ioaddr, int location) /* Read and write the MII management registers using software-generated serial MDIO protocol. Note that the command bits and data bits are - send out seperately */ + send out separately */ #define mdio_delay() inl(mdio_addr) static void mdio_idle(long mdio_addr) diff --git a/drivers/net/sk98lin/skgeinit.c b/drivers/net/sk98lin/skgeinit.c index fcb2b3960ed8..4befb55bb3b4 100644 --- a/drivers/net/sk98lin/skgeinit.c +++ b/drivers/net/sk98lin/skgeinit.c @@ -1134,7 +1134,7 @@ int QuIoOffs) /* Queue IO Address Offset */ * After calling this function the descriptor rings and rx and tx * queues of this port may be reconfigured. * - * It is possible to stop the receive and transmit path seperate or + * It is possible to stop the receive and transmit path separate or * both together. * * Dir = SK_STOP_TX Stops the transmit path only and resets diff --git a/drivers/net/skfp/h/supern_2.h b/drivers/net/skfp/h/supern_2.h index ea564b337c90..5ba0b8306753 100644 --- a/drivers/net/skfp/h/supern_2.h +++ b/drivers/net/skfp/h/supern_2.h @@ -926,7 +926,7 @@ struct tx_queue { #define PL_PC1 (1<<7) /* BREAK - entry point in start PCM*/ #define PL_PC2 (2<<7) /* TRACE - to localize stuck Beacon*/ #define PL_PC3 (3<<7) /* CONNECT - synchronize ends of conn*/ -#define PL_PC4 (4<<7) /* NEXT - to seperate the signalng*/ +#define PL_PC4 (4<<7) /* NEXT - to separate the signalng*/ #define PL_PC5 (5<<7) /* SIGNAL - PCM trans/rec. bit infos*/ #define PL_PC6 (6<<7) /* JOIN - 1. state to activ conn. */ #define PL_PC7 (7<<7) /* VERIFY - 2. - " - (3. ACTIVE) */ diff --git a/drivers/net/skfp/smt.c b/drivers/net/skfp/smt.c index b02ab9a2d020..805caf9137eb 100644 --- a/drivers/net/skfp/smt.c +++ b/drivers/net/skfp/smt.c @@ -417,7 +417,7 @@ int event ; /* * Make sure the fddiMACUNDA_Flag = FALSE is * included in the SRF so we don't generate - * a seperate SRF for the deassertion of this + * a separate SRF for the deassertion of this * condition */ update_dac(smc,0) ; diff --git a/drivers/net/skfp/smtdef.c b/drivers/net/skfp/smtdef.c index 221b0334ce70..beaed26b5d19 100644 --- a/drivers/net/skfp/smtdef.c +++ b/drivers/net/skfp/smtdef.c @@ -217,7 +217,7 @@ int level ; mib->fddiSMTStatRptPolicy = TRUE ; mib->fddiSMTTrace_MaxExpiration = SEC2MIB(7) ; mib->fddiSMTMACIndexes = INDEX_MAC ; - mib->fddiSMTStationStatus = MIB_SMT_STASTA_SEPA ; /* seperated */ + mib->fddiSMTStationStatus = MIB_SMT_STASTA_SEPA ; /* separated */ mib->m[MAC0].fddiMACIndex = INDEX_MAC ; mib->m[MAC0].fddiMACFrameStatusFunctions = FSC_TYPE0 ; diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index f5818d526405..74162a6dd193 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -1872,7 +1872,7 @@ static int tg3_vlan_rx(struct tg3 *tp, struct sk_buff *skb, u16 vlan_tag) * Each TG3_BDINFO specifies a MAXLEN field and the first TG3_BDINFO * which is within the range of the new packet's length is chosen. * - * The "seperate ring for rx status" scheme may sound queer, but it makes + * The "separate ring for rx status" scheme may sound queer, but it makes * sense from a cache coherency perspective. If only the host writes * to the buffer post rings, and only the chip writes to the rx status * rings, then cache lines never move beyond shared-modified state. 
diff --git a/drivers/net/tokenring/madgemc.c b/drivers/net/tokenring/madgemc.c index 0390d01340cf..96aa23c39e4f 100644 --- a/drivers/net/tokenring/madgemc.c +++ b/drivers/net/tokenring/madgemc.c @@ -514,7 +514,7 @@ unsigned short madgemc_setnselout_pins(struct net_device *dev) * * Register selection is normally done via three contiguous * bits. However, some boards (such as the MC16/32) use only - * two bits, plus a seperate bit in the glue chip. This + * two bits, plus a separate bit in the glue chip. This * sets the SRSX bit (the top bit). See page 4-17 in the * Yellow Book for which registers are affected. * @@ -629,7 +629,7 @@ void madgemc_chipset_close(struct net_device *dev) /* * Read the card type (MC16 or MC32) from the card. * - * The configuration registers are stored in two seperate + * The configuration registers are stored in two separate * pages. Pages are flipped by clearing bit 3 of CONTROL_REG0 (PAGE) * for page zero, or setting bit 3 for page one. * diff --git a/drivers/net/tokenring/smctr_firmware.h b/drivers/net/tokenring/smctr_firmware.h index dadf94daa5b1..53f2cbc817c9 100644 --- a/drivers/net/tokenring/smctr_firmware.h +++ b/drivers/net/tokenring/smctr_firmware.h @@ -1,6 +1,6 @@ /* * The firmware this driver downloads into the tokenring card is a - * seperate program and is not GPL'd source code, even though the Linux + * separate program and is not GPL'd source code, even though the Linux * side driver and the routine that loads this data into the card are. * * This firmware is licensed to you strictly for use in conjunction diff --git a/drivers/net/tokenring/tmsisa.c b/drivers/net/tokenring/tmsisa.c index 3a8ab2d92330..c671a43ea95e 100644 --- a/drivers/net/tokenring/tmsisa.c +++ b/drivers/net/tokenring/tmsisa.c @@ -302,7 +302,7 @@ int __init tms_isa_probe(struct net_device *dev) * Calling this on a board that does not support it can be a very * dangerous thing. The Madge board, for instance, will lock your * machine hard when this is called. Luckily, its supported in a - * seperate driver. --ASF + * separate driver. --ASF */ static void tms_isa_read_eeprom(struct net_device *dev) { diff --git a/drivers/net/tokenring/tmspci.c b/drivers/net/tokenring/tmspci.c index 6b26454f2fd3..1401392a20db 100644 --- a/drivers/net/tokenring/tmspci.c +++ b/drivers/net/tokenring/tmspci.c @@ -190,7 +190,7 @@ err_out_trdev: * Calling this on a board that does not support it can be a very * dangerous thing. The Madge board, for instance, will lock your * machine hard when this is called. Luckily, its supported in a - * seperate driver. --ASF + * separate driver. --ASF */ static void tms_pci_read_eeprom(struct net_device *dev) { diff --git a/drivers/net/wan/lmc/lmc_ioctl.h b/drivers/net/wan/lmc/lmc_ioctl.h index 1f756e6fdf67..38f3c1bca827 100644 --- a/drivers/net/wan/lmc/lmc_ioctl.h +++ b/drivers/net/wan/lmc/lmc_ioctl.h @@ -173,7 +173,7 @@ /* * Some of the MII16 bits are mirrored in the MII17 register as well, - * but let's keep thing seperate for now, and get only the cable from + * but let's keep thing separate for now, and get only the cable from * the MII17. */ #define LMC_MII17_SSI_CABLE_MASK 0x0038 /* mask to extract the cable type */ diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c index 20505f6997a9..dff1c393dd4e 100644 --- a/drivers/parisc/ccio-dma.c +++ b/drivers/parisc/ccio-dma.c @@ -852,7 +852,7 @@ ccio_fill_pdir(struct ioc *ioc, struct scatterlist *startsg, int nents, ** in the DMA stream. Allocates PDIR entries but does not fill them. 
** Returns the number of DMA chunks. ** -** Doing the fill seperate from the coalescing/allocation keeps the +** Doing the fill separate from the coalescing/allocation keeps the ** code simpler. Future enhancement could make one pass through ** the sglist do both. */ diff --git a/drivers/parisc/ccio-rm-dma.c b/drivers/parisc/ccio-rm-dma.c index e0a1d04014a3..6c1eb2f54a35 100644 --- a/drivers/parisc/ccio-rm-dma.c +++ b/drivers/parisc/ccio-rm-dma.c @@ -116,7 +116,7 @@ static int ccio_map_sg(struct pci_dev *dev, struct scatterlist *sglist, int nent { int tmp = nents; - /* KISS: map each buffer seperately. */ + /* KISS: map each buffer separately. */ while (nents) { sg_dma_address(sglist) = ccio_map_single(dev, sglist->address, sglist->length, direction); sg_dma_len(sglist) = sglist->length; diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c index a66bb850a312..70463f9e53f6 100644 --- a/drivers/parisc/sba_iommu.c +++ b/drivers/parisc/sba_iommu.c @@ -1132,7 +1132,7 @@ sba_fill_pdir( * in the DMA stream. Allocates PDIR entries but does not fill them. * Returns the number of DMA chunks. * - * Doing the fill seperate from the coalescing/allocation keeps the + * Doing the fill separate from the coalescing/allocation keeps the * code simpler. Future enhancement could make one pass through * the sglist do both. */ diff --git a/drivers/s390/char/sclp_tty.c b/drivers/s390/char/sclp_tty.c index d36899ea8f3a..7c5452ba338c 100644 --- a/drivers/s390/char/sclp_tty.c +++ b/drivers/s390/char/sclp_tty.c @@ -198,7 +198,7 @@ sclp_tty_ioctl(struct tty_struct *tty, struct file * file, break; case TIOCSCLPSDELIM: /* - * set special character used for seperating upper and + * set special character used for separating upper and * lower case, 0x00 disables this feature */ if (get_user(sclp_ioctls.delim, (unsigned char *) arg)) @@ -206,7 +206,7 @@ sclp_tty_ioctl(struct tty_struct *tty, struct file * file, break; case TIOCSCLPGDELIM: /* - * get special character used for seperating upper and + * get special character used for separating upper and * lower case, 0x00 disables this feature */ if (put_user(sclp_ioctls.delim, (unsigned char *) arg)) @@ -507,7 +507,7 @@ static void sclp_tty_input(unsigned char* buf, unsigned int count) /* * get a EBCDIC string in upper/lower case, - * find out characters in lower/upper case seperated by a special character, + * find out characters in lower/upper case separated by a special character, * modifiy original string, * returns length of resulting string */ diff --git a/drivers/s390/char/sclp_tty.h b/drivers/s390/char/sclp_tty.h index 602c2e0ab159..81bfb39bd43c 100644 --- a/drivers/s390/char/sclp_tty.h +++ b/drivers/s390/char/sclp_tty.h @@ -42,7 +42,7 @@ struct sclp_ioctls { #define TIOCSCLPSINIT _IO(SCLP_IOCTL_LETTER, 6) /* enable/disable conversion from upper to lower case of input */ #define TIOCSCLPSCASE _IOW(SCLP_IOCTL_LETTER, 7, unsigned char) -/* set special character used for seperating upper and lower case, */ +/* set special character used for separating upper and lower case, */ /* 0x00 disables this feature */ #define TIOCSCLPSDELIM _IOW(SCLP_IOCTL_LETTER, 9, unsigned char) @@ -58,7 +58,7 @@ struct sclp_ioctls { #define TIOCSCLPGOBUF _IOR(SCLP_IOCTL_LETTER, 15, unsigned short) /* Is conversion from upper to lower case of input enabled ? 
*/ #define TIOCSCLPGCASE _IOR(SCLP_IOCTL_LETTER, 17, unsigned char) -/* get special character used for seperating upper and lower case, */ +/* get special character used for separating upper and lower case, */ /* 0x00 disables this feature */ #define TIOCSCLPGDELIM _IOR(SCLP_IOCTL_LETTER, 19, unsigned char) /* get the number of buffers/pages got from kernel at startup */ diff --git a/drivers/s390/char/tape_char.c b/drivers/s390/char/tape_char.c index f7237c0a2bd0..ba281131041a 100644 --- a/drivers/s390/char/tape_char.c +++ b/drivers/s390/char/tape_char.c @@ -64,7 +64,7 @@ tapechar_cleanup_device(struct tape_device *device) * Terminate write command (we write two TMs and skip backward over last) * This ensures that the tape is always correctly terminated. * When the user writes afterwards a new file, he will overwrite the - * second TM and therefore one TM will remain to seperate the + * second TM and therefore one TM will remain to separate the * two files on the tape... */ static inline void diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index f5495ded6306..9aee76efd823 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -79,7 +79,7 @@ struct diskparm /* * Host side memory scatter gather list * Used by the adapter for read, write, and readdirplus operations - * We have seperate 32 and 64 bit version because even + * We have separate 32 and 64 bit version because even * on 64 bit systems not all cards support the 64 bit version */ struct sgentry { @@ -443,7 +443,7 @@ struct aac_driver_ident /* * The adapter interface specs all queues to be located in the same * physically contigous block. The host structure that defines the - * commuication queues will assume they are each a seperate physically + * commuication queues will assume they are each a separate physically * contigous memory region that will support them all being one big * contigous block. * There is a command and response queue for each level and direction of diff --git a/drivers/scsi/aic7xxx_old.c b/drivers/scsi/aic7xxx_old.c index cba61d8d1744..560bea06a2cd 100644 --- a/drivers/scsi/aic7xxx_old.c +++ b/drivers/scsi/aic7xxx_old.c @@ -2557,7 +2557,7 @@ aic7xxx_allocate_scb(struct aic7xxx_host *p) * than the right hand side. If the number of SG array elements * is changed, this function may not be near so efficient any more. * - * Since the DMA'able buffers are now allocated in a seperate + * Since the DMA'able buffers are now allocated in a separate * chunk this algorithm has been modified to match. The '12' * and '6' factors in scb_size are for the DMA'able command byte * and sensebuffers respectively. 
-DaveM diff --git a/drivers/scsi/qla1280.c b/drivers/scsi/qla1280.c index 5e53c8b6c7b1..7c781dbfc344 100644 --- a/drivers/scsi/qla1280.c +++ b/drivers/scsi/qla1280.c @@ -108,7 +108,7 @@ and scsi_set_pci_device() - Call scsi_set_pci_device() for all devices - Reduce size of kernel version dependant device probe code - - Move duplicate probe/init code to seperate function + - Move duplicate probe/init code to separate function - Handle error if qla1280_mem_alloc() fails - Kill OFFSET() macro and use Linux's PCI definitions instead - Kill private structure defining PCI config space (struct config_reg) diff --git a/drivers/scsi/qlogicfc.c b/drivers/scsi/qlogicfc.c index 645dec43c6f9..41e5fd85ca1d 100644 --- a/drivers/scsi/qlogicfc.c +++ b/drivers/scsi/qlogicfc.c @@ -397,7 +397,7 @@ struct Status_Entry { #define MBOX_PORT_LOGOUT 0x0071 /* - * Firmware if needed (note this is a hack, it belongs in a seperate + * Firmware if needed (note this is a hack, it belongs in a separate * module. */ diff --git a/drivers/scsi/sim710.c b/drivers/scsi/sim710.c index e2d99f359f0d..2e53e6c236d9 100644 --- a/drivers/scsi/sim710.c +++ b/drivers/scsi/sim710.c @@ -48,7 +48,7 @@ * addr: parameter is specified first for each controller. e.g. * sim710="addr:0x9000 irq:15 addr:0x8000 irq:14" * - * To seperate the different options, ' ', '+', and ',' can be used, except + * To separate the different options, ' ', '+', and ',' can be used, except * that ',' can not be used in module parameters. ' ' can be a pain, because * it needs to be quoted, which causes problems with some installers. * The command line above is completely equivalent to @@ -378,7 +378,7 @@ param_setup(char *str) return 1; } - /* Allow ',', ' ', or '+' seperators. Used to be ',' at boot and + /* Allow ',', ' ', or '+' separators. Used to be ',' at boot and * ' ' for module load, some installers crap out on the space and * insmod doesn't like the comma. */ diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index d0ea2df359a5..10f84fc41d71 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -214,7 +214,7 @@ * Brian Warner has a place to put his code. * Made the ezusb specific functions generic enough that different * devices can use them (whiteheat and keyspan_pda both need them). - * Split out a whole bunch of structure and other stuff to a seperate + * Split out a whole bunch of structure and other stuff to a separate * usb-serial.h file. * Made the Visor connection messages a little more understandable, now * that Miles Lott (milos@insync.net) has gotten the Generic channel to diff --git a/drivers/video/riva/fbdev.c b/drivers/video/riva/fbdev.c index db8fda5ee317..1beea3e66640 100644 --- a/drivers/video/riva/fbdev.c +++ b/drivers/video/riva/fbdev.c @@ -1759,7 +1759,7 @@ static int __devinit rivafb_probe(struct pci_dev *pd, case NV_ARCH_03: /* Riva128's PRAMIN is in the "framebuffer" space * Since these cards were never made with more than 8 megabytes - * we can safely allocate this seperately. + * we can safely allocate this separately. */ if (!request_mem_region(rivafb_fix.smem_start + 0x00C00000, 0x00008000, "rivafb")) { diff --git a/drivers/video/skeletonfb.c b/drivers/video/skeletonfb.c index bb95b6dc51ab..57b69168e37e 100644 --- a/drivers/video/skeletonfb.c +++ b/drivers/video/skeletonfb.c @@ -19,7 +19,7 @@ * struct vc_data to data in a device independent way in struct fb_info. 
Then * various functions in struct fb_ops will be called to store the device * dependent state in the par field in struct fb_info and to change the - * hardware to that state. This allows a very clean seperation of the fbdev + * hardware to that state. This allows a very clean separation of the fbdev * layer from the console layer. It also allows one to use fbdev on its own * which is a bounus for embedded devices. The reason this approach works is * for each framebuffer device when used as a tty/console device is allocated @@ -93,7 +93,7 @@ static struct fb_fix_screeninfo xxxfb_fix __initdata = { * Modern graphical hardware not only supports pipelines but some * also support multiple monitors where each display can have its * its own unique data. In this case each display could be - * represented by a seperate framebuffer device thus a seperate + * represented by a separate framebuffer device thus a separate * struct fb_info. Now the struct xxx_par represents the graphics * hardware state thus only one exist per card. In this case the * struct xxx_par for each graphics card would be shared between diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 0d214c4a54fd..a95b66f560c7 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1058,7 +1058,7 @@ static inline void fill_note(struct memelfnote *note, const char *name, int type /* * fill up all the fields in prstatus from the given task struct, except registers - * which need to be filled up seperately. + * which need to be filled up separately. */ static inline void fill_prstatus(struct elf_prstatus *prstatus, struct task_struct *p, long signr) { diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 7f088639326e..7975056a6995 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -403,7 +403,7 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs) set_personality(PER_LINUX); /* - * there are a couple of cases here, the seperate code/data + * there are a couple of cases here, the separate code/data * case, and then the fully copied to RAM case which lumps * it all together. */ diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index 58df2f7c38cd..f85bb58be45b 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -2815,7 +2815,7 @@ restart: txLazyCommit(tblk); /* - * We can be running indefinately if other processors + * We can be running indefinitely if other processors * are adding transactions to this list */ cond_resched(); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7cfd12436c79..1f1ab0213a87 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -743,8 +743,8 @@ nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, goto out; /* - * Now we do a seperate LOOKUP for each component of the mount path. - * The LOOKUPs are done seperately so that we can conveniently + * Now we do a separate LOOKUP for each component of the mount path. + * The LOOKUPs are done separately so that we can conveniently * catch an ERR_WRONGSEC if it occurs along the way... 
*/ p = server->mnt_path; diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 27d7013b4d80..8a20f1cfc415 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -5522,7 +5522,7 @@ xfs_getbmap( int prealloced; /* this is a file with * preallocated data space */ int sh_unwritten; /* true, if unwritten */ - /* extents listed seperately */ + /* extents listed separately */ int bmapi_flags; /* flags for xfs_bmapi */ __int32_t oflags; /* getbmapx bmv_oflags field */ diff --git a/include/asm-ia64/sn/sv.h b/include/asm-ia64/sn/sv.h index 044659de2ed6..8e93fb91e0eb 100644 --- a/include/asm-ia64/sn/sv.h +++ b/include/asm-ia64/sn/sv.h @@ -99,7 +99,7 @@ void sv_init(sv_t *sv, sv_mon_lock_t *monitor_lock, int flags); * Set SV_WAIT_SIG in sv_wait_flags to let the sv_wait be interrupted by signals. * * timeout is how long to wait before giving up, or 0 to wait - * indefinately. It is given in jiffies, and is relative. + * indefinitely. It is given in jiffies, and is relative. * * The associated lock must be locked on entry. It is unlocked on return. * diff --git a/include/asm-m68k/mac_psc.h b/include/asm-m68k/mac_psc.h index 75d415469b05..e9fc5011eb49 100644 --- a/include/asm-m68k/mac_psc.h +++ b/include/asm-m68k/mac_psc.h @@ -158,7 +158,7 @@ * 0x3 = CD Audio * 0x4 = External Audio * - * The volume is definately not the general + * The volume is definitely not the general * output volume as it doesn't affect the * alert sound volume. */ diff --git a/include/asm-mips/ng1hw.h b/include/asm-mips/ng1hw.h index d981e583641c..a57fdede64c2 100644 --- a/include/asm-mips/ng1hw.h +++ b/include/asm-mips/ng1hw.h @@ -1,6 +1,6 @@ /* $Id: ng1hw.h,v 1.4 1999/08/04 06:01:51 ulfc Exp $ * - * ng1hw.h: Tweaks the newport.h structures and definations to be compatible + * ng1hw.h: Tweaks the newport.h structures and definitions to be compatible * with IRIX. Quite ugly, but it works. * * Copyright (C) 1999 Ulf Carlsson (ulfc@thepuffingroup.com) diff --git a/include/asm-sparc/ide.h b/include/asm-sparc/ide.h index 40c498bd2445..e1e6dfd5120a 100644 --- a/include/asm-sparc/ide.h +++ b/include/asm-sparc/ide.h @@ -83,7 +83,7 @@ static __inline__ void ide_init_default_hwifs(void) #define ide_ack_intr(hwif) (1) /* XXX Known to be broken. Axboe will fix the problems this - * XXX has by making seperate IN/OUT macros for IDE_DATA + * XXX has by making separate IN/OUT macros for IDE_DATA * XXX register and rest of IDE regs and also using * XXX ide_ioreg_t instead of u32 for ports. -DaveM */ diff --git a/include/asm-sparc64/ide.h b/include/asm-sparc64/ide.h index 32aab1e59203..b27e0400683a 100644 --- a/include/asm-sparc64/ide.h +++ b/include/asm-sparc64/ide.h @@ -80,7 +80,7 @@ static __inline__ void ide_init_default_hwifs(void) #define ide_ack_intr(hwif) (1) /* XXX Known to be broken. Axboe will fix the problems this - * XXX has by making seperate IN/OUT macros for IDE_DATA + * XXX has by making separate IN/OUT macros for IDE_DATA * XXX register and rest of IDE regs and also using * XXX ide_ioreg_t instead of u32 for ports. -DaveM */ diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index d490adb2db72..bc89e879c379 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -41,7 +41,7 @@ extern spinlock_t vlan_group_lock; /* Find a VLAN device by the MAC address of it's Ethernet device, and * it's VLAN ID. The default configuration is to have VLAN's scope * to be box-wide, so the MAC will be ignored. 
The mac will only be - * looked at if we are configured to have a seperate set of VLANs per + * looked at if we are configured to have a separate set of VLANs per * each MAC addressable interface. Note that this latter option does * NOT follow the spec for VLANs, but may be useful for doing very * large quantities of VLAN MUX/DEMUX onto FrameRelay or ATM PVCs. diff --git a/sound/core/seq/seq_device.c b/sound/core/seq/seq_device.c index f10d02b438af..88f030ccb4fc 100644 --- a/sound/core/seq/seq_device.c +++ b/sound/core/seq/seq_device.c @@ -19,7 +19,7 @@ * *---------------------------------------------------------------- * - * This device handler seperates the card driver module from sequencer + * This device handler separates the card driver module from sequencer * stuff (sequencer core, synth drivers, etc), so that user can avoid * to spend unnecessary resources e.g. if he needs only listening to * MP3s. diff --git a/sound/oss/i810_audio.c b/sound/oss/i810_audio.c index 0302eb372914..c63f760dea57 100644 --- a/sound/oss/i810_audio.c +++ b/sound/oss/i810_audio.c @@ -177,7 +177,7 @@ struct sg_item { struct i810_channel { /* these sg guys should probably be allocated - seperately as nocache. Must be 8 byte aligned */ + separately as nocache. Must be 8 byte aligned */ struct sg_item sg[SG_LEN]; /* 32*8 */ u32 offset; /* 4 */ u32 port; /* 4 */ @@ -186,7 +186,7 @@ struct i810_channel }; /* - * we have 3 seperate dma engines. pcm in, pcm out, and mic. + * we have 3 separate dma engines. pcm in, pcm out, and mic. * each dma engine has controlling registers. These goofy * names are from the datasheet, but make it easy to write * code while leafing through it. @@ -900,7 +900,7 @@ static void start_dac(struct i810_state *state) #define DMABUF_DEFAULTORDER (16-PAGE_SHIFT) #define DMABUF_MINORDER 1 -/* allocate DMA buffer, playback and recording buffer should be allocated seperately */ +/* allocate DMA buffer, playback and recording buffer should be allocated separately */ static int alloc_dmabuf(struct i810_state *state) { struct dmabuf *dmabuf = &state->dmabuf; diff --git a/sound/oss/trident.c b/sound/oss/trident.c index 74b467cf339c..912be5b6e882 100644 --- a/sound/oss/trident.c +++ b/sound/oss/trident.c @@ -1227,7 +1227,7 @@ static int alloc_dmabuf(struct dmabuf* dmabuf, struct pci_dev* pci_dev, int orde } /* allocate the main DMA buffer, playback and recording buffer should be */ -/* allocated seperately */ +/* allocated separately */ static int alloc_main_dmabuf(struct trident_state *state) { struct dmabuf *dmabuf = &state->dmabuf; diff --git a/sound/pci/ali5451/ali5451.c b/sound/pci/ali5451/ali5451.c index f05aeb5fbb5a..e88a74e78f4b 100644 --- a/sound/pci/ali5451/ali5451.c +++ b/sound/pci/ali5451/ali5451.c @@ -67,7 +67,7 @@ MODULE_PARM_DESC(pcm_channels, "PCM Channels"); MODULE_PARM_SYNTAX(pcm_channels, SNDRV_ENABLED ",default:32,allows:{{1,32}}"); /* - * Debug part definations + * Debug part definitions */ //#define ALI_DEBUG @@ -79,7 +79,7 @@ MODULE_PARM_SYNTAX(pcm_channels, SNDRV_ENABLED ",default:32,allows:{{1,32}}"); #endif /* - * Constants defination + * Constants definition */ #ifndef PCI_VENDOR_ID_ALI -- cgit v1.2.3 From 202b74ebfa3aa288b3a306160febf478978ecee4 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 5 Feb 2003 23:39:31 -0800 Subject: [PATCH] Make sys_wait4() more readable I cleaned up sys_wait4; it was straightforward and I think a definite improvement. While at it, I noticed that one of the races I fixed in the TASK_STOPPED case actually can happen earlier. 
Between read_unlock and write_lock_irq, another thread could reap the process and make P invalid, so now I do get_task_struct before read_unlock and then the existing race checks catch all scenarios. Aside from the aforementioned race tweak, the code should be the same as in the previous patch (that Ingo and I have tested more thoroughly) modulo being moved into functions and some reformatting and comment changes. Oh, my old patch had one case where it failed to retake the read lock after a race bailout that I just noticed reading over it. That's fixed too. These exit fixes were something I noticed incidentally and spent less time on than the signals changes. Another few passes of eyeballs over them are certainly warranted. (In particular, there are code paths like that one that check for specific races that have probably never been seen in practice, so those code paths have never run once.) --- kernel/exit.c | 263 +++++++++++++++++++++++++++++++++------------------------- 1 file changed, 149 insertions(+), 114 deletions(-) diff --git a/kernel/exit.c b/kernel/exit.c index 25281033be8d..1e23538c9a0e 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -814,11 +814,149 @@ static int eligible_child(pid_t pid, int options, task_t *p) return 1; } +/* + * Handle sys_wait4 work for one task in state TASK_ZOMBIE. We hold + * read_lock(&tasklist_lock) on entry. If we return zero, we still hold + * the lock and this task is uninteresting. If we return nonzero, we have + * released the lock and the system call should return. + */ +static int wait_task_zombie(task_t *p, unsigned int *stat_addr, struct rusage *ru) +{ + unsigned long state; + int retval; + + /* + * Try to move the task's state to DEAD + * only one thread is allowed to do this: + */ + state = xchg(&p->state, TASK_DEAD); + if (state != TASK_ZOMBIE) { + BUG_ON(state != TASK_DEAD); + return 0; + } + if (unlikely(p->exit_signal == -1)) + /* + * This can only happen in a race with a ptraced thread + * dying on another processor. + */ + return 0; + + /* + * Now we are sure this task is interesting, and no other + * thread can reap it because we set its state to TASK_DEAD. + */ + read_unlock(&tasklist_lock); + + retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; + if (!retval && stat_addr) { + if (p->sig->group_exit) + retval = put_user(p->sig->group_exit_code, stat_addr); + else + retval = put_user(p->exit_code, stat_addr); + } + if (retval) { + p->state = TASK_ZOMBIE; + return retval; + } + retval = p->pid; + if (p->real_parent != p->parent) { + write_lock_irq(&tasklist_lock); + /* Double-check with lock held. */ + if (p->real_parent != p->parent) { + __ptrace_unlink(p); + do_notify_parent(p, p->exit_signal); + p->state = TASK_ZOMBIE; + p = NULL; + } + write_unlock_irq(&tasklist_lock); + } + if (p != NULL) + release_task(p); + BUG_ON(!retval); + return retval; +} + +/* + * Handle sys_wait4 work for one task in state TASK_STOPPED. We hold + * read_lock(&tasklist_lock) on entry. If we return zero, we still hold + * the lock and this task is uninteresting. If we return nonzero, we have + * released the lock and the system call should return. + */ +static int wait_task_stopped(task_t *p, int delayed_group_leader, + unsigned int *stat_addr, struct rusage *ru) +{ + int retval, exit_code; + + if (!p->exit_code) + return 0; + if (delayed_group_leader && !(p->ptrace & PT_PTRACED) && + p->sig && p->sig->group_stop_count > 0) + /* + * A group stop is in progress and this is the group leader. + * We won't report until all threads have stopped. 
+ */ + return 0; + + /* + * Now we are pretty sure this task is interesting. + * Make sure it doesn't get reaped out from under us while we + * give up the lock and then examine it below. We don't want to + * keep holding onto the tasklist_lock while we call getrusage and + * possibly take page faults for user memory. + */ + get_task_struct(p); + read_unlock(&tasklist_lock); + write_lock_irq(&tasklist_lock); + + /* + * This uses xchg to be atomic with the thread resuming and setting + * it. It must also be done with the write lock held to prevent a + * race with the TASK_ZOMBIE case. + */ + exit_code = xchg(&p->exit_code, 0); + if (unlikely(p->state > TASK_STOPPED)) { + /* + * The task resumed and then died. Let the next iteration + * catch it in TASK_ZOMBIE. Note that exit_code might + * already be zero here if it resumed and did _exit(0). + * The task itself is dead and won't touch exit_code again; + * other processors in this function are locked out. + */ + p->exit_code = exit_code; + exit_code = 0; + } + if (unlikely(exit_code == 0)) { + /* + * Another thread in this function got to it first, or it + * resumed, or it resumed and then died. + */ + write_unlock_irq(&tasklist_lock); + put_task_struct(p); + read_lock(&tasklist_lock); + return 0; + } + + /* move to end of parent's list to avoid starvation */ + remove_parent(p); + add_parent(p, p->parent); + + write_unlock_irq(&tasklist_lock); + + retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; + if (!retval && stat_addr) + retval = put_user((exit_code << 8) | 0x7f, stat_addr); + if (!retval) + retval = p->pid; + put_task_struct(p); + + BUG_ON(!retval); + return retval; +} + asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru) { DECLARE_WAITQUEUE(wait, current); struct task_struct *tsk; - unsigned long state; int flag, retval; if (options & ~(WNOHANG|WUNTRACED|__WNOTHREAD|__WCLONE|__WALL)) @@ -836,8 +974,6 @@ repeat: int ret; list_for_each(_p,&tsk->children) { - int exit_code; - p = list_entry(_p,struct task_struct,sibling); ret = eligible_child(pid, options, p); @@ -847,125 +983,24 @@ repeat: switch (p->state) { case TASK_STOPPED: - if (!p->exit_code) - continue; - if (!(options & WUNTRACED) && !(p->ptrace & PT_PTRACED)) - continue; - if (ret == 2 && !(p->ptrace & PT_PTRACED) && - p->sig && p->sig->group_stop_count > 0) - /* - * A group stop is in progress and - * we are the group leader. We won't - * report until all threads have - * stopped. - */ - continue; - read_unlock(&tasklist_lock); - - /* move to end of parent's list to avoid starvation */ - write_lock_irq(&tasklist_lock); - remove_parent(p); - add_parent(p, p->parent); - - /* - * This uses xchg to be atomic with - * the thread resuming and setting it. - * It must also be done with the write - * lock held to prevent a race with the - * TASK_ZOMBIE case (below). - */ - exit_code = xchg(&p->exit_code, 0); - if (unlikely(p->state > TASK_STOPPED)) { - /* - * The task resumed and then died. - * Let the next iteration catch it - * in TASK_ZOMBIE. Note that - * exit_code might already be zero - * here if it resumed and did - * _exit(0). The task itself is - * dead and won't touch exit_code - * again; other processors in - * this function are locked out. - */ - p->exit_code = exit_code; - exit_code = 0; - } - if (unlikely(exit_code == 0)) { - /* - * Another thread in this function - * got to it first, or it resumed, - * or it resumed and then died. 
- */ - write_unlock_irq(&tasklist_lock); + if (!(options & WUNTRACED) && + !(p->ptrace & PT_PTRACED)) continue; - } - /* - * Make sure this doesn't get reaped out from - * under us while we are examining it below. - * We don't want to keep holding onto the - * tasklist_lock while we call getrusage and - * possibly take page faults for user memory. - */ - get_task_struct(p); - write_unlock_irq(&tasklist_lock); - retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; - if (!retval && stat_addr) - retval = put_user((exit_code << 8) | 0x7f, stat_addr); - if (!retval) - retval = p->pid; - put_task_struct(p); - goto end_wait4; + retval = wait_task_stopped(p, ret == 2, + stat_addr, ru); + if (retval != 0) /* He released the lock. */ + goto end_wait4; + break; case TASK_ZOMBIE: /* * Eligible but we cannot release it yet: */ if (ret == 2) continue; - /* - * Try to move the task's state to DEAD - * only one thread is allowed to do this: - */ - state = xchg(&p->state, TASK_DEAD); - if (state != TASK_ZOMBIE) - continue; - if (unlikely(p->exit_signal == -1)) - /* - * This can only happen in a race with - * a ptraced thread dying on another - * processor. - */ - continue; - read_unlock(&tasklist_lock); - - retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; - if (!retval && stat_addr) { - if (p->sig->group_exit) - retval = put_user(p->sig->group_exit_code, stat_addr); - else - retval = put_user(p->exit_code, stat_addr); - } - if (retval) { - p->state = TASK_ZOMBIE; + retval = wait_task_zombie(p, stat_addr, ru); + if (retval != 0) /* He released the lock. */ goto end_wait4; - } - retval = p->pid; - if (p->real_parent != p->parent) { - write_lock_irq(&tasklist_lock); - /* Double-check with lock held. */ - if (p->real_parent != p->parent) { - __ptrace_unlink(p); - do_notify_parent( - p, p->exit_signal); - p->state = TASK_ZOMBIE; - p = NULL; - } - write_unlock_irq(&tasklist_lock); - } - if (p != NULL) - release_task(p); - goto end_wait4; - default: - continue; + break; } } if (!flag) { -- cgit v1.2.3 From 1dbb976e474aeb880bc8a2d918bbb2f70af073a4 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 6 Feb 2003 00:22:04 -0800 Subject: [PATCH] revert extra sendfile security hook patch hm. It seems that I sent this patch twice. After resyncing with your tree I go through and try to reapply all the sent patches, throwing out the ones which get a lot of rejects. Just to make sure that everything got through OK. But it appears that that particular patch happily applied on top of itself, so I assumed it was not applied... --- fs/read_write.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/fs/read_write.c b/fs/read_write.c index bcdf304115e6..04904f7fa206 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -535,10 +535,6 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, if (retval) goto fput_in; - retval = security_file_permission (in_file, MAY_READ); - if (retval) - goto fput_in; - /* * Get output file, and verify that it is ok.. */ @@ -560,10 +556,6 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, if (retval) goto fput_out; - retval = security_file_permission (out_file, MAY_WRITE); - if (retval) - goto fput_out; - if (!ppos) ppos = &in_file->f_pos; -- cgit v1.2.3 From 6b932555f7693040e3f8034dff9042d8b236dc3e Mon Sep 17 00:00:00 2001 From: "Andries E. Brouwer" Date: Thu, 6 Feb 2003 00:22:48 -0800 Subject: [PATCH] Remove dead code In struct char_dev the fields openers and sem are unused. The file char_dev.c claims that it is called differently. 
--- fs/char_dev.c | 9 +++------ include/linux/fs.h | 2 -- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/fs/char_dev.c b/fs/char_dev.c index ff34b5e336cd..ec9489c3a387 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -1,5 +1,5 @@ /* - * linux/fs/block_dev.c + * linux/fs/char_dev.c * * Copyright (C) 1991, 1992 Linus Torvalds */ @@ -38,16 +38,13 @@ static kmem_cache_t * cdev_cachep; ((struct char_device *) kmem_cache_alloc(cdev_cachep, SLAB_KERNEL)) #define destroy_cdev(cdev) kmem_cache_free(cdev_cachep, (cdev)) -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags) { - struct char_device * cdev = (struct char_device *) foo; + struct char_device *cdev = (struct char_device *) foo; if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) - { memset(cdev, 0, sizeof(*cdev)); - sema_init(&cdev->sem, 1); - } } void __init cdev_cache_init(void) diff --git a/include/linux/fs.h b/include/linux/fs.h index 9a17c9819ae9..037c1fe2ad6c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -332,8 +332,6 @@ struct char_device { struct list_head hash; atomic_t count; dev_t dev; - atomic_t openers; - struct semaphore sem; }; struct block_device { -- cgit v1.2.3 From 365e2b77b9d5bb4d85d4a03b0eb1b6a251392090 Mon Sep 17 00:00:00 2001 From: "Andries E. Brouwer" Date: Thu, 6 Feb 2003 00:25:10 -0800 Subject: [PATCH] Doc fix --- Documentation/filesystems/proc.txt | 2 +- Documentation/sysctl/kernel.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index ca0cd745da88..fe4e48f6f594 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -911,7 +911,7 @@ printk The four values in printk denote * console_loglevel, * default_message_loglevel, -* minimum_console_level and +* minimum_console_loglevel and * default_console_loglevel respectively. diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 424ccd9f7c45..e6f061d1a2f7 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -222,7 +222,7 @@ otherwise the 'doze' mode will be used. printk: The four values in printk denote: console_loglevel, -default_message_loglevel, minimum_console_level and +default_message_loglevel, minimum_console_loglevel and default_console_loglevel respectively. These values influence printk() behavior when printing or -- cgit v1.2.3 From 24b8c8bcea9fa9136a6bc2b153546f15def3138b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Feb 2003 00:52:04 -0800 Subject: [PATCH] fix leaks in vxfs_read_fshead() The Stanford checker disclosed that vxfs_read_fshead was missing any unwinding in the error cases.
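The rewrite below uses the usual kernel goto-unwind idiom: one label per acquired resource, taken in reverse order of acquisition. As a minimal sketch of the pattern, with invented names and assuming <linux/slab.h>; this is not the vxfs code itself:

	static int example_setup(void)
	{
		char *a, *b, *c;

		a = kmalloc(16, GFP_KERNEL);
		if (!a)
			goto out;
		b = kmalloc(16, GFP_KERNEL);
		if (!b)
			goto out_free_a;
		c = kmalloc(16, GFP_KERNEL);
		if (!c)
			goto out_free_b;
		/* ... hand a, b and c off to their long-term owners ... */
		return 0;

	out_free_b:
		kfree(b);
	out_free_a:
		kfree(a);
	out:
		return -EINVAL;
	}

Each failure path releases exactly what was already acquired before falling through to the final return, which is what the old early returns failed to do.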
--- fs/freevxfs/vxfs_fshead.c | 65 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 48 insertions(+), 17 deletions(-) diff --git a/fs/freevxfs/vxfs_fshead.c b/fs/freevxfs/vxfs_fshead.c index f0d4e5a7f128..bf8f5e3f90da 100644 --- a/fs/freevxfs/vxfs_fshead.c +++ b/fs/freevxfs/vxfs_fshead.c @@ -111,13 +111,15 @@ vxfs_read_fshead(struct super_block *sbp) struct vxfs_fsh *pfp, *sfp; struct vxfs_inode_info *vip, *tip; - if (!(vip = vxfs_blkiget(sbp, infp->vsi_iext, infp->vsi_fshino))) { + vip = vxfs_blkiget(sbp, infp->vsi_iext, infp->vsi_fshino); + if (!vip) { printk(KERN_ERR "vxfs: unabled to read fsh inode\n"); return -EINVAL; - } else if (!VXFS_ISFSH(vip)) { + } + if (!VXFS_ISFSH(vip)) { printk(KERN_ERR "vxfs: fsh list inode is of wrong type (%x)\n", vip->vii_mode & VXFS_TYPE_MASK); - return -EINVAL; + goto out_free_fship; } @@ -126,23 +128,26 @@ vxfs_read_fshead(struct super_block *sbp) vxfs_dumpi(vip, infp->vsi_fshino); #endif - if (!(infp->vsi_fship = vxfs_get_fake_inode(sbp, vip))) { + infp->vsi_fship = vxfs_get_fake_inode(sbp, vip); + if (!infp->vsi_fship) { printk(KERN_ERR "vxfs: unabled to get fsh inode\n"); - return -EINVAL; + goto out_free_fship; } - if (!(sfp = vxfs_getfsh(infp->vsi_fship, 0))) { + sfp = vxfs_getfsh(infp->vsi_fship, 0); + if (!sfp) { printk(KERN_ERR "vxfs: unabled to get structural fsh\n"); - return -EINVAL; + goto out_iput_fship; } #ifdef DIAGNOSTIC vxfs_dumpfsh(sfp); #endif - if (!(pfp = vxfs_getfsh(infp->vsi_fship, 1))) { + pfp = vxfs_getfsh(infp->vsi_fship, 1); + if (!pfp) { printk(KERN_ERR "vxfs: unabled to get primary fsh\n"); - return -EINVAL; + goto out_free_sfp; } #ifdef DIAGNOSTIC @@ -150,24 +155,50 @@ vxfs_read_fshead(struct super_block *sbp) #endif tip = vxfs_blkiget(sbp, infp->vsi_iext, sfp->fsh_ilistino[0]); - if (!tip || ((infp->vsi_stilist = vxfs_get_fake_inode(sbp, tip)) == NULL)) { + if (!tip) + goto out_free_pfp; + + infp->vsi_stilist = vxfs_get_fake_inode(sbp, tip); + if (!infp->vsi_stilist) { printk(KERN_ERR "vxfs: unabled to get structual list inode\n"); - return -EINVAL; - } else if (!VXFS_ISILT(VXFS_INO(infp->vsi_stilist))) { + kfree(tip); + goto out_free_pfp; + } + if (!VXFS_ISILT(VXFS_INO(infp->vsi_stilist))) { printk(KERN_ERR "vxfs: structual list inode is of wrong type (%x)\n", VXFS_INO(infp->vsi_stilist)->vii_mode & VXFS_TYPE_MASK); - return -EINVAL; + goto out_iput_stilist; } tip = vxfs_stiget(sbp, pfp->fsh_ilistino[0]); - if (!tip || ((infp->vsi_ilist = vxfs_get_fake_inode(sbp, tip)) == NULL)) { + if (!tip) + goto out_iput_stilist; + infp->vsi_ilist = vxfs_get_fake_inode(sbp, tip); + if (!infp->vsi_ilist) { printk(KERN_ERR "vxfs: unabled to get inode list inode\n"); - return -EINVAL; - } else if (!VXFS_ISILT(VXFS_INO(infp->vsi_ilist))) { + kfree(tip); + goto out_iput_stilist; + } + if (!VXFS_ISILT(VXFS_INO(infp->vsi_ilist))) { printk(KERN_ERR "vxfs: inode list inode is of wrong type (%x)\n", VXFS_INO(infp->vsi_ilist)->vii_mode & VXFS_TYPE_MASK); - return -EINVAL; + goto out_iput_ilist; } return 0; + + out_iput_ilist: + iput(infp->vsi_ilist); + out_iput_stilist: + iput(infp->vsi_stilist); + out_free_pfp: + kfree(pfp); + out_free_sfp: + kfree(sfp); + out_iput_fship: + iput(infp->vsi_fship); + return -EINVAL; + out_free_fship: + kfree(vip); + return -EINVAL; } -- cgit v1.2.3 From d41710a0830c2e88fb3d529accd06c4415265070 Mon Sep 17 00:00:00 2001 From: Frank Davis Date: Thu, 6 Feb 2003 00:52:11 -0800 Subject: [PATCH] 2.5.59 : drivers/media/video/bt856.c This fixes a bt856.c compile error. The driver now compiles. 
It's a straightforward patch; I have emailed l-k and no objections have been reported. --- drivers/media/video/bt856.c | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/drivers/media/video/bt856.c b/drivers/media/video/bt856.c index 39b5916da89e..5b10c76e6de9 100644 --- a/drivers/media/video/bt856.c +++ b/drivers/media/video/bt856.c @@ -82,6 +82,7 @@ struct bt856 { int contrast; int hue; int sat; + struct semaphore lock; }; #define I2C_BT856 0x88 /* ----------------------------------------------------------------------- */ -static int bt856_probe(struct i2c_adapter *adap) -{ - return i2c_probe(adap, &addr_data , bt856_attach); -} - static int bt856_setbit(struct bt856 *dev, int subaddr, int bit, int data) { return i2c_smbus_write_byte_data(dev->client, subaddr,(dev->reg[subaddr] & ~(1 << bit)) | (data ? (1 << bit) : 0)); } @@ -134,7 +130,7 @@ static int bt856_attach(struct i2c_adapter *adap, int addr , unsigned long flags encoder->norm = VIDEO_MODE_NTSC; encoder->enable = 1; - DEBUG(printk(KERN_INFO "%s-bt856: attach\n", encoder->bus->name)); + DEBUG(printk(KERN_INFO "%s-bt856: attach\n", encoder->client->name)); i2c_smbus_write_byte_data(client, 0xdc, 0x18); encoder->reg[0xdc] = 0x18; @@ -167,6 +163,10 @@ static int bt856_attach(struct i2c_adapter *adap, int addr , unsigned long flags return 0; } +static int bt856_probe(struct i2c_adapter *adap) +{ + return i2c_probe(adap, &addr_data , bt856_attach); +} static int bt856_detach(struct i2c_client *client) { @@ -299,21 +299,19 @@ static int bt856_command(struct i2c_client *client, unsigned int cmd, /* ----------------------------------------------------------------------- */ static struct i2c_driver i2c_driver_bt856 = { - "bt856", /* name */ - I2C_DRIVERID_BT856, /* ID */ - I2C_DF_NOTIFY, - bt856_probe, - bt856_detach, - bt856_command + .owner = THIS_MODULE, + .name = "bt856", /* name */ + .id = I2C_DRIVERID_BT856, /* ID */ + .flags = I2C_DF_NOTIFY, + .attach_adapter = bt856_probe, + .detach_client = bt856_detach, + .command = bt856_command }; static struct i2c_client client_template = { - "bt856_client", - -1, - 0, - 0, - NULL, - &i2c_driver_bt856 + .name = "bt856_client", + .id = -1, + .driver = &i2c_driver_bt856 }; static int bt856_init(void) -- cgit v1.2.3 From 06abcac643817f986a81dbbc3f85ad913870238c Mon Sep 17 00:00:00 2001 From: Frank Davis Date: Thu, 6 Feb 2003 00:53:33 -0800 Subject: [PATCH] 2.5.59 : drivers/media/video/saa7185.c This patch to saa7185 resolves bugzilla bug #168 (compile error). It has been sent to l-k and has received no objections.
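For the record, both breakages here are classic cut-and-paste bugs: the i2c client was declared as a struct where a pointer was needed for the kmalloc, and the allocation/clear sizes named *decoder, a pointer from a different driver. The sizeof(*ptr) idiom is only safe when the operand is the very pointer being allocated. A minimal sketch of the safe form, with an invented struct name and assuming <linux/slab.h>:

	struct foo { int x; };

	static struct foo *example_alloc(void)
	{
		struct foo *f;

		/* Both size expressions name f itself, so they stay
		 * correct even if struct foo changes later; writing
		 * sizeof(*some_other_ptr) here is exactly the bug
		 * fixed below. */
		f = kmalloc(sizeof(*f), GFP_KERNEL);
		if (!f)
			return NULL;
		memset(f, 0, sizeof(*f));
		return f;
	}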
--- drivers/media/video/saa7185.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/media/video/saa7185.c b/drivers/media/video/saa7185.c index 9db597c23dae..5819d24d57fe 100644 --- a/drivers/media/video/saa7185.c +++ b/drivers/media/video/saa7185.c @@ -186,7 +186,7 @@ static int saa7185_attach(struct i2c_adapter *adap, int addr, unsigned short fla { int i; struct saa7185 *encoder; - struct i2c_client client; + struct i2c_client *client; client = kmalloc(sizeof(*client), GFP_KERNEL); if (client == NULL) @@ -194,14 +194,14 @@ static int saa7185_attach(struct i2c_adapter *adap, int addr, unsigned short fla client_template.adapter = adap; client_template.addr = addr; memcpy(client, &client_template, sizeof(*client)); - encoder = kmalloc(sizeof(*decoder), GFP_KERNEL); + encoder = kmalloc(sizeof(*encoder), GFP_KERNEL); if (encoder == NULL) { kfree(client); return -ENOMEM; } - memset(encoder, 0, sizeof(*decoder)); + memset(encoder, 0, sizeof(*encoder)); strcpy(client->name, "saa7185"); encoder->client = client; client->data = encoder; @@ -221,7 +221,7 @@ static int saa7185_attach(struct i2c_adapter *adap, int addr, unsigned short fla printk(KERN_INFO "%s_attach: chip version %d\n", client->name, i2c_smbus_read_byte(client) >> 5); } - init_MUTEX(&decoder->lock); + init_MUTEX(&encoder->lock); i2c_attach_client(client); MOD_INC_USE_COUNT; return 0; @@ -355,6 +355,7 @@ static int saa7185_command(struct i2c_client *client, unsigned int cmd, /* ----------------------------------------------------------------------- */ static struct i2c_driver i2c_driver_saa7185 = { + .owner = THIS_MODULE, .name = "saa7185", /* name */ .id = I2C_DRIVERID_SAA7185B, /* ID */ .flags = I2C_DF_NOTIFY, -- cgit v1.2.3 From 8673d7da33449c5fe8937060813d86cfab9b63fc Mon Sep 17 00:00:00 2001 From: Frank Davis Date: Thu, 6 Feb 2003 00:53:40 -0800 Subject: [PATCH] 2.5.59 : drivers/media/video/bt819.c This patch for bt819.c addresses bugzilla bug #169 (compile error).
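As in the bt856 and saa7185 patches, the driver and client templates are also moved over to C99 designated initializers. A made-up sketch of why that style is preferred for ops-like structs:

	struct example_ops {
		const char *name;
		int id;
		int (*probe)(void);
		int (*remove)(void);
	};

	static int my_probe(void) { return 0; }
	static int my_remove(void) { return 0; }

	/* Positional: silently pairs values with the wrong members
	 * if fields are ever added or reordered. */
	static struct example_ops old_style = {
		"example", 42, my_probe, my_remove
	};

	/* Designated: each value names its member, and anything not
	 * mentioned is implicitly zeroed. */
	static struct example_ops new_style = {
		.name	= "example",
		.id	= 42,
		.probe	= my_probe,
		.remove	= my_remove,
	};

The implicit zeroing of unmentioned members is also why the padding fields from the old positional form can simply be dropped.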
--- drivers/media/video/bt819.c | 44 ++++++++++++++++---------------------------- 1 file changed, 16 insertions(+), 28 deletions(-) diff --git a/drivers/media/video/bt819.c b/drivers/media/video/bt819.c index f476cb92e2b5..34abd7463117 100644 --- a/drivers/media/video/bt819.c +++ b/drivers/media/video/bt819.c @@ -48,18 +48,8 @@ static unsigned short normal_i2c[] = {34>>1, I2C_CLIENT_END }; static unsigned short normal_i2c_range[] = { I2C_CLIENT_END }; -static unsigned short probe[2] = { I2C_CLIENT_END , I2C_CLIENT_END }; -static unsigned short probe_range[2] = { I2C_CLIENT_END , I2C_CLIENT_END }; -static unsigned short ignore[2] = { I2C_CLIENT_END , I2C_CLIENT_END }; -static unsigned short ignore_range[2] = { I2C_CLIENT_END , I2C_CLIENT_END }; -static unsigned force[2] = { I2C_CLIENT_END , I2C_CLIENT_END }; - -static struct i2c_client_address_data addr_data = { - normal_i2c , normal_i2c_range, - probe , probe_range, - ignore , ignore_range, - force -}; + +I2C_CLIENT_INSMOD; static struct i2c_client client_template; @@ -100,10 +90,6 @@ struct timing timing_data[] = { /* ----------------------------------------------------------------------- */ -static int bt819_probe(struct i2c_adapter *adap) -{ - return i2c_probe(adap, &addr_data, bt819_attach); -} static int bt819_setbit(struct bt819 *dev, int subaddr, int bit, int data) { @@ -211,6 +197,10 @@ static int bt819_attach(struct i2c_adapter *adap, int addr , unsigned long flags MOD_INC_USE_COUNT; return 0; } +static int bt819_probe(struct i2c_adapter *adap) +{ + return i2c_probe(adap, &addr_data, bt819_attach); +} static int bt819_detach(struct i2c_client *client) { @@ -448,21 +438,19 @@ static int bt819_command(struct i2c_client *client, unsigned int cmd, void *arg) /* ----------------------------------------------------------------------- */ static struct i2c_driver i2c_driver_bt819 = { - "bt819", /* name */ - I2C_DRIVERID_BT819, /* ID */ - I2C_DF_NOTIFY, - bt819_probe, - bt819_detach, - bt819_command + .name = "bt819", /* name */ + .id = I2C_DRIVERID_BT819, /* ID */ + .flags = I2C_DF_NOTIFY, + .attach_adapter = bt819_probe, + .detach_client = bt819_detach, + .command = bt819_command + }; static struct i2c_client client_template = { - "bt819_client", - -1, - 0, - 0, - NULL, - &i2c_driver_bt819 + .name = "bt819_client", + .id = -1, + .driver = &i2c_driver_bt819 }; static int bt819_setup(void) -- cgit v1.2.3 From 1b44f7f6a8f36b208136e44d791507ae5d0e80c6 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 6 Feb 2003 00:55:26 -0800 Subject: [PATCH] missing include in pci-sysfs.c Add a missing include for those pesky S_IRUGO thingys. --- drivers/pci/pci-sysfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 066c345b2c29..4d33ee733b5b 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "pci.h" -- cgit v1.2.3 From 45c1a159b85b3b30afd26a77b4be312226bba416 Mon Sep 17 00:00:00 2001 From: Daniel Jacobowitz Date: Thu, 6 Feb 2003 04:32:29 -0500 Subject: Add PTRACE_O_TRACEVFORKDONE and PTRACE_O_TRACEEXIT facilities. 
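For illustration, a user-space sketch of how a debugger might consume the new exit event. This is not part of the patch; it assumes the existing PTRACE_SETOPTIONS request that ptrace_setoptions() services, the same wait-status encoding used by the other PTRACE_EVENT_* codes, and that the new constants are visible to user space (e.g. via linux/ptrace.h):

	#include <signal.h>
	#include <sys/ptrace.h>
	#include <sys/types.h>
	#include <sys/wait.h>

	static void await_exit_event(pid_t pid)
	{
		int status;

		/* Ask for a stop just before the traced child enters
		 * do_exit(), then let it run. */
		ptrace(PTRACE_SETOPTIONS, pid, 0, PTRACE_O_TRACEEXIT);
		ptrace(PTRACE_CONT, pid, 0, 0);

		waitpid(pid, &status, 0);
		if (WIFSTOPPED(status) &&
		    (status >> 8) == (SIGTRAP | (PTRACE_EVENT_EXIT << 8))) {
			/* The child is stopped on its way out; its
			 * registers and memory can still be examined
			 * before the exit completes. */
		}
	}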
--- include/linux/ptrace.h | 4 ++++ include/linux/sched.h | 2 ++ kernel/exit.c | 3 +++ kernel/fork.c | 6 ++++-- kernel/ptrace.c | 13 ++++++++++++- 5 files changed, 25 insertions(+), 3 deletions(-) diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index b56bbe7ca800..706b420fb5c9 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -35,12 +35,16 @@ #define PTRACE_O_TRACEVFORK 0x00000004 #define PTRACE_O_TRACECLONE 0x00000008 #define PTRACE_O_TRACEEXEC 0x00000010 +#define PTRACE_O_TRACEVFORKDONE 0x00000020 +#define PTRACE_O_TRACEEXIT 0x00000040 /* Wait extended result codes for the above trace options. */ #define PTRACE_EVENT_FORK 1 #define PTRACE_EVENT_VFORK 2 #define PTRACE_EVENT_CLONE 3 #define PTRACE_EVENT_EXEC 4 +#define PTRACE_EVENT_VFORK_DONE 5 +#define PTRACE_EVENT_EXIT 6 #include #include diff --git a/include/linux/sched.h b/include/linux/sched.h index a325e5a8c645..c424a353a748 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -441,6 +441,8 @@ do { if (atomic_dec_and_test(&(tsk)->usage)) __put_task_struct(tsk); } while(0) #define PT_TRACE_VFORK 0x00000020 #define PT_TRACE_CLONE 0x00000040 #define PT_TRACE_EXEC 0x00000080 +#define PT_TRACE_VFORK_DONE 0x00000100 +#define PT_TRACE_EXIT 0x00000200 #if CONFIG_SMP extern void set_cpus_allowed(task_t *p, unsigned long new_mask); diff --git a/kernel/exit.c b/kernel/exit.c index 03801540a5e6..5cb58a1d2075 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -653,6 +653,9 @@ NORET_TYPE void do_exit(long code) profile_exit_task(tsk); + if (unlikely(current->ptrace & PT_TRACE_EXIT)) + ptrace_notify((PTRACE_EVENT_EXIT << 8) | SIGTRAP); + fake_volatile: acct_process(code); __exit_mm(tsk); diff --git a/kernel/fork.c b/kernel/fork.c index 347f957b080e..3da82d978812 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1046,9 +1046,11 @@ struct task_struct *do_fork(unsigned long clone_flags, ptrace_notify ((trace << 8) | SIGTRAP); } - if (clone_flags & CLONE_VFORK) + if (clone_flags & CLONE_VFORK) { wait_for_completion(&vfork); - else + if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE)) + ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP); + } else /* * Let the child process run first, to avoid most of the * COW overhead when the child exec()s afterwards. diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 9f3769bfdc7e..14d158864d9e 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -277,9 +277,20 @@ static int ptrace_setoptions(struct task_struct *child, long data) else child->ptrace &= ~PT_TRACE_EXEC; + if (data & PTRACE_O_TRACEVFORKDONE) + child->ptrace |= PT_TRACE_VFORK_DONE; + else + child->ptrace &= ~PT_TRACE_VFORK_DONE; + + if (data & PTRACE_O_TRACEEXIT) + child->ptrace |= PT_TRACE_EXIT; + else + child->ptrace &= ~PT_TRACE_EXIT; + if ((data & (PTRACE_O_TRACESYSGOOD | PTRACE_O_TRACEFORK | PTRACE_O_TRACEVFORK | PTRACE_O_TRACECLONE - | PTRACE_O_TRACEEXEC)) + | PTRACE_O_TRACEEXEC | PTRACE_O_TRACEEXIT + | PTRACE_O_TRACEVFORKDONE)) != data) return -EINVAL; -- cgit v1.2.3 From a866697c6eadb9d46eff2c8ebac4dfe4834ce565 Mon Sep 17 00:00:00 2001 From: Daniel Jacobowitz Date: Thu, 6 Feb 2003 06:44:58 -0500 Subject: Signal handling bugs for thread exit + ptrace --- kernel/exit.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/kernel/exit.c b/kernel/exit.c index 5cb58a1d2075..0846290aeea3 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -586,7 +586,7 @@ static void exit_notify(void) * is about to become orphaned. 
*/ - t = current->parent; + t = current->real_parent; if ((t->pgrp != current->pgrp) && (t->session == current->session) && @@ -619,8 +619,16 @@ static void exit_notify(void) current->exit_signal = SIGCHLD; - if (current->exit_signal != -1) - do_notify_parent(current, current->exit_signal); + /* If something other than our normal parent is ptracing us, then + * send it a SIGCHLD instead of honoring exit_signal. exit_signal + * only has special meaning to our real parent. + */ + if (current->exit_signal != -1) { + if (current->parent == current->real_parent) + do_notify_parent(current, current->exit_signal); + else + do_notify_parent(current, SIGCHLD); + } current->state = TASK_ZOMBIE; /* @@ -877,7 +885,7 @@ repeat: if (p->real_parent != p->parent) { write_lock_irq(&tasklist_lock); __ptrace_unlink(p); - do_notify_parent(p, SIGCHLD); + do_notify_parent(p, p->exit_signal); p->state = TASK_ZOMBIE; write_unlock_irq(&tasklist_lock); } else -- cgit v1.2.3 From 03cd9c238b7ae369b782580ed461140c6d077dc4 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Thu, 6 Feb 2003 06:22:20 -0800 Subject: [PATCH] exit_notify/do_exit cleanup Here is a cleanup moving the new pending thread signal check into exit_notify. I also made exit_notify and do_exit consistent in using the saved tsk variable instead of current, as most of do_exit already does. --- kernel/exit.c | 101 ++++++++++++++++++++++++++-------------------------------- 1 file changed, 45 insertions(+), 56 deletions(-) diff --git a/kernel/exit.c b/kernel/exit.c index 1e23538c9a0e..cee8991011f7 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -542,10 +542,33 @@ static inline void forget_original_parent(struct task_struct * father) * Send signals to all our closest relatives so that they know * to properly mourn us.. */ -static void exit_notify(void) +static void exit_notify(struct task_struct *tsk) { struct task_struct *t; + if (signal_pending(tsk) && !tsk->sig->group_exit + && !thread_group_empty(tsk)) { + /* + * This occurs when there was a race between our exit + * syscall and a group signal choosing us as the one to + * wake up. It could be that we are the only thread + * alerted to check for pending signals, but another thread + * should be woken now to take the signal since we will not. + * Now we'll wake all the threads in the group just to make + * sure someone gets all the pending signals. + */ + read_lock(&tasklist_lock); + spin_lock_irq(&tsk->sig->siglock); + for (t = next_thread(tsk); t != tsk; t = next_thread(t)) + if (!signal_pending(t) && !(t->flags & PF_EXITING)) { + recalc_sigpending_tsk(t); + if (signal_pending(t)) + signal_wake_up(t, 0); + } + spin_unlock_irq(&tsk->sig->siglock); + read_unlock(&tasklist_lock); + } + write_lock_irq(&tasklist_lock); /* @@ -557,8 +580,8 @@ static void exit_notify(void) * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) */ - forget_original_parent(current); - BUG_ON(!list_empty(¤t->children)); + forget_original_parent(tsk); + BUG_ON(!list_empty(&tsk->children)); /* * Check to see if any process groups have become orphaned @@ -570,14 +593,14 @@ static void exit_notify(void) * is about to become orphaned. 
*/ - t = current->parent; + t = tsk->parent; - if ((t->pgrp != current->pgrp) && - (t->session == current->session) && - will_become_orphaned_pgrp(current->pgrp, current) && - has_stopped_jobs(current->pgrp)) { - __kill_pg_info(SIGHUP, (void *)1, current->pgrp); - __kill_pg_info(SIGCONT, (void *)1, current->pgrp); + if ((t->pgrp != tsk->pgrp) && + (t->session == tsk->session) && + will_become_orphaned_pgrp(tsk->pgrp, tsk) && + has_stopped_jobs(tsk->pgrp)) { + __kill_pg_info(SIGHUP, (void *)1, tsk->pgrp); + __kill_pg_info(SIGCONT, (void *)1, tsk->pgrp); } /* Let father know we died @@ -596,17 +619,17 @@ static void exit_notify(void) * */ - if (current->exit_signal != SIGCHLD && current->exit_signal != -1 && - ( current->parent_exec_id != t->self_exec_id || - current->self_exec_id != current->parent_exec_id) + if (tsk->exit_signal != SIGCHLD && tsk->exit_signal != -1 && + ( tsk->parent_exec_id != t->self_exec_id || + tsk->self_exec_id != tsk->parent_exec_id) && !capable(CAP_KILL)) - current->exit_signal = SIGCHLD; + tsk->exit_signal = SIGCHLD; - if (current->exit_signal != -1) - do_notify_parent(current, current->exit_signal); + if (tsk->exit_signal != -1) + do_notify_parent(tsk, tsk->exit_signal); - current->state = TASK_ZOMBIE; + tsk->state = TASK_ZOMBIE; /* * No need to unlock IRQs, we'll schedule() immediately * anyway. In the preemption case this also makes it @@ -637,7 +660,6 @@ NORET_TYPE void do_exit(long code) profile_exit_task(tsk); -fake_volatile: acct_process(code); __exit_mm(tsk); @@ -655,49 +677,16 @@ fake_volatile: module_put(tsk->binfmt->module); tsk->exit_code = code; - exit_notify(); + exit_notify(tsk); preempt_disable(); - if (signal_pending(tsk) && !tsk->sig->group_exit - && !thread_group_empty(tsk)) { - /* - * This occurs when there was a race between our exit - * syscall and a group signal choosing us as the one to - * wake up. It could be that we are the only thread - * alerted to check for pending signals, but another thread - * should be woken now to take the signal since we will not. - * Now we'll wake all the threads in the group just to make - * sure someone gets all the pending signals. - */ - struct task_struct *t; - read_lock(&tasklist_lock); - spin_lock_irq(&tsk->sig->siglock); - for (t = next_thread(tsk); t != tsk; t = next_thread(t)) - if (!signal_pending(t) && !(t->flags & PF_EXITING)) { - recalc_sigpending_tsk(t); - if (signal_pending(t)) - signal_wake_up(t, 0); - } - spin_unlock_irq(&tsk->sig->siglock); - read_unlock(&tasklist_lock); - } + if (tsk->exit_signal == -1) release_task(tsk); + schedule(); BUG(); -/* - * In order to get rid of the "volatile function does return" message - * I did this little loop that confuses gcc to think do_exit really - * is volatile. In fact it's schedule() that is volatile in some - * circumstances: when current->state = ZOMBIE, schedule() never - * returns. - * - * In fact the natural way to do all this is to have the label and the - * goto right after each other, but I put the fake_volatile label at - * the start of the function just in case something /really/ bad - * happens, and the schedule returns. This way we can try again. I'm - * not paranoid: it's just that everybody is out to get me. - */ - goto fake_volatile; + /* Avoid "noreturn function does return". 
*/ + for (;;) ; } NORET_TYPE void complete_and_exit(struct completion *comp, long code) -- cgit v1.2.3 From da11be46accaae5bbe089161b00e43a67097308d Mon Sep 17 00:00:00 2001 From: Steven Cole Date: Thu, 6 Feb 2003 16:11:08 -0800 Subject: [PATCH] Spelling fixes for consistent, dependent, persistent This fixes the following common misspellings and their variants. consistant -> consistent dependant -> dependent persistant -> persistent --- Documentation/filesystems/ext3.txt | 2 +- Documentation/isdn/HiSax.cert | 2 +- Documentation/md.txt | 4 ++-- Documentation/scsi/ChangeLog.sym53c8xx | 2 +- Documentation/scsi/ibmmca.txt | 4 ++-- Documentation/sparc/sbus_drivers.txt | 4 ++-- Documentation/tipar.txt | 4 ++-- Documentation/usb/silverlink.txt | 2 +- arch/arm/mach-integrator/pci_v3.c | 2 +- arch/arm/mach-sa1100/stork.c | 2 +- arch/arm/mm/proc-arm920.S | 2 +- arch/arm/mm/proc-arm922.S | 2 +- arch/arm/mm/proc-arm926.S | 2 +- arch/cris/drivers/serial.c | 2 +- arch/cris/kernel/ptrace.c | 2 +- arch/ia64/kernel/smpboot.c | 2 +- arch/ia64/sn/io/sn1/pcibr.c | 4 ++-- arch/ia64/sn/io/sn2/pcibr/pcibr_dvr.c | 4 ++-- arch/m68k/mac/macints.c | 2 +- arch/m68k/mac/via.c | 2 +- arch/m68knommu/platform/68360/uCquicc/crt0_ram.S | 2 +- arch/m68knommu/platform/68360/uCquicc/crt0_rom.S | 2 +- arch/parisc/kernel/entry.S | 2 +- arch/ppc/boot/simple/Makefile | 2 +- arch/ppc/boot/simple/misc.c | 2 +- arch/ppc/kernel/pci.c | 2 +- arch/ppc64/kernel/pci.c | 2 +- arch/sh/kernel/io.c | 2 +- arch/sparc/kernel/entry.S | 2 +- arch/sparc/kernel/sys_sparc.c | 2 +- arch/sparc/kernel/sys_sunos.c | 2 +- arch/sparc/math-emu/math.c | 2 +- arch/sparc/mm/sun4c.c | 2 +- arch/sparc64/kernel/sbus.c | 2 +- arch/sparc64/kernel/winfixup.S | 6 +++--- arch/sparc64/math-emu/math.c | 2 +- arch/v850/kernel/rte_mb_a_pci.c | 4 ++-- drivers/atm/lanai.c | 2 +- drivers/block/nbd.c | 2 +- drivers/char/ip2/i2ellis.c | 2 +- drivers/char/ip2/i2os.h | 2 +- drivers/char/ip2main.c | 2 +- drivers/char/nvram.c | 2 +- drivers/char/rio/riotty.c | 4 ++-- drivers/char/rtc.c | 2 +- drivers/hotplug/cpci_hotplug_pci.c | 2 +- drivers/hotplug/cpqphp_core.c | 2 +- drivers/hotplug/cpqphp_pci.c | 2 +- drivers/ide/pci/sis5513.c | 4 ++-- drivers/isdn/eicon/eicon.h | 2 +- drivers/isdn/hisax/isdnl2.c | 20 ++++++++++---------- drivers/isdn/hisax/l3dss1.c | 2 +- drivers/isdn/hisax/l3ni1.c | 2 +- drivers/isdn/hysdn/hysdn_boot.c | 2 +- drivers/media/radio/radio-zoltrix.c | 2 +- drivers/mtd/chips/jedec.c | 2 +- drivers/net/acenic.c | 2 +- drivers/net/declance.c | 2 +- drivers/net/e1000/e1000_osdep.h | 2 +- drivers/net/hamradio/6pack.c | 8 ++++---- drivers/net/hamradio/scc.c | 2 +- drivers/net/hamradio/yam.c | 2 +- drivers/net/rrunner.c | 2 +- drivers/net/sgiseeq.c | 2 +- drivers/net/sk98lin/skvpd.c | 2 +- drivers/net/sk98lin/skxmac2.c | 4 ++-- drivers/net/skfp/cfm.c | 6 +++--- drivers/net/skfp/ecm.c | 8 ++++---- drivers/net/skfp/h/osdef1st.h | 2 +- drivers/net/skfp/pcmplc.c | 8 ++++---- drivers/net/skfp/rmt.c | 6 +++--- drivers/net/skfp/skfddi.c | 4 ++-- drivers/net/wan/lmc/lmc_main.c | 2 +- drivers/net/wan/lmc/lmc_ver.h | 2 +- drivers/net/wireless/airo.c | 2 +- drivers/net/wireless/orinoco.h | 2 +- drivers/sbus/char/aurora.c | 2 +- drivers/sbus/char/bbc_envctrl.c | 2 +- drivers/scsi/aacraid/aachba.c | 4 ++-- drivers/scsi/aic7xxx/aic79xx_inline.h | 2 +- drivers/scsi/aic7xxx/aic79xx_osm.h | 2 +- drivers/scsi/aic7xxx/aic7xxx_inline.h | 2 +- drivers/scsi/aic7xxx/aic7xxx_osm.h | 2 +- drivers/scsi/megaraid.c | 2 +- drivers/scsi/qla1280.c | 2 +- drivers/scsi/sym53c8xx_2/sym_glue.c | 4 ++-- 
drivers/scsi/sym53c8xx_2/sym_glue.h | 2 +- drivers/usb/misc/atmsar.c | 2 +- drivers/usb/serial/safe_serial.c | 2 +- drivers/usb/storage/usb.c | 2 +- drivers/video/skeletonfb.c | 2 +- fs/befs/ChangeLog | 2 +- fs/partitions/ldm.c | 2 +- include/asm-alpha/pci.h | 12 ++++++------ include/asm-cris/io.h | 2 +- include/asm-generic/rmap.h | 2 +- include/asm-generic/rtc.h | 2 +- include/asm-mips/isadep.h | 2 +- include/asm-mips64/r10kcache.h | 2 +- include/asm-ppc/io.h | 2 +- include/asm-ppc/system.h | 2 +- include/asm-ppc64/system.h | 2 +- include/asm-v850/pci.h | 2 +- include/linux/agp_backend.h | 2 +- include/linux/apm_bios.h | 2 +- include/linux/isdnif.h | 2 +- include/linux/sdla_x25.h | 2 +- net/irda/iriap.c | 2 +- net/irda/irlmp.c | 4 ++-- net/irda/irnet/irnet.h | 2 +- sound/core/hwdep.c | 2 +- sound/core/seq/seq_midi_emul.c | 4 ++-- sound/oss/ac97_codec.c | 2 +- sound/oss/maestro.c | 2 +- 114 files changed, 158 insertions(+), 158 deletions(-) diff --git a/Documentation/filesystems/ext3.txt b/Documentation/filesystems/ext3.txt index 84e8ca1f4418..12c01af809ac 100644 --- a/Documentation/filesystems/ext3.txt +++ b/Documentation/filesystems/ext3.txt @@ -78,7 +78,7 @@ design to add journaling capabilities on a block device. The ext3 filesystem code will inform the JBD of modifications it is performing (Call a transaction). the journal support the transactions start and stop, and in case of crash, the journal can replayed the transactions -to put the partition on a consistant state fastly. +to put the partition on a consistent state fastly. handles represent a single atomic update to a filesystem. JBD can handle external journal on a block device. diff --git a/Documentation/isdn/HiSax.cert b/Documentation/isdn/HiSax.cert index 2e3523ca7d22..f2a6fcb8efee 100644 --- a/Documentation/isdn/HiSax.cert +++ b/Documentation/isdn/HiSax.cert @@ -27,7 +27,7 @@ These tests included all layers 1-3 and as well all functional tests for the layer 1. Because all hardware based on these chips are complete ISDN solutions in one chip all cards and USB-TAs using these chips are to be regarded as approved for those tests. Some additional electrical tests -of the layer 1 which are independant of the driver and related to a +of the layer 1 which are independent of the driver and related to a special hardware used will be regarded as approved if at least one solution has been tested including those electrical tests. So if cards or tas have been completely approved for any other os, the approval diff --git a/Documentation/md.txt b/Documentation/md.txt index 0df89447bb0e..cecc9beba2fb 100644 --- a/Documentation/md.txt +++ b/Documentation/md.txt @@ -9,7 +9,7 @@ device with the following kernel command lines: for old raid arrays without persistent superblocks: md=,,,,dev0,dev1,...,devn -for raid arrays with persistant superblocks +for raid arrays with persistent superblocks md=,dev0,dev1,...,devn md device no. = the number of the md device ... @@ -21,7 +21,7 @@ md device no. = the number of the md device ... raid level = -1 linear mode 0 striped mode - other modes are only supported with persistant super blocks + other modes are only supported with persistent super blocks chunk size factor = (raid-0 and raid-1 only) Set the chunk size as 4k << n. 
diff --git a/Documentation/scsi/ChangeLog.sym53c8xx b/Documentation/scsi/ChangeLog.sym53c8xx index 423ceecfd9f3..8dc00328bf94 100644 --- a/Documentation/scsi/ChangeLog.sym53c8xx +++ b/Documentation/scsi/ChangeLog.sym53c8xx @@ -529,7 +529,7 @@ Sun Nov 1 14H00 1998 Gerard Roudier (groudier@club-internet.fr) * version pre-sym53c8xx-0.13 - Some rewrite of the device detection code. This code had been patched too much and needed to be face-lifted a bit. - Remove all platform dependant fix-ups that was not needed or + Remove all platform dependent fix-ups that was not needed or conflicted with some other driver code as work-arounds. Reread the NVRAM before the calling of ncr_attach(). This spares stack space and so allows to handle more boards. diff --git a/Documentation/scsi/ibmmca.txt b/Documentation/scsi/ibmmca.txt index 912d10209c1c..3957eb3034a4 100644 --- a/Documentation/scsi/ibmmca.txt +++ b/Documentation/scsi/ibmmca.txt @@ -906,7 +906,7 @@ to does not offer more space, invalid memory accesses destabilized the kernel. 3) version 4.0 is only valid for kernel 2.4.0 or later. This is necessary - to remove old kernel version dependant waste from the driver. 3.2d is + to remove old kernel version dependent waste from the driver. 3.2d is only distributed with older kernels but keeps compatibility with older kernel versions. 4.0 and higher versions cannot be used with older kernels anymore!! You must have at least kernel 2.4.0!! @@ -1382,7 +1382,7 @@ 9 Disclaimer ------------ - Beside the GNU General Public License and the dependant disclaimers and disclaimers + Beside the GNU General Public License and the dependent disclaimers and disclaimers concerning the Linux-kernel in special, this SCSI-driver comes without any warranty. Its functionality is tested as good as possible on certain machines and combinations of computer hardware, which does not exclude, diff --git a/Documentation/sparc/sbus_drivers.txt b/Documentation/sparc/sbus_drivers.txt index fc8ab4a8b10e..876195dc2aef 100644 --- a/Documentation/sparc/sbus_drivers.txt +++ b/Documentation/sparc/sbus_drivers.txt @@ -208,7 +208,7 @@ like the following: char *mem; /* Address in the CPU space */ u32 busa; /* Address in the SBus space */ - mem = (char *) sbus_alloc_consistant(sdev, MYMEMSIZE, &busa); + mem = (char *) sbus_alloc_consistent(sdev, MYMEMSIZE, &busa); Then mem is used when CPU accesses this memory and u32 is fed to the device so that it can do DVMA. This is typically @@ -216,7 +216,7 @@ done with an sbus_writel() into some device register. Do not forget to free the DVMA resources once you are done: - sbus_free_consistant(sdev, MYMEMSIZE, mem, busa); + sbus_free_consistent(sdev, MYMEMSIZE, mem, busa); Streaming DVMA is more interesting. First you allocate some memory suitable for it or pin down some user pages. Then it all works diff --git a/Documentation/tipar.txt b/Documentation/tipar.txt index 73e5548357cf..773b9a2483eb 100644 --- a/Documentation/tipar.txt +++ b/Documentation/tipar.txt @@ -12,7 +12,7 @@ INTRODUCTION: This is a driver for the very common home-made parallel link cable, a cable designed for connecting TI8x/9x graphing calculators (handhelds) to a computer or workstation (Alpha, Sparc). Given that driver is built on parport, the -parallel port abstraction layer, this driver is independant of the platform. +parallel port abstraction layer, this driver is independent of the platform. It can also be used with another device plugged on the same port (such as a ZIP drive). I have a 100MB ZIP and both of them work fine ! 
@@ -90,4 +90,4 @@ You can also mail JB at jb@jblache.org. He packaged these drivers for Debian. CREDITS: The code is based on tidev.c & parport.c. -The driver has been developed independantly of Texas Instruments. +The driver has been developed independently of Texas Instruments. diff --git a/Documentation/usb/silverlink.txt b/Documentation/usb/silverlink.txt index ae4b9ab3985b..d720415ebb34 100644 --- a/Documentation/usb/silverlink.txt +++ b/Documentation/usb/silverlink.txt @@ -73,4 +73,4 @@ this driver but he better knows the Mac OS-X driver. CREDITS: The code is based on dabusb.c, printer.c and scanner.c ! -The driver has been developed independantly of Texas Instruments. +The driver has been developed independently of Texas Instruments. diff --git a/arch/arm/mach-integrator/pci_v3.c b/arch/arm/mach-integrator/pci_v3.c index f2f73e15a11b..46dd55038145 100644 --- a/arch/arm/mach-integrator/pci_v3.c +++ b/arch/arm/mach-integrator/pci_v3.c @@ -120,7 +120,7 @@ * function = which function * offset = configuration space register we are interested in * - * description: this routine will generate a platform dependant config + * description: this routine will generate a platform dependent config * address. * * calls: none diff --git a/arch/arm/mach-sa1100/stork.c b/arch/arm/mach-sa1100/stork.c index 0c34c9d61bb8..9ac3a0a10c7d 100644 --- a/arch/arm/mach-sa1100/stork.c +++ b/arch/arm/mach-sa1100/stork.c @@ -181,7 +181,7 @@ static void storkClockTS(void) { storkSetLatchB(STORK_TOUCH_SCREEN_DCLK); udelay(10); /* hmm wait 200ns (min) - ok this ought to be udelay(1) but that doesn't get */ - /* consistant values so I'm using 10 (urgh) */ + /* consistent values so I'm using 10 (urgh) */ storkClearLatchB(STORK_TOUCH_SCREEN_DCLK); udelay(10); } diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S index 09a2b12fad9b..cbeaaf9a0853 100644 --- a/arch/arm/mm/proc-arm920.S +++ b/arch/arm/mm/proc-arm920.S @@ -297,7 +297,7 @@ ENTRY(cpu_arm920_dcache_clean_entry) * * This is a little misleading, it is not intended to clean out * the i-cache but to make sure that any data written to the - * range is made consistant. This means that when we execute code + * range is made consistent. This means that when we execute code * in that region, everything works as we expect. * * This generally means writing back data in the Dcache and diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S index 7bfbc8965019..f115e476bc7b 100644 --- a/arch/arm/mm/proc-arm922.S +++ b/arch/arm/mm/proc-arm922.S @@ -298,7 +298,7 @@ ENTRY(cpu_arm922_dcache_clean_entry) * * This is a little misleading, it is not intended to clean out * the i-cache but to make sure that any data written to the - * range is made consistant. This means that when we execute code + * range is made consistent. This means that when we execute code * in that region, everything works as we expect. * * This generally means writing back data in the Dcache and diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S index 24b5d4f70217..a2cdcdd3d9f3 100644 --- a/arch/arm/mm/proc-arm926.S +++ b/arch/arm/mm/proc-arm926.S @@ -147,7 +147,7 @@ cpu_arm926_cache_clean_invalidate_all_r2: * * This is a little misleading, it is not intended to clean out * the i-cache but to make sure that any data written to the - * range is made consistant. This means that when we execute code + * range is made consistent. This means that when we execute code * in that region, everything works as we expect. 
* * This generally means writing back data in the Dcache and diff --git a/arch/cris/drivers/serial.c b/arch/cris/drivers/serial.c index 412f7519c488..13bcee964f09 100644 --- a/arch/cris/drivers/serial.c +++ b/arch/cris/drivers/serial.c @@ -318,7 +318,7 @@ static char *serial_version = "$Revision: 1.3 $"; #include -/* non-arch dependant serial structures are in linux/serial.h */ +/* non-arch dependent serial structures are in linux/serial.h */ #include /* while we keep our own stuff (struct e100_serial) in a local .h file */ #include "serial.h" diff --git a/arch/cris/kernel/ptrace.c b/arch/cris/kernel/ptrace.c index b4e1f9282694..c8a066c4ee4c 100644 --- a/arch/cris/kernel/ptrace.c +++ b/arch/cris/kernel/ptrace.c @@ -18,7 +18,7 @@ * PTRACE_DETACH works more simple in 2.4.10 * * Revision 1.6 2001/07/25 16:08:47 bjornw - * PTRACE_ATTACH bulk moved into arch-independant code in 2.4.7 + * PTRACE_ATTACH bulk moved into arch-independent code in 2.4.7 * * Revision 1.5 2001/03/26 14:24:28 orjanf * * Changed loop condition. diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 15d820d88c64..cb5a89f68763 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -551,7 +551,7 @@ __cpu_up (unsigned int cpu) } /* - * Assume that CPU's have been discovered by some platform-dependant interface. For + * Assume that CPU's have been discovered by some platform-dependent interface. For * SoftSDV/Lion, that would be ACPI. * * Setup of the IPI irq handler is done in irq.c:init_IRQ_SMP(). diff --git a/arch/ia64/sn/io/sn1/pcibr.c b/arch/ia64/sn/io/sn1/pcibr.c index ac30ed1badd8..6029f26613fe 100644 --- a/arch/ia64/sn/io/sn1/pcibr.c +++ b/arch/ia64/sn/io/sn1/pcibr.c @@ -4811,7 +4811,7 @@ pcibr_dmamap_alloc(devfs_handle_t pconn_vhdl, /* Device is capable of A64 operations, * and the attributes of the DMA are - * consistant with any previous DMA + * consistent with any previous DMA * mappings using shared resources. */ @@ -4853,7 +4853,7 @@ pcibr_dmamap_alloc(devfs_handle_t pconn_vhdl, if (!pcibr_try_set_device(pcibr_soft, slot, flags, BRIDGE_DEV_D32_BITS)) { /* User desires DIRECT A32 operations, * and the attributes of the DMA are - * consistant with any previous DMA + * consistent with any previous DMA * mappings using shared resources. * Mapping calls may fail if target * is outside the direct32 range. diff --git a/arch/ia64/sn/io/sn2/pcibr/pcibr_dvr.c b/arch/ia64/sn/io/sn2/pcibr/pcibr_dvr.c index 867178f42fab..54939d8508c9 100644 --- a/arch/ia64/sn/io/sn2/pcibr/pcibr_dvr.c +++ b/arch/ia64/sn/io/sn2/pcibr/pcibr_dvr.c @@ -3216,7 +3216,7 @@ pcibr_dmamap_alloc(devfs_handle_t pconn_vhdl, /* Device is capable of A64 operations, * and the attributes of the DMA are - * consistant with any previous DMA + * consistent with any previous DMA * mappings using shared resources. */ @@ -3266,7 +3266,7 @@ pcibr_dmamap_alloc(devfs_handle_t pconn_vhdl, if (!pcibr_try_set_device(pcibr_soft, slot, flags, BRIDGE_DEV_D32_BITS)) { /* User desires DIRECT A32 operations, * and the attributes of the DMA are - * consistant with any previous DMA + * consistent with any previous DMA * mappings using shared resources. * Mapping calls may fail if target * is outside the direct32 range. diff --git a/arch/m68k/mac/macints.c b/arch/m68k/mac/macints.c index 4f8ea17f57ac..b0a647a964fc 100644 --- a/arch/m68k/mac/macints.c +++ b/arch/m68k/mac/macints.c @@ -749,7 +749,7 @@ void mac_scc_dispatch(int irq, void *dev_id, struct pt_regs *regs) /* */ /* Note that we're ignoring scc_mask for now. 
*/ /* If we actually mask the ints then we tend to */ - /* get hammered by very persistant SCC irqs, */ + /* get hammered by very persistent SCC irqs, */ /* and since they're autovector interrupts they */ /* pretty much kill the system. */ diff --git a/arch/m68k/mac/via.c b/arch/m68k/mac/via.c index 53b448deb37a..5b43e574046d 100644 --- a/arch/m68k/mac/via.c +++ b/arch/m68k/mac/via.c @@ -48,7 +48,7 @@ __u8 rbv_clear; * just hit the combined register (ie, vIER|rIER) but that seems to * break on AV Macs...probably because they actually decode more than * eight address bits. Why can't Apple engineers at least be - * _consistantly_ lazy? - 1999-05-21 (jmt) + * _consistently_ lazy? - 1999-05-21 (jmt) */ static int gIER,gIFR,gBufA,gBufB; diff --git a/arch/m68knommu/platform/68360/uCquicc/crt0_ram.S b/arch/m68knommu/platform/68360/uCquicc/crt0_ram.S index c8d9142cdd24..56027aac42f8 100644 --- a/arch/m68knommu/platform/68360/uCquicc/crt0_ram.S +++ b/arch/m68knommu/platform/68360/uCquicc/crt0_ram.S @@ -358,7 +358,7 @@ _ramend: .long 0 /* (Unassigned, Reserved) - 62. */ .long 0 /* (Unassigned, Reserved) - 63. */ /* The assignment of these vectors to the CPM is */ - /* dependant on the configuration of the CPM vba */ + /* dependent on the configuration of the CPM vba */ /* fields. */ .long 0 /* (User-Defined Vectors 1) CPM Error - 64. */ .long 0 /* (User-Defined Vectors 2) CPM Parallel IO PC11- 65. */ diff --git a/arch/m68knommu/platform/68360/uCquicc/crt0_rom.S b/arch/m68knommu/platform/68360/uCquicc/crt0_rom.S index 6c0df839eb7a..ed9187c740ac 100644 --- a/arch/m68knommu/platform/68360/uCquicc/crt0_rom.S +++ b/arch/m68knommu/platform/68360/uCquicc/crt0_rom.S @@ -367,7 +367,7 @@ _ramend: .long 0 /* (Unassigned, Reserved) - 62. */ .long 0 /* (Unassigned, Reserved) - 63. */ /* The assignment of these vectors to the CPM is */ - /* dependant on the configuration of the CPM vba */ + /* dependent on the configuration of the CPM vba */ /* fields. */ .long 0 /* (User-Defined Vectors 1) CPM Error - 64. */ .long 0 /* (User-Defined Vectors 2) CPM Parallel IO PC11- 65. */ diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 2b858b1b67a0..84064c3a02bd 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -689,7 +689,7 @@ syscall_exit_rfi: * (we don't store them in the sigcontext), so set them * to "proper" values now (otherwise we'll wind up restoring * whatever was last stored in the task structure, which might - * be inconsistant if an interrupt occured while on the gateway + * be inconsistent if an interrupt occured while on the gateway * page) Note that we may be "trashing" values the user put in * them, but we don't support the the user changing them. */ diff --git a/arch/ppc/boot/simple/Makefile b/arch/ppc/boot/simple/Makefile index 82f57d14b810..40cd523f7587 100644 --- a/arch/ppc/boot/simple/Makefile +++ b/arch/ppc/boot/simple/Makefile @@ -19,7 +19,7 @@ # ENTRYPOINT which the image should be loaded at. The optimal setting # for ENTRYPOINT is the link address. # (4) It is advisable to pass in the memory size using BI_MEMSIZE and -# get_mem_size(), which is memory controller dependant. Add in the correct +# get_mem_size(), which is memory controller dependent. Add in the correct # XXX_memory.o file for this to work, as well as editing the $(MISC) file. 
boot: zImage diff --git a/arch/ppc/boot/simple/misc.c b/arch/ppc/boot/simple/misc.c index e204557f2494..6b2909ed4eae 100644 --- a/arch/ppc/boot/simple/misc.c +++ b/arch/ppc/boot/simple/misc.c @@ -82,7 +82,7 @@ decompress_kernel(unsigned long load_addr, int num_words, unsigned long cksum) #if defined(CONFIG_LOPEC) || defined(CONFIG_PAL4) /* - * Call get_mem_size(), which is memory controller dependant, + * Call get_mem_size(), which is memory controller dependent, * and we must have the correct file linked in here. */ TotalMemory = get_mem_size(); diff --git a/arch/ppc/kernel/pci.c b/arch/ppc/kernel/pci.c index 90df3328b3e8..ee68f236114b 100644 --- a/arch/ppc/kernel/pci.c +++ b/arch/ppc/kernel/pci.c @@ -1069,7 +1069,7 @@ pcibios_init(void) if (ppc_md.pci_swizzle && ppc_md.pci_map_irq) pci_fixup_irqs(ppc_md.pci_swizzle, ppc_md.pci_map_irq); - /* Call machine dependant fixup */ + /* Call machine dependent fixup */ if (ppc_md.pcibios_fixup) ppc_md.pcibios_fixup(); diff --git a/arch/ppc64/kernel/pci.c b/arch/ppc64/kernel/pci.c index 049a5e58ffd7..c459ee120097 100644 --- a/arch/ppc64/kernel/pci.c +++ b/arch/ppc64/kernel/pci.c @@ -413,7 +413,7 @@ pcibios_init(void) next_busno = hose->last_busno+1; } - /* Call machine dependant fixup */ + /* Call machine dependent fixup */ if (ppc_md.pcibios_fixup) { ppc_md.pcibios_fixup(); } diff --git a/arch/sh/kernel/io.c b/arch/sh/kernel/io.c index 1b0f0918dfd6..1f49c739d1bc 100644 --- a/arch/sh/kernel/io.c +++ b/arch/sh/kernel/io.c @@ -4,7 +4,7 @@ * Copyright (C) 2000 Stuart Menefy * * Provide real functions which expand to whatever the header file defined. - * Also definitions of machine independant IO functions. + * Also definitions of machine independent IO functions. */ #include diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S index d4c1e922b59f..4434547f406d 100644 --- a/arch/sparc/kernel/entry.S +++ b/arch/sparc/kernel/entry.S @@ -1889,7 +1889,7 @@ C_LABEL(kill_user_windows): wr %o4, 0x0, %psr ! the uwinmask state WRITE_PAUSE ! burn them cycles 1: - ld [%g6 + TI_UWINMASK], %o0 ! get consistant state + ld [%g6 + TI_UWINMASK], %o0 ! get consistent state orcc %g0, %o0, %g0 ! did an interrupt come in? be 4f ! yep, we are done rd %wim, %o3 ! get current wim diff --git a/arch/sparc/kernel/sys_sparc.c b/arch/sparc/kernel/sys_sparc.c index d060b430c08c..fbf31d498e8e 100644 --- a/arch/sparc/kernel/sys_sparc.c +++ b/arch/sparc/kernel/sys_sparc.c @@ -409,7 +409,7 @@ sparc_sigaction (int sig, const struct old_sigaction *act, ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); if (!ret && oact) { - /* In the clone() case we could copy half consistant + /* In the clone() case we could copy half consistent * state to the user, however this could sleep and * deadlock us if we held the signal lock on SMP. So for * now I take the easy way out and do no locking. diff --git a/arch/sparc/kernel/sys_sunos.c b/arch/sparc/kernel/sys_sunos.c index 1babc9d4d326..a81ca4978702 100644 --- a/arch/sparc/kernel/sys_sunos.c +++ b/arch/sparc/kernel/sys_sunos.c @@ -1168,7 +1168,7 @@ sunos_sigaction(int sig, const struct old_sigaction *act, ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); if (!ret && oact) { - /* In the clone() case we could copy half consistant + /* In the clone() case we could copy half consistent * state to the user, however this could sleep and * deadlock us if we held the signal lock on SMP. So for * now I take the easy way out and do no locking. 
diff --git a/arch/sparc/math-emu/math.c b/arch/sparc/math-emu/math.c index 050431cfcd87..cd15e4f41ef2 100644 --- a/arch/sparc/math-emu/math.c +++ b/arch/sparc/math-emu/math.c @@ -203,7 +203,7 @@ int do_mathemu(struct pt_regs *regs, struct task_struct *fpt) } /* All routines returning an exception to raise should detect - * such exceptions _before_ rounding to be consistant with + * such exceptions _before_ rounding to be consistent with * the behavior of the hardware in the implemented cases * (and thus with the recommendations in the V9 architecture * manual). diff --git a/arch/sparc/mm/sun4c.c b/arch/sparc/mm/sun4c.c index 74c23da4a7b0..5b165f0d3b1b 100644 --- a/arch/sparc/mm/sun4c.c +++ b/arch/sparc/mm/sun4c.c @@ -1042,7 +1042,7 @@ static struct thread_info *sun4c_alloc_thread_info(void) get_locked_segment(addr); /* We are changing the virtual color of the page(s) - * so we must flush the cache to guarentee consistancy. + * so we must flush the cache to guarentee consistency. */ sun4c_flush_page(pages); #ifndef CONFIG_SUN4 diff --git a/arch/sparc64/kernel/sbus.c b/arch/sparc64/kernel/sbus.c index 27ffa86806f9..9a5f8a99cfdc 100644 --- a/arch/sparc64/kernel/sbus.c +++ b/arch/sparc64/kernel/sbus.c @@ -767,7 +767,7 @@ unsigned int sbus_build_irq(void *buscookie, unsigned int ino) } imap += reg_base; - /* SYSIO inconsistancy. For external SLOTS, we have to select + /* SYSIO inconsistency. For external SLOTS, we have to select * the right ICLR register based upon the lower SBUS irq level * bits. */ diff --git a/arch/sparc64/kernel/winfixup.S b/arch/sparc64/kernel/winfixup.S index a633e009aae9..707e85e8f59b 100644 --- a/arch/sparc64/kernel/winfixup.S +++ b/arch/sparc64/kernel/winfixup.S @@ -60,7 +60,7 @@ fill_fixup: sll %g2, 3, %g2 ! NORMAL-->OTHER wrpr %g0, 0x0, %canrestore ! Standard etrap stuff. - wrpr %g2, 0x0, %wstate ! This must be consistant. + wrpr %g2, 0x0, %wstate ! This must be consistent. wrpr %g0, 0x0, %otherwin ! We know this. mov PRIMARY_CONTEXT, %g1 ! Change contexts... stxa %g0, [%g1] ASI_DMMU ! Back into the nucleus. @@ -181,7 +181,7 @@ fill_fixup_mna: sll %g2, 3, %g2 ! NORMAL-->OTHER wrpr %g0, 0x0, %canrestore ! Standard etrap stuff. - wrpr %g2, 0x0, %wstate ! This must be consistant. + wrpr %g2, 0x0, %wstate ! This must be consistent. wrpr %g0, 0x0, %otherwin ! We know this. mov PRIMARY_CONTEXT, %g1 ! Change contexts... stxa %g0, [%g1] ASI_DMMU ! Back into the nucleus. @@ -287,7 +287,7 @@ fill_fixup_dax: sll %g2, 3, %g2 ! NORMAL-->OTHER wrpr %g0, 0x0, %canrestore ! Standard etrap stuff. - wrpr %g2, 0x0, %wstate ! This must be consistant. + wrpr %g2, 0x0, %wstate ! This must be consistent. wrpr %g0, 0x0, %otherwin ! We know this. mov PRIMARY_CONTEXT, %g1 ! Change contexts... stxa %g0, [%g1] ASI_DMMU ! Back into the nucleus. diff --git a/arch/sparc64/math-emu/math.c b/arch/sparc64/math-emu/math.c index f45419845cde..48dfa37ddd72 100644 --- a/arch/sparc64/math-emu/math.c +++ b/arch/sparc64/math-emu/math.c @@ -88,7 +88,7 @@ #define FSR_CEXC_MASK (0x1fUL << FSR_CEXC_SHIFT) /* All routines returning an exception to raise should detect - * such exceptions _before_ rounding to be consistant with + * such exceptions _before_ rounding to be consistent with * the behavior of the hardware in the implemented cases * (and thus with the recommendations in the V9 architecture * manual). 
diff --git a/arch/v850/kernel/rte_mb_a_pci.c b/arch/v850/kernel/rte_mb_a_pci.c index c56b0d28bb98..6fc07ccd955e 100644 --- a/arch/v850/kernel/rte_mb_a_pci.c +++ b/arch/v850/kernel/rte_mb_a_pci.c @@ -689,7 +689,7 @@ void pci_unmap_single (struct pci_dev *pdev, dma_addr_t dma_addr, size_t size, free_dma_mapping (mapping); } -/* Make physical memory consistant for a single streaming mode DMA +/* Make physical memory consistent for a single streaming mode DMA translation after a transfer. If you perform a pci_map_single() but wish to interrogate the @@ -731,7 +731,7 @@ pci_unmap_sg (struct pci_dev *pdev, struct scatterlist *sg, int sg_len,int dir) BUG (); } -/* Make physical memory consistant for a set of streaming mode DMA +/* Make physical memory consistent for a set of streaming mode DMA translations after a transfer. The same as pci_dma_sync_single but for a scatter-gather list, same rules and usage. */ diff --git a/drivers/atm/lanai.c b/drivers/atm/lanai.c index 8ca6798f0796..e6788148675b 100644 --- a/drivers/atm/lanai.c +++ b/drivers/atm/lanai.c @@ -632,7 +632,7 @@ static inline void reset_board(const struct lanai_dev *lanai) * anytime it wants to consult its table of vccs - for instance * when handling an incoming PDU. This also explains why we would * probably want the write_lock while in _change_qos - to prevent - * handling of PDUs while possibly in an inconsistant state. + * handling of PDUs while possibly in an inconsistent state. * Also, _send would grab the lock for reading. * * One problem with this is that _open and _close could no longer diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index c71a04f5d457..0f9f7a9de7ba 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -24,7 +24,7 @@ * 01-3-11 Make nbd work with new Linux block layer code. It now supports * plugging like all the other block devices. Also added in MSG_MORE to * reduce number of partial TCP segments sent. - * 01-12-6 Fix deadlock condition by making queue locks independant of + * 01-12-6 Fix deadlock condition by making queue locks independent of * the transmit lock. * 02-10-11 Allow hung xmit to be aborted via SIGKILL & various fixes. * diff --git a/drivers/char/ip2/i2ellis.c b/drivers/char/ip2/i2ellis.c index 443f414eea68..f834d05ccc97 100644 --- a/drivers/char/ip2/i2ellis.c +++ b/drivers/char/ip2/i2ellis.c @@ -773,7 +773,7 @@ iiWriteBuf16(i2eBordStrPtr pB, unsigned char *address, int count) // // Writes 'count' bytes from 'address' to the data fifo specified by the board // structure pointer pB. Should count happen to be odd, an extra pad byte is -// sent (identity unknown...). This is to be consistant with the 16-bit version. +// sent (identity unknown...). This is to be consistent with the 16-bit version. // Uses 8-bit (byte) operations. Is called indirectly through pB->i2eWriteBuf. // //****************************************************************************** diff --git a/drivers/char/ip2/i2os.h b/drivers/char/ip2/i2os.h index 8466d774700e..c83e901ea63f 100644 --- a/drivers/char/ip2/i2os.h +++ b/drivers/char/ip2/i2os.h @@ -8,7 +8,7 @@ * PACKAGE: Linux tty Device Driver for IntelliPort II family of multiport * serial I/O controllers. * -* DESCRIPTION: Defines, definitions and includes which are heavily dependant +* DESCRIPTION: Defines, definitions and includes which are heavily dependent * on O/S, host, compiler, etc. 
This file is tailored for: * Linux v2.0.0 and later * Gnu gcc c2.7.2 diff --git a/drivers/char/ip2main.c b/drivers/char/ip2main.c index 244399a6ae04..7ca1e640b7cc 100644 --- a/drivers/char/ip2main.c +++ b/drivers/char/ip2main.c @@ -21,7 +21,7 @@ // // 1.2.14 /\/\|=mhw=|\/\/ // Added bounds checking to ip2_ipl_ioctl to avoid potential terroristic acts. -// Changed the definition of ip2trace to be more consistant with kernel style +// Changed the definition of ip2trace to be more consistent with kernel style // Thanks to Andreas Dilger for these updates // // 1.2.13 /\/\|=mhw=|\/\/ diff --git a/drivers/char/nvram.c b/drivers/char/nvram.c index d1d8717f31bb..3565715eb561 100644 --- a/drivers/char/nvram.c +++ b/drivers/char/nvram.c @@ -11,7 +11,7 @@ * "NVRAM" (NV stands for non-volatile). * * The data are supplied as a (seekable) character device, /dev/nvram. The - * size of this file is dependant on the controller. The usual size is 114, + * size of this file is dependent on the controller. The usual size is 114, * the number of freely available bytes in the memory (i.e., not used by the * RTC itself). * diff --git a/drivers/char/rio/riotty.c b/drivers/char/rio/riotty.c index 15868bfbb285..3e0de643fe2e 100644 --- a/drivers/char/rio/riotty.c +++ b/drivers/char/rio/riotty.c @@ -1287,7 +1287,7 @@ register caddr_t arg; } /* - ttyseth -- set hardware dependant tty settings + ttyseth -- set hardware dependent tty settings */ void ttyseth(PortP, s, sg) @@ -1342,7 +1342,7 @@ struct old_sgttyb *sg; } /* - ttyseth_pv -- set hardware dependant tty settings using either the + ttyseth_pv -- set hardware dependent tty settings using either the POSIX termios structure or the System V termio structure. sysv = 0 => (POSIX): struct termios *sg sysv != 0 => (System V): struct termio *sg diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c index 1e5f4c6ec5a1..cf6e7b75dbac 100644 --- a/drivers/char/rtc.c +++ b/drivers/char/rtc.c @@ -486,7 +486,7 @@ static int rtc_do_ioctl(unsigned int cmd, unsigned long arg, int kernel) yrs = 73; } #endif - /* These limits and adjustments are independant of + /* These limits and adjustments are independent of * whether the chip is in binary mode or not. */ if (yrs > 169) { diff --git a/drivers/hotplug/cpci_hotplug_pci.c b/drivers/hotplug/cpci_hotplug_pci.c index da4d0d77b13b..822a7b9353e6 100644 --- a/drivers/hotplug/cpci_hotplug_pci.c +++ b/drivers/hotplug/cpci_hotplug_pci.c @@ -341,7 +341,7 @@ static int cpci_configure_dev(struct pci_bus *bus, struct pci_dev *dev) /* * Need to explicitly set irq field to 0 so that it'll get assigned - * by the pcibios platform dependant code called by pci_enable_device. + * by the pcibios platform dependent code called by pci_enable_device. */ dev->irq = 0; diff --git a/drivers/hotplug/cpqphp_core.c b/drivers/hotplug/cpqphp_core.c index 5e641fd1c6e3..15b4f37424e6 100644 --- a/drivers/hotplug/cpqphp_core.c +++ b/drivers/hotplug/cpqphp_core.c @@ -42,7 +42,7 @@ #include "cpqphp.h" #include "cpqphp_nvram.h" -#include "../../arch/i386/pci/pci.h" /* horrible hack showing how processor dependant we are... */ +#include "../../arch/i386/pci/pci.h" /* horrible hack showing how processor dependent we are... 
*/ /* Global variables */ diff --git a/drivers/hotplug/cpqphp_pci.c b/drivers/hotplug/cpqphp_pci.c index 1d3bb90efdf2..e8e5328b7666 100644 --- a/drivers/hotplug/cpqphp_pci.c +++ b/drivers/hotplug/cpqphp_pci.c @@ -36,7 +36,7 @@ #include #include "cpqphp.h" #include "cpqphp_nvram.h" -#include "../../arch/i386/pci/pci.h" /* horrible hack showing how processor dependant we are... */ +#include "../../arch/i386/pci/pci.h" /* horrible hack showing how processor dependent we are... */ u8 cpqhp_nic_irq; diff --git a/drivers/ide/pci/sis5513.c b/drivers/ide/pci/sis5513.c index c91a7f93c6d5..9812cb5bef09 100644 --- a/drivers/ide/pci/sis5513.c +++ b/drivers/ide/pci/sis5513.c @@ -22,7 +22,7 @@ /* * TODO: - * - Get ridden of SisHostChipInfo[] completness dependancy. + * - Get ridden of SisHostChipInfo[] completness dependency. * - Study drivers/ide/ide-timing.h. * - Are there pre-ATA_16 SiS5513 chips ? -> tune init code for them * or remove ATA_00 define @@ -64,7 +64,7 @@ /* Miscellaneaous flags */ #define SIS5513_LATENCY 0x01 -/* registers layout and init values are chipset family dependant */ +/* registers layout and init values are chipset family dependent */ /* 1/ define families */ #define ATA_00 0x00 #define ATA_16 0x01 diff --git a/drivers/isdn/eicon/eicon.h b/drivers/isdn/eicon/eicon.h index f2c6a043a056..4070b06f2380 100644 --- a/drivers/isdn/eicon/eicon.h +++ b/drivers/isdn/eicon/eicon.h @@ -305,7 +305,7 @@ typedef struct { * Per card driver data */ typedef struct eicon_card { - eicon_hwif hwif; /* Hardware dependant interface */ + eicon_hwif hwif; /* Hardware dependent interface */ DESCRIPTOR *d; /* IDI Descriptor */ u_char ptype; /* Protocol type (1TR6 or Euro) */ u_char bus; /* Bustype (ISA, MCA, PCI) */ diff --git a/drivers/isdn/hisax/isdnl2.c b/drivers/isdn/hisax/isdnl2.c index be30c6ea972f..87142482cf05 100644 --- a/drivers/isdn/hisax/isdnl2.c +++ b/drivers/isdn/hisax/isdnl2.c @@ -1445,7 +1445,7 @@ l2_tei_remove(struct FsmInst *fi, int event, void *arg) } static void -l2_st14_persistant_da(struct FsmInst *fi, int event, void *arg) +l2_st14_persistent_da(struct FsmInst *fi, int event, void *arg) { struct PStack *st = fi->userdata; @@ -1456,7 +1456,7 @@ l2_st14_persistant_da(struct FsmInst *fi, int event, void *arg) } static void -l2_st5_persistant_da(struct FsmInst *fi, int event, void *arg) +l2_st5_persistent_da(struct FsmInst *fi, int event, void *arg) { struct PStack *st = fi->userdata; @@ -1469,7 +1469,7 @@ l2_st5_persistant_da(struct FsmInst *fi, int event, void *arg) } static void -l2_st6_persistant_da(struct FsmInst *fi, int event, void *arg) +l2_st6_persistent_da(struct FsmInst *fi, int event, void *arg) { struct PStack *st = fi->userdata; @@ -1480,7 +1480,7 @@ l2_st6_persistant_da(struct FsmInst *fi, int event, void *arg) } static void -l2_persistant_da(struct FsmInst *fi, int event, void *arg) +l2_persistent_da(struct FsmInst *fi, int event, void *arg) { struct PStack *st = fi->userdata; @@ -1615,14 +1615,14 @@ static struct FsmNode L2FnList[] __initdata = {ST_L2_6, EV_L2_FRAME_ERROR, l2_frame_error}, {ST_L2_7, EV_L2_FRAME_ERROR, l2_frame_error_reest}, {ST_L2_8, EV_L2_FRAME_ERROR, l2_frame_error_reest}, - {ST_L2_1, EV_L1_DEACTIVATE, l2_st14_persistant_da}, + {ST_L2_1, EV_L1_DEACTIVATE, l2_st14_persistent_da}, {ST_L2_2, EV_L1_DEACTIVATE, l2_st24_tei_remove}, {ST_L2_3, EV_L1_DEACTIVATE, l2_st3_tei_remove}, - {ST_L2_4, EV_L1_DEACTIVATE, l2_st14_persistant_da}, - {ST_L2_5, EV_L1_DEACTIVATE, l2_st5_persistant_da}, - {ST_L2_6, EV_L1_DEACTIVATE, l2_st6_persistant_da}, - {ST_L2_7, 
EV_L1_DEACTIVATE, l2_persistant_da}, - {ST_L2_8, EV_L1_DEACTIVATE, l2_persistant_da}, + {ST_L2_4, EV_L1_DEACTIVATE, l2_st14_persistent_da}, + {ST_L2_5, EV_L1_DEACTIVATE, l2_st5_persistent_da}, + {ST_L2_6, EV_L1_DEACTIVATE, l2_st6_persistent_da}, + {ST_L2_7, EV_L1_DEACTIVATE, l2_persistent_da}, + {ST_L2_8, EV_L1_DEACTIVATE, l2_persistent_da}, }; #define L2_FN_COUNT (sizeof(L2FnList)/sizeof(struct FsmNode)) diff --git a/drivers/isdn/hisax/l3dss1.c b/drivers/isdn/hisax/l3dss1.c index 0ba4d72ff48b..75e4706ce9b7 100644 --- a/drivers/isdn/hisax/l3dss1.c +++ b/drivers/isdn/hisax/l3dss1.c @@ -2169,7 +2169,7 @@ static void l3dss1_redir_req_early(struct l3_process *pc, u8 pr, void *arg) /***********************************************/ /* handle special commands for this protocol. */ -/* Examples are call independant services like */ +/* Examples are call independent services like */ /* remote operations with dummy callref. */ /***********************************************/ static int l3dss1_cmd_global(struct PStack *st, isdn_ctrl *ic) diff --git a/drivers/isdn/hisax/l3ni1.c b/drivers/isdn/hisax/l3ni1.c index ca1caf72abe0..b9001580be28 100644 --- a/drivers/isdn/hisax/l3ni1.c +++ b/drivers/isdn/hisax/l3ni1.c @@ -2024,7 +2024,7 @@ static void l3ni1_redir_req_early(struct l3_process *pc, u8 pr, void *arg) /***********************************************/ /* handle special commands for this protocol. */ -/* Examples are call independant services like */ +/* Examples are call independent services like */ /* remote operations with dummy callref. */ /***********************************************/ static int l3ni1_cmd_global(struct PStack *st, isdn_ctrl *ic) diff --git a/drivers/isdn/hysdn/hysdn_boot.c b/drivers/isdn/hysdn/hysdn_boot.c index 25e4fe68fa3f..bf8756c0954f 100644 --- a/drivers/isdn/hysdn/hysdn_boot.c +++ b/drivers/isdn/hysdn/hysdn_boot.c @@ -78,7 +78,7 @@ DecryptBuf(struct boot_data *boot, int cnt) } /* DecryptBuf */ /********************************************************************************/ -/* pof_handle_data executes the required actions dependant on the active record */ +/* pof_handle_data executes the required actions dependent on the active record */ /* id. If successful 0 is returned, a negative value shows an error. */ /********************************************************************************/ static int diff --git a/drivers/media/radio/radio-zoltrix.c b/drivers/media/radio/radio-zoltrix.c index 2c309c5e8ed1..70a3ee406503 100644 --- a/drivers/media/radio/radio-zoltrix.c +++ b/drivers/media/radio/radio-zoltrix.c @@ -2,7 +2,7 @@ * (c) 1998 C. van Schaik * * BUGS - * Due to the inconsistancy in reading from the signal flags + * Due to the inconsistency in reading from the signal flags * it is difficult to get an accurate tuned signal. * * It seems that the card is not linear to 0 volume. It cuts off diff --git a/drivers/mtd/chips/jedec.c b/drivers/mtd/chips/jedec.c index b95e3b58d895..337d0d22bfb0 100644 --- a/drivers/mtd/chips/jedec.c +++ b/drivers/mtd/chips/jedec.c @@ -608,7 +608,7 @@ static int flash_erase(struct mtd_info *mtd, struct erase_info *instr) /* Poll the flash for erasure completion, specs say this can take as long as 480 seconds to do all the sectors (for a 2 meg flash). - Erasure time is dependant on chip age, temp and wear.. */ + Erasure time is dependent on chip age, temp and wear.. */ /* This being a generic routine assumes a 32 bit bus. It does read32s and bundles interleved chips into the same grouping. 
This will work diff --git a/drivers/net/acenic.c b/drivers/net/acenic.c index bcf04cb0be4a..5d0ef4856b07 100644 --- a/drivers/net/acenic.c +++ b/drivers/net/acenic.c @@ -482,7 +482,7 @@ static inline void tasklet_init(struct tasklet_struct *tasklet, * * One advantagous side effect of this allocation approach is that the * entire rx processing can be done without holding any spin lock - * since the rx rings and registers are totally independant of the tx + * since the rx rings and registers are totally independent of the tx * ring and its registers. This of course includes the kmalloc's of * new skb's. Thus start_xmit can run in parallel with rx processing * and the memory allocation on SMP systems. diff --git a/drivers/net/declance.c b/drivers/net/declance.c index 98f558dd7f90..e57ccf08b4d6 100644 --- a/drivers/net/declance.c +++ b/drivers/net/declance.c @@ -279,7 +279,7 @@ struct lance_private { lp->tx_old - lp->tx_new-1) /* The lance control ports are at an absolute address, machine and tc-slot - * dependant. + * dependent. * DECstations do only 32-bit access and the LANCE uses 16 bit addresses, * so we have to give the structure an extra member making rap pointing * at the right address diff --git a/drivers/net/e1000/e1000_osdep.h b/drivers/net/e1000/e1000_osdep.h index aa1dd7dd8d01..0d68940f9b98 100644 --- a/drivers/net/e1000/e1000_osdep.h +++ b/drivers/net/e1000/e1000_osdep.h @@ -27,7 +27,7 @@ *******************************************************************************/ -/* glue for the OS independant part of e1000 +/* glue for the OS independent part of e1000 * includes register access macros */ diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index b4fb6e2e8de3..38b55e119c90 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -113,7 +113,7 @@ struct sixpack { /* 6pack stuff */ unsigned char tx_delay; - unsigned char persistance; + unsigned char persistence; unsigned char slottime; unsigned char duplex; unsigned char led_state; @@ -302,7 +302,7 @@ static void sp_encaps(struct sixpack *sp, unsigned char *icp, int len) switch (p[0]) { case 1: sp->tx_delay = p[1]; return; - case 2: sp->persistance = p[1]; return; + case 2: sp->persistence = p[1]; return; case 3: sp->slottime = p[1]; return; case 4: /* ignored */ return; case 5: sp->duplex = p[1]; return; @@ -392,7 +392,7 @@ static void sp_xmit_on_air(unsigned long channel) random = random * 17 + 41; - if (((sp->status1 & SIXP_DCD_MASK) == 0) && (random < sp->persistance)) { + if (((sp->status1 & SIXP_DCD_MASK) == 0) && (random < sp->persistence)) { sp->led_state = 0x70; sp->tty->driver.write(sp->tty, 0, &sp->led_state, 1); sp->tx_enable = 1; @@ -469,7 +469,7 @@ static int sp_open(struct net_device *dev) sp->duplex = 0; sp->tx_delay = SIXP_TXDELAY; - sp->persistance = SIXP_PERSIST; + sp->persistence = SIXP_PERSIST; sp->slottime = SIXP_SLOTTIME; sp->led_state = 0x60; sp->status = 1; diff --git a/drivers/net/hamradio/scc.c b/drivers/net/hamradio/scc.c index 3487fac205e7..f1ed6647615c 100644 --- a/drivers/net/hamradio/scc.c +++ b/drivers/net/hamradio/scc.c @@ -1703,7 +1703,7 @@ static int scc_net_tx(struct sk_buff *skb, struct net_device *dev) /* * Start transmission if the trx state is idle or - * t_idle hasn't expired yet. Use dwait/persistance/slottime + * t_idle hasn't expired yet. Use dwait/persistence/slottime * algorithm for normal halfduplex operation. 
*/ diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c index d685b95b294c..66348071a670 100644 --- a/drivers/net/hamradio/yam.c +++ b/drivers/net/hamradio/yam.c @@ -34,7 +34,7 @@ * 0.5 F6FBB 01.08.98 Shared IRQs, /proc/net and network statistics * 0.6 F6FBB 25.08.98 Added 1200Bds format * 0.7 F6FBB 12.09.98 Added to the kernel configuration - * 0.8 F6FBB 14.10.98 Fixed slottime/persistance timing bug + * 0.8 F6FBB 14.10.98 Fixed slottime/persistence timing bug * OK1ZIA 2.09.01 Fixed "kfree_skb on hard IRQ" * using dev_kfree_skb_any(). (important in 2.4 kernel) * diff --git a/drivers/net/rrunner.c b/drivers/net/rrunner.c index c04703381230..e5465e5a08ec 100644 --- a/drivers/net/rrunner.c +++ b/drivers/net/rrunner.c @@ -386,7 +386,7 @@ static int rr_reset(struct net_device *dev) writel(0, &regs->CmdRing[i]); /* - * Why 32 ? is this not cache line size dependant? + * Why 32 ? is this not cache line size dependent? */ writel(RBURST_64|WBURST_64, &regs->PciState); wmb(); diff --git a/drivers/net/sgiseeq.c b/drivers/net/sgiseeq.c index 73b6b55f7475..a59f1872ed2a 100644 --- a/drivers/net/sgiseeq.c +++ b/drivers/net/sgiseeq.c @@ -524,7 +524,7 @@ static int sgiseeq_start_xmit(struct sk_buff *skb, struct net_device *dev) * 2) Do no allow the HPC to look at a new descriptor until * we have completely set up it's state. This means, do * not clear HPCDMA_EOX in the current last descritptor - * until the one we are adding looks consistant and could + * until the one we are adding looks consistent and could * be processes right now. * 3) The tx interrupt code must notice when we've added a new * entry and the HPC got to the end of the chain before we diff --git a/drivers/net/sk98lin/skvpd.c b/drivers/net/sk98lin/skvpd.c index f3683e3af4e8..0ac799a0f99e 100644 --- a/drivers/net/sk98lin/skvpd.c +++ b/drivers/net/sk98lin/skvpd.c @@ -88,7 +88,7 @@ * Revision 1.9 1998/09/16 07:33:52 malthoff * remove memcmp() by SK_MEMCMP and * memcpy() by SK_MEMCPY() to be - * independant from the 'C' Standard Library. + * independent from the 'C' Standard Library. * * Revision 1.8 1998/08/19 12:52:35 malthoff * compiler fix: use SK_VPD_KEY instead of S_VPD. diff --git a/drivers/net/sk98lin/skxmac2.c b/drivers/net/sk98lin/skxmac2.c index 32dc2aa38460..67c2fa53361a 100644 --- a/drivers/net/sk98lin/skxmac2.c +++ b/drivers/net/sk98lin/skxmac2.c @@ -194,7 +194,7 @@ * * Revision 1.12 1998/10/14 14:45:04 malthoff * Remove SKERR_SIRQ_E0xx and SKERR_SIRQ_E0xxMSG by - * SKERR_HWI_Exx and SKERR_HWI_E0xxMSG to be independant + * SKERR_HWI_Exx and SKERR_HWI_E0xxMSG to be independent * from the Sirq module. * * Revision 1.11 1998/10/14 13:59:01 gklug @@ -826,7 +826,7 @@ int Port) /* Port Index (MAC_1 + n) */ for (i = 0; i < 3; i++) { /* * The following 2 statements are together endianess - * independant. Remember this when changing. + * independent. Remember this when changing. 
*/ SK_IN16(IoC, (B2_MAC_2 + Port * 8 + i * 2), &SWord); XM_OUT16(IoC, Port, (XM_SA + i * 2), SWord); diff --git a/drivers/net/skfp/cfm.c b/drivers/net/skfp/cfm.c index 7a51b765b4a5..91eb36f3ec3a 100644 --- a/drivers/net/skfp/cfm.c +++ b/drivers/net/skfp/cfm.c @@ -21,15 +21,15 @@ */ /* - * Hardware independant state machine implemantation + * Hardware independent state machine implemantation * The following external SMT functions are referenced : * * queue_event() * - * The following external HW dependant functions are referenced : + * The following external HW dependent functions are referenced : * config_mux() * - * The following HW dependant events are required : + * The following HW dependent events are required : * NONE */ diff --git a/drivers/net/skfp/ecm.c b/drivers/net/skfp/ecm.c index 9dcca7f5581e..7eaab1c3f9d2 100644 --- a/drivers/net/skfp/ecm.c +++ b/drivers/net/skfp/ecm.c @@ -17,23 +17,23 @@ /* SMT ECM Entity Coordination Management - Hardware independant state machine + Hardware independent state machine */ /* - * Hardware independant state machine implemantation + * Hardware independent state machine implemantation * The following external SMT functions are referenced : * * queue_event() * smt_timer_start() * smt_timer_stop() * - * The following external HW dependant functions are referenced : + * The following external HW dependent functions are referenced : * sm_pm_bypass_req() * sm_pm_ls_latch() * sm_pm_get_ls() * - * The following HW dependant events are required : + * The following HW dependent events are required : * NONE * */ diff --git a/drivers/net/skfp/h/osdef1st.h b/drivers/net/skfp/h/osdef1st.h index a6866248abc0..5359eb53008d 100644 --- a/drivers/net/skfp/h/osdef1st.h +++ b/drivers/net/skfp/h/osdef1st.h @@ -13,7 +13,7 @@ ******************************************************************************/ /* - * Operating system-dependant definitions that have to be defined + * Operating system-dependent definitions that have to be defined * before any other header files are included. 
*/ diff --git a/drivers/net/skfp/pcmplc.c b/drivers/net/skfp/pcmplc.c index b19b2a81b6ac..4be8f3a9e8f4 100644 --- a/drivers/net/skfp/pcmplc.c +++ b/drivers/net/skfp/pcmplc.c @@ -20,19 +20,19 @@ */ /* - * Hardware independant state machine implemantation + * Hardware independent state machine implemantation * The following external SMT functions are referenced : * * queue_event() * smt_timer_start() * smt_timer_stop() * - * The following external HW dependant functions are referenced : + * The following external HW dependent functions are referenced : * sm_pm_control() * sm_ph_linestate() * sm_pm_ls_latch() * - * The following HW dependant events are required : + * The following HW dependent events are required : * PC_QLS * PC_ILS * PC_HLS @@ -714,7 +714,7 @@ int cmd; mib = phy->mib ; /* - * general transitions independant of state + * general transitions independent of state */ switch (cmd) { case PC_STOP : diff --git a/drivers/net/skfp/rmt.c b/drivers/net/skfp/rmt.c index 473eb0c9cdfe..5771dc42ced8 100644 --- a/drivers/net/skfp/rmt.c +++ b/drivers/net/skfp/rmt.c @@ -20,18 +20,18 @@ */ /* - * Hardware independant state machine implemantation + * Hardware independent state machine implemantation * The following external SMT functions are referenced : * * queue_event() * smt_timer_start() * smt_timer_stop() * - * The following external HW dependant functions are referenced : + * The following external HW dependent functions are referenced : * sm_ma_control() * sm_mac_check_beacon_claim() * - * The following HW dependant events are required : + * The following HW dependent events are required : * RM_RING_OP * RM_RING_NON_OP * RM_MY_BEACON diff --git a/drivers/net/skfp/skfddi.c b/drivers/net/skfp/skfddi.c index 52f9d11ceba6..4a430f99b947 100644 --- a/drivers/net/skfp/skfddi.c +++ b/drivers/net/skfp/skfddi.c @@ -33,7 +33,7 @@ * The driver architecture is based on the DEC FDDI driver by * Lawrence V. Stefani and several ethernet drivers. * I also used an existing Windows NT miniport driver. - * All hardware dependant fuctions are handled by the SysKonnect + * All hardware dependent fuctions are handled by the SysKonnect * Hardware Module. * The only headerfiles that are directly related to this source * are skfddi.c, h/types.h, h/osdef1st.h, h/targetos.h. @@ -1729,7 +1729,7 @@ u_long dma_master(struct s_smc * smc, void *virt, int len, int flag) * dma_complete * * The hardware module calls this routine when it has completed a DMA - * transfer. If the operating system dependant module has set up the DMA + * transfer. If the operating system dependent module has set up the DMA * channel via dma_master() (e.g. Windows NT or AIX) it should clean up * the DMA channel. * Args diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c index 6f65949dd7ea..bf13c8182eb8 100644 --- a/drivers/net/wan/lmc/lmc_main.c +++ b/drivers/net/wan/lmc/lmc_main.c @@ -1074,7 +1074,7 @@ int lmc_probe (struct net_device *dev) /*fold00*/ * This prevents taking someone else's device. * * Check either the subvendor or the subdevice, some systems reverse - * the setting in the bois, seems to be version and arch dependant? + * the setting in the bois, seems to be version and arch dependent? * Fix the two variables * */ diff --git a/drivers/net/wan/lmc/lmc_ver.h b/drivers/net/wan/lmc/lmc_ver.h index 1e9e2f5f7ddb..dd1c592813e4 100644 --- a/drivers/net/wan/lmc/lmc_ver.h +++ b/drivers/net/wan/lmc/lmc_ver.h @@ -25,7 +25,7 @@ * made the souce code not only hard to read but version problems hard * to track down. 
If I'm overiding a function/etc with something in * this file it will be prefixed by "LMC_" which will mean look - * here for the version dependant change that's been done. + * here for the version dependent change that's been done. * */ diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c index d1e1ae8f6c1b..ec86ccb3bfda 100644 --- a/drivers/net/wireless/airo.c +++ b/drivers/net/wireless/airo.c @@ -5144,7 +5144,7 @@ static int airo_set_scan(struct net_device *dev, /*------------------------------------------------------------------*/ /* - * Translate scan data returned from the card to a card independant + * Translate scan data returned from the card to a card independent * format that the Wireless Tools will understand - Jean II */ static inline char *airo_translate_scan(struct net_device *dev, diff --git a/drivers/net/wireless/orinoco.h b/drivers/net/wireless/orinoco.h index 8317270a46da..8608626def40 100644 --- a/drivers/net/wireless/orinoco.h +++ b/drivers/net/wireless/orinoco.h @@ -36,7 +36,7 @@ struct orinoco_key { struct orinoco_private { - void *card; /* Pointer to card dependant structure */ + void *card; /* Pointer to card dependent structure */ int (*hard_reset)(struct orinoco_private *); /* Synchronisation stuff */ diff --git a/drivers/sbus/char/aurora.c b/drivers/sbus/char/aurora.c index cb00080d2fc9..1a27aa713c88 100644 --- a/drivers/sbus/char/aurora.c +++ b/drivers/sbus/char/aurora.c @@ -1046,7 +1046,7 @@ static void aurora_change_speed(struct Aurora_board *bp, struct Aurora_port *por &bp->r[chip]->r[CD180_MSVR]); } - /* Now we must calculate some speed dependant things. */ + /* Now we must calculate some speed dependent things. */ /* Set baud rate for port. */ tmp = (((bp->oscfreq + baud/2) / baud + diff --git a/drivers/sbus/char/bbc_envctrl.c b/drivers/sbus/char/bbc_envctrl.c index de7c9eadb63d..a5ac67cc498f 100644 --- a/drivers/sbus/char/bbc_envctrl.c +++ b/drivers/sbus/char/bbc_envctrl.c @@ -30,7 +30,7 @@ static int errno; * * The max1617 is capable of being programmed with power-off * temperature values, one low limit and one high limit. These - * can be controlled independantly for the cpu or ambient temperature. + * can be controlled independently for the cpu or ambient temperature. * If a limit is violated, the power is simply shut off. The frequency * with which the max1617 does temperature sampling can be controlled * as well. diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c index 416cf07afb38..8e3ad622cb8b 100644 --- a/drivers/scsi/aacraid/aachba.c +++ b/drivers/scsi/aacraid/aachba.c @@ -392,8 +392,8 @@ static char *container_types[] = { * Arguments: [1] pointer to void [1] int * * Purpose: Sets SCSI inquiry data strings for vendor, product - * and revision level. Allows strings to be set in platform dependant - * files instead of in OS dependant driver source. + * and revision level. Allows strings to be set in platform dependent + * files instead of in OS dependent driver source. */ static void setinqstr(int devtype, void *data, int tindex) diff --git a/drivers/scsi/aic7xxx/aic79xx_inline.h b/drivers/scsi/aic7xxx/aic79xx_inline.h index e41701472bfc..bdca5c17b33f 100644 --- a/drivers/scsi/aic7xxx/aic79xx_inline.h +++ b/drivers/scsi/aic7xxx/aic79xx_inline.h @@ -769,7 +769,7 @@ ahd_queue_scb(struct ahd_softc *ahd, struct scb *scb) ahd_setup_scb_common(ahd, scb); /* - * Make sure our data is consistant from the + * Make sure our data is consistent from the * perspective of the adapter. 
*/ ahd_sync_scb(ahd, scb, BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE); diff --git a/drivers/scsi/aic7xxx/aic79xx_osm.h b/drivers/scsi/aic7xxx/aic79xx_osm.h index 5913588f86d2..e7c7c0896113 100644 --- a/drivers/scsi/aic7xxx/aic79xx_osm.h +++ b/drivers/scsi/aic7xxx/aic79xx_osm.h @@ -321,7 +321,7 @@ struct ahd_cmd { /* * A per probed device structure used to deal with some error recovery * scenarios that the Linux mid-layer code just doesn't know how to - * handle. The structure allocated for a device only becomes persistant + * handle. The structure allocated for a device only becomes persistent * after a successfully completed inquiry command to the target when * that inquiry data indicates a lun is present. */ diff --git a/drivers/scsi/aic7xxx/aic7xxx_inline.h b/drivers/scsi/aic7xxx/aic7xxx_inline.h index 53de3904b10b..c38b0cb97d36 100644 --- a/drivers/scsi/aic7xxx/aic7xxx_inline.h +++ b/drivers/scsi/aic7xxx/aic7xxx_inline.h @@ -460,7 +460,7 @@ ahc_queue_scb(struct ahc_softc *ahc, struct scb *scb) ahc->qinfifo[ahc->qinfifonext++] = scb->hscb->tag; /* - * Make sure our data is consistant from the + * Make sure our data is consistent from the * perspective of the adapter. */ ahc_sync_scb(ahc, scb, BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE); diff --git a/drivers/scsi/aic7xxx/aic7xxx_osm.h b/drivers/scsi/aic7xxx/aic7xxx_osm.h index 4baa42a415b6..e400114b9ca7 100644 --- a/drivers/scsi/aic7xxx/aic7xxx_osm.h +++ b/drivers/scsi/aic7xxx/aic7xxx_osm.h @@ -334,7 +334,7 @@ struct ahc_cmd { /* * A per probed device structure used to deal with some error recovery * scenarios that the Linux mid-layer code just doesn't know how to - * handle. The structure allocated for a device only becomes persistant + * handle. The structure allocated for a device only becomes persistent * after a successfully completed inquiry command to the target when * that inquiry data indicates a lun is present. */ diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c index bb942cd73b6e..640ba4d96e69 100644 --- a/drivers/scsi/megaraid.c +++ b/drivers/scsi/megaraid.c @@ -443,7 +443,7 @@ * Mon Aug 6 14:59:29 BST 2001 - "Michael Johnson" * * Make the HP print formatting and check for buggy firmware runtime not - * ifdef dependant. + * ifdef dependent. * * * Version 1.17d diff --git a/drivers/scsi/qla1280.c b/drivers/scsi/qla1280.c index 7c781dbfc344..dc61815d69e2 100644 --- a/drivers/scsi/qla1280.c +++ b/drivers/scsi/qla1280.c @@ -107,7 +107,7 @@ - Provide compat macros for pci_enable_device(), pci_find_subsys() and scsi_set_pci_device() - Call scsi_set_pci_device() for all devices - - Reduce size of kernel version dependant device probe code + - Reduce size of kernel version dependent device probe code - Move duplicate probe/init code to separate function - Handle error if qla1280_mem_alloc() fails - Kill OFFSET() macro and use Linux's PCI definitions instead diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.c b/drivers/scsi/sym53c8xx_2/sym_glue.c index 0356142db1aa..df8fa6f3de3f 100644 --- a/drivers/scsi/sym53c8xx_2/sym_glue.c +++ b/drivers/scsi/sym53c8xx_2/sym_glue.c @@ -1843,7 +1843,7 @@ static void sym_free_resources(hcb_p np) pci_unmap_mem(np->s.ram_va, np->ram_ws); #endif /* - * Free O/S independant resources. + * Free O/S independent resources. */ sym_hcb_free(np); @@ -2043,7 +2043,7 @@ sym_attach (Scsi_Host_Template *tpnt, int unit, sym_device *dev) } /* - * Perform O/S independant stuff. + * Perform O/S independent stuff. 
*/ if (sym_hcb_attach(np, fw, nvram)) goto attach_failed; diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.h b/drivers/scsi/sym53c8xx_2/sym_glue.h index 94bcef34d906..b711718b9df6 100644 --- a/drivers/scsi/sym53c8xx_2/sym_glue.h +++ b/drivers/scsi/sym53c8xx_2/sym_glue.h @@ -188,7 +188,7 @@ typedef struct sym_sccb *sccb_p; typedef struct sym_shcb *shcb_p; /* - * Define a reference to the O/S dependant IO request. + * Define a reference to the O/S dependent IO request. */ typedef Scsi_Cmnd *cam_ccb_p; /* Generic */ typedef Scsi_Cmnd *cam_scsiio_p;/* SCSI I/O */ diff --git a/drivers/usb/misc/atmsar.c b/drivers/usb/misc/atmsar.c index 5f6a7f033361..fce7041d51a2 100644 --- a/drivers/usb/misc/atmsar.c +++ b/drivers/usb/misc/atmsar.c @@ -45,7 +45,7 @@ * - No more in-buffer rewriting for cloned buffers. * - Removed the PII specific CFLAGS in the Makefile. * - * 0.2.1: - removed dependancy on alloc_tx. tis presented problems when + * 0.2.1: - removed dependency on alloc_tx. tis presented problems when * using this with the br2684 code. * * 0.2: - added AAL0 reassembly diff --git a/drivers/usb/serial/safe_serial.c b/drivers/usb/serial/safe_serial.c index fd8ae959defb..3cce9b7d1e73 100644 --- a/drivers/usb/serial/safe_serial.c +++ b/drivers/usb/serial/safe_serial.c @@ -256,7 +256,7 @@ static void safe_read_bulk_callback (struct urb *urb, struct pt_regs *regs) } tty_flip_buffer_push (port->tty); } else { - err ("%s - inconsistant lengths %d:%d", __FUNCTION__, + err ("%s - inconsistent lengths %d:%d", __FUNCTION__, actual_length, length); } } else { diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c index aa4af2b0a948..5fb910c1a6e2 100644 --- a/drivers/usb/storage/usb.c +++ b/drivers/usb/storage/usb.c @@ -706,7 +706,7 @@ static int storage_probe(struct usb_interface *intf, /* * Set the handler pointers based on the protocol - * Again, this data is persistant across reattachments + * Again, this data is persistent across reattachments */ switch (ss->protocol) { case US_PR_CB: diff --git a/drivers/video/skeletonfb.c b/drivers/video/skeletonfb.c index 57b69168e37e..4d0bd916ec75 100644 --- a/drivers/video/skeletonfb.c +++ b/drivers/video/skeletonfb.c @@ -513,7 +513,7 @@ void xxxfb_poll(struct fb_info *info, poll_table *wait) * for a graphics card take a specific amount of time. * Often we have to wait for the accelerator to finish * its operation before we can write to the framebuffer - * so we can have consistant display output. + * so we can have consistent display output. * * @info: frame buffer structure that represents a single frame buffer */ diff --git a/fs/befs/ChangeLog b/fs/befs/ChangeLog index 8e09a0bd8ebb..6774a4e815b2 100644 --- a/fs/befs/ChangeLog +++ b/fs/befs/ChangeLog @@ -60,7 +60,7 @@ Version 0.63 (2002-01-31) * Documentation improvements in source. [WD] -* Makefile fix for independant module when CONFIG_MODVERSION is set in +* Makefile fix for independent module when CONFIG_MODVERSION is set in kernel config [Pavel Roskin ] * Compile warning fix for namei.c. [Sergey S. Kostyliov ] diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c index 9a33c941cd5c..e0d448b3a6c9 100644 --- a/fs/partitions/ldm.c +++ b/fs/partitions/ldm.c @@ -510,7 +510,7 @@ static BOOL ldm_validate_vmdb (struct block_device *bdev, unsigned long base, /* Are there uncommitted transactions? */ if (BE16(data + 0x10) != 0x01) { - ldm_crit ("Database is not in a consistant state. Aborting."); + ldm_crit ("Database is not in a consistent state. 
Aborting."); goto out; } diff --git a/include/asm-alpha/pci.h b/include/asm-alpha/pci.h index 923f913e544a..472a96bdd49b 100644 --- a/include/asm-alpha/pci.h +++ b/include/asm-alpha/pci.h @@ -68,16 +68,16 @@ extern inline void pcibios_penalize_isa_irq(int irq) decisions. */ #define PCI_DMA_BUS_IS_PHYS 0 -/* Allocate and map kernel buffer using consistant mode DMA for PCI +/* Allocate and map kernel buffer using consistent mode DMA for PCI device. Returns non-NULL cpu-view pointer to the buffer if successful and sets *DMA_ADDRP to the pci side dma address as well, else DMA_ADDRP is undefined. */ extern void *pci_alloc_consistent(struct pci_dev *, size_t, dma_addr_t *); -/* Free and unmap a consistant DMA buffer. CPU_ADDR and DMA_ADDR must - be values that were returned from pci_alloc_consistant. SIZE must - be the same as what as passed into pci_alloc_consistant. +/* Free and unmap a consistent DMA buffer. CPU_ADDR and DMA_ADDR must + be values that were returned from pci_alloc_consistent. SIZE must + be the same as what as passed into pci_alloc_consistent. References to the memory and mappings assosciated with CPU_ADDR or DMA_ADDR past this call are illegal. */ @@ -139,7 +139,7 @@ extern int pci_map_sg(struct pci_dev *, struct scatterlist *, int, int); extern void pci_unmap_sg(struct pci_dev *, struct scatterlist *, int, int); -/* Make physical memory consistant for a single streaming mode DMA +/* Make physical memory consistent for a single streaming mode DMA translation after a transfer. If you perform a pci_map_single() but wish to interrogate the @@ -155,7 +155,7 @@ pci_dma_sync_single(struct pci_dev *dev, dma_addr_t dma_addr, long size, /* Nothing to do. */ } -/* Make physical memory consistant for a set of streaming mode DMA +/* Make physical memory consistent for a set of streaming mode DMA translations after a transfer. The same as pci_dma_sync_single but for a scatter-gather list, same rules and usage. */ diff --git a/include/asm-cris/io.h b/include/asm-cris/io.h index 607b6291b366..82a06f841c9f 100644 --- a/include/asm-cris/io.h +++ b/include/asm-cris/io.h @@ -246,7 +246,7 @@ extern inline void * ioremap (unsigned long offset, unsigned long size) #define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void *)(b),(c),(d)) -/* The following is junk needed for the arch-independant code but which +/* The following is junk needed for the arch-independent code but which * we never use in the CRIS port */ diff --git a/include/asm-generic/rmap.h b/include/asm-generic/rmap.h index d96b2e3fed98..5932b91fa2e7 100644 --- a/include/asm-generic/rmap.h +++ b/include/asm-generic/rmap.h @@ -3,7 +3,7 @@ /* * linux/include/asm-generic/rmap.h * - * Architecture dependant parts of the reverse mapping code, + * Architecture dependent parts of the reverse mapping code, * this version should work for most architectures with a * 'normal' page table layout. * diff --git a/include/asm-generic/rtc.h b/include/asm-generic/rtc.h index 845641b06b0c..001667ef0d35 100644 --- a/include/asm-generic/rtc.h +++ b/include/asm-generic/rtc.h @@ -147,7 +147,7 @@ static inline int set_rtc_time(struct rtc_time *time) yrs = 73; } #endif - /* These limits and adjustments are independant of + /* These limits and adjustments are independent of * whether the chip is in binary mode or not. 
*/ if (yrs > 169) { diff --git a/include/asm-mips/isadep.h b/include/asm-mips/isadep.h index 3cd1eb8eb58a..b3453bb3ba34 100644 --- a/include/asm-mips/isadep.h +++ b/include/asm-mips/isadep.h @@ -1,5 +1,5 @@ /* - * Various ISA level dependant constants. + * Various ISA level dependent constants. * Most of the following constants reflect the different layout * of Coprocessor 0 registers. * diff --git a/include/asm-mips64/r10kcache.h b/include/asm-mips64/r10kcache.h index 564ac6cc79aa..984f2f6e6ea2 100644 --- a/include/asm-mips64/r10kcache.h +++ b/include/asm-mips64/r10kcache.h @@ -25,7 +25,7 @@ #define ic_lsize 64 #define dc_lsize 32 -/* These are configuration dependant. */ +/* These are configuration dependent. */ #define scache_size() ({ \ unsigned long __res; \ __res = (read_32bit_cp0_register(CP0_CONFIG) >> 16) & 3; \ diff --git a/include/asm-ppc/io.h b/include/asm-ppc/io.h index 29eaaae63187..1616aede8ce5 100644 --- a/include/asm-ppc/io.h +++ b/include/asm-ppc/io.h @@ -36,7 +36,7 @@ #define _IO_BASE isa_io_base #define _ISA_MEM_BASE isa_mem_base #define PCI_DRAM_OFFSET pci_dram_offset -#endif /* Platform-dependant I/O */ +#endif /* Platform-dependent I/O */ extern unsigned long isa_io_base; extern unsigned long isa_mem_base; diff --git a/include/asm-ppc/system.h b/include/asm-ppc/system.h index ca47022e92ea..837f9bc6bab5 100644 --- a/include/asm-ppc/system.h +++ b/include/asm-ppc/system.h @@ -22,7 +22,7 @@ * mb() prevents loads and stores being reordered across this point. * rmb() prevents loads being reordered across this point. * wmb() prevents stores being reordered across this point. - * read_barrier_depends() prevents data-dependant loads being reordered + * read_barrier_depends() prevents data-dependent loads being reordered * across this point (nop on PPC). * * We can use the eieio instruction for wmb, but since it doesn't diff --git a/include/asm-ppc64/system.h b/include/asm-ppc64/system.h index 68f31b97c314..c78f830ed823 100644 --- a/include/asm-ppc64/system.h +++ b/include/asm-ppc64/system.h @@ -25,7 +25,7 @@ * mb() prevents loads and stores being reordered across this point. * rmb() prevents loads being reordered across this point. * wmb() prevents stores being reordered across this point. - * read_barrier_depends() prevents data-dependant loads being reordered + * read_barrier_depends() prevents data-dependent loads being reordered * across this point (nop on PPC). * * We can use the eieio instruction for wmb, but since it doesn't diff --git a/include/asm-v850/pci.h b/include/asm-v850/pci.h index 5ec5944d2b37..b915819c609b 100644 --- a/include/asm-v850/pci.h +++ b/include/asm-v850/pci.h @@ -36,7 +36,7 @@ extern void pci_unmap_single (struct pci_dev *pdev, dma_addr_t dma_addr, size_t size, int dir); -/* Make physical memory consistant for a single streaming mode DMA +/* Make physical memory consistent for a single streaming mode DMA translation after a transfer. If you perform a pci_map_single() but wish to interrogate the diff --git a/include/linux/agp_backend.h b/include/linux/agp_backend.h index e8fec2776624..36568e4a3d14 100644 --- a/include/linux/agp_backend.h +++ b/include/linux/agp_backend.h @@ -160,7 +160,7 @@ extern agp_memory *agp_allocate_memory(size_t, u32); * an u32 argument of the type of memory to be allocated. * Every agp bridge device will allow you to allocate * AGP_NORMAL_MEMORY which maps to physical ram. Any other - * type is device dependant. + * type is device dependent. * * It returns NULL whenever memory is unavailable. 
* diff --git a/include/linux/apm_bios.h b/include/linux/apm_bios.h index ceffd587b7a8..b3b981af768d 100644 --- a/include/linux/apm_bios.h +++ b/include/linux/apm_bios.h @@ -45,7 +45,7 @@ struct apm_bios_info { #define APM_BIOS_DISENGAGED 0x0010 /* - * Data for APM that is persistant across module unload/load + * Data for APM that is persistent across module unload/load */ struct apm_info { struct apm_bios_info bios; diff --git a/include/linux/isdnif.h b/include/linux/isdnif.h index fed344ec7a41..06265081fa48 100644 --- a/include/linux/isdnif.h +++ b/include/linux/isdnif.h @@ -62,7 +62,7 @@ /* */ /* The proceed command holds a incoming call in a state to leave processes */ /* enough time to check whether ist should be accepted. */ -/* The PROT_IO Command extends the interface to make protocol dependant */ +/* The PROT_IO Command extends the interface to make protocol dependent */ /* features available (call diversion, call waiting...). */ /* */ /* The PROT_IO Command is executed with the desired driver id and the arg */ diff --git a/include/linux/sdla_x25.h b/include/linux/sdla_x25.h index 9827e74faaf2..c110c1a835f7 100644 --- a/include/linux/sdla_x25.h +++ b/include/linux/sdla_x25.h @@ -157,7 +157,7 @@ typedef struct X25Cmd #define X25RES_PROTO_VIOLATION 0x41 /* protocol violation occured */ #define X25RES_PKT_TIMEOUT 0x42 /* X.25 packet time out */ #define X25RES_PKT_RETRY_LIMIT 0x43 /* X.25 packet retry limit exceeded */ -/*----- Command-dependant results -----*/ +/*----- Command-dependent results -----*/ #define X25RES_LINK_DISC 0x00 /* HDLC_LINK_STATUS */ #define X25RES_LINK_IN_ABM 0x01 /* HDLC_LINK_STATUS */ #define X25RES_NO_DATA 0x01 /* HDLC_READ/READ_TRACE_DATA*/ diff --git a/net/irda/iriap.c b/net/irda/iriap.c index edf9a77c078c..d996cb5e0496 100644 --- a/net/irda/iriap.c +++ b/net/irda/iriap.c @@ -990,7 +990,7 @@ int irias_proc_read(char *buf, char **start, off_t offset, int len) len += sprintf(buf+len, "\n"); /* Careful for priority inversions here ! - * All other uses of attrib spinlock are independant of + * All other uses of attrib spinlock are independent of * the object spinlock, so we are safe. Jean II */ spin_lock(&obj->attribs->hb_spinlock); diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c index ba525b82f994..3ab62fa1d884 100644 --- a/net/irda/irlmp.c +++ b/net/irda/irlmp.c @@ -1623,7 +1623,7 @@ int irlmp_slsap_inuse(__u8 slsap_sel) ASSERT(lap->magic == LMP_LAP_MAGIC, return TRUE;); /* Careful for priority inversions here ! - * All other uses of attrib spinlock are independant of + * All other uses of attrib spinlock are independent of * the object spinlock, so we are safe. Jean II */ spin_lock(&lap->lsaps->hb_spinlock); @@ -1786,7 +1786,7 @@ int irlmp_proc_read(char *buf, char **start, off_t offset, int len) len += sprintf(buf+len, "\n"); /* Careful for priority inversions here ! - * All other uses of attrib spinlock are independant of + * All other uses of attrib spinlock are independent of * the object spinlock, so we are safe. Jean II */ spin_lock(&lap->lsaps->hb_spinlock); diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h index 612030769391..97381500fd20 100644 --- a/net/irda/irnet/irnet.h +++ b/net/irda/irnet/irnet.h @@ -284,7 +284,7 @@ /* * This set of flags enable and disable all the various warning, * error and debug message of this driver. 
- * Each section can be enabled and disabled independantly + * Each section can be enabled and disabled independently */ /* In the PPP part */ #define DEBUG_CTRL_TRACE 0 /* Control channel */ diff --git a/sound/core/hwdep.c b/sound/core/hwdep.c index 5826ea7d5419..be0245dbfdf0 100644 --- a/sound/core/hwdep.c +++ b/sound/core/hwdep.c @@ -334,7 +334,7 @@ static int snd_hwdep_dev_register(snd_device_t *device) if ((err = snd_register_device(SNDRV_DEVICE_TYPE_HWDEP, hwdep->card, hwdep->device, &snd_hwdep_reg, name)) < 0) { - snd_printk(KERN_ERR "unable to register hardware dependant device %i:%i\n", + snd_printk(KERN_ERR "unable to register hardware dependent device %i:%i\n", hwdep->card->number, hwdep->device); snd_hwdep_devices[idx] = NULL; up(&register_mutex); diff --git a/sound/core/seq/seq_midi_emul.c b/sound/core/seq/seq_midi_emul.c index 8afa4df16428..0bc8ee757b05 100644 --- a/sound/core/seq/seq_midi_emul.c +++ b/sound/core/seq/seq_midi_emul.c @@ -60,7 +60,7 @@ static void reset_all_channels(snd_midi_channel_set_t *chset); /* - * Process an event in a driver independant way. This means dealing + * Process an event in a driver independent way. This means dealing * with RPN, NRPN, SysEx etc that are defined for common midi applications * such as GM, GS and XG. * There modes that this module will run in are: @@ -258,7 +258,7 @@ note_off(snd_midi_op_t *ops, void *drv, snd_midi_channel_t *chan, int note, int } /* - * Do all driver independant operations for this controler and pass + * Do all driver independent operations for this controler and pass * events that need to take place immediately to the driver. */ static void diff --git a/sound/oss/ac97_codec.c b/sound/oss/ac97_codec.c index 62aa94ef6df2..689cac0755ec 100644 --- a/sound/oss/ac97_codec.c +++ b/sound/oss/ac97_codec.c @@ -446,7 +446,7 @@ static void ac97_set_mixer(struct ac97_codec *codec, unsigned int oss_mixer, uns } /* read or write the recmask, the ac97 can really have left and right recording - inputs independantly set, but OSS doesn't seem to want us to express that to + inputs independently set, but OSS doesn't seem to want us to express that to the user. the caller guarantees that we have a supported bit set, and they must be holding the card's spinlock */ static int ac97_recmask_io(struct ac97_codec *codec, int rw, int mask) diff --git a/sound/oss/maestro.c b/sound/oss/maestro.c index 287b0619a414..7a22f5f26e84 100644 --- a/sound/oss/maestro.c +++ b/sound/oss/maestro.c @@ -793,7 +793,7 @@ static unsigned int ac97_oss_rm[] = { /* read or write the recmask the ac97 can really have left and right recording - inputs independantly set, but OSS doesn't seem to + inputs independently set, but OSS doesn't seem to want us to express that to the user. the caller guarantees that we have a supported bit set, and they must be holding the card's spinlock */ -- cgit v1.2.3 From 9e549588fdff1d6e6319e2778e921b5b70148836 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Thu, 6 Feb 2003 16:22:22 -0800 Subject: [PATCH] SA_NOCLDWAIT now supported - update comments This patch removes all the comments on the SA_NOCLDWAIT definitions, since SA_NOCLDWAIT is fully supported now. 
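As a quick illustration of the now-supported semantics, here is a minimal userspace sketch (an illustration only, not part of the diff below): with SA_NOCLDWAIT set on the SIGCHLD disposition, the kernel reaps terminated children automatically, so they never become zombies and the parent need not call wait().

	#include <signal.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		struct sigaction sa;

		sa.sa_handler = SIG_DFL;	/* keep the default SIGCHLD action */
		sa.sa_flags = SA_NOCLDWAIT;	/* auto-reap children when they exit */
		sigemptyset(&sa.sa_mask);
		if (sigaction(SIGCHLD, &sa, NULL) < 0) {
			perror("sigaction");
			return 1;
		}

		if (fork() == 0)
			_exit(0);	/* child terminates immediately */

		sleep(1);	/* parent never calls wait(); no zombie is left behind */
		return 0;
	}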
--- include/asm-alpha/signal.h | 2 +- include/asm-arm/signal.h | 2 +- include/asm-cris/signal.h | 2 +- include/asm-i386/signal.h | 2 +- include/asm-ia64/signal.h | 2 +- include/asm-m68k/signal.h | 2 +- include/asm-m68knommu/signal.h | 2 +- include/asm-mips/signal.h | 2 +- include/asm-mips64/signal.h | 2 +- include/asm-parisc/signal.h | 2 +- include/asm-ppc/signal.h | 2 +- include/asm-ppc64/signal.h | 2 +- include/asm-s390/signal.h | 2 +- include/asm-s390x/signal.h | 2 +- include/asm-sh/signal.h | 2 +- include/asm-sparc/signal.h | 2 +- include/asm-sparc64/signal.h | 2 +- include/asm-v850/signal.h | 2 +- include/asm-x86_64/signal.h | 2 +- 19 files changed, 19 insertions(+), 19 deletions(-) diff --git a/include/asm-alpha/signal.h b/include/asm-alpha/signal.h index 07f843f72edc..7df5d47927f5 100644 --- a/include/asm-alpha/signal.h +++ b/include/asm-alpha/signal.h @@ -93,7 +93,7 @@ typedef unsigned long sigset_t; #define SA_NOCLDSTOP 0x00000004 #define SA_NODEFER 0x00000008 #define SA_RESETHAND 0x00000010 -#define SA_NOCLDWAIT 0x00000020 /* not supported yet */ +#define SA_NOCLDWAIT 0x00000020 #define SA_SIGINFO 0x00000040 #define SA_ONESHOT SA_RESETHAND diff --git a/include/asm-arm/signal.h b/include/asm-arm/signal.h index ab3449d8b181..9bb358d1a1c2 100644 --- a/include/asm-arm/signal.h +++ b/include/asm-arm/signal.h @@ -90,7 +90,7 @@ typedef unsigned long sigset_t; * Unix names RESETHAND and NODEFER respectively. */ #define SA_NOCLDSTOP 0x00000001 -#define SA_NOCLDWAIT 0x00000002 /* not supported yet */ +#define SA_NOCLDWAIT 0x00000002 #define SA_SIGINFO 0x00000004 #define SA_THIRTYTWO 0x02000000 #define SA_RESTORER 0x04000000 diff --git a/include/asm-cris/signal.h b/include/asm-cris/signal.h index e33bdbf22ac8..246f45042d9d 100644 --- a/include/asm-cris/signal.h +++ b/include/asm-cris/signal.h @@ -86,7 +86,7 @@ typedef unsigned long sigset_t; */ #define SA_NOCLDSTOP 0x00000001 -#define SA_NOCLDWAIT 0x00000002 /* not supported yet */ +#define SA_NOCLDWAIT 0x00000002 #define SA_SIGINFO 0x00000004 #define SA_ONSTACK 0x08000000 #define SA_RESTART 0x10000000 diff --git a/include/asm-i386/signal.h b/include/asm-i386/signal.h index 68d89be28620..d6883d12ab5d 100644 --- a/include/asm-i386/signal.h +++ b/include/asm-i386/signal.h @@ -86,7 +86,7 @@ typedef unsigned long sigset_t; * Unix names RESETHAND and NODEFER respectively. */ #define SA_NOCLDSTOP 0x00000001 -#define SA_NOCLDWAIT 0x00000002 /* not supported yet */ +#define SA_NOCLDWAIT 0x00000002 #define SA_SIGINFO 0x00000004 #define SA_ONSTACK 0x08000000 #define SA_RESTART 0x10000000 diff --git a/include/asm-ia64/signal.h b/include/asm-ia64/signal.h index 2e576bf137f5..b8fb5819036d 100644 --- a/include/asm-ia64/signal.h +++ b/include/asm-ia64/signal.h @@ -67,7 +67,7 @@ * Unix names RESETHAND and NODEFER respectively. */ #define SA_NOCLDSTOP 0x00000001 -#define SA_NOCLDWAIT 0x00000002 /* not supported yet */ +#define SA_NOCLDWAIT 0x00000002 #define SA_SIGINFO 0x00000004 #define SA_ONSTACK 0x08000000 #define SA_RESTART 0x10000000 diff --git a/include/asm-m68k/signal.h b/include/asm-m68k/signal.h index 06246d0994db..dfb43563e1cc 100644 --- a/include/asm-m68k/signal.h +++ b/include/asm-m68k/signal.h @@ -85,7 +85,7 @@ typedef unsigned long sigset_t; * Unix names RESETHAND and NODEFER respectively. 
*/ #define SA_NOCLDSTOP 0x00000001 -#define SA_NOCLDWAIT 0x00000002 /* not supported yet */ +#define SA_NOCLDWAIT 0x00000002 #define SA_SIGINFO 0x00000004 #define SA_ONSTACK 0x08000000 #define SA_RESTART 0x10000000 diff --git a/include/asm-m68knommu/signal.h b/include/asm-m68knommu/signal.h index 60c32686d226..2600c4681c84 100644 --- a/include/asm-m68knommu/signal.h +++ b/include/asm-m68knommu/signal.h @@ -85,7 +85,7 @@ typedef unsigned long sigset_t; * Unix names RESETHAND and NODEFER respectively. */ #define SA_NOCLDSTOP 0x00000001 -#define SA_NOCLDWAIT 0x00000002 /* not supported yet */ +#define SA_NOCLDWAIT 0x00000002 #define SA_SIGINFO 0x00000004 #define SA_ONSTACK 0x08000000 #define SA_RESTART 0x10000000 diff --git a/include/asm-mips/signal.h b/include/asm-mips/signal.h index 50d6224d9091..25828d38e2aa 100644 --- a/include/asm-mips/signal.h +++ b/include/asm-mips/signal.h @@ -80,7 +80,7 @@ typedef unsigned long old_sigset_t; /* at least 32 bits */ #define SA_RESTART 0x10000000 #define SA_SIGINFO 0x00000008 #define SA_NODEFER 0x40000000 -#define SA_NOCLDWAIT 0x00010000 /* Not supported yet */ +#define SA_NOCLDWAIT 0x00010000 #define SA_NOCLDSTOP 0x00000001 #define SA_NOMASK SA_NODEFER diff --git a/include/asm-mips64/signal.h b/include/asm-mips64/signal.h index 0915196d49fb..4af7b2090fcd 100644 --- a/include/asm-mips64/signal.h +++ b/include/asm-mips64/signal.h @@ -80,7 +80,7 @@ typedef unsigned int old_sigset_t32; #define SA_RESTART 0x10000000 #define SA_SIGINFO 0x00000008 #define SA_NODEFER 0x40000000 -#define SA_NOCLDWAIT 0x00010000 /* Not supported yet */ +#define SA_NOCLDWAIT 0x00010000 #define SA_NOCLDSTOP 0x00000001 #define SA_NOMASK SA_NODEFER diff --git a/include/asm-parisc/signal.h b/include/asm-parisc/signal.h index 9295c212b413..cc29ceac7744 100644 --- a/include/asm-parisc/signal.h +++ b/include/asm-parisc/signal.h @@ -64,7 +64,7 @@ #define SA_SIGINFO 0x00000010 #define SA_NODEFER 0x00000020 #define SA_RESTART 0x00000040 -#define SA_NOCLDWAIT 0x00000080 /* not supported yet */ +#define SA_NOCLDWAIT 0x00000080 #define _SA_SIGGFAULT 0x00000100 /* HPUX */ #define SA_NOMASK SA_NODEFER diff --git a/include/asm-ppc/signal.h b/include/asm-ppc/signal.h index 57c399154f43..6935af6a07db 100644 --- a/include/asm-ppc/signal.h +++ b/include/asm-ppc/signal.h @@ -78,7 +78,7 @@ typedef struct { * Unix names RESETHAND and NODEFER respectively. */ #define SA_NOCLDSTOP 0x00000001 -#define SA_NOCLDWAIT 0x00000002 /* not supported yet */ +#define SA_NOCLDWAIT 0x00000002 #define SA_SIGINFO 0x00000004 #define SA_ONSTACK 0x08000000 #define SA_RESTART 0x10000000 diff --git a/include/asm-ppc64/signal.h b/include/asm-ppc64/signal.h index a4dc9444142e..cc67e7f8bf0c 100644 --- a/include/asm-ppc64/signal.h +++ b/include/asm-ppc64/signal.h @@ -73,7 +73,7 @@ typedef struct { * Unix names RESETHAND and NODEFER respectively. */ #define SA_NOCLDSTOP 0x00000001 -#define SA_NOCLDWAIT 0x00000002 /* not supported yet */ +#define SA_NOCLDWAIT 0x00000002 #define SA_SIGINFO 0x00000004 #define SA_ONSTACK 0x08000000 #define SA_RESTART 0x10000000 diff --git a/include/asm-s390/signal.h b/include/asm-s390/signal.h index c80535695e50..1c27c9f50966 100644 --- a/include/asm-s390/signal.h +++ b/include/asm-s390/signal.h @@ -94,7 +94,7 @@ typedef unsigned long sigset_t; * Unix names RESETHAND and NODEFER respectively. 
*/ #define SA_NOCLDSTOP 0x00000001 -#define SA_NOCLDWAIT 0x00000002 /* not supported yet */ +#define SA_NOCLDWAIT 0x00000002 #define SA_SIGINFO 0x00000004 #define SA_ONSTACK 0x08000000 #define SA_RESTART 0x10000000 diff --git a/include/asm-s390x/signal.h b/include/asm-s390x/signal.h index 8a60129b4f17..c2a52040791f 100644 --- a/include/asm-s390x/signal.h +++ b/include/asm-s390x/signal.h @@ -94,7 +94,7 @@ typedef unsigned long sigset_t; * Unix names RESETHAND and NODEFER respectively. */ #define SA_NOCLDSTOP 0x00000001 -#define SA_NOCLDWAIT 0x00000002 /* not supported yet */ +#define SA_NOCLDWAIT 0x00000002 #define SA_SIGINFO 0x00000004 #define SA_ONSTACK 0x08000000 #define SA_RESTART 0x10000000 diff --git a/include/asm-sh/signal.h b/include/asm-sh/signal.h index 07f7fb474850..332ce7a66b8e 100644 --- a/include/asm-sh/signal.h +++ b/include/asm-sh/signal.h @@ -73,7 +73,7 @@ typedef struct { * Unix names RESETHAND and NODEFER respectively. */ #define SA_NOCLDSTOP 0x00000001 -#define SA_NOCLDWAIT 0x00000002 /* not supported yet */ +#define SA_NOCLDWAIT 0x00000002 #define SA_SIGINFO 0x00000004 #define SA_ONSTACK 0x08000000 #define SA_RESTART 0x10000000 diff --git a/include/asm-sparc/signal.h b/include/asm-sparc/signal.h index 33c8ed0b0349..11cd75b94062 100644 --- a/include/asm-sparc/signal.h +++ b/include/asm-sparc/signal.h @@ -140,7 +140,7 @@ struct sigstack { #define SA_INTERRUPT 0x10 #define SA_NOMASK 0x20 #define SA_SHIRQ 0x40 -#define SA_NOCLDWAIT 0x100 /* not supported yet */ +#define SA_NOCLDWAIT 0x100 #define SA_SIGINFO 0x200 #define SIG_BLOCK 0x01 /* for blocking signals */ diff --git a/include/asm-sparc64/signal.h b/include/asm-sparc64/signal.h index c6b6feba590c..303b9453d024 100644 --- a/include/asm-sparc64/signal.h +++ b/include/asm-sparc64/signal.h @@ -145,7 +145,7 @@ struct sigstack { #define SA_INTERRUPT 0x10 #define SA_NOMASK 0x20 #define SA_SHIRQ 0x40 -#define SA_NOCLDWAIT 0x100 /* not supported yet */ +#define SA_NOCLDWAIT 0x100 #define SA_SIGINFO 0x200 diff --git a/include/asm-v850/signal.h b/include/asm-v850/signal.h index 6423d2adb3fe..3f701cd82390 100644 --- a/include/asm-v850/signal.h +++ b/include/asm-v850/signal.h @@ -88,7 +88,7 @@ typedef unsigned long sigset_t; * Unix names RESETHAND and NODEFER respectively. */ #define SA_NOCLDSTOP 0x00000001 -#define SA_NOCLDWAIT 0x00000002 /* not supported yet */ +#define SA_NOCLDWAIT 0x00000002 #define SA_SIGINFO 0x00000004 #define SA_ONSTACK 0x08000000 #define SA_RESTART 0x10000000 diff --git a/include/asm-x86_64/signal.h b/include/asm-x86_64/signal.h index 65ff89b11075..bfc4553b8b90 100644 --- a/include/asm-x86_64/signal.h +++ b/include/asm-x86_64/signal.h @@ -93,7 +93,7 @@ typedef unsigned long sigset_t; * Unix names RESETHAND and NODEFER respectively. */ #define SA_NOCLDSTOP 0x00000001 -#define SA_NOCLDWAIT 0x00000002 /* not supported yet */ +#define SA_NOCLDWAIT 0x00000002 #define SA_SIGINFO 0x00000004 #define SA_ONSTACK 0x08000000 #define SA_RESTART 0x10000000 -- cgit v1.2.3 From 530a7dbc7f10903f83ddf624142c3a8400435b0a Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Thu, 6 Feb 2003 16:22:30 -0800 Subject: [PATCH] do_sigaction locking cleanup This changes do_sigaction to avoid read_lock(&tasklist_lock) on every call. Only in the fairly uncommon cases where it's really needed will it take that lock (which requires unlocking and relocking the siglock for locking order). I also changed the ERESTARTSYS added in my earlier patch to ERESTARTNOINTR. 
That is an "instantaneous" case, and there is no reason to have it possibly return EINTR if !SA_RESTART (which AFAIK sigaction never could before, and it might not be kosher by POSIX); rollback is always better. --- kernel/signal.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/kernel/signal.c b/kernel/signal.c index 809ea104b63f..14e11ac05295 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1841,7 +1841,6 @@ do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact) k = ¤t->sig->action[sig-1]; - read_lock(&tasklist_lock); spin_lock_irq(¤t->sig->siglock); if (signal_pending(current)) { /* @@ -1849,17 +1848,13 @@ do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact) * threads, make sure we take it before changing the action. */ spin_unlock_irq(¤t->sig->siglock); - read_unlock(&tasklist_lock); - return -ERESTARTSYS; + return -ERESTARTNOINTR; } if (oact) *oact = *k; if (act) { - *k = *act; - sigdelsetmask(&k->sa.sa_mask, sigmask(SIGKILL) | sigmask(SIGSTOP)); - /* * POSIX 3.3.1.3: * "Setting a signal action to SIG_IGN for a signal that is @@ -1871,21 +1866,39 @@ do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact) * (for example, SIGCHLD), shall cause the pending signal to * be discarded, whether or not it is blocked" */ - - if (k->sa.sa_handler == SIG_IGN || - (k->sa.sa_handler == SIG_DFL && sig_kernel_ignore(sig))) { + if (act->sa.sa_handler == SIG_IGN || + (act->sa.sa_handler == SIG_DFL && + sig_kernel_ignore(sig))) { + /* + * This is a fairly rare case, so we only take the + * tasklist_lock once we're sure we'll need it. + * Now we must do this little unlock and relock + * dance to maintain the lock hierarchy. + */ struct task_struct *t = current; + spin_unlock_irq(&t->sig->siglock); + read_lock(&tasklist_lock); + spin_lock_irq(&t->sig->siglock); + *k = *act; + sigdelsetmask(&k->sa.sa_mask, + sigmask(SIGKILL) | sigmask(SIGSTOP)); rm_from_queue(sigmask(sig), &t->sig->shared_pending); do { rm_from_queue(sigmask(sig), &t->pending); recalc_sigpending_tsk(t); t = next_thread(t); } while (t != current); - } - } spin_unlock_irq(¤t->sig->siglock); read_unlock(&tasklist_lock); + return 0; + } + *k = *act; + sigdelsetmask(&k->sa.sa_mask, + sigmask(SIGKILL) | sigmask(SIGSTOP)); + } + + spin_unlock_irq(¤t->sig->siglock); return 0; } -- cgit v1.2.3 From b1ab50493209e3ec1d57e94ce00378c763db7572 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 6 Feb 2003 16:50:27 -0800 Subject: [PATCH] Fix possible uninitialised variable in vma merging code Spotted by davem. Strange that it ever worked. Don't know why the compiler didn't warn... --- mm/mmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/mmap.c b/mm/mmap.c index af3d4a272ad7..07e2417185ff 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -432,7 +432,7 @@ static int vma_merge(struct mm_struct *mm, struct vm_area_struct *prev, if (prev->vm_end == addr && can_vma_merge_after(prev, vm_flags, file, pgoff)) { struct vm_area_struct *next; - struct inode *inode = file ? file->f_dentry->d_inode : inode; + struct inode *inode = file ? file->f_dentry->d_inode : NULL; int need_up = 0; if (unlikely(file && prev->vm_next && -- cgit v1.2.3 From fef31b0354eeec8ce9c71c7adcf08848c25727f6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 6 Feb 2003 17:52:01 -0800 Subject: Don't special-case SIGKILL/SIGSTOP - the blocking masks should already take care of it. This fixes kernel threads that _do_ block SIGKILL/STOP. 
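For context, the kernel-thread idiom this fixes appears verbatim in several hunks later in this series (loop_thread(), kjournald(), usb_stor_control_thread()). A minimal sketch of it, using the pre-split current->sig naming that is still current at this point in the series:

	/* Kernel-thread setup: block every signal, SIGKILL and SIGSTOP
	 * included.  With the sig_kernel_only() special case removed from
	 * __group_send_sig_info(), a group-wide SIGKILL now respects this
	 * blocked mask instead of being force-targeted at such a thread. */
	spin_lock_irq(&current->sig->siglock);
	sigfillset(&current->blocked);
	recalc_sigpending();
	spin_unlock_irq(&current->sig->siglock);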
--- kernel/signal.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/signal.c b/kernel/signal.c index 14e11ac05295..e8ff3bb6324e 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -794,8 +794,7 @@ __group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) * If the main thread wants the signal, it gets first crack. * Probably the least surprising to the average bear. */ - if (p->state < TASK_ZOMBIE && - (sig_kernel_only(sig) || wants_signal(sig, p))) + if (wants_signal(sig, p)) t = p; else if (thread_group_empty(p)) /* -- cgit v1.2.3 From 8eae299835cf161a93a5acd890cebf0f83f2a2ce Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 6 Feb 2003 20:25:24 -0800 Subject: Split up "struct signal_struct" into "signal" and "sighand" parts. This is required to get make the old LinuxThread semantics work together with the fixed-for-POSIX full signal sharing. A traditional CLONE_SIGHAND thread (LinuxThread) will not see any other shared signal state, while a new-style CLONE_THREAD thread will share all of it. This way the two methods don't confuse each other. --- arch/i386/kernel/init_task.c | 1 + arch/i386/kernel/signal.c | 22 +-- arch/i386/kernel/vm86.c | 4 +- drivers/block/loop.c | 4 +- drivers/char/n_tty.c | 2 +- drivers/scsi/scsi_error.c | 4 +- drivers/usb/storage/usb.c | 4 +- fs/autofs/waitq.c | 12 +- fs/autofs4/waitq.c | 12 +- fs/exec.c | 107 ++++++++------ fs/jbd/journal.c | 4 +- fs/lockd/clntproc.c | 18 +-- fs/lockd/svc.c | 12 +- fs/nfsd/nfssvc.c | 8 +- fs/proc/array.c | 8 +- include/linux/init_task.h | 9 +- include/linux/sched.h | 18 ++- include/linux/slab.h | 3 +- kernel/exit.c | 34 ++--- kernel/fork.c | 65 ++++++--- kernel/kmod.c | 12 +- kernel/signal.c | 322 +++++++++++++++++++++++-------------------- kernel/workqueue.c | 8 +- mm/pdflush.c | 4 +- net/sunrpc/clnt.c | 10 +- net/sunrpc/sched.c | 12 +- net/sunrpc/svc.c | 4 +- security/capability.c | 2 +- 28 files changed, 410 insertions(+), 315 deletions(-) diff --git a/arch/i386/kernel/init_task.c b/arch/i386/kernel/init_task.c index 4eb40a9582c7..a2a7181dac62 100644 --- a/arch/i386/kernel/init_task.c +++ b/arch/i386/kernel/init_task.c @@ -11,6 +11,7 @@ static struct fs_struct init_fs = INIT_FS; static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); +static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); /* diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c index 42160fc0322d..90561449cc07 100644 --- a/arch/i386/kernel/signal.c +++ b/arch/i386/kernel/signal.c @@ -37,11 +37,11 @@ sys_sigsuspend(int history0, int history1, old_sigset_t mask) sigset_t saveset; mask &= _BLOCKABLE; - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); saveset = current->blocked; siginitset(¤t->blocked, mask); recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); regs->eax = -EINTR; while (1) { @@ -66,11 +66,11 @@ sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize) return -EFAULT; sigdelsetmask(&newset, ~_BLOCKABLE); - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); saveset = current->blocked; current->blocked = newset; recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); regs->eax = -EINTR; while (1) { @@ -224,10 +224,10 @@ asmlinkage int sys_sigreturn(unsigned long __unused) goto badframe; sigdelsetmask(&set, ~_BLOCKABLE); - 
spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); current->blocked = set; recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); if (restore_sigcontext(regs, &frame->sc, &eax)) goto badframe; @@ -252,10 +252,10 @@ asmlinkage int sys_rt_sigreturn(unsigned long __unused) goto badframe; sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); current->blocked = set; recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &eax)) goto badframe; @@ -513,7 +513,7 @@ static void handle_signal(unsigned long sig, siginfo_t *info, sigset_t *oldset, struct pt_regs * regs) { - struct k_sigaction *ka = ¤t->sig->action[sig-1]; + struct k_sigaction *ka = ¤t->sighand->action[sig-1]; /* Are we from a system call? */ if (regs->orig_eax >= 0) { @@ -547,11 +547,11 @@ handle_signal(unsigned long sig, siginfo_t *info, sigset_t *oldset, ka->sa.sa_handler = SIG_DFL; if (!(ka->sa.sa_flags & SA_NODEFER)) { - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); sigaddset(¤t->blocked,sig); recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); } } diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c index facb01379561..269cf00ddf4e 100644 --- a/arch/i386/kernel/vm86.c +++ b/arch/i386/kernel/vm86.c @@ -512,10 +512,10 @@ int handle_vm86_trap(struct kernel_vm86_regs * regs, long error_code, int trapno return 1; /* we let this handle by the calling routine */ if (current->ptrace & PT_PTRACED) { unsigned long flags; - spin_lock_irqsave(¤t->sig->siglock, flags); + spin_lock_irqsave(¤t->sighand->siglock, flags); sigdelset(¤t->blocked, SIGTRAP); recalc_sigpending(); - spin_unlock_irqrestore(¤t->sig->siglock, flags); + spin_unlock_irqrestore(¤t->sighand->siglock, flags); } send_sig(SIGTRAP, current, 1); current->thread.trap_no = trapno; diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 86d653d168df..71ae15a0c6fd 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -584,10 +584,10 @@ static int loop_thread(void *data) hence, it mustn't be stopped at all because it could be indirectly used during suspension */ - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); sigfillset(¤t->blocked); flush_signals(current); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); set_user_nice(current, -20); diff --git a/drivers/char/n_tty.c b/drivers/char/n_tty.c index 34304a4dcb7f..ccf36427d4b0 100644 --- a/drivers/char/n_tty.c +++ b/drivers/char/n_tty.c @@ -787,7 +787,7 @@ static void n_tty_receive_buf(struct tty_struct *tty, const unsigned char *cp, int is_ignored(int sig) { return (sigismember(¤t->blocked, sig) || - current->sig->action[sig-1].sa.sa_handler == SIG_IGN); + current->sighand->action[sig-1].sa.sa_handler == SIG_IGN); } static void n_tty_set_termios(struct tty_struct *tty, struct termios * old) diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 28ef5461ec78..f21af2d1c1e2 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -1575,10 +1575,10 @@ void scsi_error_handler(void *data) int rtn; DECLARE_MUTEX_LOCKED(sem); - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); sigfillset(¤t->blocked); recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + 
spin_unlock_irq(¤t->sighand->siglock); lock_kernel(); diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c index 5fb910c1a6e2..e4a98569d058 100644 --- a/drivers/usb/storage/usb.c +++ b/drivers/usb/storage/usb.c @@ -301,12 +301,12 @@ static int usb_stor_control_thread(void * __us) daemonize(); /* avoid getting signals */ - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); flush_signals(current); current->flags |= PF_IOTHREAD; sigfillset(¤t->blocked); recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); /* set our name for identification purposes */ sprintf(current->comm, "usb-storage"); diff --git a/fs/autofs/waitq.c b/fs/autofs/waitq.c index c212015631b9..6c82dc144b33 100644 --- a/fs/autofs/waitq.c +++ b/fs/autofs/waitq.c @@ -70,10 +70,10 @@ static int autofs_write(struct file *file, const void *addr, int bytes) /* Keep the currently executing process from receiving a SIGPIPE unless it was already supposed to get one */ if (wr == -EPIPE && !sigpipe) { - spin_lock_irqsave(¤t->sig->siglock, flags); + spin_lock_irqsave(¤t->sighand->siglock, flags); sigdelset(¤t->pending.signal, SIGPIPE); recalc_sigpending(); - spin_unlock_irqrestore(¤t->sig->siglock, flags); + spin_unlock_irqrestore(¤t->sighand->siglock, flags); } return (bytes > 0); @@ -161,18 +161,18 @@ int autofs_wait(struct autofs_sb_info *sbi, struct qstr *name) sigset_t oldset; unsigned long irqflags; - spin_lock_irqsave(¤t->sig->siglock, irqflags); + spin_lock_irqsave(¤t->sighand->siglock, irqflags); oldset = current->blocked; siginitsetinv(¤t->blocked, SHUTDOWN_SIGS & ~oldset.sig[0]); recalc_sigpending(); - spin_unlock_irqrestore(¤t->sig->siglock, irqflags); + spin_unlock_irqrestore(¤t->sighand->siglock, irqflags); interruptible_sleep_on(&wq->queue); - spin_lock_irqsave(¤t->sig->siglock, irqflags); + spin_lock_irqsave(¤t->sighand->siglock, irqflags); current->blocked = oldset; recalc_sigpending(); - spin_unlock_irqrestore(¤t->sig->siglock, irqflags); + spin_unlock_irqrestore(¤t->sighand->siglock, irqflags); } else { DPRINTK(("autofs_wait: skipped sleeping\n")); } diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 7af5f71e16b9..c1b7279cae81 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -74,10 +74,10 @@ static int autofs4_write(struct file *file, const void *addr, int bytes) /* Keep the currently executing process from receiving a SIGPIPE unless it was already supposed to get one */ if (wr == -EPIPE && !sigpipe) { - spin_lock_irqsave(¤t->sig->siglock, flags); + spin_lock_irqsave(¤t->sighand->siglock, flags); sigdelset(¤t->pending.signal, SIGPIPE); recalc_sigpending(); - spin_unlock_irqrestore(¤t->sig->siglock, flags); + spin_unlock_irqrestore(¤t->sighand->siglock, flags); } return (bytes > 0); @@ -198,18 +198,18 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct qstr *name, sigset_t oldset; unsigned long irqflags; - spin_lock_irqsave(¤t->sig->siglock, irqflags); + spin_lock_irqsave(¤t->sighand->siglock, irqflags); oldset = current->blocked; siginitsetinv(¤t->blocked, SHUTDOWN_SIGS & ~oldset.sig[0]); recalc_sigpending(); - spin_unlock_irqrestore(¤t->sig->siglock, irqflags); + spin_unlock_irqrestore(¤t->sighand->siglock, irqflags); interruptible_sleep_on(&wq->queue); - spin_lock_irqsave(¤t->sig->siglock, irqflags); + spin_lock_irqsave(¤t->sighand->siglock, irqflags); current->blocked = oldset; recalc_sigpending(); - spin_unlock_irqrestore(¤t->sig->siglock, irqflags); + spin_unlock_irqrestore(¤t->sighand->siglock, irqflags); } else { 
DPRINTK(("autofs_wait: skipped sleeping\n")); } diff --git a/fs/exec.c b/fs/exec.c index 0b41239937b7..a63d5c43da1f 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -559,31 +559,61 @@ static inline void put_proc_dentry(struct dentry *dentry) * disturbing other processes. (Other processes might share the signal * table via the CLONE_SIGHAND option to clone().) */ -static inline int de_thread(struct signal_struct *oldsig) +static inline int de_thread(struct task_struct *tsk) { - struct signal_struct *newsig; + struct signal_struct *newsig, *oldsig = tsk->signal; + struct sighand_struct *newsighand, *oldsighand = tsk->sighand; + spinlock_t *lock = &oldsighand->siglock; int count; - if (atomic_read(¤t->sig->count) <= 1) + /* + * If we don't share sighandlers, then we aren't sharing anything + * and we can just re-use it all. + */ + if (atomic_read(&oldsighand->count) <= 1) return 0; - newsig = kmem_cache_alloc(sigact_cachep, GFP_KERNEL); - if (!newsig) + newsighand = kmem_cache_alloc(sighand_cachep, GFP_KERNEL); + if (!newsighand) return -ENOMEM; + spin_lock_init(&newsighand->siglock); + atomic_set(&newsighand->count, 1); + memcpy(newsighand->action, oldsighand->action, sizeof(newsighand->action)); + + /* + * See if we need to allocate a new signal structure + */ + newsig = NULL; + if (atomic_read(&oldsig->count) > 1) { + newsig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); + if (!newsig) { + kmem_cache_free(sighand_cachep, newsighand); + return -ENOMEM; + } + atomic_set(&newsig->count, 1); + newsig->group_exit = 0; + newsig->group_exit_code = 0; + newsig->group_exit_task = NULL; + newsig->group_stop_count = 0; + init_sigpending(&newsig->shared_pending); + } + if (thread_group_empty(current)) - goto out; + goto no_thread_group; /* * Kill all other threads in the thread group: */ - spin_lock_irq(&oldsig->siglock); + spin_lock_irq(lock); if (oldsig->group_exit) { /* * Another group action in progress, just * return so that the signal is processed. 
*/ - spin_unlock_irq(&oldsig->siglock); - kmem_cache_free(sigact_cachep, newsig); + spin_unlock_irq(lock); + kmem_cache_free(sighand_cachep, newsighand); + if (newsig) + kmem_cache_free(signal_cachep, newsig); return -EAGAIN; } oldsig->group_exit = 1; @@ -598,13 +628,13 @@ static inline int de_thread(struct signal_struct *oldsig) while (atomic_read(&oldsig->count) > count) { oldsig->group_exit_task = current; current->state = TASK_UNINTERRUPTIBLE; - spin_unlock_irq(&oldsig->siglock); + spin_unlock_irq(lock); schedule(); - spin_lock_irq(&oldsig->siglock); + spin_lock_irq(lock); if (oldsig->group_exit_task) BUG(); } - spin_unlock_irq(&oldsig->siglock); + spin_unlock_irq(lock); /* * At this point all other threads have exited, all we have to @@ -675,32 +705,29 @@ static inline int de_thread(struct signal_struct *oldsig) release_task(leader); } -out: - spin_lock_init(&newsig->siglock); - atomic_set(&newsig->count, 1); - newsig->group_exit = 0; - newsig->group_exit_code = 0; - newsig->group_exit_task = NULL; - newsig->group_stop_count = 0; - memcpy(newsig->action, current->sig->action, sizeof(newsig->action)); - init_sigpending(&newsig->shared_pending); +no_thread_group: write_lock_irq(&tasklist_lock); - spin_lock(&oldsig->siglock); - spin_lock(&newsig->siglock); + spin_lock(&oldsighand->siglock); + spin_lock(&newsighand->siglock); if (current == oldsig->curr_target) oldsig->curr_target = next_thread(current); - current->sig = newsig; + if (newsig) + current->signal = newsig; + current->sighand = newsighand; init_sigpending(¤t->pending); recalc_sigpending(); - spin_unlock(&newsig->siglock); - spin_unlock(&oldsig->siglock); + spin_unlock(&newsighand->siglock); + spin_unlock(&oldsighand->siglock); write_unlock_irq(&tasklist_lock); - if (atomic_dec_and_test(&oldsig->count)) - kmem_cache_free(sigact_cachep, oldsig); + if (newsig && atomic_dec_and_test(&oldsig->count)) + kmem_cache_free(signal_cachep, oldsig); + + if (atomic_dec_and_test(&oldsighand->count)) + kmem_cache_free(sighand_cachep, oldsighand); if (!thread_group_empty(current)) BUG(); @@ -746,21 +773,20 @@ int flush_old_exec(struct linux_binprm * bprm) { char * name; int i, ch, retval; - struct signal_struct * oldsig = current->sig; /* * Release all of the old mmap stuff */ retval = exec_mmap(bprm->mm); if (retval) - goto mmap_failed; + goto out; /* * Make sure we have a private signal table and that * we are unassociated from the previous thread group. 
*/ - retval = de_thread(oldsig); + retval = de_thread(current); if (retval) - goto flush_failed; + goto out; /* This is the point of no return */ @@ -794,14 +820,7 @@ int flush_old_exec(struct linux_binprm * bprm) return 0; -mmap_failed: -flush_failed: - spin_lock_irq(¤t->sig->siglock); - if (current->sig != oldsig) { - kmem_cache_free(sigact_cachep, current->sig); - current->sig = oldsig; - } - spin_unlock_irq(¤t->sig->siglock); +out: return retval; } @@ -885,7 +904,7 @@ void compute_creds(struct linux_binprm *bprm) if (must_not_trace_exec(current) || atomic_read(¤t->fs->count) > 1 || atomic_read(¤t->files->count) > 1 - || atomic_read(¤t->sig->count) > 1) { + || atomic_read(¤t->sighand->count) > 1) { if(!capable(CAP_SETUID)) { bprm->e_uid = current->uid; bprm->e_gid = current->gid; @@ -1302,8 +1321,8 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) } mm->dumpable = 0; init_completion(&mm->core_done); - current->sig->group_exit = 1; - current->sig->group_exit_code = exit_code; + current->signal->group_exit = 1; + current->signal->group_exit_code = exit_code; coredump_wait(mm); if (current->rlim[RLIMIT_CORE].rlim_cur < binfmt->min_coredump) @@ -1330,7 +1349,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) retval = binfmt->core_dump(signr, regs, file); - current->sig->group_exit_code |= 0x80; + current->signal->group_exit_code |= 0x80; close_fail: filp_close(file, NULL); fail_unlock: diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 3117885c3f6e..a106e23956f7 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -205,10 +205,10 @@ int kjournald(void *arg) lock_kernel(); daemonize(); - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); sigfillset(¤t->blocked); recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); sprintf(current->comm, "kjournald"); diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 404ac2d3a95b..c4c4e0595163 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -139,7 +139,7 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl) } /* Keep the old signal mask */ - spin_lock_irqsave(¤t->sig->siglock, flags); + spin_lock_irqsave(¤t->sighand->siglock, flags); oldset = current->blocked; /* If we're cleaning up locks because the process is exiting, @@ -149,7 +149,7 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl) && (current->flags & PF_EXITING)) { sigfillset(¤t->blocked); /* Mask all signals */ recalc_sigpending(); - spin_unlock_irqrestore(¤t->sig->siglock, flags); + spin_unlock_irqrestore(¤t->sighand->siglock, flags); call = nlmclnt_alloc_call(); if (!call) { @@ -158,7 +158,7 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl) } call->a_flags = RPC_TASK_ASYNC; } else { - spin_unlock_irqrestore(¤t->sig->siglock, flags); + spin_unlock_irqrestore(¤t->sighand->siglock, flags); memset(call, 0, sizeof(*call)); locks_init_lock(&call->a_args.lock.fl); locks_init_lock(&call->a_res.lock.fl); @@ -183,10 +183,10 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl) kfree(call); out_restore: - spin_lock_irqsave(¤t->sig->siglock, flags); + spin_lock_irqsave(¤t->sighand->siglock, flags); current->blocked = oldset; recalc_sigpending(); - spin_unlock_irqrestore(¤t->sig->siglock, flags); + spin_unlock_irqrestore(¤t->sighand->siglock, flags); done: dprintk("lockd: clnt proc returns %d\n", status); @@ -588,11 +588,11 @@ nlmclnt_cancel(struct nlm_host *host, struct file_lock *fl) int status; /* Block all 
signals while setting up call */ - spin_lock_irqsave(¤t->sig->siglock, flags); + spin_lock_irqsave(¤t->sighand->siglock, flags); oldset = current->blocked; sigfillset(¤t->blocked); recalc_sigpending(); - spin_unlock_irqrestore(¤t->sig->siglock, flags); + spin_unlock_irqrestore(¤t->sighand->siglock, flags); req = nlmclnt_alloc_call(); if (!req) @@ -607,10 +607,10 @@ nlmclnt_cancel(struct nlm_host *host, struct file_lock *fl) if (status < 0) kfree(req); - spin_lock_irqsave(¤t->sig->siglock, flags); + spin_lock_irqsave(¤t->sighand->siglock, flags); current->blocked = oldset; recalc_sigpending(); - spin_unlock_irqrestore(¤t->sig->siglock, flags); + spin_unlock_irqrestore(¤t->sighand->siglock, flags); return status; } diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index f608fbc8354b..a0cafbdfbb0a 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -101,10 +101,10 @@ lockd(struct svc_rqst *rqstp) sprintf(current->comm, "lockd"); /* Process request with signals blocked. */ - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); siginitsetinv(¤t->blocked, sigmask(SIGKILL)); recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); /* kick rpciod */ rpciod_up(); @@ -126,9 +126,9 @@ lockd(struct svc_rqst *rqstp) { long timeout = MAX_SCHEDULE_TIMEOUT; if (signalled()) { - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); flush_signals(current); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); if (nlmsvc_ops) { nlmsvc_invalidate_all(); grace_period_expire = set_grace_period(); @@ -297,9 +297,9 @@ lockd_down(void) "lockd_down: lockd failed to exit, clearing pid\n"); nlmsvc_pid = 0; } - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); out: up(&nlmsvc_sema); } diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 94f48ae35e95..3919e77036e3 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -189,10 +189,10 @@ nfsd(struct svc_rqst *rqstp) */ for (;;) { /* Block all but the shutdown signals */ - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); siginitsetinv(¤t->blocked, SHUTDOWN_SIGS); recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); /* * Find a socket with data available and call its @@ -210,10 +210,10 @@ nfsd(struct svc_rqst *rqstp) exp_readlock(); /* Process request with signals blocked. 
*/ - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); siginitsetinv(¤t->blocked, ALLOWED_SIGS); recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); svc_process(serv, rqstp); diff --git a/fs/proc/array.c b/fs/proc/array.c index e135ac5a1080..df1501a0f332 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -190,16 +190,16 @@ static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign, sigemptyset(catch); read_lock(&tasklist_lock); - if (p->sig) { - spin_lock_irq(&p->sig->siglock); - k = p->sig->action; + if (p->sighand) { + spin_lock_irq(&p->sighand->siglock); + k = p->sighand->action; for (i = 1; i <= _NSIG; ++i, ++k) { if (k->sa.sa_handler == SIG_IGN) sigaddset(ign, i); else if (k->sa.sa_handler != SIG_DFL) sigaddset(catch, i); } - spin_unlock_irq(&p->sig->siglock); + spin_unlock_irq(&p->sighand->siglock); } read_unlock(&tasklist_lock); } diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 77bc3a1340ac..11483636b4d6 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -44,10 +44,14 @@ } #define INIT_SIGNALS(sig) { \ + .count = ATOMIC_INIT(1), \ + .shared_pending = { NULL, &sig.shared_pending.head, {{0}}}, \ +} + +#define INIT_SIGHAND(sighand) { \ .count = ATOMIC_INIT(1), \ .action = { {{0,}}, }, \ .siglock = SPIN_LOCK_UNLOCKED, \ - .shared_pending = { NULL, &sig.shared_pending.head, {{0}}}, \ } /* @@ -90,7 +94,8 @@ .thread = INIT_THREAD, \ .fs = &init_fs, \ .files = &init_files, \ - .sig = &init_signals, \ + .signal = &init_signals, \ + .sighand = &init_sighand, \ .pending = { NULL, &tsk.pending.head, {{0}}}, \ .blocked = {{0}}, \ .alloc_lock = SPIN_LOCK_UNLOCKED, \ diff --git a/include/linux/sched.h b/include/linux/sched.h index d41f7a24fc14..78970007590f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -220,10 +220,21 @@ struct mm_struct { extern int mmlist_nr; -struct signal_struct { +struct sighand_struct { atomic_t count; struct k_sigaction action[_NSIG]; spinlock_t siglock; +}; + +/* + * NOTE! "signal_struct" does not have it's own + * locking, because a shared signal_struct always + * implies a shared sighand_struct, so locking + * sighand_struct is always a proper superset of + * the locking of signal_struct. 
+ */ +struct signal_struct { + atomic_t count; /* current thread group signal load-balancing target: */ task_t *curr_target; @@ -378,7 +389,8 @@ struct task_struct { /* namespace */ struct namespace *namespace; /* signal handlers */ - struct signal_struct *sig; + struct signal_struct *signal; + struct sighand_struct *sighand; sigset_t blocked, real_blocked; struct sigpending pending; @@ -589,6 +601,8 @@ extern void exit_thread(void); extern void exit_mm(struct task_struct *); extern void exit_files(struct task_struct *); +extern void exit_signal(struct task_struct *); +extern void __exit_signal(struct task_struct *); extern void exit_sighand(struct task_struct *); extern void __exit_sighand(struct task_struct *); diff --git a/include/linux/slab.h b/include/linux/slab.h index 220a672af798..c136265fd3cd 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -71,7 +71,8 @@ extern kmem_cache_t *files_cachep; extern kmem_cache_t *filp_cachep; extern kmem_cache_t *dquot_cachep; extern kmem_cache_t *fs_cachep; -extern kmem_cache_t *sigact_cachep; +extern kmem_cache_t *signal_cachep; +extern kmem_cache_t *sighand_cachep; extern kmem_cache_t *bio_cachep; #endif /* __KERNEL__ */ diff --git a/kernel/exit.c b/kernel/exit.c index cee8991011f7..febad08ae9ef 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -76,6 +76,7 @@ void release_task(struct task_struct * p) if (unlikely(p->ptrace)) __ptrace_unlink(p); BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); + __exit_signal(p); __exit_sighand(p); proc_dentry = __unhash_process(p); @@ -546,7 +547,7 @@ static void exit_notify(struct task_struct *tsk) { struct task_struct *t; - if (signal_pending(tsk) && !tsk->sig->group_exit + if (signal_pending(tsk) && !tsk->signal->group_exit && !thread_group_empty(tsk)) { /* * This occurs when there was a race between our exit @@ -558,14 +559,14 @@ static void exit_notify(struct task_struct *tsk) * sure someone gets all the pending signals. */ read_lock(&tasklist_lock); - spin_lock_irq(&tsk->sig->siglock); + spin_lock_irq(&tsk->sighand->siglock); for (t = next_thread(tsk); t != tsk; t = next_thread(t)) if (!signal_pending(t) && !(t->flags & PF_EXITING)) { recalc_sigpending_tsk(t); if (signal_pending(t)) signal_wake_up(t, 0); } - spin_unlock_irq(&tsk->sig->siglock); + spin_unlock_irq(&tsk->sighand->siglock); read_unlock(&tasklist_lock); } @@ -708,9 +709,9 @@ task_t *next_thread(task_t *p) struct list_head *tmp, *head = &link->pidptr->task_list; #if CONFIG_SMP - if (!p->sig) + if (!p->sighand) BUG(); - if (!spin_is_locked(&p->sig->siglock) && + if (!spin_is_locked(&p->sighand->siglock) && !rwlock_is_locked(&tasklist_lock)) BUG(); #endif @@ -730,21 +731,22 @@ do_group_exit(int exit_code) { BUG_ON(exit_code & 0x80); /* core dumps don't get here */ - if (current->sig->group_exit) - exit_code = current->sig->group_exit_code; + if (current->signal->group_exit) + exit_code = current->signal->group_exit_code; else if (!thread_group_empty(current)) { - struct signal_struct *const sig = current->sig; + struct signal_struct *const sig = current->signal; + struct sighand_struct *const sighand = current->sighand; read_lock(&tasklist_lock); - spin_lock_irq(&sig->siglock); + spin_lock_irq(&sighand->siglock); if (sig->group_exit) /* Another thread got here before we took the lock. 
*/ exit_code = sig->group_exit_code; else { - sig->group_exit = 1; - sig->group_exit_code = exit_code; + sig->group_exit = 1; + sig->group_exit_code = exit_code; zap_other_threads(current); } - spin_unlock_irq(&sig->siglock); + spin_unlock_irq(&sighand->siglock); read_unlock(&tasklist_lock); } @@ -838,8 +840,8 @@ static int wait_task_zombie(task_t *p, unsigned int *stat_addr, struct rusage *r retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; if (!retval && stat_addr) { - if (p->sig->group_exit) - retval = put_user(p->sig->group_exit_code, stat_addr); + if (p->signal->group_exit) + retval = put_user(p->signal->group_exit_code, stat_addr); else retval = put_user(p->exit_code, stat_addr); } @@ -879,7 +881,7 @@ static int wait_task_stopped(task_t *p, int delayed_group_leader, if (!p->exit_code) return 0; if (delayed_group_leader && !(p->ptrace & PT_PTRACED) && - p->sig && p->sig->group_stop_count > 0) + p->signal && p->signal->group_stop_count > 0) /* * A group stop is in progress and this is the group leader. * We won't report until all threads have stopped. @@ -1004,7 +1006,7 @@ repeat: if (options & __WNOTHREAD) break; tsk = next_thread(tsk); - if (tsk->sig != current->sig) + if (tsk->signal != current->signal) BUG(); } while (tsk != current); read_unlock(&tasklist_lock); diff --git a/kernel/fork.c b/kernel/fork.c index c042b5a8eaec..988a195bcc93 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -665,23 +665,39 @@ out_release: static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk) { - struct signal_struct *sig; + struct sighand_struct *sig; - if (clone_flags & CLONE_SIGHAND) { - atomic_inc(¤t->sig->count); + if (clone_flags & (CLONE_SIGHAND | CLONE_THREAD)) { + atomic_inc(¤t->sighand->count); return 0; } - sig = kmem_cache_alloc(sigact_cachep, GFP_KERNEL); - tsk->sig = sig; + sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL); + tsk->sighand = sig; if (!sig) return -1; spin_lock_init(&sig->siglock); atomic_set(&sig->count, 1); + memcpy(sig->action, current->sighand->action, sizeof(sig->action)); + return 0; +} + +static inline int copy_signal(unsigned long clone_flags, struct task_struct * tsk) +{ + struct signal_struct *sig; + + if (clone_flags & CLONE_THREAD) { + atomic_inc(¤t->signal->count); + return 0; + } + sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); + tsk->signal = sig; + if (!sig) + return -1; + atomic_set(&sig->count, 1); sig->group_exit = 0; sig->group_exit_code = 0; sig->group_exit_task = NULL; sig->group_stop_count = 0; - memcpy(sig->action, current->sig->action, sizeof(sig->action)); sig->curr_target = NULL; init_sigpending(&sig->shared_pending); @@ -831,8 +847,10 @@ static struct task_struct *copy_process(unsigned long clone_flags, goto bad_fork_cleanup_files; if (copy_sighand(clone_flags, p)) goto bad_fork_cleanup_fs; - if (copy_mm(clone_flags, p)) + if (copy_signal(clone_flags, p)) goto bad_fork_cleanup_sighand; + if (copy_mm(clone_flags, p)) + goto bad_fork_cleanup_signal; if (copy_namespace(clone_flags, p)) goto bad_fork_cleanup_mm; retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs); @@ -923,31 +941,31 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->parent = p->real_parent; if (clone_flags & CLONE_THREAD) { - spin_lock(¤t->sig->siglock); + spin_lock(¤t->sighand->siglock); /* * Important: if an exit-all has been started then * do not create this new thread - the whole thread * group is supposed to exit anyway. 
*/ - if (current->sig->group_exit) { - spin_unlock(¤t->sig->siglock); + if (current->signal->group_exit) { + spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); goto bad_fork_cleanup_namespace; } p->tgid = current->tgid; p->group_leader = current->group_leader; - if (current->sig->group_stop_count > 0) { + if (current->signal->group_stop_count > 0) { /* * There is an all-stop in progress for the group. * We ourselves will stop as soon as we check signals. * Make the new thread part of that group stop too. */ - current->sig->group_stop_count++; + current->signal->group_stop_count++; set_tsk_thread_flag(p, TIF_SIGPENDING); } - spin_unlock(¤t->sig->siglock); + spin_unlock(¤t->sighand->siglock); } SET_LINKS(p); @@ -977,6 +995,8 @@ bad_fork_cleanup_namespace: exit_namespace(p); bad_fork_cleanup_mm: exit_mm(p); +bad_fork_cleanup_signal: + exit_signal(p); bad_fork_cleanup_sighand: exit_sighand(p); bad_fork_cleanup_fs: @@ -1077,8 +1097,11 @@ struct task_struct *do_fork(unsigned long clone_flags, return p; } -/* SLAB cache for signal_struct structures (tsk->sig) */ -kmem_cache_t *sigact_cachep; +/* SLAB cache for signal_struct structures (tsk->signal) */ +kmem_cache_t *signal_cachep; + +/* SLAB cache for sighand_struct structures (tsk->sighand) */ +kmem_cache_t *sighand_cachep; /* SLAB cache for files_struct structures (tsk->files) */ kmem_cache_t *files_cachep; @@ -1094,11 +1117,17 @@ kmem_cache_t *mm_cachep; void __init proc_caches_init(void) { - sigact_cachep = kmem_cache_create("signal_act", + sighand_cachep = kmem_cache_create("sighand_cache", + sizeof(struct sighand_struct), 0, + SLAB_HWCACHE_ALIGN, NULL, NULL); + if (!sighand_cachep) + panic("Cannot create sighand SLAB cache"); + + signal_cachep = kmem_cache_create("signal_cache", sizeof(struct signal_struct), 0, SLAB_HWCACHE_ALIGN, NULL, NULL); - if (!sigact_cachep) - panic("Cannot create signal action SLAB cache"); + if (!signal_cachep) + panic("Cannot create signal SLAB cache"); files_cachep = kmem_cache_create("files_cache", sizeof(struct files_struct), 0, diff --git a/kernel/kmod.c b/kernel/kmod.c index 6a9a2c8f937c..2b85eff87f43 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -111,12 +111,12 @@ int exec_usermodehelper(char *program_path, char *argv[], char *envp[]) as the super user right after the execve fails if you time the signal just right. */ - spin_lock_irq(&curtask->sig->siglock); + spin_lock_irq(&curtask->sighand->siglock); sigemptyset(&curtask->blocked); flush_signals(curtask); flush_signal_handlers(curtask); recalc_sigpending(); - spin_unlock_irq(&curtask->sig->siglock); + spin_unlock_irq(&curtask->sighand->siglock); for (i = 0; i < curtask->files->max_fds; i++ ) { if (curtask->files->fd[i]) close(i); @@ -239,20 +239,20 @@ int request_module(const char * module_name) } /* Block everything but SIGKILL/SIGSTOP */ - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); tmpsig = current->blocked; siginitsetinv(¤t->blocked, sigmask(SIGKILL) | sigmask(SIGSTOP)); recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); waitpid_result = waitpid(pid, NULL, __WCLONE); atomic_dec(&kmod_concurrent); /* Allow signals again.. */ - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); current->blocked = tmpsig; recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); if (waitpid_result != pid) { printk(KERN_ERR "request_module[%s]: waitpid(%d,...) 
failed, errno %d\n", diff --git a/kernel/signal.c b/kernel/signal.c index e8ff3bb6324e..a095215cffb1 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -138,16 +138,16 @@ int max_queued_signals = 1024; (((sig) < SIGRTMIN) && T(sig, SIG_KERNEL_STOP_MASK)) #define sig_user_defined(t, signr) \ - (((t)->sig->action[(signr)-1].sa.sa_handler != SIG_DFL) && \ - ((t)->sig->action[(signr)-1].sa.sa_handler != SIG_IGN)) + (((t)->sighand->action[(signr)-1].sa.sa_handler != SIG_DFL) && \ + ((t)->sighand->action[(signr)-1].sa.sa_handler != SIG_IGN)) #define sig_ignored(t, signr) \ (!((t)->ptrace & PT_PTRACED) && \ - (t)->sig->action[(signr)-1].sa.sa_handler == SIG_IGN) + (t)->sighand->action[(signr)-1].sa.sa_handler == SIG_IGN) #define sig_fatal(t, signr) \ (!T(signr, SIG_KERNEL_IGNORE_MASK|SIG_KERNEL_STOP_MASK) && \ - (t)->sig->action[(signr)-1].sa.sa_handler == SIG_DFL) + (t)->sighand->action[(signr)-1].sa.sa_handler == SIG_DFL) /* * Re-calculate pending state from the set of locally pending @@ -183,9 +183,9 @@ static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked) inline void recalc_sigpending_tsk(struct task_struct *t) { - if (t->sig->group_stop_count > 0 || + if (t->signal->group_stop_count > 0 || PENDING(&t->pending, &t->blocked) || - PENDING(&t->sig->shared_pending, &t->blocked)) + PENDING(&t->signal->shared_pending, &t->blocked)) set_tsk_thread_flag(t, TIF_SIGPENDING); else clear_tsk_thread_flag(t, TIF_SIGPENDING); @@ -265,20 +265,41 @@ flush_signals(struct task_struct *t) */ void __exit_sighand(struct task_struct *tsk) { - struct signal_struct * sig = tsk->sig; + struct sighand_struct * sighand = tsk->sighand; + + /* Ok, we're done with the signal handlers */ + tsk->sighand = NULL; + if (atomic_dec_and_test(&sighand->count)) + kmem_cache_free(sighand_cachep, sighand); +} + +void exit_sighand(struct task_struct *tsk) +{ + write_lock_irq(&tasklist_lock); + __exit_sighand(tsk); + write_unlock_irq(&tasklist_lock); +} + +/* + * This function expects the tasklist_lock write-locked. 
+ */ +void __exit_signal(struct task_struct *tsk) +{ + struct signal_struct * sig = tsk->signal; + struct sighand_struct * sighand = tsk->sighand; if (!sig) BUG(); if (!atomic_read(&sig->count)) BUG(); - spin_lock(&sig->siglock); + spin_lock(&sighand->siglock); if (atomic_dec_and_test(&sig->count)) { if (tsk == sig->curr_target) sig->curr_target = next_thread(tsk); - tsk->sig = NULL; - spin_unlock(&sig->siglock); + tsk->signal = NULL; + spin_unlock(&sighand->siglock); flush_sigqueue(&sig->shared_pending); - kmem_cache_free(sigact_cachep, sig); + kmem_cache_free(signal_cachep, sig); } else { /* * If there is any task waiting for the group exit @@ -290,17 +311,17 @@ void __exit_sighand(struct task_struct *tsk) } if (tsk == sig->curr_target) sig->curr_target = next_thread(tsk); - tsk->sig = NULL; - spin_unlock(&sig->siglock); + tsk->signal = NULL; + spin_unlock(&sighand->siglock); } clear_tsk_thread_flag(tsk,TIF_SIGPENDING); flush_sigqueue(&tsk->pending); } -void exit_sighand(struct task_struct *tsk) +void exit_signal(struct task_struct *tsk) { write_lock_irq(&tasklist_lock); - __exit_sighand(tsk); + __exit_signal(tsk); write_unlock_irq(&tasklist_lock); } @@ -312,7 +333,7 @@ void flush_signal_handlers(struct task_struct *t) { int i; - struct k_sigaction *ka = &t->sig->action[0]; + struct k_sigaction *ka = &t->sighand->action[0]; for (i = _NSIG ; i != 0 ; i--) { if (ka->sa.sa_handler != SIG_IGN) ka->sa.sa_handler = SIG_DFL; @@ -336,11 +357,11 @@ block_all_signals(int (*notifier)(void *priv), void *priv, sigset_t *mask) { unsigned long flags; - spin_lock_irqsave(¤t->sig->siglock, flags); + spin_lock_irqsave(¤t->sighand->siglock, flags); current->notifier_mask = mask; current->notifier_data = priv; current->notifier = notifier; - spin_unlock_irqrestore(¤t->sig->siglock, flags); + spin_unlock_irqrestore(¤t->sighand->siglock, flags); } /* Notify the system that blocking has ended. */ @@ -350,11 +371,11 @@ unblock_all_signals(void) { unsigned long flags; - spin_lock_irqsave(¤t->sig->siglock, flags); + spin_lock_irqsave(¤t->sighand->siglock, flags); current->notifier = NULL; current->notifier_data = NULL; recalc_sigpending(); - spin_unlock_irqrestore(¤t->sig->siglock, flags); + spin_unlock_irqrestore(¤t->sighand->siglock, flags); } static inline int collect_signal(int sig, struct sigpending *list, siginfo_t *info) @@ -443,7 +464,7 @@ int dequeue_signal(sigset_t *mask, siginfo_t *info) { int signr = __dequeue_signal(¤t->pending, mask, info); if (!signr) - signr = __dequeue_signal(¤t->sig->shared_pending, + signr = __dequeue_signal(¤t->signal->shared_pending, mask, info); return signr; } @@ -559,7 +580,7 @@ static void handle_stop_signal(int sig, struct task_struct *p) /* * This is a stop signal. Remove SIGCONT from all queues. */ - rm_from_queue(sigmask(SIGCONT), &p->sig->shared_pending); + rm_from_queue(sigmask(SIGCONT), &p->signal->shared_pending); t = p; do { rm_from_queue(sigmask(SIGCONT), &t->pending); @@ -570,8 +591,8 @@ static void handle_stop_signal(int sig, struct task_struct *p) /* * Remove all stop signals from all queues, * and wake all threads. - */ - if (unlikely(p->sig->group_stop_count > 0)) { + */ + if (unlikely(p->signal->group_stop_count > 0)) { /* * There was a group stop in progress. We'll * pretend it finished before we got here. We are @@ -584,7 +605,7 @@ static void handle_stop_signal(int sig, struct task_struct *p) * now, and it's as if the stop had finished and * the SIGCHLD was pending on entry to this kill. 
*/ - p->sig->group_stop_count = 0; + p->signal->group_stop_count = 0; if (p->ptrace & PT_PTRACED) do_notify_parent_cldstop(p, p->parent); else @@ -592,7 +613,7 @@ static void handle_stop_signal(int sig, struct task_struct *p) p->group_leader, p->group_leader->real_parent); } - rm_from_queue(SIG_KERNEL_STOP_MASK, &p->sig->shared_pending); + rm_from_queue(SIG_KERNEL_STOP_MASK, &p->signal->shared_pending); t = p; do { rm_from_queue(SIG_KERNEL_STOP_MASK, &t->pending); @@ -608,7 +629,7 @@ static void handle_stop_signal(int sig, struct task_struct *p) * set, the thread will pause and acquire the * siglock that we hold now and until we've * queued the pending signal. - */ + */ if (sig_user_defined(p, SIGCONT)) set_tsk_thread_flag(t, TIF_SIGPENDING); wake_up_process(t); @@ -646,23 +667,23 @@ static int send_signal(int sig, struct siginfo *info, struct sigpending *signals *signals->tail = q; signals->tail = &q->next; switch ((unsigned long) info) { - case 0: - q->info.si_signo = sig; - q->info.si_errno = 0; - q->info.si_code = SI_USER; - q->info.si_pid = current->pid; - q->info.si_uid = current->uid; - break; - case 1: - q->info.si_signo = sig; - q->info.si_errno = 0; - q->info.si_code = SI_KERNEL; - q->info.si_pid = 0; - q->info.si_uid = 0; - break; - default: - copy_siginfo(&q->info, info); - break; + case 0: + q->info.si_signo = sig; + q->info.si_errno = 0; + q->info.si_code = SI_USER; + q->info.si_pid = current->pid; + q->info.si_uid = current->uid; + break; + case 1: + q->info.si_signo = sig; + q->info.si_errno = 0; + q->info.si_code = SI_KERNEL; + q->info.si_pid = 0; + q->info.si_uid = 0; + break; + default: + copy_siginfo(&q->info, info); + break; } } else if (sig >= SIGRTMIN && info && (unsigned long)info != 1 && info->si_code != SI_USER) @@ -689,7 +710,7 @@ specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t) if (!irqs_disabled()) BUG(); #if CONFIG_SMP - if (!spin_is_locked(&t->sig->siglock)) + if (!spin_is_locked(&t->sighand->siglock)) BUG(); #endif @@ -697,10 +718,10 @@ specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t) if (sig_ignored(t, sig)) return 0; - /* Support queueing exactly one non-rt signal, so that we - can get more detailed information about the cause of - the signal. */ - if (LEGACY_QUEUE(&t->pending, sig)) + /* Support queueing exactly one non-rt signal, so that we + can get more detailed information about the cause of + the signal. 
*/ + if (LEGACY_QUEUE(&t->pending, sig)) return 0; ret = send_signal(sig, info, &t->pending); @@ -721,13 +742,13 @@ force_sig_info(int sig, struct siginfo *info, struct task_struct *t) unsigned long int flags; int ret; - spin_lock_irqsave(&t->sig->siglock, flags); - if (t->sig->action[sig-1].sa.sa_handler == SIG_IGN) - t->sig->action[sig-1].sa.sa_handler = SIG_DFL; + spin_lock_irqsave(&t->sighand->siglock, flags); + if (t->sighand->action[sig-1].sa.sa_handler == SIG_IGN) + t->sighand->action[sig-1].sa.sa_handler = SIG_DFL; sigdelset(&t->blocked, sig); recalc_sigpending_tsk(t); ret = specific_send_sig_info(sig, info, t); - spin_unlock_irqrestore(&t->sig->siglock, flags); + spin_unlock_irqrestore(&t->sighand->siglock, flags); return ret; } @@ -737,13 +758,13 @@ force_sig_specific(int sig, struct task_struct *t) { unsigned long int flags; - spin_lock_irqsave(&t->sig->siglock, flags); - if (t->sig->action[sig-1].sa.sa_handler == SIG_IGN) - t->sig->action[sig-1].sa.sa_handler = SIG_DFL; + spin_lock_irqsave(&t->sighand->siglock, flags); + if (t->sighand->action[sig-1].sa.sa_handler == SIG_IGN) + t->sighand->action[sig-1].sa.sa_handler = SIG_DFL; sigdelset(&t->blocked, sig); recalc_sigpending_tsk(t); specific_send_sig_info(sig, (void *)2, t); - spin_unlock_irqrestore(&t->sig->siglock, flags); + spin_unlock_irqrestore(&t->sighand->siglock, flags); } /* @@ -766,7 +787,7 @@ __group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) int ret; #if CONFIG_SMP - if (!spin_is_locked(&p->sig->siglock)) + if (!spin_is_locked(&p->sighand->siglock)) BUG(); #endif handle_stop_signal(sig, p); @@ -775,7 +796,7 @@ __group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) if (sig_ignored(p, sig)) return 0; - if (LEGACY_QUEUE(&p->sig->shared_pending, sig)) + if (LEGACY_QUEUE(&p->signal->shared_pending, sig)) /* This is a non-RT signal and we already have one queued. */ return 0; @@ -784,7 +805,7 @@ __group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) * We always use the shared queue for process-wide signals, * to avoid several races. */ - ret = send_signal(sig, info, &p->sig->shared_pending); + ret = send_signal(sig, info, &p->signal->shared_pending); if (unlikely(ret)) return ret; @@ -804,32 +825,32 @@ __group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) return 0; else { /* - * Otherwise try to find a suitable thread. - */ - t = p->sig->curr_target; + * Otherwise try to find a suitable thread. + */ + t = p->signal->curr_target; if (t == NULL) - /* restart balancing at this thread */ - t = p->sig->curr_target = p; + /* restart balancing at this thread */ + t = p->signal->curr_target = p; BUG_ON(t->tgid != p->tgid); while (!wants_signal(sig, t)) { t = next_thread(t); - if (t == p->sig->curr_target) - /* + if (t == p->signal->curr_target) + /* * No thread needs to be woken. * Any eligible threads will see * the signal in the queue soon. - */ + */ return 0; } - p->sig->curr_target = t; + p->signal->curr_target = t; } /* * Found a killable thread. If the signal will be fatal, * then start taking the whole group down immediately. */ - if (sig_fatal(p, sig) && !p->sig->group_exit && + if (sig_fatal(p, sig) && !p->signal->group_exit && !sigismember(&t->real_blocked, sig) && (sig == SIGKILL || !(t->ptrace & PT_PTRACED))) { /* @@ -842,9 +863,9 @@ __group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) * running and doing things after a slower * thread has the fatal signal pending. 
*/ - p->sig->group_exit = 1; - p->sig->group_exit_code = sig; - p->sig->group_stop_count = 0; + p->signal->group_exit = 1; + p->signal->group_exit_code = sig; + p->signal->group_stop_count = 0; t = p; do { sigaddset(&t->pending.signal, SIGKILL); @@ -865,16 +886,16 @@ __group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) * the core-dump signal unblocked. */ rm_from_queue(SIG_KERNEL_STOP_MASK, &t->pending); - rm_from_queue(SIG_KERNEL_STOP_MASK, &p->sig->shared_pending); - p->sig->group_stop_count = 0; - p->sig->group_exit_task = t; + rm_from_queue(SIG_KERNEL_STOP_MASK, &p->signal->shared_pending); + p->signal->group_stop_count = 0; + p->signal->group_exit_task = t; t = p; do { - p->sig->group_stop_count++; + p->signal->group_stop_count++; signal_wake_up(t, 0); t = next_thread(t); } while (t != p); - wake_up_process(p->sig->group_exit_task); + wake_up_process(p->signal->group_exit_task); return 0; } @@ -893,7 +914,7 @@ void zap_other_threads(struct task_struct *p) { struct task_struct *t; - p->sig->group_stop_count = 0; + p->signal->group_stop_count = 0; if (thread_group_empty(p)) return; @@ -912,10 +933,10 @@ group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) int ret; ret = check_kill_permission(sig, info, p); - if (!ret && sig && p->sig) { - spin_lock_irqsave(&p->sig->siglock, flags); + if (!ret && sig && p->sighand) { + spin_lock_irqsave(&p->sighand->siglock, flags); ret = __group_send_sig_info(sig, info, p); - spin_unlock_irqrestore(&p->sig->siglock, flags); + spin_unlock_irqrestore(&p->sighand->siglock, flags); } return ret; @@ -1050,9 +1071,9 @@ send_sig_info(int sig, struct siginfo *info, struct task_struct *p) return group_send_sig_info(sig, info, p); else { int error; - spin_lock_irq(&p->sig->siglock); + spin_lock_irq(&p->sighand->siglock); error = specific_send_sig_info(sig, info, p); - spin_unlock_irq(&p->sig->siglock); + spin_unlock_irq(&p->sighand->siglock); return error; } } @@ -1107,7 +1128,7 @@ static inline void __wake_up_parent(struct task_struct *p, do { wake_up_interruptible(&tsk->wait_chldexit); tsk = next_thread(tsk); - if (tsk->sig != parent->sig) + if (tsk->signal != parent->signal) BUG(); } while (tsk != parent); } @@ -1121,7 +1142,7 @@ void do_notify_parent(struct task_struct *tsk, int sig) struct siginfo info; unsigned long flags; int why, status; - struct signal_struct *psig; + struct sighand_struct *psig; if (sig == -1) BUG(); @@ -1160,7 +1181,7 @@ void do_notify_parent(struct task_struct *tsk, int sig) info.si_code = why; info.si_status = status; - psig = tsk->parent->sig; + psig = tsk->parent->sighand; spin_lock_irqsave(&psig->siglock, flags); if (sig == SIGCHLD && tsk->state != TASK_STOPPED && (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN || @@ -1213,6 +1234,7 @@ do_notify_parent_cldstop(struct task_struct *tsk, struct task_struct *parent) { struct siginfo info; unsigned long flags; + struct sighand_struct *sighand; info.si_signo = SIGCHLD; info.si_errno = 0; @@ -1226,15 +1248,16 @@ do_notify_parent_cldstop(struct task_struct *tsk, struct task_struct *parent) info.si_status = tsk->exit_code & 0x7f; info.si_code = CLD_STOPPED; - spin_lock_irqsave(&parent->sig->siglock, flags); - if (parent->sig->action[SIGCHLD-1].sa.sa_handler != SIG_IGN && - !(parent->sig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP)) + sighand = parent->sighand; + spin_lock_irqsave(&sighand->siglock, flags); + if (sighand->action[SIGCHLD-1].sa.sa_handler != SIG_IGN && + !(sighand->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP)) 
__group_send_sig_info(SIGCHLD, &info, parent); /* * Even if SIGCHLD is not generated, we must wake up wait4 calls. */ __wake_up_parent(tsk, parent); - spin_unlock_irqrestore(&parent->sig->siglock, flags); + spin_unlock_irqrestore(&sighand->siglock, flags); } static void @@ -1271,7 +1294,8 @@ finish_stop(int stop_count) static void do_signal_stop(int signr) { - struct signal_struct *sig = current->sig; + struct signal_struct *sig = current->signal; + struct sighand_struct *sighand = current->sighand; int stop_count = -1; if (sig->group_stop_count > 0) { @@ -1279,17 +1303,17 @@ do_signal_stop(int signr) * There is a group stop in progress. We don't need to * start another one. */ - spin_lock_irq(&sig->siglock); + spin_lock_irq(&sighand->siglock); if (unlikely(sig->group_stop_count == 0)) { BUG_ON(!sig->group_exit); - spin_unlock_irq(&sig->siglock); + spin_unlock_irq(&sighand->siglock); return; } signr = sig->group_exit_code; stop_count = --sig->group_stop_count; current->exit_code = signr; set_current_state(TASK_STOPPED); - spin_unlock_irq(&sig->siglock); + spin_unlock_irq(&sighand->siglock); } else if (thread_group_empty(current)) { /* @@ -1305,7 +1329,7 @@ do_signal_stop(int signr) */ struct task_struct *t; read_lock(&tasklist_lock); - spin_lock_irq(&sig->siglock); + spin_lock_irq(&sighand->siglock); if (unlikely(sig->group_exit)) { /* @@ -1313,7 +1337,7 @@ do_signal_stop(int signr) * We'll just ignore the stop and process the * associated fatal signal. */ - spin_unlock_irq(&sig->siglock); + spin_unlock_irq(&sighand->siglock); read_unlock(&tasklist_lock); return; } @@ -1343,7 +1367,7 @@ do_signal_stop(int signr) current->exit_code = signr; set_current_state(TASK_STOPPED); - spin_unlock_irq(&sig->siglock); + spin_unlock_irq(&sighand->siglock); read_unlock(&tasklist_lock); } @@ -1361,31 +1385,31 @@ int get_signal_to_deliver(siginfo_t *info, struct pt_regs *regs) unsigned long signr = 0; struct k_sigaction *ka; - spin_lock_irq(&current->sig->siglock); - if (unlikely(current->sig->group_stop_count > 0)) { + spin_lock_irq(&current->sighand->siglock); + if (unlikely(current->signal->group_stop_count > 0)) { int stop_count; - if (current->sig->group_exit_task == current) { + if (current->signal->group_exit_task == current) { /* * Group stop is so we can do a core dump. */ - current->sig->group_exit_task = NULL; + current->signal->group_exit_task = NULL; goto dequeue; } /* * There is a group stop in progress. We stop * without any associated signal being in our queue. */ - stop_count = --current->sig->group_stop_count; - signr = current->sig->group_exit_code; + stop_count = --current->signal->group_stop_count; + signr = current->signal->group_exit_code; current->exit_code = signr; set_current_state(TASK_STOPPED); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); finish_stop(stop_count); continue; } dequeue: signr = dequeue_signal(mask, info); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); if (!signr) break; @@ -1395,10 +1419,10 @@ int get_signal_to_deliver(siginfo_t *info, struct pt_regs *regs) * If there is a group stop in progress, * we must participate in the bookkeeping. */ - if (current->sig->group_stop_count > 0) { - spin_lock_irq(&current->sig->siglock); - --current->sig->group_stop_count; - spin_unlock_irq(&current->sig->siglock); + if (current->signal->group_stop_count > 0) { + spin_lock_irq(&current->sighand->siglock); + --current->signal->group_stop_count; + spin_unlock_irq(&current->sighand->siglock); } /* Let the debugger run. 
*/ @@ -1424,14 +1448,14 @@ int get_signal_to_deliver(siginfo_t *info, struct pt_regs *regs) /* If the (new) signal is now blocked, requeue it. */ if (sigismember(&current->blocked, signr)) { - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); specific_send_sig_info(signr, info, current); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); continue; } } - ka = &current->sig->action[signr-1]; + ka = &current->sighand->action[signr-1]; if (ka->sa.sa_handler == SIG_IGN) /* Do nothing. */ continue; if (ka->sa.sa_handler != SIG_DFL) /* Run the handler. */ @@ -1443,9 +1467,9 @@ int get_signal_to_deliver(siginfo_t *info, struct pt_regs *regs) if (sig_kernel_ignore(signr)) /* Default is nothing. */ continue; - /* Init gets no signals it doesn't want. */ - if (current->pid == 1) - continue; + /* Init gets no signals it doesn't want. */ + if (current->pid == 1) + continue; if (sig_kernel_stop(signr)) { /* @@ -1457,8 +1481,8 @@ int get_signal_to_deliver(siginfo_t *info, struct pt_regs *regs) if (signr == SIGSTOP || !is_orphaned_pgrp(current->pgrp)) do_signal_stop(signr); - continue; - } + continue; + } /* * Anything else is fatal, maybe with a core dump. @@ -1476,8 +1500,8 @@ int get_signal_to_deliver(siginfo_t *info, struct pt_regs *regs) * and we just let them go to finish dying. */ const int code = signr | 0x80; - BUG_ON(!current->sig->group_exit); - BUG_ON(current->sig->group_exit_code != code); + BUG_ON(!current->signal->group_exit); + BUG_ON(current->signal->group_exit_code != code); do_exit(code); /* NOTREACHED */ } @@ -1549,7 +1573,7 @@ sys_rt_sigprocmask(int how, sigset_t *set, sigset_t *oset, size_t sigsetsize) goto out; sigdelsetmask(&new_set, sigmask(SIGKILL)|sigmask(SIGSTOP)); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); old_set = current->blocked; error = 0; @@ -1569,15 +1593,15 @@ sys_rt_sigprocmask(int how, sigset_t *set, sigset_t *oset, size_t sigsetsize) current->blocked = new_set; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); if (error) goto out; if (oset) goto set_old; } else if (oset) { - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); old_set = current->blocked; - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); set_old: error = -EFAULT; @@ -1597,10 +1621,10 @@ long do_sigpending(void *set, unsigned long sigsetsize) if (sigsetsize > sizeof(sigset_t)) goto out; - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); sigorsets(&pending, &current->pending.signal, - &current->sig->shared_pending.signal); - spin_unlock_irq(&current->sig->siglock); + &current->signal->shared_pending.signal); + spin_unlock_irq(&current->sighand->siglock); /* Outside the lock because only this thread touches it. 
*/ sigandsets(&pending, &current->blocked, &pending); @@ -1714,7 +1738,7 @@ sys_rt_sigtimedwait(const sigset_t *uthese, siginfo_t *uinfo, return -EINVAL; } - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); sig = dequeue_signal(&these, &info); if (!sig) { timeout = MAX_SCHEDULE_TIMEOUT; @@ -1729,19 +1753,19 @@ sys_rt_sigtimedwait(const sigset_t *uthese, siginfo_t *uinfo, current->real_blocked = current->blocked; sigandsets(&current->blocked, &current->blocked, &these); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); current->state = TASK_INTERRUPTIBLE; timeout = schedule_timeout(timeout); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); sig = dequeue_signal(&these, &info); current->blocked = current->real_blocked; siginitset(&current->real_blocked, 0); recalc_sigpending(); } } - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); if (sig) { ret = sig; @@ -1801,11 +1825,11 @@ sys_tkill(int pid, int sig) * The null signal is a permissions and process existence * probe. No signal is actually delivered. */ - if (!error && sig && p->sig) { - spin_lock_irq(&p->sig->siglock); + if (!error && sig && p->sighand) { + spin_lock_irq(&p->sighand->siglock); handle_stop_signal(sig, p); error = specific_send_sig_info(sig, &info, p); - spin_unlock_irq(&p->sig->siglock); + spin_unlock_irq(&p->sighand->siglock); } } read_unlock(&tasklist_lock); @@ -1838,15 +1862,15 @@ do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact) if (sig < 1 || sig > _NSIG || (act && sig_kernel_only(sig))) return -EINVAL; - k = &current->sig->action[sig-1]; + k = &current->sighand->action[sig-1]; - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); if (signal_pending(current)) { /* * If there might be a fatal signal pending on multiple * threads, make sure we take it before changing the action. */ - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); return -ERESTARTNOINTR; } @@ -1875,20 +1899,20 @@ do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact) * dance to maintain the lock hierarchy. 
*/ struct task_struct *t = current; - spin_unlock_irq(&t->sig->siglock); + spin_unlock_irq(&t->sighand->siglock); read_lock(&tasklist_lock); - spin_lock_irq(&t->sig->siglock); + spin_lock_irq(&t->sighand->siglock); *k = *act; sigdelsetmask(&k->sa.sa_mask, sigmask(SIGKILL) | sigmask(SIGSTOP)); - rm_from_queue(sigmask(sig), &t->sig->shared_pending); + rm_from_queue(sigmask(sig), &t->signal->shared_pending); do { rm_from_queue(sigmask(sig), &t->pending); recalc_sigpending_tsk(t); t = next_thread(t); } while (t != current); - spin_unlock_irq(&current->sig->siglock); - read_unlock(&tasklist_lock); + spin_unlock_irq(&current->sighand->siglock); + read_unlock(&tasklist_lock); return 0; } @@ -1897,7 +1921,7 @@ do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact) sigmask(SIGKILL) | sigmask(SIGSTOP)); } - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); return 0; } @@ -1926,7 +1950,7 @@ do_sigaltstack (const stack_t *uss, stack_t *uoss, unsigned long sp) goto out; error = -EPERM; - if (on_sig_stack (sp)) + if (on_sig_stack(sp)) goto out; error = -EINVAL; @@ -1984,9 +2008,9 @@ sys_sigprocmask(int how, old_sigset_t *set, old_sigset_t *oset) error = -EFAULT; if (copy_from_user(&new_set, set, sizeof(*set))) goto out; - new_set &= ~(sigmask(SIGKILL)|sigmask(SIGSTOP)); + new_set &= ~(sigmask(SIGKILL) | sigmask(SIGSTOP)); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); old_set = current->blocked.sig[0]; error = 0; @@ -2006,7 +2030,7 @@ sys_sigprocmask(int how, old_sigset_t *set, old_sigset_t *oset) } recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); if (error) goto out; if (oset) @@ -2068,13 +2092,13 @@ sys_ssetmask(int newmask) { int old; - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); old = current->blocked.sig[0]; siginitset(&current->blocked, newmask & ~(sigmask(SIGKILL)| sigmask(SIGSTOP))); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); return old; } diff --git a/kernel/workqueue.c b/kernel/workqueue.c index fb10d360c436..8fd97c6764a4 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -180,10 +180,10 @@ static int worker_thread(void *__startup) set_user_nice(current, -10); set_cpus_allowed(current, 1UL << cpu); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); siginitsetinv(&current->blocked, sigmask(SIGCHLD)); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); complete(&startup->done); @@ -213,10 +213,10 @@ static int worker_thread(void *__startup) /* SIGCHLD - auto-reaping */ ; /* zap all other signals */ - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); flush_signals(current); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); } } remove_wait_queue(&cwq->more_work, &wait); diff --git a/mm/pdflush.c b/mm/pdflush.c index 141ce97a6ea9..c38ad46c58c7 100644 --- a/mm/pdflush.c +++ b/mm/pdflush.c @@ -90,10 +90,10 @@ static int __pdflush(struct pdflush_work *my_work) strcpy(current->comm, "pdflush"); /* interruptible sleep, so block all signals */ - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); siginitsetinv(&current->blocked, 0); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); current->flags |= PF_FLUSHER; my_work->fn = NULL;
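The worker_thread() and __pdflush() hunks above show the kernel-thread idiom this series touches everywhere: take siglock, rebuild current->blocked with siginitsetinv(), call recalc_sigpending(), drop the lock. A rough userspace analogue of the same "block everything except one signal" setup, using POSIX calls rather than the kernel internals (the thread body and the choice of SIGCHLD are placeholders, not taken from the patch):

/* Userspace sketch of the worker-thread idiom: block every signal
 * except SIGCHLD before entering the service loop, the same effect
 * the kernel threads get from siginitsetinv() under siglock.
 * Build with: gcc -pthread demo.c */
#include <pthread.h>
#include <signal.h>

static void *worker(void *arg)
{
        sigset_t mask;

        (void)arg;
        sigfillset(&mask);            /* start from "everything blocked" */
        sigdelset(&mask, SIGCHLD);    /* ...except SIGCHLD */
        pthread_sigmask(SIG_SETMASK, &mask, NULL);

        /* service loop would run here */
        return NULL;
}

int main(void)
{
        pthread_t tid;

        pthread_create(&tid, NULL, worker, NULL);
        pthread_join(tid, NULL);
        return 0;
}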
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 54c2eb392856..f0da5234b109 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -233,27 +233,27 @@ void rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset) /* Turn off various signals */ if (clnt->cl_intr) { - struct k_sigaction *action = current->sig->action; + struct k_sigaction *action = current->sighand->action; if (action[SIGINT-1].sa.sa_handler == SIG_DFL) sigallow |= sigmask(SIGINT); if (action[SIGQUIT-1].sa.sa_handler == SIG_DFL) sigallow |= sigmask(SIGQUIT); } - spin_lock_irqsave(&current->sig->siglock, irqflags); + spin_lock_irqsave(&current->sighand->siglock, irqflags); *oldset = current->blocked; siginitsetinv(&current->blocked, sigallow & ~oldset->sig[0]); recalc_sigpending(); - spin_unlock_irqrestore(&current->sig->siglock, irqflags); + spin_unlock_irqrestore(&current->sighand->siglock, irqflags); } void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset) { unsigned long irqflags; - spin_lock_irqsave(&current->sig->siglock, irqflags); + spin_lock_irqsave(&current->sighand->siglock, irqflags); current->blocked = *oldset; recalc_sigpending(); - spin_unlock_irqrestore(&current->sig->siglock, irqflags); + spin_unlock_irqrestore(&current->sighand->siglock, irqflags); } /* diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index c999d5b2008b..c8e5364ed1ea 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -964,10 +964,10 @@ rpciod(void *ptr) daemonize(); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); siginitsetinv(&current->blocked, sigmask(SIGKILL)); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); strcpy(current->comm, "rpciod"); @@ -1022,9 +1022,9 @@ rpciod_killall(void) } } - spin_lock_irqsave(&current->sig->siglock, flags); + spin_lock_irqsave(&current->sighand->siglock, flags); recalc_sigpending(); - spin_unlock_irqrestore(&current->sig->siglock, flags); + spin_unlock_irqrestore(&current->sighand->siglock, flags); } /* @@ -1100,9 +1100,9 @@ rpciod_down(void) } interruptible_sleep_on(&rpciod_killer); } - spin_lock_irqsave(&current->sig->siglock, flags); + spin_lock_irqsave(&current->sighand->siglock, flags); recalc_sigpending(); - spin_unlock_irqrestore(&current->sig->siglock, flags); + spin_unlock_irqrestore(&current->sighand->siglock, flags); out: up(&rpciod_sema); MOD_DEC_USE_COUNT; diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index aacd138fb911..beadf395b863 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -235,9 +235,9 @@ svc_register(struct svc_serv *serv, int proto, unsigned short port) } if (!port) { - spin_lock_irqsave(&current->sig->siglock, flags); + spin_lock_irqsave(&current->sighand->siglock, flags); recalc_sigpending(); - spin_unlock_irqrestore(&current->sig->siglock, flags); + spin_unlock_irqrestore(&current->sighand->siglock, flags); } return error; diff --git a/security/capability.c b/security/capability.c index cf6d2440a21d..d9b00d69fe41 100644 --- a/security/capability.c +++ b/security/capability.c @@ -133,7 +133,7 @@ void cap_bprm_compute_creds (struct linux_binprm *bprm) if (must_not_trace_exec (current) || atomic_read (&current->fs->count) > 1 || atomic_read (&current->files->count) > 1 - || atomic_read (&current->sig->count) > 1) { + || atomic_read (&current->sighand->count) > 1) { if (!capable (CAP_SETPCAP)) { new_permitted = cap_intersect (new_permitted, current-> -- cgit v1.2.3 From 631da088f602364f696a50d001b5c2f3d0026070 Mon Sep 17 00:00:00 2001 From: Chris Wedgwood Date: Fri, 7 Feb 2003 00:24:40 -0800 Subject: [PATCH] signal locking update Accommodate the signal locking moving from "tsk->sig" to "tsk->sighand".
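The rename is mechanical, but it reflects a real split of the old per-task signal state. As a rough sketch — field lists abridged and reconstructed from the usage visible in these diffs, not quoted from the 2.5 headers — the state formerly behind tsk->sig now lives behind two pointers:

/* Abridged, illustrative sketch of the split (not the real header).
 * Handlers and their lock live in sighand_struct; process-wide
 * signal state lives in signal_struct. */
struct sighand_struct {
        atomic_t                count;          /* sharing refcount */
        struct k_sigaction      action[_NSIG];  /* installed handlers */
        spinlock_t              siglock;        /* guards both halves */
};

struct signal_struct {
        atomic_t                count;
        struct sigpending       shared_pending; /* process-wide queue */
        struct task_struct      *curr_target;   /* delivery balancing */
        int                     group_exit;
        int                     group_exit_code;
        int                     group_stop_count;
        struct task_struct      *group_exit_task;
};

Hence the pattern in every hunk that follows: siglock, action[] and the handler refcount become tsk->sighand->..., while shared_pending and the group-exit/group-stop bookkeeping become tsk->signal->....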
--- arch/alpha/kernel/signal.c | 24 ++++++++++++------------ arch/arm/kernel/signal.c | 24 ++++++++++++------------ arch/ia64/ia32/ia32_signal.c | 12 ++++++------ arch/ia64/kernel/signal.c | 12 ++++++------ arch/m68knommu/kernel/signal.c | 20 ++++++++++---------- arch/parisc/kernel/signal.c | 12 ++++++------ arch/ppc/kernel/signal.c | 20 ++++++++++---------- arch/ppc64/kernel/signal.c | 12 ++++++------ arch/ppc64/kernel/signal32.c | 20 ++++++++++---------- arch/s390/kernel/signal.c | 20 ++++++++++---------- arch/s390x/kernel/linux32.c | 8 ++++---- arch/s390x/kernel/signal.c | 20 ++++++++++---------- arch/s390x/kernel/signal32.c | 20 ++++++++++---------- arch/sparc/kernel/signal.c | 32 ++++++++++++++++---------------- arch/sparc/kernel/sys_sunos.c | 8 ++++---- arch/sparc64/kernel/power.c | 4 ++-- arch/sparc64/kernel/signal.c | 24 ++++++++++++------------ arch/sparc64/kernel/signal32.c | 32 ++++++++++++++++---------------- arch/sparc64/kernel/sys_sparc32.c | 8 ++++---- arch/sparc64/kernel/sys_sunos32.c | 8 ++++---- arch/sparc64/solaris/signal.c | 16 ++++++++-------- arch/um/kernel/signal_kern.c | 20 ++++++++++---------- arch/v850/kernel/signal.c | 20 ++++++++++---------- arch/x86_64/ia32/ia32_signal.c | 12 ++++++------ arch/x86_64/kernel/signal.c | 12 ++++++------ drivers/block/nbd.c | 12 ++++++------ drivers/bluetooth/bt3c_cs.c | 8 ++++---- drivers/char/ftape/lowlevel/fdc-io.c | 8 ++++---- drivers/macintosh/adb.c | 4 ++-- drivers/md/md.c | 4 ++-- drivers/media/video/saa5249.c | 8 ++++---- drivers/mtd/devices/blkmtd.c | 4 ++-- drivers/mtd/mtdblock.c | 4 ++-- drivers/net/8139too.c | 8 ++++---- drivers/net/irda/sir_kthread.c | 4 ++-- fs/afs/cmservice.c | 4 ++-- fs/afs/internal.h | 4 ++-- fs/afs/kafsasyncd.c | 4 ++-- fs/afs/kafstimod.c | 4 ++-- fs/jffs/intrep.c | 8 ++++---- fs/jffs2/os-linux.h | 2 +- fs/jfs/jfs_logmgr.c | 4 ++-- fs/jfs/jfs_txnmgr.c | 8 ++++---- fs/ncpfs/sock.c | 8 ++++---- fs/smbfs/smbiod.c | 4 ++-- fs/xfs/pagebuf/page_buf.c | 4 ++-- include/linux/sched.h | 2 +- kernel/suspend.c | 4 ++-- net/rxrpc/internal.h | 4 ++-- net/rxrpc/krxiod.c | 4 ++-- net/rxrpc/krxsecd.c | 4 ++-- net/rxrpc/krxtimod.c | 4 ++-- 52 files changed, 280 insertions(+), 280 deletions(-) diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c index a986bc9a2db8..c17cb0f62c69 100644 --- a/arch/alpha/kernel/signal.c +++ b/arch/alpha/kernel/signal.c @@ -63,7 +63,7 @@ osf_sigprocmask(int how, unsigned long newmask, long a2, long a3, unsigned long block, unblock; newmask &= _BLOCKABLE; - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); oldmask = current->blocked.sig[0]; unblock = oldmask & ~newmask; @@ -76,7 +76,7 @@ osf_sigprocmask(int how, unsigned long newmask, long a2, long a3, sigemptyset(&current->blocked); current->blocked.sig[0] = newmask; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); (&regs)->r0 = 0; /* special no error return */ } @@ -150,11 +150,11 @@ do_sigsuspend(old_sigset_t mask, struct pt_regs *reg, struct switch_stack *sw) sigset_t oldset; mask &= _BLOCKABLE; - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); oldset = current->blocked; siginitset(&current->blocked, mask); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); while (1) { current->state = TASK_INTERRUPTIBLE; @@ -177,11 +177,11 @@ do_rt_sigsuspend(sigset_t *uset, size_t sigsetsize, return -EFAULT; sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); oldset = current->blocked; current->blocked = set; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); while (1) { current->state = TASK_INTERRUPTIBLE; @@ -284,10 +284,10 @@ do_sigreturn(struct sigframe *frame, struct pt_regs *regs, goto give_sigsegv; sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); current->blocked = set; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); if (restore_sigcontext(&frame->sc, regs, sw)) goto give_sigsegv; @@ -323,10 +323,10 @@ do_rt_sigreturn(struct rt_sigframe *frame, struct pt_regs *regs, goto give_sigsegv; sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); current->blocked = set; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); if (restore_sigcontext(&frame->uc.uc_mcontext, regs, sw)) goto give_sigsegv; @@ -562,11 +562,11 @@ handle_signal(int sig, struct k_sigaction *ka, siginfo_t *info, ka->sa.sa_handler = SIG_DFL; if (!(ka->sa.sa_flags & SA_NODEFER)) { - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); sigaddset(&current->blocked,sig); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); } }
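Every architecture repeats the same shape in its sigsuspend path: save the blocked set, install the caller's mask under the (now sighand) siglock, sleep until a signal is delivered, and let the signal-return path put the saved set back. A minimal userspace analogue of that contract, using the POSIX calls rather than the kernel internals (SIGUSR1 is an arbitrary demo choice):

/* Userspace analogue of the sigsuspend pattern: atomically swap in a
 * temporary mask and wait for a signal; the previous mask is restored
 * when sigsuspend() returns. */
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static void on_usr1(int sig) { (void)sig; }

int main(void)
{
        sigset_t block, wait_mask;

        signal(SIGUSR1, on_usr1);

        sigemptyset(&block);
        sigaddset(&block, SIGUSR1);
        sigprocmask(SIG_BLOCK, &block, NULL);   /* SIGUSR1 now held off */

        kill(getpid(), SIGUSR1);                /* pend one */

        sigemptyset(&wait_mask);                /* unblock everything... */
        sigsuspend(&wait_mask);                 /* ...for the wait only */

        puts("SIGUSR1 delivered inside sigsuspend()");
        return 0;
}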
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 51f8711be5d0..c70ebebb1eb1 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -59,11 +59,11 @@ asmlinkage int sys_sigsuspend(int restart, unsigned long oldmask, old_sigset_t m sigset_t saveset; mask &= _BLOCKABLE; - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); saveset = current->blocked; siginitset(&current->blocked, mask); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); regs->ARM_r0 = -EINTR; while (1) { @@ -87,11 +87,11 @@ sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize, struct pt_regs *regs) return -EFAULT; sigdelsetmask(&newset, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); saveset = current->blocked; current->blocked = newset; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); regs->ARM_r0 = -EINTR; while (1) { @@ -207,10 +207,10 @@ asmlinkage int sys_sigreturn(struct pt_regs *regs) goto badframe; sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); current->blocked = set; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); if (restore_sigcontext(regs, &frame->sc)) goto badframe; @@ -247,10 +247,10 @@ asmlinkage int sys_rt_sigreturn(struct pt_regs *regs) goto badframe; sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); current->blocked = set; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); if (restore_sigcontext(regs, &frame->uc.uc_mcontext)) goto badframe; @@ -477,12 +477,12 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, ka->sa.sa_handler = SIG_DFL; if (!(ka->sa.sa_flags & SA_NODEFER)) { - spin_lock_irq(&tsk->sig->siglock); + spin_lock_irq(&tsk->sighand->siglock); sigorsets(&tsk->blocked, &tsk->blocked, &ka->sa.sa_mask); sigaddset(&tsk->blocked, sig); recalc_sigpending(); -
spin_unlock_irq(&tsk->sig->siglock); + spin_unlock_irq(&tsk->sighand->siglock); } return; } @@ -521,9 +521,9 @@ static int do_signal(sigset_t *oldset, struct pt_regs *regs, int syscall) unsigned long signr = 0; struct k_sigaction *ka; - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); signr = dequeue_signal(&current->blocked, &info); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); if (!signr) break; diff --git a/arch/ia64/ia32/ia32_signal.c b/arch/ia64/ia32/ia32_signal.c index f2d006240df2..de4213cf1a5a 100644 --- a/arch/ia64/ia32/ia32_signal.c +++ b/arch/ia64/ia32/ia32_signal.c @@ -479,13 +479,13 @@ ia32_rt_sigsuspend (compat_sigset_t *uset, unsigned int sigsetsize, struct sigsc sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); { oldset = current->blocked; current->blocked = set; recalc_sigpending(); } - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); /* * The return below usually returns to the signal handler. We need to pre-set the @@ -1007,10 +1007,10 @@ sys32_sigreturn (int arg0, int arg1, int arg2, int arg3, int arg4, int arg5, int goto badframe; sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); current->blocked = (sigset_t) set; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); if (restore_sigcontext_ia32(regs, &frame->sc, &eax)) goto badframe; @@ -1038,10 +1038,10 @@ sys32_rt_sigreturn (int arg0, int arg1, int arg2, int arg3, int arg4, int arg5, goto badframe; sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); current->blocked = set; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); if (restore_sigcontext_ia32(regs, &frame->uc.uc_mcontext, &eax)) goto badframe;
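Note how every sigreturn path scrubs the restored set with sigdelsetmask(&set, ~_BLOCKABLE) before taking the sighand lock, so a forged signal frame can never leave SIGKILL or SIGSTOP blocked. The same invariant is visible from userspace: sigprocmask() silently drops those two signals from any requested mask. A small check (assumed POSIX/Linux behaviour, not part of the patch):

/* Ask to block everything, then read the mask back: SIGKILL and
 * SIGSTOP are silently excluded, mirroring the ~_BLOCKABLE scrub. */
#include <signal.h>
#include <stdio.h>

int main(void)
{
        sigset_t all, got;

        sigfillset(&all);
        sigprocmask(SIG_SETMASK, &all, NULL);
        sigprocmask(SIG_SETMASK, NULL, &got);

        printf("SIGKILL blocked? %d\n", sigismember(&got, SIGKILL));
        printf("SIGSTOP blocked? %d\n", sigismember(&got, SIGSTOP));
        return 0;       /* prints 0 and 0 */
}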
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c index 8ff4fe33902a..626725da43f7 100644 --- a/arch/ia64/kernel/signal.c +++ b/arch/ia64/kernel/signal.c @@ -68,13 +68,13 @@ ia64_rt_sigsuspend (sigset_t *uset, size_t sigsetsize, struct sigscratch *scr) sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); { oldset = current->blocked; current->blocked = set; recalc_sigpending(); } - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); /* * The return below usually returns to the signal handler. We need to @@ -274,12 +274,12 @@ ia64_rt_sigreturn (struct sigscratch *scr) sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); { current->blocked = set; recalc_sigpending(); } - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); if (restore_sigcontext(sc, scr)) goto give_sigsegv; @@ -465,13 +465,13 @@ handle_signal (unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigse ka->sa.sa_handler = SIG_DFL; if (!(ka->sa.sa_flags & SA_NODEFER)) { - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); { sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask); sigaddset(&current->blocked, sig); recalc_sigpending(); } - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); } return 1; } diff --git a/arch/m68knommu/kernel/signal.c b/arch/m68knommu/kernel/signal.c index 3b9d312da583..2d51c175074e 100644 --- a/arch/m68knommu/kernel/signal.c +++ b/arch/m68knommu/kernel/signal.c @@ -63,11 +63,11 @@ asmlinkage int do_sigsuspend(struct pt_regs *regs) sigset_t saveset; mask &= _BLOCKABLE; - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); saveset = current->blocked; siginitset(&current->blocked, mask); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); regs->d0 = -EINTR; while (1) { @@ -93,11 +93,11 @@ do_rt_sigsuspend(struct pt_regs *regs) return -EFAULT; sigdelsetmask(&newset, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); saveset = current->blocked; current->blocked = newset; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); regs->d0 = -EINTR; while (1) { @@ -370,10 +370,10 @@ asmlinkage int do_sigreturn(unsigned long __unused) goto badframe; sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); current->blocked = set; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); if (restore_sigcontext(regs, &frame->sc, frame + 1, &d0)) goto badframe; @@ -399,10 +399,10 @@ asmlinkage int do_rt_sigreturn(unsigned long __unused) goto badframe; sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); current->blocked = set; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); if (rt_restore_ucontext(regs, sw, &frame->uc, &d0)) goto badframe; @@ -738,11 +738,11 @@ handle_signal(int sig, struct k_sigaction *ka, siginfo_t *info, ka->sa.sa_handler = SIG_DFL; if (!(ka->sa.sa_flags & SA_NODEFER)) { - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); sigaddset(&current->blocked,sig); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); } } diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c index 0fd358285653..47c9c81ad69f 100644 --- a/arch/parisc/kernel/signal.c +++ b/arch/parisc/kernel/signal.c @@ -118,11 +118,11 @@ sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize, struct pt_regs *regs) #endif sigdelsetmask(&newset, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); saveset = current->blocked; current->blocked = newset; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); regs->gr[28] = -EINTR; while (1) { @@ -177,10 +177,10 @@ 
sys_rt_sigreturn(struct pt_regs *regs, int in_syscall) goto give_sigsegv; sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); current->blocked = set; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); /* Good thing we saved the old gr[30], eh? */ if (restore_sigcontext(&frame->uc.uc_mcontext, regs)) @@ -407,11 +407,11 @@ handle_signal(unsigned long sig, siginfo_t *info, sigset_t *oldset, ka->sa.sa_handler = SIG_DFL; if (!(ka->sa.sa_flags & SA_NODEFER)) { - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); sigaddset(&current->blocked,sig); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); } return 1; } diff --git a/arch/ppc/kernel/signal.c b/arch/ppc/kernel/signal.c index fddae70728fd..0878aed66721 100644 --- a/arch/ppc/kernel/signal.c +++ b/arch/ppc/kernel/signal.c @@ -65,11 +65,11 @@ sys_sigsuspend(old_sigset_t mask, int p2, int p3, int p4, int p6, int p7, sigset_t saveset; mask &= _BLOCKABLE; - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); saveset = current->blocked; siginitset(&current->blocked, mask); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); regs->result = -EINTR; regs->ccr |= 0x10000000; @@ -96,11 +96,11 @@ sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize, int p3, int p4, int p6, return -EFAULT; sigdelsetmask(&newset, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); saveset = current->blocked; current->blocked = newset; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); regs->result = -EINTR; regs->ccr |= 0x10000000; @@ -208,10 +208,10 @@ int sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, || copy_from_user(&st, &rt_sf->uc.uc_stack, sizeof(st))) goto badframe; sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); current->blocked = set; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); if (regs->msr & MSR_FP) giveup_fpu(current); @@ -311,10 +311,10 @@ int sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, set.sig[1] = sigctx._unused[3]; #endif sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); current->blocked = set; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); if (regs->msr & MSR_FP ) giveup_fpu(current); @@ -450,11 +450,11 @@ handle_signal(unsigned long sig, siginfo_t *info, sigset_t *oldset, ka->sa.sa_handler = SIG_DFL; if (!(ka->sa.sa_flags & SA_NODEFER)) { - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); sigaddset(&current->blocked,sig); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); } return; diff --git a/arch/ppc64/kernel/signal.c b/arch/ppc64/kernel/signal.c index acb5577269f6..c07f30d4ccac 100644 --- a/arch/ppc64/kernel/signal.c +++ b/arch/ppc64/kernel/signal.c @@ -112,11 +112,11 @@ long sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize, int p3, int p4, int return -EFAULT; sigdelsetmask(&newset, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); saveset = current->blocked; current->blocked = 
newset; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); regs->result = -EINTR; regs->gpr[3] = EINTR; @@ -164,10 +164,10 @@ int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5, || copy_from_user(&st, &rt_sf->uc.uc_stack, sizeof(st))) goto badframe; sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); current->blocked = set; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); if (regs->msr & MSR_FP) giveup_fpu(current); @@ -333,11 +333,11 @@ static void handle_signal(unsigned long sig, siginfo_t *info, sigset_t *oldset, ka->sa.sa_handler = SIG_DFL; if (!(ka->sa.sa_flags & SA_NODEFER)) { - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); sigaddset(&current->blocked,sig); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); } return; diff --git a/arch/ppc64/kernel/signal32.c b/arch/ppc64/kernel/signal32.c index 8dd332ded5ff..3999fd4ea35f 100644 --- a/arch/ppc64/kernel/signal32.c +++ b/arch/ppc64/kernel/signal32.c @@ -126,11 +126,11 @@ long sys32_sigsuspend(old_sigset_t mask, int p2, int p3, int p4, int p6, sigset_t saveset; mask &= _BLOCKABLE; - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); saveset = current->blocked; siginitset(&current->blocked, mask); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); regs->result = -EINTR; regs->gpr[3] = EINTR; @@ -268,10 +268,10 @@ long sys32_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5, */ set.sig[0] = sigctx.oldmask + ((long)(sigctx._unused[3]) << 32); sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); current->blocked = set; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); if (regs->msr & MSR_FP ) giveup_fpu(current); /* Last stacked signal - restore registers */ @@ -487,10 +487,10 @@ long sys32_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5, */ sigdelsetmask(&set, ~_BLOCKABLE); /* update the current based on the sigmask found in the rt_stackframe */ - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); current->blocked = set; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); /* If currently owning the floating point - give them up */ if (regs->msr & MSR_FP) @@ -863,11 +863,11 @@ int sys32_rt_sigsuspend(sigset32_t* unewset, size_t sigsetsize, int p3, sigdelsetmask(&newset, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); saveset = current->blocked; current->blocked = newset; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); regs->result = -EINTR; regs->gpr[3] = EINTR; @@ -1055,11 +1055,11 @@ static void handle_signal32(unsigned long sig, siginfo_t *info, ka->sa.sa_handler = SIG_DFL; if (!(ka->sa.sa_flags & SA_NODEFER)) { - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); sigaddset(&current->blocked,sig); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); } return;
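The handle_signal() hunks on every port do the same bookkeeping once a handler is chosen: unless SA_NODEFER is set (SA_NOMASK on sparc), the handler's sa_mask plus the delivered signal itself are OR-ed into current->blocked under the sighand lock. In userspace terms this is exactly the sigaction() contract; a small illustration (assumed POSIX behaviour, and printf in a handler is a demo-only shortcut, not async-signal-safe):

/* Inside the handler the delivered signal is masked by default;
 * with SA_NODEFER in sa_flags it would stay unblocked, so a nested
 * SIGUSR1 could interrupt the handler itself. */
#include <signal.h>
#include <stdio.h>
#include <string.h>

static void on_usr1(int sig)
{
        sigset_t cur;

        (void)sig;
        sigprocmask(SIG_SETMASK, NULL, &cur);
        printf("in handler, SIGUSR1 blocked? %d\n",
               sigismember(&cur, SIGUSR1));     /* prints 1 */
}

int main(void)
{
        struct sigaction sa;

        memset(&sa, 0, sizeof(sa));
        sa.sa_handler = on_usr1;
        sigemptyset(&sa.sa_mask);       /* sa_mask is ORed in on delivery */
        sa.sa_flags = 0;                /* add SA_NODEFER to skip masking */
        sigaction(SIGUSR1, &sa, NULL);

        raise(SIGUSR1);
        return 0;
}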
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 1f2b732fb96a..8c69d545b82f 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -61,11 +61,11 @@ sys_sigsuspend(struct pt_regs * regs, int history0, int history1, sigset_t saveset; mask &= _BLOCKABLE; - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); saveset = current->blocked; siginitset(&current->blocked, mask); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); regs->gprs[2] = -EINTR; while (1) { @@ -89,11 +89,11 @@ sys_rt_sigsuspend(struct pt_regs * regs,sigset_t *unewset, size_t sigsetsize) return -EFAULT; sigdelsetmask(&newset, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); saveset = current->blocked; current->blocked = newset; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); regs->gprs[2] = -EINTR; while (1) { @@ -194,10 +194,10 @@ asmlinkage long sys_sigreturn(struct pt_regs *regs) goto badframe; sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); current->blocked = set; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); if (restore_sigregs(regs, &frame->sregs)) goto badframe; @@ -220,10 +220,10 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) goto badframe; sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); current->blocked = set; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); if (restore_sigregs(regs, &frame->uc.uc_mcontext)) goto badframe; @@ -427,11 +427,11 @@ handle_signal(unsigned long sig, siginfo_t *info, sigset_t *oldset, ka->sa.sa_handler = SIG_DFL; if (!(ka->sa.sa_flags & SA_NODEFER)) { - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); sigaddset(&current->blocked,sig); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); } } diff --git a/arch/s390x/kernel/linux32.c b/arch/s390x/kernel/linux32.c index ee1dcc5d8f61..298c903a2d7c 100644 --- a/arch/s390x/kernel/linux32.c +++ b/arch/s390x/kernel/linux32.c @@ -1725,7 +1725,7 @@ sys32_rt_sigtimedwait(compat_sigset_t *uthese, siginfo_t32 *uinfo, return -EINVAL; } - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); sig = dequeue_signal(&these, &info); if (!sig) { /* None ready -- temporarily unblock those we're interested @@ -1733,7 +1733,7 @@ sys32_rt_sigtimedwait(compat_sigset_t *uthese, siginfo_t32 *uinfo, current->real_blocked = current->blocked; sigandsets(&current->blocked, &current->blocked, &these); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); timeout = MAX_SCHEDULE_TIMEOUT; if (uts) @@ -1743,13 +1743,13 @@ sys32_rt_sigtimedwait(compat_sigset_t *uthese, siginfo_t32 *uinfo, current->state = TASK_INTERRUPTIBLE; timeout = schedule_timeout(timeout); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); sig = dequeue_signal(&these, &info); current->blocked = current->real_blocked; siginitset(&current->real_blocked, 0); recalc_sigpending(); } - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); if (sig) { ret = sig;
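sys32_rt_sigtimedwait() above shows the temporary-unblock dance: the waiter's real blocked set is parked in ->real_blocked, the waited-for signals are unblocked for the duration of the sleep, and everything is put back under the sighand lock afterwards. From userspace the whole dance hides behind one call; a minimal use of sigtimedwait() (assumed POSIX/Linux, not code from the patch):

/* Block SIGUSR1, pend one, then collect it synchronously with a
 * timeout; the kernel side is the real_blocked shuffle shown above. */
#include <signal.h>
#include <stdio.h>
#include <time.h>

int main(void)
{
        sigset_t set;
        siginfo_t info;
        struct timespec ts = { .tv_sec = 1, .tv_nsec = 0 };

        sigemptyset(&set);
        sigaddset(&set, SIGUSR1);
        sigprocmask(SIG_BLOCK, &set, NULL);

        raise(SIGUSR1);

        if (sigtimedwait(&set, &info, &ts) == SIGUSR1)
                printf("got SIGUSR1 from pid %d\n", (int)info.si_pid);
        return 0;
}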
diff --git a/arch/s390x/kernel/signal.c b/arch/s390x/kernel/signal.c index bb9291563f5f..957197b9b35c 100644 --- a/arch/s390x/kernel/signal.c +++ b/arch/s390x/kernel/signal.c @@ -60,11 +60,11 @@ sys_sigsuspend(struct pt_regs * regs,int history0, int history1, old_sigset_t ma sigset_t saveset; mask &= _BLOCKABLE; - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); saveset = current->blocked; siginitset(&current->blocked, mask); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); regs->gprs[2] = -EINTR; while (1) { @@ -88,11 +88,11 @@ sys_rt_sigsuspend(struct pt_regs * regs,sigset_t *unewset, size_t sigsetsize) return -EFAULT; sigdelsetmask(&newset, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); saveset = current->blocked; current->blocked = newset; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); regs->gprs[2] = -EINTR; while (1) { @@ -188,10 +188,10 @@ asmlinkage long sys_sigreturn(struct pt_regs *regs) goto badframe; sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); current->blocked = set; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); if (restore_sigregs(regs, &frame->sregs)) goto badframe; @@ -214,10 +214,10 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) goto badframe; sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); current->blocked = set; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); if (restore_sigregs(regs, &frame->uc.uc_mcontext)) goto badframe; @@ -421,11 +421,11 @@ handle_signal(unsigned long sig, siginfo_t *info, sigset_t *oldset, ka->sa.sa_handler = SIG_DFL; if (!(ka->sa.sa_flags & SA_NODEFER)) { - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); sigaddset(&current->blocked,sig); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); } } diff --git a/arch/s390x/kernel/signal32.c b/arch/s390x/kernel/signal32.c index 11f5e3baed07..9757d092bbfb 100644 --- a/arch/s390x/kernel/signal32.c +++ b/arch/s390x/kernel/signal32.c @@ -112,11 +112,11 @@ sys32_sigsuspend(struct pt_regs * regs,int history0, int history1, old_sigset_t sigset_t saveset; mask &= _BLOCKABLE; - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); saveset = current->blocked; siginitset(&current->blocked, mask); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); regs->gprs[2] = -EINTR; while (1) { @@ -147,11 +147,11 @@ sys32_rt_sigsuspend(struct pt_regs * regs,compat_sigset_t *unewset, size_t sigse } sigdelsetmask(&newset, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); saveset = current->blocked; current->blocked = newset; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); regs->gprs[2] = -EINTR; while (1) { @@ -345,10 +345,10 @@ asmlinkage long sys32_sigreturn(struct pt_regs *regs) goto badframe; sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); current->blocked = set; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); if (restore_sigregs32(regs, &frame->sregs)) goto badframe; @@ -375,10 +375,10 @@ asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs) goto badframe; sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); current->blocked = set;
recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); if (restore_sigregs32(regs, &frame->uc.uc_mcontext)) goto badframe; @@ -588,11 +588,11 @@ handle_signal32(unsigned long sig, siginfo_t *info, sigset_t *oldset, ka->sa.sa_handler = SIG_DFL; if (!(ka->sa.sa_flags & SA_NODEFER)) { - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); sigaddset(&current->blocked,sig); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); } } diff --git a/arch/sparc/kernel/signal.c b/arch/sparc/kernel/signal.c index 9ea6c0e50af4..afaa3df1b6c0 100644 --- a/arch/sparc/kernel/signal.c +++ b/arch/sparc/kernel/signal.c @@ -104,11 +104,11 @@ asmlinkage void _sigpause_common(old_sigset_t set, struct pt_regs *regs) sigset_t saveset; set &= _BLOCKABLE; - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); saveset = current->blocked; siginitset(&current->blocked, set); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); regs->pc = regs->npc; regs->npc += 4; @@ -161,11 +161,11 @@ asmlinkage void do_rt_sigsuspend(sigset_t *uset, size_t sigsetsize, } sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); oldset = current->blocked; current->blocked = set; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); regs->pc = regs->npc; regs->npc += 4; @@ -267,10 +267,10 @@ static inline void do_new_sigreturn (struct pt_regs *regs) goto segv_and_exit; sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); current->blocked = set; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); return; segv_and_exit: @@ -314,10 +314,10 @@ asmlinkage void do_sigreturn(struct pt_regs *regs) goto segv_and_exit; sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); current->blocked = set; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); regs->pc = pc; regs->npc = npc; @@ -384,10 +384,10 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs) do_sigaltstack(&st, NULL, (unsigned long)sf); sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); current->blocked = set; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); return; segv: send_sig(SIGSEGV, current, 1); @@ -967,10 +967,10 @@ asmlinkage int svr4_setcontext (svr4_ucontext_t *c, struct pt_regs *regs) set.sig[3] = setv.sigbits[3]; } sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); current->blocked = set; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); regs->pc = pc; regs->npc = npc | 1; err |= __get_user(regs->y, &((*gr) [SVR4_Y])); @@ -1007,11 +1007,11 @@ handle_signal(unsigned long signr, struct k_sigaction *ka, if(ka->sa.sa_flags & SA_ONESHOT) ka->sa.sa_handler = SIG_DFL; if(!(ka->sa.sa_flags & SA_NOMASK)) { - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); sigaddset(&current->blocked, signr); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); } } @@ -1066,9 
+1066,9 @@ asmlinkage int do_signal(sigset_t *oldset, struct pt_regs * regs, sigset_t *mask = &current->blocked; unsigned long signr = 0; - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); signr = dequeue_signal(mask, &info); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); if (!signr) break; diff --git a/arch/sparc/kernel/sys_sunos.c b/arch/sparc/kernel/sys_sunos.c index a81ca4978702..383012ad6d90 100644 --- a/arch/sparc/kernel/sys_sunos.c +++ b/arch/sparc/kernel/sys_sunos.c @@ -281,11 +281,11 @@ asmlinkage unsigned long sunos_sigblock(unsigned long blk_mask) { unsigned long old; - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); old = current->blocked.sig[0]; current->blocked.sig[0] |= (blk_mask & _BLOCKABLE); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); return old; } @@ -293,11 +293,11 @@ asmlinkage unsigned long sunos_sigsetmask(unsigned long newmask) { unsigned long retval; - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); retval = current->blocked.sig[0]; current->blocked.sig[0] = (newmask & _BLOCKABLE); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); return retval; } diff --git a/arch/sparc64/kernel/power.c b/arch/sparc64/kernel/power.c index e0da1f38b54a..042bb08bbb21 100644 --- a/arch/sparc64/kernel/power.c +++ b/arch/sparc64/kernel/power.c @@ -70,9 +70,9 @@ static int powerd(void *__unused) again: while (button_pressed == 0) { - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); flush_signals(current); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); interruptible_sleep_on(&powerd_wait); } diff --git a/arch/sparc64/kernel/signal.c b/arch/sparc64/kernel/signal.c index d90f860410b2..fc16c629e2b7 100644 --- a/arch/sparc64/kernel/signal.c +++ b/arch/sparc64/kernel/signal.c @@ -70,10 +70,10 @@ asmlinkage void sparc64_set_context(struct pt_regs *regs) goto do_sigsegv; } sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); current->blocked = set; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); } if (test_thread_flag(TIF_32BIT)) { pc &= 0xffffffff; @@ -257,11 +257,11 @@ asmlinkage void _sigpause_common(old_sigset_t set, struct pt_regs *regs) } #endif set &= _BLOCKABLE; - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); saveset = current->blocked; siginitset(&current->blocked, set); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); if (test_thread_flag(TIF_32BIT)) { regs->tpc = (regs->tnpc & 0xffffffff); @@ -317,11 +317,11 @@ asmlinkage void do_rt_sigsuspend(sigset_t *uset, size_t sigsetsize, struct pt_re } sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); oldset = current->blocked; current->blocked = set; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); if (test_thread_flag(TIF_32BIT)) { regs->tpc = (regs->tnpc & 0xffffffff); @@ -428,10 +428,10 @@ void do_rt_sigreturn(struct pt_regs *regs) set_fs(old_fs); sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); current->blocked = set; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); return; segv: send_sig(SIGSEGV, 
current, 1); @@ -564,11 +564,11 @@ static inline void handle_signal(unsigned long signr, struct k_sigaction *ka, if (ka->sa.sa_flags & SA_ONESHOT) ka->sa.sa_handler = SIG_DFL; if (!(ka->sa.sa_flags & SA_NOMASK)) { - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); sigaddset(&current->blocked,signr); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); } } @@ -619,9 +619,9 @@ static int do_signal(sigset_t *oldset, struct pt_regs * regs, sigset_t *mask = &current->blocked; unsigned long signr = 0; - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); signr = dequeue_signal(mask, &info); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); if (!signr) break; diff --git a/arch/sparc64/kernel/signal32.c b/arch/sparc64/kernel/signal32.c index b2a6edc8ed8d..8b5de881a810 100644 --- a/arch/sparc64/kernel/signal32.c +++ b/arch/sparc64/kernel/signal32.c @@ -144,11 +144,11 @@ asmlinkage void _sigpause32_common(compat_old_sigset_t set, struct pt_regs *regs sigset_t saveset; set &= _BLOCKABLE; - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); saveset = current->blocked; siginitset(&current->blocked, set); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); regs->tpc = regs->tnpc; regs->tnpc += 4; @@ -199,11 +199,11 @@ asmlinkage void do_rt_sigsuspend32(u32 uset, size_t sigsetsize, struct pt_regs * case 1: set.sig[0] = set32.sig[0] + (((long)set32.sig[1]) << 32); } sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); oldset = current->blocked; current->blocked = set; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); regs->tpc = regs->tnpc; regs->tnpc += 4; @@ -312,10 +312,10 @@ void do_new_sigreturn32(struct pt_regs *regs) case 1: set.sig[0] = seta[0] + (((long)seta[1]) << 32); } sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); current->blocked = set; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); return; segv: @@ -359,10 +359,10 @@ asmlinkage void do_sigreturn32(struct pt_regs *regs) case 1: set.sig[0] = seta[0] + (((long)seta[1]) << 32); } sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); current->blocked = set; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); if (test_thread_flag(TIF_32BIT)) { pc &= 0xffffffff; @@ -461,10 +461,10 @@ asmlinkage void do_rt_sigreturn32(struct pt_regs *regs) case 1: set.sig[0] = seta.sig[0] + (((long)seta.sig[1]) << 32); } sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); current->blocked = set; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); return; segv: do_exit(SIGSEGV); @@ -1059,10 +1059,10 @@ asmlinkage int svr4_setcontext(svr4_ucontext_t *c, struct pt_regs *regs) set_fs(old_fs); sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); current->blocked = set; recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); regs->tpc = pc; regs->tnpc = npc | 1; if (test_thread_flag(TIF_32BIT)) { @@ -1241,11 +1241,11 @@ static inline void 
handle_signal32(unsigned long signr, struct k_sigaction *ka, if (ka->sa.sa_flags & SA_ONESHOT) ka->sa.sa_handler = SIG_DFL; if (!(ka->sa.sa_flags & SA_NOMASK)) { - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); sigaddset(¤t->blocked,signr); recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); } } @@ -1288,9 +1288,9 @@ int do_signal32(sigset_t *oldset, struct pt_regs * regs, sigset_t *mask = ¤t->blocked; unsigned long signr = 0; - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); signr = dequeue_signal(mask, &info); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); if (!signr) break; diff --git a/arch/sparc64/kernel/sys_sparc32.c b/arch/sparc64/kernel/sys_sparc32.c index 62e4753a5eca..b01db9baf0d6 100644 --- a/arch/sparc64/kernel/sys_sparc32.c +++ b/arch/sparc64/kernel/sys_sparc32.c @@ -1812,7 +1812,7 @@ sys32_rt_sigtimedwait(compat_sigset_t *uthese, siginfo_t32 *uinfo, return -EINVAL; } - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); sig = dequeue_signal(&these, &info); if (!sig) { timeout = MAX_SCHEDULE_TIMEOUT; @@ -1827,19 +1827,19 @@ sys32_rt_sigtimedwait(compat_sigset_t *uthese, siginfo_t32 *uinfo, current->real_blocked = current->blocked; sigandsets(¤t->blocked, ¤t->blocked, &these); recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); current->state = TASK_INTERRUPTIBLE; timeout = schedule_timeout(timeout); - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); sig = dequeue_signal(&these, &info); current->blocked = current->real_blocked; siginitset(¤t->real_blocked, 0); recalc_sigpending(); } } - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); if (sig) { ret = sig; diff --git a/arch/sparc64/kernel/sys_sunos32.c b/arch/sparc64/kernel/sys_sunos32.c index 8d0b518b3227..b3341bbbd2fe 100644 --- a/arch/sparc64/kernel/sys_sunos32.c +++ b/arch/sparc64/kernel/sys_sunos32.c @@ -238,11 +238,11 @@ asmlinkage u32 sunos_sigblock(u32 blk_mask) { u32 old; - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); old = (u32) current->blocked.sig[0]; current->blocked.sig[0] |= (blk_mask & _BLOCKABLE); recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); return old; } @@ -250,11 +250,11 @@ asmlinkage u32 sunos_sigsetmask(u32 newmask) { u32 retval; - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); retval = (u32) current->blocked.sig[0]; current->blocked.sig[0] = (newmask & _BLOCKABLE); recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); return retval; } diff --git a/arch/sparc64/solaris/signal.c b/arch/sparc64/solaris/signal.c index bb2d2b30c645..72f126c3dcae 100644 --- a/arch/sparc64/solaris/signal.c +++ b/arch/sparc64/solaris/signal.c @@ -99,16 +99,16 @@ static inline long solaris_signal(int sig, u32 arg) static long solaris_sigset(int sig, u32 arg) { if (arg != 2) /* HOLD */ { - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); sigdelsetmask(¤t->blocked, _S(sig)); recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); return sig_handler (sig, arg, 0); } else { - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); sigaddsetmask(¤t->blocked, (_S(sig) & ~_BLOCKABLE)); recalc_sigpending(); - 
spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); return 0; } } @@ -120,10 +120,10 @@ static inline long solaris_sighold(int sig) static inline long solaris_sigrelse(int sig) { - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); sigdelsetmask(¤t->blocked, _S(sig)); recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); return 0; } @@ -311,10 +311,10 @@ asmlinkage int solaris_sigpending(int which, u32 set) u32 tmp[4]; switch (which) { case 1: /* sigpending */ - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); sigandsets(&s, ¤t->blocked, ¤t->pending.signal); recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); break; case 2: /* sigfillset - I just set signals which have linux equivalents */ sigfillset(&s); diff --git a/arch/um/kernel/signal_kern.c b/arch/um/kernel/signal_kern.c index bd4572688650..d640ff441ff8 100644 --- a/arch/um/kernel/signal_kern.c +++ b/arch/um/kernel/signal_kern.c @@ -95,12 +95,12 @@ static int handle_signal(struct pt_regs *regs, unsigned long signr, ka->sa.sa_handler = SIG_DFL; if (!(ka->sa.sa_flags & SA_NODEFER)) { - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); sigorsets(¤t->blocked, ¤t->blocked, &ka->sa.sa_mask); sigaddset(¤t->blocked, signr); recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); } sp = PT_REGS_SP(regs); @@ -186,11 +186,11 @@ int sys_sigsuspend(int history0, int history1, old_sigset_t mask) sigset_t saveset; mask &= _BLOCKABLE; - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); saveset = current->blocked; siginitset(¤t->blocked, mask); recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); while (1) { current->state = TASK_INTERRUPTIBLE; @@ -212,11 +212,11 @@ int sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize) return -EFAULT; sigdelsetmask(&newset, ~_BLOCKABLE); - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); saveset = current->blocked; current->blocked = newset; recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); while (1) { current->state = TASK_INTERRUPTIBLE; @@ -242,13 +242,13 @@ int sys_sigreturn(struct pt_regs regs) void *mask = sp_to_mask(PT_REGS_SP(¤t->thread.regs)); int sig_size = (_NSIG_WORDS - 1) * sizeof(unsigned long); - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); copy_from_user(¤t->blocked.sig[0], sc_sigmask(sc), sizeof(current->blocked.sig[0])); copy_from_user(¤t->blocked.sig[1], mask, sig_size); sigdelsetmask(¤t->blocked, ~_BLOCKABLE); recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); copy_sc_from_user(¤t->thread.regs, sc, &signal_frame_sc.common.arch); return(PT_REGS_SYSCALL_RET(¤t->thread.regs)); @@ -260,11 +260,11 @@ int sys_rt_sigreturn(struct pt_regs regs) void *fp; int sig_size = _NSIG_WORDS * sizeof(unsigned long); - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); copy_from_user(¤t->blocked, &uc->uc_sigmask, sig_size); sigdelsetmask(¤t->blocked, ~_BLOCKABLE); recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); fp = (void *) (((unsigned long) uc) + sizeof(struct ucontext)); copy_sc_from_user(¤t->thread.regs, &uc->uc_mcontext, &signal_frame_si.common.arch); diff --git a/arch/v850/kernel/signal.c 
b/arch/v850/kernel/signal.c index d7ec3892e848..324a2fe2fe21 100644 --- a/arch/v850/kernel/signal.c +++ b/arch/v850/kernel/signal.c @@ -50,11 +50,11 @@ sys_sigsuspend(old_sigset_t mask, struct pt_regs *regs) sigset_t saveset; mask &= _BLOCKABLE; - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); saveset = current->blocked; siginitset(¤t->blocked, mask); recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); regs->gpr[GPR_RVAL] = -EINTR; while (1) { @@ -78,11 +78,11 @@ sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize, if (copy_from_user(&newset, unewset, sizeof(newset))) return -EFAULT; sigdelsetmask(&newset, ~_BLOCKABLE); - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); saveset = current->blocked; current->blocked = newset; recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); regs->gpr[GPR_RVAL] = -EINTR; while (1) { @@ -188,10 +188,10 @@ asmlinkage int sys_sigreturn(struct pt_regs *regs) goto badframe; sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); current->blocked = set; recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); if (restore_sigcontext(regs, &frame->sc, &rval)) goto badframe; @@ -216,10 +216,10 @@ asmlinkage int sys_rt_sigreturn(struct pt_regs *regs) goto badframe; sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); current->blocked = set; recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &rval)) goto badframe; @@ -472,11 +472,11 @@ handle_signal(unsigned long sig, siginfo_t *info, sigset_t *oldset, ka->sa.sa_handler = SIG_DFL; if (!(ka->sa.sa_flags & SA_NODEFER)) { - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); sigaddset(¤t->blocked,sig); recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); } } diff --git a/arch/x86_64/ia32/ia32_signal.c b/arch/x86_64/ia32/ia32_signal.c index c3d9c99d760e..a638bc480cff 100644 --- a/arch/x86_64/ia32/ia32_signal.c +++ b/arch/x86_64/ia32/ia32_signal.c @@ -83,11 +83,11 @@ sys32_sigsuspend(int history0, int history1, old_sigset_t mask, struct pt_regs r sigset_t saveset; mask &= _BLOCKABLE; - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); saveset = current->blocked; siginitset(¤t->blocked, mask); recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); regs.rax = -EINTR; while (1) { @@ -243,10 +243,10 @@ asmlinkage long sys32_sigreturn(struct pt_regs regs) goto badframe; sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); current->blocked = set; recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); if (ia32_restore_sigcontext(®s, &frame->sc, &eax)) goto badframe; @@ -270,10 +270,10 @@ asmlinkage long sys32_rt_sigreturn(struct pt_regs regs) goto badframe; sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); current->blocked = set; recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); if (ia32_restore_sigcontext(®s, &frame->uc.uc_mcontext, &eax)) goto badframe; 
diff --git a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c index a2c0878a1c16..3f3582cd270b 100644 --- a/arch/x86_64/kernel/signal.c +++ b/arch/x86_64/kernel/signal.c @@ -52,11 +52,11 @@ sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize, struct pt_regs regs) return -EFAULT; sigdelsetmask(&newset, ~_BLOCKABLE); - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); saveset = current->blocked; current->blocked = newset; recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); #if DEBUG_SIG printk("rt_sigsuspend savset(%lx) newset(%lx) regs(%p) rip(%lx)\n", saveset, newset, ®s, regs.rip); @@ -155,10 +155,10 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs regs) } sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); current->blocked = set; recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); if (restore_sigcontext(®s, &frame->uc.uc_mcontext, &eax)) { goto badframe; @@ -401,11 +401,11 @@ handle_signal(unsigned long sig, siginfo_t *info, sigset_t *oldset, ka->sa.sa_handler = SIG_DFL; if (!(ka->sa.sa_flags & SA_NODEFER)) { - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); sigaddset(¤t->blocked,sig); recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); } } diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 0f9f7a9de7ba..d765900b200b 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -118,12 +118,12 @@ static int nbd_xmit(int send, struct socket *sock, char *buf, int size, int msg_ set_fs(get_ds()); /* Allow interception of SIGKILL only * Don't allow other signals to interrupt the transmission */ - spin_lock_irqsave(¤t->sig->siglock, flags); + spin_lock_irqsave(¤t->sighand->siglock, flags); oldset = current->blocked; sigfillset(¤t->blocked); sigdelsetmask(¤t->blocked, sigmask(SIGKILL)); recalc_sigpending(); - spin_unlock_irqrestore(¤t->sig->siglock, flags); + spin_unlock_irqrestore(¤t->sighand->siglock, flags); do { @@ -146,11 +146,11 @@ static int nbd_xmit(int send, struct socket *sock, char *buf, int size, int msg_ if (signal_pending(current)) { siginfo_t info; - spin_lock_irqsave(¤t->sig->siglock, flags); + spin_lock_irqsave(¤t->sighand->siglock, flags); printk(KERN_WARNING "NBD (pid %d: %s) got signal %d\n", current->pid, current->comm, dequeue_signal(¤t->blocked, &info)); - spin_unlock_irqrestore(¤t->sig->siglock, flags); + spin_unlock_irqrestore(¤t->sighand->siglock, flags); result = -EINTR; break; } @@ -166,10 +166,10 @@ static int nbd_xmit(int send, struct socket *sock, char *buf, int size, int msg_ buf += result; } while (size > 0); - spin_lock_irqsave(¤t->sig->siglock, flags); + spin_lock_irqsave(¤t->sighand->siglock, flags); current->blocked = oldset; recalc_sigpending(); - spin_unlock_irqrestore(¤t->sig->siglock, flags); + spin_unlock_irqrestore(¤t->sighand->siglock, flags); set_fs(oldfs); return result; diff --git a/drivers/bluetooth/bt3c_cs.c b/drivers/bluetooth/bt3c_cs.c index e959f7814225..de14aeafa20c 100644 --- a/drivers/bluetooth/bt3c_cs.c +++ b/drivers/bluetooth/bt3c_cs.c @@ -528,19 +528,19 @@ static int bt3c_firmware_load(bt3c_info_t *info) } /* Block signals, everything but SIGKILL/SIGSTOP */ - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); tmpsig = current->blocked; siginitsetinv(¤t->blocked, sigmask(SIGKILL) | sigmask(SIGSTOP)); 
recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); result = waitpid(pid, NULL, __WCLONE); /* Allow signals again */ - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); current->blocked = tmpsig; recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); if (result != pid) { printk(KERN_WARNING "bt3c_cs: Waiting for pid %d failed (errno=%d).\n", pid, -result); diff --git a/drivers/char/ftape/lowlevel/fdc-io.c b/drivers/char/ftape/lowlevel/fdc-io.c index c58ea6f76c72..99e4b8fb8b7b 100644 --- a/drivers/char/ftape/lowlevel/fdc-io.c +++ b/drivers/char/ftape/lowlevel/fdc-io.c @@ -386,11 +386,11 @@ int fdc_interrupt_wait(unsigned int time) /* timeout time will be up to USPT microseconds too long ! */ timeout = (1000 * time + FT_USPT - 1) / FT_USPT; - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); old_sigmask = current->blocked; sigfillset(¤t->blocked); recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); current->state = TASK_INTERRUPTIBLE; add_wait_queue(&ftape_wait_intr, &wait); @@ -398,10 +398,10 @@ int fdc_interrupt_wait(unsigned int time) timeout = schedule_timeout(timeout); } - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); current->blocked = old_sigmask; recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); remove_wait_queue(&ftape_wait_intr, &wait); /* the following IS necessary. True: as well diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c index 3dace6456209..6e994acf13b6 100644 --- a/drivers/macintosh/adb.c +++ b/drivers/macintosh/adb.c @@ -246,10 +246,10 @@ adb_probe_task(void *x) { strcpy(current->comm, "kadbprobe"); - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); sigfillset(¤t->blocked); flush_signals(current); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); printk(KERN_INFO "adb: starting probe task...\n"); do_adb_reset_bus(); diff --git a/drivers/md/md.c b/drivers/md/md.c index e05f1b3daeba..9f1d5d0878e5 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2444,9 +2444,9 @@ static struct block_device_operations md_fops = static inline void flush_curr_signals(void) { - spin_lock(¤t->sig->siglock); + spin_lock(¤t->sighand->siglock); flush_signals(current); - spin_unlock(¤t->sig->siglock); + spin_unlock(¤t->sighand->siglock); } int md_thread(void * arg) diff --git a/drivers/media/video/saa5249.c b/drivers/media/video/saa5249.c index 30041f612215..897b3382d0d9 100644 --- a/drivers/media/video/saa5249.c +++ b/drivers/media/video/saa5249.c @@ -280,17 +280,17 @@ static void jdelay(unsigned long delay) { sigset_t oldblocked = current->blocked; - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); sigfillset(¤t->blocked); recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); current->state = TASK_INTERRUPTIBLE; schedule_timeout(delay); - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); current->blocked = oldblocked; recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); } diff --git a/drivers/mtd/devices/blkmtd.c b/drivers/mtd/devices/blkmtd.c index d609dfc4f8ef..7d738397f667 100644 --- a/drivers/mtd/devices/blkmtd.c +++ b/drivers/mtd/devices/blkmtd.c @@ -305,10 +305,10 @@ static int write_queue_task(void *data) DEBUG(1, "blkmtd: writetask: 
starting (pid = %d)\n", tsk->pid); daemonize(); strcpy(tsk->comm, "blkmtdd"); - spin_lock_irq(&tsk->sig->siglock); + spin_lock_irq(&tsk->sighand->siglock); sigfillset(&tsk->blocked); recalc_sigpending(); - spin_unlock_irq(&tsk->sig->siglock); + spin_unlock_irq(&tsk->sighand->siglock); if(alloc_kiovec(1, &iobuf)) { printk("blkmtd: write_queue_task cant allocate kiobuf\n"); diff --git a/drivers/mtd/mtdblock.c b/drivers/mtd/mtdblock.c index bec639985ebc..0aec2e6709e7 100644 --- a/drivers/mtd/mtdblock.c +++ b/drivers/mtd/mtdblock.c @@ -453,10 +453,10 @@ int mtdblock_thread(void *dummy) /* we might get involved when memory gets low, so use PF_MEMALLOC */ tsk->flags |= PF_MEMALLOC; strcpy(tsk->comm, "mtdblockd"); - spin_lock_irq(&tsk->sig->siglock); + spin_lock_irq(&tsk->sighand->siglock); sigfillset(&tsk->blocked); recalc_sigpending(); - spin_unlock_irq(&tsk->sig->siglock); + spin_unlock_irq(&tsk->sighand->siglock); daemonize(); while (!leaving) { diff --git a/drivers/net/8139too.c b/drivers/net/8139too.c index 8fcf0527a01d..e1e8d997983e 100644 --- a/drivers/net/8139too.c +++ b/drivers/net/8139too.c @@ -1589,10 +1589,10 @@ static int rtl8139_thread (void *data) unsigned long timeout; daemonize(); - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); sigemptyset(¤t->blocked); recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); strncpy (current->comm, dev->name, sizeof(current->comm) - 1); current->comm[sizeof(current->comm) - 1] = '\0'; @@ -1604,9 +1604,9 @@ static int rtl8139_thread (void *data) } while (!signal_pending (current) && (timeout > 0)); if (signal_pending (current)) { - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); flush_signals(current); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); } if (tp->time_to_die) diff --git a/drivers/net/irda/sir_kthread.c b/drivers/net/irda/sir_kthread.c index 80fd4a071836..3f2a538981d9 100644 --- a/drivers/net/irda/sir_kthread.c +++ b/drivers/net/irda/sir_kthread.c @@ -116,10 +116,10 @@ static int irda_thread(void *startup) daemonize(); strcpy(current->comm, "kIrDAd"); - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); sigfillset(¤t->blocked); recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); set_fs(KERNEL_DS); diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index b3eca7db8051..99960d002026 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -127,10 +127,10 @@ static int kafscmd(void *arg) complete(&kafscmd_alive); /* only certain signals are of interest */ - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); siginitsetinv(¤t->blocked,0); recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); /* loop around looking for things to attend to */ do { diff --git a/fs/afs/internal.h b/fs/afs/internal.h index a875684e3d4b..7de072e495c0 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -46,9 +46,9 @@ static inline void afs_discard_my_signals(void) while (signal_pending(current)) { siginfo_t sinfo; - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); dequeue_signal(¤t->blocked,&sinfo); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); } } diff --git a/fs/afs/kafsasyncd.c b/fs/afs/kafsasyncd.c index caedebc20095..e546a6da5015 100644 --- a/fs/afs/kafsasyncd.c +++ b/fs/afs/kafsasyncd.c @@ -101,10 +101,10 @@ static int kafsasyncd(void *arg) 
complete(&kafsasyncd_alive); /* only certain signals are of interest */ - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); siginitsetinv(¤t->blocked,0); recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); /* loop around looking for things to attend to */ do { diff --git a/fs/afs/kafstimod.c b/fs/afs/kafstimod.c index 0d3f30a73657..2b0f5a9d84e9 100644 --- a/fs/afs/kafstimod.c +++ b/fs/afs/kafstimod.c @@ -78,10 +78,10 @@ static int kafstimod(void *arg) complete(&kafstimod_alive); /* only certain signals are of interest */ - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); siginitsetinv(¤t->blocked,0); recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); /* loop around looking for things to attend to */ loop: diff --git a/fs/jffs/intrep.c b/fs/jffs/intrep.c index d5b053e5b73a..6cf3d86a5d79 100644 --- a/fs/jffs/intrep.c +++ b/fs/jffs/intrep.c @@ -3347,10 +3347,10 @@ jffs_garbage_collect_thread(void *ptr) current->session = 1; current->pgrp = 1; init_completion(&c->gc_thread_comp); /* barrier */ - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); siginitsetinv (¤t->blocked, sigmask(SIGHUP) | sigmask(SIGKILL) | sigmask(SIGSTOP) | sigmask(SIGCONT)); recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); strcpy(current->comm, "jffs_gcd"); D1(printk (KERN_NOTICE "jffs_garbage_collect_thread(): Starting infinite loop.\n")); @@ -3378,9 +3378,9 @@ jffs_garbage_collect_thread(void *ptr) siginfo_t info; unsigned long signr = 0; - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); signr = dequeue_signal(¤t->blocked, &info); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); switch(signr) { case SIGSTOP: diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index b1654cff562b..a5c35fdb51c8 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -54,7 +54,7 @@ #if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,40) #define current_sig_lock current->sigmask_lock #else -#define current_sig_lock current->sig->siglock +#define current_sig_lock current->sighand->siglock #endif static inline void jffs2_init_inode_info(struct jffs2_inode_info *f) diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index e3d931ff7ca2..360139794557 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -2139,10 +2139,10 @@ int jfsIOWait(void *arg) unlock_kernel(); - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); sigfillset(¤t->blocked); recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); complete(&jfsIOwait); diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index f85bb58be45b..6af148d0387c 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -2780,10 +2780,10 @@ int jfs_lazycommit(void *arg) jfsCommitTask = current; - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); sigfillset(¤t->blocked); recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); LAZY_LOCK_INIT(); TxAnchor.unlock_queue = TxAnchor.unlock_tail = 0; @@ -2985,10 +2985,10 @@ int jfs_sync(void *arg) unlock_kernel(); - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); sigfillset(¤t->blocked); recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); complete(&jfsIOwait); diff --git a/fs/ncpfs/sock.c 
b/fs/ncpfs/sock.c index 93ba7610dde0..f01c538eb282 100644 --- a/fs/ncpfs/sock.c +++ b/fs/ncpfs/sock.c @@ -745,7 +745,7 @@ static int ncp_do_request(struct ncp_server *server, int size, sigset_t old_set; unsigned long mask, flags; - spin_lock_irqsave(¤t->sig->siglock, flags); + spin_lock_irqsave(¤t->sighand->siglock, flags); old_set = current->blocked; if (current->flags & PF_EXITING) mask = 0; @@ -764,7 +764,7 @@ static int ncp_do_request(struct ncp_server *server, int size, } siginitsetinv(¤t->blocked, mask); recalc_sigpending(); - spin_unlock_irqrestore(¤t->sig->siglock, flags); + spin_unlock_irqrestore(¤t->sighand->siglock, flags); fs = get_fs(); set_fs(get_ds()); @@ -773,10 +773,10 @@ static int ncp_do_request(struct ncp_server *server, int size, set_fs(fs); - spin_lock_irqsave(¤t->sig->siglock, flags); + spin_lock_irqsave(¤t->sighand->siglock, flags); current->blocked = old_set; recalc_sigpending(); - spin_unlock_irqrestore(¤t->sig->siglock, flags); + spin_unlock_irqrestore(¤t->sighand->siglock, flags); } DDPRINTK("do_ncp_rpc_call returned %d\n", result); diff --git a/fs/smbfs/smbiod.c b/fs/smbfs/smbiod.c index 5f7d1d5969fe..41d5bbd8a334 100644 --- a/fs/smbfs/smbiod.c +++ b/fs/smbfs/smbiod.c @@ -285,10 +285,10 @@ static int smbiod(void *unused) MOD_INC_USE_COUNT; daemonize(); - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); siginitsetinv(¤t->blocked, sigmask(SIGKILL)); recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); strcpy(current->comm, "smbiod"); diff --git a/fs/xfs/pagebuf/page_buf.c b/fs/xfs/pagebuf/page_buf.c index 4c60a8799fcb..d6b027eb2022 100644 --- a/fs/xfs/pagebuf/page_buf.c +++ b/fs/xfs/pagebuf/page_buf.c @@ -1581,10 +1581,10 @@ pagebuf_daemon( daemonize(); /* Avoid signals */ - spin_lock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sighand->siglock); sigfillset(¤t->blocked); recalc_sigpending(); - spin_unlock_irq(¤t->sig->siglock); + spin_unlock_irq(¤t->sighand->siglock); strcpy(current->comm, "pagebufd"); current->flags |= PF_MEMALLOC; diff --git a/include/linux/sched.h b/include/linux/sched.h index ea5d949f946c..975dd5dca713 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -778,7 +778,7 @@ static inline void cond_resched_lock(spinlock_t * lock) /* Reevaluate whether the task has signals pending delivery. This is required every time the blocked sigset_t changes. - callers must hold sig->siglock. */ + callers must hold sighand->siglock. 
 extern FASTCALL(void recalc_sigpending_tsk(struct task_struct *t));
 extern void recalc_sigpending(void);
diff --git a/kernel/suspend.c b/kernel/suspend.c
index 40efb8d02db6..8ed7bde5aa18 100644
--- a/kernel/suspend.c
+++ b/kernel/suspend.c
@@ -218,9 +218,9 @@ int freeze_processes(void)
 			/* FIXME: smp problem here: we may not access other process' flags without locking */
 			p->flags |= PF_FREEZE;
-			spin_lock_irqsave(&p->sig->siglock, flags);
+			spin_lock_irqsave(&p->sighand->siglock, flags);
 			signal_wake_up(p, 0);
-			spin_unlock_irqrestore(&p->sig->siglock, flags);
+			spin_unlock_irqrestore(&p->sighand->siglock, flags);
 			todo++;
 		} while_each_thread(g, p);
 	read_unlock(&tasklist_lock);
diff --git a/net/rxrpc/internal.h b/net/rxrpc/internal.h
index 9e4553cc1aea..b0ee06b71a7e 100644
--- a/net/rxrpc/internal.h
+++ b/net/rxrpc/internal.h
@@ -54,9 +54,9 @@ static inline void rxrpc_discard_my_signals(void)
 	while (signal_pending(current)) {
 		siginfo_t sinfo;
-		spin_lock_irq(&current->sig->siglock);
+		spin_lock_irq(&current->sighand->siglock);
 		dequeue_signal(&current->blocked,&sinfo);
-		spin_unlock_irq(&current->sig->siglock);
+		spin_unlock_irq(&current->sighand->siglock);
 	}
 }
diff --git a/net/rxrpc/krxiod.c b/net/rxrpc/krxiod.c
index d246585b8f07..ae9987d6155f 100644
--- a/net/rxrpc/krxiod.c
+++ b/net/rxrpc/krxiod.c
@@ -47,10 +47,10 @@ static int rxrpc_krxiod(void *arg)
 	daemonize();
 	/* only certain signals are of interest */
-	spin_lock_irq(&current->sig->siglock);
+	spin_lock_irq(&current->sighand->siglock);
 	siginitsetinv(&current->blocked,0);
 	recalc_sigpending();
-	spin_unlock_irq(&current->sig->siglock);
+	spin_unlock_irq(&current->sighand->siglock);
 	/* loop around waiting for work to do */
 	do {
diff --git a/net/rxrpc/krxsecd.c b/net/rxrpc/krxsecd.c
index 4e35bd351412..39f4eac9f224 100644
--- a/net/rxrpc/krxsecd.c
+++ b/net/rxrpc/krxsecd.c
@@ -59,10 +59,10 @@ static int rxrpc_krxsecd(void *arg)
 	daemonize();
 	/* only certain signals are of interest */
-	spin_lock_irq(&current->sig->siglock);
+	spin_lock_irq(&current->sighand->siglock);
 	siginitsetinv(&current->blocked,0);
 	recalc_sigpending();
-	spin_unlock_irq(&current->sig->siglock);
+	spin_unlock_irq(&current->sighand->siglock);
 	/* loop around waiting for work to do */
 	do {
diff --git a/net/rxrpc/krxtimod.c b/net/rxrpc/krxtimod.c
index 8eb61e64fa55..c6df1edf38ff 100644
--- a/net/rxrpc/krxtimod.c
+++ b/net/rxrpc/krxtimod.c
@@ -77,10 +77,10 @@ static int krxtimod(void *arg)
 	complete(&krxtimod_alive);
 	/* only certain signals are of interest */
-	spin_lock_irq(&current->sig->siglock);
+	spin_lock_irq(&current->sighand->siglock);
 	siginitsetinv(&current->blocked,0);
 	recalc_sigpending();
-	spin_unlock_irq(&current->sig->siglock);
+	spin_unlock_irq(&current->sighand->siglock);
 	/* loop around looking for things to attend to */
loop:
-- cgit v1.2.3

From 03e218313940d2948c2b72df2710e193e42acca5 Mon Sep 17 00:00:00 2001
From: Roland McGrath
Date: Fri, 7 Feb 2003 00:31:37 -0800
Subject: [PATCH] TASK_STOPPED wakeup cleanup

For handle_stop_signal to do the special case for SIGKILL and have it
work right in all SMP cases (without changing all the existing ptrace
stops), it needs to at least set TIF_SIGPENDING on each thread before
resuming it.  handle_stop_signal addresses a related race for SIGCONT
by setting TIF_SIGPENDING already, so having SIGKILL handled the same
way makes sense.  Now it seems pretty clean to have handle_stop_signal
resume threads for SIGKILL, and have no SIGKILL special case in
group_send_sig_info.

There is also an SMP race issue with cases like do_syscall_trace,
i.e. TASK_STOPPED state set without holding the siglock.  So I think
handle_stop_signal should call wake_up_process unconditionally.
---
 kernel/signal.c | 50 ++++++++++++++++++++++++++++++++------------------
 1 file changed, 32 insertions(+), 18 deletions(-)

diff --git a/kernel/signal.c b/kernel/signal.c
index 670141c149a7..05791342da39 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -120,6 +120,9 @@ int max_queued_signals = 1024;
 #define SIG_KERNEL_STOP_MASK (\
 	M(SIGSTOP) | M(SIGTSTP) | M(SIGTTIN) | M(SIGTTOU) )
+#define SIG_KERNEL_CONT_MASK (\
+	M(SIGCONT) | M(SIGKILL) )
+
 #define SIG_KERNEL_COREDUMP_MASK (\
 	M(SIGQUIT) | M(SIGILL) | M(SIGTRAP) | M(SIGABRT) | \
 	M(SIGFPE) | M(SIGSEGV) | M(SIGBUS) | M(SIGSYS) | \
@@ -136,6 +139,8 @@ int max_queued_signals = 1024;
 	(((sig) < SIGRTMIN) && T(sig, SIG_KERNEL_IGNORE_MASK))
 #define sig_kernel_stop(sig) \
 	(((sig) < SIGRTMIN) && T(sig, SIG_KERNEL_STOP_MASK))
+#define sig_kernel_cont(sig) \
+	(((sig) < SIGRTMIN) && T(sig, SIG_KERNEL_CONT_MASK))
 #define sig_user_defined(t, signr) \
 	(((t)->sighand->action[(signr)-1].sa.sa_handler != SIG_DFL) && \
@@ -587,7 +592,7 @@ static void handle_stop_signal(int sig, struct task_struct *p)
 			t = next_thread(t);
 		} while (t != p);
 	}
-	else if (sig == SIGCONT) {
+	else if (sig_kernel_cont(sig)) {
 		/*
 		 * Remove all stop signals from all queues,
 		 * and wake all threads.
@@ -617,23 +622,32 @@ static void handle_stop_signal(int sig, struct task_struct *p)
 		t = p;
 		do {
 			rm_from_queue(SIG_KERNEL_STOP_MASK, &t->pending);
-			if (t->state == TASK_STOPPED) {
-				/*
-				 * If there is a handler for SIGCONT, we
-				 * must make sure that no thread returns to
-				 * user mode before we post the signal, in
-				 * case it was the only thread eligible to
-				 * run the signal handler--then it must not
-				 * do anything between resuming and running
-				 * the handler. With the TIF_SIGPENDING flag
-				 * set, the thread will pause and acquire the
-				 * siglock that we hold now and until we've
-				 * queued the pending signal.
-				 */
-				if (sig_user_defined(p, SIGCONT))
-					set_tsk_thread_flag(t, TIF_SIGPENDING);
-				wake_up_process(t);
-			}
+			/*
+			 * This wakeup is only needed if in TASK_STOPPED,
+			 * but there can be SMP races with testing for that.
+			 * In the normal SIGCONT case, all will be stopped.
+			 * A spuriously sent SIGCONT will interrupt all running
+			 * threads to check signals even if it's ignored.
+			 *
+			 * If there is a handler for SIGCONT, we must make
+			 * sure that no thread returns to user mode before
+			 * we post the signal, in case it was the only
+			 * thread eligible to run the signal handler--then
+			 * it must not do anything between resuming and
+			 * running the handler. With the TIF_SIGPENDING
+			 * flag set, the thread will pause and acquire the
+			 * siglock that we hold now and until we've queued
+			 * the pending signal. For SIGKILL, we likewise
+			 * don't want anybody doing anything but taking the
+			 * SIGKILL. The only case in which a thread would
+			 * not already be in the signal dequeuing loop is
+			 * non-signal (e.g. syscall) ptrace tracing, so we
+			 * don't worry about an unnecessary trip through
+			 * the signal code and just keep this code path
+			 * simpler by unconditionally setting the flag.
+			 */
+			set_tsk_thread_flag(t, TIF_SIGPENDING);
+			wake_up_process(t);
 			t = next_thread(t);
 		} while (t != p);
 	}
-- cgit v1.2.3
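
Nearly every hunk in the sig -> sighand rename above repeats one idiom: take
current->sighand->siglock, edit current->blocked, call recalc_sigpending(),
then drop the lock. For readers who want to experiment with that discipline
outside a kernel tree, here is a minimal userspace sketch using POSIX
pthread_sigmask(). It is an analogy only (none of these names come from the
patches themselves) and builds standalone with "cc -pthread":

/*
 * Userspace analogue (NOT kernel code) of the idiom used throughout the
 * sighand->siglock patch above: block (almost) all signals around a
 * critical region, then restore the old mask.  In the kernel the same
 * effect needs the siglock because other threads and interrupt context
 * can touch the signal state; here pthread_sigmask() does the bookkeeping.
 */
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	sigset_t all, old;

	/* analogue of sigfillset(&current->blocked) */
	sigfillset(&all);
	sigdelset(&all, SIGKILL);	/* unblockable anyway, as in the kernel */
	sigdelset(&all, SIGSTOP);

	if (pthread_sigmask(SIG_SETMASK, &all, &old) != 0)
		return 1;

	puts("signals blocked; doing work that must not be interrupted");
	sleep(1);			/* stand-in for the critical section */

	/* analogue of current->blocked = oldset; recalc_sigpending(); */
	pthread_sigmask(SIG_SETMASK, &old, NULL);
	puts("old signal mask restored");
	return 0;
}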
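
The TASK_STOPPED cleanup likewise leans on a small classification trick:
sig_kernel_cont() tests membership of a signal in SIG_KERNEL_CONT_MASK, a
one-bit-per-signal bitmask, with a single AND. The sketch below reconstructs
that scheme as an ordinary C program so it can be compiled and tested by
itself; the M()/T() helpers here are written in the spirit of kernel/signal.c
rather than copied from it, and the literal 32 stands in for SIGRTMIN:

/* Standalone sketch (not kernel code) of the sig_kernel_cont() test. */
#include <signal.h>
#include <stdio.h>

#define M(sig)        (1UL << ((sig) - 1))	/* one bit per classic signal */
#define T(sig, mask)  (((mask) & M(sig)) != 0)

#define SIG_KERNEL_CONT_MASK  (M(SIGCONT) | M(SIGKILL))

static int sig_kernel_cont(int sig)
{
	/* 32 is a stand-in for SIGRTMIN: only classic signals are classified */
	return sig < 32 && T(sig, SIG_KERNEL_CONT_MASK);
}

int main(void)
{
	printf("SIGCONT -> %d\n", sig_kernel_cont(SIGCONT));	/* prints 1 */
	printf("SIGKILL -> %d\n", sig_kernel_cont(SIGKILL));	/* prints 1 */
	printf("SIGSTOP -> %d\n", sig_kernel_cont(SIGSTOP));	/* prints 0 */
	return 0;
}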