From c9472e0f28cd2f0695a0ac3a0b4bd33f21714a7e Mon Sep 17 00:00:00 2001
From: Cal Peake <cp@absolutedigital.net>
Date: Fri, 29 Sep 2006 02:00:47 -0700
Subject: [PATCH] kill extraneous printk in kernel_restart()

Get rid of an extraneous printk in kernel_restart().

Signed-off-by: Cal Peake <cp@absolutedigital.net>
Acked-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sys.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'kernel/sys.c')

diff --git a/kernel/sys.c b/kernel/sys.c
index 3f894775488d..8647061c084a 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -612,7 +612,6 @@ void kernel_restart(char *cmd)
 	} else {
 		printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd);
 	}
-	printk(".\n");
 	machine_restart(cmd);
 }
 EXPORT_SYMBOL_GPL(kernel_restart);
-- 
cgit v1.2.3


From 34596dc9e59d7bece16fe5aba08116b49465da26 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Sat, 30 Sep 2006 01:47:55 +0200
Subject: [PATCH] Define vsyscall cache as blob to make clearer that user space
 shouldn't use it

Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/vsyscall.c |  8 ++++----
 include/linux/getcpu.h        | 12 +++++++-----
 kernel/sys.c                  |  8 ++++----
 3 files changed, 15 insertions(+), 13 deletions(-)

(limited to 'kernel/sys.c')

diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
index ac48c3857ddb..07c086382059 100644
--- a/arch/x86_64/kernel/vsyscall.c
+++ b/arch/x86_64/kernel/vsyscall.c
@@ -155,8 +155,8 @@ vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
 	   We do this here because otherwise user space would do it on
 	   its own in a likely inferior way (no access to jiffies).
 	   If you don't like it pass NULL. */
-	if (tcache && tcache->t0 == (j = __jiffies)) {
-		p = tcache->t1;
+	if (tcache && tcache->blob[0] == (j = __jiffies)) {
+		p = tcache->blob[1];
 	} else if (__vgetcpu_mode == VGETCPU_RDTSCP) {
 		/* Load per CPU data from RDTSCP */
 		rdtscp(dummy, dummy, p);
@@ -165,8 +165,8 @@ vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
 		asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
 	}
 	if (tcache) {
-		tcache->t0 = j;
-		tcache->t1 = p;
+		tcache->blob[0] = j;
+		tcache->blob[1] = p;
 	}
 	if (cpu)
 		*cpu = p & 0xfff;
diff --git a/include/linux/getcpu.h b/include/linux/getcpu.h
index 031ed3780e45..c7372d7a97be 100644
--- a/include/linux/getcpu.h
+++ b/include/linux/getcpu.h
@@ -1,16 +1,18 @@
 #ifndef _LINUX_GETCPU_H
 #define _LINUX_GETCPU_H 1
 
-/* Cache for getcpu() to speed it up. Results might be upto a jiffie
+/* Cache for getcpu() to speed it up. Results might be a short time
    out of date, but will be faster.
+
    User programs should not refer to the contents of this structure.
-   It is only a cache for vgetcpu(). It might change in future kernels.
+   I repeat they should not refer to it. If they do they will break
+   in future kernels.
+
+   It is only a private cache for vgetcpu(). It will change in future kernels.
    The user program must store this information per thread (__thread)
    If you want 100% accurate information pass NULL instead. */
 struct getcpu_cache {
-	unsigned long t0;
-	unsigned long t1;
-	unsigned long res[4];
+	unsigned long blob[128 / sizeof(long)];
 };
 
 #endif
diff --git a/kernel/sys.c b/kernel/sys.c
index 8647061c084a..b88806c66244 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2083,12 +2083,12 @@ asmlinkage long sys_getcpu(unsigned __user *cpup, unsigned __user *nodep,
 		 * padding
 		 */
 		unsigned long t0, t1;
-		get_user(t0, &cache->t0);
-		get_user(t1, &cache->t1);
+		get_user(t0, &cache->blob[0]);
+		get_user(t1, &cache->blob[1]);
 		t0++;
 		t1++;
-		put_user(t0, &cache->t0);
-		put_user(t1, &cache->t1);
+		put_user(t0, &cache->blob[0]);
+		put_user(t1, &cache->blob[1]);
 	}
 	return err ? -EFAULT : 0;
 }
-- 
cgit v1.2.3


From 756184b7d771992f4fb1998d62aebcaf3e028076 Mon Sep 17 00:00:00 2001
From: Cal Peake <cp@absolutedigital.net>
Date: Sat, 30 Sep 2006 23:27:24 -0700
Subject: [PATCH] CodingStyle cleanup for kernel/sys.c

Fix up kernel/sys.c to be consistent with CodingStyle and the rest of the
file.

Signed-off-by: Cal Peake <cp@absolutedigital.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sys.c | 80 +++++++++++++++++++++++-------------------------------------
 1 file changed, 30 insertions(+), 50 deletions(-)

(limited to 'kernel/sys.c')

diff --git a/kernel/sys.c b/kernel/sys.c
index b88806c66244..2460581c928c 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -607,11 +607,10 @@ static void kernel_restart_prepare(char *cmd)
 void kernel_restart(char *cmd)
 {
 	kernel_restart_prepare(cmd);
-	if (!cmd) {
+	if (!cmd)
 		printk(KERN_EMERG "Restarting system.\n");
-	} else {
+	else
 		printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd);
-	}
 	machine_restart(cmd);
 }
 EXPORT_SYMBOL_GPL(kernel_restart);
@@ -627,9 +626,8 @@ static void kernel_kexec(void)
 #ifdef CONFIG_KEXEC
 	struct kimage *image;
 	image = xchg(&kexec_image, NULL);
-	if (!image) {
+	if (!image)
 		return;
-	}
 	kernel_restart_prepare(NULL);
 	printk(KERN_EMERG "Starting new kernel\n");
 	machine_shutdown();
@@ -823,12 +821,10 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid)
 		    (current->sgid == egid) ||
 		    capable(CAP_SETGID))
 			new_egid = egid;
-		else {
+		else
 			return -EPERM;
-		}
 	}
-	if (new_egid != old_egid)
-	{
+	if (new_egid != old_egid) {
 		current->mm->dumpable = suid_dumpable;
 		smp_wmb();
 	}
@@ -857,19 +853,14 @@ asmlinkage long sys_setgid(gid_t gid)
 	if (retval)
 		return retval;
 
-	if (capable(CAP_SETGID))
-	{
-		if(old_egid != gid)
-		{
+	if (capable(CAP_SETGID)) {
+		if (old_egid != gid) {
 			current->mm->dumpable = suid_dumpable;
 			smp_wmb();
 		}
 		current->gid = current->egid = current->sgid = current->fsgid = gid;
-	}
-	else if ((gid == current->gid) || (gid == current->sgid))
-	{
-		if(old_egid != gid)
-		{
+	} else if ((gid == current->gid) || (gid == current->sgid)) {
+		if (old_egid != gid) {
 			current->mm->dumpable = suid_dumpable;
 			smp_wmb();
 		}
@@ -900,8 +891,7 @@ static int set_user(uid_t new_ruid, int dumpclear)
 
 	switch_uid(new_user);
 
-	if(dumpclear)
-	{
+	if (dumpclear) {
 		current->mm->dumpable = suid_dumpable;
 		smp_wmb();
 	}
@@ -957,8 +947,7 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
 	if (new_ruid != old_ruid && set_user(new_ruid, new_euid != old_euid) < 0)
 		return -EAGAIN;
 
-	if (new_euid != old_euid)
-	{
+	if (new_euid != old_euid) {
 		current->mm->dumpable = suid_dumpable;
 		smp_wmb();
 	}
@@ -1008,8 +997,7 @@ asmlinkage long sys_setuid(uid_t uid)
 	} else if ((uid != current->uid) && (uid != new_suid))
 		return -EPERM;
 
-	if (old_euid != uid)
-	{
+	if (old_euid != uid) {
 		current->mm->dumpable = suid_dumpable;
 		smp_wmb();
 	}
@@ -1054,8 +1042,7 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
 			return -EAGAIN;
 	}
 	if (euid != (uid_t) -1) {
-		if (euid != current->euid)
-		{
+		if (euid != current->euid) {
 			current->mm->dumpable = suid_dumpable;
 			smp_wmb();
 		}
@@ -1105,8 +1092,7 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
 			return -EPERM;
 	}
 	if (egid != (gid_t) -1) {
-		if (egid != current->egid)
-		{
+		if (egid != current->egid) {
 			current->mm->dumpable = suid_dumpable;
 			smp_wmb();
 		}
@@ -1151,10 +1137,8 @@ asmlinkage long sys_setfsuid(uid_t uid)
 
 	if (uid == current->uid || uid == current->euid ||
 	    uid == current->suid || uid == current->fsuid || 
-	    capable(CAP_SETUID))
-	{
-		if (uid != old_fsuid)
-		{
+	    capable(CAP_SETUID)) {
+		if (uid != old_fsuid) {
 			current->mm->dumpable = suid_dumpable;
 			smp_wmb();
 		}
@@ -1182,10 +1166,8 @@ asmlinkage long sys_setfsgid(gid_t gid)
 
 	if (gid == current->gid || gid == current->egid ||
 	    gid == current->sgid || gid == current->fsgid || 
-	    capable(CAP_SETGID))
-	{
-		if (gid != old_fsgid)
-		{
+	    capable(CAP_SETGID)) {
+		if (gid != old_fsgid) {
 			current->mm->dumpable = suid_dumpable;
 			smp_wmb();
 		}
@@ -1321,9 +1303,9 @@ out:
 
 asmlinkage long sys_getpgid(pid_t pid)
 {
-	if (!pid) {
+	if (!pid)
 		return process_group(current);
-	} else {
+	else {
 		int retval;
 		struct task_struct *p;
 
@@ -1353,9 +1335,9 @@ asmlinkage long sys_getpgrp(void)
 
 asmlinkage long sys_getsid(pid_t pid)
 {
-	if (!pid) {
+	if (!pid)
 		return current->signal->session;
-	} else {
+	else {
 		int retval;
 		struct task_struct *p;
 
@@ -1363,7 +1345,7 @@ asmlinkage long sys_getsid(pid_t pid)
 		p = find_task_by_pid(pid);
 
 		retval = -ESRCH;
-		if(p) {
+		if (p) {
 			retval = security_task_getsid(p);
 			if (!retval)
 				retval = p->signal->session;
@@ -1431,9 +1413,9 @@ struct group_info *groups_alloc(int gidsetsize)
 	group_info->nblocks = nblocks;
 	atomic_set(&group_info->usage, 1);
 
-	if (gidsetsize <= NGROUPS_SMALL) {
+	if (gidsetsize <= NGROUPS_SMALL)
 		group_info->blocks[0] = group_info->small_block;
-	} else {
+	else {
 		for (i = 0; i < nblocks; i++) {
 			gid_t *b;
 			b = (void *)__get_free_page(GFP_USER);
@@ -1489,7 +1471,7 @@ static int groups_to_user(gid_t __user *grouplist,
 /* fill a group_info from a user-space array - it must be allocated already */
 static int groups_from_user(struct group_info *group_info,
     gid_t __user *grouplist)
- {
+{
 	int i;
 	int count = group_info->ngroups;
 
@@ -1647,9 +1629,8 @@ asmlinkage long sys_setgroups(int gidsetsize, gid_t __user *grouplist)
 int in_group_p(gid_t grp)
 {
 	int retval = 1;
-	if (grp != current->fsgid) {
+	if (grp != current->fsgid)
 		retval = groups_search(current->group_info, grp);
-	}
 	return retval;
 }
 
@@ -1658,9 +1639,8 @@ EXPORT_SYMBOL(in_group_p);
 int in_egroup_p(gid_t grp)
 {
 	int retval = 1;
-	if (grp != current->egid) {
+	if (grp != current->egid)
 		retval = groups_search(current->group_info, grp);
-	}
 	return retval;
 }
 
@@ -1775,9 +1755,9 @@ asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *r
 	task_lock(current->group_leader);
 	x = current->signal->rlim[resource];
 	task_unlock(current->group_leader);
-	if(x.rlim_cur > 0x7FFFFFFF)
+	if (x.rlim_cur > 0x7FFFFFFF)
 		x.rlim_cur = 0x7FFFFFFF;
-	if(x.rlim_max > 0x7FFFFFFF)
+	if (x.rlim_max > 0x7FFFFFFF)
 		x.rlim_max = 0x7FFFFFFF;
 	return copy_to_user(rlim, &x, sizeof(x))?-EFAULT:0;
 }
-- 
cgit v1.2.3


From f2aa85a0ccd90110e76c6375535adc3ae358f971 Mon Sep 17 00:00:00 2001
From: "bibo,mao" <bibo.mao@intel.com>
Date: Mon, 2 Oct 2006 02:17:34 -0700
Subject: [PATCH] disallow kprobes on notifier_call_chain

When kprobe is re-entered, the re-entered kprobe kernel path will will call
atomic_notifier_call_chain function, if this function is kprobed that will
incur numerous kprobe recursive fault.  This patch disallows kprobes on
atomic_notifier_call_chain function.

Signed-off-by: bibo, mao <bibo.mao@intel.com>
Signed-off-by: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sys.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/sys.c')

diff --git a/kernel/sys.c b/kernel/sys.c
index 2460581c928c..398d57923f95 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -221,7 +221,7 @@ EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister);
  *	of the last notifier function called.
  */
  
-int atomic_notifier_call_chain(struct atomic_notifier_head *nh,
+int __kprobes atomic_notifier_call_chain(struct atomic_notifier_head *nh,
 		unsigned long val, void *v)
 {
 	int ret;
-- 
cgit v1.2.3


From e9ff3990f08e9a0c2839cc22808b01732ea5b3e4 Mon Sep 17 00:00:00 2001
From: "Serge E. Hallyn" <serue@us.ibm.com>
Date: Mon, 2 Oct 2006 02:18:11 -0700
Subject: [PATCH] namespaces: utsname: switch to using uts namespaces

Replace references to system_utsname to the per-process uts namespace
where appropriate.  This includes things like uname.

Changes: Per Eric Biederman's comments, use the per-process uts namespace
	for ELF_PLATFORM, sunrpc, and parts of net/ipv4/ipconfig.c

[jdike@addtoit.com: UML fix]
[clg@fr.ibm.com: cleanup]
[akpm@osdl.org: build fix]
Signed-off-by: Serge E. Hallyn <serue@us.ibm.com>
Cc: Kirill Korotaev <dev@openvz.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: Andrey Savochkin <saw@sw.ru>
Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>
Cc: Jeff Dike <jdike@addtoit.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/alpha/kernel/osf_sys.c         | 26 ++++++++++++-------------
 arch/i386/kernel/sys_i386.c         | 27 +++++++++++++++-----------
 arch/ia64/sn/kernel/sn2/sn_hwperf.c |  2 +-
 arch/m32r/kernel/sys_m32r.c         |  2 +-
 arch/mips/kernel/linux32.c          |  2 +-
 arch/mips/kernel/syscall.c          | 27 +++++++++++++++-----------
 arch/mips/kernel/sysirix.c          | 12 ++++++------
 arch/parisc/hpux/sys_hpux.c         | 37 ++++++++++++++++++++----------------
 arch/powerpc/kernel/syscalls.c      | 14 +++++++-------
 arch/sh/kernel/sys_sh.c             |  2 +-
 arch/sh64/kernel/sys_sh64.c         |  2 +-
 arch/sparc/kernel/sys_sparc.c       |  4 ++--
 arch/sparc/kernel/sys_sunos.c       | 15 ++++++++++-----
 arch/sparc64/kernel/sys_sparc.c     |  4 ++--
 arch/sparc64/kernel/sys_sunos32.c   | 10 +++++-----
 arch/sparc64/solaris/misc.c         |  6 +++---
 arch/um/drivers/mconsole_kern.c     |  6 +++---
 arch/um/kernel/syscall.c            | 22 ++++++++++-----------
 arch/um/sys-x86_64/syscalls.c       |  2 +-
 arch/x86_64/ia32/sys_ia32.c         | 38 ++++++++++++++++++-------------------
 arch/x86_64/kernel/sys_x86_64.c     |  2 +-
 arch/xtensa/kernel/syscalls.c       |  2 +-
 drivers/char/random.c               |  4 ++--
 fs/cifs/connect.c                   | 22 ++++++++++-----------
 fs/exec.c                           |  2 +-
 fs/lockd/clntproc.c                 |  4 ++--
 fs/lockd/mon.c                      |  2 +-
 fs/lockd/svclock.c                  |  2 +-
 fs/lockd/xdr.c                      |  2 +-
 fs/nfs/nfsroot.c                    |  2 +-
 include/asm-i386/elf.h              |  2 +-
 include/linux/lockd/lockd.h         |  2 +-
 kernel/sys.c                        | 14 +++++++-------
 net/ipv4/ipconfig.c                 | 14 +++++++-------
 net/sunrpc/clnt.c                   |  4 ++--
 35 files changed, 180 insertions(+), 160 deletions(-)

(limited to 'kernel/sys.c')

diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 73c7622b5297..8a31fc1bfb15 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -402,15 +402,15 @@ osf_utsname(char __user *name)
 
 	down_read(&uts_sem);
 	error = -EFAULT;
-	if (copy_to_user(name + 0, system_utsname.sysname, 32))
+	if (copy_to_user(name + 0, utsname()->sysname, 32))
 		goto out;
-	if (copy_to_user(name + 32, system_utsname.nodename, 32))
+	if (copy_to_user(name + 32, utsname()->nodename, 32))
 		goto out;
-	if (copy_to_user(name + 64, system_utsname.release, 32))
+	if (copy_to_user(name + 64, utsname()->release, 32))
 		goto out;
-	if (copy_to_user(name + 96, system_utsname.version, 32))
+	if (copy_to_user(name + 96, utsname()->version, 32))
 		goto out;
-	if (copy_to_user(name + 128, system_utsname.machine, 32))
+	if (copy_to_user(name + 128, utsname()->machine, 32))
 		goto out;
 
 	error = 0;
@@ -449,8 +449,8 @@ osf_getdomainname(char __user *name, int namelen)
 
 	down_read(&uts_sem);
 	for (i = 0; i < len; ++i) {
-		__put_user(system_utsname.domainname[i], name + i);
-		if (system_utsname.domainname[i] == '\0')
+		__put_user(utsname()->domainname[i], name + i);
+		if (utsname()->domainname[i] == '\0')
 			break;
 	}
 	up_read(&uts_sem);
@@ -607,12 +607,12 @@ osf_sigstack(struct sigstack __user *uss, struct sigstack __user *uoss)
 asmlinkage long
 osf_sysinfo(int command, char __user *buf, long count)
 {
-	static char * sysinfo_table[] = {
-		system_utsname.sysname,
-		system_utsname.nodename,
-		system_utsname.release,
-		system_utsname.version,
-		system_utsname.machine,
+	char *sysinfo_table[] = {
+		utsname()->sysname,
+		utsname()->nodename,
+		utsname()->release,
+		utsname()->version,
+		utsname()->machine,
 		"alpha",	/* instruction set architecture */
 		"dummy",	/* hardware serial number */
 		"dummy",	/* hardware manufacturer */
diff --git a/arch/i386/kernel/sys_i386.c b/arch/i386/kernel/sys_i386.c
index 8fdb1fb17a5f..dc6e3bbeba31 100644
--- a/arch/i386/kernel/sys_i386.c
+++ b/arch/i386/kernel/sys_i386.c
@@ -210,7 +210,7 @@ asmlinkage int sys_uname(struct old_utsname __user * name)
 	if (!name)
 		return -EFAULT;
 	down_read(&uts_sem);
-	err=copy_to_user(name, &system_utsname, sizeof (*name));
+	err = copy_to_user(name, utsname(), sizeof (*name));
 	up_read(&uts_sem);
 	return err?-EFAULT:0;
 }
@@ -226,16 +226,21 @@ asmlinkage int sys_olduname(struct oldold_utsname __user * name)
   
   	down_read(&uts_sem);
 	
-	error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
-	error |= __put_user(0,name->sysname+__OLD_UTS_LEN);
-	error |= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
-	error |= __put_user(0,name->nodename+__OLD_UTS_LEN);
-	error |= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
-	error |= __put_user(0,name->release+__OLD_UTS_LEN);
-	error |= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
-	error |= __put_user(0,name->version+__OLD_UTS_LEN);
-	error |= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN);
-	error |= __put_user(0,name->machine+__OLD_UTS_LEN);
+	error = __copy_to_user(&name->sysname, &utsname()->sysname,
+			       __OLD_UTS_LEN);
+	error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
+	error |= __copy_to_user(&name->nodename, &utsname()->nodename,
+				__OLD_UTS_LEN);
+	error |= __put_user(0, name->nodename + __OLD_UTS_LEN);
+	error |= __copy_to_user(&name->release, &utsname()->release,
+				__OLD_UTS_LEN);
+	error |= __put_user(0, name->release + __OLD_UTS_LEN);
+	error |= __copy_to_user(&name->version, &utsname()->version,
+				__OLD_UTS_LEN);
+	error |= __put_user(0, name->version + __OLD_UTS_LEN);
+	error |= __copy_to_user(&name->machine, &utsname()->machine,
+				__OLD_UTS_LEN);
+	error |= __put_user(0, name->machine + __OLD_UTS_LEN);
 	
 	up_read(&uts_sem);
 	
diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
index b632b9c1e3b3..462ea178f49a 100644
--- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c
+++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
@@ -423,7 +423,7 @@ static int sn_topology_show(struct seq_file *s, void *d)
 			"coherency_domain %d, "
 			"region_size %d\n",
 
-			partid, system_utsname.nodename,
+			partid, utsname()->nodename,
 			shubtype ? "shub2" : "shub1", 
 			(u64)nasid_mask << nasid_shift, nasid_msb, nasid_shift,
 			system_size, sharing_size, coher, region_size);
diff --git a/arch/m32r/kernel/sys_m32r.c b/arch/m32r/kernel/sys_m32r.c
index a9cea32eb824..7c29396cc670 100644
--- a/arch/m32r/kernel/sys_m32r.c
+++ b/arch/m32r/kernel/sys_m32r.c
@@ -205,7 +205,7 @@ asmlinkage int sys_uname(struct old_utsname * name)
 	if (!name)
 		return -EFAULT;
 	down_read(&uts_sem);
-	err=copy_to_user(name, &system_utsname, sizeof (*name));
+	err = copy_to_user(name, utsname(), sizeof (*name));
 	up_read(&uts_sem);
 	return err?-EFAULT:0;
 }
diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c
index 43b1162d714f..52cada45b353 100644
--- a/arch/mips/kernel/linux32.c
+++ b/arch/mips/kernel/linux32.c
@@ -1039,7 +1039,7 @@ asmlinkage long sys32_newuname(struct new_utsname __user * name)
 	int ret = 0;
 
 	down_read(&uts_sem);
-	if (copy_to_user(name,&system_utsname,sizeof *name))
+	if (copy_to_user(name, utsname(), sizeof *name))
 		ret = -EFAULT;
 	up_read(&uts_sem);
 
diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c
index 9951240cc3fd..970e3e96b1d0 100644
--- a/arch/mips/kernel/syscall.c
+++ b/arch/mips/kernel/syscall.c
@@ -231,7 +231,7 @@ out:
  */
 asmlinkage int sys_uname(struct old_utsname __user * name)
 {
-	if (name && !copy_to_user(name, &system_utsname, sizeof (*name)))
+	if (name && !copy_to_user(name, utsname(), sizeof (*name)))
 		return 0;
 	return -EFAULT;
 }
@@ -248,16 +248,21 @@ asmlinkage int sys_olduname(struct oldold_utsname __user * name)
 	if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname)))
 		return -EFAULT;
 
-	error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
-	error -= __put_user(0,name->sysname+__OLD_UTS_LEN);
-	error -= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
-	error -= __put_user(0,name->nodename+__OLD_UTS_LEN);
-	error -= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
-	error -= __put_user(0,name->release+__OLD_UTS_LEN);
-	error -= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
-	error -= __put_user(0,name->version+__OLD_UTS_LEN);
-	error -= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN);
-	error = __put_user(0,name->machine+__OLD_UTS_LEN);
+	error = __copy_to_user(&name->sysname, &utsname()->sysname,
+			       __OLD_UTS_LEN);
+	error -= __put_user(0, name->sysname + __OLD_UTS_LEN);
+	error -= __copy_to_user(&name->nodename, &utsname()->nodename,
+				__OLD_UTS_LEN);
+	error -= __put_user(0, name->nodename + __OLD_UTS_LEN);
+	error -= __copy_to_user(&name->release, &utsname()->release,
+				__OLD_UTS_LEN);
+	error -= __put_user(0, name->release + __OLD_UTS_LEN);
+	error -= __copy_to_user(&name->version, &utsname()->version,
+				__OLD_UTS_LEN);
+	error -= __put_user(0, name->version + __OLD_UTS_LEN);
+	error -= __copy_to_user(&name->machine, &utsname()->machine,
+				__OLD_UTS_LEN);
+	error = __put_user(0, name->machine + __OLD_UTS_LEN);
 	error = error ? -EFAULT : 0;
 
 	return error;
diff --git a/arch/mips/kernel/sysirix.c b/arch/mips/kernel/sysirix.c
index 1137dd6ea7aa..11bb97174972 100644
--- a/arch/mips/kernel/sysirix.c
+++ b/arch/mips/kernel/sysirix.c
@@ -884,7 +884,7 @@ asmlinkage int irix_getdomainname(char __user *name, int len)
 	down_read(&uts_sem);
 	if (len > __NEW_UTS_LEN)
 		len = __NEW_UTS_LEN;
-	err = copy_to_user(name, system_utsname.domainname, len) ? -EFAULT : 0;
+	err = copy_to_user(name, utsname()->domainname, len) ? -EFAULT : 0;
 	up_read(&uts_sem);
 
 	return err;
@@ -1127,11 +1127,11 @@ struct iuname {
 asmlinkage int irix_uname(struct iuname __user *buf)
 {
 	down_read(&uts_sem);
-	if (copy_from_user(system_utsname.sysname, buf->sysname, 65)
-	    || copy_from_user(system_utsname.nodename, buf->nodename, 65)
-	    || copy_from_user(system_utsname.release, buf->release, 65)
-	    || copy_from_user(system_utsname.version, buf->version, 65)
-	    || copy_from_user(system_utsname.machine, buf->machine, 65)) {
+	if (copy_from_user(utsname()->sysname, buf->sysname, 65)
+	    || copy_from_user(utsname()->nodename, buf->nodename, 65)
+	    || copy_from_user(utsname()->release, buf->release, 65)
+	    || copy_from_user(utsname()->version, buf->version, 65)
+	    || copy_from_user(utsname()->machine, buf->machine, 65)) {
 		return -EFAULT;
 	}
 	up_read(&uts_sem);
diff --git a/arch/parisc/hpux/sys_hpux.c b/arch/parisc/hpux/sys_hpux.c
index cb69727027ae..2e2dc4f2c853 100644
--- a/arch/parisc/hpux/sys_hpux.c
+++ b/arch/parisc/hpux/sys_hpux.c
@@ -266,16 +266,21 @@ static int hpux_uname(struct hpux_utsname *name)
 
 	down_read(&uts_sem);
 
-	error = __copy_to_user(&name->sysname,&system_utsname.sysname,HPUX_UTSLEN-1);
-	error |= __put_user(0,name->sysname+HPUX_UTSLEN-1);
-	error |= __copy_to_user(&name->nodename,&system_utsname.nodename,HPUX_UTSLEN-1);
-	error |= __put_user(0,name->nodename+HPUX_UTSLEN-1);
-	error |= __copy_to_user(&name->release,&system_utsname.release,HPUX_UTSLEN-1);
-	error |= __put_user(0,name->release+HPUX_UTSLEN-1);
-	error |= __copy_to_user(&name->version,&system_utsname.version,HPUX_UTSLEN-1);
-	error |= __put_user(0,name->version+HPUX_UTSLEN-1);
-	error |= __copy_to_user(&name->machine,&system_utsname.machine,HPUX_UTSLEN-1);
-	error |= __put_user(0,name->machine+HPUX_UTSLEN-1);
+	error = __copy_to_user(&name->sysname, &utsname()->sysname,
+			       HPUX_UTSLEN - 1);
+	error |= __put_user(0, name->sysname + HPUX_UTSLEN - 1);
+	error |= __copy_to_user(&name->nodename, &utsname()->nodename,
+				HPUX_UTSLEN - 1);
+	error |= __put_user(0, name->nodename + HPUX_UTSLEN - 1);
+	error |= __copy_to_user(&name->release, &utsname()->release,
+				HPUX_UTSLEN - 1);
+	error |= __put_user(0, name->release + HPUX_UTSLEN - 1);
+	error |= __copy_to_user(&name->version, &utsname()->version,
+				HPUX_UTSLEN - 1);
+	error |= __put_user(0, name->version + HPUX_UTSLEN - 1);
+	error |= __copy_to_user(&name->machine, &utsname()->machine,
+				HPUX_UTSLEN - 1);
+	error |= __put_user(0, name->machine + HPUX_UTSLEN - 1);
 
 	up_read(&uts_sem);
 
@@ -373,8 +378,8 @@ int hpux_utssys(char *ubuf, int n, int type)
 		/*  TODO:  print a warning about using this?  */
 		down_write(&uts_sem);
 		error = -EFAULT;
-		if (!copy_from_user(system_utsname.sysname, ubuf, len)) {
-			system_utsname.sysname[len] = 0;
+		if (!copy_from_user(utsname()->sysname, ubuf, len)) {
+			utsname()->sysname[len] = 0;
 			error = 0;
 		}
 		up_write(&uts_sem);
@@ -400,8 +405,8 @@ int hpux_utssys(char *ubuf, int n, int type)
 		/*  TODO:  print a warning about this?  */
 		down_write(&uts_sem);
 		error = -EFAULT;
-		if (!copy_from_user(system_utsname.release, ubuf, len)) {
-			system_utsname.release[len] = 0;
+		if (!copy_from_user(utsname()->release, ubuf, len)) {
+			utsname()->release[len] = 0;
 			error = 0;
 		}
 		up_write(&uts_sem);
@@ -422,13 +427,13 @@ int hpux_getdomainname(char *name, int len)
  	
  	down_read(&uts_sem);
  	
-	nlen = strlen(system_utsname.domainname) + 1;
+	nlen = strlen(utsname()->domainname) + 1;
 
 	if (nlen < len)
 		len = nlen;
 	if(len > __NEW_UTS_LEN)
 		goto done;
-	if(copy_to_user(name, system_utsname.domainname, len))
+	if(copy_to_user(name, utsname()->domainname, len))
 		goto done;
 	err = 0;
 done:
diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c
index 9b69d99a9103..d358866b880f 100644
--- a/arch/powerpc/kernel/syscalls.c
+++ b/arch/powerpc/kernel/syscalls.c
@@ -260,7 +260,7 @@ long ppc_newuname(struct new_utsname __user * name)
 	int err = 0;
 
 	down_read(&uts_sem);
-	if (copy_to_user(name, &system_utsname, sizeof(*name)))
+	if (copy_to_user(name, utsname(), sizeof(*name)))
 		err = -EFAULT;
 	up_read(&uts_sem);
 	if (!err)
@@ -273,7 +273,7 @@ int sys_uname(struct old_utsname __user *name)
 	int err = 0;
 	
 	down_read(&uts_sem);
-	if (copy_to_user(name, &system_utsname, sizeof(*name)))
+	if (copy_to_user(name, utsname(), sizeof(*name)))
 		err = -EFAULT;
 	up_read(&uts_sem);
 	if (!err)
@@ -289,19 +289,19 @@ int sys_olduname(struct oldold_utsname __user *name)
 		return -EFAULT;
   
 	down_read(&uts_sem);
-	error = __copy_to_user(&name->sysname, &system_utsname.sysname,
+	error = __copy_to_user(&name->sysname, &utsname()->sysname,
 			       __OLD_UTS_LEN);
 	error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
-	error |= __copy_to_user(&name->nodename, &system_utsname.nodename,
+	error |= __copy_to_user(&name->nodename, &utsname()->nodename,
 				__OLD_UTS_LEN);
 	error |= __put_user(0, name->nodename + __OLD_UTS_LEN);
-	error |= __copy_to_user(&name->release, &system_utsname.release,
+	error |= __copy_to_user(&name->release, &utsname()->release,
 				__OLD_UTS_LEN);
 	error |= __put_user(0, name->release + __OLD_UTS_LEN);
-	error |= __copy_to_user(&name->version, &system_utsname.version,
+	error |= __copy_to_user(&name->version, &utsname()->version,
 				__OLD_UTS_LEN);
 	error |= __put_user(0, name->version + __OLD_UTS_LEN);
-	error |= __copy_to_user(&name->machine, &system_utsname.machine,
+	error |= __copy_to_user(&name->machine, &utsname()->machine,
 				__OLD_UTS_LEN);
 	error |= override_machine(name->machine);
 	up_read(&uts_sem);
diff --git a/arch/sh/kernel/sys_sh.c b/arch/sh/kernel/sys_sh.c
index b68ff705f067..11c2acde6eaa 100644
--- a/arch/sh/kernel/sys_sh.c
+++ b/arch/sh/kernel/sys_sh.c
@@ -281,7 +281,7 @@ asmlinkage int sys_uname(struct old_utsname * name)
 	if (!name)
 		return -EFAULT;
 	down_read(&uts_sem);
-	err=copy_to_user(name, &system_utsname, sizeof (*name));
+	err = copy_to_user(name, utsname(), sizeof (*name));
 	up_read(&uts_sem);
 	return err?-EFAULT:0;
 }
diff --git a/arch/sh64/kernel/sys_sh64.c b/arch/sh64/kernel/sys_sh64.c
index 58ff7d522d81..c6de1a95af44 100644
--- a/arch/sh64/kernel/sys_sh64.c
+++ b/arch/sh64/kernel/sys_sh64.c
@@ -279,7 +279,7 @@ asmlinkage int sys_uname(struct old_utsname * name)
 	if (!name)
 		return -EFAULT;
 	down_read(&uts_sem);
-	err=copy_to_user(name, &system_utsname, sizeof (*name));
+	err = copy_to_user(name, utsname(), sizeof (*name));
 	up_read(&uts_sem);
 	return err?-EFAULT:0;
 }
diff --git a/arch/sparc/kernel/sys_sparc.c b/arch/sparc/kernel/sys_sparc.c
index 896863fb208a..002ab4dbe5cb 100644
--- a/arch/sparc/kernel/sys_sparc.c
+++ b/arch/sparc/kernel/sys_sparc.c
@@ -475,13 +475,13 @@ asmlinkage int sys_getdomainname(char __user *name, int len)
 
  	down_read(&uts_sem);
  	
-	nlen = strlen(system_utsname.domainname) + 1;
+	nlen = strlen(utsname()->domainname) + 1;
 	err = -EINVAL;
 	if (nlen > len)
 		goto out;
 
 	err = -EFAULT;
-	if (!copy_to_user(name, system_utsname.domainname, nlen))
+	if (!copy_to_user(name, utsname()->domainname, nlen))
 		err = 0;
 
 out:
diff --git a/arch/sparc/kernel/sys_sunos.c b/arch/sparc/kernel/sys_sunos.c
index aa0fb2efb615..9d2cd97d1c3a 100644
--- a/arch/sparc/kernel/sys_sunos.c
+++ b/arch/sparc/kernel/sys_sunos.c
@@ -483,13 +483,18 @@ asmlinkage int sunos_uname(struct sunos_utsname __user *name)
 {
 	int ret;
 	down_read(&uts_sem);
-	ret = copy_to_user(&name->sname[0], &system_utsname.sysname[0], sizeof(name->sname) - 1);
+	ret = copy_to_user(&name->sname[0], &utsname()->sysname[0],
+			   sizeof(name->sname) - 1);
 	if (!ret) {
-		ret |= __copy_to_user(&name->nname[0], &system_utsname.nodename[0], sizeof(name->nname) - 1);
+		ret |= __copy_to_user(&name->nname[0], &utsname()->nodename[0],
+				      sizeof(name->nname) - 1);
 		ret |= __put_user('\0', &name->nname[8]);
-		ret |= __copy_to_user(&name->rel[0], &system_utsname.release[0], sizeof(name->rel) - 1);
-		ret |= __copy_to_user(&name->ver[0], &system_utsname.version[0], sizeof(name->ver) - 1);
-		ret |= __copy_to_user(&name->mach[0], &system_utsname.machine[0], sizeof(name->mach) - 1);
+		ret |= __copy_to_user(&name->rel[0], &utsname()->release[0],
+				      sizeof(name->rel) - 1);
+		ret |= __copy_to_user(&name->ver[0], &utsname()->version[0],
+				      sizeof(name->ver) - 1);
+		ret |= __copy_to_user(&name->mach[0], &utsname()->machine[0],
+				      sizeof(name->mach) - 1);
 	}
 	up_read(&uts_sem);
 	return ret ? -EFAULT : 0;
diff --git a/arch/sparc64/kernel/sys_sparc.c b/arch/sparc64/kernel/sys_sparc.c
index c608c947e6c3..89ac435aacc0 100644
--- a/arch/sparc64/kernel/sys_sparc.c
+++ b/arch/sparc64/kernel/sys_sparc.c
@@ -712,13 +712,13 @@ asmlinkage long sys_getdomainname(char __user *name, int len)
 
  	down_read(&uts_sem);
  	
-	nlen = strlen(system_utsname.domainname) + 1;
+	nlen = strlen(utsname()->domainname) + 1;
 	err = -EINVAL;
 	if (nlen > len)
 		goto out;
 
 	err = -EFAULT;
-	if (!copy_to_user(name, system_utsname.domainname, nlen))
+	if (!copy_to_user(name, utsname()->domainname, nlen))
 		err = 0;
 
 out:
diff --git a/arch/sparc64/kernel/sys_sunos32.c b/arch/sparc64/kernel/sys_sunos32.c
index 87ebdf858a3a..953296b73f3f 100644
--- a/arch/sparc64/kernel/sys_sunos32.c
+++ b/arch/sparc64/kernel/sys_sunos32.c
@@ -439,16 +439,16 @@ asmlinkage int sunos_uname(struct sunos_utsname __user *name)
 	int ret;
 
 	down_read(&uts_sem);
-	ret = copy_to_user(&name->sname[0], &system_utsname.sysname[0],
+	ret = copy_to_user(&name->sname[0], &utsname()->sysname[0],
 			   sizeof(name->sname) - 1);
-	ret |= copy_to_user(&name->nname[0], &system_utsname.nodename[0],
+	ret |= copy_to_user(&name->nname[0], &utsname()->nodename[0],
 			    sizeof(name->nname) - 1);
 	ret |= put_user('\0', &name->nname[8]);
-	ret |= copy_to_user(&name->rel[0], &system_utsname.release[0],
+	ret |= copy_to_user(&name->rel[0], &utsname()->release[0],
 			    sizeof(name->rel) - 1);
-	ret |= copy_to_user(&name->ver[0], &system_utsname.version[0],
+	ret |= copy_to_user(&name->ver[0], &utsname()->version[0],
 			    sizeof(name->ver) - 1);
-	ret |= copy_to_user(&name->mach[0], &system_utsname.machine[0],
+	ret |= copy_to_user(&name->mach[0], &utsname()->machine[0],
 			    sizeof(name->mach) - 1);
 	up_read(&uts_sem);
 	return (ret ? -EFAULT : 0);
diff --git a/arch/sparc64/solaris/misc.c b/arch/sparc64/solaris/misc.c
index 9c581328e76a..9ed997982f8d 100644
--- a/arch/sparc64/solaris/misc.c
+++ b/arch/sparc64/solaris/misc.c
@@ -249,7 +249,7 @@ asmlinkage int solaris_utssys(u32 buf, u32 flags, int which, u32 buf2)
 		/* Let's cheat */
 		err  = set_utsfield(v->sysname, "SunOS", 1, 0);
 		down_read(&uts_sem);
-		err |= set_utsfield(v->nodename, system_utsname.nodename,
+		err |= set_utsfield(v->nodename, utsname()->nodename,
 				    1, 1);
 		up_read(&uts_sem);
 		err |= set_utsfield(v->release, "2.6", 0, 0);
@@ -273,7 +273,7 @@ asmlinkage int solaris_utsname(u32 buf)
 	/* Why should we not lie a bit? */
 	down_read(&uts_sem);
 	err  = set_utsfield(v->sysname, "SunOS", 0, 0);
-	err |= set_utsfield(v->nodename, system_utsname.nodename, 1, 1);
+	err |= set_utsfield(v->nodename, utsname()->nodename, 1, 1);
 	err |= set_utsfield(v->release, "5.6", 0, 0);
 	err |= set_utsfield(v->version, "Generic", 0, 0);
 	err |= set_utsfield(v->machine, machine(), 0, 0);
@@ -305,7 +305,7 @@ asmlinkage int solaris_sysinfo(int cmd, u32 buf, s32 count)
 	case SI_HOSTNAME:
 		r = buffer + 256;
 		down_read(&uts_sem);
-		for (p = system_utsname.nodename, q = buffer; 
+		for (p = utsname()->nodename, q = buffer;
 		     q < r && *p && *p != '.'; *q++ = *p++);
 		up_read(&uts_sem);
 		*q = 0;
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index 773a134e7fdb..a67dcbd78de4 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -106,9 +106,9 @@ void mconsole_version(struct mc_request *req)
 {
 	char version[256];
 
-	sprintf(version, "%s %s %s %s %s", system_utsname.sysname,
-		system_utsname.nodename, system_utsname.release,
-		system_utsname.version, system_utsname.machine);
+	sprintf(version, "%s %s %s %s %s", utsname()->sysname,
+		utsname()->nodename, utsname()->release,
+		utsname()->version, utsname()->machine);
 	mconsole_reply(req, version, 0, 0);
 }
 
diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c
index 48cf88dd02d4..abf14aaf905f 100644
--- a/arch/um/kernel/syscall.c
+++ b/arch/um/kernel/syscall.c
@@ -110,7 +110,7 @@ long sys_uname(struct old_utsname __user * name)
 	if (!name)
 		return -EFAULT;
 	down_read(&uts_sem);
-	err = copy_to_user(name, &system_utsname, sizeof (*name));
+	err = copy_to_user(name, utsname(), sizeof (*name));
 	up_read(&uts_sem);
 	return err?-EFAULT:0;
 }
@@ -126,21 +126,21 @@ long sys_olduname(struct oldold_utsname __user * name)
 
   	down_read(&uts_sem);
 
-	error = __copy_to_user(&name->sysname,&system_utsname.sysname,
+	error = __copy_to_user(&name->sysname, &utsname()->sysname,
 			       __OLD_UTS_LEN);
-	error |= __put_user(0,name->sysname+__OLD_UTS_LEN);
-	error |= __copy_to_user(&name->nodename,&system_utsname.nodename,
+	error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
+	error |= __copy_to_user(&name->nodename, &utsname()->nodename,
 				__OLD_UTS_LEN);
-	error |= __put_user(0,name->nodename+__OLD_UTS_LEN);
-	error |= __copy_to_user(&name->release,&system_utsname.release,
+	error |= __put_user(0, name->nodename + __OLD_UTS_LEN);
+	error |= __copy_to_user(&name->release, &utsname()->release,
 				__OLD_UTS_LEN);
-	error |= __put_user(0,name->release+__OLD_UTS_LEN);
-	error |= __copy_to_user(&name->version,&system_utsname.version,
+	error |= __put_user(0, name->release + __OLD_UTS_LEN);
+	error |= __copy_to_user(&name->version, &utsname()->version,
 				__OLD_UTS_LEN);
-	error |= __put_user(0,name->version+__OLD_UTS_LEN);
-	error |= __copy_to_user(&name->machine,&system_utsname.machine,
+	error |= __put_user(0, name->version + __OLD_UTS_LEN);
+	error |= __copy_to_user(&name->machine, &utsname()->machine,
 				__OLD_UTS_LEN);
-	error |= __put_user(0,name->machine+__OLD_UTS_LEN);
+	error |= __put_user(0, name->machine + __OLD_UTS_LEN);
 
 	up_read(&uts_sem);
 
diff --git a/arch/um/sys-x86_64/syscalls.c b/arch/um/sys-x86_64/syscalls.c
index 6fce9f45dfdc..73ce4463f70c 100644
--- a/arch/um/sys-x86_64/syscalls.c
+++ b/arch/um/sys-x86_64/syscalls.c
@@ -21,7 +21,7 @@ asmlinkage long sys_uname64(struct new_utsname __user * name)
 {
 	int err;
 	down_read(&uts_sem);
-	err = copy_to_user(name, &system_utsname, sizeof (*name));
+	err = copy_to_user(name, utsname(), sizeof (*name));
 	up_read(&uts_sem);
 	if (personality(current->personality) == PER_LINUX32)
 		err |= copy_to_user(&name->machine, "i686", 5);
diff --git a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c
index f280d3665f4b..26a01717cc1a 100644
--- a/arch/x86_64/ia32/sys_ia32.c
+++ b/arch/x86_64/ia32/sys_ia32.c
@@ -784,36 +784,36 @@ asmlinkage long sys32_olduname(struct oldold_utsname __user * name)
 
 	if (!name)
 		return -EFAULT;
-	if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname)))
+	if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname)))
 		return -EFAULT;
   
   	down_read(&uts_sem);
-	
-	err = __copy_to_user(&name->sysname,&system_utsname.sysname,
+
+	err = __copy_to_user(&name->sysname,&utsname()->sysname,
 				__OLD_UTS_LEN);
 	err |= __put_user(0,name->sysname+__OLD_UTS_LEN);
-	err |= __copy_to_user(&name->nodename,&system_utsname.nodename,
+	err |= __copy_to_user(&name->nodename,&utsname()->nodename,
 				__OLD_UTS_LEN);
 	err |= __put_user(0,name->nodename+__OLD_UTS_LEN);
-	err |= __copy_to_user(&name->release,&system_utsname.release,
+	err |= __copy_to_user(&name->release,&utsname()->release,
 				__OLD_UTS_LEN);
 	err |= __put_user(0,name->release+__OLD_UTS_LEN);
-	err |= __copy_to_user(&name->version,&system_utsname.version,
+	err |= __copy_to_user(&name->version,&utsname()->version,
 				__OLD_UTS_LEN);
 	err |= __put_user(0,name->version+__OLD_UTS_LEN);
-	 { 
-		 char *arch = "x86_64";
-		 if (personality(current->personality) == PER_LINUX32)
-			 arch = "i686";
+	{
+		char *arch = "x86_64";
+		if (personality(current->personality) == PER_LINUX32)
+			arch = "i686";
 		 
-		 err |= __copy_to_user(&name->machine,arch,strlen(arch)+1);
-	 }
-	
-	 up_read(&uts_sem);
-	 
-	 err = err ? -EFAULT : 0;
-	 
-	 return err;
+		err |= __copy_to_user(&name->machine, arch, strlen(arch)+1);
+	}
+
+	up_read(&uts_sem);
+
+	err = err ? -EFAULT : 0;
+
+	return err;
 }
 
 long sys32_uname(struct old_utsname __user * name)
@@ -822,7 +822,7 @@ long sys32_uname(struct old_utsname __user * name)
 	if (!name)
 		return -EFAULT;
 	down_read(&uts_sem);
-	err=copy_to_user(name, &system_utsname, sizeof (*name));
+	err = copy_to_user(name, utsname(), sizeof (*name));
 	up_read(&uts_sem);
 	if (personality(current->personality) == PER_LINUX32) 
 		err |= copy_to_user(&name->machine, "i686", 5);
diff --git a/arch/x86_64/kernel/sys_x86_64.c b/arch/x86_64/kernel/sys_x86_64.c
index 6449ea8fe756..76bf7c241fe4 100644
--- a/arch/x86_64/kernel/sys_x86_64.c
+++ b/arch/x86_64/kernel/sys_x86_64.c
@@ -148,7 +148,7 @@ asmlinkage long sys_uname(struct new_utsname __user * name)
 {
 	int err;
 	down_read(&uts_sem);
-	err = copy_to_user(name, &system_utsname, sizeof (*name));
+	err = copy_to_user(name, utsname(), sizeof (*name));
 	up_read(&uts_sem);
 	if (personality(current->personality) == PER_LINUX32) 
 		err |= copy_to_user(&name->machine, "i686", 5); 		
diff --git a/arch/xtensa/kernel/syscalls.c b/arch/xtensa/kernel/syscalls.c
index 4688ba2db84d..37c90ca5b98d 100644
--- a/arch/xtensa/kernel/syscalls.c
+++ b/arch/xtensa/kernel/syscalls.c
@@ -128,7 +128,7 @@ out:
 
 int sys_uname(struct old_utsname * name)
 {
-	if (name && !copy_to_user(name, &system_utsname, sizeof (*name)))
+	if (name && !copy_to_user(name, utsname(), sizeof (*name)))
 		return 0;
 	return -EFAULT;
 }
diff --git a/drivers/char/random.c b/drivers/char/random.c
index b430a12eb819..07f47a0208a7 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -889,8 +889,8 @@ static void init_std_data(struct entropy_store *r)
 
 	do_gettimeofday(&tv);
 	add_entropy_words(r, (__u32 *)&tv, sizeof(tv)/4);
-	add_entropy_words(r, (__u32 *)&system_utsname,
-			  sizeof(system_utsname)/4);
+	add_entropy_words(r, (__u32 *)utsname(),
+			  sizeof(*(utsname()))/4);
 }
 
 static int __init rand_initialize(void)
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 0e9ba0b9d71e..c78762051da4 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -772,12 +772,12 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
 	separator[1] = 0; 
 
 	memset(vol->source_rfc1001_name,0x20,15);
-	for(i=0;i < strnlen(system_utsname.nodename,15);i++) {
+	for(i=0;i < strnlen(utsname()->nodename,15);i++) {
 		/* does not have to be a perfect mapping since the field is
 		informational, only used for servers that do not support
 		port 445 and it can be overridden at mount time */
 		vol->source_rfc1001_name[i] = 
-			toupper(system_utsname.nodename[i]);
+			toupper(utsname()->nodename[i]);
 	}
 	vol->source_rfc1001_name[15] = 0;
 	/* null target name indicates to use *SMBSERVR default called name
@@ -2153,7 +2153,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 				  32, nls_codepage);
 		bcc_ptr += 2 * bytes_returned;
 		bytes_returned =
-		    cifs_strtoUCS((__le16 *) bcc_ptr, system_utsname.release,
+		    cifs_strtoUCS((__le16 *) bcc_ptr, utsname()->release,
 				  32, nls_codepage);
 		bcc_ptr += 2 * bytes_returned;
 		bcc_ptr += 2;
@@ -2180,8 +2180,8 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 		}
 		strcpy(bcc_ptr, "Linux version ");
 		bcc_ptr += strlen("Linux version ");
-		strcpy(bcc_ptr, system_utsname.release);
-		bcc_ptr += strlen(system_utsname.release) + 1;
+		strcpy(bcc_ptr, utsname()->release);
+		bcc_ptr += strlen(utsname()->release) + 1;
 		strcpy(bcc_ptr, CIFS_NETWORK_OPSYS);
 		bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1;
 	}
@@ -2445,7 +2445,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
 				  32, nls_codepage);
 		bcc_ptr += 2 * bytes_returned;
 		bytes_returned =
-		    cifs_strtoUCS((__le16 *) bcc_ptr, system_utsname.release, 32,
+		    cifs_strtoUCS((__le16 *) bcc_ptr, utsname()->release, 32,
 				  nls_codepage);
 		bcc_ptr += 2 * bytes_returned;
 		bcc_ptr += 2;	/* null terminate Linux version */
@@ -2462,8 +2462,8 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
 	} else {		/* ASCII */
 		strcpy(bcc_ptr, "Linux version ");
 		bcc_ptr += strlen("Linux version ");
-		strcpy(bcc_ptr, system_utsname.release);
-		bcc_ptr += strlen(system_utsname.release) + 1;
+		strcpy(bcc_ptr, utsname()->release);
+		bcc_ptr += strlen(utsname()->release) + 1;
 		strcpy(bcc_ptr, CIFS_NETWORK_OPSYS);
 		bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1;
 		bcc_ptr++;	/* empty domain field */
@@ -2836,7 +2836,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 				  32, nls_codepage);
 		bcc_ptr += 2 * bytes_returned;
 		bytes_returned =
-		    cifs_strtoUCS((__le16 *) bcc_ptr, system_utsname.release, 32,
+		    cifs_strtoUCS((__le16 *) bcc_ptr, utsname()->release, 32,
 				  nls_codepage);
 		bcc_ptr += 2 * bytes_returned;
 		bcc_ptr += 2;	/* null term version string */
@@ -2888,8 +2888,8 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 
 		strcpy(bcc_ptr, "Linux version ");
 		bcc_ptr += strlen("Linux version ");
-		strcpy(bcc_ptr, system_utsname.release);
-		bcc_ptr += strlen(system_utsname.release) + 1;
+		strcpy(bcc_ptr, utsname()->release);
+		bcc_ptr += strlen(utsname()->release) + 1;
 		strcpy(bcc_ptr, CIFS_NETWORK_OPSYS);
 		bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1;
 		bcc_ptr++;	/* null domain */
diff --git a/fs/exec.c b/fs/exec.c
index 6270f8f20a63..d993ea1a81ae 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1318,7 +1318,7 @@ static void format_corename(char *corename, const char *pattern, long signr)
 			case 'h':
 				down_read(&uts_sem);
 				rc = snprintf(out_ptr, out_end - out_ptr,
-					      "%s", system_utsname.nodename);
+					      "%s", utsname()->nodename);
 				up_read(&uts_sem);
 				if (rc > out_end - out_ptr)
 					goto out;
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 271e2165fff6..0116729cec5f 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -129,11 +129,11 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl)
 	nlmclnt_next_cookie(&argp->cookie);
 	argp->state   = nsm_local_state;
 	memcpy(&lock->fh, NFS_FH(fl->fl_file->f_dentry->d_inode), sizeof(struct nfs_fh));
-	lock->caller  = system_utsname.nodename;
+	lock->caller  = utsname()->nodename;
 	lock->oh.data = req->a_owner;
 	lock->oh.len  = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s",
 				(unsigned int)fl->fl_u.nfs_fl.owner->pid,
-				system_utsname.nodename);
+				utsname()->nodename);
 	lock->svid = fl->fl_u.nfs_fl.owner->pid;
 	lock->fl.fl_start = fl->fl_start;
 	lock->fl.fl_end = fl->fl_end;
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 5954dcb497e4..a816b920d431 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -145,7 +145,7 @@ xdr_encode_common(struct rpc_rqst *rqstp, u32 *p, struct nsm_args *argp)
 	 */
 	sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(argp->addr));
 	if (!(p = xdr_encode_string(p, buffer))
-	 || !(p = xdr_encode_string(p, system_utsname.nodename)))
+	 || !(p = xdr_encode_string(p, utsname()->nodename)))
 		return ERR_PTR(-EIO);
 	*p++ = htonl(argp->prog);
 	*p++ = htonl(argp->vers);
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index c9d419703cf3..93c00ee7189d 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -325,7 +325,7 @@ static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock)
 {
 	locks_copy_lock(&call->a_args.lock.fl, &lock->fl);
 	memcpy(&call->a_args.lock.fh, &lock->fh, sizeof(call->a_args.lock.fh));
-	call->a_args.lock.caller = system_utsname.nodename;
+	call->a_args.lock.caller = utsname()->nodename;
 	call->a_args.lock.oh.len = lock->oh.len;
 
 	/* set default data area */
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 033ea4ac2c30..61c46facf257 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -515,7 +515,7 @@ nlmclt_decode_res(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
  */
 #define NLM_void_sz		0
 #define NLM_cookie_sz		1+XDR_QUADLEN(NLM_MAXCOOKIELEN)
-#define NLM_caller_sz		1+XDR_QUADLEN(sizeof(system_utsname.nodename))
+#define NLM_caller_sz		1+XDR_QUADLEN(sizeof(utsname()->nodename))
 #define NLM_netobj_sz		1+XDR_QUADLEN(XDR_MAX_NETOBJ)
 /* #define NLM_owner_sz		1+XDR_QUADLEN(NLM_MAXOWNER) */
 #define NLM_fhandle_sz		1+XDR_QUADLEN(NFS2_FHSIZE)
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index c0a754ecdee6..1d656a645199 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -312,7 +312,7 @@ static int __init root_nfs_name(char *name)
 	/* Override them by options set on kernel command-line */
 	root_nfs_parse(name, buf);
 
-	cp = system_utsname.nodename;
+	cp = utsname()->nodename;
 	if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) {
 		printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n");
 		return -1;
diff --git a/include/asm-i386/elf.h b/include/asm-i386/elf.h
index db4344d9f73f..3a05436f31c0 100644
--- a/include/asm-i386/elf.h
+++ b/include/asm-i386/elf.h
@@ -112,7 +112,7 @@ typedef struct user_fxsr_struct elf_fpxregset_t;
    For the moment, we have only optimizations for the Intel generations,
    but that could change... */
 
-#define ELF_PLATFORM  (system_utsname.machine)
+#define ELF_PLATFORM  (utsname()->machine)
 
 #define SET_PERSONALITY(ex, ibcs2) do { } while (0)
 
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 0d92c468d55a..47b7dbd647a6 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -80,7 +80,7 @@ struct nlm_wait;
 /*
  * Memory chunk for NLM client RPC request.
  */
-#define NLMCLNT_OHSIZE		(sizeof(system_utsname.nodename)+10)
+#define NLMCLNT_OHSIZE		(sizeof(utsname()->nodename)+10)
 struct nlm_rqst {
 	unsigned int		a_flags;	/* initial RPC task flags */
 	struct nlm_host *	a_host;		/* host handle */
diff --git a/kernel/sys.c b/kernel/sys.c
index 398d57923f95..3a4776e8f16e 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1655,7 +1655,7 @@ asmlinkage long sys_newuname(struct new_utsname __user * name)
 	int errno = 0;
 
 	down_read(&uts_sem);
-	if (copy_to_user(name,&system_utsname,sizeof *name))
+	if (copy_to_user(name, utsname(), sizeof *name))
 		errno = -EFAULT;
 	up_read(&uts_sem);
 	return errno;
@@ -1673,8 +1673,8 @@ asmlinkage long sys_sethostname(char __user *name, int len)
 	down_write(&uts_sem);
 	errno = -EFAULT;
 	if (!copy_from_user(tmp, name, len)) {
-		memcpy(system_utsname.nodename, tmp, len);
-		system_utsname.nodename[len] = 0;
+		memcpy(utsname()->nodename, tmp, len);
+		utsname()->nodename[len] = 0;
 		errno = 0;
 	}
 	up_write(&uts_sem);
@@ -1690,11 +1690,11 @@ asmlinkage long sys_gethostname(char __user *name, int len)
 	if (len < 0)
 		return -EINVAL;
 	down_read(&uts_sem);
-	i = 1 + strlen(system_utsname.nodename);
+	i = 1 + strlen(utsname()->nodename);
 	if (i > len)
 		i = len;
 	errno = 0;
-	if (copy_to_user(name, system_utsname.nodename, i))
+	if (copy_to_user(name, utsname()->nodename, i))
 		errno = -EFAULT;
 	up_read(&uts_sem);
 	return errno;
@@ -1719,8 +1719,8 @@ asmlinkage long sys_setdomainname(char __user *name, int len)
 	down_write(&uts_sem);
 	errno = -EFAULT;
 	if (!copy_from_user(tmp, name, len)) {
-		memcpy(system_utsname.domainname, tmp, len);
-		system_utsname.domainname[len] = 0;
+		memcpy(utsname()->domainname, tmp, len);
+		utsname()->domainname[len] = 0;
 		errno = 0;
 	}
 	up_write(&uts_sem);
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 1fbb38415b19..99d50065aa79 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -805,7 +805,7 @@ static void __init ic_do_bootp_ext(u8 *ext)
 			}
 			break;
 		case 12:	/* Host name */
-			ic_bootp_string(system_utsname.nodename, ext+1, *ext, __NEW_UTS_LEN);
+			ic_bootp_string(utsname()->nodename, ext+1, *ext, __NEW_UTS_LEN);
 			ic_host_name_set = 1;
 			break;
 		case 15:	/* Domain name (DNS) */
@@ -816,7 +816,7 @@ static void __init ic_do_bootp_ext(u8 *ext)
 				ic_bootp_string(root_server_path, ext+1, *ext, sizeof(root_server_path));
 			break;
 		case 40:	/* NIS Domain name (_not_ DNS) */
-			ic_bootp_string(system_utsname.domainname, ext+1, *ext, __NEW_UTS_LEN);
+			ic_bootp_string(utsname()->domainname, ext+1, *ext, __NEW_UTS_LEN);
 			break;
 	}
 }
@@ -1368,7 +1368,7 @@ static int __init ip_auto_config(void)
 	printk(", mask=%u.%u.%u.%u", NIPQUAD(ic_netmask));
 	printk(", gw=%u.%u.%u.%u", NIPQUAD(ic_gateway));
 	printk(",\n     host=%s, domain=%s, nis-domain=%s",
-	       system_utsname.nodename, ic_domain, system_utsname.domainname);
+	       utsname()->nodename, ic_domain, utsname()->domainname);
 	printk(",\n     bootserver=%u.%u.%u.%u", NIPQUAD(ic_servaddr));
 	printk(", rootserver=%u.%u.%u.%u", NIPQUAD(root_server_addr));
 	printk(", rootpath=%s", root_server_path);
@@ -1478,11 +1478,11 @@ static int __init ip_auto_config_setup(char *addrs)
 			case 4:
 				if ((dp = strchr(ip, '.'))) {
 					*dp++ = '\0';
-					strlcpy(system_utsname.domainname, dp,
-						sizeof(system_utsname.domainname));
+					strlcpy(utsname()->domainname, dp,
+						sizeof(utsname()->domainname));
 				}
-				strlcpy(system_utsname.nodename, ip,
-					sizeof(system_utsname.nodename));
+				strlcpy(utsname()->nodename, ip,
+					sizeof(utsname()->nodename));
 				ic_host_name_set = 1;
 				break;
 			case 5:
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 124ff0ceb55b..78696f2dc7d6 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -161,10 +161,10 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s
 	}
 
 	/* save the nodename */
-	clnt->cl_nodelen = strlen(system_utsname.nodename);
+	clnt->cl_nodelen = strlen(utsname()->nodename);
 	if (clnt->cl_nodelen > UNX_MAXNODENAME)
 		clnt->cl_nodelen = UNX_MAXNODENAME;
-	memcpy(clnt->cl_nodename, system_utsname.nodename, clnt->cl_nodelen);
+	memcpy(clnt->cl_nodename, utsname()->nodename, clnt->cl_nodelen);
 	return clnt;
 
 out_no_auth:
-- 
cgit v1.2.3


From 9ec52099e4b8678a60e9f93e41ad87885d64f3e6 Mon Sep 17 00:00:00 2001
From: Cedric Le Goater <clg@fr.ibm.com>
Date: Mon, 2 Oct 2006 02:19:00 -0700
Subject: [PATCH] replace cad_pid by a struct pid

There are a few places in the kernel where the init task is signaled.  The
ctrl+alt+del sequence is one them.  It kills a task, usually init, using a
cached pid (cad_pid).

This patch replaces the pid_t by a struct pid to avoid pid wrap around
problem.  The struct pid is initialized at boot time in init() and can be
modified through systctl with

	/proc/sys/kernel/cad_pid

[ I haven't found any distro using it ? ]

It also introduces a small helper routine kill_cad_pid() which is used
where it seemed ok to use cad_pid instead of pid 1.

[akpm@osdl.org: cleanups, build fix]
Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/mips/sgi-ip22/ip22-reset.c     |  3 ++-
 arch/mips/sgi-ip32/ip32-reset.c     |  2 +-
 arch/powerpc/platforms/iseries/mf.c |  2 +-
 drivers/char/nwbutton.c             |  5 ++---
 drivers/char/snsc_event.c           |  2 +-
 drivers/parisc/power.c              |  3 +--
 drivers/s390/s390mach.c             |  2 +-
 include/linux/sched.h               |  7 +++++++
 init/main.c                         |  2 ++
 kernel/sys.c                        |  6 +++---
 kernel/sysctl.c                     | 30 +++++++++++++++++++++++++++---
 11 files changed, 48 insertions(+), 16 deletions(-)

(limited to 'kernel/sys.c')

diff --git a/arch/mips/sgi-ip22/ip22-reset.c b/arch/mips/sgi-ip22/ip22-reset.c
index 8134220ed600..7a941ecff3bb 100644
--- a/arch/mips/sgi-ip22/ip22-reset.c
+++ b/arch/mips/sgi-ip22/ip22-reset.c
@@ -123,7 +123,8 @@ static inline void power_button(void)
 	if (machine_state & MACHINE_PANICED)
 		return;
 
-	if ((machine_state & MACHINE_SHUTTING_DOWN) || kill_proc(1,SIGINT,1)) {
+	if ((machine_state & MACHINE_SHUTTING_DOWN) ||
+			kill_cad_pid(SIGINT, 1)) {
 		/* No init process or button pressed twice.  */
 		sgi_machine_power_off();
 	}
diff --git a/arch/mips/sgi-ip32/ip32-reset.c b/arch/mips/sgi-ip32/ip32-reset.c
index 79ddb4605659..fd0932b2d521 100644
--- a/arch/mips/sgi-ip32/ip32-reset.c
+++ b/arch/mips/sgi-ip32/ip32-reset.c
@@ -120,7 +120,7 @@ static inline void ip32_power_button(void)
 	if (has_panicked)
 		return;
 
-	if (shuting_down || kill_proc(1, SIGINT, 1)) {
+	if (shuting_down || kill_cad_pid(SIGINT, 1)) {
 		/* No init process or button pressed twice.  */
 		ip32_machine_power_off();
 	}
diff --git a/arch/powerpc/platforms/iseries/mf.c b/arch/powerpc/platforms/iseries/mf.c
index 1a2c2a50f922..1983b640bac1 100644
--- a/arch/powerpc/platforms/iseries/mf.c
+++ b/arch/powerpc/platforms/iseries/mf.c
@@ -357,7 +357,7 @@ static int dma_and_signal_ce_msg(char *ce_msg,
  */
 static int shutdown(void)
 {
-	int rc = kill_proc(1, SIGINT, 1);
+	int rc = kill_cad_pid(SIGINT, 1);
 
 	if (rc) {
 		printk(KERN_ALERT "mf.c: SIGINT to init failed (%d), "
diff --git a/drivers/char/nwbutton.c b/drivers/char/nwbutton.c
index 7c57ebfa8640..ea1aa7764f8e 100644
--- a/drivers/char/nwbutton.c
+++ b/drivers/char/nwbutton.c
@@ -127,9 +127,8 @@ static void button_consume_callbacks (int bpcount)
 static void button_sequence_finished (unsigned long parameters)
 {
 #ifdef CONFIG_NWBUTTON_REBOOT		/* Reboot using button is enabled */
-	if (button_press_count == reboot_count) {
-		kill_proc (1, SIGINT, 1);	/* Ask init to reboot us */
-	}
+	if (button_press_count == reboot_count)
+		kill_cad_pid(SIGINT, 1);	/* Ask init to reboot us */
 #endif /* CONFIG_NWBUTTON_REBOOT */
 	button_consume_callbacks (button_press_count);
 	bcount = sprintf (button_output_buffer, "%d\n", button_press_count);
diff --git a/drivers/char/snsc_event.c b/drivers/char/snsc_event.c
index d12d4f629cec..864854c58866 100644
--- a/drivers/char/snsc_event.c
+++ b/drivers/char/snsc_event.c
@@ -220,7 +220,7 @@ scdrv_dispatch_event(char *event, int len)
 			       " Sending SIGPWR to init...\n");
 
 		/* give a SIGPWR signal to init proc */
-		kill_proc(1, SIGPWR, 0);
+		kill_cad_pid(SIGPWR, 0);
 	} else {
 		/* print to system log */
 		printk("%s|$(0x%x)%s\n", severity, esp_code, desc);
diff --git a/drivers/parisc/power.c b/drivers/parisc/power.c
index fad5a33bf0fa..4a9f025a6b58 100644
--- a/drivers/parisc/power.c
+++ b/drivers/parisc/power.c
@@ -84,8 +84,7 @@
 
 static void deferred_poweroff(void *dummy)
 {
-	extern int cad_pid;	/* from kernel/sys.c */
-	if (kill_proc(cad_pid, SIGINT, 1)) {
+	if (kill_cad_pid(SIGINT, 1)) {
 		/* just in case killing init process failed */
 		machine_power_off();
 	}
diff --git a/drivers/s390/s390mach.c b/drivers/s390/s390mach.c
index 479364d0332a..e088b5e28711 100644
--- a/drivers/s390/s390mach.c
+++ b/drivers/s390/s390mach.c
@@ -208,7 +208,7 @@ s390_handle_mcck(void)
 		 */
 		__ctl_clear_bit(14, 24);	/* Disable WARNING MCH */
 		if (xchg(&mchchk_wng_posted, 1) == 0)
-			kill_proc(1, SIGPWR, 1);
+			kill_cad_pid(SIGPWR, 1);
 	}
 #endif
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9ba959e34266..38530232d92f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1065,6 +1065,8 @@ static inline int is_init(struct task_struct *tsk)
 	return tsk->pid == 1;
 }
 
+extern struct pid *cad_pid;
+
 extern void free_task(struct task_struct *tsk);
 #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
 
@@ -1292,6 +1294,11 @@ extern int send_group_sigqueue(int, struct sigqueue *,  struct task_struct *);
 extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *);
 extern int do_sigaltstack(const stack_t __user *, stack_t __user *, unsigned long);
 
+static inline int kill_cad_pid(int sig, int priv)
+{
+	return kill_pid(cad_pid, sig, priv);
+}
+
 /* These can be the second arg to send_sig_info/send_group_sig_info.  */
 #define SEND_SIG_NOINFO ((struct siginfo *) 0)
 #define SEND_SIG_PRIV	((struct siginfo *) 1)
diff --git a/init/main.c b/init/main.c
index a49b00235bda..ee123243fb53 100644
--- a/init/main.c
+++ b/init/main.c
@@ -721,6 +721,8 @@ static int init(void * unused)
 	 */
 	child_reaper = current;
 
+	cad_pid = task_pid(current);
+
 	smp_prepare_cpus(max_cpus);
 
 	do_pre_smp_initcalls();
diff --git a/kernel/sys.c b/kernel/sys.c
index 3a4776e8f16e..2314867ae34f 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -92,7 +92,8 @@ EXPORT_SYMBOL(fs_overflowgid);
  */
 
 int C_A_D = 1;
-int cad_pid = 1;
+struct pid *cad_pid;
+EXPORT_SYMBOL(cad_pid);
 
 /*
  *	Notifier list for kernel code which wants to be called
@@ -773,10 +774,9 @@ void ctrl_alt_del(void)
 	if (C_A_D)
 		schedule_work(&cad_work);
 	else
-		kill_proc(cad_pid, SIGINT, 1);
+		kill_cad_pid(SIGINT, 1);
 }
 	
-
 /*
  * Unprivileged users may change the real gid to the effective gid
  * or vice versa.  (BSD-style)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index a79ccf9d113b..8020fb273c4f 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -68,7 +68,6 @@ extern int sysrq_enabled;
 extern int core_uses_pid;
 extern int suid_dumpable;
 extern char core_pattern[];
-extern int cad_pid;
 extern int pid_max;
 extern int min_free_kbytes;
 extern int printk_ratelimit_jiffies;
@@ -137,6 +136,9 @@ static int parse_table(int __user *, int, void __user *, size_t __user *,
 static int proc_do_uts_string(ctl_table *table, int write, struct file *filp,
 		  void __user *buffer, size_t *lenp, loff_t *ppos);
 
+static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp,
+		  void __user *buffer, size_t *lenp, loff_t *ppos);
+
 static ctl_table root_table[];
 static struct ctl_table_header root_table_header =
 	{ root_table, LIST_HEAD_INIT(root_table_header.ctl_entry) };
@@ -543,10 +545,10 @@ static ctl_table kern_table[] = {
 	{
 		.ctl_name	= KERN_CADPID,
 		.procname	= "cad_pid",
-		.data		= &cad_pid,
+		.data		= NULL,
 		.maxlen		= sizeof (int),
 		.mode		= 0600,
-		.proc_handler	= &proc_dointvec,
+		.proc_handler	= &proc_do_cad_pid,
 	},
 	{
 		.ctl_name	= KERN_MAX_THREADS,
@@ -2427,6 +2429,28 @@ proc_minmax:
 }
 #endif
 
+static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp,
+			   void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct pid *new_pid;
+	pid_t tmp;
+	int r;
+
+	tmp = pid_nr(cad_pid);
+
+	r = __do_proc_dointvec(&tmp, table, write, filp, buffer,
+			       lenp, ppos, NULL, NULL);
+	if (r || !write)
+		return r;
+
+	new_pid = find_get_pid(tmp);
+	if (!new_pid)
+		return -ESRCH;
+
+	put_pid(xchg(&cad_pid, new_pid));
+	return 0;
+}
+
 #else /* CONFIG_PROC_FS */
 
 int proc_dostring(ctl_table *table, int write, struct file *filp,
-- 
cgit v1.2.3


From eabc069401bcf45bcc3f19e643017bf761780aa8 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Wed, 4 Oct 2006 02:17:04 -0700
Subject: [PATCH] Add SRCU-based notifier chains

This patch (as751) adds a new type of notifier chain, based on the SRCU
(Sleepable Read-Copy Update) primitives recently added to the kernel.  An
SRCU notifier chain is much like a blocking notifier chain, in that it must
be called in process context and its callout routines are allowed to sleep.
 The difference is that the chain's links are protected by the SRCU
mechanism rather than by an rw-semaphore, so calling the chain has
extremely low overhead: no memory barriers and no cache-line bouncing.  On
the other hand, unregistering from the chain is expensive and the chain
head requires special runtime initialization (plus cleanup if it is to be
deallocated).

SRCU notifiers are appropriate for notifiers that will be called very
frequently and for which unregistration occurs very seldom.  The proposed
"task notifier" scheme qualifies, as may some of the network notifiers.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Acked-by: Paul E. McKenney <paulmck@us.ibm.com>
Acked-by: Chandra Seetharaman <sekharan@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/notifier.h |  43 ++++++++++++++--
 include/linux/srcu.h     |   6 ++-
 kernel/sys.c             | 124 ++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 166 insertions(+), 7 deletions(-)

(limited to 'kernel/sys.c')

diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index 7ff386a6ae87..10a43ed0527e 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -12,9 +12,10 @@
 #include <linux/errno.h>
 #include <linux/mutex.h>
 #include <linux/rwsem.h>
+#include <linux/srcu.h>
 
 /*
- * Notifier chains are of three types:
+ * Notifier chains are of four types:
  *
  *	Atomic notifier chains: Chain callbacks run in interrupt/atomic
  *		context. Callouts are not allowed to block.
@@ -23,13 +24,27 @@
  *	Raw notifier chains: There are no restrictions on callbacks,
  *		registration, or unregistration.  All locking and protection
  *		must be provided by the caller.
+ *	SRCU notifier chains: A variant of blocking notifier chains, with
+ *		the same restrictions.
  *
  * atomic_notifier_chain_register() may be called from an atomic context,
- * but blocking_notifier_chain_register() must be called from a process
- * context.  Ditto for the corresponding _unregister() routines.
+ * but blocking_notifier_chain_register() and srcu_notifier_chain_register()
+ * must be called from a process context.  Ditto for the corresponding
+ * _unregister() routines.
  *
- * atomic_notifier_chain_unregister() and blocking_notifier_chain_unregister()
- * _must not_ be called from within the call chain.
+ * atomic_notifier_chain_unregister(), blocking_notifier_chain_unregister(),
+ * and srcu_notifier_chain_unregister() _must not_ be called from within
+ * the call chain.
+ *
+ * SRCU notifier chains are an alternative form of blocking notifier chains.
+ * They use SRCU (Sleepable Read-Copy Update) instead of rw-semaphores for
+ * protection of the chain links.  This means there is _very_ low overhead
+ * in srcu_notifier_call_chain(): no cache bounces and no memory barriers.
+ * As compensation, srcu_notifier_chain_unregister() is rather expensive.
+ * SRCU notifier chains should be used when the chain will be called very
+ * often but notifier_blocks will seldom be removed.  Also, SRCU notifier
+ * chains are slightly more difficult to use because they require special
+ * runtime initialization.
  */
 
 struct notifier_block {
@@ -52,6 +67,12 @@ struct raw_notifier_head {
 	struct notifier_block *head;
 };
 
+struct srcu_notifier_head {
+	struct mutex mutex;
+	struct srcu_struct srcu;
+	struct notifier_block *head;
+};
+
 #define ATOMIC_INIT_NOTIFIER_HEAD(name) do {	\
 		spin_lock_init(&(name)->lock);	\
 		(name)->head = NULL;		\
@@ -64,6 +85,11 @@ struct raw_notifier_head {
 		(name)->head = NULL;		\
 	} while (0)
 
+/* srcu_notifier_heads must be initialized and cleaned up dynamically */
+extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
+#define srcu_cleanup_notifier_head(name)	\
+		cleanup_srcu_struct(&(name)->srcu);
+
 #define ATOMIC_NOTIFIER_INIT(name) {				\
 		.lock = __SPIN_LOCK_UNLOCKED(name.lock),	\
 		.head = NULL }
@@ -72,6 +98,7 @@ struct raw_notifier_head {
 		.head = NULL }
 #define RAW_NOTIFIER_INIT(name)	{				\
 		.head = NULL }
+/* srcu_notifier_heads cannot be initialized statically */
 
 #define ATOMIC_NOTIFIER_HEAD(name)				\
 	struct atomic_notifier_head name =			\
@@ -91,6 +118,8 @@ extern int blocking_notifier_chain_register(struct blocking_notifier_head *,
 		struct notifier_block *);
 extern int raw_notifier_chain_register(struct raw_notifier_head *,
 		struct notifier_block *);
+extern int srcu_notifier_chain_register(struct srcu_notifier_head *,
+		struct notifier_block *);
 
 extern int atomic_notifier_chain_unregister(struct atomic_notifier_head *,
 		struct notifier_block *);
@@ -98,6 +127,8 @@ extern int blocking_notifier_chain_unregister(struct blocking_notifier_head *,
 		struct notifier_block *);
 extern int raw_notifier_chain_unregister(struct raw_notifier_head *,
 		struct notifier_block *);
+extern int srcu_notifier_chain_unregister(struct srcu_notifier_head *,
+		struct notifier_block *);
 
 extern int atomic_notifier_call_chain(struct atomic_notifier_head *,
 		unsigned long val, void *v);
@@ -105,6 +136,8 @@ extern int blocking_notifier_call_chain(struct blocking_notifier_head *,
 		unsigned long val, void *v);
 extern int raw_notifier_call_chain(struct raw_notifier_head *,
 		unsigned long val, void *v);
+extern int srcu_notifier_call_chain(struct srcu_notifier_head *,
+		unsigned long val, void *v);
 
 #define NOTIFY_DONE		0x0000		/* Don't care */
 #define NOTIFY_OK		0x0001		/* Suits me */
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 947fdab2ddb0..8a45367b5f3a 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -24,6 +24,9 @@
  *
  */
 
+#ifndef _LINUX_SRCU_H
+#define _LINUX_SRCU_H
+
 struct srcu_struct_array {
 	int c[2];
 };
@@ -46,4 +49,5 @@ int srcu_read_lock(struct srcu_struct *sp) __acquires(sp);
 void srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp);
 void synchronize_srcu(struct srcu_struct *sp);
 long srcu_batches_completed(struct srcu_struct *sp);
-void cleanup_srcu_struct(struct srcu_struct *sp);
+
+#endif
diff --git a/kernel/sys.c b/kernel/sys.c
index 2314867ae34f..fd5c71006775 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -153,7 +153,7 @@ static int __kprobes notifier_call_chain(struct notifier_block **nl,
 
 /*
  *	Atomic notifier chain routines.  Registration and unregistration
- *	use a mutex, and call_chain is synchronized by RCU (no locks).
+ *	use a spinlock, and call_chain is synchronized by RCU (no locks).
  */
 
 /**
@@ -401,6 +401,128 @@ int raw_notifier_call_chain(struct raw_notifier_head *nh,
 
 EXPORT_SYMBOL_GPL(raw_notifier_call_chain);
 
+/*
+ *	SRCU notifier chain routines.    Registration and unregistration
+ *	use a mutex, and call_chain is synchronized by SRCU (no locks).
+ */
+
+/**
+ *	srcu_notifier_chain_register - Add notifier to an SRCU notifier chain
+ *	@nh: Pointer to head of the SRCU notifier chain
+ *	@n: New entry in notifier chain
+ *
+ *	Adds a notifier to an SRCU notifier chain.
+ *	Must be called in process context.
+ *
+ *	Currently always returns zero.
+ */
+
+int srcu_notifier_chain_register(struct srcu_notifier_head *nh,
+		struct notifier_block *n)
+{
+	int ret;
+
+	/*
+	 * This code gets used during boot-up, when task switching is
+	 * not yet working and interrupts must remain disabled.  At
+	 * such times we must not call mutex_lock().
+	 */
+	if (unlikely(system_state == SYSTEM_BOOTING))
+		return notifier_chain_register(&nh->head, n);
+
+	mutex_lock(&nh->mutex);
+	ret = notifier_chain_register(&nh->head, n);
+	mutex_unlock(&nh->mutex);
+	return ret;
+}
+
+EXPORT_SYMBOL_GPL(srcu_notifier_chain_register);
+
+/**
+ *	srcu_notifier_chain_unregister - Remove notifier from an SRCU notifier chain
+ *	@nh: Pointer to head of the SRCU notifier chain
+ *	@n: Entry to remove from notifier chain
+ *
+ *	Removes a notifier from an SRCU notifier chain.
+ *	Must be called from process context.
+ *
+ *	Returns zero on success or %-ENOENT on failure.
+ */
+int srcu_notifier_chain_unregister(struct srcu_notifier_head *nh,
+		struct notifier_block *n)
+{
+	int ret;
+
+	/*
+	 * This code gets used during boot-up, when task switching is
+	 * not yet working and interrupts must remain disabled.  At
+	 * such times we must not call mutex_lock().
+	 */
+	if (unlikely(system_state == SYSTEM_BOOTING))
+		return notifier_chain_unregister(&nh->head, n);
+
+	mutex_lock(&nh->mutex);
+	ret = notifier_chain_unregister(&nh->head, n);
+	mutex_unlock(&nh->mutex);
+	synchronize_srcu(&nh->srcu);
+	return ret;
+}
+
+EXPORT_SYMBOL_GPL(srcu_notifier_chain_unregister);
+
+/**
+ *	srcu_notifier_call_chain - Call functions in an SRCU notifier chain
+ *	@nh: Pointer to head of the SRCU notifier chain
+ *	@val: Value passed unmodified to notifier function
+ *	@v: Pointer passed unmodified to notifier function
+ *
+ *	Calls each function in a notifier chain in turn.  The functions
+ *	run in a process context, so they are allowed to block.
+ *
+ *	If the return value of the notifier can be and'ed
+ *	with %NOTIFY_STOP_MASK then srcu_notifier_call_chain
+ *	will return immediately, with the return value of
+ *	the notifier function which halted execution.
+ *	Otherwise the return value is the return value
+ *	of the last notifier function called.
+ */
+
+int srcu_notifier_call_chain(struct srcu_notifier_head *nh,
+		unsigned long val, void *v)
+{
+	int ret;
+	int idx;
+
+	idx = srcu_read_lock(&nh->srcu);
+	ret = notifier_call_chain(&nh->head, val, v);
+	srcu_read_unlock(&nh->srcu, idx);
+	return ret;
+}
+
+EXPORT_SYMBOL_GPL(srcu_notifier_call_chain);
+
+/**
+ *	srcu_init_notifier_head - Initialize an SRCU notifier head
+ *	@nh: Pointer to head of the srcu notifier chain
+ *
+ *	Unlike other sorts of notifier heads, SRCU notifier heads require
+ *	dynamic initialization.  Be sure to call this routine before
+ *	calling any of the other SRCU notifier routines for this head.
+ *
+ *	If an SRCU notifier head is deallocated, it must first be cleaned
+ *	up by calling srcu_cleanup_notifier_head().  Otherwise the head's
+ *	per-cpu data (used by the SRCU mechanism) will leak.
+ */
+
+void srcu_init_notifier_head(struct srcu_notifier_head *nh)
+{
+	mutex_init(&nh->mutex);
+	init_srcu_struct(&nh->srcu);
+	nh->head = NULL;
+}
+
+EXPORT_SYMBOL_GPL(srcu_init_notifier_head);
+
 /**
  *	register_reboot_notifier - Register function to be called at reboot time
  *	@nb: Info about notifier function to be called
-- 
cgit v1.2.3


From e6a92013ba458804161c0c5b6d134d82204dc233 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Wed, 4 Oct 2006 02:17:05 -0700
Subject: [PATCH] SRCU: report out-of-memory errors

Currently the init_srcu_struct() routine has no way to report out-of-memory
errors.  This patch (as761) makes it return -ENOMEM when the per-cpu data
allocation fails.

The patch also makes srcu_init_notifier_head() report a BUG if a notifier
head can't be initialized.  Perhaps it should return -ENOMEM instead, but
in the most likely cases where this might occur I don't think any recovery
is possible.  Notifier chains generally are not created dynamically.

[akpm@osdl.org: avoid statement-with-side-effect in macro]
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Acked-by: Paul E. McKenney <paulmck@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/srcu.h | 2 +-
 kernel/srcu.c        | 5 +++--
 kernel/sys.c         | 3 ++-
 3 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'kernel/sys.c')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 8a45367b5f3a..aca0eee53930 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -43,7 +43,7 @@ struct srcu_struct {
 #define srcu_barrier()
 #endif /* #else #ifndef CONFIG_PREEMPT */
 
-void init_srcu_struct(struct srcu_struct *sp);
+int init_srcu_struct(struct srcu_struct *sp);
 void cleanup_srcu_struct(struct srcu_struct *sp);
 int srcu_read_lock(struct srcu_struct *sp) __acquires(sp);
 void srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp);
diff --git a/kernel/srcu.c b/kernel/srcu.c
index 7e1979f624ba..3507cabe963b 100644
--- a/kernel/srcu.c
+++ b/kernel/srcu.c
@@ -42,11 +42,12 @@
  * to any other function.  Each srcu_struct represents a separate domain
  * of SRCU protection.
  */
-void init_srcu_struct(struct srcu_struct *sp)
+int init_srcu_struct(struct srcu_struct *sp)
 {
 	sp->completed = 0;
-	sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array);
 	mutex_init(&sp->mutex);
+	sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array);
+	return (sp->per_cpu_ref ? 0 : -ENOMEM);
 }
 
 /*
diff --git a/kernel/sys.c b/kernel/sys.c
index fd5c71006775..98489d82801b 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -517,7 +517,8 @@ EXPORT_SYMBOL_GPL(srcu_notifier_call_chain);
 void srcu_init_notifier_head(struct srcu_notifier_head *nh)
 {
 	mutex_init(&nh->mutex);
-	init_srcu_struct(&nh->srcu);
+	if (init_srcu_struct(&nh->srcu) < 0)
+		BUG();
 	nh->head = NULL;
 }
 
-- 
cgit v1.2.3


From 65f27f38446e1976cc98fd3004b110fedcddd189 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 22 Nov 2006 14:55:48 +0000
Subject: WorkStruct: Pass the work_struct pointer instead of context data

Pass the work_struct pointer to the work function rather than context data.
The work function can use container_of() to work out the data.

For the cases where the container of the work_struct may go away the moment the
pending bit is cleared, it is made possible to defer the release of the
structure by deferring the clearing of the pending bit.

To make this work, an extra flag is introduced into the management side of the
work_struct.  This governs auto-release of the structure upon execution.

Ordinarily, the work queue executor would release the work_struct for further
scheduling or deallocation by clearing the pending bit prior to jumping to the
work function.  This means that, unless the driver makes some guarantee itself
that the work_struct won't go away, the work function may not access anything
else in the work_struct or its container lest they be deallocated..  This is a
problem if the auxiliary data is taken away (as done by the last patch).

However, if the pending bit is *not* cleared before jumping to the work
function, then the work function *may* access the work_struct and its container
with no problems.  But then the work function must itself release the
work_struct by calling work_release().

In most cases, automatic release is fine, so this is the default.  Special
initiators exist for the non-auto-release case (ending in _NAR).


Signed-Off-By: David Howells <dhowells@redhat.com>
---
 arch/x86_64/kernel/mce.c           |  6 +--
 arch/x86_64/kernel/smpboot.c       | 12 +++--
 arch/x86_64/kernel/time.c          |  4 +-
 block/as-iosched.c                 |  7 +--
 block/cfq-iosched.c                |  8 +--
 block/ll_rw_blk.c                  |  8 +--
 crypto/cryptomgr.c                 |  7 +--
 drivers/acpi/osl.c                 | 25 +++-------
 drivers/ata/libata-core.c          | 20 ++++----
 drivers/ata/libata-scsi.c          | 14 +++---
 drivers/ata/libata.h               |  4 +-
 drivers/block/floppy.c             |  6 +--
 drivers/char/random.c              |  6 +--
 drivers/char/sysrq.c               |  4 +-
 drivers/char/tty_io.c              | 31 ++++++------
 drivers/char/vt.c                  |  6 +--
 drivers/cpufreq/cpufreq.c          | 10 ++--
 drivers/input/keyboard/atkbd.c     |  6 +--
 drivers/input/serio/libps2.c       |  6 +--
 drivers/net/e1000/e1000_main.c     | 10 ++--
 drivers/pci/pcie/aer/aerdrv.c      |  2 +-
 drivers/pci/pcie/aer/aerdrv.h      |  2 +-
 drivers/pci/pcie/aer/aerdrv_core.c |  8 +--
 drivers/scsi/scsi_scan.c           |  7 +--
 drivers/scsi/scsi_sysfs.c          | 10 ++--
 fs/aio.c                           | 14 +++---
 fs/bio.c                           |  6 +--
 fs/file.c                          |  6 ++-
 fs/nfs/client.c                    |  2 +-
 fs/nfs/namespace.c                 |  9 ++--
 fs/nfs/nfs4_fs.h                   |  2 +-
 fs/nfs/nfs4renewd.c                |  5 +-
 include/linux/libata.h             |  3 +-
 include/linux/workqueue.h          | 99 +++++++++++++++++++++++++++++---------
 include/net/inet_timewait_sock.h   |  2 +-
 ipc/util.c                         |  7 ++-
 kernel/kmod.c                      | 16 +++---
 kernel/kthread.c                   | 13 +++--
 kernel/power/poweroff.c            |  4 +-
 kernel/sys.c                       |  4 +-
 kernel/workqueue.c                 | 19 +++-----
 mm/slab.c                          |  6 +--
 net/core/link_watch.c              |  6 +--
 net/ipv4/inet_timewait_sock.c      |  5 +-
 net/ipv4/tcp_minisocks.c           |  3 +-
 net/sunrpc/cache.c                 |  6 +--
 net/sunrpc/rpc_pipe.c              |  7 +--
 net/sunrpc/sched.c                 |  8 +--
 net/sunrpc/xprt.c                  |  7 +--
 net/sunrpc/xprtsock.c              | 18 ++++---
 security/keys/key.c                |  6 +--
 51 files changed, 293 insertions(+), 219 deletions(-)

(limited to 'kernel/sys.c')

diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index 5306f2630905..c7587fc39015 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -306,8 +306,8 @@ void mce_log_therm_throt_event(unsigned int cpu, __u64 status)
  */
 
 static int check_interval = 5 * 60; /* 5 minutes */
-static void mcheck_timer(void *data);
-static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer, NULL);
+static void mcheck_timer(struct work_struct *work);
+static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer);
 
 static void mcheck_check_cpu(void *info)
 {
@@ -315,7 +315,7 @@ static void mcheck_check_cpu(void *info)
 		do_machine_check(NULL, 0);
 }
 
-static void mcheck_timer(void *data)
+static void mcheck_timer(struct work_struct *work)
 {
 	on_each_cpu(mcheck_check_cpu, NULL, 1, 1);
 	schedule_delayed_work(&mcheck_work, check_interval * HZ);
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 62c2e747af58..9800147c4c68 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -753,14 +753,16 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta
 }
 
 struct create_idle {
+	struct work_struct work;
 	struct task_struct *idle;
 	struct completion done;
 	int cpu;
 };
 
-void do_fork_idle(void *_c_idle)
+void do_fork_idle(struct work_struct *work)
 {
-	struct create_idle *c_idle = _c_idle;
+	struct create_idle *c_idle =
+		container_of(work, struct create_idle, work);
 
 	c_idle->idle = fork_idle(c_idle->cpu);
 	complete(&c_idle->done);
@@ -775,10 +777,10 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
 	int timeout;
 	unsigned long start_rip;
 	struct create_idle c_idle = {
+		.work = __WORK_INITIALIZER(c_idle.work, do_fork_idle),
 		.cpu = cpu,
 		.done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
 	};
-	DECLARE_WORK(work, do_fork_idle, &c_idle);
 
 	/* allocate memory for gdts of secondary cpus. Hotplug is considered */
 	if (!cpu_gdt_descr[cpu].address &&
@@ -825,9 +827,9 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
 	 * thread.
 	 */
 	if (!keventd_up() || current_is_keventd())
-		work.func(work.data);
+		c_idle.work.func(&c_idle.work);
 	else {
-		schedule_work(&work);
+		schedule_work(&c_idle.work);
 		wait_for_completion(&c_idle.done);
 	}
 
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index e3ef544d2cfb..9f05bc9b2dad 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -563,7 +563,7 @@ static unsigned int cpufreq_delayed_issched = 0;
 static unsigned int cpufreq_init = 0;
 static struct work_struct cpufreq_delayed_get_work;
 
-static void handle_cpufreq_delayed_get(void *v)
+static void handle_cpufreq_delayed_get(struct work_struct *v)
 {
 	unsigned int cpu;
 	for_each_online_cpu(cpu) {
@@ -639,7 +639,7 @@ static struct notifier_block time_cpufreq_notifier_block = {
 
 static int __init cpufreq_tsc(void)
 {
-	INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL);
+	INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get);
 	if (!cpufreq_register_notifier(&time_cpufreq_notifier_block,
 				       CPUFREQ_TRANSITION_NOTIFIER))
 		cpufreq_init = 1;
diff --git a/block/as-iosched.c b/block/as-iosched.c
index 50b95e4c1425..f371c9359999 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -1274,9 +1274,10 @@ static void as_merged_requests(request_queue_t *q, struct request *req,
  *
  * FIXME! dispatch queue is not a queue at all!
  */
-static void as_work_handler(void *data)
+static void as_work_handler(struct work_struct *work)
 {
-	struct request_queue *q = data;
+	struct as_data *ad = container_of(work, struct as_data, antic_work);
+	struct request_queue *q = ad->q;
 	unsigned long flags;
 
 	spin_lock_irqsave(q->queue_lock, flags);
@@ -1332,7 +1333,7 @@ static void *as_init_queue(request_queue_t *q, elevator_t *e)
 	ad->antic_timer.function = as_antic_timeout;
 	ad->antic_timer.data = (unsigned long)q;
 	init_timer(&ad->antic_timer);
-	INIT_WORK(&ad->antic_work, as_work_handler, q);
+	INIT_WORK(&ad->antic_work, as_work_handler);
 
 	INIT_LIST_HEAD(&ad->fifo_list[REQ_SYNC]);
 	INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 1d9c3c70a9a0..6cec3a1dccb8 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1841,9 +1841,11 @@ queue_fail:
 	return 1;
 }
 
-static void cfq_kick_queue(void *data)
+static void cfq_kick_queue(struct work_struct *work)
 {
-	request_queue_t *q = data;
+	struct cfq_data *cfqd =
+		container_of(work, struct cfq_data, unplug_work);
+	request_queue_t *q = cfqd->queue;
 	unsigned long flags;
 
 	spin_lock_irqsave(q->queue_lock, flags);
@@ -1987,7 +1989,7 @@ static void *cfq_init_queue(request_queue_t *q, elevator_t *e)
 	cfqd->idle_class_timer.function = cfq_idle_class_timer;
 	cfqd->idle_class_timer.data = (unsigned long) cfqd;
 
-	INIT_WORK(&cfqd->unplug_work, cfq_kick_queue, q);
+	INIT_WORK(&cfqd->unplug_work, cfq_kick_queue);
 
 	cfqd->cfq_quantum = cfq_quantum;
 	cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0];
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 9eaee6640535..eb4cf6df7374 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -34,7 +34,7 @@
  */
 #include <scsi/scsi_cmnd.h>
 
-static void blk_unplug_work(void *data);
+static void blk_unplug_work(struct work_struct *work);
 static void blk_unplug_timeout(unsigned long data);
 static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io);
 static void init_request_from_bio(struct request *req, struct bio *bio);
@@ -227,7 +227,7 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
 	if (q->unplug_delay == 0)
 		q->unplug_delay = 1;
 
-	INIT_WORK(&q->unplug_work, blk_unplug_work, q);
+	INIT_WORK(&q->unplug_work, blk_unplug_work);
 
 	q->unplug_timer.function = blk_unplug_timeout;
 	q->unplug_timer.data = (unsigned long)q;
@@ -1631,9 +1631,9 @@ static void blk_backing_dev_unplug(struct backing_dev_info *bdi,
 	}
 }
 
-static void blk_unplug_work(void *data)
+static void blk_unplug_work(struct work_struct *work)
 {
-	request_queue_t *q = data;
+	request_queue_t *q = container_of(work, request_queue_t, unplug_work);
 
 	blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
 				q->rq.count[READ] + q->rq.count[WRITE]);
diff --git a/crypto/cryptomgr.c b/crypto/cryptomgr.c
index 9b5b15601068..2ebffb84f1d9 100644
--- a/crypto/cryptomgr.c
+++ b/crypto/cryptomgr.c
@@ -40,9 +40,10 @@ struct cryptomgr_param {
 	char template[CRYPTO_MAX_ALG_NAME];
 };
 
-static void cryptomgr_probe(void *data)
+static void cryptomgr_probe(struct work_struct *work)
 {
-	struct cryptomgr_param *param = data;
+	struct cryptomgr_param *param =
+		container_of(work, struct cryptomgr_param, work);
 	struct crypto_template *tmpl;
 	struct crypto_instance *inst;
 	int err;
@@ -112,7 +113,7 @@ static int cryptomgr_schedule_probe(struct crypto_larval *larval)
 	param->larval.type = larval->alg.cra_flags;
 	param->larval.mask = larval->mask;
 
-	INIT_WORK(&param->work, cryptomgr_probe, param);
+	INIT_WORK(&param->work, cryptomgr_probe);
 	schedule_work(&param->work);
 
 	return NOTIFY_STOP;
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 068fe4f100b0..02b30ae6a68e 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -50,6 +50,7 @@ ACPI_MODULE_NAME("osl")
 struct acpi_os_dpc {
 	acpi_osd_exec_callback function;
 	void *context;
+	struct work_struct work;
 };
 
 #ifdef CONFIG_ACPI_CUSTOM_DSDT
@@ -564,12 +565,9 @@ void acpi_os_derive_pci_id(acpi_handle rhandle,	/* upper bound  */
 	acpi_os_derive_pci_id_2(rhandle, chandle, id, &is_bridge, &bus_number);
 }
 
-static void acpi_os_execute_deferred(void *context)
+static void acpi_os_execute_deferred(struct work_struct *work)
 {
-	struct acpi_os_dpc *dpc = NULL;
-
-
-	dpc = (struct acpi_os_dpc *)context;
+	struct acpi_os_dpc *dpc = container_of(work, struct acpi_os_dpc, work);
 	if (!dpc) {
 		printk(KERN_ERR PREFIX "Invalid (NULL) context\n");
 		return;
@@ -602,7 +600,6 @@ acpi_status acpi_os_execute(acpi_execute_type type,
 {
 	acpi_status status = AE_OK;
 	struct acpi_os_dpc *dpc;
-	struct work_struct *task;
 
 	ACPI_FUNCTION_TRACE("os_queue_for_execution");
 
@@ -615,28 +612,22 @@ acpi_status acpi_os_execute(acpi_execute_type type,
 
 	/*
 	 * Allocate/initialize DPC structure.  Note that this memory will be
-	 * freed by the callee.  The kernel handles the tq_struct list  in a
+	 * freed by the callee.  The kernel handles the work_struct list  in a
 	 * way that allows us to also free its memory inside the callee.
 	 * Because we may want to schedule several tasks with different
 	 * parameters we can't use the approach some kernel code uses of
-	 * having a static tq_struct.
-	 * We can save time and code by allocating the DPC and tq_structs
-	 * from the same memory.
+	 * having a static work_struct.
 	 */
 
-	dpc =
-	    kmalloc(sizeof(struct acpi_os_dpc) + sizeof(struct work_struct),
-		    GFP_ATOMIC);
+	dpc = kmalloc(sizeof(struct acpi_os_dpc), GFP_ATOMIC);
 	if (!dpc)
 		return_ACPI_STATUS(AE_NO_MEMORY);
 
 	dpc->function = function;
 	dpc->context = context;
 
-	task = (void *)(dpc + 1);
-	INIT_WORK(task, acpi_os_execute_deferred, (void *)dpc);
-
-	if (!queue_work(kacpid_wq, task)) {
+	INIT_WORK(&dpc->work, acpi_os_execute_deferred);
+	if (!queue_work(kacpid_wq, &dpc->work)) {
 		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
 				  "Call to queue_work() failed.\n"));
 		kfree(dpc);
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 0bb4b4dced76..b5f2da6ac80e 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -914,7 +914,7 @@ static unsigned int ata_id_xfermask(const u16 *id)
  *	ata_port_queue_task - Queue port_task
  *	@ap: The ata_port to queue port_task for
  *	@fn: workqueue function to be scheduled
- *	@data: data value to pass to workqueue function
+ *	@data: data for @fn to use
  *	@delay: delay time for workqueue function
  *
  *	Schedule @fn(@data) for execution after @delay jiffies using
@@ -929,7 +929,7 @@ static unsigned int ata_id_xfermask(const u16 *id)
  *	LOCKING:
  *	Inherited from caller.
  */
-void ata_port_queue_task(struct ata_port *ap, void (*fn)(void *), void *data,
+void ata_port_queue_task(struct ata_port *ap, work_func_t fn, void *data,
 			 unsigned long delay)
 {
 	int rc;
@@ -937,7 +937,8 @@ void ata_port_queue_task(struct ata_port *ap, void (*fn)(void *), void *data,
 	if (ap->pflags & ATA_PFLAG_FLUSH_PORT_TASK)
 		return;
 
-	PREPARE_DELAYED_WORK(&ap->port_task, fn, data);
+	PREPARE_DELAYED_WORK(&ap->port_task, fn);
+	ap->port_task_data = data;
 
 	rc = queue_delayed_work(ata_wq, &ap->port_task, delay);
 
@@ -4292,10 +4293,11 @@ fsm_start:
 	return poll_next;
 }
 
-static void ata_pio_task(void *_data)
+static void ata_pio_task(struct work_struct *work)
 {
-	struct ata_queued_cmd *qc = _data;
-	struct ata_port *ap = qc->ap;
+	struct ata_port *ap =
+		container_of(work, struct ata_port, port_task.work);
+	struct ata_queued_cmd *qc = ap->port_task_data;
 	u8 status;
 	int poll_next;
 
@@ -5317,9 +5319,9 @@ void ata_port_init(struct ata_port *ap, struct ata_host *host,
 	ap->msg_enable = ATA_MSG_DRV | ATA_MSG_ERR | ATA_MSG_WARN;
 #endif
 
-	INIT_DELAYED_WORK(&ap->port_task, NULL, NULL);
-	INIT_DELAYED_WORK(&ap->hotplug_task, ata_scsi_hotplug, ap);
-	INIT_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan, ap);
+	INIT_DELAYED_WORK(&ap->port_task, NULL);
+	INIT_DELAYED_WORK(&ap->hotplug_task, ata_scsi_hotplug);
+	INIT_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan);
 	INIT_LIST_HEAD(&ap->eh_done_q);
 	init_waitqueue_head(&ap->eh_wait_q);
 
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 5c1fc467fc7f..c872b324dbd3 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -3079,7 +3079,7 @@ static void ata_scsi_remove_dev(struct ata_device *dev)
 
 /**
  *	ata_scsi_hotplug - SCSI part of hotplug
- *	@data: Pointer to ATA port to perform SCSI hotplug on
+ *	@work: Pointer to ATA port to perform SCSI hotplug on
  *
  *	Perform SCSI part of hotplug.  It's executed from a separate
  *	workqueue after EH completes.  This is necessary because SCSI
@@ -3089,9 +3089,10 @@ static void ata_scsi_remove_dev(struct ata_device *dev)
  *	LOCKING:
  *	Kernel thread context (may sleep).
  */
-void ata_scsi_hotplug(void *data)
+void ata_scsi_hotplug(struct work_struct *work)
 {
-	struct ata_port *ap = data;
+	struct ata_port *ap =
+		container_of(work, struct ata_port, hotplug_task.work);
 	int i;
 
 	if (ap->pflags & ATA_PFLAG_UNLOADING) {
@@ -3190,7 +3191,7 @@ static int ata_scsi_user_scan(struct Scsi_Host *shost, unsigned int channel,
 
 /**
  *	ata_scsi_dev_rescan - initiate scsi_rescan_device()
- *	@data: Pointer to ATA port to perform scsi_rescan_device()
+ *	@work: Pointer to ATA port to perform scsi_rescan_device()
  *
  *	After ATA pass thru (SAT) commands are executed successfully,
  *	libata need to propagate the changes to SCSI layer.  This
@@ -3200,9 +3201,10 @@ static int ata_scsi_user_scan(struct Scsi_Host *shost, unsigned int channel,
  *	LOCKING:
  *	Kernel thread context (may sleep).
  */
-void ata_scsi_dev_rescan(void *data)
+void ata_scsi_dev_rescan(struct work_struct *work)
 {
-	struct ata_port *ap = data;
+	struct ata_port *ap =
+		container_of(work, struct ata_port, scsi_rescan_task);
 	struct ata_device *dev;
 	unsigned int i;
 
diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
index 0ed263be652a..7e0f3aff873d 100644
--- a/drivers/ata/libata.h
+++ b/drivers/ata/libata.h
@@ -81,7 +81,7 @@ extern struct scsi_transport_template ata_scsi_transport_template;
 
 extern void ata_scsi_scan_host(struct ata_port *ap);
 extern int ata_scsi_offline_dev(struct ata_device *dev);
-extern void ata_scsi_hotplug(void *data);
+extern void ata_scsi_hotplug(struct work_struct *work);
 extern unsigned int ata_scsiop_inq_std(struct ata_scsi_args *args, u8 *rbuf,
 			       unsigned int buflen);
 
@@ -111,7 +111,7 @@ extern void ata_scsi_rbuf_fill(struct ata_scsi_args *args,
                         unsigned int (*actor) (struct ata_scsi_args *args,
                                            u8 *rbuf, unsigned int buflen));
 extern void ata_schedule_scsi_eh(struct Scsi_Host *shost);
-extern void ata_scsi_dev_rescan(void *data);
+extern void ata_scsi_dev_rescan(struct work_struct *work);
 extern int ata_bus_probe(struct ata_port *ap);
 
 /* libata-eh.c */
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index aa1eb4466f9d..3f1b38276e96 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -992,11 +992,11 @@ static void empty(void)
 {
 }
 
-static DECLARE_WORK(floppy_work, NULL, NULL);
+static DECLARE_WORK(floppy_work, NULL);
 
 static void schedule_bh(void (*handler) (void))
 {
-	PREPARE_WORK(&floppy_work, (work_func_t)handler, NULL);
+	PREPARE_WORK(&floppy_work, (work_func_t)handler);
 	schedule_work(&floppy_work);
 }
 
@@ -1008,7 +1008,7 @@ static void cancel_activity(void)
 
 	spin_lock_irqsave(&floppy_lock, flags);
 	do_floppy = NULL;
-	PREPARE_WORK(&floppy_work, (work_func_t)empty, NULL);
+	PREPARE_WORK(&floppy_work, (work_func_t)empty);
 	del_timer(&fd_timer);
 	spin_unlock_irqrestore(&floppy_lock, flags);
 }
diff --git a/drivers/char/random.c b/drivers/char/random.c
index f2ab61f3e8ae..fa764688cad1 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1422,9 +1422,9 @@ static struct keydata {
 
 static unsigned int ip_cnt;
 
-static void rekey_seq_generator(void *private_);
+static void rekey_seq_generator(struct work_struct *work);
 
-static DECLARE_DELAYED_WORK(rekey_work, rekey_seq_generator, NULL);
+static DECLARE_DELAYED_WORK(rekey_work, rekey_seq_generator);
 
 /*
  * Lock avoidance:
@@ -1438,7 +1438,7 @@ static DECLARE_DELAYED_WORK(rekey_work, rekey_seq_generator, NULL);
  * happen, and even if that happens only a not perfectly compliant
  * ISN is generated, nothing fatal.
  */
-static void rekey_seq_generator(void *private_)
+static void rekey_seq_generator(struct work_struct *work)
 {
 	struct keydata *keyptr = &ip_keydata[1 ^ (ip_cnt & 1)];
 
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index 5f49280779fb..c64f5bcff947 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -219,13 +219,13 @@ static struct sysrq_key_op sysrq_term_op = {
 	.enable_mask	= SYSRQ_ENABLE_SIGNAL,
 };
 
-static void moom_callback(void *ignored)
+static void moom_callback(struct work_struct *ignored)
 {
 	out_of_memory(&NODE_DATA(0)->node_zonelists[ZONE_NORMAL],
 			GFP_KERNEL, 0);
 }
 
-static DECLARE_WORK(moom_work, moom_callback, NULL);
+static DECLARE_WORK(moom_work, moom_callback);
 
 static void sysrq_handle_moom(int key, struct tty_struct *tty)
 {
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 7297acfe520c..83e9e7d9b58c 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -1254,7 +1254,7 @@ EXPORT_SYMBOL_GPL(tty_ldisc_flush);
 	
 /**
  *	do_tty_hangup		-	actual handler for hangup events
- *	@data: tty device
+ *	@work: tty device
  *
  *	This can be called by the "eventd" kernel thread.  That is process
  *	synchronous but doesn't hold any locks, so we need to make sure we
@@ -1274,9 +1274,10 @@ EXPORT_SYMBOL_GPL(tty_ldisc_flush);
  *		tasklist_lock to walk task list for hangup event
  *
  */
-static void do_tty_hangup(void *data)
+static void do_tty_hangup(struct work_struct *work)
 {
-	struct tty_struct *tty = (struct tty_struct *) data;
+	struct tty_struct *tty =
+		container_of(work, struct tty_struct, hangup_work);
 	struct file * cons_filp = NULL;
 	struct file *filp, *f = NULL;
 	struct task_struct *p;
@@ -1433,7 +1434,7 @@ void tty_vhangup(struct tty_struct * tty)
 
 	printk(KERN_DEBUG "%s vhangup...\n", tty_name(tty, buf));
 #endif
-	do_tty_hangup((void *) tty);
+	do_tty_hangup(&tty->hangup_work);
 }
 EXPORT_SYMBOL(tty_vhangup);
 
@@ -3304,12 +3305,13 @@ int tty_ioctl(struct inode * inode, struct file * file,
  * Nasty bug: do_SAK is being called in interrupt context.  This can
  * deadlock.  We punt it up to process context.  AKPM - 16Mar2001
  */
-static void __do_SAK(void *arg)
+static void __do_SAK(struct work_struct *work)
 {
+	struct tty_struct *tty =
+		container_of(work, struct tty_struct, SAK_work);
 #ifdef TTY_SOFT_SAK
 	tty_hangup(tty);
 #else
-	struct tty_struct *tty = arg;
 	struct task_struct *g, *p;
 	int session;
 	int		i;
@@ -3388,7 +3390,7 @@ void do_SAK(struct tty_struct *tty)
 {
 	if (!tty)
 		return;
-	PREPARE_WORK(&tty->SAK_work, __do_SAK, tty);
+	PREPARE_WORK(&tty->SAK_work, __do_SAK);
 	schedule_work(&tty->SAK_work);
 }
 
@@ -3396,7 +3398,7 @@ EXPORT_SYMBOL(do_SAK);
 
 /**
  *	flush_to_ldisc
- *	@private_: tty structure passed from work queue.
+ *	@work: tty structure passed from work queue.
  *
  *	This routine is called out of the software interrupt to flush data
  *	from the buffer chain to the line discipline.
@@ -3406,9 +3408,10 @@ EXPORT_SYMBOL(do_SAK);
  *	receive_buf method is single threaded for each tty instance.
  */
  
-static void flush_to_ldisc(void *private_)
+static void flush_to_ldisc(struct work_struct *work)
 {
-	struct tty_struct *tty = (struct tty_struct *) private_;
+	struct tty_struct *tty =
+		container_of(work, struct tty_struct, buf.work.work);
 	unsigned long 	flags;
 	struct tty_ldisc *disc;
 	struct tty_buffer *tbuf, *head;
@@ -3553,7 +3556,7 @@ void tty_flip_buffer_push(struct tty_struct *tty)
 	spin_unlock_irqrestore(&tty->buf.lock, flags);
 
 	if (tty->low_latency)
-		flush_to_ldisc((void *) tty);
+		flush_to_ldisc(&tty->buf.work.work);
 	else
 		schedule_delayed_work(&tty->buf.work, 1);
 }
@@ -3580,17 +3583,17 @@ static void initialize_tty_struct(struct tty_struct *tty)
 	tty->overrun_time = jiffies;
 	tty->buf.head = tty->buf.tail = NULL;
 	tty_buffer_init(tty);
-	INIT_DELAYED_WORK(&tty->buf.work, flush_to_ldisc, tty);
+	INIT_DELAYED_WORK(&tty->buf.work, flush_to_ldisc);
 	init_MUTEX(&tty->buf.pty_sem);
 	mutex_init(&tty->termios_mutex);
 	init_waitqueue_head(&tty->write_wait);
 	init_waitqueue_head(&tty->read_wait);
-	INIT_WORK(&tty->hangup_work, do_tty_hangup, tty);
+	INIT_WORK(&tty->hangup_work, do_tty_hangup);
 	mutex_init(&tty->atomic_read_lock);
 	mutex_init(&tty->atomic_write_lock);
 	spin_lock_init(&tty->read_lock);
 	INIT_LIST_HEAD(&tty->tty_files);
-	INIT_WORK(&tty->SAK_work, NULL, NULL);
+	INIT_WORK(&tty->SAK_work, NULL);
 }
 
 /*
diff --git a/drivers/char/vt.c b/drivers/char/vt.c
index 8e4413f6fbaf..8ee04adc37f0 100644
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -155,7 +155,7 @@ static void con_flush_chars(struct tty_struct *tty);
 static void set_vesa_blanking(char __user *p);
 static void set_cursor(struct vc_data *vc);
 static void hide_cursor(struct vc_data *vc);
-static void console_callback(void *ignored);
+static void console_callback(struct work_struct *ignored);
 static void blank_screen_t(unsigned long dummy);
 static void set_palette(struct vc_data *vc);
 
@@ -174,7 +174,7 @@ static int vesa_blank_mode; /* 0:none 1:suspendV 2:suspendH 3:powerdown */
 static int blankinterval = 10*60*HZ;
 static int vesa_off_interval;
 
-static DECLARE_WORK(console_work, console_callback, NULL);
+static DECLARE_WORK(console_work, console_callback);
 
 /*
  * fg_console is the current virtual console,
@@ -2154,7 +2154,7 @@ out:
  * with other console code and prevention of re-entrancy is
  * ensured with console_sem.
  */
-static void console_callback(void *ignored)
+static void console_callback(struct work_struct *ignored)
 {
 	acquire_console_sem();
 
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index dd0c2623e27b..7a7c6e6dfe4f 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -42,7 +42,7 @@ static DEFINE_SPINLOCK(cpufreq_driver_lock);
 
 /* internal prototypes */
 static int __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event);
-static void handle_update(void *data);
+static void handle_update(struct work_struct *work);
 
 /**
  * Two notifier lists: the "policy" list is involved in the
@@ -665,7 +665,7 @@ static int cpufreq_add_dev (struct sys_device * sys_dev)
 	mutex_init(&policy->lock);
 	mutex_lock(&policy->lock);
 	init_completion(&policy->kobj_unregister);
-	INIT_WORK(&policy->update, handle_update, (void *)(long)cpu);
+	INIT_WORK(&policy->update, handle_update);
 
 	/* call driver. From then on the cpufreq must be able
 	 * to accept all calls to ->verify and ->setpolicy for this CPU
@@ -895,9 +895,11 @@ static int cpufreq_remove_dev (struct sys_device * sys_dev)
 }
 
 
-static void handle_update(void *data)
+static void handle_update(struct work_struct *work)
 {
-	unsigned int cpu = (unsigned int)(long)data;
+	struct cpufreq_policy *policy =
+		container_of(work, struct cpufreq_policy, update);
+	unsigned int cpu = policy->cpu;
 	dprintk("handle_update for cpu %u called\n", cpu);
 	cpufreq_update_policy(cpu);
 }
diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c
index cbb93669d1ce..8451b29a3db5 100644
--- a/drivers/input/keyboard/atkbd.c
+++ b/drivers/input/keyboard/atkbd.c
@@ -567,9 +567,9 @@ static int atkbd_set_leds(struct atkbd *atkbd)
  * interrupt context.
  */
 
-static void atkbd_event_work(void *data)
+static void atkbd_event_work(struct work_struct *work)
 {
-	struct atkbd *atkbd = data;
+	struct atkbd *atkbd = container_of(work, struct atkbd, event_work);
 
 	mutex_lock(&atkbd->event_mutex);
 
@@ -943,7 +943,7 @@ static int atkbd_connect(struct serio *serio, struct serio_driver *drv)
 
 	atkbd->dev = dev;
 	ps2_init(&atkbd->ps2dev, serio);
-	INIT_WORK(&atkbd->event_work, atkbd_event_work, atkbd);
+	INIT_WORK(&atkbd->event_work, atkbd_event_work);
 	mutex_init(&atkbd->event_mutex);
 
 	switch (serio->id.type) {
diff --git a/drivers/input/serio/libps2.c b/drivers/input/serio/libps2.c
index e5b1b60757bb..b3e84d3bb7f7 100644
--- a/drivers/input/serio/libps2.c
+++ b/drivers/input/serio/libps2.c
@@ -251,9 +251,9 @@ EXPORT_SYMBOL(ps2_command);
  * ps2_schedule_command(), to a PS/2 device (keyboard, mouse, etc.)
  */
 
-static void ps2_execute_scheduled_command(void *data)
+static void ps2_execute_scheduled_command(struct work_struct *work)
 {
-	struct ps2work *ps2work = data;
+	struct ps2work *ps2work = container_of(work, struct ps2work, work);
 
 	ps2_command(ps2work->ps2dev, ps2work->param, ps2work->command);
 	kfree(ps2work);
@@ -278,7 +278,7 @@ int ps2_schedule_command(struct ps2dev *ps2dev, unsigned char *param, int comman
 	ps2work->ps2dev = ps2dev;
 	ps2work->command = command;
 	memcpy(ps2work->param, param, send);
-	INIT_WORK(&ps2work->work, ps2_execute_scheduled_command, ps2work);
+	INIT_WORK(&ps2work->work, ps2_execute_scheduled_command);
 
 	if (!schedule_work(&ps2work->work)) {
 		kfree(ps2work);
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index 726ec5e88ab2..03294400bc90 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -183,7 +183,7 @@ void e1000_set_ethtool_ops(struct net_device *netdev);
 static void e1000_enter_82542_rst(struct e1000_adapter *adapter);
 static void e1000_leave_82542_rst(struct e1000_adapter *adapter);
 static void e1000_tx_timeout(struct net_device *dev);
-static void e1000_reset_task(struct net_device *dev);
+static void e1000_reset_task(struct work_struct *work);
 static void e1000_smartspeed(struct e1000_adapter *adapter);
 static int e1000_82547_fifo_workaround(struct e1000_adapter *adapter,
                                        struct sk_buff *skb);
@@ -908,8 +908,7 @@ e1000_probe(struct pci_dev *pdev,
 	adapter->phy_info_timer.function = &e1000_update_phy_info;
 	adapter->phy_info_timer.data = (unsigned long) adapter;
 
-	INIT_WORK(&adapter->reset_task,
-		(void (*)(void *))e1000_reset_task, netdev);
+	INIT_WORK(&adapter->reset_task, e1000_reset_task);
 
 	e1000_check_options(adapter);
 
@@ -3154,9 +3153,10 @@ e1000_tx_timeout(struct net_device *netdev)
 }
 
 static void
-e1000_reset_task(struct net_device *netdev)
+e1000_reset_task(struct work_struct *work)
 {
-	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_adapter *adapter =
+		container_of(work, struct e1000_adapter, reset_task);
 
 	e1000_reinit_locked(adapter);
 }
diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c
index 04c43ef529ac..55866b6b26fa 100644
--- a/drivers/pci/pcie/aer/aerdrv.c
+++ b/drivers/pci/pcie/aer/aerdrv.c
@@ -160,7 +160,7 @@ static struct aer_rpc* aer_alloc_rpc(struct pcie_device *dev)
 	rpc->e_lock = SPIN_LOCK_UNLOCKED;
 
 	rpc->rpd = dev;
-	INIT_WORK(&rpc->dpc_handler, aer_isr, (void *)dev);
+	INIT_WORK(&rpc->dpc_handler, aer_isr);
 	rpc->prod_idx = rpc->cons_idx = 0;
 	mutex_init(&rpc->rpc_mutex);
 	init_waitqueue_head(&rpc->wait_release);
diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h
index daf0cad88fc8..3c0a58f64dd8 100644
--- a/drivers/pci/pcie/aer/aerdrv.h
+++ b/drivers/pci/pcie/aer/aerdrv.h
@@ -118,7 +118,7 @@ extern struct bus_type pcie_port_bus_type;
 extern void aer_enable_rootport(struct aer_rpc *rpc);
 extern void aer_delete_rootport(struct aer_rpc *rpc);
 extern int aer_init(struct pcie_device *dev);
-extern void aer_isr(void *context);
+extern void aer_isr(struct work_struct *work);
 extern void aer_print_error(struct pci_dev *dev, struct aer_err_info *info);
 extern int aer_osc_setup(struct pci_dev *dev);
 
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
index 1c7e660d6535..08e13033ced8 100644
--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -690,14 +690,14 @@ static void aer_isr_one_error(struct pcie_device *p_device,
 
 /**
  * aer_isr - consume errors detected by root port
- * @context: pointer to a private data of pcie device
+ * @work: definition of this work item
  *
  * Invoked, as DPC, when root port records new detected error
  **/
-void aer_isr(void *context)
+void aer_isr(struct work_struct *work)
 {
-	struct pcie_device *p_device = (struct pcie_device *) context;
-	struct aer_rpc *rpc = get_service_data(p_device);
+	struct aer_rpc *rpc = container_of(work, struct aer_rpc, dpc_handler);
+	struct pcie_device *p_device = rpc->rpd;
 	struct aer_err_source *e_src;
 
 	mutex_lock(&rpc->rpc_mutex);
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 94a274645f6f..d3c5e964c964 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -362,9 +362,10 @@ static struct scsi_target *scsi_alloc_target(struct device *parent,
 	goto retry;
 }
 
-static void scsi_target_reap_usercontext(void *data)
+static void scsi_target_reap_usercontext(struct work_struct *work)
 {
-	struct scsi_target *starget = data;
+	struct scsi_target *starget =
+		container_of(work, struct scsi_target, ew.work);
 	struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
 	unsigned long flags;
 
@@ -400,7 +401,7 @@ void scsi_target_reap(struct scsi_target *starget)
 		starget->state = STARGET_DEL;
 		spin_unlock_irqrestore(shost->host_lock, flags);
 		execute_in_process_context(scsi_target_reap_usercontext,
-					   starget, &starget->ew);
+					   &starget->ew);
 		return;
 
 	}
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index e1a91665d1c2..259c90cfa367 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -218,16 +218,16 @@ static void scsi_device_cls_release(struct class_device *class_dev)
 	put_device(&sdev->sdev_gendev);
 }
 
-static void scsi_device_dev_release_usercontext(void *data)
+static void scsi_device_dev_release_usercontext(struct work_struct *work)
 {
-	struct device *dev = data;
 	struct scsi_device *sdev;
 	struct device *parent;
 	struct scsi_target *starget;
 	unsigned long flags;
 
-	parent = dev->parent;
-	sdev = to_scsi_device(dev);
+	sdev = container_of(work, struct scsi_device, ew.work);
+
+	parent = sdev->sdev_gendev.parent;
 	starget = to_scsi_target(parent);
 
 	spin_lock_irqsave(sdev->host->host_lock, flags);
@@ -258,7 +258,7 @@ static void scsi_device_dev_release_usercontext(void *data)
 static void scsi_device_dev_release(struct device *dev)
 {
 	struct scsi_device *sdp = to_scsi_device(dev);
-	execute_in_process_context(scsi_device_dev_release_usercontext, dev,
+	execute_in_process_context(scsi_device_dev_release_usercontext,
 				   &sdp->ew);
 }
 
diff --git a/fs/aio.c b/fs/aio.c
index 11a1a7100ad6..ca1c5180a17f 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -53,13 +53,13 @@ static kmem_cache_t	*kioctx_cachep;
 static struct workqueue_struct *aio_wq;
 
 /* Used for rare fput completion. */
-static void aio_fput_routine(void *);
-static DECLARE_WORK(fput_work, aio_fput_routine, NULL);
+static void aio_fput_routine(struct work_struct *);
+static DECLARE_WORK(fput_work, aio_fput_routine);
 
 static DEFINE_SPINLOCK(fput_lock);
 static LIST_HEAD(fput_head);
 
-static void aio_kick_handler(void *);
+static void aio_kick_handler(struct work_struct *);
 static void aio_queue_work(struct kioctx *);
 
 /* aio_setup
@@ -227,7 +227,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 
 	INIT_LIST_HEAD(&ctx->active_reqs);
 	INIT_LIST_HEAD(&ctx->run_list);
-	INIT_DELAYED_WORK(&ctx->wq, aio_kick_handler, ctx);
+	INIT_DELAYED_WORK(&ctx->wq, aio_kick_handler);
 
 	if (aio_setup_ring(ctx) < 0)
 		goto out_freectx;
@@ -470,7 +470,7 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req)
 		wake_up(&ctx->wait);
 }
 
-static void aio_fput_routine(void *data)
+static void aio_fput_routine(struct work_struct *data)
 {
 	spin_lock_irq(&fput_lock);
 	while (likely(!list_empty(&fput_head))) {
@@ -859,9 +859,9 @@ static inline void aio_run_all_iocbs(struct kioctx *ctx)
  *      space.
  * Run on aiod's context.
  */
-static void aio_kick_handler(void *data)
+static void aio_kick_handler(struct work_struct *work)
 {
-	struct kioctx *ctx = data;
+	struct kioctx *ctx = container_of(work, struct kioctx, wq.work);
 	mm_segment_t oldfs = get_fs();
 	int requeue;
 
diff --git a/fs/bio.c b/fs/bio.c
index f95c8749499f..c6c07ca5b5a9 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -955,16 +955,16 @@ static void bio_release_pages(struct bio *bio)
  * run one bio_put() against the BIO.
  */
 
-static void bio_dirty_fn(void *data);
+static void bio_dirty_fn(struct work_struct *work);
 
-static DECLARE_WORK(bio_dirty_work, bio_dirty_fn, NULL);
+static DECLARE_WORK(bio_dirty_work, bio_dirty_fn);
 static DEFINE_SPINLOCK(bio_dirty_lock);
 static struct bio *bio_dirty_list;
 
 /*
  * This runs in process context
  */
-static void bio_dirty_fn(void *data)
+static void bio_dirty_fn(struct work_struct *work)
 {
 	unsigned long flags;
 	struct bio *bio;
diff --git a/fs/file.c b/fs/file.c
index 8e81775c5dc8..3787e82f54c1 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -91,8 +91,10 @@ out:
 	spin_unlock(&fddef->lock);
 }
 
-static void free_fdtable_work(struct fdtable_defer *f)
+static void free_fdtable_work(struct work_struct *work)
 {
+	struct fdtable_defer *f =
+		container_of(work, struct fdtable_defer, wq);
 	struct fdtable *fdt;
 
 	spin_lock_bh(&f->lock);
@@ -351,7 +353,7 @@ static void __devinit fdtable_defer_list_init(int cpu)
 {
 	struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu);
 	spin_lock_init(&fddef->lock);
-	INIT_WORK(&fddef->wq, (void (*)(void *))free_fdtable_work, fddef);
+	INIT_WORK(&fddef->wq, free_fdtable_work);
 	init_timer(&fddef->timer);
 	fddef->timer.data = (unsigned long)fddef;
 	fddef->timer.function = fdtable_timer;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 6f0487d6f44a..23ab145daa2d 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -143,7 +143,7 @@ static struct nfs_client *nfs_alloc_client(const char *hostname,
 	INIT_LIST_HEAD(&clp->cl_state_owners);
 	INIT_LIST_HEAD(&clp->cl_unused);
 	spin_lock_init(&clp->cl_lock);
-	INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state, clp);
+	INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state);
 	rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
 	clp->cl_boot_time = CURRENT_TIME;
 	clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 5ed798bc1cf7..371b804e7cc8 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -18,11 +18,10 @@
 
 #define NFSDBG_FACILITY		NFSDBG_VFS
 
-static void nfs_expire_automounts(void *list);
+static void nfs_expire_automounts(struct work_struct *work);
 
 LIST_HEAD(nfs_automount_list);
-static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts,
-			    &nfs_automount_list);
+static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts);
 int nfs_mountpoint_expiry_timeout = 500 * HZ;
 
 static struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent,
@@ -165,9 +164,9 @@ struct inode_operations nfs_referral_inode_operations = {
 	.follow_link	= nfs_follow_mountpoint,
 };
 
-static void nfs_expire_automounts(void *data)
+static void nfs_expire_automounts(struct work_struct *work)
 {
-	struct list_head *list = (struct list_head *)data;
+	struct list_head *list = &nfs_automount_list;
 
 	mark_mounts_for_expiry(list);
 	if (!list_empty(list))
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 6f346677332d..c26cd978c7cc 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -185,7 +185,7 @@ extern const u32 nfs4_fs_locations_bitmap[2];
 extern void nfs4_schedule_state_renewal(struct nfs_client *);
 extern void nfs4_renewd_prepare_shutdown(struct nfs_server *);
 extern void nfs4_kill_renewd(struct nfs_client *);
-extern void nfs4_renew_state(void *);
+extern void nfs4_renew_state(struct work_struct *);
 
 /* nfs4state.c */
 struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp);
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 7b6df1852e75..823298561c0a 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -59,9 +59,10 @@
 #define NFSDBG_FACILITY	NFSDBG_PROC
 
 void
-nfs4_renew_state(void *data)
+nfs4_renew_state(struct work_struct *work)
 {
-	struct nfs_client *clp = (struct nfs_client *)data;
+	struct nfs_client *clp =
+		container_of(work, struct nfs_client, cl_renewd.work);
 	struct rpc_cred *cred;
 	long lease, timeout;
 	unsigned long last, now;
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 5f04006e8dd2..b3f32eadbef5 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -568,6 +568,7 @@ struct ata_port {
 	struct ata_host		*host;
 	struct device 		*dev;
 
+	void			*port_task_data;
 	struct delayed_work	port_task;
 	struct delayed_work	hotplug_task;
 	struct work_struct	scsi_rescan_task;
@@ -747,7 +748,7 @@ extern int ata_ratelimit(void);
 extern unsigned int ata_busy_sleep(struct ata_port *ap,
 				   unsigned long timeout_pat,
 				   unsigned long timeout);
-extern void ata_port_queue_task(struct ata_port *ap, void (*fn)(void *),
+extern void ata_port_queue_task(struct ata_port *ap, work_func_t fn,
 				void *data, unsigned long delay);
 extern u32 ata_wait_register(void __iomem *reg, u32 mask, u32 val,
 			     unsigned long interval_msec,
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index ecc017d24cf3..4a3ea83c6d16 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -11,18 +11,19 @@
 
 struct workqueue_struct;
 
-typedef void (*work_func_t)(void *data);
+struct work_struct;
+typedef void (*work_func_t)(struct work_struct *work);
 
 struct work_struct {
-	/* the first word is the work queue pointer and the pending flag
-	 * rolled into one */
+	/* the first word is the work queue pointer and the flags rolled into
+	 * one */
 	unsigned long management;
 #define WORK_STRUCT_PENDING 0		/* T if work item pending execution */
+#define WORK_STRUCT_NOAUTOREL 1		/* F if work item automatically released on exec */
 #define WORK_STRUCT_FLAG_MASK (3UL)
 #define WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK)
 	struct list_head entry;
 	work_func_t func;
-	void *data;
 };
 
 struct delayed_work {
@@ -34,48 +35,77 @@ struct execute_work {
 	struct work_struct work;
 };
 
-#define __WORK_INITIALIZER(n, f, d) {				\
+#define __WORK_INITIALIZER(n, f) {				\
+	.management = 0,					\
         .entry	= { &(n).entry, &(n).entry },			\
 	.func = (f),						\
-	.data = (d),						\
 	}
 
-#define __DELAYED_WORK_INITIALIZER(n, f, d) {			\
-	.work = __WORK_INITIALIZER((n).work, (f), (d)),		\
+#define __WORK_INITIALIZER_NAR(n, f) {				\
+	.management = (1 << WORK_STRUCT_NOAUTOREL),		\
+        .entry	= { &(n).entry, &(n).entry },			\
+	.func = (f),						\
+	}
+
+#define __DELAYED_WORK_INITIALIZER(n, f) {			\
+	.work = __WORK_INITIALIZER((n).work, (f)),		\
+	.timer = TIMER_INITIALIZER(NULL, 0, 0),			\
+	}
+
+#define __DELAYED_WORK_INITIALIZER_NAR(n, f) {			\
+	.work = __WORK_INITIALIZER_NAR((n).work, (f)),		\
 	.timer = TIMER_INITIALIZER(NULL, 0, 0),			\
 	}
 
-#define DECLARE_WORK(n, f, d)					\
-	struct work_struct n = __WORK_INITIALIZER(n, f, d)
+#define DECLARE_WORK(n, f)					\
+	struct work_struct n = __WORK_INITIALIZER(n, f)
+
+#define DECLARE_WORK_NAR(n, f)					\
+	struct work_struct n = __WORK_INITIALIZER_NAR(n, f)
 
-#define DECLARE_DELAYED_WORK(n, f, d)				\
-	struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f, d)
+#define DECLARE_DELAYED_WORK(n, f)				\
+	struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f)
+
+#define DECLARE_DELAYED_WORK_NAR(n, f)			\
+	struct dwork_struct n = __DELAYED_WORK_INITIALIZER_NAR(n, f)
 
 /*
- * initialize a work item's function and data pointers
+ * initialize a work item's function pointer
  */
-#define PREPARE_WORK(_work, _func, _data)			\
+#define PREPARE_WORK(_work, _func)				\
 	do {							\
 		(_work)->func = (_func);			\
-		(_work)->data = (_data);			\
 	} while (0)
 
-#define PREPARE_DELAYED_WORK(_work, _func, _data)		\
-	PREPARE_WORK(&(_work)->work, (_func), (_data))
+#define PREPARE_DELAYED_WORK(_work, _func)			\
+	PREPARE_WORK(&(_work)->work, (_func))
 
 /*
  * initialize all of a work item in one go
  */
-#define INIT_WORK(_work, _func, _data)				\
+#define INIT_WORK(_work, _func)					\
 	do {							\
-		INIT_LIST_HEAD(&(_work)->entry);		\
 		(_work)->management = 0;			\
-		PREPARE_WORK((_work), (_func), (_data));	\
+		INIT_LIST_HEAD(&(_work)->entry);		\
+		PREPARE_WORK((_work), (_func));			\
+	} while (0)
+
+#define INIT_WORK_NAR(_work, _func)					\
+	do {								\
+		(_work)->management = (1 << WORK_STRUCT_NOAUTOREL);	\
+		INIT_LIST_HEAD(&(_work)->entry);			\
+		PREPARE_WORK((_work), (_func));				\
+	} while (0)
+
+#define INIT_DELAYED_WORK(_work, _func)				\
+	do {							\
+		INIT_WORK(&(_work)->work, (_func));		\
+		init_timer(&(_work)->timer);			\
 	} while (0)
 
-#define INIT_DELAYED_WORK(_work, _func, _data)		\
+#define INIT_DELAYED_WORK_NAR(_work, _func)			\
 	do {							\
-		INIT_WORK(&(_work)->work, (_func), (_data));	\
+		INIT_WORK_NAR(&(_work)->work, (_func));		\
 		init_timer(&(_work)->timer);			\
 	} while (0)
 
@@ -94,6 +124,27 @@ struct execute_work {
 #define delayed_work_pending(work) \
 	test_bit(WORK_STRUCT_PENDING, &(work)->work.management)
 
+/**
+ * work_release - Release a work item under execution
+ * @work: The work item to release
+ *
+ * This is used to release a work item that has been initialised with automatic
+ * release mode disabled (WORK_STRUCT_NOAUTOREL is set).  This gives the work
+ * function the opportunity to grab auxiliary data from the container of the
+ * work_struct before clearing the pending bit as the work_struct may be
+ * subject to deallocation the moment the pending bit is cleared.
+ *
+ * In such a case, this should be called in the work function after it has
+ * fetched any data it may require from the containter of the work_struct.
+ * After this function has been called, the work_struct may be scheduled for
+ * further execution or it may be deallocated unless other precautions are
+ * taken.
+ *
+ * This should also be used to release a delayed work item.
+ */
+#define work_release(work) \
+	clear_bit(WORK_STRUCT_PENDING, &(work)->management)
+
 
 extern struct workqueue_struct *__create_workqueue(const char *name,
 						    int singlethread);
@@ -112,7 +163,7 @@ extern int FASTCALL(schedule_work(struct work_struct *work));
 extern int FASTCALL(schedule_delayed_work(struct delayed_work *work, unsigned long delay));
 
 extern int schedule_delayed_work_on(int cpu, struct delayed_work *work, unsigned long delay);
-extern int schedule_on_each_cpu(work_func_t func, void *info);
+extern int schedule_on_each_cpu(work_func_t func);
 extern void flush_scheduled_work(void);
 extern int current_is_keventd(void);
 extern int keventd_up(void);
@@ -121,7 +172,7 @@ extern void init_workqueues(void);
 void cancel_rearming_delayed_work(struct delayed_work *work);
 void cancel_rearming_delayed_workqueue(struct workqueue_struct *,
 				       struct delayed_work *);
-int execute_in_process_context(work_func_t fn, void *, struct execute_work *);
+int execute_in_process_context(work_func_t fn, struct execute_work *);
 
 /*
  * Kill off a pending schedule_delayed_work().  Note that the work callback
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index 5f48748fe017..f7be1ac73601 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -84,7 +84,7 @@ struct inet_timewait_death_row {
 };
 
 extern void inet_twdr_hangman(unsigned long data);
-extern void inet_twdr_twkill_work(void *data);
+extern void inet_twdr_twkill_work(struct work_struct *work);
 extern void inet_twdr_twcal_tick(unsigned long data);
 
 #if (BITS_PER_LONG == 64)
diff --git a/ipc/util.c b/ipc/util.c
index cd8bb14a431f..a9b7a227b8d4 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -514,6 +514,11 @@ void ipc_rcu_getref(void *ptr)
 	container_of(ptr, struct ipc_rcu_hdr, data)->refcount++;
 }
 
+static void ipc_do_vfree(struct work_struct *work)
+{
+	vfree(container_of(work, struct ipc_rcu_sched, work));
+}
+
 /**
  * ipc_schedule_free - free ipc + rcu space
  * @head: RCU callback structure for queued work
@@ -528,7 +533,7 @@ static void ipc_schedule_free(struct rcu_head *head)
 	struct ipc_rcu_sched *sched =
 			container_of(&(grace->data[0]), struct ipc_rcu_sched, data[0]);
 
-	INIT_WORK(&sched->work, vfree, sched);
+	INIT_WORK(&sched->work, ipc_do_vfree);
 	schedule_work(&sched->work);
 }
 
diff --git a/kernel/kmod.c b/kernel/kmod.c
index bb4e29d924e4..7dc7a9dad6ac 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -114,6 +114,7 @@ EXPORT_SYMBOL(request_module);
 #endif /* CONFIG_KMOD */
 
 struct subprocess_info {
+	struct work_struct work;
 	struct completion *complete;
 	char *path;
 	char **argv;
@@ -221,9 +222,10 @@ static int wait_for_helper(void *data)
 }
 
 /* This is run by khelper thread  */
-static void __call_usermodehelper(void *data)
+static void __call_usermodehelper(struct work_struct *work)
 {
-	struct subprocess_info *sub_info = data;
+	struct subprocess_info *sub_info =
+		container_of(work, struct subprocess_info, work);
 	pid_t pid;
 	int wait = sub_info->wait;
 
@@ -264,6 +266,8 @@ int call_usermodehelper_keys(char *path, char **argv, char **envp,
 {
 	DECLARE_COMPLETION_ONSTACK(done);
 	struct subprocess_info sub_info = {
+		.work		= __WORK_INITIALIZER(sub_info.work,
+						     __call_usermodehelper),
 		.complete	= &done,
 		.path		= path,
 		.argv		= argv,
@@ -272,7 +276,6 @@ int call_usermodehelper_keys(char *path, char **argv, char **envp,
 		.wait		= wait,
 		.retval		= 0,
 	};
-	DECLARE_WORK(work, __call_usermodehelper, &sub_info);
 
 	if (!khelper_wq)
 		return -EBUSY;
@@ -280,7 +283,7 @@ int call_usermodehelper_keys(char *path, char **argv, char **envp,
 	if (path[0] == '\0')
 		return 0;
 
-	queue_work(khelper_wq, &work);
+	queue_work(khelper_wq, &sub_info.work);
 	wait_for_completion(&done);
 	return sub_info.retval;
 }
@@ -291,6 +294,8 @@ int call_usermodehelper_pipe(char *path, char **argv, char **envp,
 {
 	DECLARE_COMPLETION(done);
 	struct subprocess_info sub_info = {
+		.work		= __WORK_INITIALIZER(sub_info.work,
+						     __call_usermodehelper),
 		.complete	= &done,
 		.path		= path,
 		.argv		= argv,
@@ -298,7 +303,6 @@ int call_usermodehelper_pipe(char *path, char **argv, char **envp,
 		.retval		= 0,
 	};
 	struct file *f;
-	DECLARE_WORK(work, __call_usermodehelper, &sub_info);
 
 	if (!khelper_wq)
 		return -EBUSY;
@@ -318,7 +322,7 @@ int call_usermodehelper_pipe(char *path, char **argv, char **envp,
 	}
 	sub_info.stdin = f;
 
-	queue_work(khelper_wq, &work);
+	queue_work(khelper_wq, &sub_info.work);
 	wait_for_completion(&done);
 	return sub_info.retval;
 }
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 4f9c60ef95e8..1db8c72d0d38 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -31,6 +31,8 @@ struct kthread_create_info
 	/* Result passed back to kthread_create() from keventd. */
 	struct task_struct *result;
 	struct completion done;
+
+	struct work_struct work;
 };
 
 struct kthread_stop_info
@@ -111,9 +113,10 @@ static int kthread(void *_create)
 }
 
 /* We are keventd: create a thread. */
-static void keventd_create_kthread(void *_create)
+static void keventd_create_kthread(struct work_struct *work)
 {
-	struct kthread_create_info *create = _create;
+	struct kthread_create_info *create =
+		container_of(work, struct kthread_create_info, work);
 	int pid;
 
 	/* We want our own signal handler (we take no signals by default). */
@@ -154,20 +157,20 @@ struct task_struct *kthread_create(int (*threadfn)(void *data),
 				   ...)
 {
 	struct kthread_create_info create;
-	DECLARE_WORK(work, keventd_create_kthread, &create);
 
 	create.threadfn = threadfn;
 	create.data = data;
 	init_completion(&create.started);
 	init_completion(&create.done);
+	INIT_WORK(&create.work, keventd_create_kthread);
 
 	/*
 	 * The workqueue needs to start up first:
 	 */
 	if (!helper_wq)
-		work.func(work.data);
+		create.work.func(&create.work);
 	else {
-		queue_work(helper_wq, &work);
+		queue_work(helper_wq, &create.work);
 		wait_for_completion(&create.done);
 	}
 	if (!IS_ERR(create.result)) {
diff --git a/kernel/power/poweroff.c b/kernel/power/poweroff.c
index f1f900ac3164..678ec736076b 100644
--- a/kernel/power/poweroff.c
+++ b/kernel/power/poweroff.c
@@ -16,12 +16,12 @@
  * callback we use.
  */
 
-static void do_poweroff(void *dummy)
+static void do_poweroff(struct work_struct *dummy)
 {
 	kernel_power_off();
 }
 
-static DECLARE_WORK(poweroff_work, do_poweroff, NULL);
+static DECLARE_WORK(poweroff_work, do_poweroff);
 
 static void handle_poweroff(int key, struct tty_struct *tty)
 {
diff --git a/kernel/sys.c b/kernel/sys.c
index 98489d82801b..c87b461de38d 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -880,7 +880,7 @@ asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user
 	return 0;
 }
 
-static void deferred_cad(void *dummy)
+static void deferred_cad(struct work_struct *dummy)
 {
 	kernel_restart(NULL);
 }
@@ -892,7 +892,7 @@ static void deferred_cad(void *dummy)
  */
 void ctrl_alt_del(void)
 {
-	static DECLARE_WORK(cad_work, deferred_cad, NULL);
+	static DECLARE_WORK(cad_work, deferred_cad);
 
 	if (C_A_D)
 		schedule_work(&cad_work);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 967479756511..8d1e7cb8a51a 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -241,14 +241,14 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq)
 		struct work_struct *work = list_entry(cwq->worklist.next,
 						struct work_struct, entry);
 		work_func_t f = work->func;
-		void *data = work->data;
 
 		list_del_init(cwq->worklist.next);
 		spin_unlock_irqrestore(&cwq->lock, flags);
 
 		BUG_ON(get_wq_data(work) != cwq);
-		clear_bit(WORK_STRUCT_PENDING, &work->management);
-		f(data);
+		if (!test_bit(WORK_STRUCT_NOAUTOREL, &work->management))
+			work_release(work);
+		f(work);
 
 		spin_lock_irqsave(&cwq->lock, flags);
 		cwq->remove_sequence++;
@@ -527,7 +527,6 @@ EXPORT_SYMBOL(schedule_delayed_work_on);
 /**
  * schedule_on_each_cpu - call a function on each online CPU from keventd
  * @func: the function to call
- * @info: a pointer to pass to func()
  *
  * Returns zero on success.
  * Returns -ve errno on failure.
@@ -536,7 +535,7 @@ EXPORT_SYMBOL(schedule_delayed_work_on);
  *
  * schedule_on_each_cpu() is very slow.
  */
-int schedule_on_each_cpu(work_func_t func, void *info)
+int schedule_on_each_cpu(work_func_t func)
 {
 	int cpu;
 	struct work_struct *works;
@@ -547,7 +546,7 @@ int schedule_on_each_cpu(work_func_t func, void *info)
 
 	mutex_lock(&workqueue_mutex);
 	for_each_online_cpu(cpu) {
-		INIT_WORK(per_cpu_ptr(works, cpu), func, info);
+		INIT_WORK(per_cpu_ptr(works, cpu), func);
 		__queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu),
 				per_cpu_ptr(works, cpu));
 	}
@@ -591,7 +590,6 @@ EXPORT_SYMBOL(cancel_rearming_delayed_work);
 /**
  * execute_in_process_context - reliably execute the routine with user context
  * @fn:		the function to execute
- * @data:	data to pass to the function
  * @ew:		guaranteed storage for the execute work structure (must
  *		be available when the work executes)
  *
@@ -601,15 +599,14 @@ EXPORT_SYMBOL(cancel_rearming_delayed_work);
  * Returns:	0 - function was executed
  *		1 - function was scheduled for execution
  */
-int execute_in_process_context(work_func_t fn, void *data,
-			       struct execute_work *ew)
+int execute_in_process_context(work_func_t fn, struct execute_work *ew)
 {
 	if (!in_interrupt()) {
-		fn(data);
+		fn(&ew->work);
 		return 0;
 	}
 
-	INIT_WORK(&ew->work, fn, data);
+	INIT_WORK(&ew->work, fn);
 	schedule_work(&ew->work);
 
 	return 1;
diff --git a/mm/slab.c b/mm/slab.c
index a65bc5e992c3..5de81473df34 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -313,7 +313,7 @@ static int drain_freelist(struct kmem_cache *cache,
 static void free_block(struct kmem_cache *cachep, void **objpp, int len,
 			int node);
 static int enable_cpucache(struct kmem_cache *cachep);
-static void cache_reap(void *unused);
+static void cache_reap(struct work_struct *unused);
 
 /*
  * This function must be completely optimized away if a constant is passed to
@@ -925,7 +925,7 @@ static void __devinit start_cpu_timer(int cpu)
 	 */
 	if (keventd_up() && reap_work->work.func == NULL) {
 		init_reap_node(cpu);
-		INIT_DELAYED_WORK(reap_work, cache_reap, NULL);
+		INIT_DELAYED_WORK(reap_work, cache_reap);
 		schedule_delayed_work_on(cpu, reap_work, HZ + 3 * cpu);
 	}
 }
@@ -3815,7 +3815,7 @@ void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
  * If we cannot acquire the cache chain mutex then just give up - we'll try
  * again on the next iteration.
  */
-static void cache_reap(void *unused)
+static void cache_reap(struct work_struct *unused)
 {
 	struct kmem_cache *searchp;
 	struct kmem_list3 *l3;
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index f2ed09e25dfd..549a2ce951b0 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -34,8 +34,8 @@ enum lw_bits {
 static unsigned long linkwatch_flags;
 static unsigned long linkwatch_nextevent;
 
-static void linkwatch_event(void *dummy);
-static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event, NULL);
+static void linkwatch_event(struct work_struct *dummy);
+static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event);
 
 static LIST_HEAD(lweventlist);
 static DEFINE_SPINLOCK(lweventlist_lock);
@@ -127,7 +127,7 @@ void linkwatch_run_queue(void)
 }       
 
 
-static void linkwatch_event(void *dummy)
+static void linkwatch_event(struct work_struct *dummy)
 {
 	/* Limit the number of linkwatch events to one
 	 * per second so that a runaway driver does not
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index cdd805344c61..8c74f9168b7d 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -197,9 +197,10 @@ EXPORT_SYMBOL_GPL(inet_twdr_hangman);
 
 extern void twkill_slots_invalid(void);
 
-void inet_twdr_twkill_work(void *data)
+void inet_twdr_twkill_work(struct work_struct *work)
 {
-	struct inet_timewait_death_row *twdr = data;
+	struct inet_timewait_death_row *twdr =
+		container_of(work, struct inet_timewait_death_row, twkill_work);
 	int i;
 
 	if ((INET_TWDR_TWKILL_SLOTS - 1) > (sizeof(twdr->thread_slots) * 8))
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 0163d9826907..af7b2c986b1f 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -45,8 +45,7 @@ struct inet_timewait_death_row tcp_death_row = {
 	.tw_timer	= TIMER_INITIALIZER(inet_twdr_hangman, 0,
 					    (unsigned long)&tcp_death_row),
 	.twkill_work	= __WORK_INITIALIZER(tcp_death_row.twkill_work,
-					     inet_twdr_twkill_work,
-					     &tcp_death_row),
+					     inet_twdr_twkill_work),
 /* Short-time timewait calendar */
 
 	.twcal_hand	= -1,
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index d5725cb1491e..d96fd466a9a4 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -284,8 +284,8 @@ static struct file_operations cache_file_operations;
 static struct file_operations content_file_operations;
 static struct file_operations cache_flush_operations;
 
-static void do_cache_clean(void *data);
-static DECLARE_DELAYED_WORK(cache_cleaner, do_cache_clean, NULL);
+static void do_cache_clean(struct work_struct *work);
+static DECLARE_DELAYED_WORK(cache_cleaner, do_cache_clean);
 
 void cache_register(struct cache_detail *cd)
 {
@@ -461,7 +461,7 @@ static int cache_clean(void)
 /*
  * We want to regularly clean the cache, so we need to schedule some work ...
  */
-static void do_cache_clean(void *data)
+static void do_cache_clean(struct work_struct *work)
 {
 	int delay = 5;
 	if (cache_clean() == -1)
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 97be3f7fed44..49dba5febbbd 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -54,10 +54,11 @@ static void rpc_purge_list(struct rpc_inode *rpci, struct list_head *head,
 }
 
 static void
-rpc_timeout_upcall_queue(void *data)
+rpc_timeout_upcall_queue(struct work_struct *work)
 {
 	LIST_HEAD(free_list);
-	struct rpc_inode *rpci = (struct rpc_inode *)data;
+	struct rpc_inode *rpci =
+		container_of(work, struct rpc_inode, queue_timeout.work);
 	struct inode *inode = &rpci->vfs_inode;
 	void (*destroy_msg)(struct rpc_pipe_msg *);
 
@@ -838,7 +839,7 @@ init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
 		rpci->pipelen = 0;
 		init_waitqueue_head(&rpci->waitq);
 		INIT_DELAYED_WORK(&rpci->queue_timeout,
-				    rpc_timeout_upcall_queue, rpci);
+				    rpc_timeout_upcall_queue);
 		rpci->ops = NULL;
 	}
 }
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index a1ab4eed41f4..eff44bcdc95a 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -41,7 +41,7 @@ static mempool_t	*rpc_buffer_mempool __read_mostly;
 
 static void			__rpc_default_timer(struct rpc_task *task);
 static void			rpciod_killall(void);
-static void			rpc_async_schedule(void *);
+static void			rpc_async_schedule(struct work_struct *);
 
 /*
  * RPC tasks sit here while waiting for conditions to improve.
@@ -305,7 +305,7 @@ static void rpc_make_runnable(struct rpc_task *task)
 	if (RPC_IS_ASYNC(task)) {
 		int status;
 
-		INIT_WORK(&task->u.tk_work, rpc_async_schedule, (void *)task);
+		INIT_WORK(&task->u.tk_work, rpc_async_schedule);
 		status = queue_work(task->tk_workqueue, &task->u.tk_work);
 		if (status < 0) {
 			printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status);
@@ -695,9 +695,9 @@ rpc_execute(struct rpc_task *task)
 	return __rpc_execute(task);
 }
 
-static void rpc_async_schedule(void *arg)
+static void rpc_async_schedule(struct work_struct *work)
 {
-	__rpc_execute((struct rpc_task *)arg);
+	__rpc_execute(container_of(work, struct rpc_task, u.tk_work));
 }
 
 /**
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 80857470dc11..4f9a5d9791fb 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -479,9 +479,10 @@ int xprt_adjust_timeout(struct rpc_rqst *req)
 	return status;
 }
 
-static void xprt_autoclose(void *args)
+static void xprt_autoclose(struct work_struct *work)
 {
-	struct rpc_xprt *xprt = (struct rpc_xprt *)args;
+	struct rpc_xprt *xprt =
+		container_of(work, struct rpc_xprt, task_cleanup);
 
 	xprt_disconnect(xprt);
 	xprt->ops->close(xprt);
@@ -932,7 +933,7 @@ struct rpc_xprt *xprt_create_transport(int proto, struct sockaddr *ap, size_t si
 
 	INIT_LIST_HEAD(&xprt->free);
 	INIT_LIST_HEAD(&xprt->recv);
-	INIT_WORK(&xprt->task_cleanup, xprt_autoclose, xprt);
+	INIT_WORK(&xprt->task_cleanup, xprt_autoclose);
 	init_timer(&xprt->timer);
 	xprt->timer.function = xprt_init_autodisconnect;
 	xprt->timer.data = (unsigned long) xprt;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 3c7532cd009e..cfe3c15be948 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1060,13 +1060,14 @@ static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock)
 
 /**
  * xs_udp_connect_worker - set up a UDP socket
- * @args: RPC transport to connect
+ * @work: RPC transport to connect
  *
  * Invoked by a work queue tasklet.
  */
-static void xs_udp_connect_worker(void *args)
+static void xs_udp_connect_worker(struct work_struct *work)
 {
-	struct rpc_xprt *xprt = (struct rpc_xprt *) args;
+	struct rpc_xprt *xprt =
+		container_of(work, struct rpc_xprt, connect_worker.work);
 	struct socket *sock = xprt->sock;
 	int err, status = -EIO;
 
@@ -1144,13 +1145,14 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt)
 
 /**
  * xs_tcp_connect_worker - connect a TCP socket to a remote endpoint
- * @args: RPC transport to connect
+ * @work: RPC transport to connect
  *
  * Invoked by a work queue tasklet.
  */
-static void xs_tcp_connect_worker(void *args)
+static void xs_tcp_connect_worker(struct work_struct *work)
 {
-	struct rpc_xprt *xprt = (struct rpc_xprt *)args;
+	struct rpc_xprt *xprt =
+		container_of(work, struct rpc_xprt, connect_worker.work);
 	struct socket *sock = xprt->sock;
 	int err, status = -EIO;
 
@@ -1375,7 +1377,7 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to)
 	/* XXX: header size can vary due to auth type, IPv6, etc. */
 	xprt->max_payload = (1U << 16) - (MAX_HEADER << 3);
 
-	INIT_DELAYED_WORK(&xprt->connect_worker, xs_udp_connect_worker, xprt);
+	INIT_DELAYED_WORK(&xprt->connect_worker, xs_udp_connect_worker);
 	xprt->bind_timeout = XS_BIND_TO;
 	xprt->connect_timeout = XS_UDP_CONN_TO;
 	xprt->reestablish_timeout = XS_UDP_REEST_TO;
@@ -1420,7 +1422,7 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to)
 	xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
 	xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
 
-	INIT_DELAYED_WORK(&xprt->connect_worker, xs_tcp_connect_worker, xprt);
+	INIT_DELAYED_WORK(&xprt->connect_worker, xs_tcp_connect_worker);
 	xprt->bind_timeout = XS_BIND_TO;
 	xprt->connect_timeout = XS_TCP_CONN_TO;
 	xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
diff --git a/security/keys/key.c b/security/keys/key.c
index 80de8c3e9cc3..70eacbe5abde 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -30,8 +30,8 @@ DEFINE_SPINLOCK(key_user_lock);
 static LIST_HEAD(key_types_list);
 static DECLARE_RWSEM(key_types_sem);
 
-static void key_cleanup(void *data);
-static DECLARE_WORK(key_cleanup_task, key_cleanup, NULL);
+static void key_cleanup(struct work_struct *work);
+static DECLARE_WORK(key_cleanup_task, key_cleanup);
 
 /* we serialise key instantiation and link */
 DECLARE_RWSEM(key_construction_sem);
@@ -552,7 +552,7 @@ EXPORT_SYMBOL(key_negate_and_link);
  * do cleaning up in process context so that we don't have to disable
  * interrupts all over the place
  */
-static void key_cleanup(void *data)
+static void key_cleanup(struct work_struct *work)
 {
 	struct rb_node *_n;
 	struct key *key;
-- 
cgit v1.2.3


From a09c17a6fdad9ae5b5ea1c3383080f84ec76ab20 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@cs.washington.edu>
Date: Wed, 6 Dec 2006 20:40:18 -0800
Subject: [PATCH] sys: remove unused variable

Remove unused 'new_ruid' variable.

Reported by David Binderman <dcb314@hotmail.com>.

Signed-off-by: David Rientjes <rientjes@cs.washington.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sys.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel/sys.c')

diff --git a/kernel/sys.c b/kernel/sys.c
index c87b461de38d..a0c1a29a507f 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1102,14 +1102,14 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
 asmlinkage long sys_setuid(uid_t uid)
 {
 	int old_euid = current->euid;
-	int old_ruid, old_suid, new_ruid, new_suid;
+	int old_ruid, old_suid, new_suid;
 	int retval;
 
 	retval = security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_ID);
 	if (retval)
 		return retval;
 
-	old_ruid = new_ruid = current->uid;
+	old_ruid = current->uid;
 	old_suid = current->suid;
 	new_suid = old_suid;
 	
-- 
cgit v1.2.3


From 24ec839c431eb79bb8f6abc00c4e1eb3b8c4d517 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 8 Dec 2006 02:36:04 -0800
Subject: [PATCH] tty: ->signal->tty locking

Fix the locking of signal->tty.

Use ->sighand->siglock to protect ->signal->tty; this lock is already used
by most other members of ->signal/->sighand.  And unless we are 'current'
or the tasklist_lock is held we need ->siglock to access ->signal anyway.

(NOTE: sys_unshare() is broken wrt ->sighand locking rules)

Note that tty_mutex is held over tty destruction, so while holding
tty_mutex any tty pointer remains valid.  Otherwise the lifetime of ttys
are governed by their open file handles.  This leaves some holes for tty
access from signal->tty (or any other non file related tty access).

It solves the tty SLAB scribbles we were seeing.

(NOTE: the change from group_send_sig_info to __group_send_sig_info needs to
       be examined by someone familiar with the security framework, I think
       it is safe given the SEND_SIG_PRIV from other __group_send_sig_info
       invocations)

[schwidefsky@de.ibm.com: 3270 fix]
[akpm@osdl.org: various post-viro fixes]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Alan Cox <alan@redhat.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Chris Wright <chrisw@sous-sol.org>
Cc: Roland McGrath <roland@redhat.com>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Cc: James Morris <jmorris@namei.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Jan Kara <jack@ucw.cz>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/sparc64/solaris/misc.c |   4 +-
 arch/um/kernel/exec.c       |   7 +-
 drivers/char/tty_io.c       | 237 ++++++++++++++++++++++++++------------------
 drivers/s390/char/fs3270.c  |  12 ++-
 fs/dquot.c                  |  14 +--
 fs/open.c                   |   1 +
 include/linux/tty.h         |  11 +-
 kernel/acct.c               |   9 +-
 kernel/auditsc.c            |   2 +
 kernel/exit.c               |   4 +-
 kernel/sys.c                |   6 +-
 security/selinux/hooks.c    |  11 +-
 12 files changed, 186 insertions(+), 132 deletions(-)

(limited to 'kernel/sys.c')

diff --git a/arch/sparc64/solaris/misc.c b/arch/sparc64/solaris/misc.c
index 9ed997982f8d..e84241d5f7f4 100644
--- a/arch/sparc64/solaris/misc.c
+++ b/arch/sparc64/solaris/misc.c
@@ -423,9 +423,7 @@ asmlinkage int solaris_procids(int cmd, s32 pid, s32 pgid)
 			   Solaris setpgrp and setsid? */
 			ret = sys_setpgid(0, 0);
 			if (ret) return ret;
-			mutex_lock(&tty_mutex);
-			current->signal->tty = NULL;
-			mutex_unlock(&tty_mutex);
+			proc_clear_tty(current);
 			return process_group(current);
 		}
 	case 2: /* getsid */
diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c
index 0561c43b4685..8d56ec6cca79 100644
--- a/arch/um/kernel/exec.c
+++ b/arch/um/kernel/exec.c
@@ -39,12 +39,13 @@ static long execve1(char *file, char __user * __user *argv,
 		    char __user *__user *env)
 {
         long error;
+	struct tty_struct *tty;
 
 #ifdef CONFIG_TTY_LOG
 	mutex_lock(&tty_mutex);
-	task_lock(current);	/* FIXME:  is this needed ? */
-	log_exec(argv, current->signal->tty);
-	task_unlock(current);
+	tty = get_current_tty();
+	if (tty)
+		log_exec(argv, tty);
 	mutex_unlock(&tty_mutex);
 #endif
         error = do_execve(file, argv, env, &current->thread.regs);
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index b3cfc8bc613c..0c856a6f3677 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -126,7 +126,7 @@ EXPORT_SYMBOL(tty_std_termios);
    
 LIST_HEAD(tty_drivers);			/* linked list of tty drivers */
 
-/* Semaphore to protect creating and releasing a tty. This is shared with
+/* Mutex to protect creating and releasing a tty. This is shared with
    vt.c for deeply disgusting hack reasons */
 DEFINE_MUTEX(tty_mutex);
 EXPORT_SYMBOL(tty_mutex);
@@ -250,7 +250,7 @@ static int check_tty_count(struct tty_struct *tty, const char *routine)
 				    "!= #fd's(%d) in %s\n",
 		       tty->name, tty->count, count, routine);
 		return count;
-       }	
+	}
 #endif
 	return 0;
 }
@@ -259,18 +259,6 @@ static int check_tty_count(struct tty_struct *tty, const char *routine)
  * Tty buffer allocation management
  */
 
-
-/**
- *	tty_buffer_free_all		-	free buffers used by a tty
- *	@tty: tty to free from
- *
- *	Remove all the buffers pending on a tty whether queued with data
- *	or in the free ring. Must be called when the tty is no longer in use
- *
- *	Locking: none
- */
-
-
 /**
  *	tty_buffer_free_all		-	free buffers used by a tty
  *	@tty: tty to free from
@@ -614,7 +602,7 @@ EXPORT_SYMBOL_GPL(tty_prepare_flip_string_flags);
  *	they are not on hot paths so a little discipline won't do 
  *	any harm.
  *
- *	Locking: takes termios_sem
+ *	Locking: takes termios_mutex
  */
  
 static void tty_set_termios_ldisc(struct tty_struct *tty, int num)
@@ -915,7 +903,7 @@ static void tty_ldisc_enable(struct tty_struct *tty)
  *	context.
  *
  *	Locking: takes tty_ldisc_lock.
- *		called functions take termios_sem
+ *		 called functions take termios_mutex
  */
  
 static int tty_set_ldisc(struct tty_struct *tty, int ldisc)
@@ -1267,12 +1255,12 @@ EXPORT_SYMBOL_GPL(tty_ldisc_flush);
  *
  *	Locking:
  *		BKL
- *		redirect lock for undoing redirection
- *		file list lock for manipulating list of ttys
- *		tty_ldisc_lock from called functions
- *		termios_sem resetting termios data
- *		tasklist_lock to walk task list for hangup event
- *
+ *		  redirect lock for undoing redirection
+ *		  file list lock for manipulating list of ttys
+ *		  tty_ldisc_lock from called functions
+ *		  termios_mutex resetting termios data
+ *		  tasklist_lock to walk task list for hangup event
+ *		    ->siglock to protect ->signal/->sighand
  */
 static void do_tty_hangup(struct work_struct *work)
 {
@@ -1354,14 +1342,18 @@ static void do_tty_hangup(struct work_struct *work)
 	read_lock(&tasklist_lock);
 	if (tty->session > 0) {
 		do_each_task_pid(tty->session, PIDTYPE_SID, p) {
+			spin_lock_irq(&p->sighand->siglock);
 			if (p->signal->tty == tty)
 				p->signal->tty = NULL;
-			if (!p->signal->leader)
+			if (!p->signal->leader) {
+				spin_unlock_irq(&p->sighand->siglock);
 				continue;
-			group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p);
-			group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p);
+			}
+			__group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p);
+			__group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p);
 			if (tty->pgrp > 0)
 				p->signal->tty_old_pgrp = tty->pgrp;
+			spin_unlock_irq(&p->sighand->siglock);
 		} while_each_task_pid(tty->session, PIDTYPE_SID, p);
 	}
 	read_unlock(&tasklist_lock);
@@ -1453,6 +1445,14 @@ int tty_hung_up_p(struct file * filp)
 
 EXPORT_SYMBOL(tty_hung_up_p);
 
+static void session_clear_tty(pid_t session)
+{
+	struct task_struct *p;
+	do_each_task_pid(session, PIDTYPE_SID, p) {
+		proc_clear_tty(p);
+	} while_each_task_pid(session, PIDTYPE_SID, p);
+}
+
 /**
  *	disassociate_ctty	-	disconnect controlling tty
  *	@on_exit: true if exiting so need to "hang up" the session
@@ -1469,31 +1469,35 @@ EXPORT_SYMBOL(tty_hung_up_p);
  *	The argument on_exit is set to 1 if called when a process is
  *	exiting; it is 0 if called by the ioctl TIOCNOTTY.
  *
- *	Locking: tty_mutex is taken to protect current->signal->tty
+ *	Locking:
  *		BKL is taken for hysterical raisins
- *		Tasklist lock is taken (under tty_mutex) to walk process
- *		lists for the session.
+ *		  tty_mutex is taken to protect tty
+ *		  ->siglock is taken to protect ->signal/->sighand
+ *		  tasklist_lock is taken to walk process list for sessions
+ *		    ->siglock is taken to protect ->signal/->sighand
  */
 
 void disassociate_ctty(int on_exit)
 {
 	struct tty_struct *tty;
-	struct task_struct *p;
 	int tty_pgrp = -1;
+	int session;
 
 	lock_kernel();
 
 	mutex_lock(&tty_mutex);
-	tty = current->signal->tty;
+	tty = get_current_tty();
 	if (tty) {
 		tty_pgrp = tty->pgrp;
 		mutex_unlock(&tty_mutex);
+		/* XXX: here we race, there is nothing protecting tty */
 		if (on_exit && tty->driver->type != TTY_DRIVER_TYPE_PTY)
 			tty_vhangup(tty);
 	} else {
-		if (current->signal->tty_old_pgrp) {
-			kill_pg(current->signal->tty_old_pgrp, SIGHUP, on_exit);
-			kill_pg(current->signal->tty_old_pgrp, SIGCONT, on_exit);
+		pid_t old_pgrp = current->signal->tty_old_pgrp;
+		if (old_pgrp) {
+			kill_pg(old_pgrp, SIGHUP, on_exit);
+			kill_pg(old_pgrp, SIGCONT, on_exit);
 		}
 		mutex_unlock(&tty_mutex);
 		unlock_kernel();	
@@ -1505,19 +1509,29 @@ void disassociate_ctty(int on_exit)
 			kill_pg(tty_pgrp, SIGCONT, on_exit);
 	}
 
-	/* Must lock changes to tty_old_pgrp */
-	mutex_lock(&tty_mutex);
+	spin_lock_irq(&current->sighand->siglock);
 	current->signal->tty_old_pgrp = 0;
-	tty->session = 0;
-	tty->pgrp = -1;
+	session = current->signal->session;
+	spin_unlock_irq(&current->sighand->siglock);
+
+	mutex_lock(&tty_mutex);
+	/* It is possible that do_tty_hangup has free'd this tty */
+	tty = get_current_tty();
+	if (tty) {
+		tty->session = 0;
+		tty->pgrp = 0;
+	} else {
+#ifdef TTY_DEBUG_HANGUP
+		printk(KERN_DEBUG "error attempted to write to tty [0x%p]"
+		       " = NULL", tty);
+#endif
+	}
+	mutex_unlock(&tty_mutex);
 
 	/* Now clear signal->tty under the lock */
 	read_lock(&tasklist_lock);
-	do_each_task_pid(current->signal->session, PIDTYPE_SID, p) {
-		p->signal->tty = NULL;
-	} while_each_task_pid(current->signal->session, PIDTYPE_SID, p);
+	session_clear_tty(session);
 	read_unlock(&tasklist_lock);
-	mutex_unlock(&tty_mutex);
 	unlock_kernel();
 }
 
@@ -2337,16 +2351,10 @@ static void release_dev(struct file * filp)
 	 * tty.
 	 */
 	if (tty_closing || o_tty_closing) {
-		struct task_struct *p;
-
 		read_lock(&tasklist_lock);
-		do_each_task_pid(tty->session, PIDTYPE_SID, p) {
-			p->signal->tty = NULL;
-		} while_each_task_pid(tty->session, PIDTYPE_SID, p);
+		session_clear_tty(tty->session);
 		if (o_tty)
-			do_each_task_pid(o_tty->session, PIDTYPE_SID, p) {
-				p->signal->tty = NULL;
-			} while_each_task_pid(o_tty->session, PIDTYPE_SID, p);
+			session_clear_tty(o_tty->session);
 		read_unlock(&tasklist_lock);
 	}
 
@@ -2443,9 +2451,9 @@ static void release_dev(struct file * filp)
  *	The termios state of a pty is reset on first open so that
  *	settings don't persist across reuse.
  *
- *	Locking: tty_mutex protects current->signal->tty, get_tty_driver and
- *		init_dev work. tty->count should protect the rest.
- *		task_lock is held to update task details for sessions
+ *	Locking: tty_mutex protects tty, get_tty_driver and init_dev work.
+ *		 tty->count should protect the rest.
+ *		 ->siglock protects ->signal/->sighand
  */
 
 static int tty_open(struct inode * inode, struct file * filp)
@@ -2467,12 +2475,13 @@ retry_open:
 	mutex_lock(&tty_mutex);
 
 	if (device == MKDEV(TTYAUX_MAJOR,0)) {
-		if (!current->signal->tty) {
+		tty = get_current_tty();
+		if (!tty) {
 			mutex_unlock(&tty_mutex);
 			return -ENXIO;
 		}
-		driver = current->signal->tty->driver;
-		index = current->signal->tty->index;
+		driver = tty->driver;
+		index = tty->index;
 		filp->f_flags |= O_NONBLOCK; /* Don't let /dev/tty block */
 		/* noctty = 1; */
 		goto got_driver;
@@ -2547,17 +2556,16 @@ got_driver:
 			filp->f_op = &tty_fops;
 		goto retry_open;
 	}
+
+	mutex_lock(&tty_mutex);
+	spin_lock_irq(&current->sighand->siglock);
 	if (!noctty &&
 	    current->signal->leader &&
 	    !current->signal->tty &&
-	    tty->session == 0) {
-	    	task_lock(current);
-		current->signal->tty = tty;
-		task_unlock(current);
-		current->signal->tty_old_pgrp = 0;
-		tty->session = current->signal->session;
-		tty->pgrp = process_group(current);
-	}
+	    tty->session == 0)
+		__proc_set_tty(current, tty);
+	spin_unlock_irq(&current->sighand->siglock);
+	mutex_unlock(&tty_mutex);
 	return 0;
 }
 
@@ -2747,7 +2755,7 @@ static int tiocsti(struct tty_struct *tty, char __user *p)
  *
  *	Copies the kernel idea of the window size into the user buffer.
  *
- *	Locking: tty->termios_sem is taken to ensure the winsize data
+ *	Locking: tty->termios_mutex is taken to ensure the winsize data
  *		is consistent.
  */
 
@@ -2774,8 +2782,8 @@ static int tiocgwinsz(struct tty_struct *tty, struct winsize __user * arg)
  *	Locking:
  *		Called function use the console_sem is used to ensure we do
  *	not try and resize the console twice at once.
- *		The tty->termios_sem is used to ensure we don't double
- *	resize and get confused. Lock order - tty->termios.sem before
+ *		The tty->termios_mutex is used to ensure we don't double
+ *	resize and get confused. Lock order - tty->termios_mutex before
  *	console sem
  */
 
@@ -2880,25 +2888,28 @@ static int fionbio(struct file *file, int __user *p)
  *	leader to set this tty as the controlling tty for the session.
  *
  *	Locking:
- *		Takes tasklist lock internally to walk sessions
- *		Takes task_lock() when updating signal->tty
  *		Takes tty_mutex() to protect tty instance
- *
+ *		Takes tasklist_lock internally to walk sessions
+ *		Takes ->siglock() when updating signal->tty
  */
 
 static int tiocsctty(struct tty_struct *tty, int arg)
 {
-	struct task_struct *p;
-
+	int ret = 0;
 	if (current->signal->leader &&
 	    (current->signal->session == tty->session))
-		return 0;
+		return ret;
+
+	mutex_lock(&tty_mutex);
 	/*
 	 * The process must be a session leader and
 	 * not have a controlling tty already.
 	 */
-	if (!current->signal->leader || current->signal->tty)
-		return -EPERM;
+	if (!current->signal->leader || current->signal->tty) {
+		ret = -EPERM;
+		goto unlock;
+	}
+
 	if (tty->session > 0) {
 		/*
 		 * This tty is already the controlling
@@ -2908,24 +2919,18 @@ static int tiocsctty(struct tty_struct *tty, int arg)
 			/*
 			 * Steal it away
 			 */
-
 			read_lock(&tasklist_lock);
-			do_each_task_pid(tty->session, PIDTYPE_SID, p) {
-				p->signal->tty = NULL;
-			} while_each_task_pid(tty->session, PIDTYPE_SID, p);
+			session_clear_tty(tty->session);
 			read_unlock(&tasklist_lock);
-		} else
-			return -EPERM;
+		} else {
+			ret = -EPERM;
+			goto unlock;
+		}
 	}
-	mutex_lock(&tty_mutex);
-	task_lock(current);
-	current->signal->tty = tty;
-	task_unlock(current);
+	proc_set_tty(current, tty);
+unlock:
 	mutex_unlock(&tty_mutex);
-	current->signal->tty_old_pgrp = 0;
-	tty->session = current->signal->session;
-	tty->pgrp = process_group(current);
-	return 0;
+	return ret;
 }
 
 /**
@@ -2937,7 +2942,7 @@ static int tiocsctty(struct tty_struct *tty, int arg)
  *	Obtain the process group of the tty. If there is no process group
  *	return an error.
  *
- *	Locking: none. Reference to ->signal->tty is safe.
+ *	Locking: none. Reference to current->signal->tty is safe.
  */
 
 static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p)
@@ -2995,7 +3000,7 @@ static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t
  *	Obtain the session id of the tty. If there is no session
  *	return an error.
  *
- *	Locking: none. Reference to ->signal->tty is safe.
+ *	Locking: none. Reference to current->signal->tty is safe.
  */
 
 static int tiocgsid(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p)
@@ -3214,14 +3219,11 @@ int tty_ioctl(struct inode * inode, struct file * file,
 			clear_bit(TTY_EXCLUSIVE, &tty->flags);
 			return 0;
 		case TIOCNOTTY:
-			/* FIXME: taks lock or tty_mutex ? */
 			if (current->signal->tty != tty)
 				return -ENOTTY;
 			if (current->signal->leader)
 				disassociate_ctty(0);
-			task_lock(current);
-			current->signal->tty = NULL;
-			task_unlock(current);
+			proc_clear_tty(current);
 			return 0;
 		case TIOCSCTTY:
 			return tiocsctty(tty, arg);
@@ -3321,7 +3323,7 @@ static void __do_SAK(struct work_struct *work)
 	
 	if (!tty)
 		return;
-	session  = tty->session;
+	session = tty->session;
 	
 	/* We don't want an ldisc switch during this */
 	disc = tty_ldisc_ref(tty);
@@ -3834,9 +3836,52 @@ int tty_unregister_driver(struct tty_driver *driver)
 	cdev_del(&driver->cdev);
 	return 0;
 }
-
 EXPORT_SYMBOL(tty_unregister_driver);
 
+dev_t tty_devnum(struct tty_struct *tty)
+{
+	return MKDEV(tty->driver->major, tty->driver->minor_start) + tty->index;
+}
+EXPORT_SYMBOL(tty_devnum);
+
+void proc_clear_tty(struct task_struct *p)
+{
+	spin_lock_irq(&p->sighand->siglock);
+	p->signal->tty = NULL;
+	spin_unlock_irq(&p->sighand->siglock);
+}
+EXPORT_SYMBOL(proc_clear_tty);
+
+void __proc_set_tty(struct task_struct *tsk, struct tty_struct *tty)
+{
+	if (tty) {
+		tty->session = tsk->signal->session;
+		tty->pgrp = process_group(tsk);
+	}
+	tsk->signal->tty = tty;
+	tsk->signal->tty_old_pgrp = 0;
+}
+
+void proc_set_tty(struct task_struct *tsk, struct tty_struct *tty)
+{
+	spin_lock_irq(&tsk->sighand->siglock);
+	__proc_set_tty(tsk, tty);
+	spin_unlock_irq(&tsk->sighand->siglock);
+}
+
+struct tty_struct *get_current_tty(void)
+{
+	struct tty_struct *tty;
+	WARN_ON_ONCE(!mutex_is_locked(&tty_mutex));
+	tty = current->signal->tty;
+	/*
+	 * session->tty can be changed/cleared from under us, make sure we
+	 * issue the load. The obtained pointer, when not NULL, is valid as
+	 * long as we hold tty_mutex.
+	 */
+	barrier();
+	return tty;
+}
 
 /*
  * Initialize the console device. This is called *early*, so
diff --git a/drivers/s390/char/fs3270.c b/drivers/s390/char/fs3270.c
index 78f8bda81dae..ef205ddd31a4 100644
--- a/drivers/s390/char/fs3270.c
+++ b/drivers/s390/char/fs3270.c
@@ -424,11 +424,15 @@ fs3270_open(struct inode *inode, struct file *filp)
 	minor = iminor(filp->f_dentry->d_inode);
 	/* Check for minor 0 multiplexer. */
 	if (minor == 0) {
-		if (!current->signal->tty)
+		struct tty_struct *tty;
+		mutex_lock(&tty_mutex);
+		tty = get_current_tty();
+		if (!tty || tty->driver->major != IBM_TTY3270_MAJOR) {
+			mutex_unlock(&tty_mutex);
 			return -ENODEV;
-		if (current->signal->tty->driver->major != IBM_TTY3270_MAJOR)
-			return -ENODEV;
-		minor = current->signal->tty->index + RAW3270_FIRSTMINOR;
+		}
+		minor = tty->index + RAW3270_FIRSTMINOR;
+		mutex_unlock(&tty_mutex);
 	}
 	/* Check if some other program is already using fullscreen mode. */
 	fp = (struct fs3270 *) raw3270_find_view(&fs3270_fn, minor);
diff --git a/fs/dquot.c b/fs/dquot.c
index f9cd5e23ebdf..89066b19124d 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -828,6 +828,7 @@ static inline int need_print_warning(struct dquot *dquot)
 static void print_warning(struct dquot *dquot, const char warntype)
 {
 	char *msg = NULL;
+	struct tty_struct *tty;
 	int flag = (warntype == BHARDWARN || warntype == BSOFTLONGWARN) ? DQ_BLKS_B :
 	  ((warntype == IHARDWARN || warntype == ISOFTLONGWARN) ? DQ_INODES_B : 0);
 
@@ -835,14 +836,15 @@ static void print_warning(struct dquot *dquot, const char warntype)
 		return;
 
 	mutex_lock(&tty_mutex);
-	if (!current->signal->tty)
+	tty = get_current_tty();
+	if (!tty)
 		goto out_lock;
-	tty_write_message(current->signal->tty, dquot->dq_sb->s_id);
+	tty_write_message(tty, dquot->dq_sb->s_id);
 	if (warntype == ISOFTWARN || warntype == BSOFTWARN)
-		tty_write_message(current->signal->tty, ": warning, ");
+		tty_write_message(tty, ": warning, ");
 	else
-		tty_write_message(current->signal->tty, ": write failed, ");
-	tty_write_message(current->signal->tty, quotatypes[dquot->dq_type]);
+		tty_write_message(tty, ": write failed, ");
+	tty_write_message(tty, quotatypes[dquot->dq_type]);
 	switch (warntype) {
 		case IHARDWARN:
 			msg = " file limit reached.\r\n";
@@ -863,7 +865,7 @@ static void print_warning(struct dquot *dquot, const char warntype)
 			msg = " block quota exceeded.\r\n";
 			break;
 	}
-	tty_write_message(current->signal->tty, msg);
+	tty_write_message(tty, msg);
 out_lock:
 	mutex_unlock(&tty_mutex);
 }
diff --git a/fs/open.c b/fs/open.c
index 89e0c237a636..3b56192816ca 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1087,6 +1087,7 @@ EXPORT_SYMBOL(sys_close);
 asmlinkage long sys_vhangup(void)
 {
 	if (capable(CAP_SYS_TTY_CONFIG)) {
+		/* XXX: this needs locking */
 		tty_vhangup(current->signal->tty);
 		return 0;
 	}
diff --git a/include/linux/tty.h b/include/linux/tty.h
index f717f0898238..1d29999a3439 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -309,6 +309,12 @@ extern void tty_ldisc_flush(struct tty_struct *tty);
 extern int tty_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 		     unsigned long arg);
 
+extern dev_t tty_devnum(struct tty_struct *tty);
+extern void proc_clear_tty(struct task_struct *p);
+extern void __proc_set_tty(struct task_struct *tsk, struct tty_struct *tty);
+extern void proc_set_tty(struct task_struct *tsk, struct tty_struct *tty);
+extern struct tty_struct *get_current_tty(void);
+
 extern struct mutex tty_mutex;
 
 /* n_tty.c */
@@ -335,10 +341,5 @@ extern void console_print(const char *);
 extern int vt_ioctl(struct tty_struct *tty, struct file * file,
 		    unsigned int cmd, unsigned long arg);
 
-static inline dev_t tty_devnum(struct tty_struct *tty)
-{
-	return MKDEV(tty->driver->major, tty->driver->minor_start) + tty->index;
-}
-
 #endif /* __KERNEL__ */
 #endif
diff --git a/kernel/acct.c b/kernel/acct.c
index dc12db8600e7..ca5619039367 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -428,6 +428,7 @@ static void do_acct_process(struct file *file)
 	u64 elapsed;
 	u64 run_time;
 	struct timespec uptime;
+	struct tty_struct *tty;
 
 	/*
 	 * First check to see if there is enough free_space to continue
@@ -485,12 +486,8 @@ static void do_acct_process(struct file *file)
 #endif
 
 	mutex_lock(&tty_mutex);
-	/* FIXME: Whoever is responsible for current->signal locking needs
-	   to use the same locking all over the kernel and document it */
-	read_lock(&tasklist_lock);
-	ac.ac_tty = current->signal->tty ?
-		old_encode_dev(tty_devnum(current->signal->tty)) : 0;
-	read_unlock(&tasklist_lock);
+	tty = get_current_tty();
+	ac.ac_tty = tty ? old_encode_dev(tty_devnum(tty)) : 0;
 	mutex_unlock(&tty_mutex);
 
 	spin_lock_irq(&current->sighand->siglock);
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 40722e26de98..b6cb802fbcd1 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -826,10 +826,12 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 				 context->return_code);
 
 	mutex_lock(&tty_mutex);
+	read_lock(&tasklist_lock);
 	if (tsk->signal && tsk->signal->tty && tsk->signal->tty->name)
 		tty = tsk->signal->tty->name;
 	else
 		tty = "(none)";
+	read_unlock(&tasklist_lock);
 	audit_log_format(ab,
 		  " a0=%lx a1=%lx a2=%lx a3=%lx items=%d"
 		  " ppid=%d pid=%d auid=%u uid=%u gid=%u"
diff --git a/kernel/exit.c b/kernel/exit.c
index 4e3f919edc48..fa235779b6a3 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -384,9 +384,7 @@ void daemonize(const char *name, ...)
 	exit_mm(current);
 
 	set_special_pids(1, 1);
-	mutex_lock(&tty_mutex);
-	current->signal->tty = NULL;
-	mutex_unlock(&tty_mutex);
+	proc_clear_tty(current);
 
 	/* Block and flush all signals */
 	sigfillset(&blocked);
diff --git a/kernel/sys.c b/kernel/sys.c
index a0c1a29a507f..1ac2d1c5d84e 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1484,7 +1484,6 @@ asmlinkage long sys_setsid(void)
 	pid_t session;
 	int err = -EPERM;
 
-	mutex_lock(&tty_mutex);
 	write_lock_irq(&tasklist_lock);
 
 	/* Fail if I am already a session leader */
@@ -1504,12 +1503,15 @@ asmlinkage long sys_setsid(void)
 
 	group_leader->signal->leader = 1;
 	__set_special_pids(session, session);
+
+	spin_lock(&group_leader->sighand->siglock);
 	group_leader->signal->tty = NULL;
 	group_leader->signal->tty_old_pgrp = 0;
+	spin_unlock(&group_leader->sighand->siglock);
+
 	err = process_group(group_leader);
 out:
 	write_unlock_irq(&tasklist_lock);
-	mutex_unlock(&tty_mutex);
 	return err;
 }
 
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 44e9cd470543..f5df8c70a9b5 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1695,9 +1695,10 @@ static inline void flush_unauthorized_files(struct files_struct * files)
 	struct tty_struct *tty;
 	struct fdtable *fdt;
 	long j = -1;
+	int drop_tty = 0;
 
 	mutex_lock(&tty_mutex);
-	tty = current->signal->tty;
+	tty = get_current_tty();
 	if (tty) {
 		file_list_lock();
 		file = list_entry(tty->tty_files.next, typeof(*file), f_u.fu_list);
@@ -1710,12 +1711,14 @@ static inline void flush_unauthorized_files(struct files_struct * files)
 			struct inode *inode = file->f_dentry->d_inode;
 			if (inode_has_perm(current, inode,
 					   FILE__READ | FILE__WRITE, NULL)) {
-				/* Reset controlling tty. */
-				current->signal->tty = NULL;
-				current->signal->tty_old_pgrp = 0;
+				drop_tty = 1;
 			}
 		}
 		file_list_unlock();
+
+		/* Reset controlling tty. */
+		if (drop_tty)
+			proc_set_tty(current, NULL);
 	}
 	mutex_unlock(&tty_mutex);
 
-- 
cgit v1.2.3


From 937949d9edbf4049bd41af6c9f92c26280584564 Mon Sep 17 00:00:00 2001
From: Cedric Le Goater <clg@fr.ibm.com>
Date: Fri, 8 Dec 2006 02:37:54 -0800
Subject: [PATCH] add process_session() helper routine

Replace occurences of task->signal->session by a new process_session() helper
routine.

It will be useful for pid namespaces to abstract the session pid number.

Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>
Cc: Kirill Korotaev <dev@openvz.org>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: Sukadev Bhattiprolu <sukadev@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/mips/kernel/irixelf.c |  2 +-
 drivers/char/mxser.c       |  2 +-
 drivers/char/rocket.c      |  2 +-
 drivers/char/tty_io.c      | 12 ++++++------
 fs/binfmt_elf.c            |  4 ++--
 fs/binfmt_elf_fdpic.c      |  4 ++--
 include/linux/sched.h      |  5 +++++
 kernel/exit.c              | 19 ++++++++++---------
 kernel/fork.c              |  4 ++--
 kernel/signal.c            |  2 +-
 kernel/sys.c               |  8 ++++----
 11 files changed, 35 insertions(+), 29 deletions(-)

(limited to 'kernel/sys.c')

diff --git a/arch/mips/kernel/irixelf.c b/arch/mips/kernel/irixelf.c
index 1bbefbf43373..37cad5de515c 100644
--- a/arch/mips/kernel/irixelf.c
+++ b/arch/mips/kernel/irixelf.c
@@ -1145,7 +1145,7 @@ static int irix_core_dump(long signr, struct pt_regs * regs, struct file *file)
 	psinfo.pr_pid = prstatus.pr_pid = current->pid;
 	psinfo.pr_ppid = prstatus.pr_ppid = current->parent->pid;
 	psinfo.pr_pgrp = prstatus.pr_pgrp = process_group(current);
-	psinfo.pr_sid = prstatus.pr_sid = current->signal->session;
+	psinfo.pr_sid = prstatus.pr_sid = process_session(current);
 	if (current->pid == current->tgid) {
 		/*
 		 * This is the record for the group leader.  Add in the
diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c
index 5ed2486b7581..b1cc89c89820 100644
--- a/drivers/char/mxser.c
+++ b/drivers/char/mxser.c
@@ -994,7 +994,7 @@ static int mxser_open(struct tty_struct *tty, struct file *filp)
 		mxser_change_speed(info, NULL);
 	}
 
-	info->session = current->signal->session;
+	info->session = process_session(current);
 	info->pgrp = process_group(current);
 
 	/*
diff --git a/drivers/char/rocket.c b/drivers/char/rocket.c
index bac80056f7e0..4fdf52e9f3b1 100644
--- a/drivers/char/rocket.c
+++ b/drivers/char/rocket.c
@@ -1017,7 +1017,7 @@ static int rp_open(struct tty_struct *tty, struct file *filp)
 	/*
 	 * Info->count is now 1; so it's safe to sleep now.
 	 */
-	info->session = current->signal->session;
+	info->session = process_session(current);
 	info->pgrp = process_group(current);
 
 	if ((info->flags & ROCKET_INITIALIZED) == 0) {
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 8a2d26e98ac4..4ebba7ca1dc9 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -1511,7 +1511,7 @@ void disassociate_ctty(int on_exit)
 
 	spin_lock_irq(&current->sighand->siglock);
 	current->signal->tty_old_pgrp = 0;
-	session = current->signal->session;
+	session = process_session(current);
 	spin_unlock_irq(&current->sighand->siglock);
 
 	mutex_lock(&tty_mutex);
@@ -2897,7 +2897,7 @@ static int tiocsctty(struct tty_struct *tty, int arg)
 {
 	int ret = 0;
 	if (current->signal->leader &&
-	    (current->signal->session == tty->session))
+			(process_session(current) == tty->session))
 		return ret;
 
 	mutex_lock(&tty_mutex);
@@ -2979,13 +2979,13 @@ static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t
 		return retval;
 	if (!current->signal->tty ||
 	    (current->signal->tty != real_tty) ||
-	    (real_tty->session != current->signal->session))
+	    (real_tty->session != process_session(current)))
 		return -ENOTTY;
 	if (get_user(pgrp, p))
 		return -EFAULT;
 	if (pgrp < 0)
 		return -EINVAL;
-	if (session_of_pgrp(pgrp) != current->signal->session)
+	if (session_of_pgrp(pgrp) != process_session(current))
 		return -EPERM;
 	real_tty->pgrp = pgrp;
 	return 0;
@@ -3338,7 +3338,7 @@ static void __do_SAK(struct work_struct *work)
 	/* Kill the entire session */
 	do_each_task_pid(session, PIDTYPE_SID, p) {
 		printk(KERN_NOTICE "SAK: killed process %d"
-			" (%s): p->signal->session==tty->session\n",
+			" (%s): process_session(p)==tty->session\n",
 			p->pid, p->comm);
 		send_sig(SIGKILL, p, 1);
 	} while_each_task_pid(session, PIDTYPE_SID, p);
@@ -3348,7 +3348,7 @@ static void __do_SAK(struct work_struct *work)
 	do_each_thread(g, p) {
 		if (p->signal->tty == tty) {
 			printk(KERN_NOTICE "SAK: killed process %d"
-			    " (%s): p->signal->session==tty->session\n",
+			    " (%s): process_session(p)==tty->session\n",
 			    p->pid, p->comm);
 			send_sig(SIGKILL, p, 1);
 			continue;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index c6dbb4a7ec78..d3adfd353ff9 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1317,7 +1317,7 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
 	prstatus->pr_pid = p->pid;
 	prstatus->pr_ppid = p->parent->pid;
 	prstatus->pr_pgrp = process_group(p);
-	prstatus->pr_sid = p->signal->session;
+	prstatus->pr_sid = process_session(p);
 	if (thread_group_leader(p)) {
 		/*
 		 * This is the record for the group leader.  Add in the
@@ -1363,7 +1363,7 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
 	psinfo->pr_pid = p->pid;
 	psinfo->pr_ppid = p->parent->pid;
 	psinfo->pr_pgrp = process_group(p);
-	psinfo->pr_sid = p->signal->session;
+	psinfo->pr_sid = process_session(p);
 
 	i = p->state ? ffz(~p->state) + 1 : 0;
 	psinfo->pr_state = i;
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 9f0b7efc3df5..76f06f6bc2f6 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1322,7 +1322,7 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
 	prstatus->pr_pid = p->pid;
 	prstatus->pr_ppid = p->parent->pid;
 	prstatus->pr_pgrp = process_group(p);
-	prstatus->pr_sid = p->signal->session;
+	prstatus->pr_sid = process_session(p);
 	if (thread_group_leader(p)) {
 		/*
 		 * This is the record for the group leader.  Add in the
@@ -1371,7 +1371,7 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
 	psinfo->pr_pid = p->pid;
 	psinfo->pr_ppid = p->parent->pid;
 	psinfo->pr_pgrp = process_group(p);
-	psinfo->pr_sid = p->signal->session;
+	psinfo->pr_sid = process_session(p);
 
 	i = p->state ? ffz(~p->state) + 1 : 0;
 	psinfo->pr_state = i;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5e8a0ba61749..270d864a8ff1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1047,6 +1047,11 @@ static inline pid_t process_group(struct task_struct *tsk)
 	return tsk->signal->pgrp;
 }
 
+static inline pid_t process_session(struct task_struct *tsk)
+{
+	return tsk->signal->session;
+}
+
 static inline struct pid *task_pid(struct task_struct *task)
 {
 	return task->pids[PIDTYPE_PID].pid;
diff --git a/kernel/exit.c b/kernel/exit.c
index fa0495e167e9..8d289bfc13d1 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -193,14 +193,14 @@ int session_of_pgrp(int pgrp)
 
 	read_lock(&tasklist_lock);
 	do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
-		if (p->signal->session > 0) {
-			sid = p->signal->session;
+		if (process_session(p) > 0) {
+			sid = process_session(p);
 			goto out;
 		}
 	} while_each_task_pid(pgrp, PIDTYPE_PGID, p);
 	p = find_task_by_pid(pgrp);
 	if (p)
-		sid = p->signal->session;
+		sid = process_session(p);
 out:
 	read_unlock(&tasklist_lock);
 	
@@ -225,8 +225,8 @@ static int will_become_orphaned_pgrp(int pgrp, struct task_struct *ignored_task)
 				|| p->exit_state
 				|| is_init(p->real_parent))
 			continue;
-		if (process_group(p->real_parent) != pgrp
-			    && p->real_parent->signal->session == p->signal->session) {
+		if (process_group(p->real_parent) != pgrp &&
+		    process_session(p->real_parent) == process_session(p)) {
 			ret = 0;
 			break;
 		}
@@ -302,7 +302,7 @@ void __set_special_pids(pid_t session, pid_t pgrp)
 {
 	struct task_struct *curr = current->group_leader;
 
-	if (curr->signal->session != session) {
+	if (process_session(curr) != session) {
 		detach_pid(curr, PIDTYPE_SID);
 		curr->signal->session = session;
 		attach_pid(curr, PIDTYPE_SID, session);
@@ -647,10 +647,11 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
 	 * outside, so the child pgrp is now orphaned.
 	 */
 	if ((process_group(p) != process_group(father)) &&
-	    (p->signal->session == father->signal->session)) {
+	    (process_session(p) == process_session(father))) {
 		int pgrp = process_group(p);
 
-		if (will_become_orphaned_pgrp(pgrp, NULL) && has_stopped_jobs(pgrp)) {
+		if (will_become_orphaned_pgrp(pgrp, NULL) &&
+		    has_stopped_jobs(pgrp)) {
 			__kill_pg_info(SIGHUP, SEND_SIG_PRIV, pgrp);
 			__kill_pg_info(SIGCONT, SEND_SIG_PRIV, pgrp);
 		}
@@ -784,7 +785,7 @@ static void exit_notify(struct task_struct *tsk)
 	t = tsk->real_parent;
 	
 	if ((process_group(t) != process_group(tsk)) &&
-	    (t->signal->session == tsk->signal->session) &&
+	    (process_session(t) == process_session(tsk)) &&
 	    will_become_orphaned_pgrp(process_group(tsk), tsk) &&
 	    has_stopped_jobs(process_group(tsk))) {
 		__kill_pg_info(SIGHUP, SEND_SIG_PRIV, process_group(tsk));
diff --git a/kernel/fork.c b/kernel/fork.c
index 597707819327..298c4d6ab512 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1259,9 +1259,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 		if (thread_group_leader(p)) {
 			p->signal->tty = current->signal->tty;
 			p->signal->pgrp = process_group(current);
-			p->signal->session = current->signal->session;
+			p->signal->session = process_session(current);
 			attach_pid(p, PIDTYPE_PGID, process_group(p));
-			attach_pid(p, PIDTYPE_SID, p->signal->session);
+			attach_pid(p, PIDTYPE_SID, process_session(p));
 
 			list_add_tail_rcu(&p->tasks, &init_task.tasks);
 			__get_cpu_var(process_counts)++;
diff --git a/kernel/signal.c b/kernel/signal.c
index ec81defde339..9eac4db60eda 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -583,7 +583,7 @@ static int check_kill_permission(int sig, struct siginfo *info,
 	error = -EPERM;
 	if ((info == SEND_SIG_NOINFO || (!is_si_special(info) && SI_FROMUSER(info)))
 	    && ((sig != SIGCONT) ||
-		(current->signal->session != t->signal->session))
+		(process_session(current) != process_session(t)))
 	    && (current->euid ^ t->suid) && (current->euid ^ t->uid)
 	    && (current->uid ^ t->suid) && (current->uid ^ t->uid)
 	    && !capable(CAP_KILL))
diff --git a/kernel/sys.c b/kernel/sys.c
index 1ac2d1c5d84e..4f9d23a3095f 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1381,7 +1381,7 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
 
 	if (p->real_parent == group_leader) {
 		err = -EPERM;
-		if (p->signal->session != group_leader->signal->session)
+		if (process_session(p) != process_session(group_leader))
 			goto out;
 		err = -EACCES;
 		if (p->did_exec)
@@ -1400,7 +1400,7 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
 		struct task_struct *p;
 
 		do_each_task_pid(pgid, PIDTYPE_PGID, p) {
-			if (p->signal->session == group_leader->signal->session)
+			if (process_session(p) == process_session(group_leader))
 				goto ok_pgid;
 		} while_each_task_pid(pgid, PIDTYPE_PGID, p);
 		goto out;
@@ -1459,7 +1459,7 @@ asmlinkage long sys_getpgrp(void)
 asmlinkage long sys_getsid(pid_t pid)
 {
 	if (!pid)
-		return current->signal->session;
+		return process_session(current);
 	else {
 		int retval;
 		struct task_struct *p;
@@ -1471,7 +1471,7 @@ asmlinkage long sys_getsid(pid_t pid)
 		if (p) {
 			retval = security_task_getsid(p);
 			if (!retval)
-				retval = p->signal->session;
+				retval = process_session(p);
 		}
 		read_unlock(&tasklist_lock);
 		return retval;
-- 
cgit v1.2.3


From f020bc468fe4a91d32046d448511978c7b611315 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 8 Dec 2006 02:38:02 -0800
Subject: [PATCH] sys_setpgid: eliminate unnecessary
 do_each_task_pid(PIDTYPE_PGID)

All tasks in the process group have the same sid, we don't need to iterate
them all to check that the caller of sys_setpgid() doesn't change its
session.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sys.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'kernel/sys.c')

diff --git a/kernel/sys.c b/kernel/sys.c
index 4f9d23a3095f..c7675c1bfdf2 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1397,16 +1397,13 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
 		goto out;
 
 	if (pgid != pid) {
-		struct task_struct *p;
+		struct task_struct *g =
+			find_task_by_pid_type(PIDTYPE_PGID, pgid);
 
-		do_each_task_pid(pgid, PIDTYPE_PGID, p) {
-			if (process_session(p) == process_session(group_leader))
-				goto ok_pgid;
-		} while_each_task_pid(pgid, PIDTYPE_PGID, p);
-		goto out;
+		if (!g || process_session(g) != process_session(group_leader))
+			goto out;
 	}
 
-ok_pgid:
 	err = security_task_setpgid(p, pgid);
 	if (err)
 		goto out;
-- 
cgit v1.2.3