author     Linus Torvalds <torvalds@athlon.transmeta.com>    2002-02-04 17:40:40 -0800
committer  Linus Torvalds <torvalds@athlon.transmeta.com>    2002-02-04 17:40:40 -0800
commit     7a2deb32924142696b8174cdf9b38cd72a11fc96 (patch)
tree       8ecc18f81fdb849254f39dc2e9fd77253319e1ec /kernel
Import changeset
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile         32
-rw-r--r--  kernel/acct.c          373
-rw-r--r--  kernel/capability.c    216
-rw-r--r--  kernel/context.c       157
-rw-r--r--  kernel/dma.c           129
-rw-r--r--  kernel/exec_domain.c   163
-rw-r--r--  kernel/exit.c          596
-rw-r--r--  kernel/fork.c          771
-rw-r--r--  kernel/info.c           74
-rw-r--r--  kernel/itimer.c        170
-rw-r--r--  kernel/kmod.c          373
-rw-r--r--  kernel/ksyms.c         538
-rw-r--r--  kernel/module.c       1235
-rw-r--r--  kernel/panic.c         103
-rw-r--r--  kernel/pm.c            245
-rw-r--r--  kernel/printk.c        497
-rw-r--r--  kernel/ptrace.c        193
-rw-r--r--  kernel/resource.c      322
-rw-r--r--  kernel/sched.c        1269
-rw-r--r--  kernel/signal.c       1260
-rw-r--r--  kernel/softirq.c       317
-rw-r--r--  kernel/sys.c          1219
-rw-r--r--  kernel/sysctl.c       1309
-rw-r--r--  kernel/time.c          420
-rw-r--r--  kernel/timer.c         837
-rw-r--r--  kernel/uid16.c         163
-rw-r--r--  kernel/user.c          137
27 files changed, 13118 insertions, 0 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
new file mode 100644
index 000000000000..9adeb6b2c392
--- /dev/null
+++ b/kernel/Makefile
@@ -0,0 +1,32 @@
+#
+# Makefile for the linux kernel.
+#
+# Note! Dependencies are done automagically by 'make dep', which also
+# removes any old dependencies. DON'T put your own dependencies here
+# unless it's something special (ie not a .c file).
+#
+# Note 2! The CFLAGS definitions are now in the main makefile...
+
+O_TARGET := kernel.o
+
+export-objs = signal.o sys.o kmod.o context.o ksyms.o pm.o
+
+obj-y = sched.o dma.o fork.o exec_domain.o panic.o printk.o \
+ module.o exit.o itimer.o info.o time.o softirq.o resource.o \
+ sysctl.o acct.o capability.o ptrace.o timer.o user.o \
+ signal.o sys.o kmod.o context.o
+
+obj-$(CONFIG_UID16) += uid16.o
+obj-$(CONFIG_MODULES) += ksyms.o
+obj-$(CONFIG_PM) += pm.o
+
+ifneq ($(CONFIG_IA64),y)
+# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
+# needed for x86 only. Why this used to be enabled for all architectures is beyond
+# me. I suspect most platforms don't need this, but until we know that for sure
+# I turn this off for IA-64 only. Andreas Schwab says it's also needed on m68k
+# to get a correct value for the wait-channel (WCHAN in ps). --davidm
+CFLAGS_sched.o := $(PROFILING) -fno-omit-frame-pointer
+endif
+
+include $(TOPDIR)/Rules.make
diff --git a/kernel/acct.c b/kernel/acct.c
new file mode 100644
index 000000000000..e2e8826fa6fd
--- /dev/null
+++ b/kernel/acct.c
@@ -0,0 +1,373 @@
+/*
+ * linux/kernel/acct.c
+ *
+ * BSD Process Accounting for Linux
+ *
+ * Author: Marco van Wieringen <mvw@planets.elm.net>
+ *
+ * Some code based on ideas and code from:
+ * Thomas K. Dyas <tdyas@eden.rutgers.edu>
+ *
+ * This file implements BSD-style process accounting. Whenever any
+ * process exits, an accounting record of type "struct acct" is
+ * written to the file specified with the acct() system call. It is
+ * up to user-level programs to do useful things with the accounting
+ * log. The kernel just provides the raw accounting information.
+ *
+ * (C) Copyright 1995 - 1997 Marco van Wieringen - ELM Consultancy B.V.
+ *
+ * Plugged two leaks. 1) It didn't return acct_file into the free_filps if
+ * the file happened to be read-only. 2) If the accounting was suspended
+ * due to the lack of space it happily allowed to reopen it and completely
+ * lost the old acct_file. 3/10/98, Al Viro.
+ *
+ * Now we silently close acct_file on attempt to reopen. Cleaned sys_acct().
+ * XTerms and EMACS are manifestations of pure evil. 21/10/98, AV.
+ *
+ * Fixed a nasty interaction with sys_umount(). If the accounting
+ * was suspended we failed to stop it on umount(). Messy.
+ * Another one: remount to readonly didn't stop accounting.
+ * Question: what should we do if we have CAP_SYS_ADMIN but not
+ * CAP_SYS_PACCT? Current code does the following: umount returns -EBUSY
+ * unless we are messing with the root. In that case we are getting a
+ * real mess with do_remount_sb(). 9/11/98, AV.
+ *
+ * Fixed a bunch of races (and pair of leaks). Probably not the best way,
+ * but this one obviously doesn't introduce deadlocks. Later. BTW, found
+ * one race (and leak) in BSD implementation.
+ * OK, that's better. ANOTHER race and leak in BSD variant. There always
+ * is one more bug... 10/11/98, AV.
+ *
+ * Oh, fsck... Oopsable SMP race in do_process_acct() - we must hold
+ * ->mmap_sem to walk the vma list of current->mm. Nasty, since it leaks
+ * a struct file opened for write. Fixed. 2/6/2000, AV.
+ */
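The header above describes the interface from the user side: pass acct() a filename to start logging, pass NULL to stop. A minimal userspace sketch of that flow follows; it is illustrative only, the path is hypothetical, and the file must already exist because sys_acct() opens it O_WRONLY|O_APPEND without O_CREAT.

/*
 * Minimal userspace sketch (illustrative, not part of this patch):
 * enable and later disable BSD process accounting via acct(2).
 * Requires CAP_SYS_PACCT; the path is hypothetical and the file
 * must already exist, since sys_acct() does not pass O_CREAT.
 */
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	if (acct("/var/log/pacct") != 0) {	/* start logging */
		perror("acct");
		return 1;
	}
	/* ... run some work; every exit appends one struct acct ... */
	if (acct(NULL) != 0)			/* stop logging */
		perror("acct");
	return 0;
}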
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+
+#ifdef CONFIG_BSD_PROCESS_ACCT
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/acct.h>
+#include <linux/smp_lock.h>
+#include <linux/file.h>
+
+#include <asm/uaccess.h>
+
+/*
+ * These constants control the amount of free space that suspends and
+ * resumes the process accounting system, and the time delay between
+ * each check.
+ * Turned into sysctl-controllable parameters. AV, 12/11/98
+ */
+
+int acct_parm[3] = {4, 2, 30};
+#define RESUME (acct_parm[0]) /* >foo% free space - resume */
+#define SUSPEND (acct_parm[1]) /* <foo% free space - suspend */
+#define ACCT_TIMEOUT (acct_parm[2]) /* foo second timeout between checks */
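The three acct_parm values above are exported through sysctl as kernel.acct; a hedged sketch of reading them from /proc/sys/kernel/acct follows, assuming procfs and sysctl support are configured in.

/*
 * Hedged userspace sketch (not part of this patch): read the RESUME,
 * SUSPEND and ACCT_TIMEOUT values exposed as /proc/sys/kernel/acct,
 * assuming procfs and CONFIG_SYSCTL are available.
 */
#include <stdio.h>

int main(void)
{
	int resume, suspend, freq;
	FILE *f = fopen("/proc/sys/kernel/acct", "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fscanf(f, "%d %d %d", &resume, &suspend, &freq) == 3)
		printf("resume at %d%% free, suspend at %d%%, check every %d s\n",
		       resume, suspend, freq);
	fclose(f);
	return 0;
}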
+
+/*
+ * External references and all of the globals.
+ */
+
+static volatile int acct_active;
+static volatile int acct_needcheck;
+static struct file *acct_file;
+static struct timer_list acct_timer;
+static void do_acct_process(long, struct file *);
+
+/*
+ * Called whenever the timer says to check the free space.
+ */
+static void acct_timeout(unsigned long unused)
+{
+ acct_needcheck = 1;
+}
+
+/*
+ * Check the amount of free space and suspend/resume accordingly.
+ */
+static int check_free_space(struct file *file)
+{
+ struct statfs sbuf;
+ int res;
+ int act;
+
+ lock_kernel();
+ res = acct_active;
+ if (!file || !acct_needcheck)
+ goto out;
+ unlock_kernel();
+
+ /* May block */
+ if (vfs_statfs(file->f_dentry->d_inode->i_sb, &sbuf))
+ return res;
+
+ if (sbuf.f_bavail <= SUSPEND * sbuf.f_blocks / 100)
+ act = -1;
+ else if (sbuf.f_bavail >= RESUME * sbuf.f_blocks / 100)
+ act = 1;
+ else
+ act = 0;
+
+ /*
+ * If some joker switched acct_file under us we'd better be
+ * silent and _not_ touch anything.
+ */
+ lock_kernel();
+ if (file != acct_file) {
+ if (act)
+ res = act>0;
+ goto out;
+ }
+
+ if (acct_active) {
+ if (act < 0) {
+ acct_active = 0;
+ printk(KERN_INFO "Process accounting paused\n");
+ }
+ } else {
+ if (act > 0) {
+ acct_active = 1;
+ printk(KERN_INFO "Process accounting resumed\n");
+ }
+ }
+
+ del_timer(&acct_timer);
+ acct_needcheck = 0;
+ acct_timer.expires = jiffies + ACCT_TIMEOUT*HZ;
+ add_timer(&acct_timer);
+ res = acct_active;
+out:
+ unlock_kernel();
+ return res;
+}
+
+/*
+ * sys_acct() is the only system call needed to implement process
+ * accounting. It takes the name of the file where accounting records
+ * should be written. If the filename is NULL, accounting will be
+ * shutdown.
+ */
+asmlinkage long sys_acct(const char *name)
+{
+ struct file *file = NULL, *old_acct = NULL;
+ char *tmp;
+ int error;
+
+ if (!capable(CAP_SYS_PACCT))
+ return -EPERM;
+
+ if (name) {
+ tmp = getname(name);
+ error = PTR_ERR(tmp);
+ if (IS_ERR(tmp))
+ goto out;
+ /* Difference from BSD - they don't do O_APPEND */
+ file = filp_open(tmp, O_WRONLY|O_APPEND, 0);
+ putname(tmp);
+ if (IS_ERR(file)) {
+ error = PTR_ERR(file);
+ goto out;
+ }
+ error = -EACCES;
+ if (!S_ISREG(file->f_dentry->d_inode->i_mode))
+ goto out_err;
+
+ error = -EIO;
+ if (!file->f_op->write)
+ goto out_err;
+ }
+
+ error = 0;
+ lock_kernel();
+ if (acct_file) {
+ old_acct = acct_file;
+ del_timer(&acct_timer);
+ acct_active = 0;
+ acct_needcheck = 0;
+ acct_file = NULL;
+ }
+ if (name) {
+ acct_file = file;
+ acct_needcheck = 0;
+ acct_active = 1;
+ /* It's been deleted if it was used before so this is safe */
+ init_timer(&acct_timer);
+ acct_timer.function = acct_timeout;
+ acct_timer.expires = jiffies + ACCT_TIMEOUT*HZ;
+ add_timer(&acct_timer);
+ }
+ unlock_kernel();
+ if (old_acct) {
+ do_acct_process(0,old_acct);
+ filp_close(old_acct, NULL);
+ }
+out:
+ return error;
+out_err:
+ filp_close(file, NULL);
+ goto out;
+}
+
+void acct_auto_close(kdev_t dev)
+{
+ lock_kernel();
+ if (acct_file && acct_file->f_dentry->d_inode->i_dev == dev)
+ sys_acct(NULL);
+ unlock_kernel();
+}
+
+/*
+ * encode an unsigned long into a comp_t
+ *
+ * This routine has been adapted from the encode_comp_t() function in
+ * the kern_acct.c file of the FreeBSD operating system. The encoding
+ * is a 13-bit fraction with a 3-bit (base 8) exponent.
+ */
+
+#define MANTSIZE 13 /* 13 bit mantissa. */
+#define EXPSIZE 3 /* Base 8 (3 bit) exponent. */
+#define MAXFRACT ((1 << MANTSIZE) - 1) /* Maximum fractional value. */
+
+static comp_t encode_comp_t(unsigned long value)
+{
+ int exp, rnd;
+
+ exp = rnd = 0;
+ while (value > MAXFRACT) {
+ rnd = value & (1 << (EXPSIZE - 1)); /* Round up? */
+ value >>= EXPSIZE; /* Base 8 exponent == 3 bit shift. */
+ exp++;
+ }
+
+ /*
+ * If we need to round up, do it (and handle overflow correctly).
+ */
+ if (rnd && (++value > MAXFRACT)) {
+ value >>= EXPSIZE;
+ exp++;
+ }
+
+ /*
+ * Clean it up and polish it off.
+ */
+ exp <<= MANTSIZE; /* Shift the exponent into place */
+ exp += value; /* and add on the mantissa. */
+ return exp;
+}
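Decoding a comp_t is the mirror image of encode_comp_t(): take the low 13 bits as the mantissa and shift it left three bits per unit of exponent. A sketch of that inverse follows (not part of this patch), reusing the MANTSIZE/EXPSIZE/MAXFRACT definitions above, with one worked value.

/*
 * Illustrative inverse of encode_comp_t() (not part of this patch):
 * unpack a comp_t into an approximate unsigned long using the same
 * 13-bit mantissa / 3-bit base-8 exponent layout defined above.
 */
static unsigned long decode_comp_t(comp_t c)
{
	unsigned long value = c & MAXFRACT;		  /* low 13 bits */
	int exp = (c >> MANTSIZE) & ((1 << EXPSIZE) - 1); /* top 3 bits */

	while (exp-- > 0)
		value <<= EXPSIZE;			  /* multiply by 8 */
	return value;
}

/*
 * Worked example: encode_comp_t(100000) gives exponent 2, mantissa 1563
 * (0x461b); decode_comp_t(0x461b) returns 1563 << 6 == 100032, i.e. the
 * original value to within the 13-bit mantissa precision.
 */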
+
+/*
+ * Write an accounting entry for an exiting process
+ *
+ * The acct_process() call is the workhorse of the process
+ * accounting system. The struct acct is built here and then written
+ * into the accounting file. This function should only be called from
+ * do_exit().
+ */
+
+/*
+ * do_acct_process does all actual work. Caller holds the reference to file.
+ */
+static void do_acct_process(long exitcode, struct file *file)
+{
+ struct acct ac;
+ mm_segment_t fs;
+ unsigned long vsize;
+
+ /*
+ * First check to see if there is enough free_space to continue
+ * the process accounting system.
+ */
+ if (!check_free_space(file))
+ return;
+
+ /*
+ * Fill the accounting struct with the needed info as recorded
+ * by the different kernel functions.
+ */
+ memset((caddr_t)&ac, 0, sizeof(struct acct));
+
+ strncpy(ac.ac_comm, current->comm, ACCT_COMM);
+ ac.ac_comm[ACCT_COMM - 1] = '\0';
+
+ ac.ac_btime = CT_TO_SECS(current->start_time) + (xtime.tv_sec - (jiffies / HZ));
+ ac.ac_etime = encode_comp_t(jiffies - current->start_time);
+ ac.ac_utime = encode_comp_t(current->times.tms_utime);
+ ac.ac_stime = encode_comp_t(current->times.tms_stime);
+ ac.ac_uid = current->uid;
+ ac.ac_gid = current->gid;
+ ac.ac_tty = (current->tty) ? kdev_t_to_nr(current->tty->device) : 0;
+
+ ac.ac_flag = 0;
+ if (current->flags & PF_FORKNOEXEC)
+ ac.ac_flag |= AFORK;
+ if (current->flags & PF_SUPERPRIV)
+ ac.ac_flag |= ASU;
+ if (current->flags & PF_DUMPCORE)
+ ac.ac_flag |= ACORE;
+ if (current->flags & PF_SIGNALED)
+ ac.ac_flag |= AXSIG;
+
+ vsize = 0;
+ if (current->mm) {
+ struct vm_area_struct *vma;
+ down(&current->mm->mmap_sem);
+ vma = current->mm->mmap;
+ while (vma) {
+ vsize += vma->vm_end - vma->vm_start;
+ vma = vma->vm_next;
+ }
+ up(&current->mm->mmap_sem);
+ }
+ vsize = vsize / 1024;
+ ac.ac_mem = encode_comp_t(vsize);
+ ac.ac_io = encode_comp_t(0 /* current->io_usage */); /* %% */
+ ac.ac_rw = encode_comp_t(ac.ac_io / 1024);
+ ac.ac_minflt = encode_comp_t(current->min_flt);
+ ac.ac_majflt = encode_comp_t(current->maj_flt);
+ ac.ac_swaps = encode_comp_t(current->nswap);
+ ac.ac_exitcode = exitcode;
+
+ /*
+ * Kernel segment override to datasegment and write it
+ * to the accounting file.
+ */
+ fs = get_fs();
+ set_fs(KERNEL_DS);
+ file->f_op->write(file, (char *)&ac,
+ sizeof(struct acct), &file->f_pos);
+ set_fs(fs);
+}
+
+/*
+ * acct_process - now just a wrapper around do_acct_process
+ */
+int acct_process(long exitcode)
+{
+ struct file *file = NULL;
+ lock_kernel();
+ if (acct_file) {
+ file = acct_file;
+ get_file(file);
+ unlock_kernel();
+ do_acct_process(exitcode, acct_file);
+ fput(file);
+ } else
+ unlock_kernel();
+ return 0;
+}
+
+#else
+/*
+ * Dummy system call when BSD process accounting is not configured
+ * into the kernel.
+ */
+
+asmlinkage long sys_acct(const char * filename)
+{
+ return -ENOSYS;
+}
+#endif
diff --git a/kernel/capability.c b/kernel/capability.c
new file mode 100644
index 000000000000..7aaf1a423011
--- /dev/null
+++ b/kernel/capability.c
@@ -0,0 +1,216 @@
+/*
+ * linux/kernel/capability.c
+ *
+ * Copyright (C) 1997 Andrew Main <zefram@fysh.org>
+ * Integrated into 2.1.97+, Andrew G. Morgan <morgan@transmeta.com>
+ */
+
+#include <linux/mm.h>
+#include <asm/uaccess.h>
+
+kernel_cap_t cap_bset = CAP_INIT_EFF_SET;
+
+/* Note: never hold tasklist_lock while spinning for this one */
+spinlock_t task_capability_lock = SPIN_LOCK_UNLOCKED;
+
+/*
+ * For sys_capget() and sys_capset(), any of the three
+ * capability set pointers may be NULL -- indicating that that set is
+ * uninteresting and/or not to be changed.
+ */
+
+asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr)
+{
+ int error, pid;
+ __u32 version;
+ struct task_struct *target;
+ struct __user_cap_data_struct data;
+
+ if (get_user(version, &header->version))
+ return -EFAULT;
+
+ error = -EINVAL;
+ if (version != _LINUX_CAPABILITY_VERSION) {
+ version = _LINUX_CAPABILITY_VERSION;
+ if (put_user(version, &header->version))
+ error = -EFAULT;
+ return error;
+ }
+
+ if (get_user(pid, &header->pid))
+ return -EFAULT;
+
+ if (pid < 0)
+ return -EINVAL;
+
+ error = 0;
+
+ spin_lock(&task_capability_lock);
+
+ if (pid && pid != current->pid) {
+ read_lock(&tasklist_lock);
+ target = find_task_by_pid(pid); /* identify target of query */
+ if (!target)
+ error = -ESRCH;
+ } else {
+ target = current;
+ }
+
+ if (!error) {
+ data.permitted = cap_t(target->cap_permitted);
+ data.inheritable = cap_t(target->cap_inheritable);
+ data.effective = cap_t(target->cap_effective);
+ }
+
+ if (target != current)
+ read_unlock(&tasklist_lock);
+ spin_unlock(&task_capability_lock);
+
+ if (!error) {
+ if (copy_to_user(dataptr, &data, sizeof data))
+ return -EFAULT;
+ }
+
+ return error;
+}
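From user space the same query goes through the raw capget syscall; a hedged sketch follows (not part of this patch), with struct layout and version constant taken from the 2.4-era <linux/capability.h>. A pid of 0 in the header means the calling task, matching the branch above.

/*
 * Hedged userspace sketch (not part of this patch): query the calling
 * task's capability sets with the raw capget syscall.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/capability.h>

int main(void)
{
	struct __user_cap_header_struct hdr;
	struct __user_cap_data_struct data;

	hdr.version = _LINUX_CAPABILITY_VERSION;
	hdr.pid = 0;				/* 0 means "the calling task" */

	if (syscall(SYS_capget, &hdr, &data) != 0) {
		perror("capget");
		return 1;
	}
	printf("effective=%#x permitted=%#x inheritable=%#x\n",
	       data.effective, data.permitted, data.inheritable);
	return 0;
}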
+
+/* set capabilities for all processes in a given process group */
+
+static void cap_set_pg(int pgrp,
+ kernel_cap_t *effective,
+ kernel_cap_t *inheritable,
+ kernel_cap_t *permitted)
+{
+ struct task_struct *target;
+
+ /* FIXME: do we need to have a write lock here..? */
+ read_lock(&tasklist_lock);
+ for_each_task(target) {
+ if (target->pgrp != pgrp)
+ continue;
+ target->cap_effective = *effective;
+ target->cap_inheritable = *inheritable;
+ target->cap_permitted = *permitted;
+ }
+ read_unlock(&tasklist_lock);
+}
+
+/* set capabilities for all processes other than 1 and self */
+
+static void cap_set_all(kernel_cap_t *effective,
+ kernel_cap_t *inheritable,
+ kernel_cap_t *permitted)
+{
+ struct task_struct *target;
+
+ /* FIXME: do we need to have a write lock here..? */
+ read_lock(&tasklist_lock);
+ /* ALL means everyone other than self or 'init' */
+ for_each_task(target) {
+ if (target == current || target->pid == 1)
+ continue;
+ target->cap_effective = *effective;
+ target->cap_inheritable = *inheritable;
+ target->cap_permitted = *permitted;
+ }
+ read_unlock(&tasklist_lock);
+}
+
+/*
+ * The restrictions on setting capabilities are specified as:
+ *
+ * [pid is for the 'target' task. 'current' is the calling task.]
+ *
+ * I: any raised capabilities must be a subset of the (old target) Inheritable
+ *    set combined with the (old current) Permitted set
+ * P: any raised capabilities must be a subset of the (old target) Permitted
+ *    set combined with the (old current) Permitted set
+ * E: must be set to a subset of the (new target) Permitted set
+ */
+
+asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
+{
+ kernel_cap_t inheritable, permitted, effective;
+ __u32 version;
+ struct task_struct *target;
+ int error, pid;
+
+ if (get_user(version, &header->version))
+ return -EFAULT;
+
+ if (version != _LINUX_CAPABILITY_VERSION) {
+ version = _LINUX_CAPABILITY_VERSION;
+ if (put_user(version, &header->version))
+ return -EFAULT;
+ return -EINVAL;
+ }
+
+ if (get_user(pid, &header->pid))
+ return -EFAULT;
+
+ if (pid && !capable(CAP_SETPCAP))
+ return -EPERM;
+
+ if (copy_from_user(&effective, &data->effective, sizeof(effective)) ||
+ copy_from_user(&inheritable, &data->inheritable, sizeof(inheritable)) ||
+ copy_from_user(&permitted, &data->permitted, sizeof(permitted)))
+ return -EFAULT;
+
+ error = -EPERM;
+ spin_lock(&task_capability_lock);
+
+ if (pid > 0 && pid != current->pid) {
+ read_lock(&tasklist_lock);
+ target = find_task_by_pid(pid); /* identify target of query */
+ if (!target) {
+ error = -ESRCH;
+ goto out;
+ }
+ } else {
+ target = current;
+ }
+
+
+ /* verify restrictions on target's new Inheritable set */
+ if (!cap_issubset(inheritable,
+ cap_combine(target->cap_inheritable,
+ current->cap_permitted))) {
+ goto out;
+ }
+
+ /* verify restrictions on target's new Permitted set */
+ if (!cap_issubset(permitted,
+ cap_combine(target->cap_permitted,
+ current->cap_permitted))) {
+ goto out;
+ }
+
+ /* verify the _new_Effective_ is a subset of the _new_Permitted_ */
+ if (!cap_issubset(effective, permitted)) {
+ goto out;
+ }
+
+ /* having verified that the proposed changes are legal,
+ we now put them into effect. */
+ error = 0;
+
+ if (pid < 0) {
+ if (pid == -1) /* all procs other than current and init */
+ cap_set_all(&effective, &inheritable, &permitted);
+
+ else /* all procs in process group */
+ cap_set_pg(-pid, &effective, &inheritable, &permitted);
+ goto spin_out;
+ } else {
+ /* FIXME: do we need to have a write lock here..? */
+ target->cap_effective = effective;
+ target->cap_inheritable = inheritable;
+ target->cap_permitted = permitted;
+ }
+
+out:
+ if (target != current) {
+ read_unlock(&tasklist_lock);
+ }
+spin_out:
+ spin_unlock(&task_capability_lock);
+ return error;
+}
diff --git a/kernel/context.c b/kernel/context.c
new file mode 100644
index 000000000000..864a70131c88
--- /dev/null
+++ b/kernel/context.c
@@ -0,0 +1,157 @@
+/*
+ * linux/kernel/context.c
+ *
+ * Mechanism for running arbitrary tasks in process context
+ *
+ * dwmw2@redhat.com: Genesis
+ *
+ * andrewm@uow.edu.au: 2.4.0-test12
+ * - Child reaping
+ * - Support for tasks which re-add themselves
+ * - flush_scheduled_tasks.
+ */
+
+#define __KERNEL_SYSCALLS__
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/unistd.h>
+#include <linux/signal.h>
+
+static DECLARE_TASK_QUEUE(tq_context);
+static DECLARE_WAIT_QUEUE_HEAD(context_task_wq);
+static DECLARE_WAIT_QUEUE_HEAD(context_task_done);
+static int keventd_running;
+static struct task_struct *keventd_task;
+
+static int need_keventd(const char *who)
+{
+ if (keventd_running == 0)
+ printk(KERN_ERR "%s(): keventd has not started\n", who);
+ return keventd_running;
+}
+
+int current_is_keventd(void)
+{
+ int ret = 0;
+ if (need_keventd(__FUNCTION__))
+ ret = (current == keventd_task);
+ return ret;
+}
+
+/**
+ * schedule_task - schedule a function for subsequent execution in process context.
+ * @task: pointer to a &tq_struct which defines the function to be scheduled.
+ *
+ * May be called from interrupt context. The scheduled function is run at some
+ * time in the near future by the keventd kernel thread. If it can sleep, it
+ * should be designed to do so for the minimum possible time, as it will be
+ * stalling all other scheduled tasks.
+ *
+ * schedule_task() returns non-zero if the task was successfully scheduled.
+ * If @task is already residing on a task queue then schedule_task() fails
+ * to schedule your task and returns zero.
+ */
+int schedule_task(struct tq_struct *task)
+{
+ int ret;
+ need_keventd(__FUNCTION__);
+ ret = queue_task(task, &tq_context);
+ wake_up(&context_task_wq);
+ return ret;
+}
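A hedged sketch of the calling side described in the kernel-doc above: a 2.4-era driver hands work from its interrupt handler to keventd through a tq_struct. The my_dev, my_deferred_work and my_irq_handler names are hypothetical.

/*
 * Hedged sketch (not part of this patch): deferring work from interrupt
 * context to keventd with schedule_task(). All names here are hypothetical.
 */
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/tqueue.h>

static struct my_dev {
	struct tq_struct tq;
	int pending_events;
} my_dev;

static void my_deferred_work(void *data)
{
	struct my_dev *dev = data;

	/* runs in keventd's process context, so it may sleep briefly */
	printk(KERN_DEBUG "handled %d events\n", dev->pending_events);
}

static void my_irq_handler(int irq, void *dev_id, struct pt_regs *regs)
{
	struct my_dev *dev = dev_id;

	dev->pending_events++;
	dev->tq.routine = my_deferred_work;
	dev->tq.data = dev;
	schedule_task(&dev->tq);	/* returns 0 if already queued */
}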
+
+static int context_thread(void *dummy)
+{
+ struct task_struct *curtask = current;
+ DECLARE_WAITQUEUE(wait, curtask);
+ struct k_sigaction sa;
+
+ daemonize();
+ strcpy(curtask->comm, "keventd");
+ keventd_running = 1;
+ keventd_task = curtask;
+
+ spin_lock_irq(&curtask->sigmask_lock);
+ siginitsetinv(&curtask->blocked, sigmask(SIGCHLD));
+ recalc_sigpending(curtask);
+ spin_unlock_irq(&curtask->sigmask_lock);
+
+ /* Install a handler so SIGCLD is delivered */
+ sa.sa.sa_handler = SIG_IGN;
+ sa.sa.sa_flags = 0;
+ siginitset(&sa.sa.sa_mask, sigmask(SIGCHLD));
+ do_sigaction(SIGCHLD, &sa, (struct k_sigaction *)0);
+
+ /*
+ * If one of the functions on a task queue re-adds itself
+ * to the task queue we call schedule() in state TASK_RUNNING
+ */
+ for (;;) {
+ set_task_state(curtask, TASK_INTERRUPTIBLE);
+ add_wait_queue(&context_task_wq, &wait);
+ if (TQ_ACTIVE(tq_context))
+ set_task_state(curtask, TASK_RUNNING);
+ schedule();
+ remove_wait_queue(&context_task_wq, &wait);
+ run_task_queue(&tq_context);
+ wake_up(&context_task_done);
+ if (signal_pending(curtask)) {
+ while (waitpid(-1, (unsigned int *)0, __WALL|WNOHANG) > 0)
+ ;
+ flush_signals(curtask);
+ recalc_sigpending(curtask);
+ }
+ }
+}
+
+/**
+ * flush_scheduled_tasks - ensure that any scheduled tasks have run to completion.
+ *
+ * Forces execution of the schedule_task() queue and blocks until its completion.
+ *
+ * If a kernel subsystem uses schedule_task() and wishes to flush any pending
+ * tasks, it should use this function. This is typically used in driver shutdown
+ * handlers.
+ *
+ * The caller should hold no spinlocks and should hold no semaphores which could
+ * cause the scheduled tasks to block.
+ */
+static struct tq_struct dummy_task;
+
+void flush_scheduled_tasks(void)
+{
+ int count;
+ DECLARE_WAITQUEUE(wait, current);
+
+ /*
+ * Do it twice. It's possible, albeit highly unlikely, that
+ * the caller queued a task immediately before calling us,
+ * and that the eventd thread was already past the run_task_queue()
+ * but not yet into wake_up(), so it woke us up before completing
+ * the caller's queued task or our new dummy task.
+ */
+ add_wait_queue(&context_task_done, &wait);
+ for (count = 0; count < 2; count++) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+
+ /* Queue a dummy task to make sure we get kicked */
+ schedule_task(&dummy_task);
+
+ /* Wait for it to complete */
+ schedule();
+ }
+ remove_wait_queue(&context_task_done, &wait);
+}
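The shutdown ordering the kernel-doc above prescribes, as a hedged sketch (not part of this patch); example_buf and example_cleanup are hypothetical names standing in for state that previously queued deferred work dereferences.

/*
 * Hedged sketch (not part of this patch): a driver shutdown path built
 * around flush_scheduled_tasks().
 */
#include <linux/slab.h>
#include <linux/tqueue.h>

static char *example_buf;

static void example_cleanup(void)
{
	/* 1. stop queueing new work for keventd (disable IRQs, etc.) */

	/* 2. drain anything already queued; hold no spinlocks or
	 *    semaphores that the deferred work itself might need */
	flush_scheduled_tasks();

	/* 3. only now free state the deferred work dereferences */
	kfree(example_buf);
	example_buf = NULL;
}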
+
+int start_context_thread(void)
+{
+ kernel_thread(context_thread, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
+ return 0;
+}
+
+EXPORT_SYMBOL(schedule_task);
+EXPORT_SYMBOL(flush_scheduled_tasks);
+
diff --git a/kernel/dma.c b/kernel/dma.c
new file mode 100644
index 000000000000..3ee09759fda1
--- /dev/null
+++ b/kernel/dma.c
@@ -0,0 +1,129 @@
+/* $Id: dma.c,v 1.7 1994/12/28 03:35:33 root Exp root $
+ * linux/kernel/dma.c: A DMA channel allocator. Inspired by linux/kernel/irq.c.
+ *
+ * Written by Hennus Bergman, 1992.
+ *
+ * 1994/12/26: Changes by Alex Nash to fix a minor bug in /proc/dma.
+ * In the previous version the reported device could end up being wrong,
+ * if a device requested a DMA channel that was already in use.
+ * [It also happened to remove the sizeof(char *) == sizeof(int)
+ * assumption introduced because of those /proc/dma patches. -- Hennus]
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <asm/dma.h>
+#include <asm/system.h>
+
+
+
+/* A note on resource allocation:
+ *
+ * All drivers needing DMA channels should allocate and release them
+ * through the public routines `request_dma()' and `free_dma()'.
+ *
+ * In order to avoid problems, all processes should allocate resources in
+ * the same sequence and release them in the reverse order.
+ *
+ * So, when allocating DMAs and IRQs, first allocate the IRQ, then the DMA.
+ * When releasing them, first release the DMA, then release the IRQ.
+ * If you don't, you may cause allocation requests to fail unnecessarily.
+ * This doesn't really matter now, but it will once we get real semaphores
+ * in the kernel.
+ */
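A hedged sketch of the ordering the note above asks for, in a 2.4-style ISA driver; MY_IRQ, MY_DMA, my_interrupt, my_probe and my_remove are all hypothetical.

/*
 * Hedged sketch (not part of this patch): allocate the IRQ before the
 * DMA channel and release them in reverse order, as recommended above.
 */
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <asm/dma.h>

#define MY_IRQ	5
#define MY_DMA	1

static void my_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
	/* acknowledge the device here */
}

static int my_probe(void)
{
	int err;

	err = request_irq(MY_IRQ, my_interrupt, 0, "mydev", NULL);
	if (err)
		return err;			/* IRQ first ... */

	err = request_dma(MY_DMA, "mydev");	/* ... then the DMA channel */
	if (err) {
		free_irq(MY_IRQ, NULL);
		return err;
	}
	return 0;
}

static void my_remove(void)
{
	free_dma(MY_DMA);			/* DMA first on the way out */
	free_irq(MY_IRQ, NULL);
}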
+
+
+spinlock_t dma_spin_lock = SPIN_LOCK_UNLOCKED;
+
+/*
+ * If our port doesn't define this it has no PC like DMA
+ */
+
+#ifdef MAX_DMA_CHANNELS
+
+
+/* Channel n is busy iff dma_chan_busy[n].lock != 0.
+ * DMA0 used to be reserved for DRAM refresh, but apparently not any more...
+ * DMA4 is reserved for cascading.
+ */
+
+struct dma_chan {
+ int lock;
+ const char *device_id;
+};
+
+static struct dma_chan dma_chan_busy[MAX_DMA_CHANNELS] = {
+ { 0, 0 },
+ { 0, 0 },
+ { 0, 0 },
+ { 0, 0 },
+ { 1, "cascade" },
+ { 0, 0 },
+ { 0, 0 },
+ { 0, 0 }
+};
+
+int get_dma_list(char *buf)
+{
+ int i, len = 0;
+
+ for (i = 0 ; i < MAX_DMA_CHANNELS ; i++) {
+ if (dma_chan_busy[i].lock) {
+ len += sprintf(buf+len, "%2d: %s\n",
+ i,
+ dma_chan_busy[i].device_id);
+ }
+ }
+ return len;
+} /* get_dma_list */
+
+
+int request_dma(unsigned int dmanr, const char * device_id)
+{
+ if (dmanr >= MAX_DMA_CHANNELS)
+ return -EINVAL;
+
+ if (xchg(&dma_chan_busy[dmanr].lock, 1) != 0)
+ return -EBUSY;
+
+ dma_chan_busy[dmanr].device_id = device_id;
+
+ /* old flag was 0, now contains 1 to indicate busy */
+ return 0;
+} /* request_dma */
+
+
+void free_dma(unsigned int dmanr)
+{
+ if (dmanr >= MAX_DMA_CHANNELS) {
+ printk("Trying to free DMA%d\n", dmanr);
+ return;
+ }
+
+ if (xchg(&dma_chan_busy[dmanr].lock, 0) == 0) {
+ printk("Trying to free free DMA%d\n", dmanr);
+ return;
+ }
+
+} /* free_dma */
+
+#else
+
+int request_dma(unsigned int dmanr, const char *device_id)
+{
+ return -EINVAL;
+}
+
+int free_dma(unsigned int dmanr)
+{
+ return -EINVAL;
+}
+
+int get_dma_list(char *buf)
+{
+ strcpy(buf, "No DMA\n");
+ return 7;
+}
+#endif
diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c
new file mode 100644
index 000000000000..1daf64cc19b6
--- /dev/null
+++ b/kernel/exec_domain.c
@@ -0,0 +1,163 @@
+#include <linux/mm.h>
+#include <linux/smp_lock.h>
+#include <linux/module.h>
+
+static asmlinkage void no_lcall7(int segment, struct pt_regs * regs);
+
+
+static unsigned long ident_map[32] = {
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31
+};
+
+struct exec_domain default_exec_domain = {
+ "Linux", /* name */
+ no_lcall7, /* lcall7 causes a seg fault. */
+ 0, 0xff, /* All personalities. */
+ ident_map, /* Identity map signals. */
+ ident_map, /* - both ways. */
+ NULL, /* No usage counter. */
+ NULL /* Nothing after this in the list. */
+};
+
+static struct exec_domain *exec_domains = &default_exec_domain;
+static rwlock_t exec_domains_lock = RW_LOCK_UNLOCKED;
+
+static asmlinkage void no_lcall7(int segment, struct pt_regs * regs)
+{
+ /*
+ * This may have been a statically linked SVr4 binary, so we would have the
+ * personality set incorrectly. Check to see whether SVr4 is available,
+ * and use it, otherwise give the user a SEGV.
+ */
+ set_personality(PER_SVR4);
+
+ if (current->exec_domain && current->exec_domain->handler
+ && current->exec_domain->handler != no_lcall7) {
+ current->exec_domain->handler(segment, regs);
+ return;
+ }
+
+ send_sig(SIGSEGV, current, 1);
+}
+
+static struct exec_domain *lookup_exec_domain(unsigned long personality)
+{
+ unsigned long pers = personality & PER_MASK;
+ struct exec_domain *it;
+
+ read_lock(&exec_domains_lock);
+ for (it=exec_domains; it; it=it->next)
+ if (pers >= it->pers_low && pers <= it->pers_high) {
+ if (!try_inc_mod_count(it->module))
+ continue;
+ read_unlock(&exec_domains_lock);
+ return it;
+ }
+ read_unlock(&exec_domains_lock);
+
+ /* Should never get this far. */
+ printk(KERN_ERR "No execution domain for personality 0x%02lx\n", pers);
+ return NULL;
+}
+
+int register_exec_domain(struct exec_domain *it)
+{
+ struct exec_domain *tmp;
+
+ if (!it)
+ return -EINVAL;
+ if (it->next)
+ return -EBUSY;
+ write_lock(&exec_domains_lock);
+ for (tmp=exec_domains; tmp; tmp=tmp->next)
+ if (tmp == it) {
+ write_unlock(&exec_domains_lock);
+ return -EBUSY;
+ }
+ it->next = exec_domains;
+ exec_domains = it;
+ write_unlock(&exec_domains_lock);
+ return 0;
+}
+
+int unregister_exec_domain(struct exec_domain *it)
+{
+ struct exec_domain ** tmp;
+
+ tmp = &exec_domains;
+ write_lock(&exec_domains_lock);
+ while (*tmp) {
+ if (it == *tmp) {
+ *tmp = it->next;
+ it->next = NULL;
+ write_unlock(&exec_domains_lock);
+ return 0;
+ }
+ tmp = &(*tmp)->next;
+ }
+ write_unlock(&exec_domains_lock);
+ return -EINVAL;
+}
+
+void __set_personality(unsigned long personality)
+{
+ struct exec_domain *it, *prev;
+
+ it = lookup_exec_domain(personality);
+ if (it == current->exec_domain) {
+ current->personality = personality;
+ return;
+ }
+ if (!it)
+ return;
+ if (atomic_read(&current->fs->count) != 1) {
+ struct fs_struct *new = copy_fs_struct(current->fs);
+ struct fs_struct *old;
+ if (!new) {
+ put_exec_domain(it);
+ return;
+ }
+ task_lock(current);
+ old = current->fs;
+ current->fs = new;
+ task_unlock(current);
+ put_fs_struct(old);
+ }
+ /*
+ * At that point we are guaranteed to be the sole owner of
+ * current->fs.
+ */
+ current->personality = personality;
+ prev = current->exec_domain;
+ current->exec_domain = it;
+ set_fs_altroot();
+ put_exec_domain(prev);
+}
+
+asmlinkage long sys_personality(unsigned long personality)
+{
+ int ret = current->personality;
+ if (personality != 0xffffffff) {
+ set_personality(personality);
+ if (current->personality != personality)
+ ret = -EINVAL;
+ }
+ return ret;
+}
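A hedged userspace sketch (not part of this patch) of the syscall above: query and change the execution domain through the personality(2) wrapper, where an argument of 0xffffffff means "query only".

/*
 * Hedged userspace sketch (not part of this patch): mirror of
 * sys_personality() above, via the glibc personality(2) wrapper.
 */
#include <stdio.h>
#include <sys/personality.h>

int main(void)
{
	int old = personality(0xffffffff);	/* query, change nothing */

	printf("current personality: %#x\n", (unsigned int)old);

	if (personality(PER_LINUX) < 0)		/* select the plain Linux domain */
		perror("personality");
	return 0;
}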
+
+int get_exec_domain_list(char * page)
+{
+ int len = 0;
+ struct exec_domain * e;
+
+ read_lock(&exec_domains_lock);
+ for (e=exec_domains; e && len < PAGE_SIZE - 80; e=e->next)
+ len += sprintf(page+len, "%d-%d\t%-16s\t[%s]\n",
+ e->pers_low, e->pers_high, e->name,
+ e->module ? e->module->name : "kernel");
+ read_unlock(&exec_domains_lock);
+ return len;
+}
diff --git a/kernel/exit.c b/kernel/exit.c
new file mode 100644
index 000000000000..c747f547b182
--- /dev/null
+++ b/kernel/exit.c
@@ -0,0 +1,596 @@
+/*
+ * linux/kernel/exit.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+#include <linux/config.h>
+#include <linux/malloc.h>
+#include <linux/interrupt.h>
+#include <linux/smp_lock.h>
+#include <linux/module.h>
+#ifdef CONFIG_BSD_PROCESS_ACCT
+#include <linux/acct.h>
+#endif
+
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/mmu_context.h>
+
+extern void sem_exit (void);
+extern struct task_struct *child_reaper;
+
+int getrusage(struct task_struct *, int, struct rusage *);
+
+static void release_task(struct task_struct * p)
+{
+ if (p != current) {
+#ifdef CONFIG_SMP
+ /*
+ * Wait to make sure the process isn't on the
+ * runqueue (active on some other CPU still)
+ */
+ for (;;) {
+ task_lock(p);
+ if (!p->has_cpu)
+ break;
+ task_unlock(p);
+ do {
+ barrier();
+ } while (p->has_cpu);
+ }
+ task_unlock(p);
+#endif
+ atomic_dec(&p->user->processes);
+ free_uid(p->user);
+ unhash_process(p);
+
+ release_thread(p);
+ current->cmin_flt += p->min_flt + p->cmin_flt;
+ current->cmaj_flt += p->maj_flt + p->cmaj_flt;
+ current->cnswap += p->nswap + p->cnswap;
+ /*
+ * Potentially available timeslices are retrieved
+ * here - this way the parent does not get penalized
+ * for creating too many processes.
+ *
+ * (this cannot be used to artificially 'generate'
+ * timeslices, because any timeslice recovered here
+ * was given away by the parent in the first place.)
+ */
+ current->counter += p->counter;
+ if (current->counter >= MAX_COUNTER)
+ current->counter = MAX_COUNTER;
+ free_task_struct(p);
+ } else {
+ printk("task releasing itself\n");
+ }
+}
+
+/*
+ * This checks not only the pgrp, but falls back on the pid if no
+ * satisfactory pgrp is found. I dunno - gdb doesn't work correctly
+ * without this...
+ */
+int session_of_pgrp(int pgrp)
+{
+ struct task_struct *p;
+ int fallback;
+
+ fallback = -1;
+ read_lock(&tasklist_lock);
+ for_each_task(p) {
+ if (p->session <= 0)
+ continue;
+ if (p->pgrp == pgrp) {
+ fallback = p->session;
+ break;
+ }
+ if (p->pid == pgrp)
+ fallback = p->session;
+ }
+ read_unlock(&tasklist_lock);
+ return fallback;
+}
+
+/*
+ * Determine if a process group is "orphaned", according to the POSIX
+ * definition in 2.2.2.52. Orphaned process groups are not to be affected
+ * by terminal-generated stop signals. Newly orphaned process groups are
+ * to receive a SIGHUP and a SIGCONT.
+ *
+ * "I ask you, have you ever known what it is to be an orphan?"
+ */
+static int will_become_orphaned_pgrp(int pgrp, struct task_struct * ignored_task)
+{
+ struct task_struct *p;
+
+ read_lock(&tasklist_lock);
+ for_each_task(p) {
+ if ((p == ignored_task) || (p->pgrp != pgrp) ||
+ (p->state == TASK_ZOMBIE) ||
+ (p->p_pptr->pid == 1))
+ continue;
+ if ((p->p_pptr->pgrp != pgrp) &&
+ (p->p_pptr->session == p->session)) {
+ read_unlock(&tasklist_lock);
+ return 0;
+ }
+ }
+ read_unlock(&tasklist_lock);
+ return 1; /* (sighing) "Often!" */
+}
+
+int is_orphaned_pgrp(int pgrp)
+{
+ return will_become_orphaned_pgrp(pgrp, 0);
+}
+
+static inline int has_stopped_jobs(int pgrp)
+{
+ int retval = 0;
+ struct task_struct * p;
+
+ read_lock(&tasklist_lock);
+ for_each_task(p) {
+ if (p->pgrp != pgrp)
+ continue;
+ if (p->state != TASK_STOPPED)
+ continue;
+ retval = 1;
+ break;
+ }
+ read_unlock(&tasklist_lock);
+ return retval;
+}
+
+/*
+ * When we die, we re-parent all our children.
+ * Try to give them to another thread in our process
+ * group, and if no such member exists, give it to
+ * the global child reaper process (ie "init")
+ */
+static inline void forget_original_parent(struct task_struct * father)
+{
+ struct task_struct * p, *reaper;
+
+ read_lock(&tasklist_lock);
+
+ /* Next in our thread group */
+ reaper = next_thread(father);
+ if (reaper == father)
+ reaper = child_reaper;
+
+ for_each_task(p) {
+ if (p->p_opptr == father) {
+ /* We don't want people slaying init */
+ p->exit_signal = SIGCHLD;
+ p->self_exec_id++;
+ p->p_opptr = reaper;
+ if (p->pdeath_signal) send_sig(p->pdeath_signal, p, 0);
+ }
+ }
+ read_unlock(&tasklist_lock);
+}
+
+static inline void close_files(struct files_struct * files)
+{
+ int i, j;
+
+ j = 0;
+ for (;;) {
+ unsigned long set;
+ i = j * __NFDBITS;
+ if (i >= files->max_fdset || i >= files->max_fds)
+ break;
+ set = files->open_fds->fds_bits[j++];
+ while (set) {
+ if (set & 1) {
+ struct file * file = xchg(&files->fd[i], NULL);
+ if (file)
+ filp_close(file, files);
+ }
+ i++;
+ set >>= 1;
+ }
+ }
+}
+
+void put_files_struct(struct files_struct *files)
+{
+ if (atomic_dec_and_test(&files->count)) {
+ close_files(files);
+ /*
+ * Free the fd and fdset arrays if we expanded them.
+ */
+ if (files->fd != &files->fd_array[0])
+ free_fd_array(files->fd, files->max_fds);
+ if (files->max_fdset > __FD_SETSIZE) {
+ free_fdset(files->open_fds, files->max_fdset);
+ free_fdset(files->close_on_exec, files->max_fdset);
+ }
+ kmem_cache_free(files_cachep, files);
+ }
+}
+
+static inline void __exit_files(struct task_struct *tsk)
+{
+ struct files_struct * files = tsk->files;
+
+ if (files) {
+ task_lock(tsk);
+ tsk->files = NULL;
+ task_unlock(tsk);
+ put_files_struct(files);
+ }
+}
+
+void exit_files(struct task_struct *tsk)
+{
+ __exit_files(tsk);
+}
+
+static inline void __put_fs_struct(struct fs_struct *fs)
+{
+ /* No need to hold fs->lock if we are killing it */
+ if (atomic_dec_and_test(&fs->count)) {
+ dput(fs->root);
+ mntput(fs->rootmnt);
+ dput(fs->pwd);
+ mntput(fs->pwdmnt);
+ if (fs->altroot) {
+ dput(fs->altroot);
+ mntput(fs->altrootmnt);
+ }
+ kmem_cache_free(fs_cachep, fs);
+ }
+}
+
+void put_fs_struct(struct fs_struct *fs)
+{
+ __put_fs_struct(fs);
+}
+
+static inline void __exit_fs(struct task_struct *tsk)
+{
+ struct fs_struct * fs = tsk->fs;
+
+ if (fs) {
+ task_lock(tsk);
+ tsk->fs = NULL;
+ task_unlock(tsk);
+ __put_fs_struct(fs);
+ }
+}
+
+void exit_fs(struct task_struct *tsk)
+{
+ __exit_fs(tsk);
+}
+
+/*
+ * We can use these to temporarily drop into
+ * "lazy TLB" mode and back.
+ */
+struct mm_struct * start_lazy_tlb(void)
+{
+ struct mm_struct *mm = current->mm;
+ current->mm = NULL;
+ /* active_mm is still 'mm' */
+ atomic_inc(&mm->mm_count);
+ enter_lazy_tlb(mm, current, smp_processor_id());
+ return mm;
+}
+
+void end_lazy_tlb(struct mm_struct *mm)
+{
+ struct mm_struct *active_mm = current->active_mm;
+
+ current->mm = mm;
+ if (mm != active_mm) {
+ current->active_mm = mm;
+ activate_mm(active_mm, mm);
+ }
+ mmdrop(active_mm);
+}
+
+/*
+ * Turn us into a lazy TLB process if we
+ * aren't already..
+ */
+static inline void __exit_mm(struct task_struct * tsk)
+{
+ struct mm_struct * mm = tsk->mm;
+
+ mm_release();
+ if (mm) {
+ atomic_inc(&mm->mm_count);
+ if (mm != tsk->active_mm) BUG();
+ /* more a memory barrier than a real lock */
+ task_lock(tsk);
+ tsk->mm = NULL;
+ task_unlock(tsk);
+ enter_lazy_tlb(mm, current, smp_processor_id());
+ mmput(mm);
+ }
+}
+
+void exit_mm(struct task_struct *tsk)
+{
+ __exit_mm(tsk);
+}
+
+/*
+ * Send signals to all our closest relatives so that they know
+ * to properly mourn us..
+ */
+static void exit_notify(void)
+{
+ struct task_struct * p, *t;
+
+ forget_original_parent(current);
+ /*
+ * Check to see if any process groups have become orphaned
+ * as a result of our exiting, and if they have any stopped
+ * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
+ *
+ * Case i: Our father is in a different pgrp than we are
+ * and we were the only connection outside, so our pgrp
+ * is about to become orphaned.
+ */
+
+ t = current->p_pptr;
+
+ if ((t->pgrp != current->pgrp) &&
+ (t->session == current->session) &&
+ will_become_orphaned_pgrp(current->pgrp, current) &&
+ has_stopped_jobs(current->pgrp)) {
+ kill_pg(current->pgrp,SIGHUP,1);
+ kill_pg(current->pgrp,SIGCONT,1);
+ }
+
+ /* Let father know we died
+ *
+ * Thread signals are configurable, but you aren't going to use
+ * that to send signals to arbitrary processes.
+ * That stops right now.
+ *
+ * If the parent exec id doesn't match the exec id we saved
+ * when we started then we know the parent has changed security
+ * domain.
+ *
+ * If our self_exec id doesn't match our parent_exec_id then
+ * we have changed execution domain as these two values started
+ * the same after a fork.
+ *
+ */
+
+ if(current->exit_signal != SIGCHLD &&
+ ( current->parent_exec_id != t->self_exec_id ||
+ current->self_exec_id != current->parent_exec_id)
+ && !capable(CAP_KILL))
+ current->exit_signal = SIGCHLD;
+
+
+ /*
+ * This loop does two things:
+ *
+ * A. Make init inherit all the child processes
+ * B. Check to see if any process groups have become orphaned
+ * as a result of our exiting, and if they have any stopped
+ * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
+ */
+
+ write_lock_irq(&tasklist_lock);
+ current->state = TASK_ZOMBIE;
+ do_notify_parent(current, current->exit_signal);
+ while (current->p_cptr != NULL) {
+ p = current->p_cptr;
+ current->p_cptr = p->p_osptr;
+ p->p_ysptr = NULL;
+ p->ptrace = 0;
+
+ p->p_pptr = p->p_opptr;
+ p->p_osptr = p->p_pptr->p_cptr;
+ if (p->p_osptr)
+ p->p_osptr->p_ysptr = p;
+ p->p_pptr->p_cptr = p;
+ if (p->state == TASK_ZOMBIE)
+ do_notify_parent(p, p->exit_signal);
+ /*
+ * process group orphan check
+ * Case ii: Our child is in a different pgrp
+ * than we are, and it was the only connection
+ * outside, so the child pgrp is now orphaned.
+ */
+ if ((p->pgrp != current->pgrp) &&
+ (p->session == current->session)) {
+ int pgrp = p->pgrp;
+
+ write_unlock_irq(&tasklist_lock);
+ if (is_orphaned_pgrp(pgrp) && has_stopped_jobs(pgrp)) {
+ kill_pg(pgrp,SIGHUP,1);
+ kill_pg(pgrp,SIGCONT,1);
+ }
+ write_lock_irq(&tasklist_lock);
+ }
+ }
+ write_unlock_irq(&tasklist_lock);
+}
+
+NORET_TYPE void do_exit(long code)
+{
+ struct task_struct *tsk = current;
+
+ if (in_interrupt())
+ panic("Aiee, killing interrupt handler!");
+ if (!tsk->pid)
+ panic("Attempted to kill the idle task!");
+ if (tsk->pid == 1)
+ panic("Attempted to kill init!");
+ tsk->flags |= PF_EXITING;
+ del_timer_sync(&tsk->real_timer);
+
+fake_volatile:
+#ifdef CONFIG_BSD_PROCESS_ACCT
+ acct_process(code);
+#endif
+ __exit_mm(tsk);
+
+ lock_kernel();
+ sem_exit();
+ __exit_files(tsk);
+ __exit_fs(tsk);
+ exit_sighand(tsk);
+ exit_thread();
+
+ if (current->leader)
+ disassociate_ctty(1);
+
+ put_exec_domain(tsk->exec_domain);
+ if (tsk->binfmt && tsk->binfmt->module)
+ __MOD_DEC_USE_COUNT(tsk->binfmt->module);
+
+ tsk->exit_code = code;
+ exit_notify();
+ schedule();
+ BUG();
+/*
+ * In order to get rid of the "volatile function does return" message
+ * I did this little loop that confuses gcc to think do_exit really
+ * is volatile. In fact it's schedule() that is volatile in some
+ * circumstances: when current->state = ZOMBIE, schedule() never
+ * returns.
+ *
+ * In fact the natural way to do all this is to have the label and the
+ * goto right after each other, but I put the fake_volatile label at
+ * the start of the function just in case something /really/ bad
+ * happens, and the schedule returns. This way we can try again. I'm
+ * not paranoid: it's just that everybody is out to get me.
+ */
+ goto fake_volatile;
+}
+
+NORET_TYPE void up_and_exit(struct semaphore *sem, long code)
+{
+ if (sem)
+ up(sem);
+
+ do_exit(code);
+}
+
+asmlinkage long sys_exit(int error_code)
+{
+ do_exit((error_code&0xff)<<8);
+}
+
+asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru)
+{
+ int flag, retval;
+ DECLARE_WAITQUEUE(wait, current);
+ struct task_struct *tsk;
+
+ if (options & ~(WNOHANG|WUNTRACED|__WNOTHREAD|__WCLONE|__WALL))
+ return -EINVAL;
+
+ add_wait_queue(&current->wait_chldexit,&wait);
+repeat:
+ flag = 0;
+ current->state = TASK_INTERRUPTIBLE;
+ read_lock(&tasklist_lock);
+ tsk = current;
+ do {
+ struct task_struct *p;
+ for (p = tsk->p_cptr ; p ; p = p->p_osptr) {
+ if (pid>0) {
+ if (p->pid != pid)
+ continue;
+ } else if (!pid) {
+ if (p->pgrp != current->pgrp)
+ continue;
+ } else if (pid != -1) {
+ if (p->pgrp != -pid)
+ continue;
+ }
+ /* Wait for all children (clone and not) if __WALL is set;
+ * otherwise, wait for clone children *only* if __WCLONE is
+ * set; otherwise, wait for non-clone children *only*. (Note:
+ * A "clone" child here is one that reports to its parent
+ * using a signal other than SIGCHLD.) */
+ if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0))
+ && !(options & __WALL))
+ continue;
+ flag = 1;
+ switch (p->state) {
+ case TASK_STOPPED:
+ if (!p->exit_code)
+ continue;
+ if (!(options & WUNTRACED) && !(p->ptrace & PT_PTRACED))
+ continue;
+ read_unlock(&tasklist_lock);
+ retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
+ if (!retval && stat_addr)
+ retval = put_user((p->exit_code << 8) | 0x7f, stat_addr);
+ if (!retval) {
+ p->exit_code = 0;
+ retval = p->pid;
+ }
+ goto end_wait4;
+ case TASK_ZOMBIE:
+ current->times.tms_cutime += p->times.tms_utime + p->times.tms_cutime;
+ current->times.tms_cstime += p->times.tms_stime + p->times.tms_cstime;
+ read_unlock(&tasklist_lock);
+ retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
+ if (!retval && stat_addr)
+ retval = put_user(p->exit_code, stat_addr);
+ if (retval)
+ goto end_wait4;
+ retval = p->pid;
+ if (p->p_opptr != p->p_pptr) {
+ write_lock_irq(&tasklist_lock);
+ REMOVE_LINKS(p);
+ p->p_pptr = p->p_opptr;
+ SET_LINKS(p);
+ do_notify_parent(p, SIGCHLD);
+ write_unlock_irq(&tasklist_lock);
+ } else
+ release_task(p);
+ goto end_wait4;
+ default:
+ continue;
+ }
+ }
+ if (options & __WNOTHREAD)
+ break;
+ tsk = next_thread(tsk);
+ } while (tsk != current);
+ read_unlock(&tasklist_lock);
+ if (flag) {
+ retval = 0;
+ if (options & WNOHANG)
+ goto end_wait4;
+ retval = -ERESTARTSYS;
+ if (signal_pending(current))
+ goto end_wait4;
+ schedule();
+ goto repeat;
+ }
+ retval = -ECHILD;
+end_wait4:
+ current->state = TASK_RUNNING;
+ remove_wait_queue(&current->wait_chldexit,&wait);
+ return retval;
+}
+
+#if !defined(__alpha__) && !defined(__ia64__)
+
+/*
+ * sys_waitpid() remains for compatibility. waitpid() should be
+ * implemented by calling sys_wait4() from libc.a.
+ */
+asmlinkage long sys_waitpid(pid_t pid,unsigned int * stat_addr, int options)
+{
+ return sys_wait4(pid, stat_addr, options, NULL);
+}
+
+#endif
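The comment above says waitpid() belongs in libc as a thin layer over wait4(); a one-function userspace sketch of that mapping follows (hedged; my_waitpid is a hypothetical name so it does not shadow libc's own symbol).

/*
 * Hedged userspace sketch (not part of this patch): waitpid() expressed
 * in terms of wait4(), as the comment above suggests libc should do.
 */
#include <sys/resource.h>
#include <sys/wait.h>

pid_t my_waitpid(pid_t pid, int *status, int options)
{
	return wait4(pid, status, options, (struct rusage *)0);
}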
diff --git a/kernel/fork.c b/kernel/fork.c
new file mode 100644
index 000000000000..99c1f2317992
--- /dev/null
+++ b/kernel/fork.c
@@ -0,0 +1,771 @@
+/*
+ * linux/kernel/fork.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+/*
+ * 'fork.c' contains the help-routines for the 'fork' system call
+ * (see also entry.S and others).
+ * Fork is rather simple, once you get the hang of it, but the memory
+ * management can be a bitch. See 'mm/memory.c': 'copy_page_tables()'
+ */
+
+#include <linux/config.h>
+#include <linux/malloc.h>
+#include <linux/init.h>
+#include <linux/unistd.h>
+#include <linux/smp_lock.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/uaccess.h>
+#include <asm/mmu_context.h>
+
+/* The idle threads do not count.. */
+int nr_threads;
+int nr_running;
+
+int max_threads;
+unsigned long total_forks; /* Handle normal Linux uptimes. */
+int last_pid;
+
+struct task_struct *pidhash[PIDHASH_SZ];
+
+void add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
+{
+ unsigned long flags;
+
+ wq_write_lock_irqsave(&q->lock, flags);
+ wait->flags = 0;
+ __add_wait_queue(q, wait);
+ wq_write_unlock_irqrestore(&q->lock, flags);
+}
+
+void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t * wait)
+{
+ unsigned long flags;
+
+ wq_write_lock_irqsave(&q->lock, flags);
+ wait->flags = WQ_FLAG_EXCLUSIVE;
+ __add_wait_queue_tail(q, wait);
+ wq_write_unlock_irqrestore(&q->lock, flags);
+}
+
+void remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
+{
+ unsigned long flags;
+
+ wq_write_lock_irqsave(&q->lock, flags);
+ __remove_wait_queue(q, wait);
+ wq_write_unlock_irqrestore(&q->lock, flags);
+}
+
+void __init fork_init(unsigned long mempages)
+{
+ /*
+ * The default maximum number of threads is set to a safe
+ * value: the thread structures can take up at most half
+ * of memory.
+ */
+ max_threads = mempages / (THREAD_SIZE/PAGE_SIZE) / 2;
+
+ init_task.rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
+ init_task.rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
+}
+
+/* Protects next_safe and last_pid. */
+spinlock_t lastpid_lock = SPIN_LOCK_UNLOCKED;
+
+static int get_pid(unsigned long flags)
+{
+ static int next_safe = PID_MAX;
+ struct task_struct *p;
+
+ if (flags & CLONE_PID)
+ return current->pid;
+
+ spin_lock(&lastpid_lock);
+ if((++last_pid) & 0xffff8000) {
+ last_pid = 300; /* Skip daemons etc. */
+ goto inside;
+ }
+ if(last_pid >= next_safe) {
+inside:
+ next_safe = PID_MAX;
+ read_lock(&tasklist_lock);
+ repeat:
+ for_each_task(p) {
+ if(p->pid == last_pid ||
+ p->pgrp == last_pid ||
+ p->session == last_pid) {
+ if(++last_pid >= next_safe) {
+ if(last_pid & 0xffff8000)
+ last_pid = 300;
+ next_safe = PID_MAX;
+ }
+ goto repeat;
+ }
+ if(p->pid > last_pid && next_safe > p->pid)
+ next_safe = p->pid;
+ if(p->pgrp > last_pid && next_safe > p->pgrp)
+ next_safe = p->pgrp;
+ if(p->session > last_pid && next_safe > p->session)
+ next_safe = p->session;
+ }
+ read_unlock(&tasklist_lock);
+ }
+ spin_unlock(&lastpid_lock);
+
+ return last_pid;
+}
+
+static inline int dup_mmap(struct mm_struct * mm)
+{
+ struct vm_area_struct * mpnt, *tmp, **pprev;
+ int retval;
+
+ flush_cache_mm(current->mm);
+ mm->locked_vm = 0;
+ mm->mmap = NULL;
+ mm->mmap_avl = NULL;
+ mm->mmap_cache = NULL;
+ mm->map_count = 0;
+ mm->cpu_vm_mask = 0;
+ mm->swap_cnt = 0;
+ mm->swap_address = 0;
+ pprev = &mm->mmap;
+ for (mpnt = current->mm->mmap ; mpnt ; mpnt = mpnt->vm_next) {
+ struct file *file;
+
+ retval = -ENOMEM;
+ if(mpnt->vm_flags & VM_DONTCOPY)
+ continue;
+ tmp = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ if (!tmp)
+ goto fail_nomem;
+ *tmp = *mpnt;
+ tmp->vm_flags &= ~VM_LOCKED;
+ tmp->vm_mm = mm;
+ mm->map_count++;
+ tmp->vm_next = NULL;
+ file = tmp->vm_file;
+ if (file) {
+ struct inode *inode = file->f_dentry->d_inode;
+ get_file(file);
+ if (tmp->vm_flags & VM_DENYWRITE)
+ atomic_dec(&inode->i_writecount);
+
+ /* insert tmp into the share list, just after mpnt */
+ spin_lock(&inode->i_mapping->i_shared_lock);
+ if((tmp->vm_next_share = mpnt->vm_next_share) != NULL)
+ mpnt->vm_next_share->vm_pprev_share =
+ &tmp->vm_next_share;
+ mpnt->vm_next_share = tmp;
+ tmp->vm_pprev_share = &mpnt->vm_next_share;
+ spin_unlock(&inode->i_mapping->i_shared_lock);
+ }
+
+ /* Copy the pages, but defer checking for errors */
+ retval = copy_page_range(mm, current->mm, tmp);
+ if (!retval && tmp->vm_ops && tmp->vm_ops->open)
+ tmp->vm_ops->open(tmp);
+
+ /*
+ * Link in the new vma even if an error occurred,
+ * so that exit_mmap() can clean up the mess.
+ */
+ *pprev = tmp;
+ pprev = &tmp->vm_next;
+
+ if (retval)
+ goto fail_nomem;
+ }
+ retval = 0;
+ if (mm->map_count >= AVL_MIN_MAP_COUNT)
+ build_mmap_avl(mm);
+
+fail_nomem:
+ flush_tlb_mm(current->mm);
+ return retval;
+}
+
+spinlock_t mmlist_lock __cacheline_aligned = SPIN_LOCK_UNLOCKED;
+
+#define allocate_mm() (kmem_cache_alloc(mm_cachep, SLAB_KERNEL))
+#define free_mm(mm) (kmem_cache_free(mm_cachep, (mm)))
+
+static struct mm_struct * mm_init(struct mm_struct * mm)
+{
+ atomic_set(&mm->mm_users, 1);
+ atomic_set(&mm->mm_count, 1);
+ init_MUTEX(&mm->mmap_sem);
+ mm->page_table_lock = SPIN_LOCK_UNLOCKED;
+ mm->pgd = pgd_alloc();
+ if (mm->pgd)
+ return mm;
+ free_mm(mm);
+ return NULL;
+}
+
+
+/*
+ * Allocate and initialize an mm_struct.
+ */
+struct mm_struct * mm_alloc(void)
+{
+ struct mm_struct * mm;
+
+ mm = allocate_mm();
+ if (mm) {
+ memset(mm, 0, sizeof(*mm));
+ return mm_init(mm);
+ }
+ return NULL;
+}
+
+/*
+ * Called when the last reference to the mm
+ * is dropped: either by a lazy thread or by
+ * mmput. Free the page directory and the mm.
+ */
+inline void __mmdrop(struct mm_struct *mm)
+{
+ if (mm == &init_mm) BUG();
+ pgd_free(mm->pgd);
+ destroy_context(mm);
+ free_mm(mm);
+}
+
+/*
+ * Decrement the use count and release all resources for an mm.
+ */
+void mmput(struct mm_struct *mm)
+{
+ if (atomic_dec_and_lock(&mm->mm_users, &mmlist_lock)) {
+ list_del(&mm->mmlist);
+ spin_unlock(&mmlist_lock);
+ exit_mmap(mm);
+ mmdrop(mm);
+ }
+}
+
+/* Please note the differences between mmput and mm_release.
+ * mmput is called whenever we stop holding onto a mm_struct,
+ * error, success, whatever.
+ *
+ * mm_release is called after a mm_struct has been removed
+ * from the current process.
+ *
+ * This difference is important for error handling, when we
+ * only half set up a mm_struct for a new process and need to restore
+ * the old one. Because we mmput the new mm_struct before
+ * restoring the old one. . .
+ * Eric Biederman 10 January 1998
+ */
+void mm_release(void)
+{
+ struct task_struct *tsk = current;
+
+ /* notify parent sleeping on vfork() */
+ if (tsk->flags & PF_VFORK) {
+ tsk->flags &= ~PF_VFORK;
+ up(tsk->p_opptr->vfork_sem);
+ }
+}
+
+static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
+{
+ struct mm_struct * mm, *oldmm;
+ int retval;
+
+ tsk->min_flt = tsk->maj_flt = 0;
+ tsk->cmin_flt = tsk->cmaj_flt = 0;
+ tsk->nswap = tsk->cnswap = 0;
+
+ tsk->mm = NULL;
+ tsk->active_mm = NULL;
+
+ /*
+ * Are we cloning a kernel thread?
+ *
+ * We need to steal an active VM for that..
+ */
+ oldmm = current->mm;
+ if (!oldmm)
+ return 0;
+
+ if (clone_flags & CLONE_VM) {
+ atomic_inc(&oldmm->mm_users);
+ mm = oldmm;
+ goto good_mm;
+ }
+
+ retval = -ENOMEM;
+ mm = allocate_mm();
+ if (!mm)
+ goto fail_nomem;
+
+ /* Copy the current MM stuff.. */
+ memcpy(mm, oldmm, sizeof(*mm));
+ if (!mm_init(mm))
+ goto fail_nomem;
+
+ down(&oldmm->mmap_sem);
+ retval = dup_mmap(mm);
+ up(&oldmm->mmap_sem);
+
+ /*
+ * Add it to the mmlist after the parent.
+ *
+ * Doing it this way means that we can order
+ * the list, and fork() won't mess up the
+ * ordering significantly.
+ */
+ spin_lock(&mmlist_lock);
+ list_add(&mm->mmlist, &oldmm->mmlist);
+ spin_unlock(&mmlist_lock);
+
+ if (retval)
+ goto free_pt;
+
+ /*
+ * child gets a private LDT (if there was an LDT in the parent)
+ */
+ copy_segments(tsk, mm);
+
+ if (init_new_context(tsk,mm))
+ goto free_pt;
+
+good_mm:
+ tsk->mm = mm;
+ tsk->active_mm = mm;
+ return 0;
+
+free_pt:
+ mmput(mm);
+fail_nomem:
+ return retval;
+}
+
+static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old)
+{
+ struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL);
+ /* We don't need to lock fs - think why ;-) */
+ if (fs) {
+ atomic_set(&fs->count, 1);
+ fs->lock = RW_LOCK_UNLOCKED;
+ fs->umask = old->umask;
+ read_lock(&old->lock);
+ fs->rootmnt = mntget(old->rootmnt);
+ fs->root = dget(old->root);
+ fs->pwdmnt = mntget(old->pwdmnt);
+ fs->pwd = dget(old->pwd);
+ if (old->altroot) {
+ fs->altrootmnt = mntget(old->altrootmnt);
+ fs->altroot = dget(old->altroot);
+ } else {
+ fs->altrootmnt = NULL;
+ fs->altroot = NULL;
+ }
+ read_unlock(&old->lock);
+ }
+ return fs;
+}
+
+struct fs_struct *copy_fs_struct(struct fs_struct *old)
+{
+ return __copy_fs_struct(old);
+}
+
+static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk)
+{
+ if (clone_flags & CLONE_FS) {
+ atomic_inc(&current->fs->count);
+ return 0;
+ }
+ tsk->fs = __copy_fs_struct(current->fs);
+ if (!tsk->fs)
+ return -1;
+ return 0;
+}
+
+static int count_open_files(struct files_struct *files, int size)
+{
+ int i;
+
+ /* Find the last open fd */
+ for (i = size/(8*sizeof(long)); i > 0; ) {
+ if (files->open_fds->fds_bits[--i])
+ break;
+ }
+ i = (i+1) * 8 * sizeof(long);
+ return i;
+}
+
+static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
+{
+ struct files_struct *oldf, *newf;
+ struct file **old_fds, **new_fds;
+ int open_files, nfds, size, i, error = 0;
+
+ /*
+ * A background process may not have any files ...
+ */
+ oldf = current->files;
+ if (!oldf)
+ goto out;
+
+ if (clone_flags & CLONE_FILES) {
+ atomic_inc(&oldf->count);
+ goto out;
+ }
+
+ tsk->files = NULL;
+ error = -ENOMEM;
+ newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL);
+ if (!newf)
+ goto out;
+
+ atomic_set(&newf->count, 1);
+
+ newf->file_lock = RW_LOCK_UNLOCKED;
+ newf->next_fd = 0;
+ newf->max_fds = NR_OPEN_DEFAULT;
+ newf->max_fdset = __FD_SETSIZE;
+ newf->close_on_exec = &newf->close_on_exec_init;
+ newf->open_fds = &newf->open_fds_init;
+ newf->fd = &newf->fd_array[0];
+
+ /* We don't yet have the oldf readlock, but even if the old
+ fdset gets grown now, we'll only copy up to "size" fds */
+ size = oldf->max_fdset;
+ if (size > __FD_SETSIZE) {
+ newf->max_fdset = 0;
+ write_lock(&newf->file_lock);
+ error = expand_fdset(newf, size);
+ write_unlock(&newf->file_lock);
+ if (error)
+ goto out_release;
+ }
+ read_lock(&oldf->file_lock);
+
+ open_files = count_open_files(oldf, size);
+
+ /*
+ * Check whether we need to allocate a larger fd array.
+ * Note: we're not a clone task, so the open count won't
+ * change.
+ */
+ nfds = NR_OPEN_DEFAULT;
+ if (open_files > nfds) {
+ read_unlock(&oldf->file_lock);
+ newf->max_fds = 0;
+ write_lock(&newf->file_lock);
+ error = expand_fd_array(newf, open_files);
+ write_unlock(&newf->file_lock);
+ if (error)
+ goto out_release;
+ nfds = newf->max_fds;
+ read_lock(&oldf->file_lock);
+ }
+
+ old_fds = oldf->fd;
+ new_fds = newf->fd;
+
+ memcpy(newf->open_fds->fds_bits, oldf->open_fds->fds_bits, open_files/8);
+ memcpy(newf->close_on_exec->fds_bits, oldf->close_on_exec->fds_bits, open_files/8);
+
+ for (i = open_files; i != 0; i--) {
+ struct file *f = *old_fds++;
+ if (f)
+ get_file(f);
+ *new_fds++ = f;
+ }
+ read_unlock(&oldf->file_lock);
+
+ /* compute the remainder to be cleared */
+ size = (newf->max_fds - open_files) * sizeof(struct file *);
+
+ /* This is long word aligned thus could use an optimized version */
+ memset(new_fds, 0, size);
+
+ if (newf->max_fdset > open_files) {
+ int left = (newf->max_fdset-open_files)/8;
+ int start = open_files / (8 * sizeof(unsigned long));
+
+ memset(&newf->open_fds->fds_bits[start], 0, left);
+ memset(&newf->close_on_exec->fds_bits[start], 0, left);
+ }
+
+ tsk->files = newf;
+ error = 0;
+out:
+ return error;
+
+out_release:
+ free_fdset (newf->close_on_exec, newf->max_fdset);
+ free_fdset (newf->open_fds, newf->max_fdset);
+ kmem_cache_free(files_cachep, newf);
+ goto out;
+}
+
+static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk)
+{
+ struct signal_struct *sig;
+
+ if (clone_flags & CLONE_SIGHAND) {
+ atomic_inc(&current->sig->count);
+ return 0;
+ }
+ sig = kmem_cache_alloc(sigact_cachep, GFP_KERNEL);
+ tsk->sig = sig;
+ if (!sig)
+ return -1;
+ spin_lock_init(&sig->siglock);
+ atomic_set(&sig->count, 1);
+ memcpy(tsk->sig->action, current->sig->action, sizeof(tsk->sig->action));
+ return 0;
+}
+
+static inline void copy_flags(unsigned long clone_flags, struct task_struct *p)
+{
+ unsigned long new_flags = p->flags;
+
+ new_flags &= ~(PF_SUPERPRIV | PF_USEDFPU | PF_VFORK);
+ new_flags |= PF_FORKNOEXEC;
+ if (!(clone_flags & CLONE_PTRACE))
+ p->ptrace = 0;
+ if (clone_flags & CLONE_VFORK)
+ new_flags |= PF_VFORK;
+ p->flags = new_flags;
+}
+
+/*
+ * Ok, this is the main fork-routine. It copies the system process
+ * information (task[nr]) and sets up the necessary registers. It also
+ * copies the data segment in its entirety. The "stack_start" and
+ * "stack_top" arguments are simply passed along to the platform
+ * specific copy_thread() routine. Most platforms ignore stack_top.
+ * For an example that uses stack_top, see
+ * arch/ia64/kernel/process.c.
+ */
+int do_fork(unsigned long clone_flags, unsigned long stack_start,
+ struct pt_regs *regs, unsigned long stack_size)
+{
+ int retval = -ENOMEM;
+ struct task_struct *p;
+ DECLARE_MUTEX_LOCKED(sem);
+
+ if (clone_flags & CLONE_PID) {
+ /* This is only allowed from the boot up thread */
+ if (current->pid)
+ return -EPERM;
+ }
+
+ current->vfork_sem = &sem;
+
+ p = alloc_task_struct();
+ if (!p)
+ goto fork_out;
+
+ *p = *current;
+
+ retval = -EAGAIN;
+ if (atomic_read(&p->user->processes) >= p->rlim[RLIMIT_NPROC].rlim_cur)
+ goto bad_fork_free;
+ atomic_inc(&p->user->__count);
+ atomic_inc(&p->user->processes);
+
+ /*
+ * Counter increases are protected by
+ * the kernel lock so nr_threads can't
+ * increase under us (but it may decrease).
+ */
+ if (nr_threads >= max_threads)
+ goto bad_fork_cleanup_count;
+
+ get_exec_domain(p->exec_domain);
+
+ if (p->binfmt && p->binfmt->module)
+ __MOD_INC_USE_COUNT(p->binfmt->module);
+
+ p->did_exec = 0;
+ p->swappable = 0;
+ p->state = TASK_UNINTERRUPTIBLE;
+
+ copy_flags(clone_flags, p);
+ p->pid = get_pid(clone_flags);
+
+ p->run_list.next = NULL;
+ p->run_list.prev = NULL;
+
+ if ((clone_flags & CLONE_VFORK) || !(clone_flags & CLONE_PARENT)) {
+ p->p_opptr = current;
+ if (!(p->ptrace & PT_PTRACED))
+ p->p_pptr = current;
+ }
+ p->p_cptr = NULL;
+ init_waitqueue_head(&p->wait_chldexit);
+ p->vfork_sem = NULL;
+ spin_lock_init(&p->alloc_lock);
+
+ p->sigpending = 0;
+ init_sigpending(&p->pending);
+
+ p->it_real_value = p->it_virt_value = p->it_prof_value = 0;
+ p->it_real_incr = p->it_virt_incr = p->it_prof_incr = 0;
+ init_timer(&p->real_timer);
+ p->real_timer.data = (unsigned long) p;
+
+ p->leader = 0; /* session leadership doesn't inherit */
+ p->tty_old_pgrp = 0;
+ p->times.tms_utime = p->times.tms_stime = 0;
+ p->times.tms_cutime = p->times.tms_cstime = 0;
+#ifdef CONFIG_SMP
+ {
+ int i;
+ p->has_cpu = 0;
+ p->processor = current->processor;
+ /* ?? should we just memset this ?? */
+ for(i = 0; i < smp_num_cpus; i++)
+ p->per_cpu_utime[i] = p->per_cpu_stime[i] = 0;
+ spin_lock_init(&p->sigmask_lock);
+ }
+#endif
+ p->lock_depth = -1; /* -1 = no lock */
+ p->start_time = jiffies;
+
+ retval = -ENOMEM;
+ /* copy all the process information */
+ if (copy_files(clone_flags, p))
+ goto bad_fork_cleanup;
+ if (copy_fs(clone_flags, p))
+ goto bad_fork_cleanup_files;
+ if (copy_sighand(clone_flags, p))
+ goto bad_fork_cleanup_fs;
+ if (copy_mm(clone_flags, p))
+ goto bad_fork_cleanup_sighand;
+ retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
+ if (retval)
+ goto bad_fork_cleanup_sighand;
+ p->semundo = NULL;
+
+ /* Our parent execution domain becomes current domain.
+    These must match for thread signalling to apply. */
+
+ p->parent_exec_id = p->self_exec_id;
+
+ /* ok, now we should be set up.. */
+ p->swappable = 1;
+ p->exit_signal = clone_flags & CSIGNAL;
+ p->pdeath_signal = 0;
+
+ /*
+ * "share" dynamic priority between parent and child, thus the
+ * total amount of dynamic priorities in the system doesn't change;
+ * this gives more scheduling fairness. It is only important in the first
+ * timeslice; in the long run the scheduling behaviour is unchanged.
+ */
+ p->counter = (current->counter + 1) >> 1;
+ current->counter >>= 1;
+ if (!current->counter)
+ current->need_resched = 1;
+
+ /*
+ * Ok, add it to the run-queues and make it
+ * visible to the rest of the system.
+ *
+ * Let it rip!
+ */
+ retval = p->pid;
+ p->tgid = retval;
+ INIT_LIST_HEAD(&p->thread_group);
+ write_lock_irq(&tasklist_lock);
+ if (clone_flags & CLONE_THREAD) {
+ p->tgid = current->tgid;
+ list_add(&p->thread_group, &current->thread_group);
+ }
+ SET_LINKS(p);
+ hash_pid(p);
+ nr_threads++;
+ write_unlock_irq(&tasklist_lock);
+
+ if (p->ptrace & PT_PTRACED)
+ send_sig(SIGSTOP, p, 1);
+
+ wake_up_process(p); /* do this last */
+ ++total_forks;
+
+fork_out:
+ if ((clone_flags & CLONE_VFORK) && (retval > 0))
+ down(&sem);
+ return retval;
+
+bad_fork_cleanup_sighand:
+ exit_sighand(p);
+bad_fork_cleanup_fs:
+ exit_fs(p); /* blocking */
+bad_fork_cleanup_files:
+ exit_files(p); /* blocking */
+bad_fork_cleanup:
+ put_exec_domain(p->exec_domain);
+ if (p->binfmt && p->binfmt->module)
+ __MOD_DEC_USE_COUNT(p->binfmt->module);
+bad_fork_cleanup_count:
+ atomic_dec(&p->user->processes);
+ free_uid(p->user);
+bad_fork_free:
+ free_task_struct(p);
+ goto fork_out;
+}
+
+/* SLAB cache for signal_struct structures (tsk->sig) */
+kmem_cache_t *sigact_cachep;
+
+/* SLAB cache for files_struct structures (tsk->files) */
+kmem_cache_t *files_cachep;
+
+/* SLAB cache for fs_struct structures (tsk->fs) */
+kmem_cache_t *fs_cachep;
+
+/* SLAB cache for vm_area_struct structures */
+kmem_cache_t *vm_area_cachep;
+
+/* SLAB cache for mm_struct structures (tsk->mm) */
+kmem_cache_t *mm_cachep;
+
+void __init proc_caches_init(void)
+{
+ sigact_cachep = kmem_cache_create("signal_act",
+ sizeof(struct signal_struct), 0,
+ SLAB_HWCACHE_ALIGN, NULL, NULL);
+ if (!sigact_cachep)
+ panic("Cannot create signal action SLAB cache");
+
+ files_cachep = kmem_cache_create("files_cache",
+ sizeof(struct files_struct), 0,
+ SLAB_HWCACHE_ALIGN, NULL, NULL);
+ if (!files_cachep)
+ panic("Cannot create files SLAB cache");
+
+ fs_cachep = kmem_cache_create("fs_cache",
+ sizeof(struct fs_struct), 0,
+ SLAB_HWCACHE_ALIGN, NULL, NULL);
+ if (!fs_cachep)
+ panic("Cannot create fs_struct SLAB cache");
+
+ vm_area_cachep = kmem_cache_create("vm_area_struct",
+ sizeof(struct vm_area_struct), 0,
+ SLAB_HWCACHE_ALIGN, NULL, NULL);
+ if(!vm_area_cachep)
+ panic("vma_init: Cannot alloc vm_area_struct SLAB cache");
+
+ mm_cachep = kmem_cache_create("mm_struct",
+ sizeof(struct mm_struct), 0,
+ SLAB_HWCACHE_ALIGN, NULL, NULL);
+ if(!mm_cachep)
+ panic("vma_init: Cannot alloc mm_struct SLAB cache");
+}
diff --git a/kernel/info.c b/kernel/info.c
new file mode 100644
index 000000000000..d7abf6713384
--- /dev/null
+++ b/kernel/info.c
@@ -0,0 +1,74 @@
+/*
+ * linux/kernel/info.c
+ *
+ * Copyright (C) 1992 Darren Senn
+ */
+
+/* This implements the sysinfo() system call */
+
+#include <linux/mm.h>
+#include <linux/unistd.h>
+#include <linux/swap.h>
+#include <linux/smp_lock.h>
+
+#include <asm/uaccess.h>
+
+asmlinkage long sys_sysinfo(struct sysinfo *info)
+{
+ struct sysinfo val;
+
+ memset((char *)&val, 0, sizeof(struct sysinfo));
+
+ cli();
+ val.uptime = jiffies / HZ;
+
+ val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
+ val.loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
+ val.loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
+
+ val.procs = nr_threads-1;
+ sti();
+
+ si_meminfo(&val);
+ si_swapinfo(&val);
+
+ {
+ /* If the sum of all the available memory (i.e. ram + swap +
+ * highmem) is less than can be stored in a 32 bit unsigned long
+ * then we can be binary compatible with 2.2.x kernels. If not,
+ * well, who cares since in that case 2.2.x was broken anyways...
+ *
+ * -Erik Andersen <andersee@debian.org> */
+
+ unsigned long mem_total = val.totalram + val.totalswap;
+ if ( !(mem_total < val.totalram || mem_total < val.totalswap)) {
+ unsigned long mem_total2 = mem_total + val.totalhigh;
+ if (!(mem_total2 < mem_total || mem_total2 < val.totalhigh))
+ {
+ /* If mem_total did not overflow, divide all memory values by
+ * mem_unit and set mem_unit=1. This leaves things compatible with
+ * 2.2.x, and also retains compatibility with earlier 2.4.x
+ * kernels... */
+
+ int bitcount = 0;
+ while (val.mem_unit > 1)
+ {
+ bitcount++;
+ val.mem_unit >>= 1;
+ }
+ val.totalram <<= bitcount;
+ val.freeram <<= bitcount;
+ val.sharedram <<= bitcount;
+ val.bufferram <<= bitcount;
+ val.totalswap <<= bitcount;
+ val.freeswap <<= bitcount;
+ val.totalhigh <<= bitcount;
+ val.freehigh <<= bitcount;
+ }
+ }
+ }
+
+ if (copy_to_user(info, &val, sizeof(struct sysinfo)))
+ return -EFAULT;
+ return 0;
+}
diff --git a/kernel/itimer.c b/kernel/itimer.c
new file mode 100644
index 000000000000..79d58220c590
--- /dev/null
+++ b/kernel/itimer.c
@@ -0,0 +1,170 @@
+/*
+ * linux/kernel/itimer.c
+ *
+ * Copyright (C) 1992 Darren Senn
+ */
+
+/* These are all the functions necessary to implement itimers */
+
+#include <linux/mm.h>
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+
+#include <asm/uaccess.h>
+
+/*
+ * change timeval to jiffies, trying to avoid the
+ * most obvious overflows..
+ *
+ * The tv_*sec values are signed, but nothing seems to
+ * indicate whether we really should use them as signed values
+ * when doing itimers. POSIX doesn't mention this (but if
+ * alarm() uses itimers without checking, we have to use unsigned
+ * arithmetic).
+ */
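+/*
+ * Worked example (illustrative; assumes HZ = 100): a timeval of
+ * { tv_sec = 1, tv_usec = 5000 } becomes 100 + 1 = 101 jiffies,
+ * the usec part being rounded up to a whole tick.
+ */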
+static unsigned long tvtojiffies(struct timeval *value)
+{
+ unsigned long sec = (unsigned) value->tv_sec;
+ unsigned long usec = (unsigned) value->tv_usec;
+
+ if (sec > (ULONG_MAX / HZ))
+ return ULONG_MAX;
+ usec += 1000000 / HZ - 1;
+ usec /= 1000000 / HZ;
+ return HZ*sec+usec;
+}
+
+static void jiffiestotv(unsigned long jiffies, struct timeval *value)
+{
+ value->tv_usec = (jiffies % HZ) * (1000000 / HZ);
+ value->tv_sec = jiffies / HZ;
+}
+
+int do_getitimer(int which, struct itimerval *value)
+{
+ register unsigned long val, interval;
+
+ switch (which) {
+ case ITIMER_REAL:
+ interval = current->it_real_incr;
+ val = 0;
+ /*
+ * FIXME! This needs to be atomic, in case the kernel timer happens!
+ */
+ if (timer_pending(&current->real_timer)) {
+ val = current->real_timer.expires - jiffies;
+
+ /* look out for negative/zero itimer.. */
+ if ((long) val <= 0)
+ val = 1;
+ }
+ break;
+ case ITIMER_VIRTUAL:
+ val = current->it_virt_value;
+ interval = current->it_virt_incr;
+ break;
+ case ITIMER_PROF:
+ val = current->it_prof_value;
+ interval = current->it_prof_incr;
+ break;
+ default:
+ return(-EINVAL);
+ }
+ jiffiestotv(val, &value->it_value);
+ jiffiestotv(interval, &value->it_interval);
+ return 0;
+}
+
+/* SMP: Only we modify our itimer values. */
+asmlinkage long sys_getitimer(int which, struct itimerval *value)
+{
+ int error = -EFAULT;
+ struct itimerval get_buffer;
+
+ if (value) {
+ error = do_getitimer(which, &get_buffer);
+ if (!error &&
+ copy_to_user(value, &get_buffer, sizeof(get_buffer)))
+ error = -EFAULT;
+ }
+ return error;
+}
+
+void it_real_fn(unsigned long __data)
+{
+ struct task_struct * p = (struct task_struct *) __data;
+ unsigned long interval;
+
+ send_sig(SIGALRM, p, 1);
+ interval = p->it_real_incr;
+ if (interval) {
+ if (interval > (unsigned long) LONG_MAX)
+ interval = LONG_MAX;
+ p->real_timer.expires = jiffies + interval;
+ add_timer(&p->real_timer);
+ }
+}
+
+int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
+{
+ register unsigned long i, j;
+ int k;
+
+ i = tvtojiffies(&value->it_interval);
+ j = tvtojiffies(&value->it_value);
+ if (ovalue && (k = do_getitimer(which, ovalue)) < 0)
+ return k;
+ switch (which) {
+ case ITIMER_REAL:
+ del_timer_sync(&current->real_timer);
+ current->it_real_value = j;
+ current->it_real_incr = i;
+ if (!j)
+ break;
+ if (j > (unsigned long) LONG_MAX)
+ j = LONG_MAX;
+ i = j + jiffies;
+ current->real_timer.expires = i;
+ add_timer(&current->real_timer);
+ break;
+ case ITIMER_VIRTUAL:
+ if (j)
+ j++;
+ current->it_virt_value = j;
+ current->it_virt_incr = i;
+ break;
+ case ITIMER_PROF:
+ if (j)
+ j++;
+ current->it_prof_value = j;
+ current->it_prof_incr = i;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/* SMP: Again, only we play with our itimers, and signals are SMP safe
+ * now so that is not an issue at all anymore.
+ */
+asmlinkage long sys_setitimer(int which, struct itimerval *value,
+ struct itimerval *ovalue)
+{
+ struct itimerval set_buffer, get_buffer;
+ int error;
+
+ if (value) {
+ if(copy_from_user(&set_buffer, value, sizeof(set_buffer)))
+ return -EFAULT;
+ } else
+ memset((char *) &set_buffer, 0, sizeof(set_buffer));
+
+ error = do_setitimer(which, &set_buffer, ovalue ? &get_buffer : 0);
+ if (error || !ovalue)
+ return error;
+
+ if (copy_to_user(ovalue, &get_buffer, sizeof(get_buffer)))
+ return -EFAULT;
+ return 0;
+}
diff --git a/kernel/kmod.c b/kernel/kmod.c
new file mode 100644
index 000000000000..ac840a901f1a
--- /dev/null
+++ b/kernel/kmod.c
@@ -0,0 +1,373 @@
+/*
+ kmod, the new module loader (replaces kerneld)
+ Kirk Petersen
+
+ Reorganized not to be a daemon by Adam Richter, with guidance
+ from Greg Zornetzer.
+
+ Modified to avoid chroot and file sharing problems.
+ Mikael Pettersson
+
+ Limit the concurrent number of kmod modprobes to catch loops from
+ "modprobe needs a service that is in a module".
+ Keith Owens <kaos@ocs.com.au> December 1999
+
+ Unblock all signals when we exec a usermode process.
+ Shuu Yamaguchi <shuu@wondernetworkresources.com> December 2000
+*/
+
+#define __KERNEL_SYSCALLS__
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/unistd.h>
+#include <linux/kmod.h>
+#include <linux/smp_lock.h>
+
+#include <asm/uaccess.h>
+
+extern int max_threads;
+
+static inline void
+use_init_fs_context(void)
+{
+ struct fs_struct *our_fs, *init_fs;
+ struct dentry *root, *pwd;
+ struct vfsmount *rootmnt, *pwdmnt;
+
+ /*
+ * Make modprobe's fs context be a copy of init's.
+ *
+ * We cannot use the user's fs context, because it
+ * may have a different root than init.
+ * Since init was created with CLONE_FS, we can grab
+ * its fs context from "init_task".
+ *
+ * The fs context has to be a copy. If it is shared
+ * with init, then any chdir() call in modprobe will
+ * also affect init and the other threads sharing
+ * init_task's fs context.
+ *
+ * We created the exec_modprobe thread without CLONE_FS,
+ * so we can update the fields in our fs context freely.
+ */
+
+ init_fs = init_task.fs;
+ read_lock(&init_fs->lock);
+ rootmnt = mntget(init_fs->rootmnt);
+ root = dget(init_fs->root);
+ pwdmnt = mntget(init_fs->pwdmnt);
+ pwd = dget(init_fs->pwd);
+ read_unlock(&init_fs->lock);
+
+ /* FIXME - unsafe ->fs access */
+ our_fs = current->fs;
+ our_fs->umask = init_fs->umask;
+ set_fs_root(our_fs, rootmnt, root);
+ set_fs_pwd(our_fs, pwdmnt, pwd);
+ write_lock(&our_fs->lock);
+ if (our_fs->altroot) {
+ struct vfsmount *mnt = our_fs->altrootmnt;
+ struct dentry *dentry = our_fs->altroot;
+ our_fs->altrootmnt = NULL;
+ our_fs->altroot = NULL;
+ write_unlock(&our_fs->lock);
+ dput(dentry);
+ mntput(mnt);
+ } else
+ write_unlock(&our_fs->lock);
+ dput(root);
+ mntput(rootmnt);
+ dput(pwd);
+ mntput(pwdmnt);
+}
+
+int exec_usermodehelper(char *program_path, char *argv[], char *envp[])
+{
+ int i;
+ struct task_struct *curtask = current;
+
+ curtask->session = 1;
+ curtask->pgrp = 1;
+
+ use_init_fs_context();
+
+ /* Prevent parent user process from sending signals to child.
+ Otherwise, if the modprobe program does not exist, it might
+ be possible to get a user defined signal handler to execute
+ as the super user right after the execve fails if you time
+ the signal just right.
+ */
+ spin_lock_irq(&curtask->sigmask_lock);
+ sigemptyset(&curtask->blocked);
+ flush_signals(curtask);
+ flush_signal_handlers(curtask);
+ recalc_sigpending(curtask);
+ spin_unlock_irq(&curtask->sigmask_lock);
+
+ for (i = 0; i < curtask->files->max_fds; i++ ) {
+ if (curtask->files->fd[i]) close(i);
+ }
+
+ /* Drop the "current user" thing */
+ {
+ struct user_struct *user = curtask->user;
+ curtask->user = INIT_USER;
+ atomic_inc(&INIT_USER->__count);
+ atomic_inc(&INIT_USER->processes);
+ atomic_dec(&user->processes);
+ free_uid(user);
+ }
+
+ /* Give kmod all effective privileges.. */
+ curtask->euid = curtask->fsuid = 0;
+ curtask->egid = curtask->fsgid = 0;
+ cap_set_full(curtask->cap_effective);
+
+ /* Allow execve args to be in kernel space. */
+ set_fs(KERNEL_DS);
+
+ /* Go, go, go... */
+ if (execve(program_path, argv, envp) < 0)
+ return -errno;
+ return 0;
+}
+
+#ifdef CONFIG_KMOD
+
+/*
+ modprobe_path is set via /proc/sys.
+*/
+char modprobe_path[256] = "/sbin/modprobe";
+
+static int exec_modprobe(void * module_name)
+{
+ static char * envp[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };
+ char *argv[] = { modprobe_path, "-s", "-k", "--", (char*)module_name, NULL };
+ int ret;
+
+ ret = exec_usermodehelper(modprobe_path, argv, envp);
+ if (ret) {
+ printk(KERN_ERR
+ "kmod: failed to exec %s -s -k %s, errno = %d\n",
+ modprobe_path, (char*) module_name, errno);
+ }
+ return ret;
+}
+
+/**
+ * request_module - try to load a kernel module
+ * @module_name: Name of module
+ *
+ * Load a module using the user mode module loader. The function returns
+ * zero on success or a negative errno code on failure. Note that a
+ * successful module load does not mean the module did not then unload
+ * and exit on an error of its own. Callers must check that the service
+ * they requested is now available, not blindly invoke it.
+ *
+ * If module auto-loading support is disabled then this function
+ * becomes a no-operation.
+ */
+
+int request_module(const char * module_name)
+{
+ pid_t pid;
+ int waitpid_result;
+ sigset_t tmpsig;
+ int i;
+ static atomic_t kmod_concurrent = ATOMIC_INIT(0);
+#define MAX_KMOD_CONCURRENT 50 /* Completely arbitrary value - KAO */
+ static int kmod_loop_msg;
+
+ /* Don't allow request_module() before the root fs is mounted! */
+ if ( ! current->fs->root ) {
+ printk(KERN_ERR "request_module[%s]: Root fs not mounted\n",
+ module_name);
+ return -EPERM;
+ }
+
+ /* If modprobe needs a service that is in a module, we get a recursive
+ * loop. Limit the number of running kmod threads to max_threads/2 or
+ * MAX_KMOD_CONCURRENT, whichever is the smaller. A cleaner method
+ * would be to run the parents of this process, counting how many times
+ * kmod was invoked. That would mean accessing the internals of the
+ * process tables to get the command line; proc_pid_cmdline is static
+ * and it is not worth changing the proc code just to handle this case.
+ * KAO.
+ */
+ i = max_threads/2;
+ if (i > MAX_KMOD_CONCURRENT)
+ i = MAX_KMOD_CONCURRENT;
+ atomic_inc(&kmod_concurrent);
+ if (atomic_read(&kmod_concurrent) > i) {
+ if (kmod_loop_msg++ < 5)
+ printk(KERN_ERR
+ "kmod: runaway modprobe loop assumed and stopped\n");
+ atomic_dec(&kmod_concurrent);
+ return -ENOMEM;
+ }
+
+ pid = kernel_thread(exec_modprobe, (void*) module_name, 0);
+ if (pid < 0) {
+ printk(KERN_ERR "request_module[%s]: fork failed, errno %d\n", module_name, -pid);
+ atomic_dec(&kmod_concurrent);
+ return pid;
+ }
+
+ /* Block everything but SIGKILL/SIGSTOP */
+ spin_lock_irq(&current->sigmask_lock);
+ tmpsig = current->blocked;
+ siginitsetinv(&current->blocked, sigmask(SIGKILL) | sigmask(SIGSTOP));
+ recalc_sigpending(current);
+ spin_unlock_irq(&current->sigmask_lock);
+
+ waitpid_result = waitpid(pid, NULL, __WCLONE);
+ atomic_dec(&kmod_concurrent);
+
+ /* Allow signals again.. */
+ spin_lock_irq(&current->sigmask_lock);
+ current->blocked = tmpsig;
+ recalc_sigpending(current);
+ spin_unlock_irq(&current->sigmask_lock);
+
+ if (waitpid_result != pid) {
+ printk(KERN_ERR "request_module[%s]: waitpid(%d,...) failed, errno %d\n",
+ module_name, pid, -waitpid_result);
+ }
+ return 0;
+}
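+
+/*
+ * Illustrative sketch, not part of this changeset: as the comment above
+ * request_module() says, callers must re-check for the service after the
+ * call returns.  "foo" and the foo_lookup_device() helper are hypothetical.
+ */
+#if 0
+static struct foo_device *foo_get_device(int minor)
+{
+	struct foo_device *dev = foo_lookup_device(minor);
+
+	if (!dev) {
+		request_module("foo");		/* may legitimately fail */
+		dev = foo_lookup_device(minor);	/* re-check, never assume */
+	}
+	return dev;				/* still NULL if no driver appeared */
+}
+#endif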
+#endif /* CONFIG_KMOD */
+
+
+#ifdef CONFIG_HOTPLUG
+/*
+ hotplug path is set via /proc/sys
+ invoked by hotplug-aware bus drivers,
+ with exec_usermodehelper and some thread-spawner
+
+ argv [0] = hotplug_path;
+ argv [1] = "usb", "scsi", "pci", "network", etc;
+ ... plus optional type-specific parameters
+ argv [n] = 0;
+
+ envp [*] = HOME, PATH; optional type-specific parameters
+
+ a hotplug bus should invoke this for device add/remove
+ events. the command is expected to load drivers when
+ necessary, and may perform additional system setup.
+*/
+char hotplug_path[256] = "/sbin/hotplug";
+
+EXPORT_SYMBOL(hotplug_path);
+
+#endif /* CONFIG_HOTPLUG */
+
+struct subprocess_info {
+ struct semaphore *sem;
+ char *path;
+ char **argv;
+ char **envp;
+ pid_t retval;
+};
+
+/*
+ * This is the task which runs the usermode application
+ */
+static int ____call_usermodehelper(void *data)
+{
+ struct subprocess_info *sub_info = data;
+ int retval;
+
+ retval = -EPERM;
+ if (current->fs->root)
+ retval = exec_usermodehelper(sub_info->path, sub_info->argv, sub_info->envp);
+
+ /* Exec failed? */
+ sub_info->retval = (pid_t)retval;
+ do_exit(0);
+}
+
+/*
+ * This is run by keventd.
+ */
+static void __call_usermodehelper(void *data)
+{
+ struct subprocess_info *sub_info = data;
+ pid_t pid;
+
+ /*
+ * CLONE_VFORK: wait until the usermode helper has execve'd successfully
+ * We need the data structures to stay around until that is done.
+ */
+ pid = kernel_thread(____call_usermodehelper, sub_info, CLONE_VFORK | SIGCHLD);
+ if (pid < 0)
+ sub_info->retval = pid;
+ up(sub_info->sem);
+}
+
+/**
+ * call_usermodehelper - start a usermode application
+ * @path: pathname for the application
+ * @argv: null-terminated argument list
+ * @envp: null-terminated environment list
+ *
+ * Runs a user-space application. The application is started asynchronously;
+ * it runs as a child of keventd with full root capabilities, and keventd
+ * silently reaps the child when it exits.
+ *
+ * Must be called from process context. Returns zero on success, else a negative
+ * error code.
+ */
+int call_usermodehelper(char *path, char **argv, char **envp)
+{
+ DECLARE_MUTEX_LOCKED(sem);
+ struct subprocess_info sub_info = {
+ sem: &sem,
+ path: path,
+ argv: argv,
+ envp: envp,
+ retval: 0,
+ };
+ struct tq_struct tqs = {
+ routine: __call_usermodehelper,
+ data: &sub_info,
+ };
+
+ if (path[0] == '\0')
+ goto out;
+
+ if (current_is_keventd()) {
+ /* We can't wait on keventd! */
+ __call_usermodehelper(&sub_info);
+ } else {
+ schedule_task(&tqs);
+ down(&sem); /* Wait until keventd has started the subprocess */
+ }
+out:
+ return sub_info.retval;
+}
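+
+/*
+ * Illustrative sketch, not part of this changeset: invoking an agent in
+ * the style of the hotplug convention described earlier in this file.
+ * The "block" event name is made up for the example.
+ */
+#if 0
+static int example_run_agent(void)
+{
+	char *argv[3], *envp[3];
+
+	argv[0] = hotplug_path;
+	argv[1] = "block";	/* bus/class name, per the hotplug convention */
+	argv[2] = NULL;
+	envp[0] = "HOME=/";
+	envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
+	envp[2] = NULL;
+
+	/* zero on success, else a negative error code (see kerneldoc above) */
+	return call_usermodehelper(argv[0], argv, envp);
+}
+#endif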
+
+/*
+ * This is for the serialisation of device probe() functions
+ * against device open() functions
+ */
+static DECLARE_MUTEX(dev_probe_sem);
+
+void dev_probe_lock(void)
+{
+ down(&dev_probe_sem);
+}
+
+void dev_probe_unlock(void)
+{
+ up(&dev_probe_sem);
+}
+
+EXPORT_SYMBOL(exec_usermodehelper);
+EXPORT_SYMBOL(call_usermodehelper);
+
+#ifdef CONFIG_KMOD
+EXPORT_SYMBOL(request_module);
+#endif
+
diff --git a/kernel/ksyms.c b/kernel/ksyms.c
new file mode 100644
index 000000000000..8afe07cca5fb
--- /dev/null
+++ b/kernel/ksyms.c
@@ -0,0 +1,538 @@
+/*
+ * Herein lie all the functions/variables that are "exported" for linkage
+ * with dynamically loaded kernel modules.
+ * Jon.
+ *
+ * - Stacked module support and unified symbol table added (June 1994)
+ * - External symbol table support added (December 1994)
+ * - Versions on symbols added (December 1994)
+ * by Bjorn Ekwall <bj0rn@blox.se>
+ */
+
+#include <linux/config.h>
+#include <linux/malloc.h>
+#include <linux/module.h>
+#include <linux/blkdev.h>
+#include <linux/cdrom.h>
+#include <linux/kernel_stat.h>
+#include <linux/vmalloc.h>
+#include <linux/sys.h>
+#include <linux/utsname.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/serial.h>
+#include <linux/locks.h>
+#include <linux/delay.h>
+#include <linux/minix_fs.h>
+#include <linux/ext2_fs.h>
+#include <linux/random.h>
+#include <linux/reboot.h>
+#include <linux/pagemap.h>
+#include <linux/sysctl.h>
+#include <linux/hdreg.h>
+#include <linux/skbuff.h>
+#include <linux/genhd.h>
+#include <linux/blkpg.h>
+#include <linux/swap.h>
+#include <linux/ctype.h>
+#include <linux/file.h>
+#include <linux/iobuf.h>
+#include <linux/console.h>
+#include <linux/poll.h>
+#include <linux/mmzone.h>
+#include <linux/mm.h>
+#include <linux/capability.h>
+#include <linux/highuid.h>
+#include <linux/brlock.h>
+#include <linux/fs.h>
+
+#if defined(CONFIG_PROC_FS)
+#include <linux/proc_fs.h>
+#endif
+#ifdef CONFIG_KMOD
+#include <linux/kmod.h>
+#endif
+
+extern void set_device_ro(kdev_t dev,int flag);
+
+extern void *sys_call_table;
+
+extern int sys_tz;
+extern int request_dma(unsigned int dmanr, char * deviceID);
+extern void free_dma(unsigned int dmanr);
+extern spinlock_t dma_spin_lock;
+
+#ifdef CONFIG_MODVERSIONS
+const struct module_symbol __export_Using_Versions
+__attribute__((section("__ksymtab"))) = {
+ 1 /* Version version */, "Using_Versions"
+};
+#endif
+
+
+EXPORT_SYMBOL(inter_module_register);
+EXPORT_SYMBOL(inter_module_unregister);
+EXPORT_SYMBOL(inter_module_get);
+EXPORT_SYMBOL(inter_module_get_request);
+EXPORT_SYMBOL(inter_module_put);
+EXPORT_SYMBOL(try_inc_mod_count);
+
+/* process memory management */
+EXPORT_SYMBOL(do_mmap_pgoff);
+EXPORT_SYMBOL(do_munmap);
+EXPORT_SYMBOL(do_brk);
+EXPORT_SYMBOL(exit_mm);
+EXPORT_SYMBOL(exit_files);
+EXPORT_SYMBOL(exit_fs);
+EXPORT_SYMBOL(exit_sighand);
+
+/* internal kernel memory management */
+EXPORT_SYMBOL(__alloc_pages);
+EXPORT_SYMBOL(alloc_pages_node);
+EXPORT_SYMBOL(__get_free_pages);
+EXPORT_SYMBOL(get_zeroed_page);
+EXPORT_SYMBOL(__free_pages);
+EXPORT_SYMBOL(free_pages);
+#ifndef CONFIG_DISCONTIGMEM
+EXPORT_SYMBOL(contig_page_data);
+#endif
+EXPORT_SYMBOL(num_physpages);
+EXPORT_SYMBOL(kmem_find_general_cachep);
+EXPORT_SYMBOL(kmem_cache_create);
+EXPORT_SYMBOL(kmem_cache_destroy);
+EXPORT_SYMBOL(kmem_cache_shrink);
+EXPORT_SYMBOL(kmem_cache_alloc);
+EXPORT_SYMBOL(kmem_cache_free);
+EXPORT_SYMBOL(kmalloc);
+EXPORT_SYMBOL(kfree);
+EXPORT_SYMBOL(vfree);
+EXPORT_SYMBOL(__vmalloc);
+EXPORT_SYMBOL(mem_map);
+EXPORT_SYMBOL(remap_page_range);
+EXPORT_SYMBOL(max_mapnr);
+EXPORT_SYMBOL(high_memory);
+EXPORT_SYMBOL(vmtruncate);
+EXPORT_SYMBOL(find_vma);
+EXPORT_SYMBOL(get_unmapped_area);
+EXPORT_SYMBOL(init_mm);
+EXPORT_SYMBOL(deactivate_page);
+#ifdef CONFIG_HIGHMEM
+EXPORT_SYMBOL(kmap_high);
+EXPORT_SYMBOL(kunmap_high);
+EXPORT_SYMBOL(highmem_start_page);
+#endif
+
+/* filesystem internal functions */
+EXPORT_SYMBOL(def_blk_fops);
+EXPORT_SYMBOL(update_atime);
+EXPORT_SYMBOL(get_fs_type);
+EXPORT_SYMBOL(get_super);
+EXPORT_SYMBOL(get_empty_super);
+EXPORT_SYMBOL(getname);
+EXPORT_SYMBOL(names_cachep);
+EXPORT_SYMBOL(fput);
+EXPORT_SYMBOL(fget);
+EXPORT_SYMBOL(igrab);
+EXPORT_SYMBOL(iunique);
+EXPORT_SYMBOL(iget4);
+EXPORT_SYMBOL(iput);
+EXPORT_SYMBOL(force_delete);
+EXPORT_SYMBOL(follow_up);
+EXPORT_SYMBOL(follow_down);
+EXPORT_SYMBOL(path_init);
+EXPORT_SYMBOL(path_walk);
+EXPORT_SYMBOL(path_release);
+EXPORT_SYMBOL(__user_walk);
+EXPORT_SYMBOL(lookup_one);
+EXPORT_SYMBOL(lookup_hash);
+EXPORT_SYMBOL(sys_close);
+EXPORT_SYMBOL(dcache_lock);
+EXPORT_SYMBOL(d_alloc_root);
+EXPORT_SYMBOL(d_delete);
+EXPORT_SYMBOL(dget_locked);
+EXPORT_SYMBOL(d_validate);
+EXPORT_SYMBOL(d_rehash);
+EXPORT_SYMBOL(d_invalidate); /* Maybe it would be better in dcache.h? */
+EXPORT_SYMBOL(d_move);
+EXPORT_SYMBOL(d_instantiate);
+EXPORT_SYMBOL(d_alloc);
+EXPORT_SYMBOL(d_lookup);
+EXPORT_SYMBOL(__d_path);
+EXPORT_SYMBOL(mark_buffer_dirty);
+EXPORT_SYMBOL(__mark_buffer_dirty);
+EXPORT_SYMBOL(__mark_inode_dirty);
+EXPORT_SYMBOL(get_empty_filp);
+EXPORT_SYMBOL(init_private_file);
+EXPORT_SYMBOL(filp_open);
+EXPORT_SYMBOL(filp_close);
+EXPORT_SYMBOL(put_filp);
+EXPORT_SYMBOL(files_lock);
+EXPORT_SYMBOL(check_disk_change);
+EXPORT_SYMBOL(__invalidate_buffers);
+EXPORT_SYMBOL(invalidate_inodes);
+EXPORT_SYMBOL(invalidate_inode_pages);
+EXPORT_SYMBOL(truncate_inode_pages);
+EXPORT_SYMBOL(fsync_dev);
+EXPORT_SYMBOL(permission);
+EXPORT_SYMBOL(vfs_permission);
+EXPORT_SYMBOL(inode_setattr);
+EXPORT_SYMBOL(inode_change_ok);
+EXPORT_SYMBOL(write_inode_now);
+EXPORT_SYMBOL(notify_change);
+EXPORT_SYMBOL(get_hardblocksize);
+EXPORT_SYMBOL(set_blocksize);
+EXPORT_SYMBOL(getblk);
+EXPORT_SYMBOL(bdget);
+EXPORT_SYMBOL(bdput);
+EXPORT_SYMBOL(bread);
+EXPORT_SYMBOL(__brelse);
+EXPORT_SYMBOL(__bforget);
+EXPORT_SYMBOL(ll_rw_block);
+EXPORT_SYMBOL(submit_bh);
+EXPORT_SYMBOL(__wait_on_buffer);
+EXPORT_SYMBOL(___wait_on_page);
+EXPORT_SYMBOL(block_write_full_page);
+EXPORT_SYMBOL(block_read_full_page);
+EXPORT_SYMBOL(block_prepare_write);
+EXPORT_SYMBOL(block_sync_page);
+EXPORT_SYMBOL(cont_prepare_write);
+EXPORT_SYMBOL(generic_commit_write);
+EXPORT_SYMBOL(block_truncate_page);
+EXPORT_SYMBOL(generic_block_bmap);
+EXPORT_SYMBOL(generic_file_read);
+EXPORT_SYMBOL(do_generic_file_read);
+EXPORT_SYMBOL(generic_file_write);
+EXPORT_SYMBOL(generic_file_mmap);
+EXPORT_SYMBOL(generic_ro_fops);
+EXPORT_SYMBOL(generic_buffer_fdatasync);
+EXPORT_SYMBOL(page_hash_bits);
+EXPORT_SYMBOL(page_hash_table);
+EXPORT_SYMBOL(file_lock_list);
+EXPORT_SYMBOL(locks_init_lock);
+EXPORT_SYMBOL(locks_copy_lock);
+EXPORT_SYMBOL(posix_lock_file);
+EXPORT_SYMBOL(posix_test_lock);
+EXPORT_SYMBOL(posix_block_lock);
+EXPORT_SYMBOL(posix_unblock_lock);
+EXPORT_SYMBOL(locks_mandatory_area);
+EXPORT_SYMBOL(dput);
+EXPORT_SYMBOL(have_submounts);
+EXPORT_SYMBOL(d_find_alias);
+EXPORT_SYMBOL(d_prune_aliases);
+EXPORT_SYMBOL(prune_dcache);
+EXPORT_SYMBOL(shrink_dcache_sb);
+EXPORT_SYMBOL(shrink_dcache_parent);
+EXPORT_SYMBOL(find_inode_number);
+EXPORT_SYMBOL(is_subdir);
+EXPORT_SYMBOL(get_unused_fd);
+EXPORT_SYMBOL(vfs_create);
+EXPORT_SYMBOL(vfs_mkdir);
+EXPORT_SYMBOL(vfs_mknod);
+EXPORT_SYMBOL(vfs_symlink);
+EXPORT_SYMBOL(vfs_link);
+EXPORT_SYMBOL(vfs_rmdir);
+EXPORT_SYMBOL(vfs_unlink);
+EXPORT_SYMBOL(vfs_rename);
+EXPORT_SYMBOL(vfs_statfs);
+EXPORT_SYMBOL(generic_read_dir);
+EXPORT_SYMBOL(__pollwait);
+EXPORT_SYMBOL(poll_freewait);
+EXPORT_SYMBOL(ROOT_DEV);
+EXPORT_SYMBOL(__find_lock_page);
+EXPORT_SYMBOL(grab_cache_page);
+EXPORT_SYMBOL(read_cache_page);
+EXPORT_SYMBOL(vfs_readlink);
+EXPORT_SYMBOL(vfs_follow_link);
+EXPORT_SYMBOL(page_readlink);
+EXPORT_SYMBOL(page_follow_link);
+EXPORT_SYMBOL(page_symlink_inode_operations);
+EXPORT_SYMBOL(block_symlink);
+EXPORT_SYMBOL(vfs_readdir);
+EXPORT_SYMBOL(__get_lease);
+EXPORT_SYMBOL(lease_get_mtime);
+EXPORT_SYMBOL(lock_may_read);
+EXPORT_SYMBOL(lock_may_write);
+EXPORT_SYMBOL(dcache_readdir);
+
+/* for stackable file systems (lofs, wrapfs, cryptfs, etc.) */
+EXPORT_SYMBOL(default_llseek);
+EXPORT_SYMBOL(dentry_open);
+EXPORT_SYMBOL(filemap_nopage);
+EXPORT_SYMBOL(filemap_sync);
+EXPORT_SYMBOL(lock_page);
+
+/* device registration */
+EXPORT_SYMBOL(register_chrdev);
+EXPORT_SYMBOL(unregister_chrdev);
+EXPORT_SYMBOL(register_blkdev);
+EXPORT_SYMBOL(unregister_blkdev);
+EXPORT_SYMBOL(tty_register_driver);
+EXPORT_SYMBOL(tty_unregister_driver);
+EXPORT_SYMBOL(tty_std_termios);
+
+/* block device driver support */
+EXPORT_SYMBOL(block_read);
+EXPORT_SYMBOL(block_write);
+EXPORT_SYMBOL(blksize_size);
+EXPORT_SYMBOL(hardsect_size);
+EXPORT_SYMBOL(blk_size);
+EXPORT_SYMBOL(blk_dev);
+EXPORT_SYMBOL(is_read_only);
+EXPORT_SYMBOL(set_device_ro);
+EXPORT_SYMBOL(bmap);
+EXPORT_SYMBOL(sync_dev);
+EXPORT_SYMBOL(devfs_register_partitions);
+EXPORT_SYMBOL(blkdev_open);
+EXPORT_SYMBOL(blkdev_get);
+EXPORT_SYMBOL(blkdev_put);
+EXPORT_SYMBOL(ioctl_by_bdev);
+EXPORT_SYMBOL(gendisk_head);
+EXPORT_SYMBOL(grok_partitions);
+EXPORT_SYMBOL(register_disk);
+EXPORT_SYMBOL(tq_disk);
+EXPORT_SYMBOL(init_buffer);
+EXPORT_SYMBOL(refile_buffer);
+EXPORT_SYMBOL(max_sectors);
+EXPORT_SYMBOL(max_readahead);
+EXPORT_SYMBOL(file_moveto);
+
+/* tty routines */
+EXPORT_SYMBOL(tty_hangup);
+EXPORT_SYMBOL(tty_wait_until_sent);
+EXPORT_SYMBOL(tty_check_change);
+EXPORT_SYMBOL(tty_hung_up_p);
+EXPORT_SYMBOL(tty_flip_buffer_push);
+EXPORT_SYMBOL(tty_get_baud_rate);
+EXPORT_SYMBOL(do_SAK);
+EXPORT_SYMBOL(console_print);
+EXPORT_SYMBOL(console_loglevel);
+
+/* filesystem registration */
+EXPORT_SYMBOL(register_filesystem);
+EXPORT_SYMBOL(unregister_filesystem);
+EXPORT_SYMBOL(kern_mount);
+EXPORT_SYMBOL(kern_umount);
+EXPORT_SYMBOL(may_umount);
+
+/* executable format registration */
+EXPORT_SYMBOL(register_binfmt);
+EXPORT_SYMBOL(unregister_binfmt);
+EXPORT_SYMBOL(search_binary_handler);
+EXPORT_SYMBOL(prepare_binprm);
+EXPORT_SYMBOL(compute_creds);
+EXPORT_SYMBOL(remove_arg_zero);
+EXPORT_SYMBOL(set_binfmt);
+
+/* execution environment registration */
+EXPORT_SYMBOL(register_exec_domain);
+EXPORT_SYMBOL(unregister_exec_domain);
+EXPORT_SYMBOL(__set_personality);
+
+/* sysctl table registration */
+EXPORT_SYMBOL(register_sysctl_table);
+EXPORT_SYMBOL(unregister_sysctl_table);
+EXPORT_SYMBOL(sysctl_string);
+EXPORT_SYMBOL(sysctl_intvec);
+EXPORT_SYMBOL(sysctl_jiffies);
+EXPORT_SYMBOL(proc_dostring);
+EXPORT_SYMBOL(proc_dointvec);
+EXPORT_SYMBOL(proc_dointvec_jiffies);
+EXPORT_SYMBOL(proc_dointvec_minmax);
+EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
+EXPORT_SYMBOL(proc_doulongvec_minmax);
+
+/* interrupt handling */
+EXPORT_SYMBOL(add_timer);
+EXPORT_SYMBOL(del_timer);
+EXPORT_SYMBOL(request_irq);
+EXPORT_SYMBOL(free_irq);
+#if !defined(CONFIG_ARCH_S390)
+EXPORT_SYMBOL(irq_stat); /* No separate irq_stat for s390, it is part of PSA */
+#endif
+
+/* waitqueue handling */
+EXPORT_SYMBOL(add_wait_queue);
+EXPORT_SYMBOL(add_wait_queue_exclusive);
+EXPORT_SYMBOL(remove_wait_queue);
+
+/* The notion of irq probe/assignment is foreign to S/390 */
+
+#if !defined(CONFIG_ARCH_S390)
+EXPORT_SYMBOL(probe_irq_on);
+EXPORT_SYMBOL(probe_irq_off);
+#endif
+
+#ifdef CONFIG_SMP
+EXPORT_SYMBOL(del_timer_sync);
+#endif
+EXPORT_SYMBOL(mod_timer);
+EXPORT_SYMBOL(tq_timer);
+EXPORT_SYMBOL(tq_immediate);
+
+#ifdef CONFIG_SMP
+/* Various random spinlocks we want to export */
+EXPORT_SYMBOL(tqueue_lock);
+
+/* Big-Reader lock implementation */
+EXPORT_SYMBOL(__brlock_array);
+#ifndef __BRLOCK_USE_ATOMICS
+EXPORT_SYMBOL(__br_write_locks);
+#endif
+EXPORT_SYMBOL(__br_write_lock);
+EXPORT_SYMBOL(__br_write_unlock);
+#endif
+
+/* Kiobufs */
+EXPORT_SYMBOL(kiobuf_init);
+
+EXPORT_SYMBOL(alloc_kiovec);
+EXPORT_SYMBOL(free_kiovec);
+EXPORT_SYMBOL(expand_kiobuf);
+
+EXPORT_SYMBOL(map_user_kiobuf);
+EXPORT_SYMBOL(unmap_kiobuf);
+EXPORT_SYMBOL(lock_kiovec);
+EXPORT_SYMBOL(unlock_kiovec);
+EXPORT_SYMBOL(brw_kiovec);
+
+/* dma handling */
+EXPORT_SYMBOL(request_dma);
+EXPORT_SYMBOL(free_dma);
+EXPORT_SYMBOL(dma_spin_lock);
+#ifdef HAVE_DISABLE_HLT
+EXPORT_SYMBOL(disable_hlt);
+EXPORT_SYMBOL(enable_hlt);
+#endif
+
+/* resource handling */
+EXPORT_SYMBOL(request_resource);
+EXPORT_SYMBOL(release_resource);
+EXPORT_SYMBOL(allocate_resource);
+EXPORT_SYMBOL(check_resource);
+EXPORT_SYMBOL(__request_region);
+EXPORT_SYMBOL(__check_region);
+EXPORT_SYMBOL(__release_region);
+EXPORT_SYMBOL(ioport_resource);
+EXPORT_SYMBOL(iomem_resource);
+
+/* process management */
+EXPORT_SYMBOL(up_and_exit);
+EXPORT_SYMBOL(__wake_up);
+EXPORT_SYMBOL(wake_up_process);
+EXPORT_SYMBOL(sleep_on);
+EXPORT_SYMBOL(sleep_on_timeout);
+EXPORT_SYMBOL(interruptible_sleep_on);
+EXPORT_SYMBOL(interruptible_sleep_on_timeout);
+EXPORT_SYMBOL(schedule);
+EXPORT_SYMBOL(schedule_timeout);
+EXPORT_SYMBOL(jiffies);
+EXPORT_SYMBOL(xtime);
+EXPORT_SYMBOL(do_gettimeofday);
+EXPORT_SYMBOL(do_settimeofday);
+
+#if !defined(__ia64__)
+EXPORT_SYMBOL(loops_per_jiffy);
+#endif
+
+EXPORT_SYMBOL(kstat);
+EXPORT_SYMBOL(nr_running);
+
+/* misc */
+EXPORT_SYMBOL(panic);
+EXPORT_SYMBOL(printk);
+EXPORT_SYMBOL(sprintf);
+EXPORT_SYMBOL(vsprintf);
+EXPORT_SYMBOL(kdevname);
+EXPORT_SYMBOL(bdevname);
+EXPORT_SYMBOL(cdevname);
+EXPORT_SYMBOL(simple_strtoul);
+EXPORT_SYMBOL(system_utsname); /* UTS data */
+EXPORT_SYMBOL(uts_sem); /* UTS semaphore */
+#ifndef __mips__
+EXPORT_SYMBOL(sys_call_table);
+#endif
+EXPORT_SYMBOL(machine_restart);
+EXPORT_SYMBOL(machine_halt);
+EXPORT_SYMBOL(machine_power_off);
+EXPORT_SYMBOL(_ctype);
+EXPORT_SYMBOL(secure_tcp_sequence_number);
+EXPORT_SYMBOL(get_random_bytes);
+EXPORT_SYMBOL(securebits);
+EXPORT_SYMBOL(cap_bset);
+EXPORT_SYMBOL(daemonize);
+
+/* Program loader interfaces */
+EXPORT_SYMBOL(setup_arg_pages);
+EXPORT_SYMBOL(copy_strings_kernel);
+EXPORT_SYMBOL(do_execve);
+EXPORT_SYMBOL(flush_old_exec);
+EXPORT_SYMBOL(kernel_read);
+EXPORT_SYMBOL(open_exec);
+
+/* Miscellaneous access points */
+EXPORT_SYMBOL(si_meminfo);
+
+/* Added to make file system as module */
+EXPORT_SYMBOL(sys_tz);
+EXPORT_SYMBOL(__wait_on_super);
+EXPORT_SYMBOL(file_fsync);
+EXPORT_SYMBOL(fsync_inode_buffers);
+EXPORT_SYMBOL(clear_inode);
+EXPORT_SYMBOL(nr_async_pages);
+EXPORT_SYMBOL(___strtok);
+EXPORT_SYMBOL(init_special_inode);
+EXPORT_SYMBOL(read_ahead);
+EXPORT_SYMBOL(get_hash_table);
+EXPORT_SYMBOL(get_empty_inode);
+EXPORT_SYMBOL(insert_inode_hash);
+EXPORT_SYMBOL(remove_inode_hash);
+EXPORT_SYMBOL(buffer_insert_inode_queue);
+EXPORT_SYMBOL(make_bad_inode);
+EXPORT_SYMBOL(is_bad_inode);
+EXPORT_SYMBOL(event);
+EXPORT_SYMBOL(brw_page);
+
+#ifdef CONFIG_UID16
+EXPORT_SYMBOL(overflowuid);
+EXPORT_SYMBOL(overflowgid);
+#endif
+EXPORT_SYMBOL(fs_overflowuid);
+EXPORT_SYMBOL(fs_overflowgid);
+
+/* all busmice */
+EXPORT_SYMBOL(fasync_helper);
+EXPORT_SYMBOL(kill_fasync);
+
+EXPORT_SYMBOL(disk_name); /* for md.c */
+
+/* binfmt_aout */
+EXPORT_SYMBOL(get_write_access);
+
+/* dynamic registering of consoles */
+EXPORT_SYMBOL(register_console);
+EXPORT_SYMBOL(unregister_console);
+
+/* time */
+EXPORT_SYMBOL(get_fast_time);
+
+/* library functions */
+EXPORT_SYMBOL(strnicmp);
+EXPORT_SYMBOL(strspn);
+EXPORT_SYMBOL(strsep);
+
+/* software interrupts */
+EXPORT_SYMBOL(tasklet_hi_vec);
+EXPORT_SYMBOL(tasklet_vec);
+EXPORT_SYMBOL(bh_task_vec);
+EXPORT_SYMBOL(init_bh);
+EXPORT_SYMBOL(remove_bh);
+EXPORT_SYMBOL(tasklet_init);
+EXPORT_SYMBOL(tasklet_kill);
+EXPORT_SYMBOL(__run_task_queue);
+
+/* init task, for moving kthread roots - ought to export a function ?? */
+
+EXPORT_SYMBOL(init_task_union);
+
+EXPORT_SYMBOL(tasklist_lock);
+EXPORT_SYMBOL(pidhash);
diff --git a/kernel/module.c b/kernel/module.c
new file mode 100644
index 000000000000..dd02b40cd891
--- /dev/null
+++ b/kernel/module.c
@@ -0,0 +1,1235 @@
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <asm/module.h>
+#include <asm/uaccess.h>
+#include <linux/vmalloc.h>
+#include <linux/smp_lock.h>
+#include <asm/pgalloc.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/kmod.h>
+
+/*
+ * Originally by Anonymous (as far as I know...)
+ * Linux version by Bas Laarhoven <bas@vimec.nl>
+ * 0.99.14 version by Jon Tombs <jon@gtex02.us.es>,
+ * Heavily modified by Bjorn Ekwall <bj0rn@blox.se> May 1994 (C)
+ * Rewritten by Richard Henderson <rth@tamu.edu> Dec 1996
+ * Add MOD_INITIALIZING Keith Owens <kaos@ocs.com.au> Nov 1999
+ * Add kallsyms support, Keith Owens <kaos@ocs.com.au> Apr 2000
+ * Add asm/module support, IA64 has special requirements. Keith Owens <kaos@ocs.com.au> Sep 2000
+ * Fix assorted bugs in module verification. Keith Owens <kaos@ocs.com.au> Sep 2000
+ * Fix sys_init_module race, Andrew Morton <andrewm@uow.edu.au> Oct 2000
+ * http://www.uwsg.iu.edu/hypermail/linux/kernel/0008.3/0379.html
+ * Replace xxx_module_symbol with inter_module_xxx. Keith Owens <kaos@ocs.com.au> Oct 2000
+ *
+ * This source is covered by the GNU GPL, the same as all kernel sources.
+ */
+
+#if defined(CONFIG_MODULES) || defined(CONFIG_KALLSYMS)
+
+extern struct module_symbol __start___ksymtab[];
+extern struct module_symbol __stop___ksymtab[];
+
+extern const struct exception_table_entry __start___ex_table[];
+extern const struct exception_table_entry __stop___ex_table[];
+
+extern const char __start___kallsyms[] __attribute__ ((weak));
+extern const char __stop___kallsyms[] __attribute__ ((weak));
+
+static struct module kernel_module =
+{
+ size_of_struct: sizeof(struct module),
+ name: "",
+ uc: {ATOMIC_INIT(1)},
+ flags: MOD_RUNNING,
+ syms: __start___ksymtab,
+ ex_table_start: __start___ex_table,
+ ex_table_end: __stop___ex_table,
+ kallsyms_start: __start___kallsyms,
+ kallsyms_end: __stop___kallsyms,
+};
+
+struct module *module_list = &kernel_module;
+
+#endif /* defined(CONFIG_MODULES) || defined(CONFIG_KALLSYMS) */
+
+/* inter_module functions are always available, even when the kernel is
+ * compiled without modules. Consumers of inter_module_xxx routines
+ * will always work, even when both are built into the kernel; this
+ * approach removes lots of #ifdefs in mainline code.
+ */
+
+static struct list_head ime_list = LIST_HEAD_INIT(ime_list);
+static spinlock_t ime_lock = SPIN_LOCK_UNLOCKED;
+static int kmalloc_failed;
+
+/**
+ * inter_module_register - register a new set of inter module data.
+ * @im_name: an arbitrary string to identify the data, must be unique
+ * @owner: module that is registering the data, always use THIS_MODULE
+ * @userdata: pointer to arbitrary userdata to be registered
+ *
+ * Description: Check that the im_name has not already been registered,
+ * complain if it has. For new data, add it to the inter_module_entry
+ * list.
+ */
+void inter_module_register(const char *im_name, struct module *owner, const void *userdata)
+{
+ struct list_head *tmp;
+ struct inter_module_entry *ime, *ime_new;
+
+ if (!(ime_new = kmalloc(sizeof(*ime), GFP_KERNEL))) {
+ /* Overloaded kernel, not fatal */
+ printk(KERN_ERR
+ "Aiee, inter_module_register: cannot kmalloc entry for '%s'\n",
+ im_name);
+ kmalloc_failed = 1;
+ return;
+ }
+ memset(ime_new, 0, sizeof(*ime_new));
+ ime_new->im_name = im_name;
+ ime_new->owner = owner;
+ ime_new->userdata = userdata;
+
+ spin_lock(&ime_lock);
+ list_for_each(tmp, &ime_list) {
+ ime = list_entry(tmp, struct inter_module_entry, list);
+ if (strcmp(ime->im_name, im_name) == 0) {
+ spin_unlock(&ime_lock);
+ kfree(ime_new);
+ /* Program logic error, fatal */
+ printk(KERN_ERR "inter_module_register: duplicate im_name '%s'", im_name);
+ BUG();
+ }
+ }
+ list_add(&(ime_new->list), &ime_list);
+ spin_unlock(&ime_lock);
+}
+
+/**
+ * inter_module_unregister - unregister a set of inter module data.
+ * @im_name: an arbitrary string to identify the data, must be unique
+ *
+ * Description: Check that the im_name has been registered, complain if
+ * it has not. For existing data, remove it from the
+ * inter_module_entry list.
+ */
+void inter_module_unregister(const char *im_name)
+{
+ struct list_head *tmp;
+ struct inter_module_entry *ime;
+
+ spin_lock(&ime_lock);
+ list_for_each(tmp, &ime_list) {
+ ime = list_entry(tmp, struct inter_module_entry, list);
+ if (strcmp(ime->im_name, im_name) == 0) {
+ list_del(&(ime->list));
+ spin_unlock(&ime_lock);
+ kfree(ime);
+ return;
+ }
+ }
+ spin_unlock(&ime_lock);
+ if (kmalloc_failed) {
+ printk(KERN_ERR
+ "inter_module_unregister: no entry for '%s', "
+ "probably caused by previous kmalloc failure\n",
+ im_name);
+ return;
+ }
+ else {
+ /* Program logic error, fatal */
+ printk(KERN_ERR "inter_module_unregister: no entry for '%s'", im_name);
+ BUG();
+ }
+}
+
+/**
+ * inter_module_get - return arbitrary userdata from another module.
+ * @im_name: an arbitrary string to identify the data, must be unique
+ *
+ * Description: If the im_name has not been registered, return NULL.
+ * Try to increment the use count on the owning module, if that fails
+ * then return NULL. Otherwise return the userdata.
+ */
+const void *inter_module_get(const char *im_name)
+{
+ struct list_head *tmp;
+ struct inter_module_entry *ime;
+ const void *result = NULL;
+
+ spin_lock(&ime_lock);
+ list_for_each(tmp, &ime_list) {
+ ime = list_entry(tmp, struct inter_module_entry, list);
+ if (strcmp(ime->im_name, im_name) == 0) {
+ if (try_inc_mod_count(ime->owner))
+ result = ime->userdata;
+ break;
+ }
+ }
+ spin_unlock(&ime_lock);
+ return(result);
+}
+
+/**
+ * inter_module_get_request - im get with automatic request_module.
+ * @im_name: an arbitrary string to identify the data, must be unique
+ * @modname: module that is expected to register im_name
+ *
+ * Description: If inter_module_get fails, do request_module then retry.
+ */
+const void *inter_module_get_request(const char *im_name, const char *modname)
+{
+ const void *result = inter_module_get(im_name);
+ if (!result) {
+ request_module(modname);
+ result = inter_module_get(im_name);
+ }
+ return(result);
+}
+
+/**
+ * inter_module_put - release use of data from another module.
+ * @im_name: an arbitrary string to identify the data, must be unique
+ *
+ * Description: If the im_name has not been registered, complain,
+ * otherwise decrement the use count on the owning module.
+ */
+void inter_module_put(const char *im_name)
+{
+ struct list_head *tmp;
+ struct inter_module_entry *ime;
+
+ spin_lock(&ime_lock);
+ list_for_each(tmp, &ime_list) {
+ ime = list_entry(tmp, struct inter_module_entry, list);
+ if (strcmp(ime->im_name, im_name) == 0) {
+ if (ime->owner)
+ __MOD_DEC_USE_COUNT(ime->owner);
+ spin_unlock(&ime_lock);
+ return;
+ }
+ }
+ spin_unlock(&ime_lock);
+ printk(KERN_ERR "inter_module_put: no entry for '%s'", im_name);
+ BUG();
+}
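+
+/*
+ * Illustrative sketch, not part of this changeset: the usual pairing of
+ * the inter_module calls.  "foo_ops", struct foo_operations and the
+ * "foo" module name are hypothetical.
+ */
+#if 0
+/* provider side, e.g. in the foo module's init function */
+static struct foo_operations foo_ops;
+
+static void example_provider_init(void)
+{
+	inter_module_register("foo_ops", THIS_MODULE, &foo_ops);
+}
+
+/* consumer side: a successful get pins the owning module until the put */
+static void example_consumer(void)
+{
+	const struct foo_operations *ops;
+
+	ops = inter_module_get_request("foo_ops", "foo");
+	if (ops) {
+		/* ... use ops ... */
+		inter_module_put("foo_ops");
+	}
+}
+#endif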
+
+
+#if defined(CONFIG_MODULES) /* The rest of the source */
+
+static long get_mod_name(const char *user_name, char **buf);
+static void put_mod_name(char *buf);
+struct module *find_module(const char *name);
+void free_module(struct module *, int tag_freed);
+
+
+/*
+ * Called at boot time
+ */
+
+void __init init_modules(void)
+{
+ kernel_module.nsyms = __stop___ksymtab - __start___ksymtab;
+
+#ifdef __alpha__
+ __asm__("stq $29,%0" : "=m"(kernel_module.gp));
+#endif
+}
+
+/*
+ * Copy the name of a module from user space.
+ */
+
+static inline long
+get_mod_name(const char *user_name, char **buf)
+{
+ unsigned long page;
+ long retval;
+
+ page = __get_free_page(GFP_KERNEL);
+ if (!page)
+ return -ENOMEM;
+
+ retval = strncpy_from_user((char *)page, user_name, PAGE_SIZE);
+ if (retval > 0) {
+ if (retval < PAGE_SIZE) {
+ *buf = (char *)page;
+ return retval;
+ }
+ retval = -ENAMETOOLONG;
+ } else if (!retval)
+ retval = -EINVAL;
+
+ free_page(page);
+ return retval;
+}
+
+static inline void
+put_mod_name(char *buf)
+{
+ free_page((unsigned long)buf);
+}
+
+/*
+ * Allocate space for a module.
+ */
+
+asmlinkage unsigned long
+sys_create_module(const char *name_user, size_t size)
+{
+ char *name;
+ long namelen, error;
+ struct module *mod;
+
+ if (!capable(CAP_SYS_MODULE))
+ return -EPERM;
+ lock_kernel();
+ if ((namelen = get_mod_name(name_user, &name)) < 0) {
+ error = namelen;
+ goto err0;
+ }
+ if (size < sizeof(struct module)+namelen) {
+ error = -EINVAL;
+ goto err1;
+ }
+ if (find_module(name) != NULL) {
+ error = -EEXIST;
+ goto err1;
+ }
+ if ((mod = (struct module *)module_map(size)) == NULL) {
+ error = -ENOMEM;
+ goto err1;
+ }
+
+ memset(mod, 0, sizeof(*mod));
+ mod->size_of_struct = sizeof(*mod);
+ mod->next = module_list;
+ mod->name = (char *)(mod + 1);
+ mod->size = size;
+ memcpy((char*)(mod+1), name, namelen+1);
+
+ put_mod_name(name);
+
+ module_list = mod; /* link it in */
+
+ error = (long) mod;
+ goto err0;
+err1:
+ put_mod_name(name);
+err0:
+ unlock_kernel();
+ return error;
+}
+
+/*
+ * Initialize a module.
+ */
+
+asmlinkage long
+sys_init_module(const char *name_user, struct module *mod_user)
+{
+ struct module mod_tmp, *mod;
+ char *name, *n_name, *name_tmp = NULL;
+ long namelen, n_namelen, i, error;
+ unsigned long mod_user_size;
+ struct module_ref *dep;
+
+ if (!capable(CAP_SYS_MODULE))
+ return -EPERM;
+ lock_kernel();
+ if ((namelen = get_mod_name(name_user, &name)) < 0) {
+ error = namelen;
+ goto err0;
+ }
+ if ((mod = find_module(name)) == NULL) {
+ error = -ENOENT;
+ goto err1;
+ }
+
+ /* Check module header size. We allow a bit of slop over the
+ size we are familiar with to cope with a version of insmod
+ for a newer kernel. But don't overdo it. */
+ if ((error = get_user(mod_user_size, &mod_user->size_of_struct)) != 0)
+ goto err1;
+ if (mod_user_size < (unsigned long)&((struct module *)0L)->persist_start
+ || mod_user_size > sizeof(struct module) + 16*sizeof(void*)) {
+ printk(KERN_ERR "init_module: Invalid module header size.\n"
+ KERN_ERR "A new version of the modutils is likely "
+ "needed.\n");
+ error = -EINVAL;
+ goto err1;
+ }
+
+ /* Hold the current contents while we play with the user's idea
+ of righteousness. */
+ mod_tmp = *mod;
+ name_tmp = kmalloc(strlen(mod->name) + 1, GFP_KERNEL); /* Where's kstrdup()? */
+ if (name_tmp == NULL) {
+ error = -ENOMEM;
+ goto err1;
+ }
+ strcpy(name_tmp, mod->name);
+
+ error = copy_from_user(mod, mod_user, mod_user_size);
+ if (error) {
+ error = -EFAULT;
+ goto err2;
+ }
+
+ /* Sanity check the size of the module. */
+ error = -EINVAL;
+
+ if (mod->size > mod_tmp.size) {
+ printk(KERN_ERR "init_module: Size of initialized module "
+ "exceeds size of created module.\n");
+ goto err2;
+ }
+
+ /* Make sure all interesting pointers are sane. */
+
+ if (!mod_bound(mod->name, namelen, mod)) {
+ printk(KERN_ERR "init_module: mod->name out of bounds.\n");
+ goto err2;
+ }
+ if (mod->nsyms && !mod_bound(mod->syms, mod->nsyms, mod)) {
+ printk(KERN_ERR "init_module: mod->syms out of bounds.\n");
+ goto err2;
+ }
+ if (mod->ndeps && !mod_bound(mod->deps, mod->ndeps, mod)) {
+ printk(KERN_ERR "init_module: mod->deps out of bounds.\n");
+ goto err2;
+ }
+ if (mod->init && !mod_bound(mod->init, 0, mod)) {
+ printk(KERN_ERR "init_module: mod->init out of bounds.\n");
+ goto err2;
+ }
+ if (mod->cleanup && !mod_bound(mod->cleanup, 0, mod)) {
+ printk(KERN_ERR "init_module: mod->cleanup out of bounds.\n");
+ goto err2;
+ }
+ if (mod->ex_table_start > mod->ex_table_end
+ || (mod->ex_table_start &&
+ !((unsigned long)mod->ex_table_start >= ((unsigned long)mod + mod->size_of_struct)
+ && ((unsigned long)mod->ex_table_end
+ < (unsigned long)mod + mod->size)))
+ || (((unsigned long)mod->ex_table_start
+ - (unsigned long)mod->ex_table_end)
+ % sizeof(struct exception_table_entry))) {
+ printk(KERN_ERR "init_module: mod->ex_table_* invalid.\n");
+ goto err2;
+ }
+ if (mod->flags & ~MOD_AUTOCLEAN) {
+ printk(KERN_ERR "init_module: mod->flags invalid.\n");
+ goto err2;
+ }
+#ifdef __alpha__
+ if (!mod_bound(mod->gp - 0x8000, 0, mod)) {
+ printk(KERN_ERR "init_module: mod->gp out of bounds.\n");
+ goto err2;
+ }
+#endif
+ if (mod_member_present(mod, can_unload)
+ && mod->can_unload && !mod_bound(mod->can_unload, 0, mod)) {
+ printk(KERN_ERR "init_module: mod->can_unload out of bounds.\n");
+ goto err2;
+ }
+ if (mod_member_present(mod, kallsyms_end)) {
+ if (mod->kallsyms_end &&
+ (!mod_bound(mod->kallsyms_start, 0, mod) ||
+ !mod_bound(mod->kallsyms_end, 0, mod))) {
+ printk(KERN_ERR "init_module: mod->kallsyms out of bounds.\n");
+ goto err2;
+ }
+ if (mod->kallsyms_start > mod->kallsyms_end) {
+ printk(KERN_ERR "init_module: mod->kallsyms invalid.\n");
+ goto err2;
+ }
+ }
+ if (mod_member_present(mod, archdata_end)) {
+ if (mod->archdata_end &&
+ (!mod_bound(mod->archdata_start, 0, mod) ||
+ !mod_bound(mod->archdata_end, 0, mod))) {
+ printk(KERN_ERR "init_module: mod->archdata out of bounds.\n");
+ goto err2;
+ }
+ if (mod->archdata_start > mod->archdata_end) {
+ printk(KERN_ERR "init_module: mod->archdata invalid.\n");
+ goto err2;
+ }
+ }
+ if (mod_member_present(mod, kernel_data) && mod->kernel_data) {
+ printk(KERN_ERR "init_module: mod->kernel_data must be zero.\n");
+ goto err2;
+ }
+
+ /* Check that the user isn't doing something silly with the name. */
+
+ if ((n_namelen = get_mod_name(mod->name - (unsigned long)mod
+ + (unsigned long)mod_user,
+ &n_name)) < 0) {
+ printk(KERN_ERR "init_module: get_mod_name failure.\n");
+ error = n_namelen;
+ goto err2;
+ }
+ if (namelen != n_namelen || strcmp(n_name, mod_tmp.name) != 0) {
+ printk(KERN_ERR "init_module: changed module name to "
+ "`%s' from `%s'\n",
+ n_name, mod_tmp.name);
+ goto err3;
+ }
+
+ /* Ok, that's about all the sanity we can stomach; copy the rest. */
+
+ if (copy_from_user((char *)mod+mod_user_size,
+ (char *)mod_user+mod_user_size,
+ mod->size-mod_user_size)) {
+ error = -EFAULT;
+ goto err3;
+ }
+
+ if (module_arch_init(mod))
+ goto err3;
+
+ /* On some machines it is necessary to do something here
+ to make the I and D caches consistent. */
+ flush_icache_range((unsigned long)mod, (unsigned long)mod + mod->size);
+
+ mod->next = mod_tmp.next;
+ mod->refs = NULL;
+
+ /* Sanity check the module's dependencies */
+ for (i = 0, dep = mod->deps; i < mod->ndeps; ++i, ++dep) {
+ struct module *o, *d = dep->dep;
+
+ /* Make sure the indicated dependencies are really modules. */
+ if (d == mod) {
+ printk(KERN_ERR "init_module: self-referential "
+ "dependency in mod->deps.\n");
+ goto err3;
+ }
+
+ /* Scan the current modules for this dependency */
+ for (o = module_list; o != &kernel_module && o != d; o = o->next)
+ ;
+
+ if (o != d) {
+ printk(KERN_ERR "init_module: found dependency that is "
+ "(no longer?) a module.\n");
+ goto err3;
+ }
+ }
+
+ /* Update module references. */
+ for (i = 0, dep = mod->deps; i < mod->ndeps; ++i, ++dep) {
+ struct module *d = dep->dep;
+
+ dep->ref = mod;
+ dep->next_ref = d->refs;
+ d->refs = dep;
+ /* Being referenced by a dependent module counts as a
+ use as far as kmod is concerned. */
+ d->flags |= MOD_USED_ONCE;
+ }
+
+ /* Free our temporary memory. */
+ put_mod_name(n_name);
+ put_mod_name(name);
+
+ /* Initialize the module. */
+ mod->flags |= MOD_INITIALIZING;
+ atomic_set(&mod->uc.usecount,1);
+ if (mod->init && (error = mod->init()) != 0) {
+ atomic_set(&mod->uc.usecount,0);
+ mod->flags &= ~MOD_INITIALIZING;
+ if (error > 0) /* Buggy module */
+ error = -EBUSY;
+ goto err0;
+ }
+ atomic_dec(&mod->uc.usecount);
+
+ /* And set it running. */
+ mod->flags = (mod->flags | MOD_RUNNING) & ~MOD_INITIALIZING;
+ error = 0;
+ goto err0;
+
+err3:
+ put_mod_name(n_name);
+err2:
+ *mod = mod_tmp;
+ strcpy((char *)mod->name, name_tmp); /* We know there is room for this */
+err1:
+ put_mod_name(name);
+err0:
+ unlock_kernel();
+ kfree(name_tmp);
+ return error;
+}
+
+static spinlock_t unload_lock = SPIN_LOCK_UNLOCKED;
+int try_inc_mod_count(struct module *mod)
+{
+ int res = 1;
+ if (mod) {
+ spin_lock(&unload_lock);
+ if (mod->flags & MOD_DELETED)
+ res = 0;
+ else
+ __MOD_INC_USE_COUNT(mod);
+ spin_unlock(&unload_lock);
+ }
+ return res;
+}
+
+asmlinkage long
+sys_delete_module(const char *name_user)
+{
+ struct module *mod, *next;
+ char *name;
+ long error;
+ int something_changed;
+
+ if (!capable(CAP_SYS_MODULE))
+ return -EPERM;
+
+ lock_kernel();
+ if (name_user) {
+ if ((error = get_mod_name(name_user, &name)) < 0)
+ goto out;
+ if (error == 0) {
+ error = -EINVAL;
+ put_mod_name(name);
+ goto out;
+ }
+ error = -ENOENT;
+ if ((mod = find_module(name)) == NULL) {
+ put_mod_name(name);
+ goto out;
+ }
+ put_mod_name(name);
+ error = -EBUSY;
+ if (mod->refs != NULL)
+ goto out;
+
+ spin_lock(&unload_lock);
+ if (!__MOD_IN_USE(mod)) {
+ mod->flags |= MOD_DELETED;
+ spin_unlock(&unload_lock);
+ free_module(mod, 0);
+ error = 0;
+ } else {
+ spin_unlock(&unload_lock);
+ }
+ goto out;
+ }
+
+ /* Do automatic reaping */
+restart:
+ something_changed = 0;
+ for (mod = module_list; mod != &kernel_module; mod = next) {
+ next = mod->next;
+ spin_lock(&unload_lock);
+ if (mod->refs == NULL
+ && (mod->flags & MOD_AUTOCLEAN)
+ && (mod->flags & MOD_RUNNING)
+ && !(mod->flags & MOD_DELETED)
+ && (mod->flags & MOD_USED_ONCE)
+ && !__MOD_IN_USE(mod)) {
+ if ((mod->flags & MOD_VISITED)
+ && !(mod->flags & MOD_JUST_FREED)) {
+ spin_unlock(&unload_lock);
+ mod->flags &= ~MOD_VISITED;
+ } else {
+ mod->flags |= MOD_DELETED;
+ spin_unlock(&unload_lock);
+ free_module(mod, 1);
+ something_changed = 1;
+ }
+ } else {
+ spin_unlock(&unload_lock);
+ }
+ }
+ if (something_changed)
+ goto restart;
+ for (mod = module_list; mod != &kernel_module; mod = mod->next)
+ mod->flags &= ~MOD_JUST_FREED;
+ error = 0;
+out:
+ unlock_kernel();
+ return error;
+}
+
+/* Query various bits about modules. */
+
+static int
+qm_modules(char *buf, size_t bufsize, size_t *ret)
+{
+ struct module *mod;
+ size_t nmod, space, len;
+
+ nmod = space = 0;
+
+ for (mod=module_list; mod != &kernel_module; mod=mod->next, ++nmod) {
+ len = strlen(mod->name)+1;
+ if (len > bufsize)
+ goto calc_space_needed;
+ if (copy_to_user(buf, mod->name, len))
+ return -EFAULT;
+ buf += len;
+ bufsize -= len;
+ space += len;
+ }
+
+ if (put_user(nmod, ret))
+ return -EFAULT;
+ else
+ return 0;
+
+calc_space_needed:
+ space += len;
+ while ((mod = mod->next) != &kernel_module)
+ space += strlen(mod->name)+1;
+
+ if (put_user(space, ret))
+ return -EFAULT;
+ else
+ return -ENOSPC;
+}
+
+static int
+qm_deps(struct module *mod, char *buf, size_t bufsize, size_t *ret)
+{
+ size_t i, space, len;
+
+ if (mod == &kernel_module)
+ return -EINVAL;
+ if (!MOD_CAN_QUERY(mod))
+ if (put_user(0, ret))
+ return -EFAULT;
+ else
+ return 0;
+
+ space = 0;
+ for (i = 0; i < mod->ndeps; ++i) {
+ const char *dep_name = mod->deps[i].dep->name;
+
+ len = strlen(dep_name)+1;
+ if (len > bufsize)
+ goto calc_space_needed;
+ if (copy_to_user(buf, dep_name, len))
+ return -EFAULT;
+ buf += len;
+ bufsize -= len;
+ space += len;
+ }
+
+ if (put_user(i, ret))
+ return -EFAULT;
+ else
+ return 0;
+
+calc_space_needed:
+ space += len;
+ while (++i < mod->ndeps)
+ space += strlen(mod->deps[i].dep->name)+1;
+
+ if (put_user(space, ret))
+ return -EFAULT;
+ else
+ return -ENOSPC;
+}
+
+static int
+qm_refs(struct module *mod, char *buf, size_t bufsize, size_t *ret)
+{
+ size_t nrefs, space, len;
+ struct module_ref *ref;
+
+ if (mod == &kernel_module)
+ return -EINVAL;
+ if (!MOD_CAN_QUERY(mod))
+ if (put_user(0, ret))
+ return -EFAULT;
+ else
+ return 0;
+
+ space = 0;
+ for (nrefs = 0, ref = mod->refs; ref ; ++nrefs, ref = ref->next_ref) {
+ const char *ref_name = ref->ref->name;
+
+ len = strlen(ref_name)+1;
+ if (len > bufsize)
+ goto calc_space_needed;
+ if (copy_to_user(buf, ref_name, len))
+ return -EFAULT;
+ buf += len;
+ bufsize -= len;
+ space += len;
+ }
+
+ if (put_user(nrefs, ret))
+ return -EFAULT;
+ else
+ return 0;
+
+calc_space_needed:
+ space += len;
+ while ((ref = ref->next_ref) != NULL)
+ space += strlen(ref->ref->name)+1;
+
+ if (put_user(space, ret))
+ return -EFAULT;
+ else
+ return -ENOSPC;
+}
+
+static int
+qm_symbols(struct module *mod, char *buf, size_t bufsize, size_t *ret)
+{
+ size_t i, space, len;
+ struct module_symbol *s;
+ char *strings;
+ unsigned long *vals;
+
+ if (!MOD_CAN_QUERY(mod))
+ if (put_user(0, ret))
+ return -EFAULT;
+ else
+ return 0;
+
+ space = mod->nsyms * 2*sizeof(void *);
+
+ i = len = 0;
+ s = mod->syms;
+
+ if (space > bufsize)
+ goto calc_space_needed;
+
+ if (!access_ok(VERIFY_WRITE, buf, space))
+ return -EFAULT;
+
+ bufsize -= space;
+ vals = (unsigned long *)buf;
+ strings = buf+space;
+
+ for (; i < mod->nsyms ; ++i, ++s, vals += 2) {
+ len = strlen(s->name)+1;
+ if (len > bufsize)
+ goto calc_space_needed;
+
+ if (copy_to_user(strings, s->name, len)
+ || __put_user(s->value, vals+0)
+ || __put_user(space, vals+1))
+ return -EFAULT;
+
+ strings += len;
+ bufsize -= len;
+ space += len;
+ }
+
+ if (put_user(i, ret))
+ return -EFAULT;
+ else
+ return 0;
+
+calc_space_needed:
+ for (; i < mod->nsyms; ++i, ++s)
+ space += strlen(s->name)+1;
+
+ if (put_user(space, ret))
+ return -EFAULT;
+ else
+ return -ENOSPC;
+}
+
+static int
+qm_info(struct module *mod, char *buf, size_t bufsize, size_t *ret)
+{
+ int error = 0;
+
+ if (mod == &kernel_module)
+ return -EINVAL;
+
+ if (sizeof(struct module_info) <= bufsize) {
+ struct module_info info;
+ info.addr = (unsigned long)mod;
+ info.size = mod->size;
+ info.flags = mod->flags;
+ info.usecount = (mod_member_present(mod, can_unload)
+ && mod->can_unload ? -1 : atomic_read(&mod->uc.usecount));
+
+ if (copy_to_user(buf, &info, sizeof(struct module_info)))
+ return -EFAULT;
+ } else
+ error = -ENOSPC;
+
+ if (put_user(sizeof(struct module_info), ret))
+ return -EFAULT;
+
+ return error;
+}
+
+asmlinkage long
+sys_query_module(const char *name_user, int which, char *buf, size_t bufsize,
+ size_t *ret)
+{
+ struct module *mod;
+ int err;
+
+ lock_kernel();
+ if (name_user == NULL)
+ mod = &kernel_module;
+ else {
+ long namelen;
+ char *name;
+
+ if ((namelen = get_mod_name(name_user, &name)) < 0) {
+ err = namelen;
+ goto out;
+ }
+ err = -ENOENT;
+ if (namelen == 0)
+ mod = &kernel_module;
+ else if ((mod = find_module(name)) == NULL) {
+ put_mod_name(name);
+ goto out;
+ }
+ put_mod_name(name);
+ }
+
+ switch (which)
+ {
+ case 0:
+ err = 0;
+ break;
+ case QM_MODULES:
+ err = qm_modules(buf, bufsize, ret);
+ break;
+ case QM_DEPS:
+ err = qm_deps(mod, buf, bufsize, ret);
+ break;
+ case QM_REFS:
+ err = qm_refs(mod, buf, bufsize, ret);
+ break;
+ case QM_SYMBOLS:
+ err = qm_symbols(mod, buf, bufsize, ret);
+ break;
+ case QM_INFO:
+ err = qm_info(mod, buf, bufsize, ret);
+ break;
+ default:
+ err = -EINVAL;
+ break;
+ }
+out:
+ unlock_kernel();
+ return err;
+}
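+
+/*
+ * Usage sketch (annotation, not part of the original import): user space
+ * reaches this through the query_module(2) syscall; modutils-style tools
+ * list the loaded modules roughly like this:
+ *
+ *	char names[1024];
+ *	size_t n;
+ *	query_module(NULL, QM_MODULES, names, sizeof(names), &n);
+ *	(on success, names holds n NUL-separated module names)
+ */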
+
+/*
+ * Copy the kernel symbol table to user space. If the argument is
+ * NULL, just return the size of the table.
+ *
+ * This call is obsolete. New programs should use query_module+QM_SYMBOLS
+ * which does not arbitrarily limit the length of symbols.
+ */
+
+asmlinkage long
+sys_get_kernel_syms(struct kernel_sym *table)
+{
+ struct module *mod;
+ int i;
+ struct kernel_sym ksym;
+
+ lock_kernel();
+ for (mod = module_list, i = 0; mod; mod = mod->next) {
+ /* include the count for the module name! */
+ i += mod->nsyms + 1;
+ }
+
+ if (table == NULL)
+ goto out;
+
+ /* So that we don't give the user our stack content */
+ memset (&ksym, 0, sizeof (ksym));
+
+ for (mod = module_list, i = 0; mod; mod = mod->next) {
+ struct module_symbol *msym;
+ unsigned int j;
+
+ if (!MOD_CAN_QUERY(mod))
+ continue;
+
+ /* magic: write module info as a pseudo symbol */
+ ksym.value = (unsigned long)mod;
+ ksym.name[0] = '#';
+ strncpy(ksym.name+1, mod->name, sizeof(ksym.name)-1);
+ ksym.name[sizeof(ksym.name)-1] = '\0';
+
+ if (copy_to_user(table, &ksym, sizeof(ksym)) != 0)
+ goto out;
+ ++i, ++table;
+
+ if (mod->nsyms == 0)
+ continue;
+
+ for (j = 0, msym = mod->syms; j < mod->nsyms; ++j, ++msym) {
+ ksym.value = msym->value;
+ strncpy(ksym.name, msym->name, sizeof(ksym.name));
+ ksym.name[sizeof(ksym.name)-1] = '\0';
+
+ if (copy_to_user(table, &ksym, sizeof(ksym)) != 0)
+ goto out;
+ ++i, ++table;
+ }
+ }
+out:
+ unlock_kernel();
+ return i;
+}
+
+/*
+ * Look for a module by name, ignoring modules marked for deletion.
+ */
+
+struct module *
+find_module(const char *name)
+{
+ struct module *mod;
+
+ for (mod = module_list; mod ; mod = mod->next) {
+ if (mod->flags & MOD_DELETED)
+ continue;
+ if (!strcmp(mod->name, name))
+ break;
+ }
+
+ return mod;
+}
+
+/*
+ * Free the given module.
+ */
+
+void
+free_module(struct module *mod, int tag_freed)
+{
+ struct module_ref *dep;
+ unsigned i;
+
+ /* Let the module clean up. */
+
+ if (mod->flags & MOD_RUNNING)
+ {
+ if(mod->cleanup)
+ mod->cleanup();
+ mod->flags &= ~MOD_RUNNING;
+ }
+
+ /* Remove the module from the dependency lists. */
+
+ for (i = 0, dep = mod->deps; i < mod->ndeps; ++i, ++dep) {
+ struct module_ref **pp;
+ for (pp = &dep->dep->refs; *pp != dep; pp = &(*pp)->next_ref)
+ continue;
+ *pp = dep->next_ref;
+ if (tag_freed && dep->dep->refs == NULL)
+ dep->dep->flags |= MOD_JUST_FREED;
+ }
+
+ /* And from the main module list. */
+
+ if (mod == module_list) {
+ module_list = mod->next;
+ } else {
+ struct module *p;
+ for (p = module_list; p->next != mod; p = p->next)
+ continue;
+ p->next = mod->next;
+ }
+
+ /* And free the memory. */
+
+ module_unmap(mod);
+}
+
+/*
+ * Called by the /proc file system to return a current list of modules.
+ */
+
+int get_module_list(char *p)
+{
+ size_t left = PAGE_SIZE;
+ struct module *mod;
+ char tmpstr[64];
+ struct module_ref *ref;
+
+ for (mod = module_list; mod != &kernel_module; mod = mod->next) {
+ long len;
+ const char *q;
+
+#define safe_copy_str(str, len) \
+ do { \
+ if (left < len) \
+ goto fini; \
+ memcpy(p, str, len); p += len, left -= len; \
+ } while (0)
+#define safe_copy_cstr(str) safe_copy_str(str, sizeof(str)-1)
+
+ len = strlen(mod->name);
+ safe_copy_str(mod->name, len);
+
+ if ((len = 20 - len) > 0) {
+ if (left < len)
+ goto fini;
+ memset(p, ' ', len);
+ p += len;
+ left -= len;
+ }
+
+ len = sprintf(tmpstr, "%8lu", mod->size);
+ safe_copy_str(tmpstr, len);
+
+ if (mod->flags & MOD_RUNNING) {
+ len = sprintf(tmpstr, "%4ld",
+ (mod_member_present(mod, can_unload)
+ && mod->can_unload
+ ? -1L : (long)atomic_read(&mod->uc.usecount)));
+ safe_copy_str(tmpstr, len);
+ }
+
+ if (mod->flags & MOD_DELETED)
+ safe_copy_cstr(" (deleted)");
+ else if (mod->flags & MOD_RUNNING) {
+ if (mod->flags & MOD_AUTOCLEAN)
+ safe_copy_cstr(" (autoclean)");
+ if (!(mod->flags & MOD_USED_ONCE))
+ safe_copy_cstr(" (unused)");
+ }
+ else if (mod->flags & MOD_INITIALIZING)
+ safe_copy_cstr(" (initializing)");
+ else
+ safe_copy_cstr(" (uninitialized)");
+
+ if ((ref = mod->refs) != NULL) {
+ safe_copy_cstr(" [");
+ while (1) {
+ q = ref->ref->name;
+ len = strlen(q);
+ safe_copy_str(q, len);
+
+ if ((ref = ref->next_ref) != NULL)
+ safe_copy_cstr(" ");
+ else
+ break;
+ }
+ safe_copy_cstr("]");
+ }
+ safe_copy_cstr("\n");
+
+#undef safe_copy_str
+#undef safe_copy_cstr
+ }
+
+fini:
+ return PAGE_SIZE - left;
+}
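+
+/*
+ * Example of the resulting /proc/modules output (annotation, not part of
+ * the original import; the module names and sizes are made up):
+ *
+ *	lockd                  53632   1 (autoclean) [nfsd]
+ *	nfsd                   70656   8 (autoclean)
+ */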
+
+/*
+ * Called by the /proc file system to return a current list of ksyms.
+ */
+
+int
+get_ksyms_list(char *buf, char **start, off_t offset, int length)
+{
+ struct module *mod;
+ char *p = buf;
+ int len = 0; /* code from net/ipv4/proc.c */
+ off_t pos = 0;
+ off_t begin = 0;
+
+ for (mod = module_list; mod; mod = mod->next) {
+ unsigned i;
+ struct module_symbol *sym;
+
+ if (!MOD_CAN_QUERY(mod))
+ continue;
+
+ for (i = mod->nsyms, sym = mod->syms; i > 0; --i, ++sym) {
+ p = buf + len;
+ if (*mod->name) {
+ len += sprintf(p, "%0*lx %s\t[%s]\n",
+ (int)(2*sizeof(void*)),
+ sym->value, sym->name,
+ mod->name);
+ } else {
+ len += sprintf(p, "%0*lx %s\n",
+ (int)(2*sizeof(void*)),
+ sym->value, sym->name);
+ }
+ pos = begin + len;
+ if (pos < offset) {
+ len = 0;
+ begin = pos;
+ }
+ pos = begin + len;
+ if (pos > offset+length)
+ goto leave_the_loop;
+ }
+ }
+leave_the_loop:
+ *start = buf + (offset - begin);
+ len -= (offset - begin);
+ if (len > length)
+ len = length;
+ return len;
+}
+
+#else /* CONFIG_MODULES */
+
+/* Dummy syscalls for people who don't want modules */
+
+asmlinkage unsigned long
+sys_create_module(const char *name_user, size_t size)
+{
+ return -ENOSYS;
+}
+
+asmlinkage long
+sys_init_module(const char *name_user, struct module *mod_user)
+{
+ return -ENOSYS;
+}
+
+asmlinkage long
+sys_delete_module(const char *name_user)
+{
+ return -ENOSYS;
+}
+
+asmlinkage long
+sys_query_module(const char *name_user, int which, char *buf, size_t bufsize,
+ size_t *ret)
+{
+ /* Let the program know about the new interface. Not that
+ it'll do them much good. */
+ if (which == 0)
+ return 0;
+
+ return -ENOSYS;
+}
+
+asmlinkage long
+sys_get_kernel_syms(struct kernel_sym *table)
+{
+ return -ENOSYS;
+}
+
+int try_inc_mod_count(struct module *mod)
+{
+ return 1;
+}
+
+#endif /* CONFIG_MODULES */
diff --git a/kernel/panic.c b/kernel/panic.c
new file mode 100644
index 000000000000..ac246f74589a
--- /dev/null
+++ b/kernel/panic.c
@@ -0,0 +1,103 @@
+/*
+ * linux/kernel/panic.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+/*
+ * This function is used through-out the kernel (including mm and fs)
+ * to indicate a major problem.
+ */
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <linux/notifier.h>
+#include <linux/init.h>
+#include <linux/sysrq.h>
+#include <linux/interrupt.h>
+
+asmlinkage void sys_sync(void); /* it's really int */
+extern void unblank_console(void);
+
+int panic_timeout;
+
+struct notifier_block *panic_notifier_list;
+
+static int __init panic_setup(char *str)
+{
+ panic_timeout = simple_strtoul(str, NULL, 0);
+ return 1;
+}
+
+__setup("panic=", panic_setup);
+
+/**
+ * panic - halt the system
+ * @fmt: The text string to print
+ *
+ * Display a message, then unblank the console and perform
+ * cleanups. Functions in the panic notifier list are called
+ * after the filesystem cache is flushed (when possible).
+ *
+ * This function never returns.
+ */
+
+NORET_TYPE void panic(const char * fmt, ...)
+{
+ static char buf[1024];
+ va_list args;
+#if defined(CONFIG_ARCH_S390)
+ unsigned long caller = (unsigned long) __builtin_return_address(0);
+#endif
+
+ va_start(args, fmt);
+ vsprintf(buf, fmt, args);
+ va_end(args);
+ printk(KERN_EMERG "Kernel panic: %s\n",buf);
+ if (in_interrupt())
+ printk(KERN_EMERG "In interrupt handler - not syncing\n");
+ else if (!current->pid)
+ printk(KERN_EMERG "In idle task - not syncing\n");
+ else
+ sys_sync();
+
+ unblank_console();
+
+#ifdef CONFIG_SMP
+ smp_send_stop();
+#endif
+
+ notifier_call_chain(&panic_notifier_list, 0, NULL);
+
+ if (panic_timeout > 0)
+ {
+ /*
+ * Delay timeout seconds before rebooting the machine.
+ * We can't use the "normal" timers since we just panicked..
+ */
+ printk(KERN_EMERG "Rebooting in %d seconds..",panic_timeout);
+ mdelay(panic_timeout*1000);
+ /*
+		 * Should we run the reboot notifier? For the moment I'm
+		 * choosing not to. It might crash, be corrupt, or do
+		 * more harm than good for other reasons.
+ */
+ machine_restart(NULL);
+ }
+#ifdef __sparc__
+ {
+ extern int stop_a_enabled;
+ /* Make sure the user can actually press L1-A */
+ stop_a_enabled = 1;
+ printk("Press L1-A to return to the boot prom\n");
+ }
+#endif
+#if defined(CONFIG_ARCH_S390)
+ disabled_wait(caller);
+#endif
+ sti();
+ for(;;) {
+ CHECK_EMERGENCY_SYNC
+ }
+}
diff --git a/kernel/pm.c b/kernel/pm.c
new file mode 100644
index 000000000000..eb7c6f615bd8
--- /dev/null
+++ b/kernel/pm.c
@@ -0,0 +1,245 @@
+/*
+ * pm.c - Power management interface
+ *
+ * Copyright (C) 2000 Andrew Henroid
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/pm.h>
+
+int pm_active;
+
+static spinlock_t pm_devs_lock = SPIN_LOCK_UNLOCKED;
+static LIST_HEAD(pm_devs);
+
+/**
+ * pm_register - register a device with power management
+ * @type: device type
+ * @id: device ID
+ * @callback: callback function
+ *
+ * Add a device to the list of devices that wish to be notified about
+ * power management events. A &pm_dev structure is returned on success;
+ * on failure the return is %NULL.
+ */
+
+struct pm_dev *pm_register(pm_dev_t type,
+ unsigned long id,
+ pm_callback callback)
+{
+ struct pm_dev *dev = kmalloc(sizeof(struct pm_dev), GFP_KERNEL);
+ if (dev) {
+ unsigned long flags;
+
+ memset(dev, 0, sizeof(*dev));
+ dev->type = type;
+ dev->id = id;
+ dev->callback = callback;
+
+ spin_lock_irqsave(&pm_devs_lock, flags);
+ list_add(&dev->entry, &pm_devs);
+ spin_unlock_irqrestore(&pm_devs_lock, flags);
+ }
+ return dev;
+}
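+
+/*
+ * Illustrative sketch (annotation, not part of the original import): how a
+ * driver might hook into this interface.  "mydev" is hypothetical; the code
+ * is compiled out via #if 0.
+ */
+#if 0
+static struct pm_dev *mydev_pm;
+
+static int mydev_pm_event(struct pm_dev *dev, pm_request_t rqst, void *data)
+{
+	switch (rqst) {
+	case PM_SUSPEND:
+		/* quiesce the hardware; "data" is the target state */
+		break;
+	case PM_RESUME:
+		/* bring the hardware back up */
+		break;
+	}
+	return 0;	/* a non-zero return would veto a suspend */
+}
+
+static int mydev_init(void)
+{
+	mydev_pm = pm_register(PM_UNKNOWN_DEV, 0, mydev_pm_event);
+	return 0;
+}
+#endif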
+
+/**
+ * pm_unregister - unregister a device with power management
+ * @dev: device to unregister
+ *
+ * Remove a device from the power management notification lists. The
+ * dev passed must be a handle previously returned by pm_register.
+ */
+
+void pm_unregister(struct pm_dev *dev)
+{
+ if (dev) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&pm_devs_lock, flags);
+ list_del(&dev->entry);
+ spin_unlock_irqrestore(&pm_devs_lock, flags);
+
+ kfree(dev);
+ }
+}
+
+/**
+ * pm_unregister_all - unregister all devices with matching callback
+ * @callback: callback function pointer
+ *
+ * Unregister every device that would call the callback passed. This
+ * is primarily meant as a helper function for loadable modules. It
+ * enables a module to give up all its managed devices without keeping
+ * its own private list.
+ */
+
+void pm_unregister_all(pm_callback callback)
+{
+ struct list_head *entry;
+
+ if (!callback)
+ return;
+
+ entry = pm_devs.next;
+ while (entry != &pm_devs) {
+ struct pm_dev *dev = list_entry(entry, struct pm_dev, entry);
+ entry = entry->next;
+ if (dev->callback == callback)
+ pm_unregister(dev);
+ }
+}
+
+/**
+ * pm_send - send request to a single device
+ * @dev: device to send to
+ * @rqst: power management request
+ * @data: data for the callback
+ *
+ * Issue a power management request to a given device. The
+ * %PM_SUSPEND and %PM_RESUME events are handled specially. The
+ * data field must hold the intended next state. No call is made
+ * if the state matches.
+ *
+ *	BUGS: what stops two power management requests occurring in parallel
+ *	and conflicting?
+ */
+
+int pm_send(struct pm_dev *dev, pm_request_t rqst, void *data)
+{
+ int status = 0;
+ int prev_state, next_state;
+ switch (rqst) {
+ case PM_SUSPEND:
+ case PM_RESUME:
+ prev_state = dev->state;
+ next_state = (int) data;
+ if (prev_state != next_state) {
+ if (dev->callback)
+ status = (*dev->callback)(dev, rqst, data);
+ if (!status) {
+ dev->state = next_state;
+ dev->prev_state = prev_state;
+ }
+ }
+ else {
+ dev->prev_state = prev_state;
+ }
+ break;
+ default:
+ if (dev->callback)
+ status = (*dev->callback)(dev, rqst, data);
+ break;
+ }
+ return status;
+}
+
+/*
+ * Undo incomplete request
+ */
+static void pm_undo_all(struct pm_dev *last)
+{
+ struct list_head *entry = last->entry.prev;
+ while (entry != &pm_devs) {
+ struct pm_dev *dev = list_entry(entry, struct pm_dev, entry);
+ if (dev->state != dev->prev_state) {
+			/* If the previous state was 0 (running), resume;
+			 * if it was non-zero (suspended), suspend again.
+			 */
+ pm_request_t undo = (dev->prev_state
+ ? PM_SUSPEND:PM_RESUME);
+ pm_send(dev, undo, (void*) dev->prev_state);
+ }
+ entry = entry->prev;
+ }
+}
+
+/**
+ * pm_send_all - send request to all managed devices
+ * @rqst: power management request
+ * @data: data for the callback
+ *
+ *	Issue a power management request to all devices. The
+ *	%PM_SUSPEND events are handled specially. Any device is
+ *	permitted to fail a suspend by returning a non-zero (error)
+ * value from its callback function. If any device vetoes a
+ * suspend request then all other devices that have suspended
+ * during the processing of this request are restored to their
+ * previous state.
+ *
+ * Zero is returned on success. If a suspend fails then the status
+ * from the device that vetoes the suspend is returned.
+ *
+ *	BUGS: what stops two power management requests occurring in parallel
+ *	and conflicting?
+ */
+
+int pm_send_all(pm_request_t rqst, void *data)
+{
+ struct list_head *entry = pm_devs.next;
+ while (entry != &pm_devs) {
+ struct pm_dev *dev = list_entry(entry, struct pm_dev, entry);
+ if (dev->callback) {
+ int status = pm_send(dev, rqst, data);
+ if (status) {
+ /* return devices to previous state on
+ * failed suspend request
+ */
+ if (rqst == PM_SUSPEND)
+ pm_undo_all(dev);
+ return status;
+ }
+ }
+ entry = entry->next;
+ }
+ return 0;
+}
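+
+/*
+ * Illustrative sketch (annotation, not part of the original import): a
+ * suspend path in the style of apm/acpi glue code.  The function name and
+ * the target state value 3 are placeholders; compiled out via #if 0.
+ */
+#if 0
+static int suspend_everything(void)
+{
+	int err;
+
+	err = pm_send_all(PM_SUSPEND, (void *)3);
+	if (err)
+		return err;	/* a device vetoed; the others were undone */
+	/* ... put the machine to sleep here ... */
+	pm_send_all(PM_RESUME, (void *)0);
+	return 0;
+}
+#endif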
+
+/**
+ * pm_find - find a device
+ * @type: type of device
+ * @from: where to start looking
+ *
+ * Scan the power management list for devices of a specific type. The
+ * return value for a matching device may be passed to further calls
+ * to this function to find further matches. A %NULL indicates the end
+ * of the list.
+ *
+ * To search from the beginning pass %NULL as the @from value.
+ */
+
+struct pm_dev *pm_find(pm_dev_t type, struct pm_dev *from)
+{
+ struct list_head *entry = from ? from->entry.next:pm_devs.next;
+ while (entry != &pm_devs) {
+ struct pm_dev *dev = list_entry(entry, struct pm_dev, entry);
+ if (type == PM_UNKNOWN_DEV || dev->type == type)
+ return dev;
+ entry = entry->next;
+ }
+ return 0;
+}
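+
+/*
+ * Illustrative sketch (annotation, not part of the original import): walking
+ * the list with pm_find().  PM_UNKNOWN_DEV matches devices of any type, as
+ * the test above shows.  Compiled out via #if 0.
+ */
+#if 0
+static void count_pm_devs(void)
+{
+	struct pm_dev *dev = NULL;
+	int n = 0;
+
+	while ((dev = pm_find(PM_UNKNOWN_DEV, dev)) != NULL)
+		n++;
+	printk(KERN_DEBUG "pm: %d devices registered\n", n);
+}
+#endif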
+
+EXPORT_SYMBOL(pm_register);
+EXPORT_SYMBOL(pm_unregister);
+EXPORT_SYMBOL(pm_unregister_all);
+EXPORT_SYMBOL(pm_send);
+EXPORT_SYMBOL(pm_send_all);
+EXPORT_SYMBOL(pm_find);
+EXPORT_SYMBOL(pm_active);
diff --git a/kernel/printk.c b/kernel/printk.c
new file mode 100644
index 000000000000..4a459b6051d8
--- /dev/null
+++ b/kernel/printk.c
@@ -0,0 +1,497 @@
+/*
+ * linux/kernel/printk.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * Modified to make sys_syslog() more flexible: added commands to
+ * return the last 4k of kernel messages, regardless of whether
+ * they've been read or not. Added option to suppress kernel printk's
+ * to the console. Added hook for sending the console messages
+ * elsewhere, in preparation for a serial line console (someday).
+ * Ted Ts'o, 2/11/93.
+ * Modified for sysctl support, 1/8/97, Chris Horn.
+ * Fixed SMP synchronization, 08/08/99, Manfred Spraul
+ * manfreds@colorfullife.com
+ */
+
+#include <linux/mm.h>
+#include <linux/tty_driver.h>
+#include <linux/smp_lock.h>
+#include <linux/console.h>
+#include <linux/init.h>
+
+#include <asm/uaccess.h>
+
+#define LOG_BUF_LEN (16384)
+#define LOG_BUF_MASK (LOG_BUF_LEN-1)
+
+static char buf[1024];
+
+/* printk's without a loglevel use this.. */
+#define DEFAULT_MESSAGE_LOGLEVEL 4 /* KERN_WARNING */
+
+/* We show everything that is MORE important than this.. */
+#define MINIMUM_CONSOLE_LOGLEVEL 1 /* Minimum loglevel we let people use */
+#define DEFAULT_CONSOLE_LOGLEVEL 7 /* anything MORE serious than KERN_DEBUG */
+
+unsigned long log_size;
+DECLARE_WAIT_QUEUE_HEAD(log_wait);
+
+/* Keep together for sysctl support */
+int console_loglevel = DEFAULT_CONSOLE_LOGLEVEL;
+int default_message_loglevel = DEFAULT_MESSAGE_LOGLEVEL;
+int minimum_console_loglevel = MINIMUM_CONSOLE_LOGLEVEL;
+int default_console_loglevel = DEFAULT_CONSOLE_LOGLEVEL;
+
+spinlock_t console_lock = SPIN_LOCK_UNLOCKED;
+
+struct console *console_drivers;
+static char log_buf[LOG_BUF_LEN];
+static unsigned long log_start;
+static unsigned long logged_chars;
+struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES];
+static int preferred_console = -1;
+
+/*
+ *	Set up a list of consoles. Called from init/main.c.
+ */
+static int __init console_setup(char *str)
+{
+ struct console_cmdline *c;
+ char name[sizeof(c->name)];
+ char *s, *options;
+ int i, idx;
+
+ /*
+ * Decode str into name, index, options.
+ */
+ if (str[0] >= '0' && str[0] <= '9') {
+ strcpy(name, "ttyS");
+ strncpy(name + 4, str, sizeof(name) - 5);
+ } else
+ strncpy(name, str, sizeof(name) - 1);
+ name[sizeof(name) - 1] = 0;
+ if ((options = strchr(str, ',')) != NULL)
+ *(options++) = 0;
+#ifdef __sparc__
+ if (!strcmp(str, "ttya"))
+ strcpy(name, "ttyS0");
+ if (!strcmp(str, "ttyb"))
+ strcpy(name, "ttyS1");
+#endif
+ for(s = name; *s; s++)
+ if (*s >= '0' && *s <= '9')
+ break;
+ idx = simple_strtoul(s, NULL, 10);
+ *s = 0;
+
+ /*
+ * See if this tty is not yet registered, and
+ * if we have a slot free.
+ */
+ for(i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++)
+ if (strcmp(console_cmdline[i].name, name) == 0 &&
+ console_cmdline[i].index == idx) {
+ preferred_console = i;
+ return 1;
+ }
+ if (i == MAX_CMDLINECONSOLES)
+ return 1;
+ preferred_console = i;
+ c = &console_cmdline[i];
+ memcpy(c->name, name, sizeof(c->name));
+ c->options = options;
+ c->index = idx;
+ return 1;
+}
+
+__setup("console=", console_setup);
+
+/*
+ * Commands to do_syslog:
+ *
+ * 0 -- Close the log. Currently a NOP.
+ * 1 -- Open the log. Currently a NOP.
+ * 2 -- Read from the log.
+ * 3 -- Read all messages remaining in the ring buffer.
+ * 4 -- Read and clear all messages remaining in the ring buffer
+ * 5 -- Clear ring buffer.
+ * 6 -- Disable printk's to console
+ * 7 -- Enable printk's to console
+ * 8 -- Set level of messages printed to console
+ */
+int do_syslog(int type, char * buf, int len)
+{
+ unsigned long i, j, limit, count;
+ int do_clear = 0;
+ char c;
+ int error = -EPERM;
+
+ error = 0;
+ switch (type) {
+ case 0: /* Close log */
+ break;
+ case 1: /* Open log */
+ break;
+ case 2: /* Read from log */
+ error = -EINVAL;
+ if (!buf || len < 0)
+ goto out;
+ error = 0;
+ if (!len)
+ goto out;
+ error = verify_area(VERIFY_WRITE,buf,len);
+ if (error)
+ goto out;
+ error = wait_event_interruptible(log_wait, log_size);
+ if (error)
+ goto out;
+ i = 0;
+ spin_lock_irq(&console_lock);
+ while (log_size && i < len) {
+ c = log_buf[log_start & LOG_BUF_MASK];
+ log_start++;
+ log_size--;
+ spin_unlock_irq(&console_lock);
+ __put_user(c,buf);
+ buf++;
+ i++;
+ spin_lock_irq(&console_lock);
+ }
+ spin_unlock_irq(&console_lock);
+ error = i;
+ break;
+ case 4: /* Read/clear last kernel messages */
+ do_clear = 1;
+ /* FALL THRU */
+ case 3: /* Read last kernel messages */
+ error = -EINVAL;
+ if (!buf || len < 0)
+ goto out;
+ error = 0;
+ if (!len)
+ goto out;
+ error = verify_area(VERIFY_WRITE,buf,len);
+ if (error)
+ goto out;
+ count = len;
+ if (count > LOG_BUF_LEN)
+ count = LOG_BUF_LEN;
+ spin_lock_irq(&console_lock);
+ if (count > logged_chars)
+ count = logged_chars;
+ if (do_clear)
+ logged_chars = 0;
+ limit = log_start + log_size;
+ /*
+ * __put_user() could sleep, and while we sleep
+ * printk() could overwrite the messages
+ * we try to copy to user space. Therefore
+ * the messages are copied in reverse. <manfreds>
+ */
+ for(i=0;i < count;i++) {
+ j = limit-1-i;
+ if (j+LOG_BUF_LEN < log_start+log_size)
+ break;
+ c = log_buf[ j & LOG_BUF_MASK ];
+ spin_unlock_irq(&console_lock);
+ __put_user(c,&buf[count-1-i]);
+ spin_lock_irq(&console_lock);
+ }
+ spin_unlock_irq(&console_lock);
+ error = i;
+ if(i != count) {
+ int offset = count-error;
+ /* buffer overflow during copy, correct user buffer. */
+ for(i=0;i<error;i++) {
+ __get_user(c,&buf[i+offset]);
+ __put_user(c,&buf[i]);
+ }
+ }
+
+ break;
+ case 5: /* Clear ring buffer */
+ spin_lock_irq(&console_lock);
+ logged_chars = 0;
+ spin_unlock_irq(&console_lock);
+ break;
+ case 6: /* Disable logging to console */
+ spin_lock_irq(&console_lock);
+ console_loglevel = minimum_console_loglevel;
+ spin_unlock_irq(&console_lock);
+ break;
+ case 7: /* Enable logging to console */
+ spin_lock_irq(&console_lock);
+ console_loglevel = default_console_loglevel;
+ spin_unlock_irq(&console_lock);
+ break;
+ case 8:
+ error = -EINVAL;
+ if (len < 1 || len > 8)
+ goto out;
+ if (len < minimum_console_loglevel)
+ len = minimum_console_loglevel;
+ spin_lock_irq(&console_lock);
+ console_loglevel = len;
+ spin_unlock_irq(&console_lock);
+ error = 0;
+ break;
+ default:
+ error = -EINVAL;
+ break;
+ }
+out:
+ return error;
+}
+
+asmlinkage long sys_syslog(int type, char * buf, int len)
+{
+ if ((type != 3) && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ return do_syslog(type, buf, len);
+}
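+
+/*
+ * Usage sketch (annotation, not part of the original import): user space
+ * reaches this through syslog(2), e.g. via glibc's klogctl() wrapper:
+ *
+ *	char buf[4096];
+ *	int n = klogctl(3, buf, sizeof(buf));	// read the ring buffer
+ *	klogctl(8, NULL, 5);			// set console_loglevel to 5
+ */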
+
+asmlinkage int printk(const char *fmt, ...)
+{
+ va_list args;
+ int i;
+ char *msg, *p, *buf_end;
+ int line_feed;
+ static signed char msg_level = -1;
+ long flags;
+
+ spin_lock_irqsave(&console_lock, flags);
+ va_start(args, fmt);
+ i = vsprintf(buf + 3, fmt, args); /* hopefully i < sizeof(buf)-4 */
+ buf_end = buf + 3 + i;
+ va_end(args);
+ for (p = buf + 3; p < buf_end; p++) {
+ msg = p;
+ if (msg_level < 0) {
+ if (
+ p[0] != '<' ||
+ p[1] < '0' ||
+ p[1] > '7' ||
+ p[2] != '>'
+ ) {
+ p -= 3;
+ p[0] = '<';
+ p[1] = default_message_loglevel + '0';
+ p[2] = '>';
+ } else
+ msg += 3;
+ msg_level = p[1] - '0';
+ }
+ line_feed = 0;
+ for (; p < buf_end; p++) {
+ log_buf[(log_start+log_size) & LOG_BUF_MASK] = *p;
+ if (log_size < LOG_BUF_LEN)
+ log_size++;
+ else
+ log_start++;
+
+ logged_chars++;
+ if (*p == '\n') {
+ line_feed = 1;
+ break;
+ }
+ }
+ if (msg_level < console_loglevel && console_drivers) {
+ struct console *c = console_drivers;
+ while(c) {
+ if ((c->flags & CON_ENABLED) && c->write)
+ c->write(c, msg, p - msg + line_feed);
+ c = c->next;
+ }
+ }
+ if (line_feed)
+ msg_level = -1;
+ }
+ spin_unlock_irqrestore(&console_lock, flags);
+ wake_up_interruptible(&log_wait);
+ return i;
+}
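+
+/*
+ * Usage note (annotation, not part of the original import): callers pass the
+ * loglevel as a "<n>" prefix, normally via the KERN_* macros, e.g.
+ *
+ *	printk(KERN_WARNING "mydrv: unexpected status %x\n", status);
+ *
+ * A message without such a prefix has "<4>" (DEFAULT_MESSAGE_LOGLEVEL)
+ * prepended by the code above before it is logged and, loglevel permitting,
+ * written to the registered consoles.
+ */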
+
+void console_print(const char *s)
+{
+ struct console *c;
+ unsigned long flags;
+ int len = strlen(s);
+
+ spin_lock_irqsave(&console_lock, flags);
+ c = console_drivers;
+ while(c) {
+ if ((c->flags & CON_ENABLED) && c->write)
+ c->write(c, s, len);
+ c = c->next;
+ }
+ spin_unlock_irqrestore(&console_lock, flags);
+}
+
+void unblank_console(void)
+{
+ struct console *c;
+ unsigned long flags;
+
+ spin_lock_irqsave(&console_lock, flags);
+ c = console_drivers;
+ while(c) {
+ if ((c->flags & CON_ENABLED) && c->unblank)
+ c->unblank();
+ c = c->next;
+ }
+ spin_unlock_irqrestore(&console_lock, flags);
+}
+
+/*
+ * The console driver calls this routine during kernel initialization
+ * to register the console printing procedure with printk() and to
+ * print any messages that were printed by the kernel before the
+ * console driver was initialized.
+ */
+void register_console(struct console * console)
+{
+ int i, j,len;
+ int p;
+ char buf[16];
+ signed char msg_level = -1;
+ char *q;
+ unsigned long flags;
+
+ /*
+ * See if we want to use this console driver. If we
+ * didn't select a console we take the first one
+ * that registers here.
+ */
+ if (preferred_console < 0) {
+ if (console->index < 0)
+ console->index = 0;
+ if (console->setup == NULL ||
+ console->setup(console, NULL) == 0) {
+ console->flags |= CON_ENABLED | CON_CONSDEV;
+ preferred_console = 0;
+ }
+ }
+
+ /*
+ * See if this console matches one we selected on
+ * the command line.
+ */
+ for(i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++) {
+ if (strcmp(console_cmdline[i].name, console->name) != 0)
+ continue;
+ if (console->index >= 0 &&
+ console->index != console_cmdline[i].index)
+ continue;
+ if (console->index < 0)
+ console->index = console_cmdline[i].index;
+ if (console->setup &&
+ console->setup(console, console_cmdline[i].options) != 0)
+ break;
+ console->flags |= CON_ENABLED;
+ console->index = console_cmdline[i].index;
+ if (i == preferred_console)
+ console->flags |= CON_CONSDEV;
+ break;
+ }
+
+ if (!(console->flags & CON_ENABLED))
+ return;
+
+ /*
+ * Put this console in the list - keep the
+ * preferred driver at the head of the list.
+ */
+ spin_lock_irqsave(&console_lock, flags);
+ if ((console->flags & CON_CONSDEV) || console_drivers == NULL) {
+ console->next = console_drivers;
+ console_drivers = console;
+ } else {
+ console->next = console_drivers->next;
+ console_drivers->next = console;
+ }
+ if ((console->flags & CON_PRINTBUFFER) == 0)
+ goto done;
+ /*
+ * Print out buffered log messages.
+ */
+ p = log_start & LOG_BUF_MASK;
+
+ for (i=0,j=0; i < log_size; i++) {
+ buf[j++] = log_buf[p];
+ p = (p+1) & LOG_BUF_MASK;
+ if (buf[j-1] != '\n' && i < log_size - 1 && j < sizeof(buf)-1)
+ continue;
+ buf[j] = 0;
+ q = buf;
+ len = j;
+ if (msg_level < 0) {
+ if(buf[0] == '<' &&
+ buf[1] >= '0' &&
+ buf[1] <= '7' &&
+ buf[2] == '>') {
+ msg_level = buf[1] - '0';
+ q = buf + 3;
+ len -= 3;
+ } else
+ {
+ msg_level = default_message_loglevel;
+ }
+ }
+ if (msg_level < console_loglevel)
+ console->write(console, q, len);
+ if (buf[j-1] == '\n')
+ msg_level = -1;
+ j = 0;
+ }
+done:
+ spin_unlock_irqrestore(&console_lock, flags);
+}
+
+
+int unregister_console(struct console * console)
+{
+ struct console *a,*b;
+ unsigned long flags;
+ int res = 1;
+
+ spin_lock_irqsave(&console_lock, flags);
+ if (console_drivers == console) {
+ console_drivers=console->next;
+ res = 0;
+ } else
+ {
+ for (a=console_drivers->next, b=console_drivers ;
+ a; b=a, a=b->next) {
+ if (a == console) {
+ b->next = a->next;
+ res = 0;
+ break;
+ }
+ }
+ }
+
+	/* If the last console is removed, we re-enable picking the first
+	 * one that gets registered. Without that, the pmac early boot console
+	 * would prevent fbcon from taking over.
+ */
+ if (console_drivers == NULL)
+ preferred_console = -1;
+
+
+ spin_unlock_irqrestore(&console_lock, flags);
+ return res;
+}
+
+/*
+ * Write a message to a certain tty, not just the console. This is used for
+ * messages that need to be redirected to a specific tty.
+ * We don't put it into the syslog queue right now; maybe in the future,
+ * if really needed.
+ */
+void tty_write_message(struct tty_struct *tty, char *msg)
+{
+ if (tty && tty->driver.write)
+ tty->driver.write(tty, 0, msg, strlen(msg));
+ return;
+}
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
new file mode 100644
index 000000000000..410f9de937bc
--- /dev/null
+++ b/kernel/ptrace.c
@@ -0,0 +1,193 @@
+/*
+ * linux/kernel/ptrace.c
+ *
+ * (C) Copyright 1999 Linus Torvalds
+ *
+ * Common interfaces for "ptrace()" which we do not want
+ * to continually duplicate across every architecture.
+ */
+
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/smp_lock.h>
+
+#include <asm/pgtable.h>
+#include <asm/uaccess.h>
+
+/*
+ * Access another process' address space, one page at a time.
+ */
+static int access_one_page(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long addr, void *buf, int len, int write)
+{
+ pgd_t * pgdir;
+ pmd_t * pgmiddle;
+ pte_t * pgtable;
+ char *maddr;
+ struct page *page;
+
+repeat:
+ pgdir = pgd_offset(vma->vm_mm, addr);
+ if (pgd_none(*pgdir))
+ goto fault_in_page;
+ if (pgd_bad(*pgdir))
+ goto bad_pgd;
+ pgmiddle = pmd_offset(pgdir, addr);
+ if (pmd_none(*pgmiddle))
+ goto fault_in_page;
+ if (pmd_bad(*pgmiddle))
+ goto bad_pmd;
+ pgtable = pte_offset(pgmiddle, addr);
+ if (!pte_present(*pgtable))
+ goto fault_in_page;
+ if (write && (!pte_write(*pgtable) || !pte_dirty(*pgtable)))
+ goto fault_in_page;
+ page = pte_page(*pgtable);
+
+ /* ZERO_PAGE is special: reads from it are ok even though it's marked reserved */
+ if (page != ZERO_PAGE(addr) || write) {
+ if ((!VALID_PAGE(page)) || PageReserved(page))
+ return 0;
+ }
+ flush_cache_page(vma, addr);
+
+ if (write) {
+ maddr = kmap(page);
+ memcpy(maddr + (addr & ~PAGE_MASK), buf, len);
+ flush_page_to_ram(page);
+ flush_icache_page(vma, page);
+ kunmap(page);
+ } else {
+ maddr = kmap(page);
+ memcpy(buf, maddr + (addr & ~PAGE_MASK), len);
+ flush_page_to_ram(page);
+ kunmap(page);
+ }
+ return len;
+
+fault_in_page:
+ /* -1: out of memory. 0 - unmapped page */
+ if (handle_mm_fault(mm, vma, addr, write) > 0)
+ goto repeat;
+ return 0;
+
+bad_pgd:
+ pgd_ERROR(*pgdir);
+ return 0;
+
+bad_pmd:
+ pmd_ERROR(*pgmiddle);
+ return 0;
+}
+
+static int access_mm(struct mm_struct *mm, struct vm_area_struct * vma, unsigned long addr, void *buf, int len, int write)
+{
+ int copied = 0;
+
+ for (;;) {
+ unsigned long offset = addr & ~PAGE_MASK;
+ int this_len = PAGE_SIZE - offset;
+ int retval;
+
+ if (this_len > len)
+ this_len = len;
+ retval = access_one_page(mm, vma, addr, buf, this_len, write);
+ copied += retval;
+ if (retval != this_len)
+ break;
+
+ len -= retval;
+ if (!len)
+ break;
+
+ addr += retval;
+ buf += retval;
+
+ if (addr < vma->vm_end)
+ continue;
+ if (!vma->vm_next)
+ break;
+ if (vma->vm_next->vm_start != vma->vm_end)
+ break;
+
+ vma = vma->vm_next;
+ }
+ return copied;
+}
+
+int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
+{
+ int copied;
+ struct mm_struct *mm;
+ struct vm_area_struct * vma;
+
+ /* Worry about races with exit() */
+ task_lock(tsk);
+ mm = tsk->mm;
+ if (mm)
+ atomic_inc(&mm->mm_users);
+ task_unlock(tsk);
+ if (!mm)
+ return 0;
+
+ down(&mm->mmap_sem);
+ vma = find_extend_vma(mm, addr);
+ copied = 0;
+ if (vma)
+ copied = access_mm(mm, vma, addr, buf, len, write);
+
+ up(&mm->mmap_sem);
+ mmput(mm);
+ return copied;
+}
+
+int ptrace_readdata(struct task_struct *tsk, unsigned long src, char *dst, int len)
+{
+ int copied = 0;
+
+ while (len > 0) {
+ char buf[128];
+ int this_len, retval;
+
+ this_len = (len > sizeof(buf)) ? sizeof(buf) : len;
+ retval = access_process_vm(tsk, src, buf, this_len, 0);
+ if (!retval) {
+ if (copied)
+ break;
+ return -EIO;
+ }
+ if (copy_to_user(dst, buf, retval))
+ return -EFAULT;
+ copied += retval;
+ src += retval;
+ dst += retval;
+ len -= retval;
+ }
+ return copied;
+}
+
+int ptrace_writedata(struct task_struct *tsk, char * src, unsigned long dst, int len)
+{
+ int copied = 0;
+
+ while (len > 0) {
+ char buf[128];
+ int this_len, retval;
+
+ this_len = (len > sizeof(buf)) ? sizeof(buf) : len;
+ if (copy_from_user(buf, src, this_len))
+ return -EFAULT;
+ retval = access_process_vm(tsk, dst, buf, this_len, 1);
+ if (!retval) {
+ if (copied)
+ break;
+ return -EIO;
+ }
+ copied += retval;
+ src += retval;
+ dst += retval;
+ len -= retval;
+ }
+ return copied;
+}
diff --git a/kernel/resource.c b/kernel/resource.c
new file mode 100644
index 000000000000..b553eb0ff2e9
--- /dev/null
+++ b/kernel/resource.c
@@ -0,0 +1,322 @@
+/*
+ * linux/kernel/resource.c
+ *
+ * Copyright (C) 1999 Linus Torvalds
+ * Copyright (C) 1999 Martin Mares <mj@ucw.cz>
+ *
+ * Arbitrary resource management.
+ */
+
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/ioport.h>
+#include <linux/init.h>
+#include <linux/malloc.h>
+#include <linux/spinlock.h>
+#include <asm/io.h>
+
+struct resource ioport_resource = { "PCI IO", 0x0000, IO_SPACE_LIMIT, IORESOURCE_IO };
+struct resource iomem_resource = { "PCI mem", 0x00000000, 0xffffffff, IORESOURCE_MEM };
+
+static rwlock_t resource_lock = RW_LOCK_UNLOCKED;
+
+/*
+ * This generates reports for /proc/ioports and /proc/iomem
+ */
+static char * do_resource_list(struct resource *entry, const char *fmt, int offset, char *buf, char *end)
+{
+ if (offset < 0)
+ offset = 0;
+
+ while (entry) {
+ const char *name = entry->name;
+ unsigned long from, to;
+
+ if ((int) (end-buf) < 80)
+ return buf;
+
+ from = entry->start;
+ to = entry->end;
+ if (!name)
+ name = "<BAD>";
+
+ buf += sprintf(buf, fmt + offset, from, to, name);
+ if (entry->child)
+ buf = do_resource_list(entry->child, fmt, offset-2, buf, end);
+ entry = entry->sibling;
+ }
+
+ return buf;
+}
+
+int get_resource_list(struct resource *root, char *buf, int size)
+{
+ char *fmt;
+ int retval;
+
+ fmt = " %08lx-%08lx : %s\n";
+ if (root->end < 0x10000)
+ fmt = " %04lx-%04lx : %s\n";
+ read_lock(&resource_lock);
+ retval = do_resource_list(root->child, fmt, 8, buf, buf + size) - buf;
+ read_unlock(&resource_lock);
+ return retval;
+}
+
+/* Return the conflict entry if you can't request it */
+static struct resource * __request_resource(struct resource *root, struct resource *new)
+{
+ unsigned long start = new->start;
+ unsigned long end = new->end;
+ struct resource *tmp, **p;
+
+ if (end < start)
+ return root;
+ if (start < root->start)
+ return root;
+ if (end > root->end)
+ return root;
+ p = &root->child;
+ for (;;) {
+ tmp = *p;
+ if (!tmp || tmp->start > end) {
+ new->sibling = tmp;
+ *p = new;
+ new->parent = root;
+ return NULL;
+ }
+ p = &tmp->sibling;
+ if (tmp->end < start)
+ continue;
+ return tmp;
+ }
+}
+
+static int __release_resource(struct resource *old)
+{
+ struct resource *tmp, **p;
+
+ p = &old->parent->child;
+ for (;;) {
+ tmp = *p;
+ if (!tmp)
+ break;
+ if (tmp == old) {
+ *p = tmp->sibling;
+ old->parent = NULL;
+ return 0;
+ }
+ p = &tmp->sibling;
+ }
+ return -EINVAL;
+}
+
+int request_resource(struct resource *root, struct resource *new)
+{
+ struct resource *conflict;
+
+ write_lock(&resource_lock);
+ conflict = __request_resource(root, new);
+ write_unlock(&resource_lock);
+ return conflict ? -EBUSY : 0;
+}
+
+int release_resource(struct resource *old)
+{
+ int retval;
+
+ write_lock(&resource_lock);
+ retval = __release_resource(old);
+ write_unlock(&resource_lock);
+ return retval;
+}
+
+int check_resource(struct resource *root, unsigned long start, unsigned long len)
+{
+ struct resource *conflict, tmp;
+
+ tmp.start = start;
+ tmp.end = start + len - 1;
+ write_lock(&resource_lock);
+ conflict = __request_resource(root, &tmp);
+ if (!conflict)
+ __release_resource(&tmp);
+ write_unlock(&resource_lock);
+ return conflict ? -EBUSY : 0;
+}
+
+/*
+ * Find empty slot in the resource tree given range and alignment.
+ */
+static int find_resource(struct resource *root, struct resource *new,
+ unsigned long size,
+ unsigned long min, unsigned long max,
+ unsigned long align,
+ void (*alignf)(void *, struct resource *, unsigned long),
+ void *alignf_data)
+{
+ struct resource *this = root->child;
+
+ new->start = root->start;
+ for(;;) {
+ if (this)
+ new->end = this->start;
+ else
+ new->end = root->end;
+ if (new->start < min)
+ new->start = min;
+ if (new->end > max)
+ new->end = max;
+ new->start = (new->start + align - 1) & ~(align - 1);
+ if (alignf)
+ alignf(alignf_data, new, size);
+ if (new->start < new->end && new->end - new->start + 1 >= size) {
+ new->end = new->start + size - 1;
+ return 0;
+ }
+ if (!this)
+ break;
+ new->start = this->end + 1;
+ this = this->sibling;
+ }
+ return -EBUSY;
+}
+
+/*
+ * Allocate empty slot in the resource tree given range and alignment.
+ */
+int allocate_resource(struct resource *root, struct resource *new,
+ unsigned long size,
+ unsigned long min, unsigned long max,
+ unsigned long align,
+ void (*alignf)(void *, struct resource *, unsigned long),
+ void *alignf_data)
+{
+ int err;
+
+ write_lock(&resource_lock);
+ err = find_resource(root, new, size, min, max, align, alignf, alignf_data);
+ if (err >= 0 && __request_resource(root, new))
+ err = -EBUSY;
+ write_unlock(&resource_lock);
+ return err;
+}
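+
+/*
+ * Illustrative sketch (annotation, not part of the original import): carving
+ * a 4K, 4K-aligned window out of the PCI memory space for a device with no
+ * fixed address.  The resource name and the min/max bounds are placeholders;
+ * compiled out via #if 0.
+ */
+#if 0
+static struct resource mywin = { "mywin", 0, 0, IORESOURCE_MEM };
+
+static int grab_window(void)
+{
+	return allocate_resource(&iomem_resource, &mywin, 0x1000,
+				 0x10000000, 0xffffffff, 0x1000, NULL, NULL);
+}
+#endif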
+
+/*
+ * This is compatibility stuff for IO resources.
+ *
+ * Note how this, unlike the above, knows about
+ * the IO flag meanings (busy etc).
+ *
+ * Request-region creates a new busy region.
+ *
+ * Check-region returns non-zero if the area is already busy
+ *
+ * Release-region releases a matching busy region.
+ */
+struct resource * __request_region(struct resource *parent, unsigned long start, unsigned long n, const char *name)
+{
+ struct resource *res = kmalloc(sizeof(*res), GFP_KERNEL);
+
+ if (res) {
+ memset(res, 0, sizeof(*res));
+ res->name = name;
+ res->start = start;
+ res->end = start + n - 1;
+ res->flags = IORESOURCE_BUSY;
+
+ write_lock(&resource_lock);
+
+ for (;;) {
+ struct resource *conflict;
+
+ conflict = __request_resource(parent, res);
+ if (!conflict)
+ break;
+ if (conflict != parent) {
+ parent = conflict;
+ if (!(conflict->flags & IORESOURCE_BUSY))
+ continue;
+ }
+
+ /* Uhhuh, that didn't work out.. */
+ kfree(res);
+ res = NULL;
+ break;
+ }
+ write_unlock(&resource_lock);
+ }
+ return res;
+}
+
+int __check_region(struct resource *parent, unsigned long start, unsigned long n)
+{
+ struct resource * res;
+
+ res = __request_region(parent, start, n, "check-region");
+ if (!res)
+ return -EBUSY;
+
+ release_resource(res);
+ kfree(res);
+ return 0;
+}
+
+void __release_region(struct resource *parent, unsigned long start, unsigned long n)
+{
+ struct resource **p;
+ unsigned long end;
+
+ p = &parent->child;
+ end = start + n - 1;
+
+ for (;;) {
+ struct resource *res = *p;
+
+ if (!res)
+ break;
+ if (res->start <= start && res->end >= end) {
+ if (!(res->flags & IORESOURCE_BUSY)) {
+ p = &res->child;
+ continue;
+ }
+ if (res->start != start || res->end != end)
+ break;
+ *p = res->sibling;
+ kfree(res);
+ return;
+ }
+ p = &res->sibling;
+ }
+ printk("Trying to free nonexistent resource <%08lx-%08lx>\n", start, end);
+}
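+
+/*
+ * Illustrative sketch (annotation, not part of the original import): a driver
+ * claiming its I/O ports through the request_region()/release_region()
+ * wrappers from <linux/ioport.h>, which call the functions above with
+ * &ioport_resource.  The driver name and port range are hypothetical;
+ * compiled out via #if 0.
+ */
+#if 0
+static int __init mydrv_init(void)
+{
+	if (!request_region(0x300, 8, "mydrv"))
+		return -EBUSY;		/* ports already owned by someone else */
+	/* ... probe and set up the hardware ... */
+	return 0;
+}
+
+static void __exit mydrv_exit(void)
+{
+	release_region(0x300, 8);
+}
+#endif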
+
+/*
+ * Called from init/main.c to reserve IO ports.
+ */
+#define MAXRESERVE 4
+static int __init reserve_setup(char *str)
+{
+ int opt = 2, io_start, io_num;
+ static int reserved = 0;
+ static struct resource reserve[MAXRESERVE];
+
+ while (opt==2) {
+ int x = reserved;
+
+ if (get_option (&str, &io_start) != 2) break;
+ if (get_option (&str, &io_num) == 0) break;
+ if (x < MAXRESERVE) {
+ struct resource *res = reserve + x;
+ res->name = "reserved";
+ res->start = io_start;
+ res->end = io_start + io_num - 1;
+ res->child = NULL;
+ if (request_resource(res->start >= 0x10000 ? &iomem_resource : &ioport_resource, res) == 0)
+ reserved = x+1;
+ }
+ }
+ return 1;
+}
+
+__setup("reserve=", reserve_setup);
diff --git a/kernel/sched.c b/kernel/sched.c
new file mode 100644
index 000000000000..bc2dcfa70cfc
--- /dev/null
+++ b/kernel/sched.c
@@ -0,0 +1,1269 @@
+/*
+ * linux/kernel/sched.c
+ *
+ * Kernel scheduler and related syscalls
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * 1996-12-23 Modified by Dave Grothe to fix bugs in semaphores and
+ * make semaphores SMP safe
+ * 1998-11-19 Implemented schedule_timeout() and related stuff
+ * by Andrea Arcangeli
+ * 1998-12-28 Implemented better SMP scheduling by Ingo Molnar
+ */
+
+/*
+ * 'sched.c' is the main kernel file. It contains scheduling primitives
+ * (sleep_on, wakeup, schedule etc) as well as a number of simple system
+ * call functions (type getpid()), which just extract a field from
+ * current-task
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+
+#include <asm/uaccess.h>
+#include <asm/mmu_context.h>
+
+extern void timer_bh(void);
+extern void tqueue_bh(void);
+extern void immediate_bh(void);
+
+/*
+ * scheduler variables
+ */
+
+unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */
+
+extern void mem_use(void);
+
+/*
+ * Scheduling quanta.
+ *
+ * NOTE! The unix "nice" value influences how long a process
+ * gets. The nice value ranges from -20 to +19, where a -20
+ * is a "high-priority" task, and a "+10" is a low-priority
+ * task.
+ *
+ * We want the time-slice to be around 50ms or so, so this
+ * calculation depends on the value of HZ.
+ */
+#if HZ < 200
+#define TICK_SCALE(x) ((x) >> 2)
+#elif HZ < 400
+#define TICK_SCALE(x) ((x) >> 1)
+#elif HZ < 800
+#define TICK_SCALE(x) (x)
+#elif HZ < 1600
+#define TICK_SCALE(x) ((x) << 1)
+#else
+#define TICK_SCALE(x) ((x) << 2)
+#endif
+
+#define NICE_TO_TICKS(nice) (TICK_SCALE(20-(nice))+1)
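+
+/*
+ * Worked example (annotation, not part of the original import): with HZ=100
+ * (10 ms ticks) TICK_SCALE(x) is x >> 2, so NICE_TO_TICKS(0) = (20 >> 2) + 1
+ * = 6 ticks, i.e. roughly 60 ms per slice; nice +19 gets 1 tick and nice -20
+ * gets 11 ticks.
+ */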
+
+
+/*
+ * Init task must be ok at boot for the ix86 as we will check its signals
+ * via the SMP irq return path.
+ */
+
+struct task_struct * init_tasks[NR_CPUS] = {&init_task, };
+
+/*
+ * The tasklist_lock protects the linked list of processes.
+ *
+ * The runqueue_lock locks the parts that actually access
+ * and change the run-queues, and have to be interrupt-safe.
+ *
+ * If both locks are to be concurrently held, the runqueue_lock
+ * nests inside the tasklist_lock.
+ */
+spinlock_t runqueue_lock __cacheline_aligned = SPIN_LOCK_UNLOCKED; /* inner */
+rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED; /* outer */
+
+static LIST_HEAD(runqueue_head);
+
+/*
+ * We align per-CPU scheduling data on cacheline boundaries,
+ * to prevent cacheline ping-pong.
+ */
+static union {
+ struct schedule_data {
+ struct task_struct * curr;
+ cycles_t last_schedule;
+ } schedule_data;
+ char __pad [SMP_CACHE_BYTES];
+} aligned_data [NR_CPUS] __cacheline_aligned = { {{&init_task,0}}};
+
+#define cpu_curr(cpu) aligned_data[(cpu)].schedule_data.curr
+#define last_schedule(cpu) aligned_data[(cpu)].schedule_data.last_schedule
+
+struct kernel_stat kstat;
+
+#ifdef CONFIG_SMP
+
+#define idle_task(cpu) (init_tasks[cpu_number_map(cpu)])
+#define can_schedule(p,cpu) ((!(p)->has_cpu) && \
+ ((p)->cpus_allowed & (1 << cpu)))
+
+#else
+
+#define idle_task(cpu) (&init_task)
+#define can_schedule(p,cpu) (1)
+
+#endif
+
+void scheduling_functions_start_here(void) { }
+
+/*
+ * This is the function that decides how desirable a process is..
+ * You can weigh different processes against each other depending
+ * on what CPU they've run on lately etc to try to handle cache
+ * and TLB miss penalties.
+ *
+ * Return values:
+ * -1000: never select this
+ * 0: out of time, recalculate counters (but it might still be
+ * selected)
+ * +ve: "goodness" value (the larger, the better)
+ * +1000: realtime process, select this.
+ */
+
+static inline int goodness(struct task_struct * p, int this_cpu, struct mm_struct *this_mm)
+{
+ int weight;
+
+ /*
+ * select the current process after every other
+ * runnable process, but before the idle thread.
+	 * Also, don't trigger a counter recalculation.
+ */
+ weight = -1;
+ if (p->policy & SCHED_YIELD)
+ goto out;
+
+ /*
+ * Non-RT process - normal case first.
+ */
+ if (p->policy == SCHED_OTHER) {
+ /*
+ * Give the process a first-approximation goodness value
+ * according to the number of clock-ticks it has left.
+ *
+ * Don't do any other calculations if the time slice is
+ * over..
+ */
+ weight = p->counter;
+ if (!weight)
+ goto out;
+
+#ifdef CONFIG_SMP
+ /* Give a largish advantage to the same processor... */
+ /* (this is equivalent to penalizing other processors) */
+ if (p->processor == this_cpu)
+ weight += PROC_CHANGE_PENALTY;
+#endif
+
+ /* .. and a slight advantage to the current MM */
+ if (p->mm == this_mm || !p->mm)
+ weight += 1;
+ weight += 20 - p->nice;
+ goto out;
+ }
+
+ /*
+ * Realtime process, select the first one on the
+ * runqueue (taking priorities within processes
+ * into account).
+ */
+ weight = 1000 + p->rt_priority;
+out:
+ return weight;
+}
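+
+/*
+ * Worked example (annotation, not part of the original import): a SCHED_OTHER
+ * process with counter 6 and nice 0 scores 6 + 20 = 26, plus 1 if it shares
+ * the current MM and plus PROC_CHANGE_PENALTY on SMP if it last ran on this
+ * CPU; a SCHED_FIFO process with rt_priority 50 always scores 1050 and so
+ * beats any non-realtime process.
+ */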
+
+/*
+ * the 'goodness value' of replacing a process on a given CPU.
+ * positive value means 'replace', zero or negative means 'dont'.
+ */
+static inline int preemption_goodness(struct task_struct * prev, struct task_struct * p, int cpu)
+{
+ return goodness(p, cpu, prev->active_mm) - goodness(prev, cpu, prev->active_mm);
+}
+
+/*
+ * This is ugly, but reschedule_idle() is very timing-critical.
+ * We are called with the runqueue spinlock held and we must
+ * not claim the tasklist_lock.
+ */
+static FASTCALL(void reschedule_idle(struct task_struct * p));
+
+static void reschedule_idle(struct task_struct * p)
+{
+#ifdef CONFIG_SMP
+ int this_cpu = smp_processor_id();
+ struct task_struct *tsk, *target_tsk;
+ int cpu, best_cpu, i, max_prio;
+ cycles_t oldest_idle;
+
+ /*
+ * shortcut if the woken up task's last CPU is
+ * idle now.
+ */
+ best_cpu = p->processor;
+ if (can_schedule(p, best_cpu)) {
+ tsk = idle_task(best_cpu);
+ if (cpu_curr(best_cpu) == tsk) {
+ int need_resched;
+send_now_idle:
+ /*
+ * If need_resched == -1 then we can skip sending
+			 * the IPI altogether; tsk->need_resched is
+			 * actively watched by the idle thread.
+ */
+ need_resched = tsk->need_resched;
+ tsk->need_resched = 1;
+ if ((best_cpu != this_cpu) && !need_resched)
+ smp_send_reschedule(best_cpu);
+ return;
+ }
+ }
+
+ /*
+	 * We know that the preferred CPU has a cache-affine current
+	 * process, so let's try to find a new idle CPU for the woken-up
+	 * process. Select the least recently active idle CPU (that
+	 * one will have the least active cache context). Also find
+	 * the executing process which has the lowest priority.
+ */
+ oldest_idle = (cycles_t) -1;
+ target_tsk = NULL;
+ max_prio = 1;
+
+ for (i = 0; i < smp_num_cpus; i++) {
+ cpu = cpu_logical_map(i);
+ if (!can_schedule(p, cpu))
+ continue;
+ tsk = cpu_curr(cpu);
+ /*
+ * We use the first available idle CPU. This creates
+ * a priority list between idle CPUs, but this is not
+ * a problem.
+ */
+ if (tsk == idle_task(cpu)) {
+ if (last_schedule(cpu) < oldest_idle) {
+ oldest_idle = last_schedule(cpu);
+ target_tsk = tsk;
+ }
+ } else {
+ if (oldest_idle == -1ULL) {
+ int prio = preemption_goodness(tsk, p, cpu);
+
+ if (prio > max_prio) {
+ max_prio = prio;
+ target_tsk = tsk;
+ }
+ }
+ }
+ }
+ tsk = target_tsk;
+ if (tsk) {
+ if (oldest_idle != -1ULL) {
+ best_cpu = tsk->processor;
+ goto send_now_idle;
+ }
+ tsk->need_resched = 1;
+ if (tsk->processor != this_cpu)
+ smp_send_reschedule(tsk->processor);
+ }
+ return;
+
+
+#else /* UP */
+ int this_cpu = smp_processor_id();
+ struct task_struct *tsk;
+
+ tsk = cpu_curr(this_cpu);
+ if (preemption_goodness(tsk, p, this_cpu) > 1)
+ tsk->need_resched = 1;
+#endif
+}
+
+/*
+ * Careful!
+ *
+ * This has to add the process to the _beginning_ of the
+ * run-queue, not the end. See the comment about "This is
+ * subtle" in the scheduler proper..
+ */
+static inline void add_to_runqueue(struct task_struct * p)
+{
+ list_add(&p->run_list, &runqueue_head);
+ nr_running++;
+}
+
+static inline void move_last_runqueue(struct task_struct * p)
+{
+ list_del(&p->run_list);
+ list_add_tail(&p->run_list, &runqueue_head);
+}
+
+static inline void move_first_runqueue(struct task_struct * p)
+{
+ list_del(&p->run_list);
+ list_add(&p->run_list, &runqueue_head);
+}
+
+/*
+ * Wake up a process. Put it on the run-queue if it's not
+ * already there. The "current" process is always on the
+ * run-queue (except when the actual re-schedule is in
+ * progress), and as such you're allowed to do the simpler
+ * "current->state = TASK_RUNNING" to mark yourself runnable
+ * without the overhead of this.
+ */
+inline void wake_up_process(struct task_struct * p)
+{
+ unsigned long flags;
+
+ /*
+	 * We want the common case to fall straight through, thus the goto.
+ */
+ spin_lock_irqsave(&runqueue_lock, flags);
+ p->state = TASK_RUNNING;
+ if (task_on_runqueue(p))
+ goto out;
+ add_to_runqueue(p);
+ reschedule_idle(p);
+out:
+ spin_unlock_irqrestore(&runqueue_lock, flags);
+}
+
+static inline void wake_up_process_synchronous(struct task_struct * p)
+{
+ unsigned long flags;
+
+ /*
+	 * We want the common case to fall straight through, thus the goto.
+ */
+ spin_lock_irqsave(&runqueue_lock, flags);
+ p->state = TASK_RUNNING;
+ if (task_on_runqueue(p))
+ goto out;
+ add_to_runqueue(p);
+out:
+ spin_unlock_irqrestore(&runqueue_lock, flags);
+}
+
+static void process_timeout(unsigned long __data)
+{
+ struct task_struct * p = (struct task_struct *) __data;
+
+ wake_up_process(p);
+}
+
+signed long schedule_timeout(signed long timeout)
+{
+ struct timer_list timer;
+ unsigned long expire;
+
+ switch (timeout)
+ {
+ case MAX_SCHEDULE_TIMEOUT:
+ /*
+		 * This special case is a convenience for the caller.
+		 * Nothing more. We could take MAX_SCHEDULE_TIMEOUT
+		 * from one of the negative values, but I'd like to return
+		 * a valid offset (>=0) to allow the caller to do
+		 * everything it wants with the retval.
+ */
+ schedule();
+ goto out;
+ default:
+ /*
+		 * Another bit of paranoia. Note that the retval will be
+		 * 0 since no piece of the kernel is supposed to check
+		 * for a negative retval of schedule_timeout() (since it
+		 * should never happen anyway). You just have the printk()
+		 * that will tell you if something has gone wrong, and where.
+ */
+ if (timeout < 0)
+ {
+ printk(KERN_ERR "schedule_timeout: wrong timeout "
+ "value %lx from %p\n", timeout,
+ __builtin_return_address(0));
+ current->state = TASK_RUNNING;
+ goto out;
+ }
+ }
+
+ expire = timeout + jiffies;
+
+ init_timer(&timer);
+ timer.expires = expire;
+ timer.data = (unsigned long) current;
+ timer.function = process_timeout;
+
+ add_timer(&timer);
+ schedule();
+ del_timer_sync(&timer);
+
+ timeout = expire - jiffies;
+
+ out:
+ return timeout < 0 ? 0 : timeout;
+}
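+
+/*
+ * Illustrative sketch (annotation, not part of the original import): a
+ * typical caller sleeps for up to a second, waking early on a signal.  The
+ * function name is hypothetical; compiled out via #if 0.
+ */
+#if 0
+static void wait_a_bit(void)
+{
+	signed long left;
+
+	set_current_state(TASK_INTERRUPTIBLE);
+	left = schedule_timeout(HZ);	/* HZ jiffies == one second */
+	if (left)
+		printk(KERN_DEBUG "woken up %ld jiffies early\n", left);
+}
+#endif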
+
+/*
+ * schedule_tail() is getting called from the fork return path. This
+ * cleans up all remaining scheduler things, without impacting the
+ * common case.
+ */
+static inline void __schedule_tail(struct task_struct *prev)
+{
+#ifdef CONFIG_SMP
+ int policy;
+
+ /*
+ * prev->policy can be written from here only before `prev'
+ * can be scheduled (before setting prev->has_cpu to zero).
+ * Of course it must also be read before allowing prev
+ * to be rescheduled, but since the write depends on the read
+ * to complete, wmb() is enough. (the spin_lock() acquired
+ * before setting has_cpu is not enough because the spin_lock()
+ * common code semantics allows code outside the critical section
+ * to enter inside the critical section)
+ */
+ policy = prev->policy;
+ prev->policy = policy & ~SCHED_YIELD;
+ wmb();
+
+ /*
+ * fast path falls through. We have to clear has_cpu before
+ * checking prev->state to avoid a wakeup race - thus we
+ * also have to protect against the task exiting early.
+ */
+ task_lock(prev);
+ prev->has_cpu = 0;
+ mb();
+ if (prev->state == TASK_RUNNING)
+ goto needs_resched;
+
+out_unlock:
+ task_unlock(prev); /* Synchronise here with release_task() if prev is TASK_ZOMBIE */
+ return;
+
+ /*
+ * Slow path - we 'push' the previous process and
+ * reschedule_idle() will attempt to find a new
+ * processor for it. (but it might preempt the
+ * current process as well.) We must take the runqueue
+ * lock and re-check prev->state to be correct. It might
+ * still happen that this process has a preemption
+ * 'in progress' already - but this is not a problem and
+ * might happen in other circumstances as well.
+ */
+needs_resched:
+ {
+ unsigned long flags;
+
+ /*
+ * Avoid taking the runqueue lock in cases where
+		 * no preemption-check is necessary:
+ */
+ if ((prev == idle_task(smp_processor_id())) ||
+ (policy & SCHED_YIELD))
+ goto out_unlock;
+
+ spin_lock_irqsave(&runqueue_lock, flags);
+ if (prev->state == TASK_RUNNING)
+ reschedule_idle(prev);
+ spin_unlock_irqrestore(&runqueue_lock, flags);
+ goto out_unlock;
+ }
+#else
+ prev->policy &= ~SCHED_YIELD;
+#endif /* CONFIG_SMP */
+}
+
+void schedule_tail(struct task_struct *prev)
+{
+ __schedule_tail(prev);
+}
+
+/*
+ * 'schedule()' is the scheduler function. It's a very simple and nice
+ * scheduler: it's not perfect, but certainly works for most things.
+ *
+ * The goto is "interesting".
+ *
+ * NOTE!! Task 0 is the 'idle' task, which gets called when no other
+ * tasks can run. It cannot be killed, and it cannot sleep. The 'state'
+ * information in task[0] is never used.
+ */
+asmlinkage void schedule(void)
+{
+ struct schedule_data * sched_data;
+ struct task_struct *prev, *next, *p;
+ struct list_head *tmp;
+ int this_cpu, c;
+
+ if (!current->active_mm) BUG();
+need_resched_back:
+ prev = current;
+ this_cpu = prev->processor;
+
+ if (in_interrupt())
+ goto scheduling_in_interrupt;
+
+ release_kernel_lock(prev, this_cpu);
+
+ /* Do "administrative" work here while we don't hold any locks */
+ if (softirq_active(this_cpu) & softirq_mask(this_cpu))
+ goto handle_softirq;
+handle_softirq_back:
+
+ /*
+ * 'sched_data' is protected by the fact that we can run
+ * only one process per CPU.
+ */
+ sched_data = & aligned_data[this_cpu].schedule_data;
+
+ spin_lock_irq(&runqueue_lock);
+
+ /* move an exhausted RR process to be last.. */
+ if (prev->policy == SCHED_RR)
+ goto move_rr_last;
+move_rr_back:
+
+ switch (prev->state) {
+ case TASK_INTERRUPTIBLE:
+ if (signal_pending(prev)) {
+ prev->state = TASK_RUNNING;
+ break;
+ }
+ default:
+ del_from_runqueue(prev);
+ case TASK_RUNNING:;
+ }
+ prev->need_resched = 0;
+
+ /*
+ * this is the scheduler proper:
+ */
+
+repeat_schedule:
+ /*
+ * Default process to select..
+ */
+ next = idle_task(this_cpu);
+ c = -1000;
+ if (prev->state == TASK_RUNNING)
+ goto still_running;
+
+still_running_back:
+ list_for_each(tmp, &runqueue_head) {
+ p = list_entry(tmp, struct task_struct, run_list);
+ if (can_schedule(p, this_cpu)) {
+ int weight = goodness(p, this_cpu, prev->active_mm);
+ if (weight > c)
+ c = weight, next = p;
+ }
+ }
+
+ /* Do we need to re-calculate counters? */
+ if (!c)
+ goto recalculate;
+ /*
+ * from this point on nothing can prevent us from
+ * switching to the next task, save this fact in
+ * sched_data.
+ */
+ sched_data->curr = next;
+#ifdef CONFIG_SMP
+ next->has_cpu = 1;
+ next->processor = this_cpu;
+#endif
+ spin_unlock_irq(&runqueue_lock);
+
+ if (prev == next)
+ goto same_process;
+
+#ifdef CONFIG_SMP
+ /*
+ * maintain the per-process 'last schedule' value.
+ * (this has to be recalculated even if we reschedule to
+ * the same process) Currently this is only used on SMP,
+ * and it's approximate, so we do not have to maintain
+ * it while holding the runqueue spinlock.
+ */
+ sched_data->last_schedule = get_cycles();
+
+ /*
+ * We drop the scheduler lock early (it's a global spinlock),
+ * thus we have to lock the previous process from getting
+ * rescheduled during switch_to().
+ */
+
+#endif /* CONFIG_SMP */
+
+ kstat.context_swtch++;
+ /*
+ * there are 3 processes which are affected by a context switch:
+ *
+ * prev == .... ==> (last => next)
+ *
+ * It's the 'much more previous' 'prev' that is on next's stack,
+ * but prev is set to (the just run) 'last' process by switch_to().
+ * This might sound slightly confusing but makes tons of sense.
+ */
+ prepare_to_switch();
+ {
+ struct mm_struct *mm = next->mm;
+ struct mm_struct *oldmm = prev->active_mm;
+ if (!mm) {
+ if (next->active_mm) BUG();
+ next->active_mm = oldmm;
+ atomic_inc(&oldmm->mm_count);
+ enter_lazy_tlb(oldmm, next, this_cpu);
+ } else {
+ if (next->active_mm != mm) BUG();
+ switch_mm(oldmm, mm, next, this_cpu);
+ }
+
+ if (!prev->mm) {
+ prev->active_mm = NULL;
+ mmdrop(oldmm);
+ }
+ }
+
+ /*
+ * This just switches the register state and the
+ * stack.
+ */
+ switch_to(prev, next, prev);
+ __schedule_tail(prev);
+
+same_process:
+ reacquire_kernel_lock(current);
+ if (current->need_resched)
+ goto need_resched_back;
+
+ return;
+
+recalculate:
+ {
+ struct task_struct *p;
+ spin_unlock_irq(&runqueue_lock);
+ read_lock(&tasklist_lock);
+ for_each_task(p)
+ p->counter = (p->counter >> 1) + NICE_TO_TICKS(p->nice);
+ read_unlock(&tasklist_lock);
+ spin_lock_irq(&runqueue_lock);
+ }
+ goto repeat_schedule;
+
+still_running:
+ c = goodness(prev, this_cpu, prev->active_mm);
+ next = prev;
+ goto still_running_back;
+
+handle_softirq:
+ do_softirq();
+ goto handle_softirq_back;
+
+move_rr_last:
+ if (!prev->counter) {
+ prev->counter = NICE_TO_TICKS(prev->nice);
+ move_last_runqueue(prev);
+ }
+ goto move_rr_back;
+
+scheduling_in_interrupt:
+ printk("Scheduling in interrupt\n");
+ BUG();
+ return;
+}
+
+static inline void __wake_up_common (wait_queue_head_t *q, unsigned int mode,
+ unsigned int wq_mode, const int sync)
+{
+ struct list_head *tmp, *head;
+ struct task_struct *p, *best_exclusive;
+ unsigned long flags;
+ int best_cpu, irq;
+
+ if (!q)
+ goto out;
+
+ best_cpu = smp_processor_id();
+ irq = in_interrupt();
+ best_exclusive = NULL;
+ wq_write_lock_irqsave(&q->lock, flags);
+
+#if WAITQUEUE_DEBUG
+ CHECK_MAGIC_WQHEAD(q);
+#endif
+
+ head = &q->task_list;
+#if WAITQUEUE_DEBUG
+ if (!head->next || !head->prev)
+ WQ_BUG();
+#endif
+ tmp = head->next;
+ while (tmp != head) {
+ unsigned int state;
+ wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list);
+
+ tmp = tmp->next;
+
+#if WAITQUEUE_DEBUG
+ CHECK_MAGIC(curr->__magic);
+#endif
+ p = curr->task;
+ state = p->state;
+ if (state & mode) {
+#if WAITQUEUE_DEBUG
+ curr->__waker = (long)__builtin_return_address(0);
+#endif
+ /*
+ * If waking up from an interrupt context then
+ * prefer processes which are affine to this
+ * CPU.
+ */
+ if (irq && (curr->flags & wq_mode & WQ_FLAG_EXCLUSIVE)) {
+ if (!best_exclusive)
+ best_exclusive = p;
+ if (p->processor == best_cpu) {
+ best_exclusive = p;
+ break;
+ }
+ } else {
+ if (sync)
+ wake_up_process_synchronous(p);
+ else
+ wake_up_process(p);
+ if (curr->flags & wq_mode & WQ_FLAG_EXCLUSIVE)
+ break;
+ }
+ }
+ }
+ if (best_exclusive) {
+ if (sync)
+ wake_up_process_synchronous(best_exclusive);
+ else
+ wake_up_process(best_exclusive);
+ }
+ wq_write_unlock_irqrestore(&q->lock, flags);
+out:
+ return;
+}
+
+void __wake_up(wait_queue_head_t *q, unsigned int mode, unsigned int wq_mode)
+{
+ __wake_up_common(q, mode, wq_mode, 0);
+}
+
+void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, unsigned int wq_mode)
+{
+ __wake_up_common(q, mode, wq_mode, 1);
+}
+
+#define SLEEP_ON_VAR \
+ unsigned long flags; \
+ wait_queue_t wait; \
+ init_waitqueue_entry(&wait, current);
+
+#define SLEEP_ON_HEAD \
+ wq_write_lock_irqsave(&q->lock,flags); \
+ __add_wait_queue(q, &wait); \
+ wq_write_unlock(&q->lock);
+
+#define SLEEP_ON_TAIL \
+ wq_write_lock_irq(&q->lock); \
+ __remove_wait_queue(q, &wait); \
+ wq_write_unlock_irqrestore(&q->lock,flags);
+
+void interruptible_sleep_on(wait_queue_head_t *q)
+{
+ SLEEP_ON_VAR
+
+ current->state = TASK_INTERRUPTIBLE;
+
+ SLEEP_ON_HEAD
+ schedule();
+ SLEEP_ON_TAIL
+}
+
+long interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout)
+{
+ SLEEP_ON_VAR
+
+ current->state = TASK_INTERRUPTIBLE;
+
+ SLEEP_ON_HEAD
+ timeout = schedule_timeout(timeout);
+ SLEEP_ON_TAIL
+
+ return timeout;
+}
+
+void sleep_on(wait_queue_head_t *q)
+{
+ SLEEP_ON_VAR
+
+ current->state = TASK_UNINTERRUPTIBLE;
+
+ SLEEP_ON_HEAD
+ schedule();
+ SLEEP_ON_TAIL
+}
+
+long sleep_on_timeout(wait_queue_head_t *q, long timeout)
+{
+ SLEEP_ON_VAR
+
+ current->state = TASK_UNINTERRUPTIBLE;
+
+ SLEEP_ON_HEAD
+ timeout = schedule_timeout(timeout);
+ SLEEP_ON_TAIL
+
+ return timeout;
+}
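+
+/*
+ * Sketch of the classic pairing of the sleep_on helpers with __wake_up();
+ * my_wq and my_event are hypothetical names, and the sleep_on family is
+ * racy unless callers provide their own serialization.
+ */
+static DECLARE_WAIT_QUEUE_HEAD(my_wq);
+static volatile int my_event;
+
+static void my_consumer(void)
+{
+ while (!my_event)
+ interruptible_sleep_on_timeout(&my_wq, HZ);
+}
+
+static void my_producer(void)
+{
+ my_event = 1;
+ wake_up(&my_wq); /* ends up in __wake_up() above */
+}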
+
+void scheduling_functions_end_here(void) { }
+
+#ifndef __alpha__
+
+/*
+ * This has been replaced by sys_setpriority. Maybe it should be
+ * moved into the arch dependent tree for those ports that require
+ * it for backward compatibility?
+ */
+
+asmlinkage long sys_nice(int increment)
+{
+ long newprio;
+
+ /*
+ * Setpriority might change our priority at the same moment.
+ * We don't have to worry. Conceptually one call occurs first
+ * and we have a single winner.
+ */
+ if (increment < 0) {
+ if (!capable(CAP_SYS_NICE))
+ return -EPERM;
+ if (increment < -40)
+ increment = -40;
+ }
+ if (increment > 40)
+ increment = 40;
+
+ newprio = current->nice + increment;
+ if (newprio < -20)
+ newprio = -20;
+ if (newprio > 19)
+ newprio = 19;
+ current->nice = newprio;
+ return 0;
+}
+
+#endif
+
+static inline struct task_struct *find_process_by_pid(pid_t pid)
+{
+ struct task_struct *tsk = current;
+
+ if (pid)
+ tsk = find_task_by_pid(pid);
+ return tsk;
+}
+
+static int setscheduler(pid_t pid, int policy,
+ struct sched_param *param)
+{
+ struct sched_param lp;
+ struct task_struct *p;
+ int retval;
+
+ retval = -EINVAL;
+ if (!param || pid < 0)
+ goto out_nounlock;
+
+ retval = -EFAULT;
+ if (copy_from_user(&lp, param, sizeof(struct sched_param)))
+ goto out_nounlock;
+
+ /*
+ * We play safe to avoid deadlocks.
+ */
+ read_lock_irq(&tasklist_lock);
+ spin_lock(&runqueue_lock);
+
+ p = find_process_by_pid(pid);
+
+ retval = -ESRCH;
+ if (!p)
+ goto out_unlock;
+
+ if (policy < 0)
+ policy = p->policy;
+ else {
+ retval = -EINVAL;
+ if (policy != SCHED_FIFO && policy != SCHED_RR &&
+ policy != SCHED_OTHER)
+ goto out_unlock;
+ }
+
+ /*
+ * Valid priorities for SCHED_FIFO and SCHED_RR are 1..99, valid
+ * priority for SCHED_OTHER is 0.
+ */
+ retval = -EINVAL;
+ if (lp.sched_priority < 0 || lp.sched_priority > 99)
+ goto out_unlock;
+ if ((policy == SCHED_OTHER) != (lp.sched_priority == 0))
+ goto out_unlock;
+
+ retval = -EPERM;
+ if ((policy == SCHED_FIFO || policy == SCHED_RR) &&
+ !capable(CAP_SYS_NICE))
+ goto out_unlock;
+ if ((current->euid != p->euid) && (current->euid != p->uid) &&
+ !capable(CAP_SYS_NICE))
+ goto out_unlock;
+
+ retval = 0;
+ p->policy = policy;
+ p->rt_priority = lp.sched_priority;
+ if (task_on_runqueue(p))
+ move_first_runqueue(p);
+
+ current->need_resched = 1;
+
+out_unlock:
+ spin_unlock(&runqueue_lock);
+ read_unlock_irq(&tasklist_lock);
+
+out_nounlock:
+ return retval;
+}
+
+asmlinkage long sys_sched_setscheduler(pid_t pid, int policy,
+ struct sched_param *param)
+{
+ return setscheduler(pid, policy, param);
+}
+
+asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param *param)
+{
+ return setscheduler(pid, -1, param);
+}
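+
+/*
+ * User-space sketch of the interface implemented above (through the glibc
+ * wrapper): request SCHED_RR with a priority in the valid 1..99 range for
+ * the calling process (pid 0).  Purely illustrative.
+ *
+ *	#include <sched.h>
+ *
+ *	struct sched_param sp;
+ *	sp.sched_priority = 50;
+ *	if (sched_setscheduler(0, SCHED_RR, &sp) < 0)
+ *		return -1;
+ */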
+
+asmlinkage long sys_sched_getscheduler(pid_t pid)
+{
+ struct task_struct *p;
+ int retval;
+
+ retval = -EINVAL;
+ if (pid < 0)
+ goto out_nounlock;
+
+ retval = -ESRCH;
+ read_lock(&tasklist_lock);
+ p = find_process_by_pid(pid);
+ if (p)
+ retval = p->policy & ~SCHED_YIELD;
+ read_unlock(&tasklist_lock);
+
+out_nounlock:
+ return retval;
+}
+
+asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param *param)
+{
+ struct task_struct *p;
+ struct sched_param lp;
+ int retval;
+
+ retval = -EINVAL;
+ if (!param || pid < 0)
+ goto out_nounlock;
+
+ read_lock(&tasklist_lock);
+ p = find_process_by_pid(pid);
+ retval = -ESRCH;
+ if (!p)
+ goto out_unlock;
+ lp.sched_priority = p->rt_priority;
+ read_unlock(&tasklist_lock);
+
+ /*
+ * This one might sleep, we cannot do it with a spinlock held ...
+ */
+ retval = copy_to_user(param, &lp, sizeof(*param)) ? -EFAULT : 0;
+
+out_nounlock:
+ return retval;
+
+out_unlock:
+ read_unlock(&tasklist_lock);
+ return retval;
+}
+
+asmlinkage long sys_sched_yield(void)
+{
+ /*
+ * Trick. sched_yield() first counts the number of truly
+ * 'pending' runnable processes, then returns if it's
+ * only the current process. (This test does not have
+ * to be atomic.) In threaded applications this optimization
+ * gets triggered quite often.
+ */
+
+ int nr_pending = nr_running;
+
+#if CONFIG_SMP
+ int i;
+
+ // Subtract non-idle processes running on other CPUs.
+ for (i = 0; i < smp_num_cpus; i++)
+ if (aligned_data[i].schedule_data.curr != idle_task(i))
+ nr_pending--;
+#else
+ // on UP this process is on the runqueue as well
+ nr_pending--;
+#endif
+ if (nr_pending) {
+ /*
+ * This process can only be rescheduled by us,
+ * so this is safe without any locking.
+ */
+ if (current->policy == SCHED_OTHER)
+ current->policy |= SCHED_YIELD;
+ current->need_resched = 1;
+ }
+ return 0;
+}
+
+asmlinkage long sys_sched_get_priority_max(int policy)
+{
+ int ret = -EINVAL;
+
+ switch (policy) {
+ case SCHED_FIFO:
+ case SCHED_RR:
+ ret = 99;
+ break;
+ case SCHED_OTHER:
+ ret = 0;
+ break;
+ }
+ return ret;
+}
+
+asmlinkage long sys_sched_get_priority_min(int policy)
+{
+ int ret = -EINVAL;
+
+ switch (policy) {
+ case SCHED_FIFO:
+ case SCHED_RR:
+ ret = 1;
+ break;
+ case SCHED_OTHER:
+ ret = 0;
+ }
+ return ret;
+}
+
+asmlinkage long sys_sched_rr_get_interval(pid_t pid, struct timespec *interval)
+{
+ struct timespec t;
+ struct task_struct *p;
+ int retval = -EINVAL;
+
+ if (pid < 0)
+ goto out_nounlock;
+
+ retval = -ESRCH;
+ read_lock(&tasklist_lock);
+ p = find_process_by_pid(pid);
+ if (p)
+ jiffies_to_timespec(p->policy & SCHED_FIFO ? 0 : NICE_TO_TICKS(p->nice),
+ &t);
+ read_unlock(&tasklist_lock);
+ if (p)
+ retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0;
+out_nounlock:
+ return retval;
+}
+
+static void show_task(struct task_struct * p)
+{
+ unsigned long free = 0;
+ int state;
+ static const char * stat_nam[] = { "R", "S", "D", "Z", "T", "W" };
+
+ printk("%-8s ", p->comm);
+ state = p->state ? ffz(~p->state) + 1 : 0;
+ if (((unsigned) state) < sizeof(stat_nam)/sizeof(char *))
+ printk(stat_nam[state]);
+ else
+ printk(" ");
+#if (BITS_PER_LONG == 32)
+ if (p == current)
+ printk(" current ");
+ else
+ printk(" %08lX ", thread_saved_pc(&p->thread));
+#else
+ if (p == current)
+ printk(" current task ");
+ else
+ printk(" %016lx ", thread_saved_pc(&p->thread));
+#endif
+ {
+ unsigned long * n = (unsigned long *) (p+1);
+ while (!*n)
+ n++;
+ free = (unsigned long) n - (unsigned long)(p+1);
+ }
+ printk("%5lu %5d %6d ", free, p->pid, p->p_pptr->pid);
+ if (p->p_cptr)
+ printk("%5d ", p->p_cptr->pid);
+ else
+ printk(" ");
+ if (!p->mm)
+ printk(" (L-TLB) ");
+ else
+ printk(" (NOTLB) ");
+ if (p->p_ysptr)
+ printk("%7d", p->p_ysptr->pid);
+ else
+ printk(" ");
+ if (p->p_osptr)
+ printk(" %5d\n", p->p_osptr->pid);
+ else
+ printk("\n");
+
+#ifdef CONFIG_X86
+/* This is very useful, but only works on x86 right now */
+ {
+ extern void show_trace(unsigned long);
+ show_trace(p->thread.esp);
+ }
+#endif
+}
+
+char * render_sigset_t(sigset_t *set, char *buffer)
+{
+ int i = _NSIG, x;
+ do {
+ i -= 4, x = 0;
+ if (sigismember(set, i+1)) x |= 1;
+ if (sigismember(set, i+2)) x |= 2;
+ if (sigismember(set, i+3)) x |= 4;
+ if (sigismember(set, i+4)) x |= 8;
+ *buffer++ = (x < 10 ? '0' : 'a' - 10) + x;
+ } while (i >= 4);
+ *buffer = 0;
+ return buffer;
+}
+
+void show_state(void)
+{
+ struct task_struct *p;
+
+#if (BITS_PER_LONG == 32)
+ printk("\n"
+ " free sibling\n");
+ printk(" task PC stack pid father child younger older\n");
+#else
+ printk("\n"
+ " free sibling\n");
+ printk(" task PC stack pid father child younger older\n");
+#endif
+ read_lock(&tasklist_lock);
+ for_each_task(p)
+ show_task(p);
+ read_unlock(&tasklist_lock);
+}
+
+/*
+ * Put all the gunge required to become a kernel thread without
+ * attached user resources in one place where it belongs.
+ */
+
+void daemonize(void)
+{
+ struct fs_struct *fs;
+
+
+ /*
+ * If we were started as a result of loading a module, close all of the
+ * user space pages. We don't need them, and if we didn't close them
+ * they would be locked into memory.
+ */
+ exit_mm(current);
+
+ current->session = 1;
+ current->pgrp = 1;
+
+ /* Become as one with the init task */
+
+ exit_fs(current); /* current->fs->count--; */
+ fs = init_task.fs;
+ current->fs = fs;
+ atomic_inc(&fs->count);
+ exit_files(current);
+ current->files = init_task.files;
+ atomic_inc(&current->files->count);
+}
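+
+/*
+ * Sketch of the usual pairing of daemonize() with kernel_thread(); the
+ * name my_worker is hypothetical.
+ */
+static int my_worker(void *unused)
+{
+ daemonize();
+ strcpy(current->comm, "my_worker");
+
+ while (!signal_pending(current)) {
+ /* ... do the deferred work, then nap for a second ... */
+ current->state = TASK_INTERRUPTIBLE;
+ schedule_timeout(HZ);
+ }
+ return 0;
+}
+
+/* started from module/driver init code with kernel_thread(my_worker, NULL, 0) */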
+
+void __init init_idle(void)
+{
+ struct schedule_data * sched_data;
+ sched_data = &aligned_data[smp_processor_id()].schedule_data;
+
+ if (current != &init_task && task_on_runqueue(current)) {
+ printk("UGH! (%d:%d) was on the runqueue, removing.\n",
+ smp_processor_id(), current->pid);
+ del_from_runqueue(current);
+ }
+ sched_data->curr = current;
+ sched_data->last_schedule = get_cycles();
+}
+
+extern void init_timervecs (void);
+
+void __init sched_init(void)
+{
+ /*
+ * We have to do a little magic to get the first
+ * process right in SMP mode.
+ */
+ int cpu = smp_processor_id();
+ int nr;
+
+ init_task.processor = cpu;
+
+ for(nr = 0; nr < PIDHASH_SZ; nr++)
+ pidhash[nr] = NULL;
+
+ init_timervecs();
+
+ init_bh(TIMER_BH, timer_bh);
+ init_bh(TQUEUE_BH, tqueue_bh);
+ init_bh(IMMEDIATE_BH, immediate_bh);
+
+ /*
+ * The boot idle thread does lazy MMU switching as well:
+ */
+ atomic_inc(&init_mm.mm_count);
+ enter_lazy_tlb(&init_mm, current, cpu);
+}
diff --git a/kernel/signal.c b/kernel/signal.c
new file mode 100644
index 000000000000..db22b0057738
--- /dev/null
+++ b/kernel/signal.c
@@ -0,0 +1,1260 @@
+/*
+ * linux/kernel/signal.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * 1997-11-02 Modified for POSIX.1b signals by Richard Henderson
+ */
+
+#include <linux/config.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/unistd.h>
+#include <linux/smp_lock.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+
+#include <asm/uaccess.h>
+
+/*
+ * SLAB caches for signal bits.
+ */
+
+#define DEBUG_SIG 0
+
+#if DEBUG_SIG
+#define SIG_SLAB_DEBUG (SLAB_DEBUG_FREE | SLAB_RED_ZONE /* | SLAB_POISON */)
+#else
+#define SIG_SLAB_DEBUG 0
+#endif
+
+static kmem_cache_t *sigqueue_cachep;
+
+atomic_t nr_queued_signals;
+int max_queued_signals = 1024;
+
+void __init signals_init(void)
+{
+ sigqueue_cachep =
+ kmem_cache_create("sigqueue",
+ sizeof(struct sigqueue),
+ __alignof__(struct sigqueue),
+ SIG_SLAB_DEBUG, NULL, NULL);
+ if (!sigqueue_cachep)
+ panic("signals_init(): cannot create sigqueue SLAB cache");
+}
+
+
+/* Given the mask, find the first available signal that should be serviced. */
+
+static int
+next_signal(struct task_struct *tsk, sigset_t *mask)
+{
+ unsigned long i, *s, *m, x;
+ int sig = 0;
+
+ s = tsk->pending.signal.sig;
+ m = mask->sig;
+ switch (_NSIG_WORDS) {
+ default:
+ for (i = 0; i < _NSIG_WORDS; ++i, ++s, ++m)
+ if ((x = *s &~ *m) != 0) {
+ sig = ffz(~x) + i*_NSIG_BPW + 1;
+ break;
+ }
+ break;
+
+ case 2: if ((x = s[0] &~ m[0]) != 0)
+ sig = 1;
+ else if ((x = s[1] &~ m[1]) != 0)
+ sig = _NSIG_BPW + 1;
+ else
+ break;
+ sig += ffz(~x);
+ break;
+
+ case 1: if ((x = *s &~ *m) != 0)
+ sig = ffz(~x) + 1;
+ break;
+ }
+
+ return sig;
+}
+
+static void flush_sigqueue(struct sigpending *queue)
+{
+ struct sigqueue *q, *n;
+
+ sigemptyset(&queue->signal);
+ q = queue->head;
+ queue->head = NULL;
+ queue->tail = &queue->head;
+
+ while (q) {
+ n = q->next;
+ kmem_cache_free(sigqueue_cachep, q);
+ atomic_dec(&nr_queued_signals);
+ q = n;
+ }
+}
+
+/*
+ * Flush all pending signals for a task.
+ */
+
+void
+flush_signals(struct task_struct *t)
+{
+ t->sigpending = 0;
+ flush_sigqueue(&t->pending);
+}
+
+void exit_sighand(struct task_struct *tsk)
+{
+ struct signal_struct * sig = tsk->sig;
+
+ spin_lock_irq(&tsk->sigmask_lock);
+ if (sig) {
+ tsk->sig = NULL;
+ if (atomic_dec_and_test(&sig->count))
+ kmem_cache_free(sigact_cachep, sig);
+ }
+ tsk->sigpending = 0;
+ flush_sigqueue(&tsk->pending);
+ spin_unlock_irq(&tsk->sigmask_lock);
+}
+
+/*
+ * Flush all handlers for a task.
+ */
+
+void
+flush_signal_handlers(struct task_struct *t)
+{
+ int i;
+ struct k_sigaction *ka = &t->sig->action[0];
+ for (i = _NSIG ; i != 0 ; i--) {
+ if (ka->sa.sa_handler != SIG_IGN)
+ ka->sa.sa_handler = SIG_DFL;
+ ka->sa.sa_flags = 0;
+ sigemptyset(&ka->sa.sa_mask);
+ ka++;
+ }
+}
+
+/* Notify the system that a driver wants to block all signals for this
+ * process, and wants to be notified if any signals at all were to be
+ * sent/acted upon. If the notifier routine returns non-zero, then the
+ * signal will be acted upon after all. If the notifier routine returns 0,
+ * then the signal will be blocked. Only one block per process is
+ * allowed. priv is a pointer to private data that the notifier routine
+ * can use to determine if the signal should be blocked or not. */
+
+void
+block_all_signals(int (*notifier)(void *priv), void *priv, sigset_t *mask)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&current->sigmask_lock, flags);
+ current->notifier_mask = mask;
+ current->notifier_data = priv;
+ current->notifier = notifier;
+ spin_unlock_irqrestore(&current->sigmask_lock, flags);
+}
+
+/* Notify the system that blocking has ended. */
+
+void
+unblock_all_signals(void)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&current->sigmask_lock, flags);
+ current->notifier = NULL;
+ current->notifier_data = NULL;
+ recalc_sigpending(current);
+ spin_unlock_irqrestore(&current->sigmask_lock, flags);
+}
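+
+/*
+ * Sketch of the notifier interface above, as a driver might use it; the
+ * my_* names are hypothetical.  The notifier returns 0 to block the
+ * signal and non-zero to let it be acted upon.
+ */
+struct my_state {
+ int allow_signals;
+};
+
+static int my_signal_notifier(void *priv)
+{
+ struct my_state *s = priv;
+
+ return s->allow_signals;
+}
+
+static void my_critical_region(struct my_state *s, sigset_t *mask)
+{
+ block_all_signals(my_signal_notifier, s, mask);
+ /* ... section during which signals in *mask go through the notifier ... */
+ unblock_all_signals();
+}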
+
+static int collect_signal(int sig, struct sigpending *list, siginfo_t *info)
+{
+ if (sigismember(&list->signal, sig)) {
+ /* Collect the siginfo appropriate to this signal. */
+ struct sigqueue *q, **pp;
+ pp = &list->head;
+ while ((q = *pp) != NULL) {
+ if (q->info.si_signo == sig)
+ goto found_it;
+ pp = &q->next;
+ }
+
+ /* Ok, it wasn't in the queue. We must have
+ been out of queue space. So zero out the
+ info. */
+ sigdelset(&list->signal, sig);
+ info->si_signo = sig;
+ info->si_errno = 0;
+ info->si_code = 0;
+ info->si_pid = 0;
+ info->si_uid = 0;
+ return 1;
+
+found_it:
+ if ((*pp = q->next) == NULL)
+ list->tail = pp;
+
+ /* Copy the sigqueue information and free the queue entry */
+ copy_siginfo(info, &q->info);
+ kmem_cache_free(sigqueue_cachep,q);
+ atomic_dec(&nr_queued_signals);
+
+ /* RT signals can be queued multiple times; keep the pending bit if more remain */
+ if (sig >= SIGRTMIN) {
+ while ((q = *pp) != NULL) {
+ if (q->info.si_signo == sig)
+ goto found_another;
+ pp = &q->next;
+ }
+ }
+
+ sigdelset(&list->signal, sig);
+found_another:
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Dequeue a signal and return the element to the caller, which is
+ * expected to free it.
+ *
+ * All callers must be holding current->sigmask_lock.
+ */
+
+int
+dequeue_signal(sigset_t *mask, siginfo_t *info)
+{
+ int sig = 0;
+
+#if DEBUG_SIG
+printk("SIG dequeue (%s:%d): %d ", current->comm, current->pid,
+ signal_pending(current));
+#endif
+
+ sig = next_signal(current, mask);
+ if (current->notifier) {
+ if (sigismember(current->notifier_mask, sig)) {
+ if (!(current->notifier)(current->notifier_data)) {
+ current->sigpending = 0;
+ return 0;
+ }
+ }
+ }
+
+ if (sig) {
+ if (!collect_signal(sig, &current->pending, info))
+ sig = 0;
+
+ /* XXX: Once POSIX.1b timers are in, if si_code == SI_TIMER,
+ we need to xchg out the timer overrun values. */
+ }
+ recalc_sigpending(current);
+
+#if DEBUG_SIG
+printk(" %d -> %d\n", signal_pending(current), sig);
+#endif
+
+ return sig;
+}
+
+static int rm_from_queue(int sig, struct sigpending *s)
+{
+ struct sigqueue *q, **pp;
+
+ if (!sigismember(&s->signal, sig))
+ return 0;
+
+ sigdelset(&s->signal, sig);
+
+ pp = &s->head;
+
+ while ((q = *pp) != NULL) {
+ if (q->info.si_signo == sig) {
+ if ((*pp = q->next) == NULL)
+ s->tail = pp;
+ kmem_cache_free(sigqueue_cachep,q);
+ atomic_dec(&nr_queued_signals);
+ continue;
+ }
+ pp = &q->next;
+ }
+ return 1;
+}
+
+/*
+ * Remove signal sig from t->pending.
+ * Returns 1 if sig was found.
+ *
+ * All callers must be holding t->sigmask_lock.
+ */
+static int rm_sig_from_queue(int sig, struct task_struct *t)
+{
+ return rm_from_queue(sig, &t->pending);
+}
+
+/*
+ * Bad permissions for sending the signal
+ */
+int bad_signal(int sig, struct siginfo *info, struct task_struct *t)
+{
+ return (!info || ((unsigned long)info != 1 && SI_FROMUSER(info)))
+ && ((sig != SIGCONT) || (current->session != t->session))
+ && (current->euid ^ t->suid) && (current->euid ^ t->uid)
+ && (current->uid ^ t->suid) && (current->uid ^ t->uid)
+ && !capable(CAP_KILL);
+}
+
+/*
+ * Signal type:
+ * < 0 : global action (kill - spread to all non-blocked threads)
+ * = 0 : ignored
+ * > 0 : wake up.
+ */
+static int signal_type(int sig, struct signal_struct *signals)
+{
+ unsigned long handler;
+
+ if (!signals)
+ return 0;
+
+ handler = (unsigned long) signals->action[sig-1].sa.sa_handler;
+ if (handler > 1)
+ return 1;
+
+ /* "Ignore" handler.. Illogical, but that has an implicit handler for SIGCHLD */
+ if (handler == 1)
+ return sig == SIGCHLD;
+
+ /* Default handler. Normally lethal, but.. */
+ switch (sig) {
+
+ /* Ignored */
+ case SIGCONT: case SIGWINCH:
+ case SIGCHLD: case SIGURG:
+ return 0;
+
+ /* Implicit behaviour */
+ case SIGTSTP: case SIGTTIN: case SIGTTOU:
+ return 1;
+
+ /* Implicit actions (kill or do special stuff) */
+ default:
+ return -1;
+ }
+}
+
+
+/*
+ * Determine whether a signal should be posted or not.
+ *
+ * Signals with SIG_IGN can be ignored, except for the
+ * special case of a SIGCHLD.
+ *
+ * Some signals with SIG_DFL default to a non-action.
+ */
+static int ignored_signal(int sig, struct task_struct *t)
+{
+ /* Don't ignore traced or blocked signals */
+ if ((t->ptrace & PT_PTRACED) || sigismember(&t->blocked, sig))
+ return 0;
+
+ return signal_type(sig, t->sig) == 0;
+}
+
+/*
+ * Handle TASK_STOPPED cases etc implicit behaviour
+ * of certain magical signals.
+ *
+ * SIGKILL gets spread out to every thread.
+ */
+static void handle_stop_signal(int sig, struct task_struct *t)
+{
+ switch (sig) {
+ case SIGKILL: case SIGCONT:
+ /* Wake up the process if stopped. */
+ if (t->state == TASK_STOPPED)
+ wake_up_process(t);
+ t->exit_code = 0;
+ rm_sig_from_queue(SIGSTOP, t);
+ rm_sig_from_queue(SIGTSTP, t);
+ rm_sig_from_queue(SIGTTOU, t);
+ rm_sig_from_queue(SIGTTIN, t);
+ break;
+
+ case SIGSTOP: case SIGTSTP:
+ case SIGTTIN: case SIGTTOU:
+ /* If we're stopping again, cancel SIGCONT */
+ rm_sig_from_queue(SIGCONT, t);
+ break;
+ }
+}
+
+static int send_signal(int sig, struct siginfo *info, struct sigpending *signals)
+{
+ struct sigqueue * q = NULL;
+
+ /* Real-time signals must be queued if sent by sigqueue, or
+ some other real-time mechanism. It is implementation
+ defined whether kill() does so. We attempt to do so, on
+ the principle of least surprise, but since kill is not
+ allowed to fail with EAGAIN when low on memory we just
+ make sure at least one signal gets delivered and don't
+ pass on the info struct. */
+
+ if (atomic_read(&nr_queued_signals) < max_queued_signals) {
+ q = kmem_cache_alloc(sigqueue_cachep, GFP_ATOMIC);
+ }
+
+ if (q) {
+ atomic_inc(&nr_queued_signals);
+ q->next = NULL;
+ *signals->tail = q;
+ signals->tail = &q->next;
+ switch ((unsigned long) info) {
+ case 0:
+ q->info.si_signo = sig;
+ q->info.si_errno = 0;
+ q->info.si_code = SI_USER;
+ q->info.si_pid = current->pid;
+ q->info.si_uid = current->uid;
+ break;
+ case 1:
+ q->info.si_signo = sig;
+ q->info.si_errno = 0;
+ q->info.si_code = SI_KERNEL;
+ q->info.si_pid = 0;
+ q->info.si_uid = 0;
+ break;
+ default:
+ copy_siginfo(&q->info, info);
+ break;
+ }
+ } else if (sig >= SIGRTMIN && info && (unsigned long)info != 1
+ && info->si_code != SI_USER) {
+ /*
+ * Queue overflow, abort. We may abort if the signal was rt
+ * and sent by user using something other than kill().
+ */
+ return -EAGAIN;
+ }
+
+ sigaddset(&signals->signal, sig);
+ return 0;
+}
+
+/*
+ * Tell a process that it has a new active signal..
+ *
+ * NOTE! we rely on the previous spin_lock to
+ * lock interrupts for us! We can only be called with
+ * "sigmask_lock" held, and the local interrupt must
+ * have been disabled when that got acquired!
+ *
+ * No need to set need_resched since signal event passing
+ * goes through ->blocked
+ */
+static inline void signal_wake_up(struct task_struct *t)
+{
+ t->sigpending = 1;
+
+ if (t->state & TASK_INTERRUPTIBLE) {
+ wake_up_process(t);
+ return;
+ }
+
+#ifdef CONFIG_SMP
+ /*
+ * If the task is running on a different CPU
+ * force a reschedule on the other CPU to make
+ * it notice the new signal quickly.
+ *
+ * The code below is a tad loose and might occasionally
+ * kick the wrong CPU if we catch the process in the
+ * process of changing - but no harm is done by that
+ * other than doing an extra (lightweight) IPI interrupt.
+ */
+ spin_lock(&runqueue_lock);
+ if (t->has_cpu && t->processor != smp_processor_id())
+ smp_send_reschedule(t->processor);
+ spin_unlock(&runqueue_lock);
+#endif /* CONFIG_SMP */
+}
+
+static int deliver_signal(int sig, struct siginfo *info, struct task_struct *t)
+{
+ int retval = send_signal(sig, info, &t->pending);
+
+ if (!retval && !sigismember(&t->blocked, sig))
+ signal_wake_up(t);
+
+ return retval;
+}
+
+int
+send_sig_info(int sig, struct siginfo *info, struct task_struct *t)
+{
+ unsigned long flags;
+ int ret;
+
+
+#if DEBUG_SIG
+printk("SIG queue (%s:%d): %d ", t->comm, t->pid, sig);
+#endif
+
+ ret = -EINVAL;
+ if (sig < 0 || sig > _NSIG)
+ goto out_nolock;
+ /* The somewhat baroque permissions check... */
+ ret = -EPERM;
+ if (bad_signal(sig, info, t))
+ goto out_nolock;
+
+ /* The null signal is a permissions and process existence probe.
+ No signal is actually delivered. Same goes for zombies. */
+ ret = 0;
+ if (!sig || !t->sig)
+ goto out_nolock;
+
+ spin_lock_irqsave(&t->sigmask_lock, flags);
+ handle_stop_signal(sig, t);
+
+ /* Optimize away the signal, if it's a signal that can be
+ handled immediately (ie non-blocked and untraced) and
+ that is ignored (either explicitly or by default). */
+
+ if (ignored_signal(sig, t))
+ goto out;
+
+ /* Support queueing exactly one non-rt signal, so that we
+ can get more detailed information about the cause of
+ the signal. */
+ if (sig < SIGRTMIN && sigismember(&t->pending.signal, sig))
+ goto out;
+
+ ret = deliver_signal(sig, info, t);
+out:
+ spin_unlock_irqrestore(&t->sigmask_lock, flags);
+ if ((t->state & TASK_INTERRUPTIBLE) && signal_pending(t))
+ wake_up_process(t);
+out_nolock:
+#if DEBUG_SIG
+printk(" %d -> %d\n", signal_pending(t), ret);
+#endif
+
+ return ret;
+}
+
+/*
+ * Force a signal that the process can't ignore: if necessary
+ * we unblock the signal and change any SIG_IGN to SIG_DFL.
+ */
+
+int
+force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
+{
+ unsigned long int flags;
+
+ spin_lock_irqsave(&t->sigmask_lock, flags);
+ if (t->sig == NULL) {
+ spin_unlock_irqrestore(&t->sigmask_lock, flags);
+ return -ESRCH;
+ }
+
+ if (t->sig->action[sig-1].sa.sa_handler == SIG_IGN)
+ t->sig->action[sig-1].sa.sa_handler = SIG_DFL;
+ sigdelset(&t->blocked, sig);
+ recalc_sigpending(t);
+ spin_unlock_irqrestore(&t->sigmask_lock, flags);
+
+ return send_sig_info(sig, info, t);
+}
+
+/*
+ * kill_pg_info() sends a signal to a process group: this is what the tty
+ * control characters do (^C, ^Z etc)
+ */
+
+int
+kill_pg_info(int sig, struct siginfo *info, pid_t pgrp)
+{
+ int retval = -EINVAL;
+ if (pgrp > 0) {
+ struct task_struct *p;
+
+ retval = -ESRCH;
+ read_lock(&tasklist_lock);
+ for_each_task(p) {
+ if (p->pgrp == pgrp) {
+ int err = send_sig_info(sig, info, p);
+ if (retval)
+ retval = err;
+ }
+ }
+ read_unlock(&tasklist_lock);
+ }
+ return retval;
+}
+
+/*
+ * kill_sl_info() sends a signal to the session leader: this is used
+ * to send SIGHUP to the controlling process of a terminal when
+ * the connection is lost.
+ */
+
+int
+kill_sl_info(int sig, struct siginfo *info, pid_t sess)
+{
+ int retval = -EINVAL;
+ if (sess > 0) {
+ struct task_struct *p;
+
+ retval = -ESRCH;
+ read_lock(&tasklist_lock);
+ for_each_task(p) {
+ if (p->leader && p->session == sess) {
+ int err = send_sig_info(sig, info, p);
+ if (retval)
+ retval = err;
+ }
+ }
+ read_unlock(&tasklist_lock);
+ }
+ return retval;
+}
+
+inline int
+kill_proc_info(int sig, struct siginfo *info, pid_t pid)
+{
+ int error;
+ struct task_struct *p;
+
+ read_lock(&tasklist_lock);
+ p = find_task_by_pid(pid);
+ error = -ESRCH;
+ if (p)
+ error = send_sig_info(sig, info, p);
+ read_unlock(&tasklist_lock);
+ return error;
+}
+
+
+/*
+ * kill_something_info() interprets pid in interesting ways just like kill(2).
+ *
+ * POSIX specifies that kill(-1,sig) is unspecified, but what we have
+ * is probably wrong. Should make it like BSD or SYSV.
+ */
+
+static int kill_something_info(int sig, struct siginfo *info, int pid)
+{
+ if (!pid) {
+ return kill_pg_info(sig, info, current->pgrp);
+ } else if (pid == -1) {
+ int retval = 0, count = 0;
+ struct task_struct * p;
+
+ read_lock(&tasklist_lock);
+ for_each_task(p) {
+ if (p->pid > 1 && p != current) {
+ int err = send_sig_info(sig, info, p);
+ ++count;
+ if (err != -EPERM)
+ retval = err;
+ }
+ }
+ read_unlock(&tasklist_lock);
+ return count ? retval : -ESRCH;
+ } else if (pid < 0) {
+ return kill_pg_info(sig, info, -pid);
+ } else {
+ return kill_proc_info(sig, info, pid);
+ }
+}
+
+/*
+ * These are for backward compatibility with the rest of the kernel source.
+ */
+
+int
+send_sig(int sig, struct task_struct *p, int priv)
+{
+ return send_sig_info(sig, (void*)(long)(priv != 0), p);
+}
+
+void
+force_sig(int sig, struct task_struct *p)
+{
+ force_sig_info(sig, (void*)1L, p);
+}
+
+int
+kill_pg(pid_t pgrp, int sig, int priv)
+{
+ return kill_pg_info(sig, (void *)(long)(priv != 0), pgrp);
+}
+
+int
+kill_sl(pid_t sess, int sig, int priv)
+{
+ return kill_sl_info(sig, (void *)(long)(priv != 0), sess);
+}
+
+int
+kill_proc(pid_t pid, int sig, int priv)
+{
+ return kill_proc_info(sig, (void *)(long)(priv != 0), pid);
+}
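+
+/*
+ * Sketch of how kernel code typically uses the wrappers above; the function
+ * and its arguments are illustrative.  A non-zero "priv" marks the signal
+ * as kernel-generated (SI_KERNEL), which also bypasses the bad_signal()
+ * permission check.
+ */
+static void my_nudge(struct task_struct *p, pid_t pgrp)
+{
+ send_sig(SIGHUP, p, 1); /* one task, kernel-private */
+ kill_pg(pgrp, SIGTERM, 1); /* a whole process group */
+}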
+
+/*
+ * Joy. Or not. Pthread wants us to wake up every thread
+ * in our parent group.
+ */
+static void wake_up_parent(struct task_struct *parent)
+{
+ struct task_struct *tsk = parent;
+
+ do {
+ wake_up_interruptible(&tsk->wait_chldexit);
+ tsk = next_thread(tsk);
+ } while (tsk != parent);
+}
+
+/*
+ * Let a parent know about a status change of a child.
+ */
+
+void do_notify_parent(struct task_struct *tsk, int sig)
+{
+ struct siginfo info;
+ int why, status;
+
+ info.si_signo = sig;
+ info.si_errno = 0;
+ info.si_pid = tsk->pid;
+ info.si_uid = tsk->uid;
+
+ /* FIXME: find out whether or not this is supposed to be c*time. */
+ info.si_utime = tsk->times.tms_utime;
+ info.si_stime = tsk->times.tms_stime;
+
+ status = tsk->exit_code & 0x7f;
+ why = SI_KERNEL; /* shouldn't happen */
+ switch (tsk->state) {
+ case TASK_STOPPED:
+ /* FIXME -- can we deduce CLD_TRAPPED or CLD_CONTINUED? */
+ if (tsk->ptrace & PT_PTRACED)
+ why = CLD_TRAPPED;
+ else
+ why = CLD_STOPPED;
+ break;
+
+ default:
+ if (tsk->exit_code & 0x80)
+ why = CLD_DUMPED;
+ else if (tsk->exit_code & 0x7f)
+ why = CLD_KILLED;
+ else {
+ why = CLD_EXITED;
+ status = tsk->exit_code >> 8;
+ }
+ break;
+ }
+ info.si_code = why;
+ info.si_status = status;
+
+ send_sig_info(sig, &info, tsk->p_pptr);
+ wake_up_parent(tsk->p_pptr);
+}
+
+
+/*
+ * We need the tasklist lock because it's the only
+ * thing that protects our "parent" pointer.
+ *
+ * exit.c calls "do_notify_parent()" directly, because
+ * it already has the tasklist lock.
+ */
+void
+notify_parent(struct task_struct *tsk, int sig)
+{
+ read_lock(&tasklist_lock);
+ do_notify_parent(tsk, sig);
+ read_unlock(&tasklist_lock);
+}
+
+EXPORT_SYMBOL(dequeue_signal);
+EXPORT_SYMBOL(flush_signals);
+EXPORT_SYMBOL(force_sig);
+EXPORT_SYMBOL(force_sig_info);
+EXPORT_SYMBOL(kill_pg);
+EXPORT_SYMBOL(kill_pg_info);
+EXPORT_SYMBOL(kill_proc);
+EXPORT_SYMBOL(kill_proc_info);
+EXPORT_SYMBOL(kill_sl);
+EXPORT_SYMBOL(kill_sl_info);
+EXPORT_SYMBOL(notify_parent);
+EXPORT_SYMBOL(recalc_sigpending);
+EXPORT_SYMBOL(send_sig);
+EXPORT_SYMBOL(send_sig_info);
+EXPORT_SYMBOL(block_all_signals);
+EXPORT_SYMBOL(unblock_all_signals);
+
+
+/*
+ * System call entry points.
+ */
+
+/*
+ * We don't need to get the kernel lock - this is all local to this
+ * particular thread.. (and that's good, because this is _heavily_
+ * used by various programs)
+ */
+
+asmlinkage long
+sys_rt_sigprocmask(int how, sigset_t *set, sigset_t *oset, size_t sigsetsize)
+{
+ int error = -EINVAL;
+ sigset_t old_set, new_set;
+
+ /* XXX: Don't preclude handling different sized sigset_t's. */
+ if (sigsetsize != sizeof(sigset_t))
+ goto out;
+
+ if (set) {
+ error = -EFAULT;
+ if (copy_from_user(&new_set, set, sizeof(*set)))
+ goto out;
+ sigdelsetmask(&new_set, sigmask(SIGKILL)|sigmask(SIGSTOP));
+
+ spin_lock_irq(&current->sigmask_lock);
+ old_set = current->blocked;
+
+ error = 0;
+ switch (how) {
+ default:
+ error = -EINVAL;
+ break;
+ case SIG_BLOCK:
+ sigorsets(&new_set, &old_set, &new_set);
+ break;
+ case SIG_UNBLOCK:
+ signandsets(&new_set, &old_set, &new_set);
+ break;
+ case SIG_SETMASK:
+ break;
+ }
+
+ current->blocked = new_set;
+ recalc_sigpending(current);
+ spin_unlock_irq(&current->sigmask_lock);
+ if (error)
+ goto out;
+ if (oset)
+ goto set_old;
+ } else if (oset) {
+ spin_lock_irq(&current->sigmask_lock);
+ old_set = current->blocked;
+ spin_unlock_irq(&current->sigmask_lock);
+
+ set_old:
+ error = -EFAULT;
+ if (copy_to_user(oset, &old_set, sizeof(*oset)))
+ goto out;
+ }
+ error = 0;
+out:
+ return error;
+}
+
+long do_sigpending(void *set, unsigned long sigsetsize)
+{
+ long error = -EINVAL;
+ sigset_t pending;
+
+ if (sigsetsize > sizeof(sigset_t))
+ goto out;
+
+ spin_lock_irq(&current->sigmask_lock);
+ sigandsets(&pending, &current->blocked, &current->pending.signal);
+ spin_unlock_irq(&current->sigmask_lock);
+
+ error = -EFAULT;
+ if (!copy_to_user(set, &pending, sigsetsize))
+ error = 0;
+out:
+ return error;
+}
+
+asmlinkage long
+sys_rt_sigpending(sigset_t *set, size_t sigsetsize)
+{
+ return do_sigpending(set, sigsetsize);
+}
+
+asmlinkage long
+sys_rt_sigtimedwait(const sigset_t *uthese, siginfo_t *uinfo,
+ const struct timespec *uts, size_t sigsetsize)
+{
+ int ret, sig;
+ sigset_t these;
+ struct timespec ts;
+ siginfo_t info;
+ long timeout = 0;
+
+ /* XXX: Don't preclude handling different sized sigset_t's. */
+ if (sigsetsize != sizeof(sigset_t))
+ return -EINVAL;
+
+ if (copy_from_user(&these, uthese, sizeof(these)))
+ return -EFAULT;
+
+ /*
+ * Invert the set of allowed signals to get those we
+ * want to block.
+ */
+ sigdelsetmask(&these, sigmask(SIGKILL)|sigmask(SIGSTOP));
+ signotset(&these);
+
+ if (uts) {
+ if (copy_from_user(&ts, uts, sizeof(ts)))
+ return -EFAULT;
+ if (ts.tv_nsec >= 1000000000L || ts.tv_nsec < 0
+ || ts.tv_sec < 0)
+ return -EINVAL;
+ }
+
+ spin_lock_irq(&current->sigmask_lock);
+ sig = dequeue_signal(&these, &info);
+ if (!sig) {
+ timeout = MAX_SCHEDULE_TIMEOUT;
+ if (uts)
+ timeout = (timespec_to_jiffies(&ts)
+ + (ts.tv_sec || ts.tv_nsec));
+
+ if (timeout) {
+ /* None ready -- temporarily unblock those we're
+ * interested in while we are sleeping, so that we'll
+ * be awakened when they arrive. */
+ sigset_t oldblocked = current->blocked;
+ sigandsets(&current->blocked, &current->blocked, &these);
+ recalc_sigpending(current);
+ spin_unlock_irq(&current->sigmask_lock);
+
+ current->state = TASK_INTERRUPTIBLE;
+ timeout = schedule_timeout(timeout);
+
+ spin_lock_irq(&current->sigmask_lock);
+ sig = dequeue_signal(&these, &info);
+ current->blocked = oldblocked;
+ recalc_sigpending(current);
+ }
+ }
+ spin_unlock_irq(&current->sigmask_lock);
+
+ if (sig) {
+ ret = sig;
+ if (uinfo) {
+ if (copy_siginfo_to_user(uinfo, &info))
+ ret = -EFAULT;
+ }
+ } else {
+ ret = -EAGAIN;
+ if (timeout)
+ ret = -EINTR;
+ }
+
+ return ret;
+}
+
+asmlinkage long
+sys_kill(int pid, int sig)
+{
+ struct siginfo info;
+
+ info.si_signo = sig;
+ info.si_errno = 0;
+ info.si_code = SI_USER;
+ info.si_pid = current->pid;
+ info.si_uid = current->uid;
+
+ return kill_something_info(sig, &info, pid);
+}
+
+asmlinkage long
+sys_rt_sigqueueinfo(int pid, int sig, siginfo_t *uinfo)
+{
+ siginfo_t info;
+
+ if (copy_from_user(&info, uinfo, sizeof(siginfo_t)))
+ return -EFAULT;
+
+ /* Not even root can pretend to send signals from the kernel.
+ Nor can they impersonate a kill(), which adds source info. */
+ if (info.si_code >= 0)
+ return -EPERM;
+ info.si_signo = sig;
+
+ /* POSIX.1b doesn't mention process groups. */
+ return kill_proc_info(sig, &info, pid);
+}
+
+int
+do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact)
+{
+ struct k_sigaction *k;
+
+ if (sig < 1 || sig > _NSIG ||
+ (act && (sig == SIGKILL || sig == SIGSTOP)))
+ return -EINVAL;
+
+ k = &current->sig->action[sig-1];
+
+ spin_lock(&current->sig->siglock);
+
+ if (oact)
+ *oact = *k;
+
+ if (act) {
+ *k = *act;
+ sigdelsetmask(&k->sa.sa_mask, sigmask(SIGKILL) | sigmask(SIGSTOP));
+
+ /*
+ * POSIX 3.3.1.3:
+ * "Setting a signal action to SIG_IGN for a signal that is
+ * pending shall cause the pending signal to be discarded,
+ * whether or not it is blocked."
+ *
+ * "Setting a signal action to SIG_DFL for a signal that is
+ * pending and whose default action is to ignore the signal
+ * (for example, SIGCHLD), shall cause the pending signal to
+ * be discarded, whether or not it is blocked"
+ *
+ * Note the silly behaviour of SIGCHLD: SIG_IGN means that the
+ * signal isn't actually ignored, but does automatic child
+ * reaping, while SIG_DFL is explicitly said by POSIX to force
+ * the signal to be ignored.
+ */
+
+ if (k->sa.sa_handler == SIG_IGN
+ || (k->sa.sa_handler == SIG_DFL
+ && (sig == SIGCONT ||
+ sig == SIGCHLD ||
+ sig == SIGWINCH))) {
+ spin_lock_irq(&current->sigmask_lock);
+ if (rm_sig_from_queue(sig, current))
+ recalc_sigpending(current);
+ spin_unlock_irq(&current->sigmask_lock);
+ }
+ }
+
+ spin_unlock(&current->sig->siglock);
+ return 0;
+}
+
+int
+do_sigaltstack (const stack_t *uss, stack_t *uoss, unsigned long sp)
+{
+ stack_t oss;
+ int error;
+
+ if (uoss) {
+ oss.ss_sp = (void *) current->sas_ss_sp;
+ oss.ss_size = current->sas_ss_size;
+ oss.ss_flags = sas_ss_flags(sp);
+ }
+
+ if (uss) {
+ void *ss_sp;
+ size_t ss_size;
+ int ss_flags;
+
+ error = -EFAULT;
+ if (verify_area(VERIFY_READ, uss, sizeof(*uss))
+ || __get_user(ss_sp, &uss->ss_sp)
+ || __get_user(ss_flags, &uss->ss_flags)
+ || __get_user(ss_size, &uss->ss_size))
+ goto out;
+
+ error = -EPERM;
+ if (on_sig_stack (sp))
+ goto out;
+
+ error = -EINVAL;
+ /*
+ *
+ * Note - this code used to test ss_flags incorrectly.
+ * Old code may have been written using ss_flags==0
+ * to mean ss_flags==SS_ONSTACK (as this was the only
+ * way that worked), so this fix preserves that older
+ * mechanism.
+ */
+ if (ss_flags != SS_DISABLE && ss_flags != SS_ONSTACK && ss_flags != 0)
+ goto out;
+
+ if (ss_flags == SS_DISABLE) {
+ ss_size = 0;
+ ss_sp = NULL;
+ } else {
+ error = -ENOMEM;
+ if (ss_size < MINSIGSTKSZ)
+ goto out;
+ }
+
+ current->sas_ss_sp = (unsigned long) ss_sp;
+ current->sas_ss_size = ss_size;
+ }
+
+ if (uoss) {
+ error = -EFAULT;
+ if (copy_to_user(uoss, &oss, sizeof(oss)))
+ goto out;
+ }
+
+ error = 0;
+out:
+ return error;
+}
+
+asmlinkage long
+sys_sigpending(old_sigset_t *set)
+{
+ return do_sigpending(set, sizeof(*set));
+}
+
+#if !defined(__alpha__)
+/* Alpha has its own versions with special arguments. */
+
+asmlinkage long
+sys_sigprocmask(int how, old_sigset_t *set, old_sigset_t *oset)
+{
+ int error;
+ old_sigset_t old_set, new_set;
+
+ if (set) {
+ error = -EFAULT;
+ if (copy_from_user(&new_set, set, sizeof(*set)))
+ goto out;
+ new_set &= ~(sigmask(SIGKILL)|sigmask(SIGSTOP));
+
+ spin_lock_irq(&current->sigmask_lock);
+ old_set = current->blocked.sig[0];
+
+ error = 0;
+ switch (how) {
+ default:
+ error = -EINVAL;
+ break;
+ case SIG_BLOCK:
+ sigaddsetmask(&current->blocked, new_set);
+ break;
+ case SIG_UNBLOCK:
+ sigdelsetmask(&current->blocked, new_set);
+ break;
+ case SIG_SETMASK:
+ current->blocked.sig[0] = new_set;
+ break;
+ }
+
+ recalc_sigpending(current);
+ spin_unlock_irq(&current->sigmask_lock);
+ if (error)
+ goto out;
+ if (oset)
+ goto set_old;
+ } else if (oset) {
+ old_set = current->blocked.sig[0];
+ set_old:
+ error = -EFAULT;
+ if (copy_to_user(oset, &old_set, sizeof(*oset)))
+ goto out;
+ }
+ error = 0;
+out:
+ return error;
+}
+
+#ifndef __sparc__
+asmlinkage long
+sys_rt_sigaction(int sig, const struct sigaction *act, struct sigaction *oact,
+ size_t sigsetsize)
+{
+ struct k_sigaction new_sa, old_sa;
+ int ret = -EINVAL;
+
+ /* XXX: Don't preclude handling different sized sigset_t's. */
+ if (sigsetsize != sizeof(sigset_t))
+ goto out;
+
+ if (act) {
+ if (copy_from_user(&new_sa.sa, act, sizeof(new_sa.sa)))
+ return -EFAULT;
+ }
+
+ ret = do_sigaction(sig, act ? &new_sa : NULL, oact ? &old_sa : NULL);
+
+ if (!ret && oact) {
+ if (copy_to_user(oact, &old_sa.sa, sizeof(old_sa.sa)))
+ return -EFAULT;
+ }
+out:
+ return ret;
+}
+#endif /* __sparc__ */
+#endif
+
+#if !defined(__alpha__) && !defined(__ia64__)
+/*
+ * For backwards compatibility. Functionality superseded by sigprocmask.
+ */
+asmlinkage long
+sys_sgetmask(void)
+{
+ /* SMP safe */
+ return current->blocked.sig[0];
+}
+
+asmlinkage long
+sys_ssetmask(int newmask)
+{
+ int old;
+
+ spin_lock_irq(&current->sigmask_lock);
+ old = current->blocked.sig[0];
+
+ siginitset(&current->blocked, newmask & ~(sigmask(SIGKILL)|
+ sigmask(SIGSTOP)));
+ recalc_sigpending(current);
+ spin_unlock_irq(&current->sigmask_lock);
+
+ return old;
+}
+#endif /* !defined(__alpha__) */
+
+#if !defined(__alpha__) && !defined(__ia64__) && !defined(__mips__)
+/*
+ * For backwards compatibility. Functionality superseded by sigaction.
+ */
+asmlinkage unsigned long
+sys_signal(int sig, __sighandler_t handler)
+{
+ struct k_sigaction new_sa, old_sa;
+ int ret;
+
+ new_sa.sa.sa_handler = handler;
+ new_sa.sa.sa_flags = SA_ONESHOT | SA_NOMASK;
+
+ ret = do_sigaction(sig, &new_sa, &old_sa);
+
+ return ret ? ret : (unsigned long)old_sa.sa.sa_handler;
+}
+#endif /* !alpha && !__ia64__ && !defined(__mips__) */
diff --git a/kernel/softirq.c b/kernel/softirq.c
new file mode 100644
index 000000000000..fe066399dafa
--- /dev/null
+++ b/kernel/softirq.c
@@ -0,0 +1,317 @@
+/*
+ * linux/kernel/softirq.c
+ *
+ * Copyright (C) 1992 Linus Torvalds
+ *
+ * Fixed a disable_bh()/enable_bh() race (was causing a console lockup)
+ * due to non-atomic handling of bh_mask_count. Copyright (C) 1998 Andrea Arcangeli
+ *
+ * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/kernel_stat.h>
+#include <linux/interrupt.h>
+#include <linux/smp_lock.h>
+#include <linux/init.h>
+#include <linux/tqueue.h>
+
+/*
+ - No shared variables, all the data are CPU local.
+ - If a softirq needs serialization, let it serialize itself
+ by its own spinlocks.
+ - Even if a softirq is serialized, only the local cpu is marked for
+   execution, so we get a sort of weak cpu binding. It is still not
+   clear whether this results in better locality or not.
+ - These softirqs are not masked by global cli() and start_bh_atomic()
+   (for obvious reasons). Hence, old parts of the code still using global
+   locks MUST NOT use softirqs directly, but must insert interfacing
+   routines that acquire the global locks. See the BH implementation for
+   an example.
+
+ Examples:
+ - NET RX softirq. It is multithreaded and does not require
+ any global serialization.
+ - NET TX softirq. It kicks software netdevice queues, hence
+ it is logically serialized per device, but this serialization
+ is invisible to common code.
+ - Tasklets: serialized with respect to themselves.
+ - Bottom halves: globally serialized, grr...
+ */
+
+/* No separate irq_stat for s390, it is part of PSA */
+#if !defined(CONFIG_ARCH_S390)
+irq_cpustat_t irq_stat[NR_CPUS];
+#endif /* CONFIG_ARCH_S390 */
+
+static struct softirq_action softirq_vec[32] __cacheline_aligned;
+
+asmlinkage void do_softirq()
+{
+ int cpu = smp_processor_id();
+ __u32 active, mask;
+
+ if (in_interrupt())
+ return;
+
+ local_bh_disable();
+
+ local_irq_disable();
+ mask = softirq_mask(cpu);
+ active = softirq_active(cpu) & mask;
+
+ if (active) {
+ struct softirq_action *h;
+
+restart:
+ /* Reset active bitmask before enabling irqs */
+ softirq_active(cpu) &= ~active;
+
+ local_irq_enable();
+
+ h = softirq_vec;
+ mask &= ~active;
+
+ do {
+ if (active & 1)
+ h->action(h);
+ h++;
+ active >>= 1;
+ } while (active);
+
+ local_irq_disable();
+
+ active = softirq_active(cpu);
+ if ((active &= mask) != 0)
+ goto retry;
+ }
+
+ local_bh_enable();
+
+ /* Leave with hard irqs locally disabled. It is critical to close
+ * the window for infinite recursion: while we held the local bh count
+ * it protected us; now we are defenceless.
+ */
+ return;
+
+retry:
+ goto restart;
+}
+
+
+static spinlock_t softirq_mask_lock = SPIN_LOCK_UNLOCKED;
+
+void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)
+{
+ unsigned long flags;
+ int i;
+
+ spin_lock_irqsave(&softirq_mask_lock, flags);
+ softirq_vec[nr].data = data;
+ softirq_vec[nr].action = action;
+
+ for (i=0; i<NR_CPUS; i++)
+ softirq_mask(i) |= (1<<nr);
+ spin_unlock_irqrestore(&softirq_mask_lock, flags);
+}
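+
+/*
+ * Sketch of registering and raising a softirq with the interface above.
+ * MY_SOFTIRQ and my_softirq_handler are hypothetical; real softirq numbers
+ * come from the fixed HI_SOFTIRQ/NET_*_SOFTIRQ/TASKLET_SOFTIRQ enum.
+ */
+static void my_softirq_handler(struct softirq_action *h)
+{
+ /* runs with hard irqs enabled; may run concurrently on other CPUs,
+  so serialize with your own spinlocks if needed */
+}
+
+static void my_softirq_setup(void)
+{
+ open_softirq(MY_SOFTIRQ, my_softirq_handler, NULL);
+ __cpu_raise_softirq(smp_processor_id(), MY_SOFTIRQ); /* mark it pending */
+}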
+
+
+/* Tasklets */
+
+struct tasklet_head tasklet_vec[NR_CPUS] __cacheline_aligned;
+
+static void tasklet_action(struct softirq_action *a)
+{
+ int cpu = smp_processor_id();
+ struct tasklet_struct *list;
+
+ local_irq_disable();
+ list = tasklet_vec[cpu].list;
+ tasklet_vec[cpu].list = NULL;
+ local_irq_enable();
+
+ while (list != NULL) {
+ struct tasklet_struct *t = list;
+
+ list = list->next;
+
+ if (tasklet_trylock(t)) {
+ if (atomic_read(&t->count) == 0) {
+ clear_bit(TASKLET_STATE_SCHED, &t->state);
+
+ t->func(t->data);
+ /*
+ * tasklet_trylock() uses test_and_set_bit, which implies
+ * an mb when it returns zero, thus we need the explicit
+ * mb only here, when closing the critical section.
+ */
+#ifdef CONFIG_SMP
+ smp_mb__before_clear_bit();
+#endif
+ tasklet_unlock(t);
+ continue;
+ }
+ tasklet_unlock(t);
+ }
+ local_irq_disable();
+ t->next = tasklet_vec[cpu].list;
+ tasklet_vec[cpu].list = t;
+ __cpu_raise_softirq(cpu, TASKLET_SOFTIRQ);
+ local_irq_enable();
+ }
+}
+
+
+
+struct tasklet_head tasklet_hi_vec[NR_CPUS] __cacheline_aligned;
+
+static void tasklet_hi_action(struct softirq_action *a)
+{
+ int cpu = smp_processor_id();
+ struct tasklet_struct *list;
+
+ local_irq_disable();
+ list = tasklet_hi_vec[cpu].list;
+ tasklet_hi_vec[cpu].list = NULL;
+ local_irq_enable();
+
+ while (list != NULL) {
+ struct tasklet_struct *t = list;
+
+ list = list->next;
+
+ if (tasklet_trylock(t)) {
+ if (atomic_read(&t->count) == 0) {
+ clear_bit(TASKLET_STATE_SCHED, &t->state);
+
+ t->func(t->data);
+ tasklet_unlock(t);
+ continue;
+ }
+ tasklet_unlock(t);
+ }
+ local_irq_disable();
+ t->next = tasklet_hi_vec[cpu].list;
+ tasklet_hi_vec[cpu].list = t;
+ __cpu_raise_softirq(cpu, HI_SOFTIRQ);
+ local_irq_enable();
+ }
+}
+
+
+void tasklet_init(struct tasklet_struct *t,
+ void (*func)(unsigned long), unsigned long data)
+{
+ t->func = func;
+ t->data = data;
+ t->state = 0;
+ atomic_set(&t->count, 0);
+}
+
+void tasklet_kill(struct tasklet_struct *t)
+{
+ if (in_interrupt())
+ printk("Attempt to kill tasklet from interrupt\n");
+
+ while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
+ current->state = TASK_RUNNING;
+ do {
+ current->policy |= SCHED_YIELD;
+ schedule();
+ } while (test_bit(TASKLET_STATE_SCHED, &t->state));
+ }
+ tasklet_unlock_wait(t);
+ clear_bit(TASKLET_STATE_SCHED, &t->state);
+}
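+
+/*
+ * Sketch of typical tasklet usage built on the helpers above; my_tasklet
+ * and my_tasklet_func are hypothetical.  tasklet_schedule() (declared in
+ * <linux/interrupt.h>) is what feeds tasklet_action().
+ */
+static void my_tasklet_func(unsigned long data)
+{
+ /* deferred work, runs in softirq context */
+}
+
+static struct tasklet_struct my_tasklet;
+
+static void my_init(void)
+{
+ tasklet_init(&my_tasklet, my_tasklet_func, 0);
+}
+
+static void my_interrupt_handler(void)
+{
+ tasklet_schedule(&my_tasklet); /* queues onto tasklet_vec[] above */
+}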
+
+
+
+/* Old style BHs */
+
+static void (*bh_base[32])(void);
+struct tasklet_struct bh_task_vec[32];
+
+/* BHs are serialized by the spinlock global_bh_lock.
+
+   It would still be possible to implement synchronize_bh() as
+   spin_unlock_wait(&global_bh_lock). That operation is not used
+   by the kernel at the moment, so the only reason this lock is
+   not made private is wait_on_irq().
+
+   It can be removed only after auditing all the BHs.
+ */
+spinlock_t global_bh_lock = SPIN_LOCK_UNLOCKED;
+
+static void bh_action(unsigned long nr)
+{
+ int cpu = smp_processor_id();
+
+ if (!spin_trylock(&global_bh_lock))
+ goto resched;
+
+ if (!hardirq_trylock(cpu))
+ goto resched_unlock;
+
+ if (bh_base[nr])
+ bh_base[nr]();
+
+ hardirq_endlock(cpu);
+ spin_unlock(&global_bh_lock);
+ return;
+
+resched_unlock:
+ spin_unlock(&global_bh_lock);
+resched:
+ mark_bh(nr);
+}
+
+void init_bh(int nr, void (*routine)(void))
+{
+ bh_base[nr] = routine;
+ mb();
+}
+
+void remove_bh(int nr)
+{
+ tasklet_kill(bh_task_vec+nr);
+ bh_base[nr] = NULL;
+}
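+
+/*
+ * Sketch of the old-style BH interface above.  FOO_BH stands for one of
+ * the fixed slots in the BH enum (illustrative); my_bh_routine is
+ * hypothetical.
+ */
+static void my_bh_routine(void)
+{
+ /* runs globally serialized under global_bh_lock */
+}
+
+/*
+ * setup:    init_bh(FOO_BH, my_bh_routine);
+ * trigger:  mark_bh(FOO_BH);        (typically from an interrupt handler)
+ * teardown: remove_bh(FOO_BH);
+ */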
+
+void __init softirq_init()
+{
+ int i;
+
+ for (i=0; i<32; i++)
+ tasklet_init(bh_task_vec+i, bh_action, i);
+
+ open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
+ open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL);
+}
+
+void __run_task_queue(task_queue *list)
+{
+ struct list_head head, *next;
+ unsigned long flags;
+
+ spin_lock_irqsave(&tqueue_lock, flags);
+ list_add(&head, list);
+ list_del_init(list);
+ spin_unlock_irqrestore(&tqueue_lock, flags);
+
+ next = head.next;
+ while (next != &head) {
+ void (*f) (void *);
+ struct tq_struct *p;
+ void *data;
+
+ p = list_entry(next, struct tq_struct, list);
+ next = next->next;
+ f = p->routine;
+ data = p->data;
+ wmb();
+ p->sync = 0;
+ if (f)
+ f(data);
+ }
+}
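+
+/*
+ * Sketch of the producer side that pairs with __run_task_queue(); the
+ * my_* names are hypothetical.  tq_immediate plus mark_bh(IMMEDIATE_BH)
+ * is the long-standing combination.
+ */
+static void my_deferred_routine(void *data)
+{
+ /* deferred work */
+}
+
+static struct tq_struct my_task = { routine: my_deferred_routine };
+
+static void my_defer_work(void)
+{
+ queue_task(&my_task, &tq_immediate);
+ mark_bh(IMMEDIATE_BH); /* the IMMEDIATE_BH handler drains the queue */
+}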
diff --git a/kernel/sys.c b/kernel/sys.c
new file mode 100644
index 000000000000..38eb5dee9ce8
--- /dev/null
+++ b/kernel/sys.c
@@ -0,0 +1,1219 @@
+/*
+ * linux/kernel/sys.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/utsname.h>
+#include <linux/mman.h>
+#include <linux/smp_lock.h>
+#include <linux/notifier.h>
+#include <linux/reboot.h>
+#include <linux/prctl.h>
+#include <linux/init.h>
+#include <linux/highuid.h>
+
+#include <asm/uaccess.h>
+#include <asm/io.h>
+
+/*
+ * this is where the system-wide overflow UID and GID are defined, for
+ * architectures that now have 32-bit UID/GID but didn't in the past
+ */
+
+int overflowuid = DEFAULT_OVERFLOWUID;
+int overflowgid = DEFAULT_OVERFLOWGID;
+
+/*
+ * the same as above, but for filesystems which can only store a 16-bit
+ * UID and GID. as such, this is needed on all architectures
+ */
+
+int fs_overflowuid = DEFAULT_FS_OVERFLOWUID;
+int fs_overflowgid = DEFAULT_FS_OVERFLOWGID;
+
+/*
+ * this indicates whether you can reboot with ctrl-alt-del: the default is yes
+ */
+
+int C_A_D = 1;
+
+
+/*
+ * Notifier list for kernel code which wants to be called
+ * at shutdown. This is used to stop any idling DMA operations
+ * and the like.
+ */
+
+static struct notifier_block *reboot_notifier_list;
+rwlock_t notifier_lock = RW_LOCK_UNLOCKED;
+
+/**
+ * notifier_chain_register - Add notifier to a notifier chain
+ * @list: Pointer to root list pointer
+ * @n: New entry in notifier chain
+ *
+ * Adds a notifier to a notifier chain.
+ *
+ * Currently always returns zero.
+ */
+
+int notifier_chain_register(struct notifier_block **list, struct notifier_block *n)
+{
+ write_lock(&notifier_lock);
+ while(*list)
+ {
+ if(n->priority > (*list)->priority)
+ break;
+ list= &((*list)->next);
+ }
+ n->next = *list;
+ *list=n;
+ write_unlock(&notifier_lock);
+ return 0;
+}
+
+/**
+ * notifier_chain_unregister - Remove notifier from a notifier chain
+ * @nl: Pointer to root list pointer
+ * @n: New entry in notifier chain
+ *
+ * Removes a notifier from a notifier chain.
+ *
+ * Returns zero on success, or %-ENOENT on failure.
+ */
+
+int notifier_chain_unregister(struct notifier_block **nl, struct notifier_block *n)
+{
+ write_lock(&notifier_lock);
+ while((*nl)!=NULL)
+ {
+ if((*nl)==n)
+ {
+ *nl=n->next;
+ write_unlock(&notifier_lock);
+ return 0;
+ }
+ nl=&((*nl)->next);
+ }
+ write_unlock(&notifier_lock);
+ return -ENOENT;
+}
+
+/**
+ * notifier_call_chain - Call functions in a notifier chain
+ * @n: Pointer to root pointer of notifier chain
+ * @val: Value passed unmodified to notifier function
+ * @v: Pointer passed unmodified to notifier function
+ *
+ * Calls each function in a notifier chain in turn.
+ *
+ * If the return value of the notifier can be and'd
+ * with %NOTIFY_STOP_MASK, then notifier_call_chain
+ * will return immediately, with the return value of
+ * the notifier function which halted execution.
+ * Otherwise, the return value is the return value
+ * of the last notifier function called.
+ */
+
+int notifier_call_chain(struct notifier_block **n, unsigned long val, void *v)
+{
+ int ret=NOTIFY_DONE;
+ struct notifier_block *nb = *n;
+
+ while(nb)
+ {
+ ret=nb->notifier_call(nb,val,v);
+ if(ret&NOTIFY_STOP_MASK)
+ {
+ return ret;
+ }
+ nb=nb->next;
+ }
+ return ret;
+}
+
+/**
+ * register_reboot_notifier - Register function to be called at reboot time
+ * @nb: Info about notifier function to be called
+ *
+ * Registers a function with the list of functions
+ * to be called at reboot time.
+ *
+ * Currently always returns zero, as notifier_chain_register
+ * always returns zero.
+ */
+
+int register_reboot_notifier(struct notifier_block * nb)
+{
+ return notifier_chain_register(&reboot_notifier_list, nb);
+}
+
+/**
+ * unregister_reboot_notifier - Unregister previously registered reboot notifier
+ * @nb: Hook to be unregistered
+ *
+ * Unregisters a previously registered reboot
+ * notifier function.
+ *
+ * Returns zero on success, or %-ENOENT on failure.
+ */
+
+int unregister_reboot_notifier(struct notifier_block * nb)
+{
+ return notifier_chain_unregister(&reboot_notifier_list, nb);
+}
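+
+/*
+ * Sketch of a reboot notifier built on the chain helpers above;
+ * my_reboot_event and my_reboot_nb are hypothetical.
+ */
+static int my_reboot_event(struct notifier_block *nb, unsigned long event,
+ void *ptr)
+{
+ /* event is SYS_RESTART, SYS_HALT or SYS_POWER_OFF */
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block my_reboot_nb = {
+ notifier_call: my_reboot_event,
+};
+
+/* registered once at init time with register_reboot_notifier(&my_reboot_nb) */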
+
+asmlinkage long sys_ni_syscall(void)
+{
+ return -ENOSYS;
+}
+
+static int proc_sel(struct task_struct *p, int which, int who)
+{
+ if(p->pid)
+ {
+ switch (which) {
+ case PRIO_PROCESS:
+ if (!who && p == current)
+ return 1;
+ return(p->pid == who);
+ case PRIO_PGRP:
+ if (!who)
+ who = current->pgrp;
+ return(p->pgrp == who);
+ case PRIO_USER:
+ if (!who)
+ who = current->uid;
+ return(p->uid == who);
+ }
+ }
+ return 0;
+}
+
+asmlinkage long sys_setpriority(int which, int who, int niceval)
+{
+ struct task_struct *p;
+ int error;
+
+ if (which > 2 || which < 0)
+ return -EINVAL;
+
+ /* normalize: avoid signed division (rounding problems) */
+ error = -ESRCH;
+ if (niceval < -20)
+ niceval = -20;
+ if (niceval > 19)
+ niceval = 19;
+
+ read_lock(&tasklist_lock);
+ for_each_task(p) {
+ if (!proc_sel(p, which, who))
+ continue;
+ if (p->uid != current->euid &&
+ p->uid != current->uid && !capable(CAP_SYS_NICE)) {
+ error = -EPERM;
+ continue;
+ }
+ if (error == -ESRCH)
+ error = 0;
+ if (niceval < p->nice && !capable(CAP_SYS_NICE))
+ error = -EACCES;
+ else
+ p->nice = niceval;
+ }
+ read_unlock(&tasklist_lock);
+
+ return error;
+}
+
+/*
+ * Ugh. To avoid negative return values, "getpriority()" will
+ * not return the normal nice-value, but a negated value that
+ * has been offset by 20 (ie it returns 40..1 instead of -20..19)
+ * to stay compatible.
+ */
+asmlinkage long sys_getpriority(int which, int who)
+{
+ struct task_struct *p;
+ long retval = -ESRCH;
+
+ if (which > 2 || which < 0)
+ return -EINVAL;
+
+ read_lock(&tasklist_lock);
+ for_each_task (p) {
+ long niceval;
+ if (!proc_sel(p, which, who))
+ continue;
+ niceval = 20 - p->nice;
+ if (niceval > retval)
+ retval = niceval;
+ }
+ read_unlock(&tasklist_lock);
+
+ return retval;
+}
+
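+/*
+ * For illustration, a user-level caller that wants the conventional nice
+ * value back has to undo the offset itself (the C library wrapper is
+ * normally expected to do this):
+ *
+ *	long ret = sys_getpriority(PRIO_PROCESS, 0);
+ *	int nice = 20 - ret;		// maps 40..1 back to -20..19
+ */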
+
+/*
+ * Reboot system call: for obvious reasons only root may call it,
+ * and even root needs to set up some magic numbers in the registers
+ * so that some mistake won't make this reboot the whole machine.
+ * You can also set the meaning of the ctrl-alt-del-key here.
+ *
+ * reboot doesn't sync: do that yourself before calling this.
+ */
+asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void * arg)
+{
+ char buffer[256];
+
+ /* We only trust the superuser with rebooting the system. */
+ if (!capable(CAP_SYS_BOOT))
+ return -EPERM;
+
+ /* For safety, we require "magic" arguments. */
+ if (magic1 != LINUX_REBOOT_MAGIC1 ||
+ (magic2 != LINUX_REBOOT_MAGIC2 && magic2 != LINUX_REBOOT_MAGIC2A &&
+ magic2 != LINUX_REBOOT_MAGIC2B))
+ return -EINVAL;
+
+ lock_kernel();
+ switch (cmd) {
+ case LINUX_REBOOT_CMD_RESTART:
+ notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL);
+ printk(KERN_EMERG "Restarting system.\n");
+ machine_restart(NULL);
+ break;
+
+ case LINUX_REBOOT_CMD_CAD_ON:
+ C_A_D = 1;
+ break;
+
+ case LINUX_REBOOT_CMD_CAD_OFF:
+ C_A_D = 0;
+ break;
+
+ case LINUX_REBOOT_CMD_HALT:
+ notifier_call_chain(&reboot_notifier_list, SYS_HALT, NULL);
+ printk(KERN_EMERG "System halted.\n");
+ machine_halt();
+ do_exit(0);
+ break;
+
+ case LINUX_REBOOT_CMD_POWER_OFF:
+ notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL);
+ printk(KERN_EMERG "Power down.\n");
+ machine_power_off();
+ do_exit(0);
+ break;
+
+ case LINUX_REBOOT_CMD_RESTART2:
+ if (strncpy_from_user(&buffer[0], (char *)arg, sizeof(buffer) - 1) < 0) {
+ unlock_kernel();
+ return -EFAULT;
+ }
+ buffer[sizeof(buffer) - 1] = '\0';
+
+ notifier_call_chain(&reboot_notifier_list, SYS_RESTART, buffer);
+ printk(KERN_EMERG "Restarting system with command '%s'.\n", buffer);
+ machine_restart(buffer);
+ break;
+
+ default:
+ unlock_kernel();
+ return -EINVAL;
+ }
+ unlock_kernel();
+ return 0;
+}
+
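+/*
+ * A sketch of the calling convention from user level (the LINUX_REBOOT_*
+ * constants come from <linux/reboot.h>; how the C library exposes the
+ * call may vary). E.g. to turn off ctrl-alt-del handling:
+ *
+ *	sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2,
+ *		   LINUX_REBOOT_CMD_CAD_OFF, NULL);
+ *
+ * Without CAP_SYS_BOOT, or with wrong magic numbers, the call fails with
+ * -EPERM or -EINVAL as enforced above.
+ */
+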
+/*
+ * This function gets called by ctrl-alt-del - ie the keyboard interrupt.
+ * As it's called within an interrupt, it may NOT sync: the only choice
+ * is whether to reboot at once, or just ignore the ctrl-alt-del.
+ */
+void ctrl_alt_del(void)
+{
+ if (C_A_D) {
+ notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL);
+ machine_restart(NULL);
+ } else
+ kill_proc(1, SIGINT, 1);
+}
+
+
+/*
+ * Unprivileged users may change the real gid to the effective gid
+ * or vice versa. (BSD-style)
+ *
+ * If you set the real gid at all, or set the effective gid to a value not
+ * equal to the real gid, then the saved gid is set to the new effective gid.
+ *
+ * This makes it possible for a setgid program to completely drop its
+ * privileges, which is often a useful assertion to make when you are doing
+ * a security audit of a program.
+ *
+ * The general idea is that a program which uses just setregid() will be
+ * 100% compatible with BSD. A program which uses just setgid() will be
+ * 100% compatible with POSIX with saved IDs.
+ *
+ * SMP: There are no races; the GIDs are checked only by filesystem
+ * operations (as far as semantic preservation is concerned).
+ */
+asmlinkage long sys_setregid(gid_t rgid, gid_t egid)
+{
+ int old_rgid = current->gid;
+ int old_egid = current->egid;
+
+ if (rgid != (gid_t) -1) {
+ if ((old_rgid == rgid) ||
+ (current->egid==rgid) ||
+ capable(CAP_SETGID))
+ current->gid = rgid;
+ else
+ return -EPERM;
+ }
+ if (egid != (gid_t) -1) {
+ if ((old_rgid == egid) ||
+ (current->egid == egid) ||
+ (current->sgid == egid) ||
+ capable(CAP_SETGID))
+ current->fsgid = current->egid = egid;
+ else {
+ current->gid = old_rgid;
+ return -EPERM;
+ }
+ }
+ if (rgid != (gid_t) -1 ||
+ (egid != (gid_t) -1 && egid != old_rgid))
+ current->sgid = current->egid;
+ current->fsgid = current->egid;
+ if (current->egid != old_egid)
+ current->dumpable = 0;
+ return 0;
+}
+
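+/*
+ * Illustrative sketch of the "completely drop its privileges" case above
+ * for a setgid program: setting both the real and the effective gid also
+ * overwrites the saved gid, so the privileged group cannot be regained:
+ *
+ *	gid_t rgid = getgid();
+ *	setregid(rgid, rgid);		// rgid == egid == sgid from now on
+ */
+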
+/*
+ * setgid() is implemented like SysV w/ SAVED_IDS
+ *
+ * SMP: Same implicit races as above.
+ */
+asmlinkage long sys_setgid(gid_t gid)
+{
+ int old_egid = current->egid;
+
+ if (capable(CAP_SETGID))
+ current->gid = current->egid = current->sgid = current->fsgid = gid;
+ else if ((gid == current->gid) || (gid == current->sgid))
+ current->egid = current->fsgid = gid;
+ else
+ return -EPERM;
+
+ if (current->egid != old_egid)
+ current->dumpable = 0;
+ return 0;
+}
+
+/*
+ * cap_emulate_setxuid() fixes the effective / permitted capabilities of
+ * a process after a call to setuid, setreuid, or setresuid.
+ *
+ * 1) When set*uiding _from_ one of {r,e,s}uid == 0 _to_ all of
+ * {r,e,s}uid != 0, the permitted and effective capabilities are
+ * cleared.
+ *
+ * 2) When set*uiding _from_ euid == 0 _to_ euid != 0, the effective
+ * capabilities of the process are cleared.
+ *
+ * 3) When set*uiding _from_ euid != 0 _to_ euid == 0, the effective
+ * capabilities are set to the permitted capabilities.
+ *
+ * fsuid is handled elsewhere. fsuid == 0 and {r,e,s}uid != 0 should
+ * never happen.
+ *
+ * -astor
+ *
+ * cevans - New behaviour, Oct '99
+ * A process may, via prctl(), elect to keep its capabilities when it
+ * calls setuid() and switches away from uid==0. Both permitted and
+ * effective sets will be retained.
+ * Without this change, it was impossible for a daemon to drop only some
+ * of its privilege. The call to setuid(!=0) would drop all privileges!
+ * Keeping uid 0 is not an option because uid 0 owns too many vital
+ * files..
+ * Thanks to Olaf Kirch and Peter Benie for spotting this.
+ */
+extern inline void cap_emulate_setxuid(int old_ruid, int old_euid,
+ int old_suid)
+{
+ if ((old_ruid == 0 || old_euid == 0 || old_suid == 0) &&
+ (current->uid != 0 && current->euid != 0 && current->suid != 0) &&
+ !current->keep_capabilities) {
+ cap_clear(current->cap_permitted);
+ cap_clear(current->cap_effective);
+ }
+ if (old_euid == 0 && current->euid != 0) {
+ cap_clear(current->cap_effective);
+ }
+ if (old_euid != 0 && current->euid == 0) {
+ current->cap_effective = current->cap_permitted;
+ }
+}
+
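+/*
+ * Sketch of the behaviour described above (daemon_uid is a placeholder
+ * for whatever unprivileged uid the daemon runs as):
+ *
+ *	prctl(PR_SET_KEEPCAPS, 1);	// skip the clearing in rule 1
+ *	setuid(daemon_uid);
+ *	// ...then drop the capabilities that are not needed via capset()
+ */
+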
+static int set_user(uid_t new_ruid)
+{
+ struct user_struct *new_user, *old_user;
+
+ /* What if a process setreuid()'s and this brings the
+ * new uid over his NPROC rlimit? We can check this now
+ * cheaply with the new uid cache, so if it matters
+ * we should be checking for it. -DaveM
+ */
+ new_user = alloc_uid(new_ruid);
+ if (!new_user)
+ return -EAGAIN;
+ old_user = current->user;
+ atomic_dec(&old_user->processes);
+ atomic_inc(&new_user->processes);
+
+ current->uid = new_ruid;
+ current->user = new_user;
+ free_uid(old_user);
+ return 0;
+}
+
+/*
+ * Unprivileged users may change the real uid to the effective uid
+ * or vice versa. (BSD-style)
+ *
+ * If you set the real uid at all, or set the effective uid to a value not
+ * equal to the real uid, then the saved uid is set to the new effective uid.
+ *
+ * This makes it possible for a setuid program to completely drop its
+ * privileges, which is often a useful assertion to make when you are doing
+ * a security audit of a program.
+ *
+ * The general idea is that a program which uses just setreuid() will be
+ * 100% compatible with BSD. A program which uses just setuid() will be
+ * 100% compatible with POSIX with saved IDs.
+ */
+asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
+{
+ int old_ruid, old_euid, old_suid, new_ruid, new_euid;
+
+ new_ruid = old_ruid = current->uid;
+ new_euid = old_euid = current->euid;
+ old_suid = current->suid;
+
+ if (ruid != (uid_t) -1) {
+ new_ruid = ruid;
+ if ((old_ruid != ruid) &&
+ (current->euid != ruid) &&
+ !capable(CAP_SETUID))
+ return -EPERM;
+ }
+
+ if (euid != (uid_t) -1) {
+ new_euid = euid;
+ if ((old_ruid != euid) &&
+ (current->euid != euid) &&
+ (current->suid != euid) &&
+ !capable(CAP_SETUID))
+ return -EPERM;
+ }
+
+ if (new_ruid != old_ruid && set_user(new_ruid) < 0)
+ return -EAGAIN;
+
+ current->fsuid = current->euid = new_euid;
+ if (ruid != (uid_t) -1 ||
+ (euid != (uid_t) -1 && euid != old_ruid))
+ current->suid = current->euid;
+ current->fsuid = current->euid;
+ if (current->euid != old_euid)
+ current->dumpable = 0;
+
+ if (!issecure(SECURE_NO_SETUID_FIXUP)) {
+ cap_emulate_setxuid(old_ruid, old_euid, old_suid);
+ }
+
+ return 0;
+}
+
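+/*
+ * Illustrative sketch of the "completely drop its privileges" case above
+ * for a setuid-root program:
+ *
+ *	uid_t ruid = getuid();
+ *	setreuid(ruid, ruid);		// ruid == euid == suid afterwards
+ */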
+
+
+/*
+ * setuid() is implemented like SysV with SAVED_IDS
+ *
+ * Note that SAVED_IDS is deficient in that a setuid root program
+ * like sendmail, for example, cannot set its uid to be a normal
+ * user and then switch back, because if you're root, setuid() sets
+ * the saved uid too. If you don't like this, blame the bright people
+ * in the POSIX committee and/or USG. Note that the BSD-style setreuid()
+ * will allow a root program to temporarily drop privileges and be able to
+ * regain them by swapping the real and effective uid.
+ */
+asmlinkage long sys_setuid(uid_t uid)
+{
+ int old_euid = current->euid;
+ int old_ruid, old_suid, new_ruid;
+
+ old_ruid = new_ruid = current->uid;
+ old_suid = current->suid;
+ if (capable(CAP_SETUID)) {
+ if (uid != old_ruid && set_user(uid) < 0)
+ return -EAGAIN;
+ current->suid = uid;
+ } else if ((uid != current->uid) && (uid != current->suid))
+ return -EPERM;
+
+ current->fsuid = current->euid = uid;
+
+ if (old_euid != uid)
+ current->dumpable = 0;
+
+ if (!issecure(SECURE_NO_SETUID_FIXUP)) {
+ cap_emulate_setxuid(old_ruid, old_euid, old_suid);
+ }
+
+ return 0;
+}
+
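+/*
+ * The "temporarily drop privileges and regain them" pattern mentioned
+ * above, sketched for a setuid-root program using BSD-style setreuid():
+ *
+ *	setreuid(geteuid(), getuid());	// swap: run with the user's euid
+ *	// ...do unprivileged work...
+ *	setreuid(geteuid(), getuid());	// swap back: euid 0 again
+ */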
+
+/*
+ * This function implements a generic ability to update ruid, euid,
+ * and suid. This allows you to implement the 4.4 compatible seteuid().
+ */
+asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
+{
+ int old_ruid = current->uid;
+ int old_euid = current->euid;
+ int old_suid = current->suid;
+
+ if (!capable(CAP_SETUID)) {
+ if ((ruid != (uid_t) -1) && (ruid != current->uid) &&
+ (ruid != current->euid) && (ruid != current->suid))
+ return -EPERM;
+ if ((euid != (uid_t) -1) && (euid != current->uid) &&
+ (euid != current->euid) && (euid != current->suid))
+ return -EPERM;
+ if ((suid != (uid_t) -1) && (suid != current->uid) &&
+ (suid != current->euid) && (suid != current->suid))
+ return -EPERM;
+ }
+ if (ruid != (uid_t) -1) {
+ if (ruid != current->uid && set_user(ruid) < 0)
+ return -EAGAIN;
+ }
+ if (euid != (uid_t) -1) {
+ if (euid != current->euid)
+ current->dumpable = 0;
+ current->euid = euid;
+ current->fsuid = euid;
+ }
+ if (suid != (uid_t) -1)
+ current->suid = suid;
+
+ if (!issecure(SECURE_NO_SETUID_FIXUP)) {
+ cap_emulate_setxuid(old_ruid, old_euid, old_suid);
+ }
+
+ return 0;
+}
+
+asmlinkage long sys_getresuid(uid_t *ruid, uid_t *euid, uid_t *suid)
+{
+ int retval;
+
+ if (!(retval = put_user(current->uid, ruid)) &&
+ !(retval = put_user(current->euid, euid)))
+ retval = put_user(current->suid, suid);
+
+ return retval;
+}
+
+/*
+ * Same as above, but for rgid, egid, sgid.
+ */
+asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
+{
+ if (!capable(CAP_SETGID)) {
+ if ((rgid != (gid_t) -1) && (rgid != current->gid) &&
+ (rgid != current->egid) && (rgid != current->sgid))
+ return -EPERM;
+ if ((egid != (gid_t) -1) && (egid != current->gid) &&
+ (egid != current->egid) && (egid != current->sgid))
+ return -EPERM;
+ if ((sgid != (gid_t) -1) && (sgid != current->gid) &&
+ (sgid != current->egid) && (sgid != current->sgid))
+ return -EPERM;
+ }
+ if (rgid != (gid_t) -1)
+ current->gid = rgid;
+ if (egid != (gid_t) -1) {
+ if (egid != current->egid)
+ current->dumpable = 0;
+ current->egid = egid;
+ current->fsgid = egid;
+ }
+ if (sgid != (gid_t) -1)
+ current->sgid = sgid;
+ return 0;
+}
+
+asmlinkage long sys_getresgid(gid_t *rgid, gid_t *egid, gid_t *sgid)
+{
+ int retval;
+
+ if (!(retval = put_user(current->gid, rgid)) &&
+ !(retval = put_user(current->egid, egid)))
+ retval = put_user(current->sgid, sgid);
+
+ return retval;
+}
+
+
+/*
+ * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This
+ * is used for "access()" and for the NFS daemon (letting nfsd stay at
+ * whatever uid it wants to). It normally shadows "euid", except when
+ * explicitly set by setfsuid() or for access..
+ */
+asmlinkage long sys_setfsuid(uid_t uid)
+{
+ int old_fsuid;
+
+ old_fsuid = current->fsuid;
+ if (uid == current->uid || uid == current->euid ||
+ uid == current->suid || uid == current->fsuid ||
+ capable(CAP_SETUID))
+ current->fsuid = uid;
+ if (current->fsuid != old_fsuid)
+ current->dumpable = 0;
+
+	/* We emulate fsuid by essentially doing a scaled-down version
+	 * of what we did in setresuid and friends. However, we only
+	 * operate on the fs-specific bits of the process' effective
+	 * capabilities.
+	 *
+	 * FIXME - is fsuser used for all CAP_FS_MASK capabilities?
+	 * If not, we might be a bit too harsh here.
+	 */
+
+ if (!issecure(SECURE_NO_SETUID_FIXUP)) {
+ if (old_fsuid == 0 && current->fsuid != 0) {
+ cap_t(current->cap_effective) &= ~CAP_FS_MASK;
+ }
+ if (old_fsuid != 0 && current->fsuid == 0) {
+ cap_t(current->cap_effective) |=
+ (cap_t(current->cap_permitted) & CAP_FS_MASK);
+ }
+ }
+
+ return old_fsuid;
+}
+
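+/*
+ * Sketch of the intended nfsd-style use: take on a client's uid for
+ * filesystem permission checks only, without changing the euid:
+ *
+ *	old_fsuid = setfsuid(client_uid);	// returns the previous fsuid
+ *	// ...access the file on the client's behalf...
+ *	setfsuid(old_fsuid);
+ */
+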
+/*
+ * Same as setfsuid() above, but for the filesystem gid.
+ */
+asmlinkage long sys_setfsgid(gid_t gid)
+{
+ int old_fsgid;
+
+ old_fsgid = current->fsgid;
+ if (gid == current->gid || gid == current->egid ||
+ gid == current->sgid || gid == current->fsgid ||
+ capable(CAP_SETGID))
+ current->fsgid = gid;
+ if (current->fsgid != old_fsgid)
+ current->dumpable = 0;
+
+ return old_fsgid;
+}
+
+asmlinkage long sys_times(struct tms * tbuf)
+{
+ /*
+ * In the SMP world we might just be unlucky and have one of
+ * the times increment as we use it. Since the value is an
+	 * atomically safe type this is just fine. Conceptually it's
+ * as if the syscall took an instant longer to occur.
+ */
+ if (tbuf)
+ if (copy_to_user(tbuf, &current->times, sizeof(struct tms)))
+ return -EFAULT;
+ return jiffies;
+}
+
+/*
+ * This needs some heavy checking ...
+ * I just haven't the stomach for it. I also don't fully
+ * understand sessions/pgrp etc. Let somebody who does explain it.
+ *
+ * OK, I think I have the protection semantics right.... this is really
+ * only important on a multi-user system anyway, to make sure one user
+ * can't send a signal to a process owned by another. -TYT, 12/12/91
+ *
+ * Ouch. Had to add the 'did_exec' flag to conform completely to POSIX.
+ * LBT 04.03.94
+ */
+
+asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
+{
+ struct task_struct * p;
+ int err = -EINVAL;
+
+ if (!pid)
+ pid = current->pid;
+ if (!pgid)
+ pgid = pid;
+ if (pgid < 0)
+ return -EINVAL;
+
+ /* From this point forward we keep holding onto the tasklist lock
+ * so that our parent does not change from under us. -DaveM
+ */
+ read_lock(&tasklist_lock);
+
+ err = -ESRCH;
+ p = find_task_by_pid(pid);
+ if (!p)
+ goto out;
+
+ if (p->p_pptr == current || p->p_opptr == current) {
+ err = -EPERM;
+ if (p->session != current->session)
+ goto out;
+ err = -EACCES;
+ if (p->did_exec)
+ goto out;
+ } else if (p != current)
+ goto out;
+ err = -EPERM;
+ if (p->leader)
+ goto out;
+ if (pgid != pid) {
+ struct task_struct * tmp;
+ for_each_task (tmp) {
+ if (tmp->pgrp == pgid &&
+ tmp->session == current->session)
+ goto ok_pgid;
+ }
+ goto out;
+ }
+
+ok_pgid:
+ p->pgrp = pgid;
+ err = 0;
+out:
+ /* All paths lead to here, thus we are safe. -DaveM */
+ read_unlock(&tasklist_lock);
+ return err;
+}
+
+asmlinkage long sys_getpgid(pid_t pid)
+{
+ if (!pid) {
+ return current->pgrp;
+ } else {
+ int retval;
+ struct task_struct *p;
+
+ read_lock(&tasklist_lock);
+ p = find_task_by_pid(pid);
+
+ retval = -ESRCH;
+ if (p)
+ retval = p->pgrp;
+ read_unlock(&tasklist_lock);
+ return retval;
+ }
+}
+
+asmlinkage long sys_getpgrp(void)
+{
+ /* SMP - assuming writes are word atomic this is fine */
+ return current->pgrp;
+}
+
+asmlinkage long sys_getsid(pid_t pid)
+{
+ if (!pid) {
+ return current->session;
+ } else {
+ int retval;
+ struct task_struct *p;
+
+ read_lock(&tasklist_lock);
+ p = find_task_by_pid(pid);
+
+ retval = -ESRCH;
+ if(p)
+ retval = p->session;
+ read_unlock(&tasklist_lock);
+ return retval;
+ }
+}
+
+asmlinkage long sys_setsid(void)
+{
+ struct task_struct * p;
+ int err = -EPERM;
+
+ read_lock(&tasklist_lock);
+ for_each_task(p) {
+ if (p->pgrp == current->pid)
+ goto out;
+ }
+
+ current->leader = 1;
+ current->session = current->pgrp = current->pid;
+ current->tty = NULL;
+ current->tty_old_pgrp = 0;
+ err = current->pgrp;
+out:
+ read_unlock(&tasklist_lock);
+ return err;
+}
+
+/*
+ * Supplementary group IDs
+ */
+asmlinkage long sys_getgroups(int gidsetsize, gid_t *grouplist)
+{
+ int i;
+
+ /*
+ * SMP: Nobody else can change our grouplist. Thus we are
+ * safe.
+ */
+
+ if (gidsetsize < 0)
+ return -EINVAL;
+ i = current->ngroups;
+ if (gidsetsize) {
+ if (i > gidsetsize)
+ return -EINVAL;
+ if (copy_to_user(grouplist, current->groups, sizeof(gid_t)*i))
+ return -EFAULT;
+ }
+ return i;
+}
+
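+/*
+ * As the code above implies, a gidsetsize of zero just reports the number
+ * of supplementary groups, so the usual user-level pattern is two calls:
+ *
+ *	int n = getgroups(0, NULL);
+ *	gid_t *list = malloc(n * sizeof(gid_t));
+ *	getgroups(n, list);
+ */
+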
+/*
+ * SMP: Our groups are not shared. We can copy to/from them safely
+ * without another task interfering.
+ */
+
+asmlinkage long sys_setgroups(int gidsetsize, gid_t *grouplist)
+{
+ if (!capable(CAP_SETGID))
+ return -EPERM;
+ if ((unsigned) gidsetsize > NGROUPS)
+ return -EINVAL;
+ if(copy_from_user(current->groups, grouplist, gidsetsize * sizeof(gid_t)))
+ return -EFAULT;
+ current->ngroups = gidsetsize;
+ return 0;
+}
+
+static int supplemental_group_member(gid_t grp)
+{
+ int i = current->ngroups;
+
+ if (i) {
+ gid_t *groups = current->groups;
+ do {
+ if (*groups == grp)
+ return 1;
+ groups++;
+ i--;
+ } while (i);
+ }
+ return 0;
+}
+
+/*
+ * Check whether we're fsgid/egid or in the supplemental group..
+ */
+int in_group_p(gid_t grp)
+{
+ int retval = 1;
+ if (grp != current->fsgid)
+ retval = supplemental_group_member(grp);
+ return retval;
+}
+
+int in_egroup_p(gid_t grp)
+{
+ int retval = 1;
+ if (grp != current->egid)
+ retval = supplemental_group_member(grp);
+ return retval;
+}
+
+DECLARE_RWSEM(uts_sem);
+
+asmlinkage long sys_newuname(struct new_utsname * name)
+{
+ int errno = 0;
+
+ down_read(&uts_sem);
+ if (copy_to_user(name,&system_utsname,sizeof *name))
+ errno = -EFAULT;
+ up_read(&uts_sem);
+ return errno;
+}
+
+asmlinkage long sys_sethostname(char *name, int len)
+{
+ int errno;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if (len < 0 || len > __NEW_UTS_LEN)
+ return -EINVAL;
+ down_write(&uts_sem);
+ errno = -EFAULT;
+ if (!copy_from_user(system_utsname.nodename, name, len)) {
+ system_utsname.nodename[len] = 0;
+ errno = 0;
+ }
+ up_write(&uts_sem);
+ return errno;
+}
+
+asmlinkage long sys_gethostname(char *name, int len)
+{
+ int i, errno;
+
+ if (len < 0)
+ return -EINVAL;
+ down_read(&uts_sem);
+ i = 1 + strlen(system_utsname.nodename);
+ if (i > len)
+ i = len;
+ errno = 0;
+ if (copy_to_user(name, system_utsname.nodename, i))
+ errno = -EFAULT;
+ up_read(&uts_sem);
+ return errno;
+}
+
+/*
+ * Only setdomainname; getdomainname can be implemented by calling
+ * uname()
+ */
+asmlinkage long sys_setdomainname(char *name, int len)
+{
+ int errno;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if (len < 0 || len > __NEW_UTS_LEN)
+ return -EINVAL;
+
+ down_write(&uts_sem);
+ errno = -EFAULT;
+ if (!copy_from_user(system_utsname.domainname, name, len)) {
+ errno = 0;
+ system_utsname.domainname[len] = 0;
+ }
+ up_write(&uts_sem);
+ return errno;
+}
+
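+/*
+ * User-level sketch: the getdomainname() counterpart hinted at above can
+ * indeed be built on uname(), since the new_utsname copied out by
+ * sys_newuname() already carries the domainname (with glibc it is the
+ * GNU-specific domainname member of struct utsname):
+ *
+ *	struct new_utsname u;
+ *	sys_newuname(&u);
+ *	// u.domainname now holds the NIS/YP domain name
+ */
+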
+asmlinkage long sys_getrlimit(unsigned int resource, struct rlimit *rlim)
+{
+ if (resource >= RLIM_NLIMITS)
+ return -EINVAL;
+ else
+ return copy_to_user(rlim, current->rlim + resource, sizeof(*rlim))
+ ? -EFAULT : 0;
+}
+
+#if !defined(__ia64__) && !defined(__s390__)
+
+/*
+ * Backwards compatibility for getrlimit(). Needed for some apps.
+ */
+
+asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit *rlim)
+{
+ struct rlimit x;
+ if (resource >= RLIM_NLIMITS)
+ return -EINVAL;
+
+ memcpy(&x, current->rlim + resource, sizeof(*rlim));
+ if(x.rlim_cur > 0x7FFFFFFF)
+ x.rlim_cur = 0x7FFFFFFF;
+ if(x.rlim_max > 0x7FFFFFFF)
+ x.rlim_max = 0x7FFFFFFF;
+ return copy_to_user(rlim, &x, sizeof(x))?-EFAULT:0;
+}
+
+#endif
+
+asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit *rlim)
+{
+ struct rlimit new_rlim, *old_rlim;
+
+ if (resource >= RLIM_NLIMITS)
+ return -EINVAL;
+ if(copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
+ return -EFAULT;
+ if (new_rlim.rlim_cur < 0 || new_rlim.rlim_max < 0)
+ return -EINVAL;
+ old_rlim = current->rlim + resource;
+ if (((new_rlim.rlim_cur > old_rlim->rlim_max) ||
+ (new_rlim.rlim_max > old_rlim->rlim_max)) &&
+ !capable(CAP_SYS_RESOURCE))
+ return -EPERM;
+ if (resource == RLIMIT_NOFILE) {
+ if (new_rlim.rlim_cur > NR_OPEN || new_rlim.rlim_max > NR_OPEN)
+ return -EPERM;
+ }
+ *old_rlim = new_rlim;
+ return 0;
+}
+
+/*
+ * It would make sense to put struct rusage in the task_struct,
+ * except that would make the task_struct be *really big*. After
+ * task_struct gets moved into malloc'ed memory, it would
+ * make sense to do this. It will make moving the rest of the information
+ * a lot simpler! (Which we're not doing right now because we're not
+ * measuring them yet).
+ *
+ * This is SMP safe. Either we are called from sys_getrusage on ourselves
+ * below (we know we aren't going to exit/disappear and only we change our
+ * rusage counters), or we are called from wait4() on a process which is
+ * either stopped or zombied. In the zombied case the task won't get
+ * reaped till shortly after the call to getrusage(). In both cases the
+ * task being examined is in a frozen state so the counters won't change.
+ *
+ * FIXME! Get the fault counts properly!
+ */
+int getrusage(struct task_struct *p, int who, struct rusage *ru)
+{
+ struct rusage r;
+
+ memset((char *) &r, 0, sizeof(r));
+ switch (who) {
+ case RUSAGE_SELF:
+ r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_utime);
+ r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_utime);
+ r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_stime);
+ r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_stime);
+ r.ru_minflt = p->min_flt;
+ r.ru_majflt = p->maj_flt;
+ r.ru_nswap = p->nswap;
+ break;
+ case RUSAGE_CHILDREN:
+ r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_cutime);
+ r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_cutime);
+ r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_cstime);
+ r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_cstime);
+ r.ru_minflt = p->cmin_flt;
+ r.ru_majflt = p->cmaj_flt;
+ r.ru_nswap = p->cnswap;
+ break;
+ default:
+ r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_utime + p->times.tms_cutime);
+ r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_utime + p->times.tms_cutime);
+ r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_stime + p->times.tms_cstime);
+ r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_stime + p->times.tms_cstime);
+ r.ru_minflt = p->min_flt + p->cmin_flt;
+ r.ru_majflt = p->maj_flt + p->cmaj_flt;
+ r.ru_nswap = p->nswap + p->cnswap;
+ break;
+ }
+ return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
+}
+
+asmlinkage long sys_getrusage(int who, struct rusage *ru)
+{
+ if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN)
+ return -EINVAL;
+ return getrusage(current, who, ru);
+}
+
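+/*
+ * User-level sketch: the times come back as struct timeval members,
+ * already converted from ticks by CT_TO_SECS/CT_TO_USECS above:
+ *
+ *	struct rusage ru;
+ *	getrusage(RUSAGE_SELF, &ru);
+ *	// ru.ru_utime / ru.ru_stime hold user and system CPU time
+ */
+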
+asmlinkage long sys_umask(int mask)
+{
+ mask = xchg(&current->fs->umask, mask & S_IRWXUGO);
+ return mask;
+}
+
+asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
+ unsigned long arg4, unsigned long arg5)
+{
+ int error = 0;
+ int sig;
+
+ switch (option) {
+ case PR_SET_PDEATHSIG:
+ sig = arg2;
+ if (sig > _NSIG) {
+ error = -EINVAL;
+ break;
+ }
+ current->pdeath_signal = sig;
+ break;
+ case PR_GET_PDEATHSIG:
+ error = put_user(current->pdeath_signal, (int *)arg2);
+ break;
+ case PR_GET_DUMPABLE:
+ if (current->dumpable)
+ error = 1;
+ break;
+ case PR_SET_DUMPABLE:
+ if (arg2 != 0 && arg2 != 1) {
+ error = -EINVAL;
+ break;
+ }
+ current->dumpable = arg2;
+ break;
+ case PR_SET_UNALIGN:
+#ifdef SET_UNALIGN_CTL
+ error = SET_UNALIGN_CTL(current, arg2);
+#else
+ error = -EINVAL;
+#endif
+ break;
+
+ case PR_GET_UNALIGN:
+#ifdef GET_UNALIGN_CTL
+ error = GET_UNALIGN_CTL(current, arg2);
+#else
+ error = -EINVAL;
+#endif
+ break;
+
+ case PR_GET_KEEPCAPS:
+ if (current->keep_capabilities)
+ error = 1;
+ break;
+ case PR_SET_KEEPCAPS:
+ if (arg2 != 0 && arg2 != 1) {
+ error = -EINVAL;
+ break;
+ }
+ current->keep_capabilities = arg2;
+ break;
+ default:
+ error = -EINVAL;
+ break;
+ }
+ return error;
+}
+
+EXPORT_SYMBOL(notifier_chain_register);
+EXPORT_SYMBOL(notifier_chain_unregister);
+EXPORT_SYMBOL(notifier_call_chain);
+EXPORT_SYMBOL(register_reboot_notifier);
+EXPORT_SYMBOL(unregister_reboot_notifier);
+EXPORT_SYMBOL(in_group_p);
+EXPORT_SYMBOL(in_egroup_p);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
new file mode 100644
index 000000000000..1c22d7838dd2
--- /dev/null
+++ b/kernel/sysctl.c
@@ -0,0 +1,1309 @@
+/*
+ * sysctl.c: General linux system control interface
+ *
+ * Begun 24 March 1995, Stephen Tweedie
+ * Added /proc support, Dec 1995
+ * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
+ * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
+ * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
+ * Dynamic registration fixes, Stephen Tweedie.
+ * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
+ * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris Horn.
+ * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
+ * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
+ * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
+ * Wendling.
+ * The list_for_each() macro wasn't appropriate for the sysctl loop.
+ * Removed it and replaced it with older style, 03/23/00, Bill Wendling
+ */
+
+#include <linux/config.h>
+#include <linux/malloc.h>
+#include <linux/sysctl.h>
+#include <linux/swapctl.h>
+#include <linux/proc_fs.h>
+#include <linux/ctype.h>
+#include <linux/utsname.h>
+#include <linux/capability.h>
+#include <linux/smp_lock.h>
+#include <linux/init.h>
+#include <linux/sysrq.h>
+#include <linux/highuid.h>
+
+#include <asm/uaccess.h>
+
+#ifdef CONFIG_ROOT_NFS
+#include <linux/nfs_fs.h>
+#endif
+
+#if defined(CONFIG_SYSCTL)
+
+/* External variables not in a header file. */
+extern int panic_timeout;
+extern int C_A_D;
+extern int bdf_prm[], bdflush_min[], bdflush_max[];
+extern int sysctl_overcommit_memory;
+extern int max_threads;
+extern int nr_queued_signals, max_queued_signals;
+extern int sysrq_enabled;
+
+/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
+static int maxolduid = 65535;
+static int minolduid;
+
+#ifdef CONFIG_KMOD
+extern char modprobe_path[];
+#endif
+#ifdef CONFIG_HOTPLUG
+extern char hotplug_path[];
+#endif
+#ifdef CONFIG_CHR_DEV_SG
+extern int sg_big_buff;
+#endif
+#ifdef CONFIG_SYSVIPC
+extern size_t shm_ctlmax;
+extern size_t shm_ctlall;
+extern int shm_ctlmni;
+extern int msg_ctlmax;
+extern int msg_ctlmnb;
+extern int msg_ctlmni;
+extern int sem_ctls[];
+#endif
+
+#ifdef __sparc__
+extern char reboot_command [];
+extern int stop_a_enabled;
+#endif
+#ifdef __powerpc__
+extern unsigned long htab_reclaim_on, zero_paged_on, powersave_nap;
+int proc_dol2crvec(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp);
+#endif
+
+#ifdef CONFIG_BSD_PROCESS_ACCT
+extern int acct_parm[];
+#endif
+
+extern int pgt_cache_water[];
+
+static int parse_table(int *, int, void *, size_t *, void *, size_t,
+ ctl_table *, void **);
+static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp);
+
+static ctl_table root_table[];
+static struct ctl_table_header root_table_header =
+ { root_table, LIST_HEAD_INIT(root_table_header.ctl_entry) };
+
+static ctl_table kern_table[];
+static ctl_table vm_table[];
+#ifdef CONFIG_NET
+extern ctl_table net_table[];
+#endif
+static ctl_table proc_table[];
+static ctl_table fs_table[];
+static ctl_table debug_table[];
+static ctl_table dev_table[];
+extern ctl_table random_table[];
+
+/* /proc declarations: */
+
+#ifdef CONFIG_PROC_FS
+
+static ssize_t proc_readsys(struct file *, char *, size_t, loff_t *);
+static ssize_t proc_writesys(struct file *, const char *, size_t, loff_t *);
+static int proc_sys_permission(struct inode *, int);
+
+struct file_operations proc_sys_file_operations = {
+ read: proc_readsys,
+ write: proc_writesys,
+};
+
+static struct inode_operations proc_sys_inode_operations = {
+ permission: proc_sys_permission,
+};
+
+extern struct proc_dir_entry *proc_sys_root;
+
+static void register_proc_table(ctl_table *, struct proc_dir_entry *);
+static void unregister_proc_table(ctl_table *, struct proc_dir_entry *);
+#endif
+
+extern int inodes_stat[];
+extern int dentry_stat[];
+
+/* The default sysctl tables: */
+
+static ctl_table root_table[] = {
+ {CTL_KERN, "kernel", NULL, 0, 0555, kern_table},
+ {CTL_VM, "vm", NULL, 0, 0555, vm_table},
+#ifdef CONFIG_NET
+ {CTL_NET, "net", NULL, 0, 0555, net_table},
+#endif
+ {CTL_PROC, "proc", NULL, 0, 0555, proc_table},
+ {CTL_FS, "fs", NULL, 0, 0555, fs_table},
+ {CTL_DEBUG, "debug", NULL, 0, 0555, debug_table},
+ {CTL_DEV, "dev", NULL, 0, 0555, dev_table},
+ {0}
+};
+
+static ctl_table kern_table[] = {
+ {KERN_OSTYPE, "ostype", system_utsname.sysname, 64,
+ 0444, NULL, &proc_doutsstring, &sysctl_string},
+ {KERN_OSRELEASE, "osrelease", system_utsname.release, 64,
+ 0444, NULL, &proc_doutsstring, &sysctl_string},
+ {KERN_VERSION, "version", system_utsname.version, 64,
+ 0444, NULL, &proc_doutsstring, &sysctl_string},
+ {KERN_NODENAME, "hostname", system_utsname.nodename, 64,
+ 0644, NULL, &proc_doutsstring, &sysctl_string},
+ {KERN_DOMAINNAME, "domainname", system_utsname.domainname, 64,
+ 0644, NULL, &proc_doutsstring, &sysctl_string},
+ {KERN_PANIC, "panic", &panic_timeout, sizeof(int),
+ 0644, NULL, &proc_dointvec},
+ {KERN_CAP_BSET, "cap-bound", &cap_bset, sizeof(kernel_cap_t),
+ 0600, NULL, &proc_dointvec_bset},
+#ifdef CONFIG_BLK_DEV_INITRD
+ {KERN_REALROOTDEV, "real-root-dev", &real_root_dev, sizeof(int),
+ 0644, NULL, &proc_dointvec},
+#endif
+#ifdef __sparc__
+ {KERN_SPARC_REBOOT, "reboot-cmd", reboot_command,
+ 256, 0644, NULL, &proc_dostring, &sysctl_string },
+ {KERN_SPARC_STOP_A, "stop-a", &stop_a_enabled, sizeof (int),
+ 0644, NULL, &proc_dointvec},
+#endif
+#ifdef __powerpc__
+ {KERN_PPC_HTABRECLAIM, "htab-reclaim", &htab_reclaim_on, sizeof(int),
+ 0644, NULL, &proc_dointvec},
+ {KERN_PPC_ZEROPAGED, "zero-paged", &zero_paged_on, sizeof(int),
+ 0644, NULL, &proc_dointvec},
+ {KERN_PPC_POWERSAVE_NAP, "powersave-nap", &powersave_nap, sizeof(int),
+ 0644, NULL, &proc_dointvec},
+ {KERN_PPC_L2CR, "l2cr", NULL, 0,
+ 0644, NULL, &proc_dol2crvec},
+#endif
+ {KERN_CTLALTDEL, "ctrl-alt-del", &C_A_D, sizeof(int),
+ 0644, NULL, &proc_dointvec},
+ {KERN_PRINTK, "printk", &console_loglevel, 4*sizeof(int),
+ 0644, NULL, &proc_dointvec},
+#ifdef CONFIG_KMOD
+ {KERN_MODPROBE, "modprobe", &modprobe_path, 256,
+ 0644, NULL, &proc_dostring, &sysctl_string },
+#endif
+#ifdef CONFIG_HOTPLUG
+ {KERN_HOTPLUG, "hotplug", &hotplug_path, 256,
+ 0644, NULL, &proc_dostring, &sysctl_string },
+#endif
+#ifdef CONFIG_CHR_DEV_SG
+ {KERN_SG_BIG_BUFF, "sg-big-buff", &sg_big_buff, sizeof (int),
+ 0444, NULL, &proc_dointvec},
+#endif
+#ifdef CONFIG_BSD_PROCESS_ACCT
+ {KERN_ACCT, "acct", &acct_parm, 3*sizeof(int),
+ 0644, NULL, &proc_dointvec},
+#endif
+ {KERN_RTSIGNR, "rtsig-nr", &nr_queued_signals, sizeof(int),
+ 0444, NULL, &proc_dointvec},
+ {KERN_RTSIGMAX, "rtsig-max", &max_queued_signals, sizeof(int),
+ 0644, NULL, &proc_dointvec},
+#ifdef CONFIG_SYSVIPC
+ {KERN_SHMMAX, "shmmax", &shm_ctlmax, sizeof (size_t),
+ 0644, NULL, &proc_doulongvec_minmax},
+ {KERN_SHMALL, "shmall", &shm_ctlall, sizeof (size_t),
+ 0644, NULL, &proc_doulongvec_minmax},
+ {KERN_SHMMNI, "shmmni", &shm_ctlmni, sizeof (int),
+ 0644, NULL, &proc_dointvec},
+ {KERN_MSGMAX, "msgmax", &msg_ctlmax, sizeof (int),
+ 0644, NULL, &proc_dointvec},
+ {KERN_MSGMNI, "msgmni", &msg_ctlmni, sizeof (int),
+ 0644, NULL, &proc_dointvec},
+ {KERN_MSGMNB, "msgmnb", &msg_ctlmnb, sizeof (int),
+ 0644, NULL, &proc_dointvec},
+ {KERN_SEM, "sem", &sem_ctls, 4*sizeof (int),
+ 0644, NULL, &proc_dointvec},
+#endif
+#ifdef CONFIG_MAGIC_SYSRQ
+ {KERN_SYSRQ, "sysrq", &sysrq_enabled, sizeof (int),
+ 0644, NULL, &proc_dointvec},
+#endif
+ {KERN_MAX_THREADS, "threads-max", &max_threads, sizeof(int),
+ 0644, NULL, &proc_dointvec},
+ {KERN_RANDOM, "random", NULL, 0, 0555, random_table},
+ {KERN_OVERFLOWUID, "overflowuid", &overflowuid, sizeof(int), 0644, NULL,
+ &proc_dointvec_minmax, &sysctl_intvec, NULL,
+ &minolduid, &maxolduid},
+ {KERN_OVERFLOWGID, "overflowgid", &overflowgid, sizeof(int), 0644, NULL,
+ &proc_dointvec_minmax, &sysctl_intvec, NULL,
+ &minolduid, &maxolduid},
+ {0}
+};
+
+static ctl_table vm_table[] = {
+ {VM_FREEPG, "freepages",
+ &freepages, sizeof(freepages_t), 0444, NULL, &proc_dointvec},
+ {VM_BDFLUSH, "bdflush", &bdf_prm, 9*sizeof(int), 0644, NULL,
+ &proc_dointvec_minmax, &sysctl_intvec, NULL,
+ &bdflush_min, &bdflush_max},
+ {VM_OVERCOMMIT_MEMORY, "overcommit_memory", &sysctl_overcommit_memory,
+ sizeof(sysctl_overcommit_memory), 0644, NULL, &proc_dointvec},
+ {VM_BUFFERMEM, "buffermem",
+ &buffer_mem, sizeof(buffer_mem_t), 0644, NULL, &proc_dointvec},
+ {VM_PAGECACHE, "pagecache",
+ &page_cache, sizeof(buffer_mem_t), 0644, NULL, &proc_dointvec},
+ {VM_PAGERDAEMON, "kswapd",
+ &pager_daemon, sizeof(pager_daemon_t), 0644, NULL, &proc_dointvec},
+ {VM_PGT_CACHE, "pagetable_cache",
+ &pgt_cache_water, 2*sizeof(int), 0644, NULL, &proc_dointvec},
+ {VM_PAGE_CLUSTER, "page-cluster",
+ &page_cluster, sizeof(int), 0644, NULL, &proc_dointvec},
+ {0}
+};
+
+static ctl_table proc_table[] = {
+ {0}
+};
+
+static ctl_table fs_table[] = {
+ {FS_NRINODE, "inode-nr", &inodes_stat, 2*sizeof(int),
+ 0444, NULL, &proc_dointvec},
+ {FS_STATINODE, "inode-state", &inodes_stat, 7*sizeof(int),
+ 0444, NULL, &proc_dointvec},
+ {FS_NRFILE, "file-nr", &files_stat, 3*sizeof(int),
+ 0444, NULL, &proc_dointvec},
+ {FS_MAXFILE, "file-max", &files_stat.max_files, sizeof(int),
+ 0644, NULL, &proc_dointvec},
+ {FS_NRSUPER, "super-nr", &nr_super_blocks, sizeof(int),
+ 0444, NULL, &proc_dointvec},
+ {FS_MAXSUPER, "super-max", &max_super_blocks, sizeof(int),
+ 0644, NULL, &proc_dointvec},
+ {FS_NRDQUOT, "dquot-nr", &nr_dquots, 2*sizeof(int),
+ 0444, NULL, &proc_dointvec},
+ {FS_MAXDQUOT, "dquot-max", &max_dquots, sizeof(int),
+ 0644, NULL, &proc_dointvec},
+ {FS_DENTRY, "dentry-state", &dentry_stat, 6*sizeof(int),
+ 0444, NULL, &proc_dointvec},
+ {FS_OVERFLOWUID, "overflowuid", &fs_overflowuid, sizeof(int), 0644, NULL,
+ &proc_dointvec_minmax, &sysctl_intvec, NULL,
+ &minolduid, &maxolduid},
+ {FS_OVERFLOWGID, "overflowgid", &fs_overflowgid, sizeof(int), 0644, NULL,
+ &proc_dointvec_minmax, &sysctl_intvec, NULL,
+ &minolduid, &maxolduid},
+ {FS_LEASES, "leases-enable", &leases_enable, sizeof(int),
+ 0644, NULL, &proc_dointvec},
+ {FS_DIR_NOTIFY, "dir-notify-enable", &dir_notify_enable,
+ sizeof(int), 0644, NULL, &proc_dointvec},
+ {FS_LEASE_TIME, "lease-break-time", &lease_break_time, sizeof(int),
+ 0644, NULL, &proc_dointvec},
+ {0}
+};
+
+static ctl_table debug_table[] = {
+ {0}
+};
+
+static ctl_table dev_table[] = {
+ {0}
+};
+
+extern void init_irq_proc (void);
+
+void __init sysctl_init(void)
+{
+#ifdef CONFIG_PROC_FS
+ register_proc_table(root_table, proc_sys_root);
+ init_irq_proc();
+#endif
+}
+
+int do_sysctl(int *name, int nlen, void *oldval, size_t *oldlenp,
+ void *newval, size_t newlen)
+{
+ struct list_head *tmp;
+
+ if (nlen == 0 || nlen >= CTL_MAXNAME)
+ return -ENOTDIR;
+ if (oldval) {
+ int old_len;
+ if (!oldlenp || get_user(old_len, oldlenp))
+ return -EFAULT;
+ }
+ tmp = &root_table_header.ctl_entry;
+ do {
+ struct ctl_table_header *head =
+ list_entry(tmp, struct ctl_table_header, ctl_entry);
+ void *context = NULL;
+ int error = parse_table(name, nlen, oldval, oldlenp,
+ newval, newlen, head->ctl_table,
+ &context);
+ if (context)
+ kfree(context);
+ if (error != -ENOTDIR)
+ return error;
+ tmp = tmp->next;
+ } while (tmp != &root_table_header.ctl_entry);
+ return -ENOTDIR;
+}
+
+extern asmlinkage long sys_sysctl(struct __sysctl_args *args)
+{
+ struct __sysctl_args tmp;
+ int error;
+
+ if (copy_from_user(&tmp, args, sizeof(tmp)))
+ return -EFAULT;
+
+ lock_kernel();
+ error = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp,
+ tmp.newval, tmp.newlen);
+ unlock_kernel();
+ return error;
+}
+
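+/*
+ * User-level sketch of the binary interface (shown through the C
+ * library's sysctl() wrapper, which packs the arguments into a
+ * struct __sysctl_args for this system call):
+ *
+ *	int name[] = { CTL_KERN, KERN_OSTYPE };
+ *	char buf[64];
+ *	size_t len = sizeof(buf);
+ *	sysctl(name, 2, buf, &len, NULL, 0);	// read kernel.ostype
+ */
+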
+/*
+ * ctl_perm does NOT grant the superuser all rights automatically, because
+ * some sysctl variables are readonly even to root.
+ */
+
+static int test_perm(int mode, int op)
+{
+ if (!current->euid)
+ mode >>= 6;
+ else if (in_egroup_p(0))
+ mode >>= 3;
+ if ((mode & op & 0007) == op)
+ return 0;
+ return -EACCES;
+}
+
+static inline int ctl_perm(ctl_table *table, int op)
+{
+ return test_perm(table->mode, op);
+}
+
+static int parse_table(int *name, int nlen,
+ void *oldval, size_t *oldlenp,
+ void *newval, size_t newlen,
+ ctl_table *table, void **context)
+{
+ int n;
+repeat:
+ if (!nlen)
+ return -ENOTDIR;
+ if (get_user(n, name))
+ return -EFAULT;
+ for ( ; table->ctl_name; table++) {
+ if (n == table->ctl_name || table->ctl_name == CTL_ANY) {
+ int error;
+ if (table->child) {
+ if (ctl_perm(table, 001))
+ return -EPERM;
+ if (table->strategy) {
+ error = table->strategy(
+ table, name, nlen,
+ oldval, oldlenp,
+ newval, newlen, context);
+ if (error)
+ return error;
+ }
+ name++;
+ nlen--;
+ table = table->child;
+ goto repeat;
+ }
+ error = do_sysctl_strategy(table, name, nlen,
+ oldval, oldlenp,
+ newval, newlen, context);
+ return error;
+ }
+ }
+ return -ENOTDIR;
+}
+
+/* Perform the actual read/write of a sysctl table entry. */
+int do_sysctl_strategy (ctl_table *table,
+ int *name, int nlen,
+ void *oldval, size_t *oldlenp,
+ void *newval, size_t newlen, void **context)
+{
+ int op = 0, rc, len;
+
+ if (oldval)
+ op |= 004;
+ if (newval)
+ op |= 002;
+ if (ctl_perm(table, op))
+ return -EPERM;
+
+ if (table->strategy) {
+ rc = table->strategy(table, name, nlen, oldval, oldlenp,
+ newval, newlen, context);
+ if (rc < 0)
+ return rc;
+ if (rc > 0)
+ return 0;
+ }
+
+ /* If there is no strategy routine, or if the strategy returns
+ * zero, proceed with automatic r/w */
+ if (table->data && table->maxlen) {
+ if (oldval && oldlenp) {
+ get_user(len, oldlenp);
+ if (len) {
+ if (len > table->maxlen)
+ len = table->maxlen;
+ if(copy_to_user(oldval, table->data, len))
+ return -EFAULT;
+ if(put_user(len, oldlenp))
+ return -EFAULT;
+ }
+ }
+ if (newval && newlen) {
+ len = newlen;
+ if (len > table->maxlen)
+ len = table->maxlen;
+ if(copy_from_user(table->data, newval, len))
+ return -EFAULT;
+ }
+ }
+ return 0;
+}
+
+struct ctl_table_header *register_sysctl_table(ctl_table * table,
+ int insert_at_head)
+{
+ struct ctl_table_header *tmp;
+ tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL);
+ if (!tmp)
+ return 0;
+ tmp->ctl_table = table;
+ INIT_LIST_HEAD(&tmp->ctl_entry);
+ if (insert_at_head)
+ list_add(&tmp->ctl_entry, &root_table_header.ctl_entry);
+ else
+ list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry);
+#ifdef CONFIG_PROC_FS
+ register_proc_table(table, proc_sys_root);
+#endif
+ return tmp;
+}
+
+/*
+ * Unlink and free a ctl_table.
+ */
+void unregister_sysctl_table(struct ctl_table_header * header)
+{
+ list_del(&header->ctl_entry);
+#ifdef CONFIG_PROC_FS
+ unregister_proc_table(header->ctl_table, proc_sys_root);
+#endif
+ kfree(header);
+}
+
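+/*
+ * Typical registration from a subsystem (sketch only; the foo_* names and
+ * the FOO_VALUE number are placeholders): build a table rooted under one
+ * of the directories in root_table[] and hand it to
+ * register_sysctl_table(), keeping the returned header for
+ * unregister_sysctl_table() later:
+ *
+ *	static ctl_table foo_table[] = {
+ *		{FOO_VALUE, "value", &foo_value, sizeof(int), 0644, NULL,
+ *		 &proc_dointvec},
+ *		{0}
+ *	};
+ *	static ctl_table foo_dir[] = {
+ *		{CTL_DEV, "dev", NULL, 0, 0555, foo_table},
+ *		{0}
+ *	};
+ *
+ *	foo_header = register_sysctl_table(foo_dir, 0);
+ */
+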
+/*
+ * /proc/sys support
+ */
+
+#ifdef CONFIG_PROC_FS
+
+/* Scan the sysctl entries in table and add them all into /proc */
+static void register_proc_table(ctl_table * table, struct proc_dir_entry *root)
+{
+ struct proc_dir_entry *de;
+ int len;
+ mode_t mode;
+
+ for (; table->ctl_name; table++) {
+ /* Can't do anything without a proc name. */
+ if (!table->procname)
+ continue;
+ /* Maybe we can't do anything with it... */
+ if (!table->proc_handler && !table->child) {
+ printk(KERN_WARNING "SYSCTL: Can't register %s\n",
+ table->procname);
+ continue;
+ }
+
+ len = strlen(table->procname);
+ mode = table->mode;
+
+ de = NULL;
+ if (table->proc_handler)
+ mode |= S_IFREG;
+ else {
+ mode |= S_IFDIR;
+ for (de = root->subdir; de; de = de->next) {
+ if (proc_match(len, table->procname, de))
+ break;
+ }
+ /* If the subdir exists already, de is non-NULL */
+ }
+
+ if (!de) {
+ de = create_proc_entry(table->procname, mode, root);
+ if (!de)
+ continue;
+ de->data = (void *) table;
+ if (table->proc_handler) {
+ de->proc_fops = &proc_sys_file_operations;
+ de->proc_iops = &proc_sys_inode_operations;
+ }
+ }
+ table->de = de;
+ if (de->mode & S_IFDIR)
+ register_proc_table(table->child, de);
+ }
+}
+
+/*
+ * Unregister a /proc sysctl table and any subdirectories.
+ */
+static void unregister_proc_table(ctl_table * table, struct proc_dir_entry *root)
+{
+ struct proc_dir_entry *de;
+ for (; table->ctl_name; table++) {
+ if (!(de = table->de))
+ continue;
+ if (de->mode & S_IFDIR) {
+ if (!table->child) {
+ printk (KERN_ALERT "Help - malformed sysctl tree on free\n");
+ continue;
+ }
+ unregister_proc_table(table->child, de);
+
+ /* Don't unregister directories which still have entries.. */
+ if (de->subdir)
+ continue;
+ }
+
+ /* Don't unregister proc entries that are still being used.. */
+ if (atomic_read(&de->count))
+ continue;
+
+ table->de = NULL;
+ remove_proc_entry(table->procname, root);
+ }
+}
+
+static ssize_t do_rw_proc(int write, struct file * file, char * buf,
+ size_t count, loff_t *ppos)
+{
+ int op;
+ struct proc_dir_entry *de;
+ struct ctl_table *table;
+ size_t res;
+ ssize_t error;
+
+ de = (struct proc_dir_entry*) file->f_dentry->d_inode->u.generic_ip;
+ if (!de || !de->data)
+ return -ENOTDIR;
+ table = (struct ctl_table *) de->data;
+ if (!table || !table->proc_handler)
+ return -ENOTDIR;
+ op = (write ? 002 : 004);
+ if (ctl_perm(table, op))
+ return -EPERM;
+
+ res = count;
+
+ /*
+ * FIXME: we need to pass on ppos to the handler.
+ */
+
+ error = (*table->proc_handler) (table, write, file, buf, &res);
+ if (error)
+ return error;
+ return res;
+}
+
+static ssize_t proc_readsys(struct file * file, char * buf,
+ size_t count, loff_t *ppos)
+{
+ return do_rw_proc(0, file, buf, count, ppos);
+}
+
+static ssize_t proc_writesys(struct file * file, const char * buf,
+ size_t count, loff_t *ppos)
+{
+ return do_rw_proc(1, file, (char *) buf, count, ppos);
+}
+
+static int proc_sys_permission(struct inode *inode, int op)
+{
+ return test_perm(inode->i_mode, op);
+}
+
+int proc_dostring(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ int len;
+ char *p, c;
+
+ if (!table->data || !table->maxlen || !*lenp ||
+ (filp->f_pos && !write)) {
+ *lenp = 0;
+ return 0;
+ }
+
+ if (write) {
+ len = 0;
+ p = buffer;
+ while (len < *lenp) {
+ if(get_user(c, p++))
+ return -EFAULT;
+ if (c == 0 || c == '\n')
+ break;
+ len++;
+ }
+ if (len >= table->maxlen)
+ len = table->maxlen-1;
+ if(copy_from_user(table->data, buffer, len))
+ return -EFAULT;
+ ((char *) table->data)[len] = 0;
+ filp->f_pos += *lenp;
+ } else {
+ len = strlen(table->data);
+ if (len > table->maxlen)
+ len = table->maxlen;
+ if (len > *lenp)
+ len = *lenp;
+ if (len)
+ if(copy_to_user(buffer, table->data, len))
+ return -EFAULT;
+ if (len < *lenp) {
+ if(put_user('\n', ((char *) buffer) + len))
+ return -EFAULT;
+ len++;
+ }
+ *lenp = len;
+ filp->f_pos += len;
+ }
+ return 0;
+}
+
+/*
+ * Special case of dostring for the UTS structure. It has to observe
+ * the uts_sem locking. Should this be in kernel/sys.c ????
+ */
+
+static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ int r;
+
+ if (!write) {
+ down_read(&uts_sem);
+ r=proc_dostring(table,0,filp,buffer,lenp);
+ up_read(&uts_sem);
+ } else {
+ down_write(&uts_sem);
+ r=proc_dostring(table,1,filp,buffer,lenp);
+ up_write(&uts_sem);
+ }
+ return r;
+}
+
+#define OP_SET 0
+#define OP_AND 1
+#define OP_OR 2
+#define OP_MAX 3
+#define OP_MIN 4
+
+static int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp, int conv, int op)
+{
+ int *i, vleft, first=1, len, left, neg, val;
+ #define TMPBUFLEN 20
+ char buf[TMPBUFLEN], *p;
+
+ if (!table->data || !table->maxlen || !*lenp ||
+ (filp->f_pos && !write)) {
+ *lenp = 0;
+ return 0;
+ }
+
+ i = (int *) table->data;
+ vleft = table->maxlen / sizeof(int);
+ left = *lenp;
+
+ for (; left && vleft--; i++, first=0) {
+ if (write) {
+ while (left) {
+ char c;
+ if(get_user(c,(char *) buffer))
+ return -EFAULT;
+ if (!isspace(c))
+ break;
+ left--;
+ ((char *) buffer)++;
+ }
+ if (!left)
+ break;
+ neg = 0;
+ len = left;
+ if (len > TMPBUFLEN-1)
+ len = TMPBUFLEN-1;
+ if(copy_from_user(buf, buffer, len))
+ return -EFAULT;
+ buf[len] = 0;
+ p = buf;
+ if (*p == '-' && left > 1) {
+ neg = 1;
+ left--, p++;
+ }
+ if (*p < '0' || *p > '9')
+ break;
+ val = simple_strtoul(p, &p, 0) * conv;
+ len = p-buf;
+ if ((len < left) && *p && !isspace(*p))
+ break;
+ if (neg)
+ val = -val;
+ buffer += len;
+ left -= len;
+ switch(op) {
+ case OP_SET: *i = val; break;
+ case OP_AND: *i &= val; break;
+ case OP_OR: *i |= val; break;
+ case OP_MAX: if(*i < val)
+ *i = val;
+ break;
+ case OP_MIN: if(*i > val)
+ *i = val;
+ break;
+ }
+ } else {
+ p = buf;
+ if (!first)
+ *p++ = '\t';
+ sprintf(p, "%d", (*i) / conv);
+ len = strlen(buf);
+ if (len > left)
+ len = left;
+ if(copy_to_user(buffer, buf, len))
+ return -EFAULT;
+ left -= len;
+ buffer += len;
+ }
+ }
+
+ if (!write && !first && left) {
+ if(put_user('\n', (char *) buffer))
+ return -EFAULT;
+ left--, buffer++;
+ }
+ if (write) {
+ p = (char *) buffer;
+ while (left) {
+ char c;
+ if(get_user(c, p++))
+ return -EFAULT;
+ if (!isspace(c))
+ break;
+ left--;
+ }
+ }
+ if (write && first)
+ return -EINVAL;
+ *lenp -= left;
+ filp->f_pos += *lenp;
+ return 0;
+}
+
+int proc_dointvec(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ return do_proc_dointvec(table,write,filp,buffer,lenp,1,OP_SET);
+}
+
+/*
+ * Only init (pid 1) may raise bits in the capability bounding set; any
+ * other caller's write is and-ed into the current set, so bits can only
+ * be cleared.
+ */
+
+int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ if (!capable(CAP_SYS_MODULE)) {
+ return -EPERM;
+ }
+ return do_proc_dointvec(table,write,filp,buffer,lenp,1,
+ (current->pid == 1) ? OP_SET : OP_AND);
+}
+
+int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ int *i, *min, *max, vleft, first=1, len, left, neg, val;
+ #define TMPBUFLEN 20
+ char buf[TMPBUFLEN], *p;
+
+ if (!table->data || !table->maxlen || !*lenp ||
+ (filp->f_pos && !write)) {
+ *lenp = 0;
+ return 0;
+ }
+
+ i = (int *) table->data;
+ min = (int *) table->extra1;
+ max = (int *) table->extra2;
+ vleft = table->maxlen / sizeof(int);
+ left = *lenp;
+
+ for (; left && vleft--; i++, first=0) {
+ if (write) {
+ while (left) {
+ char c;
+ if(get_user(c, (char *) buffer))
+ return -EFAULT;
+ if (!isspace(c))
+ break;
+ left--;
+ ((char *) buffer)++;
+ }
+ if (!left)
+ break;
+ neg = 0;
+ len = left;
+ if (len > TMPBUFLEN-1)
+ len = TMPBUFLEN-1;
+ if(copy_from_user(buf, buffer, len))
+ return -EFAULT;
+ buf[len] = 0;
+ p = buf;
+ if (*p == '-' && left > 1) {
+ neg = 1;
+ left--, p++;
+ }
+ if (*p < '0' || *p > '9')
+ break;
+ val = simple_strtoul(p, &p, 0);
+ len = p-buf;
+ if ((len < left) && *p && !isspace(*p))
+ break;
+ if (neg)
+ val = -val;
+ buffer += len;
+ left -= len;
+
+ if (min && val < *min++)
+ continue;
+ if (max && val > *max++)
+ continue;
+ *i = val;
+ } else {
+ p = buf;
+ if (!first)
+ *p++ = '\t';
+ sprintf(p, "%d", *i);
+ len = strlen(buf);
+ if (len > left)
+ len = left;
+ if(copy_to_user(buffer, buf, len))
+ return -EFAULT;
+ left -= len;
+ buffer += len;
+ }
+ }
+
+ if (!write && !first && left) {
+ if(put_user('\n', (char *) buffer))
+ return -EFAULT;
+ left--, buffer++;
+ }
+ if (write) {
+ p = (char *) buffer;
+ while (left) {
+ char c;
+ if(get_user(c, p++))
+ return -EFAULT;
+ if (!isspace(c))
+ break;
+ left--;
+ }
+ }
+ if (write && first)
+ return -EINVAL;
+ *lenp -= left;
+ filp->f_pos += *lenp;
+ return 0;
+}
+
+/*
+ * An unsigned long version of do_proc_dointvec(), with an extra
+ * multiply/divide conversion factor.
+ */
+
+static int do_proc_doulongvec_minmax(ctl_table *table, int write,
+ struct file *filp,
+ void *buffer, size_t *lenp,
+ unsigned long convmul,
+ unsigned long convdiv)
+{
+#define TMPBUFLEN 20
+ unsigned long *i, *min, *max, val;
+ int vleft, first=1, len, left, neg;
+ char buf[TMPBUFLEN], *p;
+
+ if (!table->data || !table->maxlen || !*lenp ||
+ (filp->f_pos && !write)) {
+ *lenp = 0;
+ return 0;
+ }
+
+ i = (unsigned long *) table->data;
+ min = (unsigned long *) table->extra1;
+ max = (unsigned long *) table->extra2;
+ vleft = table->maxlen / sizeof(unsigned long);
+ left = *lenp;
+
+ for (; left && vleft--; i++, first=0) {
+ if (write) {
+ while (left) {
+ char c;
+ if(get_user(c, (char *) buffer))
+ return -EFAULT;
+ if (!isspace(c))
+ break;
+ left--;
+ ((char *) buffer)++;
+ }
+ if (!left)
+ break;
+ neg = 0;
+ len = left;
+ if (len > TMPBUFLEN-1)
+ len = TMPBUFLEN-1;
+ if(copy_from_user(buf, buffer, len))
+ return -EFAULT;
+ buf[len] = 0;
+ p = buf;
+ if (*p == '-' && left > 1) {
+ neg = 1;
+ left--, p++;
+ }
+ if (*p < '0' || *p > '9')
+ break;
+ val = simple_strtoul(p, &p, 0) * convmul / convdiv ;
+ len = p-buf;
+ if ((len < left) && *p && !isspace(*p))
+ break;
+ if (neg)
+ val = -val;
+ buffer += len;
+ left -= len;
+
+ if(neg)
+ continue;
+ if (min && val < *min++)
+ continue;
+ if (max && val > *max++)
+ continue;
+ *i = val;
+ } else {
+ p = buf;
+ if (!first)
+ *p++ = '\t';
+ sprintf(p, "%lu", convdiv * (*i) / convmul);
+ len = strlen(buf);
+ if (len > left)
+ len = left;
+ if(copy_to_user(buffer, buf, len))
+ return -EFAULT;
+ left -= len;
+ buffer += len;
+ }
+ }
+
+ if (!write && !first && left) {
+ if(put_user('\n', (char *) buffer))
+ return -EFAULT;
+ left--, buffer++;
+ }
+ if (write) {
+ p = (char *) buffer;
+ while (left) {
+ char c;
+ if(get_user(c, p++))
+ return -EFAULT;
+ if (!isspace(c))
+ break;
+ left--;
+ }
+ }
+ if (write && first)
+ return -EINVAL;
+ *lenp -= left;
+ filp->f_pos += *lenp;
+ return 0;
+#undef TMPBUFLEN
+}
+
+int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, 1l, 1l);
+}
+
+int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
+ struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ return do_proc_doulongvec_minmax(table, write, filp, buffer,
+ lenp, HZ, 1000l);
+}
+
+
+/* Like proc_dointvec, but converts seconds to jiffies */
+int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ return do_proc_dointvec(table,write,filp,buffer,lenp,HZ,OP_SET);
+}
+
+#else /* CONFIG_PROC_FS */
+
+int proc_dostring(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ return -ENOSYS;
+}
+
+static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ return -ENOSYS;
+}
+
+int proc_dointvec(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ return -ENOSYS;
+}
+
+int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ return -ENOSYS;
+}
+
+int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ return -ENOSYS;
+}
+
+int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ return -ENOSYS;
+}
+
+int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ return -ENOSYS;
+}
+
+int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
+ struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ return -ENOSYS;
+}
+
+
+#endif /* CONFIG_PROC_FS */
+
+
+/*
+ * General sysctl support routines
+ */
+
+/* The generic string strategy routine: */
+int sysctl_string(ctl_table *table, int *name, int nlen,
+ void *oldval, size_t *oldlenp,
+ void *newval, size_t newlen, void **context)
+{
+ int l, len;
+
+ if (!table->data || !table->maxlen)
+ return -ENOTDIR;
+
+ if (oldval && oldlenp) {
+ if(get_user(len, oldlenp))
+ return -EFAULT;
+ if (len) {
+ l = strlen(table->data);
+ if (len > l) len = l;
+ if (len >= table->maxlen)
+ len = table->maxlen;
+ if(copy_to_user(oldval, table->data, len))
+ return -EFAULT;
+ if(put_user(0, ((char *) oldval) + len))
+ return -EFAULT;
+ if(put_user(len, oldlenp))
+ return -EFAULT;
+ }
+ }
+ if (newval && newlen) {
+ len = newlen;
+ if (len > table->maxlen)
+ len = table->maxlen;
+ if(copy_from_user(table->data, newval, len))
+ return -EFAULT;
+ if (len == table->maxlen)
+ len--;
+ ((char *) table->data)[len] = 0;
+ }
+ return 0;
+}
+
+/*
+ * This function makes sure that all of the integers in the vector
+ * are between the minimum and maximum values given in the arrays
+ * table->extra1 and table->extra2, respectively.
+ */
+int sysctl_intvec(ctl_table *table, int *name, int nlen,
+ void *oldval, size_t *oldlenp,
+ void *newval, size_t newlen, void **context)
+{
+ int i, length, *vec, *min, *max;
+
+ if (newval && newlen) {
+ if (newlen % sizeof(int) != 0)
+ return -EINVAL;
+
+ if (!table->extra1 && !table->extra2)
+ return 0;
+
+ if (newlen > table->maxlen)
+ newlen = table->maxlen;
+ length = newlen / sizeof(int);
+
+ vec = (int *) newval;
+ min = (int *) table->extra1;
+ max = (int *) table->extra2;
+
+ for (i = 0; i < length; i++) {
+ int value;
+ get_user(value, vec + i);
+ if (min && value < min[i])
+ return -EINVAL;
+ if (max && value > max[i])
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
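+/*
+ * A table entry that wants this checking supplies the bounds through
+ * extra1/extra2, exactly as the overflowuid entry in kern_table above:
+ *
+ *	{KERN_OVERFLOWUID, "overflowuid", &overflowuid, sizeof(int), 0644,
+ *	 NULL, &proc_dointvec_minmax, &sysctl_intvec, NULL,
+ *	 &minolduid, &maxolduid},
+ */
+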
+/* Strategy function to convert jiffies to seconds */
+int sysctl_jiffies(ctl_table *table, int *name, int nlen,
+ void *oldval, size_t *oldlenp,
+ void *newval, size_t newlen, void **context)
+{
+ if (oldval) {
+ size_t olen;
+ if (oldlenp) {
+ if (get_user(olen, oldlenp))
+ return -EFAULT;
+ if (olen!=sizeof(int))
+ return -EINVAL;
+ }
+ if (put_user(*(int *)(table->data) / HZ, (int *)oldval) ||
+ (oldlenp && put_user(sizeof(int),oldlenp)))
+ return -EFAULT;
+ }
+ if (newval && newlen) {
+ int new;
+ if (newlen != sizeof(int))
+ return -EINVAL;
+ if (get_user(new, (int *)newval))
+ return -EFAULT;
+ *(int *)(table->data) = new*HZ;
+ }
+ return 1;
+}
+
+
+#else /* CONFIG_SYSCTL */
+
+
+extern asmlinkage long sys_sysctl(struct __sysctl_args *args)
+{
+ return -ENOSYS;
+}
+
+int sysctl_string(ctl_table *table, int *name, int nlen,
+ void *oldval, size_t *oldlenp,
+ void *newval, size_t newlen, void **context)
+{
+ return -ENOSYS;
+}
+
+int sysctl_intvec(ctl_table *table, int *name, int nlen,
+ void *oldval, size_t *oldlenp,
+ void *newval, size_t newlen, void **context)
+{
+ return -ENOSYS;
+}
+
+int sysctl_jiffies(ctl_table *table, int *name, int nlen,
+ void *oldval, size_t *oldlenp,
+ void *newval, size_t newlen, void **context)
+{
+ return -ENOSYS;
+}
+
+int proc_dostring(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ return -ENOSYS;
+}
+
+int proc_dointvec(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ return -ENOSYS;
+}
+
+int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ return -ENOSYS;
+}
+
+int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ return -ENOSYS;
+}
+
+int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ return -ENOSYS;
+}
+
+int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ return -ENOSYS;
+}
+
+int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
+ struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ return -ENOSYS;
+}
+
+struct ctl_table_header * register_sysctl_table(ctl_table * table,
+ int insert_at_head)
+{
+ return 0;
+}
+
+void unregister_sysctl_table(struct ctl_table_header * table)
+{
+}
+
+#endif /* CONFIG_SYSCTL */
diff --git a/kernel/time.c b/kernel/time.c
new file mode 100644
index 000000000000..ffad77ad6203
--- /dev/null
+++ b/kernel/time.c
@@ -0,0 +1,420 @@
+/*
+ * linux/kernel/time.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * This file contains the interface functions for the various
+ * time related system calls: time, stime, gettimeofday, settimeofday,
+ * adjtime
+ */
+/*
+ * Modification history kernel/time.c
+ *
+ * 1993-09-02 Philip Gladstone
+ * Created file with time related functions from sched.c and adjtimex()
+ * 1993-10-08 Torsten Duwe
+ * adjtime interface update and CMOS clock write code
+ * 1995-08-13 Torsten Duwe
+ * kernel PLL updated to 1994-12-13 specs (rfc-1589)
+ * 1999-01-16 Ulrich Windl
+ * Introduced error checking for many cases in adjtimex().
+ * Updated NTP code according to technical memorandum Jan '96
+ * "A Kernel Model for Precision Timekeeping" by Dave Mills
+ * Allow time_constant larger than MAXTC(6) for NTP v4 (MAXTC == 10)
+ * (Even though the technical memorandum forbids it)
+ */
+
+#include <linux/mm.h>
+#include <linux/timex.h>
+#include <linux/smp_lock.h>
+
+#include <asm/uaccess.h>
+
+/*
+ * The timezone where the local system is located. Used as a default by some
+ * programs that obtain this value using gettimeofday.
+ */
+struct timezone sys_tz;
+
+static void do_normal_gettime(struct timeval * tm)
+{
+ *tm=xtime;
+}
+
+void (*do_get_fast_time)(struct timeval *) = do_normal_gettime;
+
+/*
+ * Generic way to access 'xtime' (the current time of day).
+ * This can be changed if the platform provides a more accurate (and fast!)
+ * version.
+ */
+
+void get_fast_time(struct timeval * t)
+{
+ do_get_fast_time(t);
+}
+
+/* The xtime_lock not only serializes reads and writes of xtime, it now also
+ serializes all accesses to the global NTP variables. */
+extern rwlock_t xtime_lock;
+
+#if !defined(__alpha__) && !defined(__ia64__)
+
+/*
+ * sys_time() can be implemented in user-level using
+ * sys_gettimeofday(). Is this for backwards compatibility? If so,
+ * why not move it into the appropriate arch directory (for those
+ * architectures that need it).
+ *
+ * XXX This function is NOT 64-bit clean!
+ */
+asmlinkage long sys_time(int * tloc)
+{
+ int i;
+
+ /* SMP: This is fairly trivial. We grab CURRENT_TIME and
+ stuff it to user space. No side effects */
+ i = CURRENT_TIME;
+ if (tloc) {
+ if (put_user(i,tloc))
+ i = -EFAULT;
+ }
+ return i;
+}
+
+/*
+ * sys_stime() can be implemented in user-level using
+ * sys_settimeofday(). Is this for backwards compatibility? If so,
+ * why not move it into the appropriate arch directory (for those
+ * architectures that need it).
+ */
+
+asmlinkage long sys_stime(int * tptr)
+{
+ int value;
+
+ if (!capable(CAP_SYS_TIME))
+ return -EPERM;
+ if (get_user(value, tptr))
+ return -EFAULT;
+ write_lock_irq(&xtime_lock);
+ xtime.tv_sec = value;
+ xtime.tv_usec = 0;
+ time_adjust = 0; /* stop active adjtime() */
+ time_status |= STA_UNSYNC;
+ time_maxerror = NTP_PHASE_LIMIT;
+ time_esterror = NTP_PHASE_LIMIT;
+ write_unlock_irq(&xtime_lock);
+ return 0;
+}
+
+#endif
+
+asmlinkage long sys_gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+ if (tv) {
+ struct timeval ktv;
+ do_gettimeofday(&ktv);
+ if (copy_to_user(tv, &ktv, sizeof(ktv)))
+ return -EFAULT;
+ }
+ if (tz) {
+ if (copy_to_user(tz, &sys_tz, sizeof(sys_tz)))
+ return -EFAULT;
+ }
+ return 0;
+}
+
+/*
+ * Adjust the time obtained from the CMOS to be UTC time instead of
+ * local time.
+ *
+ * This is ugly, but preferable to the alternatives. Otherwise we
+ * would either need to write a program to do it in /etc/rc (and risk
+ * confusion if the program gets run more than once; it would also be
+ * hard to make the program warp the clock precisely n hours) or
+ * compile in the timezone information into the kernel. Bad, bad....
+ *
+ * - TYT, 1992-01-01
+ *
+ * The best thing to do is to keep the CMOS clock in universal time (UTC)
+ * as real UNIX machines always do it. This avoids all headaches about
+ * daylight saving times and warping kernel clocks.
+ */
+static inline void warp_clock(void)
+{
+ write_lock_irq(&xtime_lock);
+ xtime.tv_sec += sys_tz.tz_minuteswest * 60;
+ write_unlock_irq(&xtime_lock);
+}
+
+/*
+ * If for some reason the CMOS clock has not already been running
+ * in UTC, but in some local time: the first time we set the timezone,
+ * we will warp the clock so that it is ticking UTC time instead of
+ * local time. Presumably, if someone is setting the timezone then we
+ * are running in an environment where the programs understand about
+ * timezones. This should be done at boot time in the /etc/rc script,
+ * as soon as possible, so that the clock can be set right. Otherwise,
+ * various programs will get confused when the clock gets warped.
+ */
+
+int do_sys_settimeofday(struct timeval *tv, struct timezone *tz)
+{
+ static int firsttime = 1;
+
+ if (!capable(CAP_SYS_TIME))
+ return -EPERM;
+
+ if (tz) {
+ /* SMP safe, global irq locking makes it work. */
+ sys_tz = *tz;
+ if (firsttime) {
+ firsttime = 0;
+ if (!tv)
+ warp_clock();
+ }
+ }
+ if (tv)
+ {
+ /* SMP safe, again the code in arch/foo/time.c should
+ * globally block out interrupts when it runs.
+ */
+ do_settimeofday(tv);
+ }
+ return 0;
+}
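+
+/*
+ * Editor's note: an illustrative user-space sketch, not part of the
+ * original source.  The boot-time warp described above is triggered by
+ * the first settimeofday() call that passes a timezone but a NULL
+ * timeval; the -60 (UTC+1) below is only an example value:
+ *
+ *	struct timezone tz = { -60, 0 };	(fields: tz_minuteswest, tz_dsttime)
+ *
+ *	if (settimeofday(NULL, &tz) < 0)
+ *		perror("settimeofday");
+ *
+ * After this call xtime has been shifted from local (CMOS) time to UTC.
+ */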
+
+asmlinkage long sys_settimeofday(struct timeval *tv, struct timezone *tz)
+{
+ struct timeval new_tv;
+ struct timezone new_tz;
+
+ if (tv) {
+ if (copy_from_user(&new_tv, tv, sizeof(*tv)))
+ return -EFAULT;
+ }
+ if (tz) {
+ if (copy_from_user(&new_tz, tz, sizeof(*tz)))
+ return -EFAULT;
+ }
+
+ return do_sys_settimeofday(tv ? &new_tv : NULL, tz ? &new_tz : NULL);
+}
+
+long pps_offset; /* pps time offset (us) */
+long pps_jitter = MAXTIME; /* time dispersion (jitter) (us) */
+
+long pps_freq; /* frequency offset (scaled ppm) */
+long pps_stabil = MAXFREQ; /* frequency dispersion (scaled ppm) */
+
+long pps_valid = PPS_VALID; /* pps signal watchdog counter */
+
+int pps_shift = PPS_SHIFT; /* interval duration (s) (shift) */
+
+long pps_jitcnt; /* jitter limit exceeded */
+long pps_calcnt; /* calibration intervals */
+long pps_errcnt; /* calibration errors */
+long pps_stbcnt; /* stability limit exceeded */
+
+/* hook for a loadable hardpps kernel module */
+void (*hardpps_ptr)(struct timeval *);
+
+/* adjtimex mainly allows reading (and writing, if superuser) of
+ * kernel time-keeping variables. used by xntpd.
+ */
+int do_adjtimex(struct timex *txc)
+{
+ long ltemp, mtemp, save_adjust;
+ int result;
+
+ /* In order to modify anything, you gotta be super-user! */
+ if (txc->modes && !capable(CAP_SYS_TIME))
+ return -EPERM;
+
+ /* Now we validate the data before disabling interrupts */
+
+ if (txc->modes != ADJ_OFFSET_SINGLESHOT && (txc->modes & ADJ_OFFSET))
+ /* adjustment offset limited to +/- .512 seconds */
+ if (txc->offset <= - MAXPHASE || txc->offset >= MAXPHASE )
+ return -EINVAL;
+
+ /* if the quartz is off by more than 10% something is VERY wrong ! */
+ if (txc->modes & ADJ_TICK)
+ if (txc->tick < 900000/HZ || txc->tick > 1100000/HZ)
+ return -EINVAL;
+
+ write_lock_irq(&xtime_lock);
+ result = time_state; /* mostly `TIME_OK' */
+
+ /* Save for later - semantics of adjtime is to return old value */
+ save_adjust = time_adjust;
+
+#if 0 /* STA_CLOCKERR is never set yet */
+ time_status &= ~STA_CLOCKERR; /* reset STA_CLOCKERR */
+#endif
+ /* If there are input parameters, then process them */
+ if (txc->modes)
+ {
+ if (txc->modes & ADJ_STATUS) /* only set allowed bits */
+ time_status = (txc->status & ~STA_RONLY) |
+ (time_status & STA_RONLY);
+
+ if (txc->modes & ADJ_FREQUENCY) { /* p. 22 */
+ if (txc->freq > MAXFREQ || txc->freq < -MAXFREQ) {
+ result = -EINVAL;
+ goto leave;
+ }
+ time_freq = txc->freq - pps_freq;
+ }
+
+ if (txc->modes & ADJ_MAXERROR) {
+ if (txc->maxerror < 0 || txc->maxerror >= NTP_PHASE_LIMIT) {
+ result = -EINVAL;
+ goto leave;
+ }
+ time_maxerror = txc->maxerror;
+ }
+
+ if (txc->modes & ADJ_ESTERROR) {
+ if (txc->esterror < 0 || txc->esterror >= NTP_PHASE_LIMIT) {
+ result = -EINVAL;
+ goto leave;
+ }
+ time_esterror = txc->esterror;
+ }
+
+ if (txc->modes & ADJ_TIMECONST) { /* p. 24 */
+ if (txc->constant < 0) { /* NTP v4 uses values > 6 */
+ result = -EINVAL;
+ goto leave;
+ }
+ time_constant = txc->constant;
+ }
+
+ if (txc->modes & ADJ_OFFSET) { /* values checked earlier */
+ if (txc->modes == ADJ_OFFSET_SINGLESHOT) {
+ /* adjtime() is independent from ntp_adjtime() */
+ time_adjust = txc->offset;
+ }
+ else if ( time_status & (STA_PLL | STA_PPSTIME) ) {
+ ltemp = (time_status & (STA_PPSTIME | STA_PPSSIGNAL)) ==
+ (STA_PPSTIME | STA_PPSSIGNAL) ?
+ pps_offset : txc->offset;
+
+ /*
+ * Scale the phase adjustment and
+ * clamp to the operating range.
+ */
+ if (ltemp > MAXPHASE)
+ time_offset = MAXPHASE << SHIFT_UPDATE;
+ else if (ltemp < -MAXPHASE)
+ time_offset = -(MAXPHASE << SHIFT_UPDATE);
+ else
+ time_offset = ltemp << SHIFT_UPDATE;
+
+ /*
+ * Select whether the frequency is to be controlled
+ * and in which mode (PLL or FLL). Clamp to the operating
+ * range. Ugly multiply/divide should be replaced someday.
+ */
+
+ if (time_status & STA_FREQHOLD || time_reftime == 0)
+ time_reftime = xtime.tv_sec;
+ mtemp = xtime.tv_sec - time_reftime;
+ time_reftime = xtime.tv_sec;
+ if (time_status & STA_FLL) {
+ if (mtemp >= MINSEC) {
+ ltemp = (time_offset / mtemp) << (SHIFT_USEC -
+ SHIFT_UPDATE);
+ if (ltemp < 0)
+ time_freq -= -ltemp >> SHIFT_KH;
+ else
+ time_freq += ltemp >> SHIFT_KH;
+ } else /* calibration interval too short (p. 12) */
+ result = TIME_ERROR;
+ } else { /* PLL mode */
+ if (mtemp < MAXSEC) {
+ ltemp *= mtemp;
+ if (ltemp < 0)
+ time_freq -= -ltemp >> (time_constant +
+ time_constant +
+ SHIFT_KF - SHIFT_USEC);
+ else
+ time_freq += ltemp >> (time_constant +
+ time_constant +
+ SHIFT_KF - SHIFT_USEC);
+ } else /* calibration interval too long (p. 12) */
+ result = TIME_ERROR;
+ }
+ if (time_freq > time_tolerance)
+ time_freq = time_tolerance;
+ else if (time_freq < -time_tolerance)
+ time_freq = -time_tolerance;
+ } /* STA_PLL || STA_PPSTIME */
+ } /* txc->modes & ADJ_OFFSET */
+ if (txc->modes & ADJ_TICK) {
+ /* if the quartz is off by more than 10% something is
+ VERY wrong ! */
+ if (txc->tick < 900000/HZ || txc->tick > 1100000/HZ) {
+ result = -EINVAL;
+ goto leave;
+ }
+ tick = txc->tick;
+ }
+ } /* txc->modes */
+leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0
+ || ((time_status & (STA_PPSFREQ|STA_PPSTIME)) != 0
+ && (time_status & STA_PPSSIGNAL) == 0)
+ /* p. 24, (b) */
+ || ((time_status & (STA_PPSTIME|STA_PPSJITTER))
+ == (STA_PPSTIME|STA_PPSJITTER))
+ /* p. 24, (c) */
+ || ((time_status & STA_PPSFREQ) != 0
+ && (time_status & (STA_PPSWANDER|STA_PPSERROR)) != 0))
+ /* p. 24, (d) */
+ result = TIME_ERROR;
+
+ if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT)
+ txc->offset = save_adjust;
+ else {
+ if (time_offset < 0)
+ txc->offset = -(-time_offset >> SHIFT_UPDATE);
+ else
+ txc->offset = time_offset >> SHIFT_UPDATE;
+ }
+ txc->freq = time_freq + pps_freq;
+ txc->maxerror = time_maxerror;
+ txc->esterror = time_esterror;
+ txc->status = time_status;
+ txc->constant = time_constant;
+ txc->precision = time_precision;
+ txc->tolerance = time_tolerance;
+ txc->tick = tick;
+ txc->ppsfreq = pps_freq;
+ txc->jitter = pps_jitter >> PPS_AVG;
+ txc->shift = pps_shift;
+ txc->stabil = pps_stabil;
+ txc->jitcnt = pps_jitcnt;
+ txc->calcnt = pps_calcnt;
+ txc->errcnt = pps_errcnt;
+ txc->stbcnt = pps_stbcnt;
+ write_unlock_irq(&xtime_lock);
+ do_gettimeofday(&txc->time);
+ return(result);
+}
+
+asmlinkage long sys_adjtimex(struct timex *txc_p)
+{
+ struct timex txc; /* Local copy of parameter */
+ int ret;
+
+ /* Copy the user data space into the kernel copy
+ * structure. But bear in mind that the structures
+ * may change
+ */
+ if(copy_from_user(&txc, txc_p, sizeof(struct timex)))
+ return -EFAULT;
+ ret = do_adjtimex(&txc);
+ return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret;
+}
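+
+/*
+ * Editor's note: an illustrative user-space sketch, not part of the
+ * original source.  Calling adjtimex() with modes == 0 performs no
+ * adjustment (and needs no privilege), it simply reads back the
+ * timekeeping state maintained above:
+ *
+ *	struct timex tx;
+ *	int state;
+ *
+ *	tx.modes = 0;
+ *	state = adjtimex(&tx);
+ *
+ * On return, state is the clock state (TIME_OK ... TIME_ERROR) and
+ * tx.offset, tx.freq, tx.status etc. hold the current kernel values.
+ */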
diff --git a/kernel/timer.c b/kernel/timer.c
new file mode 100644
index 000000000000..579b065f3f46
--- /dev/null
+++ b/kernel/timer.c
@@ -0,0 +1,837 @@
+/*
+ * linux/kernel/timer.c
+ *
+ * Kernel internal timers, kernel timekeeping, basic process system calls
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * 1997-01-28 Modified by Finn Arne Gangstad to make timers scale better.
+ *
+ * 1997-09-10 Updated NTP code according to technical memorandum Jan '96
+ * "A Kernel Model for Precision Timekeeping" by Dave Mills
+ * 1998-12-24 Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
+ * serialize accesses to xtime/lost_ticks).
+ * Copyright (C) 1998 Andrea Arcangeli
+ * 1999-03-10 Improved NTP compatibility by Ulrich Windl
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/timex.h>
+#include <linux/delay.h>
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+
+#include <asm/uaccess.h>
+
+/*
+ * Timekeeping variables
+ */
+
+long tick = (1000000 + HZ/2) / HZ; /* timer interrupt period */
+
+/* The current time */
+volatile struct timeval xtime __attribute__ ((aligned (16)));
+
+/* Don't completely fail for HZ > 500. */
+int tickadj = 500/HZ ? : 1; /* microsecs */
+
+DECLARE_TASK_QUEUE(tq_timer);
+DECLARE_TASK_QUEUE(tq_immediate);
+
+/*
+ * phase-lock loop variables
+ */
+/* TIME_ERROR prevents overwriting the CMOS clock */
+int time_state = TIME_OK; /* clock synchronization status */
+int time_status = STA_UNSYNC; /* clock status bits */
+long time_offset; /* time adjustment (us) */
+long time_constant = 2; /* pll time constant */
+long time_tolerance = MAXFREQ; /* frequency tolerance (ppm) */
+long time_precision = 1; /* clock precision (us) */
+long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */
+long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */
+long time_phase; /* phase offset (scaled us) */
+long time_freq = ((1000000 + HZ/2) % HZ - HZ/2) << SHIFT_USEC;
+ /* frequency offset (scaled ppm)*/
+long time_adj; /* tick adjust (scaled 1 / HZ) */
+long time_reftime; /* time at last adjustment (s) */
+
+long time_adjust;
+long time_adjust_step;
+
+unsigned long event;
+
+extern int do_setitimer(int, struct itimerval *, struct itimerval *);
+
+unsigned long volatile jiffies;
+
+unsigned int * prof_buffer;
+unsigned long prof_len;
+unsigned long prof_shift;
+
+/*
+ * Event timer code
+ */
+#define TVN_BITS 6
+#define TVR_BITS 8
+#define TVN_SIZE (1 << TVN_BITS)
+#define TVR_SIZE (1 << TVR_BITS)
+#define TVN_MASK (TVN_SIZE - 1)
+#define TVR_MASK (TVR_SIZE - 1)
+
+struct timer_vec {
+ int index;
+ struct list_head vec[TVN_SIZE];
+};
+
+struct timer_vec_root {
+ int index;
+ struct list_head vec[TVR_SIZE];
+};
+
+static struct timer_vec tv5;
+static struct timer_vec tv4;
+static struct timer_vec tv3;
+static struct timer_vec tv2;
+static struct timer_vec_root tv1;
+
+static struct timer_vec * const tvecs[] = {
+ (struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5
+};
+
+#define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0]))
+
+void init_timervecs (void)
+{
+ int i;
+
+ for (i = 0; i < TVN_SIZE; i++) {
+ INIT_LIST_HEAD(tv5.vec + i);
+ INIT_LIST_HEAD(tv4.vec + i);
+ INIT_LIST_HEAD(tv3.vec + i);
+ INIT_LIST_HEAD(tv2.vec + i);
+ }
+ for (i = 0; i < TVR_SIZE; i++)
+ INIT_LIST_HEAD(tv1.vec + i);
+}
+
+static unsigned long timer_jiffies;
+
+static inline void internal_add_timer(struct timer_list *timer)
+{
+ /*
+ * must be cli-ed when calling this
+ */
+ unsigned long expires = timer->expires;
+ unsigned long idx = expires - timer_jiffies;
+ struct list_head * vec;
+
+ if (idx < TVR_SIZE) {
+ int i = expires & TVR_MASK;
+ vec = tv1.vec + i;
+ } else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
+ int i = (expires >> TVR_BITS) & TVN_MASK;
+ vec = tv2.vec + i;
+ } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
+ int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
+ vec = tv3.vec + i;
+ } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
+ int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
+ vec = tv4.vec + i;
+ } else if ((signed long) idx < 0) {
+ /* can happen if you add a timer with expires == jiffies,
+ * or you set a timer to go off in the past
+ */
+ vec = tv1.vec + tv1.index;
+ } else if (idx <= 0xffffffffUL) {
+ int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
+ vec = tv5.vec + i;
+ } else {
+ /* Can only get here on architectures with 64-bit jiffies */
+ INIT_LIST_HEAD(&timer->list);
+ return;
+ }
+ /*
+ * Timers are FIFO!
+ */
+ list_add(&timer->list, vec->prev);
+}
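+
+/*
+ * Editor's note: a worked example of the bucketing above, not part of
+ * the original source.  With TVR_BITS == 8 and TVN_BITS == 6, tv1 holds
+ * timers expiring 0..255 jiffies from now, tv2 up to 2^14 - 1, tv3 up
+ * to 2^20 - 1, tv4 up to 2^26 - 1, and tv5 the rest of the 32-bit
+ * range.  A timer 1000 jiffies in the future therefore lands in tv2 at
+ * slot (expires >> 8) & 63, and is cascaded down into tv1 as the wheel
+ * turns close enough to its expiry.
+ */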
+
+/* Initialize both explicitly - let's try to have them in the same cache line */
+spinlock_t timerlist_lock = SPIN_LOCK_UNLOCKED;
+
+#ifdef CONFIG_SMP
+volatile struct timer_list * volatile running_timer;
+#define timer_enter(t) do { running_timer = t; mb(); } while (0)
+#define timer_exit() do { running_timer = NULL; } while (0)
+#define timer_is_running(t) (running_timer == t)
+#define timer_synchronize(t) while (timer_is_running(t)) barrier()
+#else
+#define timer_enter(t) do { } while (0)
+#define timer_exit() do { } while (0)
+#endif
+
+void add_timer(struct timer_list *timer)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&timerlist_lock, flags);
+ if (timer_pending(timer))
+ goto bug;
+ internal_add_timer(timer);
+ spin_unlock_irqrestore(&timerlist_lock, flags);
+ return;
+bug:
+ spin_unlock_irqrestore(&timerlist_lock, flags);
+ printk("bug: kernel timer added twice at %p.\n",
+ __builtin_return_address(0));
+}
+
+static inline int detach_timer (struct timer_list *timer)
+{
+ if (!timer_pending(timer))
+ return 0;
+ list_del(&timer->list);
+ return 1;
+}
+
+int mod_timer(struct timer_list *timer, unsigned long expires)
+{
+ int ret;
+ unsigned long flags;
+
+ spin_lock_irqsave(&timerlist_lock, flags);
+ timer->expires = expires;
+ ret = detach_timer(timer);
+ internal_add_timer(timer);
+ spin_unlock_irqrestore(&timerlist_lock, flags);
+ return ret;
+}
+
+int del_timer(struct timer_list * timer)
+{
+ int ret;
+ unsigned long flags;
+
+ spin_lock_irqsave(&timerlist_lock, flags);
+ ret = detach_timer(timer);
+ timer->list.next = timer->list.prev = NULL;
+ spin_unlock_irqrestore(&timerlist_lock, flags);
+ return ret;
+}
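+
+/*
+ * Editor's note: an illustrative sketch of the usual way this API is
+ * driven, not part of the original source; my_timeout() and my_dev are
+ * hypothetical:
+ *
+ *	static struct timer_list my_timer;
+ *
+ *	init_timer(&my_timer);
+ *	my_timer.function = my_timeout;
+ *	my_timer.data = (unsigned long) my_dev;
+ *	my_timer.expires = jiffies + HZ;
+ *	add_timer(&my_timer);
+ *
+ * A pending timer can be pushed back with mod_timer() and cancelled
+ * with del_timer() (or del_timer_sync() on SMP, when the handler must
+ * not be running once the call returns).
+ */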
+
+#ifdef CONFIG_SMP
+void sync_timers(void)
+{
+ spin_unlock_wait(&global_bh_lock);
+}
+
+/*
+ * SMP-specific function to delete a periodic timer.
+ * The caller must ensure, by some means, that the timer cannot be
+ * restarted.  Upon return the timer is not queued and its handler is
+ * not running on any CPU.  The return value is the number of times the
+ * timer was deleted (for reference counting).
+ */
+
+int del_timer_sync(struct timer_list * timer)
+{
+ int ret = 0;
+
+ for (;;) {
+ unsigned long flags;
+ int running;
+
+ spin_lock_irqsave(&timerlist_lock, flags);
+ ret += detach_timer(timer);
+ timer->list.next = timer->list.prev = 0;
+ running = timer_is_running(timer);
+ spin_unlock_irqrestore(&timerlist_lock, flags);
+
+ if (!running)
+ break;
+
+ timer_synchronize(timer);
+ }
+
+ return ret;
+}
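+
+/*
+ * Editor's note, not part of the original source: the typical teardown
+ * pattern this enables (my_timer and my_dev as in the hypothetical
+ * sketch above) is
+ *
+ *	del_timer_sync(&my_timer);
+ *	kfree(my_dev);
+ *
+ * i.e. the data the handler touches is only freed after del_timer_sync()
+ * has guaranteed the handler is no longer running on any CPU.
+ */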
+#endif
+
+
+static inline void cascade_timers(struct timer_vec *tv)
+{
+ /* cascade all the timers from tv up one level */
+ struct list_head *head, *curr, *next;
+
+ head = tv->vec + tv->index;
+ curr = head->next;
+ /*
+ * We are removing _all_ timers from the list, so we don't have to
+ * detach them individually, just clear the list afterwards.
+ */
+ while (curr != head) {
+ struct timer_list *tmp;
+
+ tmp = list_entry(curr, struct timer_list, list);
+ next = curr->next;
+ list_del(curr); // not needed
+ internal_add_timer(tmp);
+ curr = next;
+ }
+ INIT_LIST_HEAD(head);
+ tv->index = (tv->index + 1) & TVN_MASK;
+}
+
+static inline void run_timer_list(void)
+{
+ spin_lock_irq(&timerlist_lock);
+ while ((long)(jiffies - timer_jiffies) >= 0) {
+ struct list_head *head, *curr;
+ if (!tv1.index) {
+ int n = 1;
+ do {
+ cascade_timers(tvecs[n]);
+ } while (tvecs[n]->index == 1 && ++n < NOOF_TVECS);
+ }
+repeat:
+ head = tv1.vec + tv1.index;
+ curr = head->next;
+ if (curr != head) {
+ struct timer_list *timer;
+ void (*fn)(unsigned long);
+ unsigned long data;
+
+ timer = list_entry(curr, struct timer_list, list);
+ fn = timer->function;
+ data = timer->data;
+
+ detach_timer(timer);
+ timer->list.next = timer->list.prev = NULL;
+ timer_enter(timer);
+ spin_unlock_irq(&timerlist_lock);
+ fn(data);
+ spin_lock_irq(&timerlist_lock);
+ timer_exit();
+ goto repeat;
+ }
+ ++timer_jiffies;
+ tv1.index = (tv1.index + 1) & TVR_MASK;
+ }
+ spin_unlock_irq(&timerlist_lock);
+}
+
+spinlock_t tqueue_lock = SPIN_LOCK_UNLOCKED;
+
+void tqueue_bh(void)
+{
+ run_task_queue(&tq_timer);
+}
+
+void immediate_bh(void)
+{
+ run_task_queue(&tq_immediate);
+}
+
+/*
+ * this routine handles the overflow of the microsecond field
+ *
+ * The tricky bits of code to handle the accurate clock support
+ * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
+ * They were originally developed for SUN and DEC kernels.
+ * All the kudos should go to Dave for this stuff.
+ *
+ */
+static void second_overflow(void)
+{
+ long ltemp;
+
+ /* Bump the maxerror field */
+ time_maxerror += time_tolerance >> SHIFT_USEC;
+ if ( time_maxerror > NTP_PHASE_LIMIT ) {
+ time_maxerror = NTP_PHASE_LIMIT;
+ time_status |= STA_UNSYNC;
+ }
+
+ /*
+ * Leap second processing. If in leap-insert state at
+ * the end of the day, the system clock is set back one
+ * second; if in leap-delete state, the system clock is
+ * set ahead one second. The microtime() routine or
+ * external clock driver will ensure that reported time
+ * is always monotonic. The ugly divides should be
+ * replaced.
+ */
+ switch (time_state) {
+
+ case TIME_OK:
+ if (time_status & STA_INS)
+ time_state = TIME_INS;
+ else if (time_status & STA_DEL)
+ time_state = TIME_DEL;
+ break;
+
+ case TIME_INS:
+ if (xtime.tv_sec % 86400 == 0) {
+ xtime.tv_sec--;
+ time_state = TIME_OOP;
+ printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n");
+ }
+ break;
+
+ case TIME_DEL:
+ if ((xtime.tv_sec + 1) % 86400 == 0) {
+ xtime.tv_sec++;
+ time_state = TIME_WAIT;
+ printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n");
+ }
+ break;
+
+ case TIME_OOP:
+ time_state = TIME_WAIT;
+ break;
+
+ case TIME_WAIT:
+ if (!(time_status & (STA_INS | STA_DEL)))
+ time_state = TIME_OK;
+ }
+
+ /*
+ * Compute the phase adjustment for the next second. In
+ * PLL mode, the offset is reduced by a fixed factor
+ * times the time constant. In FLL mode the offset is
+ * used directly. In either mode, the maximum phase
+ * adjustment for each second is clamped so as to spread
+ * the adjustment over not more than the number of
+ * seconds between updates.
+ */
+ if (time_offset < 0) {
+ ltemp = -time_offset;
+ if (!(time_status & STA_FLL))
+ ltemp >>= SHIFT_KG + time_constant;
+ if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
+ ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
+ time_offset += ltemp;
+ time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
+ } else {
+ ltemp = time_offset;
+ if (!(time_status & STA_FLL))
+ ltemp >>= SHIFT_KG + time_constant;
+ if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
+ ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
+ time_offset -= ltemp;
+ time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
+ }
+
+ /*
+ * Compute the frequency estimate and additional phase
+ * adjustment due to frequency error for the next
+ * second. When the PPS signal is engaged, gnaw on the
+ * watchdog counter and update the frequency computed by
+ * the pll and the PPS signal.
+ */
+ pps_valid++;
+ if (pps_valid == PPS_VALID) { /* PPS signal lost */
+ pps_jitter = MAXTIME;
+ pps_stabil = MAXFREQ;
+ time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
+ STA_PPSWANDER | STA_PPSERROR);
+ }
+ ltemp = time_freq + pps_freq;
+ if (ltemp < 0)
+ time_adj -= -ltemp >>
+ (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
+ else
+ time_adj += ltemp >>
+ (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
+
+#if HZ == 100
+ /* Compensate for (HZ==100) != (1 << SHIFT_HZ).
+ * Add 25% and 3.125% to get 128.125; => only 0.125% error (p. 14)
+ */
+ if (time_adj < 0)
+ time_adj -= (-time_adj >> 2) + (-time_adj >> 5);
+ else
+ time_adj += (time_adj >> 2) + (time_adj >> 5);
+#endif
+}
+
+/* in the NTP reference this is called "hardclock()" */
+static void update_wall_time_one_tick(void)
+{
+ if ( (time_adjust_step = time_adjust) != 0 ) {
+ /* We are doing an adjtime thing.
+ *
+ * Prepare time_adjust_step to be within bounds.
+ * Note that a positive time_adjust means we want the clock
+ * to run faster.
+ *
+ * Limit the amount of the step to be in the range
+ * -tickadj .. +tickadj
+ */
+ if (time_adjust > tickadj)
+ time_adjust_step = tickadj;
+ else if (time_adjust < -tickadj)
+ time_adjust_step = -tickadj;
+
+ /* Reduce by this step the amount of time left */
+ time_adjust -= time_adjust_step;
+ }
+ xtime.tv_usec += tick + time_adjust_step;
+ /*
+ * Advance the phase, once it gets to one microsecond, then
+ * advance the tick more.
+ */
+ time_phase += time_adj;
+ if (time_phase <= -FINEUSEC) {
+ long ltemp = -time_phase >> SHIFT_SCALE;
+ time_phase += ltemp << SHIFT_SCALE;
+ xtime.tv_usec -= ltemp;
+ }
+ else if (time_phase >= FINEUSEC) {
+ long ltemp = time_phase >> SHIFT_SCALE;
+ time_phase -= ltemp << SHIFT_SCALE;
+ xtime.tv_usec += ltemp;
+ }
+}
+
+/*
+ * Using a loop looks inefficient, but "ticks" is
+ * usually just one (we shouldn't be losing ticks,
+ * we're doing it this way mainly for interrupt
+ * latency reasons, not because we think we'll
+ * have lots of lost timer ticks).
+ */
+static void update_wall_time(unsigned long ticks)
+{
+ do {
+ ticks--;
+ update_wall_time_one_tick();
+ } while (ticks);
+
+ if (xtime.tv_usec >= 1000000) {
+ xtime.tv_usec -= 1000000;
+ xtime.tv_sec++;
+ second_overflow();
+ }
+}
+
+static inline void do_process_times(struct task_struct *p,
+ unsigned long user, unsigned long system)
+{
+ unsigned long psecs;
+
+ psecs = (p->times.tms_utime += user);
+ psecs += (p->times.tms_stime += system);
+ if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_cur) {
+ /* Send SIGXCPU every second.. */
+ if (!(psecs % HZ))
+ send_sig(SIGXCPU, p, 1);
+ /* and SIGKILL when we go over max.. */
+ if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_max)
+ send_sig(SIGKILL, p, 1);
+ }
+}
+
+static inline void do_it_virt(struct task_struct * p, unsigned long ticks)
+{
+ unsigned long it_virt = p->it_virt_value;
+
+ if (it_virt) {
+ it_virt -= ticks;
+ if (!it_virt) {
+ it_virt = p->it_virt_incr;
+ send_sig(SIGVTALRM, p, 1);
+ }
+ p->it_virt_value = it_virt;
+ }
+}
+
+static inline void do_it_prof(struct task_struct *p)
+{
+ unsigned long it_prof = p->it_prof_value;
+
+ if (it_prof) {
+ if (--it_prof == 0) {
+ it_prof = p->it_prof_incr;
+ send_sig(SIGPROF, p, 1);
+ }
+ p->it_prof_value = it_prof;
+ }
+}
+
+void update_one_process(struct task_struct *p, unsigned long user,
+ unsigned long system, int cpu)
+{
+ p->per_cpu_utime[cpu] += user;
+ p->per_cpu_stime[cpu] += system;
+ do_process_times(p, user, system);
+ do_it_virt(p, user);
+ do_it_prof(p);
+}
+
+/*
+ * Called from the timer interrupt handler to charge one tick to the current
+ * process. user_tick is 1 if the tick is user time, 0 for system.
+ */
+void update_process_times(int user_tick)
+{
+ struct task_struct *p = current;
+ int cpu = smp_processor_id(), system = user_tick ^ 1;
+
+ update_one_process(p, user_tick, system, cpu);
+ if (p->pid) {
+ if (--p->counter <= 0) {
+ p->counter = 0;
+ p->need_resched = 1;
+ }
+ if (p->nice > 0)
+ kstat.per_cpu_nice[cpu] += user_tick;
+ else
+ kstat.per_cpu_user[cpu] += user_tick;
+ kstat.per_cpu_system[cpu] += system;
+ } else if (local_bh_count(cpu) || local_irq_count(cpu) > 1)
+ kstat.per_cpu_system[cpu] += system;
+}
+
+/*
+ * Nr of active tasks - counted in fixed-point numbers
+ */
+static unsigned long count_active_tasks(void)
+{
+ struct task_struct *p;
+ unsigned long nr = 0;
+
+ read_lock(&tasklist_lock);
+ for_each_task(p) {
+ if ((p->state == TASK_RUNNING ||
+ (p->state & TASK_UNINTERRUPTIBLE)))
+ nr += FIXED_1;
+ }
+ read_unlock(&tasklist_lock);
+ return nr;
+}
+
+/*
+ * Hmm.. Changed this, as the GNU make sources (load.c) seem to
+ * imply that avenrun[] is the standard name for this kind of thing.
+ * Nothing else seems to be standardized: the fractional size etc
+ * all seem to differ on different machines.
+ */
+unsigned long avenrun[3];
+
+static inline void calc_load(unsigned long ticks)
+{
+ unsigned long active_tasks; /* fixed-point */
+ static int count = LOAD_FREQ;
+
+ count -= ticks;
+ if (count < 0) {
+ count += LOAD_FREQ;
+ active_tasks = count_active_tasks();
+ CALC_LOAD(avenrun[0], EXP_1, active_tasks);
+ CALC_LOAD(avenrun[1], EXP_5, active_tasks);
+ CALC_LOAD(avenrun[2], EXP_15, active_tasks);
+ }
+}
+
+/* jiffies at the most recent update of wall time */
+unsigned long wall_jiffies;
+
+/*
+ * This spinlock protects us from SMP races while playing with xtime. -arca
+ */
+rwlock_t xtime_lock = RW_LOCK_UNLOCKED;
+
+static inline void update_times(void)
+{
+ unsigned long ticks;
+
+ /*
+ * update_times() is run from the raw timer_bh handler so we
+ * just know that the irqs are locally enabled and so we don't
+ * need to save/restore the flags of the local CPU here. -arca
+ */
+ write_lock_irq(&xtime_lock);
+
+ ticks = jiffies - wall_jiffies;
+ if (ticks) {
+ wall_jiffies += ticks;
+ update_wall_time(ticks);
+ }
+ write_unlock_irq(&xtime_lock);
+ calc_load(ticks);
+}
+
+void timer_bh(void)
+{
+ update_times();
+ run_timer_list();
+}
+
+void do_timer(struct pt_regs *regs)
+{
+ (*(unsigned long *)&jiffies)++;
+#ifndef CONFIG_SMP
+ /* SMP process accounting uses the local APIC timer */
+
+ update_process_times(user_mode(regs));
+#endif
+ mark_bh(TIMER_BH);
+ if (TQ_ACTIVE(tq_timer))
+ mark_bh(TQUEUE_BH);
+}
+
+#if !defined(__alpha__) && !defined(__ia64__)
+
+/*
+ * For backwards compatibility? This can be done in libc so Alpha
+ * and all newer ports shouldn't need it.
+ */
+asmlinkage unsigned long sys_alarm(unsigned int seconds)
+{
+ struct itimerval it_new, it_old;
+ unsigned int oldalarm;
+
+ it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
+ it_new.it_value.tv_sec = seconds;
+ it_new.it_value.tv_usec = 0;
+ do_setitimer(ITIMER_REAL, &it_new, &it_old);
+ oldalarm = it_old.it_value.tv_sec;
+ /* ehhh.. We can't return 0 if we have an alarm pending.. */
+ /* And we'd better return too much than too little anyway */
+ if (it_old.it_value.tv_usec)
+ oldalarm++;
+ return oldalarm;
+}
+
+#endif
+
+#ifndef __alpha__
+
+/*
+ * The Alpha uses getxpid, getxuid, and getxgid instead. Maybe this
+ * should be moved into arch/i386 instead?
+ */
+
+asmlinkage long sys_getpid(void)
+{
+ /* This is SMP safe - current->tgid doesn't change */
+ return current->tgid;
+}
+
+/*
+ * This is not strictly SMP safe: p_opptr could change
+ * from under us. However, rather than getting any lock
+ * we can use an optimistic algorithm: get the parent
+ * pid, and go back and check that the parent is still
+ * the same. If it has changed (which is extremely unlikely
+ * indeed), we just try again..
+ *
+ * NOTE! This depends on the fact that even if we _do_
+ * get an old value of "parent", we can happily dereference
+ * the pointer: we just can't necessarily trust the result
+ * until we know that the parent pointer is valid.
+ *
+ * The "mb()" macro is a memory barrier - a synchronizing
+ * event. It also makes sure that gcc doesn't optimize
+ * away the necessary memory references.. The barrier doesn't
+ * have to have all that strong semantics: on x86 we don't
+ * really require a synchronizing instruction, for example.
+ * The barrier is more important for code generation than
+ * for any real memory ordering semantics (even if there is
+ * a small window for a race, using the old pointer is
+ * harmless for a while).
+ */
+asmlinkage long sys_getppid(void)
+{
+ int pid;
+ struct task_struct * me = current;
+ struct task_struct * parent;
+
+ parent = me->p_opptr;
+ for (;;) {
+ pid = parent->pid;
+#ifdef CONFIG_SMP
+{
+ struct task_struct *old = parent;
+ mb();
+ parent = me->p_opptr;
+ if (old != parent)
+ continue;
+}
+#endif
+ break;
+ }
+ return pid;
+}
+
+asmlinkage long sys_getuid(void)
+{
+ /* Only we change this so SMP safe */
+ return current->uid;
+}
+
+asmlinkage long sys_geteuid(void)
+{
+ /* Only we change this so SMP safe */
+ return current->euid;
+}
+
+asmlinkage long sys_getgid(void)
+{
+ /* Only we change this so SMP safe */
+ return current->gid;
+}
+
+asmlinkage long sys_getegid(void)
+{
+ /* Only we change this so SMP safe */
+ return current->egid;
+}
+
+#endif
+
+asmlinkage long sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp)
+{
+ struct timespec t;
+ unsigned long expire;
+
+ if(copy_from_user(&t, rqtp, sizeof(struct timespec)))
+ return -EFAULT;
+
+ if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0 || t.tv_sec < 0)
+ return -EINVAL;
+
+
+ if (t.tv_sec == 0 && t.tv_nsec <= 2000000L &&
+ current->policy != SCHED_OTHER)
+ {
+ /*
+ * Short delay requests up to 2 ms will be handled with
+ * high precision by a busy wait for all real-time processes.
+ *
+ * It's important on SMP not to do this while holding locks.
+ */
+ udelay((t.tv_nsec + 999) / 1000);
+ return 0;
+ }
+
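+	/*
+	 * Editor's note: the extra jiffy added for any non-zero request
+	 * below compensates for the partially elapsed current tick, so
+	 * that at least the requested time passes despite the jiffy
+	 * granularity.
+	 */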
+ expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec);
+
+ current->state = TASK_INTERRUPTIBLE;
+ expire = schedule_timeout(expire);
+
+ if (expire) {
+ if (rmtp) {
+ jiffies_to_timespec(expire, &t);
+ if (copy_to_user(rmtp, &t, sizeof(struct timespec)))
+ return -EFAULT;
+ }
+ return -EINTR;
+ }
+ return 0;
+}
+
diff --git a/kernel/uid16.c b/kernel/uid16.c
new file mode 100644
index 000000000000..f76e4fd706e5
--- /dev/null
+++ b/kernel/uid16.c
@@ -0,0 +1,163 @@
+/*
+ * Wrapper functions for 16-bit uid backwards compatibility. All nicely
+ * tied together in the faint hope we can take them out in five years' time.
+ */
+
+#include <linux/mm.h>
+#include <linux/utsname.h>
+#include <linux/mman.h>
+#include <linux/smp_lock.h>
+#include <linux/notifier.h>
+#include <linux/reboot.h>
+#include <linux/prctl.h>
+#include <linux/init.h>
+#include <linux/highuid.h>
+
+#include <asm/uaccess.h>
+
+extern asmlinkage long sys_chown(const char *, uid_t,gid_t);
+extern asmlinkage long sys_lchown(const char *, uid_t,gid_t);
+extern asmlinkage long sys_fchown(unsigned int, uid_t,gid_t);
+extern asmlinkage long sys_setregid(gid_t, gid_t);
+extern asmlinkage long sys_setgid(gid_t);
+extern asmlinkage long sys_setreuid(uid_t, uid_t);
+extern asmlinkage long sys_setuid(uid_t);
+extern asmlinkage long sys_setresuid(uid_t, uid_t, uid_t);
+extern asmlinkage long sys_setresgid(gid_t, gid_t, gid_t);
+extern asmlinkage long sys_setfsuid(uid_t);
+extern asmlinkage long sys_setfsgid(gid_t);
+
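+/*
+ * Editor's note, not part of the original source: the low2high*() and
+ * high2low*() helpers used below come from <linux/highuid.h>.  Roughly,
+ * low2highuid() widens a 16-bit uid to the kernel's uid_t while keeping
+ * the special value -1 intact, and high2lowuid() narrows a uid back
+ * down, substituting the configured overflow uid for values that do not
+ * fit in 16 bits.
+ */
+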
+asmlinkage long sys_chown16(const char * filename, old_uid_t user, old_gid_t group)
+{
+ return sys_chown(filename, low2highuid(user), low2highgid(group));
+}
+
+asmlinkage long sys_lchown16(const char * filename, old_uid_t user, old_gid_t group)
+{
+ return sys_lchown(filename, low2highuid(user), low2highgid(group));
+}
+
+asmlinkage long sys_fchown16(unsigned int fd, old_uid_t user, old_gid_t group)
+{
+ return sys_fchown(fd, low2highuid(user), low2highgid(group));
+}
+
+asmlinkage long sys_setregid16(old_gid_t rgid, old_gid_t egid)
+{
+ return sys_setregid(low2highgid(rgid), low2highgid(egid));
+}
+
+asmlinkage long sys_setgid16(old_gid_t gid)
+{
+ return sys_setgid((gid_t)gid);
+}
+
+asmlinkage long sys_setreuid16(old_uid_t ruid, old_uid_t euid)
+{
+ return sys_setreuid(low2highuid(ruid), low2highuid(euid));
+}
+
+asmlinkage long sys_setuid16(old_uid_t uid)
+{
+ return sys_setuid((uid_t)uid);
+}
+
+asmlinkage long sys_setresuid16(old_uid_t ruid, old_uid_t euid, old_uid_t suid)
+{
+ return sys_setresuid(low2highuid(ruid), low2highuid(euid),
+ low2highuid(suid));
+}
+
+asmlinkage long sys_getresuid16(old_uid_t *ruid, old_uid_t *euid, old_uid_t *suid)
+{
+ int retval;
+
+ if (!(retval = put_user(high2lowuid(current->uid), ruid)) &&
+ !(retval = put_user(high2lowuid(current->euid), euid)))
+ retval = put_user(high2lowuid(current->suid), suid);
+
+ return retval;
+}
+
+asmlinkage long sys_setresgid16(old_gid_t rgid, old_gid_t egid, old_gid_t sgid)
+{
+ return sys_setresgid(low2highgid(rgid), low2highgid(egid),
+ low2highgid(sgid));
+}
+
+asmlinkage long sys_getresgid16(old_gid_t *rgid, old_gid_t *egid, old_gid_t *sgid)
+{
+ int retval;
+
+ if (!(retval = put_user(high2lowgid(current->gid), rgid)) &&
+ !(retval = put_user(high2lowgid(current->egid), egid)))
+ retval = put_user(high2lowgid(current->sgid), sgid);
+
+ return retval;
+}
+
+asmlinkage long sys_setfsuid16(old_uid_t uid)
+{
+ return sys_setfsuid((uid_t)uid);
+}
+
+asmlinkage long sys_setfsgid16(old_gid_t gid)
+{
+ return sys_setfsgid((gid_t)gid);
+}
+
+asmlinkage long sys_getgroups16(int gidsetsize, old_gid_t *grouplist)
+{
+ old_gid_t groups[NGROUPS];
+ int i,j;
+
+ if (gidsetsize < 0)
+ return -EINVAL;
+ i = current->ngroups;
+ if (gidsetsize) {
+ if (i > gidsetsize)
+ return -EINVAL;
+ for(j=0;j<i;j++)
+ groups[j] = current->groups[j];
+ if (copy_to_user(grouplist, groups, sizeof(old_gid_t)*i))
+ return -EFAULT;
+ }
+ return i;
+}
+
+asmlinkage long sys_setgroups16(int gidsetsize, old_gid_t *grouplist)
+{
+ old_gid_t groups[NGROUPS];
+ int i;
+
+ if (!capable(CAP_SETGID))
+ return -EPERM;
+ if ((unsigned) gidsetsize > NGROUPS)
+ return -EINVAL;
+ if (copy_from_user(groups, grouplist, gidsetsize * sizeof(old_gid_t)))
+ return -EFAULT;
+ for (i = 0 ; i < gidsetsize ; i++)
+ current->groups[i] = (gid_t)groups[i];
+ current->ngroups = gidsetsize;
+ return 0;
+}
+
+asmlinkage long sys_getuid16(void)
+{
+ return high2lowuid(current->uid);
+}
+
+asmlinkage long sys_geteuid16(void)
+{
+ return high2lowuid(current->euid);
+}
+
+asmlinkage long sys_getgid16(void)
+{
+ return high2lowgid(current->gid);
+}
+
+asmlinkage long sys_getegid16(void)
+{
+ return high2lowgid(current->egid);
+}
diff --git a/kernel/user.c b/kernel/user.c
new file mode 100644
index 000000000000..be99b110e745
--- /dev/null
+++ b/kernel/user.c
@@ -0,0 +1,137 @@
+/*
+ * The "user cache".
+ *
+ * (C) Copyright 1991-2000 Linus Torvalds
+ *
+ * We have a per-user structure to keep track of how many
+ * processes, files etc the user has claimed, in order to be
+ * able to have per-user limits for system resources.
+ */
+
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+/*
+ * UID task count cache, to get fast user lookup in "alloc_uid"
+ * when changing user ID's (ie setuid() and friends).
+ */
+#define UIDHASH_BITS 8
+#define UIDHASH_SZ (1 << UIDHASH_BITS)
+#define UIDHASH_MASK (UIDHASH_SZ - 1)
+#define __uidhashfn(uid) (((uid >> UIDHASH_BITS) ^ uid) & UIDHASH_MASK)
+#define uidhashentry(uid) (uidhash_table + __uidhashfn(uid))
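+
+/*
+ * Editor's note: a worked example of the hash above, not part of the
+ * original source.  For uid 1000: 1000 >> 8 == 3, 3 ^ 1000 == 1003 and
+ * 1003 & 255 == 235, so that user lands in bucket 235 of uidhash_table.
+ */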
+
+static kmem_cache_t *uid_cachep;
+static struct user_struct *uidhash_table[UIDHASH_SZ];
+static spinlock_t uidhash_lock = SPIN_LOCK_UNLOCKED;
+
+struct user_struct root_user = {
+ __count: ATOMIC_INIT(1),
+ processes: ATOMIC_INIT(1),
+ files: ATOMIC_INIT(0)
+};
+
+/*
+ * These routines must be called with the uidhash spinlock held!
+ */
+static inline void uid_hash_insert(struct user_struct *up, struct user_struct **hashent)
+{
+ struct user_struct *next = *hashent;
+
+ up->next = next;
+ if (next)
+ next->pprev = &up->next;
+ up->pprev = hashent;
+ *hashent = up;
+}
+
+static inline void uid_hash_remove(struct user_struct *up)
+{
+ struct user_struct *next = up->next;
+ struct user_struct **pprev = up->pprev;
+
+ if (next)
+ next->pprev = pprev;
+ *pprev = next;
+}
+
+static inline struct user_struct *uid_hash_find(uid_t uid, struct user_struct **hashent)
+{
+ struct user_struct *next;
+
+ next = *hashent;
+ for (;;) {
+ struct user_struct *up = next;
+ if (next) {
+ next = up->next;
+ if (up->uid != uid)
+ continue;
+ atomic_inc(&up->__count);
+ }
+ return up;
+ }
+}
+
+void free_uid(struct user_struct *up)
+{
+ if (up && atomic_dec_and_lock(&up->__count, &uidhash_lock)) {
+ uid_hash_remove(up);
+ kmem_cache_free(uid_cachep, up);
+ spin_unlock(&uidhash_lock);
+ }
+}
+
+struct user_struct * alloc_uid(uid_t uid)
+{
+ struct user_struct **hashent = uidhashentry(uid);
+ struct user_struct *up;
+
+ spin_lock(&uidhash_lock);
+ up = uid_hash_find(uid, hashent);
+ spin_unlock(&uidhash_lock);
+
+ if (!up) {
+ struct user_struct *new;
+
+ new = kmem_cache_alloc(uid_cachep, SLAB_KERNEL);
+ if (!new)
+ return NULL;
+ new->uid = uid;
+ atomic_set(&new->__count, 1);
+ atomic_set(&new->processes, 0);
+ atomic_set(&new->files, 0);
+
+ /*
+ * Before adding this, check whether another task raced with us
+ * and already added the same user.
+ */
+ spin_lock(&uidhash_lock);
+ up = uid_hash_find(uid, hashent);
+ if (up) {
+ kmem_cache_free(uid_cachep, new);
+ } else {
+ uid_hash_insert(new, hashent);
+ up = new;
+ }
+ spin_unlock(&uidhash_lock);
+
+ }
+ return up;
+}
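+
+/*
+ * Editor's note: an illustrative sketch of how a caller that switches a
+ * task's uid might use this cache, not part of the original source;
+ * new_ruid is hypothetical and error handling is omitted:
+ *
+ *	struct user_struct *new_user = alloc_uid(new_ruid);
+ *
+ *	if (new_user) {
+ *		atomic_inc(&new_user->processes);
+ *		atomic_dec(&current->user->processes);
+ *		free_uid(current->user);
+ *		current->user = new_user;
+ *	}
+ *
+ * alloc_uid() returns with a reference held; free_uid() drops one
+ * reference and tears the structure down when the count reaches zero.
+ */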
+
+
+static int __init uid_cache_init(void)
+{
+ uid_cachep = kmem_cache_create("uid_cache", sizeof(struct user_struct),
+ 0,
+ SLAB_HWCACHE_ALIGN, NULL, NULL);
+ if(!uid_cachep)
+ panic("Cannot create uid taskcount SLAB cache\n");
+
+ /* Insert the root user immediately - init already runs with this */
+ uid_hash_insert(&root_user, uidhashentry(0));
+ return 0;
+}
+
+module_init(uid_cache_init);