Diffstat (limited to 'kernel')
 -rw-r--r--   kernel/ksyms.c         |   3
 -rw-r--r--   kernel/posix-timers.c  | 132
 -rw-r--r--   kernel/printk.c        |   7
 -rw-r--r--   kernel/sched.c         |  31
 -rw-r--r--   kernel/signal.c        |  62
 -rw-r--r--   kernel/sys.c           |  21
 -rw-r--r--   kernel/sysctl.c        |   5
 -rw-r--r--   kernel/timer.c         |  67
8 files changed, 182 insertions, 146 deletions
diff --git a/kernel/ksyms.c b/kernel/ksyms.c
index 3e9ffd1891ef..fc6ff398cbc4 100644
--- a/kernel/ksyms.c
+++ b/kernel/ksyms.c
@@ -177,7 +177,8 @@ EXPORT_SYMBOL(mark_buffer_dirty);
 EXPORT_SYMBOL(end_buffer_io_sync);
 EXPORT_SYMBOL(__mark_inode_dirty);
 EXPORT_SYMBOL(get_empty_filp);
-EXPORT_SYMBOL(init_private_file);
+EXPORT_SYMBOL(open_private_file);
+EXPORT_SYMBOL(close_private_file);
 EXPORT_SYMBOL(filp_open);
 EXPORT_SYMBOL(filp_close);
 EXPORT_SYMBOL(put_filp);
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 19e2aa5dcb41..3780d17e49b0 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -9,7 +9,6 @@
 /* These are all the functions necessary to implement
  * POSIX clocks & timers
  */
-
 #include <linux/mm.h>
 #include <linux/smp_lock.h>
 #include <linux/interrupt.h>
@@ -23,6 +22,7 @@
 #include <linux/compiler.h>
 #include <linux/idr.h>
 #include <linux/posix-timers.h>
+#include <linux/wait.h>
 
 #ifndef div_long_long_rem
 #include <asm/div64.h>
@@ -56,8 +56,8 @@
  * Lets keep our timers in a slab cache :-)
  */
static kmem_cache_t *posix_timers_cache;
-struct idr posix_timers_id;
-spinlock_t idr_lock = SPIN_LOCK_UNLOCKED;
+static struct idr posix_timers_id;
+static spinlock_t idr_lock = SPIN_LOCK_UNLOCKED;
 
 /*
  * Just because the timer is not in the timer list does NOT mean it is
@@ -130,7 +130,7 @@ spinlock_t idr_lock = SPIN_LOCK_UNLOCKED;
  * which we beg off on and pass to do_sys_settimeofday().
  */
 
-struct k_clock posix_clocks[MAX_CLOCKS];
+static struct k_clock posix_clocks[MAX_CLOCKS];
 
 #define if_clock_do(clock_fun, alt_fun,parms) (! clock_fun)? alt_fun parms :\
 					      clock_fun parms
@@ -183,7 +183,7 @@ init_posix_timers(void)
 __initcall(init_posix_timers);
 
 static inline int
-tstojiffie(struct timespec *tp, int res, unsigned long *jiff)
+tstojiffie(struct timespec *tp, int res, u64 *jiff)
 {
	unsigned long sec = tp->tv_sec;
	long nsec = tp->tv_nsec + res - 1;
@@ -203,7 +203,7 @@ tstojiffie(struct timespec *tp, int res, unsigned long *jiff)
	 * below.  Here it is enough to just discard the high order
	 * bits.
	 */
-	*jiff = HZ * sec;
+	*jiff = (u64)sec * HZ;
	/*
	 * Do the res thing. (Don't forget the add in the declaration of nsec)
	 */
@@ -221,9 +221,12 @@ tstojiffie(struct timespec *tp, int res, unsigned long *jiff)
 static void
 tstotimer(struct itimerspec *time, struct k_itimer *timer)
 {
+	u64 result;
	int res = posix_clocks[timer->it_clock].res;
-	tstojiffie(&time->it_value, res, &timer->it_timer.expires);
-	tstojiffie(&time->it_interval, res, &timer->it_incr);
+	tstojiffie(&time->it_value, res, &result);
+	timer->it_timer.expires = (unsigned long)result;
+	tstojiffie(&time->it_interval, res, &result);
+	timer->it_incr = (unsigned long)result;
 }
 
 static void
@@ -1020,6 +1023,9 @@ do_posix_gettime(struct k_clock *clock, struct timespec *tp)
  * Note also that the while loop assures that the sub_jiff_offset
  * will be less than a jiffie, thus no need to normalize the result.
  * Well, not really, if called with ints off :(
+
+ * HELP, this code should make an attempt at resolution beyond the
+ * jiffie.  Trouble is this is "arch" dependent...
  */
 
 int
@@ -1127,26 +1133,14 @@ nanosleep_wake_up(unsigned long __data)
  * holds (or has held for it) a write_lock_irq( xtime_lock) and is
  * called from the timer bh code.  Thus we need the irq save locks.
  */
-spinlock_t nanosleep_abs_list_lock = SPIN_LOCK_UNLOCKED;
-struct list_head nanosleep_abs_list = LIST_HEAD_INIT(nanosleep_abs_list);
+static DECLARE_WAIT_QUEUE_HEAD(nanosleep_abs_wqueue);
 
-struct abs_struct {
-	struct list_head list;
-	struct task_struct *t;
-};
 void clock_was_set(void)
 {
-	struct list_head *pos;
-	unsigned long flags;
-
-	spin_lock_irqsave(&nanosleep_abs_list_lock, flags);
-	list_for_each(pos, &nanosleep_abs_list) {
-		wake_up_process(list_entry(pos, struct abs_struct, list)->t);
-	}
-	spin_unlock_irqrestore(&nanosleep_abs_list_lock, flags);
+	wake_up_all(&nanosleep_abs_wqueue);
 }
 
 long clock_nanosleep_restart(struct restart_block *restart_block);
@@ -1201,19 +1195,19 @@ sys_clock_nanosleep(clockid_t which_clock, int flags,
	return ret;
 }
 
-
 long
 do_clock_nanosleep(clockid_t which_clock, int flags, struct timespec *tsave)
 {
	struct timespec t;
	struct timer_list new_timer;
-	struct abs_struct abs_struct = { .list = { .next = 0 } };
+	DECLARE_WAITQUEUE(abs_wqueue, current);
+	u64 rq_time = 0;
+	s64 left;
	int abs;
-	int rtn = 0;
-	int active;
	struct restart_block *restart_block =
	    &current_thread_info()->restart_block;
 
+	abs_wqueue.flags = 0;
	init_timer(&new_timer);
	new_timer.expires = 0;
	new_timer.data = (unsigned long) current;
@@ -1226,54 +1220,50 @@ do_clock_nanosleep(clockid_t which_clock, int flags, struct timespec *tsave)
		 * time and continue.
		 */
		restart_block->fn = do_no_restart_syscall;
-		if (!restart_block->arg2)
-			return -EINTR;
-		new_timer.expires = restart_block->arg2;
-		if (time_before(new_timer.expires, jiffies))
+		rq_time = restart_block->arg3;
+		rq_time = (rq_time << 32) + restart_block->arg2;
+		if (!rq_time)
+			return -EINTR;
+		if (rq_time <= get_jiffies_64())
			return 0;
	}
 
	if (abs && (posix_clocks[which_clock].clock_get !=
		    posix_clocks[CLOCK_MONOTONIC].clock_get)) {
-		spin_lock_irq(&nanosleep_abs_list_lock);
-		list_add(&abs_struct.list, &nanosleep_abs_list);
-		abs_struct.t = current;
-		spin_unlock_irq(&nanosleep_abs_list_lock);
+		add_wait_queue(&nanosleep_abs_wqueue, &abs_wqueue);
	}
	do {
		t = *tsave;
-		if ((abs || !new_timer.expires) &&
-		    !(rtn = adjust_abs_time(&posix_clocks[which_clock],
-					    &t, abs))) {
-			/*
-			 * On error, we don't set up the timer so
-			 * we don't arm the timer so
-			 * del_timer_sync() will return 0, thus
-			 * active is zero... and so it goes.
-			 */
+		if (abs || !rq_time){
+			adjust_abs_time(&posix_clocks[which_clock], &t, abs);
 
-			tstojiffie(&t,
-				   posix_clocks[which_clock].res,
-				   &new_timer.expires);
+			tstojiffie(&t, posix_clocks[which_clock].res, &rq_time);
		}
-		if (new_timer.expires) {
-			current->state = TASK_INTERRUPTIBLE;
-			add_timer(&new_timer);
-
-			schedule();
+#if (BITS_PER_LONG < 64)
+		if ((rq_time - get_jiffies_64()) > MAX_JIFFY_OFFSET){
+			new_timer.expires = MAX_JIFFY_OFFSET;
+		}else
+#endif
+		{
+			new_timer.expires = (long)rq_time;
		}
-	}
-	while ((active = del_timer_sync(&new_timer)) &&
-	       !test_thread_flag(TIF_SIGPENDING));
+		current->state = TASK_INTERRUPTIBLE;
+		add_timer(&new_timer);
+
+		schedule();
 
-	if (abs_struct.list.next) {
-		spin_lock_irq(&nanosleep_abs_list_lock);
-		list_del(&abs_struct.list);
-		spin_unlock_irq(&nanosleep_abs_list_lock);
+		del_timer_sync(&new_timer);
+		left = rq_time - get_jiffies_64();
	}
-	if (active) {
-		long jiffies_left;
+	while ( (left > 0) &&
+	       !test_thread_flag(TIF_SIGPENDING));
+
+	if( abs_wqueue.task_list.next)
+		finish_wait(&nanosleep_abs_wqueue, &abs_wqueue);
+
+	if (left > 0) {
+		unsigned long rmd;
 
		/*
		 * Always restart abs calls from scratch to pick up any
@@ -1282,29 +1272,19 @@ do_clock_nanosleep(clockid_t which_clock, int flags, struct timespec *tsave)
		if (abs)
			return -ERESTARTNOHAND;
 
-		jiffies_left = new_timer.expires - jiffies;
-
-		if (jiffies_left < 0)
-			return 0;
-
-		jiffies_to_timespec(jiffies_left, tsave);
+		tsave->tv_sec = div_long_long_rem(left, HZ, &rmd);
+		tsave->tv_nsec = rmd * (NSEC_PER_SEC / HZ);
 
-		while (tsave->tv_nsec < 0) {
-			tsave->tv_nsec += NSEC_PER_SEC;
-			tsave->tv_sec--;
-		}
-		if (tsave->tv_sec < 0) {
-			tsave->tv_sec = 0;
-			tsave->tv_nsec = 1;
-		}
		restart_block->fn = clock_nanosleep_restart;
		restart_block->arg0 = which_clock;
		restart_block->arg1 = (unsigned long)tsave;
-		restart_block->arg2 = new_timer.expires;
+		restart_block->arg2 = rq_time & 0xffffffffLL;
+		restart_block->arg3 = rq_time >> 32;
+
		return -ERESTART_RESTARTBLOCK;
	}
 
-	return rtn;
+	return 0;
 }
 /*
  * This will restart either clock_nanosleep or clock_nanosleep
diff --git a/kernel/printk.c b/kernel/printk.c
index 9f2eb4b45669..853ac68708ae 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -28,6 +28,7 @@
 #include <linux/config.h>
 #include <linux/delay.h>
 #include <linux/smp.h>
+#include <linux/security.h>
 
 #include <asm/uaccess.h>
 
@@ -161,6 +162,10 @@ int do_syslog(int type, char * buf, int len)
	char c;
	int error = 0;
 
+	error = security_syslog(type);
+	if (error)
+		return error;
+
	switch (type) {
	case 0:		/* Close log */
		break;
@@ -273,8 +278,6 @@ out:
 
 asmlinkage long sys_syslog(int type, char * buf, int len)
 {
-	if ((type != 3) && !capable(CAP_SYS_ADMIN))
-		return -EPERM;
	return do_syslog(type, buf, len);
 }
diff --git a/kernel/sched.c b/kernel/sched.c
index a399056e6ac2..caeca9ec9c21 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -342,10 +342,10 @@ static inline void __activate_task(task_t *p, runqueue_t *rq)
  */
 static inline int activate_task(task_t *p, runqueue_t *rq)
 {
-	unsigned long sleep_time = jiffies - p->last_run;
+	long sleep_time = jiffies - p->last_run - 1;
	int requeue_waker = 0;
 
-	if (sleep_time) {
+	if (sleep_time > 0) {
		int sleep_avg;
 
		/*
@@ -846,7 +846,7 @@ void sched_balance_exec(void)
 }
 
 /*
- * Find the busiest node. All previous node loads contribute with a
+ * Find the busiest node. All previous node loads contribute with a
  * geometrically deccaying weight to the load measure:
  *	load_{t} = load_{t-1}/2 + nr_node_running_{t}
  * This way sudden load peaks are flattened out a bit.
@@ -854,7 +854,7 @@ void sched_balance_exec(void)
 static int find_busiest_node(int this_node)
 {
	int i, node = -1, load, this_load, maxload;
-	
+
	this_load = maxload = (this_rq()->prev_node_load[this_node] >> 1)
		+ atomic_read(&node_nr_running[this_node]);
	this_rq()->prev_node_load[this_node] = this_load;
@@ -1194,8 +1194,8 @@ void scheduler_tick(int user_ticks, int sys_ticks)
	runqueue_t *rq = this_rq();
	task_t *p = current;
 
-	if (rcu_pending(cpu))
-		rcu_check_callbacks(cpu, user_ticks);
+	if (rcu_pending(cpu))
+		rcu_check_callbacks(cpu, user_ticks);
 
	if (p == rq->idle) {
		/* note: this timer irq context must be accounted for as well */
@@ -1353,7 +1353,7 @@ switch_tasks:
	if (likely(prev != next)) {
		rq->nr_switches++;
		rq->curr = next;
-	
+
		prepare_arch_switch(rq, next);
		prev = context_switch(rq, prev, next);
		barrier();
@@ -1483,7 +1483,7 @@ void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
 }
 
 #endif
- 
+
 void complete(struct completion *x)
 {
	unsigned long flags;
@@ -1567,7 +1567,7 @@ long interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout)
 void sleep_on(wait_queue_head_t *q)
 {
	SLEEP_ON_VAR
-	
+
	current->state = TASK_UNINTERRUPTIBLE;
 
	SLEEP_ON_HEAD
@@ -1578,7 +1578,7 @@ void sleep_on(wait_queue_head_t *q)
 long sleep_on_timeout(wait_queue_head_t *q, long timeout)
 {
	SLEEP_ON_VAR
-	
+
	current->state = TASK_UNINTERRUPTIBLE;
 
	SLEEP_ON_HEAD
@@ -2472,12 +2472,12 @@ spinlock_t kernel_flag __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
 
 static void kstat_init_cpu(int cpu)
 {
-	/* Add any initialisation to kstat here */
-	/* Useful when cpu offlining logic is added.. */
+	/* Add any initialisation to kstat here */
+	/* Useful when cpu offlining logic is added.. */
 }
 
 static int __devinit kstat_cpu_notify(struct notifier_block *self,
-					unsigned long action, void *hcpu)
+				      unsigned long action, void *hcpu)
 {
	int cpu = (unsigned long)hcpu;
	switch(action) {
@@ -2489,7 +2489,7 @@ static int __devinit kstat_cpu_notify(struct notifier_block *self,
	}
	return NOTIFY_OK;
 }
- 
+
 static struct notifier_block __devinitdata kstat_nb = {
	.notifier_call	= kstat_cpu_notify,
	.next		= NULL,
@@ -2498,7 +2498,7 @@ static struct notifier_block __devinitdata kstat_nb = {
 __init static void init_kstat(void) {
	kstat_cpu_notify(&kstat_nb, (unsigned long)CPU_UP_PREPARE,
			 (void *)(long)smp_processor_id());
-	register_cpu_notifier(&kstat_nb);
+	register_cpu_notifier(&kstat_nb);
 }
 
 void __init sched_init(void)
@@ -2538,7 +2538,6 @@ void __init sched_init(void)
	rq->idle = current;
	set_task_cpu(current, smp_processor_id());
	wake_up_forked_process(current);
-	current->prio = MAX_PRIO;
 
	init_timers();
diff --git a/kernel/signal.c b/kernel/signal.c
index 49e483f8451e..7f630c0261e0 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1460,6 +1460,45 @@ do_signal_stop(int signr)
 
 #ifndef HAVE_ARCH_GET_SIGNAL_TO_DELIVER
 
+/*
+ * Do appropriate magic when group_stop_count > 0.
+ * We return nonzero if we stopped, after releasing the siglock.
+ * We return zero if we still hold the siglock and should look
+ * for another signal without checking group_stop_count again.
+ */
+static inline int handle_group_stop(void)
+{
+	int stop_count;
+
+	if (current->signal->group_exit_task == current) {
+		/*
+		 * Group stop is so we can do a core dump,
+		 * We are the initiating thread, so get on with it.
+		 */
+		current->signal->group_exit_task = NULL;
+		return 0;
+	}
+
+	if (current->signal->group_exit)
+		/*
+		 * Group stop is so another thread can do a core dump,
+		 * or else we are racing against a death signal.
+		 * Just punt the stop so we can get the next signal.
+		 */
+		return 0;
+
+	/*
+	 * There is a group stop in progress.  We stop
+	 * without any associated signal being in our queue.
+	 */
+	stop_count = --current->signal->group_stop_count;
+	current->exit_code = current->signal->group_exit_code;
+	set_current_state(TASK_STOPPED);
+	spin_unlock_irq(&current->sighand->siglock);
+	finish_stop(stop_count);
+	return 1;
+}
+
 int get_signal_to_deliver(siginfo_t *info, struct pt_regs *regs, void *cookie)
 {
	sigset_t *mask = &current->blocked;
@@ -1469,28 +1508,9 @@ int get_signal_to_deliver(siginfo_t *info, struct pt_regs *regs, void *cookie)
		struct k_sigaction *ka;
 
		spin_lock_irq(&current->sighand->siglock);
-		if (unlikely(current->signal->group_stop_count > 0)) {
-			int stop_count;
-			if (current->signal->group_exit_task == current) {
-				/*
-				 * Group stop is so we can do a core dump.
-				 */
-				current->signal->group_exit_task = NULL;
-				goto dequeue;
-			}
-			/*
-			 * There is a group stop in progress.  We stop
-			 * without any associated signal being in our queue.
-			 */
-			stop_count = --current->signal->group_stop_count;
-			signr = current->signal->group_exit_code;
-			current->exit_code = signr;
-			set_current_state(TASK_STOPPED);
-			spin_unlock_irq(&current->sighand->siglock);
-			finish_stop(stop_count);
+		if (unlikely(current->signal->group_stop_count > 0) &&
+		    handle_group_stop())
			continue;
-		}
-
-dequeue:
+
		signr = dequeue_signal(current, mask, info);
		spin_unlock_irq(&current->sighand->siglock);
diff --git a/kernel/sys.c b/kernel/sys.c
index 8e3fb524d641..21c75eaf033e 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -212,18 +212,25 @@ cond_syscall(sys_delete_module)
 
 static int set_one_prio(struct task_struct *p, int niceval, int error)
 {
+	int no_nice;
+
	if (p->uid != current->euid && p->uid != current->uid &&
	    !capable(CAP_SYS_NICE)) {
		error = -EPERM;
		goto out;
	}
-
+	if (niceval < task_nice(p) && !capable(CAP_SYS_NICE)) {
+		error = -EACCES;
+		goto out;
+	}
+	no_nice = security_task_setnice(p, niceval);
+	if (no_nice) {
+		error = no_nice;
+		goto out;
+	}
	if (error == -ESRCH)
		error = 0;
-	if (niceval < task_nice(p) && !capable(CAP_SYS_NICE))
-		error = -EACCES;
-	else
-		set_user_nice(p, niceval);
+	set_user_nice(p, niceval);
 out:
	return error;
 }
@@ -941,6 +948,10 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
	}
 
 ok_pgid:
+	err = security_task_setpgid(p, pgid);
+	if (err)
+		goto out;
+
	if (p->pgrp != pgid) {
		detach_pid(p, PIDTYPE_PGID);
		p->pgrp = pgid;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c3c96cd208d4..0364833761c4 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -33,6 +33,7 @@
 #include <linux/highuid.h>
 #include <linux/writeback.h>
 #include <linux/hugetlb.h>
+#include <linux/security.h>
 #include <asm/uaccess.h>
 
 #ifdef CONFIG_ROOT_NFS
@@ -432,6 +433,10 @@ static int test_perm(int mode, int op)
 
 static inline int ctl_perm(ctl_table *table, int op)
 {
+	int error;
+	error = security_sysctl(table, op);
+	if (error)
+		return error;
	return test_perm(table->mode, op);
 }
diff --git a/kernel/timer.c b/kernel/timer.c
index 6e7e23cb95ba..d3983cbfa8d7 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -53,11 +53,11 @@ typedef struct tvec_root_s {
	struct list_head vec[TVR_SIZE];
 } tvec_root_t;
 
-
 struct tvec_t_base_s {
	spinlock_t lock;
	unsigned long timer_jiffies;
	struct timer_list *running_timer;
+	struct list_head *run_timer_list_running;
	tvec_root_t tv1;
	tvec_t tv2;
	tvec_t tv3;
@@ -67,6 +67,14 @@ struct tvec_t_base_s {
 
 typedef struct tvec_t_base_s tvec_base_t;
 
+static inline void set_running_timer(tvec_base_t *base,
+					struct timer_list *timer)
+{
+#ifdef CONFIG_SMP
+	base->running_timer = timer;
+#endif
+}
+
 /* Fake initialization */
 static DEFINE_PER_CPU(tvec_base_t, tvec_bases) = { SPIN_LOCK_UNLOCKED };
 
@@ -94,13 +102,22 @@ static inline void check_timer(struct timer_list *timer)
		check_timer_failed(timer);
 }
 
-static inline void internal_add_timer(tvec_base_t *base, struct timer_list *timer)
+/*
+ * If a timer handler re-adds the timer with expires == jiffies, the timer
+ * running code can lock up.  So here we detect that situation and park the
+ * timer onto base->run_timer_list_running.  It will be added to the main timer
+ * structures later, by __run_timers().
+ */
+
+static void internal_add_timer(tvec_base_t *base, struct timer_list *timer)
 {
	unsigned long expires = timer->expires;
	unsigned long idx = expires - base->timer_jiffies;
	struct list_head *vec;
 
-	if (idx < TVR_SIZE) {
+	if (base->run_timer_list_running) {
+		vec = base->run_timer_list_running;
+	} else if (idx < TVR_SIZE) {
		int i = expires & TVR_MASK;
		vec = base->tv1.vec + i;
	} else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
@@ -354,7 +371,7 @@ del_again:
 static int cascade(tvec_base_t *base, tvec_t *tv)
 {
	/* cascade all the timers from tv up one level */
-	struct list_head *head, *curr, *next;
+	struct list_head *head, *curr;
 
	head = tv->vec + tv->index;
	curr = head->next;
@@ -366,11 +383,9 @@ static int cascade(tvec_base_t *base, tvec_t *tv)
		struct timer_list *tmp;
 
		tmp = list_entry(curr, struct timer_list, entry);
-		if (tmp->base != base)
-			BUG();
-		next = curr->next;
+		BUG_ON(tmp->base != base);
+		curr = curr->next;
		internal_add_timer(base, tmp);
-		curr = next;
	}
	INIT_LIST_HEAD(head);
 
@@ -386,9 +401,12 @@ static int cascade(tvec_base_t *base, tvec_t *tv)
  */
 static inline void __run_timers(tvec_base_t *base)
 {
+	struct timer_list *timer;
+
	spin_lock_irq(&base->lock);
-	while ((long)(jiffies - base->timer_jiffies) >= 0) {
-		struct list_head *head, *curr;
+	while (time_after_eq(jiffies, base->timer_jiffies)) {
+		LIST_HEAD(deferred_timers);
+		struct list_head *head;
 
		/*
		 * Cascade timers:
@@ -398,37 +416,36 @@ static inline void __run_timers(tvec_base_t *base)
				(cascade(base, &base->tv3) == 1) &&
					cascade(base, &base->tv4) == 1)
			cascade(base, &base->tv5);
+		base->run_timer_list_running = &deferred_timers;
 repeat:
		head = base->tv1.vec + base->tv1.index;
-		curr = head->next;
-		if (curr != head) {
+		if (!list_empty(head)) {
			void (*fn)(unsigned long);
			unsigned long data;
-			struct timer_list *timer;
 
-			timer = list_entry(curr, struct timer_list, entry);
+			timer = list_entry(head->next,struct timer_list,entry);
			fn = timer->function;
			data = timer->data;
 
			list_del(&timer->entry);
			timer->base = NULL;
-#if CONFIG_SMP
-			base->running_timer = timer;
-#endif
+			set_running_timer(base, timer);
			spin_unlock_irq(&base->lock);
-			if (!fn)
-				printk("Bad: timer %p has NULL fn. (data: %08lx)\n", timer, data);
-			else
-				fn(data);
+			fn(data);
			spin_lock_irq(&base->lock);
			goto repeat;
		}
+		base->run_timer_list_running = NULL;
		++base->timer_jiffies;
		base->tv1.index = (base->tv1.index + 1) & TVR_MASK;
+		while (!list_empty(&deferred_timers)) {
+			timer = list_entry(deferred_timers.prev,
+						struct timer_list, entry);
+			list_del(&timer->entry);
+			internal_add_timer(base, timer);
+		}
	}
-#if CONFIG_SMP
-	base->running_timer = NULL;
-#endif
+	set_running_timer(base, NULL);
	spin_unlock_irq(&base->lock);
 }
 
@@ -775,7 +792,7 @@ static void run_timer_softirq(struct softirq_action *h)
 {
	tvec_base_t *base = &per_cpu(tvec_bases, smp_processor_id());
 
-	if ((long)(jiffies - base->timer_jiffies) >= 0)
+	if (time_after_eq(jiffies, base->timer_jiffies))
		__run_timers(base);
 }
