author     Linus Torvalds <torvalds@ppc970.osdl.org>    2004-07-02 06:34:04 -0700
committer  Linus Torvalds <torvalds@ppc970.osdl.org>    2004-07-02 06:34:04 -0700
commit     faa7a4c05ace72d85bbfb8d2a458a80491f8045b (patch)
tree       360c5cb33ba4e4c762d0052af0c0e856d592328a /kernel
parent     62054d49783cfe93c9cee9d90a0779f1f9054968 (diff)
parent     9da9210dbc752cf35fa2fe866614a1d5b1266066 (diff)
Merge bk://linux-ntfs.bkbits.net/ntfs-2.6
into ppc970.osdl.org:/home/torvalds/v2.6/linux
Diffstat (limited to 'kernel')
 -rw-r--r--  kernel/kallsyms.c      |  49
 -rw-r--r--  kernel/posix-timers.c  | 275
 -rw-r--r--  kernel/power/Makefile  |   1
 -rw-r--r--  kernel/power/pmdisk.c  |   2
 -rw-r--r--  kernel/power/smp.c     |  85
 -rw-r--r--  kernel/power/swsusp.c  |  30
 -rw-r--r--  kernel/sysctl.c        |   2
 7 files changed, 371 insertions(+), 73 deletions(-)
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index b96428328e57..74ba3cb21809 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -40,14 +40,14 @@ static inline int is_kernel_text(unsigned long addr)
/* Lookup the address for this symbol. Returns 0 if not found. */
unsigned long kallsyms_lookup_name(const char *name)
{
- char namebuf[128];
+ char namebuf[KSYM_NAME_LEN+1];
unsigned long i;
char *knames;
for (i = 0, knames = kallsyms_names; i < kallsyms_num_syms; i++) {
unsigned prefix = *knames++;
- strlcpy(namebuf + prefix, knames, 127 - prefix);
+ strlcpy(namebuf + prefix, knames, KSYM_NAME_LEN - prefix);
if (strcmp(namebuf, name) == 0)
return kallsyms_addresses[i];
@@ -67,7 +67,7 @@ const char *kallsyms_lookup(unsigned long addr,
/* This kernel should never have been booted. */
BUG_ON(!kallsyms_addresses);
- namebuf[127] = 0;
+ namebuf[KSYM_NAME_LEN] = 0;
namebuf[0] = 0;
if (is_kernel_text(addr) || is_kernel_inittext(addr)) {
@@ -84,7 +84,7 @@ const char *kallsyms_lookup(unsigned long addr,
/* Grab name */
for (i = 0; i <= best; i++) {
unsigned prefix = *name++;
- strncpy(namebuf + prefix, name, 127 - prefix);
+ strncpy(namebuf + prefix, name, KSYM_NAME_LEN - prefix);
name += strlen(name) + 1;
}
@@ -117,34 +117,22 @@ void __print_symbol(const char *fmt, unsigned long address)
char *modname;
const char *name;
unsigned long offset, size;
- char namebuf[128];
+ char namebuf[KSYM_NAME_LEN+1];
+ char buffer[sizeof("%s+%#lx/%#lx [%s]") + KSYM_NAME_LEN +
+ 2*(BITS_PER_LONG*3/10) + MODULE_NAME_LEN + 1];
name = kallsyms_lookup(address, &size, &offset, &modname, namebuf);
- if (!name) {
- char addrstr[sizeof("0x%lx") + (BITS_PER_LONG*3/10)];
-
- sprintf(addrstr, "0x%lx", address);
- printk(fmt, addrstr);
- return;
- }
-
- if (modname) {
- /* This is pretty small. */
- char buffer[sizeof("%s+%#lx/%#lx [%s]")
- + strlen(name) + 2*(BITS_PER_LONG*3/10)
- + strlen(modname)];
-
- sprintf(buffer, "%s+%#lx/%#lx [%s]",
- name, offset, size, modname);
- printk(fmt, buffer);
- } else {
- char buffer[sizeof("%s+%#lx/%#lx")
- + strlen(name) + 2*(BITS_PER_LONG*3/10)];
-
- sprintf(buffer, "%s+%#lx/%#lx", name, offset, size);
- printk(fmt, buffer);
+ if (!name)
+ sprintf(buffer, "0x%lx", address);
+ else {
+ if (modname)
+ sprintf(buffer, "%s+%#lx/%#lx [%s]", name, offset,
+ size, modname);
+ else
+ sprintf(buffer, "%s+%#lx/%#lx", name, offset, size);
}
+ printk(fmt, buffer);
}
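
The fixed-size buffer in the hunk above leans on an old trick: BITS_PER_LONG*3/10 approximates the decimal digit count of a long (log10(2) is about 0.301; for the hex output used here it is comfortably enough), and the sizeof() of the format string covers the literal characters plus the terminating NUL. A stand-alone user-space sketch of the same bound (not kernel code):

#include <stdio.h>
#include <limits.h>

#define BITS_PER_LONG ((int)(sizeof(long) * CHAR_BIT))

int main(void)
{
	/* sizeof("0x%lx") counts the 5 format bytes plus the NUL; the
	 * 3/10 term bounds the digits of the worst-case value. */
	char buf[sizeof("0x%lx") + (BITS_PER_LONG * 3 / 10)];
	int n = snprintf(buf, sizeof(buf), "0x%lx", ULONG_MAX);

	printf("needed %d chars, buffer holds %zu\n", n, sizeof(buf));
	return 0;
}

On a 64-bit build the buffer is 6 + 19 = 25 bytes, while "0x" plus 16 hex digits plus the NUL needs 19, so the bound holds with room to spare.
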
/* To avoid O(n^2) iteration, we carry prefix along. */
@@ -155,7 +143,7 @@ struct kallsym_iter
unsigned long value;
unsigned int nameoff; /* If iterating in core kernel symbols */
char type;
- char name[128];
+ char name[KSYM_NAME_LEN+1];
};
/* Only label it "global" if it is exported. */
@@ -186,7 +174,8 @@ static unsigned long get_ksymbol_core(struct kallsym_iter *iter)
shared with previous name (stem compression). */
stemlen = kallsyms_names[off++];
- strlcpy(iter->name+stemlen, kallsyms_names + off, 128-stemlen);
+ strlcpy(iter->name+stemlen, kallsyms_names + off,
+ KSYM_NAME_LEN+1-stemlen);
off += strlen(kallsyms_names + off) + 1;
iter->owner = NULL;
iter->value = kallsyms_addresses[iter->pos];
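
The one-byte prefix scheme these loops decode ("stem compression": each name stores how many leading bytes it shares with the previous name, then its NUL-terminated suffix) is easy to misread. A user-space sketch with a made-up three-entry table (the table contents are illustrative, not real kallsyms data):

#include <stdio.h>
#include <string.h>

#define NAME_LEN 127	/* mirrors KSYM_NAME_LEN */

static const unsigned char table[] =
	"\000sys_read\000"	/* shares 0 bytes with prev -> "sys_read"  */
	"\004_write\000"	/* shares "sys_" (4 bytes)  -> "sys_write" */
	"\010v\000";		/* shares "sys_writ" (8)    -> "sys_writv" */

int main(void)
{
	char name[NAME_LEN + 1] = "";
	const unsigned char *p = table;
	const unsigned char *end = table + sizeof(table) - 1;

	while (p < end) {
		unsigned prefix = *p++;	/* bytes reused from previous name */

		strncpy(name + prefix, (const char *)p, NAME_LEN - prefix);
		name[NAME_LEN] = '\0';
		p += strlen((const char *)p) + 1;	/* skip suffix + NUL */
		printf("%s\n", name);
	}
	return 0;
}

Because the decoder only ever overwrites the tail of the previous name, iteration must carry the buffer along, which is exactly why the patch keeps namebuf/iter->name at KSYM_NAME_LEN+1 rather than a magic 128.
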
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index c18d947b582b..42c24868837c 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -7,6 +7,9 @@
*
* Copyright (C) 2002 2003 by MontaVista Software.
*
+ * 2004-06-01 Fix CLOCK_REALTIME clock/timer TIMER_ABSTIME bug.
+ * Copyright (C) 2004 Boris Hu
+ *
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or (at
@@ -41,6 +44,7 @@
#include <linux/idr.h>
#include <linux/posix-timers.h>
#include <linux/wait.h>
+#include <linux/workqueue.h>
#ifndef div_long_long_rem
#include <asm/div64.h>
@@ -169,6 +173,12 @@ static spinlock_t idr_lock = SPIN_LOCK_UNLOCKED;
*/
static struct k_clock posix_clocks[MAX_CLOCKS];
+/*
+ * We only have one real clock that can be set so we need only one abs list,
+ * even if we should want to have several clocks with differing resolutions.
+ */
+static struct k_clock_abs abs_list = {.list = LIST_HEAD_INIT(abs_list.list),
+ .lock = SPIN_LOCK_UNLOCKED};
#define if_clock_do(clock_fun,alt_fun,parms) \
(!clock_fun) ? alt_fun parms : clock_fun parms
@@ -200,8 +210,11 @@ static inline void unlock_timer(struct k_itimer *timr, unsigned long flags)
*/
static __init int init_posix_timers(void)
{
- struct k_clock clock_realtime = {.res = CLOCK_REALTIME_RES };
+ struct k_clock clock_realtime = {.res = CLOCK_REALTIME_RES,
+ .abs_struct = &abs_list
+ };
struct k_clock clock_monotonic = {.res = CLOCK_REALTIME_RES,
+ .abs_struct = NULL,
.clock_get = do_posix_clock_monotonic_gettime,
.clock_set = do_posix_clock_monotonic_settime
};
@@ -212,7 +225,6 @@ static __init int init_posix_timers(void)
posix_timers_cache = kmem_cache_create("posix_timers_cache",
sizeof (struct k_itimer), 0, 0, NULL, NULL);
idr_init(&posix_timers_id);
-
return 0;
}
@@ -239,19 +251,92 @@ static void tstojiffie(struct timespec *tp, int res, u64 *jiff)
(NSEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC;
}
+/*
+ * This function adjusts the timer as needed as a result of the clock
+ * being set. It should only be called for absolute timers, and then
+ * under the abs_list lock. It computes the time difference and sets
+ * the new jiffies value in the timer. It also updates the timer's
+ * reference wall_to_monotonic value. It is complicated by the fact
+ * that tstojiffie() only handles positive times and it needs to work
+ * with both positive and negative times. Also, for negative offsets,
+ * we need to defeat the res round up.
+ *
+ * Return is true if there is a new time, else false.
+ */
+static long add_clockset_delta(struct k_itimer *timr,
+ struct timespec *new_wall_to)
+{
+ struct timespec delta;
+ int sign = 0;
+ u64 exp;
+
+ set_normalized_timespec(&delta,
+ new_wall_to->tv_sec -
+ timr->wall_to_prev.tv_sec,
+ new_wall_to->tv_nsec -
+ timr->wall_to_prev.tv_nsec);
+ if (likely(!(delta.tv_sec | delta.tv_nsec)))
+ return 0;
+ if (delta.tv_sec < 0) {
+ set_normalized_timespec(&delta,
+ -delta.tv_sec,
+ 1 - delta.tv_nsec -
+ posix_clocks[timr->it_clock].res);
+ sign++;
+ }
+ tstojiffie(&delta, posix_clocks[timr->it_clock].res, &exp);
+ timr->wall_to_prev = *new_wall_to;
+ timr->it_timer.expires += (sign ? -exp : exp);
+ return 1;
+}
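
set_normalized_timespec() is what makes the negative-offset handling above work: it folds tv_nsec back into [0, NSEC_PER_SEC) by borrowing from tv_sec, so a backwards clock step shows up as a negative tv_sec with a positive tv_nsec. A user-space sketch of the same semantics (the helper name is mine, not the kernel's):

#include <stdio.h>
#include <time.h>

#define NSEC_PER_SEC 1000000000L

/* Same contract as the kernel's set_normalized_timespec(). */
static void normalize(struct timespec *ts, long sec, long nsec)
{
	while (nsec >= NSEC_PER_SEC) { nsec -= NSEC_PER_SEC; sec++; }
	while (nsec < 0)             { nsec += NSEC_PER_SEC; sec--; }
	ts->tv_sec = sec;
	ts->tv_nsec = nsec;
}

int main(void)
{
	struct timespec delta;

	/* Clock stepped backwards: the raw difference is negative. */
	normalize(&delta, 2 - 5, 100 - 900000000L);
	printf("delta = %lds %ldns\n", (long)delta.tv_sec, delta.tv_nsec);
	/* Prints "delta = -4s 100000100ns": the sign lives entirely in
	 * tv_sec, which is why add_clockset_delta() tests
	 * delta.tv_sec < 0 and re-normalizes with negated fields. */
	return 0;
}
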
+
+static void remove_from_abslist(struct k_itimer *timr)
+{
+ if (!list_empty(&timr->abs_timer_entry)) {
+ spin_lock(&abs_list.lock);
+ list_del_init(&timr->abs_timer_entry);
+ spin_unlock(&abs_list.lock);
+ }
+}
+
static void schedule_next_timer(struct k_itimer *timr)
{
+ struct timespec new_wall_to;
struct now_struct now;
+ unsigned long seq;
- /* Set up the timer for the next interval (if there is one) */
+ /*
+ * Set up the timer for the next interval (if there is one).
+ * Note: this code uses the abs_timer_lock to protect
+ * wall_to_prev and must hold it until exp is set, not exactly
+ * obvious...
+
+ * This function is used for CLOCK_REALTIME* and
+ * CLOCK_MONOTONIC* timers. If we ever want to handle other
+ * CLOCKs, the calling code (do_schedule_next_timer) would need
+ * to pull the "clock" info from the timer and dispatch the
+ * "other" CLOCKs "next timer" code (which, I suppose should
+ * also be added to the k_clock structure).
+ */
if (!timr->it_incr)
return;
- posix_get_now(&now);
do {
- posix_bump_timer(timr);
- }while (posix_time_before(&timr->it_timer, &now));
+ seq = read_seqbegin(&xtime_lock);
+ new_wall_to = wall_to_monotonic;
+ posix_get_now(&now);
+ } while (read_seqretry(&xtime_lock, seq));
+
+ if (!list_empty(&timr->abs_timer_entry)) {
+ spin_lock(&abs_list.lock);
+ add_clockset_delta(timr, &new_wall_to);
+
+ posix_bump_timer(timr, now);
+ spin_unlock(&abs_list.lock);
+ } else {
+ posix_bump_timer(timr, now);
+ }
timr->it_overrun_last = timr->it_overrun;
timr->it_overrun = -1;
++timr->it_requeue_pending;
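
The read_seqbegin()/read_seqretry() loop above is the standard seqlock reader idiom: snapshot the shared values, then retry if a writer ran in the meantime. A simplified user-space analogue (illustrative names, sequentially consistent atomics, not the kernel API):

#include <stdatomic.h>
#include <stdio.h>

static atomic_uint seq;			/* even: idle, odd: write in progress */
static long shared_a, shared_b;		/* must always be read as a pair */

static void write_pair(long a, long b)
{
	atomic_fetch_add(&seq, 1);	/* counter goes odd: readers retry */
	shared_a = a;
	shared_b = b;
	atomic_fetch_add(&seq, 1);	/* counter even again: snapshot ok */
}

static void read_pair(long *a, long *b)
{
	unsigned s;

	do {
		while ((s = atomic_load(&seq)) & 1)
			;		/* writer active: wait for even */
		*a = shared_a;
		*b = shared_b;
	} while (atomic_load(&seq) != s);	/* changed? read again */
}

int main(void)
{
	long a, b;

	write_pair(1, 2);
	read_pair(&a, &b);
	printf("a=%ld b=%ld\n", a, b);
	return 0;
}

Readers never block writers, which is why the patch can grab wall_to_monotonic and the current time together without taking xtime_lock for writing.
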
@@ -312,7 +397,15 @@ static void timer_notify_task(struct k_itimer *timr)
memset(&timr->sigq->info, 0, sizeof(siginfo_t));
- /* Send signal to the process that owns this timer. */
+ /*
+ * Send signal to the process that owns this timer.
+
+ * This code assumes that all the possible abs_lists share the
+ * same lock (there is only one list at this time). If this is
+ * not the case, the CLOCK info would need to be used to find
+ * the proper abs list lock.
+ */
+
timr->sigq->info.si_signo = timr->it_sigev_signo;
timr->sigq->info.si_errno = 0;
timr->sigq->info.si_code = SI_TIMER;
@@ -320,6 +413,9 @@ static void timer_notify_task(struct k_itimer *timr)
timr->sigq->info.si_value = timr->it_sigev_value;
if (timr->it_incr)
timr->sigq->info.si_sys_private = ++timr->it_requeue_pending;
+ else {
+ remove_from_abslist(timr);
+ }
if (timr->it_sigev_notify & SIGEV_THREAD_ID) {
if (unlikely(timr->it_process->flags & PF_EXITING)) {
@@ -350,16 +446,51 @@ static void timer_notify_task(struct k_itimer *timr)
* This function gets called when a POSIX.1b interval timer expires. It
* is used as a callback from the kernel internal timer. The
 * run_timer_list code ALWAYS calls with interrupts on.
+
+ * This code is for CLOCK_REALTIME* and CLOCK_MONOTONIC* timers.
*/
static void posix_timer_fn(unsigned long __data)
{
struct k_itimer *timr = (struct k_itimer *) __data;
unsigned long flags;
+ unsigned long seq;
+ struct timespec delta, new_wall_to;
+ u64 exp = 0;
+ int do_notify = 1;
spin_lock_irqsave(&timr->it_lock, flags);
set_timer_inactive(timr);
- timer_notify_task(timr);
- unlock_timer(timr, flags);
+ if (!list_empty(&timr->abs_timer_entry)) {
+ spin_lock(&abs_list.lock);
+ do {
+ seq = read_seqbegin(&xtime_lock);
+ new_wall_to = wall_to_monotonic;
+ } while (read_seqretry(&xtime_lock, seq));
+ set_normalized_timespec(&delta,
+ new_wall_to.tv_sec -
+ timr->wall_to_prev.tv_sec,
+ new_wall_to.tv_nsec -
+ timr->wall_to_prev.tv_nsec);
+	if (likely((delta.tv_sec | delta.tv_nsec) == 0)) {
+ /* do nothing, timer is on time */
+ } else if (delta.tv_sec < 0) {
+ /* do nothing, timer is already late */
+ } else {
+ /* timer is early due to a clock set */
+ tstojiffie(&delta,
+ posix_clocks[timr->it_clock].res,
+ &exp);
+ timr->wall_to_prev = new_wall_to;
+ timr->it_timer.expires += exp;
+ add_timer(&timr->it_timer);
+ do_notify = 0;
+ }
+ spin_unlock(&abs_list.lock);
+
+ }
+ if (do_notify)
+ timer_notify_task(timr);
+ unlock_timer(timr, flags); /* hold thru abs lock to keep irq off */
}
@@ -397,6 +528,7 @@ static struct k_itimer * alloc_posix_timer(void)
if (!tmr)
return tmr;
memset(tmr, 0, sizeof (struct k_itimer));
+ INIT_LIST_HEAD(&tmr->abs_timer_entry);
if (unlikely(!(tmr->sigq = sigqueue_alloc()))) {
kmem_cache_free(posix_timers_cache, tmr);
tmr = NULL;
@@ -644,8 +776,7 @@ do_timer_gettime(struct k_itimer *timr, struct itimerspec *cur_setting)
if (expires) {
if (timr->it_requeue_pending & REQUEUE_PENDING ||
(timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
- while (posix_time_before(&timr->it_timer, &now))
- posix_bump_timer(timr);
+ posix_bump_timer(timr, now);
expires = timr->it_timer.expires;
}
else
@@ -721,11 +852,10 @@ sys_timer_getoverrun(timer_t timer_id)
* time to it to get the proper time for the timer.
*/
static int adjust_abs_time(struct k_clock *clock, struct timespec *tp,
- int abs, u64 *exp)
+ int abs, u64 *exp, struct timespec *wall_to)
{
struct timespec now;
struct timespec oc = *tp;
- struct timespec wall_to_mono;
u64 jiffies_64_f;
int rtn =0;
@@ -733,15 +863,15 @@ static int adjust_abs_time(struct k_clock *clock, struct timespec *tp,
/*
* The mask pick up the 4 basic clocks
*/
- if (!(clock - &posix_clocks[0]) & ~CLOCKS_MASK) {
+ if (!((clock - &posix_clocks[0]) & ~CLOCKS_MASK)) {
jiffies_64_f = do_posix_clock_monotonic_gettime_parts(
- &now, &wall_to_mono);
+ &now, wall_to);
/*
* If we are doing a MONOTONIC clock
*/
if((clock - &posix_clocks[0]) & CLOCKS_MONO){
- now.tv_sec += wall_to_mono.tv_sec;
- now.tv_nsec += wall_to_mono.tv_nsec;
+ now.tv_sec += wall_to->tv_sec;
+ now.tv_nsec += wall_to->tv_nsec;
}
} else {
/*
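
The one-character change in this hunk fixes an operator-precedence bug: '!' binds tighter than '&', so the old test evaluated (!(clock - &posix_clocks[0])) & ~CLOCKS_MASK, a 0-or-1 value masked down to 0, meaning the basic-clock branch could never be taken. A minimal demo (the CLOCKS_MASK value here is illustrative):

#include <stdio.h>

#define CLOCKS_MASK 3	/* illustrative: low indices are the basic clocks */

int main(void)
{
	for (int idx = 0; idx < 6; idx++)
		printf("idx=%d  buggy=%d  fixed=%d\n", idx,
		       !(idx) & ~CLOCKS_MASK,	  /* (!idx) & ~3: always 0 */
		       !((idx) & ~CLOCKS_MASK));  /* 1 for the basic clocks */
	return 0;
}

The buggy column prints 0 for every index, while the fixed form is 1 exactly for indices 0..3.
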
@@ -831,6 +961,8 @@ do_timer_settime(struct k_itimer *timr, int flags,
#else
del_timer(&timr->it_timer);
#endif
+ remove_from_abslist(timr);
+
timr->it_requeue_pending = (timr->it_requeue_pending + 2) &
~REQUEUE_PENDING;
timr->it_overrun_last = 0;
@@ -845,24 +977,25 @@ do_timer_settime(struct k_itimer *timr, int flags,
if (adjust_abs_time(clock,
&new_setting->it_value, flags & TIMER_ABSTIME,
- &expire_64)) {
+ &expire_64, &(timr->wall_to_prev))) {
return -EINVAL;
}
timr->it_timer.expires = (unsigned long)expire_64;
tstojiffie(&new_setting->it_interval, clock->res, &expire_64);
timr->it_incr = (unsigned long)expire_64;
-
/*
- * For some reason the timer does not fire immediately if expires is
- * equal to jiffies, so the timer notify function is called directly.
- * We do not even queue SIGEV_NONE timers!
+ * We do not even queue SIGEV_NONE timers! But we do put them
+ * in the abs list so we can do that right.
*/
- if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE)) {
- if (timr->it_timer.expires == jiffies)
- timer_notify_task(timr);
- else
- add_timer(&timr->it_timer);
+ if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE))
+ add_timer(&timr->it_timer);
+
+ if (flags & TIMER_ABSTIME && clock->abs_struct) {
+ spin_lock(&clock->abs_struct->lock);
+ list_add_tail(&(timr->abs_timer_entry),
+ &(clock->abs_struct->list));
+ spin_unlock(&clock->abs_struct->lock);
}
return 0;
}
@@ -896,7 +1029,7 @@ retry:
if (!posix_clocks[timr->it_clock].timer_set)
error = do_timer_settime(timr, flags, &new_spec, rtn);
else
- error = posix_clocks[timr->it_clock].timer_set(timr,
+ error = posix_clocks[timr->it_clock].timer_set(timr,
flags,
&new_spec, rtn);
unlock_timer(timr, flag);
@@ -929,6 +1062,8 @@ static inline int do_timer_delete(struct k_itimer *timer)
#else
del_timer(&timer->it_timer);
#endif
+ remove_from_abslist(timer);
+
return 0;
}
@@ -1171,13 +1306,93 @@ static void nanosleep_wake_up(unsigned long __data)
* On locking, clock_was_set() is called from update_wall_clock which
* holds (or has held for it) a write_lock_irq( xtime_lock) and is
* called from the timer bh code. Thus we need the irq save locks.
+ *
+ * Also, the call from update_wall_clock is made as part of a
+ * softirq. We don't want to delay the system that long (there may
+ * be a long list of timers to fix), so we defer that work to keventd.
*/
static DECLARE_WAIT_QUEUE_HEAD(nanosleep_abs_wqueue);
+static DECLARE_WORK(clock_was_set_work, (void(*)(void*))clock_was_set, NULL);
+
+static DECLARE_MUTEX(clock_was_set_lock);
void clock_was_set(void)
{
+ struct k_itimer *timr;
+ struct timespec new_wall_to;
+ LIST_HEAD(cws_list);
+ unsigned long seq;
+
+
+ if (unlikely(in_interrupt())) {
+ schedule_work(&clock_was_set_work);
+ return;
+ }
wake_up_all(&nanosleep_abs_wqueue);
+
+ /*
+ * Check if there exist TIMER_ABSTIME timers to correct.
+ *
+ * Notes on locking: This code is run in task context with irq
+ * on. We CAN be interrupted! All other usage of the abs list
+ * lock is under the timer lock which holds the irq lock as
+ * well. We REALLY don't want to scan the whole list with the
+ * interrupt system off, AND we would like a sequence lock on
+ * this code as well. Since we assume that the clock will not
+ * be set often, it seems ok to take and release the irq lock
+ * for each timer. In fact add_timer will do this, so this is
+ * not an issue. So that we know when we are done, we will move the
+ * whole list to a new location. Then as we process each entry,
+ * we will move it to the actual list again. This way, when our
+ * copy is empty, we are done. We are not all that concerned
+ * about preemption so we will use a semaphore lock to protect
+ * against reentry. This way we will not stall another
+ * processor. It is possible that this may delay some timers
+ * that should have expired, given the new clock, but even this
+ * will be minimal as we will always update to the current time,
+ * even if it was set by a task that is waiting for entry to
+ * this code. Timers that expire too early will be caught by
+ * the expire code and restarted.
+
+ * Absolute timers that repeat are left in the abs list while
+ * waiting for the task to pick up the signal. This means we
+ * may find timers that are not in the "add_timer" list, but are
+ * in the abs list. We do the same thing for these, save
+ * putting them back in the "add_timer" list. (Note, these are
+ * left in the abs list mainly to indicate that they are
+ * ABSOLUTE timers, a fact that is used by the re-arm code, and
+ * for which we have no other flag.)
+
+ */
+
+ down(&clock_was_set_lock);
+ spin_lock_irq(&abs_list.lock);
+ list_splice_init(&abs_list.list, &cws_list);
+ spin_unlock_irq(&abs_list.lock);
+ do {
+ do {
+ seq = read_seqbegin(&xtime_lock);
+ new_wall_to = wall_to_monotonic;
+ } while (read_seqretry(&xtime_lock, seq));
+
+ spin_lock_irq(&abs_list.lock);
+ if (list_empty(&cws_list)) {
+ spin_unlock_irq(&abs_list.lock);
+ break;
+ }
+ timr = list_entry(cws_list.next, struct k_itimer,
+ abs_timer_entry);
+
+ list_del_init(&timr->abs_timer_entry);
+ if (add_clockset_delta(timr, &new_wall_to) &&
+ del_timer(&timr->it_timer)) /* timer run yet? */
+ add_timer(&timr->it_timer);
+ list_add(&timr->abs_timer_entry, &abs_list.list);
+ spin_unlock_irq(&abs_list.lock);
+ } while (1);
+
+ up(&clock_was_set_lock);
}
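
The list handling above is a splice-and-drain pattern: steal the whole list in one short critical section, then re-take the lock once per entry while moving each element back, so neither interrupts nor other CPUs are held off for the length of the scan. A toy user-space version with pthreads and a hand-rolled list (the kernel uses list_splice_init()/list_del_init() on struct list_head; function and variable names here are mine):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node { struct node *next; int val; };

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *live;		/* protected by lock */

static void drain_and_requeue(void)
{
	struct node *pending;

	/* 1. Steal the whole list in one short critical section
	 *    (the list_splice_init() step). */
	pthread_mutex_lock(&lock);
	pending = live;
	live = NULL;
	pthread_mutex_unlock(&lock);

	/* 2. One node per lock hold: the lock is never held across
	 *    the whole scan. */
	for (;;) {
		struct node *n;

		pthread_mutex_lock(&lock);
		n = pending;
		if (!n) {
			pthread_mutex_unlock(&lock);
			break;
		}
		pending = n->next;
		n->val++;		/* stand-in for adjusting the timer */
		n->next = live;		/* back onto the live list */
		live = n;
		pthread_mutex_unlock(&lock);
	}
}

int main(void)
{
	for (int i = 0; i < 3; i++) {
		struct node *n = malloc(sizeof(*n));

		n->val = i;
		n->next = live;
		live = n;
	}
	drain_and_requeue();
	for (struct node *n = live; n; n = n->next)
		printf("val=%d\n", n->val);
	return 0;
}

In the kernel the per-entry lock round-trip is not optional: timer deletion paths can unlink an entry from the private list concurrently, which is why even the stolen list is only popped under abs_list.lock.
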
long clock_nanosleep_restart(struct restart_block *restart_block);
@@ -1220,7 +1435,7 @@ sys_clock_nanosleep(clockid_t which_clock, int flags,
long
do_clock_nanosleep(clockid_t which_clock, int flags, struct timespec *tsave)
{
- struct timespec t;
+ struct timespec t, dum;
struct timer_list new_timer;
DECLARE_WAITQUEUE(abs_wqueue, current);
u64 rq_time = (u64)0;
@@ -1260,7 +1475,7 @@ do_clock_nanosleep(clockid_t which_clock, int flags, struct timespec *tsave)
t = *tsave;
if (abs || !rq_time) {
adjust_abs_time(&posix_clocks[which_clock], &t, abs,
- &rq_time);
+ &rq_time, &dum);
rq_time += (t.tv_sec || t.tv_nsec);
}
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index d00edd15c0fd..079dd4d678ce 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -1,4 +1,5 @@
obj-y := main.o process.o console.o pm.o
+obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o
obj-$(CONFIG_PM_DISK) += disk.o pmdisk.o
diff --git a/kernel/power/pmdisk.c b/kernel/power/pmdisk.c
index dd474ccdd1be..d4c20e05a488 100644
--- a/kernel/power/pmdisk.c
+++ b/kernel/power/pmdisk.c
@@ -792,7 +792,7 @@ static int __init relocate_pagedir(void)
}
err = -ENOMEM;
- while ((m = (void *) __get_free_pages(GFP_ATOMIC, pagedir_order))) {
+ while ((m = (void *) __get_free_pages(GFP_ATOMIC, pagedir_order)) != NULL) {
if (!does_collide_order(old_pagedir, (unsigned long)m,
pagedir_order)) {
pm_pagedir_nosave =
diff --git a/kernel/power/smp.c b/kernel/power/smp.c
new file mode 100644
index 000000000000..cda77cdfb8c1
--- /dev/null
+++ b/kernel/power/smp.c
@@ -0,0 +1,85 @@
+/*
+ * kernel/power/smp.c - Functions for stopping other CPUs.
+ *
+ * Copyright 2004 Pavel Machek <pavel@suse.cz>
+ * Copyright (C) 2002-2003 Nigel Cunningham <ncunningham@clear.net.nz>
+ *
+ * This file is released under the GPLv2.
+ */
+
+#undef DEBUG
+
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+#include <linux/suspend.h>
+#include <linux/module.h>
+#include <asm/atomic.h>
+#include <asm/tlbflush.h>
+
+static atomic_t cpu_counter, freeze;
+
+
+static void smp_pause(void * data)
+{
+ struct saved_context ctxt;
+ __save_processor_state(&ctxt);
+ printk("Sleeping in:\n");
+ dump_stack();
+ atomic_inc(&cpu_counter);
+ while (atomic_read(&freeze)) {
+		/* FIXME: restore takes place at a random point inside this
+		   loop. This should probably be written in assembly, and
+		   should preserve the general-purpose registers, too.
+
+		   What about the stack? We may need to move to a new stack here.
+
+		   This would be better run with interrupts disabled.
+ */
+ cpu_relax();
+ barrier();
+ }
+ atomic_dec(&cpu_counter);
+ __restore_processor_state(&ctxt);
+}
+
+cpumask_t oldmask;
+
+void disable_nonboot_cpus(void)
+{
+ printk("Freezing CPUs (at %d)", smp_processor_id());
+ oldmask = current->cpus_allowed;
+ set_cpus_allowed(current, cpumask_of_cpu(0));
+ current->state = TASK_INTERRUPTIBLE;
+ schedule_timeout(HZ);
+ printk("...");
+ BUG_ON(smp_processor_id() != 0);
+
+ /* FIXME: for this to work, all the CPUs must be running
+ * "idle" thread (or we deadlock). Is that guaranteed? */
+
+ atomic_set(&cpu_counter, 0);
+ atomic_set(&freeze, 1);
+ smp_call_function(smp_pause, NULL, 0, 0);
+ while (atomic_read(&cpu_counter) < (num_online_cpus() - 1)) {
+ cpu_relax();
+ barrier();
+ }
+ printk("ok\n");
+}
+
+void enable_nonboot_cpus(void)
+{
+ printk("Restarting CPUs");
+ atomic_set(&freeze, 0);
+ while (atomic_read(&cpu_counter)) {
+ cpu_relax();
+ barrier();
+ }
+ printk("...");
+ set_cpus_allowed(current, oldmask);
+ schedule();
+ printk("ok\n");
+
+}
+
+
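
The freeze/cpu_counter pair in smp.c is a simple rendezvous: each CPU parks in a spin loop and bumps a counter so the caller knows it stopped; clearing the flag releases them all. A user-space analogue with threads standing in for CPUs (all names are illustrative):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define WORKERS 3

static atomic_int counter;
static atomic_int freeze_flag = 1;

static void *worker(void *arg)
{
	(void)arg;
	atomic_fetch_add(&counter, 1);	/* "I am parked" */
	while (atomic_load(&freeze_flag))
		;			/* cpu_relax() stand-in */
	atomic_fetch_sub(&counter, 1);	/* "I am running again" */
	return NULL;
}

int main(void)
{
	pthread_t t[WORKERS];

	for (int i = 0; i < WORKERS; i++)
		pthread_create(&t[i], NULL, worker, NULL);

	while (atomic_load(&counter) < WORKERS)
		;			/* wait until everyone is parked */
	printf("all %d workers frozen\n", WORKERS);

	atomic_store(&freeze_flag, 0);	/* thaw */
	while (atomic_load(&counter) > 0)
		;			/* wait until everyone has left */

	for (int i = 0; i < WORKERS; i++)
		pthread_join(t[i], NULL);
	printf("all workers resumed\n");
	return 0;
}

As the FIXME in the patch notes, the kernel version only works if every other CPU can actually reach the parking loop, i.e. is running a schedulable (idle) context when the IPI arrives.
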
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index ff8fdbc21a45..081b65103abd 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -317,7 +317,8 @@ static int write_suspend_image(void)
for (i=0; i<nr_copy_pages; i++) {
if (!(i%100))
printk( "." );
- if (!(entry = get_swap_page()).val)
+ entry = get_swap_page();
+ if (!entry.val)
panic("\nNot enough swapspace when writing data" );
if (swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND)
@@ -334,7 +335,8 @@ static int write_suspend_image(void)
cur = (union diskpage *)((char *) pagedir_nosave)+i;
BUG_ON ((char *) cur != (((char *) pagedir_nosave) + i*PAGE_SIZE));
printk( "." );
- if (!(entry = get_swap_page()).val) {
+ entry = get_swap_page();
+ if (!entry.val) {
printk(KERN_CRIT "Not enough swapspace when writing pgdir\n" );
panic("Don't know how to recover");
free_page((unsigned long) buffer);
@@ -356,7 +358,8 @@ static int write_suspend_image(void)
BUG_ON (sizeof(struct suspend_header) > PAGE_SIZE-sizeof(swp_entry_t));
BUG_ON (sizeof(union diskpage) != PAGE_SIZE);
BUG_ON (sizeof(struct link) != PAGE_SIZE);
- if (!(entry = get_swap_page()).val)
+ entry = get_swap_page();
+ if (!entry.val)
panic( "\nNot enough swapspace when writing header" );
if (swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND)
panic("\nNot enough swapspace for header on suspend device" );
@@ -696,6 +699,7 @@ static void suspend_power_down(void)
else
#endif
{
+ device_suspend(3);
device_shutdown();
machine_power_off();
}
@@ -716,7 +720,7 @@ asmlinkage void do_magic_resume_1(void)
mb();
spin_lock_irq(&suspend_pagedir_lock); /* Done to disable interrupts */
- device_power_down(4);
+ device_power_down(3);
PRINTK( "Waiting for DMAs to settle down...\n");
mdelay(1000); /* We do not want some readahead with DMA to corrupt our memory, right?
Do it with disabled interrupts for best effect. That way, if some
@@ -785,7 +789,7 @@ asmlinkage void do_magic_suspend_2(void)
{
int is_problem;
read_swapfiles();
- device_power_down(4);
+ device_power_down(3);
is_problem = suspend_prepare_image();
device_power_up();
spin_unlock_irq(&suspend_pagedir_lock);
@@ -802,7 +806,6 @@ asmlinkage void do_magic_suspend_2(void)
barrier();
mb();
spin_lock_irq(&suspend_pagedir_lock); /* Done to disable interrupts */
- mdelay(1000);
free_pages((unsigned long) pagedir_nosave, pagedir_order);
spin_unlock_irq(&suspend_pagedir_lock);
@@ -839,9 +842,10 @@ int software_suspend(void)
need half of memory free. */
free_some_memory();
-
- /* Save state of all device drivers, and stop them. */
- if ((res = device_suspend(4))==0)
+ disable_nonboot_cpus();
+ /* Save state of all device drivers, and stop them. */
+ printk("Suspending devices... ");
+ if ((res = device_suspend(3))==0) {
/* If stopping device drivers worked, we proceed basically into
* suspend_save_image.
*
@@ -852,7 +856,9 @@ int software_suspend(void)
* using normal kernel mechanism.
*/
do_magic(0);
+ }
thaw_processes();
+ enable_nonboot_cpus();
} else
res = -EBUSY;
software_suspend_enabled = 1;
@@ -922,7 +928,7 @@ static int relocate_pagedir(void)
return 0;
}
- while ((m = (void *) __get_free_pages(GFP_ATOMIC, pagedir_order))) {
+ while ((m = (void *) __get_free_pages(GFP_ATOMIC, pagedir_order)) != NULL) {
if (!does_collide_order(old_pagedir, (unsigned long)m, pagedir_order))
break;
eaten_memory = m;
@@ -1192,7 +1198,9 @@ static int __init software_resume(void)
printk( "resuming from %s\n", resume_file);
if (read_suspend_image(resume_file, 0))
goto read_failure;
- device_suspend(4);
+ /* FIXME: Should we stop processes here, just to be safer? */
+ disable_nonboot_cpus();
+ device_suspend(3);
do_magic(1);
panic("This never returns");
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 641727bab22f..0df0748d56bd 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2162,7 +2162,7 @@ int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
struct ctl_table_header * register_sysctl_table(ctl_table * table,
int insert_at_head)
{
- return 0;
+ return NULL;
}
void unregister_sysctl_table(struct ctl_table_header * table)