diff options
| author | Patrick Mochel <mochel@osdl.org> | 2003-09-09 18:10:40 -0700 |
|---|---|---|
| committer | Patrick Mochel <mochel@osdl.org> | 2003-09-09 18:10:40 -0700 |
| commit | c521afac12ad04a7a9a4eacc778a7db65b89efa0 (patch) | |
| tree | e2af5fdb6f4ee863ec67f97bba5b2acca767a1ce /kernel | |
| parent | b11a855891448d827182f88d9fa32d33fe3cd102 (diff) | |
| parent | ad50ff186e3544ce316bc24e8b37e82b840b42e1 (diff) | |
Merge osdl.org:/home/mochel/src/kernel/linux-2.5-virgin
into osdl.org:/home/mochel/src/kernel/linux-2.5-power
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/power/Kconfig | 64 | ||||
| -rw-r--r-- | kernel/power/Makefile | 2 | ||||
| -rw-r--r-- | kernel/power/console.c | 2 | ||||
| -rw-r--r-- | kernel/power/disk.c | 335 | ||||
| -rw-r--r-- | kernel/power/main.c | 391 | ||||
| -rw-r--r-- | kernel/power/pmdisk.c | 969 | ||||
| -rw-r--r-- | kernel/power/power.h | 39 | ||||
| -rw-r--r-- | kernel/power/swsusp.c | 345 | ||||
| -rw-r--r-- | kernel/sched.c | 4 | ||||
| -rw-r--r-- | kernel/sys.c | 2 |
10 files changed, 1650 insertions, 503 deletions
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig new file mode 100644 index 000000000000..c30771af69f0 --- /dev/null +++ b/kernel/power/Kconfig @@ -0,0 +1,64 @@ +config PM + bool "Power Management support" + ---help--- + "Power Management" means that parts of your computer are shut + off or put into a power conserving "sleep" mode if they are not + being used. There are two competing standards for doing this: APM + and ACPI. If you want to use either one, say Y here and then also + to the requisite support below. + + Power Management is most important for battery powered laptop + computers; if you have a laptop, check out the Linux Laptop home + page on the WWW at + <http://www.cs.utexas.edu/users/kharker/linux-laptop/> and the + Battery Powered Linux mini-HOWTO, available from + <http://www.tldp.org/docs.html#howto>. + + Note that, even if you say N here, Linux on the x86 architecture + will issue the hlt instruction if nothing is to be done, thereby + sending the processor to sleep and saving power. + +config SOFTWARE_SUSPEND + bool "Software Suspend (EXPERIMENTAL)" + depends on EXPERIMENTAL && PM && SWAP + ---help--- + Enable the possibilty of suspendig machine. It doesn't need APM. + You may suspend your machine by 'swsusp' or 'shutdown -z <time>' + (patch for sysvinit needed). + + It creates an image which is saved in your active swaps. By the next + booting the, pass 'resume=/dev/swappartition' and kernel will + detect the saved image, restore the memory from + it and then it continues to run as before you've suspended. + If you don't want the previous state to continue use the 'noresume' + kernel option. However note that your partitions will be fsck'd and + you must re-mkswap your swap partitions. It does not work with swap + files. + + Right now you may boot without resuming and then later resume but + in meantime you cannot use those swap partitions/files which were + involved in suspending. Also in this case there is a risk that buffers + on disk won't match with saved ones. + + For more information take a look at Documentation/swsusp.txt. + +config PM_DISK + bool "Suspend-to-Disk Support" + depends on PM && SWAP + ---help--- + Suspend-to-disk is a power management state in which the contents + of memory are stored on disk and the entire system is shut down or + put into a low-power state (e.g. ACPI S4). When the computer is + turned back on, the stored image is loaded from disk and execution + resumes from where it left off before suspending. + + This config option enables the core infrastructure necessary to + perform the suspend and resume transition. + + Currently, this suspend-to-disk implementation is based on a forked + version of the swsusp code base. As such, it's still experimental, + and still relies on CONFIG_SWAP. + + More information can be found in Documentation/power/. + + If unsure, Say N. diff --git a/kernel/power/Makefile b/kernel/power/Makefile index 9640751c4338..7f127b848827 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -1,4 +1,4 @@ obj-y := main.o process.o console.o pm.o -obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o +obj-$(CONFIG_SOFTWARE_SUSPEND) += disk.o swsusp.o obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o diff --git a/kernel/power/console.c b/kernel/power/console.c index 35b1f50d97de..c8a48236619b 100644 --- a/kernel/power/console.c +++ b/kernel/power/console.c @@ -8,7 +8,7 @@ #include <linux/kbd_kern.h> #include "power.h" -static int new_loglevel = 7; +static int new_loglevel = 10; static int orig_loglevel; static int orig_fgconsole, orig_kmsg; diff --git a/kernel/power/disk.c b/kernel/power/disk.c new file mode 100644 index 000000000000..64a3130a5411 --- /dev/null +++ b/kernel/power/disk.c @@ -0,0 +1,335 @@ +/* + * kernel/power/disk.c - Suspend-to-disk support. + * + * Copyright (c) 2003 Patrick Mochel + * Copyright (c) 2003 Open Source Development Lab + * + * This file is release under the GPLv2 + * + */ + +#define DEBUG + + +#include <linux/suspend.h> +#include <linux/reboot.h> +#include <linux/string.h> +#include <linux/delay.h> +#include <linux/fs.h> +#include "power.h" + + +extern u32 pm_disk_mode; +extern struct pm_ops * pm_ops; + +extern int swsusp_save(void); +extern int swsusp_write(void); +extern int swsusp_read(void); +extern int swsusp_restore(void); +extern int swsusp_free(void); + +extern long sys_sync(void); + + +/** + * power_down - Shut machine down for hibernate. + * @mode: Suspend-to-disk mode + * + * Use the platform driver, if configured so, and return gracefully if it + * fails. + * Otherwise, try to power off and reboot. If they fail, halt the machine, + * there ain't no turning back. + */ + +static int power_down(u32 mode) +{ + unsigned long flags; + int error = 0; + + local_irq_save(flags); + device_power_down(PM_SUSPEND_DISK); + switch(mode) { + case PM_DISK_PLATFORM: + error = pm_ops->enter(PM_SUSPEND_DISK); + break; + case PM_DISK_SHUTDOWN: + printk("Powering off system\n"); + machine_power_off(); + break; + case PM_DISK_REBOOT: + machine_restart(NULL); + break; + } + machine_halt(); + device_power_up(); + local_irq_restore(flags); + return 0; +} + + +static int in_suspend __nosavedata = 0; + + +/** + * free_some_memory - Try to free as much memory as possible + * + * ... but do not OOM-kill anyone + * + * Notice: all userland should be stopped at this point, or + * livelock is possible. + */ + +static void free_some_memory(void) +{ + printk("Freeing memory: "); + while (shrink_all_memory(10000)) + printk("."); + printk("|\n"); + blk_run_queues(); +} + + +static inline void platform_finish(void) +{ + if (pm_disk_mode == PM_DISK_PLATFORM) { + if (pm_ops && pm_ops->finish) + pm_ops->finish(PM_SUSPEND_DISK); + } +} + +static void finish(void) +{ + device_resume(); + platform_finish(); + thaw_processes(); + pm_restore_console(); +} + + +static int prepare(void) +{ + int error; + + pm_prepare_console(); + + sys_sync(); + if (freeze_processes()) { + error = -EBUSY; + goto Thaw; + } + + if (pm_disk_mode == PM_DISK_PLATFORM) { + if (pm_ops && pm_ops->prepare) { + if ((error = pm_ops->prepare(PM_SUSPEND_DISK))) + goto Thaw; + } + } + + /* Free memory before shutting down devices. */ + free_some_memory(); + + if ((error = device_suspend(PM_SUSPEND_DISK))) + goto Finish; + + return 0; + Finish: + platform_finish(); + Thaw: + thaw_processes(); + pm_restore_console(); + return error; +} + + +/** + * pm_suspend_disk - The granpappy of power management. + * + * If we're going through the firmware, then get it over with quickly. + * + * If not, then call swsusp to do it's thing, then figure out how + * to power down the system. + */ + +int pm_suspend_disk(void) +{ + int error; + + if ((error = prepare())) + return error; + + pr_debug("PM: Attempting to suspend to disk.\n"); + if (pm_disk_mode == PM_DISK_FIRMWARE) + return pm_ops->enter(PM_SUSPEND_DISK); + + pr_debug("PM: snapshotting memory.\n"); + in_suspend = 1; + if ((error = swsusp_save())) + goto Done; + + if (in_suspend) { + pr_debug("PM: writing image.\n"); + + /* + * FIXME: Leftover from swsusp. Are they necessary? + */ + mb(); + barrier(); + + error = swsusp_write(); + if (!error) { + error = power_down(pm_disk_mode); + pr_debug("PM: Power down failed.\n"); + } + } else + pr_debug("PM: Image restored successfully.\n"); + swsusp_free(); + Done: + finish(); + return error; +} + + +/** + * pm_resume - Resume from a saved image. + * + * Called as a late_initcall (so all devices are discovered and + * initialized), we call swsusp to see if we have a saved image or not. + * If so, we quiesce devices, the restore the saved image. We will + * return above (in pm_suspend_disk() ) if everything goes well. + * Otherwise, we fail gracefully and return to the normally + * scheduled program. + * + */ + +static int pm_resume(void) +{ + int error; + + pr_debug("PM: Reading swsusp image.\n"); + + if ((error = swsusp_read())) + goto Done; + + pr_debug("PM: Preparing system for restore.\n"); + + if ((error = prepare())) + goto Free; + + barrier(); + mb(); + + /* FIXME: The following (comment and mdelay()) are from swsusp. + * Are they really necessary? + * + * We do not want some readahead with DMA to corrupt our memory, right? + * Do it with disabled interrupts for best effect. That way, if some + * driver scheduled DMA, we have good chance for DMA to finish ;-). + */ + pr_debug("PM: Waiting for DMAs to settle down.\n"); + mdelay(1000); + + pr_debug("PM: Restoring saved image.\n"); + swsusp_restore(); + pr_debug("PM: Restore failed, recovering.n"); + finish(); + Free: + swsusp_free(); + Done: + pr_debug("PM: Resume from disk failed.\n"); + return 0; +} + +late_initcall(pm_resume); + + +static char * pm_disk_modes[] = { + [PM_DISK_FIRMWARE] = "firmware", + [PM_DISK_PLATFORM] = "platform", + [PM_DISK_SHUTDOWN] = "shutdown", + [PM_DISK_REBOOT] = "reboot", +}; + +/** + * disk - Control suspend-to-disk mode + * + * Suspend-to-disk can be handled in several ways. The greatest + * distinction is who writes memory to disk - the firmware or the OS. + * If the firmware does it, we assume that it also handles suspending + * the system. + * If the OS does it, then we have three options for putting the system + * to sleep - using the platform driver (e.g. ACPI or other PM registers), + * powering off the system or rebooting the system (for testing). + * + * The system will support either 'firmware' or 'platform', and that is + * known a priori (and encoded in pm_ops). But, the user may choose + * 'shutdown' or 'reboot' as alternatives. + * + * show() will display what the mode is currently set to. + * store() will accept one of + * + * 'firmware' + * 'platform' + * 'shutdown' + * 'reboot' + * + * It will only change to 'firmware' or 'platform' if the system + * supports it (as determined from pm_ops->pm_disk_mode). + */ + +static ssize_t disk_show(struct subsystem * subsys, char * buf) +{ + return sprintf(buf,"%s\n",pm_disk_modes[pm_disk_mode]); +} + + +static ssize_t disk_store(struct subsystem * s, const char * buf, size_t n) +{ + int error = 0; + int i; + u32 mode = 0; + + down(&pm_sem); + for (i = PM_DISK_FIRMWARE; i < PM_DISK_MAX; i++) { + if (!strcmp(buf,pm_disk_modes[i])) { + mode = i; + break; + } + } + if (mode) { + if (mode == PM_DISK_SHUTDOWN || mode == PM_DISK_REBOOT) + pm_disk_mode = mode; + else { + if (pm_ops && pm_ops->enter && + (mode == pm_ops->pm_disk_mode)) + pm_disk_mode = mode; + else + error = -EINVAL; + } + } else + error = -EINVAL; + + pr_debug("PM: suspend-to-disk mode set to '%s'\n", + pm_disk_modes[mode]); + up(&pm_sem); + return error ? error : n; +} + +power_attr(disk); + +static struct attribute * g[] = { + &disk_attr.attr, + NULL, +}; + + +static struct attribute_group attr_group = { + .attrs = g, +}; + + +static int __init pm_disk_init(void) +{ + return sysfs_create_group(&power_subsys.kset.kobj,&attr_group); +} + +core_initcall(pm_disk_init); diff --git a/kernel/power/main.c b/kernel/power/main.c index 1b92f13d9a77..fd212e7ecd9f 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -8,32 +8,23 @@ * */ +#define DEBUG + #include <linux/suspend.h> #include <linux/kobject.h> -#include <linux/reboot.h> #include <linux/string.h> +#include <linux/delay.h> #include <linux/errno.h> #include <linux/init.h> #include <linux/pm.h> -#include <linux/fs.h> #include "power.h" -static DECLARE_MUTEX(pm_sem); - -static struct pm_ops * pm_ops = NULL; - -static u32 pm_disk_mode = PM_DISK_SHUTDOWN; - -#ifdef CONFIG_SOFTWARE_SUSPEND -static int have_swsusp = 1; -#else -static int have_swsusp = 0; -#endif - -extern long sys_sync(void); +DECLARE_MUTEX(pm_sem); +struct pm_ops * pm_ops = NULL; +u32 pm_disk_mode = PM_DISK_SHUTDOWN; /** * pm_set_ops - Set the global power method table. @@ -51,171 +42,6 @@ void pm_set_ops(struct pm_ops * ops) /** - * pm_suspend_standby - Enter 'standby' state. - * - * 'standby' is also known as 'Power-On Suspend'. Here, we power down - * devices, disable interrupts, and enter the state. - */ - -static int pm_suspend_standby(void) -{ - int error = 0; - unsigned long flags; - - if (!pm_ops || !pm_ops->enter) - return -EPERM; - - local_irq_save(flags); - if ((error = device_power_down(PM_SUSPEND_STANDBY))) - goto Done; - error = pm_ops->enter(PM_SUSPEND_STANDBY); - local_irq_restore(flags); - device_power_up(); - Done: - return error; -} - - -/** - * pm_suspend_mem - Enter suspend-to-RAM state. - * - * Identical to pm_suspend_standby() - we power down devices, disable - * interrupts, and enter the low-power state. - */ - -static int pm_suspend_mem(void) -{ - int error = 0; - unsigned long flags; - - if (!pm_ops || !pm_ops->enter) - return -EPERM; - - local_irq_save(flags); - if ((error = device_power_down(PM_SUSPEND_STANDBY))) - goto Done; - error = pm_ops->enter(PM_SUSPEND_STANDBY); - local_irq_restore(flags); - device_power_up(); - Done: - return error; -} - - -/** - * power_down - Shut machine down for hibernate. - * @mode: Suspend-to-disk mode - * - * Use the platform driver, if configured so, and return gracefully if it - * fails. - * Otherwise, try to power off and reboot. If they fail, halt the machine, - * there ain't no turning back. - */ - -static int power_down(u32 mode) -{ - unsigned long flags; - int error = 0; - - local_irq_save(flags); - device_power_down(PM_SUSPEND_DISK); - switch(mode) { - case PM_DISK_PLATFORM: - error = pm_ops->enter(PM_SUSPEND_DISK); - if (error) { - device_power_up(); - local_irq_restore(flags); - return error; - } - case PM_DISK_SHUTDOWN: - machine_power_off(); - break; - case PM_DISK_REBOOT: - machine_restart(NULL); - break; - } - machine_halt(); - return 0; -} - - -static int in_suspend __nosavedata = 0; - - -/** - * free_some_memory - Try to free as much memory as possible - * - * ... but do not OOM-kill anyone - * - * Notice: all userland should be stopped at this point, or - * livelock is possible. - */ - -static void free_some_memory(void) -{ - printk("Freeing memory: "); - while (shrink_all_memory(10000)) - printk("."); - printk("|\n"); - blk_run_queues(); -} - - -/** - * pm_suspend_disk - The granpappy of power management. - * - * If we're going through the firmware, then get it over with quickly. - * - * If not, then call swsusp to do it's thing, then figure out how - * to power down the system. - */ - -static int pm_suspend_disk(void) -{ - int error; - - pr_debug("PM: Attempting to suspend to disk.\n"); - if (pm_disk_mode == PM_DISK_FIRMWARE) - return pm_ops->enter(PM_SUSPEND_DISK); - - if (!have_swsusp) - return -EPERM; - - pr_debug("PM: snapshotting memory.\n"); - in_suspend = 1; - if ((error = swsusp_save())) - goto Done; - - if (in_suspend) { - pr_debug("PM: writing image.\n"); - error = swsusp_write(); - if (!error) - error = power_down(pm_disk_mode); - pr_debug("PM: Power down failed.\n"); - } else - pr_debug("PM: Image restored successfully.\n"); - swsusp_free(); - Done: - return error; -} - - - -#define decl_state(_name) \ - { .name = __stringify(_name), .fn = pm_suspend_##_name } - -struct pm_state { - char * name; - int (*fn)(void); -} pm_states[] = { - [PM_SUSPEND_STANDBY] = decl_state(standby), - [PM_SUSPEND_MEM] = decl_state(mem), - [PM_SUSPEND_DISK] = decl_state(disk), - { NULL }, -}; - - -/** * suspend_prepare - Do prep work before entering low-power state. * @state: State we're entering. * @@ -228,36 +54,47 @@ static int suspend_prepare(u32 state) { int error = 0; + if (!pm_ops || !pm_ops->enter) + return -EPERM; + pm_prepare_console(); - sys_sync(); if (freeze_processes()) { error = -EAGAIN; goto Thaw; } - if (pm_ops && pm_ops->prepare) { + if (pm_ops->prepare) { if ((error = pm_ops->prepare(state))) goto Thaw; } - /* Free memory before shutting down devices. */ - if (state == PM_SUSPEND_DISK) - free_some_memory(); - if ((error = device_suspend(state))) goto Finish; - return 0; - Done: - pm_restore_console(); - return error; Finish: - if (pm_ops && pm_ops->finish) + if (pm_ops->finish) pm_ops->finish(state); Thaw: thaw_processes(); - goto Done; + pm_restore_console(); + return error; +} + + +static int suspend_enter(u32 state) +{ + int error = 0; + unsigned long flags; + + local_irq_save(flags); + if ((error = device_power_down(state))) + goto Done; + error = pm_ops->enter(state); + device_power_up(); + Done: + local_irq_restore(flags); + return error; } @@ -279,6 +116,16 @@ static void suspend_finish(u32 state) } + + +char * pm_states[] = { + [PM_SUSPEND_STANDBY] = "standby", + [PM_SUSPEND_MEM] = "mem", + [PM_SUSPEND_DISK] = "disk", + NULL, +}; + + /** * enter_state - Do common work of entering low-power state. * @state: pm_state structure for state we're entering. @@ -293,7 +140,6 @@ static void suspend_finish(u32 state) static int enter_state(u32 state) { int error; - struct pm_state * s = &pm_states[state]; if (down_trylock(&pm_sem)) return -EBUSY; @@ -304,12 +150,17 @@ static int enter_state(u32 state) goto Unlock; } - pr_debug("PM: Preparing system for suspend.\n"); + if (state == PM_SUSPEND_DISK) { + error = pm_suspend_disk(); + goto Unlock; + } + + pr_debug("PM: Preparing system for suspend\n"); if ((error = suspend_prepare(state))) goto Unlock; pr_debug("PM: Entering state.\n"); - error = s->fn(); + error = suspend_enter(state); pr_debug("PM: Finishing up.\n"); suspend_finish(state); @@ -335,138 +186,10 @@ int pm_suspend(u32 state) } -/** - * pm_resume - Resume from a saved image. - * - * Called as a late_initcall (so all devices are discovered and - * initialized), we call swsusp to see if we have a saved image or not. - * If so, we quiesce devices, the restore the saved image. We will - * return above (in pm_suspend_disk() ) if everything goes well. - * Otherwise, we fail gracefully and return to the normally - * scheduled program. - * - */ - -static int pm_resume(void) -{ - int error; - - if (!have_swsusp) - return 0; - - pr_debug("PM: Reading swsusp image.\n"); - - if ((error = swsusp_read())) - goto Done; - - pr_debug("PM: Preparing system for restore.\n"); - - if ((error = suspend_prepare(PM_SUSPEND_DISK))) - goto Free; - - pr_debug("PM: Restoring saved image.\n"); - swsusp_restore(); - - pr_debug("PM: Restore failed, recovering.n"); - suspend_finish(PM_SUSPEND_DISK); - Free: - swsusp_free(); - Done: - pr_debug("PM: Resume from disk failed.\n"); - return 0; -} - -late_initcall(pm_resume); - decl_subsys(power,NULL,NULL); -#define power_attr(_name) \ -static struct subsys_attribute _name##_attr = { \ - .attr = { \ - .name = __stringify(_name), \ - .mode = 0644, \ - }, \ - .show = _name##_show, \ - .store = _name##_store, \ -} - - -static char * pm_disk_modes[] = { - [PM_DISK_FIRMWARE] = "firmware", - [PM_DISK_PLATFORM] = "platform", - [PM_DISK_SHUTDOWN] = "shutdown", - [PM_DISK_REBOOT] = "reboot", -}; - -/** - * disk - Control suspend-to-disk mode - * - * Suspend-to-disk can be handled in several ways. The greatest - * distinction is who writes memory to disk - the firmware or the OS. - * If the firmware does it, we assume that it also handles suspending - * the system. - * If the OS does it, then we have three options for putting the system - * to sleep - using the platform driver (e.g. ACPI or other PM registers), - * powering off the system or rebooting the system (for testing). - * - * The system will support either 'firmware' or 'platform', and that is - * known a priori (and encoded in pm_ops). But, the user may choose - * 'shutdown' or 'reboot' as alternatives. - * - * show() will display what the mode is currently set to. - * store() will accept one of - * - * 'firmware' - * 'platform' - * 'shutdown' - * 'reboot' - * - * It will only change to 'firmware' or 'platform' if the system - * supports it (as determined from pm_ops->pm_disk_mode). - */ - -static ssize_t disk_show(struct subsystem * subsys, char * buf) -{ - return sprintf(buf,"%s\n",pm_disk_modes[pm_disk_mode]); -} - - -static ssize_t disk_store(struct subsystem * s, const char * buf, size_t n) -{ - int error = 0; - int i; - u32 mode = 0; - - down(&pm_sem); - for (i = PM_DISK_FIRMWARE; i < PM_DISK_MAX; i++) { - if (!strcmp(buf,pm_disk_modes[i])) { - mode = i; - break; - } - } - if (mode) { - if (mode == PM_DISK_SHUTDOWN || mode == PM_DISK_REBOOT) - pm_disk_mode = mode; - else { - if (pm_ops && pm_ops->enter && - (mode == pm_ops->pm_disk_mode)) - pm_disk_mode = mode; - else - error = -EINVAL; - } - } else - error = -EINVAL; - - pr_debug("PM: suspend-to-disk mode set to '%s'\n", - pm_disk_modes[mode]); - up(&pm_sem); - return error ? error : n; -} - -power_attr(disk); - /** * state - control system power state. * @@ -480,27 +203,28 @@ power_attr(disk); static ssize_t state_show(struct subsystem * subsys, char * buf) { - struct pm_state * state; + int i; char * s = buf; - for (state = &pm_states[0]; state->name; state++) - s += sprintf(s,"%s ",state->name); + for (i = 0; i < PM_SUSPEND_MAX; i++) { + if (pm_states[i]) + s += sprintf(s,"%s ",pm_states[i]); + } s += sprintf(s,"\n"); return (s - buf); } static ssize_t state_store(struct subsystem * subsys, const char * buf, size_t n) { - u32 state; - struct pm_state * s; + u32 state = PM_SUSPEND_STANDBY; + char ** s; int error; - for (state = 0; state < PM_SUSPEND_MAX; state++) { - s = &pm_states[state]; - if (s->name && !strcmp(buf,s->name)) + for (s = &pm_states[state]; *s; s++, state++) { + if (!strcmp(buf,*s)) break; } - if (s) + if (*s) error = enter_state(state); else error = -EINVAL; @@ -511,7 +235,6 @@ power_attr(state); static struct attribute * g[] = { &state_attr.attr, - &disk_attr.attr, NULL, }; @@ -520,7 +243,7 @@ static struct attribute_group attr_group = { }; -static int pm_init(void) +static int __init pm_init(void) { int error = subsystem_register(&power_subsys); if (!error) diff --git a/kernel/power/pmdisk.c b/kernel/power/pmdisk.c new file mode 100644 index 000000000000..bb795b6dc537 --- /dev/null +++ b/kernel/power/pmdisk.c @@ -0,0 +1,969 @@ +/* + * linux/kernel/suspend.c + * + * This file is to realize architecture-independent + * machine suspend feature using pretty near only high-level routines + * + * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu> + * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz> + * + * I'd like to thank the following people for their work: + * + * Pavel Machek <pavel@ucw.cz>: + * Modifications, defectiveness pointing, being with me at the very beginning, + * suspend to swap space, stop all tasks. Port to 2.4.18-ac and 2.5.17. + * + * Steve Doddi <dirk@loth.demon.co.uk>: + * Support the possibility of hardware state restoring. + * + * Raph <grey.havens@earthling.net>: + * Support for preserving states of network devices and virtual console + * (including X and svgatextmode) + * + * Kurt Garloff <garloff@suse.de>: + * Straightened the critical function in order to prevent compilers from + * playing tricks with local variables. + * + * Andreas Mohr <a.mohr@mailto.de> + * + * Alex Badea <vampire@go.ro>: + * Fixed runaway init + * + * More state savers are welcome. Especially for the scsi layer... + * + * For TODOs,FIXMEs also look in Documentation/swsusp.txt + */ + +#include <linux/mm.h> +#include <linux/bio.h> +#include <linux/suspend.h> +#include <linux/version.h> +#include <linux/reboot.h> +#include <linux/device.h> +#include <linux/swapops.h> +#include <linux/bootmem.h> + +#include <asm/mmu_context.h> + +#include "power.h" + + +extern int swsusp_arch_suspend(int resume); + +#define __ADDRESS(x) ((unsigned long) phys_to_virt(x)) +#define ADDRESS(x) __ADDRESS((x) << PAGE_SHIFT) +#define ADDRESS2(x) __ADDRESS(__pa(x)) /* Needed for x86-64 where some pages are in memory twice */ + +/* References to section boundaries */ +extern char __nosave_begin, __nosave_end; + +extern int is_head_of_free_region(struct page *); + +/* Variables to be preserved over suspend */ +static int pagedir_order_check; +static int nr_copy_pages_check; + +static char resume_file[256]; /* For resume= kernel option */ +static dev_t resume_device; +/* Local variables that should not be affected by save */ +unsigned int nr_copy_pages __nosavedata = 0; + +/* Suspend pagedir is allocated before final copy, therefore it + must be freed after resume + + Warning: this is evil. There are actually two pagedirs at time of + resume. One is "pagedir_save", which is empty frame allocated at + time of suspend, that must be freed. Second is "pagedir_nosave", + allocated at time of resume, that travels through memory not to + collide with anything. + */ +suspend_pagedir_t *pagedir_nosave __nosavedata = NULL; +static suspend_pagedir_t *pagedir_save; +static int pagedir_order __nosavedata = 0; + +struct link { + char dummy[PAGE_SIZE - sizeof(swp_entry_t)]; + swp_entry_t next; +}; + +union diskpage { + union swap_header swh; + struct link link; + struct suspend_header sh; +}; + +/* + * XXX: We try to keep some more pages free so that I/O operations succeed + * without paging. Might this be more? + */ +#define PAGES_FOR_IO 512 + +static const char name_suspend[] = "Suspend Machine: "; +static const char name_resume[] = "Resume Machine: "; + +/* + * Debug + */ +#define DEBUG_DEFAULT +#undef DEBUG_PROCESS +#undef DEBUG_SLOW +#define TEST_SWSUSP 0 /* Set to 1 to reboot instead of halt machine after suspension */ + +#ifdef DEBUG_DEFAULT +# define PRINTK(f, a...) printk(f, ## a) +#else +# define PRINTK(f, a...) +#endif + +#ifdef DEBUG_SLOW +#define MDELAY(a) mdelay(a) +#else +#define MDELAY(a) +#endif + +/* + * Saving part... + */ + +static __inline__ int fill_suspend_header(struct suspend_header *sh) +{ + memset((char *)sh, 0, sizeof(*sh)); + + sh->version_code = LINUX_VERSION_CODE; + sh->num_physpages = num_physpages; + strncpy(sh->machine, system_utsname.machine, 8); + strncpy(sh->version, system_utsname.version, 20); + /* FIXME: Is this bogus? --RR */ + sh->num_cpus = num_online_cpus(); + sh->page_size = PAGE_SIZE; + sh->suspend_pagedir = pagedir_nosave; + BUG_ON (pagedir_save != pagedir_nosave); + sh->num_pbes = nr_copy_pages; + /* TODO: needed? mounted fs' last mounted date comparison + * [so they haven't been mounted since last suspend. + * Maybe it isn't.] [we'd need to do this for _all_ fs-es] + */ + return 0; +} + +/* We memorize in swapfile_used what swap devices are used for suspension */ +#define SWAPFILE_UNUSED 0 +#define SWAPFILE_SUSPEND 1 /* This is the suspending device */ +#define SWAPFILE_IGNORED 2 /* Those are other swap devices ignored for suspension */ + +static unsigned short swapfile_used[MAX_SWAPFILES]; +static unsigned short root_swap; +#define MARK_SWAP_SUSPEND 0 +#define MARK_SWAP_RESUME 2 + +static void mark_swapfiles(swp_entry_t prev, int mode) +{ + swp_entry_t entry; + union diskpage *cur; + struct page *page; + + if (root_swap == 0xFFFF) /* ignored */ + return; + + page = alloc_page(GFP_ATOMIC); + if (!page) + panic("Out of memory in mark_swapfiles"); + cur = page_address(page); + /* XXX: this is dirty hack to get first page of swap file */ + entry = swp_entry(root_swap, 0); + rw_swap_page_sync(READ, entry, page); + + if (mode == MARK_SWAP_RESUME) { + if (!memcmp("S1",cur->swh.magic.magic,2)) + memcpy(cur->swh.magic.magic,"SWAP-SPACE",10); + else if (!memcmp("S2",cur->swh.magic.magic,2)) + memcpy(cur->swh.magic.magic,"SWAPSPACE2",10); + else printk("%sUnable to find suspended-data signature (%.10s - misspelled?\n", + name_resume, cur->swh.magic.magic); + } else { + if ((!memcmp("SWAP-SPACE",cur->swh.magic.magic,10))) + memcpy(cur->swh.magic.magic,"S1SUSP....",10); + else if ((!memcmp("SWAPSPACE2",cur->swh.magic.magic,10))) + memcpy(cur->swh.magic.magic,"S2SUSP....",10); + else panic("\nSwapspace is not swapspace (%.10s)\n", cur->swh.magic.magic); + cur->link.next = prev; /* prev is the first/last swap page of the resume area */ + /* link.next lies *no more* in last 4/8 bytes of magic */ + } + rw_swap_page_sync(WRITE, entry, page); + __free_page(page); +} + +static void read_swapfiles(void) /* This is called before saving image */ +{ + int i, len; + + len=strlen(resume_file); + root_swap = 0xFFFF; + + swap_list_lock(); + for(i=0; i<MAX_SWAPFILES; i++) { + if (swap_info[i].flags == 0) { + swapfile_used[i]=SWAPFILE_UNUSED; + } else { + if(!len) { + printk(KERN_WARNING "resume= option should be used to set suspend device" ); + if(root_swap == 0xFFFF) { + swapfile_used[i] = SWAPFILE_SUSPEND; + root_swap = i; + } else + swapfile_used[i] = SWAPFILE_IGNORED; + } else { + /* we ignore all swap devices that are not the resume_file */ + if (1) { +// FIXME if(resume_device == swap_info[i].swap_device) { + swapfile_used[i] = SWAPFILE_SUSPEND; + root_swap = i; + } else { +#if 0 + printk( "Resume: device %s (%x != %x) ignored\n", swap_info[i].swap_file->d_name.name, swap_info[i].swap_device, resume_device ); +#endif + swapfile_used[i] = SWAPFILE_IGNORED; + } + } + } + } + swap_list_unlock(); +} + +static void lock_swapdevices(void) /* This is called after saving image so modification + will be lost after resume... and that's what we want. */ +{ + int i; + + swap_list_lock(); + for(i = 0; i< MAX_SWAPFILES; i++) + if(swapfile_used[i] == SWAPFILE_IGNORED) { + swap_info[i].flags ^= 0xFF; /* we make the device unusable. A new call to + lock_swapdevices can unlock the devices. */ + } + swap_list_unlock(); +} + +static int write_suspend_image(void) +{ + int i; + swp_entry_t entry, prev = { 0 }; + int nr_pgdir_pages = SUSPEND_PD_PAGES(nr_copy_pages); + union diskpage *cur, *buffer = (union diskpage *)get_zeroed_page(GFP_ATOMIC); + unsigned long address; + struct page *page; + + printk( "Writing data to swap (%d pages): ", nr_copy_pages ); + for (i=0; i<nr_copy_pages; i++) { + if (!(i%100)) + printk( "." ); + if (!(entry = get_swap_page()).val) + panic("\nNot enough swapspace when writing data" ); + + if (swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND) + panic("\nPage %d: not enough swapspace on suspend device", i ); + + address = (pagedir_nosave+i)->address; + page = virt_to_page(address); + rw_swap_page_sync(WRITE, entry, page); + (pagedir_nosave+i)->swap_address = entry; + } + printk( "|\n" ); + printk( "Writing pagedir (%d pages): ", nr_pgdir_pages); + for (i=0; i<nr_pgdir_pages; i++) { + cur = (union diskpage *)((char *) pagedir_nosave)+i; + BUG_ON ((char *) cur != (((char *) pagedir_nosave) + i*PAGE_SIZE)); + printk( "." ); + if (!(entry = get_swap_page()).val) { + printk(KERN_CRIT "Not enough swapspace when writing pgdir\n" ); + panic("Don't know how to recover"); + free_page((unsigned long) buffer); + return -ENOSPC; + } + + if(swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND) + panic("\nNot enough swapspace for pagedir on suspend device" ); + + BUG_ON (sizeof(swp_entry_t) != sizeof(long)); + BUG_ON (PAGE_SIZE % sizeof(struct pbe)); + + cur->link.next = prev; + page = virt_to_page((unsigned long)cur); + rw_swap_page_sync(WRITE, entry, page); + prev = entry; + } + printk("H"); + BUG_ON (sizeof(struct suspend_header) > PAGE_SIZE-sizeof(swp_entry_t)); + BUG_ON (sizeof(union diskpage) != PAGE_SIZE); + if (!(entry = get_swap_page()).val) + panic( "\nNot enough swapspace when writing header" ); + if (swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND) + panic("\nNot enough swapspace for header on suspend device" ); + + cur = (void *) buffer; + if (fill_suspend_header(&cur->sh)) + panic("\nOut of memory while writing header"); + + cur->link.next = prev; + + page = virt_to_page((unsigned long)cur); + rw_swap_page_sync(WRITE, entry, page); + prev = entry; + + printk( "S" ); + mark_swapfiles(prev, MARK_SWAP_SUSPEND); + printk( "|\n" ); + + MDELAY(1000); + free_page((unsigned long) buffer); + return 0; +} + +/* if pagedir_p != NULL it also copies the counted pages */ +static int count_and_copy_data_pages(struct pbe *pagedir_p) +{ + int chunk_size; + int nr_copy_pages = 0; + int pfn; + struct page *page; + + BUG_ON (max_pfn != num_physpages); + + for (pfn = 0; pfn < max_pfn; pfn++) { + page = pfn_to_page(pfn); + + if (!PageReserved(page)) { + if (PageNosave(page)) + continue; + + if ((chunk_size=is_head_of_free_region(page))!=0) { + pfn += chunk_size - 1; + continue; + } + } else if (PageReserved(page)) { + BUG_ON (PageNosave(page)); + + /* + * Just copy whole code segment. Hopefully it is not that big. + */ + if ((ADDRESS(pfn) >= (unsigned long) ADDRESS2(&__nosave_begin)) && + (ADDRESS(pfn) < (unsigned long) ADDRESS2(&__nosave_end))) { + PRINTK("[nosave %lx]", ADDRESS(pfn)); + continue; + } + /* Hmm, perhaps copying all reserved pages is not too healthy as they may contain + critical bios data? */ + } else BUG(); + + nr_copy_pages++; + if (pagedir_p) { + pagedir_p->orig_address = ADDRESS(pfn); + copy_page((void *) pagedir_p->address, (void *) pagedir_p->orig_address); + pagedir_p++; + } + } + return nr_copy_pages; +} + +static void free_suspend_pagedir(unsigned long this_pagedir) +{ + struct page *page; + int pfn; + unsigned long this_pagedir_end = this_pagedir + + (PAGE_SIZE << pagedir_order); + + for(pfn = 0; pfn < num_physpages; pfn++) { + page = pfn_to_page(pfn); + if (!TestClearPageNosave(page)) + continue; + + if (ADDRESS(pfn) >= this_pagedir && ADDRESS(pfn) < this_pagedir_end) + continue; /* old pagedir gets freed in one */ + + free_page(ADDRESS(pfn)); + } + free_pages(this_pagedir, pagedir_order); +} + +static suspend_pagedir_t *create_suspend_pagedir(int nr_copy_pages) +{ + int i; + suspend_pagedir_t *pagedir; + struct pbe *p; + struct page *page; + + pagedir_order = get_bitmask_order(SUSPEND_PD_PAGES(nr_copy_pages)); + + p = pagedir = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC | __GFP_COLD, pagedir_order); + if(!pagedir) + return NULL; + + page = virt_to_page(pagedir); + for(i=0; i < 1<<pagedir_order; i++) + SetPageNosave(page++); + + while(nr_copy_pages--) { + p->address = get_zeroed_page(GFP_ATOMIC | __GFP_COLD); + if(!p->address) { + free_suspend_pagedir((unsigned long) pagedir); + return NULL; + } + SetPageNosave(virt_to_page(p->address)); + p->orig_address = 0; + p++; + } + return pagedir; +} + + +int swsusp_suspend(void) +{ + struct sysinfo i; + unsigned int nr_needed_pages = 0; + + read_swapfiles(); + drain_local_pages(); + + pagedir_nosave = NULL; + printk( "/critical section: Counting pages to copy" ); + nr_copy_pages = count_and_copy_data_pages(NULL); + nr_needed_pages = nr_copy_pages + PAGES_FOR_IO; + + printk(" (pages needed: %d+%d=%d free: %d)\n",nr_copy_pages,PAGES_FOR_IO,nr_needed_pages,nr_free_pages()); + if(nr_free_pages() < nr_needed_pages) { + printk(KERN_CRIT "%sCouldn't get enough free pages, on %d pages short\n", + name_suspend, nr_needed_pages-nr_free_pages()); + root_swap = 0xFFFF; + return 1; + } + si_swapinfo(&i); /* FIXME: si_swapinfo(&i) returns all swap devices information. + We should only consider resume_device. */ + if (i.freeswap < nr_needed_pages) { + printk(KERN_CRIT "%sThere's not enough swap space available, on %ld pages short\n", + name_suspend, nr_needed_pages-i.freeswap); + return 1; + } + + PRINTK( "Alloc pagedir\n" ); + pagedir_save = pagedir_nosave = create_suspend_pagedir(nr_copy_pages); + if(!pagedir_nosave) { + /* Shouldn't happen */ + printk(KERN_CRIT "%sCouldn't allocate enough pages\n",name_suspend); + panic("Really should not happen"); + return 1; + } + nr_copy_pages_check = nr_copy_pages; + pagedir_order_check = pagedir_order; + + drain_local_pages(); /* During allocating of suspend pagedir, new cold pages may appear. Kill them */ + if (nr_copy_pages != count_and_copy_data_pages(pagedir_nosave)) /* copy */ + BUG(); + + /* + * End of critical section. From now on, we can write to memory, + * but we should not touch disk. This specially means we must _not_ + * touch swap space! Except we must write out our image of course. + */ + + printk( "critical section/: done (%d pages copied)\n", nr_copy_pages ); + return 0; +} + + +/** + * suspend_save_image - Prepare and write saved image to swap. + * + * IRQs are re-enabled here so we can resume devices and safely write + * to the swap devices. We disable them again before we leave. + * + * The second lock_swapdevices() will unlock ignored swap devices since + * writing is finished. + * It is important _NOT_ to umount filesystems at this point. We want + * them synced (in case something goes wrong) but we DO not want to mark + * filesystem clean: it is not. (And it does not matter, if we resume + * correctly, we'll mark system clean, anyway.) + */ + +static int suspend_save_image(void) +{ + int error; + device_resume(); + lock_swapdevices(); + error = write_suspend_image(); + lock_swapdevices(); + return error; +} + +/* + * Magic happens here + */ + +int swsusp_resume(void) +{ + BUG_ON (nr_copy_pages_check != nr_copy_pages); + BUG_ON (pagedir_order_check != pagedir_order); + + /* Even mappings of "global" things (vmalloc) need to be fixed */ + __flush_tlb_global(); + return 0; +} + +/* swsusp_arch_suspend() is implemented in arch/?/power/swsusp.S, + and basically does: + + if (!resume) { + save_processor_state(); + SAVE_REGISTERS + return swsusp_suspend(); + } + GO_TO_SWAPPER_PAGE_TABLES + COPY_PAGES_BACK + RESTORE_REGISTERS + restore_processor_state(); + return swsusp_resume(); + + */ + + +/* More restore stuff */ + +/* FIXME: Why not memcpy(to, from, 1<<pagedir_order*PAGE_SIZE)? */ +static void __init copy_pagedir(suspend_pagedir_t *to, suspend_pagedir_t *from) +{ + int i; + char *topointer=(char *)to, *frompointer=(char *)from; + + for(i=0; i < 1 << pagedir_order; i++) { + copy_page(topointer, frompointer); + topointer += PAGE_SIZE; + frompointer += PAGE_SIZE; + } +} + +#define does_collide(addr) does_collide_order(pagedir_nosave, addr, 0) + +/* + * Returns true if given address/order collides with any orig_address + */ +static int __init does_collide_order(suspend_pagedir_t *pagedir, + unsigned long addr, int order) +{ + int i; + unsigned long addre = addr + (PAGE_SIZE<<order); + + for(i=0; i < nr_copy_pages; i++) + if((pagedir+i)->orig_address >= addr && + (pagedir+i)->orig_address < addre) + return 1; + + return 0; +} + +/* + * We check here that pagedir & pages it points to won't collide with pages + * where we're going to restore from the loaded pages later + */ +static int __init check_pagedir(void) +{ + int i; + + for(i=0; i < nr_copy_pages; i++) { + unsigned long addr; + + do { + addr = get_zeroed_page(GFP_ATOMIC); + if(!addr) + return -ENOMEM; + } while (does_collide(addr)); + + (pagedir_nosave+i)->address = addr; + } + return 0; +} + +static int __init relocate_pagedir(void) +{ + /* + * We have to avoid recursion (not to overflow kernel stack), + * and that's why code looks pretty cryptic + */ + suspend_pagedir_t *new_pagedir, *old_pagedir = pagedir_nosave; + void **eaten_memory = NULL; + void **c = eaten_memory, *m, *f; + + printk("Relocating pagedir"); + + if(!does_collide_order(old_pagedir, (unsigned long)old_pagedir, pagedir_order)) { + printk("not necessary\n"); + return 0; + } + + while ((m = (void *) __get_free_pages(GFP_ATOMIC, pagedir_order))) { + memset(m, 0, PAGE_SIZE); + if (!does_collide_order(old_pagedir, (unsigned long)m, pagedir_order)) + break; + eaten_memory = m; + printk( "." ); + *eaten_memory = c; + c = eaten_memory; + } + + if (!m) + return -ENOMEM; + + pagedir_nosave = new_pagedir = m; + copy_pagedir(new_pagedir, old_pagedir); + + c = eaten_memory; + while(c) { + printk(":"); + f = *c; + c = *c; + if (f) + free_pages((unsigned long)f, pagedir_order); + } + printk("|\n"); + return 0; +} + +/* + * Sanity check if this image makes sense with this kernel/swap context + * I really don't think that it's foolproof but more than nothing.. + */ + +static int __init sanity_check_failed(char *reason) +{ + printk(KERN_ERR "%s%s\n",name_resume,reason); + return -EPERM; +} + +static int __init sanity_check(struct suspend_header *sh) +{ + if(sh->version_code != LINUX_VERSION_CODE) + return sanity_check_failed("Incorrect kernel version"); + if(sh->num_physpages != num_physpages) + return sanity_check_failed("Incorrect memory size"); + if(strncmp(sh->machine, system_utsname.machine, 8)) + return sanity_check_failed("Incorrect machine type"); + if(strncmp(sh->version, system_utsname.version, 20)) + return sanity_check_failed("Incorrect version"); + if(sh->num_cpus != num_online_cpus()) + return sanity_check_failed("Incorrect number of cpus"); + if(sh->page_size != PAGE_SIZE) + return sanity_check_failed("Incorrect PAGE_SIZE"); + return 0; +} + +static struct block_device * resume_bdev; + + +/** + * Using bio to read from swap. + * This code requires a bit more work than just using buffer heads + * but, it is the recommended way for 2.5/2.6. + * The following are to signal the beginning and end of I/O. Bios + * finish asynchronously, while we want them to happen synchronously. + * A simple atomic_t, and a wait loop take care of this problem. + */ + +static atomic_t io_done = ATOMIC_INIT(0); + +static void start_io(void) +{ + atomic_set(&io_done,1); +} + +static int end_io(struct bio * bio, unsigned int num, int err) +{ + atomic_set(&io_done,0); + return 0; +} + +static void wait_io(void) +{ + blk_run_queues(); + while(atomic_read(&io_done)) + io_schedule(); +} + + +/** + * submit - submit BIO request. + * @rw: READ or WRITE. + * @off physical offset of page. + * @page: page we're reading or writing. + * + * Straight from the textbook - allocate and initialize the bio. + * If we're writing, make sure the page is marked as dirty. + * Then submit it and wait. + */ + +static int submit(int rw, pgoff_t page_off, void * page) +{ + int error = 0; + struct bio * bio; + + bio = bio_alloc(GFP_ATOMIC,1); + if (!bio) + return -ENOMEM; + bio->bi_sector = page_off * (PAGE_SIZE >> 9); + bio_get(bio); + bio->bi_bdev = resume_bdev; + bio->bi_end_io = end_io; + + if (bio_add_page(bio, virt_to_page(page), PAGE_SIZE, 0) < PAGE_SIZE) { + printk("ERROR: adding page to bio at %ld\n",page_off); + error = -EFAULT; + goto Done; + } + + if (rw == WRITE) + bio_set_pages_dirty(bio); + start_io(); + submit_bio(rw,bio); + wait_io(); + Done: + bio_put(bio); + return error; +} + +static int +read_page(pgoff_t page_off, void * page) +{ + return submit(READ,page_off,page); +} + +static int +write_page(pgoff_t page_off, void * page) +{ + return submit(WRITE,page_off,page); +} + + +extern dev_t __init name_to_dev_t(const char *line); + + +#define next_entry(diskpage) diskpage->link.next + +static int __init read_suspend_image(void) +{ + swp_entry_t next; + int i, nr_pgdir_pages; + union diskpage *cur; + int error = 0; + + cur = (union diskpage *)get_zeroed_page(GFP_ATOMIC); + if (!cur) + return -ENOMEM; + + if ((error = read_page(0, cur))) + goto Done; + + /* + * We have to read next position before we overwrite it + */ + next = next_entry(cur); + + if (!memcmp("S1",cur->swh.magic.magic,2)) + memcpy(cur->swh.magic.magic,"SWAP-SPACE",10); + else if (!memcmp("S2",cur->swh.magic.magic,2)) + memcpy(cur->swh.magic.magic,"SWAPSPACE2",10); + else if ((!memcmp("SWAP-SPACE",cur->swh.magic.magic,10)) || + (!memcmp("SWAPSPACE2",cur->swh.magic.magic,10))) { + printk(KERN_ERR "swsusp: Partition is normal swap space\n"); + error = -EINVAL; + goto Done; + } else { + printk(KERN_ERR "swsusp: Invalid partition type.\n"); + error = -EINVAL; + goto Done; + } + + /* + * Reset swap signature now. + */ + if ((error = write_page(0,cur))) + goto Done; + + printk( "%sSignature found, resuming\n", name_resume ); + MDELAY(1000); + + if ((error = read_page(swp_offset(next), cur))) + goto Done; + /* Is this same machine? */ + if ((error = sanity_check(&cur->sh))) + goto Done; + next = next_entry(cur); + + pagedir_save = cur->sh.suspend_pagedir; + nr_copy_pages = cur->sh.num_pbes; + nr_pgdir_pages = SUSPEND_PD_PAGES(nr_copy_pages); + pagedir_order = get_bitmask_order(nr_pgdir_pages); + + pagedir_nosave = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC, pagedir_order); + if (!pagedir_nosave) { + error = -ENOMEM; + goto Done; + } + + PRINTK( "%sReading pagedir, ", name_resume ); + + /* We get pages in reverse order of saving! */ + for (i=nr_pgdir_pages-1; i>=0; i--) { + BUG_ON (!next.val); + cur = (union diskpage *)((char *) pagedir_nosave)+i; + error = read_page(swp_offset(next), cur); + if (error) + goto FreePagedir; + next = next_entry(cur); + } + BUG_ON (next.val); + + if ((error = relocate_pagedir())) + goto FreePagedir; + if ((error = check_pagedir())) + goto FreePagedir; + + printk( "Reading image data (%d pages): ", nr_copy_pages ); + for(i=0; i < nr_copy_pages; i++) { + swp_entry_t swap_address = (pagedir_nosave+i)->swap_address; + if (!(i%100)) + printk( "." ); + /* You do not need to check for overlaps... + ... check_pagedir already did this work */ + error = read_page(swp_offset(swap_address), + (char *)((pagedir_nosave+i)->address)); + if (error) + goto FreePagedir; + } + printk( "|\n" ); + Done: + free_page((unsigned long)cur); + return error; + FreePagedir: + free_pages((unsigned long)pagedir_nosave,pagedir_order); + goto Done; +} + +/** + * swsusp_save - Snapshot memory + */ + +int swsusp_save(void) +{ + int error; + +#if defined (CONFIG_HIGHMEM) || defined (COFNIG_DISCONTIGMEM) + printk("swsusp is not supported with high- or discontig-mem.\n"); + return -EPERM; +#endif + if ((error = arch_prepare_suspend())) + return error; + local_irq_disable(); + error = swsusp_arch_suspend(0); + local_irq_enable(); + return error; +} + + +/** + * swsusp_write - Write saved memory image to swap. + * + * swsusp_arch_suspend(0) returns after system is resumed. + * + * swsusp_arch_suspend() copies all "used" memory to "free" memory, + * then unsuspends all device drivers, and writes memory to disk + * using normal kernel mechanism. + */ + +int swsusp_write(void) +{ + return suspend_save_image(); +} + + +/** + * swsusp_read - Read saved image from swap. + */ + +int __init swsusp_read(void) +{ + int error; + char b[BDEVNAME_SIZE]; + + if (!strlen(resume_file)) + return -ENOENT; + + resume_device = name_to_dev_t(resume_file); + printk("swsusp: Resume From Partition: %s, Device: %s\n", + resume_file, __bdevname(resume_device, b)); + + resume_bdev = open_by_devnum(resume_device, FMODE_READ, BDEV_RAW); + if (!IS_ERR(resume_bdev)) { + set_blocksize(resume_bdev, PAGE_SIZE); + error = read_suspend_image(); + blkdev_put(resume_bdev, BDEV_RAW); + } else + error = PTR_ERR(resume_bdev); + + if (!error) + PRINTK("Reading resume file was successful\n"); + else + printk( "%sError %d resuming\n", name_resume, error ); + MDELAY(1000); + return error; +} + + +/** + * swsusp_restore - Replace running kernel with saved image. + */ + +int __init swsusp_restore(void) +{ + int error; + local_irq_disable(); + error = swsusp_arch_suspend(1); + local_irq_enable(); + return error; +} + + +/** + * swsusp_free - Free memory allocated to hold snapshot. + */ + +int swsusp_free(void) +{ + PRINTK( "Freeing prev allocated pagedir\n" ); + free_suspend_pagedir((unsigned long) pagedir_save); + return 0; +} + + +int software_suspend(void) +{ + struct pm_ops swsusp_ops = { + .pm_disk_mode = PM_DISK_SHUTDOWN, + }; + + pm_set_ops(&swsusp_ops); + return pm_suspend(PM_SUSPEND_DISK); +} + +static int __init resume_setup(char *str) +{ + if (strlen(str)) + strncpy(resume_file, str, 255); + return 1; +} + +static int __init noresume_setup(char *str) +{ + resume_file[0] = '\0'; + return 1; +} + +__setup("noresume", noresume_setup); +__setup("resume=", resume_setup); + diff --git a/kernel/power/power.h b/kernel/power/power.h index e98de640155d..e0874ed266f5 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -10,34 +10,27 @@ #ifdef CONFIG_SOFTWARE_SUSPEND -extern int swsusp_save(void); -extern int swsusp_write(void); -extern int swsusp_read(void); -extern int swsusp_restore(void); -extern int swsusp_free(void); +extern int pm_suspend_disk(void); + #else -static inline int swsusp_save(void) -{ - return 0; -} -static inline int swsusp_write(void) +static inline int pm_suspend_disk(void) { - return 0; -} -static inline int swsusp_read(void) -{ - return 0; -} -static inline int swsusp_restore(void) -{ - return 0; -} -static inline int swsusp_free(void) -{ - return 0; + return -EPERM; } #endif +extern struct semaphore pm_sem; +#define power_attr(_name) \ +static struct subsys_attribute _name##_attr = { \ + .attr = { \ + .name = __stringify(_name), \ + .mode = 0644, \ + }, \ + .show = _name##_show, \ + .store = _name##_store, \ +} + +extern struct subsystem power_subsys; extern int freeze_processes(void); extern void thaw_processes(void); diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index c849a18c45f4..bb795b6dc537 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -34,38 +34,21 @@ * For TODOs,FIXMEs also look in Documentation/swsusp.txt */ -#include <linux/module.h> #include <linux/mm.h> +#include <linux/bio.h> #include <linux/suspend.h> -#include <linux/smp_lock.h> -#include <linux/file.h> -#include <linux/utsname.h> #include <linux/version.h> -#include <linux/delay.h> #include <linux/reboot.h> -#include <linux/bitops.h> -#include <linux/vt_kern.h> -#include <linux/kbd_kern.h> -#include <linux/keyboard.h> -#include <linux/spinlock.h> -#include <linux/genhd.h> -#include <linux/kernel.h> -#include <linux/major.h> -#include <linux/swap.h> -#include <linux/pm.h> #include <linux/device.h> -#include <linux/buffer_head.h> #include <linux/swapops.h> #include <linux/bootmem.h> -#include <asm/uaccess.h> #include <asm/mmu_context.h> -#include <asm/pgtable.h> -#include <asm/io.h> #include "power.h" -unsigned char software_suspend_enabled = 1; + +extern int swsusp_arch_suspend(int resume); #define __ADDRESS(x) ((unsigned long) phys_to_virt(x)) #define ADDRESS(x) __ADDRESS((x) << PAGE_SHIFT) @@ -76,9 +59,6 @@ extern char __nosave_begin, __nosave_end; extern int is_head_of_free_region(struct page *); -/* Locks */ -spinlock_t suspend_pagedir_lock __nosavedata = SPIN_LOCK_UNLOCKED; - /* Variables to be preserved over suspend */ static int pagedir_order_check; static int nr_copy_pages_check; @@ -436,11 +416,12 @@ static suspend_pagedir_t *create_suspend_pagedir(int nr_copy_pages) } -static int suspend_prepare_image(void) +int swsusp_suspend(void) { struct sysinfo i; unsigned int nr_needed_pages = 0; + read_swapfiles(); drain_local_pages(); pagedir_nosave = NULL; @@ -488,21 +469,28 @@ static int suspend_prepare_image(void) return 0; } + +/** + * suspend_save_image - Prepare and write saved image to swap. + * + * IRQs are re-enabled here so we can resume devices and safely write + * to the swap devices. We disable them again before we leave. + * + * The second lock_swapdevices() will unlock ignored swap devices since + * writing is finished. + * It is important _NOT_ to umount filesystems at this point. We want + * them synced (in case something goes wrong) but we DO not want to mark + * filesystem clean: it is not. (And it does not matter, if we resume + * correctly, we'll mark system clean, anyway.) + */ + static int suspend_save_image(void) { int error; - device_resume(); - lock_swapdevices(); error = write_suspend_image(); - lock_swapdevices(); /* This will unlock ignored swap devices since writing is finished */ - - /* It is important _NOT_ to umount filesystems at this point. We want - * them synced (in case something goes wrong) but we DO not want to mark - * filesystem clean: it is not. (And it does not matter, if we resume - * correctly, we'll mark system clean, anyway.) - */ + lock_swapdevices(); return error; } @@ -510,67 +498,32 @@ static int suspend_save_image(void) * Magic happens here */ -void do_magic_resume_1(void) -{ - barrier(); - mb(); - spin_lock_irq(&suspend_pagedir_lock); /* Done to disable interrupts */ - PRINTK( "Waiting for DMAs to settle down...\n"); - /* We do not want some readahead with DMA to corrupt our memory, right? - Do it with disabled interrupts for best effect. That way, if some - driver scheduled DMA, we have good chance for DMA to finish ;-). */ - mdelay(1000); -} - -void do_magic_resume_2(void) +int swsusp_resume(void) { BUG_ON (nr_copy_pages_check != nr_copy_pages); BUG_ON (pagedir_order_check != pagedir_order); /* Even mappings of "global" things (vmalloc) need to be fixed */ __flush_tlb_global(); - spin_unlock_irq(&suspend_pagedir_lock); + return 0; } -/* do_magic() is implemented in arch/?/kernel/suspend_asm.S, and basically does: +/* swsusp_arch_suspend() is implemented in arch/?/power/swsusp.S, + and basically does: if (!resume) { - do_magic_suspend_1(); save_processor_state(); SAVE_REGISTERS - do_magic_suspend_2(); - return; + return swsusp_suspend(); } GO_TO_SWAPPER_PAGE_TABLES - do_magic_resume_1(); COPY_PAGES_BACK RESTORE_REGISTERS restore_processor_state(); - do_magic_resume_2(); + return swsusp_resume(); */ -void do_magic_suspend_1(void) -{ - mb(); - barrier(); - spin_lock_irq(&suspend_pagedir_lock); -} - -int do_magic_suspend_2(void) -{ - int is_problem; - read_swapfiles(); - is_problem = suspend_prepare_image(); - spin_unlock_irq(&suspend_pagedir_lock); - if (!is_problem) - return suspend_save_image(); - printk(KERN_EMERG "%sSuspend failed, trying to recover...\n", name_suspend); - barrier(); - mb(); - mdelay(1000); - return -EFAULT; -} /* More restore stuff */ @@ -701,61 +654,146 @@ static int __init sanity_check(struct suspend_header *sh) return 0; } -static int __init bdev_read_page(struct block_device *bdev, - long pos, void *buf) +static struct block_device * resume_bdev; + + +/** + * Using bio to read from swap. + * This code requires a bit more work than just using buffer heads + * but, it is the recommended way for 2.5/2.6. + * The following are to signal the beginning and end of I/O. Bios + * finish asynchronously, while we want them to happen synchronously. + * A simple atomic_t, and a wait loop take care of this problem. + */ + +static atomic_t io_done = ATOMIC_INIT(0); + +static void start_io(void) +{ + atomic_set(&io_done,1); +} + +static int end_io(struct bio * bio, unsigned int num, int err) { - struct buffer_head *bh; - BUG_ON (pos%PAGE_SIZE); - bh = __bread(bdev, pos/PAGE_SIZE, PAGE_SIZE); - if (!bh || (!bh->b_data)) { - return -1; - } - memcpy(buf, bh->b_data, PAGE_SIZE); /* FIXME: may need kmap() */ - BUG_ON(!buffer_uptodate(bh)); - brelse(bh); + atomic_set(&io_done,0); return 0; -} +} + +static void wait_io(void) +{ + blk_run_queues(); + while(atomic_read(&io_done)) + io_schedule(); +} + + +/** + * submit - submit BIO request. + * @rw: READ or WRITE. + * @off physical offset of page. + * @page: page we're reading or writing. + * + * Straight from the textbook - allocate and initialize the bio. + * If we're writing, make sure the page is marked as dirty. + * Then submit it and wait. + */ + +static int submit(int rw, pgoff_t page_off, void * page) +{ + int error = 0; + struct bio * bio; + + bio = bio_alloc(GFP_ATOMIC,1); + if (!bio) + return -ENOMEM; + bio->bi_sector = page_off * (PAGE_SIZE >> 9); + bio_get(bio); + bio->bi_bdev = resume_bdev; + bio->bi_end_io = end_io; + + if (bio_add_page(bio, virt_to_page(page), PAGE_SIZE, 0) < PAGE_SIZE) { + printk("ERROR: adding page to bio at %ld\n",page_off); + error = -EFAULT; + goto Done; + } + + if (rw == WRITE) + bio_set_pages_dirty(bio); + start_io(); + submit_bio(rw,bio); + wait_io(); + Done: + bio_put(bio); + return error; +} + +static int +read_page(pgoff_t page_off, void * page) +{ + return submit(READ,page_off,page); +} + +static int +write_page(pgoff_t page_off, void * page) +{ + return submit(WRITE,page_off,page); +} + extern dev_t __init name_to_dev_t(const char *line); -static int __init read_suspend_image(struct block_device *bdev, - union diskpage *cur) + +#define next_entry(diskpage) diskpage->link.next + +static int __init read_suspend_image(void) { swp_entry_t next; int i, nr_pgdir_pages; + union diskpage *cur; + int error = 0; -#define PREPARENEXT \ - { next = cur->link.next; \ - next.val = swp_offset(next) * PAGE_SIZE; \ - } - - if (bdev_read_page(bdev, 0, cur)) return -EIO; + cur = (union diskpage *)get_zeroed_page(GFP_ATOMIC); + if (!cur) + return -ENOMEM; - if ((!memcmp("SWAP-SPACE",cur->swh.magic.magic,10)) || - (!memcmp("SWAPSPACE2",cur->swh.magic.magic,10))) { - printk(KERN_ERR "%sThis is normal swap space\n", name_resume ); - return -EINVAL; - } + if ((error = read_page(0, cur))) + goto Done; - PREPARENEXT; /* We have to read next position before we overwrite it */ + /* + * We have to read next position before we overwrite it + */ + next = next_entry(cur); if (!memcmp("S1",cur->swh.magic.magic,2)) memcpy(cur->swh.magic.magic,"SWAP-SPACE",10); else if (!memcmp("S2",cur->swh.magic.magic,2)) memcpy(cur->swh.magic.magic,"SWAPSPACE2",10); - else { - printk("swsusp: %s: Unable to find suspended-data signature (%.10s - misspelled?\n", - name_resume, cur->swh.magic.magic); - return -EFAULT; + else if ((!memcmp("SWAP-SPACE",cur->swh.magic.magic,10)) || + (!memcmp("SWAPSPACE2",cur->swh.magic.magic,10))) { + printk(KERN_ERR "swsusp: Partition is normal swap space\n"); + error = -EINVAL; + goto Done; + } else { + printk(KERN_ERR "swsusp: Invalid partition type.\n"); + error = -EINVAL; + goto Done; } + /* + * Reset swap signature now. + */ + if ((error = write_page(0,cur))) + goto Done; + printk( "%sSignature found, resuming\n", name_resume ); MDELAY(1000); - if (bdev_read_page(bdev, next.val, cur)) return -EIO; - if (sanity_check(&cur->sh)) /* Is this same machine? */ - return -EPERM; - PREPARENEXT; + if ((error = read_page(swp_offset(next), cur))) + goto Done; + /* Is this same machine? */ + if ((error = sanity_check(&cur->sh))) + goto Done; + next = next_entry(cur); pagedir_save = cur->sh.suspend_pagedir; nr_copy_pages = cur->sh.num_pbes; @@ -763,8 +801,10 @@ static int __init read_suspend_image(struct block_device *bdev, pagedir_order = get_bitmask_order(nr_pgdir_pages); pagedir_nosave = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC, pagedir_order); - if (!pagedir_nosave) - return -ENOMEM; + if (!pagedir_nosave) { + error = -ENOMEM; + goto Done; + } PRINTK( "%sReading pagedir, ", name_resume ); @@ -772,15 +812,17 @@ static int __init read_suspend_image(struct block_device *bdev, for (i=nr_pgdir_pages-1; i>=0; i--) { BUG_ON (!next.val); cur = (union diskpage *)((char *) pagedir_nosave)+i; - if (bdev_read_page(bdev, next.val, cur)) return -EIO; - PREPARENEXT; + error = read_page(swp_offset(next), cur); + if (error) + goto FreePagedir; + next = next_entry(cur); } BUG_ON (next.val); - if (relocate_pagedir()) - return -ENOMEM; - if (check_pagedir()) - return -ENOMEM; + if ((error = relocate_pagedir())) + goto FreePagedir; + if ((error = check_pagedir())) + goto FreePagedir; printk( "Reading image data (%d pages): ", nr_copy_pages ); for(i=0; i < nr_copy_pages; i++) { @@ -789,11 +831,18 @@ static int __init read_suspend_image(struct block_device *bdev, printk( "." ); /* You do not need to check for overlaps... ... check_pagedir already did this work */ - if (bdev_read_page(bdev, swp_offset(swap_address) * PAGE_SIZE, (char *)((pagedir_nosave+i)->address))) - return -EIO; + error = read_page(swp_offset(swap_address), + (char *)((pagedir_nosave+i)->address)); + if (error) + goto FreePagedir; } printk( "|\n" ); - return 0; + Done: + free_page((unsigned long)cur); + return error; + FreePagedir: + free_pages((unsigned long)pagedir_nosave,pagedir_order); + goto Done; } /** @@ -802,28 +851,34 @@ static int __init read_suspend_image(struct block_device *bdev, int swsusp_save(void) { + int error; + #if defined (CONFIG_HIGHMEM) || defined (COFNIG_DISCONTIGMEM) printk("swsusp is not supported with high- or discontig-mem.\n"); return -EPERM; #endif - return 0; + if ((error = arch_prepare_suspend())) + return error; + local_irq_disable(); + error = swsusp_arch_suspend(0); + local_irq_enable(); + return error; } /** * swsusp_write - Write saved memory image to swap. * - * do_magic(0) returns after system is resumed. + * swsusp_arch_suspend(0) returns after system is resumed. * - * do_magic() copies all "used" memory to "free" memory, then - * unsuspends all device drivers, and writes memory to disk + * swsusp_arch_suspend() copies all "used" memory to "free" memory, + * then unsuspends all device drivers, and writes memory to disk * using normal kernel mechanism. */ int swsusp_write(void) { - arch_prepare_suspend(); - return do_magic(0); + return suspend_save_image(); } @@ -833,7 +888,6 @@ int swsusp_write(void) int __init swsusp_read(void) { - union diskpage *cur; int error; char b[BDEVNAME_SIZE]; @@ -844,19 +898,13 @@ int __init swsusp_read(void) printk("swsusp: Resume From Partition: %s, Device: %s\n", resume_file, __bdevname(resume_device, b)); - cur = (union diskpage *)get_zeroed_page(GFP_ATOMIC); - if (cur) { - struct block_device *bdev; - bdev = open_by_devnum(resume_device, FMODE_READ, BDEV_RAW); - if (!IS_ERR(bdev)) { - set_blocksize(bdev, PAGE_SIZE); - error = read_suspend_image(bdev, cur); - blkdev_put(bdev, BDEV_RAW); - } else - error = PTR_ERR(bdev); - free_page((unsigned long)cur); + resume_bdev = open_by_devnum(resume_device, FMODE_READ, BDEV_RAW); + if (!IS_ERR(resume_bdev)) { + set_blocksize(resume_bdev, PAGE_SIZE); + error = read_suspend_image(); + blkdev_put(resume_bdev, BDEV_RAW); } else - error = -ENOMEM; + error = PTR_ERR(resume_bdev); if (!error) PRINTK("Reading resume file was successful\n"); @@ -873,7 +921,11 @@ int __init swsusp_read(void) int __init swsusp_restore(void) { - return do_magic(1); + int error; + local_irq_disable(); + error = swsusp_arch_suspend(1); + local_irq_enable(); + return error; } @@ -885,13 +937,20 @@ int swsusp_free(void) { PRINTK( "Freeing prev allocated pagedir\n" ); free_suspend_pagedir((unsigned long) pagedir_save); - - PRINTK( "Fixing swap signatures... " ); - mark_swapfiles(((swp_entry_t) {0}), MARK_SWAP_RESUME); - PRINTK( "ok\n" ); return 0; } + +int software_suspend(void) +{ + struct pm_ops swsusp_ops = { + .pm_disk_mode = PM_DISK_SHUTDOWN, + }; + + pm_set_ops(&swsusp_ops); + return pm_suspend(PM_SUSPEND_DISK); +} + static int __init resume_setup(char *str) { if (strlen(str)) diff --git a/kernel/sched.c b/kernel/sched.c index 89f1bb28dacd..9dc251a8d8a5 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -28,6 +28,7 @@ #include <linux/kernel_stat.h> #include <linux/security.h> #include <linux/notifier.h> +#include <linux/suspend.h> #include <linux/blkdev.h> #include <linux/delay.h> #include <linux/timer.h> @@ -2415,6 +2416,9 @@ static int migration_thread(void * data) struct list_head *head; migration_req_t *req; + if (current->flags & PF_FREEZE) + refrigerator(PF_IOTHREAD); + spin_lock_irq(&rq->lock); head = &rq->migration_queue; current->state = TASK_INTERRUPTIBLE; diff --git a/kernel/sys.c b/kernel/sys.c index 02b5a12dfd59..d77453173d29 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -456,7 +456,7 @@ asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user #ifdef CONFIG_SOFTWARE_SUSPEND case LINUX_REBOOT_CMD_SW_SUSPEND: - if (!pm_suspend(PM_SUSPEND_DISK)) + if (!software_suspend()) break; do_exit(0); break; |
