diff options
| author | Patrick Mochel <mochel@osdl.org> | 2003-09-09 18:10:40 -0700 |
|---|---|---|
| committer | Patrick Mochel <mochel@osdl.org> | 2003-09-09 18:10:40 -0700 |
| commit | c521afac12ad04a7a9a4eacc778a7db65b89efa0 (patch) | |
| tree | e2af5fdb6f4ee863ec67f97bba5b2acca767a1ce | |
| parent | b11a855891448d827182f88d9fa32d33fe3cd102 (diff) | |
| parent | ad50ff186e3544ce316bc24e8b37e82b840b42e1 (diff) | |
Merge osdl.org:/home/mochel/src/kernel/linux-2.5-virgin
into osdl.org:/home/mochel/src/kernel/linux-2.5-power
| -rw-r--r-- | arch/i386/Kconfig | 44 | ||||
| -rw-r--r-- | arch/i386/Makefile | 1 | ||||
| -rw-r--r-- | arch/i386/kernel/Makefile | 2 | ||||
| -rw-r--r-- | arch/i386/power/Makefile | 2 | ||||
| -rw-r--r-- | arch/i386/power/cpu.c (renamed from arch/i386/kernel/suspend.c) | 0 | ||||
| -rw-r--r-- | arch/i386/power/swsusp.S (renamed from arch/i386/kernel/suspend_asm.S) | 16 | ||||
| -rw-r--r-- | drivers/acpi/sleep/main.c | 53 | ||||
| -rw-r--r-- | drivers/acpi/sleep/proc.c | 73 | ||||
| -rw-r--r-- | drivers/acpi/sleep/sleep.h | 3 | ||||
| -rw-r--r-- | drivers/base/core.c | 33 | ||||
| -rw-r--r-- | drivers/base/power/main.c | 13 | ||||
| -rw-r--r-- | drivers/base/power/power.h | 3 | ||||
| -rw-r--r-- | drivers/base/power/resume.c | 21 | ||||
| -rw-r--r-- | drivers/base/power/suspend.c | 10 | ||||
| -rw-r--r-- | include/asm-i386/suspend.h | 7 | ||||
| -rw-r--r-- | include/linux/suspend.h | 1 | ||||
| -rw-r--r-- | kernel/power/Kconfig | 64 | ||||
| -rw-r--r-- | kernel/power/Makefile | 2 | ||||
| -rw-r--r-- | kernel/power/console.c | 2 | ||||
| -rw-r--r-- | kernel/power/disk.c | 335 | ||||
| -rw-r--r-- | kernel/power/main.c | 391 | ||||
| -rw-r--r-- | kernel/power/pmdisk.c | 969 | ||||
| -rw-r--r-- | kernel/power/power.h | 39 | ||||
| -rw-r--r-- | kernel/power/swsusp.c | 345 | ||||
| -rw-r--r-- | kernel/sched.c | 4 | ||||
| -rw-r--r-- | kernel/sys.c | 2 |
26 files changed, 1817 insertions, 618 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index f337b2d31815..ce8e28c34b02 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -815,49 +815,7 @@ endmenu menu "Power management options (ACPI, APM)" depends on !X86_VOYAGER -config PM - bool "Power Management support" - ---help--- - "Power Management" means that parts of your computer are shut - off or put into a power conserving "sleep" mode if they are not - being used. There are two competing standards for doing this: APM - and ACPI. If you want to use either one, say Y here and then also - to the requisite support below. - - Power Management is most important for battery powered laptop - computers; if you have a laptop, check out the Linux Laptop home - page on the WWW at - <http://www.cs.utexas.edu/users/kharker/linux-laptop/> and the - Battery Powered Linux mini-HOWTO, available from - <http://www.tldp.org/docs.html#howto>. - - Note that, even if you say N here, Linux on the x86 architecture - will issue the hlt instruction if nothing is to be done, thereby - sending the processor to sleep and saving power. - -config SOFTWARE_SUSPEND - bool "Software Suspend (EXPERIMENTAL)" - depends on EXPERIMENTAL && PM && SWAP - ---help--- - Enable the possibilty of suspendig machine. It doesn't need APM. - You may suspend your machine by 'swsusp' or 'shutdown -z <time>' - (patch for sysvinit needed). - - It creates an image which is saved in your active swaps. By the next - booting the, pass 'resume=/dev/swappartition' and kernel will - detect the saved image, restore the memory from - it and then it continues to run as before you've suspended. - If you don't want the previous state to continue use the 'noresume' - kernel option. However note that your partitions will be fsck'd and - you must re-mkswap your swap partitions. It does not work with swap - files. 
- - Right now you may boot without resuming and then later resume but - in meantime you cannot use those swap partitions/files which were - involved in suspending. Also in this case there is a risk that buffers - on disk won't match with saved ones. - - For more information take a look at Documentation/swsusp.txt. +source kernel/power/Kconfig source "drivers/acpi/Kconfig" diff --git a/arch/i386/Makefile b/arch/i386/Makefile index c49b4749cf95..ca5d7812f35c 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -96,6 +96,7 @@ drivers-$(CONFIG_MATH_EMULATION) += arch/i386/math-emu/ drivers-$(CONFIG_PCI) += arch/i386/pci/ # must be linked after kernel/ drivers-$(CONFIG_OPROFILE) += arch/i386/oprofile/ +drivers-$(CONFIG_PM) += arch/i386/power/ CFLAGS += $(mflags-y) AFLAGS += $(mflags-y) diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index 00d75afcaa67..b5ed0976d4db 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile @@ -17,9 +17,7 @@ obj-$(CONFIG_MCA) += mca.o obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_X86_CPUID) += cpuid.o obj-$(CONFIG_MICROCODE) += microcode.o -obj-$(CONFIG_PM) += suspend.o obj-$(CONFIG_APM) += apm.o -obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend_asm.o obj-$(CONFIG_X86_SMP) += smp.o smpboot.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o obj-$(CONFIG_X86_MPPARSE) += mpparse.o diff --git a/arch/i386/power/Makefile b/arch/i386/power/Makefile new file mode 100644 index 000000000000..8cfa4e8a719d --- /dev/null +++ b/arch/i386/power/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_PM) += cpu.o +obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o diff --git a/arch/i386/kernel/suspend.c b/arch/i386/power/cpu.c index b71175ff7559..b71175ff7559 100644 --- a/arch/i386/kernel/suspend.c +++ b/arch/i386/power/cpu.c diff --git a/arch/i386/kernel/suspend_asm.S b/arch/i386/power/swsusp.S index 3d3bb7121c2c..a2af7992aab9 100644 --- a/arch/i386/kernel/suspend_asm.S +++ b/arch/i386/power/swsusp.S @@ -8,11 +8,10 @@ .text -ENTRY(do_magic) 
+ENTRY(swsusp_arch_suspend) pushl %ebx cmpl $0,8(%esp) jne .L1450 - call do_magic_suspend_1 call save_processor_state movl %esp, saved_context_esp @@ -25,14 +24,13 @@ ENTRY(do_magic) movl %edi, saved_context_edi pushfl ; popl saved_context_eflags - call do_magic_suspend_2 + call swsusp_suspend jmp .L1449 .p2align 4,,7 .L1450: movl $swapper_pg_dir-__PAGE_OFFSET,%ecx movl %ecx,%cr3 - call do_magic_resume_1 movl $0,loop cmpl $0,nr_copy_pages je .L1453 @@ -78,11 +76,13 @@ ENTRY(do_magic) movl saved_context_edx, %edx movl saved_context_esi, %esi movl saved_context_edi, %edi - call restore_processor_state pushl saved_context_eflags ; popfl - call do_magic_resume_2 + call swsusp_resume .L1449: - popl %ebx + popl %ebx + pushl %eax + call restore_processor_state + popl %eax ret .section .data.nosave @@ -91,4 +91,4 @@ loop: loop2: .quad 0 .previous -
\ No newline at end of file + diff --git a/drivers/acpi/sleep/main.c b/drivers/acpi/sleep/main.c index d3f29ce924ae..1338cc96b724 100644 --- a/drivers/acpi/sleep/main.c +++ b/drivers/acpi/sleep/main.c @@ -41,7 +41,6 @@ static u32 acpi_suspend_states[] = { static int acpi_pm_prepare(u32 state) { - int error = 0; u32 acpi_state = acpi_suspend_states[state]; if (!sleep_states[acpi_state]) @@ -56,21 +55,9 @@ static int acpi_pm_prepare(u32 state) acpi_set_firmware_waking_vector( (acpi_physical_address) acpi_wakeup_address); } - ACPI_FLUSH_CPU_CACHE(); - - /* Do arch specific saving of state. */ - if (state > PM_SUSPEND_STANDBY) { - if ((error = acpi_save_state_mem())) - goto Err; - } - acpi_enter_sleep_state_prep(acpi_state); - return 0; - Err: - acpi_set_firmware_waking_vector(0); - return error; } @@ -90,6 +77,15 @@ static int acpi_pm_enter(u32 state) u32 acpi_state = acpi_suspend_states[state]; ACPI_FLUSH_CPU_CACHE(); + + /* Do arch specific saving of state. */ + if (state > PM_SUSPEND_STANDBY) { + int error = acpi_save_state_mem(); + if (error) + return error; + } + + local_irq_save(flags); switch (state) { @@ -114,6 +110,15 @@ static int acpi_pm_enter(u32 state) local_irq_restore(flags); printk(KERN_DEBUG "Back to C!\n"); + /* restore processor state + * We should only be here if we're coming back from STR or STD. + * And, in the case of the latter, the memory image should have already + * been loaded from disk. + */ + if (state > PM_SUSPEND_STANDBY) + acpi_restore_state_mem(); + + return ACPI_SUCCESS(status) ? 0 : -EFAULT; } @@ -130,14 +135,6 @@ static int acpi_pm_finish(u32 state) { acpi_leave_sleep_state(state); - /* restore processor state - * We should only be here if we're coming back from STR or STD. - * And, in the case of the latter, the memory image should have already - * been loaded from disk. 
- */ - if (state > ACPI_STATE_S1) - acpi_restore_state_mem(); - /* reset firmware waking vector */ acpi_set_firmware_waking_vector((acpi_physical_address) 0); @@ -149,6 +146,20 @@ static int acpi_pm_finish(u32 state) } +int acpi_suspend(u32 acpi_state) +{ + u32 states[] = { + [1] = PM_SUSPEND_STANDBY, + [3] = PM_SUSPEND_MEM, + [4] = PM_SUSPEND_DISK, + }; + + if (acpi_state <= 4 && states[acpi_state]) + return pm_suspend(states[acpi_state]); + return -EINVAL; +} + + static struct pm_ops acpi_pm_ops = { .prepare = acpi_pm_prepare, .enter = acpi_pm_enter, diff --git a/drivers/acpi/sleep/proc.c b/drivers/acpi/sleep/proc.c index 41cbde00b785..4a15b0000aa8 100644 --- a/drivers/acpi/sleep/proc.c +++ b/drivers/acpi/sleep/proc.c @@ -13,12 +13,71 @@ #include "sleep.h" +#define ACPI_SYSTEM_FILE_SLEEP "sleep" #define ACPI_SYSTEM_FILE_ALARM "alarm" #define _COMPONENT ACPI_SYSTEM_COMPONENT ACPI_MODULE_NAME ("sleep") +static int acpi_system_sleep_seq_show(struct seq_file *seq, void *offset) +{ + int i; + + ACPI_FUNCTION_TRACE("acpi_system_sleep_seq_show"); + + for (i = 0; i <= ACPI_STATE_S5; i++) { + if (sleep_states[i]) { + seq_printf(seq,"S%d ", i); + if (i == ACPI_STATE_S4 && acpi_gbl_FACS->S4bios_f) + seq_printf(seq, "S4bios "); + } + } + + seq_puts(seq, "\n"); + + return 0; +} + +static int acpi_system_sleep_open_fs(struct inode *inode, struct file *file) +{ + return single_open(file, acpi_system_sleep_seq_show, PDE(inode)->data); +} + +static int +acpi_system_write_sleep ( + struct file *file, + const char *buffer, + size_t count, + loff_t *ppos) +{ + char str[12]; + u32 state = 0; + int error = 0; + + if (count > sizeof(str) - 1) + goto Done; + memset(str,0,sizeof(str)); + if (copy_from_user(str, buffer, count)) + return -EFAULT; + + /* Check for S4 bios request */ + if (!strcmp(str,"4b")) { + error = acpi_suspend(4); + goto Done; + } + state = simple_strtoul(str, NULL, 0); +#ifdef CONFIG_SOFTWARE_SUSPEND + if (state == 4) { + error = software_suspend(); + goto Done; + } 
+#endif + error = acpi_suspend(state); + Done: + return error ? error : count; +} + static int acpi_system_alarm_seq_show(struct seq_file *seq, void *offset) { u32 sec, min, hr; @@ -294,6 +353,14 @@ end: } +static struct file_operations acpi_system_sleep_fops = { + .open = acpi_system_sleep_open_fs, + .read = seq_read, + .write = acpi_system_write_sleep, + .llseek = seq_lseek, + .release = single_release, +}; + static struct file_operations acpi_system_alarm_fops = { .open = acpi_system_alarm_open_fs, .read = seq_read, @@ -307,6 +374,12 @@ static int acpi_sleep_proc_init(void) { struct proc_dir_entry *entry = NULL; + /* 'sleep' [R/W]*/ + entry = create_proc_entry(ACPI_SYSTEM_FILE_SLEEP, + S_IFREG|S_IRUGO|S_IWUSR, acpi_root_dir); + if (entry) + entry->proc_fops = &acpi_system_sleep_fops; + /* 'alarm' [R/W] */ entry = create_proc_entry(ACPI_SYSTEM_FILE_ALARM, S_IFREG|S_IRUGO|S_IWUSR, acpi_root_dir); diff --git a/drivers/acpi/sleep/sleep.h b/drivers/acpi/sleep/sleep.h index 97b72323f35e..ad38f4153a34 100644 --- a/drivers/acpi/sleep/sleep.h +++ b/drivers/acpi/sleep/sleep.h @@ -1,5 +1,4 @@ extern u8 sleep_states[]; - -extern acpi_status acpi_suspend (u32 state); +extern int acpi_suspend (u32 state); diff --git a/drivers/base/core.c b/drivers/base/core.c index e1b7c5b5af4a..932a427806ba 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -225,28 +225,30 @@ int device_add(struct device *dev) dev->kobj.parent = &parent->kobj; if ((error = kobject_add(&dev->kobj))) - goto register_done; - - /* now take care of our own registration */ - + goto Error; + if ((error = device_pm_add(dev))) + goto PMError; + if ((error = bus_add_device(dev))) + goto BusError; down_write(&devices_subsys.rwsem); if (parent) list_add_tail(&dev->node,&parent->children); up_write(&devices_subsys.rwsem); - bus_add_device(dev); - - device_pm_add(dev); - /* notify platform of device entry */ if (platform_notify) platform_notify(dev); - - register_done: - if (error && parent) - 
put_device(parent); + Done: put_device(dev); return error; + BusError: + device_pm_remove(dev); + PMError: + kobject_unregister(&dev->kobj); + Error: + if (parent) + put_device(parent); + goto Done; } @@ -312,8 +314,6 @@ void device_del(struct device * dev) { struct device * parent = dev->parent; - device_pm_remove(dev); - down_write(&devices_subsys.rwsem); if (parent) list_del_init(&dev->node); @@ -324,14 +324,11 @@ void device_del(struct device * dev) */ if (platform_notify_remove) platform_notify_remove(dev); - bus_remove_device(dev); - + device_pm_remove(dev); kobject_del(&dev->kobj); - if (parent) put_device(parent); - } /** diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index d33bc5b6409f..f02478eb2173 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -36,12 +36,14 @@ DECLARE_MUTEX(dpm_sem); static inline void device_pm_hold(struct device * dev) { - atomic_inc(&dev->power.pm_users); + if (dev) + atomic_inc(&dev->power.pm_users); } static inline void device_pm_release(struct device * dev) { - atomic_inc(&dev->power.pm_users); + if (dev) + atomic_dec(&dev->power.pm_users); } @@ -61,11 +63,9 @@ static inline void device_pm_release(struct device * dev) void device_pm_set_parent(struct device * dev, struct device * parent) { struct device * old_parent = dev->power.pm_parent; - if (old_parent) - device_pm_release(old_parent); + device_pm_release(old_parent); dev->power.pm_parent = parent; - if (parent) - device_pm_hold(parent); + device_pm_hold(parent); } EXPORT_SYMBOL(device_pm_set_parent); @@ -91,6 +91,7 @@ void device_pm_remove(struct device * dev) dev->bus ? 
dev->bus->name : "No Bus", dev->kobj.name); down(&dpm_sem); dpm_sysfs_remove(dev); + device_pm_release(dev->power.pm_parent); list_del(&dev->power.entry); up(&dpm_sem); } diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h index fde72b37f938..b930ccef5785 100644 --- a/drivers/base/power/power.h +++ b/drivers/base/power/power.h @@ -58,7 +58,8 @@ extern void dpm_sysfs_remove(struct device *); /* * resume.c */ -extern int dpm_resume(void); + +extern void dpm_resume(void); extern void dpm_power_up(void); extern int resume_device(struct device *); diff --git a/drivers/base/power/resume.c b/drivers/base/power/resume.c index 9db84a9e41e3..637c52ad6ec6 100644 --- a/drivers/base/power/resume.c +++ b/drivers/base/power/resume.c @@ -28,6 +28,19 @@ int resume_device(struct device * dev) } + +void dpm_resume(void) +{ + while(!list_empty(&dpm_off)) { + struct list_head * entry = dpm_off.next; + struct device * dev = to_device(entry); + list_del_init(entry); + resume_device(dev); + list_add_tail(entry,&dpm_active); + } +} + + /** * device_resume - Restore state of each device in system. 
* @@ -38,13 +51,7 @@ int resume_device(struct device * dev) void device_resume(void) { down(&dpm_sem); - while(!list_empty(&dpm_off)) { - struct list_head * entry = dpm_off.next; - struct device * dev = to_device(entry); - list_del_init(entry); - resume_device(dev); - list_add_tail(entry,&dpm_active); - } + dpm_resume(); up(&dpm_sem); } diff --git a/drivers/base/power/suspend.c b/drivers/base/power/suspend.c index 6da8cdd69dce..19e660a21ecf 100644 --- a/drivers/base/power/suspend.c +++ b/drivers/base/power/suspend.c @@ -81,14 +81,18 @@ int device_suspend(u32 state) while(!list_empty(&dpm_active)) { struct list_head * entry = dpm_active.prev; struct device * dev = to_device(entry); - if ((error = suspend_device(dev,state))) - goto Error; + if ((error = suspend_device(dev,state))) { + if (error != -EAGAIN) + goto Error; + else + error = 0; + } } Done: up(&dpm_sem); return error; Error: - device_resume(); + dpm_resume(); goto Done; } diff --git a/include/asm-i386/suspend.h b/include/asm-i386/suspend.h index 0d22ec30019b..2febd2d28532 100644 --- a/include/asm-i386/suspend.h +++ b/include/asm-i386/suspend.h @@ -6,11 +6,12 @@ #include <asm/desc.h> #include <asm/i387.h> -static inline void +static inline int arch_prepare_suspend(void) { if (!cpu_has_pse) - panic("pse required"); + return -EPERM; + return 0; } /* image of the saved processor state */ @@ -38,8 +39,6 @@ struct saved_context { extern void save_processor_state(void); extern void restore_processor_state(void); -extern int do_magic(int resume); - #ifdef CONFIG_ACPI_SLEEP extern unsigned long saved_eip; extern unsigned long saved_esp; diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 132db86c961a..ed8d796f1849 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -53,6 +53,7 @@ extern suspend_pagedir_t *pagedir_nosave __nosavedata; extern void do_suspend_lowlevel(int resume); extern void do_suspend_lowlevel_s4bios(int resume); +extern int software_suspend(void); #else /* 
CONFIG_SOFTWARE_SUSPEND */ static inline int software_suspend(void) { diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig new file mode 100644 index 000000000000..c30771af69f0 --- /dev/null +++ b/kernel/power/Kconfig @@ -0,0 +1,64 @@ +config PM + bool "Power Management support" + ---help--- + "Power Management" means that parts of your computer are shut + off or put into a power conserving "sleep" mode if they are not + being used. There are two competing standards for doing this: APM + and ACPI. If you want to use either one, say Y here and then also + to the requisite support below. + + Power Management is most important for battery powered laptop + computers; if you have a laptop, check out the Linux Laptop home + page on the WWW at + <http://www.cs.utexas.edu/users/kharker/linux-laptop/> and the + Battery Powered Linux mini-HOWTO, available from + <http://www.tldp.org/docs.html#howto>. + + Note that, even if you say N here, Linux on the x86 architecture + will issue the hlt instruction if nothing is to be done, thereby + sending the processor to sleep and saving power. + +config SOFTWARE_SUSPEND + bool "Software Suspend (EXPERIMENTAL)" + depends on EXPERIMENTAL && PM && SWAP + ---help--- + Enable the possibilty of suspendig machine. It doesn't need APM. + You may suspend your machine by 'swsusp' or 'shutdown -z <time>' + (patch for sysvinit needed). + + It creates an image which is saved in your active swaps. By the next + booting the, pass 'resume=/dev/swappartition' and kernel will + detect the saved image, restore the memory from + it and then it continues to run as before you've suspended. + If you don't want the previous state to continue use the 'noresume' + kernel option. However note that your partitions will be fsck'd and + you must re-mkswap your swap partitions. It does not work with swap + files. 
+ + Right now you may boot without resuming and then later resume but + in meantime you cannot use those swap partitions/files which were + involved in suspending. Also in this case there is a risk that buffers + on disk won't match with saved ones. + + For more information take a look at Documentation/swsusp.txt. + +config PM_DISK + bool "Suspend-to-Disk Support" + depends on PM && SWAP + ---help--- + Suspend-to-disk is a power management state in which the contents + of memory are stored on disk and the entire system is shut down or + put into a low-power state (e.g. ACPI S4). When the computer is + turned back on, the stored image is loaded from disk and execution + resumes from where it left off before suspending. + + This config option enables the core infrastructure necessary to + perform the suspend and resume transition. + + Currently, this suspend-to-disk implementation is based on a forked + version of the swsusp code base. As such, it's still experimental, + and still relies on CONFIG_SWAP. + + More information can be found in Documentation/power/. + + If unsure, Say N. 
diff --git a/kernel/power/Makefile b/kernel/power/Makefile index 9640751c4338..7f127b848827 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -1,4 +1,4 @@ obj-y := main.o process.o console.o pm.o -obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o +obj-$(CONFIG_SOFTWARE_SUSPEND) += disk.o swsusp.o obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o diff --git a/kernel/power/console.c b/kernel/power/console.c index 35b1f50d97de..c8a48236619b 100644 --- a/kernel/power/console.c +++ b/kernel/power/console.c @@ -8,7 +8,7 @@ #include <linux/kbd_kern.h> #include "power.h" -static int new_loglevel = 7; +static int new_loglevel = 10; static int orig_loglevel; static int orig_fgconsole, orig_kmsg; diff --git a/kernel/power/disk.c b/kernel/power/disk.c new file mode 100644 index 000000000000..64a3130a5411 --- /dev/null +++ b/kernel/power/disk.c @@ -0,0 +1,335 @@ +/* + * kernel/power/disk.c - Suspend-to-disk support. + * + * Copyright (c) 2003 Patrick Mochel + * Copyright (c) 2003 Open Source Development Lab + * + * This file is release under the GPLv2 + * + */ + +#define DEBUG + + +#include <linux/suspend.h> +#include <linux/reboot.h> +#include <linux/string.h> +#include <linux/delay.h> +#include <linux/fs.h> +#include "power.h" + + +extern u32 pm_disk_mode; +extern struct pm_ops * pm_ops; + +extern int swsusp_save(void); +extern int swsusp_write(void); +extern int swsusp_read(void); +extern int swsusp_restore(void); +extern int swsusp_free(void); + +extern long sys_sync(void); + + +/** + * power_down - Shut machine down for hibernate. + * @mode: Suspend-to-disk mode + * + * Use the platform driver, if configured so, and return gracefully if it + * fails. + * Otherwise, try to power off and reboot. If they fail, halt the machine, + * there ain't no turning back. 
+ */ + +static int power_down(u32 mode) +{ + unsigned long flags; + int error = 0; + + local_irq_save(flags); + device_power_down(PM_SUSPEND_DISK); + switch(mode) { + case PM_DISK_PLATFORM: + error = pm_ops->enter(PM_SUSPEND_DISK); + break; + case PM_DISK_SHUTDOWN: + printk("Powering off system\n"); + machine_power_off(); + break; + case PM_DISK_REBOOT: + machine_restart(NULL); + break; + } + machine_halt(); + device_power_up(); + local_irq_restore(flags); + return 0; +} + + +static int in_suspend __nosavedata = 0; + + +/** + * free_some_memory - Try to free as much memory as possible + * + * ... but do not OOM-kill anyone + * + * Notice: all userland should be stopped at this point, or + * livelock is possible. + */ + +static void free_some_memory(void) +{ + printk("Freeing memory: "); + while (shrink_all_memory(10000)) + printk("."); + printk("|\n"); + blk_run_queues(); +} + + +static inline void platform_finish(void) +{ + if (pm_disk_mode == PM_DISK_PLATFORM) { + if (pm_ops && pm_ops->finish) + pm_ops->finish(PM_SUSPEND_DISK); + } +} + +static void finish(void) +{ + device_resume(); + platform_finish(); + thaw_processes(); + pm_restore_console(); +} + + +static int prepare(void) +{ + int error; + + pm_prepare_console(); + + sys_sync(); + if (freeze_processes()) { + error = -EBUSY; + goto Thaw; + } + + if (pm_disk_mode == PM_DISK_PLATFORM) { + if (pm_ops && pm_ops->prepare) { + if ((error = pm_ops->prepare(PM_SUSPEND_DISK))) + goto Thaw; + } + } + + /* Free memory before shutting down devices. */ + free_some_memory(); + + if ((error = device_suspend(PM_SUSPEND_DISK))) + goto Finish; + + return 0; + Finish: + platform_finish(); + Thaw: + thaw_processes(); + pm_restore_console(); + return error; +} + + +/** + * pm_suspend_disk - The granpappy of power management. + * + * If we're going through the firmware, then get it over with quickly. + * + * If not, then call swsusp to do it's thing, then figure out how + * to power down the system. 
+ */ + +int pm_suspend_disk(void) +{ + int error; + + if ((error = prepare())) + return error; + + pr_debug("PM: Attempting to suspend to disk.\n"); + if (pm_disk_mode == PM_DISK_FIRMWARE) + return pm_ops->enter(PM_SUSPEND_DISK); + + pr_debug("PM: snapshotting memory.\n"); + in_suspend = 1; + if ((error = swsusp_save())) + goto Done; + + if (in_suspend) { + pr_debug("PM: writing image.\n"); + + /* + * FIXME: Leftover from swsusp. Are they necessary? + */ + mb(); + barrier(); + + error = swsusp_write(); + if (!error) { + error = power_down(pm_disk_mode); + pr_debug("PM: Power down failed.\n"); + } + } else + pr_debug("PM: Image restored successfully.\n"); + swsusp_free(); + Done: + finish(); + return error; +} + + +/** + * pm_resume - Resume from a saved image. + * + * Called as a late_initcall (so all devices are discovered and + * initialized), we call swsusp to see if we have a saved image or not. + * If so, we quiesce devices, the restore the saved image. We will + * return above (in pm_suspend_disk() ) if everything goes well. + * Otherwise, we fail gracefully and return to the normally + * scheduled program. + * + */ + +static int pm_resume(void) +{ + int error; + + pr_debug("PM: Reading swsusp image.\n"); + + if ((error = swsusp_read())) + goto Done; + + pr_debug("PM: Preparing system for restore.\n"); + + if ((error = prepare())) + goto Free; + + barrier(); + mb(); + + /* FIXME: The following (comment and mdelay()) are from swsusp. + * Are they really necessary? + * + * We do not want some readahead with DMA to corrupt our memory, right? + * Do it with disabled interrupts for best effect. That way, if some + * driver scheduled DMA, we have good chance for DMA to finish ;-). 
+ */ + pr_debug("PM: Waiting for DMAs to settle down.\n"); + mdelay(1000); + + pr_debug("PM: Restoring saved image.\n"); + swsusp_restore(); + pr_debug("PM: Restore failed, recovering.n"); + finish(); + Free: + swsusp_free(); + Done: + pr_debug("PM: Resume from disk failed.\n"); + return 0; +} + +late_initcall(pm_resume); + + +static char * pm_disk_modes[] = { + [PM_DISK_FIRMWARE] = "firmware", + [PM_DISK_PLATFORM] = "platform", + [PM_DISK_SHUTDOWN] = "shutdown", + [PM_DISK_REBOOT] = "reboot", +}; + +/** + * disk - Control suspend-to-disk mode + * + * Suspend-to-disk can be handled in several ways. The greatest + * distinction is who writes memory to disk - the firmware or the OS. + * If the firmware does it, we assume that it also handles suspending + * the system. + * If the OS does it, then we have three options for putting the system + * to sleep - using the platform driver (e.g. ACPI or other PM registers), + * powering off the system or rebooting the system (for testing). + * + * The system will support either 'firmware' or 'platform', and that is + * known a priori (and encoded in pm_ops). But, the user may choose + * 'shutdown' or 'reboot' as alternatives. + * + * show() will display what the mode is currently set to. + * store() will accept one of + * + * 'firmware' + * 'platform' + * 'shutdown' + * 'reboot' + * + * It will only change to 'firmware' or 'platform' if the system + * supports it (as determined from pm_ops->pm_disk_mode). 
+ */ + +static ssize_t disk_show(struct subsystem * subsys, char * buf) +{ + return sprintf(buf,"%s\n",pm_disk_modes[pm_disk_mode]); +} + + +static ssize_t disk_store(struct subsystem * s, const char * buf, size_t n) +{ + int error = 0; + int i; + u32 mode = 0; + + down(&pm_sem); + for (i = PM_DISK_FIRMWARE; i < PM_DISK_MAX; i++) { + if (!strcmp(buf,pm_disk_modes[i])) { + mode = i; + break; + } + } + if (mode) { + if (mode == PM_DISK_SHUTDOWN || mode == PM_DISK_REBOOT) + pm_disk_mode = mode; + else { + if (pm_ops && pm_ops->enter && + (mode == pm_ops->pm_disk_mode)) + pm_disk_mode = mode; + else + error = -EINVAL; + } + } else + error = -EINVAL; + + pr_debug("PM: suspend-to-disk mode set to '%s'\n", + pm_disk_modes[mode]); + up(&pm_sem); + return error ? error : n; +} + +power_attr(disk); + +static struct attribute * g[] = { + &disk_attr.attr, + NULL, +}; + + +static struct attribute_group attr_group = { + .attrs = g, +}; + + +static int __init pm_disk_init(void) +{ + return sysfs_create_group(&power_subsys.kset.kobj,&attr_group); +} + +core_initcall(pm_disk_init); diff --git a/kernel/power/main.c b/kernel/power/main.c index 1b92f13d9a77..fd212e7ecd9f 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -8,32 +8,23 @@ * */ +#define DEBUG + #include <linux/suspend.h> #include <linux/kobject.h> -#include <linux/reboot.h> #include <linux/string.h> +#include <linux/delay.h> #include <linux/errno.h> #include <linux/init.h> #include <linux/pm.h> -#include <linux/fs.h> #include "power.h" -static DECLARE_MUTEX(pm_sem); - -static struct pm_ops * pm_ops = NULL; - -static u32 pm_disk_mode = PM_DISK_SHUTDOWN; - -#ifdef CONFIG_SOFTWARE_SUSPEND -static int have_swsusp = 1; -#else -static int have_swsusp = 0; -#endif - -extern long sys_sync(void); +DECLARE_MUTEX(pm_sem); +struct pm_ops * pm_ops = NULL; +u32 pm_disk_mode = PM_DISK_SHUTDOWN; /** * pm_set_ops - Set the global power method table. 
@@ -51,171 +42,6 @@ void pm_set_ops(struct pm_ops * ops) /** - * pm_suspend_standby - Enter 'standby' state. - * - * 'standby' is also known as 'Power-On Suspend'. Here, we power down - * devices, disable interrupts, and enter the state. - */ - -static int pm_suspend_standby(void) -{ - int error = 0; - unsigned long flags; - - if (!pm_ops || !pm_ops->enter) - return -EPERM; - - local_irq_save(flags); - if ((error = device_power_down(PM_SUSPEND_STANDBY))) - goto Done; - error = pm_ops->enter(PM_SUSPEND_STANDBY); - local_irq_restore(flags); - device_power_up(); - Done: - return error; -} - - -/** - * pm_suspend_mem - Enter suspend-to-RAM state. - * - * Identical to pm_suspend_standby() - we power down devices, disable - * interrupts, and enter the low-power state. - */ - -static int pm_suspend_mem(void) -{ - int error = 0; - unsigned long flags; - - if (!pm_ops || !pm_ops->enter) - return -EPERM; - - local_irq_save(flags); - if ((error = device_power_down(PM_SUSPEND_STANDBY))) - goto Done; - error = pm_ops->enter(PM_SUSPEND_STANDBY); - local_irq_restore(flags); - device_power_up(); - Done: - return error; -} - - -/** - * power_down - Shut machine down for hibernate. - * @mode: Suspend-to-disk mode - * - * Use the platform driver, if configured so, and return gracefully if it - * fails. - * Otherwise, try to power off and reboot. If they fail, halt the machine, - * there ain't no turning back. 
- */ - -static int power_down(u32 mode) -{ - unsigned long flags; - int error = 0; - - local_irq_save(flags); - device_power_down(PM_SUSPEND_DISK); - switch(mode) { - case PM_DISK_PLATFORM: - error = pm_ops->enter(PM_SUSPEND_DISK); - if (error) { - device_power_up(); - local_irq_restore(flags); - return error; - } - case PM_DISK_SHUTDOWN: - machine_power_off(); - break; - case PM_DISK_REBOOT: - machine_restart(NULL); - break; - } - machine_halt(); - return 0; -} - - -static int in_suspend __nosavedata = 0; - - -/** - * free_some_memory - Try to free as much memory as possible - * - * ... but do not OOM-kill anyone - * - * Notice: all userland should be stopped at this point, or - * livelock is possible. - */ - -static void free_some_memory(void) -{ - printk("Freeing memory: "); - while (shrink_all_memory(10000)) - printk("."); - printk("|\n"); - blk_run_queues(); -} - - -/** - * pm_suspend_disk - The granpappy of power management. - * - * If we're going through the firmware, then get it over with quickly. - * - * If not, then call swsusp to do it's thing, then figure out how - * to power down the system. 
- */ - -static int pm_suspend_disk(void) -{ - int error; - - pr_debug("PM: Attempting to suspend to disk.\n"); - if (pm_disk_mode == PM_DISK_FIRMWARE) - return pm_ops->enter(PM_SUSPEND_DISK); - - if (!have_swsusp) - return -EPERM; - - pr_debug("PM: snapshotting memory.\n"); - in_suspend = 1; - if ((error = swsusp_save())) - goto Done; - - if (in_suspend) { - pr_debug("PM: writing image.\n"); - error = swsusp_write(); - if (!error) - error = power_down(pm_disk_mode); - pr_debug("PM: Power down failed.\n"); - } else - pr_debug("PM: Image restored successfully.\n"); - swsusp_free(); - Done: - return error; -} - - - -#define decl_state(_name) \ - { .name = __stringify(_name), .fn = pm_suspend_##_name } - -struct pm_state { - char * name; - int (*fn)(void); -} pm_states[] = { - [PM_SUSPEND_STANDBY] = decl_state(standby), - [PM_SUSPEND_MEM] = decl_state(mem), - [PM_SUSPEND_DISK] = decl_state(disk), - { NULL }, -}; - - -/** * suspend_prepare - Do prep work before entering low-power state. * @state: State we're entering. * @@ -228,36 +54,47 @@ static int suspend_prepare(u32 state) { int error = 0; + if (!pm_ops || !pm_ops->enter) + return -EPERM; + pm_prepare_console(); - sys_sync(); if (freeze_processes()) { error = -EAGAIN; goto Thaw; } - if (pm_ops && pm_ops->prepare) { + if (pm_ops->prepare) { if ((error = pm_ops->prepare(state))) goto Thaw; } - /* Free memory before shutting down devices. 
*/ - if (state == PM_SUSPEND_DISK) - free_some_memory(); - if ((error = device_suspend(state))) goto Finish; - return 0; - Done: - pm_restore_console(); - return error; Finish: - if (pm_ops && pm_ops->finish) + if (pm_ops->finish) pm_ops->finish(state); Thaw: thaw_processes(); - goto Done; + pm_restore_console(); + return error; +} + + +static int suspend_enter(u32 state) +{ + int error = 0; + unsigned long flags; + + local_irq_save(flags); + if ((error = device_power_down(state))) + goto Done; + error = pm_ops->enter(state); + device_power_up(); + Done: + local_irq_restore(flags); + return error; } @@ -279,6 +116,16 @@ static void suspend_finish(u32 state) } + + +char * pm_states[] = { + [PM_SUSPEND_STANDBY] = "standby", + [PM_SUSPEND_MEM] = "mem", + [PM_SUSPEND_DISK] = "disk", + NULL, +}; + + /** * enter_state - Do common work of entering low-power state. * @state: pm_state structure for state we're entering. @@ -293,7 +140,6 @@ static void suspend_finish(u32 state) static int enter_state(u32 state) { int error; - struct pm_state * s = &pm_states[state]; if (down_trylock(&pm_sem)) return -EBUSY; @@ -304,12 +150,17 @@ static int enter_state(u32 state) goto Unlock; } - pr_debug("PM: Preparing system for suspend.\n"); + if (state == PM_SUSPEND_DISK) { + error = pm_suspend_disk(); + goto Unlock; + } + + pr_debug("PM: Preparing system for suspend\n"); if ((error = suspend_prepare(state))) goto Unlock; pr_debug("PM: Entering state.\n"); - error = s->fn(); + error = suspend_enter(state); pr_debug("PM: Finishing up.\n"); suspend_finish(state); @@ -335,138 +186,10 @@ int pm_suspend(u32 state) } -/** - * pm_resume - Resume from a saved image. - * - * Called as a late_initcall (so all devices are discovered and - * initialized), we call swsusp to see if we have a saved image or not. - * If so, we quiesce devices, the restore the saved image. We will - * return above (in pm_suspend_disk() ) if everything goes well. 
- * Otherwise, we fail gracefully and return to the normally - * scheduled program. - * - */ - -static int pm_resume(void) -{ - int error; - - if (!have_swsusp) - return 0; - - pr_debug("PM: Reading swsusp image.\n"); - - if ((error = swsusp_read())) - goto Done; - - pr_debug("PM: Preparing system for restore.\n"); - - if ((error = suspend_prepare(PM_SUSPEND_DISK))) - goto Free; - - pr_debug("PM: Restoring saved image.\n"); - swsusp_restore(); - - pr_debug("PM: Restore failed, recovering.n"); - suspend_finish(PM_SUSPEND_DISK); - Free: - swsusp_free(); - Done: - pr_debug("PM: Resume from disk failed.\n"); - return 0; -} - -late_initcall(pm_resume); - decl_subsys(power,NULL,NULL); -#define power_attr(_name) \ -static struct subsys_attribute _name##_attr = { \ - .attr = { \ - .name = __stringify(_name), \ - .mode = 0644, \ - }, \ - .show = _name##_show, \ - .store = _name##_store, \ -} - - -static char * pm_disk_modes[] = { - [PM_DISK_FIRMWARE] = "firmware", - [PM_DISK_PLATFORM] = "platform", - [PM_DISK_SHUTDOWN] = "shutdown", - [PM_DISK_REBOOT] = "reboot", -}; - -/** - * disk - Control suspend-to-disk mode - * - * Suspend-to-disk can be handled in several ways. The greatest - * distinction is who writes memory to disk - the firmware or the OS. - * If the firmware does it, we assume that it also handles suspending - * the system. - * If the OS does it, then we have three options for putting the system - * to sleep - using the platform driver (e.g. ACPI or other PM registers), - * powering off the system or rebooting the system (for testing). - * - * The system will support either 'firmware' or 'platform', and that is - * known a priori (and encoded in pm_ops). But, the user may choose - * 'shutdown' or 'reboot' as alternatives. - * - * show() will display what the mode is currently set to. 
- * store() will accept one of - * - * 'firmware' - * 'platform' - * 'shutdown' - * 'reboot' - * - * It will only change to 'firmware' or 'platform' if the system - * supports it (as determined from pm_ops->pm_disk_mode). - */ - -static ssize_t disk_show(struct subsystem * subsys, char * buf) -{ - return sprintf(buf,"%s\n",pm_disk_modes[pm_disk_mode]); -} - - -static ssize_t disk_store(struct subsystem * s, const char * buf, size_t n) -{ - int error = 0; - int i; - u32 mode = 0; - - down(&pm_sem); - for (i = PM_DISK_FIRMWARE; i < PM_DISK_MAX; i++) { - if (!strcmp(buf,pm_disk_modes[i])) { - mode = i; - break; - } - } - if (mode) { - if (mode == PM_DISK_SHUTDOWN || mode == PM_DISK_REBOOT) - pm_disk_mode = mode; - else { - if (pm_ops && pm_ops->enter && - (mode == pm_ops->pm_disk_mode)) - pm_disk_mode = mode; - else - error = -EINVAL; - } - } else - error = -EINVAL; - - pr_debug("PM: suspend-to-disk mode set to '%s'\n", - pm_disk_modes[mode]); - up(&pm_sem); - return error ? error : n; -} - -power_attr(disk); - /** * state - control system power state. 
* @@ -480,27 +203,28 @@ power_attr(disk); static ssize_t state_show(struct subsystem * subsys, char * buf) { - struct pm_state * state; + int i; char * s = buf; - for (state = &pm_states[0]; state->name; state++) - s += sprintf(s,"%s ",state->name); + for (i = 0; i < PM_SUSPEND_MAX; i++) { + if (pm_states[i]) + s += sprintf(s,"%s ",pm_states[i]); + } s += sprintf(s,"\n"); return (s - buf); } static ssize_t state_store(struct subsystem * subsys, const char * buf, size_t n) { - u32 state; - struct pm_state * s; + u32 state = PM_SUSPEND_STANDBY; + char ** s; int error; - for (state = 0; state < PM_SUSPEND_MAX; state++) { - s = &pm_states[state]; - if (s->name && !strcmp(buf,s->name)) + for (s = &pm_states[state]; *s; s++, state++) { + if (!strcmp(buf,*s)) break; } - if (s) + if (*s) error = enter_state(state); else error = -EINVAL; @@ -511,7 +235,6 @@ power_attr(state); static struct attribute * g[] = { &state_attr.attr, - &disk_attr.attr, NULL, }; @@ -520,7 +243,7 @@ static struct attribute_group attr_group = { }; -static int pm_init(void) +static int __init pm_init(void) { int error = subsystem_register(&power_subsys); if (!error) diff --git a/kernel/power/pmdisk.c b/kernel/power/pmdisk.c new file mode 100644 index 000000000000..bb795b6dc537 --- /dev/null +++ b/kernel/power/pmdisk.c @@ -0,0 +1,969 @@ +/* + * linux/kernel/suspend.c + * + * This file is to realize architecture-independent + * machine suspend feature using pretty near only high-level routines + * + * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu> + * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz> + * + * I'd like to thank the following people for their work: + * + * Pavel Machek <pavel@ucw.cz>: + * Modifications, defectiveness pointing, being with me at the very beginning, + * suspend to swap space, stop all tasks. Port to 2.4.18-ac and 2.5.17. + * + * Steve Doddi <dirk@loth.demon.co.uk>: + * Support the possibility of hardware state restoring. 
+ * + * Raph <grey.havens@earthling.net>: + * Support for preserving states of network devices and virtual console + * (including X and svgatextmode) + * + * Kurt Garloff <garloff@suse.de>: + * Straightened the critical function in order to prevent compilers from + * playing tricks with local variables. + * + * Andreas Mohr <a.mohr@mailto.de> + * + * Alex Badea <vampire@go.ro>: + * Fixed runaway init + * + * More state savers are welcome. Especially for the scsi layer... + * + * For TODOs,FIXMEs also look in Documentation/swsusp.txt + */ + +#include <linux/mm.h> +#include <linux/bio.h> +#include <linux/suspend.h> +#include <linux/version.h> +#include <linux/reboot.h> +#include <linux/device.h> +#include <linux/swapops.h> +#include <linux/bootmem.h> + +#include <asm/mmu_context.h> + +#include "power.h" + + +extern int swsusp_arch_suspend(int resume); + +#define __ADDRESS(x) ((unsigned long) phys_to_virt(x)) +#define ADDRESS(x) __ADDRESS((x) << PAGE_SHIFT) +#define ADDRESS2(x) __ADDRESS(__pa(x)) /* Needed for x86-64 where some pages are in memory twice */ + +/* References to section boundaries */ +extern char __nosave_begin, __nosave_end; + +extern int is_head_of_free_region(struct page *); + +/* Variables to be preserved over suspend */ +static int pagedir_order_check; +static int nr_copy_pages_check; + +static char resume_file[256]; /* For resume= kernel option */ +static dev_t resume_device; +/* Local variables that should not be affected by save */ +unsigned int nr_copy_pages __nosavedata = 0; + +/* Suspend pagedir is allocated before final copy, therefore it + must be freed after resume + + Warning: this is evil. There are actually two pagedirs at time of + resume. One is "pagedir_save", which is empty frame allocated at + time of suspend, that must be freed. Second is "pagedir_nosave", + allocated at time of resume, that travels through memory not to + collide with anything. 
+ */ +suspend_pagedir_t *pagedir_nosave __nosavedata = NULL; +static suspend_pagedir_t *pagedir_save; +static int pagedir_order __nosavedata = 0; + +struct link { + char dummy[PAGE_SIZE - sizeof(swp_entry_t)]; + swp_entry_t next; +}; + +union diskpage { + union swap_header swh; + struct link link; + struct suspend_header sh; +}; + +/* + * XXX: We try to keep some more pages free so that I/O operations succeed + * without paging. Might this be more? + */ +#define PAGES_FOR_IO 512 + +static const char name_suspend[] = "Suspend Machine: "; +static const char name_resume[] = "Resume Machine: "; + +/* + * Debug + */ +#define DEBUG_DEFAULT +#undef DEBUG_PROCESS +#undef DEBUG_SLOW +#define TEST_SWSUSP 0 /* Set to 1 to reboot instead of halt machine after suspension */ + +#ifdef DEBUG_DEFAULT +# define PRINTK(f, a...) printk(f, ## a) +#else +# define PRINTK(f, a...) +#endif + +#ifdef DEBUG_SLOW +#define MDELAY(a) mdelay(a) +#else +#define MDELAY(a) +#endif + +/* + * Saving part... + */ + +static __inline__ int fill_suspend_header(struct suspend_header *sh) +{ + memset((char *)sh, 0, sizeof(*sh)); + + sh->version_code = LINUX_VERSION_CODE; + sh->num_physpages = num_physpages; + strncpy(sh->machine, system_utsname.machine, 8); + strncpy(sh->version, system_utsname.version, 20); + /* FIXME: Is this bogus? --RR */ + sh->num_cpus = num_online_cpus(); + sh->page_size = PAGE_SIZE; + sh->suspend_pagedir = pagedir_nosave; + BUG_ON (pagedir_save != pagedir_nosave); + sh->num_pbes = nr_copy_pages; + /* TODO: needed? mounted fs' last mounted date comparison + * [so they haven't been mounted since last suspend. + * Maybe it isn't.] 
[we'd need to do this for _all_ fs-es] + */ + return 0; +} + +/* We memorize in swapfile_used what swap devices are used for suspension */ +#define SWAPFILE_UNUSED 0 +#define SWAPFILE_SUSPEND 1 /* This is the suspending device */ +#define SWAPFILE_IGNORED 2 /* Those are other swap devices ignored for suspension */ + +static unsigned short swapfile_used[MAX_SWAPFILES]; +static unsigned short root_swap; +#define MARK_SWAP_SUSPEND 0 +#define MARK_SWAP_RESUME 2 + +static void mark_swapfiles(swp_entry_t prev, int mode) +{ + swp_entry_t entry; + union diskpage *cur; + struct page *page; + + if (root_swap == 0xFFFF) /* ignored */ + return; + + page = alloc_page(GFP_ATOMIC); + if (!page) + panic("Out of memory in mark_swapfiles"); + cur = page_address(page); + /* XXX: this is dirty hack to get first page of swap file */ + entry = swp_entry(root_swap, 0); + rw_swap_page_sync(READ, entry, page); + + if (mode == MARK_SWAP_RESUME) { + if (!memcmp("S1",cur->swh.magic.magic,2)) + memcpy(cur->swh.magic.magic,"SWAP-SPACE",10); + else if (!memcmp("S2",cur->swh.magic.magic,2)) + memcpy(cur->swh.magic.magic,"SWAPSPACE2",10); + else printk("%sUnable to find suspended-data signature (%.10s - misspelled?\n", + name_resume, cur->swh.magic.magic); + } else { + if ((!memcmp("SWAP-SPACE",cur->swh.magic.magic,10))) + memcpy(cur->swh.magic.magic,"S1SUSP....",10); + else if ((!memcmp("SWAPSPACE2",cur->swh.magic.magic,10))) + memcpy(cur->swh.magic.magic,"S2SUSP....",10); + else panic("\nSwapspace is not swapspace (%.10s)\n", cur->swh.magic.magic); + cur->link.next = prev; /* prev is the first/last swap page of the resume area */ + /* link.next lies *no more* in last 4/8 bytes of magic */ + } + rw_swap_page_sync(WRITE, entry, page); + __free_page(page); +} + +static void read_swapfiles(void) /* This is called before saving image */ +{ + int i, len; + + len=strlen(resume_file); + root_swap = 0xFFFF; + + swap_list_lock(); + for(i=0; i<MAX_SWAPFILES; i++) { + if (swap_info[i].flags == 0) { + 
swapfile_used[i]=SWAPFILE_UNUSED; + } else { + if(!len) { + printk(KERN_WARNING "resume= option should be used to set suspend device" ); + if(root_swap == 0xFFFF) { + swapfile_used[i] = SWAPFILE_SUSPEND; + root_swap = i; + } else + swapfile_used[i] = SWAPFILE_IGNORED; + } else { + /* we ignore all swap devices that are not the resume_file */ + if (1) { +// FIXME if(resume_device == swap_info[i].swap_device) { + swapfile_used[i] = SWAPFILE_SUSPEND; + root_swap = i; + } else { +#if 0 + printk( "Resume: device %s (%x != %x) ignored\n", swap_info[i].swap_file->d_name.name, swap_info[i].swap_device, resume_device ); +#endif + swapfile_used[i] = SWAPFILE_IGNORED; + } + } + } + } + swap_list_unlock(); +} + +static void lock_swapdevices(void) /* This is called after saving image so modification + will be lost after resume... and that's what we want. */ +{ + int i; + + swap_list_lock(); + for(i = 0; i< MAX_SWAPFILES; i++) + if(swapfile_used[i] == SWAPFILE_IGNORED) { + swap_info[i].flags ^= 0xFF; /* we make the device unusable. A new call to + lock_swapdevices can unlock the devices. */ + } + swap_list_unlock(); +} + +static int write_suspend_image(void) +{ + int i; + swp_entry_t entry, prev = { 0 }; + int nr_pgdir_pages = SUSPEND_PD_PAGES(nr_copy_pages); + union diskpage *cur, *buffer = (union diskpage *)get_zeroed_page(GFP_ATOMIC); + unsigned long address; + struct page *page; + + printk( "Writing data to swap (%d pages): ", nr_copy_pages ); + for (i=0; i<nr_copy_pages; i++) { + if (!(i%100)) + printk( "." 
); + if (!(entry = get_swap_page()).val) + panic("\nNot enough swapspace when writing data" ); + + if (swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND) + panic("\nPage %d: not enough swapspace on suspend device", i ); + + address = (pagedir_nosave+i)->address; + page = virt_to_page(address); + rw_swap_page_sync(WRITE, entry, page); + (pagedir_nosave+i)->swap_address = entry; + } + printk( "|\n" ); + printk( "Writing pagedir (%d pages): ", nr_pgdir_pages); + for (i=0; i<nr_pgdir_pages; i++) { + cur = (union diskpage *)((char *) pagedir_nosave)+i; + BUG_ON ((char *) cur != (((char *) pagedir_nosave) + i*PAGE_SIZE)); + printk( "." ); + if (!(entry = get_swap_page()).val) { + printk(KERN_CRIT "Not enough swapspace when writing pgdir\n" ); + panic("Don't know how to recover"); + free_page((unsigned long) buffer); + return -ENOSPC; + } + + if(swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND) + panic("\nNot enough swapspace for pagedir on suspend device" ); + + BUG_ON (sizeof(swp_entry_t) != sizeof(long)); + BUG_ON (PAGE_SIZE % sizeof(struct pbe)); + + cur->link.next = prev; + page = virt_to_page((unsigned long)cur); + rw_swap_page_sync(WRITE, entry, page); + prev = entry; + } + printk("H"); + BUG_ON (sizeof(struct suspend_header) > PAGE_SIZE-sizeof(swp_entry_t)); + BUG_ON (sizeof(union diskpage) != PAGE_SIZE); + if (!(entry = get_swap_page()).val) + panic( "\nNot enough swapspace when writing header" ); + if (swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND) + panic("\nNot enough swapspace for header on suspend device" ); + + cur = (void *) buffer; + if (fill_suspend_header(&cur->sh)) + panic("\nOut of memory while writing header"); + + cur->link.next = prev; + + page = virt_to_page((unsigned long)cur); + rw_swap_page_sync(WRITE, entry, page); + prev = entry; + + printk( "S" ); + mark_swapfiles(prev, MARK_SWAP_SUSPEND); + printk( "|\n" ); + + MDELAY(1000); + free_page((unsigned long) buffer); + return 0; +} + +/* if pagedir_p != NULL it also copies the counted 
pages */ +static int count_and_copy_data_pages(struct pbe *pagedir_p) +{ + int chunk_size; + int nr_copy_pages = 0; + int pfn; + struct page *page; + + BUG_ON (max_pfn != num_physpages); + + for (pfn = 0; pfn < max_pfn; pfn++) { + page = pfn_to_page(pfn); + + if (!PageReserved(page)) { + if (PageNosave(page)) + continue; + + if ((chunk_size=is_head_of_free_region(page))!=0) { + pfn += chunk_size - 1; + continue; + } + } else if (PageReserved(page)) { + BUG_ON (PageNosave(page)); + + /* + * Just copy whole code segment. Hopefully it is not that big. + */ + if ((ADDRESS(pfn) >= (unsigned long) ADDRESS2(&__nosave_begin)) && + (ADDRESS(pfn) < (unsigned long) ADDRESS2(&__nosave_end))) { + PRINTK("[nosave %lx]", ADDRESS(pfn)); + continue; + } + /* Hmm, perhaps copying all reserved pages is not too healthy as they may contain + critical bios data? */ + } else BUG(); + + nr_copy_pages++; + if (pagedir_p) { + pagedir_p->orig_address = ADDRESS(pfn); + copy_page((void *) pagedir_p->address, (void *) pagedir_p->orig_address); + pagedir_p++; + } + } + return nr_copy_pages; +} + +static void free_suspend_pagedir(unsigned long this_pagedir) +{ + struct page *page; + int pfn; + unsigned long this_pagedir_end = this_pagedir + + (PAGE_SIZE << pagedir_order); + + for(pfn = 0; pfn < num_physpages; pfn++) { + page = pfn_to_page(pfn); + if (!TestClearPageNosave(page)) + continue; + + if (ADDRESS(pfn) >= this_pagedir && ADDRESS(pfn) < this_pagedir_end) + continue; /* old pagedir gets freed in one */ + + free_page(ADDRESS(pfn)); + } + free_pages(this_pagedir, pagedir_order); +} + +static suspend_pagedir_t *create_suspend_pagedir(int nr_copy_pages) +{ + int i; + suspend_pagedir_t *pagedir; + struct pbe *p; + struct page *page; + + pagedir_order = get_bitmask_order(SUSPEND_PD_PAGES(nr_copy_pages)); + + p = pagedir = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC | __GFP_COLD, pagedir_order); + if(!pagedir) + return NULL; + + page = virt_to_page(pagedir); + for(i=0; i < 1<<pagedir_order; 
i++) + SetPageNosave(page++); + + while(nr_copy_pages--) { + p->address = get_zeroed_page(GFP_ATOMIC | __GFP_COLD); + if(!p->address) { + free_suspend_pagedir((unsigned long) pagedir); + return NULL; + } + SetPageNosave(virt_to_page(p->address)); + p->orig_address = 0; + p++; + } + return pagedir; +} + + +int swsusp_suspend(void) +{ + struct sysinfo i; + unsigned int nr_needed_pages = 0; + + read_swapfiles(); + drain_local_pages(); + + pagedir_nosave = NULL; + printk( "/critical section: Counting pages to copy" ); + nr_copy_pages = count_and_copy_data_pages(NULL); + nr_needed_pages = nr_copy_pages + PAGES_FOR_IO; + + printk(" (pages needed: %d+%d=%d free: %d)\n",nr_copy_pages,PAGES_FOR_IO,nr_needed_pages,nr_free_pages()); + if(nr_free_pages() < nr_needed_pages) { + printk(KERN_CRIT "%sCouldn't get enough free pages, on %d pages short\n", + name_suspend, nr_needed_pages-nr_free_pages()); + root_swap = 0xFFFF; + return 1; + } + si_swapinfo(&i); /* FIXME: si_swapinfo(&i) returns all swap devices information. + We should only consider resume_device. */ + if (i.freeswap < nr_needed_pages) { + printk(KERN_CRIT "%sThere's not enough swap space available, on %ld pages short\n", + name_suspend, nr_needed_pages-i.freeswap); + return 1; + } + + PRINTK( "Alloc pagedir\n" ); + pagedir_save = pagedir_nosave = create_suspend_pagedir(nr_copy_pages); + if(!pagedir_nosave) { + /* Shouldn't happen */ + printk(KERN_CRIT "%sCouldn't allocate enough pages\n",name_suspend); + panic("Really should not happen"); + return 1; + } + nr_copy_pages_check = nr_copy_pages; + pagedir_order_check = pagedir_order; + + drain_local_pages(); /* During allocating of suspend pagedir, new cold pages may appear. Kill them */ + if (nr_copy_pages != count_and_copy_data_pages(pagedir_nosave)) /* copy */ + BUG(); + + /* + * End of critical section. From now on, we can write to memory, + * but we should not touch disk. This specially means we must _not_ + * touch swap space! 
Except we must write out our image of course. + */ + + printk( "critical section/: done (%d pages copied)\n", nr_copy_pages ); + return 0; +} + + +/** + * suspend_save_image - Prepare and write saved image to swap. + * + * Devices are resumed here so that we can safely write to the swap + * devices. This function does not itself change the IRQ state. + * + * The second lock_swapdevices() will unlock ignored swap devices since + * writing is finished. + * It is important _NOT_ to umount filesystems at this point. We want + * them synced (in case something goes wrong) but we DO not want to mark + * filesystem clean: it is not. (And it does not matter, if we resume + * correctly, we'll mark system clean, anyway.) + */ + +static int suspend_save_image(void) +{ + int error; + device_resume(); + lock_swapdevices(); + error = write_suspend_image(); + lock_swapdevices(); + return error; +} + +/* + * Magic happens here + */ + +int swsusp_resume(void) +{ + BUG_ON (nr_copy_pages_check != nr_copy_pages); + BUG_ON (pagedir_order_check != pagedir_order); + + /* Even mappings of "global" things (vmalloc) need to be fixed */ + __flush_tlb_global(); + return 0; +} + +/* swsusp_arch_suspend() is implemented in arch/?/power/swsusp.S, + and basically does: + + if (!resume) { + save_processor_state(); + SAVE_REGISTERS + return swsusp_suspend(); + } + GO_TO_SWAPPER_PAGE_TABLES + COPY_PAGES_BACK + RESTORE_REGISTERS + restore_processor_state(); + return swsusp_resume(); + + */ + + +/* More restore stuff */ + +/* FIXME: Why not memcpy(to, from, 1<<pagedir_order*PAGE_SIZE)? 
*/ +static void __init copy_pagedir(suspend_pagedir_t *to, suspend_pagedir_t *from) +{ + int i; + char *topointer=(char *)to, *frompointer=(char *)from; + + for(i=0; i < 1 << pagedir_order; i++) { + copy_page(topointer, frompointer); + topointer += PAGE_SIZE; + frompointer += PAGE_SIZE; + } +} + +#define does_collide(addr) does_collide_order(pagedir_nosave, addr, 0) + +/* + * Returns true if given address/order collides with any orig_address + */ +static int __init does_collide_order(suspend_pagedir_t *pagedir, + unsigned long addr, int order) +{ + int i; + unsigned long addre = addr + (PAGE_SIZE<<order); + + for(i=0; i < nr_copy_pages; i++) + if((pagedir+i)->orig_address >= addr && + (pagedir+i)->orig_address < addre) + return 1; + + return 0; +} + +/* + * We check here that pagedir & pages it points to won't collide with pages + * where we're going to restore from the loaded pages later + */ +static int __init check_pagedir(void) +{ + int i; + + for(i=0; i < nr_copy_pages; i++) { + unsigned long addr; + + do { + addr = get_zeroed_page(GFP_ATOMIC); + if(!addr) + return -ENOMEM; + } while (does_collide(addr)); + + (pagedir_nosave+i)->address = addr; + } + return 0; +} + +static int __init relocate_pagedir(void) +{ + /* + * We have to avoid recursion (not to overflow kernel stack), + * and that's why code looks pretty cryptic + */ + suspend_pagedir_t *new_pagedir, *old_pagedir = pagedir_nosave; + void **eaten_memory = NULL; + void **c = eaten_memory, *m, *f; + + printk("Relocating pagedir"); + + if(!does_collide_order(old_pagedir, (unsigned long)old_pagedir, pagedir_order)) { + printk("not necessary\n"); + return 0; + } + + while ((m = (void *) __get_free_pages(GFP_ATOMIC, pagedir_order))) { + memset(m, 0, PAGE_SIZE); + if (!does_collide_order(old_pagedir, (unsigned long)m, pagedir_order)) + break; + eaten_memory = m; + printk( "." 
); + *eaten_memory = c; + c = eaten_memory; + } + + if (!m) + return -ENOMEM; + + pagedir_nosave = new_pagedir = m; + copy_pagedir(new_pagedir, old_pagedir); + + c = eaten_memory; + while(c) { + printk(":"); + f = *c; + c = *c; + if (f) + free_pages((unsigned long)f, pagedir_order); + } + printk("|\n"); + return 0; +} + +/* + * Sanity check if this image makes sense with this kernel/swap context + * I really don't think that it's foolproof but more than nothing.. + */ + +static int __init sanity_check_failed(char *reason) +{ + printk(KERN_ERR "%s%s\n",name_resume,reason); + return -EPERM; +} + +static int __init sanity_check(struct suspend_header *sh) +{ + if(sh->version_code != LINUX_VERSION_CODE) + return sanity_check_failed("Incorrect kernel version"); + if(sh->num_physpages != num_physpages) + return sanity_check_failed("Incorrect memory size"); + if(strncmp(sh->machine, system_utsname.machine, 8)) + return sanity_check_failed("Incorrect machine type"); + if(strncmp(sh->version, system_utsname.version, 20)) + return sanity_check_failed("Incorrect version"); + if(sh->num_cpus != num_online_cpus()) + return sanity_check_failed("Incorrect number of cpus"); + if(sh->page_size != PAGE_SIZE) + return sanity_check_failed("Incorrect PAGE_SIZE"); + return 0; +} + +static struct block_device * resume_bdev; + + +/** + * Using bio to read from swap. + * This code requires a bit more work than just using buffer heads + * but, it is the recommended way for 2.5/2.6. + * The following are to signal the beginning and end of I/O. Bios + * finish asynchronously, while we want them to happen synchronously. + * A simple atomic_t, and a wait loop take care of this problem. 
+ */ + +static atomic_t io_done = ATOMIC_INIT(0); + +static void start_io(void) +{ + atomic_set(&io_done,1); +} + +static int end_io(struct bio * bio, unsigned int num, int err) +{ + atomic_set(&io_done,0); + return 0; +} + +static void wait_io(void) +{ + blk_run_queues(); + while(atomic_read(&io_done)) + io_schedule(); +} + + +/** + * submit - submit BIO request. + * @rw: READ or WRITE. + * @page_off: offset of the page on the resume device, in PAGE_SIZE units. + * @page: page we're reading or writing. + * + * Straight from the textbook - allocate and initialize the bio. + * If we're writing, make sure the page is marked as dirty. + * Then submit it and wait. + */ + +static int submit(int rw, pgoff_t page_off, void * page) +{ + int error = 0; + struct bio * bio; + + bio = bio_alloc(GFP_ATOMIC,1); + if (!bio) + return -ENOMEM; + bio->bi_sector = page_off * (PAGE_SIZE >> 9); + bio_get(bio); + bio->bi_bdev = resume_bdev; + bio->bi_end_io = end_io; + + if (bio_add_page(bio, virt_to_page(page), PAGE_SIZE, 0) < PAGE_SIZE) { + printk("ERROR: adding page to bio at %ld\n",page_off); + error = -EFAULT; + goto Done; + } + + if (rw == WRITE) + bio_set_pages_dirty(bio); + start_io(); + submit_bio(rw,bio); + wait_io(); + Done: + bio_put(bio); + return error; +} + +static int +read_page(pgoff_t page_off, void * page) +{ + return submit(READ,page_off,page); +} + +static int +write_page(pgoff_t page_off, void * page) +{ + return submit(WRITE,page_off,page); +} + + +extern dev_t __init name_to_dev_t(const char *line); + + +#define next_entry(diskpage) diskpage->link.next + +static int __init read_suspend_image(void) +{ + swp_entry_t next; + int i, nr_pgdir_pages; + union diskpage *cur; + int error = 0; + + cur = (union diskpage *)get_zeroed_page(GFP_ATOMIC); + if (!cur) + return -ENOMEM; + + if ((error = read_page(0, cur))) + goto Done; + + /* + * We have to read next position before we overwrite it + */ + next = next_entry(cur); + + if (!memcmp("S1",cur->swh.magic.magic,2)) + memcpy(cur->swh.magic.magic,"SWAP-SPACE",10); 
+ else if (!memcmp("S2",cur->swh.magic.magic,2)) + memcpy(cur->swh.magic.magic,"SWAPSPACE2",10); + else if ((!memcmp("SWAP-SPACE",cur->swh.magic.magic,10)) || + (!memcmp("SWAPSPACE2",cur->swh.magic.magic,10))) { + printk(KERN_ERR "swsusp: Partition is normal swap space\n"); + error = -EINVAL; + goto Done; + } else { + printk(KERN_ERR "swsusp: Invalid partition type.\n"); + error = -EINVAL; + goto Done; + } + + /* + * Reset swap signature now. + */ + if ((error = write_page(0,cur))) + goto Done; + + printk( "%sSignature found, resuming\n", name_resume ); + MDELAY(1000); + + if ((error = read_page(swp_offset(next), cur))) + goto Done; + /* Is this same machine? */ + if ((error = sanity_check(&cur->sh))) + goto Done; + next = next_entry(cur); + + pagedir_save = cur->sh.suspend_pagedir; + nr_copy_pages = cur->sh.num_pbes; + nr_pgdir_pages = SUSPEND_PD_PAGES(nr_copy_pages); + pagedir_order = get_bitmask_order(nr_pgdir_pages); + + pagedir_nosave = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC, pagedir_order); + if (!pagedir_nosave) { + error = -ENOMEM; + goto Done; + } + + PRINTK( "%sReading pagedir, ", name_resume ); + + /* We get pages in reverse order of saving! */ + for (i=nr_pgdir_pages-1; i>=0; i--) { + BUG_ON (!next.val); + cur = (union diskpage *)((char *) pagedir_nosave)+i; + error = read_page(swp_offset(next), cur); + if (error) + goto FreePagedir; + next = next_entry(cur); + } + BUG_ON (next.val); + + if ((error = relocate_pagedir())) + goto FreePagedir; + if ((error = check_pagedir())) + goto FreePagedir; + + printk( "Reading image data (%d pages): ", nr_copy_pages ); + for(i=0; i < nr_copy_pages; i++) { + swp_entry_t swap_address = (pagedir_nosave+i)->swap_address; + if (!(i%100)) + printk( "." ); + /* You do not need to check for overlaps... + ... 
check_pagedir already did this work */ + error = read_page(swp_offset(swap_address), + (char *)((pagedir_nosave+i)->address)); + if (error) + goto FreePagedir; + } + printk( "|\n" ); + Done: + free_page((unsigned long)cur); + return error; + FreePagedir: + free_pages((unsigned long)pagedir_nosave,pagedir_order); + goto Done; +} + +/** + * swsusp_save - Snapshot memory + */ + +int swsusp_save(void) +{ + int error; + +#if defined (CONFIG_HIGHMEM) || defined (COFNIG_DISCONTIGMEM) + printk("swsusp is not supported with high- or discontig-mem.\n"); + return -EPERM; +#endif + if ((error = arch_prepare_suspend())) + return error; + local_irq_disable(); + error = swsusp_arch_suspend(0); + local_irq_enable(); + return error; +} + + +/** + * swsusp_write - Write saved memory image to swap. + * + * swsusp_arch_suspend(0) returns after system is resumed. + * + * swsusp_arch_suspend() copies all "used" memory to "free" memory, + * then unsuspends all device drivers, and writes memory to disk + * using normal kernel mechanism. + */ + +int swsusp_write(void) +{ + return suspend_save_image(); +} + + +/** + * swsusp_read - Read saved image from swap. + */ + +int __init swsusp_read(void) +{ + int error; + char b[BDEVNAME_SIZE]; + + if (!strlen(resume_file)) + return -ENOENT; + + resume_device = name_to_dev_t(resume_file); + printk("swsusp: Resume From Partition: %s, Device: %s\n", + resume_file, __bdevname(resume_device, b)); + + resume_bdev = open_by_devnum(resume_device, FMODE_READ, BDEV_RAW); + if (!IS_ERR(resume_bdev)) { + set_blocksize(resume_bdev, PAGE_SIZE); + error = read_suspend_image(); + blkdev_put(resume_bdev, BDEV_RAW); + } else + error = PTR_ERR(resume_bdev); + + if (!error) + PRINTK("Reading resume file was successful\n"); + else + printk( "%sError %d resuming\n", name_resume, error ); + MDELAY(1000); + return error; +} + + +/** + * swsusp_restore - Replace running kernel with saved image. 
+ */ + +int __init swsusp_restore(void) +{ + int error; + local_irq_disable(); + error = swsusp_arch_suspend(1); + local_irq_enable(); + return error; +} + + +/** + * swsusp_free - Free memory allocated to hold snapshot. + */ + +int swsusp_free(void) +{ + PRINTK( "Freeing prev allocated pagedir\n" ); + free_suspend_pagedir((unsigned long) pagedir_save); + return 0; +} + + +int software_suspend(void) +{ + struct pm_ops swsusp_ops = { + .pm_disk_mode = PM_DISK_SHUTDOWN, + }; + + pm_set_ops(&swsusp_ops); + return pm_suspend(PM_SUSPEND_DISK); +} + +static int __init resume_setup(char *str) +{ + if (strlen(str)) + strncpy(resume_file, str, 255); + return 1; +} + +static int __init noresume_setup(char *str) +{ + resume_file[0] = '\0'; + return 1; +} + +__setup("noresume", noresume_setup); +__setup("resume=", resume_setup); + diff --git a/kernel/power/power.h b/kernel/power/power.h index e98de640155d..e0874ed266f5 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -10,34 +10,27 @@ #ifdef CONFIG_SOFTWARE_SUSPEND -extern int swsusp_save(void); -extern int swsusp_write(void); -extern int swsusp_read(void); -extern int swsusp_restore(void); -extern int swsusp_free(void); +extern int pm_suspend_disk(void); + #else -static inline int swsusp_save(void) -{ - return 0; -} -static inline int swsusp_write(void) +static inline int pm_suspend_disk(void) { - return 0; -} -static inline int swsusp_read(void) -{ - return 0; -} -static inline int swsusp_restore(void) -{ - return 0; -} -static inline int swsusp_free(void) -{ - return 0; + return -EPERM; } #endif +extern struct semaphore pm_sem; +#define power_attr(_name) \ +static struct subsys_attribute _name##_attr = { \ + .attr = { \ + .name = __stringify(_name), \ + .mode = 0644, \ + }, \ + .show = _name##_show, \ + .store = _name##_store, \ +} + +extern struct subsystem power_subsys; extern int freeze_processes(void); extern void thaw_processes(void); diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 
c849a18c45f4..bb795b6dc537 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -34,38 +34,21 @@ * For TODOs,FIXMEs also look in Documentation/swsusp.txt */ -#include <linux/module.h> #include <linux/mm.h> +#include <linux/bio.h> #include <linux/suspend.h> -#include <linux/smp_lock.h> -#include <linux/file.h> -#include <linux/utsname.h> #include <linux/version.h> -#include <linux/delay.h> #include <linux/reboot.h> -#include <linux/bitops.h> -#include <linux/vt_kern.h> -#include <linux/kbd_kern.h> -#include <linux/keyboard.h> -#include <linux/spinlock.h> -#include <linux/genhd.h> -#include <linux/kernel.h> -#include <linux/major.h> -#include <linux/swap.h> -#include <linux/pm.h> #include <linux/device.h> -#include <linux/buffer_head.h> #include <linux/swapops.h> #include <linux/bootmem.h> -#include <asm/uaccess.h> #include <asm/mmu_context.h> -#include <asm/pgtable.h> -#include <asm/io.h> #include "power.h" -unsigned char software_suspend_enabled = 1; + +extern int swsusp_arch_suspend(int resume); #define __ADDRESS(x) ((unsigned long) phys_to_virt(x)) #define ADDRESS(x) __ADDRESS((x) << PAGE_SHIFT) @@ -76,9 +59,6 @@ extern char __nosave_begin, __nosave_end; extern int is_head_of_free_region(struct page *); -/* Locks */ -spinlock_t suspend_pagedir_lock __nosavedata = SPIN_LOCK_UNLOCKED; - /* Variables to be preserved over suspend */ static int pagedir_order_check; static int nr_copy_pages_check; @@ -436,11 +416,12 @@ static suspend_pagedir_t *create_suspend_pagedir(int nr_copy_pages) } -static int suspend_prepare_image(void) +int swsusp_suspend(void) { struct sysinfo i; unsigned int nr_needed_pages = 0; + read_swapfiles(); drain_local_pages(); pagedir_nosave = NULL; @@ -488,21 +469,28 @@ static int suspend_prepare_image(void) return 0; } + +/** + * suspend_save_image - Prepare and write saved image to swap. + * + * IRQs are re-enabled here so we can resume devices and safely write + * to the swap devices. We disable them again before we leave. 
+ * + * The second lock_swapdevices() will unlock ignored swap devices since + * writing is finished. + * It is important _NOT_ to umount filesystems at this point. We want + * them synced (in case something goes wrong) but we DO not want to mark + * filesystem clean: it is not. (And it does not matter, if we resume + * correctly, we'll mark system clean, anyway.) + */ + static int suspend_save_image(void) { int error; - device_resume(); - lock_swapdevices(); error = write_suspend_image(); - lock_swapdevices(); /* This will unlock ignored swap devices since writing is finished */ - - /* It is important _NOT_ to umount filesystems at this point. We want - * them synced (in case something goes wrong) but we DO not want to mark - * filesystem clean: it is not. (And it does not matter, if we resume - * correctly, we'll mark system clean, anyway.) - */ + lock_swapdevices(); return error; } @@ -510,67 +498,32 @@ static int suspend_save_image(void) * Magic happens here */ -void do_magic_resume_1(void) -{ - barrier(); - mb(); - spin_lock_irq(&suspend_pagedir_lock); /* Done to disable interrupts */ - PRINTK( "Waiting for DMAs to settle down...\n"); - /* We do not want some readahead with DMA to corrupt our memory, right? - Do it with disabled interrupts for best effect. That way, if some - driver scheduled DMA, we have good chance for DMA to finish ;-). 
*/ - mdelay(1000); -} - -void do_magic_resume_2(void) +int swsusp_resume(void) { BUG_ON (nr_copy_pages_check != nr_copy_pages); BUG_ON (pagedir_order_check != pagedir_order); /* Even mappings of "global" things (vmalloc) need to be fixed */ __flush_tlb_global(); - spin_unlock_irq(&suspend_pagedir_lock); + return 0; } -/* do_magic() is implemented in arch/?/kernel/suspend_asm.S, and basically does: +/* swsusp_arch_suspend() is implemented in arch/?/power/swsusp.S, + and basically does: if (!resume) { - do_magic_suspend_1(); save_processor_state(); SAVE_REGISTERS - do_magic_suspend_2(); - return; + return swsusp_suspend(); } GO_TO_SWAPPER_PAGE_TABLES - do_magic_resume_1(); COPY_PAGES_BACK RESTORE_REGISTERS restore_processor_state(); - do_magic_resume_2(); + return swsusp_resume(); */ -void do_magic_suspend_1(void) -{ - mb(); - barrier(); - spin_lock_irq(&suspend_pagedir_lock); -} - -int do_magic_suspend_2(void) -{ - int is_problem; - read_swapfiles(); - is_problem = suspend_prepare_image(); - spin_unlock_irq(&suspend_pagedir_lock); - if (!is_problem) - return suspend_save_image(); - printk(KERN_EMERG "%sSuspend failed, trying to recover...\n", name_suspend); - barrier(); - mb(); - mdelay(1000); - return -EFAULT; -} /* More restore stuff */ @@ -701,61 +654,146 @@ static int __init sanity_check(struct suspend_header *sh) return 0; } -static int __init bdev_read_page(struct block_device *bdev, - long pos, void *buf) +static struct block_device * resume_bdev; + + +/** + * Using bio to read from swap. + * This code requires a bit more work than just using buffer heads + * but, it is the recommended way for 2.5/2.6. + * The following are to signal the beginning and end of I/O. Bios + * finish asynchronously, while we want them to happen synchronously. + * A simple atomic_t, and a wait loop take care of this problem. 
+ */ + +static atomic_t io_done = ATOMIC_INIT(0); + +static void start_io(void) +{ + atomic_set(&io_done,1); +} + +static int end_io(struct bio * bio, unsigned int num, int err) { - struct buffer_head *bh; - BUG_ON (pos%PAGE_SIZE); - bh = __bread(bdev, pos/PAGE_SIZE, PAGE_SIZE); - if (!bh || (!bh->b_data)) { - return -1; - } - memcpy(buf, bh->b_data, PAGE_SIZE); /* FIXME: may need kmap() */ - BUG_ON(!buffer_uptodate(bh)); - brelse(bh); + atomic_set(&io_done,0); return 0; -} +} + +static void wait_io(void) +{ + blk_run_queues(); + while(atomic_read(&io_done)) + io_schedule(); +} + + +/** + * submit - submit BIO request. + * @rw: READ or WRITE. + * @off physical offset of page. + * @page: page we're reading or writing. + * + * Straight from the textbook - allocate and initialize the bio. + * If we're writing, make sure the page is marked as dirty. + * Then submit it and wait. + */ + +static int submit(int rw, pgoff_t page_off, void * page) +{ + int error = 0; + struct bio * bio; + + bio = bio_alloc(GFP_ATOMIC,1); + if (!bio) + return -ENOMEM; + bio->bi_sector = page_off * (PAGE_SIZE >> 9); + bio_get(bio); + bio->bi_bdev = resume_bdev; + bio->bi_end_io = end_io; + + if (bio_add_page(bio, virt_to_page(page), PAGE_SIZE, 0) < PAGE_SIZE) { + printk("ERROR: adding page to bio at %ld\n",page_off); + error = -EFAULT; + goto Done; + } + + if (rw == WRITE) + bio_set_pages_dirty(bio); + start_io(); + submit_bio(rw,bio); + wait_io(); + Done: + bio_put(bio); + return error; +} + +static int +read_page(pgoff_t page_off, void * page) +{ + return submit(READ,page_off,page); +} + +static int +write_page(pgoff_t page_off, void * page) +{ + return submit(WRITE,page_off,page); +} + extern dev_t __init name_to_dev_t(const char *line); -static int __init read_suspend_image(struct block_device *bdev, - union diskpage *cur) + +#define next_entry(diskpage) diskpage->link.next + +static int __init read_suspend_image(void) { swp_entry_t next; int i, nr_pgdir_pages; + union diskpage *cur; + int 
error = 0; -#define PREPARENEXT \ - { next = cur->link.next; \ - next.val = swp_offset(next) * PAGE_SIZE; \ - } - - if (bdev_read_page(bdev, 0, cur)) return -EIO; + cur = (union diskpage *)get_zeroed_page(GFP_ATOMIC); + if (!cur) + return -ENOMEM; - if ((!memcmp("SWAP-SPACE",cur->swh.magic.magic,10)) || - (!memcmp("SWAPSPACE2",cur->swh.magic.magic,10))) { - printk(KERN_ERR "%sThis is normal swap space\n", name_resume ); - return -EINVAL; - } + if ((error = read_page(0, cur))) + goto Done; - PREPARENEXT; /* We have to read next position before we overwrite it */ + /* + * We have to read next position before we overwrite it + */ + next = next_entry(cur); if (!memcmp("S1",cur->swh.magic.magic,2)) memcpy(cur->swh.magic.magic,"SWAP-SPACE",10); else if (!memcmp("S2",cur->swh.magic.magic,2)) memcpy(cur->swh.magic.magic,"SWAPSPACE2",10); - else { - printk("swsusp: %s: Unable to find suspended-data signature (%.10s - misspelled?\n", - name_resume, cur->swh.magic.magic); - return -EFAULT; + else if ((!memcmp("SWAP-SPACE",cur->swh.magic.magic,10)) || + (!memcmp("SWAPSPACE2",cur->swh.magic.magic,10))) { + printk(KERN_ERR "swsusp: Partition is normal swap space\n"); + error = -EINVAL; + goto Done; + } else { + printk(KERN_ERR "swsusp: Invalid partition type.\n"); + error = -EINVAL; + goto Done; } + /* + * Reset swap signature now. + */ + if ((error = write_page(0,cur))) + goto Done; + printk( "%sSignature found, resuming\n", name_resume ); MDELAY(1000); - if (bdev_read_page(bdev, next.val, cur)) return -EIO; - if (sanity_check(&cur->sh)) /* Is this same machine? */ - return -EPERM; - PREPARENEXT; + if ((error = read_page(swp_offset(next), cur))) + goto Done; + /* Is this same machine? 
*/ + if ((error = sanity_check(&cur->sh))) + goto Done; + next = next_entry(cur); pagedir_save = cur->sh.suspend_pagedir; nr_copy_pages = cur->sh.num_pbes; @@ -763,8 +801,10 @@ static int __init read_suspend_image(struct block_device *bdev, pagedir_order = get_bitmask_order(nr_pgdir_pages); pagedir_nosave = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC, pagedir_order); - if (!pagedir_nosave) - return -ENOMEM; + if (!pagedir_nosave) { + error = -ENOMEM; + goto Done; + } PRINTK( "%sReading pagedir, ", name_resume ); @@ -772,15 +812,17 @@ static int __init read_suspend_image(struct block_device *bdev, for (i=nr_pgdir_pages-1; i>=0; i--) { BUG_ON (!next.val); cur = (union diskpage *)((char *) pagedir_nosave)+i; - if (bdev_read_page(bdev, next.val, cur)) return -EIO; - PREPARENEXT; + error = read_page(swp_offset(next), cur); + if (error) + goto FreePagedir; + next = next_entry(cur); } BUG_ON (next.val); - if (relocate_pagedir()) - return -ENOMEM; - if (check_pagedir()) - return -ENOMEM; + if ((error = relocate_pagedir())) + goto FreePagedir; + if ((error = check_pagedir())) + goto FreePagedir; printk( "Reading image data (%d pages): ", nr_copy_pages ); for(i=0; i < nr_copy_pages; i++) { @@ -789,11 +831,18 @@ static int __init read_suspend_image(struct block_device *bdev, printk( "." ); /* You do not need to check for overlaps... ... 
check_pagedir already did this work */ - if (bdev_read_page(bdev, swp_offset(swap_address) * PAGE_SIZE, (char *)((pagedir_nosave+i)->address))) - return -EIO; + error = read_page(swp_offset(swap_address), + (char *)((pagedir_nosave+i)->address)); + if (error) + goto FreePagedir; } printk( "|\n" ); - return 0; + Done: + free_page((unsigned long)cur); + return error; + FreePagedir: + free_pages((unsigned long)pagedir_nosave,pagedir_order); + goto Done; } /** @@ -802,28 +851,34 @@ static int __init read_suspend_image(struct block_device *bdev, int swsusp_save(void) { + int error; + #if defined (CONFIG_HIGHMEM) || defined (COFNIG_DISCONTIGMEM) printk("swsusp is not supported with high- or discontig-mem.\n"); return -EPERM; #endif - return 0; + if ((error = arch_prepare_suspend())) + return error; + local_irq_disable(); + error = swsusp_arch_suspend(0); + local_irq_enable(); + return error; } /** * swsusp_write - Write saved memory image to swap. * - * do_magic(0) returns after system is resumed. + * swsusp_arch_suspend(0) returns after system is resumed. * - * do_magic() copies all "used" memory to "free" memory, then - * unsuspends all device drivers, and writes memory to disk + * swsusp_arch_suspend() copies all "used" memory to "free" memory, + * then unsuspends all device drivers, and writes memory to disk * using normal kernel mechanism. 
*/ int swsusp_write(void) { - arch_prepare_suspend(); - return do_magic(0); + return suspend_save_image(); } @@ -833,7 +888,6 @@ int swsusp_write(void) int __init swsusp_read(void) { - union diskpage *cur; int error; char b[BDEVNAME_SIZE]; @@ -844,19 +898,13 @@ int __init swsusp_read(void) printk("swsusp: Resume From Partition: %s, Device: %s\n", resume_file, __bdevname(resume_device, b)); - cur = (union diskpage *)get_zeroed_page(GFP_ATOMIC); - if (cur) { - struct block_device *bdev; - bdev = open_by_devnum(resume_device, FMODE_READ, BDEV_RAW); - if (!IS_ERR(bdev)) { - set_blocksize(bdev, PAGE_SIZE); - error = read_suspend_image(bdev, cur); - blkdev_put(bdev, BDEV_RAW); - } else - error = PTR_ERR(bdev); - free_page((unsigned long)cur); + resume_bdev = open_by_devnum(resume_device, FMODE_READ, BDEV_RAW); + if (!IS_ERR(resume_bdev)) { + set_blocksize(resume_bdev, PAGE_SIZE); + error = read_suspend_image(); + blkdev_put(resume_bdev, BDEV_RAW); } else - error = -ENOMEM; + error = PTR_ERR(resume_bdev); if (!error) PRINTK("Reading resume file was successful\n"); @@ -873,7 +921,11 @@ int __init swsusp_read(void) int __init swsusp_restore(void) { - return do_magic(1); + int error; + local_irq_disable(); + error = swsusp_arch_suspend(1); + local_irq_enable(); + return error; } @@ -885,13 +937,20 @@ int swsusp_free(void) { PRINTK( "Freeing prev allocated pagedir\n" ); free_suspend_pagedir((unsigned long) pagedir_save); - - PRINTK( "Fixing swap signatures... 
" ); - mark_swapfiles(((swp_entry_t) {0}), MARK_SWAP_RESUME); - PRINTK( "ok\n" ); return 0; } + +int software_suspend(void) +{ + struct pm_ops swsusp_ops = { + .pm_disk_mode = PM_DISK_SHUTDOWN, + }; + + pm_set_ops(&swsusp_ops); + return pm_suspend(PM_SUSPEND_DISK); +} + static int __init resume_setup(char *str) { if (strlen(str)) diff --git a/kernel/sched.c b/kernel/sched.c index 89f1bb28dacd..9dc251a8d8a5 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -28,6 +28,7 @@ #include <linux/kernel_stat.h> #include <linux/security.h> #include <linux/notifier.h> +#include <linux/suspend.h> #include <linux/blkdev.h> #include <linux/delay.h> #include <linux/timer.h> @@ -2415,6 +2416,9 @@ static int migration_thread(void * data) struct list_head *head; migration_req_t *req; + if (current->flags & PF_FREEZE) + refrigerator(PF_IOTHREAD); + spin_lock_irq(&rq->lock); head = &rq->migration_queue; current->state = TASK_INTERRUPTIBLE; diff --git a/kernel/sys.c b/kernel/sys.c index 02b5a12dfd59..d77453173d29 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -456,7 +456,7 @@ asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user #ifdef CONFIG_SOFTWARE_SUSPEND case LINUX_REBOOT_CMD_SW_SUSPEND: - if (!pm_suspend(PM_SUSPEND_DISK)) + if (!software_suspend()) break; do_exit(0); break; |
