From 1eede070a59e1cc73da51e1aaa00d9ab86572cfc Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 20 May 2008 23:00:01 +0200 Subject: Introduce new top level suspend and hibernation callbacks Introduce 'struct pm_ops' and 'struct pm_ext_ops' ('ext' meaning 'extended') representing suspend and hibernation operations for bus types, device classes, device types and device drivers. Modify the PM core to use 'struct pm_ops' and 'struct pm_ext_ops' objects, if defined, instead of the ->suspend(), ->resume(), ->suspend_late(), and ->resume_early() callbacks (the old callbacks will be considered legacy and gradually phased out). The main purpose of doing this is to separate suspend (aka S2RAM and standby) callbacks from hibernation callbacks in such a way that the new callbacks won't take arguments and the semantics of each of them will be clearly specified. This has been requested multiple times by many people, including Linus himself, and the reason is that within the current scheme, if ->resume() is called, for example, it is difficult to say why it has been called (i.e. is it a resume from RAM or from hibernation, or a suspend/hibernation failure, etc.?). The second purpose is to make the suspend/hibernation callbacks more flexible so that device drivers can handle more than they can within the current scheme. For example, some drivers may need to prevent new children of the device from being registered before their ->suspend() callbacks are executed, or they may want to carry out some operations requiring the availability of some other devices, not directly bound via the parent-child relationship, in order to prepare for the execution of ->suspend(), etc. Ultimately, we'd like to stop using the freezing of tasks for suspend, and therefore the drivers' suspend/hibernation code will have to take care of handling user space during suspend/hibernation. That, in turn, would be difficult within the current scheme without the new ->prepare() and ->complete() callbacks. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Jesse Barnes --- kernel/power/disk.c | 22 +++++++++++++++------- kernel/power/main.c | 6 ++++-- 2 files changed, 19 insertions(+), 9 deletions(-) (limited to 'kernel/power') diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 14a656cdc652..d416be0efa8a 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -193,6 +193,7 @@ static int create_image(int platform_mode) if (error) return error; + device_pm_lock(); local_irq_disable(); /* At this point, device_suspend() has been called, but *not* * device_power_down(). We *must* call device_power_down() now. @@ -224,9 +225,11 @@ static int create_image(int platform_mode) /* NOTE: device_power_up() is just a resume() for devices * that suspended with irqs off ... no overall powerup. */ - device_power_up(); + device_power_up(in_suspend ? + (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); Enable_irqs: local_irq_enable(); + device_pm_unlock(); return error; } @@ -280,7 +283,8 @@ int hibernation_snapshot(int platform_mode) Finish: platform_finish(platform_mode); Resume_devices: - device_resume(); + device_resume(in_suspend ? + (error ?
PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); Resume_console: resume_console(); Close: @@ -300,8 +304,9 @@ static int resume_target_kernel(void) { int error; + device_pm_lock(); local_irq_disable(); - error = device_power_down(PMSG_PRETHAW); + error = device_power_down(PMSG_QUIESCE); if (error) { printk(KERN_ERR "PM: Some devices failed to power down, " "aborting resume\n"); @@ -329,9 +334,10 @@ static int resume_target_kernel(void) swsusp_free(); restore_processor_state(); touch_softlockup_watchdog(); - device_power_up(); + device_power_up(PMSG_RECOVER); Enable_irqs: local_irq_enable(); + device_pm_unlock(); return error; } @@ -350,7 +356,7 @@ int hibernation_restore(int platform_mode) pm_prepare_console(); suspend_console(); - error = device_suspend(PMSG_PRETHAW); + error = device_suspend(PMSG_QUIESCE); if (error) goto Finish; @@ -362,7 +368,7 @@ int hibernation_restore(int platform_mode) enable_nonboot_cpus(); } platform_restore_cleanup(platform_mode); - device_resume(); + device_resume(PMSG_RECOVER); Finish: resume_console(); pm_restore_console(); @@ -403,6 +409,7 @@ int hibernation_platform_enter(void) if (error) goto Finish; + device_pm_lock(); local_irq_disable(); error = device_power_down(PMSG_HIBERNATE); if (!error) { @@ -411,6 +418,7 @@ int hibernation_platform_enter(void) while (1); } local_irq_enable(); + device_pm_unlock(); /* * We don't need to reenable the nonboot CPUs or resume consoles, since @@ -419,7 +427,7 @@ int hibernation_platform_enter(void) Finish: hibernation_ops->finish(); Resume_devices: - device_resume(); + device_resume(PMSG_RESTORE); Resume_console: resume_console(); Close: diff --git a/kernel/power/main.c b/kernel/power/main.c index 6a6d5eb3524e..d023b6b584e5 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -228,6 +228,7 @@ static int suspend_enter(suspend_state_t state) { int error = 0; + device_pm_lock(); arch_suspend_disable_irqs(); BUG_ON(!irqs_disabled()); @@ -239,10 +240,11 @@ static int suspend_enter(suspend_state_t state) if (!suspend_test(TEST_CORE)) error = suspend_ops->enter(state); - device_power_up(); + device_power_up(PMSG_RESUME); Done: arch_suspend_enable_irqs(); BUG_ON(irqs_disabled()); + device_pm_unlock(); return error; } @@ -291,7 +293,7 @@ int suspend_devices_and_enter(suspend_state_t state) if (suspend_ops->finish) suspend_ops->finish(); Resume_devices: - device_resume(); + device_resume(PMSG_RESUME); Resume_console: resume_console(); Close: -- cgit v1.2.3 From d8f3de0d2412bb91639cfefc5b3c79dbf3812212 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 12 Jun 2008 23:24:06 +0200 Subject: Suspend-related patches for 2.6.27 ACPI PM: Add possibility to change suspend sequence There are some systems out there that don't work correctly with our current suspend/hibernation code ordering. Provide a workaround for these systems, allowing them to pass 'acpi_sleep=old_ordering' on the kernel command line so that the pre-ACPI 2.0 ("old") suspend code ordering is used. Unfortunately, this requires us to add a platform hook to the resuming of devices for recovering the platform in case one of the device drivers' .suspend() routines returns an error code. Namely, ACPI 1.0 specifies that _PTS should be called before suspending devices, but _WAK should still be called before resuming them in order to undo the changes made by _PTS. However, if there is an error while suspending devices, they are automatically resumed without returning control to the PM core, so _WAK has to be called from within device_resume() in such cases.
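To illustrate the resulting flow, here is a condensed sketch of suspend_devices_and_enter() as it looks after this series; the function, label and callback names are taken from the kernel/power/main.c and drivers/acpi/sleep/main.c hunks below, while error messages, console handling around device_suspend() and the test hooks are omitted, so treat it as an illustration rather than the complete code:

  int suspend_devices_and_enter(suspend_state_t state)
  {
          int error = device_suspend(PMSG_SUSPEND);

          if (error)
                  goto Recover_platform;
          /* ... suspend_ops->prepare(), suspend_enter(), suspend_ops->finish() ... */
   Resume_devices:
          device_resume(PMSG_RESUME);
          resume_console();
          if (suspend_ops->end)
                  suspend_ops->end();
          return error;

   Recover_platform:
          /*
           * With acpi_sleep=old_ordering, ->recover points to acpi_pm_finish(),
           * which runs _WAK and thereby undoes _PTS before the devices are
           * resumed.
           */
          if (suspend_ops->recover)
                  suspend_ops->recover();
          goto Resume_devices;
  }
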
The patch also reorders and refactors the ACPI suspend/hibernation code to avoid duplication as far as reasonably possible. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Jesse Barnes --- Documentation/kernel-parameters.txt | 6 +- arch/x86/kernel/acpi/sleep.c | 2 + drivers/acpi/sleep/main.c | 276 +++++++++++++++++++++--------------- drivers/base/power/main.c | 2 - include/linux/acpi.h | 3 + include/linux/suspend.h | 14 +- kernel/power/disk.c | 28 +++- kernel/power/main.c | 10 +- 8 files changed, 215 insertions(+), 126 deletions(-) (limited to 'kernel/power') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 9cf7b34f2db0..18d793ea0dd3 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -147,10 +147,14 @@ and is between 256 and 4096 characters. It is defined in the file default: 0 acpi_sleep= [HW,ACPI] Sleep options - Format: { s3_bios, s3_mode, s3_beep } + Format: { s3_bios, s3_mode, s3_beep, old_ordering } See Documentation/power/video.txt for s3_bios and s3_mode. s3_beep is for debugging; it makes the PC's speaker beep as soon as the kernel's real-mode entry point is called. + old_ordering causes the ACPI 1.0 ordering of the _PTS + control method, wrt putting devices into low power + states, to be enforced (the ACPI 2.0 ordering of _PTS is + used by default). acpi_sci= [HW,ACPI] ACPI System Control Interrupt trigger mode Format: { level | edge | high | low } diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index afc25ee9964b..882e970032d5 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -124,6 +124,8 @@ static int __init acpi_sleep_setup(char *str) acpi_realmode_flags |= 2; if (strncmp(str, "s3_beep", 7) == 0) acpi_realmode_flags |= 4; + if (strncmp(str, "old_ordering", 12) == 0) + acpi_old_suspend_ordering(); str = strchr(str, ','); if (str != NULL) str += strspn(str, ", \t"); diff --git a/drivers/acpi/sleep/main.c b/drivers/acpi/sleep/main.c index 0f2caea2fc83..4addf8ad50ae 100644 --- a/drivers/acpi/sleep/main.c +++ b/drivers/acpi/sleep/main.c @@ -24,10 +24,6 @@ u8 sleep_states[ACPI_S_STATE_COUNT]; -#ifdef CONFIG_PM_SLEEP -static u32 acpi_target_sleep_state = ACPI_STATE_S0; -#endif - static int acpi_sleep_prepare(u32 acpi_state) { #ifdef CONFIG_ACPI_SLEEP @@ -50,9 +46,96 @@ static int acpi_sleep_prepare(u32 acpi_state) return 0; } -#ifdef CONFIG_SUSPEND -static struct platform_suspend_ops acpi_suspend_ops; +#ifdef CONFIG_PM_SLEEP +static u32 acpi_target_sleep_state = ACPI_STATE_S0; + +/* + * ACPI 1.0 wants us to execute _PTS before suspending devices, so we allow the + * user to request that behavior by using the 'acpi_old_suspend_ordering' + * kernel command line option that causes the following variable to be set. + */ +static bool old_suspend_ordering; + +void __init acpi_old_suspend_ordering(void) +{ + old_suspend_ordering = true; +} + +/** + * acpi_pm_disable_gpes - Disable the GPEs. + */ +static int acpi_pm_disable_gpes(void) +{ + acpi_hw_disable_all_gpes(); + return 0; +} + +/** + * __acpi_pm_prepare - Prepare the platform to enter the target state. + * + * If necessary, set the firmware waking vector and do arch-specific + * nastiness to get the wakeup code to the waking vector. 
+ */ +static int __acpi_pm_prepare(void) +{ + int error = acpi_sleep_prepare(acpi_target_sleep_state); + + if (error) + acpi_target_sleep_state = ACPI_STATE_S0; + return error; +} + +/** + * acpi_pm_prepare - Prepare the platform to enter the target sleep + * state and disable the GPEs. + */ +static int acpi_pm_prepare(void) +{ + int error = __acpi_pm_prepare(); + + if (!error) + acpi_hw_disable_all_gpes(); + return error; +} + +/** + * acpi_pm_finish - Instruct the platform to leave a sleep state. + * + * This is called after we wake back up (or if entering the sleep state + * failed). + */ +static void acpi_pm_finish(void) +{ + u32 acpi_state = acpi_target_sleep_state; + + if (acpi_state == ACPI_STATE_S0) + return; + printk(KERN_INFO PREFIX "Waking up from system sleep state S%d\n", + acpi_state); + acpi_disable_wakeup_device(acpi_state); + acpi_leave_sleep_state(acpi_state); + + /* reset firmware waking vector */ + acpi_set_firmware_waking_vector((acpi_physical_address) 0); + + acpi_target_sleep_state = ACPI_STATE_S0; +} + +/** + * acpi_pm_end - Finish up suspend sequence. + */ +static void acpi_pm_end(void) +{ + /* + * This is necessary in case acpi_pm_finish() is not called during a + * failing transition to a sleep state. + */ + acpi_target_sleep_state = ACPI_STATE_S0; +} +#endif /* CONFIG_PM_SLEEP */ + +#ifdef CONFIG_SUSPEND extern void do_suspend_lowlevel(void); static u32 acpi_suspend_states[] = { @@ -66,7 +149,6 @@ static u32 acpi_suspend_states[] = { * acpi_suspend_begin - Set the target system sleep state to the state * associated with given @pm_state, if supported. */ - static int acpi_suspend_begin(suspend_state_t pm_state) { u32 acpi_state = acpi_suspend_states[pm_state]; @@ -82,25 +164,6 @@ static int acpi_suspend_begin(suspend_state_t pm_state) return error; } -/** - * acpi_suspend_prepare - Do preliminary suspend work. - * - * If necessary, set the firmware waking vector and do arch-specific - * nastiness to get the wakeup code to the waking vector. - */ - -static int acpi_suspend_prepare(void) -{ - int error = acpi_sleep_prepare(acpi_target_sleep_state); - - if (error) { - acpi_target_sleep_state = ACPI_STATE_S0; - return error; - } - - return ACPI_SUCCESS(acpi_hw_disable_all_gpes()) ? 0 : -EFAULT; -} - /** * acpi_suspend_enter - Actually enter a sleep state. * @pm_state: ignored @@ -109,7 +172,6 @@ static int acpi_suspend_prepare(void) * assembly, which in turn call acpi_enter_sleep_state(). * It's unfortunate, but it works. Please fix if you're feeling frisky. */ - static int acpi_suspend_enter(suspend_state_t pm_state) { acpi_status status = AE_OK; @@ -166,39 +228,6 @@ static int acpi_suspend_enter(suspend_state_t pm_state) return ACPI_SUCCESS(status) ? 0 : -EFAULT; } -/** - * acpi_suspend_finish - Instruct the platform to leave a sleep state. - * - * This is called after we wake back up (or if entering the sleep state - * failed). - */ - -static void acpi_suspend_finish(void) -{ - u32 acpi_state = acpi_target_sleep_state; - - acpi_disable_wakeup_device(acpi_state); - acpi_leave_sleep_state(acpi_state); - - /* reset firmware waking vector */ - acpi_set_firmware_waking_vector((acpi_physical_address) 0); - - acpi_target_sleep_state = ACPI_STATE_S0; -} - -/** - * acpi_suspend_end - Finish up suspend sequence. - */ - -static void acpi_suspend_end(void) -{ - /* - * This is necessary in case acpi_suspend_finish() is not called during a - * failing transition to a sleep state. 
- */ - acpi_target_sleep_state = ACPI_STATE_S0; -} - static int acpi_suspend_state_valid(suspend_state_t pm_state) { u32 acpi_state; @@ -218,10 +247,39 @@ static int acpi_suspend_state_valid(suspend_state_t pm_state) static struct platform_suspend_ops acpi_suspend_ops = { .valid = acpi_suspend_state_valid, .begin = acpi_suspend_begin, - .prepare = acpi_suspend_prepare, + .prepare = acpi_pm_prepare, + .enter = acpi_suspend_enter, + .finish = acpi_pm_finish, + .end = acpi_pm_end, +}; + +/** + * acpi_suspend_begin_old - Set the target system sleep state to the + * state associated with given @pm_state, if supported, and + * execute the _PTS control method. This function is used if the + * pre-ACPI 2.0 suspend ordering has been requested. + */ +static int acpi_suspend_begin_old(suspend_state_t pm_state) +{ + int error = acpi_suspend_begin(pm_state); + + if (!error) + error = __acpi_pm_prepare(); + return error; +} + +/* + * The following callbacks are used if the pre-ACPI 2.0 suspend ordering has + * been requested. + */ +static struct platform_suspend_ops acpi_suspend_ops_old = { + .valid = acpi_suspend_state_valid, + .begin = acpi_suspend_begin_old, + .prepare = acpi_pm_disable_gpes, .enter = acpi_suspend_enter, - .finish = acpi_suspend_finish, - .end = acpi_suspend_end, + .finish = acpi_pm_finish, + .end = acpi_pm_end, + .recover = acpi_pm_finish, }; #endif /* CONFIG_SUSPEND */ @@ -229,22 +287,9 @@ static struct platform_suspend_ops acpi_suspend_ops = { static int acpi_hibernation_begin(void) { acpi_target_sleep_state = ACPI_STATE_S4; - return 0; } -static int acpi_hibernation_prepare(void) -{ - int error = acpi_sleep_prepare(ACPI_STATE_S4); - - if (error) { - acpi_target_sleep_state = ACPI_STATE_S0; - return error; - } - - return ACPI_SUCCESS(acpi_hw_disable_all_gpes()) ? 0 : -EFAULT; -} - static int acpi_hibernation_enter(void) { acpi_status status = AE_OK; @@ -274,52 +319,55 @@ static void acpi_hibernation_leave(void) acpi_leave_sleep_state_prep(ACPI_STATE_S4); } -static void acpi_hibernation_finish(void) +static void acpi_pm_enable_gpes(void) { - acpi_disable_wakeup_device(ACPI_STATE_S4); - acpi_leave_sleep_state(ACPI_STATE_S4); - - /* reset firmware waking vector */ - acpi_set_firmware_waking_vector((acpi_physical_address) 0); - - acpi_target_sleep_state = ACPI_STATE_S0; + acpi_hw_enable_all_runtime_gpes(); } -static void acpi_hibernation_end(void) -{ - /* - * This is necessary in case acpi_hibernation_finish() is not called - * during a failing transition to the sleep state. - */ - acpi_target_sleep_state = ACPI_STATE_S0; -} +static struct platform_hibernation_ops acpi_hibernation_ops = { + .begin = acpi_hibernation_begin, + .end = acpi_pm_end, + .pre_snapshot = acpi_pm_prepare, + .finish = acpi_pm_finish, + .prepare = acpi_pm_prepare, + .enter = acpi_hibernation_enter, + .leave = acpi_hibernation_leave, + .pre_restore = acpi_pm_disable_gpes, + .restore_cleanup = acpi_pm_enable_gpes, +}; -static int acpi_hibernation_pre_restore(void) +/** + * acpi_hibernation_begin_old - Set the target system sleep state to + * ACPI_STATE_S4 and execute the _PTS control method. This + * function is used if the pre-ACPI 2.0 suspend ordering has been + * requested. + */ +static int acpi_hibernation_begin_old(void) { - acpi_status status; - - status = acpi_hw_disable_all_gpes(); - - return ACPI_SUCCESS(status) ? 
0 : -EFAULT; -} + int error = acpi_sleep_prepare(ACPI_STATE_S4); -static void acpi_hibernation_restore_cleanup(void) -{ - acpi_hw_enable_all_runtime_gpes(); + if (!error) + acpi_target_sleep_state = ACPI_STATE_S4; + return error; } -static struct platform_hibernation_ops acpi_hibernation_ops = { - .begin = acpi_hibernation_begin, - .end = acpi_hibernation_end, - .pre_snapshot = acpi_hibernation_prepare, - .finish = acpi_hibernation_finish, - .prepare = acpi_hibernation_prepare, +/* + * The following callbacks are used if the pre-ACPI 2.0 suspend ordering has + * been requested. + */ +static struct platform_hibernation_ops acpi_hibernation_ops_old = { + .begin = acpi_hibernation_begin_old, + .end = acpi_pm_end, + .pre_snapshot = acpi_pm_disable_gpes, + .finish = acpi_pm_finish, + .prepare = acpi_pm_disable_gpes, .enter = acpi_hibernation_enter, .leave = acpi_hibernation_leave, - .pre_restore = acpi_hibernation_pre_restore, - .restore_cleanup = acpi_hibernation_restore_cleanup, + .pre_restore = acpi_pm_disable_gpes, + .restore_cleanup = acpi_pm_enable_gpes, + .recover = acpi_pm_finish, }; -#endif /* CONFIG_HIBERNATION */ +#endif /* CONFIG_HIBERNATION */ int acpi_suspend(u32 acpi_state) { @@ -461,13 +509,15 @@ int __init acpi_sleep_init(void) } } - suspend_set_ops(&acpi_suspend_ops); + suspend_set_ops(old_suspend_ordering ? + &acpi_suspend_ops_old : &acpi_suspend_ops); #endif #ifdef CONFIG_HIBERNATION status = acpi_get_sleep_type_data(ACPI_STATE_S4, &type_a, &type_b); if (ACPI_SUCCESS(status)) { - hibernation_set_ops(&acpi_hibernation_ops); + hibernation_set_ops(old_suspend_ordering ? + &acpi_hibernation_ops_old : &acpi_hibernation_ops); sleep_states[ACPI_STATE_S4] = 1; printk(" S4"); } diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index d571204aaff7..3250c5257b74 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -781,8 +781,6 @@ int device_suspend(pm_message_t state) error = dpm_prepare(state); if (!error) error = dpm_suspend(state); - if (error) - device_resume(resume_event(state)); return error; } EXPORT_SYMBOL_GPL(device_suspend); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 41f7ce7edd7a..33adcf91ef41 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -234,6 +234,9 @@ int acpi_check_region(resource_size_t start, resource_size_t n, int acpi_check_mem_region(resource_size_t start, resource_size_t n, const char *name); +#ifdef CONFIG_PM_SLEEP +void __init acpi_old_suspend_ordering(void); +#endif /* CONFIG_PM_SLEEP */ #else /* CONFIG_ACPI */ static inline int early_acpi_boot_init(void) diff --git a/include/linux/suspend.h b/include/linux/suspend.h index a6977423baf7..e8e69159af71 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -86,6 +86,11 @@ typedef int __bitwise suspend_state_t; * that implement @begin(), but platforms implementing @begin() should * also provide a @end() which cleans up transitions aborted before * @enter(). + * + * @recover: Recover the platform from a suspend failure. + * Called by the PM core if the suspending of devices fails. + * This callback is optional and should only be implemented by platforms + * which require special recovery actions in that situation. 
*/ struct platform_suspend_ops { int (*valid)(suspend_state_t state); @@ -94,6 +99,7 @@ struct platform_suspend_ops { int (*enter)(suspend_state_t state); void (*finish)(void); void (*end)(void); + void (*recover)(void); }; #ifdef CONFIG_SUSPEND @@ -149,7 +155,7 @@ extern void mark_free_pages(struct zone *zone); * The methods in this structure allow a platform to carry out special * operations required by it during a hibernation transition. * - * All the methods below must be implemented. + * All the methods below, except for @recover(), must be implemented. * * @begin: Tell the platform driver that we're starting hibernation. * Called right after shrinking memory and before freezing devices. @@ -189,6 +195,11 @@ extern void mark_free_pages(struct zone *zone); * @restore_cleanup: Clean up after a failing image restoration. * Called right after the nonboot CPUs have been enabled and before * thawing devices (runs with IRQs on). + * + * @recover: Recover the platform from a failure to suspend devices. + * Called by the PM core if the suspending of devices during hibernation + * fails. This callback is optional and should only be implemented by + * platforms which require special recovery actions in that situation. */ struct platform_hibernation_ops { int (*begin)(void); @@ -200,6 +211,7 @@ struct platform_hibernation_ops { void (*leave)(void); int (*pre_restore)(void); void (*restore_cleanup)(void); + void (*recover)(void); }; #ifdef CONFIG_HIBERNATION diff --git a/kernel/power/disk.c b/kernel/power/disk.c index d416be0efa8a..f011e0870b52 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -179,6 +179,17 @@ static void platform_restore_cleanup(int platform_mode) hibernation_ops->restore_cleanup(); } +/** + * platform_recover - recover the platform from a failure to suspend + * devices. + */ + +static void platform_recover(int platform_mode) +{ + if (platform_mode && hibernation_ops && hibernation_ops->recover) + hibernation_ops->recover(); +} + /** * create_image - freeze devices that need to be frozen with interrupts * off, create the hibernation image and thaw those devices. Control @@ -258,10 +269,10 @@ int hibernation_snapshot(int platform_mode) suspend_console(); error = device_suspend(PMSG_FREEZE); if (error) - goto Resume_console; + goto Recover_platform; if (hibernation_test(TEST_DEVICES)) - goto Resume_devices; + goto Recover_platform; error = platform_pre_snapshot(platform_mode); if (error || hibernation_test(TEST_PLATFORM)) @@ -285,11 +296,14 @@ int hibernation_snapshot(int platform_mode) Resume_devices: device_resume(in_suspend ? (error ? 
PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); - Resume_console: resume_console(); Close: platform_end(platform_mode); return error; + + Recover_platform: + platform_recover(platform_mode); + goto Resume_devices; } /** @@ -398,8 +412,11 @@ int hibernation_platform_enter(void) suspend_console(); error = device_suspend(PMSG_HIBERNATE); - if (error) - goto Resume_console; + if (error) { + if (hibernation_ops->recover) + hibernation_ops->recover(); + goto Resume_devices; + } error = hibernation_ops->prepare(); if (error) @@ -428,7 +445,6 @@ int hibernation_platform_enter(void) hibernation_ops->finish(); Resume_devices: device_resume(PMSG_RESTORE); - Resume_console: resume_console(); Close: hibernation_ops->end(); diff --git a/kernel/power/main.c b/kernel/power/main.c index d023b6b584e5..3398f4651aa1 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -269,11 +269,11 @@ int suspend_devices_and_enter(suspend_state_t state) error = device_suspend(PMSG_SUSPEND); if (error) { printk(KERN_ERR "PM: Some devices failed to suspend\n"); - goto Resume_console; + goto Recover_platform; } if (suspend_test(TEST_DEVICES)) - goto Resume_devices; + goto Recover_platform; if (suspend_ops->prepare) { error = suspend_ops->prepare(); @@ -294,12 +294,16 @@ int suspend_devices_and_enter(suspend_state_t state) suspend_ops->finish(); Resume_devices: device_resume(PMSG_RESUME); - Resume_console: resume_console(); Close: if (suspend_ops->end) suspend_ops->end(); return error; + + Recover_platform: + if (suspend_ops->recover) + suspend_ops->recover(); + goto Resume_devices; } /** -- cgit v1.2.3 From 0775b3dbcb6d17b531b36df520ddab735647f3f7 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:07:08 -0700 Subject: suspend, xen: enable PM_SLEEP for CONFIG_XEN Xen save/restore requires PM_SLEEP to be set without requiring SUSPEND or HIBERNATION. Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- kernel/power/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/power') diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index b45da40e8d25..1436c47cb39b 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -82,7 +82,7 @@ config PM_SLEEP_SMP config PM_SLEEP bool - depends on SUSPEND || HIBERNATION + depends on SUSPEND || HIBERNATION || XEN default y config SUSPEND -- cgit v1.2.3 From 6717ef1aa750b54ddb9d8854b91707ee21f0ad23 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 9 Jul 2008 22:17:01 +0200 Subject: Revert "suspend, xen: enable PM_SLEEP for CONFIG_XEN" This reverts commit 6fbbec428c8e7bb617da2e8a589af2e97bcf3bc4. Rafael doesnt like it - it breaks various assumptions. Signed-off-by: Ingo Molnar --- kernel/power/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/power') diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 1436c47cb39b..b45da40e8d25 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -82,7 +82,7 @@ config PM_SLEEP_SMP config PM_SLEEP bool - depends on SUSPEND || HIBERNATION || XEN + depends on SUSPEND || HIBERNATION default y config SUSPEND -- cgit v1.2.3 From ebb12db51f6c13b30752fcf506baad4c617b153c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 11 Jun 2008 22:04:29 +0200 Subject: Freezer: Introduce PF_FREEZER_NOSIG The freezer currently attempts to distinguish kernel threads from user space tasks by checking if their mm pointer is unset and it does not send fake signals to kernel threads. 
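In code terms, the change boils down to replacing the mm-based heuristic with a test of the new flag. The two helpers below are condensed from the kernel/power/process.c hunk further down and are shown only for illustration, not as the complete freeze_task() logic:

  /* Old heuristic: a task with its own mm is treated as a user space task. */
  static int has_mm(struct task_struct *p)
  {
          return p->mm && !(p->flags & PF_BORROWED_MM);
  }

  /* New test: a fake signal is sent unless the task has PF_FREEZER_NOSIG set. */
  static inline bool should_send_signal(struct task_struct *p)
  {
          return !(p->flags & PF_FREEZER_NOSIG);
  }
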
However, there are kernel threads, mostly related to networking, that behave like user space tasks and may want to be sent a fake signal to be frozen. Introduce the new process flag PF_FREEZER_NOSIG that will be set by default for all kernel threads and make the freezer only send fake signals to the tasks having PF_FREEZER_NOSIG unset. Provide the set_freezable_with_signal() function to be called by the kernel threads that want to be sent a fake signal for freezing. This patch should not change the freezer's observable behavior. Signed-off-by: Rafael J. Wysocki Signed-off-by: Andi Kleen Acked-by: Pavel Machek Signed-off-by: Len Brown --- include/linux/freezer.h | 10 +++++ include/linux/sched.h | 1 + kernel/kthread.c | 2 +- kernel/power/process.c | 97 +++++++++++++++++++++---------------------------- 4 files changed, 54 insertions(+), 56 deletions(-) (limited to 'kernel/power') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 08934995c7ab..deddeedf3257 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -127,6 +127,15 @@ static inline void set_freezable(void) current->flags &= ~PF_NOFREEZE; } +/* + * Tell the freezer that the current task should be frozen by it and that it + * should send a fake signal to the task to freeze it. + */ +static inline void set_freezable_with_signal(void) +{ + current->flags &= ~(PF_NOFREEZE | PF_FREEZER_NOSIG); +} + /* * Freezer-friendly wrappers around wait_event_interruptible() and * wait_event_interruptible_timeout(), originally defined in @@ -174,6 +183,7 @@ static inline void freezer_do_not_count(void) {} static inline void freezer_count(void) {} static inline int freezer_should_skip(struct task_struct *p) { return 0; } static inline void set_freezable(void) {} +static inline void set_freezable_with_signal(void) {} #define wait_event_freezable(wq, condition) \ wait_event_interruptible(wq, condition) diff --git a/include/linux/sched.h b/include/linux/sched.h index 21349173d148..ba2f859c6e4f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1494,6 +1494,7 @@ static inline void put_task_struct(struct task_struct *t) #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */ +#define PF_FREEZER_NOSIG 0x80000000 /* Freezer won't send signals to it */ /* * Only the _current_ task can read/write to tsk->flags, but other diff --git a/kernel/kthread.c b/kernel/kthread.c index 97747cdd37c9..ac3fb7326641 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -235,7 +235,7 @@ int kthreadd(void *unused) set_user_nice(tsk, KTHREAD_NICE_LEVEL); set_cpus_allowed(tsk, CPU_MASK_ALL); - current->flags |= PF_NOFREEZE; + current->flags |= PF_NOFREEZE | PF_FREEZER_NOSIG; for (;;) { set_current_state(TASK_INTERRUPTIBLE); diff --git a/kernel/power/process.c b/kernel/power/process.c index f1d0b345c9ba..5fb87652f214 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -19,9 +19,6 @@ */ #define TIMEOUT (20 * HZ) -#define FREEZER_KERNEL_THREADS 0 -#define FREEZER_USER_SPACE 1 - static inline int freezeable(struct task_struct * p) { if ((p == current) || @@ -84,63 +81,53 @@ static void fake_signal_wake_up(struct task_struct *p) spin_unlock_irqrestore(&p->sighand->siglock, flags); } -static int has_mm(struct task_struct *p) +static inline bool should_send_signal(struct task_struct *p) { - return (p->mm && !(p->flags & PF_BORROWED_MM)); + return 
!(p->flags & PF_FREEZER_NOSIG); } /** * freeze_task - send a freeze request to given task * @p: task to send the request to - * @with_mm_only: if set, the request will only be sent if the task has its - * own mm - * Return value: 0, if @with_mm_only is set and the task has no mm of its - * own or the task is frozen, 1, otherwise + * @sig_only: if set, the request will only be sent if the task has the + * PF_FREEZER_NOSIG flag unset + * Return value: 'false', if @sig_only is set and the task has + * PF_FREEZER_NOSIG set or the task is frozen, 'true', otherwise * - * The freeze request is sent by seting the tasks's TIF_FREEZE flag and + * The freeze request is sent by setting the tasks's TIF_FREEZE flag and * either sending a fake signal to it or waking it up, depending on whether - * or not it has its own mm (ie. it is a user land task). If @with_mm_only - * is set and the task has no mm of its own (ie. it is a kernel thread), - * its TIF_FREEZE flag should not be set. - * - * The task_lock() is necessary to prevent races with exit_mm() or - * use_mm()/unuse_mm() from occuring. + * or not it has PF_FREEZER_NOSIG set. If @sig_only is set and the task + * has PF_FREEZER_NOSIG set (ie. it is a typical kernel thread), its + * TIF_FREEZE flag will not be set. */ -static int freeze_task(struct task_struct *p, int with_mm_only) +static bool freeze_task(struct task_struct *p, bool sig_only) { - int ret = 1; + /* + * We first check if the task is freezing and next if it has already + * been frozen to avoid the race with frozen_process() which first marks + * the task as frozen and next clears its TIF_FREEZE. + */ + if (!freezing(p)) { + rmb(); + if (frozen(p)) + return false; - task_lock(p); - if (freezing(p)) { - if (has_mm(p)) { - if (!signal_pending(p)) - fake_signal_wake_up(p); - } else { - if (with_mm_only) - ret = 0; - else - wake_up_state(p, TASK_INTERRUPTIBLE); - } + if (!sig_only || should_send_signal(p)) + set_freeze_flag(p); + else + return false; + } + + if (should_send_signal(p)) { + if (!signal_pending(p)) + fake_signal_wake_up(p); + } else if (sig_only) { + return false; } else { - rmb(); - if (frozen(p)) { - ret = 0; - } else { - if (has_mm(p)) { - set_freeze_flag(p); - fake_signal_wake_up(p); - } else { - if (with_mm_only) { - ret = 0; - } else { - set_freeze_flag(p); - wake_up_state(p, TASK_INTERRUPTIBLE); - } - } - } + wake_up_state(p, TASK_INTERRUPTIBLE); } - task_unlock(p); - return ret; + + return true; } static void cancel_freezing(struct task_struct *p) @@ -156,7 +143,7 @@ static void cancel_freezing(struct task_struct *p) } } -static int try_to_freeze_tasks(int freeze_user_space) +static int try_to_freeze_tasks(bool sig_only) { struct task_struct *g, *p; unsigned long end_time; @@ -175,7 +162,7 @@ static int try_to_freeze_tasks(int freeze_user_space) if (frozen(p) || !freezeable(p)) continue; - if (!freeze_task(p, freeze_user_space)) + if (!freeze_task(p, sig_only)) continue; /* @@ -235,13 +222,13 @@ int freeze_processes(void) int error; printk("Freezing user space processes ... "); - error = try_to_freeze_tasks(FREEZER_USER_SPACE); + error = try_to_freeze_tasks(true); if (error) goto Exit; printk("done.\n"); printk("Freezing remaining freezable tasks ... 
"); - error = try_to_freeze_tasks(FREEZER_KERNEL_THREADS); + error = try_to_freeze_tasks(false); if (error) goto Exit; printk("done."); @@ -251,7 +238,7 @@ int freeze_processes(void) return error; } -static void thaw_tasks(int thaw_user_space) +static void thaw_tasks(bool nosig_only) { struct task_struct *g, *p; @@ -260,7 +247,7 @@ static void thaw_tasks(int thaw_user_space) if (!freezeable(p)) continue; - if (!p->mm == thaw_user_space) + if (nosig_only && should_send_signal(p)) continue; thaw_process(p); @@ -271,8 +258,8 @@ static void thaw_tasks(int thaw_user_space) void thaw_processes(void) { printk("Restarting tasks ... "); - thaw_tasks(FREEZER_KERNEL_THREADS); - thaw_tasks(FREEZER_USER_SPACE); + thaw_tasks(true); + thaw_tasks(false); schedule(); printk("done.\n"); } -- cgit v1.2.3 From 52d11025dba32bed696eaee1822b26529e764770 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 11 Jun 2008 22:07:52 +0200 Subject: snapshot: Push BKL down into ioctl handlers Push BKL down into ioctl handlers - snapshot device. Signed-off-by: Alan Cox Signed-off-by: Rafael J. Wysocki Signed-off-by: Len Brown Signed-off-by: Andi Kleen --- kernel/power/user.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'kernel/power') diff --git a/kernel/power/user.c b/kernel/power/user.c index f5512cb3aa86..658262b15994 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -164,8 +165,8 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf, return res; } -static int snapshot_ioctl(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) +static long snapshot_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) { int error = 0; struct snapshot_data *data; @@ -181,6 +182,8 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, data = filp->private_data; + lock_kernel(); + switch (cmd) { case SNAPSHOT_FREEZE: @@ -389,7 +392,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, error = -ENOTTY; } - + unlock_kernel(); return error; } @@ -399,7 +402,7 @@ static const struct file_operations snapshot_fops = { .read = snapshot_read, .write = snapshot_write, .llseek = no_llseek, - .ioctl = snapshot_ioctl, + .unlocked_ioctl = snapshot_ioctl, }; static struct miscdevice snapshot_device = { -- cgit v1.2.3 From 25f2f3daadaf0768a61d02ee3ed3d9a21e9dc46c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 11 Jun 2008 22:09:45 +0200 Subject: snapshot: Use pm_mutex for mutual exclusion We can avoid taking the BKL in snapshot_ioctl() if pm_mutex is used to prevent the ioctls from being executed concurrently. In addition, although it is only possible to open /dev/snapshot once, the task which has done that may spawn a child that will inherit the open descriptor, so in theory they can call snapshot_write(), snapshot_read() and snapshot_release() concurrently. pm_mutex can also be used for mutual exclusion in such cases. Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Andi Kleen Acked-by: Pavel Machek Signed-off-by: Len Brown --- kernel/power/user.c | 68 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 26 deletions(-) (limited to 'kernel/power') diff --git a/kernel/power/user.c b/kernel/power/user.c index 658262b15994..a6332a313262 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -70,16 +70,22 @@ static int snapshot_open(struct inode *inode, struct file *filp) struct snapshot_data *data; int error; - if (!atomic_add_unless(&snapshot_device_available, -1, 0)) - return -EBUSY; + mutex_lock(&pm_mutex); + + if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { + error = -EBUSY; + goto Unlock; + } if ((filp->f_flags & O_ACCMODE) == O_RDWR) { atomic_inc(&snapshot_device_available); - return -ENOSYS; + error = -ENOSYS; + goto Unlock; } if(create_basic_memory_bitmaps()) { atomic_inc(&snapshot_device_available); - return -ENOMEM; + error = -ENOMEM; + goto Unlock; } nonseekable_open(inode, filp); data = &snapshot_state; @@ -99,33 +105,36 @@ static int snapshot_open(struct inode *inode, struct file *filp) if (error) pm_notifier_call_chain(PM_POST_HIBERNATION); } - if (error) { + if (error) atomic_inc(&snapshot_device_available); - return error; - } data->frozen = 0; data->ready = 0; data->platform_support = 0; - return 0; + Unlock: + mutex_unlock(&pm_mutex); + + return error; } static int snapshot_release(struct inode *inode, struct file *filp) { struct snapshot_data *data; + mutex_lock(&pm_mutex); + swsusp_free(); free_basic_memory_bitmaps(); data = filp->private_data; free_all_swap_pages(data->swap); - if (data->frozen) { - mutex_lock(&pm_mutex); + if (data->frozen) thaw_processes(); - mutex_unlock(&pm_mutex); - } pm_notifier_call_chain(data->mode == O_WRONLY ? PM_POST_HIBERNATION : PM_POST_RESTORE); atomic_inc(&snapshot_device_available); + + mutex_unlock(&pm_mutex); + return 0; } @@ -135,9 +144,13 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf, struct snapshot_data *data; ssize_t res; + mutex_lock(&pm_mutex); + data = filp->private_data; - if (!data->ready) - return -ENODATA; + if (!data->ready) { + res = -ENODATA; + goto Unlock; + } res = snapshot_read_next(&data->handle, count); if (res > 0) { if (copy_to_user(buf, data_of(data->handle), res)) @@ -145,6 +158,10 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf, else *offp = data->handle.offset; } + + Unlock: + mutex_unlock(&pm_mutex); + return res; } @@ -154,6 +171,8 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf, struct snapshot_data *data; ssize_t res; + mutex_lock(&pm_mutex); + data = filp->private_data; res = snapshot_write_next(&data->handle, count); if (res > 0) { @@ -162,6 +181,9 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf, else *offp = data->handle.offset; } + + mutex_unlock(&pm_mutex); + return res; } @@ -180,16 +202,16 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, if (!capable(CAP_SYS_ADMIN)) return -EPERM; - data = filp->private_data; + if (!mutex_trylock(&pm_mutex)) + return -EBUSY; - lock_kernel(); + data = filp->private_data; switch (cmd) { case SNAPSHOT_FREEZE: if (data->frozen) break; - mutex_lock(&pm_mutex); printk("Syncing filesystems ... 
"); sys_sync(); printk("done.\n"); @@ -197,7 +219,6 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, error = freeze_processes(); if (error) thaw_processes(); - mutex_unlock(&pm_mutex); if (!error) data->frozen = 1; break; @@ -205,9 +226,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, case SNAPSHOT_UNFREEZE: if (!data->frozen || data->ready) break; - mutex_lock(&pm_mutex); thaw_processes(); - mutex_unlock(&pm_mutex); data->frozen = 0; break; @@ -310,16 +329,11 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, error = -EPERM; break; } - if (!mutex_trylock(&pm_mutex)) { - error = -EBUSY; - break; - } /* * Tasks are frozen and the notifiers have been called with * PM_HIBERNATION_PREPARE */ error = suspend_devices_and_enter(PM_SUSPEND_MEM); - mutex_unlock(&pm_mutex); break; case SNAPSHOT_PLATFORM_SUPPORT: @@ -392,7 +406,9 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, error = -ENOTTY; } - unlock_kernel(); + + mutex_unlock(&pm_mutex); + return error; } -- cgit v1.2.3 From 93a0886e2368eafb9df5e2021fb185195cee88b2 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 15 Jul 2008 13:43:42 -0700 Subject: x86, xen, power: fix up config dependencies on PM Xen save/restore needs bits of code enabled by PM_SLEEP, and PM_SLEEP depends on PM. So make XEN_SAVE_RESTORE depend on PM and PM_SLEEP depend on XEN_SAVE_RESTORE. Signed-off-by: Jeremy Fitzhardinge Acked-by: Rafael J. Wysocki Signed-off-by: Ingo Molnar --- arch/x86/xen/Kconfig | 5 +++++ kernel/power/Kconfig | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'kernel/power') diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 20b49729bed5..3815e425f470 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -23,3 +23,8 @@ config XEN_MAX_DOMAIN_MEMORY according to the maximum possible memory size of a Xen domain. This array uses 1 page per gigabyte, so there's no need to be too stingy here. + +config XEN_SAVE_RESTORE + bool + depends on PM + default y \ No newline at end of file diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index b45da40e8d25..59dfdf1e1d20 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -82,7 +82,7 @@ config PM_SLEEP_SMP config PM_SLEEP bool - depends on SUSPEND || HIBERNATION + depends on SUSPEND || HIBERNATION || XEN_SAVE_RESTORE default y config SUSPEND -- cgit v1.2.3