System Suspend and Hibernate
System-wide power states: suspend-to-RAM, hibernate, and the PM state machine
Sleep states
Linux supports multiple system-level sleep states, described by ACPI S-states and the kernel's internal model:
| Kernel state | ACPI | Description |
|---|---|---|
| freeze | S0 (idle) | Freeze user processes, suspend devices. CPU stays powered. Very fast resume. |
| standby | S1 | Shallow sleep. CPU powered down but state retained. |
| mem | S3 | Suspend-to-RAM (STR). All state in DRAM, rest powered off. Common laptop sleep. |
| disk | S4 | Hibernate (STD). Memory image written to swap/file, power completely off. |
# Check which states are available (prints the supported kernel state names on one line)
cat /sys/power/state
# Example output: freeze mem disk
# Enter suspend-to-RAM; "sudo tee" is used so the write itself runs as root
echo mem | sudo tee /sys/power/state
The suspend state machine
/sys/power/state write "mem"
│
▼
kernel/power/suspend.c: pm_suspend()
│
├─ 1. Sync filesystems
│
├─ 2. Freeze userspace processes
│ freeze_processes() — set TIF_SIGPENDING, tasks reach
│ try_to_freeze() and enter the refrigerator (freezable points)
│
├─ 3. Suspend devices (reverse probe order)
│ dpm_suspend_start() → each driver's .suspend callback
│ (deepest devices in tree suspended first)
│
├─ 4. Disable non-boot CPUs (CPU hotplug)
│
├─ 5. Suspend syscore (timers, IRQ chips, clocksource)
│ syscore_suspend() — last things before hardware off
│
├─ 6. Enter hardware sleep
│ acpi_suspend_enter() or platform-specific
│ ──────────────── ASLEEP ────────────────
│
└─ 7. Resume (reverse order):
syscore_resume()
Enable CPUs
dpm_resume_end() → each driver's .resume callback
Thaw processes
PM notifiers
Notifiers allow subsystems to react to suspend/resume events:
#include <linux/suspend.h>
/*
 * mysubsys_pm_notify - PM notifier callback for mysubsys.
 * @nb:     notifier block this callback was registered through (unused)
 * @action: PM_* event code identifying the phase of the transition
 * @data:   event-specific payload (unused)
 *
 * Quiesces the subsystem's background work before a system suspend and
 * restarts it once the system has resumed. The hibernation and restore
 * events are listed explicitly but need no action here.
 *
 * Return: NOTIFY_OK for every event.
 */
static int mysubsys_pm_notify(struct notifier_block *nb,
			      unsigned long action, void *data)
{
	switch (action) {
	case PM_SUSPEND_PREPARE:
		/*
		 * About to suspend: stop background work.
		 * flush_workqueue() only waits for items that are already
		 * queued; a delayed item whose timer has not fired yet would
		 * slip through, so cancel the delayed work first.
		 */
		cancel_delayed_work_sync(&mysubsys_work);
		flush_workqueue(mysubsys_wq);
		break;
	case PM_POST_SUSPEND:
		/* Resumed: restart background work one second (HZ jiffies) from now */
		queue_delayed_work(mysubsys_wq, &mysubsys_work, HZ);
		break;
	case PM_HIBERNATION_PREPARE:
		break;
	case PM_POST_HIBERNATION:
		break;
	case PM_RESTORE_PREPARE: /* before loading hibernate image */
		break;
	case PM_POST_RESTORE:
		break;
	default:
		/* Unknown or future events are ignored */
		break;
	}

	return NOTIFY_OK;
}
/* Notifier block that routes PM events to mysubsys_pm_notify(). */
static struct notifier_block mysubsys_pm_nb = {
.notifier_call = mysubsys_pm_notify,
};
/* Register: from this point mysubsys_pm_notify() is called for PM events */
register_pm_notifier(&mysubsys_pm_nb);
/* Unregister: pass the same block that was registered */
unregister_pm_notifier(&mysubsys_pm_nb);
Device suspend callbacks
The full driver PM callback sequence for system suspend:
dpm_prepare() → driver .prepare() (optional; prepare for suspend)
dpm_suspend() → driver .suspend() (save state, stop DMA)
dpm_suspend_late() → driver .suspend_late() (last operations before power off)
dpm_suspend_noirq()→ driver .suspend_noirq() (IRQs disabled; final operations)
─────────────── hardware suspended ───────────────
dpm_resume_noirq() → driver .resume_noirq() (IRQs still disabled)
dpm_resume_early() → driver .resume_early() (restore critical state)
dpm_resume() → driver .resume() (full restore)
dpm_complete() → driver .complete() (post-resume cleanup)
For most drivers, only .suspend and .resume need implementation. The _noirq and _late/_early variants are for hardware that requires very late/early access.
Minimal suspend implementation
/*
 * mydriver_suspend - system-sleep suspend callback.
 * @dev: device being suspended; its drvdata holds the driver's struct mydata
 *
 * Quiesces the hardware, snapshots the two registers that are written back
 * in mydriver_resume(), then gates the clock.
 *
 * Return: always 0.
 */
static int mydriver_suspend(struct device *dev)
{
	struct mydata *md = dev_get_drvdata(dev);

	/* Quiesce the device first so the snapshot below is stable */
	mydriver_stop_hw(md);

	/* Snapshot state the hardware will not keep across the sleep */
	md->saved_config = readl(md->base + CONFIG_REG);
	md->saved_irq_mask = readl(md->base + IRQ_MASK_REG);

	/* Register access is finished: drop the clock */
	clk_disable_unprepare(md->clk);

	return 0;
}
/*
 * mydriver_resume - system-sleep resume callback.
 * @dev: device being resumed; its drvdata holds the driver's struct mydata
 *
 * Turns the clock back on, writes back the registers captured by
 * mydriver_suspend(), and then re-runs hardware initialisation.
 *
 * Return: 0 on success, or the error from clk_prepare_enable().
 */
static int mydriver_resume(struct device *dev)
{
	struct mydata *md = dev_get_drvdata(dev);
	int err = clk_prepare_enable(md->clk);

	/* Without a running clock the registers must not be touched */
	if (err)
		return err;

	/* Write back the snapshot taken at suspend time */
	writel(md->saved_config, md->base + CONFIG_REG);
	writel(md->saved_irq_mask, md->base + IRQ_MASK_REG);

	/* Bring the hardware back into operation */
	mydriver_init_hw(md);

	return 0;
}
/*
 * Shared PM ops using the SET_ macros.
 *
 * SET_SYSTEM_SLEEP_PM_OPS() installs mydriver_suspend/mydriver_resume as the
 * system-sleep callbacks; SET_RUNTIME_PM_OPS() installs the runtime-PM
 * suspend/resume pair and leaves its third callback slot NULL.
 * No comma is written between the two macro invocations.
 * mydriver_runtime_suspend/mydriver_runtime_resume are not shown in this
 * section and must be defined elsewhere.
 * NOTE(review): newer kernels offer SYSTEM_SLEEP_PM_OPS()/RUNTIME_PM_OPS()
 * as replacements for the SET_ variants — confirm the target kernel version
 * before switching.
 */
static const struct dev_pm_ops mydriver_pm_ops = {
SET_SYSTEM_SLEEP_PM_OPS(mydriver_suspend, mydriver_resume)
SET_RUNTIME_PM_OPS(mydriver_runtime_suspend,
mydriver_runtime_resume, NULL)
};
Wakeup sources
A wakeup source is a hardware event that can wake the system from sleep. Common wakeup sources: keyboard, touchpad, NIC (Wake-on-LAN), RTC alarm, USB.
/*
 * Driver: register a wakeup source.
 * Probe skeleton: the elided parts are expected to allocate and set up priv;
 * only the wakeup-related step is spelled out.
 */
static int mydriver_probe(struct platform_device *pdev)
{
struct mydata *priv;
/* ... */
device_init_wakeup(&pdev->dev, true); /* this device can wake the system */
/* Configure hardware to assert wakeup interrupt during suspend */
/* ... */
return 0;
}
/* Before suspend: arm the wakeup interrupt */
static int mydriver_suspend(struct device *dev)
{
if (device_may_wakeup(dev))
enable_irq_wake(priv->irq); /* route IRQ as wakeup source */
/* ... */
}
static int mydriver_resume(struct device *dev)
{
if (device_may_wakeup(dev))
disable_irq_wake(priv->irq);
/* ... */
}
Wakeup source tracking
#include <linux/pm_wakeup.h>
/*
 * The kernel tracks active wakeup sources.
 * One struct wakeup_source exists per source; its counters are what
 * /sys/kernel/debug/wakeup_sources prints.
 * NOTE(review): this is a quoted copy of the kernel structure; the
 * authoritative layout and field documentation live in
 * include/linux/pm_wakeup.h and may differ between kernel versions.
 */
struct wakeup_source {
const char *name; /* name of the wakeup source */
struct list_head entry; /* list linkage */
spinlock_t lock; /* lock for this structure */
struct wake_irq *wakeirq; /* associated wake IRQ, if any */
struct timer_list timer; /* timer used for timed wakeup events */
unsigned long timer_expires; /* expiry of that timer */
ktime_t total_time; /* total time this source has been active */
ktime_t max_time; /* longest single active period */
ktime_t last_time; /* time of the last state change */
ktime_t start_prevent_time; /* presumably when it began preventing sleep — confirm */
ktime_t prevent_sleep_time; /* total time it has prevented sleep */
unsigned long event_count; /* number of wakeup events signalled */
unsigned long active_count; /* number of activations */
unsigned long relax_count; /* number of deactivations */
unsigned long expire_count; /* number of timer expirations */
unsigned long wakeup_count; /* number of times it may have aborted a suspend */
struct device *dev; /* device associated with this source */
unsigned int active:1; /* 1 while the source is active */
};
# Which wakeup sources are active (blocking suspend)?
# debugfs is normally readable by root only, hence sudo
sudo cat /sys/kernel/debug/wakeup_sources
# name active_count event_count wakeup_count expire_count active_since total_time max_time last_change prevent_suspend_time
# NETLINK 0 1234 0 0 0 0 0 12345 0
# battery 1 5678 1 0 12345 67890 12345 67890 12345
# Which IRQs can wake the system?
# /proc/interrupts has no wake flag; each IRQ exposes an enabled/disabled "wakeup" attribute in sysfs
grep -H enabled /sys/kernel/irq/*/wakeup
# Last wakeup reason (after resume): first wakeup IRQ seen in the most recent suspend/resume cycle
# (needs CONFIG_PM_SLEEP_DEBUG; suspend_stats only holds success/failure counters)
cat /sys/power/pm_wakeup_irq
Hibernate (suspend-to-disk)
Hibernation saves the entire memory image to disk, then powers off. On resume, the image is read back and execution continues exactly where it left off.
Hibernate state machine
1. Freeze processes
2. Create memory snapshot (swsusp_save):
- Walk all pages, copy to free pages or swap
- The "hibernation image" = all in-use memory pages
3. Write image to swap partition or hibernation file
4. Power off
─────────── powered off ──────────
5. Boot (normal cold boot; bootloader has no special role)
6. Kernel: check resume= parameter or /sys/power/resume device for hibernation image
7. Read image from swap
8. swsusp_restore: overwrite running kernel pages
9. Resume execution at hibernation point
Configuring hibernate destination
# Use a specific swap partition
echo /dev/sda2 | sudo tee /sys/power/resume
# Or a swap file (requires offset)
# resume_offset takes a bare number: the physical offset of the swap file's
# first extent, in page-size units. filefrag reports filesystem blocks, which
# match when the filesystem block size equals the page size (typically 4 KiB).
offset=$(sudo filefrag -v /swapfile | awk '$1=="0:" {print substr($4, 1, length($4)-2)}')
echo "$offset" | sudo tee /sys/power/resume_offset
echo /dev/sda1 | sudo tee /sys/power/resume # device containing swapfile
# Check current hibernate device
cat /sys/power/resume
Freeze (s2idle / S0ix)
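Modern laptops use s2idle (suspend-to-idle; the low-power platform state it aims for is called S0ix on Intel hardware, and the comparable Windows feature is "connected standby"). Unlike S3, the platform never leaves S0: the CPUs are parked in their deepest idle state instead of being powered off by firmware, allowing: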
- Bluetooth/WiFi to stay connected
- Brief wakeups for background activity where the platform supports it (user processes themselves stay frozen while the system is in s2idle)
- Faster wake time (~1 second vs ~3 seconds for S3)
# Force s2idle (ignore platform S3 support)
# mem_sleep selects which variant a later "echo mem > /sys/power/state" will use
echo s2idle | sudo tee /sys/power/mem_sleep
# Or prefer S3 if available
echo deep | sudo tee /sys/power/mem_sleep
# Show the available variants
cat /sys/power/mem_sleep
# Example output: s2idle [deep] (brackets = current selection)
Diagnosing suspend failures
# Verbose suspend logging
echo 1 | sudo tee /sys/power/pm_debug_messages
# Also log how long each device callback takes (shows up in dmesg)
echo 1 | sudo tee /sys/power/pm_print_times
# After failed suspend attempt
dmesg | grep -E "PM:|suspend|freeze" | tail -50
# Which device failed to suspend?
# The PM core logs "... PM: failed to suspend: error N" (older kernels: "PM: Device X failed to suspend: error N")
dmesg | grep -E "PM: .*failed to"
# Suspend success/failure counters, broken down by phase
# (per-callback timings come from pm_print_times in dmesg, not from this file)
sudo cat /sys/kernel/debug/suspend_stats
# success: 42
# fail: 2
# failed_freeze: 0
# failed_prepare: 0
# failed_suspend: 1 ← driver failed here
# failed_suspend_noirq: 0
# Enable PM tracepoints for detailed timeline
# ("sudo tee" rather than "> file": a plain redirect is opened by the unprivileged shell)
echo 1 | sudo tee /sys/kernel/tracing/events/power/device_pm_callback_start/enable
echo 1 | sudo tee /sys/kernel/tracing/events/power/device_pm_callback_end/enable
sudo cat /sys/kernel/tracing/trace_pipe > /tmp/suspend_trace &
echo mem | sudo tee /sys/power/state
# Stop the background reader (a background job does not receive Ctrl-C), then examine /tmp/suspend_trace
sudo kill "$!"
Further reading
- cpufreq — CPU frequency scaling
- Runtime PM — device-level power management
- Device Drivers: platform driver — dev_pm_ops integration
- Interrupts: Timers — RTC alarm as wakeup source
- kernel/power/ in the kernel tree — suspend/hibernate core
- Documentation/admin-guide/pm/ in the kernel tree