Skip to content

kexec

Boot a new kernel from within a running kernel, without hardware reset

What kexec does

kexec loads a new kernel into memory and jumps to it directly, bypassing BIOS/UEFI POST and the bootloader:

Normal boot:      BIOS/UEFI → GRUB → kernel
kexec boot:       running kernel → kexec -e (machine_kexec) → new kernel
                  (BIOS/POST skipped → 5-10 second faster reboot)

Primary uses:

1. Fast reboot: upgrade the kernel in seconds instead of minutes
2. kdump: a crashed kernel boots a capture kernel to save the vmcore
3. A/B kernel updates: atomically switch to a new kernel

kexec_load syscall

/* kernel/kexec.c */
/*
 * kexec_load(entry, nr_segments, segments, flags)
 *   entry:      entry point of new kernel
 *   nr_segments: number of memory segments to load
 *   segments:   array of kexec_segment structs
 *   flags:      KEXEC_ON_CRASH (load as the kdump crash kernel),
 *               KEXEC_ARCH_* (architecture of the new kernel), etc.
 */

/*
 * One segment of the image to be loaded: a source buffer in user memory
 * plus the destination region it is copied to. kexec_load() receives an
 * array of these.
 */
struct kexec_segment {
    const void  __user *buf;   /* source: userspace buffer holding the segment data */
    size_t               bufsz; /* number of bytes available in buf */
    const void          *mem;  /* destination: target physical address */
    size_t               memsz; /* size of the destination region; NOTE(review): presumably >= bufsz with the remainder zero-filled - confirm against kexec_load(2) */
};

kexec_file_load: signature-verified loading

kexec_file_load takes a file descriptor instead of raw memory segments, enabling kernel image signature verification:

/*
 * kexec_file_load(kernel_fd, initrd_fd, cmdline_len, cmdline, flags)
 *
 * File-descriptor based variant of kexec_load(): the caller passes open
 * files rather than pre-built memory segments.
 *
 *   kernel_fd:   descriptor of the kernel image to load
 *   initrd_fd:   descriptor of the initrd
 *   cmdline_len: length of the command line buffer
 *                (NOTE(review): presumably includes the trailing NUL -
 *                confirm against kexec_file_load(2))
 *   cmdline:     userspace pointer to the command line for the new kernel
 *   flags:       modifier flags for the load
 */
int kexec_file_load(int kernel_fd,      /* open("/boot/vmlinuz", O_RDONLY) */
                     int initrd_fd,      /* initrd file descriptor */
                     unsigned long cmdline_len,
                     const char __user *cmdline,
                     unsigned long flags);

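If CONFIG_KEXEC_SIG=y (called CONFIG_KEXEC_VERIFY_SIG before Linux 5.4), kexec_file_load verifies the kernel image's signature against a trusted key; with CONFIG_KEXEC_SIG_FORCE=y, images without a valid signature are rejected — useful for Secure Boot compatibility.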

Machine kexec: the jump

When kexec -e is run, machine_kexec() performs the actual handoff:

/* arch/x86/kernel/machine_kexec_64.c */
/*
 * machine_kexec - hand control from the running kernel to a loaded image.
 * @image: the kimage describing the kernel to start
 *
 * Quiesces the machine (local interrupts off, other CPUs stopped) and then
 * calls the relocate_kernel trampoline, which is given the image's page
 * list, the control code page, the entry point, the preserve_context flag
 * and the page table stored in image->arch.
 *
 * Simplified excerpt. NOTE(review): control is not expected to come back
 * from relocate_kernel() on the plain reboot path - confirm for the
 * preserve_context case.
 */
void machine_kexec(struct kimage *image)
{
    unsigned long page_list;
    void *reboot_code_buffer;

    /* Nothing may interrupt the handoff on this CPU */
    local_irq_disable();

    /* Stop all other CPUs */
    native_smp_send_stop();

    /*
     * Page-aligned address of the head of the image's page list: the
     * sub-page bits of image->head are masked off (presumably they carry
     * entry flags rather than address bits).
     */
    page_list = image->head & PAGE_MASK;

    /* Kernel-virtual address of the page holding the relocation code */
    reboot_code_buffer = page_address(image->control_code_page);

    /* Jump to the relocation trampoline */
    relocate_kernel(page_list,
                    reboot_code_buffer,
                    image->start,
                    image->preserve_context,
                    image->arch.pgtable);
}

/* relocate_kernel (assembly): */
/*   1. Switch to identity-mapped page tables */
/*   2. Copy new kernel segments to their final locations */
/*   3. Jump to new kernel entry point */

kimage: the loaded kernel

/* include/linux/kexec.h */
/*
 * In-kernel description of a kernel image that has been loaded for kexec:
 * its page list, entry point, the pages kexec needs for the handoff, and
 * the segments supplied at load time. Abbreviated; fields elided from this
 * excerpt are marked with the "..." comment below.
 */
struct kimage {
    kimage_entry_t  head;            /* page list head */
    kimage_entry_t *entry;           /* current position in page list */
    kimage_entry_t *last_entry;      /* end of page list */

    unsigned long    start;          /* entry point of new kernel */

    struct page     *control_code_page; /* page holding the relocation code */
    struct page     *swap_page;         /* temporary page for copying */

    unsigned long    nr_segments;    /* number of valid entries in segment[] */
    struct kexec_segment segment[KEXEC_SEGMENT_MAX]; /* up to 16 segments */

    struct list_head control_pages;  /* pages used by kexec itself */
    struct list_head dest_pages;     /* pages for new kernel */
    struct list_head unusable_pages; /* pages that can't be used */

    /* ... (further fields omitted in this excerpt) */
    unsigned long    flags;
    int              type;           /* KEXEC_TYPE_DEFAULT or KEXEC_TYPE_CRASH */
};

kdump integration

kdump uses kexec to boot a capture kernel when the primary kernel crashes:

Primary kernel running
    │ panic() / oops
machine_crash_shutdown()
    │ stop all CPUs
    │ save registers
machine_kexec(kexec_crash_image)
Capture kernel boots
    │ reads /proc/vmcore (primary kernel's memory)
makedumpfile saves vmcore → reboot

The capture kernel is loaded into a physical memory region that is reserved at boot with the crashkernel= parameter (e.g. crashkernel=256M). The primary kernel never uses this region, so the capture kernel is intact after a crash.

# Load kdump kernel at boot
kexec -p /boot/vmlinuz-kdump \
    --initrd=/boot/initrd-kdump.img \
    --reuse-cmdline \
    --append="irqpoll nr_cpus=1 reset_devices"

# Check whether a crash kernel / a normal kexec kernel is currently loaded
cat /sys/kernel/kexec_crash_loaded
# 1 = crash kernel loaded

cat /sys/kernel/kexec_loaded
# 0 = no normal kexec kernel loaded

kexec userspace tool

# Load a kernel for kexec reboot
kexec -l /boot/vmlinuz \
    --initrd=/boot/initrd.img \
    --reuse-cmdline       # use current boot cmdline

# Load with explicit command line
kexec -l /boot/vmlinuz \
    --initrd=/boot/initrd.img \
    --append="root=/dev/sda1 quiet"

# Execute: jump to new kernel immediately
kexec -e

# Or: let systemd do a clean shutdown and then kexec (happens immediately)
systemctl kexec
# stops services and unmounts filesystems, then jumps to the loaded kernel

# Load crash kernel
kexec -p /boot/vmlinuz-crash \
    --initrd=/boot/initrd-crash.img \
    --append="1 irqpoll"

# Unload crash kernel
kexec -p -u

kexec in the kernel

Shutdown sequence

/* kernel/kexec_core.c */
/*
 * kernel_kexec - shut the running kernel down and start the loaded image.
 *
 * Simplified view of the kexec reboot path. Statement order matters:
 * devices and notifiers are quiesced first, then execution is confined to
 * the reboot CPU, then core subsystems are shut down, and only then is the
 * jump made.
 *
 * NOTE(review): assumes kexec_image is non-NULL, i.e. an image was loaded
 * beforehand - confirm the caller guarantees this.
 *
 * Return: not expected to return; the trailing return only satisfies the
 * function's signature.
 */
int kernel_kexec(void)
{
    /*
     * kernel_restart_prepare() itself runs the reboot notifier chain with
     * SYS_RESTART before shutting devices down, so the chain must not be
     * invoked a second time here (a separate call would notify every
     * subscriber twice).
     */
    kernel_restart_prepare(NULL);

    /* Continue on the designated reboot CPU */
    migrate_to_reboot_cpu();
    syscore_shutdown();

    /* Jump to new kernel */
    machine_kexec(kexec_image);

    /* Should not return */
    BUG();
    return 0;
}

Preserving EFI runtime services

/*
 * For EFI systems: preserve EFI runtime memory.
 * Illustrative fragment only - the branch body is empty in this excerpt
 * and describes the intent in comments.
 */
if (efi_enabled(EFI_RUNTIME_SERVICES)) {
    /* Mark EFI runtime regions as preserved across the kexec */
    /* so that the new kernel can keep using EFI runtime services */
}

Observing kexec

# Check kexec support in kernel
grep KEXEC /boot/config-$(uname -r)
# CONFIG_KEXEC=y
# CONFIG_KEXEC_FILE=y
# CONFIG_KEXEC_VERIFY_SIG=y   (optional)

# Memory reserved for kdump
cat /proc/iomem | grep -i crash
# 100000000-10fffffff : Crash kernel

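# Boot source detection: kexec_loaded only shows whether a kernel is staged
# for kexec right now; it does not show how the running kernel was booted
cat /sys/kernel/kexec_loaded
# After a kexec reboot no firmware or bootloader ran; check the kernel log for hints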
dmesg | grep -i kexec

# Timing: kexec vs cold boot
time systemctl kexec   # ~5 seconds
# vs
time reboot            # ~60 seconds (BIOS POST)

Further reading