// SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2025, Google LLC. * Pasha Tatashin */ /** * DOC: Live Update Orchestrator (LUO) * * Live Update is a specialized, kexec-based reboot process that allows a * running kernel to be updated from one version to another while preserving * the state of selected resources and keeping designated hardware devices * operational. For these devices, DMA activity may continue throughout the * kernel transition. * * While the primary use case driving this work is supporting live updates of * the Linux kernel when it is used as a hypervisor in cloud environments, the * LUO framework itself is designed to be workload-agnostic. Live Update * facilitates a full kernel version upgrade for any type of system. * * For example, a non-hypervisor system running an in-memory cache like * memcached with many gigabytes of data can use LUO. The userspace service * can place its cache into a memfd, have its state preserved by LUO, and * restore it immediately after the kernel kexec. * * Whether the system is running virtual machines, containers, a * high-performance database, or networking services, LUO's primary goal is to * enable a full kernel update by preserving critical userspace state and * keeping essential devices operational. * * The core of LUO is a mechanism that tracks the progress of a live update, * along with a callback API that allows other kernel subsystems to participate * in the process. Example subsystems that can hook into LUO include: kvm, * iommu, interrupts, vfio, participating filesystems, and memory management. * * LUO uses Kexec Handover to transfer memory state from the current kernel to * the next kernel. For more details see * Documentation/core-api/kho/concepts.rst. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "kexec_handover_internal.h" #include "luo_internal.h" static struct { bool enabled; void *fdt_out; void *fdt_in; u64 liveupdate_num; } luo_global; static int __init early_liveupdate_param(char *buf) { return kstrtobool(buf, &luo_global.enabled); } early_param("liveupdate", early_liveupdate_param); static int __init luo_early_startup(void) { phys_addr_t fdt_phys; int err, ln_size; const void *ptr; if (!kho_is_enabled()) { if (liveupdate_enabled()) pr_warn("Disabling liveupdate because KHO is disabled\n"); luo_global.enabled = false; return 0; } /* Retrieve LUO subtree, and verify its format. */ err = kho_retrieve_subtree(LUO_FDT_KHO_ENTRY_NAME, &fdt_phys); if (err) { if (err != -ENOENT) { pr_err("failed to retrieve FDT '%s' from KHO: %pe\n", LUO_FDT_KHO_ENTRY_NAME, ERR_PTR(err)); return err; } return 0; } luo_global.fdt_in = phys_to_virt(fdt_phys); err = fdt_node_check_compatible(luo_global.fdt_in, 0, LUO_FDT_COMPATIBLE); if (err) { pr_err("FDT '%s' is incompatible with '%s' [%d]\n", LUO_FDT_KHO_ENTRY_NAME, LUO_FDT_COMPATIBLE, err); return -EINVAL; } ln_size = 0; ptr = fdt_getprop(luo_global.fdt_in, 0, LUO_FDT_LIVEUPDATE_NUM, &ln_size); if (!ptr || ln_size != sizeof(luo_global.liveupdate_num)) { pr_err("Unable to get live update number '%s' [%d]\n", LUO_FDT_LIVEUPDATE_NUM, ln_size); return -EINVAL; } luo_global.liveupdate_num = get_unaligned((u64 *)ptr); pr_info("Retrieved live update data, liveupdate number: %lld\n", luo_global.liveupdate_num); err = luo_session_setup_incoming(luo_global.fdt_in); if (err) return err; return 0; } static int __init liveupdate_early_init(void) { int err; err = luo_early_startup(); if (err) { luo_global.enabled = false; luo_restore_fail("The incoming tree failed to initialize properly [%pe], disabling live update\n", ERR_PTR(err)); } return err; } early_initcall(liveupdate_early_init); /* Called during boot to create outgoing LUO fdt tree */ static int __init luo_fdt_setup(void) { const u64 ln = luo_global.liveupdate_num + 1; void *fdt_out; int err; fdt_out = kho_alloc_preserve(LUO_FDT_SIZE); if (IS_ERR(fdt_out)) { pr_err("failed to allocate/preserve FDT memory\n"); return PTR_ERR(fdt_out); } err = fdt_create(fdt_out, LUO_FDT_SIZE); err |= fdt_finish_reservemap(fdt_out); err |= fdt_begin_node(fdt_out, ""); err |= fdt_property_string(fdt_out, "compatible", LUO_FDT_COMPATIBLE); err |= fdt_property(fdt_out, LUO_FDT_LIVEUPDATE_NUM, &ln, sizeof(ln)); err |= luo_session_setup_outgoing(fdt_out); err |= fdt_end_node(fdt_out); err |= fdt_finish(fdt_out); if (err) goto exit_free; err = kho_add_subtree(LUO_FDT_KHO_ENTRY_NAME, fdt_out); if (err) goto exit_free; luo_global.fdt_out = fdt_out; return 0; exit_free: kho_unpreserve_free(fdt_out); pr_err("failed to prepare LUO FDT: %d\n", err); return err; } /* * late initcall because it initializes the outgoing tree that is needed only * once userspace starts using /dev/liveupdate. */ static int __init luo_late_startup(void) { int err; if (!liveupdate_enabled()) return 0; err = luo_fdt_setup(); if (err) luo_global.enabled = false; return err; } late_initcall(luo_late_startup); /* Public Functions */ /** * liveupdate_reboot() - Kernel reboot notifier for live update final * serialization. * * This function is invoked directly from the reboot() syscall pathway * if kexec is in progress. * * If any callback fails, this function aborts KHO, undoes the freeze() * callbacks, and returns an error. */ int liveupdate_reboot(void) { int err; if (!liveupdate_enabled()) return 0; err = luo_session_serialize(); if (err) return err; err = kho_finalize(); if (err) { pr_err("kho_finalize failed %d\n", err); /* * kho_finalize() may return libfdt errors, to aboid passing to * userspace unknown errors, change this to EAGAIN. */ err = -EAGAIN; } return err; } /** * liveupdate_enabled - Check if the live update feature is enabled. * * This function returns the state of the live update feature flag, which * can be controlled via the ``liveupdate`` kernel command-line parameter. * * @return true if live update is enabled, false otherwise. */ bool liveupdate_enabled(void) { return luo_global.enabled; } /** * DOC: LUO ioctl Interface * * The IOCTL user-space control interface for the LUO subsystem. * It registers a character device, typically found at ``/dev/liveupdate``, * which allows a userspace agent to manage the LUO state machine and its * associated resources, such as preservable file descriptors. * * To ensure that the state machine is controlled by a single entity, access * to this device is exclusive: only one process is permitted to have * ``/dev/liveupdate`` open at any given time. Subsequent open attempts will * fail with -EBUSY until the first process closes its file descriptor. * This singleton model simplifies state management by preventing conflicting * commands from multiple userspace agents. */ struct luo_device_state { struct miscdevice miscdev; atomic_t in_use; }; static int luo_ioctl_create_session(struct luo_ucmd *ucmd) { struct liveupdate_ioctl_create_session *argp = ucmd->cmd; struct file *file; int err; argp->fd = get_unused_fd_flags(O_CLOEXEC); if (argp->fd < 0) return argp->fd; err = luo_session_create(argp->name, &file); if (err) goto err_put_fd; err = luo_ucmd_respond(ucmd, sizeof(*argp)); if (err) goto err_put_file; fd_install(argp->fd, file); return 0; err_put_file: fput(file); err_put_fd: put_unused_fd(argp->fd); return err; } static int luo_ioctl_retrieve_session(struct luo_ucmd *ucmd) { struct liveupdate_ioctl_retrieve_session *argp = ucmd->cmd; struct file *file; int err; argp->fd = get_unused_fd_flags(O_CLOEXEC); if (argp->fd < 0) return argp->fd; err = luo_session_retrieve(argp->name, &file); if (err < 0) goto err_put_fd; err = luo_ucmd_respond(ucmd, sizeof(*argp)); if (err) goto err_put_file; fd_install(argp->fd, file); return 0; err_put_file: fput(file); err_put_fd: put_unused_fd(argp->fd); return err; } static int luo_open(struct inode *inodep, struct file *filep) { struct luo_device_state *ldev = container_of(filep->private_data, struct luo_device_state, miscdev); if (atomic_cmpxchg(&ldev->in_use, 0, 1)) return -EBUSY; /* Always return -EIO to user if deserialization fail */ if (luo_session_deserialize()) { atomic_set(&ldev->in_use, 0); return -EIO; } return 0; } static int luo_release(struct inode *inodep, struct file *filep) { struct luo_device_state *ldev = container_of(filep->private_data, struct luo_device_state, miscdev); atomic_set(&ldev->in_use, 0); return 0; } union ucmd_buffer { struct liveupdate_ioctl_create_session create; struct liveupdate_ioctl_retrieve_session retrieve; }; struct luo_ioctl_op { unsigned int size; unsigned int min_size; unsigned int ioctl_num; int (*execute)(struct luo_ucmd *ucmd); }; #define IOCTL_OP(_ioctl, _fn, _struct, _last) \ [_IOC_NR(_ioctl) - LIVEUPDATE_CMD_BASE] = { \ .size = sizeof(_struct) + \ BUILD_BUG_ON_ZERO(sizeof(union ucmd_buffer) < \ sizeof(_struct)), \ .min_size = offsetofend(_struct, _last), \ .ioctl_num = _ioctl, \ .execute = _fn, \ } static const struct luo_ioctl_op luo_ioctl_ops[] = { IOCTL_OP(LIVEUPDATE_IOCTL_CREATE_SESSION, luo_ioctl_create_session, struct liveupdate_ioctl_create_session, name), IOCTL_OP(LIVEUPDATE_IOCTL_RETRIEVE_SESSION, luo_ioctl_retrieve_session, struct liveupdate_ioctl_retrieve_session, name), }; static long luo_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) { const struct luo_ioctl_op *op; struct luo_ucmd ucmd = {}; union ucmd_buffer buf; unsigned int nr; int err; nr = _IOC_NR(cmd); if (nr < LIVEUPDATE_CMD_BASE || (nr - LIVEUPDATE_CMD_BASE) >= ARRAY_SIZE(luo_ioctl_ops)) { return -EINVAL; } ucmd.ubuffer = (void __user *)arg; err = get_user(ucmd.user_size, (u32 __user *)ucmd.ubuffer); if (err) return err; op = &luo_ioctl_ops[nr - LIVEUPDATE_CMD_BASE]; if (op->ioctl_num != cmd) return -ENOIOCTLCMD; if (ucmd.user_size < op->min_size) return -EINVAL; ucmd.cmd = &buf; err = copy_struct_from_user(ucmd.cmd, op->size, ucmd.ubuffer, ucmd.user_size); if (err) return err; return op->execute(&ucmd); } static const struct file_operations luo_fops = { .owner = THIS_MODULE, .open = luo_open, .release = luo_release, .unlocked_ioctl = luo_ioctl, }; static struct luo_device_state luo_dev = { .miscdev = { .minor = MISC_DYNAMIC_MINOR, .name = "liveupdate", .fops = &luo_fops, }, .in_use = ATOMIC_INIT(0), }; static int __init liveupdate_ioctl_init(void) { if (!liveupdate_enabled()) return 0; return misc_register(&luo_dev.miscdev); } late_initcall(liveupdate_ioctl_init);