author    Linus Torvalds <torvalds@linux-foundation.org>    2026-02-12 12:13:01 -0800
committer Linus Torvalds <torvalds@linux-foundation.org>    2026-02-12 12:13:01 -0800
commit    136114e0abf03005e182d75761ab694648e6d388 (patch)
tree      05c61b103fc9cb72a7cae99680a4b524347e9616 /kernel
parent    4cff5c05e076d2ee4e34122aa956b84a2eaac587 (diff)
parent    0dddf20b4fd4afd59767acc144ad4da60259f21f (diff)
Merge tag 'mm-nonmm-stable-2026-02-12-10-48' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull non-MM updates from Andrew Morton:

 - "ocfs2: give ocfs2 the ability to reclaim suballocator free bg" saves disk space by teaching ocfs2 to reclaim suballocator block group space (Heming Zhao)

 - "Add ARRAY_END(), and use it to fix off-by-one bugs" adds the ARRAY_END() macro and uses it in various places (Alejandro Colomar)

 - "vmcoreinfo: support VMCOREINFO_BYTES larger than PAGE_SIZE" makes the vmcore code future-safe, if VMCOREINFO_BYTES ever exceeds the page size (Pnina Feder)

 - "kallsyms: Prevent invalid access when showing module buildid" cleans up kallsyms code related to module buildid and fixes an invalid access crash when printing backtraces (Petr Mladek)

 - "Address page fault in ima_restore_measurement_list()" fixes a kexec-related crash that can occur when booting the second-stage kernel on x86 (Harshit Mogalapalli)

 - "kho: ABI headers and Documentation updates" updates the kexec handover ABI documentation (Mike Rapoport)

 - "Align atomic storage" adds the __aligned attribute to atomic_t and atomic64_t definitions to get natural alignment of both types on csky, m68k, microblaze, nios2, openrisc and sh (Finn Thain)

 - "kho: clean up page initialization logic" simplifies the page initialization logic in kho_restore_page() (Pratyush Yadav)

 - "Unload linux/kernel.h" moves several things out of kernel.h and into more appropriate places (Yury Norov)

 - "don't abuse task_struct.group_leader" removes the usage of ->group_leader when it is "obviously unnecessary" (Oleg Nesterov)

 - "list private v2 & luo flb" adds some infrastructure improvements to the live update orchestrator (Pasha Tatashin)

* tag 'mm-nonmm-stable-2026-02-12-10-48' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (107 commits)
  watchdog/hardlockup: simplify perf event probe and remove per-cpu dependency
  procfs: fix missing RCU protection when reading real_parent in do_task_stat()
  watchdog/softlockup: fix sample ring index wrap in need_counting_irqs()
  kcsan, compiler_types: avoid duplicate type issues in BPF Type Format
  kho: fix doc for kho_restore_pages()
  tests/liveupdate: add in-kernel liveupdate test
  liveupdate: luo_flb: introduce File-Lifecycle-Bound global state
  liveupdate: luo_file: Use private list
  list: add kunit test for private list primitives
  list: add primitives for private list manipulations
  delayacct: fix uapi timespec64 definition
  panic: add panic_force_cpu= parameter to redirect panic to a specific CPU
  netclassid: use thread_group_leader(p) in update_classid_task()
  RDMA/umem: don't abuse current->group_leader
  drm/pan*: don't abuse current->group_leader
  drm/amd: kill the outdated "Only the pthreads threading model is supported" checks
  drm/amdgpu: don't abuse current->group_leader
  android/binder: use same_thread_group(proc->tsk, current) in binder_mmap()
  android/binder: don't abuse current->group_leader
  kho: skip memoryless NUMA nodes when reserving scratch areas
  ...
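For readers unfamiliar with the ARRAY_END() series referenced above, the idea is roughly the following (a minimal userspace sketch; the exact definition in the merged series is assumed here, not quoted from it): a one-past-the-end pointer derived from ARRAY_SIZE() replaces hand-written "&buf[sizeof(buf) - 1]" bounds, which is where off-by-one errors tend to creep in. The kcsan_test.c hunk later in this diff makes exactly that kind of substitution.

  #include <stdio.h>

  /* Hypothetical ARRAY_END()-style helpers, not copied from the series:
   * a pointer one past the last element, built on ARRAY_SIZE(). */
  #define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
  #define ARRAY_END(a)  (&(a)[ARRAY_SIZE(a)])

  int main(void)
  {
          char buf[64];
          char *end = ARRAY_END(buf);     /* one past the end, not &buf[63] */

          /* end - buf is the full capacity (64). Bounding with
           * &buf[sizeof(buf) - 1] instead would silently waste one byte. */
          snprintf(buf, end - buf, "BUG: KCSAN: %s in ", "data-race");
          printf("%s\n", buf);
          return 0;
  }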
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/audit.c                        1
-rw-r--r--  kernel/bpf/core.c                     5
-rw-r--r--  kernel/bpf/rqspinlock.c               1
-rw-r--r--  kernel/bpf/syscall.c                  1
-rw-r--r--  kernel/configs/debug.config           2
-rw-r--r--  kernel/crash_core.c                  17
-rw-r--r--  kernel/crash_dump_dm_crypt.c         21
-rw-r--r--  kernel/debug/gdbstub.c                1
-rw-r--r--  kernel/delayacct.c                   33
-rw-r--r--  kernel/fork.c                         4
-rw-r--r--  kernel/kallsyms.c                    73
-rw-r--r--  kernel/kcsan/kcsan_test.c             4
-rw-r--r--  kernel/kexec_file.c                 131
-rw-r--r--  kernel/liveupdate/Kconfig            17
-rw-r--r--  kernel/liveupdate/Makefile            1
-rw-r--r--  kernel/liveupdate/kexec_handover.c  147
-rw-r--r--  kernel/liveupdate/luo_core.c         10
-rw-r--r--  kernel/liveupdate/luo_file.c         39
-rw-r--r--  kernel/liveupdate/luo_flb.c         654
-rw-r--r--  kernel/liveupdate/luo_internal.h     22
-rw-r--r--  kernel/module/kallsyms.c              9
-rw-r--r--  kernel/panic.c                      164
-rw-r--r--  kernel/sched/stats.h                  8
-rw-r--r--  kernel/trace/ftrace.c                 5
-rw-r--r--  kernel/trace/trace.c                  7
-rw-r--r--  kernel/trace/trace.h                  2
-rw-r--r--  kernel/tsacct.c                       2
-rw-r--r--  kernel/ucount.c                       2
-rw-r--r--  kernel/vmcore_info.c                  6
-rw-r--r--  kernel/watchdog.c                    12
-rw-r--r--  kernel/watchdog_perf.c               50
31 files changed, 1211 insertions, 240 deletions
diff --git a/kernel/audit.c b/kernel/audit.c
index 39c4f26c484d..592d927e70f9 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -32,6 +32,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/file.h>
+#include <linux/hex.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/atomic.h>
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index dc906dfdff94..5ab6bace7d0d 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -25,6 +25,7 @@
#include <linux/prandom.h>
#include <linux/bpf.h>
#include <linux/btf.h>
+#include <linux/hex.h>
#include <linux/objtool.h>
#include <linux/overflow.h>
#include <linux/rbtree_latch.h>
@@ -716,8 +717,8 @@ static struct bpf_ksym *bpf_ksym_find(unsigned long addr)
return n ? container_of(n, struct bpf_ksym, tnode) : NULL;
}
-int __bpf_address_lookup(unsigned long addr, unsigned long *size,
- unsigned long *off, char *sym)
+int bpf_address_lookup(unsigned long addr, unsigned long *size,
+ unsigned long *off, char *sym)
{
struct bpf_ksym *ksym;
int ret = 0;
diff --git a/kernel/bpf/rqspinlock.c b/kernel/bpf/rqspinlock.c
index 2fdfa828e3d3..e4e338cdb437 100644
--- a/kernel/bpf/rqspinlock.c
+++ b/kernel/bpf/rqspinlock.c
@@ -695,7 +695,6 @@ __bpf_kfunc int bpf_res_spin_lock(struct bpf_res_spin_lock *lock)
int ret;
BUILD_BUG_ON(sizeof(rqspinlock_t) != sizeof(struct bpf_res_spin_lock));
- BUILD_BUG_ON(__alignof__(rqspinlock_t) != __alignof__(struct bpf_res_spin_lock));
preempt_disable();
ret = res_spin_lock((rqspinlock_t *)lock);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 683c332dbafb..dd89bf809772 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -9,6 +9,7 @@
#include <linux/bpf_verifier.h>
#include <linux/bsearch.h>
#include <linux/btf.h>
+#include <linux/hex.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
diff --git a/kernel/configs/debug.config b/kernel/configs/debug.config
index 9f6ab7dabf67..774702591d26 100644
--- a/kernel/configs/debug.config
+++ b/kernel/configs/debug.config
@@ -84,7 +84,7 @@ CONFIG_SLUB_DEBUG_ON=y
# Debug Oops, Lockups and Hangs
#
CONFIG_BOOTPARAM_HUNG_TASK_PANIC=0
-# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=0
CONFIG_DEBUG_ATOMIC_SLEEP=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_PANIC_ON_OOPS=y
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 99dac1aa972a..3952b3e102e0 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -44,9 +44,15 @@ note_buf_t __percpu *crash_notes;
int kimage_crash_copy_vmcoreinfo(struct kimage *image)
{
- struct page *vmcoreinfo_page;
+ struct page *vmcoreinfo_base;
+ struct page *vmcoreinfo_pages[DIV_ROUND_UP(VMCOREINFO_BYTES, PAGE_SIZE)];
+ unsigned int order, nr_pages;
+ int i;
void *safecopy;
+ nr_pages = DIV_ROUND_UP(VMCOREINFO_BYTES, PAGE_SIZE);
+ order = get_order(VMCOREINFO_BYTES);
+
if (!IS_ENABLED(CONFIG_CRASH_DUMP))
return 0;
if (image->type != KEXEC_TYPE_CRASH)
@@ -61,12 +67,15 @@ int kimage_crash_copy_vmcoreinfo(struct kimage *image)
* happens to generate vmcoreinfo note, hereby we rely on
* vmap for this purpose.
*/
- vmcoreinfo_page = kimage_alloc_control_pages(image, 0);
- if (!vmcoreinfo_page) {
+ vmcoreinfo_base = kimage_alloc_control_pages(image, order);
+ if (!vmcoreinfo_base) {
pr_warn("Could not allocate vmcoreinfo buffer\n");
return -ENOMEM;
}
- safecopy = vmap(&vmcoreinfo_page, 1, VM_MAP, PAGE_KERNEL);
+ for (i = 0; i < nr_pages; i++)
+ vmcoreinfo_pages[i] = vmcoreinfo_base + i;
+
+ safecopy = vmap(vmcoreinfo_pages, nr_pages, VM_MAP, PAGE_KERNEL);
if (!safecopy) {
pr_warn("Could not vmap vmcoreinfo buffer\n");
return -ENOMEM;
diff --git a/kernel/crash_dump_dm_crypt.c b/kernel/crash_dump_dm_crypt.c
index 401423ba477d..37129243054d 100644
--- a/kernel/crash_dump_dm_crypt.c
+++ b/kernel/crash_dump_dm_crypt.c
@@ -143,6 +143,7 @@ static int read_key_from_user_keying(struct dm_crypt_key *dm_key)
{
const struct user_key_payload *ukp;
struct key *key;
+ int ret = 0;
kexec_dprintk("Requesting logon key %s", dm_key->key_desc);
key = request_key(&key_type_logon, dm_key->key_desc, NULL);
@@ -152,20 +153,28 @@ static int read_key_from_user_keying(struct dm_crypt_key *dm_key)
return PTR_ERR(key);
}
+ down_read(&key->sem);
ukp = user_key_payload_locked(key);
- if (!ukp)
- return -EKEYREVOKED;
+ if (!ukp) {
+ ret = -EKEYREVOKED;
+ goto out;
+ }
if (ukp->datalen > KEY_SIZE_MAX) {
pr_err("Key size %u exceeds maximum (%u)\n", ukp->datalen, KEY_SIZE_MAX);
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
memcpy(dm_key->data, ukp->data, ukp->datalen);
dm_key->key_size = ukp->datalen;
kexec_dprintk("Get dm crypt key (size=%u) %s: %8ph\n", dm_key->key_size,
dm_key->key_desc, dm_key->data);
- return 0;
+
+out:
+ up_read(&key->sem);
+ key_put(key);
+ return ret;
}
struct config_key {
@@ -223,7 +232,7 @@ static void config_key_release(struct config_item *item)
key_count--;
}
-static struct configfs_item_operations config_key_item_ops = {
+static const struct configfs_item_operations config_key_item_ops = {
.release = config_key_release,
};
@@ -298,7 +307,7 @@ static struct configfs_attribute *config_keys_attrs[] = {
* Note that, since no extra work is required on ->drop_item(),
* no ->drop_item() is provided.
*/
-static struct configfs_group_operations config_keys_group_ops = {
+static const struct configfs_group_operations config_keys_group_ops = {
.make_item = config_keys_make_item,
};
diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c
index 22fe969c5d2e..f586afd76c80 100644
--- a/kernel/debug/gdbstub.c
+++ b/kernel/debug/gdbstub.c
@@ -27,6 +27,7 @@
#include <linux/kernel.h>
#include <linux/sched/signal.h>
+#include <linux/hex.h>
#include <linux/kgdb.h>
#include <linux/kdb.h>
#include <linux/serial_core.h>
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 30e7912ebb0d..2e55c493c98b 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -18,6 +18,8 @@
do { \
d->type##_delay_max = tsk->delays->type##_delay_max; \
d->type##_delay_min = tsk->delays->type##_delay_min; \
+ d->type##_delay_max_ts.tv_sec = tsk->delays->type##_delay_max_ts.tv_sec; \
+ d->type##_delay_max_ts.tv_nsec = tsk->delays->type##_delay_max_ts.tv_nsec; \
tmp = d->type##_delay_total + tsk->delays->type##_delay; \
d->type##_delay_total = (tmp < d->type##_delay_total) ? 0 : tmp; \
d->type##_count += tsk->delays->type##_count; \
@@ -104,7 +106,8 @@ void __delayacct_tsk_init(struct task_struct *tsk)
* Finish delay accounting for a statistic using its timestamps (@start),
* accumulator (@total) and @count
*/
-static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *count, u64 *max, u64 *min)
+static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *count,
+ u64 *max, u64 *min, struct timespec64 *ts)
{
s64 ns = local_clock() - *start;
unsigned long flags;
@@ -113,8 +116,10 @@ static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *cou
raw_spin_lock_irqsave(lock, flags);
*total += ns;
(*count)++;
- if (ns > *max)
+ if (ns > *max) {
*max = ns;
+ ktime_get_real_ts64(ts);
+ }
if (*min == 0 || ns < *min)
*min = ns;
raw_spin_unlock_irqrestore(lock, flags);
@@ -137,7 +142,8 @@ void __delayacct_blkio_end(struct task_struct *p)
&p->delays->blkio_delay,
&p->delays->blkio_count,
&p->delays->blkio_delay_max,
- &p->delays->blkio_delay_min);
+ &p->delays->blkio_delay_min,
+ &p->delays->blkio_delay_max_ts);
}
int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
@@ -170,6 +176,8 @@ int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
d->cpu_delay_max = tsk->sched_info.max_run_delay;
d->cpu_delay_min = tsk->sched_info.min_run_delay;
+ d->cpu_delay_max_ts.tv_sec = tsk->sched_info.max_run_delay_ts.tv_sec;
+ d->cpu_delay_max_ts.tv_nsec = tsk->sched_info.max_run_delay_ts.tv_nsec;
tmp = (s64)d->cpu_delay_total + t2;
d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp;
tmp = (s64)d->cpu_run_virtual_total + t3;
@@ -217,7 +225,8 @@ void __delayacct_freepages_end(void)
&current->delays->freepages_delay,
&current->delays->freepages_count,
&current->delays->freepages_delay_max,
- &current->delays->freepages_delay_min);
+ &current->delays->freepages_delay_min,
+ &current->delays->freepages_delay_max_ts);
}
void __delayacct_thrashing_start(bool *in_thrashing)
@@ -241,7 +250,8 @@ void __delayacct_thrashing_end(bool *in_thrashing)
&current->delays->thrashing_delay,
&current->delays->thrashing_count,
&current->delays->thrashing_delay_max,
- &current->delays->thrashing_delay_min);
+ &current->delays->thrashing_delay_min,
+ &current->delays->thrashing_delay_max_ts);
}
void __delayacct_swapin_start(void)
@@ -256,7 +266,8 @@ void __delayacct_swapin_end(void)
&current->delays->swapin_delay,
&current->delays->swapin_count,
&current->delays->swapin_delay_max,
- &current->delays->swapin_delay_min);
+ &current->delays->swapin_delay_min,
+ &current->delays->swapin_delay_max_ts);
}
void __delayacct_compact_start(void)
@@ -271,7 +282,8 @@ void __delayacct_compact_end(void)
&current->delays->compact_delay,
&current->delays->compact_count,
&current->delays->compact_delay_max,
- &current->delays->compact_delay_min);
+ &current->delays->compact_delay_min,
+ &current->delays->compact_delay_max_ts);
}
void __delayacct_wpcopy_start(void)
@@ -286,7 +298,8 @@ void __delayacct_wpcopy_end(void)
&current->delays->wpcopy_delay,
&current->delays->wpcopy_count,
&current->delays->wpcopy_delay_max,
- &current->delays->wpcopy_delay_min);
+ &current->delays->wpcopy_delay_min,
+ &current->delays->wpcopy_delay_max_ts);
}
void __delayacct_irq(struct task_struct *task, u32 delta)
@@ -296,8 +309,10 @@ void __delayacct_irq(struct task_struct *task, u32 delta)
raw_spin_lock_irqsave(&task->delays->lock, flags);
task->delays->irq_delay += delta;
task->delays->irq_count++;
- if (delta > task->delays->irq_delay_max)
+ if (delta > task->delays->irq_delay_max) {
task->delays->irq_delay_max = delta;
+ ktime_get_real_ts64(&task->delays->irq_delay_max_ts);
+ }
if (delta && (!task->delays->irq_delay_min || delta < task->delays->irq_delay_min))
task->delays->irq_delay_min = delta;
raw_spin_unlock_irqrestore(&task->delays->lock, flags);
diff --git a/kernel/fork.c b/kernel/fork.c
index 9c5effbdbdc1..e832da9d15a4 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1357,7 +1357,7 @@ struct file *get_task_exe_file(struct task_struct *task)
* @task: The task.
*
* Returns %NULL if the task has no mm. Checks PF_KTHREAD (meaning
- * this kernel workthread has transiently adopted a user mm with use_mm,
+ * this kernel workthread has transiently adopted a user mm with kthread_use_mm,
* to do its AIO) is not set and if so returns a reference to it, after
* bumping up the use count. User must release the mm via mmput()
* after use. Typically used by /proc and ptrace.
@@ -2069,7 +2069,7 @@ __latent_entropy struct task_struct *copy_process(
p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? args->child_tid : NULL;
/*
- * Clear TID on mm_release()?
+ * TID is cleared in mm_release() when the task exits
*/
p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? args->child_tid : NULL;
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 6125724aadb1..aec2f06858af 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -347,7 +347,7 @@ int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize,
return 1;
}
return !!module_address_lookup(addr, symbolsize, offset, NULL, NULL, namebuf) ||
- !!__bpf_address_lookup(addr, symbolsize, offset, namebuf);
+ !!bpf_address_lookup(addr, symbolsize, offset, namebuf);
}
static int kallsyms_lookup_buildid(unsigned long addr,
@@ -357,8 +357,21 @@ static int kallsyms_lookup_buildid(unsigned long addr,
{
int ret;
- namebuf[KSYM_NAME_LEN - 1] = 0;
+ /*
+ * kallsyms_lookup() returns pointer to namebuf on success and
+ * NULL on error. But some callers ignore the return value.
+ * Instead they expect @namebuf filled either with valid
+ * or empty string.
+ */
namebuf[0] = 0;
+ /*
+ * Initialize the module-related return values. They are not set
+ * when the symbol is in vmlinux or it is a bpf address.
+ */
+ if (modname)
+ *modname = NULL;
+ if (modbuildid)
+ *modbuildid = NULL;
if (is_ksym_addr(addr)) {
unsigned long pos;
@@ -367,10 +380,6 @@ static int kallsyms_lookup_buildid(unsigned long addr,
/* Grab name */
kallsyms_expand_symbol(get_symbol_offset(pos),
namebuf, KSYM_NAME_LEN);
- if (modname)
- *modname = NULL;
- if (modbuildid)
- *modbuildid = NULL;
return strlen(namebuf);
}
@@ -379,12 +388,11 @@ static int kallsyms_lookup_buildid(unsigned long addr,
ret = module_address_lookup(addr, symbolsize, offset,
modname, modbuildid, namebuf);
if (!ret)
- ret = bpf_address_lookup(addr, symbolsize,
- offset, modname, namebuf);
+ ret = bpf_address_lookup(addr, symbolsize, offset, namebuf);
if (!ret)
- ret = ftrace_mod_address_lookup(addr, symbolsize,
- offset, modname, namebuf);
+ ret = ftrace_mod_address_lookup(addr, symbolsize, offset,
+ modname, modbuildid, namebuf);
return ret;
}
@@ -428,6 +436,37 @@ int lookup_symbol_name(unsigned long addr, char *symname)
return lookup_module_symbol_name(addr, symname);
}
+#ifdef CONFIG_STACKTRACE_BUILD_ID
+
+static int append_buildid(char *buffer, const char *modname,
+ const unsigned char *buildid)
+{
+ if (!modname)
+ return 0;
+
+ if (!buildid) {
+ pr_warn_once("Undefined buildid for the module %s\n", modname);
+ return 0;
+ }
+
+ /* build ID should match length of sprintf */
+#ifdef CONFIG_MODULES
+ static_assert(sizeof(typeof_member(struct module, build_id)) == 20);
+#endif
+
+ return sprintf(buffer, " %20phN", buildid);
+}
+
+#else /* CONFIG_STACKTRACE_BUILD_ID */
+
+static int append_buildid(char *buffer, const char *modname,
+ const unsigned char *buildid)
+{
+ return 0;
+}
+
+#endif /* CONFIG_STACKTRACE_BUILD_ID */
+
/* Look up a kernel symbol and return it in a text buffer. */
static int __sprint_symbol(char *buffer, unsigned long address,
int symbol_offset, int add_offset, int add_buildid)
@@ -437,6 +476,9 @@ static int __sprint_symbol(char *buffer, unsigned long address,
unsigned long offset, size;
int len;
+ /* Prevent module removal until modname and modbuildid are printed */
+ guard(rcu)();
+
address += symbol_offset;
len = kallsyms_lookup_buildid(address, &size, &offset, &modname, &buildid,
buffer);
@@ -450,15 +492,8 @@ static int __sprint_symbol(char *buffer, unsigned long address,
if (modname) {
len += sprintf(buffer + len, " [%s", modname);
-#if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID)
- if (add_buildid && buildid) {
- /* build ID should match length of sprintf */
-#if IS_ENABLED(CONFIG_MODULES)
- static_assert(sizeof(typeof_member(struct module, build_id)) == 20);
-#endif
- len += sprintf(buffer + len, " %20phN", buildid);
- }
-#endif
+ if (add_buildid)
+ len += append_buildid(buffer + len, modname, buildid);
len += sprintf(buffer + len, "]");
}
diff --git a/kernel/kcsan/kcsan_test.c b/kernel/kcsan/kcsan_test.c
index 219d22857c98..8ef8167be745 100644
--- a/kernel/kcsan/kcsan_test.c
+++ b/kernel/kcsan/kcsan_test.c
@@ -176,7 +176,7 @@ static bool __report_matches(const struct expect_report *r)
/* Title */
cur = expect[0];
- end = &expect[0][sizeof(expect[0]) - 1];
+ end = ARRAY_END(expect[0]);
cur += scnprintf(cur, end - cur, "BUG: KCSAN: %s in ",
is_assert ? "assert: race" : "data-race");
if (r->access[1].fn) {
@@ -200,7 +200,7 @@ static bool __report_matches(const struct expect_report *r)
/* Access 1 */
cur = expect[1];
- end = &expect[1][sizeof(expect[1]) - 1];
+ end = ARRAY_END(expect[1]);
if (!r->access[1].fn)
cur += scnprintf(cur, end - cur, "race at unknown origin, with ");
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index eb62a9794242..2bfbb2d144e6 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -883,6 +883,60 @@ out_free_sha_regions:
#ifdef CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY
/*
+ * kexec_purgatory_find_symbol - find a symbol in the purgatory
+ * @pi: Purgatory to search in.
+ * @name: Name of the symbol.
+ *
+ * Return: pointer to symbol in read-only symtab on success, NULL on error.
+ */
+static const Elf_Sym *kexec_purgatory_find_symbol(struct purgatory_info *pi,
+ const char *name)
+{
+ const Elf_Shdr *sechdrs;
+ const Elf_Ehdr *ehdr;
+ const Elf_Sym *syms;
+ const char *strtab;
+ int i, k;
+
+ if (!pi->ehdr)
+ return NULL;
+
+ ehdr = pi->ehdr;
+ sechdrs = (void *)ehdr + ehdr->e_shoff;
+
+ for (i = 0; i < ehdr->e_shnum; i++) {
+ if (sechdrs[i].sh_type != SHT_SYMTAB)
+ continue;
+
+ if (sechdrs[i].sh_link >= ehdr->e_shnum)
+ /* Invalid strtab section number */
+ continue;
+ strtab = (void *)ehdr + sechdrs[sechdrs[i].sh_link].sh_offset;
+ syms = (void *)ehdr + sechdrs[i].sh_offset;
+
+ /* Go through symbols for a match */
+ for (k = 0; k < sechdrs[i].sh_size/sizeof(Elf_Sym); k++) {
+ if (ELF_ST_BIND(syms[k].st_info) != STB_GLOBAL)
+ continue;
+
+ if (strcmp(strtab + syms[k].st_name, name) != 0)
+ continue;
+
+ if (syms[k].st_shndx == SHN_UNDEF ||
+ syms[k].st_shndx >= ehdr->e_shnum) {
+ pr_debug("Symbol: %s has bad section index %d.\n",
+ name, syms[k].st_shndx);
+ return NULL;
+ }
+
+ /* Found the symbol we are looking for */
+ return &syms[k];
+ }
+ }
+
+ return NULL;
+}
+/*
* kexec_purgatory_setup_kbuf - prepare buffer to load purgatory.
* @pi: Purgatory to be loaded.
* @kbuf: Buffer to setup.
@@ -960,6 +1014,10 @@ static int kexec_purgatory_setup_sechdrs(struct purgatory_info *pi,
unsigned long offset;
size_t sechdrs_size;
Elf_Shdr *sechdrs;
+ const Elf_Sym *entry_sym;
+ u16 entry_shndx = 0;
+ unsigned long entry_off = 0;
+ bool start_fixed = false;
int i;
/*
@@ -977,6 +1035,12 @@ static int kexec_purgatory_setup_sechdrs(struct purgatory_info *pi,
bss_addr = kbuf->mem + kbuf->bufsz;
kbuf->image->start = pi->ehdr->e_entry;
+ entry_sym = kexec_purgatory_find_symbol(pi, "purgatory_start");
+ if (entry_sym) {
+ entry_shndx = entry_sym->st_shndx;
+ entry_off = entry_sym->st_value;
+ }
+
for (i = 0; i < pi->ehdr->e_shnum; i++) {
unsigned long align;
void *src, *dst;
@@ -994,6 +1058,13 @@ static int kexec_purgatory_setup_sechdrs(struct purgatory_info *pi,
offset = ALIGN(offset, align);
+ if (!start_fixed && entry_sym && i == entry_shndx &&
+ (sechdrs[i].sh_flags & SHF_EXECINSTR) &&
+ entry_off < sechdrs[i].sh_size) {
+ kbuf->image->start = kbuf->mem + offset + entry_off;
+ start_fixed = true;
+ }
+
/*
* Check if the segment contains the entry point, if so,
* calculate the value of image->start based on it.
@@ -1004,13 +1075,14 @@ static int kexec_purgatory_setup_sechdrs(struct purgatory_info *pi,
* is not set to the initial value, and warn the user so they
* have a chance to fix their purgatory's linker script.
*/
- if (sechdrs[i].sh_flags & SHF_EXECINSTR &&
+ if (!start_fixed && sechdrs[i].sh_flags & SHF_EXECINSTR &&
pi->ehdr->e_entry >= sechdrs[i].sh_addr &&
pi->ehdr->e_entry < (sechdrs[i].sh_addr
+ sechdrs[i].sh_size) &&
- !WARN_ON(kbuf->image->start != pi->ehdr->e_entry)) {
+ kbuf->image->start == pi->ehdr->e_entry) {
kbuf->image->start -= sechdrs[i].sh_addr;
kbuf->image->start += kbuf->mem + offset;
+ start_fixed = true;
}
src = (void *)pi->ehdr + sechdrs[i].sh_offset;
@@ -1128,61 +1200,6 @@ out_free_kbuf:
return ret;
}
-/*
- * kexec_purgatory_find_symbol - find a symbol in the purgatory
- * @pi: Purgatory to search in.
- * @name: Name of the symbol.
- *
- * Return: pointer to symbol in read-only symtab on success, NULL on error.
- */
-static const Elf_Sym *kexec_purgatory_find_symbol(struct purgatory_info *pi,
- const char *name)
-{
- const Elf_Shdr *sechdrs;
- const Elf_Ehdr *ehdr;
- const Elf_Sym *syms;
- const char *strtab;
- int i, k;
-
- if (!pi->ehdr)
- return NULL;
-
- ehdr = pi->ehdr;
- sechdrs = (void *)ehdr + ehdr->e_shoff;
-
- for (i = 0; i < ehdr->e_shnum; i++) {
- if (sechdrs[i].sh_type != SHT_SYMTAB)
- continue;
-
- if (sechdrs[i].sh_link >= ehdr->e_shnum)
- /* Invalid strtab section number */
- continue;
- strtab = (void *)ehdr + sechdrs[sechdrs[i].sh_link].sh_offset;
- syms = (void *)ehdr + sechdrs[i].sh_offset;
-
- /* Go through symbols for a match */
- for (k = 0; k < sechdrs[i].sh_size/sizeof(Elf_Sym); k++) {
- if (ELF_ST_BIND(syms[k].st_info) != STB_GLOBAL)
- continue;
-
- if (strcmp(strtab + syms[k].st_name, name) != 0)
- continue;
-
- if (syms[k].st_shndx == SHN_UNDEF ||
- syms[k].st_shndx >= ehdr->e_shnum) {
- pr_debug("Symbol: %s has bad section index %d.\n",
- name, syms[k].st_shndx);
- return NULL;
- }
-
- /* Found the symbol we are looking for */
- return &syms[k];
- }
- }
-
- return NULL;
-}
-
void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name)
{
struct purgatory_info *pi = &image->purgatory_info;
diff --git a/kernel/liveupdate/Kconfig b/kernel/liveupdate/Kconfig
index d2aeaf13c3ac..1a8513f16ef7 100644
--- a/kernel/liveupdate/Kconfig
+++ b/kernel/liveupdate/Kconfig
@@ -54,7 +54,6 @@ config KEXEC_HANDOVER_ENABLE_DEFAULT
config LIVEUPDATE
bool "Live Update Orchestrator"
depends on KEXEC_HANDOVER
- depends on SHMEM
help
Enable the Live Update Orchestrator. Live Update is a mechanism,
typically based on kexec, that allows the kernel to be updated
@@ -73,4 +72,20 @@ config LIVEUPDATE
If unsure, say N.
+config LIVEUPDATE_MEMFD
+ bool "Live update support for memfd"
+ depends on LIVEUPDATE
+ depends on MEMFD_CREATE
+ depends on SHMEM
+ default LIVEUPDATE
+ help
+ Enable live update support for memfd regions. This allows preserving
+ memfd-backed memory across kernel live updates.
+
+ This can be used to back VM memory with memfds, allowing the guest
+ memory to persist, or for other user workloads needing to preserve
+ pages.
+
+ If unsure, say N.
+
endmenu
diff --git a/kernel/liveupdate/Makefile b/kernel/liveupdate/Makefile
index 7cad2eece32d..d2f779cbe279 100644
--- a/kernel/liveupdate/Makefile
+++ b/kernel/liveupdate/Makefile
@@ -3,6 +3,7 @@
luo-y := \
luo_core.o \
luo_file.o \
+ luo_flb.o \
luo_session.o
obj-$(CONFIG_KEXEC_HANDOVER) += kexec_handover.o
diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c
index 90d411a59f76..fb3a7b67676e 100644
--- a/kernel/liveupdate/kexec_handover.c
+++ b/kernel/liveupdate/kexec_handover.c
@@ -15,6 +15,7 @@
#include <linux/count_zeros.h>
#include <linux/kexec.h>
#include <linux/kexec_handover.h>
+#include <linux/kho/abi/kexec_handover.h>
#include <linux/libfdt.h>
#include <linux/list.h>
#include <linux/memblock.h>
@@ -24,7 +25,6 @@
#include <asm/early_ioremap.h>
-#include "kexec_handover_internal.h"
/*
* KHO is tightly coupled with mm init and needs access to some of mm
* internal APIs.
@@ -33,10 +33,7 @@
#include "../kexec_internal.h"
#include "kexec_handover_internal.h"
-#define KHO_FDT_COMPATIBLE "kho-v1"
-#define PROP_PRESERVED_MEMORY_MAP "preserved-memory-map"
-#define PROP_SUB_FDT "fdt"
-
+/* The magic token for preserved pages */
#define KHO_PAGE_MAGIC 0x4b484f50U /* ASCII for 'KHOP' */
/*
@@ -219,10 +216,32 @@ static int __kho_preserve_order(struct kho_mem_track *track, unsigned long pfn,
return 0;
}
+/* For physically contiguous 0-order pages. */
+static void kho_init_pages(struct page *page, unsigned long nr_pages)
+{
+ for (unsigned long i = 0; i < nr_pages; i++)
+ set_page_count(page + i, 1);
+}
+
+static void kho_init_folio(struct page *page, unsigned int order)
+{
+ unsigned long nr_pages = (1 << order);
+
+ /* Head page gets refcount of 1. */
+ set_page_count(page, 1);
+
+ /* For higher order folios, tail pages get a page count of zero. */
+ for (unsigned long i = 1; i < nr_pages; i++)
+ set_page_count(page + i, 0);
+
+ if (order > 0)
+ prep_compound_page(page, order);
+}
+
static struct page *kho_restore_page(phys_addr_t phys, bool is_folio)
{
struct page *page = pfn_to_online_page(PHYS_PFN(phys));
- unsigned int nr_pages, ref_cnt;
+ unsigned long nr_pages;
union kho_page_info info;
if (!page)
@@ -240,20 +259,11 @@ static struct page *kho_restore_page(phys_addr_t phys, bool is_folio)
/* Clear private to make sure later restores on this page error out. */
page->private = 0;
- /* Head page gets refcount of 1. */
- set_page_count(page, 1);
- /*
- * For higher order folios, tail pages get a page count of zero.
- * For physically contiguous order-0 pages every pages gets a page
- * count of 1
- */
- ref_cnt = is_folio ? 0 : 1;
- for (unsigned int i = 1; i < nr_pages; i++)
- set_page_count(page + i, ref_cnt);
-
- if (is_folio && info.order)
- prep_compound_page(page, info.order);
+ if (is_folio)
+ kho_init_folio(page, info.order);
+ else
+ kho_init_pages(page, nr_pages);
/* Always mark headpage's codetag as empty to avoid accounting mismatch */
clear_page_tag_ref(page);
@@ -289,9 +299,9 @@ EXPORT_SYMBOL_GPL(kho_restore_folio);
* Restore a contiguous list of order 0 pages that was preserved with
* kho_preserve_pages().
*
- * Return: 0 on success, error code on failure
+ * Return: the first page on success, NULL on failure.
*/
-struct page *kho_restore_pages(phys_addr_t phys, unsigned int nr_pages)
+struct page *kho_restore_pages(phys_addr_t phys, unsigned long nr_pages)
{
const unsigned long start_pfn = PHYS_PFN(phys);
const unsigned long end_pfn = start_pfn + nr_pages;
@@ -386,7 +396,7 @@ static void kho_update_memory_map(struct khoser_mem_chunk *first_chunk)
void *ptr;
u64 phys;
- ptr = fdt_getprop_w(kho_out.fdt, 0, PROP_PRESERVED_MEMORY_MAP, NULL);
+ ptr = fdt_getprop_w(kho_out.fdt, 0, KHO_FDT_MEMORY_MAP_PROP_NAME, NULL);
/* Check and discard previous memory map */
phys = get_unaligned((u64 *)ptr);
@@ -474,7 +484,7 @@ static phys_addr_t __init kho_get_mem_map_phys(const void *fdt)
const void *mem_ptr;
int len;
- mem_ptr = fdt_getprop(fdt, 0, PROP_PRESERVED_MEMORY_MAP, &len);
+ mem_ptr = fdt_getprop(fdt, 0, KHO_FDT_MEMORY_MAP_PROP_NAME, &len);
if (!mem_ptr || len != sizeof(u64)) {
pr_err("failed to get preserved memory bitmaps\n");
return 0;
@@ -645,11 +655,13 @@ static void __init kho_reserve_scratch(void)
scratch_size_update();
/* FIXME: deal with node hot-plug/remove */
- kho_scratch_cnt = num_online_nodes() + 2;
+ kho_scratch_cnt = nodes_weight(node_states[N_MEMORY]) + 2;
size = kho_scratch_cnt * sizeof(*kho_scratch);
kho_scratch = memblock_alloc(size, PAGE_SIZE);
- if (!kho_scratch)
+ if (!kho_scratch) {
+ pr_err("Failed to reserve scratch array\n");
goto err_disable_kho;
+ }
/*
* reserve scratch area in low memory for lowmem allocations in the
@@ -658,8 +670,10 @@ static void __init kho_reserve_scratch(void)
size = scratch_size_lowmem;
addr = memblock_phys_alloc_range(size, CMA_MIN_ALIGNMENT_BYTES, 0,
ARCH_LOW_ADDRESS_LIMIT);
- if (!addr)
+ if (!addr) {
+ pr_err("Failed to reserve lowmem scratch buffer\n");
goto err_free_scratch_desc;
+ }
kho_scratch[i].addr = addr;
kho_scratch[i].size = size;
@@ -668,20 +682,28 @@ static void __init kho_reserve_scratch(void)
/* reserve large contiguous area for allocations without nid */
size = scratch_size_global;
addr = memblock_phys_alloc(size, CMA_MIN_ALIGNMENT_BYTES);
- if (!addr)
+ if (!addr) {
+ pr_err("Failed to reserve global scratch buffer\n");
goto err_free_scratch_areas;
+ }
kho_scratch[i].addr = addr;
kho_scratch[i].size = size;
i++;
- for_each_online_node(nid) {
+ /*
+ * Loop over nodes that have both memory and are online. Skip
+ * memoryless nodes, as we can not allocate scratch areas there.
+ */
+ for_each_node_state(nid, N_MEMORY) {
size = scratch_size_node(nid);
addr = memblock_alloc_range_nid(size, CMA_MIN_ALIGNMENT_BYTES,
0, MEMBLOCK_ALLOC_ACCESSIBLE,
nid, true);
- if (!addr)
+ if (!addr) {
+ pr_err("Failed to reserve nid %d scratch buffer\n", nid);
goto err_free_scratch_areas;
+ }
kho_scratch[i].addr = addr;
kho_scratch[i].size = size;
@@ -735,7 +757,8 @@ int kho_add_subtree(const char *name, void *fdt)
goto out_pack;
}
- err = fdt_setprop(root_fdt, off, PROP_SUB_FDT, &phys, sizeof(phys));
+ err = fdt_setprop(root_fdt, off, KHO_FDT_SUB_TREE_PROP_NAME,
+ &phys, sizeof(phys));
if (err < 0)
goto out_pack;
@@ -766,7 +789,7 @@ void kho_remove_subtree(void *fdt)
const u64 *val;
int len;
- val = fdt_getprop(root_fdt, off, PROP_SUB_FDT, &len);
+ val = fdt_getprop(root_fdt, off, KHO_FDT_SUB_TREE_PROP_NAME, &len);
if (!val || len != sizeof(phys_addr_t))
continue;
@@ -831,7 +854,7 @@ EXPORT_SYMBOL_GPL(kho_unpreserve_folio);
*
* Return: 0 on success, error code on failure
*/
-int kho_preserve_pages(struct page *page, unsigned int nr_pages)
+int kho_preserve_pages(struct page *page, unsigned long nr_pages)
{
struct kho_mem_track *track = &kho_out.track;
const unsigned long start_pfn = page_to_pfn(page);
@@ -875,7 +898,7 @@ EXPORT_SYMBOL_GPL(kho_preserve_pages);
* kho_preserve_pages() call. Unpreserving arbitrary sub-ranges of larger
* preserved blocks is not supported.
*/
-void kho_unpreserve_pages(struct page *page, unsigned int nr_pages)
+void kho_unpreserve_pages(struct page *page, unsigned long nr_pages)
{
struct kho_mem_track *track = &kho_out.track;
const unsigned long start_pfn = page_to_pfn(page);
@@ -885,21 +908,6 @@ void kho_unpreserve_pages(struct page *page, unsigned int nr_pages)
}
EXPORT_SYMBOL_GPL(kho_unpreserve_pages);
-struct kho_vmalloc_hdr {
- DECLARE_KHOSER_PTR(next, struct kho_vmalloc_chunk *);
-};
-
-#define KHO_VMALLOC_SIZE \
- ((PAGE_SIZE - sizeof(struct kho_vmalloc_hdr)) / \
- sizeof(phys_addr_t))
-
-struct kho_vmalloc_chunk {
- struct kho_vmalloc_hdr hdr;
- phys_addr_t phys[KHO_VMALLOC_SIZE];
-};
-
-static_assert(sizeof(struct kho_vmalloc_chunk) == PAGE_SIZE);
-
/* vmalloc flags KHO supports */
#define KHO_VMALLOC_SUPPORTED_FLAGS (VM_ALLOC | VM_ALLOW_HUGE_VMAP)
@@ -1315,7 +1323,7 @@ int kho_retrieve_subtree(const char *name, phys_addr_t *phys)
if (offset < 0)
return -ENOENT;
- val = fdt_getprop(fdt, offset, PROP_SUB_FDT, &len);
+ val = fdt_getprop(fdt, offset, KHO_FDT_SUB_TREE_PROP_NAME, &len);
if (!val || len != sizeof(*val))
return -EINVAL;
@@ -1335,7 +1343,7 @@ static __init int kho_out_fdt_setup(void)
err |= fdt_finish_reservemap(root);
err |= fdt_begin_node(root, "");
err |= fdt_property_string(root, "compatible", KHO_FDT_COMPATIBLE);
- err |= fdt_property(root, PROP_PRESERVED_MEMORY_MAP, &empty_mem_map,
+ err |= fdt_property(root, KHO_FDT_MEMORY_MAP_PROP_NAME, &empty_mem_map,
sizeof(empty_mem_map));
err |= fdt_end_node(root);
err |= fdt_finish(root);
@@ -1451,46 +1459,40 @@ void __init kho_memory_init(void)
void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
phys_addr_t scratch_phys, u64 scratch_len)
{
+ unsigned int scratch_cnt = scratch_len / sizeof(*kho_scratch);
struct kho_scratch *scratch = NULL;
phys_addr_t mem_map_phys;
void *fdt = NULL;
- int err = 0;
- unsigned int scratch_cnt = scratch_len / sizeof(*kho_scratch);
+ int err;
/* Validate the input FDT */
fdt = early_memremap(fdt_phys, fdt_len);
if (!fdt) {
pr_warn("setup: failed to memremap FDT (0x%llx)\n", fdt_phys);
- err = -EFAULT;
- goto out;
+ goto err_report;
}
err = fdt_check_header(fdt);
if (err) {
pr_warn("setup: handover FDT (0x%llx) is invalid: %d\n",
fdt_phys, err);
- err = -EINVAL;
- goto out;
+ goto err_unmap_fdt;
}
err = fdt_node_check_compatible(fdt, 0, KHO_FDT_COMPATIBLE);
if (err) {
pr_warn("setup: handover FDT (0x%llx) is incompatible with '%s': %d\n",
fdt_phys, KHO_FDT_COMPATIBLE, err);
- err = -EINVAL;
- goto out;
+ goto err_unmap_fdt;
}
mem_map_phys = kho_get_mem_map_phys(fdt);
- if (!mem_map_phys) {
- err = -ENOENT;
- goto out;
- }
+ if (!mem_map_phys)
+ goto err_unmap_fdt;
scratch = early_memremap(scratch_phys, scratch_len);
if (!scratch) {
pr_warn("setup: failed to memremap scratch (phys=0x%llx, len=%lld)\n",
scratch_phys, scratch_len);
- err = -EFAULT;
- goto out;
+ goto err_unmap_fdt;
}
/*
@@ -1507,7 +1509,7 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
if (WARN_ON(err)) {
pr_warn("failed to mark the scratch region 0x%pa+0x%pa: %pe",
&area->addr, &size, ERR_PTR(err));
- goto out;
+ goto err_unmap_scratch;
}
pr_debug("Marked 0x%pa+0x%pa as scratch", &area->addr, &size);
}
@@ -1529,13 +1531,14 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
kho_scratch_cnt = scratch_cnt;
pr_info("found kexec handover data.\n");
-out:
- if (fdt)
- early_memunmap(fdt, fdt_len);
- if (scratch)
- early_memunmap(scratch, scratch_len);
- if (err)
- pr_warn("disabling KHO revival: %d\n", err);
+ return;
+
+err_unmap_scratch:
+ early_memunmap(scratch, scratch_len);
+err_unmap_fdt:
+ early_memunmap(fdt, fdt_len);
+err_report:
+ pr_warn("disabling KHO revival\n");
}
/* Helper functions for kexec_file_load */
diff --git a/kernel/liveupdate/luo_core.c b/kernel/liveupdate/luo_core.c
index 944663d99dd9..dda7bb57d421 100644
--- a/kernel/liveupdate/luo_core.c
+++ b/kernel/liveupdate/luo_core.c
@@ -35,8 +35,7 @@
* iommu, interrupts, vfio, participating filesystems, and memory management.
*
* LUO uses Kexec Handover to transfer memory state from the current kernel to
- * the next kernel. For more details see
- * Documentation/core-api/kho/concepts.rst.
+ * the next kernel. For more details see Documentation/core-api/kho/index.rst.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -128,7 +127,9 @@ static int __init luo_early_startup(void)
if (err)
return err;
- return 0;
+ err = luo_flb_setup_incoming(luo_global.fdt_in);
+
+ return err;
}
static int __init liveupdate_early_init(void)
@@ -165,6 +166,7 @@ static int __init luo_fdt_setup(void)
err |= fdt_property_string(fdt_out, "compatible", LUO_FDT_COMPATIBLE);
err |= fdt_property(fdt_out, LUO_FDT_LIVEUPDATE_NUM, &ln, sizeof(ln));
err |= luo_session_setup_outgoing(fdt_out);
+ err |= luo_flb_setup_outgoing(fdt_out);
err |= fdt_end_node(fdt_out);
err |= fdt_finish(fdt_out);
if (err)
@@ -226,6 +228,8 @@ int liveupdate_reboot(void)
if (err)
return err;
+ luo_flb_serialize();
+
err = kho_finalize();
if (err) {
pr_err("kho_finalize failed %d\n", err);
diff --git a/kernel/liveupdate/luo_file.c b/kernel/liveupdate/luo_file.c
index 9f7283379ebc..4c7df52a6507 100644
--- a/kernel/liveupdate/luo_file.c
+++ b/kernel/liveupdate/luo_file.c
@@ -104,6 +104,7 @@
#include <linux/io.h>
#include <linux/kexec_handover.h>
#include <linux/kho/abi/luo.h>
+#include <linux/list_private.h>
#include <linux/liveupdate.h>
#include <linux/module.h>
#include <linux/sizes.h>
@@ -273,7 +274,7 @@ int luo_preserve_file(struct luo_file_set *file_set, u64 token, int fd)
goto err_fput;
err = -ENOENT;
- luo_list_for_each_private(fh, &luo_file_handler_list, list) {
+ list_private_for_each_entry(fh, &luo_file_handler_list, list) {
if (fh->ops->can_preserve(fh, file)) {
err = 0;
break;
@@ -284,10 +285,14 @@ int luo_preserve_file(struct luo_file_set *file_set, u64 token, int fd)
if (err)
goto err_free_files_mem;
+ err = luo_flb_file_preserve(fh);
+ if (err)
+ goto err_free_files_mem;
+
luo_file = kzalloc(sizeof(*luo_file), GFP_KERNEL);
if (!luo_file) {
err = -ENOMEM;
- goto err_free_files_mem;
+ goto err_flb_unpreserve;
}
luo_file->file = file;
@@ -311,6 +316,8 @@ int luo_preserve_file(struct luo_file_set *file_set, u64 token, int fd)
err_kfree:
kfree(luo_file);
+err_flb_unpreserve:
+ luo_flb_file_unpreserve(fh);
err_free_files_mem:
luo_free_files_mem(file_set);
err_fput:
@@ -352,6 +359,7 @@ void luo_file_unpreserve_files(struct luo_file_set *file_set)
args.serialized_data = luo_file->serialized_data;
args.private_data = luo_file->private_data;
luo_file->fh->ops->unpreserve(&args);
+ luo_flb_file_unpreserve(luo_file->fh);
list_del(&luo_file->list);
file_set->count--;
@@ -627,6 +635,7 @@ static void luo_file_finish_one(struct luo_file_set *file_set,
args.retrieved = luo_file->retrieved;
luo_file->fh->ops->finish(&args);
+ luo_flb_file_finish(luo_file->fh);
}
/**
@@ -758,7 +767,7 @@ int luo_file_deserialize(struct luo_file_set *file_set,
bool handler_found = false;
struct luo_file *luo_file;
- luo_list_for_each_private(fh, &luo_file_handler_list, list) {
+ list_private_for_each_entry(fh, &luo_file_handler_list, list) {
if (!strcmp(fh->compatible, file_ser[i].compatible)) {
handler_found = true;
break;
@@ -833,7 +842,7 @@ int liveupdate_register_file_handler(struct liveupdate_file_handler *fh)
return -EBUSY;
/* Check for duplicate compatible strings */
- luo_list_for_each_private(fh_iter, &luo_file_handler_list, list) {
+ list_private_for_each_entry(fh_iter, &luo_file_handler_list, list) {
if (!strcmp(fh_iter->compatible, fh->compatible)) {
pr_err("File handler registration failed: Compatible string '%s' already registered.\n",
fh->compatible);
@@ -848,10 +857,13 @@ int liveupdate_register_file_handler(struct liveupdate_file_handler *fh)
goto err_resume;
}
+ INIT_LIST_HEAD(&ACCESS_PRIVATE(fh, flb_list));
INIT_LIST_HEAD(&ACCESS_PRIVATE(fh, list));
list_add_tail(&ACCESS_PRIVATE(fh, list), &luo_file_handler_list);
luo_session_resume();
+ liveupdate_test_register(fh);
+
return 0;
err_resume:
@@ -868,23 +880,38 @@ err_resume:
*
* It ensures safe removal by checking that:
* No live update session is currently in progress.
+ * No FLB registered with this file handler.
*
* If the unregistration fails, the internal test state is reverted.
*
* Return: 0 Success. -EOPNOTSUPP when live update is not enabled. -EBUSY A live
- * update is in progress, can't quiesce live update.
+ * update is in progress, can't quiesce live update, or an FLB is registered with
+ * this file handler.
*/
int liveupdate_unregister_file_handler(struct liveupdate_file_handler *fh)
{
+ int err = -EBUSY;
+
if (!liveupdate_enabled())
return -EOPNOTSUPP;
+ liveupdate_test_unregister(fh);
+
if (!luo_session_quiesce())
- return -EBUSY;
+ goto err_register;
+
+ if (!list_empty(&ACCESS_PRIVATE(fh, flb_list)))
+ goto err_resume;
list_del(&ACCESS_PRIVATE(fh, list));
module_put(fh->ops->owner);
luo_session_resume();
return 0;
+
+err_resume:
+ luo_session_resume();
+err_register:
+ liveupdate_test_register(fh);
+ return err;
}
diff --git a/kernel/liveupdate/luo_flb.c b/kernel/liveupdate/luo_flb.c
new file mode 100644
index 000000000000..4c437de5c0b0
--- /dev/null
+++ b/kernel/liveupdate/luo_flb.c
@@ -0,0 +1,654 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (c) 2025, Google LLC.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
+ */
+
+/**
+ * DOC: LUO File Lifecycle Bound Global Data
+ *
+ * File-Lifecycle-Bound (FLB) objects provide a mechanism for managing global
+ * state that is shared across multiple live-updatable files. The lifecycle of
+ * this shared state is tied to the preservation of the files that depend on it.
+ *
+ * An FLB represents a global resource, such as the IOMMU core state, that is
+ * required by multiple file descriptors (e.g., all VFIO fds).
+ *
+ * The preservation of the FLB's state is triggered when the *first* file
+ * depending on it is preserved. The cleanup of this state (unpreserve or
+ * finish) is triggered when the *last* file depending on it is unpreserved or
+ * finished.
+ *
+ * Handler Dependency: A file handler declares its dependency on one or more
+ * FLBs by registering them via liveupdate_register_flb().
+ *
+ * Callback Model: Each FLB is defined by a set of operations
+ * (&struct liveupdate_flb_ops) that LUO invokes at key points:
+ *
+ * - .preserve(): Called for the first file. Saves global state.
+ * - .unpreserve(): Called for the last file (if aborted pre-reboot).
+ * - .retrieve(): Called on-demand in the new kernel to restore the state.
+ * - .finish(): Called for the last file in the new kernel for cleanup.
+ *
+ * This reference-counted approach ensures that shared state is saved exactly
+ * once and restored exactly once, regardless of how many files depend on it,
+ * and that its lifecycle is correctly managed across the kexec transition.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/cleanup.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/kexec_handover.h>
+#include <linux/kho/abi/luo.h>
+#include <linux/libfdt.h>
+#include <linux/list_private.h>
+#include <linux/liveupdate.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/unaligned.h>
+#include "luo_internal.h"
+
+#define LUO_FLB_PGCNT 1ul
+#define LUO_FLB_MAX (((LUO_FLB_PGCNT << PAGE_SHIFT) - \
+ sizeof(struct luo_flb_header_ser)) / sizeof(struct luo_flb_ser))
+
+struct luo_flb_header {
+ struct luo_flb_header_ser *header_ser;
+ struct luo_flb_ser *ser;
+ bool active;
+};
+
+struct luo_flb_global {
+ struct luo_flb_header incoming;
+ struct luo_flb_header outgoing;
+ struct list_head list;
+ long count;
+};
+
+static struct luo_flb_global luo_flb_global = {
+ .list = LIST_HEAD_INIT(luo_flb_global.list),
+};
+
+/*
+ * struct luo_flb_link - Links an FLB definition to a file handler's internal
+ * list of dependencies.
+ * @flb: A pointer to the registered &struct liveupdate_flb definition.
+ * @list: The list_head for linking.
+ */
+struct luo_flb_link {
+ struct liveupdate_flb *flb;
+ struct list_head list;
+};
+
+/* luo_flb_get_private - Access private field, and if needed initialize it. */
+static struct luo_flb_private *luo_flb_get_private(struct liveupdate_flb *flb)
+{
+ struct luo_flb_private *private = &ACCESS_PRIVATE(flb, private);
+
+ if (!private->initialized) {
+ mutex_init(&private->incoming.lock);
+ mutex_init(&private->outgoing.lock);
+ INIT_LIST_HEAD(&private->list);
+ private->users = 0;
+ private->initialized = true;
+ }
+
+ return private;
+}
+
+static int luo_flb_file_preserve_one(struct liveupdate_flb *flb)
+{
+ struct luo_flb_private *private = luo_flb_get_private(flb);
+
+ scoped_guard(mutex, &private->outgoing.lock) {
+ if (!private->outgoing.count) {
+ struct liveupdate_flb_op_args args = {0};
+ int err;
+
+ args.flb = flb;
+ err = flb->ops->preserve(&args);
+ if (err)
+ return err;
+ private->outgoing.data = args.data;
+ private->outgoing.obj = args.obj;
+ }
+ private->outgoing.count++;
+ }
+
+ return 0;
+}
+
+static void luo_flb_file_unpreserve_one(struct liveupdate_flb *flb)
+{
+ struct luo_flb_private *private = luo_flb_get_private(flb);
+
+ scoped_guard(mutex, &private->outgoing.lock) {
+ private->outgoing.count--;
+ if (!private->outgoing.count) {
+ struct liveupdate_flb_op_args args = {0};
+
+ args.flb = flb;
+ args.data = private->outgoing.data;
+ args.obj = private->outgoing.obj;
+
+ if (flb->ops->unpreserve)
+ flb->ops->unpreserve(&args);
+
+ private->outgoing.data = 0;
+ private->outgoing.obj = NULL;
+ }
+ }
+}
+
+static int luo_flb_retrieve_one(struct liveupdate_flb *flb)
+{
+ struct luo_flb_private *private = luo_flb_get_private(flb);
+ struct luo_flb_header *fh = &luo_flb_global.incoming;
+ struct liveupdate_flb_op_args args = {0};
+ bool found = false;
+ int err;
+
+ guard(mutex)(&private->incoming.lock);
+
+ if (private->incoming.finished)
+ return -ENODATA;
+
+ if (private->incoming.retrieved)
+ return 0;
+
+ if (!fh->active)
+ return -ENODATA;
+
+ for (int i = 0; i < fh->header_ser->count; i++) {
+ if (!strcmp(fh->ser[i].name, flb->compatible)) {
+ private->incoming.data = fh->ser[i].data;
+ private->incoming.count = fh->ser[i].count;
+ found = true;
+ break;
+ }
+ }
+
+ if (!found)
+ return -ENOENT;
+
+ args.flb = flb;
+ args.data = private->incoming.data;
+
+ err = flb->ops->retrieve(&args);
+ if (err)
+ return err;
+
+ private->incoming.obj = args.obj;
+ private->incoming.retrieved = true;
+
+ return 0;
+}
+
+static void luo_flb_file_finish_one(struct liveupdate_flb *flb)
+{
+ struct luo_flb_private *private = luo_flb_get_private(flb);
+ u64 count;
+
+ scoped_guard(mutex, &private->incoming.lock)
+ count = --private->incoming.count;
+
+ if (!count) {
+ struct liveupdate_flb_op_args args = {0};
+
+ if (!private->incoming.retrieved) {
+ int err = luo_flb_retrieve_one(flb);
+
+ if (WARN_ON(err))
+ return;
+ }
+
+ scoped_guard(mutex, &private->incoming.lock) {
+ args.flb = flb;
+ args.obj = private->incoming.obj;
+ flb->ops->finish(&args);
+
+ private->incoming.data = 0;
+ private->incoming.obj = NULL;
+ private->incoming.finished = true;
+ }
+ }
+}
+
+/**
+ * luo_flb_file_preserve - Notifies FLBs that a file is about to be preserved.
+ * @fh: The file handler for the preserved file.
+ *
+ * This function iterates through all FLBs associated with the given file
+ * handler. It increments the reference count for each FLB. If the count becomes
+ * 1, it triggers the FLB's .preserve() callback to save the global state.
+ *
+ * This operation is atomic. If any FLB's .preserve() op fails, it will roll
+ * back by calling .unpreserve() on any FLBs that were successfully preserved
+ * during this call.
+ *
+ * Context: Called from luo_preserve_file()
+ * Return: 0 on success, or a negative errno on failure.
+ */
+int luo_flb_file_preserve(struct liveupdate_file_handler *fh)
+{
+ struct list_head *flb_list = &ACCESS_PRIVATE(fh, flb_list);
+ struct luo_flb_link *iter;
+ int err = 0;
+
+ list_for_each_entry(iter, flb_list, list) {
+ err = luo_flb_file_preserve_one(iter->flb);
+ if (err)
+ goto exit_err;
+ }
+
+ return 0;
+
+exit_err:
+ list_for_each_entry_continue_reverse(iter, flb_list, list)
+ luo_flb_file_unpreserve_one(iter->flb);
+
+ return err;
+}
+
+/**
+ * luo_flb_file_unpreserve - Notifies FLBs that a dependent file was unpreserved.
+ * @fh: The file handler for the unpreserved file.
+ *
+ * This function iterates through all FLBs associated with the given file
+ * handler, in reverse order of registration. It decrements the reference count
+ * for each FLB. If the count becomes 0, it triggers the FLB's .unpreserve()
+ * callback to clean up the global state.
+ *
+ * Context: Called when a preserved file is being cleaned up before reboot
+ * (e.g., from luo_file_unpreserve_files()).
+ */
+void luo_flb_file_unpreserve(struct liveupdate_file_handler *fh)
+{
+ struct list_head *flb_list = &ACCESS_PRIVATE(fh, flb_list);
+ struct luo_flb_link *iter;
+
+ list_for_each_entry_reverse(iter, flb_list, list)
+ luo_flb_file_unpreserve_one(iter->flb);
+}
+
+/**
+ * luo_flb_file_finish - Notifies FLBs that a dependent file has been finished.
+ * @fh: The file handler for the finished file.
+ *
+ * This function iterates through all FLBs associated with the given file
+ * handler, in reverse order of registration. It decrements the incoming
+ * reference count for each FLB. If the count becomes 0, it triggers the FLB's
+ * .finish() callback for final cleanup in the new kernel.
+ *
+ * Context: Called from luo_file_finish() for each file being finished.
+ */
+void luo_flb_file_finish(struct liveupdate_file_handler *fh)
+{
+ struct list_head *flb_list = &ACCESS_PRIVATE(fh, flb_list);
+ struct luo_flb_link *iter;
+
+ list_for_each_entry_reverse(iter, flb_list, list)
+ luo_flb_file_finish_one(iter->flb);
+}
+
+/**
+ * liveupdate_register_flb - Associate an FLB with a file handler and register it globally.
+ * @fh: The file handler that will now depend on the FLB.
+ * @flb: The File-Lifecycle-Bound object to associate.
+ *
+ * Establishes a dependency, informing the LUO core that whenever a file of
+ * type @fh is preserved, the state of @flb must also be managed.
+ *
+ * On the first registration of a given @flb object, it is added to a global
+ * registry. This function checks for duplicate registrations, both for a
+ * specific handler and globally, and ensures the total number of unique
+ * FLBs does not exceed the system limit.
+ *
+ * Context: Typically called from a subsystem's module init function after
+ * both the handler and the FLB have been defined and initialized.
+ * Return: 0 on success. Returns a negative errno on failure:
+ * -EINVAL if arguments are NULL or not initialized.
+ * -ENOMEM on memory allocation failure.
+ * -EEXIST if this FLB is already registered with this handler.
+ * -ENOSPC if the maximum number of global FLBs has been reached.
+ * -EOPNOTSUPP if live update is disabled or not configured.
+ */
+int liveupdate_register_flb(struct liveupdate_file_handler *fh,
+ struct liveupdate_flb *flb)
+{
+ struct luo_flb_private *private = luo_flb_get_private(flb);
+ struct list_head *flb_list = &ACCESS_PRIVATE(fh, flb_list);
+ struct luo_flb_link *link __free(kfree) = NULL;
+ struct liveupdate_flb *gflb;
+ struct luo_flb_link *iter;
+ int err;
+
+ if (!liveupdate_enabled())
+ return -EOPNOTSUPP;
+
+ if (WARN_ON(!flb->ops->preserve || !flb->ops->unpreserve ||
+ !flb->ops->retrieve || !flb->ops->finish)) {
+ return -EINVAL;
+ }
+
+ /*
+ * File handler must already be registered, as it initializes the
+ * flb_list
+ */
+ if (WARN_ON(list_empty(&ACCESS_PRIVATE(fh, list))))
+ return -EINVAL;
+
+ link = kzalloc(sizeof(*link), GFP_KERNEL);
+ if (!link)
+ return -ENOMEM;
+
+ /*
+ * Ensure the system is quiescent (no active sessions).
+ * This acts as a global lock for registration: no other thread can
+ * be in this section, and no sessions can be creating/using FDs.
+ */
+ if (!luo_session_quiesce())
+ return -EBUSY;
+
+ /* Check that this FLB is not already linked to this file handler */
+ err = -EEXIST;
+ list_for_each_entry(iter, flb_list, list) {
+ if (iter->flb == flb)
+ goto err_resume;
+ }
+
+ /*
+ * If this FLB is not yet linked to the global list, this is the first time
+ * it is being registered
+ */
+ if (!private->users) {
+ if (WARN_ON(!list_empty(&private->list))) {
+ err = -EINVAL;
+ goto err_resume;
+ }
+
+ if (luo_flb_global.count == LUO_FLB_MAX) {
+ err = -ENOSPC;
+ goto err_resume;
+ }
+
+ /* Check that compatible string is unique in global list */
+ list_private_for_each_entry(gflb, &luo_flb_global.list, private.list) {
+ if (!strcmp(gflb->compatible, flb->compatible))
+ goto err_resume;
+ }
+
+ if (!try_module_get(flb->ops->owner)) {
+ err = -EAGAIN;
+ goto err_resume;
+ }
+
+ list_add_tail(&private->list, &luo_flb_global.list);
+ luo_flb_global.count++;
+ }
+
+ /* Finally, link the FLB to the file handler */
+ private->users++;
+ link->flb = flb;
+ list_add_tail(&no_free_ptr(link)->list, flb_list);
+ luo_session_resume();
+
+ return 0;
+
+err_resume:
+ luo_session_resume();
+ return err;
+}
+
+/**
+ * liveupdate_unregister_flb - Remove an FLB dependency from a file handler.
+ * @fh: The file handler that is currently depending on the FLB.
+ * @flb: The File-Lifecycle-Bound object to remove.
+ *
+ * Removes the association between the specified file handler and the FLB
+ * previously established by liveupdate_register_flb().
+ *
+ * This function manages the global lifecycle of the FLB. It decrements the
+ * FLB's usage count. If this was the last file handler referencing this FLB,
+ * the FLB is removed from the global registry and the reference to its
+ * owner module (acquired during registration) is released.
+ *
+ * Context: This function ensures the session is quiesced (no active FDs
+ * being created) during the update. It is typically called from a
+ * subsystem's module exit function.
+ * Return: 0 on success.
+ * -EOPNOTSUPP if live update is disabled.
+ * -EBUSY if the live update session is active and cannot be quiesced.
+ * -ENOENT if the FLB was not found in the file handler's list.
+ */
+int liveupdate_unregister_flb(struct liveupdate_file_handler *fh,
+ struct liveupdate_flb *flb)
+{
+ struct luo_flb_private *private = luo_flb_get_private(flb);
+ struct list_head *flb_list = &ACCESS_PRIVATE(fh, flb_list);
+ struct luo_flb_link *iter;
+ int err = -ENOENT;
+
+ if (!liveupdate_enabled())
+ return -EOPNOTSUPP;
+
+ /*
+ * Ensure the system is quiescent (no active sessions).
+ * This acts as a global lock for unregistration.
+ */
+ if (!luo_session_quiesce())
+ return -EBUSY;
+
+ /* Find and remove the link from the file handler's list */
+ list_for_each_entry(iter, flb_list, list) {
+ if (iter->flb == flb) {
+ list_del(&iter->list);
+ kfree(iter);
+ err = 0;
+ break;
+ }
+ }
+
+ if (err)
+ goto err_resume;
+
+ private->users--;
+ /*
+ * If this is the last file handler with which we are registered, remove it
+ * from the global list, and release the module reference.
+ */
+ if (!private->users) {
+ list_del_init(&private->list);
+ luo_flb_global.count--;
+ module_put(flb->ops->owner);
+ }
+
+ luo_session_resume();
+
+ return 0;
+
+err_resume:
+ luo_session_resume();
+ return err;
+}
+
+/**
+ * liveupdate_flb_get_incoming - Retrieve the incoming FLB object.
+ * @flb: The FLB definition.
+ * @objp: Output parameter; will be populated with the live shared object.
+ *
+ * Returns a pointer to its shared live object for the incoming (post-reboot)
+ * path.
+ *
+ * If this is the first time the object is requested in the new kernel, this
+ * function will trigger the FLB's .retrieve() callback to reconstruct the
+ * object from its preserved state. Subsequent calls will return the same
+ * cached object.
+ *
+ * Return: 0 on success, or a negative errno on failure. -ENODATA means there
+ * is no incoming FLB data, -ENOENT means this specific FLB was not found in
+ * the incoming data, and -EOPNOTSUPP means live update is disabled or not
+ * configured.
+ */
+int liveupdate_flb_get_incoming(struct liveupdate_flb *flb, void **objp)
+{
+ struct luo_flb_private *private = luo_flb_get_private(flb);
+
+ if (!liveupdate_enabled())
+ return -EOPNOTSUPP;
+
+ if (!private->incoming.obj) {
+ int err = luo_flb_retrieve_one(flb);
+
+ if (err)
+ return err;
+ }
+
+ guard(mutex)(&private->incoming.lock);
+ *objp = private->incoming.obj;
+
+ return 0;
+}
+
+/**
+ * liveupdate_flb_get_outgoing - Retrieve the outgoing FLB object.
+ * @flb: The FLB definition.
+ * @objp: Output parameter; will be populated with the live shared object.
+ *
+ * Returns a pointer to the FLB's shared live object for the outgoing
+ * (pre-reboot) path.
+ *
+ * This function assumes the object has already been created by the FLB's
+ * .preserve() callback, which is triggered when the first dependent file
+ * is preserved.
+ *
+ * Return: 0 on success, or a negative errno on failure.
+ */
+int liveupdate_flb_get_outgoing(struct liveupdate_flb *flb, void **objp)
+{
+ struct luo_flb_private *private = luo_flb_get_private(flb);
+
+ if (!liveupdate_enabled())
+ return -EOPNOTSUPP;
+
+ guard(mutex)(&private->outgoing.lock);
+ *objp = private->outgoing.obj;
+
+ return 0;
+}
+
+int __init luo_flb_setup_outgoing(void *fdt_out)
+{
+ struct luo_flb_header_ser *header_ser;
+ u64 header_ser_pa;
+ int err;
+
+ header_ser = kho_alloc_preserve(LUO_FLB_PGCNT << PAGE_SHIFT);
+ if (IS_ERR(header_ser))
+ return PTR_ERR(header_ser);
+
+ header_ser_pa = virt_to_phys(header_ser);
+
+ err = fdt_begin_node(fdt_out, LUO_FDT_FLB_NODE_NAME);
+ err |= fdt_property_string(fdt_out, "compatible",
+ LUO_FDT_FLB_COMPATIBLE);
+ err |= fdt_property(fdt_out, LUO_FDT_FLB_HEADER, &header_ser_pa,
+ sizeof(header_ser_pa));
+ err |= fdt_end_node(fdt_out);
+
+ if (err)
+ goto err_unpreserve;
+
+ header_ser->pgcnt = LUO_FLB_PGCNT;
+ luo_flb_global.outgoing.header_ser = header_ser;
+ luo_flb_global.outgoing.ser = (void *)(header_ser + 1);
+ luo_flb_global.outgoing.active = true;
+
+ return 0;
+
+err_unpreserve:
+ kho_unpreserve_free(header_ser);
+
+ return err;
+}
+
+int __init luo_flb_setup_incoming(void *fdt_in)
+{
+ struct luo_flb_header_ser *header_ser;
+ int err, header_size, offset;
+ const void *ptr;
+ u64 header_ser_pa;
+
+ offset = fdt_subnode_offset(fdt_in, 0, LUO_FDT_FLB_NODE_NAME);
+ if (offset < 0) {
+ pr_err("Unable to get FLB node [%s]\n", LUO_FDT_FLB_NODE_NAME);
+
+ return -ENOENT;
+ }
+
+ err = fdt_node_check_compatible(fdt_in, offset,
+ LUO_FDT_FLB_COMPATIBLE);
+ if (err) {
+ pr_err("FLB node is incompatible with '%s' [%d]\n",
+ LUO_FDT_FLB_COMPATIBLE, err);
+
+ return -EINVAL;
+ }
+
+ header_size = 0;
+ ptr = fdt_getprop(fdt_in, offset, LUO_FDT_FLB_HEADER, &header_size);
+ if (!ptr || header_size != sizeof(u64)) {
+ pr_err("Unable to get FLB header property '%s' [%d]\n",
+ LUO_FDT_FLB_HEADER, header_size);
+
+ return -EINVAL;
+ }
+
+ header_ser_pa = get_unaligned((u64 *)ptr);
+ header_ser = phys_to_virt(header_ser_pa);
+
+ luo_flb_global.incoming.header_ser = header_ser;
+ luo_flb_global.incoming.ser = (void *)(header_ser + 1);
+ luo_flb_global.incoming.active = true;
+
+ return 0;
+}
+
+/**
+ * luo_flb_serialize - Serializes all active FLB objects for KHO.
+ *
+ * This function is called from the reboot path. It iterates through all
+ * registered File-Lifecycle-Bound (FLB) objects. For each FLB that has been
+ * preserved (i.e., its reference count is greater than zero), it writes its
+ * metadata into the memory region designated for Kexec Handover.
+ *
+ * The serialized data includes the FLB's compatibility string, its opaque
+ * data handle, and the final reference count. This allows the new kernel to
+ * find the appropriate handler and reconstruct the FLB's state.
+ *
+ * Context: Called from liveupdate_reboot() just before kho_finalize().
+ */
+void luo_flb_serialize(void)
+{
+ struct luo_flb_header *fh = &luo_flb_global.outgoing;
+ struct liveupdate_flb *gflb;
+ int i = 0;
+
+ list_private_for_each_entry(gflb, &luo_flb_global.list, private.list) {
+ struct luo_flb_private *private = luo_flb_get_private(gflb);
+
+ if (private->outgoing.count > 0) {
+ strscpy(fh->ser[i].name, gflb->compatible,
+ sizeof(fh->ser[i].name));
+ fh->ser[i].data = private->outgoing.data;
+ fh->ser[i].count = private->outgoing.count;
+ i++;
+ }
+ }
+
+ fh->header_ser->count = i;
+}
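
For illustration, a minimal sketch of how a subsystem might pair these calls in
its module init/exit paths. The "foo" handler, the FLB definition, its ops and
compatible strings are made up for the example; only liveupdate_register_flb()
and liveupdate_unregister_flb() are taken from the code above, and the argument
order for the register call is assumed to mirror the unregister call:

/*
 * Hypothetical "foo" subsystem; foo_fh is its file handler and foo_flb_ops
 * (with .owner = THIS_MODULE) is defined elsewhere in the sketch.
 */
static struct liveupdate_flb foo_flb = {
	.compatible	= "foo-shared-state-v1",
	.ops		= &foo_flb_ops,
};

static int __init foo_init(void)
{
	/*
	 * Tie foo_flb's lifetime to files preserved through foo_fh.
	 * Once linked, preserving the first foo file triggers the FLB's
	 * .preserve() callback.
	 */
	return liveupdate_register_flb(&foo_fh, &foo_flb);
}

static void __exit foo_exit(void)
{
	/* Can fail with -EBUSY if the live update session cannot be quiesced. */
	liveupdate_unregister_flb(&foo_fh, &foo_flb);
}
module_init(foo_init);
module_exit(foo_exit);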
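
Likewise, a sketch of how the per-file paths on each side of the update might
look up the shared object. struct foo_file, foo_record_file() and
foo_attach_file() are assumptions for the example; only the two getters come
from the code above:

/* Outgoing (pre-reboot) side, e.g. from the handler's per-file preserve. */
static int foo_preserve_file(struct foo_file *ff)
{
	void *obj;
	int err;

	err = liveupdate_flb_get_outgoing(&foo_flb, &obj);
	if (err)
		return err;

	/* obj was created by foo_flb's .preserve() for the first file. */
	return foo_record_file(obj, ff);
}

/* Incoming (post-reboot) side, e.g. when the file is restored. */
static int foo_restore_file(struct foo_file *ff)
{
	void *obj;
	int err;

	/* The first caller triggers .retrieve(); later callers get the cache. */
	err = liveupdate_flb_get_incoming(&foo_flb, &obj);
	if (err)
		return err;	/* -ENODATA, -ENOENT or -EOPNOTSUPP, see above */

	return foo_attach_file(obj, ff);
}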
diff --git a/kernel/liveupdate/luo_internal.h b/kernel/liveupdate/luo_internal.h
index c8973b543d1d..8083d8739b09 100644
--- a/kernel/liveupdate/luo_internal.h
+++ b/kernel/liveupdate/luo_internal.h
@@ -40,13 +40,6 @@ static inline int luo_ucmd_respond(struct luo_ucmd *ucmd,
*/
#define luo_restore_fail(__fmt, ...) panic(__fmt, ##__VA_ARGS__)
-/* Mimics list_for_each_entry() but for private list head entries */
-#define luo_list_for_each_private(pos, head, member) \
- for (struct list_head *__iter = (head)->next; \
- __iter != (head) && \
- ({ pos = container_of(__iter, typeof(*(pos)), member); 1; }); \
- __iter = __iter->next)
-
/**
* struct luo_file_set - A set of files that belong to the same sessions.
* @files_list: An ordered list of files associated with this session, it is
@@ -107,4 +100,19 @@ int luo_file_deserialize(struct luo_file_set *file_set,
void luo_file_set_init(struct luo_file_set *file_set);
void luo_file_set_destroy(struct luo_file_set *file_set);
+int luo_flb_file_preserve(struct liveupdate_file_handler *fh);
+void luo_flb_file_unpreserve(struct liveupdate_file_handler *fh);
+void luo_flb_file_finish(struct liveupdate_file_handler *fh);
+int __init luo_flb_setup_outgoing(void *fdt);
+int __init luo_flb_setup_incoming(void *fdt);
+void luo_flb_serialize(void);
+
+#ifdef CONFIG_LIVEUPDATE_TEST
+void liveupdate_test_register(struct liveupdate_file_handler *fh);
+void liveupdate_test_unregister(struct liveupdate_file_handler *fh);
+#else
+static inline void liveupdate_test_register(struct liveupdate_file_handler *fh) { }
+static inline void liveupdate_test_unregister(struct liveupdate_file_handler *fh) { }
+#endif
+
#endif /* _LINUX_LUO_INTERNAL_H */
diff --git a/kernel/module/kallsyms.c b/kernel/module/kallsyms.c
index 00a60796327c..0fc11e45df9b 100644
--- a/kernel/module/kallsyms.c
+++ b/kernel/module/kallsyms.c
@@ -334,13 +334,8 @@ int module_address_lookup(unsigned long addr,
if (mod) {
if (modname)
*modname = mod->name;
- if (modbuildid) {
-#if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID)
- *modbuildid = mod->build_id;
-#else
- *modbuildid = NULL;
-#endif
- }
+ if (modbuildid)
+ *modbuildid = module_buildid(mod);
sym = find_kallsyms_symbol(mod, addr, size, offset);
diff --git a/kernel/panic.c b/kernel/panic.c
index 0c20fcaae98a..c78600212b6c 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -42,6 +42,7 @@
#define PANIC_TIMER_STEP 100
#define PANIC_BLINK_SPD 18
+#define PANIC_MSG_BUFSZ 1024
#ifdef CONFIG_SMP
/*
@@ -74,6 +75,8 @@ EXPORT_SYMBOL_GPL(panic_timeout);
unsigned long panic_print;
+static int panic_force_cpu = -1;
+
ATOMIC_NOTIFIER_HEAD(panic_notifier_list);
EXPORT_SYMBOL(panic_notifier_list);
@@ -300,6 +303,150 @@ void __weak crash_smp_send_stop(void)
}
atomic_t panic_cpu = ATOMIC_INIT(PANIC_CPU_INVALID);
+atomic_t panic_redirect_cpu = ATOMIC_INIT(PANIC_CPU_INVALID);
+
+#if defined(CONFIG_SMP) && defined(CONFIG_CRASH_DUMP)
+static char *panic_force_buf;
+
+static int __init panic_force_cpu_setup(char *str)
+{
+ int cpu;
+
+ if (!str)
+ return -EINVAL;
+
+ if (kstrtoint(str, 0, &cpu) || cpu < 0 || cpu >= nr_cpu_ids) {
+ pr_warn("panic_force_cpu: invalid value '%s'\n", str);
+ return -EINVAL;
+ }
+
+ panic_force_cpu = cpu;
+ return 0;
+}
+early_param("panic_force_cpu", panic_force_cpu_setup);
+
+static int __init panic_force_cpu_late_init(void)
+{
+ if (panic_force_cpu < 0)
+ return 0;
+
+ panic_force_buf = kmalloc(PANIC_MSG_BUFSZ, GFP_KERNEL);
+
+ return 0;
+}
+late_initcall(panic_force_cpu_late_init);
+
+static void do_panic_on_target_cpu(void *info)
+{
+ panic("%s", (char *)info);
+}
+
+/**
+ * panic_smp_redirect_cpu - Redirect panic to target CPU
+ * @target_cpu: CPU that should handle the panic
+ * @msg: formatted panic message
+ *
+ * Default implementation uses IPI. Architectures with NMI support
+ * can override this for more reliable delivery.
+ *
+ * Return: 0 on success, negative errno on failure
+ */
+int __weak panic_smp_redirect_cpu(int target_cpu, void *msg)
+{
+ static call_single_data_t panic_csd;
+
+ panic_csd.func = do_panic_on_target_cpu;
+ panic_csd.info = msg;
+
+ return smp_call_function_single_async(target_cpu, &panic_csd);
+}
+
+/**
+ * panic_try_force_cpu - Redirect panic to a specific CPU for crash kernel
+ * @fmt: panic message format string
+ * @args: arguments for format string
+ *
+ * Some platforms require panic handling to occur on a specific CPU
+ * for the crash kernel to function correctly. This function redirects
+ * panic handling to the CPU specified via the panic_force_cpu= boot parameter.
+ *
+ * Return: true if the panic was redirected to the target CPU, false if
+ * the panic should proceed on the current CPU.
+ */
+__printf(1, 0)
+static bool panic_try_force_cpu(const char *fmt, va_list args)
+{
+ int this_cpu = raw_smp_processor_id();
+ int old_cpu = PANIC_CPU_INVALID;
+ const char *msg;
+
+ /* Feature not enabled via boot parameter */
+ if (panic_force_cpu < 0)
+ return false;
+
+ /* Already on target CPU - proceed normally */
+ if (this_cpu == panic_force_cpu)
+ return false;
+
+ /* Target CPU is offline, can't redirect */
+ if (!cpu_online(panic_force_cpu)) {
+ pr_warn("panic: target CPU %d is offline, continuing on CPU %d\n",
+ panic_force_cpu, this_cpu);
+ return false;
+ }
+
+ /* Another panic already in progress */
+ if (panic_in_progress())
+ return false;
+
+ /*
+ * Only one CPU can do the redirect. Use atomic cmpxchg to ensure
+ * we don't race with another CPU also trying to redirect.
+ */
+ if (!atomic_try_cmpxchg(&panic_redirect_cpu, &old_cpu, this_cpu))
+ return false;
+
+ /*
+ * Use the dynamically allocated buffer if available, otherwise fall
+ * back to a static message (early-boot panic or allocation failure).
+ */
+ if (panic_force_buf) {
+ vsnprintf(panic_force_buf, PANIC_MSG_BUFSZ, fmt, args);
+ msg = panic_force_buf;
+ } else {
+ msg = "Redirected panic (buffer unavailable)";
+ }
+
+ console_verbose();
+ bust_spinlocks(1);
+
+ pr_emerg("panic: Redirecting from CPU %d to CPU %d for crash kernel.\n",
+ this_cpu, panic_force_cpu);
+
+ /* Dump original CPU before redirecting */
+ if (!test_taint(TAINT_DIE) &&
+ oops_in_progress <= 1 &&
+ IS_ENABLED(CONFIG_DEBUG_BUGVERBOSE)) {
+ dump_stack();
+ }
+
+ if (panic_smp_redirect_cpu(panic_force_cpu, (void *)msg) != 0) {
+ atomic_set(&panic_redirect_cpu, PANIC_CPU_INVALID);
+ pr_warn("panic: failed to redirect to CPU %d, continuing on CPU %d\n",
+ panic_force_cpu, this_cpu);
+ return false;
+ }
+
+ /* IPI/NMI sent, this CPU should stop */
+ return true;
+}
+#else
+__printf(1, 0)
+static inline bool panic_try_force_cpu(const char *fmt, va_list args)
+{
+ return false;
+}
+#endif /* CONFIG_SMP && CONFIG_CRASH_DUMP */
bool panic_try_start(void)
{
@@ -428,7 +575,7 @@ static void panic_other_cpus_shutdown(bool crash_kexec)
*/
void vpanic(const char *fmt, va_list args)
{
- static char buf[1024];
+ static char buf[PANIC_MSG_BUFSZ];
long i, i_next = 0, len;
int state = 0;
bool _crash_kexec_post_notifiers = crash_kexec_post_notifiers;
@@ -452,6 +599,15 @@ void vpanic(const char *fmt, va_list args)
local_irq_disable();
preempt_disable_notrace();
+ /* Redirect panic to target CPU if configured via panic_force_cpu=. */
+ if (panic_try_force_cpu(fmt, args)) {
+ /*
+ * Mark ourselves offline so panic_other_cpus_shutdown() won't wait
+ * for us on architectures that check num_online_cpus().
+ */
+ set_cpu_online(smp_processor_id(), false);
+ panic_smp_self_stop();
+ }
/*
* It's possible to come here directly from a panic-assertion and
* not have preempt disabled. Some functions called from here want
@@ -484,7 +640,11 @@ void vpanic(const char *fmt, va_list args)
/*
* Avoid nested stack-dumping if a panic occurs during oops processing
*/
- if (test_taint(TAINT_DIE) || oops_in_progress > 1) {
+ if (atomic_read(&panic_redirect_cpu) != PANIC_CPU_INVALID &&
+ panic_force_cpu == raw_smp_processor_id()) {
+ pr_emerg("panic: Redirected from CPU %d, skipping stack dump.\n",
+ atomic_read(&panic_redirect_cpu));
+ } else if (test_taint(TAINT_DIE) || oops_in_progress > 1) {
panic_this_cpu_backtrace_printed = true;
} else if (IS_ENABLED(CONFIG_DEBUG_BUGVERBOSE)) {
dump_stack();
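
The __weak default above delivers the redirect with an async IPI; an
architecture that can send NMIs could override it along these lines. The
arch_panic_redirect_msg variable and arch_send_panic_nmi() below are purely
hypothetical stand-ins for per-arch NMI plumbing, not existing interfaces:

/*
 * Hypothetical arch override: deliver the redirect via NMI so the target
 * CPU still reaches panic() even while it is spinning with IRQs disabled.
 */
static void *arch_panic_redirect_msg;

int panic_smp_redirect_cpu(int target_cpu, void *msg)
{
	WRITE_ONCE(arch_panic_redirect_msg, msg);
	return arch_send_panic_nmi(target_cpu);	/* NMI handler calls panic() */
}

In either case the feature is armed from the kernel command line, e.g.
panic_force_cpu=0; if the redirect cannot be delivered (target CPU offline,
IPI send failure), panic_try_force_cpu() lets the panic proceed on the
current CPU, so the parameter degrades gracefully.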
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index c903f1a42891..a612cf253c87 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -253,8 +253,10 @@ static inline void sched_info_dequeue(struct rq *rq, struct task_struct *t)
delta = rq_clock(rq) - t->sched_info.last_queued;
t->sched_info.last_queued = 0;
t->sched_info.run_delay += delta;
- if (delta > t->sched_info.max_run_delay)
+ if (delta > t->sched_info.max_run_delay) {
t->sched_info.max_run_delay = delta;
+ ktime_get_real_ts64(&t->sched_info.max_run_delay_ts);
+ }
if (delta && (!t->sched_info.min_run_delay || delta < t->sched_info.min_run_delay))
t->sched_info.min_run_delay = delta;
rq_sched_info_dequeue(rq, delta);
@@ -278,8 +280,10 @@ static void sched_info_arrive(struct rq *rq, struct task_struct *t)
t->sched_info.run_delay += delta;
t->sched_info.last_arrival = now;
t->sched_info.pcount++;
- if (delta > t->sched_info.max_run_delay)
+ if (delta > t->sched_info.max_run_delay) {
t->sched_info.max_run_delay = delta;
+ ktime_get_real_ts64(&t->sched_info.max_run_delay_ts);
+ }
if (delta && (!t->sched_info.min_run_delay || delta < t->sched_info.min_run_delay))
t->sched_info.min_run_delay = delta;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index f9b10c633bdd..8fb38722fd5c 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -8112,7 +8112,8 @@ ftrace_func_address_lookup(struct ftrace_mod_map *mod_map,
int
ftrace_mod_address_lookup(unsigned long addr, unsigned long *size,
- unsigned long *off, char **modname, char *sym)
+ unsigned long *off, char **modname,
+ const unsigned char **modbuildid, char *sym)
{
struct ftrace_mod_map *mod_map;
int ret = 0;
@@ -8124,6 +8125,8 @@ ftrace_mod_address_lookup(unsigned long addr, unsigned long *size,
if (ret) {
if (modname)
*modname = mod_map->mod->name;
+ if (modbuildid)
+ *modbuildid = module_buildid(mod_map->mod);
break;
}
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8bd4ec08fb36..b1cb30a7b83d 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1178,11 +1178,10 @@ EXPORT_SYMBOL_GPL(__trace_array_puts);
* __trace_puts - write a constant string into the trace buffer.
* @ip: The address of the caller
* @str: The constant string to write
- * @size: The size of the string.
*/
-int __trace_puts(unsigned long ip, const char *str, int size)
+int __trace_puts(unsigned long ip, const char *str)
{
- return __trace_array_puts(printk_trace, ip, str, size);
+ return __trace_array_puts(printk_trace, ip, str, strlen(str));
}
EXPORT_SYMBOL_GPL(__trace_puts);
@@ -1201,7 +1200,7 @@ int __trace_bputs(unsigned long ip, const char *str)
int size = sizeof(struct bputs_entry);
if (!printk_binsafe(tr))
- return __trace_puts(ip, str, strlen(str));
+ return __trace_puts(ip, str);
if (!(tr->trace_flags & TRACE_ITER(PRINTK)))
return 0;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index c11edec5d8f5..8428c437cb9d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -2119,7 +2119,7 @@ extern void tracing_log_err(struct trace_array *tr,
* about performance). The internal_trace_puts() is for such
* a purpose.
*/
-#define internal_trace_puts(str) __trace_puts(_THIS_IP_, str, strlen(str))
+#define internal_trace_puts(str) __trace_puts(_THIS_IP_, str)
#undef FTRACE_ENTRY
#define FTRACE_ENTRY(call, struct_name, id, tstruct, print) \
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index 6ea2f6363b90..5c153106e642 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -125,7 +125,7 @@ static void __acct_update_integrals(struct task_struct *tsk,
{
u64 time, delta;
- if (!likely(tsk->mm))
+ if (unlikely(!tsk->mm || (tsk->flags & PF_KTHREAD)))
return;
time = stime + utime;
diff --git a/kernel/ucount.c b/kernel/ucount.c
index 586af49fc03e..fc4a8f2d3096 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -47,7 +47,7 @@ static int set_permissions(struct ctl_table_header *head,
int mode;
/* Allow users with CAP_SYS_RESOURCE unrestrained access */
- if (ns_capable(user_ns, CAP_SYS_RESOURCE))
+ if (ns_capable_noaudit(user_ns, CAP_SYS_RESOURCE))
mode = (table->mode & S_IRWXU) >> 6;
else
/* Allow all others at most read-only access */
diff --git a/kernel/vmcore_info.c b/kernel/vmcore_info.c
index 46fc1050f1bb..8d82913223a1 100644
--- a/kernel/vmcore_info.c
+++ b/kernel/vmcore_info.c
@@ -141,7 +141,9 @@ EXPORT_SYMBOL_GPL(hwerr_log_error_type);
static int __init crash_save_vmcoreinfo_init(void)
{
- vmcoreinfo_data = (unsigned char *)get_zeroed_page(GFP_KERNEL);
+ int order;
+ order = get_order(VMCOREINFO_BYTES);
+ vmcoreinfo_data = (unsigned char *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
if (!vmcoreinfo_data) {
pr_warn("Memory allocation for vmcoreinfo_data failed\n");
return -ENOMEM;
@@ -150,7 +152,7 @@ static int __init crash_save_vmcoreinfo_init(void)
vmcoreinfo_note = alloc_pages_exact(VMCOREINFO_NOTE_SIZE,
GFP_KERNEL | __GFP_ZERO);
if (!vmcoreinfo_note) {
- free_page((unsigned long)vmcoreinfo_data);
+ free_pages((unsigned long)vmcoreinfo_data, order);
vmcoreinfo_data = NULL;
pr_warn("Memory allocation for vmcoreinfo_note failed\n");
return -ENOMEM;
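
The allocation change above only has a visible effect once VMCOREINFO_BYTES
grows past one page. The order arithmetic, shown here with illustrative 4 KiB
pages and example sizes (not values from this patch), works out as:

/* get_order(size) returns the smallest n with (PAGE_SIZE << n) >= size:
 *   VMCOREINFO_BYTES <= 4096  -> order 0 -> one page (same as before)
 *   VMCOREINFO_BYTES  = 8192  -> order 1 -> two contiguous pages
 *   VMCOREINFO_BYTES  = 12288 -> order 2 -> four pages (rounded up to the
 *                                           next power-of-two block)
 * free_pages() must be passed the same order, which the error path now does.
 */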
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 366122f4a0f8..7d675781bc91 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -363,7 +363,7 @@ static struct cpumask watchdog_allowed_mask __read_mostly;
/* Global variables, exported for sysctl */
unsigned int __read_mostly softlockup_panic =
- IS_ENABLED(CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC);
+ CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC;
static bool softlockup_initialized __read_mostly;
static u64 __read_mostly sample_period;
@@ -550,7 +550,7 @@ static bool need_counting_irqs(void)
u8 util;
int tail = __this_cpu_read(cpustat_tail);
- tail = (tail + NUM_HARDIRQ_REPORT - 1) % NUM_HARDIRQ_REPORT;
+ tail = (tail + NUM_SAMPLE_PERIODS - 1) % NUM_SAMPLE_PERIODS;
util = __this_cpu_read(cpustat_util[tail][STATS_HARDIRQ]);
return util > HARDIRQ_PERCENT_THRESH;
}
@@ -774,8 +774,8 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
{
unsigned long touch_ts, period_ts, now;
struct pt_regs *regs = get_irq_regs();
- int duration;
int softlockup_all_cpu_backtrace;
+ int duration, thresh_count;
unsigned long flags;
if (!watchdog_enabled)
@@ -879,7 +879,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
sys_info(softlockup_si_mask & ~SYS_INFO_ALL_BT);
- if (softlockup_panic)
+ thresh_count = duration / get_softlockup_thresh();
+
+ if (softlockup_panic && thresh_count >= softlockup_panic)
panic("softlockup: hung tasks");
}
@@ -1228,7 +1230,7 @@ static const struct ctl_table watchdog_sysctls[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
+ .extra2 = SYSCTL_INT_MAX,
},
{
.procname = "softlockup_sys_info",
diff --git a/kernel/watchdog_perf.c b/kernel/watchdog_perf.c
index d3ca70e3c256..cf05775a96d3 100644
--- a/kernel/watchdog_perf.c
+++ b/kernel/watchdog_perf.c
@@ -118,18 +118,11 @@ static void watchdog_overflow_callback(struct perf_event *event,
watchdog_hardlockup_check(smp_processor_id(), regs);
}
-static int hardlockup_detector_event_create(void)
+static struct perf_event *hardlockup_detector_event_create(unsigned int cpu)
{
- unsigned int cpu;
struct perf_event_attr *wd_attr;
struct perf_event *evt;
- /*
- * Preemption is not disabled because memory will be allocated.
- * Ensure CPU-locality by calling this in per-CPU kthread.
- */
- WARN_ON(!is_percpu_thread());
- cpu = raw_smp_processor_id();
wd_attr = &wd_hw_attr;
wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
@@ -143,14 +136,7 @@ static int hardlockup_detector_event_create(void)
watchdog_overflow_callback, NULL);
}
- if (IS_ERR(evt)) {
- pr_debug("Perf event create on CPU %d failed with %ld\n", cpu,
- PTR_ERR(evt));
- return PTR_ERR(evt);
- }
- WARN_ONCE(this_cpu_read(watchdog_ev), "unexpected watchdog_ev leak");
- this_cpu_write(watchdog_ev, evt);
- return 0;
+ return evt;
}
/**
@@ -159,17 +145,26 @@ static int hardlockup_detector_event_create(void)
*/
void watchdog_hardlockup_enable(unsigned int cpu)
{
+ struct perf_event *evt;
+
WARN_ON_ONCE(cpu != smp_processor_id());
- if (hardlockup_detector_event_create())
+ evt = hardlockup_detector_event_create(cpu);
+ if (IS_ERR(evt)) {
+ pr_debug("Perf event create on CPU %d failed with %ld\n", cpu,
+ PTR_ERR(evt));
return;
+ }
/* use original value for check */
if (!atomic_fetch_inc(&watchdog_cpus))
pr_info("Enabled. Permanently consumes one hw-PMU counter.\n");
+ WARN_ONCE(this_cpu_read(watchdog_ev), "unexpected watchdog_ev leak");
+ this_cpu_write(watchdog_ev, evt);
+
watchdog_init_timestamp();
- perf_event_enable(this_cpu_read(watchdog_ev));
+ perf_event_enable(evt);
}
/**
@@ -263,19 +258,30 @@ bool __weak __init arch_perf_nmi_is_available(void)
*/
int __init watchdog_hardlockup_probe(void)
{
+ struct perf_event *evt;
+ unsigned int cpu;
int ret;
if (!arch_perf_nmi_is_available())
return -ENODEV;
- ret = hardlockup_detector_event_create();
+ if (!hw_nmi_get_sample_period(watchdog_thresh))
+ return -EINVAL;
- if (ret) {
+ /*
+ * Test hardware PMU availability by creating a temporary perf event.
+ * The event is released immediately.
+ */
+ cpu = raw_smp_processor_id();
+ evt = hardlockup_detector_event_create(cpu);
+ if (IS_ERR(evt)) {
pr_info("Perf NMI watchdog permanently disabled\n");
+ ret = PTR_ERR(evt);
} else {
- perf_event_release_kernel(this_cpu_read(watchdog_ev));
- this_cpu_write(watchdog_ev, NULL);
+ perf_event_release_kernel(evt);
+ ret = 0;
}
+
return ret;
}