From 4c46a471be12216347ba707f8eadadbf5d68e698 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 3 Jun 2025 16:38:53 +0530 Subject: firmware: arm_ffa: Fix the missing entry in struct ffa_indirect_msg_hdr As per the spec, one 32 bit reserved entry is missing here, add it. Signed-off-by: Viresh Kumar Fixes: 910cc1acc9b4 ("firmware: arm_ffa: Add support for passing UUID in FFA_MSG_SEND2") Reviewed-by: Bertrand Marquis Message-Id: <28a624fbf416975de4fbe08cfbf7c2db89cb630e.1748948911.git.viresh.kumar@linaro.org> Signed-off-by: Sudeep Holla --- include/linux/arm_ffa.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index 5bded24dc24f..e1634897e159 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -283,6 +283,7 @@ struct ffa_indirect_msg_hdr { u32 offset; u32 send_recv_id; u32 size; + u32 res1; uuid_t uuid; }; -- cgit v1.2.3 From cf0233491b3a15933234a26efd9ecbc1c0764674 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 5 Jun 2025 14:25:49 +0300 Subject: phy: use per-PHY lockdep keys If the PHY driver uses another PHY internally (e.g. in case of eUSB2, repeaters are represented as PHYs), then it would trigger the following lockdep splat because all PHYs use a single static lockdep key and thus lockdep can not identify whether there is a dependency or not and reports a false positive. Make PHY subsystem use dynamic lockdep keys, assigning each driver a separate key. This way lockdep can correctly identify dependency graph between mutexes. ============================================ WARNING: possible recursive locking detected 6.15.0-rc7-next-20250522-12896-g3932f283970c #3455 Not tainted -------------------------------------------- kworker/u51:0/78 is trying to acquire lock: ffff0008116554f0 (&phy->mutex){+.+.}-{4:4}, at: phy_init+0x4c/0x12c but task is already holding lock: ffff000813c10cf0 (&phy->mutex){+.+.}-{4:4}, at: phy_init+0x4c/0x12c other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&phy->mutex); lock(&phy->mutex); *** DEADLOCK *** May be due to missing lock nesting notation 4 locks held by kworker/u51:0/78: #0: ffff000800010948 ((wq_completion)events_unbound){+.+.}-{0:0}, at: process_one_work+0x18c/0x5ec #1: ffff80008036bdb0 (deferred_probe_work){+.+.}-{0:0}, at: process_one_work+0x1b4/0x5ec #2: ffff0008094ac8f8 (&dev->mutex){....}-{4:4}, at: __device_attach+0x38/0x188 #3: ffff000813c10cf0 (&phy->mutex){+.+.}-{4:4}, at: phy_init+0x4c/0x12c stack backtrace: CPU: 0 UID: 0 PID: 78 Comm: kworker/u51:0 Not tainted 6.15.0-rc7-next-20250522-12896-g3932f283970c #3455 PREEMPT Hardware name: Qualcomm CRD, BIOS 6.0.240904.BOOT.MXF.2.4-00528.1-HAMOA-1 09/ 4/2024 Workqueue: events_unbound deferred_probe_work_func Call trace: show_stack+0x18/0x24 (C) dump_stack_lvl+0x90/0xd0 dump_stack+0x18/0x24 print_deadlock_bug+0x258/0x348 __lock_acquire+0x10fc/0x1f84 lock_acquire+0x1c8/0x338 __mutex_lock+0xb8/0x59c mutex_lock_nested+0x24/0x30 phy_init+0x4c/0x12c snps_eusb2_hsphy_init+0x54/0x1a0 phy_init+0xe0/0x12c dwc3_core_init+0x450/0x10b4 dwc3_core_probe+0xce4/0x15fc dwc3_probe+0x64/0xb0 platform_probe+0x68/0xc4 really_probe+0xbc/0x298 __driver_probe_device+0x78/0x12c driver_probe_device+0x3c/0x160 __device_attach_driver+0xb8/0x138 bus_for_each_drv+0x84/0xe0 __device_attach+0x9c/0x188 device_initial_probe+0x14/0x20 bus_probe_device+0xac/0xb0 deferred_probe_work_func+0x8c/0xc8 process_one_work+0x208/0x5ec worker_thread+0x1c0/0x368 kthread+0x14c/0x20c ret_from_fork+0x10/0x20 Fixes: 3584f6392f09 ("phy: qcom: phy-qcom-snps-eusb2: Add support for eUSB2 repeater") Fixes: e2463559ff1d ("phy: amlogic: Add Amlogic AXG PCIE PHY Driver") Reviewed-by: Neil Armstrong Reviewed-by: Abel Vesa Reported-by: Johan Hovold Link: https://lore.kernel.org/lkml/ZnpoAVGJMG4Zu-Jw@hovoldconsulting.com/ Reviewed-by: Johan Hovold Tested-by: Johan Hovold Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250605-phy-subinit-v3-1-1e1e849e10cd@oss.qualcomm.com Signed-off-by: Vinod Koul --- drivers/phy/phy-core.c | 5 ++++- include/linux/phy/phy.h | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/phy/phy-core.c b/drivers/phy/phy-core.c index 8e2daea81666..04a5a34e7a95 100644 --- a/drivers/phy/phy-core.c +++ b/drivers/phy/phy-core.c @@ -994,7 +994,8 @@ struct phy *phy_create(struct device *dev, struct device_node *node, } device_initialize(&phy->dev); - mutex_init(&phy->mutex); + lockdep_register_key(&phy->lockdep_key); + mutex_init_with_key(&phy->mutex, &phy->lockdep_key); phy->dev.class = &phy_class; phy->dev.parent = dev; @@ -1259,6 +1260,8 @@ static void phy_release(struct device *dev) dev_vdbg(dev, "releasing '%s'\n", dev_name(dev)); debugfs_remove_recursive(phy->debugfs); regulator_put(phy->pwr); + mutex_destroy(&phy->mutex); + lockdep_unregister_key(&phy->lockdep_key); ida_free(&phy_ida, phy->id); kfree(phy); } diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h index 437769e061b7..13add0c2c407 100644 --- a/include/linux/phy/phy.h +++ b/include/linux/phy/phy.h @@ -154,6 +154,7 @@ struct phy_attrs { * @id: id of the phy device * @ops: function pointers for performing phy operations * @mutex: mutex to protect phy_ops + * @lockdep_key: lockdep information for this mutex * @init_count: used to protect when the PHY is used by multiple consumers * @power_count: used to protect when the PHY is used by multiple consumers * @attrs: used to specify PHY specific attributes @@ -165,6 +166,7 @@ struct phy { int id; const struct phy_ops *ops; struct mutex mutex; + struct lock_class_key lockdep_key; int init_count; int power_count; struct phy_attrs attrs; -- cgit v1.2.3 From d8010d4ba43e9f790925375a7de100604a5e2dba Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Wed, 11 Sep 2024 10:53:08 +0200 Subject: x86/bugs: Add a Transient Scheduler Attacks mitigation Add the required features detection glue to bugs.c et all in order to support the TSA mitigation. Co-developed-by: Kim Phillips Signed-off-by: Kim Phillips Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Pawan Gupta --- Documentation/ABI/testing/sysfs-devices-system-cpu | 1 + Documentation/admin-guide/kernel-parameters.txt | 13 +++ arch/x86/Kconfig | 9 ++ arch/x86/include/asm/cpufeatures.h | 6 +- arch/x86/include/asm/mwait.h | 2 +- arch/x86/include/asm/nospec-branch.h | 14 ++- arch/x86/kernel/cpu/amd.c | 44 ++++++++ arch/x86/kernel/cpu/bugs.c | 124 +++++++++++++++++++++ arch/x86/kernel/cpu/common.c | 14 ++- arch/x86/kernel/cpu/scattered.c | 2 + arch/x86/kvm/svm/vmenter.S | 6 + drivers/base/cpu.c | 3 + include/linux/cpu.h | 1 + 13 files changed, 232 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index bf85f4de6862..ab8cd337f43a 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -584,6 +584,7 @@ What: /sys/devices/system/cpu/vulnerabilities /sys/devices/system/cpu/vulnerabilities/spectre_v1 /sys/devices/system/cpu/vulnerabilities/spectre_v2 /sys/devices/system/cpu/vulnerabilities/srbds + /sys/devices/system/cpu/vulnerabilities/tsa /sys/devices/system/cpu/vulnerabilities/tsx_async_abort Date: January 2018 Contact: Linux kernel mailing list diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index f1f2c0874da9..07e22ba5bfe3 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -7488,6 +7488,19 @@ having this key zero'ed is acceptable. E.g. in testing scenarios. + tsa= [X86] Control mitigation for Transient Scheduler + Attacks on AMD CPUs. Search the following in your + favourite search engine for more details: + + "Technical guidance for mitigating transient scheduler + attacks". + + off - disable the mitigation + on - enable the mitigation (default) + user - mitigate only user/kernel transitions + vm - mitigate only guest/host transitions + + tsc= Disable clocksource stability checks for TSC. Format: [x86] reliable: mark tsc clocksource as reliable, this diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 340e5468980e..71dfe7d7c786 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2695,6 +2695,15 @@ config MITIGATION_ITS disabled, mitigation cannot be enabled via cmdline. See +config MITIGATION_TSA + bool "Mitigate Transient Scheduler Attacks" + depends on CPU_SUP_AMD + default y + help + Enable mitigation for Transient Scheduler Attacks. TSA is a hardware + security vulnerability on AMD CPUs which can lead to forwarding of + invalid info to subsequent instructions and thus can affect their + timing and thereby cause a leakage. endif config ARCH_HAS_ADD_PAGES diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index ee176236c2be..286d509f9363 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -456,6 +456,7 @@ #define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */ #define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */ #define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* LFENCE always serializing / synchronizes RDTSC */ +#define X86_FEATURE_VERW_CLEAR (20*32+ 5) /* The memory form of VERW mitigates TSA */ #define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* Null Selector Clears Base */ #define X86_FEATURE_AUTOIBRS (20*32+ 8) /* Automatic IBRS */ #define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* SMM_CTL MSR is not present */ @@ -487,6 +488,9 @@ #define X86_FEATURE_PREFER_YMM (21*32+ 8) /* Avoid ZMM registers due to downclocking */ #define X86_FEATURE_APX (21*32+ 9) /* Advanced Performance Extensions */ #define X86_FEATURE_INDIRECT_THUNK_ITS (21*32+10) /* Use thunk for indirect branches in lower half of cacheline */ +#define X86_FEATURE_TSA_SQ_NO (21*32+11) /* AMD CPU not vulnerable to TSA-SQ */ +#define X86_FEATURE_TSA_L1_NO (21*32+12) /* AMD CPU not vulnerable to TSA-L1 */ +#define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* Clear CPU buffers using VERW before VMRUN */ /* * BUG word(s) @@ -542,5 +546,5 @@ #define X86_BUG_OLD_MICROCODE X86_BUG( 1*32+ 6) /* "old_microcode" CPU has old microcode, it is surely vulnerable to something */ #define X86_BUG_ITS X86_BUG( 1*32+ 7) /* "its" CPU is affected by Indirect Target Selection */ #define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */ - +#define X86_BUG_TSA X86_BUG( 1*32+ 9) /* "tsa" CPU is affected by Transient Scheduler Attacks */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index cc34c3fd197b..82bd9eb73b3c 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -80,7 +80,7 @@ static __always_inline void __mwait(u32 eax, u32 ecx) */ static __always_inline void __mwaitx(u32 eax, u32 ebx, u32 ecx) { - /* No MDS buffer clear as this is AMD/HYGON only */ + /* No need for TSA buffer clearing on AMD */ /* "mwaitx %eax, %ebx, %ecx" */ asm volatile(".byte 0x0f, 0x01, 0xfb" diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 5dcd75bb5e0d..10f261678749 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -308,19 +308,25 @@ * CFLAGS.ZF. * Note: Only the memory operand variant of VERW clears the CPU buffers. */ -.macro CLEAR_CPU_BUFFERS +.macro __CLEAR_CPU_BUFFERS feature #ifdef CONFIG_X86_64 - ALTERNATIVE "", "verw x86_verw_sel(%rip)", X86_FEATURE_CLEAR_CPU_BUF + ALTERNATIVE "", "verw x86_verw_sel(%rip)", \feature #else /* * In 32bit mode, the memory operand must be a %cs reference. The data * segments may not be usable (vm86 mode), and the stack segment may not * be flat (ESPFIX32). */ - ALTERNATIVE "", "verw %cs:x86_verw_sel", X86_FEATURE_CLEAR_CPU_BUF + ALTERNATIVE "", "verw %cs:x86_verw_sel", \feature #endif .endm +#define CLEAR_CPU_BUFFERS \ + __CLEAR_CPU_BUFFERS X86_FEATURE_CLEAR_CPU_BUF + +#define VM_CLEAR_CPU_BUFFERS \ + __CLEAR_CPU_BUFFERS X86_FEATURE_CLEAR_CPU_BUF_VM + #ifdef CONFIG_X86_64 .macro CLEAR_BRANCH_HISTORY ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP @@ -602,7 +608,7 @@ static __always_inline void x86_clear_cpu_buffers(void) /** * x86_idle_clear_cpu_buffers - Buffer clearing support in idle for the MDS - * vulnerability + * and TSA vulnerabilities. * * Clear CPU buffers if the corresponding static key is enabled */ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 93da466dfe2c..23c535871a7e 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -377,6 +377,47 @@ static void bsp_determine_snp(struct cpuinfo_x86 *c) #endif } +#define ZEN_MODEL_STEP_UCODE(fam, model, step, ucode) \ + X86_MATCH_VFM_STEPS(VFM_MAKE(X86_VENDOR_AMD, fam, model), \ + step, step, ucode) + +static const struct x86_cpu_id amd_tsa_microcode[] = { + ZEN_MODEL_STEP_UCODE(0x19, 0x01, 0x1, 0x0a0011d7), + ZEN_MODEL_STEP_UCODE(0x19, 0x01, 0x2, 0x0a00123b), + ZEN_MODEL_STEP_UCODE(0x19, 0x08, 0x2, 0x0a00820d), + ZEN_MODEL_STEP_UCODE(0x19, 0x11, 0x1, 0x0a10114c), + ZEN_MODEL_STEP_UCODE(0x19, 0x11, 0x2, 0x0a10124c), + ZEN_MODEL_STEP_UCODE(0x19, 0x18, 0x1, 0x0a108109), + ZEN_MODEL_STEP_UCODE(0x19, 0x21, 0x0, 0x0a20102e), + ZEN_MODEL_STEP_UCODE(0x19, 0x21, 0x2, 0x0a201211), + ZEN_MODEL_STEP_UCODE(0x19, 0x44, 0x1, 0x0a404108), + ZEN_MODEL_STEP_UCODE(0x19, 0x50, 0x0, 0x0a500012), + ZEN_MODEL_STEP_UCODE(0x19, 0x61, 0x2, 0x0a60120a), + ZEN_MODEL_STEP_UCODE(0x19, 0x74, 0x1, 0x0a704108), + ZEN_MODEL_STEP_UCODE(0x19, 0x75, 0x2, 0x0a705208), + ZEN_MODEL_STEP_UCODE(0x19, 0x78, 0x0, 0x0a708008), + ZEN_MODEL_STEP_UCODE(0x19, 0x7c, 0x0, 0x0a70c008), + ZEN_MODEL_STEP_UCODE(0x19, 0xa0, 0x2, 0x0aa00216), + {}, +}; + +static void tsa_init(struct cpuinfo_x86 *c) +{ + if (cpu_has(c, X86_FEATURE_HYPERVISOR)) + return; + + if (cpu_has(c, X86_FEATURE_ZEN3) || + cpu_has(c, X86_FEATURE_ZEN4)) { + if (x86_match_min_microcode_rev(amd_tsa_microcode)) + setup_force_cpu_cap(X86_FEATURE_VERW_CLEAR); + else + pr_debug("%s: current revision: 0x%x\n", __func__, c->microcode); + } else { + setup_force_cpu_cap(X86_FEATURE_TSA_SQ_NO); + setup_force_cpu_cap(X86_FEATURE_TSA_L1_NO); + } +} + static void bsp_init_amd(struct cpuinfo_x86 *c) { if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { @@ -489,6 +530,9 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) } bsp_determine_snp(c); + + tsa_init(c); + return; warn: diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 258ed3d2b6a9..f4d3abb12317 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -94,6 +94,8 @@ static void __init bhi_apply_mitigation(void); static void __init its_select_mitigation(void); static void __init its_update_mitigation(void); static void __init its_apply_mitigation(void); +static void __init tsa_select_mitigation(void); +static void __init tsa_apply_mitigation(void); /* The base value of the SPEC_CTRL MSR without task-specific bits set */ u64 x86_spec_ctrl_base; @@ -225,6 +227,7 @@ void __init cpu_select_mitigations(void) gds_select_mitigation(); its_select_mitigation(); bhi_select_mitigation(); + tsa_select_mitigation(); /* * After mitigations are selected, some may need to update their @@ -272,6 +275,7 @@ void __init cpu_select_mitigations(void) gds_apply_mitigation(); its_apply_mitigation(); bhi_apply_mitigation(); + tsa_apply_mitigation(); } /* @@ -1487,6 +1491,94 @@ static void __init its_apply_mitigation(void) set_return_thunk(its_return_thunk); } +#undef pr_fmt +#define pr_fmt(fmt) "Transient Scheduler Attacks: " fmt + +enum tsa_mitigations { + TSA_MITIGATION_NONE, + TSA_MITIGATION_AUTO, + TSA_MITIGATION_UCODE_NEEDED, + TSA_MITIGATION_USER_KERNEL, + TSA_MITIGATION_VM, + TSA_MITIGATION_FULL, +}; + +static const char * const tsa_strings[] = { + [TSA_MITIGATION_NONE] = "Vulnerable", + [TSA_MITIGATION_UCODE_NEEDED] = "Vulnerable: No microcode", + [TSA_MITIGATION_USER_KERNEL] = "Mitigation: Clear CPU buffers: user/kernel boundary", + [TSA_MITIGATION_VM] = "Mitigation: Clear CPU buffers: VM", + [TSA_MITIGATION_FULL] = "Mitigation: Clear CPU buffers", +}; + +static enum tsa_mitigations tsa_mitigation __ro_after_init = + IS_ENABLED(CONFIG_MITIGATION_TSA) ? TSA_MITIGATION_AUTO : TSA_MITIGATION_NONE; + +static int __init tsa_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) + tsa_mitigation = TSA_MITIGATION_NONE; + else if (!strcmp(str, "on")) + tsa_mitigation = TSA_MITIGATION_FULL; + else if (!strcmp(str, "user")) + tsa_mitigation = TSA_MITIGATION_USER_KERNEL; + else if (!strcmp(str, "vm")) + tsa_mitigation = TSA_MITIGATION_VM; + else + pr_err("Ignoring unknown tsa=%s option.\n", str); + + return 0; +} +early_param("tsa", tsa_parse_cmdline); + +static void __init tsa_select_mitigation(void) +{ + if (cpu_mitigations_off() || !boot_cpu_has_bug(X86_BUG_TSA)) { + tsa_mitigation = TSA_MITIGATION_NONE; + return; + } + + if (tsa_mitigation == TSA_MITIGATION_NONE) + return; + + if (!boot_cpu_has(X86_FEATURE_VERW_CLEAR)) { + tsa_mitigation = TSA_MITIGATION_UCODE_NEEDED; + goto out; + } + + if (tsa_mitigation == TSA_MITIGATION_AUTO) + tsa_mitigation = TSA_MITIGATION_FULL; + + /* + * No need to set verw_clear_cpu_buf_mitigation_selected - it + * doesn't fit all cases here and it is not needed because this + * is the only VERW-based mitigation on AMD. + */ +out: + pr_info("%s\n", tsa_strings[tsa_mitigation]); +} + +static void __init tsa_apply_mitigation(void) +{ + switch (tsa_mitigation) { + case TSA_MITIGATION_USER_KERNEL: + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + break; + case TSA_MITIGATION_VM: + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF_VM); + break; + case TSA_MITIGATION_FULL: + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF_VM); + break; + default: + break; + } +} + #undef pr_fmt #define pr_fmt(fmt) "Spectre V2 : " fmt @@ -2316,6 +2408,25 @@ void cpu_bugs_smt_update(void) break; } + switch (tsa_mitigation) { + case TSA_MITIGATION_USER_KERNEL: + case TSA_MITIGATION_VM: + case TSA_MITIGATION_AUTO: + case TSA_MITIGATION_FULL: + /* + * TSA-SQ can potentially lead to info leakage between + * SMT threads. + */ + if (sched_smt_active()) + static_branch_enable(&cpu_buf_idle_clear); + else + static_branch_disable(&cpu_buf_idle_clear); + break; + case TSA_MITIGATION_NONE: + case TSA_MITIGATION_UCODE_NEEDED: + break; + } + mutex_unlock(&spec_ctrl_mutex); } @@ -3265,6 +3376,11 @@ static ssize_t gds_show_state(char *buf) return sysfs_emit(buf, "%s\n", gds_strings[gds_mitigation]); } +static ssize_t tsa_show_state(char *buf) +{ + return sysfs_emit(buf, "%s\n", tsa_strings[tsa_mitigation]); +} + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { @@ -3328,6 +3444,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_ITS: return its_show_state(buf); + case X86_BUG_TSA: + return tsa_show_state(buf); + default: break; } @@ -3414,6 +3533,11 @@ ssize_t cpu_show_indirect_target_selection(struct device *dev, struct device_att { return cpu_show_common(dev, attr, buf, X86_BUG_ITS); } + +ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_TSA); +} #endif void __warn_thunk(void) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8feb8fd2957a..f7b9fca82bda 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1233,6 +1233,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { #define ITS BIT(8) /* CPU is affected by Indirect Target Selection, but guest-host isolation is not affected */ #define ITS_NATIVE_ONLY BIT(9) +/* CPU is affected by Transient Scheduler Attacks */ +#define TSA BIT(10) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE, X86_STEP_MAX, SRBDS), @@ -1280,7 +1282,7 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_AMD(0x16, RETBLEED), VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO), VULNBL_HYGON(0x18, RETBLEED | SMT_RSB | SRSO), - VULNBL_AMD(0x19, SRSO), + VULNBL_AMD(0x19, SRSO | TSA), VULNBL_AMD(0x1a, SRSO), {} }; @@ -1530,6 +1532,16 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_ITS_NATIVE_ONLY); } + if (c->x86_vendor == X86_VENDOR_AMD) { + if (!cpu_has(c, X86_FEATURE_TSA_SQ_NO) || + !cpu_has(c, X86_FEATURE_TSA_L1_NO)) { + if (cpu_matches(cpu_vuln_blacklist, TSA) || + /* Enable bug on Zen guests to allow for live migration. */ + (cpu_has(c, X86_FEATURE_HYPERVISOR) && cpu_has(c, X86_FEATURE_ZEN))) + setup_force_cpu_bug(X86_BUG_TSA); + } + } + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index dbf6d71bdf18..b4a1f6732a3a 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -50,6 +50,8 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 }, { X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 }, { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 }, + { X86_FEATURE_TSA_SQ_NO, CPUID_ECX, 1, 0x80000021, 0 }, + { X86_FEATURE_TSA_L1_NO, CPUID_ECX, 2, 0x80000021, 0 }, { X86_FEATURE_AMD_WORKLOAD_CLASS, CPUID_EAX, 22, 0x80000021, 0 }, { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 }, { X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 }, diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index 0c61153b275f..235c4af6b692 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -169,6 +169,9 @@ SYM_FUNC_START(__svm_vcpu_run) #endif mov VCPU_RDI(%_ASM_DI), %_ASM_DI + /* Clobbers EFLAGS.ZF */ + VM_CLEAR_CPU_BUFFERS + /* Enter guest mode */ 3: vmrun %_ASM_AX 4: @@ -335,6 +338,9 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) mov SVM_current_vmcb(%rdi), %rax mov KVM_VMCB_pa(%rax), %rax + /* Clobbers EFLAGS.ZF */ + VM_CLEAR_CPU_BUFFERS + /* Enter guest mode */ 1: vmrun %rax 2: diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 7779ab0ca7ce..efc575a00edd 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -602,6 +602,7 @@ CPU_SHOW_VULN_FALLBACK(reg_file_data_sampling); CPU_SHOW_VULN_FALLBACK(ghostwrite); CPU_SHOW_VULN_FALLBACK(old_microcode); CPU_SHOW_VULN_FALLBACK(indirect_target_selection); +CPU_SHOW_VULN_FALLBACK(tsa); static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); @@ -620,6 +621,7 @@ static DEVICE_ATTR(reg_file_data_sampling, 0444, cpu_show_reg_file_data_sampling static DEVICE_ATTR(ghostwrite, 0444, cpu_show_ghostwrite, NULL); static DEVICE_ATTR(old_microcode, 0444, cpu_show_old_microcode, NULL); static DEVICE_ATTR(indirect_target_selection, 0444, cpu_show_indirect_target_selection, NULL); +static DEVICE_ATTR(tsa, 0444, cpu_show_tsa, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, @@ -639,6 +641,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_ghostwrite.attr, &dev_attr_old_microcode.attr, &dev_attr_indirect_target_selection.attr, + &dev_attr_tsa.attr, NULL }; diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 96a3a0d6a60e..6378370a952f 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -82,6 +82,7 @@ extern ssize_t cpu_show_old_microcode(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_indirect_target_selection(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, -- cgit v1.2.3 From 3b18405763c1ebb1efc15feef5563c9cdb2cc3a7 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Wed, 11 Jun 2025 14:14:15 +0300 Subject: usb: acpi: fix device link removal The device link to the USB4 host interface has to be removed manually since it's no longer auto removed. Fixes: 623dae3e7084 ("usb: acpi: fix boot hang due to early incorrect 'tunneled' USB3 device links") Cc: stable Signed-off-by: Heikki Krogerus Reviewed-by: Mika Westerberg Link: https://lore.kernel.org/r/20250611111415.2707865-1-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 3 +++ drivers/usb/core/usb-acpi.c | 4 +++- include/linux/usb.h | 2 ++ 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 5c12dfdef569..6bb6e92cb0a4 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -2368,6 +2368,9 @@ void usb_disconnect(struct usb_device **pdev) usb_remove_ep_devs(&udev->ep0); usb_unlock_device(udev); + if (udev->usb4_link) + device_link_del(udev->usb4_link); + /* Unregister the device. The device driver is responsible * for de-configuring the device and invoking the remove-device * notifier chain (used by usbfs and possibly others). diff --git a/drivers/usb/core/usb-acpi.c b/drivers/usb/core/usb-acpi.c index ea1ce8beb0cb..489dbdc96f94 100644 --- a/drivers/usb/core/usb-acpi.c +++ b/drivers/usb/core/usb-acpi.c @@ -157,7 +157,7 @@ EXPORT_SYMBOL_GPL(usb_acpi_set_power_state); */ static int usb_acpi_add_usb4_devlink(struct usb_device *udev) { - const struct device_link *link; + struct device_link *link; struct usb_port *port_dev; struct usb_hub *hub; @@ -188,6 +188,8 @@ static int usb_acpi_add_usb4_devlink(struct usb_device *udev) dev_dbg(&port_dev->dev, "Created device link from %s to %s\n", dev_name(&port_dev->child->dev), dev_name(nhi_fwnode->dev)); + udev->usb4_link = link; + return 0; } diff --git a/include/linux/usb.h b/include/linux/usb.h index 1b2545b4363b..92c752f5446f 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -614,6 +614,7 @@ struct usb3_lpm_parameters { * FIXME -- complete doc * @authenticated: Crypto authentication passed * @tunnel_mode: Connection native or tunneled over USB4 + * @usb4_link: device link to the USB4 host interface * @lpm_capable: device supports LPM * @lpm_devinit_allow: Allow USB3 device initiated LPM, exit latency is in range * @usb2_hw_lpm_capable: device can perform USB2 hardware LPM @@ -724,6 +725,7 @@ struct usb_device { unsigned reset_resume:1; unsigned port_is_suspended:1; enum usb_link_tunnel_mode tunnel_mode; + struct device_link *usb4_link; int slot_id; struct usb2_lpm_parameters l1_params; -- cgit v1.2.3 From 4580dbef5ce0f95a4bd8ac2d007bc4fbf1539332 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 20 Jun 2025 13:28:08 -0400 Subject: KVM: TDX: Exit to userspace for SetupEventNotifyInterrupt Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 7 +++++++ arch/x86/include/asm/shared/tdx.h | 1 + arch/x86/kvm/vmx/tdx.c | 23 +++++++++++++++++++++++ include/uapi/linux/kvm.h | 4 ++++ 4 files changed, 35 insertions(+) (limited to 'include') diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 9abf93ee5f65..f0d961436d0f 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -7196,6 +7196,10 @@ The valid value for 'flags' is: u64 leaf; u64 r11, r12, r13, r14; } get_tdvmcall_info; + struct { + u64 ret; + u64 vector; + } setup_event_notify; }; } tdx; @@ -7226,6 +7230,9 @@ status of TDVMCALLs. The output values for the given leaf should be placed in fields from ``r11`` to ``r14`` of the ``get_tdvmcall_info`` field of the union. +* ``TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT``: the guest has requested to +set up a notification interrupt for vector ``vector``. + KVM may add support for more values in the future that may cause a userspace exit, even without calls to ``KVM_ENABLE_CAP`` or similar. In this case, it will enter with output fields already valid; in the common case, the diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h index d8525e6ef50a..8bc074c8d7c6 100644 --- a/arch/x86/include/asm/shared/tdx.h +++ b/arch/x86/include/asm/shared/tdx.h @@ -72,6 +72,7 @@ #define TDVMCALL_MAP_GPA 0x10001 #define TDVMCALL_GET_QUOTE 0x10002 #define TDVMCALL_REPORT_FATAL_ERROR 0x10003 +#define TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT 0x10004ULL /* * TDG.VP.VMCALL Status Codes (returned in R10) diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c index 1ad20c273f3b..b4055a746ecd 100644 --- a/arch/x86/kvm/vmx/tdx.c +++ b/arch/x86/kvm/vmx/tdx.c @@ -1530,6 +1530,27 @@ static int tdx_get_quote(struct kvm_vcpu *vcpu) return 0; } +static int tdx_setup_event_notify_interrupt(struct kvm_vcpu *vcpu) +{ + struct vcpu_tdx *tdx = to_tdx(vcpu); + u64 vector = tdx->vp_enter_args.r12; + + if (vector < 32 || vector > 255) { + tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_INVALID_OPERAND); + return 1; + } + + vcpu->run->exit_reason = KVM_EXIT_TDX; + vcpu->run->tdx.flags = 0; + vcpu->run->tdx.nr = TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT; + vcpu->run->tdx.setup_event_notify.ret = TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED; + vcpu->run->tdx.setup_event_notify.vector = vector; + + vcpu->arch.complete_userspace_io = tdx_complete_simple; + + return 0; +} + static int handle_tdvmcall(struct kvm_vcpu *vcpu) { switch (tdvmcall_leaf(vcpu)) { @@ -1541,6 +1562,8 @@ static int handle_tdvmcall(struct kvm_vcpu *vcpu) return tdx_get_td_vm_call_info(vcpu); case TDVMCALL_GET_QUOTE: return tdx_get_quote(vcpu); + case TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT: + return tdx_setup_event_notify_interrupt(vcpu); default: break; } diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 37891580d05d..7a4c35ff03fe 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -467,6 +467,10 @@ struct kvm_run { __u64 leaf; __u64 r11, r12, r13, r14; } get_tdvmcall_info; + struct { + __u64 ret; + __u64 vector; + } setup_event_notify; }; } tdx; /* Fix the size of the union. */ -- cgit v1.2.3 From 64f7548aad63d2fbca2eeb6eb33361c218ebd5a5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 20 Jun 2025 21:19:40 +0200 Subject: lib/crypto: sha256: Mark sha256_choose_blocks as __always_inline When the compiler chooses to not inline sha256_choose_blocks() in the purgatory code, it fails to link against the missing CPU specific version: x86_64-linux-ld: arch/x86/purgatory/purgatory.ro: in function `sha256_choose_blocks.part.0': sha256.c:(.text+0x6a6): undefined reference to `irq_fpu_usable' sha256.c:(.text+0x6c7): undefined reference to `sha256_blocks_arch' sha256.c:(.text+0x6cc): undefined reference to `sha256_blocks_simd' Mark this function as __always_inline to prevent this, same as sha256_finup(). Fixes: 5b90a779bc54 ("crypto: lib/sha256 - Add helpers for block-based shash") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20250620191952.1867578-1-arnd@kernel.org Signed-off-by: Eric Biggers --- include/crypto/internal/sha2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/crypto/internal/sha2.h b/include/crypto/internal/sha2.h index b9bccd3ff57f..21a27fd5e198 100644 --- a/include/crypto/internal/sha2.h +++ b/include/crypto/internal/sha2.h @@ -25,7 +25,7 @@ void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS], void sha256_blocks_simd(u32 state[SHA256_STATE_WORDS], const u8 *data, size_t nblocks); -static inline void sha256_choose_blocks( +static __always_inline void sha256_choose_blocks( u32 state[SHA256_STATE_WORDS], const u8 *data, size_t nblocks, bool force_generic, bool force_simd) { -- cgit v1.2.3 From cbe4134ea4bc493239786220bd69cb8a13493190 Mon Sep 17 00:00:00 2001 From: Shivank Garg Date: Fri, 20 Jun 2025 07:03:30 +0000 Subject: fs: export anon_inode_make_secure_inode() and fix secretmem LSM bypass Export anon_inode_make_secure_inode() to allow KVM guest_memfd to create anonymous inodes with proper security context. This replaces the current pattern of calling alloc_anon_inode() followed by inode_init_security_anon() for creating security context manually. This change also fixes a security regression in secretmem where the S_PRIVATE flag was not cleared after alloc_anon_inode(), causing LSM/SELinux checks to be bypassed for secretmem file descriptors. As guest_memfd currently resides in the KVM module, we need to export this symbol for use outside the core kernel. In the future, guest_memfd might be moved to core-mm, at which point the symbols no longer would have to be exported. When/if that happens is still unclear. Fixes: 2bfe15c52612 ("mm: create security context for memfd_secret inodes") Suggested-by: David Hildenbrand Suggested-by: Mike Rapoport Signed-off-by: Shivank Garg Link: https://lore.kernel.org/20250620070328.803704-3-shivankg@amd.com Acked-by: "Mike Rapoport (Microsoft)" Signed-off-by: Christian Brauner --- fs/anon_inodes.c | 23 ++++++++++++++++++----- include/linux/fs.h | 2 ++ mm/secretmem.c | 9 +-------- 3 files changed, 21 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index e51e7d88980a..1d847a939f29 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -98,14 +98,25 @@ static struct file_system_type anon_inode_fs_type = { .kill_sb = kill_anon_super, }; -static struct inode *anon_inode_make_secure_inode( - const char *name, - const struct inode *context_inode) +/** + * anon_inode_make_secure_inode - allocate an anonymous inode with security context + * @sb: [in] Superblock to allocate from + * @name: [in] Name of the class of the newfile (e.g., "secretmem") + * @context_inode: + * [in] Optional parent inode for security inheritance + * + * The function ensures proper security initialization through the LSM hook + * security_inode_init_security_anon(). + * + * Return: Pointer to new inode on success, ERR_PTR on failure. + */ +struct inode *anon_inode_make_secure_inode(struct super_block *sb, const char *name, + const struct inode *context_inode) { struct inode *inode; int error; - inode = alloc_anon_inode(anon_inode_mnt->mnt_sb); + inode = alloc_anon_inode(sb); if (IS_ERR(inode)) return inode; inode->i_flags &= ~S_PRIVATE; @@ -118,6 +129,7 @@ static struct inode *anon_inode_make_secure_inode( } return inode; } +EXPORT_SYMBOL_GPL_FOR_MODULES(anon_inode_make_secure_inode, "kvm"); static struct file *__anon_inode_getfile(const char *name, const struct file_operations *fops, @@ -132,7 +144,8 @@ static struct file *__anon_inode_getfile(const char *name, return ERR_PTR(-ENOENT); if (make_inode) { - inode = anon_inode_make_secure_inode(name, context_inode); + inode = anon_inode_make_secure_inode(anon_inode_mnt->mnt_sb, + name, context_inode); if (IS_ERR(inode)) { file = ERR_CAST(inode); goto err; diff --git a/include/linux/fs.h b/include/linux/fs.h index 4ec77da65f14..3a8e643c4279 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3606,6 +3606,8 @@ extern int simple_write_begin(struct file *file, struct address_space *mapping, extern const struct address_space_operations ram_aops; extern int always_delete_dentry(const struct dentry *); extern struct inode *alloc_anon_inode(struct super_block *); +struct inode *anon_inode_make_secure_inode(struct super_block *sb, const char *name, + const struct inode *context_inode); extern int simple_nosetlease(struct file *, int, struct file_lease **, void **); extern const struct dentry_operations simple_dentry_operations; diff --git a/mm/secretmem.c b/mm/secretmem.c index 589b26c2d553..9a11a38a6770 100644 --- a/mm/secretmem.c +++ b/mm/secretmem.c @@ -195,18 +195,11 @@ static struct file *secretmem_file_create(unsigned long flags) struct file *file; struct inode *inode; const char *anon_name = "[secretmem]"; - int err; - inode = alloc_anon_inode(secretmem_mnt->mnt_sb); + inode = anon_inode_make_secure_inode(secretmem_mnt->mnt_sb, anon_name, NULL); if (IS_ERR(inode)) return ERR_CAST(inode); - err = security_inode_init_security_anon(inode, &QSTR(anon_name), NULL); - if (err) { - file = ERR_PTR(err); - goto err_free_inode; - } - file = alloc_file_pseudo(inode, secretmem_mnt, "secretmem", O_RDWR, &secretmem_fops); if (IS_ERR(file)) -- cgit v1.2.3 From a24cc6ce1933eade12aa2b9859de0fcd2dac2c06 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 23 Jun 2025 10:34:08 +0200 Subject: futex: Initialize futex_phash_new during fork(). During a hash resize operation the new private hash is stored in mm_struct::futex_phash_new if the current hash can not be immediately replaced. The new hash must not be copied during fork() into the new task. Doing so will lead to a double-free of the memory by the two tasks. Initialize the mm_struct::futex_phash_new during fork(). Closes: https://lore.kernel.org/all/aFBQ8CBKmRzEqIfS@mozart.vkv.me/ Fixes: bd54df5ea7cad ("futex: Allow to resize the private local hash") Reported-by: Calvin Owens Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Tested-by: Calvin Owens Link: https://lkml.kernel.org/r/20250623083408.jTiJiC6_@linutronix.de --- include/linux/futex.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/futex.h b/include/linux/futex.h index 005b040c4791..b37193653e6b 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -89,6 +89,7 @@ void futex_hash_free(struct mm_struct *mm); static inline void futex_mm_init(struct mm_struct *mm) { RCU_INIT_POINTER(mm->futex_phash, NULL); + mm->futex_phash_new = NULL; mutex_init(&mm->futex_hash_lock); } -- cgit v1.2.3 From af4db5a35a4ef7a68046883bfd12468007db38f1 Mon Sep 17 00:00:00 2001 From: RD Babiera Date: Wed, 18 Jun 2025 22:49:42 +0000 Subject: usb: typec: altmodes/displayport: do not index invalid pin_assignments A poorly implemented DisplayPort Alt Mode port partner can indicate that its pin assignment capabilities are greater than the maximum value, DP_PIN_ASSIGN_F. In this case, calls to pin_assignment_show will cause a BRK exception due to an out of bounds array access. Prevent for loop in pin_assignment_show from accessing invalid values in pin_assignments by adding DP_PIN_ASSIGN_MAX value in typec_dp.h and using i < DP_PIN_ASSIGN_MAX as a loop condition. Fixes: 0e3bb7d6894d ("usb: typec: Add driver for DisplayPort alternate mode") Cc: stable Signed-off-by: RD Babiera Reviewed-by: Badhri Jagan Sridharan Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20250618224943.3263103-2-rdbabiera@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/altmodes/displayport.c | 2 +- include/linux/usb/typec_dp.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/usb/typec/altmodes/displayport.c b/drivers/usb/typec/altmodes/displayport.c index b09b58d7311d..773786129dfb 100644 --- a/drivers/usb/typec/altmodes/displayport.c +++ b/drivers/usb/typec/altmodes/displayport.c @@ -677,7 +677,7 @@ static ssize_t pin_assignment_show(struct device *dev, assignments = get_current_pin_assignments(dp); - for (i = 0; assignments; assignments >>= 1, i++) { + for (i = 0; assignments && i < DP_PIN_ASSIGN_MAX; assignments >>= 1, i++) { if (assignments & 1) { if (i == cur) len += sprintf(buf + len, "[%s] ", diff --git a/include/linux/usb/typec_dp.h b/include/linux/usb/typec_dp.h index f2da264d9c14..acb0ad03bdac 100644 --- a/include/linux/usb/typec_dp.h +++ b/include/linux/usb/typec_dp.h @@ -57,6 +57,7 @@ enum { DP_PIN_ASSIGN_D, DP_PIN_ASSIGN_E, DP_PIN_ASSIGN_F, /* Not supported after v1.0b */ + DP_PIN_ASSIGN_MAX, }; /* DisplayPort alt mode specific commands */ -- cgit v1.2.3 From 67caa528ae08cd05e485c0ea6aea0baaf6579b06 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Sat, 21 Jun 2025 10:28:41 -0600 Subject: ublk: fix narrowing warnings in UAPI header When a C++ file compiled with -Wc++11-narrowing includes the UAPI header linux/ublk_cmd.h, ublk_sqe_addr_to_auto_buf_reg()'s assignments of u64 values to u8, u16, and u32 fields result in compiler warnings. Add explicit casts to the intended types to avoid these warnings. Drop the unnecessary bitmasks. Reported-by: Uday Shankar Signed-off-by: Caleb Sander Mateos Fixes: 99c1e4eb6a3f ("ublk: register buffer to local io_uring with provided buf index via UBLK_F_AUTO_BUF_REG") Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20250621162842.337452-1-csander@purestorage.com Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index 77d9d6af46da..c062109cb686 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -450,10 +450,10 @@ static inline struct ublk_auto_buf_reg ublk_sqe_addr_to_auto_buf_reg( __u64 sqe_addr) { struct ublk_auto_buf_reg reg = { - .index = sqe_addr & 0xffff, - .flags = (sqe_addr >> 16) & 0xff, - .reserved0 = (sqe_addr >> 24) & 0xff, - .reserved1 = sqe_addr >> 32, + .index = (__u16)sqe_addr, + .flags = (__u8)(sqe_addr >> 16), + .reserved0 = (__u8)(sqe_addr >> 24), + .reserved1 = (__u32)(sqe_addr >> 32), }; return reg; -- cgit v1.2.3 From 81b4d1a1d03301dcca8af5c58eded9e535f1f6ed Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Sat, 21 Jun 2025 11:10:14 -0600 Subject: ublk: update UBLK_F_SUPPORT_ZERO_COPY comment in UAPI header UBLK_F_SUPPORT_ZERO_COPY has a very old comment describing the initial idea for how zero-copy would be implemented. The actual implementation added in commit 1f6540e2aabb ("ublk: zc register/unregister bvec") uses io_uring registered buffers rather than shared memory mapping. Remove the inaccurate remarks about mapping ublk request memory into the ublk server's address space and requiring 4K block size. Replace them with a description of the current zero-copy mechanism. Signed-off-by: Caleb Sander Mateos Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20250621171015.354932-1-csander@purestorage.com Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index c062109cb686..c9751bdfd937 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -135,8 +135,28 @@ #define UBLKSRV_IO_BUF_TOTAL_SIZE (1ULL << UBLKSRV_IO_BUF_TOTAL_BITS) /* - * zero copy requires 4k block size, and can remap ublk driver's io - * request into ublksrv's vm space + * ublk server can register data buffers for incoming I/O requests with a sparse + * io_uring buffer table. The request buffer can then be used as the data buffer + * for io_uring operations via the fixed buffer index. + * Note that the ublk server can never directly access the request data memory. + * + * To use this feature, the ublk server must first register a sparse buffer + * table on an io_uring instance. + * When an incoming ublk request is received, the ublk server submits a + * UBLK_U_IO_REGISTER_IO_BUF command to that io_uring instance. The + * ublksrv_io_cmd's q_id and tag specify the request whose buffer to register + * and addr is the index in the io_uring's buffer table to install the buffer. + * SQEs can now be submitted to the io_uring to read/write the request's buffer + * by enabling fixed buffers (e.g. using IORING_OP_{READ,WRITE}_FIXED or + * IORING_URING_CMD_FIXED) and passing the registered buffer index in buf_index. + * Once the last io_uring operation using the request's buffer has completed, + * the ublk server submits a UBLK_U_IO_UNREGISTER_IO_BUF command with q_id, tag, + * and addr again specifying the request buffer to unregister. + * The ublk request is completed when its buffer is unregistered from all + * io_uring instances and the ublk server issues UBLK_U_IO_COMMIT_AND_FETCH_REQ. + * + * Not available for UBLK_F_UNPRIVILEGED_DEV, as a ublk server can leak + * uninitialized kernel memory by not reading into the full request buffer. */ #define UBLK_F_SUPPORT_ZERO_COPY (1ULL << 0) -- cgit v1.2.3 From 51a4273dcab39dd1e850870945ccec664352d383 Mon Sep 17 00:00:00 2001 From: Nikunj A Dadhania Date: Tue, 8 Apr 2025 15:02:11 +0530 Subject: KVM: SVM: Add missing member in SNP_LAUNCH_START command structure The sev_data_snp_launch_start structure should include a 4-byte desired_tsc_khz field before the gosvw field, which was missed in the initial implementation. As a result, the structure is 4 bytes shorter than expected by the firmware, causing the gosvw field to start 4 bytes early. Fix this by adding the missing 4-byte member for the desired TSC frequency. Fixes: 3a45dc2b419e ("crypto: ccp: Define the SEV-SNP commands") Cc: stable@vger.kernel.org Suggested-by: Tom Lendacky Reviewed-by: Tom Lendacky Tested-by: Vaishali Thakkar Signed-off-by: Nikunj A Dadhania Link: https://lore.kernel.org/r/20250408093213.57962-3-nikunj@amd.com Signed-off-by: Sean Christopherson --- include/linux/psp-sev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index 0b3a36bdaa90..0f5f94137f6d 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -594,6 +594,7 @@ struct sev_data_snp_addr { * @imi_en: launch flow is launching an IMI (Incoming Migration Image) for the * purpose of guest-assisted migration. * @rsvd: reserved + * @desired_tsc_khz: hypervisor desired mean TSC freq in kHz of the guest * @gosvw: guest OS-visible workarounds, as defined by hypervisor */ struct sev_data_snp_launch_start { @@ -603,6 +604,7 @@ struct sev_data_snp_launch_start { u32 ma_en:1; /* In */ u32 imi_en:1; /* In */ u32 rsvd:30; + u32 desired_tsc_khz; /* In */ u8 gosvw[16]; /* In */ } __packed; -- cgit v1.2.3 From c8dc579169738a3546f57ecb38e62d3872a3cc04 Mon Sep 17 00:00:00 2001 From: Pratap Nirujogi Date: Mon, 9 Jun 2025 11:53:56 -0400 Subject: i2c: amd-isp: Initialize unique adapter name Initialize unique name for amdisp i2c adapter, which is used in the platform driver to detect the matching adapter for i2c_client creation. Add definition of amdisp i2c adapter name in a new header file (include/linux/soc/amd/isp4_misc.h) as it is referred in different driver modules. Tested-by: Randy Dunlap Signed-off-by: Pratap Nirujogi Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250609155601.1477055-3-pratap.nirujogi@amd.com --- MAINTAINERS | 1 + drivers/i2c/busses/i2c-designware-amdisp.c | 2 ++ include/linux/soc/amd/isp4_misc.h | 12 ++++++++++++ 3 files changed, 15 insertions(+) create mode 100644 include/linux/soc/amd/isp4_misc.h (limited to 'include') diff --git a/MAINTAINERS b/MAINTAINERS index c3f7fbd0d67a..8719f097aae3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -24063,6 +24063,7 @@ M: Bin Du L: linux-i2c@vger.kernel.org S: Maintained F: drivers/i2c/busses/i2c-designware-amdisp.c +F: include/linux/soc/amd/isp4_misc.h SYNOPSYS DESIGNWARE MMC/SD/SDIO DRIVER M: Jaehoon Chung diff --git a/drivers/i2c/busses/i2c-designware-amdisp.c b/drivers/i2c/busses/i2c-designware-amdisp.c index ad6f08338124..450793d5f839 100644 --- a/drivers/i2c/busses/i2c-designware-amdisp.c +++ b/drivers/i2c/busses/i2c-designware-amdisp.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "i2c-designware-core.h" @@ -62,6 +63,7 @@ static int amd_isp_dw_i2c_plat_probe(struct platform_device *pdev) adap = &isp_i2c_dev->adapter; adap->owner = THIS_MODULE; + scnprintf(adap->name, sizeof(adap->name), AMDISP_I2C_ADAP_NAME); ACPI_COMPANION_SET(&adap->dev, ACPI_COMPANION(&pdev->dev)); adap->dev.of_node = pdev->dev.of_node; /* use dynamically allocated adapter id */ diff --git a/include/linux/soc/amd/isp4_misc.h b/include/linux/soc/amd/isp4_misc.h new file mode 100644 index 000000000000..6738796986a7 --- /dev/null +++ b/include/linux/soc/amd/isp4_misc.h @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Copyright (C) 2025 Advanced Micro Devices, Inc. + */ + +#ifndef __SOC_ISP4_MISC_H +#define __SOC_ISP4_MISC_H + +#define AMDISP_I2C_ADAP_NAME "AMDISP DesignWare I2C adapter" + +#endif -- cgit v1.2.3 From f5769359c5b241978e6933672bb78b3adc36aa18 Mon Sep 17 00:00:00 2001 From: Hao Ge Date: Fri, 20 Jun 2025 02:31:54 +0800 Subject: mm/alloc_tag: fix the kmemleak false positive issue in the allocation of the percpu variable tag->counters When loading a module, as long as the module has memory allocation operations, kmemleak produces a false positive report that resembles the following: unreferenced object (percpu) 0x7dfd232a1650 (size 16): comm "modprobe", pid 1301, jiffies 4294940249 hex dump (first 16 bytes on cpu 2): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace (crc 0): kmemleak_alloc_percpu+0xb4/0xd0 pcpu_alloc_noprof+0x700/0x1098 load_module+0xd4/0x348 codetag_module_init+0x20c/0x450 codetag_load_module+0x70/0xb8 load_module+0xef8/0x1608 init_module_from_file+0xec/0x158 idempotent_init_module+0x354/0x608 __arm64_sys_finit_module+0xbc/0x150 invoke_syscall+0xd4/0x258 el0_svc_common.constprop.0+0xb4/0x240 do_el0_svc+0x48/0x68 el0_svc+0x40/0xf8 el0t_64_sync_handler+0x10c/0x138 el0t_64_sync+0x1ac/0x1b0 This is because the module can only indirectly reference alloc_tag_counters through the alloc_tag section, which misleads kmemleak. However, we don't have a kmemleak ignore interface for percpu allocations yet. So let's create one and invoke it for tag->counters. [gehao@kylinos.cn: fix build error when CONFIG_DEBUG_KMEMLEAK=n, s/igonore/ignore/] Link: https://lkml.kernel.org/r/20250620093102.2416767-1-hao.ge@linux.dev Link: https://lkml.kernel.org/r/20250619183154.2122608-1-hao.ge@linux.dev Fixes: 12ca42c23775 ("alloc_tag: allocate percpu counters for module tags dynamically") Signed-off-by: Hao Ge Reviewed-by: Catalin Marinas Acked-by: Suren Baghdasaryan [lib/alloc_tag.c] Cc: Kent Overstreet Signed-off-by: Andrew Morton --- include/linux/kmemleak.h | 4 ++++ lib/alloc_tag.c | 8 +++++++- mm/kmemleak.c | 14 ++++++++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h index 93a73c076d16..fbd424b2abb1 100644 --- a/include/linux/kmemleak.h +++ b/include/linux/kmemleak.h @@ -28,6 +28,7 @@ extern void kmemleak_update_trace(const void *ptr) __ref; extern void kmemleak_not_leak(const void *ptr) __ref; extern void kmemleak_transient_leak(const void *ptr) __ref; extern void kmemleak_ignore(const void *ptr) __ref; +extern void kmemleak_ignore_percpu(const void __percpu *ptr) __ref; extern void kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp) __ref; extern void kmemleak_no_scan(const void *ptr) __ref; extern void kmemleak_alloc_phys(phys_addr_t phys, size_t size, @@ -97,6 +98,9 @@ static inline void kmemleak_not_leak(const void *ptr) static inline void kmemleak_transient_leak(const void *ptr) { } +static inline void kmemleak_ignore_percpu(const void __percpu *ptr) +{ +} static inline void kmemleak_ignore(const void *ptr) { } diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c index d48b80f3f007..3a74d63a959e 100644 --- a/lib/alloc_tag.c +++ b/lib/alloc_tag.c @@ -10,6 +10,7 @@ #include #include #include +#include #define ALLOCINFO_FILE_NAME "allocinfo" #define MODULE_ALLOC_TAG_VMAP_SIZE (100000UL * sizeof(struct alloc_tag)) @@ -632,8 +633,13 @@ static int load_module(struct module *mod, struct codetag *start, struct codetag mod->name); return -ENOMEM; } - } + /* + * Avoid a kmemleak false positive. The pointer to the counters is stored + * in the alloc_tag section of the module and cannot be directly accessed. + */ + kmemleak_ignore_percpu(tag->counters); + } return 0; } diff --git a/mm/kmemleak.c b/mm/kmemleak.c index da9cee34ee1b..8d588e685311 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1246,6 +1246,20 @@ void __ref kmemleak_transient_leak(const void *ptr) } EXPORT_SYMBOL(kmemleak_transient_leak); +/** + * kmemleak_ignore_percpu - similar to kmemleak_ignore but taking a percpu + * address argument + * @ptr: percpu address of the object + */ +void __ref kmemleak_ignore_percpu(const void __percpu *ptr) +{ + pr_debug("%s(0x%px)\n", __func__, ptr); + + if (kmemleak_enabled && ptr && !IS_ERR_PCPU(ptr)) + make_black_object((unsigned long)ptr, OBJECT_PERCPU); +} +EXPORT_SYMBOL_GPL(kmemleak_ignore_percpu); + /** * kmemleak_ignore - ignore an allocated object * @ptr: pointer to beginning of the object -- cgit v1.2.3 From 12ffc3b1513ebc1f11ae77d053948504a94a68a6 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 13 Jun 2025 16:43:44 -0500 Subject: PM: Restrict swap use to later in the suspend sequence Currently swap is restricted before drivers have had a chance to do their prepare() PM callbacks. Restricting swap this early means that if a driver needs to evict some content from memory into sawp in it's prepare callback, it won't be able to. On AMD dGPUs this can lead to failed suspends under memory pressure situations as all VRAM must be evicted to system memory or swap. Move the swap restriction to right after all devices have had a chance to do the prepare() callback. If there is any problem with the sequence, restore swap in the appropriate dpm resume callbacks or error handling paths. Closes: https://github.com/ROCm/ROCK-Kernel-Driver/issues/174 Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/2362 Signed-off-by: Mario Limonciello Tested-by: Nat Wittstock Tested-by: Lucian Langa Link: https://patch.msgid.link/20250613214413.4127087-1-superm1@kernel.org Signed-off-by: Rafael J. Wysocki --- drivers/base/power/main.c | 5 ++++- include/linux/suspend.h | 5 +++++ kernel/kexec_core.c | 1 + kernel/power/hibernate.c | 3 --- kernel/power/power.h | 5 ----- kernel/power/suspend.c | 3 +-- 6 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index eebe699fdf4f..bf77d28e959f 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1236,6 +1236,7 @@ void dpm_complete(pm_message_t state) */ void dpm_resume_end(pm_message_t state) { + pm_restore_gfp_mask(); dpm_resume(state); dpm_complete(state); } @@ -2176,8 +2177,10 @@ int dpm_suspend_start(pm_message_t state) error = dpm_prepare(state); if (error) dpm_save_failed_step(SUSPEND_PREPARE); - else + else { + pm_restrict_gfp_mask(); error = dpm_suspend(state); + } dpm_show_time(starttime, state, error, "start"); return error; diff --git a/include/linux/suspend.h b/include/linux/suspend.h index b1c76c8f2c82..6a3f92098872 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -446,6 +446,8 @@ extern int unregister_pm_notifier(struct notifier_block *nb); extern void ksys_sync_helper(void); extern void pm_report_hw_sleep_time(u64 t); extern void pm_report_max_hw_sleep(u64 t); +void pm_restrict_gfp_mask(void); +void pm_restore_gfp_mask(void); #define pm_notifier(fn, pri) { \ static struct notifier_block fn##_nb = \ @@ -492,6 +494,9 @@ static inline int unregister_pm_notifier(struct notifier_block *nb) static inline void pm_report_hw_sleep_time(u64 t) {}; static inline void pm_report_max_hw_sleep(u64 t) {}; +static inline void pm_restrict_gfp_mask(void) {} +static inline void pm_restore_gfp_mask(void) {} + static inline void ksys_sync_helper(void) {} #define pm_notifier(fn, pri) do { (void)(fn); } while (0) diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 9c59fa480b0b..3a9a9f240dbc 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -1136,6 +1136,7 @@ int kernel_kexec(void) Resume_devices: dpm_resume_end(PMSG_RESTORE); Resume_console: + pm_restore_gfp_mask(); console_resume_all(); thaw_processes(); Restore_console: diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 519fb09de5e0..9216e3b91d3b 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -423,7 +423,6 @@ int hibernation_snapshot(int platform_mode) } console_suspend_all(); - pm_restrict_gfp_mask(); error = dpm_suspend(PMSG_FREEZE); @@ -559,7 +558,6 @@ int hibernation_restore(int platform_mode) pm_prepare_console(); console_suspend_all(); - pm_restrict_gfp_mask(); error = dpm_suspend_start(PMSG_QUIESCE); if (!error) { error = resume_target_kernel(platform_mode); @@ -571,7 +569,6 @@ int hibernation_restore(int platform_mode) BUG_ON(!error); } dpm_resume_end(PMSG_RECOVER); - pm_restore_gfp_mask(); console_resume_all(); pm_restore_console(); return error; diff --git a/kernel/power/power.h b/kernel/power/power.h index cb1d71562002..7ccd709af93f 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -239,11 +239,6 @@ static inline void suspend_test_finish(const char *label) {} /* kernel/power/main.c */ extern int pm_notifier_call_chain_robust(unsigned long val_up, unsigned long val_down); extern int pm_notifier_call_chain(unsigned long val); -void pm_restrict_gfp_mask(void); -void pm_restore_gfp_mask(void); -#else -static inline void pm_restrict_gfp_mask(void) {} -static inline void pm_restore_gfp_mask(void) {} #endif #ifdef CONFIG_HIGHMEM diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 76b141b9aac0..bb608b68fb30 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -540,6 +540,7 @@ int suspend_devices_and_enter(suspend_state_t state) return error; Recover_platform: + pm_restore_gfp_mask(); platform_recover(state); goto Resume_devices; } @@ -606,9 +607,7 @@ static int enter_state(suspend_state_t state) trace_suspend_resume(TPS("suspend_enter"), state, false); pm_pr_dbg("Suspending system (%s)\n", mem_sleep_labels[state]); - pm_restrict_gfp_mask(); error = suspend_devices_and_enter(state); - pm_restore_gfp_mask(); Finish: events_check_enabled = false; -- cgit v1.2.3 From 2d22b63f3a5aae2088708941d08cf0f01f430a58 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 26 Jun 2025 12:04:59 +0200 Subject: drm/mipi-dsi: Add dev_is_mipi_dsi function This will be especially useful for generic panels (like panel-simple) which can take different code path depending on if they are MIPI-DSI devices or platform devices. Reviewed-by: Javier Martinez Canillas Tested-by: Francesco Dolcini # Toradex Colibri iMX6 Link: https://lore.kernel.org/r/20250626-drm-panel-simple-fixes-v2-1-5afcaa608bdc@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/drm_mipi_dsi.c | 3 ++- include/drm/drm_mipi_dsi.h | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_mipi_dsi.c b/drivers/gpu/drm/drm_mipi_dsi.c index e5184a0c2465..21fd647f8ce1 100644 --- a/drivers/gpu/drm/drm_mipi_dsi.c +++ b/drivers/gpu/drm/drm_mipi_dsi.c @@ -91,12 +91,13 @@ static const struct dev_pm_ops mipi_dsi_device_pm_ops = { .restore = pm_generic_restore, }; -static const struct bus_type mipi_dsi_bus_type = { +const struct bus_type mipi_dsi_bus_type = { .name = "mipi-dsi", .match = mipi_dsi_device_match, .uevent = mipi_dsi_uevent, .pm = &mipi_dsi_device_pm_ops, }; +EXPORT_SYMBOL_GPL(mipi_dsi_bus_type); /** * of_find_mipi_dsi_device_by_node() - find the MIPI DSI device matching a diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index b37860f4a895..6d2c08e81101 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -223,6 +223,9 @@ struct mipi_dsi_multi_context { #define to_mipi_dsi_device(__dev) container_of_const(__dev, struct mipi_dsi_device, dev) +extern const struct bus_type mipi_dsi_bus_type; +#define dev_is_mipi_dsi(dev) ((dev)->bus == &mipi_dsi_bus_type) + /** * mipi_dsi_pixel_format_to_bpp - obtain the number of bits per pixel for any * given pixel format defined by the MIPI DSI -- cgit v1.2.3 From ddaad4ad774d4ae02047ef873a8e38f62a4b7b01 Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Wed, 18 Jun 2025 22:22:50 +0200 Subject: mtd: nand: qpic_common: prevent out of bounds access of BAM arrays The common QPIC code does not do any boundary checking when it handles the command elements and scatter gater list arrays of a BAM transaction, thus it allows to access out of bounds elements in those. Although it is the responsibility of the given driver to allocate enough space for all possible BAM transaction variations, however there can be mistakes in the driver code which can lead to hidden memory corruption issues which are hard to debug. This kind of problem has been observed during testing the 'spi-qpic-snand' driver. Although the driver has been fixed with a preceding patch, but it still makes sense to reduce the chance of having such errors again later. In order to prevent such errors, change the qcom_alloc_bam_transaction() function to store the number of elements of the arrays in the 'bam_transaction' strucutre during allocation. Also, add sanity checks to the qcom_prep_bam_dma_desc_{cmd,data}() functions to avoid using out of bounds indices for the arrays. Tested-by: Lakshmi Sowjanya D # on SDX75 Acked-by: Miquel Raynal Signed-off-by: Gabor Juhos Link: https://patch.msgid.link/20250618-qpic-snand-avoid-mem-corruption-v3-2-319c71296cda@gmail.com Signed-off-by: Mark Brown --- drivers/mtd/nand/qpic_common.c | 30 ++++++++++++++++++++++++++---- include/linux/mtd/nand-qpic-common.h | 8 ++++++++ 2 files changed, 34 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/mtd/nand/qpic_common.c b/drivers/mtd/nand/qpic_common.c index 4dc4d65e7d32..8e604cc22ca3 100644 --- a/drivers/mtd/nand/qpic_common.c +++ b/drivers/mtd/nand/qpic_common.c @@ -57,14 +57,15 @@ qcom_alloc_bam_transaction(struct qcom_nand_controller *nandc) bam_txn_buf += sizeof(*bam_txn); bam_txn->bam_ce = bam_txn_buf; - bam_txn_buf += - sizeof(*bam_txn->bam_ce) * QPIC_PER_CW_CMD_ELEMENTS * num_cw; + bam_txn->bam_ce_nitems = QPIC_PER_CW_CMD_ELEMENTS * num_cw; + bam_txn_buf += sizeof(*bam_txn->bam_ce) * bam_txn->bam_ce_nitems; bam_txn->cmd_sgl = bam_txn_buf; - bam_txn_buf += - sizeof(*bam_txn->cmd_sgl) * QPIC_PER_CW_CMD_SGL * num_cw; + bam_txn->cmd_sgl_nitems = QPIC_PER_CW_CMD_SGL * num_cw; + bam_txn_buf += sizeof(*bam_txn->cmd_sgl) * bam_txn->cmd_sgl_nitems; bam_txn->data_sgl = bam_txn_buf; + bam_txn->data_sgl_nitems = QPIC_PER_CW_DATA_SGL * num_cw; init_completion(&bam_txn->txn_done); @@ -238,6 +239,11 @@ int qcom_prep_bam_dma_desc_cmd(struct qcom_nand_controller *nandc, bool read, struct bam_transaction *bam_txn = nandc->bam_txn; u32 offset; + if (bam_txn->bam_ce_pos + size > bam_txn->bam_ce_nitems) { + dev_err(nandc->dev, "BAM %s array is full\n", "CE"); + return -EINVAL; + } + bam_ce_buffer = &bam_txn->bam_ce[bam_txn->bam_ce_pos]; /* fill the command desc */ @@ -258,6 +264,12 @@ int qcom_prep_bam_dma_desc_cmd(struct qcom_nand_controller *nandc, bool read, /* use the separate sgl after this command */ if (flags & NAND_BAM_NEXT_SGL) { + if (bam_txn->cmd_sgl_pos >= bam_txn->cmd_sgl_nitems) { + dev_err(nandc->dev, "BAM %s array is full\n", + "CMD sgl"); + return -EINVAL; + } + bam_ce_buffer = &bam_txn->bam_ce[bam_txn->bam_ce_start]; bam_ce_size = (bam_txn->bam_ce_pos - bam_txn->bam_ce_start) * @@ -297,10 +309,20 @@ int qcom_prep_bam_dma_desc_data(struct qcom_nand_controller *nandc, bool read, struct bam_transaction *bam_txn = nandc->bam_txn; if (read) { + if (bam_txn->rx_sgl_pos >= bam_txn->data_sgl_nitems) { + dev_err(nandc->dev, "BAM %s array is full\n", "RX sgl"); + return -EINVAL; + } + sg_set_buf(&bam_txn->data_sgl[bam_txn->rx_sgl_pos], vaddr, size); bam_txn->rx_sgl_pos++; } else { + if (bam_txn->tx_sgl_pos >= bam_txn->data_sgl_nitems) { + dev_err(nandc->dev, "BAM %s array is full\n", "TX sgl"); + return -EINVAL; + } + sg_set_buf(&bam_txn->data_sgl[bam_txn->tx_sgl_pos], vaddr, size); bam_txn->tx_sgl_pos++; diff --git a/include/linux/mtd/nand-qpic-common.h b/include/linux/mtd/nand-qpic-common.h index e8462deda6db..f0aa098a395f 100644 --- a/include/linux/mtd/nand-qpic-common.h +++ b/include/linux/mtd/nand-qpic-common.h @@ -237,6 +237,9 @@ * @last_data_desc - last DMA desc in data channel (tx/rx). * @last_cmd_desc - last DMA desc in command channel. * @txn_done - completion for NAND transfer. + * @bam_ce_nitems - the number of elements in the @bam_ce array + * @cmd_sgl_nitems - the number of elements in the @cmd_sgl array + * @data_sgl_nitems - the number of elements in the @data_sgl array * @bam_ce_pos - the index in bam_ce which is available for next sgl * @bam_ce_start - the index in bam_ce which marks the start position ce * for current sgl. It will be used for size calculation @@ -255,6 +258,11 @@ struct bam_transaction { struct dma_async_tx_descriptor *last_data_desc; struct dma_async_tx_descriptor *last_cmd_desc; struct completion txn_done; + + unsigned int bam_ce_nitems; + unsigned int cmd_sgl_nitems; + unsigned int data_sgl_nitems; + struct_group(bam_positions, u32 bam_ce_pos; u32 bam_ce_start; -- cgit v1.2.3 From 96893cdd4760ad94a438c1523cc5ca2470e04670 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Sun, 29 Jun 2025 18:15:32 +0200 Subject: spi: Raise limit on number of chip selects to 24 We have a system which uses 24 SPI chip selects, raise the hard coded limit accordingly. Signed-off-by: Marc Kleine-Budde Link: https://patch.msgid.link/20250629-spi-increase-number-of-cs-v2-1-85a0a09bab32@pengutronix.de Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 4789f91dae94..e9ea43234d9a 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -21,7 +21,7 @@ #include /* Max no. of CS supported per spi device */ -#define SPI_CS_CNT_MAX 16 +#define SPI_CS_CNT_MAX 24 struct dma_chan; struct software_node; -- cgit v1.2.3 From b1c26e059536d8acbf9d508374f4b76537e20fb7 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 10 Jun 2025 15:58:16 -0500 Subject: Move FCH header to a location accessible by all archs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A new header fch.h was created to store registers used by different AMD drivers. This header was included by i2c-piix4 in commit 624b0d5696a8 ("i2c: piix4, x86/platform: Move the SB800 PIIX4 FCH definitions to "). To prevent compile failures on non-x86 archs i2c-piix4 was set to only compile on x86 by commit 7e173eb82ae9717 ("i2c: piix4: Make CONFIG_I2C_PIIX4 dependent on CONFIG_X86"). This was not a good decision because loongarch and mips both actually support i2c-piix4 and set it enabled in the defconfig. Move the header to a location accessible by all architectures. Fixes: 624b0d5696a89 ("i2c: piix4, x86/platform: Move the SB800 PIIX4 FCH definitions to ") Suggested-by: Hans de Goede Signed-off-by: Mario Limonciello Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20250610205817.3912944-1-superm1@kernel.org Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- arch/x86/include/asm/amd/fch.h | 13 ------------- arch/x86/kernel/cpu/amd.c | 2 +- drivers/i2c/busses/i2c-piix4.c | 2 +- drivers/platform/x86/amd/pmc/pmc-quirks.c | 2 +- include/linux/platform_data/x86/amd-fch.h | 13 +++++++++++++ 5 files changed, 16 insertions(+), 16 deletions(-) delete mode 100644 arch/x86/include/asm/amd/fch.h create mode 100644 include/linux/platform_data/x86/amd-fch.h (limited to 'include') diff --git a/arch/x86/include/asm/amd/fch.h b/arch/x86/include/asm/amd/fch.h deleted file mode 100644 index 2cf5153edbc2..000000000000 --- a/arch/x86/include/asm/amd/fch.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_AMD_FCH_H_ -#define _ASM_X86_AMD_FCH_H_ - -#define FCH_PM_BASE 0xFED80300 - -/* Register offsets from PM base: */ -#define FCH_PM_DECODEEN 0x00 -#define FCH_PM_DECODEEN_SMBUS0SEL GENMASK(20, 19) -#define FCH_PM_SCRATCH 0x80 -#define FCH_PM_S5_RESET_STATUS 0xC0 - -#endif /* _ASM_X86_AMD_FCH_H_ */ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 93da466dfe2c..9543d5de4e7d 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c index 9d3a4dc2bd60..ac3bb550303f 100644 --- a/drivers/i2c/busses/i2c-piix4.c +++ b/drivers/i2c/busses/i2c-piix4.c @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include "i2c-piix4.h" diff --git a/drivers/platform/x86/amd/pmc/pmc-quirks.c b/drivers/platform/x86/amd/pmc/pmc-quirks.c index f292111bd065..131f10b68308 100644 --- a/drivers/platform/x86/amd/pmc/pmc-quirks.c +++ b/drivers/platform/x86/amd/pmc/pmc-quirks.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include "pmc.h" diff --git a/include/linux/platform_data/x86/amd-fch.h b/include/linux/platform_data/x86/amd-fch.h new file mode 100644 index 000000000000..2cf5153edbc2 --- /dev/null +++ b/include/linux/platform_data/x86/amd-fch.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_AMD_FCH_H_ +#define _ASM_X86_AMD_FCH_H_ + +#define FCH_PM_BASE 0xFED80300 + +/* Register offsets from PM base: */ +#define FCH_PM_DECODEEN 0x00 +#define FCH_PM_DECODEEN_SMBUS0SEL GENMASK(20, 19) +#define FCH_PM_SCRATCH 0x80 +#define FCH_PM_S5_RESET_STATUS 0xC0 + +#endif /* _ASM_X86_AMD_FCH_H_ */ -- cgit v1.2.3 From 9df7b5ebead649b00bf9a53a798e4bf83a1318fd Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 1 Jul 2025 17:38:37 +0100 Subject: netfs: Fix double put of request If a netfs request finishes during the pause loop, it will have the ref that belongs to the IN_PROGRESS flag removed at that point - however, if it then goes to the final wait loop, that will *also* put the ref because it sees that the IN_PROGRESS flag is clear and incorrectly assumes that this happened when it called the collector. In fact, since IN_PROGRESS is clear, we shouldn't call the collector again since it's done all the cleanup, such as calling ->ki_complete(). Fix this by making netfs_collect_in_app() just return, indicating that we're done if IN_PROGRESS is removed. Fixes: 2b1424cd131c ("netfs: Fix wait/wake to be consistent about the waitqueue used") Signed-off-by: David Howells Link: https://lore.kernel.org/20250701163852.2171681-3-dhowells@redhat.com Tested-by: Steve French Reviewed-by: Paulo Alcantara cc: Steve French cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org cc: linux-cifs@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/misc.c | 5 +++++ include/trace/events/netfs.h | 1 + 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c index 0a54b1203486..8cf73b237269 100644 --- a/fs/netfs/misc.c +++ b/fs/netfs/misc.c @@ -383,6 +383,11 @@ static int netfs_collect_in_app(struct netfs_io_request *rreq, { bool need_collect = false, inactive = true, done = true; + if (!netfs_check_rreq_in_progress(rreq)) { + trace_netfs_rreq(rreq, netfs_rreq_trace_recollect); + return 1; /* Done */ + } + for (int i = 0; i < NR_IO_STREAMS; i++) { struct netfs_io_subrequest *subreq; struct netfs_io_stream *stream = &rreq->io_streams[i]; diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 333d2e38dd2c..ba35dc66e986 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -56,6 +56,7 @@ EM(netfs_rreq_trace_dirty, "DIRTY ") \ EM(netfs_rreq_trace_done, "DONE ") \ EM(netfs_rreq_trace_free, "FREE ") \ + EM(netfs_rreq_trace_recollect, "RECLLCT") \ EM(netfs_rreq_trace_redirty, "REDIRTY") \ EM(netfs_rreq_trace_resubmit, "RESUBMT") \ EM(netfs_rreq_trace_set_abandon, "S-ABNDN") \ -- cgit v1.2.3 From 5e1e6ec2e346c0850f304c31abdef4158007474e Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 1 Jul 2025 17:38:46 +0100 Subject: netfs: Merge i_size update functions Netfslib has two functions for updating the i_size after a write: one for buffered writes into the pagecache and one for direct/unbuffered writes. However, what needs to be done is much the same in both cases, so merge them together. This does raise one question, though: should updating the i_size after a direct write do the same estimated update of i_blocks as is done for buffered writes. Also get rid of the cleanup function pointer from netfs_io_request as it's only used for direct write to update i_size; instead do the i_size setting directly from write collection. Signed-off-by: David Howells Link: https://lore.kernel.org/20250701163852.2171681-12-dhowells@redhat.com cc: Steve French cc: Paulo Alcantara cc: linux-cifs@vger.kernel.org cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/buffered_write.c | 36 +++++++++++++++++++++--------------- fs/netfs/direct_write.c | 19 ------------------- fs/netfs/internal.h | 6 ++++++ fs/netfs/write_collect.c | 6 ++++-- include/linux/netfs.h | 1 - 5 files changed, 31 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c index b87ef3fe4ea4..f27ea5099a68 100644 --- a/fs/netfs/buffered_write.c +++ b/fs/netfs/buffered_write.c @@ -53,30 +53,38 @@ static struct folio *netfs_grab_folio_for_write(struct address_space *mapping, * data written into the pagecache until we can find out from the server what * the values actually are. */ -static void netfs_update_i_size(struct netfs_inode *ctx, struct inode *inode, - loff_t i_size, loff_t pos, size_t copied) +void netfs_update_i_size(struct netfs_inode *ctx, struct inode *inode, + loff_t pos, size_t copied) { + loff_t i_size, end = pos + copied; blkcnt_t add; size_t gap; + if (end <= i_size_read(inode)) + return; + if (ctx->ops->update_i_size) { - ctx->ops->update_i_size(inode, pos); + ctx->ops->update_i_size(inode, end); return; } spin_lock(&inode->i_lock); - i_size_write(inode, pos); + + i_size = i_size_read(inode); + if (end > i_size) { + i_size_write(inode, end); #if IS_ENABLED(CONFIG_FSCACHE) - fscache_update_cookie(ctx->cache, NULL, &pos); + fscache_update_cookie(ctx->cache, NULL, &end); #endif - gap = SECTOR_SIZE - (i_size & (SECTOR_SIZE - 1)); - if (copied > gap) { - add = DIV_ROUND_UP(copied - gap, SECTOR_SIZE); + gap = SECTOR_SIZE - (i_size & (SECTOR_SIZE - 1)); + if (copied > gap) { + add = DIV_ROUND_UP(copied - gap, SECTOR_SIZE); - inode->i_blocks = min_t(blkcnt_t, - DIV_ROUND_UP(pos, SECTOR_SIZE), - inode->i_blocks + add); + inode->i_blocks = min_t(blkcnt_t, + DIV_ROUND_UP(end, SECTOR_SIZE), + inode->i_blocks + add); + } } spin_unlock(&inode->i_lock); } @@ -113,7 +121,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, struct folio *folio = NULL, *writethrough = NULL; unsigned int bdp_flags = (iocb->ki_flags & IOCB_NOWAIT) ? BDP_ASYNC : 0; ssize_t written = 0, ret, ret2; - loff_t i_size, pos = iocb->ki_pos; + loff_t pos = iocb->ki_pos; size_t max_chunk = mapping_max_folio_size(mapping); bool maybe_trouble = false; @@ -346,10 +354,8 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, flush_dcache_folio(folio); /* Update the inode size if we moved the EOF marker */ + netfs_update_i_size(ctx, inode, pos, copied); pos += copied; - i_size = i_size_read(inode); - if (pos > i_size) - netfs_update_i_size(ctx, inode, i_size, pos, copied); written += copied; if (likely(!wreq)) { diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c index 3efa5894b2c0..dcf2b096cc4e 100644 --- a/fs/netfs/direct_write.c +++ b/fs/netfs/direct_write.c @@ -9,24 +9,6 @@ #include #include "internal.h" -static void netfs_cleanup_dio_write(struct netfs_io_request *wreq) -{ - struct inode *inode = wreq->inode; - unsigned long long end = wreq->start + wreq->transferred; - - if (wreq->error || end <= i_size_read(inode)) - return; - - spin_lock(&inode->i_lock); - if (end > i_size_read(inode)) { - if (wreq->netfs_ops->update_i_size) - wreq->netfs_ops->update_i_size(inode, end); - else - i_size_write(inode, end); - } - spin_unlock(&inode->i_lock); -} - /* * Perform an unbuffered write where we may have to do an RMW operation on an * encrypted file. This can also be used for direct I/O writes. @@ -102,7 +84,6 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter * if (async) wreq->iocb = iocb; wreq->len = iov_iter_count(&wreq->buffer.iter); - wreq->cleanup = netfs_cleanup_dio_write; ret = netfs_unbuffered_write(wreq, is_sync_kiocb(iocb), wreq->len); if (ret < 0) { _debug("begin = %zd", ret); diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h index d6656d2b54ab..f9bb9464a147 100644 --- a/fs/netfs/internal.h +++ b/fs/netfs/internal.h @@ -27,6 +27,12 @@ void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error); int netfs_prefetch_for_write(struct file *file, struct folio *folio, size_t offset, size_t len); +/* + * buffered_write.c + */ +void netfs_update_i_size(struct netfs_inode *ctx, struct inode *inode, + loff_t pos, size_t copied); + /* * main.c */ diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c index 2ac85a819b71..33a93258f36e 100644 --- a/fs/netfs/write_collect.c +++ b/fs/netfs/write_collect.c @@ -393,8 +393,10 @@ bool netfs_write_collection(struct netfs_io_request *wreq) ictx->ops->invalidate_cache(wreq); } - if (wreq->cleanup) - wreq->cleanup(wreq); + if ((wreq->origin == NETFS_UNBUFFERED_WRITE || + wreq->origin == NETFS_DIO_WRITE) && + !wreq->error) + netfs_update_i_size(ictx, &ictx->inode, wreq->start, wreq->transferred); if (wreq->origin == NETFS_DIO_WRITE && wreq->mapping->nrpages) { diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 065c17385e53..d8186b90fb38 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -279,7 +279,6 @@ struct netfs_io_request { #define NETFS_RREQ_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark * write to cache on read */ const struct netfs_request_ops *netfs_ops; - void (*cleanup)(struct netfs_io_request *req); }; /* -- cgit v1.2.3 From 4e32541076833f5ce2e23523c9faa25f7b2cc96f Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 1 Jul 2025 17:38:47 +0100 Subject: netfs: Renumber the NETFS_RREQ_* flags to make traces easier to read Renumber the NETFS_RREQ_* flags to put the most useful status bits in the bottom nibble - and therefore the last hex digit in the trace output - making it easier to grasp the state at a glance. In particular, put the IN_PROGRESS flag in bit 0 and ALL_QUEUED at bit 1. Also make the flags field in /proc/fs/netfs/requests larger to accommodate all the flags. Also make the flags field in the netfs_sreq tracepoint larger to accommodate all the NETFS_SREQ_* flags. Signed-off-by: David Howells Link: https://lore.kernel.org/20250701163852.2171681-13-dhowells@redhat.com Reviewed-by: Paulo Alcantara cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/main.c | 6 +++--- include/linux/netfs.h | 20 ++++++++++---------- include/trace/events/netfs.h | 2 +- 3 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/fs/netfs/main.c b/fs/netfs/main.c index 3db401d269e7..73da6c9f5777 100644 --- a/fs/netfs/main.c +++ b/fs/netfs/main.c @@ -58,15 +58,15 @@ static int netfs_requests_seq_show(struct seq_file *m, void *v) if (v == &netfs_io_requests) { seq_puts(m, - "REQUEST OR REF FL ERR OPS COVERAGE\n" - "======== == === == ==== === =========\n" + "REQUEST OR REF FLAG ERR OPS COVERAGE\n" + "======== == === ==== ==== === =========\n" ); return 0; } rreq = list_entry(v, struct netfs_io_request, proc_link); seq_printf(m, - "%08x %s %3d %2lx %4ld %3d @%04llx %llx/%llx", + "%08x %s %3d %4lx %4ld %3d @%04llx %llx/%llx", rreq->debug_id, netfs_origins[rreq->origin], refcount_read(&rreq->ref), diff --git a/include/linux/netfs.h b/include/linux/netfs.h index d8186b90fb38..f43f075852c0 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -265,17 +265,17 @@ struct netfs_io_request { bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */ refcount_t ref; unsigned long flags; -#define NETFS_RREQ_OFFLOAD_COLLECTION 0 /* Offload collection to workqueue */ -#define NETFS_RREQ_NO_UNLOCK_FOLIO 2 /* Don't unlock no_unlock_folio on completion */ -#define NETFS_RREQ_FAILED 4 /* The request failed */ -#define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes (has ref) */ -#define NETFS_RREQ_FOLIO_COPY_TO_CACHE 6 /* Copy current folio to cache from read */ -#define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */ -#define NETFS_RREQ_PAUSE 11 /* Pause subrequest generation */ +#define NETFS_RREQ_IN_PROGRESS 0 /* Unlocked when the request completes (has ref) */ +#define NETFS_RREQ_ALL_QUEUED 1 /* All subreqs are now queued */ +#define NETFS_RREQ_PAUSE 2 /* Pause subrequest generation */ +#define NETFS_RREQ_FAILED 3 /* The request failed */ +#define NETFS_RREQ_RETRYING 4 /* Set if we're in the retry path */ +#define NETFS_RREQ_SHORT_TRANSFER 5 /* Set if we have a short transfer */ +#define NETFS_RREQ_OFFLOAD_COLLECTION 8 /* Offload collection to workqueue */ +#define NETFS_RREQ_NO_UNLOCK_FOLIO 9 /* Don't unlock no_unlock_folio on completion */ +#define NETFS_RREQ_FOLIO_COPY_TO_CACHE 10 /* Copy current folio to cache from read */ +#define NETFS_RREQ_UPLOAD_TO_SERVER 11 /* Need to write to the server */ #define NETFS_RREQ_USE_IO_ITER 12 /* Use ->io_iter rather than ->i_pages */ -#define NETFS_RREQ_ALL_QUEUED 13 /* All subreqs are now queued */ -#define NETFS_RREQ_RETRYING 14 /* Set if we're in the retry path */ -#define NETFS_RREQ_SHORT_TRANSFER 15 /* Set if we have a short transfer */ #define NETFS_RREQ_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark * write to cache on read */ const struct netfs_request_ops *netfs_ops; diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index ba35dc66e986..c2d581429a7b 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -367,7 +367,7 @@ TRACE_EVENT(netfs_sreq, __entry->slot = sreq->io_iter.folioq_slot; ), - TP_printk("R=%08x[%x] %s %s f=%02x s=%llx %zx/%zx s=%u e=%d", + TP_printk("R=%08x[%x] %s %s f=%03x s=%llx %zx/%zx s=%u e=%d", __entry->rreq, __entry->index, __print_symbolic(__entry->source, netfs_sreq_sources), __print_symbolic(__entry->what, netfs_sreq_traces), -- cgit v1.2.3 From 90b3ccf514578ca3a6ac25db51a29a48e34e0f1b Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 1 Jul 2025 17:38:48 +0100 Subject: netfs: Update tracepoints in a number of ways Make a number of updates to the netfs tracepoints: (1) Remove a duplicate trace from netfs_unbuffered_write_iter_locked(). (2) Move the trace in netfs_wake_rreq_flag() to after the flag is cleared so that the change appears in the trace. (3) Differentiate the use of netfs_rreq_trace_wait/woke_queue symbols. (4) Don't do so many trace emissions in the wait functions as some of them are redundant. (5) In netfs_collect_read_results(), differentiate a subreq that's being abandoned vs one that has been consumed in a regular way. (6) Add a tracepoint to indicate the call to ->ki_complete(). (7) Don't double-increment the subreq_counter when retrying a write. (8) Move the netfs_sreq_trace_io_progress tracepoint within cifs code to just MID_RESPONSE_RECEIVED and add different tracepoints for other MID states and note check failure. Signed-off-by: David Howells Co-developed-by: Paulo Alcantara Signed-off-by: Paulo Alcantara Link: https://lore.kernel.org/20250701163852.2171681-14-dhowells@redhat.com cc: Steve French cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org cc: linux-cifs@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/direct_write.c | 1 - fs/netfs/internal.h | 2 +- fs/netfs/misc.c | 14 ++++++-------- fs/netfs/read_collect.c | 12 +++++++++--- fs/netfs/write_collect.c | 4 +++- fs/netfs/write_retry.c | 1 - fs/smb/client/cifssmb.c | 20 ++++++++++++++++++++ fs/smb/client/smb2pdu.c | 26 ++++++++++++++++++++++---- include/trace/events/netfs.h | 26 ++++++++++++++++++-------- 9 files changed, 79 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c index dcf2b096cc4e..a16660ab7f83 100644 --- a/fs/netfs/direct_write.c +++ b/fs/netfs/direct_write.c @@ -91,7 +91,6 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter * } if (!async) { - trace_netfs_rreq(wreq, netfs_rreq_trace_wait_ip); ret = netfs_wait_for_write(wreq); if (ret > 0) iocb->ki_pos += ret; diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h index f9bb9464a147..d4f16fefd965 100644 --- a/fs/netfs/internal.h +++ b/fs/netfs/internal.h @@ -273,9 +273,9 @@ static inline void netfs_wake_rreq_flag(struct netfs_io_request *rreq, enum netfs_rreq_trace trace) { if (test_bit(rreq_flag, &rreq->flags)) { - trace_netfs_rreq(rreq, trace); clear_bit_unlock(rreq_flag, &rreq->flags); smp_mb__after_atomic(); /* Set flag before task state */ + trace_netfs_rreq(rreq, trace); wake_up(&rreq->waitq); } } diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c index 127a269938bb..20748bcfbf59 100644 --- a/fs/netfs/misc.c +++ b/fs/netfs/misc.c @@ -359,7 +359,7 @@ void netfs_wait_for_in_progress_stream(struct netfs_io_request *rreq, if (!netfs_check_subreq_in_progress(subreq)) continue; - trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue); + trace_netfs_rreq(rreq, netfs_rreq_trace_wait_quiesce); for (;;) { prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE); @@ -368,10 +368,10 @@ void netfs_wait_for_in_progress_stream(struct netfs_io_request *rreq, trace_netfs_sreq(subreq, netfs_sreq_trace_wait_for); schedule(); - trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue); } } + trace_netfs_rreq(rreq, netfs_rreq_trace_waited_quiesce); finish_wait(&rreq->waitq, &myself); } @@ -437,7 +437,6 @@ static ssize_t netfs_wait_for_in_progress(struct netfs_io_request *rreq, ssize_t ret; for (;;) { - trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue); prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE); if (!test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) { @@ -457,11 +456,12 @@ static ssize_t netfs_wait_for_in_progress(struct netfs_io_request *rreq, if (!netfs_check_rreq_in_progress(rreq)) break; + trace_netfs_rreq(rreq, netfs_rreq_trace_wait_ip); schedule(); - trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue); } all_collected: + trace_netfs_rreq(rreq, netfs_rreq_trace_waited_ip); finish_wait(&rreq->waitq, &myself); ret = rreq->error; @@ -504,10 +504,8 @@ static void netfs_wait_for_pause(struct netfs_io_request *rreq, { DEFINE_WAIT(myself); - trace_netfs_rreq(rreq, netfs_rreq_trace_wait_pause); - for (;;) { - trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue); + trace_netfs_rreq(rreq, netfs_rreq_trace_wait_pause); prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE); if (!test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) { @@ -530,10 +528,10 @@ static void netfs_wait_for_pause(struct netfs_io_request *rreq, break; schedule(); - trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue); } all_collected: + trace_netfs_rreq(rreq, netfs_rreq_trace_waited_pause); finish_wait(&rreq->waitq, &myself); } diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c index cceed9d629c6..3e804da1e1eb 100644 --- a/fs/netfs/read_collect.c +++ b/fs/netfs/read_collect.c @@ -293,7 +293,9 @@ reassess: spin_lock(&rreq->lock); remove = front; - trace_netfs_sreq(front, netfs_sreq_trace_discard); + trace_netfs_sreq(front, + notes & ABANDON_SREQ ? + netfs_sreq_trace_abandoned : netfs_sreq_trace_consumed); list_del_init(&front->rreq_link); front = list_first_entry_or_null(&stream->subrequests, struct netfs_io_subrequest, rreq_link); @@ -353,9 +355,11 @@ static void netfs_rreq_assess_dio(struct netfs_io_request *rreq) if (rreq->iocb) { rreq->iocb->ki_pos += rreq->transferred; - if (rreq->iocb->ki_complete) + if (rreq->iocb->ki_complete) { + trace_netfs_rreq(rreq, netfs_rreq_trace_ki_complete); rreq->iocb->ki_complete( rreq->iocb, rreq->error ? rreq->error : rreq->transferred); + } } if (rreq->netfs_ops->done) rreq->netfs_ops->done(rreq); @@ -379,9 +383,11 @@ static void netfs_rreq_assess_single(struct netfs_io_request *rreq) if (rreq->iocb) { rreq->iocb->ki_pos += rreq->transferred; - if (rreq->iocb->ki_complete) + if (rreq->iocb->ki_complete) { + trace_netfs_rreq(rreq, netfs_rreq_trace_ki_complete); rreq->iocb->ki_complete( rreq->iocb, rreq->error ? rreq->error : rreq->transferred); + } } if (rreq->netfs_ops->done) rreq->netfs_ops->done(rreq); diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c index 33a93258f36e..0f3a36852a4d 100644 --- a/fs/netfs/write_collect.c +++ b/fs/netfs/write_collect.c @@ -421,9 +421,11 @@ bool netfs_write_collection(struct netfs_io_request *wreq) if (wreq->iocb) { size_t written = min(wreq->transferred, wreq->len); wreq->iocb->ki_pos += written; - if (wreq->iocb->ki_complete) + if (wreq->iocb->ki_complete) { + trace_netfs_rreq(wreq, netfs_rreq_trace_ki_complete); wreq->iocb->ki_complete( wreq->iocb, wreq->error ? wreq->error : written); + } wreq->iocb = VFS_PTR_POISON; } diff --git a/fs/netfs/write_retry.c b/fs/netfs/write_retry.c index 7158657061e9..fc9c3e0d34d8 100644 --- a/fs/netfs/write_retry.c +++ b/fs/netfs/write_retry.c @@ -146,7 +146,6 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq, subreq = netfs_alloc_subrequest(wreq); subreq->source = to->source; subreq->start = start; - subreq->debug_index = atomic_inc_return(&wreq->subreq_counter); subreq->stream_nr = to->stream_nr; subreq->retry_count = 1; diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index 0e509a0433fb..75142f49d65d 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -1334,7 +1334,11 @@ cifs_readv_callback(struct mid_q_entry *mid) cifs_stats_bytes_read(tcon, rdata->got_bytes); break; case MID_REQUEST_SUBMITTED: + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_req_submitted); + goto do_retry; case MID_RETRY_NEEDED: + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_retry_needed); +do_retry: __set_bit(NETFS_SREQ_NEED_RETRY, &rdata->subreq.flags); rdata->result = -EAGAIN; if (server->sign && rdata->got_bytes) @@ -1344,8 +1348,14 @@ cifs_readv_callback(struct mid_q_entry *mid) task_io_account_read(rdata->got_bytes); cifs_stats_bytes_read(tcon, rdata->got_bytes); break; + case MID_RESPONSE_MALFORMED: + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_malformed); + rdata->result = -EIO; + break; default: + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_unknown); rdata->result = -EIO; + break; } if (rdata->result == -ENODATA) { @@ -1714,11 +1724,21 @@ cifs_writev_callback(struct mid_q_entry *mid) } break; case MID_REQUEST_SUBMITTED: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_req_submitted); + __set_bit(NETFS_SREQ_NEED_RETRY, &wdata->subreq.flags); + result = -EAGAIN; + break; case MID_RETRY_NEEDED: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_retry_needed); __set_bit(NETFS_SREQ_NEED_RETRY, &wdata->subreq.flags); result = -EAGAIN; break; + case MID_RESPONSE_MALFORMED: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_malformed); + result = -EIO; + break; default: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_unknown); result = -EIO; break; } diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 084ee66e73fd..7f6186c2e60d 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -4567,7 +4567,11 @@ smb2_readv_callback(struct mid_q_entry *mid) cifs_stats_bytes_read(tcon, rdata->got_bytes); break; case MID_REQUEST_SUBMITTED: + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_req_submitted); + goto do_retry; case MID_RETRY_NEEDED: + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_retry_needed); +do_retry: __set_bit(NETFS_SREQ_NEED_RETRY, &rdata->subreq.flags); rdata->result = -EAGAIN; if (server->sign && rdata->got_bytes) @@ -4578,11 +4582,15 @@ smb2_readv_callback(struct mid_q_entry *mid) cifs_stats_bytes_read(tcon, rdata->got_bytes); break; case MID_RESPONSE_MALFORMED: + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_malformed); credits.value = le16_to_cpu(shdr->CreditRequest); credits.instance = server->reconnect_instance; - fallthrough; + rdata->result = -EIO; + break; default: + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_unknown); rdata->result = -EIO; + break; } #ifdef CONFIG_CIFS_SMB_DIRECT /* @@ -4835,11 +4843,14 @@ smb2_writev_callback(struct mid_q_entry *mid) switch (mid->mid_state) { case MID_RESPONSE_RECEIVED: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_progress); credits.value = le16_to_cpu(rsp->hdr.CreditRequest); credits.instance = server->reconnect_instance; result = smb2_check_receive(mid, server, 0); - if (result != 0) + if (result != 0) { + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_bad); break; + } written = le32_to_cpu(rsp->DataLength); /* @@ -4861,15 +4872,23 @@ smb2_writev_callback(struct mid_q_entry *mid) } break; case MID_REQUEST_SUBMITTED: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_req_submitted); + __set_bit(NETFS_SREQ_NEED_RETRY, &wdata->subreq.flags); + result = -EAGAIN; + break; case MID_RETRY_NEEDED: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_retry_needed); __set_bit(NETFS_SREQ_NEED_RETRY, &wdata->subreq.flags); result = -EAGAIN; break; case MID_RESPONSE_MALFORMED: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_malformed); credits.value = le16_to_cpu(rsp->hdr.CreditRequest); credits.instance = server->reconnect_instance; - fallthrough; + result = -EIO; + break; default: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_unknown); result = -EIO; break; } @@ -4909,7 +4928,6 @@ smb2_writev_callback(struct mid_q_entry *mid) server->credits, server->in_flight, 0, cifs_trace_rw_credits_write_response_clear); wdata->credits.value = 0; - trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_progress); cifs_write_subrequest_terminated(wdata, result ?: written); release_mid(mid); trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, 0, diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index c2d581429a7b..73e96ccbe830 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -50,12 +50,13 @@ #define netfs_rreq_traces \ EM(netfs_rreq_trace_assess, "ASSESS ") \ - EM(netfs_rreq_trace_copy, "COPY ") \ EM(netfs_rreq_trace_collect, "COLLECT") \ EM(netfs_rreq_trace_complete, "COMPLET") \ + EM(netfs_rreq_trace_copy, "COPY ") \ EM(netfs_rreq_trace_dirty, "DIRTY ") \ EM(netfs_rreq_trace_done, "DONE ") \ EM(netfs_rreq_trace_free, "FREE ") \ + EM(netfs_rreq_trace_ki_complete, "KI-CMPL") \ EM(netfs_rreq_trace_recollect, "RECLLCT") \ EM(netfs_rreq_trace_redirty, "REDIRTY") \ EM(netfs_rreq_trace_resubmit, "RESUBMT") \ @@ -64,13 +65,15 @@ EM(netfs_rreq_trace_unlock, "UNLOCK ") \ EM(netfs_rreq_trace_unlock_pgpriv2, "UNLCK-2") \ EM(netfs_rreq_trace_unmark, "UNMARK ") \ + EM(netfs_rreq_trace_unpause, "UNPAUSE") \ EM(netfs_rreq_trace_wait_ip, "WAIT-IP") \ - EM(netfs_rreq_trace_wait_pause, "WT-PAUS") \ - EM(netfs_rreq_trace_wait_queue, "WAIT-Q ") \ + EM(netfs_rreq_trace_wait_pause, "--PAUSED--") \ + EM(netfs_rreq_trace_wait_quiesce, "WAIT-QUIESCE") \ + EM(netfs_rreq_trace_waited_ip, "DONE-IP") \ + EM(netfs_rreq_trace_waited_pause, "--UNPAUSED--") \ + EM(netfs_rreq_trace_waited_quiesce, "DONE-QUIESCE") \ EM(netfs_rreq_trace_wake_ip, "WAKE-IP") \ EM(netfs_rreq_trace_wake_queue, "WAKE-Q ") \ - EM(netfs_rreq_trace_woke_queue, "WOKE-Q ") \ - EM(netfs_rreq_trace_unpause, "UNPAUSE") \ E_(netfs_rreq_trace_write_done, "WR-DONE") #define netfs_sreq_sources \ @@ -83,6 +86,7 @@ E_(NETFS_WRITE_TO_CACHE, "WRIT") #define netfs_sreq_traces \ + EM(netfs_sreq_trace_abandoned, "ABNDN") \ EM(netfs_sreq_trace_add_donations, "+DON ") \ EM(netfs_sreq_trace_added, "ADD ") \ EM(netfs_sreq_trace_cache_nowrite, "CA-NW") \ @@ -90,6 +94,7 @@ EM(netfs_sreq_trace_cache_write, "CA-WR") \ EM(netfs_sreq_trace_cancel, "CANCL") \ EM(netfs_sreq_trace_clear, "CLEAR") \ + EM(netfs_sreq_trace_consumed, "CONSM") \ EM(netfs_sreq_trace_discard, "DSCRD") \ EM(netfs_sreq_trace_donate_to_prev, "DON-P") \ EM(netfs_sreq_trace_donate_to_next, "DON-N") \ @@ -97,7 +102,12 @@ EM(netfs_sreq_trace_fail, "FAIL ") \ EM(netfs_sreq_trace_free, "FREE ") \ EM(netfs_sreq_trace_hit_eof, "EOF ") \ - EM(netfs_sreq_trace_io_progress, "IO ") \ + EM(netfs_sreq_trace_io_bad, "I-BAD") \ + EM(netfs_sreq_trace_io_malformed, "I-MLF") \ + EM(netfs_sreq_trace_io_unknown, "I-UNK") \ + EM(netfs_sreq_trace_io_progress, "I-OK ") \ + EM(netfs_sreq_trace_io_req_submitted, "I-RSB") \ + EM(netfs_sreq_trace_io_retry_needed, "I-RTR") \ EM(netfs_sreq_trace_limited, "LIMIT") \ EM(netfs_sreq_trace_need_clear, "N-CLR") \ EM(netfs_sreq_trace_partial_read, "PARTR") \ @@ -143,8 +153,8 @@ #define netfs_sreq_ref_traces \ EM(netfs_sreq_trace_get_copy_to_cache, "GET COPY2C ") \ - EM(netfs_sreq_trace_get_resubmit, "GET RESUBMIT") \ - EM(netfs_sreq_trace_get_submit, "GET SUBMIT") \ + EM(netfs_sreq_trace_get_resubmit, "GET RESUBMT") \ + EM(netfs_sreq_trace_get_submit, "GET SUBMIT ") \ EM(netfs_sreq_trace_get_short_read, "GET SHORTRD") \ EM(netfs_sreq_trace_new, "NEW ") \ EM(netfs_sreq_trace_put_abandon, "PUT ABANDON") \ -- cgit v1.2.3 From 8af39ec5cf2be522c8eb43a3d8005ed59e4daaee Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 13 Jun 2025 14:20:13 -0700 Subject: drm/sched: Increment job count before swapping tail spsc queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A small race exists between spsc_queue_push and the run-job worker, in which spsc_queue_push may return not-first while the run-job worker has already idled due to the job count being zero. If this race occurs, job scheduling stops, leading to hangs while waiting on the job’s DMA fences. Seal this race by incrementing the job count before appending to the SPSC queue. This race was observed on a drm-tip 6.16-rc1 build with the Xe driver in an SVM test case. Fixes: 1b1f42d8fde4 ("drm: move amd_gpu_scheduler into common location") Fixes: 27105db6c63a ("drm/amdgpu: Add SPSC queue to scheduler.") Cc: stable@vger.kernel.org Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://lore.kernel.org/r/20250613212013.719312-1-matthew.brost@intel.com --- include/drm/spsc_queue.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/spsc_queue.h b/include/drm/spsc_queue.h index 125f096c88cb..ee9df8cc67b7 100644 --- a/include/drm/spsc_queue.h +++ b/include/drm/spsc_queue.h @@ -70,9 +70,11 @@ static inline bool spsc_queue_push(struct spsc_queue *queue, struct spsc_node *n preempt_disable(); + atomic_inc(&queue->job_count); + smp_mb__after_atomic(); + tail = (struct spsc_node **)atomic_long_xchg(&queue->tail, (long)&node->next); WRITE_ONCE(*tail, node); - atomic_inc(&queue->job_count); /* * In case of first element verify new node will be visible to the consumer -- cgit v1.2.3 From 59710a26a289ad4e7ef227d22063e964930928b0 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 30 Jun 2025 15:37:46 -0400 Subject: Bluetooth: hci_core: Remove check of BDADDR_ANY in hci_conn_hash_lookup_big_state The check for destination to be BDADDR_ANY is no longer necessary with the introduction of BIS_LINK. Fixes: 23205562ffc8 ("Bluetooth: separate CIS_LINK and BIS_LINK link types") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 9fc8f544e20e..0da011fc8146 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1350,8 +1350,7 @@ hci_conn_hash_lookup_big_state(struct hci_dev *hdev, __u8 handle, __u16 state) rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != BIS_LINK || bacmp(&c->dst, BDADDR_ANY) || - c->state != state) + if (c->type != BIS_LINK || c->state != state) continue; if (handle == c->iso_qos.bcast.big) { -- cgit v1.2.3 From c5fd399a24c8e2865524361f7dc4d4a6899be4f4 Mon Sep 17 00:00:00 2001 From: Lachlan Hodges Date: Tue, 1 Jul 2025 17:55:41 +1000 Subject: wifi: mac80211: correctly identify S1G short beacon mac80211 identifies a short beacon by the presence of the next TBTT field, however the standard actually doesn't explicitly state that the next TBTT can't be in a long beacon or even that it is required in a short beacon - and as a result this validation does not work for all vendor implementations. The standard explicitly states that an S1G long beacon shall contain the S1G beacon compatibility element as the first element in a beacon transmitted at a TBTT that is not a TSBTT (Target Short Beacon Transmission Time) as per IEEE80211-2024 11.1.3.10.1. This is validated by 9.3.4.3 Table 9-76 which states that the S1G beacon compatibility element is only allowed in the full set and is not allowed in the minimum set of elements permitted for use within short beacons. Correctly identify short beacons by the lack of an S1G beacon compatibility element as the first element in an S1G beacon frame. Fixes: 9eaffe5078ca ("cfg80211: convert S1G beacon to scan results") Signed-off-by: Simon Wadsworth Signed-off-by: Lachlan Hodges Link: https://patch.msgid.link/20250701075541.162619-1-lachlan.hodges@morsemicro.com Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 45 +++++++++++++++++++++++++++++++++------------ net/mac80211/mlme.c | 7 +++++-- 2 files changed, 38 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 22f39e5e2ff1..996be3c2cff0 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -662,18 +662,6 @@ static inline bool ieee80211_s1g_has_cssid(__le16 fc) (fc & cpu_to_le16(IEEE80211_S1G_BCN_CSSID)); } -/** - * ieee80211_is_s1g_short_beacon - check if frame is an S1G short beacon - * @fc: frame control bytes in little-endian byteorder - * Return: whether or not the frame is an S1G short beacon, - * i.e. it is an S1G beacon with 'next TBTT' flag set - */ -static inline bool ieee80211_is_s1g_short_beacon(__le16 fc) -{ - return ieee80211_is_s1g_beacon(fc) && - (fc & cpu_to_le16(IEEE80211_S1G_BCN_NEXT_TBTT)); -} - /** * ieee80211_is_atim - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ATIM * @fc: frame control bytes in little-endian byteorder @@ -4901,6 +4889,39 @@ static inline bool ieee80211_is_ftm(struct sk_buff *skb) return false; } +/** + * ieee80211_is_s1g_short_beacon - check if frame is an S1G short beacon + * @fc: frame control bytes in little-endian byteorder + * @variable: pointer to the beacon frame elements + * @variable_len: length of the frame elements + * Return: whether or not the frame is an S1G short beacon. As per + * IEEE80211-2024 11.1.3.10.1, The S1G beacon compatibility element shall + * always be present as the first element in beacon frames generated at a + * TBTT (Target Beacon Transmission Time), so any frame not containing + * this element must have been generated at a TSBTT (Target Short Beacon + * Transmission Time) that is not a TBTT. Additionally, short beacons are + * prohibited from containing the S1G beacon compatibility element as per + * IEEE80211-2024 9.3.4.3 Table 9-76, so if we have an S1G beacon with + * either no elements or the first element is not the beacon compatibility + * element, we have a short beacon. + */ +static inline bool ieee80211_is_s1g_short_beacon(__le16 fc, const u8 *variable, + size_t variable_len) +{ + if (!ieee80211_is_s1g_beacon(fc)) + return false; + + /* + * If the frame does not contain at least 1 element (this is perfectly + * valid in a short beacon) and is an S1G beacon, we have a short + * beacon. + */ + if (variable_len < 2) + return true; + + return variable[0] != WLAN_EID_S1G_BCN_COMPAT; +} + struct element { u8 id; u8 datalen; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 2d46d4af60d7..7ddb8e77b4c7 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -7195,6 +7195,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, struct ieee80211_bss_conf *bss_conf = link->conf; struct ieee80211_vif_cfg *vif_cfg = &sdata->vif.cfg; struct ieee80211_mgmt *mgmt = (void *) hdr; + struct ieee80211_ext *ext = NULL; size_t baselen; struct ieee802_11_elems *elems; struct ieee80211_local *local = sdata->local; @@ -7220,7 +7221,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, /* Process beacon from the current BSS */ bssid = ieee80211_get_bssid(hdr, len, sdata->vif.type); if (ieee80211_is_s1g_beacon(mgmt->frame_control)) { - struct ieee80211_ext *ext = (void *) mgmt; + ext = (void *)mgmt; variable = ext->u.s1g_beacon.variable + ieee80211_s1g_optional_len(ext->frame_control); } @@ -7407,7 +7408,9 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, } if ((ncrc == link->u.mgd.beacon_crc && link->u.mgd.beacon_crc_valid) || - ieee80211_is_s1g_short_beacon(mgmt->frame_control)) + (ext && ieee80211_is_s1g_short_beacon(ext->frame_control, + parse_params.start, + parse_params.len))) goto free; link->u.mgd.beacon_crc = ncrc; link->u.mgd.beacon_crc_valid = true; -- cgit v1.2.3 From 4cdf1bdd45ac78a088773722f009883af30ad318 Mon Sep 17 00:00:00 2001 From: Pankaj Raghav Date: Fri, 4 Jul 2025 11:21:34 +0200 Subject: block: reject bs > ps block devices when THP is disabled If THP is disabled and when a block device with logical block size > page size is present, the following null ptr deref panic happens during boot: [ [13.2 mK AOSAN: null-ptr-deref in range [0x0000000000000000-0x0000000000K0 0 0[07] [ 13.017749] RIP: 0010:create_empty_buffers+0x3b/0x380 [ 13.025448] Call Trace: [ 13.025692] [ 13.025895] block_read_full_folio+0x610/0x780 [ 13.026379] ? __pfx_blkdev_get_block+0x10/0x10 [ 13.027008] ? __folio_batch_add_and_move+0x1fa/0x2b0 [ 13.027548] ? __pfx_blkdev_read_folio+0x10/0x10 [ 13.028080] filemap_read_folio+0x9b/0x200 [ 13.028526] ? __pfx_filemap_read_folio+0x10/0x10 [ 13.029030] ? __filemap_get_folio+0x43/0x620 [ 13.029497] do_read_cache_folio+0x155/0x3b0 [ 13.029962] ? __pfx_blkdev_read_folio+0x10/0x10 [ 13.030381] read_part_sector+0xb7/0x2a0 [ 13.030805] read_lba+0x174/0x2c0 [ 13.045348] nvme_scan_ns+0x684/0x850 [nvme_core] [ 13.045858] ? __pfx_nvme_scan_ns+0x10/0x10 [nvme_core] [ 13.046414] ? _raw_spin_unlock+0x15/0x40 [ 13.046843] ? __switch_to+0x523/0x10a0 [ 13.047253] ? kvm_clock_get_cycles+0x14/0x30 [ 13.047742] ? __pfx_nvme_scan_ns_async+0x10/0x10 [nvme_core] [ 13.048353] async_run_entry_fn+0x96/0x4f0 [ 13.048787] process_one_work+0x667/0x10a0 [ 13.049219] worker_thread+0x63c/0xf60 As large folio support depends on THP, only allow bs > ps block devices if THP is enabled. Fixes: 47dd67532303 ("block/bdev: lift block size restrictions to 64k") Signed-off-by: Pankaj Raghav Reviewed-by: Luis Chamberlain Link: https://lore.kernel.org/r/20250704092134.289491-1-p.raghav@samsung.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 332b56f323d9..369a8e63c865 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -269,11 +269,16 @@ static inline dev_t disk_devt(struct gendisk *disk) return MKDEV(disk->major, disk->first_minor); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE /* * We should strive for 1 << (PAGE_SHIFT + MAX_PAGECACHE_ORDER) * however we constrain this to what we can validate and test. */ #define BLK_MAX_BLOCK_SIZE SZ_64K +#else +#define BLK_MAX_BLOCK_SIZE PAGE_SIZE +#endif + /* blk_validate_limits() validates bsize, so drivers don't usually need to */ static inline int blk_validate_block_size(unsigned long bsize) -- cgit v1.2.3 From 1e3b66e326015f77bc4b36976bebeedc2ac0f588 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Thu, 3 Jul 2025 13:23:29 +0200 Subject: vsock: fix `vsock_proto` declaration From commit 634f1a7110b4 ("vsock: support sockmap"), `struct proto vsock_proto`, defined in af_vsock.c, is not static anymore, since it's used by vsock_bpf.c. If CONFIG_BPF_SYSCALL is not defined, `make C=2` will print a warning: $ make O=build C=2 W=1 net/vmw_vsock/ ... CC [M] net/vmw_vsock/af_vsock.o CHECK ../net/vmw_vsock/af_vsock.c ../net/vmw_vsock/af_vsock.c:123:14: warning: symbol 'vsock_proto' was not declared. Should it be static? Declare `vsock_proto` regardless of CONFIG_BPF_SYSCALL, since it's defined in af_vsock.c, which is built regardless of CONFIG_BPF_SYSCALL. Fixes: 634f1a7110b4 ("vsock: support sockmap") Signed-off-by: Stefano Garzarella Acked-by: Michael S. Tsirkin Link: https://patch.msgid.link/20250703112329.28365-1-sgarzare@redhat.com Signed-off-by: Jakub Kicinski --- include/net/af_vsock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index d56e6e135158..d40e978126e3 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -243,8 +243,8 @@ int __vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); -#ifdef CONFIG_BPF_SYSCALL extern struct proto vsock_proto; +#ifdef CONFIG_BPF_SYSCALL int vsock_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore); void __init vsock_bpf_build_proto(void); #else -- cgit v1.2.3 From 70b9c0c11e55167b9552ef395bc00f4920299177 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 30 Jun 2025 15:02:18 +0200 Subject: uapi: bitops: use UAPI-safe variant of BITS_PER_LONG again (2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BITS_PER_LONG does not exist in UAPI headers, so can't be used by the UAPI __GENMASK(). Instead __BITS_PER_LONG needs to be used. When __GENMASK() was introduced in commit 3c7a8e190bc5 ("uapi: introduce uapi-friendly macros for GENMASK"), the code was fine. A broken revert in 1e7933a575ed ("uapi: Revert "bitops: avoid integer overflow in GENMASK(_ULL)"") introduced the incorrect usage of BITS_PER_LONG. That was fixed in commit 11fcf368506d ("uapi: bitops: use UAPI-safe variant of BITS_PER_LONG again"). But a broken sync of the kernel headers with the tools/ headers in commit fc92099902fb ("tools headers: Synchronize linux/bits.h with the kernel sources") undid the fix. Reapply the fix and while at it also fix the tools header. Fixes: fc92099902fb ("tools headers: Synchronize linux/bits.h with the kernel sources") Signed-off-by: Thomas Weißschuh Acked-by: Yury Norov (NVIDIA) Signed-off-by: Yury Norov (NVIDIA) --- include/uapi/linux/bits.h | 4 ++-- tools/include/uapi/linux/bits.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/bits.h b/include/uapi/linux/bits.h index 682b406e1067..a04afef9efca 100644 --- a/include/uapi/linux/bits.h +++ b/include/uapi/linux/bits.h @@ -4,9 +4,9 @@ #ifndef _UAPI_LINUX_BITS_H #define _UAPI_LINUX_BITS_H -#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (BITS_PER_LONG - 1 - (h)))) +#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (__BITS_PER_LONG - 1 - (h)))) -#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h)))) +#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) #define __GENMASK_U128(h, l) \ ((_BIT128((h)) << 1) - (_BIT128(l))) diff --git a/tools/include/uapi/linux/bits.h b/tools/include/uapi/linux/bits.h index 682b406e1067..a04afef9efca 100644 --- a/tools/include/uapi/linux/bits.h +++ b/tools/include/uapi/linux/bits.h @@ -4,9 +4,9 @@ #ifndef _UAPI_LINUX_BITS_H #define _UAPI_LINUX_BITS_H -#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (BITS_PER_LONG - 1 - (h)))) +#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (__BITS_PER_LONG - 1 - (h)))) -#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h)))) +#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) #define __GENMASK_U128(h, l) \ ((_BIT128((h)) << 1) - (_BIT128(l))) -- cgit v1.2.3 From fc582cd26e888b0652bc1494f252329453fd3b23 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 8 Jul 2025 11:00:32 -0600 Subject: io_uring/msg_ring: ensure io_kiocb freeing is deferred for RCU syzbot reports that defer/local task_work adding via msg_ring can hit a request that has been freed: CPU: 1 UID: 0 PID: 19356 Comm: iou-wrk-19354 Not tainted 6.16.0-rc4-syzkaller-00108-g17bbde2e1716 #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 05/07/2025 Call Trace: dump_stack_lvl+0x189/0x250 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:408 [inline] print_report+0xd2/0x2b0 mm/kasan/report.c:521 kasan_report+0x118/0x150 mm/kasan/report.c:634 io_req_local_work_add io_uring/io_uring.c:1184 [inline] __io_req_task_work_add+0x589/0x950 io_uring/io_uring.c:1252 io_msg_remote_post io_uring/msg_ring.c:103 [inline] io_msg_data_remote io_uring/msg_ring.c:133 [inline] __io_msg_ring_data+0x820/0xaa0 io_uring/msg_ring.c:151 io_msg_ring_data io_uring/msg_ring.c:173 [inline] io_msg_ring+0x134/0xa00 io_uring/msg_ring.c:314 __io_issue_sqe+0x17e/0x4b0 io_uring/io_uring.c:1739 io_issue_sqe+0x165/0xfd0 io_uring/io_uring.c:1762 io_wq_submit_work+0x6e9/0xb90 io_uring/io_uring.c:1874 io_worker_handle_work+0x7cd/0x1180 io_uring/io-wq.c:642 io_wq_worker+0x42f/0xeb0 io_uring/io-wq.c:696 ret_from_fork+0x3fc/0x770 arch/x86/kernel/process.c:148 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245 which is supposed to be safe with how requests are allocated. But msg ring requests alloc and free on their own, and hence must defer freeing to a sane time. Add an rcu_head and use kfree_rcu() in both spots where requests are freed. Only the one in io_msg_tw_complete() is strictly required as it has been visible on the other ring, but use it consistently in the other spot as well. This should not cause any other issues outside of KASAN rightfully complaining about it. Link: https://lore.kernel.org/io-uring/686cd2ea.a00a0220.338033.0007.GAE@google.com/ Reported-by: syzbot+54cbbfb4db9145d26fc2@syzkaller.appspotmail.com Cc: stable@vger.kernel.org Fixes: 0617bb500bfa ("io_uring/msg_ring: improve handling of target CQE posting") Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 2 ++ io_uring/msg_ring.c | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 2922635986f5..a7efcec2e3d0 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -698,6 +698,8 @@ struct io_kiocb { struct hlist_node hash_node; /* For IOPOLL setup queues, with hybrid polling */ u64 iopoll_start; + /* for private io_kiocb freeing */ + struct rcu_head rcu_head; }; /* internal polling, see IORING_FEAT_FAST_POLL */ struct async_poll *apoll; diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c index 71400d6cefc8..4c2578f2efcb 100644 --- a/io_uring/msg_ring.c +++ b/io_uring/msg_ring.c @@ -82,7 +82,7 @@ static void io_msg_tw_complete(struct io_kiocb *req, io_tw_token_t tw) spin_unlock(&ctx->msg_lock); } if (req) - kmem_cache_free(req_cachep, req); + kfree_rcu(req, rcu_head); percpu_ref_put(&ctx->refs); } @@ -90,7 +90,7 @@ static int io_msg_remote_post(struct io_ring_ctx *ctx, struct io_kiocb *req, int res, u32 cflags, u64 user_data) { if (!READ_ONCE(ctx->submitter_task)) { - kmem_cache_free(req_cachep, req); + kfree_rcu(req, rcu_head); return -EOWNERDEAD; } req->opcode = IORING_OP_NOP; -- cgit v1.2.3 From f6bfc9afc7510cb5e6fbe0a17c507917b0120280 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 7 Jul 2025 15:11:55 +0200 Subject: drm/framebuffer: Acquire internal references on GEM handles MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Acquire GEM handles in drm_framebuffer_init() and release them in the corresponding drm_framebuffer_cleanup(). Ties the handle's lifetime to the framebuffer. Not all GEM buffer objects have GEM handles. If not set, no refcounting takes place. This is the case for some fbdev emulation. This is not a problem as these GEM objects do not use dma-bufs and drivers will not release them while fbdev emulation is running. Framebuffer flags keep a bit per color plane of which the framebuffer holds a GEM handle reference. As all drivers use drm_framebuffer_init(), they will now all hold dma-buf references as fixed in commit 5307dce878d4 ("drm/gem: Acquire references on GEM handles for framebuffers"). In the GEM framebuffer helpers, restore the original ref counting on buffer objects. As the helpers for handle refcounting are now no longer called from outside the DRM core, unexport the symbols. v3: - don't mix internal flags with mode flags (Christian) v2: - track framebuffer handle refs by flag - drop gma500 cleanup (Christian) Signed-off-by: Thomas Zimmermann Fixes: 5307dce878d4 ("drm/gem: Acquire references on GEM handles for framebuffers") Reported-by: Bert Karwatzki Closes: https://lore.kernel.org/dri-devel/20250703115915.3096-1-spasswolf@web.de/ Tested-by: Bert Karwatzki Tested-by: Mario Limonciello Tested-by: Borislav Petkov (AMD) Cc: Thomas Zimmermann Cc: Anusha Srivatsa Cc: Christian König Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Sumit Semwal Cc: "Christian König" Cc: linux-media@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: linaro-mm-sig@lists.linaro.org Cc: Reviewed-by: Christian König Link: https://lore.kernel.org/r/20250707131224.249496-1-tzimmermann@suse.de --- drivers/gpu/drm/drm_framebuffer.c | 31 +++++++++++++++++++++-- drivers/gpu/drm/drm_gem.c | 38 ++++++++++++++++++---------- drivers/gpu/drm/drm_gem_framebuffer_helper.c | 16 +++++------- drivers/gpu/drm/drm_internal.h | 2 +- include/drm/drm_framebuffer.h | 7 +++++ 5 files changed, 68 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c index b781601946db..63a70f285cce 100644 --- a/drivers/gpu/drm/drm_framebuffer.c +++ b/drivers/gpu/drm/drm_framebuffer.c @@ -862,11 +862,23 @@ EXPORT_SYMBOL_FOR_TESTS_ONLY(drm_framebuffer_free); int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb, const struct drm_framebuffer_funcs *funcs) { + unsigned int i; int ret; + bool exists; if (WARN_ON_ONCE(fb->dev != dev || !fb->format)) return -EINVAL; + for (i = 0; i < fb->format->num_planes; i++) { + if (drm_WARN_ON_ONCE(dev, fb->internal_flags & DRM_FRAMEBUFFER_HAS_HANDLE_REF(i))) + fb->internal_flags &= ~DRM_FRAMEBUFFER_HAS_HANDLE_REF(i); + if (fb->obj[i]) { + exists = drm_gem_object_handle_get_if_exists_unlocked(fb->obj[i]); + if (exists) + fb->internal_flags |= DRM_FRAMEBUFFER_HAS_HANDLE_REF(i); + } + } + INIT_LIST_HEAD(&fb->filp_head); fb->funcs = funcs; @@ -875,7 +887,7 @@ int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb, ret = __drm_mode_object_add(dev, &fb->base, DRM_MODE_OBJECT_FB, false, drm_framebuffer_free); if (ret) - goto out; + goto err; mutex_lock(&dev->mode_config.fb_lock); dev->mode_config.num_fb++; @@ -883,7 +895,16 @@ int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb, mutex_unlock(&dev->mode_config.fb_lock); drm_mode_object_register(dev, &fb->base); -out: + + return 0; + +err: + for (i = 0; i < fb->format->num_planes; i++) { + if (fb->internal_flags & DRM_FRAMEBUFFER_HAS_HANDLE_REF(i)) { + drm_gem_object_handle_put_unlocked(fb->obj[i]); + fb->internal_flags &= ~DRM_FRAMEBUFFER_HAS_HANDLE_REF(i); + } + } return ret; } EXPORT_SYMBOL(drm_framebuffer_init); @@ -960,6 +981,12 @@ EXPORT_SYMBOL(drm_framebuffer_unregister_private); void drm_framebuffer_cleanup(struct drm_framebuffer *fb) { struct drm_device *dev = fb->dev; + unsigned int i; + + for (i = 0; i < fb->format->num_planes; i++) { + if (fb->internal_flags & DRM_FRAMEBUFFER_HAS_HANDLE_REF(i)) + drm_gem_object_handle_put_unlocked(fb->obj[i]); + } mutex_lock(&dev->mode_config.fb_lock); list_del(&fb->head); diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 4bf0a76bb35e..aad6ac9748cc 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -223,23 +223,34 @@ static void drm_gem_object_handle_get(struct drm_gem_object *obj) } /** - * drm_gem_object_handle_get_unlocked - acquire reference on user-space handles + * drm_gem_object_handle_get_if_exists_unlocked - acquire reference on user-space handle, if any * @obj: GEM object * - * Acquires a reference on the GEM buffer object's handle. Required - * to keep the GEM object alive. Call drm_gem_object_handle_put_unlocked() - * to release the reference. + * Acquires a reference on the GEM buffer object's handle. Required to keep + * the GEM object alive. Call drm_gem_object_handle_put_if_exists_unlocked() + * to release the reference. Does nothing if the buffer object has no handle. + * + * Returns: + * True if a handle exists, or false otherwise */ -void drm_gem_object_handle_get_unlocked(struct drm_gem_object *obj) +bool drm_gem_object_handle_get_if_exists_unlocked(struct drm_gem_object *obj) { struct drm_device *dev = obj->dev; guard(mutex)(&dev->object_name_lock); - drm_WARN_ON(dev, !obj->handle_count); /* first ref taken in create-tail helper */ + /* + * First ref taken during GEM object creation, if any. Some + * drivers set up internal framebuffers with GEM objects that + * do not have a GEM handle. Hence, this counter can be zero. + */ + if (!obj->handle_count) + return false; + drm_gem_object_handle_get(obj); + + return true; } -EXPORT_SYMBOL(drm_gem_object_handle_get_unlocked); /** * drm_gem_object_handle_free - release resources bound to userspace handles @@ -272,7 +283,7 @@ static void drm_gem_object_exported_dma_buf_free(struct drm_gem_object *obj) } /** - * drm_gem_object_handle_put_unlocked - releases reference on user-space handles + * drm_gem_object_handle_put_unlocked - releases reference on user-space handle * @obj: GEM object * * Releases a reference on the GEM buffer object's handle. Possibly releases @@ -283,14 +294,14 @@ void drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj) struct drm_device *dev = obj->dev; bool final = false; - if (WARN_ON(READ_ONCE(obj->handle_count) == 0)) + if (drm_WARN_ON(dev, READ_ONCE(obj->handle_count) == 0)) return; /* - * Must bump handle count first as this may be the last - * ref, in which case the object would disappear before we - * checked for a name - */ + * Must bump handle count first as this may be the last + * ref, in which case the object would disappear before + * we checked for a name. + */ mutex_lock(&dev->object_name_lock); if (--obj->handle_count == 0) { @@ -303,7 +314,6 @@ void drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj) if (final) drm_gem_object_put(obj); } -EXPORT_SYMBOL(drm_gem_object_handle_put_unlocked); /* * Called at device or object close to release the file's diff --git a/drivers/gpu/drm/drm_gem_framebuffer_helper.c b/drivers/gpu/drm/drm_gem_framebuffer_helper.c index 14a87788695d..6f72e7a0f427 100644 --- a/drivers/gpu/drm/drm_gem_framebuffer_helper.c +++ b/drivers/gpu/drm/drm_gem_framebuffer_helper.c @@ -99,7 +99,7 @@ void drm_gem_fb_destroy(struct drm_framebuffer *fb) unsigned int i; for (i = 0; i < fb->format->num_planes; i++) - drm_gem_object_handle_put_unlocked(fb->obj[i]); + drm_gem_object_put(fb->obj[i]); drm_framebuffer_cleanup(fb); kfree(fb); @@ -182,10 +182,8 @@ int drm_gem_fb_init_with_funcs(struct drm_device *dev, if (!objs[i]) { drm_dbg_kms(dev, "Failed to lookup GEM object\n"); ret = -ENOENT; - goto err_gem_object_handle_put_unlocked; + goto err_gem_object_put; } - drm_gem_object_handle_get_unlocked(objs[i]); - drm_gem_object_put(objs[i]); min_size = (height - 1) * mode_cmd->pitches[i] + drm_format_info_min_pitch(info, i, width) @@ -195,22 +193,22 @@ int drm_gem_fb_init_with_funcs(struct drm_device *dev, drm_dbg_kms(dev, "GEM object size (%zu) smaller than minimum size (%u) for plane %d\n", objs[i]->size, min_size, i); - drm_gem_object_handle_put_unlocked(objs[i]); + drm_gem_object_put(objs[i]); ret = -EINVAL; - goto err_gem_object_handle_put_unlocked; + goto err_gem_object_put; } } ret = drm_gem_fb_init(dev, fb, mode_cmd, objs, i, funcs); if (ret) - goto err_gem_object_handle_put_unlocked; + goto err_gem_object_put; return 0; -err_gem_object_handle_put_unlocked: +err_gem_object_put: while (i > 0) { --i; - drm_gem_object_handle_put_unlocked(objs[i]); + drm_gem_object_put(objs[i]); } return ret; } diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h index be77d61a16ce..60c282881958 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -161,7 +161,7 @@ void drm_sysfs_lease_event(struct drm_device *dev); /* drm_gem.c */ int drm_gem_init(struct drm_device *dev); -void drm_gem_object_handle_get_unlocked(struct drm_gem_object *obj); +bool drm_gem_object_handle_get_if_exists_unlocked(struct drm_gem_object *obj); void drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj); int drm_gem_handle_create_tail(struct drm_file *file_priv, struct drm_gem_object *obj, diff --git a/include/drm/drm_framebuffer.h b/include/drm/drm_framebuffer.h index 668077009fce..38b24fc8978d 100644 --- a/include/drm/drm_framebuffer.h +++ b/include/drm/drm_framebuffer.h @@ -23,6 +23,7 @@ #ifndef __DRM_FRAMEBUFFER_H__ #define __DRM_FRAMEBUFFER_H__ +#include #include #include #include @@ -100,6 +101,8 @@ struct drm_framebuffer_funcs { unsigned num_clips); }; +#define DRM_FRAMEBUFFER_HAS_HANDLE_REF(_i) BIT(0u + (_i)) + /** * struct drm_framebuffer - frame buffer object * @@ -188,6 +191,10 @@ struct drm_framebuffer { * DRM_MODE_FB_MODIFIERS. */ int flags; + /** + * @internal_flags: Framebuffer flags like DRM_FRAMEBUFFER_HAS_HANDLE_REF. + */ + unsigned int internal_flags; /** * @filp_head: Placed on &drm_file.fbs, protected by &drm_file.fbs_lock. */ -- cgit v1.2.3 From bd46cece51a36ef088f22ef0416ac13b0a46d5b0 Mon Sep 17 00:00:00 2001 From: Simona Vetter Date: Mon, 7 Jul 2025 17:18:13 +0200 Subject: drm/gem: Fix race in drm_gem_handle_create_tail() Object creation is a careful dance where we must guarantee that the object is fully constructed before it is visible to other threads, and GEM buffer objects are no difference. Final publishing happens by calling drm_gem_handle_create(). After that the only allowed thing to do is call drm_gem_object_put() because a concurrent call to the GEM_CLOSE ioctl with a correctly guessed id (which is trivial since we have a linear allocator) can already tear down the object again. Luckily most drivers get this right, the very few exceptions I've pinged the relevant maintainers for. Unfortunately we also need drm_gem_handle_create() when creating additional handles for an already existing object (e.g. GETFB ioctl or the various bo import ioctl), and hence we cannot have a drm_gem_handle_create_and_put() as the only exported function to stop these issues from happening. Now unfortunately the implementation of drm_gem_handle_create() isn't living up to standards: It does correctly finishe object initialization at the global level, and hence is safe against a concurrent tear down. But it also sets up the file-private aspects of the handle, and that part goes wrong: We fully register the object in the drm_file.object_idr before calling drm_vma_node_allow() or obj->funcs->open, which opens up races against concurrent removal of that handle in drm_gem_handle_delete(). Fix this with the usual two-stage approach of first reserving the handle id, and then only registering the object after we've completed the file-private setup. Jacek reported this with a testcase of concurrently calling GEM_CLOSE on a freshly-created object (which also destroys the object), but it should be possible to hit this with just additional handles created through import or GETFB without completed destroying the underlying object with the concurrent GEM_CLOSE ioctl calls. Note that the close-side of this race was fixed in f6cd7daecff5 ("drm: Release driver references to handle before making it available again"), which means a cool 9 years have passed until someone noticed that we need to make this symmetry or there's still gaps left :-/ Without the 2-stage close approach we'd still have a race, therefore that's an integral part of this bugfix. More importantly, this means we can have NULL pointers behind allocated id in our drm_file.object_idr. We need to check for that now: - drm_gem_handle_delete() checks for ERR_OR_NULL already - drm_gem.c:object_lookup() also chekcs for NULL - drm_gem_release() should never be called if there's another thread still existing that could call into an IOCTL that creates a new handle, so cannot race. For paranoia I added a NULL check to drm_gem_object_release_handle() though. - most drivers (etnaviv, i915, msm) are find because they use idr_find(), which maps both ENOENT and NULL to NULL. - drivers using idr_for_each_entry() should also be fine, because idr_get_next does filter out NULL entries and continues the iteration. - The same holds for drm_show_memory_stats(). v2: Use drm_WARN_ON (Thomas) Reported-by: Jacek Lawrynowicz Tested-by: Jacek Lawrynowicz Reviewed-by: Thomas Zimmermann Cc: stable@vger.kernel.org Cc: Jacek Lawrynowicz Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Cc: David Airlie Cc: Simona Vetter Signed-off-by: Simona Vetter Signed-off-by: Simona Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20250707151814.603897-1-simona.vetter@ffwll.ch --- drivers/gpu/drm/drm_gem.c | 10 +++++++++- include/drm/drm_file.h | 3 +++ 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index aad6ac9748cc..ac0524595bd6 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -325,6 +325,9 @@ drm_gem_object_release_handle(int id, void *ptr, void *data) struct drm_file *file_priv = data; struct drm_gem_object *obj = ptr; + if (drm_WARN_ON(obj->dev, !data)) + return 0; + if (obj->funcs->close) obj->funcs->close(obj, file_priv); @@ -445,7 +448,7 @@ drm_gem_handle_create_tail(struct drm_file *file_priv, idr_preload(GFP_KERNEL); spin_lock(&file_priv->table_lock); - ret = idr_alloc(&file_priv->object_idr, obj, 1, 0, GFP_NOWAIT); + ret = idr_alloc(&file_priv->object_idr, NULL, 1, 0, GFP_NOWAIT); spin_unlock(&file_priv->table_lock); idr_preload_end(); @@ -466,6 +469,11 @@ drm_gem_handle_create_tail(struct drm_file *file_priv, goto err_revoke; } + /* mirrors drm_gem_handle_delete to avoid races */ + spin_lock(&file_priv->table_lock); + obj = idr_replace(&file_priv->object_idr, obj, handle); + WARN_ON(obj != NULL); + spin_unlock(&file_priv->table_lock); *handlep = handle; return 0; diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h index 5c3b2aa3e69d..d344d41e6cfe 100644 --- a/include/drm/drm_file.h +++ b/include/drm/drm_file.h @@ -300,6 +300,9 @@ struct drm_file { * * Mapping of mm object handles to object pointers. Used by the GEM * subsystem. Protected by @table_lock. + * + * Note that allocated entries might be NULL as a transient state when + * creating or deleting a handle. */ struct idr object_idr; -- cgit v1.2.3 From 82241a83cd15aaaf28200a40ad1a8b480012edaf Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Thu, 5 Jun 2025 20:58:29 +0800 Subject: mm: fix the inaccurate memory statistics issue for users On some large machines with a high number of CPUs running a 64K pagesize kernel, we found that the 'RES' field is always 0 displayed by the top command for some processes, which will cause a lot of confusion for users. PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND 875525 root 20 0 12480 0 0 R 0.3 0.0 0:00.08 top 1 root 20 0 172800 0 0 S 0.0 0.0 0:04.52 systemd The main reason is that the batch size of the percpu counter is quite large on these machines, caching a significant percpu value, since converting mm's rss stats into percpu_counter by commit f1a7941243c1 ("mm: convert mm's rss stats into percpu_counter"). Intuitively, the batch number should be optimized, but on some paths, performance may take precedence over statistical accuracy. Therefore, introducing a new interface to add the percpu statistical count and display it to users, which can remove the confusion. In addition, this change is not expected to be on a performance-critical path, so the modification should be acceptable. In addition, the 'mm->rss_stat' is updated by using add_mm_counter() and dec/inc_mm_counter(), which are all wrappers around percpu_counter_add_batch(). In percpu_counter_add_batch(), there is percpu batch caching to avoid 'fbc->lock' contention. This patch changes task_mem() and task_statm() to get the accurate mm counters under the 'fbc->lock', but this should not exacerbate kernel 'mm->rss_stat' lock contention due to the percpu batch caching of the mm counters. The following test also confirm the theoretical analysis. I run the stress-ng that stresses anon page faults in 32 threads on my 32 cores machine, while simultaneously running a script that starts 32 threads to busy-loop pread each stress-ng thread's /proc/pid/status interface. From the following data, I did not observe any obvious impact of this patch on the stress-ng tests. w/o patch: stress-ng: info: [6848] 4,399,219,085,152 CPU Cycles 67.327 B/sec stress-ng: info: [6848] 1,616,524,844,832 Instructions 24.740 B/sec (0.367 instr. per cycle) stress-ng: info: [6848] 39,529,792 Page Faults Total 0.605 M/sec stress-ng: info: [6848] 39,529,792 Page Faults Minor 0.605 M/sec w/patch: stress-ng: info: [2485] 4,462,440,381,856 CPU Cycles 68.382 B/sec stress-ng: info: [2485] 1,615,101,503,296 Instructions 24.750 B/sec (0.362 instr. per cycle) stress-ng: info: [2485] 39,439,232 Page Faults Total 0.604 M/sec stress-ng: info: [2485] 39,439,232 Page Faults Minor 0.604 M/sec On comparing a very simple app which just allocates & touches some memory against v6.1 (which doesn't have f1a7941243c1) and latest Linus tree (4c06e63b9203) I can see that on latest Linus tree the values for VmRSS, RssAnon and RssFile from /proc/self/status are all zeroes while they do report values on v6.1 and a Linus tree with this patch. Link: https://lkml.kernel.org/r/f4586b17f66f97c174f7fd1f8647374fdb53de1c.1749119050.git.baolin.wang@linux.alibaba.com Fixes: f1a7941243c1 ("mm: convert mm's rss stats into percpu_counter") Signed-off-by: Baolin Wang Reviewed-by: Aboorva Devarajan Tested-by: Aboorva Devarajan Tested-by Donet Tom Acked-by: Shakeel Butt Acked-by: SeongJae Park Acked-by: Michal Hocko Reviewed-by: Vlastimil Babka Cc: David Hildenbrand Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Mike Rapoport Cc: Suren Baghdasaryan Cc: Signed-off-by: Andrew Morton --- fs/proc/task_mmu.c | 14 +++++++------- include/linux/mm.h | 5 +++++ 2 files changed, 12 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 4be91eb6ea5c..751479eb128f 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -36,9 +36,9 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) unsigned long text, lib, swap, anon, file, shmem; unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss; - anon = get_mm_counter(mm, MM_ANONPAGES); - file = get_mm_counter(mm, MM_FILEPAGES); - shmem = get_mm_counter(mm, MM_SHMEMPAGES); + anon = get_mm_counter_sum(mm, MM_ANONPAGES); + file = get_mm_counter_sum(mm, MM_FILEPAGES); + shmem = get_mm_counter_sum(mm, MM_SHMEMPAGES); /* * Note: to minimize their overhead, mm maintains hiwater_vm and @@ -59,7 +59,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) text = min(text, mm->exec_vm << PAGE_SHIFT); lib = (mm->exec_vm << PAGE_SHIFT) - text; - swap = get_mm_counter(mm, MM_SWAPENTS); + swap = get_mm_counter_sum(mm, MM_SWAPENTS); SEQ_PUT_DEC("VmPeak:\t", hiwater_vm); SEQ_PUT_DEC(" kB\nVmSize:\t", total_vm); SEQ_PUT_DEC(" kB\nVmLck:\t", mm->locked_vm); @@ -92,12 +92,12 @@ unsigned long task_statm(struct mm_struct *mm, unsigned long *shared, unsigned long *text, unsigned long *data, unsigned long *resident) { - *shared = get_mm_counter(mm, MM_FILEPAGES) + - get_mm_counter(mm, MM_SHMEMPAGES); + *shared = get_mm_counter_sum(mm, MM_FILEPAGES) + + get_mm_counter_sum(mm, MM_SHMEMPAGES); *text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> PAGE_SHIFT; *data = mm->data_vm + mm->stack_vm; - *resident = *shared + get_mm_counter(mm, MM_ANONPAGES); + *resident = *shared + get_mm_counter_sum(mm, MM_ANONPAGES); return mm->total_vm; } diff --git a/include/linux/mm.h b/include/linux/mm.h index 0ef2ba0c667a..fa538feaa8d9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2568,6 +2568,11 @@ static inline unsigned long get_mm_counter(struct mm_struct *mm, int member) return percpu_counter_read_positive(&mm->rss_stat[member]); } +static inline unsigned long get_mm_counter_sum(struct mm_struct *mm, int member) +{ + return percpu_counter_sum_positive(&mm->rss_stat[member]); +} + void mm_trace_rss_stat(struct mm_struct *mm, int member); static inline void add_mm_counter(struct mm_struct *mm, int member, long value) -- cgit v1.2.3 From db6cc3f4ac2e6cdc898fc9cbc8b32ae1bf56bdad Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Fri, 4 Jul 2025 21:56:20 +0800 Subject: Revert "sched/numa: add statistics of numa balance task" This reverts commit ad6b26b6a0a79166b53209df2ca1cf8636296382. This commit introduces per-memcg/task NUMA balance statistics, but unfortunately it introduced a NULL pointer exception due to the following race condition: After a swap task candidate was chosen, its mm_struct pointer was set to NULL due to task exit. Later, when performing the actual task swapping, the p->mm caused the problem. CPU0 CPU1 : ... task_numa_migrate task_numa_find_cpu task_numa_compare # a normal task p is chosen env->best_task = p # p exit: exit_signals(p); p->flags |= PF_EXITING exit_mm p->mm = NULL; migrate_swap_stop __migrate_swap_task((arg->src_task, arg->dst_cpu) count_memcg_event_mm(p->mm, NUMA_TASK_SWAP)# p->mm is NULL task_lock() should be held and the PF_EXITING flag needs to be checked to prevent this from happening. After discussion, the conclusion was that adding a lock is not worthwhile for some statistics calculations. Revert the change and rely on the tracepoint for this purpose. Link: https://lkml.kernel.org/r/20250704135620.685752-1-yu.c.chen@intel.com Link: https://lkml.kernel.org/r/20250708064917.BBD13C4CEED@smtp.kernel.org Fixes: ad6b26b6a0a7 ("sched/numa: add statistics of numa balance task") Signed-off-by: Chen Yu Reported-by: Jirka Hladky Closes: https://lore.kernel.org/all/CAE4VaGBLJxpd=NeRJXpSCuw=REhC5LWJpC29kDy-Zh2ZDyzQZA@mail.gmail.com/ Reported-by: Srikanth Aithal Reported-by: Suneeth D Acked-by: Michal Hocko Cc: Borislav Petkov Cc: Ingo Molnar Cc: Jiri Hladky Cc: Libo Chen Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- Documentation/admin-guide/cgroup-v2.rst | 6 ------ include/linux/sched.h | 4 ---- include/linux/vm_event_item.h | 2 -- kernel/sched/core.c | 9 ++------- kernel/sched/debug.c | 4 ---- mm/memcontrol.c | 2 -- mm/vmstat.c | 2 -- 7 files changed, 2 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 0cc35a14afbe..bd98ea3175ec 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1732,12 +1732,6 @@ The following nested keys are defined. numa_hint_faults (npn) Number of NUMA hinting faults. - numa_task_migrated (npn) - Number of task migration by NUMA balancing. - - numa_task_swapped (npn) - Number of task swap by NUMA balancing. - pgdemote_kswapd Number of pages demoted by kswapd. diff --git a/include/linux/sched.h b/include/linux/sched.h index 4f78a64beb52..aa9c5be7a632 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -548,10 +548,6 @@ struct sched_statistics { u64 nr_failed_migrations_running; u64 nr_failed_migrations_hot; u64 nr_forced_migrations; -#ifdef CONFIG_NUMA_BALANCING - u64 numa_task_migrated; - u64 numa_task_swapped; -#endif u64 nr_wakeups; u64 nr_wakeups_sync; diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 91a3ce9a2687..9e15a088ba38 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -66,8 +66,6 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, NUMA_HINT_FAULTS, NUMA_HINT_FAULTS_LOCAL, NUMA_PAGE_MIGRATE, - NUMA_TASK_MIGRATE, - NUMA_TASK_SWAP, #endif #ifdef CONFIG_MIGRATION PGMIGRATE_SUCCESS, PGMIGRATE_FAIL, diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ec68fc686bd7..81c6df746df1 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3362,10 +3362,6 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) #ifdef CONFIG_NUMA_BALANCING static void __migrate_swap_task(struct task_struct *p, int cpu) { - __schedstat_inc(p->stats.numa_task_swapped); - count_vm_numa_event(NUMA_TASK_SWAP); - count_memcg_event_mm(p->mm, NUMA_TASK_SWAP); - if (task_on_rq_queued(p)) { struct rq *src_rq, *dst_rq; struct rq_flags srf, drf; @@ -7939,9 +7935,8 @@ int migrate_task_to(struct task_struct *p, int target_cpu) if (!cpumask_test_cpu(target_cpu, p->cpus_ptr)) return -EINVAL; - __schedstat_inc(p->stats.numa_task_migrated); - count_vm_numa_event(NUMA_TASK_MIGRATE); - count_memcg_event_mm(p->mm, NUMA_TASK_MIGRATE); + /* TODO: This is not properly updating schedstats */ + trace_sched_move_numa(p, curr_cpu, target_cpu); return stop_one_cpu(curr_cpu, migration_cpu_stop, &arg); } diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 9d71baf08075..557246880a7e 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -1210,10 +1210,6 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, P_SCHEDSTAT(nr_failed_migrations_running); P_SCHEDSTAT(nr_failed_migrations_hot); P_SCHEDSTAT(nr_forced_migrations); -#ifdef CONFIG_NUMA_BALANCING - P_SCHEDSTAT(numa_task_migrated); - P_SCHEDSTAT(numa_task_swapped); -#endif P_SCHEDSTAT(nr_wakeups); P_SCHEDSTAT(nr_wakeups_sync); P_SCHEDSTAT(nr_wakeups_migrate); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 902da8a9c643..70fdeda1120b 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -474,8 +474,6 @@ static const unsigned int memcg_vm_event_stat[] = { NUMA_PAGE_MIGRATE, NUMA_PTE_UPDATES, NUMA_HINT_FAULTS, - NUMA_TASK_MIGRATE, - NUMA_TASK_SWAP, #endif }; diff --git a/mm/vmstat.c b/mm/vmstat.c index 429ae5339bfe..a78d70ddeacd 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1346,8 +1346,6 @@ const char * const vmstat_text[] = { "numa_hint_faults", "numa_hint_faults_local", "numa_pages_migrated", - "numa_task_migrated", - "numa_task_swapped", #endif #ifdef CONFIG_MIGRATION "pgmigrate_success", -- cgit v1.2.3 From dd831ac8221e691e9e918585b1003c7071df0379 Mon Sep 17 00:00:00 2001 From: Xiang Mei Date: Sat, 5 Jul 2025 14:21:43 -0700 Subject: net/sched: sch_qfq: Fix null-deref in agg_dequeue To prevent a potential crash in agg_dequeue (net/sched/sch_qfq.c) when cl->qdisc->ops->peek(cl->qdisc) returns NULL, we check the return value before using it, similar to the existing approach in sch_hfsc.c. To avoid code duplication, the following changes are made: 1. Changed qdisc_warn_nonwc(include/net/pkt_sched.h) into a static inline function. 2. Moved qdisc_peek_len from net/sched/sch_hfsc.c to include/net/pkt_sched.h so that sch_qfq can reuse it. 3. Applied qdisc_peek_len in agg_dequeue to avoid crashing. Signed-off-by: Xiang Mei Reviewed-by: Cong Wang Link: https://patch.msgid.link/20250705212143.3982664-1-xmei5@asu.edu Signed-off-by: Paolo Abeni --- include/net/pkt_sched.h | 25 ++++++++++++++++++++++++- net/sched/sch_api.c | 10 ---------- net/sched/sch_hfsc.c | 16 ---------------- net/sched/sch_qfq.c | 2 +- 4 files changed, 25 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index d7b7b6cd4aa1..8a75c73fc555 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -114,7 +114,6 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct netlink_ext_ack *extack); void qdisc_put_rtab(struct qdisc_rate_table *tab); void qdisc_put_stab(struct qdisc_size_table *tab); -void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc); bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, struct net_device *dev, struct netdev_queue *txq, spinlock_t *root_lock, bool validate); @@ -290,4 +289,28 @@ static inline bool tc_qdisc_stats_dump(struct Qdisc *sch, return true; } +static inline void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc) +{ + if (!(qdisc->flags & TCQ_F_WARN_NONWC)) { + pr_warn("%s: %s qdisc %X: is non-work-conserving?\n", + txt, qdisc->ops->id, qdisc->handle >> 16); + qdisc->flags |= TCQ_F_WARN_NONWC; + } +} + +static inline unsigned int qdisc_peek_len(struct Qdisc *sch) +{ + struct sk_buff *skb; + unsigned int len; + + skb = sch->ops->peek(sch); + if (unlikely(skb == NULL)) { + qdisc_warn_nonwc("qdisc_peek_len", sch); + return 0; + } + len = qdisc_pkt_len(skb); + + return len; +} + #endif diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 241e86cec9c5..d7c767b861a4 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -601,16 +601,6 @@ out: qdisc_skb_cb(skb)->pkt_len = pkt_len; } -void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc) -{ - if (!(qdisc->flags & TCQ_F_WARN_NONWC)) { - pr_warn("%s: %s qdisc %X: is non-work-conserving?\n", - txt, qdisc->ops->id, qdisc->handle >> 16); - qdisc->flags |= TCQ_F_WARN_NONWC; - } -} -EXPORT_SYMBOL(qdisc_warn_nonwc); - static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) { struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 5a7745170e84..d8fd35da32a7 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -835,22 +835,6 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time) } } -static unsigned int -qdisc_peek_len(struct Qdisc *sch) -{ - struct sk_buff *skb; - unsigned int len; - - skb = sch->ops->peek(sch); - if (unlikely(skb == NULL)) { - qdisc_warn_nonwc("qdisc_peek_len", sch); - return 0; - } - len = qdisc_pkt_len(skb); - - return len; -} - static void hfsc_adjust_levels(struct hfsc_class *cl) { diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index bf1282cb22eb..bcce36608871 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -989,7 +989,7 @@ static struct sk_buff *agg_dequeue(struct qfq_aggregate *agg, if (cl->qdisc->q.qlen == 0) /* no more packets, remove from list */ list_del_init(&cl->alist); - else if (cl->deficit < qdisc_pkt_len(cl->qdisc->ops->peek(cl->qdisc))) { + else if (cl->deficit < qdisc_peek_len(cl->qdisc)) { cl->deficit += agg->lmax; list_move_tail(&cl->alist, &agg->active); } -- cgit v1.2.3 From a8b289f0f2dcbadd8c207ad8f33cf7ba2b4eb088 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 10 Jul 2025 10:00:12 +0200 Subject: irqchip/irq-msi-lib: Fix build with PCI disabled The armada-370-xp irqchip fails in some randconfig builds because of a missing declaration: In file included from drivers/irqchip/irq-armada-370-xp.c:23: include/linux/irqchip/irq-msi-lib.h:25:39: error: 'struct msi_domain_info' declared inside parameter list will not be visible outside of this definition or declaration [-Werror] Add a forward declaration for the msi_domain_info structure. [ tglx: Fixed up the subsystem prefix. Is it really that hard to get right? ] Fixes: e51b27438a10 ("irqchip: Make irq-msi-lib.h globally available") Signed-off-by: Arnd Bergmann Signed-off-by: Thomas Gleixner Acked-by: Marc Zyngier Link: https://lore.kernel.org/all/20250710080021.2303640-1-arnd@kernel.org --- include/linux/irqchip/irq-msi-lib.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/irqchip/irq-msi-lib.h b/include/linux/irqchip/irq-msi-lib.h index dd8d1d138544..224ac28e88d7 100644 --- a/include/linux/irqchip/irq-msi-lib.h +++ b/include/linux/irqchip/irq-msi-lib.h @@ -17,6 +17,7 @@ #define MATCH_PLATFORM_MSI BIT(DOMAIN_BUS_PLATFORM_MSI) +struct msi_domain_info; int msi_lib_irq_domain_select(struct irq_domain *d, struct irq_fwspec *fwspec, enum irq_domain_bus_token bus_token); -- cgit v1.2.3 From 18cdb3d982da8976b28d57691eb256ec5688fad2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Jul 2025 12:45:17 +0000 Subject: netfilter: flowtable: account for Ethernet header in nf_flow_pppoe_proto() syzbot found a potential access to uninit-value in nf_flow_pppoe_proto() Blamed commit forgot the Ethernet header. BUG: KMSAN: uninit-value in nf_flow_offload_inet_hook+0x7e4/0x940 net/netfilter/nf_flow_table_inet.c:27 nf_flow_offload_inet_hook+0x7e4/0x940 net/netfilter/nf_flow_table_inet.c:27 nf_hook_entry_hookfn include/linux/netfilter.h:157 [inline] nf_hook_slow+0xe1/0x3d0 net/netfilter/core.c:623 nf_hook_ingress include/linux/netfilter_netdev.h:34 [inline] nf_ingress net/core/dev.c:5742 [inline] __netif_receive_skb_core+0x4aff/0x70c0 net/core/dev.c:5837 __netif_receive_skb_one_core net/core/dev.c:5975 [inline] __netif_receive_skb+0xcc/0xac0 net/core/dev.c:6090 netif_receive_skb_internal net/core/dev.c:6176 [inline] netif_receive_skb+0x57/0x630 net/core/dev.c:6235 tun_rx_batched+0x1df/0x980 drivers/net/tun.c:1485 tun_get_user+0x4ee0/0x6b40 drivers/net/tun.c:1938 tun_chr_write_iter+0x3e9/0x5c0 drivers/net/tun.c:1984 new_sync_write fs/read_write.c:593 [inline] vfs_write+0xb4b/0x1580 fs/read_write.c:686 ksys_write fs/read_write.c:738 [inline] __do_sys_write fs/read_write.c:749 [inline] Reported-by: syzbot+bf6ed459397e307c3ad2@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/686bc073.a00a0220.c7b3.0086.GAE@google.com/T/#u Fixes: 87b3593bed18 ("netfilter: flowtable: validate pppoe header") Signed-off-by: Eric Dumazet Reviewed-by: Pablo Neira Ayuso Link: https://patch.msgid.link/20250707124517.614489-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/netfilter/nf_flow_table.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index d711642e78b5..c003cd194fa2 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -370,7 +370,7 @@ static inline __be16 __nf_flow_pppoe_proto(const struct sk_buff *skb) static inline bool nf_flow_pppoe_proto(struct sk_buff *skb, __be16 *inner_proto) { - if (!pskb_may_pull(skb, PPPOE_SES_HLEN)) + if (!pskb_may_pull(skb, ETH_HLEN + PPPOE_SES_HLEN)) return false; *inner_proto = __nf_flow_pppoe_proto(skb); -- cgit v1.2.3 From 36a686c0784fcccdaa4f38b498a9ef0d42ea7cb8 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 10 Jul 2025 18:43:42 +0200 Subject: Revert "netfilter: nf_tables: Add notifications for hook changes" This reverts commit 465b9ee0ee7bc268d7f261356afd6c4262e48d82. Such notifications fit better into core or nfnetlink_hook code, following the NFNL_MSG_HOOK_GET message format. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 5 --- include/uapi/linux/netfilter/nf_tables.h | 10 ------ include/uapi/linux/netfilter/nfnetlink.h | 2 -- net/netfilter/nf_tables_api.c | 59 -------------------------------- net/netfilter/nfnetlink.c | 1 - net/netfilter/nft_chain_filter.c | 2 -- 6 files changed, 79 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index e4d8e451e935..5e49619ae49c 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1142,11 +1142,6 @@ int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set); int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain); void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain); -struct nft_hook; -void nf_tables_chain_device_notify(const struct nft_chain *chain, - const struct nft_hook *hook, - const struct net_device *dev, int event); - enum nft_chain_types { NFT_CHAIN_T_DEFAULT = 0, NFT_CHAIN_T_ROUTE, diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 518ba144544c..2beb30be2c5f 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -142,8 +142,6 @@ enum nf_tables_msg_types { NFT_MSG_DESTROYOBJ, NFT_MSG_DESTROYFLOWTABLE, NFT_MSG_GETSETELEM_RESET, - NFT_MSG_NEWDEV, - NFT_MSG_DELDEV, NFT_MSG_MAX, }; @@ -1786,18 +1784,10 @@ enum nft_synproxy_attributes { * enum nft_device_attributes - nf_tables device netlink attributes * * @NFTA_DEVICE_NAME: name of this device (NLA_STRING) - * @NFTA_DEVICE_TABLE: table containing the flowtable or chain hooking into the device (NLA_STRING) - * @NFTA_DEVICE_FLOWTABLE: flowtable hooking into the device (NLA_STRING) - * @NFTA_DEVICE_CHAIN: chain hooking into the device (NLA_STRING) - * @NFTA_DEVICE_SPEC: hook spec matching the device (NLA_STRING) */ enum nft_devices_attributes { NFTA_DEVICE_UNSPEC, NFTA_DEVICE_NAME, - NFTA_DEVICE_TABLE, - NFTA_DEVICE_FLOWTABLE, - NFTA_DEVICE_CHAIN, - NFTA_DEVICE_SPEC, __NFTA_DEVICE_MAX }; #define NFTA_DEVICE_MAX (__NFTA_DEVICE_MAX - 1) diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h index 50d807af2649..6cd58cd2a6f0 100644 --- a/include/uapi/linux/netfilter/nfnetlink.h +++ b/include/uapi/linux/netfilter/nfnetlink.h @@ -25,8 +25,6 @@ enum nfnetlink_groups { #define NFNLGRP_ACCT_QUOTA NFNLGRP_ACCT_QUOTA NFNLGRP_NFTRACE, #define NFNLGRP_NFTRACE NFNLGRP_NFTRACE - NFNLGRP_NFT_DEV, -#define NFNLGRP_NFT_DEV NFNLGRP_NFT_DEV __NFNLGRP_MAX, }; #define NFNLGRP_MAX (__NFNLGRP_MAX - 1) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 24c71ecb2179..a7240736f98e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -9686,64 +9686,6 @@ struct nf_hook_ops *nft_hook_find_ops_rcu(const struct nft_hook *hook, } EXPORT_SYMBOL_GPL(nft_hook_find_ops_rcu); -static void -nf_tables_device_notify(const struct nft_table *table, int attr, - const char *name, const struct nft_hook *hook, - const struct net_device *dev, int event) -{ - struct net *net = dev_net(dev); - struct nlmsghdr *nlh; - struct sk_buff *skb; - u16 flags = 0; - - if (!nfnetlink_has_listeners(net, NFNLGRP_NFT_DEV)) - return; - - skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!skb) - goto err; - - event = event == NETDEV_REGISTER ? NFT_MSG_NEWDEV : NFT_MSG_DELDEV; - event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); - nlh = nfnl_msg_put(skb, 0, 0, event, flags, table->family, - NFNETLINK_V0, nft_base_seq(net)); - if (!nlh) - goto err; - - if (nla_put_string(skb, NFTA_DEVICE_TABLE, table->name) || - nla_put_string(skb, attr, name) || - nla_put(skb, NFTA_DEVICE_SPEC, hook->ifnamelen, hook->ifname) || - nla_put_string(skb, NFTA_DEVICE_NAME, dev->name)) - goto err; - - nlmsg_end(skb, nlh); - nfnetlink_send(skb, net, 0, NFNLGRP_NFT_DEV, - nlmsg_report(nlh), GFP_KERNEL); - return; -err: - if (skb) - kfree_skb(skb); - nfnetlink_set_err(net, 0, NFNLGRP_NFT_DEV, -ENOBUFS); -} - -void -nf_tables_chain_device_notify(const struct nft_chain *chain, - const struct nft_hook *hook, - const struct net_device *dev, int event) -{ - nf_tables_device_notify(chain->table, NFTA_DEVICE_CHAIN, - chain->name, hook, dev, event); -} - -static void -nf_tables_flowtable_device_notify(const struct nft_flowtable *ft, - const struct nft_hook *hook, - const struct net_device *dev, int event) -{ - nf_tables_device_notify(ft->table, NFTA_DEVICE_FLOWTABLE, - ft->name, hook, dev, event); -} - static int nft_flowtable_event(unsigned long event, struct net_device *dev, struct nft_flowtable *flowtable, bool changename) { @@ -9791,7 +9733,6 @@ static int nft_flowtable_event(unsigned long event, struct net_device *dev, list_add_tail_rcu(&ops->list, &hook->ops_list); break; } - nf_tables_flowtable_device_notify(flowtable, hook, dev, event); break; } return 0; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index ac77fc21632d..e598a2a252b0 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -86,7 +86,6 @@ static const int nfnl_group2type[NFNLGRP_MAX+1] = { [NFNLGRP_NFTABLES] = NFNL_SUBSYS_NFTABLES, [NFNLGRP_ACCT_QUOTA] = NFNL_SUBSYS_ACCT, [NFNLGRP_NFTRACE] = NFNL_SUBSYS_NFTABLES, - [NFNLGRP_NFT_DEV] = NFNL_SUBSYS_NFTABLES, }; static struct nfnl_net *nfnl_pernet(struct net *net) diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c index 846d48ba8965..b16185e9a6dd 100644 --- a/net/netfilter/nft_chain_filter.c +++ b/net/netfilter/nft_chain_filter.c @@ -363,8 +363,6 @@ static int nft_netdev_event(unsigned long event, struct net_device *dev, list_add_tail_rcu(&ops->list, &hook->ops_list); break; } - nf_tables_chain_device_notify(&basechain->chain, - hook, dev, event); break; } return 0; -- cgit v1.2.3 From 444020f4bf06fb86805ee7e7ceec0375485fd94d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 14 Jul 2025 14:21:30 +0200 Subject: wifi: cfg80211: remove scan request n_channels counted_by This reverts commit e3eac9f32ec0 ("wifi: cfg80211: Annotate struct cfg80211_scan_request with __counted_by"). This really has been a completely failed experiment. There were no actual bugs found, and yet at this point we already have four "fixes" to it, with nothing to show for but code churn, and it never even made the code any safer. In all of the cases that ended up getting "fixed", the structure is also internally inconsistent after the n_channels setting as the channel list isn't actually filled yet. You cannot scan with such a structure, that's just wrong. In mac80211, the struct is also reused multiple times, so initializing it once is no good. Some previous "fixes" (e.g. one in brcm80211) are also just setting n_channels before accessing the array, under the assumption that the code is correct and the array can be accessed, further showing that the whole thing is just pointless when the allocation count and use count are not separate. If we really wanted to fix it, we'd need to separately track the number of channels allocated and the number of channels currently used, but given that no bugs were found despite the numerous syzbot reports, that'd just be a waste of time. Remove the __counted_by() annotation. We really should also remove a number of the n_channels settings that are setting up a structure that's inconsistent, but that can wait. Reported-by: syzbot+e834e757bd9b3d3e1251@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=e834e757bd9b3d3e1251 Fixes: e3eac9f32ec0 ("wifi: cfg80211: Annotate struct cfg80211_scan_request with __counted_by") Link: https://patch.msgid.link/20250714142130.9b0bbb7e1f07.I09112ccde72d445e11348fc2bef68942cb2ffc94@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index d1848dc8ec99..10248d527616 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2690,7 +2690,7 @@ struct cfg80211_scan_request { s8 tsf_report_link_id; /* keep last */ - struct ieee80211_channel *channels[] __counted_by(n_channels); + struct ieee80211_channel *channels[]; }; static inline void get_random_mask_addr(u8 *buf, const u8 *addr, const u8 *mask) -- cgit v1.2.3 From dfef8d87a031ac1a46dde3de804e0fcf3c3a6afd Mon Sep 17 00:00:00 2001 From: Christian Eggers Date: Mon, 14 Jul 2025 22:27:43 +0200 Subject: Bluetooth: hci_core: fix typos in macros The provided macro parameter is named 'dev' (rather than 'hdev', which may be a variable on the stack where the macro is used). Fixes: a9a830a676a9 ("Bluetooth: hci_event: Fix sending HCI_OP_READ_ENC_KEY_SIZE") Fixes: 6126ffabba6b ("Bluetooth: Introduce HCI_CONN_FLAG_DEVICE_PRIVACY device flag") Signed-off-by: Christian Eggers Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 0da011fc8146..052c91613bb9 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1940,11 +1940,11 @@ void hci_conn_del_sysfs(struct hci_conn *conn); #define ll_privacy_capable(dev) ((dev)->le_features[0] & HCI_LE_LL_PRIVACY) #define privacy_mode_capable(dev) (ll_privacy_capable(dev) && \ - (hdev->commands[39] & 0x04)) + ((dev)->commands[39] & 0x04)) #define read_key_size_capable(dev) \ ((dev)->commands[20] & 0x10 && \ - !test_bit(HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE, &hdev->quirks)) + !test_bit(HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE, &(dev)->quirks)) #define read_voice_setting_capable(dev) \ ((dev)->commands[9] & 0x04 && \ -- cgit v1.2.3 From cdee6a4416b2a57c89082929cc60e2275bb32a3a Mon Sep 17 00:00:00 2001 From: Christian Eggers Date: Mon, 14 Jul 2025 22:27:44 +0200 Subject: Bluetooth: hci_core: add missing braces when using macro parameters Macro parameters should always be put into braces when accessing it. Fixes: 4fc9857ab8c6 ("Bluetooth: hci_sync: Add check simultaneous roles support") Signed-off-by: Christian Eggers Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 052c91613bb9..367ca43f45d1 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -829,20 +829,20 @@ extern struct mutex hci_cb_list_lock; #define hci_dev_test_and_clear_flag(hdev, nr) test_and_clear_bit((nr), (hdev)->dev_flags) #define hci_dev_test_and_change_flag(hdev, nr) test_and_change_bit((nr), (hdev)->dev_flags) -#define hci_dev_clear_volatile_flags(hdev) \ - do { \ - hci_dev_clear_flag(hdev, HCI_LE_SCAN); \ - hci_dev_clear_flag(hdev, HCI_LE_ADV); \ - hci_dev_clear_flag(hdev, HCI_LL_RPA_RESOLUTION);\ - hci_dev_clear_flag(hdev, HCI_PERIODIC_INQ); \ - hci_dev_clear_flag(hdev, HCI_QUALITY_REPORT); \ +#define hci_dev_clear_volatile_flags(hdev) \ + do { \ + hci_dev_clear_flag((hdev), HCI_LE_SCAN); \ + hci_dev_clear_flag((hdev), HCI_LE_ADV); \ + hci_dev_clear_flag((hdev), HCI_LL_RPA_RESOLUTION); \ + hci_dev_clear_flag((hdev), HCI_PERIODIC_INQ); \ + hci_dev_clear_flag((hdev), HCI_QUALITY_REPORT); \ } while (0) #define hci_dev_le_state_simultaneous(hdev) \ - (!test_bit(HCI_QUIRK_BROKEN_LE_STATES, &hdev->quirks) && \ - (hdev->le_states[4] & 0x08) && /* Central */ \ - (hdev->le_states[4] & 0x40) && /* Peripheral */ \ - (hdev->le_states[3] & 0x10)) /* Simultaneous */ + (!test_bit(HCI_QUIRK_BROKEN_LE_STATES, &(hdev)->quirks) && \ + ((hdev)->le_states[4] & 0x08) && /* Central */ \ + ((hdev)->le_states[4] & 0x40) && /* Peripheral */ \ + ((hdev)->le_states[3] & 0x10)) /* Simultaneous */ /* ----- HCI interface to upper protocols ----- */ int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr); -- cgit v1.2.3 From 6851a0c228fc040dce8e4c393004209e7372e0a3 Mon Sep 17 00:00:00 2001 From: Christian Eggers Date: Mon, 14 Jul 2025 22:27:45 +0200 Subject: Bluetooth: hci_dev: replace 'quirks' integer by 'quirk_flags' bitmap The 'quirks' member already ran out of bits on some platforms some time ago. Replace the integer member by a bitmap in order to have enough bits in future. Replace raw bit operations by accessor macros. Fixes: ff26b2dd6568 ("Bluetooth: Add quirk for broken READ_VOICE_SETTING") Fixes: 127881334eaa ("Bluetooth: Add quirk for broken READ_PAGE_SCAN_TYPE") Suggested-by: Pauli Virtanen Tested-by: Ivan Pravdin Signed-off-by: Kiran K Signed-off-by: Christian Eggers Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/bfusb.c | 2 +- drivers/bluetooth/bpa10x.c | 2 +- drivers/bluetooth/btbcm.c | 8 ++--- drivers/bluetooth/btintel.c | 28 ++++++++-------- drivers/bluetooth/btintel_pcie.c | 8 ++--- drivers/bluetooth/btmtksdio.c | 4 +-- drivers/bluetooth/btmtkuart.c | 2 +- drivers/bluetooth/btnxpuart.c | 2 +- drivers/bluetooth/btqca.c | 2 +- drivers/bluetooth/btqcomsmd.c | 2 +- drivers/bluetooth/btrtl.c | 10 +++--- drivers/bluetooth/btsdio.c | 2 +- drivers/bluetooth/btusb.c | 70 ++++++++++++++++++++-------------------- drivers/bluetooth/hci_aml.c | 2 +- drivers/bluetooth/hci_bcm.c | 4 +-- drivers/bluetooth/hci_bcm4377.c | 10 +++--- drivers/bluetooth/hci_intel.c | 2 +- drivers/bluetooth/hci_ldisc.c | 6 ++-- drivers/bluetooth/hci_ll.c | 4 +-- drivers/bluetooth/hci_nokia.c | 2 +- drivers/bluetooth/hci_qca.c | 14 ++++---- drivers/bluetooth/hci_serdev.c | 8 ++--- drivers/bluetooth/hci_vhci.c | 8 ++--- drivers/bluetooth/virtio_bt.c | 10 +++--- include/net/bluetooth/hci.h | 2 ++ include/net/bluetooth/hci_core.h | 28 +++++++++------- net/bluetooth/hci_core.c | 4 +-- net/bluetooth/hci_debugfs.c | 8 ++--- net/bluetooth/hci_event.c | 19 ++++++----- net/bluetooth/hci_sync.c | 59 +++++++++++++++++---------------- net/bluetooth/mgmt.c | 38 +++++++++++----------- net/bluetooth/msft.c | 2 +- 32 files changed, 187 insertions(+), 185 deletions(-) (limited to 'include') diff --git a/drivers/bluetooth/bfusb.c b/drivers/bluetooth/bfusb.c index 0d6ad50da046..8df310983bf6 100644 --- a/drivers/bluetooth/bfusb.c +++ b/drivers/bluetooth/bfusb.c @@ -670,7 +670,7 @@ static int bfusb_probe(struct usb_interface *intf, const struct usb_device_id *i hdev->flush = bfusb_flush; hdev->send = bfusb_send_frame; - set_bit(HCI_QUIRK_BROKEN_LOCAL_COMMANDS, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_LOCAL_COMMANDS); if (hci_register_dev(hdev) < 0) { BT_ERR("Can't register HCI device"); diff --git a/drivers/bluetooth/bpa10x.c b/drivers/bluetooth/bpa10x.c index 1fa58c059cbf..8b43dfc755de 100644 --- a/drivers/bluetooth/bpa10x.c +++ b/drivers/bluetooth/bpa10x.c @@ -398,7 +398,7 @@ static int bpa10x_probe(struct usb_interface *intf, hdev->send = bpa10x_send_frame; hdev->set_diag = bpa10x_set_diag; - set_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_RESET_ON_CLOSE); err = hci_register_dev(hdev); if (err < 0) { diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c index 0a60660fc8ce..3a3a56ddbb06 100644 --- a/drivers/bluetooth/btbcm.c +++ b/drivers/bluetooth/btbcm.c @@ -135,7 +135,7 @@ int btbcm_check_bdaddr(struct hci_dev *hdev) if (btbcm_set_bdaddr_from_efi(hdev) != 0) { bt_dev_info(hdev, "BCM: Using default device address (%pMR)", &bda->bdaddr); - set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_INVALID_BDADDR); } } @@ -467,7 +467,7 @@ static int btbcm_print_controller_features(struct hci_dev *hdev) /* Read DMI and disable broken Read LE Min/Max Tx Power */ if (dmi_first_match(disable_broken_read_transmit_power)) - set_bit(HCI_QUIRK_BROKEN_READ_TRANSMIT_POWER, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_READ_TRANSMIT_POWER); return 0; } @@ -706,7 +706,7 @@ int btbcm_finalize(struct hci_dev *hdev, bool *fw_load_done, bool use_autobaud_m btbcm_check_bdaddr(hdev); - set_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_STRICT_DUPLICATE_FILTER); return 0; } @@ -769,7 +769,7 @@ int btbcm_setup_apple(struct hci_dev *hdev) kfree_skb(skb); } - set_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_STRICT_DUPLICATE_FILTER); return 0; } diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index 06575a0b9aee..06016ac3965c 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -88,7 +88,7 @@ int btintel_check_bdaddr(struct hci_dev *hdev) if (!bacmp(&bda->bdaddr, BDADDR_INTEL)) { bt_dev_err(hdev, "Found Intel default device address (%pMR)", &bda->bdaddr); - set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_INVALID_BDADDR); } kfree_skb(skb); @@ -2027,7 +2027,7 @@ static int btintel_download_fw(struct hci_dev *hdev, */ if (!bacmp(¶ms->otp_bdaddr, BDADDR_ANY)) { bt_dev_info(hdev, "No device address configured"); - set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_INVALID_BDADDR); } download: @@ -2295,7 +2295,7 @@ static int btintel_prepare_fw_download_tlv(struct hci_dev *hdev, */ if (!bacmp(&ver->otp_bd_addr, BDADDR_ANY)) { bt_dev_info(hdev, "No device address configured"); - set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_INVALID_BDADDR); } } @@ -3435,9 +3435,9 @@ static int btintel_setup_combined(struct hci_dev *hdev) } /* Apply the common HCI quirks for Intel device */ - set_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks); - set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); - set_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_STRICT_DUPLICATE_FILTER); + hci_set_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY); + hci_set_quirk(hdev, HCI_QUIRK_NON_PERSISTENT_DIAG); /* Set up the quality report callback for Intel devices */ hdev->set_quality_report = btintel_set_quality_report; @@ -3475,8 +3475,8 @@ static int btintel_setup_combined(struct hci_dev *hdev) */ if (!btintel_test_flag(hdev, INTEL_ROM_LEGACY_NO_WBS_SUPPORT)) - set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, - &hdev->quirks); + hci_set_quirk(hdev, + HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED); err = btintel_legacy_rom_setup(hdev, &ver); break; @@ -3491,11 +3491,11 @@ static int btintel_setup_combined(struct hci_dev *hdev) * * All Legacy bootloader devices support WBS */ - set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, - &hdev->quirks); + hci_set_quirk(hdev, + HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED); /* These variants don't seem to support LE Coded PHY */ - set_bit(HCI_QUIRK_BROKEN_LE_CODED, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_LE_CODED); /* Setup MSFT Extension support */ btintel_set_msft_opcode(hdev, ver.hw_variant); @@ -3571,10 +3571,10 @@ static int btintel_setup_combined(struct hci_dev *hdev) * * All Legacy bootloader devices support WBS */ - set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED); /* These variants don't seem to support LE Coded PHY */ - set_bit(HCI_QUIRK_BROKEN_LE_CODED, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_LE_CODED); /* Setup MSFT Extension support */ btintel_set_msft_opcode(hdev, ver.hw_variant); @@ -3600,7 +3600,7 @@ static int btintel_setup_combined(struct hci_dev *hdev) * * All TLV based devices support WBS */ - set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED); /* Setup MSFT Extension support */ btintel_set_msft_opcode(hdev, diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index e1c688dd2d45..f4e3fb54fe76 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -2081,9 +2081,9 @@ static int btintel_pcie_setup_internal(struct hci_dev *hdev) } /* Apply the common HCI quirks for Intel device */ - set_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks); - set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); - set_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_STRICT_DUPLICATE_FILTER); + hci_set_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY); + hci_set_quirk(hdev, HCI_QUIRK_NON_PERSISTENT_DIAG); /* Set up the quality report callback for Intel devices */ hdev->set_quality_report = btintel_set_quality_report; @@ -2123,7 +2123,7 @@ static int btintel_pcie_setup_internal(struct hci_dev *hdev) * * All TLV based devices support WBS */ - set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED); /* Setup MSFT Extension support */ btintel_set_msft_opcode(hdev, diff --git a/drivers/bluetooth/btmtksdio.c b/drivers/bluetooth/btmtksdio.c index c16a3518b8ff..4fc673640bfc 100644 --- a/drivers/bluetooth/btmtksdio.c +++ b/drivers/bluetooth/btmtksdio.c @@ -1141,7 +1141,7 @@ static int btmtksdio_setup(struct hci_dev *hdev) } /* Enable WBS with mSBC codec */ - set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED); /* Enable GPIO reset mechanism */ if (bdev->reset) { @@ -1384,7 +1384,7 @@ static int btmtksdio_probe(struct sdio_func *func, SET_HCIDEV_DEV(hdev, &func->dev); hdev->manufacturer = 70; - set_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_NON_PERSISTENT_SETUP); sdio_set_drvdata(func, bdev); diff --git a/drivers/bluetooth/btmtkuart.c b/drivers/bluetooth/btmtkuart.c index c97e260fcb0c..51400a891f6e 100644 --- a/drivers/bluetooth/btmtkuart.c +++ b/drivers/bluetooth/btmtkuart.c @@ -872,7 +872,7 @@ static int btmtkuart_probe(struct serdev_device *serdev) SET_HCIDEV_DEV(hdev, &serdev->dev); hdev->manufacturer = 70; - set_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_NON_PERSISTENT_SETUP); if (btmtkuart_is_standalone(bdev)) { err = clk_prepare_enable(bdev->osc); diff --git a/drivers/bluetooth/btnxpuart.c b/drivers/bluetooth/btnxpuart.c index 1088db6056a4..24f9b52605a1 100644 --- a/drivers/bluetooth/btnxpuart.c +++ b/drivers/bluetooth/btnxpuart.c @@ -1807,7 +1807,7 @@ static int nxp_serdev_probe(struct serdev_device *serdev) "local-bd-address", (u8 *)&ba, sizeof(ba)); if (bacmp(&ba, BDADDR_ANY)) - set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_USE_BDADDR_PROPERTY); if (hci_register_dev(hdev) < 0) { dev_err(&serdev->dev, "Can't register HCI device\n"); diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index edefb9dc76aa..7c958d6065be 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -739,7 +739,7 @@ static int qca_check_bdaddr(struct hci_dev *hdev, const struct qca_fw_config *co bda = (struct hci_rp_read_bd_addr *)skb->data; if (!bacmp(&bda->bdaddr, &config->bdaddr)) - set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_USE_BDADDR_PROPERTY); kfree_skb(skb); diff --git a/drivers/bluetooth/btqcomsmd.c b/drivers/bluetooth/btqcomsmd.c index c0eb71d6ffd3..d2e13fcb6bab 100644 --- a/drivers/bluetooth/btqcomsmd.c +++ b/drivers/bluetooth/btqcomsmd.c @@ -117,7 +117,7 @@ static int btqcomsmd_setup(struct hci_dev *hdev) /* Devices do not have persistent storage for BD address. Retrieve * it from the firmware node property. */ - set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_USE_BDADDR_PROPERTY); return 0; } diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c index 7838c89e529e..4d182cf6e037 100644 --- a/drivers/bluetooth/btrtl.c +++ b/drivers/bluetooth/btrtl.c @@ -1287,7 +1287,7 @@ void btrtl_set_quirks(struct hci_dev *hdev, struct btrtl_device_info *btrtl_dev) /* Enable controller to do both LE scan and BR/EDR inquiry * simultaneously. */ - set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY); /* Enable central-peripheral role (able to create new connections with * an existing connection in slave role). @@ -1301,7 +1301,7 @@ void btrtl_set_quirks(struct hci_dev *hdev, struct btrtl_device_info *btrtl_dev) case CHIP_ID_8851B: case CHIP_ID_8922A: case CHIP_ID_8852BT: - set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED); /* RTL8852C needs to transmit mSBC data continuously without * the zero length of USB packets for the ALT 6 supported chips @@ -1312,7 +1312,8 @@ void btrtl_set_quirks(struct hci_dev *hdev, struct btrtl_device_info *btrtl_dev) if (btrtl_dev->project_id == CHIP_ID_8852A || btrtl_dev->project_id == CHIP_ID_8852B || btrtl_dev->project_id == CHIP_ID_8852C) - set_bit(HCI_QUIRK_USE_MSFT_EXT_ADDRESS_FILTER, &hdev->quirks); + hci_set_quirk(hdev, + HCI_QUIRK_USE_MSFT_EXT_ADDRESS_FILTER); hci_set_aosp_capable(hdev); break; @@ -1331,8 +1332,7 @@ void btrtl_set_quirks(struct hci_dev *hdev, struct btrtl_device_info *btrtl_dev) * but it doesn't support any features from page 2 - * it either responds with garbage or with error status */ - set_bit(HCI_QUIRK_BROKEN_LOCAL_EXT_FEATURES_PAGE_2, - &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_LOCAL_EXT_FEATURES_PAGE_2); break; default: break; diff --git a/drivers/bluetooth/btsdio.c b/drivers/bluetooth/btsdio.c index a69feb08486a..8325655ce6aa 100644 --- a/drivers/bluetooth/btsdio.c +++ b/drivers/bluetooth/btsdio.c @@ -327,7 +327,7 @@ static int btsdio_probe(struct sdio_func *func, hdev->send = btsdio_send_frame; if (func->vendor == 0x0104 && func->device == 0x00c5) - set_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_RESET_ON_CLOSE); err = hci_register_dev(hdev); if (err < 0) { diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 9ab661d2d1e6..64509f5bfc99 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -2472,18 +2472,18 @@ static int btusb_setup_csr(struct hci_dev *hdev) * Probably will need to be expanded in the future; * without these the controller will lock up. */ - set_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks); - set_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks); - set_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks); - set_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks); - set_bit(HCI_QUIRK_BROKEN_READ_VOICE_SETTING, &hdev->quirks); - set_bit(HCI_QUIRK_BROKEN_READ_PAGE_SCAN_TYPE, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_STORED_LINK_KEY); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_ERR_DATA_REPORTING); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL); + hci_set_quirk(hdev, HCI_QUIRK_NO_SUSPEND_NOTIFIER); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_READ_VOICE_SETTING); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_READ_PAGE_SCAN_TYPE); /* Clear the reset quirk since this is not an actual * early Bluetooth 1.1 device from CSR. */ - clear_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks); - clear_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); + hci_clear_quirk(hdev, HCI_QUIRK_RESET_ON_CLOSE); + hci_clear_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY); /* * Special workaround for these BT 4.0 chip clones, and potentially more: @@ -3494,7 +3494,7 @@ static int btusb_setup_qca(struct hci_dev *hdev) /* Mark HCI_OP_ENHANCED_SETUP_SYNC_CONN as broken as it doesn't seem to * work with the likes of HSP/HFP mSBC. */ - set_bit(HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN); return 0; } @@ -4008,10 +4008,10 @@ static int btusb_probe(struct usb_interface *intf, } #endif if (id->driver_info & BTUSB_CW6622) - set_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_STORED_LINK_KEY); if (id->driver_info & BTUSB_BCM2045) - set_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_STORED_LINK_KEY); if (id->driver_info & BTUSB_BCM92035) hdev->setup = btusb_setup_bcm92035; @@ -4068,8 +4068,8 @@ static int btusb_probe(struct usb_interface *intf, hdev->reset = btmtk_reset_sync; hdev->set_bdaddr = btmtk_set_bdaddr; hdev->send = btusb_send_frame_mtk; - set_bit(HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN, &hdev->quirks); - set_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN); + hci_set_quirk(hdev, HCI_QUIRK_NON_PERSISTENT_SETUP); data->recv_acl = btmtk_usb_recv_acl; data->suspend = btmtk_usb_suspend; data->resume = btmtk_usb_resume; @@ -4077,20 +4077,20 @@ static int btusb_probe(struct usb_interface *intf, } if (id->driver_info & BTUSB_SWAVE) { - set_bit(HCI_QUIRK_FIXUP_INQUIRY_MODE, &hdev->quirks); - set_bit(HCI_QUIRK_BROKEN_LOCAL_COMMANDS, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_FIXUP_INQUIRY_MODE); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_LOCAL_COMMANDS); } if (id->driver_info & BTUSB_INTEL_BOOT) { hdev->manufacturer = 2; - set_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_RAW_DEVICE); } if (id->driver_info & BTUSB_ATH3012) { data->setup_on_usb = btusb_setup_qca; hdev->set_bdaddr = btusb_set_bdaddr_ath3012; - set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); - set_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY); + hci_set_quirk(hdev, HCI_QUIRK_STRICT_DUPLICATE_FILTER); } if (id->driver_info & BTUSB_QCA_ROME) { @@ -4098,7 +4098,7 @@ static int btusb_probe(struct usb_interface *intf, hdev->shutdown = btusb_shutdown_qca; hdev->set_bdaddr = btusb_set_bdaddr_ath3012; hdev->reset = btusb_qca_reset; - set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY); btusb_check_needs_reset_resume(intf); } @@ -4112,7 +4112,7 @@ static int btusb_probe(struct usb_interface *intf, hdev->shutdown = btusb_shutdown_qca; hdev->set_bdaddr = btusb_set_bdaddr_wcn6855; hdev->reset = btusb_qca_reset; - set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY); hci_set_msft_opcode(hdev, 0xFD70); } @@ -4140,35 +4140,35 @@ static int btusb_probe(struct usb_interface *intf, if (id->driver_info & BTUSB_ACTIONS_SEMI) { /* Support is advertised, but not implemented */ - set_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks); - set_bit(HCI_QUIRK_BROKEN_READ_TRANSMIT_POWER, &hdev->quirks); - set_bit(HCI_QUIRK_BROKEN_SET_RPA_TIMEOUT, &hdev->quirks); - set_bit(HCI_QUIRK_BROKEN_EXT_SCAN, &hdev->quirks); - set_bit(HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE, &hdev->quirks); - set_bit(HCI_QUIRK_BROKEN_EXT_CREATE_CONN, &hdev->quirks); - set_bit(HCI_QUIRK_BROKEN_WRITE_AUTH_PAYLOAD_TIMEOUT, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_ERR_DATA_REPORTING); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_READ_TRANSMIT_POWER); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_SET_RPA_TIMEOUT); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_EXT_SCAN); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_EXT_CREATE_CONN); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_WRITE_AUTH_PAYLOAD_TIMEOUT); } if (!reset) - set_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_RESET_ON_CLOSE); if (force_scofix || id->driver_info & BTUSB_WRONG_SCO_MTU) { if (!disable_scofix) - set_bit(HCI_QUIRK_FIXUP_BUFFER_SIZE, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_FIXUP_BUFFER_SIZE); } if (id->driver_info & BTUSB_BROKEN_ISOC) data->isoc = NULL; if (id->driver_info & BTUSB_WIDEBAND_SPEECH) - set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED); if (id->driver_info & BTUSB_INVALID_LE_STATES) - set_bit(HCI_QUIRK_BROKEN_LE_STATES, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_LE_STATES); if (id->driver_info & BTUSB_DIGIANSWER) { data->cmdreq_type = USB_TYPE_VENDOR; - set_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_RESET_ON_CLOSE); } if (id->driver_info & BTUSB_CSR) { @@ -4177,10 +4177,10 @@ static int btusb_probe(struct usb_interface *intf, /* Old firmware would otherwise execute USB reset */ if (bcdDevice < 0x117) - set_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_RESET_ON_CLOSE); /* This must be set first in case we disable it for fakes */ - set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY); /* Fake CSR devices with broken commands */ if (le16_to_cpu(udev->descriptor.idVendor) == 0x0a12 && @@ -4193,7 +4193,7 @@ static int btusb_probe(struct usb_interface *intf, /* New sniffer firmware has crippled HCI interface */ if (le16_to_cpu(udev->descriptor.bcdDevice) > 0x997) - set_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_RAW_DEVICE); } if (id->driver_info & BTUSB_INTEL_BOOT) { diff --git a/drivers/bluetooth/hci_aml.c b/drivers/bluetooth/hci_aml.c index 1394c575aa6d..707e90f80130 100644 --- a/drivers/bluetooth/hci_aml.c +++ b/drivers/bluetooth/hci_aml.c @@ -424,7 +424,7 @@ static int aml_check_bdaddr(struct hci_dev *hdev) if (!bacmp(&paddr->bdaddr, AML_BDADDR_DEFAULT)) { bt_dev_info(hdev, "amlbt using default bdaddr (%pM)", &paddr->bdaddr); - set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_INVALID_BDADDR); } exit: diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index 9684eb16059b..f96617b85d87 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -643,8 +643,8 @@ static int bcm_setup(struct hci_uart *hu) * Allow the bootloader to set a valid address through the * device tree. */ - if (test_bit(HCI_QUIRK_INVALID_BDADDR, &hu->hdev->quirks)) - set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hu->hdev->quirks); + if (hci_test_quirk(hu->hdev, HCI_QUIRK_INVALID_BDADDR)) + hci_set_quirk(hu->hdev, HCI_QUIRK_USE_BDADDR_PROPERTY); if (!bcm_request_irq(bcm)) err = bcm_setup_sleep(hu); diff --git a/drivers/bluetooth/hci_bcm4377.c b/drivers/bluetooth/hci_bcm4377.c index 9bce53e49cfa..8a9aa33776b0 100644 --- a/drivers/bluetooth/hci_bcm4377.c +++ b/drivers/bluetooth/hci_bcm4377.c @@ -1435,7 +1435,7 @@ static int bcm4377_check_bdaddr(struct bcm4377_data *bcm4377) bda = (struct hci_rp_read_bd_addr *)skb->data; if (!bcm4377_is_valid_bdaddr(bcm4377, &bda->bdaddr)) - set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &bcm4377->hdev->quirks); + hci_set_quirk(bcm4377->hdev, HCI_QUIRK_USE_BDADDR_PROPERTY); kfree_skb(skb); return 0; @@ -2389,13 +2389,13 @@ static int bcm4377_probe(struct pci_dev *pdev, const struct pci_device_id *id) hdev->setup = bcm4377_hci_setup; if (bcm4377->hw->broken_mws_transport_config) - set_bit(HCI_QUIRK_BROKEN_MWS_TRANSPORT_CONFIG, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_MWS_TRANSPORT_CONFIG); if (bcm4377->hw->broken_ext_scan) - set_bit(HCI_QUIRK_BROKEN_EXT_SCAN, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_EXT_SCAN); if (bcm4377->hw->broken_le_coded) - set_bit(HCI_QUIRK_BROKEN_LE_CODED, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_LE_CODED); if (bcm4377->hw->broken_le_ext_adv_report_phy) - set_bit(HCI_QUIRK_FIXUP_LE_EXT_ADV_REPORT_PHY, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_FIXUP_LE_EXT_ADV_REPORT_PHY); pci_set_drvdata(pdev, bcm4377); hci_set_drvdata(hdev, bcm4377); diff --git a/drivers/bluetooth/hci_intel.c b/drivers/bluetooth/hci_intel.c index 811f33701f84..d22fbb7f9fc5 100644 --- a/drivers/bluetooth/hci_intel.c +++ b/drivers/bluetooth/hci_intel.c @@ -660,7 +660,7 @@ static int intel_setup(struct hci_uart *hu) */ if (!bacmp(¶ms.otp_bdaddr, BDADDR_ANY)) { bt_dev_info(hdev, "No device address configured"); - set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_INVALID_BDADDR); } /* With this Intel bootloader only the hardware variant and device diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index acba83156de9..d0adae3267b4 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -667,13 +667,13 @@ static int hci_uart_register_dev(struct hci_uart *hu) SET_HCIDEV_DEV(hdev, hu->tty->dev); if (test_bit(HCI_UART_RAW_DEVICE, &hu->hdev_flags)) - set_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_RAW_DEVICE); if (test_bit(HCI_UART_EXT_CONFIG, &hu->hdev_flags)) - set_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_EXTERNAL_CONFIG); if (!test_bit(HCI_UART_RESET_ON_INIT, &hu->hdev_flags)) - set_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_RESET_ON_CLOSE); /* Only call open() for the protocol after hdev is fully initialized as * open() (or a timer/workqueue it starts) may attempt to reference it. diff --git a/drivers/bluetooth/hci_ll.c b/drivers/bluetooth/hci_ll.c index e19e9bd49555..7044c86325ce 100644 --- a/drivers/bluetooth/hci_ll.c +++ b/drivers/bluetooth/hci_ll.c @@ -649,11 +649,11 @@ static int ll_setup(struct hci_uart *hu) /* This means that there was an error getting the BD address * during probe, so mark the device as having a bad address. */ - set_bit(HCI_QUIRK_INVALID_BDADDR, &hu->hdev->quirks); + hci_set_quirk(hu->hdev, HCI_QUIRK_INVALID_BDADDR); } else if (bacmp(&lldev->bdaddr, BDADDR_ANY)) { err = ll_set_bdaddr(hu->hdev, &lldev->bdaddr); if (err) - set_bit(HCI_QUIRK_INVALID_BDADDR, &hu->hdev->quirks); + hci_set_quirk(hu->hdev, HCI_QUIRK_INVALID_BDADDR); } /* Operational speed if any */ diff --git a/drivers/bluetooth/hci_nokia.c b/drivers/bluetooth/hci_nokia.c index 9fc10a16fd96..cd7575c20f65 100644 --- a/drivers/bluetooth/hci_nokia.c +++ b/drivers/bluetooth/hci_nokia.c @@ -439,7 +439,7 @@ static int nokia_setup(struct hci_uart *hu) if (btdev->man_id == NOKIA_ID_BCM2048) { hu->hdev->set_bdaddr = btbcm_set_bdaddr; - set_bit(HCI_QUIRK_INVALID_BDADDR, &hu->hdev->quirks); + hci_set_quirk(hu->hdev, HCI_QUIRK_INVALID_BDADDR); dev_dbg(dev, "bcm2048 has invalid bluetooth address!"); } diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 3ec0be496820..33c43503714b 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -1892,7 +1892,7 @@ static int qca_setup(struct hci_uart *hu) /* Enable controller to do both LE scan and BR/EDR inquiry * simultaneously. */ - set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY); switch (soc_type) { case QCA_QCA2066: @@ -1944,7 +1944,7 @@ retry: case QCA_WCN7850: qcadev = serdev_device_get_drvdata(hu->serdev); if (qcadev->bdaddr_property_broken) - set_bit(HCI_QUIRK_BDADDR_PROPERTY_BROKEN, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_BDADDR_PROPERTY_BROKEN); hci_set_aosp_capable(hdev); @@ -2487,7 +2487,7 @@ static int qca_serdev_probe(struct serdev_device *serdev) hdev = qcadev->serdev_hu.hdev; if (power_ctrl_enabled) { - set_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_NON_PERSISTENT_SETUP); hdev->shutdown = qca_power_off; } @@ -2496,11 +2496,11 @@ static int qca_serdev_probe(struct serdev_device *serdev) * be queried via hci. Same with the valid le states quirk. */ if (data->capabilities & QCA_CAP_WIDEBAND_SPEECH) - set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, - &hdev->quirks); + hci_set_quirk(hdev, + HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED); if (!(data->capabilities & QCA_CAP_VALID_LE_STATES)) - set_bit(HCI_QUIRK_BROKEN_LE_STATES, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_LE_STATES); } return 0; @@ -2550,7 +2550,7 @@ static void qca_serdev_shutdown(struct device *dev) * invoked and the SOC is already in the initial state, so * don't also need to send the VSC. */ - if (test_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks) || + if (hci_test_quirk(hdev, HCI_QUIRK_NON_PERSISTENT_SETUP) || hci_dev_test_flag(hdev, HCI_SETUP)) return; diff --git a/drivers/bluetooth/hci_serdev.c b/drivers/bluetooth/hci_serdev.c index 89a22e9b3253..593d9cefbbf9 100644 --- a/drivers/bluetooth/hci_serdev.c +++ b/drivers/bluetooth/hci_serdev.c @@ -152,7 +152,7 @@ static int hci_uart_close(struct hci_dev *hdev) * BT SOC is completely powered OFF during BT OFF, holding port * open may drain the battery. */ - if (test_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks)) { + if (hci_test_quirk(hdev, HCI_QUIRK_NON_PERSISTENT_SETUP)) { clear_bit(HCI_UART_PROTO_READY, &hu->flags); serdev_device_close(hu->serdev); } @@ -358,13 +358,13 @@ int hci_uart_register_device_priv(struct hci_uart *hu, SET_HCIDEV_DEV(hdev, &hu->serdev->dev); if (test_bit(HCI_UART_NO_SUSPEND_NOTIFIER, &hu->flags)) - set_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_NO_SUSPEND_NOTIFIER); if (test_bit(HCI_UART_RAW_DEVICE, &hu->hdev_flags)) - set_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_RAW_DEVICE); if (test_bit(HCI_UART_EXT_CONFIG, &hu->hdev_flags)) - set_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_EXTERNAL_CONFIG); if (test_bit(HCI_UART_INIT_PENDING, &hu->hdev_flags)) return 0; diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c index 59f4d7bdffdc..f7d8c3c00655 100644 --- a/drivers/bluetooth/hci_vhci.c +++ b/drivers/bluetooth/hci_vhci.c @@ -415,16 +415,16 @@ static int __vhci_create_device(struct vhci_data *data, __u8 opcode) hdev->get_codec_config_data = vhci_get_codec_config_data; hdev->wakeup = vhci_wakeup; hdev->setup = vhci_setup; - set_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks); - set_bit(HCI_QUIRK_SYNC_FLOWCTL_SUPPORTED, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_NON_PERSISTENT_SETUP); + hci_set_quirk(hdev, HCI_QUIRK_SYNC_FLOWCTL_SUPPORTED); /* bit 6 is for external configuration */ if (opcode & 0x40) - set_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_EXTERNAL_CONFIG); /* bit 7 is for raw device */ if (opcode & 0x80) - set_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_RAW_DEVICE); if (hci_register_dev(hdev) < 0) { BT_ERR("Can't register HCI device"); diff --git a/drivers/bluetooth/virtio_bt.c b/drivers/bluetooth/virtio_bt.c index 756f292df9e8..6f1a37e85c6a 100644 --- a/drivers/bluetooth/virtio_bt.c +++ b/drivers/bluetooth/virtio_bt.c @@ -327,17 +327,17 @@ static int virtbt_probe(struct virtio_device *vdev) hdev->setup = virtbt_setup_intel; hdev->shutdown = virtbt_shutdown_generic; hdev->set_bdaddr = virtbt_set_bdaddr_intel; - set_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks); - set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); - set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_STRICT_DUPLICATE_FILTER); + hci_set_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY); + hci_set_quirk(hdev, HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED); break; case VIRTIO_BT_CONFIG_VENDOR_REALTEK: hdev->manufacturer = 93; hdev->setup = virtbt_setup_realtek; hdev->shutdown = virtbt_shutdown_generic; - set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); - set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY); + hci_set_quirk(hdev, HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED); break; } } diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 82cbd54443ac..c79901f2dc2a 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -377,6 +377,8 @@ enum { * This quirk must be set before hci_register_dev is called. */ HCI_QUIRK_BROKEN_READ_PAGE_SCAN_TYPE, + + __HCI_NUM_QUIRKS, }; /* HCI device flags */ diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 367ca43f45d1..f79f59e67114 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -464,7 +464,7 @@ struct hci_dev { unsigned int auto_accept_delay; - unsigned long quirks; + DECLARE_BITMAP(quirk_flags, __HCI_NUM_QUIRKS); atomic_t cmd_cnt; unsigned int acl_cnt; @@ -656,6 +656,10 @@ struct hci_dev { u8 (*classify_pkt_type)(struct hci_dev *hdev, struct sk_buff *skb); }; +#define hci_set_quirk(hdev, nr) set_bit((nr), (hdev)->quirk_flags) +#define hci_clear_quirk(hdev, nr) clear_bit((nr), (hdev)->quirk_flags) +#define hci_test_quirk(hdev, nr) test_bit((nr), (hdev)->quirk_flags) + #define HCI_PHY_HANDLE(handle) (handle & 0xff) enum conn_reasons { @@ -839,7 +843,7 @@ extern struct mutex hci_cb_list_lock; } while (0) #define hci_dev_le_state_simultaneous(hdev) \ - (!test_bit(HCI_QUIRK_BROKEN_LE_STATES, &(hdev)->quirks) && \ + (!hci_test_quirk((hdev), HCI_QUIRK_BROKEN_LE_STATES) && \ ((hdev)->le_states[4] & 0x08) && /* Central */ \ ((hdev)->le_states[4] & 0x40) && /* Peripheral */ \ ((hdev)->le_states[3] & 0x10)) /* Simultaneous */ @@ -1931,8 +1935,8 @@ void hci_conn_del_sysfs(struct hci_conn *conn); ((dev)->le_rx_def_phys & HCI_LE_SET_PHY_2M)) #define le_coded_capable(dev) (((dev)->le_features[1] & HCI_LE_PHY_CODED) && \ - !test_bit(HCI_QUIRK_BROKEN_LE_CODED, \ - &(dev)->quirks)) + !hci_test_quirk((dev), \ + HCI_QUIRK_BROKEN_LE_CODED)) #define scan_coded(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_CODED) || \ ((dev)->le_rx_def_phys & HCI_LE_SET_PHY_CODED)) @@ -1944,27 +1948,27 @@ void hci_conn_del_sysfs(struct hci_conn *conn); #define read_key_size_capable(dev) \ ((dev)->commands[20] & 0x10 && \ - !test_bit(HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE, &(dev)->quirks)) + !hci_test_quirk((dev), HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE)) #define read_voice_setting_capable(dev) \ ((dev)->commands[9] & 0x04 && \ - !test_bit(HCI_QUIRK_BROKEN_READ_VOICE_SETTING, &(dev)->quirks)) + !hci_test_quirk((dev), HCI_QUIRK_BROKEN_READ_VOICE_SETTING)) /* Use enhanced synchronous connection if command is supported and its quirk * has not been set. */ #define enhanced_sync_conn_capable(dev) \ (((dev)->commands[29] & 0x08) && \ - !test_bit(HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN, &(dev)->quirks)) + !hci_test_quirk((dev), HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN)) /* Use ext scanning if set ext scan param and ext scan enable is supported */ #define use_ext_scan(dev) (((dev)->commands[37] & 0x20) && \ ((dev)->commands[37] & 0x40) && \ - !test_bit(HCI_QUIRK_BROKEN_EXT_SCAN, &(dev)->quirks)) + !hci_test_quirk((dev), HCI_QUIRK_BROKEN_EXT_SCAN)) /* Use ext create connection if command is supported */ #define use_ext_conn(dev) (((dev)->commands[37] & 0x80) && \ - !test_bit(HCI_QUIRK_BROKEN_EXT_CREATE_CONN, &(dev)->quirks)) + !hci_test_quirk((dev), HCI_QUIRK_BROKEN_EXT_CREATE_CONN)) /* Extended advertising support */ #define ext_adv_capable(dev) (((dev)->le_features[1] & HCI_LE_EXT_ADV)) @@ -1979,8 +1983,8 @@ void hci_conn_del_sysfs(struct hci_conn *conn); */ #define use_enhanced_conn_complete(dev) ((ll_privacy_capable(dev) || \ ext_adv_capable(dev)) && \ - !test_bit(HCI_QUIRK_BROKEN_EXT_CREATE_CONN, \ - &(dev)->quirks)) + !hci_test_quirk((dev), \ + HCI_QUIRK_BROKEN_EXT_CREATE_CONN)) /* Periodic advertising support */ #define per_adv_capable(dev) (((dev)->le_features[1] & HCI_LE_PERIODIC_ADV)) @@ -1997,7 +2001,7 @@ void hci_conn_del_sysfs(struct hci_conn *conn); #define sync_recv_capable(dev) ((dev)->le_features[3] & HCI_LE_ISO_SYNC_RECEIVER) #define mws_transport_config_capable(dev) (((dev)->commands[30] & 0x08) && \ - (!test_bit(HCI_QUIRK_BROKEN_MWS_TRANSPORT_CONFIG, &(dev)->quirks))) + (!hci_test_quirk((dev), HCI_QUIRK_BROKEN_MWS_TRANSPORT_CONFIG))) /* ----- HCI protocols ----- */ #define HCI_PROTO_DEFER 0x01 diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 14d7221b8ac0..441cb1700f99 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2654,7 +2654,7 @@ int hci_register_dev(struct hci_dev *hdev) /* Devices that are marked for raw-only usage are unconfigured * and should not be included in normal operation. */ - if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) + if (hci_test_quirk(hdev, HCI_QUIRK_RAW_DEVICE)) hci_dev_set_flag(hdev, HCI_UNCONFIGURED); /* Mark Remote Wakeup connection flag as supported if driver has wakeup @@ -2784,7 +2784,7 @@ int hci_register_suspend_notifier(struct hci_dev *hdev) int ret = 0; if (!hdev->suspend_notifier.notifier_call && - !test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks)) { + !hci_test_quirk(hdev, HCI_QUIRK_NO_SUSPEND_NOTIFIER)) { hdev->suspend_notifier.notifier_call = hci_suspend_notifier; ret = register_pm_notifier(&hdev->suspend_notifier); } diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c index f625074d1f00..99e2e9fc70e8 100644 --- a/net/bluetooth/hci_debugfs.c +++ b/net/bluetooth/hci_debugfs.c @@ -38,7 +38,7 @@ static ssize_t __name ## _read(struct file *file, \ struct hci_dev *hdev = file->private_data; \ char buf[3]; \ \ - buf[0] = test_bit(__quirk, &hdev->quirks) ? 'Y' : 'N'; \ + buf[0] = test_bit(__quirk, hdev->quirk_flags) ? 'Y' : 'N'; \ buf[1] = '\n'; \ buf[2] = '\0'; \ return simple_read_from_buffer(user_buf, count, ppos, buf, 2); \ @@ -59,10 +59,10 @@ static ssize_t __name ## _write(struct file *file, \ if (err) \ return err; \ \ - if (enable == test_bit(__quirk, &hdev->quirks)) \ + if (enable == test_bit(__quirk, hdev->quirk_flags)) \ return -EALREADY; \ \ - change_bit(__quirk, &hdev->quirks); \ + change_bit(__quirk, hdev->quirk_flags); \ \ return count; \ } \ @@ -1356,7 +1356,7 @@ static ssize_t vendor_diag_write(struct file *file, const char __user *user_buf, * for the vendor callback. Instead just store the desired value and * the setting will be programmed when the controller gets powered on. */ - if (test_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks) && + if (hci_test_quirk(hdev, HCI_QUIRK_NON_PERSISTENT_DIAG) && (!test_bit(HCI_RUNNING, &hdev->flags) || hci_dev_test_flag(hdev, HCI_USER_CHANNEL))) goto done; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 992131f88a45..cf4b30ac9e0e 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -908,8 +908,8 @@ static u8 hci_cc_read_local_ext_features(struct hci_dev *hdev, void *data, return rp->status; if (hdev->max_page < rp->max_page) { - if (test_bit(HCI_QUIRK_BROKEN_LOCAL_EXT_FEATURES_PAGE_2, - &hdev->quirks)) + if (hci_test_quirk(hdev, + HCI_QUIRK_BROKEN_LOCAL_EXT_FEATURES_PAGE_2)) bt_dev_warn(hdev, "broken local ext features page 2"); else hdev->max_page = rp->max_page; @@ -936,7 +936,7 @@ static u8 hci_cc_read_buffer_size(struct hci_dev *hdev, void *data, hdev->acl_pkts = __le16_to_cpu(rp->acl_max_pkt); hdev->sco_pkts = __le16_to_cpu(rp->sco_max_pkt); - if (test_bit(HCI_QUIRK_FIXUP_BUFFER_SIZE, &hdev->quirks)) { + if (hci_test_quirk(hdev, HCI_QUIRK_FIXUP_BUFFER_SIZE)) { hdev->sco_mtu = 64; hdev->sco_pkts = 8; } @@ -2971,7 +2971,7 @@ static void hci_inquiry_complete_evt(struct hci_dev *hdev, void *data, * state to indicate completion. */ if (!hci_dev_test_flag(hdev, HCI_LE_SCAN) || - !test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks)) + !hci_test_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY)) hci_discovery_set_state(hdev, DISCOVERY_STOPPED); goto unlock; } @@ -2990,7 +2990,7 @@ static void hci_inquiry_complete_evt(struct hci_dev *hdev, void *data, * state to indicate completion. */ if (!hci_dev_test_flag(hdev, HCI_LE_SCAN) || - !test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks)) + !hci_test_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY)) hci_discovery_set_state(hdev, DISCOVERY_STOPPED); } @@ -3614,8 +3614,7 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, void *data, /* We skip the WRITE_AUTH_PAYLOAD_TIMEOUT for ATS2851 based controllers * to avoid unexpected SMP command errors when pairing. */ - if (test_bit(HCI_QUIRK_BROKEN_WRITE_AUTH_PAYLOAD_TIMEOUT, - &hdev->quirks)) + if (hci_test_quirk(hdev, HCI_QUIRK_BROKEN_WRITE_AUTH_PAYLOAD_TIMEOUT)) goto notify; /* Set the default Authenticated Payload Timeout after @@ -5914,7 +5913,7 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev, * while we have an existing one in peripheral role. */ if (hdev->conn_hash.le_num_peripheral > 0 && - (test_bit(HCI_QUIRK_BROKEN_LE_STATES, &hdev->quirks) || + (hci_test_quirk(hdev, HCI_QUIRK_BROKEN_LE_STATES) || !(hdev->le_states[3] & 0x10))) return NULL; @@ -6310,8 +6309,8 @@ static void hci_le_ext_adv_report_evt(struct hci_dev *hdev, void *data, evt_type = __le16_to_cpu(info->type) & LE_EXT_ADV_EVT_TYPE_MASK; legacy_evt_type = ext_evt_type_to_legacy(hdev, evt_type); - if (test_bit(HCI_QUIRK_FIXUP_LE_EXT_ADV_REPORT_PHY, - &hdev->quirks)) { + if (hci_test_quirk(hdev, + HCI_QUIRK_FIXUP_LE_EXT_ADV_REPORT_PHY)) { info->primary_phy &= 0x1f; info->secondary_phy &= 0x1f; } diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index acbf06aa3dd7..7938c004071c 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -393,7 +393,7 @@ static void le_scan_disable(struct work_struct *work) if (hdev->discovery.type != DISCOV_TYPE_INTERLEAVED) goto _return; - if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks)) { + if (hci_test_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY)) { if (!test_bit(HCI_INQUIRY, &hdev->flags) && hdev->discovery.state != DISCOVERY_RESOLVING) goto discov_stopped; @@ -3587,7 +3587,7 @@ static void hci_dev_get_bd_addr_from_property(struct hci_dev *hdev) if (ret < 0 || !bacmp(&ba, BDADDR_ANY)) return; - if (test_bit(HCI_QUIRK_BDADDR_PROPERTY_BROKEN, &hdev->quirks)) + if (hci_test_quirk(hdev, HCI_QUIRK_BDADDR_PROPERTY_BROKEN)) baswap(&hdev->public_addr, &ba); else bacpy(&hdev->public_addr, &ba); @@ -3662,7 +3662,7 @@ static int hci_init0_sync(struct hci_dev *hdev) bt_dev_dbg(hdev, ""); /* Reset */ - if (!test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) { + if (!hci_test_quirk(hdev, HCI_QUIRK_RESET_ON_CLOSE)) { err = hci_reset_sync(hdev); if (err) return err; @@ -3675,7 +3675,7 @@ static int hci_unconf_init_sync(struct hci_dev *hdev) { int err; - if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) + if (hci_test_quirk(hdev, HCI_QUIRK_RAW_DEVICE)) return 0; err = hci_init0_sync(hdev); @@ -3718,7 +3718,7 @@ static int hci_read_local_cmds_sync(struct hci_dev *hdev) * supported commands. */ if (hdev->hci_ver > BLUETOOTH_VER_1_1 && - !test_bit(HCI_QUIRK_BROKEN_LOCAL_COMMANDS, &hdev->quirks)) + !hci_test_quirk(hdev, HCI_QUIRK_BROKEN_LOCAL_COMMANDS)) return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL, HCI_CMD_TIMEOUT); @@ -3732,7 +3732,7 @@ static int hci_init1_sync(struct hci_dev *hdev) bt_dev_dbg(hdev, ""); /* Reset */ - if (!test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) { + if (!hci_test_quirk(hdev, HCI_QUIRK_RESET_ON_CLOSE)) { err = hci_reset_sync(hdev); if (err) return err; @@ -3795,7 +3795,7 @@ static int hci_set_event_filter_sync(struct hci_dev *hdev, u8 flt_type, if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) return 0; - if (test_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks)) + if (hci_test_quirk(hdev, HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL)) return 0; memset(&cp, 0, sizeof(cp)); @@ -3822,7 +3822,7 @@ static int hci_clear_event_filter_sync(struct hci_dev *hdev) * a hci_set_event_filter_sync() call succeeds, but we do * the check both for parity and as a future reminder. */ - if (test_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks)) + if (hci_test_quirk(hdev, HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL)) return 0; return hci_set_event_filter_sync(hdev, HCI_FLT_CLEAR_ALL, 0x00, @@ -3846,7 +3846,7 @@ static int hci_write_sync_flowctl_sync(struct hci_dev *hdev) /* Check if the controller supports SCO and HCI_OP_WRITE_SYNC_FLOWCTL */ if (!lmp_sco_capable(hdev) || !(hdev->commands[10] & BIT(4)) || - !test_bit(HCI_QUIRK_SYNC_FLOWCTL_SUPPORTED, &hdev->quirks)) + !hci_test_quirk(hdev, HCI_QUIRK_SYNC_FLOWCTL_SUPPORTED)) return 0; memset(&cp, 0, sizeof(cp)); @@ -3921,7 +3921,7 @@ static int hci_write_inquiry_mode_sync(struct hci_dev *hdev) u8 mode; if (!lmp_inq_rssi_capable(hdev) && - !test_bit(HCI_QUIRK_FIXUP_INQUIRY_MODE, &hdev->quirks)) + !hci_test_quirk(hdev, HCI_QUIRK_FIXUP_INQUIRY_MODE)) return 0; /* If Extended Inquiry Result events are supported, then @@ -4111,7 +4111,7 @@ static int hci_set_event_mask_sync(struct hci_dev *hdev) } if (lmp_inq_rssi_capable(hdev) || - test_bit(HCI_QUIRK_FIXUP_INQUIRY_MODE, &hdev->quirks)) + hci_test_quirk(hdev, HCI_QUIRK_FIXUP_INQUIRY_MODE)) events[4] |= 0x02; /* Inquiry Result with RSSI */ if (lmp_ext_feat_capable(hdev)) @@ -4163,7 +4163,7 @@ static int hci_read_stored_link_key_sync(struct hci_dev *hdev) struct hci_cp_read_stored_link_key cp; if (!(hdev->commands[6] & 0x20) || - test_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks)) + hci_test_quirk(hdev, HCI_QUIRK_BROKEN_STORED_LINK_KEY)) return 0; memset(&cp, 0, sizeof(cp)); @@ -4212,7 +4212,7 @@ static int hci_read_def_err_data_reporting_sync(struct hci_dev *hdev) { if (!(hdev->commands[18] & 0x04) || !(hdev->features[0][6] & LMP_ERR_DATA_REPORTING) || - test_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks)) + hci_test_quirk(hdev, HCI_QUIRK_BROKEN_ERR_DATA_REPORTING)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_READ_DEF_ERR_DATA_REPORTING, @@ -4226,7 +4226,7 @@ static int hci_read_page_scan_type_sync(struct hci_dev *hdev) * this command in the bit mask of supported commands. */ if (!(hdev->commands[13] & 0x01) || - test_bit(HCI_QUIRK_BROKEN_READ_PAGE_SCAN_TYPE, &hdev->quirks)) + hci_test_quirk(hdev, HCI_QUIRK_BROKEN_READ_PAGE_SCAN_TYPE)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_READ_PAGE_SCAN_TYPE, @@ -4421,7 +4421,7 @@ static int hci_le_read_adv_tx_power_sync(struct hci_dev *hdev) static int hci_le_read_tx_power_sync(struct hci_dev *hdev) { if (!(hdev->commands[38] & 0x80) || - test_bit(HCI_QUIRK_BROKEN_READ_TRANSMIT_POWER, &hdev->quirks)) + hci_test_quirk(hdev, HCI_QUIRK_BROKEN_READ_TRANSMIT_POWER)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_LE_READ_TRANSMIT_POWER, @@ -4464,7 +4464,7 @@ static int hci_le_set_rpa_timeout_sync(struct hci_dev *hdev) __le16 timeout = cpu_to_le16(hdev->rpa_timeout); if (!(hdev->commands[35] & 0x04) || - test_bit(HCI_QUIRK_BROKEN_SET_RPA_TIMEOUT, &hdev->quirks)) + hci_test_quirk(hdev, HCI_QUIRK_BROKEN_SET_RPA_TIMEOUT)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_RPA_TIMEOUT, @@ -4609,7 +4609,7 @@ static int hci_delete_stored_link_key_sync(struct hci_dev *hdev) * just disable this command. */ if (!(hdev->commands[6] & 0x80) || - test_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks)) + hci_test_quirk(hdev, HCI_QUIRK_BROKEN_STORED_LINK_KEY)) return 0; memset(&cp, 0, sizeof(cp)); @@ -4735,7 +4735,7 @@ static int hci_set_err_data_report_sync(struct hci_dev *hdev) if (!(hdev->commands[18] & 0x08) || !(hdev->features[0][6] & LMP_ERR_DATA_REPORTING) || - test_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks)) + hci_test_quirk(hdev, HCI_QUIRK_BROKEN_ERR_DATA_REPORTING)) return 0; if (enabled == hdev->err_data_reporting) @@ -4948,7 +4948,7 @@ static int hci_dev_setup_sync(struct hci_dev *hdev) size_t i; if (!hci_dev_test_flag(hdev, HCI_SETUP) && - !test_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks)) + !hci_test_quirk(hdev, HCI_QUIRK_NON_PERSISTENT_SETUP)) return 0; bt_dev_dbg(hdev, ""); @@ -4959,7 +4959,7 @@ static int hci_dev_setup_sync(struct hci_dev *hdev) ret = hdev->setup(hdev); for (i = 0; i < ARRAY_SIZE(hci_broken_table); i++) { - if (test_bit(hci_broken_table[i].quirk, &hdev->quirks)) + if (hci_test_quirk(hdev, hci_broken_table[i].quirk)) bt_dev_warn(hdev, "%s", hci_broken_table[i].desc); } @@ -4967,10 +4967,10 @@ static int hci_dev_setup_sync(struct hci_dev *hdev) * BD_ADDR invalid before creating the HCI device or in * its setup callback. */ - invalid_bdaddr = test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks) || - test_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); + invalid_bdaddr = hci_test_quirk(hdev, HCI_QUIRK_INVALID_BDADDR) || + hci_test_quirk(hdev, HCI_QUIRK_USE_BDADDR_PROPERTY); if (!ret) { - if (test_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks) && + if (hci_test_quirk(hdev, HCI_QUIRK_USE_BDADDR_PROPERTY) && !bacmp(&hdev->public_addr, BDADDR_ANY)) hci_dev_get_bd_addr_from_property(hdev); @@ -4992,7 +4992,7 @@ static int hci_dev_setup_sync(struct hci_dev *hdev) * In case any of them is set, the controller has to * start up as unconfigured. */ - if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) || + if (hci_test_quirk(hdev, HCI_QUIRK_EXTERNAL_CONFIG) || invalid_bdaddr) hci_dev_set_flag(hdev, HCI_UNCONFIGURED); @@ -5052,7 +5052,7 @@ static int hci_dev_init_sync(struct hci_dev *hdev) * then they need to be reprogrammed after the init procedure * completed. */ - if (test_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks) && + if (hci_test_quirk(hdev, HCI_QUIRK_NON_PERSISTENT_DIAG) && !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && hci_dev_test_flag(hdev, HCI_VENDOR_DIAG) && hdev->set_diag) ret = hdev->set_diag(hdev, true); @@ -5309,7 +5309,7 @@ int hci_dev_close_sync(struct hci_dev *hdev) /* Reset device */ skb_queue_purge(&hdev->cmd_q); atomic_set(&hdev->cmd_cnt, 1); - if (test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks) && + if (hci_test_quirk(hdev, HCI_QUIRK_RESET_ON_CLOSE) && !auto_off && !hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { set_bit(HCI_INIT, &hdev->flags); hci_reset_sync(hdev); @@ -5959,7 +5959,7 @@ static int hci_active_scan_sync(struct hci_dev *hdev, uint16_t interval) own_addr_type = ADDR_LE_DEV_PUBLIC; if (hci_is_adv_monitoring(hdev) || - (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) && + (hci_test_quirk(hdev, HCI_QUIRK_STRICT_DUPLICATE_FILTER) && hdev->discovery.result_filtering)) { /* Duplicate filter should be disabled when some advertisement * monitor is activated, otherwise AdvMon can only receive one @@ -6022,8 +6022,7 @@ int hci_start_discovery_sync(struct hci_dev *hdev) * and LE scanning are done sequentially with separate * timeouts. */ - if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, - &hdev->quirks)) { + if (hci_test_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY)) { timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT); /* During simultaneous discovery, we double LE scan * interval. We must leave some time for the controller @@ -6100,7 +6099,7 @@ static int hci_update_event_filter_sync(struct hci_dev *hdev) /* Some fake CSR controllers lock up after setting this type of * filter, so avoid sending the request altogether. */ - if (test_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks)) + if (hci_test_quirk(hdev, HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL)) return 0; /* Always clear event filter when starting */ diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 1485b455ade4..63dba0503653 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -464,7 +464,7 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data, /* Devices marked as raw-only are neither configured * nor unconfigured controllers. */ - if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks)) + if (hci_test_quirk(d, HCI_QUIRK_RAW_DEVICE)) continue; if (!hci_dev_test_flag(d, HCI_UNCONFIGURED)) { @@ -522,7 +522,7 @@ static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev, /* Devices marked as raw-only are neither configured * nor unconfigured controllers. */ - if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks)) + if (hci_test_quirk(d, HCI_QUIRK_RAW_DEVICE)) continue; if (hci_dev_test_flag(d, HCI_UNCONFIGURED)) { @@ -576,7 +576,7 @@ static int read_ext_index_list(struct sock *sk, struct hci_dev *hdev, /* Devices marked as raw-only are neither configured * nor unconfigured controllers. */ - if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks)) + if (hci_test_quirk(d, HCI_QUIRK_RAW_DEVICE)) continue; if (hci_dev_test_flag(d, HCI_UNCONFIGURED)) @@ -612,12 +612,12 @@ static int read_ext_index_list(struct sock *sk, struct hci_dev *hdev, static bool is_configured(struct hci_dev *hdev) { - if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) && + if (hci_test_quirk(hdev, HCI_QUIRK_EXTERNAL_CONFIG) && !hci_dev_test_flag(hdev, HCI_EXT_CONFIGURED)) return false; - if ((test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks) || - test_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks)) && + if ((hci_test_quirk(hdev, HCI_QUIRK_INVALID_BDADDR) || + hci_test_quirk(hdev, HCI_QUIRK_USE_BDADDR_PROPERTY)) && !bacmp(&hdev->public_addr, BDADDR_ANY)) return false; @@ -628,12 +628,12 @@ static __le32 get_missing_options(struct hci_dev *hdev) { u32 options = 0; - if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) && + if (hci_test_quirk(hdev, HCI_QUIRK_EXTERNAL_CONFIG) && !hci_dev_test_flag(hdev, HCI_EXT_CONFIGURED)) options |= MGMT_OPTION_EXTERNAL_CONFIG; - if ((test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks) || - test_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks)) && + if ((hci_test_quirk(hdev, HCI_QUIRK_INVALID_BDADDR) || + hci_test_quirk(hdev, HCI_QUIRK_USE_BDADDR_PROPERTY)) && !bacmp(&hdev->public_addr, BDADDR_ANY)) options |= MGMT_OPTION_PUBLIC_ADDRESS; @@ -669,7 +669,7 @@ static int read_config_info(struct sock *sk, struct hci_dev *hdev, memset(&rp, 0, sizeof(rp)); rp.manufacturer = cpu_to_le16(hdev->manufacturer); - if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks)) + if (hci_test_quirk(hdev, HCI_QUIRK_EXTERNAL_CONFIG)) options |= MGMT_OPTION_EXTERNAL_CONFIG; if (hdev->set_bdaddr) @@ -828,8 +828,7 @@ static u32 get_supported_settings(struct hci_dev *hdev) if (lmp_sc_capable(hdev)) settings |= MGMT_SETTING_SECURE_CONN; - if (test_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, - &hdev->quirks)) + if (hci_test_quirk(hdev, HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED)) settings |= MGMT_SETTING_WIDEBAND_SPEECH; } @@ -841,8 +840,7 @@ static u32 get_supported_settings(struct hci_dev *hdev) settings |= MGMT_SETTING_ADVERTISING; } - if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) || - hdev->set_bdaddr) + if (hci_test_quirk(hdev, HCI_QUIRK_EXTERNAL_CONFIG) || hdev->set_bdaddr) settings |= MGMT_SETTING_CONFIGURATION; if (cis_central_capable(hdev)) @@ -4307,7 +4305,7 @@ static int set_wideband_speech(struct sock *sk, struct hci_dev *hdev, bt_dev_dbg(hdev, "sock %p", sk); - if (!test_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks)) + if (!hci_test_quirk(hdev, HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_WIDEBAND_SPEECH, MGMT_STATUS_NOT_SUPPORTED); @@ -7935,7 +7933,7 @@ static int set_external_config(struct sock *sk, struct hci_dev *hdev, return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_EXTERNAL_CONFIG, MGMT_STATUS_INVALID_PARAMS); - if (!test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks)) + if (!hci_test_quirk(hdev, HCI_QUIRK_EXTERNAL_CONFIG)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_EXTERNAL_CONFIG, MGMT_STATUS_NOT_SUPPORTED); @@ -9338,7 +9336,7 @@ void mgmt_index_added(struct hci_dev *hdev) { struct mgmt_ev_ext_index ev; - if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) + if (hci_test_quirk(hdev, HCI_QUIRK_RAW_DEVICE)) return; if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { @@ -9362,7 +9360,7 @@ void mgmt_index_removed(struct hci_dev *hdev) struct mgmt_ev_ext_index ev; struct cmd_lookup match = { NULL, hdev, MGMT_STATUS_INVALID_INDEX }; - if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) + if (hci_test_quirk(hdev, HCI_QUIRK_RAW_DEVICE)) return; mgmt_pending_foreach(0, hdev, true, cmd_complete_rsp, &match); @@ -10089,7 +10087,7 @@ static bool is_filter_match(struct hci_dev *hdev, s8 rssi, u8 *eir, if (hdev->discovery.rssi != HCI_RSSI_INVALID && (rssi == HCI_RSSI_INVALID || (rssi < hdev->discovery.rssi && - !test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks)))) + !hci_test_quirk(hdev, HCI_QUIRK_STRICT_DUPLICATE_FILTER)))) return false; if (hdev->discovery.uuid_count != 0) { @@ -10107,7 +10105,7 @@ static bool is_filter_match(struct hci_dev *hdev, s8 rssi, u8 *eir, /* If duplicate filtering does not report RSSI changes, then restart * scanning to ensure updated result with updated RSSI values. */ - if (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks)) { + if (hci_test_quirk(hdev, HCI_QUIRK_STRICT_DUPLICATE_FILTER)) { /* Validate RSSI value against the RSSI threshold once more. */ if (hdev->discovery.rssi != HCI_RSSI_INVALID && rssi < hdev->discovery.rssi) diff --git a/net/bluetooth/msft.c b/net/bluetooth/msft.c index 5a8ccc491b14..c560d8467669 100644 --- a/net/bluetooth/msft.c +++ b/net/bluetooth/msft.c @@ -989,7 +989,7 @@ static void msft_monitor_device_evt(struct hci_dev *hdev, struct sk_buff *skb) handle_data = msft_find_handle_data(hdev, ev->monitor_handle, false); - if (!test_bit(HCI_QUIRK_USE_MSFT_EXT_ADDRESS_FILTER, &hdev->quirks)) { + if (!hci_test_quirk(hdev, HCI_QUIRK_USE_MSFT_EXT_ADDRESS_FILTER)) { if (!handle_data) return; mgmt_handle = handle_data->mgmt_handle; -- cgit v1.2.3 From 2d72afb340657f03f7261e9243b44457a9228ac7 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 16 Jul 2025 20:39:14 +0200 Subject: netfilter: nf_conntrack: fix crash due to removal of uninitialised entry A crash in conntrack was reported while trying to unlink the conntrack entry from the hash bucket list: [exception RIP: __nf_ct_delete_from_lists+172] [..] #7 [ff539b5a2b043aa0] nf_ct_delete at ffffffffc124d421 [nf_conntrack] #8 [ff539b5a2b043ad0] nf_ct_gc_expired at ffffffffc124d999 [nf_conntrack] #9 [ff539b5a2b043ae0] __nf_conntrack_find_get at ffffffffc124efbc [nf_conntrack] [..] The nf_conn struct is marked as allocated from slab but appears to be in a partially initialised state: ct hlist pointer is garbage; looks like the ct hash value (hence crash). ct->status is equal to IPS_CONFIRMED|IPS_DYING, which is expected ct->timeout is 30000 (=30s), which is unexpected. Everything else looks like normal udp conntrack entry. If we ignore ct->status and pretend its 0, the entry matches those that are newly allocated but not yet inserted into the hash: - ct hlist pointers are overloaded and store/cache the raw tuple hash - ct->timeout matches the relative time expected for a new udp flow rather than the absolute 'jiffies' value. If it were not for the presence of IPS_CONFIRMED, __nf_conntrack_find_get() would have skipped the entry. Theory is that we did hit following race: cpu x cpu y cpu z found entry E found entry E E is expired nf_ct_delete() return E to rcu slab init_conntrack E is re-inited, ct->status set to 0 reply tuplehash hnnode.pprev stores hash value. cpu y found E right before it was deleted on cpu x. E is now re-inited on cpu z. cpu y was preempted before checking for expiry and/or confirm bit. ->refcnt set to 1 E now owned by skb ->timeout set to 30000 If cpu y were to resume now, it would observe E as expired but would skip E due to missing CONFIRMED bit. nf_conntrack_confirm gets called sets: ct->status |= CONFIRMED This is wrong: E is not yet added to hashtable. cpu y resumes, it observes E as expired but CONFIRMED: nf_ct_expired() -> yes (ct->timeout is 30s) confirmed bit set. cpu y will try to delete E from the hashtable: nf_ct_delete() -> set DYING bit __nf_ct_delete_from_lists Even this scenario doesn't guarantee a crash: cpu z still holds the table bucket lock(s) so y blocks: wait for spinlock held by z CONFIRMED is set but there is no guarantee ct will be added to hash: "chaintoolong" or "clash resolution" logic both skip the insert step. reply hnnode.pprev still stores the hash value. unlocks spinlock return NF_DROP In case CPU z does insert the entry into the hashtable, cpu y will unlink E again right away but no crash occurs. Without 'cpu y' race, 'garbage' hlist is of no consequence: ct refcnt remains at 1, eventually skb will be free'd and E gets destroyed via: nf_conntrack_put -> nf_conntrack_destroy -> nf_ct_destroy. To resolve this, move the IPS_CONFIRMED assignment after the table insertion but before the unlock. Pablo points out that the confirm-bit-store could be reordered to happen before hlist add resp. the timeout fixup, so switch to set_bit and before_atomic memory barrier to prevent this. It doesn't matter if other CPUs can observe a newly inserted entry right before the CONFIRMED bit was set: Such event cannot be distinguished from above "E is the old incarnation" case: the entry will be skipped. Also change nf_ct_should_gc() to first check the confirmed bit. The gc sequence is: 1. Check if entry has expired, if not skip to next entry 2. Obtain a reference to the expired entry. 3. Call nf_ct_should_gc() to double-check step 1. nf_ct_should_gc() is thus called only for entries that already failed an expiry check. After this patch, once the confirmed bit check passes ct->timeout has been altered to reflect the absolute 'best before' date instead of a relative time. Step 3 will therefore not remove the entry. Without this change to nf_ct_should_gc() we could still get this sequence: 1. Check if entry has expired. 2. Obtain a reference. 3. Call nf_ct_should_gc() to double-check step 1: 4 - entry is still observed as expired 5 - meanwhile, ct->timeout is corrected to absolute value on other CPU and confirm bit gets set 6 - confirm bit is seen 7 - valid entry is removed again First do check 6), then 4) so the gc expiry check always picks up either confirmed bit unset (entry gets skipped) or expiry re-check failure for re-inited conntrack objects. This change cannot be backported to releases before 5.19. Without commit 8a75a2c17410 ("netfilter: conntrack: remove unconfirmed list") |= IPS_CONFIRMED line cannot be moved without further changes. Cc: Razvan Cojocaru Link: https://lore.kernel.org/netfilter-devel/20250627142758.25664-1-fw@strlen.de/ Link: https://lore.kernel.org/netfilter-devel/4239da15-83ff-4ca4-939d-faef283471bb@gmail.com/ Fixes: 1397af5bfd7d ("netfilter: conntrack: remove the percpu dying list") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 15 +++++++++++++-- net/netfilter/nf_conntrack_core.c | 26 ++++++++++++++++++++------ 2 files changed, 33 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 3f02a45773e8..ca26274196b9 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -306,8 +306,19 @@ static inline bool nf_ct_is_expired(const struct nf_conn *ct) /* use after obtaining a reference count */ static inline bool nf_ct_should_gc(const struct nf_conn *ct) { - return nf_ct_is_expired(ct) && nf_ct_is_confirmed(ct) && - !nf_ct_is_dying(ct); + if (!nf_ct_is_confirmed(ct)) + return false; + + /* load ct->timeout after is_confirmed() test. + * Pairs with __nf_conntrack_confirm() which: + * 1. Increases ct->timeout value + * 2. Inserts ct into rcu hlist + * 3. Sets the confirmed bit + * 4. Unlocks the hlist lock + */ + smp_acquire__after_ctrl_dep(); + + return nf_ct_is_expired(ct) && !nf_ct_is_dying(ct); } #define NF_CT_DAY (86400 * HZ) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 201d3c4ec623..e51f0b441109 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1124,6 +1124,12 @@ static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx) hlist_nulls_add_head_rcu(&loser_ct->tuplehash[IP_CT_DIR_REPLY].hnnode, &nf_conntrack_hash[repl_idx]); + /* confirmed bit must be set after hlist add, not before: + * loser_ct can still be visible to other cpu due to + * SLAB_TYPESAFE_BY_RCU. + */ + smp_mb__before_atomic(); + set_bit(IPS_CONFIRMED_BIT, &loser_ct->status); NF_CT_STAT_INC(net, clash_resolve); return NF_ACCEPT; @@ -1260,8 +1266,6 @@ __nf_conntrack_confirm(struct sk_buff *skb) * user context, else we insert an already 'dead' hash, blocking * further use of that particular connection -JM. */ - ct->status |= IPS_CONFIRMED; - if (unlikely(nf_ct_is_dying(ct))) { NF_CT_STAT_INC(net, insert_failed); goto dying; @@ -1293,7 +1297,7 @@ chaintoolong: } } - /* Timer relative to confirmation time, not original + /* Timeout is relative to confirmation time, not original setting time, otherwise we'd get timer wrap in weird delay cases. */ ct->timeout += nfct_time_stamp; @@ -1301,11 +1305,21 @@ chaintoolong: __nf_conntrack_insert_prepare(ct); /* Since the lookup is lockless, hash insertion must be done after - * starting the timer and setting the CONFIRMED bit. The RCU barriers - * guarantee that no other CPU can find the conntrack before the above - * stores are visible. + * setting ct->timeout. The RCU barriers guarantee that no other CPU + * can find the conntrack before the above stores are visible. */ __nf_conntrack_hash_insert(ct, hash, reply_hash); + + /* IPS_CONFIRMED unset means 'ct not (yet) in hash', conntrack lookups + * skip entries that lack this bit. This happens when a CPU is looking + * at a stale entry that is being recycled due to SLAB_TYPESAFE_BY_RCU + * or when another CPU encounters this entry right after the insertion + * but before the set-confirm-bit below. This bit must not be set until + * after __nf_conntrack_hash_insert(). + */ + smp_mb__before_atomic(); + set_bit(IPS_CONFIRMED_BIT, &ct->status); + nf_conntrack_double_unlock(hash, reply_hash); local_bh_enable(); -- cgit v1.2.3 From 962fb1f651c2cf2083e0c3ef53ba69e3b96d3fbc Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 17 Jul 2025 08:43:42 +0100 Subject: rxrpc: Fix recv-recv race of completed call If a call receives an event (such as incoming data), the call gets placed on the socket's queue and a thread in recvmsg can be awakened to go and process it. Once the thread has picked up the call off of the queue, further events will cause it to be requeued, and once the socket lock is dropped (recvmsg uses call->user_mutex to allow the socket to be used in parallel), a second thread can come in and its recvmsg can pop the call off the socket queue again. In such a case, the first thread will be receiving stuff from the call and the second thread will be blocked on call->user_mutex. The first thread can, at this point, process both the event that it picked call for and the event that the second thread picked the call for and may see the call terminate - in which case the call will be "released", decoupling the call from the user call ID assigned to it (RXRPC_USER_CALL_ID in the control message). The first thread will return okay, but then the second thread will wake up holding the user_mutex and, if it sees that the call has been released by the first thread, it will BUG thusly: kernel BUG at net/rxrpc/recvmsg.c:474! Fix this by just dequeuing the call and ignoring it if it is seen to be already released. We can't tell userspace about it anyway as the user call ID has become stale. Fixes: 248f219cb8bc ("rxrpc: Rewrite the data and ack handling code") Reported-by: Junvyyang, Tencent Zhuque Lab Signed-off-by: David Howells Reviewed-by: Jeffrey Altman cc: LePremierHomme cc: Marc Dionne cc: Simon Horman cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20250717074350.3767366-3-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 3 +++ net/rxrpc/call_accept.c | 1 + net/rxrpc/recvmsg.c | 19 +++++++++++++++++-- 3 files changed, 21 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 378d2dfc7392..e7dcfb1369b6 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -330,12 +330,15 @@ EM(rxrpc_call_put_userid, "PUT user-id ") \ EM(rxrpc_call_see_accept, "SEE accept ") \ EM(rxrpc_call_see_activate_client, "SEE act-clnt") \ + EM(rxrpc_call_see_already_released, "SEE alrdy-rl") \ EM(rxrpc_call_see_connect_failed, "SEE con-fail") \ EM(rxrpc_call_see_connected, "SEE connect ") \ EM(rxrpc_call_see_conn_abort, "SEE conn-abt") \ + EM(rxrpc_call_see_discard, "SEE discard ") \ EM(rxrpc_call_see_disconnected, "SEE disconn ") \ EM(rxrpc_call_see_distribute_error, "SEE dist-err") \ EM(rxrpc_call_see_input, "SEE input ") \ + EM(rxrpc_call_see_recvmsg, "SEE recvmsg ") \ EM(rxrpc_call_see_release, "SEE release ") \ EM(rxrpc_call_see_userid_exists, "SEE u-exists") \ EM(rxrpc_call_see_waiting_call, "SEE q-conn ") \ diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 226b4bf82747..a4d76f2da684 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -219,6 +219,7 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx) tail = b->call_backlog_tail; while (CIRC_CNT(head, tail, size) > 0) { struct rxrpc_call *call = b->call_backlog[tail]; + rxrpc_see_call(call, rxrpc_call_see_discard); rcu_assign_pointer(call->socket, rx); if (rx->app_ops && rx->app_ops->discard_new_call) { diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 86a27fb55a1c..6990e37697de 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -447,6 +447,16 @@ try_again: goto try_again; } + rxrpc_see_call(call, rxrpc_call_see_recvmsg); + if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) { + rxrpc_see_call(call, rxrpc_call_see_already_released); + list_del_init(&call->recvmsg_link); + spin_unlock_irq(&rx->recvmsg_lock); + release_sock(&rx->sk); + trace_rxrpc_recvmsg(call->debug_id, rxrpc_recvmsg_unqueue, 0); + rxrpc_put_call(call, rxrpc_call_put_recvmsg); + goto try_again; + } if (!(flags & MSG_PEEK)) list_del_init(&call->recvmsg_link); else @@ -470,8 +480,13 @@ try_again: release_sock(&rx->sk); - if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) - BUG(); + if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) { + rxrpc_see_call(call, rxrpc_call_see_already_released); + mutex_unlock(&call->user_mutex); + if (!(flags & MSG_PEEK)) + rxrpc_put_call(call, rxrpc_call_put_recvmsg); + goto try_again; + } ret = rxrpc_recvmsg_user_id(call, msg, flags); if (ret < 0) -- cgit v1.2.3 From 2fd895842d49c23137ae48252dd211e5d6d8a3ed Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 17 Jul 2025 08:43:43 +0100 Subject: rxrpc: Fix notification vs call-release vs recvmsg When a call is released, rxrpc takes the spinlock and removes it from ->recvmsg_q in an effort to prevent racing recvmsg() invocations from seeing the same call. Now, rxrpc_recvmsg() only takes the spinlock when actually removing a call from the queue; it doesn't, however, take it in the lead up to that when it checks to see if the queue is empty. It *does* hold the socket lock, which prevents a recvmsg/recvmsg race - but this doesn't prevent sendmsg from ending the call because sendmsg() drops the socket lock and relies on the call->user_mutex. Fix this by firstly removing the bit in rxrpc_release_call() that dequeues the released call and, instead, rely on recvmsg() to simply discard released calls (done in a preceding fix). Secondly, rxrpc_notify_socket() is abandoned if the call is already marked as released rather than trying to be clever by setting both pointers in call->recvmsg_link to NULL to trick list_empty(). This isn't perfect and can still race, resulting in a released call on the queue, but recvmsg() will now clean that up. Fixes: 17926a79320a ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both") Signed-off-by: David Howells Reviewed-by: Jeffrey Altman cc: Marc Dionne cc: Junvyyang, Tencent Zhuque Lab cc: LePremierHomme cc: Simon Horman cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20250717074350.3767366-4-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 3 ++- net/rxrpc/call_object.c | 28 ++++++++++++---------------- net/rxrpc/recvmsg.c | 4 ++++ 3 files changed, 18 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index e7dcfb1369b6..de6f6d25767c 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -322,10 +322,10 @@ EM(rxrpc_call_put_kernel, "PUT kernel ") \ EM(rxrpc_call_put_poke, "PUT poke ") \ EM(rxrpc_call_put_recvmsg, "PUT recvmsg ") \ + EM(rxrpc_call_put_release_recvmsg_q, "PUT rls-rcmq") \ EM(rxrpc_call_put_release_sock, "PUT rls-sock") \ EM(rxrpc_call_put_release_sock_tba, "PUT rls-sk-a") \ EM(rxrpc_call_put_sendmsg, "PUT sendmsg ") \ - EM(rxrpc_call_put_unnotify, "PUT unnotify") \ EM(rxrpc_call_put_userid_exists, "PUT u-exists") \ EM(rxrpc_call_put_userid, "PUT user-id ") \ EM(rxrpc_call_see_accept, "SEE accept ") \ @@ -338,6 +338,7 @@ EM(rxrpc_call_see_disconnected, "SEE disconn ") \ EM(rxrpc_call_see_distribute_error, "SEE dist-err") \ EM(rxrpc_call_see_input, "SEE input ") \ + EM(rxrpc_call_see_notify_released, "SEE nfy-rlsd") \ EM(rxrpc_call_see_recvmsg, "SEE recvmsg ") \ EM(rxrpc_call_see_release, "SEE release ") \ EM(rxrpc_call_see_userid_exists, "SEE u-exists") \ diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 15067ff7b1f2..918f41d97a2f 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -561,7 +561,7 @@ static void rxrpc_cleanup_rx_buffers(struct rxrpc_call *call) void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) { struct rxrpc_connection *conn = call->conn; - bool put = false, putu = false; + bool putu = false; _enter("{%d,%d}", call->debug_id, refcount_read(&call->ref)); @@ -573,23 +573,13 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) rxrpc_put_call_slot(call); - /* Make sure we don't get any more notifications */ + /* Note that at this point, the call may still be on or may have been + * added back on to the socket receive queue. recvmsg() must discard + * released calls. The CALL_RELEASED flag should prevent further + * notifications. + */ spin_lock_irq(&rx->recvmsg_lock); - - if (!list_empty(&call->recvmsg_link)) { - _debug("unlinking once-pending call %p { e=%lx f=%lx }", - call, call->events, call->flags); - list_del(&call->recvmsg_link); - put = true; - } - - /* list_empty() must return false in rxrpc_notify_socket() */ - call->recvmsg_link.next = NULL; - call->recvmsg_link.prev = NULL; - spin_unlock_irq(&rx->recvmsg_lock); - if (put) - rxrpc_put_call(call, rxrpc_call_put_unnotify); write_lock(&rx->call_lock); @@ -638,6 +628,12 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) rxrpc_put_call(call, rxrpc_call_put_release_sock); } + while ((call = list_first_entry_or_null(&rx->recvmsg_q, + struct rxrpc_call, recvmsg_link))) { + list_del_init(&call->recvmsg_link); + rxrpc_put_call(call, rxrpc_call_put_release_recvmsg_q); + } + _leave(""); } diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 6990e37697de..7fa7e77f6bb9 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -29,6 +29,10 @@ void rxrpc_notify_socket(struct rxrpc_call *call) if (!list_empty(&call->recvmsg_link)) return; + if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) { + rxrpc_see_call(call, rxrpc_call_see_notify_released); + return; + } rcu_read_lock(); -- cgit v1.2.3