From 8a3514d348de87a9d5e2ac00fbac4faae0b97996 Mon Sep 17 00:00:00 2001 From: Sanjeev Yadav Date: Fri, 23 May 2025 13:14:01 -0700 Subject: scsi: core: ufs: Fix a hang in the error handler ufshcd_err_handling_prepare() calls ufshcd_rpm_get_sync(). The latter function can only succeed if UFSHCD_EH_IN_PROGRESS is not set because resuming involves submitting a SCSI command and ufshcd_queuecommand() returns SCSI_MLQUEUE_HOST_BUSY if UFSHCD_EH_IN_PROGRESS is set. Fix this hang by setting UFSHCD_EH_IN_PROGRESS after ufshcd_rpm_get_sync() has been called instead of before. Backtrace: __switch_to+0x174/0x338 __schedule+0x600/0x9e4 schedule+0x7c/0xe8 schedule_timeout+0xa4/0x1c8 io_schedule_timeout+0x48/0x70 wait_for_common_io+0xa8/0x160 //waiting on START_STOP wait_for_completion_io_timeout+0x10/0x20 blk_execute_rq+0xe4/0x1e4 scsi_execute_cmd+0x108/0x244 ufshcd_set_dev_pwr_mode+0xe8/0x250 __ufshcd_wl_resume+0x94/0x354 ufshcd_wl_runtime_resume+0x3c/0x174 scsi_runtime_resume+0x64/0xa4 rpm_resume+0x15c/0xa1c __pm_runtime_resume+0x4c/0x90 // Runtime resume ongoing ufshcd_err_handler+0x1a0/0xd08 process_one_work+0x174/0x808 worker_thread+0x15c/0x490 kthread+0xf4/0x1ec ret_from_fork+0x10/0x20 Signed-off-by: Sanjeev Yadav [ bvanassche: rewrote patch description ] Fixes: 62694735ca95 ("[SCSI] ufs: Add runtime PM support for UFS host controller driver") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20250523201409.1676055-1-bvanassche@acm.org Reviewed-by: Peter Wang Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index a7513f256057..a1d7c8660c1b 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -6623,9 +6623,14 @@ static void ufshcd_err_handler(struct work_struct *work) up(&hba->host_sem); return; } - ufshcd_set_eh_in_progress(hba); spin_unlock_irqrestore(hba->host->host_lock, flags); + ufshcd_err_handling_prepare(hba); + + spin_lock_irqsave(hba->host->host_lock, flags); + ufshcd_set_eh_in_progress(hba); + spin_unlock_irqrestore(hba->host->host_lock, flags); + /* Complete requests that have door-bell cleared by h/w */ ufshcd_complete_requests(hba, false); spin_lock_irqsave(hba->host->host_lock, flags); -- cgit v1.2.3 From 6678791ee3da0b78c28fe7d77814097f53cbb8df Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 3 Jun 2025 08:08:21 +0100 Subject: KVM: arm64: Add assignment-specific sysreg accessor Assigning a value to a system register doesn't do what it is supposed to be doing if that register is one that has RESx bits. The main problem is that we use __vcpu_sys_reg(), which can be used both as a lvalue and rvalue. When used as a lvalue, the bit masking occurs *before* the new value is assigned, meaning that we (1) do pointless work on the old cvalue, and (2) potentially assign an invalid value as we fail to apply the masks to it. Fix this by providing a new __vcpu_assign_sys_reg() that does what it says on the tin, and sanitises the *new* value instead of the old one. This comes with a significant amount of churn. Reviewed-by: Miguel Luis Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20250603070824.1192795-2-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 11 ++++++++ arch/arm64/kvm/arch_timer.c | 16 +++++------ arch/arm64/kvm/hyp/exception.c | 4 +-- arch/arm64/kvm/hyp/include/hyp/switch.h | 4 +-- arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 6 ++-- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 4 +-- arch/arm64/kvm/hyp/vhe/switch.c | 4 +-- arch/arm64/kvm/hyp/vhe/sysreg-sr.c | 44 +++++++++++++++--------------- arch/arm64/kvm/pmu-emul.c | 14 +++++----- arch/arm64/kvm/sys_regs.c | 38 ++++++++++++++------------ arch/arm64/kvm/sys_regs.h | 4 +-- arch/arm64/kvm/vgic/vgic-v3-nested.c | 10 +++---- 12 files changed, 86 insertions(+), 73 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index d941abc6b5ee..3b84ad91116b 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1107,6 +1107,17 @@ static inline u64 *___ctxt_sys_reg(const struct kvm_cpu_context *ctxt, int r) #define ctxt_sys_reg(c,r) (*__ctxt_sys_reg(c,r)) u64 kvm_vcpu_apply_reg_masks(const struct kvm_vcpu *, enum vcpu_sysreg, u64); + +#define __vcpu_assign_sys_reg(v, r, val) \ + do { \ + const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt; \ + u64 __v = (val); \ + if (vcpu_has_nv((v)) && (r) >= __SANITISED_REG_START__) \ + __v = kvm_vcpu_apply_reg_masks((v), (r), __v); \ + \ + ctxt_sys_reg(ctxt, (r)) = __v; \ + } while (0) + #define __vcpu_sys_reg(v,r) \ (*({ \ const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt; \ diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 5133dcbfe9f7..5a67a6d4d95b 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -108,16 +108,16 @@ static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl) switch(arch_timer_ctx_index(ctxt)) { case TIMER_VTIMER: - __vcpu_sys_reg(vcpu, CNTV_CTL_EL0) = ctl; + __vcpu_assign_sys_reg(vcpu, CNTV_CTL_EL0, ctl); break; case TIMER_PTIMER: - __vcpu_sys_reg(vcpu, CNTP_CTL_EL0) = ctl; + __vcpu_assign_sys_reg(vcpu, CNTP_CTL_EL0, ctl); break; case TIMER_HVTIMER: - __vcpu_sys_reg(vcpu, CNTHV_CTL_EL2) = ctl; + __vcpu_assign_sys_reg(vcpu, CNTHV_CTL_EL2, ctl); break; case TIMER_HPTIMER: - __vcpu_sys_reg(vcpu, CNTHP_CTL_EL2) = ctl; + __vcpu_assign_sys_reg(vcpu, CNTHP_CTL_EL2, ctl); break; default: WARN_ON(1); @@ -130,16 +130,16 @@ static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval) switch(arch_timer_ctx_index(ctxt)) { case TIMER_VTIMER: - __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0) = cval; + __vcpu_assign_sys_reg(vcpu, CNTV_CVAL_EL0, cval); break; case TIMER_PTIMER: - __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = cval; + __vcpu_assign_sys_reg(vcpu, CNTP_CVAL_EL0, cval); break; case TIMER_HVTIMER: - __vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2) = cval; + __vcpu_assign_sys_reg(vcpu, CNTHV_CVAL_EL2, cval); break; case TIMER_HPTIMER: - __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = cval; + __vcpu_assign_sys_reg(vcpu, CNTHP_CVAL_EL2, cval); break; default: WARN_ON(1); diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c index 424a5107cddb..6a2a899a344e 100644 --- a/arch/arm64/kvm/hyp/exception.c +++ b/arch/arm64/kvm/hyp/exception.c @@ -37,7 +37,7 @@ static inline void __vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) if (unlikely(vcpu_has_nv(vcpu))) vcpu_write_sys_reg(vcpu, val, reg); else if (!__vcpu_write_sys_reg_to_cpu(val, reg)) - __vcpu_sys_reg(vcpu, reg) = val; + __vcpu_assign_sys_reg(vcpu, reg, val); } static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long target_mode, @@ -51,7 +51,7 @@ static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long target_mode, } else if (has_vhe()) { write_sysreg_el1(val, SYS_SPSR); } else { - __vcpu_sys_reg(vcpu, SPSR_EL1) = val; + __vcpu_assign_sys_reg(vcpu, SPSR_EL1, val); } } diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index eef310cdbdbd..aa5b561b9218 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -45,7 +45,7 @@ static inline void __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu) if (!vcpu_el1_is_32bit(vcpu)) return; - __vcpu_sys_reg(vcpu, FPEXC32_EL2) = read_sysreg(fpexc32_el2); + __vcpu_assign_sys_reg(vcpu, FPEXC32_EL2, read_sysreg(fpexc32_el2)); } static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu) @@ -457,7 +457,7 @@ static inline void fpsimd_lazy_switch_to_host(struct kvm_vcpu *vcpu) */ if (vcpu_has_sve(vcpu)) { zcr_el1 = read_sysreg_el1(SYS_ZCR); - __vcpu_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu)) = zcr_el1; + __vcpu_assign_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu), zcr_el1); /* * The guest's state is always saved using the guest's max VL. diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index b9cff893bbe0..4d0dbea4c56f 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -307,11 +307,11 @@ static inline void __sysreg32_save_state(struct kvm_vcpu *vcpu) vcpu->arch.ctxt.spsr_irq = read_sysreg(spsr_irq); vcpu->arch.ctxt.spsr_fiq = read_sysreg(spsr_fiq); - __vcpu_sys_reg(vcpu, DACR32_EL2) = read_sysreg(dacr32_el2); - __vcpu_sys_reg(vcpu, IFSR32_EL2) = read_sysreg(ifsr32_el2); + __vcpu_assign_sys_reg(vcpu, DACR32_EL2, read_sysreg(dacr32_el2)); + __vcpu_assign_sys_reg(vcpu, IFSR32_EL2, read_sysreg(ifsr32_el2)); if (has_vhe() || kvm_debug_regs_in_use(vcpu)) - __vcpu_sys_reg(vcpu, DBGVCR32_EL2) = read_sysreg(dbgvcr32_el2); + __vcpu_assign_sys_reg(vcpu, DBGVCR32_EL2, read_sysreg(dbgvcr32_el2)); } static inline void __sysreg32_restore_state(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 8e8848de4d47..e9198e56e784 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -26,7 +26,7 @@ void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt); static void __hyp_sve_save_guest(struct kvm_vcpu *vcpu) { - __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR); + __vcpu_assign_sys_reg(vcpu, ZCR_EL1, read_sysreg_el1(SYS_ZCR)); /* * On saving/restoring guest sve state, always use the maximum VL for * the guest. The layout of the data when saving the sve state depends @@ -79,7 +79,7 @@ static void fpsimd_sve_sync(struct kvm_vcpu *vcpu) has_fpmr = kvm_has_fpmr(kern_hyp_va(vcpu->kvm)); if (has_fpmr) - __vcpu_sys_reg(vcpu, FPMR) = read_sysreg_s(SYS_FPMR); + __vcpu_assign_sys_reg(vcpu, FPMR, read_sysreg_s(SYS_FPMR)); if (system_supports_sve()) __hyp_sve_restore_host(); diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index c9b330dc2066..09df2b42bc1b 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -223,9 +223,9 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) */ val = read_sysreg_el0(SYS_CNTP_CVAL); if (map.direct_ptimer == vcpu_ptimer(vcpu)) - __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = val; + __vcpu_assign_sys_reg(vcpu, CNTP_CVAL_EL0, val); if (map.direct_ptimer == vcpu_hptimer(vcpu)) - __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = val; + __vcpu_assign_sys_reg(vcpu, CNTHP_CVAL_EL2, val); offset = read_sysreg_s(SYS_CNTPOFF_EL2); diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c index 3814b0b2c937..34c7bf7fe9de 100644 --- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c @@ -18,17 +18,17 @@ static void __sysreg_save_vel2_state(struct kvm_vcpu *vcpu) { /* These registers are common with EL1 */ - __vcpu_sys_reg(vcpu, PAR_EL1) = read_sysreg(par_el1); - __vcpu_sys_reg(vcpu, TPIDR_EL1) = read_sysreg(tpidr_el1); - - __vcpu_sys_reg(vcpu, ESR_EL2) = read_sysreg_el1(SYS_ESR); - __vcpu_sys_reg(vcpu, AFSR0_EL2) = read_sysreg_el1(SYS_AFSR0); - __vcpu_sys_reg(vcpu, AFSR1_EL2) = read_sysreg_el1(SYS_AFSR1); - __vcpu_sys_reg(vcpu, FAR_EL2) = read_sysreg_el1(SYS_FAR); - __vcpu_sys_reg(vcpu, MAIR_EL2) = read_sysreg_el1(SYS_MAIR); - __vcpu_sys_reg(vcpu, VBAR_EL2) = read_sysreg_el1(SYS_VBAR); - __vcpu_sys_reg(vcpu, CONTEXTIDR_EL2) = read_sysreg_el1(SYS_CONTEXTIDR); - __vcpu_sys_reg(vcpu, AMAIR_EL2) = read_sysreg_el1(SYS_AMAIR); + __vcpu_assign_sys_reg(vcpu, PAR_EL1, read_sysreg(par_el1)); + __vcpu_assign_sys_reg(vcpu, TPIDR_EL1, read_sysreg(tpidr_el1)); + + __vcpu_assign_sys_reg(vcpu, ESR_EL2, read_sysreg_el1(SYS_ESR)); + __vcpu_assign_sys_reg(vcpu, AFSR0_EL2, read_sysreg_el1(SYS_AFSR0)); + __vcpu_assign_sys_reg(vcpu, AFSR1_EL2, read_sysreg_el1(SYS_AFSR1)); + __vcpu_assign_sys_reg(vcpu, FAR_EL2, read_sysreg_el1(SYS_FAR)); + __vcpu_assign_sys_reg(vcpu, MAIR_EL2, read_sysreg_el1(SYS_MAIR)); + __vcpu_assign_sys_reg(vcpu, VBAR_EL2, read_sysreg_el1(SYS_VBAR)); + __vcpu_assign_sys_reg(vcpu, CONTEXTIDR_EL2, read_sysreg_el1(SYS_CONTEXTIDR)); + __vcpu_assign_sys_reg(vcpu, AMAIR_EL2, read_sysreg_el1(SYS_AMAIR)); /* * In VHE mode those registers are compatible between EL1 and EL2, @@ -46,21 +46,21 @@ static void __sysreg_save_vel2_state(struct kvm_vcpu *vcpu) * are always trapped, ensuring that the in-memory * copy is always up-to-date. A small blessing... */ - __vcpu_sys_reg(vcpu, SCTLR_EL2) = read_sysreg_el1(SYS_SCTLR); - __vcpu_sys_reg(vcpu, TTBR0_EL2) = read_sysreg_el1(SYS_TTBR0); - __vcpu_sys_reg(vcpu, TTBR1_EL2) = read_sysreg_el1(SYS_TTBR1); - __vcpu_sys_reg(vcpu, TCR_EL2) = read_sysreg_el1(SYS_TCR); + __vcpu_assign_sys_reg(vcpu, SCTLR_EL2, read_sysreg_el1(SYS_SCTLR)); + __vcpu_assign_sys_reg(vcpu, TTBR0_EL2, read_sysreg_el1(SYS_TTBR0)); + __vcpu_assign_sys_reg(vcpu, TTBR1_EL2, read_sysreg_el1(SYS_TTBR1)); + __vcpu_assign_sys_reg(vcpu, TCR_EL2, read_sysreg_el1(SYS_TCR)); if (ctxt_has_tcrx(&vcpu->arch.ctxt)) { - __vcpu_sys_reg(vcpu, TCR2_EL2) = read_sysreg_el1(SYS_TCR2); + __vcpu_assign_sys_reg(vcpu, TCR2_EL2, read_sysreg_el1(SYS_TCR2)); if (ctxt_has_s1pie(&vcpu->arch.ctxt)) { - __vcpu_sys_reg(vcpu, PIRE0_EL2) = read_sysreg_el1(SYS_PIRE0); - __vcpu_sys_reg(vcpu, PIR_EL2) = read_sysreg_el1(SYS_PIR); + __vcpu_assign_sys_reg(vcpu, PIRE0_EL2, read_sysreg_el1(SYS_PIRE0)); + __vcpu_assign_sys_reg(vcpu, PIR_EL2, read_sysreg_el1(SYS_PIR)); } if (ctxt_has_s1poe(&vcpu->arch.ctxt)) - __vcpu_sys_reg(vcpu, POR_EL2) = read_sysreg_el1(SYS_POR); + __vcpu_assign_sys_reg(vcpu, POR_EL2, read_sysreg_el1(SYS_POR)); } /* @@ -74,9 +74,9 @@ static void __sysreg_save_vel2_state(struct kvm_vcpu *vcpu) __vcpu_sys_reg(vcpu, CNTHCTL_EL2) |= val; } - __vcpu_sys_reg(vcpu, SP_EL2) = read_sysreg(sp_el1); - __vcpu_sys_reg(vcpu, ELR_EL2) = read_sysreg_el1(SYS_ELR); - __vcpu_sys_reg(vcpu, SPSR_EL2) = read_sysreg_el1(SYS_SPSR); + __vcpu_assign_sys_reg(vcpu, SP_EL2, read_sysreg(sp_el1)); + __vcpu_assign_sys_reg(vcpu, ELR_EL2, read_sysreg_el1(SYS_ELR)); + __vcpu_assign_sys_reg(vcpu, SPSR_EL2, read_sysreg_el1(SYS_SPSR)); } static void __sysreg_restore_vel2_state(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 25c29107f13f..4f0ae8073f78 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -178,7 +178,7 @@ static void kvm_pmu_set_pmc_value(struct kvm_pmc *pmc, u64 val, bool force) val |= lower_32_bits(val); } - __vcpu_sys_reg(vcpu, reg) = val; + __vcpu_assign_sys_reg(vcpu, reg, val); /* Recreate the perf event to reflect the updated sample_period */ kvm_pmu_create_perf_event(pmc); @@ -204,7 +204,7 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) void kvm_pmu_set_counter_value_user(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) { kvm_pmu_release_perf_event(kvm_vcpu_idx_to_pmc(vcpu, select_idx)); - __vcpu_sys_reg(vcpu, counter_index_to_reg(select_idx)) = val; + __vcpu_assign_sys_reg(vcpu, counter_index_to_reg(select_idx), val); kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu); } @@ -239,7 +239,7 @@ static void kvm_pmu_stop_counter(struct kvm_pmc *pmc) reg = counter_index_to_reg(pmc->idx); - __vcpu_sys_reg(vcpu, reg) = val; + __vcpu_assign_sys_reg(vcpu, reg, val); kvm_pmu_release_perf_event(pmc); } @@ -503,7 +503,7 @@ static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu, reg = __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) + 1; if (!kvm_pmc_is_64bit(pmc)) reg = lower_32_bits(reg); - __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) = reg; + __vcpu_assign_sys_reg(vcpu, counter_index_to_reg(i), reg); /* No overflow? move on */ if (kvm_pmc_has_64bit_overflow(pmc) ? reg : lower_32_bits(reg)) @@ -602,7 +602,7 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu); /* The reset bits don't indicate any state, and shouldn't be saved. */ - __vcpu_sys_reg(vcpu, PMCR_EL0) = val & ~(ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_P); + __vcpu_assign_sys_reg(vcpu, PMCR_EL0, (val & ~(ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_P))); if (val & ARMV8_PMU_PMCR_C) kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0); @@ -779,7 +779,7 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, u64 reg; reg = counter_index_to_evtreg(pmc->idx); - __vcpu_sys_reg(vcpu, reg) = data & kvm_pmu_evtyper_mask(vcpu->kvm); + __vcpu_assign_sys_reg(vcpu, reg, (data & kvm_pmu_evtyper_mask(vcpu->kvm))); kvm_pmu_create_perf_event(pmc); } @@ -1038,7 +1038,7 @@ static void kvm_arm_set_nr_counters(struct kvm *kvm, unsigned int nr) u64 val = __vcpu_sys_reg(vcpu, MDCR_EL2); val &= ~MDCR_EL2_HPMN; val |= FIELD_PREP(MDCR_EL2_HPMN, kvm->arch.nr_pmu_counters); - __vcpu_sys_reg(vcpu, MDCR_EL2) = val; + __vcpu_assign_sys_reg(vcpu, MDCR_EL2, val); } } } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 707c651aff03..93d0ca7ed936 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -228,7 +228,7 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) * to reverse-translate virtual EL2 system registers for a * non-VHE guest hypervisor. */ - __vcpu_sys_reg(vcpu, reg) = val; + __vcpu_assign_sys_reg(vcpu, reg, val); switch (reg) { case CNTHCTL_EL2: @@ -263,7 +263,7 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) return; memory_write: - __vcpu_sys_reg(vcpu, reg) = val; + __vcpu_assign_sys_reg(vcpu, reg, val); } /* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */ @@ -605,7 +605,7 @@ static int set_oslsr_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, if ((val ^ rd->val) & ~OSLSR_EL1_OSLK) return -EINVAL; - __vcpu_sys_reg(vcpu, rd->reg) = val; + __vcpu_assign_sys_reg(vcpu, rd->reg, val); return 0; } @@ -835,7 +835,7 @@ static u64 reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) * The value of PMCR.N field is included when the * vCPU register is read via kvm_vcpu_read_pmcr(). */ - __vcpu_sys_reg(vcpu, r->reg) = pmcr; + __vcpu_assign_sys_reg(vcpu, r->reg, pmcr); return __vcpu_sys_reg(vcpu, r->reg); } @@ -907,7 +907,7 @@ static bool access_pmselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, return false; if (p->is_write) - __vcpu_sys_reg(vcpu, PMSELR_EL0) = p->regval; + __vcpu_assign_sys_reg(vcpu, PMSELR_EL0, p->regval); else /* return PMSELR.SEL field */ p->regval = __vcpu_sys_reg(vcpu, PMSELR_EL0) @@ -1076,7 +1076,7 @@ static int set_pmreg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, u64 va { u64 mask = kvm_pmu_accessible_counter_mask(vcpu); - __vcpu_sys_reg(vcpu, r->reg) = val & mask; + __vcpu_assign_sys_reg(vcpu, r->reg, val & mask); kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu); return 0; @@ -1185,8 +1185,8 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (!vcpu_mode_priv(vcpu)) return undef_access(vcpu, p, r); - __vcpu_sys_reg(vcpu, PMUSERENR_EL0) = - p->regval & ARMV8_PMU_USERENR_MASK; + __vcpu_assign_sys_reg(vcpu, PMUSERENR_EL0, + (p->regval & ARMV8_PMU_USERENR_MASK)); } else { p->regval = __vcpu_sys_reg(vcpu, PMUSERENR_EL0) & ARMV8_PMU_USERENR_MASK; @@ -1237,7 +1237,7 @@ static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, if (!kvm_supports_32bit_el0()) val |= ARMV8_PMU_PMCR_LC; - __vcpu_sys_reg(vcpu, r->reg) = val; + __vcpu_assign_sys_reg(vcpu, r->reg, val); kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu); return 0; @@ -2207,7 +2207,7 @@ static u64 reset_clidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) if (kvm_has_mte(vcpu->kvm)) clidr |= 2ULL << CLIDR_TTYPE_SHIFT(loc); - __vcpu_sys_reg(vcpu, r->reg) = clidr; + __vcpu_assign_sys_reg(vcpu, r->reg, clidr); return __vcpu_sys_reg(vcpu, r->reg); } @@ -2221,7 +2221,7 @@ static int set_clidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, if ((val & CLIDR_EL1_RES0) || (!(ctr_el0 & CTR_EL0_IDC) && idc)) return -EINVAL; - __vcpu_sys_reg(vcpu, rd->reg) = val; + __vcpu_assign_sys_reg(vcpu, rd->reg, val); return 0; } @@ -2398,7 +2398,7 @@ static bool access_sp_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { if (p->is_write) - __vcpu_sys_reg(vcpu, SP_EL1) = p->regval; + __vcpu_assign_sys_reg(vcpu, SP_EL1, p->regval); else p->regval = __vcpu_sys_reg(vcpu, SP_EL1); @@ -2422,7 +2422,7 @@ static bool access_spsr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { if (p->is_write) - __vcpu_sys_reg(vcpu, SPSR_EL1) = p->regval; + __vcpu_assign_sys_reg(vcpu, SPSR_EL1, p->regval); else p->regval = __vcpu_sys_reg(vcpu, SPSR_EL1); @@ -2434,7 +2434,7 @@ static bool access_cntkctl_el12(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { if (p->is_write) - __vcpu_sys_reg(vcpu, CNTKCTL_EL1) = p->regval; + __vcpu_assign_sys_reg(vcpu, CNTKCTL_EL1, p->regval); else p->regval = __vcpu_sys_reg(vcpu, CNTKCTL_EL1); @@ -2448,7 +2448,9 @@ static u64 reset_hcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) if (!cpus_have_final_cap(ARM64_HAS_HCR_NV1)) val |= HCR_E2H; - return __vcpu_sys_reg(vcpu, r->reg) = val; + __vcpu_assign_sys_reg(vcpu, r->reg, val); + + return __vcpu_sys_reg(vcpu, r->reg); } static unsigned int __el2_visibility(const struct kvm_vcpu *vcpu, @@ -2619,7 +2621,7 @@ static bool access_mdcr(struct kvm_vcpu *vcpu, u64_replace_bits(val, hpmn, MDCR_EL2_HPMN); } - __vcpu_sys_reg(vcpu, MDCR_EL2) = val; + __vcpu_assign_sys_reg(vcpu, MDCR_EL2, val); /* * Request a reload of the PMU to enable/disable the counters @@ -2748,7 +2750,7 @@ static int set_imp_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, static u64 reset_mdcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { - __vcpu_sys_reg(vcpu, r->reg) = vcpu->kvm->arch.nr_pmu_counters; + __vcpu_assign_sys_reg(vcpu, r->reg, vcpu->kvm->arch.nr_pmu_counters); return vcpu->kvm->arch.nr_pmu_counters; } @@ -5006,7 +5008,7 @@ int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, if (r->set_user) { ret = (r->set_user)(vcpu, r, val); } else { - __vcpu_sys_reg(vcpu, r->reg) = val; + __vcpu_assign_sys_reg(vcpu, r->reg, val); ret = 0; } diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index cc6338d38766..ef97d9fc67cc 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -137,7 +137,7 @@ static inline u64 reset_unknown(struct kvm_vcpu *vcpu, { BUG_ON(!r->reg); BUG_ON(r->reg >= NR_SYS_REGS); - __vcpu_sys_reg(vcpu, r->reg) = 0x1de7ec7edbadc0deULL; + __vcpu_assign_sys_reg(vcpu, r->reg, 0x1de7ec7edbadc0deULL); return __vcpu_sys_reg(vcpu, r->reg); } @@ -145,7 +145,7 @@ static inline u64 reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { BUG_ON(!r->reg); BUG_ON(r->reg >= NR_SYS_REGS); - __vcpu_sys_reg(vcpu, r->reg) = r->val; + __vcpu_assign_sys_reg(vcpu, r->reg, r->val); return __vcpu_sys_reg(vcpu, r->reg); } diff --git a/arch/arm64/kvm/vgic/vgic-v3-nested.c b/arch/arm64/kvm/vgic/vgic-v3-nested.c index 4f6954c30674..d22a8ad7bcc5 100644 --- a/arch/arm64/kvm/vgic/vgic-v3-nested.c +++ b/arch/arm64/kvm/vgic/vgic-v3-nested.c @@ -356,12 +356,12 @@ void vgic_v3_put_nested(struct kvm_vcpu *vcpu) val = __vcpu_sys_reg(vcpu, ICH_HCR_EL2); val &= ~ICH_HCR_EL2_EOIcount_MASK; val |= (s_cpu_if->vgic_hcr & ICH_HCR_EL2_EOIcount_MASK); - __vcpu_sys_reg(vcpu, ICH_HCR_EL2) = val; - __vcpu_sys_reg(vcpu, ICH_VMCR_EL2) = s_cpu_if->vgic_vmcr; + __vcpu_assign_sys_reg(vcpu, ICH_HCR_EL2, val); + __vcpu_assign_sys_reg(vcpu, ICH_VMCR_EL2, s_cpu_if->vgic_vmcr); for (i = 0; i < 4; i++) { - __vcpu_sys_reg(vcpu, ICH_AP0RN(i)) = s_cpu_if->vgic_ap0r[i]; - __vcpu_sys_reg(vcpu, ICH_AP1RN(i)) = s_cpu_if->vgic_ap1r[i]; + __vcpu_assign_sys_reg(vcpu, ICH_AP0RN(i), s_cpu_if->vgic_ap0r[i]); + __vcpu_assign_sys_reg(vcpu, ICH_AP1RN(i), s_cpu_if->vgic_ap1r[i]); } for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) { @@ -370,7 +370,7 @@ void vgic_v3_put_nested(struct kvm_vcpu *vcpu) val &= ~ICH_LR_STATE; val |= s_cpu_if->vgic_lr[i] & ICH_LR_STATE; - __vcpu_sys_reg(vcpu, ICH_LRN(i)) = val; + __vcpu_assign_sys_reg(vcpu, ICH_LRN(i), val); s_cpu_if->vgic_lr[i] = 0; } -- cgit v1.2.3 From 8800b7c4bbede3cd40831726d3f98e8080baf4df Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 3 Jun 2025 08:08:22 +0100 Subject: KVM: arm64: Add RMW specific sysreg accessor In a number of cases, we perform a Read-Modify-Write operation on a system register, meaning that we would apply the RESx masks twice. Instead, provide a new accessor that performs this RMW operation, allowing the masks to be applied exactly once per operation. Reviewed-by: Miguel Luis Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20250603070824.1192795-3-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 11 +++++++++++ arch/arm64/kvm/debug.c | 4 ++-- arch/arm64/kvm/hyp/vhe/sysreg-sr.c | 4 ++-- arch/arm64/kvm/nested.c | 2 +- arch/arm64/kvm/pmu-emul.c | 10 +++++----- arch/arm64/kvm/sys_regs.c | 22 +++++++++++----------- 6 files changed, 32 insertions(+), 21 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 3b84ad91116b..8b8c649f225b 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1118,6 +1118,17 @@ u64 kvm_vcpu_apply_reg_masks(const struct kvm_vcpu *, enum vcpu_sysreg, u64); ctxt_sys_reg(ctxt, (r)) = __v; \ } while (0) +#define __vcpu_rmw_sys_reg(v, r, op, val) \ + do { \ + const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt; \ + u64 __v = ctxt_sys_reg(ctxt, (r)); \ + __v op (val); \ + if (vcpu_has_nv((v)) && (r) >= __SANITISED_REG_START__) \ + __v = kvm_vcpu_apply_reg_masks((v), (r), __v); \ + \ + ctxt_sys_reg(ctxt, (r)) = __v; \ + } while (0) + #define __vcpu_sys_reg(v,r) \ (*({ \ const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt; \ diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index 0e4c805e7e89..1a7dab333f55 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -216,9 +216,9 @@ void kvm_debug_set_guest_ownership(struct kvm_vcpu *vcpu) void kvm_debug_handle_oslar(struct kvm_vcpu *vcpu, u64 val) { if (val & OSLAR_EL1_OSLK) - __vcpu_sys_reg(vcpu, OSLSR_EL1) |= OSLSR_EL1_OSLK; + __vcpu_rmw_sys_reg(vcpu, OSLSR_EL1, |=, OSLSR_EL1_OSLK); else - __vcpu_sys_reg(vcpu, OSLSR_EL1) &= ~OSLSR_EL1_OSLK; + __vcpu_rmw_sys_reg(vcpu, OSLSR_EL1, &=, ~OSLSR_EL1_OSLK); preempt_disable(); kvm_arch_vcpu_put(vcpu); diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c index 34c7bf7fe9de..73e4bc7fde9e 100644 --- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c @@ -70,8 +70,8 @@ static void __sysreg_save_vel2_state(struct kvm_vcpu *vcpu) */ val = read_sysreg_el1(SYS_CNTKCTL); val &= CNTKCTL_VALID_BITS; - __vcpu_sys_reg(vcpu, CNTHCTL_EL2) &= ~CNTKCTL_VALID_BITS; - __vcpu_sys_reg(vcpu, CNTHCTL_EL2) |= val; + __vcpu_rmw_sys_reg(vcpu, CNTHCTL_EL2, &=, ~CNTKCTL_VALID_BITS); + __vcpu_rmw_sys_reg(vcpu, CNTHCTL_EL2, |=, val); } __vcpu_assign_sys_reg(vcpu, SP_EL2, read_sysreg(sp_el1)); diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 4a53e4147fb0..5b191f4dc566 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -1757,7 +1757,7 @@ int kvm_init_nv_sysregs(struct kvm_vcpu *vcpu) out: for (enum vcpu_sysreg sr = __SANITISED_REG_START__; sr < NR_SYS_REGS; sr++) - (void)__vcpu_sys_reg(vcpu, sr); + __vcpu_rmw_sys_reg(vcpu, sr, |=, 0); return 0; } diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 4f0ae8073f78..b03dbda7f1ab 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -510,7 +510,7 @@ static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu, continue; /* Mark overflow */ - __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i); + __vcpu_rmw_sys_reg(vcpu, PMOVSSET_EL0, |=, BIT(i)); if (kvm_pmu_counter_can_chain(pmc)) kvm_pmu_counter_increment(vcpu, BIT(i + 1), @@ -556,7 +556,7 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, perf_event->attr.sample_period = period; perf_event->hw.sample_period = period; - __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx); + __vcpu_rmw_sys_reg(vcpu, PMOVSSET_EL0, |=, BIT(idx)); if (kvm_pmu_counter_can_chain(pmc)) kvm_pmu_counter_increment(vcpu, BIT(idx + 1), @@ -914,9 +914,9 @@ void kvm_vcpu_reload_pmu(struct kvm_vcpu *vcpu) { u64 mask = kvm_pmu_implemented_counter_mask(vcpu); - __vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= mask; - __vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= mask; - __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= mask; + __vcpu_rmw_sys_reg(vcpu, PMOVSSET_EL0, &=, mask); + __vcpu_rmw_sys_reg(vcpu, PMINTENSET_EL1, &=, mask); + __vcpu_rmw_sys_reg(vcpu, PMCNTENSET_EL0, &=, mask); kvm_pmu_reprogram_counter_mask(vcpu, mask); } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 93d0ca7ed936..e89d345e42d0 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -791,7 +791,7 @@ static u64 reset_pmu_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) mask |= GENMASK(n - 1, 0); reset_unknown(vcpu, r); - __vcpu_sys_reg(vcpu, r->reg) &= mask; + __vcpu_rmw_sys_reg(vcpu, r->reg, &=, mask); return __vcpu_sys_reg(vcpu, r->reg); } @@ -799,7 +799,7 @@ static u64 reset_pmu_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) static u64 reset_pmevcntr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { reset_unknown(vcpu, r); - __vcpu_sys_reg(vcpu, r->reg) &= GENMASK(31, 0); + __vcpu_rmw_sys_reg(vcpu, r->reg, &=, GENMASK(31, 0)); return __vcpu_sys_reg(vcpu, r->reg); } @@ -811,7 +811,7 @@ static u64 reset_pmevtyper(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) return 0; reset_unknown(vcpu, r); - __vcpu_sys_reg(vcpu, r->reg) &= kvm_pmu_evtyper_mask(vcpu->kvm); + __vcpu_rmw_sys_reg(vcpu, r->reg, &=, kvm_pmu_evtyper_mask(vcpu->kvm)); return __vcpu_sys_reg(vcpu, r->reg); } @@ -819,7 +819,7 @@ static u64 reset_pmevtyper(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) static u64 reset_pmselr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { reset_unknown(vcpu, r); - __vcpu_sys_reg(vcpu, r->reg) &= PMSELR_EL0_SEL_MASK; + __vcpu_rmw_sys_reg(vcpu, r->reg, &=, PMSELR_EL0_SEL_MASK); return __vcpu_sys_reg(vcpu, r->reg); } @@ -1103,10 +1103,10 @@ static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, val = p->regval & mask; if (r->Op2 & 0x1) /* accessing PMCNTENSET_EL0 */ - __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val; + __vcpu_rmw_sys_reg(vcpu, PMCNTENSET_EL0, |=, val); else /* accessing PMCNTENCLR_EL0 */ - __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val; + __vcpu_rmw_sys_reg(vcpu, PMCNTENSET_EL0, &=, ~val); kvm_pmu_reprogram_counter_mask(vcpu, val); } else { @@ -1129,10 +1129,10 @@ static bool access_pminten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (r->Op2 & 0x1) /* accessing PMINTENSET_EL1 */ - __vcpu_sys_reg(vcpu, PMINTENSET_EL1) |= val; + __vcpu_rmw_sys_reg(vcpu, PMINTENSET_EL1, |=, val); else /* accessing PMINTENCLR_EL1 */ - __vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= ~val; + __vcpu_rmw_sys_reg(vcpu, PMINTENSET_EL1, &=, ~val); } else { p->regval = __vcpu_sys_reg(vcpu, PMINTENSET_EL1); } @@ -1151,10 +1151,10 @@ static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (p->is_write) { if (r->CRm & 0x2) /* accessing PMOVSSET_EL0 */ - __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= (p->regval & mask); + __vcpu_rmw_sys_reg(vcpu, PMOVSSET_EL0, |=, (p->regval & mask)); else /* accessing PMOVSCLR_EL0 */ - __vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask); + __vcpu_rmw_sys_reg(vcpu, PMOVSSET_EL0, &=, ~(p->regval & mask)); } else { p->regval = __vcpu_sys_reg(vcpu, PMOVSSET_EL0); } @@ -4786,7 +4786,7 @@ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu) r->reset(vcpu, r); if (r->reg >= __SANITISED_REG_START__ && r->reg < NR_SYS_REGS) - (void)__vcpu_sys_reg(vcpu, r->reg); + __vcpu_rmw_sys_reg(vcpu, r->reg, |=, 0); } set_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags); -- cgit v1.2.3 From b61fae4ee120f337b8700dff43b2fd639f3bf6a5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 3 Jun 2025 08:08:23 +0100 Subject: KVM: arm64: Don't use __vcpu_sys_reg() to get the address of a sysreg We are about to prevent the use of __vcpu_sys_reg() as a lvalue, and getting the address of a rvalue is not a thing. Update the couple of places where we do this to use the __ctxt_sys_reg() accessor, which return the address of a register. Reviewed-by: Miguel Luis Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20250603070824.1192795-4-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/kvm/arch_timer.c | 2 +- arch/arm64/kvm/fpsimd.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 5a67a6d4d95b..c6b4fb4c8e08 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -1036,7 +1036,7 @@ void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) if (vcpu_has_nv(vcpu)) { struct arch_timer_offset *offs = &vcpu_vtimer(vcpu)->offset; - offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2); + offs->vcpu_offset = __ctxt_sys_reg(&vcpu->arch.ctxt, CNTVOFF_EL2); offs->vm_offset = &vcpu->kvm->arch.timer_data.poffset; } diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 7f6e43d25691..8f6c8f57c6b9 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -103,8 +103,8 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) fp_state.sve_state = vcpu->arch.sve_state; fp_state.sve_vl = vcpu->arch.sve_max_vl; fp_state.sme_state = NULL; - fp_state.svcr = &__vcpu_sys_reg(vcpu, SVCR); - fp_state.fpmr = &__vcpu_sys_reg(vcpu, FPMR); + fp_state.svcr = __ctxt_sys_reg(&vcpu->arch.ctxt, SVCR); + fp_state.fpmr = __ctxt_sys_reg(&vcpu->arch.ctxt, FPMR); fp_state.fp_type = &vcpu->arch.fp_type; if (vcpu_has_sve(vcpu)) -- cgit v1.2.3 From b5fa1f91e11fdf74ad4e2ac6dae246a57cbd2d95 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 3 Jun 2025 08:08:24 +0100 Subject: KVM: arm64: Make __vcpu_sys_reg() a pure rvalue operand Now that we don't have any use of __vcpu_sys_reg() as a lvalue, remove the in-place update, and directly return the sanitised value. Reviewed-by: Miguel Luis Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20250603070824.1192795-5-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 8b8c649f225b..382b382d14da 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1130,13 +1130,13 @@ u64 kvm_vcpu_apply_reg_masks(const struct kvm_vcpu *, enum vcpu_sysreg, u64); } while (0) #define __vcpu_sys_reg(v,r) \ - (*({ \ + ({ \ const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt; \ - u64 *__r = __ctxt_sys_reg(ctxt, (r)); \ + u64 __v = ctxt_sys_reg(ctxt, (r)); \ if (vcpu_has_nv((v)) && (r) >= __SANITISED_REG_START__) \ - *__r = kvm_vcpu_apply_reg_masks((v), (r), *__r);\ - __r; \ - })) + __v = kvm_vcpu_apply_reg_masks((v), (r), __v); \ + __v; \ + }) u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg); void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg); -- cgit v1.2.3 From 9a9864fd09c7e52440c05e70609bf5ee8da425d0 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 5 Jun 2025 12:36:10 +0200 Subject: KVM: arm64: selftests: Fix help text for arch_timer_edge_cases Fix the help text for arch_timer_edge_cases to show the correct option for setting the wait time. Signed-off-by: Sebastian Ott Link: https://lore.kernel.org/r/20250605103613.14544-2-sebott@redhat.com Signed-off-by: Marc Zyngier --- tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c index a36a7e2db434..c4716e0c1438 100644 --- a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c +++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c @@ -986,7 +986,7 @@ static void test_print_help(char *name) pr_info("\t-b: Test both physical and virtual timers (default: true)\n"); pr_info("\t-l: Delta (in ms) used for long wait time test (default: %u)\n", LONG_WAIT_TEST_MS); - pr_info("\t-l: Delta (in ms) used for wait times (default: %u)\n", + pr_info("\t-w: Delta (in ms) used for wait times (default: %u)\n", WAIT_TEST_MS); pr_info("\t-p: Test physical timer (default: true)\n"); pr_info("\t-v: Test virtual timer (default: true)\n"); -- cgit v1.2.3 From 050632ae6571d0f148fa0d4b90b6409a12645461 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 5 Jun 2025 12:36:11 +0200 Subject: KVM: arm64: selftests: Fix thread migration in arch_timer_edge_cases arch_timer_edge_cases tries to migrate itself across host cpus. Before the first test, it migrates to cpu 0 by setting up an affinity mask with only bit 0 set. After that it looks for the next possible cpu in the current affinity mask which still has only bit 0 set. So there is no migration at all. Fix this by reading the default mask at start and use this to find the next cpu in each iteration. Signed-off-by: Sebastian Ott Link: https://lore.kernel.org/r/20250605103613.14544-3-sebott@redhat.com Signed-off-by: Marc Zyngier --- tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c index c4716e0c1438..a813b4c6c817 100644 --- a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c +++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c @@ -849,17 +849,17 @@ static void guest_code(enum arch_timer timer) GUEST_DONE(); } +static cpu_set_t default_cpuset; + static uint32_t next_pcpu(void) { uint32_t max = get_nprocs(); uint32_t cur = sched_getcpu(); uint32_t next = cur; - cpu_set_t cpuset; + cpu_set_t cpuset = default_cpuset; TEST_ASSERT(max > 1, "Need at least two physical cpus"); - sched_getaffinity(0, sizeof(cpuset), &cpuset); - do { next = (next + 1) % CPU_SETSIZE; } while (!CPU_ISSET(next, &cpuset)); @@ -1046,6 +1046,8 @@ int main(int argc, char *argv[]) if (!parse_args(argc, argv)) exit(KSFT_SKIP); + sched_getaffinity(0, sizeof(default_cpuset), &default_cpuset); + if (test_args.test_virtual) { test_vm_create(&vm, &vcpu, VIRTUAL); test_run(vm, vcpu); -- cgit v1.2.3 From 05ce38d489dbc96eb1dd700b287f949cb8cc0610 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 5 Jun 2025 12:36:12 +0200 Subject: KVM: arm64: selftests: Fix xVAL init in arch_timer_edge_cases arch_timer_edge_cases hits the following assertion in < 10% of the test runs: ==== Test Assertion Failure ==== arm64/arch_timer_edge_cases.c:490: timer_get_cntct(timer) >= DEF_CNT + (timer_get_cntfrq() * (uint64_t)(delta_2_ms) / 1000) pid=17110 tid=17110 errno=4 - Interrupted system call 1 0x0000000000404ec7: test_run at arch_timer_edge_cases.c:945 2 0x0000000000401fa3: main at arch_timer_edge_cases.c:1074 3 0x0000ffffa774b587: ?? ??:0 4 0x0000ffffa774b65f: ?? ??:0 5 0x000000000040206f: _start at ??:? timer_get_cntct(timer) >= DEF_CNT + msec_to_cycles(delta_2_ms) Enabling the timer without proper xval initialization in set_tval_irq() resulted in an early interrupt during timer reprogramming. Make sure to set the xval before setting the enable bit. Signed-off-by: Sebastian Ott Link: https://lore.kernel.org/r/20250605103613.14544-4-sebott@redhat.com Signed-off-by: Marc Zyngier --- tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c index a813b4c6c817..be8bbedc933b 100644 --- a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c +++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c @@ -191,8 +191,8 @@ static void set_tval_irq(enum arch_timer timer, uint64_t tval_cycles, { atomic_set(&shared_data.handled, 0); atomic_set(&shared_data.spurious, 0); - timer_set_ctl(timer, ctl); timer_set_tval(timer, tval_cycles); + timer_set_ctl(timer, ctl); } static void set_xval_irq(enum arch_timer timer, uint64_t xval, uint32_t ctl, -- cgit v1.2.3 From fad4cf944839da7f5c3376243aa353295c88f588 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 5 Jun 2025 12:36:13 +0200 Subject: KVM: arm64: selftests: Determine effective counter width in arch_timer_edge_cases arch_timer_edge_cases uses ~0 as the maximum counter value, however there's no architectural guarantee that this is valid. Figure out the effective counter width based on the effective frequency like it's done by the kernel. This also serves as a workaround for AC03_CPU_14 that led to the following assertion failure on ampere-one machines: ==== Test Assertion Failure ==== arm64/arch_timer_edge_cases.c:169: timer_condition == istatus pid=11236 tid=11236 errno=4 - Interrupted system call 1 0x0000000000404ce7: test_run at arch_timer_edge_cases.c:938 2 0x0000000000401ebb: main at arch_timer_edge_cases.c:1053 3 0x0000ffff9fa8625b: ?? ??:0 4 0x0000ffff9fa8633b: ?? ??:0 5 0x0000000000401fef: _start at ??:? 0x1 != 0x0 (timer_condition != istatus) Note that the following subtest only worked since the counter initialized with CVAL_MAX would instantly overflow (which is no longer the case): test_set_cnt_after_cval_no_irq(timer, 0, DEF_CNT, CVAL_MAX, sm); To fix this we could swap CVAL_MAX for 0 here but since that is already done by test_move_counters_behind_timers() let's remove that subtest. Link: https://lore.kernel.org/kvmarm/ac1de1d2-ef2b-d439-dc48-8615e121b07b@redhat.com Link: https://amperecomputing.com/assets/AmpereOne_Developer_ER_v0_80_20240823_28945022f4.pdf Signed-off-by: Sebastian Ott Link: https://lore.kernel.org/r/20250605103613.14544-5-sebott@redhat.com Signed-off-by: Marc Zyngier --- .../selftests/kvm/arm64/arch_timer_edge_cases.c | 27 ++++++++++++++-------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c index be8bbedc933b..b4d22b3ab7cc 100644 --- a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c +++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c @@ -22,7 +22,8 @@ #include "gic.h" #include "vgic.h" -static const uint64_t CVAL_MAX = ~0ULL; +/* Depends on counter width. */ +static uint64_t CVAL_MAX; /* tval is a signed 32-bit int. */ static const int32_t TVAL_MAX = INT32_MAX; static const int32_t TVAL_MIN = INT32_MIN; @@ -30,8 +31,8 @@ static const int32_t TVAL_MIN = INT32_MIN; /* After how much time we say there is no IRQ. */ static const uint32_t TIMEOUT_NO_IRQ_US = 50000; -/* A nice counter value to use as the starting one for most tests. */ -static const uint64_t DEF_CNT = (CVAL_MAX / 2); +/* Counter value to use as the starting one for most tests. Set to CVAL_MAX/2 */ +static uint64_t DEF_CNT; /* Number of runs. */ static const uint32_t NR_TEST_ITERS_DEF = 5; @@ -732,12 +733,6 @@ static void test_move_counters_ahead_of_timers(enum arch_timer timer) test_set_cnt_after_tval(timer, 0, tval, (uint64_t) tval + 1, wm); } - - for (i = 0; i < ARRAY_SIZE(sleep_method); i++) { - sleep_method_t sm = sleep_method[i]; - - test_set_cnt_after_cval_no_irq(timer, 0, DEF_CNT, CVAL_MAX, sm); - } } /* @@ -975,6 +970,8 @@ static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu, test_init_timer_irq(*vm, *vcpu); vgic_v3_setup(*vm, 1, 64); sync_global_to_guest(*vm, test_args); + sync_global_to_guest(*vm, CVAL_MAX); + sync_global_to_guest(*vm, DEF_CNT); } static void test_print_help(char *name) @@ -1035,6 +1032,17 @@ static bool parse_args(int argc, char *argv[]) return false; } +static void set_counter_defaults(void) +{ + const uint64_t MIN_ROLLOVER_SECS = 40ULL * 365 * 24 * 3600; + uint64_t freq = read_sysreg(CNTFRQ_EL0); + uint64_t width = ilog2(MIN_ROLLOVER_SECS * freq); + + width = clamp(width, 56, 64); + CVAL_MAX = GENMASK_ULL(width - 1, 0); + DEF_CNT = CVAL_MAX / 2; +} + int main(int argc, char *argv[]) { struct kvm_vcpu *vcpu; @@ -1047,6 +1055,7 @@ int main(int argc, char *argv[]) exit(KSFT_SKIP); sched_getaffinity(0, sizeof(default_cpuset), &default_cpuset); + set_counter_defaults(); if (test_args.test_virtual) { test_vm_create(&vm, &vcpu, VIRTUAL); -- cgit v1.2.3 From 308a3a8ce8ea41b26c46169f3263e50f5997c28e Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Sat, 31 May 2025 18:24:58 +0300 Subject: Bluetooth: hci_core: fix list_for_each_entry_rcu usage Releasing + re-acquiring RCU lock inside list_for_each_entry_rcu() loop body is not correct. Fix by taking the update-side hdev->lock instead. Fixes: c7eaf80bfb0c ("Bluetooth: Fix hci_link_tx_to RCU lock usage") Signed-off-by: Pauli Virtanen Reviewed-by: Paul Menzel Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_core.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 3b49828160b7..04845ff3ad57 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3417,23 +3417,18 @@ static void hci_link_tx_to(struct hci_dev *hdev, __u8 type) bt_dev_err(hdev, "link tx timeout"); - rcu_read_lock(); + hci_dev_lock(hdev); /* Kill stalled connections */ - list_for_each_entry_rcu(c, &h->list, list) { + list_for_each_entry(c, &h->list, list) { if (c->type == type && c->sent) { bt_dev_err(hdev, "killing stalled connection %pMR", &c->dst); - /* hci_disconnect might sleep, so, we have to release - * the RCU read lock before calling it. - */ - rcu_read_unlock(); hci_disconnect(c, HCI_ERROR_REMOTE_USER_TERM); - rcu_read_lock(); } } - rcu_read_unlock(); + hci_dev_unlock(hdev); } static struct hci_chan *hci_chan_sent(struct hci_dev *hdev, __u8 type, -- cgit v1.2.3 From daabd276985055250528da97e9ce6d277d7009c2 Mon Sep 17 00:00:00 2001 From: Kiran K Date: Tue, 3 Jun 2025 15:34:38 +0530 Subject: Bluetooth: btintel_pcie: Fix driver not posting maximum rx buffers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver was posting only 6 rx buffers, despite the maximum rx buffers being defined as 16. Having fewer RX buffers caused firmware exceptions in HID use cases when events arrived in bursts. Exception seen on android 6.12 kernel. E Bluetooth: hci0: Received hw exception interrupt E Bluetooth: hci0: Received gp1 mailbox interrupt D Bluetooth: hci0: 00000000: ff 3e 87 80 03 01 01 01 03 01 0c 0d 02 1c 10 0e D Bluetooth: hci0: 00000010: 01 00 05 14 66 b0 28 b0 c0 b0 28 b0 ac af 28 b0 D Bluetooth: hci0: 00000020: 14 f1 28 b0 00 00 00 00 fa 04 00 00 00 00 40 10 D Bluetooth: hci0: 00000030: 08 00 00 00 7a 7a 7a 7a 47 00 fb a0 10 00 00 00 D Bluetooth: hci0: 00000000: 10 01 0a E Bluetooth: hci0: ---- Dump of debug registers — E Bluetooth: hci0: boot stage: 0xe0fb0047 E Bluetooth: hci0: ipc status: 0x00000004 E Bluetooth: hci0: ipc control: 0x00000000 E Bluetooth: hci0: ipc sleep control: 0x00000000 E Bluetooth: hci0: mbox_1: 0x00badbad E Bluetooth: hci0: mbox_2: 0x0000101c E Bluetooth: hci0: mbox_3: 0x00000008 E Bluetooth: hci0: mbox_4: 0x7a7a7a7a Signed-off-by: Chandrashekar Devegowda Signed-off-by: Kiran K Fixes: c2b636b3f788 ("Bluetooth: btintel_pcie: Add support for PCIe transport") Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btintel_pcie.c | 3 ++- drivers/bluetooth/btintel_pcie.h | 3 --- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index 50fe17f1e1d1..2c7731803c9f 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -396,8 +396,9 @@ static int btintel_pcie_submit_rx(struct btintel_pcie_data *data) static int btintel_pcie_start_rx(struct btintel_pcie_data *data) { int i, ret; + struct rxq *rxq = &data->rxq; - for (i = 0; i < BTINTEL_PCIE_RX_MAX_QUEUE; i++) { + for (i = 0; i < rxq->count; i++) { ret = btintel_pcie_submit_rx(data); if (ret) return ret; diff --git a/drivers/bluetooth/btintel_pcie.h b/drivers/bluetooth/btintel_pcie.h index 21b964b15c1c..5ddd6d7d8d45 100644 --- a/drivers/bluetooth/btintel_pcie.h +++ b/drivers/bluetooth/btintel_pcie.h @@ -177,9 +177,6 @@ enum { /* Doorbell vector for TFD */ #define BTINTEL_PCIE_TX_DB_VEC 0 -/* Number of pending RX requests for downlink */ -#define BTINTEL_PCIE_RX_MAX_QUEUE 6 - /* Doorbell vector for FRBD */ #define BTINTEL_PCIE_RX_DB_VEC 513 -- cgit v1.2.3 From 2dd711102ce69ae41f65d09c012441227d4aa983 Mon Sep 17 00:00:00 2001 From: Chandrashekar Devegowda Date: Tue, 3 Jun 2025 15:34:39 +0530 Subject: Bluetooth: btintel_pcie: Increase the tx and rx descriptor count This change addresses latency issues observed in HID use cases where events arrive in bursts. By increasing the Rx descriptor count to 64, the firmware can handle bursty data more effectively, reducing latency and preventing buffer overflows. Signed-off-by: Chandrashekar Devegowda Signed-off-by: Kiran K Fixes: c2b636b3f788 ("Bluetooth: btintel_pcie: Add support for PCIe transport") Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btintel_pcie.c | 24 ++++++++++++------------ drivers/bluetooth/btintel_pcie.h | 7 +++++-- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index 2c7731803c9f..03f13de4a723 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -1783,8 +1783,8 @@ static int btintel_pcie_alloc(struct btintel_pcie_data *data) * + size of index * Number of queues(2) * type of index array(4) * + size of context information */ - total = (sizeof(struct tfd) + sizeof(struct urbd0) + sizeof(struct frbd) - + sizeof(struct urbd1)) * BTINTEL_DESCS_COUNT; + total = (sizeof(struct tfd) + sizeof(struct urbd0)) * BTINTEL_PCIE_TX_DESCS_COUNT; + total += (sizeof(struct frbd) + sizeof(struct urbd1)) * BTINTEL_PCIE_RX_DESCS_COUNT; /* Add the sum of size of index array and size of ci struct */ total += (sizeof(u16) * BTINTEL_PCIE_NUM_QUEUES * 4) + sizeof(struct ctx_info); @@ -1809,36 +1809,36 @@ static int btintel_pcie_alloc(struct btintel_pcie_data *data) data->dma_v_addr = v_addr; /* Setup descriptor count */ - data->txq.count = BTINTEL_DESCS_COUNT; - data->rxq.count = BTINTEL_DESCS_COUNT; + data->txq.count = BTINTEL_PCIE_TX_DESCS_COUNT; + data->rxq.count = BTINTEL_PCIE_RX_DESCS_COUNT; /* Setup tfds */ data->txq.tfds_p_addr = p_addr; data->txq.tfds = v_addr; - p_addr += (sizeof(struct tfd) * BTINTEL_DESCS_COUNT); - v_addr += (sizeof(struct tfd) * BTINTEL_DESCS_COUNT); + p_addr += (sizeof(struct tfd) * BTINTEL_PCIE_TX_DESCS_COUNT); + v_addr += (sizeof(struct tfd) * BTINTEL_PCIE_TX_DESCS_COUNT); /* Setup urbd0 */ data->txq.urbd0s_p_addr = p_addr; data->txq.urbd0s = v_addr; - p_addr += (sizeof(struct urbd0) * BTINTEL_DESCS_COUNT); - v_addr += (sizeof(struct urbd0) * BTINTEL_DESCS_COUNT); + p_addr += (sizeof(struct urbd0) * BTINTEL_PCIE_TX_DESCS_COUNT); + v_addr += (sizeof(struct urbd0) * BTINTEL_PCIE_TX_DESCS_COUNT); /* Setup FRBD*/ data->rxq.frbds_p_addr = p_addr; data->rxq.frbds = v_addr; - p_addr += (sizeof(struct frbd) * BTINTEL_DESCS_COUNT); - v_addr += (sizeof(struct frbd) * BTINTEL_DESCS_COUNT); + p_addr += (sizeof(struct frbd) * BTINTEL_PCIE_RX_DESCS_COUNT); + v_addr += (sizeof(struct frbd) * BTINTEL_PCIE_RX_DESCS_COUNT); /* Setup urbd1 */ data->rxq.urbd1s_p_addr = p_addr; data->rxq.urbd1s = v_addr; - p_addr += (sizeof(struct urbd1) * BTINTEL_DESCS_COUNT); - v_addr += (sizeof(struct urbd1) * BTINTEL_DESCS_COUNT); + p_addr += (sizeof(struct urbd1) * BTINTEL_PCIE_RX_DESCS_COUNT); + v_addr += (sizeof(struct urbd1) * BTINTEL_PCIE_RX_DESCS_COUNT); /* Setup data buffers for txq */ err = btintel_pcie_setup_txq_bufs(data, &data->txq); diff --git a/drivers/bluetooth/btintel_pcie.h b/drivers/bluetooth/btintel_pcie.h index 5ddd6d7d8d45..7dad4523236c 100644 --- a/drivers/bluetooth/btintel_pcie.h +++ b/drivers/bluetooth/btintel_pcie.h @@ -154,8 +154,11 @@ enum msix_mbox_int_causes { /* Default interrupt timeout in msec */ #define BTINTEL_DEFAULT_INTR_TIMEOUT_MS 3000 -/* The number of descriptors in TX/RX queues */ -#define BTINTEL_DESCS_COUNT 16 +/* The number of descriptors in TX queues */ +#define BTINTEL_PCIE_TX_DESCS_COUNT 32 + +/* The number of descriptors in RX queues */ +#define BTINTEL_PCIE_RX_DESCS_COUNT 64 /* Number of Queue for TX and RX * It indicates the index of the IA(Index Array) -- cgit v1.2.3 From bf2ffc4d14db29cab781549912d2dc69127f4d3e Mon Sep 17 00:00:00 2001 From: Chandrashekar Devegowda Date: Tue, 3 Jun 2025 15:34:40 +0530 Subject: Bluetooth: btintel_pcie: Reduce driver buffer posting to prevent race condition Modify the driver to post 3 fewer buffers than the maximum rx buffers (64) allowed for the firmware. This change mitigates a hardware issue causing a race condition in the firmware, improving stability and data handling. Signed-off-by: Chandrashekar Devegowda Signed-off-by: Kiran K Fixes: c2b636b3f788 ("Bluetooth: btintel_pcie: Add support for PCIe transport") Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btintel_pcie.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index 03f13de4a723..563165c5efae 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -398,7 +398,11 @@ static int btintel_pcie_start_rx(struct btintel_pcie_data *data) int i, ret; struct rxq *rxq = &data->rxq; - for (i = 0; i < rxq->count; i++) { + /* Post (BTINTEL_PCIE_RX_DESCS_COUNT - 3) buffers to overcome the + * hardware issues leading to race condition at the firmware. + */ + + for (i = 0; i < rxq->count - 3; i++) { ret = btintel_pcie_submit_rx(data); if (ret) return ret; -- cgit v1.2.3 From e6ed54e86aae9e4f7286ce8d5c73780f91b48d1c Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 3 Jun 2025 16:12:39 -0400 Subject: Bluetooth: MGMT: Fix UAF on mgmt_remove_adv_monitor_complete This reworks MGMT_OP_REMOVE_ADV_MONITOR to not use mgmt_pending_add to avoid crashes like bellow: ================================================================== BUG: KASAN: slab-use-after-free in mgmt_remove_adv_monitor_complete+0xe5/0x540 net/bluetooth/mgmt.c:5406 Read of size 8 at addr ffff88801c53f318 by task kworker/u5:5/5341 CPU: 0 UID: 0 PID: 5341 Comm: kworker/u5:5 Not tainted 6.15.0-syzkaller-10402-g4cb6c8af8591 #0 PREEMPT(full) Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 Workqueue: hci0 hci_cmd_sync_work Call Trace: dump_stack_lvl+0x189/0x250 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:408 [inline] print_report+0xd2/0x2b0 mm/kasan/report.c:521 kasan_report+0x118/0x150 mm/kasan/report.c:634 mgmt_remove_adv_monitor_complete+0xe5/0x540 net/bluetooth/mgmt.c:5406 hci_cmd_sync_work+0x261/0x3a0 net/bluetooth/hci_sync.c:334 process_one_work kernel/workqueue.c:3238 [inline] process_scheduled_works+0xade/0x17b0 kernel/workqueue.c:3321 worker_thread+0x8a0/0xda0 kernel/workqueue.c:3402 kthread+0x711/0x8a0 kernel/kthread.c:464 ret_from_fork+0x3fc/0x770 arch/x86/kernel/process.c:148 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245 Allocated by task 5987: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3e/0x80 mm/kasan/common.c:68 poison_kmalloc_redzone mm/kasan/common.c:377 [inline] __kasan_kmalloc+0x93/0xb0 mm/kasan/common.c:394 kasan_kmalloc include/linux/kasan.h:260 [inline] __kmalloc_cache_noprof+0x230/0x3d0 mm/slub.c:4358 kmalloc_noprof include/linux/slab.h:905 [inline] kzalloc_noprof include/linux/slab.h:1039 [inline] mgmt_pending_new+0x65/0x240 net/bluetooth/mgmt_util.c:252 mgmt_pending_add+0x34/0x120 net/bluetooth/mgmt_util.c:279 remove_adv_monitor+0x103/0x1b0 net/bluetooth/mgmt.c:5454 hci_mgmt_cmd+0x9c9/0xef0 net/bluetooth/hci_sock.c:1719 hci_sock_sendmsg+0x6ca/0xef0 net/bluetooth/hci_sock.c:1839 sock_sendmsg_nosec net/socket.c:712 [inline] __sock_sendmsg+0x219/0x270 net/socket.c:727 sock_write_iter+0x258/0x330 net/socket.c:1131 new_sync_write fs/read_write.c:593 [inline] vfs_write+0x548/0xa90 fs/read_write.c:686 ksys_write+0x145/0x250 fs/read_write.c:738 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xfa/0x3b0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f Freed by task 5989: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3e/0x80 mm/kasan/common.c:68 kasan_save_free_info+0x46/0x50 mm/kasan/generic.c:576 poison_slab_object mm/kasan/common.c:247 [inline] __kasan_slab_free+0x62/0x70 mm/kasan/common.c:264 kasan_slab_free include/linux/kasan.h:233 [inline] slab_free_hook mm/slub.c:2380 [inline] slab_free mm/slub.c:4642 [inline] kfree+0x18e/0x440 mm/slub.c:4841 mgmt_pending_foreach+0xc9/0x120 net/bluetooth/mgmt_util.c:242 mgmt_index_removed+0x10d/0x2f0 net/bluetooth/mgmt.c:9366 hci_sock_bind+0xbe9/0x1000 net/bluetooth/hci_sock.c:1314 __sys_bind_socket net/socket.c:1810 [inline] __sys_bind+0x2c3/0x3e0 net/socket.c:1841 __do_sys_bind net/socket.c:1846 [inline] __se_sys_bind net/socket.c:1844 [inline] __x64_sys_bind+0x7a/0x90 net/socket.c:1844 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xfa/0x3b0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f Fixes: 66bd095ab5d4 ("Bluetooth: advmon offload MSFT remove monitor") Closes: https://syzkaller.appspot.com/bug?extid=feb0dc579bbe30a13190 Reported-by: syzbot+feb0dc579bbe30a13190@syzkaller.appspotmail.com Tested-by: syzbot+feb0dc579bbe30a13190@syzkaller.appspotmail.com Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 1 - net/bluetooth/hci_core.c | 4 +--- net/bluetooth/mgmt.c | 37 +++++++++++-------------------------- 3 files changed, 12 insertions(+), 30 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 2b261e74e2c4..93fcb659f0d4 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -2400,7 +2400,6 @@ void mgmt_advertising_added(struct sock *sk, struct hci_dev *hdev, u8 instance); void mgmt_advertising_removed(struct sock *sk, struct hci_dev *hdev, u8 instance); -void mgmt_adv_monitor_removed(struct hci_dev *hdev, u16 handle); int mgmt_phy_configuration_changed(struct hci_dev *hdev, struct sock *skip); void mgmt_adv_monitor_device_lost(struct hci_dev *hdev, u16 handle, bdaddr_t *bdaddr, u8 addr_type); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 04845ff3ad57..aeda2e4557d5 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1877,10 +1877,8 @@ void hci_free_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor) if (monitor->handle) idr_remove(&hdev->adv_monitors_idr, monitor->handle); - if (monitor->state != ADV_MONITOR_STATE_NOT_REGISTERED) { + if (monitor->state != ADV_MONITOR_STATE_NOT_REGISTERED) hdev->adv_monitors_cnt--; - mgmt_adv_monitor_removed(hdev, monitor->handle); - } kfree(monitor); } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 14a9462fced5..feaeec2423ae 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -5108,24 +5108,14 @@ static void mgmt_adv_monitor_added(struct sock *sk, struct hci_dev *hdev, mgmt_event(MGMT_EV_ADV_MONITOR_ADDED, hdev, &ev, sizeof(ev), sk); } -void mgmt_adv_monitor_removed(struct hci_dev *hdev, u16 handle) +static void mgmt_adv_monitor_removed(struct sock *sk, struct hci_dev *hdev, + u16 handle) { struct mgmt_ev_adv_monitor_removed ev; - struct mgmt_pending_cmd *cmd; - struct sock *sk_skip = NULL; - struct mgmt_cp_remove_adv_monitor *cp; - - cmd = pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev); - if (cmd) { - cp = cmd->param; - - if (cp->monitor_handle) - sk_skip = cmd->sk; - } ev.monitor_handle = cpu_to_le16(handle); - mgmt_event(MGMT_EV_ADV_MONITOR_REMOVED, hdev, &ev, sizeof(ev), sk_skip); + mgmt_event(MGMT_EV_ADV_MONITOR_REMOVED, hdev, &ev, sizeof(ev), sk); } static int read_adv_mon_features(struct sock *sk, struct hci_dev *hdev, @@ -5227,8 +5217,7 @@ static int __add_adv_patterns_monitor(struct sock *sk, struct hci_dev *hdev, if (pending_find(MGMT_OP_SET_LE, hdev) || pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR, hdev) || - pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI, hdev) || - pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev)) { + pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI, hdev)) { status = MGMT_STATUS_BUSY; goto unlock; } @@ -5398,8 +5387,7 @@ static void mgmt_remove_adv_monitor_complete(struct hci_dev *hdev, struct mgmt_pending_cmd *cmd = data; struct mgmt_cp_remove_adv_monitor *cp; - if (status == -ECANCELED || - cmd != pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev)) + if (status == -ECANCELED) return; hci_dev_lock(hdev); @@ -5408,12 +5396,14 @@ static void mgmt_remove_adv_monitor_complete(struct hci_dev *hdev, rp.monitor_handle = cp->monitor_handle; - if (!status) + if (!status) { + mgmt_adv_monitor_removed(cmd->sk, hdev, cp->monitor_handle); hci_update_passive_scan(hdev); + } mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(status), &rp, sizeof(rp)); - mgmt_pending_remove(cmd); + mgmt_pending_free(cmd); hci_dev_unlock(hdev); bt_dev_dbg(hdev, "remove monitor %d complete, status %d", @@ -5423,10 +5413,6 @@ static void mgmt_remove_adv_monitor_complete(struct hci_dev *hdev, static int mgmt_remove_adv_monitor_sync(struct hci_dev *hdev, void *data) { struct mgmt_pending_cmd *cmd = data; - - if (cmd != pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev)) - return -ECANCELED; - struct mgmt_cp_remove_adv_monitor *cp = cmd->param; u16 handle = __le16_to_cpu(cp->monitor_handle); @@ -5445,14 +5431,13 @@ static int remove_adv_monitor(struct sock *sk, struct hci_dev *hdev, hci_dev_lock(hdev); if (pending_find(MGMT_OP_SET_LE, hdev) || - pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev) || pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR, hdev) || pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI, hdev)) { status = MGMT_STATUS_BUSY; goto unlock; } - cmd = mgmt_pending_add(sk, MGMT_OP_REMOVE_ADV_MONITOR, hdev, data, len); + cmd = mgmt_pending_new(sk, MGMT_OP_REMOVE_ADV_MONITOR, hdev, data, len); if (!cmd) { status = MGMT_STATUS_NO_RESOURCES; goto unlock; @@ -5462,7 +5447,7 @@ static int remove_adv_monitor(struct sock *sk, struct hci_dev *hdev, mgmt_remove_adv_monitor_complete); if (err) { - mgmt_pending_remove(cmd); + mgmt_pending_free(cmd); if (err == -ENOMEM) status = MGMT_STATUS_NO_RESOURCES; -- cgit v1.2.3 From 6fe26f694c824b8a4dbf50c635bee1302e3f099c Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 20 May 2025 15:42:21 -0400 Subject: Bluetooth: MGMT: Protect mgmt_pending list with its own lock This uses a mutex to protect from concurrent access of mgmt_pending list which can cause crashes like: ================================================================== BUG: KASAN: slab-use-after-free in hci_sock_get_channel+0x60/0x68 net/bluetooth/hci_sock.c:91 Read of size 2 at addr ffff0000c48885b2 by task syz.4.334/7318 CPU: 0 UID: 0 PID: 7318 Comm: syz.4.334 Not tainted 6.15.0-rc7-syzkaller-g187899f4124a #0 PREEMPT Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2025 Call trace: show_stack+0x2c/0x3c arch/arm64/kernel/stacktrace.c:466 (C) __dump_stack+0x30/0x40 lib/dump_stack.c:94 dump_stack_lvl+0xd8/0x12c lib/dump_stack.c:120 print_address_description+0xa8/0x254 mm/kasan/report.c:408 print_report+0x68/0x84 mm/kasan/report.c:521 kasan_report+0xb0/0x110 mm/kasan/report.c:634 __asan_report_load2_noabort+0x20/0x2c mm/kasan/report_generic.c:379 hci_sock_get_channel+0x60/0x68 net/bluetooth/hci_sock.c:91 mgmt_pending_find+0x7c/0x140 net/bluetooth/mgmt_util.c:223 pending_find net/bluetooth/mgmt.c:947 [inline] remove_adv_monitor+0x44/0x1a4 net/bluetooth/mgmt.c:5445 hci_mgmt_cmd+0x780/0xc00 net/bluetooth/hci_sock.c:1712 hci_sock_sendmsg+0x544/0xbb0 net/bluetooth/hci_sock.c:1832 sock_sendmsg_nosec net/socket.c:712 [inline] __sock_sendmsg net/socket.c:727 [inline] sock_write_iter+0x25c/0x378 net/socket.c:1131 new_sync_write fs/read_write.c:591 [inline] vfs_write+0x62c/0x97c fs/read_write.c:684 ksys_write+0x120/0x210 fs/read_write.c:736 __do_sys_write fs/read_write.c:747 [inline] __se_sys_write fs/read_write.c:744 [inline] __arm64_sys_write+0x7c/0x90 fs/read_write.c:744 __invoke_syscall arch/arm64/kernel/syscall.c:35 [inline] invoke_syscall+0x98/0x2b8 arch/arm64/kernel/syscall.c:49 el0_svc_common+0x130/0x23c arch/arm64/kernel/syscall.c:132 do_el0_svc+0x48/0x58 arch/arm64/kernel/syscall.c:151 el0_svc+0x58/0x17c arch/arm64/kernel/entry-common.c:767 el0t_64_sync_handler+0x78/0x108 arch/arm64/kernel/entry-common.c:786 el0t_64_sync+0x198/0x19c arch/arm64/kernel/entry.S:600 Allocated by task 7037: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x40/0x78 mm/kasan/common.c:68 kasan_save_alloc_info+0x44/0x54 mm/kasan/generic.c:562 poison_kmalloc_redzone mm/kasan/common.c:377 [inline] __kasan_kmalloc+0x9c/0xb4 mm/kasan/common.c:394 kasan_kmalloc include/linux/kasan.h:260 [inline] __do_kmalloc_node mm/slub.c:4327 [inline] __kmalloc_noprof+0x2fc/0x4c8 mm/slub.c:4339 kmalloc_noprof include/linux/slab.h:909 [inline] sk_prot_alloc+0xc4/0x1f0 net/core/sock.c:2198 sk_alloc+0x44/0x3ac net/core/sock.c:2254 bt_sock_alloc+0x4c/0x300 net/bluetooth/af_bluetooth.c:148 hci_sock_create+0xa8/0x194 net/bluetooth/hci_sock.c:2202 bt_sock_create+0x14c/0x24c net/bluetooth/af_bluetooth.c:132 __sock_create+0x43c/0x91c net/socket.c:1541 sock_create net/socket.c:1599 [inline] __sys_socket_create net/socket.c:1636 [inline] __sys_socket+0xd4/0x1c0 net/socket.c:1683 __do_sys_socket net/socket.c:1697 [inline] __se_sys_socket net/socket.c:1695 [inline] __arm64_sys_socket+0x7c/0x94 net/socket.c:1695 __invoke_syscall arch/arm64/kernel/syscall.c:35 [inline] invoke_syscall+0x98/0x2b8 arch/arm64/kernel/syscall.c:49 el0_svc_common+0x130/0x23c arch/arm64/kernel/syscall.c:132 do_el0_svc+0x48/0x58 arch/arm64/kernel/syscall.c:151 el0_svc+0x58/0x17c arch/arm64/kernel/entry-common.c:767 el0t_64_sync_handler+0x78/0x108 arch/arm64/kernel/entry-common.c:786 el0t_64_sync+0x198/0x19c arch/arm64/kernel/entry.S:600 Freed by task 6607: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x40/0x78 mm/kasan/common.c:68 kasan_save_free_info+0x58/0x70 mm/kasan/generic.c:576 poison_slab_object mm/kasan/common.c:247 [inline] __kasan_slab_free+0x68/0x88 mm/kasan/common.c:264 kasan_slab_free include/linux/kasan.h:233 [inline] slab_free_hook mm/slub.c:2380 [inline] slab_free mm/slub.c:4642 [inline] kfree+0x17c/0x474 mm/slub.c:4841 sk_prot_free net/core/sock.c:2237 [inline] __sk_destruct+0x4f4/0x760 net/core/sock.c:2332 sk_destruct net/core/sock.c:2360 [inline] __sk_free+0x320/0x430 net/core/sock.c:2371 sk_free+0x60/0xc8 net/core/sock.c:2382 sock_put include/net/sock.h:1944 [inline] mgmt_pending_free+0x88/0x118 net/bluetooth/mgmt_util.c:290 mgmt_pending_remove+0xec/0x104 net/bluetooth/mgmt_util.c:298 mgmt_set_powered_complete+0x418/0x5cc net/bluetooth/mgmt.c:1355 hci_cmd_sync_work+0x204/0x33c net/bluetooth/hci_sync.c:334 process_one_work+0x7e8/0x156c kernel/workqueue.c:3238 process_scheduled_works kernel/workqueue.c:3319 [inline] worker_thread+0x958/0xed8 kernel/workqueue.c:3400 kthread+0x5fc/0x75c kernel/kthread.c:464 ret_from_fork+0x10/0x20 arch/arm64/kernel/entry.S:847 Fixes: a380b6cff1a2 ("Bluetooth: Add generic mgmt helper API") Closes: https://syzkaller.appspot.com/bug?extid=0a7039d5d9986ff4ecec Closes: https://syzkaller.appspot.com/bug?extid=cc0cc52e7f43dc9e6df1 Reported-by: syzbot+0a7039d5d9986ff4ecec@syzkaller.appspotmail.com Tested-by: syzbot+0a7039d5d9986ff4ecec@syzkaller.appspotmail.com Tested-by: syzbot+cc0cc52e7f43dc9e6df1@syzkaller.appspotmail.com Signed-off-by: Dmitry Antipov Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_core.c | 1 + net/bluetooth/mgmt.c | 101 +++++++++++++++++++-------------------- net/bluetooth/mgmt_util.c | 32 +++++++++++-- net/bluetooth/mgmt_util.h | 4 +- 5 files changed, 80 insertions(+), 59 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 93fcb659f0d4..f7b1a9eb9543 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -546,6 +546,7 @@ struct hci_dev { struct hci_conn_hash conn_hash; struct list_head mesh_pending; + struct mutex mgmt_pending_lock; struct list_head mgmt_pending; struct list_head reject_list; struct list_head accept_list; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index aeda2e4557d5..487c045a7ba8 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2485,6 +2485,7 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv) mutex_init(&hdev->lock); mutex_init(&hdev->req_lock); + mutex_init(&hdev->mgmt_pending_lock); ida_init(&hdev->unset_handle_ida); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index feaeec2423ae..de7adb9a47f9 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1447,22 +1447,17 @@ static void settings_rsp(struct mgmt_pending_cmd *cmd, void *data) send_settings_rsp(cmd->sk, cmd->opcode, match->hdev); - list_del(&cmd->list); - if (match->sk == NULL) { match->sk = cmd->sk; sock_hold(match->sk); } - - mgmt_pending_free(cmd); } static void cmd_status_rsp(struct mgmt_pending_cmd *cmd, void *data) { u8 *status = data; - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, *status); - mgmt_pending_remove(cmd); + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, *status); } static void cmd_complete_rsp(struct mgmt_pending_cmd *cmd, void *data) @@ -1476,8 +1471,6 @@ static void cmd_complete_rsp(struct mgmt_pending_cmd *cmd, void *data) if (cmd->cmd_complete) { cmd->cmd_complete(cmd, match->mgmt_status); - mgmt_pending_remove(cmd); - return; } @@ -1486,13 +1479,13 @@ static void cmd_complete_rsp(struct mgmt_pending_cmd *cmd, void *data) static int generic_cmd_complete(struct mgmt_pending_cmd *cmd, u8 status) { - return mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, + return mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, status, cmd->param, cmd->param_len); } static int addr_cmd_complete(struct mgmt_pending_cmd *cmd, u8 status) { - return mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, + return mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, status, cmd->param, sizeof(struct mgmt_addr_info)); } @@ -1532,7 +1525,7 @@ static void mgmt_set_discoverable_complete(struct hci_dev *hdev, void *data, if (err) { u8 mgmt_err = mgmt_status(err); - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err); + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_err); hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); goto done; } @@ -1707,7 +1700,7 @@ static void mgmt_set_connectable_complete(struct hci_dev *hdev, void *data, if (err) { u8 mgmt_err = mgmt_status(err); - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err); + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_err); goto done; } @@ -1943,8 +1936,8 @@ static void set_ssp_complete(struct hci_dev *hdev, void *data, int err) new_settings(hdev, NULL); } - mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, cmd_status_rsp, - &mgmt_err); + mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, true, + cmd_status_rsp, &mgmt_err); return; } @@ -1954,7 +1947,7 @@ static void set_ssp_complete(struct hci_dev *hdev, void *data, int err) changed = hci_dev_test_and_clear_flag(hdev, HCI_SSP_ENABLED); } - mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, settings_rsp, &match); + mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, true, settings_rsp, &match); if (changed) new_settings(hdev, match.sk); @@ -2074,12 +2067,12 @@ static void set_le_complete(struct hci_dev *hdev, void *data, int err) bt_dev_dbg(hdev, "err %d", err); if (status) { - mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, cmd_status_rsp, - &status); + mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, true, cmd_status_rsp, + &status); return; } - mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, settings_rsp, &match); + mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, true, settings_rsp, &match); new_settings(hdev, match.sk); @@ -2138,7 +2131,7 @@ static void set_mesh_complete(struct hci_dev *hdev, void *data, int err) struct sock *sk = cmd->sk; if (status) { - mgmt_pending_foreach(MGMT_OP_SET_MESH_RECEIVER, hdev, + mgmt_pending_foreach(MGMT_OP_SET_MESH_RECEIVER, hdev, true, cmd_status_rsp, &status); return; } @@ -2638,7 +2631,7 @@ static void mgmt_class_complete(struct hci_dev *hdev, void *data, int err) bt_dev_dbg(hdev, "err %d", err); - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err), hdev->dev_class, 3); mgmt_pending_free(cmd); @@ -3427,7 +3420,7 @@ static int pairing_complete(struct mgmt_pending_cmd *cmd, u8 status) bacpy(&rp.addr.bdaddr, &conn->dst); rp.addr.type = link_to_bdaddr(conn->type, conn->dst_type); - err = mgmt_cmd_complete(cmd->sk, cmd->index, MGMT_OP_PAIR_DEVICE, + err = mgmt_cmd_complete(cmd->sk, cmd->hdev->id, MGMT_OP_PAIR_DEVICE, status, &rp, sizeof(rp)); /* So we don't get further callbacks for this connection */ @@ -5186,7 +5179,7 @@ static void mgmt_add_adv_patterns_monitor_complete(struct hci_dev *hdev, hci_update_passive_scan(hdev); } - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(status), &rp, sizeof(rp)); mgmt_pending_remove(cmd); @@ -5401,7 +5394,7 @@ static void mgmt_remove_adv_monitor_complete(struct hci_dev *hdev, hci_update_passive_scan(hdev); } - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(status), &rp, sizeof(rp)); mgmt_pending_free(cmd); @@ -5777,7 +5770,7 @@ static void start_discovery_complete(struct hci_dev *hdev, void *data, int err) cmd != pending_find(MGMT_OP_START_SERVICE_DISCOVERY, hdev)) return; - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err), + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err), cmd->param, 1); mgmt_pending_remove(cmd); @@ -5998,7 +5991,7 @@ static void stop_discovery_complete(struct hci_dev *hdev, void *data, int err) bt_dev_dbg(hdev, "err %d", err); - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err), + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err), cmd->param, 1); mgmt_pending_remove(cmd); @@ -6223,7 +6216,7 @@ static void set_advertising_complete(struct hci_dev *hdev, void *data, int err) u8 status = mgmt_status(err); if (status) { - mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, + mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, true, cmd_status_rsp, &status); return; } @@ -6233,7 +6226,7 @@ static void set_advertising_complete(struct hci_dev *hdev, void *data, int err) else hci_dev_clear_flag(hdev, HCI_ADVERTISING); - mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, settings_rsp, + mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, true, settings_rsp, &match); new_settings(hdev, match.sk); @@ -6577,7 +6570,7 @@ static void set_bredr_complete(struct hci_dev *hdev, void *data, int err) */ hci_dev_clear_flag(hdev, HCI_BREDR_ENABLED); - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err); + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_err); } else { send_settings_rsp(cmd->sk, MGMT_OP_SET_BREDR, hdev); new_settings(hdev, cmd->sk); @@ -6714,7 +6707,7 @@ static void set_secure_conn_complete(struct hci_dev *hdev, void *data, int err) if (err) { u8 mgmt_err = mgmt_status(err); - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err); + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_err); goto done; } @@ -7161,7 +7154,7 @@ static void get_conn_info_complete(struct hci_dev *hdev, void *data, int err) rp.max_tx_power = HCI_TX_POWER_INVALID; } - mgmt_cmd_complete(cmd->sk, cmd->index, MGMT_OP_GET_CONN_INFO, status, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, MGMT_OP_GET_CONN_INFO, status, &rp, sizeof(rp)); mgmt_pending_free(cmd); @@ -7321,7 +7314,7 @@ static void get_clock_info_complete(struct hci_dev *hdev, void *data, int err) } complete: - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, &rp, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, status, &rp, sizeof(rp)); mgmt_pending_free(cmd); @@ -8571,10 +8564,10 @@ static void add_advertising_complete(struct hci_dev *hdev, void *data, int err) rp.instance = cp->instance; if (err) - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err)); else - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err), &rp, sizeof(rp)); add_adv_complete(hdev, cmd->sk, cp->instance, err); @@ -8762,10 +8755,10 @@ static void add_ext_adv_params_complete(struct hci_dev *hdev, void *data, hci_remove_adv_instance(hdev, cp->instance); - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err)); } else { - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err), &rp, sizeof(rp)); } @@ -8912,10 +8905,10 @@ static void add_ext_adv_data_complete(struct hci_dev *hdev, void *data, int err) rp.instance = cp->instance; if (err) - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err)); else - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err), &rp, sizeof(rp)); mgmt_pending_free(cmd); @@ -9074,10 +9067,10 @@ static void remove_advertising_complete(struct hci_dev *hdev, void *data, rp.instance = cp->instance; if (err) - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err)); else - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, MGMT_STATUS_SUCCESS, &rp, sizeof(rp)); mgmt_pending_free(cmd); @@ -9349,7 +9342,7 @@ void mgmt_index_removed(struct hci_dev *hdev) if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) return; - mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &match); + mgmt_pending_foreach(0, hdev, true, cmd_complete_rsp, &match); if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { mgmt_index_event(MGMT_EV_UNCONF_INDEX_REMOVED, hdev, NULL, 0, @@ -9387,7 +9380,8 @@ void mgmt_power_on(struct hci_dev *hdev, int err) hci_update_passive_scan(hdev); } - mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match); + mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, true, settings_rsp, + &match); new_settings(hdev, match.sk); @@ -9402,7 +9396,8 @@ void __mgmt_power_off(struct hci_dev *hdev) struct cmd_lookup match = { NULL, hdev }; u8 zero_cod[] = { 0, 0, 0 }; - mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match); + mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, true, settings_rsp, + &match); /* If the power off is because of hdev unregistration let * use the appropriate INVALID_INDEX status. Otherwise use @@ -9416,7 +9411,7 @@ void __mgmt_power_off(struct hci_dev *hdev) else match.mgmt_status = MGMT_STATUS_NOT_POWERED; - mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &match); + mgmt_pending_foreach(0, hdev, true, cmd_complete_rsp, &match); if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0) { mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, @@ -9657,7 +9652,6 @@ static void unpair_device_rsp(struct mgmt_pending_cmd *cmd, void *data) device_unpaired(hdev, &cp->addr.bdaddr, cp->addr.type, cmd->sk); cmd->cmd_complete(cmd, 0); - mgmt_pending_remove(cmd); } bool mgmt_powering_down(struct hci_dev *hdev) @@ -9713,8 +9707,8 @@ void mgmt_disconnect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, struct mgmt_cp_disconnect *cp; struct mgmt_pending_cmd *cmd; - mgmt_pending_foreach(MGMT_OP_UNPAIR_DEVICE, hdev, unpair_device_rsp, - hdev); + mgmt_pending_foreach(MGMT_OP_UNPAIR_DEVICE, hdev, true, + unpair_device_rsp, hdev); cmd = pending_find(MGMT_OP_DISCONNECT, hdev); if (!cmd) @@ -9907,7 +9901,7 @@ void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status) if (status) { u8 mgmt_err = mgmt_status(status); - mgmt_pending_foreach(MGMT_OP_SET_LINK_SECURITY, hdev, + mgmt_pending_foreach(MGMT_OP_SET_LINK_SECURITY, hdev, true, cmd_status_rsp, &mgmt_err); return; } @@ -9917,8 +9911,8 @@ void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status) else changed = hci_dev_test_and_clear_flag(hdev, HCI_LINK_SECURITY); - mgmt_pending_foreach(MGMT_OP_SET_LINK_SECURITY, hdev, settings_rsp, - &match); + mgmt_pending_foreach(MGMT_OP_SET_LINK_SECURITY, hdev, true, + settings_rsp, &match); if (changed) new_settings(hdev, match.sk); @@ -9942,9 +9936,12 @@ void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, { struct cmd_lookup match = { NULL, hdev, mgmt_status(status) }; - mgmt_pending_foreach(MGMT_OP_SET_DEV_CLASS, hdev, sk_lookup, &match); - mgmt_pending_foreach(MGMT_OP_ADD_UUID, hdev, sk_lookup, &match); - mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, sk_lookup, &match); + mgmt_pending_foreach(MGMT_OP_SET_DEV_CLASS, hdev, false, sk_lookup, + &match); + mgmt_pending_foreach(MGMT_OP_ADD_UUID, hdev, false, sk_lookup, + &match); + mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, false, sk_lookup, + &match); if (!status) { mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, dev_class, diff --git a/net/bluetooth/mgmt_util.c b/net/bluetooth/mgmt_util.c index 3713ff490c65..a88a07da3947 100644 --- a/net/bluetooth/mgmt_util.c +++ b/net/bluetooth/mgmt_util.c @@ -217,30 +217,47 @@ int mgmt_cmd_complete(struct sock *sk, u16 index, u16 cmd, u8 status, struct mgmt_pending_cmd *mgmt_pending_find(unsigned short channel, u16 opcode, struct hci_dev *hdev) { - struct mgmt_pending_cmd *cmd; + struct mgmt_pending_cmd *cmd, *tmp; + + mutex_lock(&hdev->mgmt_pending_lock); - list_for_each_entry(cmd, &hdev->mgmt_pending, list) { + list_for_each_entry_safe(cmd, tmp, &hdev->mgmt_pending, list) { if (hci_sock_get_channel(cmd->sk) != channel) continue; - if (cmd->opcode == opcode) + + if (cmd->opcode == opcode) { + mutex_unlock(&hdev->mgmt_pending_lock); return cmd; + } } + mutex_unlock(&hdev->mgmt_pending_lock); + return NULL; } -void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, +void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, bool remove, void (*cb)(struct mgmt_pending_cmd *cmd, void *data), void *data) { struct mgmt_pending_cmd *cmd, *tmp; + mutex_lock(&hdev->mgmt_pending_lock); + list_for_each_entry_safe(cmd, tmp, &hdev->mgmt_pending, list) { if (opcode > 0 && cmd->opcode != opcode) continue; + if (remove) + list_del(&cmd->list); + cb(cmd, data); + + if (remove) + mgmt_pending_free(cmd); } + + mutex_unlock(&hdev->mgmt_pending_lock); } struct mgmt_pending_cmd *mgmt_pending_new(struct sock *sk, u16 opcode, @@ -254,7 +271,7 @@ struct mgmt_pending_cmd *mgmt_pending_new(struct sock *sk, u16 opcode, return NULL; cmd->opcode = opcode; - cmd->index = hdev->id; + cmd->hdev = hdev; cmd->param = kmemdup(data, len, GFP_KERNEL); if (!cmd->param) { @@ -280,7 +297,9 @@ struct mgmt_pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode, if (!cmd) return NULL; + mutex_lock(&hdev->mgmt_pending_lock); list_add_tail(&cmd->list, &hdev->mgmt_pending); + mutex_unlock(&hdev->mgmt_pending_lock); return cmd; } @@ -294,7 +313,10 @@ void mgmt_pending_free(struct mgmt_pending_cmd *cmd) void mgmt_pending_remove(struct mgmt_pending_cmd *cmd) { + mutex_lock(&cmd->hdev->mgmt_pending_lock); list_del(&cmd->list); + mutex_unlock(&cmd->hdev->mgmt_pending_lock); + mgmt_pending_free(cmd); } diff --git a/net/bluetooth/mgmt_util.h b/net/bluetooth/mgmt_util.h index f2ba994ab1d8..024e51dd6937 100644 --- a/net/bluetooth/mgmt_util.h +++ b/net/bluetooth/mgmt_util.h @@ -33,7 +33,7 @@ struct mgmt_mesh_tx { struct mgmt_pending_cmd { struct list_head list; u16 opcode; - int index; + struct hci_dev *hdev; void *param; size_t param_len; struct sock *sk; @@ -54,7 +54,7 @@ int mgmt_cmd_complete(struct sock *sk, u16 index, u16 cmd, u8 status, struct mgmt_pending_cmd *mgmt_pending_find(unsigned short channel, u16 opcode, struct hci_dev *hdev); -void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, +void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, bool remove, void (*cb)(struct mgmt_pending_cmd *cmd, void *data), void *data); struct mgmt_pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode, -- cgit v1.2.3 From 692eb9f8a5b71d852e873375d20cf5da7a046ea6 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Mon, 2 Jun 2025 21:49:14 +0200 Subject: net: dsa: b53: fix untagged traffic sent via cpu tagged with VID 0 When Linux sends out untagged traffic from a port, it will enter the CPU port without any VLAN tag, even if the port is a member of a vlan filtering bridge with a PVID egress untagged VLAN. This makes the CPU port's PVID take effect, and the PVID's VLAN table entry controls if the packet will be tagged on egress. Since commit 45e9d59d3950 ("net: dsa: b53: do not allow to configure VLAN 0") we remove bridged ports from VLAN 0 when joining or leaving a VLAN aware bridge. But we also clear the untagged bit, causing untagged traffic from the controller to become tagged with VID 0 (and priority 0). Fix this by not touching the untagged map of VLAN 0. Additionally, always keep the CPU port as a member, as the untag map is only effective as long as there is at least one member, and we would remove it when bridging all ports and leaving no standalone ports. Since Linux (and the switch) treats VLAN 0 tagged traffic like untagged, the actual impact of this is rather low, but this also prevented earlier detection of the issue. Fixes: 45e9d59d3950 ("net: dsa: b53: do not allow to configure VLAN 0") Signed-off-by: Jonas Gorski Reviewed-by: Florian Fainelli Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20250602194914.1011890-1-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 862bdccb7439..dc2f4adac9bc 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -2034,9 +2034,6 @@ int b53_br_join(struct dsa_switch *ds, int port, struct dsa_bridge bridge, b53_get_vlan_entry(dev, pvid, vl); vl->members &= ~BIT(port); - if (vl->members == BIT(cpu_port)) - vl->members &= ~BIT(cpu_port); - vl->untag = vl->members; b53_set_vlan_entry(dev, pvid, vl); } @@ -2115,8 +2112,7 @@ void b53_br_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge) } b53_get_vlan_entry(dev, pvid, vl); - vl->members |= BIT(port) | BIT(cpu_port); - vl->untag |= BIT(port) | BIT(cpu_port); + vl->members |= BIT(port); b53_set_vlan_entry(dev, pvid, vl); } } -- cgit v1.2.3 From 87f7ce260a3c838b49e1dc1ceedf1006795157a2 Mon Sep 17 00:00:00 2001 From: Jeongjun Park Date: Wed, 21 May 2025 01:07:17 +0900 Subject: ptp: remove ptp->n_vclocks check logic in ptp_vclock_in_use() There is no disagreement that we should check both ptp->is_virtual_clock and ptp->n_vclocks to check if the ptp virtual clock is in use. However, when we acquire ptp->n_vclocks_mux to read ptp->n_vclocks in ptp_vclock_in_use(), we observe a recursive lock in the call trace starting from n_vclocks_store(). ============================================ WARNING: possible recursive locking detected 6.15.0-rc6 #1 Not tainted -------------------------------------------- syz.0.1540/13807 is trying to acquire lock: ffff888035a24868 (&ptp->n_vclocks_mux){+.+.}-{4:4}, at: ptp_vclock_in_use drivers/ptp/ptp_private.h:103 [inline] ffff888035a24868 (&ptp->n_vclocks_mux){+.+.}-{4:4}, at: ptp_clock_unregister+0x21/0x250 drivers/ptp/ptp_clock.c:415 but task is already holding lock: ffff888030704868 (&ptp->n_vclocks_mux){+.+.}-{4:4}, at: n_vclocks_store+0xf1/0x6d0 drivers/ptp/ptp_sysfs.c:215 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&ptp->n_vclocks_mux); lock(&ptp->n_vclocks_mux); *** DEADLOCK *** .... ============================================ The best way to solve this is to remove the logic that checks ptp->n_vclocks in ptp_vclock_in_use(). The reason why this is appropriate is that any path that uses ptp->n_vclocks must unconditionally check if ptp->n_vclocks is greater than 0 before unregistering vclocks, and all functions are already written this way. And in the function that uses ptp->n_vclocks, we already get ptp->n_vclocks_mux before unregistering vclocks. Therefore, we need to remove the redundant check for ptp->n_vclocks in ptp_vclock_in_use() to prevent recursive locking. Fixes: 73f37068d540 ("ptp: support ptp physical/virtual clocks conversion") Signed-off-by: Jeongjun Park Acked-by: Richard Cochran Link: https://patch.msgid.link/20250520160717.7350-1-aha310510@gmail.com Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_private.h | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h index 18934e28469e..528d86a33f37 100644 --- a/drivers/ptp/ptp_private.h +++ b/drivers/ptp/ptp_private.h @@ -98,17 +98,7 @@ static inline int queue_cnt(const struct timestamp_event_queue *q) /* Check if ptp virtual clock is in use */ static inline bool ptp_vclock_in_use(struct ptp_clock *ptp) { - bool in_use = false; - - if (mutex_lock_interruptible(&ptp->n_vclocks_mux)) - return true; - - if (!ptp->is_virtual_clock && ptp->n_vclocks) - in_use = true; - - mutex_unlock(&ptp->n_vclocks_mux); - - return in_use; + return !ptp->is_virtual_clock; } /* Check if ptp clock shall be free running */ -- cgit v1.2.3 From 82cbd06f327f3c2ccdee990bd356c9303ae168f9 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Thu, 5 Jun 2025 14:08:36 +0800 Subject: net: enetc: fix the netc-lib driver build dependency The kernel robot reported the following errors when the netc-lib driver was compiled as a loadable module and the enetc-core driver was built-in. ld.lld: error: undefined symbol: ntmp_init_cbdr referenced by enetc_cbdr.c:88 (drivers/net/ethernet/freescale/enetc/enetc_cbdr.c:88) ld.lld: error: undefined symbol: ntmp_free_cbdr referenced by enetc_cbdr.c:96 (drivers/net/ethernet/freescale/enetc/enetc_cbdr.c:96) Simply changing "tristate" to "bool" can fix this issue, but considering that the netc-lib driver needs to support being compiled as a loadable module and LS1028 does not need the netc-lib driver. Therefore, we add a boolean symbol 'NXP_NTMP' to enable 'NXP_NETC_LIB' as needed. And when adding NETC switch driver support in the future, there is no need to modify the dependency, just select "NXP_NTMP" and "NXP_NETC_LIB" at the same time. Reported-by: Arnd Bergmann Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202505220734.x6TF6oHR-lkp@intel.com/ Fixes: 4701073c3deb ("net: enetc: add initial netc-lib driver to support NTMP") Suggested-by: Arnd Bergmann Signed-off-by: Wei Fang Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/enetc/Kconfig | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/enetc/Kconfig b/drivers/net/ethernet/freescale/enetc/Kconfig index e917132d3714..54b0f0a5a6bb 100644 --- a/drivers/net/ethernet/freescale/enetc/Kconfig +++ b/drivers/net/ethernet/freescale/enetc/Kconfig @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 config FSL_ENETC_CORE tristate + select NXP_NETC_LIB if NXP_NTMP help This module supports common functionality between the PF and VF drivers for the NXP ENETC controller. @@ -22,6 +23,9 @@ config NXP_NETC_LIB Switch, such as NETC Table Management Protocol (NTMP) 2.0, common tc flower and debugfs interfaces and so on. +config NXP_NTMP + bool + config FSL_ENETC tristate "ENETC PF driver" depends on PCI_MSI @@ -45,7 +49,7 @@ config NXP_ENETC4 select FSL_ENETC_CORE select FSL_ENETC_MDIO select NXP_ENETC_PF_COMMON - select NXP_NETC_LIB + select NXP_NTMP select PHYLINK select DIMLIB help -- cgit v1.2.3 From 11fcf368506d347088e613edf6cd2604d70c454f Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Fri, 6 Jun 2025 10:23:57 +0200 Subject: uapi: bitops: use UAPI-safe variant of BITS_PER_LONG again MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 1e7933a575ed ("uapi: Revert "bitops: avoid integer overflow in GENMASK(_ULL)"") did not take in account that the usage of BITS_PER_LONG in __GENMASK() was changed to __BITS_PER_LONG for UAPI-safety in commit 3c7a8e190bc5 ("uapi: introduce uapi-friendly macros for GENMASK"). BITS_PER_LONG can not be used in UAPI headers as it derives from the kernel configuration and not from the current compiler invocation. When building compat userspace code or a compat vDSO its value will be incorrect. Switch back to __BITS_PER_LONG. Fixes: 1e7933a575ed ("uapi: Revert "bitops: avoid integer overflow in GENMASK(_ULL)"") Cc: stable@vger.kernel.org Signed-off-by: Thomas Weißschuh Signed-off-by: Yury Norov [NVIDIA] --- include/uapi/linux/bits.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/bits.h b/include/uapi/linux/bits.h index 682b406e1067..a04afef9efca 100644 --- a/include/uapi/linux/bits.h +++ b/include/uapi/linux/bits.h @@ -4,9 +4,9 @@ #ifndef _UAPI_LINUX_BITS_H #define _UAPI_LINUX_BITS_H -#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (BITS_PER_LONG - 1 - (h)))) +#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (__BITS_PER_LONG - 1 - (h)))) -#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h)))) +#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) #define __GENMASK_U128(h, l) \ ((_BIT128((h)) << 1) - (_BIT128(l))) -- cgit v1.2.3 From c0f691388992c708436ab5f6e810865be6ddf5c6 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 5 Jun 2025 17:04:11 +0200 Subject: intel_idle: Use subsys_initcall_sync() for initialization It is not necessary to wait until the device_initcall() stage with intel_idle initialization. All of its dependencies are met after all subsys_initcall()s have run, so subsys_initcall_sync() can be used for initializing it. It is also better to ensure that intel_idle will always initialize before the ACPI processor driver that uses module_init() for its initialization. Signed-off-by: Rafael J. Wysocki Tested-by: Artem Bityutskiy Link: https://patch.msgid.link/2994397.e9J7NaK4W3@rjwysocki.net --- drivers/idle/intel_idle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 8ccb483204fa..64ac4da08094 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -2518,7 +2518,7 @@ init_driver_fail: return retval; } -device_initcall(intel_idle_init); +subsys_initcall_sync(intel_idle_init); /* * We are not really modular, but we used to support that. Meaning we also -- cgit v1.2.3 From 02670deede2288d8e4e3d800477b27c091080fae Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 6 Jun 2025 13:21:34 -0700 Subject: libbpf: Handle unsupported mmap-based /sys/kernel/btf/vmlinux correctly libbpf_err_ptr() helpers are meant to return NULL and set errno, if there is an error. But btf_parse_raw_mmap() is meant to be used internally and is expected to return ERR_PTR() values. Because of this mismatch, when libbpf tries to mmap /sys/kernel/btf/vmlinux, we don't detect the error correctly with IS_ERR() check, and never fallback to old non-mmap-based way of loading vmlinux BTF. Fix this by using proper ERR_PTR() returns internally. Reported-by: Arnaldo Carvalho de Melo Reviewed-by: Arnaldo Carvalho de Melo Tested-by: Arnaldo Carvalho de Melo Fixes: 3c0421c93ce4 ("libbpf: Use mmap to parse vmlinux BTF from sysfs") Cc: Lorenz Bauer Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20250606202134.2738910-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/btf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index f1d495dc66bb..37682908cb0f 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1384,12 +1384,12 @@ static struct btf *btf_parse_raw_mmap(const char *path, struct btf *base_btf) fd = open(path, O_RDONLY); if (fd < 0) - return libbpf_err_ptr(-errno); + return ERR_PTR(-errno); if (fstat(fd, &st) < 0) { err = -errno; close(fd); - return libbpf_err_ptr(err); + return ERR_PTR(err); } data = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); @@ -1397,7 +1397,7 @@ static struct btf *btf_parse_raw_mmap(const char *path, struct btf *base_btf) close(fd); if (data == MAP_FAILED) - return libbpf_err_ptr(err); + return ERR_PTR(err); btf = btf_new(data, st.st_size, base_btf, true); if (IS_ERR(btf)) -- cgit v1.2.3 From 4c529a4a7260776bb4abe264498857b4537aa70d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 7 Jun 2025 14:22:56 +0200 Subject: x86/smp: PM/hibernate: Split arch_resume_nosmt() Move the inner part of the arch_resume_nosmt() code into a separate function called arch_cpu_rescan_dead_smt_siblings(), so it can be used in other places where "dead" SMT siblings may need to be taken online and offline again in order to get into deep idle states. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Acked-by: Dave Hansen Tested-by: Artem Bityutskiy Link: https://patch.msgid.link/3361688.44csPzL39Z@rjwysocki.net [ rjw: Prevent build issues with CONFIG_SMP unset ] Signed-off-by: Rafael J. Wysocki --- arch/x86/kernel/smp.c | 24 ++++++++++++++++++++++++ arch/x86/power/hibernate.c | 19 ++++++------------- include/linux/cpu.h | 3 +++ 3 files changed, 33 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 18266cc3d98c..b014e6d229f9 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -299,3 +299,27 @@ struct smp_ops smp_ops = { .send_call_func_single_ipi = native_send_call_func_single_ipi, }; EXPORT_SYMBOL_GPL(smp_ops); + +int arch_cpu_rescan_dead_smt_siblings(void) +{ + enum cpuhp_smt_control old = cpu_smt_control; + int ret; + + /* + * If SMT has been disabled and SMT siblings are in HLT, bring them back + * online and offline them again so that they end up in MWAIT proper. + * + * Called with hotplug enabled. + */ + if (old != CPU_SMT_DISABLED && old != CPU_SMT_FORCE_DISABLED) + return 0; + + ret = cpuhp_smt_enable(); + if (ret) + return ret; + + ret = cpuhp_smt_disable(old); + + return ret; +} +EXPORT_SYMBOL_GPL(arch_cpu_rescan_dead_smt_siblings); diff --git a/arch/x86/power/hibernate.c b/arch/x86/power/hibernate.c index a7c23f2a58c9..a2294c1649f6 100644 --- a/arch/x86/power/hibernate.c +++ b/arch/x86/power/hibernate.c @@ -192,7 +192,8 @@ out: int arch_resume_nosmt(void) { - int ret = 0; + int ret; + /* * We reached this while coming out of hibernation. This means * that SMT siblings are sleeping in hlt, as mwait is not safe @@ -206,18 +207,10 @@ int arch_resume_nosmt(void) * Called with hotplug disabled. */ cpu_hotplug_enable(); - if (cpu_smt_control == CPU_SMT_DISABLED || - cpu_smt_control == CPU_SMT_FORCE_DISABLED) { - enum cpuhp_smt_control old = cpu_smt_control; - - ret = cpuhp_smt_enable(); - if (ret) - goto out; - ret = cpuhp_smt_disable(old); - if (ret) - goto out; - } -out: + + ret = arch_cpu_rescan_dead_smt_siblings(); + cpu_hotplug_disable(); + return ret; } diff --git a/include/linux/cpu.h b/include/linux/cpu.h index e6089abc28e2..96a3a0d6a60e 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -120,6 +120,7 @@ extern void cpu_maps_update_begin(void); extern void cpu_maps_update_done(void); int bringup_hibernate_cpu(unsigned int sleep_cpu); void bringup_nonboot_cpus(unsigned int max_cpus); +int arch_cpu_rescan_dead_smt_siblings(void); #else /* CONFIG_SMP */ #define cpuhp_tasks_frozen 0 @@ -134,6 +135,8 @@ static inline void cpu_maps_update_done(void) static inline int add_cpu(unsigned int cpu) { return 0;} +static inline int arch_cpu_rescan_dead_smt_siblings(void) { return 0; } + #endif /* CONFIG_SMP */ extern const struct bus_type cpu_subsys; -- cgit v1.2.3 From a430c11f401589a0f4f57fd398271a5d85142c7a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 5 Jun 2025 17:06:08 +0200 Subject: intel_idle: Rescan "dead" SMT siblings during initialization Make intel_idle_init() call arch_cpu_rescan_dead_smt_siblings() after successfully registering intel_idle as the cpuidle driver so as to allow the "dead" SMT siblings (if any) to go into deep idle states. This is necessary for the processor to be able to reach deep package C-states (like PC10) going forward which is requisite for reducing power sufficiently in suspend-to-idle, among other things. Signed-off-by: Rafael J. Wysocki Tested-by: Artem Bityutskiy Link: https://patch.msgid.link/10669885.nUPlyArG6x@rjwysocki.net --- drivers/idle/intel_idle.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 64ac4da08094..63565814c7e5 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -2507,6 +2507,8 @@ static int __init intel_idle_init(void) pr_debug("Local APIC timer is reliable in %s\n", boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1"); + arch_cpu_rescan_dead_smt_siblings(); + return 0; hp_setup_fail: -- cgit v1.2.3 From f694481b1d3177144fcac4242eb750cfcb9f7bd5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 5 Jun 2025 17:07:31 +0200 Subject: ACPI: processor: Rescan "dead" SMT siblings during initialization Make acpi_processor_driver_init() call arch_cpu_rescan_dead_smt_siblings(), via a new wrapper function called acpi_idle_rescan_dead_smt_siblings(), after successfully initializing the driver, to allow the "dead" SMT siblings to go into deep idle states, which is necessary for the processor to be able to reach deep package C-states (like PC10) going forward, so that power can be reduced sufficiently in suspend-to-idle, among other things. However, do it only if the ACPI idle driver is the current cpuidle driver (otherwise it is assumed that another cpuidle driver will take care of this) and avoid doing it on architectures other than x86. Signed-off-by: Rafael J. Wysocki Tested-by: Artem Bityutskiy Link: https://patch.msgid.link/2005721.PYKUYFuaPT@rjwysocki.net --- drivers/acpi/internal.h | 6 ++++++ drivers/acpi/processor_driver.c | 3 +++ drivers/acpi/processor_idle.c | 8 ++++++++ 3 files changed, 17 insertions(+) diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 00910ccd7eda..e2781864fdce 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -175,6 +175,12 @@ bool processor_physically_present(acpi_handle handle); static inline void acpi_early_processor_control_setup(void) {} #endif +#ifdef CONFIG_ACPI_PROCESSOR_CSTATE +void acpi_idle_rescan_dead_smt_siblings(void); +#else +static inline void acpi_idle_rescan_dead_smt_siblings(void) {} +#endif + /* -------------------------------------------------------------------------- Embedded Controller -------------------------------------------------------------------------- */ diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c index 3b281bc1e73c..65e779be64ff 100644 --- a/drivers/acpi/processor_driver.c +++ b/drivers/acpi/processor_driver.c @@ -279,6 +279,9 @@ static int __init acpi_processor_driver_init(void) * after acpi_cppc_processor_probe() has been called for all online CPUs */ acpi_processor_init_invariance_cppc(); + + acpi_idle_rescan_dead_smt_siblings(); + return 0; err: driver_unregister(&acpi_processor_driver); diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index e2febca2ec13..2c2dc559e0f8 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -24,6 +24,8 @@ #include #include +#include "internal.h" + /* * Include the apic definitions for x86 to have the APIC timer related defines * available also for UP (on SMP it gets magically included via linux/smp.h). @@ -55,6 +57,12 @@ struct cpuidle_driver acpi_idle_driver = { }; #ifdef CONFIG_ACPI_PROCESSOR_CSTATE +void acpi_idle_rescan_dead_smt_siblings(void) +{ + if (cpuidle_get_driver() == &acpi_idle_driver) + arch_cpu_rescan_dead_smt_siblings(); +} + static DEFINE_PER_CPU(struct acpi_processor_cx * [CPUIDLE_STATE_MAX], acpi_cstate); -- cgit v1.2.3 From a18d098f2aab82abde1d59c56aef9beca01c892b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 5 Jun 2025 17:09:35 +0200 Subject: Reapply "x86/smp: Eliminate mwait_play_dead_cpuid_hint()" Revert commit 70523f335734 ("Revert "x86/smp: Eliminate mwait_play_dead_cpuid_hint()"") to reapply the changes from commit 96040f7273e2 ("x86/smp: Eliminate mwait_play_dead_cpuid_hint()") reverted by it. Previously, these changes caused idle power to rise on systems booting with "nosmt" in the kernel command line because they effectively caused "dead" SMT siblings to remain in idle state C1 after executing the HLT instruction, which prevented the processor from reaching package idle states deeper than PC2 going forward. Now, the "dead" SMT siblings are rescanned after initializing a proper cpuidle driver for the processor (either intel_idle or ACPI idle), at which point they are able to enter a sufficiently deep idle state in native_play_dead() via cpuidle, so the code changes in question can be reapplied. Signed-off-by: Rafael J. Wysocki Acked-by: Dave Hansen Tested-by: Artem Bityutskiy Link: https://patch.msgid.link/7813065.EvYhyI6sBW@rjwysocki.net --- arch/x86/kernel/smpboot.c | 54 ++++++----------------------------------------- 1 file changed, 7 insertions(+), 47 deletions(-) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index fc78c2325fd2..58ede3fa6a75 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1244,6 +1244,10 @@ void play_dead_common(void) local_irq_disable(); } +/* + * We need to flush the caches before going to sleep, lest we have + * dirty data in our caches when we come back up. + */ void __noreturn mwait_play_dead(unsigned int eax_hint) { struct mwait_cpu_dead *md = this_cpu_ptr(&mwait_cpu_dead); @@ -1289,50 +1293,6 @@ void __noreturn mwait_play_dead(unsigned int eax_hint) } } -/* - * We need to flush the caches before going to sleep, lest we have - * dirty data in our caches when we come back up. - */ -static inline void mwait_play_dead_cpuid_hint(void) -{ - unsigned int eax, ebx, ecx, edx; - unsigned int highest_cstate = 0; - unsigned int highest_subcstate = 0; - int i; - - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || - boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) - return; - if (!this_cpu_has(X86_FEATURE_MWAIT)) - return; - if (!this_cpu_has(X86_FEATURE_CLFLUSH)) - return; - - eax = CPUID_LEAF_MWAIT; - ecx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); - - /* - * eax will be 0 if EDX enumeration is not valid. - * Initialized below to cstate, sub_cstate value when EDX is valid. - */ - if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) { - eax = 0; - } else { - edx >>= MWAIT_SUBSTATE_SIZE; - for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) { - if (edx & MWAIT_SUBSTATE_MASK) { - highest_cstate = i; - highest_subcstate = edx & MWAIT_SUBSTATE_MASK; - } - } - eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) | - (highest_subcstate - 1); - } - - mwait_play_dead(eax); -} - /* * Kick all "offline" CPUs out of mwait on kexec(). See comment in * mwait_play_dead(). @@ -1383,9 +1343,9 @@ void native_play_dead(void) play_dead_common(); tboot_shutdown(TB_SHUTDOWN_WFS); - mwait_play_dead_cpuid_hint(); - if (cpuidle_play_dead()) - hlt_play_dead(); + /* Below returns only on error. */ + cpuidle_play_dead(); + hlt_play_dead(); } #else /* ... !CONFIG_HOTPLUG_CPU */ -- cgit v1.2.3 From 1650d32b92b01db03a1a95d69ee74fcbc34d4b00 Mon Sep 17 00:00:00 2001 From: Caleb Connolly Date: Tue, 18 Mar 2025 20:50:27 +0000 Subject: ath10k: snoc: fix unbalanced IRQ enable in crash recovery In ath10k_snoc_hif_stop() we skip disabling the IRQs in the crash recovery flow, but we still unconditionally call enable again in ath10k_snoc_hif_start(). We can't check the ATH10K_FLAG_CRASH_FLUSH bit since it is cleared before hif_start() is called, so instead check the ATH10K_SNOC_FLAG_RECOVERY flag and skip enabling the IRQs during crash recovery. This fixes unbalanced IRQ enable splats that happen after recovering from a crash. Fixes: 0e622f67e041 ("ath10k: add support for WCN3990 firmware crash recovery") Signed-off-by: Caleb Connolly Tested-by: Loic Poulain Link: https://patch.msgid.link/20250318205043.1043148-1-caleb.connolly@linaro.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath10k/snoc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/snoc.c b/drivers/net/wireless/ath/ath10k/snoc.c index 866bad2db334..65673b1aba55 100644 --- a/drivers/net/wireless/ath/ath10k/snoc.c +++ b/drivers/net/wireless/ath/ath10k/snoc.c @@ -937,7 +937,9 @@ static int ath10k_snoc_hif_start(struct ath10k *ar) dev_set_threaded(ar->napi_dev, true); ath10k_core_napi_enable(ar); - ath10k_snoc_irq_enable(ar); + /* IRQs are left enabled when we restart due to a firmware crash */ + if (!test_bit(ATH10K_SNOC_FLAG_RECOVERY, &ar_snoc->flags)) + ath10k_snoc_irq_enable(ar); ath10k_snoc_rx_post(ar); clear_bit(ATH10K_SNOC_FLAG_RECOVERY, &ar_snoc->flags); -- cgit v1.2.3 From dc9c4252fe0d7a7f1ee904405ea91534277305bf Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Thu, 22 May 2025 15:17:04 +0200 Subject: wifi: ath10k: Avoid vdev delete timeout when firmware is already down In some scenarios, the firmware may be stopped before the interface is removed, either due to a crash or because the remoteproc (e.g., MPSS) is shut down early during system reboot or shutdown. This leads to a delay during interface teardown, as the driver waits for a vdev delete response that never arrives, eventually timing out. Example (SNOC): $ echo stop > /sys/class/remoteproc/remoteproc0/state [ 71.64] remoteproc remoteproc0: stopped remote processor modem $ reboot [ 74.84] ath10k_snoc c800000.wifi: failed to transmit packet, dropping: -108 [ 74.84] ath10k_snoc c800000.wifi: failed to submit frame: -108 [...] [ 82.39] ath10k_snoc c800000.wifi: Timeout in receiving vdev delete response To avoid this, skip waiting for the vdev delete response if the firmware is already marked as unreachable (`ATH10K_FLAG_CRASH_FLUSH`), similar to how `ath10k_mac_wait_tx_complete()` and `ath10k_vdev_setup_sync()` handle this case. Signed-off-by: Loic Poulain Link: https://patch.msgid.link/20250522131704.612206-1-loic.poulain@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath10k/mac.c | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 8c7ffea0fa44..07fe05384cdf 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -4,6 +4,7 @@ * Copyright (c) 2011-2017 Qualcomm Atheros, Inc. * Copyright (c) 2018-2019, The Linux Foundation. All rights reserved. * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ #include "mac.h" @@ -1022,6 +1023,26 @@ static inline int ath10k_vdev_setup_sync(struct ath10k *ar) return ar->last_wmi_vdev_start_status; } +static inline int ath10k_vdev_delete_sync(struct ath10k *ar) +{ + unsigned long time_left; + + lockdep_assert_held(&ar->conf_mutex); + + if (!test_bit(WMI_SERVICE_SYNC_DELETE_CMDS, ar->wmi.svc_map)) + return 0; + + if (test_bit(ATH10K_FLAG_CRASH_FLUSH, &ar->dev_flags)) + return -ESHUTDOWN; + + time_left = wait_for_completion_timeout(&ar->vdev_delete_done, + ATH10K_VDEV_DELETE_TIMEOUT_HZ); + if (time_left == 0) + return -ETIMEDOUT; + + return 0; +} + static int ath10k_monitor_vdev_start(struct ath10k *ar, int vdev_id) { struct cfg80211_chan_def *chandef = NULL; @@ -5900,7 +5921,6 @@ static void ath10k_remove_interface(struct ieee80211_hw *hw, struct ath10k *ar = hw->priv; struct ath10k_vif *arvif = (void *)vif->drv_priv; struct ath10k_peer *peer; - unsigned long time_left; int ret; int i; @@ -5940,13 +5960,10 @@ static void ath10k_remove_interface(struct ieee80211_hw *hw, ath10k_warn(ar, "failed to delete WMI vdev %i: %d\n", arvif->vdev_id, ret); - if (test_bit(WMI_SERVICE_SYNC_DELETE_CMDS, ar->wmi.svc_map)) { - time_left = wait_for_completion_timeout(&ar->vdev_delete_done, - ATH10K_VDEV_DELETE_TIMEOUT_HZ); - if (time_left == 0) { - ath10k_warn(ar, "Timeout in receiving vdev delete response\n"); - goto out; - } + ret = ath10k_vdev_delete_sync(ar); + if (ret) { + ath10k_warn(ar, "Error in receiving vdev delete response: %d\n", ret); + goto out; } /* Some firmware revisions don't notify host about self-peer removal -- cgit v1.2.3 From 593963660919a97a4546acfd706dac93625724f5 Mon Sep 17 00:00:00 2001 From: Sebastian Gottschall Date: Tue, 4 Mar 2025 08:21:31 +0700 Subject: wil6210: fix support for sparrow chipsets the wil6210 driver irq handling code is unconditionally writing edma irq registers which are supposed to be only used on Talyn chipsets. This however leade to a chipset hang on the older sparrow chipset generation and firmware will not even boot. Fix that by simply checking for edma support before handling these registers. Tested on Netgear R9000 Signed-off-by: Sebastian Gottschall Link: https://patch.msgid.link/20250304012131.25970-2-s.gottschall@dd-wrt.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/wil6210/interrupt.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/ath/wil6210/interrupt.c b/drivers/net/wireless/ath/wil6210/interrupt.c index 67172385a5d6..89d4394cedcf 100644 --- a/drivers/net/wireless/ath/wil6210/interrupt.c +++ b/drivers/net/wireless/ath/wil6210/interrupt.c @@ -179,9 +179,11 @@ void wil_mask_irq(struct wil6210_priv *wil) wil_dbg_irq(wil, "mask_irq\n"); wil6210_mask_irq_tx(wil); - wil6210_mask_irq_tx_edma(wil); + if (wil->use_enhanced_dma_hw) + wil6210_mask_irq_tx_edma(wil); wil6210_mask_irq_rx(wil); - wil6210_mask_irq_rx_edma(wil); + if (wil->use_enhanced_dma_hw) + wil6210_mask_irq_rx_edma(wil); wil6210_mask_irq_misc(wil, true); wil6210_mask_irq_pseudo(wil); } @@ -190,10 +192,12 @@ void wil_unmask_irq(struct wil6210_priv *wil) { wil_dbg_irq(wil, "unmask_irq\n"); - wil_w(wil, RGF_DMA_EP_RX_ICR + offsetof(struct RGF_ICR, ICC), - WIL_ICR_ICC_VALUE); - wil_w(wil, RGF_DMA_EP_TX_ICR + offsetof(struct RGF_ICR, ICC), - WIL_ICR_ICC_VALUE); + if (wil->use_enhanced_dma_hw) { + wil_w(wil, RGF_DMA_EP_RX_ICR + offsetof(struct RGF_ICR, ICC), + WIL_ICR_ICC_VALUE); + wil_w(wil, RGF_DMA_EP_TX_ICR + offsetof(struct RGF_ICR, ICC), + WIL_ICR_ICC_VALUE); + } wil_w(wil, RGF_DMA_EP_MISC_ICR + offsetof(struct RGF_ICR, ICC), WIL_ICR_ICC_MISC_VALUE); wil_w(wil, RGF_INT_GEN_TX_ICR + offsetof(struct RGF_ICR, ICC), @@ -845,10 +849,12 @@ void wil6210_clear_irq(struct wil6210_priv *wil) offsetof(struct RGF_ICR, ICR)); wil_clear32(wil->csr + HOSTADDR(RGF_DMA_EP_TX_ICR) + offsetof(struct RGF_ICR, ICR)); - wil_clear32(wil->csr + HOSTADDR(RGF_INT_GEN_RX_ICR) + - offsetof(struct RGF_ICR, ICR)); - wil_clear32(wil->csr + HOSTADDR(RGF_INT_GEN_TX_ICR) + - offsetof(struct RGF_ICR, ICR)); + if (wil->use_enhanced_dma_hw) { + wil_clear32(wil->csr + HOSTADDR(RGF_INT_GEN_RX_ICR) + + offsetof(struct RGF_ICR, ICR)); + wil_clear32(wil->csr + HOSTADDR(RGF_INT_GEN_TX_ICR) + + offsetof(struct RGF_ICR, ICR)); + } wil_clear32(wil->csr + HOSTADDR(RGF_DMA_EP_MISC_ICR) + offsetof(struct RGF_ICR, ICR)); wmb(); /* make sure write completed */ -- cgit v1.2.3 From 9f6e82d11bb9692a90d20b10f87345598945c803 Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Thu, 20 Feb 2025 16:24:42 +0800 Subject: wifi: ath11k: avoid burning CPU in ath11k_debugfs_fw_stats_request() We get report [1] that CPU is running a hot loop in ath11k_debugfs_fw_stats_request(): 94.60% 0.00% i3status [kernel.kallsyms] [k] do_syscall_64 | --94.60%--do_syscall_64 | --94.55%--__sys_sendmsg ___sys_sendmsg ____sys_sendmsg netlink_sendmsg netlink_unicast genl_rcv netlink_rcv_skb genl_rcv_msg | --94.55%--genl_family_rcv_msg_dumpit __netlink_dump_start netlink_dump genl_dumpit nl80211_dump_station | --94.55%--ieee80211_dump_station sta_set_sinfo | --94.55%--ath11k_mac_op_sta_statistics ath11k_debugfs_get_fw_stats | --94.55%--ath11k_debugfs_fw_stats_request | |--41.73%--_raw_spin_lock_bh | |--22.74%--__local_bh_enable_ip | |--9.22%--_raw_spin_unlock_bh | --6.66%--srso_alias_safe_ret This is because, if for whatever reason ar->fw_stats_done is not set by ath11k_update_stats_event(), ath11k_debugfs_fw_stats_request() won't yield CPU before an up to 3s timeout. Change to completion mechanism to avoid CPU burning. Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.37 Fixes: d5c65159f289 ("ath11k: driver for Qualcomm IEEE 802.11ax devices") Reported-by: Yury Vostrikov Closes: https://lore.kernel.org/all/7324ac7a-8b7a-42a5-aa19-de52138ff638@app.fastmail.com/ # [1] Signed-off-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250220082448.31039-2-quic_bqiang@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/core.c | 1 + drivers/net/wireless/ath/ath11k/core.h | 2 +- drivers/net/wireless/ath/ath11k/debugfs.c | 38 ++++++++++++------------------- drivers/net/wireless/ath/ath11k/mac.c | 2 +- drivers/net/wireless/ath/ath11k/wmi.c | 2 +- 5 files changed, 18 insertions(+), 27 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c index 2e9f8a5e61e4..c8c5008be7f2 100644 --- a/drivers/net/wireless/ath/ath11k/core.c +++ b/drivers/net/wireless/ath/ath11k/core.c @@ -990,6 +990,7 @@ void ath11k_fw_stats_init(struct ath11k *ar) INIT_LIST_HEAD(&ar->fw_stats.bcn); init_completion(&ar->fw_stats_complete); + init_completion(&ar->fw_stats_done); } void ath11k_fw_stats_free(struct ath11k_fw_stats *stats) diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h index 339d4fca1ed5..41bb81d00189 100644 --- a/drivers/net/wireless/ath/ath11k/core.h +++ b/drivers/net/wireless/ath/ath11k/core.h @@ -784,7 +784,7 @@ struct ath11k { u8 alpha2[REG_ALPHA2_LEN + 1]; struct ath11k_fw_stats fw_stats; struct completion fw_stats_complete; - bool fw_stats_done; + struct completion fw_stats_done; /* protected by conf_mutex */ bool ps_state_enable; diff --git a/drivers/net/wireless/ath/ath11k/debugfs.c b/drivers/net/wireless/ath/ath11k/debugfs.c index bf192529e3fe..1d03e3aab011 100644 --- a/drivers/net/wireless/ath/ath11k/debugfs.c +++ b/drivers/net/wireless/ath/ath11k/debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause-Clear /* * Copyright (c) 2018-2020 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -96,7 +96,6 @@ void ath11k_debugfs_add_dbring_entry(struct ath11k *ar, static void ath11k_debugfs_fw_stats_reset(struct ath11k *ar) { spin_lock_bh(&ar->data_lock); - ar->fw_stats_done = false; ath11k_fw_stats_pdevs_free(&ar->fw_stats.pdevs); ath11k_fw_stats_vdevs_free(&ar->fw_stats.vdevs); spin_unlock_bh(&ar->data_lock); @@ -114,7 +113,7 @@ void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats * /* WMI_REQUEST_PDEV_STAT request has been already processed */ if (stats->stats_id == WMI_REQUEST_RSSI_PER_CHAIN_STAT) { - ar->fw_stats_done = true; + complete(&ar->fw_stats_done); return; } @@ -138,7 +137,7 @@ void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats * &ar->fw_stats.vdevs); if (is_end) { - ar->fw_stats_done = true; + complete(&ar->fw_stats_done); num_vdev = 0; } return; @@ -158,7 +157,7 @@ void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats * &ar->fw_stats.bcn); if (is_end) { - ar->fw_stats_done = true; + complete(&ar->fw_stats_done); num_bcn = 0; } } @@ -168,21 +167,15 @@ static int ath11k_debugfs_fw_stats_request(struct ath11k *ar, struct stats_request_params *req_param) { struct ath11k_base *ab = ar->ab; - unsigned long timeout, time_left; + unsigned long time_left; int ret; lockdep_assert_held(&ar->conf_mutex); - /* FW stats can get split when exceeding the stats data buffer limit. - * In that case, since there is no end marking for the back-to-back - * received 'update stats' event, we keep a 3 seconds timeout in case, - * fw_stats_done is not marked yet - */ - timeout = jiffies + secs_to_jiffies(3); - ath11k_debugfs_fw_stats_reset(ar); reinit_completion(&ar->fw_stats_complete); + reinit_completion(&ar->fw_stats_done); ret = ath11k_wmi_send_stats_request_cmd(ar, req_param); @@ -193,21 +186,18 @@ static int ath11k_debugfs_fw_stats_request(struct ath11k *ar, } time_left = wait_for_completion_timeout(&ar->fw_stats_complete, 1 * HZ); - if (!time_left) return -ETIMEDOUT; - for (;;) { - if (time_after(jiffies, timeout)) - break; + /* FW stats can get split when exceeding the stats data buffer limit. + * In that case, since there is no end marking for the back-to-back + * received 'update stats' event, we keep a 3 seconds timeout in case, + * fw_stats_done is not marked yet + */ + time_left = wait_for_completion_timeout(&ar->fw_stats_done, 3 * HZ); + if (!time_left) + return -ETIMEDOUT; - spin_lock_bh(&ar->data_lock); - if (ar->fw_stats_done) { - spin_unlock_bh(&ar->data_lock); - break; - } - spin_unlock_bh(&ar->data_lock); - } return 0; } diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 08d7b136851f..9ab501cd4f47 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -9390,11 +9390,11 @@ static int ath11k_fw_stats_request(struct ath11k *ar, lockdep_assert_held(&ar->conf_mutex); spin_lock_bh(&ar->data_lock); - ar->fw_stats_done = false; ath11k_fw_stats_pdevs_free(&ar->fw_stats.pdevs); spin_unlock_bh(&ar->data_lock); reinit_completion(&ar->fw_stats_complete); + reinit_completion(&ar->fw_stats_done); ret = ath11k_wmi_send_stats_request_cmd(ar, req_param); if (ret) { diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c index d7f852bebf4a..27cb0bb06b93 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.c +++ b/drivers/net/wireless/ath/ath11k/wmi.c @@ -8189,7 +8189,7 @@ static void ath11k_update_stats_event(struct ath11k_base *ab, struct sk_buff *sk */ if (stats.stats_id == WMI_REQUEST_PDEV_STAT) { list_splice_tail_init(&stats.pdevs, &ar->fw_stats.pdevs); - ar->fw_stats_done = true; + complete(&ar->fw_stats_done); goto complete; } -- cgit v1.2.3 From 2bcf73b2612dda7432f2c2eaad6679bd291791f2 Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Thu, 20 Feb 2025 16:24:43 +0800 Subject: wifi: ath11k: don't use static variables in ath11k_debugfs_fw_stats_process() Currently ath11k_debugfs_fw_stats_process() is using static variables to count firmware stat events. Taking num_vdev as an example, if for whatever reason ( say ar->num_started_vdevs is 0 or firmware bug etc.) the following condition (++num_vdev) == total_vdevs_started is not met, is_end is not set thus num_vdev won't be cleared. Next time when firmware stats is requested again, even if everything is working fine, we will fail due to the condition above will never be satisfied. The same applies to num_bcn as well. Change to use non-static counters so that we have a chance to clear them each time firmware stats is requested. Currently only ath11k_fw_stats_request() and ath11k_debugfs_fw_stats_request() are requesting firmware stats, so clear counters there. Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.37 Fixes: da3a9d3c1576 ("ath11k: refactor debugfs code into debugfs.c") Signed-off-by: Baochen Qiang Acked-by: Kalle Valo Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250220082448.31039-3-quic_bqiang@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/core.h | 2 ++ drivers/net/wireless/ath/ath11k/debugfs.c | 16 +++++++--------- drivers/net/wireless/ath/ath11k/mac.c | 2 ++ 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h index 41bb81d00189..6b2f207975e3 100644 --- a/drivers/net/wireless/ath/ath11k/core.h +++ b/drivers/net/wireless/ath/ath11k/core.h @@ -600,6 +600,8 @@ struct ath11k_fw_stats { struct list_head pdevs; struct list_head vdevs; struct list_head bcn; + u32 num_vdev_recvd; + u32 num_bcn_recvd; }; struct ath11k_dbg_htt_stats { diff --git a/drivers/net/wireless/ath/ath11k/debugfs.c b/drivers/net/wireless/ath/ath11k/debugfs.c index 1d03e3aab011..27c93c0b4c22 100644 --- a/drivers/net/wireless/ath/ath11k/debugfs.c +++ b/drivers/net/wireless/ath/ath11k/debugfs.c @@ -98,6 +98,8 @@ static void ath11k_debugfs_fw_stats_reset(struct ath11k *ar) spin_lock_bh(&ar->data_lock); ath11k_fw_stats_pdevs_free(&ar->fw_stats.pdevs); ath11k_fw_stats_vdevs_free(&ar->fw_stats.vdevs); + ar->fw_stats.num_vdev_recvd = 0; + ar->fw_stats.num_bcn_recvd = 0; spin_unlock_bh(&ar->data_lock); } @@ -106,7 +108,6 @@ void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats * struct ath11k_base *ab = ar->ab; struct ath11k_pdev *pdev; bool is_end; - static unsigned int num_vdev, num_bcn; size_t total_vdevs_started = 0; int i; @@ -131,15 +132,14 @@ void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats * total_vdevs_started += ar->num_started_vdevs; } - is_end = ((++num_vdev) == total_vdevs_started); + is_end = ((++ar->fw_stats.num_vdev_recvd) == total_vdevs_started); list_splice_tail_init(&stats->vdevs, &ar->fw_stats.vdevs); - if (is_end) { + if (is_end) complete(&ar->fw_stats_done); - num_vdev = 0; - } + return; } @@ -151,15 +151,13 @@ void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats * /* Mark end until we reached the count of all started VDEVs * within the PDEV */ - is_end = ((++num_bcn) == ar->num_started_vdevs); + is_end = ((++ar->fw_stats.num_bcn_recvd) == ar->num_started_vdevs); list_splice_tail_init(&stats->bcn, &ar->fw_stats.bcn); - if (is_end) { + if (is_end) complete(&ar->fw_stats_done); - num_bcn = 0; - } } } diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 9ab501cd4f47..5e098780cf23 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -9391,6 +9391,8 @@ static int ath11k_fw_stats_request(struct ath11k *ar, spin_lock_bh(&ar->data_lock); ath11k_fw_stats_pdevs_free(&ar->fw_stats.pdevs); + ar->fw_stats.num_vdev_recvd = 0; + ar->fw_stats.num_bcn_recvd = 0; spin_unlock_bh(&ar->data_lock); reinit_completion(&ar->fw_stats_complete); -- cgit v1.2.3 From 3b6d00fa883075dcaf49221538230e038a9c0b43 Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Thu, 20 Feb 2025 16:24:44 +0800 Subject: wifi: ath11k: don't wait when there is no vdev started For WMI_REQUEST_VDEV_STAT request, firmware might split response into multiple events dut to buffer limit, hence currently in ath11k_debugfs_fw_stats_process() we wait until all events received. In case there is no vdev started, this results in that below condition would never get satisfied ((++ar->fw_stats.num_vdev_recvd) == total_vdevs_started) finally the requestor would be blocked until wait time out. The same applies to WMI_REQUEST_BCN_STAT request as well due to: ((++ar->fw_stats.num_bcn_recvd) == ar->num_started_vdevs) Change to check the number of started vdev first: if it is zero, finish wait directly; if not, follow the old way. Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.37 Fixes: d5c65159f289 ("ath11k: driver for Qualcomm IEEE 802.11ax devices") Signed-off-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250220082448.31039-4-quic_bqiang@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/debugfs.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/debugfs.c b/drivers/net/wireless/ath/ath11k/debugfs.c index 27c93c0b4c22..ccf0e62c7d7a 100644 --- a/drivers/net/wireless/ath/ath11k/debugfs.c +++ b/drivers/net/wireless/ath/ath11k/debugfs.c @@ -107,7 +107,7 @@ void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats * { struct ath11k_base *ab = ar->ab; struct ath11k_pdev *pdev; - bool is_end; + bool is_end = true; size_t total_vdevs_started = 0; int i; @@ -132,7 +132,9 @@ void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats * total_vdevs_started += ar->num_started_vdevs; } - is_end = ((++ar->fw_stats.num_vdev_recvd) == total_vdevs_started); + if (total_vdevs_started) + is_end = ((++ar->fw_stats.num_vdev_recvd) == + total_vdevs_started); list_splice_tail_init(&stats->vdevs, &ar->fw_stats.vdevs); @@ -151,7 +153,9 @@ void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats * /* Mark end until we reached the count of all started VDEVs * within the PDEV */ - is_end = ((++ar->fw_stats.num_bcn_recvd) == ar->num_started_vdevs); + if (ar->num_started_vdevs) + is_end = ((++ar->fw_stats.num_bcn_recvd) == + ar->num_started_vdevs); list_splice_tail_init(&stats->bcn, &ar->fw_stats.bcn); -- cgit v1.2.3 From 72610ed7d79da17ee09102534d6c696a4ea8a08e Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Thu, 20 Feb 2025 16:24:45 +0800 Subject: wifi: ath11k: move some firmware stats related functions outside of debugfs Commit b488c766442f ("ath11k: report rssi of each chain to mac80211 for QCA6390/WCN6855") and commit c3b39553fc77 ("ath11k: add signal report to mac80211 for QCA6390 and WCN6855") call debugfs functions in mac ops. Those functions are no-ops if CONFIG_ATH11K_DEBUGFS is not enabled, thus cause wrong status reported. Move them to mac.c. Besides, since WMI_REQUEST_RSSI_PER_CHAIN_STAT and WMI_REQUEST_VDEV_STAT stats could also be requested via mac ops, process them directly in ath11k_update_stats_event(). Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.37 Fixes: b488c766442f ("ath11k: report rssi of each chain to mac80211 for QCA6390/WCN6855") Fixes: c3b39553fc77 ("ath11k: add signal report to mac80211 for QCA6390 and WCN6855") Signed-off-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250220082448.31039-5-quic_bqiang@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/debugfs.c | 126 ++---------------------------- drivers/net/wireless/ath/ath11k/debugfs.h | 10 +-- drivers/net/wireless/ath/ath11k/mac.c | 88 ++++++++++++++++++++- drivers/net/wireless/ath/ath11k/mac.h | 4 +- drivers/net/wireless/ath/ath11k/wmi.c | 45 ++++++++++- 5 files changed, 135 insertions(+), 138 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/debugfs.c b/drivers/net/wireless/ath/ath11k/debugfs.c index ccf0e62c7d7a..5d46f8e4c231 100644 --- a/drivers/net/wireless/ath/ath11k/debugfs.c +++ b/drivers/net/wireless/ath/ath11k/debugfs.c @@ -93,58 +93,14 @@ void ath11k_debugfs_add_dbring_entry(struct ath11k *ar, spin_unlock_bh(&dbr_data->lock); } -static void ath11k_debugfs_fw_stats_reset(struct ath11k *ar) -{ - spin_lock_bh(&ar->data_lock); - ath11k_fw_stats_pdevs_free(&ar->fw_stats.pdevs); - ath11k_fw_stats_vdevs_free(&ar->fw_stats.vdevs); - ar->fw_stats.num_vdev_recvd = 0; - ar->fw_stats.num_bcn_recvd = 0; - spin_unlock_bh(&ar->data_lock); -} - void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats *stats) { struct ath11k_base *ab = ar->ab; - struct ath11k_pdev *pdev; bool is_end = true; - size_t total_vdevs_started = 0; - int i; - - /* WMI_REQUEST_PDEV_STAT request has been already processed */ - - if (stats->stats_id == WMI_REQUEST_RSSI_PER_CHAIN_STAT) { - complete(&ar->fw_stats_done); - return; - } - - if (stats->stats_id == WMI_REQUEST_VDEV_STAT) { - if (list_empty(&stats->vdevs)) { - ath11k_warn(ab, "empty vdev stats"); - return; - } - /* FW sends all the active VDEV stats irrespective of PDEV, - * hence limit until the count of all VDEVs started - */ - for (i = 0; i < ab->num_radios; i++) { - pdev = rcu_dereference(ab->pdevs_active[i]); - if (pdev && pdev->ar) - total_vdevs_started += ar->num_started_vdevs; - } - - if (total_vdevs_started) - is_end = ((++ar->fw_stats.num_vdev_recvd) == - total_vdevs_started); - - list_splice_tail_init(&stats->vdevs, - &ar->fw_stats.vdevs); - - if (is_end) - complete(&ar->fw_stats_done); - - return; - } + /* WMI_REQUEST_PDEV_STAT, WMI_REQUEST_RSSI_PER_CHAIN_STAT and + * WMI_REQUEST_VDEV_STAT requests have been already processed. + */ if (stats->stats_id == WMI_REQUEST_BCN_STAT) { if (list_empty(&stats->bcn)) { ath11k_warn(ab, "empty bcn stats"); @@ -165,76 +121,6 @@ void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats * } } -static int ath11k_debugfs_fw_stats_request(struct ath11k *ar, - struct stats_request_params *req_param) -{ - struct ath11k_base *ab = ar->ab; - unsigned long time_left; - int ret; - - lockdep_assert_held(&ar->conf_mutex); - - ath11k_debugfs_fw_stats_reset(ar); - - reinit_completion(&ar->fw_stats_complete); - reinit_completion(&ar->fw_stats_done); - - ret = ath11k_wmi_send_stats_request_cmd(ar, req_param); - - if (ret) { - ath11k_warn(ab, "could not request fw stats (%d)\n", - ret); - return ret; - } - - time_left = wait_for_completion_timeout(&ar->fw_stats_complete, 1 * HZ); - if (!time_left) - return -ETIMEDOUT; - - /* FW stats can get split when exceeding the stats data buffer limit. - * In that case, since there is no end marking for the back-to-back - * received 'update stats' event, we keep a 3 seconds timeout in case, - * fw_stats_done is not marked yet - */ - time_left = wait_for_completion_timeout(&ar->fw_stats_done, 3 * HZ); - if (!time_left) - return -ETIMEDOUT; - - return 0; -} - -int ath11k_debugfs_get_fw_stats(struct ath11k *ar, u32 pdev_id, - u32 vdev_id, u32 stats_id) -{ - struct ath11k_base *ab = ar->ab; - struct stats_request_params req_param; - int ret; - - mutex_lock(&ar->conf_mutex); - - if (ar->state != ATH11K_STATE_ON) { - ret = -ENETDOWN; - goto err_unlock; - } - - req_param.pdev_id = pdev_id; - req_param.vdev_id = vdev_id; - req_param.stats_id = stats_id; - - ret = ath11k_debugfs_fw_stats_request(ar, &req_param); - if (ret) - ath11k_warn(ab, "failed to request fw stats: %d\n", ret); - - ath11k_dbg(ab, ATH11K_DBG_WMI, - "debug get fw stat pdev id %d vdev id %d stats id 0x%x\n", - pdev_id, vdev_id, stats_id); - -err_unlock: - mutex_unlock(&ar->conf_mutex); - - return ret; -} - static int ath11k_open_pdev_stats(struct inode *inode, struct file *file) { struct ath11k *ar = inode->i_private; @@ -260,7 +146,7 @@ static int ath11k_open_pdev_stats(struct inode *inode, struct file *file) req_param.vdev_id = 0; req_param.stats_id = WMI_REQUEST_PDEV_STAT; - ret = ath11k_debugfs_fw_stats_request(ar, &req_param); + ret = ath11k_mac_fw_stats_request(ar, &req_param); if (ret) { ath11k_warn(ab, "failed to request fw pdev stats: %d\n", ret); goto err_free; @@ -331,7 +217,7 @@ static int ath11k_open_vdev_stats(struct inode *inode, struct file *file) req_param.vdev_id = 0; req_param.stats_id = WMI_REQUEST_VDEV_STAT; - ret = ath11k_debugfs_fw_stats_request(ar, &req_param); + ret = ath11k_mac_fw_stats_request(ar, &req_param); if (ret) { ath11k_warn(ar->ab, "failed to request fw vdev stats: %d\n", ret); goto err_free; @@ -407,7 +293,7 @@ static int ath11k_open_bcn_stats(struct inode *inode, struct file *file) continue; req_param.vdev_id = arvif->vdev_id; - ret = ath11k_debugfs_fw_stats_request(ar, &req_param); + ret = ath11k_mac_fw_stats_request(ar, &req_param); if (ret) { ath11k_warn(ar->ab, "failed to request fw bcn stats: %d\n", ret); goto err_free; diff --git a/drivers/net/wireless/ath/ath11k/debugfs.h b/drivers/net/wireless/ath/ath11k/debugfs.h index a39e458637b0..ed7fec177588 100644 --- a/drivers/net/wireless/ath/ath11k/debugfs.h +++ b/drivers/net/wireless/ath/ath11k/debugfs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause-Clear */ /* * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2022, 2025 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef _ATH11K_DEBUGFS_H_ @@ -273,8 +273,6 @@ void ath11k_debugfs_unregister(struct ath11k *ar); void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats *stats); void ath11k_debugfs_fw_stats_init(struct ath11k *ar); -int ath11k_debugfs_get_fw_stats(struct ath11k *ar, u32 pdev_id, - u32 vdev_id, u32 stats_id); static inline bool ath11k_debugfs_is_pktlog_lite_mode_enabled(struct ath11k *ar) { @@ -381,12 +379,6 @@ static inline int ath11k_debugfs_rx_filter(struct ath11k *ar) return 0; } -static inline int ath11k_debugfs_get_fw_stats(struct ath11k *ar, - u32 pdev_id, u32 vdev_id, u32 stats_id) -{ - return 0; -} - static inline void ath11k_debugfs_add_dbring_entry(struct ath11k *ar, enum wmi_direct_buffer_module id, diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 5e098780cf23..1550533b5587 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -8997,6 +8997,86 @@ static void ath11k_mac_put_chain_rssi(struct station_info *sinfo, } } +static void ath11k_mac_fw_stats_reset(struct ath11k *ar) +{ + spin_lock_bh(&ar->data_lock); + ath11k_fw_stats_pdevs_free(&ar->fw_stats.pdevs); + ath11k_fw_stats_vdevs_free(&ar->fw_stats.vdevs); + ar->fw_stats.num_vdev_recvd = 0; + ar->fw_stats.num_bcn_recvd = 0; + spin_unlock_bh(&ar->data_lock); +} + +int ath11k_mac_fw_stats_request(struct ath11k *ar, + struct stats_request_params *req_param) +{ + struct ath11k_base *ab = ar->ab; + unsigned long time_left; + int ret; + + lockdep_assert_held(&ar->conf_mutex); + + ath11k_mac_fw_stats_reset(ar); + + reinit_completion(&ar->fw_stats_complete); + reinit_completion(&ar->fw_stats_done); + + ret = ath11k_wmi_send_stats_request_cmd(ar, req_param); + + if (ret) { + ath11k_warn(ab, "could not request fw stats (%d)\n", + ret); + return ret; + } + + time_left = wait_for_completion_timeout(&ar->fw_stats_complete, 1 * HZ); + if (!time_left) + return -ETIMEDOUT; + + /* FW stats can get split when exceeding the stats data buffer limit. + * In that case, since there is no end marking for the back-to-back + * received 'update stats' event, we keep a 3 seconds timeout in case, + * fw_stats_done is not marked yet + */ + time_left = wait_for_completion_timeout(&ar->fw_stats_done, 3 * HZ); + if (!time_left) + return -ETIMEDOUT; + + return 0; +} + +static int ath11k_mac_get_fw_stats(struct ath11k *ar, u32 pdev_id, + u32 vdev_id, u32 stats_id) +{ + struct ath11k_base *ab = ar->ab; + struct stats_request_params req_param; + int ret; + + mutex_lock(&ar->conf_mutex); + + if (ar->state != ATH11K_STATE_ON) { + ret = -ENETDOWN; + goto err_unlock; + } + + req_param.pdev_id = pdev_id; + req_param.vdev_id = vdev_id; + req_param.stats_id = stats_id; + + ret = ath11k_mac_fw_stats_request(ar, &req_param); + if (ret) + ath11k_warn(ab, "failed to request fw stats: %d\n", ret); + + ath11k_dbg(ab, ATH11K_DBG_WMI, + "debug get fw stat pdev id %d vdev id %d stats id 0x%x\n", + pdev_id, vdev_id, stats_id); + +err_unlock: + mutex_unlock(&ar->conf_mutex); + + return ret; +} + static void ath11k_mac_op_sta_statistics(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, @@ -9034,8 +9114,8 @@ static void ath11k_mac_op_sta_statistics(struct ieee80211_hw *hw, if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL)) && arsta->arvif->vdev_type == WMI_VDEV_TYPE_STA && ar->ab->hw_params.supports_rssi_stats && - !ath11k_debugfs_get_fw_stats(ar, ar->pdev->pdev_id, 0, - WMI_REQUEST_RSSI_PER_CHAIN_STAT)) { + !ath11k_mac_get_fw_stats(ar, ar->pdev->pdev_id, 0, + WMI_REQUEST_RSSI_PER_CHAIN_STAT)) { ath11k_mac_put_chain_rssi(sinfo, arsta, "fw stats", true); } @@ -9043,8 +9123,8 @@ static void ath11k_mac_op_sta_statistics(struct ieee80211_hw *hw, if (!signal && arsta->arvif->vdev_type == WMI_VDEV_TYPE_STA && ar->ab->hw_params.supports_rssi_stats && - !(ath11k_debugfs_get_fw_stats(ar, ar->pdev->pdev_id, 0, - WMI_REQUEST_VDEV_STAT))) + !(ath11k_mac_get_fw_stats(ar, ar->pdev->pdev_id, 0, + WMI_REQUEST_VDEV_STAT))) signal = arsta->rssi_beacon; ath11k_dbg(ar->ab, ATH11K_DBG_MAC, diff --git a/drivers/net/wireless/ath/ath11k/mac.h b/drivers/net/wireless/ath/ath11k/mac.h index f5800fbecff8..5e61eea1bb03 100644 --- a/drivers/net/wireless/ath/ath11k/mac.h +++ b/drivers/net/wireless/ath/ath11k/mac.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause-Clear */ /* * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2023, 2025 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef ATH11K_MAC_H @@ -179,4 +179,6 @@ int ath11k_mac_vif_set_keepalive(struct ath11k_vif *arvif, void ath11k_mac_fill_reg_tpc_info(struct ath11k *ar, struct ieee80211_vif *vif, struct ieee80211_chanctx_conf *ctx); +int ath11k_mac_fw_stats_request(struct ath11k *ar, + struct stats_request_params *req_param); #endif diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c index 27cb0bb06b93..98811726d33b 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.c +++ b/drivers/net/wireless/ath/ath11k/wmi.c @@ -8158,6 +8158,11 @@ static void ath11k_peer_assoc_conf_event(struct ath11k_base *ab, struct sk_buff static void ath11k_update_stats_event(struct ath11k_base *ab, struct sk_buff *skb) { struct ath11k_fw_stats stats = {}; + size_t total_vdevs_started = 0; + struct ath11k_pdev *pdev; + bool is_end = true; + int i; + struct ath11k *ar; int ret; @@ -8184,7 +8189,8 @@ static void ath11k_update_stats_event(struct ath11k_base *ab, struct sk_buff *sk spin_lock_bh(&ar->data_lock); - /* WMI_REQUEST_PDEV_STAT can be requested via .get_txpower mac ops or via + /* WMI_REQUEST_PDEV_STAT, WMI_REQUEST_VDEV_STAT and + * WMI_REQUEST_RSSI_PER_CHAIN_STAT can be requested via mac ops or via * debugfs fw stats. Therefore, processing it separately. */ if (stats.stats_id == WMI_REQUEST_PDEV_STAT) { @@ -8193,9 +8199,40 @@ static void ath11k_update_stats_event(struct ath11k_base *ab, struct sk_buff *sk goto complete; } - /* WMI_REQUEST_VDEV_STAT, WMI_REQUEST_BCN_STAT and WMI_REQUEST_RSSI_PER_CHAIN_STAT - * are currently requested only via debugfs fw stats. Hence, processing these - * in debugfs context + if (stats.stats_id == WMI_REQUEST_RSSI_PER_CHAIN_STAT) { + complete(&ar->fw_stats_done); + goto complete; + } + + if (stats.stats_id == WMI_REQUEST_VDEV_STAT) { + if (list_empty(&stats.vdevs)) { + ath11k_warn(ab, "empty vdev stats"); + goto complete; + } + /* FW sends all the active VDEV stats irrespective of PDEV, + * hence limit until the count of all VDEVs started + */ + for (i = 0; i < ab->num_radios; i++) { + pdev = rcu_dereference(ab->pdevs_active[i]); + if (pdev && pdev->ar) + total_vdevs_started += ar->num_started_vdevs; + } + + if (total_vdevs_started) + is_end = ((++ar->fw_stats.num_vdev_recvd) == + total_vdevs_started); + + list_splice_tail_init(&stats.vdevs, + &ar->fw_stats.vdevs); + + if (is_end) + complete(&ar->fw_stats_done); + + goto complete; + } + + /* WMI_REQUEST_BCN_STAT is currently requested only via debugfs fw stats. + * Hence, processing it in debugfs context */ ath11k_debugfs_fw_stats_process(ar, &stats); -- cgit v1.2.3 From 81f64165c9dc2d9b070d8240dd369974ebe188d3 Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Thu, 20 Feb 2025 16:24:46 +0800 Subject: wifi: ath11k: adjust unlock sequence in ath11k_update_stats_event() Currently RCU lock and ar->data_lock are acquired in a sequence of rcu_read_lock() spin_lock_bh(&ar->data_lock) but released in a sequence of rcu_read_unlock() spin_unlock_bh(&ar->data_lock) Although there are no apparent issues with this, reorder them to achieve symmetry. Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.37 Signed-off-by: Baochen Qiang Acked-by: Kalle Valo Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250220082448.31039-6-quic_bqiang@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/wmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c index 98811726d33b..56af2e9634f4 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.c +++ b/drivers/net/wireless/ath/ath11k/wmi.c @@ -8238,8 +8238,8 @@ static void ath11k_update_stats_event(struct ath11k_base *ab, struct sk_buff *sk complete: complete(&ar->fw_stats_complete); - rcu_read_unlock(); spin_unlock_bh(&ar->data_lock); + rcu_read_unlock(); /* Since the stats's pdev, vdev and beacon list are spliced and reinitialised * at this point, no need to free the individual list. -- cgit v1.2.3 From c5b92a2c18938ebb08e8d4062408ab1524da31c3 Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Thu, 20 Feb 2025 16:24:47 +0800 Subject: wifi: ath11k: move locking outside of ath11k_mac_get_fw_stats() Currently ath11k_mac_get_fw_stats() is acquiring/releasing ar->conf_mutex by itself. In order to reuse this function in a context where that lock is already taken, move lock handling to its callers, then the function itself only has to assert it. There is only one caller now, i.e., ath11k_mac_op_sta_statistics(), so add lock handling there. Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.37 Signed-off-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250220082448.31039-7-quic_bqiang@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/mac.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 1550533b5587..18e1d78cac45 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -9052,12 +9052,10 @@ static int ath11k_mac_get_fw_stats(struct ath11k *ar, u32 pdev_id, struct stats_request_params req_param; int ret; - mutex_lock(&ar->conf_mutex); + lockdep_assert_held(&ar->conf_mutex); - if (ar->state != ATH11K_STATE_ON) { - ret = -ENETDOWN; - goto err_unlock; - } + if (ar->state != ATH11K_STATE_ON) + return -ENETDOWN; req_param.pdev_id = pdev_id; req_param.vdev_id = vdev_id; @@ -9071,9 +9069,6 @@ static int ath11k_mac_get_fw_stats(struct ath11k *ar, u32 pdev_id, "debug get fw stat pdev id %d vdev id %d stats id 0x%x\n", pdev_id, vdev_id, stats_id); -err_unlock: - mutex_unlock(&ar->conf_mutex); - return ret; } @@ -9111,6 +9106,7 @@ static void ath11k_mac_op_sta_statistics(struct ieee80211_hw *hw, ath11k_mac_put_chain_rssi(sinfo, arsta, "ppdu", false); + mutex_lock(&ar->conf_mutex); if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL)) && arsta->arvif->vdev_type == WMI_VDEV_TYPE_STA && ar->ab->hw_params.supports_rssi_stats && @@ -9126,6 +9122,7 @@ static void ath11k_mac_op_sta_statistics(struct ieee80211_hw *hw, !(ath11k_mac_get_fw_stats(ar, ar->pdev->pdev_id, 0, WMI_REQUEST_VDEV_STAT))) signal = arsta->rssi_beacon; + mutex_unlock(&ar->conf_mutex); ath11k_dbg(ar->ab, ATH11K_DBG_MAC, "sta statistics db2dbm %u rssi comb %d rssi beacon %d\n", -- cgit v1.2.3 From 29e2adf2ef2966379bd3fe002530b10dfc3030ba Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Thu, 20 Feb 2025 16:24:48 +0800 Subject: wifi: ath11k: consistently use ath11k_mac_get_fw_stats() Currently to get firmware stats, ath11k_mac_op_get_txpower() calls ath11k_fw_stats_request() and ath11k_mac_op_sta_statistics() calls ath11k_mac_get_fw_stats(). Those two helpers are basically doing the same, except for: 1. ath11k_mac_get_fw_stats() verifies ar->state inside itself. 2. ath11k_mac_get_fw_stats() calls ath11k_mac_fw_stats_request() which then calls ath11k_mac_fw_stats_reset() to free pdev/vdev stats whereas only pdev stats are freed by ath11k_fw_stats_request(). 3. ath11k_mac_get_fw_stats() waits for ar->fw_stats_complete and ar->fw_stats_done, whereas ath11k_fw_stats_request() only waits for ar->fw_stats_complete. Change to call ath11k_mac_get_fw_stats() in ath11k_mac_op_get_txpower(). This is valid because: 1. ath11k_mac_op_get_txpower() also has the same request on ar->state. 2. it is harmless to call ath11k_fw_stats_vdevs_free() since ar->fw_stats.vdevs should be empty and there should be no one expecting it at that time. 3. ath11k_mac_op_get_txpower() only needs pdev stats. For pdev stats, ar->fw_stats_done is set to true whenever ar->fw_stats_complete is set to true in ath11k_update_stats_event(). So additional wait on ar->fw_stats_done does not wast any time. Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.37 Signed-off-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250220082448.31039-8-quic_bqiang@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/mac.c | 44 ++--------------------------------- 1 file changed, 2 insertions(+), 42 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 18e1d78cac45..13301ca317a5 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -9457,40 +9457,6 @@ exit: return ret; } -static int ath11k_fw_stats_request(struct ath11k *ar, - struct stats_request_params *req_param) -{ - struct ath11k_base *ab = ar->ab; - unsigned long time_left; - int ret; - - lockdep_assert_held(&ar->conf_mutex); - - spin_lock_bh(&ar->data_lock); - ath11k_fw_stats_pdevs_free(&ar->fw_stats.pdevs); - ar->fw_stats.num_vdev_recvd = 0; - ar->fw_stats.num_bcn_recvd = 0; - spin_unlock_bh(&ar->data_lock); - - reinit_completion(&ar->fw_stats_complete); - reinit_completion(&ar->fw_stats_done); - - ret = ath11k_wmi_send_stats_request_cmd(ar, req_param); - if (ret) { - ath11k_warn(ab, "could not request fw stats (%d)\n", - ret); - return ret; - } - - time_left = wait_for_completion_timeout(&ar->fw_stats_complete, - 1 * HZ); - - if (!time_left) - return -ETIMEDOUT; - - return 0; -} - static int ath11k_mac_op_get_txpower(struct ieee80211_hw *hw, struct ieee80211_vif *vif, unsigned int link_id, @@ -9498,7 +9464,6 @@ static int ath11k_mac_op_get_txpower(struct ieee80211_hw *hw, { struct ath11k *ar = hw->priv; struct ath11k_base *ab = ar->ab; - struct stats_request_params req_param = {0}; struct ath11k_fw_stats_pdev *pdev; int ret; @@ -9510,9 +9475,6 @@ static int ath11k_mac_op_get_txpower(struct ieee80211_hw *hw, */ mutex_lock(&ar->conf_mutex); - if (ar->state != ATH11K_STATE_ON) - goto err_fallback; - /* Firmware doesn't provide Tx power during CAC hence no need to fetch * the stats. */ @@ -9521,10 +9483,8 @@ static int ath11k_mac_op_get_txpower(struct ieee80211_hw *hw, return -EAGAIN; } - req_param.pdev_id = ar->pdev->pdev_id; - req_param.stats_id = WMI_REQUEST_PDEV_STAT; - - ret = ath11k_fw_stats_request(ar, &req_param); + ret = ath11k_mac_get_fw_stats(ar, ar->pdev->pdev_id, 0, + WMI_REQUEST_PDEV_STAT); if (ret) { ath11k_warn(ab, "failed to request fw pdev stats: %d\n", ret); goto err_fallback; -- cgit v1.2.3 From b0d226a60856a1b765bb9a3848c7b2322fd08c47 Mon Sep 17 00:00:00 2001 From: Rodrigo Gobbi Date: Thu, 22 May 2025 17:01:12 -0300 Subject: wifi: ath11k: validate ath11k_crypto_mode on top of ath11k_core_qmi_firmware_ready if ath11k_crypto_mode is invalid (not ATH11K_CRYPT_MODE_SW/ATH11K_CRYPT_MODE_HW), ath11k_core_qmi_firmware_ready() will not undo some actions that was previously started/configured. Do the validation as soon as possible in order to avoid undoing actions in that case and also to fix the following smatch warning: drivers/net/wireless/ath/ath11k/core.c:2166 ath11k_core_qmi_firmware_ready() warn: missing unwind goto? Signed-off-by: Rodrigo Gobbi Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202304151955.oqAetVFd-lkp@intel.com/ Fixes: aa2092a9bab3 ("ath11k: add raw mode and software crypto support") Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20250522200519.16858-1-rodrigo.gobbi.7@gmail.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/core.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c index c8c5008be7f2..22a101136135 100644 --- a/drivers/net/wireless/ath/ath11k/core.c +++ b/drivers/net/wireless/ath/ath11k/core.c @@ -2135,6 +2135,20 @@ int ath11k_core_qmi_firmware_ready(struct ath11k_base *ab) { int ret; + switch (ath11k_crypto_mode) { + case ATH11K_CRYPT_MODE_SW: + set_bit(ATH11K_FLAG_HW_CRYPTO_DISABLED, &ab->dev_flags); + set_bit(ATH11K_FLAG_RAW_MODE, &ab->dev_flags); + break; + case ATH11K_CRYPT_MODE_HW: + clear_bit(ATH11K_FLAG_HW_CRYPTO_DISABLED, &ab->dev_flags); + clear_bit(ATH11K_FLAG_RAW_MODE, &ab->dev_flags); + break; + default: + ath11k_info(ab, "invalid crypto_mode: %d\n", ath11k_crypto_mode); + return -EINVAL; + } + ret = ath11k_core_start_firmware(ab, ab->fw_mode); if (ret) { ath11k_err(ab, "failed to start firmware: %d\n", ret); @@ -2153,20 +2167,6 @@ int ath11k_core_qmi_firmware_ready(struct ath11k_base *ab) goto err_firmware_stop; } - switch (ath11k_crypto_mode) { - case ATH11K_CRYPT_MODE_SW: - set_bit(ATH11K_FLAG_HW_CRYPTO_DISABLED, &ab->dev_flags); - set_bit(ATH11K_FLAG_RAW_MODE, &ab->dev_flags); - break; - case ATH11K_CRYPT_MODE_HW: - clear_bit(ATH11K_FLAG_HW_CRYPTO_DISABLED, &ab->dev_flags); - clear_bit(ATH11K_FLAG_RAW_MODE, &ab->dev_flags); - break; - default: - ath11k_info(ab, "invalid crypto_mode: %d\n", ath11k_crypto_mode); - return -EINVAL; - } - if (ath11k_frame_mode == ATH11K_HW_TXRX_RAW) set_bit(ATH11K_FLAG_RAW_MODE, &ab->dev_flags); -- cgit v1.2.3 From 7588a893cde5385ad308400ff167d29a29913b3a Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Fri, 23 May 2025 10:23:05 +0800 Subject: wifi: ath12k: fix GCC_GCC_PCIE_HOT_RST definition for WCN7850 GCC_GCC_PCIE_HOT_RST is wrongly defined for WCN7850, causing kernel crash on some specific platforms. Since this register is divergent for WCN7850 and QCN9274, move it to register table to allow different definitions. Then correct the register address for WCN7850 to fix this issue. Note IPQ5332 is not affected as it is not PCIe based device. Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Reported-by: Parth Pancholi Closes: https://lore.kernel.org/all/86899b2235a59c9134603beebe08f2bb0b244ea0.camel@gmail.com Fixes: d889913205cf ("wifi: ath12k: driver for Qualcomm Wi-Fi 7 devices") Tested-by: Parth Pancholi Link: https://patch.msgid.link/20250523-ath12k-wrong-global-reset-addr-v1-1-3b06eb556196@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/hw.c | 6 ++++++ drivers/net/wireless/ath/ath12k/hw.h | 2 ++ drivers/net/wireless/ath/ath12k/pci.c | 6 +++--- drivers/net/wireless/ath/ath12k/pci.h | 4 +++- 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/hw.c b/drivers/net/wireless/ath/ath12k/hw.c index 7e2cf0fb2085..8254dc10b53b 100644 --- a/drivers/net/wireless/ath/ath12k/hw.c +++ b/drivers/net/wireless/ath/ath12k/hw.c @@ -951,6 +951,8 @@ static const struct ath12k_hw_regs qcn9274_v1_regs = { .hal_umac_ce0_dest_reg_base = 0x01b81000, .hal_umac_ce1_src_reg_base = 0x01b82000, .hal_umac_ce1_dest_reg_base = 0x01b83000, + + .gcc_gcc_pcie_hot_rst = 0x1e38338, }; static const struct ath12k_hw_regs qcn9274_v2_regs = { @@ -1042,6 +1044,8 @@ static const struct ath12k_hw_regs qcn9274_v2_regs = { .hal_umac_ce0_dest_reg_base = 0x01b81000, .hal_umac_ce1_src_reg_base = 0x01b82000, .hal_umac_ce1_dest_reg_base = 0x01b83000, + + .gcc_gcc_pcie_hot_rst = 0x1e38338, }; static const struct ath12k_hw_regs ipq5332_regs = { @@ -1215,6 +1219,8 @@ static const struct ath12k_hw_regs wcn7850_regs = { .hal_umac_ce0_dest_reg_base = 0x01b81000, .hal_umac_ce1_src_reg_base = 0x01b82000, .hal_umac_ce1_dest_reg_base = 0x01b83000, + + .gcc_gcc_pcie_hot_rst = 0x1e40304, }; static const struct ath12k_hw_hal_params ath12k_hw_hal_params_qcn9274 = { diff --git a/drivers/net/wireless/ath/ath12k/hw.h b/drivers/net/wireless/ath/ath12k/hw.h index 0fbc17649df4..0a75bc5abfa2 100644 --- a/drivers/net/wireless/ath/ath12k/hw.h +++ b/drivers/net/wireless/ath/ath12k/hw.h @@ -375,6 +375,8 @@ struct ath12k_hw_regs { u32 hal_reo_cmd_ring_base; u32 hal_reo_status_ring_base; + + u32 gcc_gcc_pcie_hot_rst; }; static inline const char *ath12k_bd_ie_type_str(enum ath12k_bd_ie_type type) diff --git a/drivers/net/wireless/ath/ath12k/pci.c b/drivers/net/wireless/ath/ath12k/pci.c index 489d546390fc..1f3cfd9b89fd 100644 --- a/drivers/net/wireless/ath/ath12k/pci.c +++ b/drivers/net/wireless/ath/ath12k/pci.c @@ -292,10 +292,10 @@ static void ath12k_pci_enable_ltssm(struct ath12k_base *ab) ath12k_dbg(ab, ATH12K_DBG_PCI, "pci ltssm 0x%x\n", val); - val = ath12k_pci_read32(ab, GCC_GCC_PCIE_HOT_RST); + val = ath12k_pci_read32(ab, GCC_GCC_PCIE_HOT_RST(ab)); val |= GCC_GCC_PCIE_HOT_RST_VAL; - ath12k_pci_write32(ab, GCC_GCC_PCIE_HOT_RST, val); - val = ath12k_pci_read32(ab, GCC_GCC_PCIE_HOT_RST); + ath12k_pci_write32(ab, GCC_GCC_PCIE_HOT_RST(ab), val); + val = ath12k_pci_read32(ab, GCC_GCC_PCIE_HOT_RST(ab)); ath12k_dbg(ab, ATH12K_DBG_PCI, "pci pcie_hot_rst 0x%x\n", val); diff --git a/drivers/net/wireless/ath/ath12k/pci.h b/drivers/net/wireless/ath/ath12k/pci.h index 0b4c459d6d8e..d1ec8aad7f6c 100644 --- a/drivers/net/wireless/ath/ath12k/pci.h +++ b/drivers/net/wireless/ath/ath12k/pci.h @@ -28,7 +28,9 @@ #define PCIE_PCIE_PARF_LTSSM 0x1e081b0 #define PARM_LTSSM_VALUE 0x111 -#define GCC_GCC_PCIE_HOT_RST 0x1e38338 +#define GCC_GCC_PCIE_HOT_RST(ab) \ + ((ab)->hw_params->regs->gcc_gcc_pcie_hot_rst) + #define GCC_GCC_PCIE_HOT_RST_VAL 0x10 #define PCIE_PCIE_INT_ALL_CLEAR 0x1e08228 -- cgit v1.2.3 From b6bca6d7149e0bc1a56e831af0296d45688945ec Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Thu, 5 Jun 2025 10:27:22 -0700 Subject: wifi: ath12k: Fix hal_reo_cmd_status kernel-doc Currently a warning is reported when running: % scripts/kernel-doc -Wall -Werror -none drivers/net/wireless/ath/ath12k/hal.h Warning: drivers/net/wireless/ath/ath12k/hal.h:596 Enum value 'HAL_REO_CMD_RESOURCE_BLOCKED' not described in enum 'hal_reo_cmd_status' Add the missing description of HAL_REO_CMD_RESOURCE_BLOCKED. Link: https://patch.msgid.link/20250605-hal_reo_cmd_status-kdoc-v1-1-e59f4b814b88@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/hal.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath12k/hal.h b/drivers/net/wireless/ath/ath12k/hal.h index 0ee9c6b26dab..c1750b5dc03c 100644 --- a/drivers/net/wireless/ath/ath12k/hal.h +++ b/drivers/net/wireless/ath/ath12k/hal.h @@ -585,7 +585,8 @@ enum hal_reo_cmd_type { * or cache was blocked * @HAL_REO_CMD_FAILED: Command execution failed, could be due to * invalid queue desc - * @HAL_REO_CMD_RESOURCE_BLOCKED: + * @HAL_REO_CMD_RESOURCE_BLOCKED: Command could not be executed because + * one or more descriptors were blocked * @HAL_REO_CMD_DRAIN: */ enum hal_reo_cmd_status { -- cgit v1.2.3 From f3fe49dbddd73f0155a8935af47cb63693069dbe Mon Sep 17 00:00:00 2001 From: Miaoqing Pan Date: Wed, 4 Jun 2025 13:52:50 +0800 Subject: wifi: ath12k: fix uaf in ath12k_core_init() When the execution of ath12k_core_hw_group_assign() or ath12k_core_hw_group_create() fails, the registered notifier chain is not unregistered properly. Its memory is freed after rmmod, which may trigger to a use-after-free (UAF) issue if there is a subsequent access to this notifier chain. Fixes the issue by calling ath12k_core_panic_notifier_unregister() in failure cases. Call trace: notifier_chain_register+0x4c/0x1f0 (P) atomic_notifier_chain_register+0x38/0x68 ath12k_core_init+0x50/0x4e8 [ath12k] ath12k_pci_probe+0x5f8/0xc28 [ath12k] pci_device_probe+0xbc/0x1a8 really_probe+0xc8/0x3a0 __driver_probe_device+0x84/0x1b0 driver_probe_device+0x44/0x130 __driver_attach+0xcc/0x208 bus_for_each_dev+0x84/0x100 driver_attach+0x2c/0x40 bus_add_driver+0x130/0x260 driver_register+0x70/0x138 __pci_register_driver+0x68/0x80 ath12k_pci_init+0x30/0x68 [ath12k] ath12k_init+0x28/0x78 [ath12k] Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Fixes: 6f245ea0ec6c ("wifi: ath12k: introduce device group abstraction") Signed-off-by: Miaoqing Pan Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20250604055250.1228501-1-miaoqing.pan@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/core.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.c b/drivers/net/wireless/ath/ath12k/core.c index 31d851d8e688..ebc0560d40e3 100644 --- a/drivers/net/wireless/ath/ath12k/core.c +++ b/drivers/net/wireless/ath/ath12k/core.c @@ -2129,7 +2129,8 @@ int ath12k_core_init(struct ath12k_base *ab) if (!ag) { mutex_unlock(&ath12k_hw_group_mutex); ath12k_warn(ab, "unable to get hw group\n"); - return -ENODEV; + ret = -ENODEV; + goto err_unregister_notifier; } mutex_unlock(&ath12k_hw_group_mutex); @@ -2144,7 +2145,7 @@ int ath12k_core_init(struct ath12k_base *ab) if (ret) { mutex_unlock(&ag->mutex); ath12k_warn(ab, "unable to create hw group\n"); - goto err; + goto err_destroy_hw_group; } } @@ -2152,9 +2153,12 @@ int ath12k_core_init(struct ath12k_base *ab) return 0; -err: +err_destroy_hw_group: ath12k_core_hw_group_destroy(ab->ag); ath12k_core_hw_group_unassign(ab); +err_unregister_notifier: + ath12k_core_panic_notifier_unregister(ab); + return ret; } -- cgit v1.2.3 From 06118ae36855b7d3d22688298e74a766ccf0cb7a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 27 May 2025 08:44:14 +0300 Subject: regulator: max20086: Fix refcount leak in max20086_parse_regulators_dt() There is a missing call to of_node_put() if devm_kcalloc() fails. Fix this by changing the code to use cleanup.h magic to drop the refcount. Fixes: 6b0cd72757c6 ("regulator: max20086: fix invalid memory access") Signed-off-by: Dan Carpenter Link: https://patch.msgid.link/aDVRLqgJWMxYU03G@stanley.mountain Reviewed-by: Laurent Pinchart Signed-off-by: Mark Brown --- drivers/regulator/max20086-regulator.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/regulator/max20086-regulator.c b/drivers/regulator/max20086-regulator.c index b4fe76e33ff2..fcdd2d0317a5 100644 --- a/drivers/regulator/max20086-regulator.c +++ b/drivers/regulator/max20086-regulator.c @@ -5,6 +5,7 @@ // Copyright (C) 2022 Laurent Pinchart // Copyright (C) 2018 Avnet, Inc. +#include #include #include #include @@ -133,11 +134,11 @@ static int max20086_regulators_register(struct max20086 *chip) static int max20086_parse_regulators_dt(struct max20086 *chip, bool *boot_on) { struct of_regulator_match *matches; - struct device_node *node; unsigned int i; int ret; - node = of_get_child_by_name(chip->dev->of_node, "regulators"); + struct device_node *node __free(device_node) = + of_get_child_by_name(chip->dev->of_node, "regulators"); if (!node) { dev_err(chip->dev, "regulators node not found\n"); return -ENODEV; @@ -153,7 +154,6 @@ static int max20086_parse_regulators_dt(struct max20086 *chip, bool *boot_on) ret = of_regulator_match(chip->dev, node, matches, chip->info->num_outputs); - of_node_put(node); if (ret < 0) { dev_err(chip->dev, "Failed to match regulators\n"); return -EINVAL; -- cgit v1.2.3 From a5bf5272295d3f058adeee025d2a0b6625f8ba7b Mon Sep 17 00:00:00 2001 From: Félix Piédallu Date: Fri, 6 Jun 2025 15:37:24 +0200 Subject: spi: omap2-mcspi: Disable multi mode when CS should be kept asserted after message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the last transfer of a SPI message has the cs_change flag, the CS is kept asserted after the message. Multi-mode can't respect this as CS is deasserted by the hardware at the end of the message. Disable multi-mode when not applicable to the current message. Fixes: d153ff4056cb ("spi: omap2-mcspi: Add support for MULTI-mode") Signed-off-by: Félix Piédallu Link: https://patch.msgid.link/20250606-cs_change_fix-v1-1-27191a98a2e5@non.se.com Signed-off-by: Mark Brown --- drivers/spi/spi-omap2-mcspi.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c index 29c616e2c408..e834fb42fd4b 100644 --- a/drivers/spi/spi-omap2-mcspi.c +++ b/drivers/spi/spi-omap2-mcspi.c @@ -1287,9 +1287,15 @@ static int omap2_mcspi_prepare_message(struct spi_controller *ctlr, mcspi->use_multi_mode = false; } - /* Check if transfer asks to change the CS status after the transfer */ - if (!tr->cs_change) - mcspi->use_multi_mode = false; + if (list_is_last(&tr->transfer_list, &msg->transfers)) { + /* Check if transfer asks to keep the CS status after the whole message */ + if (tr->cs_change) + mcspi->use_multi_mode = false; + } else { + /* Check if transfer asks to change the CS status after the transfer */ + if (!tr->cs_change) + mcspi->use_multi_mode = false; + } /* * If at least one message is not compatible, switch back to single mode -- cgit v1.2.3 From 10c24e0d2f7cd2bc8a847cf750f01301ce67dbc8 Mon Sep 17 00:00:00 2001 From: Félix Piédallu Date: Fri, 6 Jun 2025 15:37:25 +0200 Subject: spi: omap2-mcspi: Disable multi-mode when the previous message kept CS asserted MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the last transfer of a SPI message has the cs_change flag, the CS is kept asserted after the message. The next message can't use multi-mode because the CS will be briefly deasserted before the first transfer. Remove the early exit of the list_for_each_entry because the last transfer actually needs to be always checked. Fixes: d153ff4056cb ("spi: omap2-mcspi: Add support for MULTI-mode") Signed-off-by: Félix Piédallu Link: https://patch.msgid.link/20250606-cs_change_fix-v1-2-27191a98a2e5@non.se.com Signed-off-by: Mark Brown --- drivers/spi/spi-omap2-mcspi.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c index e834fb42fd4b..70bb74b3bd9c 100644 --- a/drivers/spi/spi-omap2-mcspi.c +++ b/drivers/spi/spi-omap2-mcspi.c @@ -134,6 +134,7 @@ struct omap2_mcspi { size_t max_xfer_len; u32 ref_clk_hz; bool use_multi_mode; + bool last_msg_kept_cs; }; struct omap2_mcspi_cs { @@ -1269,6 +1270,10 @@ static int omap2_mcspi_prepare_message(struct spi_controller *ctlr, * multi-mode is applicable. */ mcspi->use_multi_mode = true; + + if (mcspi->last_msg_kept_cs) + mcspi->use_multi_mode = false; + list_for_each_entry(tr, &msg->transfers, transfer_list) { if (!tr->bits_per_word) bits_per_word = msg->spi->bits_per_word; @@ -1289,22 +1294,17 @@ static int omap2_mcspi_prepare_message(struct spi_controller *ctlr, if (list_is_last(&tr->transfer_list, &msg->transfers)) { /* Check if transfer asks to keep the CS status after the whole message */ - if (tr->cs_change) + if (tr->cs_change) { mcspi->use_multi_mode = false; + mcspi->last_msg_kept_cs = true; + } else { + mcspi->last_msg_kept_cs = false; + } } else { /* Check if transfer asks to change the CS status after the transfer */ if (!tr->cs_change) mcspi->use_multi_mode = false; } - - /* - * If at least one message is not compatible, switch back to single mode - * - * The bits_per_word of certain transfer can be different, but it will have no - * impact on the signal itself. - */ - if (!mcspi->use_multi_mode) - break; } omap2_mcspi_set_mode(ctlr); -- cgit v1.2.3 From 1dd630088332b5b310f3dd7eaacae9f3c4465651 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sun, 8 Jun 2025 22:29:39 +0800 Subject: spi: loongson: Fix build warnings about export.h After commit a934a57a42f64a4 ("scripts/misc-check: check missing #include when W=1") and 7d95680d64ac8e836c ("scripts/misc-check: check unnecessary #include when W=1"), we get some build warnings with W=1: drivers/spi/spi-loongson-core.c: warning: EXPORT_SYMBOL() is used, but #include is missing So fix these build warnings for SPI/Loongson. Signed-off-by: Huacai Chen Link: https://patch.msgid.link/20250608142939.172108-1-chenhuacai@loongson.cn Signed-off-by: Mark Brown --- drivers/spi/spi-loongson-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-loongson-core.c b/drivers/spi/spi-loongson-core.c index 4fec226456d1..b46f072a0387 100644 --- a/drivers/spi/spi-loongson-core.c +++ b/drivers/spi/spi-loongson-core.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From 2b74aea6d078ade810a3b0f7d1bcfcba2eaad416 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 6 Jun 2025 12:04:14 +0300 Subject: spi: spi-pci1xxxx: Fix error code in probe Return the error code if pci_alloc_irq_vectors() fails. Don't return success. Fixes: b4608e944177 ("spi: spi-pci1xxxx: Fix Probe failure with Dual SPI instance with INTx interrupts") Signed-off-by: Dan Carpenter Reviewed-by: Thangaraj Samynathan Link: https://patch.msgid.link/aEKvDrUxD19GWi0u@stanley.mountain Signed-off-by: Mark Brown --- drivers/spi/spi-pci1xxxx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-pci1xxxx.c b/drivers/spi/spi-pci1xxxx.c index c6489e90b8b9..9112d8a1a0c8 100644 --- a/drivers/spi/spi-pci1xxxx.c +++ b/drivers/spi/spi-pci1xxxx.c @@ -765,7 +765,7 @@ static int pci1xxxx_spi_probe(struct pci_dev *pdev, const struct pci_device_id * PCI_IRQ_ALL_TYPES); if (num_vector < 0) { dev_err(&pdev->dev, "Error allocating MSI vectors\n"); - return ret; + return num_vector; } init_completion(&spi_sub_ptr->spi_xfer_done); -- cgit v1.2.3 From 56ec63a6e107e724619e61c7e605b49d365dfa07 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 13 May 2025 21:38:59 +0300 Subject: pinctrl: qcom: switch to devm_gpiochip_add_data() In order to simplify cleanup actions, use devres-enabled version of gpiochip_add_data(). As the msm_pinctrl_remove() function is now empty, drop it and all its calls from the corresponding pinctrl drivers. Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/20250513-pinctrl-msm-fix-v2-3-249999af0fc1@oss.qualcomm.com Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-apq8064.c | 1 - drivers/pinctrl/qcom/pinctrl-apq8084.c | 1 - drivers/pinctrl/qcom/pinctrl-ipq4019.c | 1 - drivers/pinctrl/qcom/pinctrl-ipq5018.c | 1 - drivers/pinctrl/qcom/pinctrl-ipq5332.c | 1 - drivers/pinctrl/qcom/pinctrl-ipq5424.c | 1 - drivers/pinctrl/qcom/pinctrl-ipq6018.c | 1 - drivers/pinctrl/qcom/pinctrl-ipq8064.c | 1 - drivers/pinctrl/qcom/pinctrl-ipq8074.c | 1 - drivers/pinctrl/qcom/pinctrl-ipq9574.c | 1 - drivers/pinctrl/qcom/pinctrl-mdm9607.c | 1 - drivers/pinctrl/qcom/pinctrl-mdm9615.c | 1 - drivers/pinctrl/qcom/pinctrl-msm.c | 11 +---------- drivers/pinctrl/qcom/pinctrl-msm.h | 1 - drivers/pinctrl/qcom/pinctrl-msm8226.c | 1 - drivers/pinctrl/qcom/pinctrl-msm8660.c | 1 - drivers/pinctrl/qcom/pinctrl-msm8909.c | 1 - drivers/pinctrl/qcom/pinctrl-msm8916.c | 1 - drivers/pinctrl/qcom/pinctrl-msm8917.c | 1 - drivers/pinctrl/qcom/pinctrl-msm8953.c | 1 - drivers/pinctrl/qcom/pinctrl-msm8960.c | 1 - drivers/pinctrl/qcom/pinctrl-msm8976.c | 1 - drivers/pinctrl/qcom/pinctrl-msm8994.c | 1 - drivers/pinctrl/qcom/pinctrl-msm8996.c | 1 - drivers/pinctrl/qcom/pinctrl-msm8998.c | 1 - drivers/pinctrl/qcom/pinctrl-msm8x74.c | 1 - drivers/pinctrl/qcom/pinctrl-qcm2290.c | 1 - drivers/pinctrl/qcom/pinctrl-qcs404.c | 1 - drivers/pinctrl/qcom/pinctrl-qcs615.c | 1 - drivers/pinctrl/qcom/pinctrl-qcs8300.c | 1 - drivers/pinctrl/qcom/pinctrl-qdf2xxx.c | 1 - drivers/pinctrl/qcom/pinctrl-qdu1000.c | 1 - drivers/pinctrl/qcom/pinctrl-sa8775p.c | 1 - drivers/pinctrl/qcom/pinctrl-sar2130p.c | 1 - drivers/pinctrl/qcom/pinctrl-sc7180.c | 1 - drivers/pinctrl/qcom/pinctrl-sc7280.c | 1 - drivers/pinctrl/qcom/pinctrl-sc8180x.c | 1 - drivers/pinctrl/qcom/pinctrl-sc8280xp.c | 1 - drivers/pinctrl/qcom/pinctrl-sdm660.c | 1 - drivers/pinctrl/qcom/pinctrl-sdm670.c | 1 - drivers/pinctrl/qcom/pinctrl-sdm845.c | 1 - drivers/pinctrl/qcom/pinctrl-sdx55.c | 1 - drivers/pinctrl/qcom/pinctrl-sdx65.c | 1 - drivers/pinctrl/qcom/pinctrl-sdx75.c | 1 - drivers/pinctrl/qcom/pinctrl-sm4450.c | 1 - drivers/pinctrl/qcom/pinctrl-sm6115.c | 1 - drivers/pinctrl/qcom/pinctrl-sm6125.c | 1 - drivers/pinctrl/qcom/pinctrl-sm6350.c | 1 - drivers/pinctrl/qcom/pinctrl-sm6375.c | 1 - drivers/pinctrl/qcom/pinctrl-sm7150.c | 1 - drivers/pinctrl/qcom/pinctrl-sm8150.c | 1 - drivers/pinctrl/qcom/pinctrl-sm8250.c | 1 - drivers/pinctrl/qcom/pinctrl-sm8350.c | 1 - drivers/pinctrl/qcom/pinctrl-sm8450.c | 1 - drivers/pinctrl/qcom/pinctrl-sm8550.c | 1 - drivers/pinctrl/qcom/pinctrl-sm8650.c | 1 - drivers/pinctrl/qcom/pinctrl-sm8750.c | 1 - drivers/pinctrl/qcom/pinctrl-x1e80100.c | 1 - 58 files changed, 1 insertion(+), 67 deletions(-) diff --git a/drivers/pinctrl/qcom/pinctrl-apq8064.c b/drivers/pinctrl/qcom/pinctrl-apq8064.c index 20c3b9025044..3654913f1ae5 100644 --- a/drivers/pinctrl/qcom/pinctrl-apq8064.c +++ b/drivers/pinctrl/qcom/pinctrl-apq8064.c @@ -629,7 +629,6 @@ static struct platform_driver apq8064_pinctrl_driver = { .of_match_table = apq8064_pinctrl_of_match, }, .probe = apq8064_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init apq8064_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-apq8084.c b/drivers/pinctrl/qcom/pinctrl-apq8084.c index 3fc0a40762b6..27693cd64881 100644 --- a/drivers/pinctrl/qcom/pinctrl-apq8084.c +++ b/drivers/pinctrl/qcom/pinctrl-apq8084.c @@ -1207,7 +1207,6 @@ static struct platform_driver apq8084_pinctrl_driver = { .of_match_table = apq8084_pinctrl_of_match, }, .probe = apq8084_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init apq8084_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-ipq4019.c b/drivers/pinctrl/qcom/pinctrl-ipq4019.c index 1f7944dd829d..6ede3149b6e1 100644 --- a/drivers/pinctrl/qcom/pinctrl-ipq4019.c +++ b/drivers/pinctrl/qcom/pinctrl-ipq4019.c @@ -710,7 +710,6 @@ static struct platform_driver ipq4019_pinctrl_driver = { .of_match_table = ipq4019_pinctrl_of_match, }, .probe = ipq4019_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init ipq4019_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-ipq5018.c b/drivers/pinctrl/qcom/pinctrl-ipq5018.c index e2951f81c3ee..10b99d5d8a11 100644 --- a/drivers/pinctrl/qcom/pinctrl-ipq5018.c +++ b/drivers/pinctrl/qcom/pinctrl-ipq5018.c @@ -754,7 +754,6 @@ static struct platform_driver ipq5018_pinctrl_driver = { .of_match_table = ipq5018_pinctrl_of_match, }, .probe = ipq5018_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init ipq5018_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-ipq5332.c b/drivers/pinctrl/qcom/pinctrl-ipq5332.c index 625f8014051f..1ac2fc09c119 100644 --- a/drivers/pinctrl/qcom/pinctrl-ipq5332.c +++ b/drivers/pinctrl/qcom/pinctrl-ipq5332.c @@ -834,7 +834,6 @@ static struct platform_driver ipq5332_pinctrl_driver = { .of_match_table = ipq5332_pinctrl_of_match, }, .probe = ipq5332_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init ipq5332_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-ipq5424.c b/drivers/pinctrl/qcom/pinctrl-ipq5424.c index 0d610b076da3..7ff1f8acc1a3 100644 --- a/drivers/pinctrl/qcom/pinctrl-ipq5424.c +++ b/drivers/pinctrl/qcom/pinctrl-ipq5424.c @@ -791,7 +791,6 @@ static struct platform_driver ipq5424_pinctrl_driver = { .of_match_table = ipq5424_pinctrl_of_match, }, .probe = ipq5424_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init ipq5424_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-ipq6018.c b/drivers/pinctrl/qcom/pinctrl-ipq6018.c index 0ad08647dbcd..a4ba980252e1 100644 --- a/drivers/pinctrl/qcom/pinctrl-ipq6018.c +++ b/drivers/pinctrl/qcom/pinctrl-ipq6018.c @@ -1080,7 +1080,6 @@ static struct platform_driver ipq6018_pinctrl_driver = { .of_match_table = ipq6018_pinctrl_of_match, }, .probe = ipq6018_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init ipq6018_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-ipq8064.c b/drivers/pinctrl/qcom/pinctrl-ipq8064.c index e2bb94e86aef..0a9e357e64c6 100644 --- a/drivers/pinctrl/qcom/pinctrl-ipq8064.c +++ b/drivers/pinctrl/qcom/pinctrl-ipq8064.c @@ -631,7 +631,6 @@ static struct platform_driver ipq8064_pinctrl_driver = { .of_match_table = ipq8064_pinctrl_of_match, }, .probe = ipq8064_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init ipq8064_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-ipq8074.c b/drivers/pinctrl/qcom/pinctrl-ipq8074.c index 337f3a1c92c1..482f13282fc2 100644 --- a/drivers/pinctrl/qcom/pinctrl-ipq8074.c +++ b/drivers/pinctrl/qcom/pinctrl-ipq8074.c @@ -1041,7 +1041,6 @@ static struct platform_driver ipq8074_pinctrl_driver = { .of_match_table = ipq8074_pinctrl_of_match, }, .probe = ipq8074_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init ipq8074_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-ipq9574.c b/drivers/pinctrl/qcom/pinctrl-ipq9574.c index e2491617b236..89c05d8eb550 100644 --- a/drivers/pinctrl/qcom/pinctrl-ipq9574.c +++ b/drivers/pinctrl/qcom/pinctrl-ipq9574.c @@ -799,7 +799,6 @@ static struct platform_driver ipq9574_pinctrl_driver = { .of_match_table = ipq9574_pinctrl_of_match, }, .probe = ipq9574_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init ipq9574_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-mdm9607.c b/drivers/pinctrl/qcom/pinctrl-mdm9607.c index e7cd3ef1cf3e..3e18ba124fed 100644 --- a/drivers/pinctrl/qcom/pinctrl-mdm9607.c +++ b/drivers/pinctrl/qcom/pinctrl-mdm9607.c @@ -1059,7 +1059,6 @@ static struct platform_driver mdm9607_pinctrl_driver = { .of_match_table = mdm9607_pinctrl_of_match, }, .probe = mdm9607_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init mdm9607_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-mdm9615.c b/drivers/pinctrl/qcom/pinctrl-mdm9615.c index 0a2ae383d3d5..bea1ca3d1b7f 100644 --- a/drivers/pinctrl/qcom/pinctrl-mdm9615.c +++ b/drivers/pinctrl/qcom/pinctrl-mdm9615.c @@ -446,7 +446,6 @@ static struct platform_driver mdm9615_pinctrl_driver = { .of_match_table = mdm9615_pinctrl_of_match, }, .probe = mdm9615_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init mdm9615_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c index f012ea88aa22..5c4687de1464 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.c +++ b/drivers/pinctrl/qcom/pinctrl-msm.c @@ -1442,7 +1442,7 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl) girq->handler = handle_bad_irq; girq->parents[0] = pctrl->irq; - ret = gpiochip_add_data(&pctrl->chip, pctrl); + ret = devm_gpiochip_add_data(pctrl->dev, &pctrl->chip, pctrl); if (ret) { dev_err(pctrl->dev, "Failed register gpiochip\n"); return ret; @@ -1463,7 +1463,6 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl) dev_name(pctrl->dev), 0, 0, chip->ngpio); if (ret) { dev_err(pctrl->dev, "Failed to add pin range\n"); - gpiochip_remove(&pctrl->chip); return ret; } } @@ -1599,13 +1598,5 @@ int msm_pinctrl_probe(struct platform_device *pdev, } EXPORT_SYMBOL(msm_pinctrl_probe); -void msm_pinctrl_remove(struct platform_device *pdev) -{ - struct msm_pinctrl *pctrl = platform_get_drvdata(pdev); - - gpiochip_remove(&pctrl->chip); -} -EXPORT_SYMBOL(msm_pinctrl_remove); - MODULE_DESCRIPTION("Qualcomm Technologies, Inc. TLMM driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/pinctrl/qcom/pinctrl-msm.h b/drivers/pinctrl/qcom/pinctrl-msm.h index 63852ed70295..d7dc0947bb16 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.h +++ b/drivers/pinctrl/qcom/pinctrl-msm.h @@ -171,6 +171,5 @@ extern const struct dev_pm_ops msm_pinctrl_dev_pm_ops; int msm_pinctrl_probe(struct platform_device *pdev, const struct msm_pinctrl_soc_data *soc_data); -void msm_pinctrl_remove(struct platform_device *pdev); #endif diff --git a/drivers/pinctrl/qcom/pinctrl-msm8226.c b/drivers/pinctrl/qcom/pinctrl-msm8226.c index 64fee70f1772..f9a957347340 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm8226.c +++ b/drivers/pinctrl/qcom/pinctrl-msm8226.c @@ -654,7 +654,6 @@ static struct platform_driver msm8226_pinctrl_driver = { .of_match_table = msm8226_pinctrl_of_match, }, .probe = msm8226_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init msm8226_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-msm8660.c b/drivers/pinctrl/qcom/pinctrl-msm8660.c index 999a5f867eb5..4dbc19ffd80e 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm8660.c +++ b/drivers/pinctrl/qcom/pinctrl-msm8660.c @@ -981,7 +981,6 @@ static struct platform_driver msm8660_pinctrl_driver = { .of_match_table = msm8660_pinctrl_of_match, }, .probe = msm8660_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init msm8660_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-msm8909.c b/drivers/pinctrl/qcom/pinctrl-msm8909.c index 756856d20d6b..0aa4f77b774f 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm8909.c +++ b/drivers/pinctrl/qcom/pinctrl-msm8909.c @@ -929,7 +929,6 @@ static struct platform_driver msm8909_pinctrl_driver = { .of_match_table = msm8909_pinctrl_of_match, }, .probe = msm8909_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init msm8909_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-msm8916.c b/drivers/pinctrl/qcom/pinctrl-msm8916.c index cea5c54f92fe..0dfc6dd33d58 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm8916.c +++ b/drivers/pinctrl/qcom/pinctrl-msm8916.c @@ -969,7 +969,6 @@ static struct platform_driver msm8916_pinctrl_driver = { .of_match_table = msm8916_pinctrl_of_match, }, .probe = msm8916_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init msm8916_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-msm8917.c b/drivers/pinctrl/qcom/pinctrl-msm8917.c index 350636807b07..2e1a94ab18b2 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm8917.c +++ b/drivers/pinctrl/qcom/pinctrl-msm8917.c @@ -1607,7 +1607,6 @@ static struct platform_driver msm8917_pinctrl_driver = { .of_match_table = msm8917_pinctrl_of_match, }, .probe = msm8917_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init msm8917_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-msm8953.c b/drivers/pinctrl/qcom/pinctrl-msm8953.c index 998351bdfee1..956383341a7a 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm8953.c +++ b/drivers/pinctrl/qcom/pinctrl-msm8953.c @@ -1816,7 +1816,6 @@ static struct platform_driver msm8953_pinctrl_driver = { .of_match_table = msm8953_pinctrl_of_match, }, .probe = msm8953_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init msm8953_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-msm8960.c b/drivers/pinctrl/qcom/pinctrl-msm8960.c index ebe230b3b437..a937ea867de7 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm8960.c +++ b/drivers/pinctrl/qcom/pinctrl-msm8960.c @@ -1246,7 +1246,6 @@ static struct platform_driver msm8960_pinctrl_driver = { .of_match_table = msm8960_pinctrl_of_match, }, .probe = msm8960_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init msm8960_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-msm8976.c b/drivers/pinctrl/qcom/pinctrl-msm8976.c index c30d80e4e98c..3bcb03387781 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm8976.c +++ b/drivers/pinctrl/qcom/pinctrl-msm8976.c @@ -1096,7 +1096,6 @@ static struct platform_driver msm8976_pinctrl_driver = { .of_match_table = msm8976_pinctrl_of_match, }, .probe = msm8976_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init msm8976_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-msm8994.c b/drivers/pinctrl/qcom/pinctrl-msm8994.c index b1a6759ab4a5..7a3b6cbccb68 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm8994.c +++ b/drivers/pinctrl/qcom/pinctrl-msm8994.c @@ -1343,7 +1343,6 @@ static struct platform_driver msm8994_pinctrl_driver = { .of_match_table = msm8994_pinctrl_of_match, }, .probe = msm8994_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init msm8994_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-msm8996.c b/drivers/pinctrl/qcom/pinctrl-msm8996.c index 1b5d80eaab83..d86d83106d3b 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm8996.c +++ b/drivers/pinctrl/qcom/pinctrl-msm8996.c @@ -1920,7 +1920,6 @@ static struct platform_driver msm8996_pinctrl_driver = { .of_match_table = msm8996_pinctrl_of_match, }, .probe = msm8996_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init msm8996_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-msm8998.c b/drivers/pinctrl/qcom/pinctrl-msm8998.c index b7cbf32b3125..1daee815888f 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm8998.c +++ b/drivers/pinctrl/qcom/pinctrl-msm8998.c @@ -1535,7 +1535,6 @@ static struct platform_driver msm8998_pinctrl_driver = { .of_match_table = msm8998_pinctrl_of_match, }, .probe = msm8998_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init msm8998_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-msm8x74.c b/drivers/pinctrl/qcom/pinctrl-msm8x74.c index 238c83f6ec4f..8253aa25775b 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm8x74.c +++ b/drivers/pinctrl/qcom/pinctrl-msm8x74.c @@ -1083,7 +1083,6 @@ static struct platform_driver msm8x74_pinctrl_driver = { .of_match_table = msm8x74_pinctrl_of_match, }, .probe = msm8x74_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init msm8x74_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-qcm2290.c b/drivers/pinctrl/qcom/pinctrl-qcm2290.c index f885af571ec9..85c951305abd 100644 --- a/drivers/pinctrl/qcom/pinctrl-qcm2290.c +++ b/drivers/pinctrl/qcom/pinctrl-qcm2290.c @@ -1125,7 +1125,6 @@ static struct platform_driver qcm2290_pinctrl_driver = { .of_match_table = qcm2290_pinctrl_of_match, }, .probe = qcm2290_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init qcm2290_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-qcs404.c b/drivers/pinctrl/qcom/pinctrl-qcs404.c index ae7224012f8a..54e3b4435349 100644 --- a/drivers/pinctrl/qcom/pinctrl-qcs404.c +++ b/drivers/pinctrl/qcom/pinctrl-qcs404.c @@ -1644,7 +1644,6 @@ static struct platform_driver qcs404_pinctrl_driver = { .of_match_table = qcs404_pinctrl_of_match, }, .probe = qcs404_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init qcs404_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-qcs615.c b/drivers/pinctrl/qcom/pinctrl-qcs615.c index 17ca743c2210..2a943bc46a62 100644 --- a/drivers/pinctrl/qcom/pinctrl-qcs615.c +++ b/drivers/pinctrl/qcom/pinctrl-qcs615.c @@ -1087,7 +1087,6 @@ static struct platform_driver qcs615_tlmm_driver = { .of_match_table = qcs615_tlmm_of_match, }, .probe = qcs615_tlmm_probe, - .remove = msm_pinctrl_remove, }; static int __init qcs615_tlmm_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-qcs8300.c b/drivers/pinctrl/qcom/pinctrl-qcs8300.c index 5f5f7c4ac644..d6437e26392b 100644 --- a/drivers/pinctrl/qcom/pinctrl-qcs8300.c +++ b/drivers/pinctrl/qcom/pinctrl-qcs8300.c @@ -1227,7 +1227,6 @@ static struct platform_driver qcs8300_pinctrl_driver = { .of_match_table = qcs8300_pinctrl_of_match, }, .probe = qcs8300_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init qcs8300_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-qdf2xxx.c b/drivers/pinctrl/qcom/pinctrl-qdf2xxx.c index b5808fcfb13c..9ecc4d40e4dc 100644 --- a/drivers/pinctrl/qcom/pinctrl-qdf2xxx.c +++ b/drivers/pinctrl/qcom/pinctrl-qdf2xxx.c @@ -145,7 +145,6 @@ static struct platform_driver qdf2xxx_pinctrl_driver = { .acpi_match_table = qdf2xxx_acpi_ids, }, .probe = qdf2xxx_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init qdf2xxx_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-qdu1000.c b/drivers/pinctrl/qcom/pinctrl-qdu1000.c index 47bc529ef550..eacb89fa3888 100644 --- a/drivers/pinctrl/qcom/pinctrl-qdu1000.c +++ b/drivers/pinctrl/qcom/pinctrl-qdu1000.c @@ -1248,7 +1248,6 @@ static struct platform_driver qdu1000_tlmm_driver = { .of_match_table = qdu1000_tlmm_of_match, }, .probe = qdu1000_tlmm_probe, - .remove = msm_pinctrl_remove, }; static int __init qdu1000_tlmm_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sa8775p.c b/drivers/pinctrl/qcom/pinctrl-sa8775p.c index a5b38221aea8..1b62eb3e6620 100644 --- a/drivers/pinctrl/qcom/pinctrl-sa8775p.c +++ b/drivers/pinctrl/qcom/pinctrl-sa8775p.c @@ -1540,7 +1540,6 @@ static struct platform_driver sa8775p_pinctrl_driver = { .of_match_table = sa8775p_pinctrl_of_match, }, .probe = sa8775p_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init sa8775p_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sar2130p.c b/drivers/pinctrl/qcom/pinctrl-sar2130p.c index 19a2e37826c7..3dd1b5e5cfee 100644 --- a/drivers/pinctrl/qcom/pinctrl-sar2130p.c +++ b/drivers/pinctrl/qcom/pinctrl-sar2130p.c @@ -1486,7 +1486,6 @@ static struct platform_driver sar2130p_tlmm_driver = { .of_match_table = sar2130p_tlmm_of_match, }, .probe = sar2130p_tlmm_probe, - .remove = msm_pinctrl_remove, }; static int __init sar2130p_tlmm_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sc7180.c b/drivers/pinctrl/qcom/pinctrl-sc7180.c index 6eb0c73791c0..c43fe10b71ad 100644 --- a/drivers/pinctrl/qcom/pinctrl-sc7180.c +++ b/drivers/pinctrl/qcom/pinctrl-sc7180.c @@ -1159,7 +1159,6 @@ static struct platform_driver sc7180_pinctrl_driver = { .of_match_table = sc7180_pinctrl_of_match, }, .probe = sc7180_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init sc7180_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sc7280.c b/drivers/pinctrl/qcom/pinctrl-sc7280.c index 0c10eeb60b55..1b070e9d41f5 100644 --- a/drivers/pinctrl/qcom/pinctrl-sc7280.c +++ b/drivers/pinctrl/qcom/pinctrl-sc7280.c @@ -1505,7 +1505,6 @@ static struct platform_driver sc7280_pinctrl_driver = { .of_match_table = sc7280_pinctrl_of_match, }, .probe = sc7280_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init sc7280_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sc8180x.c b/drivers/pinctrl/qcom/pinctrl-sc8180x.c index d6a79ad41a40..26dd165d1543 100644 --- a/drivers/pinctrl/qcom/pinctrl-sc8180x.c +++ b/drivers/pinctrl/qcom/pinctrl-sc8180x.c @@ -1720,7 +1720,6 @@ static struct platform_driver sc8180x_pinctrl_driver = { .acpi_match_table = sc8180x_pinctrl_acpi_match, }, .probe = sc8180x_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init sc8180x_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sc8280xp.c b/drivers/pinctrl/qcom/pinctrl-sc8280xp.c index 96f4fb5a5d29..6ccd7e5648d4 100644 --- a/drivers/pinctrl/qcom/pinctrl-sc8280xp.c +++ b/drivers/pinctrl/qcom/pinctrl-sc8280xp.c @@ -1926,7 +1926,6 @@ static struct platform_driver sc8280xp_pinctrl_driver = { .of_match_table = sc8280xp_pinctrl_of_match, }, .probe = sc8280xp_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init sc8280xp_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sdm660.c b/drivers/pinctrl/qcom/pinctrl-sdm660.c index 907e4ffca5e7..1a78288f1bc8 100644 --- a/drivers/pinctrl/qcom/pinctrl-sdm660.c +++ b/drivers/pinctrl/qcom/pinctrl-sdm660.c @@ -1442,7 +1442,6 @@ static struct platform_driver sdm660_pinctrl_driver = { .of_match_table = sdm660_pinctrl_of_match, }, .probe = sdm660_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init sdm660_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sdm670.c b/drivers/pinctrl/qcom/pinctrl-sdm670.c index c76183ba95e1..0fe1fa94cd6d 100644 --- a/drivers/pinctrl/qcom/pinctrl-sdm670.c +++ b/drivers/pinctrl/qcom/pinctrl-sdm670.c @@ -1337,7 +1337,6 @@ static struct platform_driver sdm670_pinctrl_driver = { .of_match_table = sdm670_pinctrl_of_match, }, .probe = sdm670_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init sdm670_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sdm845.c b/drivers/pinctrl/qcom/pinctrl-sdm845.c index cc05c415ed15..0446e291aa48 100644 --- a/drivers/pinctrl/qcom/pinctrl-sdm845.c +++ b/drivers/pinctrl/qcom/pinctrl-sdm845.c @@ -1351,7 +1351,6 @@ static struct platform_driver sdm845_pinctrl_driver = { .acpi_match_table = ACPI_PTR(sdm845_pinctrl_acpi_match), }, .probe = sdm845_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init sdm845_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sdx55.c b/drivers/pinctrl/qcom/pinctrl-sdx55.c index 8826db9d21d0..2c17bf889146 100644 --- a/drivers/pinctrl/qcom/pinctrl-sdx55.c +++ b/drivers/pinctrl/qcom/pinctrl-sdx55.c @@ -990,7 +990,6 @@ static struct platform_driver sdx55_pinctrl_driver = { .of_match_table = sdx55_pinctrl_of_match, }, .probe = sdx55_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init sdx55_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sdx65.c b/drivers/pinctrl/qcom/pinctrl-sdx65.c index f6f319c997fc..85b5c0206dbd 100644 --- a/drivers/pinctrl/qcom/pinctrl-sdx65.c +++ b/drivers/pinctrl/qcom/pinctrl-sdx65.c @@ -939,7 +939,6 @@ static struct platform_driver sdx65_pinctrl_driver = { .of_match_table = sdx65_pinctrl_of_match, }, .probe = sdx65_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init sdx65_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sdx75.c b/drivers/pinctrl/qcom/pinctrl-sdx75.c index 3cfe8c7f04df..ab13a3a57a83 100644 --- a/drivers/pinctrl/qcom/pinctrl-sdx75.c +++ b/drivers/pinctrl/qcom/pinctrl-sdx75.c @@ -1124,7 +1124,6 @@ static struct platform_driver sdx75_pinctrl_driver = { .of_match_table = sdx75_pinctrl_of_match, }, .probe = sdx75_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init sdx75_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sm4450.c b/drivers/pinctrl/qcom/pinctrl-sm4450.c index 622f20e6f6f8..1ecdf1ab4f27 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm4450.c +++ b/drivers/pinctrl/qcom/pinctrl-sm4450.c @@ -994,7 +994,6 @@ static struct platform_driver sm4450_tlmm_driver = { .of_match_table = sm4450_tlmm_of_match, }, .probe = sm4450_tlmm_probe, - .remove = msm_pinctrl_remove, }; MODULE_DEVICE_TABLE(of, sm4450_tlmm_of_match); diff --git a/drivers/pinctrl/qcom/pinctrl-sm6115.c b/drivers/pinctrl/qcom/pinctrl-sm6115.c index 4e91c75ad952..c273efa43996 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm6115.c +++ b/drivers/pinctrl/qcom/pinctrl-sm6115.c @@ -907,7 +907,6 @@ static struct platform_driver sm6115_tlmm_driver = { .of_match_table = sm6115_tlmm_of_match, }, .probe = sm6115_tlmm_probe, - .remove = msm_pinctrl_remove, }; static int __init sm6115_tlmm_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sm6125.c b/drivers/pinctrl/qcom/pinctrl-sm6125.c index c188842047aa..5092f20e0c1b 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm6125.c +++ b/drivers/pinctrl/qcom/pinctrl-sm6125.c @@ -1266,7 +1266,6 @@ static struct platform_driver sm6125_tlmm_driver = { .of_match_table = sm6125_tlmm_of_match, }, .probe = sm6125_tlmm_probe, - .remove = msm_pinctrl_remove, }; static int __init sm6125_tlmm_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sm6350.c b/drivers/pinctrl/qcom/pinctrl-sm6350.c index f3828c07b134..ba4686c86c54 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm6350.c +++ b/drivers/pinctrl/qcom/pinctrl-sm6350.c @@ -1373,7 +1373,6 @@ static struct platform_driver sm6350_tlmm_driver = { .of_match_table = sm6350_tlmm_of_match, }, .probe = sm6350_tlmm_probe, - .remove = msm_pinctrl_remove, }; static int __init sm6350_tlmm_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sm6375.c b/drivers/pinctrl/qcom/pinctrl-sm6375.c index c82c8516932e..49031571e65e 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm6375.c +++ b/drivers/pinctrl/qcom/pinctrl-sm6375.c @@ -1516,7 +1516,6 @@ static struct platform_driver sm6375_tlmm_driver = { .of_match_table = sm6375_tlmm_of_match, }, .probe = sm6375_tlmm_probe, - .remove = msm_pinctrl_remove, }; static int __init sm6375_tlmm_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sm7150.c b/drivers/pinctrl/qcom/pinctrl-sm7150.c index 3c7fd8af6635..6e89966cd70e 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm7150.c +++ b/drivers/pinctrl/qcom/pinctrl-sm7150.c @@ -1255,7 +1255,6 @@ static struct platform_driver sm7150_tlmm_driver = { .of_match_table = sm7150_tlmm_of_match, }, .probe = sm7150_tlmm_probe, - .remove = msm_pinctrl_remove, }; static int __init sm7150_tlmm_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sm8150.c b/drivers/pinctrl/qcom/pinctrl-sm8150.c index 01aea9c70b7a..794ed99463f7 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm8150.c +++ b/drivers/pinctrl/qcom/pinctrl-sm8150.c @@ -1542,7 +1542,6 @@ static struct platform_driver sm8150_pinctrl_driver = { .of_match_table = sm8150_pinctrl_of_match, }, .probe = sm8150_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init sm8150_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sm8250.c b/drivers/pinctrl/qcom/pinctrl-sm8250.c index e9961a49ff98..fb6f005d64f5 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm8250.c +++ b/drivers/pinctrl/qcom/pinctrl-sm8250.c @@ -1351,7 +1351,6 @@ static struct platform_driver sm8250_pinctrl_driver = { .of_match_table = sm8250_pinctrl_of_match, }, .probe = sm8250_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init sm8250_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sm8350.c b/drivers/pinctrl/qcom/pinctrl-sm8350.c index 9c69458bd910..c8a3f39ce6f1 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm8350.c +++ b/drivers/pinctrl/qcom/pinctrl-sm8350.c @@ -1642,7 +1642,6 @@ static struct platform_driver sm8350_tlmm_driver = { .of_match_table = sm8350_tlmm_of_match, }, .probe = sm8350_tlmm_probe, - .remove = msm_pinctrl_remove, }; static int __init sm8350_tlmm_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sm8450.c b/drivers/pinctrl/qcom/pinctrl-sm8450.c index d11bb1ee9e3d..f2e52d5a0f93 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm8450.c +++ b/drivers/pinctrl/qcom/pinctrl-sm8450.c @@ -1677,7 +1677,6 @@ static struct platform_driver sm8450_tlmm_driver = { .of_match_table = sm8450_tlmm_of_match, }, .probe = sm8450_tlmm_probe, - .remove = msm_pinctrl_remove, }; static int __init sm8450_tlmm_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sm8550.c b/drivers/pinctrl/qcom/pinctrl-sm8550.c index 3c847d9cb5d9..1b4496cb39eb 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm8550.c +++ b/drivers/pinctrl/qcom/pinctrl-sm8550.c @@ -1762,7 +1762,6 @@ static struct platform_driver sm8550_tlmm_driver = { .of_match_table = sm8550_tlmm_of_match, }, .probe = sm8550_tlmm_probe, - .remove = msm_pinctrl_remove, }; static int __init sm8550_tlmm_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sm8650.c b/drivers/pinctrl/qcom/pinctrl-sm8650.c index 104708252d12..449a0077f4b1 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm8650.c +++ b/drivers/pinctrl/qcom/pinctrl-sm8650.c @@ -1742,7 +1742,6 @@ static struct platform_driver sm8650_tlmm_driver = { .of_match_table = sm8650_tlmm_of_match, }, .probe = sm8650_tlmm_probe, - .remove = msm_pinctrl_remove, }; static int __init sm8650_tlmm_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sm8750.c b/drivers/pinctrl/qcom/pinctrl-sm8750.c index b94fb4ee0ec3..8516693d1db5 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm8750.c +++ b/drivers/pinctrl/qcom/pinctrl-sm8750.c @@ -1711,7 +1711,6 @@ static struct platform_driver sm8750_tlmm_driver = { .of_match_table = sm8750_tlmm_of_match, }, .probe = sm8750_tlmm_probe, - .remove = msm_pinctrl_remove, }; static int __init sm8750_tlmm_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-x1e80100.c b/drivers/pinctrl/qcom/pinctrl-x1e80100.c index 419cb8facb2f..d4b215f34c39 100644 --- a/drivers/pinctrl/qcom/pinctrl-x1e80100.c +++ b/drivers/pinctrl/qcom/pinctrl-x1e80100.c @@ -1861,7 +1861,6 @@ static struct platform_driver x1e80100_pinctrl_driver = { .of_match_table = x1e80100_pinctrl_of_match, }, .probe = x1e80100_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init x1e80100_pinctrl_init(void) -- cgit v1.2.3 From 315345610faee8a0568b522dba9e35067d1732ab Mon Sep 17 00:00:00 2001 From: Wojciech Slenska Date: Fri, 23 May 2025 12:14:37 +0200 Subject: pinctrl: qcom: pinctrl-qcm2290: Add missing pins Added the missing pins to the qcm2290_pins table. Signed-off-by: Wojciech Slenska Fixes: 48e049ef1238 ("pinctrl: qcom: Add QCM2290 pinctrl driver") Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/20250523101437.59092-1-wojciech.slenska@gmail.com Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-qcm2290.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/pinctrl/qcom/pinctrl-qcm2290.c b/drivers/pinctrl/qcom/pinctrl-qcm2290.c index 85c951305abd..eeeec6434f6a 100644 --- a/drivers/pinctrl/qcom/pinctrl-qcm2290.c +++ b/drivers/pinctrl/qcom/pinctrl-qcm2290.c @@ -167,6 +167,10 @@ static const struct pinctrl_pin_desc qcm2290_pins[] = { PINCTRL_PIN(62, "GPIO_62"), PINCTRL_PIN(63, "GPIO_63"), PINCTRL_PIN(64, "GPIO_64"), + PINCTRL_PIN(65, "GPIO_65"), + PINCTRL_PIN(66, "GPIO_66"), + PINCTRL_PIN(67, "GPIO_67"), + PINCTRL_PIN(68, "GPIO_68"), PINCTRL_PIN(69, "GPIO_69"), PINCTRL_PIN(70, "GPIO_70"), PINCTRL_PIN(71, "GPIO_71"), @@ -181,12 +185,17 @@ static const struct pinctrl_pin_desc qcm2290_pins[] = { PINCTRL_PIN(80, "GPIO_80"), PINCTRL_PIN(81, "GPIO_81"), PINCTRL_PIN(82, "GPIO_82"), + PINCTRL_PIN(83, "GPIO_83"), + PINCTRL_PIN(84, "GPIO_84"), + PINCTRL_PIN(85, "GPIO_85"), PINCTRL_PIN(86, "GPIO_86"), PINCTRL_PIN(87, "GPIO_87"), PINCTRL_PIN(88, "GPIO_88"), PINCTRL_PIN(89, "GPIO_89"), PINCTRL_PIN(90, "GPIO_90"), PINCTRL_PIN(91, "GPIO_91"), + PINCTRL_PIN(92, "GPIO_92"), + PINCTRL_PIN(93, "GPIO_93"), PINCTRL_PIN(94, "GPIO_94"), PINCTRL_PIN(95, "GPIO_95"), PINCTRL_PIN(96, "GPIO_96"), -- cgit v1.2.3 From fcd65d65fd85dbde090ef8c2615760ebc5ccf9e3 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 28 May 2025 11:22:02 +0200 Subject: pinctrl: st: Drop unused st_gpio_bank() function Static inline st_gpio_bank() function is not referenced: pinctrl-st.c:377:19: error: unused function 'st_gpio_bank' [-Werror,-Wunused-function] Fixes: 701016c0cba5 ("pinctrl: st: Add pinctrl and pinconf support.") Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/20250528092201.52132-2-krzysztof.kozlowski@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-st.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/pinctrl/pinctrl-st.c b/drivers/pinctrl/pinctrl-st.c index fe2d52e434db..8a2ef74862d3 100644 --- a/drivers/pinctrl/pinctrl-st.c +++ b/drivers/pinctrl/pinctrl-st.c @@ -374,11 +374,6 @@ static struct st_pio_control *st_get_pio_control( } /* Low level functions.. */ -static inline int st_gpio_bank(int gpio) -{ - return gpio/ST_GPIO_PINS_PER_BANK; -} - static inline int st_gpio_pin(int gpio) { return gpio%ST_GPIO_PINS_PER_BANK; -- cgit v1.2.3 From d38e00c417e1ca0c586802e47ad7d54347c91f67 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 28 May 2025 12:45:15 +0200 Subject: pinctrl: MAINTAINERS: Drop bouncing Jianlong Huang Emails to Jianlong Huang bounce since 9 months, so drop the person from maintainers: 550 5.4.1 Recipient address rejected: Access denied. Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/20250528104514.184122-2-krzysztof.kozlowski@linaro.org Signed-off-by: Linus Walleij --- .../devicetree/bindings/pinctrl/starfive,jh7110-aon-pinctrl.yaml | 2 +- .../devicetree/bindings/pinctrl/starfive,jh7110-sys-pinctrl.yaml | 2 +- MAINTAINERS | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/pinctrl/starfive,jh7110-aon-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/starfive,jh7110-aon-pinctrl.yaml index b470901f5f56..4dbef86bd958 100644 --- a/Documentation/devicetree/bindings/pinctrl/starfive,jh7110-aon-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/starfive,jh7110-aon-pinctrl.yaml @@ -15,7 +15,7 @@ description: | Some peripherals such as PWM have their I/O go through the 4 "GPIOs". maintainers: - - Jianlong Huang + - Hal Feng properties: compatible: diff --git a/Documentation/devicetree/bindings/pinctrl/starfive,jh7110-sys-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/starfive,jh7110-sys-pinctrl.yaml index 222b9e240f8a..e2a25a20f6a6 100644 --- a/Documentation/devicetree/bindings/pinctrl/starfive,jh7110-sys-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/starfive,jh7110-sys-pinctrl.yaml @@ -18,7 +18,7 @@ description: | any GPIO can be set up to be controlled by any of the peripherals. maintainers: - - Jianlong Huang + - Hal Feng properties: compatible: diff --git a/MAINTAINERS b/MAINTAINERS index a92290fffa16..4f17af5d7240 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -23661,7 +23661,6 @@ F: include/dt-bindings/clock/starfive?jh71*.h STARFIVE JH71X0 PINCTRL DRIVERS M: Emil Renner Berthing -M: Jianlong Huang M: Hal Feng L: linux-gpio@vger.kernel.org S: Maintained -- cgit v1.2.3 From 24b0277c1c539cd41539d9297baafc62df04464a Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 1 Jun 2025 12:51:01 +0200 Subject: pinctrl: tb10x: Drop of_match_ptr for ID table The driver can match only via the DT table so the table should be always used and the of_match_ptr does not have any sense (this also allows ACPI matching via PRP0001, even though it might not be relevant here). This also fixes !CONFIG_OF warning: pinctrl-tb10x.c:815:34: warning: unused variable 'tb10x_pinctrl_dt_ids' [-Wunused-const-variable] Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202505301317.EI1caRC0-lkp@intel.com/ Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/20250601105100.27927-2-krzysztof.kozlowski@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-tb10x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-tb10x.c b/drivers/pinctrl/pinctrl-tb10x.c index d6bb8f58978d..4edb20e61951 100644 --- a/drivers/pinctrl/pinctrl-tb10x.c +++ b/drivers/pinctrl/pinctrl-tb10x.c @@ -823,7 +823,7 @@ static struct platform_driver tb10x_pinctrl_pdrv = { .remove = tb10x_pinctrl_remove, .driver = { .name = "tb10x_pinctrl", - .of_match_table = of_match_ptr(tb10x_pinctrl_dt_ids), + .of_match_table = tb10x_pinctrl_dt_ids, } }; -- cgit v1.2.3 From e51a086117ed857ea455c9ea774dbfb82f53e517 Mon Sep 17 00:00:00 2001 From: Andres Urian Florez Date: Sun, 8 Jun 2025 18:04:21 -0500 Subject: spi: offload: check offload ops existence before disabling the trigger Add a safe guard in spi_offload_trigger to check the existence of offload->ops before invoking the trigger_disable callback Signed-off-by: Andres Urian Florez Link: https://patch.msgid.link/20250608230422.325360-1-andres.emb.sys@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-offload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-offload.c b/drivers/spi/spi-offload.c index e674097bf3be..d336f4d228d5 100644 --- a/drivers/spi/spi-offload.c +++ b/drivers/spi/spi-offload.c @@ -297,7 +297,7 @@ int spi_offload_trigger_enable(struct spi_offload *offload, if (trigger->ops->enable) { ret = trigger->ops->enable(trigger, config); if (ret) { - if (offload->ops->trigger_disable) + if (offload->ops && offload->ops->trigger_disable) offload->ops->trigger_disable(offload); return ret; } -- cgit v1.2.3 From 779a0c9e06a91d0007f2a4f4071e3b9a8102b15e Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Wed, 4 Jun 2025 07:32:17 -0700 Subject: accel/amdxdna: Fix incorrect PSP firmware size The incorrect PSP firmware size is used for initializing. It may cause error for newer version firmware. Fixes: 8c9ff1b181ba ("accel/amdxdna: Add a new driver for AMD AI Engine") Acked-by: Alex Deucher Signed-off-by: Lizhi Hou Link: https://lore.kernel.org/r/20250604143217.1386272-1-lizhi.hou@amd.com --- drivers/accel/amdxdna/aie2_psp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/accel/amdxdna/aie2_psp.c b/drivers/accel/amdxdna/aie2_psp.c index dc3a072ce3b6..f28a060a8810 100644 --- a/drivers/accel/amdxdna/aie2_psp.c +++ b/drivers/accel/amdxdna/aie2_psp.c @@ -126,8 +126,8 @@ struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct psp_config * psp->ddev = ddev; memcpy(psp->psp_regs, conf->psp_regs, sizeof(psp->psp_regs)); - psp->fw_buf_sz = ALIGN(conf->fw_size, PSP_FW_ALIGN) + PSP_FW_ALIGN; - psp->fw_buffer = drmm_kmalloc(ddev, psp->fw_buf_sz, GFP_KERNEL); + psp->fw_buf_sz = ALIGN(conf->fw_size, PSP_FW_ALIGN); + psp->fw_buffer = drmm_kmalloc(ddev, psp->fw_buf_sz + PSP_FW_ALIGN, GFP_KERNEL); if (!psp->fw_buffer) { drm_err(ddev, "no memory for fw buffer"); return NULL; -- cgit v1.2.3 From d6fb4f01736a1d18cc981eb04fa2907a7121fc27 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 21 May 2025 11:01:02 +0200 Subject: drm/xe/svm: Fix regression disallowing 64K SVM migration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When changing the condition from >= SZ_64K, it was changed to <= SZ_64K. This disallows migration of 64K, which is the exact minimum allowed. Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/5057 Fixes: 794f5493f518 ("drm/xe: Strict migration policy for atomic SVM faults") Cc: stable@vger.kernel.org Cc: Matthew Brost Cc: Himal Prasad Ghimiray Reviewed-by: Himal Prasad Ghimiray Signed-off-by: Maarten Lankhorst Link: https://lore.kernel.org/r/20250521090102.2965100-1-dev@lankhorst.se (cherry picked from commit 531bef26d189b28bf0d694878c0e064b30990b6c) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 6345896585de..f0b167b3fb6a 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -764,7 +764,7 @@ static bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, return false; } - if (range_size <= SZ_64K && !supports_4K_migration(vm->xe)) { + if (range_size < SZ_64K && !supports_4K_migration(vm->xe)) { drm_dbg(&vm->xe->drm, "Platform doesn't support SZ_4K range migration\n"); return false; } -- cgit v1.2.3 From cf2c3eceb757e3f28e6f1034f9bc178e1535f5cc Mon Sep 17 00:00:00 2001 From: Patrice Chotard Date: Mon, 9 Jun 2025 17:05:04 +0200 Subject: spi: stm32-ospi: Make usage of reset_control_acquire/release() API As ospi reset is consumed by both OMM and OSPI drivers, use the reset acquire/release mechanism which ensure exclusive reset usage. This avoid to call reset_control_get/put() in OMM driver each time we need to reset OSPI children and guarantee the reset line stays deasserted. During resume, OMM driver takes temporarily control of reset. Fixes: 79b8a705e26c ("spi: stm32: Add OSPI driver") Signed-off-by: Patrice Chotard Reviewed-by: Philipp Zabel Link: https://patch.msgid.link/20250609-b4-upstream_ospi_reset_update-v6-1-5b602b567e8a@foss.st.com Signed-off-by: Mark Brown --- drivers/spi/spi-stm32-ospi.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/spi/spi-stm32-ospi.c b/drivers/spi/spi-stm32-ospi.c index 7c1fa55fbc47..db6b1cfc970f 100644 --- a/drivers/spi/spi-stm32-ospi.c +++ b/drivers/spi/spi-stm32-ospi.c @@ -804,7 +804,7 @@ static int stm32_ospi_get_resources(struct platform_device *pdev) return ret; } - ospi->rstc = devm_reset_control_array_get_exclusive(dev); + ospi->rstc = devm_reset_control_array_get_exclusive_released(dev); if (IS_ERR(ospi->rstc)) return dev_err_probe(dev, PTR_ERR(ospi->rstc), "Can't get reset\n"); @@ -936,11 +936,13 @@ static int stm32_ospi_probe(struct platform_device *pdev) if (ret < 0) goto err_pm_enable; - if (ospi->rstc) { - reset_control_assert(ospi->rstc); - udelay(2); - reset_control_deassert(ospi->rstc); - } + ret = reset_control_acquire(ospi->rstc); + if (ret) + return dev_err_probe(dev, ret, "Can not acquire reset %d\n", ret); + + reset_control_assert(ospi->rstc); + udelay(2); + reset_control_deassert(ospi->rstc); ret = spi_register_controller(ctrl); if (ret) { @@ -987,6 +989,8 @@ static void stm32_ospi_remove(struct platform_device *pdev) if (ospi->dma_chrx) dma_release_channel(ospi->dma_chrx); + reset_control_release(ospi->rstc); + pm_runtime_put_sync_suspend(ospi->dev); pm_runtime_force_suspend(ospi->dev); } @@ -997,6 +1001,8 @@ static int __maybe_unused stm32_ospi_suspend(struct device *dev) pinctrl_pm_select_sleep_state(dev); + reset_control_release(ospi->rstc); + return pm_runtime_force_suspend(ospi->dev); } @@ -1016,6 +1022,12 @@ static int __maybe_unused stm32_ospi_resume(struct device *dev) if (ret < 0) return ret; + ret = reset_control_acquire(ospi->rstc); + if (ret) { + dev_err(dev, "Can not acquire reset\n"); + return ret; + } + writel_relaxed(ospi->cr_reg, regs_base + OSPI_CR); writel_relaxed(ospi->dcr_reg, regs_base + OSPI_DCR1); pm_runtime_mark_last_busy(ospi->dev); -- cgit v1.2.3 From e044b8a9545cd8265c7110c179aeec2624c16455 Mon Sep 17 00:00:00 2001 From: "Francesco Poli (wintermute)" Date: Wed, 21 May 2025 23:14:39 +0200 Subject: cpupower: split unitdir from libdir in Makefile Improve the installation procedure for the systemd service unit 'cpupower.service', to be more flexible. Some distros install libraries to /usr/lib64/, but systemd service units have to be installed to /usr/lib/systemd/system: as a consequence, the installation procedure should not assume that systemd service units can be installed to ${libdir}/systemd/system ... Define a dedicated variable ("unitdir") in the Makefile. Link: https://lore.kernel.org/linux-pm/260b6d79-ab61-43b7-a0eb-813e257bc028@leemhuis.info/T/#m0601940ab439d5cbd288819d2af190ce59e810e6 Fixes: 9c70b779ad91 ("cpupower: add a systemd service to run cpupower") Link: https://lore.kernel.org/r/20250521211656.65646-1-invernomuto@paranoici.org Signed-off-by: Francesco Poli (wintermute) Tested-by: Thorsten Leemhuis Signed-off-by: Shuah Khan --- tools/power/cpupower/Makefile | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index be8dfac14076..c43db1c41205 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -73,6 +73,7 @@ sbindir ?= /usr/sbin mandir ?= /usr/man libdir ?= /usr/lib libexecdir ?= /usr/libexec +unitdir ?= /usr/lib/systemd/system includedir ?= /usr/include localedir ?= /usr/share/locale docdir ?= /usr/share/doc/packages/cpupower @@ -309,9 +310,9 @@ install-tools: $(OUTPUT)cpupower $(INSTALL_DATA) cpupower-service.conf '$(DESTDIR)${confdir}' $(INSTALL) -d $(DESTDIR)${libexecdir} $(INSTALL_PROGRAM) cpupower.sh '$(DESTDIR)${libexecdir}/cpupower' - $(INSTALL) -d $(DESTDIR)${libdir}/systemd/system - sed 's|___CDIR___|${confdir}|; s|___LDIR___|${libexecdir}|' cpupower.service.in > '$(DESTDIR)${libdir}/systemd/system/cpupower.service' - $(SETPERM_DATA) '$(DESTDIR)${libdir}/systemd/system/cpupower.service' + $(INSTALL) -d $(DESTDIR)${unitdir} + sed 's|___CDIR___|${confdir}|; s|___LDIR___|${libexecdir}|' cpupower.service.in > '$(DESTDIR)${unitdir}/cpupower.service' + $(SETPERM_DATA) '$(DESTDIR)${unitdir}/cpupower.service' install-man: $(INSTALL_DATA) -D man/cpupower.1 $(DESTDIR)${mandir}/man1/cpupower.1 @@ -348,7 +349,7 @@ uninstall: - rm -f $(DESTDIR)${bindir}/utils/cpupower - rm -f $(DESTDIR)${confdir}cpupower-service.conf - rm -f $(DESTDIR)${libexecdir}/cpupower - - rm -f $(DESTDIR)${libdir}/systemd/system/cpupower.service + - rm -f $(DESTDIR)${unitdir}/cpupower.service - rm -f $(DESTDIR)${mandir}/man1/cpupower.1 - rm -f $(DESTDIR)${mandir}/man1/cpupower-frequency-set.1 - rm -f $(DESTDIR)${mandir}/man1/cpupower-frequency-info.1 -- cgit v1.2.3 From ea7caffedd011f7d40abe93a884ffbe46f122535 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 8 Apr 2025 14:22:56 -0300 Subject: ARC: atomics: Implement arch_atomic64_cmpxchg using _relaxed The core atomic code has a number of macros where it elaborates architecture primitives into more functions. ARC uses arch_atomic64_cmpxchg() as it's architecture primitive which disable alot of the additional functions. Instead provide arch_cmpxchg64_relaxed() as the primitive and rely on the core macros to create arch_cmpxchg64(). The macros will also provide other functions, for instance, try_cmpxchg64_release(), giving a more complete implementation. Suggested-by: Mark Rutland Link: https://lore.kernel.org/r/Z0747n5bSep4_1VX@J2N7QTR9R3 Signed-off-by: Jason Gunthorpe Signed-off-by: Vineet Gupta --- arch/arc/include/asm/atomic64-arcv2.h | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/arch/arc/include/asm/atomic64-arcv2.h b/arch/arc/include/asm/atomic64-arcv2.h index 9b5791b85471..73080a664369 100644 --- a/arch/arc/include/asm/atomic64-arcv2.h +++ b/arch/arc/include/asm/atomic64-arcv2.h @@ -137,12 +137,9 @@ ATOMIC64_OPS(xor, xor, xor) #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP -static inline s64 -arch_atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new) +static inline u64 __arch_cmpxchg64_relaxed(volatile void *ptr, u64 old, u64 new) { - s64 prev; - - smp_mb(); + u64 prev; __asm__ __volatile__( "1: llockd %0, [%1] \n" @@ -152,14 +149,12 @@ arch_atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new) " bnz 1b \n" "2: \n" : "=&r"(prev) - : "r"(ptr), "ir"(expected), "r"(new) - : "cc"); /* memory clobber comes from smp_mb() */ - - smp_mb(); + : "r"(ptr), "ir"(old), "r"(new) + : "memory", "cc"); return prev; } -#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg +#define arch_cmpxchg64_relaxed __arch_cmpxchg64_relaxed static inline s64 arch_atomic64_xchg(atomic64_t *ptr, s64 new) { -- cgit v1.2.3 From 857f4517965b282234e12f6bca0c21ef10eec09b Mon Sep 17 00:00:00 2001 From: Yu-Chun Lin Date: Thu, 10 Apr 2025 01:11:16 +0800 Subject: ARC: unwind: Use built-in sort swap to reduce code size and improve performance The custom swap function used in sort() was identical to the default built-in sort swap. Remove the custom swap function and passes NULL to sort(), allowing it to use the default swap function. This change reduces code size and improves performance, particularly when CONFIG_MITIGATION_RETPOLINE is enabled. With RETPOLINE mitigation, indirect function calls incur significant overhead, and using the default swap function avoids this cost. $ ./scripts/bloat-o-meter ./unwind.o.old ./unwind.o.new add/remove: 0/1 grow/shrink: 0/1 up/down: 0/-22 (-22) Function old new delta init_unwind_hdr.constprop 544 540 -4 swap_eh_frame_hdr_table_entries 18 - -18 Total: Before=4410, After=4388, chg -0.50% Signed-off-by: Yu-Chun Lin Signed-off-by: Vineet Gupta --- arch/arc/kernel/unwind.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c index d8969dab12d4..789cfb9ea14e 100644 --- a/arch/arc/kernel/unwind.c +++ b/arch/arc/kernel/unwind.c @@ -241,15 +241,6 @@ static int cmp_eh_frame_hdr_table_entries(const void *p1, const void *p2) return (e1->start > e2->start) - (e1->start < e2->start); } -static void swap_eh_frame_hdr_table_entries(void *p1, void *p2, int size) -{ - struct eh_frame_hdr_table_entry *e1 = p1; - struct eh_frame_hdr_table_entry *e2 = p2; - - swap(e1->start, e2->start); - swap(e1->fde, e2->fde); -} - static void init_unwind_hdr(struct unwind_table *table, void *(*alloc) (unsigned long)) { @@ -345,7 +336,7 @@ static void init_unwind_hdr(struct unwind_table *table, sort(header->table, n, sizeof(*header->table), - cmp_eh_frame_hdr_table_entries, swap_eh_frame_hdr_table_entries); + cmp_eh_frame_hdr_table_entries, NULL); table->hdrsz = hdrSize; smp_wmb(); -- cgit v1.2.3 From 2cb74be378675c860af0fcaf1ec2801beebdf028 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Fri, 14 Mar 2025 08:09:35 +0100 Subject: ARC: Replace __ASSEMBLY__ with __ASSEMBLER__ in uapi headers __ASSEMBLY__ is only defined by the Makefile of the kernel, so this is not really useful for uapi headers (unless the userspace Makefile defines it, too). Let's switch to __ASSEMBLER__ which gets set automatically by the compiler when compiling assembly code. Cc: linux-snps-arc@lists.infradead.org Signed-off-by: Thomas Huth Signed-off-by: Vineet Gupta --- arch/arc/include/uapi/asm/ptrace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arc/include/uapi/asm/ptrace.h b/arch/arc/include/uapi/asm/ptrace.h index 2a6eff57f6dd..3ae832db278c 100644 --- a/arch/arc/include/uapi/asm/ptrace.h +++ b/arch/arc/include/uapi/asm/ptrace.h @@ -14,7 +14,7 @@ #define PTRACE_GET_THREAD_AREA 25 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * Userspace ABI: Register state needed by * -ptrace (gdbserver) @@ -53,6 +53,6 @@ struct user_regs_arcv2 { unsigned long r30, r58, r59; }; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _UAPI__ASM_ARC_PTRACE_H */ -- cgit v1.2.3 From 179e949719fe81219a3e23f1e716ac2d02eea845 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Fri, 14 Mar 2025 08:09:36 +0100 Subject: ARC: Replace __ASSEMBLY__ with __ASSEMBLER__ in the non-uapi headers While the GCC and Clang compilers already define __ASSEMBLER__ automatically when compiling assembly code, __ASSEMBLY__ is a macro that only gets defined by the Makefiles in the kernel. This can be very confusing when switching between userspace and kernelspace coding, or when dealing with uapi headers that rather should use __ASSEMBLER__ instead. So let's standardize on the __ASSEMBLER__ macro that is provided by the compilers now. This is a completely mechanical patch (done with a simple "sed -i" statement). Cc: linux-snps-arc@lists.infradead.org Signed-off-by: Thomas Huth Signed-off-by: Vineet Gupta --- arch/arc/include/asm/arcregs.h | 2 +- arch/arc/include/asm/atomic.h | 4 ++-- arch/arc/include/asm/bitops.h | 4 ++-- arch/arc/include/asm/bug.h | 4 ++-- arch/arc/include/asm/cache.h | 4 ++-- arch/arc/include/asm/current.h | 4 ++-- arch/arc/include/asm/dsp-impl.h | 2 +- arch/arc/include/asm/dsp.h | 4 ++-- arch/arc/include/asm/dwarf.h | 4 ++-- arch/arc/include/asm/entry.h | 4 ++-- arch/arc/include/asm/irqflags-arcv2.h | 4 ++-- arch/arc/include/asm/irqflags-compact.h | 4 ++-- arch/arc/include/asm/jump_label.h | 4 ++-- arch/arc/include/asm/linkage.h | 6 +++--- arch/arc/include/asm/mmu-arcv2.h | 4 ++-- arch/arc/include/asm/mmu.h | 2 +- arch/arc/include/asm/page.h | 4 ++-- arch/arc/include/asm/pgtable-bits-arcv2.h | 4 ++-- arch/arc/include/asm/pgtable-levels.h | 4 ++-- arch/arc/include/asm/pgtable.h | 4 ++-- arch/arc/include/asm/processor.h | 4 ++-- arch/arc/include/asm/ptrace.h | 4 ++-- arch/arc/include/asm/switch_to.h | 2 +- arch/arc/include/asm/thread_info.h | 4 ++-- 24 files changed, 45 insertions(+), 45 deletions(-) diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index 005d9e4d187a..a31bbf5c8bbc 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -144,7 +144,7 @@ #define ARC_AUX_AGU_MOD2 0x5E2 #define ARC_AUX_AGU_MOD3 0x5E3 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index 592d7fffc223..e615c42b93ba 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -6,7 +6,7 @@ #ifndef _ASM_ARC_ATOMIC_H #define _ASM_ARC_ATOMIC_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -31,6 +31,6 @@ #include #endif -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h index f5a936496f06..5340c2871392 100644 --- a/arch/arc/include/asm/bitops.h +++ b/arch/arc/include/asm/bitops.h @@ -10,7 +10,7 @@ #error only can be included directly #endif -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -192,6 +192,6 @@ static inline __attribute__ ((const)) unsigned long __ffs(unsigned long x) #include #include -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/bug.h b/arch/arc/include/asm/bug.h index 4c453ba96c51..171c16021f70 100644 --- a/arch/arc/include/asm/bug.h +++ b/arch/arc/include/asm/bug.h @@ -6,7 +6,7 @@ #ifndef _ASM_ARC_BUG_H #define _ASM_ARC_BUG_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include @@ -29,6 +29,6 @@ void die(const char *str, struct pt_regs *regs, unsigned long address); #include -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index f0f1fc5d62b6..040a97f4dd82 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -23,7 +23,7 @@ */ #define ARC_UNCACHED_ADDR_SPACE 0xc0000000 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include @@ -65,7 +65,7 @@ extern int ioc_enable; extern unsigned long perip_base, perip_end; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* Instruction cache related Auxiliary registers */ #define ARC_REG_IC_BCR 0x77 /* Build Config reg */ diff --git a/arch/arc/include/asm/current.h b/arch/arc/include/asm/current.h index 06be89f6f2f0..03ffd005f3fa 100644 --- a/arch/arc/include/asm/current.h +++ b/arch/arc/include/asm/current.h @@ -9,7 +9,7 @@ #ifndef _ASM_ARC_CURRENT_H #define _ASM_ARC_CURRENT_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifdef CONFIG_ARC_CURR_IN_REG @@ -20,6 +20,6 @@ register struct task_struct *curr_arc asm("gp"); #include #endif /* ! CONFIG_ARC_CURR_IN_REG */ -#endif /* ! __ASSEMBLY__ */ +#endif /* ! __ASSEMBLER__ */ #endif /* _ASM_ARC_CURRENT_H */ diff --git a/arch/arc/include/asm/dsp-impl.h b/arch/arc/include/asm/dsp-impl.h index cd5636dfeb6f..fd5fdaad90c1 100644 --- a/arch/arc/include/asm/dsp-impl.h +++ b/arch/arc/include/asm/dsp-impl.h @@ -11,7 +11,7 @@ #define DSP_CTRL_DISABLED_ALL 0 -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ /* clobbers r5 register */ .macro DSP_EARLY_INIT diff --git a/arch/arc/include/asm/dsp.h b/arch/arc/include/asm/dsp.h index f496dbc4640b..eeaaf4e4eabd 100644 --- a/arch/arc/include/asm/dsp.h +++ b/arch/arc/include/asm/dsp.h @@ -7,7 +7,7 @@ #ifndef __ASM_ARC_DSP_H #define __ASM_ARC_DSP_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * DSP-related saved registers - need to be saved only when you are @@ -24,6 +24,6 @@ struct dsp_callee_regs { #endif }; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_ARC_DSP_H */ diff --git a/arch/arc/include/asm/dwarf.h b/arch/arc/include/asm/dwarf.h index a0d5ebe1bc3f..1524c5cf8b59 100644 --- a/arch/arc/include/asm/dwarf.h +++ b/arch/arc/include/asm/dwarf.h @@ -6,7 +6,7 @@ #ifndef _ASM_ARC_DWARF_H #define _ASM_ARC_DWARF_H -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #ifdef ARC_DW2_UNWIND_AS_CFI @@ -38,6 +38,6 @@ #endif /* !ARC_DW2_UNWIND_AS_CFI */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_ARC_DWARF_H */ diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h index 38c35722cebf..f453af251a1a 100644 --- a/arch/arc/include/asm/entry.h +++ b/arch/arc/include/asm/entry.h @@ -13,7 +13,7 @@ #include /* For VMALLOC_START */ #include -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #ifdef CONFIG_ISA_ARCOMPACT #include /* ISA specific bits */ @@ -146,7 +146,7 @@ #endif /* CONFIG_ARC_CURR_IN_REG */ -#else /* !__ASSEMBLY__ */ +#else /* !__ASSEMBLER__ */ extern void do_signal(struct pt_regs *); extern void do_notify_resume(struct pt_regs *); diff --git a/arch/arc/include/asm/irqflags-arcv2.h b/arch/arc/include/asm/irqflags-arcv2.h index fb3c21f1a238..30aea562f8aa 100644 --- a/arch/arc/include/asm/irqflags-arcv2.h +++ b/arch/arc/include/asm/irqflags-arcv2.h @@ -50,7 +50,7 @@ #define ISA_INIT_STATUS_BITS (STATUS_IE_MASK | __AD_ENB | \ (ARCV2_IRQ_DEF_PRIO << 1)) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * Save IRQ state and disable IRQs @@ -170,6 +170,6 @@ static inline void arc_softirq_clear(int irq) seti .endm -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/irqflags-compact.h b/arch/arc/include/asm/irqflags-compact.h index 936a2f21f315..85c2f6bcde0c 100644 --- a/arch/arc/include/asm/irqflags-compact.h +++ b/arch/arc/include/asm/irqflags-compact.h @@ -40,7 +40,7 @@ #define ISA_INIT_STATUS_BITS STATUS_IE_MASK -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /****************************************************************** * IRQ Control Macros @@ -196,6 +196,6 @@ static inline int arch_irqs_disabled(void) flag \scratch .endm -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/jump_label.h b/arch/arc/include/asm/jump_label.h index a339223d9e05..66ead75784d9 100644 --- a/arch/arc/include/asm/jump_label.h +++ b/arch/arc/include/asm/jump_label.h @@ -2,7 +2,7 @@ #ifndef _ASM_ARC_JUMP_LABEL_H #define _ASM_ARC_JUMP_LABEL_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -68,5 +68,5 @@ struct jump_entry { jump_label_t key; }; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/linkage.h b/arch/arc/include/asm/linkage.h index 8a3fb71e9cfa..ba3cb65b5eaa 100644 --- a/arch/arc/include/asm/linkage.h +++ b/arch/arc/include/asm/linkage.h @@ -12,7 +12,7 @@ #define __ALIGN .align 4 #define __ALIGN_STR __stringify(__ALIGN) -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ .macro ST2 e, o, off #ifdef CONFIG_ARC_HAS_LL64 @@ -61,7 +61,7 @@ CFI_ENDPROC ASM_NL \ .size name, .-name -#else /* !__ASSEMBLY__ */ +#else /* !__ASSEMBLER__ */ #ifdef CONFIG_ARC_HAS_ICCM #define __arcfp_code __section(".text.arcfp") @@ -75,6 +75,6 @@ #define __arcfp_data __section(".data") #endif -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/mmu-arcv2.h b/arch/arc/include/asm/mmu-arcv2.h index 41412642f279..5e5482026ac9 100644 --- a/arch/arc/include/asm/mmu-arcv2.h +++ b/arch/arc/include/asm/mmu-arcv2.h @@ -69,7 +69,7 @@ #define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK_PHYS | _PAGE_CACHEABLE) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct mm_struct; extern int pae40_exist_but_not_enab(void); @@ -100,6 +100,6 @@ static inline void mmu_setup_pgd(struct mm_struct *mm, void *pgd) sr \reg, [ARC_REG_PID] .endm -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h index 4ae2db59d494..e3b35ceab582 100644 --- a/arch/arc/include/asm/mmu.h +++ b/arch/arc/include/asm/mmu.h @@ -6,7 +6,7 @@ #ifndef _ASM_ARC_MMU_H #define _ASM_ARC_MMU_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include /* NR_CPUS */ diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h index def0dfb95b43..9720fe6b2c24 100644 --- a/arch/arc/include/asm/page.h +++ b/arch/arc/include/asm/page.h @@ -19,7 +19,7 @@ #endif /* CONFIG_ARC_HAS_PAE40 */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define clear_page(paddr) memset((paddr), 0, PAGE_SIZE) #define copy_user_page(to, from, vaddr, pg) copy_page(to, from) @@ -136,6 +136,6 @@ static inline unsigned long virt_to_pfn(const void *kaddr) #include /* page_to_pfn, pfn_to_page */ #include -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/pgtable-bits-arcv2.h b/arch/arc/include/asm/pgtable-bits-arcv2.h index 8ebec1b21d24..a8fdb4fe1fbd 100644 --- a/arch/arc/include/asm/pgtable-bits-arcv2.h +++ b/arch/arc/include/asm/pgtable-bits-arcv2.h @@ -75,7 +75,7 @@ * This is to enable COW mechanism */ /* xwr */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define pte_write(pte) (pte_val(pte) & _PAGE_WRITE) #define pte_dirty(pte) (pte_val(pte) & _PAGE_DIRTY) @@ -142,6 +142,6 @@ PTE_BIT_FUNC(swp_clear_exclusive, &= ~(_PAGE_SWP_EXCLUSIVE)); #include #endif -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/pgtable-levels.h b/arch/arc/include/asm/pgtable-levels.h index d1ce4b0f1071..c8f9273372c0 100644 --- a/arch/arc/include/asm/pgtable-levels.h +++ b/arch/arc/include/asm/pgtable-levels.h @@ -85,7 +85,7 @@ #define PTRS_PER_PTE BIT(PMD_SHIFT - PAGE_SHIFT) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #if CONFIG_PGTABLE_LEVELS > 3 #include @@ -181,6 +181,6 @@ #define pmd_leaf(x) (pmd_val(x) & _PAGE_HW_SZ) #endif -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 4cf45a99fd79..bd580e2b62d7 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -19,7 +19,7 @@ */ #define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern char empty_zero_page[PAGE_SIZE]; #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) @@ -29,6 +29,6 @@ extern pgd_t swapper_pg_dir[] __aligned(PAGE_SIZE); /* to cope with aliasing VIPT cache */ #define HAVE_ARCH_UNMAPPED_AREA -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h index d606658e2fe7..7f7901ac6643 100644 --- a/arch/arc/include/asm/processor.h +++ b/arch/arc/include/asm/processor.h @@ -11,7 +11,7 @@ #ifndef __ASM_ARC_PROCESSOR_H #define __ASM_ARC_PROCESSOR_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -66,7 +66,7 @@ extern void start_thread(struct pt_regs * regs, unsigned long pc, extern unsigned int __get_wchan(struct task_struct *p); -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* * Default System Memory Map on ARC diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h index cf79df0b2570..f6c052af8f4d 100644 --- a/arch/arc/include/asm/ptrace.h +++ b/arch/arc/include/asm/ptrace.h @@ -10,7 +10,7 @@ #include #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ typedef union { struct { @@ -172,6 +172,6 @@ static inline unsigned long regs_get_register(struct pt_regs *regs, extern int syscall_trace_enter(struct pt_regs *); extern void syscall_trace_exit(struct pt_regs *); -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_PTRACE_H */ diff --git a/arch/arc/include/asm/switch_to.h b/arch/arc/include/asm/switch_to.h index 1f85de8288b1..5806106a65f9 100644 --- a/arch/arc/include/asm/switch_to.h +++ b/arch/arc/include/asm/switch_to.h @@ -6,7 +6,7 @@ #ifndef _ASM_ARC_SWITCH_TO_H #define _ASM_ARC_SWITCH_TO_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include diff --git a/arch/arc/include/asm/thread_info.h b/arch/arc/include/asm/thread_info.h index 12daaf3a61ea..255d2c774219 100644 --- a/arch/arc/include/asm/thread_info.h +++ b/arch/arc/include/asm/thread_info.h @@ -24,7 +24,7 @@ #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) #define THREAD_SHIFT (PAGE_SHIFT << THREAD_SIZE_ORDER) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include @@ -62,7 +62,7 @@ static inline __attribute_const__ struct thread_info *current_thread_info(void) return (struct thread_info *)(sp & ~(THREAD_SIZE - 1)); } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* * thread information flags -- cgit v1.2.3 From ad0f54842cd23127070d8c310dd06d8f7add025c Mon Sep 17 00:00:00 2001 From: Ankit Chauhan Date: Wed, 28 May 2025 16:36:04 +0530 Subject: scsi: mvsas: Fix typos in per-phy comments and SAS cmd port registers Spelling fixes: Deocder --> Decoder Memroy --> Memory This is a non-functional change aimed at improving code clarity. Signed-off-by: Ankit Chauhan Link: https://lore.kernel.org/r/20250528110604.59528-1-ankitchauhan2065@gmail.com Signed-off-by: Martin K. Petersen --- drivers/scsi/mvsas/mv_defs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/mvsas/mv_defs.h b/drivers/scsi/mvsas/mv_defs.h index 8ef174cd4d37..3e4124177b2a 100644 --- a/drivers/scsi/mvsas/mv_defs.h +++ b/drivers/scsi/mvsas/mv_defs.h @@ -215,7 +215,7 @@ enum hw_register_bits { /* MVS_Px_INT_STAT, MVS_Px_INT_MASK (per-phy events) */ PHYEV_DEC_ERR = (1U << 24), /* Phy Decoding Error */ - PHYEV_DCDR_ERR = (1U << 23), /* STP Deocder Error */ + PHYEV_DCDR_ERR = (1U << 23), /* STP Decoder Error */ PHYEV_CRC_ERR = (1U << 22), /* STP CRC Error */ PHYEV_UNASSOC_FIS = (1U << 19), /* unassociated FIS rx'd */ PHYEV_AN = (1U << 18), /* SATA async notification */ @@ -347,7 +347,7 @@ enum sas_cmd_port_registers { CMD_SATA_PORT_MEM_CTL0 = 0x158, /* SATA Port Memory Control 0 */ CMD_SATA_PORT_MEM_CTL1 = 0x15c, /* SATA Port Memory Control 1 */ CMD_XOR_MEM_BIST_CTL = 0x160, /* XOR Memory BIST Control */ - CMD_XOR_MEM_BIST_STAT = 0x164, /* XOR Memroy BIST Status */ + CMD_XOR_MEM_BIST_STAT = 0x164, /* XOR Memory BIST Status */ CMD_DMA_MEM_BIST_CTL = 0x168, /* DMA Memory BIST Control */ CMD_DMA_MEM_BIST_STAT = 0x16c, /* DMA Memory BIST Status */ CMD_PORT_MEM_BIST_CTL = 0x170, /* Port Memory BIST Control */ -- cgit v1.2.3 From 9b17621366d210ffee83262a8754086ebbde5e55 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Fri, 30 May 2025 12:29:35 -0700 Subject: scsi: iscsi: Fix incorrect error path labels for flashnode operations Correct the error handling goto labels used when host lookup fails in various flashnode-related event handlers: - iscsi_new_flashnode() - iscsi_del_flashnode() - iscsi_login_flashnode() - iscsi_logout_flashnode() - iscsi_logout_flashnode_sid() scsi_host_put() is not required when shost is NULL, so jumping to the correct label avoids unnecessary operations. These functions previously jumped to the wrong goto label (put_host), which did not match the intended cleanup logic. Use the correct exit labels (exit_new_fnode, exit_del_fnode, etc.) to ensure proper error handling. Also remove the unused put_host label under iscsi_new_flashnode() as it is no longer needed. No functional changes beyond accurate error path correction. Fixes: c6a4bb2ef596 ("[SCSI] scsi_transport_iscsi: Add flash node mgmt support") Signed-off-by: Alok Tiwari Link: https://lore.kernel.org/r/20250530193012.3312911-1-alok.a.tiwari@oracle.com Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_iscsi.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 0b8c91bf793f..c75a806496d6 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -3499,7 +3499,7 @@ static int iscsi_new_flashnode(struct iscsi_transport *transport, pr_err("%s could not find host no %u\n", __func__, ev->u.new_flashnode.host_no); err = -ENODEV; - goto put_host; + goto exit_new_fnode; } index = transport->new_flashnode(shost, data, len); @@ -3509,7 +3509,6 @@ static int iscsi_new_flashnode(struct iscsi_transport *transport, else err = -EIO; -put_host: scsi_host_put(shost); exit_new_fnode: @@ -3534,7 +3533,7 @@ static int iscsi_del_flashnode(struct iscsi_transport *transport, pr_err("%s could not find host no %u\n", __func__, ev->u.del_flashnode.host_no); err = -ENODEV; - goto put_host; + goto exit_del_fnode; } idx = ev->u.del_flashnode.flashnode_idx; @@ -3576,7 +3575,7 @@ static int iscsi_login_flashnode(struct iscsi_transport *transport, pr_err("%s could not find host no %u\n", __func__, ev->u.login_flashnode.host_no); err = -ENODEV; - goto put_host; + goto exit_login_fnode; } idx = ev->u.login_flashnode.flashnode_idx; @@ -3628,7 +3627,7 @@ static int iscsi_logout_flashnode(struct iscsi_transport *transport, pr_err("%s could not find host no %u\n", __func__, ev->u.logout_flashnode.host_no); err = -ENODEV; - goto put_host; + goto exit_logout_fnode; } idx = ev->u.logout_flashnode.flashnode_idx; @@ -3678,7 +3677,7 @@ static int iscsi_logout_flashnode_sid(struct iscsi_transport *transport, pr_err("%s could not find host no %u\n", __func__, ev->u.logout_flashnode.host_no); err = -ENODEV; - goto put_host; + goto exit_logout_sid; } session = iscsi_session_lookup(ev->u.logout_flashnode_sid.sid); -- cgit v1.2.3 From 9697ca0d53e3db357be26d2414276143c4a2cd49 Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Tue, 3 Jun 2025 20:21:56 +0200 Subject: scsi: s390: zfcp: Ensure synchronous unit_add Improve the usability of the unit_add sysfs attribute by ensuring that the associated FCP LUN scan processing is completed synchronously. This enables configuration tooling to consistently determine the end of the scan process to allow for serialization of follow-on actions. While the scan process associated with unit_add typically completes synchronously, it is deferred to an asynchronous background process if unit_add is used before initial remote port scanning has completed. This occurs when unit_add is used immediately after setting the associated FCP device online. To ensure synchronous unit_add processing, wait for remote port scanning to complete before initiating the FCP LUN scan. Cc: stable@vger.kernel.org Reviewed-by: M Nikhil Reviewed-by: Nihar Panda Signed-off-by: Peter Oberparleiter Signed-off-by: Nihar Panda Link: https://lore.kernel.org/r/20250603182252.2287285-2-niharp@linux.ibm.com Signed-off-by: Martin K. Petersen --- drivers/s390/scsi/zfcp_sysfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/s390/scsi/zfcp_sysfs.c b/drivers/s390/scsi/zfcp_sysfs.c index 41e36af35488..90a84ae98b97 100644 --- a/drivers/s390/scsi/zfcp_sysfs.c +++ b/drivers/s390/scsi/zfcp_sysfs.c @@ -449,6 +449,8 @@ static ssize_t zfcp_sysfs_unit_add_store(struct device *dev, if (kstrtoull(buf, 0, (unsigned long long *) &fcp_lun)) return -EINVAL; + flush_work(&port->rport_work); + retval = zfcp_unit_add(port, fcp_lun); if (retval) return retval; -- cgit v1.2.3 From 93310053663ba647e402ef67e4bb18ec06ff8dc4 Mon Sep 17 00:00:00 2001 From: Philipp Kerling Date: Sun, 8 Jun 2025 20:59:00 +0200 Subject: smb: client: disable path remapping with POSIX extensions If SMB 3.1.1 POSIX Extensions are available and negotiated, the client should be able to use all characters and not remap anything. Currently, the user has to explicitly request this behavior by specifying the "nomapposix" mount option. Link: https://lore.kernel.org/4195bb677b33d680e77549890a4f4dd3b474ceaf.camel@rx2.rx-server.de Signed-off-by: Philipp Kerling Reviewed-by: Namjae Jeon Signed-off-by: Steve French --- Documentation/admin-guide/cifs/usage.rst | 2 ++ fs/smb/client/connect.c | 10 ++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/cifs/usage.rst b/Documentation/admin-guide/cifs/usage.rst index c09674a75a9e..d989ae5778ba 100644 --- a/Documentation/admin-guide/cifs/usage.rst +++ b/Documentation/admin-guide/cifs/usage.rst @@ -270,6 +270,8 @@ configured for Unix Extensions (and the client has not disabled illegal Windows/NTFS/SMB characters to a remap range (this mount parameter is the default for SMB3). This remap (``mapposix``) range is also compatible with Mac (and "Services for Mac" on some older Windows). +When POSIX Extensions for SMB 3.1.1 are negotiated, remapping is automatically +disabled. CIFS VFS Mount Options ====================== diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 28bc33496623..c4fb80b37738 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -3718,9 +3718,15 @@ int cifs_mount_get_tcon(struct cifs_mount_ctx *mnt_ctx) goto out; } - /* if new SMB3.11 POSIX extensions are supported do not remap / and \ */ - if (tcon->posix_extensions) + /* + * if new SMB3.11 POSIX extensions are supported, do not change anything in the + * path (i.e., do not remap / and \ and do not map any special characters) + */ + if (tcon->posix_extensions) { cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_POSIX_PATHS; + cifs_sb->mnt_cifs_flags &= ~(CIFS_MOUNT_MAP_SFM_CHR | + CIFS_MOUNT_MAP_SPECIAL_CHR); + } #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY /* tell server which Unix caps we support */ -- cgit v1.2.3 From 82ffbe7776d0ac084031f114167712269bf3d832 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 6 Jun 2025 16:51:27 +0000 Subject: net_sched: sch_sfq: fix a potential crash on gso_skb handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SFQ has an assumption of always being able to queue at least one packet. However, after the blamed commit, sch->q.len can be inflated by packets in sch->gso_skb, and an enqueue() on an empty SFQ qdisc can be followed by an immediate drop. Fix sfq_drop() to properly clear q->tail in this situation. Tested: ip netns add lb ip link add dev to-lb type veth peer name in-lb netns lb ethtool -K to-lb tso off # force qdisc to requeue gso_skb ip netns exec lb ethtool -K in-lb gro on # enable NAPI ip link set dev to-lb up ip -netns lb link set dev in-lb up ip addr add dev to-lb 192.168.20.1/24 ip -netns lb addr add dev in-lb 192.168.20.2/24 tc qdisc replace dev to-lb root sfq limit 100 ip netns exec lb netserver netperf -H 192.168.20.2 -l 100 & netperf -H 192.168.20.2 -l 100 & netperf -H 192.168.20.2 -l 100 & netperf -H 192.168.20.2 -l 100 & Fixes: a53851e2c321 ("net: sched: explicit locking in gso_cpu fallback") Reported-by: Marcus Wichelmann Closes: https://lore.kernel.org/netdev/9da42688-bfaa-4364-8797-e9271f3bdaef@hetzner-cloud.de/ Signed-off-by: Eric Dumazet Reviewed-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20250606165127.3629486-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/sched/sch_sfq.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index b912ad99aa15..77fa02f2bfcd 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -310,7 +310,10 @@ drop: /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */ x = q->tail->next; slot = &q->slots[x]; - q->tail->next = slot->next; + if (slot->next == x) + q->tail = NULL; /* no more active slots */ + else + q->tail->next = slot->next; q->ht[slot->hash] = SFQ_EMPTY_SLOT; goto drop; } -- cgit v1.2.3 From b2f966568faaad326de97481096d0f3dc0971c43 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Fri, 6 Jun 2025 13:57:39 -0700 Subject: scsi: storvsc: Increase the timeouts to storvsc_timeout Currently storvsc_timeout is only used in storvsc_sdev_configure(), and 5s and 10s are used elsewhere. It turns out that rarely the 5s is not enough on Azure, so let's use storvsc_timeout everywhere. In case a timeout happens and storvsc_channel_init() returns an error, close the VMBus channel so that any host-to-guest messages in the channel's ringbuffer, which might come late, can be safely ignored. Add a "const" to storvsc_timeout. Cc: stable@kernel.org Signed-off-by: Dexuan Cui Link: https://lore.kernel.org/r/1749243459-10419-1-git-send-email-decui@microsoft.com Reviewed-by: Long Li Signed-off-by: Martin K. Petersen --- drivers/scsi/storvsc_drv.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 2e6b2412d2c9..d9e59204a9c3 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -362,7 +362,7 @@ MODULE_PARM_DESC(ring_avail_percent_lowater, /* * Timeout in seconds for all devices managed by this driver. */ -static int storvsc_timeout = 180; +static const int storvsc_timeout = 180; #if IS_ENABLED(CONFIG_SCSI_FC_ATTRS) static struct scsi_transport_template *fc_transport_template; @@ -768,7 +768,7 @@ static void handle_multichannel_storage(struct hv_device *device, int max_chns) return; } - t = wait_for_completion_timeout(&request->wait_event, 10*HZ); + t = wait_for_completion_timeout(&request->wait_event, storvsc_timeout * HZ); if (t == 0) { dev_err(dev, "Failed to create sub-channel: timed out\n"); return; @@ -833,7 +833,7 @@ static int storvsc_execute_vstor_op(struct hv_device *device, if (ret != 0) return ret; - t = wait_for_completion_timeout(&request->wait_event, 5*HZ); + t = wait_for_completion_timeout(&request->wait_event, storvsc_timeout * HZ); if (t == 0) return -ETIMEDOUT; @@ -1350,6 +1350,8 @@ static int storvsc_connect_to_vsp(struct hv_device *device, u32 ring_size, return ret; ret = storvsc_channel_init(device, is_fc); + if (ret) + vmbus_close(device->channel); return ret; } @@ -1668,7 +1670,7 @@ static int storvsc_host_reset_handler(struct scsi_cmnd *scmnd) if (ret != 0) return FAILED; - t = wait_for_completion_timeout(&request->wait_event, 5*HZ); + t = wait_for_completion_timeout(&request->wait_event, storvsc_timeout * HZ); if (t == 0) return TIMEOUT_ERROR; -- cgit v1.2.3 From 5c3ba81923e02adae354ec8afd006f93289b4a3c Mon Sep 17 00:00:00 2001 From: Rajashekhar M A Date: Fri, 6 Jun 2025 15:59:24 +0200 Subject: scsi: error: alua: I/O errors for ALUA state transitions When a host is configured with a few LUNs and I/O is running, injecting FC faults repeatedly leads to path recovery problems. The LUNs have 4 paths each and 3 of them come back active after say an FC fault which makes 2 of the paths go down, instead of all 4. This happens after several iterations of continuous FC faults. Reason here is that we're returning an I/O error whenever we're encountering sense code 06/04/0a (LOGICAL UNIT NOT ACCESSIBLE, ASYMMETRIC ACCESS STATE TRANSITION) instead of retrying. Signed-off-by: Rajashekhar M A Signed-off-by: Hannes Reinecke Link: https://lore.kernel.org/r/20250606135924.27397-1-hare@kernel.org Reviewed-by: Lee Duncan Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_error.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 376b8897ab90..746ff6a1f309 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -665,7 +665,8 @@ enum scsi_disposition scsi_check_sense(struct scsi_cmnd *scmd) * if the device is in the process of becoming ready, we * should retry. */ - if ((sshdr.asc == 0x04) && (sshdr.ascq == 0x01)) + if ((sshdr.asc == 0x04) && + (sshdr.ascq == 0x01 || sshdr.ascq == 0x0a)) return NEEDS_RETRY; /* * if the device is not started, we need to wake -- cgit v1.2.3 From cd097df4596f3a1e9d75eb8520162de1eb8485b2 Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Tue, 10 Jun 2025 07:42:26 +0530 Subject: powerpc/powernv/memtrace: Fix out of bounds issue in memtrace mmap memtrace mmap issue has an out of bounds issue. This patch fixes the by checking that the requested mapping region size should stay within the allocated region size. Reported-by: Jonathan Greental Fixes: 08a022ad3dfa ("powerpc/powernv/memtrace: Allow mmaping trace buffers") Signed-off-by: Ritesh Harjani (IBM) Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250610021227.361980-1-maddy@linux.ibm.com --- arch/powerpc/platforms/powernv/memtrace.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index 4ac9808e55a4..2ea30b343354 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -48,11 +48,15 @@ static ssize_t memtrace_read(struct file *filp, char __user *ubuf, static int memtrace_mmap(struct file *filp, struct vm_area_struct *vma) { struct memtrace_entry *ent = filp->private_data; + unsigned long ent_nrpages = ent->size >> PAGE_SHIFT; + unsigned long vma_nrpages = vma_pages(vma); - if (ent->size < vma->vm_end - vma->vm_start) + /* The requested page offset should be within object's page count */ + if (vma->vm_pgoff >= ent_nrpages) return -EINVAL; - if (vma->vm_pgoff << PAGE_SHIFT >= ent->size) + /* The requested mapping range should remain within the bounds */ + if (vma_nrpages > ent_nrpages - vma->vm_pgoff) return -EINVAL; vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); -- cgit v1.2.3 From 0d67f0dee6c9176bc09a5482dd7346e3a0f14d0b Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Tue, 10 Jun 2025 07:42:27 +0530 Subject: powerpc/vas: Return -EINVAL if the offset is non-zero in mmap() The user space calls mmap() to map VAS window paste address and the kernel returns the complete mapped page for each window. So return -EINVAL if non-zero is passed for offset parameter to mmap(). See Documentation/arch/powerpc/vas-api.rst for mmap() restrictions. Co-developed-by: Jonathan Greental Signed-off-by: Jonathan Greental Reported-by: Jonathan Greental Fixes: dda44eb29c23 ("powerpc/vas: Add VAS user space API") Signed-off-by: Haren Myneni Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250610021227.361980-2-maddy@linux.ibm.com --- arch/powerpc/platforms/book3s/vas-api.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c index 0b6365d85d11..dc6f75d3ac6e 100644 --- a/arch/powerpc/platforms/book3s/vas-api.c +++ b/arch/powerpc/platforms/book3s/vas-api.c @@ -521,6 +521,15 @@ static int coproc_mmap(struct file *fp, struct vm_area_struct *vma) return -EINVAL; } + /* + * Map complete page to the paste address. So the user + * space should pass 0ULL to the offset parameter. + */ + if (vma->vm_pgoff) { + pr_debug("Page offset unsupported to map paste address\n"); + return -EINVAL; + } + /* Ensure instance has an open send window */ if (!txwin) { pr_err("No send window open?\n"); -- cgit v1.2.3 From c0317ad44f45b3c1f0ff46a4e28d14c7bccdedf4 Mon Sep 17 00:00:00 2001 From: Gabriel Dalimonte Date: Sun, 1 Jun 2025 12:45:36 -0400 Subject: drm/vc4: fix infinite EPROBE_DEFER loop `vc4_hdmi_audio_init` calls `devm_snd_dmaengine_pcm_register` which may return EPROBE_DEFER. Calling `drm_connector_hdmi_audio_init` adds a child device. The driver model docs[1] state that adding a child device prior to returning EPROBE_DEFER may result in an infinite loop. [1] https://www.kernel.org/doc/html/v6.14/driver-api/driver-model/driver.html Fixes: 9640f1437a88 ("drm/vc4: hdmi: switch to using generic HDMI Codec infrastructure") Signed-off-by: Gabriel Dalimonte Link: https://lore.kernel.org/r/20250601-vc4-audio-inf-probe-v2-1-9ad43c7b6147@gmail.com Signed-off-by: Maxime Ripard --- drivers/gpu/drm/vc4/vc4_hdmi.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index a29a6ef266f9..163d092bd973 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -560,12 +560,6 @@ static int vc4_hdmi_connector_init(struct drm_device *dev, if (ret) return ret; - ret = drm_connector_hdmi_audio_init(connector, dev->dev, - &vc4_hdmi_audio_funcs, - 8, false, -1); - if (ret) - return ret; - drm_connector_helper_add(connector, &vc4_hdmi_connector_helper_funcs); /* @@ -2291,6 +2285,12 @@ static int vc4_hdmi_audio_init(struct vc4_hdmi *vc4_hdmi) return ret; } + ret = drm_connector_hdmi_audio_init(&vc4_hdmi->connector, dev, + &vc4_hdmi_audio_funcs, 8, false, + -1); + if (ret) + return ret; + dai_link->cpus = &vc4_hdmi->audio.cpu; dai_link->codecs = &vc4_hdmi->audio.codec; dai_link->platforms = &vc4_hdmi->audio.platform; -- cgit v1.2.3 From 4823a58093c6dfa20df62b5c18da613621b9716e Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 10 Jun 2025 16:38:56 +0530 Subject: cpufreq: Convert `/// SAFETY` lines to `# Safety` sections Replace `/// SAFETY` comments in doc comments with proper `# Safety` sections, as per rustdoc conventions. Also mark the C FFI callbacks as `unsafe` to correctly reflect their safety requirements. Reported-by: Miguel Ojeda Closes: https://github.com/Rust-for-Linux/linux/issues/1169 Signed-off-by: Viresh Kumar --- rust/kernel/cpufreq.rs | 146 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 109 insertions(+), 37 deletions(-) diff --git a/rust/kernel/cpufreq.rs b/rust/kernel/cpufreq.rs index b0a9c6182aec..9b995f18aac6 100644 --- a/rust/kernel/cpufreq.rs +++ b/rust/kernel/cpufreq.rs @@ -1055,8 +1055,11 @@ impl Registration { impl Registration { /// Driver's `init` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn init_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int { + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn init_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int { from_result(|| { // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the // lifetime of `policy`. @@ -1070,8 +1073,11 @@ impl Registration { /// Driver's `exit` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn exit_callback(ptr: *mut bindings::cpufreq_policy) { + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn exit_callback(ptr: *mut bindings::cpufreq_policy) { // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the // lifetime of `policy`. let policy = unsafe { Policy::from_raw_mut(ptr) }; @@ -1082,8 +1088,11 @@ impl Registration { /// Driver's `online` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn online_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int { + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn online_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int { from_result(|| { // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the // lifetime of `policy`. @@ -1094,8 +1103,13 @@ impl Registration { /// Driver's `offline` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn offline_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int { + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn offline_callback( + ptr: *mut bindings::cpufreq_policy, + ) -> kernel::ffi::c_int { from_result(|| { // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the // lifetime of `policy`. @@ -1106,8 +1120,13 @@ impl Registration { /// Driver's `suspend` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn suspend_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int { + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn suspend_callback( + ptr: *mut bindings::cpufreq_policy, + ) -> kernel::ffi::c_int { from_result(|| { // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the // lifetime of `policy`. @@ -1118,8 +1137,11 @@ impl Registration { /// Driver's `resume` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn resume_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int { + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn resume_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int { from_result(|| { // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the // lifetime of `policy`. @@ -1130,8 +1152,11 @@ impl Registration { /// Driver's `ready` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn ready_callback(ptr: *mut bindings::cpufreq_policy) { + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn ready_callback(ptr: *mut bindings::cpufreq_policy) { // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the // lifetime of `policy`. let policy = unsafe { Policy::from_raw_mut(ptr) }; @@ -1140,8 +1165,13 @@ impl Registration { /// Driver's `verify` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn verify_callback(ptr: *mut bindings::cpufreq_policy_data) -> kernel::ffi::c_int { + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn verify_callback( + ptr: *mut bindings::cpufreq_policy_data, + ) -> kernel::ffi::c_int { from_result(|| { // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the // lifetime of `policy`. @@ -1152,8 +1182,13 @@ impl Registration { /// Driver's `setpolicy` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn setpolicy_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int { + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn setpolicy_callback( + ptr: *mut bindings::cpufreq_policy, + ) -> kernel::ffi::c_int { from_result(|| { // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the // lifetime of `policy`. @@ -1164,8 +1199,11 @@ impl Registration { /// Driver's `target` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn target_callback( + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn target_callback( ptr: *mut bindings::cpufreq_policy, target_freq: u32, relation: u32, @@ -1180,8 +1218,11 @@ impl Registration { /// Driver's `target_index` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn target_index_callback( + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn target_index_callback( ptr: *mut bindings::cpufreq_policy, index: u32, ) -> kernel::ffi::c_int { @@ -1200,8 +1241,11 @@ impl Registration { /// Driver's `fast_switch` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn fast_switch_callback( + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn fast_switch_callback( ptr: *mut bindings::cpufreq_policy, target_freq: u32, ) -> kernel::ffi::c_uint { @@ -1212,7 +1256,11 @@ impl Registration { } /// Driver's `adjust_perf` callback. - extern "C" fn adjust_perf_callback( + /// + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + unsafe extern "C" fn adjust_perf_callback( cpu: u32, min_perf: usize, target_perf: usize, @@ -1225,8 +1273,11 @@ impl Registration { /// Driver's `get_intermediate` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn get_intermediate_callback( + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn get_intermediate_callback( ptr: *mut bindings::cpufreq_policy, index: u32, ) -> kernel::ffi::c_uint { @@ -1243,8 +1294,11 @@ impl Registration { /// Driver's `target_intermediate` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn target_intermediate_callback( + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn target_intermediate_callback( ptr: *mut bindings::cpufreq_policy, index: u32, ) -> kernel::ffi::c_int { @@ -1262,12 +1316,21 @@ impl Registration { } /// Driver's `get` callback. - extern "C" fn get_callback(cpu: u32) -> kernel::ffi::c_uint { + /// + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + unsafe extern "C" fn get_callback(cpu: u32) -> kernel::ffi::c_uint { PolicyCpu::from_cpu(cpu).map_or(0, |mut policy| T::get(&mut policy).map_or(0, |f| f)) } /// Driver's `update_limit` callback. - extern "C" fn update_limits_callback(ptr: *mut bindings::cpufreq_policy) { + /// + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn update_limits_callback(ptr: *mut bindings::cpufreq_policy) { // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the // lifetime of `policy`. let policy = unsafe { Policy::from_raw_mut(ptr) }; @@ -1276,8 +1339,11 @@ impl Registration { /// Driver's `bios_limit` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn bios_limit_callback(cpu: i32, limit: *mut u32) -> kernel::ffi::c_int { + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn bios_limit_callback(cpu: i32, limit: *mut u32) -> kernel::ffi::c_int { from_result(|| { let mut policy = PolicyCpu::from_cpu(cpu as u32)?; @@ -1288,8 +1354,11 @@ impl Registration { /// Driver's `set_boost` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn set_boost_callback( + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn set_boost_callback( ptr: *mut bindings::cpufreq_policy, state: i32, ) -> kernel::ffi::c_int { @@ -1303,8 +1372,11 @@ impl Registration { /// Driver's `register_em` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn register_em_callback(ptr: *mut bindings::cpufreq_policy) { + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn register_em_callback(ptr: *mut bindings::cpufreq_policy) { // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the // lifetime of `policy`. let policy = unsafe { Policy::from_raw_mut(ptr) }; -- cgit v1.2.3 From d17e61ab63fb7747b340d6a66bf1408cd5c6562b Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Fri, 6 Jun 2025 22:37:29 +0200 Subject: drm/meson: fix debug log statement when setting the HDMI clocks The "phy" and "vclk" frequency labels were swapped, making it more difficult to debug driver errors. Swap the label order to make them match with the actual frequencies printed to correct this. Fixes: e5fab2ec9ca4 ("drm/meson: vclk: add support for YUV420 setup") Signed-off-by: Martin Blumenstingl Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20250606203729.3311592-1-martin.blumenstingl@googlemail.com --- drivers/gpu/drm/meson/meson_encoder_hdmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/meson/meson_encoder_hdmi.c b/drivers/gpu/drm/meson/meson_encoder_hdmi.c index 47136bbbe8c6..ab08d690d882 100644 --- a/drivers/gpu/drm/meson/meson_encoder_hdmi.c +++ b/drivers/gpu/drm/meson/meson_encoder_hdmi.c @@ -109,7 +109,7 @@ static void meson_encoder_hdmi_set_vclk(struct meson_encoder_hdmi *encoder_hdmi, venc_freq /= 2; dev_dbg(priv->dev, - "vclk:%lluHz phy=%lluHz venc=%lluHz hdmi=%lluHz enci=%d\n", + "phy:%lluHz vclk=%lluHz venc=%lluHz hdmi=%lluHz enci=%d\n", phy_freq, vclk_freq, venc_freq, hdmi_freq, priv->venc.hdmi_use_enci); -- cgit v1.2.3 From faf2f8382088e8c74bd6eeb236c8c9190e61615e Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 7 Jun 2025 00:10:31 +0200 Subject: drm/meson: use vclk_freq instead of pixel_freq in debug print meson_vclk_vic_supported_freq() has a debug print which includes the pixel freq. However, within the whole function the pixel freq is irrelevant, other than checking the end of the params array. Switch to printing the vclk_freq which is being compared / matched against the inputs to the function to avoid confusion when analyzing error reports from users. Fixes: e5fab2ec9ca4 ("drm/meson: vclk: add support for YUV420 setup") Signed-off-by: Martin Blumenstingl Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20250606221031.3419353-1-martin.blumenstingl@googlemail.com --- drivers/gpu/drm/meson/meson_vclk.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/meson/meson_vclk.c b/drivers/gpu/drm/meson/meson_vclk.c index 3325580d885d..c4123bb958e4 100644 --- a/drivers/gpu/drm/meson/meson_vclk.c +++ b/drivers/gpu/drm/meson/meson_vclk.c @@ -790,9 +790,9 @@ meson_vclk_vic_supported_freq(struct meson_drm *priv, } for (i = 0 ; params[i].pixel_freq ; ++i) { - DRM_DEBUG_DRIVER("i = %d pixel_freq = %lluHz alt = %lluHz\n", - i, params[i].pixel_freq, - PIXEL_FREQ_1000_1001(params[i].pixel_freq)); + DRM_DEBUG_DRIVER("i = %d vclk_freq = %lluHz alt = %lluHz\n", + i, params[i].vclk_freq, + PIXEL_FREQ_1000_1001(params[i].vclk_freq)); DRM_DEBUG_DRIVER("i = %d phy_freq = %lluHz alt = %lluHz\n", i, params[i].phy_freq, PHY_FREQ_1000_1001(params[i].phy_freq)); -- cgit v1.2.3 From 0cee6c4d3518b2e757aedae78771f17149f57653 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Mon, 9 Jun 2025 22:27:51 +0200 Subject: drm/meson: fix more rounding issues with 59.94Hz modes Commit 1017560164b6 ("drm/meson: use unsigned long long / Hz for frequency types") attempts to resolve video playback using 59.94Hz. using YUV420 by changing the clock calculation to use Hz instead of kHz (thus yielding more precision). The basic calculation itself is correct, however the comparisions in meson_vclk_vic_supported_freq() and meson_vclk_setup() don't work anymore for 59.94Hz modes (using the freq * 1000 / 1001 logic). For example, drm/edid specifies a 593407kHz clock for 3840x2160@59.94Hz. With the mentioend commit we convert this to Hz. Then meson_vclk tries to find a matchig "params" entry (as the clock setup code currently only supports specific frequencies) by taking the venc_freq from the params and calculating the "alt frequency" (used for the 59.94Hz modes) from it, which is: (594000000Hz * 1000) / 1001 = 593406593Hz Similar calculation is applied to the phy_freq (TMDS clock), which is 10 times the pixel clock. Implement a new meson_vclk_freqs_are_matching_param() function whose purpose is to compare if the requested and calculated frequencies. They may not match exactly (for the reasons mentioned above). Allow the clocks to deviate slightly to make the 59.94Hz modes again. Fixes: 1017560164b6 ("drm/meson: use unsigned long long / Hz for frequency types") Reported-by: Christian Hewitt Signed-off-by: Martin Blumenstingl Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20250609202751.962208-1-martin.blumenstingl@googlemail.com --- drivers/gpu/drm/meson/meson_vclk.c | 55 +++++++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/meson/meson_vclk.c b/drivers/gpu/drm/meson/meson_vclk.c index c4123bb958e4..dfe0c28a0f05 100644 --- a/drivers/gpu/drm/meson/meson_vclk.c +++ b/drivers/gpu/drm/meson/meson_vclk.c @@ -110,10 +110,7 @@ #define HDMI_PLL_LOCK BIT(31) #define HDMI_PLL_LOCK_G12A (3 << 30) -#define PIXEL_FREQ_1000_1001(_freq) \ - DIV_ROUND_CLOSEST_ULL((_freq) * 1000ULL, 1001ULL) -#define PHY_FREQ_1000_1001(_freq) \ - (PIXEL_FREQ_1000_1001(DIV_ROUND_DOWN_ULL(_freq, 10ULL)) * 10) +#define FREQ_1000_1001(_freq) DIV_ROUND_CLOSEST_ULL((_freq) * 1000ULL, 1001ULL) /* VID PLL Dividers */ enum { @@ -772,6 +769,36 @@ static void meson_hdmi_pll_generic_set(struct meson_drm *priv, pll_freq); } +static bool meson_vclk_freqs_are_matching_param(unsigned int idx, + unsigned long long phy_freq, + unsigned long long vclk_freq) +{ + DRM_DEBUG_DRIVER("i = %d vclk_freq = %lluHz alt = %lluHz\n", + idx, params[idx].vclk_freq, + FREQ_1000_1001(params[idx].vclk_freq)); + DRM_DEBUG_DRIVER("i = %d phy_freq = %lluHz alt = %lluHz\n", + idx, params[idx].phy_freq, + FREQ_1000_1001(params[idx].phy_freq)); + + /* Match strict frequency */ + if (phy_freq == params[idx].phy_freq && + vclk_freq == params[idx].vclk_freq) + return true; + + /* Match 1000/1001 variant: vclk deviation has to be less than 1kHz + * (drm EDID is defined in 1kHz steps, so everything smaller must be + * rounding error) and the PHY freq deviation has to be less than + * 10kHz (as the TMDS clock is 10 times the pixel clock, so anything + * smaller must be rounding error as well). + */ + if (abs(vclk_freq - FREQ_1000_1001(params[idx].vclk_freq)) < 1000 && + abs(phy_freq - FREQ_1000_1001(params[idx].phy_freq)) < 10000) + return true; + + /* no match */ + return false; +} + enum drm_mode_status meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned long long phy_freq, @@ -790,19 +817,7 @@ meson_vclk_vic_supported_freq(struct meson_drm *priv, } for (i = 0 ; params[i].pixel_freq ; ++i) { - DRM_DEBUG_DRIVER("i = %d vclk_freq = %lluHz alt = %lluHz\n", - i, params[i].vclk_freq, - PIXEL_FREQ_1000_1001(params[i].vclk_freq)); - DRM_DEBUG_DRIVER("i = %d phy_freq = %lluHz alt = %lluHz\n", - i, params[i].phy_freq, - PHY_FREQ_1000_1001(params[i].phy_freq)); - /* Match strict frequency */ - if (phy_freq == params[i].phy_freq && - vclk_freq == params[i].vclk_freq) - return MODE_OK; - /* Match 1000/1001 variant */ - if (phy_freq == PHY_FREQ_1000_1001(params[i].phy_freq) && - vclk_freq == PIXEL_FREQ_1000_1001(params[i].vclk_freq)) + if (meson_vclk_freqs_are_matching_param(i, phy_freq, vclk_freq)) return MODE_OK; } @@ -1075,10 +1090,8 @@ void meson_vclk_setup(struct meson_drm *priv, unsigned int target, } for (freq = 0 ; params[freq].pixel_freq ; ++freq) { - if ((phy_freq == params[freq].phy_freq || - phy_freq == PHY_FREQ_1000_1001(params[freq].phy_freq)) && - (vclk_freq == params[freq].vclk_freq || - vclk_freq == PIXEL_FREQ_1000_1001(params[freq].vclk_freq))) { + if (meson_vclk_freqs_are_matching_param(freq, phy_freq, + vclk_freq)) { if (vclk_freq != params[freq].vclk_freq) vic_alternate_clock = true; else -- cgit v1.2.3 From 5558f27a58459a4038ebb23bcb5bd40c1e345c57 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Sat, 7 Jun 2025 21:52:03 +0800 Subject: pinctrl: sunxi: dt: Consider pin base when calculating bank number from pin In prepare_function_table() when the pinctrl function table IRQ entries are generated, the pin bank is calculated from the absolute pin number; however the IRQ bank mux array is indexed from the first pin bank of the controller. For R_PIO controllers, this means the absolute pin bank is way off from the relative pin bank used for array indexing. Correct this by taking into account the pin base of the controller. Fixes: f5e2cd34b12f ("pinctrl: sunxi: allow reading mux values from DT") Signed-off-by: Chen-Yu Tsai Link: https://lore.kernel.org/20250607135203.2085226-1-wens@kernel.org Signed-off-by: Linus Walleij --- drivers/pinctrl/sunxi/pinctrl-sunxi-dt.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi-dt.c b/drivers/pinctrl/sunxi/pinctrl-sunxi-dt.c index 1833078f6877..4e34b0cd3b73 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi-dt.c +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi-dt.c @@ -143,7 +143,7 @@ static struct sunxi_desc_pin *init_pins_table(struct device *dev, */ static int prepare_function_table(struct device *dev, struct device_node *pnode, struct sunxi_desc_pin *pins, int npins, - const u8 *irq_bank_muxes) + unsigned pin_base, const u8 *irq_bank_muxes) { struct device_node *node; struct property *prop; @@ -166,7 +166,7 @@ static int prepare_function_table(struct device *dev, struct device_node *pnode, */ for (i = 0; i < npins; i++) { struct sunxi_desc_pin *pin = &pins[i]; - int bank = pin->pin.number / PINS_PER_BANK; + int bank = (pin->pin.number - pin_base) / PINS_PER_BANK; if (irq_bank_muxes[bank]) { pin->variant++; @@ -211,7 +211,7 @@ static int prepare_function_table(struct device *dev, struct device_node *pnode, last_bank = 0; for (i = 0; i < npins; i++) { struct sunxi_desc_pin *pin = &pins[i]; - int bank = pin->pin.number / PINS_PER_BANK; + int bank = (pin->pin.number - pin_base) / PINS_PER_BANK; int lastfunc = pin->variant + 1; int irq_mux = irq_bank_muxes[bank]; @@ -353,7 +353,7 @@ int sunxi_pinctrl_dt_table_init(struct platform_device *pdev, return PTR_ERR(pins); ret = prepare_function_table(&pdev->dev, pnode, pins, desc->npins, - irq_bank_muxes); + desc->pin_base, irq_bank_muxes); if (ret) return ret; -- cgit v1.2.3 From 8a157d8a00e815cab4432653cb50c9cedbbb4931 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 10 Jun 2025 09:33:48 -0400 Subject: tracing: Do not free "head" on error path of filter_free_subsystem_filters() The variable "head" is allocated and initialized as a list before allocating the first "item" for the list. If the allocation of "item" fails, it frees "head" and then jumps to the label "free_now" which will process head and free it. This will cause a UAF of "head", and it doesn't need to free it before jumping to the "free_now" label as that code will free it. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250610093348.33c5643a@gandalf.local.home Fixes: a9d0aab5eb33 ("tracing: Fix regression of filter waiting a long time on RCU synchronization") Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202506070424.lCiNreTI-lkp@intel.com/ Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_filter.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index ea8b364b6818..08141f105c95 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1437,10 +1437,8 @@ static void filter_free_subsystem_filters(struct trace_subsystem_dir *dir, INIT_LIST_HEAD(&head->list); item = kmalloc(sizeof(*item), GFP_KERNEL); - if (!item) { - kfree(head); + if (!item) goto free_now; - } item->filter = filter; list_add_tail(&item->list, &head->list); -- cgit v1.2.3 From a298bbab903e3fb4cbe16d36d6195e68fad1b776 Mon Sep 17 00:00:00 2001 From: Suleiman Souhlal Date: Fri, 6 Jun 2025 16:45:38 +0900 Subject: tools/resolve_btfids: Fix build when cross compiling kernel with clang. When cross compiling the kernel with clang, we need to override CLANG_CROSS_FLAGS when preparing the step libraries. Prior to commit d1d096312176 ("tools: fix annoying "mkdir -p ..." logs when building tools in parallel"), MAKEFLAGS would have been set to a value that wouldn't set a value for CLANG_CROSS_FLAGS, hiding the fact that we weren't properly overriding it. Fixes: 56a2df7615fa ("tools/resolve_btfids: Compile resolve_btfids as host program") Signed-off-by: Suleiman Souhlal Signed-off-by: Andrii Nakryiko Acked-by: Jiri Olsa Cc: stable@vger.kernel.org Link: https://lore.kernel.org/bpf/20250606074538.1608546-1-suleiman@google.com --- tools/bpf/resolve_btfids/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile index afbddea3a39c..ce1b556dfa90 100644 --- a/tools/bpf/resolve_btfids/Makefile +++ b/tools/bpf/resolve_btfids/Makefile @@ -17,7 +17,7 @@ endif # Overrides for the prepare step libraries. HOST_OVERRIDES := AR="$(HOSTAR)" CC="$(HOSTCC)" LD="$(HOSTLD)" ARCH="$(HOSTARCH)" \ - CROSS_COMPILE="" EXTRA_CFLAGS="$(HOSTCFLAGS)" + CROSS_COMPILE="" CLANG_CROSS_FLAGS="" EXTRA_CFLAGS="$(HOSTCFLAGS)" RM ?= rm HOSTCC ?= gcc -- cgit v1.2.3 From a2c90d63b71223d69a813333c1abf4fdacddbbe5 Mon Sep 17 00:00:00 2001 From: Robert Malz Date: Tue, 20 May 2025 10:31:51 +0200 Subject: i40e: return false from i40e_reset_vf if reset is in progress The function i40e_vc_reset_vf attempts, up to 20 times, to handle a VF reset request, using the return value of i40e_reset_vf as an indicator of whether the reset was successfully triggered. Currently, i40e_reset_vf always returns true, which causes new reset requests to be ignored if a different VF reset is already in progress. This patch updates the return value of i40e_reset_vf to reflect when another VF reset is in progress, allowing the caller to properly use the retry mechanism. Fixes: 52424f974bc5 ("i40e: Fix VF hang when reset is triggered on another VF") Signed-off-by: Robert Malz Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 1120f8e4bb67..22d5b1ec2289 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1546,8 +1546,8 @@ static void i40e_cleanup_reset_vf(struct i40e_vf *vf) * @vf: pointer to the VF structure * @flr: VFLR was issued or not * - * Returns true if the VF is in reset, resets successfully, or resets - * are disabled and false otherwise. + * Return: True if reset was performed successfully or if resets are disabled. + * False if reset is already in progress. **/ bool i40e_reset_vf(struct i40e_vf *vf, bool flr) { @@ -1566,7 +1566,7 @@ bool i40e_reset_vf(struct i40e_vf *vf, bool flr) /* If VF is being reset already we don't need to continue. */ if (test_and_set_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) - return true; + return false; i40e_trigger_vf_reset(vf, flr); -- cgit v1.2.3 From fb4e9239e029954a37a00818b21e837cebf2aa10 Mon Sep 17 00:00:00 2001 From: Robert Malz Date: Tue, 20 May 2025 10:31:52 +0200 Subject: i40e: retry VFLR handling if there is ongoing VF reset When a VFLR interrupt is received during a VF reset initiated from a different source, the VFLR may be not fully handled. This can leave the VF in an undefined state. To address this, set the I40E_VFLR_EVENT_PENDING bit again during VFLR handling if the reset is not yet complete. This ensures the driver will properly complete the VF reset in such scenarios. Fixes: 52424f974bc5 ("i40e: Fix VF hang when reset is triggered on another VF") Signed-off-by: Robert Malz Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 22d5b1ec2289..88e6bef69342 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -4328,7 +4328,10 @@ int i40e_vc_process_vflr_event(struct i40e_pf *pf) reg = rd32(hw, I40E_GLGEN_VFLRSTAT(reg_idx)); if (reg & BIT(bit_idx)) /* i40e_reset_vf will clear the bit in GLGEN_VFLRSTAT */ - i40e_reset_vf(vf, true); + if (!i40e_reset_vf(vf, true)) { + /* At least one VF did not finish resetting, retry next time */ + set_bit(__I40E_VFLR_EVENT_PENDING, pf->state); + } } return 0; -- cgit v1.2.3 From 0c6f4631436ecea841f1583b98255aedd7495b18 Mon Sep 17 00:00:00 2001 From: Ahmed Zaki Date: Thu, 24 Apr 2025 15:50:13 +0200 Subject: iavf: fix reset_task for early reset event If a reset event is received from the PF early in the init cycle, the state machine hangs for about 25 seconds. Reproducer: echo 1 > /sys/class/net/$PF0/device/sriov_numvfs ip link set dev $PF0 vf 0 mac $NEW_MAC The log shows: [792.620416] ice 0000:5e:00.0: Enabling 1 VFs [792.738812] iavf 0000:5e:01.0: enabling device (0000 -> 0002) [792.744182] ice 0000:5e:00.0: Enabling 1 VFs with 17 vectors and 16 queues per VF [792.839964] ice 0000:5e:00.0: Setting MAC 52:54:00:00:00:11 on VF 0. VF driver will be reinitialized [813.389684] iavf 0000:5e:01.0: Failed to communicate with PF; waiting before retry [818.635918] iavf 0000:5e:01.0: Hardware came out of reset. Attempting reinit. [818.766273] iavf 0000:5e:01.0: Multiqueue Enabled: Queue pair count = 16 Fix it by scheduling the reset task and making the reset task capable of resetting early in the init cycle. Fixes: ef8693eb90ae3 ("i40evf: refactor reset handling") Signed-off-by: Ahmed Zaki Tested-by: Przemek Kitszel Reviewed-by: Przemek Kitszel Signed-off-by: Marcin Szycik Reviewed-by: Simon Horman Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 11 +++++++++++ drivers/net/ethernet/intel/iavf/iavf_virtchnl.c | 17 +++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 2c0bb41809a4..81d7249d1149 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -3209,6 +3209,17 @@ static void iavf_reset_task(struct work_struct *work) } continue_reset: + /* If we are still early in the state machine, just restart. */ + if (adapter->state <= __IAVF_INIT_FAILED) { + iavf_shutdown_adminq(hw); + iavf_change_state(adapter, __IAVF_STARTUP); + iavf_startup(adapter); + queue_delayed_work(adapter->wq, &adapter->watchdog_task, + msecs_to_jiffies(30)); + netdev_unlock(netdev); + return; + } + /* We don't use netif_running() because it may be true prior to * ndo_open() returning, so we can't assume it means all our open * tasks have finished, since we're not holding the rtnl_lock here. diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index a6f0e5990be2..07f0d0a0f1e2 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -79,6 +79,23 @@ iavf_poll_virtchnl_msg(struct iavf_hw *hw, struct iavf_arq_event_info *event, return iavf_status_to_errno(status); received_op = (enum virtchnl_ops)le32_to_cpu(event->desc.cookie_high); + + if (received_op == VIRTCHNL_OP_EVENT) { + struct iavf_adapter *adapter = hw->back; + struct virtchnl_pf_event *vpe = + (struct virtchnl_pf_event *)event->msg_buf; + + if (vpe->event != VIRTCHNL_EVENT_RESET_IMPENDING) + continue; + + dev_info(&adapter->pdev->dev, "Reset indication received from the PF\n"); + if (!(adapter->flags & IAVF_FLAG_RESET_PENDING)) + iavf_schedule_reset(adapter, + IAVF_FLAG_RESET_PENDING); + + return -EIO; + } + if (op_to_poll == received_op) break; } -- cgit v1.2.3 From a5a441ae283d54ec329aadc7426991dc32786d52 Mon Sep 17 00:00:00 2001 From: Anton Nadezhdin Date: Tue, 20 May 2025 10:42:16 +0200 Subject: ice/ptp: fix crosstimestamp reporting Set use_nsecs=true as timestamp is reported in ns. Lack of this result in smaller timestamp error window which cause error during phc2sys execution on E825 NICs: phc2sys[1768.256]: ioctl PTP_SYS_OFFSET_PRECISE: Invalid argument This problem was introduced in the cited commit which omitted setting use_nsecs to true when converting the ice driver to use convert_base_to_cs(). Testing hints (ethX is PF netdev): phc2sys -s ethX -c CLOCK_REALTIME -O 37 -m phc2sys[1769.256]: CLOCK_REALTIME phc offset -5 s0 freq -0 delay 0 Fixes: d4bea547ebb57 ("ice/ptp: Remove convert_art_to_tsc()") Signed-off-by: Anton Nadezhdin Reviewed-by: Aleksandr Loktionov Reviewed-by: Arkadiusz Kubalewski Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ptp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index b79a148ed0f2..55cad824c5b9 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -2299,6 +2299,7 @@ static int ice_capture_crosststamp(ktime_t *device, ts = ((u64)ts_hi << 32) | ts_lo; system->cycles = ts; system->cs_id = CSID_X86_ART; + system->use_nsecs = true; /* Read Device source clock time */ ts_lo = rd32(hw, cfg->dev_time_l[tmr_idx]); -- cgit v1.2.3 From b4a8085ceefb7bbb12c2b71c55e71fc946c6929f Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Tue, 3 Jun 2025 16:34:01 +0000 Subject: e1000: Move cancel_work_sync to avoid deadlock Previously, e1000_down called cancel_work_sync for the e1000 reset task (via e1000_down_and_stop), which takes RTNL. As reported by users and syzbot, a deadlock is possible in the following scenario: CPU 0: - RTNL is held - e1000_close - e1000_down - cancel_work_sync (cancel / wait for e1000_reset_task()) CPU 1: - process_one_work - e1000_reset_task - take RTNL To remedy this, avoid calling cancel_work_sync from e1000_down (e1000_reset_task does nothing if the device is down anyway). Instead, call cancel_work_sync for e1000_reset_task when the device is being removed. Fixes: e400c7444d84 ("e1000: Hold RTNL when e1000_down can be called") Reported-by: syzbot+846bb38dc67fe62cc733@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/683837bf.a00a0220.52848.0003.GAE@google.com/ Reported-by: John Closes: https://lore.kernel.org/netdev/CAP=Rh=OEsn4y_2LvkO3UtDWurKcGPnZ_NPSXK=FbgygNXL37Sw@mail.gmail.com/ Signed-off-by: Joe Damato Acked-by: Stanislav Fomichev Acked-by: Jacob Keller Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/e1000/e1000_main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 3f089c3d47b2..d8595e84326d 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -477,10 +477,6 @@ static void e1000_down_and_stop(struct e1000_adapter *adapter) cancel_delayed_work_sync(&adapter->phy_info_task); cancel_delayed_work_sync(&adapter->fifo_stall_task); - - /* Only kill reset task if adapter is not resetting */ - if (!test_bit(__E1000_RESETTING, &adapter->flags)) - cancel_work_sync(&adapter->reset_task); } void e1000_down(struct e1000_adapter *adapter) @@ -1266,6 +1262,10 @@ static void e1000_remove(struct pci_dev *pdev) unregister_netdev(netdev); + /* Only kill reset task if adapter is not resetting */ + if (!test_bit(__E1000_RESETTING, &adapter->flags)) + cancel_work_sync(&adapter->reset_task); + e1000_phy_hw_reset(hw); kfree(adapter->tx_ring); -- cgit v1.2.3 From ac0b8b327a5677dc6fecdf353d808161525b1ff0 Mon Sep 17 00:00:00 2001 From: Penglei Jiang Date: Tue, 10 Jun 2025 10:18:01 -0700 Subject: io_uring: fix use-after-free of sq->thread in __io_uring_show_fdinfo() syzbot reports: BUG: KASAN: slab-use-after-free in getrusage+0x1109/0x1a60 Read of size 8 at addr ffff88810de2d2c8 by task a.out/304 CPU: 0 UID: 0 PID: 304 Comm: a.out Not tainted 6.16.0-rc1 #1 PREEMPT(voluntary) Hardware name: QEMU Ubuntu 24.04 PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 Call Trace: dump_stack_lvl+0x53/0x70 print_report+0xd0/0x670 ? __pfx__raw_spin_lock_irqsave+0x10/0x10 ? getrusage+0x1109/0x1a60 kasan_report+0xce/0x100 ? getrusage+0x1109/0x1a60 getrusage+0x1109/0x1a60 ? __pfx_getrusage+0x10/0x10 __io_uring_show_fdinfo+0x9fe/0x1790 ? ksys_read+0xf7/0x1c0 ? do_syscall_64+0xa4/0x260 ? vsnprintf+0x591/0x1100 ? __pfx___io_uring_show_fdinfo+0x10/0x10 ? __pfx_vsnprintf+0x10/0x10 ? mutex_trylock+0xcf/0x130 ? __pfx_mutex_trylock+0x10/0x10 ? __pfx_show_fd_locks+0x10/0x10 ? io_uring_show_fdinfo+0x57/0x80 io_uring_show_fdinfo+0x57/0x80 seq_show+0x38c/0x690 seq_read_iter+0x3f7/0x1180 ? inode_set_ctime_current+0x160/0x4b0 seq_read+0x271/0x3e0 ? __pfx_seq_read+0x10/0x10 ? __pfx__raw_spin_lock+0x10/0x10 ? __mark_inode_dirty+0x402/0x810 ? selinux_file_permission+0x368/0x500 ? file_update_time+0x10f/0x160 vfs_read+0x177/0xa40 ? __pfx___handle_mm_fault+0x10/0x10 ? __pfx_vfs_read+0x10/0x10 ? mutex_lock+0x81/0xe0 ? __pfx_mutex_lock+0x10/0x10 ? fdget_pos+0x24d/0x4b0 ksys_read+0xf7/0x1c0 ? __pfx_ksys_read+0x10/0x10 ? do_user_addr_fault+0x43b/0x9c0 do_syscall_64+0xa4/0x260 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f0f74170fc9 Code: 00 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 8 RSP: 002b:00007fffece049e8 EFLAGS: 00000206 ORIG_RAX: 0000000000000000 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f0f74170fc9 RDX: 0000000000001000 RSI: 00007fffece049f0 RDI: 0000000000000004 RBP: 00007fffece05ad0 R08: 0000000000000000 R09: 00007fffece04d90 R10: 0000000000000000 R11: 0000000000000206 R12: 00005651720a1100 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 Allocated by task 298: kasan_save_stack+0x33/0x60 kasan_save_track+0x14/0x30 __kasan_slab_alloc+0x6e/0x70 kmem_cache_alloc_node_noprof+0xe8/0x330 copy_process+0x376/0x5e00 create_io_thread+0xab/0xf0 io_sq_offload_create+0x9ed/0xf20 io_uring_setup+0x12b0/0x1cc0 do_syscall_64+0xa4/0x260 entry_SYSCALL_64_after_hwframe+0x77/0x7f Freed by task 22: kasan_save_stack+0x33/0x60 kasan_save_track+0x14/0x30 kasan_save_free_info+0x3b/0x60 __kasan_slab_free+0x37/0x50 kmem_cache_free+0xc4/0x360 rcu_core+0x5ff/0x19f0 handle_softirqs+0x18c/0x530 run_ksoftirqd+0x20/0x30 smpboot_thread_fn+0x287/0x6c0 kthread+0x30d/0x630 ret_from_fork+0xef/0x1a0 ret_from_fork_asm+0x1a/0x30 Last potentially related work creation: kasan_save_stack+0x33/0x60 kasan_record_aux_stack+0x8c/0xa0 __call_rcu_common.constprop.0+0x68/0x940 __schedule+0xff2/0x2930 __cond_resched+0x4c/0x80 mutex_lock+0x5c/0xe0 io_uring_del_tctx_node+0xe1/0x2b0 io_uring_clean_tctx+0xb7/0x160 io_uring_cancel_generic+0x34e/0x760 do_exit+0x240/0x2350 do_group_exit+0xab/0x220 __x64_sys_exit_group+0x39/0x40 x64_sys_call+0x1243/0x1840 do_syscall_64+0xa4/0x260 entry_SYSCALL_64_after_hwframe+0x77/0x7f The buggy address belongs to the object at ffff88810de2cb00 which belongs to the cache task_struct of size 3712 The buggy address is located 1992 bytes inside of freed 3712-byte region [ffff88810de2cb00, ffff88810de2d980) which is caused by the task_struct pointed to by sq->thread being released while it is being used in the function __io_uring_show_fdinfo(). Holding ctx->uring_lock does not prevent ehre relase or exit of sq->thread. Fix this by assigning and looking up ->thread under RCU, and grabbing a reference to the task_struct. This ensures that it cannot get released while fdinfo is using it. Reported-by: syzbot+531502bbbe51d2f769f4@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/682b06a5.a70a0220.3849cf.00b3.GAE@google.com Fixes: 3fcb9d17206e ("io_uring/sqpoll: statistics of the true utilization of sq threads") Signed-off-by: Penglei Jiang Link: https://lore.kernel.org/r/20250610171801.70960-1-superman.xpt@gmail.com [axboe: massage commit message] Signed-off-by: Jens Axboe --- io_uring/fdinfo.c | 12 ++++++++++-- io_uring/sqpoll.c | 9 ++++----- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c index e9355276ab5d..9798d6fb4ec7 100644 --- a/io_uring/fdinfo.c +++ b/io_uring/fdinfo.c @@ -141,18 +141,26 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) if (ctx->flags & IORING_SETUP_SQPOLL) { struct io_sq_data *sq = ctx->sq_data; + struct task_struct *tsk; + rcu_read_lock(); + tsk = rcu_dereference(sq->thread); /* * sq->thread might be NULL if we raced with the sqpoll * thread termination. */ - if (sq->thread) { + if (tsk) { + get_task_struct(tsk); + rcu_read_unlock(); + getrusage(tsk, RUSAGE_SELF, &sq_usage); + put_task_struct(tsk); sq_pid = sq->task_pid; sq_cpu = sq->sq_cpu; - getrusage(sq->thread, RUSAGE_SELF, &sq_usage); sq_total_time = (sq_usage.ru_stime.tv_sec * 1000000 + sq_usage.ru_stime.tv_usec); sq_work_time = sq->work_time; + } else { + rcu_read_unlock(); } } diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c index 03c699493b5a..0625a421626f 100644 --- a/io_uring/sqpoll.c +++ b/io_uring/sqpoll.c @@ -270,7 +270,8 @@ static int io_sq_thread(void *data) /* offload context creation failed, just exit */ if (!current->io_uring) { mutex_lock(&sqd->lock); - sqd->thread = NULL; + rcu_assign_pointer(sqd->thread, NULL); + put_task_struct(current); mutex_unlock(&sqd->lock); goto err_out; } @@ -379,7 +380,8 @@ static int io_sq_thread(void *data) io_sq_tw(&retry_list, UINT_MAX); io_uring_cancel_generic(true, sqd); - sqd->thread = NULL; + rcu_assign_pointer(sqd->thread, NULL); + put_task_struct(current); list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) atomic_or(IORING_SQ_NEED_WAKEUP, &ctx->rings->sq_flags); io_run_task_work(); @@ -495,9 +497,6 @@ __cold int io_sq_offload_create(struct io_ring_ctx *ctx, ret = -EINVAL; goto err; } - - if (task_to_put) - put_task_struct(task_to_put); return 0; err_sqpoll: complete(&ctx->sq_data->exited); -- cgit v1.2.3 From aef17cb3d3c43854002956f24c24ec8e1a0e3546 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 10 Jun 2025 10:22:15 -0700 Subject: Revert "mm/damon/Kconfig: enable CONFIG_DAMON by default" This reverts commit 28615e6eed152f2fda5486680090b74aeed7b554. No, we don't make random features default to being on. Reported-by: Geert Uytterhoeven Cc: Andrew Morton Cc: SeongJae Park Signed-off-by: Linus Torvalds --- mm/damon/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/mm/damon/Kconfig b/mm/damon/Kconfig index 551745df011b..c93d0c56b963 100644 --- a/mm/damon/Kconfig +++ b/mm/damon/Kconfig @@ -4,7 +4,6 @@ menu "Data Access Monitoring" config DAMON bool "DAMON: Data Access Monitoring Framework" - default y help This builds a framework that allows kernel subsystems to monitor access frequency of each memory region. The information can be useful -- cgit v1.2.3 From c393befa14ab26596fb86d702566d648832dae06 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 6 Jun 2025 20:32:26 -0700 Subject: driver core: faux: Suppress bind attributes faux_device_create() is almost a suitable candidate to replace platform_driver_probe() if not for the fact that faux_device_create() supports dynamic attach/detach of the driver. Drop the bind attributes with the expectation that simple faux devices can always assume that the device is permanently bound at create, and only unbound at 'destroy'. The acpi-einj driver depends on static bind. Fixes: 6cb9441bfe8d ("ACPI: APEI: EINJ: Transition to the faux device interface") Signed-off-by: Dan Williams Reviewed-by: Jonathan Cameron Acked-by: Greg Kroah-Hartman Link: https://patch.msgid.link/20250607033228.1475625-2-dan.j.williams@intel.com Signed-off-by: Rafael J. Wysocki --- drivers/base/faux.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/base/faux.c b/drivers/base/faux.c index 9054d346bd7f..934da77ca48b 100644 --- a/drivers/base/faux.c +++ b/drivers/base/faux.c @@ -86,6 +86,7 @@ static struct device_driver faux_driver = { .name = "faux_driver", .bus = &faux_bus_type, .probe_type = PROBE_FORCE_SYNCHRONOUS, + .suppress_bind_attrs = true, }; static void faux_device_release(struct device *dev) -- cgit v1.2.3 From ff53a6e247285687df1a71d8ee5c457939792c13 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 6 Jun 2025 20:32:27 -0700 Subject: driver core: faux: Quiet probe failures The acpi-einj conversion to faux_device_create() leads to a noisy error message when the error injection facility is disabled. Quiet the error as CXL error injection via ACPI expects the module to stay loaded even if the error injection facility is disabled. This situation arose because CXL knows proper kernel named objects to trigger errors against, but acpi-einj knows how to perform the error injection. The injection mechanism is shared with non-CXL use cases. The result is CXL now has a module dependency on einj-core.ko, and init/probe failures are handled at runtime. Fixes: 6cb9441bfe8d ("ACPI: APEI: EINJ: Transition to the faux device interface") Signed-off-by: Dan Williams Reviewed-by: Jonathan Cameron Acked-by: Greg Kroah-Hartman Link: https://patch.msgid.link/20250607033228.1475625-3-dan.j.williams@intel.com Signed-off-by: Rafael J. Wysocki --- drivers/base/faux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/faux.c b/drivers/base/faux.c index 934da77ca48b..f5fbda0a9a44 100644 --- a/drivers/base/faux.c +++ b/drivers/base/faux.c @@ -170,7 +170,7 @@ struct faux_device *faux_device_create_with_groups(const char *name, * successful is almost impossible to determine by the caller. */ if (!dev->driver) { - dev_err(dev, "probe did not succeed, tearing down the device\n"); + dev_dbg(dev, "probe did not succeed, tearing down the device\n"); faux_device_destroy(faux_dev); faux_dev = NULL; } -- cgit v1.2.3 From 162457f5853ce3348e7956666916f5e5e31be51f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 6 Jun 2025 20:32:28 -0700 Subject: ACPI: APEI: EINJ: Do not fail einj_init() on faux_device_create() failure CXL has a symbol dependency on einj_core.ko, so if einj_init() fails then cxl_core.ko fails to load. Prior to the faux_device_create() conversion, einj_probe() failures were tracked by the einj_initialized flag without failing einj_init(). Revert to that behavior and always succeed einj_init() given there is no way, and no pressing need, to discern faux device-create vs device-probe failures. This situation arose because CXL knows proper kernel named objects to trigger errors against, but acpi-einj knows how to perform the error injection. The injection mechanism is shared with non-CXL use cases. The result is CXL now has a module dependency on einj-core.ko, and init/probe failures are handled at runtime. Fixes: 6cb9441bfe8d ("ACPI: APEI: EINJ: Transition to the faux device interface") Signed-off-by: Dan Williams Reviewed-by: Ben Cheatham Acked-by: Greg Kroah-Hartman Link: https://patch.msgid.link/20250607033228.1475625-4-dan.j.williams@intel.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/apei/einj-core.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/apei/einj-core.c b/drivers/acpi/apei/einj-core.c index fea11a35eea3..9b041415a9d0 100644 --- a/drivers/acpi/apei/einj-core.c +++ b/drivers/acpi/apei/einj-core.c @@ -883,19 +883,16 @@ static int __init einj_init(void) } einj_dev = faux_device_create("acpi-einj", NULL, &einj_device_ops); - if (!einj_dev) - return -ENODEV; - einj_initialized = true; + if (einj_dev) + einj_initialized = true; return 0; } static void __exit einj_exit(void) { - if (einj_initialized) - faux_device_destroy(einj_dev); - + faux_device_destroy(einj_dev); } module_init(einj_init); -- cgit v1.2.3 From 5b2d595efbfc9c46823bdb9ef11e1f9fa46adf9d Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 6 Jun 2025 11:05:05 +0900 Subject: rust: time: Fix compile error in impl_has_hr_timer macro Fix a compile error in the `impl_has_hr_timer!` macro as follows: error[E0599]: no method named cast_mut found for raw pointer *mut Foo in the current scope The `container_of!` macro already returns a mutable pointer when used in a `*mut T` context so the `.cast_mut()` method is not available. [ We missed this one because there is no caller yet and it is a macro. - Miguel ] Fixes: 74d6a606c2b3 ("rust: retain pointer mut-ness in `container_of!`") Signed-off-by: FUJITA Tomonori Reviewed-by: Benno Lossin Acked-by: Andreas Hindborg Link: https://lore.kernel.org/r/20250606020505.3186533-1-fujita.tomonori@gmail.com Signed-off-by: Miguel Ojeda --- rust/kernel/time/hrtimer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/time/hrtimer.rs b/rust/kernel/time/hrtimer.rs index 9df3dcd2fa39..36e1290cd079 100644 --- a/rust/kernel/time/hrtimer.rs +++ b/rust/kernel/time/hrtimer.rs @@ -517,7 +517,7 @@ macro_rules! impl_has_hr_timer { ) -> *mut Self { // SAFETY: As per the safety requirement of this function, `ptr` // is pointing inside a `$timer_type`. - unsafe { ::kernel::container_of!(ptr, $timer_type, $field).cast_mut() } + unsafe { ::kernel::container_of!(ptr, $timer_type, $field) } } } } -- cgit v1.2.3 From 2f76d269073bdb2971b253ef87d1f96f1a94c50e Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 9 Jun 2025 08:42:06 +0200 Subject: ACPI: PAD: Update arguments of mwait_idle_with_hints() Commit a17b37a3f416 ("x86/idle: Change arguments of mwait_idle_with_hints() to u32") changed the type of arguments of mwait_idle_with_hints() from unsigned long to u32. Change the type of variables in the call to mwait_idle_with_hints() to unsigned int to follow the change. Signed-off-by: Uros Bizjak Link: https://patch.msgid.link/20250609064235.49146-1-ubizjak@gmail.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_pad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c index 6f8bbe1247a5..c9a0bcaba2e4 100644 --- a/drivers/acpi/acpi_pad.c +++ b/drivers/acpi/acpi_pad.c @@ -33,7 +33,7 @@ static DEFINE_MUTEX(isolated_cpus_lock); static DEFINE_MUTEX(round_robin_lock); -static unsigned long power_saving_mwait_eax; +static unsigned int power_saving_mwait_eax; static unsigned char tsc_detected_unstable; static unsigned char tsc_marked_unstable; -- cgit v1.2.3 From 15eece6c5b05e5f9db0711978c3e3b7f1a2cfe12 Mon Sep 17 00:00:00 2001 From: Yunhui Cui Date: Wed, 4 Jun 2025 10:30:36 +0800 Subject: ACPI: CPPC: Fix NULL pointer dereference when nosmp is used With nosmp in cmdline, other CPUs are not brought up, leaving their cpc_desc_ptr NULL. CPU0's iteration via for_each_possible_cpu() dereferences these NULL pointers, causing panic. Panic backtrace: [ 0.401123] Unable to handle kernel NULL pointer dereference at virtual address 00000000000000b8 ... [ 0.403255] [] cppc_allow_fast_switch+0x6a/0xd4 ... Kernel panic - not syncing: Attempted to kill init! Fixes: 3cc30dd00a58 ("cpufreq: CPPC: Enable fast_switch") Reported-by: Xu Lu Signed-off-by: Yunhui Cui Link: https://patch.msgid.link/20250604023036.99553-1-cuiyunhui@bytedance.com [ rjw: New subject ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/cppc_acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index a9ae2fd62863..6b649031808f 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -476,7 +476,7 @@ bool cppc_allow_fast_switch(void) struct cpc_desc *cpc_ptr; int cpu; - for_each_possible_cpu(cpu) { + for_each_present_cpu(cpu) { cpc_ptr = per_cpu(cpc_desc_ptr, cpu); desired_reg = &cpc_ptr->cpc_regs[DESIRED_PERF]; if (!CPC_IN_SYSTEM_MEMORY(desired_reg) && -- cgit v1.2.3 From 7a0d59f6a913a2bc7680c663b8cf1e45d1bdbf26 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Fri, 30 May 2025 01:53:10 +0200 Subject: ACPI: EC: Ignore ECDT tables with an invalid ID string On the MSI Modern 14 C5M the ECDT table contains invalid data: UID : 00000000 GPE Number : 00 /* Invalid, 03 would be correct */ Namepath : "" /* Invalid, "\_SB.PCI0.SBRG.EC" would * be correct */ This slows down the EC access as the wrong GPE event is used for communication. Additionally the ID string is invalid. Ignore such faulty ECDT tables by verifying that the ID string has a valid format. Tested-by: glpnk@proton.me Signed-off-by: Armin Wolf Link: https://patch.msgid.link/20250529235310.540530-1-W_Armin@gmx.de Signed-off-by: Rafael J. Wysocki --- drivers/acpi/ec.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 6f4203716b53..75c7db8b156a 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -23,8 +23,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -2031,6 +2033,21 @@ void __init acpi_ec_ecdt_probe(void) goto out; } + if (!strstarts(ecdt_ptr->id, "\\")) { + /* + * The ECDT table on some MSI notebooks contains invalid data, together + * with an empty ID string (""). + * + * Section 5.2.15 of the ACPI specification requires the ID string to be + * a "fully qualified reference to the (...) embedded controller device", + * so this string always has to start with a backslash. + * + * By verifying this we can avoid such faulty ECDT tables in a safe way. + */ + pr_err(FW_BUG "Ignoring ECDT due to invalid ID string \"%s\"\n", ecdt_ptr->id); + goto out; + } + ec = acpi_ec_alloc(); if (!ec) goto out; -- cgit v1.2.3 From c99ad987d3e9b550e9839d5df22de97d90462e5f Mon Sep 17 00:00:00 2001 From: Wentao Guan Date: Tue, 3 Jun 2025 20:20:59 +0800 Subject: ACPI: resource: Use IRQ override on MACHENIKE 16P Use ACPI IRQ override on MACHENIKE laptop to make the internal keyboard work. Add a new entry to the irq1_edge_low_force_override structure, similar to the existing ones. Link: https://bbs.deepin.org.cn/zh/post/287628 Signed-off-by: Wentao Guan Link: https://patch.msgid.link/20250603122059.1072790-1-guanwentao@uniontech.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/resource.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 7d59c6c9185f..b1ab192d7a08 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -666,6 +666,13 @@ static const struct dmi_system_id irq1_edge_low_force_override[] = { DMI_MATCH(DMI_BOARD_NAME, "GMxHGxx"), }, }, + { + /* MACHENIKE L16P/L16P */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "MACHENIKE"), + DMI_MATCH(DMI_BOARD_NAME, "L16P"), + }, + }, { /* * TongFang GM5HG0A in case of the SKIKK Vanaheim relabel the -- cgit v1.2.3 From 72840238e2bcb8fb24cb35d8d1d5a822c04e62a4 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 9 Jun 2025 08:35:01 +0200 Subject: intel_idle: Update arguments of mwait_idle_with_hints() Commit a17b37a3f416 ("x86/idle: Change arguments of mwait_idle_with_hints() to u32") changed the type of arguments of mwait_idle_with_hints() from unsigned long to u32. Change the type of variables in the call to mwait_idle_with_hints() to unsigned int to follow the change. Signed-off-by: Uros Bizjak Reviewed-by: Artem Bityutskiy Link: https://patch.msgid.link/20250609063528.48715-1-ubizjak@gmail.com Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 63565814c7e5..73747d20df85 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -152,8 +152,8 @@ static __always_inline int __intel_idle(struct cpuidle_device *dev, int index, bool irqoff) { struct cpuidle_state *state = &drv->states[index]; - unsigned long eax = flg2MWAIT(state->flags); - unsigned long ecx = 1*irqoff; /* break on interrupt flag */ + unsigned int eax = flg2MWAIT(state->flags); + unsigned int ecx = 1*irqoff; /* break on interrupt flag */ mwait_idle_with_hints(eax, ecx); @@ -226,9 +226,9 @@ static __cpuidle int intel_idle_xstate(struct cpuidle_device *dev, static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { - unsigned long ecx = 1; /* break on interrupt flag */ struct cpuidle_state *state = &drv->states[index]; - unsigned long eax = flg2MWAIT(state->flags); + unsigned int eax = flg2MWAIT(state->flags); + unsigned int ecx = 1; /* break on interrupt flag */ if (state->flags & CPUIDLE_FLAG_INIT_XSTATE) fpu_idle_fpregs(); -- cgit v1.2.3 From 9cf1e25053c269d64b9e9fa25e8697d6d58028d4 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 10 Jun 2025 10:54:37 -0700 Subject: MAINTAINERS: Add myself as bpf networking reviewer I've been focusing on networking BPF bits lately, add myself as a reviewer. Signed-off-by: Stanislav Fomichev Acked-by: KP Singh Acked-by: Daniel Borkmann Acked-by: Jakub Kicinski Link: https://lore.kernel.org/r/20250610175442.2138504-1-stfomichev@gmail.com Signed-off-by: Alexei Starovoitov --- .mailmap | 1 + MAINTAINERS | 3 +++ 2 files changed, 4 insertions(+) diff --git a/.mailmap b/.mailmap index 77bb62564ef7..7641338b8d3f 100644 --- a/.mailmap +++ b/.mailmap @@ -712,6 +712,7 @@ Srinivas Ramana Sriram R Sriram Yagnaraman Stanislav Fomichev +Stanislav Fomichev Stefan Wahren Stéphane Witzmann Stephen Hemminger diff --git a/MAINTAINERS b/MAINTAINERS index 8d314e169486..8e109f4436eb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4555,6 +4555,7 @@ BPF [NETWORKING] (tcx & tc BPF, sock_addr) M: Martin KaFai Lau M: Daniel Borkmann R: John Fastabend +R: Stanislav Fomichev L: bpf@vger.kernel.org L: netdev@vger.kernel.org S: Maintained @@ -26948,6 +26949,7 @@ M: David S. Miller M: Jakub Kicinski M: Jesper Dangaard Brouer M: John Fastabend +R: Stanislav Fomichev L: netdev@vger.kernel.org L: bpf@vger.kernel.org S: Supported @@ -26969,6 +26971,7 @@ M: Björn Töpel M: Magnus Karlsson M: Maciej Fijalkowski R: Jonathan Lemon +R: Stanislav Fomichev L: netdev@vger.kernel.org L: bpf@vger.kernel.org S: Maintained -- cgit v1.2.3 From dc9c67820f81ee0d34f9095195228fcb828315ff Mon Sep 17 00:00:00 2001 From: Lucas Sanchez Sagrado Date: Mon, 9 Jun 2025 16:55:36 +0200 Subject: net: usb: r8152: Add device ID for TP-Link UE200 The TP-Link UE200 is a RTL8152B based USB 2.0 Fast Ethernet adapter. This patch adds its device ID. It has been tested on Ubuntu 22.04.5. Signed-off-by: Lucas Sanchez Sagrado Reviewed-by: Subbaraya Sundeep Link: https://patch.msgid.link/20250609145536.26648-1-lucsansag@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/r8152.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index d6589b24c68d..44cba7acfe7d 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -10054,6 +10054,7 @@ static const struct usb_device_id rtl8152_table[] = { { USB_DEVICE(VENDOR_ID_LINKSYS, 0x0041) }, { USB_DEVICE(VENDOR_ID_NVIDIA, 0x09ff) }, { USB_DEVICE(VENDOR_ID_TPLINK, 0x0601) }, + { USB_DEVICE(VENDOR_ID_TPLINK, 0x0602) }, { USB_DEVICE(VENDOR_ID_DLINK, 0xb301) }, { USB_DEVICE(VENDOR_ID_DELL, 0xb097) }, { USB_DEVICE(VENDOR_ID_ASUS, 0x1976) }, -- cgit v1.2.3 From 2660a544fdc0940bba15f70508a46cf9a6491230 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Mon, 9 Jun 2025 19:08:03 +0200 Subject: net: Fix TOCTOU issue in sk_is_readable() sk->sk_prot->sock_is_readable is a valid function pointer when sk resides in a sockmap. After the last sk_psock_put() (which usually happens when socket is removed from sockmap), sk->sk_prot gets restored and sk->sk_prot->sock_is_readable becomes NULL. This makes sk_is_readable() racy, if the value of sk->sk_prot is reloaded after the initial check. Which in turn may lead to a null pointer dereference. Ensure the function pointer does not turn NULL after the check. Fixes: 8934ce2fd081 ("bpf: sockmap redirect ingress support") Suggested-by: Jakub Sitnicki Signed-off-by: Michal Luczaj Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250609-skisreadable-toctou-v1-1-d0dfb2d62c37@rbox.co Signed-off-by: Jakub Kicinski --- include/net/sock.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 92e7c1aae3cc..4c37015b7cf7 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -3010,8 +3010,11 @@ int sock_ioctl_inout(struct sock *sk, unsigned int cmd, int sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg); static inline bool sk_is_readable(struct sock *sk) { - if (sk->sk_prot->sock_is_readable) - return sk->sk_prot->sock_is_readable(sk); + const struct proto *prot = READ_ONCE(sk->sk_prot); + + if (prot->sock_is_readable) + return prot->sock_is_readable(sk); + return false; } #endif /* _SOCK_H */ -- cgit v1.2.3 From c85bf1975108d2e2431c11d1cb7e95aca587dfbe Mon Sep 17 00:00:00 2001 From: Gustavo Luiz Duarte Date: Mon, 9 Jun 2025 11:24:20 -0700 Subject: netconsole: fix appending sysdata when sysdata_fields == SYSDATA_RELEASE Before appending sysdata, prepare_extradata() checks if any feature is enabled in sysdata_fields (and exits early if none is enabled). When SYSDATA_RELEASE was introduced, we missed adding it to the list of features being checked against sysdata_fields in prepare_extradata(). The result was that, if only SYSDATA_RELEASE is enabled in sysdata_fields, we incorreclty exit early and fail to append the release. Instead of checking specific bits in sysdata_fields, check if sysdata_fields has ALL bit zeroed and exit early if true. This fixes case when only SYSDATA_RELEASE enabled and makes the code more general / less error prone in future feature implementation. Signed-off-by: Gustavo Luiz Duarte Reviewed-by: Breno Leitao Fixes: cfcc9239e78a ("netconsole: append release to sysdata") Link: https://patch.msgid.link/20250609-netconsole-fix-v1-1-17543611ae31@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/netconsole.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 4289ccd3e41b..176935a8645f 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -1252,7 +1252,6 @@ static int sysdata_append_release(struct netconsole_target *nt, int offset) */ static int prepare_extradata(struct netconsole_target *nt) { - u32 fields = SYSDATA_CPU_NR | SYSDATA_TASKNAME; int extradata_len; /* userdata was appended when configfs write helper was called @@ -1260,7 +1259,7 @@ static int prepare_extradata(struct netconsole_target *nt) */ extradata_len = nt->userdata_length; - if (!(nt->sysdata_fields & fields)) + if (!nt->sysdata_fields) goto out; if (nt->sysdata_fields & SYSDATA_CPU_NR) -- cgit v1.2.3 From f478d68b653323b691280b40fbd3b8ca1ac75aa2 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 9 Jun 2025 22:40:35 +0200 Subject: net: airoha: Enable RX queues 16-31 Fix RX_DONE_INT_MASK definition in order to enable RX queues 16-31. Fixes: f252493e18353 ("net: airoha: Enable multiple IRQ lines support in airoha_eth driver.") Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20250609-aioha-fix-rx-queue-mask-v1-1-f33706a06fa2@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_regs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/airoha/airoha_regs.h b/drivers/net/ethernet/airoha/airoha_regs.h index 04187eb40ec6..150c85995cc1 100644 --- a/drivers/net/ethernet/airoha/airoha_regs.h +++ b/drivers/net/ethernet/airoha/airoha_regs.h @@ -614,8 +614,9 @@ RX19_DONE_INT_MASK | RX18_DONE_INT_MASK | \ RX17_DONE_INT_MASK | RX16_DONE_INT_MASK) -#define RX_DONE_INT_MASK (RX_DONE_HIGH_INT_MASK | RX_DONE_LOW_INT_MASK) #define RX_DONE_HIGH_OFFSET fls(RX_DONE_HIGH_INT_MASK) +#define RX_DONE_INT_MASK \ + ((RX_DONE_HIGH_INT_MASK << RX_DONE_HIGH_OFFSET) | RX_DONE_LOW_INT_MASK) #define INT_RX2_MASK(_n) \ ((RX_NO_CPU_DSCP_HIGH_INT_MASK & (_n)) | \ -- cgit v1.2.3 From 40a98e702b528c631094f2e524d309faf33dc774 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 10 Jun 2025 12:16:00 -0700 Subject: crypto: hkdf - move to late_initcall The HKDF self-tests depend on the HMAC algorithms being registered. HMAC is now registered at module_init, which put it at the same level as HKDF. Move HKDF to late_initcall so that it runs afterwards. Fixes: ef93f1562803 ("Revert "crypto: run initcalls for generic implementations earlier"") Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/hkdf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/hkdf.c b/crypto/hkdf.c index f24c2a8d4df9..82d1b32ca6ce 100644 --- a/crypto/hkdf.c +++ b/crypto/hkdf.c @@ -566,7 +566,7 @@ static int __init crypto_hkdf_module_init(void) static void __exit crypto_hkdf_module_exit(void) {} -module_init(crypto_hkdf_module_init); +late_initcall(crypto_hkdf_module_init); module_exit(crypto_hkdf_module_exit); MODULE_LICENSE("GPL"); -- cgit v1.2.3 From b8d3291d3185b5c69ba36362ae1a370f06a33e5c Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 10 Jun 2025 12:40:38 -0700 Subject: drm/sitronix: st7571-i2c: Select VIDEOMODE_HELPERS This driver requires of_get_display_timing() from CONFIG_VIDEOMODE_HELPERS but does not select it. If no other driver selects it, there will be a failure from the linker if the driver is built in or modpost if it is a module. ERROR: modpost: "of_get_display_timing" [drivers/gpu/drm/sitronix/st7571-i2c.ko] undefined! Select CONFIG_VIDEOMODE_HELPERS to resolve the build failure. Fixes: 4b35f0f41ee2 ("drm/st7571-i2c: add support for Sitronix ST7571 LCD controller") Signed-off-by: Nathan Chancellor Link: https://lore.kernel.org/r/20250610-drm-st7571-i2c-select-videomode-helpers-v1-1-d30b50ff6e64@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/sitronix/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/sitronix/Kconfig b/drivers/gpu/drm/sitronix/Kconfig index c069d0d41775..741d1bb4b83f 100644 --- a/drivers/gpu/drm/sitronix/Kconfig +++ b/drivers/gpu/drm/sitronix/Kconfig @@ -5,6 +5,7 @@ config DRM_ST7571_I2C select DRM_GEM_SHMEM_HELPER select DRM_KMS_HELPER select REGMAP_I2C + select VIDEOMODE_HELPERS help DRM driver for Sitronix ST7571 panels controlled over I2C. -- cgit v1.2.3 From d9816ec74e6d6aa29219d010bba3f780ba1d9d75 Mon Sep 17 00:00:00 2001 From: Carlos Fernandez Date: Mon, 9 Jun 2025 09:26:26 +0200 Subject: macsec: MACsec SCI assignment for ES = 0 According to 802.1AE standard, when ES and SC flags in TCI are zero, used SCI should be the current active SC_RX. Current code uses the header MAC address. Without this patch, when ES flag is 0 (using a bridge or switch), header MAC will not fit the SCI and MACSec frames will be discarted. In order to test this issue, MACsec link should be stablished between two interfaces, setting SC and ES flags to zero and a port identifier different than one. For example, using ip macsec tools: ip link add link $ETH0 macsec0 type macsec port 11 send_sci off end_station off ip macsec add macsec0 tx sa 0 pn 2 on key 01 $ETH1_KEY ip macsec add macsec0 rx port 11 address $ETH1_MAC ip macsec add macsec0 rx port 11 address $ETH1_MAC sa 0 pn 2 on key 02 ip link set dev macsec0 up ip link add link $ETH1 macsec1 type macsec port 11 send_sci off end_station off ip macsec add macsec1 tx sa 0 pn 2 on key 01 $ETH0_KEY ip macsec add macsec1 rx port 11 address $ETH0_MAC ip macsec add macsec1 rx port 11 address $ETH0_MAC sa 0 pn 2 on key 02 ip link set dev macsec1 up Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver") Co-developed-by: Andreu Montiel Signed-off-by: Andreu Montiel Signed-off-by: Carlos Fernandez Reviewed-by: Subbaraya Sundeep Signed-off-by: David S. Miller --- drivers/net/macsec.c | 40 ++++++++++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 3d315e30ee47..7edbe76b5455 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -247,15 +247,39 @@ static sci_t make_sci(const u8 *addr, __be16 port) return sci; } -static sci_t macsec_frame_sci(struct macsec_eth_header *hdr, bool sci_present) +static sci_t macsec_active_sci(struct macsec_secy *secy) { - sci_t sci; + struct macsec_rx_sc *rx_sc = rcu_dereference_bh(secy->rx_sc); + + /* Case single RX SC */ + if (rx_sc && !rcu_dereference_bh(rx_sc->next)) + return (rx_sc->active) ? rx_sc->sci : 0; + /* Case no RX SC or multiple */ + else + return 0; +} + +static sci_t macsec_frame_sci(struct macsec_eth_header *hdr, bool sci_present, + struct macsec_rxh_data *rxd) +{ + struct macsec_dev *macsec; + sci_t sci = 0; - if (sci_present) + /* SC = 1 */ + if (sci_present) { memcpy(&sci, hdr->secure_channel_id, sizeof(hdr->secure_channel_id)); - else + /* SC = 0; ES = 0 */ + } else if ((!(hdr->tci_an & (MACSEC_TCI_ES | MACSEC_TCI_SC))) && + (list_is_singular(&rxd->secys))) { + /* Only one SECY should exist on this scenario */ + macsec = list_first_or_null_rcu(&rxd->secys, struct macsec_dev, + secys); + if (macsec) + return macsec_active_sci(&macsec->secy); + } else { sci = make_sci(hdr->eth.h_source, MACSEC_PORT_ES); + } return sci; } @@ -1109,7 +1133,7 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb) struct macsec_rxh_data *rxd; struct macsec_dev *macsec; unsigned int len; - sci_t sci; + sci_t sci = 0; u32 hdr_pn; bool cbit; struct pcpu_rx_sc_stats *rxsc_stats; @@ -1156,11 +1180,14 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb) macsec_skb_cb(skb)->has_sci = !!(hdr->tci_an & MACSEC_TCI_SC); macsec_skb_cb(skb)->assoc_num = hdr->tci_an & MACSEC_AN_MASK; - sci = macsec_frame_sci(hdr, macsec_skb_cb(skb)->has_sci); rcu_read_lock(); rxd = macsec_data_rcu(skb->dev); + sci = macsec_frame_sci(hdr, macsec_skb_cb(skb)->has_sci, rxd); + if (!sci) + goto drop_nosc; + list_for_each_entry_rcu(macsec, &rxd->secys, secys) { struct macsec_rx_sc *sc = find_rx_sc(&macsec->secy, sci); @@ -1283,6 +1310,7 @@ drop: macsec_rxsa_put(rx_sa); drop_nosa: macsec_rxsc_put(rx_sc); +drop_nosc: rcu_read_unlock(); drop_direct: kfree_skb(skb); -- cgit v1.2.3 From 570896604f47d44d4ff6882d2a588428d2a6ef17 Mon Sep 17 00:00:00 2001 From: Francesco Dolcini Date: Thu, 5 Jun 2025 15:03:02 +0200 Subject: Revert "wifi: mwifiex: Fix HT40 bandwidth issue." This reverts commit 4fcfcbe45734 ("wifi: mwifiex: Fix HT40 bandwidth issue.") That commit introduces a regression, when HT40 mode is enabled, received packets are lost, this was experience with W8997 with both SDIO-UART and SDIO-SDIO variants. From an initial investigation the issue solves on its own after some time, but it's not clear what is the reason. Given that this was just a performance optimization, let's revert it till we have a better understanding of the issue and a proper fix. Cc: Jeff Chen Cc: stable@vger.kernel.org Fixes: 4fcfcbe45734 ("wifi: mwifiex: Fix HT40 bandwidth issue.") Closes: https://lore.kernel.org/all/20250603203337.GA109929@francesco-nb/ Signed-off-by: Francesco Dolcini Link: https://patch.msgid.link/20250605130302.55555-1-francesco@dolcini.it [fix commit reference format] Signed-off-by: Johannes Berg --- drivers/net/wireless/marvell/mwifiex/11n.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/11n.c b/drivers/net/wireless/marvell/mwifiex/11n.c index 738bafc3749b..66f0f5377ac1 100644 --- a/drivers/net/wireless/marvell/mwifiex/11n.c +++ b/drivers/net/wireless/marvell/mwifiex/11n.c @@ -403,14 +403,12 @@ mwifiex_cmd_append_11n_tlv(struct mwifiex_private *priv, if (sband->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 && bss_desc->bcn_ht_oper->ht_param & - IEEE80211_HT_PARAM_CHAN_WIDTH_ANY) { - chan_list->chan_scan_param[0].radio_type |= - CHAN_BW_40MHZ << 2; + IEEE80211_HT_PARAM_CHAN_WIDTH_ANY) SET_SECONDARYCHAN(chan_list->chan_scan_param[0]. radio_type, (bss_desc->bcn_ht_oper->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET)); - } + *buffer += struct_size(chan_list, chan_scan_param, 1); ret_len += struct_size(chan_list, chan_scan_param, 1); } -- cgit v1.2.3 From 6a0f81c549a00f87865ff0ad2400944ca0726868 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 3 Jun 2025 12:17:54 +0300 Subject: wifi: iwlwifi: fix merge damage related to iwl_pci_resume The changes I made in wifi: iwlwifi: don't warn if the NIC is gone in resume conflicted with a major rework done in this area. The merge de-facto removed my changes. Re-add them. Signed-off-by: Emmanuel Grumbach Link: https://patch.msgid.link/20250603091754.70182-1-emmanuel.grumbach@intel.com Fixes: 06c4b2036818 ("Merge tag 'iwlwifi-next-2025-05-15' of https://git.kernel.org/pub/scm/linux/kernel/git/iwlwifi/iwlwifi-next") Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 656f8b06c27b..0a9e0dbb58fb 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -1501,11 +1501,27 @@ static int _iwl_pci_resume(struct device *device, bool restore) * Scratch value was altered, this means the device was powered off, we * need to reset it completely. * Note: MAC (bits 0:7) will be cleared upon suspend even with wowlan, - * so assume that any bits there mean that the device is usable. + * but not bits [15:8]. So if we have bits set in lower word, assume + * the device is alive. + * For older devices, just try silently to grab the NIC. */ - if (trans->mac_cfg->device_family >= IWL_DEVICE_FAMILY_BZ && - !iwl_read32(trans, CSR_FUNC_SCRATCH)) - device_was_powered_off = true; + if (trans->mac_cfg->device_family >= IWL_DEVICE_FAMILY_BZ) { + if (!(iwl_read32(trans, CSR_FUNC_SCRATCH) & + CSR_FUNC_SCRATCH_POWER_OFF_MASK)) + device_was_powered_off = true; + } else { + /* + * bh are re-enabled by iwl_trans_pcie_release_nic_access, + * so re-enable them if _iwl_trans_pcie_grab_nic_access fails. + */ + local_bh_disable(); + if (_iwl_trans_pcie_grab_nic_access(trans, true)) { + iwl_trans_pcie_release_nic_access(trans); + } else { + device_was_powered_off = true; + local_bh_enable(); + } + } if (restore || device_was_powered_off) { trans->state = IWL_TRANS_NO_FW; -- cgit v1.2.3 From f87586598fffac31afc1141471b789251b030a76 Mon Sep 17 00:00:00 2001 From: Zilin Guan Date: Fri, 23 May 2025 11:01:56 +0000 Subject: wifi: cfg80211: use kfree_sensitive() for connkeys cleanup The nl80211_parse_connkeys() function currently uses kfree() to release the 'result' structure in error handling paths. However, if an error occurs due to result->def being less than 0, the 'result' structure may contain sensitive information. To prevent potential leakage of sensitive data, replace kfree() with kfree_sensitive() when freeing 'result'. This change aligns with the approach used in its caller, nl80211_join_ibss(), enhancing the overall security of the wireless subsystem. Signed-off-by: Zilin Guan Link: https://patch.msgid.link/20250523110156.4017111-1-zilin@seu.edu.cn Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index fd5f79266471..85f139016da2 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1583,7 +1583,7 @@ nl80211_parse_connkeys(struct cfg80211_registered_device *rdev, return result; error: - kfree(result); + kfree_sensitive(result); return ERR_PTR(err); } -- cgit v1.2.3 From aa3f93cd75bf8195e434deefff853d70d723d6f0 Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 5 Jun 2025 10:38:30 +0200 Subject: dma-buf: fix compare in WARN_ON_ONCE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Smatch pointed out this trivial typo: drivers/dma-buf/dma-buf.c:1123 dma_buf_map_attachment() warn: passing positive error code '16' to 'ERR_PTR' drivers/dma-buf/dma-buf.c 1113 dma_resv_assert_held(attach->dmabuf->resv); 1114 1115 if (dma_buf_pin_on_map(attach)) { 1116 ret = attach->dmabuf->ops->pin(attach); 1117 /* 1118 * Catch exporters making buffers inaccessible even when 1119 * attachments preventing that exist. 1120 */ 1121 WARN_ON_ONCE(ret == EBUSY); ^^^^^ This was probably intended to be -EBUSY? 1122 if (ret) --> 1123 return ERR_PTR(ret); ^^^ Otherwise we will eventually crash. 1124 } 1125 1126 sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction); 1127 if (!sg_table) 1128 sg_table = ERR_PTR(-ENOMEM); 1129 if (IS_ERR(sg_table)) 1130 goto error_unpin; 1131 Signed-off-by: Christian König Reviewed-by: Tvrtko Ursulin Link: https://lore.kernel.org/r/20250605085336.62156-1-christian.koenig@amd.com --- drivers/dma-buf/dma-buf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 890ecac04dac..2bcf9ceca997 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -1118,7 +1118,7 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach, * Catch exporters making buffers inaccessible even when * attachments preventing that exist. */ - WARN_ON_ONCE(ret == EBUSY); + WARN_ON_ONCE(ret == -EBUSY); if (ret) return ERR_PTR(ret); } -- cgit v1.2.3 From afe382843717d44b24ef5014d57dcbaab75a4052 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 7 May 2025 18:09:12 +0200 Subject: udmabuf: use sgtable-based scatterlist wrappers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use common wrappers operating directly on the struct sg_table objects to fix incorrect use of scatterlists sync calls. dma_sync_sg_for_*() functions have to be called with the number of elements originally passed to dma_map_sg_*() function, not the one returned in sgtable's nents. Fixes: 1ffe09590121 ("udmabuf: fix dma-buf cpu access") CC: stable@vger.kernel.org Signed-off-by: Marek Szyprowski Acked-by: Vivek Kasireddy Reviewed-by: Christian König Signed-off-by: Christian König Link: https://lore.kernel.org/r/20250507160913.2084079-3-m.szyprowski@samsung.com --- drivers/dma-buf/udmabuf.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c index 7eee3eb47a8e..c9d0c68d2fcb 100644 --- a/drivers/dma-buf/udmabuf.c +++ b/drivers/dma-buf/udmabuf.c @@ -264,8 +264,7 @@ static int begin_cpu_udmabuf(struct dma_buf *buf, ubuf->sg = NULL; } } else { - dma_sync_sg_for_cpu(dev, ubuf->sg->sgl, ubuf->sg->nents, - direction); + dma_sync_sgtable_for_cpu(dev, ubuf->sg, direction); } return ret; @@ -280,7 +279,7 @@ static int end_cpu_udmabuf(struct dma_buf *buf, if (!ubuf->sg) return -EINVAL; - dma_sync_sg_for_device(dev, ubuf->sg->sgl, ubuf->sg->nents, direction); + dma_sync_sgtable_for_device(dev, ubuf->sg, direction); return 0; } -- cgit v1.2.3 From 0e629694126ca388916f059453a1c36adde219c4 Mon Sep 17 00:00:00 2001 From: Jakub Raczynski Date: Mon, 9 Jun 2025 17:31:46 +0200 Subject: net/mdiobus: Fix potential out-of-bounds read/write access When using publicly available tools like 'mdio-tools' to read/write data from/to network interface and its PHY via mdiobus, there is no verification of parameters passed to the ioctl and it accepts any mdio address. Currently there is support for 32 addresses in kernel via PHY_MAX_ADDR define, but it is possible to pass higher value than that via ioctl. While read/write operation should generally fail in this case, mdiobus provides stats array, where wrong address may allow out-of-bounds read/write. Fix that by adding address verification before read/write operation. While this excludes this access from any statistics, it improves security of read/write operation. Fixes: 080bb352fad00 ("net: phy: Maintain MDIO device and bus statistics") Signed-off-by: Jakub Raczynski Reported-by: Wenjing Shan Signed-off-by: David S. Miller --- drivers/net/phy/mdio_bus.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index a6bcb0fee863..60fd0cd7cb9c 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -445,6 +445,9 @@ int __mdiobus_read(struct mii_bus *bus, int addr, u32 regnum) lockdep_assert_held_once(&bus->mdio_lock); + if (addr >= PHY_MAX_ADDR) + return -ENXIO; + if (bus->read) retval = bus->read(bus, addr, regnum); else @@ -474,6 +477,9 @@ int __mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val) lockdep_assert_held_once(&bus->mdio_lock); + if (addr >= PHY_MAX_ADDR) + return -ENXIO; + if (bus->write) err = bus->write(bus, addr, regnum, val); else -- cgit v1.2.3 From 260388f79e94fb3026c419a208ece8358bb7b555 Mon Sep 17 00:00:00 2001 From: Jakub Raczynski Date: Mon, 9 Jun 2025 17:31:47 +0200 Subject: net/mdiobus: Fix potential out-of-bounds clause 45 read/write access When using publicly available tools like 'mdio-tools' to read/write data from/to network interface and its PHY via C45 (clause 45) mdiobus, there is no verification of parameters passed to the ioctl and it accepts any mdio address. Currently there is support for 32 addresses in kernel via PHY_MAX_ADDR define, but it is possible to pass higher value than that via ioctl. While read/write operation should generally fail in this case, mdiobus provides stats array, where wrong address may allow out-of-bounds read/write. Fix that by adding address verification before C45 read/write operation. While this excludes this access from any statistics, it improves security of read/write operation. Fixes: 4e4aafcddbbf ("net: mdio: Add dedicated C45 API to MDIO bus drivers") Signed-off-by: Jakub Raczynski Reported-by: Wenjing Shan Signed-off-by: David S. Miller --- drivers/net/phy/mdio_bus.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 60fd0cd7cb9c..fda2e27c1810 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -541,6 +541,9 @@ int __mdiobus_c45_read(struct mii_bus *bus, int addr, int devad, u32 regnum) lockdep_assert_held_once(&bus->mdio_lock); + if (addr >= PHY_MAX_ADDR) + return -ENXIO; + if (bus->read_c45) retval = bus->read_c45(bus, addr, devad, regnum); else @@ -572,6 +575,9 @@ int __mdiobus_c45_write(struct mii_bus *bus, int addr, int devad, u32 regnum, lockdep_assert_held_once(&bus->mdio_lock); + if (addr >= PHY_MAX_ADDR) + return -ENXIO; + if (bus->write_c45) err = bus->write_c45(bus, addr, devad, regnum, val); else -- cgit v1.2.3 From 4bb08cf974c54adc321b8505ce82720c2a15c451 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 11 Jun 2025 16:49:38 +0800 Subject: loop: move lo_set_size() out of queue freeze It isn't necessary to freeze queue for updating disk size given submit_bio() doesn't grab queue usage counter for checking eod. Also many driver won't freeze queue for calling set_capacity_and_notify(). Move lo_set_size() out of queue freeze for fixing many lockdep warning report. Link: https://lore.kernel.org/linux-block/67ea99e0.050a0220.3c3d88.0042.GAE@google.com/ Reported-by: syzbot+9dd7dbb1a4b915dee638@syzkaller.appspotmail.com Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250611084938.108829-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/loop.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index f8d136684109..500840e4a74e 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1248,12 +1248,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) lo->lo_flags &= ~LOOP_SET_STATUS_CLEARABLE_FLAGS; lo->lo_flags |= (info->lo_flags & LOOP_SET_STATUS_SETTABLE_FLAGS); - if (size_changed) { - loff_t new_size = get_size(lo->lo_offset, lo->lo_sizelimit, - lo->lo_backing_file); - loop_set_size(lo, new_size); - } - /* update the direct I/O flag if lo_offset changed */ loop_update_dio(lo); @@ -1261,6 +1255,11 @@ out_unfreeze: blk_mq_unfreeze_queue(lo->lo_queue, memflags); if (partscan) clear_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state); + if (!err && size_changed) { + loff_t new_size = get_size(lo->lo_offset, lo->lo_sizelimit, + lo->lo_backing_file); + loop_set_size(lo, new_size); + } out_unlock: mutex_unlock(&lo->lo_mutex); if (partscan) -- cgit v1.2.3 From ff20c516485efbeb5c32bcb6aa5a24f73774185b Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 11 Jun 2025 16:56:32 +0800 Subject: ublk: document auto buffer registration(UBLK_F_AUTO_BUF_REG) Document recently merged feature auto buffer registration(UBLK_F_AUTO_BUF_REG). Cc: Caleb Sander Mateos Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250611085632.109719-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- Documentation/block/ublk.rst | 75 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/Documentation/block/ublk.rst b/Documentation/block/ublk.rst index c368e1081b41..abec524a04ed 100644 --- a/Documentation/block/ublk.rst +++ b/Documentation/block/ublk.rst @@ -352,6 +352,81 @@ For reaching best IO performance, ublk server should align its segment parameter of `struct ublk_param_segment` with backend for avoiding unnecessary IO split, which usually hurts io_uring performance. +Auto Buffer Registration +------------------------ + +The ``UBLK_F_AUTO_BUF_REG`` feature automatically handles buffer registration +and unregistration for I/O requests, which simplifies the buffer management +process and reduces overhead in the ublk server implementation. + +This is another feature flag for using zero copy, and it is compatible with +``UBLK_F_SUPPORT_ZERO_COPY``. + +Feature Overview +~~~~~~~~~~~~~~~~ + +This feature automatically registers request buffers to the io_uring context +before delivering I/O commands to the ublk server and unregisters them when +completing I/O commands. This eliminates the need for manual buffer +registration/unregistration via ``UBLK_IO_REGISTER_IO_BUF`` and +``UBLK_IO_UNREGISTER_IO_BUF`` commands, then IO handling in ublk server +can avoid dependency on the two uring_cmd operations. + +IOs can't be issued concurrently to io_uring if there is any dependency +among these IOs. So this way not only simplifies ublk server implementation, +but also makes concurrent IO handling becomes possible by removing the +dependency on buffer registration & unregistration commands. + +Usage Requirements +~~~~~~~~~~~~~~~~~~ + +1. The ublk server must create a sparse buffer table on the same ``io_ring_ctx`` + used for ``UBLK_IO_FETCH_REQ`` and ``UBLK_IO_COMMIT_AND_FETCH_REQ``. If + uring_cmd is issued on a different ``io_ring_ctx``, manual buffer + unregistration is required. + +2. Buffer registration data must be passed via uring_cmd's ``sqe->addr`` with the + following structure:: + + struct ublk_auto_buf_reg { + __u16 index; /* Buffer index for registration */ + __u8 flags; /* Registration flags */ + __u8 reserved0; /* Reserved for future use */ + __u32 reserved1; /* Reserved for future use */ + }; + + ublk_auto_buf_reg_to_sqe_addr() is for converting the above structure into + ``sqe->addr``. + +3. All reserved fields in ``ublk_auto_buf_reg`` must be zeroed. + +4. Optional flags can be passed via ``ublk_auto_buf_reg.flags``. + +Fallback Behavior +~~~~~~~~~~~~~~~~~ + +If auto buffer registration fails: + +1. When ``UBLK_AUTO_BUF_REG_FALLBACK`` is enabled: + - The uring_cmd is completed + - ``UBLK_IO_F_NEED_REG_BUF`` is set in ``ublksrv_io_desc.op_flags`` + - The ublk server must manually deal with the failure, such as, register + the buffer manually, or using user copy feature for retrieving the data + for handling ublk IO + +2. If fallback is not enabled: + - The ublk I/O request fails silently + - The uring_cmd won't be completed + +Limitations +~~~~~~~~~~~ + +- Requires same ``io_ring_ctx`` for all operations +- May require manual buffer management in fallback cases +- io_ring_ctx buffer table has a max size of 16K, which may not be enough + in case that too many ublk devices are handled by this single io_ring_ctx + and each one has very large queue depth + References ========== -- cgit v1.2.3 From f705d33c2f0353039d03e5d6f18f70467d86080e Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 11 Jun 2025 09:59:15 +0900 Subject: block: Clear BIO_EMULATES_ZONE_APPEND flag on BIO completion When blk_zone_write_plug_bio_endio() is called for a regular write BIO used to emulate a zone append operation, that is, a BIO flagged with BIO_EMULATES_ZONE_APPEND, the BIO operation code is restored to the original REQ_OP_ZONE_APPEND but the BIO_EMULATES_ZONE_APPEND flag is not cleared. Clear it to fully return the BIO to its orginal definition. Fixes: 9b1ce7f0c6f8 ("block: Implement zone append emulation") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250611005915.89843-1-dlemoal@kernel.org Signed-off-by: Jens Axboe --- block/blk-zoned.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 8f15d1aa6eb8..cee7a0774a9b 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -1225,6 +1225,7 @@ void blk_zone_write_plug_bio_endio(struct bio *bio) if (bio_flagged(bio, BIO_EMULATES_ZONE_APPEND)) { bio->bi_opf &= ~REQ_OP_MASK; bio->bi_opf |= REQ_OP_ZONE_APPEND; + bio_clear_flag(bio, BIO_EMULATES_ZONE_APPEND); } /* -- cgit v1.2.3 From cf625013d8741c01407bbb4a60c111b61b9fa69d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 11 Jun 2025 06:44:16 +0200 Subject: block: don't use submit_bio_noacct_nocheck in blk_zone_wplug_bio_work Bios queued up in the zone write plug have already gone through all all preparation in the submit_bio path, including the freeze protection. Submitting them through submit_bio_noacct_nocheck duplicates the work and can can cause deadlocks when freezing a queue with pending bio write plugs. Go straight to ->submit_bio or blk_mq_submit_bio to bypass the superfluous extra freeze protection and checks. Fixes: 9b1ce7f0c6f8 ("block: Implement zone append emulation") Reported-by: Bart Van Assche Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Damien Le Moal Tested-by: Damien Le Moal Link: https://lore.kernel.org/r/20250611044416.2351850-1-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-zoned.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/block/blk-zoned.c b/block/blk-zoned.c index cee7a0774a9b..351d659280e1 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -1307,7 +1307,6 @@ again: spin_unlock_irqrestore(&zwplug->lock, flags); bdev = bio->bi_bdev; - submit_bio_noacct_nocheck(bio); /* * blk-mq devices will reuse the extra reference on the request queue @@ -1315,8 +1314,12 @@ again: * path for BIO-based devices will not do that. So drop this extra * reference here. */ - if (bdev_test_flag(bdev, BD_HAS_SUBMIT_BIO)) + if (bdev_test_flag(bdev, BD_HAS_SUBMIT_BIO)) { + bdev->bd_disk->fops->submit_bio(bio); blk_queue_exit(bdev->bd_disk->queue); + } else { + blk_mq_submit_bio(bio); + } put_zwplug: /* Drop the reference we took in disk_zone_wplug_schedule_bio_work(). */ -- cgit v1.2.3 From 961296e89dc3800e6a3abc3f5d5bb4192cf31e98 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 11 Jun 2025 08:48:46 -0600 Subject: block: use plug request list tail for one-shot backmerge attempt Previously, the block layer stored the requests in the plug list in LIFO order. For this reason, blk_attempt_plug_merge() would check just the head entry for a back merge attempt, and abort after that unless requests for multiple queues existed in the plug list. If more than one request is present in the plug list, this makes the one-shot back merging less useful than before, as it'll always fail to find a quick merge candidate. Use the tail entry for the one-shot merge attempt, which is the last added request in the list. If that fails, abort immediately unless there are multiple queues available. If multiple queues are available, then scan the list. Ideally the latter scan would be a backwards scan of the list, but as it currently stands, the plug list is singly linked and hence this isn't easily feasible. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/linux-block/20250611121626.7252-1-abuehaze@amazon.com/ Reported-by: Hazem Mohamed Abuelfotoh Fixes: e70c301faece ("block: don't reorder requests in blk_add_rq_to_plug") Signed-off-by: Jens Axboe --- block/blk-merge.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/block/blk-merge.c b/block/blk-merge.c index 3af1d284add5..70d704615be5 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -998,20 +998,20 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, if (!plug || rq_list_empty(&plug->mq_list)) return false; - rq_list_for_each(&plug->mq_list, rq) { - if (rq->q == q) { - if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) == - BIO_MERGE_OK) - return true; - break; - } + rq = plug->mq_list.tail; + if (rq->q == q) + return blk_attempt_bio_merge(q, rq, bio, nr_segs, false) == + BIO_MERGE_OK; + else if (!plug->multiple_queues) + return false; - /* - * Only keep iterating plug list for merges if we have multiple - * queues - */ - if (!plug->multiple_queues) - break; + rq_list_for_each(&plug->mq_list, rq) { + if (rq->q != q) + continue; + if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) == + BIO_MERGE_OK) + return true; + break; } return false; } -- cgit v1.2.3 From 83f066fac3c231e58e9adf3b307e96fee172dfb3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 11 Jun 2025 16:09:45 +0300 Subject: spi: stm32-ospi: clean up on error in probe() If reset_control_acquire() fails, then we can't return directly. We need to do a little clean up first. Fixes: cf2c3eceb757 ("spi: stm32-ospi: Make usage of reset_control_acquire/release() API") Signed-off-by: Dan Carpenter Link: https://patch.msgid.link/aEmAGTUzzKZlLe3K@stanley.mountain Signed-off-by: Mark Brown --- drivers/spi/spi-stm32-ospi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-stm32-ospi.c b/drivers/spi/spi-stm32-ospi.c index db6b1cfc970f..4ab7e86f4bd5 100644 --- a/drivers/spi/spi-stm32-ospi.c +++ b/drivers/spi/spi-stm32-ospi.c @@ -937,8 +937,10 @@ static int stm32_ospi_probe(struct platform_device *pdev) goto err_pm_enable; ret = reset_control_acquire(ospi->rstc); - if (ret) - return dev_err_probe(dev, ret, "Can not acquire reset %d\n", ret); + if (ret) { + dev_err_probe(dev, ret, "Can not acquire reset %d\n", ret); + goto err_pm_resume; + } reset_control_assert(ospi->rstc); udelay(2); -- cgit v1.2.3 From 179a8427fcbffe36ccfed5e138d7c9b6180caff9 Mon Sep 17 00:00:00 2001 From: Ashish Kalra Date: Mon, 12 May 2025 22:16:34 +0000 Subject: KVM: SEV: Disable SEV-SNP support on initialization failure During platform init, SNP initialization may fail for several reasons, such as firmware command failures and incompatible versions. However, the KVM capability may continue to advertise support for it. The platform may have SNP enabled but if SNP_INIT fails then SNP is not supported by KVM. During KVM module initialization query the SNP platform status to obtain the SNP initialization state and use it as an additional condition to determine support for SEV-SNP. Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson Co-developed-by: Pratik R. Sampat Signed-off-by: Pratik R. Sampat Reviewed-by: Tom Lendacky Signed-off-by: Ashish Kalra Reviewed-by: Pankaj Gupta Reviewed-by: Pavan Kumar Paluri Message-ID: <20250512221634.12045-1-Ashish.Kalra@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 44 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 5a69b657dae9..459c3b791fd4 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2871,6 +2871,33 @@ void __init sev_set_cpu_caps(void) } } +static bool is_sev_snp_initialized(void) +{ + struct sev_user_data_snp_status *status; + struct sev_data_snp_addr buf; + bool initialized = false; + int ret, error = 0; + + status = snp_alloc_firmware_page(GFP_KERNEL | __GFP_ZERO); + if (!status) + return false; + + buf.address = __psp_pa(status); + ret = sev_do_cmd(SEV_CMD_SNP_PLATFORM_STATUS, &buf, &error); + if (ret) { + pr_err("SEV: SNP_PLATFORM_STATUS failed ret=%d, fw_error=%d (%#x)\n", + ret, error, error); + goto out; + } + + initialized = !!status->state; + +out: + snp_free_firmware_page(status); + + return initialized; +} + void __init sev_hardware_setup(void) { unsigned int eax, ebx, ecx, edx, sev_asid_count, sev_es_asid_count; @@ -2975,6 +3002,14 @@ void __init sev_hardware_setup(void) sev_snp_supported = sev_snp_enabled && cc_platform_has(CC_ATTR_HOST_SEV_SNP); out: + if (sev_enabled) { + init_args.probe = true; + if (sev_platform_init(&init_args)) + sev_supported = sev_es_supported = sev_snp_supported = false; + else if (sev_snp_supported) + sev_snp_supported = is_sev_snp_initialized(); + } + if (boot_cpu_has(X86_FEATURE_SEV)) pr_info("SEV %s (ASIDs %u - %u)\n", sev_supported ? min_sev_asid <= max_sev_asid ? "enabled" : @@ -3001,15 +3036,6 @@ out: sev_supported_vmsa_features = 0; if (sev_es_debug_swap_enabled) sev_supported_vmsa_features |= SVM_SEV_FEAT_DEBUG_SWAP; - - if (!sev_enabled) - return; - - /* - * Do both SNP and SEV initialization at KVM module load. - */ - init_args.probe = true; - sev_platform_init(&init_args); } void sev_hardware_unsetup(void) -- cgit v1.2.3 From 488ef3560196ee10fc1c5547e1574a87068c3494 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 28 May 2025 13:18:17 +0100 Subject: KEYS: Invert FINAL_PUT bit Invert the FINAL_PUT bit so that test_bit_acquire and clear_bit_unlock can be used instead of smp_mb. Signed-off-by: Herbert Xu Signed-off-by: David Howells Reviewed-by: Jarkko Sakkinen Signed-off-by: Linus Torvalds --- include/linux/key.h | 2 +- security/keys/gc.c | 4 ++-- security/keys/key.c | 5 +++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/linux/key.h b/include/linux/key.h index ba05de8579ec..81b8f05c6898 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -236,7 +236,7 @@ struct key { #define KEY_FLAG_ROOT_CAN_INVAL 7 /* set if key can be invalidated by root without permission */ #define KEY_FLAG_KEEP 8 /* set if key should not be removed */ #define KEY_FLAG_UID_KEYRING 9 /* set if key is a user or user session keyring */ -#define KEY_FLAG_FINAL_PUT 10 /* set if final put has happened on key */ +#define KEY_FLAG_USER_ALIVE 10 /* set if final put has not happened on key yet */ /* the key type and key description string * - the desc is used to match a key against search criteria diff --git a/security/keys/gc.c b/security/keys/gc.c index f27223ea4578..748e83818a76 100644 --- a/security/keys/gc.c +++ b/security/keys/gc.c @@ -218,8 +218,8 @@ continue_scanning: key = rb_entry(cursor, struct key, serial_node); cursor = rb_next(cursor); - if (test_bit(KEY_FLAG_FINAL_PUT, &key->flags)) { - smp_mb(); /* Clobber key->user after FINAL_PUT seen. */ + if (!test_bit_acquire(KEY_FLAG_USER_ALIVE, &key->flags)) { + /* Clobber key->user after final put seen. */ goto found_unreferenced_key; } diff --git a/security/keys/key.c b/security/keys/key.c index 7198cd2ac3a3..3bbdde778631 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -298,6 +298,7 @@ struct key *key_alloc(struct key_type *type, const char *desc, key->restrict_link = restrict_link; key->last_used_at = ktime_get_real_seconds(); + key->flags |= 1 << KEY_FLAG_USER_ALIVE; if (!(flags & KEY_ALLOC_NOT_IN_QUOTA)) key->flags |= 1 << KEY_FLAG_IN_QUOTA; if (flags & KEY_ALLOC_BUILT_IN) @@ -658,8 +659,8 @@ void key_put(struct key *key) key->user->qnbytes -= key->quotalen; spin_unlock_irqrestore(&key->user->lock, flags); } - smp_mb(); /* key->user before FINAL_PUT set. */ - set_bit(KEY_FLAG_FINAL_PUT, &key->flags); + /* Mark key as safe for GC after key->user done. */ + clear_bit_unlock(KEY_FLAG_USER_ALIVE, &key->flags); schedule_work(&key_gc_work); } } -- cgit v1.2.3 From 20a2aa01f5aeb6daad9aeaa7c33dd512c58d81eb Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 5 Jun 2025 11:14:25 -0400 Subject: Bluetooth: Fix NULL pointer deference on eir_get_service_data The len parameter is considered optional so it can be NULL so it cannot be used for skipping to next entry of EIR_SERVICE_DATA. Fixes: 8f9ae5b3ae80 ("Bluetooth: eir: Add helpers for managing service data") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/eir.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/bluetooth/eir.c b/net/bluetooth/eir.c index 1bc51e2b05a3..3e1713673ecc 100644 --- a/net/bluetooth/eir.c +++ b/net/bluetooth/eir.c @@ -366,17 +366,19 @@ u8 eir_create_scan_rsp(struct hci_dev *hdev, u8 instance, u8 *ptr) void *eir_get_service_data(u8 *eir, size_t eir_len, u16 uuid, size_t *len) { - while ((eir = eir_get_data(eir, eir_len, EIR_SERVICE_DATA, len))) { + size_t dlen; + + while ((eir = eir_get_data(eir, eir_len, EIR_SERVICE_DATA, &dlen))) { u16 value = get_unaligned_le16(eir); if (uuid == value) { if (len) - *len -= 2; + *len = dlen - 2; return &eir[2]; } - eir += *len; - eir_len -= *len; + eir += dlen; + eir_len -= dlen; } return NULL; -- cgit v1.2.3 From 5725bc608252050ed8a4d47d59225b7dd73474c8 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 5 Jun 2025 11:15:16 -0400 Subject: Bluetooth: hci_sync: Fix broadcast/PA when using an existing instance When using and existing adv_info instance for broadcast source it needs to be updated to periodic first before it can be reused, also in case the existing instance already have data hci_set_adv_instance_data cannot be used directly since it would overwrite the existing data so this reappend the original data after the Broadcast ID, if one was generated. Example: bluetoothctl># Add PBP to EA so it can be later referenced as the BIS ID bluetoothctl> advertise.service 0x1856 0x00 0x00 bluetoothctl> advertise on ... < HCI Command: LE Set Extended Advertising Data (0x08|0x0037) plen 13 Handle: 0x01 Operation: Complete extended advertising data (0x03) Fragment preference: Minimize fragmentation (0x01) Data length: 0x09 Service Data: Public Broadcast Announcement (0x1856) Data[2]: 0000 Flags: 0x06 LE General Discoverable Mode BR/EDR Not Supported ... bluetoothctl># Attempt to acquire Broadcast Source transport bluetoothctl>transport.acquire /org/bluez/hci0/pac_bcast0/fd0 ... < HCI Command: LE Set Extended Advertising Data (0x08|0x0037) plen 255 Handle: 0x01 Operation: Complete extended advertising data (0x03) Fragment preference: Minimize fragmentation (0x01) Data length: 0x0e Service Data: Broadcast Audio Announcement (0x1852) Broadcast ID: 11371620 (0xad8464) Service Data: Public Broadcast Announcement (0x1856) Data[2]: 0000 Flags: 0x06 LE General Discoverable Mode BR/EDR Not Supported Link: https://github.com/bluez/bluez/issues/1117 Fixes: eca0ae4aea66 ("Bluetooth: Add initial implementation of BIS connections") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 62d1ff951ebe..8ba1c3aa7801 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -1559,7 +1559,8 @@ static int hci_enable_per_advertising_sync(struct hci_dev *hdev, u8 instance) static int hci_adv_bcast_annoucement(struct hci_dev *hdev, struct adv_info *adv) { u8 bid[3]; - u8 ad[4 + 3]; + u8 ad[HCI_MAX_EXT_AD_LENGTH]; + u8 len; /* Skip if NULL adv as instance 0x00 is used for general purpose * advertising so it cannot used for the likes of Broadcast Announcement @@ -1585,8 +1586,10 @@ static int hci_adv_bcast_annoucement(struct hci_dev *hdev, struct adv_info *adv) /* Generate Broadcast ID */ get_random_bytes(bid, sizeof(bid)); - eir_append_service_data(ad, 0, 0x1852, bid, sizeof(bid)); - hci_set_adv_instance_data(hdev, adv->instance, sizeof(ad), ad, 0, NULL); + len = eir_append_service_data(ad, 0, 0x1852, bid, sizeof(bid)); + memcpy(ad + len, adv->adv_data, adv->adv_data_len); + hci_set_adv_instance_data(hdev, adv->instance, len + adv->adv_data_len, + ad, 0, NULL); return hci_update_adv_data_sync(hdev, adv->instance); } @@ -1603,8 +1606,15 @@ int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 data_len, if (instance) { adv = hci_find_adv_instance(hdev, instance); - /* Create an instance if that could not be found */ - if (!adv) { + if (adv) { + /* Turn it into periodic advertising */ + adv->periodic = true; + adv->per_adv_data_len = data_len; + if (data) + memcpy(adv->per_adv_data, data, data_len); + adv->flags = flags; + } else if (!adv) { + /* Create an instance if that could not be found */ adv = hci_add_per_instance(hdev, instance, flags, data_len, data, sync_interval, -- cgit v1.2.3 From 47c03902269aff377f959dc3fd94a9733aa31d6e Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 10 Jun 2025 10:14:35 -0400 Subject: Bluetooth: eir: Fix possible crashes on eir_create_adv_data eir_create_adv_data may attempt to add EIR_FLAGS and EIR_TX_POWER without checking if that would fit. Link: https://github.com/bluez/bluez/issues/1117#issuecomment-2958244066 Fixes: 01ce70b0a274 ("Bluetooth: eir: Move EIR/Adv Data functions to its own file") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/eir.c | 7 ++++--- net/bluetooth/eir.h | 2 +- net/bluetooth/hci_sync.c | 5 +++-- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/net/bluetooth/eir.c b/net/bluetooth/eir.c index 3e1713673ecc..3f72111ba651 100644 --- a/net/bluetooth/eir.c +++ b/net/bluetooth/eir.c @@ -242,7 +242,7 @@ u8 eir_create_per_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) return ad_len; } -u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) +u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr, u8 size) { struct adv_info *adv = NULL; u8 ad_len = 0, flags = 0; @@ -286,7 +286,7 @@ u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) /* If flags would still be empty, then there is no need to * include the "Flags" AD field". */ - if (flags) { + if (flags && (ad_len + eir_precalc_len(1) <= size)) { ptr[0] = 0x02; ptr[1] = EIR_FLAGS; ptr[2] = flags; @@ -316,7 +316,8 @@ skip_flags: } /* Provide Tx Power only if we can provide a valid value for it */ - if (adv_tx_power != HCI_TX_POWER_INVALID) { + if (adv_tx_power != HCI_TX_POWER_INVALID && + (ad_len + eir_precalc_len(1) <= size)) { ptr[0] = 0x02; ptr[1] = EIR_TX_POWER; ptr[2] = (u8)adv_tx_power; diff --git a/net/bluetooth/eir.h b/net/bluetooth/eir.h index 5c89a05e8b29..9372db83f912 100644 --- a/net/bluetooth/eir.h +++ b/net/bluetooth/eir.h @@ -9,7 +9,7 @@ void eir_create(struct hci_dev *hdev, u8 *data); -u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr); +u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr, u8 size); u8 eir_create_scan_rsp(struct hci_dev *hdev, u8 instance, u8 *ptr); u8 eir_create_per_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr); diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 8ba1c3aa7801..83de3847c8ea 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -1822,7 +1822,8 @@ static int hci_set_ext_adv_data_sync(struct hci_dev *hdev, u8 instance) return 0; } - len = eir_create_adv_data(hdev, instance, pdu->data); + len = eir_create_adv_data(hdev, instance, pdu->data, + HCI_MAX_EXT_AD_LENGTH); pdu->length = len; pdu->handle = adv ? adv->handle : instance; @@ -1853,7 +1854,7 @@ static int hci_set_adv_data_sync(struct hci_dev *hdev, u8 instance) memset(&cp, 0, sizeof(cp)); - len = eir_create_adv_data(hdev, instance, cp.data); + len = eir_create_adv_data(hdev, instance, cp.data, sizeof(cp.data)); /* There's nothing to do if the data hasn't changed */ if (hdev->adv_data_len == len && -- cgit v1.2.3 From 2df108c227b266a9ec9e3fda3828d2ac9662aa33 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 9 Jun 2025 14:53:53 -0400 Subject: Bluetooth: ISO: Fix using BT_SK_PA_SYNC to detect BIS sockets BT_SK_PA_SYNC is only valid for Broadcast Sinks which means socket used for Broadcast Sources wouldn't be able to use the likes of getpeername to read out the sockaddr_iso_bc fields which may have been update (e.g. bc_sid). Fixes: 0a766a0affb5 ("Bluetooth: ISO: Fix getpeername not returning sockaddr_iso_bc fields") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/iso.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 6e2c752aaa8f..affa2077e3a2 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -1337,10 +1337,13 @@ static int iso_sock_getname(struct socket *sock, struct sockaddr *addr, addr->sa_family = AF_BLUETOOTH; if (peer) { + struct hci_conn *hcon = iso_pi(sk)->conn ? + iso_pi(sk)->conn->hcon : NULL; + bacpy(&sa->iso_bdaddr, &iso_pi(sk)->dst); sa->iso_bdaddr_type = iso_pi(sk)->dst_type; - if (test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags)) { + if (hcon && hcon->type == BIS_LINK) { sa->iso_bc->bc_sid = iso_pi(sk)->bc_sid; sa->iso_bc->bc_num_bis = iso_pi(sk)->bc_num_bis; memcpy(sa->iso_bc->bc_bis, iso_pi(sk)->bc_bis, -- cgit v1.2.3 From 5842c01a9ed1d515c8ba2d6d3733eac78ace89c1 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 6 Jun 2025 14:32:49 -0400 Subject: Bluetooth: ISO: Fix not using bc_sid as advertisement SID Currently bc_sid is being ignore when acting as Broadcast Source role, so this fix it by passing the bc_sid and then use it when programming the PA: < HCI Command: LE Set Exte.. (0x08|0x0036) plen 25 Handle: 0x01 Properties: 0x0000 Min advertising interval: 140.000 msec (0x00e0) Max advertising interval: 140.000 msec (0x00e0) Channel map: 37, 38, 39 (0x07) Own address type: Random (0x01) Peer address type: Public (0x00) Peer address: 00:00:00:00:00:00 (OUI 00-00-00) Filter policy: Allow Scan Request from Any, Allow Connect Request from Any (0x00) TX power: Host has no preference (0x7f) Primary PHY: LE 1M (0x01) Secondary max skip: 0x00 Secondary PHY: LE 2M (0x02) SID: 0x01 Scan request notifications: Disabled (0x00) Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 9 ++++++--- include/net/bluetooth/hci_sync.h | 4 ++-- net/bluetooth/hci_conn.c | 31 ++++++++++++++++++++++++------- net/bluetooth/hci_core.c | 16 +++++++++++++++- net/bluetooth/hci_sync.c | 20 +++++++++++++++++--- net/bluetooth/iso.c | 12 ++++++++---- 6 files changed, 72 insertions(+), 20 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index f7b1a9eb9543..a760f05fa3fb 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -242,6 +242,7 @@ struct adv_info { __u8 mesh; __u8 instance; __u8 handle; + __u8 sid; __u32 flags; __u16 timeout; __u16 remaining_time; @@ -1551,13 +1552,14 @@ struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst, u16 timeout); struct hci_conn *hci_bind_cis(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, struct bt_iso_qos *qos); -struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, +struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, __u8 sid, struct bt_iso_qos *qos, __u8 base_len, __u8 *base); struct hci_conn *hci_connect_cis(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, struct bt_iso_qos *qos); struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst, - __u8 dst_type, struct bt_iso_qos *qos, + __u8 dst_type, __u8 sid, + struct bt_iso_qos *qos, __u8 data_len, __u8 *data); struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, __u8 sid, struct bt_iso_qos *qos); @@ -1832,6 +1834,7 @@ int hci_remove_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, void hci_adv_instances_clear(struct hci_dev *hdev); struct adv_info *hci_find_adv_instance(struct hci_dev *hdev, u8 instance); +struct adv_info *hci_find_adv_sid(struct hci_dev *hdev, u8 sid); struct adv_info *hci_get_next_instance(struct hci_dev *hdev, u8 instance); struct adv_info *hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags, u16 adv_data_len, u8 *adv_data, @@ -1839,7 +1842,7 @@ struct adv_info *hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u16 timeout, u16 duration, s8 tx_power, u32 min_interval, u32 max_interval, u8 mesh_handle); -struct adv_info *hci_add_per_instance(struct hci_dev *hdev, u8 instance, +struct adv_info *hci_add_per_instance(struct hci_dev *hdev, u8 instance, u8 sid, u32 flags, u8 data_len, u8 *data, u32 min_interval, u32 max_interval); int hci_set_adv_instance_data(struct hci_dev *hdev, u8 instance, diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 72558c826aa1..5224f57f6af2 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -115,8 +115,8 @@ int hci_enable_ext_advertising_sync(struct hci_dev *hdev, u8 instance); int hci_enable_advertising_sync(struct hci_dev *hdev); int hci_enable_advertising(struct hci_dev *hdev); -int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 data_len, - u8 *data, u32 flags, u16 min_interval, +int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 sid, + u8 data_len, u8 *data, u32 flags, u16 min_interval, u16 max_interval, u16 sync_interval); int hci_disable_per_advertising_sync(struct hci_dev *hdev, u8 instance); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 99efeed6a766..4f379184df5b 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1501,8 +1501,8 @@ static int qos_set_bis(struct hci_dev *hdev, struct bt_iso_qos *qos) /* This function requires the caller holds hdev->lock */ static struct hci_conn *hci_add_bis(struct hci_dev *hdev, bdaddr_t *dst, - struct bt_iso_qos *qos, __u8 base_len, - __u8 *base) + __u8 sid, struct bt_iso_qos *qos, + __u8 base_len, __u8 *base) { struct hci_conn *conn; int err; @@ -1543,6 +1543,7 @@ static struct hci_conn *hci_add_bis(struct hci_dev *hdev, bdaddr_t *dst, return conn; conn->state = BT_CONNECT; + conn->sid = sid; hci_conn_hold(conn); return conn; @@ -2062,7 +2063,8 @@ static int create_big_sync(struct hci_dev *hdev, void *data) if (qos->bcast.bis) sync_interval = interval * 4; - err = hci_start_per_adv_sync(hdev, qos->bcast.bis, conn->le_per_adv_data_len, + err = hci_start_per_adv_sync(hdev, qos->bcast.bis, conn->sid, + conn->le_per_adv_data_len, conn->le_per_adv_data, flags, interval, interval, sync_interval); if (err) @@ -2134,7 +2136,7 @@ static void create_big_complete(struct hci_dev *hdev, void *data, int err) } } -struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, +struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, __u8 sid, struct bt_iso_qos *qos, __u8 base_len, __u8 *base) { @@ -2156,7 +2158,7 @@ struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, base, base_len); /* We need hci_conn object using the BDADDR_ANY as dst */ - conn = hci_add_bis(hdev, dst, qos, base_len, eir); + conn = hci_add_bis(hdev, dst, sid, qos, base_len, eir); if (IS_ERR(conn)) return conn; @@ -2207,20 +2209,35 @@ static void bis_mark_per_adv(struct hci_conn *conn, void *data) } struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst, - __u8 dst_type, struct bt_iso_qos *qos, + __u8 dst_type, __u8 sid, + struct bt_iso_qos *qos, __u8 base_len, __u8 *base) { struct hci_conn *conn; int err; struct iso_list_data data; - conn = hci_bind_bis(hdev, dst, qos, base_len, base); + conn = hci_bind_bis(hdev, dst, sid, qos, base_len, base); if (IS_ERR(conn)) return conn; if (conn->state == BT_CONNECTED) return conn; + /* Check if SID needs to be allocated then search for the first + * available. + */ + if (conn->sid == HCI_SID_INVALID) { + u8 sid; + + for (sid = 0; sid <= 0x0f; sid++) { + if (!hci_find_adv_sid(hdev, sid)) { + conn->sid = sid; + break; + } + } + } + data.big = qos->bcast.big; data.bis = qos->bcast.bis; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 487c045a7ba8..07a8b4281a39 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1584,6 +1584,19 @@ struct adv_info *hci_find_adv_instance(struct hci_dev *hdev, u8 instance) return NULL; } +/* This function requires the caller holds hdev->lock */ +struct adv_info *hci_find_adv_sid(struct hci_dev *hdev, u8 sid) +{ + struct adv_info *adv; + + list_for_each_entry(adv, &hdev->adv_instances, list) { + if (adv->sid == sid) + return adv; + } + + return NULL; +} + /* This function requires the caller holds hdev->lock */ struct adv_info *hci_get_next_instance(struct hci_dev *hdev, u8 instance) { @@ -1736,7 +1749,7 @@ struct adv_info *hci_add_adv_instance(struct hci_dev *hdev, u8 instance, } /* This function requires the caller holds hdev->lock */ -struct adv_info *hci_add_per_instance(struct hci_dev *hdev, u8 instance, +struct adv_info *hci_add_per_instance(struct hci_dev *hdev, u8 instance, u8 sid, u32 flags, u8 data_len, u8 *data, u32 min_interval, u32 max_interval) { @@ -1748,6 +1761,7 @@ struct adv_info *hci_add_per_instance(struct hci_dev *hdev, u8 instance, if (IS_ERR(adv)) return adv; + adv->sid = sid; adv->periodic = true; adv->per_adv_data_len = data_len; diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 83de3847c8ea..6687f2a4d1eb 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -1261,10 +1261,12 @@ int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance) hci_cpu_to_le24(adv->min_interval, cp.min_interval); hci_cpu_to_le24(adv->max_interval, cp.max_interval); cp.tx_power = adv->tx_power; + cp.sid = adv->sid; } else { hci_cpu_to_le24(hdev->le_adv_min_interval, cp.min_interval); hci_cpu_to_le24(hdev->le_adv_max_interval, cp.max_interval); cp.tx_power = HCI_ADV_TX_POWER_NO_PREFERENCE; + cp.sid = 0x00; } secondary_adv = (flags & MGMT_ADV_FLAG_SEC_MASK); @@ -1594,8 +1596,8 @@ static int hci_adv_bcast_annoucement(struct hci_dev *hdev, struct adv_info *adv) return hci_update_adv_data_sync(hdev, adv->instance); } -int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 data_len, - u8 *data, u32 flags, u16 min_interval, +int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 sid, + u8 data_len, u8 *data, u32 flags, u16 min_interval, u16 max_interval, u16 sync_interval) { struct adv_info *adv = NULL; @@ -1607,6 +1609,18 @@ int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 data_len, if (instance) { adv = hci_find_adv_instance(hdev, instance); if (adv) { + if (sid != HCI_SID_INVALID && adv->sid != sid) { + /* If the SID don't match attempt to find by + * SID. + */ + adv = hci_find_adv_sid(hdev, sid); + if (!adv) { + bt_dev_err(hdev, + "Unable to find adv_info"); + return -EINVAL; + } + } + /* Turn it into periodic advertising */ adv->periodic = true; adv->per_adv_data_len = data_len; @@ -1615,7 +1629,7 @@ int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 data_len, adv->flags = flags; } else if (!adv) { /* Create an instance if that could not be found */ - adv = hci_add_per_instance(hdev, instance, flags, + adv = hci_add_per_instance(hdev, instance, sid, flags, data_len, data, sync_interval, sync_interval); diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index affa2077e3a2..3c2c98eecc62 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -336,7 +336,7 @@ static int iso_connect_bis(struct sock *sk) struct hci_dev *hdev; int err; - BT_DBG("%pMR", &iso_pi(sk)->src); + BT_DBG("%pMR (SID 0x%2.2x)", &iso_pi(sk)->src, iso_pi(sk)->bc_sid); hdev = hci_get_route(&iso_pi(sk)->dst, &iso_pi(sk)->src, iso_pi(sk)->src_type); @@ -365,7 +365,7 @@ static int iso_connect_bis(struct sock *sk) /* Just bind if DEFER_SETUP has been set */ if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) { - hcon = hci_bind_bis(hdev, &iso_pi(sk)->dst, + hcon = hci_bind_bis(hdev, &iso_pi(sk)->dst, iso_pi(sk)->bc_sid, &iso_pi(sk)->qos, iso_pi(sk)->base_len, iso_pi(sk)->base); if (IS_ERR(hcon)) { @@ -375,12 +375,16 @@ static int iso_connect_bis(struct sock *sk) } else { hcon = hci_connect_bis(hdev, &iso_pi(sk)->dst, le_addr_type(iso_pi(sk)->dst_type), - &iso_pi(sk)->qos, iso_pi(sk)->base_len, - iso_pi(sk)->base); + iso_pi(sk)->bc_sid, &iso_pi(sk)->qos, + iso_pi(sk)->base_len, iso_pi(sk)->base); if (IS_ERR(hcon)) { err = PTR_ERR(hcon); goto unlock; } + + /* Update SID if it was not set */ + if (iso_pi(sk)->bc_sid == HCI_SID_INVALID) + iso_pi(sk)->bc_sid = hcon->sid; } conn = iso_conn_add(hcon); -- cgit v1.2.3 From 7dd38ba4acbea9875b4ee061e20a26413e39d9f4 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 11 Jun 2025 16:36:27 -0400 Subject: Bluetooth: MGMT: Fix sparse errors This fixes the following errors: net/bluetooth/mgmt.c:5400:59: sparse: sparse: incorrect type in argument 3 (different base types) @@ expected unsigned short [usertype] handle @@ got restricted __le16 [usertype] monitor_handle @@ net/bluetooth/mgmt.c:5400:59: sparse: expected unsigned short [usertype] handle net/bluetooth/mgmt.c:5400:59: sparse: got restricted __le16 [usertype] monitor_handle Fixes: e6ed54e86aae ("Bluetooth: MGMT: Fix UAF on mgmt_remove_adv_monitor_complete") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202506060347.ux2O1p7L-lkp@intel.com/ Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/mgmt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index de7adb9a47f9..d540f7b4f75f 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -5102,11 +5102,11 @@ static void mgmt_adv_monitor_added(struct sock *sk, struct hci_dev *hdev, } static void mgmt_adv_monitor_removed(struct sock *sk, struct hci_dev *hdev, - u16 handle) + __le16 handle) { struct mgmt_ev_adv_monitor_removed ev; - ev.monitor_handle = cpu_to_le16(handle); + ev.monitor_handle = handle; mgmt_event(MGMT_EV_ADV_MONITOR_REMOVED, hdev, &ev, sizeof(ev), sk); } -- cgit v1.2.3 From f37258133c1e95e61db532e14067e28b4881bf24 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Tue, 10 Jun 2025 18:15:06 +0300 Subject: net/mlx5: Ensure fw pages are always allocated on same NUMA When firmware asks the driver to allocate more pages, using event of give_pages, the driver should always allocate it from same NUMA, the original device NUMA. Current code uses dev_to_node() which can result in different NUMA as it is changed by other driver flows, such as mlx5_dma_zalloc_coherent_node(). Instead, use saved numa node for allocating firmware pages. Fixes: 311c7c71c9bb ("net/mlx5e: Allocate DMA coherent memory on reader NUMA node") Signed-off-by: Moshe Shemesh Reviewed-by: Tariq Toukan Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250610151514.1094735-2-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 972e8e9df585..9bc9bd83c232 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -291,7 +291,7 @@ static void free_4k(struct mlx5_core_dev *dev, u64 addr, u32 function) static int alloc_system_page(struct mlx5_core_dev *dev, u32 function) { struct device *device = mlx5_core_dma_dev(dev); - int nid = dev_to_node(device); + int nid = dev->priv.numa_node; struct page *page; u64 zero_addr = 1; u64 addr; -- cgit v1.2.3 From 687560d8a9a2d654829ad0da1ec24242f1de711d Mon Sep 17 00:00:00 2001 From: Amir Tzin Date: Tue, 10 Jun 2025 18:15:07 +0300 Subject: net/mlx5: Fix ECVF vports unload on shutdown flow Fix shutdown flow UAF when a virtual function is created on the embedded chip (ECVF) of a BlueField device. In such case the vport acl ingress table is not properly destroyed. ECVF functionality is independent of ecpf_vport_exists capability and thus functions mlx5_eswitch_(enable|disable)_pf_vf_vports() should not test it when enabling/disabling ECVF vports. kernel log: [] refcount_t: underflow; use-after-free. [] WARNING: CPU: 3 PID: 1 at lib/refcount.c:28 refcount_warn_saturate+0x124/0x220 ---------------- [] Call trace: [] refcount_warn_saturate+0x124/0x220 [] tree_put_node+0x164/0x1e0 [mlx5_core] [] mlx5_destroy_flow_table+0x98/0x2c0 [mlx5_core] [] esw_acl_ingress_table_destroy+0x28/0x40 [mlx5_core] [] esw_acl_ingress_lgcy_cleanup+0x80/0xf4 [mlx5_core] [] esw_legacy_vport_acl_cleanup+0x44/0x60 [mlx5_core] [] esw_vport_cleanup+0x64/0x90 [mlx5_core] [] mlx5_esw_vport_disable+0xc0/0x1d0 [mlx5_core] [] mlx5_eswitch_unload_ec_vf_vports+0xcc/0x150 [mlx5_core] [] mlx5_eswitch_disable_sriov+0x198/0x2a0 [mlx5_core] [] mlx5_device_disable_sriov+0xb8/0x1e0 [mlx5_core] [] mlx5_sriov_detach+0x40/0x50 [mlx5_core] [] mlx5_unload+0x40/0xc4 [mlx5_core] [] mlx5_unload_one_devl_locked+0x6c/0xe4 [mlx5_core] [] mlx5_unload_one+0x3c/0x60 [mlx5_core] [] shutdown+0x7c/0xa4 [mlx5_core] [] pci_device_shutdown+0x3c/0xa0 [] device_shutdown+0x170/0x340 [] __do_sys_reboot+0x1f4/0x2a0 [] __arm64_sys_reboot+0x2c/0x40 [] invoke_syscall+0x78/0x100 [] el0_svc_common.constprop.0+0x54/0x184 [] do_el0_svc+0x30/0xac [] el0_svc+0x48/0x160 [] el0t_64_sync_handler+0xa4/0x12c [] el0t_64_sync+0x1a4/0x1a8 [] --[ end trace 9c4601d68c70030e ]--- Fixes: a7719b29a821 ("net/mlx5: Add management of EC VF vports") Reviewed-by: Daniel Jurgens Reviewed-by: Moshe Shemesh Signed-off-by: Amir Tzin Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250610151514.1094735-3-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 7fb8a3381f84..4917d185d0c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1295,12 +1295,15 @@ mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, ret = mlx5_eswitch_load_pf_vf_vport(esw, MLX5_VPORT_ECPF, enabled_events); if (ret) goto ecpf_err; - if (mlx5_core_ec_sriov_enabled(esw->dev)) { - ret = mlx5_eswitch_load_ec_vf_vports(esw, esw->esw_funcs.num_ec_vfs, - enabled_events); - if (ret) - goto ec_vf_err; - } + } + + /* Enable ECVF vports */ + if (mlx5_core_ec_sriov_enabled(esw->dev)) { + ret = mlx5_eswitch_load_ec_vf_vports(esw, + esw->esw_funcs.num_ec_vfs, + enabled_events); + if (ret) + goto ec_vf_err; } /* Enable VF vports */ @@ -1331,9 +1334,11 @@ void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw) { mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs); + if (mlx5_core_ec_sriov_enabled(esw->dev)) + mlx5_eswitch_unload_ec_vf_vports(esw, + esw->esw_funcs.num_ec_vfs); + if (mlx5_ecpf_vport_exists(esw->dev)) { - if (mlx5_core_ec_sriov_enabled(esw->dev)) - mlx5_eswitch_unload_ec_vf_vports(esw, esw->esw_funcs.num_vfs); mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_ECPF); } -- cgit v1.2.3 From 8ec40e3f1f72bf8f8accf18020d487caa99f46a4 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Tue, 10 Jun 2025 18:15:08 +0300 Subject: net/mlx5: Fix return value when searching for existing flow group When attempting to add a rule to an existing flow group, if a matching flow group exists but is not active, the error code returned should be EAGAIN, so that the rule can be added to the matching flow group once it is active, rather than ENOENT, which indicates that no matching flow group was found. Fixes: bd71b08ec2ee ("net/mlx5: Support multiple updates of steering rules in parallel") Signed-off-by: Gavi Teitz Signed-off-by: Roi Dayan Signed-off-by: Patrisious Haddad Reviewed-by: Tariq Toukan Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250610151514.1094735-4-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 23a7e8e7adfa..a8046200d376 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -2228,6 +2228,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, struct mlx5_flow_handle *rule; struct match_list *iter; bool take_write = false; + bool try_again = false; struct fs_fte *fte; u64 version = 0; int err; @@ -2292,6 +2293,7 @@ skip_search: nested_down_write_ref_node(&g->node, FS_LOCK_PARENT); if (!g->node.active) { + try_again = true; up_write_ref_node(&g->node, false); continue; } @@ -2313,7 +2315,8 @@ skip_search: tree_put_node(&fte->node, false); return rule; } - rule = ERR_PTR(-ENOENT); + err = try_again ? -EAGAIN : -ENOENT; + rule = ERR_PTR(err); out: kmem_cache_free(steering->ftes_cache, fte); return rule; -- cgit v1.2.3 From a002602676cdae0c9996adb75b9310559b718a93 Mon Sep 17 00:00:00 2001 From: Vlad Dogaru Date: Tue, 10 Jun 2025 18:15:09 +0300 Subject: net/mlx5: HWS, Init mutex on the correct path The newly introduced mutex is only used for reformat actions, but it was initialized for modify header instead. The struct that contains the mutex is zero-initialized and an all-zero mutex is valid, so the issue only shows up with CONFIG_DEBUG_MUTEXES. Fixes: b206d9ec19df ("net/mlx5: HWS, register reformat actions with fw") Signed-off-by: Vlad Dogaru Reviewed-by: Yevgeny Kliteynik Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250610151514.1094735-5-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c index 9d1c0e4b224a..372e2be90706 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c @@ -1357,6 +1357,7 @@ mlx5_cmd_hws_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns, pkt_reformat->fs_hws_action.pr_data = pr_data; } + mutex_init(&pkt_reformat->fs_hws_action.lock); pkt_reformat->owner = MLX5_FLOW_RESOURCE_OWNER_HWS; pkt_reformat->fs_hws_action.hws_action = hws_action; return 0; @@ -1503,7 +1504,6 @@ static int mlx5_cmd_hws_modify_header_alloc(struct mlx5_flow_root_namespace *ns, err = -ENOMEM; goto release_mh; } - mutex_init(&modify_hdr->fs_hws_action.lock); modify_hdr->fs_hws_action.mh_data = mh_data; modify_hdr->fs_hws_action.fs_pool = pool; modify_hdr->owner = MLX5_FLOW_RESOURCE_OWNER_SW; -- cgit v1.2.3 From b5e3c76f35ee7e814c2469c73406c5bbf110d89c Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Tue, 10 Jun 2025 18:15:10 +0300 Subject: net/mlx5: HWS, fix missing ip_version handling in definer Fix missing field handling in definer - outer IP version. Fixes: 74a778b4a63f ("net/mlx5: HWS, added definers handling") Signed-off-by: Yevgeny Kliteynik Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250610151514.1094735-6-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c index 5cc0dc002ac1..d45e1145d197 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c @@ -785,6 +785,9 @@ hws_definer_conv_outer(struct mlx5hws_definer_conv_data *cd, HWS_SET_HDR(fc, match_param, IP_PROTOCOL_O, outer_headers.ip_protocol, eth_l3_outer.protocol_next_header); + HWS_SET_HDR(fc, match_param, IP_VERSION_O, + outer_headers.ip_version, + eth_l3_outer.ip_version); HWS_SET_HDR(fc, match_param, IP_TTL_O, outer_headers.ttl_hoplimit, eth_l3_outer.time_to_live_hop_limit); -- cgit v1.2.3 From b8335829518ec5988294280e37d735799209d70d Mon Sep 17 00:00:00 2001 From: Vlad Dogaru Date: Tue, 10 Jun 2025 18:15:11 +0300 Subject: net/mlx5: HWS, make sure the uplink is the last destination When there are more than one destinations, we create a FW flow table and provide it with all the destinations. FW requires to have wire as the last destination in the list (if it exists), otherwise the operation fails with FW syndrome. This patch fixes the destination array action creation: if it contains a wire destination, it is moved to the end. Fixes: 504e536d9010 ("net/mlx5: HWS, added actions handling") Signed-off-by: Vlad Dogaru Reviewed-by: Yevgeny Kliteynik Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250610151514.1094735-7-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/steering/hws/action.c | 14 +++++++------- .../net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c | 3 +++ .../net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h | 1 + 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c index fb62f3bc4bd4..447ea3f8722c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c @@ -1370,8 +1370,8 @@ mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx, struct mlx5hws_cmd_set_fte_attr fte_attr = {0}; struct mlx5hws_cmd_forward_tbl *fw_island; struct mlx5hws_action *action; - u32 i /*, packet_reformat_id*/; - int ret; + int ret, last_dest_idx = -1; + u32 i; if (num_dest <= 1) { mlx5hws_err(ctx, "Action must have multiple dests\n"); @@ -1401,11 +1401,8 @@ mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx, dest_list[i].destination_id = dests[i].dest->dest_obj.obj_id; fte_attr.action_flags |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; fte_attr.ignore_flow_level = ignore_flow_level; - /* ToDo: In SW steering we have a handling of 'go to WIRE' - * destination here by upper layer setting 'is_wire_ft' flag - * if the destination is wire. - * This is because uplink should be last dest in the list. - */ + if (dests[i].is_wire_ft) + last_dest_idx = i; break; case MLX5HWS_ACTION_TYP_VPORT: dest_list[i].destination_type = MLX5_FLOW_DESTINATION_TYPE_VPORT; @@ -1429,6 +1426,9 @@ mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx, } } + if (last_dest_idx != -1) + swap(dest_list[last_dest_idx], dest_list[num_dest - 1]); + fte_attr.dests_num = num_dest; fte_attr.dests = dest_list; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c index 372e2be90706..bf4643d0ce17 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c @@ -966,6 +966,9 @@ static int mlx5_fs_fte_get_hws_actions(struct mlx5_flow_root_namespace *ns, switch (attr->type) { case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: dest_action = mlx5_fs_get_dest_action_ft(fs_ctx, dst); + if (dst->dest_attr.ft->flags & + MLX5_FLOW_TABLE_UPLINK_VPORT) + dest_actions[num_dest_actions].is_wire_ft = true; break; case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM: dest_action = mlx5_fs_get_dest_action_table_num(fs_ctx, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h index 9bbadc4d8a0b..d8ac6c196211 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h @@ -213,6 +213,7 @@ struct mlx5hws_action_dest_attr { struct mlx5hws_action *dest; /* Optional reformat action */ struct mlx5hws_action *reformat; + bool is_wire_ft; }; /** -- cgit v1.2.3 From aa9c44b842096c553871bc68a8cebc7861fa192b Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 10 Jun 2025 18:15:13 +0300 Subject: net/mlx5e: Fix leak of Geneve TLV option object Previously, a unique tunnel id was added for the matching on TC non-zero chains, to support inner header rewrite with goto action. Later, it was used to support VF tunnel offload for vxlan, then for Geneve and GRE. To support VF tunnel, a temporary mlx5_flow_spec is used to parse tunnel options. For Geneve, if there is TLV option, a object is created, or refcnt is added if already exists. But the temporary mlx5_flow_spec is directly freed after parsing, which causes the leak because no information regarding the object is saved in flow's mlx5_flow_spec, which is used to free the object when deleting the flow. To fix the leak, call mlx5_geneve_tlv_option_del() before free the temporary spec if it has TLV object. Fixes: 521933cdc4aa ("net/mlx5e: Support Geneve and GRE with VF tunnel offload") Signed-off-by: Jianbo Liu Reviewed-by: Tariq Toukan Reviewed-by: Alex Lazar Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250610151514.1094735-9-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index f1d908f61134..fef418e1ed1a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2028,9 +2028,8 @@ err_out: return err; } -static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow) +static bool mlx5_flow_has_geneve_opt(struct mlx5_flow_spec *spec) { - struct mlx5_flow_spec *spec = &flow->attr->parse_attr->spec; void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_3); @@ -2069,7 +2068,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, } complete_all(&flow->del_hw_done); - if (mlx5_flow_has_geneve_opt(flow)) + if (mlx5_flow_has_geneve_opt(&attr->parse_attr->spec)) mlx5_geneve_tlv_option_del(priv->mdev->geneve); if (flow->decap_route) @@ -2574,12 +2573,13 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, err = mlx5e_tc_tun_parse(filter_dev, priv, tmp_spec, f, match_level); if (err) { - kvfree(tmp_spec); NL_SET_ERR_MSG_MOD(extack, "Failed to parse tunnel attributes"); netdev_warn(priv->netdev, "Failed to parse tunnel attributes"); - return err; + } else { + err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec); } - err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec); + if (mlx5_flow_has_geneve_opt(tmp_spec)) + mlx5_geneve_tlv_option_del(priv->mdev->geneve); kvfree(tmp_spec); if (err) return err; -- cgit v1.2.3 From 875d7c160d60616ff06dedb70470ad199661efe7 Mon Sep 17 00:00:00 2001 From: Shahar Shitrit Date: Tue, 10 Jun 2025 18:15:14 +0300 Subject: net/mlx5e: Fix number of lanes to UNKNOWN when using data_rate_oper When the link is up, either eth_proto_oper or ext_eth_proto_oper typically reports the active link protocol, from which both speed and number of lanes can be retrieved. However, in certain cases, such as when a NIC is connected via a non-standard cable, the firmware may not report the protocol. In such scenarios, the speed can still be obtained from the data_rate_oper field in PTYS register. Since data_rate_oper provides only speed information and lacks lane details, it is incorrect to derive the number of lanes from it. This patch corrects the behavior by setting the number of lanes to UNKNOWN instead of incorrectly using MAX_LANES when relying on data_rate_oper. Fixes: 7e959797f021 ("net/mlx5e: Enable lanes configuration when auto-negotiation is off") Signed-off-by: Shahar Shitrit Reviewed-by: Tariq Toukan Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250610151514.1094735-10-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index ea078c9f5d15..3cb8d3bf9044 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -43,7 +43,6 @@ #include "en/fs_ethtool.h" #define LANES_UNKNOWN 0 -#define MAX_LANES 8 void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, struct ethtool_drvinfo *drvinfo) @@ -1098,10 +1097,8 @@ static void get_link_properties(struct net_device *netdev, speed = info->speed; lanes = info->lanes; duplex = DUPLEX_FULL; - } else if (data_rate_oper) { + } else if (data_rate_oper) speed = 100 * data_rate_oper; - lanes = MAX_LANES; - } out: link_ksettings->base.duplex = duplex; -- cgit v1.2.3 From 403d1338a4a59cfebb4ded53fa35fbd5119f36b1 Mon Sep 17 00:00:00 2001 From: Magnus Lindholm Date: Tue, 18 Feb 2025 18:55:14 +0100 Subject: mm: pgtable: fix pte_swp_exclusive Make pte_swp_exclusive return bool instead of int. This will better reflect how pte_swp_exclusive is actually used in the code. This fixes swap/swapoff problems on Alpha due pte_swp_exclusive not returning correct values when _PAGE_SWP_EXCLUSIVE bit resides in upper 32-bits of PTE (like on alpha). Suggested-by: Al Viro Signed-off-by: Magnus Lindholm Cc: Sam James Link: https://lore.kernel.org/lkml/20250218175735.19882-2-linmag7@gmail.com/ Link: https://lore.kernel.org/lkml/20250602041118.GA2675383@ZenIV/ [ Applied as the 'sed' script Al suggested - Linus ] Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/pgtable.h | 2 +- arch/arc/include/asm/pgtable-bits-arcv2.h | 2 +- arch/arm/include/asm/pgtable.h | 2 +- arch/arm64/include/asm/pgtable.h | 2 +- arch/csky/include/asm/pgtable.h | 2 +- arch/hexagon/include/asm/pgtable.h | 2 +- arch/loongarch/include/asm/pgtable.h | 2 +- arch/m68k/include/asm/mcf_pgtable.h | 2 +- arch/m68k/include/asm/motorola_pgtable.h | 2 +- arch/m68k/include/asm/sun3_pgtable.h | 2 +- arch/microblaze/include/asm/pgtable.h | 2 +- arch/mips/include/asm/pgtable.h | 4 ++-- arch/nios2/include/asm/pgtable.h | 2 +- arch/openrisc/include/asm/pgtable.h | 2 +- arch/parisc/include/asm/pgtable.h | 2 +- arch/powerpc/include/asm/book3s/32/pgtable.h | 2 +- arch/powerpc/include/asm/book3s/64/pgtable.h | 2 +- arch/powerpc/include/asm/nohash/pgtable.h | 2 +- arch/riscv/include/asm/pgtable.h | 2 +- arch/s390/include/asm/pgtable.h | 2 +- arch/sh/include/asm/pgtable_32.h | 2 +- arch/sparc/include/asm/pgtable_32.h | 2 +- arch/sparc/include/asm/pgtable_64.h | 2 +- arch/um/include/asm/pgtable.h | 2 +- arch/x86/include/asm/pgtable.h | 2 +- arch/xtensa/include/asm/pgtable.h | 2 +- 26 files changed, 27 insertions(+), 27 deletions(-) diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h index 2676017f42f1..44e7aedac6e8 100644 --- a/arch/alpha/include/asm/pgtable.h +++ b/arch/alpha/include/asm/pgtable.h @@ -327,7 +327,7 @@ extern inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/arc/include/asm/pgtable-bits-arcv2.h b/arch/arc/include/asm/pgtable-bits-arcv2.h index 8ebec1b21d24..3084c53f402d 100644 --- a/arch/arc/include/asm/pgtable-bits-arcv2.h +++ b/arch/arc/include/asm/pgtable-bits-arcv2.h @@ -130,7 +130,7 @@ void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 7f1c3b4e3e04..86378eec7757 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -301,7 +301,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(swp) __pte((swp).val) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_isset(pte, L_PTE_SWP_EXCLUSIVE); } diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 88db8a0c0b37..192d86e1cc76 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -563,7 +563,7 @@ static inline pte_t pte_swp_mkexclusive(pte_t pte) return set_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE)); } -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & PTE_SWP_EXCLUSIVE; } diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h index b8378431aeff..5a394be09c35 100644 --- a/arch/csky/include/asm/pgtable.h +++ b/arch/csky/include/asm/pgtable.h @@ -200,7 +200,7 @@ static inline pte_t pte_mkyoung(pte_t pte) return pte; } -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/hexagon/include/asm/pgtable.h b/arch/hexagon/include/asm/pgtable.h index 9fbdfdbc539f..fbf24d1d1ca6 100644 --- a/arch/hexagon/include/asm/pgtable.h +++ b/arch/hexagon/include/asm/pgtable.h @@ -387,7 +387,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) (((type & 0x1f) << 1) | \ ((offset & 0x3ffff8) << 10) | ((offset & 0x7) << 7)) }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index a3f17914dbab..b30185302c07 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -301,7 +301,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) #define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val(pmd) }) #define __swp_entry_to_pmd(x) ((pmd_t) { (x).val | _PAGE_HUGE }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/m68k/include/asm/mcf_pgtable.h b/arch/m68k/include/asm/mcf_pgtable.h index f5c596b211d4..d79fef609194 100644 --- a/arch/m68k/include/asm/mcf_pgtable.h +++ b/arch/m68k/include/asm/mcf_pgtable.h @@ -268,7 +268,7 @@ extern pgd_t kernel_pg_dir[PTRS_PER_PGD]; #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) (__pte((x).val)) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/m68k/include/asm/motorola_pgtable.h b/arch/m68k/include/asm/motorola_pgtable.h index 040ac3bad713..14fee64d3e60 100644 --- a/arch/m68k/include/asm/motorola_pgtable.h +++ b/arch/m68k/include/asm/motorola_pgtable.h @@ -185,7 +185,7 @@ extern pgd_t kernel_pg_dir[128]; #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/m68k/include/asm/sun3_pgtable.h b/arch/m68k/include/asm/sun3_pgtable.h index 73745dc0ec0e..858cbe936f5b 100644 --- a/arch/m68k/include/asm/sun3_pgtable.h +++ b/arch/m68k/include/asm/sun3_pgtable.h @@ -169,7 +169,7 @@ extern pgd_t kernel_pg_dir[PTRS_PER_PGD]; #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h index b1bb2c65dd04..bae1abfa6f6b 100644 --- a/arch/microblaze/include/asm/pgtable.h +++ b/arch/microblaze/include/asm/pgtable.h @@ -398,7 +398,7 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 2 }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val << 2 }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index 4852b005a72d..ae73ecf4c41a 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -534,7 +534,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #endif #if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte.pte_low & _PAGE_SWP_EXCLUSIVE; } @@ -551,7 +551,7 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) return pte; } #else -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h index e98578e27e26..844dce55569f 100644 --- a/arch/nios2/include/asm/pgtable.h +++ b/arch/nios2/include/asm/pgtable.h @@ -259,7 +259,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) #define __swp_entry_to_pte(swp) ((pte_t) { (swp).val }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h index 71bfb8c8c482..5bd6463bd514 100644 --- a/arch/openrisc/include/asm/pgtable.h +++ b/arch/openrisc/include/asm/pgtable.h @@ -411,7 +411,7 @@ static inline void update_mmu_cache_range(struct vm_fault *vmf, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index 80f5e2a28413..1a86a4370b29 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -425,7 +425,7 @@ static inline void set_ptes(struct mm_struct *mm, unsigned long addr, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 42c3af90d1f0..92d21c6faf1e 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -365,7 +365,7 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 6ed93e290c2f..a2ddcbb3fcb9 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -693,7 +693,7 @@ static inline pte_t pte_swp_mkexclusive(pte_t pte) return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SWP_EXCLUSIVE)); } -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return !!(pte_raw(pte) & cpu_to_be64(_PAGE_SWP_EXCLUSIVE)); } diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 8d1f0b7062eb..7d6b9e5b286e 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -286,7 +286,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot)); } -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index a11816bbf9e7..438ce7df24c3 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -1028,7 +1028,7 @@ static inline pud_t pud_modify(pud_t pud, pgprot_t newprot) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 1c661ac62ce8..6d8bc27a366e 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -915,7 +915,7 @@ static inline int pmd_protnone(pmd_t pmd) } #endif -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/sh/include/asm/pgtable_32.h b/arch/sh/include/asm/pgtable_32.h index 71b18741cc11..5f51af18997b 100644 --- a/arch/sh/include/asm/pgtable_32.h +++ b/arch/sh/include/asm/pgtable_32.h @@ -470,7 +470,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) /* In both cases, we borrow bit 6 to store the exclusive marker in swap PTEs. */ #define _PAGE_SWP_EXCLUSIVE _PAGE_USER -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte.pte_low & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h index 1454ebe91539..7c199c003ffe 100644 --- a/arch/sparc/include/asm/pgtable_32.h +++ b/arch/sparc/include/asm/pgtable_32.h @@ -348,7 +348,7 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & SRMMU_SWP_EXCLUSIVE; } diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 4af03e3c161b..669cd02469a1 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -1023,7 +1023,7 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h index ca2a519d53ab..24fdea6f88c3 100644 --- a/arch/um/include/asm/pgtable.h +++ b/arch/um/include/asm/pgtable.h @@ -314,7 +314,7 @@ extern pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr); ((swp_entry_t) { pte_val(pte_mkuptodate(pte)) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_get_bits(pte, _PAGE_SWP_EXCLUSIVE); } diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 774430c3abff..97954c936c54 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1561,7 +1561,7 @@ static inline pte_t pte_swp_mkexclusive(pte_t pte) return pte_set_flags(pte, _PAGE_SWP_EXCLUSIVE); } -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_flags(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h index cb1725c40e36..d62aa1c316fc 100644 --- a/arch/xtensa/include/asm/pgtable.h +++ b/arch/xtensa/include/asm/pgtable.h @@ -349,7 +349,7 @@ ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } -- cgit v1.2.3 From ba9db6f907ac02215e30128770f85fbd7db2fcf9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 9 Jun 2025 17:12:44 -0700 Subject: net: clear the dst when changing skb protocol MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A not-so-careful NAT46 BPF program can crash the kernel if it indiscriminately flips ingress packets from v4 to v6: BUG: kernel NULL pointer dereference, address: 0000000000000000 ip6_rcv_core (net/ipv6/ip6_input.c:190:20) ipv6_rcv (net/ipv6/ip6_input.c:306:8) process_backlog (net/core/dev.c:6186:4) napi_poll (net/core/dev.c:6906:9) net_rx_action (net/core/dev.c:7028:13) do_softirq (kernel/softirq.c:462:3) netif_rx (net/core/dev.c:5326:3) dev_loopback_xmit (net/core/dev.c:4015:2) ip_mc_finish_output (net/ipv4/ip_output.c:363:8) NF_HOOK (./include/linux/netfilter.h:314:9) ip_mc_output (net/ipv4/ip_output.c:400:5) dst_output (./include/net/dst.h:459:9) ip_local_out (net/ipv4/ip_output.c:130:9) ip_send_skb (net/ipv4/ip_output.c:1496:8) udp_send_skb (net/ipv4/udp.c:1040:8) udp_sendmsg (net/ipv4/udp.c:1328:10) The output interface has a 4->6 program attached at ingress. We try to loop the multicast skb back to the sending socket. Ingress BPF runs as part of netif_rx(), pushes a valid v6 hdr and changes skb->protocol to v6. We enter ip6_rcv_core which tries to use skb_dst(). But the dst is still an IPv4 one left after IPv4 mcast output. Clear the dst in all BPF helpers which change the protocol. Try to preserve metadata dsts, those may carry non-routing metadata. Cc: stable@vger.kernel.org Reviewed-by: Maciej Żenczykowski Acked-by: Daniel Borkmann Fixes: d219df60a70e ("bpf: Add ipip6 and ip6ip decap support for bpf_skb_adjust_room()") Fixes: 1b00e0dfe7d0 ("bpf: update skb->protocol in bpf_skb_net_grow") Fixes: 6578171a7ff0 ("bpf: add bpf_skb_change_proto helper") Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250610001245.1981782-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/core/filter.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index 327ca73f9cd7..7a72f766aacf 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3233,6 +3233,13 @@ static const struct bpf_func_proto bpf_skb_vlan_pop_proto = { .arg1_type = ARG_PTR_TO_CTX, }; +static void bpf_skb_change_protocol(struct sk_buff *skb, u16 proto) +{ + skb->protocol = htons(proto); + if (skb_valid_dst(skb)) + skb_dst_drop(skb); +} + static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len) { /* Caller already did skb_cow() with len as headroom, @@ -3329,7 +3336,7 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb) } } - skb->protocol = htons(ETH_P_IPV6); + bpf_skb_change_protocol(skb, ETH_P_IPV6); skb_clear_hash(skb); return 0; @@ -3359,7 +3366,7 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb) } } - skb->protocol = htons(ETH_P_IP); + bpf_skb_change_protocol(skb, ETH_P_IP); skb_clear_hash(skb); return 0; @@ -3550,10 +3557,10 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff, /* Match skb->protocol to new outer l3 protocol */ if (skb->protocol == htons(ETH_P_IP) && flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6) - skb->protocol = htons(ETH_P_IPV6); + bpf_skb_change_protocol(skb, ETH_P_IPV6); else if (skb->protocol == htons(ETH_P_IPV6) && flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4) - skb->protocol = htons(ETH_P_IP); + bpf_skb_change_protocol(skb, ETH_P_IP); } if (skb_is_gso(skb)) { @@ -3606,10 +3613,10 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff, /* Match skb->protocol to new outer l3 protocol */ if (skb->protocol == htons(ETH_P_IP) && flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6) - skb->protocol = htons(ETH_P_IPV6); + bpf_skb_change_protocol(skb, ETH_P_IPV6); else if (skb->protocol == htons(ETH_P_IPV6) && flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4) - skb->protocol = htons(ETH_P_IP); + bpf_skb_change_protocol(skb, ETH_P_IP); if (skb_is_gso(skb)) { struct skb_shared_info *shinfo = skb_shinfo(skb); -- cgit v1.2.3 From 567766954b2d5d6c29f568ad4382d6351ea48079 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 9 Jun 2025 17:12:45 -0700 Subject: selftests: net: add test case for NAT46 looping back dst Simple test for crash involving multicast loopback and stale dst. Reuse exising NAT46 program. Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250610001245.1981782-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/nat6to4.sh | 15 +++++++++++++++ 2 files changed, 16 insertions(+) create mode 100755 tools/testing/selftests/net/nat6to4.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index ea84b88bcb30..ab996bd22a5f 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -27,6 +27,7 @@ TEST_PROGS += amt.sh TEST_PROGS += unicast_extensions.sh TEST_PROGS += udpgro_fwd.sh TEST_PROGS += udpgro_frglist.sh +TEST_PROGS += nat6to4.sh TEST_PROGS += veth.sh TEST_PROGS += ioam6.sh TEST_PROGS += gro.sh diff --git a/tools/testing/selftests/net/nat6to4.sh b/tools/testing/selftests/net/nat6to4.sh new file mode 100755 index 000000000000..0ee859b622a4 --- /dev/null +++ b/tools/testing/selftests/net/nat6to4.sh @@ -0,0 +1,15 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +NS="ns-peer-$(mktemp -u XXXXXX)" + +ip netns add "${NS}" +ip -netns "${NS}" link set lo up +ip -netns "${NS}" route add default via 127.0.0.2 dev lo + +tc -n "${NS}" qdisc add dev lo ingress +tc -n "${NS}" filter add dev lo ingress prio 4 protocol ip \ + bpf object-file nat6to4.bpf.o section schedcls/egress4/snat4 direct-action + +ip netns exec "${NS}" \ + bash -c 'echo 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789abc | socat - UDP4-DATAGRAM:224.1.0.1:6666,ip-multicast-loop=1' -- cgit v1.2.3 From 27cea0e419d2f9dc6f51bbce5a44c70bc3774b9a Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 10 Jun 2025 23:56:58 +0000 Subject: MAINTAINERS: Update Kuniyuki Iwashima's email address. I left Amazon and joined Google, so let's map the email addresses accordingly. Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250610235734.88540-1-kuniyu@google.com Signed-off-by: Jakub Kicinski --- .mailmap | 3 +++ MAINTAINERS | 6 +++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.mailmap b/.mailmap index 074082ce9299..7890b69a9b6e 100644 --- a/.mailmap +++ b/.mailmap @@ -424,6 +424,9 @@ Krzysztof Wilczyński Krzysztof Wilczyński Kshitiz Godara Kuninori Morimoto +Kuniyuki Iwashima +Kuniyuki Iwashima +Kuniyuki Iwashima Kuogee Hsieh Lee Jones Lee Jones diff --git a/MAINTAINERS b/MAINTAINERS index f2668b81115c..bd09337f178d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17420,7 +17420,7 @@ F: tools/testing/selftests/net/srv6* NETWORKING [TCP] M: Eric Dumazet M: Neal Cardwell -R: Kuniyuki Iwashima +R: Kuniyuki Iwashima L: netdev@vger.kernel.org S: Maintained F: Documentation/networking/net_cachelines/tcp_sock.rst @@ -17450,7 +17450,7 @@ F: net/tls/* NETWORKING [SOCKETS] M: Eric Dumazet -M: Kuniyuki Iwashima +M: Kuniyuki Iwashima M: Paolo Abeni M: Willem de Bruijn S: Maintained @@ -17465,7 +17465,7 @@ F: net/core/scm.c F: net/socket.c NETWORKING [UNIX SOCKETS] -M: Kuniyuki Iwashima +M: Kuniyuki Iwashima S: Maintained F: include/net/af_unix.h F: include/net/netns/unix.h -- cgit v1.2.3 From ebf2e500e06f707654572bc7d8bc569a8caa51aa Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 9 Jun 2025 12:38:43 +0530 Subject: rust: cpu: Introduce CpuId abstraction This adds abstraction for representing a CPU identifier. Suggested-by: Boqun Feng Signed-off-by: Viresh Kumar Reviewed-by: Boqun Feng --- rust/kernel/cpu.rs | 110 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) diff --git a/rust/kernel/cpu.rs b/rust/kernel/cpu.rs index 10c5c3b25873..6a3aecb12468 100644 --- a/rust/kernel/cpu.rs +++ b/rust/kernel/cpu.rs @@ -6,6 +6,116 @@ use crate::{bindings, device::Device, error::Result, prelude::ENODEV}; +/// Returns the maximum number of possible CPUs in the current system configuration. +#[inline] +pub fn nr_cpu_ids() -> u32 { + #[cfg(any(NR_CPUS_1, CONFIG_FORCE_NR_CPUS))] + { + bindings::NR_CPUS + } + + #[cfg(not(any(NR_CPUS_1, CONFIG_FORCE_NR_CPUS)))] + // SAFETY: `nr_cpu_ids` is a valid global provided by the kernel. + unsafe { + bindings::nr_cpu_ids + } +} + +/// The CPU ID. +/// +/// Represents a CPU identifier as a wrapper around an [`u32`]. +/// +/// # Invariants +/// +/// The CPU ID lies within the range `[0, nr_cpu_ids())`. +/// +/// # Examples +/// +/// ``` +/// use kernel::cpu::CpuId; +/// +/// let cpu = 0; +/// +/// // SAFETY: 0 is always a valid CPU number. +/// let id = unsafe { CpuId::from_u32_unchecked(cpu) }; +/// +/// assert_eq!(id.as_u32(), cpu); +/// assert!(CpuId::from_i32(0).is_some()); +/// assert!(CpuId::from_i32(-1).is_none()); +/// ``` +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub struct CpuId(u32); + +impl CpuId { + /// Creates a new [`CpuId`] from the given `id` without checking bounds. + /// + /// # Safety + /// + /// The caller must ensure that `id` is a valid CPU ID (i.e., `0 <= id < nr_cpu_ids()`). + #[inline] + pub unsafe fn from_i32_unchecked(id: i32) -> Self { + debug_assert!(id >= 0); + debug_assert!((id as u32) < nr_cpu_ids()); + + // INVARIANT: The function safety guarantees `id` is a valid CPU id. + Self(id as u32) + } + + /// Creates a new [`CpuId`] from the given `id`, checking that it is valid. + pub fn from_i32(id: i32) -> Option { + if id < 0 || id as u32 >= nr_cpu_ids() { + None + } else { + // INVARIANT: `id` has just been checked as a valid CPU ID. + Some(Self(id as u32)) + } + } + + /// Creates a new [`CpuId`] from the given `id` without checking bounds. + /// + /// # Safety + /// + /// The caller must ensure that `id` is a valid CPU ID (i.e., `0 <= id < nr_cpu_ids()`). + #[inline] + pub unsafe fn from_u32_unchecked(id: u32) -> Self { + debug_assert!(id < nr_cpu_ids()); + + // Ensure the `id` fits in an [`i32`] as it's also representable that way. + debug_assert!(id <= i32::MAX as u32); + + // INVARIANT: The function safety guarantees `id` is a valid CPU id. + Self(id) + } + + /// Creates a new [`CpuId`] from the given `id`, checking that it is valid. + pub fn from_u32(id: u32) -> Option { + if id >= nr_cpu_ids() { + None + } else { + // INVARIANT: `id` has just been checked as a valid CPU ID. + Some(Self(id)) + } + } + + /// Returns CPU number. + #[inline] + pub fn as_u32(&self) -> u32 { + self.0 + } +} + +impl From for u32 { + fn from(id: CpuId) -> Self { + id.as_u32() + } +} + +impl From for i32 { + fn from(id: CpuId) -> Self { + id.as_u32() as i32 + } +} + /// Creates a new instance of CPU's device. /// /// # Safety -- cgit v1.2.3 From 47fe65b105f29ef22f463c17ab8a4c0eeb741045 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 1 Jun 2025 20:22:17 -0400 Subject: bcachefs: Add missing restart handling to check_topology() The next patch will add logging of the specific error being corrected in repair paths to the journal; this means __bch2_fsck_err() can return transaction restarts in places that previously weren't expecting them. check_topology() is old code that doesn't use btree iterators for btree node locking - it'll have to be rewritten in the future to work online. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_gc.c | 95 +++++++++++++++++++++++++++++++------------------- 1 file changed, 60 insertions(+), 35 deletions(-) diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 9ddcbe1bda78..e92cf3928c63 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -397,7 +397,11 @@ again: continue; } - ret = btree_check_node_boundaries(trans, b, prev, cur, pulled_from_scan); + ret = lockrestart_do(trans, + btree_check_node_boundaries(trans, b, prev, cur, pulled_from_scan)); + if (ret < 0) + goto err; + if (ret == DID_FILL_FROM_SCAN) { new_pass = true; ret = 0; @@ -438,7 +442,8 @@ again: if (!ret && !IS_ERR_OR_NULL(prev)) { BUG_ON(cur); - ret = btree_repair_node_end(trans, b, prev, pulled_from_scan); + ret = lockrestart_do(trans, + btree_repair_node_end(trans, b, prev, pulled_from_scan)); if (ret == DID_FILL_FROM_SCAN) { new_pass = true; ret = 0; @@ -519,6 +524,46 @@ fsck_err: bch2_bkey_buf_exit(&prev_k, c); bch2_bkey_buf_exit(&cur_k, c); printbuf_exit(&buf); + bch_err_fn(c, ret); + return ret; +} + +static int bch2_check_root(struct btree_trans *trans, enum btree_id i, + bool *reconstructed_root) +{ + struct bch_fs *c = trans->c; + struct btree_root *r = bch2_btree_id_root(c, i); + struct printbuf buf = PRINTBUF; + int ret = 0; + + bch2_btree_id_to_text(&buf, i); + + if (r->error) { + bch_info(c, "btree root %s unreadable, must recover from scan", buf.buf); + + r->alive = false; + r->error = 0; + + if (!bch2_btree_has_scanned_nodes(c, i)) { + __fsck_err(trans, + FSCK_CAN_FIX|(!btree_id_important(i) ? FSCK_AUTOFIX : 0), + btree_root_unreadable_and_scan_found_nothing, + "no nodes found for btree %s, continue?", buf.buf); + bch2_btree_root_alloc_fake_trans(trans, i, 0); + } else { + bch2_btree_root_alloc_fake_trans(trans, i, 1); + bch2_shoot_down_journal_keys(c, i, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); + ret = bch2_get_scanned_nodes(c, i, 0, POS_MIN, SPOS_MAX); + if (ret) + goto err; + } + + *reconstructed_root = true; + } +err: +fsck_err: + printbuf_exit(&buf); + bch_err_fn(c, ret); return ret; } @@ -526,42 +571,18 @@ int bch2_check_topology(struct bch_fs *c) { struct btree_trans *trans = bch2_trans_get(c); struct bpos pulled_from_scan = POS_MIN; - struct printbuf buf = PRINTBUF; int ret = 0; bch2_trans_srcu_unlock(trans); for (unsigned i = 0; i < btree_id_nr_alive(c) && !ret; i++) { - struct btree_root *r = bch2_btree_id_root(c, i); bool reconstructed_root = false; +recover: + ret = lockrestart_do(trans, bch2_check_root(trans, i, &reconstructed_root)); + if (ret) + break; - printbuf_reset(&buf); - bch2_btree_id_to_text(&buf, i); - - if (r->error) { -reconstruct_root: - bch_info(c, "btree root %s unreadable, must recover from scan", buf.buf); - - r->alive = false; - r->error = 0; - - if (!bch2_btree_has_scanned_nodes(c, i)) { - __fsck_err(trans, - FSCK_CAN_FIX|(!btree_id_important(i) ? FSCK_AUTOFIX : 0), - btree_root_unreadable_and_scan_found_nothing, - "no nodes found for btree %s, continue?", buf.buf); - bch2_btree_root_alloc_fake_trans(trans, i, 0); - } else { - bch2_btree_root_alloc_fake_trans(trans, i, 1); - bch2_shoot_down_journal_keys(c, i, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); - ret = bch2_get_scanned_nodes(c, i, 0, POS_MIN, SPOS_MAX); - if (ret) - break; - } - - reconstructed_root = true; - } - + struct btree_root *r = bch2_btree_id_root(c, i); struct btree *b = r->b; btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read); @@ -575,17 +596,21 @@ reconstruct_root: r->b = NULL; - if (!reconstructed_root) - goto reconstruct_root; + if (!reconstructed_root) { + r->error = -EIO; + goto recover; + } + struct printbuf buf = PRINTBUF; + bch2_btree_id_to_text(&buf, i); bch_err(c, "empty btree root %s", buf.buf); + printbuf_exit(&buf); bch2_btree_root_alloc_fake_trans(trans, i, 0); r->alive = false; ret = 0; } } -fsck_err: - printbuf_exit(&buf); + bch2_trans_put(trans); return ret; } -- cgit v1.2.3 From b43f724927686387589b98eb762e1a4109bb1bac Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 1 Jun 2025 13:07:31 -0400 Subject: bcachefs: Log fsck errors in the journal Log the specific error being corrected in the journal when we're repairing, this helps greatly with 'bcachefs list_journal' analysis. Signed-off-by: Kent Overstreet --- fs/bcachefs/error.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index 63951e293c47..ff49cebfd0e8 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -620,6 +620,9 @@ print: if (s) s->ret = ret; + + if (trans) + ret = bch2_trans_log_str(trans, bch2_sb_error_strs[err]) ?: ret; err_unlock: mutex_unlock(&c->fsck_error_msgs_lock); err: -- cgit v1.2.3 From c7e351be7aa4e176223f0128e4d1b959ffad9598 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 5 Jun 2025 19:00:11 -0400 Subject: bcachefs: Add range being updated to btree_update_to_text() We had a deadlock during recovery where interior btree updates became wedged and all open_buckets were consumed; start adding more introspection. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_update_interior.c | 26 ++++++++++++++++++++++++-- fs/bcachefs/btree_update_interior.h | 7 +++++++ 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index d2ecb782919b..340812b28d08 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1232,6 +1232,15 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, list_add_tail(&as->list, &c->btree_interior_update_list); mutex_unlock(&c->btree_interior_update_lock); + struct btree *b = btree_path_node(path, path->level); + as->node_start = b->data->min_key; + as->node_end = b->data->max_key; + as->node_needed_rewrite = btree_node_need_rewrite(b); + as->node_written = b->written; + as->node_sectors = btree_buf_bytes(b) >> 9; + as->node_remaining = __bch2_btree_u64s_remaining(b, + btree_bkey_last(b, bset_tree_last(b))); + /* * We don't want to allocate if we're in an error state, that can cause * deadlock on emergency shutdown due to open buckets getting stuck in @@ -2108,6 +2117,9 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, if (ret) goto err; + as->node_start = prev->data->min_key; + as->node_end = next->data->max_key; + trace_and_count(c, btree_node_merge, trans, b); n = bch2_btree_node_alloc(as, trans, b->c.level); @@ -2681,9 +2693,19 @@ static void bch2_btree_update_to_text(struct printbuf *out, struct btree_update prt_str(out, " "); bch2_btree_id_to_text(out, as->btree_id); - prt_printf(out, " l=%u-%u mode=%s nodes_written=%u cl.remaining=%u journal_seq=%llu\n", + prt_printf(out, " l=%u-%u ", as->update_level_start, - as->update_level_end, + as->update_level_end); + bch2_bpos_to_text(out, as->node_start); + prt_char(out, ' '); + bch2_bpos_to_text(out, as->node_end); + prt_printf(out, "\nwritten %u/%u u64s_remaining %u need_rewrite %u", + as->node_written, + as->node_sectors, + as->node_remaining, + as->node_needed_rewrite); + + prt_printf(out, "\nmode=%s nodes_written=%u cl.remaining=%u journal_seq=%llu\n", bch2_btree_update_modes[as->mode], as->nodes_written, closure_nr_remaining(&as->cl), diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h index 7fe793788a79..85682a13de4d 100644 --- a/fs/bcachefs/btree_update_interior.h +++ b/fs/bcachefs/btree_update_interior.h @@ -57,6 +57,13 @@ struct btree_update { unsigned took_gc_lock:1; enum btree_id btree_id; + struct bpos node_start; + struct bpos node_end; + bool node_needed_rewrite; + u16 node_written; + u16 node_sectors; + u16 node_remaining; + unsigned update_level_start; unsigned update_level_end; -- cgit v1.2.3 From b76cce12700b8bb8d59b9ff444504b94db96dd4b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 5 Jun 2025 20:53:01 -0400 Subject: bcachefs: Add more flags to btree nodes for rewrite reason It seems excessive forced btree node rewrites can cause interior btree updates to become wedged during recovery, before we're using the write buffer for backpointer updates. Add more flags so we can determine where these are coming from. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_io.c | 10 ++++++++-- fs/bcachefs/btree_types.h | 29 +++++++++++++++++++++++++++++ fs/bcachefs/btree_update_interior.c | 13 ++++++++++--- fs/bcachefs/btree_update_interior.h | 2 +- 4 files changed, 48 insertions(+), 6 deletions(-) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 57eff3012a7b..6787d5b91eab 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1045,6 +1045,7 @@ got_good_key: le16_add_cpu(&i->u64s, -next_good_key); memmove_u64s_down(k, (u64 *) k + next_good_key, (u64 *) vstruct_end(i) - (u64 *) k); set_btree_node_need_rewrite(b); + set_btree_node_need_rewrite_error(b); } fsck_err: printbuf_exit(&buf); @@ -1305,6 +1306,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, (u64 *) vstruct_end(i) - (u64 *) k); set_btree_bset_end(b, b->set); set_btree_node_need_rewrite(b); + set_btree_node_need_rewrite_error(b); continue; } if (ret) @@ -1329,12 +1331,16 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) { struct bch_dev *ca2 = bch2_dev_rcu(c, ptr->dev); - if (!ca2 || ca2->mi.state != BCH_MEMBER_STATE_rw) + if (!ca2 || ca2->mi.state != BCH_MEMBER_STATE_rw) { set_btree_node_need_rewrite(b); + set_btree_node_need_rewrite_degraded(b); + } } - if (!ptr_written) + if (!ptr_written) { set_btree_node_need_rewrite(b); + set_btree_node_need_rewrite_ptr_written_zero(b); + } fsck_err: mempool_free(iter, &c->fill_iter); printbuf_exit(&buf); diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index c61c4171ae50..3aa4a602bd02 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -617,6 +617,9 @@ enum btree_write_type { x(dying) \ x(fake) \ x(need_rewrite) \ + x(need_rewrite_error) \ + x(need_rewrite_degraded) \ + x(need_rewrite_ptr_written_zero) \ x(never_write) \ x(pinned) @@ -641,6 +644,32 @@ static inline void clear_btree_node_ ## flag(struct btree *b) \ BTREE_FLAGS() #undef x +#define BTREE_NODE_REWRITE_REASON() \ + x(none) \ + x(unknown) \ + x(error) \ + x(degraded) \ + x(ptr_written_zero) + +enum btree_node_rewrite_reason { +#define x(n) BTREE_NODE_REWRITE_##n, + BTREE_NODE_REWRITE_REASON() +#undef x +}; + +static inline enum btree_node_rewrite_reason btree_node_rewrite_reason(struct btree *b) +{ + if (btree_node_need_rewrite_ptr_written_zero(b)) + return BTREE_NODE_REWRITE_ptr_written_zero; + if (btree_node_need_rewrite_degraded(b)) + return BTREE_NODE_REWRITE_degraded; + if (btree_node_need_rewrite_error(b)) + return BTREE_NODE_REWRITE_error; + if (btree_node_need_rewrite(b)) + return BTREE_NODE_REWRITE_unknown; + return BTREE_NODE_REWRITE_none; +} + static inline struct btree_write *btree_current_write(struct btree *b) { return b->writes + btree_node_write_idx(b); diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 340812b28d08..e77584607f0d 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1138,6 +1138,13 @@ static void bch2_btree_update_done(struct btree_update *as, struct btree_trans * start_time); } +static const char * const btree_node_reawrite_reason_strs[] = { +#define x(n) #n, + BTREE_NODE_REWRITE_REASON() +#undef x + NULL, +}; + static struct btree_update * bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, unsigned level_start, bool split, @@ -1235,7 +1242,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, struct btree *b = btree_path_node(path, path->level); as->node_start = b->data->min_key; as->node_end = b->data->max_key; - as->node_needed_rewrite = btree_node_need_rewrite(b); + as->node_needed_rewrite = btree_node_rewrite_reason(b); as->node_written = b->written; as->node_sectors = btree_buf_bytes(b) >> 9; as->node_remaining = __bch2_btree_u64s_remaining(b, @@ -2699,11 +2706,11 @@ static void bch2_btree_update_to_text(struct printbuf *out, struct btree_update bch2_bpos_to_text(out, as->node_start); prt_char(out, ' '); bch2_bpos_to_text(out, as->node_end); - prt_printf(out, "\nwritten %u/%u u64s_remaining %u need_rewrite %u", + prt_printf(out, "\nwritten %u/%u u64s_remaining %u need_rewrite %s", as->node_written, as->node_sectors, as->node_remaining, - as->node_needed_rewrite); + btree_node_reawrite_reason_strs[as->node_needed_rewrite]); prt_printf(out, "\nmode=%s nodes_written=%u cl.remaining=%u journal_seq=%llu\n", bch2_btree_update_modes[as->mode], diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h index 85682a13de4d..b649c36c3fbb 100644 --- a/fs/bcachefs/btree_update_interior.h +++ b/fs/bcachefs/btree_update_interior.h @@ -59,7 +59,7 @@ struct btree_update { enum btree_id btree_id; struct bpos node_start; struct bpos node_end; - bool node_needed_rewrite; + enum btree_node_rewrite_reason node_needed_rewrite; u16 node_written; u16 node_sectors; u16 node_remaining; -- cgit v1.2.3 From af5b88618a38371545251ad92ce42bc4bfa1142a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 7 Jun 2025 12:01:22 -0400 Subject: bcachefs: Update /dev/disk/by-uuid on device add Invalidate pagecache after we write the new superblock and send a uevent. Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 397a69da5a75..8648f22411be 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1995,6 +1995,22 @@ int bch2_dev_add(struct bch_fs *c, const char *path) goto err_late; } + /* + * We just changed the superblock UUID, invalidate cache and send a + * uevent to update /dev/disk/by-uuid + */ + invalidate_bdev(ca->disk_sb.bdev); + + char uuid_str[37]; + snprintf(uuid_str, sizeof(uuid_str), "UUID=%pUb", &c->sb.uuid); + + char *envp[] = { + "CHANGE=uuid", + uuid_str, + NULL, + }; + kobject_uevent_env(&ca->disk_sb.bdev->bd_device.kobj, KOBJ_CHANGE, envp); + up_write(&c->state_lock); out: printbuf_exit(&label); -- cgit v1.2.3 From 7b0e6b198e4ef39ec89285d953778c5a3a04d390 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 7 Jun 2025 12:56:15 -0400 Subject: bcachefs: Mark need_discard_freespace_key_bad autofix Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-errors_format.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 6fdbf265e4c0..90c878f289b2 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -134,7 +134,7 @@ enum bch_fsck_flags { x(bucket_gens_to_invalid_buckets, 121, FSCK_AUTOFIX) \ x(bucket_gens_nonzero_for_invalid_buckets, 122, FSCK_AUTOFIX) \ x(need_discard_freespace_key_to_invalid_dev_bucket, 123, 0) \ - x(need_discard_freespace_key_bad, 124, 0) \ + x(need_discard_freespace_key_bad, 124, FSCK_AUTOFIX) \ x(discarding_bucket_not_in_need_discard_btree, 291, 0) \ x(backpointer_bucket_offset_wrong, 125, 0) \ x(backpointer_level_bad, 294, 0) \ -- cgit v1.2.3 From b47a82ff4772ea9d7091b85ef5f34dc78c866a02 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 7 Jun 2025 12:56:33 -0400 Subject: bcachefs: Only run 'increase_depth' for keys from btree node csan bch2_btree_increase_depth() was originally for disaster recovery, to get some data back from the journal when a btree root was bad. We don't need it for that purpose anymore; on bad btree root we'll launch btree node scan and reconstruct all the interior nodes. If there's a key in the journal for a depth that doesn't exists, and it's not from check_topology/btree node scan, we should just ignore it. Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 1e68e61f08e8..dbd066260725 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -271,13 +271,24 @@ static int bch2_journal_replay_key(struct btree_trans *trans, goto out; struct btree_path *path = btree_iter_path(trans, &iter); - if (unlikely(!btree_path_node(path, k->level))) { + if (unlikely(!btree_path_node(path, k->level) && + !k->allocated)) { + struct bch_fs *c = trans->c; + + if (!(c->recovery.passes_complete & (BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes)| + BIT_ULL(BCH_RECOVERY_PASS_check_topology)))) { + bch_err(c, "have key in journal replay for btree depth that does not exist, confused"); + ret = -EINVAL; + } +#if 0 bch2_trans_iter_exit(trans, &iter); bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p, BTREE_MAX_DEPTH, 0, iter_flags); ret = bch2_btree_iter_traverse(trans, &iter) ?: bch2_btree_increase_depth(trans, iter.path, 0) ?: -BCH_ERR_transaction_restart_nested; +#endif + k->overwritten = true; goto out; } -- cgit v1.2.3 From dd22844f48a71a414bd7e08cd4a9a5847974c07e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 7 Jun 2025 14:22:24 -0400 Subject: bcachefs: Read error message now prints if self healing Signed-off-by: Kent Overstreet --- fs/bcachefs/io_read.c | 11 +++++++++-- fs/bcachefs/io_read.h | 1 + 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index a77779afad01..04bbdcf58e40 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -343,6 +343,10 @@ static struct bch_read_bio *promote_alloc(struct btree_trans *trans, *bounce = true; *read_full = promote_full; + + if (have_io_error(failed)) + orig->self_healing = true; + return promote; nopromote: trace_io_read_nopromote(c, ret); @@ -635,12 +639,15 @@ static void bch2_rbio_retry(struct work_struct *work) prt_str(&buf, "(internal move) "); prt_str(&buf, "data read error, "); - if (!ret) + if (!ret) { prt_str(&buf, "successful retry"); - else + if (rbio->self_healing) + prt_str(&buf, ", self healing"); + } else prt_str(&buf, bch2_err_str(ret)); prt_newline(&buf); + if (!bkey_deleted(&sk.k->k)) { bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(sk.k)); prt_newline(&buf); diff --git a/fs/bcachefs/io_read.h b/fs/bcachefs/io_read.h index 45c959018919..9c5ddbf861b3 100644 --- a/fs/bcachefs/io_read.h +++ b/fs/bcachefs/io_read.h @@ -44,6 +44,7 @@ struct bch_read_bio { have_ioref:1, narrow_crcs:1, saw_error:1, + self_healing:1, context:2; }; u16 _state; -- cgit v1.2.3 From 263561649ee5875a922f4358e96d3deb17a91021 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 7 Jun 2025 14:27:35 -0400 Subject: bcachefs: Don't persistently run scan_for_btree_nodes bch2_btree_lost_data() gets called on btree node read error, but the error might be transient. btree_node_scan is expensive, and there's no need to run it persistently (marking it in the superblock as required to run) - check_topology will run it if required, via bch2_get_scanned_nodes(). Running it non-persistently is fine, to avoid check_topology having to rewind recovery to run it. Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.c | 2 ++ fs/bcachefs/recovery_passes.c | 14 +++++++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index dbd066260725..06d0ba0fbffb 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -99,9 +99,11 @@ int bch2_btree_lost_data(struct bch_fs *c, goto out; case BTREE_ID_snapshots: ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_reconstruct_snapshots, 0) ?: ret; + ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_topology, 0) ?: ret; ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes, 0) ?: ret; goto out; default: + ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_topology, 0) ?: ret; ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes, 0) ?: ret; goto out; } diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c index 605588e33fb3..35ac0d64d73a 100644 --- a/fs/bcachefs/recovery_passes.c +++ b/fs/bcachefs/recovery_passes.c @@ -294,8 +294,13 @@ static bool recovery_pass_needs_set(struct bch_fs *c, enum bch_run_recovery_pass_flags *flags) { struct bch_fs_recovery *r = &c->recovery; - bool in_recovery = test_bit(BCH_FS_in_recovery, &c->flags); - bool persistent = !in_recovery || !(*flags & RUN_RECOVERY_PASS_nopersistent); + + /* + * Never run scan_for_btree_nodes persistently: check_topology will run + * it if required + */ + if (pass == BCH_RECOVERY_PASS_scan_for_btree_nodes) + *flags |= RUN_RECOVERY_PASS_nopersistent; if ((*flags & RUN_RECOVERY_PASS_ratelimit) && !bch2_recovery_pass_want_ratelimit(c, pass)) @@ -310,6 +315,8 @@ static bool recovery_pass_needs_set(struct bch_fs *c, * Otherwise, we run run_explicit_recovery_pass when we find damage, so * it should run again even if it's already run: */ + bool in_recovery = test_bit(BCH_FS_in_recovery, &c->flags); + bool persistent = !in_recovery || !(*flags & RUN_RECOVERY_PASS_nopersistent); if (persistent ? !(c->sb.recovery_passes_required & BIT_ULL(pass)) @@ -334,6 +341,7 @@ int __bch2_run_explicit_recovery_pass(struct bch_fs *c, struct bch_fs_recovery *r = &c->recovery; int ret = 0; + lockdep_assert_held(&c->sb_lock); bch2_printbuf_make_room(out, 1024); @@ -446,7 +454,7 @@ int bch2_require_recovery_pass(struct bch_fs *c, int bch2_run_print_explicit_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass) { - enum bch_run_recovery_pass_flags flags = RUN_RECOVERY_PASS_nopersistent; + enum bch_run_recovery_pass_flags flags = 0; if (!recovery_pass_needs_set(c, pass, &flags)) return 0; -- cgit v1.2.3 From 3315113af178041ab474723804e1322cee7d0a97 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 7 Jun 2025 18:56:28 -0400 Subject: bcachefs: mark more errors autofix Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-errors_format.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 90c878f289b2..d06e73884871 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -165,7 +165,7 @@ enum bch_fsck_flags { x(ptr_to_missing_replicas_entry, 149, FSCK_AUTOFIX) \ x(ptr_to_missing_stripe, 150, 0) \ x(ptr_to_incorrect_stripe, 151, 0) \ - x(ptr_gen_newer_than_bucket_gen, 152, 0) \ + x(ptr_gen_newer_than_bucket_gen, 152, FSCK_AUTOFIX) \ x(ptr_too_stale, 153, 0) \ x(stale_dirty_ptr, 154, FSCK_AUTOFIX) \ x(ptr_bucket_data_type_mismatch, 155, 0) \ @@ -236,7 +236,7 @@ enum bch_fsck_flags { x(inode_multiple_links_but_nlink_0, 207, FSCK_AUTOFIX) \ x(inode_wrong_backpointer, 208, FSCK_AUTOFIX) \ x(inode_wrong_nlink, 209, FSCK_AUTOFIX) \ - x(inode_has_child_snapshots_wrong, 287, 0) \ + x(inode_has_child_snapshots_wrong, 287, FSCK_AUTOFIX) \ x(inode_unreachable, 210, FSCK_AUTOFIX) \ x(inode_journal_seq_in_future, 299, FSCK_AUTOFIX) \ x(inode_i_sectors_underflow, 312, FSCK_AUTOFIX) \ @@ -279,8 +279,8 @@ enum bch_fsck_flags { x(root_dir_missing, 239, 0) \ x(root_inode_not_dir, 240, 0) \ x(dir_loop, 241, 0) \ - x(hash_table_key_duplicate, 242, 0) \ - x(hash_table_key_wrong_offset, 243, 0) \ + x(hash_table_key_duplicate, 242, FSCK_AUTOFIX) \ + x(hash_table_key_wrong_offset, 243, FSCK_AUTOFIX) \ x(unlinked_inode_not_on_deleted_list, 244, FSCK_AUTOFIX) \ x(reflink_p_front_pad_bad, 245, 0) \ x(journal_entry_dup_same_device, 246, 0) \ -- cgit v1.2.3 From 0acb385ec19c99b213d28a309a941790758c53a8 Mon Sep 17 00:00:00 2001 From: Alan Huang Date: Tue, 20 May 2025 15:34:28 +0800 Subject: bcachefs: Fix possible console lock involved deadlock Link: https://lore.kernel.org/all/6822ab02.050a0220.f2294.00cb.GAE@google.com/T/ Reported-by: syzbot+2c3ef91c9523c3d1a25c@syzkaller.appspotmail.com Signed-off-by: Alan Huang Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 1 - fs/bcachefs/btree_locking.c | 2 +- fs/bcachefs/error.c | 2 +- fs/bcachefs/super.c | 11 +++-------- fs/bcachefs/util.c | 10 ++-------- fs/bcachefs/util.h | 2 +- 6 files changed, 8 insertions(+), 20 deletions(-) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 3651a296d506..5a1cede2febf 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -296,7 +296,6 @@ do { \ #define bch2_fmt(_c, fmt) bch2_log_msg(_c, fmt "\n") void bch2_print_str(struct bch_fs *, const char *, const char *); -void bch2_print_str_nonblocking(struct bch_fs *, const char *, const char *); __printf(2, 3) void bch2_print_opts(struct bch_opts *, const char *, ...); diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c index 47035aae232e..91a51aef82f1 100644 --- a/fs/bcachefs/btree_locking.c +++ b/fs/bcachefs/btree_locking.c @@ -213,7 +213,7 @@ static noinline __noreturn void break_cycle_fail(struct lock_graph *g) prt_newline(&buf); } - bch2_print_str_nonblocking(g->g->trans->c, KERN_ERR, buf.buf); + bch2_print_str(g->g->trans->c, KERN_ERR, buf.buf); printbuf_exit(&buf); BUG(); } diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index ff49cebfd0e8..a8ec6aae5738 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -69,7 +69,7 @@ static bool bch2_fs_trans_inconsistent(struct bch_fs *c, struct btree_trans *tra if (trans) bch2_trans_updates_to_text(&buf, trans); bool ret = __bch2_inconsistent_error(c, &buf); - bch2_print_str_nonblocking(c, KERN_ERR, buf.buf); + bch2_print_str(c, KERN_ERR, buf.buf); printbuf_exit(&buf); return ret; diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 8648f22411be..ae8fa58a208d 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -104,7 +104,7 @@ const char * const bch2_dev_write_refs[] = { #undef x static void __bch2_print_str(struct bch_fs *c, const char *prefix, - const char *str, bool nonblocking) + const char *str) { #ifdef __KERNEL__ struct stdio_redirect *stdio = bch2_fs_stdio_redirect(c); @@ -114,17 +114,12 @@ static void __bch2_print_str(struct bch_fs *c, const char *prefix, return; } #endif - bch2_print_string_as_lines(KERN_ERR, str, nonblocking); + bch2_print_string_as_lines(KERN_ERR, str); } void bch2_print_str(struct bch_fs *c, const char *prefix, const char *str) { - __bch2_print_str(c, prefix, str, false); -} - -void bch2_print_str_nonblocking(struct bch_fs *c, const char *prefix, const char *str) -{ - __bch2_print_str(c, prefix, str, true); + __bch2_print_str(c, prefix, str); } __printf(2, 0) diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index dc3817f545fa..df9a6071fe18 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -262,8 +262,7 @@ static bool string_is_spaces(const char *str) return true; } -void bch2_print_string_as_lines(const char *prefix, const char *lines, - bool nonblocking) +void bch2_print_string_as_lines(const char *prefix, const char *lines) { bool locked = false; const char *p; @@ -273,12 +272,7 @@ void bch2_print_string_as_lines(const char *prefix, const char *lines, return; } - if (!nonblocking) { - console_lock(); - locked = true; - } else { - locked = console_trylock(); - } + locked = console_trylock(); while (*lines) { p = strchrnul(lines, '\n'); diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 0a4b1d433621..6488f098d140 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -214,7 +214,7 @@ u64 bch2_read_flag_list(const char *, const char * const[]); void bch2_prt_u64_base2_nbits(struct printbuf *, u64, unsigned); void bch2_prt_u64_base2(struct printbuf *, u64); -void bch2_print_string_as_lines(const char *, const char *, bool); +void bch2_print_string_as_lines(const char *, const char *); typedef DARRAY(unsigned long) bch_stacktrace; int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *, unsigned, gfp_t); -- cgit v1.2.3 From f946ce0be45e75801583a5371fccf7466961d9d0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 7 Jun 2025 20:18:16 -0400 Subject: bcachefs: Make sure opts.read_only gets propagated back to VFS If we think we're read-only but the VFS doesn't, fun will ensue. And now that we know we have to be able to do this safely, just make nochanges imply ro. Reported-by: syzbot+a7d6ceaba099cc21dee4@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 8 ++++++++ fs/bcachefs/recovery.c | 4 +++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 85d13f800165..3063a8ddc2df 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -2490,6 +2490,14 @@ static int bch2_fs_get_tree(struct fs_context *fc) if (ret) goto err_stop_fs; + /* + * We might be doing a RO mount because other options required it, or we + * have no alloc info and it's a small image with no room to regenerate + * it + */ + if (c->opts.read_only) + fc->sb_flags |= SB_RDONLY; + sb = sget(fc->fs_type, NULL, bch2_set_super, fc->sb_flags|SB_NOSEC, c); ret = PTR_ERR_OR_ZERO(sb); if (ret) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 06d0ba0fbffb..520eb72c4971 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -752,9 +752,11 @@ int bch2_fs_recovery(struct bch_fs *c) ? min(c->opts.recovery_pass_last, BCH_RECOVERY_PASS_snapshots_read) : BCH_RECOVERY_PASS_snapshots_read; c->opts.nochanges = true; - c->opts.read_only = true; } + if (c->opts.nochanges) + c->opts.read_only = true; + mutex_lock(&c->sb_lock); struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); bool write_sb = false; -- cgit v1.2.3 From 757601ef853359fe2d57d75c00b5045f62efc608 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 8 Jun 2025 11:40:00 -0400 Subject: bcachefs: Don't put rhashtable on stack Object debugging generally needs special provisions for putting said objects on the stack, which rhashtable does not have. Reported-by: syzbot+bcc38a9556d0324c2ec2@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/movinggc.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index 6d7b1d5f7697..27e68d470ad0 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -28,7 +28,7 @@ #include struct buckets_in_flight { - struct rhashtable table; + struct rhashtable *table; struct move_bucket *first; struct move_bucket *last; size_t nr; @@ -98,7 +98,7 @@ out: static void move_bucket_free(struct buckets_in_flight *list, struct move_bucket *b) { - int ret = rhashtable_remove_fast(&list->table, &b->hash, + int ret = rhashtable_remove_fast(list->table, &b->hash, bch_move_bucket_params); BUG_ON(ret); kfree(b); @@ -133,7 +133,7 @@ static void move_buckets_wait(struct moving_context *ctxt, static bool bucket_in_flight(struct buckets_in_flight *list, struct move_bucket_key k) { - return rhashtable_lookup_fast(&list->table, &k, bch_move_bucket_params); + return rhashtable_lookup_fast(list->table, &k, bch_move_bucket_params); } static int bch2_copygc_get_buckets(struct moving_context *ctxt, @@ -185,7 +185,7 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt, goto err; } - ret2 = rhashtable_lookup_insert_fast(&buckets_in_flight->table, &b_i->hash, + ret2 = rhashtable_lookup_insert_fast(buckets_in_flight->table, &b_i->hash, bch_move_bucket_params); BUG_ON(ret2); @@ -350,10 +350,13 @@ static int bch2_copygc_thread(void *arg) struct buckets_in_flight buckets = {}; u64 last, wait; - int ret = rhashtable_init(&buckets.table, &bch_move_bucket_params); + buckets.table = kzalloc(sizeof(*buckets.table), GFP_KERNEL); + int ret = !buckets.table + ? -ENOMEM + : rhashtable_init(buckets.table, &bch_move_bucket_params); bch_err_msg(c, ret, "allocating copygc buckets in flight"); if (ret) - return ret; + goto err; set_freezable(); @@ -421,11 +424,12 @@ static int bch2_copygc_thread(void *arg) } move_buckets_wait(&ctxt, &buckets, true); - rhashtable_destroy(&buckets.table); + rhashtable_destroy(buckets.table); bch2_moving_ctxt_exit(&ctxt); bch2_move_stats_exit(&move_stats, c); - - return 0; +err: + kfree(buckets.table); + return ret; } void bch2_copygc_stop(struct bch_fs *c) -- cgit v1.2.3 From 082c74411491f8b0d31465fc104b8342e66c4056 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 8 Jun 2025 11:58:59 -0400 Subject: bcachefs: Fix downgrade_table_extra() Fix a UAF: we were calling darray_make_room() and retaining a pointer to the old buffer. And fix an UBSAN warning: struct bch_sb_field_downgrade_entry uses __counted_by, so set dst->nr_errors before assigning to the array entry. Reported-by: syzbot+14c52d86ddbd89bea13e@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-downgrade.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index b61f88450a6d..1506d05e0665 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -253,6 +253,7 @@ DOWNGRADE_TABLE() static int downgrade_table_extra(struct bch_fs *c, darray_char *table) { + unsigned dst_offset = table->nr; struct bch_sb_field_downgrade_entry *dst = (void *) &darray_top(*table); unsigned bytes = sizeof(*dst) + sizeof(dst->errors[0]) * le16_to_cpu(dst->nr_errors); int ret = 0; @@ -268,6 +269,9 @@ static int downgrade_table_extra(struct bch_fs *c, darray_char *table) if (ret) return ret; + dst = (void *) &table->data[dst_offset]; + dst->nr_errors = cpu_to_le16(nr_errors + 1); + /* open coded __set_bit_le64, as dst is packed and * dst->recovery_passes is misaligned */ unsigned b = BCH_RECOVERY_PASS_STABLE_check_allocations; @@ -278,7 +282,6 @@ static int downgrade_table_extra(struct bch_fs *c, darray_char *table) break; } - dst->nr_errors = cpu_to_le16(nr_errors); return ret; } -- cgit v1.2.3 From 54aacfe3976893ee04582a0bd61967bbee5a7e04 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 8 Jun 2025 12:17:02 -0400 Subject: bcachefs: Fix rcu_pending for PREEMPT_RT PREEMPT_RT redefines how standard spinlocks work, so local_irq_save() + spin_lock() is no longer equivalent to spin_lock_irqsave(). Fortunately, we don't strictly need to do it that way. Signed-off-by: Kent Overstreet --- fs/bcachefs/rcu_pending.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/fs/bcachefs/rcu_pending.c b/fs/bcachefs/rcu_pending.c index bef2aa1b8bcd..b1438be9d690 100644 --- a/fs/bcachefs/rcu_pending.c +++ b/fs/bcachefs/rcu_pending.c @@ -182,11 +182,6 @@ static inline void kfree_bulk(size_t nr, void ** p) while (nr--) kfree(*p); } - -#define local_irq_save(flags) \ -do { \ - flags = 0; \ -} while (0) #endif static noinline void __process_finished_items(struct rcu_pending *pending, @@ -429,9 +424,15 @@ __rcu_pending_enqueue(struct rcu_pending *pending, struct rcu_head *head, BUG_ON((ptr != NULL) != (pending->process == RCU_PENDING_KVFREE_FN)); - local_irq_save(flags); - p = this_cpu_ptr(pending->p); - spin_lock(&p->lock); + /* We could technically be scheduled before taking the lock and end up + * using a different cpu's rcu_pending_pcpu: that's ok, it needs a lock + * anyways + * + * And we have to do it this way to avoid breaking PREEMPT_RT, which + * redefines how spinlocks work: + */ + p = raw_cpu_ptr(pending->p); + spin_lock_irqsave(&p->lock, flags); rcu_gp_poll_state_t seq = __get_state_synchronize_rcu(pending->srcu); restart: if (may_sleep && @@ -520,9 +521,8 @@ check_expired: goto free_node; } - local_irq_save(flags); - p = this_cpu_ptr(pending->p); - spin_lock(&p->lock); + p = raw_cpu_ptr(pending->p); + spin_lock_irqsave(&p->lock, flags); goto restart; } -- cgit v1.2.3 From 9e48f574e55731106b26dc33d8b0be1adedf3f20 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 9 Jun 2025 17:30:40 -0400 Subject: bcachefs: Fix leak in bch2_fs_recovery() error path Fix a small leak of the superblock 'clean' section. Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 520eb72c4971..0b21fa6ff062 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -1108,9 +1108,6 @@ use_clean: out: bch2_flush_fsck_errs(c); - if (!IS_ERR(clean)) - kfree(clean); - if (!ret && test_bit(BCH_FS_need_delete_dead_snapshots, &c->flags) && !c->opts.nochanges) { @@ -1119,6 +1116,9 @@ out: } bch_err_fn(c, ret); +final_out: + if (!IS_ERR(clean)) + kfree(clean); return ret; err: fsck_err: @@ -1132,7 +1132,7 @@ fsck_err: bch2_print_str(c, KERN_ERR, buf.buf); printbuf_exit(&buf); } - return ret; + goto final_out; } int bch2_fs_initialize(struct bch_fs *c) -- cgit v1.2.3 From c3dd25319c1818e067f41f41127f935f153498e6 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 9 Jun 2025 17:28:00 -0400 Subject: bcachefs: Don't pass trans to fsck_err() in gc_accounting_done fsck_err() can return a transaction restart if passed a transaction object - this has always been true when it has to drop locks to prompt for user input, but we're seeing this more now that we're logging the error being corrected in the journal. gc_accounting_done() doesn't call fsck_err() from an actual commit loop, and it doesn't need to be holding btree locks when it calls fsck_err(), so the easy fix here for the unhandled transaction restart is to just not pass it the transaction object. We'll miss out on the fancy new logging, but that's ok. Signed-off-by: Kent Overstreet --- fs/bcachefs/disk_accounting.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index 3d59a57a5256..f7528cd69c73 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -618,7 +618,9 @@ int bch2_gc_accounting_done(struct bch_fs *c) for (unsigned j = 0; j < nr; j++) src_v[j] -= dst_v[j]; - if (fsck_err(trans, accounting_mismatch, "%s", buf.buf)) { + bch2_trans_unlock_long(trans); + + if (fsck_err(c, accounting_mismatch, "%s", buf.buf)) { percpu_up_write(&c->mark_lock); ret = commit_do(trans, NULL, NULL, 0, bch2_disk_accounting_mod(trans, &acc_k, src_v, nr, false)); -- cgit v1.2.3 From e82b3a63a9a91cc5c585efb3e28f32100f24e3e1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 10 Jun 2025 11:24:04 +0200 Subject: bcachefs: ioctl: avoid stack overflow warning Multiple ioctl handlers individually use a lot of stack space, and clang chooses to inline them into the bch2_fs_ioctl() function, blowing through the warning limit: fs/bcachefs/chardev.c:655:6: error: stack frame size (1032) exceeds limit (1024) in 'bch2_fs_ioctl' [-Werror,-Wframe-larger-than] 655 | long bch2_fs_ioctl(struct bch_fs *c, unsigned cmd, void __user *arg) By marking the largest two of them as noinline_for_stack, no indidual code path ends up using this much, which avoids the warning and reduces the possible total stack usage in the ioctl handler. Signed-off-by: Arnd Bergmann Signed-off-by: Kent Overstreet --- fs/bcachefs/chardev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index 2d38466eddfd..fde3c2380e28 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -399,7 +399,7 @@ static long bch2_ioctl_data(struct bch_fs *c, return ret; } -static long bch2_ioctl_fs_usage(struct bch_fs *c, +static noinline_for_stack long bch2_ioctl_fs_usage(struct bch_fs *c, struct bch_ioctl_fs_usage __user *user_arg) { struct bch_ioctl_fs_usage arg = {}; @@ -469,7 +469,7 @@ err: } /* obsolete, didn't allow for new data types: */ -static long bch2_ioctl_dev_usage(struct bch_fs *c, +static noinline_for_stack long bch2_ioctl_dev_usage(struct bch_fs *c, struct bch_ioctl_dev_usage __user *user_arg) { struct bch_ioctl_dev_usage arg; -- cgit v1.2.3 From 625c494db95624f09f5e7b81b64d4da34e45bd2a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 11 Jun 2025 13:32:58 -0400 Subject: bcachefs: Fix version checks in validate_bset() It seems btree node scan picked up a partially overwritten btree node, and corrected the "bset version older than sb version_min" error - resulting in an invalid superblock with a bad version_min field. Don't run this check at all when we're in btree node scan, and when we do run it, do something saner if the bset version is totally crazy. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_io.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 6787d5b91eab..d8f3c4c65e90 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -741,16 +741,22 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, BCH_VERSION_MAJOR(version), BCH_VERSION_MINOR(version)); - if (btree_err_on(version < c->sb.version_min, + if (c->recovery.curr_pass != BCH_RECOVERY_PASS_scan_for_btree_nodes && + btree_err_on(version < c->sb.version_min, -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, NULL, btree_node_bset_older_than_sb_min, "bset version %u older than superblock version_min %u", version, c->sb.version_min)) { - mutex_lock(&c->sb_lock); - c->disk_sb.sb->version_min = cpu_to_le16(version); - bch2_write_super(c); - mutex_unlock(&c->sb_lock); + if (bch2_version_compatible(version)) { + mutex_lock(&c->sb_lock); + c->disk_sb.sb->version_min = cpu_to_le16(version); + bch2_write_super(c); + mutex_unlock(&c->sb_lock); + } else { + /* We have no idea what's going on: */ + i->version = cpu_to_le16(c->sb.version); + } } if (btree_err_on(BCH_VERSION_MAJOR(version) > -- cgit v1.2.3 From 205da7c026739d965e605d39001049c7b6728e87 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 8 Jun 2025 11:31:23 -0400 Subject: bcachefs: Don't trust sb->nr_devices in members_to_text() We have to be able to print superblock sections even if they fail to validate (for debugging), so we have to calculate the number of entries from the field size. Reported-by: syzbot+5138f00559ffb3cb3610@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-members.c | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index 363eb0c6eb7c..6245e342a8a8 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -325,9 +325,17 @@ static void bch2_sb_members_v1_to_text(struct printbuf *out, struct bch_sb *sb, { struct bch_sb_field_members_v1 *mi = field_to_type(f, members_v1); struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups); - unsigned i; - for (i = 0; i < sb->nr_devices; i++) + if (vstruct_end(&mi->field) <= (void *) &mi->_members[0]) { + prt_printf(out, "field ends before start of entries"); + return; + } + + unsigned nr = (vstruct_end(&mi->field) - (void *) &mi->_members[0]) / sizeof(mi->_members[0]); + if (nr != sb->nr_devices) + prt_printf(out, "nr_devices mismatch: have %i entries, should be %u", nr, sb->nr_devices); + + for (unsigned i = 0; i < min(sb->nr_devices, nr); i++) member_to_text(out, members_v1_get(mi, i), gi, sb, i); } @@ -341,9 +349,27 @@ static void bch2_sb_members_v2_to_text(struct printbuf *out, struct bch_sb *sb, { struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2); struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups); - unsigned i; - for (i = 0; i < sb->nr_devices; i++) + if (vstruct_end(&mi->field) <= (void *) &mi->_members[0]) { + prt_printf(out, "field ends before start of entries"); + return; + } + + if (!le16_to_cpu(mi->member_bytes)) { + prt_printf(out, "member_bytes 0"); + return; + } + + unsigned nr = (vstruct_end(&mi->field) - (void *) &mi->_members[0]) / le16_to_cpu(mi->member_bytes); + if (nr != sb->nr_devices) + prt_printf(out, "nr_devices mismatch: have %i entries, should be %u", nr, sb->nr_devices); + + /* + * We call to_text() on superblock sections that haven't passed + * validate, so we can't trust sb->nr_devices. + */ + + for (unsigned i = 0; i < min(sb->nr_devices, nr); i++) member_to_text(out, members_v2_get(mi, i), gi, sb, i); } -- cgit v1.2.3 From b68baf9a87330148d3ad074e48503a4e9c5c3929 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 11 Jun 2025 15:57:48 -0400 Subject: bcachefs: Print devices we're mounting on multi device filesystems Previously, we only ever logged the filesystem UUID. Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index ae8fa58a208d..a5b97c9c5163 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1067,12 +1067,13 @@ noinline_for_stack static void print_mount_opts(struct bch_fs *c) { enum bch_opt_id i; - struct printbuf p = PRINTBUF; - bool first = true; + CLASS(printbuf, p)(); + bch2_log_msg_start(c, &p); prt_str(&p, "starting version "); bch2_version_to_text(&p, c->sb.version); + bool first = true; for (i = 0; i < bch2_opts_nr; i++) { const struct bch_option *opt = &bch2_opt_table[i]; u64 v = bch2_opt_get_by_id(&c->opts, i); @@ -1089,17 +1090,24 @@ static void print_mount_opts(struct bch_fs *c) } if (c->sb.version_incompat_allowed != c->sb.version) { - prt_printf(&p, "\n allowing incompatible features above "); + prt_printf(&p, "\nallowing incompatible features above "); bch2_version_to_text(&p, c->sb.version_incompat_allowed); } if (c->opts.verbose) { - prt_printf(&p, "\n features: "); + prt_printf(&p, "\nfeatures: "); prt_bitflags(&p, bch2_sb_features, c->sb.features); } - bch_info(c, "%s", p.buf); - printbuf_exit(&p); + if (c->sb.multi_device) { + prt_printf(&p, "\nwith devices"); + for_each_online_member(c, ca, BCH_DEV_READ_REF_bch2_online_devs) { + prt_char(&p, ' '); + prt_str(&p, ca->name); + } + } + + bch2_print_str(c, KERN_INFO, p.buf); } static bool bch2_fs_may_start(struct bch_fs *c) -- cgit v1.2.3 From cd1124244be30fd3e87da9186508aab371e9307d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 8 Jun 2025 12:35:20 -0400 Subject: bcachefs: Ensure that snapshot creation propagates has_case_insensitive We normally can't create a new directory with the case-insensitive option already set - except when we're creating a snapshot. And if casefolding is enabled filesystem wide, we should still set it even though not strictly required, for consistency. Signed-off-by: Kent Overstreet --- fs/bcachefs/namei.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/bcachefs/namei.c b/fs/bcachefs/namei.c index 24120037c031..779c22eb3979 100644 --- a/fs/bcachefs/namei.c +++ b/fs/bcachefs/namei.c @@ -175,6 +175,16 @@ int bch2_create_trans(struct btree_trans *trans, new_inode->bi_dir_offset = dir_offset; } + if (S_ISDIR(mode)) { + ret = bch2_maybe_propagate_has_case_insensitive(trans, + (subvol_inum) { + new_inode->bi_subvol ?: dir.subvol, + new_inode->bi_inum }, + new_inode); + if (ret) + goto err; + } + if (S_ISDIR(mode) && !new_inode->bi_subvol) new_inode->bi_depth = dir_u->bi_depth + 1; -- cgit v1.2.3 From aef22f6fe7a630d536f9eaa0a7a2ed0f90ea369e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 10 Jun 2025 22:32:14 -0400 Subject: bcachefs: Don't trace should_be_locked unless changing Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_locking.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h index 9adca77e2580..f2173a3316f4 100644 --- a/fs/bcachefs/btree_locking.h +++ b/fs/bcachefs/btree_locking.h @@ -417,8 +417,10 @@ static inline void btree_path_set_should_be_locked(struct btree_trans *trans, st EBUG_ON(!btree_node_locked(path, path->level)); EBUG_ON(path->uptodate); - path->should_be_locked = true; - trace_btree_path_should_be_locked(trans, path); + if (!path->should_be_locked) { + path->should_be_locked = true; + trace_btree_path_should_be_locked(trans, path); + } } static inline void __btree_path_set_level_up(struct btree_trans *trans, -- cgit v1.2.3 From aa2024c01a9afba6728b362626f868811ca872ee Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 10 Jun 2025 20:10:18 -0400 Subject: KVM: x86/mmu: Embed direct bits into gpa for KVM_PRE_FAULT_MEMORY Bug[*] reported for TDX case when enabling KVM_PRE_FAULT_MEMORY in QEMU. It turns out that @gpa passed to kvm_mmu_do_page_fault() doesn't have shared bit set when the memory attribute of it is shared, and it leads to wrong root in tdp_mmu_get_root_for_fault(). Fix it by embedding the direct bits in the gpa that is passed to kvm_tdp_map_page(), when the memory of the gpa is not private. [*] https://lore.kernel.org/qemu-devel/4a757796-11c2-47f1-ae0d-335626e818fd@intel.com/ Reported-by: Xiaoyao Li Closes: https://lore.kernel.org/qemu-devel/4a757796-11c2-47f1-ae0d-335626e818fd@intel.com/ Signed-off-by: Paolo Bonzini Signed-off-by: Xiaoyao Li Message-ID: <20250611001018.2179964-1-xiaoyao.li@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index cbc84c6abc2e..a4040578b537 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4896,6 +4896,7 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu, { u64 error_code = PFERR_GUEST_FINAL_MASK; u8 level = PG_LEVEL_4K; + u64 direct_bits; u64 end; int r; @@ -4910,15 +4911,18 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu, if (r) return r; + direct_bits = 0; if (kvm_arch_has_private_mem(vcpu->kvm) && kvm_mem_is_private(vcpu->kvm, gpa_to_gfn(range->gpa))) error_code |= PFERR_PRIVATE_ACCESS; + else + direct_bits = gfn_to_gpa(kvm_gfn_direct_bits(vcpu->kvm)); /* * Shadow paging uses GVA for kvm page fault, so restrict to * two-dimensional paging. */ - r = kvm_tdp_map_page(vcpu, range->gpa, error_code, &level); + r = kvm_tdp_map_page(vcpu, range->gpa | direct_bits, error_code, &level); if (r < 0) return r; -- cgit v1.2.3 From 8046d29dde17002523f94d3e6e0ebe486ce52166 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 12 Jun 2025 00:47:44 -0400 Subject: KVM: x86/mmu: Reject direct bits in gpa passed to KVM_PRE_FAULT_MEMORY Only let userspace pass the same addresses that were used in KVM_SET_USER_MEMORY_REGION (or KVM_SET_USER_MEMORY_REGION2); gpas in the the upper half of the address space are an implementation detail of TDX and KVM. Extracted from a patch by Sean Christopherson . Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index a4040578b537..4e06e2e89a8f 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4903,6 +4903,9 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu, if (!vcpu->kvm->arch.pre_fault_allowed) return -EOPNOTSUPP; + if (kvm_is_gfn_alias(vcpu->kvm, gpa_to_gfn(range->gpa))) + return -EINVAL; + /* * reload is efficient when called repeatedly, so we can do it on * every iteration. -- cgit v1.2.3 From 33db8c97b4cfa0328054fb755dfbcd6e7f3c7a9d Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 9 Jun 2025 14:26:54 +0530 Subject: rust: Use CpuId in place of raw CPU numbers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the newly defined `CpuId` abstraction instead of raw CPU numbers. This also fixes a doctest failure for configurations where `nr_cpu_ids < 4`. The C `cpumask_{set|clear}_cpu()` APIs emit a warning when given an invalid CPU number — but only if `CONFIG_DEBUG_PER_CPU_MAPS=y` is set. Meanwhile, `cpumask_weight()` only considers CPUs up to `nr_cpu_ids`, which can cause inconsistencies: a CPU number greater than `nr_cpu_ids` may be set in the mask, yet the weight calculation won't reflect it. This leads to doctest failures when `nr_cpu_ids < 4`, as the test tries to set CPUs 2 and 3: rust_doctest_kernel_cpumask_rs_0.location: rust/kernel/cpumask.rs:180 rust_doctest_kernel_cpumask_rs_0: ASSERTION FAILED at rust/kernel/cpumask.rs:190 Fixes: 8961b8cb3099 ("rust: cpumask: Add initial abstractions") Reported-by: Miguel Ojeda Closes: https://lore.kernel.org/rust-for-linux/CANiq72k3ozKkLMinTLQwvkyg9K=BeRxs1oYZSKhJHY-veEyZdg@mail.gmail.com/ Reported-by: Andreas Hindborg Closes: https://lore.kernel.org/all/87qzzy3ric.fsf@kernel.org/ Suggested-by: Boqun Feng Signed-off-by: Viresh Kumar Reviewed-by: Boqun Feng --- drivers/cpufreq/rcpufreq_dt.rs | 4 ++-- rust/kernel/cpu.rs | 4 ++-- rust/kernel/cpufreq.rs | 27 +++++++++++++++------- rust/kernel/cpumask.rs | 51 +++++++++++++++++++++++++++++------------- 4 files changed, 59 insertions(+), 27 deletions(-) diff --git a/drivers/cpufreq/rcpufreq_dt.rs b/drivers/cpufreq/rcpufreq_dt.rs index 94ed81644fe1..43c87d0259b6 100644 --- a/drivers/cpufreq/rcpufreq_dt.rs +++ b/drivers/cpufreq/rcpufreq_dt.rs @@ -26,9 +26,9 @@ fn find_supply_name_exact(dev: &Device, name: &str) -> Option { } /// Finds supply name for the CPU from DT. -fn find_supply_names(dev: &Device, cpu: u32) -> Option> { +fn find_supply_names(dev: &Device, cpu: cpu::CpuId) -> Option> { // Try "cpu0" for older DTs, fallback to "cpu". - let name = (cpu == 0) + let name = (cpu.as_u32() == 0) .then(|| find_supply_name_exact(dev, "cpu0")) .flatten() .or_else(|| find_supply_name_exact(dev, "cpu"))?; diff --git a/rust/kernel/cpu.rs b/rust/kernel/cpu.rs index 6a3aecb12468..abc780d7a8ec 100644 --- a/rust/kernel/cpu.rs +++ b/rust/kernel/cpu.rs @@ -127,9 +127,9 @@ impl From for i32 { /// Callers must ensure that the CPU device is not used after it has been unregistered. /// This can be achieved, for example, by registering a CPU hotplug notifier and removing /// any references to the CPU device within the notifier's callback. -pub unsafe fn from_cpu(cpu: u32) -> Result<&'static Device> { +pub unsafe fn from_cpu(cpu: CpuId) -> Result<&'static Device> { // SAFETY: It is safe to call `get_cpu_device()` for any CPU. - let ptr = unsafe { bindings::get_cpu_device(cpu) }; + let ptr = unsafe { bindings::get_cpu_device(u32::from(cpu)) }; if ptr.is_null() { return Err(ENODEV); } diff --git a/rust/kernel/cpufreq.rs b/rust/kernel/cpufreq.rs index 9b995f18aac6..11b03e9d7e89 100644 --- a/rust/kernel/cpufreq.rs +++ b/rust/kernel/cpufreq.rs @@ -10,6 +10,7 @@ use crate::{ clk::Hertz, + cpu::CpuId, cpumask, device::{Bound, Device}, devres::Devres, @@ -465,8 +466,9 @@ impl Policy { /// Returns the primary CPU for the [`Policy`]. #[inline] - pub fn cpu(&self) -> u32 { - self.as_ref().cpu + pub fn cpu(&self) -> CpuId { + // SAFETY: The C API guarantees that `cpu` refers to a valid CPU number. + unsafe { CpuId::from_u32_unchecked(self.as_ref().cpu) } } /// Returns the minimum frequency for the [`Policy`]. @@ -525,7 +527,7 @@ impl Policy { #[inline] pub fn generic_get(&self) -> Result { // SAFETY: By the type invariant, the pointer stored in `self` is valid. - Ok(unsafe { bindings::cpufreq_generic_get(self.cpu()) }) + Ok(unsafe { bindings::cpufreq_generic_get(u32::from(self.cpu())) }) } /// Provides a wrapper to the register with energy model using the OPP core. @@ -678,9 +680,9 @@ impl Policy { struct PolicyCpu<'a>(&'a mut Policy); impl<'a> PolicyCpu<'a> { - fn from_cpu(cpu: u32) -> Result { + fn from_cpu(cpu: CpuId) -> Result { // SAFETY: It is safe to call `cpufreq_cpu_get` for any valid CPU. - let ptr = from_err_ptr(unsafe { bindings::cpufreq_cpu_get(cpu) })?; + let ptr = from_err_ptr(unsafe { bindings::cpufreq_cpu_get(u32::from(cpu)) })?; Ok(Self( // SAFETY: The `ptr` is guaranteed to be valid and remains valid for the lifetime of @@ -1266,7 +1268,10 @@ impl Registration { target_perf: usize, capacity: usize, ) { - if let Ok(mut policy) = PolicyCpu::from_cpu(cpu) { + // SAFETY: The C API guarantees that `cpu` refers to a valid CPU number. + let cpu_id = unsafe { CpuId::from_u32_unchecked(cpu) }; + + if let Ok(mut policy) = PolicyCpu::from_cpu(cpu_id) { T::adjust_perf(&mut policy, min_perf, target_perf, capacity); } } @@ -1321,7 +1326,10 @@ impl Registration { /// /// - This function may only be called from the cpufreq C infrastructure. unsafe extern "C" fn get_callback(cpu: u32) -> kernel::ffi::c_uint { - PolicyCpu::from_cpu(cpu).map_or(0, |mut policy| T::get(&mut policy).map_or(0, |f| f)) + // SAFETY: The C API guarantees that `cpu` refers to a valid CPU number. + let cpu_id = unsafe { CpuId::from_u32_unchecked(cpu) }; + + PolicyCpu::from_cpu(cpu_id).map_or(0, |mut policy| T::get(&mut policy).map_or(0, |f| f)) } /// Driver's `update_limit` callback. @@ -1344,8 +1352,11 @@ impl Registration { /// - This function may only be called from the cpufreq C infrastructure. /// - The pointer arguments must be valid pointers. unsafe extern "C" fn bios_limit_callback(cpu: i32, limit: *mut u32) -> kernel::ffi::c_int { + // SAFETY: The C API guarantees that `cpu` refers to a valid CPU number. + let cpu_id = unsafe { CpuId::from_i32_unchecked(cpu) }; + from_result(|| { - let mut policy = PolicyCpu::from_cpu(cpu as u32)?; + let mut policy = PolicyCpu::from_cpu(cpu_id)?; // SAFETY: `limit` is guaranteed by the C code to be valid. T::bios_limit(&mut policy, &mut (unsafe { *limit })).map(|()| 0) diff --git a/rust/kernel/cpumask.rs b/rust/kernel/cpumask.rs index c90bfac9346a..19c607709b5f 100644 --- a/rust/kernel/cpumask.rs +++ b/rust/kernel/cpumask.rs @@ -6,6 +6,7 @@ use crate::{ alloc::{AllocError, Flags}, + cpu::CpuId, prelude::*, types::Opaque, }; @@ -35,9 +36,10 @@ use core::ops::{Deref, DerefMut}; /// /// ``` /// use kernel::bindings; +/// use kernel::cpu::CpuId; /// use kernel::cpumask::Cpumask; /// -/// fn set_clear_cpu(ptr: *mut bindings::cpumask, set_cpu: u32, clear_cpu: i32) { +/// fn set_clear_cpu(ptr: *mut bindings::cpumask, set_cpu: CpuId, clear_cpu: CpuId) { /// // SAFETY: The `ptr` is valid for writing and remains valid for the lifetime of the /// // returned reference. /// let mask = unsafe { Cpumask::as_mut_ref(ptr) }; @@ -90,9 +92,9 @@ impl Cpumask { /// This mismatches kernel naming convention and corresponds to the C /// function `__cpumask_set_cpu()`. #[inline] - pub fn set(&mut self, cpu: u32) { + pub fn set(&mut self, cpu: CpuId) { // SAFETY: By the type invariant, `self.as_raw` is a valid argument to `__cpumask_set_cpu`. - unsafe { bindings::__cpumask_set_cpu(cpu, self.as_raw()) }; + unsafe { bindings::__cpumask_set_cpu(u32::from(cpu), self.as_raw()) }; } /// Clear `cpu` in the cpumask. @@ -101,19 +103,19 @@ impl Cpumask { /// This mismatches kernel naming convention and corresponds to the C /// function `__cpumask_clear_cpu()`. #[inline] - pub fn clear(&mut self, cpu: i32) { + pub fn clear(&mut self, cpu: CpuId) { // SAFETY: By the type invariant, `self.as_raw` is a valid argument to // `__cpumask_clear_cpu`. - unsafe { bindings::__cpumask_clear_cpu(cpu, self.as_raw()) }; + unsafe { bindings::__cpumask_clear_cpu(i32::from(cpu), self.as_raw()) }; } /// Test `cpu` in the cpumask. /// /// Equivalent to the kernel's `cpumask_test_cpu` API. #[inline] - pub fn test(&self, cpu: i32) -> bool { + pub fn test(&self, cpu: CpuId) -> bool { // SAFETY: By the type invariant, `self.as_raw` is a valid argument to `cpumask_test_cpu`. - unsafe { bindings::cpumask_test_cpu(cpu, self.as_raw()) } + unsafe { bindings::cpumask_test_cpu(i32::from(cpu), self.as_raw()) } } /// Set all CPUs in the cpumask. @@ -178,21 +180,40 @@ impl Cpumask { /// The following example demonstrates how to create and update a [`CpumaskVar`]. /// /// ``` +/// use kernel::cpu::CpuId; /// use kernel::cpumask::CpumaskVar; /// /// let mut mask = CpumaskVar::new_zero(GFP_KERNEL).unwrap(); /// /// assert!(mask.empty()); -/// mask.set(2); -/// assert!(mask.test(2)); -/// mask.set(3); -/// assert!(mask.test(3)); -/// assert_eq!(mask.weight(), 2); +/// let mut count = 0; +/// +/// let cpu2 = CpuId::from_u32(2); +/// if let Some(cpu) = cpu2 { +/// mask.set(cpu); +/// assert!(mask.test(cpu)); +/// count += 1; +/// } +/// +/// let cpu3 = CpuId::from_u32(3); +/// if let Some(cpu) = cpu3 { +/// mask.set(cpu); +/// assert!(mask.test(cpu)); +/// count += 1; +/// } +/// +/// assert_eq!(mask.weight(), count); /// /// let mask2 = CpumaskVar::try_clone(&mask).unwrap(); -/// assert!(mask2.test(2)); -/// assert!(mask2.test(3)); -/// assert_eq!(mask2.weight(), 2); +/// +/// if let Some(cpu) = cpu2 { +/// assert!(mask2.test(cpu)); +/// } +/// +/// if let Some(cpu) = cpu3 { +/// assert!(mask2.test(cpu)); +/// } +/// assert_eq!(mask2.weight(), count); /// ``` pub struct CpumaskVar { #[cfg(CONFIG_CPUMASK_OFFSTACK)] -- cgit v1.2.3 From c7f005f70d22cd5613cac30bf6d34867189e36a9 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 9 Jun 2025 16:44:16 +0530 Subject: rust: cpu: Add CpuId::current() to retrieve current CPU ID Introduce `CpuId::current()`, a constructor that wraps the C function `raw_smp_processor_id()` to retrieve the current CPU identifier without guaranteeing stability. This function should be used only when the caller can ensure that the CPU ID won't change unexpectedly due to preemption or migration. Suggested-by: Boqun Feng Signed-off-by: Viresh Kumar Reviewed-by: Boqun Feng --- MAINTAINERS | 1 + rust/helpers/cpu.c | 8 ++++++++ rust/helpers/helpers.c | 1 + rust/kernel/cpu.rs | 11 +++++++++++ 4 files changed, 21 insertions(+) create mode 100644 rust/helpers/cpu.c diff --git a/MAINTAINERS b/MAINTAINERS index a92290fffa16..4255186784c4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6254,6 +6254,7 @@ F: include/linux/cpuhotplug.h F: include/linux/smpboot.h F: kernel/cpu.c F: kernel/smpboot.* +F: rust/helper/cpu.c F: rust/kernel/cpu.rs CPU IDLE TIME MANAGEMENT FRAMEWORK diff --git a/rust/helpers/cpu.c b/rust/helpers/cpu.c new file mode 100644 index 000000000000..824e0adb19d4 --- /dev/null +++ b/rust/helpers/cpu.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include + +unsigned int rust_helper_raw_smp_processor_id(void) +{ + return raw_smp_processor_id(); +} diff --git a/rust/helpers/helpers.c b/rust/helpers/helpers.c index 0f1b5d115985..16fa9bca5949 100644 --- a/rust/helpers/helpers.c +++ b/rust/helpers/helpers.c @@ -13,6 +13,7 @@ #include "build_assert.c" #include "build_bug.c" #include "clk.c" +#include "cpu.c" #include "cpufreq.c" #include "cpumask.c" #include "cred.c" diff --git a/rust/kernel/cpu.rs b/rust/kernel/cpu.rs index abc780d7a8ec..b75403b0eb56 100644 --- a/rust/kernel/cpu.rs +++ b/rust/kernel/cpu.rs @@ -102,6 +102,17 @@ impl CpuId { pub fn as_u32(&self) -> u32 { self.0 } + + /// Returns the ID of the CPU the code is currently running on. + /// + /// The returned value is considered unstable because it may change + /// unexpectedly due to preemption or CPU migration. It should only be + /// used when the context ensures that the task remains on the same CPU + /// or the users could use a stale (yet valid) CPU ID. + pub fn current() -> Self { + // SAFETY: raw_smp_processor_id() always returns a valid CPU ID. + unsafe { Self::from_u32_unchecked(bindings::raw_smp_processor_id()) } + } } impl From for u32 { -- cgit v1.2.3 From b1a529bdb9641392b4f5fc91545598793c0b3b96 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 5 Jun 2025 22:34:31 +0100 Subject: selftests/mm: skip failed memfd setups in gup_longterm Unlike the other cases gup_longterm's memfd tests previously skipped the test when failing to set up the file descriptor to test. Restore this behavior to avoid hitting failures when hugetlb isn't configured. Link: https://lkml.kernel.org/r/20250605-selftest-mm-gup-longterm-tweaks-v1-1-2fae34b05958@kernel.org Fixes: 66bce7afbaca ("selftests/mm: fix test result reporting in gup_longterm") Signed-off-by: Mark Brown Reported-by: Lorenzo Stoakes Closes: https://lkml.kernel.org/r/a76fc252-0fe3-4d4b-a9a1-4a2895c2680d@lucifer.local Reviewed-by: Lorenzo Stoakes Tested-by: Lorenzo Stoakes Acked-by: David Hildenbrand Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/gup_longterm.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c index 8a97ac5176a4..29047d2e0c49 100644 --- a/tools/testing/selftests/mm/gup_longterm.c +++ b/tools/testing/selftests/mm/gup_longterm.c @@ -298,8 +298,11 @@ static void run_with_memfd(test_fn fn, const char *desc) log_test_start("%s ... with memfd", desc); fd = memfd_create("test", 0); - if (fd < 0) + if (fd < 0) { ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno)); + log_test_result(KSFT_SKIP); + return; + } fn(fd, pagesize); close(fd); @@ -366,6 +369,8 @@ static void run_with_memfd_hugetlb(test_fn fn, const char *desc, fd = memfd_create("test", flags); if (fd < 0) { ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno)); + log_test_result(KSFT_SKIP); + return; } fn(fd, hugetlbsize); -- cgit v1.2.3 From 331843c845d15413e9d89fd91cdcfa6912e291c3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 4 Jun 2025 17:23:37 -0700 Subject: scatterlist: fix extraneous '@'-sign kernel-doc notation Using "@argname@" in kernel-doc produces "argname****" (with "argname" in bold) in the generated html output, so use the expected kernel-doc notation of just "@argname" instead. "Fixes:" lines are added in case Matthew's patch [1] is backported. Link: https://lkml.kernel.org/r/20250605002337.2842659-1-rdunlap@infradead.org Link: https://lore.kernel.org/linux-doc/3bc4e779-7a79-42c1-8867-024f643a22fc@infradead.org/T/#m5d2bd9d21fb34f297aa4e7db069f09bc27b89007 [1] Fixes: 0db9299f48eb ("SG: Move functions to lib/scatterlist.c and add sg chaining allocator helpers") Fixes: 8d1d4b538bb1 ("scatterlist: inline sg_next()") Fixes: 18dabf473e15 ("Change table chaining layout") Signed-off-by: Randy Dunlap Reviewed-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/scatterlist.h | 4 ++-- lib/scatterlist.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 0cdbfc42f153..6f8a4965f9b9 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -99,7 +99,7 @@ static inline bool sg_is_last(struct scatterlist *sg) * @sg: The current sg entry * * Description: - * Usually the next entry will be @sg@ + 1, but if this sg element is part + * Usually the next entry will be @sg + 1, but if this sg element is part * of a chained scatterlist, it could jump to the start of a new * scatterlist array. * @@ -254,7 +254,7 @@ static inline void __sg_chain(struct scatterlist *chain_sg, * @sgl: Second scatterlist * * Description: - * Links @prv@ and @sgl@ together, to form a longer scatterlist. + * Links @prv and @sgl together, to form a longer scatterlist. * **/ static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents, diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 7582dfab7fe3..4af1c8b0775a 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -73,9 +73,9 @@ EXPORT_SYMBOL(sg_nents_for_len); * Should only be used casually, it (currently) scans the entire list * to get the last entry. * - * Note that the @sgl@ pointer passed in need not be the first one, - * the important bit is that @nents@ denotes the number of entries that - * exist from @sgl@. + * Note that the @sgl pointer passed in need not be the first one, + * the important bit is that @nents denotes the number of entries that + * exist from @sgl. * **/ struct scatterlist *sg_last(struct scatterlist *sgl, unsigned int nents) @@ -345,7 +345,7 @@ EXPORT_SYMBOL(__sg_alloc_table); * @gfp_mask: GFP allocation mask * * Description: - * Allocate and initialize an sg table. If @nents@ is larger than + * Allocate and initialize an sg table. If @nents is larger than * SG_MAX_SINGLE_ALLOC a chained sg table will be setup. * **/ -- cgit v1.2.3 From 1b8e4091ffb4655c761354eb33f41b618e809427 Mon Sep 17 00:00:00 2001 From: wangfushuai Date: Sat, 7 Jun 2025 23:36:14 +0800 Subject: docs: proc: update VmFlags documentation in smaps Remove outdated VM_DENYWRITE("dw") reference and add missing VM_LOCKONFAULT("lf") and VM_UFFD_MINOR("ui") flags. [akpm@linux-foundation.org: add "dp" (VM_DROPPABLE), per Tal] Link: https://lkml.kernel.org/r/20250607153614.81914-1-wangfushuai@baidu.com Signed-off-by: wangfushuai Cc: Andrii Nakryiko Cc: Catalin Marinas Cc: David Hildenbrand Cc: Jonathan Corbet Cc: Mariano Pache Cc: xu xin Cc: Tal Zussman Signed-off-by: Andrew Morton --- Documentation/filesystems/proc.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst index 2a17865dfe39..5236cb52e357 100644 --- a/Documentation/filesystems/proc.rst +++ b/Documentation/filesystems/proc.rst @@ -584,7 +584,6 @@ encoded manner. The codes are the following: ms may share gd stack segment growns down pf pure PFN range - dw disabled write to the mapped file lo pages are locked in memory io memory mapped I/O area sr sequential read advise provided @@ -607,8 +606,11 @@ encoded manner. The codes are the following: mt arm64 MTE allocation tags are enabled um userfaultfd missing tracking uw userfaultfd wr-protect tracking + ui userfaultfd minor fault ss shadow/guarded control stack page sl sealed + lf lock on fault pages + dp always lazily freeable mapping == ======================================= Note that there is no guarantee that every flag and associated mnemonic will -- cgit v1.2.3 From 0cf4b1687a187ba9247c71721d8b064634eda1f7 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Fri, 6 Jun 2025 13:50:32 +0100 Subject: mm/vma: reset VMA iterator on commit_merge() OOM failure While an OOM failure in commit_merge() isn't really feasible due to the allocation which might fail (a maple tree pre-allocation) being 'too small to fail', we do need to handle this case correctly regardless. In vma_merge_existing_range(), we can theoretically encounter failures which result in an OOM error in two ways - firstly dup_anon_vma() might fail with an OOM error, and secondly commit_merge() failing, ultimately, to pre-allocate a maple tree node. The abort logic for dup_anon_vma() resets the VMA iterator to the initial range, ensuring that any logic looping on this iterator will correctly proceed to the next VMA. However the commit_merge() abort logic does not do the same thing. This resulted in a syzbot report occurring because mlockall() iterates through VMAs, is tolerant of errors, but ended up with an incorrect previous VMA being specified due to incorrect iterator state. While making this change, it became apparent we are duplicating logic - the logic introduced in commit 41e6ddcaa0f1 ("mm/vma: add give_up_on_oom option on modify/merge, use in uffd release") duplicates the vmg->give_up_on_oom check in both abort branches. Additionally, we observe that we can perform the anon_dup check safely on dup_anon_vma() failure, as this will not be modified should this call fail. Finally, we need to reset the iterator in both cases, so now we can simply use the exact same code to abort for both. We remove the VM_WARN_ON(err != -ENOMEM) as it would be silly for this to be otherwise and it allows us to implement the abort check more neatly. Link: https://lkml.kernel.org/r/20250606125032.164249-1-lorenzo.stoakes@oracle.com Fixes: 47b16d0462a4 ("mm: abort vma_modify() on merge out of memory failure") Signed-off-by: Lorenzo Stoakes Reported-by: syzbot+d16409ea9ecc16ed261a@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-mm/6842cc67.a00a0220.29ac89.003b.GAE@google.com/ Reviewed-by: Pedro Falcato Reviewed-by: Vlastimil Babka Reviewed-by: Liam R. Howlett Cc: Jann Horn Cc: Signed-off-by: Andrew Morton --- mm/vma.c | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/mm/vma.c b/mm/vma.c index 726b2a31ce59..0fb9b2c7b734 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -967,26 +967,9 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( err = dup_anon_vma(next, middle, &anon_dup); } - if (err) + if (err || commit_merge(vmg)) goto abort; - err = commit_merge(vmg); - if (err) { - VM_WARN_ON(err != -ENOMEM); - - if (anon_dup) - unlink_anon_vmas(anon_dup); - - /* - * We've cleaned up any cloned anon_vma's, no VMAs have been - * modified, no harm no foul if the user requests that we not - * report this and just give up, leaving the VMAs unmerged. - */ - if (!vmg->give_up_on_oom) - vmg->state = VMA_MERGE_ERROR_NOMEM; - return NULL; - } - khugepaged_enter_vma(vmg->target, vmg->flags); vmg->state = VMA_MERGE_SUCCESS; return vmg->target; @@ -995,6 +978,9 @@ abort: vma_iter_set(vmg->vmi, start); vma_iter_load(vmg->vmi); + if (anon_dup) + unlink_anon_vmas(anon_dup); + /* * This means we have failed to clone anon_vma's correctly, but no * actual changes to VMAs have occurred, so no harm no foul - if the -- cgit v1.2.3 From 383c4613c67c26e90e8eebb72e3083457d02033f Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Fri, 6 Jun 2025 10:28:07 +0100 Subject: mm: close theoretical race where stale TLB entries could linger Commit 3ea277194daa ("mm, mprotect: flush TLB if potentially racing with a parallel reclaim leaving stale TLB entries") described a theoretical race as such: """ Nadav Amit identified a theoretical race between page reclaim and mprotect due to TLB flushes being batched outside of the PTL being held. He described the race as follows: CPU0 CPU1 ---- ---- user accesses memory using RW PTE [PTE now cached in TLB] try_to_unmap_one() ==> ptep_get_and_clear() ==> set_tlb_ubc_flush_pending() mprotect(addr, PROT_READ) ==> change_pte_range() ==> [ PTE non-present - no flush ] user writes using cached RW PTE ... try_to_unmap_flush() The same type of race exists for reads when protecting for PROT_NONE and also exists for operations that can leave an old TLB entry behind such as munmap, mremap and madvise. """ The solution was to introduce flush_tlb_batched_pending() and call it under the PTL from mprotect/madvise/munmap/mremap to complete any pending tlb flushes. However, while madvise_free_pte_range() and madvise_cold_or_pageout_pte_range() were both retro-fitted to call flush_tlb_batched_pending() immediately after initially acquiring the PTL, they both temporarily release the PTL to split a large folio if they stumble upon one. In this case, where re-acquiring the PTL flush_tlb_batched_pending() must be called again, but it previously was not. Let's fix that. There are 2 Fixes: tags here: the first is the commit that fixed madvise_free_pte_range(). The second is the commit that added madvise_cold_or_pageout_pte_range(), which looks like it copy/pasted the faulty pattern from madvise_free_pte_range(). This is a theoretical bug discovered during code review. Link: https://lkml.kernel.org/r/20250606092809.4194056-1-ryan.roberts@arm.com Fixes: 3ea277194daa ("mm, mprotect: flush TLB if potentially racing with a parallel reclaim leaving stale TLB entries") Fixes: 9c276cc65a58 ("mm: introduce MADV_COLD") Signed-off-by: Ryan Roberts Reviewed-by: Jann Horn Acked-by: David Hildenbrand Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Mel Gorman Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- mm/madvise.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/madvise.c b/mm/madvise.c index 5f7a66a1617e..1d44a35ae85c 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -508,6 +508,7 @@ restart: pte_offset_map_lock(mm, pmd, addr, &ptl); if (!start_pte) break; + flush_tlb_batched_pending(mm); arch_enter_lazy_mmu_mode(); if (!err) nr = 0; @@ -741,6 +742,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, start_pte = pte; if (!start_pte) break; + flush_tlb_batched_pending(mm); arch_enter_lazy_mmu_mode(); if (!err) nr = 0; -- cgit v1.2.3 From 50695153d7ddde3b1696dbf0085be0033bf3ddb3 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sat, 7 Jun 2025 17:43:18 -0700 Subject: drivers/rapidio/rio_cm.c: prevent possible heap overwrite In riocm_cdev_ioctl(RIO_CM_CHAN_SEND) -> cm_chan_msg_send() -> riocm_ch_send() cm_chan_msg_send() checks that userspace didn't send too much data but riocm_ch_send() failed to check that userspace sent sufficient data. The result is that riocm_ch_send() can write to fields in the rio_ch_chan_hdr which were outside the bounds of the space which cm_chan_msg_send() allocated. Address this by teaching riocm_ch_send() to check that the entire rio_ch_chan_hdr was copied in from userspace. Reported-by: maher azz Cc: Matt Porter Cc: Alexandre Bounine Cc: Linus Torvalds Cc: Signed-off-by: Andrew Morton --- drivers/rapidio/rio_cm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/rapidio/rio_cm.c b/drivers/rapidio/rio_cm.c index 97287e838ce1..66464674223f 100644 --- a/drivers/rapidio/rio_cm.c +++ b/drivers/rapidio/rio_cm.c @@ -783,6 +783,9 @@ static int riocm_ch_send(u16 ch_id, void *buf, int len) if (buf == NULL || ch_id == 0 || len == 0 || len > RIO_MAX_MSG_SIZE) return -EINVAL; + if (len < sizeof(struct rio_ch_chan_hdr)) + return -EINVAL; /* insufficient data from user */ + ch = riocm_get_channel(ch_id); if (!ch) { riocm_error("%s(%d) ch_%d not found", current->comm, -- cgit v1.2.3 From 02fb36505c61c35d5fd879f5a66a97935dbcb2ee Mon Sep 17 00:00:00 2001 From: Barry Song Date: Mon, 9 Jun 2025 12:24:42 +1200 Subject: MAINTAINERS: add Barry as a THP reviewer I have been actively contributing to mTHP and reviewing related patches for an extended period, and I would like to continue supporting patch reviews. Link: https://lkml.kernel.org/r/20250609002442.1856-1-21cnbao@gmail.com Signed-off-by: Barry Song Acked-by: Zi Yan Acked-by: Baolin Wang Acked-by: Dev Jain Acked-by: Lorenzo Stoakes Reviewed-by: Ryan Roberts Cc: David Hildenbrand Cc: Liam R. Howlett Cc: Nico Pache Signed-off-by: Andrew Morton --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index a92290fffa16..8315c84df075 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15919,6 +15919,7 @@ R: Liam R. Howlett R: Nico Pache R: Ryan Roberts R: Dev Jain +R: Barry Song L: linux-mm@kvack.org S: Maintained W: http://www.linux-mm.org -- cgit v1.2.3 From 66ac1a4d366d3faa95fcf6082b555f8d77f1e8db Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sun, 8 Jun 2025 22:12:35 +0800 Subject: init: fix build warnings about export.h After commit a934a57a42f64a4 ("scripts/misc-check: check missing #include when W=1") and 7d95680d64ac8e836c ("scripts/misc-check: check unnecessary #include when W=1"), we get some build warnings with W=1: init/main.c: warning: EXPORT_SYMBOL() is used, but #include is missing init/initramfs.c: warning: EXPORT_SYMBOL() is used, but #include is missing So fix these build warnings for the init code. Link: https://lkml.kernel.org/r/20250608141235.155206-1-chenhuacai@loongson.cn Fixes: a934a57a42f6 ("scripts/misc-check: check missing #include when W=1") Signed-off-by: Huacai Chen Reviewed-by: Masahiro Yamada Cc: Al Viro Cc: Christian Brauner Cc: Jan Kara Signed-off-by: Andrew Morton --- init/initramfs.c | 1 + init/main.c | 1 + 2 files changed, 2 insertions(+) diff --git a/init/initramfs.c b/init/initramfs.c index 72bad44a1d41..097673b97784 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include #include #include #include diff --git a/init/main.c b/init/main.c index ed576c7f475d..225a58279acd 100644 --- a/init/main.c +++ b/init/main.c @@ -13,6 +13,7 @@ #define DEBUG /* Enable initcall_debug */ #include +#include #include #include #include -- cgit v1.2.3 From c538f400fae22725580842deb2bef546701b64bd Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 11 Jun 2025 13:53:43 -0700 Subject: io_uring: consistently use rcu semantics with sqpoll thread The sqpoll thread is dereferenced with rcu read protection in one place, so it needs to be annotated as an __rcu type, and should consistently use rcu helpers for access and assignment to make sparse happy. Since most of the accesses occur under the sqd->lock, we can use rcu_dereference_protected() without declaring an rcu read section. Provide a simple helper to get the thread from a locked context. Fixes: ac0b8b327a5677d ("io_uring: fix use-after-free of sq->thread in __io_uring_show_fdinfo()") Signed-off-by: Keith Busch Link: https://lore.kernel.org/r/20250611205343.1821117-1-kbusch@meta.com [axboe: fold in fix for register.c] Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 4 ++-- io_uring/register.c | 7 +++++-- io_uring/sqpoll.c | 34 ++++++++++++++++++++++++---------- io_uring/sqpoll.h | 8 +++++++- 4 files changed, 38 insertions(+), 15 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index cf759c172083..4e32f808d07d 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2906,7 +2906,7 @@ static __cold void io_ring_exit_work(struct work_struct *work) struct task_struct *tsk; io_sq_thread_park(sqd); - tsk = sqd->thread; + tsk = sqpoll_task_locked(sqd); if (tsk && tsk->io_uring && tsk->io_uring->io_wq) io_wq_cancel_cb(tsk->io_uring->io_wq, io_cancel_ctx_cb, ctx, true); @@ -3142,7 +3142,7 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd) s64 inflight; DEFINE_WAIT(wait); - WARN_ON_ONCE(sqd && sqd->thread != current); + WARN_ON_ONCE(sqd && sqpoll_task_locked(sqd) != current); if (!current->io_uring) return; diff --git a/io_uring/register.c b/io_uring/register.c index cc23a4c205cd..a59589249fce 100644 --- a/io_uring/register.c +++ b/io_uring/register.c @@ -273,6 +273,8 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx, if (ctx->flags & IORING_SETUP_SQPOLL) { sqd = ctx->sq_data; if (sqd) { + struct task_struct *tsk; + /* * Observe the correct sqd->lock -> ctx->uring_lock * ordering. Fine to drop uring_lock here, we hold @@ -282,8 +284,9 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx, mutex_unlock(&ctx->uring_lock); mutex_lock(&sqd->lock); mutex_lock(&ctx->uring_lock); - if (sqd->thread) - tctx = sqd->thread->io_uring; + tsk = sqpoll_task_locked(sqd); + if (tsk) + tctx = tsk->io_uring; } } else { tctx = current->io_uring; diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c index 0625a421626f..268d2fbe6160 100644 --- a/io_uring/sqpoll.c +++ b/io_uring/sqpoll.c @@ -30,7 +30,7 @@ enum { void io_sq_thread_unpark(struct io_sq_data *sqd) __releases(&sqd->lock) { - WARN_ON_ONCE(sqd->thread == current); + WARN_ON_ONCE(sqpoll_task_locked(sqd) == current); /* * Do the dance but not conditional clear_bit() because it'd race with @@ -46,24 +46,32 @@ void io_sq_thread_unpark(struct io_sq_data *sqd) void io_sq_thread_park(struct io_sq_data *sqd) __acquires(&sqd->lock) { - WARN_ON_ONCE(data_race(sqd->thread) == current); + struct task_struct *tsk; atomic_inc(&sqd->park_pending); set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); mutex_lock(&sqd->lock); - if (sqd->thread) - wake_up_process(sqd->thread); + + tsk = sqpoll_task_locked(sqd); + if (tsk) { + WARN_ON_ONCE(tsk == current); + wake_up_process(tsk); + } } void io_sq_thread_stop(struct io_sq_data *sqd) { - WARN_ON_ONCE(sqd->thread == current); + struct task_struct *tsk; + WARN_ON_ONCE(test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state)); set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state); mutex_lock(&sqd->lock); - if (sqd->thread) - wake_up_process(sqd->thread); + tsk = sqpoll_task_locked(sqd); + if (tsk) { + WARN_ON_ONCE(tsk == current); + wake_up_process(tsk); + } mutex_unlock(&sqd->lock); wait_for_completion(&sqd->exited); } @@ -486,7 +494,10 @@ __cold int io_sq_offload_create(struct io_ring_ctx *ctx, goto err_sqpoll; } - sqd->thread = tsk; + mutex_lock(&sqd->lock); + rcu_assign_pointer(sqd->thread, tsk); + mutex_unlock(&sqd->lock); + task_to_put = get_task_struct(tsk); ret = io_uring_alloc_task_context(tsk, ctx); wake_up_new_task(tsk); @@ -514,10 +525,13 @@ __cold int io_sqpoll_wq_cpu_affinity(struct io_ring_ctx *ctx, int ret = -EINVAL; if (sqd) { + struct task_struct *tsk; + io_sq_thread_park(sqd); /* Don't set affinity for a dying thread */ - if (sqd->thread) - ret = io_wq_cpu_affinity(sqd->thread->io_uring, mask); + tsk = sqpoll_task_locked(sqd); + if (tsk) + ret = io_wq_cpu_affinity(tsk->io_uring, mask); io_sq_thread_unpark(sqd); } diff --git a/io_uring/sqpoll.h b/io_uring/sqpoll.h index 4171666b1cf4..b83dcdec9765 100644 --- a/io_uring/sqpoll.h +++ b/io_uring/sqpoll.h @@ -8,7 +8,7 @@ struct io_sq_data { /* ctx's that are using this sqd */ struct list_head ctx_list; - struct task_struct *thread; + struct task_struct __rcu *thread; struct wait_queue_head wait; unsigned sq_thread_idle; @@ -29,3 +29,9 @@ void io_sq_thread_unpark(struct io_sq_data *sqd); void io_put_sq_data(struct io_sq_data *sqd); void io_sqpoll_wait_sq(struct io_ring_ctx *ctx); int io_sqpoll_wq_cpu_affinity(struct io_ring_ctx *ctx, cpumask_var_t mask); + +static inline struct task_struct *sqpoll_task_locked(struct io_sq_data *sqd) +{ + return rcu_dereference_protected(sqd->thread, + lockdep_is_held(&sqd->lock)); +} -- cgit v1.2.3 From d4e6cb324dcc952618fec6b25aa3fc7bfc2750b4 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Fri, 6 Jun 2025 11:43:20 +0200 Subject: net: phy: phy_caps: Don't skip better duplex macth on non-exact match When performing a non-exact phy_caps lookup, we are looking for a supported mode that matches as closely as possible the passed speed/duplex. Blamed patch broke that logic by returning a match too early in case the caller asks for half-duplex, as a full-duplex linkmode may match first, and returned as a non-exact match without even trying to mach on half-duplex modes. Reported-by: Jijie Shao Closes: https://lore.kernel.org/netdev/20250603102500.4ec743cf@fedora/T/#m22ed60ca635c67dc7d9cbb47e8995b2beb5c1576 Tested-by: Jijie Shao Reviewed-by: Larysa Zaremba Fixes: fc81e257d19f ("net: phy: phy_caps: Allow looking-up link caps based on speed and duplex") Signed-off-by: Maxime Chevallier Link: https://patch.msgid.link/20250606094321.483602-1-maxime.chevallier@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy_caps.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/net/phy/phy_caps.c b/drivers/net/phy/phy_caps.c index 703321689726..38417e288611 100644 --- a/drivers/net/phy/phy_caps.c +++ b/drivers/net/phy/phy_caps.c @@ -188,6 +188,9 @@ phy_caps_lookup_by_linkmode_rev(const unsigned long *linkmodes, bool fdx_only) * When @exact is not set, we return either an exact match, or matching capabilities * at lower speed, or the lowest matching speed, or NULL. * + * Non-exact matches will try to return an exact speed and duplex match, but may + * return matching capabilities with same speed but a different duplex. + * * Returns: a matched link_capabilities according to the above process, NULL * otherwise. */ @@ -195,7 +198,7 @@ const struct link_capabilities * phy_caps_lookup(int speed, unsigned int duplex, const unsigned long *supported, bool exact) { - const struct link_capabilities *lcap, *last = NULL; + const struct link_capabilities *lcap, *match = NULL, *last = NULL; for_each_link_caps_desc_speed(lcap) { if (linkmode_intersects(lcap->linkmodes, supported)) { @@ -204,16 +207,19 @@ phy_caps_lookup(int speed, unsigned int duplex, const unsigned long *supported, if (lcap->speed == speed && lcap->duplex == duplex) { return lcap; } else if (!exact) { - if (lcap->speed <= speed) - return lcap; + if (!match && lcap->speed <= speed) + match = lcap; + + if (lcap->speed < speed) + break; } } } - if (!exact) - return last; + if (!match && !exact) + match = last; - return NULL; + return match; } EXPORT_SYMBOL_GPL(phy_caps_lookup); -- cgit v1.2.3 From 7ca52541c05c832d32b112274f81a985101f9ba8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Jun 2025 08:35:01 +0000 Subject: net_sched: sch_sfq: reject invalid perturb period Gerrard Tai reported that SFQ perturb_period has no range check yet, and this can be used to trigger a race condition fixed in a separate patch. We want to make sure ctl->perturb_period * HZ will not overflow and is positive. Tested: tc qd add dev lo root sfq perturb -10 # negative value : error Error: sch_sfq: invalid perturb period. tc qd add dev lo root sfq perturb 1000000000 # too big : error Error: sch_sfq: invalid perturb period. tc qd add dev lo root sfq perturb 2000000 # acceptable value tc -s -d qd sh dev lo qdisc sfq 8005: root refcnt 2 limit 127p quantum 64Kb depth 127 flows 128 divisor 1024 perturb 2000000sec Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Gerrard Tai Signed-off-by: Eric Dumazet Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20250611083501.1810459-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/sched/sch_sfq.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 77fa02f2bfcd..a8cca549b5a2 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -656,6 +656,14 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt, NL_SET_ERR_MSG_MOD(extack, "invalid quantum"); return -EINVAL; } + + if (ctl->perturb_period < 0 || + ctl->perturb_period > INT_MAX / HZ) { + NL_SET_ERR_MSG_MOD(extack, "invalid perturb period"); + return -EINVAL; + } + perturb_period = ctl->perturb_period * HZ; + if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max, ctl_v1->Wlog, ctl_v1->Scell_log, NULL)) return -EINVAL; @@ -672,14 +680,12 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt, headdrop = q->headdrop; maxdepth = q->maxdepth; maxflows = q->maxflows; - perturb_period = q->perturb_period; quantum = q->quantum; flags = q->flags; /* update and validate configuration */ if (ctl->quantum) quantum = ctl->quantum; - perturb_period = ctl->perturb_period * HZ; if (ctl->flows) maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS); if (ctl->divisor) { -- cgit v1.2.3 From d35acc1be3480505b5931f17e4ea9b7617fea4d3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Jun 2025 11:15:11 +0000 Subject: net_sched: prio: fix a race in prio_tune() Gerrard Tai reported a race condition in PRIO, whenever SFQ perturb timer fires at the wrong time. The race is as follows: CPU 0 CPU 1 [1]: lock root [2]: qdisc_tree_flush_backlog() [3]: unlock root | | [5]: lock root | [6]: rehash | [7]: qdisc_tree_reduce_backlog() | [4]: qdisc_put() This can be abused to underflow a parent's qlen. Calling qdisc_purge_queue() instead of qdisc_tree_flush_backlog() should fix the race, because all packets will be purged from the qdisc before releasing the lock. Fixes: 7b8e0b6e6599 ("net: sched: prio: delay destroying child qdiscs on change") Reported-by: Gerrard Tai Suggested-by: Gerrard Tai Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20250611111515.1983366-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/sched/sch_prio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index cc30f7a32f1a..9e2b9a490db2 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -211,7 +211,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt, memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); for (i = q->bands; i < oldbands; i++) - qdisc_tree_flush_backlog(q->queues[i]); + qdisc_purge_queue(q->queues[i]); for (i = oldbands; i < q->bands; i++) { q->queues[i] = queues[i]; -- cgit v1.2.3 From 85a3e0ede38450ea3053b8c45d28cf55208409b8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Jun 2025 11:15:12 +0000 Subject: net_sched: red: fix a race in __red_change() Gerrard Tai reported a race condition in RED, whenever SFQ perturb timer fires at the wrong time. The race is as follows: CPU 0 CPU 1 [1]: lock root [2]: qdisc_tree_flush_backlog() [3]: unlock root | | [5]: lock root | [6]: rehash | [7]: qdisc_tree_reduce_backlog() | [4]: qdisc_put() This can be abused to underflow a parent's qlen. Calling qdisc_purge_queue() instead of qdisc_tree_flush_backlog() should fix the race, because all packets will be purged from the qdisc before releasing the lock. Fixes: 0c8d13ac9607 ("net: sched: red: delay destroying child qdisc on replace") Reported-by: Gerrard Tai Suggested-by: Gerrard Tai Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20250611111515.1983366-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/sched/sch_red.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 1ba3e0bba54f..4696c893cf55 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -285,7 +285,7 @@ static int __red_change(struct Qdisc *sch, struct nlattr **tb, q->userbits = userbits; q->limit = ctl->limit; if (child) { - qdisc_tree_flush_backlog(q->qdisc); + qdisc_purge_queue(q->qdisc); old_child = q->qdisc; q->qdisc = child; } -- cgit v1.2.3 From 43eb466041216d25dedaef1c383ad7bd89929cbc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Jun 2025 11:15:13 +0000 Subject: net_sched: tbf: fix a race in tbf_change() Gerrard Tai reported a race condition in TBF, whenever SFQ perturb timer fires at the wrong time. The race is as follows: CPU 0 CPU 1 [1]: lock root [2]: qdisc_tree_flush_backlog() [3]: unlock root | | [5]: lock root | [6]: rehash | [7]: qdisc_tree_reduce_backlog() | [4]: qdisc_put() This can be abused to underflow a parent's qlen. Calling qdisc_purge_queue() instead of qdisc_tree_flush_backlog() should fix the race, because all packets will be purged from the qdisc before releasing the lock. Fixes: b05972f01e7d ("net: sched: tbf: don't call qdisc_put() while holding tree lock") Reported-by: Gerrard Tai Suggested-by: Gerrard Tai Signed-off-by: Eric Dumazet Cc: Zhengchao Shao Link: https://patch.msgid.link/20250611111515.1983366-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/sched/sch_tbf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index dc26b22d53c7..4c977f049670 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -452,7 +452,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt, sch_tree_lock(sch); if (child) { - qdisc_tree_flush_backlog(q->qdisc); + qdisc_purge_queue(q->qdisc); old = q->qdisc; q->qdisc = child; } -- cgit v1.2.3 From d92adacdd8c2960be856e0b82acc5b7c5395fddb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Jun 2025 11:15:14 +0000 Subject: net_sched: ets: fix a race in ets_qdisc_change() Gerrard Tai reported a race condition in ETS, whenever SFQ perturb timer fires at the wrong time. The race is as follows: CPU 0 CPU 1 [1]: lock root [2]: qdisc_tree_flush_backlog() [3]: unlock root | | [5]: lock root | [6]: rehash | [7]: qdisc_tree_reduce_backlog() | [4]: qdisc_put() This can be abused to underflow a parent's qlen. Calling qdisc_purge_queue() instead of qdisc_tree_flush_backlog() should fix the race, because all packets will be purged from the qdisc before releasing the lock. Fixes: b05972f01e7d ("net: sched: tbf: don't call qdisc_put() while holding tree lock") Reported-by: Gerrard Tai Suggested-by: Gerrard Tai Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20250611111515.1983366-5-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/sched/sch_ets.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index 2c069f0181c6..037f764822b9 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -661,7 +661,7 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt, for (i = q->nbands; i < oldbands; i++) { if (i >= q->nstrict && q->classes[i].qdisc->q.qlen) list_del_init(&q->classes[i].alist); - qdisc_tree_flush_backlog(q->classes[i].qdisc); + qdisc_purge_queue(q->classes[i].qdisc); } WRITE_ONCE(q->nstrict, nstrict); memcpy(q->prio2band, priomap, sizeof(priomap)); -- cgit v1.2.3 From adcaa890c7a4a91a422168d8fb629183fff07b2f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Jun 2025 11:15:15 +0000 Subject: net_sched: remove qdisc_tree_flush_backlog() This function is no longer used after the four prior fixes. Given all prior uses were wrong, it seems better to remove it. Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20250611111515.1983366-6-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/sch_generic.h | 8 -------- 1 file changed, 8 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 629368ab2787..638948be4c50 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -973,14 +973,6 @@ static inline void qdisc_qstats_qlen_backlog(struct Qdisc *sch, __u32 *qlen, *backlog = qstats.backlog; } -static inline void qdisc_tree_flush_backlog(struct Qdisc *sch) -{ - __u32 qlen, backlog; - - qdisc_qstats_qlen_backlog(sch, &qlen, &backlog); - qdisc_tree_reduce_backlog(sch, qlen, backlog); -} - static inline void qdisc_purge_queue(struct Qdisc *sch) { __u32 qlen, backlog; -- cgit v1.2.3 From 9337c54401a5bb6ac3c9f6c71dd2a9130cfba82e Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 11 Jun 2025 14:40:04 +0200 Subject: veth: prevent NULL pointer dereference in veth_xdp_rcv The veth peer device is RCU protected, but when the peer device gets deleted (veth_dellink) then the pointer is assigned NULL (via RCU_INIT_POINTER). This patch adds a necessary NULL check in veth_xdp_rcv when accessing the veth peer net_device. This fixes a bug introduced in commit dc82a33297fc ("veth: apply qdisc backpressure on full ptr_ring to reduce TX drops"). The bug is a race and only triggers when having inflight packets on a veth that is being deleted. Reported-by: Ihor Solodrai Closes: https://lore.kernel.org/all/fecfcad0-7a16-42b8-bff2-66ee83a6e5c4@linux.dev/ Reported-by: syzbot+c4c7bf27f6b0c4bd97fe@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/683da55e.a00a0220.d8eae.0052.GAE@google.com/ Fixes: dc82a33297fc ("veth: apply qdisc backpressure on full ptr_ring to reduce TX drops") Signed-off-by: Jesper Dangaard Brouer Acked-by: Ihor Solodrai Link: https://patch.msgid.link/174964557873.519608.10855046105237280978.stgit@firesoul Signed-off-by: Jakub Kicinski --- drivers/net/veth.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index e58a0f1b5c5b..a3046142cb8e 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -909,7 +909,7 @@ static int veth_xdp_rcv(struct veth_rq *rq, int budget, /* NAPI functions as RCU section */ peer_dev = rcu_dereference_check(priv->peer, rcu_read_lock_bh_held()); - peer_txq = netdev_get_tx_queue(peer_dev, queue_idx); + peer_txq = peer_dev ? netdev_get_tx_queue(peer_dev, queue_idx) : NULL; for (i = 0; i < budget; i++) { void *ptr = __ptr_ring_consume(&rq->xdp_ring); @@ -959,7 +959,7 @@ static int veth_xdp_rcv(struct veth_rq *rq, int budget, rq->stats.vs.xdp_packets += done; u64_stats_update_end(&rq->stats.syncp); - if (unlikely(netif_tx_queue_stopped(peer_txq))) + if (peer_txq && unlikely(netif_tx_queue_stopped(peer_txq))) netif_tx_wake_queue(peer_txq); return done; -- cgit v1.2.3 From 1619bdf4389c829f16af5c7d5b4fa5f1673614d7 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 11 Jun 2025 16:14:32 +0300 Subject: net/mlx5: HWS, Add error checking to hws_bwc_rule_complex_hash_node_get() Check for if ida_alloc() or rhashtable_lookup_get_insert_fast() fails. Fixes: 17e0accac577 ("net/mlx5: HWS, support complex matchers") Signed-off-by: Dan Carpenter Reviewed-by: Yevgeny Kliteynik Link: https://patch.msgid.link/aEmBONjyiF6z5yCV@stanley.mountain Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/steering/hws/bwc_complex.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c index 70768953a4f6..ca7501c57468 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c @@ -1070,7 +1070,7 @@ hws_bwc_rule_complex_hash_node_get(struct mlx5hws_bwc_rule *bwc_rule, struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; struct mlx5hws_bwc_complex_rule_hash_node *node, *old_node; struct rhashtable *refcount_hash; - int i; + int ret, i; bwc_rule->complex_hash_node = NULL; @@ -1078,7 +1078,11 @@ hws_bwc_rule_complex_hash_node_get(struct mlx5hws_bwc_rule *bwc_rule, if (unlikely(!node)) return -ENOMEM; - node->tag = ida_alloc(&bwc_matcher->complex->metadata_ida, GFP_KERNEL); + ret = ida_alloc(&bwc_matcher->complex->metadata_ida, GFP_KERNEL); + if (ret < 0) + goto err_free_node; + node->tag = ret; + refcount_set(&node->refcount, 1); /* Clear match buffer - turn off all the unrelated fields @@ -1094,6 +1098,11 @@ hws_bwc_rule_complex_hash_node_get(struct mlx5hws_bwc_rule *bwc_rule, old_node = rhashtable_lookup_get_insert_fast(refcount_hash, &node->hash_node, hws_refcount_hash); + if (IS_ERR(old_node)) { + ret = PTR_ERR(old_node); + goto err_free_ida; + } + if (old_node) { /* Rule with the same tag already exists - update refcount */ refcount_inc(&old_node->refcount); @@ -1112,6 +1121,12 @@ hws_bwc_rule_complex_hash_node_get(struct mlx5hws_bwc_rule *bwc_rule, bwc_rule->complex_hash_node = node; return 0; + +err_free_ida: + ida_free(&bwc_matcher->complex->metadata_ida, node->tag); +err_free_node: + kfree(node); + return ret; } static void -- cgit v1.2.3 From 1264971017b4d7141352a7fe29021bdfce5d885d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 11 Jun 2025 10:46:43 -0700 Subject: net: drv: netdevsim: don't napi_complete() from netpoll netdevsim supports netpoll. Make sure we don't call napi_complete() from it, since it may not be scheduled. Breno reports hitting a warning in napi_complete_done(): WARNING: CPU: 14 PID: 104 at net/core/dev.c:6592 napi_complete_done+0x2cc/0x560 __napi_poll+0x2d8/0x3a0 handle_softirqs+0x1fe/0x710 This is presumably after netpoll stole the SCHED bit prematurely. Reported-by: Breno Leitao Fixes: 3762ec05a9fb ("netdevsim: add NAPI support") Tested-by: Breno Leitao Link: https://patch.msgid.link/20250611174643.2769263-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/netdev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index af545d42961c..fa5fbd97ad69 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -371,7 +371,8 @@ static int nsim_poll(struct napi_struct *napi, int budget) int done; done = nsim_rcv(rq, budget); - napi_complete(napi); + if (done < budget) + napi_complete_done(napi, done); return done; } -- cgit v1.2.3 From b3979e3d2fc92bf1a2da614fc383b75b9859ef58 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 11 Jun 2025 12:35:02 -0700 Subject: ipv6: Move fib6_config_validate() to ip6_route_add(). syzkaller created an IPv6 route from a malformed packet, which has a prefix len > 128, triggering the splat below. [0] This is a similar issue fixed by commit 586ceac9acb7 ("ipv6: Restore fib6_config validation for SIOCADDRT."). The cited commit removed fib6_config validation from some callers of ip6_add_route(). Let's move the validation back to ip6_route_add() and ip6_route_multipath_add(). [0]: UBSAN: array-index-out-of-bounds in ./include/net/ipv6.h:616:34 index 20 is out of range for type '__u8 [16]' CPU: 1 UID: 0 PID: 7444 Comm: syz.0.708 Not tainted 6.16.0-rc1-syzkaller-g19272b37aa4f #0 PREEMPT Hardware name: riscv-virtio,qemu (DT) Call Trace: [] dump_backtrace+0x2e/0x3c arch/riscv/kernel/stacktrace.c:132 [] show_stack+0x30/0x3c arch/riscv/kernel/stacktrace.c:138 [] __dump_stack lib/dump_stack.c:94 [inline] [] dump_stack_lvl+0x12e/0x1a6 lib/dump_stack.c:120 [] dump_stack+0x1c/0x24 lib/dump_stack.c:129 [] ubsan_epilogue+0x14/0x46 lib/ubsan.c:233 [] __ubsan_handle_out_of_bounds+0xf6/0xf8 lib/ubsan.c:455 [] ipv6_addr_prefix include/net/ipv6.h:616 [inline] [] ip6_route_info_create+0x8f8/0x96e net/ipv6/route.c:3793 [] ip6_route_add+0x2a/0x1aa net/ipv6/route.c:3889 [] addrconf_prefix_route+0x2c4/0x4e8 net/ipv6/addrconf.c:2487 [] addrconf_prefix_rcv+0x1720/0x1e62 net/ipv6/addrconf.c:2878 [] ndisc_router_discovery+0x1a06/0x3504 net/ipv6/ndisc.c:1570 [] ndisc_rcv+0x500/0x600 net/ipv6/ndisc.c:1874 [] icmpv6_rcv+0x145e/0x1e0a net/ipv6/icmp.c:988 [] ip6_protocol_deliver_rcu+0x18a/0x1976 net/ipv6/ip6_input.c:436 [] ip6_input_finish+0xf4/0x174 net/ipv6/ip6_input.c:480 [] NF_HOOK include/linux/netfilter.h:317 [inline] [] NF_HOOK include/linux/netfilter.h:311 [inline] [] ip6_input+0x16a/0x70c net/ipv6/ip6_input.c:491 [] ip6_mc_input+0x5c8/0x1268 net/ipv6/ip6_input.c:588 [] dst_input include/net/dst.h:469 [inline] [] ip6_rcv_finish net/ipv6/ip6_input.c:79 [inline] [] NF_HOOK include/linux/netfilter.h:317 [inline] [] NF_HOOK include/linux/netfilter.h:311 [inline] [] ipv6_rcv+0x5ae/0x6e0 net/ipv6/ip6_input.c:309 [] __netif_receive_skb_one_core+0x106/0x16e net/core/dev.c:5977 [] __netif_receive_skb+0x2c/0x144 net/core/dev.c:6090 [] netif_receive_skb_internal net/core/dev.c:6176 [inline] [] netif_receive_skb+0x1aa/0xbf2 net/core/dev.c:6235 [] tun_rx_batched.isra.0+0x430/0x686 drivers/net/tun.c:1485 [] tun_get_user+0x2952/0x3d6c drivers/net/tun.c:1938 [] tun_chr_write_iter+0xc4/0x21c drivers/net/tun.c:1984 [] new_sync_write fs/read_write.c:593 [inline] [] vfs_write+0x56c/0xa9a fs/read_write.c:686 [] ksys_write+0x126/0x228 fs/read_write.c:738 [] __do_sys_write fs/read_write.c:749 [inline] [] __se_sys_write fs/read_write.c:746 [inline] [] __riscv_sys_write+0x6e/0x94 fs/read_write.c:746 [] syscall_handler+0x94/0x118 arch/riscv/include/asm/syscall.h:112 [] do_trap_ecall_u+0x396/0x530 arch/riscv/kernel/traps.c:341 [] handle_exception+0x146/0x152 arch/riscv/kernel/entry.S:197 Fixes: fa76c1674f2e ("ipv6: Move some validation from ip6_route_info_create() to rtm_to_fib6_config().") Reported-by: syzbot+4c2358694722d304c44e@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/6849b8c3.a00a0220.1eb5f5.00f0.GAE@google.com/ Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250611193551.2999991-1-kuni1840@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv6/route.c | 110 +++++++++++++++++++++++++++---------------------------- 1 file changed, 55 insertions(+), 55 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0143262094b0..79c8f1acf8a3 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3737,6 +3737,53 @@ void fib6_nh_release_dsts(struct fib6_nh *fib6_nh) } } +static int fib6_config_validate(struct fib6_config *cfg, + struct netlink_ext_ack *extack) +{ + /* RTF_PCPU is an internal flag; can not be set by userspace */ + if (cfg->fc_flags & RTF_PCPU) { + NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU"); + goto errout; + } + + /* RTF_CACHE is an internal flag; can not be set by userspace */ + if (cfg->fc_flags & RTF_CACHE) { + NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE"); + goto errout; + } + + if (cfg->fc_type > RTN_MAX) { + NL_SET_ERR_MSG(extack, "Invalid route type"); + goto errout; + } + + if (cfg->fc_dst_len > 128) { + NL_SET_ERR_MSG(extack, "Invalid prefix length"); + goto errout; + } + +#ifdef CONFIG_IPV6_SUBTREES + if (cfg->fc_src_len > 128) { + NL_SET_ERR_MSG(extack, "Invalid source address length"); + goto errout; + } + + if (cfg->fc_nh_id && cfg->fc_src_len) { + NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing"); + goto errout; + } +#else + if (cfg->fc_src_len) { + NL_SET_ERR_MSG(extack, + "Specifying source address requires IPV6_SUBTREES to be enabled"); + goto errout; + } +#endif + return 0; +errout: + return -EINVAL; +} + static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack) @@ -3886,6 +3933,10 @@ int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags, struct fib6_info *rt; int err; + err = fib6_config_validate(cfg, extack); + if (err) + return err; + rt = ip6_route_info_create(cfg, gfp_flags, extack); if (IS_ERR(rt)) return PTR_ERR(rt); @@ -4479,53 +4530,6 @@ void rt6_purge_dflt_routers(struct net *net) rcu_read_unlock(); } -static int fib6_config_validate(struct fib6_config *cfg, - struct netlink_ext_ack *extack) -{ - /* RTF_PCPU is an internal flag; can not be set by userspace */ - if (cfg->fc_flags & RTF_PCPU) { - NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU"); - goto errout; - } - - /* RTF_CACHE is an internal flag; can not be set by userspace */ - if (cfg->fc_flags & RTF_CACHE) { - NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE"); - goto errout; - } - - if (cfg->fc_type > RTN_MAX) { - NL_SET_ERR_MSG(extack, "Invalid route type"); - goto errout; - } - - if (cfg->fc_dst_len > 128) { - NL_SET_ERR_MSG(extack, "Invalid prefix length"); - goto errout; - } - -#ifdef CONFIG_IPV6_SUBTREES - if (cfg->fc_src_len > 128) { - NL_SET_ERR_MSG(extack, "Invalid source address length"); - goto errout; - } - - if (cfg->fc_nh_id && cfg->fc_src_len) { - NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing"); - goto errout; - } -#else - if (cfg->fc_src_len) { - NL_SET_ERR_MSG(extack, - "Specifying source address requires IPV6_SUBTREES to be enabled"); - goto errout; - } -#endif - return 0; -errout: - return -EINVAL; -} - static void rtmsg_to_fib6_config(struct net *net, struct in6_rtmsg *rtmsg, struct fib6_config *cfg) @@ -4563,10 +4567,6 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg) switch (cmd) { case SIOCADDRT: - err = fib6_config_validate(&cfg, NULL); - if (err) - break; - /* Only do the default setting of fc_metric in route adding */ if (cfg.fc_metric == 0) cfg.fc_metric = IP6_RT_PRIO_USER; @@ -5402,6 +5402,10 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, int nhn = 0; int err; + err = fib6_config_validate(cfg, extack); + if (err) + return err; + replace = (cfg->fc_nlinfo.nlh && (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE)); @@ -5636,10 +5640,6 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) return err; - err = fib6_config_validate(&cfg, extack); - if (err) - return err; - if (cfg.fc_metric == 0) cfg.fc_metric = IP6_RT_PRIO_USER; -- cgit v1.2.3 From 43fb2b30eea7cfc40214484935b026ec29838a91 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 11 Jun 2025 13:27:35 -0700 Subject: af_unix: Allow passing cred for embryo without SO_PASSCRED/SO_PASSPIDFD. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before the cited commit, the kernel unconditionally embedded SCM credentials to skb for embryo sockets even when both the sender and listener disabled SO_PASSCRED and SO_PASSPIDFD. Now, the credentials are added to skb only when configured by the sender or the listener. However, as reported in the link below, it caused a regression for some programs that assume credentials are included in every skb, but sometimes not now. The only problematic scenario would be that a socket starts listening before setting the option. Then, there will be 2 types of non-small race window, where a client can send skb without credentials, which the peer receives as an "invalid" message (and aborts the connection it seems ?): Client Server ------ ------ s1.listen() <-- No SO_PASS{CRED,PIDFD} s2.connect() s2.send() <-- w/o cred s1.setsockopt(SO_PASS{CRED,PIDFD}) s2.send() <-- w/ cred or Client Server ------ ------ s1.listen() <-- No SO_PASS{CRED,PIDFD} s2.connect() s2.send() <-- w/o cred s3, _ = s1.accept() <-- Inherit cred options s2.send() <-- w/o cred but not set yet s3.setsockopt(SO_PASS{CRED,PIDFD}) s2.send() <-- w/ cred It's unfortunate that buggy programs depend on the behaviour, but let's restore the previous behaviour. Fixes: 3f84d577b79d ("af_unix: Inherit sk_flags at connect().") Reported-by: Jacek Łuczak Closes: https://lore.kernel.org/all/68d38b0b-1666-4974-85d4-15575789c8d4@gmail.com/ Signed-off-by: Kuniyuki Iwashima Tested-by: Christian Heusel Tested-by: André Almeida Tested-by: Jacek Łuczak Link: https://patch.msgid.link/20250611202758.3075858-1-kuni1840@gmail.com Signed-off-by: Jakub Kicinski --- net/unix/af_unix.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 2e2e9997a68e..22e170fb5dda 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1971,7 +1971,8 @@ static void unix_maybe_add_creds(struct sk_buff *skb, const struct sock *sk, if (UNIXCB(skb).pid) return; - if (unix_may_passcred(sk) || unix_may_passcred(other)) { + if (unix_may_passcred(sk) || unix_may_passcred(other) || + !other->sk_socket) { UNIXCB(skb).pid = get_pid(task_tgid(current)); current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid); } -- cgit v1.2.3 From d78ebc772c7ceccf6e655ddb93099f49a1268af4 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 12 Jun 2025 10:19:57 +0300 Subject: net: ethtool: Don't check if RSS context exists in case of context 0 Context 0 (default context) always exists, there is no need to check whether it exists or not when adding a flow steering rule. The existing check fails when creating a flow steering rule for context 0 as it is not stored in the rss_ctx xarray. For example: $ ethtool --config-ntuple eth2 flow-type tcp4 dst-ip 194.237.147.23 dst-port 19983 context 0 loc 618 rmgr: Cannot insert RX class rule: Invalid argument Cannot insert classification rule An example usecase for this could be: - A high-priority rule (loc 0) directing specific port traffic to context 0. - A low-priority rule (loc 1) directing all other TCP traffic to context 1. This is a user-visible regression that was caught in our testing environment, it was not reported by a user yet. Fixes: de7f7582dff2 ("net: ethtool: prevent flow steering to RSS contexts which don't exist") Reviewed-by: Tariq Toukan Reviewed-by: Nimrod Oren Signed-off-by: Gal Pressman Reviewed-by: Joe Damato Reviewed-by: Edward Cree Link: https://patch.msgid.link/20250612071958.1696361-2-gal@nvidia.com Signed-off-by: Jakub Kicinski --- net/ethtool/ioctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 39ec920f5de7..71c828d0bf31 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1083,7 +1083,8 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, ethtool_get_flow_spec_ring(info.fs.ring_cookie)) return -EINVAL; - if (!xa_load(&dev->ethtool->rss_ctx, info.rss_context)) + if (info.rss_context && + !xa_load(&dev->ethtool->rss_ctx, info.rss_context)) return -EINVAL; } -- cgit v1.2.3 From 56c5d291e88538621029e5c7c5f60540d37846a8 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 12 Jun 2025 10:19:58 +0300 Subject: selftests: drv-net: rss_ctx: Add test for ntuple rules targeting default RSS context Add test_rss_default_context_rule() to verify that ntuple rules can correctly direct traffic to the default RSS context (context 0). The test creates two ntuple rules with explicit location priorities: - A high-priority rule (loc 0) directing specific port traffic to context 0. - A low-priority rule (loc 1) directing all other TCP traffic to context 1. This validates that: 1. Rules targeting the default context function properly. 2. Traffic steering works as expected when mixing default and additional RSS contexts. The test was written by AI, and reviewed by humans. Reviewed-by: Nimrod Oren Signed-off-by: Gal Pressman Link: https://patch.msgid.link/20250612071958.1696361-3-gal@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/rss_ctx.py | 59 ++++++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py index ca60ae325c22..7bb552f8b182 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py @@ -747,6 +747,62 @@ def test_rss_ntuple_addition(cfg): 'noise' : (0,) }) +def test_rss_default_context_rule(cfg): + """ + Allocate a port, direct this port to context 0, then create a new RSS + context and steer all TCP traffic to it (context 1). Verify that: + * Traffic to the specific port continues to use queues of the main + context (0/1). + * Traffic to any other TCP port is redirected to the new context + (queues 2/3). + """ + + require_ntuple(cfg) + + queue_cnt = len(_get_rx_cnts(cfg)) + if queue_cnt < 4: + try: + ksft_pr(f"Increasing queue count {queue_cnt} -> 4") + ethtool(f"-L {cfg.ifname} combined 4") + defer(ethtool, f"-L {cfg.ifname} combined {queue_cnt}") + except Exception as exc: + raise KsftSkipEx("Not enough queues for the test") from exc + + # Use queues 0 and 1 for the main context + ethtool(f"-X {cfg.ifname} equal 2") + defer(ethtool, f"-X {cfg.ifname} default") + + # Create a new RSS context that uses queues 2 and 3 + ctx_id = ethtool_create(cfg, "-X", "context new start 2 equal 2") + defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete") + + # Generic low-priority rule: redirect all TCP traffic to the new context. + # Give it an explicit higher location number (lower priority). + flow_generic = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} context {ctx_id} loc 1" + ethtool(f"-N {cfg.ifname} {flow_generic}") + defer(ethtool, f"-N {cfg.ifname} delete 1") + + # Specific high-priority rule for a random port that should stay on context 0. + # Assign loc 0 so it is evaluated before the generic rule. + port_main = rand_port() + flow_main = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port_main} context 0 loc 0" + ethtool(f"-N {cfg.ifname} {flow_main}") + defer(ethtool, f"-N {cfg.ifname} delete 0") + + _ntuple_rule_check(cfg, 1, ctx_id) + + # Verify that traffic matching the specific rule still goes to queues 0/1 + _send_traffic_check(cfg, port_main, "context 0", + { 'target': (0, 1), + 'empty' : (2, 3) }) + + # And that traffic for any other port is steered to the new context + port_other = rand_port() + _send_traffic_check(cfg, port_other, f"context {ctx_id}", + { 'target': (2, 3), + 'noise' : (0, 1) }) + + def main() -> None: with NetDrvEpEnv(__file__, nsim_test=False) as cfg: cfg.context_cnt = None @@ -760,7 +816,8 @@ def main() -> None: test_rss_context_overlap, test_rss_context_overlap2, test_rss_context_out_of_order, test_rss_context4_create_with_cfg, test_flow_add_context_missing, - test_delete_rss_context_busy, test_rss_ntuple_addition], + test_delete_rss_context_busy, test_rss_ntuple_addition, + test_rss_default_context_rule], args=(cfg, )) ksft_exit() -- cgit v1.2.3 From 9c7632faad434c98f1f2cc06f3647a5a5d05ddbf Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 4 Jun 2025 08:03:05 -0700 Subject: drm/xe/lrc: Use a temporary buffer for WA BB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In case the BO is in iomem, we can't simply take the vaddr and write to it. Instead, prepare a separate buffer that is later copied into io memory. Right now it's just a few words that could be using xe_map_write32(), but the intention is to grow the WA BB for other uses. Fixes: 617d824c5323 ("drm/xe: Add WA BB to capture active context utilization") Cc: Umesh Nerlige Ramappa Cc: Tvrtko Ursulin Reviewed-by: Matthew Brost Reviewed-by: Umesh Nerlige Ramappa Link: https://lore.kernel.org/r/20250604-wa-bb-fix-v1-1-0dfc5dafcef0@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit ef48715b2d3df17c060e23b9aa636af3d95652f8) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_lrc.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 63d74e27f54c..bf7c3981897d 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -941,11 +941,18 @@ static void xe_lrc_finish(struct xe_lrc *lrc) * store it in the PPHSWP. */ #define CONTEXT_ACTIVE 1ULL -static void xe_lrc_setup_utilization(struct xe_lrc *lrc) +static int xe_lrc_setup_utilization(struct xe_lrc *lrc) { - u32 *cmd; + u32 *cmd, *buf = NULL; - cmd = lrc->bb_per_ctx_bo->vmap.vaddr; + if (lrc->bb_per_ctx_bo->vmap.is_iomem) { + buf = kmalloc(lrc->bb_per_ctx_bo->size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + cmd = buf; + } else { + cmd = lrc->bb_per_ctx_bo->vmap.vaddr; + } *cmd++ = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET; *cmd++ = ENGINE_ID(0).addr; @@ -966,9 +973,16 @@ static void xe_lrc_setup_utilization(struct xe_lrc *lrc) *cmd++ = MI_BATCH_BUFFER_END; + if (buf) { + xe_map_memcpy_to(gt_to_xe(lrc->gt), &lrc->bb_per_ctx_bo->vmap, 0, + buf, (cmd - buf) * sizeof(*cmd)); + kfree(buf); + } + xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, xe_bo_ggtt_addr(lrc->bb_per_ctx_bo) | 1); + return 0; } #define PVC_CTX_ASID (0x2e + 1) @@ -1125,7 +1139,9 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, map = __xe_lrc_start_seqno_map(lrc); xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1); - xe_lrc_setup_utilization(lrc); + err = xe_lrc_setup_utilization(lrc); + if (err) + goto err_lrc_finish; return 0; -- cgit v1.2.3 From b64af6bcd3b0f3fc633d6a70adb0991737abfef4 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Thu, 12 Jun 2025 12:45:04 -0300 Subject: smb: client: fix perf regression with deferred closes Customer reported that one of their applications started failing to open files with STATUS_INSUFFICIENT_RESOURCES due to NetApp server hitting the maximum number of opens to same file that it would allow for a single client connection. It turned out the client was failing to reuse open handles with deferred closes because matching ->f_flags directly without masking off O_CREAT|O_EXCL|O_TRUNC bits first broke the comparision and then client ended up with thousands of deferred closes to same file. Those bits are already satisfied on the original open, so no need to check them against existing open handles. Reproducer: #include #include #include #include #include #include #define NR_THREADS 4 #define NR_ITERATIONS 2500 #define TEST_FILE "/mnt/1/test/dir/foo" static char buf[64]; static void *worker(void *arg) { int i, j; int fd; for (i = 0; i < NR_ITERATIONS; i++) { fd = open(TEST_FILE, O_WRONLY|O_CREAT|O_APPEND, 0666); for (j = 0; j < 16; j++) write(fd, buf, sizeof(buf)); close(fd); } } int main(int argc, char *argv[]) { pthread_t t[NR_THREADS]; int fd; int i; fd = open(TEST_FILE, O_WRONLY|O_CREAT|O_TRUNC, 0666); close(fd); memset(buf, 'a', sizeof(buf)); for (i = 0; i < NR_THREADS; i++) pthread_create(&t[i], NULL, worker, NULL); for (i = 0; i < NR_THREADS; i++) pthread_join(t[i], NULL); return 0; } Before patch: $ mount.cifs //srv/share /mnt/1 -o ... $ mkdir -p /mnt/1/test/dir $ gcc repro.c && ./a.out ... number of opens: 1391 After patch: $ mount.cifs //srv/share /mnt/1 -o ... $ mkdir -p /mnt/1/test/dir $ gcc repro.c && ./a.out ... number of opens: 1 Cc: linux-cifs@vger.kernel.org Cc: David Howells Cc: Jay Shin Cc: Pierguido Lambri Fixes: b8ea3b1ff544 ("smb: enable reuse of deferred file handles for write operations") Acked-by: Shyam Prasad N Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French --- fs/smb/client/file.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index d2df10b8e6fd..9835672267d2 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -999,15 +999,18 @@ int cifs_open(struct inode *inode, struct file *file) rc = cifs_get_readable_path(tcon, full_path, &cfile); } if (rc == 0) { - if (file->f_flags == cfile->f_flags) { + unsigned int oflags = file->f_flags & ~(O_CREAT|O_EXCL|O_TRUNC); + unsigned int cflags = cfile->f_flags & ~(O_CREAT|O_EXCL|O_TRUNC); + + if (cifs_convert_flags(oflags, 0) == cifs_convert_flags(cflags, 0) && + (oflags & (O_SYNC|O_DIRECT)) == (cflags & (O_SYNC|O_DIRECT))) { file->private_data = cfile; spin_lock(&CIFS_I(inode)->deferred_lock); cifs_del_deferred_close(cfile); spin_unlock(&CIFS_I(inode)->deferred_lock); goto use_cache; - } else { - _cifsFileInfo_put(cfile, true, false); } + _cifsFileInfo_put(cfile, true, false); } else { /* hard link on the defeered close file */ rc = cifs_get_hardlink_path(tcon, inode, file); -- cgit v1.2.3 From 72dd7961a4bb4fa1fc456169a61dd12e68e50645 Mon Sep 17 00:00:00 2001 From: Bharath SM Date: Wed, 11 Jun 2025 16:59:02 +0530 Subject: smb: improve directory cache reuse for readdir operations Currently, cached directory contents were not reused across subsequent 'ls' operations because the cache validity check relied on comparing the ctx pointer, which changes with each readdir invocation. As a result, the cached dir entries was not marked as valid and the cache was not utilized for subsequent 'ls' operations. This change uses the file pointer, which remains consistent across all readdir calls for a given directory instance, to associate and validate the cache. As a result, cached directory contents can now be correctly reused, improving performance for repeated directory listings. Performance gains with local windows SMB server: Without the patch and default actimeo=1: 1000 directory enumeration operations on dir with 10k files took 135.0s With this patch and actimeo=0: 1000 directory enumeration operations on dir with 10k files took just 5.1s Signed-off-by: Bharath SM Reviewed-by: Shyam Prasad N Cc: stable@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/cached_dir.h | 8 ++++---- fs/smb/client/readdir.c | 28 +++++++++++++++------------- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/fs/smb/client/cached_dir.h b/fs/smb/client/cached_dir.h index 1dfe79d947a6..bc8a812ff95f 100644 --- a/fs/smb/client/cached_dir.h +++ b/fs/smb/client/cached_dir.h @@ -21,10 +21,10 @@ struct cached_dirent { struct cached_dirents { bool is_valid:1; bool is_failed:1; - struct dir_context *ctx; /* - * Only used to make sure we only take entries - * from a single context. Never dereferenced. - */ + struct file *file; /* + * Used to associate the cache with a single + * open file instance. + */ struct mutex de_mutex; int pos; /* Expected ctx->pos */ struct list_head entries; diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c index f9f11cbf89be..ba0193cf9033 100644 --- a/fs/smb/client/readdir.c +++ b/fs/smb/client/readdir.c @@ -851,9 +851,9 @@ static bool emit_cached_dirents(struct cached_dirents *cde, } static void update_cached_dirents_count(struct cached_dirents *cde, - struct dir_context *ctx) + struct file *file) { - if (cde->ctx != ctx) + if (cde->file != file) return; if (cde->is_valid || cde->is_failed) return; @@ -862,9 +862,9 @@ static void update_cached_dirents_count(struct cached_dirents *cde, } static void finished_cached_dirents_count(struct cached_dirents *cde, - struct dir_context *ctx) + struct dir_context *ctx, struct file *file) { - if (cde->ctx != ctx) + if (cde->file != file) return; if (cde->is_valid || cde->is_failed) return; @@ -877,11 +877,12 @@ static void finished_cached_dirents_count(struct cached_dirents *cde, static void add_cached_dirent(struct cached_dirents *cde, struct dir_context *ctx, const char *name, int namelen, - struct cifs_fattr *fattr) + struct cifs_fattr *fattr, + struct file *file) { struct cached_dirent *de; - if (cde->ctx != ctx) + if (cde->file != file) return; if (cde->is_valid || cde->is_failed) return; @@ -911,7 +912,8 @@ static void add_cached_dirent(struct cached_dirents *cde, static bool cifs_dir_emit(struct dir_context *ctx, const char *name, int namelen, struct cifs_fattr *fattr, - struct cached_fid *cfid) + struct cached_fid *cfid, + struct file *file) { bool rc; ino_t ino = cifs_uniqueid_to_ino_t(fattr->cf_uniqueid); @@ -923,7 +925,7 @@ static bool cifs_dir_emit(struct dir_context *ctx, if (cfid) { mutex_lock(&cfid->dirents.de_mutex); add_cached_dirent(&cfid->dirents, ctx, name, namelen, - fattr); + fattr, file); mutex_unlock(&cfid->dirents.de_mutex); } @@ -1023,7 +1025,7 @@ static int cifs_filldir(char *find_entry, struct file *file, cifs_prime_dcache(file_dentry(file), &name, &fattr); return !cifs_dir_emit(ctx, name.name, name.len, - &fattr, cfid); + &fattr, cfid, file); } @@ -1074,8 +1076,8 @@ int cifs_readdir(struct file *file, struct dir_context *ctx) * we need to initialize scanning and storing the * directory content. */ - if (ctx->pos == 0 && cfid->dirents.ctx == NULL) { - cfid->dirents.ctx = ctx; + if (ctx->pos == 0 && cfid->dirents.file == NULL) { + cfid->dirents.file = file; cfid->dirents.pos = 2; } /* @@ -1143,7 +1145,7 @@ int cifs_readdir(struct file *file, struct dir_context *ctx) } else { if (cfid) { mutex_lock(&cfid->dirents.de_mutex); - finished_cached_dirents_count(&cfid->dirents, ctx); + finished_cached_dirents_count(&cfid->dirents, ctx, file); mutex_unlock(&cfid->dirents.de_mutex); } cifs_dbg(FYI, "Could not find entry\n"); @@ -1184,7 +1186,7 @@ int cifs_readdir(struct file *file, struct dir_context *ctx) ctx->pos++; if (cfid) { mutex_lock(&cfid->dirents.de_mutex); - update_cached_dirents_count(&cfid->dirents, ctx); + update_cached_dirents_count(&cfid->dirents, file); mutex_unlock(&cfid->dirents.de_mutex); } -- cgit v1.2.3 From bb666b7c27073b986b75699e51a7102910f58060 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 9 Jun 2025 17:57:49 +0100 Subject: mm: add mmap_prepare() compatibility layer for nested file systems Nested file systems, that is those which invoke call_mmap() within their own f_op->mmap() handlers, may encounter underlying file systems which provide the f_op->mmap_prepare() hook introduced by commit c84bf6dd2b83 ("mm: introduce new .mmap_prepare() file callback"). We have a chicken-and-egg scenario here - until all file systems are converted to using .mmap_prepare(), we cannot convert these nested handlers, as we can't call f_op->mmap from an .mmap_prepare() hook. So we have to do it the other way round - invoke the .mmap_prepare() hook from an .mmap() one. in order to do so, we need to convert VMA state into a struct vm_area_desc descriptor, invoking the underlying file system's f_op->mmap_prepare() callback passing a pointer to this, and then setting VMA state accordingly and safely. This patch achieves this via the compat_vma_mmap_prepare() function, which we invoke from call_mmap() if f_op->mmap_prepare() is specified in the passed in file pointer. We place the fundamental logic into mm/vma.h where VMA manipulation belongs. We also update the VMA userland tests to accommodate the changes. The compat_vma_mmap_prepare() function and its associated machinery is temporary, and will be removed once the conversion of file systems is complete. We carefully place this code so it can be used with CONFIG_MMU and also with cutting edge nommu silicon. [akpm@linux-foundation.org: export compat_vma_mmap_prepare tp fix build] [lorenzo.stoakes@oracle.com: remove unused declarations] Link: https://lkml.kernel.org/r/ac3ae324-4c65-432a-8c6d-2af988b18ac8@lucifer.local Link: https://lkml.kernel.org/r/20250609165749.344976-1-lorenzo.stoakes@oracle.com Fixes: c84bf6dd2b83 ("mm: introduce new .mmap_prepare() file callback"). Signed-off-by: Lorenzo Stoakes Reported-by: Jann Horn Closes: https://lore.kernel.org/linux-mm/CAG48ez04yOEVx1ekzOChARDDBZzAKwet8PEoPM4Ln3_rk91AzQ@mail.gmail.com/ Reviewed-by: Pedro Falcato Reviewed-by: Vlastimil Babka Cc: Al Viro Cc: Christian Brauner Cc: Jan Kara Cc: Jann Horn Cc: Liam Howlett Signed-off-by: Andrew Morton --- include/linux/fs.h | 6 +++-- mm/util.c | 40 ++++++++++++++++++++++++++++++++++ mm/vma.c | 1 - mm/vma.h | 47 ++++++++++++++++++++++++++++++++++++++++ tools/testing/vma/vma_internal.h | 16 ++++++++++++++ 5 files changed, 107 insertions(+), 3 deletions(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index 96c7925a6551..4ec77da65f14 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2274,10 +2274,12 @@ static inline bool file_has_valid_mmap_hooks(struct file *file) return true; } +int compat_vma_mmap_prepare(struct file *file, struct vm_area_struct *vma); + static inline int call_mmap(struct file *file, struct vm_area_struct *vma) { - if (WARN_ON_ONCE(file->f_op->mmap_prepare)) - return -EINVAL; + if (file->f_op->mmap_prepare) + return compat_vma_mmap_prepare(file, vma); return file->f_op->mmap(file, vma); } diff --git a/mm/util.c b/mm/util.c index 448117da071f..0b270c43d7d1 100644 --- a/mm/util.c +++ b/mm/util.c @@ -1131,3 +1131,43 @@ void flush_dcache_folio(struct folio *folio) } EXPORT_SYMBOL(flush_dcache_folio); #endif + +/** + * compat_vma_mmap_prepare() - Apply the file's .mmap_prepare() hook to an + * existing VMA + * @file: The file which possesss an f_op->mmap_prepare() hook + * @vma: The VMA to apply the .mmap_prepare() hook to. + * + * Ordinarily, .mmap_prepare() is invoked directly upon mmap(). However, certain + * 'wrapper' file systems invoke a nested mmap hook of an underlying file. + * + * Until all filesystems are converted to use .mmap_prepare(), we must be + * conservative and continue to invoke these 'wrapper' filesystems using the + * deprecated .mmap() hook. + * + * However we have a problem if the underlying file system possesses an + * .mmap_prepare() hook, as we are in a different context when we invoke the + * .mmap() hook, already having a VMA to deal with. + * + * compat_vma_mmap_prepare() is a compatibility function that takes VMA state, + * establishes a struct vm_area_desc descriptor, passes to the underlying + * .mmap_prepare() hook and applies any changes performed by it. + * + * Once the conversion of filesystems is complete this function will no longer + * be required and will be removed. + * + * Returns: 0 on success or error. + */ +int compat_vma_mmap_prepare(struct file *file, struct vm_area_struct *vma) +{ + struct vm_area_desc desc; + int err; + + err = file->f_op->mmap_prepare(vma_to_desc(vma, &desc)); + if (err) + return err; + set_vma_from_desc(vma, &desc); + + return 0; +} +EXPORT_SYMBOL(compat_vma_mmap_prepare); diff --git a/mm/vma.c b/mm/vma.c index 0fb9b2c7b734..fef67a66a095 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -3113,7 +3113,6 @@ int __vm_munmap(unsigned long start, size_t len, bool unlock) return ret; } - /* Insert vm structure into process list sorted by address * and into the inode's i_mmap tree. If vm_file is non-NULL * then i_mmap_rwsem is taken here. diff --git a/mm/vma.h b/mm/vma.h index 0db066e7a45d..f47112a352db 100644 --- a/mm/vma.h +++ b/mm/vma.h @@ -222,6 +222,53 @@ static inline int vma_iter_store_gfp(struct vma_iterator *vmi, return 0; } + +/* + * Temporary helper functions for file systems which wrap an invocation of + * f_op->mmap() but which might have an underlying file system which implements + * f_op->mmap_prepare(). + */ + +static inline struct vm_area_desc *vma_to_desc(struct vm_area_struct *vma, + struct vm_area_desc *desc) +{ + desc->mm = vma->vm_mm; + desc->start = vma->vm_start; + desc->end = vma->vm_end; + + desc->pgoff = vma->vm_pgoff; + desc->file = vma->vm_file; + desc->vm_flags = vma->vm_flags; + desc->page_prot = vma->vm_page_prot; + + desc->vm_ops = NULL; + desc->private_data = NULL; + + return desc; +} + +static inline void set_vma_from_desc(struct vm_area_struct *vma, + struct vm_area_desc *desc) +{ + /* + * Since we're invoking .mmap_prepare() despite having a partially + * established VMA, we must take care to handle setting fields + * correctly. + */ + + /* Mutable fields. Populated with initial state. */ + vma->vm_pgoff = desc->pgoff; + if (vma->vm_file != desc->file) + vma_set_file(vma, desc->file); + if (vma->vm_flags != desc->vm_flags) + vm_flags_set(vma, desc->vm_flags); + vma->vm_page_prot = desc->page_prot; + + /* User-defined fields. */ + vma->vm_ops = desc->vm_ops; + vma->vm_private_data = desc->private_data; +} + int do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, struct mm_struct *mm, unsigned long start, diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index 4505b1c31be1..14718ca23a05 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -159,6 +159,14 @@ typedef __bitwise unsigned int vm_fault_t; #define ASSERT_EXCLUSIVE_WRITER(x) +/** + * swap - swap values of @a and @b + * @a: first value + * @b: second value + */ +#define swap(a, b) \ + do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) + struct kref { refcount_t refcount; }; @@ -1468,4 +1476,12 @@ static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma) (void)vma; } +static inline void vma_set_file(struct vm_area_struct *vma, struct file *file) +{ + /* Changing an anonymous vma with this is illegal */ + get_file(file); + swap(vma->vm_file, file); + fput(file); +} + #endif /* __MM_VMA_INTERNAL_H */ -- cgit v1.2.3 From 9f0ad43b158d07bc7144d219ceabdea36e28e392 Mon Sep 17 00:00:00 2001 From: Thangaraj Samynathan Date: Thu, 12 Jun 2025 08:00:59 +0530 Subject: spi: spi-pci1xxxx: Drop MSI-X usage as unsupported by DMA engine Removes MSI-X from the interrupt request path, as the DMA engine used by the SPI controller does not support MSI-X interrupts. Signed-off-by: Thangaraj Samynathan Link: https://patch.msgid.link/20250612023059.71726-1-thangaraj.s@microchip.com Signed-off-by: Mark Brown --- drivers/spi/spi-pci1xxxx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-pci1xxxx.c b/drivers/spi/spi-pci1xxxx.c index 9112d8a1a0c8..e27642c4dea4 100644 --- a/drivers/spi/spi-pci1xxxx.c +++ b/drivers/spi/spi-pci1xxxx.c @@ -762,7 +762,7 @@ static int pci1xxxx_spi_probe(struct pci_dev *pdev, const struct pci_device_id * return -EINVAL; num_vector = pci_alloc_irq_vectors(pdev, 1, hw_inst_cnt, - PCI_IRQ_ALL_TYPES); + PCI_IRQ_INTX | PCI_IRQ_MSI); if (num_vector < 0) { dev_err(&pdev->dev, "Error allocating MSI vectors\n"); return num_vector; -- cgit v1.2.3 From f826ec7966a63d48e16e0868af4e038bf9a1a3ae Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 12 Jun 2025 15:41:25 +0100 Subject: bio: Fix bio_first_folio() for SPARSEMEM without VMEMMAP It is possible for physically contiguous folios to have discontiguous struct pages if SPARSEMEM is enabled and SPARSEMEM_VMEMMAP is not. This is correctly handled by folio_page_idx(), so remove this open-coded implementation. Fixes: 640d1930bef4 (block: Add bio_for_each_folio_all()) Signed-off-by: Matthew Wilcox (Oracle) Link: https://lore.kernel.org/r/20250612144126.2849931-1-willy@infradead.org Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/bio.h b/include/linux/bio.h index 9c37c66ef9ca..46ffac5caab7 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -291,7 +291,7 @@ static inline void bio_first_folio(struct folio_iter *fi, struct bio *bio, fi->folio = page_folio(bvec->bv_page); fi->offset = bvec->bv_offset + - PAGE_SIZE * (bvec->bv_page - &fi->folio->page); + PAGE_SIZE * folio_page_idx(fi->folio, bvec->bv_page); fi->_seg_count = bvec->bv_len; fi->length = min(folio_size(fi->folio) - fi->offset, fi->_seg_count); fi->_next = folio_next(fi->folio); -- cgit v1.2.3 From 5e223e06ee7c6d8f630041a0645ac90e39a42cc6 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 12 Jun 2025 15:42:53 +0100 Subject: block: Fix bvec_set_folio() for very large folios Similarly to 26064d3e2b4d ("block: fix adding folio to bio"), if we attempt to add a folio that is larger than 4GB, we'll silently truncate the offset and len. Widen the parameters to size_t, assert that the length is less than 4GB and set the first page that contains the interesting data rather than the first page of the folio. Fixes: 26db5ee15851 (block: add a bvec_set_folio helper) Signed-off-by: Matthew Wilcox (Oracle) Link: https://lore.kernel.org/r/20250612144255.2850278-1-willy@infradead.org Signed-off-by: Jens Axboe --- include/linux/bvec.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 204b22a99c4b..0a80e1f9aa20 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -57,9 +57,12 @@ static inline void bvec_set_page(struct bio_vec *bv, struct page *page, * @offset: offset into the folio */ static inline void bvec_set_folio(struct bio_vec *bv, struct folio *folio, - unsigned int len, unsigned int offset) + size_t len, size_t offset) { - bvec_set_page(bv, &folio->page, len, offset); + unsigned long nr = offset / PAGE_SIZE; + + WARN_ON_ONCE(len > UINT_MAX); + bvec_set_page(bv, folio_page(folio, nr), len, offset % PAGE_SIZE); } /** -- cgit v1.2.3 From f9705d66fa7107fcd619083f7aae2afb0554a593 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 3 Jun 2025 16:14:45 -0300 Subject: iommu/tegra: Fix incorrect size calculation This driver uses a mixture of ways to get the size of a PTE, tegra_smmu_set_pde() did it as sizeof(*pd) which became wrong when pd switched to a struct tegra_pd. Switch pd back to a u32* in tegra_smmu_set_pde() so the sizeof(*pd) returns 4. Fixes: 50568f87d1e2 ("iommu/terga: Do not use struct page as the handle for as->pd memory") Reported-by: Diogo Ivo Closes: https://lore.kernel.org/all/62e7f7fe-6200-4e4f-ad42-d58ad272baa6@tecnico.ulisboa.pt/ Signed-off-by: Jason Gunthorpe Acked-by: Thierry Reding Reviewed-by: Jerry Snitselaar Tested-by: Diogo Ivo Link: https://lore.kernel.org/r/0-v1-da7b8b3d57eb+ce-iommu_terga_sizeof_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/tegra-smmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 61897d50162d..e58fe9d8b9e7 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -559,11 +559,11 @@ static void tegra_smmu_set_pde(struct tegra_smmu_as *as, unsigned long iova, { unsigned int pd_index = iova_pd_index(iova); struct tegra_smmu *smmu = as->smmu; - struct tegra_pd *pd = as->pd; + u32 *pd = &as->pd->val[pd_index]; unsigned long offset = pd_index * sizeof(*pd); /* Set the page directory entry first */ - pd->val[pd_index] = value; + *pd = value; /* The flush the page directory entry from caches */ dma_sync_single_range_for_device(smmu->dev, as->pd_dma, offset, -- cgit v1.2.3 From db3dfae1a2f662e69d535827703bcdbb04b8d72b Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Fri, 13 Jun 2025 09:38:57 +0700 Subject: Documentation: ublk: Separate UBLK_F_AUTO_BUF_REG fallback behavior sublists Stephen Rothwell reports htmldocs warning on ublk docs: Documentation/block/ublk.rst:414: ERROR: Unexpected indentation. [docutils] Fix the warning by separating sublists of auto buffer registration fallback behavior from their appropriate parent list item. Fixes: ff20c516485e ("ublk: document auto buffer registration(UBLK_F_AUTO_BUF_REG)") Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/linux-next/20250612132638.193de386@canb.auug.org.au/ Signed-off-by: Bagas Sanjaya Link: https://lore.kernel.org/r/20250613023857.15971-1-bagasdotme@gmail.com Signed-off-by: Jens Axboe --- Documentation/block/ublk.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/block/ublk.rst b/Documentation/block/ublk.rst index abec524a04ed..8c4030bcabb6 100644 --- a/Documentation/block/ublk.rst +++ b/Documentation/block/ublk.rst @@ -408,6 +408,7 @@ Fallback Behavior If auto buffer registration fails: 1. When ``UBLK_AUTO_BUF_REG_FALLBACK`` is enabled: + - The uring_cmd is completed - ``UBLK_IO_F_NEED_REG_BUF`` is set in ``ublksrv_io_desc.op_flags`` - The ublk server must manually deal with the failure, such as, register @@ -415,6 +416,7 @@ If auto buffer registration fails: for handling ublk IO 2. If fallback is not enabled: + - The ublk I/O request fails silently - The uring_cmd won't be completed -- cgit v1.2.3 From 26ec15e4b0c1d7b25214d9c0be1d50492e2f006c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 13 Jun 2025 11:01:49 -0600 Subject: io_uring/kbuf: don't truncate end buffer for multiple buffer peeks If peeking a bunch of buffers, normally io_ring_buffers_peek() will truncate the end buffer. This isn't optimal as presumably more data will be arriving later, and hence it's better to stop with the last full buffer rather than truncate the end buffer. Cc: stable@vger.kernel.org Fixes: 35c8711c8fc4 ("io_uring/kbuf: add helpers for getting/peeking multiple buffers") Reported-by: Christian Mazakas Signed-off-by: Jens Axboe --- io_uring/kbuf.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 2ea65f3cef72..ce95e3af44a9 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -270,8 +270,11 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg, /* truncate end piece, if needed, for non partial buffers */ if (len > arg->max_len) { len = arg->max_len; - if (!(bl->flags & IOBL_INC)) + if (!(bl->flags & IOBL_INC)) { + if (iov != arg->iovs) + break; buf->len = len; + } } iov->iov_base = u64_to_user_ptr(buf->addr); -- cgit v1.2.3 From f90fff1e152dedf52b932240ebbd670d83330eca Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 13 Jun 2025 19:26:50 +0200 Subject: posix-cpu-timers: fix race between handle_posix_cpu_timers() and posix_cpu_timer_del() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If an exiting non-autoreaping task has already passed exit_notify() and calls handle_posix_cpu_timers() from IRQ, it can be reaped by its parent or debugger right after unlock_task_sighand(). If a concurrent posix_cpu_timer_del() runs at that moment, it won't be able to detect timer->it.cpu.firing != 0: cpu_timer_task_rcu() and/or lock_task_sighand() will fail. Add the tsk->exit_state check into run_posix_cpu_timers() to fix this. This fix is not needed if CONFIG_POSIX_CPU_TIMERS_TASK_WORK=y, because exit_task_work() is called before exit_notify(). But the check still makes sense, task_work_add(&tsk->posix_cputimers_work.work) will fail anyway in this case. Cc: stable@vger.kernel.org Reported-by: Benoît Sevens Fixes: 0bdd2ed4138e ("sched: run_posix_cpu_timers: Don't check ->exit_state, use lock_task_sighand()") Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- kernel/time/posix-cpu-timers.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 50e8d04ab661..2e5b89d7d866 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -1405,6 +1405,15 @@ void run_posix_cpu_timers(void) lockdep_assert_irqs_disabled(); + /* + * Ensure that release_task(tsk) can't happen while + * handle_posix_cpu_timers() is running. Otherwise, a concurrent + * posix_cpu_timer_del() may fail to lock_task_sighand(tsk) and + * miss timer->it.cpu.firing != 0. + */ + if (tsk->exit_state) + return; + /* * If the actual expiry is deferred to task work context and the * work is already scheduled there is no point to do anything here. -- cgit v1.2.3 From 9ce6c9875f3e995be5fd720b65835291f8a609b1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 13 Jun 2025 13:37:41 -0600 Subject: nvme: always punt polled uring_cmd end_io work to task_work Currently NVMe uring_cmd completions will complete locally, if they are polled. This is done because those completions are always invoked from task context. And while that is true, there's no guarantee that it's invoked under the right ring context, or even task. If someone does NVMe passthrough via multiple threads and with a limited number of poll queues, then ringA may find completions from ringB. For that case, completing the request may not be sound. Always just punt the passthrough completions via task_work, which will redirect the completion, if needed. Cc: stable@vger.kernel.org Fixes: 585079b6e425 ("nvme: wire up async polling for io passthrough commands") Signed-off-by: Jens Axboe --- drivers/nvme/host/ioctl.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 0b50da2f1175..6b3ac8ae3f34 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -429,21 +429,14 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req, pdu->result = le64_to_cpu(nvme_req(req)->result.u64); /* - * For iopoll, complete it directly. Note that using the uring_cmd - * helper for this is safe only because we check blk_rq_is_poll(). - * As that returns false if we're NOT on a polled queue, then it's - * safe to use the polled completion helper. - * - * Otherwise, move the completion to task work. + * IOPOLL could potentially complete this request directly, but + * if multiple rings are polling on the same queue, then it's possible + * for one ring to find completions for another ring. Punting the + * completion via task_work will always direct it to the right + * location, rather than potentially complete requests for ringA + * under iopoll invocations from ringB. */ - if (blk_rq_is_poll(req)) { - if (pdu->bio) - blk_rq_unmap_user(pdu->bio); - io_uring_cmd_iopoll_done(ioucmd, pdu->result, pdu->status); - } else { - io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb); - } - + io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb); return RQ_END_IO_FREE; } -- cgit v1.2.3 From b62e0efd8a8571460d05922862a451855ebdf3c6 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 13 Jun 2025 15:24:53 -0600 Subject: io_uring: run local task_work from ring exit IOPOLL reaping In preparation for needing to shift NVMe passthrough to always use task_work for polled IO completions, ensure that those are suitably run at exit time. See commit: 9ce6c9875f3e ("nvme: always punt polled uring_cmd end_io work to task_work") for details on why that is necessary. Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 4e32f808d07d..5111ec040c53 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1523,6 +1523,9 @@ static __cold void io_iopoll_try_reap_events(struct io_ring_ctx *ctx) } } mutex_unlock(&ctx->uring_lock); + + if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) + io_move_task_work_from_local(ctx); } static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned int min_events) -- cgit v1.2.3 From a6a7946bd691940cfe7289ae6dfb1f077516df72 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 13 Jun 2025 01:08:48 +0900 Subject: kbuild: move warnings about linux/export.h from W=1 to W=2 This hides excessive warnings, as nobody builds with W=2. Fixes: a934a57a42f6 ("scripts/misc-check: check missing #include when W=1") Fixes: 7d95680d64ac ("scripts/misc-check: check unnecessary #include when W=1") Signed-off-by: Masahiro Yamada Acked-by: Arnd Bergmann Reviewed-by: Nathan Chancellor Acked-by: Heiko Carstens --- Makefile | 3 --- scripts/misc-check | 15 ++++++++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 35e6e5240c61..69c534982415 100644 --- a/Makefile +++ b/Makefile @@ -1832,12 +1832,9 @@ rustfmtcheck: rustfmt # Misc # --------------------------------------------------------------------------- -# Run misc checks when ${KBUILD_EXTRA_WARN} contains 1 PHONY += misc-check -ifneq ($(findstring 1,$(KBUILD_EXTRA_WARN)),) misc-check: $(Q)$(srctree)/scripts/misc-check -endif all: misc-check diff --git a/scripts/misc-check b/scripts/misc-check index a74450e799d1..84f08da17b2c 100755 --- a/scripts/misc-check +++ b/scripts/misc-check @@ -62,6 +62,15 @@ check_unnecessary_include_linux_export_h () { xargs -r printf "%s: warning: EXPORT_SYMBOL() is not used, but #include is present\n" >&2 } -check_tracked_ignored_files -check_missing_include_linux_export_h -check_unnecessary_include_linux_export_h +case "${KBUILD_EXTRA_WARN}" in +*1*) + check_tracked_ignored_files + ;; +esac + +case "${KBUILD_EXTRA_WARN}" in +*2*) + check_missing_include_linux_export_h + check_unnecessary_include_linux_export_h + ;; +esac -- cgit v1.2.3 From 2f6b47b295518c3ba16fabb1dddbe6a319899acb Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Sat, 14 Jun 2025 00:55:33 +0000 Subject: gendwarfksyms: Fix structure type overrides As we always iterate through the entire die_map when expanding type strings, recursively processing referenced types in type_expand_child() is not actually necessary. Furthermore, the type_string kABI rule added in commit c9083467f7b9 ("gendwarfksyms: Add a kABI rule to override type strings") can fail to override type strings for structures due to a missing kabi_get_type_string() check in this function. Fix the issue by dropping the unnecessary recursion and moving the override check to type_expand(). Note that symbol versions are otherwise unchanged with this patch. Fixes: c9083467f7b9 ("gendwarfksyms: Add a kABI rule to override type strings") Reported-by: Giuliano Procida Signed-off-by: Sami Tolvanen Reviewed-by: Petr Pavlu Signed-off-by: Masahiro Yamada --- scripts/gendwarfksyms/gendwarfksyms.h | 14 ++------ scripts/gendwarfksyms/types.c | 65 ++++++++++------------------------- 2 files changed, 21 insertions(+), 58 deletions(-) diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h index 7dd03ffe0c5c..d9c06d2cb1df 100644 --- a/scripts/gendwarfksyms/gendwarfksyms.h +++ b/scripts/gendwarfksyms/gendwarfksyms.h @@ -216,24 +216,14 @@ int cache_get(struct cache *cache, unsigned long key); void cache_init(struct cache *cache); void cache_free(struct cache *cache); -static inline void __cache_mark_expanded(struct cache *cache, uintptr_t addr) -{ - cache_set(cache, addr, 1); -} - -static inline bool __cache_was_expanded(struct cache *cache, uintptr_t addr) -{ - return cache_get(cache, addr) == 1; -} - static inline void cache_mark_expanded(struct cache *cache, void *addr) { - __cache_mark_expanded(cache, (uintptr_t)addr); + cache_set(cache, (unsigned long)addr, 1); } static inline bool cache_was_expanded(struct cache *cache, void *addr) { - return __cache_was_expanded(cache, (uintptr_t)addr); + return cache_get(cache, (unsigned long)addr) == 1; } /* diff --git a/scripts/gendwarfksyms/types.c b/scripts/gendwarfksyms/types.c index 39ce1770e463..7bd459ea6c59 100644 --- a/scripts/gendwarfksyms/types.c +++ b/scripts/gendwarfksyms/types.c @@ -333,37 +333,11 @@ static void calculate_version(struct version *version, cache_free(&expansion_cache); } -static void __type_expand(struct die *cache, struct type_expansion *type, - bool recursive); - -static void type_expand_child(struct die *cache, struct type_expansion *type, - bool recursive) -{ - struct type_expansion child; - char *name; - - name = get_type_name(cache); - if (!name) { - __type_expand(cache, type, recursive); - return; - } - - if (recursive && !__cache_was_expanded(&expansion_cache, cache->addr)) { - __cache_mark_expanded(&expansion_cache, cache->addr); - type_expansion_init(&child); - __type_expand(cache, &child, true); - type_map_add(name, &child); - type_expansion_free(&child); - } - - type_expansion_append(type, name, name); -} - -static void __type_expand(struct die *cache, struct type_expansion *type, - bool recursive) +static void __type_expand(struct die *cache, struct type_expansion *type) { struct die_fragment *df; struct die *child; + char *name; list_for_each_entry(df, &cache->fragments, list) { switch (df->type) { @@ -379,7 +353,12 @@ static void __type_expand(struct die *cache, struct type_expansion *type, error("unknown child: %" PRIxPTR, df->data.addr); - type_expand_child(child, type, recursive); + name = get_type_name(child); + if (name) + type_expansion_append(type, name, name); + else + __type_expand(child, type); + break; case FRAGMENT_LINEBREAK: /* @@ -397,12 +376,17 @@ static void __type_expand(struct die *cache, struct type_expansion *type, } } -static void type_expand(struct die *cache, struct type_expansion *type, - bool recursive) +static void type_expand(const char *name, struct die *cache, + struct type_expansion *type) { + const char *override; + type_expansion_init(type); - __type_expand(cache, type, recursive); - cache_free(&expansion_cache); + + if (stable && kabi_get_type_string(name, &override)) + type_parse(name, override, type); + else + __type_expand(cache, type); } static void type_parse(const char *name, const char *str, @@ -416,8 +400,6 @@ static void type_parse(const char *name, const char *str, if (!*str) error("empty type string override for '%s'", name); - type_expansion_init(type); - for (pos = 0; str[pos]; ++pos) { bool empty; char marker = ' '; @@ -478,7 +460,6 @@ static void type_parse(const char *name, const char *str, static void expand_type(struct die *cache, void *arg) { struct type_expansion type; - const char *override; char *name; if (cache->mapped) @@ -504,11 +485,7 @@ static void expand_type(struct die *cache, void *arg) debug("%s", name); - if (stable && kabi_get_type_string(name, &override)) - type_parse(name, override, &type); - else - type_expand(cache, &type, true); - + type_expand(name, cache, &type); type_map_add(name, &type); type_expansion_free(&type); free(name); @@ -518,7 +495,6 @@ static void expand_symbol(struct symbol *sym, void *arg) { struct type_expansion type; struct version version; - const char *override; struct die *cache; /* @@ -532,10 +508,7 @@ static void expand_symbol(struct symbol *sym, void *arg) if (__die_map_get(sym->die_addr, DIE_SYMBOL, &cache)) return; /* We'll warn about missing CRCs later. */ - if (stable && kabi_get_type_string(sym->name, &override)) - type_parse(sym->name, override, &type); - else - type_expand(cache, &type, false); + type_expand(sym->name, cache, &type); /* If the symbol already has a version, don't calculate it again. */ if (sym->state != SYMBOL_PROCESSED) { -- cgit v1.2.3 From e04c78d86a9699d136910cfc0bdcf01087e3267e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 15 Jun 2025 13:49:41 -0700 Subject: Linux 6.16-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 69c534982415..ba0827a1fccd 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 16 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Baby Opossum Posse # *DOCUMENTATION* -- cgit v1.2.3