diff options
Diffstat (limited to 'arch')
27 files changed, 179 insertions, 203 deletions
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 2b07f0a27a7d..0ee4f6fa3a17 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1369,6 +1369,7 @@ static inline bool kvm_system_needs_idmapped_vectors(void) } void kvm_init_host_debug_data(void); +void kvm_debug_init_vhe(void); void kvm_vcpu_load_debug(struct kvm_vcpu *vcpu); void kvm_vcpu_put_debug(struct kvm_vcpu *vcpu); void kvm_debug_set_guest_ownership(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 1246216616b5..2888b5d03757 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -355,11 +355,6 @@ static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walke return pteref; } -static inline kvm_pte_t *kvm_dereference_pteref_raw(kvm_pteref_t pteref) -{ - return pteref; -} - static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker) { /* @@ -389,11 +384,6 @@ static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walke return rcu_dereference_check(pteref, !(walker->flags & KVM_PGTABLE_WALK_SHARED)); } -static inline kvm_pte_t *kvm_dereference_pteref_raw(kvm_pteref_t pteref) -{ - return rcu_dereference_raw(pteref); -} - static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker) { if (walker->flags & KVM_PGTABLE_WALK_SHARED) @@ -562,26 +552,6 @@ static inline int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2 void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); /** - * kvm_pgtable_stage2_destroy_range() - Destroy the unlinked range of addresses. - * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). - * @addr: Intermediate physical address at which to place the mapping. - * @size: Size of the mapping. - * - * The page-table is assumed to be unreachable by any hardware walkers prior - * to freeing and therefore no TLB invalidation is performed. - */ -void kvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt, - u64 addr, u64 size); - -/** - * kvm_pgtable_stage2_destroy_pgd() - Destroy the PGD of guest stage-2 page-table. - * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). - * - * It is assumed that the rest of the page-table is freed before this operation. - */ -void kvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt); - -/** * kvm_pgtable_stage2_free_unlinked() - Free an unlinked stage-2 paging structure. * @mm_ops: Memory management callbacks. * @pgtable: Unlinked stage-2 paging structure to be freed. diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h index 35f9d9478004..ea58282f59bb 100644 --- a/arch/arm64/include/asm/kvm_pkvm.h +++ b/arch/arm64/include/asm/kvm_pkvm.h @@ -179,9 +179,7 @@ struct pkvm_mapping { int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, struct kvm_pgtable_mm_ops *mm_ops); -void pkvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt, - u64 addr, u64 size); -void pkvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt); +void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, enum kvm_pgtable_prot prot, void *mc, enum kvm_pgtable_walk_flags flags); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 5bf101c869c9..bd6b6a620a09 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -2113,8 +2113,10 @@ static void cpu_hyp_init_features(void) { cpu_set_hyp_vector(); - if (is_kernel_in_hyp_mode()) + if (is_kernel_in_hyp_mode()) { kvm_timer_init_vhe(); + kvm_debug_init_vhe(); + } if (vgic_present) kvm_vgic_init_cpu_hardware(); diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index 381382c19fe4..e027d9c32b0d 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -96,6 +96,13 @@ void kvm_init_host_debug_data(void) } } +void kvm_debug_init_vhe(void) +{ + /* Clear PMSCR_EL1.E{0,1}SPE which reset to UNKNOWN values. */ + if (SYS_FIELD_GET(ID_AA64DFR0_EL1, PMSVer, read_sysreg(id_aa64dfr0_el1))) + write_sysreg_el1(0, SYS_PMSCR); +} + /* * Configures the 'external' MDSCR_EL1 value for the guest, i.e. when the host * has taken over MDSCR_EL1. @@ -138,6 +145,9 @@ void kvm_vcpu_load_debug(struct kvm_vcpu *vcpu) /* Must be called before kvm_vcpu_load_vhe() */ KVM_BUG_ON(vcpu_get_flag(vcpu, SYSREGS_ON_CPU), vcpu->kvm); + if (has_vhe()) + *host_data_ptr(host_debug_state.mdcr_el2) = read_sysreg(mdcr_el2); + /* * Determine which of the possible debug states we're in: * @@ -184,6 +194,9 @@ void kvm_vcpu_load_debug(struct kvm_vcpu *vcpu) void kvm_vcpu_put_debug(struct kvm_vcpu *vcpu) { + if (has_vhe()) + write_sysreg(*host_data_ptr(host_debug_state.mdcr_el2), mdcr_el2); + if (likely(!(vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP))) return; diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 84ec4e100fbb..b6682202edf3 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -431,9 +431,6 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) vcpu_set_flag(vcpu, PMUSERENR_ON_CPU); } - *host_data_ptr(host_debug_state.mdcr_el2) = read_sysreg(mdcr_el2); - write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); - if (cpus_have_final_cap(ARM64_HAS_HCX)) { u64 hcrx = vcpu->arch.hcrx_el2; if (is_nested_ctxt(vcpu)) { @@ -454,8 +451,6 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt); - write_sysreg(*host_data_ptr(host_debug_state.mdcr_el2), mdcr_el2); - write_sysreg(0, hstr_el2); if (system_supports_pmuv3()) { write_sysreg(ctxt_sys_reg(hctxt, PMUSERENR_EL0), pmuserenr_el0); diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index ccd575d5f6de..d3b9ec8a7c28 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -50,6 +50,10 @@ extern void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc); static void __activate_traps(struct kvm_vcpu *vcpu) { ___activate_traps(vcpu, vcpu->arch.hcr_el2); + + *host_data_ptr(host_debug_state.mdcr_el2) = read_sysreg(mdcr_el2); + write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); + __activate_traps_common(vcpu); __activate_cptr_traps(vcpu); @@ -93,6 +97,8 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) isb(); } + write_sysreg(*host_data_ptr(host_debug_state.mdcr_el2), mdcr_el2); + __deactivate_traps_common(vcpu); write_sysreg_hcr(this_cpu_ptr(&kvm_init_params)->hcr_el2); diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index 71d2fc97f004..82da9b03692d 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -253,7 +253,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu) *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR); *vcpu_cpsr(vcpu) = read_sysreg_el2(SYS_SPSR); - __vcpu_assign_sys_reg(vcpu, read_sysreg_el1(SYS_VBAR), VBAR_EL1); + __vcpu_assign_sys_reg(vcpu, VBAR_EL1, read_sysreg_el1(SYS_VBAR)); kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC); diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index c36f282a175d..c351b4abd5db 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -1551,38 +1551,21 @@ static int stage2_free_walker(const struct kvm_pgtable_visit_ctx *ctx, return 0; } -void kvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt, - u64 addr, u64 size) +void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) { + size_t pgd_sz; struct kvm_pgtable_walker walker = { .cb = stage2_free_walker, .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, }; - WARN_ON(kvm_pgtable_walk(pgt, addr, size, &walker)); -} - -void kvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt) -{ - size_t pgd_sz; - + WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker)); pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE; - - /* - * Since the pgtable is unlinked at this point, and not shared with - * other walkers, safely deference pgd with kvm_dereference_pteref_raw() - */ - pgt->mm_ops->free_pages_exact(kvm_dereference_pteref_raw(pgt->pgd), pgd_sz); + pgt->mm_ops->free_pages_exact(kvm_dereference_pteref(&walker, pgt->pgd), pgd_sz); pgt->pgd = NULL; } -void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) -{ - kvm_pgtable_stage2_destroy_range(pgt, 0, BIT(pgt->ia_bits)); - kvm_pgtable_stage2_destroy_pgd(pgt); -} - void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level) { kvm_pteref_t ptep = (kvm_pteref_t)pgtable; diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 86f3d80daf37..736394292503 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -904,38 +904,6 @@ static int kvm_init_ipa_range(struct kvm_s2_mmu *mmu, unsigned long type) return 0; } -/* - * Assume that @pgt is valid and unlinked from the KVM MMU to free the - * page-table without taking the kvm_mmu_lock and without performing any - * TLB invalidations. - * - * Also, the range of addresses can be large enough to cause need_resched - * warnings, for instance on CONFIG_PREEMPT_NONE kernels. Hence, invoke - * cond_resched() periodically to prevent hogging the CPU for a long time - * and schedule something else, if required. - */ -static void stage2_destroy_range(struct kvm_pgtable *pgt, phys_addr_t addr, - phys_addr_t end) -{ - u64 next; - - do { - next = stage2_range_addr_end(addr, end); - KVM_PGT_FN(kvm_pgtable_stage2_destroy_range)(pgt, addr, - next - addr); - if (next != end) - cond_resched(); - } while (addr = next, addr != end); -} - -static void kvm_stage2_destroy(struct kvm_pgtable *pgt) -{ - unsigned int ia_bits = VTCR_EL2_IPA(pgt->mmu->vtcr); - - stage2_destroy_range(pgt, 0, BIT(ia_bits)); - KVM_PGT_FN(kvm_pgtable_stage2_destroy_pgd)(pgt); -} - /** * kvm_init_stage2_mmu - Initialise a S2 MMU structure * @kvm: The pointer to the KVM structure @@ -1012,7 +980,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t return 0; out_destroy_pgtable: - kvm_stage2_destroy(pgt); + KVM_PGT_FN(kvm_pgtable_stage2_destroy)(pgt); out_free_pgtable: kfree(pgt); return err; @@ -1106,10 +1074,14 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu) mmu->pgt = NULL; free_percpu(mmu->last_vcpu_ran); } + + if (kvm_is_nested_s2_mmu(kvm, mmu)) + kvm_init_nested_s2_mmu(mmu); + write_unlock(&kvm->mmu_lock); if (pgt) { - kvm_stage2_destroy(pgt); + KVM_PGT_FN(kvm_pgtable_stage2_destroy)(pgt); kfree(pgt); } } @@ -1541,11 +1513,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu); VM_BUG_ON(write_fault && exec_fault); - if (fault_is_perm && !write_fault && !exec_fault) { - kvm_err("Unexpected L2 read permission error\n"); - return -EFAULT; - } - if (!is_protected_kvm_enabled()) memcache = &vcpu->arch.mmu_page_cache; else diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 77db81bae86f..50d559248a1f 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -847,7 +847,7 @@ static void kvm_invalidate_vncr_ipa(struct kvm *kvm, u64 start, u64 end) ipa_size = ttl_to_size(pgshift_level_to_ttl(vt->wi.pgshift, vt->wr.level)); - ipa_start = vt->wr.pa & (ipa_size - 1); + ipa_start = vt->wr.pa & ~(ipa_size - 1); ipa_end = ipa_start + ipa_size; if (ipa_end <= start || ipa_start >= end) @@ -887,7 +887,7 @@ static void invalidate_vncr_va(struct kvm *kvm, va_size = ttl_to_size(pgshift_level_to_ttl(vt->wi.pgshift, vt->wr.level)); - va_start = vt->gva & (va_size - 1); + va_start = vt->gva & ~(va_size - 1); va_end = va_start + va_size; switch (scope->type) { @@ -1276,7 +1276,7 @@ static bool kvm_vncr_tlb_lookup(struct kvm_vcpu *vcpu) !(tcr & TCR_ASID16)) asid &= GENMASK(7, 0); - return asid != vt->wr.asid; + return asid == vt->wr.asid; } return true; diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c index 61827cf6fea4..fcd70bfe44fb 100644 --- a/arch/arm64/kvm/pkvm.c +++ b/arch/arm64/kvm/pkvm.c @@ -316,16 +316,9 @@ static int __pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 start, u64 e return 0; } -void pkvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt, - u64 addr, u64 size) +void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) { - __pkvm_pgtable_stage2_unmap(pgt, addr, addr + size); -} - -void pkvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt) -{ - /* Expected to be called after all pKVM mappings have been released. */ - WARN_ON_ONCE(!RB_EMPTY_ROOT(&pgt->pkvm_mappings.rb_root)); + __pkvm_pgtable_stage2_unmap(pgt, 0, ~(0ULL)); } int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c index 2684f273d9e1..4c1209261b65 100644 --- a/arch/arm64/kvm/vgic/vgic-debug.c +++ b/arch/arm64/kvm/vgic/vgic-debug.c @@ -69,7 +69,7 @@ static int iter_mark_lpis(struct kvm *kvm) int nr_lpis = 0; xa_for_each(&dist->lpi_xa, intid, irq) { - if (!vgic_try_get_irq_kref(irq)) + if (!vgic_try_get_irq_ref(irq)) continue; xa_set_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER); diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 1e680ad6e863..4c3c0d82e476 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -53,7 +53,7 @@ void kvm_vgic_early_init(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; - xa_init_flags(&dist->lpi_xa, XA_FLAGS_LOCK_IRQ); + xa_init(&dist->lpi_xa); } /* CREATION */ @@ -208,7 +208,7 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis) raw_spin_lock_init(&irq->irq_lock); irq->vcpu = NULL; irq->target_vcpu = vcpu0; - kref_init(&irq->refcount); + refcount_set(&irq->refcount, 0); switch (dist->vgic_model) { case KVM_DEV_TYPE_ARM_VGIC_V2: irq->targets = 0; @@ -277,7 +277,7 @@ static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu, u32 type) irq->intid = i; irq->vcpu = NULL; irq->target_vcpu = vcpu; - kref_init(&irq->refcount); + refcount_set(&irq->refcount, 0); if (vgic_irq_is_sgi(i)) { /* SGIs */ irq->enabled = 1; diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 7368c13f16b7..ce3e3ed3f29f 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -78,7 +78,6 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, { struct vgic_dist *dist = &kvm->arch.vgic; struct vgic_irq *irq = vgic_get_irq(kvm, intid), *oldirq; - unsigned long flags; int ret; /* In this case there is no put, since we keep the reference. */ @@ -89,7 +88,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, if (!irq) return ERR_PTR(-ENOMEM); - ret = xa_reserve_irq(&dist->lpi_xa, intid, GFP_KERNEL_ACCOUNT); + ret = xa_reserve(&dist->lpi_xa, intid, GFP_KERNEL_ACCOUNT); if (ret) { kfree(irq); return ERR_PTR(ret); @@ -99,19 +98,19 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, raw_spin_lock_init(&irq->irq_lock); irq->config = VGIC_CONFIG_EDGE; - kref_init(&irq->refcount); + refcount_set(&irq->refcount, 1); irq->intid = intid; irq->target_vcpu = vcpu; irq->group = 1; - xa_lock_irqsave(&dist->lpi_xa, flags); + xa_lock(&dist->lpi_xa); /* * There could be a race with another vgic_add_lpi(), so we need to * check that we don't add a second list entry with the same LPI. */ oldirq = xa_load(&dist->lpi_xa, intid); - if (vgic_try_get_irq_kref(oldirq)) { + if (vgic_try_get_irq_ref(oldirq)) { /* Someone was faster with adding this LPI, lets use that. */ kfree(irq); irq = oldirq; @@ -126,7 +125,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, } out_unlock: - xa_unlock_irqrestore(&dist->lpi_xa, flags); + xa_unlock(&dist->lpi_xa); if (ret) return ERR_PTR(ret); @@ -547,7 +546,7 @@ static struct vgic_irq *vgic_its_check_cache(struct kvm *kvm, phys_addr_t db, rcu_read_lock(); irq = xa_load(&its->translation_cache, cache_key); - if (!vgic_try_get_irq_kref(irq)) + if (!vgic_try_get_irq_ref(irq)) irq = NULL; rcu_read_unlock(); @@ -571,7 +570,7 @@ static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its, * its_lock, as the ITE (and the reference it holds) cannot be freed. */ lockdep_assert_held(&its->its_lock); - vgic_get_irq_kref(irq); + vgic_get_irq_ref(irq); old = xa_store(&its->translation_cache, cache_key, irq, GFP_KERNEL_ACCOUNT); diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c index 4d9343d2b0b1..548aec9d5a72 100644 --- a/arch/arm64/kvm/vgic/vgic-v4.c +++ b/arch/arm64/kvm/vgic/vgic-v4.c @@ -518,7 +518,7 @@ static struct vgic_irq *__vgic_host_irq_get_vlpi(struct kvm *kvm, int host_irq) if (!irq->hw || irq->host_irq != host_irq) continue; - if (!vgic_try_get_irq_kref(irq)) + if (!vgic_try_get_irq_ref(irq)) return NULL; return irq; diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index f5148b38120a..6dd5a10081e2 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -28,8 +28,8 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = { * kvm->arch.config_lock (mutex) * its->cmd_lock (mutex) * its->its_lock (mutex) - * vgic_cpu->ap_list_lock must be taken with IRQs disabled - * vgic_dist->lpi_xa.xa_lock must be taken with IRQs disabled + * vgic_dist->lpi_xa.xa_lock + * vgic_cpu->ap_list_lock must be taken with IRQs disabled * vgic_irq->irq_lock must be taken with IRQs disabled * * As the ap_list_lock might be taken from the timer interrupt handler, @@ -71,7 +71,7 @@ static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid) rcu_read_lock(); irq = xa_load(&dist->lpi_xa, intid); - if (!vgic_try_get_irq_kref(irq)) + if (!vgic_try_get_irq_ref(irq)) irq = NULL; rcu_read_unlock(); @@ -114,37 +114,66 @@ struct vgic_irq *vgic_get_vcpu_irq(struct kvm_vcpu *vcpu, u32 intid) return vgic_get_irq(vcpu->kvm, intid); } -/* - * We can't do anything in here, because we lack the kvm pointer to - * lock and remove the item from the lpi_list. So we keep this function - * empty and use the return value of kref_put() to trigger the freeing. - */ -static void vgic_irq_release(struct kref *ref) +static void vgic_release_lpi_locked(struct vgic_dist *dist, struct vgic_irq *irq) +{ + lockdep_assert_held(&dist->lpi_xa.xa_lock); + __xa_erase(&dist->lpi_xa, irq->intid); + kfree_rcu(irq, rcu); +} + +static __must_check bool __vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq) +{ + if (irq->intid < VGIC_MIN_LPI) + return false; + + return refcount_dec_and_test(&irq->refcount); +} + +static __must_check bool vgic_put_irq_norelease(struct kvm *kvm, struct vgic_irq *irq) { + if (!__vgic_put_irq(kvm, irq)) + return false; + + irq->pending_release = true; + return true; } void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq) { struct vgic_dist *dist = &kvm->arch.vgic; - unsigned long flags; - if (irq->intid < VGIC_MIN_LPI) - return; + if (irq->intid >= VGIC_MIN_LPI) + might_lock(&dist->lpi_xa.xa_lock); - if (!kref_put(&irq->refcount, vgic_irq_release)) + if (!__vgic_put_irq(kvm, irq)) return; - xa_lock_irqsave(&dist->lpi_xa, flags); - __xa_erase(&dist->lpi_xa, irq->intid); - xa_unlock_irqrestore(&dist->lpi_xa, flags); + xa_lock(&dist->lpi_xa); + vgic_release_lpi_locked(dist, irq); + xa_unlock(&dist->lpi_xa); +} - kfree_rcu(irq, rcu); +static void vgic_release_deleted_lpis(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + unsigned long intid; + struct vgic_irq *irq; + + xa_lock(&dist->lpi_xa); + + xa_for_each(&dist->lpi_xa, intid, irq) { + if (irq->pending_release) + vgic_release_lpi_locked(dist, irq); + } + + xa_unlock(&dist->lpi_xa); } void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_irq *irq, *tmp; + bool deleted = false; unsigned long flags; raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags); @@ -155,11 +184,14 @@ void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu) list_del(&irq->ap_list); irq->vcpu = NULL; raw_spin_unlock(&irq->irq_lock); - vgic_put_irq(vcpu->kvm, irq); + deleted |= vgic_put_irq_norelease(vcpu->kvm, irq); } } raw_spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags); + + if (deleted) + vgic_release_deleted_lpis(vcpu->kvm); } void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending) @@ -399,7 +431,7 @@ retry: * now in the ap_list. This is safe as the caller must already hold a * reference on the irq. */ - vgic_get_irq_kref(irq); + vgic_get_irq_ref(irq); list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head); irq->vcpu = vcpu; @@ -630,6 +662,7 @@ static void vgic_prune_ap_list(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_irq *irq, *tmp; + bool deleted_lpis = false; DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); @@ -657,12 +690,12 @@ retry: /* * This vgic_put_irq call matches the - * vgic_get_irq_kref in vgic_queue_irq_unlock, + * vgic_get_irq_ref in vgic_queue_irq_unlock, * where we added the LPI to the ap_list. As * we remove the irq from the list, we drop * also drop the refcount. */ - vgic_put_irq(vcpu->kvm, irq); + deleted_lpis |= vgic_put_irq_norelease(vcpu->kvm, irq); continue; } @@ -725,6 +758,9 @@ retry: } raw_spin_unlock(&vgic_cpu->ap_list_lock); + + if (unlikely(deleted_lpis)) + vgic_release_deleted_lpis(vcpu->kvm); } static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu) @@ -818,7 +854,7 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu) * the AP list has been sorted already. */ if (multi_sgi && irq->priority > prio) { - _raw_spin_unlock(&irq->irq_lock); + raw_spin_unlock(&irq->irq_lock); break; } diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index de1c1d3261c3..ac5f9c5d2b98 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -267,7 +267,7 @@ void vgic_v2_put(struct kvm_vcpu *vcpu); void vgic_v2_save_state(struct kvm_vcpu *vcpu); void vgic_v2_restore_state(struct kvm_vcpu *vcpu); -static inline bool vgic_try_get_irq_kref(struct vgic_irq *irq) +static inline bool vgic_try_get_irq_ref(struct vgic_irq *irq) { if (!irq) return false; @@ -275,12 +275,12 @@ static inline bool vgic_try_get_irq_kref(struct vgic_irq *irq) if (irq->intid < VGIC_MIN_LPI) return true; - return kref_get_unless_zero(&irq->refcount); + return refcount_inc_not_zero(&irq->refcount); } -static inline void vgic_get_irq_kref(struct vgic_irq *irq) +static inline void vgic_get_irq_ref(struct vgic_irq *irq) { - WARN_ON_ONCE(!vgic_try_get_irq_kref(irq)); + WARN_ON_ONCE(!vgic_try_get_irq_ref(irq)); } void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu); diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h index e5f57cfe1d45..025c6dcbf893 100644 --- a/arch/s390/include/asm/pci_insn.h +++ b/arch/s390/include/asm/pci_insn.h @@ -16,11 +16,11 @@ #define ZPCI_PCI_ST_FUNC_NOT_AVAIL 40 #define ZPCI_PCI_ST_ALREADY_IN_RQ_STATE 44 -/* Load/Store return codes */ -#define ZPCI_PCI_LS_OK 0 -#define ZPCI_PCI_LS_ERR 1 -#define ZPCI_PCI_LS_BUSY 2 -#define ZPCI_PCI_LS_INVAL_HANDLE 3 +/* PCI instruction condition codes */ +#define ZPCI_CC_OK 0 +#define ZPCI_CC_ERR 1 +#define ZPCI_CC_BUSY 2 +#define ZPCI_CC_INVAL_HANDLE 3 /* Load/Store address space identifiers */ #define ZPCI_PCIAS_MEMIO_0 0 diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 2a92a8b9e4c2..9384572ffa7b 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -2778,12 +2778,19 @@ static unsigned long get_ind_bit(__u64 addr, unsigned long bit_nr, bool swap) static struct page *get_map_page(struct kvm *kvm, u64 uaddr) { + struct mm_struct *mm = kvm->mm; struct page *page = NULL; + int locked = 1; + + if (mmget_not_zero(mm)) { + mmap_read_lock(mm); + get_user_pages_remote(mm, uaddr, 1, FOLL_WRITE, + &page, &locked); + if (locked) + mmap_read_unlock(mm); + mmput(mm); + } - mmap_read_lock(kvm->mm); - get_user_pages_remote(kvm->mm, uaddr, 1, FOLL_WRITE, - &page, NULL); - mmap_read_unlock(kvm->mm); return page; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index bf6fa8b9ca73..6d51aa5f66be 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -4864,12 +4864,12 @@ static void kvm_s390_assert_primary_as(struct kvm_vcpu *vcpu) * @vcpu: the vCPU whose gmap is to be fixed up * @gfn: the guest frame number used for memslots (including fake memslots) * @gaddr: the gmap address, does not have to match @gfn for ucontrol gmaps - * @flags: FOLL_* flags + * @foll: FOLL_* flags * * Return: 0 on success, < 0 in case of error. * Context: The mm lock must not be held before calling. May sleep. */ -int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int flags) +int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int foll) { struct kvm_memory_slot *slot; unsigned int fault_flags; @@ -4883,13 +4883,13 @@ int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, u if (!slot || slot->flags & KVM_MEMSLOT_INVALID) return vcpu_post_run_addressing_exception(vcpu); - fault_flags = flags & FOLL_WRITE ? FAULT_FLAG_WRITE : 0; + fault_flags = foll & FOLL_WRITE ? FAULT_FLAG_WRITE : 0; if (vcpu->arch.gmap->pfault_enabled) - flags |= FOLL_NOWAIT; + foll |= FOLL_NOWAIT; vmaddr = __gfn_to_hva_memslot(slot, gfn); try_again: - pfn = __kvm_faultin_pfn(slot, gfn, flags, &writable, &page); + pfn = __kvm_faultin_pfn(slot, gfn, foll, &writable, &page); /* Access outside memory, inject addressing exception */ if (is_noslot_pfn(pfn)) @@ -4905,7 +4905,7 @@ try_again: return 0; vcpu->stat.pfault_sync++; /* Could not setup async pfault, try again synchronously */ - flags &= ~FOLL_NOWAIT; + foll &= ~FOLL_NOWAIT; goto try_again; } /* Any other error */ @@ -4925,7 +4925,7 @@ try_again: return rc; } -static int vcpu_dat_fault_handler(struct kvm_vcpu *vcpu, unsigned long gaddr, unsigned int flags) +static int vcpu_dat_fault_handler(struct kvm_vcpu *vcpu, unsigned long gaddr, unsigned int foll) { unsigned long gaddr_tmp; gfn_t gfn; @@ -4950,18 +4950,18 @@ static int vcpu_dat_fault_handler(struct kvm_vcpu *vcpu, unsigned long gaddr, un } gfn = gpa_to_gfn(gaddr_tmp); } - return __kvm_s390_handle_dat_fault(vcpu, gfn, gaddr, flags); + return __kvm_s390_handle_dat_fault(vcpu, gfn, gaddr, foll); } static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) { - unsigned int flags = 0; + unsigned int foll = 0; unsigned long gaddr; int rc; gaddr = current->thread.gmap_teid.addr * PAGE_SIZE; if (kvm_s390_cur_gmap_fault_is_write()) - flags = FAULT_FLAG_WRITE; + foll = FOLL_WRITE; switch (current->thread.gmap_int_code & PGM_INT_CODE_MASK) { case 0: @@ -5003,7 +5003,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) send_sig(SIGSEGV, current, 0); if (rc != -ENXIO) break; - flags = FAULT_FLAG_WRITE; + foll = FOLL_WRITE; fallthrough; case PGM_PROTECTION: case PGM_SEGMENT_TRANSLATION: @@ -5013,7 +5013,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) case PGM_REGION_SECOND_TRANS: case PGM_REGION_THIRD_TRANS: kvm_s390_assert_primary_as(vcpu); - return vcpu_dat_fault_handler(vcpu, gaddr, flags); + return vcpu_dat_fault_handler(vcpu, gaddr, foll); default: KVM_BUG(1, vcpu->kvm, "Unexpected program interrupt 0x%x, TEID 0x%016lx", current->thread.gmap_int_code, current->thread.gmap_teid.val); diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index 25ede8354514..6ba5a0305e25 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -624,6 +624,17 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc) int cc, ret; u16 dummy; + /* Add the notifier only once. No races because we hold kvm->lock */ + if (kvm->arch.pv.mmu_notifier.ops != &kvm_s390_pv_mmu_notifier_ops) { + /* The notifier will be unregistered when the VM is destroyed */ + kvm->arch.pv.mmu_notifier.ops = &kvm_s390_pv_mmu_notifier_ops; + ret = mmu_notifier_register(&kvm->arch.pv.mmu_notifier, kvm->mm); + if (ret) { + kvm->arch.pv.mmu_notifier.ops = NULL; + return ret; + } + } + ret = kvm_s390_pv_alloc_vm(kvm); if (ret) return ret; @@ -659,11 +670,6 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc) return -EIO; } kvm->arch.gmap->guest_handle = uvcb.guest_handle; - /* Add the notifier only once. No races because we hold kvm->lock */ - if (kvm->arch.pv.mmu_notifier.ops != &kvm_s390_pv_mmu_notifier_ops) { - kvm->arch.pv.mmu_notifier.ops = &kvm_s390_pv_mmu_notifier_ops; - mmu_notifier_register(&kvm->arch.pv.mmu_notifier, kvm->mm); - } return 0; } diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index ad8d78fb1d9a..de7867ae220d 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -1250,10 +1250,12 @@ static int virtio_uml_probe(struct platform_device *pdev) device_set_wakeup_capable(&vu_dev->vdev.dev, true); rc = register_virtio_device(&vu_dev->vdev); - if (rc) + if (rc) { put_device(&vu_dev->vdev.dev); + return rc; + } vu_dev->registered = 1; - return rc; + return 0; error_init: os_close_file(vu_dev->sock); diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c index 617886d1fb1e..21f0e50fb1df 100644 --- a/arch/um/os-Linux/file.c +++ b/arch/um/os-Linux/file.c @@ -535,7 +535,7 @@ ssize_t os_rcv_fd_msg(int fd, int *fds, unsigned int n_fds, cmsg->cmsg_type != SCM_RIGHTS) return n; - memcpy(fds, CMSG_DATA(cmsg), cmsg->cmsg_len); + memcpy(fds, CMSG_DATA(cmsg), cmsg->cmsg_len - CMSG_LEN(0)); return n; } diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c index 4193e04d7e4a..e3ad71a0d13c 100644 --- a/arch/um/os-Linux/util.c +++ b/arch/um/os-Linux/util.c @@ -20,8 +20,7 @@ void stack_protections(unsigned long address) { - if (mprotect((void *) address, UM_THREAD_SIZE, - PROT_READ | PROT_WRITE | PROT_EXEC) < 0) + if (mprotect((void *) address, UM_THREAD_SIZE, PROT_READ | PROT_WRITE) < 0) panic("protecting stack failed, errno = %d", errno); } diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 02236962fdb1..465b19fd1a2d 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -562,6 +562,24 @@ enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, extern struct ghcb *boot_ghcb; +static inline void sev_evict_cache(void *va, int npages) +{ + volatile u8 val __always_unused; + u8 *bytes = va; + int page_idx; + + /* + * For SEV guests, a read from the first/last cache-lines of a 4K page + * using the guest key is sufficient to cause a flush of all cache-lines + * associated with that 4K page without incurring all the overhead of a + * full CLFLUSH sequence. + */ + for (page_idx = 0; page_idx < npages; page_idx++) { + val = bytes[page_idx * PAGE_SIZE]; + val = bytes[page_idx * PAGE_SIZE + PAGE_SIZE - 1]; + } +} + #else /* !CONFIG_AMD_MEM_ENCRYPT */ #define snp_vmpl 0 @@ -605,6 +623,7 @@ static inline int snp_send_guest_request(struct snp_msg_desc *mdesc, static inline int snp_svsm_vtpm_send_command(u8 *buffer) { return -ENODEV; } static inline void __init snp_secure_tsc_prepare(void) { } static inline void __init snp_secure_tsc_init(void) { } +static inline void sev_evict_cache(void *va, int npages) {} #endif /* CONFIG_AMD_MEM_ENCRYPT */ @@ -619,24 +638,6 @@ int rmp_make_shared(u64 pfn, enum pg_level level); void snp_leak_pages(u64 pfn, unsigned int npages); void kdump_sev_callback(void); void snp_fixup_e820_tables(void); - -static inline void sev_evict_cache(void *va, int npages) -{ - volatile u8 val __always_unused; - u8 *bytes = va; - int page_idx; - - /* - * For SEV guests, a read from the first/last cache-lines of a 4K page - * using the guest key is sufficient to cause a flush of all cache-lines - * associated with that 4K page without incurring all the overhead of a - * full CLFLUSH sequence. - */ - for (page_idx = 0; page_idx < npages; page_idx++) { - val = bytes[page_idx * PAGE_SIZE]; - val = bytes[page_idx * PAGE_SIZE + PAGE_SIZE - 1]; - } -} #else static inline bool snp_probe_rmptable_info(void) { return false; } static inline int snp_rmptable_init(void) { return -ENOSYS; } @@ -652,7 +653,6 @@ static inline int rmp_make_shared(u64 pfn, enum pg_level level) { return -ENODEV static inline void snp_leak_pages(u64 pfn, unsigned int npages) {} static inline void kdump_sev_callback(void) { } static inline void snp_fixup_e820_tables(void) {} -static inline void sev_evict_cache(void *va, int npages) {} #endif #endif diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index d9931c6c4bc6..1bfebe40854f 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4046,8 +4046,7 @@ static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); u64 cr8; - if (nested_svm_virtualize_tpr(vcpu) || - kvm_vcpu_apicv_active(vcpu)) + if (nested_svm_virtualize_tpr(vcpu)) return; cr8 = kvm_get_cr8(vcpu); |