diff options
Diffstat (limited to 'arch')
38 files changed, 299 insertions, 269 deletions
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 2b07f0a27a7d..0ee4f6fa3a17 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1369,6 +1369,7 @@ static inline bool kvm_system_needs_idmapped_vectors(void) } void kvm_init_host_debug_data(void); +void kvm_debug_init_vhe(void); void kvm_vcpu_load_debug(struct kvm_vcpu *vcpu); void kvm_vcpu_put_debug(struct kvm_vcpu *vcpu); void kvm_debug_set_guest_ownership(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 1246216616b5..2888b5d03757 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -355,11 +355,6 @@ static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walke return pteref; } -static inline kvm_pte_t *kvm_dereference_pteref_raw(kvm_pteref_t pteref) -{ - return pteref; -} - static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker) { /* @@ -389,11 +384,6 @@ static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walke return rcu_dereference_check(pteref, !(walker->flags & KVM_PGTABLE_WALK_SHARED)); } -static inline kvm_pte_t *kvm_dereference_pteref_raw(kvm_pteref_t pteref) -{ - return rcu_dereference_raw(pteref); -} - static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker) { if (walker->flags & KVM_PGTABLE_WALK_SHARED) @@ -562,26 +552,6 @@ static inline int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2 void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); /** - * kvm_pgtable_stage2_destroy_range() - Destroy the unlinked range of addresses. - * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). - * @addr: Intermediate physical address at which to place the mapping. - * @size: Size of the mapping. - * - * The page-table is assumed to be unreachable by any hardware walkers prior - * to freeing and therefore no TLB invalidation is performed. - */ -void kvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt, - u64 addr, u64 size); - -/** - * kvm_pgtable_stage2_destroy_pgd() - Destroy the PGD of guest stage-2 page-table. - * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). - * - * It is assumed that the rest of the page-table is freed before this operation. - */ -void kvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt); - -/** * kvm_pgtable_stage2_free_unlinked() - Free an unlinked stage-2 paging structure. * @mm_ops: Memory management callbacks. * @pgtable: Unlinked stage-2 paging structure to be freed. diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h index 35f9d9478004..ea58282f59bb 100644 --- a/arch/arm64/include/asm/kvm_pkvm.h +++ b/arch/arm64/include/asm/kvm_pkvm.h @@ -179,9 +179,7 @@ struct pkvm_mapping { int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, struct kvm_pgtable_mm_ops *mm_ops); -void pkvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt, - u64 addr, u64 size); -void pkvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt); +void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, enum kvm_pgtable_prot prot, void *mc, enum kvm_pgtable_walk_flags flags); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 5bf101c869c9..bd6b6a620a09 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -2113,8 +2113,10 @@ static void cpu_hyp_init_features(void) { cpu_set_hyp_vector(); - if (is_kernel_in_hyp_mode()) + if (is_kernel_in_hyp_mode()) { kvm_timer_init_vhe(); + kvm_debug_init_vhe(); + } if (vgic_present) kvm_vgic_init_cpu_hardware(); diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index 381382c19fe4..e027d9c32b0d 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -96,6 +96,13 @@ void kvm_init_host_debug_data(void) } } +void kvm_debug_init_vhe(void) +{ + /* Clear PMSCR_EL1.E{0,1}SPE which reset to UNKNOWN values. */ + if (SYS_FIELD_GET(ID_AA64DFR0_EL1, PMSVer, read_sysreg(id_aa64dfr0_el1))) + write_sysreg_el1(0, SYS_PMSCR); +} + /* * Configures the 'external' MDSCR_EL1 value for the guest, i.e. when the host * has taken over MDSCR_EL1. @@ -138,6 +145,9 @@ void kvm_vcpu_load_debug(struct kvm_vcpu *vcpu) /* Must be called before kvm_vcpu_load_vhe() */ KVM_BUG_ON(vcpu_get_flag(vcpu, SYSREGS_ON_CPU), vcpu->kvm); + if (has_vhe()) + *host_data_ptr(host_debug_state.mdcr_el2) = read_sysreg(mdcr_el2); + /* * Determine which of the possible debug states we're in: * @@ -184,6 +194,9 @@ void kvm_vcpu_load_debug(struct kvm_vcpu *vcpu) void kvm_vcpu_put_debug(struct kvm_vcpu *vcpu) { + if (has_vhe()) + write_sysreg(*host_data_ptr(host_debug_state.mdcr_el2), mdcr_el2); + if (likely(!(vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP))) return; diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 84ec4e100fbb..b6682202edf3 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -431,9 +431,6 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) vcpu_set_flag(vcpu, PMUSERENR_ON_CPU); } - *host_data_ptr(host_debug_state.mdcr_el2) = read_sysreg(mdcr_el2); - write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); - if (cpus_have_final_cap(ARM64_HAS_HCX)) { u64 hcrx = vcpu->arch.hcrx_el2; if (is_nested_ctxt(vcpu)) { @@ -454,8 +451,6 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt); - write_sysreg(*host_data_ptr(host_debug_state.mdcr_el2), mdcr_el2); - write_sysreg(0, hstr_el2); if (system_supports_pmuv3()) { write_sysreg(ctxt_sys_reg(hctxt, PMUSERENR_EL0), pmuserenr_el0); diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index ccd575d5f6de..d3b9ec8a7c28 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -50,6 +50,10 @@ extern void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc); static void __activate_traps(struct kvm_vcpu *vcpu) { ___activate_traps(vcpu, vcpu->arch.hcr_el2); + + *host_data_ptr(host_debug_state.mdcr_el2) = read_sysreg(mdcr_el2); + write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); + __activate_traps_common(vcpu); __activate_cptr_traps(vcpu); @@ -93,6 +97,8 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) isb(); } + write_sysreg(*host_data_ptr(host_debug_state.mdcr_el2), mdcr_el2); + __deactivate_traps_common(vcpu); write_sysreg_hcr(this_cpu_ptr(&kvm_init_params)->hcr_el2); diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index 71d2fc97f004..82da9b03692d 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -253,7 +253,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu) *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR); *vcpu_cpsr(vcpu) = read_sysreg_el2(SYS_SPSR); - __vcpu_assign_sys_reg(vcpu, read_sysreg_el1(SYS_VBAR), VBAR_EL1); + __vcpu_assign_sys_reg(vcpu, VBAR_EL1, read_sysreg_el1(SYS_VBAR)); kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC); diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index c36f282a175d..c351b4abd5db 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -1551,38 +1551,21 @@ static int stage2_free_walker(const struct kvm_pgtable_visit_ctx *ctx, return 0; } -void kvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt, - u64 addr, u64 size) +void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) { + size_t pgd_sz; struct kvm_pgtable_walker walker = { .cb = stage2_free_walker, .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, }; - WARN_ON(kvm_pgtable_walk(pgt, addr, size, &walker)); -} - -void kvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt) -{ - size_t pgd_sz; - + WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker)); pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE; - - /* - * Since the pgtable is unlinked at this point, and not shared with - * other walkers, safely deference pgd with kvm_dereference_pteref_raw() - */ - pgt->mm_ops->free_pages_exact(kvm_dereference_pteref_raw(pgt->pgd), pgd_sz); + pgt->mm_ops->free_pages_exact(kvm_dereference_pteref(&walker, pgt->pgd), pgd_sz); pgt->pgd = NULL; } -void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) -{ - kvm_pgtable_stage2_destroy_range(pgt, 0, BIT(pgt->ia_bits)); - kvm_pgtable_stage2_destroy_pgd(pgt); -} - void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level) { kvm_pteref_t ptep = (kvm_pteref_t)pgtable; diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 86f3d80daf37..736394292503 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -904,38 +904,6 @@ static int kvm_init_ipa_range(struct kvm_s2_mmu *mmu, unsigned long type) return 0; } -/* - * Assume that @pgt is valid and unlinked from the KVM MMU to free the - * page-table without taking the kvm_mmu_lock and without performing any - * TLB invalidations. - * - * Also, the range of addresses can be large enough to cause need_resched - * warnings, for instance on CONFIG_PREEMPT_NONE kernels. Hence, invoke - * cond_resched() periodically to prevent hogging the CPU for a long time - * and schedule something else, if required. - */ -static void stage2_destroy_range(struct kvm_pgtable *pgt, phys_addr_t addr, - phys_addr_t end) -{ - u64 next; - - do { - next = stage2_range_addr_end(addr, end); - KVM_PGT_FN(kvm_pgtable_stage2_destroy_range)(pgt, addr, - next - addr); - if (next != end) - cond_resched(); - } while (addr = next, addr != end); -} - -static void kvm_stage2_destroy(struct kvm_pgtable *pgt) -{ - unsigned int ia_bits = VTCR_EL2_IPA(pgt->mmu->vtcr); - - stage2_destroy_range(pgt, 0, BIT(ia_bits)); - KVM_PGT_FN(kvm_pgtable_stage2_destroy_pgd)(pgt); -} - /** * kvm_init_stage2_mmu - Initialise a S2 MMU structure * @kvm: The pointer to the KVM structure @@ -1012,7 +980,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t return 0; out_destroy_pgtable: - kvm_stage2_destroy(pgt); + KVM_PGT_FN(kvm_pgtable_stage2_destroy)(pgt); out_free_pgtable: kfree(pgt); return err; @@ -1106,10 +1074,14 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu) mmu->pgt = NULL; free_percpu(mmu->last_vcpu_ran); } + + if (kvm_is_nested_s2_mmu(kvm, mmu)) + kvm_init_nested_s2_mmu(mmu); + write_unlock(&kvm->mmu_lock); if (pgt) { - kvm_stage2_destroy(pgt); + KVM_PGT_FN(kvm_pgtable_stage2_destroy)(pgt); kfree(pgt); } } @@ -1541,11 +1513,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu); VM_BUG_ON(write_fault && exec_fault); - if (fault_is_perm && !write_fault && !exec_fault) { - kvm_err("Unexpected L2 read permission error\n"); - return -EFAULT; - } - if (!is_protected_kvm_enabled()) memcache = &vcpu->arch.mmu_page_cache; else diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 77db81bae86f..50d559248a1f 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -847,7 +847,7 @@ static void kvm_invalidate_vncr_ipa(struct kvm *kvm, u64 start, u64 end) ipa_size = ttl_to_size(pgshift_level_to_ttl(vt->wi.pgshift, vt->wr.level)); - ipa_start = vt->wr.pa & (ipa_size - 1); + ipa_start = vt->wr.pa & ~(ipa_size - 1); ipa_end = ipa_start + ipa_size; if (ipa_end <= start || ipa_start >= end) @@ -887,7 +887,7 @@ static void invalidate_vncr_va(struct kvm *kvm, va_size = ttl_to_size(pgshift_level_to_ttl(vt->wi.pgshift, vt->wr.level)); - va_start = vt->gva & (va_size - 1); + va_start = vt->gva & ~(va_size - 1); va_end = va_start + va_size; switch (scope->type) { @@ -1276,7 +1276,7 @@ static bool kvm_vncr_tlb_lookup(struct kvm_vcpu *vcpu) !(tcr & TCR_ASID16)) asid &= GENMASK(7, 0); - return asid != vt->wr.asid; + return asid == vt->wr.asid; } return true; diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c index 61827cf6fea4..fcd70bfe44fb 100644 --- a/arch/arm64/kvm/pkvm.c +++ b/arch/arm64/kvm/pkvm.c @@ -316,16 +316,9 @@ static int __pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 start, u64 e return 0; } -void pkvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt, - u64 addr, u64 size) +void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) { - __pkvm_pgtable_stage2_unmap(pgt, addr, addr + size); -} - -void pkvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt) -{ - /* Expected to be called after all pKVM mappings have been released. */ - WARN_ON_ONCE(!RB_EMPTY_ROOT(&pgt->pkvm_mappings.rb_root)); + __pkvm_pgtable_stage2_unmap(pgt, 0, ~(0ULL)); } int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c index 2684f273d9e1..4c1209261b65 100644 --- a/arch/arm64/kvm/vgic/vgic-debug.c +++ b/arch/arm64/kvm/vgic/vgic-debug.c @@ -69,7 +69,7 @@ static int iter_mark_lpis(struct kvm *kvm) int nr_lpis = 0; xa_for_each(&dist->lpi_xa, intid, irq) { - if (!vgic_try_get_irq_kref(irq)) + if (!vgic_try_get_irq_ref(irq)) continue; xa_set_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER); diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 1e680ad6e863..4c3c0d82e476 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -53,7 +53,7 @@ void kvm_vgic_early_init(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; - xa_init_flags(&dist->lpi_xa, XA_FLAGS_LOCK_IRQ); + xa_init(&dist->lpi_xa); } /* CREATION */ @@ -208,7 +208,7 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis) raw_spin_lock_init(&irq->irq_lock); irq->vcpu = NULL; irq->target_vcpu = vcpu0; - kref_init(&irq->refcount); + refcount_set(&irq->refcount, 0); switch (dist->vgic_model) { case KVM_DEV_TYPE_ARM_VGIC_V2: irq->targets = 0; @@ -277,7 +277,7 @@ static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu, u32 type) irq->intid = i; irq->vcpu = NULL; irq->target_vcpu = vcpu; - kref_init(&irq->refcount); + refcount_set(&irq->refcount, 0); if (vgic_irq_is_sgi(i)) { /* SGIs */ irq->enabled = 1; diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 7368c13f16b7..ce3e3ed3f29f 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -78,7 +78,6 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, { struct vgic_dist *dist = &kvm->arch.vgic; struct vgic_irq *irq = vgic_get_irq(kvm, intid), *oldirq; - unsigned long flags; int ret; /* In this case there is no put, since we keep the reference. */ @@ -89,7 +88,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, if (!irq) return ERR_PTR(-ENOMEM); - ret = xa_reserve_irq(&dist->lpi_xa, intid, GFP_KERNEL_ACCOUNT); + ret = xa_reserve(&dist->lpi_xa, intid, GFP_KERNEL_ACCOUNT); if (ret) { kfree(irq); return ERR_PTR(ret); @@ -99,19 +98,19 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, raw_spin_lock_init(&irq->irq_lock); irq->config = VGIC_CONFIG_EDGE; - kref_init(&irq->refcount); + refcount_set(&irq->refcount, 1); irq->intid = intid; irq->target_vcpu = vcpu; irq->group = 1; - xa_lock_irqsave(&dist->lpi_xa, flags); + xa_lock(&dist->lpi_xa); /* * There could be a race with another vgic_add_lpi(), so we need to * check that we don't add a second list entry with the same LPI. */ oldirq = xa_load(&dist->lpi_xa, intid); - if (vgic_try_get_irq_kref(oldirq)) { + if (vgic_try_get_irq_ref(oldirq)) { /* Someone was faster with adding this LPI, lets use that. */ kfree(irq); irq = oldirq; @@ -126,7 +125,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, } out_unlock: - xa_unlock_irqrestore(&dist->lpi_xa, flags); + xa_unlock(&dist->lpi_xa); if (ret) return ERR_PTR(ret); @@ -547,7 +546,7 @@ static struct vgic_irq *vgic_its_check_cache(struct kvm *kvm, phys_addr_t db, rcu_read_lock(); irq = xa_load(&its->translation_cache, cache_key); - if (!vgic_try_get_irq_kref(irq)) + if (!vgic_try_get_irq_ref(irq)) irq = NULL; rcu_read_unlock(); @@ -571,7 +570,7 @@ static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its, * its_lock, as the ITE (and the reference it holds) cannot be freed. */ lockdep_assert_held(&its->its_lock); - vgic_get_irq_kref(irq); + vgic_get_irq_ref(irq); old = xa_store(&its->translation_cache, cache_key, irq, GFP_KERNEL_ACCOUNT); diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c index 4d9343d2b0b1..548aec9d5a72 100644 --- a/arch/arm64/kvm/vgic/vgic-v4.c +++ b/arch/arm64/kvm/vgic/vgic-v4.c @@ -518,7 +518,7 @@ static struct vgic_irq *__vgic_host_irq_get_vlpi(struct kvm *kvm, int host_irq) if (!irq->hw || irq->host_irq != host_irq) continue; - if (!vgic_try_get_irq_kref(irq)) + if (!vgic_try_get_irq_ref(irq)) return NULL; return irq; diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index f5148b38120a..6dd5a10081e2 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -28,8 +28,8 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = { * kvm->arch.config_lock (mutex) * its->cmd_lock (mutex) * its->its_lock (mutex) - * vgic_cpu->ap_list_lock must be taken with IRQs disabled - * vgic_dist->lpi_xa.xa_lock must be taken with IRQs disabled + * vgic_dist->lpi_xa.xa_lock + * vgic_cpu->ap_list_lock must be taken with IRQs disabled * vgic_irq->irq_lock must be taken with IRQs disabled * * As the ap_list_lock might be taken from the timer interrupt handler, @@ -71,7 +71,7 @@ static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid) rcu_read_lock(); irq = xa_load(&dist->lpi_xa, intid); - if (!vgic_try_get_irq_kref(irq)) + if (!vgic_try_get_irq_ref(irq)) irq = NULL; rcu_read_unlock(); @@ -114,37 +114,66 @@ struct vgic_irq *vgic_get_vcpu_irq(struct kvm_vcpu *vcpu, u32 intid) return vgic_get_irq(vcpu->kvm, intid); } -/* - * We can't do anything in here, because we lack the kvm pointer to - * lock and remove the item from the lpi_list. So we keep this function - * empty and use the return value of kref_put() to trigger the freeing. - */ -static void vgic_irq_release(struct kref *ref) +static void vgic_release_lpi_locked(struct vgic_dist *dist, struct vgic_irq *irq) +{ + lockdep_assert_held(&dist->lpi_xa.xa_lock); + __xa_erase(&dist->lpi_xa, irq->intid); + kfree_rcu(irq, rcu); +} + +static __must_check bool __vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq) +{ + if (irq->intid < VGIC_MIN_LPI) + return false; + + return refcount_dec_and_test(&irq->refcount); +} + +static __must_check bool vgic_put_irq_norelease(struct kvm *kvm, struct vgic_irq *irq) { + if (!__vgic_put_irq(kvm, irq)) + return false; + + irq->pending_release = true; + return true; } void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq) { struct vgic_dist *dist = &kvm->arch.vgic; - unsigned long flags; - if (irq->intid < VGIC_MIN_LPI) - return; + if (irq->intid >= VGIC_MIN_LPI) + might_lock(&dist->lpi_xa.xa_lock); - if (!kref_put(&irq->refcount, vgic_irq_release)) + if (!__vgic_put_irq(kvm, irq)) return; - xa_lock_irqsave(&dist->lpi_xa, flags); - __xa_erase(&dist->lpi_xa, irq->intid); - xa_unlock_irqrestore(&dist->lpi_xa, flags); + xa_lock(&dist->lpi_xa); + vgic_release_lpi_locked(dist, irq); + xa_unlock(&dist->lpi_xa); +} - kfree_rcu(irq, rcu); +static void vgic_release_deleted_lpis(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + unsigned long intid; + struct vgic_irq *irq; + + xa_lock(&dist->lpi_xa); + + xa_for_each(&dist->lpi_xa, intid, irq) { + if (irq->pending_release) + vgic_release_lpi_locked(dist, irq); + } + + xa_unlock(&dist->lpi_xa); } void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_irq *irq, *tmp; + bool deleted = false; unsigned long flags; raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags); @@ -155,11 +184,14 @@ void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu) list_del(&irq->ap_list); irq->vcpu = NULL; raw_spin_unlock(&irq->irq_lock); - vgic_put_irq(vcpu->kvm, irq); + deleted |= vgic_put_irq_norelease(vcpu->kvm, irq); } } raw_spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags); + + if (deleted) + vgic_release_deleted_lpis(vcpu->kvm); } void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending) @@ -399,7 +431,7 @@ retry: * now in the ap_list. This is safe as the caller must already hold a * reference on the irq. */ - vgic_get_irq_kref(irq); + vgic_get_irq_ref(irq); list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head); irq->vcpu = vcpu; @@ -630,6 +662,7 @@ static void vgic_prune_ap_list(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_irq *irq, *tmp; + bool deleted_lpis = false; DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); @@ -657,12 +690,12 @@ retry: /* * This vgic_put_irq call matches the - * vgic_get_irq_kref in vgic_queue_irq_unlock, + * vgic_get_irq_ref in vgic_queue_irq_unlock, * where we added the LPI to the ap_list. As * we remove the irq from the list, we drop * also drop the refcount. */ - vgic_put_irq(vcpu->kvm, irq); + deleted_lpis |= vgic_put_irq_norelease(vcpu->kvm, irq); continue; } @@ -725,6 +758,9 @@ retry: } raw_spin_unlock(&vgic_cpu->ap_list_lock); + + if (unlikely(deleted_lpis)) + vgic_release_deleted_lpis(vcpu->kvm); } static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu) @@ -818,7 +854,7 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu) * the AP list has been sorted already. */ if (multi_sgi && irq->priority > prio) { - _raw_spin_unlock(&irq->irq_lock); + raw_spin_unlock(&irq->irq_lock); break; } diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index de1c1d3261c3..ac5f9c5d2b98 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -267,7 +267,7 @@ void vgic_v2_put(struct kvm_vcpu *vcpu); void vgic_v2_save_state(struct kvm_vcpu *vcpu); void vgic_v2_restore_state(struct kvm_vcpu *vcpu); -static inline bool vgic_try_get_irq_kref(struct vgic_irq *irq) +static inline bool vgic_try_get_irq_ref(struct vgic_irq *irq) { if (!irq) return false; @@ -275,12 +275,12 @@ static inline bool vgic_try_get_irq_kref(struct vgic_irq *irq) if (irq->intid < VGIC_MIN_LPI) return true; - return kref_get_unless_zero(&irq->refcount); + return refcount_inc_not_zero(&irq->refcount); } -static inline void vgic_get_irq_kref(struct vgic_irq *irq) +static inline void vgic_get_irq_ref(struct vgic_irq *irq) { - WARN_ON_ONCE(!vgic_try_get_irq_kref(irq)); + WARN_ON_ONCE(!vgic_try_get_irq_ref(irq)); } void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu); diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index f0abc38c40ac..0631a6b11281 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -298,6 +298,10 @@ config AS_HAS_LVZ_EXTENSION config CC_HAS_ANNOTATE_TABLEJUMP def_bool $(cc-option,-mannotate-tablejump) +config RUSTC_HAS_ANNOTATE_TABLEJUMP + depends on RUST + def_bool $(rustc-option,-Cllvm-args=--loongarch-annotate-tablejump) + menu "Kernel type and options" source "kernel/Kconfig.hz" @@ -563,10 +567,14 @@ config ARCH_STRICT_ALIGN -mstrict-align build parameter to prevent unaligned accesses. CPUs with h/w unaligned access support: - Loongson-2K2000/2K3000/3A5000/3C5000/3D5000. + Loongson-2K2000/2K3000 and all of Loongson-3 series processors + based on LoongArch. CPUs without h/w unaligned access support: - Loongson-2K500/2K1000. + Loongson-2K0300/2K0500/2K1000. + + If you want to make sure whether to support unaligned memory access + on your hardware, please read the bit 20 (UAL) of CPUCFG1 register. This option is enabled by default to make the kernel be able to run on all LoongArch systems. But you can disable it manually if you want diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index a3a9759414f4..ae419e32f22e 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -102,16 +102,21 @@ KBUILD_CFLAGS += $(call cc-option,-mthin-add-sub) $(call cc-option,-Wa$(comma) ifdef CONFIG_OBJTOOL ifdef CONFIG_CC_HAS_ANNOTATE_TABLEJUMP +KBUILD_CFLAGS += -mannotate-tablejump +else +KBUILD_CFLAGS += -fno-jump-tables # keep compatibility with older compilers +endif +ifdef CONFIG_RUSTC_HAS_ANNOTATE_TABLEJUMP +KBUILD_RUSTFLAGS += -Cllvm-args=--loongarch-annotate-tablejump +else +KBUILD_RUSTFLAGS += -Zno-jump-tables # keep compatibility with older compilers +endif +ifdef CONFIG_LTO_CLANG # The annotate-tablejump option can not be passed to LLVM backend when LTO is enabled. # Ensure it is aware of linker with LTO, '--loongarch-annotate-tablejump' also needs to # be passed via '-mllvm' to ld.lld. -KBUILD_CFLAGS += -mannotate-tablejump -ifdef CONFIG_LTO_CLANG KBUILD_LDFLAGS += -mllvm --loongarch-annotate-tablejump endif -else -KBUILD_CFLAGS += -fno-jump-tables # keep compatibility with older compilers -endif endif KBUILD_RUSTFLAGS += --target=loongarch64-unknown-none-softfloat -Ccode-model=small diff --git a/arch/loongarch/include/asm/acenv.h b/arch/loongarch/include/asm/acenv.h index 52f298f7293b..483c955f2ae5 100644 --- a/arch/loongarch/include/asm/acenv.h +++ b/arch/loongarch/include/asm/acenv.h @@ -10,9 +10,8 @@ #ifndef _ASM_LOONGARCH_ACENV_H #define _ASM_LOONGARCH_ACENV_H -/* - * This header is required by ACPI core, but we have nothing to fill in - * right now. Will be updated later when needed. - */ +#ifdef CONFIG_ARCH_STRICT_ALIGN +#define ACPI_MISALIGNMENT_NOT_SUPPORTED +#endif /* CONFIG_ARCH_STRICT_ALIGN */ #endif /* _ASM_LOONGARCH_ACENV_H */ diff --git a/arch/loongarch/include/asm/kvm_mmu.h b/arch/loongarch/include/asm/kvm_mmu.h index 099bafc6f797..e36cc7e8ed20 100644 --- a/arch/loongarch/include/asm/kvm_mmu.h +++ b/arch/loongarch/include/asm/kvm_mmu.h @@ -16,6 +16,13 @@ */ #define KVM_MMU_CACHE_MIN_PAGES (CONFIG_PGTABLE_LEVELS - 1) +/* + * _PAGE_MODIFIED is a SW pte bit, it records page ever written on host + * kernel, on secondary MMU it records the page writeable attribute, in + * order for fast path handling. + */ +#define KVM_PAGE_WRITEABLE _PAGE_MODIFIED + #define _KVM_FLUSH_PGTABLE 0x1 #define _KVM_HAS_PGMASK 0x2 #define kvm_pfn_pte(pfn, prot) (((pfn) << PFN_PTE_SHIFT) | pgprot_val(prot)) @@ -52,10 +59,10 @@ static inline void kvm_set_pte(kvm_pte_t *ptep, kvm_pte_t val) WRITE_ONCE(*ptep, val); } -static inline int kvm_pte_write(kvm_pte_t pte) { return pte & _PAGE_WRITE; } -static inline int kvm_pte_dirty(kvm_pte_t pte) { return pte & _PAGE_DIRTY; } static inline int kvm_pte_young(kvm_pte_t pte) { return pte & _PAGE_ACCESSED; } static inline int kvm_pte_huge(kvm_pte_t pte) { return pte & _PAGE_HUGE; } +static inline int kvm_pte_dirty(kvm_pte_t pte) { return pte & __WRITEABLE; } +static inline int kvm_pte_writeable(kvm_pte_t pte) { return pte & KVM_PAGE_WRITEABLE; } static inline kvm_pte_t kvm_pte_mkyoung(kvm_pte_t pte) { @@ -69,12 +76,12 @@ static inline kvm_pte_t kvm_pte_mkold(kvm_pte_t pte) static inline kvm_pte_t kvm_pte_mkdirty(kvm_pte_t pte) { - return pte | _PAGE_DIRTY; + return pte | __WRITEABLE; } static inline kvm_pte_t kvm_pte_mkclean(kvm_pte_t pte) { - return pte & ~_PAGE_DIRTY; + return pte & ~__WRITEABLE; } static inline kvm_pte_t kvm_pte_mkhuge(kvm_pte_t pte) @@ -87,6 +94,11 @@ static inline kvm_pte_t kvm_pte_mksmall(kvm_pte_t pte) return pte & ~_PAGE_HUGE; } +static inline kvm_pte_t kvm_pte_mkwriteable(kvm_pte_t pte) +{ + return pte | KVM_PAGE_WRITEABLE; +} + static inline int kvm_need_flush(kvm_ptw_ctx *ctx) { return ctx->flag & _KVM_FLUSH_PGTABLE; diff --git a/arch/loongarch/kernel/env.c b/arch/loongarch/kernel/env.c index c0a5dc9aeae2..23bd5ae2212c 100644 --- a/arch/loongarch/kernel/env.c +++ b/arch/loongarch/kernel/env.c @@ -86,7 +86,7 @@ late_initcall(fdt_cpu_clk_init); static ssize_t boardinfo_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - return sprintf(buf, + return sysfs_emit(buf, "BIOS Information\n" "Vendor\t\t\t: %s\n" "Version\t\t\t: %s\n" @@ -109,6 +109,8 @@ static int __init boardinfo_init(void) struct kobject *loongson_kobj; loongson_kobj = kobject_create_and_add("loongson", firmware_kobj); + if (!loongson_kobj) + return -ENOMEM; return sysfs_create_file(loongson_kobj, &boardinfo_attr.attr); } diff --git a/arch/loongarch/kernel/stacktrace.c b/arch/loongarch/kernel/stacktrace.c index 9a038d1070d7..387dc4d3c486 100644 --- a/arch/loongarch/kernel/stacktrace.c +++ b/arch/loongarch/kernel/stacktrace.c @@ -51,12 +51,13 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, if (task == current) { regs->regs[3] = (unsigned long)__builtin_frame_address(0); regs->csr_era = (unsigned long)__builtin_return_address(0); + regs->regs[22] = 0; } else { regs->regs[3] = thread_saved_fp(task); regs->csr_era = thread_saved_ra(task); + regs->regs[22] = task->thread.reg22; } regs->regs[1] = 0; - regs->regs[22] = 0; for (unwind_start(&state, task, regs); !unwind_done(&state) && !unwind_error(&state); unwind_next_frame(&state)) { diff --git a/arch/loongarch/kernel/vdso.c b/arch/loongarch/kernel/vdso.c index 7b888d9085a0..dee1a15d7f4c 100644 --- a/arch/loongarch/kernel/vdso.c +++ b/arch/loongarch/kernel/vdso.c @@ -54,6 +54,9 @@ static int __init init_vdso(void) vdso_info.code_mapping.pages = kcalloc(vdso_info.size / PAGE_SIZE, sizeof(struct page *), GFP_KERNEL); + if (!vdso_info.code_mapping.pages) + return -ENOMEM; + pfn = __phys_to_pfn(__pa_symbol(vdso_info.vdso)); for (i = 0; i < vdso_info.size / PAGE_SIZE; i++) vdso_info.code_mapping.pages[i] = pfn_to_page(pfn + i); diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c index 2ce41f93b2a4..6c9c7de7226b 100644 --- a/arch/loongarch/kvm/exit.c +++ b/arch/loongarch/kvm/exit.c @@ -778,10 +778,8 @@ static long kvm_save_notify(struct kvm_vcpu *vcpu) return 0; default: return KVM_HCALL_INVALID_CODE; - }; - - return KVM_HCALL_INVALID_CODE; -}; + } +} /* * kvm_handle_lsx_disabled() - Guest used LSX while disabled in root. diff --git a/arch/loongarch/kvm/intc/eiointc.c b/arch/loongarch/kvm/intc/eiointc.c index 026b139dcff2..c32333695381 100644 --- a/arch/loongarch/kvm/intc/eiointc.c +++ b/arch/loongarch/kvm/intc/eiointc.c @@ -426,21 +426,26 @@ static int kvm_eiointc_ctrl_access(struct kvm_device *dev, struct loongarch_eiointc *s = dev->kvm->arch.eiointc; data = (void __user *)attr->addr; - spin_lock_irqsave(&s->lock, flags); switch (type) { case KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_NUM_CPU: + case KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_FEATURE: if (copy_from_user(&val, data, 4)) - ret = -EFAULT; - else { - if (val >= EIOINTC_ROUTE_MAX_VCPUS) - ret = -EINVAL; - else - s->num_cpu = val; - } + return -EFAULT; + break; + default: + break; + } + + spin_lock_irqsave(&s->lock, flags); + switch (type) { + case KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_NUM_CPU: + if (val >= EIOINTC_ROUTE_MAX_VCPUS) + ret = -EINVAL; + else + s->num_cpu = val; break; case KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_FEATURE: - if (copy_from_user(&s->features, data, 4)) - ret = -EFAULT; + s->features = val; if (!(s->features & BIT(EIOINTC_HAS_VIRT_EXTENSION))) s->status |= BIT(EIOINTC_ENABLE); break; @@ -462,19 +467,17 @@ static int kvm_eiointc_ctrl_access(struct kvm_device *dev, static int kvm_eiointc_regs_access(struct kvm_device *dev, struct kvm_device_attr *attr, - bool is_write) + bool is_write, int *data) { int addr, cpu, offset, ret = 0; unsigned long flags; void *p = NULL; - void __user *data; struct loongarch_eiointc *s; s = dev->kvm->arch.eiointc; addr = attr->attr; cpu = addr >> 16; addr &= 0xffff; - data = (void __user *)attr->addr; switch (addr) { case EIOINTC_NODETYPE_START ... EIOINTC_NODETYPE_END: offset = (addr - EIOINTC_NODETYPE_START) / 4; @@ -513,13 +516,10 @@ static int kvm_eiointc_regs_access(struct kvm_device *dev, } spin_lock_irqsave(&s->lock, flags); - if (is_write) { - if (copy_from_user(p, data, 4)) - ret = -EFAULT; - } else { - if (copy_to_user(data, p, 4)) - ret = -EFAULT; - } + if (is_write) + memcpy(p, data, 4); + else + memcpy(data, p, 4); spin_unlock_irqrestore(&s->lock, flags); return ret; @@ -527,19 +527,17 @@ static int kvm_eiointc_regs_access(struct kvm_device *dev, static int kvm_eiointc_sw_status_access(struct kvm_device *dev, struct kvm_device_attr *attr, - bool is_write) + bool is_write, int *data) { int addr, ret = 0; unsigned long flags; void *p = NULL; - void __user *data; struct loongarch_eiointc *s; s = dev->kvm->arch.eiointc; addr = attr->attr; addr &= 0xffff; - data = (void __user *)attr->addr; switch (addr) { case KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_NUM_CPU: if (is_write) @@ -561,13 +559,10 @@ static int kvm_eiointc_sw_status_access(struct kvm_device *dev, return -EINVAL; } spin_lock_irqsave(&s->lock, flags); - if (is_write) { - if (copy_from_user(p, data, 4)) - ret = -EFAULT; - } else { - if (copy_to_user(data, p, 4)) - ret = -EFAULT; - } + if (is_write) + memcpy(p, data, 4); + else + memcpy(data, p, 4); spin_unlock_irqrestore(&s->lock, flags); return ret; @@ -576,11 +571,27 @@ static int kvm_eiointc_sw_status_access(struct kvm_device *dev, static int kvm_eiointc_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { + int ret, data; + switch (attr->group) { case KVM_DEV_LOONGARCH_EXTIOI_GRP_REGS: - return kvm_eiointc_regs_access(dev, attr, false); + ret = kvm_eiointc_regs_access(dev, attr, false, &data); + if (ret) + return ret; + + if (copy_to_user((void __user *)attr->addr, &data, 4)) + ret = -EFAULT; + + return ret; case KVM_DEV_LOONGARCH_EXTIOI_GRP_SW_STATUS: - return kvm_eiointc_sw_status_access(dev, attr, false); + ret = kvm_eiointc_sw_status_access(dev, attr, false, &data); + if (ret) + return ret; + + if (copy_to_user((void __user *)attr->addr, &data, 4)) + ret = -EFAULT; + + return ret; default: return -EINVAL; } @@ -589,13 +600,21 @@ static int kvm_eiointc_get_attr(struct kvm_device *dev, static int kvm_eiointc_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { + int data; + switch (attr->group) { case KVM_DEV_LOONGARCH_EXTIOI_GRP_CTRL: return kvm_eiointc_ctrl_access(dev, attr); case KVM_DEV_LOONGARCH_EXTIOI_GRP_REGS: - return kvm_eiointc_regs_access(dev, attr, true); + if (copy_from_user(&data, (void __user *)attr->addr, 4)) + return -EFAULT; + + return kvm_eiointc_regs_access(dev, attr, true, &data); case KVM_DEV_LOONGARCH_EXTIOI_GRP_SW_STATUS: - return kvm_eiointc_sw_status_access(dev, attr, true); + if (copy_from_user(&data, (void __user *)attr->addr, 4)) + return -EFAULT; + + return kvm_eiointc_sw_status_access(dev, attr, true, &data); default: return -EINVAL; } diff --git a/arch/loongarch/kvm/intc/pch_pic.c b/arch/loongarch/kvm/intc/pch_pic.c index 119290bcea79..baf3b4faf7ea 100644 --- a/arch/loongarch/kvm/intc/pch_pic.c +++ b/arch/loongarch/kvm/intc/pch_pic.c @@ -348,6 +348,7 @@ static int kvm_pch_pic_regs_access(struct kvm_device *dev, struct kvm_device_attr *attr, bool is_write) { + char buf[8]; int addr, offset, len = 8, ret = 0; void __user *data; void *p = NULL; @@ -397,17 +398,23 @@ static int kvm_pch_pic_regs_access(struct kvm_device *dev, return -EINVAL; } - spin_lock(&s->lock); - /* write or read value according to is_write */ if (is_write) { - if (copy_from_user(p, data, len)) - ret = -EFAULT; - } else { - if (copy_to_user(data, p, len)) - ret = -EFAULT; + if (copy_from_user(buf, data, len)) + return -EFAULT; } + + spin_lock(&s->lock); + if (is_write) + memcpy(p, buf, len); + else + memcpy(buf, p, len); spin_unlock(&s->lock); + if (!is_write) { + if (copy_to_user(data, buf, len)) + return -EFAULT; + } + return ret; } diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c index ed956c5cf2cc..7c8143e79c12 100644 --- a/arch/loongarch/kvm/mmu.c +++ b/arch/loongarch/kvm/mmu.c @@ -569,7 +569,7 @@ static int kvm_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa, bool writ /* Track access to pages marked old */ new = kvm_pte_mkyoung(*ptep); if (write && !kvm_pte_dirty(new)) { - if (!kvm_pte_write(new)) { + if (!kvm_pte_writeable(new)) { ret = -EFAULT; goto out; } @@ -856,9 +856,9 @@ retry: prot_bits |= _CACHE_SUC; if (writeable) { - prot_bits |= _PAGE_WRITE; + prot_bits = kvm_pte_mkwriteable(prot_bits); if (write) - prot_bits |= __WRITEABLE; + prot_bits = kvm_pte_mkdirty(prot_bits); } /* Disable dirty logging on HugePages */ @@ -904,7 +904,7 @@ retry: kvm_release_faultin_page(kvm, page, false, writeable); spin_unlock(&kvm->mmu_lock); - if (prot_bits & _PAGE_DIRTY) + if (kvm_pte_dirty(prot_bits)) mark_page_dirty_in_slot(kvm, memslot, gfn); out: diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h index e5f57cfe1d45..025c6dcbf893 100644 --- a/arch/s390/include/asm/pci_insn.h +++ b/arch/s390/include/asm/pci_insn.h @@ -16,11 +16,11 @@ #define ZPCI_PCI_ST_FUNC_NOT_AVAIL 40 #define ZPCI_PCI_ST_ALREADY_IN_RQ_STATE 44 -/* Load/Store return codes */ -#define ZPCI_PCI_LS_OK 0 -#define ZPCI_PCI_LS_ERR 1 -#define ZPCI_PCI_LS_BUSY 2 -#define ZPCI_PCI_LS_INVAL_HANDLE 3 +/* PCI instruction condition codes */ +#define ZPCI_CC_OK 0 +#define ZPCI_CC_ERR 1 +#define ZPCI_CC_BUSY 2 +#define ZPCI_CC_INVAL_HANDLE 3 /* Load/Store address space identifiers */ #define ZPCI_PCIAS_MEMIO_0 0 diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 2a92a8b9e4c2..9384572ffa7b 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -2778,12 +2778,19 @@ static unsigned long get_ind_bit(__u64 addr, unsigned long bit_nr, bool swap) static struct page *get_map_page(struct kvm *kvm, u64 uaddr) { + struct mm_struct *mm = kvm->mm; struct page *page = NULL; + int locked = 1; + + if (mmget_not_zero(mm)) { + mmap_read_lock(mm); + get_user_pages_remote(mm, uaddr, 1, FOLL_WRITE, + &page, &locked); + if (locked) + mmap_read_unlock(mm); + mmput(mm); + } - mmap_read_lock(kvm->mm); - get_user_pages_remote(kvm->mm, uaddr, 1, FOLL_WRITE, - &page, NULL); - mmap_read_unlock(kvm->mm); return page; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index bf6fa8b9ca73..6d51aa5f66be 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -4864,12 +4864,12 @@ static void kvm_s390_assert_primary_as(struct kvm_vcpu *vcpu) * @vcpu: the vCPU whose gmap is to be fixed up * @gfn: the guest frame number used for memslots (including fake memslots) * @gaddr: the gmap address, does not have to match @gfn for ucontrol gmaps - * @flags: FOLL_* flags + * @foll: FOLL_* flags * * Return: 0 on success, < 0 in case of error. * Context: The mm lock must not be held before calling. May sleep. */ -int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int flags) +int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int foll) { struct kvm_memory_slot *slot; unsigned int fault_flags; @@ -4883,13 +4883,13 @@ int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, u if (!slot || slot->flags & KVM_MEMSLOT_INVALID) return vcpu_post_run_addressing_exception(vcpu); - fault_flags = flags & FOLL_WRITE ? FAULT_FLAG_WRITE : 0; + fault_flags = foll & FOLL_WRITE ? FAULT_FLAG_WRITE : 0; if (vcpu->arch.gmap->pfault_enabled) - flags |= FOLL_NOWAIT; + foll |= FOLL_NOWAIT; vmaddr = __gfn_to_hva_memslot(slot, gfn); try_again: - pfn = __kvm_faultin_pfn(slot, gfn, flags, &writable, &page); + pfn = __kvm_faultin_pfn(slot, gfn, foll, &writable, &page); /* Access outside memory, inject addressing exception */ if (is_noslot_pfn(pfn)) @@ -4905,7 +4905,7 @@ try_again: return 0; vcpu->stat.pfault_sync++; /* Could not setup async pfault, try again synchronously */ - flags &= ~FOLL_NOWAIT; + foll &= ~FOLL_NOWAIT; goto try_again; } /* Any other error */ @@ -4925,7 +4925,7 @@ try_again: return rc; } -static int vcpu_dat_fault_handler(struct kvm_vcpu *vcpu, unsigned long gaddr, unsigned int flags) +static int vcpu_dat_fault_handler(struct kvm_vcpu *vcpu, unsigned long gaddr, unsigned int foll) { unsigned long gaddr_tmp; gfn_t gfn; @@ -4950,18 +4950,18 @@ static int vcpu_dat_fault_handler(struct kvm_vcpu *vcpu, unsigned long gaddr, un } gfn = gpa_to_gfn(gaddr_tmp); } - return __kvm_s390_handle_dat_fault(vcpu, gfn, gaddr, flags); + return __kvm_s390_handle_dat_fault(vcpu, gfn, gaddr, foll); } static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) { - unsigned int flags = 0; + unsigned int foll = 0; unsigned long gaddr; int rc; gaddr = current->thread.gmap_teid.addr * PAGE_SIZE; if (kvm_s390_cur_gmap_fault_is_write()) - flags = FAULT_FLAG_WRITE; + foll = FOLL_WRITE; switch (current->thread.gmap_int_code & PGM_INT_CODE_MASK) { case 0: @@ -5003,7 +5003,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) send_sig(SIGSEGV, current, 0); if (rc != -ENXIO) break; - flags = FAULT_FLAG_WRITE; + foll = FOLL_WRITE; fallthrough; case PGM_PROTECTION: case PGM_SEGMENT_TRANSLATION: @@ -5013,7 +5013,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) case PGM_REGION_SECOND_TRANS: case PGM_REGION_THIRD_TRANS: kvm_s390_assert_primary_as(vcpu); - return vcpu_dat_fault_handler(vcpu, gaddr, flags); + return vcpu_dat_fault_handler(vcpu, gaddr, foll); default: KVM_BUG(1, vcpu->kvm, "Unexpected program interrupt 0x%x, TEID 0x%016lx", current->thread.gmap_int_code, current->thread.gmap_teid.val); diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index 25ede8354514..6ba5a0305e25 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -624,6 +624,17 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc) int cc, ret; u16 dummy; + /* Add the notifier only once. No races because we hold kvm->lock */ + if (kvm->arch.pv.mmu_notifier.ops != &kvm_s390_pv_mmu_notifier_ops) { + /* The notifier will be unregistered when the VM is destroyed */ + kvm->arch.pv.mmu_notifier.ops = &kvm_s390_pv_mmu_notifier_ops; + ret = mmu_notifier_register(&kvm->arch.pv.mmu_notifier, kvm->mm); + if (ret) { + kvm->arch.pv.mmu_notifier.ops = NULL; + return ret; + } + } + ret = kvm_s390_pv_alloc_vm(kvm); if (ret) return ret; @@ -659,11 +670,6 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc) return -EIO; } kvm->arch.gmap->guest_handle = uvcb.guest_handle; - /* Add the notifier only once. No races because we hold kvm->lock */ - if (kvm->arch.pv.mmu_notifier.ops != &kvm_s390_pv_mmu_notifier_ops) { - kvm->arch.pv.mmu_notifier.ops = &kvm_s390_pv_mmu_notifier_ops; - mmu_notifier_register(&kvm->arch.pv.mmu_notifier, kvm->mm); - } return 0; } diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index ad8d78fb1d9a..de7867ae220d 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -1250,10 +1250,12 @@ static int virtio_uml_probe(struct platform_device *pdev) device_set_wakeup_capable(&vu_dev->vdev.dev, true); rc = register_virtio_device(&vu_dev->vdev); - if (rc) + if (rc) { put_device(&vu_dev->vdev.dev); + return rc; + } vu_dev->registered = 1; - return rc; + return 0; error_init: os_close_file(vu_dev->sock); diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c index 617886d1fb1e..21f0e50fb1df 100644 --- a/arch/um/os-Linux/file.c +++ b/arch/um/os-Linux/file.c @@ -535,7 +535,7 @@ ssize_t os_rcv_fd_msg(int fd, int *fds, unsigned int n_fds, cmsg->cmsg_type != SCM_RIGHTS) return n; - memcpy(fds, CMSG_DATA(cmsg), cmsg->cmsg_len); + memcpy(fds, CMSG_DATA(cmsg), cmsg->cmsg_len - CMSG_LEN(0)); return n; } diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c index 4193e04d7e4a..e3ad71a0d13c 100644 --- a/arch/um/os-Linux/util.c +++ b/arch/um/os-Linux/util.c @@ -20,8 +20,7 @@ void stack_protections(unsigned long address) { - if (mprotect((void *) address, UM_THREAD_SIZE, - PROT_READ | PROT_WRITE | PROT_EXEC) < 0) + if (mprotect((void *) address, UM_THREAD_SIZE, PROT_READ | PROT_WRITE) < 0) panic("protecting stack failed, errno = %d", errno); } diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 02236962fdb1..465b19fd1a2d 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -562,6 +562,24 @@ enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, extern struct ghcb *boot_ghcb; +static inline void sev_evict_cache(void *va, int npages) +{ + volatile u8 val __always_unused; + u8 *bytes = va; + int page_idx; + + /* + * For SEV guests, a read from the first/last cache-lines of a 4K page + * using the guest key is sufficient to cause a flush of all cache-lines + * associated with that 4K page without incurring all the overhead of a + * full CLFLUSH sequence. + */ + for (page_idx = 0; page_idx < npages; page_idx++) { + val = bytes[page_idx * PAGE_SIZE]; + val = bytes[page_idx * PAGE_SIZE + PAGE_SIZE - 1]; + } +} + #else /* !CONFIG_AMD_MEM_ENCRYPT */ #define snp_vmpl 0 @@ -605,6 +623,7 @@ static inline int snp_send_guest_request(struct snp_msg_desc *mdesc, static inline int snp_svsm_vtpm_send_command(u8 *buffer) { return -ENODEV; } static inline void __init snp_secure_tsc_prepare(void) { } static inline void __init snp_secure_tsc_init(void) { } +static inline void sev_evict_cache(void *va, int npages) {} #endif /* CONFIG_AMD_MEM_ENCRYPT */ @@ -619,24 +638,6 @@ int rmp_make_shared(u64 pfn, enum pg_level level); void snp_leak_pages(u64 pfn, unsigned int npages); void kdump_sev_callback(void); void snp_fixup_e820_tables(void); - -static inline void sev_evict_cache(void *va, int npages) -{ - volatile u8 val __always_unused; - u8 *bytes = va; - int page_idx; - - /* - * For SEV guests, a read from the first/last cache-lines of a 4K page - * using the guest key is sufficient to cause a flush of all cache-lines - * associated with that 4K page without incurring all the overhead of a - * full CLFLUSH sequence. - */ - for (page_idx = 0; page_idx < npages; page_idx++) { - val = bytes[page_idx * PAGE_SIZE]; - val = bytes[page_idx * PAGE_SIZE + PAGE_SIZE - 1]; - } -} #else static inline bool snp_probe_rmptable_info(void) { return false; } static inline int snp_rmptable_init(void) { return -ENOSYS; } @@ -652,7 +653,6 @@ static inline int rmp_make_shared(u64 pfn, enum pg_level level) { return -ENODEV static inline void snp_leak_pages(u64 pfn, unsigned int npages) {} static inline void kdump_sev_callback(void) { } static inline void snp_fixup_e820_tables(void) {} -static inline void sev_evict_cache(void *va, int npages) {} #endif #endif diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index d9931c6c4bc6..1bfebe40854f 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4046,8 +4046,7 @@ static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); u64 cr8; - if (nested_svm_virtualize_tpr(vcpu) || - kvm_vcpu_apicv_active(vcpu)) + if (nested_svm_virtualize_tpr(vcpu)) return; cr8 = kvm_get_cr8(vcpu); |