diff options
Diffstat (limited to 'arch/x86/kvm/vmx/tdx.c')
-rw-r--r-- | arch/x86/kvm/vmx/tdx.c | 77 |
1 files changed, 46 insertions, 31 deletions
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c index 66744f5768c8..0a49c863c811 100644 --- a/arch/x86/kvm/vmx/tdx.c +++ b/arch/x86/kvm/vmx/tdx.c @@ -281,25 +281,6 @@ static inline void tdx_disassociate_vp(struct kvm_vcpu *vcpu) vcpu->cpu = -1; } -static void tdx_clear_page(struct page *page) -{ - const void *zero_page = (const void *) page_to_virt(ZERO_PAGE(0)); - void *dest = page_to_virt(page); - unsigned long i; - - /* - * The page could have been poisoned. MOVDIR64B also clears - * the poison bit so the kernel can safely use the page again. - */ - for (i = 0; i < PAGE_SIZE; i += 64) - movdir64b(dest + i, zero_page); - /* - * MOVDIR64B store uses WC buffer. Prevent following memory reads - * from seeing potentially poisoned cache. - */ - __mb(); -} - static void tdx_no_vcpus_enter_start(struct kvm *kvm) { struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm); @@ -345,7 +326,7 @@ static int tdx_reclaim_page(struct page *page) r = __tdx_reclaim_page(page); if (!r) - tdx_clear_page(page); + tdx_quirk_reset_page(page); return r; } @@ -442,6 +423,16 @@ void tdx_disable_virtualization_cpu(void) tdx_flush_vp(&arg); } local_irq_restore(flags); + + /* + * Flush cache now if kexec is possible: this is necessary to avoid + * having dirty private memory cachelines when the new kernel boots, + * but WBINVD is a relatively expensive operation and doing it during + * kexec can exacerbate races in native_stop_other_cpus(). Do it + * now, since this is a safe moment and there is going to be no more + * TDX activity on this CPU from this point on. + */ + tdx_cpu_flush_cache_for_kexec(); } #define TDX_SEAMCALL_RETRIES 10000 @@ -593,7 +584,7 @@ static void tdx_reclaim_td_control_pages(struct kvm *kvm) pr_tdx_error(TDH_PHYMEM_PAGE_WBINVD, err); return; } - tdx_clear_page(kvm_tdx->td.tdr_page); + tdx_quirk_reset_page(kvm_tdx->td.tdr_page); __free_page(kvm_tdx->td.tdr_page); kvm_tdx->td.tdr_page = NULL; @@ -629,6 +620,11 @@ int tdx_vm_init(struct kvm *kvm) struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm); kvm->arch.has_protected_state = true; + /* + * TDX Module doesn't allow the hypervisor to modify the EOI-bitmap, + * i.e. all EOIs are accelerated and never trigger exits. + */ + kvm->arch.has_protected_eoi = true; kvm->arch.has_private_mem = true; kvm->arch.disabled_quirks |= KVM_X86_QUIRK_IGNORE_GUEST_PAT; @@ -861,6 +857,7 @@ void tdx_vcpu_free(struct kvm_vcpu *vcpu) if (tdx->vp.tdvpr_page) { tdx_reclaim_control_page(tdx->vp.tdvpr_page); tdx->vp.tdvpr_page = 0; + tdx->vp.tdvpr_pa = 0; } tdx->state = VCPU_TD_STATE_UNINITIALIZED; @@ -1714,7 +1711,7 @@ static int tdx_sept_drop_private_spte(struct kvm *kvm, gfn_t gfn, pr_tdx_error(TDH_PHYMEM_PAGE_WBINVD, err); return -EIO; } - tdx_clear_page(page); + tdx_quirk_reset_page(page); tdx_unpin(kvm, page); return 0; } @@ -2002,6 +1999,8 @@ static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu) * handle retries locally in their EPT violation handlers. */ while (1) { + struct kvm_memory_slot *slot; + ret = __vmx_handle_ept_violation(vcpu, gpa, exit_qual); if (ret != RET_PF_RETRY || !local_retry) @@ -2015,6 +2014,15 @@ static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu) break; } + /* + * Bail if the memslot is invalid, i.e. is being deleted, as + * faulting in will never succeed and this task needs to drop + * SRCU in order to let memslot deletion complete. + */ + slot = kvm_vcpu_gfn_to_memslot(vcpu, gpa_to_gfn(gpa)); + if (slot && slot->flags & KVM_MEMSLOT_INVALID) + break; + cond_resched(); } return ret; @@ -2480,7 +2488,7 @@ static int __tdx_td_init(struct kvm *kvm, struct td_params *td_params, /* TDVPS = TDVPR(4K page) + TDCX(multiple 4K pages), -1 for TDVPR. */ kvm_tdx->td.tdcx_nr_pages = tdx_sysinfo->td_ctrl.tdvps_base_size / PAGE_SIZE - 1; tdcs_pages = kcalloc(kvm_tdx->td.tdcs_nr_pages, sizeof(*kvm_tdx->td.tdcs_pages), - GFP_KERNEL | __GFP_ZERO); + GFP_KERNEL); if (!tdcs_pages) goto free_tdr; @@ -2940,6 +2948,13 @@ static int tdx_td_vcpu_init(struct kvm_vcpu *vcpu, u64 vcpu_rcx) return -ENOMEM; tdx->vp.tdvpr_page = page; + /* + * page_to_phys() does not work in 'noinstr' code, like guest + * entry via tdh_vp_enter(). Precalculate and store it instead + * of doing it at runtime later. + */ + tdx->vp.tdvpr_pa = page_to_phys(tdx->vp.tdvpr_page); + tdx->vp.tdcx_pages = kcalloc(kvm_tdx->td.tdcx_nr_pages, sizeof(*tdx->vp.tdcx_pages), GFP_KERNEL); if (!tdx->vp.tdcx_pages) { @@ -3002,6 +3017,7 @@ free_tdvpr: if (tdx->vp.tdvpr_page) __free_page(tdx->vp.tdvpr_page); tdx->vp.tdvpr_page = 0; + tdx->vp.tdvpr_pa = 0; return ret; } @@ -3318,8 +3334,11 @@ int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp) return ret; } -int tdx_gmem_private_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn) +int tdx_gmem_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn, bool is_private) { + if (!is_private) + return 0; + return PG_LEVEL_4K; } @@ -3457,12 +3476,11 @@ static int __init __tdx_bringup(void) if (r) goto tdx_bringup_err; + r = -EINVAL; /* Get TDX global information for later use */ tdx_sysinfo = tdx_get_sysinfo(); - if (WARN_ON_ONCE(!tdx_sysinfo)) { - r = -EINVAL; + if (WARN_ON_ONCE(!tdx_sysinfo)) goto get_sysinfo_err; - } /* Check TDX module and KVM capabilities */ if (!tdx_get_supported_attrs(&tdx_sysinfo->td_conf) || @@ -3505,14 +3523,11 @@ static int __init __tdx_bringup(void) if (td_conf->max_vcpus_per_td < num_present_cpus()) { pr_err("Disable TDX: MAX_VCPU_PER_TD (%u) smaller than number of logical CPUs (%u).\n", td_conf->max_vcpus_per_td, num_present_cpus()); - r = -EINVAL; goto get_sysinfo_err; } - if (misc_cg_set_capacity(MISC_CG_RES_TDX, tdx_get_nr_guest_keyids())) { - r = -EINVAL; + if (misc_cg_set_capacity(MISC_CG_RES_TDX, tdx_get_nr_guest_keyids())) goto get_sysinfo_err; - } /* * Leave hardware virtualization enabled after TDX is enabled |