Diffstat (limited to 'arch/arm64/kvm/nested.c')
-rw-r--r--   arch/arm64/kvm/nested.c   125
1 file changed, 85 insertions, 40 deletions
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index f04cda40545b..cdeeb8f09e72 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -85,7 +85,7 @@ int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu)
 	/*
 	 * Let's treat memory allocation failures as benign: If we fail to
 	 * allocate anything, return an error and keep the allocated array
-	 * alive. Userspace may try to recover by intializing the vcpu
+	 * alive. Userspace may try to recover by initializing the vcpu
 	 * again, and there is no reason to affect the whole VM for this.
 	 */
 	num_mmus = atomic_read(&kvm->online_vcpus) * S2_MMU_PER_VCPU;
@@ -124,14 +124,13 @@ int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu)
 }
 
 struct s2_walk_info {
-	int	     (*read_desc)(phys_addr_t pa, u64 *desc, void *data);
-	void	     *data;
-	u64	     baddr;
-	unsigned int max_oa_bits;
-	unsigned int pgshift;
-	unsigned int sl;
-	unsigned int t0sz;
-	bool	     be;
+	u64	     baddr;
+	unsigned int max_oa_bits;
+	unsigned int pgshift;
+	unsigned int sl;
+	unsigned int t0sz;
+	bool	     be;
+	bool	     ha;
 };
 
 static u32 compute_fsc(int level, u32 fsc)
@@ -199,6 +198,42 @@ static int check_output_size(struct s2_walk_info *wi, phys_addr_t output)
 	return 0;
 }
 
+static int read_guest_s2_desc(struct kvm_vcpu *vcpu, phys_addr_t pa, u64 *desc,
+			      struct s2_walk_info *wi)
+{
+	u64 val;
+	int r;
+
+	r = kvm_read_guest(vcpu->kvm, pa, &val, sizeof(val));
+	if (r)
+		return r;
+
+	/*
+	 * Handle reversedescriptors if endianness differs between the
+	 * host and the guest hypervisor.
+	 */
+	if (wi->be)
+		*desc = be64_to_cpu((__force __be64)val);
+	else
+		*desc = le64_to_cpu((__force __le64)val);
+
+	return 0;
+}
+
+static int swap_guest_s2_desc(struct kvm_vcpu *vcpu, phys_addr_t pa, u64 old, u64 new,
+			      struct s2_walk_info *wi)
+{
+	if (wi->be) {
+		old = (__force u64)cpu_to_be64(old);
+		new = (__force u64)cpu_to_be64(new);
+	} else {
+		old = (__force u64)cpu_to_le64(old);
+		new = (__force u64)cpu_to_le64(new);
+	}
+
+	return __kvm_at_swap_desc(vcpu->kvm, pa, old, new);
+}
+
 /*
  * This is essentially a C-version of the pseudo code from the ARM ARM
  * AArch64.TranslationTableWalk function. I strongly recommend looking at
@@ -206,13 +241,13 @@ static int check_output_size(struct s2_walk_info *wi, phys_addr_t output)
  *
  * Must be called with the kvm->srcu read lock held
  */
-static int walk_nested_s2_pgd(phys_addr_t ipa,
+static int walk_nested_s2_pgd(struct kvm_vcpu *vcpu, phys_addr_t ipa,
 			      struct s2_walk_info *wi, struct kvm_s2_trans *out)
 {
 	int first_block_level, level, stride, input_size, base_lower_bound;
 	phys_addr_t base_addr;
 	unsigned int addr_top, addr_bottom;
-	u64 desc;  /* page table entry */
+	u64 desc, new_desc;  /* page table entry */
 	int ret;
 	phys_addr_t paddr;
 
@@ -257,28 +292,30 @@ static int walk_nested_s2_pgd(phys_addr_t ipa,
 			>> (addr_bottom - 3);
 		paddr = base_addr | index;
 
-		ret = wi->read_desc(paddr, &desc, wi->data);
+		ret = read_guest_s2_desc(vcpu, paddr, &desc, wi);
 		if (ret < 0)
 			return ret;
 
-		/*
-		 * Handle reversedescriptors if endianness differs between the
-		 * host and the guest hypervisor.
-		 */
-		if (wi->be)
-			desc = be64_to_cpu((__force __be64)desc);
-		else
-			desc = le64_to_cpu((__force __le64)desc);
+		new_desc = desc;
 
 		/* Check for valid descriptor at this point */
-		if (!(desc & 1) || ((desc & 3) == 1 && level == 3)) {
+		if (!(desc & KVM_PTE_VALID)) {
 			out->esr = compute_fsc(level, ESR_ELx_FSC_FAULT);
 			out->desc = desc;
 			return 1;
 		}
 
-		/* We're at the final level or block translation level */
-		if ((desc & 3) == 1 || level == 3)
+		if (FIELD_GET(KVM_PTE_TYPE, desc) == KVM_PTE_TYPE_BLOCK) {
+			if (level < 3)
+				break;
+
+			out->esr = compute_fsc(level, ESR_ELx_FSC_FAULT);
+			out->desc = desc;
+			return 1;
+		}
+
+		/* We're at the final level */
+		if (level == 3)
 			break;
 
 		if (check_output_size(wi, desc)) {
@@ -305,7 +342,18 @@ static int walk_nested_s2_pgd(phys_addr_t ipa,
 		return 1;
 	}
 
-	if (!(desc & BIT(10))) {
+	if (wi->ha)
+		new_desc |= KVM_PTE_LEAF_ATTR_LO_S2_AF;
+
+	if (new_desc != desc) {
+		ret = swap_guest_s2_desc(vcpu, paddr, desc, new_desc, wi);
+		if (ret)
+			return ret;
+
+		desc = new_desc;
+	}
+
+	if (!(desc & KVM_PTE_LEAF_ATTR_LO_S2_AF)) {
 		out->esr = compute_fsc(level, ESR_ELx_FSC_ACCESS);
 		out->desc = desc;
 		return 1;
@@ -318,20 +366,13 @@ static int walk_nested_s2_pgd(phys_addr_t ipa,
 		(ipa & GENMASK_ULL(addr_bottom - 1, 0));
 	out->output = paddr;
 	out->block_size = 1UL << ((3 - level) * stride + wi->pgshift);
-	out->readable = desc & (0b01 << 6);
-	out->writable = desc & (0b10 << 6);
+	out->readable = desc & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;
+	out->writable = desc & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;
 	out->level = level;
 	out->desc = desc;
 	return 0;
 }
 
-static int read_guest_s2_desc(phys_addr_t pa, u64 *desc, void *data)
-{
-	struct kvm_vcpu *vcpu = data;
-
-	return kvm_read_guest(vcpu->kvm, pa, desc, sizeof(*desc));
-}
-
 static void vtcr_to_walk_info(u64 vtcr, struct s2_walk_info *wi)
 {
 	wi->t0sz = vtcr & TCR_EL2_T0SZ_MASK;
@@ -350,6 +391,8 @@ static void vtcr_to_walk_info(u64 vtcr, struct s2_walk_info *wi)
 	/* Global limit for now, should eventually be per-VM */
 	wi->max_oa_bits = min(get_kvm_ipa_limit(),
 			      ps_to_output_size(FIELD_GET(VTCR_EL2_PS_MASK, vtcr), false));
+
+	wi->ha = vtcr & VTCR_EL2_HA;
 }
 
 int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
@@ -364,15 +407,13 @@ int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
 	if (!vcpu_has_nv(vcpu))
 		return 0;
 
-	wi.read_desc = read_guest_s2_desc;
-	wi.data = vcpu;
-
 	wi.baddr = vcpu_read_sys_reg(vcpu, VTTBR_EL2);
 
 	vtcr_to_walk_info(vtcr, &wi);
 
 	wi.be = vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_EE;
 
-	ret = walk_nested_s2_pgd(gipa, &wi, result);
+	ret = walk_nested_s2_pgd(vcpu, gipa, &wi, result);
 	if (ret)
 		result->esr |= (kvm_vcpu_get_esr(vcpu) & ~ESR_ELx_FSC);
 
@@ -788,7 +829,10 @@ int kvm_s2_handle_perm_fault(struct kvm_vcpu *vcpu, struct kvm_s2_trans *trans)
 		return 0;
 
 	if (kvm_vcpu_trap_is_iabt(vcpu)) {
-		forward_fault = !kvm_s2_trans_executable(trans);
+		if (vcpu_mode_priv(vcpu))
+			forward_fault = !kvm_s2_trans_exec_el1(vcpu->kvm, trans);
+		else
+			forward_fault = !kvm_s2_trans_exec_el0(vcpu->kvm, trans);
 	} else {
 		bool write_fault = kvm_is_write_fault(vcpu);
 
@@ -1555,12 +1599,13 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val)
 	case SYS_ID_AA64MMFR1_EL1:
 		val &= ~(ID_AA64MMFR1_EL1_CMOW |
 			 ID_AA64MMFR1_EL1_nTLBPA |
-			 ID_AA64MMFR1_EL1_ETS |
-			 ID_AA64MMFR1_EL1_XNX |
-			 ID_AA64MMFR1_EL1_HAFDBS);
+			 ID_AA64MMFR1_EL1_ETS);
+
 		/* FEAT_E2H0 implies no VHE */
 		if (test_bit(KVM_ARM_VCPU_HAS_EL2_E2H0, kvm->arch.vcpu_features))
 			val &= ~ID_AA64MMFR1_EL1_VH;
+
+		val = ID_REG_LIMIT_FIELD_ENUM(val, ID_AA64MMFR1_EL1, HAFDBS, AF);
 		break;
 	case SYS_ID_AA64MMFR2_EL1:
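
A note on the endianness handling above: read_guest_s2_desc() reads the descriptor as a raw 64-bit value and byte-swaps it when the guest hypervisor runs big-endian (wi->be, derived from SCTLR_EL2.EE). A minimal user-space sketch of the same decode, using glibc's be64toh()/le64toh() as stand-ins for the kernel's be64_to_cpu()/le64_to_cpu():

/* Build with: cc -std=c11 desc_endian.c */
#include <endian.h>
#include <stdint.h>
#include <stdio.h>

/* Decode a raw descriptor according to the guest hypervisor's data endianness. */
static uint64_t desc_to_cpu(uint64_t raw, int guest_is_be)
{
	return guest_is_be ? be64toh(raw) : le64toh(raw);
}

int main(void)
{
	/* A descriptor as a big-endian guest would have stored it in memory. */
	uint64_t raw = htobe64(0x0040000000000741ULL);

	printf("decoded: 0x%016llx\n",
	       (unsigned long long)desc_to_cpu(raw, /* guest_is_be */ 1));
	return 0;
}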
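
The new VTCR_EL2.HA path sets the access flag in a copy of the descriptor (new_desc) and only writes it back through swap_guest_s2_desc(), i.e. with compare-and-swap semantics against the old value, so a concurrent update by the guest is detected rather than silently overwritten; on failure the walker just propagates the error. A stand-alone sketch of that read-modify-cmpxchg pattern, with illustrative names (set_access_flag, S2_DESC_*) rather than the kernel's internals:

/* Build with: cc -std=c11 desc_af.c */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define S2_DESC_VALID	(1ULL << 0)	/* bit[0]: descriptor is valid */
#define S2_DESC_AF	(1ULL << 10)	/* bit[10]: access flag */

/*
 * Mark a descriptor as accessed. Returns false if the descriptor changed
 * under our feet, in which case the caller decides whether to retry.
 */
static bool set_access_flag(_Atomic uint64_t *descp)
{
	uint64_t old = atomic_load(descp);
	uint64_t new = old | S2_DESC_AF;

	if (new == old)		/* AF already set, nothing to write back */
		return true;

	return atomic_compare_exchange_strong(descp, &old, new);
}

int main(void)
{
	_Atomic uint64_t desc = S2_DESC_VALID;	/* valid mapping, AF clear */

	printf("swap ok: %d, AF set: %d\n",
	       set_access_flag(&desc),
	       !!(atomic_load(&desc) & S2_DESC_AF));
	return 0;
}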
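
The ID-register change is what makes this visible to the guest: rather than masking ID_AA64MMFR1_EL1.HAFDBS outright, limit_nv_id_reg() now clamps it to the AF level, so a nested hypervisor sees hardware access-flag updates but not dirty-bit management. A worked example of what such a clamp does, with a hypothetical limit_field() helper standing in for the kernel's ID_REG_LIMIT_FIELD_ENUM() (HAFDBS occupies bits [3:0]; 1 means AF only, 2 adds hardware dirty state):

/* Build with: cc -std=c11 id_cap.c */
#include <stdint.h>
#include <stdio.h>

#define HAFDBS_SHIFT	0	/* ID_AA64MMFR1_EL1.HAFDBS lives in bits [3:0] */
#define HAFDBS_AF	1	/* hardware access-flag updates only */
#define HAFDBS_AF_DBS	2	/* access flag plus hardware dirty state */

/* Clamp one 4-bit ID-register field to a maximum enum value. */
static uint64_t limit_field(uint64_t reg, unsigned int shift, uint64_t limit)
{
	uint64_t field = (reg >> shift) & 0xf;

	if (field > limit)
		field = limit;

	return (reg & ~(0xfULL << shift)) | (field << shift);
}

int main(void)
{
	uint64_t mmfr1 = HAFDBS_AF_DBS;	/* host supports AF + dirty state */

	/* The guest hypervisor is only told about access-flag hardware. */
	printf("guest HAFDBS: %llu\n",
	       (unsigned long long)((limit_field(mmfr1, HAFDBS_SHIFT, HAFDBS_AF)
				     >> HAFDBS_SHIFT) & 0xf));
	return 0;
}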
