From 98631c4904bf6380834c8585ce50451f00eb5389 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Thu, 14 Mar 2024 14:38:19 +0800 Subject: arm64: Remove unnecessary irqflags alternative.h include Since commit 20af807d806d ("arm64: Avoid cpus_have_const_cap() for ARM64_HAS_GIC_PRIO_MASKING"), the alternative.h include is not used, so remove it. Fixes: 20af807d806d ("arm64: Avoid cpus_have_const_cap() for ARM64_HAS_GIC_PRIO_MASKING") Signed-off-by: Jinjie Ruan Link: https://lore.kernel.org/r/20240314063819.2636445-1-ruanjinjie@huawei.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/irqflags.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h index 0a7186a93882..d4d7451c2c12 100644 --- a/arch/arm64/include/asm/irqflags.h +++ b/arch/arm64/include/asm/irqflags.h @@ -5,7 +5,6 @@ #ifndef __ASM_IRQFLAGS_H #define __ASM_IRQFLAGS_H -#include #include #include #include -- cgit v1.2.3 From e07255d69702bc9131427fda8f9749355b10780f Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 5 Apr 2024 13:58:51 +1000 Subject: arm64: tlb: Improve __TLBI_VADDR_RANGE() The macro returns the operand of TLBI RANGE instruction. A mask needs to be applied to each individual field upon producing the operand, to avoid the adjacent fields can interfere with each other when invalid arguments have been provided. The code looks more tidy at least with a mask and FIELD_PREP(). Suggested-by: Marc Zyngier Signed-off-by: Gavin Shan Reviewed-by: Ryan Roberts Reviewed-by: Catalin Marinas Reviewed-by: Anshuman Khandual Reviewed-by: Shaoqin Huang Link: https://lore.kernel.org/r/20240405035852.1532010-3-gshan@redhat.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/tlbflush.h | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index a75de2665d84..243d71f7bc1f 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -142,17 +142,24 @@ static inline unsigned long get_trans_granule(void) * EL1, Inner Shareable". * */ -#define __TLBI_VADDR_RANGE(baddr, asid, scale, num, ttl) \ - ({ \ - unsigned long __ta = (baddr); \ - unsigned long __ttl = (ttl >= 1 && ttl <= 3) ? ttl : 0; \ - __ta &= GENMASK_ULL(36, 0); \ - __ta |= __ttl << 37; \ - __ta |= (unsigned long)(num) << 39; \ - __ta |= (unsigned long)(scale) << 44; \ - __ta |= get_trans_granule() << 46; \ - __ta |= (unsigned long)(asid) << 48; \ - __ta; \ +#define TLBIR_ASID_MASK GENMASK_ULL(63, 48) +#define TLBIR_TG_MASK GENMASK_ULL(47, 46) +#define TLBIR_SCALE_MASK GENMASK_ULL(45, 44) +#define TLBIR_NUM_MASK GENMASK_ULL(43, 39) +#define TLBIR_TTL_MASK GENMASK_ULL(38, 37) +#define TLBIR_BADDR_MASK GENMASK_ULL(36, 0) + +#define __TLBI_VADDR_RANGE(baddr, asid, scale, num, ttl) \ + ({ \ + unsigned long __ta = 0; \ + unsigned long __ttl = (ttl >= 1 && ttl <= 3) ? ttl : 0; \ + __ta |= FIELD_PREP(TLBIR_BADDR_MASK, baddr); \ + __ta |= FIELD_PREP(TLBIR_TTL_MASK, __ttl); \ + __ta |= FIELD_PREP(TLBIR_NUM_MASK, num); \ + __ta |= FIELD_PREP(TLBIR_SCALE_MASK, scale); \ + __ta |= FIELD_PREP(TLBIR_TG_MASK, get_trans_granule()); \ + __ta |= FIELD_PREP(TLBIR_ASID_MASK, asid); \ + __ta; \ }) /* These macros are used by the TLBI RANGE feature. */ -- cgit v1.2.3 From 73301e464a72a0d007d0d4e0f4d3dab5c58125bf Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 5 Apr 2024 13:58:52 +1000 Subject: arm64: tlb: Allow range operation for MAX_TLBI_RANGE_PAGES MAX_TLBI_RANGE_PAGES pages is covered by SCALE#3 and NUM#31 and it's supported now. Allow TLBI RANGE operation when the number of pages is equal to MAX_TLBI_RANGE_PAGES in __flush_tlb_range_nosync(). Suggested-by: Marc Zyngier Signed-off-by: Gavin Shan Reviewed-by: Anshuman Khandual Reviewed-by: Ryan Roberts Reviewed-by: Catalin Marinas Reviewed-by: Shaoqin Huang Link: https://lore.kernel.org/r/20240405035852.1532010-4-gshan@redhat.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/tlbflush.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 243d71f7bc1f..95fbc8c05607 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -446,11 +446,11 @@ static inline void __flush_tlb_range_nosync(struct vm_area_struct *vma, * When not uses TLB range ops, we can handle up to * (MAX_DVM_OPS - 1) pages; * When uses TLB range ops, we can handle up to - * (MAX_TLBI_RANGE_PAGES - 1) pages. + * MAX_TLBI_RANGE_PAGES pages. */ if ((!system_supports_tlb_range() && (end - start) >= (MAX_DVM_OPS * stride)) || - pages >= MAX_TLBI_RANGE_PAGES) { + pages > MAX_TLBI_RANGE_PAGES) { flush_tlb_mm(vma->vm_mm); return; } -- cgit v1.2.3 From b782e8d07baac95a5ce3f8773cc61f4ed7d0ccbc Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Thu, 11 Apr 2024 20:30:30 +0800 Subject: arm64: arm_pmuv3: Correctly extract and check the PMUVer Currently we're using "sbfx" to extract the PMUVer from ID_AA64DFR0_EL1 and skip the init/reset if no PMU present when the extracted PMUVer is negative or is zero. However for PMUv3p8 the PMUVer will be 0b1000 and PMUVer extracted by "sbfx" will always be negative and we'll skip the init/reset in __init_el2_debug/reset_pmuserenr_el0 unexpectedly. So this patch use "ubfx" instead of "sbfx" to extract the PMUVer. If the PMUVer is implementation defined (0b1111) or not implemented(0b0000) then skip the reset/init. Previously we'll also skip the init/reset if the PMUVer is higher than the version we known (currently PMUv3p9), with this patch we'll only skip if the PMU is not implemented or implementation defined. This keeps consistence with how we probe the PMU in the driver with pmuv3_implemented(). Signed-off-by: Yicong Yang Link: https://lore.kernel.org/r/20240411123030.7201-1-yangyicong@huawei.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/assembler.h | 7 ++++--- arch/arm64/include/asm/el2_setup.h | 9 +++++---- 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index ab8b396428da..9ecd076ba08f 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -480,9 +480,10 @@ alternative_endif */ .macro reset_pmuserenr_el0, tmpreg mrs \tmpreg, id_aa64dfr0_el1 - sbfx \tmpreg, \tmpreg, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4 - cmp \tmpreg, #1 // Skip if no PMU present - b.lt 9000f + ubfx \tmpreg, \tmpreg, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4 + cmp \tmpreg, #ID_AA64DFR0_EL1_PMUVer_NI + ccmp \tmpreg, #ID_AA64DFR0_EL1_PMUVer_IMP_DEF, #4, ne + b.eq 9000f // Skip if no PMU present or IMP_DEF msr pmuserenr_el0, xzr // Disable PMU access from EL0 9000: .endm diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index b7afaa026842..e4546b29dd0c 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -59,13 +59,14 @@ .macro __init_el2_debug mrs x1, id_aa64dfr0_el1 - sbfx x0, x1, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4 - cmp x0, #1 - b.lt .Lskip_pmu_\@ // Skip if no PMU present + ubfx x0, x1, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4 + cmp x0, #ID_AA64DFR0_EL1_PMUVer_NI + ccmp x0, #ID_AA64DFR0_EL1_PMUVer_IMP_DEF, #4, ne + b.eq .Lskip_pmu_\@ // Skip if no PMU present or IMP_DEF mrs x0, pmcr_el0 // Disable debug access traps ubfx x0, x0, #11, #5 // to EL2 and allow access to .Lskip_pmu_\@: - csel x2, xzr, x0, lt // all PMU counters from EL1 + csel x2, xzr, x0, eq // all PMU counters from EL1 /* Statistical profiling */ ubfx x0, x1, #ID_AA64DFR0_EL1_PMSVer_SHIFT, #4 -- cgit v1.2.3 From 87f842c6c6543cf0dd66161fdf4b62cec804479b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 1 Mar 2024 11:16:56 +0000 Subject: KVM: arm64: Add accessor for per-CPU state In order to facilitate the introduction of new per-CPU state, add a new host_data_ptr() helped that hides some of the per-CPU verbosity, and make it easier to move that state around in the future. Reviewed-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 37 +++++++++++++++++++++++++++++++ arch/arm64/kvm/arm.c | 2 +- arch/arm64/kvm/hyp/include/hyp/debug-sr.h | 4 ++-- arch/arm64/kvm/hyp/include/hyp/switch.h | 8 +++---- arch/arm64/kvm/hyp/nvhe/psci-relay.c | 2 +- arch/arm64/kvm/hyp/nvhe/setup.c | 3 +-- arch/arm64/kvm/hyp/nvhe/switch.c | 4 ++-- arch/arm64/kvm/hyp/vhe/switch.c | 4 ++-- arch/arm64/kvm/hyp/vhe/sysreg-sr.c | 4 ++-- arch/arm64/kvm/pmu.c | 2 +- 10 files changed, 53 insertions(+), 17 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 9e8a496fb284..f41db42529df 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -530,6 +530,17 @@ struct kvm_cpu_context { u64 *vncr_array; }; +/* + * This structure is instantiated on a per-CPU basis, and contains + * data that is: + * + * - tied to a single physical CPU, and + * - either have a lifetime that does not extend past vcpu_put() + * - or is an invariant for the lifetime of the system + * + * Use host_data_ptr(field) as a way to access a pointer to such a + * field. + */ struct kvm_host_data { struct kvm_cpu_context host_ctxt; }; @@ -1168,6 +1179,32 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); DECLARE_KVM_HYP_PER_CPU(struct kvm_host_data, kvm_host_data); +/* + * How we access per-CPU host data depends on the where we access it from, + * and the mode we're in: + * + * - VHE and nVHE hypervisor bits use their locally defined instance + * + * - the rest of the kernel use either the VHE or nVHE one, depending on + * the mode we're running in. + * + * Unless we're in protected mode, fully deprivileged, and the nVHE + * per-CPU stuff is exclusively accessible to the protected EL2 code. + * In this case, the EL1 code uses the *VHE* data as its private state + * (which makes sense in a way as there shouldn't be any shared state + * between the host and the hypervisor). + * + * Yes, this is all totally trivial. Shoot me now. + */ +#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__) +#define host_data_ptr(f) (&this_cpu_ptr(&kvm_host_data)->f) +#else +#define host_data_ptr(f) \ + (static_branch_unlikely(&kvm_protected_mode_initialized) ? \ + &this_cpu_ptr(&kvm_host_data)->f : \ + &this_cpu_ptr_hyp_sym(kvm_host_data)->f) +#endif + static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt) { /* The host's MPIDR is immutable, so let's set it up at boot time */ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 3dee5490eea9..a24287c3ba99 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1971,7 +1971,7 @@ static void cpu_set_hyp_vector(void) static void cpu_hyp_init_context(void) { - kvm_init_host_cpu_context(&this_cpu_ptr_hyp_sym(kvm_host_data)->host_ctxt); + kvm_init_host_cpu_context(host_data_ptr(host_ctxt)); if (!is_kernel_in_hyp_mode()) cpu_init_hyp_mode(); diff --git a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h index 961bbef104a6..eec0f8ccda56 100644 --- a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h @@ -135,7 +135,7 @@ static inline void __debug_switch_to_guest_common(struct kvm_vcpu *vcpu) if (!vcpu_get_flag(vcpu, DEBUG_DIRTY)) return; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); guest_ctxt = &vcpu->arch.ctxt; host_dbg = &vcpu->arch.host_debug_state.regs; guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); @@ -154,7 +154,7 @@ static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu) if (!vcpu_get_flag(vcpu, DEBUG_DIRTY)) return; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); guest_ctxt = &vcpu->arch.ctxt; host_dbg = &vcpu->arch.host_debug_state.regs; guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index e3fcf8c4d5b4..ae198b84ca01 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -155,7 +155,7 @@ static inline bool cpu_has_amu(void) static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu) { - struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt); struct kvm *kvm = kern_hyp_va(vcpu->kvm); CHECK_FGT_MASKS(HFGRTR_EL2); @@ -191,7 +191,7 @@ static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu) static inline void __deactivate_traps_hfgxtr(struct kvm_vcpu *vcpu) { - struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt); struct kvm *kvm = kern_hyp_va(vcpu->kvm); if (!cpus_have_final_cap(ARM64_HAS_FGT)) @@ -226,7 +226,7 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) write_sysreg(0, pmselr_el0); - hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + hctxt = host_data_ptr(host_ctxt); ctxt_sys_reg(hctxt, PMUSERENR_EL0) = read_sysreg(pmuserenr_el0); write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); vcpu_set_flag(vcpu, PMUSERENR_ON_CPU); @@ -260,7 +260,7 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu) if (kvm_arm_support_pmu_v3()) { struct kvm_cpu_context *hctxt; - hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + hctxt = host_data_ptr(host_ctxt); write_sysreg(ctxt_sys_reg(hctxt, PMUSERENR_EL0), pmuserenr_el0); vcpu_clear_flag(vcpu, PMUSERENR_ON_CPU); } diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c index d57bcb6ab94d..dfe8fe0f7eaf 100644 --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -205,7 +205,7 @@ asmlinkage void __noreturn __kvm_host_psci_cpu_entry(bool is_cpu_on) struct psci_boot_args *boot_args; struct kvm_cpu_context *host_ctxt; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); if (is_cpu_on) boot_args = this_cpu_ptr(&cpu_on_args); diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index bc58d1b515af..ae00dfa80801 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -257,8 +257,7 @@ static int fix_hyp_pgtable_refcnt(void) void __noreturn __pkvm_init_finalise(void) { - struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data); - struct kvm_cpu_context *host_ctxt = &host_data->host_ctxt; + struct kvm_cpu_context *host_ctxt = host_data_ptr(host_ctxt); unsigned long nr_pages, reserved_pages, pfn; int ret; diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index c50f8459e4fc..544a419b9a39 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -264,7 +264,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) pmr_sync(); } - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); host_ctxt->__hyp_running_vcpu = vcpu; guest_ctxt = &vcpu->arch.ctxt; @@ -367,7 +367,7 @@ asmlinkage void __noreturn hyp_panic(void) struct kvm_cpu_context *host_ctxt; struct kvm_vcpu *vcpu; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); vcpu = host_ctxt->__hyp_running_vcpu; if (vcpu) { diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 1581df6aec87..14b7a6bc5909 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -221,7 +221,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) struct kvm_cpu_context *guest_ctxt; u64 exit_code; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); host_ctxt->__hyp_running_vcpu = vcpu; guest_ctxt = &vcpu->arch.ctxt; @@ -306,7 +306,7 @@ static void __hyp_call_panic(u64 spsr, u64 elr, u64 par) struct kvm_cpu_context *host_ctxt; struct kvm_vcpu *vcpu; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); vcpu = host_ctxt->__hyp_running_vcpu; __deactivate_traps(vcpu); diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c index a8b9ea496706..e12bd7d6d2dc 100644 --- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c @@ -67,7 +67,7 @@ void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu) struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; struct kvm_cpu_context *host_ctxt; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); __sysreg_save_user_state(host_ctxt); /* @@ -110,7 +110,7 @@ void __vcpu_put_switch_sysregs(struct kvm_vcpu *vcpu) struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; struct kvm_cpu_context *host_ctxt; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); __sysreg_save_el1_state(guest_ctxt); __sysreg_save_user_state(guest_ctxt); diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c index a243934c5568..329819806096 100644 --- a/arch/arm64/kvm/pmu.c +++ b/arch/arm64/kvm/pmu.c @@ -232,7 +232,7 @@ bool kvm_set_pmuserenr(u64 val) if (!vcpu || !vcpu_get_flag(vcpu, PMUSERENR_ON_CPU)) return false; - hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + hctxt = host_data_ptr(host_ctxt); ctxt_sys_reg(hctxt, PMUSERENR_EL0) = val; return true; } -- cgit v1.2.3 From 6db55734ec4008da39e10d2fffa913fd9751ccaa Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 26 Feb 2024 15:58:46 +0000 Subject: KVM: arm64: Exclude host_debug_data from vcpu_arch Keeping host_debug_state on a per-vcpu basis is completely pointless. The lifetime of this data is only that of the inner run-loop, which means it is never accessed outside of the core EL2 code. Move the structure into kvm_host_data, and save over 500 bytes per vcpu. Reviewed-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 31 +++++++++++++++++-------------- arch/arm64/kvm/hyp/include/hyp/debug-sr.h | 4 ++-- arch/arm64/kvm/hyp/nvhe/debug-sr.c | 8 ++++---- 3 files changed, 23 insertions(+), 20 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f41db42529df..d7bcb8ce1d7b 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -543,6 +543,19 @@ struct kvm_cpu_context { */ struct kvm_host_data { struct kvm_cpu_context host_ctxt; + + /* + * host_debug_state contains the host registers which are + * saved and restored during world switches. + */ + struct { + /* {Break,watch}point registers */ + struct kvm_guest_debug_arch regs; + /* Statistical profiling extension */ + u64 pmscr_el1; + /* Self-hosted trace */ + u64 trfcr_el1; + } host_debug_state; }; struct kvm_host_psci_config { @@ -638,11 +651,10 @@ struct kvm_vcpu_arch { * We maintain more than a single set of debug registers to support * debugging the guest from the host and to maintain separate host and * guest state during world switches. vcpu_debug_state are the debug - * registers of the vcpu as the guest sees them. host_debug_state are - * the host registers which are saved and restored during - * world switches. external_debug_state contains the debug - * values we want to debug the guest. This is set via the - * KVM_SET_GUEST_DEBUG ioctl. + * registers of the vcpu as the guest sees them. + * + * external_debug_state contains the debug values we want to debug the + * guest. This is set via the KVM_SET_GUEST_DEBUG ioctl. * * debug_ptr points to the set of debug registers that should be loaded * onto the hardware when running the guest. @@ -654,15 +666,6 @@ struct kvm_vcpu_arch { struct user_fpsimd_state *host_fpsimd_state; /* hyp VA */ struct task_struct *parent_task; - struct { - /* {Break,watch}point registers */ - struct kvm_guest_debug_arch regs; - /* Statistical profiling extension */ - u64 pmscr_el1; - /* Self-hosted trace */ - u64 trfcr_el1; - } host_debug_state; - /* VGIC state */ struct vgic_cpu vgic_cpu; struct arch_timer_cpu timer_cpu; diff --git a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h index eec0f8ccda56..d00093699aaf 100644 --- a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h @@ -137,7 +137,7 @@ static inline void __debug_switch_to_guest_common(struct kvm_vcpu *vcpu) host_ctxt = host_data_ptr(host_ctxt); guest_ctxt = &vcpu->arch.ctxt; - host_dbg = &vcpu->arch.host_debug_state.regs; + host_dbg = host_data_ptr(host_debug_state.regs); guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); __debug_save_state(host_dbg, host_ctxt); @@ -156,7 +156,7 @@ static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu) host_ctxt = host_data_ptr(host_ctxt); guest_ctxt = &vcpu->arch.ctxt; - host_dbg = &vcpu->arch.host_debug_state.regs; + host_dbg = host_data_ptr(host_debug_state.regs); guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); __debug_save_state(guest_dbg, guest_ctxt); diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c index 7746ea507b6f..53efda0235cf 100644 --- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c @@ -83,10 +83,10 @@ void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu) { /* Disable and flush SPE data generation */ if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_SPE)) - __debug_save_spe(&vcpu->arch.host_debug_state.pmscr_el1); + __debug_save_spe(host_data_ptr(host_debug_state.pmscr_el1)); /* Disable and flush Self-Hosted Trace generation */ if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE)) - __debug_save_trace(&vcpu->arch.host_debug_state.trfcr_el1); + __debug_save_trace(host_data_ptr(host_debug_state.trfcr_el1)); } void __debug_switch_to_guest(struct kvm_vcpu *vcpu) @@ -97,9 +97,9 @@ void __debug_switch_to_guest(struct kvm_vcpu *vcpu) void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu) { if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_SPE)) - __debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1); + __debug_restore_spe(*host_data_ptr(host_debug_state.pmscr_el1)); if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE)) - __debug_restore_trace(vcpu->arch.host_debug_state.trfcr_el1); + __debug_restore_trace(*host_data_ptr(host_debug_state.trfcr_el1)); } void __debug_switch_to_host(struct kvm_vcpu *vcpu) -- cgit v1.2.3 From 4bacd723705a6b6c8386daf3d5148aca66135f3c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 27 Feb 2024 08:27:54 +0000 Subject: KVM: arm64: Exclude mdcr_el2_host from kvm_vcpu_arch As for the rest of the host debug state, the host copy of mdcr_el2 has little to do in the vcpu, and is better placed in the host_data structure. Reviewed-by : Suzuki K Poulose Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 5 ++--- arch/arm64/kvm/hyp/include/hyp/switch.h | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index d7bcb8ce1d7b..a04e69fb2884 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -555,6 +555,8 @@ struct kvm_host_data { u64 pmscr_el1; /* Self-hosted trace */ u64 trfcr_el1; + /* Values of trap registers for the host before guest entry. */ + u64 mdcr_el2; } host_debug_state; }; @@ -616,9 +618,6 @@ struct kvm_vcpu_arch { u64 mdcr_el2; u64 cptr_el2; - /* Values of trap registers for the host before guest entry. */ - u64 mdcr_el2_host; - /* Exception Information */ struct kvm_vcpu_fault_info fault; diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index ae198b84ca01..7d7de0245ed0 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -232,7 +232,7 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) vcpu_set_flag(vcpu, PMUSERENR_ON_CPU); } - vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2); + *host_data_ptr(host_debug_state.mdcr_el2) = read_sysreg(mdcr_el2); write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); if (cpus_have_final_cap(ARM64_HAS_HCX)) { @@ -254,7 +254,7 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu) { - write_sysreg(vcpu->arch.mdcr_el2_host, mdcr_el2); + write_sysreg(*host_data_ptr(host_debug_state.mdcr_el2), mdcr_el2); write_sysreg(0, hstr_el2); if (kvm_arm_support_pmu_v3()) { -- cgit v1.2.3 From 51e09b5572d665645ce394f94f24a7d6ec32bda9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 1 Mar 2024 12:06:44 +0000 Subject: KVM: arm64: Exclude host_fpsimd_state pointer from kvm_vcpu_arch As the name of the field indicates, host_fpsimd_state is strictly a host piece of data, and we reset this pointer on each PID change. So let's move it where it belongs, and set it at load-time. Although this is slightly more often, it is a well defined life-cycle which matches other pieces of data. Reviewed-by: Mark Brown Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 2 +- arch/arm64/kvm/fpsimd.c | 3 +-- arch/arm64/kvm/hyp/include/hyp/switch.h | 2 +- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 1 - 4 files changed, 3 insertions(+), 5 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index a04e69fb2884..21730d5ac006 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -543,6 +543,7 @@ struct kvm_cpu_context { */ struct kvm_host_data { struct kvm_cpu_context host_ctxt; + struct user_fpsimd_state *fpsimd_state; /* hyp VA */ /* * host_debug_state contains the host registers which are @@ -662,7 +663,6 @@ struct kvm_vcpu_arch { struct kvm_guest_debug_arch vcpu_debug_state; struct kvm_guest_debug_arch external_debug_state; - struct user_fpsimd_state *host_fpsimd_state; /* hyp VA */ struct task_struct *parent_task; /* VGIC state */ diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 826307e19e3a..d30dffc800b6 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -49,8 +49,6 @@ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu) if (ret) return ret; - vcpu->arch.host_fpsimd_state = kern_hyp_va(fpsimd); - /* * We need to keep current's task_struct pinned until its data has been * unshared with the hypervisor to make sure it is not re-used by the @@ -87,6 +85,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) * FP_STATE_FREE if the flag set. */ vcpu->arch.fp_state = FP_STATE_HOST_OWNED; + *host_data_ptr(fpsimd_state) = kern_hyp_va(¤t->thread.uw.fpsimd_state); vcpu_clear_flag(vcpu, HOST_SVE_ENABLED); if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN) diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 7d7de0245ed0..6def6ad8dd48 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -377,7 +377,7 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) /* Write out the host state if it's in the registers */ if (vcpu->arch.fp_state == FP_STATE_HOST_OWNED) - __fpsimd_save_state(vcpu->arch.host_fpsimd_state); + __fpsimd_save_state(*host_data_ptr(fpsimd_state)); /* Restore the guest state */ if (sve_guest) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 2385fd03ed87..c5f625dc1f07 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -42,7 +42,6 @@ static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu) hyp_vcpu->vcpu.arch.fp_state = host_vcpu->arch.fp_state; hyp_vcpu->vcpu.arch.debug_ptr = kern_hyp_va(host_vcpu->arch.debug_ptr); - hyp_vcpu->vcpu.arch.host_fpsimd_state = host_vcpu->arch.host_fpsimd_state; hyp_vcpu->vcpu.arch.vsesr_el2 = host_vcpu->arch.vsesr_el2; -- cgit v1.2.3 From 5294afdbf45aced5295fe5941c58b40c41c23800 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 1 Mar 2024 17:42:31 +0000 Subject: KVM: arm64: Exclude FP ownership from kvm_vcpu_arch In retrospect, it is fairly obvious that the FP state ownership is only meaningful for a given CPU, and that locating this information in the vcpu was just a mistake. Move the ownership tracking into the host data structure, and rename it from fp_state to fp_owner, which is a better description (name suggested by Mark Brown). Reviewed-by: Mark Brown Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_emulate.h | 4 ++-- arch/arm64/include/asm/kvm_host.h | 14 +++++++------- arch/arm64/kvm/arm.c | 6 ------ arch/arm64/kvm/fpsimd.c | 10 +++++----- arch/arm64/kvm/hyp/include/hyp/switch.h | 6 +++--- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 2 -- arch/arm64/kvm/hyp/nvhe/switch.c | 2 +- arch/arm64/kvm/hyp/vhe/switch.c | 2 +- 8 files changed, 19 insertions(+), 27 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 975af30af31f..3d65d9413608 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -588,7 +588,7 @@ static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu) val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN); if (!vcpu_has_sve(vcpu) || - (vcpu->arch.fp_state != FP_STATE_GUEST_OWNED)) + (*host_data_ptr(fp_owner) != FP_STATE_GUEST_OWNED)) val |= CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN; if (cpus_have_final_cap(ARM64_SME)) val |= CPACR_EL1_SMEN_EL1EN | CPACR_EL1_SMEN_EL0EN; @@ -596,7 +596,7 @@ static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu) val = CPTR_NVHE_EL2_RES1; if (vcpu_has_sve(vcpu) && - (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED)) + (*host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED)) val |= CPTR_EL2_TZ; if (cpus_have_final_cap(ARM64_SME)) val &= ~CPTR_EL2_TSM; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 21730d5ac006..2b63fdfad5b2 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -545,6 +545,13 @@ struct kvm_host_data { struct kvm_cpu_context host_ctxt; struct user_fpsimd_state *fpsimd_state; /* hyp VA */ + /* Ownership of the FP regs */ + enum { + FP_STATE_FREE, + FP_STATE_HOST_OWNED, + FP_STATE_GUEST_OWNED, + } fp_owner; + /* * host_debug_state contains the host registers which are * saved and restored during world switches. @@ -622,13 +629,6 @@ struct kvm_vcpu_arch { /* Exception Information */ struct kvm_vcpu_fault_info fault; - /* Ownership of the FP regs */ - enum { - FP_STATE_FREE, - FP_STATE_HOST_OWNED, - FP_STATE_GUEST_OWNED, - } fp_state; - /* Configuration flags, set once and for all before the vcpu can run */ u8 cflags; diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index a24287c3ba99..66d8112da268 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -378,12 +378,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO; - /* - * Default value for the FP state, will be overloaded at load - * time if we support FP (pretty likely) - */ - vcpu->arch.fp_state = FP_STATE_FREE; - /* Set up the timer */ kvm_timer_vcpu_init(vcpu); diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index d30dffc800b6..7507dcc4e553 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -84,7 +84,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) * guest in kvm_arch_vcpu_ctxflush_fp() and override this to * FP_STATE_FREE if the flag set. */ - vcpu->arch.fp_state = FP_STATE_HOST_OWNED; + *host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED; *host_data_ptr(fpsimd_state) = kern_hyp_va(¤t->thread.uw.fpsimd_state); vcpu_clear_flag(vcpu, HOST_SVE_ENABLED); @@ -109,7 +109,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) * been saved, this is very unlikely to happen. */ if (read_sysreg_s(SYS_SVCR) & (SVCR_SM_MASK | SVCR_ZA_MASK)) { - vcpu->arch.fp_state = FP_STATE_FREE; + *host_data_ptr(fp_owner) = FP_STATE_FREE; fpsimd_save_and_flush_cpu_state(); } } @@ -125,7 +125,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu) { if (test_thread_flag(TIF_FOREIGN_FPSTATE)) - vcpu->arch.fp_state = FP_STATE_FREE; + *host_data_ptr(fp_owner) = FP_STATE_FREE; } /* @@ -141,7 +141,7 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) WARN_ON_ONCE(!irqs_disabled()); - if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) { + if (*host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED) { /* * Currently we do not support SME guests so SVCR is @@ -195,7 +195,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) isb(); } - if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) { + if (*host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED) { if (vcpu_has_sve(vcpu)) { __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR); diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 6def6ad8dd48..2629420d0659 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -42,7 +42,7 @@ extern struct kvm_exception_table_entry __stop___kvm_ex_table; /* Check whether the FP regs are owned by the guest */ static inline bool guest_owns_fp_regs(struct kvm_vcpu *vcpu) { - return vcpu->arch.fp_state == FP_STATE_GUEST_OWNED; + return *host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED; } /* Save the 32-bit only FPSIMD system register state */ @@ -376,7 +376,7 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) isb(); /* Write out the host state if it's in the registers */ - if (vcpu->arch.fp_state == FP_STATE_HOST_OWNED) + if (*host_data_ptr(fp_owner) == FP_STATE_HOST_OWNED) __fpsimd_save_state(*host_data_ptr(fpsimd_state)); /* Restore the guest state */ @@ -389,7 +389,7 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) if (!(read_sysreg(hcr_el2) & HCR_RW)) write_sysreg(__vcpu_sys_reg(vcpu, FPEXC32_EL2), fpexc32_el2); - vcpu->arch.fp_state = FP_STATE_GUEST_OWNED; + *host_data_ptr(fp_owner) = FP_STATE_GUEST_OWNED; return true; } diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index c5f625dc1f07..26561c562f7a 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -39,7 +39,6 @@ static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu) hyp_vcpu->vcpu.arch.cptr_el2 = host_vcpu->arch.cptr_el2; hyp_vcpu->vcpu.arch.iflags = host_vcpu->arch.iflags; - hyp_vcpu->vcpu.arch.fp_state = host_vcpu->arch.fp_state; hyp_vcpu->vcpu.arch.debug_ptr = kern_hyp_va(host_vcpu->arch.debug_ptr); @@ -63,7 +62,6 @@ static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu) host_vcpu->arch.fault = hyp_vcpu->vcpu.arch.fault; host_vcpu->arch.iflags = hyp_vcpu->vcpu.arch.iflags; - host_vcpu->arch.fp_state = hyp_vcpu->vcpu.arch.fp_state; host_cpu_if->vgic_hcr = hyp_cpu_if->vgic_hcr; for (i = 0; i < hyp_cpu_if->used_lrs; ++i) diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 544a419b9a39..1f82d531a494 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -337,7 +337,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) __sysreg_restore_state_nvhe(host_ctxt); - if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) + if (*host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED) __fpsimd_save_fpexc32(vcpu); __debug_switch_to_host(vcpu); diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 14b7a6bc5909..b92f9fe2d50e 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -258,7 +258,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) sysreg_restore_host_state_vhe(host_ctxt); - if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) + if (*host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED) __fpsimd_save_fpexc32(vcpu); __debug_switch_to_host(vcpu); -- cgit v1.2.3 From 1fcb7cea8a5f7747e02230f816c2c80b060d9517 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Fri, 12 Apr 2024 14:19:07 +0100 Subject: arm64: mm: Batch dsb and isb when populating pgtables After removing uneccessary TLBIs, the next bottleneck when creating the page tables for the linear map is DSB and ISB, which were previously issued per-pte in __set_pte(). Since we are writing multiple ptes in a given pte table, we can elide these barriers and insert them once we have finished writing to the table. Execution time of map_mem(), which creates the kernel linear map page tables, was measured on different machines with different RAM configs: | Apple M2 VM | Ampere Altra| Ampere Altra| Ampere Altra | VM, 16G | VM, 64G | VM, 256G | Metal, 512G ---------------|-------------|-------------|-------------|------------- | ms (%) | ms (%) | ms (%) | ms (%) ---------------|-------------|-------------|-------------|------------- before | 78 (0%) | 435 (0%) | 1723 (0%) | 3779 (0%) after | 11 (-86%) | 161 (-63%) | 656 (-62%) | 1654 (-56%) Signed-off-by: Ryan Roberts Tested-by: Itaru Kitayama Tested-by: Eric Chanudet Reviewed-by: Mark Rutland Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20240412131908.433043-3-ryan.roberts@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable.h | 7 ++++++- arch/arm64/mm/mmu.c | 11 ++++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index afdd56d26ad7..105a95a8845c 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -271,9 +271,14 @@ static inline pte_t pte_mkdevmap(pte_t pte) return set_pte_bit(pte, __pgprot(PTE_DEVMAP | PTE_SPECIAL)); } -static inline void __set_pte(pte_t *ptep, pte_t pte) +static inline void __set_pte_nosync(pte_t *ptep, pte_t pte) { WRITE_ONCE(*ptep, pte); +} + +static inline void __set_pte(pte_t *ptep, pte_t pte) +{ + __set_pte_nosync(ptep, pte); /* * Only if the new pte is valid and kernel, otherwise TLB maintenance diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 9f1d69b7b494..ac88b89770a6 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -178,7 +178,11 @@ static void init_pte(pte_t *ptep, unsigned long addr, unsigned long end, do { pte_t old_pte = __ptep_get(ptep); - __set_pte(ptep, pfn_pte(__phys_to_pfn(phys), prot)); + /* + * Required barriers to make this visible to the table walker + * are deferred to the end of alloc_init_cont_pte(). + */ + __set_pte_nosync(ptep, pfn_pte(__phys_to_pfn(phys), prot)); /* * After the PTE entry has been populated once, we @@ -232,6 +236,11 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, phys += next - addr; } while (addr = next, addr != end); + /* + * Note: barriers and maintenance necessary to clear the fixmap slot + * ensure that all previous pgtable writes are visible to the table + * walker. + */ pte_clear_fixmap(); } -- cgit v1.2.3 From 0e9df1c905d8293d333ace86c13d147382f5caf9 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Fri, 12 Apr 2024 14:19:08 +0100 Subject: arm64: mm: Don't remap pgtables for allocate vs populate During linear map pgtable creation, each pgtable is fixmapped / fixunmapped twice; once during allocation to zero the memory, and a again during population to write the entries. This means each table has 2 TLB invalidations issued against it. Let's fix this so that each table is only fixmapped/fixunmapped once, halving the number of TLBIs, and improving performance. Achieve this by separating allocation and initialization (zeroing) of the page. The allocated page is now fixmapped directly by the walker and initialized, before being populated and finally fixunmapped. This approach keeps the change small, but has the side effect that late allocations (using __get_free_page()) must also go through the generic memory clearing routine. So let's tell __get_free_page() not to zero the memory to avoid duplication. Additionally this approach means that fixmap/fixunmap is still used for late pgtable modifications. That's not technically needed since the memory is all mapped in the linear map by that point. That's left as a possible future optimization if found to be needed. Execution time of map_mem(), which creates the kernel linear map page tables, was measured on different machines with different RAM configs: | Apple M2 VM | Ampere Altra| Ampere Altra| Ampere Altra | VM, 16G | VM, 64G | VM, 256G | Metal, 512G ---------------|-------------|-------------|-------------|------------- | ms (%) | ms (%) | ms (%) | ms (%) ---------------|-------------|-------------|-------------|------------- before | 11 (0%) | 161 (0%) | 656 (0%) | 1654 (0%) after | 10 (-11%) | 104 (-35%) | 438 (-33%) | 1223 (-26%) Signed-off-by: Ryan Roberts Suggested-by: Mark Rutland Tested-by: Itaru Kitayama Tested-by: Eric Chanudet Reviewed-by: Mark Rutland Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20240412131908.433043-4-ryan.roberts@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable.h | 2 ++ arch/arm64/mm/mmu.c | 67 +++++++++++++++++++++------------------- 2 files changed, 37 insertions(+), 32 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 105a95a8845c..92c9aed5e7af 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -1010,6 +1010,8 @@ static inline p4d_t *p4d_offset_kimg(pgd_t *pgdp, u64 addr) static inline bool pgtable_l5_enabled(void) { return false; } +#define p4d_index(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1)) + /* Match p4d_offset folding in */ #define p4d_set_fixmap(addr) NULL #define p4d_set_fixmap_offset(p4dp, addr) ((p4d_t *)p4dp) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index ac88b89770a6..c927e9312f10 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -109,28 +109,12 @@ EXPORT_SYMBOL(phys_mem_access_prot); static phys_addr_t __init early_pgtable_alloc(int shift) { phys_addr_t phys; - void *ptr; phys = memblock_phys_alloc_range(PAGE_SIZE, PAGE_SIZE, 0, MEMBLOCK_ALLOC_NOLEAKTRACE); if (!phys) panic("Failed to allocate page table page\n"); - /* - * The FIX_{PGD,PUD,PMD} slots may be in active use, but the FIX_PTE - * slot will be free, so we can (ab)use the FIX_PTE slot to initialise - * any level of table. - */ - ptr = pte_set_fixmap(phys); - - memset(ptr, 0, PAGE_SIZE); - - /* - * Implicit barriers also ensure the zeroed page is visible to the page - * table walker - */ - pte_clear_fixmap(); - return phys; } @@ -172,6 +156,14 @@ bool pgattr_change_is_safe(u64 old, u64 new) return ((old ^ new) & ~mask) == 0; } +static void init_clear_pgtable(void *table) +{ + clear_page(table); + + /* Ensure the zeroing is observed by page table walks. */ + dsb(ishst); +} + static void init_pte(pte_t *ptep, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot) { @@ -214,12 +206,15 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, pmdval |= PMD_TABLE_PXN; BUG_ON(!pgtable_alloc); pte_phys = pgtable_alloc(PAGE_SHIFT); + ptep = pte_set_fixmap(pte_phys); + init_clear_pgtable(ptep); + ptep += pte_index(addr); __pmd_populate(pmdp, pte_phys, pmdval); - pmd = READ_ONCE(*pmdp); + } else { + BUG_ON(pmd_bad(pmd)); + ptep = pte_set_fixmap_offset(pmdp, addr); } - BUG_ON(pmd_bad(pmd)); - ptep = pte_set_fixmap_offset(pmdp, addr); do { pgprot_t __prot = prot; @@ -298,12 +293,15 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, pudval |= PUD_TABLE_PXN; BUG_ON(!pgtable_alloc); pmd_phys = pgtable_alloc(PMD_SHIFT); + pmdp = pmd_set_fixmap(pmd_phys); + init_clear_pgtable(pmdp); + pmdp += pmd_index(addr); __pud_populate(pudp, pmd_phys, pudval); - pud = READ_ONCE(*pudp); + } else { + BUG_ON(pud_bad(pud)); + pmdp = pmd_set_fixmap_offset(pudp, addr); } - BUG_ON(pud_bad(pud)); - pmdp = pmd_set_fixmap_offset(pudp, addr); do { pgprot_t __prot = prot; @@ -340,12 +338,15 @@ static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, p4dval |= P4D_TABLE_PXN; BUG_ON(!pgtable_alloc); pud_phys = pgtable_alloc(PUD_SHIFT); + pudp = pud_set_fixmap(pud_phys); + init_clear_pgtable(pudp); + pudp += pud_index(addr); __p4d_populate(p4dp, pud_phys, p4dval); - p4d = READ_ONCE(*p4dp); + } else { + BUG_ON(p4d_bad(p4d)); + pudp = pud_set_fixmap_offset(p4dp, addr); } - BUG_ON(p4d_bad(p4d)); - pudp = pud_set_fixmap_offset(p4dp, addr); do { pud_t old_pud = READ_ONCE(*pudp); @@ -395,12 +396,15 @@ static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end, pgdval |= PGD_TABLE_PXN; BUG_ON(!pgtable_alloc); p4d_phys = pgtable_alloc(P4D_SHIFT); + p4dp = p4d_set_fixmap(p4d_phys); + init_clear_pgtable(p4dp); + p4dp += p4d_index(addr); __pgd_populate(pgdp, p4d_phys, pgdval); - pgd = READ_ONCE(*pgdp); + } else { + BUG_ON(pgd_bad(pgd)); + p4dp = p4d_set_fixmap_offset(pgdp, addr); } - BUG_ON(pgd_bad(pgd)); - p4dp = p4d_set_fixmap_offset(pgdp, addr); do { p4d_t old_p4d = READ_ONCE(*p4dp); @@ -467,11 +471,10 @@ void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, static phys_addr_t __pgd_pgtable_alloc(int shift) { - void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL); - BUG_ON(!ptr); + /* Page is zeroed by init_clear_pgtable() so don't duplicate effort. */ + void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL & ~__GFP_ZERO); - /* Ensure the zeroed page is visible to the page table walker */ - dsb(ishst); + BUG_ON(!ptr); return __pa(ptr); } -- cgit v1.2.3 From f4d9d9dcc70b96b5e5d7801bd5fbf8491b07b13d Mon Sep 17 00:00:00 2001 From: Besar Wicaksono Date: Tue, 9 Jan 2024 13:23:08 -0600 Subject: arm64: Add Neoverse-V2 part Add the part number and MIDR for Neoverse-V2 Signed-off-by: Besar Wicaksono Reviewed-by: James Clark Link: https://lore.kernel.org/r/20240109192310.16234-2-bwicaksono@nvidia.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/cputype.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 52f076afeb96..936389e9aecb 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -86,6 +86,7 @@ #define ARM_CPU_PART_CORTEX_X2 0xD48 #define ARM_CPU_PART_NEOVERSE_N2 0xD49 #define ARM_CPU_PART_CORTEX_A78C 0xD4B +#define ARM_CPU_PART_NEOVERSE_V2 0xD4F #define APM_CPU_PART_XGENE 0x000 #define APM_CPU_VAR_POTENZA 0x00 @@ -159,6 +160,7 @@ #define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2) #define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2) #define MIDR_CORTEX_A78C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C) +#define MIDR_NEOVERSE_V2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V2) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) -- cgit v1.2.3 From 1b06b99f25e0c957feb488ff8117a37f592c3866 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 19 Apr 2024 11:29:21 +0100 Subject: KVM: arm64: Harden __ctxt_sys_reg() against out-of-range values The unsuspecting kernel tinkerer can be easily confused into writing something that looks like this: ikey.lo = __vcpu_sys_reg(vcpu, SYS_APIAKEYLO_EL1); which seems vaguely sensible, until you realise that the second parameter is the encoding of a sysreg, and not the index into the vcpu sysreg file... Debugging what happens in this case is an interesting exercise in head<->wall interactions. As they often say: "Any resemblance to actual persons, living or dead, or actual events is purely coincidental". In order to save people's time, add some compile-time hardening that will at least weed out the "stupidly out of range" values. This will *not* catch anything that isn't a compile-time constant. Reviewed-by: Joey Gouly Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20240419102935.1935571-2-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 9e8a496fb284..8eb04c9eb259 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -896,7 +896,7 @@ struct kvm_vcpu_arch { * Don't bother with VNCR-based accesses in the nVHE code, it has no * business dealing with NV. */ -static inline u64 *__ctxt_sys_reg(const struct kvm_cpu_context *ctxt, int r) +static inline u64 *___ctxt_sys_reg(const struct kvm_cpu_context *ctxt, int r) { #if !defined (__KVM_NVHE_HYPERVISOR__) if (unlikely(cpus_have_final_cap(ARM64_HAS_NESTED_VIRT) && @@ -906,6 +906,13 @@ static inline u64 *__ctxt_sys_reg(const struct kvm_cpu_context *ctxt, int r) return (u64 *)&ctxt->sys_regs[r]; } +#define __ctxt_sys_reg(c,r) \ + ({ \ + BUILD_BUG_ON(__builtin_constant_p(r) && \ + (r) >= NR_SYS_REGS); \ + ___ctxt_sys_reg(c, r); \ + }) + #define ctxt_sys_reg(c,r) (*__ctxt_sys_reg(c,r)) u64 kvm_vcpu_sanitise_vncr_reg(const struct kvm_vcpu *, enum vcpu_sysreg); -- cgit v1.2.3 From 80d8b55a57a18b0b1dac951ea28bfd657b14facc Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 19 Apr 2024 11:29:22 +0100 Subject: KVM: arm64: Add helpers for ESR_ELx_ERET_ISS_ERET* The ESR_ELx_ERET_ISS_ERET* macros are a bit confusing: - ESR_ELx_ERET_ISS_ERET really indicates that we have trapped an ERETA* instruction, as opposed to an ERET - ESR_ELx_ERET_ISS_ERETA really indicates that we have trapped an ERETAB instruction, as opposed to an ERETAA. We could repaint those to make more sense, but these are the names that are present in the ARM ARM, and we are sentimentally attached to those. Instead, add two new helpers: - esr_iss_is_eretax() being true tells you that you need to authenticate the ERET - esr_iss_is_eretab() tells you that you need to use the B key instead of the A key Following patches will make use of these primitives. Suggested-by: Joey Gouly Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20240419102935.1935571-3-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/esr.h | 12 ++++++++++++ arch/arm64/kvm/handle_exit.c | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 81606bf7d5ac..7abf09df7033 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -404,6 +404,18 @@ static inline bool esr_fsc_is_access_flag_fault(unsigned long esr) return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_ACCESS; } +/* Indicate whether ESR.EC==0x1A is for an ERETAx instruction */ +static inline bool esr_iss_is_eretax(unsigned long esr) +{ + return esr & ESR_ELx_ERET_ISS_ERET; +} + +/* Indicate which key is used for ERETAx (false: A-Key, true: B-Key) */ +static inline bool esr_iss_is_eretab(unsigned long esr) +{ + return esr & ESR_ELx_ERET_ISS_ERETA; +} + const char *esr_get_class_string(unsigned long esr); #endif /* __ASSEMBLY */ diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 617ae6dea5d5..15221e481ccd 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -219,7 +219,7 @@ static int kvm_handle_ptrauth(struct kvm_vcpu *vcpu) static int kvm_handle_eret(struct kvm_vcpu *vcpu) { - if (kvm_vcpu_get_esr(vcpu) & ESR_ELx_ERET_ISS_ERET) + if (esr_iss_is_eretax(kvm_vcpu_get_esr(vcpu))) return kvm_handle_ptrauth(vcpu); /* -- cgit v1.2.3 From 6f57c6be2a0889cc0fd32b0cd2eb25dfee20dde3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 19 Apr 2024 11:29:24 +0100 Subject: KVM: arm64: nv: Drop VCPU_HYP_CONTEXT flag It has become obvious that HCR_EL2.NV serves the exact same use as VCPU_HYP_CONTEXT, only in an architectural way. So just drop the flag for good. Reviewed-by: Joey Gouly Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20240419102935.1935571-5-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 2 -- arch/arm64/kvm/hyp/vhe/switch.c | 7 +------ 2 files changed, 1 insertion(+), 8 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 8eb04c9eb259..465cfd49cf7e 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -817,8 +817,6 @@ struct kvm_vcpu_arch { #define DEBUG_STATE_SAVE_SPE __vcpu_single_flag(iflags, BIT(5)) /* Save TRBE context if active */ #define DEBUG_STATE_SAVE_TRBE __vcpu_single_flag(iflags, BIT(6)) -/* vcpu running in HYP context */ -#define VCPU_HYP_CONTEXT __vcpu_single_flag(iflags, BIT(7)) /* SVE enabled for host EL0 */ #define HOST_SVE_ENABLED __vcpu_single_flag(sflags, BIT(0)) diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 1581df6aec87..07fd9f70f870 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -197,7 +197,7 @@ static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code) * If we were in HYP context on entry, adjust the PSTATE view * so that the usual helpers work correctly. */ - if (unlikely(vcpu_get_flag(vcpu, VCPU_HYP_CONTEXT))) { + if (vcpu_has_nv(vcpu) && (read_sysreg(hcr_el2) & HCR_NV)) { u64 mode = *vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT); switch (mode) { @@ -240,11 +240,6 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) sysreg_restore_guest_state_vhe(guest_ctxt); __debug_switch_to_guest(vcpu); - if (is_hyp_ctxt(vcpu)) - vcpu_set_flag(vcpu, VCPU_HYP_CONTEXT); - else - vcpu_clear_flag(vcpu, VCPU_HYP_CONTEXT); - do { /* Jump in the fire! */ exit_code = __guest_enter(vcpu); -- cgit v1.2.3 From 95537f06b9e826766f32e513d714e1cda468ef15 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 19 Apr 2024 11:29:26 +0100 Subject: KVM: arm64: nv: Add trap forwarding for ERET and SMC Honor the trap forwarding bits for both ERET and SMC, using a new helper that checks for common conditions. Reviewed-by: Joey Gouly Co-developed-by: Jintack Lim Signed-off-by: Jintack Lim Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20240419102935.1935571-7-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_nested.h | 1 + arch/arm64/kvm/emulate-nested.c | 27 +++++++++++++++++++++++++++ arch/arm64/kvm/handle_exit.c | 7 +++++++ 3 files changed, 35 insertions(+) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index c77d795556e1..dbc4e3a67356 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -60,6 +60,7 @@ static inline u64 translate_ttbr0_el2_to_ttbr0_el1(u64 ttbr0) return ttbr0 & ~GENMASK_ULL(63, 48); } +extern bool forward_smc_trap(struct kvm_vcpu *vcpu); int kvm_init_nv_sysregs(struct kvm *kvm); diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c index 4697ba41b3a9..2d80e81ae650 100644 --- a/arch/arm64/kvm/emulate-nested.c +++ b/arch/arm64/kvm/emulate-nested.c @@ -2117,6 +2117,26 @@ inject: return true; } +static bool forward_traps(struct kvm_vcpu *vcpu, u64 control_bit) +{ + bool control_bit_set; + + if (!vcpu_has_nv(vcpu)) + return false; + + control_bit_set = __vcpu_sys_reg(vcpu, HCR_EL2) & control_bit; + if (!is_hyp_ctxt(vcpu) && control_bit_set) { + kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu)); + return true; + } + return false; +} + +bool forward_smc_trap(struct kvm_vcpu *vcpu) +{ + return forward_traps(vcpu, HCR_TSC); +} + static u64 kvm_check_illegal_exception_return(struct kvm_vcpu *vcpu, u64 spsr) { u64 mode = spsr & PSR_MODE_MASK; @@ -2155,6 +2175,13 @@ void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu) u64 spsr, elr, mode; bool direct_eret; + /* + * Forward this trap to the virtual EL2 if the virtual + * HCR_EL2.NV bit is set and this is coming from !EL2. + */ + if (forward_traps(vcpu, HCR_NV)) + return; + /* * Going through the whole put/load motions is a waste of time * if this is a VHE guest hypervisor returning to its own diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 15221e481ccd..6a88ec024e2f 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -55,6 +55,13 @@ static int handle_hvc(struct kvm_vcpu *vcpu) static int handle_smc(struct kvm_vcpu *vcpu) { + /* + * Forward this trapped smc instruction to the virtual EL2 if + * the guest has asked for it. + */ + if (forward_smc_trap(vcpu)) + return 1; + /* * "If an SMC instruction executed at Non-secure EL1 is * trapped to EL2 because HCR_EL2.TSC is 1, the exception is a -- cgit v1.2.3 From 279946ada1f26a905061d0d6f134fff9e7b14239 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 19 Apr 2024 11:29:29 +0100 Subject: KVM: arm64: nv: Handle HCR_EL2.{API,APK} independently Although KVM couples API and APK for simplicity, the architecture makes no such requirement, and the two can be independently set or cleared. Check for which of the two possible reasons we have trapped here, and if the corresponding L1 control bit isn't set, delegate the handling for forwarding. Otherwise, set this exact bit in HCR_EL2 and resume the guest. Of course, in the non-NV case, we keep setting both bits and be done with it. Note that the entry core already saves/restores the keys should any of the two control bits be set. This results in a bit of rework, and the removal of the (trivial) vcpu_ptrauth_enable() helper. Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20240419102935.1935571-10-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_emulate.h | 5 ----- arch/arm64/kvm/hyp/include/hyp/switch.h | 32 +++++++++++++++++++++++++++----- 2 files changed, 27 insertions(+), 10 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 975af30af31f..87f2c31f3206 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -125,11 +125,6 @@ static inline void vcpu_set_wfx_traps(struct kvm_vcpu *vcpu) vcpu->arch.hcr_el2 |= HCR_TWI; } -static inline void vcpu_ptrauth_enable(struct kvm_vcpu *vcpu) -{ - vcpu->arch.hcr_el2 |= (HCR_API | HCR_APK); -} - static inline void vcpu_ptrauth_disable(struct kvm_vcpu *vcpu) { vcpu->arch.hcr_el2 &= ~(HCR_API | HCR_APK); diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index f5f701f309a9..a0908d7a8f56 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -480,11 +480,35 @@ DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt); static bool kvm_hyp_handle_ptrauth(struct kvm_vcpu *vcpu, u64 *exit_code) { struct kvm_cpu_context *ctxt; - u64 val; + u64 enable = 0; if (!vcpu_has_ptrauth(vcpu)) return false; + /* + * NV requires us to handle API and APK independently, just in + * case the hypervisor is totally nuts. Please barf >here<. + */ + if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) { + switch (ESR_ELx_EC(kvm_vcpu_get_esr(vcpu))) { + case ESR_ELx_EC_PAC: + if (!(__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_API)) + return false; + + enable |= HCR_API; + break; + + case ESR_ELx_EC_SYS64: + if (!(__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_APK)) + return false; + + enable |= HCR_APK; + break; + } + } else { + enable = HCR_API | HCR_APK; + } + ctxt = this_cpu_ptr(&kvm_hyp_ctxt); __ptrauth_save_key(ctxt, APIA); __ptrauth_save_key(ctxt, APIB); @@ -492,11 +516,9 @@ static bool kvm_hyp_handle_ptrauth(struct kvm_vcpu *vcpu, u64 *exit_code) __ptrauth_save_key(ctxt, APDB); __ptrauth_save_key(ctxt, APGA); - vcpu_ptrauth_enable(vcpu); - val = read_sysreg(hcr_el2); - val |= (HCR_API | HCR_APK); - write_sysreg(val, hcr_el2); + vcpu->arch.hcr_el2 |= enable; + sysreg_clear_set(hcr_el2, 0, enable); return true; } -- cgit v1.2.3 From 719f5206a8fd8336d23ccda6fe2a3287fbfb4c92 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 19 Apr 2024 11:29:31 +0100 Subject: KVM: arm64: nv: Add kvm_has_pauth() helper Pointer Authentication comes in many flavors, and a faithful emulation relies on correctly handling the flavour implemented by the HW. For this, provide a new kvm_has_pauth() that checks whether we expose to the guest a particular level of support. This checks across all 3 possible authentication algorithms (Q5, Q3 and IMPDEF). Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20240419102935.1935571-12-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 465cfd49cf7e..a6ba90daa195 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1336,4 +1336,19 @@ bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu); (get_idreg_field((kvm), id, fld) >= expand_field_sign(id, fld, min) && \ get_idreg_field((kvm), id, fld) <= expand_field_sign(id, fld, max)) +/* Check for a given level of PAuth support */ +#define kvm_has_pauth(k, l) \ + ({ \ + bool pa, pi, pa3; \ + \ + pa = kvm_has_feat((k), ID_AA64ISAR1_EL1, APA, l); \ + pa &= kvm_has_feat((k), ID_AA64ISAR1_EL1, GPA, IMP); \ + pi = kvm_has_feat((k), ID_AA64ISAR1_EL1, API, l); \ + pi &= kvm_has_feat((k), ID_AA64ISAR1_EL1, GPI, IMP); \ + pa3 = kvm_has_feat((k), ID_AA64ISAR2_EL1, APA3, l); \ + pa3 &= kvm_has_feat((k), ID_AA64ISAR2_EL1, GPA3, IMP); \ + \ + (pa + pi + pa3) == 1; \ + }) + #endif /* __ARM64_KVM_HOST_H__ */ -- cgit v1.2.3 From 6ccc971ee2c61a1ffb487e46bf6184f7df6aacfb Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 19 Apr 2024 11:29:32 +0100 Subject: KVM: arm64: nv: Add emulation for ERETAx instructions FEAT_NV has the interesting property of relying on ERET being trapped. An added complexity is that it also traps ERETAA and ERETAB, meaning that the Pointer Authentication aspect of these instruction must be emulated. Add an emulation of Pointer Authentication, limited to ERETAx (always using SP_EL2 as the modifier and ELR_EL2 as the pointer), using the Generic Authentication instructions. The emulation, however small, is placed in its own compilation unit so that it can be avoided if the configuration doesn't include it (or the toolchan in not up to the task). Reviewed-by: Joey Gouly Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20240419102935.1935571-13-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_nested.h | 12 ++ arch/arm64/include/asm/pgtable-hwdef.h | 1 + arch/arm64/kvm/Makefile | 1 + arch/arm64/kvm/pauth.c | 196 +++++++++++++++++++++++++++++++++ 4 files changed, 210 insertions(+) create mode 100644 arch/arm64/kvm/pauth.c (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index dbc4e3a67356..5e0ab0596246 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -64,4 +64,16 @@ extern bool forward_smc_trap(struct kvm_vcpu *vcpu); int kvm_init_nv_sysregs(struct kvm *kvm); +#ifdef CONFIG_ARM64_PTR_AUTH +bool kvm_auth_eretax(struct kvm_vcpu *vcpu, u64 *elr); +#else +static inline bool kvm_auth_eretax(struct kvm_vcpu *vcpu, u64 *elr) +{ + /* We really should never execute this... */ + WARN_ON_ONCE(1); + *elr = 0xbad9acc0debadbad; + return false; +} +#endif + #endif /* __ARM64_KVM_NESTED_H */ diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index ef207a0d4f0d..9943ff0af4c9 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -297,6 +297,7 @@ #define TCR_TBI1 (UL(1) << 38) #define TCR_HA (UL(1) << 39) #define TCR_HD (UL(1) << 40) +#define TCR_TBID0 (UL(1) << 51) #define TCR_TBID1 (UL(1) << 52) #define TCR_NFD0 (UL(1) << 53) #define TCR_NFD1 (UL(1) << 54) diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index c0c050e53157..04882b577575 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -23,6 +23,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \ vgic/vgic-its.o vgic/vgic-debug.o kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o +kvm-$(CONFIG_ARM64_PTR_AUTH) += pauth.o always-y := hyp_constants.h hyp-constants.s diff --git a/arch/arm64/kvm/pauth.c b/arch/arm64/kvm/pauth.c new file mode 100644 index 000000000000..a3a5c404375b --- /dev/null +++ b/arch/arm64/kvm/pauth.c @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2024 - Google LLC + * Author: Marc Zyngier + * + * Primitive PAuth emulation for ERETAA/ERETAB. + * + * This code assumes that is is run from EL2, and that it is part of + * the emulation of ERETAx for a guest hypervisor. That's a lot of + * baked-in assumptions and shortcuts. + * + * Do no reuse for anything else! + */ + +#include + +#include +#include + +static u64 compute_pac(struct kvm_vcpu *vcpu, u64 ptr, + struct ptrauth_key ikey) +{ + struct ptrauth_key gkey; + u64 mod, pac = 0; + + preempt_disable(); + + if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU)) + mod = __vcpu_sys_reg(vcpu, SP_EL2); + else + mod = read_sysreg(sp_el1); + + gkey.lo = read_sysreg_s(SYS_APGAKEYLO_EL1); + gkey.hi = read_sysreg_s(SYS_APGAKEYHI_EL1); + + __ptrauth_key_install_nosync(APGA, ikey); + isb(); + + asm volatile(ARM64_ASM_PREAMBLE ".arch_extension pauth\n" + "pacga %0, %1, %2" : "=r" (pac) : "r" (ptr), "r" (mod)); + isb(); + + __ptrauth_key_install_nosync(APGA, gkey); + + preempt_enable(); + + /* PAC in the top 32bits */ + return pac; +} + +static bool effective_tbi(struct kvm_vcpu *vcpu, bool bit55) +{ + u64 tcr = vcpu_read_sys_reg(vcpu, TCR_EL2); + bool tbi, tbid; + + /* + * Since we are authenticating an instruction address, we have + * to take TBID into account. If E2H==0, ignore VA[55], as + * TCR_EL2 only has a single TBI/TBID. If VA[55] was set in + * this case, this is likely a guest bug... + */ + if (!vcpu_el2_e2h_is_set(vcpu)) { + tbi = tcr & BIT(20); + tbid = tcr & BIT(29); + } else if (bit55) { + tbi = tcr & TCR_TBI1; + tbid = tcr & TCR_TBID1; + } else { + tbi = tcr & TCR_TBI0; + tbid = tcr & TCR_TBID0; + } + + return tbi && !tbid; +} + +static int compute_bottom_pac(struct kvm_vcpu *vcpu, bool bit55) +{ + static const int maxtxsz = 39; // Revisit these two values once + static const int mintxsz = 16; // (if) we support TTST/LVA/LVA2 + u64 tcr = vcpu_read_sys_reg(vcpu, TCR_EL2); + int txsz; + + if (!vcpu_el2_e2h_is_set(vcpu) || !bit55) + txsz = FIELD_GET(TCR_T0SZ_MASK, tcr); + else + txsz = FIELD_GET(TCR_T1SZ_MASK, tcr); + + return 64 - clamp(txsz, mintxsz, maxtxsz); +} + +static u64 compute_pac_mask(struct kvm_vcpu *vcpu, bool bit55) +{ + int bottom_pac; + u64 mask; + + bottom_pac = compute_bottom_pac(vcpu, bit55); + + mask = GENMASK(54, bottom_pac); + if (!effective_tbi(vcpu, bit55)) + mask |= GENMASK(63, 56); + + return mask; +} + +static u64 to_canonical_addr(struct kvm_vcpu *vcpu, u64 ptr, u64 mask) +{ + bool bit55 = !!(ptr & BIT(55)); + + if (bit55) + return ptr | mask; + + return ptr & ~mask; +} + +static u64 corrupt_addr(struct kvm_vcpu *vcpu, u64 ptr) +{ + bool bit55 = !!(ptr & BIT(55)); + u64 mask, error_code; + int shift; + + if (effective_tbi(vcpu, bit55)) { + mask = GENMASK(54, 53); + shift = 53; + } else { + mask = GENMASK(62, 61); + shift = 61; + } + + if (esr_iss_is_eretab(kvm_vcpu_get_esr(vcpu))) + error_code = 2 << shift; + else + error_code = 1 << shift; + + ptr &= ~mask; + ptr |= error_code; + + return ptr; +} + +/* + * Authenticate an ERETAA/ERETAB instruction, returning true if the + * authentication succeeded and false otherwise. In all cases, *elr + * contains the VA to ERET to. Potential exception injection is left + * to the caller. + */ +bool kvm_auth_eretax(struct kvm_vcpu *vcpu, u64 *elr) +{ + u64 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2); + u64 esr = kvm_vcpu_get_esr(vcpu); + u64 ptr, cptr, pac, mask; + struct ptrauth_key ikey; + + *elr = ptr = vcpu_read_sys_reg(vcpu, ELR_EL2); + + /* We assume we're already in the context of an ERETAx */ + if (esr_iss_is_eretab(esr)) { + if (!(sctlr & SCTLR_EL1_EnIB)) + return true; + + ikey.lo = __vcpu_sys_reg(vcpu, APIBKEYLO_EL1); + ikey.hi = __vcpu_sys_reg(vcpu, APIBKEYHI_EL1); + } else { + if (!(sctlr & SCTLR_EL1_EnIA)) + return true; + + ikey.lo = __vcpu_sys_reg(vcpu, APIAKEYLO_EL1); + ikey.hi = __vcpu_sys_reg(vcpu, APIAKEYHI_EL1); + } + + mask = compute_pac_mask(vcpu, !!(ptr & BIT(55))); + cptr = to_canonical_addr(vcpu, ptr, mask); + + pac = compute_pac(vcpu, cptr, ikey); + + /* + * Slightly deviate from the pseudocode: if we have a PAC + * match with the signed pointer, then it must be good. + * Anything after this point is pure error handling. + */ + if ((pac & mask) == (ptr & mask)) { + *elr = cptr; + return true; + } + + /* + * Authentication failed, corrupt the canonical address if + * PAuth2 isn't implemented, or some XORing if it is. + */ + if (!kvm_has_pauth(vcpu->kvm, PAuth2)) + cptr = corrupt_addr(vcpu, cptr); + else + cptr = ptr ^ (pac & mask); + + *elr = cptr; + return false; +} -- cgit v1.2.3 From 814ad8f96e929fa9c60bd360d2f7bccfc1df0111 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 19 Apr 2024 11:29:35 +0100 Subject: KVM: arm64: Drop trapping of PAuth instructions/keys We currently insist on disabling PAuth on vcpu_load(), and get to enable it on first guest use of an instruction or a key (ignoring the NV case for now). It isn't clear at all what this is trying to achieve: guests tend to use PAuth when available, and nothing forces you to expose it to the guest if you don't want to. This also isn't totally free: we take a full GPR save/restore between host and guest, only to write ten 64bit registers. The "value proposition" escapes me. So let's forget this stuff and enable PAuth eagerly if exposed to the guest. This results in much simpler code. Performance wise, that's not bad either (tested on M2 Pro running a fully automated Debian installer as the workload): - On a non-NV guest, I can see reduction of 0.24% in the number of cycles (measured with perf over 10 consecutive runs) - On a NV guest (L2), I see a 2% reduction in wall-clock time (measured with 'time', as M2 doesn't have a PMUv3 and NV doesn't support it either) So overall, a much reduced complexity and a (small) performance improvement. Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20240419102935.1935571-16-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_emulate.h | 5 --- arch/arm64/include/asm/kvm_ptrauth.h | 21 +++++++++ arch/arm64/kvm/arm.c | 45 +++++++++++++++++-- arch/arm64/kvm/handle_exit.c | 10 ++--- arch/arm64/kvm/hyp/include/hyp/switch.h | 80 +-------------------------------- arch/arm64/kvm/hyp/nvhe/switch.c | 2 - arch/arm64/kvm/hyp/vhe/switch.c | 6 +-- 7 files changed, 70 insertions(+), 99 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 87f2c31f3206..382164d791f4 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -125,11 +125,6 @@ static inline void vcpu_set_wfx_traps(struct kvm_vcpu *vcpu) vcpu->arch.hcr_el2 |= HCR_TWI; } -static inline void vcpu_ptrauth_disable(struct kvm_vcpu *vcpu) -{ - vcpu->arch.hcr_el2 &= ~(HCR_API | HCR_APK); -} - static inline unsigned long vcpu_get_vsesr(struct kvm_vcpu *vcpu) { return vcpu->arch.vsesr_el2; diff --git a/arch/arm64/include/asm/kvm_ptrauth.h b/arch/arm64/include/asm/kvm_ptrauth.h index 0cd0965255d2..d81bac256abc 100644 --- a/arch/arm64/include/asm/kvm_ptrauth.h +++ b/arch/arm64/include/asm/kvm_ptrauth.h @@ -99,5 +99,26 @@ alternative_else_nop_endif .macro ptrauth_switch_to_hyp g_ctxt, h_ctxt, reg1, reg2, reg3 .endm #endif /* CONFIG_ARM64_PTR_AUTH */ + +#else /* !__ASSEMBLY */ + +#define __ptrauth_save_key(ctxt, key) \ + do { \ + u64 __val; \ + __val = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \ + ctxt_sys_reg(ctxt, key ## KEYLO_EL1) = __val; \ + __val = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \ + ctxt_sys_reg(ctxt, key ## KEYHI_EL1) = __val; \ + } while(0) + +#define ptrauth_save_keys(ctxt) \ + do { \ + __ptrauth_save_key(ctxt, APIA); \ + __ptrauth_save_key(ctxt, APIB); \ + __ptrauth_save_key(ctxt, APDA); \ + __ptrauth_save_key(ctxt, APDB); \ + __ptrauth_save_key(ctxt, APGA); \ + } while(0) + #endif /* __ASSEMBLY__ */ #endif /* __ASM_KVM_PTRAUTH_H */ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index d32ffbfef724..b5fc44aafb30 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -35,10 +35,11 @@ #include #include #include +#include #include #include #include -#include +#include #include #include @@ -462,6 +463,44 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) } +static void vcpu_set_pauth_traps(struct kvm_vcpu *vcpu) +{ + if (vcpu_has_ptrauth(vcpu)) { + /* + * Either we're running running an L2 guest, and the API/APK + * bits come from L1's HCR_EL2, or API/APK are both set. + */ + if (unlikely(vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu))) { + u64 val; + + val = __vcpu_sys_reg(vcpu, HCR_EL2); + val &= (HCR_API | HCR_APK); + vcpu->arch.hcr_el2 &= ~(HCR_API | HCR_APK); + vcpu->arch.hcr_el2 |= val; + } else { + vcpu->arch.hcr_el2 |= (HCR_API | HCR_APK); + } + + /* + * Save the host keys if there is any chance for the guest + * to use pauth, as the entry code will reload the guest + * keys in that case. + * Protected mode is the exception to that rule, as the + * entry into the EL2 code eagerly switch back and forth + * between host and hyp keys (and kvm_hyp_ctxt is out of + * reach anyway). + */ + if (is_protected_kvm_enabled()) + return; + + if (vcpu->arch.hcr_el2 & (HCR_API | HCR_APK)) { + struct kvm_cpu_context *ctxt; + ctxt = this_cpu_ptr_hyp_sym(kvm_hyp_ctxt); + ptrauth_save_keys(ctxt); + } + } +} + void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { struct kvm_s2_mmu *mmu; @@ -500,8 +539,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) else vcpu_set_wfx_traps(vcpu); - if (vcpu_has_ptrauth(vcpu)) - vcpu_ptrauth_disable(vcpu); + vcpu_set_pauth_traps(vcpu); + kvm_arch_vcpu_load_debug_state_flags(vcpu); if (!cpumask_test_cpu(cpu, vcpu->kvm->arch.supported_cpus)) diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 407bdfbb572b..b037f0a0e27e 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -217,14 +217,12 @@ static int handle_sve(struct kvm_vcpu *vcpu) * Two possibilities to handle a trapping ptrauth instruction: * * - Guest usage of a ptrauth instruction (which the guest EL1 did not - * turn into a NOP). If we get here, it is that we didn't fixup - * ptrauth on exit, and all that we can do is give the guest an - * UNDEF (as the guest isn't supposed to use ptrauth without being - * told it could). + * turn into a NOP). If we get here, it is because we didn't enable + * ptrauth for the guest. This results in an UNDEF, as it isn't + * supposed to use ptrauth without being told it could. * * - Running an L2 NV guest while L1 has left HCR_EL2.API==0, and for - * which we reinject the exception into L1. API==1 is handled as a - * fixup so the only way to get here is when API==0. + * which we reinject the exception into L1. * * Anything else is an emulation bug (hence the WARN_ON + UNDEF). */ diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index a0908d7a8f56..7c733decbe43 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -447,82 +448,6 @@ static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu) return true; } -static inline bool esr_is_ptrauth_trap(u64 esr) -{ - switch (esr_sys64_to_sysreg(esr)) { - case SYS_APIAKEYLO_EL1: - case SYS_APIAKEYHI_EL1: - case SYS_APIBKEYLO_EL1: - case SYS_APIBKEYHI_EL1: - case SYS_APDAKEYLO_EL1: - case SYS_APDAKEYHI_EL1: - case SYS_APDBKEYLO_EL1: - case SYS_APDBKEYHI_EL1: - case SYS_APGAKEYLO_EL1: - case SYS_APGAKEYHI_EL1: - return true; - } - - return false; -} - -#define __ptrauth_save_key(ctxt, key) \ - do { \ - u64 __val; \ - __val = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \ - ctxt_sys_reg(ctxt, key ## KEYLO_EL1) = __val; \ - __val = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \ - ctxt_sys_reg(ctxt, key ## KEYHI_EL1) = __val; \ -} while(0) - -DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt); - -static bool kvm_hyp_handle_ptrauth(struct kvm_vcpu *vcpu, u64 *exit_code) -{ - struct kvm_cpu_context *ctxt; - u64 enable = 0; - - if (!vcpu_has_ptrauth(vcpu)) - return false; - - /* - * NV requires us to handle API and APK independently, just in - * case the hypervisor is totally nuts. Please barf >here<. - */ - if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) { - switch (ESR_ELx_EC(kvm_vcpu_get_esr(vcpu))) { - case ESR_ELx_EC_PAC: - if (!(__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_API)) - return false; - - enable |= HCR_API; - break; - - case ESR_ELx_EC_SYS64: - if (!(__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_APK)) - return false; - - enable |= HCR_APK; - break; - } - } else { - enable = HCR_API | HCR_APK; - } - - ctxt = this_cpu_ptr(&kvm_hyp_ctxt); - __ptrauth_save_key(ctxt, APIA); - __ptrauth_save_key(ctxt, APIB); - __ptrauth_save_key(ctxt, APDA); - __ptrauth_save_key(ctxt, APDB); - __ptrauth_save_key(ctxt, APGA); - - - vcpu->arch.hcr_el2 |= enable; - sysreg_clear_set(hcr_el2, 0, enable); - - return true; -} - static bool kvm_hyp_handle_cntpct(struct kvm_vcpu *vcpu) { struct arch_timer_context *ctxt; @@ -610,9 +535,6 @@ static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code) __vgic_v3_perform_cpuif_access(vcpu) == 1) return true; - if (esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu))) - return kvm_hyp_handle_ptrauth(vcpu, exit_code); - if (kvm_hyp_handle_cntpct(vcpu)) return true; diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 4103625e46c5..9dfe704bdb69 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -191,7 +191,6 @@ static const exit_handler_fn hyp_exit_handlers[] = { [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low, [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low, [ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low, - [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, [ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops, }; @@ -203,7 +202,6 @@ static const exit_handler_fn pvm_exit_handlers[] = { [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low, [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low, [ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low, - [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, [ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops, }; diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 8e1d98b691c1..f374bcdab4d4 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -41,9 +41,8 @@ DEFINE_PER_CPU(unsigned long, kvm_hyp_vector); * - TGE: we want the guest to use EL1, which is incompatible with * this bit being set * - * - API/APK: for hysterical raisins, we enable PAuth lazily, which - * means that the guest's bits cannot be directly applied (we really - * want to see the traps). Revisit this at some point. + * - API/APK: they are already accounted for by vcpu_load(), and can + * only take effect across a load/put cycle (such as ERET) */ #define NV_HCR_GUEST_EXCLUDE (HCR_TGE | HCR_API | HCR_APK) @@ -268,7 +267,6 @@ static const exit_handler_fn hyp_exit_handlers[] = { [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low, [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low, [ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low, - [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, [ESR_ELx_EC_ERET] = kvm_hyp_handle_eret, [ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops, }; -- cgit v1.2.3 From ead79118dae6f9f982532002e82c2fb291ae0480 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 11 Apr 2024 13:46:17 -0300 Subject: arm64/io: Provide a WC friendly __iowriteXX_copy() The kernel provides driver support for using write combining IO memory through the __iowriteXX_copy() API which is commonly used as an optional optimization to generate 16/32/64 byte MemWr TLPs in a PCIe environment. iomap_copy.c provides a generic implementation as a simple 4/8 byte at a time copy loop that has worked well with past ARM64 CPUs, giving a high frequency of large TLPs being successfully formed. However modern ARM64 CPUs are quite sensitive to how the write combining CPU HW is operated and a compiler generated loop with intermixed load/store is not sufficient to frequently generate a large TLP. The CPUs would like to see the entire TLP generated by consecutive store instructions from registers. Compilers like gcc tend to intermix loads and stores and have poor code generation, in part, due to the ARM64 situation that writeq() does not codegen anything other than "[xN]". However even with that resolved compilers like clang still do not have good code generation. This means on modern ARM64 CPUs the rate at which __iowriteXX_copy() successfully generates large TLPs is very small (less than 1 in 10,000) tries), to the point that the use of WC is pointless. Implement __iowrite32/64_copy() specifically for ARM64 and use inline assembly to build consecutive blocks of STR instructions. Provide direct support for 64/32/16 large TLP generation in this manner. Optimize for common constant lengths so that the compiler can directly inline the store blocks. This brings the frequency of large TLP generation up to a high level that is comparable with older CPU generations. As the __iowriteXX_copy() family of APIs is intended for use with WC incorporate the DGH hint directly into the function. Link: https://lore.kernel.org/r/4-v3-1893cd8b9369+1925-mlx5_arm_wc_jgg@nvidia.com Cc: Arnd Bergmann Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Reviewed-by: Catalin Marinas Signed-off-by: Jason Gunthorpe --- arch/arm64/include/asm/io.h | 132 ++++++++++++++++++++++++++++++++++++++++++++ arch/arm64/kernel/io.c | 42 ++++++++++++++ 2 files changed, 174 insertions(+) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 8d825522c55c..4ff0ae3f6d66 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -139,6 +139,138 @@ extern void __memset_io(volatile void __iomem *, int, size_t); #define memcpy_fromio(a,c,l) __memcpy_fromio((a),(c),(l)) #define memcpy_toio(c,a,l) __memcpy_toio((c),(a),(l)) +/* + * The ARM64 iowrite implementation is intended to support drivers that want to + * use write combining. For instance PCI drivers using write combining with a 64 + * byte __iowrite64_copy() expect to get a 64 byte MemWr TLP on the PCIe bus. + * + * Newer ARM core have sensitive write combining buffers, it is important that + * the stores be contiguous blocks of store instructions. Normal memcpy + * approaches have a very low chance to generate write combining. + * + * Since this is the only API on ARM64 that should be used with write combining + * it also integrates the DGH hint which is supposed to lower the latency to + * emit the large TLP from the CPU. + */ + +static inline void __const_memcpy_toio_aligned32(volatile u32 __iomem *to, + const u32 *from, size_t count) +{ + switch (count) { + case 8: + asm volatile("str %w0, [%8, #4 * 0]\n" + "str %w1, [%8, #4 * 1]\n" + "str %w2, [%8, #4 * 2]\n" + "str %w3, [%8, #4 * 3]\n" + "str %w4, [%8, #4 * 4]\n" + "str %w5, [%8, #4 * 5]\n" + "str %w6, [%8, #4 * 6]\n" + "str %w7, [%8, #4 * 7]\n" + : + : "rZ"(from[0]), "rZ"(from[1]), "rZ"(from[2]), + "rZ"(from[3]), "rZ"(from[4]), "rZ"(from[5]), + "rZ"(from[6]), "rZ"(from[7]), "r"(to)); + break; + case 4: + asm volatile("str %w0, [%4, #4 * 0]\n" + "str %w1, [%4, #4 * 1]\n" + "str %w2, [%4, #4 * 2]\n" + "str %w3, [%4, #4 * 3]\n" + : + : "rZ"(from[0]), "rZ"(from[1]), "rZ"(from[2]), + "rZ"(from[3]), "r"(to)); + break; + case 2: + asm volatile("str %w0, [%2, #4 * 0]\n" + "str %w1, [%2, #4 * 1]\n" + : + : "rZ"(from[0]), "rZ"(from[1]), "r"(to)); + break; + case 1: + __raw_writel(*from, to); + break; + default: + BUILD_BUG(); + } +} + +void __iowrite32_copy_full(void __iomem *to, const void *from, size_t count); + +static inline void __const_iowrite32_copy(void __iomem *to, const void *from, + size_t count) +{ + if (count == 8 || count == 4 || count == 2 || count == 1) { + __const_memcpy_toio_aligned32(to, from, count); + dgh(); + } else { + __iowrite32_copy_full(to, from, count); + } +} + +#define __iowrite32_copy(to, from, count) \ + (__builtin_constant_p(count) ? \ + __const_iowrite32_copy(to, from, count) : \ + __iowrite32_copy_full(to, from, count)) + +static inline void __const_memcpy_toio_aligned64(volatile u64 __iomem *to, + const u64 *from, size_t count) +{ + switch (count) { + case 8: + asm volatile("str %x0, [%8, #8 * 0]\n" + "str %x1, [%8, #8 * 1]\n" + "str %x2, [%8, #8 * 2]\n" + "str %x3, [%8, #8 * 3]\n" + "str %x4, [%8, #8 * 4]\n" + "str %x5, [%8, #8 * 5]\n" + "str %x6, [%8, #8 * 6]\n" + "str %x7, [%8, #8 * 7]\n" + : + : "rZ"(from[0]), "rZ"(from[1]), "rZ"(from[2]), + "rZ"(from[3]), "rZ"(from[4]), "rZ"(from[5]), + "rZ"(from[6]), "rZ"(from[7]), "r"(to)); + break; + case 4: + asm volatile("str %x0, [%4, #8 * 0]\n" + "str %x1, [%4, #8 * 1]\n" + "str %x2, [%4, #8 * 2]\n" + "str %x3, [%4, #8 * 3]\n" + : + : "rZ"(from[0]), "rZ"(from[1]), "rZ"(from[2]), + "rZ"(from[3]), "r"(to)); + break; + case 2: + asm volatile("str %x0, [%2, #8 * 0]\n" + "str %x1, [%2, #8 * 1]\n" + : + : "rZ"(from[0]), "rZ"(from[1]), "r"(to)); + break; + case 1: + __raw_writeq(*from, to); + break; + default: + BUILD_BUG(); + } +} + +void __iowrite64_copy_full(void __iomem *to, const void *from, size_t count); + +static inline void __const_iowrite64_copy(void __iomem *to, const void *from, + size_t count) +{ + if (count == 8 || count == 4 || count == 2 || count == 1) { + __const_memcpy_toio_aligned64(to, from, count); + dgh(); + } else { + __iowrite64_copy_full(to, from, count); + } +} + +#define __iowrite64_copy(to, from, count) \ + (__builtin_constant_p(count) ? \ + __const_iowrite64_copy(to, from, count) : \ + __iowrite64_copy_full(to, from, count)) + /* * I/O memory mapping functions. */ diff --git a/arch/arm64/kernel/io.c b/arch/arm64/kernel/io.c index aa7a4ec6a3ae..ef48089fbfe1 100644 --- a/arch/arm64/kernel/io.c +++ b/arch/arm64/kernel/io.c @@ -37,6 +37,48 @@ void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count) } EXPORT_SYMBOL(__memcpy_fromio); +/* + * This generates a memcpy that works on a from/to address which is aligned to + * bits. Count is in terms of the number of bits sized quantities to copy. It + * optimizes to use the STR groupings when possible so that it is WC friendly. + */ +#define memcpy_toio_aligned(to, from, count, bits) \ + ({ \ + volatile u##bits __iomem *_to = to; \ + const u##bits *_from = from; \ + size_t _count = count; \ + const u##bits *_end_from = _from + ALIGN_DOWN(_count, 8); \ + \ + for (; _from < _end_from; _from += 8, _to += 8) \ + __const_memcpy_toio_aligned##bits(_to, _from, 8); \ + if ((_count % 8) >= 4) { \ + __const_memcpy_toio_aligned##bits(_to, _from, 4); \ + _from += 4; \ + _to += 4; \ + } \ + if ((_count % 4) >= 2) { \ + __const_memcpy_toio_aligned##bits(_to, _from, 2); \ + _from += 2; \ + _to += 2; \ + } \ + if (_count % 2) \ + __const_memcpy_toio_aligned##bits(_to, _from, 1); \ + }) + +void __iowrite64_copy_full(void __iomem *to, const void *from, size_t count) +{ + memcpy_toio_aligned(to, from, count, 64); + dgh(); +} +EXPORT_SYMBOL(__iowrite64_copy_full); + +void __iowrite32_copy_full(void __iomem *to, const void *from, size_t count) +{ + memcpy_toio_aligned(to, from, count, 32); + dgh(); +} +EXPORT_SYMBOL(__iowrite32_copy_full); + /* * Copy data from "real" memory space to IO memory space. */ -- cgit v1.2.3 From d4dbc991714eefcbd8d54a3204bd77a0a52bd32d Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Tue, 26 Mar 2024 10:16:15 +0100 Subject: sched/cpufreq: Rename arch_update_thermal_pressure() => arch_update_hw_pressure() Now that cpufreq provides a pressure value to the scheduler, rename arch_update_thermal_pressure into HW pressure to reflect that it returns a pressure applied by HW (i.e. with a high frequency change) and not always related to thermal mitigation but also generated by max current limitation as an example. Such high frequency signal needs filtering to be smoothed and provide an value that reflects the average available capacity into the scheduler time scale. Signed-off-by: Vincent Guittot Signed-off-by: Ingo Molnar Tested-by: Lukasz Luba Reviewed-by: Qais Yousef Reviewed-by: Lukasz Luba Link: https://lore.kernel.org/r/20240326091616.3696851-5-vincent.guittot@linaro.org --- arch/arm/include/asm/topology.h | 6 +++--- arch/arm64/include/asm/topology.h | 6 +++--- drivers/base/arch_topology.c | 26 +++++++++++++------------- drivers/cpufreq/qcom-cpufreq-hw.c | 4 ++-- include/linux/arch_topology.h | 8 ++++---- include/linux/sched/topology.h | 8 ++++---- include/trace/events/hw_pressure.h | 29 +++++++++++++++++++++++++++++ include/trace/events/sched.h | 2 +- include/trace/events/thermal_pressure.h | 29 ----------------------------- init/Kconfig | 12 ++++++------ kernel/sched/core.c | 8 ++++---- kernel/sched/fair.c | 16 ++++++++-------- kernel/sched/pelt.c | 18 +++++++++--------- kernel/sched/pelt.h | 16 ++++++++-------- kernel/sched/sched.h | 10 +++++----- 15 files changed, 99 insertions(+), 99 deletions(-) create mode 100644 include/trace/events/hw_pressure.h delete mode 100644 include/trace/events/thermal_pressure.h (limited to 'arch/arm64/include') diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h index 853c4f81ba4a..ad36b6570067 100644 --- a/arch/arm/include/asm/topology.h +++ b/arch/arm/include/asm/topology.h @@ -22,9 +22,9 @@ /* Enable topology flag updates */ #define arch_update_cpu_topology topology_update_cpu_topology -/* Replace task scheduler's default thermal pressure API */ -#define arch_scale_thermal_pressure topology_get_thermal_pressure -#define arch_update_thermal_pressure topology_update_thermal_pressure +/* Replace task scheduler's default HW pressure API */ +#define arch_scale_hw_pressure topology_get_hw_pressure +#define arch_update_hw_pressure topology_update_hw_pressure #else diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index a323b109b9c4..0f6ef432fb84 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -35,9 +35,9 @@ void update_freq_counters_refs(void); /* Enable topology flag updates */ #define arch_update_cpu_topology topology_update_cpu_topology -/* Replace task scheduler's default thermal pressure API */ -#define arch_scale_thermal_pressure topology_get_thermal_pressure -#define arch_update_thermal_pressure topology_update_thermal_pressure +/* Replace task scheduler's default HW pressure API */ +#define arch_scale_hw_pressure topology_get_hw_pressure +#define arch_update_hw_pressure topology_update_hw_pressure #include diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 024b78a0cfc1..0248912ff687 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -22,7 +22,7 @@ #include #define CREATE_TRACE_POINTS -#include +#include static DEFINE_PER_CPU(struct scale_freq_data __rcu *, sft_data); static struct cpumask scale_freq_counters_mask; @@ -160,26 +160,26 @@ void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity) per_cpu(cpu_scale, cpu) = capacity; } -DEFINE_PER_CPU(unsigned long, thermal_pressure); +DEFINE_PER_CPU(unsigned long, hw_pressure); /** - * topology_update_thermal_pressure() - Update thermal pressure for CPUs + * topology_update_hw_pressure() - Update HW pressure for CPUs * @cpus : The related CPUs for which capacity has been reduced * @capped_freq : The maximum allowed frequency that CPUs can run at * - * Update the value of thermal pressure for all @cpus in the mask. The + * Update the value of HW pressure for all @cpus in the mask. The * cpumask should include all (online+offline) affected CPUs, to avoid * operating on stale data when hot-plug is used for some CPUs. The * @capped_freq reflects the currently allowed max CPUs frequency due to - * thermal capping. It might be also a boost frequency value, which is bigger + * HW capping. It might be also a boost frequency value, which is bigger * than the internal 'capacity_freq_ref' max frequency. In such case the * pressure value should simply be removed, since this is an indication that - * there is no thermal throttling. The @capped_freq must be provided in kHz. + * there is no HW throttling. The @capped_freq must be provided in kHz. */ -void topology_update_thermal_pressure(const struct cpumask *cpus, +void topology_update_hw_pressure(const struct cpumask *cpus, unsigned long capped_freq) { - unsigned long max_capacity, capacity, th_pressure; + unsigned long max_capacity, capacity, hw_pressure; u32 max_freq; int cpu; @@ -189,21 +189,21 @@ void topology_update_thermal_pressure(const struct cpumask *cpus, /* * Handle properly the boost frequencies, which should simply clean - * the thermal pressure value. + * the HW pressure value. */ if (max_freq <= capped_freq) capacity = max_capacity; else capacity = mult_frac(max_capacity, capped_freq, max_freq); - th_pressure = max_capacity - capacity; + hw_pressure = max_capacity - capacity; - trace_thermal_pressure_update(cpu, th_pressure); + trace_hw_pressure_update(cpu, hw_pressure); for_each_cpu(cpu, cpus) - WRITE_ONCE(per_cpu(thermal_pressure, cpu), th_pressure); + WRITE_ONCE(per_cpu(hw_pressure, cpu), hw_pressure); } -EXPORT_SYMBOL_GPL(topology_update_thermal_pressure); +EXPORT_SYMBOL_GPL(topology_update_hw_pressure); static ssize_t cpu_capacity_show(struct device *dev, struct device_attribute *attr, diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index 70b0f21968a0..ec8df5496a0c 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -347,8 +347,8 @@ static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data) throttled_freq = freq_hz / HZ_PER_KHZ; - /* Update thermal pressure (the boost frequencies are accepted) */ - arch_update_thermal_pressure(policy->related_cpus, throttled_freq); + /* Update HW pressure (the boost frequencies are accepted) */ + arch_update_hw_pressure(policy->related_cpus, throttled_freq); /* * In the unlikely case policy is unregistered do not enable diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index a63d61ca55af..b721f360d759 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -60,14 +60,14 @@ void topology_scale_freq_tick(void); void topology_set_scale_freq_source(struct scale_freq_data *data, const struct cpumask *cpus); void topology_clear_scale_freq_source(enum scale_freq_source source, const struct cpumask *cpus); -DECLARE_PER_CPU(unsigned long, thermal_pressure); +DECLARE_PER_CPU(unsigned long, hw_pressure); -static inline unsigned long topology_get_thermal_pressure(int cpu) +static inline unsigned long topology_get_hw_pressure(int cpu) { - return per_cpu(thermal_pressure, cpu); + return per_cpu(hw_pressure, cpu); } -void topology_update_thermal_pressure(const struct cpumask *cpus, +void topology_update_hw_pressure(const struct cpumask *cpus, unsigned long capped_freq); struct cpu_topology { diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index c8fe9bab981b..4237daa5ac7a 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -270,17 +270,17 @@ unsigned long arch_scale_cpu_capacity(int cpu) } #endif -#ifndef arch_scale_thermal_pressure +#ifndef arch_scale_hw_pressure static __always_inline -unsigned long arch_scale_thermal_pressure(int cpu) +unsigned long arch_scale_hw_pressure(int cpu) { return 0; } #endif -#ifndef arch_update_thermal_pressure +#ifndef arch_update_hw_pressure static __always_inline -void arch_update_thermal_pressure(const struct cpumask *cpus, +void arch_update_hw_pressure(const struct cpumask *cpus, unsigned long capped_frequency) { } #endif diff --git a/include/trace/events/hw_pressure.h b/include/trace/events/hw_pressure.h new file mode 100644 index 000000000000..b9cd68854128 --- /dev/null +++ b/include/trace/events/hw_pressure.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM hw_pressure + +#if !defined(_TRACE_THERMAL_PRESSURE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_THERMAL_PRESSURE_H + +#include + +TRACE_EVENT(hw_pressure_update, + TP_PROTO(int cpu, unsigned long hw_pressure), + TP_ARGS(cpu, hw_pressure), + + TP_STRUCT__entry( + __field(unsigned long, hw_pressure) + __field(int, cpu) + ), + + TP_fast_assign( + __entry->hw_pressure = hw_pressure; + __entry->cpu = cpu; + ), + + TP_printk("cpu=%d hw_pressure=%lu", __entry->cpu, __entry->hw_pressure) +); +#endif /* _TRACE_THERMAL_PRESSURE_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index dbb01b4b7451..d115d64c4011 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -752,7 +752,7 @@ DECLARE_TRACE(pelt_dl_tp, TP_PROTO(struct rq *rq), TP_ARGS(rq)); -DECLARE_TRACE(pelt_thermal_tp, +DECLARE_TRACE(pelt_hw_tp, TP_PROTO(struct rq *rq), TP_ARGS(rq)); diff --git a/include/trace/events/thermal_pressure.h b/include/trace/events/thermal_pressure.h deleted file mode 100644 index b68680201360..000000000000 --- a/include/trace/events/thermal_pressure.h +++ /dev/null @@ -1,29 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM thermal_pressure - -#if !defined(_TRACE_THERMAL_PRESSURE_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_THERMAL_PRESSURE_H - -#include - -TRACE_EVENT(thermal_pressure_update, - TP_PROTO(int cpu, unsigned long thermal_pressure), - TP_ARGS(cpu, thermal_pressure), - - TP_STRUCT__entry( - __field(unsigned long, thermal_pressure) - __field(int, cpu) - ), - - TP_fast_assign( - __entry->thermal_pressure = thermal_pressure; - __entry->cpu = cpu; - ), - - TP_printk("cpu=%d thermal_pressure=%lu", __entry->cpu, __entry->thermal_pressure) -); -#endif /* _TRACE_THERMAL_PRESSURE_H */ - -/* This part must be outside protection */ -#include diff --git a/init/Kconfig b/init/Kconfig index aa02aec6aa7d..f0c9117962ec 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -547,24 +547,24 @@ config HAVE_SCHED_AVG_IRQ depends on IRQ_TIME_ACCOUNTING || PARAVIRT_TIME_ACCOUNTING depends on SMP -config SCHED_THERMAL_PRESSURE +config SCHED_HW_PRESSURE bool default y if ARM && ARM_CPU_TOPOLOGY default y if ARM64 depends on SMP depends on CPU_FREQ_THERMAL help - Select this option to enable thermal pressure accounting in the - scheduler. Thermal pressure is the value conveyed to the scheduler + Select this option to enable HW pressure accounting in the + scheduler. HW pressure is the value conveyed to the scheduler that reflects the reduction in CPU compute capacity resulted from - thermal throttling. Thermal throttling occurs when the performance of - a CPU is capped due to high operating temperatures. + HW throttling. HW throttling occurs when the performance of + a CPU is capped due to high operating temperatures as an example. If selected, the scheduler will be able to balance tasks accordingly, i.e. put less load on throttled CPUs than on non/less throttled ones. This requires the architecture to implement - arch_update_thermal_pressure() and arch_scale_thermal_pressure(). + arch_update_hw_pressure() and arch_scale_thermal_pressure(). config BSD_PROCESS_ACCT bool "BSD Process Accounting" diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 0621e4ee31de..67a8302c3131 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -108,7 +108,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_rt_tp); EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_dl_tp); EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp); EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_se_tp); -EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_thermal_tp); +EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_hw_tp); EXPORT_TRACEPOINT_SYMBOL_GPL(sched_cpu_capacity_tp); EXPORT_TRACEPOINT_SYMBOL_GPL(sched_overutilized_tp); EXPORT_TRACEPOINT_SYMBOL_GPL(sched_util_est_cfs_tp); @@ -5668,7 +5668,7 @@ void sched_tick(void) struct rq *rq = cpu_rq(cpu); struct task_struct *curr = rq->curr; struct rq_flags rf; - unsigned long thermal_pressure; + unsigned long hw_pressure; u64 resched_latency; if (housekeeping_cpu(cpu, HK_TYPE_TICK)) @@ -5679,8 +5679,8 @@ void sched_tick(void) rq_lock(rq, &rf); update_rq_clock(rq); - thermal_pressure = arch_scale_thermal_pressure(cpu_of(rq)); - update_thermal_load_avg(rq_clock_thermal(rq), rq, thermal_pressure); + hw_pressure = arch_scale_hw_pressure(cpu_of(rq)); + update_hw_load_avg(rq_clock_hw(rq), rq, hw_pressure); curr->sched_class->task_tick(rq, curr, 0); if (sched_feat(LATENCY_WARN)) resched_latency = cpu_resched_latency(rq); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 19199c119829..eef39ae3efcf 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -78,7 +78,7 @@ static unsigned int normalized_sysctl_sched_base_slice = 750000ULL; const_debug unsigned int sysctl_sched_migration_cost = 500000UL; -int sched_thermal_decay_shift; +int sched_hw_decay_shift; static int __init setup_sched_thermal_decay_shift(char *str) { int _shift = 0; @@ -86,7 +86,7 @@ static int __init setup_sched_thermal_decay_shift(char *str) if (kstrtoint(str, 0, &_shift)) pr_warn("Unable to set scheduler thermal pressure decay shift parameter\n"); - sched_thermal_decay_shift = clamp(_shift, 0, 10); + sched_hw_decay_shift = clamp(_shift, 0, 10); return 1; } __setup("sched_thermal_decay_shift=", setup_sched_thermal_decay_shift); @@ -4969,7 +4969,7 @@ static inline unsigned long get_actual_cpu_capacity(int cpu) { unsigned long capacity = arch_scale_cpu_capacity(cpu); - capacity -= max(thermal_load_avg(cpu_rq(cpu)), cpufreq_get_pressure(cpu)); + capacity -= max(hw_load_avg(cpu_rq(cpu)), cpufreq_get_pressure(cpu)); return capacity; } @@ -5002,7 +5002,7 @@ static inline int util_fits_cpu(unsigned long util, * Similarly if a task is capped to arch_scale_cpu_capacity(little_cpu), it * should fit a little cpu even if there's some pressure. * - * Only exception is for thermal pressure since it has a direct impact + * Only exception is for HW or cpufreq pressure since it has a direct impact * on available OPP of the system. * * We honour it for uclamp_min only as a drop in performance level @@ -9324,7 +9324,7 @@ static inline bool others_have_blocked(struct rq *rq) if (cpu_util_dl(rq)) return true; - if (thermal_load_avg(rq)) + if (hw_load_avg(rq)) return true; if (cpu_util_irq(rq)) @@ -9354,7 +9354,7 @@ static bool __update_blocked_others(struct rq *rq, bool *done) { const struct sched_class *curr_class; u64 now = rq_clock_pelt(rq); - unsigned long thermal_pressure; + unsigned long hw_pressure; bool decayed; /* @@ -9363,11 +9363,11 @@ static bool __update_blocked_others(struct rq *rq, bool *done) */ curr_class = rq->curr->sched_class; - thermal_pressure = arch_scale_thermal_pressure(cpu_of(rq)); + hw_pressure = arch_scale_hw_pressure(cpu_of(rq)); decayed = update_rt_rq_load_avg(now, rq, curr_class == &rt_sched_class) | update_dl_rq_load_avg(now, rq, curr_class == &dl_sched_class) | - update_thermal_load_avg(rq_clock_thermal(rq), rq, thermal_pressure) | + update_hw_load_avg(rq_clock_hw(rq), rq, hw_pressure) | update_irq_load_avg(rq, 0); if (others_have_blocked(rq)) diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c index 3a96da25b67c..ef00382de595 100644 --- a/kernel/sched/pelt.c +++ b/kernel/sched/pelt.c @@ -384,30 +384,30 @@ int update_dl_rq_load_avg(u64 now, struct rq *rq, int running) return 0; } -#ifdef CONFIG_SCHED_THERMAL_PRESSURE +#ifdef CONFIG_SCHED_HW_PRESSURE /* - * thermal: + * hardware: * * load_sum = \Sum se->avg.load_sum but se->avg.load_sum is not tracked * * util_avg and runnable_load_avg are not supported and meaningless. * * Unlike rt/dl utilization tracking that track time spent by a cpu - * running a rt/dl task through util_avg, the average thermal pressure is - * tracked through load_avg. This is because thermal pressure signal is + * running a rt/dl task through util_avg, the average HW pressure is + * tracked through load_avg. This is because HW pressure signal is * time weighted "delta" capacity unlike util_avg which is binary. * "delta capacity" = actual capacity - - * capped capacity a cpu due to a thermal event. + * capped capacity a cpu due to a HW event. */ -int update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity) +int update_hw_load_avg(u64 now, struct rq *rq, u64 capacity) { - if (___update_load_sum(now, &rq->avg_thermal, + if (___update_load_sum(now, &rq->avg_hw, capacity, capacity, capacity)) { - ___update_load_avg(&rq->avg_thermal, 1); - trace_pelt_thermal_tp(rq); + ___update_load_avg(&rq->avg_hw, 1); + trace_pelt_hw_tp(rq); return 1; } diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h index 9e1083465fbc..2150062949d4 100644 --- a/kernel/sched/pelt.h +++ b/kernel/sched/pelt.h @@ -7,21 +7,21 @@ int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq); int update_rt_rq_load_avg(u64 now, struct rq *rq, int running); int update_dl_rq_load_avg(u64 now, struct rq *rq, int running); -#ifdef CONFIG_SCHED_THERMAL_PRESSURE -int update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity); +#ifdef CONFIG_SCHED_HW_PRESSURE +int update_hw_load_avg(u64 now, struct rq *rq, u64 capacity); -static inline u64 thermal_load_avg(struct rq *rq) +static inline u64 hw_load_avg(struct rq *rq) { - return READ_ONCE(rq->avg_thermal.load_avg); + return READ_ONCE(rq->avg_hw.load_avg); } #else static inline int -update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity) +update_hw_load_avg(u64 now, struct rq *rq, u64 capacity) { return 0; } -static inline u64 thermal_load_avg(struct rq *rq) +static inline u64 hw_load_avg(struct rq *rq) { return 0; } @@ -202,12 +202,12 @@ update_dl_rq_load_avg(u64 now, struct rq *rq, int running) } static inline int -update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity) +update_hw_load_avg(u64 now, struct rq *rq, u64 capacity) { return 0; } -static inline u64 thermal_load_avg(struct rq *rq) +static inline u64 hw_load_avg(struct rq *rq) { return 0; } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 7c39dbf31f75..993edb02fb0d 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1108,8 +1108,8 @@ struct rq { #ifdef CONFIG_HAVE_SCHED_AVG_IRQ struct sched_avg avg_irq; #endif -#ifdef CONFIG_SCHED_THERMAL_PRESSURE - struct sched_avg avg_thermal; +#ifdef CONFIG_SCHED_HW_PRESSURE + struct sched_avg avg_hw; #endif u64 idle_stamp; u64 avg_idle; @@ -1561,11 +1561,11 @@ static inline u64 rq_clock_task(struct rq *rq) * 3 256 * 4 512 */ -extern int sched_thermal_decay_shift; +extern int sched_hw_decay_shift; -static inline u64 rq_clock_thermal(struct rq *rq) +static inline u64 rq_clock_hw(struct rq *rq) { - return rq_clock_task(rq) >> sched_thermal_decay_shift; + return rq_clock_task(rq) >> sched_hw_decay_shift; } static inline void rq_clock_skip_update(struct rq *rq) -- cgit v1.2.3 From 961a6ee5c7759afce06c27a83b36756d7b81a784 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 18 Mar 2024 16:03:58 -0400 Subject: mm/arm64: merge pXd_huge() and pXd_leaf() definitions Unlike most archs, aarch64 defines pXd_huge() and pXd_leaf() slightly differently. Redefine the pXd_huge() with pXd_leaf(). There used to be two traps for old aarch64 definitions over these APIs that I found when reading the code around, they're: (1) 4797ec2dc83a ("arm64: fix pud_huge() for 2-level pagetables") (2) 23bc8f69f0ec ("arm64: mm: fix p?d_leaf()") Define pXd_huge() with the current pXd_leaf() will make sure (2) isn't a problem (on PROT_NONE checks). To make sure it also works for (1), we move over the __PAGETABLE_PMD_FOLDED check to pud_leaf(), allowing it to constantly returning "false" for 2-level pgtables, which looks even safer to cover both now. Link: https://lkml.kernel.org/r/20240318200404.448346-9-peterx@redhat.com Signed-off-by: Peter Xu Cc: Muchun Song Cc: Mark Salter Cc: Catalin Marinas Cc: Will Deacon Cc: Alistair Popple Cc: Andreas Larsson Cc: "Aneesh Kumar K.V" Cc: Arnd Bergmann Cc: Bjorn Andersson Cc: Borislav Petkov Cc: Christophe Leroy Cc: Dave Hansen Cc: David S. Miller Cc: Fabio Estevam Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: Konrad Dybcio Cc: Krzysztof Kozlowski Cc: Lucas Stach Cc: "Matthew Wilcox (Oracle)" Cc: Michael Ellerman Cc: Mike Rapoport (IBM) Cc: Naoya Horiguchi Cc: "Naveen N. Rao" Cc: Nicholas Piggin Cc: Russell King Cc: Shawn Guo Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- arch/arm64/include/asm/pgtable.h | 4 ++++ arch/arm64/mm/hugetlbpage.c | 8 ++------ 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index afdd56d26ad7..ec406e761e04 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -709,7 +709,11 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) #define pud_none(pud) (!pud_val(pud)) #define pud_bad(pud) (!pud_table(pud)) #define pud_present(pud) pte_present(pud_pte(pud)) +#ifndef __PAGETABLE_PMD_FOLDED #define pud_leaf(pud) (pud_present(pud) && !pud_table(pud)) +#else +#define pud_leaf(pud) false +#endif #define pud_valid(pud) pte_valid(pud_pte(pud)) #define pud_user(pud) pte_user(pud_pte(pud)) #define pud_user_exec(pud) pte_user_exec(pud_pte(pud)) diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 0f0e10bb0a95..1234bbaef5bf 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -81,16 +81,12 @@ bool arch_hugetlb_migration_supported(struct hstate *h) int pmd_huge(pmd_t pmd) { - return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT); + return pmd_leaf(pmd); } int pud_huge(pud_t pud) { -#ifndef __PAGETABLE_PMD_FOLDED - return pud_val(pud) && !(pud_val(pud) & PUD_TABLE_BIT); -#else - return 0; -#endif + return pud_leaf(pud); } static int find_num_contig(struct mm_struct *mm, unsigned long addr, -- cgit v1.2.3 From 1965e933ddeba5e27e68ed80cfc7241931f70d4c Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 18 Mar 2024 16:04:01 -0400 Subject: mm/treewide: replace pXd_huge() with pXd_leaf() Now after we're sure all pXd_huge() definitions are the same as pXd_leaf(), reuse it. Luckily, pXd_huge() isn't widely used. Link: https://lkml.kernel.org/r/20240318200404.448346-12-peterx@redhat.com Signed-off-by: Peter Xu Cc: Alistair Popple Cc: Andreas Larsson Cc: "Aneesh Kumar K.V" Cc: Arnd Bergmann Cc: Bjorn Andersson Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christophe Leroy Cc: Dave Hansen Cc: David S. Miller Cc: Fabio Estevam Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: Konrad Dybcio Cc: Krzysztof Kozlowski Cc: Lucas Stach Cc: Mark Salter Cc: "Matthew Wilcox (Oracle)" Cc: Michael Ellerman Cc: Mike Rapoport (IBM) Cc: Muchun Song Cc: Naoya Horiguchi Cc: "Naveen N. Rao" Cc: Nicholas Piggin Cc: Russell King Cc: Shawn Guo Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/arm/include/asm/pgtable-3level.h | 2 +- arch/arm64/include/asm/pgtable.h | 2 +- arch/arm64/mm/hugetlbpage.c | 4 ++-- arch/loongarch/mm/hugetlbpage.c | 2 +- arch/mips/mm/tlb-r4k.c | 2 +- arch/powerpc/mm/pgtable_64.c | 6 +++--- arch/x86/mm/pgtable.c | 4 ++-- mm/gup.c | 4 ++-- mm/hmm.c | 2 +- mm/memory.c | 2 +- 10 files changed, 15 insertions(+), 15 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index e7aecbef75c9..9e3c44f0aea2 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -190,7 +190,7 @@ static inline pte_t pte_mkspecial(pte_t pte) #define pmd_dirty(pmd) (pmd_isset((pmd), L_PMD_SECT_DIRTY)) #define pmd_hugewillfault(pmd) (!pmd_young(pmd) || !pmd_write(pmd)) -#define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd)) +#define pmd_thp_or_huge(pmd) (pmd_leaf(pmd) || pmd_trans_huge(pmd)) #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define pmd_trans_huge(pmd) (pmd_val(pmd) && !pmd_table(pmd)) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index ec406e761e04..a5c84f38c528 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -517,7 +517,7 @@ static inline pmd_t pmd_mkinvalid(pmd_t pmd) return pmd; } -#define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd)) +#define pmd_thp_or_huge(pmd) (pmd_leaf(pmd) || pmd_trans_huge(pmd)) #define pmd_write(pmd) pte_write(pmd_pte(pmd)) diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 1234bbaef5bf..f494fc31201f 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -321,7 +321,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm, if (sz != PUD_SIZE && pud_none(pud)) return NULL; /* hugepage or swap? */ - if (pud_huge(pud) || !pud_present(pud)) + if (pud_leaf(pud) || !pud_present(pud)) return (pte_t *)pudp; /* table; check the next level */ @@ -333,7 +333,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm, if (!(sz == PMD_SIZE || sz == CONT_PMD_SIZE) && pmd_none(pmd)) return NULL; - if (pmd_huge(pmd) || !pmd_present(pmd)) + if (pmd_leaf(pmd) || !pmd_present(pmd)) return (pte_t *)pmdp; if (sz == CONT_PTE_SIZE) diff --git a/arch/loongarch/mm/hugetlbpage.c b/arch/loongarch/mm/hugetlbpage.c index 1e76fcb83093..a4e78e74aa21 100644 --- a/arch/loongarch/mm/hugetlbpage.c +++ b/arch/loongarch/mm/hugetlbpage.c @@ -64,7 +64,7 @@ uint64_t pmd_to_entrylo(unsigned long pmd_val) { uint64_t val; /* PMD as PTE. Must be huge page */ - if (!pmd_huge(__pmd(pmd_val))) + if (!pmd_leaf(__pmd(pmd_val))) panic("%s", __func__); val = pmd_val ^ _PAGE_HUGE; diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c index 4106084e57d7..76f3b9c0a9f0 100644 --- a/arch/mips/mm/tlb-r4k.c +++ b/arch/mips/mm/tlb-r4k.c @@ -326,7 +326,7 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte) idx = read_c0_index(); #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT /* this could be a huge page */ - if (pmd_huge(*pmdp)) { + if (pmd_leaf(*pmdp)) { unsigned long lo; write_c0_pagemask(PM_HUGE_MASK); ptep = (pte_t *)pmdp; diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 9b99113cb51a..6621cfc3baf8 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -102,7 +102,7 @@ struct page *p4d_page(p4d_t p4d) { if (p4d_leaf(p4d)) { if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP)) - VM_WARN_ON(!p4d_huge(p4d)); + VM_WARN_ON(!p4d_leaf(p4d)); return pte_page(p4d_pte(p4d)); } return virt_to_page(p4d_pgtable(p4d)); @@ -113,7 +113,7 @@ struct page *pud_page(pud_t pud) { if (pud_leaf(pud)) { if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP)) - VM_WARN_ON(!pud_huge(pud)); + VM_WARN_ON(!pud_leaf(pud)); return pte_page(pud_pte(pud)); } return virt_to_page(pud_pgtable(pud)); @@ -132,7 +132,7 @@ struct page *pmd_page(pmd_t pmd) * enabled so these checks can't be used. */ if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP)) - VM_WARN_ON(!(pmd_leaf(pmd) || pmd_huge(pmd))); + VM_WARN_ON(!pmd_leaf(pmd)); return pte_page(pmd_pte(pmd)); } return virt_to_page(pmd_page_vaddr(pmd)); diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index d007591b8059..94767c82fc0d 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -731,7 +731,7 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) return 0; /* Bail out if we are we on a populated non-leaf entry: */ - if (pud_present(*pud) && !pud_huge(*pud)) + if (pud_present(*pud) && !pud_leaf(*pud)) return 0; set_pte((pte_t *)pud, pfn_pte( @@ -760,7 +760,7 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) } /* Bail out if we are we on a populated non-leaf entry: */ - if (pmd_present(*pmd) && !pmd_huge(*pmd)) + if (pmd_present(*pmd) && !pmd_leaf(*pmd)) return 0; set_pte((pte_t *)pmd, pfn_pte( diff --git a/mm/gup.c b/mm/gup.c index d89377b87311..8433d3dc31fc 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -778,7 +778,7 @@ static struct page *follow_p4d_mask(struct vm_area_struct *vma, p4d = READ_ONCE(*p4dp); if (!p4d_present(p4d)) return no_page_table(vma, flags); - BUILD_BUG_ON(p4d_huge(p4d)); + BUILD_BUG_ON(p4d_leaf(p4d)); if (unlikely(p4d_bad(p4d))) return no_page_table(vma, flags); @@ -3082,7 +3082,7 @@ static int gup_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, unsigned lo next = p4d_addr_end(addr, end); if (!p4d_present(p4d)) return 0; - BUILD_BUG_ON(p4d_huge(p4d)); + BUILD_BUG_ON(p4d_leaf(p4d)); if (unlikely(is_hugepd(__hugepd(p4d_val(p4d))))) { if (!gup_huge_pd(__hugepd(p4d_val(p4d)), addr, P4D_SHIFT, next, flags, pages, nr)) diff --git a/mm/hmm.c b/mm/hmm.c index c95b9ec5d95f..93aebd9cc130 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -429,7 +429,7 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end, return hmm_vma_walk_hole(start, end, -1, walk); } - if (pud_huge(pud) && pud_devmap(pud)) { + if (pud_leaf(pud) && pud_devmap(pud)) { unsigned long i, npages, pfn; unsigned int required_fault; unsigned long *hmm_pfns; diff --git a/mm/memory.c b/mm/memory.c index d2155ced45f8..e0576f2a8d23 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2765,7 +2765,7 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, unsigned long next; int err = 0; - BUG_ON(pud_huge(*pud)); + BUG_ON(pud_leaf(*pud)); if (create) { pmd = pmd_alloc_track(mm, pud, addr, mask); -- cgit v1.2.3 From 502016e33ae6474177097bc43ee26bad8f4d6919 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 18 Mar 2024 16:04:03 -0400 Subject: mm/arm: remove pmd_thp_or_huge() ARM/ARM64 used to define pmd_thp_or_huge(). Now this macro is completely redundant. Remove it and use pmd_leaf(). Link: https://lkml.kernel.org/r/20240318200404.448346-14-peterx@redhat.com Signed-off-by: Peter Xu Cc: Mark Salter Cc: Catalin Marinas Cc: Will Deacon Cc: Russell King Cc: Shawn Guo Cc: Krzysztof Kozlowski Cc: Bjorn Andersson Cc: Arnd Bergmann Cc: Konrad Dybcio Cc: Fabio Estevam Cc: Alistair Popple Cc: Andreas Larsson Cc: "Aneesh Kumar K.V" Cc: Borislav Petkov Cc: Christophe Leroy Cc: Dave Hansen Cc: David S. Miller Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: Lucas Stach Cc: "Matthew Wilcox (Oracle)" Cc: Michael Ellerman Cc: Mike Rapoport (IBM) Cc: Muchun Song Cc: Naoya Horiguchi Cc: "Naveen N. Rao" Cc: Nicholas Piggin Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- arch/arm/include/asm/pgtable-2level.h | 1 - arch/arm/include/asm/pgtable-3level.h | 1 - arch/arm/lib/uaccess_with_memcpy.c | 4 ++-- arch/arm64/include/asm/pgtable.h | 2 -- 4 files changed, 2 insertions(+), 6 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index 4245c2e74720..6b5392e20f41 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h @@ -241,7 +241,6 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) * define empty stubs for use by pin_page_for_write. */ #define pmd_hugewillfault(pmd) (0) -#define pmd_thp_or_huge(pmd) (0) #endif /* __ASSEMBLY__ */ diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index 9e3c44f0aea2..fa5939eb9864 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -190,7 +190,6 @@ static inline pte_t pte_mkspecial(pte_t pte) #define pmd_dirty(pmd) (pmd_isset((pmd), L_PMD_SECT_DIRTY)) #define pmd_hugewillfault(pmd) (!pmd_young(pmd) || !pmd_write(pmd)) -#define pmd_thp_or_huge(pmd) (pmd_leaf(pmd) || pmd_trans_huge(pmd)) #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define pmd_trans_huge(pmd) (pmd_val(pmd) && !pmd_table(pmd)) diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c index 2f6163f05e93..c0ac7796d775 100644 --- a/arch/arm/lib/uaccess_with_memcpy.c +++ b/arch/arm/lib/uaccess_with_memcpy.c @@ -56,10 +56,10 @@ pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp) * to see that it's still huge and whether or not we will * need to fault on write. */ - if (unlikely(pmd_thp_or_huge(*pmd))) { + if (unlikely(pmd_leaf(*pmd))) { ptl = ¤t->mm->page_table_lock; spin_lock(ptl); - if (unlikely(!pmd_thp_or_huge(*pmd) + if (unlikely(!pmd_leaf(*pmd) || pmd_hugewillfault(*pmd))) { spin_unlock(ptl); return 0; diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index a5c84f38c528..6870b60158fc 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -517,8 +517,6 @@ static inline pmd_t pmd_mkinvalid(pmd_t pmd) return pmd; } -#define pmd_thp_or_huge(pmd) (pmd_leaf(pmd) || pmd_trans_huge(pmd)) - #define pmd_write(pmd) pte_write(pmd_pte(pmd)) #define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT)) -- cgit v1.2.3 From f238b8c33c6738f146bbfbb09b78870ea157c2b7 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Sat, 23 Mar 2024 00:41:36 +1300 Subject: arm64: mm: swap: support THP_SWAP on hardware with MTE Commit d0637c505f8a1 ("arm64: enable THP_SWAP for arm64") brings up THP_SWAP on ARM64, but it doesn't enable THP_SWP on hardware with MTE as the MTE code works with the assumption tags save/restore is always handling a folio with only one page. The limitation should be removed as more and more ARM64 SoCs have this feature. Co-existence of MTE and THP_SWAP becomes more and more important. This patch makes MTE tags saving support large folios, then we don't need to split large folios into base pages for swapping out on ARM64 SoCs with MTE any more. arch_prepare_to_swap() should take folio rather than page as parameter because we support THP swap-out as a whole. It saves tags for all pages in a large folio. As now we are restoring tags based-on folio, in arch_swap_restore(), we may increase some extra loops and early-exitings while refaulting a large folio which is still in swapcache in do_swap_page(). In case a large folio has nr pages, do_swap_page() will only set the PTE of the particular page which is causing the page fault. Thus do_swap_page() runs nr times, and each time, arch_swap_restore() will loop nr times for those subpages in the folio. So right now the algorithmic complexity becomes O(nr^2). Once we support mapping large folios in do_swap_page(), extra loops and early-exitings will decrease while not being completely removed as a large folio might get partially tagged in corner cases such as, 1. a large folio in swapcache can be partially unmapped, thus, MTE tags for the unmapped pages will be invalidated; 2. users might use mprotect() to set MTEs on a part of a large folio. arch_thp_swp_supported() is dropped since ARM64 MTE was the only one who needed it. Link: https://lkml.kernel.org/r/20240322114136.61386-2-21cnbao@gmail.com Signed-off-by: Barry Song Reviewed-by: Steven Price Acked-by: Chris Li Reviewed-by: Ryan Roberts Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: David Hildenbrand Cc: Kemeng Shi Cc: "Matthew Wilcox (Oracle)" Cc: Anshuman Khandual Cc: Peter Collingbourne Cc: Yosry Ahmed Cc: Peter Xu Cc: Lorenzo Stoakes Cc: "Mike Rapoport (IBM)" Cc: Hugh Dickins Cc: "Aneesh Kumar K.V" Cc: Rick Edgecombe Signed-off-by: Andrew Morton --- arch/arm64/include/asm/pgtable.h | 19 ++--------------- arch/arm64/mm/mteswap.c | 45 ++++++++++++++++++++++++++++++++++++++++ include/linux/huge_mm.h | 12 ----------- include/linux/pgtable.h | 2 +- mm/internal.h | 14 +++++++++++++ mm/memory.c | 2 +- mm/page_io.c | 2 +- mm/shmem.c | 2 +- mm/swap_slots.c | 2 +- mm/swapfile.c | 2 +- 10 files changed, 67 insertions(+), 35 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 6870b60158fc..9fd8613b2db2 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -49,12 +49,6 @@ __flush_tlb_range(vma, addr, end, PUD_SIZE, false, 1) #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -static inline bool arch_thp_swp_supported(void) -{ - return !system_supports_mte(); -} -#define arch_thp_swp_supported arch_thp_swp_supported - /* * Outside of a few very special situations (e.g. hibernation), we always * use broadcast TLB invalidation instructions, therefore a spurious page @@ -1282,12 +1276,7 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, #ifdef CONFIG_ARM64_MTE #define __HAVE_ARCH_PREPARE_TO_SWAP -static inline int arch_prepare_to_swap(struct page *page) -{ - if (system_supports_mte()) - return mte_save_tags(page); - return 0; -} +extern int arch_prepare_to_swap(struct folio *folio); #define __HAVE_ARCH_SWAP_INVALIDATE static inline void arch_swap_invalidate_page(int type, pgoff_t offset) @@ -1303,11 +1292,7 @@ static inline void arch_swap_invalidate_area(int type) } #define __HAVE_ARCH_SWAP_RESTORE -static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio) -{ - if (system_supports_mte()) - mte_restore_tags(entry, &folio->page); -} +extern void arch_swap_restore(swp_entry_t entry, struct folio *folio); #endif /* CONFIG_ARM64_MTE */ diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c index a31833e3ddc5..63e8d72f202a 100644 --- a/arch/arm64/mm/mteswap.c +++ b/arch/arm64/mm/mteswap.c @@ -68,6 +68,13 @@ void mte_invalidate_tags(int type, pgoff_t offset) mte_free_tag_storage(tags); } +static inline void __mte_invalidate_tags(struct page *page) +{ + swp_entry_t entry = page_swap_entry(page); + + mte_invalidate_tags(swp_type(entry), swp_offset(entry)); +} + void mte_invalidate_tags_area(int type) { swp_entry_t entry = swp_entry(type, 0); @@ -83,3 +90,41 @@ void mte_invalidate_tags_area(int type) } xa_unlock(&mte_pages); } + +int arch_prepare_to_swap(struct folio *folio) +{ + long i, nr; + int err; + + if (!system_supports_mte()) + return 0; + + nr = folio_nr_pages(folio); + + for (i = 0; i < nr; i++) { + err = mte_save_tags(folio_page(folio, i)); + if (err) + goto out; + } + return 0; + +out: + while (i--) + __mte_invalidate_tags(folio_page(folio, i)); + return err; +} + +void arch_swap_restore(swp_entry_t entry, struct folio *folio) +{ + long i, nr; + + if (!system_supports_mte()) + return; + + nr = folio_nr_pages(folio); + + for (i = 0; i < nr; i++) { + mte_restore_tags(entry, folio_page(folio, i)); + entry.val++; + } +} diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 0e16451adaba..7576025db55d 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -532,16 +532,4 @@ static inline int split_folio_to_order(struct folio *folio, int new_order) #define split_folio_to_list(f, l) split_folio_to_list_to_order(f, l, 0) #define split_folio(f) split_folio_to_order(f, 0) -/* - * archs that select ARCH_WANTS_THP_SWAP but don't support THP_SWP due to - * limitations in the implementation like arm64 MTE can override this to - * false - */ -#ifndef arch_thp_swp_supported -static inline bool arch_thp_swp_supported(void) -{ - return true; -} -#endif - #endif /* _LINUX_HUGE_MM_H */ diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 2a1c044ae467..600e17d03659 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1050,7 +1050,7 @@ static inline int arch_unmap_one(struct mm_struct *mm, * prototypes must be defined in the arch-specific asm/pgtable.h file. */ #ifndef __HAVE_ARCH_PREPARE_TO_SWAP -static inline int arch_prepare_to_swap(struct page *page) +static inline int arch_prepare_to_swap(struct folio *folio) { return 0; } diff --git a/mm/internal.h b/mm/internal.h index 63bdac6d0413..6c8d3844b6a3 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -76,6 +76,20 @@ static inline int folio_nr_pages_mapped(struct folio *folio) return atomic_read(&folio->_nr_pages_mapped) & FOLIO_PAGES_MAPPED; } +/* + * Retrieve the first entry of a folio based on a provided entry within the + * folio. We cannot rely on folio->swap as there is no guarantee that it has + * been initialized. Used for calling arch_swap_restore() + */ +static inline swp_entry_t folio_swap(swp_entry_t entry, struct folio *folio) +{ + swp_entry_t swap = { + .val = ALIGN_DOWN(entry.val, folio_nr_pages(folio)), + }; + + return swap; +} + static inline void *folio_raw_mapping(struct folio *folio) { unsigned long mapping = (unsigned long)folio->mapping; diff --git a/mm/memory.c b/mm/memory.c index c859a09b4f72..805cebb6fd72 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4190,7 +4190,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) * when reading from swap. This metadata may be indexed by swap entry * so this must be called before swap_free(). */ - arch_swap_restore(entry, folio); + arch_swap_restore(folio_swap(entry, folio), folio); /* * Remove the swap entry and conditionally try to free up the swapcache. diff --git a/mm/page_io.c b/mm/page_io.c index ae2b49055e43..a9a7c236aecc 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -189,7 +189,7 @@ int swap_writepage(struct page *page, struct writeback_control *wbc) * Arch code may have to preserve more data than just the page * contents, e.g. memory tags. */ - ret = arch_prepare_to_swap(&folio->page); + ret = arch_prepare_to_swap(folio); if (ret) { folio_mark_dirty(folio); folio_unlock(folio); diff --git a/mm/shmem.c b/mm/shmem.c index 94ab99b6b574..98985179f495 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1907,7 +1907,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, * Some architectures may have to restore extra metadata to the * folio after reading from swap. */ - arch_swap_restore(swap, folio); + arch_swap_restore(folio_swap(swap, folio), folio); if (shmem_should_replace_folio(folio, gfp)) { error = shmem_replace_folio(&folio, gfp, info, index); diff --git a/mm/swap_slots.c b/mm/swap_slots.c index 90973ce7881d..53abeaf1371d 100644 --- a/mm/swap_slots.c +++ b/mm/swap_slots.c @@ -310,7 +310,7 @@ swp_entry_t folio_alloc_swap(struct folio *folio) entry.val = 0; if (folio_test_large(folio)) { - if (IS_ENABLED(CONFIG_THP_SWAP) && arch_thp_swp_supported()) + if (IS_ENABLED(CONFIG_THP_SWAP)) get_swap_pages(1, &entry, folio_nr_pages(folio)); goto out; } diff --git a/mm/swapfile.c b/mm/swapfile.c index 4919423cce76..5e6d2304a2a4 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1806,7 +1806,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, * when reading from swap. This metadata may be indexed by swap entry * so this must be called before swap_free(). */ - arch_swap_restore(entry, folio); + arch_swap_restore(folio_swap(entry, folio), folio); dec_mm_counter(vma->vm_mm, MM_SWAPENTS); inc_mm_counter(vma->vm_mm, MM_ANONPAGES); -- cgit v1.2.3 From 51718e25c53f58fe9fa515f219367632cce4914d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 26 Mar 2024 17:10:29 +0000 Subject: mm: convert arch_clear_hugepage_flags to take a folio All implementations that aren't no-ops just set a bit in the flags, and we want to use the folio flags rather than the page flags for that. Rename it to arch_clear_hugetlb_flags() while we're touching it so nobody thinks it's used for THP. [willy@infradead.org: fix arm64 build] Link: https://lkml.kernel.org/r/ZgQvNKGdlDkwhQEX@casper.infradead.org Link: https://lkml.kernel.org/r/20240326171045.410737-8-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Hildenbrand Signed-off-by: Andrew Morton --- arch/arm/include/asm/hugetlb.h | 6 +++--- arch/arm64/include/asm/hugetlb.h | 6 +++--- arch/riscv/include/asm/hugetlb.h | 6 +++--- arch/s390/include/asm/hugetlb.h | 6 +++--- arch/sh/include/asm/hugetlb.h | 6 +++--- include/linux/hugetlb.h | 6 +++--- mm/hugetlb.c | 4 ++-- 7 files changed, 20 insertions(+), 20 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm/include/asm/hugetlb.h b/arch/arm/include/asm/hugetlb.h index a3a82b7158d4..b766c4b373f6 100644 --- a/arch/arm/include/asm/hugetlb.h +++ b/arch/arm/include/asm/hugetlb.h @@ -15,10 +15,10 @@ #include #include -static inline void arch_clear_hugepage_flags(struct page *page) +static inline void arch_clear_hugetlb_flags(struct folio *folio) { - clear_bit(PG_dcache_clean, &page->flags); + clear_bit(PG_dcache_clean, &folio->flags); } -#define arch_clear_hugepage_flags arch_clear_hugepage_flags +#define arch_clear_hugetlb_flags arch_clear_hugetlb_flags #endif /* _ASM_ARM_HUGETLB_H */ diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h index 2ddc33d93b13..3954cbd2ff56 100644 --- a/arch/arm64/include/asm/hugetlb.h +++ b/arch/arm64/include/asm/hugetlb.h @@ -18,11 +18,11 @@ extern bool arch_hugetlb_migration_supported(struct hstate *h); #endif -static inline void arch_clear_hugepage_flags(struct page *page) +static inline void arch_clear_hugetlb_flags(struct folio *folio) { - clear_bit(PG_dcache_clean, &page->flags); + clear_bit(PG_dcache_clean, &folio->flags); } -#define arch_clear_hugepage_flags arch_clear_hugepage_flags +#define arch_clear_hugetlb_flags arch_clear_hugetlb_flags pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags); #define arch_make_huge_pte arch_make_huge_pte diff --git a/arch/riscv/include/asm/hugetlb.h b/arch/riscv/include/asm/hugetlb.h index 22deb7a2a6ec..b1ce97a9dbfc 100644 --- a/arch/riscv/include/asm/hugetlb.h +++ b/arch/riscv/include/asm/hugetlb.h @@ -5,11 +5,11 @@ #include #include -static inline void arch_clear_hugepage_flags(struct page *page) +static inline void arch_clear_hugetlb_flags(struct folio *folio) { - clear_bit(PG_dcache_clean, &page->flags); + clear_bit(PG_dcache_clean, &folio->flags); } -#define arch_clear_hugepage_flags arch_clear_hugepage_flags +#define arch_clear_hugetlb_flags arch_clear_hugetlb_flags #ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION bool arch_hugetlb_migration_supported(struct hstate *h); diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index deb198a61039..ce5f4fe8be4d 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -39,11 +39,11 @@ static inline int prepare_hugepage_range(struct file *file, return 0; } -static inline void arch_clear_hugepage_flags(struct page *page) +static inline void arch_clear_hugetlb_flags(struct folio *folio) { - clear_bit(PG_arch_1, &page->flags); + clear_bit(PG_arch_1, &folio->flags); } -#define arch_clear_hugepage_flags arch_clear_hugepage_flags +#define arch_clear_hugetlb_flags arch_clear_hugetlb_flags static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep, unsigned long sz) diff --git a/arch/sh/include/asm/hugetlb.h b/arch/sh/include/asm/hugetlb.h index 4d3ba39e681c..75028bd568ba 100644 --- a/arch/sh/include/asm/hugetlb.h +++ b/arch/sh/include/asm/hugetlb.h @@ -27,11 +27,11 @@ static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, return *ptep; } -static inline void arch_clear_hugepage_flags(struct page *page) +static inline void arch_clear_hugetlb_flags(struct folio *folio) { - clear_bit(PG_dcache_clean, &page->flags); + clear_bit(PG_dcache_clean, &folio->flags); } -#define arch_clear_hugepage_flags arch_clear_hugepage_flags +#define arch_clear_hugetlb_flags arch_clear_hugetlb_flags #include diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index d748628efc5e..03fc6d625068 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -836,9 +836,9 @@ static inline int is_hugepage_only_range(struct mm_struct *mm, #define is_hugepage_only_range is_hugepage_only_range #endif -#ifndef arch_clear_hugepage_flags -static inline void arch_clear_hugepage_flags(struct page *page) { } -#define arch_clear_hugepage_flags arch_clear_hugepage_flags +#ifndef arch_clear_hugetlb_flags +static inline void arch_clear_hugetlb_flags(struct folio *folio) { } +#define arch_clear_hugetlb_flags arch_clear_hugetlb_flags #endif #ifndef arch_make_huge_pte diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 2e984554ec63..88bb38df1701 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1726,7 +1726,7 @@ static void add_hugetlb_folio(struct hstate *h, struct folio *folio, */ return; - arch_clear_hugepage_flags(&folio->page); + arch_clear_hugetlb_flags(folio); enqueue_hugetlb_folio(h, folio); } @@ -2024,7 +2024,7 @@ void free_huge_folio(struct folio *folio) spin_unlock_irqrestore(&hugetlb_lock, flags); update_and_free_hugetlb_folio(h, folio, true); } else { - arch_clear_hugepage_flags(&folio->page); + arch_clear_hugetlb_flags(folio); enqueue_hugetlb_folio(h, folio); spin_unlock_irqrestore(&hugetlb_lock, flags); } -- cgit v1.2.3 From 12d712dc8e4f1a30b18f8c3789adfbc07f5eb050 Mon Sep 17 00:00:00 2001 From: Shiqi Liu Date: Sun, 21 Apr 2024 14:33:28 +0800 Subject: arm64/sysreg: Update PIE permission encodings Fix left shift overflow issue when the parameter idx is greater than or equal to 8 in the calculation of perm in PIRx_ELx_PERM macro. Fix this by modifying the encoding to use a long integer type. Signed-off-by: Shiqi Liu Acked-by: Marc Zyngier Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20240421063328.29710-1-shiqiliu@hust.edu.cn Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 24 ++++++++++++------------ tools/arch/arm64/include/asm/sysreg.h | 24 ++++++++++++------------ 2 files changed, 24 insertions(+), 24 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 9e8999592f3a..af3b206fa423 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1036,18 +1036,18 @@ * Permission Indirection Extension (PIE) permission encodings. * Encodings with the _O suffix, have overlays applied (Permission Overlay Extension). */ -#define PIE_NONE_O 0x0 -#define PIE_R_O 0x1 -#define PIE_X_O 0x2 -#define PIE_RX_O 0x3 -#define PIE_RW_O 0x5 -#define PIE_RWnX_O 0x6 -#define PIE_RWX_O 0x7 -#define PIE_R 0x8 -#define PIE_GCS 0x9 -#define PIE_RX 0xa -#define PIE_RW 0xc -#define PIE_RWX 0xe +#define PIE_NONE_O UL(0x0) +#define PIE_R_O UL(0x1) +#define PIE_X_O UL(0x2) +#define PIE_RX_O UL(0x3) +#define PIE_RW_O UL(0x5) +#define PIE_RWnX_O UL(0x6) +#define PIE_RWX_O UL(0x7) +#define PIE_R UL(0x8) +#define PIE_GCS UL(0x9) +#define PIE_RX UL(0xa) +#define PIE_RW UL(0xc) +#define PIE_RWX UL(0xe) #define PIRx_ELx_PERM(idx, perm) ((perm) << ((idx) * 4)) diff --git a/tools/arch/arm64/include/asm/sysreg.h b/tools/arch/arm64/include/asm/sysreg.h index ccc13e991376..cd8420e8c3ad 100644 --- a/tools/arch/arm64/include/asm/sysreg.h +++ b/tools/arch/arm64/include/asm/sysreg.h @@ -701,18 +701,18 @@ * Permission Indirection Extension (PIE) permission encodings. * Encodings with the _O suffix, have overlays applied (Permission Overlay Extension). */ -#define PIE_NONE_O 0x0 -#define PIE_R_O 0x1 -#define PIE_X_O 0x2 -#define PIE_RX_O 0x3 -#define PIE_RW_O 0x5 -#define PIE_RWnX_O 0x6 -#define PIE_RWX_O 0x7 -#define PIE_R 0x8 -#define PIE_GCS 0x9 -#define PIE_RX 0xa -#define PIE_RW 0xc -#define PIE_RWX 0xe +#define PIE_NONE_O UL(0x0) +#define PIE_R_O UL(0x1) +#define PIE_X_O UL(0x2) +#define PIE_RX_O UL(0x3) +#define PIE_RW_O UL(0x5) +#define PIE_RWnX_O UL(0x6) +#define PIE_RWX_O UL(0x7) +#define PIE_R UL(0x8) +#define PIE_GCS UL(0x9) +#define PIE_RX UL(0xa) +#define PIE_RW UL(0xc) +#define PIE_RWX UL(0xe) #define PIRx_ELx_PERM(idx, perm) ((perm) << ((idx) * 4)) -- cgit v1.2.3 From 3a2d2ca42975d7550d2ced663c64e54ab83ece68 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 22 Apr 2024 12:35:22 +0100 Subject: arm64: assembler: update stale comment for disable_step_tsk A comment in the disable_step_tsk macro refers to synchronising with enable_dbg, as historically the entry used enable_dbg to unmask debug exceptions after disabling single-stepping. These days the unmasking happens in entry-common.c via local_daif_restore() or local_daif_inherit(), so the comment is stale. This logic is likely to chang in future, so it would be best to avoid referring to those macros specifically. Update the comment to take this into account, and describe it in terms of clearing DAIF.D so that it doesn't macro where this logic lives nor what it is called. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Mark Brown Cc: Will Deacon Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20240422113523.4070414-2-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/assembler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index ab8b396428da..b27dac4a9c0f 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -59,7 +59,7 @@ mrs \tmp, mdscr_el1 bic \tmp, \tmp, #DBG_MDSCR_SS msr mdscr_el1, \tmp - isb // Synchronise with enable_dbg + isb // Take effect before a subsequent clear of DAIF.D 9990: .endm -- cgit v1.2.3 From 080297beccf77433053621a222c332ae603a1a84 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 22 Apr 2024 12:35:23 +0100 Subject: arm64: defer clearing DAIF.D For historical reasons we unmask debug exceptions in __cpu_setup(), but it's not necessary to unmask debug exceptions this early in the boot/idle entry paths. It would be better to unmask debug exceptions later in C code as this simplifies the current code and will make it easier to rework exception masking logic to handle non-DAIF bits in future (e.g. PSTATE.{ALLINT,PM}). We started clearing DAIF.D in __cpu_setup() in commit: 2ce39ad15182604b ("arm64: debug: unmask PSTATE.D earlier") At the time, we needed to ensure that DAIF.D was clear on the primary CPU before scheduling and preemption were possible, and chose to do this in __cpu_setup() so that this occurred in the same place for primary and secondary CPUs. As we cannot handle debug exceptions this early, we placed an ISB between initializing MDSCR_EL1 and clearing DAIF.D so that no exceptions should be triggered. Subsequently we rewrote the return-from-{idle,suspend} paths to use __cpu_setup() in commit: cabe1c81ea5be983 ("arm64: Change cpu_resume() to enable mmu early then access sleep_sp by va") ... which allowed for earlier use of the MMU and had the desirable property of using the same code to reset the CPU in the cold and warm boot paths. This introduced a bug: DAIF.D was clear while cpu_do_resume() restored MDSCR_EL1 and other control registers (e.g. breakpoint/watchpoint control/value registers), and so we could unexpectedly take debug exceptions. We fixed that in commit: 744c6c37cc18705d ("arm64: kernel: Fix unmasked debug exceptions when restoring mdscr_el1") ... by having cpu_do_resume() use the `disable_dbg` macro to set DAIF.D before restoring MDSCR_EL1 and other control registers. This relies on DAIF.D being subsequently cleared again in cpu_resume(). Subsequently we reworked DAIF masking in commit: 0fbeb318754860b3 ("arm64: explicitly mask all exceptions") ... where we began enforcing a policy that DAIF.D being set implies all other DAIF bits are set, and so e.g. we cannot take an IRQ while DAIF.D is set. As part of this the use of `disable_dbg` in cpu_resume() was replaced with `disable_daif` for consistency with the rest of the kernel. These days, there's no need to clear DAIF.D early within __cpu_setup(): * setup_arch() clears DAIF.DA before scheduling and preemption are possible on the primary CPU, avoiding the problem we we originally trying to work around. Note: DAIF.IF get cleared later when interrupts are enabled for the first time. * secondary_start_kernel() clears all DAIF bits before scheduling and preemption are possible on secondary CPUs. Note: with pseudo-NMI, the PMR is initialized here before any DAIF bits are cleared. Similar will be necessary for the architectural NMI. * cpu_suspend() restores all DAIF bits when returning from idle, ensuring that we don't unexpectedly leave DAIF.D clear or set. Note: with pseudo-NMI, the PMR is initialized here before DAIF is cleared. Similar will be necessary for the architectural NMI. This patch removes the unmasking of debug exceptions from __cpu_setup(), relying on the above locations to initialize DAIF. This allows some other cleanups: * It is no longer necessary for cpu_resume() to explicitly mask debug (or other) exceptions, as it is always called with all DAIF bits set. Thus we drop the use of `disable_daif`. * The `enable_dbg` macro is no longer used, and so is dropped. * It is no longer necessary to have an ISB immediately after initializing MDSCR_EL1 in __cpu_setup(), and we can revert to relying on the context synchronization that occurs when the MMU is enabled between __cpu_setup() and code which clears DAIF.D Comments are added to setup_arch() and secondary_start_kernel() to explain the initial unmasking of the DAIF bits. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Mark Brown Cc: Will Deacon Link: https://lore.kernel.org/r/20240422113523.4070414-3-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/assembler.h | 4 ---- arch/arm64/kernel/setup.c | 11 +++++++++-- arch/arm64/kernel/smp.c | 7 +++++++ arch/arm64/mm/proc.S | 10 ---------- 4 files changed, 16 insertions(+), 16 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index b27dac4a9c0f..6f9ad2d2bb40 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -50,10 +50,6 @@ msr daif, \flags .endm - .macro enable_dbg - msr daifclr, #8 - .endm - .macro disable_step_tsk, flgs, tmp tbz \flgs, #TIF_SINGLESTEP, 9990f mrs \tmp, mdscr_el1 diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 65a052bf741f..a096e2451044 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -298,8 +298,15 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p) dynamic_scs_init(); /* - * Unmask SError as soon as possible after initializing earlycon so - * that we can report any SErrors immediately. + * The primary CPU enters the kernel with all DAIF exceptions masked. + * + * We must unmask Debug and SError before preemption or scheduling is + * possible to ensure that these are consistently unmasked across + * threads, and we want to unmask SError as soon as possible after + * initializing earlycon so that we can report any SErrors immediately. + * + * IRQ and FIQ will be unmasked after the root irqchip has been + * detected and initialized. */ local_daif_restore(DAIF_PROCCTX_NOIRQ); diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 4ced34f62dab..31c8b3094dd7 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -264,6 +264,13 @@ asmlinkage notrace void secondary_start_kernel(void) set_cpu_online(cpu, true); complete(&cpu_running); + /* + * Secondary CPUs enter the kernel with all DAIF exceptions masked. + * + * As with setup_arch() we must unmask Debug and SError exceptions, and + * as the root irqchip has already been detected and initialized we can + * unmask IRQ and FIQ at the same time. + */ local_daif_restore(DAIF_PROCCTX); /* diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 9d40f3ffd8d2..f4bc6c5bac06 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -135,14 +135,6 @@ SYM_FUNC_START(cpu_do_resume) msr tcr_el1, x8 msr vbar_el1, x9 - - /* - * __cpu_setup() cleared MDSCR_EL1.MDE and friends, before unmasking - * debug exceptions. By restoring MDSCR_EL1 here, we may take a debug - * exception. Mask them until local_daif_restore() in cpu_suspend() - * resets them. - */ - disable_daif msr mdscr_el1, x10 msr sctlr_el1, x12 @@ -466,8 +458,6 @@ SYM_FUNC_START(__cpu_setup) msr cpacr_el1, xzr // Reset cpacr_el1 mov x1, #1 << 12 // Reset mdscr_el1 and disable msr mdscr_el1, x1 // access to the DCC from EL0 - isb // Unmask debug exceptions now, - enable_dbg // since this is per-cpu reset_pmuserenr_el0 x1 // Disable PMU access from EL0 reset_amuserenr_el0 x1 // Disable AMU access from EL0 -- cgit v1.2.3 From b5b85bd713b1623c192754cd39a3351fa0c13717 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 23 Apr 2024 16:05:10 +0100 Subject: KVM: arm64: Move guest_owns_fp_regs() to increase its scope guest_owns_fp_regs() will be used to check fpsimd state ownership across kvm/arm64. Therefore, move it to kvm_host.h to widen its scope. Moreover, the host state is not per-vcpu anymore, the vcpu parameter isn't used, so remove it as well. No functional change intended. Signed-off-by: Fuad Tabba Reviewed-by: Mark Brown Acked-by: Oliver Upton Link: https://lore.kernel.org/r/20240423150538.2103045-3-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 6 ++++++ arch/arm64/kvm/hyp/include/hyp/switch.h | 6 ------ arch/arm64/kvm/hyp/nvhe/switch.c | 2 +- arch/arm64/kvm/hyp/vhe/switch.c | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 2b63fdfad5b2..2889e1d8a8c1 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1207,6 +1207,12 @@ DECLARE_KVM_HYP_PER_CPU(struct kvm_host_data, kvm_host_data); &this_cpu_ptr_hyp_sym(kvm_host_data)->f) #endif +/* Check whether the FP regs are owned by the guest */ +static inline bool guest_owns_fp_regs(void) +{ + return *host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED; +} + static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt) { /* The host's MPIDR is immutable, so let's set it up at boot time */ diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 2629420d0659..38961b6b1a18 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -39,12 +39,6 @@ struct kvm_exception_table_entry { extern struct kvm_exception_table_entry __start___kvm_ex_table; extern struct kvm_exception_table_entry __stop___kvm_ex_table; -/* Check whether the FP regs are owned by the guest */ -static inline bool guest_owns_fp_regs(struct kvm_vcpu *vcpu) -{ - return *host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED; -} - /* Save the 32-bit only FPSIMD system register state */ static inline void __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu) { diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 1f82d531a494..1b22654a3180 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -53,7 +53,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu) val |= CPTR_EL2_TSM; } - if (!guest_owns_fp_regs(vcpu)) { + if (!guest_owns_fp_regs()) { if (has_hvhe()) val &= ~(CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN | CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN); diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index b92f9fe2d50e..7286db75b8d6 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -75,7 +75,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu) val |= CPTR_EL2_TAM; - if (guest_owns_fp_regs(vcpu)) { + if (guest_owns_fp_regs()) { if (vcpu_has_sve(vcpu)) val |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN; } else { -- cgit v1.2.3 From f11290e0aa6e40e6823f80c7dcdacf033a54aaeb Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 23 Apr 2024 16:05:11 +0100 Subject: KVM: arm64: Refactor checks for FP state ownership To avoid direct comparison against the fp_owner enum, add a new function that performs the check, host_owns_fp_regs(), to complement the existing guest_owns_fp_regs(). To check for fpsimd state ownership, use the helpers instead of directly using the enums. No functional change intended. Suggested-by: Marc Zyngier Signed-off-by: Fuad Tabba Reviewed-by: Mark Brown Acked-by: Oliver Upton Link: https://lore.kernel.org/r/20240423150538.2103045-4-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_emulate.h | 6 ++---- arch/arm64/include/asm/kvm_host.h | 6 ++++++ arch/arm64/kvm/fpsimd.c | 5 ++--- arch/arm64/kvm/hyp/include/hyp/switch.h | 2 +- arch/arm64/kvm/hyp/nvhe/switch.c | 2 +- arch/arm64/kvm/hyp/vhe/switch.c | 2 +- 6 files changed, 13 insertions(+), 10 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 3d65d9413608..deaa88b972ec 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -587,16 +587,14 @@ static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu) } else if (has_hvhe()) { val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN); - if (!vcpu_has_sve(vcpu) || - (*host_data_ptr(fp_owner) != FP_STATE_GUEST_OWNED)) + if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs()) val |= CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN; if (cpus_have_final_cap(ARM64_SME)) val |= CPACR_EL1_SMEN_EL1EN | CPACR_EL1_SMEN_EL0EN; } else { val = CPTR_NVHE_EL2_RES1; - if (vcpu_has_sve(vcpu) && - (*host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED)) + if (vcpu_has_sve(vcpu) && guest_owns_fp_regs()) val |= CPTR_EL2_TZ; if (cpus_have_final_cap(ARM64_SME)) val &= ~CPTR_EL2_TSM; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 2889e1d8a8c1..4609d1b9ddde 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1213,6 +1213,12 @@ static inline bool guest_owns_fp_regs(void) return *host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED; } +/* Check whether the FP regs are owned by the host */ +static inline bool host_owns_fp_regs(void) +{ + return *host_data_ptr(fp_owner) == FP_STATE_HOST_OWNED; +} + static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt) { /* The host's MPIDR is immutable, so let's set it up at boot time */ diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 7507dcc4e553..d5837d65e4a1 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -141,8 +141,7 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) WARN_ON_ONCE(!irqs_disabled()); - if (*host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED) { - + if (guest_owns_fp_regs()) { /* * Currently we do not support SME guests so SVCR is * always 0 and we just need a variable to point to. @@ -195,7 +194,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) isb(); } - if (*host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED) { + if (guest_owns_fp_regs()) { if (vcpu_has_sve(vcpu)) { __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR); diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 38961b6b1a18..38b4e2623b02 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -370,7 +370,7 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) isb(); /* Write out the host state if it's in the registers */ - if (*host_data_ptr(fp_owner) == FP_STATE_HOST_OWNED) + if (host_owns_fp_regs()) __fpsimd_save_state(*host_data_ptr(fpsimd_state)); /* Restore the guest state */ diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 1b22654a3180..5d2d4d6465e8 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -337,7 +337,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) __sysreg_restore_state_nvhe(host_ctxt); - if (*host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED) + if (guest_owns_fp_regs()) __fpsimd_save_fpexc32(vcpu); __debug_switch_to_host(vcpu); diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 7286db75b8d6..93f78df8b0f6 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -258,7 +258,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) sysreg_restore_host_state_vhe(host_ctxt); - if (*host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED) + if (guest_owns_fp_regs()) __fpsimd_save_fpexc32(vcpu); __debug_switch_to_host(vcpu); -- cgit v1.2.3 From d48965bc47e40b06034315260b18368d6ad152b4 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 23 Apr 2024 16:05:19 +0100 Subject: KVM: arm64: Do not map the host fpsimd state to hyp in pKVM pKVM maintains its own state at EL2 for tracking the host fpsimd state. Therefore, no need to map and share the host's view with it. Signed-off-by: Fuad Tabba Reviewed-by: Mark Brown Acked-by: Oliver Upton Link: https://lore.kernel.org/r/20240423150538.2103045-12-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 3 --- arch/arm64/kvm/fpsimd.c | 31 ++++--------------------------- arch/arm64/kvm/reset.c | 1 - 3 files changed, 4 insertions(+), 31 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 4609d1b9ddde..74dc5a60f171 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -663,8 +663,6 @@ struct kvm_vcpu_arch { struct kvm_guest_debug_arch vcpu_debug_state; struct kvm_guest_debug_arch external_debug_state; - struct task_struct *parent_task; - /* VGIC state */ struct vgic_cpu vgic_cpu; struct arch_timer_cpu timer_cpu; @@ -1262,7 +1260,6 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu); -void kvm_vcpu_unshare_task_fp(struct kvm_vcpu *vcpu); static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr) { diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index d5837d65e4a1..63a6f82934a6 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -14,19 +14,6 @@ #include #include -void kvm_vcpu_unshare_task_fp(struct kvm_vcpu *vcpu) -{ - struct task_struct *p = vcpu->arch.parent_task; - struct user_fpsimd_state *fpsimd; - - if (!is_protected_kvm_enabled() || !p) - return; - - fpsimd = &p->thread.uw.fpsimd_state; - kvm_unshare_hyp(fpsimd, fpsimd + 1); - put_task_struct(p); -} - /* * Called on entry to KVM_RUN unless this vcpu previously ran at least * once and the most recent prior KVM_RUN for this vcpu was called from @@ -38,28 +25,18 @@ void kvm_vcpu_unshare_task_fp(struct kvm_vcpu *vcpu) */ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu) { - int ret; - struct user_fpsimd_state *fpsimd = ¤t->thread.uw.fpsimd_state; + int ret; - kvm_vcpu_unshare_task_fp(vcpu); + /* pKVM has its own tracking of the host fpsimd state. */ + if (is_protected_kvm_enabled()) + return 0; /* Make sure the host task fpsimd state is visible to hyp: */ ret = kvm_share_hyp(fpsimd, fpsimd + 1); if (ret) return ret; - /* - * We need to keep current's task_struct pinned until its data has been - * unshared with the hypervisor to make sure it is not re-used by the - * kernel and donated to someone else while already shared -- see - * kvm_vcpu_unshare_task_fp() for the matching put_task_struct(). - */ - if (is_protected_kvm_enabled()) { - get_task_struct(current); - vcpu->arch.parent_task = current; - } - return 0; } diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 68d1d05672bd..1b7b58cb121f 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -151,7 +151,6 @@ void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu) { void *sve_state = vcpu->arch.sve_state; - kvm_vcpu_unshare_task_fp(vcpu); kvm_unshare_hyp(vcpu, vcpu + 1); if (sve_state) kvm_unshare_hyp(sve_state, sve_state + vcpu_sve_state_size(vcpu)); -- cgit v1.2.3 From 948e1a53c2e95ad4c03cc6201edcb5d92e87d841 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 23 Apr 2024 16:05:24 +0100 Subject: KVM: arm64: Simplify vgic-v3 hypercalls Consolidate the GICv3 VMCR accessor hypercalls into the APR save/restore hypercalls so that all of the EL2 GICv3 state is covered by a single pair of hypercalls. Signed-off-by: Fuad Tabba Acked-by: Oliver Upton Link: https://lore.kernel.org/r/20240423150538.2103045-17-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_asm.h | 8 ++------ arch/arm64/include/asm/kvm_hyp.h | 4 ++-- arch/arm64/kvm/arm.c | 5 ++--- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 24 ++++++------------------ arch/arm64/kvm/hyp/vgic-v3-sr.c | 27 +++++++++++++++++++++++---- arch/arm64/kvm/vgic/vgic-v2.c | 9 +-------- arch/arm64/kvm/vgic/vgic-v3.c | 23 ++--------------------- arch/arm64/kvm/vgic/vgic.c | 11 ----------- arch/arm64/kvm/vgic/vgic.h | 2 -- include/kvm/arm_vgic.h | 1 - 10 files changed, 38 insertions(+), 76 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 24b5e6b23417..a6330460d9e5 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -73,10 +73,8 @@ enum __kvm_host_smccc_func { __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_range, __KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context, __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff, - __KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr, - __KVM_HOST_SMCCC_FUNC___vgic_v3_write_vmcr, - __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs, - __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs, + __KVM_HOST_SMCCC_FUNC___vgic_v3_save_vmcr_aprs, + __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_vmcr_aprs, __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_init_traps, __KVM_HOST_SMCCC_FUNC___pkvm_init_vm, __KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu, @@ -241,8 +239,6 @@ extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); extern void __kvm_adjust_pc(struct kvm_vcpu *vcpu); extern u64 __vgic_v3_get_gic_config(void); -extern u64 __vgic_v3_read_vmcr(void); -extern void __vgic_v3_write_vmcr(u32 vmcr); extern void __vgic_v3_init_lrs(void); extern u64 __kvm_get_mdcr_el2(void); diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 3e2a1ac0c9bb..3e80464f8953 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -80,8 +80,8 @@ void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if); void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if); void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if); void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if); -void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if); -void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if); +void __vgic_v3_save_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if); +void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if); int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu); #ifdef __KVM_NVHE_HYPERVISOR__ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 7f87fbb452c5..b6b6f60becdf 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -784,9 +784,8 @@ void kvm_vcpu_wfi(struct kvm_vcpu *vcpu) * doorbells to be signalled, should an interrupt become pending. */ preempt_disable(); - kvm_vgic_vmcr_sync(vcpu); vcpu_set_flag(vcpu, IN_WFI); - vgic_v4_put(vcpu); + kvm_vgic_put(vcpu); preempt_enable(); kvm_vcpu_halt(vcpu); @@ -794,7 +793,7 @@ void kvm_vcpu_wfi(struct kvm_vcpu *vcpu) preempt_disable(); vcpu_clear_flag(vcpu, IN_WFI); - vgic_v4_load(vcpu); + kvm_vgic_load(vcpu); preempt_enable(); } diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 26561c562f7a..d5c48dc98f67 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -175,16 +175,6 @@ static void handle___vgic_v3_get_gic_config(struct kvm_cpu_context *host_ctxt) cpu_reg(host_ctxt, 1) = __vgic_v3_get_gic_config(); } -static void handle___vgic_v3_read_vmcr(struct kvm_cpu_context *host_ctxt) -{ - cpu_reg(host_ctxt, 1) = __vgic_v3_read_vmcr(); -} - -static void handle___vgic_v3_write_vmcr(struct kvm_cpu_context *host_ctxt) -{ - __vgic_v3_write_vmcr(cpu_reg(host_ctxt, 1)); -} - static void handle___vgic_v3_init_lrs(struct kvm_cpu_context *host_ctxt) { __vgic_v3_init_lrs(); @@ -195,18 +185,18 @@ static void handle___kvm_get_mdcr_el2(struct kvm_cpu_context *host_ctxt) cpu_reg(host_ctxt, 1) = __kvm_get_mdcr_el2(); } -static void handle___vgic_v3_save_aprs(struct kvm_cpu_context *host_ctxt) +static void handle___vgic_v3_save_vmcr_aprs(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1); - __vgic_v3_save_aprs(kern_hyp_va(cpu_if)); + __vgic_v3_save_vmcr_aprs(kern_hyp_va(cpu_if)); } -static void handle___vgic_v3_restore_aprs(struct kvm_cpu_context *host_ctxt) +static void handle___vgic_v3_restore_vmcr_aprs(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1); - __vgic_v3_restore_aprs(kern_hyp_va(cpu_if)); + __vgic_v3_restore_vmcr_aprs(kern_hyp_va(cpu_if)); } static void handle___pkvm_init(struct kvm_cpu_context *host_ctxt) @@ -337,10 +327,8 @@ static const hcall_t host_hcall[] = { HANDLE_FUNC(__kvm_tlb_flush_vmid_range), HANDLE_FUNC(__kvm_flush_cpu_context), HANDLE_FUNC(__kvm_timer_set_cntvoff), - HANDLE_FUNC(__vgic_v3_read_vmcr), - HANDLE_FUNC(__vgic_v3_write_vmcr), - HANDLE_FUNC(__vgic_v3_save_aprs), - HANDLE_FUNC(__vgic_v3_restore_aprs), + HANDLE_FUNC(__vgic_v3_save_vmcr_aprs), + HANDLE_FUNC(__vgic_v3_restore_vmcr_aprs), HANDLE_FUNC(__pkvm_vcpu_init_traps), HANDLE_FUNC(__pkvm_init_vm), HANDLE_FUNC(__pkvm_init_vcpu), diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index 6cb638b184b1..7b397fad26f2 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -330,7 +330,7 @@ void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if) write_gicreg(0, ICH_HCR_EL2); } -void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if) +static void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if) { u64 val; u32 nr_pre_bits; @@ -363,7 +363,7 @@ void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if) } } -void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if) +static void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if) { u64 val; u32 nr_pre_bits; @@ -455,16 +455,35 @@ u64 __vgic_v3_get_gic_config(void) return val; } -u64 __vgic_v3_read_vmcr(void) +static u64 __vgic_v3_read_vmcr(void) { return read_gicreg(ICH_VMCR_EL2); } -void __vgic_v3_write_vmcr(u32 vmcr) +static void __vgic_v3_write_vmcr(u32 vmcr) { write_gicreg(vmcr, ICH_VMCR_EL2); } +void __vgic_v3_save_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if) +{ + __vgic_v3_save_aprs(cpu_if); + if (cpu_if->vgic_sre) + cpu_if->vgic_vmcr = __vgic_v3_read_vmcr(); +} + +void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if) +{ + /* + * If dealing with a GICv2 emulation on GICv3, VMCR_EL2.VFIQen + * is dependent on ICC_SRE_EL1.SRE, and we have to perform the + * VMCR_EL2 save/restore in the world switch. + */ + if (cpu_if->vgic_sre) + __vgic_v3_write_vmcr(cpu_if->vgic_vmcr); + __vgic_v3_restore_aprs(cpu_if); +} + static int __vgic_v3_bpr_min(void) { /* See Pseudocode for VPriorityGroup */ diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c index 7e9cdb78f7ce..ae5a44d5702d 100644 --- a/arch/arm64/kvm/vgic/vgic-v2.c +++ b/arch/arm64/kvm/vgic/vgic-v2.c @@ -464,17 +464,10 @@ void vgic_v2_load(struct kvm_vcpu *vcpu) kvm_vgic_global_state.vctrl_base + GICH_APR); } -void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu) -{ - struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - - cpu_if->vgic_vmcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VMCR); -} - void vgic_v2_put(struct kvm_vcpu *vcpu) { struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - vgic_v2_vmcr_sync(vcpu); + cpu_if->vgic_vmcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VMCR); cpu_if->vgic_apr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_APR); } diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index 4ea3340786b9..ed6e412cd74b 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -722,15 +722,7 @@ void vgic_v3_load(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; - /* - * If dealing with a GICv2 emulation on GICv3, VMCR_EL2.VFIQen - * is dependent on ICC_SRE_EL1.SRE, and we have to perform the - * VMCR_EL2 save/restore in the world switch. - */ - if (likely(cpu_if->vgic_sre)) - kvm_call_hyp(__vgic_v3_write_vmcr, cpu_if->vgic_vmcr); - - kvm_call_hyp(__vgic_v3_restore_aprs, cpu_if); + kvm_call_hyp(__vgic_v3_restore_vmcr_aprs, cpu_if); if (has_vhe()) __vgic_v3_activate_traps(cpu_if); @@ -738,24 +730,13 @@ void vgic_v3_load(struct kvm_vcpu *vcpu) WARN_ON(vgic_v4_load(vcpu)); } -void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu) -{ - struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; - - if (likely(cpu_if->vgic_sre)) - cpu_if->vgic_vmcr = kvm_call_hyp_ret(__vgic_v3_read_vmcr); -} - void vgic_v3_put(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + kvm_call_hyp(__vgic_v3_save_vmcr_aprs, cpu_if); WARN_ON(vgic_v4_put(vcpu)); - vgic_v3_vmcr_sync(vcpu); - - kvm_call_hyp(__vgic_v3_save_aprs, cpu_if); - if (has_vhe()) __vgic_v3_deactivate_traps(cpu_if); } diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index 4ec93587c8cd..fcc5747f51e9 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -939,17 +939,6 @@ void kvm_vgic_put(struct kvm_vcpu *vcpu) vgic_v3_put(vcpu); } -void kvm_vgic_vmcr_sync(struct kvm_vcpu *vcpu) -{ - if (unlikely(!irqchip_in_kernel(vcpu->kvm))) - return; - - if (kvm_vgic_global_state.type == VGIC_V2) - vgic_v2_vmcr_sync(vcpu); - else - vgic_v3_vmcr_sync(vcpu); -} - int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 0c2b82de8fa3..4b93528e6a89 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -214,7 +214,6 @@ int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address, void vgic_v2_init_lrs(void); void vgic_v2_load(struct kvm_vcpu *vcpu); void vgic_v2_put(struct kvm_vcpu *vcpu); -void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu); void vgic_v2_save_state(struct kvm_vcpu *vcpu); void vgic_v2_restore_state(struct kvm_vcpu *vcpu); @@ -253,7 +252,6 @@ bool vgic_v3_check_base(struct kvm *kvm); void vgic_v3_load(struct kvm_vcpu *vcpu); void vgic_v3_put(struct kvm_vcpu *vcpu); -void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu); bool vgic_has_its(struct kvm *kvm); int kvm_vgic_register_its_device(void); diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 47035946648e..0c3cce31e0a2 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -388,7 +388,6 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); void kvm_vgic_load(struct kvm_vcpu *vcpu); void kvm_vgic_put(struct kvm_vcpu *vcpu); -void kvm_vgic_vmcr_sync(struct kvm_vcpu *vcpu); #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) #define vgic_initialized(k) ((k)->arch.vgic.initialized) -- cgit v1.2.3 From d81a91af417c8f34dc3c3f8f90240e843d1c5c08 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Tue, 23 Apr 2024 16:05:25 +0100 Subject: KVM: arm64: Add is_pkvm_initialized() helper Add a helper allowing to check when the pkvm static key is enabled to ease the introduction of pkvm hooks in other parts of the code. Signed-off-by: Quentin Perret Signed-off-by: Fuad Tabba Acked-by: Oliver Upton Link: https://lore.kernel.org/r/20240423150538.2103045-18-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/virt.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 261d6e9df2e1..ebf4a9f943ed 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -82,6 +82,12 @@ bool is_kvm_arm_initialised(void); DECLARE_STATIC_KEY_FALSE(kvm_protected_mode_initialized); +static inline bool is_pkvm_initialized(void) +{ + return IS_ENABLED(CONFIG_KVM) && + static_branch_likely(&kvm_protected_mode_initialized); +} + /* Reports the availability of HYP mode */ static inline bool is_hyp_mode_available(void) { @@ -89,8 +95,7 @@ static inline bool is_hyp_mode_available(void) * If KVM protected mode is initialized, all CPUs must have been booted * in EL2. Avoid checking __boot_cpu_mode as CPUs now come up in EL1. */ - if (IS_ENABLED(CONFIG_KVM) && - static_branch_likely(&kvm_protected_mode_initialized)) + if (is_pkvm_initialized()) return true; return (__boot_cpu_mode[0] == BOOT_CPU_MODE_EL2 && @@ -104,8 +109,7 @@ static inline bool is_hyp_mode_mismatched(void) * If KVM protected mode is initialized, all CPUs must have been booted * in EL2. Avoid checking __boot_cpu_mode as CPUs now come up in EL1. */ - if (IS_ENABLED(CONFIG_KVM) && - static_branch_likely(&kvm_protected_mode_initialized)) + if (is_pkvm_initialized()) return false; return __boot_cpu_mode[0] != __boot_cpu_mode[1]; -- cgit v1.2.3 From b6ed4fa9411f7c17ebc69949c1df66dc12b2f827 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 23 Apr 2024 16:05:26 +0100 Subject: KVM: arm64: Introduce and use predicates that check for protected VMs In order to determine whether or not a VM or vcpu are protected, introduce helpers to query this state. While at it, use the vcpu helper to check vcpus protected state instead of the kvm one. Co-authored-by: Marc Zyngier Signed-off-by: Fuad Tabba Acked-by: Oliver Upton Link: https://lore.kernel.org/r/20240423150538.2103045-19-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 8 ++++---- arch/arm64/kvm/hyp/include/nvhe/pkvm.h | 5 +++++ arch/arm64/kvm/hyp/nvhe/switch.c | 6 ++---- 3 files changed, 11 insertions(+), 8 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 74dc5a60f171..0e6c186a6d6c 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -211,6 +211,7 @@ typedef unsigned int pkvm_handle_t; struct kvm_protected_vm { pkvm_handle_t handle; struct kvm_hyp_memcache teardown_mc; + bool enabled; }; struct kvm_mpidr_data { @@ -1295,10 +1296,9 @@ struct kvm *kvm_arch_alloc_vm(void); #define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE -static inline bool kvm_vm_is_protected(struct kvm *kvm) -{ - return false; -} +#define kvm_vm_is_protected(kvm) (is_protected_kvm_enabled() && (kvm)->arch.pkvm.enabled) + +#define vcpu_is_protected(vcpu) kvm_vm_is_protected((vcpu)->kvm) int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature); bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h index 20c3f6e13b99..22f374e9f532 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h @@ -53,6 +53,11 @@ pkvm_hyp_vcpu_to_hyp_vm(struct pkvm_hyp_vcpu *hyp_vcpu) return container_of(hyp_vcpu->vcpu.kvm, struct pkvm_hyp_vm, kvm); } +static inline bool pkvm_hyp_vcpu_is_protected(struct pkvm_hyp_vcpu *hyp_vcpu) +{ + return vcpu_is_protected(&hyp_vcpu->vcpu); +} + void pkvm_hyp_vm_table_init(void *tbl); void pkvm_host_fpsimd_state_init(void); diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 5d2d4d6465e8..41d1ba6de41a 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -209,7 +209,7 @@ static const exit_handler_fn pvm_exit_handlers[] = { static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu) { - if (unlikely(kvm_vm_is_protected(kern_hyp_va(vcpu->kvm)))) + if (unlikely(vcpu_is_protected(vcpu))) return pvm_exit_handlers; return hyp_exit_handlers; @@ -228,9 +228,7 @@ static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu) */ static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code) { - struct kvm *kvm = kern_hyp_va(vcpu->kvm); - - if (kvm_vm_is_protected(kvm) && vcpu_mode_is_32bit(vcpu)) { + if (unlikely(vcpu_is_protected(vcpu) && vcpu_mode_is_32bit(vcpu))) { /* * As we have caught the guest red-handed, decide that it isn't * fit for purpose anymore by making the vcpu invalid. The VMM -- cgit v1.2.3 From 838d992b84486311e6039170d28b79a7a0633f06 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 2 May 2024 16:42:47 +0100 Subject: KVM: arm64: Convert kvm_mpidr_index() to bitmap_gather() Linux 6.9 has introduced new bitmap manipulation helpers, with bitmap_gather() being of special interest, as it does exactly what kvm_mpidr_index() is already doing. Make the latter a wrapper around the former. Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20240502154247.3012042-1-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 9e8a496fb284..403d7479c0bc 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -220,20 +220,10 @@ struct kvm_mpidr_data { static inline u16 kvm_mpidr_index(struct kvm_mpidr_data *data, u64 mpidr) { - unsigned long mask = data->mpidr_mask; - u64 aff = mpidr & MPIDR_HWID_BITMASK; - int nbits, bit, bit_idx = 0; - u16 index = 0; + unsigned long index = 0, mask = data->mpidr_mask; + unsigned long aff = mpidr & MPIDR_HWID_BITMASK; - /* - * If this looks like RISC-V's BEXT or x86's PEXT - * instructions, it isn't by accident. - */ - nbits = fls(mask); - for_each_set_bit(bit, &mask, nbits) { - index |= (aff & BIT(bit)) >> (bit - bit_idx); - bit_idx++; - } + bitmap_gather(&index, &aff, &mask, fls(mask)); return index; } -- cgit v1.2.3 From 588de8c6d3621a4d712ccf834c205a74a84180a8 Mon Sep 17 00:00:00 2001 From: George Guo Date: Tue, 30 Apr 2024 16:56:55 +0800 Subject: arm64: simplify arch_static_branch/_jump function Extracted the jump table definition code from the arch_static_branch and arch_static_branch_jump functions into a macro JUMP_TABLE_ENTRY to reduce code duplication. Signed-off-by: George Guo Link: https://lore.kernel.org/r/20240430085655.2798551-2-dongtai.guo@linux.dev Signed-off-by: Will Deacon --- arch/arm64/include/asm/jump_label.h | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h index 6aafbb789991..4e753908b801 100644 --- a/arch/arm64/include/asm/jump_label.h +++ b/arch/arm64/include/asm/jump_label.h @@ -15,17 +15,23 @@ #define JUMP_LABEL_NOP_SIZE AARCH64_INSN_SIZE +#define JUMP_TABLE_ENTRY(key, label) \ + ".pushsection __jump_table, \"aw\"\n\t" \ + ".align 3\n\t" \ + ".long 1b - ., %l["#label"] - .\n\t" \ + ".quad %c0 - .\n\t" \ + ".popsection\n\t" \ + : : "i"(key) : : label + static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch) { + char *k = &((char *)key)[branch]; + asm goto( "1: nop \n\t" - " .pushsection __jump_table, \"aw\" \n\t" - " .align 3 \n\t" - " .long 1b - ., %l[l_yes] - . \n\t" - " .quad %c0 - . \n\t" - " .popsection \n\t" - : : "i"(&((char *)key)[branch]) : : l_yes); + JUMP_TABLE_ENTRY(k, l_yes) + ); return false; l_yes: @@ -35,15 +41,11 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch) { + char *k = &((char *)key)[branch]; asm goto( "1: b %l[l_yes] \n\t" - " .pushsection __jump_table, \"aw\" \n\t" - " .align 3 \n\t" - " .long 1b - ., %l[l_yes] - . \n\t" - " .quad %c0 - . \n\t" - " .popsection \n\t" - : : "i"(&((char *)key)[branch]) : : l_yes); - + JUMP_TABLE_ENTRY(k, l_yes) + ); return false; l_yes: return true; -- cgit v1.2.3 From 2fd001cd36005846caa6456fff1008c6f5bae9d4 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 29 Mar 2024 21:32:12 +0100 Subject: arch: Rename fbdev header and source files The per-architecture fbdev code has no dependencies on fbdev and can be used for any video-related subsystem. Rename the files to 'video'. Use video-sti.c on parisc as the source file depends on CONFIG_STI_CORE. On arc, arm, arm64, sh, and um the asm header file is an empty wrapper around the file in asm-generic. Let Kbuild generate the file. The build system does this automatically. Only um needs to generate video.h explicitly, so that it overrides the host architecture's header. The latter would otherwise interfere with the build. Further update all includes statements, include guards, and Makefiles. Also update a few strings and comments to refer to video instead of fbdev. v3: - arc, arm, arm64, sh: generate asm header via build system (Sam, Helge, Arnd) - um: rename fb.h to video.h - fix typo in commit message (Sam) Signed-off-by: Thomas Zimmermann Reviewed-by: Sam Ravnborg Cc: Vineet Gupta Cc: Catalin Marinas Cc: Will Deacon Cc: Huacai Chen Cc: WANG Xuerui Cc: Geert Uytterhoeven Cc: Thomas Bogendoerfer Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Yoshinori Sato Cc: Rich Felker Cc: John Paul Adrian Glaubitz Cc: "David S. Miller" Cc: Andreas Larsson Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: x86@kernel.org Cc: "H. Peter Anvin" Signed-off-by: Arnd Bergmann --- arch/arc/include/asm/fb.h | 8 --- arch/arm/include/asm/fb.h | 6 -- arch/arm64/include/asm/fb.h | 10 --- arch/loongarch/include/asm/fb.h | 31 --------- arch/loongarch/include/asm/video.h | 31 +++++++++ arch/m68k/include/asm/fb.h | 32 --------- arch/m68k/include/asm/video.h | 32 +++++++++ arch/mips/include/asm/fb.h | 38 ----------- arch/mips/include/asm/video.h | 38 +++++++++++ arch/parisc/include/asm/fb.h | 16 ----- arch/parisc/include/asm/video.h | 16 +++++ arch/parisc/video/Makefile | 2 +- arch/parisc/video/fbdev.c | 27 -------- arch/parisc/video/video-sti.c | 27 ++++++++ arch/powerpc/include/asm/fb.h | 17 ----- arch/powerpc/include/asm/video.h | 17 +++++ arch/powerpc/kernel/pci-common.c | 2 +- arch/sh/include/asm/fb.h | 7 -- arch/sparc/include/asm/fb.h | 45 ------------ arch/sparc/include/asm/video.h | 45 ++++++++++++ arch/sparc/video/Makefile | 2 +- arch/sparc/video/fbdev.c | 25 ------- arch/sparc/video/video.c | 25 +++++++ arch/um/include/asm/Kbuild | 2 +- arch/x86/include/asm/fb.h | 21 ------ arch/x86/include/asm/video.h | 21 ++++++ arch/x86/video/Makefile | 2 +- arch/x86/video/fbdev.c | 40 ----------- arch/x86/video/video.c | 41 +++++++++++ include/asm-generic/Kbuild | 2 +- include/asm-generic/fb.h | 136 ------------------------------------- include/asm-generic/video.h | 136 +++++++++++++++++++++++++++++++++++++ include/linux/fb.h | 2 +- 33 files changed, 436 insertions(+), 466 deletions(-) delete mode 100644 arch/arc/include/asm/fb.h delete mode 100644 arch/arm/include/asm/fb.h delete mode 100644 arch/arm64/include/asm/fb.h delete mode 100644 arch/loongarch/include/asm/fb.h create mode 100644 arch/loongarch/include/asm/video.h delete mode 100644 arch/m68k/include/asm/fb.h create mode 100644 arch/m68k/include/asm/video.h delete mode 100644 arch/mips/include/asm/fb.h create mode 100644 arch/mips/include/asm/video.h delete mode 100644 arch/parisc/include/asm/fb.h create mode 100644 arch/parisc/include/asm/video.h delete mode 100644 arch/parisc/video/fbdev.c create mode 100644 arch/parisc/video/video-sti.c delete mode 100644 arch/powerpc/include/asm/fb.h create mode 100644 arch/powerpc/include/asm/video.h delete mode 100644 arch/sh/include/asm/fb.h delete mode 100644 arch/sparc/include/asm/fb.h create mode 100644 arch/sparc/include/asm/video.h delete mode 100644 arch/sparc/video/fbdev.c create mode 100644 arch/sparc/video/video.c delete mode 100644 arch/x86/include/asm/fb.h create mode 100644 arch/x86/include/asm/video.h delete mode 100644 arch/x86/video/fbdev.c create mode 100644 arch/x86/video/video.c delete mode 100644 include/asm-generic/fb.h create mode 100644 include/asm-generic/video.h (limited to 'arch/arm64/include') diff --git a/arch/arc/include/asm/fb.h b/arch/arc/include/asm/fb.h deleted file mode 100644 index 9c2383d29cbb..000000000000 --- a/arch/arc/include/asm/fb.h +++ /dev/null @@ -1,8 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#ifndef _ASM_FB_H_ -#define _ASM_FB_H_ - -#include - -#endif /* _ASM_FB_H_ */ diff --git a/arch/arm/include/asm/fb.h b/arch/arm/include/asm/fb.h deleted file mode 100644 index ce20a43c3033..000000000000 --- a/arch/arm/include/asm/fb.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_FB_H_ -#define _ASM_FB_H_ - -#include - -#endif /* _ASM_FB_H_ */ diff --git a/arch/arm64/include/asm/fb.h b/arch/arm64/include/asm/fb.h deleted file mode 100644 index 1a495d8fb2ce..000000000000 --- a/arch/arm64/include/asm/fb.h +++ /dev/null @@ -1,10 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012 ARM Ltd. - */ -#ifndef __ASM_FB_H_ -#define __ASM_FB_H_ - -#include - -#endif /* __ASM_FB_H_ */ diff --git a/arch/loongarch/include/asm/fb.h b/arch/loongarch/include/asm/fb.h deleted file mode 100644 index 0b218b10a9ec..000000000000 --- a/arch/loongarch/include/asm/fb.h +++ /dev/null @@ -1,31 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2020-2022 Loongson Technology Corporation Limited - */ -#ifndef _ASM_FB_H_ -#define _ASM_FB_H_ - -#include -#include - -static inline void fb_memcpy_fromio(void *to, const volatile void __iomem *from, size_t n) -{ - memcpy(to, (void __force *)from, n); -} -#define fb_memcpy_fromio fb_memcpy_fromio - -static inline void fb_memcpy_toio(volatile void __iomem *to, const void *from, size_t n) -{ - memcpy((void __force *)to, from, n); -} -#define fb_memcpy_toio fb_memcpy_toio - -static inline void fb_memset_io(volatile void __iomem *addr, int c, size_t n) -{ - memset((void __force *)addr, c, n); -} -#define fb_memset fb_memset_io - -#include - -#endif /* _ASM_FB_H_ */ diff --git a/arch/loongarch/include/asm/video.h b/arch/loongarch/include/asm/video.h new file mode 100644 index 000000000000..9f76845f2d4f --- /dev/null +++ b/arch/loongarch/include/asm/video.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ +#ifndef _ASM_VIDEO_H_ +#define _ASM_VIDEO_H_ + +#include +#include + +static inline void fb_memcpy_fromio(void *to, const volatile void __iomem *from, size_t n) +{ + memcpy(to, (void __force *)from, n); +} +#define fb_memcpy_fromio fb_memcpy_fromio + +static inline void fb_memcpy_toio(volatile void __iomem *to, const void *from, size_t n) +{ + memcpy((void __force *)to, from, n); +} +#define fb_memcpy_toio fb_memcpy_toio + +static inline void fb_memset_io(volatile void __iomem *addr, int c, size_t n) +{ + memset((void __force *)addr, c, n); +} +#define fb_memset fb_memset_io + +#include + +#endif /* _ASM_VIDEO_H_ */ diff --git a/arch/m68k/include/asm/fb.h b/arch/m68k/include/asm/fb.h deleted file mode 100644 index 9941b7434b69..000000000000 --- a/arch/m68k/include/asm/fb.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_FB_H_ -#define _ASM_FB_H_ - -#include -#include - -static inline pgprot_t pgprot_framebuffer(pgprot_t prot, - unsigned long vm_start, unsigned long vm_end, - unsigned long offset) -{ -#ifdef CONFIG_MMU -#ifdef CONFIG_SUN3 - pgprot_val(prot) |= SUN3_PAGE_NOCACHE; -#else - if (CPU_IS_020_OR_030) - pgprot_val(prot) |= _PAGE_NOCACHE030; - if (CPU_IS_040_OR_060) { - pgprot_val(prot) &= _CACHEMASK040; - /* Use no-cache mode, serialized */ - pgprot_val(prot) |= _PAGE_NOCACHE_S; - } -#endif /* CONFIG_SUN3 */ -#endif /* CONFIG_MMU */ - - return prot; -} -#define pgprot_framebuffer pgprot_framebuffer - -#include - -#endif /* _ASM_FB_H_ */ diff --git a/arch/m68k/include/asm/video.h b/arch/m68k/include/asm/video.h new file mode 100644 index 000000000000..6cf2194c413d --- /dev/null +++ b/arch/m68k/include/asm/video.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_VIDEO_H_ +#define _ASM_VIDEO_H_ + +#include +#include + +static inline pgprot_t pgprot_framebuffer(pgprot_t prot, + unsigned long vm_start, unsigned long vm_end, + unsigned long offset) +{ +#ifdef CONFIG_MMU +#ifdef CONFIG_SUN3 + pgprot_val(prot) |= SUN3_PAGE_NOCACHE; +#else + if (CPU_IS_020_OR_030) + pgprot_val(prot) |= _PAGE_NOCACHE030; + if (CPU_IS_040_OR_060) { + pgprot_val(prot) &= _CACHEMASK040; + /* Use no-cache mode, serialized */ + pgprot_val(prot) |= _PAGE_NOCACHE_S; + } +#endif /* CONFIG_SUN3 */ +#endif /* CONFIG_MMU */ + + return prot; +} +#define pgprot_framebuffer pgprot_framebuffer + +#include + +#endif /* _ASM_VIDEO_H_ */ diff --git a/arch/mips/include/asm/fb.h b/arch/mips/include/asm/fb.h deleted file mode 100644 index d98d6681d64e..000000000000 --- a/arch/mips/include/asm/fb.h +++ /dev/null @@ -1,38 +0,0 @@ -#ifndef _ASM_FB_H_ -#define _ASM_FB_H_ - -#include - -static inline pgprot_t pgprot_framebuffer(pgprot_t prot, - unsigned long vm_start, unsigned long vm_end, - unsigned long offset) -{ - return pgprot_noncached(prot); -} -#define pgprot_framebuffer pgprot_framebuffer - -/* - * MIPS doesn't define __raw_ I/O macros, so the helpers - * in don't generate fb_readq() and - * fb_write(). We have to provide them here. - * - * TODO: Convert MIPS to generic I/O. The helpers below can - * then be removed. - */ -#ifdef CONFIG_64BIT -static inline u64 fb_readq(const volatile void __iomem *addr) -{ - return __raw_readq(addr); -} -#define fb_readq fb_readq - -static inline void fb_writeq(u64 b, volatile void __iomem *addr) -{ - __raw_writeq(b, addr); -} -#define fb_writeq fb_writeq -#endif - -#include - -#endif /* _ASM_FB_H_ */ diff --git a/arch/mips/include/asm/video.h b/arch/mips/include/asm/video.h new file mode 100644 index 000000000000..007c106d980f --- /dev/null +++ b/arch/mips/include/asm/video.h @@ -0,0 +1,38 @@ +#ifndef _ASM_VIDEO_H_ +#define _ASM_VIDEO_H_ + +#include + +static inline pgprot_t pgprot_framebuffer(pgprot_t prot, + unsigned long vm_start, unsigned long vm_end, + unsigned long offset) +{ + return pgprot_noncached(prot); +} +#define pgprot_framebuffer pgprot_framebuffer + +/* + * MIPS doesn't define __raw_ I/O macros, so the helpers + * in don't generate fb_readq() and + * fb_writeq(). We have to provide them here. + * + * TODO: Convert MIPS to generic I/O. The helpers below can + * then be removed. + */ +#ifdef CONFIG_64BIT +static inline u64 fb_readq(const volatile void __iomem *addr) +{ + return __raw_readq(addr); +} +#define fb_readq fb_readq + +static inline void fb_writeq(u64 b, volatile void __iomem *addr) +{ + __raw_writeq(b, addr); +} +#define fb_writeq fb_writeq +#endif + +#include + +#endif /* _ASM_VIDEO_H_ */ diff --git a/arch/parisc/include/asm/fb.h b/arch/parisc/include/asm/fb.h deleted file mode 100644 index ed2a195a3e76..000000000000 --- a/arch/parisc/include/asm/fb.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_FB_H_ -#define _ASM_FB_H_ - -#include - -struct device; - -#if defined(CONFIG_STI_CORE) -bool video_is_primary_device(struct device *dev); -#define video_is_primary_device video_is_primary_device -#endif - -#include - -#endif /* _ASM_FB_H_ */ diff --git a/arch/parisc/include/asm/video.h b/arch/parisc/include/asm/video.h new file mode 100644 index 000000000000..c5dff3223194 --- /dev/null +++ b/arch/parisc/include/asm/video.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_VIDEO_H_ +#define _ASM_VIDEO_H_ + +#include + +struct device; + +#if defined(CONFIG_STI_CORE) +bool video_is_primary_device(struct device *dev); +#define video_is_primary_device video_is_primary_device +#endif + +#include + +#endif /* _ASM_VIDEO_H_ */ diff --git a/arch/parisc/video/Makefile b/arch/parisc/video/Makefile index 16a73cce4661..b5db5b42880f 100644 --- a/arch/parisc/video/Makefile +++ b/arch/parisc/video/Makefile @@ -1,3 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_STI_CORE) += fbdev.o +obj-$(CONFIG_STI_CORE) += video-sti.o diff --git a/arch/parisc/video/fbdev.c b/arch/parisc/video/fbdev.c deleted file mode 100644 index 540fa0c919d5..000000000000 --- a/arch/parisc/video/fbdev.c +++ /dev/null @@ -1,27 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2000 Philipp Rumpf - * Copyright (C) 2001-2020 Helge Deller - * Copyright (C) 2001-2002 Thomas Bogendoerfer - */ - -#include - -#include