// SPDX-License-Identifier: GPL-2.0-only
/*
 * AMD Secure AVIC Support (SEV-SNP Guests)
 *
 * Copyright (C) 2024 Advanced Micro Devices, Inc.
 *
 * Author: Neeraj Upadhyay
 */

#include <linux/cc_platform.h>
#include <linux/cpumask.h>
#include <linux/percpu-defs.h>
#include <linux/align.h>

#include <asm/apic.h>
#include <asm/sev.h>

#include "local.h"

struct secure_avic_page {
	u8 regs[PAGE_SIZE];
} __aligned(PAGE_SIZE);

static struct secure_avic_page __percpu *savic_page __ro_after_init;

static int savic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
	return x2apic_enabled() && cc_platform_has(CC_ATTR_SNP_SECURE_AVIC);
}

static inline void *get_reg_bitmap(unsigned int cpu, unsigned int offset)
{
	return &per_cpu_ptr(savic_page, cpu)->regs[offset];
}

static inline void update_vector(unsigned int cpu, unsigned int offset,
				 unsigned int vector, bool set)
{
	void *bitmap = get_reg_bitmap(cpu, offset);

	if (set)
		apic_set_vector(vector, bitmap);
	else
		apic_clear_vector(vector, bitmap);
}

#define SAVIC_ALLOWED_IRR	0x204

/*
 * When Secure AVIC is enabled, RDMSR/WRMSR of APIC registers results in
 * a #VC exception (for non-accelerated register accesses) with the
 * VMEXIT_AVIC_NOACCEL error code. The #VC exception handler can read
 * and write the x2APIC registers in the guest APIC backing page.
 *
 * Since doing this would increase the latency of accessing x2APIC
 * registers, instead of doing RDMSR/WRMSR-based accesses and handling
 * the APIC register reads and writes in the #VC exception handler, the
 * read() and write() callbacks below directly read and write the APIC
 * registers in the vCPU's APIC backing page.
 */
static u32 savic_read(u32 reg)
{
	void *ap = this_cpu_ptr(savic_page);

	switch (reg) {
	case APIC_LVTT:
	case APIC_TMICT:
	case APIC_TMCCT:
	case APIC_TDCR:
	case APIC_LVTTHMR:
	case APIC_LVTPC:
	case APIC_LVT0:
	case APIC_LVT1:
	case APIC_LVTERR:
		return savic_ghcb_msr_read(reg);
	case APIC_ID:
	case APIC_LVR:
	case APIC_TASKPRI:
	case APIC_ARBPRI:
	case APIC_PROCPRI:
	case APIC_LDR:
	case APIC_SPIV:
	case APIC_ESR:
	case APIC_EFEAT:
	case APIC_ECTRL:
	case APIC_SEOI:
	case APIC_IER:
	case APIC_EILVTn(0) ... APIC_EILVTn(3):
		return apic_get_reg(ap, reg);
	case APIC_ICR:
		return (u32)apic_get_reg64(ap, reg);
	case APIC_ISR ... APIC_ISR + 0x70:
	case APIC_TMR ... APIC_TMR + 0x70:
		if (WARN_ONCE(!IS_ALIGNED(reg, 16),
			      "APIC register read offset 0x%x not aligned at 16 bytes", reg))
			return 0;
		return apic_get_reg(ap, reg);
	/* IRR and ALLOWED_IRR offset range */
	case APIC_IRR ... APIC_IRR + 0x74:
		/*
		 * Valid APIC_IRR/SAVIC_ALLOWED_IRR registers are at 16-byte
		 * strides from their respective base offsets. APIC_IRRs are
		 * in the range
		 *
		 *	(0x200, 0x210, ..., 0x270),
		 *
		 * while the SAVIC_ALLOWED_IRR range starts 4 bytes later, in
		 * the range
		 *
		 *	(0x204, 0x214, ..., 0x274).
		 *
		 * Filter out everything else.
		 */
		if (WARN_ONCE(!(IS_ALIGNED(reg, 16) || IS_ALIGNED(reg - 4, 16)),
			      "Misaligned APIC_IRR/ALLOWED_IRR APIC register read offset 0x%x",
			      reg))
			return 0;
		return apic_get_reg(ap, reg);
	default:
		pr_err("Error reading unknown Secure AVIC reg offset 0x%x\n", reg);
		return 0;
	}
}
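/*
 * Illustrative example (not part of the driver): with savic_read() wired
 * up as the ->read() callback, a hot-path register access such as
 *
 *	u32 tpr = apic_read(APIC_TASKPRI);
 *
 * is served straight from this vCPU's APIC backing page and raises no
 * #VC exception, while a timer register access such as
 *
 *	u32 tmcct = apic_read(APIC_TMCCT);
 *
 * is forwarded to the hypervisor over the GHCB MSR protocol, since the
 * timer registers are not maintained in the backing page.
 */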
#define SAVIC_NMI_REQ		0x278

/*
 * On a WRMSR to the APIC_SELF_IPI register by the guest, Secure AVIC
 * hardware updates the APIC_IRR in the APIC backing page of the vCPU.
 * In addition, hardware evaluates the new APIC_IRR update for interrupt
 * injection to the vCPU. So, self IPIs are hardware-accelerated.
 */
static inline void self_ipi_reg_write(unsigned int vector)
{
	native_apic_msr_write(APIC_SELF_IPI, vector);
}

static void send_ipi_dest(unsigned int cpu, unsigned int vector, bool nmi)
{
	if (nmi)
		apic_set_reg(per_cpu_ptr(savic_page, cpu), SAVIC_NMI_REQ, 1);
	else
		update_vector(cpu, APIC_IRR, vector, true);
}

static void send_ipi_allbut(unsigned int vector, bool nmi)
{
	unsigned int cpu, src_cpu;

	guard(irqsave)();

	src_cpu = raw_smp_processor_id();

	for_each_cpu(cpu, cpu_online_mask) {
		if (cpu == src_cpu)
			continue;
		send_ipi_dest(cpu, vector, nmi);
	}
}

static inline void self_ipi(unsigned int vector, bool nmi)
{
	u32 icr_low = APIC_DEST_SELF | vector;

	if (nmi)
		icr_low |= APIC_DM_NMI;

	native_x2apic_icr_write(icr_low, 0);
}

static void savic_icr_write(u32 icr_low, u32 icr_high)
{
	unsigned int dsh, vector;
	u64 icr_data;
	bool nmi;

	dsh = icr_low & APIC_DEST_ALLBUT;
	vector = icr_low & APIC_VECTOR_MASK;
	nmi = ((icr_low & APIC_DM_FIXED_MASK) == APIC_DM_NMI);

	switch (dsh) {
	case APIC_DEST_SELF:
		self_ipi(vector, nmi);
		break;
	case APIC_DEST_ALLINC:
		self_ipi(vector, nmi);
		fallthrough;
	case APIC_DEST_ALLBUT:
		send_ipi_allbut(vector, nmi);
		break;
	default:
		send_ipi_dest(icr_high, vector, nmi);
		break;
	}

	icr_data = ((u64)icr_high) << 32 | icr_low;
	if (dsh != APIC_DEST_SELF)
		savic_ghcb_msr_write(APIC_ICR, icr_data);
	apic_set_reg64(this_cpu_ptr(savic_page), APIC_ICR, icr_data);
}
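/*
 * Illustrative flow (not part of the driver): for a fixed-mode IPI with
 * the destination APIC ID in icr_high,
 *
 *	savic_icr_write(APIC_DM_FIXED | vector, dest);
 *
 * the sender first sets the vector bit in the destination vCPU's
 * APIC_IRR in its backing page, then forwards the composed ICR value to
 * the hypervisor via the GHCB MSR protocol so that the hypervisor can
 * re-evaluate the destination vCPU's interrupt state and deliver the
 * pending request.
 */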
static void savic_write(u32 reg, u32 data)
{
	void *ap = this_cpu_ptr(savic_page);

	switch (reg) {
	case APIC_LVTT:
	case APIC_TMICT:
	case APIC_TDCR:
	case APIC_LVT0:
	case APIC_LVT1:
	case APIC_LVTTHMR:
	case APIC_LVTPC:
	case APIC_LVTERR:
		savic_ghcb_msr_write(reg, data);
		break;
	case APIC_TASKPRI:
	case APIC_EOI:
	case APIC_SPIV:
	case SAVIC_NMI_REQ:
	case APIC_ESR:
	case APIC_ECTRL:
	case APIC_SEOI:
	case APIC_IER:
	case APIC_EILVTn(0) ... APIC_EILVTn(3):
		apic_set_reg(ap, reg, data);
		break;
	case APIC_ICR:
		savic_icr_write(data, 0);
		break;
	case APIC_SELF_IPI:
		self_ipi_reg_write(data);
		break;
	/* ALLOWED_IRR offsets are writable */
	case SAVIC_ALLOWED_IRR ... SAVIC_ALLOWED_IRR + 0x70:
		if (IS_ALIGNED(reg - 4, 16)) {
			apic_set_reg(ap, reg, data);
			break;
		}
		fallthrough;
	default:
		pr_err("Error writing unknown Secure AVIC reg offset 0x%x\n", reg);
	}
}

static void send_ipi(u32 dest, unsigned int vector, unsigned int dsh)
{
	unsigned int icr_low;

	icr_low = __prepare_ICR(dsh, vector, APIC_DEST_PHYSICAL);
	savic_icr_write(icr_low, dest);
}

static void savic_send_ipi(int cpu, int vector)
{
	u32 dest = per_cpu(x86_cpu_to_apicid, cpu);

	send_ipi(dest, vector, 0);
}

static void send_ipi_mask(const struct cpumask *mask, unsigned int vector, bool excl_self)
{
	unsigned int cpu, this_cpu;

	guard(irqsave)();

	this_cpu = raw_smp_processor_id();

	for_each_cpu(cpu, mask) {
		if (excl_self && cpu == this_cpu)
			continue;
		send_ipi(per_cpu(x86_cpu_to_apicid, cpu), vector, 0);
	}
}

static void savic_send_ipi_mask(const struct cpumask *mask, int vector)
{
	send_ipi_mask(mask, vector, false);
}

static void savic_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
{
	send_ipi_mask(mask, vector, true);
}

static void savic_send_ipi_allbutself(int vector)
{
	send_ipi(0, vector, APIC_DEST_ALLBUT);
}

static void savic_send_ipi_all(int vector)
{
	send_ipi(0, vector, APIC_DEST_ALLINC);
}

static void savic_send_ipi_self(int vector)
{
	self_ipi_reg_write(vector);
}

static void savic_update_vector(unsigned int cpu, unsigned int vector, bool set)
{
	update_vector(cpu, SAVIC_ALLOWED_IRR, vector, set);
}
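/*
 * Illustrative usage (not part of the driver): the ->update_vector()
 * callback set up below is meant to be invoked by the generic vector
 * management code when a vector is allocated on or freed from a CPU,
 * e.g.
 *
 *	apic->update_vector(cpu, vector, true);
 *
 * which sets the SAVIC_ALLOWED_IRR bit for that vector and thereby
 * allows the hypervisor to inject that vector into this vCPU.
 */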
static void savic_eoi(void)
{
	unsigned int cpu;
	int vec;

	cpu = raw_smp_processor_id();
	vec = apic_find_highest_vector(get_reg_bitmap(cpu, APIC_ISR));
	if (WARN_ONCE(vec == -1, "EOI write while no active interrupt in APIC_ISR"))
		return;

	/* Is it a level-triggered interrupt? */
	if (apic_test_vector(vec, get_reg_bitmap(cpu, APIC_TMR))) {
		update_vector(cpu, APIC_ISR, vec, false);
		/*
		 * Propagate the EOI write to the hypervisor for level-triggered
		 * interrupts. Returning to the guest from the GHCB protocol
		 * event takes care of re-evaluating the interrupt state.
		 */
		savic_ghcb_msr_write(APIC_EOI, 0);
	} else {
		/*
		 * Hardware clears APIC_ISR and re-evaluates the interrupt state
		 * to determine if there is any pending interrupt which can be
		 * delivered to the CPU.
		 */
		native_apic_msr_eoi();
	}
}

static void savic_teardown(void)
{
	/* Disable Secure AVIC */
	native_wrmsrq(MSR_AMD64_SAVIC_CONTROL, 0);
	savic_unregister_gpa(NULL);
}

static void savic_setup(void)
{
	void *ap = this_cpu_ptr(savic_page);
	enum es_result res;
	unsigned long gpa;

	/*
	 * Before Secure AVIC is enabled, APIC MSR reads are intercepted.
	 * An APIC_ID MSR read returns the value from the hypervisor.
	 */
	apic_set_reg(ap, APIC_ID, native_apic_msr_read(APIC_ID));

	gpa = __pa(ap);
	/*
	 * The NPT entry for a vCPU's APIC backing page must always be
	 * present when the vCPU is running in order for Secure AVIC to
	 * function. A VMEXIT_BUSY is returned on VMRUN and the vCPU cannot
	 * be resumed if the NPT entry for the APIC backing page is not
	 * present. Notify the GPA of the vCPU's APIC backing page to the
	 * hypervisor by calling savic_register_gpa(). Before executing
	 * VMRUN, the hypervisor makes use of this information to make sure
	 * the APIC backing page is mapped in the NPT.
	 */
	res = savic_register_gpa(gpa);
	if (res != ES_OK)
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL);

	native_wrmsrq(MSR_AMD64_SAVIC_CONTROL,
		      gpa | MSR_AMD64_SAVIC_EN | MSR_AMD64_SAVIC_ALLOWEDNMI);
}

static int savic_probe(void)
{
	if (!cc_platform_has(CC_ATTR_SNP_SECURE_AVIC))
		return 0;

	if (!x2apic_mode) {
		pr_err("Secure AVIC enabled in non x2APIC mode\n");
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL);
		/* unreachable */
	}

	savic_page = alloc_percpu(struct secure_avic_page);
	if (!savic_page)
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL);

	return 1;
}

static struct apic apic_x2apic_savic __ro_after_init = {

	.name				= "secure avic x2apic",
	.probe				= savic_probe,
	.acpi_madt_oem_check		= savic_acpi_madt_oem_check,
	.setup				= savic_setup,
	.teardown			= savic_teardown,

	.dest_mode_logical		= false,

	.disable_esr			= 0,

	.cpu_present_to_apicid		= default_cpu_present_to_apicid,

	.max_apic_id			= UINT_MAX,
	.x2apic_set_max_apicid		= true,
	.get_apic_id			= x2apic_get_apic_id,

	.calc_dest_apicid		= apic_default_calc_apicid,

	.send_IPI			= savic_send_ipi,
	.send_IPI_mask			= savic_send_ipi_mask,
	.send_IPI_mask_allbutself	= savic_send_ipi_mask_allbutself,
	.send_IPI_allbutself		= savic_send_ipi_allbutself,
	.send_IPI_all			= savic_send_ipi_all,
	.send_IPI_self			= savic_send_ipi_self,
	.nmi_to_offline_cpu		= true,

	.read				= savic_read,
	.write				= savic_write,
	.eoi				= savic_eoi,
	.icr_read			= native_x2apic_icr_read,
	.icr_write			= savic_icr_write,

	.update_vector			= savic_update_vector,
};

apic_driver(apic_x2apic_savic);
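/*
 * Bring-up summary (illustrative sketch, not code from the driver):
 *
 *	savic_probe()		- select the driver, allocate the per-CPU
 *				  APIC backing pages
 *	savic_setup()		- on each CPU:
 *		apic_set_reg(ap, APIC_ID, native_apic_msr_read(APIC_ID));
 *		savic_register_gpa(gpa);
 *		native_wrmsrq(MSR_AMD64_SAVIC_CONTROL,
 *			      gpa | MSR_AMD64_SAVIC_EN | MSR_AMD64_SAVIC_ALLOWEDNMI);
 *
 * After this, APIC register accesses on that CPU are served by
 * savic_read()/savic_write() above, and savic_teardown() disables
 * Secure AVIC again by clearing MSR_AMD64_SAVIC_CONTROL.
 */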