diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-02-20 08:48:31 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-02-20 08:48:31 -0800 |
| commit | d31558c077d8be422b65e97974017c030b4bd91a (patch) | |
| tree | 6b894308c43fb455ce02e515dd6bd91d1412835d | |
| parent | 8bf22c33e7a172fbc72464f4cc484d23a6b412ba (diff) | |
| parent | 158ebb578cd5f7881fdc7c4ecebddcf9463f91fd (diff) | |
Merge tag 'hyperv-next-signed-20260218' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux
Pull Hyper-V updates from Wei Liu:

- Debugfs support for MSHV statistics (Nuno Das Neves)
- Support for the integrated scheduler (Stanislav Kinsburskii)
- Various fixes for MSHV memory management and hypervisor status
  handling (Stanislav Kinsburskii)
- Expose more capabilities and flags for MSHV partition management
  (Anatol Belski, Muminul Islam, Magnus Kulke)
- Miscellaneous fixes to improve code quality and stability (Carlos
  López, Ethan Nelson-Moore, Li RongQing, Michael Kelley, Mukesh
  Rathor, Purna Pavan Chandra Aekkaladevi, Stanislav Kinsburskii, Uros
  Bizjak)
- PREEMPT_RT fixes for vmbus interrupts (Jan Kiszka)
* tag 'hyperv-next-signed-20260218' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux: (34 commits)
mshv: Handle insufficient root memory hypervisor statuses
mshv: Handle insufficient contiguous memory hypervisor status
mshv: Introduce hv_deposit_memory helper functions
mshv: Introduce hv_result_needs_memory() helper function
mshv: Add SMT_ENABLED_GUEST partition creation flag
mshv: Add nested virtualization creation flag
Drivers: hv: vmbus: Simplify allocation of vmbus_evt
mshv: expose the scrub partition hypercall
mshv: Add support for integrated scheduler
mshv: Use try_cmpxchg() instead of cmpxchg()
x86/hyperv: Fix error pointer dereference
x86/hyperv: Reserve 3 interrupt vectors used exclusively by MSHV
Drivers: hv: vmbus: Use kthread for vmbus interrupts on PREEMPT_RT
x86/hyperv: Remove ASM_CALL_CONSTRAINT with VMMCALL insn
x86/hyperv: Use savesegment() instead of inline asm() to save segment registers
mshv: fix SRCU protection in irqfd resampler ack handler
mshv: make field names descriptive in a header struct
x86/hyperv: Update comment in hyperv_cleanup()
mshv: clear eventfd counter on irqfd shutdown
x86/hyperv: Use memremap()/memunmap() instead of ioremap_cache()/iounmap()
...
27 files changed, 1776 insertions, 261 deletions
diff --git a/arch/x86/hyperv/hv_crash.c b/arch/x86/hyperv/hv_crash.c index a78e4fed5720..92da1b4f2e73 100644 --- a/arch/x86/hyperv/hv_crash.c +++ b/arch/x86/hyperv/hv_crash.c @@ -279,7 +279,6 @@ static void hv_notify_prepare_hyp(void) static noinline __noclone void crash_nmi_callback(struct pt_regs *regs) { struct hv_input_disable_hyp_ex *input; - u64 status; int msecs = 1000, ccpu = smp_processor_id(); if (ccpu == 0) { @@ -313,7 +312,7 @@ static noinline __noclone void crash_nmi_callback(struct pt_regs *regs) input->rip = trampoline_pa; input->arg = devirt_arg; - status = hv_do_hypercall(HVCALL_DISABLE_HYP_EX, input, NULL); + (void)hv_do_hypercall(HVCALL_DISABLE_HYP_EX, input, NULL); hv_panic_timeout_reboot(); } diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 14de43f4bc6c..5dbe9bd67891 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -103,9 +103,9 @@ static int hyperv_init_ghcb(void) */ rdmsrq(MSR_AMD64_SEV_ES_GHCB, ghcb_gpa); - /* Mask out vTOM bit. 
ioremap_cache() maps decrypted */ + /* Mask out vTOM bit and map as decrypted */ ghcb_gpa &= ~ms_hyperv.shared_gpa_boundary; - ghcb_va = (void *)ioremap_cache(ghcb_gpa, HV_HYP_PAGE_SIZE); + ghcb_va = memremap(ghcb_gpa, HV_HYP_PAGE_SIZE, MEMREMAP_WB | MEMREMAP_DEC); if (!ghcb_va) return -ENOMEM; @@ -277,7 +277,7 @@ static int hv_cpu_die(unsigned int cpu) if (hv_ghcb_pg) { ghcb_va = (void **)this_cpu_ptr(hv_ghcb_pg); if (*ghcb_va) - iounmap(*ghcb_va); + memunmap(*ghcb_va); *ghcb_va = NULL; } @@ -558,7 +558,6 @@ void __init hyperv_init(void) memunmap(src); hv_remap_tsc_clocksource(); - hv_root_crash_init(); hv_sleep_notifiers_register(); } else { hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg); @@ -567,6 +566,9 @@ void __init hyperv_init(void) hv_set_hypercall_pg(hv_hypercall_pg); + if (hv_root_partition()) /* after set hypercall pg */ + hv_root_crash_init(); + skip_hypercall_pg_init: /* * hyperv_init() is called before LAPIC is initialized: see @@ -633,9 +635,13 @@ void hyperv_cleanup(void) hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, 0); /* - * Reset hypercall page reference before reset the page, - * let hypercall operations fail safely rather than - * panic the kernel for using invalid hypercall page + * Reset hv_hypercall_pg before resetting it in the hypervisor. + * hv_set_hypercall_pg(NULL) is not used because at this point in the + * panic path other CPUs have been stopped, causing static_call_update() + * to hang. So resetting hv_hypercall_pg to cause hypercalls to fail + * cleanly is only operative on 32-bit builds. But this is OK as it is + * just a preventative measure to ease detecting a hypercall being made + * after this point, which shouldn't be happening anyway. 
*/ hv_hypercall_pg = NULL; diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c index c0edaed0efb3..9b6a9bc4ab76 100644 --- a/arch/x86/hyperv/hv_vtl.c +++ b/arch/x86/hyperv/hv_vtl.c @@ -110,7 +110,7 @@ static void hv_vtl_ap_entry(void) static int hv_vtl_bringup_vcpu(u32 target_vp_index, int cpu, u64 eip_ignored) { - u64 status; + u64 status, rsp, rip; int ret = 0; struct hv_enable_vp_vtl *input; unsigned long irq_flags; @@ -123,9 +123,11 @@ static int hv_vtl_bringup_vcpu(u32 target_vp_index, int cpu, u64 eip_ignored) struct desc_struct *gdt; struct task_struct *idle = idle_thread_get(cpu); - u64 rsp = (unsigned long)idle->thread.sp; + if (IS_ERR(idle)) + return PTR_ERR(idle); - u64 rip = (u64)&hv_vtl_ap_entry; + rsp = (unsigned long)idle->thread.sp; + rip = (u64)&hv_vtl_ap_entry; native_store_gdt(&gdt_ptr); store_idt(&idt_ptr); diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index 651771534cae..be7fad43a88d 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -25,6 +25,7 @@ #include <asm/e820/api.h> #include <asm/desc.h> #include <asm/msr.h> +#include <asm/segment.h> #include <uapi/asm/vmx.h> #ifdef CONFIG_AMD_MEM_ENCRYPT @@ -315,16 +316,16 @@ int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip, unsigned int cpu) vmsa->gdtr.base = gdtr.address; vmsa->gdtr.limit = gdtr.size; - asm volatile("movl %%es, %%eax;" : "=a" (vmsa->es.selector)); + savesegment(es, vmsa->es.selector); hv_populate_vmcb_seg(vmsa->es, vmsa->gdtr.base); - asm volatile("movl %%cs, %%eax;" : "=a" (vmsa->cs.selector)); + savesegment(cs, vmsa->cs.selector); hv_populate_vmcb_seg(vmsa->cs, vmsa->gdtr.base); - asm volatile("movl %%ss, %%eax;" : "=a" (vmsa->ss.selector)); + savesegment(ss, vmsa->ss.selector); hv_populate_vmcb_seg(vmsa->ss, vmsa->gdtr.base); - asm volatile("movl %%ds, %%eax;" : "=a" (vmsa->ds.selector)); + savesegment(ds, vmsa->ds.selector); hv_populate_vmcb_seg(vmsa->ds, vmsa->gdtr.base); vmsa->efer = native_read_msr(MSR_EFER); @@ -391,7 +392,7 @@ 
u64 hv_snp_hypercall(u64 control, u64 param1, u64 param2) register u64 __r8 asm("r8") = param2; asm volatile("vmmcall" - : "=a" (hv_status), ASM_CALL_CONSTRAINT, + : "=a" (hv_status), "+c" (control), "+d" (param1), "+r" (__r8) : : "cc", "memory", "r9", "r10", "r11"); diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 579fb2c64cfd..89a2eb8a0722 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -478,6 +478,28 @@ int hv_get_hypervisor_version(union hv_hypervisor_version_info *info) } EXPORT_SYMBOL_GPL(hv_get_hypervisor_version); +/* + * Reserved vectors hard coded in the hypervisor. If used outside, the hypervisor + * will either crash or hang or attempt to break into debugger. + */ +static void hv_reserve_irq_vectors(void) +{ + #define HYPERV_DBG_FASTFAIL_VECTOR 0x29 + #define HYPERV_DBG_ASSERT_VECTOR 0x2C + #define HYPERV_DBG_SERVICE_VECTOR 0x2D + + if (cpu_feature_enabled(X86_FEATURE_FRED)) + return; + + if (test_and_set_bit(HYPERV_DBG_ASSERT_VECTOR, system_vectors) || + test_and_set_bit(HYPERV_DBG_SERVICE_VECTOR, system_vectors) || + test_and_set_bit(HYPERV_DBG_FASTFAIL_VECTOR, system_vectors)) + BUG(); + + pr_info("Hyper-V: reserve vectors: %d %d %d\n", HYPERV_DBG_ASSERT_VECTOR, + HYPERV_DBG_SERVICE_VECTOR, HYPERV_DBG_FASTFAIL_VECTOR); +} + static void __init ms_hyperv_init_platform(void) { int hv_max_functions_eax, eax; @@ -510,6 +532,9 @@ static void __init ms_hyperv_init_platform(void) hv_identify_partition_type(); + if (hv_root_partition()) + hv_reserve_irq_vectors(); + if (cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) ms_hyperv.hints |= HV_DEPRECATING_AEOI_RECOMMENDED; diff --git a/drivers/hv/Makefile b/drivers/hv/Makefile index a49f93c2d245..2593711c3628 100644 --- a/drivers/hv/Makefile +++ b/drivers/hv/Makefile @@ -15,6 +15,7 @@ hv_vmbus-$(CONFIG_HYPERV_TESTING) += hv_debugfs.o hv_utils-y := hv_util.o hv_kvp.o hv_snapshot.o hv_utils_transport.o mshv_root-y := mshv_root_main.o mshv_synic.o 
mshv_eventfd.o mshv_irq.o \ mshv_root_hv_call.o mshv_portid_table.o mshv_regions.o +mshv_root-$(CONFIG_DEBUG_FS) += mshv_debugfs.o mshv_vtl-y := mshv_vtl_main.o # Code that must be built-in diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index c100f04b3581..ea6835638505 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -287,11 +287,11 @@ void hv_hyp_synic_enable_regs(unsigned int cpu) simp.simp_enabled = 1; if (ms_hyperv.paravisor_present || hv_root_partition()) { - /* Mask out vTOM bit. ioremap_cache() maps decrypted */ + /* Mask out vTOM bit and map as decrypted */ u64 base = (simp.base_simp_gpa << HV_HYP_PAGE_SHIFT) & ~ms_hyperv.shared_gpa_boundary; hv_cpu->hyp_synic_message_page = - (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE); + memremap(base, HV_HYP_PAGE_SIZE, MEMREMAP_WB | MEMREMAP_DEC); if (!hv_cpu->hyp_synic_message_page) pr_err("Fail to map synic message page.\n"); } else { @@ -306,11 +306,11 @@ void hv_hyp_synic_enable_regs(unsigned int cpu) siefp.siefp_enabled = 1; if (ms_hyperv.paravisor_present || hv_root_partition()) { - /* Mask out vTOM bit. 
ioremap_cache() maps decrypted */ + /* Mask out vTOM bit and map as decrypted */ u64 base = (siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT) & ~ms_hyperv.shared_gpa_boundary; hv_cpu->hyp_synic_event_page = - (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE); + memremap(base, HV_HYP_PAGE_SIZE, MEMREMAP_WB | MEMREMAP_DEC); if (!hv_cpu->hyp_synic_event_page) pr_err("Fail to map synic event page.\n"); } else { @@ -429,7 +429,7 @@ void hv_hyp_synic_disable_regs(unsigned int cpu) simp.simp_enabled = 0; if (ms_hyperv.paravisor_present || hv_root_partition()) { if (hv_cpu->hyp_synic_message_page) { - iounmap(hv_cpu->hyp_synic_message_page); + memunmap(hv_cpu->hyp_synic_message_page); hv_cpu->hyp_synic_message_page = NULL; } } else { @@ -443,7 +443,7 @@ void hv_hyp_synic_disable_regs(unsigned int cpu) if (ms_hyperv.paravisor_present || hv_root_partition()) { if (hv_cpu->hyp_synic_event_page) { - iounmap(hv_cpu->hyp_synic_event_page); + memunmap(hv_cpu->hyp_synic_event_page); hv_cpu->hyp_synic_event_page = NULL; } } else { diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index f1c17fb60dc1..6b67ac616789 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -793,6 +793,9 @@ static const struct hv_status_info hv_status_infos[] = { _STATUS_INFO(HV_STATUS_UNKNOWN_PROPERTY, -EIO), _STATUS_INFO(HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE, -EIO), _STATUS_INFO(HV_STATUS_INSUFFICIENT_MEMORY, -ENOMEM), + _STATUS_INFO(HV_STATUS_INSUFFICIENT_CONTIGUOUS_MEMORY, -ENOMEM), + _STATUS_INFO(HV_STATUS_INSUFFICIENT_ROOT_MEMORY, -ENOMEM), + _STATUS_INFO(HV_STATUS_INSUFFICIENT_CONTIGUOUS_ROOT_MEMORY, -ENOMEM), _STATUS_INFO(HV_STATUS_INVALID_PARTITION_ID, -EINVAL), _STATUS_INFO(HV_STATUS_INVALID_VP_INDEX, -EINVAL), _STATUS_INFO(HV_STATUS_NOT_FOUND, -EIO), diff --git a/drivers/hv/hv_proc.c b/drivers/hv/hv_proc.c index fbb4eb3901bb..5f4fd9c3231c 100644 --- a/drivers/hv/hv_proc.c +++ b/drivers/hv/hv_proc.c @@ -110,6 +110,50 @@ free_buf: } EXPORT_SYMBOL_GPL(hv_call_deposit_pages); +int 
hv_deposit_memory_node(int node, u64 partition_id, + u64 hv_status) +{ + u32 num_pages = 1; + + switch (hv_result(hv_status)) { + case HV_STATUS_INSUFFICIENT_MEMORY: + break; + case HV_STATUS_INSUFFICIENT_CONTIGUOUS_MEMORY: + num_pages = HV_MAX_CONTIGUOUS_ALLOCATION_PAGES; + break; + + case HV_STATUS_INSUFFICIENT_CONTIGUOUS_ROOT_MEMORY: + num_pages = HV_MAX_CONTIGUOUS_ALLOCATION_PAGES; + fallthrough; + case HV_STATUS_INSUFFICIENT_ROOT_MEMORY: + if (!hv_root_partition()) { + hv_status_err(hv_status, "Unexpected root memory deposit\n"); + return -ENOMEM; + } + partition_id = HV_PARTITION_ID_SELF; + break; + + default: + hv_status_err(hv_status, "Unexpected!\n"); + return -ENOMEM; + } + return hv_call_deposit_pages(node, partition_id, num_pages); +} +EXPORT_SYMBOL_GPL(hv_deposit_memory_node); + +bool hv_result_needs_memory(u64 status) +{ + switch (hv_result(status)) { + case HV_STATUS_INSUFFICIENT_MEMORY: + case HV_STATUS_INSUFFICIENT_CONTIGUOUS_MEMORY: + case HV_STATUS_INSUFFICIENT_ROOT_MEMORY: + case HV_STATUS_INSUFFICIENT_CONTIGUOUS_ROOT_MEMORY: + return true; + } + return false; +} +EXPORT_SYMBOL_GPL(hv_result_needs_memory); + int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id) { struct hv_input_add_logical_processor *input; @@ -137,7 +181,7 @@ int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id) input, output); local_irq_restore(flags); - if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) { + if (!hv_result_needs_memory(status)) { if (!hv_result_success(status)) { hv_status_err(status, "cpu %u apic ID: %u\n", lp_index, apic_id); @@ -145,7 +189,8 @@ int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id) } break; } - ret = hv_call_deposit_pages(node, hv_current_partition_id, 1); + ret = hv_deposit_memory_node(node, hv_current_partition_id, + status); } while (!ret); return ret; @@ -179,7 +224,7 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags) status = hv_do_hypercall(HVCALL_CREATE_VP, input, 
NULL); local_irq_restore(irq_flags); - if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) { + if (!hv_result_needs_memory(status)) { if (!hv_result_success(status)) { hv_status_err(status, "vcpu: %u, lp: %u\n", vp_index, flags); @@ -187,7 +232,7 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags) } break; } - ret = hv_call_deposit_pages(node, partition_id, 1); + ret = hv_deposit_memory_node(node, partition_id, status); } while (!ret); diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index cdbc5f5c3215..7bd8f8486e85 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -370,8 +370,8 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type) * CHANNELMSG_UNLOAD_RESPONSE and we don't care about other messages * on crash. */ - if (cmpxchg(&msg->header.message_type, old_msg_type, - HVMSG_NONE) != old_msg_type) + if (!try_cmpxchg(&msg->header.message_type, + &old_msg_type, HVMSG_NONE)) return; /* diff --git a/drivers/hv/mshv_debugfs.c b/drivers/hv/mshv_debugfs.c new file mode 100644 index 000000000000..ebf2549eb44d --- /dev/null +++ b/drivers/hv/mshv_debugfs.c @@ -0,0 +1,726 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2026, Microsoft Corporation. + * + * The /sys/kernel/debug/mshv directory contents. + * Contains various statistics data, provided by the hypervisor. 
+ * + * Authors: Microsoft Linux virtualization team + */ + +#include <linux/debugfs.h> +#include <linux/stringify.h> +#include <asm/mshyperv.h> +#include <linux/slab.h> + +#include "mshv.h" +#include "mshv_root.h" + +/* Ensure this file is not used elsewhere by accident */ +#define MSHV_DEBUGFS_C +#include "mshv_debugfs_counters.c" + +#define U32_BUF_SZ 11 +#define U64_BUF_SZ 21 +/* Only support SELF and PARENT areas */ +#define NUM_STATS_AREAS 2 +static_assert(HV_STATS_AREA_SELF == 0 && HV_STATS_AREA_PARENT == 1, + "SELF and PARENT areas must be usable as indices into an array of size NUM_STATS_AREAS"); +/* HV_HYPERVISOR_COUNTER */ +#define HV_HYPERVISOR_COUNTER_LOGICAL_PROCESSORS 1 + +static struct dentry *mshv_debugfs; +static struct dentry *mshv_debugfs_partition; +static struct dentry *mshv_debugfs_lp; +static struct dentry **parent_vp_stats; +static struct dentry *parent_partition_stats; + +static u64 mshv_lps_count; +static struct hv_stats_page **mshv_lps_stats; + +static int lp_stats_show(struct seq_file *m, void *v) +{ + const struct hv_stats_page *stats = m->private; + int idx; + + for (idx = 0; idx < ARRAY_SIZE(hv_lp_counters); idx++) { + char *name = hv_lp_counters[idx]; + + if (!name) + continue; + seq_printf(m, "%-32s: %llu\n", name, stats->data[idx]); + } + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(lp_stats); + +static void mshv_lp_stats_unmap(u32 lp_index) +{ + union hv_stats_object_identity identity = { + .lp.lp_index = lp_index, + .lp.stats_area_type = HV_STATS_AREA_SELF, + }; + int err; + + err = hv_unmap_stats_page(HV_STATS_OBJECT_LOGICAL_PROCESSOR, + mshv_lps_stats[lp_index], &identity); + if (err) + pr_err("%s: failed to unmap logical processor %u stats, err: %d\n", + __func__, lp_index, err); + + mshv_lps_stats[lp_index] = NULL; +} + +static struct hv_stats_page * __init mshv_lp_stats_map(u32 lp_index) +{ + union hv_stats_object_identity identity = { + .lp.lp_index = lp_index, + .lp.stats_area_type = HV_STATS_AREA_SELF, + }; + struct 
hv_stats_page *stats; + int err; + + err = hv_map_stats_page(HV_STATS_OBJECT_LOGICAL_PROCESSOR, &identity, + &stats); + if (err) { + pr_err("%s: failed to map logical processor %u stats, err: %d\n", + __func__, lp_index, err); + return ERR_PTR(err); + } + mshv_lps_stats[lp_index] = stats; + + return stats; +} + +static struct hv_stats_page * __init lp_debugfs_stats_create(u32 lp_index, + struct dentry *parent) +{ + struct dentry *dentry; + struct hv_stats_page *stats; + + stats = mshv_lp_stats_map(lp_index); + if (IS_ERR(stats)) + return stats; + + dentry = debugfs_create_file("stats", 0400, parent, + stats, &lp_stats_fops); + if (IS_ERR(dentry)) { + mshv_lp_stats_unmap(lp_index); + return ERR_CAST(dentry); + } + return stats; +} + +static int __init lp_debugfs_create(u32 lp_index, struct dentry *parent) +{ + struct dentry *idx; + char lp_idx_str[U32_BUF_SZ]; + struct hv_stats_page *stats; + int err; + + sprintf(lp_idx_str, "%u", lp_index); + + idx = debugfs_create_dir(lp_idx_str, parent); + if (IS_ERR(idx)) + return PTR_ERR(idx); + + stats = lp_debugfs_stats_create(lp_index, idx); + if (IS_ERR(stats)) { + err = PTR_ERR(stats); + goto remove_debugfs_lp_idx; + } + + return 0; + +remove_debugfs_lp_idx: + debugfs_remove_recursive(idx); + return err; +} + +static void mshv_debugfs_lp_remove(void) +{ + int lp_index; + + debugfs_remove_recursive(mshv_debugfs_lp); + + for (lp_index = 0; lp_index < mshv_lps_count; lp_index++) + mshv_lp_stats_unmap(lp_index); + + kfree(mshv_lps_stats); + mshv_lps_stats = NULL; +} + +static int __init mshv_debugfs_lp_create(struct dentry *parent) +{ + struct dentry *lp_dir; + int err, lp_index; + + mshv_lps_stats = kcalloc(mshv_lps_count, + sizeof(*mshv_lps_stats), + GFP_KERNEL_ACCOUNT); + + if (!mshv_lps_stats) + return -ENOMEM; + + lp_dir = debugfs_create_dir("lp", parent); + if (IS_ERR(lp_dir)) { + err = PTR_ERR(lp_dir); + goto free_lp_stats; + } + + for (lp_index = 0; lp_index < mshv_lps_count; lp_index++) { + err = 
lp_debugfs_create(lp_index, lp_dir); + if (err) + goto remove_debugfs_lps; + } + + mshv_debugfs_lp = lp_dir; + + return 0; + +remove_debugfs_lps: + for (lp_index -= 1; lp_index >= 0; lp_index--) + mshv_lp_stats_unmap(lp_index); + debugfs_remove_recursive(lp_dir); +free_lp_stats: + kfree(mshv_lps_stats); + mshv_lps_stats = NULL; + + return err; +} + +static int vp_stats_show(struct seq_file *m, void *v) +{ + const struct hv_stats_page **pstats = m->private; + u64 parent_val, self_val; + int idx; + + /* + * For VP and partition stats, there may be two stats areas mapped, + * SELF and PARENT. These refer to the privilege level of the data in + * each page. Some fields may be 0 in SELF and nonzero in PARENT, or + * vice versa. + * + * Hence, prioritize printing from the PARENT page (more privileged + * data), but use the value from the SELF page if the PARENT value is + * 0. + */ + + for (idx = 0; idx < ARRAY_SIZE(hv_vp_counters); idx++) { + char *name = hv_vp_counters[idx]; + + if (!name) + continue; + + parent_val = pstats[HV_STATS_AREA_PARENT]->data[idx]; + self_val = pstats[HV_STATS_AREA_SELF]->data[idx]; + seq_printf(m, "%-43s: %llu\n", name, + parent_val ? 
parent_val : self_val); + } + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(vp_stats); + +static void vp_debugfs_remove(struct dentry *vp_stats) +{ + debugfs_remove_recursive(vp_stats->d_parent); +} + +static int vp_debugfs_create(u64 partition_id, u32 vp_index, + struct hv_stats_page **pstats, + struct dentry **vp_stats_ptr, + struct dentry *parent) +{ + struct dentry *vp_idx_dir, *d; + char vp_idx_str[U32_BUF_SZ]; + int err; + + sprintf(vp_idx_str, "%u", vp_index); + + vp_idx_dir = debugfs_create_dir(vp_idx_str, parent); + if (IS_ERR(vp_idx_dir)) + return PTR_ERR(vp_idx_dir); + + d = debugfs_create_file("stats", 0400, vp_idx_dir, + pstats, &vp_stats_fops); + if (IS_ERR(d)) { + err = PTR_ERR(d); + goto remove_debugfs_vp_idx; + } + + *vp_stats_ptr = d; + + return 0; + +remove_debugfs_vp_idx: + debugfs_remove_recursive(vp_idx_dir); + return err; +} + +static int partition_stats_show(struct seq_file *m, void *v) +{ + const struct hv_stats_page **pstats = m->private; + u64 parent_val, self_val; + int idx; + + for (idx = 0; idx < ARRAY_SIZE(hv_partition_counters); idx++) { + char *name = hv_partition_counters[idx]; + + if (!name) + continue; + + parent_val = pstats[HV_STATS_AREA_PARENT]->data[idx]; + self_val = pstats[HV_STATS_AREA_SELF]->data[idx]; + seq_printf(m, "%-37s: %llu\n", name, + parent_val ? parent_val : self_val); + } + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(partition_stats); + +static void mshv_partition_stats_unmap(u64 partition_id, + struct hv_stats_page *stats_page, + enum hv_stats_area_type stats_area_type) +{ + union hv_stats_object_identity identity = { + .partition.partition_id = partition_id, + .partition.stats_area_type = stats_area_type, + }; + int err; + + err = hv_unmap_stats_page(HV_STATS_OBJECT_PARTITION, stats_page, + &identity); + if (err) + pr_err("%s: failed to unmap partition %lld %s stats, err: %d\n", + __func__, partition_id, + (stats_area_type == HV_STATS_AREA_SELF) ? 
"self" : "parent", + err); +} + +static struct hv_stats_page *mshv_partition_stats_map(u64 partition_id, + enum hv_stats_area_type stats_area_type) +{ + union hv_stats_object_identity identity = { + .partition.partition_id = partition_id, + .partition.stats_area_type = stats_area_type, + }; + struct hv_stats_page *stats; + int err; + + err = hv_map_stats_page(HV_STATS_OBJECT_PARTITION, &identity, &stats); + if (err) { + pr_err("%s: failed to map partition %lld %s stats, err: %d\n", + __func__, partition_id, + (stats_area_type == HV_STATS_AREA_SELF) ? "self" : "parent", + err); + return ERR_PTR(err); + } + return stats; +} + +static int mshv_debugfs_partition_stats_create(u64 partition_id, + struct dentry **partition_stats_ptr, + struct dentry *parent) +{ + struct dentry *dentry; + struct hv_stats_page **pstats; + int err; + + pstats = kcalloc(NUM_STATS_AREAS, sizeof(struct hv_stats_page *), + GFP_KERNEL_ACCOUNT); + if (!pstats) + return -ENOMEM; + + pstats[HV_STATS_AREA_SELF] = mshv_partition_stats_map(partition_id, + HV_STATS_AREA_SELF); + if (IS_ERR(pstats[HV_STATS_AREA_SELF])) { + err = PTR_ERR(pstats[HV_STATS_AREA_SELF]); + goto cleanup; + } + + /* + * L1VH partition cannot access its partition stats in parent area. 
+ */ + if (is_l1vh_parent(partition_id)) { + pstats[HV_STATS_AREA_PARENT] = pstats[HV_STATS_AREA_SELF]; + } else { + pstats[HV_STATS_AREA_PARENT] = mshv_partition_stats_map(partition_id, + HV_STATS_AREA_PARENT); + if (IS_ERR(pstats[HV_STATS_AREA_PARENT])) { + err = PTR_ERR(pstats[HV_STATS_AREA_PARENT]); + goto unmap_self; + } + if (!pstats[HV_STATS_AREA_PARENT]) + pstats[HV_STATS_AREA_PARENT] = pstats[HV_STATS_AREA_SELF]; + } + + dentry = debugfs_create_file("stats", 0400, parent, + pstats, &partition_stats_fops); + if (IS_ERR(dentry)) { + err = PTR_ERR(dentry); + goto unmap_partition_stats; + } + + *partition_stats_ptr = dentry; + return 0; + +unmap_partition_stats: + if (pstats[HV_STATS_AREA_PARENT] != pstats[HV_STATS_AREA_SELF]) + mshv_partition_stats_unmap(partition_id, pstats[HV_STATS_AREA_PARENT], + HV_STATS_AREA_PARENT); +unmap_self: + mshv_partition_stats_unmap(partition_id, pstats[HV_STATS_AREA_SELF], + HV_STATS_AREA_SELF); +cleanup: + kfree(pstats); + return err; +} + +static void partition_debugfs_remove(u64 partition_id, struct dentry *dentry) +{ + struct hv_stats_page **pstats = NULL; + + pstats = dentry->d_inode->i_private; + + debugfs_remove_recursive(dentry->d_parent); + + if (pstats[HV_STATS_AREA_PARENT] != pstats[HV_STATS_AREA_SELF]) { + mshv_partition_stats_unmap(partition_id, + pstats[HV_STATS_AREA_PARENT], + HV_STATS_AREA_PARENT); + } + + mshv_partition_stats_unmap(partition_id, + pstats[HV_STATS_AREA_SELF], + HV_STATS_AREA_SELF); + + kfree(pstats); +} + +static int partition_debugfs_create(u64 partition_id, + struct dentry **vp_dir_ptr, + struct dentry **partition_stats_ptr, + struct dentry *parent) +{ + char part_id_str[U64_BUF_SZ]; + struct dentry *part_id_dir, *vp_dir; + int err; + + if (is_l1vh_parent(partition_id)) + sprintf(part_id_str, "self"); + else + sprintf(part_id_str, "%llu", partition_id); + + part_id_dir = debugfs_create_dir(part_id_str, parent); + if (IS_ERR(part_id_dir)) + return PTR_ERR(part_id_dir); + + vp_dir = 
debugfs_create_dir("vp", part_id_dir); + if (IS_ERR(vp_dir)) { + err = PTR_ERR(vp_dir); + goto remove_debugfs_partition_id; + } + + err = mshv_debugfs_partition_stats_create(partition_id, + partition_stats_ptr, + part_id_dir); + if (err) + goto remove_debugfs_partition_id; + + *vp_dir_ptr = vp_dir; + + return 0; + +remove_debugfs_partition_id: + debugfs_remove_recursive(part_id_dir); + return err; +} + +static void parent_vp_debugfs_remove(u32 vp_index, + struct dentry *vp_stats_ptr) +{ + struct hv_stats_page **pstats; + + pstats = vp_stats_ptr->d_inode->i_private; + vp_debugfs_remove(vp_stats_ptr); + mshv_vp_stats_unmap(hv_current_partition_id, vp_index, pstats); + kfree(pstats); +} + +static void mshv_debugfs_parent_partition_remove(void) +{ + int idx; + + for_each_online_cpu(idx) + parent_vp_debugfs_remove(hv_vp_index[idx], + parent_vp_stats[idx]); + + partition_debugfs_remove(hv_current_partition_id, + parent_partition_stats); + kfree(parent_vp_stats); + parent_vp_stats = NULL; + parent_partition_stats = NULL; +} + +static int __init parent_vp_debugfs_create(u32 vp_index, + struct dentry **vp_stats_ptr, + struct dentry *parent) +{ + struct hv_stats_page **pstats; + int err; + + pstats = kcalloc(NUM_STATS_AREAS, sizeof(struct hv_stats_page *), + GFP_KERNEL_ACCOUNT); + if (!pstats) + return -ENOMEM; + + err = mshv_vp_stats_map(hv_current_partition_id, vp_index, pstats); + if (err) + goto cleanup; + + err = vp_debugfs_create(hv_current_partition_id, vp_index, pstats, + vp_stats_ptr, parent); + if (err) + goto unmap_vp_stats; + + return 0; + +unmap_vp_stats: + mshv_vp_stats_unmap(hv_current_partition_id, vp_index, pstats); +cleanup: + kfree(pstats); + return err; +} + +static int __init mshv_debugfs_parent_partition_create(void) +{ + struct dentry *vp_dir; + int err, idx, i; + + mshv_debugfs_partition = debugfs_create_dir("partition", + mshv_debugfs); + if (IS_ERR(mshv_debugfs_partition)) + return PTR_ERR(mshv_debugfs_partition); + + err = 
partition_debugfs_create(hv_current_partition_id, + &vp_dir, + &parent_partition_stats, + mshv_debugfs_partition); + if (err) + goto remove_debugfs_partition; + + parent_vp_stats = kcalloc(nr_cpu_ids, sizeof(*parent_vp_stats), + GFP_KERNEL); + if (!parent_vp_stats) { + err = -ENOMEM; + goto remove_debugfs_partition; + } + + for_each_online_cpu(idx) { + err = parent_vp_debugfs_create(hv_vp_index[idx], + &parent_vp_stats[idx], + vp_dir); + if (err) + goto remove_debugfs_partition_vp; + } + + return 0; + +remove_debugfs_partition_vp: + for_each_online_cpu(i) { + if (i >= idx) + break; + parent_vp_debugfs_remove(i, parent_vp_stats[i]); + } + partition_debugfs_remove(hv_current_partition_id, + parent_partition_stats); + + kfree(parent_vp_stats); + parent_vp_stats = NULL; + parent_partition_stats = NULL; + +remove_debugfs_partition: + debugfs_remove_recursive(mshv_debugfs_partition); + mshv_debugfs_partition = NULL; + return err; +} + +static int hv_stats_show(struct seq_file *m, void *v) +{ + const struct hv_stats_page *stats = m->private; + int idx; + + for (idx = 0; idx < ARRAY_SIZE(hv_hypervisor_counters); idx++) { + char *name = hv_hypervisor_counters[idx]; + + if (!name) + continue; + seq_printf(m, "%-27s: %llu\n", name, stats->data[idx]); + } + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(hv_stats); + +static void mshv_hv_stats_unmap(void) +{ + union hv_stats_object_identity identity = { + .hv.stats_area_type = HV_STATS_AREA_SELF, + }; + int err; + + err = hv_unmap_stats_page(HV_STATS_OBJECT_HYPERVISOR, NULL, &identity); + if (err) + pr_err("%s: failed to unmap hypervisor stats: %d\n", + __func__, err); +} + +static void * __init mshv_hv_stats_map(void) +{ + union hv_stats_object_identity identity = { + .hv.stats_area_type = HV_STATS_AREA_SELF, + }; + struct hv_stats_page *stats; + int err; + + err = hv_map_stats_page(HV_STATS_OBJECT_HYPERVISOR, &identity, &stats); + if (err) { + pr_err("%s: failed to map hypervisor stats: %d\n", + __func__, err); + return ERR_PTR(err); 
+ } + return stats; +} + +static int __init mshv_debugfs_hv_stats_create(struct dentry *parent) +{ + struct dentry *dentry; + u64 *stats; + int err; + + stats = mshv_hv_stats_map(); + if (IS_ERR(stats)) + return PTR_ERR(stats); + + dentry = debugfs_create_file("stats", 0400, parent, + stats, &hv_stats_fops); + if (IS_ERR(dentry)) { + err = PTR_ERR(dentry); + pr_err("%s: failed to create hypervisor stats dentry: %d\n", + __func__, err); + goto unmap_hv_stats; + } + + mshv_lps_count = stats[HV_HYPERVISOR_COUNTER_LOGICAL_PROCESSORS]; + + return 0; + +unmap_hv_stats: + mshv_hv_stats_unmap(); + return err; +} + +int mshv_debugfs_vp_create(struct mshv_vp *vp) +{ + struct mshv_partition *p = vp->vp_partition; + + if (!mshv_debugfs) + return 0; + + return vp_debugfs_create(p->pt_id, vp->vp_index, + vp->vp_stats_pages, + &vp->vp_stats_dentry, + p->pt_vp_dentry); +} + +void mshv_debugfs_vp_remove(struct mshv_vp *vp) +{ + if (!mshv_debugfs) + return; + + vp_debugfs_remove(vp->vp_stats_dentry); +} + +int mshv_debugfs_partition_create(struct mshv_partition *partition) +{ + int err; + + if (!mshv_debugfs) + return 0; + + err = partition_debugfs_create(partition->pt_id, + &partition->pt_vp_dentry, + &partition->pt_stats_dentry, + mshv_debugfs_partition); + if (err) + return err; + + return 0; +} + +void mshv_debugfs_partition_remove(struct mshv_partition *partition) +{ + if (!mshv_debugfs) + return; + + partition_debugfs_remove(partition->pt_id, + partition->pt_stats_dentry); +} + +int __init mshv_debugfs_init(void) +{ + int err; + + mshv_debugfs = debugfs_create_dir("mshv", NULL); + if (IS_ERR(mshv_debugfs)) { + pr_err("%s: failed to create debugfs directory\n", __func__); + return PTR_ERR(mshv_debugfs); + } + + if (hv_root_partition()) { + err = mshv_debugfs_hv_stats_create(mshv_debugfs); + if (err) + goto remove_mshv_dir; + + err = mshv_debugfs_lp_create(mshv_debugfs); + if (err) + goto unmap_hv_stats; + } + + err = mshv_debugfs_parent_partition_create(); + if (err) + goto 
unmap_lp_stats; + + return 0; + +unmap_lp_stats: + if (hv_root_partition()) { + mshv_debugfs_lp_remove(); + mshv_debugfs_lp = NULL; + } +unmap_hv_stats: + if (hv_root_partition()) + mshv_hv_stats_unmap(); +remove_mshv_dir: + debugfs_remove_recursive(mshv_debugfs); + mshv_debugfs = NULL; + return err; +} + +void mshv_debugfs_exit(void) +{ + mshv_debugfs_parent_partition_remove(); + + if (hv_root_partition()) { + mshv_debugfs_lp_remove(); + mshv_debugfs_lp = NULL; + mshv_hv_stats_unmap(); + } + + debugfs_remove_recursive(mshv_debugfs); + mshv_debugfs = NULL; + mshv_debugfs_partition = NULL; +} diff --git a/drivers/hv/mshv_debugfs_counters.c b/drivers/hv/mshv_debugfs_counters.c new file mode 100644 index 000000000000..978536ba691f --- /dev/null +++ b/drivers/hv/mshv_debugfs_counters.c @@ -0,0 +1,490 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2026, Microsoft Corporation. + * + * Data for printing stats page counters via debugfs. + * + * Authors: Microsoft Linux virtualization team + */ + +/* + * For simplicity, this file is included directly in mshv_debugfs.c. + * If these are ever needed elsewhere they should be compiled separately. + * Ensure this file is not used twice by accident. 
+ */ +#ifndef MSHV_DEBUGFS_C +#error "This file should only be included in mshv_debugfs.c" +#endif + +/* HV_HYPERVISOR_COUNTER */ +static char *hv_hypervisor_counters[] = { + [1] = "HvLogicalProcessors", + [2] = "HvPartitions", + [3] = "HvTotalPages", + [4] = "HvVirtualProcessors", + [5] = "HvMonitoredNotifications", + [6] = "HvModernStandbyEntries", + [7] = "HvPlatformIdleTransitions", + [8] = "HvHypervisorStartupCost", + + [10] = "HvIOSpacePages", + [11] = "HvNonEssentialPagesForDump", + [12] = "HvSubsumedPages", +}; + +/* HV_CPU_COUNTER */ +static char *hv_lp_counters[] = { + [1] = "LpGlobalTime", + [2] = "LpTotalRunTime", + [3] = "LpHypervisorRunTime", + [4] = "LpHardwareInterrupts", + [5] = "LpContextSwitches", + [6] = "LpInterProcessorInterrupts", + [7] = "LpSchedulerInterrupts", + [8] = "LpTimerInterrupts", + [9] = "LpInterProcessorInterruptsSent", + [10] = "LpProcessorHalts", + [11] = "LpMonitorTransitionCost", + [12] = "LpContextSwitchTime", + [13] = "LpC1TransitionsCount", + [14] = "LpC1RunTime", + [15] = "LpC2TransitionsCount", + [16] = "LpC2RunTime", + [17] = "LpC3TransitionsCount", + [18] = "LpC3RunTime", + [19] = "LpRootVpIndex", + [20] = "LpIdleSequenceNumber", + [21] = "LpGlobalTscCount", + [22] = "LpActiveTscCount", + [23] = "LpIdleAccumulation", + [24] = "LpReferenceCycleCount0", + [25] = "LpActualCycleCount0", + [26] = "LpReferenceCycleCount1", + [27] = "LpActualCycleCount1", + [28] = "LpProximityDomainId", + [29] = "LpPostedInterruptNotifications", + [30] = "LpBranchPredictorFlushes", +#if IS_ENABLED(CONFIG_X86_64) + [31] = "LpL1DataCacheFlushes", + [32] = "LpImmediateL1DataCacheFlushes", + [33] = "LpMbFlushes", + [34] = "LpCounterRefreshSequenceNumber", + [35] = "LpCounterRefreshReferenceTime", + [36] = "LpIdleAccumulationSnapshot", + [37] = "LpActiveTscCountSnapshot", + [38] = "LpHwpRequestContextSwitches", + [39] = "LpPlaceholder1", + [40] = "LpPlaceholder2", + [41] = "LpPlaceholder3", + [42] = "LpPlaceholder4", + [43] = "LpPlaceholder5", + 
[44] = "LpPlaceholder6", + [45] = "LpPlaceholder7", + [46] = "LpPlaceholder8", + [47] = "LpPlaceholder9", + [48] = "LpSchLocalRunListSize", + [49] = "LpReserveGroupId", + [50] = "LpRunningPriority", + [51] = "LpPerfmonInterruptCount", +#elif IS_ENABLED(CONFIG_ARM64) + [31] = "LpCounterRefreshSequenceNumber", + [32] = "LpCounterRefreshReferenceTime", + [33] = "LpIdleAccumulationSnapshot", + [34] = "LpActiveTscCountSnapshot", + [35] = "LpHwpRequestContextSwitches", + [36] = "LpPlaceholder2", + [37] = "LpPlaceholder3", + [38] = "LpPlaceholder4", + [39] = "LpPlaceholder5", + [40] = "LpPlaceholder6", + [41] = "LpPlaceholder7", + [42] = "LpPlaceholder8", + [43] = "LpPlaceholder9", + [44] = "LpSchLocalRunListSize", + [45] = "LpReserveGroupId", + [46] = "LpRunningPriority", +#endif +}; + +/* HV_PROCESS_COUNTER */ +static char *hv_partition_counters[] = { + [1] = "PtVirtualProcessors", + + [3] = "PtTlbSize", + [4] = "PtAddressSpaces", + [5] = "PtDepositedPages", + [6] = "PtGpaPages", + [7] = "PtGpaSpaceModifications", + [8] = "PtVirtualTlbFlushEntires", + [9] = "PtRecommendedTlbSize", + [10] = "PtGpaPages4K", + [11] = "PtGpaPages2M", + [12] = "PtGpaPages1G", + [13] = "PtGpaPages512G", + [14] = "PtDevicePages4K", + [15] = "PtDevicePages2M", + [16] = "PtDevicePages1G", + [17] = "PtDevicePages512G", + [18] = "PtAttachedDevices", + [19] = "PtDeviceInterruptMappings", + [20] = "PtIoTlbFlushes", + [21] = "PtIoTlbFlushCost", + [22] = "PtDeviceInterruptErrors", + [23] = "PtDeviceDmaErrors", + [24] = "PtDeviceInterruptThrottleEvents", + [25] = "PtSkippedTimerTicks", + [26] = "PtPartitionId", +#if IS_ENABLED(CONFIG_X86_64) + [27] = "PtNestedTlbSize", + [28] = "PtRecommendedNestedTlbSize", + [29] = "PtNestedTlbFreeListSize", + [30] = "PtNestedTlbTrimmedPages", + [31] = "PtPagesShattered", + [32] = "PtPagesRecombined", + [33] = "PtHwpRequestValue", + [34] = "PtAutoSuspendEnableTime", + [35] = "PtAutoSuspendTriggerTime", + [36] = "PtAutoSuspendDisableTime", + [37] = "PtPlaceholder1", + 
[38] = "PtPlaceholder2", + [39] = "PtPlaceholder3", + [40] = "PtPlaceholder4", + [41] = "PtPlaceholder5", + [42] = "PtPlaceholder6", + [43] = "PtPlaceholder7", + [44] = "PtPlaceholder8", + [45] = "PtHypervisorStateTransferGeneration", + [46] = "PtNumberofActiveChildPartitions", +#elif IS_ENABLED(CONFIG_ARM64) + [27] = "PtHwpRequestValue", + [28] = "PtAutoSuspendEnableTime", + [29] = "PtAutoSuspendTriggerTime", + [30] = "PtAutoSuspendDisableTime", + [31] = "PtPlaceholder1", + [32] = "PtPlaceholder2", + [33] = "PtPlaceholder3", + [34] = "PtPlaceholder4", + [35] = "PtPlaceholder5", + [36] = "PtPlaceholder6", + [37] = "PtPlaceholder7", + [38] = "PtPlaceholder8", + [39] = "PtHypervisorStateTransferGeneration", + [40] = "PtNumberofActiveChildPartitions", +#endif +}; + +/* HV_THREAD_COUNTER */ +static char *hv_vp_counters[] = { + [1] = "VpTotalRunTime", + [2] = "VpHypervisorRunTime", + [3] = "VpRemoteNodeRunTime", + [4] = "VpNormalizedRunTime", + [5] = "VpIdealCpu", + + [7] = "VpHypercallsCount", + [8] = "VpHypercallsTime", +#if IS_ENABLED(CONFIG_X86_64) + [9] = "VpPageInvalidationsCount", + [10] = "VpPageInvalidationsTime", + [11] = "VpControlRegisterAccessesCount", + [12] = "VpControlRegisterAccessesTime", + [13] = "VpIoInstructionsCount", + [14] = "VpIoInstructionsTime", + [15] = "VpHltInstructionsCount", + [16] = "VpHltInstructionsTime", + [17] = "VpMwaitInstructionsCount", + [18] = "VpMwaitInstructionsTime", + [19] = "VpCpuidInstructionsCount", + [20] = "VpCpuidInstructionsTime", + [21] = "VpMsrAccessesCount", + [22] = "VpMsrAccessesTime", + [23] = "VpOtherInterceptsCount", + [24] = "VpOtherInterceptsTime", + [25] = "VpExternalInterruptsCount", + [26] = "VpExternalInterruptsTime", + [27] = "VpPendingInterruptsCount", + [28] = "VpPendingInterruptsTime", + [29] = "VpEmulatedInstructionsCount", + [30] = "VpEmulatedInstructionsTime", + [31] = "VpDebugRegisterAccessesCount", + [32] = "VpDebugRegisterAccessesTime", + [33] = "VpPageFaultInterceptsCount", + [34] = 
"VpPageFaultInterceptsTime", + [35] = "VpGuestPageTableMaps", + [36] = "VpLargePageTlbFills", + [37] = "VpSmallPageTlbFills", + [38] = "VpReflectedGuestPageFaults", + [39] = "VpApicMmioAccesses", + [40] = "VpIoInterceptMessages", + [41] = "VpMemoryInterceptMessages", + [42] = "VpApicEoiAccesses", + [43] = "VpOtherMessages", + [44] = "VpPageTableAllocations", + [45] = "VpLogicalProcessorMigrations", + [46] = "VpAddressSpaceEvictions", + [47] = "VpAddressSpaceSwitches", + [48] = "VpAddressDomainFlushes", + [49] = "VpAddressSpaceFlushes", + [50] = "VpGlobalGvaRangeFlushes", + [51] = "VpLocalGvaRangeFlushes", + [52] = "VpPageTableEvictions", + [53] = "VpPageTableReclamations", + [54] = "VpPageTableResets", + [55] = "VpPageTableValidations", + [56] = "VpApicTprAccesses", + [57] = "VpPageTableWriteIntercepts", + [58] = "VpSyntheticInterrupts", + [59] = "VpVirtualInterrupts", + [60] = "VpApicIpisSent", + [61] = "VpApicSelfIpisSent", + [62] = "VpGpaSpaceHypercalls", + [63] = "VpLogicalProcessorHypercalls", + [64] = "VpLongSpinWaitHypercalls", + [65] = "VpOtherHypercalls", + [66] = "VpSyntheticInterruptHypercalls", + [67] = "VpVirtualInterruptHypercalls", + [68] = "VpVirtualMmuHypercalls", + [69] = "VpVirtualProcessorHypercalls", + [70] = "VpHardwareInterrupts", + [71] = "VpNestedPageFaultInterceptsCount", + [72] = "VpNestedPageFaultInterceptsTime", + [73] = "VpPageScans", + [74] = "VpLogicalProcessorDispatches", + [75] = "VpWaitingForCpuTime", + [76] = "VpExtendedHypercalls", + [77] = "VpExtendedHypercallInterceptMessages", + [78] = "VpMbecNestedPageTableSwitches", + [79] = "VpOtherReflectedGuestExceptions", + [80] = "VpGlobalIoTlbFlushes", + [81] = "VpGlobalIoTlbFlushCost", + [82] = "VpLocalIoTlbFlushes", + [83] = "VpLocalIoTlbFlushCost", + [84] = "VpHypercallsForwardedCount", + [85] = "VpHypercallsForwardingTime", + [86] = "VpPageInvalidationsForwardedCount", + [87] = "VpPageInvalidationsForwardingTime", + [88] = "VpControlRegisterAccessesForwardedCount", + [89] = 
"VpControlRegisterAccessesForwardingTime", + [90] = "VpIoInstructionsForwardedCount", + [91] = "VpIoInstructionsForwardingTime", + [92] = "VpHltInstructionsForwardedCount", + [93] = "VpHltInstructionsForwardingTime", + [94] = "VpMwaitInstructionsForwardedCount", + [95] = "VpMwaitInstructionsForwardingTime", + [96] = "VpCpuidInstructionsForwardedCount", + [97] = "VpCpuidInstructionsForwardingTime", + [98] = "VpMsrAccessesForwardedCount", + [99] = "VpMsrAccessesForwardingTime", + [100] = "VpOtherInterceptsForwardedCount", + [101] = "VpOtherInterceptsForwardingTime", + [102] = "VpExternalInterruptsForwardedCount", + [103] = "VpExternalInterruptsForwardingTime", + [104] = "VpPendingInterruptsForwardedCount", + [105] = "VpPendingInterruptsForwardingTime", + [106] = "VpEmulatedInstructionsForwardedCount", + [107] = "VpEmulatedInstructionsForwardingTime", + [108] = "VpDebugRegisterAccessesForwardedCount", + [109] = "VpDebugRegisterAccessesForwardingTime", + [110] = "VpPageFaultInterceptsForwardedCount", + [111] = "VpPageFaultInterceptsForwardingTime", + [112] = "VpVmclearEmulationCount", + [113] = "VpVmclearEmulationTime", + [114] = "VpVmptrldEmulationCount", + [115] = "VpVmptrldEmulationTime", + [116] = "VpVmptrstEmulationCount", + [117] = "VpVmptrstEmulationTime", + [118] = "VpVmreadEmulationCount", + [119] = "VpVmreadEmulationTime", + [120] = "VpVmwriteEmulationCount", + [121] = "VpVmwriteEmulationTime", + [122] = "VpVmxoffEmulationCount", + [123] = "VpVmxoffEmulationTime", + [124] = "VpVmxonEmulationCount", + [125] = "VpVmxonEmulationTime", + [126] = "VpNestedVMEntriesCount", + [127] = "VpNestedVMEntriesTime", + [128] = "VpNestedSLATSoftPageFaultsCount", + [129] = "VpNestedSLATSoftPageFaultsTime", + [130] = "VpNestedSLATHardPageFaultsCount", + [131] = "VpNestedSLATHardPageFaultsTime", + [132] = "VpInvEptAllContextEmulationCount", + [133] = "VpInvEptAllContextEmulationTime", + [134] = "VpInvEptSingleContextEmulationCount", + [135] = 
"VpInvEptSingleContextEmulationTime", + [136] = "VpInvVpidAllContextEmulationCount", + [137] = "VpInvVpidAllContextEmulationTime", + [138] = "VpInvVpidSingleContextEmulationCount", + [139] = "VpInvVpidSingleContextEmulationTime", + [140] = "VpInvVpidSingleAddressEmulationCount", + [141] = "VpInvVpidSingleAddressEmulationTime", + [142] = "VpNestedTlbPageTableReclamations", + [143] = "VpNestedTlbPageTableEvictions", + [144] = "VpFlushGuestPhysicalAddressSpaceHypercalls", + [145] = "VpFlushGuestPhysicalAddressListHypercalls", + [146] = "VpPostedInterruptNotifications", + [147] = "VpPostedInterruptScans", + [148] = "VpTotalCoreRunTime", + [149] = "VpMaximumRunTime", + [150] = "VpHwpRequestContextSwitches", + [151] = "VpWaitingForCpuTimeBucket0", + [152] = "VpWaitingForCpuTimeBucket1", + [153] = "VpWaitingForCpuTimeBucket2", + [154] = "VpWaitingForCpuTimeBucket3", + [155] = "VpWaitingForCpuTimeBucket4", + [156] = "VpWaitingForCpuTimeBucket5", + [157] = "VpWaitingForCpuTimeBucket6", + [158] = "VpVmloadEmulationCount", + [159] = "VpVmloadEmulationTime", + [160] = "VpVmsaveEmulationCount", + [161] = "VpVmsaveEmulationTime", + [162] = "VpGifInstructionEmulationCount", + [163] = "VpGifInstructionEmulationTime", + [164] = "VpEmulatedErrataSvmInstructions", + [165] = "VpPlaceholder1", + [166] = "VpPlaceholder2", + [167] = "VpPlaceholder3", + [168] = "VpPlaceholder4", + [169] = "VpPlaceholder5", + [170] = "VpPlaceholder6", + [171] = "VpPlaceholder7", + [172] = "VpPlaceholder8", + [173] = "VpContentionTime", + [174] = "VpWakeUpTime", + [175] = "VpSchedulingPriority", + [176] = "VpRdpmcInstructionsCount", + [177] = "VpRdpmcInstructionsTime", + [178] = "VpPerfmonPmuMsrAccessesCount", + [179] = "VpPerfmonLbrMsrAccessesCount", + [180] = "VpPerfmonIptMsrAccessesCount", + [181] = "VpPerfmonInterruptCount", + [182] = "VpVtl1DispatchCount", + [183] = "VpVtl2DispatchCount", + [184] = "VpVtl2DispatchBucket0", + [185] = "VpVtl2DispatchBucket1", + [186] = "VpVtl2DispatchBucket2", + [187] = 
"VpVtl2DispatchBucket3", + [188] = "VpVtl2DispatchBucket4", + [189] = "VpVtl2DispatchBucket5", + [190] = "VpVtl2DispatchBucket6", + [191] = "VpVtl1RunTime", + [192] = "VpVtl2RunTime", + [193] = "VpIommuHypercalls", + [194] = "VpCpuGroupHypercalls", + [195] = "VpVsmHypercalls", + [196] = "VpEventLogHypercalls", + [197] = "VpDeviceDomainHypercalls", + [198] = "VpDepositHypercalls", + [199] = "VpSvmHypercalls", + [200] = "VpBusLockAcquisitionCount", + [201] = "VpLoadAvg", + [202] = "VpRootDispatchThreadBlocked", + [203] = "VpIdleCpuTime", + [204] = "VpWaitingForCpuTimeBucket7", + [205] = "VpWaitingForCpuTimeBucket8", + [206] = "VpWaitingForCpuTimeBucket9", + [207] = "VpWaitingForCpuTimeBucket10", + [208] = "VpWaitingForCpuTimeBucket11", + [209] = "VpWaitingForCpuTimeBucket12", + [210] = "VpHierarchicalSuspendTime", + [211] = "VpExpressSchedulingAttempts", + [212] = "VpExpressSchedulingCount", +#elif IS_ENABLED(CONFIG_ARM64) + [9] = "VpSysRegAccessesCount", + [10] = "VpSysRegAccessesTime", + [11] = "VpSmcInstructionsCount", + [12] = "VpSmcInstructionsTime", + [13] = "VpOtherInterceptsCount", + [14] = "VpOtherInterceptsTime", + [15] = "VpExternalInterruptsCount", + [16] = "VpExternalInterruptsTime", + [17] = "VpPendingInterruptsCount", + [18] = "VpPendingInterruptsTime", + [19] = "VpGuestPageTableMaps", + [20] = "VpLargePageTlbFills", + [21] = "VpSmallPageTlbFills", + [22] = "VpReflectedGuestPageFaults", + [23] = "VpMemoryInterceptMessages", + [24] = "VpOtherMessages", + [25] = "VpLogicalProcessorMigrations", + [26] = "VpAddressDomainFlushes", + [27] = "VpAddressSpaceFlushes", + [28] = "VpSyntheticInterrupts", + [29] = "VpVirtualInterrupts", + [30] = "VpApicSelfIpisSent", + [31] = "VpGpaSpaceHypercalls", + [32] = "VpLogicalProcessorHypercalls", + [33] = "VpLongSpinWaitHypercalls", + [34] = "VpOtherHypercalls", + [35] = "VpSyntheticInterruptHypercalls", + [36] = "VpVirtualInterruptHypercalls", + [37] = "VpVirtualMmuHypercalls", + [38] = "VpVirtualProcessorHypercalls", + 
[39] = "VpHardwareInterrupts", + [40] = "VpNestedPageFaultInterceptsCount", + [41] = "VpNestedPageFaultInterceptsTime", + [42] = "VpLogicalProcessorDispatches", + [43] = "VpWaitingForCpuTime", + [44] = "VpExtendedHypercalls", + [45] = "VpExtendedHypercallInterceptMessages", + [46] = "VpMbecNestedPageTableSwitches", + [47] = "VpOtherReflectedGuestExceptions", + [48] = "VpGlobalIoTlbFlushes", + [49] = "VpGlobalIoTlbFlushCost", + [50] = "VpLocalIoTlbFlushes", + [51] = "VpLocalIoTlbFlushCost", + [52] = "VpFlushGuestPhysicalAddressSpaceHypercalls", + [53] = "VpFlushGuestPhysicalAddressListHypercalls", + [54] = "VpPostedInterruptNotifications", + [55] = "VpPostedInterruptScans", + [56] = "VpTotalCoreRunTime", + [57] = "VpMaximumRunTime", + [58] = "VpWaitingForCpuTimeBucket0", + [59] = "VpWaitingForCpuTimeBucket1", + [60] = "VpWaitingForCpuTimeBucket2", + [61] = "VpWaitingForCpuTimeBucket3", + [62] = "VpWaitingForCpuTimeBucket4", + [63] = "VpWaitingForCpuTimeBucket5", + [64] = "VpWaitingForCpuTimeBucket6", + [65] = "VpHwpRequestContextSwitches", + [66] = "VpPlaceholder2", + [67] = "VpPlaceholder3", + [68] = "VpPlaceholder4", + [69] = "VpPlaceholder5", + [70] = "VpPlaceholder6", + [71] = "VpPlaceholder7", + [72] = "VpPlaceholder8", + [73] = "VpContentionTime", + [74] = "VpWakeUpTime", + [75] = "VpSchedulingPriority", + [76] = "VpVtl1DispatchCount", + [77] = "VpVtl2DispatchCount", + [78] = "VpVtl2DispatchBucket0", + [79] = "VpVtl2DispatchBucket1", + [80] = "VpVtl2DispatchBucket2", + [81] = "VpVtl2DispatchBucket3", + [82] = "VpVtl2DispatchBucket4", + [83] = "VpVtl2DispatchBucket5", + [84] = "VpVtl2DispatchBucket6", + [85] = "VpVtl1RunTime", + [86] = "VpVtl2RunTime", + [87] = "VpIommuHypercalls", + [88] = "VpCpuGroupHypercalls", + [89] = "VpVsmHypercalls", + [90] = "VpEventLogHypercalls", + [91] = "VpDeviceDomainHypercalls", + [92] = "VpDepositHypercalls", + [93] = "VpSvmHypercalls", + [94] = "VpLoadAvg", + [95] = "VpRootDispatchThreadBlocked", + [96] = "VpIdleCpuTime", + 
[97] = "VpWaitingForCpuTimeBucket7", + [98] = "VpWaitingForCpuTimeBucket8", + [99] = "VpWaitingForCpuTimeBucket9", + [100] = "VpWaitingForCpuTimeBucket10", + [101] = "VpWaitingForCpuTimeBucket11", + [102] = "VpWaitingForCpuTimeBucket12", + [103] = "VpHierarchicalSuspendTime", + [104] = "VpExpressSchedulingAttempts", + [105] = "VpExpressSchedulingCount", +#endif +}; diff --git a/drivers/hv/mshv_eventfd.c b/drivers/hv/mshv_eventfd.c index 0b75ff1edb73..492c6258045c 100644 --- a/drivers/hv/mshv_eventfd.c +++ b/drivers/hv/mshv_eventfd.c @@ -87,8 +87,9 @@ static void mshv_irqfd_resampler_ack(struct mshv_irq_ack_notifier *mian) idx = srcu_read_lock(&partition->pt_irq_srcu); - hlist_for_each_entry_rcu(irqfd, &resampler->rsmplr_irqfd_list, - irqfd_resampler_hnode) { + hlist_for_each_entry_srcu(irqfd, &resampler->rsmplr_irqfd_list, + irqfd_resampler_hnode, + srcu_read_lock_held(&partition->pt_irq_srcu)) { if (hv_should_clear_interrupt(irqfd->irqfd_lapic_irq.lapic_control.interrupt_type)) hv_call_clear_virtual_interrupt(partition->pt_id); @@ -128,8 +129,8 @@ static int mshv_vp_irq_try_set_vector(struct mshv_vp *vp, u32 vector) new_iv.vector[new_iv.vector_count++] = vector; - if (cmpxchg(&vp->vp_register_page->interrupt_vectors.as_uint64, - iv.as_uint64, new_iv.as_uint64) != iv.as_uint64) + if (!try_cmpxchg(&vp->vp_register_page->interrupt_vectors.as_uint64, + &iv.as_uint64, new_iv.as_uint64)) return -EAGAIN; return 0; @@ -247,12 +248,13 @@ static void mshv_irqfd_shutdown(struct work_struct *work) { struct mshv_irqfd *irqfd = container_of(work, struct mshv_irqfd, irqfd_shutdown); + u64 cnt; /* * Synchronize with the wait-queue and unhook ourselves to prevent * further events. 
*/ - remove_wait_queue(irqfd->irqfd_wqh, &irqfd->irqfd_wait); + eventfd_ctx_remove_wait_queue(irqfd->irqfd_eventfd_ctx, &irqfd->irqfd_wait, &cnt); if (irqfd->irqfd_resampler) { mshv_irqfd_resampler_shutdown(irqfd); @@ -295,13 +297,13 @@ static int mshv_irqfd_wakeup(wait_queue_entry_t *wait, unsigned int mode, { struct mshv_irqfd *irqfd = container_of(wait, struct mshv_irqfd, irqfd_wait); - unsigned long flags = (unsigned long)key; + __poll_t flags = key_to_poll(key); int idx; unsigned int seq; struct mshv_partition *pt = irqfd->irqfd_partn; int ret = 0; - if (flags & POLLIN) { + if (flags & EPOLLIN) { u64 cnt; eventfd_ctx_do_read(irqfd->irqfd_eventfd_ctx, &cnt); @@ -320,7 +322,7 @@ static int mshv_irqfd_wakeup(wait_queue_entry_t *wait, unsigned int mode, ret = 1; } - if (flags & POLLHUP) { + if (flags & EPOLLHUP) { /* The eventfd is closing, detach from the partition */ unsigned long flags; @@ -371,8 +373,6 @@ static void mshv_irqfd_queue_proc(struct file *file, wait_queue_head_t *wqh, struct mshv_irqfd *irqfd = container_of(polltbl, struct mshv_irqfd, irqfd_polltbl); - irqfd->irqfd_wqh = wqh; - /* * TODO: Ensure there isn't already an exclusive, priority waiter, e.g. * that the irqfd isn't already bound to another partition. 
Only the @@ -506,7 +506,7 @@ static int mshv_irqfd_assign(struct mshv_partition *pt, */ events = vfs_poll(fd_file(f), &irqfd->irqfd_polltbl); - if (events & POLLIN) + if (events & EPOLLIN) mshv_assert_irq_slow(irqfd); srcu_read_unlock(&pt->pt_irq_srcu, idx); diff --git a/drivers/hv/mshv_eventfd.h b/drivers/hv/mshv_eventfd.h index 332e7670a344..464c6b81ab33 100644 --- a/drivers/hv/mshv_eventfd.h +++ b/drivers/hv/mshv_eventfd.h @@ -32,7 +32,6 @@ struct mshv_irqfd { struct mshv_lapic_irq irqfd_lapic_irq; struct hlist_node irqfd_hnode; poll_table irqfd_polltbl; - wait_queue_head_t *irqfd_wqh; wait_queue_entry_t irqfd_wait; struct work_struct irqfd_shutdown; struct mshv_irqfd_resampler *irqfd_resampler; diff --git a/drivers/hv/mshv_regions.c b/drivers/hv/mshv_regions.c index adba3564d9f1..c28aac0726de 100644 --- a/drivers/hv/mshv_regions.c +++ b/drivers/hv/mshv_regions.c @@ -88,7 +88,7 @@ static long mshv_region_process_chunk(struct mshv_mem_region *region, struct page *page; int stride, ret; - page = region->pages[page_offset]; + page = region->mreg_pages[page_offset]; if (!page) return -EINVAL; @@ -98,7 +98,7 @@ static long mshv_region_process_chunk(struct mshv_mem_region *region, /* Start at stride since the first stride is validated */ for (count = stride; count < page_count; count += stride) { - page = region->pages[page_offset + count]; + page = region->mreg_pages[page_offset + count]; /* Break if current page is not present */ if (!page) @@ -152,7 +152,7 @@ static int mshv_region_process_range(struct mshv_mem_region *region, while (page_count) { /* Skip non-present pages */ - if (!region->pages[page_offset]) { + if (!region->mreg_pages[page_offset]) { page_offset++; page_count--; continue; @@ -190,7 +190,7 @@ struct mshv_mem_region *mshv_region_create(u64 guest_pfn, u64 nr_pages, if (flags & BIT(MSHV_SET_MEM_BIT_EXECUTABLE)) region->hv_map_flags |= HV_MAP_GPA_EXECUTABLE; - kref_init(&region->refcount); + kref_init(&region->mreg_refcount); return region; } @@ -204,7 
+204,7 @@ static int mshv_region_chunk_share(struct mshv_mem_region *region, flags |= HV_MODIFY_SPA_PAGE_HOST_ACCESS_LARGE_PAGE; return hv_call_modify_spa_host_access(region->partition->pt_id, - region->pages + page_offset, + region->mreg_pages + page_offset, page_count, HV_MAP_GPA_READABLE | HV_MAP_GPA_WRITABLE, @@ -229,7 +229,7 @@ static int mshv_region_chunk_unshare(struct mshv_mem_region *region, flags |= HV_MODIFY_SPA_PAGE_HOST_ACCESS_LARGE_PAGE; return hv_call_modify_spa_host_access(region->partition->pt_id, - region->pages + page_offset, + region->mreg_pages + page_offset, page_count, 0, flags, false); } @@ -254,7 +254,7 @@ static int mshv_region_chunk_remap(struct mshv_mem_region *region, return hv_call_map_gpa_pages(region->partition->pt_id, region->start_gfn + page_offset, page_count, flags, - region->pages + page_offset); + region->mreg_pages + page_offset); } static int mshv_region_remap_pages(struct mshv_mem_region *region, @@ -277,10 +277,10 @@ int mshv_region_map(struct mshv_mem_region *region) static void mshv_region_invalidate_pages(struct mshv_mem_region *region, u64 page_offset, u64 page_count) { - if (region->type == MSHV_REGION_TYPE_MEM_PINNED) - unpin_user_pages(region->pages + page_offset, page_count); + if (region->mreg_type == MSHV_REGION_TYPE_MEM_PINNED) + unpin_user_pages(region->mreg_pages + page_offset, page_count); - memset(region->pages + page_offset, 0, + memset(region->mreg_pages + page_offset, 0, page_count * sizeof(struct page *)); } @@ -297,7 +297,7 @@ int mshv_region_pin(struct mshv_mem_region *region) int ret; for (done_count = 0; done_count < region->nr_pages; done_count += ret) { - pages = region->pages + done_count; + pages = region->mreg_pages + done_count; userspace_addr = region->start_uaddr + done_count * HV_HYP_PAGE_SIZE; nr_pages = min(region->nr_pages - done_count, @@ -348,11 +348,11 @@ static int mshv_region_unmap(struct mshv_mem_region *region) static void mshv_region_destroy(struct kref *ref) { struct 
mshv_mem_region *region = - container_of(ref, struct mshv_mem_region, refcount); + container_of(ref, struct mshv_mem_region, mreg_refcount); struct mshv_partition *partition = region->partition; int ret; - if (region->type == MSHV_REGION_TYPE_MEM_MOVABLE) + if (region->mreg_type == MSHV_REGION_TYPE_MEM_MOVABLE) mshv_region_movable_fini(region); if (mshv_partition_encrypted(partition)) { @@ -374,12 +374,12 @@ static void mshv_region_destroy(struct kref *ref) void mshv_region_put(struct mshv_mem_region *region) { - kref_put(&region->refcount, mshv_region_destroy); + kref_put(&region->mreg_refcount, mshv_region_destroy); } int mshv_region_get(struct mshv_mem_region *region) { - return kref_get_unless_zero(&region->refcount); + return kref_get_unless_zero(&region->mreg_refcount); } /** @@ -405,16 +405,16 @@ static int mshv_region_hmm_fault_and_lock(struct mshv_mem_region *region, int ret; range->notifier_seq = mmu_interval_read_begin(range->notifier); - mmap_read_lock(region->mni.mm); + mmap_read_lock(region->mreg_mni.mm); ret = hmm_range_fault(range); - mmap_read_unlock(region->mni.mm); + mmap_read_unlock(region->mreg_mni.mm); if (ret) return ret; - mutex_lock(&region->mutex); + mutex_lock(&region->mreg_mutex); if (mmu_interval_read_retry(range->notifier, range->notifier_seq)) { - mutex_unlock(&region->mutex); + mutex_unlock(&region->mreg_mutex); cond_resched(); return -EBUSY; } @@ -438,7 +438,7 @@ static int mshv_region_range_fault(struct mshv_mem_region *region, u64 page_offset, u64 page_count) { struct hmm_range range = { - .notifier = &region->mni, + .notifier = &region->mreg_mni, .default_flags = HMM_PFN_REQ_FAULT | HMM_PFN_REQ_WRITE, }; unsigned long *pfns; @@ -461,12 +461,12 @@ static int mshv_region_range_fault(struct mshv_mem_region *region, goto out; for (i = 0; i < page_count; i++) - region->pages[page_offset + i] = hmm_pfn_to_page(pfns[i]); + region->mreg_pages[page_offset + i] = hmm_pfn_to_page(pfns[i]); ret = mshv_region_remap_pages(region, region->hv_map_flags, page_offset, 
page_count); - mutex_unlock(&region->mutex); + mutex_unlock(&region->mreg_mutex); out: kfree(pfns); return ret; @@ -520,7 +520,7 @@ static bool mshv_region_interval_invalidate(struct mmu_interval_notifier *mni, { struct mshv_mem_region *region = container_of(mni, struct mshv_mem_region, - mni); + mreg_mni); u64 page_offset, page_count; unsigned long mstart, mend; int ret = -EPERM; @@ -533,8 +533,8 @@ static bool mshv_region_interval_invalidate(struct mmu_interval_notifier *mni, page_count = HVPFN_DOWN(mend - mstart); if (mmu_notifier_range_blockable(range)) - mutex_lock(&region->mutex); - else if (!mutex_trylock(&region->mutex)) + mutex_lock(&region->mreg_mutex); + else if (!mutex_trylock(&region->mreg_mutex)) goto out_fail; mmu_interval_set_seq(mni, cur_seq); @@ -546,12 +546,12 @@ static bool mshv_region_interval_invalidate(struct mmu_interval_notifier *mni, mshv_region_invalidate_pages(region, page_offset, page_count); - mutex_unlock(&region->mutex); + mutex_unlock(&region->mreg_mutex); return true; out_unlock: - mutex_unlock(&region->mutex); + mutex_unlock(&region->mreg_mutex); out_fail: WARN_ONCE(ret, "Failed to invalidate region %#llx-%#llx (range %#lx-%#lx, event: %u, pages %#llx-%#llx, mm: %#llx): %d\n", @@ -568,21 +568,21 @@ static const struct mmu_interval_notifier_ops mshv_region_mni_ops = { void mshv_region_movable_fini(struct mshv_mem_region *region) { - mmu_interval_notifier_remove(&region->mni); + mmu_interval_notifier_remove(&region->mreg_mni); } bool mshv_region_movable_init(struct mshv_mem_region *region) { int ret; - ret = mmu_interval_notifier_insert(&region->mni, current->mm, + ret = mmu_interval_notifier_insert(&region->mreg_mni, current->mm, region->start_uaddr, region->nr_pages << HV_HYP_PAGE_SHIFT, &mshv_region_mni_ops); if (ret) return false; - mutex_init(&region->mutex); + mutex_init(&region->mreg_mutex); return true; } diff --git a/drivers/hv/mshv_root.h b/drivers/hv/mshv_root.h index 3c1d88b36741..04c2a1910a8a 100644 --- a/drivers/hv/mshv_root.h +++ b/drivers/hv/mshv_root.h @@ -52,6 
+52,9 @@ struct mshv_vp { unsigned int kicked_by_hv; wait_queue_head_t vp_suspend_queue; } run; +#if IS_ENABLED(CONFIG_DEBUG_FS) + struct dentry *vp_stats_dentry; +#endif }; #define vp_fmt(fmt) "p%lluvp%u: " fmt @@ -79,16 +82,16 @@ enum mshv_region_type { struct mshv_mem_region { struct hlist_node hnode; - struct kref refcount; + struct kref mreg_refcount; u64 nr_pages; u64 start_gfn; u64 start_uaddr; u32 hv_map_flags; struct mshv_partition *partition; - enum mshv_region_type type; - struct mmu_interval_notifier mni; - struct mutex mutex; /* protects region pages remapping */ - struct page *pages[]; + enum mshv_region_type mreg_type; + struct mmu_interval_notifier mreg_mni; + struct mutex mreg_mutex; /* protects region pages remapping */ + struct page *mreg_pages[]; }; struct mshv_irq_ack_notifier { @@ -136,6 +139,10 @@ struct mshv_partition { u64 isolation_type; bool import_completed; bool pt_initialized; +#if IS_ENABLED(CONFIG_DEBUG_FS) + struct dentry *pt_stats_dentry; + struct dentry *pt_vp_dentry; +#endif }; #define pt_fmt(fmt) "p%llu: " fmt @@ -254,6 +261,16 @@ struct mshv_partition *mshv_partition_get(struct mshv_partition *partition); void mshv_partition_put(struct mshv_partition *partition); struct mshv_partition *mshv_partition_find(u64 partition_id) __must_hold(RCU); +static inline bool is_l1vh_parent(u64 partition_id) +{ + return hv_l1vh_partition() && (partition_id == HV_PARTITION_ID_SELF); +} + +int mshv_vp_stats_map(u64 partition_id, u32 vp_index, + struct hv_stats_page **stats_pages); +void mshv_vp_stats_unmap(u64 partition_id, u32 vp_index, + struct hv_stats_page **stats_pages); + /* hypercalls */ int hv_call_withdraw_memory(u64 count, int node, u64 partition_id); @@ -307,8 +324,9 @@ int hv_call_disconnect_port(u64 connection_partition_id, int hv_call_notify_port_ring_empty(u32 sint_index); int hv_map_stats_page(enum hv_stats_object_type type, const union hv_stats_object_identity *identity, - void **addr); -int hv_unmap_stats_page(enum 
hv_stats_object_type type, void *page_addr, + struct hv_stats_page **addr); +int hv_unmap_stats_page(enum hv_stats_object_type type, + struct hv_stats_page *page_addr, const union hv_stats_object_identity *identity); int hv_call_modify_spa_host_access(u64 partition_id, struct page **pages, u64 page_struct_count, u32 host_access, @@ -316,6 +334,33 @@ int hv_call_modify_spa_host_access(u64 partition_id, struct page **pages, int hv_call_get_partition_property_ex(u64 partition_id, u64 property_code, u64 arg, void *property_value, size_t property_value_sz); +#if IS_ENABLED(CONFIG_DEBUG_FS) +int __init mshv_debugfs_init(void); +void mshv_debugfs_exit(void); + +int mshv_debugfs_partition_create(struct mshv_partition *partition); +void mshv_debugfs_partition_remove(struct mshv_partition *partition); +int mshv_debugfs_vp_create(struct mshv_vp *vp); +void mshv_debugfs_vp_remove(struct mshv_vp *vp); +#else +static inline int __init mshv_debugfs_init(void) +{ + return 0; +} +static inline void mshv_debugfs_exit(void) { } + +static inline int mshv_debugfs_partition_create(struct mshv_partition *partition) +{ + return 0; +} +static inline void mshv_debugfs_partition_remove(struct mshv_partition *partition) { } +static inline int mshv_debugfs_vp_create(struct mshv_vp *vp) +{ + return 0; +} +static inline void mshv_debugfs_vp_remove(struct mshv_vp *vp) { } +#endif + extern struct mshv_root mshv_root; extern enum hv_scheduler_type hv_scheduler_type; extern u8 * __percpu *hv_synic_eventring_tail; diff --git a/drivers/hv/mshv_root_hv_call.c b/drivers/hv/mshv_root_hv_call.c index 598eaff4ff29..7f91096f95a8 100644 --- a/drivers/hv/mshv_root_hv_call.c +++ b/drivers/hv/mshv_root_hv_call.c @@ -115,7 +115,7 @@ int hv_call_create_partition(u64 flags, status = hv_do_hypercall(HVCALL_CREATE_PARTITION, input, output); - if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) { + if (!hv_result_needs_memory(status)) { if (hv_result_success(status)) *partition_id = output->partition_id; 
local_irq_restore(irq_flags); @@ -123,8 +123,7 @@ int hv_call_create_partition(u64 flags, break; } local_irq_restore(irq_flags); - ret = hv_call_deposit_pages(NUMA_NO_NODE, - hv_current_partition_id, 1); + ret = hv_deposit_memory(hv_current_partition_id, status); } while (!ret); return ret; @@ -147,11 +146,11 @@ int hv_call_initialize_partition(u64 partition_id) status = hv_do_fast_hypercall8(HVCALL_INITIALIZE_PARTITION, *(u64 *)&input); - if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) { + if (!hv_result_needs_memory(status)) { ret = hv_result_to_errno(status); break; } - ret = hv_call_deposit_pages(NUMA_NO_NODE, partition_id, 1); + ret = hv_deposit_memory(partition_id, status); } while (!ret); return ret; @@ -239,7 +238,7 @@ static int hv_do_map_gpa_hcall(u64 partition_id, u64 gfn, u64 page_struct_count, completed = hv_repcomp(status); - if (hv_result(status) == HV_STATUS_INSUFFICIENT_MEMORY) { + if (hv_result_needs_memory(status)) { ret = hv_call_deposit_pages(NUMA_NO_NODE, partition_id, HV_MAP_GPA_DEPOSIT_PAGES); if (ret) @@ -455,7 +454,7 @@ int hv_call_get_vp_state(u32 vp_index, u64 partition_id, status = hv_do_hypercall(control, input, output); - if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) { + if (!hv_result_needs_memory(status)) { if (hv_result_success(status) && ret_output) memcpy(ret_output, output, sizeof(*output)); @@ -465,8 +464,7 @@ int hv_call_get_vp_state(u32 vp_index, u64 partition_id, } local_irq_restore(flags); - ret = hv_call_deposit_pages(NUMA_NO_NODE, - partition_id, 1); + ret = hv_deposit_memory(partition_id, status); } while (!ret); return ret; @@ -518,15 +516,14 @@ int hv_call_set_vp_state(u32 vp_index, u64 partition_id, status = hv_do_hypercall(control, input, NULL); - if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) { + if (!hv_result_needs_memory(status)) { local_irq_restore(flags); ret = hv_result_to_errno(status); break; } local_irq_restore(flags); - ret = hv_call_deposit_pages(NUMA_NO_NODE, - partition_id, 
1); + ret = hv_deposit_memory(partition_id, status); } while (!ret); return ret; @@ -563,7 +560,7 @@ static int hv_call_map_vp_state_page(u64 partition_id, u32 vp_index, u32 type, status = hv_do_hypercall(HVCALL_MAP_VP_STATE_PAGE, input, output); - if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) { + if (!hv_result_needs_memory(status)) { if (hv_result_success(status)) *state_page = pfn_to_page(output->map_location); local_irq_restore(flags); @@ -573,7 +570,7 @@ static int hv_call_map_vp_state_page(u64 partition_id, u32 vp_index, u32 type, local_irq_restore(flags); - ret = hv_call_deposit_pages(NUMA_NO_NODE, partition_id, 1); + ret = hv_deposit_memory(partition_id, status); } while (!ret); return ret; @@ -718,12 +715,11 @@ hv_call_create_port(u64 port_partition_id, union hv_port_id port_id, if (hv_result_success(status)) break; - if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) { + if (!hv_result_needs_memory(status)) { ret = hv_result_to_errno(status); break; } - ret = hv_call_deposit_pages(NUMA_NO_NODE, port_partition_id, 1); - + ret = hv_deposit_memory(port_partition_id, status); } while (!ret); return ret; @@ -772,12 +768,11 @@ hv_call_connect_port(u64 port_partition_id, union hv_port_id port_id, if (hv_result_success(status)) break; - if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) { + if (!hv_result_needs_memory(status)) { ret = hv_result_to_errno(status); break; } - ret = hv_call_deposit_pages(NUMA_NO_NODE, - connection_partition_id, 1); + ret = hv_deposit_memory(connection_partition_id, status); } while (!ret); return ret; @@ -813,6 +808,13 @@ hv_call_notify_port_ring_empty(u32 sint_index) return hv_result_to_errno(status); } +/* + * Equivalent of hv_call_map_stats_page() for cases when the caller provides + * the map location. + * + * NOTE: This is a newer hypercall that always supports SELF and PARENT stats + * areas, unlike hv_call_map_stats_page(). 
+ */ static int hv_call_map_stats_page2(enum hv_stats_object_type type, const union hv_stats_object_identity *identity, u64 map_location) @@ -843,21 +845,49 @@ static int hv_call_map_stats_page2(enum hv_stats_object_type type, if (!ret) break; - if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) { + if (!hv_result_needs_memory(status)) { hv_status_debug(status, "\n"); break; } - ret = hv_call_deposit_pages(NUMA_NO_NODE, - hv_current_partition_id, 1); + ret = hv_deposit_memory(hv_current_partition_id, status); } while (!ret); return ret; } -static int hv_call_map_stats_page(enum hv_stats_object_type type, - const union hv_stats_object_identity *identity, - void **addr) +static int +hv_stats_get_area_type(enum hv_stats_object_type type, + const union hv_stats_object_identity *identity) +{ + switch (type) { + case HV_STATS_OBJECT_HYPERVISOR: + return identity->hv.stats_area_type; + case HV_STATS_OBJECT_LOGICAL_PROCESSOR: + return identity->lp.stats_area_type; + case HV_STATS_OBJECT_PARTITION: + return identity->partition.stats_area_type; + case HV_STATS_OBJECT_VP: + return identity->vp.stats_area_type; + } + + return -EINVAL; +} + +/* + * Map a stats page, where the page location is provided by the hypervisor. + * + * NOTE: The concept of separate SELF and PARENT stats areas does not exist on + * older hypervisor versions. All the available stats information can be found + * on the SELF page. When attempting to map the PARENT area on a hypervisor + * that doesn't support it, return "success" but with a NULL address. The + * caller should check for this case and instead fallback to the SELF area + * alone. 
+ */ +static int +hv_call_map_stats_page(enum hv_stats_object_type type, + const union hv_stats_object_identity *identity, + struct hv_stats_page **addr) { unsigned long flags; struct hv_input_map_stats_page *input; @@ -878,15 +908,22 @@ static int hv_call_map_stats_page(enum hv_stats_object_type type, pfn = output->map_location; local_irq_restore(flags); - if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) { - ret = hv_result_to_errno(status); + + if (!hv_result_needs_memory(status)) { if (hv_result_success(status)) break; - return ret; + + if (hv_stats_get_area_type(type, identity) == HV_STATS_AREA_PARENT && + hv_result(status) == HV_STATUS_INVALID_PARAMETER) { + *addr = NULL; + return 0; + } + + hv_status_debug(status, "\n"); + return hv_result_to_errno(status); } - ret = hv_call_deposit_pages(NUMA_NO_NODE, - hv_current_partition_id, 1); + ret = hv_deposit_memory(hv_current_partition_id, status); if (ret) return ret; } while (!ret); @@ -898,7 +935,7 @@ static int hv_call_map_stats_page(enum hv_stats_object_type type, int hv_map_stats_page(enum hv_stats_object_type type, const union hv_stats_object_identity *identity, - void **addr) + struct hv_stats_page **addr) { int ret; struct page *allocated_page = NULL; @@ -946,7 +983,8 @@ static int hv_call_unmap_stats_page(enum hv_stats_object_type type, return hv_result_to_errno(status); } -int hv_unmap_stats_page(enum hv_stats_object_type type, void *page_addr, +int hv_unmap_stats_page(enum hv_stats_object_type type, + struct hv_stats_page *page_addr, const union hv_stats_object_identity *identity) { int ret; diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c index 681b58154d5e..e6509c980763 100644 --- a/drivers/hv/mshv_root_main.c +++ b/drivers/hv/mshv_root_main.c @@ -39,22 +39,12 @@ MODULE_AUTHOR("Microsoft"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Microsoft Hyper-V root partition VMM interface /dev/mshv"); -/* TODO move this to another file when debugfs code is added */ -enum 
hv_stats_vp_counters { /* HV_THREAD_COUNTER */ -#if defined(CONFIG_X86) - VpRootDispatchThreadBlocked = 202, +/* HV_THREAD_COUNTER */ +#if defined(CONFIG_X86_64) +#define HV_VP_COUNTER_ROOT_DISPATCH_THREAD_BLOCKED 202 #elif defined(CONFIG_ARM64) - VpRootDispatchThreadBlocked = 94, +#define HV_VP_COUNTER_ROOT_DISPATCH_THREAD_BLOCKED 95 #endif - VpStatsMaxCounter -}; - -struct hv_stats_page { - union { - u64 vp_cntrs[VpStatsMaxCounter]; /* VP counters */ - u8 data[HV_HYP_PAGE_SIZE]; - }; -} __packed; struct mshv_root mshv_root; @@ -130,6 +120,7 @@ static u16 mshv_passthru_hvcalls[] = { HVCALL_SET_VP_REGISTERS, HVCALL_TRANSLATE_VIRTUAL_ADDRESS, HVCALL_CLEAR_VIRTUAL_INTERRUPT, + HVCALL_SCRUB_PARTITION, HVCALL_REGISTER_INTERCEPT_RESULT, HVCALL_ASSERT_VIRTUAL_INTERRUPT, HVCALL_GET_GPA_PAGES_ACCESS_STATES, @@ -261,11 +252,10 @@ static int mshv_ioctl_passthru_hvcall(struct mshv_partition *partition, if (hv_result_success(status)) break; - if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) + if (!hv_result_needs_memory(status)) ret = hv_result_to_errno(status); else - ret = hv_call_deposit_pages(NUMA_NO_NODE, - pt_id, 1); + ret = hv_deposit_memory(pt_id, status); } while (!ret); args.status = hv_result(status); @@ -485,12 +475,11 @@ static u64 mshv_vp_interrupt_pending(struct mshv_vp *vp) static bool mshv_vp_dispatch_thread_blocked(struct mshv_vp *vp) { struct hv_stats_page **stats = vp->vp_stats_pages; - u64 *self_vp_cntrs = stats[HV_STATS_AREA_SELF]->vp_cntrs; - u64 *parent_vp_cntrs = stats[HV_STATS_AREA_PARENT]->vp_cntrs; + u64 *self_vp_cntrs = stats[HV_STATS_AREA_SELF]->data; + u64 *parent_vp_cntrs = stats[HV_STATS_AREA_PARENT]->data; - if (self_vp_cntrs[VpRootDispatchThreadBlocked]) - return self_vp_cntrs[VpRootDispatchThreadBlocked]; - return parent_vp_cntrs[VpRootDispatchThreadBlocked]; + return parent_vp_cntrs[HV_VP_COUNTER_ROOT_DISPATCH_THREAD_BLOCKED] || + self_vp_cntrs[HV_VP_COUNTER_ROOT_DISPATCH_THREAD_BLOCKED]; } static int @@ -661,7 +650,7 @@ static bool 
mshv_handle_gpa_intercept(struct mshv_vp *vp) return false; /* Only movable memory ranges are supported for GPA intercepts */ - if (region->type == MSHV_REGION_TYPE_MEM_MOVABLE) + if (region->mreg_type == MSHV_REGION_TYPE_MEM_MOVABLE) ret = mshv_region_handle_gfn_fault(region, gfn); else ret = false; @@ -957,23 +946,36 @@ mshv_vp_release(struct inode *inode, struct file *filp) return 0; } -static void mshv_vp_stats_unmap(u64 partition_id, u32 vp_index, - void *stats_pages[]) +void mshv_vp_stats_unmap(u64 partition_id, u32 vp_index, + struct hv_stats_page *stats_pages[]) { union hv_stats_object_identity identity = { .vp.partition_id = partition_id, .vp.vp_index = vp_index, }; + int err; identity.vp.stats_area_type = HV_STATS_AREA_SELF; - hv_unmap_stats_page(HV_STATS_OBJECT_VP, NULL, &identity); - - identity.vp.stats_area_type = HV_STATS_AREA_PARENT; - hv_unmap_stats_page(HV_STATS_OBJECT_VP, NULL, &identity); + err = hv_unmap_stats_page(HV_STATS_OBJECT_VP, + stats_pages[HV_STATS_AREA_SELF], + &identity); + if (err) + pr_err("%s: failed to unmap partition %llu vp %u self stats, err: %d\n", + __func__, partition_id, vp_index, err); + + if (stats_pages[HV_STATS_AREA_PARENT] != stats_pages[HV_STATS_AREA_SELF]) { + identity.vp.stats_area_type = HV_STATS_AREA_PARENT; + err = hv_unmap_stats_page(HV_STATS_OBJECT_VP, + stats_pages[HV_STATS_AREA_PARENT], + &identity); + if (err) + pr_err("%s: failed to unmap partition %llu vp %u parent stats, err: %d\n", + __func__, partition_id, vp_index, err); + } } -static int mshv_vp_stats_map(u64 partition_id, u32 vp_index, - void *stats_pages[]) +int mshv_vp_stats_map(u64 partition_id, u32 vp_index, + struct hv_stats_page *stats_pages[]) { union hv_stats_object_identity identity = { .vp.partition_id = partition_id, @@ -984,20 +986,37 @@ static int mshv_vp_stats_map(u64 partition_id, u32 vp_index, identity.vp.stats_area_type = HV_STATS_AREA_SELF; err = hv_map_stats_page(HV_STATS_OBJECT_VP, &identity, &stats_pages[HV_STATS_AREA_SELF]); - 
if (err) + if (err) { + pr_err("%s: failed to map partition %llu vp %u self stats, err: %d\n", + __func__, partition_id, vp_index, err); return err; + } - identity.vp.stats_area_type = HV_STATS_AREA_PARENT; - err = hv_map_stats_page(HV_STATS_OBJECT_VP, &identity, - &stats_pages[HV_STATS_AREA_PARENT]); - if (err) - goto unmap_self; + /* + * L1VH partition cannot access its vp stats in parent area. + */ + if (is_l1vh_parent(partition_id)) { + stats_pages[HV_STATS_AREA_PARENT] = stats_pages[HV_STATS_AREA_SELF]; + } else { + identity.vp.stats_area_type = HV_STATS_AREA_PARENT; + err = hv_map_stats_page(HV_STATS_OBJECT_VP, &identity, + &stats_pages[HV_STATS_AREA_PARENT]); + if (err) { + pr_err("%s: failed to map partition %llu vp %u parent stats, err: %d\n", + __func__, partition_id, vp_index, err); + goto unmap_self; + } + if (!stats_pages[HV_STATS_AREA_PARENT]) + stats_pages[HV_STATS_AREA_PARENT] = stats_pages[HV_STATS_AREA_SELF]; + } return 0; unmap_self: identity.vp.stats_area_type = HV_STATS_AREA_SELF; - hv_unmap_stats_page(HV_STATS_OBJECT_VP, NULL, &identity); + hv_unmap_stats_page(HV_STATS_OBJECT_VP, + stats_pages[HV_STATS_AREA_SELF], + &identity); return err; } @@ -1008,7 +1027,7 @@ mshv_partition_ioctl_create_vp(struct mshv_partition *partition, struct mshv_create_vp args; struct mshv_vp *vp; struct page *intercept_msg_page, *register_page, *ghcb_page; - void *stats_pages[2]; + struct hv_stats_page *stats_pages[2]; long ret; if (copy_from_user(&args, arg, sizeof(args))) @@ -1048,16 +1067,10 @@ mshv_partition_ioctl_create_vp(struct mshv_partition *partition, goto unmap_register_page; } - /* - * This mapping of the stats page is for detecting if dispatch thread - * is blocked - only relevant for root scheduler - */ - if (hv_scheduler_type == HV_SCHEDULER_TYPE_ROOT) { - ret = mshv_vp_stats_map(partition->pt_id, args.vp_index, - stats_pages); - if (ret) - goto unmap_ghcb_page; - } + ret = mshv_vp_stats_map(partition->pt_id, args.vp_index, + stats_pages); + if (ret) 
+ goto unmap_ghcb_page; vp = kzalloc(sizeof(*vp), GFP_KERNEL); if (!vp) @@ -1081,8 +1094,11 @@ mshv_partition_ioctl_create_vp(struct mshv_partition *partition, if (mshv_partition_encrypted(partition) && is_ghcb_mapping_available()) vp->vp_ghcb_page = page_to_virt(ghcb_page); - if (hv_scheduler_type == HV_SCHEDULER_TYPE_ROOT) - memcpy(vp->vp_stats_pages, stats_pages, sizeof(stats_pages)); + memcpy(vp->vp_stats_pages, stats_pages, sizeof(stats_pages)); + + ret = mshv_debugfs_vp_create(vp); + if (ret) + goto put_partition; /* * Keep anon_inode_getfd last: it installs fd in the file struct and @@ -1091,7 +1107,7 @@ mshv_partition_ioctl_create_vp(struct mshv_partition *partition, ret = anon_inode_getfd("mshv_vp", &mshv_vp_fops, vp, O_RDWR | O_CLOEXEC); if (ret < 0) - goto put_partition; + goto remove_debugfs_vp; /* already exclusive with the partition mutex for all ioctls */ partition->pt_vp_count++; @@ -1099,13 +1115,14 @@ mshv_partition_ioctl_create_vp(struct mshv_partition *partition, return ret; +remove_debugfs_vp: + mshv_debugfs_vp_remove(vp); put_partition: mshv_partition_put(partition); free_vp: kfree(vp); unmap_stats_pages: - if (hv_scheduler_type == HV_SCHEDULER_TYPE_ROOT) - mshv_vp_stats_unmap(partition->pt_id, args.vp_index, stats_pages); + mshv_vp_stats_unmap(partition->pt_id, args.vp_index, stats_pages); unmap_ghcb_page: if (mshv_partition_encrypted(partition) && is_ghcb_mapping_available()) hv_unmap_vp_state_page(partition->pt_id, args.vp_index, @@ -1176,12 +1193,12 @@ static int mshv_partition_create_region(struct mshv_partition *partition, return PTR_ERR(rg); if (is_mmio) - rg->type = MSHV_REGION_TYPE_MMIO; + rg->mreg_type = MSHV_REGION_TYPE_MMIO; else if (mshv_partition_encrypted(partition) || !mshv_region_movable_init(rg)) - rg->type = MSHV_REGION_TYPE_MEM_PINNED; + rg->mreg_type = MSHV_REGION_TYPE_MEM_PINNED; else - rg->type = MSHV_REGION_TYPE_MEM_MOVABLE; + rg->mreg_type = MSHV_REGION_TYPE_MEM_MOVABLE; rg->partition = partition; @@ -1298,7 +1315,7 @@ 
mshv_map_user_memory(struct mshv_partition *partition, if (ret) return ret; - switch (region->type) { + switch (region->mreg_type) { case MSHV_REGION_TYPE_MEM_PINNED: ret = mshv_prepare_pinned_region(region); break; @@ -1542,10 +1559,16 @@ mshv_partition_ioctl_initialize(struct mshv_partition *partition) if (ret) goto withdraw_mem; + ret = mshv_debugfs_partition_create(partition); + if (ret) + goto finalize_partition; + partition->pt_initialized = true; return 0; +finalize_partition: + hv_call_finalize_partition(partition->pt_id); withdraw_mem: hv_call_withdraw_memory(U64_MAX, NUMA_NO_NODE, partition->pt_id); @@ -1725,9 +1748,9 @@ static void destroy_partition(struct mshv_partition *partition) if (!vp) continue; - if (hv_scheduler_type == HV_SCHEDULER_TYPE_ROOT) - mshv_vp_stats_unmap(partition->pt_id, vp->vp_index, - (void **)vp->vp_stats_pages); + mshv_debugfs_vp_remove(vp); + mshv_vp_stats_unmap(partition->pt_id, vp->vp_index, + vp->vp_stats_pages); if (vp->vp_register_page) { (void)hv_unmap_vp_state_page(partition->pt_id, @@ -1759,6 +1782,8 @@ static void destroy_partition(struct mshv_partition *partition) partition->pt_vp_array[i] = NULL; } + mshv_debugfs_partition_remove(partition); + /* Deallocates and unmaps everything including vcpus, GPA mappings etc */ hv_call_finalize_partition(partition->pt_id); @@ -1921,6 +1946,10 @@ static long mshv_ioctl_process_pt_flags(void __user *user_arg, u64 *pt_flags, *pt_flags |= HV_PARTITION_CREATION_FLAG_X2APIC_CAPABLE; if (args.pt_flags & BIT_ULL(MSHV_PT_BIT_GPA_SUPER_PAGES)) *pt_flags |= HV_PARTITION_CREATION_FLAG_GPA_SUPER_PAGES_ENABLED; + if (args.pt_flags & BIT(MSHV_PT_BIT_NESTED_VIRTUALIZATION)) + *pt_flags |= HV_PARTITION_CREATION_FLAG_NESTED_VIRTUALIZATION_CAPABLE; + if (args.pt_flags & BIT(MSHV_PT_BIT_SMT_ENABLED_GUEST)) + *pt_flags |= HV_PARTITION_CREATION_FLAG_SMT_ENABLED_GUEST; isol_props->as_uint64 = 0; @@ -2054,6 +2083,29 @@ static const char *scheduler_type_to_string(enum hv_scheduler_type type) }; } +static 
int __init l1vh_retrieve_scheduler_type(enum hv_scheduler_type *out) +{ + u64 integrated_sched_enabled; + int ret; + + *out = HV_SCHEDULER_TYPE_CORE_SMT; + + if (!mshv_root.vmm_caps.vmm_enable_integrated_scheduler) + return 0; + + ret = hv_call_get_partition_property_ex(HV_PARTITION_ID_SELF, + HV_PARTITION_PROPERTY_INTEGRATED_SCHEDULER_ENABLED, + 0, &integrated_sched_enabled, + sizeof(integrated_sched_enabled)); + if (ret) + return ret; + + if (integrated_sched_enabled) + *out = HV_SCHEDULER_TYPE_ROOT; + + return 0; +} + /* TODO move this to hv_common.c when needed outside */ static int __init hv_retrieve_scheduler_type(enum hv_scheduler_type *out) { @@ -2086,13 +2138,12 @@ static int __init hv_retrieve_scheduler_type(enum hv_scheduler_type *out) /* Retrieve and stash the supported scheduler type */ static int __init mshv_retrieve_scheduler_type(struct device *dev) { - int ret = 0; + int ret; if (hv_l1vh_partition()) - hv_scheduler_type = HV_SCHEDULER_TYPE_CORE_SMT; + ret = l1vh_retrieve_scheduler_type(&hv_scheduler_type); else ret = hv_retrieve_scheduler_type(&hv_scheduler_type); - if (ret) return ret; @@ -2212,42 +2263,29 @@ struct notifier_block mshv_reboot_nb = { static void mshv_root_partition_exit(void) { unregister_reboot_notifier(&mshv_reboot_nb); - root_scheduler_deinit(); } static int __init mshv_root_partition_init(struct device *dev) { - int err; - - err = root_scheduler_init(dev); - if (err) - return err; - - err = register_reboot_notifier(&mshv_reboot_nb); - if (err) - goto root_sched_deinit; - - return 0; - -root_sched_deinit: - root_scheduler_deinit(); - return err; + return register_reboot_notifier(&mshv_reboot_nb); } -static void mshv_init_vmm_caps(struct device *dev) +static int __init mshv_init_vmm_caps(struct device *dev) { - /* - * This can only fail here if HVCALL_GET_PARTITION_PROPERTY_EX or - * HV_PARTITION_PROPERTY_VMM_CAPABILITIES are not supported. In that - * case it's valid to proceed as if all vmm_caps are disabled (zero). 
- */ - if (hv_call_get_partition_property_ex(HV_PARTITION_ID_SELF, - HV_PARTITION_PROPERTY_VMM_CAPABILITIES, - 0, &mshv_root.vmm_caps, - sizeof(mshv_root.vmm_caps))) - dev_warn(dev, "Unable to get VMM capabilities\n"); + int ret; + + ret = hv_call_get_partition_property_ex(HV_PARTITION_ID_SELF, + HV_PARTITION_PROPERTY_VMM_CAPABILITIES, + 0, &mshv_root.vmm_caps, + sizeof(mshv_root.vmm_caps)); + if (ret && hv_l1vh_partition()) { + dev_err(dev, "Failed to get VMM capabilities: %d\n", ret); + return ret; + } dev_dbg(dev, "vmm_caps = %#llx\n", mshv_root.vmm_caps.as_uint64[0]); + + return 0; } static int __init mshv_parent_partition_init(void) @@ -2293,6 +2331,10 @@ static int __init mshv_parent_partition_init(void) mshv_cpuhp_online = ret; + ret = mshv_init_vmm_caps(dev); + if (ret) + goto remove_cpu_state; + ret = mshv_retrieve_scheduler_type(dev); if (ret) goto remove_cpu_state; @@ -2302,11 +2344,17 @@ static int __init mshv_parent_partition_init(void) if (ret) goto remove_cpu_state; - mshv_init_vmm_caps(dev); + ret = root_scheduler_init(dev); + if (ret) + goto exit_partition; + + ret = mshv_debugfs_init(); + if (ret) + goto deinit_root_scheduler; ret = mshv_irqfd_wq_init(); if (ret) - goto exit_partition; + goto exit_debugfs; spin_lock_init(&mshv_root.pt_ht_lock); hash_init(mshv_root.pt_htable); @@ -2315,6 +2363,10 @@ static int __init mshv_parent_partition_init(void) return 0; +exit_debugfs: + mshv_debugfs_exit(); +deinit_root_scheduler: + root_scheduler_deinit(); exit_partition: if (hv_root_partition()) mshv_root_partition_exit(); @@ -2331,8 +2383,10 @@ static void __exit mshv_parent_partition_exit(void) { hv_setup_mshv_handler(NULL); mshv_port_table_fini(); + mshv_debugfs_exit(); misc_deregister(&mshv_dev); mshv_irqfd_wq_cleanup(); + root_scheduler_deinit(); if (hv_root_partition()) mshv_root_partition_exit(); cpuhp_remove_state(mshv_cpuhp_online); diff --git a/drivers/hv/mshv_vtl_main.c b/drivers/hv/mshv_vtl_main.c index 2cebe9de5a5a..7bbbce009732 100644 --- 
a/drivers/hv/mshv_vtl_main.c +++ b/drivers/hv/mshv_vtl_main.c @@ -845,9 +845,10 @@ static const struct file_operations mshv_vtl_fops = { .mmap = mshv_vtl_mmap, }; -static void mshv_vtl_synic_mask_vmbus_sint(const u8 *mask) +static void mshv_vtl_synic_mask_vmbus_sint(void *info) { union hv_synic_sint sint; + const u8 *mask = info; sint.as_uint64 = 0; sint.vector = HYPERVISOR_CALLBACK_VECTOR; @@ -999,7 +1000,7 @@ static int mshv_vtl_sint_ioctl_pause_msg_stream(struct mshv_sint_mask __user *ar if (copy_from_user(&mask, arg, sizeof(mask))) return -EFAULT; guard(mutex)(&vtl2_vmbus_sint_mask_mutex); - on_each_cpu((smp_call_func_t)mshv_vtl_synic_mask_vmbus_sint, &mask.mask, 1); + on_each_cpu(mshv_vtl_synic_mask_vmbus_sint, &mask.mask, 1); WRITE_ONCE(vtl_synic_mask_vmbus_sint_masked, mask.mask != 0); if (mask.mask) wake_up_interruptible_poll(&fd_wait_queue, EPOLLIN); diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 6785ad63a9cb..3e7a52918ce0 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -25,6 +25,7 @@ #include <linux/cpu.h> #include <linux/sched/isolation.h> #include <linux/sched/task_stack.h> +#include <linux/smpboot.h> #include <linux/delay.h> #include <linux/panic_notifier.h> @@ -51,7 +52,7 @@ static struct device *vmbus_root_device; static int hyperv_cpuhp_online; -static long __percpu *vmbus_evt; +static DEFINE_PER_CPU(long, vmbus_evt); /* Values parsed from ACPI DSDT */ int vmbus_irq; @@ -1350,7 +1351,7 @@ static void vmbus_message_sched(struct hv_per_cpu_context *hv_cpu, void *message } } -void vmbus_isr(void) +static void __vmbus_isr(void) { struct hv_per_cpu_context *hv_cpu = this_cpu_ptr(hv_context.cpu_context); @@ -1363,6 +1364,53 @@ void vmbus_isr(void) add_interrupt_randomness(vmbus_interrupt); } + +static DEFINE_PER_CPU(bool, vmbus_irq_pending); +static DEFINE_PER_CPU(struct task_struct *, vmbus_irqd); + +static void vmbus_irqd_wake(void) +{ + struct task_struct *tsk = __this_cpu_read(vmbus_irqd); + + 
__this_cpu_write(vmbus_irq_pending, true); + wake_up_process(tsk); +} + +static void vmbus_irqd_setup(unsigned int cpu) +{ + sched_set_fifo(current); +} + +static int vmbus_irqd_should_run(unsigned int cpu) +{ + return __this_cpu_read(vmbus_irq_pending); +} + +static void run_vmbus_irqd(unsigned int cpu) +{ + __this_cpu_write(vmbus_irq_pending, false); + __vmbus_isr(); +} + +static bool vmbus_irq_initialized; + +static struct smp_hotplug_thread vmbus_irq_threads = { + .store = &vmbus_irqd, + .setup = vmbus_irqd_setup, + .thread_should_run = vmbus_irqd_should_run, + .thread_fn = run_vmbus_irqd, + .thread_comm = "vmbus_irq/%u", +}; + +void vmbus_isr(void) +{ + if (IS_ENABLED(CONFIG_PREEMPT_RT)) { + vmbus_irqd_wake(); + } else { + lockdep_hardirq_threaded(); + __vmbus_isr(); + } +} EXPORT_SYMBOL_FOR_MODULES(vmbus_isr, "mshv_vtl"); static irqreturn_t vmbus_percpu_isr(int irq, void *dev_id) @@ -1462,16 +1510,21 @@ static int vmbus_bus_init(void) * the VMbus interrupt handler. */ + if (IS_ENABLED(CONFIG_PREEMPT_RT) && !vmbus_irq_initialized) { + ret = smpboot_register_percpu_thread(&vmbus_irq_threads); + if (ret) + goto err_kthread; + vmbus_irq_initialized = true; + } + if (vmbus_irq == -1) { hv_setup_vmbus_handler(vmbus_isr); } else { - vmbus_evt = alloc_percpu(long); ret = request_percpu_irq(vmbus_irq, vmbus_percpu_isr, - "Hyper-V VMbus", vmbus_evt); + "Hyper-V VMbus", &vmbus_evt); if (ret) { pr_err("Can't request Hyper-V VMbus IRQ %d, Err %d", vmbus_irq, ret); - free_percpu(vmbus_evt); goto err_setup; } } @@ -1500,13 +1553,16 @@ static int vmbus_bus_init(void) return 0; err_connect: - if (vmbus_irq == -1) { + if (vmbus_irq == -1) hv_remove_vmbus_handler(); - } else { - free_percpu_irq(vmbus_irq, vmbus_evt); - free_percpu(vmbus_evt); - } + else + free_percpu_irq(vmbus_irq, &vmbus_evt); err_setup: + if (IS_ENABLED(CONFIG_PREEMPT_RT) && vmbus_irq_initialized) { + smpboot_unregister_percpu_thread(&vmbus_irq_threads); + vmbus_irq_initialized = false; + } +err_kthread: 
bus_unregister(&hv_bus); return ret; } @@ -2970,11 +3026,13 @@ static void __exit vmbus_exit(void) vmbus_connection.conn_state = DISCONNECTED; hv_stimer_global_cleanup(); vmbus_disconnect(); - if (vmbus_irq == -1) { + if (vmbus_irq == -1) hv_remove_vmbus_handler(); - } else { - free_percpu_irq(vmbus_irq, vmbus_evt); - free_percpu(vmbus_evt); + else + free_percpu_irq(vmbus_irq, &vmbus_evt); + if (IS_ENABLED(CONFIG_PREEMPT_RT) && vmbus_irq_initialized) { + smpboot_unregister_percpu_thread(&vmbus_irq_threads); + vmbus_irq_initialized = false; } for_each_online_cpu(cpu) { struct hv_per_cpu_context *hv_cpu diff --git a/drivers/pci/controller/pci-hyperv-intf.c b/drivers/pci/controller/pci-hyperv-intf.c index 28b3e93d31c0..18acbda867f0 100644 --- a/drivers/pci/controller/pci-hyperv-intf.c +++ b/drivers/pci/controller/pci-hyperv-intf.c @@ -52,17 +52,5 @@ int hyperv_reg_block_invalidate(struct pci_dev *dev, void *context, } EXPORT_SYMBOL_GPL(hyperv_reg_block_invalidate); -static void __exit exit_hv_pci_intf(void) -{ -} - -static int __init init_hv_pci_intf(void) -{ - return 0; -} - -module_init(init_hv_pci_intf); -module_exit(exit_hv_pci_intf); - MODULE_DESCRIPTION("Hyper-V PCI Interface"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 1e237d3538f9..7fcba05cec30 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -501,7 +501,6 @@ struct hv_pcibus_device { struct resource *low_mmio_res; struct resource *high_mmio_res; struct completion *survey_event; - struct pci_bus *pci_bus; spinlock_t config_lock; /* Avoid two threads writing index page */ spinlock_t device_list_lock; /* Protect lists below */ void __iomem *cfg_addr; diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index ecedab554c80..d37b68238c97 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -342,6 +342,9 @@ static inline bool 
hv_parent_partition(void) { return hv_root_partition() || hv_l1vh_partition(); } + +bool hv_result_needs_memory(u64 status); +int hv_deposit_memory_node(int node, u64 partition_id, u64 status); int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages); int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id); int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags); @@ -350,6 +353,11 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags); static inline bool hv_root_partition(void) { return false; } static inline bool hv_l1vh_partition(void) { return false; } static inline bool hv_parent_partition(void) { return false; } +static inline bool hv_result_needs_memory(u64 status) { return false; } +static inline int hv_deposit_memory_node(int node, u64 partition_id, u64 status) +{ + return -EOPNOTSUPP; +} static inline int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages) { return -EOPNOTSUPP; @@ -364,6 +372,11 @@ static inline int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u3 } #endif /* CONFIG_MSHV_ROOT */ +static inline int hv_deposit_memory(u64 partition_id, u64 status) +{ + return hv_deposit_memory_node(NUMA_NO_NODE, partition_id, status); +} + #if IS_ENABLED(CONFIG_HYPERV_VTL_MODE) u8 __init get_vtl(void); #else diff --git a/include/hyperv/hvgdk_mini.h b/include/hyperv/hvgdk_mini.h index 30fbbde81c5c..056ef7b6b360 100644 --- a/include/hyperv/hvgdk_mini.h +++ b/include/hyperv/hvgdk_mini.h @@ -14,33 +14,36 @@ struct hv_u128 { } __packed; /* NOTE: when adding below, update hv_result_to_string() */ -#define HV_STATUS_SUCCESS 0x0 -#define HV_STATUS_INVALID_HYPERCALL_CODE 0x2 -#define HV_STATUS_INVALID_HYPERCALL_INPUT 0x3 -#define HV_STATUS_INVALID_ALIGNMENT 0x4 -#define HV_STATUS_INVALID_PARAMETER 0x5 -#define HV_STATUS_ACCESS_DENIED 0x6 -#define HV_STATUS_INVALID_PARTITION_STATE 0x7 -#define HV_STATUS_OPERATION_DENIED 0x8 -#define HV_STATUS_UNKNOWN_PROPERTY 0x9 -#define 
HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE 0xA -#define HV_STATUS_INSUFFICIENT_MEMORY 0xB -#define HV_STATUS_INVALID_PARTITION_ID 0xD -#define HV_STATUS_INVALID_VP_INDEX 0xE -#define HV_STATUS_NOT_FOUND 0x10 -#define HV_STATUS_INVALID_PORT_ID 0x11 -#define HV_STATUS_INVALID_CONNECTION_ID 0x12 -#define HV_STATUS_INSUFFICIENT_BUFFERS 0x13 -#define HV_STATUS_NOT_ACKNOWLEDGED 0x14 -#define HV_STATUS_INVALID_VP_STATE 0x15 -#define HV_STATUS_NO_RESOURCES 0x1D -#define HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED 0x20 -#define HV_STATUS_INVALID_LP_INDEX 0x41 -#define HV_STATUS_INVALID_REGISTER_VALUE 0x50 -#define HV_STATUS_OPERATION_FAILED 0x71 -#define HV_STATUS_TIME_OUT 0x78 -#define HV_STATUS_CALL_PENDING 0x79 -#define HV_STATUS_VTL_ALREADY_ENABLED 0x86 +#define HV_STATUS_SUCCESS 0x0 +#define HV_STATUS_INVALID_HYPERCALL_CODE 0x2 +#define HV_STATUS_INVALID_HYPERCALL_INPUT 0x3 +#define HV_STATUS_INVALID_ALIGNMENT 0x4 +#define HV_STATUS_INVALID_PARAMETER 0x5 +#define HV_STATUS_ACCESS_DENIED 0x6 +#define HV_STATUS_INVALID_PARTITION_STATE 0x7 +#define HV_STATUS_OPERATION_DENIED 0x8 +#define HV_STATUS_UNKNOWN_PROPERTY 0x9 +#define HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE 0xA +#define HV_STATUS_INSUFFICIENT_MEMORY 0xB +#define HV_STATUS_INVALID_PARTITION_ID 0xD +#define HV_STATUS_INVALID_VP_INDEX 0xE +#define HV_STATUS_NOT_FOUND 0x10 +#define HV_STATUS_INVALID_PORT_ID 0x11 +#define HV_STATUS_INVALID_CONNECTION_ID 0x12 +#define HV_STATUS_INSUFFICIENT_BUFFERS 0x13 +#define HV_STATUS_NOT_ACKNOWLEDGED 0x14 +#define HV_STATUS_INVALID_VP_STATE 0x15 +#define HV_STATUS_NO_RESOURCES 0x1D +#define HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED 0x20 +#define HV_STATUS_INVALID_LP_INDEX 0x41 +#define HV_STATUS_INVALID_REGISTER_VALUE 0x50 +#define HV_STATUS_OPERATION_FAILED 0x71 +#define HV_STATUS_INSUFFICIENT_ROOT_MEMORY 0x73 +#define HV_STATUS_INSUFFICIENT_CONTIGUOUS_MEMORY 0x75 +#define HV_STATUS_TIME_OUT 0x78 +#define HV_STATUS_CALL_PENDING 0x79 +#define HV_STATUS_INSUFFICIENT_CONTIGUOUS_ROOT_MEMORY 
0x83 +#define HV_STATUS_VTL_ALREADY_ENABLED 0x86 /* * The Hyper-V TimeRefCount register and the TSC @@ -474,6 +477,7 @@ union hv_vp_assist_msr_contents { /* HV_REGISTER_VP_ASSIST_PAGE */ #define HVCALL_NOTIFY_PARTITION_EVENT 0x0087 #define HVCALL_ENTER_SLEEP_STATE 0x0084 #define HVCALL_NOTIFY_PORT_RING_EMPTY 0x008b +#define HVCALL_SCRUB_PARTITION 0x008d #define HVCALL_REGISTER_INTERCEPT_RESULT 0x0091 #define HVCALL_ASSERT_VIRTUAL_INTERRUPT 0x0094 #define HVCALL_CREATE_PORT 0x0095 diff --git a/include/hyperv/hvhdk.h b/include/hyperv/hvhdk.h index 08965970c17d..245f3db53bf1 100644 --- a/include/hyperv/hvhdk.h +++ b/include/hyperv/hvhdk.h @@ -10,6 +10,13 @@ #include "hvhdk_mini.h" #include "hvgdk.h" +/* + * Hypervisor statistics page format + */ +struct hv_stats_page { + u64 data[HV_HYP_PAGE_SIZE / sizeof(u64)]; +} __packed; + /* Bits for dirty mask of hv_vp_register_page */ #define HV_X64_REGISTER_CLASS_GENERAL 0 #define HV_X64_REGISTER_CLASS_IP 1 @@ -328,6 +335,8 @@ union hv_partition_isolation_properties { #define HV_PARTITION_ISOLATION_HOST_TYPE_RESERVED 0x2 /* Note: Exo partition is enabled by default */ +#define HV_PARTITION_CREATION_FLAG_SMT_ENABLED_GUEST BIT(0) +#define HV_PARTITION_CREATION_FLAG_NESTED_VIRTUALIZATION_CAPABLE BIT(1) #define HV_PARTITION_CREATION_FLAG_GPA_SUPER_PAGES_ENABLED BIT(4) #define HV_PARTITION_CREATION_FLAG_EXO_PARTITION BIT(8) #define HV_PARTITION_CREATION_FLAG_LAPIC_ENABLED BIT(13) diff --git a/include/hyperv/hvhdk_mini.h b/include/hyperv/hvhdk_mini.h index 41a29bf8ec14..091c03e26046 100644 --- a/include/hyperv/hvhdk_mini.h +++ b/include/hyperv/hvhdk_mini.h @@ -7,6 +7,8 @@ #include "hvgdk_mini.h" +#define HV_MAX_CONTIGUOUS_ALLOCATION_PAGES 8 + /* * Doorbell connection_info flags. 
*/ @@ -87,6 +89,9 @@ enum hv_partition_property_code { HV_PARTITION_PROPERTY_PRIVILEGE_FLAGS = 0x00010000, HV_PARTITION_PROPERTY_SYNTHETIC_PROC_FEATURES = 0x00010001, + /* Integrated scheduling properties */ + HV_PARTITION_PROPERTY_INTEGRATED_SCHEDULER_ENABLED = 0x00020005, + /* Resource properties */ HV_PARTITION_PROPERTY_GPA_PAGE_ACCESS_TRACKING = 0x00050005, HV_PARTITION_PROPERTY_UNIMPLEMENTED_MSR_ACTION = 0x00050017, @@ -102,7 +107,7 @@ enum hv_partition_property_code { }; #define HV_PARTITION_VMM_CAPABILITIES_BANK_COUNT 1 -#define HV_PARTITION_VMM_CAPABILITIES_RESERVED_BITFIELD_COUNT 59 +#define HV_PARTITION_VMM_CAPABILITIES_RESERVED_BITFIELD_COUNT 57 struct hv_partition_property_vmm_capabilities { u16 bank_count; @@ -119,6 +124,8 @@ struct hv_partition_property_vmm_capabilities { u64 reservedbit3: 1; #endif u64 assignable_synthetic_proc_features: 1; + u64 reservedbit5: 1; + u64 vmm_enable_integrated_scheduler : 1; u64 reserved0: HV_PARTITION_VMM_CAPABILITIES_RESERVED_BITFIELD_COUNT; } __packed; }; diff --git a/include/uapi/linux/mshv.h b/include/uapi/linux/mshv.h index dee3ece28ce5..e0645a34b55b 100644 --- a/include/uapi/linux/mshv.h +++ b/include/uapi/linux/mshv.h @@ -27,6 +27,8 @@ enum { MSHV_PT_BIT_X2APIC, MSHV_PT_BIT_GPA_SUPER_PAGES, MSHV_PT_BIT_CPU_AND_XSAVE_FEATURES, + MSHV_PT_BIT_NESTED_VIRTUALIZATION, + MSHV_PT_BIT_SMT_ENABLED_GUEST, MSHV_PT_BIT_COUNT, }; |
