summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2026-02-20 08:48:31 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2026-02-20 08:48:31 -0800
commitd31558c077d8be422b65e97974017c030b4bd91a (patch)
tree6b894308c43fb455ce02e515dd6bd91d1412835d
parent8bf22c33e7a172fbc72464f4cc484d23a6b412ba (diff)
parent158ebb578cd5f7881fdc7c4ecebddcf9463f91fd (diff)
Merge tag 'hyperv-next-signed-20260218' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux
Pull Hyper-V updates from Wei Liu: - Debugfs support for MSHV statistics (Nuno Das Neves) - Support for the integrated scheduler (Stanislav Kinsburskii) - Various fixes for MSHV memory management and hypervisor status handling (Stanislav Kinsburskii) - Expose more capabilities and flags for MSHV partition management (Anatol Belski, Muminul Islam, Magnus Kulke) - Miscellaneous fixes to improve code quality and stability (Carlos López, Ethan Nelson-Moore, Li RongQing, Michael Kelley, Mukesh Rathor, Purna Pavan Chandra Aekkaladevi, Stanislav Kinsburskii, Uros Bizjak) - PREEMPT_RT fixes for vmbus interrupts (Jan Kiszka) * tag 'hyperv-next-signed-20260218' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux: (34 commits) mshv: Handle insufficient root memory hypervisor statuses mshv: Handle insufficient contiguous memory hypervisor status mshv: Introduce hv_deposit_memory helper functions mshv: Introduce hv_result_needs_memory() helper function mshv: Add SMT_ENABLED_GUEST partition creation flag mshv: Add nested virtualization creation flag Drivers: hv: vmbus: Simplify allocation of vmbus_evt mshv: expose the scrub partition hypercall mshv: Add support for integrated scheduler mshv: Use try_cmpxchg() instead of cmpxchg() x86/hyperv: Fix error pointer dereference x86/hyperv: Reserve 3 interrupt vectors used exclusively by MSHV Drivers: hv: vmbus: Use kthread for vmbus interrupts on PREEMPT_RT x86/hyperv: Remove ASM_CALL_CONSTRAINT with VMMCALL insn x86/hyperv: Use savesegment() instead of inline asm() to save segment registers mshv: fix SRCU protection in irqfd resampler ack handler mshv: make field names descriptive in a header struct x86/hyperv: Update comment in hyperv_cleanup() mshv: clear eventfd counter on irqfd shutdown x86/hyperv: Use memremap()/memunmap() instead of ioremap_cache()/iounmap() ...
-rw-r--r--arch/x86/hyperv/hv_crash.c3
-rw-r--r--arch/x86/hyperv/hv_init.c20
-rw-r--r--arch/x86/hyperv/hv_vtl.c8
-rw-r--r--arch/x86/hyperv/ivm.c11
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c25
-rw-r--r--drivers/hv/Makefile1
-rw-r--r--drivers/hv/hv.c12
-rw-r--r--drivers/hv/hv_common.c3
-rw-r--r--drivers/hv/hv_proc.c53
-rw-r--r--drivers/hv/hyperv_vmbus.h4
-rw-r--r--drivers/hv/mshv_debugfs.c726
-rw-r--r--drivers/hv/mshv_debugfs_counters.c490
-rw-r--r--drivers/hv/mshv_eventfd.c22
-rw-r--r--drivers/hv/mshv_eventfd.h1
-rw-r--r--drivers/hv/mshv_regions.c60
-rw-r--r--drivers/hv/mshv_root.h59
-rw-r--r--drivers/hv/mshv_root_hv_call.c104
-rw-r--r--drivers/hv/mshv_root_main.c240
-rw-r--r--drivers/hv/mshv_vtl_main.c5
-rw-r--r--drivers/hv/vmbus_drv.c86
-rw-r--r--drivers/pci/controller/pci-hyperv-intf.c12
-rw-r--r--drivers/pci/controller/pci-hyperv.c1
-rw-r--r--include/asm-generic/mshyperv.h13
-rw-r--r--include/hyperv/hvgdk_mini.h58
-rw-r--r--include/hyperv/hvhdk.h9
-rw-r--r--include/hyperv/hvhdk_mini.h9
-rw-r--r--include/uapi/linux/mshv.h2
27 files changed, 1776 insertions, 261 deletions
diff --git a/arch/x86/hyperv/hv_crash.c b/arch/x86/hyperv/hv_crash.c
index a78e4fed5720..92da1b4f2e73 100644
--- a/arch/x86/hyperv/hv_crash.c
+++ b/arch/x86/hyperv/hv_crash.c
@@ -279,7 +279,6 @@ static void hv_notify_prepare_hyp(void)
static noinline __noclone void crash_nmi_callback(struct pt_regs *regs)
{
struct hv_input_disable_hyp_ex *input;
- u64 status;
int msecs = 1000, ccpu = smp_processor_id();
if (ccpu == 0) {
@@ -313,7 +312,7 @@ static noinline __noclone void crash_nmi_callback(struct pt_regs *regs)
input->rip = trampoline_pa;
input->arg = devirt_arg;
- status = hv_do_hypercall(HVCALL_DISABLE_HYP_EX, input, NULL);
+ (void)hv_do_hypercall(HVCALL_DISABLE_HYP_EX, input, NULL);
hv_panic_timeout_reboot();
}
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 14de43f4bc6c..5dbe9bd67891 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -103,9 +103,9 @@ static int hyperv_init_ghcb(void)
*/
rdmsrq(MSR_AMD64_SEV_ES_GHCB, ghcb_gpa);
- /* Mask out vTOM bit. ioremap_cache() maps decrypted */
+ /* Mask out vTOM bit and map as decrypted */
ghcb_gpa &= ~ms_hyperv.shared_gpa_boundary;
- ghcb_va = (void *)ioremap_cache(ghcb_gpa, HV_HYP_PAGE_SIZE);
+ ghcb_va = memremap(ghcb_gpa, HV_HYP_PAGE_SIZE, MEMREMAP_WB | MEMREMAP_DEC);
if (!ghcb_va)
return -ENOMEM;
@@ -277,7 +277,7 @@ static int hv_cpu_die(unsigned int cpu)
if (hv_ghcb_pg) {
ghcb_va = (void **)this_cpu_ptr(hv_ghcb_pg);
if (*ghcb_va)
- iounmap(*ghcb_va);
+ memunmap(*ghcb_va);
*ghcb_va = NULL;
}
@@ -558,7 +558,6 @@ void __init hyperv_init(void)
memunmap(src);
hv_remap_tsc_clocksource();
- hv_root_crash_init();
hv_sleep_notifiers_register();
} else {
hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg);
@@ -567,6 +566,9 @@ void __init hyperv_init(void)
hv_set_hypercall_pg(hv_hypercall_pg);
+ if (hv_root_partition()) /* after set hypercall pg */
+ hv_root_crash_init();
+
skip_hypercall_pg_init:
/*
* hyperv_init() is called before LAPIC is initialized: see
@@ -633,9 +635,13 @@ void hyperv_cleanup(void)
hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, 0);
/*
- * Reset hypercall page reference before reset the page,
- * let hypercall operations fail safely rather than
- * panic the kernel for using invalid hypercall page
+ * Reset hv_hypercall_pg before resetting it in the hypervisor.
+ * hv_set_hypercall_pg(NULL) is not used because at this point in the
+ * panic path other CPUs have been stopped, causing static_call_update()
+ * to hang. So resetting hv_hypercall_pg to cause hypercalls to fail
+ * cleanly is only operative on 32-bit builds. But this is OK as it is
+ * just a preventative measure to ease detecting a hypercall being made
+ * after this point, which shouldn't be happening anyway.
*/
hv_hypercall_pg = NULL;
diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c
index c0edaed0efb3..9b6a9bc4ab76 100644
--- a/arch/x86/hyperv/hv_vtl.c
+++ b/arch/x86/hyperv/hv_vtl.c
@@ -110,7 +110,7 @@ static void hv_vtl_ap_entry(void)
static int hv_vtl_bringup_vcpu(u32 target_vp_index, int cpu, u64 eip_ignored)
{
- u64 status;
+ u64 status, rsp, rip;
int ret = 0;
struct hv_enable_vp_vtl *input;
unsigned long irq_flags;
@@ -123,9 +123,11 @@ static int hv_vtl_bringup_vcpu(u32 target_vp_index, int cpu, u64 eip_ignored)
struct desc_struct *gdt;
struct task_struct *idle = idle_thread_get(cpu);
- u64 rsp = (unsigned long)idle->thread.sp;
+ if (IS_ERR(idle))
+ return PTR_ERR(idle);
- u64 rip = (u64)&hv_vtl_ap_entry;
+ rsp = (unsigned long)idle->thread.sp;
+ rip = (u64)&hv_vtl_ap_entry;
native_store_gdt(&gdt_ptr);
store_idt(&idt_ptr);
diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index 651771534cae..be7fad43a88d 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -25,6 +25,7 @@
#include <asm/e820/api.h>
#include <asm/desc.h>
#include <asm/msr.h>
+#include <asm/segment.h>
#include <uapi/asm/vmx.h>
#ifdef CONFIG_AMD_MEM_ENCRYPT
@@ -315,16 +316,16 @@ int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip, unsigned int cpu)
vmsa->gdtr.base = gdtr.address;
vmsa->gdtr.limit = gdtr.size;
- asm volatile("movl %%es, %%eax;" : "=a" (vmsa->es.selector));
+ savesegment(es, vmsa->es.selector);
hv_populate_vmcb_seg(vmsa->es, vmsa->gdtr.base);
- asm volatile("movl %%cs, %%eax;" : "=a" (vmsa->cs.selector));
+ savesegment(cs, vmsa->cs.selector);
hv_populate_vmcb_seg(vmsa->cs, vmsa->gdtr.base);
- asm volatile("movl %%ss, %%eax;" : "=a" (vmsa->ss.selector));
+ savesegment(ss, vmsa->ss.selector);
hv_populate_vmcb_seg(vmsa->ss, vmsa->gdtr.base);
- asm volatile("movl %%ds, %%eax;" : "=a" (vmsa->ds.selector));
+ savesegment(ds, vmsa->ds.selector);
hv_populate_vmcb_seg(vmsa->ds, vmsa->gdtr.base);
vmsa->efer = native_read_msr(MSR_EFER);
@@ -391,7 +392,7 @@ u64 hv_snp_hypercall(u64 control, u64 param1, u64 param2)
register u64 __r8 asm("r8") = param2;
asm volatile("vmmcall"
- : "=a" (hv_status), ASM_CALL_CONSTRAINT,
+ : "=a" (hv_status),
"+c" (control), "+d" (param1), "+r" (__r8)
: : "cc", "memory", "r9", "r10", "r11");
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 579fb2c64cfd..89a2eb8a0722 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -478,6 +478,28 @@ int hv_get_hypervisor_version(union hv_hypervisor_version_info *info)
}
EXPORT_SYMBOL_GPL(hv_get_hypervisor_version);
+/*
+ * Reserved vectors hard coded in the hypervisor. If used outside, the hypervisor
+ * will either crash or hang or attempt to break into debugger.
+ */
+static void hv_reserve_irq_vectors(void)
+{
+	#define HYPERV_DBG_FASTFAIL_VECTOR	0x29
+	#define HYPERV_DBG_ASSERT_VECTOR	0x2C
+	#define HYPERV_DBG_SERVICE_VECTOR	0x2D
+
+	bool already_taken;
+
+	/* The reservation is skipped entirely when FRED is enabled. */
+	if (cpu_feature_enabled(X86_FEATURE_FRED))
+		return;
+
+	/*
+	 * Mark each vector in system_vectors. Evaluation stops at the first
+	 * vector that was already marked, which is treated as fatal.
+	 */
+	already_taken =
+		test_and_set_bit(HYPERV_DBG_ASSERT_VECTOR, system_vectors) ||
+		test_and_set_bit(HYPERV_DBG_SERVICE_VECTOR, system_vectors) ||
+		test_and_set_bit(HYPERV_DBG_FASTFAIL_VECTOR, system_vectors);
+	if (already_taken)
+		BUG();
+
+	pr_info("Hyper-V: reserve vectors: %d %d %d\n", HYPERV_DBG_ASSERT_VECTOR,
+		HYPERV_DBG_SERVICE_VECTOR, HYPERV_DBG_FASTFAIL_VECTOR);
+}
+
static void __init ms_hyperv_init_platform(void)
{
int hv_max_functions_eax, eax;
@@ -510,6 +532,9 @@ static void __init ms_hyperv_init_platform(void)
hv_identify_partition_type();
+ if (hv_root_partition())
+ hv_reserve_irq_vectors();
+
if (cc_platform_has(CC_ATTR_SNP_SECURE_AVIC))
ms_hyperv.hints |= HV_DEPRECATING_AEOI_RECOMMENDED;
diff --git a/drivers/hv/Makefile b/drivers/hv/Makefile
index a49f93c2d245..2593711c3628 100644
--- a/drivers/hv/Makefile
+++ b/drivers/hv/Makefile
@@ -15,6 +15,7 @@ hv_vmbus-$(CONFIG_HYPERV_TESTING) += hv_debugfs.o
hv_utils-y := hv_util.o hv_kvp.o hv_snapshot.o hv_utils_transport.o
mshv_root-y := mshv_root_main.o mshv_synic.o mshv_eventfd.o mshv_irq.o \
mshv_root_hv_call.o mshv_portid_table.o mshv_regions.o
+mshv_root-$(CONFIG_DEBUG_FS) += mshv_debugfs.o
mshv_vtl-y := mshv_vtl_main.o
# Code that must be built-in
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index c100f04b3581..ea6835638505 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -287,11 +287,11 @@ void hv_hyp_synic_enable_regs(unsigned int cpu)
simp.simp_enabled = 1;
if (ms_hyperv.paravisor_present || hv_root_partition()) {
- /* Mask out vTOM bit. ioremap_cache() maps decrypted */
+ /* Mask out vTOM bit and map as decrypted */
u64 base = (simp.base_simp_gpa << HV_HYP_PAGE_SHIFT) &
~ms_hyperv.shared_gpa_boundary;
hv_cpu->hyp_synic_message_page =
- (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE);
+ memremap(base, HV_HYP_PAGE_SIZE, MEMREMAP_WB | MEMREMAP_DEC);
if (!hv_cpu->hyp_synic_message_page)
pr_err("Fail to map synic message page.\n");
} else {
@@ -306,11 +306,11 @@ void hv_hyp_synic_enable_regs(unsigned int cpu)
siefp.siefp_enabled = 1;
if (ms_hyperv.paravisor_present || hv_root_partition()) {
- /* Mask out vTOM bit. ioremap_cache() maps decrypted */
+ /* Mask out vTOM bit and map as decrypted */
u64 base = (siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT) &
~ms_hyperv.shared_gpa_boundary;
hv_cpu->hyp_synic_event_page =
- (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE);
+ memremap(base, HV_HYP_PAGE_SIZE, MEMREMAP_WB | MEMREMAP_DEC);
if (!hv_cpu->hyp_synic_event_page)
pr_err("Fail to map synic event page.\n");
} else {
@@ -429,7 +429,7 @@ void hv_hyp_synic_disable_regs(unsigned int cpu)
simp.simp_enabled = 0;
if (ms_hyperv.paravisor_present || hv_root_partition()) {
if (hv_cpu->hyp_synic_message_page) {
- iounmap(hv_cpu->hyp_synic_message_page);
+ memunmap(hv_cpu->hyp_synic_message_page);
hv_cpu->hyp_synic_message_page = NULL;
}
} else {
@@ -443,7 +443,7 @@ void hv_hyp_synic_disable_regs(unsigned int cpu)
if (ms_hyperv.paravisor_present || hv_root_partition()) {
if (hv_cpu->hyp_synic_event_page) {
- iounmap(hv_cpu->hyp_synic_event_page);
+ memunmap(hv_cpu->hyp_synic_event_page);
hv_cpu->hyp_synic_event_page = NULL;
}
} else {
diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
index f1c17fb60dc1..6b67ac616789 100644
--- a/drivers/hv/hv_common.c
+++ b/drivers/hv/hv_common.c
@@ -793,6 +793,9 @@ static const struct hv_status_info hv_status_infos[] = {
_STATUS_INFO(HV_STATUS_UNKNOWN_PROPERTY, -EIO),
_STATUS_INFO(HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE, -EIO),
_STATUS_INFO(HV_STATUS_INSUFFICIENT_MEMORY, -ENOMEM),
+ _STATUS_INFO(HV_STATUS_INSUFFICIENT_CONTIGUOUS_MEMORY, -ENOMEM),
+ _STATUS_INFO(HV_STATUS_INSUFFICIENT_ROOT_MEMORY, -ENOMEM),
+ _STATUS_INFO(HV_STATUS_INSUFFICIENT_CONTIGUOUS_ROOT_MEMORY, -ENOMEM),
_STATUS_INFO(HV_STATUS_INVALID_PARTITION_ID, -EINVAL),
_STATUS_INFO(HV_STATUS_INVALID_VP_INDEX, -EINVAL),
_STATUS_INFO(HV_STATUS_NOT_FOUND, -EIO),
diff --git a/drivers/hv/hv_proc.c b/drivers/hv/hv_proc.c
index fbb4eb3901bb..5f4fd9c3231c 100644
--- a/drivers/hv/hv_proc.c
+++ b/drivers/hv/hv_proc.c
@@ -110,6 +110,50 @@ free_buf:
}
EXPORT_SYMBOL_GPL(hv_call_deposit_pages);
+/*
+ * Deposit memory in response to an "insufficient memory" hypercall status.
+ *
+ * @node:         NUMA node passed through to hv_call_deposit_pages()
+ * @partition_id: partition to deposit to; overridden with
+ *                HV_PARTITION_ID_SELF for the *_ROOT_MEMORY statuses
+ * @hv_status:    raw hypercall status that selected this deposit
+ *
+ * One page is deposited for the plain statuses and
+ * HV_MAX_CONTIGUOUS_ALLOCATION_PAGES for the contiguous ones.
+ *
+ * Returns the result of hv_call_deposit_pages(), or -ENOMEM when the status
+ * is not a memory status or a root-memory status is seen outside the root
+ * partition.
+ */
+int hv_deposit_memory_node(int node, u64 partition_id,
+ u64 hv_status)
+{
+ u32 num_pages = 1;
+
+ switch (hv_result(hv_status)) {
+ case HV_STATUS_INSUFFICIENT_MEMORY:
+ break;
+ case HV_STATUS_INSUFFICIENT_CONTIGUOUS_MEMORY:
+ num_pages = HV_MAX_CONTIGUOUS_ALLOCATION_PAGES;
+ break;
+
+ case HV_STATUS_INSUFFICIENT_CONTIGUOUS_ROOT_MEMORY:
+ num_pages = HV_MAX_CONTIGUOUS_ALLOCATION_PAGES;
+ /* Shares the root-partition check and target override below. */
+ fallthrough;
+ case HV_STATUS_INSUFFICIENT_ROOT_MEMORY:
+ if (!hv_root_partition()) {
+ hv_status_err(hv_status, "Unexpected root memory deposit\n");
+ return -ENOMEM;
+ }
+ /* Root-memory statuses are satisfied by depositing to SELF. */
+ partition_id = HV_PARTITION_ID_SELF;
+ break;
+
+ default:
+ hv_status_err(hv_status, "Unexpected!\n");
+ return -ENOMEM;
+ }
+ return hv_call_deposit_pages(node, partition_id, num_pages);
+}
+EXPORT_SYMBOL_GPL(hv_deposit_memory_node);
+
+/*
+ * Report whether a hypercall status is one of the four "insufficient
+ * memory" results (plain, contiguous, root, contiguous root).
+ */
+bool hv_result_needs_memory(u64 status)
+{
+	switch (hv_result(status)) {
+	case HV_STATUS_INSUFFICIENT_MEMORY:
+	case HV_STATUS_INSUFFICIENT_CONTIGUOUS_MEMORY:
+	case HV_STATUS_INSUFFICIENT_ROOT_MEMORY:
+	case HV_STATUS_INSUFFICIENT_CONTIGUOUS_ROOT_MEMORY:
+		return true;
+	default:
+		return false;
+	}
+}
+EXPORT_SYMBOL_GPL(hv_result_needs_memory);
+
int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id)
{
struct hv_input_add_logical_processor *input;
@@ -137,7 +181,7 @@ int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id)
input, output);
local_irq_restore(flags);
- if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
+ if (!hv_result_needs_memory(status)) {
if (!hv_result_success(status)) {
hv_status_err(status, "cpu %u apic ID: %u\n",
lp_index, apic_id);
@@ -145,7 +189,8 @@ int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id)
}
break;
}
- ret = hv_call_deposit_pages(node, hv_current_partition_id, 1);
+ ret = hv_deposit_memory_node(node, hv_current_partition_id,
+ status);
} while (!ret);
return ret;
@@ -179,7 +224,7 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags)
status = hv_do_hypercall(HVCALL_CREATE_VP, input, NULL);
local_irq_restore(irq_flags);
- if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
+ if (!hv_result_needs_memory(status)) {
if (!hv_result_success(status)) {
hv_status_err(status, "vcpu: %u, lp: %u\n",
vp_index, flags);
@@ -187,7 +232,7 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags)
}
break;
}
- ret = hv_call_deposit_pages(node, partition_id, 1);
+ ret = hv_deposit_memory_node(node, partition_id, status);
} while (!ret);
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index cdbc5f5c3215..7bd8f8486e85 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -370,8 +370,8 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type)
* CHANNELMSG_UNLOAD_RESPONSE and we don't care about other messages
* on crash.
*/
- if (cmpxchg(&msg->header.message_type, old_msg_type,
- HVMSG_NONE) != old_msg_type)
+ if (!try_cmpxchg(&msg->header.message_type,
+ &old_msg_type, HVMSG_NONE))
return;
/*
diff --git a/drivers/hv/mshv_debugfs.c b/drivers/hv/mshv_debugfs.c
new file mode 100644
index 000000000000..ebf2549eb44d
--- /dev/null
+++ b/drivers/hv/mshv_debugfs.c
@@ -0,0 +1,726 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2026, Microsoft Corporation.
+ *
+ * The /sys/kernel/debug/mshv directory contents.
+ * Contains various statistics data, provided by the hypervisor.
+ *
+ * Authors: Microsoft Linux virtualization team
+ */
+
+#include <linux/debugfs.h>
+#include <linux/stringify.h>
+#include <asm/mshyperv.h>
+#include <linux/slab.h>
+
+#include "mshv.h"
+#include "mshv_root.h"
+
+/* Ensure this file is not used elsewhere by accident */
+#define MSHV_DEBUGFS_C
+#include "mshv_debugfs_counters.c"
+
+#define U32_BUF_SZ 11
+#define U64_BUF_SZ 21
+/* Only support SELF and PARENT areas */
+#define NUM_STATS_AREAS 2
+static_assert(HV_STATS_AREA_SELF == 0 && HV_STATS_AREA_PARENT == 1,
+ "SELF and PARENT areas must be usable as indices into an array of size NUM_STATS_AREAS");
+/* HV_HYPERVISOR_COUNTER */
+#define HV_HYPERVISOR_COUNTER_LOGICAL_PROCESSORS 1
+
+/* "mshv" root directory; NULL while debugfs support is not set up. */
+static struct dentry *mshv_debugfs;
+/* "partition" directory under the root. */
+static struct dentry *mshv_debugfs_partition;
+/* "lp" directory under the root (only created in the root partition). */
+static struct dentry *mshv_debugfs_lp;
+/* Per-CPU array (nr_cpu_ids entries) of the current partition's VP "stats" dentries. */
+static struct dentry **parent_vp_stats;
+/* "stats" dentry of the current partition. */
+static struct dentry *parent_partition_stats;
+
+/* Logical processor count, read from the hypervisor stats page. */
+static u64 mshv_lps_count;
+/* Mapped stats page of each logical processor, indexed by LP index. */
+static struct hv_stats_page **mshv_lps_stats;
+
+/*
+ * seq_file show callback for a logical processor "stats" file: prints one
+ * "name: value" line for every named entry of hv_lp_counters.
+ */
+static int lp_stats_show(struct seq_file *m, void *v)
+{
+	const struct hv_stats_page *page = m->private;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(hv_lp_counters); i++) {
+		/* Slots without a name are gaps in the table. */
+		if (hv_lp_counters[i])
+			seq_printf(m, "%-32s: %llu\n", hv_lp_counters[i],
+				   page->data[i]);
+	}
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(lp_stats);
+
+/*
+ * Unmap the SELF-area stats page of one logical processor and clear its
+ * slot in mshv_lps_stats. A failed unmap is only logged; the slot is
+ * cleared either way.
+ */
+static void mshv_lp_stats_unmap(u32 lp_index)
+{
+	union hv_stats_object_identity id = {
+		.lp.lp_index = lp_index,
+		.lp.stats_area_type = HV_STATS_AREA_SELF,
+	};
+	int ret = hv_unmap_stats_page(HV_STATS_OBJECT_LOGICAL_PROCESSOR,
+				      mshv_lps_stats[lp_index], &id);
+
+	if (ret)
+		pr_err("%s: failed to unmap logical processor %u stats, err: %d\n",
+		       __func__, lp_index, ret);
+
+	mshv_lps_stats[lp_index] = NULL;
+}
+
+/*
+ * Map the SELF-area stats page of one logical processor and record it in
+ * mshv_lps_stats[lp_index].
+ *
+ * Returns the mapped page, or an ERR_PTR() carrying the mapping error.
+ */
+static struct hv_stats_page * __init mshv_lp_stats_map(u32 lp_index)
+{
+	union hv_stats_object_identity id = {
+		.lp.lp_index = lp_index,
+		.lp.stats_area_type = HV_STATS_AREA_SELF,
+	};
+	struct hv_stats_page *page;
+	int ret;
+
+	ret = hv_map_stats_page(HV_STATS_OBJECT_LOGICAL_PROCESSOR, &id, &page);
+	if (!ret) {
+		mshv_lps_stats[lp_index] = page;
+		return page;
+	}
+
+	pr_err("%s: failed to map logical processor %u stats, err: %d\n",
+	       __func__, lp_index, ret);
+	return ERR_PTR(ret);
+}
+
+/*
+ * Map the stats page of a logical processor and expose it as a read-only
+ * "stats" file under @parent.
+ *
+ * Returns the mapped page, or an ERR_PTR(); the page is unmapped again when
+ * the file cannot be created.
+ */
+static struct hv_stats_page * __init lp_debugfs_stats_create(u32 lp_index,
+							      struct dentry *parent)
+{
+	struct hv_stats_page *page = mshv_lp_stats_map(lp_index);
+	struct dentry *file;
+
+	if (IS_ERR(page))
+		return page;
+
+	file = debugfs_create_file("stats", 0400, parent,
+				   page, &lp_stats_fops);
+	if (!IS_ERR(file))
+		return page;
+
+	mshv_lp_stats_unmap(lp_index);
+	return ERR_CAST(file);
+}
+
+/*
+ * Create the directory named after @lp_index under @parent and populate it
+ * with the logical processor's "stats" file.
+ *
+ * Returns 0 on success or a negative errno; the directory is removed again
+ * if the stats file could not be set up.
+ */
+static int __init lp_debugfs_create(u32 lp_index, struct dentry *parent)
+{
+	char name[U32_BUF_SZ];
+	struct hv_stats_page *page;
+	struct dentry *dir;
+	int ret;
+
+	sprintf(name, "%u", lp_index);
+
+	dir = debugfs_create_dir(name, parent);
+	if (IS_ERR(dir))
+		return PTR_ERR(dir);
+
+	page = lp_debugfs_stats_create(lp_index, dir);
+	if (!IS_ERR(page))
+		return 0;
+
+	ret = PTR_ERR(page);
+	debugfs_remove_recursive(dir);
+	return ret;
+}
+
+/*
+ * Tear down the "lp" directory, unmap every logical processor stats page
+ * and release the mshv_lps_stats array.
+ */
+static void mshv_debugfs_lp_remove(void)
+{
+	int lp;
+
+	/* Remove the files first so nothing can read a page being unmapped. */
+	debugfs_remove_recursive(mshv_debugfs_lp);
+
+	for (lp = 0; lp < mshv_lps_count; lp++)
+		mshv_lp_stats_unmap(lp);
+
+	kfree(mshv_lps_stats);
+	mshv_lps_stats = NULL;
+}
+
+/*
+ * Create the "lp" directory under @parent with one numbered subdirectory
+ * (holding a "stats" file) per logical processor, for mshv_lps_count LPs.
+ *
+ * Returns 0 on success or a negative errno. On failure every stats page
+ * mapped so far is unmapped, the directory is removed and mshv_lps_stats is
+ * freed; mshv_debugfs_lp is only set on success.
+ */
+static int __init mshv_debugfs_lp_create(struct dentry *parent)
+{
+ struct dentry *lp_dir;
+ int err, lp_index;
+
+ mshv_lps_stats = kcalloc(mshv_lps_count,
+ sizeof(*mshv_lps_stats),
+ GFP_KERNEL_ACCOUNT);
+
+ if (!mshv_lps_stats)
+ return -ENOMEM;
+
+ lp_dir = debugfs_create_dir("lp", parent);
+ if (IS_ERR(lp_dir)) {
+ err = PTR_ERR(lp_dir);
+ goto free_lp_stats;
+ }
+
+ for (lp_index = 0; lp_index < mshv_lps_count; lp_index++) {
+ err = lp_debugfs_create(lp_index, lp_dir);
+ if (err)
+ goto remove_debugfs_lps;
+ }
+
+ mshv_debugfs_lp = lp_dir;
+
+ return 0;
+
+remove_debugfs_lps:
+ /* The failing index cleaned up after itself; unwind the earlier ones. */
+ for (lp_index -= 1; lp_index >= 0; lp_index--)
+ mshv_lp_stats_unmap(lp_index);
+ debugfs_remove_recursive(lp_dir);
+free_lp_stats:
+ kfree(mshv_lps_stats);
+ mshv_lps_stats = NULL;
+
+ return err;
+}
+
+/*
+ * seq_file show callback for a VP "stats" file.
+ *
+ * The private data is an array of two stats pages, indexed by
+ * HV_STATS_AREA_SELF and HV_STATS_AREA_PARENT. The areas refer to the
+ * privilege level of the data in each page, and a field may be zero in one
+ * page while being nonzero in the other. The PARENT (more privileged) value
+ * is therefore printed unless it is 0, in which case the SELF value is
+ * printed instead.
+ */
+static int vp_stats_show(struct seq_file *m, void *v)
+{
+	const struct hv_stats_page **areas = m->private;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(hv_vp_counters); i++) {
+		u64 parent_val, self_val, shown;
+
+		if (!hv_vp_counters[i])
+			continue;
+
+		parent_val = areas[HV_STATS_AREA_PARENT]->data[i];
+		self_val = areas[HV_STATS_AREA_SELF]->data[i];
+
+		shown = parent_val;
+		if (!shown)
+			shown = self_val;
+
+		seq_printf(m, "%-43s: %llu\n", hv_vp_counters[i], shown);
+	}
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(vp_stats);
+
+/*
+ * Remove a VP's debugfs entries. @vp_stats is the VP's "stats" file; its
+ * parent directory is removed along with everything in it.
+ */
+static void vp_debugfs_remove(struct dentry *vp_stats)
+{
+	struct dentry *vp_idx_dir = vp_stats->d_parent;
+
+	debugfs_remove_recursive(vp_idx_dir);
+}
+
+/*
+ * Create the directory named after @vp_index under @parent and a read-only
+ * "stats" file in it backed by @pstats.
+ *
+ * On success the stats file dentry is stored in *@vp_stats_ptr and 0 is
+ * returned. On failure a negative errno is returned and the directory is
+ * removed again. @partition_id is not used here.
+ */
+static int vp_debugfs_create(u64 partition_id, u32 vp_index,
+			     struct hv_stats_page **pstats,
+			     struct dentry **vp_stats_ptr,
+			     struct dentry *parent)
+{
+	char name[U32_BUF_SZ];
+	struct dentry *dir, *file;
+	int ret;
+
+	sprintf(name, "%u", vp_index);
+
+	dir = debugfs_create_dir(name, parent);
+	if (IS_ERR(dir))
+		return PTR_ERR(dir);
+
+	file = debugfs_create_file("stats", 0400, dir,
+				   pstats, &vp_stats_fops);
+	if (IS_ERR(file)) {
+		ret = PTR_ERR(file);
+		debugfs_remove_recursive(dir);
+		return ret;
+	}
+
+	*vp_stats_ptr = file;
+	return 0;
+}
+
+/*
+ * seq_file show callback for a partition "stats" file. For every named
+ * entry of hv_partition_counters the PARENT-area value is printed, unless
+ * it is 0, in which case the SELF-area value is printed.
+ */
+static int partition_stats_show(struct seq_file *m, void *v)
+{
+	const struct hv_stats_page **areas = m->private;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(hv_partition_counters); i++) {
+		u64 parent_val, self_val, shown;
+
+		if (!hv_partition_counters[i])
+			continue;
+
+		parent_val = areas[HV_STATS_AREA_PARENT]->data[i];
+		self_val = areas[HV_STATS_AREA_SELF]->data[i];
+
+		shown = parent_val;
+		if (!shown)
+			shown = self_val;
+
+		seq_printf(m, "%-37s: %llu\n", hv_partition_counters[i], shown);
+	}
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(partition_stats);
+
+/*
+ * Unmap one stats area (@stats_area_type) of partition @partition_id.
+ * @stats_page is the page previously mapped for that area. A failure is
+ * only logged.
+ */
+static void mshv_partition_stats_unmap(u64 partition_id,
+				       struct hv_stats_page *stats_page,
+				       enum hv_stats_area_type stats_area_type)
+{
+	union hv_stats_object_identity identity = {
+		.partition.partition_id = partition_id,
+		.partition.stats_area_type = stats_area_type,
+	};
+	int err;
+
+	err = hv_unmap_stats_page(HV_STATS_OBJECT_PARTITION, stats_page,
+				  &identity);
+	if (err)
+		/* partition_id is a u64: print it unsigned, as the directory name is. */
+		pr_err("%s: failed to unmap partition %llu %s stats, err: %d\n",
+		       __func__, partition_id,
+		       (stats_area_type == HV_STATS_AREA_SELF) ? "self" : "parent",
+		       err);
+}
+
+/*
+ * Map one stats area (@stats_area_type) of partition @partition_id.
+ *
+ * Returns whatever page hv_map_stats_page() produced on success, or an
+ * ERR_PTR() carrying its error after logging it.
+ */
+static struct hv_stats_page *mshv_partition_stats_map(u64 partition_id,
+						       enum hv_stats_area_type stats_area_type)
+{
+	union hv_stats_object_identity identity = {
+		.partition.partition_id = partition_id,
+		.partition.stats_area_type = stats_area_type,
+	};
+	struct hv_stats_page *stats;
+	int err;
+
+	err = hv_map_stats_page(HV_STATS_OBJECT_PARTITION, &identity, &stats);
+	if (err) {
+		/* partition_id is a u64: print it unsigned, as the directory name is. */
+		pr_err("%s: failed to map partition %llu %s stats, err: %d\n",
+		       __func__, partition_id,
+		       (stats_area_type == HV_STATS_AREA_SELF) ? "self" : "parent",
+		       err);
+		return ERR_PTR(err);
+	}
+	return stats;
+}
+
+/*
+ * Map the stats areas of partition @partition_id and expose them as a
+ * read-only "stats" file under @parent.
+ *
+ * A two-entry array of page pointers (SELF and PARENT) is allocated and
+ * becomes the file's private data. When the PARENT area is not mapped
+ * separately, its slot aliases the SELF page.
+ *
+ * On success the file dentry is stored in *@partition_stats_ptr and 0 is
+ * returned; otherwise everything mapped or allocated here is undone and a
+ * negative errno is returned.
+ */
+static int mshv_debugfs_partition_stats_create(u64 partition_id,
+ struct dentry **partition_stats_ptr,
+ struct dentry *parent)
+{
+ struct dentry *dentry;
+ struct hv_stats_page **pstats;
+ int err;
+
+ pstats = kcalloc(NUM_STATS_AREAS, sizeof(struct hv_stats_page *),
+ GFP_KERNEL_ACCOUNT);
+ if (!pstats)
+ return -ENOMEM;
+
+ pstats[HV_STATS_AREA_SELF] = mshv_partition_stats_map(partition_id,
+ HV_STATS_AREA_SELF);
+ if (IS_ERR(pstats[HV_STATS_AREA_SELF])) {
+ err = PTR_ERR(pstats[HV_STATS_AREA_SELF]);
+ goto cleanup;
+ }
+
+ /*
+ * L1VH partition cannot access its partition stats in parent area.
+ */
+ if (is_l1vh_parent(partition_id)) {
+ pstats[HV_STATS_AREA_PARENT] = pstats[HV_STATS_AREA_SELF];
+ } else {
+ pstats[HV_STATS_AREA_PARENT] = mshv_partition_stats_map(partition_id,
+ HV_STATS_AREA_PARENT);
+ if (IS_ERR(pstats[HV_STATS_AREA_PARENT])) {
+ err = PTR_ERR(pstats[HV_STATS_AREA_PARENT]);
+ goto unmap_self;
+ }
+ /* A successful map that yielded no page also falls back to SELF. */
+ if (!pstats[HV_STATS_AREA_PARENT])
+ pstats[HV_STATS_AREA_PARENT] = pstats[HV_STATS_AREA_SELF];
+ }
+
+ dentry = debugfs_create_file("stats", 0400, parent,
+ pstats, &partition_stats_fops);
+ if (IS_ERR(dentry)) {
+ err = PTR_ERR(dentry);
+ goto unmap_partition_stats;
+ }
+
+ *partition_stats_ptr = dentry;
+ return 0;
+
+unmap_partition_stats:
+ /* Only unmap PARENT when it is a distinct mapping, not the SELF alias. */
+ if (pstats[HV_STATS_AREA_PARENT] != pstats[HV_STATS_AREA_SELF])
+ mshv_partition_stats_unmap(partition_id, pstats[HV_STATS_AREA_PARENT],
+ HV_STATS_AREA_PARENT);
+unmap_self:
+ mshv_partition_stats_unmap(partition_id, pstats[HV_STATS_AREA_SELF],
+ HV_STATS_AREA_SELF);
+cleanup:
+ kfree(pstats);
+ return err;
+}
+
+/*
+ * Remove a partition's debugfs directory and release its stats pages.
+ * @dentry is the partition's "stats" file; its private data is the
+ * two-entry stats page array, which is unmapped and freed here.
+ */
+static void partition_debugfs_remove(u64 partition_id, struct dentry *dentry)
+{
+	/* Fetch the page array before the dentry is torn down. */
+	struct hv_stats_page **areas = dentry->d_inode->i_private;
+	struct hv_stats_page *self_page, *parent_page;
+
+	debugfs_remove_recursive(dentry->d_parent);
+
+	self_page = areas[HV_STATS_AREA_SELF];
+	parent_page = areas[HV_STATS_AREA_PARENT];
+
+	/* PARENT may merely alias SELF; unmap it only when distinct. */
+	if (parent_page != self_page)
+		mshv_partition_stats_unmap(partition_id, parent_page,
+					   HV_STATS_AREA_PARENT);
+
+	mshv_partition_stats_unmap(partition_id, self_page,
+				   HV_STATS_AREA_SELF);
+
+	kfree(areas);
+}
+
+/*
+ * Create the debugfs directory of a partition under @parent, containing a
+ * "vp" subdirectory and a "stats" file.
+ *
+ * The directory is named "self" when is_l1vh_parent(@partition_id) holds,
+ * otherwise after the decimal partition id.
+ *
+ * On success the "vp" directory is stored in *@vp_dir_ptr, the stats file
+ * dentry in *@partition_stats_ptr, and 0 is returned. On failure the whole
+ * partition directory is removed and a negative errno is returned.
+ */
+static int partition_debugfs_create(u64 partition_id,
+ struct dentry **vp_dir_ptr,
+ struct dentry **partition_stats_ptr,
+ struct dentry *parent)
+{
+ char part_id_str[U64_BUF_SZ];
+ struct dentry *part_id_dir, *vp_dir;
+ int err;
+
+ if (is_l1vh_parent(partition_id))
+ sprintf(part_id_str, "self");
+ else
+ sprintf(part_id_str, "%llu", partition_id);
+
+ part_id_dir = debugfs_create_dir(part_id_str, parent);
+ if (IS_ERR(part_id_dir))
+ return PTR_ERR(part_id_dir);
+
+ vp_dir = debugfs_create_dir("vp", part_id_dir);
+ if (IS_ERR(vp_dir)) {
+ err = PTR_ERR(vp_dir);
+ goto remove_debugfs_partition_id;
+ }
+
+ err = mshv_debugfs_partition_stats_create(partition_id,
+ partition_stats_ptr,
+ part_id_dir);
+ if (err)
+ goto remove_debugfs_partition_id;
+
+ *vp_dir_ptr = vp_dir;
+
+ return 0;
+
+remove_debugfs_partition_id:
+ /* Recursive removal also takes the "vp" subdirectory with it. */
+ debugfs_remove_recursive(part_id_dir);
+ return err;
+}
+
+/*
+ * Remove the debugfs entries of one VP of the current partition, then unmap
+ * and free the stats page array that backed its "stats" file.
+ */
+static void parent_vp_debugfs_remove(u32 vp_index,
+				     struct dentry *vp_stats_ptr)
+{
+	/* Grab the private data before the dentry goes away. */
+	struct hv_stats_page **areas = vp_stats_ptr->d_inode->i_private;
+
+	vp_debugfs_remove(vp_stats_ptr);
+	mshv_vp_stats_unmap(hv_current_partition_id, vp_index, areas);
+	kfree(areas);
+}
+
+/*
+ * Tear down the current partition's debugfs entries: every online CPU's VP
+ * entry first, then the partition directory, then the bookkeeping array.
+ */
+static void mshv_debugfs_parent_partition_remove(void)
+{
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		parent_vp_debugfs_remove(hv_vp_index[cpu],
+					 parent_vp_stats[cpu]);
+	}
+
+	partition_debugfs_remove(hv_current_partition_id,
+				 parent_partition_stats);
+	parent_partition_stats = NULL;
+
+	kfree(parent_vp_stats);
+	parent_vp_stats = NULL;
+}
+
+/*
+ * Map the stats pages of one VP of the current partition and create its
+ * debugfs directory and "stats" file under @parent.
+ *
+ * The two-entry page array allocated here becomes the stats file's private
+ * data. On success the stats file dentry is stored in *@vp_stats_ptr and 0
+ * is returned; on failure the pages are unmapped, the array is freed and a
+ * negative errno is returned.
+ */
+static int __init parent_vp_debugfs_create(u32 vp_index,
+ struct dentry **vp_stats_ptr,
+ struct dentry *parent)
+{
+ struct hv_stats_page **pstats;
+ int err;
+
+ pstats = kcalloc(NUM_STATS_AREAS, sizeof(struct hv_stats_page *),
+ GFP_KERNEL_ACCOUNT);
+ if (!pstats)
+ return -ENOMEM;
+
+ err = mshv_vp_stats_map(hv_current_partition_id, vp_index, pstats);
+ if (err)
+ goto cleanup;
+
+ err = vp_debugfs_create(hv_current_partition_id, vp_index, pstats,
+ vp_stats_ptr, parent);
+ if (err)
+ goto unmap_vp_stats;
+
+ return 0;
+
+unmap_vp_stats:
+ mshv_vp_stats_unmap(hv_current_partition_id, vp_index, pstats);
+cleanup:
+ kfree(pstats);
+ return err;
+}
+
+/*
+ * Create the "partition" directory under the mshv root, the current
+ * partition's directory in it, and one VP entry per online CPU.
+ *
+ * parent_vp_stats is indexed by CPU number, while the VP itself is
+ * identified by hv_vp_index[cpu].
+ *
+ * Returns 0 on success or a negative errno. On failure everything created
+ * here is undone and the file-scope pointers are reset to NULL.
+ */
+static int __init mshv_debugfs_parent_partition_create(void)
+{
+	struct dentry *vp_dir;
+	int err, idx, i;
+
+	mshv_debugfs_partition = debugfs_create_dir("partition",
+						    mshv_debugfs);
+	if (IS_ERR(mshv_debugfs_partition)) {
+		err = PTR_ERR(mshv_debugfs_partition);
+		/* Do not leave an error pointer behind in the global. */
+		mshv_debugfs_partition = NULL;
+		return err;
+	}
+
+	err = partition_debugfs_create(hv_current_partition_id,
+				       &vp_dir,
+				       &parent_partition_stats,
+				       mshv_debugfs_partition);
+	if (err)
+		goto remove_debugfs_partition;
+
+	parent_vp_stats = kcalloc(nr_cpu_ids, sizeof(*parent_vp_stats),
+				  GFP_KERNEL);
+	if (!parent_vp_stats) {
+		err = -ENOMEM;
+		/*
+		 * The partition stats pages are already mapped at this point,
+		 * so they must be released too, not just the directory.
+		 */
+		goto remove_partition_stats;
+	}
+
+	for_each_online_cpu(idx) {
+		err = parent_vp_debugfs_create(hv_vp_index[idx],
+					       &parent_vp_stats[idx],
+					       vp_dir);
+		if (err)
+			goto remove_debugfs_partition_vp;
+	}
+
+	return 0;
+
+remove_debugfs_partition_vp:
+	/* Unwind the CPUs handled before the failing one. */
+	for_each_online_cpu(i) {
+		if (i >= idx)
+			break;
+		/* Unmap by VP index, matching how the entry was created. */
+		parent_vp_debugfs_remove(hv_vp_index[i], parent_vp_stats[i]);
+	}
+	kfree(parent_vp_stats);
+	parent_vp_stats = NULL;
+
+remove_partition_stats:
+	partition_debugfs_remove(hv_current_partition_id,
+				 parent_partition_stats);
+	parent_partition_stats = NULL;
+
+remove_debugfs_partition:
+	debugfs_remove_recursive(mshv_debugfs_partition);
+	mshv_debugfs_partition = NULL;
+	return err;
+}
+
+/*
+ * seq_file show callback for the hypervisor "stats" file: prints one
+ * "name: value" line for every named entry of hv_hypervisor_counters.
+ */
+static int hv_stats_show(struct seq_file *m, void *v)
+{
+	const struct hv_stats_page *page = m->private;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(hv_hypervisor_counters); i++) {
+		/* Slots without a name are gaps in the table. */
+		if (hv_hypervisor_counters[i])
+			seq_printf(m, "%-27s: %llu\n",
+				   hv_hypervisor_counters[i], page->data[i]);
+	}
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(hv_stats);
+
+/*
+ * Unmap the hypervisor's SELF-area stats page. No page address is passed
+ * to the unmap call; a failure is only logged.
+ */
+static void mshv_hv_stats_unmap(void)
+{
+	union hv_stats_object_identity id = {
+		.hv.stats_area_type = HV_STATS_AREA_SELF,
+	};
+	int ret = hv_unmap_stats_page(HV_STATS_OBJECT_HYPERVISOR, NULL, &id);
+
+	if (ret)
+		pr_err("%s: failed to unmap hypervisor stats: %d\n",
+		       __func__, ret);
+}
+
+/*
+ * Map the hypervisor's SELF-area stats page.
+ *
+ * Returns the mapped page, or an ERR_PTR() carrying the mapping error
+ * after logging it.
+ */
+static void * __init mshv_hv_stats_map(void)
+{
+	union hv_stats_object_identity id = {
+		.hv.stats_area_type = HV_STATS_AREA_SELF,
+	};
+	struct hv_stats_page *page;
+	int ret;
+
+	ret = hv_map_stats_page(HV_STATS_OBJECT_HYPERVISOR, &id, &page);
+	if (!ret)
+		return page;
+
+	pr_err("%s: failed to map hypervisor stats: %d\n",
+	       __func__, ret);
+	return ERR_PTR(ret);
+}
+
+/*
+ * Map the hypervisor stats page, expose it as a read-only "stats" file
+ * under @parent, and record the logical processor count it reports in
+ * mshv_lps_count.
+ *
+ * Returns 0 on success or a negative errno; the page is unmapped again if
+ * the file cannot be created.
+ */
+static int __init mshv_debugfs_hv_stats_create(struct dentry *parent)
+{
+ struct dentry *dentry;
+ /*
+ * The mapped page is viewed here as a plain u64 array.
+ * NOTE(review): this presumes the counters start at offset 0 of
+ * struct hv_stats_page - confirm against its definition.
+ */
+ u64 *stats;
+ int err;
+
+ stats = mshv_hv_stats_map();
+ if (IS_ERR(stats))
+ return PTR_ERR(stats);
+
+ dentry = debugfs_create_file("stats", 0400, parent,
+ stats, &hv_stats_fops);
+ if (IS_ERR(dentry)) {
+ err = PTR_ERR(dentry);
+ pr_err("%s: failed to create hypervisor stats dentry: %d\n",
+ __func__, err);
+ goto unmap_hv_stats;
+ }
+
+ /* Sizes the per-LP array allocated by mshv_debugfs_lp_create(). */
+ mshv_lps_count = stats[HV_HYPERVISOR_COUNTER_LOGICAL_PROCESSORS];
+
+ return 0;
+
+unmap_hv_stats:
+ mshv_hv_stats_unmap();
+ return err;
+}
+
+/*
+ * Create the debugfs entries of @vp inside its partition's "vp" directory.
+ * Does nothing and returns 0 when the mshv debugfs root is not set up.
+ */
+int mshv_debugfs_vp_create(struct mshv_vp *vp)
+{
+	struct mshv_partition *partition;
+
+	if (!mshv_debugfs)
+		return 0;
+
+	partition = vp->vp_partition;
+
+	return vp_debugfs_create(partition->pt_id, vp->vp_index,
+				 vp->vp_stats_pages,
+				 &vp->vp_stats_dentry,
+				 partition->pt_vp_dentry);
+}
+
+/*
+ * Remove the debugfs entries of @vp. Does nothing when the mshv debugfs
+ * root is not set up.
+ */
+void mshv_debugfs_vp_remove(struct mshv_vp *vp)
+{
+	if (mshv_debugfs)
+		vp_debugfs_remove(vp->vp_stats_dentry);
+}
+
+/*
+ * Create the debugfs directory of @partition under the "partition"
+ * directory, filling in its pt_vp_dentry and pt_stats_dentry.
+ * Does nothing and returns 0 when the mshv debugfs root is not set up;
+ * otherwise returns 0 or a negative errno.
+ */
+int mshv_debugfs_partition_create(struct mshv_partition *partition)
+{
+	if (!mshv_debugfs)
+		return 0;
+
+	return partition_debugfs_create(partition->pt_id,
+					&partition->pt_vp_dentry,
+					&partition->pt_stats_dentry,
+					mshv_debugfs_partition);
+}
+
+/*
+ * Remove the debugfs directory of @partition and release its stats pages.
+ * Does nothing when the mshv debugfs root is not set up.
+ */
+void mshv_debugfs_partition_remove(struct mshv_partition *partition)
+{
+	if (mshv_debugfs)
+		partition_debugfs_remove(partition->pt_id,
+					 partition->pt_stats_dentry);
+}
+
+/*
+ * Set up the "mshv" debugfs hierarchy.
+ *
+ * The hypervisor-wide stats file and the per-LP entries are only created in
+ * the root partition; the current partition's entries are always created.
+ *
+ * Returns 0 on success or a negative errno. On failure everything created
+ * so far is torn down in reverse order and mshv_debugfs is reset to NULL,
+ * which turns the other public entry points of this file into no-ops.
+ */
+int __init mshv_debugfs_init(void)
+{
+ int err;
+
+ mshv_debugfs = debugfs_create_dir("mshv", NULL);
+ if (IS_ERR(mshv_debugfs)) {
+ pr_err("%s: failed to create debugfs directory\n", __func__);
+ return PTR_ERR(mshv_debugfs);
+ }
+
+ if (hv_root_partition()) {
+ /* Must come first: it also reads the LP count used just below. */
+ err = mshv_debugfs_hv_stats_create(mshv_debugfs);
+ if (err)
+ goto remove_mshv_dir;
+
+ err = mshv_debugfs_lp_create(mshv_debugfs);
+ if (err)
+ goto unmap_hv_stats;
+ }
+
+ err = mshv_debugfs_parent_partition_create();
+ if (err)
+ goto unmap_lp_stats;
+
+ return 0;
+
+unmap_lp_stats:
+ if (hv_root_partition()) {
+ mshv_debugfs_lp_remove();
+ mshv_debugfs_lp = NULL;
+ }
+unmap_hv_stats:
+ if (hv_root_partition())
+ mshv_hv_stats_unmap();
+remove_mshv_dir:
+ debugfs_remove_recursive(mshv_debugfs);
+ mshv_debugfs = NULL;
+ return err;
+}
+
+/*
+ * Tear down the whole "mshv" debugfs hierarchy in reverse order of its
+ * creation. Does nothing when the hierarchy is not set up, matching the
+ * other public entry points of this file.
+ */
+void mshv_debugfs_exit(void)
+{
+	/*
+	 * Without this guard a missing hierarchy would lead straight into a
+	 * dereference of the NULL parent_vp_stats array below.
+	 */
+	if (!mshv_debugfs)
+		return;
+
+	mshv_debugfs_parent_partition_remove();
+
+	if (hv_root_partition()) {
+		mshv_debugfs_lp_remove();
+		mshv_debugfs_lp = NULL;
+		mshv_hv_stats_unmap();
+	}
+
+	debugfs_remove_recursive(mshv_debugfs);
+	mshv_debugfs = NULL;
+	mshv_debugfs_partition = NULL;
+}
diff --git a/drivers/hv/mshv_debugfs_counters.c b/drivers/hv/mshv_debugfs_counters.c
new file mode 100644
index 000000000000..978536ba691f
--- /dev/null
+++ b/drivers/hv/mshv_debugfs_counters.c
@@ -0,0 +1,490 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2026, Microsoft Corporation.
+ *
+ * Data for printing stats page counters via debugfs.
+ *
+ * Authors: Microsoft Linux virtualization team
+ */
+
+/*
+ * For simplicity, this file is included directly in mshv_debugfs.c.
+ * If these are ever needed elsewhere they should be compiled separately.
+ * Ensure this file is not used twice by accident.
+ */
+#ifndef MSHV_DEBUGFS_C
+#error "This file should only be included in mshv_debugfs.c"
+#endif
+
+/*
+ * HV_HYPERVISOR_COUNTER
+ *
+ * Printable names of the hypervisor-wide counters, placed at explicit
+ * indices with designated initializers. Slots without an initializer
+ * (index 0 and the gap at 9) are NULL.
+ * NOTE(review): the indices presumably equal the counter's slot in the
+ * stats page - confirm against the HV_HYPERVISOR_COUNTER definition.
+ */
+static char *hv_hypervisor_counters[] = {
+ [1] = "HvLogicalProcessors",
+ [2] = "HvPartitions",
+ [3] = "HvTotalPages",
+ [4] = "HvVirtualProcessors",
+ [5] = "HvMonitoredNotifications",
+ [6] = "HvModernStandbyEntries",
+ [7] = "HvPlatformIdleTransitions",
+ [8] = "HvHypervisorStartupCost",
+
+ [10] = "HvIOSpacePages",
+ [11] = "HvNonEssentialPagesForDump",
+ [12] = "HvSubsumedPages",
+};
+
+/*
+ * HV_CPU_COUNTER
+ *
+ * Printable names of the logical-processor counters, placed at explicit
+ * indices with designated initializers; index 0 has no initializer and is
+ * NULL. Entries 1-30 are common; the entries from 31 on differ between
+ * CONFIG_X86_64 and CONFIG_ARM64, so the array length depends on the
+ * architecture (and ends at 30 when neither option is enabled).
+ * NOTE(review): the indices presumably equal the counter's slot in the
+ * stats page - confirm against the HV_CPU_COUNTER definition.
+ */
+static char *hv_lp_counters[] = {
+ [1] = "LpGlobalTime",
+ [2] = "LpTotalRunTime",
+ [3] = "LpHypervisorRunTime",
+ [4] = "LpHardwareInterrupts",
+ [5] = "LpContextSwitches",
+ [6] = "LpInterProcessorInterrupts",
+ [7] = "LpSchedulerInterrupts",
+ [8] = "LpTimerInterrupts",
+ [9] = "LpInterProcessorInterruptsSent",
+ [10] = "LpProcessorHalts",
+ [11] = "LpMonitorTransitionCost",
+ [12] = "LpContextSwitchTime",
+ [13] = "LpC1TransitionsCount",
+ [14] = "LpC1RunTime",
+ [15] = "LpC2TransitionsCount",
+ [16] = "LpC2RunTime",
+ [17] = "LpC3TransitionsCount",
+ [18] = "LpC3RunTime",
+ [19] = "LpRootVpIndex",
+ [20] = "LpIdleSequenceNumber",
+ [21] = "LpGlobalTscCount",
+ [22] = "LpActiveTscCount",
+ [23] = "LpIdleAccumulation",
+ [24] = "LpReferenceCycleCount0",
+ [25] = "LpActualCycleCount0",
+ [26] = "LpReferenceCycleCount1",
+ [27] = "LpActualCycleCount1",
+ [28] = "LpProximityDomainId",
+ [29] = "LpPostedInterruptNotifications",
+ [30] = "LpBranchPredictorFlushes",
+#if IS_ENABLED(CONFIG_X86_64)
+ [31] = "LpL1DataCacheFlushes",
+ [32] = "LpImmediateL1DataCacheFlushes",
+ [33] = "LpMbFlushes",
+ [34] = "LpCounterRefreshSequenceNumber",
+ [35] = "LpCounterRefreshReferenceTime",
+ [36] = "LpIdleAccumulationSnapshot",
+ [37] = "LpActiveTscCountSnapshot",
+ [38] = "LpHwpRequestContextSwitches",
+ [39] = "LpPlaceholder1",
+ [40] = "LpPlaceholder2",
+ [41] = "LpPlaceholder3",
+ [42] = "LpPlaceholder4",
+ [43] = "LpPlaceholder5",
+ [44] = "LpPlaceholder6",
+ [45] = "LpPlaceholder7",
+ [46] = "LpPlaceholder8",
+ [47] = "LpPlaceholder9",
+ [48] = "LpSchLocalRunListSize",
+ [49] = "LpReserveGroupId",
+ [50] = "LpRunningPriority",
+ [51] = "LpPerfmonInterruptCount",
+#elif IS_ENABLED(CONFIG_ARM64)
+ [31] = "LpCounterRefreshSequenceNumber",
+ [32] = "LpCounterRefreshReferenceTime",
+ [33] = "LpIdleAccumulationSnapshot",
+ [34] = "LpActiveTscCountSnapshot",
+ [35] = "LpHwpRequestContextSwitches",
+ [36] = "LpPlaceholder2",
+ [37] = "LpPlaceholder3",
+ [38] = "LpPlaceholder4",
+ [39] = "LpPlaceholder5",
+ [40] = "LpPlaceholder6",
+ [41] = "LpPlaceholder7",
+ [42] = "LpPlaceholder8",
+ [43] = "LpPlaceholder9",
+ [44] = "LpSchLocalRunListSize",
+ [45] = "LpReserveGroupId",
+ [46] = "LpRunningPriority",
+#endif
+};
+
+/*
+ * HV_PROCESS_COUNTER
+ *
+ * Printable names of the per-partition counters, placed at explicit
+ * indices with designated initializers. Slots without an initializer
+ * (index 0 and the gap at 2) are NULL. Entries 1-26 are common; the
+ * entries from 27 on differ between CONFIG_X86_64 and CONFIG_ARM64.
+ * NOTE(review): the indices presumably equal the counter's slot in the
+ * stats page - confirm against the HV_PROCESS_COUNTER definition.
+ * NOTE(review): "PtVirtualTlbFlushEntires" looks like a misspelling of
+ * "Entries"; the string is emitted at runtime, so confirm whether it
+ * deliberately mirrors the hypervisor's counter name before changing it.
+ */
+static char *hv_partition_counters[] = {
+ [1] = "PtVirtualProcessors",
+
+ [3] = "PtTlbSize",
+ [4] = "PtAddressSpaces",
+ [5] = "PtDepositedPages",
+ [6] = "PtGpaPages",
+ [7] = "PtGpaSpaceModifications",
+ [8] = "PtVirtualTlbFlushEntires",
+ [9] = "PtRecommendedTlbSize",
+ [10] = "PtGpaPages4K",
+ [11] = "PtGpaPages2M",
+ [12] = "PtGpaPages1G",
+ [13] = "PtGpaPages512G",
+ [14] = "PtDevicePages4K",
+ [15] = "PtDevicePages2M",
+ [16] = "PtDevicePages1G",
+ [17] = "PtDevicePages512G",
+ [18] = "PtAttachedDevices",
+ [19] = "PtDeviceInterruptMappings",
+ [20] = "PtIoTlbFlushes",
+ [21] = "PtIoTlbFlushCost",
+ [22] = "PtDeviceInterruptErrors",
+ [23] = "PtDeviceDmaErrors",
+ [24] = "PtDeviceInterruptThrottleEvents",
+ [25] = "PtSkippedTimerTicks",
+ [26] = "PtPartitionId",
+#if IS_ENABLED(CONFIG_X86_64)
+ [27] = "PtNestedTlbSize",
+ [28] = "PtRecommendedNestedTlbSize",
+ [29] = "PtNestedTlbFreeListSize",
+ [30] = "PtNestedTlbTrimmedPages",
+ [31] = "PtPagesShattered",
+ [32] = "PtPagesRecombined",
+ [33] = "PtHwpRequestValue",
+ [34] = "PtAutoSuspendEnableTime",
+ [35] = "PtAutoSuspendTriggerTime",
+ [36] = "PtAutoSuspendDisableTime",
+ [37] = "PtPlaceholder1",
+ [38] = "PtPlaceholder2",
+ [39] = "PtPlaceholder3",
+ [40] = "PtPlaceholder4",
+ [41] = "PtPlaceholder5",
+ [42] = "PtPlaceholder6",
+ [43] = "PtPlaceholder7",
+ [44] = "PtPlaceholder8",
+ [45] = "PtHypervisorStateTransferGeneration",
+ [46] = "PtNumberofActiveChildPartitions",
+#elif IS_ENABLED(CONFIG_ARM64)
+ [27] = "PtHwpRequestValue",
+ [28] = "PtAutoSuspendEnableTime",
+ [29] = "PtAutoSuspendTriggerTime",
+ [30] = "PtAutoSuspendDisableTime",
+ [31] = "PtPlaceholder1",
+ [32] = "PtPlaceholder2",
+ [33] = "PtPlaceholder3",
+ [34] = "PtPlaceholder4",
+ [35] = "PtPlaceholder5",
+ [36] = "PtPlaceholder6",
+ [37] = "PtPlaceholder7",
+ [38] = "PtPlaceholder8",
+ [39] = "PtHypervisorStateTransferGeneration",
+ [40] = "PtNumberofActiveChildPartitions",
+#endif
+};
+
+/*
+ * HV_THREAD_COUNTER
+ *
+ * Printable names of the per-VP counters, placed at explicit indices with
+ * designated initializers. Slots without an initializer (index 0 and the
+ * gap at 6) are NULL. Entries 1-8 are common; the entries from 9 on differ
+ * between CONFIG_X86_64 (up to 212) and CONFIG_ARM64 (up to 105).
+ * NOTE(review): the indices presumably equal the counter's slot in the
+ * stats page - confirm against the HV_THREAD_COUNTER definition.
+ */
+static char *hv_vp_counters[] = {
+ [1] = "VpTotalRunTime",
+ [2] = "VpHypervisorRunTime",
+ [3] = "VpRemoteNodeRunTime",
+ [4] = "VpNormalizedRunTime",
+ [5] = "VpIdealCpu",
+
+ [7] = "VpHypercallsCount",
+ [8] = "VpHypercallsTime",
+#if IS_ENABLED(CONFIG_X86_64)
+ [9] = "VpPageInvalidationsCount",
+ [10] = "VpPageInvalidationsTime",
+ [11] = "VpControlRegisterAccessesCount",
+ [12] = "VpControlRegisterAccessesTime",
+ [13] = "VpIoInstructionsCount",
+ [14] = "VpIoInstructionsTime",
+ [15] = "VpHltInstructionsCount",
+ [16] = "VpHltInstructionsTime",
+ [17] = "VpMwaitInstructionsCount",
+ [18] = "VpMwaitInstructionsTime",
+ [19] = "VpCpuidInstructionsCount",
+ [20] = "VpCpuidInstructionsTime",
+ [21] = "VpMsrAccessesCount",
+ [22] = "VpMsrAccessesTime",
+ [23] = "VpOtherInterceptsCount",
+ [24] = "VpOtherInterceptsTime",
+ [25] = "VpExternalInterruptsCount",
+ [26] = "VpExternalInterruptsTime",
+ [27] = "VpPendingInterruptsCount",
+ [28] = "VpPendingInterruptsTime",
+ [29] = "VpEmulatedInstructionsCount",
+ [30] = "VpEmulatedInstructionsTime",
+ [31] = "VpDebugRegisterAccessesCount",
+ [32] = "VpDebugRegisterAccessesTime",
+ [33] = "VpPageFaultInterceptsCount",
+ [34] = "VpPageFaultInterceptsTime",
+ [35] = "VpGuestPageTableMaps",
+ [36] = "VpLargePageTlbFills",
+ [37] = "VpSmallPageTlbFills",
+ [38] = "VpReflectedGuestPageFaults",
+ [39] = "VpApicMmioAccesses",
+ [40] = "VpIoInterceptMessages",
+ [41] = "VpMemoryInterceptMessages",
+ [42] = "VpApicEoiAccesses",
+ [43] = "VpOtherMessages",
+ [44] = "VpPageTableAllocations",
+ [45] = "VpLogicalProcessorMigrations",
+ [46] = "VpAddressSpaceEvictions",
+ [47] = "VpAddressSpaceSwitches",
+ [48] = "VpAddressDomainFlushes",
+ [49] = "VpAddressSpaceFlushes",
+ [50] = "VpGlobalGvaRangeFlushes",
+ [51] = "VpLocalGvaRangeFlushes",
+ [52] = "VpPageTableEvictions",
+ [53] = "VpPageTableReclamations",
+ [54] = "VpPageTableResets",
+ [55] = "VpPageTableValidations",
+ [56] = "VpApicTprAccesses",
+ [57] = "VpPageTableWriteIntercepts",
+ [58] = "VpSyntheticInterrupts",
+ [59] = "VpVirtualInterrupts",
+ [60] = "VpApicIpisSent",
+ [61] = "VpApicSelfIpisSent",
+ [62] = "VpGpaSpaceHypercalls",
+ [63] = "VpLogicalProcessorHypercalls",
+ [64] = "VpLongSpinWaitHypercalls",
+ [65] = "VpOtherHypercalls",
+ [66] = "VpSyntheticInterruptHypercalls",
+ [67] = "VpVirtualInterruptHypercalls",
+ [68] = "VpVirtualMmuHypercalls",
+ [69] = "VpVirtualProcessorHypercalls",
+ [70] = "VpHardwareInterrupts",
+ [71] = "VpNestedPageFaultInterceptsCount",
+ [72] = "VpNestedPageFaultInterceptsTime",
+ [73] = "VpPageScans",
+ [74] = "VpLogicalProcessorDispatches",
+ [75] = "VpWaitingForCpuTime",
+ [76] = "VpExtendedHypercalls",
+ [77] = "VpExtendedHypercallInterceptMessages",
+ [78] = "VpMbecNestedPageTableSwitches",
+ [79] = "VpOtherReflectedGuestExceptions",
+ [80] = "VpGlobalIoTlbFlushes",
+ [81] = "VpGlobalIoTlbFlushCost",
+ [82] = "VpLocalIoTlbFlushes",
+ [83] = "VpLocalIoTlbFlushCost",
+ [84] = "VpHypercallsForwardedCount",
+ [85] = "VpHypercallsForwardingTime",
+ [86] = "VpPageInvalidationsForwardedCount",
+ [87] = "VpPageInvalidationsForwardingTime",
+ [88] = "VpControlRegisterAccessesForwardedCount",
+ [89] = "VpControlRegisterAccessesForwardingTime",
+ [90] = "VpIoInstructionsForwardedCount",
+ [91] = "VpIoInstructionsForwardingTime",
+ [92] = "VpHltInstructionsForwardedCount",
+ [93] = "VpHltInstructionsForwardingTime",
+ [94] = "VpMwaitInstructionsForwardedCount",
+ [95] = "VpMwaitInstructionsForwardingTime",
+ [96] = "VpCpuidInstructionsForwardedCount",
+ [97] = "VpCpuidInstructionsForwardingTime",
+ [98] = "VpMsrAccessesForwardedCount",
+ [99] = "VpMsrAccessesForwardingTime",
+ [100] = "VpOtherInterceptsForwardedCount",
+ [101] = "VpOtherInterceptsForwardingTime",
+ [102] = "VpExternalInterruptsForwardedCount",
+ [103] = "VpExternalInterruptsForwardingTime",
+ [104] = "VpPendingInterruptsForwardedCount",
+ [105] = "VpPendingInterruptsForwardingTime",
+ [106] = "VpEmulatedInstructionsForwardedCount",
+ [107] = "VpEmulatedInstructionsForwardingTime",
+ [108] = "VpDebugRegisterAccessesForwardedCount",
+ [109] = "VpDebugRegisterAccessesForwardingTime",
+ [110] = "VpPageFaultInterceptsForwardedCount",
+ [111] = "VpPageFaultInterceptsForwardingTime",
+ [112] = "VpVmclearEmulationCount",
+ [113] = "VpVmclearEmulationTime",
+ [114] = "VpVmptrldEmulationCount",
+ [115] = "VpVmptrldEmulationTime",
+ [116] = "VpVmptrstEmulationCount",
+ [117] = "VpVmptrstEmulationTime",
+ [118] = "VpVmreadEmulationCount",
+ [119] = "VpVmreadEmulationTime",
+ [120] = "VpVmwriteEmulationCount",
+ [121] = "VpVmwriteEmulationTime",
+ [122] = "VpVmxoffEmulationCount",
+ [123] = "VpVmxoffEmulationTime",
+ [124] = "VpVmxonEmulationCount",
+ [125] = "VpVmxonEmulationTime",
+ [126] = "VpNestedVMEntriesCount",
+ [127] = "VpNestedVMEntriesTime",
+ [128] = "VpNestedSLATSoftPageFaultsCount",
+ [129] = "VpNestedSLATSoftPageFaultsTime",
+ [130] = "VpNestedSLATHardPageFaultsCount",
+ [131] = "VpNestedSLATHardPageFaultsTime",
+ [132] = "VpInvEptAllContextEmulationCount",
+ [133] = "VpInvEptAllContextEmulationTime",
+ [134] = "VpInvEptSingleContextEmulationCount",
+ [135] = "VpInvEptSingleContextEmulationTime",
+ [136] = "VpInvVpidAllContextEmulationCount",
+ [137] = "VpInvVpidAllContextEmulationTime",
+ [138] = "VpInvVpidSingleContextEmulationCount",
+ [139] = "VpInvVpidSingleContextEmulationTime",
+ [140] = "VpInvVpidSingleAddressEmulationCount",
+ [141] = "VpInvVpidSingleAddressEmulationTime",
+ [142] = "VpNestedTlbPageTableReclamations",
+ [143] = "VpNestedTlbPageTableEvictions",
+ [144] = "VpFlushGuestPhysicalAddressSpaceHypercalls",
+ [145] = "VpFlushGuestPhysicalAddressListHypercalls",
+ [146] = "VpPostedInterruptNotifications",
+ [147] = "VpPostedInterruptScans",
+ [148] = "VpTotalCoreRunTime",
+ [149] = "VpMaximumRunTime",
+ [150] = "VpHwpRequestContextSwitches",
+ [151] = "VpWaitingForCpuTimeBucket0",
+ [152] = "VpWaitingForCpuTimeBucket1",
+ [153] = "VpWaitingForCpuTimeBucket2",
+ [154] = "VpWaitingForCpuTimeBucket3",
+ [155] = "VpWaitingForCpuTimeBucket4",
+ [156] = "VpWaitingForCpuTimeBucket5",
+ [157] = "VpWaitingForCpuTimeBucket6",
+ [158] = "VpVmloadEmulationCount",
+ [159] = "VpVmloadEmulationTime",
+ [160] = "VpVmsaveEmulationCount",
+ [161] = "VpVmsaveEmulationTime",
+ [162] = "VpGifInstructionEmulationCount",
+ [163] = "VpGifInstructionEmulationTime",
+ [164] = "VpEmulatedErrataSvmInstructions",
+ [165] = "VpPlaceholder1",
+ [166] = "VpPlaceholder2",
+ [167] = "VpPlaceholder3",
+ [168] = "VpPlaceholder4",
+ [169] = "VpPlaceholder5",
+ [170] = "VpPlaceholder6",
+ [171] = "VpPlaceholder7",
+ [172] = "VpPlaceholder8",
+ [173] = "VpContentionTime",
+ [174] = "VpWakeUpTime",
+ [175] = "VpSchedulingPriority",
+ [176] = "VpRdpmcInstructionsCount",
+ [177] = "VpRdpmcInstructionsTime",
+ [178] = "VpPerfmonPmuMsrAccessesCount",
+ [179] = "VpPerfmonLbrMsrAccessesCount",
+ [180] = "VpPerfmonIptMsrAccessesCount",
+ [181] = "VpPerfmonInterruptCount",
+ [182] = "VpVtl1DispatchCount",
+ [183] = "VpVtl2DispatchCount",
+ [184] = "VpVtl2DispatchBucket0",
+ [185] = "VpVtl2DispatchBucket1",
+ [186] = "VpVtl2DispatchBucket2",
+ [187] = "VpVtl2DispatchBucket3",
+ [188] = "VpVtl2DispatchBucket4",
+ [189] = "VpVtl2DispatchBucket5",
+ [190] = "VpVtl2DispatchBucket6",
+ [191] = "VpVtl1RunTime",
+ [192] = "VpVtl2RunTime",
+ [193] = "VpIommuHypercalls",
+ [194] = "VpCpuGroupHypercalls",
+ [195] = "VpVsmHypercalls",
+ [196] = "VpEventLogHypercalls",
+ [197] = "VpDeviceDomainHypercalls",
+ [198] = "VpDepositHypercalls",
+ [199] = "VpSvmHypercalls",
+ [200] = "VpBusLockAcquisitionCount",
+ [201] = "VpLoadAvg",
+ [202] = "VpRootDispatchThreadBlocked",
+ [203] = "VpIdleCpuTime",
+ [204] = "VpWaitingForCpuTimeBucket7",
+ [205] = "VpWaitingForCpuTimeBucket8",
+ [206] = "VpWaitingForCpuTimeBucket9",
+ [207] = "VpWaitingForCpuTimeBucket10",
+ [208] = "VpWaitingForCpuTimeBucket11",
+ [209] = "VpWaitingForCpuTimeBucket12",
+ [210] = "VpHierarchicalSuspendTime",
+ [211] = "VpExpressSchedulingAttempts",
+ [212] = "VpExpressSchedulingCount",
+#elif IS_ENABLED(CONFIG_ARM64)
+ [9] = "VpSysRegAccessesCount",
+ [10] = "VpSysRegAccessesTime",
+ [11] = "VpSmcInstructionsCount",
+ [12] = "VpSmcInstructionsTime",
+ [13] = "VpOtherInterceptsCount",
+ [14] = "VpOtherInterceptsTime",
+ [15] = "VpExternalInterruptsCount",
+ [16] = "VpExternalInterruptsTime",
+ [17] = "VpPendingInterruptsCount",
+ [18] = "VpPendingInterruptsTime",
+ [19] = "VpGuestPageTableMaps",
+ [20] = "VpLargePageTlbFills",
+ [21] = "VpSmallPageTlbFills",
+ [22] = "VpReflectedGuestPageFaults",
+ [23] = "VpMemoryInterceptMessages",
+ [24] = "VpOtherMessages",
+ [25] = "VpLogicalProcessorMigrations",
+ [26] = "VpAddressDomainFlushes",
+ [27] = "VpAddressSpaceFlushes",
+ [28] = "VpSyntheticInterrupts",
+ [29] = "VpVirtualInterrupts",
+ [30] = "VpApicSelfIpisSent",
+ [31] = "VpGpaSpaceHypercalls",
+ [32] = "VpLogicalProcessorHypercalls",
+ [33] = "VpLongSpinWaitHypercalls",
+ [34] = "VpOtherHypercalls",
+ [35] = "VpSyntheticInterruptHypercalls",
+ [36] = "VpVirtualInterruptHypercalls",
+ [37] = "VpVirtualMmuHypercalls",
+ [38] = "VpVirtualProcessorHypercalls",
+ [39] = "VpHardwareInterrupts",
+ [40] = "VpNestedPageFaultInterceptsCount",
+ [41] = "VpNestedPageFaultInterceptsTime",
+ [42] = "VpLogicalProcessorDispatches",
+ [43] = "VpWaitingForCpuTime",
+ [44] = "VpExtendedHypercalls",
+ [45] = "VpExtendedHypercallInterceptMessages",
+ [46] = "VpMbecNestedPageTableSwitches",
+ [47] = "VpOtherReflectedGuestExceptions",
+ [48] = "VpGlobalIoTlbFlushes",
+ [49] = "VpGlobalIoTlbFlushCost",
+ [50] = "VpLocalIoTlbFlushes",
+ [51] = "VpLocalIoTlbFlushCost",
+ [52] = "VpFlushGuestPhysicalAddressSpaceHypercalls",
+ [53] = "VpFlushGuestPhysicalAddressListHypercalls",
+ [54] = "VpPostedInterruptNotifications",
+ [55] = "VpPostedInterruptScans",
+ [56] = "VpTotalCoreRunTime",
+ [57] = "VpMaximumRunTime",
+ [58] = "VpWaitingForCpuTimeBucket0",
+ [59] = "VpWaitingForCpuTimeBucket1",
+ [60] = "VpWaitingForCpuTimeBucket2",
+ [61] = "VpWaitingForCpuTimeBucket3",
+ [62] = "VpWaitingForCpuTimeBucket4",
+ [63] = "VpWaitingForCpuTimeBucket5",
+ [64] = "VpWaitingForCpuTimeBucket6",
+ [65] = "VpHwpRequestContextSwitches",
+ [66] = "VpPlaceholder2",
+ [67] = "VpPlaceholder3",
+ [68] = "VpPlaceholder4",
+ [69] = "VpPlaceholder5",
+ [70] = "VpPlaceholder6",
+ [71] = "VpPlaceholder7",
+ [72] = "VpPlaceholder8",
+ [73] = "VpContentionTime",
+ [74] = "VpWakeUpTime",
+ [75] = "VpSchedulingPriority",
+ [76] = "VpVtl1DispatchCount",
+ [77] = "VpVtl2DispatchCount",
+ [78] = "VpVtl2DispatchBucket0",
+ [79] = "VpVtl2DispatchBucket1",
+ [80] = "VpVtl2DispatchBucket2",
+ [81] = "VpVtl2DispatchBucket3",
+ [82] = "VpVtl2DispatchBucket4",
+ [83] = "VpVtl2DispatchBucket5",
+ [84] = "VpVtl2DispatchBucket6",
+ [85] = "VpVtl1RunTime",
+ [86] = "VpVtl2RunTime",
+ [87] = "VpIommuHypercalls",
+ [88] = "VpCpuGroupHypercalls",
+ [89] = "VpVsmHypercalls",
+ [90] = "VpEventLogHypercalls",
+ [91] = "VpDeviceDomainHypercalls",
+ [92] = "VpDepositHypercalls",
+ [93] = "VpSvmHypercalls",
+ [94] = "VpLoadAvg",
+ [95] = "VpRootDispatchThreadBlocked",
+ [96] = "VpIdleCpuTime",
+ [97] = "VpWaitingForCpuTimeBucket7",
+ [98] = "VpWaitingForCpuTimeBucket8",
+ [99] = "VpWaitingForCpuTimeBucket9",
+ [100] = "VpWaitingForCpuTimeBucket10",
+ [101] = "VpWaitingForCpuTimeBucket11",
+ [102] = "VpWaitingForCpuTimeBucket12",
+ [103] = "VpHierarchicalSuspendTime",
+ [104] = "VpExpressSchedulingAttempts",
+ [105] = "VpExpressSchedulingCount",
+#endif
+};
diff --git a/drivers/hv/mshv_eventfd.c b/drivers/hv/mshv_eventfd.c
index 0b75ff1edb73..492c6258045c 100644
--- a/drivers/hv/mshv_eventfd.c
+++ b/drivers/hv/mshv_eventfd.c
@@ -87,8 +87,9 @@ static void mshv_irqfd_resampler_ack(struct mshv_irq_ack_notifier *mian)
idx = srcu_read_lock(&partition->pt_irq_srcu);
- hlist_for_each_entry_rcu(irqfd, &resampler->rsmplr_irqfd_list,
- irqfd_resampler_hnode) {
+ hlist_for_each_entry_srcu(irqfd, &resampler->rsmplr_irqfd_list,
+ irqfd_resampler_hnode,
+ srcu_read_lock_held(&partition->pt_irq_srcu)) {
if (hv_should_clear_interrupt(irqfd->irqfd_lapic_irq.lapic_control.interrupt_type))
hv_call_clear_virtual_interrupt(partition->pt_id);
@@ -128,8 +129,8 @@ static int mshv_vp_irq_try_set_vector(struct mshv_vp *vp, u32 vector)
new_iv.vector[new_iv.vector_count++] = vector;
- if (cmpxchg(&vp->vp_register_page->interrupt_vectors.as_uint64,
- iv.as_uint64, new_iv.as_uint64) != iv.as_uint64)
+ if (!try_cmpxchg(&vp->vp_register_page->interrupt_vectors.as_uint64,
+ &iv.as_uint64, new_iv.as_uint64))
return -EAGAIN;
return 0;
@@ -247,12 +248,13 @@ static void mshv_irqfd_shutdown(struct work_struct *work)
{
struct mshv_irqfd *irqfd =
container_of(work, struct mshv_irqfd, irqfd_shutdown);
+ u64 cnt;
/*
* Synchronize with the wait-queue and unhook ourselves to prevent
* further events.
*/
- remove_wait_queue(irqfd->irqfd_wqh, &irqfd->irqfd_wait);
+ eventfd_ctx_remove_wait_queue(irqfd->irqfd_eventfd_ctx, &irqfd->irqfd_wait, &cnt);
if (irqfd->irqfd_resampler) {
mshv_irqfd_resampler_shutdown(irqfd);
@@ -295,13 +297,13 @@ static int mshv_irqfd_wakeup(wait_queue_entry_t *wait, unsigned int mode,
{
struct mshv_irqfd *irqfd = container_of(wait, struct mshv_irqfd,
irqfd_wait);
- unsigned long flags = (unsigned long)key;
+ __poll_t flags = key_to_poll(key);
int idx;
unsigned int seq;
struct mshv_partition *pt = irqfd->irqfd_partn;
int ret = 0;
- if (flags & POLLIN) {
+ if (flags & EPOLLIN) {
u64 cnt;
eventfd_ctx_do_read(irqfd->irqfd_eventfd_ctx, &cnt);
@@ -320,7 +322,7 @@ static int mshv_irqfd_wakeup(wait_queue_entry_t *wait, unsigned int mode,
ret = 1;
}
- if (flags & POLLHUP) {
+ if (flags & EPOLLHUP) {
/* The eventfd is closing, detach from the partition */
unsigned long flags;
@@ -371,8 +373,6 @@ static void mshv_irqfd_queue_proc(struct file *file, wait_queue_head_t *wqh,
struct mshv_irqfd *irqfd =
container_of(polltbl, struct mshv_irqfd, irqfd_polltbl);
- irqfd->irqfd_wqh = wqh;
-
/*
* TODO: Ensure there isn't already an exclusive, priority waiter, e.g.
* that the irqfd isn't already bound to another partition. Only the
@@ -506,7 +506,7 @@ static int mshv_irqfd_assign(struct mshv_partition *pt,
*/
events = vfs_poll(fd_file(f), &irqfd->irqfd_polltbl);
- if (events & POLLIN)
+ if (events & EPOLLIN)
mshv_assert_irq_slow(irqfd);
srcu_read_unlock(&pt->pt_irq_srcu, idx);
diff --git a/drivers/hv/mshv_eventfd.h b/drivers/hv/mshv_eventfd.h
index 332e7670a344..464c6b81ab33 100644
--- a/drivers/hv/mshv_eventfd.h
+++ b/drivers/hv/mshv_eventfd.h
@@ -32,7 +32,6 @@ struct mshv_irqfd {
struct mshv_lapic_irq irqfd_lapic_irq;
struct hlist_node irqfd_hnode;
poll_table irqfd_polltbl;
- wait_queue_head_t *irqfd_wqh;
wait_queue_entry_t irqfd_wait;
struct work_struct irqfd_shutdown;
struct mshv_irqfd_resampler *irqfd_resampler;
diff --git a/drivers/hv/mshv_regions.c b/drivers/hv/mshv_regions.c
index adba3564d9f1..c28aac0726de 100644
--- a/drivers/hv/mshv_regions.c
+++ b/drivers/hv/mshv_regions.c
@@ -88,7 +88,7 @@ static long mshv_region_process_chunk(struct mshv_mem_region *region,
struct page *page;
int stride, ret;
- page = region->pages[page_offset];
+ page = region->mreg_pages[page_offset];
if (!page)
return -EINVAL;
@@ -98,7 +98,7 @@ static long mshv_region_process_chunk(struct mshv_mem_region *region,
/* Start at stride since the first stride is validated */
for (count = stride; count < page_count; count += stride) {
- page = region->pages[page_offset + count];
+ page = region->mreg_pages[page_offset + count];
/* Break if current page is not present */
if (!page)
@@ -152,7 +152,7 @@ static int mshv_region_process_range(struct mshv_mem_region *region,
while (page_count) {
/* Skip non-present pages */
- if (!region->pages[page_offset]) {
+ if (!region->mreg_pages[page_offset]) {
page_offset++;
page_count--;
continue;
@@ -190,7 +190,7 @@ struct mshv_mem_region *mshv_region_create(u64 guest_pfn, u64 nr_pages,
if (flags & BIT(MSHV_SET_MEM_BIT_EXECUTABLE))
region->hv_map_flags |= HV_MAP_GPA_EXECUTABLE;
- kref_init(&region->refcount);
+ kref_init(&region->mreg_refcount);
return region;
}
@@ -204,7 +204,7 @@ static int mshv_region_chunk_share(struct mshv_mem_region *region,
flags |= HV_MODIFY_SPA_PAGE_HOST_ACCESS_LARGE_PAGE;
return hv_call_modify_spa_host_access(region->partition->pt_id,
- region->pages + page_offset,
+ region->mreg_pages + page_offset,
page_count,
HV_MAP_GPA_READABLE |
HV_MAP_GPA_WRITABLE,
@@ -229,7 +229,7 @@ static int mshv_region_chunk_unshare(struct mshv_mem_region *region,
flags |= HV_MODIFY_SPA_PAGE_HOST_ACCESS_LARGE_PAGE;
return hv_call_modify_spa_host_access(region->partition->pt_id,
- region->pages + page_offset,
+ region->mreg_pages + page_offset,
page_count, 0,
flags, false);
}
@@ -254,7 +254,7 @@ static int mshv_region_chunk_remap(struct mshv_mem_region *region,
return hv_call_map_gpa_pages(region->partition->pt_id,
region->start_gfn + page_offset,
page_count, flags,
- region->pages + page_offset);
+ region->mreg_pages + page_offset);
}
static int mshv_region_remap_pages(struct mshv_mem_region *region,
@@ -277,10 +277,10 @@ int mshv_region_map(struct mshv_mem_region *region)
static void mshv_region_invalidate_pages(struct mshv_mem_region *region,
u64 page_offset, u64 page_count)
{
- if (region->type == MSHV_REGION_TYPE_MEM_PINNED)
- unpin_user_pages(region->pages + page_offset, page_count);
+ if (region->mreg_type == MSHV_REGION_TYPE_MEM_PINNED)
+ unpin_user_pages(region->mreg_pages + page_offset, page_count);
- memset(region->pages + page_offset, 0,
+ memset(region->mreg_pages + page_offset, 0,
page_count * sizeof(struct page *));
}
@@ -297,7 +297,7 @@ int mshv_region_pin(struct mshv_mem_region *region)
int ret;
for (done_count = 0; done_count < region->nr_pages; done_count += ret) {
- pages = region->pages + done_count;
+ pages = region->mreg_pages + done_count;
userspace_addr = region->start_uaddr +
done_count * HV_HYP_PAGE_SIZE;
nr_pages = min(region->nr_pages - done_count,
@@ -348,11 +348,11 @@ static int mshv_region_unmap(struct mshv_mem_region *region)
static void mshv_region_destroy(struct kref *ref)
{
struct mshv_mem_region *region =
- container_of(ref, struct mshv_mem_region, refcount);
+ container_of(ref, struct mshv_mem_region, mreg_refcount);
struct mshv_partition *partition = region->partition;
int ret;
- if (region->type == MSHV_REGION_TYPE_MEM_MOVABLE)
+ if (region->mreg_type == MSHV_REGION_TYPE_MEM_MOVABLE)
mshv_region_movable_fini(region);
if (mshv_partition_encrypted(partition)) {
@@ -374,12 +374,12 @@ static void mshv_region_destroy(struct kref *ref)
void mshv_region_put(struct mshv_mem_region *region)
{
- kref_put(&region->refcount, mshv_region_destroy);
+ kref_put(&region->mreg_refcount, mshv_region_destroy);
}
int mshv_region_get(struct mshv_mem_region *region)
{
- return kref_get_unless_zero(&region->refcount);
+ return kref_get_unless_zero(&region->mreg_refcount);
}
/**
@@ -405,16 +405,16 @@ static int mshv_region_hmm_fault_and_lock(struct mshv_mem_region *region,
int ret;
range->notifier_seq = mmu_interval_read_begin(range->notifier);
- mmap_read_lock(region->mni.mm);
+ mmap_read_lock(region->mreg_mni.mm);
ret = hmm_range_fault(range);
- mmap_read_unlock(region->mni.mm);
+ mmap_read_unlock(region->mreg_mni.mm);
if (ret)
return ret;
- mutex_lock(&region->mutex);
+ mutex_lock(&region->mreg_mutex);
if (mmu_interval_read_retry(range->notifier, range->notifier_seq)) {
- mutex_unlock(&region->mutex);
+ mutex_unlock(&region->mreg_mutex);
cond_resched();
return -EBUSY;
}
@@ -438,7 +438,7 @@ static int mshv_region_range_fault(struct mshv_mem_region *region,
u64 page_offset, u64 page_count)
{
struct hmm_range range = {
- .notifier = &region->mni,
+ .notifier = &region->mreg_mni,
.default_flags = HMM_PFN_REQ_FAULT | HMM_PFN_REQ_WRITE,
};
unsigned long *pfns;
@@ -461,12 +461,12 @@ static int mshv_region_range_fault(struct mshv_mem_region *region,
goto out;
for (i = 0; i < page_count; i++)
- region->pages[page_offset + i] = hmm_pfn_to_page(pfns[i]);
+ region->mreg_pages[page_offset + i] = hmm_pfn_to_page(pfns[i]);
ret = mshv_region_remap_pages(region, region->hv_map_flags,
page_offset, page_count);
- mutex_unlock(&region->mutex);
+ mutex_unlock(&region->mreg_mutex);
out:
kfree(pfns);
return ret;
@@ -520,7 +520,7 @@ static bool mshv_region_interval_invalidate(struct mmu_interval_notifier *mni,
{
struct mshv_mem_region *region = container_of(mni,
struct mshv_mem_region,
- mni);
+ mreg_mni);
u64 page_offset, page_count;
unsigned long mstart, mend;
int ret = -EPERM;
@@ -533,8 +533,8 @@ static bool mshv_region_interval_invalidate(struct mmu_interval_notifier *mni,
page_count = HVPFN_DOWN(mend - mstart);
if (mmu_notifier_range_blockable(range))
- mutex_lock(&region->mutex);
- else if (!mutex_trylock(&region->mutex))
+ mutex_lock(&region->mreg_mutex);
+ else if (!mutex_trylock(&region->mreg_mutex))
goto out_fail;
mmu_interval_set_seq(mni, cur_seq);
@@ -546,12 +546,12 @@ static bool mshv_region_interval_invalidate(struct mmu_interval_notifier *mni,
mshv_region_invalidate_pages(region, page_offset, page_count);
- mutex_unlock(&region->mutex);
+ mutex_unlock(&region->mreg_mutex);
return true;
out_unlock:
- mutex_unlock(&region->mutex);
+ mutex_unlock(&region->mreg_mutex);
out_fail:
WARN_ONCE(ret,
"Failed to invalidate region %#llx-%#llx (range %#lx-%#lx, event: %u, pages %#llx-%#llx, mm: %#llx): %d\n",
@@ -568,21 +568,21 @@ static const struct mmu_interval_notifier_ops mshv_region_mni_ops = {
void mshv_region_movable_fini(struct mshv_mem_region *region)
{
- mmu_interval_notifier_remove(&region->mni);
+ mmu_interval_notifier_remove(&region->mreg_mni);
}
bool mshv_region_movable_init(struct mshv_mem_region *region)
{
int ret;
- ret = mmu_interval_notifier_insert(&region->mni, current->mm,
+ ret = mmu_interval_notifier_insert(&region->mreg_mni, current->mm,
region->start_uaddr,
region->nr_pages << HV_HYP_PAGE_SHIFT,
&mshv_region_mni_ops);
if (ret)
return false;
- mutex_init(&region->mutex);
+ mutex_init(&region->mreg_mutex);
return true;
}
diff --git a/drivers/hv/mshv_root.h b/drivers/hv/mshv_root.h
index 3c1d88b36741..04c2a1910a8a 100644
--- a/drivers/hv/mshv_root.h
+++ b/drivers/hv/mshv_root.h
@@ -52,6 +52,9 @@ struct mshv_vp {
unsigned int kicked_by_hv;
wait_queue_head_t vp_suspend_queue;
} run;
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+ struct dentry *vp_stats_dentry;
+#endif
};
#define vp_fmt(fmt) "p%lluvp%u: " fmt
@@ -79,16 +82,16 @@ enum mshv_region_type {
struct mshv_mem_region {
struct hlist_node hnode;
- struct kref refcount;
+ struct kref mreg_refcount;
u64 nr_pages;
u64 start_gfn;
u64 start_uaddr;
u32 hv_map_flags;
struct mshv_partition *partition;
- enum mshv_region_type type;
- struct mmu_interval_notifier mni;
- struct mutex mutex; /* protects region pages remapping */
- struct page *pages[];
+ enum mshv_region_type mreg_type;
+ struct mmu_interval_notifier mreg_mni;
+ struct mutex mreg_mutex; /* protects region pages remapping */
+ struct page *mreg_pages[];
};
struct mshv_irq_ack_notifier {
@@ -136,6 +139,10 @@ struct mshv_partition {
u64 isolation_type;
bool import_completed;
bool pt_initialized;
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+ struct dentry *pt_stats_dentry;
+ struct dentry *pt_vp_dentry;
+#endif
};
#define pt_fmt(fmt) "p%llu: " fmt
@@ -254,6 +261,16 @@ struct mshv_partition *mshv_partition_get(struct mshv_partition *partition);
void mshv_partition_put(struct mshv_partition *partition);
struct mshv_partition *mshv_partition_find(u64 partition_id) __must_hold(RCU);
+/*
+ * True only when hv_l1vh_partition() holds and @partition_id is
+ * HV_PARTITION_ID_SELF.
+ */
+static inline bool is_l1vh_parent(u64 partition_id)
+{
+	if (!hv_l1vh_partition())
+		return false;
+
+	return partition_id == HV_PARTITION_ID_SELF;
+}
+
+int mshv_vp_stats_map(u64 partition_id, u32 vp_index,
+ struct hv_stats_page **stats_pages);
+void mshv_vp_stats_unmap(u64 partition_id, u32 vp_index,
+ struct hv_stats_page **stats_pages);
+
/* hypercalls */
int hv_call_withdraw_memory(u64 count, int node, u64 partition_id);
@@ -307,8 +324,9 @@ int hv_call_disconnect_port(u64 connection_partition_id,
int hv_call_notify_port_ring_empty(u32 sint_index);
int hv_map_stats_page(enum hv_stats_object_type type,
const union hv_stats_object_identity *identity,
- void **addr);
-int hv_unmap_stats_page(enum hv_stats_object_type type, void *page_addr,
+ struct hv_stats_page **addr);
+int hv_unmap_stats_page(enum hv_stats_object_type type,
+ struct hv_stats_page *page_addr,
const union hv_stats_object_identity *identity);
int hv_call_modify_spa_host_access(u64 partition_id, struct page **pages,
u64 page_struct_count, u32 host_access,
@@ -316,6 +334,33 @@ int hv_call_modify_spa_host_access(u64 partition_id, struct page **pages,
int hv_call_get_partition_property_ex(u64 partition_id, u64 property_code, u64 arg,
void *property_value, size_t property_value_sz);
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+/* debugfs statistics entry points */
+int __init mshv_debugfs_init(void);
+void mshv_debugfs_exit(void);
+
+int mshv_debugfs_partition_create(struct mshv_partition *partition);
+void mshv_debugfs_partition_remove(struct mshv_partition *partition);
+int mshv_debugfs_vp_create(struct mshv_vp *vp);
+void mshv_debugfs_vp_remove(struct mshv_vp *vp);
+#else
+/* CONFIG_DEBUG_FS disabled: empty stubs; the int-returning ones return 0. */
+static inline int __init mshv_debugfs_init(void) { return 0; }
+static inline void mshv_debugfs_exit(void) { }
+
+static inline int
+mshv_debugfs_partition_create(struct mshv_partition *partition) { return 0; }
+static inline void
+mshv_debugfs_partition_remove(struct mshv_partition *partition) { }
+static inline int mshv_debugfs_vp_create(struct mshv_vp *vp) { return 0; }
+static inline void mshv_debugfs_vp_remove(struct mshv_vp *vp) { }
+#endif
+
extern struct mshv_root mshv_root;
extern enum hv_scheduler_type hv_scheduler_type;
extern u8 * __percpu *hv_synic_eventring_tail;
diff --git a/drivers/hv/mshv_root_hv_call.c b/drivers/hv/mshv_root_hv_call.c
index 598eaff4ff29..7f91096f95a8 100644
--- a/drivers/hv/mshv_root_hv_call.c
+++ b/drivers/hv/mshv_root_hv_call.c
@@ -115,7 +115,7 @@ int hv_call_create_partition(u64 flags,
status = hv_do_hypercall(HVCALL_CREATE_PARTITION,
input, output);
- if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
+ if (!hv_result_needs_memory(status)) {
if (hv_result_success(status))
*partition_id = output->partition_id;
local_irq_restore(irq_flags);
@@ -123,8 +123,7 @@ int hv_call_create_partition(u64 flags,
break;
}
local_irq_restore(irq_flags);
- ret = hv_call_deposit_pages(NUMA_NO_NODE,
- hv_current_partition_id, 1);
+ ret = hv_deposit_memory(hv_current_partition_id, status);
} while (!ret);
return ret;
@@ -147,11 +146,11 @@ int hv_call_initialize_partition(u64 partition_id)
status = hv_do_fast_hypercall8(HVCALL_INITIALIZE_PARTITION,
*(u64 *)&input);
- if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
+ if (!hv_result_needs_memory(status)) {
ret = hv_result_to_errno(status);
break;
}
- ret = hv_call_deposit_pages(NUMA_NO_NODE, partition_id, 1);
+ ret = hv_deposit_memory(partition_id, status);
} while (!ret);
return ret;
@@ -239,7 +238,7 @@ static int hv_do_map_gpa_hcall(u64 partition_id, u64 gfn, u64 page_struct_count,
completed = hv_repcomp(status);
- if (hv_result(status) == HV_STATUS_INSUFFICIENT_MEMORY) {
+ if (hv_result_needs_memory(status)) {
ret = hv_call_deposit_pages(NUMA_NO_NODE, partition_id,
HV_MAP_GPA_DEPOSIT_PAGES);
if (ret)
@@ -455,7 +454,7 @@ int hv_call_get_vp_state(u32 vp_index, u64 partition_id,
status = hv_do_hypercall(control, input, output);
- if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
+ if (!hv_result_needs_memory(status)) {
if (hv_result_success(status) && ret_output)
memcpy(ret_output, output, sizeof(*output));
@@ -465,8 +464,7 @@ int hv_call_get_vp_state(u32 vp_index, u64 partition_id,
}
local_irq_restore(flags);
- ret = hv_call_deposit_pages(NUMA_NO_NODE,
- partition_id, 1);
+ ret = hv_deposit_memory(partition_id, status);
} while (!ret);
return ret;
@@ -518,15 +516,14 @@ int hv_call_set_vp_state(u32 vp_index, u64 partition_id,
status = hv_do_hypercall(control, input, NULL);
- if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
+ if (!hv_result_needs_memory(status)) {
local_irq_restore(flags);
ret = hv_result_to_errno(status);
break;
}
local_irq_restore(flags);
- ret = hv_call_deposit_pages(NUMA_NO_NODE,
- partition_id, 1);
+ ret = hv_deposit_memory(partition_id, status);
} while (!ret);
return ret;
@@ -563,7 +560,7 @@ static int hv_call_map_vp_state_page(u64 partition_id, u32 vp_index, u32 type,
status = hv_do_hypercall(HVCALL_MAP_VP_STATE_PAGE, input,
output);
- if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
+ if (!hv_result_needs_memory(status)) {
if (hv_result_success(status))
*state_page = pfn_to_page(output->map_location);
local_irq_restore(flags);
@@ -573,7 +570,7 @@ static int hv_call_map_vp_state_page(u64 partition_id, u32 vp_index, u32 type,
local_irq_restore(flags);
- ret = hv_call_deposit_pages(NUMA_NO_NODE, partition_id, 1);
+ ret = hv_deposit_memory(partition_id, status);
} while (!ret);
return ret;
@@ -718,12 +715,11 @@ hv_call_create_port(u64 port_partition_id, union hv_port_id port_id,
if (hv_result_success(status))
break;
- if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
+ if (!hv_result_needs_memory(status)) {
ret = hv_result_to_errno(status);
break;
}
- ret = hv_call_deposit_pages(NUMA_NO_NODE, port_partition_id, 1);
-
+ ret = hv_deposit_memory(port_partition_id, status);
} while (!ret);
return ret;
@@ -772,12 +768,11 @@ hv_call_connect_port(u64 port_partition_id, union hv_port_id port_id,
if (hv_result_success(status))
break;
- if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
+ if (!hv_result_needs_memory(status)) {
ret = hv_result_to_errno(status);
break;
}
- ret = hv_call_deposit_pages(NUMA_NO_NODE,
- connection_partition_id, 1);
+ ret = hv_deposit_memory(connection_partition_id, status);
} while (!ret);
return ret;
@@ -813,6 +808,13 @@ hv_call_notify_port_ring_empty(u32 sint_index)
return hv_result_to_errno(status);
}
+/*
+ * Equivalent of hv_call_map_stats_page() for cases when the caller provides
+ * the map location.
+ *
+ * NOTE: This is a newer hypercall that always supports SELF and PARENT stats
+ * areas, unlike hv_call_map_stats_page().
+ */
static int hv_call_map_stats_page2(enum hv_stats_object_type type,
const union hv_stats_object_identity *identity,
u64 map_location)
@@ -843,21 +845,49 @@ static int hv_call_map_stats_page2(enum hv_stats_object_type type,
if (!ret)
break;
- if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
+ if (!hv_result_needs_memory(status)) {
hv_status_debug(status, "\n");
break;
}
- ret = hv_call_deposit_pages(NUMA_NO_NODE,
- hv_current_partition_id, 1);
+ ret = hv_deposit_memory(hv_current_partition_id, status);
} while (!ret);
return ret;
}
-static int hv_call_map_stats_page(enum hv_stats_object_type type,
- const union hv_stats_object_identity *identity,
- void **addr)
+/*
+ * Return the stats_area_type stored in @identity, reading the union member
+ * that corresponds to @type. Yields -EINVAL when @type matches none of the
+ * cases handled below.
+ */
+static int
+hv_stats_get_area_type(enum hv_stats_object_type type,
+		       const union hv_stats_object_identity *identity)
+{
+	int area_type = -EINVAL;
+
+	switch (type) {
+	case HV_STATS_OBJECT_HYPERVISOR:
+		area_type = identity->hv.stats_area_type;
+		break;
+	case HV_STATS_OBJECT_LOGICAL_PROCESSOR:
+		area_type = identity->lp.stats_area_type;
+		break;
+	case HV_STATS_OBJECT_PARTITION:
+		area_type = identity->partition.stats_area_type;
+		break;
+	case HV_STATS_OBJECT_VP:
+		area_type = identity->vp.stats_area_type;
+		break;
+	}
+
+	return area_type;
+}
+
+/*
+ * Map a stats page, where the page location is provided by the hypervisor.
+ *
+ * NOTE: The concept of separate SELF and PARENT stats areas does not exist on
+ * older hypervisor versions. All the available stats information can be found
+ * on the SELF page. When attempting to map the PARENT area on a hypervisor
+ * that doesn't support it, return "success" but with a NULL address. The
+ * caller should check for this case and instead fall back to the SELF area
+ * alone.
+ */
+static int
+hv_call_map_stats_page(enum hv_stats_object_type type,
+ const union hv_stats_object_identity *identity,
+ struct hv_stats_page **addr)
{
unsigned long flags;
struct hv_input_map_stats_page *input;
@@ -878,15 +908,22 @@ static int hv_call_map_stats_page(enum hv_stats_object_type type,
pfn = output->map_location;
local_irq_restore(flags);
- if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
- ret = hv_result_to_errno(status);
+
+ if (!hv_result_needs_memory(status)) {
if (hv_result_success(status))
break;
- return ret;
+
+ if (hv_stats_get_area_type(type, identity) == HV_STATS_AREA_PARENT &&
+ hv_result(status) == HV_STATUS_INVALID_PARAMETER) {
+ *addr = NULL;
+ return 0;
+ }
+
+ hv_status_debug(status, "\n");
+ return hv_result_to_errno(status);
}
- ret = hv_call_deposit_pages(NUMA_NO_NODE,
- hv_current_partition_id, 1);
+ ret = hv_deposit_memory(hv_current_partition_id, status);
if (ret)
return ret;
} while (!ret);
@@ -898,7 +935,7 @@ static int hv_call_map_stats_page(enum hv_stats_object_type type,
int hv_map_stats_page(enum hv_stats_object_type type,
const union hv_stats_object_identity *identity,
- void **addr)
+ struct hv_stats_page **addr)
{
int ret;
struct page *allocated_page = NULL;
@@ -946,7 +983,8 @@ static int hv_call_unmap_stats_page(enum hv_stats_object_type type,
return hv_result_to_errno(status);
}
-int hv_unmap_stats_page(enum hv_stats_object_type type, void *page_addr,
+int hv_unmap_stats_page(enum hv_stats_object_type type,
+ struct hv_stats_page *page_addr,
const union hv_stats_object_identity *identity)
{
int ret;
diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c
index 681b58154d5e..e6509c980763 100644
--- a/drivers/hv/mshv_root_main.c
+++ b/drivers/hv/mshv_root_main.c
@@ -39,22 +39,12 @@ MODULE_AUTHOR("Microsoft");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Microsoft Hyper-V root partition VMM interface /dev/mshv");
-/* TODO move this to another file when debugfs code is added */
-enum hv_stats_vp_counters { /* HV_THREAD_COUNTER */
-#if defined(CONFIG_X86)
- VpRootDispatchThreadBlocked = 202,
+/* HV_THREAD_COUNTER */
+#if defined(CONFIG_X86_64)
+#define HV_VP_COUNTER_ROOT_DISPATCH_THREAD_BLOCKED 202
#elif defined(CONFIG_ARM64)
- VpRootDispatchThreadBlocked = 94,
+#define HV_VP_COUNTER_ROOT_DISPATCH_THREAD_BLOCKED 95
#endif
- VpStatsMaxCounter
-};
-
-struct hv_stats_page {
- union {
- u64 vp_cntrs[VpStatsMaxCounter]; /* VP counters */
- u8 data[HV_HYP_PAGE_SIZE];
- };
-} __packed;
struct mshv_root mshv_root;
@@ -130,6 +120,7 @@ static u16 mshv_passthru_hvcalls[] = {
HVCALL_SET_VP_REGISTERS,
HVCALL_TRANSLATE_VIRTUAL_ADDRESS,
HVCALL_CLEAR_VIRTUAL_INTERRUPT,
+ HVCALL_SCRUB_PARTITION,
HVCALL_REGISTER_INTERCEPT_RESULT,
HVCALL_ASSERT_VIRTUAL_INTERRUPT,
HVCALL_GET_GPA_PAGES_ACCESS_STATES,
@@ -261,11 +252,10 @@ static int mshv_ioctl_passthru_hvcall(struct mshv_partition *partition,
if (hv_result_success(status))
break;
- if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY)
+ if (!hv_result_needs_memory(status))
ret = hv_result_to_errno(status);
else
- ret = hv_call_deposit_pages(NUMA_NO_NODE,
- pt_id, 1);
+ ret = hv_deposit_memory(pt_id, status);
} while (!ret);
args.status = hv_result(status);
@@ -485,12 +475,11 @@ static u64 mshv_vp_interrupt_pending(struct mshv_vp *vp)
+/*
+ * Report whether the HV_VP_COUNTER_ROOT_DISPATCH_THREAD_BLOCKED counter is
+ * non-zero in either of the VP's stats areas. The PARENT area is consulted
+ * first; the SELF area is only read when the PARENT counter is zero.
+ */
static bool mshv_vp_dispatch_thread_blocked(struct mshv_vp *vp)
{
struct hv_stats_page **stats = vp->vp_stats_pages;
- u64 *self_vp_cntrs = stats[HV_STATS_AREA_SELF]->vp_cntrs;
- u64 *parent_vp_cntrs = stats[HV_STATS_AREA_PARENT]->vp_cntrs;
+	const u64 *parent_cntrs = stats[HV_STATS_AREA_PARENT]->data;
+	const u64 *self_cntrs = stats[HV_STATS_AREA_SELF]->data;
- if (self_vp_cntrs[VpRootDispatchThreadBlocked])
- return self_vp_cntrs[VpRootDispatchThreadBlocked];
- return parent_vp_cntrs[VpRootDispatchThreadBlocked];
+	if (parent_cntrs[HV_VP_COUNTER_ROOT_DISPATCH_THREAD_BLOCKED])
+		return true;
+
+	return self_cntrs[HV_VP_COUNTER_ROOT_DISPATCH_THREAD_BLOCKED] != 0;
}
static int
@@ -661,7 +650,7 @@ static bool mshv_handle_gpa_intercept(struct mshv_vp *vp)
return false;
/* Only movable memory ranges are supported for GPA intercepts */
- if (region->type == MSHV_REGION_TYPE_MEM_MOVABLE)
+ if (region->mreg_type == MSHV_REGION_TYPE_MEM_MOVABLE)
ret = mshv_region_handle_gfn_fault(region, gfn);
else
ret = false;
@@ -957,23 +946,36 @@ mshv_vp_release(struct inode *inode, struct file *filp)
return 0;
}
-static void mshv_vp_stats_unmap(u64 partition_id, u32 vp_index,
- void *stats_pages[])
+/*
+ * Unmap the stats pages of one VP.
+ *
+ * @partition_id: partition that owns the VP
+ * @vp_index:     index of the VP within that partition
+ * @stats_pages:  array indexed by HV_STATS_AREA_SELF / HV_STATS_AREA_PARENT
+ *
+ * Unmap failures are logged with pr_err() and otherwise ignored; nothing is
+ * returned to the caller.
+ */
+void mshv_vp_stats_unmap(u64 partition_id, u32 vp_index,
+ struct hv_stats_page *stats_pages[])
{
union hv_stats_object_identity identity = {
.vp.partition_id = partition_id,
.vp.vp_index = vp_index,
};
+ int err;
identity.vp.stats_area_type = HV_STATS_AREA_SELF;
- hv_unmap_stats_page(HV_STATS_OBJECT_VP, NULL, &identity);
-
- identity.vp.stats_area_type = HV_STATS_AREA_PARENT;
- hv_unmap_stats_page(HV_STATS_OBJECT_VP, NULL, &identity);
+ err = hv_unmap_stats_page(HV_STATS_OBJECT_VP,
+ stats_pages[HV_STATS_AREA_SELF],
+ &identity);
+ if (err)
+ pr_err("%s: failed to unmap partition %llu vp %u self stats, err: %d\n",
+ __func__, partition_id, vp_index, err);
+
+	/*
+	 * mshv_vp_stats_map() may store the SELF page pointer in the PARENT
+	 * slot too; when both slots hold the same pointer there is no
+	 * separate PARENT mapping to undo.
+	 */
+ if (stats_pages[HV_STATS_AREA_PARENT] != stats_pages[HV_STATS_AREA_SELF]) {
+ identity.vp.stats_area_type = HV_STATS_AREA_PARENT;
+ err = hv_unmap_stats_page(HV_STATS_OBJECT_VP,
+ stats_pages[HV_STATS_AREA_PARENT],
+ &identity);
+ if (err)
+ pr_err("%s: failed to unmap partition %llu vp %u parent stats, err: %d\n",
+ __func__, partition_id, vp_index, err);
+ }
}
-static int mshv_vp_stats_map(u64 partition_id, u32 vp_index,
- void *stats_pages[])
+int mshv_vp_stats_map(u64 partition_id, u32 vp_index,
+ struct hv_stats_page *stats_pages[])
{
union hv_stats_object_identity identity = {
.vp.partition_id = partition_id,
@@ -984,20 +986,37 @@ static int mshv_vp_stats_map(u64 partition_id, u32 vp_index,
identity.vp.stats_area_type = HV_STATS_AREA_SELF;
err = hv_map_stats_page(HV_STATS_OBJECT_VP, &identity,
&stats_pages[HV_STATS_AREA_SELF]);
- if (err)
+ if (err) {
+ pr_err("%s: failed to map partition %llu vp %u self stats, err: %d\n",
+ __func__, partition_id, vp_index, err);
return err;
+ }
- identity.vp.stats_area_type = HV_STATS_AREA_PARENT;
- err = hv_map_stats_page(HV_STATS_OBJECT_VP, &identity,
- &stats_pages[HV_STATS_AREA_PARENT]);
- if (err)
- goto unmap_self;
+ /*
+ * L1VH partition cannot access its vp stats in parent area.
+ */
+ if (is_l1vh_parent(partition_id)) {
+ stats_pages[HV_STATS_AREA_PARENT] = stats_pages[HV_STATS_AREA_SELF];
+ } else {
+ identity.vp.stats_area_type = HV_STATS_AREA_PARENT;
+ err = hv_map_stats_page(HV_STATS_OBJECT_VP, &identity,
+ &stats_pages[HV_STATS_AREA_PARENT]);
+ if (err) {
+ pr_err("%s: failed to map partition %llu vp %u parent stats, err: %d\n",
+ __func__, partition_id, vp_index, err);
+ goto unmap_self;
+ }
+ if (!stats_pages[HV_STATS_AREA_PARENT])
+ stats_pages[HV_STATS_AREA_PARENT] = stats_pages[HV_STATS_AREA_SELF];
+ }
return 0;
unmap_self:
identity.vp.stats_area_type = HV_STATS_AREA_SELF;
- hv_unmap_stats_page(HV_STATS_OBJECT_VP, NULL, &identity);
+ hv_unmap_stats_page(HV_STATS_OBJECT_VP,
+ stats_pages[HV_STATS_AREA_SELF],
+ &identity);
return err;
}
@@ -1008,7 +1027,7 @@ mshv_partition_ioctl_create_vp(struct mshv_partition *partition,
struct mshv_create_vp args;
struct mshv_vp *vp;
struct page *intercept_msg_page, *register_page, *ghcb_page;
- void *stats_pages[2];
+ struct hv_stats_page *stats_pages[2];
long ret;
if (copy_from_user(&args, arg, sizeof(args)))
@@ -1048,16 +1067,10 @@ mshv_partition_ioctl_create_vp(struct mshv_partition *partition,
goto unmap_register_page;
}
- /*
- * This mapping of the stats page is for detecting if dispatch thread
- * is blocked - only relevant for root scheduler
- */
- if (hv_scheduler_type == HV_SCHEDULER_TYPE_ROOT) {
- ret = mshv_vp_stats_map(partition->pt_id, args.vp_index,
- stats_pages);
- if (ret)
- goto unmap_ghcb_page;
- }
+ ret = mshv_vp_stats_map(partition->pt_id, args.vp_index,
+ stats_pages);
+ if (ret)
+ goto unmap_ghcb_page;
vp = kzalloc(sizeof(*vp), GFP_KERNEL);
if (!vp)
@@ -1081,8 +1094,11 @@ mshv_partition_ioctl_create_vp(struct mshv_partition *partition,
if (mshv_partition_encrypted(partition) && is_ghcb_mapping_available())
vp->vp_ghcb_page = page_to_virt(ghcb_page);
- if (hv_scheduler_type == HV_SCHEDULER_TYPE_ROOT)
- memcpy(vp->vp_stats_pages, stats_pages, sizeof(stats_pages));
+ memcpy(vp->vp_stats_pages, stats_pages, sizeof(stats_pages));
+
+ ret = mshv_debugfs_vp_create(vp);
+ if (ret)
+ goto put_partition;
/*
* Keep anon_inode_getfd last: it installs fd in the file struct and
@@ -1091,7 +1107,7 @@ mshv_partition_ioctl_create_vp(struct mshv_partition *partition,
ret = anon_inode_getfd("mshv_vp", &mshv_vp_fops, vp,
O_RDWR | O_CLOEXEC);
if (ret < 0)
- goto put_partition;
+ goto remove_debugfs_vp;
/* already exclusive with the partition mutex for all ioctls */
partition->pt_vp_count++;
@@ -1099,13 +1115,14 @@ mshv_partition_ioctl_create_vp(struct mshv_partition *partition,
return ret;
+remove_debugfs_vp:
+ mshv_debugfs_vp_remove(vp);
put_partition:
mshv_partition_put(partition);
free_vp:
kfree(vp);
unmap_stats_pages:
- if (hv_scheduler_type == HV_SCHEDULER_TYPE_ROOT)
- mshv_vp_stats_unmap(partition->pt_id, args.vp_index, stats_pages);
+ mshv_vp_stats_unmap(partition->pt_id, args.vp_index, stats_pages);
unmap_ghcb_page:
if (mshv_partition_encrypted(partition) && is_ghcb_mapping_available())
hv_unmap_vp_state_page(partition->pt_id, args.vp_index,
@@ -1176,12 +1193,12 @@ static int mshv_partition_create_region(struct mshv_partition *partition,
return PTR_ERR(rg);
if (is_mmio)
- rg->type = MSHV_REGION_TYPE_MMIO;
+ rg->mreg_type = MSHV_REGION_TYPE_MMIO;
else if (mshv_partition_encrypted(partition) ||
!mshv_region_movable_init(rg))
- rg->type = MSHV_REGION_TYPE_MEM_PINNED;
+ rg->mreg_type = MSHV_REGION_TYPE_MEM_PINNED;
else
- rg->type = MSHV_REGION_TYPE_MEM_MOVABLE;
+ rg->mreg_type = MSHV_REGION_TYPE_MEM_MOVABLE;
rg->partition = partition;
@@ -1298,7 +1315,7 @@ mshv_map_user_memory(struct mshv_partition *partition,
if (ret)
return ret;
- switch (region->type) {
+ switch (region->mreg_type) {
case MSHV_REGION_TYPE_MEM_PINNED:
ret = mshv_prepare_pinned_region(region);
break;
@@ -1542,10 +1559,16 @@ mshv_partition_ioctl_initialize(struct mshv_partition *partition)
if (ret)
goto withdraw_mem;
+ ret = mshv_debugfs_partition_create(partition);
+ if (ret)
+ goto finalize_partition;
+
partition->pt_initialized = true;
return 0;
+finalize_partition:
+ hv_call_finalize_partition(partition->pt_id);
withdraw_mem:
hv_call_withdraw_memory(U64_MAX, NUMA_NO_NODE, partition->pt_id);
@@ -1725,9 +1748,9 @@ static void destroy_partition(struct mshv_partition *partition)
if (!vp)
continue;
- if (hv_scheduler_type == HV_SCHEDULER_TYPE_ROOT)
- mshv_vp_stats_unmap(partition->pt_id, vp->vp_index,
- (void **)vp->vp_stats_pages);
+ mshv_debugfs_vp_remove(vp);
+ mshv_vp_stats_unmap(partition->pt_id, vp->vp_index,
+ vp->vp_stats_pages);
if (vp->vp_register_page) {
(void)hv_unmap_vp_state_page(partition->pt_id,
@@ -1759,6 +1782,8 @@ static void destroy_partition(struct mshv_partition *partition)
partition->pt_vp_array[i] = NULL;
}
+ mshv_debugfs_partition_remove(partition);
+
/* Deallocates and unmaps everything including vcpus, GPA mappings etc */
hv_call_finalize_partition(partition->pt_id);
@@ -1921,6 +1946,10 @@ static long mshv_ioctl_process_pt_flags(void __user *user_arg, u64 *pt_flags,
*pt_flags |= HV_PARTITION_CREATION_FLAG_X2APIC_CAPABLE;
if (args.pt_flags & BIT_ULL(MSHV_PT_BIT_GPA_SUPER_PAGES))
*pt_flags |= HV_PARTITION_CREATION_FLAG_GPA_SUPER_PAGES_ENABLED;
+	/*
+	 * Translate the nested-virtualization and SMT-enabled-guest request
+	 * bits into hypervisor creation flags. Use BIT_ULL(), as the
+	 * GPA_SUPER_PAGES test on args.pt_flags just above does.
+	 */
+	if (args.pt_flags & BIT_ULL(MSHV_PT_BIT_NESTED_VIRTUALIZATION))
+		*pt_flags |= HV_PARTITION_CREATION_FLAG_NESTED_VIRTUALIZATION_CAPABLE;
+	if (args.pt_flags & BIT_ULL(MSHV_PT_BIT_SMT_ENABLED_GUEST))
+		*pt_flags |= HV_PARTITION_CREATION_FLAG_SMT_ENABLED_GUEST;
isol_props->as_uint64 = 0;
@@ -2054,6 +2083,29 @@ static const char *scheduler_type_to_string(enum hv_scheduler_type type)
};
}
+/*
+ * Work out the scheduler type for an L1VH partition.
+ *
+ * *out starts as HV_SCHEDULER_TYPE_CORE_SMT. Only when the VMM capabilities
+ * have vmm_enable_integrated_scheduler set is the
+ * HV_PARTITION_PROPERTY_INTEGRATED_SCHEDULER_ENABLED property read; a
+ * non-zero value switches *out to HV_SCHEDULER_TYPE_ROOT.
+ *
+ * Returns 0 on success, or the error from the property query, in which case
+ * *out is left at HV_SCHEDULER_TYPE_CORE_SMT.
+ */
+static int __init l1vh_retrieve_scheduler_type(enum hv_scheduler_type *out)
+{
+	u64 enabled;
+	int err;
+
+	*out = HV_SCHEDULER_TYPE_CORE_SMT;
+
+	if (mshv_root.vmm_caps.vmm_enable_integrated_scheduler) {
+		err = hv_call_get_partition_property_ex(HV_PARTITION_ID_SELF,
+				HV_PARTITION_PROPERTY_INTEGRATED_SCHEDULER_ENABLED,
+				0, &enabled, sizeof(enabled));
+		if (err)
+			return err;
+
+		if (enabled)
+			*out = HV_SCHEDULER_TYPE_ROOT;
+	}
+
+	return 0;
+}
+}
+
/* TODO move this to hv_common.c when needed outside */
static int __init hv_retrieve_scheduler_type(enum hv_scheduler_type *out)
{
@@ -2086,13 +2138,12 @@ static int __init hv_retrieve_scheduler_type(enum hv_scheduler_type *out)
/* Retrieve and stash the supported scheduler type */
static int __init mshv_retrieve_scheduler_type(struct device *dev)
{
- int ret = 0;
+ int ret;
if (hv_l1vh_partition())
- hv_scheduler_type = HV_SCHEDULER_TYPE_CORE_SMT;
+ ret = l1vh_retrieve_scheduler_type(&hv_scheduler_type);
else
ret = hv_retrieve_scheduler_type(&hv_scheduler_type);
-
if (ret)
return ret;
@@ -2212,42 +2263,29 @@ struct notifier_block mshv_reboot_nb = {
static void mshv_root_partition_exit(void)
{
unregister_reboot_notifier(&mshv_reboot_nb);
- root_scheduler_deinit();
}
static int __init mshv_root_partition_init(struct device *dev)
{
- int err;
-
- err = root_scheduler_init(dev);
- if (err)
- return err;
-
- err = register_reboot_notifier(&mshv_reboot_nb);
- if (err)
- goto root_sched_deinit;
-
- return 0;
-
-root_sched_deinit:
- root_scheduler_deinit();
- return err;
+ return register_reboot_notifier(&mshv_reboot_nb);
}
-static void mshv_init_vmm_caps(struct device *dev)
+/*
+ * Fetch HV_PARTITION_PROPERTY_VMM_CAPABILITIES for this partition into
+ * mshv_root.vmm_caps.
+ *
+ * A failed query is treated as an error only on an L1VH partition, where it
+ * is logged and the error code returned. In every other case the first
+ * capability bank is logged at debug level and 0 is returned.
+ */
+static int __init mshv_init_vmm_caps(struct device *dev)
{
- /*
- * This can only fail here if HVCALL_GET_PARTITION_PROPERTY_EX or
- * HV_PARTITION_PROPERTY_VMM_CAPABILITIES are not supported. In that
- * case it's valid to proceed as if all vmm_caps are disabled (zero).
- */
- if (hv_call_get_partition_property_ex(HV_PARTITION_ID_SELF,
- HV_PARTITION_PROPERTY_VMM_CAPABILITIES,
- 0, &mshv_root.vmm_caps,
- sizeof(mshv_root.vmm_caps)))
- dev_warn(dev, "Unable to get VMM capabilities\n");
+	int err = hv_call_get_partition_property_ex(HV_PARTITION_ID_SELF,
+						    HV_PARTITION_PROPERTY_VMM_CAPABILITIES,
+						    0, &mshv_root.vmm_caps,
+						    sizeof(mshv_root.vmm_caps));
+
+	if (err) {
+		/* Only an L1VH partition treats a failed query as fatal. */
+		if (hv_l1vh_partition()) {
+			dev_err(dev, "Failed to get VMM capabilities: %d\n", err);
+			return err;
+		}
+	}
dev_dbg(dev, "vmm_caps = %#llx\n", mshv_root.vmm_caps.as_uint64[0]);
+
+	return 0;
}
static int __init mshv_parent_partition_init(void)
@@ -2293,6 +2331,10 @@ static int __init mshv_parent_partition_init(void)
mshv_cpuhp_online = ret;
+ ret = mshv_init_vmm_caps(dev);
+ if (ret)
+ goto remove_cpu_state;
+
ret = mshv_retrieve_scheduler_type(dev);
if (ret)
goto remove_cpu_state;
@@ -2302,11 +2344,17 @@ static int __init mshv_parent_partition_init(void)
if (ret)
goto remove_cpu_state;
- mshv_init_vmm_caps(dev);
+ ret = root_scheduler_init(dev);
+ if (ret)
+ goto exit_partition;
+
+ ret = mshv_debugfs_init();
+ if (ret)
+ goto deinit_root_scheduler;
ret = mshv_irqfd_wq_init();
if (ret)
- goto exit_partition;
+ goto exit_debugfs;
spin_lock_init(&mshv_root.pt_ht_lock);
hash_init(mshv_root.pt_htable);
@@ -2315,6 +2363,10 @@ static int __init mshv_parent_partition_init(void)
return 0;
+exit_debugfs:
+ mshv_debugfs_exit();
+deinit_root_scheduler:
+ root_scheduler_deinit();
exit_partition:
if (hv_root_partition())
mshv_root_partition_exit();
@@ -2331,8 +2383,10 @@ static void __exit mshv_parent_partition_exit(void)
{
hv_setup_mshv_handler(NULL);
mshv_port_table_fini();
+ mshv_debugfs_exit();
misc_deregister(&mshv_dev);
mshv_irqfd_wq_cleanup();
+ root_scheduler_deinit();
if (hv_root_partition())
mshv_root_partition_exit();
cpuhp_remove_state(mshv_cpuhp_online);
diff --git a/drivers/hv/mshv_vtl_main.c b/drivers/hv/mshv_vtl_main.c
index 2cebe9de5a5a..7bbbce009732 100644
--- a/drivers/hv/mshv_vtl_main.c
+++ b/drivers/hv/mshv_vtl_main.c
@@ -845,9 +845,10 @@ static const struct file_operations mshv_vtl_fops = {
.mmap = mshv_vtl_mmap,
};
-static void mshv_vtl_synic_mask_vmbus_sint(const u8 *mask)
+static void mshv_vtl_synic_mask_vmbus_sint(void *info)
{
union hv_synic_sint sint;
+ const u8 *mask = info;
sint.as_uint64 = 0;
sint.vector = HYPERVISOR_CALLBACK_VECTOR;
@@ -999,7 +1000,7 @@ static int mshv_vtl_sint_ioctl_pause_msg_stream(struct mshv_sint_mask __user *ar
if (copy_from_user(&mask, arg, sizeof(mask)))
return -EFAULT;
guard(mutex)(&vtl2_vmbus_sint_mask_mutex);
- on_each_cpu((smp_call_func_t)mshv_vtl_synic_mask_vmbus_sint, &mask.mask, 1);
+ on_each_cpu(mshv_vtl_synic_mask_vmbus_sint, &mask.mask, 1);
WRITE_ONCE(vtl_synic_mask_vmbus_sint_masked, mask.mask != 0);
if (mask.mask)
wake_up_interruptible_poll(&fd_wait_queue, EPOLLIN);
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 6785ad63a9cb..3e7a52918ce0 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -25,6 +25,7 @@
#include <linux/cpu.h>
#include <linux/sched/isolation.h>
#include <linux/sched/task_stack.h>
+#include <linux/smpboot.h>
#include <linux/delay.h>
#include <linux/panic_notifier.h>
@@ -51,7 +52,7 @@ static struct device *vmbus_root_device;
static int hyperv_cpuhp_online;
-static long __percpu *vmbus_evt;
+static DEFINE_PER_CPU(long, vmbus_evt);
/* Values parsed from ACPI DSDT */
int vmbus_irq;
@@ -1350,7 +1351,7 @@ static void vmbus_message_sched(struct hv_per_cpu_context *hv_cpu, void *message
}
}
-void vmbus_isr(void)
+static void __vmbus_isr(void)
{
struct hv_per_cpu_context *hv_cpu
= this_cpu_ptr(hv_context.cpu_context);
@@ -1363,6 +1364,53 @@ void vmbus_isr(void)
add_interrupt_randomness(vmbus_interrupt);
}
+
+static DEFINE_PER_CPU(bool, vmbus_irq_pending);
+static DEFINE_PER_CPU(struct task_struct *, vmbus_irqd);
+
+/*
+ * Mark VMbus interrupt work as pending on this CPU and wake the task stored
+ * in this CPU's vmbus_irqd slot.
+ */
+static void vmbus_irqd_wake(void)
+{
+	__this_cpu_write(vmbus_irq_pending, true);
+	wake_up_process(__this_cpu_read(vmbus_irqd));
+}
+
+/*
+ * .setup callback of vmbus_irq_threads: switches the calling task (current)
+ * to FIFO scheduling through sched_set_fifo(). @cpu is unused.
+ */
+static void vmbus_irqd_setup(unsigned int cpu)
+{
+ sched_set_fifo(current);
+}
+
+/*
+ * .thread_should_run callback of vmbus_irq_threads: returns this CPU's
+ * vmbus_irq_pending flag, which vmbus_irqd_wake() sets. @cpu is unused.
+ */
+static int vmbus_irqd_should_run(unsigned int cpu)
+{
+ return __this_cpu_read(vmbus_irq_pending);
+}
+
+/*
+ * .thread_fn callback of vmbus_irq_threads: clears this CPU's pending flag
+ * and then runs the actual handler, __vmbus_isr(). @cpu is unused.
+ */
+static void run_vmbus_irqd(unsigned int cpu)
+{
+	/* Clear the flag before handling, not after. */
+ __this_cpu_write(vmbus_irq_pending, false);
+ __vmbus_isr();
+}
+
+static bool vmbus_irq_initialized;
+
+/*
+ * Description of the "vmbus_irq/%u" threads that vmbus_bus_init() passes to
+ * smpboot_register_percpu_thread(). .store points at the per-CPU vmbus_irqd
+ * task pointer that vmbus_irqd_wake() reads to find the thread to wake.
+ */
+static struct smp_hotplug_thread vmbus_irq_threads = {
+ .store = &vmbus_irqd,
+ .setup = vmbus_irqd_setup,
+ .thread_should_run = vmbus_irqd_should_run,
+ .thread_fn = run_vmbus_irqd,
+ .thread_comm = "vmbus_irq/%u",
+};
+
+/*
+ * VMbus interrupt entry point.
+ *
+ * Without CONFIG_PREEMPT_RT the handler body, __vmbus_isr(), runs right here
+ * after the lockdep_hardirq_threaded() annotation. With CONFIG_PREEMPT_RT the
+ * work is handed to this CPU's vmbus_irq thread via vmbus_irqd_wake().
+ */
+void vmbus_isr(void)
+{
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
+		lockdep_hardirq_threaded();
+		__vmbus_isr();
+		return;
+	}
+
+	vmbus_irqd_wake();
+}
EXPORT_SYMBOL_FOR_MODULES(vmbus_isr, "mshv_vtl");
static irqreturn_t vmbus_percpu_isr(int irq, void *dev_id)
@@ -1462,16 +1510,21 @@ static int vmbus_bus_init(void)
* the VMbus interrupt handler.
*/
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && !vmbus_irq_initialized) {
+ ret = smpboot_register_percpu_thread(&vmbus_irq_threads);
+ if (ret)
+ goto err_kthread;
+ vmbus_irq_initialized = true;
+ }
+
if (vmbus_irq == -1) {
hv_setup_vmbus_handler(vmbus_isr);
} else {
- vmbus_evt = alloc_percpu(long);
ret = request_percpu_irq(vmbus_irq, vmbus_percpu_isr,
- "Hyper-V VMbus", vmbus_evt);
+ "Hyper-V VMbus", &vmbus_evt);
if (ret) {
pr_err("Can't request Hyper-V VMbus IRQ %d, Err %d",
vmbus_irq, ret);
- free_percpu(vmbus_evt);
goto err_setup;
}
}
@@ -1500,13 +1553,16 @@ static int vmbus_bus_init(void)
return 0;
err_connect:
- if (vmbus_irq == -1) {
+ if (vmbus_irq == -1)
hv_remove_vmbus_handler();
- } else {
- free_percpu_irq(vmbus_irq, vmbus_evt);
- free_percpu(vmbus_evt);
- }
+ else
+ free_percpu_irq(vmbus_irq, &vmbus_evt);
err_setup:
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && vmbus_irq_initialized) {
+ smpboot_unregister_percpu_thread(&vmbus_irq_threads);
+ vmbus_irq_initialized = false;
+ }
+err_kthread:
bus_unregister(&hv_bus);
return ret;
}
@@ -2970,11 +3026,13 @@ static void __exit vmbus_exit(void)
vmbus_connection.conn_state = DISCONNECTED;
hv_stimer_global_cleanup();
vmbus_disconnect();
- if (vmbus_irq == -1) {
+ if (vmbus_irq == -1)
hv_remove_vmbus_handler();
- } else {
- free_percpu_irq(vmbus_irq, vmbus_evt);
- free_percpu(vmbus_evt);
+ else
+ free_percpu_irq(vmbus_irq, &vmbus_evt);
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && vmbus_irq_initialized) {
+ smpboot_unregister_percpu_thread(&vmbus_irq_threads);
+ vmbus_irq_initialized = false;
}
for_each_online_cpu(cpu) {
struct hv_per_cpu_context *hv_cpu
diff --git a/drivers/pci/controller/pci-hyperv-intf.c b/drivers/pci/controller/pci-hyperv-intf.c
index 28b3e93d31c0..18acbda867f0 100644
--- a/drivers/pci/controller/pci-hyperv-intf.c
+++ b/drivers/pci/controller/pci-hyperv-intf.c
@@ -52,17 +52,5 @@ int hyperv_reg_block_invalidate(struct pci_dev *dev, void *context,
}
EXPORT_SYMBOL_GPL(hyperv_reg_block_invalidate);
-static void __exit exit_hv_pci_intf(void)
-{
-}
-
-static int __init init_hv_pci_intf(void)
-{
- return 0;
-}
-
-module_init(init_hv_pci_intf);
-module_exit(exit_hv_pci_intf);
-
MODULE_DESCRIPTION("Hyper-V PCI Interface");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index 1e237d3538f9..7fcba05cec30 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -501,7 +501,6 @@ struct hv_pcibus_device {
struct resource *low_mmio_res;
struct resource *high_mmio_res;
struct completion *survey_event;
- struct pci_bus *pci_bus;
spinlock_t config_lock; /* Avoid two threads writing index page */
spinlock_t device_list_lock; /* Protect lists below */
void __iomem *cfg_addr;
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index ecedab554c80..d37b68238c97 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -342,6 +342,9 @@ static inline bool hv_parent_partition(void)
{
return hv_root_partition() || hv_l1vh_partition();
}
+
+bool hv_result_needs_memory(u64 status);
+int hv_deposit_memory_node(int node, u64 partition_id, u64 status);
int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages);
int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id);
int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags);
@@ -350,6 +353,11 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags);
static inline bool hv_root_partition(void) { return false; }
static inline bool hv_l1vh_partition(void) { return false; }
static inline bool hv_parent_partition(void) { return false; }
+static inline bool hv_result_needs_memory(u64 status) { return false; }
+static inline int hv_deposit_memory_node(int node, u64 partition_id, u64 status)
+{
+ return -EOPNOTSUPP;
+}
static inline int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
{
return -EOPNOTSUPP;
@@ -364,6 +372,11 @@ static inline int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u3
}
#endif /* CONFIG_MSHV_ROOT */
+/*
+ * Convenience wrapper around hv_deposit_memory_node() that passes
+ * NUMA_NO_NODE as the node. @partition_id and @status are forwarded
+ * unchanged, and the callee's return value is returned as-is.
+ */
+static inline int hv_deposit_memory(u64 partition_id, u64 status)
+{
+ return hv_deposit_memory_node(NUMA_NO_NODE, partition_id, status);
+}
+
#if IS_ENABLED(CONFIG_HYPERV_VTL_MODE)
u8 __init get_vtl(void);
#else
diff --git a/include/hyperv/hvgdk_mini.h b/include/hyperv/hvgdk_mini.h
index 30fbbde81c5c..056ef7b6b360 100644
--- a/include/hyperv/hvgdk_mini.h
+++ b/include/hyperv/hvgdk_mini.h
@@ -14,33 +14,36 @@ struct hv_u128 {
} __packed;
/* NOTE: when adding below, update hv_result_to_string() */
-#define HV_STATUS_SUCCESS 0x0
-#define HV_STATUS_INVALID_HYPERCALL_CODE 0x2
-#define HV_STATUS_INVALID_HYPERCALL_INPUT 0x3
-#define HV_STATUS_INVALID_ALIGNMENT 0x4
-#define HV_STATUS_INVALID_PARAMETER 0x5
-#define HV_STATUS_ACCESS_DENIED 0x6
-#define HV_STATUS_INVALID_PARTITION_STATE 0x7
-#define HV_STATUS_OPERATION_DENIED 0x8
-#define HV_STATUS_UNKNOWN_PROPERTY 0x9
-#define HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE 0xA
-#define HV_STATUS_INSUFFICIENT_MEMORY 0xB
-#define HV_STATUS_INVALID_PARTITION_ID 0xD
-#define HV_STATUS_INVALID_VP_INDEX 0xE
-#define HV_STATUS_NOT_FOUND 0x10
-#define HV_STATUS_INVALID_PORT_ID 0x11
-#define HV_STATUS_INVALID_CONNECTION_ID 0x12
-#define HV_STATUS_INSUFFICIENT_BUFFERS 0x13
-#define HV_STATUS_NOT_ACKNOWLEDGED 0x14
-#define HV_STATUS_INVALID_VP_STATE 0x15
-#define HV_STATUS_NO_RESOURCES 0x1D
-#define HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED 0x20
-#define HV_STATUS_INVALID_LP_INDEX 0x41
-#define HV_STATUS_INVALID_REGISTER_VALUE 0x50
-#define HV_STATUS_OPERATION_FAILED 0x71
-#define HV_STATUS_TIME_OUT 0x78
-#define HV_STATUS_CALL_PENDING 0x79
-#define HV_STATUS_VTL_ALREADY_ENABLED 0x86
+#define HV_STATUS_SUCCESS 0x0
+#define HV_STATUS_INVALID_HYPERCALL_CODE 0x2
+#define HV_STATUS_INVALID_HYPERCALL_INPUT 0x3
+#define HV_STATUS_INVALID_ALIGNMENT 0x4
+#define HV_STATUS_INVALID_PARAMETER 0x5
+#define HV_STATUS_ACCESS_DENIED 0x6
+#define HV_STATUS_INVALID_PARTITION_STATE 0x7
+#define HV_STATUS_OPERATION_DENIED 0x8
+#define HV_STATUS_UNKNOWN_PROPERTY 0x9
+#define HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE 0xA
+#define HV_STATUS_INSUFFICIENT_MEMORY 0xB
+#define HV_STATUS_INVALID_PARTITION_ID 0xD
+#define HV_STATUS_INVALID_VP_INDEX 0xE
+#define HV_STATUS_NOT_FOUND 0x10
+#define HV_STATUS_INVALID_PORT_ID 0x11
+#define HV_STATUS_INVALID_CONNECTION_ID 0x12
+#define HV_STATUS_INSUFFICIENT_BUFFERS 0x13
+#define HV_STATUS_NOT_ACKNOWLEDGED 0x14
+#define HV_STATUS_INVALID_VP_STATE 0x15
+#define HV_STATUS_NO_RESOURCES 0x1D
+#define HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED 0x20
+#define HV_STATUS_INVALID_LP_INDEX 0x41
+#define HV_STATUS_INVALID_REGISTER_VALUE 0x50
+#define HV_STATUS_OPERATION_FAILED 0x71
+#define HV_STATUS_INSUFFICIENT_ROOT_MEMORY 0x73
+#define HV_STATUS_INSUFFICIENT_CONTIGUOUS_MEMORY 0x75
+#define HV_STATUS_TIME_OUT 0x78
+#define HV_STATUS_CALL_PENDING 0x79
+#define HV_STATUS_INSUFFICIENT_CONTIGUOUS_ROOT_MEMORY 0x83
+#define HV_STATUS_VTL_ALREADY_ENABLED 0x86
/*
* The Hyper-V TimeRefCount register and the TSC
@@ -474,6 +477,7 @@ union hv_vp_assist_msr_contents { /* HV_REGISTER_VP_ASSIST_PAGE */
#define HVCALL_NOTIFY_PARTITION_EVENT 0x0087
#define HVCALL_ENTER_SLEEP_STATE 0x0084
#define HVCALL_NOTIFY_PORT_RING_EMPTY 0x008b
+#define HVCALL_SCRUB_PARTITION 0x008d
#define HVCALL_REGISTER_INTERCEPT_RESULT 0x0091
#define HVCALL_ASSERT_VIRTUAL_INTERRUPT 0x0094
#define HVCALL_CREATE_PORT 0x0095
diff --git a/include/hyperv/hvhdk.h b/include/hyperv/hvhdk.h
index 08965970c17d..245f3db53bf1 100644
--- a/include/hyperv/hvhdk.h
+++ b/include/hyperv/hvhdk.h
@@ -10,6 +10,13 @@
#include "hvhdk_mini.h"
#include "hvgdk.h"
+/*
+ * Hypervisor statistics page format
+ */
+struct hv_stats_page {
+ u64 data[HV_HYP_PAGE_SIZE / sizeof(u64)];
+} __packed;
+
/* Bits for dirty mask of hv_vp_register_page */
#define HV_X64_REGISTER_CLASS_GENERAL 0
#define HV_X64_REGISTER_CLASS_IP 1
@@ -328,6 +335,8 @@ union hv_partition_isolation_properties {
#define HV_PARTITION_ISOLATION_HOST_TYPE_RESERVED 0x2
/* Note: Exo partition is enabled by default */
+#define HV_PARTITION_CREATION_FLAG_SMT_ENABLED_GUEST BIT(0)
+#define HV_PARTITION_CREATION_FLAG_NESTED_VIRTUALIZATION_CAPABLE BIT(1)
#define HV_PARTITION_CREATION_FLAG_GPA_SUPER_PAGES_ENABLED BIT(4)
#define HV_PARTITION_CREATION_FLAG_EXO_PARTITION BIT(8)
#define HV_PARTITION_CREATION_FLAG_LAPIC_ENABLED BIT(13)
diff --git a/include/hyperv/hvhdk_mini.h b/include/hyperv/hvhdk_mini.h
index 41a29bf8ec14..091c03e26046 100644
--- a/include/hyperv/hvhdk_mini.h
+++ b/include/hyperv/hvhdk_mini.h
@@ -7,6 +7,8 @@
#include "hvgdk_mini.h"
+#define HV_MAX_CONTIGUOUS_ALLOCATION_PAGES 8
+
/*
* Doorbell connection_info flags.
*/
@@ -87,6 +89,9 @@ enum hv_partition_property_code {
HV_PARTITION_PROPERTY_PRIVILEGE_FLAGS = 0x00010000,
HV_PARTITION_PROPERTY_SYNTHETIC_PROC_FEATURES = 0x00010001,
+ /* Integrated scheduling properties */
+ HV_PARTITION_PROPERTY_INTEGRATED_SCHEDULER_ENABLED = 0x00020005,
+
/* Resource properties */
HV_PARTITION_PROPERTY_GPA_PAGE_ACCESS_TRACKING = 0x00050005,
HV_PARTITION_PROPERTY_UNIMPLEMENTED_MSR_ACTION = 0x00050017,
@@ -102,7 +107,7 @@ enum hv_partition_property_code {
};
#define HV_PARTITION_VMM_CAPABILITIES_BANK_COUNT 1
-#define HV_PARTITION_VMM_CAPABILITIES_RESERVED_BITFIELD_COUNT 59
+#define HV_PARTITION_VMM_CAPABILITIES_RESERVED_BITFIELD_COUNT 57
struct hv_partition_property_vmm_capabilities {
u16 bank_count;
@@ -119,6 +124,8 @@ struct hv_partition_property_vmm_capabilities {
u64 reservedbit3: 1;
#endif
u64 assignable_synthetic_proc_features: 1;
+ u64 reservedbit5: 1;
+ u64 vmm_enable_integrated_scheduler : 1;
u64 reserved0: HV_PARTITION_VMM_CAPABILITIES_RESERVED_BITFIELD_COUNT;
} __packed;
};
diff --git a/include/uapi/linux/mshv.h b/include/uapi/linux/mshv.h
index dee3ece28ce5..e0645a34b55b 100644
--- a/include/uapi/linux/mshv.h
+++ b/include/uapi/linux/mshv.h
@@ -27,6 +27,8 @@ enum {
MSHV_PT_BIT_X2APIC,
MSHV_PT_BIT_GPA_SUPER_PAGES,
MSHV_PT_BIT_CPU_AND_XSAVE_FEATURES,
+ MSHV_PT_BIT_NESTED_VIRTUALIZATION,
+ MSHV_PT_BIT_SMT_ENABLED_GUEST,
MSHV_PT_BIT_COUNT,
};