Diffstat (limited to 'tools/testing')
-rw-r--r-- tools/testing/cxl/Kbuild | 3
-rw-r--r-- tools/testing/cxl/test/Kbuild | 1
-rw-r--r-- tools/testing/cxl/test/cxl.c | 86
-rw-r--r-- tools/testing/cxl/test/cxl_translate.c | 445
-rw-r--r-- tools/testing/cxl/test/mem.c | 11
-rw-r--r-- tools/testing/cxl/test/mock.c | 52
-rw-r--r-- tools/testing/cxl/test/mock.h | 4
-rwxr-xr-x tools/testing/ktest/config-bisect.pl | 4
-rw-r--r-- tools/testing/selftests/damon/_damon_sysfs.py | 11
-rwxr-xr-x tools/testing/selftests/damon/drgn_dump_damon_status.py | 9
-rwxr-xr-x tools/testing/selftests/damon/sysfs.py | 71
-rwxr-xr-x tools/testing/selftests/ftrace/ftracetest | 34
-rw-r--r-- tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc | 2
-rw-r--r-- tools/testing/selftests/ftrace/test.d/functions | 6
-rw-r--r-- tools/testing/selftests/hid/tests/test_tablet.py | 71
-rw-r--r-- tools/testing/selftests/iommu/iommufd.c | 103
-rw-r--r-- tools/testing/selftests/iommu/iommufd_utils.h | 56
-rw-r--r-- tools/testing/selftests/kvm/Makefile | 2
-rw-r--r-- tools/testing/selftests/kvm/Makefile.kvm | 12
-rw-r--r-- tools/testing/selftests/kvm/arm64/at.c | 166
-rw-r--r-- tools/testing/selftests/kvm/arm64/sea_to_user.c | 331
-rw-r--r-- tools/testing/selftests/kvm/arm64/vgic_irq.c | 287
-rw-r--r-- tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c | 4
-rw-r--r-- tools/testing/selftests/kvm/guest_memfd_test.c | 98
-rw-r--r-- tools/testing/selftests/kvm/include/arm64/gic.h | 1
-rw-r--r-- tools/testing/selftests/kvm/include/arm64/gic_v3_its.h | 1
-rw-r--r-- tools/testing/selftests/kvm/include/kvm_syscalls.h | 81
-rw-r--r-- tools/testing/selftests/kvm/include/kvm_util.h | 45
-rw-r--r-- tools/testing/selftests/kvm/include/loongarch/arch_timer.h | 85
-rw-r--r-- tools/testing/selftests/kvm/include/loongarch/processor.h | 81
-rw-r--r-- tools/testing/selftests/kvm/include/numaif.h | 110
-rw-r--r-- tools/testing/selftests/kvm/include/x86/processor.h | 2
-rw-r--r-- tools/testing/selftests/kvm/include/x86/vmx.h | 3
-rw-r--r-- tools/testing/selftests/kvm/kvm_binary_stats_test.c | 4
-rw-r--r-- tools/testing/selftests/kvm/lib/arm64/gic.c | 6
-rw-r--r-- tools/testing/selftests/kvm/lib/arm64/gic_private.h | 1
-rw-r--r-- tools/testing/selftests/kvm/lib/arm64/gic_v3.c | 22
-rw-r--r-- tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c | 10
-rw-r--r-- tools/testing/selftests/kvm/lib/arm64/processor.c | 2
-rw-r--r-- tools/testing/selftests/kvm/lib/kvm_util.c | 145
-rw-r--r-- tools/testing/selftests/kvm/lib/loongarch/exception.S | 6
-rw-r--r-- tools/testing/selftests/kvm/lib/loongarch/processor.c | 47
-rw-r--r-- tools/testing/selftests/kvm/lib/x86/memstress.c | 2
-rw-r--r-- tools/testing/selftests/kvm/lib/x86/processor.c | 82
-rw-r--r-- tools/testing/selftests/kvm/lib/x86/vmx.c | 9
-rw-r--r-- tools/testing/selftests/kvm/loongarch/arch_timer.c | 200
-rw-r--r-- tools/testing/selftests/kvm/mmu_stress_test.c | 10
-rw-r--r-- tools/testing/selftests/kvm/pre_fault_memory_test.c | 32
-rw-r--r-- tools/testing/selftests/kvm/riscv/get-reg-list.c | 4
-rw-r--r-- tools/testing/selftests/kvm/s390/user_operexec.c | 140
-rw-r--r-- tools/testing/selftests/kvm/x86/hyperv_features.c | 2
-rw-r--r-- tools/testing/selftests/kvm/x86/hyperv_ipi.c | 18
-rw-r--r-- tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c | 2
-rw-r--r-- tools/testing/selftests/kvm/x86/nested_close_kvm_test.c (renamed from tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c) | 42
-rw-r--r-- tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c | 116
-rw-r--r-- tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c (renamed from tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c) | 79
-rw-r--r-- tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c (renamed from tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c) | 48
-rw-r--r-- tools/testing/selftests/kvm/x86/private_mem_conversions_test.c | 9
-rw-r--r-- tools/testing/selftests/kvm/x86/sev_smoke_test.c | 2
-rw-r--r-- tools/testing/selftests/kvm/x86/state_test.c | 2
-rw-r--r-- tools/testing/selftests/kvm/x86/userspace_io_test.c | 2
-rw-r--r-- tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c | 12
-rw-r--r-- tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c | 132
-rw-r--r-- tools/testing/selftests/kvm/x86/xapic_ipi_test.c | 5
-rw-r--r-- tools/testing/selftests/mm/guard-regions.c | 185
-rw-r--r-- tools/testing/selftests/mm/gup_test.c | 24
-rw-r--r-- tools/testing/selftests/mm/hmm-tests.c | 919
-rw-r--r-- tools/testing/selftests/mm/ksm_functional_tests.c | 57
-rw-r--r-- tools/testing/selftests/mm/mremap_test.c | 5
-rw-r--r-- tools/testing/selftests/mm/soft-dirty.c | 127
-rw-r--r-- tools/testing/selftests/mm/uffd-common.c | 24
-rw-r--r-- tools/testing/selftests/mm/uffd-stress.c | 2
-rw-r--r-- tools/testing/selftests/mm/uffd-unit-tests.c | 8
-rw-r--r-- tools/testing/selftests/mm/vm_util.c | 5
-rw-r--r-- tools/testing/selftests/mm/vm_util.h | 1
-rw-r--r-- tools/testing/selftests/riscv/hwprobe/cbo.c | 165
-rw-r--r-- tools/testing/selftests/riscv/vector/Makefile | 5
-rw-r--r-- tools/testing/selftests/riscv/vector/vstate_ptrace.c | 134
-rw-r--r-- tools/testing/selftests/tpm2/tpm2.py | 4
-rw-r--r-- tools/testing/selftests/verification/.gitignore | 2
-rw-r--r-- tools/testing/selftests/verification/Makefile | 8
-rw-r--r-- tools/testing/selftests/verification/config | 1
-rw-r--r-- tools/testing/selftests/verification/settings | 1
-rw-r--r-- tools/testing/selftests/verification/test.d/functions | 39
-rw-r--r-- tools/testing/selftests/verification/test.d/rv_monitor_enable_disable.tc | 75
-rw-r--r-- tools/testing/selftests/verification/test.d/rv_monitor_reactor.tc | 68
-rw-r--r-- tools/testing/selftests/verification/test.d/rv_monitors_available.tc | 18
-rw-r--r-- tools/testing/selftests/verification/test.d/rv_wwnr_printk.tc | 30
-rwxr-xr-x tools/testing/selftests/verification/verificationtest-ktap | 8
-rw-r--r-- tools/testing/selftests/vfio/Makefile | 10
-rw-r--r-- tools/testing/selftests/vfio/lib/drivers/dsa/dsa.c | 36
-rw-r--r-- tools/testing/selftests/vfio/lib/drivers/ioat/ioat.c | 18
-rw-r--r-- tools/testing/selftests/vfio/lib/include/libvfio.h | 26
-rw-r--r-- tools/testing/selftests/vfio/lib/include/libvfio/assert.h | 54
-rw-r--r-- tools/testing/selftests/vfio/lib/include/libvfio/iommu.h | 76
-rw-r--r-- tools/testing/selftests/vfio/lib/include/libvfio/iova_allocator.h | 23
-rw-r--r-- tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_device.h | 125
-rw-r--r-- tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_driver.h | 97
-rw-r--r-- tools/testing/selftests/vfio/lib/include/vfio_util.h | 331
-rw-r--r-- tools/testing/selftests/vfio/lib/iommu.c | 465
-rw-r--r-- tools/testing/selftests/vfio/lib/iova_allocator.c | 94
-rw-r--r-- tools/testing/selftests/vfio/lib/libvfio.c | 78
-rw-r--r-- tools/testing/selftests/vfio/lib/libvfio.mk | 23
-rw-r--r-- tools/testing/selftests/vfio/lib/vfio_pci_device.c | 556
-rw-r--r-- tools/testing/selftests/vfio/lib/vfio_pci_driver.c | 16
-rwxr-xr-x tools/testing/selftests/vfio/run.sh | 109
-rwxr-xr-x tools/testing/selftests/vfio/scripts/cleanup.sh | 41
-rwxr-xr-x tools/testing/selftests/vfio/scripts/lib.sh | 42
-rwxr-xr-x tools/testing/selftests/vfio/scripts/run.sh | 16
-rwxr-xr-x tools/testing/selftests/vfio/scripts/setup.sh | 48
-rw-r--r-- tools/testing/selftests/vfio/vfio_dma_mapping_test.c | 46
-rw-r--r-- tools/testing/selftests/vfio/vfio_iommufd_setup_test.c | 2
-rw-r--r-- tools/testing/selftests/vfio/vfio_pci_device_init_perf_test.c | 168
-rw-r--r-- tools/testing/selftests/vfio/vfio_pci_device_test.c | 12
-rw-r--r-- tools/testing/selftests/vfio/vfio_pci_driver_test.c | 51
-rw-r--r-- tools/testing/vma/vma.c | 112
-rw-r--r-- tools/testing/vma/vma_internal.h | 603
117 files changed, 6902 insertions, 1762 deletions
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
index 0d5ce4b74b9f..0e151d0572d1 100644
--- a/tools/testing/cxl/Kbuild
+++ b/tools/testing/cxl/Kbuild
@@ -4,13 +4,12 @@ ldflags-y += --wrap=is_acpi_device_node
ldflags-y += --wrap=acpi_evaluate_integer
ldflags-y += --wrap=acpi_pci_find_root
ldflags-y += --wrap=nvdimm_bus_register
-ldflags-y += --wrap=devm_cxl_port_enumerate_dports
ldflags-y += --wrap=cxl_await_media_ready
ldflags-y += --wrap=devm_cxl_add_rch_dport
-ldflags-y += --wrap=cxl_rcd_component_reg_phys
ldflags-y += --wrap=cxl_endpoint_parse_cdat
ldflags-y += --wrap=cxl_dport_init_ras_reporting
ldflags-y += --wrap=devm_cxl_endpoint_decoders_setup
+ldflags-y += --wrap=hmat_get_extended_linear_cache_size
DRIVERS := ../../../drivers
CXL_SRC := $(DRIVERS)/cxl
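
For context on the ldflags above: GNU ld's --wrap=SYMBOL resolves every undefined reference to SYMBOL against __wrap_SYMBOL, while references to __real_SYMBOL still reach the original definition. That is the mechanism cxl_test uses to interpose its mock ops on these kernel APIs (the matching __wrap_* bodies are in the mock.c diff further down). A minimal userspace sketch of the pattern; the symbol name compute is illustrative, not from this patch:

/* wrap.c: link as cc main.c impl.c wrap.c -Wl,--wrap=compute */
#include <stdio.h>

int __real_compute(int x);	/* the linker binds this to the real compute() */

int __wrap_compute(int x)	/* every call site of compute() lands here */
{
	if (x < 0) {		/* mock path, akin to a get_cxl_mock_ops() hit */
		printf("mocked compute(%d)\n", x);
		return 0;
	}
	return __real_compute(x);	/* otherwise defer to the original */
}
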
diff --git a/tools/testing/cxl/test/Kbuild b/tools/testing/cxl/test/Kbuild
index 6b1927897856..af50972c8b6d 100644
--- a/tools/testing/cxl/test/Kbuild
+++ b/tools/testing/cxl/test/Kbuild
@@ -4,6 +4,7 @@ ccflags-y := -I$(srctree)/drivers/cxl/ -I$(srctree)/drivers/cxl/core
obj-m += cxl_test.o
obj-m += cxl_mock.o
obj-m += cxl_mock_mem.o
+obj-m += cxl_translate.o
cxl_test-y := cxl.o
cxl_mock-y := mock.o
diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index 2d135ca533d0..81e2aef3627a 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -15,6 +15,7 @@
#include "mock.h"
static int interleave_arithmetic;
+static bool extended_linear_cache;
#define FAKE_QTG_ID 42
@@ -26,6 +27,9 @@ static int interleave_arithmetic;
#define NR_CXL_PORT_DECODERS 8
#define NR_BRIDGES (NR_CXL_HOST_BRIDGES + NR_CXL_SINGLE_HOST + NR_CXL_RCH)
+#define MOCK_AUTO_REGION_SIZE_DEFAULT SZ_512M
+static int mock_auto_region_size = MOCK_AUTO_REGION_SIZE_DEFAULT;
+
static struct platform_device *cxl_acpi;
static struct platform_device *cxl_host_bridge[NR_CXL_HOST_BRIDGES];
#define NR_MULTI_ROOT (NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS)
@@ -426,6 +430,22 @@ static struct cxl_mock_res *alloc_mock_res(resource_size_t size, int align)
return res;
}
+/* Only update CFMWS0 as this is used by the auto region. */
+static void cfmws_elc_update(struct acpi_cedt_cfmws *window, int index)
+{
+ if (!extended_linear_cache)
+ return;
+
+ if (index != 0)
+ return;
+
+ /*
+	 * The window size should be 2x the CXL region size, where half is
+	 * DRAM and half is CXL.
+ */
+ window->window_size = mock_auto_region_size * 2;
+}
+
static int populate_cedt(void)
{
struct cxl_mock_res *res;
@@ -450,6 +470,7 @@ static int populate_cedt(void)
for (i = cfmws_start; i <= cfmws_end; i++) {
struct acpi_cedt_cfmws *window = mock_cfmws[i];
+ cfmws_elc_update(window, i);
res = alloc_mock_res(window->window_size, SZ_256M);
if (!res)
return -ENOMEM;
@@ -591,6 +612,25 @@ mock_acpi_evaluate_integer(acpi_handle handle, acpi_string pathname,
return AE_OK;
}
+static int
+mock_hmat_get_extended_linear_cache_size(struct resource *backing_res,
+ int nid, resource_size_t *cache_size)
+{
+ struct acpi_cedt_cfmws *window = mock_cfmws[0];
+ struct resource cfmws0_res =
+ DEFINE_RES_MEM(window->base_hpa, window->window_size);
+
+ if (!extended_linear_cache ||
+ !resource_contains(&cfmws0_res, backing_res)) {
+ return hmat_get_extended_linear_cache_size(backing_res,
+ nid, cache_size);
+ }
+
+ *cache_size = mock_auto_region_size;
+
+ return 0;
+}
+
static struct pci_bus mock_pci_bus[NR_BRIDGES];
static struct acpi_pci_root mock_pci_root[ARRAY_SIZE(mock_pci_bus)] = {
[0] = {
@@ -738,7 +778,6 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld)
struct cxl_endpoint_decoder *cxled;
struct cxl_switch_decoder *cxlsd;
struct cxl_port *port, *iter;
- const int size = SZ_512M;
struct cxl_memdev *cxlmd;
struct cxl_dport *dport;
struct device *dev;
@@ -781,9 +820,11 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld)
}
base = window->base_hpa;
+ if (extended_linear_cache)
+ base += mock_auto_region_size;
cxld->hpa_range = (struct range) {
.start = base,
- .end = base + size - 1,
+ .end = base + mock_auto_region_size - 1,
};
cxld->interleave_ways = 2;
@@ -792,7 +833,8 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld)
cxld->flags = CXL_DECODER_F_ENABLE;
cxled->state = CXL_DECODER_STATE_AUTO;
port->commit_end = cxld->id;
- devm_cxl_dpa_reserve(cxled, 0, size / cxld->interleave_ways, 0);
+ devm_cxl_dpa_reserve(cxled, 0,
+ mock_auto_region_size / cxld->interleave_ways, 0);
cxld->commit = mock_decoder_commit;
cxld->reset = mock_decoder_reset;
@@ -841,7 +883,7 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld)
cxld->interleave_granularity = 4096;
cxld->hpa_range = (struct range) {
.start = base,
- .end = base + size - 1,
+ .end = base + mock_auto_region_size - 1,
};
put_device(dev);
}
@@ -995,37 +1037,6 @@ static int get_port_array(struct cxl_port *port,
return 0;
}
-static int mock_cxl_port_enumerate_dports(struct cxl_port *port)
-{
- struct platform_device **array;
- int i, array_size;
- int rc;
-
- rc = get_port_array(port, &array, &array_size);
- if (rc)
- return rc;
-
- for (i = 0; i < array_size; i++) {
- struct platform_device *pdev = array[i];
- struct cxl_dport *dport;
-
- if (pdev->dev.parent != port->uport_dev) {
- dev_dbg(&port->dev, "%s: mismatch parent %s\n",
- dev_name(port->uport_dev),
- dev_name(pdev->dev.parent));
- continue;
- }
-
- dport = devm_cxl_add_dport(port, &pdev->dev, pdev->id,
- CXL_RESOURCE_NONE);
-
- if (IS_ERR(dport))
- return PTR_ERR(dport);
- }
-
- return 0;
-}
-
static struct cxl_dport *mock_cxl_add_dport_by_dev(struct cxl_port *port,
struct device *dport_dev)
{
@@ -1114,9 +1125,10 @@ static struct cxl_mock_ops cxl_mock_ops = {
.acpi_pci_find_root = mock_acpi_pci_find_root,
.devm_cxl_switch_port_decoders_setup = mock_cxl_switch_port_decoders_setup,
.devm_cxl_endpoint_decoders_setup = mock_cxl_endpoint_decoders_setup,
- .devm_cxl_port_enumerate_dports = mock_cxl_port_enumerate_dports,
.cxl_endpoint_parse_cdat = mock_cxl_endpoint_parse_cdat,
.devm_cxl_add_dport_by_dev = mock_cxl_add_dport_by_dev,
+ .hmat_get_extended_linear_cache_size =
+ mock_hmat_get_extended_linear_cache_size,
.list = LIST_HEAD_INIT(cxl_mock_ops.list),
};
@@ -1606,6 +1618,8 @@ static __exit void cxl_test_exit(void)
module_param(interleave_arithmetic, int, 0444);
MODULE_PARM_DESC(interleave_arithmetic, "Modulo:0, XOR:1");
+module_param(extended_linear_cache, bool, 0444);
+MODULE_PARM_DESC(extended_linear_cache, "Enable extended linear cache support");
module_init(cxl_test_init);
module_exit(cxl_test_exit);
MODULE_LICENSE("GPL v2");
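
Taken together, the cxl.c hunks above define the mock extended-linear-cache geometry: cfmws_elc_update() doubles CFMWS0 so half the window models the DRAM cache and half backs the auto region, mock_init_hdm_decoder() shifts the region's HPA base into the CXL half, and the mock HMAT hook reports a cache size equal to the region size. A standalone sketch of that arithmetic; the base address below is hypothetical, not a value from the patch:

#include <stdio.h>

#define MOCK_AUTO_REGION_SIZE	(512ULL << 20)	/* the SZ_512M default */

int main(void)
{
	unsigned long long base_hpa = 0x100000000ULL;	/* hypothetical CFMWS0 base */
	unsigned long long window = 2 * MOCK_AUTO_REGION_SIZE;	/* cfmws_elc_update() */
	unsigned long long region_start = base_hpa + MOCK_AUTO_REGION_SIZE; /* CXL half */
	unsigned long long cache_size = window / 2;	/* reported by the mock HMAT hook */

	printf("window=[%#llx-%#llx] region=%#llx cache=%#llx\n",
	       base_hpa, base_hpa + window - 1, region_start, cache_size);
	return 0;
}
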
diff --git a/tools/testing/cxl/test/cxl_translate.c b/tools/testing/cxl/test/cxl_translate.c
new file mode 100644
index 000000000000..2200ae21795c
--- /dev/null
+++ b/tools/testing/cxl/test/cxl_translate.c
@@ -0,0 +1,445 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright(c) 2025 Intel Corporation. All rights reserved.
+
+/* Preface all log entries with "cxl_translate" */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/moduleparam.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/acpi.h>
+#include <cxlmem.h>
+#include <cxl.h>
+
+/* Maximum number of test vectors and entry length */
+#define MAX_TABLE_ENTRIES 128
+#define MAX_ENTRY_LEN 128
+
+/* Expected number of parameters in each test vector */
+#define EXPECTED_PARAMS 7
+
+/* Module parameters for test vectors */
+static char *table[MAX_TABLE_ENTRIES];
+static int table_num;
+
+/* Interleave Arithmetic */
+#define MODULO_MATH 0
+#define XOR_MATH 1
+
+/*
+ * XOR mapping configuration
+ * The test data sets all use the same set of xormaps. When additional
+ * data sets arrive for validation, this static setup will need to
+ * be changed to accept xormaps as additional parameters.
+ */
+struct cxl_cxims_data *cximsd;
+static u64 xormaps[] = {
+ 0x2020900,
+ 0x4041200,
+ 0x1010400,
+ 0x800,
+};
+
+static int nr_maps = ARRAY_SIZE(xormaps);
+
+#define HBIW_TO_NR_MAPS_SIZE (CXL_DECODER_MAX_INTERLEAVE + 1)
+static const int hbiw_to_nr_maps[HBIW_TO_NR_MAPS_SIZE] = {
+ [1] = 0, [2] = 1, [3] = 0, [4] = 2, [6] = 1, [8] = 3, [12] = 2, [16] = 4
+};
+
+/**
+ * to_hpa - calculate an HPA offset from a DPA offset and position
+ *
+ * @dpa_offset: device physical address offset
+ * @pos: device's position in the interleave
+ * @r_eiw: region encoded interleave ways
+ * @r_eig: region encoded interleave granularity
+ * @hb_ways: host bridge interleave ways
+ * @math: interleave arithmetic (MODULO_MATH or XOR_MATH)
+ *
+ * Returns: host physical address offset
+ */
+static u64 to_hpa(u64 dpa_offset, int pos, u8 r_eiw, u16 r_eig, u8 hb_ways,
+ u8 math)
+{
+ u64 hpa_offset;
+
+ /* Calculate base HPA offset from DPA and position */
+ hpa_offset = cxl_calculate_hpa_offset(dpa_offset, pos, r_eiw, r_eig);
+
+ if (math == XOR_MATH) {
+ cximsd->nr_maps = hbiw_to_nr_maps[hb_ways];
+ if (cximsd->nr_maps)
+ return cxl_do_xormap_calc(cximsd, hpa_offset, hb_ways);
+ }
+ return hpa_offset;
+}
+
+/**
+ * to_dpa - translate an HPA offset to DPA offset
+ *
+ * @hpa_offset: host physical address offset
+ * @r_eiw: region encoded interleave ways
+ * @r_eig: region encoded interleave granularity
+ * @hb_ways: host bridge interleave ways
+ * @math: interleave arithmetic (MODULO_MATH or XOR_MATH)
+ *
+ * Returns: device physical address offset
+ */
+static u64 to_dpa(u64 hpa_offset, u8 r_eiw, u16 r_eig, u8 hb_ways, u8 math)
+{
+ u64 offset = hpa_offset;
+
+ if (math == XOR_MATH) {
+ cximsd->nr_maps = hbiw_to_nr_maps[hb_ways];
+ if (cximsd->nr_maps)
+ offset =
+ cxl_do_xormap_calc(cximsd, hpa_offset, hb_ways);
+ }
+ return cxl_calculate_dpa_offset(offset, r_eiw, r_eig);
+}
+
+/**
+ * to_pos - extract an interleave position from an HPA offset
+ *
+ * @hpa_offset: host physical address offset
+ * @r_eiw: region encoded interleave ways
+ * @r_eig: region encoded interleave granularity
+ * @hb_ways: host bridge interleave ways
+ * @math: interleave arithmetic (MODULO_MATH or XOR_MATH)
+ *
+ * Returns: device's position in the region interleave
+ */
+static u64 to_pos(u64 hpa_offset, u8 r_eiw, u16 r_eig, u8 hb_ways, u8 math)
+{
+ u64 offset = hpa_offset;
+
+ /* Reverse XOR mapping if specified */
+ if (math == XOR_MATH)
+ offset = cxl_do_xormap_calc(cximsd, hpa_offset, hb_ways);
+
+ return cxl_calculate_position(offset, r_eiw, r_eig);
+}
+
+/**
+ * run_translation_test - execute forward and reverse translations
+ *
+ * @dpa: device physical address
+ * @pos: expected position in region interleave
+ * @r_eiw: region encoded interleave ways
+ * @r_eig: region encoded interleave granularity
+ * @hb_ways: host bridge interleave ways
+ * @math: interleave arithmetic (MODULO_MATH or XOR_MATH)
+ * @expect_hpa: expected host physical address
+ *
+ * Returns: 0 on success, -1 on failure
+ */
+static int run_translation_test(u64 dpa, int pos, u8 r_eiw, u16 r_eig,
+ u8 hb_ways, int math, u64 expect_hpa)
+{
+ u64 translated_spa, reverse_dpa;
+ int reverse_pos;
+
+ /* Test Device to Host translation: DPA + POS -> SPA */
+ translated_spa = to_hpa(dpa, pos, r_eiw, r_eig, hb_ways, math);
+ if (translated_spa != expect_hpa) {
+ pr_err("Device to host failed: expected HPA %llu, got %llu\n",
+ expect_hpa, translated_spa);
+ return -1;
+ }
+
+ /* Test Host to Device DPA translation: SPA -> DPA */
+ reverse_dpa = to_dpa(translated_spa, r_eiw, r_eig, hb_ways, math);
+ if (reverse_dpa != dpa) {
+ pr_err("Host to Device DPA failed: expected %llu, got %llu\n",
+ dpa, reverse_dpa);
+ return -1;
+ }
+
+ /* Test Host to Device Position translation: SPA -> POS */
+ reverse_pos = to_pos(translated_spa, r_eiw, r_eig, hb_ways, math);
+ if (reverse_pos != pos) {
+ pr_err("Position lookup failed: expected %d, got %d\n", pos,
+ reverse_pos);
+ return -1;
+ }
+
+ return 0;
+}
+
+/**
+ * parse_test_vector - parse a single test vector string
+ *
+ * @entry: test vector string to parse
+ * @dpa: device physical address
+ * @pos: expected position in region interleave
+ * @r_eiw: region encoded interleave ways
+ * @r_eig: region encoded interleave granularity
+ * @hb_ways: host bridge interleave ways
+ * @math: interleave arithmetic (MODULO_MATH or XOR_MATH)
+ * @expect_hpa: expected host physical address
+ *
+ * Returns: 0 on success, negative error code on failure
+ */
+static int parse_test_vector(const char *entry, u64 *dpa, int *pos, u8 *r_eiw,
+ u16 *r_eig, u8 *hb_ways, int *math,
+ u64 *expect_hpa)
+{
+ unsigned int tmp_r_eiw, tmp_r_eig, tmp_hb_ways;
+ int parsed;
+
+ parsed = sscanf(entry, "%llu %d %u %u %u %d %llu", dpa, pos, &tmp_r_eiw,
+ &tmp_r_eig, &tmp_hb_ways, math, expect_hpa);
+
+ if (parsed != EXPECTED_PARAMS) {
+ pr_err("Parse error: expected %d parameters, got %d in '%s'\n",
+ EXPECTED_PARAMS, parsed, entry);
+ return -EINVAL;
+ }
+ if (tmp_r_eiw > U8_MAX || tmp_r_eig > U16_MAX || tmp_hb_ways > U8_MAX) {
+ pr_err("Parameter overflow in entry: '%s'\n", entry);
+ return -ERANGE;
+ }
+ if (*math != MODULO_MATH && *math != XOR_MATH) {
+ pr_err("Invalid math type %d in entry: '%s'\n", *math, entry);
+ return -EINVAL;
+ }
+ *r_eiw = tmp_r_eiw;
+ *r_eig = tmp_r_eig;
+ *hb_ways = tmp_hb_ways;
+
+ return 0;
+}
+
+/*
+ * setup_xor_mapping - Initialize XOR mapping data structure
+ *
+ * The test data sets all use the same HBIG so we can use one set
+ * of xormaps, and set the number to apply based on HBIW before
+ * calling cxl_do_xormap_calc().
+ *
+ * When additional data sets arrive for validation with different
+ * HBIGs, this static setup will need to be updated.
+ *
+ * Returns: 0 on success, negative error code on failure
+ */
+static int setup_xor_mapping(void)
+{
+ if (nr_maps <= 0)
+ return -EINVAL;
+
+ cximsd = kzalloc(struct_size(cximsd, xormaps, nr_maps), GFP_KERNEL);
+ if (!cximsd)
+ return -ENOMEM;
+
+ memcpy(cximsd->xormaps, xormaps, nr_maps * sizeof(*cximsd->xormaps));
+ cximsd->nr_maps = nr_maps;
+
+ return 0;
+}
+
+static int test_random_params(void)
+{
+ u8 valid_eiws[] = { 0, 1, 2, 3, 4, 8, 9, 10 };
+ u16 valid_eigs[] = { 0, 1, 2, 3, 4, 5, 6 };
+ int i, ways, pos, reverse_pos;
+ u64 dpa, hpa, reverse_dpa;
+ int iterations = 10000;
+ int failures = 0;
+
+ for (i = 0; i < iterations; i++) {
+ /* Generate valid random parameters for eiw, eig, pos, dpa */
+ u8 eiw = valid_eiws[get_random_u32() % ARRAY_SIZE(valid_eiws)];
+ u16 eig = valid_eigs[get_random_u32() % ARRAY_SIZE(valid_eigs)];
+
+ eiw_to_ways(eiw, &ways);
+ pos = get_random_u32() % ways;
+ dpa = get_random_u64() >> 12;
+
+ hpa = cxl_calculate_hpa_offset(dpa, pos, eiw, eig);
+ reverse_dpa = cxl_calculate_dpa_offset(hpa, eiw, eig);
+ reverse_pos = cxl_calculate_position(hpa, eiw, eig);
+
+ if (reverse_dpa != dpa || reverse_pos != pos) {
+ pr_err("test random iter %d FAIL hpa=%llu, dpa=%llu reverse_dpa=%llu, pos=%d reverse_pos=%d eiw=%u eig=%u\n",
+ i, hpa, dpa, reverse_dpa, pos, reverse_pos, eiw,
+ eig);
+
+ if (failures++ > 10) {
+ pr_err("test random too many failures, stop\n");
+ break;
+ }
+ }
+ }
+ pr_info("..... test random: PASS %d FAIL %d\n", i - failures, failures);
+
+ if (failures)
+ return -EINVAL;
+
+ return 0;
+}
+
+struct param_test {
+ u8 eiw;
+ u16 eig;
+ int pos;
+ bool expect; /* true: expect pass, false: expect fail */
+ const char *desc;
+};
+
+static struct param_test param_tests[] = {
+ { 0x0, 0, 0, true, "1-way, min eig=0, pos=0" },
+ { 0x0, 3, 0, true, "1-way, mid eig=3, pos=0" },
+ { 0x0, 6, 0, true, "1-way, max eig=6, pos=0" },
+ { 0x1, 0, 0, true, "2-way, eig=0, pos=0" },
+ { 0x1, 3, 1, true, "2-way, eig=3, max pos=1" },
+ { 0x1, 6, 1, true, "2-way, eig=6, max pos=1" },
+ { 0x2, 0, 0, true, "4-way, eig=0, pos=0" },
+ { 0x2, 3, 3, true, "4-way, eig=3, max pos=3" },
+ { 0x2, 6, 3, true, "4-way, eig=6, max pos=3" },
+ { 0x3, 0, 0, true, "8-way, eig=0, pos=0" },
+ { 0x3, 3, 7, true, "8-way, eig=3, max pos=7" },
+ { 0x3, 6, 7, true, "8-way, eig=6, max pos=7" },
+ { 0x4, 0, 0, true, "16-way, eig=0, pos=0" },
+ { 0x4, 3, 15, true, "16-way, eig=3, max pos=15" },
+ { 0x4, 6, 15, true, "16-way, eig=6, max pos=15" },
+ { 0x8, 0, 0, true, "3-way, eig=0, pos=0" },
+ { 0x8, 3, 2, true, "3-way, eig=3, max pos=2" },
+ { 0x8, 6, 2, true, "3-way, eig=6, max pos=2" },
+ { 0x9, 0, 0, true, "6-way, eig=0, pos=0" },
+ { 0x9, 3, 5, true, "6-way, eig=3, max pos=5" },
+ { 0x9, 6, 5, true, "6-way, eig=6, max pos=5" },
+ { 0xA, 0, 0, true, "12-way, eig=0, pos=0" },
+ { 0xA, 3, 11, true, "12-way, eig=3, max pos=11" },
+ { 0xA, 6, 11, true, "12-way, eig=6, max pos=11" },
+ { 0x5, 0, 0, false, "invalid eiw=5" },
+ { 0x7, 0, 0, false, "invalid eiw=7" },
+ { 0xB, 0, 0, false, "invalid eiw=0xB" },
+ { 0xFF, 0, 0, false, "invalid eiw=0xFF" },
+ { 0x1, 7, 0, false, "invalid eig=7 (out of range)" },
+ { 0x2, 0x10, 0, false, "invalid eig=0x10" },
+ { 0x3, 0xFFFF, 0, false, "invalid eig=0xFFFF" },
+ { 0x1, 0, -1, false, "pos < 0" },
+ { 0x1, 0, 2, false, "2-way, pos=2 (>= ways)" },
+ { 0x2, 0, 4, false, "4-way, pos=4 (>= ways)" },
+ { 0x3, 0, 8, false, "8-way, pos=8 (>= ways)" },
+ { 0x4, 0, 16, false, "16-way, pos=16 (>= ways)" },
+ { 0x8, 0, 3, false, "3-way, pos=3 (>= ways)" },
+ { 0x9, 0, 6, false, "6-way, pos=6 (>= ways)" },
+ { 0xA, 0, 12, false, "12-way, pos=12 (>= ways)" },
+};
+
+static int test_cxl_validate_translation_params(void)
+{
+ int i, rc, failures = 0;
+ bool valid;
+
+ for (i = 0; i < ARRAY_SIZE(param_tests); i++) {
+ struct param_test *t = &param_tests[i];
+
+ rc = cxl_validate_translation_params(t->eiw, t->eig, t->pos);
+ valid = (rc == 0);
+
+ if (valid != t->expect) {
+ pr_err("test params failed: %s\n", t->desc);
+ failures++;
+ }
+ }
+ pr_info("..... test params: PASS %d FAIL %d\n", i - failures, failures);
+
+ if (failures)
+ return -EINVAL;
+
+ return 0;
+}
+
+/*
+ * cxl_translate_init
+ *
+ * Run the internal validation tests when no params are passed.
+ * Otherwise, parse the parameters (test vectors), and kick off
+ * the translation test.
+ *
+ * Returns: 0 on success, negative error code on failure
+ */
+static int __init cxl_translate_init(void)
+{
+ int rc, i;
+
+ /* If no tables are passed, validate module params only */
+ if (table_num == 0) {
+ pr_info("Internal validation test start...\n");
+ rc = test_cxl_validate_translation_params();
+ if (rc)
+ return rc;
+
+ rc = test_random_params();
+ if (rc)
+ return rc;
+
+ pr_info("Internal validation test completed successfully\n");
+
+ return 0;
+ }
+
+ pr_info("CXL translate test module loaded with %d test vectors\n",
+ table_num);
+
+ rc = setup_xor_mapping();
+ if (rc)
+ return rc;
+
+ /* Process each test vector */
+ for (i = 0; i < table_num; i++) {
+ u64 dpa, expect_spa;
+ int pos, math;
+ u8 r_eiw, hb_ways;
+ u16 r_eig;
+
+ pr_debug("Processing test vector %d: '%s'\n", i, table[i]);
+
+ /* Parse the test vector */
+ rc = parse_test_vector(table[i], &dpa, &pos, &r_eiw, &r_eig,
+ &hb_ways, &math, &expect_spa);
+ if (rc) {
+ pr_err("CXL Translate Test %d: FAIL\n"
+ " Failed to parse test vector '%s'\n",
+ i, table[i]);
+ continue;
+ }
+ /* Run the translation test */
+ rc = run_translation_test(dpa, pos, r_eiw, r_eig, hb_ways, math,
+ expect_spa);
+ if (rc) {
+ pr_err("CXL Translate Test %d: FAIL\n"
+ " dpa=%llu pos=%d r_eiw=%u r_eig=%u hb_ways=%u math=%s expect_spa=%llu\n",
+ i, dpa, pos, r_eiw, r_eig, hb_ways,
+ (math == XOR_MATH) ? "XOR" : "MODULO",
+ expect_spa);
+ } else {
+ pr_info("CXL Translate Test %d: PASS\n", i);
+ }
+ }
+
+ kfree(cximsd);
+ pr_info("CXL translate test completed\n");
+
+ return 0;
+}
+
+static void __exit cxl_translate_exit(void)
+{
+ pr_info("CXL translate test module unloaded\n");
+}
+
+module_param_array(table, charp, &table_num, 0444);
+MODULE_PARM_DESC(table, "Test vectors as space-separated decimal strings");
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("cxl_test: cxl address translation test module");
+MODULE_IMPORT_NS("CXL");
+
+module_init(cxl_translate_init);
+module_exit(cxl_translate_exit);
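
For the modulo case, the round trip exercised by run_translation_test() is standard CXL decode arithmetic: the encoded granularity selects a chunk (256 << eig bytes), chunk indexes are spread across the ways, and the device's position selects the slot within each stripe. A minimal userspace model, restricted to power-of-two ways (the in-kernel helpers cxl_calculate_hpa_offset() and friends also cover the 3/6/12-way encodings and the XOR maps):

#include <stdio.h>
#include <stdint.h>

/* CXL decode: granularity = 256 << eig bytes; ways = 1 << eiw (eiw 0..4) */
static uint64_t hpa_from_dpa(uint64_t dpa, int pos, int eiw, int eig)
{
	uint64_t ig = 256ULL << eig, ways = 1ULL << eiw;

	return ((dpa / ig) * ways + pos) * ig + dpa % ig;
}

static uint64_t dpa_from_hpa(uint64_t hpa, int eiw, int eig)
{
	uint64_t ig = 256ULL << eig, ways = 1ULL << eiw;

	return ((hpa / ig) / ways) * ig + hpa % ig;
}

static int pos_from_hpa(uint64_t hpa, int eiw, int eig)
{
	uint64_t ig = 256ULL << eig, ways = 1ULL << eiw;

	return (hpa / ig) % ways;
}

int main(void)
{
	/* 4-way (eiw=2), 512B granularity (eig=1), device at position 1 */
	uint64_t hpa = hpa_from_dpa(1024, 1, 2, 1);

	printf("hpa=%llu dpa=%llu pos=%d\n", (unsigned long long)hpa,
	       (unsigned long long)dpa_from_hpa(hpa, 2, 1),
	       pos_from_hpa(hpa, 2, 1));	/* hpa=4608 dpa=1024 pos=1 */
	return 0;
}

Each table= entry feeds parse_test_vector()'s seven sscanf fields in order (dpa pos r_eiw r_eig hb_ways math expect_hpa), so under this model the vector "1024 1 2 1 4 0 4608" should round-trip cleanly.
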
diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c
index d533481672b7..176dcde570cd 100644
--- a/tools/testing/cxl/test/mem.c
+++ b/tools/testing/cxl/test/mem.c
@@ -250,22 +250,21 @@ static void mes_add_event(struct mock_event_store *mes,
* Vary the number of events returned to simulate events occurring while the
* logs are being read.
*/
-static int ret_limit = 0;
+static atomic_t event_counter = ATOMIC_INIT(0);
static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
{
struct cxl_get_event_payload *pl;
struct mock_event_log *log;
- u16 nr_overflow;
+ int ret_limit;
u8 log_type;
int i;
if (cmd->size_in != sizeof(log_type))
return -EINVAL;
- ret_limit = (ret_limit + 1) % CXL_TEST_EVENT_RET_MAX;
- if (!ret_limit)
- ret_limit = 1;
+ /* Vary return limit from 1 to CXL_TEST_EVENT_RET_MAX */
+ ret_limit = (atomic_inc_return(&event_counter) % CXL_TEST_EVENT_RET_MAX) + 1;
if (cmd->size_out < struct_size(pl, records, ret_limit))
return -EINVAL;
@@ -299,7 +298,7 @@ static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
u64 ns;
pl->flags |= CXL_GET_EVENT_FLAG_OVERFLOW;
- pl->overflow_err_count = cpu_to_le16(nr_overflow);
+ pl->overflow_err_count = cpu_to_le16(log->nr_overflow);
ns = ktime_get_real_ns();
ns -= 5000000000; /* 5s ago */
pl->first_overflow_timestamp = cpu_to_le64(ns);
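
Two fixes land in this mem.c hunk: the overflow count is now taken from the log itself rather than a function-local nr_overflow, and the per-call record limit becomes an atomic so concurrent readers still walk the full 1..CXL_TEST_EVENT_RET_MAX cycle. A quick userspace check of the new limit expression; the MAX value of 4 is assumed here for illustration only:

#include <stdio.h>

#define CXL_TEST_EVENT_RET_MAX 4	/* assumed; the real value lives in mem.c */

int main(void)
{
	/* counter stands in for atomic_inc_return(&event_counter) */
	for (int counter = 1; counter <= 8; counter++)
		printf("%d ", (counter % CXL_TEST_EVENT_RET_MAX) + 1);
	printf("\n");	/* prints: 2 3 4 1 2 3 4 1 */
	return 0;
}
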
diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c
index 995269a75cbd..44bce80ef3ff 100644
--- a/tools/testing/cxl/test/mock.c
+++ b/tools/testing/cxl/test/mock.c
@@ -111,6 +111,26 @@ acpi_status __wrap_acpi_evaluate_integer(acpi_handle handle,
}
EXPORT_SYMBOL(__wrap_acpi_evaluate_integer);
+int __wrap_hmat_get_extended_linear_cache_size(struct resource *backing_res,
+ int nid,
+ resource_size_t *cache_size)
+{
+ int index, rc;
+ struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
+
+ if (ops)
+ rc = ops->hmat_get_extended_linear_cache_size(backing_res, nid,
+ cache_size);
+ else
+ rc = hmat_get_extended_linear_cache_size(backing_res, nid,
+ cache_size);
+
+ put_cxl_mock_ops(index);
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(__wrap_hmat_get_extended_linear_cache_size);
+
struct acpi_pci_root *__wrap_acpi_pci_find_root(acpi_handle handle)
{
int index;
@@ -172,21 +192,6 @@ int __wrap_devm_cxl_endpoint_decoders_setup(struct cxl_port *port)
}
EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_endpoint_decoders_setup, "CXL");
-int __wrap_devm_cxl_port_enumerate_dports(struct cxl_port *port)
-{
- int rc, index;
- struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
-
- if (ops && ops->is_mock_port(port->uport_dev))
- rc = ops->devm_cxl_port_enumerate_dports(port);
- else
- rc = devm_cxl_port_enumerate_dports(port);
- put_cxl_mock_ops(index);
-
- return rc;
-}
-EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_port_enumerate_dports, "CXL");
-
int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds)
{
int rc, index;
@@ -226,23 +231,6 @@ struct cxl_dport *__wrap_devm_cxl_add_rch_dport(struct cxl_port *port,
}
EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_add_rch_dport, "CXL");
-resource_size_t __wrap_cxl_rcd_component_reg_phys(struct device *dev,
- struct cxl_dport *dport)
-{
- int index;
- resource_size_t component_reg_phys;
- struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
-
- if (ops && ops->is_mock_port(dev))
- component_reg_phys = CXL_RESOURCE_NONE;
- else
- component_reg_phys = cxl_rcd_component_reg_phys(dev, dport);
- put_cxl_mock_ops(index);
-
- return component_reg_phys;
-}
-EXPORT_SYMBOL_NS_GPL(__wrap_cxl_rcd_component_reg_phys, "CXL");
-
void __wrap_cxl_endpoint_parse_cdat(struct cxl_port *port)
{
int index;
diff --git a/tools/testing/cxl/test/mock.h b/tools/testing/cxl/test/mock.h
index 4ed932e76aae..2684b89c8aa2 100644
--- a/tools/testing/cxl/test/mock.h
+++ b/tools/testing/cxl/test/mock.h
@@ -19,12 +19,14 @@ struct cxl_mock_ops {
bool (*is_mock_bus)(struct pci_bus *bus);
bool (*is_mock_port)(struct device *dev);
bool (*is_mock_dev)(struct device *dev);
- int (*devm_cxl_port_enumerate_dports)(struct cxl_port *port);
int (*devm_cxl_switch_port_decoders_setup)(struct cxl_port *port);
int (*devm_cxl_endpoint_decoders_setup)(struct cxl_port *port);
void (*cxl_endpoint_parse_cdat)(struct cxl_port *port);
struct cxl_dport *(*devm_cxl_add_dport_by_dev)(struct cxl_port *port,
struct device *dport_dev);
+ int (*hmat_get_extended_linear_cache_size)(struct resource *backing_res,
+ int nid,
+ resource_size_t *cache_size);
};
void register_cxl_mock_ops(struct cxl_mock_ops *ops);
diff --git a/tools/testing/ktest/config-bisect.pl b/tools/testing/ktest/config-bisect.pl
index 6fd864935319..bee7cb34a289 100755
--- a/tools/testing/ktest/config-bisect.pl
+++ b/tools/testing/ktest/config-bisect.pl
@@ -741,9 +741,9 @@ if ($start) {
die "Can not find file $bad\n";
}
if ($val eq "good") {
- run_command "cp $output_config $good" or die "failed to copy $config to $good\n";
+ run_command "cp $output_config $good" or die "failed to copy $output_config to $good\n";
} elsif ($val eq "bad") {
- run_command "cp $output_config $bad" or die "failed to copy $config to $bad\n";
+ run_command "cp $output_config $bad" or die "failed to copy $output_config to $bad\n";
}
}
diff --git a/tools/testing/selftests/damon/_damon_sysfs.py b/tools/testing/selftests/damon/_damon_sysfs.py
index a0e6290833fb..748778b563cd 100644
--- a/tools/testing/selftests/damon/_damon_sysfs.py
+++ b/tools/testing/selftests/damon/_damon_sysfs.py
@@ -475,12 +475,14 @@ class Damos:
class DamonTarget:
pid = None
+ obsolete = None
# todo: Support target regions if test is made
idx = None
context = None
- def __init__(self, pid):
+ def __init__(self, pid, obsolete=False):
self.pid = pid
+ self.obsolete = obsolete
def sysfs_dir(self):
return os.path.join(
@@ -491,8 +493,13 @@ class DamonTarget:
os.path.join(self.sysfs_dir(), 'regions', 'nr_regions'), '0')
if err is not None:
return err
- return write_file(
+ err = write_file(
os.path.join(self.sysfs_dir(), 'pid_target'), self.pid)
+ if err is not None:
+ return err
+ return write_file(
+ os.path.join(self.sysfs_dir(), 'obsolete_target'),
+ 'Y' if self.obsolete else 'N')
class IntervalsGoal:
access_bp = None
diff --git a/tools/testing/selftests/damon/drgn_dump_damon_status.py b/tools/testing/selftests/damon/drgn_dump_damon_status.py
index 7233369a3a44..5374d18d1fa8 100755
--- a/tools/testing/selftests/damon/drgn_dump_damon_status.py
+++ b/tools/testing/selftests/damon/drgn_dump_damon_status.py
@@ -73,6 +73,7 @@ def target_to_dict(target):
['pid', int],
['nr_regions', int],
['regions_list', regions_to_list],
+ ['obsolete', bool],
])
def targets_to_list(targets):
@@ -174,11 +175,11 @@ def scheme_to_dict(scheme):
['target_nid', int],
['migrate_dests', damos_migrate_dests_to_dict],
])
- filters = []
+ core_filters = []
for f in list_for_each_entry(
- 'struct damos_filter', scheme.filters.address_of_(), 'list'):
- filters.append(damos_filter_to_dict(f))
- dict_['filters'] = filters
+ 'struct damos_filter', scheme.core_filters.address_of_(), 'list'):
+ core_filters.append(damos_filter_to_dict(f))
+ dict_['core_filters'] = core_filters
ops_filters = []
for f in list_for_each_entry(
'struct damos_filter', scheme.ops_filters.address_of_(), 'list'):
diff --git a/tools/testing/selftests/damon/sysfs.py b/tools/testing/selftests/damon/sysfs.py
index 2666c6f0f1a5..9cca71eb0325 100755
--- a/tools/testing/selftests/damon/sysfs.py
+++ b/tools/testing/selftests/damon/sysfs.py
@@ -132,7 +132,7 @@ def assert_scheme_committed(scheme, dump):
assert_watermarks_committed(scheme.watermarks, dump['wmarks'])
# TODO: test filters directory
for idx, f in enumerate(scheme.core_filters.filters):
- assert_filter_committed(f, dump['filters'][idx])
+ assert_filter_committed(f, dump['core_filters'][idx])
for idx, f in enumerate(scheme.ops_filters.filters):
assert_filter_committed(f, dump['ops_filters'][idx])
@@ -164,6 +164,16 @@ def assert_monitoring_attrs_committed(attrs, dump):
assert_true(dump['max_nr_regions'] == attrs.max_nr_regions,
'max_nr_regions', dump)
+def assert_monitoring_target_committed(target, dump):
+ # target.pid is the pid "number", while dump['pid'] is 'struct pid'
+ # pointer, and hence cannot be compared.
+ assert_true(dump['obsolete'] == target.obsolete, 'target obsolete', dump)
+
+def assert_monitoring_targets_committed(targets, dump):
+ assert_true(len(targets) == len(dump), 'len_targets', dump)
+ for idx, target in enumerate(targets):
+ assert_monitoring_target_committed(target, dump[idx])
+
def assert_ctx_committed(ctx, dump):
ops_val = {
'vaddr': 0,
@@ -172,9 +182,18 @@ def assert_ctx_committed(ctx, dump):
}
assert_true(dump['ops']['id'] == ops_val[ctx.ops], 'ops_id', dump)
assert_monitoring_attrs_committed(ctx.monitoring_attrs, dump['attrs'])
+ assert_monitoring_targets_committed(ctx.targets, dump['adaptive_targets'])
assert_schemes_committed(ctx.schemes, dump['schemes'])
-def assert_ctxs_committed(ctxs, dump):
+def assert_ctxs_committed(kdamonds):
+ status, err = dump_damon_status_dict(kdamonds.kdamonds[0].pid)
+ if err is not None:
+ print(err)
+ kdamonds.stop()
+ exit(1)
+
+ ctxs = kdamonds.kdamonds[0].contexts
+ dump = status['contexts']
assert_true(len(ctxs) == len(dump), 'ctxs length', dump)
for idx, ctx in enumerate(ctxs):
assert_ctx_committed(ctx, dump[idx])
@@ -191,13 +210,7 @@ def main():
print('kdamond start failed: %s' % err)
exit(1)
- status, err = dump_damon_status_dict(kdamonds.kdamonds[0].pid)
- if err is not None:
- print(err)
- kdamonds.stop()
- exit(1)
-
- assert_ctxs_committed(kdamonds.kdamonds[0].contexts, status['contexts'])
+ assert_ctxs_committed(kdamonds)
context = _damon_sysfs.DamonCtx(
monitoring_attrs=_damon_sysfs.DamonAttrs(
@@ -245,12 +258,7 @@ def main():
kdamonds.kdamonds[0].contexts = [context]
kdamonds.kdamonds[0].commit()
- status, err = dump_damon_status_dict(kdamonds.kdamonds[0].pid)
- if err is not None:
- print(err)
- exit(1)
-
- assert_ctxs_committed(kdamonds.kdamonds[0].contexts, status['contexts'])
+ assert_ctxs_committed(kdamonds)
# test online commitment of minimum context.
context = _damon_sysfs.DamonCtx()
@@ -259,13 +267,36 @@ def main():
kdamonds.kdamonds[0].contexts = [context]
kdamonds.kdamonds[0].commit()
- status, err = dump_damon_status_dict(kdamonds.kdamonds[0].pid)
- if err is not None:
- print(err)
- exit(1)
+ assert_ctxs_committed(kdamonds)
- assert_ctxs_committed(kdamonds.kdamonds[0].contexts, status['contexts'])
+ kdamonds.stop()
+ # test obsolete_target.
+ proc1 = subprocess.Popen(['sh'], stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ proc2 = subprocess.Popen(['sh'], stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ proc3 = subprocess.Popen(['sh'], stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ kdamonds = _damon_sysfs.Kdamonds(
+ [_damon_sysfs.Kdamond(
+ contexts=[_damon_sysfs.DamonCtx(
+ ops='vaddr',
+ targets=[
+ _damon_sysfs.DamonTarget(pid=proc1.pid),
+ _damon_sysfs.DamonTarget(pid=proc2.pid),
+ _damon_sysfs.DamonTarget(pid=proc3.pid),
+ ],
+ schemes=[_damon_sysfs.Damos()],
+ )])])
+ err = kdamonds.start()
+ if err is not None:
+ print('kdamond start failed: %s' % err)
+ exit(1)
+ kdamonds.kdamonds[0].contexts[0].targets[1].obsolete = True
+ kdamonds.kdamonds[0].commit()
+ del kdamonds.kdamonds[0].contexts[0].targets[1]
+ assert_ctxs_committed(kdamonds)
kdamonds.stop()
if __name__ == '__main__':
diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest
index cce72f8b03dc..3230bd54dba8 100755
--- a/tools/testing/selftests/ftrace/ftracetest
+++ b/tools/testing/selftests/ftrace/ftracetest
@@ -22,6 +22,7 @@ echo " --fail-unresolved Treat UNRESOLVED as a failure"
echo " -d|--debug Debug mode (trace all shell commands)"
echo " -l|--logdir <dir> Save logs on the <dir>"
echo " If <dir> is -, all logs output in console only"
+echo " --rv Run RV selftests instead of ftrace ones"
exit $1
}
@@ -133,6 +134,10 @@ parse_opts() { # opts
LINK_PTR=
shift 2
;;
+ --rv)
+ RV_TEST=1
+ shift 1
+ ;;
*.tc)
if [ -f "$1" ]; then
OPT_TEST_CASES="$OPT_TEST_CASES `abspath $1`"
@@ -152,9 +157,13 @@ parse_opts() { # opts
;;
esac
done
- if [ ! -z "$OPT_TEST_CASES" ]; then
+ if [ -n "$OPT_TEST_CASES" ]; then
TEST_CASES=$OPT_TEST_CASES
fi
+ if [ -n "$OPT_TEST_DIR" -a -f "$OPT_TEST_DIR"/test.d/functions ]; then
+ TOP_DIR=$OPT_TEST_DIR
+ TEST_DIR=$TOP_DIR/test.d
+ fi
}
# Parameters
@@ -190,10 +199,6 @@ fi
TOP_DIR=`absdir $0`
TEST_DIR=$TOP_DIR/test.d
TEST_CASES=`find_testcases $TEST_DIR`
-LOG_TOP_DIR=$TOP_DIR/logs
-LOG_DATE=`date +%Y%m%d-%H%M%S`
-LOG_DIR=$LOG_TOP_DIR/$LOG_DATE/
-LINK_PTR=$LOG_TOP_DIR/latest
KEEP_LOG=0
KTAP=0
DEBUG=0
@@ -201,14 +206,23 @@ VERBOSE=0
UNSUPPORTED_RESULT=0
UNRESOLVED_RESULT=0
STOP_FAILURE=0
+RV_TEST=0
# Parse command-line options
parse_opts $*
+LOG_TOP_DIR=$TOP_DIR/logs
+LOG_DATE=`date +%Y%m%d-%H%M%S`
+LOG_DIR=$LOG_TOP_DIR/$LOG_DATE/
+LINK_PTR=$LOG_TOP_DIR/latest
+
[ $DEBUG -ne 0 ] && set -x
-# Verify parameters
-if [ -z "$TRACING_DIR" -o ! -d "$TRACING_DIR" ]; then
- errexit "No ftrace directory found"
+if [ $RV_TEST -ne 0 ]; then
+ TRACING_DIR=$TRACING_DIR/rv
+ if [ ! -d "$TRACING_DIR" ]; then
+ err_ret=$err_skip
+ errexit "rv is not configured in this kernel"
+ fi
fi
# Preparing logs
@@ -419,7 +433,7 @@ trap 'SIG_RESULT=$XFAIL' $SIG_XFAIL
__run_test() { # testfile
# setup PID and PPID, $$ is not updated.
(cd $TRACING_DIR; read PID _ < /proc/self/stat; set -e; set -x;
- checkreq $1; initialize_ftrace; . $1)
+ checkreq $1; initialize_system; . $1)
[ $? -ne 0 ] && kill -s $SIG_FAIL $SIG_PID
}
@@ -496,7 +510,7 @@ for t in $TEST_CASES; do
exit 1
fi
done
-(cd $TRACING_DIR; finish_ftrace) # for cleanup
+(cd $TRACING_DIR; finish_system) # for cleanup
prlog ""
prlog "# of passed: " `echo $PASSED_CASES | wc -w`
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc b/tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc
index 8a7ce647a60d..318939451caf 100644
--- a/tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc
+++ b/tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc
@@ -28,7 +28,7 @@ unmount_tracefs() {
local mount_point="$1"
# Need to make sure the mount isn't busy so that we can umount it
- (cd $mount_point; finish_ftrace;)
+ (cd $mount_point; finish_system;)
cleanup
}
diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions
index a1052bf460fc..e8e718139294 100644
--- a/tools/testing/selftests/ftrace/test.d/functions
+++ b/tools/testing/selftests/ftrace/test.d/functions
@@ -104,7 +104,7 @@ clear_dynamic_events() { # reset all current dynamic events
done
}
-initialize_ftrace() { # Reset ftrace to initial-state
+initialize_system() { # Reset ftrace to initial-state
# As the initial state, ftrace will be set to nop tracer,
# no events, no triggers, no filters, no function filters,
# no probes, and tracing on.
@@ -134,8 +134,8 @@ initialize_ftrace() { # Reset ftrace to initial-state
enable_tracing
}
-finish_ftrace() {
- initialize_ftrace
+finish_system() {
+ initialize_system
# And recover it to default.
[ -f options/pause-on-trace ] && echo 0 > options/pause-on-trace
}
diff --git a/tools/testing/selftests/hid/tests/test_tablet.py b/tools/testing/selftests/hid/tests/test_tablet.py
index 50d5699812bb..5b9abb616db4 100644
--- a/tools/testing/selftests/hid/tests/test_tablet.py
+++ b/tools/testing/selftests/hid/tests/test_tablet.py
@@ -452,6 +452,7 @@ class Pen(object):
def __init__(self, x, y):
self.x = x
self.y = y
+ self.distance = -10
self.tipswitch = False
self.tippressure = 15
self.azimuth = 0
@@ -473,6 +474,7 @@ class Pen(object):
for i in [
"x",
"y",
+ "distance",
"tippressure",
"azimuth",
"width",
@@ -554,6 +556,7 @@ class PenDigitizer(base.UHIDTestDevice):
pen.tipswitch = False
pen.tippressure = 0
pen.azimuth = 0
+ pen.distance = 0
pen.inrange = False
pen.width = 0
pen.height = 0
@@ -868,6 +871,29 @@ class BaseTest:
state machine."""
self._test_states(state_list, scribble, allow_intermediate_states=True)
+ @pytest.mark.skip_if_uhdev(
+ lambda uhdev: "Z" not in uhdev.fields,
+ "Device not compatible, missing Z usage",
+ )
+ @pytest.mark.parametrize("scribble", [True, False], ids=["scribble", "static"])
+ @pytest.mark.parametrize(
+ "state_list",
+ [pytest.param(v, id=k) for k, v in PenState.legal_transitions().items()],
+ )
+ def test_z_reported_as_distance(self, state_list, scribble):
+ """Verify stylus Z values are reported as ABS_DISTANCE."""
+ self._test_states(state_list, scribble, allow_intermediate_states=False)
+
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+ p = Pen(0, 0)
+ uhdev.move_to(p, PenState.PEN_IS_OUT_OF_RANGE, None)
+ p = Pen(100, 200)
+ uhdev.move_to(p, PenState.PEN_IS_IN_RANGE, None)
+ p.distance = -1
+ events = self.post(uhdev, p, None)
+ assert evdev.value[libevdev.EV_ABS.ABS_DISTANCE] == -1
+
class GXTP_pen(PenDigitizer):
def event(self, pen, test_button):
@@ -1292,6 +1318,35 @@ class Huion_Kamvas_Pro_19_256c_006b(PenDigitizer):
return rs
+class Wacom_2d1f_014b(PenDigitizer):
+ """
+ Pen that reports distance values with HID_GD_Z usage.
+ """
+ def __init__(
+ self,
+ name,
+ rdesc_str=None,
+ rdesc=None,
+ application="Pen",
+ physical="Stylus",
+ input_info=(BusType.USB, 0x2D1F, 0x014B),
+ evdev_name_suffix=None,
+ ):
+ super().__init__(
+ name, rdesc_str, rdesc, application, physical, input_info, evdev_name_suffix
+ )
+
+ def match_evdev_rule(self, application, evdev):
+ # there are 2 nodes created by the device, only one matters
+ return evdev.name.endswith("Stylus")
+
+ def event(self, pen, test_button):
+ # this device reports the distance through Z
+ pen.z = pen.distance
+
+ return super().event(pen, test_button)
+
+
################################################################################
#
# Windows 7 compatible devices
@@ -1504,3 +1559,19 @@ class TestHuion_Kamvas_Pro_19_256c_006b(BaseTest.TestTablet):
rdesc="05 0d 09 02 a1 01 85 0a 09 20 a1 01 09 42 09 44 09 43 09 3c 09 45 15 00 25 01 75 01 95 06 81 02 09 32 75 01 95 01 81 02 81 03 05 01 09 30 09 31 55 0d 65 33 26 ff 7f 35 00 46 00 08 75 10 95 02 81 02 05 0d 09 30 26 ff 3f 75 10 95 01 81 02 09 3d 09 3e 15 a6 25 5a 75 08 95 02 81 02 c0 c0 05 0d 09 04 a1 01 85 04 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 ff 7f 35 00 46 15 0c 81 42 09 31 26 ff 7f 46 cb 06 81 42 05 0d 09 30 26 ff 1f 75 10 95 01 81 02 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 ff 7f 35 00 46 15 0c 81 42 09 31 26 ff 7f 46 cb 06 81 42 05 0d 09 30 26 ff 1f 75 10 95 01 81 02 c0 05 0d 09 56 55 00 65 00 27 ff ff ff 7f 95 01 75 20 81 02 09 54 25 7f 95 01 75 08 81 02 75 08 95 08 81 03 85 05 09 55 25 0a 75 08 95 01 b1 02 06 00 ff 09 c5 85 06 15 00 26 ff 00 75 08 96 00 01 b1 02 c0",
input_info=(BusType.USB, 0x256C, 0x006B),
)
+
+
+################################################################################
+#
+# Devices Reporting Distance
+#
+################################################################################
+
+
+class TestWacom_2d1f_014b(BaseTest.TestTablet):
+ def create_device(self):
+ return Wacom_2d1f_014b(
+ "uhid test Wacom 2d1f_014b",
+ rdesc="05 0d 09 02 a1 01 85 02 09 20 a1 00 09 42 09 44 09 45 09 3c 09 5a 09 32 15 00 25 01 75 01 95 06 81 02 95 02 81 03 05 01 09 30 26 88 3e 46 88 3e 65 11 55 0d 75 10 95 01 81 02 09 31 26 60 53 46 60 53 81 02 05 0d 09 30 26 ff 0f 45 00 65 00 55 00 81 02 06 00 ff 09 04 75 08 26 ff 00 46 ff 00 65 11 55 0e 81 02 05 0d 09 3d 75 10 16 d8 dc 26 28 23 36 d8 dc 46 28 23 65 14 81 02 09 3e 81 02 05 01 09 32 16 01 ff 25 00 36 01 ff 45 00 65 11 81 02 05 0d 09 56 15 00 27 ff ff 00 00 35 00 47 ff ff 00 00 66 01 10 55 0c 81 02 45 00 65 00 55 00 c0 09 00 75 08 26 ff 00 b1 12 85 03 09 00 95 12 b1 12 85 05 09 00 95 04 b1 02 85 06 09 00 95 24 b1 02 85 16 09 00 15 00 26 ff 00 95 06 b1 02 85 17 09 00 95 0c b1 02 85 19 09 00 95 01 b1 02 85 0a 09 00 75 08 95 01 15 10 26 ff 00 b1 02 85 1e 09 00 95 10 b1 02 c0 06 00 ff 09 00 a1 01 85 09 05 0d 09 20 a1 00 09 00 15 00 26 ff 00 75 08 95 10 81 02 c0 09 00 95 03 b1 12 c0 06 00 ff 09 02 a1 01 85 07 09 00 96 09 01 b1 02 85 08 09 00 95 03 81 02 09 00 b1 02 85 0e 09 00 96 0a 01 b1 02 c0 05 0d 09 02 a1 01 85 1a 09 20 a1 00 09 42 09 44 09 45 09 3c 09 5a 09 32 15 00 25 01 75 01 95 06 81 02 09 38 25 03 75 02 95 01 81 02 05 01 09 30 26 88 3e 46 88 3e 65 11 55 0d 75 10 95 01 81 02 09 31 26 60 53 46 60 53 81 02 05 0d 09 30 26 ff 0f 46 b0 0f 66 11 e1 55 02 81 02 06 00 ff 09 04 75 08 26 ff 00 46 ff 00 65 11 55 0e 81 02 05 0d 09 3d 75 10 16 d8 dc 26 28 23 36 d8 dc 46 28 23 65 14 81 02 09 3e 81 02 05 01 09 32 16 01 ff 25 00 36 01 ff 45 00 65 11 81 02 05 0d 09 56 15 00 27 ff ff 00 00 35 00 47 ff ff 00 00 66 01 10 55 0c 81 02 45 00 65 00 55 00 c0 c0 06 00 ff 09 00 a1 01 85 1b 05 0d 09 20 a1 00 09 00 26 ff 00 75 08 95 10 81 02 c0 c0",
+ input_info=(BusType.USB, 0x2D1F, 0x014B),
+ )
diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c
index bb4d33dde3c8..10e051b6f592 100644
--- a/tools/testing/selftests/iommu/iommufd.c
+++ b/tools/testing/selftests/iommu/iommufd.c
@@ -13,9 +13,6 @@
static unsigned long HUGEPAGE_SIZE;
-#define MOCK_PAGE_SIZE (PAGE_SIZE / 2)
-#define MOCK_HUGE_PAGE_SIZE (512 * MOCK_PAGE_SIZE)
-
static unsigned long get_huge_page_size(void)
{
char buf[80];
@@ -1574,6 +1571,49 @@ TEST_F(iommufd_ioas, copy_sweep)
test_ioctl_destroy(dst_ioas_id);
}
+TEST_F(iommufd_ioas, dmabuf_simple)
+{
+ size_t buf_size = PAGE_SIZE*4;
+ __u64 iova;
+ int dfd;
+
+ test_cmd_get_dmabuf(buf_size, &dfd);
+ test_err_ioctl_ioas_map_file(EINVAL, dfd, 0, 0, &iova);
+ test_err_ioctl_ioas_map_file(EINVAL, dfd, buf_size, buf_size, &iova);
+ test_err_ioctl_ioas_map_file(EINVAL, dfd, 0, buf_size + 1, &iova);
+ test_ioctl_ioas_map_file(dfd, 0, buf_size, &iova);
+
+ close(dfd);
+}
+
+TEST_F(iommufd_ioas, dmabuf_revoke)
+{
+ size_t buf_size = PAGE_SIZE*4;
+ __u32 hwpt_id;
+ __u64 iova;
+ __u64 iova2;
+ int dfd;
+
+ test_cmd_get_dmabuf(buf_size, &dfd);
+ test_ioctl_ioas_map_file(dfd, 0, buf_size, &iova);
+ test_cmd_revoke_dmabuf(dfd, true);
+
+ if (variant->mock_domains)
+ test_cmd_hwpt_alloc(self->device_id, self->ioas_id, 0,
+ &hwpt_id);
+
+ test_err_ioctl_ioas_map_file(ENODEV, dfd, 0, buf_size, &iova2);
+
+ test_cmd_revoke_dmabuf(dfd, false);
+ test_ioctl_ioas_map_file(dfd, 0, buf_size, &iova2);
+
+ /* Restore the iova back */
+ test_ioctl_ioas_unmap(iova, buf_size);
+ test_ioctl_ioas_map_fixed_file(dfd, 0, buf_size, iova);
+
+ close(dfd);
+}
+
FIXTURE(iommufd_mock_domain)
{
int fd;
@@ -2058,6 +2098,12 @@ FIXTURE_VARIANT(iommufd_dirty_tracking)
FIXTURE_SETUP(iommufd_dirty_tracking)
{
+ struct iommu_option cmd = {
+ .size = sizeof(cmd),
+ .option_id = IOMMU_OPTION_HUGE_PAGES,
+ .op = IOMMU_OPTION_OP_SET,
+ .val64 = 0,
+ };
size_t mmap_buffer_size;
unsigned long size;
int mmap_flags;
@@ -2066,7 +2112,7 @@ FIXTURE_SETUP(iommufd_dirty_tracking)
if (variant->buffer_size < MOCK_PAGE_SIZE) {
SKIP(return,
- "Skipping buffer_size=%lu, less than MOCK_PAGE_SIZE=%lu",
+ "Skipping buffer_size=%lu, less than MOCK_PAGE_SIZE=%u",
variant->buffer_size, MOCK_PAGE_SIZE);
}
@@ -2114,16 +2160,18 @@ FIXTURE_SETUP(iommufd_dirty_tracking)
assert((uintptr_t)self->bitmap % PAGE_SIZE == 0);
test_ioctl_ioas_alloc(&self->ioas_id);
- /* Enable 1M mock IOMMU hugepages */
- if (variant->hugepages) {
- test_cmd_mock_domain_flags(self->ioas_id,
- MOCK_FLAGS_DEVICE_HUGE_IOVA,
- &self->stdev_id, &self->hwpt_id,
- &self->idev_id);
- } else {
- test_cmd_mock_domain(self->ioas_id, &self->stdev_id,
- &self->hwpt_id, &self->idev_id);
- }
+
+ /*
+ * For dirty testing it is important that the page size fed into
+ * the iommu page tables matches the size the dirty logic
+ * expects, or set_dirty can touch too much stuff.
+ */
+ cmd.object_id = self->ioas_id;
+ if (!variant->hugepages)
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd));
+
+ test_cmd_mock_domain(self->ioas_id, &self->stdev_id, &self->hwpt_id,
+ &self->idev_id);
}
FIXTURE_TEARDOWN(iommufd_dirty_tracking)
@@ -2248,18 +2296,23 @@ TEST_F(iommufd_dirty_tracking, device_dirty_capability)
TEST_F(iommufd_dirty_tracking, get_dirty_bitmap)
{
uint32_t page_size = MOCK_PAGE_SIZE;
+ uint32_t ioas_id = self->ioas_id;
uint32_t hwpt_id;
- uint32_t ioas_id;
if (variant->hugepages)
page_size = MOCK_HUGE_PAGE_SIZE;
- test_ioctl_ioas_alloc(&ioas_id);
test_ioctl_ioas_map_fixed_id(ioas_id, self->buffer,
variant->buffer_size, MOCK_APERTURE_START);
- test_cmd_hwpt_alloc(self->idev_id, ioas_id,
- IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id);
+ if (variant->hugepages)
+ test_cmd_hwpt_alloc_iommupt(self->idev_id, ioas_id,
+ IOMMU_HWPT_ALLOC_DIRTY_TRACKING,
+ MOCK_IOMMUPT_HUGE, &hwpt_id);
+ else
+ test_cmd_hwpt_alloc_iommupt(self->idev_id, ioas_id,
+ IOMMU_HWPT_ALLOC_DIRTY_TRACKING,
+ MOCK_IOMMUPT_DEFAULT, &hwpt_id);
test_cmd_set_dirty_tracking(hwpt_id, true);
@@ -2285,18 +2338,24 @@ TEST_F(iommufd_dirty_tracking, get_dirty_bitmap)
TEST_F(iommufd_dirty_tracking, get_dirty_bitmap_no_clear)
{
uint32_t page_size = MOCK_PAGE_SIZE;
+ uint32_t ioas_id = self->ioas_id;
uint32_t hwpt_id;
- uint32_t ioas_id;
if (variant->hugepages)
page_size = MOCK_HUGE_PAGE_SIZE;
- test_ioctl_ioas_alloc(&ioas_id);
test_ioctl_ioas_map_fixed_id(ioas_id, self->buffer,
variant->buffer_size, MOCK_APERTURE_START);
- test_cmd_hwpt_alloc(self->idev_id, ioas_id,
- IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id);
+
+ if (variant->hugepages)
+ test_cmd_hwpt_alloc_iommupt(self->idev_id, ioas_id,
+ IOMMU_HWPT_ALLOC_DIRTY_TRACKING,
+ MOCK_IOMMUPT_HUGE, &hwpt_id);
+ else
+ test_cmd_hwpt_alloc_iommupt(self->idev_id, ioas_id,
+ IOMMU_HWPT_ALLOC_DIRTY_TRACKING,
+ MOCK_IOMMUPT_DEFAULT, &hwpt_id);
test_cmd_set_dirty_tracking(hwpt_id, true);
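
The fixture comment above is the crux of these iommufd.c changes: dirty state is tracked per IOMMU page, so the bitmap must be indexed with the page size the page table was actually built with, which is why the setup now pins IOMMU_OPTION_HUGE_PAGES off for non-hugepage variants and selects MOCK_IOMMUPT_HUGE explicitly otherwise. A simplified sketch of the indexing assumption (not the selftest's actual bitmap helpers):

#include <stdint.h>
#include <stdio.h>

/* Mark the dirty bit covering iova in a table built with page_size pages. */
static void set_dirty(uint64_t *bitmap, uint64_t iova, uint64_t base,
		      uint64_t page_size)
{
	uint64_t bit = (iova - base) / page_size;

	bitmap[bit / 64] |= 1ULL << (bit % 64);
}

int main(void)
{
	uint64_t bitmap[8] = { 0 };

	/*
	 * One write under a 2M mapping dirties one 2M bit; decoding the same
	 * bitmap at 4K granularity would read that single bit as 512 dirty
	 * pages, hence the page sizes must agree end to end.
	 */
	set_dirty(bitmap, 0x200000, 0, 0x200000);
	printf("bitmap[0] = %#llx\n", (unsigned long long)bitmap[0]);
	return 0;
}
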
diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h
index 9f472c20c190..0a0ff6f7926d 100644
--- a/tools/testing/selftests/iommu/iommufd_utils.h
+++ b/tools/testing/selftests/iommu/iommufd_utils.h
@@ -215,6 +215,18 @@ static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id, __u32 ft_i
ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, 0, flags, \
hwpt_id, IOMMU_HWPT_DATA_NONE, NULL, \
0))
+#define test_cmd_hwpt_alloc_iommupt(device_id, pt_id, flags, iommupt_type, \
+ hwpt_id) \
+ ({ \
+ struct iommu_hwpt_selftest user_cfg = { \
+ .pagetable_type = iommupt_type \
+ }; \
+ \
+ ASSERT_EQ(0, _test_cmd_hwpt_alloc( \
+ self->fd, device_id, pt_id, 0, flags, \
+ hwpt_id, IOMMU_HWPT_DATA_SELFTEST, \
+ &user_cfg, sizeof(user_cfg))); \
+ })
#define test_err_hwpt_alloc(_errno, device_id, pt_id, flags, hwpt_id) \
EXPECT_ERRNO(_errno, _test_cmd_hwpt_alloc( \
self->fd, device_id, pt_id, 0, flags, \
@@ -548,6 +560,39 @@ static int _test_cmd_destroy_access_pages(int fd, unsigned int access_id,
EXPECT_ERRNO(_errno, _test_cmd_destroy_access_pages( \
self->fd, access_id, access_pages_id))
+static int _test_cmd_get_dmabuf(int fd, size_t len, int *out_fd)
+{
+ struct iommu_test_cmd cmd = {
+ .size = sizeof(cmd),
+ .op = IOMMU_TEST_OP_DMABUF_GET,
+ .dmabuf_get = { .length = len, .open_flags = O_CLOEXEC },
+ };
+
+ *out_fd = ioctl(fd, IOMMU_TEST_CMD, &cmd);
+ if (*out_fd < 0)
+ return -1;
+ return 0;
+}
+#define test_cmd_get_dmabuf(len, out_fd) \
+ ASSERT_EQ(0, _test_cmd_get_dmabuf(self->fd, len, out_fd))
+
+static int _test_cmd_revoke_dmabuf(int fd, int dmabuf_fd, bool revoked)
+{
+ struct iommu_test_cmd cmd = {
+ .size = sizeof(cmd),
+ .op = IOMMU_TEST_OP_DMABUF_REVOKE,
+ .dmabuf_revoke = { .dmabuf_fd = dmabuf_fd, .revoked = revoked },
+ };
+ int ret;
+
+ ret = ioctl(fd, IOMMU_TEST_CMD, &cmd);
+ if (ret < 0)
+ return -1;
+ return 0;
+}
+#define test_cmd_revoke_dmabuf(dmabuf_fd, revoke) \
+ ASSERT_EQ(0, _test_cmd_revoke_dmabuf(self->fd, dmabuf_fd, revoke))
+
static int _test_ioctl_destroy(int fd, unsigned int id)
{
struct iommu_destroy cmd = {
@@ -718,6 +763,17 @@ static int _test_ioctl_ioas_map_file(int fd, unsigned int ioas_id, int mfd,
self->fd, ioas_id, mfd, start, length, iova_p, \
IOMMU_IOAS_MAP_WRITEABLE | IOMMU_IOAS_MAP_READABLE))
+#define test_ioctl_ioas_map_fixed_file(mfd, start, length, iova) \
+ ({ \
+ __u64 __iova = iova; \
+ ASSERT_EQ(0, _test_ioctl_ioas_map_file( \
+ self->fd, self->ioas_id, mfd, start, \
+ length, &__iova, \
+ IOMMU_IOAS_MAP_FIXED_IOVA | \
+ IOMMU_IOAS_MAP_WRITEABLE | \
+ IOMMU_IOAS_MAP_READABLE)); \
+ })
+
static int _test_ioctl_set_temp_memory_limit(int fd, unsigned int limit)
{
struct iommu_test_cmd memlimit_cmd = {
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index d9fffe06d3ea..f2b223072b62 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -6,7 +6,7 @@ ARCH ?= $(SUBARCH)
ifeq ($(ARCH),$(filter $(ARCH),arm64 s390 riscv x86 x86_64 loongarch))
# Top-level selftests allows ARCH=x86_64 :-(
ifeq ($(ARCH),x86_64)
- ARCH := x86
+ override ARCH := x86
endif
include Makefile.kvm
else
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index 148d427ff24b..ba5c2b643efa 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -88,8 +88,12 @@ TEST_GEN_PROGS_x86 += x86/kvm_pv_test
TEST_GEN_PROGS_x86 += x86/kvm_buslock_test
TEST_GEN_PROGS_x86 += x86/monitor_mwait_test
TEST_GEN_PROGS_x86 += x86/msrs_test
+TEST_GEN_PROGS_x86 += x86/nested_close_kvm_test
TEST_GEN_PROGS_x86 += x86/nested_emulation_test
TEST_GEN_PROGS_x86 += x86/nested_exceptions_test
+TEST_GEN_PROGS_x86 += x86/nested_invalid_cr3_test
+TEST_GEN_PROGS_x86 += x86/nested_tsc_adjust_test
+TEST_GEN_PROGS_x86 += x86/nested_tsc_scaling_test
TEST_GEN_PROGS_x86 += x86/platform_info_test
TEST_GEN_PROGS_x86 += x86/pmu_counters_test
TEST_GEN_PROGS_x86 += x86/pmu_event_filter_test
@@ -111,14 +115,12 @@ TEST_GEN_PROGS_x86 += x86/ucna_injection_test
TEST_GEN_PROGS_x86 += x86/userspace_io_test
TEST_GEN_PROGS_x86 += x86/userspace_msr_exit_test
TEST_GEN_PROGS_x86 += x86/vmx_apic_access_test
-TEST_GEN_PROGS_x86 += x86/vmx_close_while_nested_test
TEST_GEN_PROGS_x86 += x86/vmx_dirty_log_test
TEST_GEN_PROGS_x86 += x86/vmx_exception_with_invalid_guest_state
TEST_GEN_PROGS_x86 += x86/vmx_msrs_test
TEST_GEN_PROGS_x86 += x86/vmx_invalid_nested_guest_state
+TEST_GEN_PROGS_x86 += x86/vmx_nested_la57_state_test
TEST_GEN_PROGS_x86 += x86/vmx_set_nested_state_test
-TEST_GEN_PROGS_x86 += x86/vmx_tsc_adjust_test
-TEST_GEN_PROGS_x86 += x86/vmx_nested_tsc_scaling_test
TEST_GEN_PROGS_x86 += x86/apic_bus_clock_test
TEST_GEN_PROGS_x86 += x86/xapic_ipi_test
TEST_GEN_PROGS_x86 += x86/xapic_state_test
@@ -156,6 +158,7 @@ TEST_GEN_PROGS_EXTENDED_x86 += x86/nx_huge_pages_test
TEST_GEN_PROGS_arm64 = $(TEST_GEN_PROGS_COMMON)
TEST_GEN_PROGS_arm64 += arm64/aarch32_id_regs
TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases
+TEST_GEN_PROGS_arm64 += arm64/at
TEST_GEN_PROGS_arm64 += arm64/debug-exceptions
TEST_GEN_PROGS_arm64 += arm64/hello_el2
TEST_GEN_PROGS_arm64 += arm64/host_sve
@@ -163,6 +166,7 @@ TEST_GEN_PROGS_arm64 += arm64/hypercalls
TEST_GEN_PROGS_arm64 += arm64/external_aborts
TEST_GEN_PROGS_arm64 += arm64/page_fault_test
TEST_GEN_PROGS_arm64 += arm64/psci_test
+TEST_GEN_PROGS_arm64 += arm64/sea_to_user
TEST_GEN_PROGS_arm64 += arm64/set_id_regs
TEST_GEN_PROGS_arm64 += arm64/smccc_filter
TEST_GEN_PROGS_arm64 += arm64/vcpu_width_config
@@ -194,6 +198,7 @@ TEST_GEN_PROGS_s390 += s390/debug_test
TEST_GEN_PROGS_s390 += s390/cpumodel_subfuncs_test
TEST_GEN_PROGS_s390 += s390/shared_zeropage_test
TEST_GEN_PROGS_s390 += s390/ucontrol_test
+TEST_GEN_PROGS_s390 += s390/user_operexec
TEST_GEN_PROGS_s390 += rseq_test
TEST_GEN_PROGS_riscv = $(TEST_GEN_PROGS_COMMON)
@@ -210,6 +215,7 @@ TEST_GEN_PROGS_riscv += mmu_stress_test
TEST_GEN_PROGS_riscv += rseq_test
TEST_GEN_PROGS_riscv += steal_time
+TEST_GEN_PROGS_loongarch = arch_timer
TEST_GEN_PROGS_loongarch += coalesced_io_test
TEST_GEN_PROGS_loongarch += demand_paging_test
TEST_GEN_PROGS_loongarch += dirty_log_perf_test
diff --git a/tools/testing/selftests/kvm/arm64/at.c b/tools/testing/selftests/kvm/arm64/at.c
new file mode 100644
index 000000000000..c8ee6f520734
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/at.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * at - Test for KVM's AT emulation in the EL2&0 and EL1&0 translation regimes.
+ */
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+#include "ucall.h"
+
+#include <asm/sysreg.h>
+
+#define TEST_ADDR 0x80000000
+
+enum {
+ CLEAR_ACCESS_FLAG,
+ TEST_ACCESS_FLAG,
+};
+
+static u64 *ptep_hva;
+
+#define copy_el2_to_el1(reg) \
+ write_sysreg_s(read_sysreg_s(SYS_##reg##_EL1), SYS_##reg##_EL12)
+
+/* Yes, this is an ugly hack */
+#define __at(op, addr) write_sysreg_s(addr, op)
+
+#define test_at_insn(op, expect_fault) \
+do { \
+ u64 par, fsc; \
+ bool fault; \
+ \
+ GUEST_SYNC(CLEAR_ACCESS_FLAG); \
+ \
+ __at(OP_AT_##op, TEST_ADDR); \
+ isb(); \
+ par = read_sysreg(par_el1); \
+ \
+ fault = par & SYS_PAR_EL1_F; \
+ fsc = FIELD_GET(SYS_PAR_EL1_FST, par); \
+ \
+ __GUEST_ASSERT((expect_fault) == fault, \
+		       "AT "#op": %sexpected fault (par: %lx)", \
+ (expect_fault) ? "" : "un", par); \
+ if ((expect_fault)) { \
+ __GUEST_ASSERT(fsc == ESR_ELx_FSC_ACCESS_L(3), \
+ "AT "#op": expected access flag fault (par: %lx)", \
+ par); \
+ } else { \
+ GUEST_ASSERT_EQ(FIELD_GET(SYS_PAR_EL1_ATTR, par), MAIR_ATTR_NORMAL); \
+ GUEST_ASSERT_EQ(FIELD_GET(SYS_PAR_EL1_SH, par), PTE_SHARED >> 8); \
+ GUEST_ASSERT_EQ(par & SYS_PAR_EL1_PA, TEST_ADDR); \
+ GUEST_SYNC(TEST_ACCESS_FLAG); \
+ } \
+} while (0)
+
+static void test_at(bool expect_fault)
+{
+ test_at_insn(S1E2R, expect_fault);
+ test_at_insn(S1E2W, expect_fault);
+
+ /* Reuse the stage-1 MMU context from EL2 at EL1 */
+ copy_el2_to_el1(SCTLR);
+ copy_el2_to_el1(MAIR);
+ copy_el2_to_el1(TCR);
+ copy_el2_to_el1(TTBR0);
+ copy_el2_to_el1(TTBR1);
+
+ /* Disable stage-2 translation and enter a non-host context */
+ write_sysreg(0, vtcr_el2);
+ write_sysreg(0, vttbr_el2);
+ sysreg_clear_set(hcr_el2, HCR_EL2_TGE | HCR_EL2_VM, 0);
+ isb();
+
+ test_at_insn(S1E1R, expect_fault);
+ test_at_insn(S1E1W, expect_fault);
+}
+
+static void guest_code(void)
+{
+ sysreg_clear_set(tcr_el1, TCR_HA, 0);
+ isb();
+
+ test_at(true);
+
+ if (!SYS_FIELD_GET(ID_AA64MMFR1_EL1, HAFDBS, read_sysreg(id_aa64mmfr1_el1)))
+ GUEST_DONE();
+
+ /*
+ * KVM's software PTW makes the implementation choice that the AT
+ * instruction sets the access flag.
+ */
+ sysreg_clear_set(tcr_el1, 0, TCR_HA);
+ isb();
+ test_at(false);
+
+ GUEST_DONE();
+}
+
+static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc)
+{
+ switch (uc->args[1]) {
+ case CLEAR_ACCESS_FLAG:
+ /*
+ * Delete + reinstall the memslot to invalidate stage-2
+ * mappings of the stage-1 page tables, forcing KVM to
+ * use the 'slow' AT emulation path.
+ *
+ * This and clearing the access flag from host userspace
+ * ensures that the access flag cannot be set speculatively
+ * and is reliably cleared at the time of the AT instruction.
+ */
+ clear_bit(__ffs(PTE_AF), ptep_hva);
+ vm_mem_region_reload(vcpu->vm, vcpu->vm->memslots[MEM_REGION_PT]);
+ break;
+ case TEST_ACCESS_FLAG:
+ TEST_ASSERT(test_bit(__ffs(PTE_AF), ptep_hva),
+ "Expected access flag to be set (desc: %lu)", *ptep_hva);
+ break;
+ default:
+ TEST_FAIL("Unexpected SYNC arg: %lu", uc->args[1]);
+ }
+}
+
+static void run_test(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ while (true) {
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_DONE:
+ return;
+ case UCALL_SYNC:
+ handle_sync(vcpu, &uc);
+ continue;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ return;
+ default:
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+ }
+}
+
+int main(void)
+{
+ struct kvm_vcpu_init init;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(kvm_check_cap(KVM_CAP_ARM_EL2));
+
+ vm = vm_create(1);
+
+ kvm_get_default_vcpu_target(vm, &init);
+ init.features[0] |= BIT(KVM_ARM_VCPU_HAS_EL2);
+ vcpu = aarch64_vcpu_add(vm, 0, &init, guest_code);
+ kvm_arch_vm_finalize_vcpus(vm);
+
+ virt_map(vm, TEST_ADDR, TEST_ADDR, 1);
+ ptep_hva = virt_get_pte_hva_at_level(vm, TEST_ADDR, 3);
+ run_test(vcpu);
+
+ kvm_vm_free(vm);
+ return 0;
+}
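For reference, the PAR_EL1 checks in test_at_insn() reduce to the decode below; a standalone sketch reusing the same field macros, not code from this series:

	static bool par_is_fault(u64 par, u64 *fsc)
	{
		/* F set: the walk faulted and FST holds the fault status code */
		if (par & SYS_PAR_EL1_F) {
			*fsc = FIELD_GET(SYS_PAR_EL1_FST, par);
			return true;
		}

		/* No fault: PA, ATTR and SH describe the translation */
		*fsc = 0;
		return false;
	}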
diff --git a/tools/testing/selftests/kvm/arm64/sea_to_user.c b/tools/testing/selftests/kvm/arm64/sea_to_user.c
new file mode 100644
index 000000000000..573dd790aeb8
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/sea_to_user.c
@@ -0,0 +1,331 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test that KVM returns to userspace with KVM_EXIT_ARM_SEA when host APEI
+ * fails to handle a SEA and userspace has opted in via KVM_CAP_ARM_SEA_TO_USER.
+ *
+ * After reaching userspace with the expected arm_sea info, the test also
+ * exercises injecting a synchronous external data abort back into the guest.
+ *
+ * This test utilizes EINJ to generate a REAL synchronous external data
+ * abort by consuming a recoverable uncorrectable memory error. Therefore
+ * the device under test must support EINJ in both firmware and host kernel,
+ * including the notrigger feature. Otherwise the test will be skipped.
+ * The platform under test's APEI must also be unable to claim the SEA;
+ * otherwise the test will likewise be skipped.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "guest_modes.h"
+
+#define PAGE_PRESENT (1ULL << 63)
+#define PAGE_PHYSICAL 0x007fffffffffffffULL
+#define PAGE_ADDR_MASK (~(0xfffULL))
+
+/* Group ISV and ISS[23:14]. */
+#define ESR_ELx_INST_SYNDROME ((ESR_ELx_ISV) | (ESR_ELx_SAS) | \
+ (ESR_ELx_SSE) | (ESR_ELx_SRT_MASK) | \
+ (ESR_ELx_SF) | (ESR_ELx_AR))
+
+#define EINJ_ETYPE "/sys/kernel/debug/apei/einj/error_type"
+#define EINJ_ADDR "/sys/kernel/debug/apei/einj/param1"
+#define EINJ_MASK "/sys/kernel/debug/apei/einj/param2"
+#define EINJ_FLAGS "/sys/kernel/debug/apei/einj/flags"
+#define EINJ_NOTRIGGER "/sys/kernel/debug/apei/einj/notrigger"
+#define EINJ_DOIT "/sys/kernel/debug/apei/einj/error_inject"
+/* Memory Uncorrectable non-fatal. */
+#define ERROR_TYPE_MEMORY_UER 0x10
+/* Memory address and mask valid (param1 and param2). */
+#define MASK_MEMORY_UER 0b10
+
+/* Guest virtual address region = [2G, 3G). */
+#define START_GVA 0x80000000UL
+#define VM_MEM_SIZE 0x40000000UL
+/* Note: EINJ_OFFSET must be < VM_MEM_SIZE. */
+#define EINJ_OFFSET 0x01234badUL
+#define EINJ_GVA ((START_GVA) + (EINJ_OFFSET))
+
+static vm_paddr_t einj_gpa;
+static void *einj_hva;
+static uint64_t einj_hpa;
+static bool far_invalid;
+
+static uint64_t translate_to_host_paddr(unsigned long vaddr)
+{
+ uint64_t pinfo;
+ int64_t offset = vaddr / getpagesize() * sizeof(pinfo);
+ int fd;
+ uint64_t page_addr;
+ uint64_t paddr;
+
+ fd = open("/proc/self/pagemap", O_RDONLY);
+ if (fd < 0)
+ ksft_exit_fail_perror("Failed to open /proc/self/pagemap");
+ if (pread(fd, &pinfo, sizeof(pinfo), offset) != sizeof(pinfo)) {
+ close(fd);
+ ksft_exit_fail_perror("Failed to read /proc/self/pagemap");
+ }
+
+ close(fd);
+
+ if ((pinfo & PAGE_PRESENT) == 0)
+ ksft_exit_fail_perror("Page not present");
+
+ page_addr = (pinfo & PAGE_PHYSICAL) << MIN_PAGE_SHIFT;
+ paddr = page_addr + (vaddr & (getpagesize() - 1));
+ return paddr;
+}
+
+static void write_einj_entry(const char *einj_path, uint64_t val)
+{
+ char cmd[256] = {0};
+ FILE *cmdfile = NULL;
+
+ sprintf(cmd, "echo %#lx > %s", val, einj_path);
+	cmdfile = popen(cmd, "r");
+	if (!cmdfile)
+		ksft_exit_fail_perror("Failed to run EINJ write command");
+
+ if (pclose(cmdfile) == 0)
+ ksft_print_msg("echo %#lx > %s - done\n", val, einj_path);
+ else
+ ksft_exit_fail_perror("Failed to write EINJ entry");
+}
+
+static void inject_uer(uint64_t paddr)
+{
+ if (access("/sys/firmware/acpi/tables/EINJ", R_OK) == -1)
+		ksft_test_result_skip("EINJ table not available in firmware");
+
+ if (access(EINJ_ETYPE, R_OK | W_OK) == -1)
+ ksft_test_result_skip("EINJ module probably not loaded?");
+
+ write_einj_entry(EINJ_ETYPE, ERROR_TYPE_MEMORY_UER);
+ write_einj_entry(EINJ_FLAGS, MASK_MEMORY_UER);
+ write_einj_entry(EINJ_ADDR, paddr);
+ write_einj_entry(EINJ_MASK, ~0x0UL);
+ write_einj_entry(EINJ_NOTRIGGER, 1);
+ write_einj_entry(EINJ_DOIT, 1);
+}
+
+/*
+ * When host APEI successfully claims the SEA caused by guest_code, kernel
+ * will send SIGBUS signal with BUS_MCEERR_AR to test thread.
+ *
+ * We set up this SIGBUS handler to skip the test for that case.
+ */
+static void sigbus_signal_handler(int sig, siginfo_t *si, void *v)
+{
+ ksft_print_msg("SIGBUS (%d) received, dumping siginfo...\n", sig);
+ ksft_print_msg("si_signo=%d, si_errno=%d, si_code=%d, si_addr=%p\n",
+ si->si_signo, si->si_errno, si->si_code, si->si_addr);
+ if (si->si_code == BUS_MCEERR_AR)
+ ksft_test_result_skip("SEA is claimed by host APEI\n");
+ else
+ ksft_test_result_fail("Exit with signal unhandled\n");
+
+ exit(0);
+}
+
+static void setup_sigbus_handler(void)
+{
+ struct sigaction act;
+
+ memset(&act, 0, sizeof(act));
+ sigemptyset(&act.sa_mask);
+ act.sa_sigaction = sigbus_signal_handler;
+ act.sa_flags = SA_SIGINFO;
+ TEST_ASSERT(sigaction(SIGBUS, &act, NULL) == 0,
+ "Failed to setup SIGBUS handler");
+}
+
+static void guest_code(void)
+{
+ uint64_t guest_data;
+
+	/* Consuming the injected error will cause a SEA. */
+ guest_data = *(uint64_t *)EINJ_GVA;
+
+ GUEST_FAIL("Poison not protected by SEA: gva=%#lx, guest_data=%#lx\n",
+ EINJ_GVA, guest_data);
+}
+
+static void expect_sea_handler(struct ex_regs *regs)
+{
+ u64 esr = read_sysreg(esr_el1);
+ u64 far = read_sysreg(far_el1);
+ bool expect_far_invalid = far_invalid;
+
+ GUEST_PRINTF("Handling Guest SEA\n");
+ GUEST_PRINTF("ESR_EL1=%#lx, FAR_EL1=%#lx\n", esr, far);
+
+ GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR);
+ GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT);
+
+ if (expect_far_invalid) {
+ GUEST_ASSERT_EQ(esr & ESR_ELx_FnV, ESR_ELx_FnV);
+ GUEST_PRINTF("Guest observed garbage value in FAR\n");
+ } else {
+ GUEST_ASSERT_EQ(esr & ESR_ELx_FnV, 0);
+ GUEST_ASSERT_EQ(far, EINJ_GVA);
+ }
+
+ GUEST_DONE();
+}
+
+static void vcpu_inject_sea(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_events events = {};
+
+ events.exception.ext_dabt_pending = true;
+ vcpu_events_set(vcpu, &events);
+}
+
+static void run_vm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+ bool guest_done = false;
+ struct kvm_run *run = vcpu->run;
+ u64 esr;
+
+ /* Resume the vCPU after error injection to consume the error. */
+ vcpu_run(vcpu);
+
+ ksft_print_msg("Dump kvm_run info about KVM_EXIT_%s\n",
+ exit_reason_str(run->exit_reason));
+ ksft_print_msg("kvm_run.arm_sea: esr=%#llx, flags=%#llx\n",
+ run->arm_sea.esr, run->arm_sea.flags);
+ ksft_print_msg("kvm_run.arm_sea: gva=%#llx, gpa=%#llx\n",
+ run->arm_sea.gva, run->arm_sea.gpa);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_SEA);
+
+ esr = run->arm_sea.esr;
+ TEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_LOW);
+ TEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT);
+ TEST_ASSERT_EQ(ESR_ELx_ISS2(esr), 0);
+ TEST_ASSERT_EQ((esr & ESR_ELx_INST_SYNDROME), 0);
+ TEST_ASSERT_EQ(esr & ESR_ELx_VNCR, 0);
+
+ if (!(esr & ESR_ELx_FnV)) {
+ ksft_print_msg("Expect gva to match given FnV bit is 0\n");
+ TEST_ASSERT_EQ(run->arm_sea.gva, EINJ_GVA);
+ }
+
+ if (run->arm_sea.flags & KVM_EXIT_ARM_SEA_FLAG_GPA_VALID) {
+ ksft_print_msg("Expect gpa to match given KVM_EXIT_ARM_SEA_FLAG_GPA_VALID is set\n");
+ TEST_ASSERT_EQ(run->arm_sea.gpa, einj_gpa & PAGE_ADDR_MASK);
+ }
+
+ far_invalid = esr & ESR_ELx_FnV;
+
+ /* Inject a SEA into guest and expect handled in SEA handler. */
+ vcpu_inject_sea(vcpu);
+
+ /* Expect the guest to reach GUEST_DONE gracefully. */
+ do {
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_PRINTF:
+ ksft_print_msg("From guest: %s", uc.buffer);
+ break;
+ case UCALL_DONE:
+ ksft_print_msg("Guest done gracefully!\n");
+ guest_done = 1;
+ break;
+ case UCALL_ABORT:
+ ksft_print_msg("Guest aborted!\n");
+ guest_done = 1;
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ default:
+			TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+ } while (!guest_done);
+}
+
+static struct kvm_vm *vm_create_with_sea_handler(struct kvm_vcpu **vcpu)
+{
+ size_t backing_page_size;
+ size_t guest_page_size;
+ size_t alignment;
+ uint64_t num_guest_pages;
+ vm_paddr_t start_gpa;
+ enum vm_mem_backing_src_type src_type = VM_MEM_SRC_ANONYMOUS_HUGETLB_1GB;
+ struct kvm_vm *vm;
+
+ backing_page_size = get_backing_src_pagesz(src_type);
+ guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size;
+ alignment = max(backing_page_size, guest_page_size);
+ num_guest_pages = VM_MEM_SIZE / guest_page_size;
+
+ vm = __vm_create_with_one_vcpu(vcpu, num_guest_pages, guest_code);
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(*vcpu);
+
+ vm_install_sync_handler(vm,
+ /*vector=*/VECTOR_SYNC_CURRENT,
+ /*ec=*/ESR_ELx_EC_DABT_CUR,
+ /*handler=*/expect_sea_handler);
+
+ start_gpa = (vm->max_gfn - num_guest_pages) * guest_page_size;
+ start_gpa = align_down(start_gpa, alignment);
+
+ vm_userspace_mem_region_add(
+ /*vm=*/vm,
+ /*src_type=*/src_type,
+ /*guest_paddr=*/start_gpa,
+ /*slot=*/1,
+ /*npages=*/num_guest_pages,
+ /*flags=*/0);
+
+ virt_map(vm, START_GVA, start_gpa, num_guest_pages);
+
+ ksft_print_msg("Mapped %#lx pages: gva=%#lx to gpa=%#lx\n",
+ num_guest_pages, START_GVA, start_gpa);
+ return vm;
+}
+
+static void vm_inject_memory_uer(struct kvm_vm *vm)
+{
+ uint64_t guest_data;
+
+ einj_gpa = addr_gva2gpa(vm, EINJ_GVA);
+ einj_hva = addr_gva2hva(vm, EINJ_GVA);
+
+	/* Populate the page with data before injecting the UER. */
+ *(uint64_t *)einj_hva = 0xBAADCAFE;
+ guest_data = *(uint64_t *)einj_hva;
+	ksft_print_msg("Before EINJect: data=%#lx\n", guest_data);
+
+ einj_hpa = translate_to_host_paddr((unsigned long)einj_hva);
+
+ ksft_print_msg("EINJ_GVA=%#lx, einj_gpa=%#lx, einj_hva=%p, einj_hpa=%#lx\n",
+ EINJ_GVA, einj_gpa, einj_hva, einj_hpa);
+
+ inject_uer(einj_hpa);
+ ksft_print_msg("Memory UER EINJected\n");
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SEA_TO_USER));
+
+ setup_sigbus_handler();
+
+ vm = vm_create_with_sea_handler(&vcpu);
+ vm_enable_cap(vm, KVM_CAP_ARM_SEA_TO_USER, 0);
+ vm_inject_memory_uer(vm);
+ run_vm(vm, vcpu);
+ kvm_vm_free(vm);
+
+ return 0;
+}
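For reference, the pagemap arithmetic in translate_to_host_paddr() follows the documented /proc/<pid>/pagemap layout (bit 63 = present, bits 54:0 = PFN); a condensed sketch with the same masks, shown for illustration only:

	static uint64_t pagemap_entry_to_paddr(uint64_t pinfo, unsigned long vaddr)
	{
		if (!(pinfo & PAGE_PRESENT))
			return 0;	/* no PFN for a non-resident page */

		return ((pinfo & PAGE_PHYSICAL) << MIN_PAGE_SHIFT) +
		       (vaddr & (getpagesize() - 1));
	}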
diff --git a/tools/testing/selftests/kvm/arm64/vgic_irq.c b/tools/testing/selftests/kvm/arm64/vgic_irq.c
index 6338f5bbdb70..2fb2c7939fe9 100644
--- a/tools/testing/selftests/kvm/arm64/vgic_irq.c
+++ b/tools/testing/selftests/kvm/arm64/vgic_irq.c
@@ -29,6 +29,7 @@ struct test_args {
bool level_sensitive; /* 1 is level, 0 is edge */
int kvm_max_routes; /* output of KVM_CAP_IRQ_ROUTING */
bool kvm_supports_irqfd; /* output of KVM_CAP_IRQFD */
+ uint32_t shared_data;
};
/*
@@ -205,7 +206,7 @@ static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid,
do { \
uint32_t _intid; \
_intid = gic_get_and_ack_irq(); \
- GUEST_ASSERT(_intid == 0 || _intid == IAR_SPURIOUS); \
+ GUEST_ASSERT(_intid == IAR_SPURIOUS); \
} while (0)
#define CAT_HELPER(a, b) a ## b
@@ -359,8 +360,9 @@ static uint32_t wait_for_and_activate_irq(void)
* interrupts for the whole test.
*/
static void test_inject_preemption(struct test_args *args,
- uint32_t first_intid, int num,
- kvm_inject_cmd cmd)
+ uint32_t first_intid, int num,
+ const unsigned long *exclude,
+ kvm_inject_cmd cmd)
{
uint32_t intid, prio, step = KVM_PRIO_STEPS;
int i;
@@ -379,6 +381,10 @@ static void test_inject_preemption(struct test_args *args,
for (i = 0; i < num; i++) {
uint32_t tmp;
intid = i + first_intid;
+
+ if (exclude && test_bit(i, exclude))
+ continue;
+
KVM_INJECT(cmd, intid);
/* Each successive IRQ will preempt the previous one. */
tmp = wait_for_and_activate_irq();
@@ -390,15 +396,33 @@ static void test_inject_preemption(struct test_args *args,
/* finish handling the IRQs starting with the highest priority one. */
for (i = 0; i < num; i++) {
intid = num - i - 1 + first_intid;
+
+ if (exclude && test_bit(intid - first_intid, exclude))
+ continue;
+
gic_set_eoi(intid);
- if (args->eoi_split)
- gic_set_dir(intid);
+ }
+
+ if (args->eoi_split) {
+ for (i = 0; i < num; i++) {
+ intid = i + first_intid;
+
+ if (exclude && test_bit(i, exclude))
+ continue;
+
+			gic_set_dir(intid);
+ }
}
local_irq_enable();
- for (i = 0; i < num; i++)
+ for (i = 0; i < num; i++) {
+ if (exclude && test_bit(i, exclude))
+ continue;
+
GUEST_ASSERT(!gic_irq_get_active(i + first_intid));
+ }
GUEST_ASSERT_EQ(gic_read_ap1r0(), 0);
GUEST_ASSERT_IAR_EMPTY();
@@ -436,33 +460,32 @@ static void test_injection_failure(struct test_args *args,
static void test_preemption(struct test_args *args, struct kvm_inject_desc *f)
{
- /*
- * Test up to 4 levels of preemption. The reason is that KVM doesn't
- * currently implement the ability to have more than the number-of-LRs
- * number of concurrently active IRQs. The number of LRs implemented is
- * IMPLEMENTATION DEFINED, however, it seems that most implement 4.
- */
+ /* Timer PPIs cannot be injected from userspace */
+	static const unsigned long ppi_exclude = (BIT(26 - MIN_PPI) |
+						  BIT(27 - MIN_PPI) |
+						  BIT(28 - MIN_PPI) |
+						  BIT(30 - MIN_PPI));
+
if (f->sgi)
- test_inject_preemption(args, MIN_SGI, 4, f->cmd);
+ test_inject_preemption(args, MIN_SGI, 16, NULL, f->cmd);
if (f->ppi)
- test_inject_preemption(args, MIN_PPI, 4, f->cmd);
+ test_inject_preemption(args, MIN_PPI, 16, &ppi_exclude, f->cmd);
if (f->spi)
- test_inject_preemption(args, MIN_SPI, 4, f->cmd);
+ test_inject_preemption(args, MIN_SPI, 31, NULL, f->cmd);
}
static void test_restore_active(struct test_args *args, struct kvm_inject_desc *f)
{
- /* Test up to 4 active IRQs. Same reason as in test_preemption. */
if (f->sgi)
- guest_restore_active(args, MIN_SGI, 4, f->cmd);
+ guest_restore_active(args, MIN_SGI, 16, f->cmd);
if (f->ppi)
- guest_restore_active(args, MIN_PPI, 4, f->cmd);
+ guest_restore_active(args, MIN_PPI, 16, f->cmd);
if (f->spi)
- guest_restore_active(args, MIN_SPI, 4, f->cmd);
+ guest_restore_active(args, MIN_SPI, 31, f->cmd);
}
static void guest_code(struct test_args *args)
@@ -473,12 +496,12 @@ static void guest_code(struct test_args *args)
gic_init(GIC_V3, 1);
- for (i = 0; i < nr_irqs; i++)
- gic_irq_enable(i);
-
for (i = MIN_SPI; i < nr_irqs; i++)
gic_irq_set_config(i, !level_sensitive);
+ for (i = 0; i < nr_irqs; i++)
+ gic_irq_enable(i);
+
gic_set_eoi_split(args->eoi_split);
reset_priorities(args);
@@ -636,7 +659,7 @@ static void kvm_routing_and_irqfd_check(struct kvm_vm *vm,
}
for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++)
- close(fd[f]);
+ kvm_close(fd[f]);
}
/* handles the valid case: intid=0xffffffff num=1 */
@@ -779,6 +802,221 @@ done:
kvm_vm_free(vm);
}
+static void guest_code_asym_dir(struct test_args *args, int cpuid)
+{
+ gic_init(GIC_V3, 2);
+
+ gic_set_eoi_split(1);
+ gic_set_priority_mask(CPU_PRIO_MASK);
+
+ if (cpuid == 0) {
+ uint32_t intid;
+
+ local_irq_disable();
+
+ gic_set_priority(MIN_PPI, IRQ_DEFAULT_PRIO);
+ gic_irq_enable(MIN_SPI);
+ gic_irq_set_pending(MIN_SPI);
+
+ intid = wait_for_and_activate_irq();
+ GUEST_ASSERT_EQ(intid, MIN_SPI);
+
+ gic_set_eoi(intid);
+ isb();
+
+ WRITE_ONCE(args->shared_data, MIN_SPI);
+ dsb(ishst);
+
+ do {
+ dsb(ishld);
+ } while (READ_ONCE(args->shared_data) == MIN_SPI);
+ GUEST_ASSERT(!gic_irq_get_active(MIN_SPI));
+ } else {
+ do {
+ dsb(ishld);
+ } while (READ_ONCE(args->shared_data) != MIN_SPI);
+
+ gic_set_dir(MIN_SPI);
+ isb();
+
+ WRITE_ONCE(args->shared_data, 0);
+ dsb(ishst);
+ }
+
+ GUEST_DONE();
+}
+
+static void guest_code_group_en(struct test_args *args, int cpuid)
+{
+ uint32_t intid;
+
+ gic_init(GIC_V3, 2);
+
+ gic_set_eoi_split(0);
+ gic_set_priority_mask(CPU_PRIO_MASK);
+ /* SGI0 is G0, which is disabled */
+ gic_irq_set_group(0, 0);
+
+ /* Configure all SGIs with decreasing priority */
+ for (intid = 0; intid < MIN_PPI; intid++) {
+ gic_set_priority(intid, (intid + 1) * 8);
+ gic_irq_enable(intid);
+ gic_irq_set_pending(intid);
+ }
+
+ /* Ack and EOI all G1 interrupts */
+ for (int i = 1; i < MIN_PPI; i++) {
+ intid = wait_for_and_activate_irq();
+
+ GUEST_ASSERT(intid < MIN_PPI);
+ gic_set_eoi(intid);
+ isb();
+ }
+
+ /*
+ * Check that SGI0 is still pending, inactive, and that we cannot
+ * ack anything.
+ */
+ GUEST_ASSERT(gic_irq_get_pending(0));
+ GUEST_ASSERT(!gic_irq_get_active(0));
+ GUEST_ASSERT_IAR_EMPTY();
+ GUEST_ASSERT(read_sysreg_s(SYS_ICC_IAR0_EL1) == IAR_SPURIOUS);
+
+ /* Open the G0 gates, and verify we can ack SGI0 */
+ write_sysreg_s(1, SYS_ICC_IGRPEN0_EL1);
+ isb();
+
+ do {
+ intid = read_sysreg_s(SYS_ICC_IAR0_EL1);
+ } while (intid == IAR_SPURIOUS);
+
+ GUEST_ASSERT(intid == 0);
+ GUEST_DONE();
+}
+
+static void guest_code_timer_spi(struct test_args *args, int cpuid)
+{
+ uint32_t intid;
+ u64 val;
+
+ gic_init(GIC_V3, 2);
+
+ gic_set_eoi_split(1);
+ gic_set_priority_mask(CPU_PRIO_MASK);
+
+ /* Add a pending SPI so that KVM starts trapping DIR */
+ gic_set_priority(MIN_SPI + cpuid, IRQ_DEFAULT_PRIO);
+ gic_irq_set_pending(MIN_SPI + cpuid);
+
+ /* Configure the timer with a higher priority, make it pending */
+ gic_set_priority(27, IRQ_DEFAULT_PRIO - 8);
+
+ isb();
+ val = read_sysreg(cntvct_el0);
+ write_sysreg(val, cntv_cval_el0);
+ write_sysreg(1, cntv_ctl_el0);
+ isb();
+
+ GUEST_ASSERT(gic_irq_get_pending(27));
+
+ /* Enable both interrupts */
+ gic_irq_enable(MIN_SPI + cpuid);
+ gic_irq_enable(27);
+
+ /* The timer must fire */
+ intid = wait_for_and_activate_irq();
+ GUEST_ASSERT(intid == 27);
+
+ /* Check that we can deassert it */
+ write_sysreg(0, cntv_ctl_el0);
+ isb();
+
+ GUEST_ASSERT(!gic_irq_get_pending(27));
+
+ /*
+ * Priority drop, deactivation -- we expect that the host
+ * deactivation will have been effective
+ */
+ gic_set_eoi(27);
+ gic_set_dir(27);
+
+ GUEST_ASSERT(!gic_irq_get_active(27));
+
+ /* Do it one more time */
+ isb();
+ val = read_sysreg(cntvct_el0);
+ write_sysreg(val, cntv_cval_el0);
+ write_sysreg(1, cntv_ctl_el0);
+ isb();
+
+ GUEST_ASSERT(gic_irq_get_pending(27));
+
+ /* The timer must fire again */
+ intid = wait_for_and_activate_irq();
+ GUEST_ASSERT(intid == 27);
+
+ GUEST_DONE();
+}
+
+static void *test_vcpu_run(void *arg)
+{
+ struct kvm_vcpu *vcpu = arg;
+ struct ucall uc;
+
+ while (1) {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ return NULL;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+ return NULL;
+}
+
+static void test_vgic_two_cpus(void *gcode)
+{
+ pthread_t thr[2];
+ struct kvm_vcpu *vcpus[2];
+ struct test_args args = {};
+ struct kvm_vm *vm;
+ vm_vaddr_t args_gva;
+ int gic_fd, ret;
+
+ vm = vm_create_with_vcpus(2, gcode, vcpus);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpus[0]);
+ vcpu_init_descriptor_tables(vcpus[1]);
+
+	/* Set up the guest args page shared by both vCPUs. */
+ args_gva = vm_vaddr_alloc_page(vm);
+ memcpy(addr_gva2hva(vm, args_gva), &args, sizeof(args));
+ vcpu_args_set(vcpus[0], 2, args_gva, 0);
+ vcpu_args_set(vcpus[1], 2, args_gva, 1);
+
+ gic_fd = vgic_v3_setup(vm, 2, 64);
+
+ ret = pthread_create(&thr[0], NULL, test_vcpu_run, vcpus[0]);
+ if (ret)
+		TEST_FAIL("Can't create thread for vcpu 0 (%d)", ret);
+ ret = pthread_create(&thr[1], NULL, test_vcpu_run, vcpus[1]);
+ if (ret)
+		TEST_FAIL("Can't create thread for vcpu 1 (%d)", ret);
+
+ pthread_join(thr[0], NULL);
+ pthread_join(thr[1], NULL);
+
+ close(gic_fd);
+ kvm_vm_free(vm);
+}
+
static void help(const char *name)
{
printf(
@@ -835,6 +1073,9 @@ int main(int argc, char **argv)
test_vgic(nr_irqs, false /* level */, true /* eoi_split */);
test_vgic(nr_irqs, true /* level */, false /* eoi_split */);
test_vgic(nr_irqs, true /* level */, true /* eoi_split */);
+ test_vgic_two_cpus(guest_code_asym_dir);
+ test_vgic_two_cpus(guest_code_group_en);
+ test_vgic_two_cpus(guest_code_timer_spi);
} else {
test_vgic(nr_irqs, level_sensitive, eoi_split);
}
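For reference, additional two-vCPU scenarios follow the same shape and are run via test_vgic_two_cpus(); a skeleton sketch, with the function name and body illustrative:

	static void guest_code_example(struct test_args *args, int cpuid)
	{
		gic_init(GIC_V3, 2);
		gic_set_eoi_split(0);
		gic_set_priority_mask(CPU_PRIO_MASK);

		/* ... per-vCPU scenario keyed on cpuid, as in the tests above ... */

		GUEST_DONE();
	}

Passing it as test_vgic_two_cpus(guest_code_example) takes care of vCPU creation, the shared args page, vgic setup, and the two runner threads.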
diff --git a/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c
index 687d04463983..e857a605f577 100644
--- a/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c
+++ b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c
@@ -118,6 +118,10 @@ static void guest_setup_gic(void)
guest_setup_its_mappings();
guest_invalidate_all_rdists();
+
+ /* SYNC to ensure ITS setup is complete */
+ for (cpuid = 0; cpuid < test_data.nr_cpus; cpuid++)
+ its_send_sync_cmd(test_data.cmdq_base_va, cpuid);
}
static void guest_code(size_t nr_lpis)
diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c
index e7d9aeb418d3..618c937f3c90 100644
--- a/tools/testing/selftests/kvm/guest_memfd_test.c
+++ b/tools/testing/selftests/kvm/guest_memfd_test.c
@@ -19,6 +19,7 @@
#include <sys/stat.h>
#include "kvm_util.h"
+#include "numaif.h"
#include "test_util.h"
#include "ucall_common.h"
@@ -75,6 +76,101 @@ static void test_mmap_supported(int fd, size_t total_size)
kvm_munmap(mem, total_size);
}
+static void test_mbind(int fd, size_t total_size)
+{
+ const unsigned long nodemask_0 = 1; /* nid: 0 */
+ unsigned long nodemask = 0;
+ unsigned long maxnode = 8;
+ int policy;
+ char *mem;
+ int ret;
+
+ if (!is_multi_numa_node_system())
+ return;
+
+ mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
+
+ /* Test MPOL_INTERLEAVE policy */
+ kvm_mbind(mem, page_size * 2, MPOL_INTERLEAVE, &nodemask_0, maxnode, 0);
+ kvm_get_mempolicy(&policy, &nodemask, maxnode, mem, MPOL_F_ADDR);
+ TEST_ASSERT(policy == MPOL_INTERLEAVE && nodemask == nodemask_0,
+ "Wanted MPOL_INTERLEAVE (%u) and nodemask 0x%lx, got %u and 0x%lx",
+ MPOL_INTERLEAVE, nodemask_0, policy, nodemask);
+
+ /* Test basic MPOL_BIND policy */
+ kvm_mbind(mem + page_size * 2, page_size * 2, MPOL_BIND, &nodemask_0, maxnode, 0);
+ kvm_get_mempolicy(&policy, &nodemask, maxnode, mem + page_size * 2, MPOL_F_ADDR);
+ TEST_ASSERT(policy == MPOL_BIND && nodemask == nodemask_0,
+ "Wanted MPOL_BIND (%u) and nodemask 0x%lx, got %u and 0x%lx",
+ MPOL_BIND, nodemask_0, policy, nodemask);
+
+ /* Test MPOL_DEFAULT policy */
+ kvm_mbind(mem, total_size, MPOL_DEFAULT, NULL, 0, 0);
+ kvm_get_mempolicy(&policy, &nodemask, maxnode, mem, MPOL_F_ADDR);
+ TEST_ASSERT(policy == MPOL_DEFAULT && !nodemask,
+ "Wanted MPOL_DEFAULT (%u) and nodemask 0x0, got %u and 0x%lx",
+ MPOL_DEFAULT, policy, nodemask);
+
+ /* Test with invalid policy */
+ ret = mbind(mem, page_size, 999, &nodemask_0, maxnode, 0);
+ TEST_ASSERT(ret == -1 && errno == EINVAL,
+ "mbind with invalid policy should fail with EINVAL");
+
+ kvm_munmap(mem, total_size);
+}
+
+static void test_numa_allocation(int fd, size_t total_size)
+{
+ unsigned long node0_mask = 1; /* Node 0 */
+ unsigned long node1_mask = 2; /* Node 1 */
+ unsigned long maxnode = 8;
+ void *pages[4];
+ int status[4];
+ char *mem;
+ int i;
+
+ if (!is_multi_numa_node_system())
+ return;
+
+ mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
+
+ for (i = 0; i < 4; i++)
+ pages[i] = (char *)mem + page_size * i;
+
+ /* Set NUMA policy after allocation */
+ memset(mem, 0xaa, page_size);
+ kvm_mbind(pages[0], page_size, MPOL_BIND, &node0_mask, maxnode, 0);
+ kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, page_size);
+
+ /* Set NUMA policy before allocation */
+ kvm_mbind(pages[0], page_size * 2, MPOL_BIND, &node1_mask, maxnode, 0);
+ kvm_mbind(pages[2], page_size * 2, MPOL_BIND, &node0_mask, maxnode, 0);
+ memset(mem, 0xaa, total_size);
+
+ /* Validate if pages are allocated on specified NUMA nodes */
+ kvm_move_pages(0, 4, pages, NULL, status, 0);
+ TEST_ASSERT(status[0] == 1, "Expected page 0 on node 1, got it on node %d", status[0]);
+ TEST_ASSERT(status[1] == 1, "Expected page 1 on node 1, got it on node %d", status[1]);
+ TEST_ASSERT(status[2] == 0, "Expected page 2 on node 0, got it on node %d", status[2]);
+ TEST_ASSERT(status[3] == 0, "Expected page 3 on node 0, got it on node %d", status[3]);
+
+ /* Punch hole for all pages */
+ kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, total_size);
+
+ /* Change NUMA policy nodes and reallocate */
+ kvm_mbind(pages[0], page_size * 2, MPOL_BIND, &node0_mask, maxnode, 0);
+ kvm_mbind(pages[2], page_size * 2, MPOL_BIND, &node1_mask, maxnode, 0);
+ memset(mem, 0xaa, total_size);
+
+ kvm_move_pages(0, 4, pages, NULL, status, 0);
+ TEST_ASSERT(status[0] == 0, "Expected page 0 on node 0, got it on node %d", status[0]);
+ TEST_ASSERT(status[1] == 0, "Expected page 1 on node 0, got it on node %d", status[1]);
+ TEST_ASSERT(status[2] == 1, "Expected page 2 on node 1, got it on node %d", status[2]);
+ TEST_ASSERT(status[3] == 1, "Expected page 3 on node 1, got it on node %d", status[3]);
+
+ kvm_munmap(mem, total_size);
+}
+
static void test_fault_sigbus(int fd, size_t accessible_size, size_t map_size)
{
const char val = 0xaa;
@@ -273,11 +369,13 @@ static void __test_guest_memfd(struct kvm_vm *vm, uint64_t flags)
if (flags & GUEST_MEMFD_FLAG_INIT_SHARED) {
gmem_test(mmap_supported, vm, flags);
gmem_test(fault_overflow, vm, flags);
+ gmem_test(numa_allocation, vm, flags);
} else {
gmem_test(fault_private, vm, flags);
}
gmem_test(mmap_cow, vm, flags);
+ gmem_test(mbind, vm, flags);
} else {
gmem_test(mmap_not_supported, vm, flags);
}
diff --git a/tools/testing/selftests/kvm/include/arm64/gic.h b/tools/testing/selftests/kvm/include/arm64/gic.h
index baeb3c859389..cc7a7f34ed37 100644
--- a/tools/testing/selftests/kvm/include/arm64/gic.h
+++ b/tools/testing/selftests/kvm/include/arm64/gic.h
@@ -57,6 +57,7 @@ void gic_irq_set_pending(unsigned int intid);
void gic_irq_clear_pending(unsigned int intid);
bool gic_irq_get_pending(unsigned int intid);
void gic_irq_set_config(unsigned int intid, bool is_edge);
+void gic_irq_set_group(unsigned int intid, bool group);
void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size,
vm_paddr_t pend_table);
diff --git a/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h b/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h
index 3722ed9c8f96..58feef3eb386 100644
--- a/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h
+++ b/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h
@@ -15,5 +15,6 @@ void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool val
void its_send_mapti_cmd(void *cmdq_base, u32 device_id, u32 event_id,
u32 collection_id, u32 intid);
void its_send_invall_cmd(void *cmdq_base, u32 collection_id);
+void its_send_sync_cmd(void *cmdq_base, u32 vcpu_id);
#endif // __SELFTESTS_GIC_V3_ITS_H__
diff --git a/tools/testing/selftests/kvm/include/kvm_syscalls.h b/tools/testing/selftests/kvm/include/kvm_syscalls.h
new file mode 100644
index 000000000000..d4e613162bba
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/kvm_syscalls.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_SYSCALLS_H
+#define SELFTEST_KVM_SYSCALLS_H
+
+#include <sys/syscall.h>
+
+#define MAP_ARGS0(m,...)
+#define MAP_ARGS1(m,t,a,...) m(t,a)
+#define MAP_ARGS2(m,t,a,...) m(t,a), MAP_ARGS1(m,__VA_ARGS__)
+#define MAP_ARGS3(m,t,a,...) m(t,a), MAP_ARGS2(m,__VA_ARGS__)
+#define MAP_ARGS4(m,t,a,...) m(t,a), MAP_ARGS3(m,__VA_ARGS__)
+#define MAP_ARGS5(m,t,a,...) m(t,a), MAP_ARGS4(m,__VA_ARGS__)
+#define MAP_ARGS6(m,t,a,...) m(t,a), MAP_ARGS5(m,__VA_ARGS__)
+#define MAP_ARGS(n,...) MAP_ARGS##n(__VA_ARGS__)
+
+#define __DECLARE_ARGS(t, a) t a
+#define __UNPACK_ARGS(t, a) a
+
+#define DECLARE_ARGS(nr_args, args...) MAP_ARGS(nr_args, __DECLARE_ARGS, args)
+#define UNPACK_ARGS(nr_args, args...) MAP_ARGS(nr_args, __UNPACK_ARGS, args)
+
+#define __KVM_SYSCALL_ERROR(_name, _ret) \
+ "%s failed, rc: %i errno: %i (%s)", (_name), (_ret), errno, strerror(errno)
+
+/* Define a kvm_<syscall>() API to assert success. */
+#define __KVM_SYSCALL_DEFINE(name, nr_args, args...) \
+static inline void kvm_##name(DECLARE_ARGS(nr_args, args)) \
+{ \
+ int r; \
+ \
+ r = name(UNPACK_ARGS(nr_args, args)); \
+ TEST_ASSERT(!r, __KVM_SYSCALL_ERROR(#name, r)); \
+}
+
+/*
+ * Macro to define syscall APIs, either because KVM selftests doesn't link to
+ * the standard library, e.g. libnuma, or because there is no library that yet
+ * provides the syscall. These also generate a kvm_<name>() variant that
+ * asserts success via __KVM_SYSCALL_DEFINE().
+ */
+#define KVM_SYSCALL_DEFINE(name, nr_args, args...) \
+static inline long name(DECLARE_ARGS(nr_args, args)) \
+{ \
+ return syscall(__NR_##name, UNPACK_ARGS(nr_args, args)); \
+} \
+__KVM_SYSCALL_DEFINE(name, nr_args, args)
+
+/*
+ * Special case mmap(), as KVM selftests rarely/never specify an address,
+ * rarely specify an offset, and because the unique return code requires
+ * special handling anyway.
+ */
+static inline void *__kvm_mmap(size_t size, int prot, int flags, int fd,
+ off_t offset)
+{
+ void *mem;
+
+ mem = mmap(NULL, size, prot, flags, fd, offset);
+ TEST_ASSERT(mem != MAP_FAILED, __KVM_SYSCALL_ERROR("mmap()",
+ (int)(unsigned long)MAP_FAILED));
+ return mem;
+}
+
+static inline void *kvm_mmap(size_t size, int prot, int flags, int fd)
+{
+ return __kvm_mmap(size, prot, flags, fd, 0);
+}
+
+static inline int kvm_dup(int fd)
+{
+ int new_fd = dup(fd);
+
+ TEST_ASSERT(new_fd >= 0, __KVM_SYSCALL_ERROR("dup()", new_fd));
+ return new_fd;
+}
+
+__KVM_SYSCALL_DEFINE(munmap, 2, void *, mem, size_t, size);
+__KVM_SYSCALL_DEFINE(close, 1, int, fd);
+__KVM_SYSCALL_DEFINE(fallocate, 4, int, fd, int, mode, loff_t, offset, loff_t, len);
+__KVM_SYSCALL_DEFINE(ftruncate, 2, unsigned int, fd, off_t, length);
+
+#endif /* SELFTEST_KVM_SYSCALLS_H */
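To make the generators concrete: hand-expanding __KVM_SYSCALL_DEFINE(munmap, 2, void *, mem, size_t, size) yields the wrapper below, while KVM_SYSCALL_DEFINE() additionally emits a raw name() that forwards to syscall(__NR_name, ...):

	static inline void kvm_munmap(void *mem, size_t size)
	{
		int r;

		r = munmap(mem, size);
		TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("munmap", r));
	}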
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index d3f3e455c031..81f4355ff28a 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -23,6 +23,7 @@
#include <pthread.h>
+#include "kvm_syscalls.h"
#include "kvm_util_arch.h"
#include "kvm_util_types.h"
#include "sparsebit.h"
@@ -177,7 +178,7 @@ enum vm_guest_mode {
VM_MODE_P40V48_4K,
VM_MODE_P40V48_16K,
VM_MODE_P40V48_64K,
- VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */
+ VM_MODE_PXXVYY_4K, /* For 48-bit or 57-bit VA, depending on host support */
VM_MODE_P47V64_4K,
VM_MODE_P44V64_4K,
VM_MODE_P36V48_4K,
@@ -219,7 +220,7 @@ extern enum vm_guest_mode vm_mode_default;
#elif defined(__x86_64__)
-#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K
+#define VM_MODE_DEFAULT VM_MODE_PXXVYY_4K
#define MIN_PAGE_SHIFT 12U
#define ptes_per_page(page_size) ((page_size) / 8)
@@ -283,34 +284,6 @@ static inline bool kvm_has_cap(long cap)
return kvm_check_cap(cap);
}
-#define __KVM_SYSCALL_ERROR(_name, _ret) \
- "%s failed, rc: %i errno: %i (%s)", (_name), (_ret), errno, strerror(errno)
-
-static inline void *__kvm_mmap(size_t size, int prot, int flags, int fd,
- off_t offset)
-{
- void *mem;
-
- mem = mmap(NULL, size, prot, flags, fd, offset);
- TEST_ASSERT(mem != MAP_FAILED, __KVM_SYSCALL_ERROR("mmap()",
- (int)(unsigned long)MAP_FAILED));
-
- return mem;
-}
-
-static inline void *kvm_mmap(size_t size, int prot, int flags, int fd)
-{
- return __kvm_mmap(size, prot, flags, fd, 0);
-}
-
-static inline void kvm_munmap(void *mem, size_t size)
-{
- int ret;
-
- ret = munmap(mem, size);
- TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
-}
-
/*
* Use the "inner", double-underscore macro when reporting errors from within
* other macros so that the name of ioctl() and not its literal numeric value
@@ -700,12 +673,12 @@ int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flag
uint32_t guest_memfd, uint64_t guest_memfd_offset);
void vm_userspace_mem_region_add(struct kvm_vm *vm,
- enum vm_mem_backing_src_type src_type,
- uint64_t guest_paddr, uint32_t slot, uint64_t npages,
- uint32_t flags);
+ enum vm_mem_backing_src_type src_type,
+ uint64_t gpa, uint32_t slot, uint64_t npages,
+ uint32_t flags);
void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
- uint64_t guest_paddr, uint32_t slot, uint64_t npages,
- uint32_t flags, int guest_memfd_fd, uint64_t guest_memfd_offset);
+ uint64_t gpa, uint32_t slot, uint64_t npages, uint32_t flags,
+ int guest_memfd_fd, uint64_t guest_memfd_offset);
#ifndef vm_arch_has_protected_memory
static inline bool vm_arch_has_protected_memory(struct kvm_vm *vm)
@@ -715,6 +688,7 @@ static inline bool vm_arch_has_protected_memory(struct kvm_vm *vm)
#endif
void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
+void vm_mem_region_reload(struct kvm_vm *vm, uint32_t slot);
void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id);
@@ -1230,6 +1204,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr);
static inline void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
{
virt_arch_pg_map(vm, vaddr, paddr);
+ sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift);
}
diff --git a/tools/testing/selftests/kvm/include/loongarch/arch_timer.h b/tools/testing/selftests/kvm/include/loongarch/arch_timer.h
new file mode 100644
index 000000000000..2ed106b32c81
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/loongarch/arch_timer.h
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * LoongArch Constant Timer specific interface
+ */
+#ifndef SELFTEST_KVM_ARCH_TIMER_H
+#define SELFTEST_KVM_ARCH_TIMER_H
+
+#include "processor.h"
+
+/* LoongArch timer frequency is a constant 100 MHz */
+#define TIMER_FREQ (100UL << 20)
+#define msec_to_cycles(msec) (TIMER_FREQ * (unsigned long)(msec) / 1000)
+#define usec_to_cycles(usec) (TIMER_FREQ * (unsigned long)(usec) / 1000000)
+#define cycles_to_usec(cycles) ((unsigned long)(cycles) * 1000000 / TIMER_FREQ)
+
+static inline unsigned long timer_get_cycles(void)
+{
+ unsigned long val = 0;
+
+ __asm__ __volatile__(
+ "rdtime.d %0, $zero\n\t"
+ : "=r"(val)
+ :
+ );
+
+ return val;
+}
+
+static inline unsigned long timer_get_cfg(void)
+{
+ return csr_read(LOONGARCH_CSR_TCFG);
+}
+
+static inline unsigned long timer_get_val(void)
+{
+ return csr_read(LOONGARCH_CSR_TVAL);
+}
+
+static inline void disable_timer(void)
+{
+ csr_write(0, LOONGARCH_CSR_TCFG);
+}
+
+static inline void timer_irq_enable(void)
+{
+ unsigned long val;
+
+ val = csr_read(LOONGARCH_CSR_ECFG);
+ val |= ECFGF_TIMER;
+ csr_write(val, LOONGARCH_CSR_ECFG);
+}
+
+static inline void timer_irq_disable(void)
+{
+ unsigned long val;
+
+ val = csr_read(LOONGARCH_CSR_ECFG);
+ val &= ~ECFGF_TIMER;
+ csr_write(val, LOONGARCH_CSR_ECFG);
+}
+
+static inline void timer_set_next_cmp_ms(unsigned int msec, bool period)
+{
+ unsigned long val;
+
+ val = msec_to_cycles(msec) & CSR_TCFG_VAL;
+ val |= CSR_TCFG_EN;
+ if (period)
+ val |= CSR_TCFG_PERIOD;
+ csr_write(val, LOONGARCH_CSR_TCFG);
+}
+
+static inline void __delay(uint64_t cycles)
+{
+ uint64_t start = timer_get_cycles();
+
+ while ((timer_get_cycles() - start) < cycles)
+ cpu_relax();
+}
+
+static inline void udelay(unsigned long usec)
+{
+ __delay(usec_to_cycles(usec));
+}
+#endif /* SELFTEST_KVM_ARCH_TIMER_H */
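For reference, a guest built on these helpers could arm and quiesce a one-shot timer as sketched below; the 10 ms value and function name are illustrative:

	static void guest_wait_one_shot(void)
	{
		timer_irq_enable();

		/* Arm a 10 ms one-shot timer (period == false) */
		timer_set_next_cmp_ms(10, false);

		/* Busy-wait past the expected expiry */
		udelay(10 * 1000);

		/* Quiesce the timer before continuing */
		disable_timer();
	}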
diff --git a/tools/testing/selftests/kvm/include/loongarch/processor.h b/tools/testing/selftests/kvm/include/loongarch/processor.h
index 6427a3275e6a..76840ddda57d 100644
--- a/tools/testing/selftests/kvm/include/loongarch/processor.h
+++ b/tools/testing/selftests/kvm/include/loongarch/processor.h
@@ -83,7 +83,14 @@
#define LOONGARCH_CSR_PRMD 0x1
#define LOONGARCH_CSR_EUEN 0x2
#define LOONGARCH_CSR_ECFG 0x4
+#define ECFGB_TIMER 11
+#define ECFGF_TIMER (BIT_ULL(ECFGB_TIMER))
#define LOONGARCH_CSR_ESTAT 0x5 /* Exception status */
+#define CSR_ESTAT_EXC_SHIFT 16
+#define CSR_ESTAT_EXC_WIDTH 6
+#define CSR_ESTAT_EXC (0x3f << CSR_ESTAT_EXC_SHIFT)
+#define EXCCODE_INT 0 /* Interrupt */
+#define INT_TI 11 /* Timer interrupt */
#define LOONGARCH_CSR_ERA 0x6 /* ERA */
#define LOONGARCH_CSR_BADV 0x7 /* Bad virtual address */
#define LOONGARCH_CSR_EENTRY 0xc
@@ -106,6 +113,14 @@
#define LOONGARCH_CSR_KS1 0x31
#define LOONGARCH_CSR_TMID 0x40
#define LOONGARCH_CSR_TCFG 0x41
+#define CSR_TCFG_VAL (BIT_ULL(48) - BIT_ULL(2))
+#define CSR_TCFG_PERIOD_SHIFT 1
+#define CSR_TCFG_PERIOD (0x1UL << CSR_TCFG_PERIOD_SHIFT)
+#define CSR_TCFG_EN (0x1UL)
+#define LOONGARCH_CSR_TVAL 0x42
+#define LOONGARCH_CSR_TINTCLR 0x44 /* Timer interrupt clear */
+#define CSR_TINTCLR_TI_SHIFT 0
+#define CSR_TINTCLR_TI (1 << CSR_TINTCLR_TI_SHIFT)
/* TLB refill exception entry */
#define LOONGARCH_CSR_TLBRENTRY 0x88
#define LOONGARCH_CSR_TLBRSAVE 0x8b
@@ -113,6 +128,28 @@
#define CSR_TLBREHI_PS_SHIFT 0
#define CSR_TLBREHI_PS (0x3fUL << CSR_TLBREHI_PS_SHIFT)
+#define csr_read(csr) \
+({ \
+ register unsigned long __v; \
+ __asm__ __volatile__( \
+ "csrrd %[val], %[reg]\n\t" \
+ : [val] "=r" (__v) \
+ : [reg] "i" (csr) \
+ : "memory"); \
+ __v; \
+})
+
+#define csr_write(v, csr) \
+({ \
+ register unsigned long __v = v; \
+ __asm__ __volatile__ ( \
+ "csrwr %[val], %[reg]\n\t" \
+ : [val] "+r" (__v) \
+ : [reg] "i" (csr) \
+ : "memory"); \
+ __v; \
+})
+
#define EXREGS_GPRS (32)
#ifndef __ASSEMBLER__
@@ -124,18 +161,60 @@ struct ex_regs {
unsigned long pc;
unsigned long estat;
unsigned long badv;
+ unsigned long prmd;
};
#define PC_OFFSET_EXREGS offsetof(struct ex_regs, pc)
#define ESTAT_OFFSET_EXREGS offsetof(struct ex_regs, estat)
#define BADV_OFFSET_EXREGS offsetof(struct ex_regs, badv)
+#define PRMD_OFFSET_EXREGS offsetof(struct ex_regs, prmd)
#define EXREGS_SIZE sizeof(struct ex_regs)
+#define VECTOR_NUM 64
+
+typedef void (*handler_fn)(struct ex_regs *);
+
+struct handlers {
+ handler_fn exception_handlers[VECTOR_NUM];
+};
+
+void vm_init_descriptor_tables(struct kvm_vm *vm);
+void vm_install_exception_handler(struct kvm_vm *vm, int vector, handler_fn handler);
+
+static inline void cpu_relax(void)
+{
+ asm volatile("nop" ::: "memory");
+}
+
+static inline void local_irq_enable(void)
+{
+ unsigned int flags = CSR_CRMD_IE;
+ register unsigned int mask asm("$t0") = CSR_CRMD_IE;
+
+ __asm__ __volatile__(
+ "csrxchg %[val], %[mask], %[reg]\n\t"
+ : [val] "+r" (flags)
+ : [mask] "r" (mask), [reg] "i" (LOONGARCH_CSR_CRMD)
+ : "memory");
+}
+
+static inline void local_irq_disable(void)
+{
+ unsigned int flags = 0;
+ register unsigned int mask asm("$t0") = CSR_CRMD_IE;
+
+ __asm__ __volatile__(
+ "csrxchg %[val], %[mask], %[reg]\n\t"
+ : [val] "+r" (flags)
+ : [mask] "r" (mask), [reg] "i" (LOONGARCH_CSR_CRMD)
+ : "memory");
+}
#else
#define PC_OFFSET_EXREGS ((EXREGS_GPRS + 0) * 8)
#define ESTAT_OFFSET_EXREGS ((EXREGS_GPRS + 1) * 8)
#define BADV_OFFSET_EXREGS ((EXREGS_GPRS + 2) * 8)
-#define EXREGS_SIZE ((EXREGS_GPRS + 3) * 8)
+#define PRMD_OFFSET_EXREGS ((EXREGS_GPRS + 3) * 8)
+#define EXREGS_SIZE ((EXREGS_GPRS + 4) * 8)
#endif
#endif /* SELFTEST_KVM_PROCESSOR_H */
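For reference, the new ESTAT definitions let a handler classify a timer interrupt roughly as sketched below; an illustrative handler, assuming BIT() is available as elsewhere in the selftests:

	static void timer_irq_handler(struct ex_regs *regs)
	{
		unsigned long exccode;

		exccode = (regs->estat & CSR_ESTAT_EXC) >> CSR_ESTAT_EXC_SHIFT;
		if (exccode == EXCCODE_INT && (regs->estat & BIT(INT_TI)))
			csr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR);
	}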
diff --git a/tools/testing/selftests/kvm/include/numaif.h b/tools/testing/selftests/kvm/include/numaif.h
index b020547403fd..29572a6d789c 100644
--- a/tools/testing/selftests/kvm/include/numaif.h
+++ b/tools/testing/selftests/kvm/include/numaif.h
@@ -1,55 +1,83 @@
/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * tools/testing/selftests/kvm/include/numaif.h
- *
- * Copyright (C) 2020, Google LLC.
- *
- * This work is licensed under the terms of the GNU GPL, version 2.
- *
- * Header file that provides access to NUMA API functions not explicitly
- * exported to user space.
- */
+/* Copyright (C) 2020, Google LLC. */
#ifndef SELFTEST_KVM_NUMAIF_H
#define SELFTEST_KVM_NUMAIF_H
-#define __NR_get_mempolicy 239
-#define __NR_migrate_pages 256
+#include <dirent.h>
-/* System calls */
-long get_mempolicy(int *policy, const unsigned long *nmask,
- unsigned long maxnode, void *addr, int flags)
+#include <linux/mempolicy.h>
+
+#include "kvm_syscalls.h"
+
+KVM_SYSCALL_DEFINE(get_mempolicy, 5, int *, policy, const unsigned long *, nmask,
+ unsigned long, maxnode, void *, addr, int, flags);
+
+KVM_SYSCALL_DEFINE(set_mempolicy, 3, int, mode, const unsigned long *, nmask,
+ unsigned long, maxnode);
+
+KVM_SYSCALL_DEFINE(set_mempolicy_home_node, 4, unsigned long, start,
+ unsigned long, len, unsigned long, home_node,
+ unsigned long, flags);
+
+KVM_SYSCALL_DEFINE(migrate_pages, 4, int, pid, unsigned long, maxnode,
+ const unsigned long *, frommask, const unsigned long *, tomask);
+
+KVM_SYSCALL_DEFINE(move_pages, 6, int, pid, unsigned long, count, void *, pages,
+ const int *, nodes, int *, status, int, flags);
+
+KVM_SYSCALL_DEFINE(mbind, 6, void *, addr, unsigned long, size, int, mode,
+ const unsigned long *, nodemask, unsigned long, maxnode,
+ unsigned int, flags);
+
+static inline int get_max_numa_node(void)
{
- return syscall(__NR_get_mempolicy, policy, nmask,
- maxnode, addr, flags);
+ struct dirent *de;
+ int max_node = 0;
+ DIR *d;
+
+ /*
+ * Assume there's a single node if the kernel doesn't support NUMA,
+ * or if no nodes are found.
+ */
+ d = opendir("/sys/devices/system/node");
+ if (!d)
+ return 0;
+
+ while ((de = readdir(d)) != NULL) {
+ int node_id;
+ char *endptr;
+
+ if (strncmp(de->d_name, "node", 4) != 0)
+ continue;
+
+ node_id = strtol(de->d_name + 4, &endptr, 10);
+ if (*endptr != '\0')
+ continue;
+
+ if (node_id > max_node)
+ max_node = node_id;
+ }
+ closedir(d);
+
+ return max_node;
}
-long migrate_pages(int pid, unsigned long maxnode,
- const unsigned long *frommask,
- const unsigned long *tomask)
+static bool is_numa_available(void)
{
- return syscall(__NR_migrate_pages, pid, maxnode, frommask, tomask);
+ /*
+ * Probe for NUMA by doing a dummy get_mempolicy(). If the syscall
+ * fails with ENOSYS, then the kernel was built without NUMA support.
+	 * If the syscall fails with EPERM, then the process/user lacks the
+ * necessary capabilities (CAP_SYS_NICE).
+ */
+ return !get_mempolicy(NULL, NULL, 0, NULL, 0) ||
+ (errno != ENOSYS && errno != EPERM);
}
-/* Policies */
-#define MPOL_DEFAULT 0
-#define MPOL_PREFERRED 1
-#define MPOL_BIND 2
-#define MPOL_INTERLEAVE 3
-
-#define MPOL_MAX MPOL_INTERLEAVE
-
-/* Flags for get_mem_policy */
-#define MPOL_F_NODE (1<<0) /* return next il node or node of address */
- /* Warning: MPOL_F_NODE is unsupported and
- * subject to change. Don't use.
- */
-#define MPOL_F_ADDR (1<<1) /* look up vma using address */
-#define MPOL_F_MEMS_ALLOWED (1<<2) /* query nodes allowed in cpuset */
-
-/* Flags for mbind */
-#define MPOL_MF_STRICT (1<<0) /* Verify existing pages in the mapping */
-#define MPOL_MF_MOVE (1<<1) /* Move pages owned by this process to conform to mapping */
-#define MPOL_MF_MOVE_ALL (1<<2) /* Move every page to conform to mapping */
+static inline bool is_multi_numa_node_system(void)
+{
+ return is_numa_available() && get_max_numa_node() >= 1;
+}
#endif /* SELFTEST_KVM_NUMAIF_H */
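For reference, callers are expected to gate NUMA-dependent work on these probes, as the guest_memfd test above does; a condensed sketch, with the helper name and maxnode of 8 mirroring that test rather than this header:

	static void bind_to_node0(void *mem, size_t size)
	{
		const unsigned long node0_mask = 1;	/* nodemask selecting node 0 */

		if (!is_multi_numa_node_system())
			return;		/* nothing to test on single-node hosts */

		kvm_mbind(mem, size, MPOL_BIND, &node0_mask, 8, 0);
	}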
diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h
index 51cd84b9ca66..57d62a425109 100644
--- a/tools/testing/selftests/kvm/include/x86/processor.h
+++ b/tools/testing/selftests/kvm/include/x86/processor.h
@@ -1441,7 +1441,7 @@ enum pg_level {
PG_LEVEL_2M,
PG_LEVEL_1G,
PG_LEVEL_512G,
- PG_LEVEL_NUM
+ PG_LEVEL_256T
};
#define PG_LEVEL_SHIFT(_level) ((_level - 1) * 9 + 12)
diff --git a/tools/testing/selftests/kvm/include/x86/vmx.h b/tools/testing/selftests/kvm/include/x86/vmx.h
index edb3c391b982..96e2b4c630a9 100644
--- a/tools/testing/selftests/kvm/include/x86/vmx.h
+++ b/tools/testing/selftests/kvm/include/x86/vmx.h
@@ -568,8 +568,7 @@ void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
uint64_t addr, uint64_t size);
bool kvm_cpu_has_ept(void);
-void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint32_t eptp_memslot);
+void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm);
void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm);
#endif /* SELFTEST_KVM_VMX_H */
diff --git a/tools/testing/selftests/kvm/kvm_binary_stats_test.c b/tools/testing/selftests/kvm/kvm_binary_stats_test.c
index f02355c3c4c2..b7dbde9c0843 100644
--- a/tools/testing/selftests/kvm/kvm_binary_stats_test.c
+++ b/tools/testing/selftests/kvm/kvm_binary_stats_test.c
@@ -239,14 +239,14 @@ int main(int argc, char *argv[])
* single stats file works and doesn't cause explosions.
*/
vm_stats_fds = vm_get_stats_fd(vms[i]);
- stats_test(dup(vm_stats_fds));
+ stats_test(kvm_dup(vm_stats_fds));
/* Verify userspace can instantiate multiple stats files. */
stats_test(vm_get_stats_fd(vms[i]));
for (j = 0; j < max_vcpu; ++j) {
vcpu_stats_fds[j] = vcpu_get_stats_fd(vcpus[i * max_vcpu + j]);
- stats_test(dup(vcpu_stats_fds[j]));
+ stats_test(kvm_dup(vcpu_stats_fds[j]));
stats_test(vcpu_get_stats_fd(vcpus[i * max_vcpu + j]));
}
diff --git a/tools/testing/selftests/kvm/lib/arm64/gic.c b/tools/testing/selftests/kvm/lib/arm64/gic.c
index 7abbf8866512..b023868fe0b8 100644
--- a/tools/testing/selftests/kvm/lib/arm64/gic.c
+++ b/tools/testing/selftests/kvm/lib/arm64/gic.c
@@ -155,3 +155,9 @@ void gic_irq_set_config(unsigned int intid, bool is_edge)
GUEST_ASSERT(gic_common_ops);
gic_common_ops->gic_irq_set_config(intid, is_edge);
}
+
+void gic_irq_set_group(unsigned int intid, bool group)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_set_group(intid, group);
+}
diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_private.h b/tools/testing/selftests/kvm/lib/arm64/gic_private.h
index d24e9ecc96c6..b6a7e30c3eb1 100644
--- a/tools/testing/selftests/kvm/lib/arm64/gic_private.h
+++ b/tools/testing/selftests/kvm/lib/arm64/gic_private.h
@@ -25,6 +25,7 @@ struct gic_common_ops {
void (*gic_irq_clear_pending)(uint32_t intid);
bool (*gic_irq_get_pending)(uint32_t intid);
void (*gic_irq_set_config)(uint32_t intid, bool is_edge);
+ void (*gic_irq_set_group)(uint32_t intid, bool group);
};
extern const struct gic_common_ops gicv3_ops;
diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_v3.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3.c
index 66d05506f78b..50754a27f493 100644
--- a/tools/testing/selftests/kvm/lib/arm64/gic_v3.c
+++ b/tools/testing/selftests/kvm/lib/arm64/gic_v3.c
@@ -293,17 +293,36 @@ static void gicv3_enable_redist(volatile void *redist_base)
}
}
+static void gicv3_set_group(uint32_t intid, bool grp)
+{
+ uint32_t cpu_or_dist;
+ uint32_t val;
+
+ cpu_or_dist = (get_intid_range(intid) == SPI_RANGE) ? DIST_BIT : guest_get_vcpuid();
+ val = gicv3_reg_readl(cpu_or_dist, GICD_IGROUPR + (intid / 32) * 4);
+ if (grp)
+ val |= BIT(intid % 32);
+ else
+ val &= ~BIT(intid % 32);
+ gicv3_reg_writel(cpu_or_dist, GICD_IGROUPR + (intid / 32) * 4, val);
+}
+
static void gicv3_cpu_init(unsigned int cpu)
{
volatile void *sgi_base;
unsigned int i;
volatile void *redist_base_cpu;
+ u64 typer;
GUEST_ASSERT(cpu < gicv3_data.nr_cpus);
redist_base_cpu = gicr_base_cpu(cpu);
sgi_base = sgi_base_from_redist(redist_base_cpu);
+ /* Verify assumption that GICR_TYPER.Processor_number == cpu */
+ typer = readq_relaxed(redist_base_cpu + GICR_TYPER);
+ GUEST_ASSERT_EQ(GICR_TYPER_CPU_NUMBER(typer), cpu);
+
gicv3_enable_redist(redist_base_cpu);
/*
@@ -328,6 +347,8 @@ static void gicv3_cpu_init(unsigned int cpu)
/* Set a default priority threshold */
write_sysreg_s(ICC_PMR_DEF_PRIO, SYS_ICC_PMR_EL1);
+ /* Disable Group-0 interrupts */
+	write_sysreg_s(0, SYS_ICC_IGRPEN0_EL1);
/* Enable non-secure Group-1 interrupts */
write_sysreg_s(ICC_IGRPEN1_EL1_MASK, SYS_ICC_IGRPEN1_EL1);
}
@@ -400,6 +421,7 @@ const struct gic_common_ops gicv3_ops = {
.gic_irq_clear_pending = gicv3_irq_clear_pending,
.gic_irq_get_pending = gicv3_irq_get_pending,
.gic_irq_set_config = gicv3_irq_set_config,
+ .gic_irq_set_group = gicv3_set_group,
};
void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size,
diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c
index 0e2f8ed90f30..7f9fdcf42ae6 100644
--- a/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c
+++ b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c
@@ -253,3 +253,13 @@ void its_send_invall_cmd(void *cmdq_base, u32 collection_id)
its_send_cmd(cmdq_base, &cmd);
}
+
+void its_send_sync_cmd(void *cmdq_base, u32 vcpu_id)
+{
+ struct its_cmd_block cmd = {};
+
+ its_encode_cmd(&cmd, GITS_CMD_SYNC);
+ its_encode_target(&cmd, procnum_to_rdbase(vcpu_id));
+
+ its_send_cmd(cmdq_base, &cmd);
+}
diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c
index 54f6d17c78f7..d46e4b13b92c 100644
--- a/tools/testing/selftests/kvm/lib/arm64/processor.c
+++ b/tools/testing/selftests/kvm/lib/arm64/processor.c
@@ -324,7 +324,7 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init)
/* Configure base granule size */
switch (vm->mode) {
- case VM_MODE_PXXV48_4K:
+ case VM_MODE_PXXVYY_4K:
TEST_FAIL("AArch64 does not support 4K sized pages "
"with ANY-bit physical address ranges");
case VM_MODE_P52V48_64K:
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 1a93d6361671..8279b6ced8d2 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -201,7 +201,7 @@ const char *vm_guest_mode_string(uint32_t i)
[VM_MODE_P40V48_4K] = "PA-bits:40, VA-bits:48, 4K pages",
[VM_MODE_P40V48_16K] = "PA-bits:40, VA-bits:48, 16K pages",
[VM_MODE_P40V48_64K] = "PA-bits:40, VA-bits:48, 64K pages",
- [VM_MODE_PXXV48_4K] = "PA-bits:ANY, VA-bits:48, 4K pages",
+ [VM_MODE_PXXVYY_4K] = "PA-bits:ANY, VA-bits:48 or 57, 4K pages",
[VM_MODE_P47V64_4K] = "PA-bits:47, VA-bits:64, 4K pages",
[VM_MODE_P44V64_4K] = "PA-bits:44, VA-bits:64, 4K pages",
[VM_MODE_P36V48_4K] = "PA-bits:36, VA-bits:48, 4K pages",
@@ -228,7 +228,7 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = {
[VM_MODE_P40V48_4K] = { 40, 48, 0x1000, 12 },
[VM_MODE_P40V48_16K] = { 40, 48, 0x4000, 14 },
[VM_MODE_P40V48_64K] = { 40, 48, 0x10000, 16 },
- [VM_MODE_PXXV48_4K] = { 0, 0, 0x1000, 12 },
+ [VM_MODE_PXXVYY_4K] = { 0, 0, 0x1000, 12 },
[VM_MODE_P47V64_4K] = { 47, 64, 0x1000, 12 },
[VM_MODE_P44V64_4K] = { 44, 64, 0x1000, 12 },
[VM_MODE_P36V48_4K] = { 36, 48, 0x1000, 12 },
@@ -310,24 +310,26 @@ struct kvm_vm *____vm_create(struct vm_shape shape)
case VM_MODE_P36V47_16K:
vm->pgtable_levels = 3;
break;
- case VM_MODE_PXXV48_4K:
+ case VM_MODE_PXXVYY_4K:
#ifdef __x86_64__
kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits);
kvm_init_vm_address_properties(vm);
- /*
- * Ignore KVM support for 5-level paging (vm->va_bits == 57),
- * it doesn't take effect unless a CR4.LA57 is set, which it
- * isn't for this mode (48-bit virtual address space).
- */
- TEST_ASSERT(vm->va_bits == 48 || vm->va_bits == 57,
- "Linear address width (%d bits) not supported",
- vm->va_bits);
+
pr_debug("Guest physical address width detected: %d\n",
vm->pa_bits);
- vm->pgtable_levels = 4;
- vm->va_bits = 48;
+ pr_debug("Guest virtual address width detected: %d\n",
+ vm->va_bits);
+
+ if (vm->va_bits == 57) {
+ vm->pgtable_levels = 5;
+ } else {
+ TEST_ASSERT(vm->va_bits == 48,
+ "Unexpected guest virtual address width: %d",
+ vm->va_bits);
+ vm->pgtable_levels = 4;
+ }
#else
- TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms");
+ TEST_FAIL("VM_MODE_PXXVYY_4K not supported on non-x86 platforms");
#endif
break;
case VM_MODE_P47V64_4K:
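
The 4-vs-5-level choice above is plain radix arithmetic: with 4KiB pages, each paging level resolves 9 bits of virtual address on top of the 12-bit page offset, so a 48-bit VA needs 4 levels and a 57-bit (LA57) VA needs 5. A standalone sketch, with an illustrative name:

#include <assert.h>

static int levels_for_va_bits(int va_bits)
{
	/* (48 - 12) / 9 == 4 levels; (57 - 12) / 9 == 5 levels. */
	assert(va_bits == 48 || va_bits == 57);
	return (va_bits - 12) / 9;
}
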
@@ -704,8 +706,6 @@ userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
static void kvm_stats_release(struct kvm_binary_stats *stats)
{
- int ret;
-
if (stats->fd < 0)
return;
@@ -714,8 +714,7 @@ static void kvm_stats_release(struct kvm_binary_stats *stats)
stats->desc = NULL;
}
- ret = close(stats->fd);
- TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
+ kvm_close(stats->fd);
stats->fd = -1;
}
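
kvm_close() is one of a family of asserting syscall wrappers introduced alongside this change, replacing the repeated close()+TEST_ASSERT pairs. A minimal standalone sketch of the assumed shape (perror/exit stand in for the selftest's TEST_ASSERT):

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

static inline void kvm_close_sketch(int fd)
{
	/* Close and treat any failure as fatal, as the callers above expect. */
	if (close(fd)) {
		perror("close");
		exit(EXIT_FAILURE);
	}
}
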
@@ -738,8 +737,6 @@ __weak void vcpu_arch_free(struct kvm_vcpu *vcpu)
*/
static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
- int ret;
-
if (vcpu->dirty_gfns) {
kvm_munmap(vcpu->dirty_gfns, vm->dirty_ring_size);
vcpu->dirty_gfns = NULL;
@@ -747,9 +744,7 @@ static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
kvm_munmap(vcpu->run, vcpu_mmap_sz());
- ret = close(vcpu->fd);
- TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
-
+ kvm_close(vcpu->fd);
kvm_stats_release(&vcpu->stats);
list_del(&vcpu->list);
@@ -761,16 +756,12 @@ static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
void kvm_vm_release(struct kvm_vm *vmp)
{
struct kvm_vcpu *vcpu, *tmp;
- int ret;
list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list)
vm_vcpu_rm(vmp, vcpu);
- ret = close(vmp->fd);
- TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
-
- ret = close(vmp->kvm_fd);
- TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
+ kvm_close(vmp->fd);
+ kvm_close(vmp->kvm_fd);
/* Free cached stats metadata and close FD */
kvm_stats_release(&vmp->stats);
@@ -828,7 +819,7 @@ void kvm_vm_free(struct kvm_vm *vmp)
int kvm_memfd_alloc(size_t size, bool hugepages)
{
int memfd_flags = MFD_CLOEXEC;
- int fd, r;
+ int fd;
if (hugepages)
memfd_flags |= MFD_HUGETLB;
@@ -836,11 +827,8 @@ int kvm_memfd_alloc(size_t size, bool hugepages)
fd = memfd_create("kvm_selftest", memfd_flags);
TEST_ASSERT(fd != -1, __KVM_SYSCALL_ERROR("memfd_create()", fd));
- r = ftruncate(fd, size);
- TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("ftruncate()", r));
-
- r = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size);
- TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
+ kvm_ftruncate(fd, size);
+ kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size);
return fd;
}
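
kvm_ftruncate() and kvm_fallocate() follow the same asserting-wrapper pattern. A self-contained userspace sketch of the allocation sequence above, with plain assert() standing in for TEST_ASSERT:

#define _GNU_SOURCE
#include <assert.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

static int memfd_sketch(size_t size)
{
	int fd = memfd_create("sketch", MFD_CLOEXEC);

	assert(fd >= 0);
	/* Size the file, then verify the backing supports hole punching. */
	assert(!ftruncate(fd, size));
	assert(!fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
			  0, size));
	return fd;
}
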
@@ -957,8 +945,8 @@ void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags
/* FIXME: This thing needs to be ripped apart and rewritten. */
void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
- uint64_t guest_paddr, uint32_t slot, uint64_t npages,
- uint32_t flags, int guest_memfd, uint64_t guest_memfd_offset)
+ uint64_t gpa, uint32_t slot, uint64_t npages, uint32_t flags,
+ int guest_memfd, uint64_t guest_memfd_offset)
{
int ret;
struct userspace_mem_region *region;
@@ -972,30 +960,29 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
"Number of guest pages is not compatible with the host. "
"Try npages=%d", vm_adjust_num_guest_pages(vm->mode, npages));
- TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical "
+ TEST_ASSERT((gpa % vm->page_size) == 0, "Guest physical "
"address not on a page boundary.\n"
- " guest_paddr: 0x%lx vm->page_size: 0x%x",
- guest_paddr, vm->page_size);
- TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1)
+ " gpa: 0x%lx vm->page_size: 0x%x",
+ gpa, vm->page_size);
+ TEST_ASSERT((((gpa >> vm->page_shift) + npages) - 1)
<= vm->max_gfn, "Physical range beyond maximum "
"supported physical address,\n"
- " guest_paddr: 0x%lx npages: 0x%lx\n"
+ " gpa: 0x%lx npages: 0x%lx\n"
" vm->max_gfn: 0x%lx vm->page_size: 0x%x",
- guest_paddr, npages, vm->max_gfn, vm->page_size);
+ gpa, npages, vm->max_gfn, vm->page_size);
/*
* Confirm a mem region with an overlapping address doesn't
* already exist.
*/
region = (struct userspace_mem_region *) userspace_mem_region_find(
- vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1);
+ vm, gpa, (gpa + npages * vm->page_size) - 1);
if (region != NULL)
TEST_FAIL("overlapping userspace_mem_region already "
"exists\n"
- " requested guest_paddr: 0x%lx npages: 0x%lx "
- "page_size: 0x%x\n"
- " existing guest_paddr: 0x%lx size: 0x%lx",
- guest_paddr, npages, vm->page_size,
+ " requested gpa: 0x%lx npages: 0x%lx page_size: 0x%x\n"
+ " existing gpa: 0x%lx size: 0x%lx",
+ gpa, npages, vm->page_size,
(uint64_t) region->region.guest_phys_addr,
(uint64_t) region->region.memory_size);
@@ -1009,8 +996,7 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
"already exists.\n"
" requested slot: %u paddr: 0x%lx npages: 0x%lx\n"
" existing slot: %u paddr: 0x%lx size: 0x%lx",
- slot, guest_paddr, npages,
- region->region.slot,
+ slot, gpa, npages, region->region.slot,
(uint64_t) region->region.guest_phys_addr,
(uint64_t) region->region.memory_size);
}
@@ -1036,7 +1022,7 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
alignment = max(backing_src_pagesz, alignment);
- TEST_ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz));
+ TEST_ASSERT_EQ(gpa, align_up(gpa, backing_src_pagesz));
/* Add enough memory to align up if necessary */
if (alignment > 1)
@@ -1084,8 +1070,7 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
* needing to track if the fd is owned by the framework
* or by the caller.
*/
- guest_memfd = dup(guest_memfd);
- TEST_ASSERT(guest_memfd >= 0, __KVM_SYSCALL_ERROR("dup()", guest_memfd));
+ guest_memfd = kvm_dup(guest_memfd);
}
region->region.guest_memfd = guest_memfd;
@@ -1097,20 +1082,18 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
region->unused_phy_pages = sparsebit_alloc();
if (vm_arch_has_protected_memory(vm))
region->protected_phy_pages = sparsebit_alloc();
- sparsebit_set_num(region->unused_phy_pages,
- guest_paddr >> vm->page_shift, npages);
+ sparsebit_set_num(region->unused_phy_pages, gpa >> vm->page_shift, npages);
region->region.slot = slot;
region->region.flags = flags;
- region->region.guest_phys_addr = guest_paddr;
+ region->region.guest_phys_addr = gpa;
region->region.memory_size = npages * vm->page_size;
region->region.userspace_addr = (uintptr_t) region->host_mem;
ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);
TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n"
" rc: %i errno: %i\n"
" slot: %u flags: 0x%x\n"
- " guest_phys_addr: 0x%lx size: 0x%lx guest_memfd: %d",
- ret, errno, slot, flags,
- guest_paddr, (uint64_t) region->region.memory_size,
+ " guest_phys_addr: 0x%lx size: 0x%llx guest_memfd: %d",
+ ret, errno, slot, flags, gpa, region->region.memory_size,
region->region.guest_memfd);
/* Add to quick lookup data structures */
@@ -1132,10 +1115,10 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
void vm_userspace_mem_region_add(struct kvm_vm *vm,
enum vm_mem_backing_src_type src_type,
- uint64_t guest_paddr, uint32_t slot,
- uint64_t npages, uint32_t flags)
+ uint64_t gpa, uint32_t slot, uint64_t npages,
+ uint32_t flags)
{
- vm_mem_add(vm, src_type, guest_paddr, slot, npages, flags, -1, 0);
+ vm_mem_add(vm, src_type, gpa, slot, npages, flags, -1, 0);
}
/*
@@ -1201,6 +1184,16 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
ret, errno, slot, flags);
}
+void vm_mem_region_reload(struct kvm_vm *vm, uint32_t slot)
+{
+ struct userspace_mem_region *region = memslot2region(vm, slot);
+ struct kvm_userspace_memory_region2 tmp = region->region;
+
+ tmp.memory_size = 0;
+ vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &tmp);
+ vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);
+}
+
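
vm_mem_region_reload() leans on a documented KVM memslot semantic: a KVM_SET_USER_MEMORY_REGION2 call with memory_size == 0 deletes the slot, and re-adding the region forces KVM to drop and rebuild its mappings for it. A raw-ioctl sketch of the same idiom, assuming a VM fd and an already-populated region struct (requires kernel headers that define the _REGION2 API):

#include <assert.h>
#include <linux/kvm.h>
#include <sys/ioctl.h>

static void reload_slot_sketch(int vm_fd,
			       struct kvm_userspace_memory_region2 *region)
{
	struct kvm_userspace_memory_region2 del = *region;

	/* memory_size == 0 deletes the slot... */
	del.memory_size = 0;
	assert(!ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION2, &del));
	/* ...and re-adding it forces KVM to rebuild the mappings. */
	assert(!ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION2, region));
}
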
/*
* VM Memory Region Move
*
@@ -1456,8 +1449,6 @@ static vm_vaddr_t ____vm_vaddr_alloc(struct kvm_vm *vm, size_t sz,
pages--, vaddr += vm->page_size, paddr += vm->page_size) {
virt_pg_map(vm, vaddr, paddr);
-
- sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift);
}
return vaddr_start;
@@ -1571,7 +1562,6 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
while (npages--) {
virt_pg_map(vm, vaddr, paddr);
- sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift);
vaddr += page_size;
paddr += page_size;
@@ -2025,6 +2015,7 @@ static struct exit_reason {
KVM_EXIT_STRING(NOTIFY),
KVM_EXIT_STRING(LOONGARCH_IOCSR),
KVM_EXIT_STRING(MEMORY_FAULT),
+ KVM_EXIT_STRING(ARM_SEA),
};
/*
@@ -2305,11 +2296,35 @@ __weak void kvm_selftest_arch_init(void)
{
}
+static void report_unexpected_signal(int signum)
+{
+#define KVM_CASE_SIGNUM(sig) \
+	case sig: TEST_FAIL("Unexpected " #sig " (%d)", signum)
+
+ switch (signum) {
+ KVM_CASE_SIGNUM(SIGBUS);
+ KVM_CASE_SIGNUM(SIGSEGV);
+ KVM_CASE_SIGNUM(SIGILL);
+ KVM_CASE_SIGNUM(SIGFPE);
+ default:
+		TEST_FAIL("Unexpected signal %d", signum);
+ }
+}
+
void __attribute((constructor)) kvm_selftest_init(void)
{
+ struct sigaction sig_sa = {
+ .sa_handler = report_unexpected_signal,
+ };
+
/* Tell stdout not to buffer its content. */
setbuf(stdout, NULL);
+ sigaction(SIGBUS, &sig_sa, NULL);
+ sigaction(SIGSEGV, &sig_sa, NULL);
+ sigaction(SIGILL, &sig_sa, NULL);
+ sigaction(SIGFPE, &sig_sa, NULL);
+
guest_random_seed = last_guest_seed = random();
pr_info("Random seed: 0x%x\n", guest_random_seed);
diff --git a/tools/testing/selftests/kvm/lib/loongarch/exception.S b/tools/testing/selftests/kvm/lib/loongarch/exception.S
index 88bfa505c6f5..3f1e4b67c5ae 100644
--- a/tools/testing/selftests/kvm/lib/loongarch/exception.S
+++ b/tools/testing/selftests/kvm/lib/loongarch/exception.S
@@ -51,9 +51,15 @@ handle_exception:
st.d t0, sp, ESTAT_OFFSET_EXREGS
csrrd t0, LOONGARCH_CSR_BADV
st.d t0, sp, BADV_OFFSET_EXREGS
+ csrrd t0, LOONGARCH_CSR_PRMD
+ st.d t0, sp, PRMD_OFFSET_EXREGS
or a0, sp, zero
bl route_exception
+ ld.d t0, sp, PC_OFFSET_EXREGS
+ csrwr t0, LOONGARCH_CSR_ERA
+ ld.d t0, sp, PRMD_OFFSET_EXREGS
+ csrwr t0, LOONGARCH_CSR_PRMD
restore_gprs sp
csrrd sp, LOONGARCH_CSR_KS0
ertn
diff --git a/tools/testing/selftests/kvm/lib/loongarch/processor.c b/tools/testing/selftests/kvm/lib/loongarch/processor.c
index 0ac1abcb71cb..07c103369ddb 100644
--- a/tools/testing/selftests/kvm/lib/loongarch/processor.c
+++ b/tools/testing/selftests/kvm/lib/loongarch/processor.c
@@ -3,6 +3,7 @@
#include <assert.h>
#include <linux/compiler.h>
+#include <asm/kvm.h>
#include "kvm_util.h"
#include "processor.h"
#include "ucall_common.h"
@@ -11,6 +12,7 @@
#define LOONGARCH_GUEST_STACK_VADDR_MIN 0x200000
static vm_paddr_t invalid_pgtable[4];
+static vm_vaddr_t exception_handlers;
static uint64_t virt_pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level)
{
@@ -183,7 +185,14 @@ void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
void route_exception(struct ex_regs *regs)
{
+ int vector;
unsigned long pc, estat, badv;
+ struct handlers *handlers;
+
+ handlers = (struct handlers *)exception_handlers;
+ vector = (regs->estat & CSR_ESTAT_EXC) >> CSR_ESTAT_EXC_SHIFT;
+ if (handlers && handlers->exception_handlers[vector])
+ return handlers->exception_handlers[vector](regs);
pc = regs->pc;
badv = regs->badv;
@@ -192,6 +201,32 @@ void route_exception(struct ex_regs *regs)
while (1) ;
}
+void vm_init_descriptor_tables(struct kvm_vm *vm)
+{
+ void *addr;
+
+ vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers),
+ LOONGARCH_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA);
+
+ addr = addr_gva2hva(vm, vm->handlers);
+ memset(addr, 0, vm->page_size);
+ exception_handlers = vm->handlers;
+ sync_global_to_guest(vm, exception_handlers);
+}
+
+void vm_install_exception_handler(struct kvm_vm *vm, int vector, handler_fn handler)
+{
+ struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
+
+ assert(vector < VECTOR_NUM);
+ handlers->exception_handlers[vector] = handler;
+}
+
+uint32_t guest_get_vcpuid(void)
+{
+ return csr_read(LOONGARCH_CSR_CPUID);
+}
+
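
route_exception() above now consults a guest-global table of per-vector handlers before falling back to the fatal default. Reduced to a standalone sketch with stand-in types (the real code uses struct ex_regs and VECTOR_NUM):

#define NR_VECTORS_SKETCH 64

struct regs_sketch { unsigned long estat; };
typedef void (*handler_fn_sketch)(struct regs_sketch *);

static handler_fn_sketch vector_table[NR_VECTORS_SKETCH];

static int dispatch_sketch(struct regs_sketch *regs, int vector)
{
	/* Call a registered handler if one exists for this vector... */
	if (vector >= 0 && vector < NR_VECTORS_SKETCH && vector_table[vector]) {
		vector_table[vector](regs);
		return 0;
	}
	/* ...otherwise the caller falls through to the fatal default path. */
	return -1;
}
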
void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
{
int i;
@@ -211,6 +246,11 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
vcpu_regs_set(vcpu, &regs);
}
+static void loongarch_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val)
+{
+ __vcpu_set_reg(vcpu, id, val);
+}
+
static void loongarch_get_csr(struct kvm_vcpu *vcpu, uint64_t id, void *addr)
{
uint64_t csrid;
@@ -242,8 +282,8 @@ static void loongarch_vcpu_setup(struct kvm_vcpu *vcpu)
TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
}
- /* user mode and page enable mode */
- val = PLV_USER | CSR_CRMD_PG;
+	/* kernel privilege level with paging enabled */
+ val = PLV_KERN | CSR_CRMD_PG;
loongarch_set_csr(vcpu, LOONGARCH_CSR_CRMD, val);
loongarch_set_csr(vcpu, LOONGARCH_CSR_PRMD, val);
loongarch_set_csr(vcpu, LOONGARCH_CSR_EUEN, 1);
@@ -251,7 +291,10 @@ static void loongarch_vcpu_setup(struct kvm_vcpu *vcpu)
loongarch_set_csr(vcpu, LOONGARCH_CSR_TCFG, 0);
loongarch_set_csr(vcpu, LOONGARCH_CSR_ASID, 1);
+	/* make the time counter start from 0 */
val = 0;
+ loongarch_set_reg(vcpu, KVM_REG_LOONGARCH_COUNTER, val);
+
width = vm->page_shift - 3;
switch (vm->pgtable_levels) {
diff --git a/tools/testing/selftests/kvm/lib/x86/memstress.c b/tools/testing/selftests/kvm/lib/x86/memstress.c
index 7f5d62a65c68..0b1f288ad556 100644
--- a/tools/testing/selftests/kvm/lib/x86/memstress.c
+++ b/tools/testing/selftests/kvm/lib/x86/memstress.c
@@ -63,7 +63,7 @@ void memstress_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm)
{
uint64_t start, end;
- prepare_eptp(vmx, vm, 0);
+ prepare_eptp(vmx, vm);
/*
* Identity map the first 4G and the test region with 1G pages so that
diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c
index b418502c5ecc..36104d27f3d9 100644
--- a/tools/testing/selftests/kvm/lib/x86/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86/processor.c
@@ -158,10 +158,10 @@ bool kvm_is_tdp_enabled(void)
void virt_arch_pgd_alloc(struct kvm_vm *vm)
{
- TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
- "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+ TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K,
+ "Unknown or unsupported guest mode: 0x%x", vm->mode);
- /* If needed, create page map l4 table. */
+ /* If needed, create the top-level page table. */
if (!vm->pgd_created) {
vm->pgd = vm_alloc_page_table(vm);
vm->pgd_created = true;
@@ -218,11 +218,11 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level)
{
const uint64_t pg_size = PG_LEVEL_SIZE(level);
- uint64_t *pml4e, *pdpe, *pde;
- uint64_t *pte;
+ uint64_t *pte = &vm->pgd;
+ int current_level;
- TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K,
- "Unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+ TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K,
+ "Unknown or unsupported guest mode: 0x%x", vm->mode);
TEST_ASSERT((vaddr % pg_size) == 0,
"Virtual address not aligned,\n"
@@ -243,20 +243,17 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level)
* Allocate upper level page tables, if not already present. Return
* early if a hugepage was created.
*/
- pml4e = virt_create_upper_pte(vm, &vm->pgd, vaddr, paddr, PG_LEVEL_512G, level);
- if (*pml4e & PTE_LARGE_MASK)
- return;
-
- pdpe = virt_create_upper_pte(vm, pml4e, vaddr, paddr, PG_LEVEL_1G, level);
- if (*pdpe & PTE_LARGE_MASK)
- return;
-
- pde = virt_create_upper_pte(vm, pdpe, vaddr, paddr, PG_LEVEL_2M, level);
- if (*pde & PTE_LARGE_MASK)
- return;
+ for (current_level = vm->pgtable_levels;
+ current_level > PG_LEVEL_4K;
+ current_level--) {
+ pte = virt_create_upper_pte(vm, pte, vaddr, paddr,
+ current_level, level);
+ if (*pte & PTE_LARGE_MASK)
+ return;
+ }
/* Fill in page table entry. */
- pte = virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K);
+ pte = virt_get_pte(vm, pte, vaddr, PG_LEVEL_4K);
TEST_ASSERT(!(*pte & PTE_PRESENT_MASK),
"PTE already present for 4k page at vaddr: 0x%lx", vaddr);
*pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK);
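
The loop-based walk works because, with 4KiB pages, every level's index is just a different 9-bit slice of the virtual address; level 1 corresponds to PG_LEVEL_4K. virt_get_pte() is assumed to compute something equivalent to:

#include <stdint.h>

static inline uint64_t pte_index_sketch(uint64_t vaddr, int level)
{
	/* 9 index bits per level, above the 12-bit page offset. */
	return (vaddr >> (12 + 9 * (level - 1))) & 0x1ffULL;
}
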
@@ -289,6 +286,8 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
for (i = 0; i < nr_pages; i++) {
__virt_pg_map(vm, vaddr, paddr, level);
+		sparsebit_set_num(vm->vpages_mapped, vaddr >> vm->page_shift,
+				  pg_size / PAGE_SIZE);
vaddr += pg_size;
paddr += pg_size;
@@ -310,40 +309,38 @@ static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level)
uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr,
int *level)
{
- uint64_t *pml4e, *pdpe, *pde;
+	int va_width = 12 + vm->pgtable_levels * 9;
+ uint64_t *pte = &vm->pgd;
+ int current_level;
TEST_ASSERT(!vm->arch.is_pt_protected,
"Walking page tables of protected guests is impossible");
- TEST_ASSERT(*level >= PG_LEVEL_NONE && *level < PG_LEVEL_NUM,
+ TEST_ASSERT(*level >= PG_LEVEL_NONE && *level <= vm->pgtable_levels,
"Invalid PG_LEVEL_* '%d'", *level);
- TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
- "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+ TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K,
+ "Unknown or unsupported guest mode: 0x%x", vm->mode);
TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
(vaddr >> vm->page_shift)),
"Invalid virtual address, vaddr: 0x%lx",
vaddr);
/*
- * Based on the mode check above there are 48 bits in the vaddr, so
- * shift 16 to sign extend the last bit (bit-47),
+	 * Check that vaddr is canonical, i.e. properly sign-extended
+	 * from va_width bits.
*/
- TEST_ASSERT(vaddr == (((int64_t)vaddr << 16) >> 16),
- "Canonical check failed. The virtual address is invalid.");
-
- pml4e = virt_get_pte(vm, &vm->pgd, vaddr, PG_LEVEL_512G);
- if (vm_is_target_pte(pml4e, level, PG_LEVEL_512G))
- return pml4e;
-
- pdpe = virt_get_pte(vm, pml4e, vaddr, PG_LEVEL_1G);
- if (vm_is_target_pte(pdpe, level, PG_LEVEL_1G))
- return pdpe;
-
- pde = virt_get_pte(vm, pdpe, vaddr, PG_LEVEL_2M);
- if (vm_is_target_pte(pde, level, PG_LEVEL_2M))
- return pde;
+ TEST_ASSERT(vaddr ==
+ (((int64_t)vaddr << (64 - va_width) >> (64 - va_width))),
+ "Canonical check failed. The virtual address is invalid.");
+
+ for (current_level = vm->pgtable_levels;
+ current_level > PG_LEVEL_4K;
+ current_level--) {
+ pte = virt_get_pte(vm, pte, vaddr, current_level);
+ if (vm_is_target_pte(pte, level, current_level))
+ return pte;
+ }
- return virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K);
+ return virt_get_pte(vm, pte, vaddr, PG_LEVEL_4K);
}
uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr)
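
This replaces the removed hard-coded 48-bit sign-extension check with one parameterized by va_width: a VA is canonical iff sign-extending from bit (va_width - 1) reproduces the value. As a standalone predicate:

#include <stdbool.h>
#include <stdint.h>

static bool is_canonical_sketch(uint64_t vaddr, int va_width)
{
	int shift = 64 - va_width;

	/* Shift unsigned first, then arithmetic right shift to sign-extend. */
	return vaddr == (uint64_t)((int64_t)(vaddr << shift) >> shift);
}
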
@@ -526,7 +523,8 @@ static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
struct kvm_sregs sregs;
- TEST_ASSERT_EQ(vm->mode, VM_MODE_PXXV48_4K);
+ TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K,
+ "Unknown or unsupported guest mode: 0x%x", vm->mode);
/* Set mode specific system register values. */
vcpu_sregs_get(vcpu, &sregs);
@@ -540,6 +538,8 @@ static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR;
if (kvm_cpu_has(X86_FEATURE_XSAVE))
sregs.cr4 |= X86_CR4_OSXSAVE;
+ if (vm->pgtable_levels == 5)
+ sregs.cr4 |= X86_CR4_LA57;
sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
kvm_seg_set_unusable(&sregs.ldt);
diff --git a/tools/testing/selftests/kvm/lib/x86/vmx.c b/tools/testing/selftests/kvm/lib/x86/vmx.c
index d4d1208dd023..29b082a58daa 100644
--- a/tools/testing/selftests/kvm/lib/x86/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86/vmx.c
@@ -401,11 +401,11 @@ void __nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
struct eptPageTableEntry *pt = vmx->eptp_hva, *pte;
uint16_t index;
- TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
- "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+ TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K,
+ "Unknown or unsupported guest mode: 0x%x", vm->mode);
TEST_ASSERT((nested_paddr >> 48) == 0,
- "Nested physical address 0x%lx requires 5-level paging",
+		    "Nested physical address 0x%lx is > 48 bits and requires 5-level EPT",
nested_paddr);
TEST_ASSERT((nested_paddr % page_size) == 0,
"Nested physical address not on page boundary,\n"
@@ -534,8 +534,7 @@ bool kvm_cpu_has_ept(void)
return ctrl & SECONDARY_EXEC_ENABLE_EPT;
}
-void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint32_t eptp_memslot)
+void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm)
{
TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT");
diff --git a/tools/testing/selftests/kvm/loongarch/arch_timer.c b/tools/testing/selftests/kvm/loongarch/arch_timer.c
new file mode 100644
index 000000000000..355ecac30954
--- /dev/null
+++ b/tools/testing/selftests/kvm/loongarch/arch_timer.c
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * The test validates periodic/one-shot constant timer IRQ using
+ * CSR.TCFG and CSR.TVAL registers.
+ */
+#include "arch_timer.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "timer_test.h"
+#include "ucall_common.h"
+
+static void do_idle(void)
+{
+ unsigned int intid;
+ unsigned long estat;
+
+ __asm__ __volatile__("idle 0" : : : "memory");
+
+ estat = csr_read(LOONGARCH_CSR_ESTAT);
+ intid = !!(estat & BIT(INT_TI));
+
+	/* Make sure the pending timer IRQ has arrived */
+ GUEST_ASSERT_EQ(intid, 1);
+ csr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR);
+}
+
+static void guest_irq_handler(struct ex_regs *regs)
+{
+ unsigned int intid;
+ uint32_t cpu = guest_get_vcpuid();
+ uint64_t xcnt, val, cfg, xcnt_diff_us;
+ struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+ intid = !!(regs->estat & BIT(INT_TI));
+
+ /* Make sure we are dealing with the correct timer IRQ */
+ GUEST_ASSERT_EQ(intid, 1);
+
+ cfg = timer_get_cfg();
+ if (cfg & CSR_TCFG_PERIOD) {
+ WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter - 1);
+ if (shared_data->nr_iter == 0)
+ disable_timer();
+ csr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR);
+ return;
+ }
+
+ /*
+	 * On a real machine, the value of LOONGARCH_CSR_TVAL is BIT_ULL(48) - 1;
+	 * on a virtual machine, its value counts down from BIT_ULL(48) - 1.
+ */
+ val = timer_get_val();
+ xcnt = timer_get_cycles();
+ xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt);
+
+ /* Basic 'timer condition met' check */
+ __GUEST_ASSERT(val > cfg,
+ "val = 0x%lx, cfg = 0x%lx, xcnt_diff_us = 0x%lx",
+ val, cfg, xcnt_diff_us);
+
+ csr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR);
+ WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1);
+}
+
+static void guest_test_period_timer(uint32_t cpu)
+{
+ uint32_t irq_iter, config_iter;
+ uint64_t us;
+ struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+ shared_data->nr_iter = test_args.nr_iter;
+ shared_data->xcnt = timer_get_cycles();
+ us = msecs_to_usecs(test_args.timer_period_ms) + test_args.timer_err_margin_us;
+ timer_set_next_cmp_ms(test_args.timer_period_ms, true);
+
+ for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) {
+ /* Setup a timeout for the interrupt to arrive */
+ udelay(us);
+ }
+
+ irq_iter = READ_ONCE(shared_data->nr_iter);
+ __GUEST_ASSERT(irq_iter == 0,
+ "irq_iter = 0x%x.\n"
+		       "  Guest periodic timer interrupt was not triggered within the specified\n"
+		       "  interval; try increasing the error margin with the [-e] option.\n",
+ irq_iter);
+}
+
+static void guest_test_oneshot_timer(uint32_t cpu)
+{
+ uint32_t irq_iter, config_iter;
+ uint64_t us;
+ struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+ shared_data->nr_iter = 0;
+ shared_data->guest_stage = 0;
+ us = msecs_to_usecs(test_args.timer_period_ms) + test_args.timer_err_margin_us;
+ for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) {
+ shared_data->xcnt = timer_get_cycles();
+
+ /* Setup the next interrupt */
+ timer_set_next_cmp_ms(test_args.timer_period_ms, false);
+ /* Setup a timeout for the interrupt to arrive */
+ udelay(us);
+
+ irq_iter = READ_ONCE(shared_data->nr_iter);
+ __GUEST_ASSERT(config_iter + 1 == irq_iter,
+ "config_iter + 1 = 0x%x, irq_iter = 0x%x.\n"
+			       "  Guest timer interrupt was not triggered within the specified\n"
+			       "  interval; try increasing the error margin with the [-e] option.\n",
+ config_iter + 1, irq_iter);
+ }
+}
+
+static void guest_test_emulate_timer(uint32_t cpu)
+{
+ uint32_t config_iter;
+ uint64_t xcnt_diff_us, us;
+ struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+ local_irq_disable();
+ shared_data->nr_iter = 0;
+ us = msecs_to_usecs(test_args.timer_period_ms);
+ for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) {
+ shared_data->xcnt = timer_get_cycles();
+
+ /* Setup the next interrupt */
+ timer_set_next_cmp_ms(test_args.timer_period_ms, false);
+ do_idle();
+
+ xcnt_diff_us = cycles_to_usec(timer_get_cycles() - shared_data->xcnt);
+ __GUEST_ASSERT(xcnt_diff_us >= us,
+ "xcnt_diff_us = 0x%lx, us = 0x%lx.\n",
+ xcnt_diff_us, us);
+ }
+ local_irq_enable();
+}
+
+static void guest_time_count_test(uint32_t cpu)
+{
+ uint32_t config_iter;
+ unsigned long start, end, prev, us;
+
+	/* Assume the test case starts running within 1 second */
+ start = timer_get_cycles();
+ us = msec_to_cycles(1000);
+ __GUEST_ASSERT(start <= us,
+ "start = 0x%lx, us = 0x%lx.\n",
+ start, us);
+
+ us = msec_to_cycles(test_args.timer_period_ms);
+ for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) {
+ start = timer_get_cycles();
+ end = start + us;
+		/* verify the time counter is always monotonically increasing */
+ while (start < end) {
+ prev = start;
+ start = timer_get_cycles();
+ __GUEST_ASSERT(prev <= start,
+ "prev = 0x%lx, start = 0x%lx.\n",
+ prev, start);
+ }
+ }
+}
+
+static void guest_code(void)
+{
+ uint32_t cpu = guest_get_vcpuid();
+
+	/* must run first, before the timer IRQ is enabled */
+ guest_time_count_test(cpu);
+
+ timer_irq_enable();
+ local_irq_enable();
+ guest_test_period_timer(cpu);
+ guest_test_oneshot_timer(cpu);
+ guest_test_emulate_timer(cpu);
+
+ GUEST_DONE();
+}
+
+struct kvm_vm *test_vm_create(void)
+{
+ struct kvm_vm *vm;
+ int nr_vcpus = test_args.nr_vcpus;
+
+ vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
+ vm_init_descriptor_tables(vm);
+ vm_install_exception_handler(vm, EXCCODE_INT, guest_irq_handler);
+
+ /* Make all the test's cmdline args visible to the guest */
+ sync_global_to_guest(vm, test_args);
+
+ return vm;
+}
+
+void test_vm_cleanup(struct kvm_vm *vm)
+{
+ kvm_vm_free(vm);
+}
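
The test drives the timer through CSR.TCFG writes (via the timer_set_next_cmp_ms() helper it relies on). A sketch of the TCFG layout involved, per the LoongArch architecture manual: bit 0 enables the timer, bit 1 selects periodic mode, and the initial count occupies the bits above them. Constant names here are illustrative:

#include <stdint.h>

#define TCFG_EN_SKETCH		(1ULL << 0)
#define TCFG_PERIOD_SKETCH	(1ULL << 1)

static uint64_t tcfg_value_sketch(uint64_t ticks, int periodic)
{
	/* The low two bits are control bits, so the count is 4-aligned. */
	return (ticks & ~0x3ULL) |
	       (periodic ? TCFG_PERIOD_SKETCH : 0) | TCFG_EN_SKETCH;
}
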
diff --git a/tools/testing/selftests/kvm/mmu_stress_test.c b/tools/testing/selftests/kvm/mmu_stress_test.c
index 37b7e6524533..51c070556f3e 100644
--- a/tools/testing/selftests/kvm/mmu_stress_test.c
+++ b/tools/testing/selftests/kvm/mmu_stress_test.c
@@ -263,8 +263,10 @@ static void calc_default_nr_vcpus(void)
TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)",
errno, strerror(errno));
- nr_vcpus = CPU_COUNT(&possible_mask) * 3/4;
+ nr_vcpus = CPU_COUNT(&possible_mask);
TEST_ASSERT(nr_vcpus > 0, "Uh, no CPUs?");
+ if (nr_vcpus >= 2)
+ nr_vcpus = nr_vcpus * 3/4;
}
int main(int argc, char *argv[])
@@ -360,11 +362,9 @@ int main(int argc, char *argv[])
#ifdef __x86_64__
/* Identity map memory in the guest using 1gb pages. */
- for (i = 0; i < slot_size; i += SZ_1G)
- __virt_pg_map(vm, gpa + i, gpa + i, PG_LEVEL_1G);
+ virt_map_level(vm, gpa, gpa, slot_size, PG_LEVEL_1G);
#else
- for (i = 0; i < slot_size; i += vm->page_size)
- virt_pg_map(vm, gpa + i, gpa + i);
+ virt_map(vm, gpa, gpa, slot_size >> vm->page_shift);
#endif
}
diff --git a/tools/testing/selftests/kvm/pre_fault_memory_test.c b/tools/testing/selftests/kvm/pre_fault_memory_test.c
index f04768c1d2e4..93e603d91311 100644
--- a/tools/testing/selftests/kvm/pre_fault_memory_test.c
+++ b/tools/testing/selftests/kvm/pre_fault_memory_test.c
@@ -17,13 +17,13 @@
#define TEST_NPAGES (TEST_SIZE / PAGE_SIZE)
#define TEST_SLOT 10
-static void guest_code(uint64_t base_gpa)
+static void guest_code(uint64_t base_gva)
{
volatile uint64_t val __used;
int i;
for (i = 0; i < TEST_NPAGES; i++) {
- uint64_t *src = (uint64_t *)(base_gpa + i * PAGE_SIZE);
+ uint64_t *src = (uint64_t *)(base_gva + i * PAGE_SIZE);
val = *src;
}
@@ -161,6 +161,7 @@ static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 base_gpa, u64 offset,
static void __test_pre_fault_memory(unsigned long vm_type, bool private)
{
+ uint64_t gpa, gva, alignment, guest_page_size;
const struct vm_shape shape = {
.mode = VM_MODE_DEFAULT,
.type = vm_type,
@@ -170,35 +171,30 @@ static void __test_pre_fault_memory(unsigned long vm_type, bool private)
struct kvm_vm *vm;
struct ucall uc;
- uint64_t guest_test_phys_mem;
- uint64_t guest_test_virt_mem;
- uint64_t alignment, guest_page_size;
-
vm = vm_create_shape_with_one_vcpu(shape, &vcpu, guest_code);
alignment = guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size;
- guest_test_phys_mem = (vm->max_gfn - TEST_NPAGES) * guest_page_size;
+ gpa = (vm->max_gfn - TEST_NPAGES) * guest_page_size;
#ifdef __s390x__
alignment = max(0x100000UL, guest_page_size);
#else
alignment = SZ_2M;
#endif
- guest_test_phys_mem = align_down(guest_test_phys_mem, alignment);
- guest_test_virt_mem = guest_test_phys_mem & ((1ULL << (vm->va_bits - 1)) - 1);
+ gpa = align_down(gpa, alignment);
+ gva = gpa & ((1ULL << (vm->va_bits - 1)) - 1);
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
- guest_test_phys_mem, TEST_SLOT, TEST_NPAGES,
- private ? KVM_MEM_GUEST_MEMFD : 0);
- virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, TEST_NPAGES);
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa, TEST_SLOT,
+ TEST_NPAGES, private ? KVM_MEM_GUEST_MEMFD : 0);
+ virt_map(vm, gva, gpa, TEST_NPAGES);
if (private)
- vm_mem_set_private(vm, guest_test_phys_mem, TEST_SIZE);
+ vm_mem_set_private(vm, gpa, TEST_SIZE);
- pre_fault_memory(vcpu, guest_test_phys_mem, 0, SZ_2M, 0, private);
- pre_fault_memory(vcpu, guest_test_phys_mem, SZ_2M, PAGE_SIZE * 2, PAGE_SIZE, private);
- pre_fault_memory(vcpu, guest_test_phys_mem, TEST_SIZE, PAGE_SIZE, PAGE_SIZE, private);
+ pre_fault_memory(vcpu, gpa, 0, SZ_2M, 0, private);
+ pre_fault_memory(vcpu, gpa, SZ_2M, PAGE_SIZE * 2, PAGE_SIZE, private);
+ pre_fault_memory(vcpu, gpa, TEST_SIZE, PAGE_SIZE, PAGE_SIZE, private);
- vcpu_args_set(vcpu, 1, guest_test_virt_mem);
+ vcpu_args_set(vcpu, 1, gva);
vcpu_run(vcpu);
run = vcpu->run;
diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c
index 705ab3d7778b..cb54a56990a0 100644
--- a/tools/testing/selftests/kvm/riscv/get-reg-list.c
+++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c
@@ -133,6 +133,7 @@ bool filter_reg(__u64 reg)
case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_SUSP:
case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_STA:
case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_FWFT:
+ case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_MPXY:
case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_EXPERIMENTAL:
case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_VENDOR:
return true;
@@ -639,6 +640,7 @@ static const char *sbi_ext_single_id_to_str(__u64 reg_off)
KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_SUSP),
KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_STA),
KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_FWFT),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_MPXY),
KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_EXPERIMENTAL),
KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_VENDOR),
};
@@ -1142,6 +1144,7 @@ KVM_SBI_EXT_SUBLIST_CONFIG(sta, STA);
KVM_SBI_EXT_SIMPLE_CONFIG(pmu, PMU);
KVM_SBI_EXT_SIMPLE_CONFIG(dbcn, DBCN);
KVM_SBI_EXT_SIMPLE_CONFIG(susp, SUSP);
+KVM_SBI_EXT_SIMPLE_CONFIG(mpxy, MPXY);
KVM_SBI_EXT_SUBLIST_CONFIG(fwft, FWFT);
KVM_ISA_EXT_SUBLIST_CONFIG(aia, AIA);
@@ -1222,6 +1225,7 @@ struct vcpu_reg_list *vcpu_configs[] = {
&config_sbi_pmu,
&config_sbi_dbcn,
&config_sbi_susp,
+ &config_sbi_mpxy,
&config_sbi_fwft,
&config_aia,
&config_fp_f,
diff --git a/tools/testing/selftests/kvm/s390/user_operexec.c b/tools/testing/selftests/kvm/s390/user_operexec.c
new file mode 100644
index 000000000000..714906c1d12a
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390/user_operexec.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Test operation exception forwarding.
+ *
+ * Copyright IBM Corp. 2025
+ *
+ * Authors:
+ * Janosch Frank <frankja@linux.ibm.com>
+ */
+#include "kselftest.h"
+#include "kvm_util.h"
+#include "test_util.h"
+#include "sie.h"
+
+#include <linux/kvm.h>
+
+static void guest_code_instr0(void)
+{
+ asm(".word 0x0000");
+}
+
+static void test_user_instr0(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int rc;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_instr0);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_INSTR0, 0);
+ TEST_ASSERT_EQ(0, rc);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0);
+
+ kvm_vm_free(vm);
+}
+
+static void guest_code_user_operexec(void)
+{
+ asm(".word 0x0807");
+}
+
+static void test_user_operexec(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int rc;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_user_operexec);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0);
+ TEST_ASSERT_EQ(0, rc);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x0807);
+
+ kvm_vm_free(vm);
+
+ /*
+	 * Since user_operexec is a superset of user_instr0, it also
+	 * covers the all-zeroes instruction.
+ */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_instr0);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0);
+ TEST_ASSERT_EQ(0, rc);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0);
+
+ kvm_vm_free(vm);
+}
+
+/* combine user_instr0 and user_operexec */
+static void test_user_operexec_combined(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int rc;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_user_operexec);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_INSTR0, 0);
+ TEST_ASSERT_EQ(0, rc);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0);
+ TEST_ASSERT_EQ(0, rc);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x0807);
+
+ kvm_vm_free(vm);
+
+ /* Reverse enablement order */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_user_operexec);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0);
+ TEST_ASSERT_EQ(0, rc);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_INSTR0, 0);
+ TEST_ASSERT_EQ(0, rc);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x0807);
+
+ kvm_vm_free(vm);
+}
+
+/*
+ * Run all tests above.
+ *
+ * Enablement after VCPU has been added is automatically tested since
+ * we enable the capability after VCPU creation.
+ */
+static struct testdef {
+ const char *name;
+ void (*test)(void);
+} testlist[] = {
+ { "instr0", test_user_instr0 },
+ { "operexec", test_user_operexec },
+ { "operexec_combined", test_user_operexec_combined},
+};
+
+int main(int argc, char *argv[])
+{
+ int idx;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_USER_INSTR0));
+
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(testlist));
+ for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+ testlist[idx].test();
+ ksft_test_result_pass("%s\n", testlist[idx].name);
+ }
+ ksft_finished();
+}
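
__vm_enable_cap() is the selftest wrapper around the KVM_ENABLE_CAP VM ioctl. A minimal raw-ioctl sketch of what it is assumed to boil down to for the no-argument capabilities used here:

#include <linux/kvm.h>
#include <sys/ioctl.h>

static int enable_vm_cap_sketch(int vm_fd, unsigned int cap)
{
	struct kvm_enable_cap ec = { .cap = cap };

	/* Returns 0 on success, -1 with errno set otherwise. */
	return ioctl(vm_fd, KVM_ENABLE_CAP, &ec);
}
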
diff --git a/tools/testing/selftests/kvm/x86/hyperv_features.c b/tools/testing/selftests/kvm/x86/hyperv_features.c
index 99d327084172..130b9ce7e5dd 100644
--- a/tools/testing/selftests/kvm/x86/hyperv_features.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_features.c
@@ -94,7 +94,7 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) {
input = pgs_gpa;
- output = pgs_gpa + 4096;
+ output = pgs_gpa + PAGE_SIZE;
} else {
input = output = 0;
}
diff --git a/tools/testing/selftests/kvm/x86/hyperv_ipi.c b/tools/testing/selftests/kvm/x86/hyperv_ipi.c
index 2b5b4bc6ef7e..ca61836c4e32 100644
--- a/tools/testing/selftests/kvm/x86/hyperv_ipi.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_ipi.c
@@ -102,7 +102,7 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
/* 'Slow' HvCallSendSyntheticClusterIpi to RECEIVER_VCPU_ID_1 */
ipi->vector = IPI_VECTOR;
ipi->cpu_mask = 1 << RECEIVER_VCPU_ID_1;
- hyperv_hypercall(HVCALL_SEND_IPI, pgs_gpa, pgs_gpa + 4096);
+ hyperv_hypercall(HVCALL_SEND_IPI, pgs_gpa, pgs_gpa + PAGE_SIZE);
nop_loop();
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
@@ -116,13 +116,13 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
GUEST_SYNC(stage++);
/* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_1 */
- memset(hcall_page, 0, 4096);
+ memset(hcall_page, 0, PAGE_SIZE);
ipi_ex->vector = IPI_VECTOR;
ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
ipi_ex->vp_set.valid_bank_mask = 1 << 0;
ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1);
hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET),
- pgs_gpa, pgs_gpa + 4096);
+ pgs_gpa, pgs_gpa + PAGE_SIZE);
nop_loop();
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
@@ -138,13 +138,13 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
GUEST_SYNC(stage++);
/* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_2 */
- memset(hcall_page, 0, 4096);
+ memset(hcall_page, 0, PAGE_SIZE);
ipi_ex->vector = IPI_VECTOR;
ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
ipi_ex->vp_set.valid_bank_mask = 1 << 1;
ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_2 - 64);
hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET),
- pgs_gpa, pgs_gpa + 4096);
+ pgs_gpa, pgs_gpa + PAGE_SIZE);
nop_loop();
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ipis_expected[0]);
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
@@ -160,14 +160,14 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
GUEST_SYNC(stage++);
/* 'Slow' HvCallSendSyntheticClusterIpiEx to both RECEIVER_VCPU_ID_{1,2} */
- memset(hcall_page, 0, 4096);
+ memset(hcall_page, 0, PAGE_SIZE);
ipi_ex->vector = IPI_VECTOR;
ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
ipi_ex->vp_set.valid_bank_mask = 1 << 1 | 1;
ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1);
ipi_ex->vp_set.bank_contents[1] = BIT(RECEIVER_VCPU_ID_2 - 64);
hyperv_hypercall(HVCALL_SEND_IPI_EX | (2 << HV_HYPERCALL_VARHEAD_OFFSET),
- pgs_gpa, pgs_gpa + 4096);
+ pgs_gpa, pgs_gpa + PAGE_SIZE);
nop_loop();
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
@@ -183,10 +183,10 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
GUEST_SYNC(stage++);
/* 'Slow' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL */
- memset(hcall_page, 0, 4096);
+ memset(hcall_page, 0, PAGE_SIZE);
ipi_ex->vector = IPI_VECTOR;
ipi_ex->vp_set.format = HV_GENERIC_SET_ALL;
- hyperv_hypercall(HVCALL_SEND_IPI_EX, pgs_gpa, pgs_gpa + 4096);
+ hyperv_hypercall(HVCALL_SEND_IPI_EX, pgs_gpa, pgs_gpa + PAGE_SIZE);
nop_loop();
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
diff --git a/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c
index 077cd0ec3040..a3b7ce155981 100644
--- a/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c
@@ -621,7 +621,7 @@ int main(int argc, char *argv[])
for (i = 0; i < NTEST_PAGES; i++) {
pte = vm_get_page_table_entry(vm, data->test_pages + i * PAGE_SIZE);
gpa = addr_hva2gpa(vm, pte);
- __virt_pg_map(vm, gva + PAGE_SIZE * i, gpa & PAGE_MASK, PG_LEVEL_4K);
+ virt_pg_map(vm, gva + PAGE_SIZE * i, gpa & PAGE_MASK);
data->test_pages_pte[i] = gva + (gpa & ~PAGE_MASK);
}
diff --git a/tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86/nested_close_kvm_test.c
index dad988351493..f001cb836bfa 100644
--- a/tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c
+++ b/tools/testing/selftests/kvm/x86/nested_close_kvm_test.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * vmx_close_while_nested
- *
* Copyright (C) 2019, Red Hat, Inc.
*
* Verify that nothing bad happens if a KVM user exits with open
@@ -12,6 +10,7 @@
#include "kvm_util.h"
#include "processor.h"
#include "vmx.h"
+#include "svm_util.h"
#include <string.h>
#include <sys/ioctl.h>
@@ -22,6 +21,8 @@ enum {
PORT_L0_EXIT = 0x2000,
};
+#define L2_GUEST_STACK_SIZE 64
+
static void l2_guest_code(void)
{
/* Exit to L0 */
@@ -29,9 +30,8 @@ static void l2_guest_code(void)
: : [port] "d" (PORT_L0_EXIT) : "rax");
}
-static void l1_guest_code(struct vmx_pages *vmx_pages)
+static void l1_vmx_code(struct vmx_pages *vmx_pages)
{
-#define L2_GUEST_STACK_SIZE 64
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
@@ -45,19 +45,43 @@ static void l1_guest_code(struct vmx_pages *vmx_pages)
GUEST_ASSERT(0);
}
+static void l1_svm_code(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ /* Prepare the VMCB for L2 execution. */
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(0);
+}
+
+static void l1_guest_code(void *data)
+{
+ if (this_cpu_has(X86_FEATURE_VMX))
+ l1_vmx_code(data);
+ else
+ l1_svm_code(data);
+}
+
int main(int argc, char *argv[])
{
- vm_vaddr_t vmx_pages_gva;
+ vm_vaddr_t guest_gva;
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) ||
+ kvm_cpu_has(X86_FEATURE_SVM));
vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
- /* Allocate VMX pages and shared descriptors (vmx_pages). */
- vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vcpu, 1, vmx_pages_gva);
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ vcpu_alloc_vmx(vm, &guest_gva);
+ else
+ vcpu_alloc_svm(vm, &guest_gva);
+
+ vcpu_args_set(vcpu, 1, guest_gva);
for (;;) {
volatile struct kvm_run *run = vcpu->run;
diff --git a/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c b/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c
new file mode 100644
index 000000000000..a6b6da9cf7fe
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025, Google LLC.
+ *
+ * This test verifies that L1 fails to enter L2 with an invalid CR3, and
+ * succeeds otherwise.
+ */
+#include "kvm_util.h"
+#include "vmx.h"
+#include "svm_util.h"
+#include "kselftest.h"
+
+#define L2_GUEST_STACK_SIZE 64
+
+static void l2_guest_code(void)
+{
+ vmcall();
+}
+
+static void l1_svm_code(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ uintptr_t save_cr3;
+
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* Try to run L2 with invalid CR3 and make sure it fails */
+ save_cr3 = svm->vmcb->save.cr3;
+ svm->vmcb->save.cr3 = -1ull;
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_ERR);
+
+ /* Now restore CR3 and make sure L2 runs successfully */
+ svm->vmcb->save.cr3 = save_cr3;
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+
+ GUEST_DONE();
+}
+
+static void l1_vmx_code(struct vmx_pages *vmx_pages)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ uintptr_t save_cr3;
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* Try to run L2 with invalid CR3 and make sure it fails */
+ save_cr3 = vmreadz(GUEST_CR3);
+ vmwrite(GUEST_CR3, -1ull);
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) ==
+ (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE));
+
+ /* Now restore CR3 and make sure L2 runs successfully */
+ vmwrite(GUEST_CR3, save_cr3);
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ GUEST_DONE();
+}
+
+static void l1_guest_code(void *data)
+{
+ if (this_cpu_has(X86_FEATURE_VMX))
+ l1_vmx_code(data);
+ else
+ l1_svm_code(data);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ vm_vaddr_t guest_gva = 0;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) ||
+ kvm_cpu_has(X86_FEATURE_SVM));
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ vcpu_alloc_vmx(vm, &guest_gva);
+ else
+ vcpu_alloc_svm(vm, &guest_gva);
+
+ vcpu_args_set(vcpu, 1, guest_gva);
+
+ for (;;) {
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
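
A note on why CR3 = -1ull is rejected: on both VMX and SVM, guest CR3 address bits above the CPU's MAXPHYADDR are reserved, so an all-ones value fails the VM-entry / VMRUN consistency checks asserted above. A sketch of that reserved-bit test, where maxphyaddr would come from CPUID 0x80000008.EAX[7:0]:

#include <stdbool.h>
#include <stdint.h>

static bool cr3_addr_bits_valid_sketch(uint64_t cr3, int maxphyaddr)
{
	/* Bits [63:maxphyaddr] of CR3 must be zero. */
	uint64_t reserved = ~((1ULL << maxphyaddr) - 1);

	return !(cr3 & reserved);
}
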
diff --git a/tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c
index 2ceb5c78c442..2839f650e5c9 100644
--- a/tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c
+++ b/tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * vmx_tsc_adjust_test
- *
* Copyright (C) 2018, Google LLC.
*
* IA32_TSC_ADJUST test
@@ -22,6 +20,7 @@
#include "kvm_util.h"
#include "processor.h"
#include "vmx.h"
+#include "svm_util.h"
#include <string.h>
#include <sys/ioctl.h>
@@ -35,6 +34,8 @@
#define TSC_ADJUST_VALUE (1ll << 32)
#define TSC_OFFSET_VALUE -(1ll << 48)
+#define L2_GUEST_STACK_SIZE 64
+
enum {
PORT_ABORT = 0x1000,
PORT_REPORT,
@@ -72,42 +73,47 @@ static void l2_guest_code(void)
__asm__ __volatile__("vmcall");
}
-static void l1_guest_code(struct vmx_pages *vmx_pages)
+static void l1_guest_code(void *data)
{
-#define L2_GUEST_STACK_SIZE 64
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
- uint32_t control;
- uintptr_t save_cr3;
+ /* Set TSC from L1 and make sure TSC_ADJUST is updated correctly */
GUEST_ASSERT(rdtsc() < TSC_ADJUST_VALUE);
wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE);
check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
- GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
- GUEST_ASSERT(load_vmcs(vmx_pages));
-
- /* Prepare the VMCS for L2 execution. */
- prepare_vmcs(vmx_pages, l2_guest_code,
- &l2_guest_stack[L2_GUEST_STACK_SIZE]);
- control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
- control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
- vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
- vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE);
-
- /* Jump into L2. First, test failure to load guest CR3. */
- save_cr3 = vmreadz(GUEST_CR3);
- vmwrite(GUEST_CR3, -1ull);
- GUEST_ASSERT(!vmlaunch());
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) ==
- (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE));
- check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
- vmwrite(GUEST_CR3, save_cr3);
-
- GUEST_ASSERT(!vmlaunch());
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ /*
+ * Run L2 with TSC_OFFSET. L2 will write to TSC, and L1 is not
+ * intercepting the write so it should update L1's TSC_ADJUST.
+ */
+ if (this_cpu_has(X86_FEATURE_VMX)) {
+ struct vmx_pages *vmx_pages = data;
+ uint32_t control;
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+ control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+ vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE);
+
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ } else {
+ struct svm_test_data *svm = data;
+
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ svm->vmcb->control.tsc_offset = TSC_OFFSET_VALUE;
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ }
check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE);
-
GUEST_DONE();
}
@@ -119,16 +125,19 @@ static void report(int64_t val)
int main(int argc, char *argv[])
{
- vm_vaddr_t vmx_pages_gva;
+ vm_vaddr_t nested_gva;
struct kvm_vcpu *vcpu;
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) ||
+ kvm_cpu_has(X86_FEATURE_SVM));
- vm = vm_create_with_one_vcpu(&vcpu, (void *) l1_guest_code);
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ vcpu_alloc_vmx(vm, &nested_gva);
+ else
+ vcpu_alloc_svm(vm, &nested_gva);
- /* Allocate VMX pages and shared descriptors (vmx_pages). */
- vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vcpu, 1, vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, nested_gva);
for (;;) {
struct ucall uc;
diff --git a/tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c
index 1759fa5cb3f2..4260c9e4f489 100644
--- a/tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c
+++ b/tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c
@@ -13,6 +13,7 @@
#include "kvm_util.h"
#include "vmx.h"
+#include "svm_util.h"
#include "kselftest.h"
/* L2 is scaled up (from L1's perspective) by this factor */
@@ -79,7 +80,30 @@ static void l2_guest_code(void)
__asm__ __volatile__("vmcall");
}
-static void l1_guest_code(struct vmx_pages *vmx_pages)
+static void l1_svm_code(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ /* check that L1's frequency looks alright before launching L2 */
+ check_tsc_freq(UCHECK_L1);
+
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* enable TSC scaling for L2 */
+ wrmsr(MSR_AMD64_TSC_RATIO, L2_SCALE_FACTOR << 32);
+
+ /* launch L2 */
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+
+ /* check that L1's frequency still looks good */
+ check_tsc_freq(UCHECK_L1);
+
+ GUEST_DONE();
+}
+
+static void l1_vmx_code(struct vmx_pages *vmx_pages)
{
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
uint32_t control;
@@ -116,11 +140,19 @@ static void l1_guest_code(struct vmx_pages *vmx_pages)
GUEST_DONE();
}
+static void l1_guest_code(void *data)
+{
+ if (this_cpu_has(X86_FEATURE_VMX))
+ l1_vmx_code(data);
+ else
+ l1_svm_code(data);
+}
+
int main(int argc, char *argv[])
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
- vm_vaddr_t vmx_pages_gva;
+ vm_vaddr_t guest_gva = 0;
uint64_t tsc_start, tsc_end;
uint64_t tsc_khz;
@@ -129,7 +161,8 @@ int main(int argc, char *argv[])
uint64_t l1_tsc_freq = 0;
uint64_t l2_tsc_freq = 0;
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) ||
+ kvm_cpu_has(X86_FEATURE_SVM));
TEST_REQUIRE(kvm_has_cap(KVM_CAP_TSC_CONTROL));
TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
@@ -152,8 +185,13 @@ int main(int argc, char *argv[])
printf("real TSC frequency is around: %"PRIu64"\n", l0_tsc_freq);
vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
- vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ vcpu_alloc_vmx(vm, &guest_gva);
+ else
+ vcpu_alloc_svm(vm, &guest_gva);
+
+ vcpu_args_set(vcpu, 1, guest_gva);
tsc_khz = __vcpu_ioctl(vcpu, KVM_GET_TSC_KHZ, NULL);
TEST_ASSERT(tsc_khz != -1, "vcpu ioctl KVM_GET_TSC_KHZ failed");
diff --git a/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c
index 82a8d88b5338..1969f4ab9b28 100644
--- a/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c
+++ b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c
@@ -380,7 +380,7 @@ static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t
struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
pthread_t threads[KVM_MAX_VCPUS];
struct kvm_vm *vm;
- int memfd, i, r;
+ int memfd, i;
const struct vm_shape shape = {
.mode = VM_MODE_DEFAULT,
@@ -428,11 +428,8 @@ static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t
* should prevent the VM from being fully destroyed until the last
* reference to the guest_memfd is also put.
*/
- r = fallocate(memfd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, memfd_size);
- TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
-
- r = fallocate(memfd, FALLOC_FL_KEEP_SIZE, 0, memfd_size);
- TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
+ kvm_fallocate(memfd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, memfd_size);
+ kvm_fallocate(memfd, FALLOC_FL_KEEP_SIZE, 0, memfd_size);
close(memfd);
}
diff --git a/tools/testing/selftests/kvm/x86/sev_smoke_test.c b/tools/testing/selftests/kvm/x86/sev_smoke_test.c
index 77256c89bb8d..86ad1c7d068f 100644
--- a/tools/testing/selftests/kvm/x86/sev_smoke_test.c
+++ b/tools/testing/selftests/kvm/x86/sev_smoke_test.c
@@ -104,7 +104,7 @@ static void test_sync_vmsa(uint32_t type, uint64_t policy)
vm_sev_launch(vm, policy, NULL);
/* This page is shared, so make it decrypted. */
- memset(hva, 0, 4096);
+ memset(hva, 0, PAGE_SIZE);
vcpu_run(vcpu);
diff --git a/tools/testing/selftests/kvm/x86/state_test.c b/tools/testing/selftests/kvm/x86/state_test.c
index 141b7fc0c965..f2c7a1c297e3 100644
--- a/tools/testing/selftests/kvm/x86/state_test.c
+++ b/tools/testing/selftests/kvm/x86/state_test.c
@@ -141,7 +141,7 @@ static void __attribute__((__flatten__)) guest_code(void *arg)
if (this_cpu_has(X86_FEATURE_XSAVE)) {
uint64_t supported_xcr0 = this_cpu_supported_xcr0();
- uint8_t buffer[4096];
+ uint8_t buffer[PAGE_SIZE];
memset(buffer, 0xcc, sizeof(buffer));
diff --git a/tools/testing/selftests/kvm/x86/userspace_io_test.c b/tools/testing/selftests/kvm/x86/userspace_io_test.c
index 9481cbcf284f..be7d72f3c029 100644
--- a/tools/testing/selftests/kvm/x86/userspace_io_test.c
+++ b/tools/testing/selftests/kvm/x86/userspace_io_test.c
@@ -85,7 +85,7 @@ int main(int argc, char *argv[])
regs.rcx = 1;
if (regs.rcx == 3)
regs.rcx = 8192;
- memset((void *)run + run->io.data_offset, 0xaa, 4096);
+ memset((void *)run + run->io.data_offset, 0xaa, PAGE_SIZE);
vcpu_regs_set(vcpu, &regs);
}
diff --git a/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c
index fa512d033205..98cb6bdab3e6 100644
--- a/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c
+++ b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c
@@ -120,17 +120,17 @@ static void test_vmx_dirty_log(bool enable_ept)
* GPAs as the EPT enabled case.
*/
if (enable_ept) {
- prepare_eptp(vmx, vm, 0);
+ prepare_eptp(vmx, vm);
nested_map_memslot(vmx, vm, 0);
- nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096);
- nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096);
+ nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, PAGE_SIZE);
+ nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, PAGE_SIZE);
}
bmap = bitmap_zalloc(TEST_MEM_PAGES);
host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM);
while (!done) {
- memset(host_test_mem, 0xaa, TEST_MEM_PAGES * 4096);
+ memset(host_test_mem, 0xaa, TEST_MEM_PAGES * PAGE_SIZE);
vcpu_run(vcpu);
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
@@ -153,9 +153,9 @@ static void test_vmx_dirty_log(bool enable_ept)
}
TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty");
- TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest");
+ TEST_ASSERT(host_test_mem[PAGE_SIZE / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest");
TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty");
- TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest");
+	TEST_ASSERT(host_test_mem[2 * PAGE_SIZE / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest");
break;
case UCALL_DONE:
done = true;
diff --git a/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c b/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c
new file mode 100644
index 000000000000..cf1d2d1f2a8f
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025, Google LLC.
+ *
+ * Test KVM's ability to save and restore nested state when the L1 guest
+ * is using 5-level paging and the L2 guest is using 4-level paging.
+ *
+ * This test would have failed prior to commit 9245fd6b8531 ("KVM: x86:
+ * model canonical checks more precisely").
+ */
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#define LA57_GS_BASE 0xff2bc0311fb00000ull
+
+static void l2_guest_code(void)
+{
+ /*
+ * Sync with L0 to trigger save/restore. After
+ * resuming, execute VMCALL to exit back to L1.
+ */
+ GUEST_SYNC(1);
+ vmcall();
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ u64 guest_cr4;
+ vm_paddr_t pml5_pa, pml4_pa;
+ u64 *pml5;
+ u64 exit_reason;
+
+ /* Set GS_BASE to a value that is only canonical with LA57. */
+ wrmsr(MSR_GS_BASE, LA57_GS_BASE);
+ GUEST_ASSERT(rdmsr(MSR_GS_BASE) == LA57_GS_BASE);
+
+ GUEST_ASSERT(vmx_pages->vmcs_gpa);
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /*
+ * Set up L2 with a 4-level page table by pointing its CR3 to
+ * L1's first PML4 table and clearing CR4.LA57. This creates
+ * the CR4.LA57 mismatch that exercises the bug.
+ */
+ pml5_pa = get_cr3() & PHYSICAL_PAGE_MASK;
+ pml5 = (u64 *)pml5_pa;
+ pml4_pa = pml5[0] & PHYSICAL_PAGE_MASK;
+ vmwrite(GUEST_CR3, pml4_pa);
+
+ guest_cr4 = vmreadz(GUEST_CR4);
+ guest_cr4 &= ~X86_CR4_LA57;
+ vmwrite(GUEST_CR4, guest_cr4);
+
+ GUEST_ASSERT(!vmlaunch());
+
+ exit_reason = vmreadz(VM_EXIT_REASON);
+ GUEST_ASSERT(exit_reason == EXIT_REASON_VMCALL);
+}
+
+void guest_code(struct vmx_pages *vmx_pages)
+{
+ l1_guest_code(vmx_pages);
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t vmx_pages_gva = 0;
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ struct kvm_x86_state *state;
+ struct ucall uc;
+ int stage;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_LA57));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ /*
+ * L1 needs to read its own PML5 table to set up L2. Identity map
+ * the PML5 table to facilitate this.
+ */
+ virt_map(vm, vm->pgd, vm->pgd, 1);
+
+ vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+ for (stage = 1;; stage++) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ TEST_ASSERT(uc.args[1] == stage,
+ "Expected stage %d, got stage %lu", stage, (ulong)uc.args[1]);
+ if (stage == 1) {
+ pr_info("L2 is active; performing save/restore.\n");
+ state = vcpu_save_state(vcpu);
+
+ kvm_vm_release(vm);
+
+ /* Restore state in a new VM. */
+ vcpu = vm_recreate_with_one_vcpu(vm);
+ vcpu_load_state(vcpu, state);
+ kvm_x86_state_cleanup(state);
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
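
For context, LA57_GS_BASE above is canonical only under 5-level paging, which is exactly the property commit 9245fd6b8531 models. A small sketch of the canonicality rule the test depends on (is_canonical() is a hypothetical helper, not part of the selftest library; the arithmetic right shift of a negative value assumes the usual GCC/Clang behaviour):

    /* A VA is canonical iff the bits above the implemented width
     * sign-extend the top implemented bit: bit 47 for 4-level (48-bit)
     * paging, bit 56 for 5-level (57-bit) paging.
     */
    static bool is_canonical(uint64_t va, bool la57)
    {
        unsigned int va_bits = la57 ? 57 : 48;
        int64_t sext = (int64_t)(va << (64 - va_bits)) >> (64 - va_bits);

        return (uint64_t)sext == va;
    }

    /* 0xff2bc0311fb00000 satisfies only the la57 == true case, so restoring
     * it with CR4.LA57 mis-modelled would fail the canonical check.
     */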
diff --git a/tools/testing/selftests/kvm/x86/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c
index 35cb9de54a82..ae4a4b6c05ca 100644
--- a/tools/testing/selftests/kvm/x86/xapic_ipi_test.c
+++ b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c
@@ -256,7 +256,7 @@ void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
int nodes = 0;
time_t start_time, last_update, now;
time_t interval_secs = 1;
- int i, r;
+ int i;
int from, to;
unsigned long bit;
uint64_t hlt_count;
@@ -267,9 +267,8 @@ void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
delay_usecs);
/* Get set of first 64 numa nodes available */
- r = get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8,
+ kvm_get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8,
0, MPOL_F_MEMS_ALLOWED);
- TEST_ASSERT(r == 0, "get_mempolicy failed errno=%d", errno);
fprintf(stderr, "Numa nodes found amongst first %lu possible nodes "
"(each 1-bit indicates node is present): %#lx\n",
diff --git a/tools/testing/selftests/mm/guard-regions.c b/tools/testing/selftests/mm/guard-regions.c
index 8dd81c0a4a5a..795bf3f39f44 100644
--- a/tools/testing/selftests/mm/guard-regions.c
+++ b/tools/testing/selftests/mm/guard-regions.c
@@ -94,6 +94,7 @@ static void *mmap_(FIXTURE_DATA(guard_regions) * self,
case ANON_BACKED:
flags |= MAP_PRIVATE | MAP_ANON;
fd = -1;
+ offset = 0;
break;
case SHMEM_BACKED:
case LOCAL_FILE_BACKED:
@@ -260,6 +261,54 @@ static bool is_buf_eq(char *buf, size_t size, char chr)
return true;
}
+/*
+ * Some file systems (e.g. overlayfs) change merge-sensitive parameters in
+ * their .mmap callback, and until .mmap_prepare is implemented everywhere
+ * this results in an unexpected failure to merge.
+ *
+ * Perform a simple test to see whether the local file system suffers from
+ * this; if it does, we can skip test logic that assumes local file system
+ * merging is sane.
+ */
+static bool local_fs_has_sane_mmap(FIXTURE_DATA(guard_regions) * self,
+ const FIXTURE_VARIANT(guard_regions) * variant)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr, *ptr2;
+ struct procmap_fd procmap;
+
+ if (variant->backing != LOCAL_FILE_BACKED)
+ return true;
+
+ /* Map 10 pages. */
+ ptr = mmap_(self, variant, NULL, 10 * page_size, PROT_READ | PROT_WRITE, 0, 0);
+ if (ptr == MAP_FAILED)
+ return false;
+ /* Unmap the middle. */
+ munmap(&ptr[5 * page_size], page_size);
+
+ /* Map again. */
+ ptr2 = mmap_(self, variant, &ptr[5 * page_size], page_size, PROT_READ | PROT_WRITE,
+ MAP_FIXED, 5 * page_size);
+
+ if (ptr2 == MAP_FAILED)
+ return false;
+
+ /* Now make sure they all merged. */
+ if (open_self_procmap(&procmap) != 0)
+ return false;
+ if (!find_vma_procmap(&procmap, ptr))
+ return false;
+ if (procmap.query.vma_start != (unsigned long)ptr)
+ return false;
+ if (procmap.query.vma_end != (unsigned long)ptr + 10 * page_size)
+ return false;
+ close_procmap(&procmap);
+
+ return true;
+}
+
FIXTURE_SETUP(guard_regions)
{
self->page_size = (unsigned long)sysconf(_SC_PAGESIZE);
@@ -2138,4 +2187,140 @@ TEST_F(guard_regions, pagemap_scan)
ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
}
+TEST_F(guard_regions, collapse)
+{
+ const unsigned long page_size = self->page_size;
+ const unsigned long size = 2 * HPAGE_SIZE;
+ const unsigned long num_pages = size / page_size;
+ char *ptr;
+ int i;
+
+ /* The file must be the correct size for non-anon tests. */
+ if (variant->backing != ANON_BACKED)
+ ASSERT_EQ(ftruncate(self->fd, size), 0);
+
+ /*
+ * We must close and re-open local-file backed as read-only for
+ * CONFIG_READ_ONLY_THP_FOR_FS to work.
+ */
+ if (variant->backing == LOCAL_FILE_BACKED) {
+ ASSERT_EQ(close(self->fd), 0);
+
+ self->fd = open(self->path, O_RDONLY);
+ ASSERT_GE(self->fd, 0);
+ }
+
+ ptr = mmap_(self, variant, NULL, size, PROT_READ, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Prevent being faulted-in as huge. */
+ ASSERT_EQ(madvise(ptr, size, MADV_NOHUGEPAGE), 0);
+ /* Fault in. */
+ ASSERT_EQ(madvise(ptr, size, MADV_POPULATE_READ), 0);
+
+ /* Install guard regions in every other page. */
+ for (i = 0; i < num_pages; i += 2) {
+ char *ptr_page = &ptr[i * page_size];
+
+ ASSERT_EQ(madvise(ptr_page, page_size, MADV_GUARD_INSTALL), 0);
+ /* Accesses should now fail. */
+ ASSERT_FALSE(try_read_buf(ptr_page));
+ }
+
+ /* Allow huge page throughout region. */
+ ASSERT_EQ(madvise(ptr, size, MADV_HUGEPAGE), 0);
+
+ /*
+ * Now collapse the entire region. This should fail in all cases.
+ *
+ * The madvise() call will also fail if CONFIG_READ_ONLY_THP_FOR_FS is
+ * not set for the local file case, but we can't differentiate whether
+ * this occurred or if the collapse was rightly rejected.
+ */
+ EXPECT_NE(madvise(ptr, size, MADV_COLLAPSE), 0);
+
+ /*
+ * If we introduce a bug that causes the collapse to succeed, gather
+ * data on whether guard regions are at least preserved. The test will
+ * fail at this point in any case.
+ */
+ for (i = 0; i < num_pages; i += 2) {
+ char *ptr_page = &ptr[i * page_size];
+
+ /* Accesses should still fail. */
+ ASSERT_FALSE(try_read_buf(ptr_page));
+ }
+}
+
+TEST_F(guard_regions, smaps)
+{
+ const unsigned long page_size = self->page_size;
+ struct procmap_fd procmap;
+ char *ptr, *ptr2;
+ int i;
+
+ /* Map a region. */
+ ptr = mmap_(self, variant, NULL, 10 * page_size, PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* We shouldn't yet see a guard flag. */
+ ASSERT_FALSE(check_vmflag_guard(ptr));
+
+ /* Install a single guard region. */
+ ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0);
+
+ /* Now we should see a guard flag. */
+ ASSERT_TRUE(check_vmflag_guard(ptr));
+
+ /*
+ * Removing the guard region should not change things because we simply
+ * cannot accurately track whether a given VMA has had all of its guard
+ * regions removed.
+ */
+ ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_REMOVE), 0);
+ ASSERT_TRUE(check_vmflag_guard(ptr));
+
+ /* Install guard regions throughout. */
+ for (i = 0; i < 10; i++) {
+ ASSERT_EQ(madvise(&ptr[i * page_size], page_size, MADV_GUARD_INSTALL), 0);
+ /* We should always see the guard region flag. */
+ ASSERT_TRUE(check_vmflag_guard(ptr));
+ }
+
+ /* Split into two VMAs. */
+ ASSERT_EQ(munmap(&ptr[4 * page_size], page_size), 0);
+
+ /* Both VMAs should have the guard flag set. */
+ ASSERT_TRUE(check_vmflag_guard(ptr));
+ ASSERT_TRUE(check_vmflag_guard(&ptr[5 * page_size]));
+
+ /*
+ * If the local file system is unable to merge VMAs due to having
+ * unusual characteristics, there is no point in asserting merge
+ * behaviour.
+ */
+ if (!local_fs_has_sane_mmap(self, variant)) {
+ TH_LOG("local filesystem does not support sane merging skipping merge test");
+ return;
+ }
+
+ /* Map a fresh VMA between the two split VMAs. */
+ ptr2 = mmap_(self, variant, &ptr[4 * page_size], page_size,
+ PROT_READ | PROT_WRITE, MAP_FIXED, 4 * page_size);
+ ASSERT_NE(ptr2, MAP_FAILED);
+
+ /*
+ * Check the procmap to ensure that this VMA merged with the adjacent
+ * two. The guard region flag is 'sticky' so should not preclude
+ * merging.
+ */
+ ASSERT_EQ(open_self_procmap(&procmap), 0);
+ ASSERT_TRUE(find_vma_procmap(&procmap, ptr));
+ ASSERT_EQ(procmap.query.vma_start, (unsigned long)ptr);
+ ASSERT_EQ(procmap.query.vma_end, (unsigned long)ptr + 10 * page_size);
+ ASSERT_EQ(close_procmap(&procmap), 0);
+ /* And, of course, this VMA should have the guard flag set. */
+ ASSERT_TRUE(check_vmflag_guard(ptr));
+}
+
TEST_HARNESS_MAIN
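
A condensed sketch of the guard-region lifecycle these tests exercise (assumes a kernel with MADV_GUARD_INSTALL/MADV_GUARD_REMOVE; error handling elided for brevity):

    char *ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

    /* Turn page 0 into a guard region: any access now faults. */
    madvise(ptr, page_size, MADV_GUARD_INSTALL);

    /* Lift the guard; the page behaves as ordinary anonymous memory again. */
    madvise(ptr, page_size, MADV_GUARD_REMOVE);

As the smaps test above demonstrates, the "gu" VMA flag remains set even after removal, because the kernel cannot cheaply prove a VMA no longer contains any guard regions.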
diff --git a/tools/testing/selftests/mm/gup_test.c b/tools/testing/selftests/mm/gup_test.c
index 8900b840c17a..40c1538a17b4 100644
--- a/tools/testing/selftests/mm/gup_test.c
+++ b/tools/testing/selftests/mm/gup_test.c
@@ -17,9 +17,8 @@
#define MB (1UL << 20)
-/* Just the flags we need, copied from mm.h: */
+/* Just the flags we need, copied from the kernel internals. */
#define FOLL_WRITE 0x01 /* check pte is writable */
-#define FOLL_TOUCH 0x02 /* mark page accessed */
#define GUP_TEST_FILE "/sys/kernel/debug/gup_test"
@@ -93,7 +92,7 @@ int main(int argc, char **argv)
{
struct gup_test gup = { 0 };
int filed, i, opt, nr_pages = 1, thp = -1, write = 1, nthreads = 1, ret;
- int flags = MAP_PRIVATE, touch = 0;
+ int flags = MAP_PRIVATE;
char *file = "/dev/zero";
pthread_t *tid;
char *p;
@@ -170,10 +169,6 @@ int main(int argc, char **argv)
case 'H':
flags |= (MAP_HUGETLB | MAP_ANONYMOUS);
break;
- case 'z':
- /* fault pages in gup, do not fault in userland */
- touch = 1;
- break;
default:
ksft_exit_fail_msg("Wrong argument\n");
}
@@ -244,18 +239,9 @@ int main(int argc, char **argv)
else if (thp == 0)
madvise(p, size, MADV_NOHUGEPAGE);
- /*
- * FOLL_TOUCH, in gup_test, is used as an either/or case: either
- * fault pages in from the kernel via FOLL_TOUCH, or fault them
- * in here, from user space. This allows comparison of performance
- * between those two cases.
- */
- if (touch) {
- gup.gup_flags |= FOLL_TOUCH;
- } else {
- for (; (unsigned long)p < gup.addr + size; p += psize())
- p[0] = 0;
- }
+ /* Fault them in here, from user space. */
+ for (; (unsigned long)p < gup.addr + size; p += psize())
+ p[0] = 0;
tid = malloc(sizeof(pthread_t) * nthreads);
assert(tid);
diff --git a/tools/testing/selftests/mm/hmm-tests.c b/tools/testing/selftests/mm/hmm-tests.c
index 15aadaf24a66..5a1525f72daa 100644
--- a/tools/testing/selftests/mm/hmm-tests.c
+++ b/tools/testing/selftests/mm/hmm-tests.c
@@ -25,6 +25,7 @@
#include <sys/stat.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
+#include <sys/time.h>
/*
@@ -50,6 +51,8 @@ enum {
HMM_COHERENCE_DEVICE_TWO,
};
+#define ONEKB (1 << 10)
+#define ONEMEG (1 << 20)
#define TWOMEG (1 << 21)
#define HMM_BUFFER_SIZE (1024 << 12)
#define HMM_PATH_MAX 64
@@ -207,8 +210,10 @@ static void hmm_buffer_free(struct hmm_buffer *buffer)
if (buffer == NULL)
return;
- if (buffer->ptr)
+ if (buffer->ptr) {
munmap(buffer->ptr, buffer->size);
+ buffer->ptr = NULL;
+ }
free(buffer->mirror);
free(buffer);
}
@@ -525,6 +530,8 @@ TEST_F(hmm, anon_write_prot)
/*
* Check that a device writing an anonymous private mapping
* will copy-on-write if a child process inherits the mapping.
+ *
+ * Also verifies that, after fork(), device memory can be read by the child.
*/
TEST_F(hmm, anon_write_child)
{
@@ -532,72 +539,101 @@ TEST_F(hmm, anon_write_child)
unsigned long npages;
unsigned long size;
unsigned long i;
+ void *old_ptr;
+ void *map;
int *ptr;
pid_t pid;
int child_fd;
- int ret;
-
- npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
- ASSERT_NE(npages, 0);
- size = npages << self->page_shift;
-
- buffer = malloc(sizeof(*buffer));
- ASSERT_NE(buffer, NULL);
-
- buffer->fd = -1;
- buffer->size = size;
- buffer->mirror = malloc(size);
- ASSERT_NE(buffer->mirror, NULL);
-
- buffer->ptr = mmap(NULL, size,
- PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS,
- buffer->fd, 0);
- ASSERT_NE(buffer->ptr, MAP_FAILED);
-
- /* Initialize buffer->ptr so we can tell if it is written. */
- for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
- ptr[i] = i;
-
- /* Initialize data that the device will write to buffer->ptr. */
- for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
- ptr[i] = -i;
+ int ret, use_thp, migrate;
+
+ for (migrate = 0; migrate < 2; ++migrate) {
+ for (use_thp = 0; use_thp < 2; ++use_thp) {
+ npages = ALIGN(use_thp ? TWOMEG : HMM_BUFFER_SIZE,
+ self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size * 2;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size * 2,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ old_ptr = buffer->ptr;
+ if (use_thp) {
+ map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ buffer->ptr = map;
+ }
+
+ /* Initialize buffer->ptr so we can tell if it is written. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Initialize data that the device will write to buffer->ptr. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ptr[i] = -i;
+
+ if (migrate) {
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ }
+
+ pid = fork();
+ if (pid == -1)
+ ASSERT_EQ(pid, 0);
+ if (pid != 0) {
+ waitpid(pid, &ret, 0);
+ ASSERT_EQ(WIFEXITED(ret), 1);
+
+ /* Check that the parent's buffer did not change. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ buffer->ptr = old_ptr;
+ hmm_buffer_free(buffer);
+ continue;
+ }
+
+ /* Check that we see the parent's values. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+ if (!migrate) {
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], -i);
+ }
+
+ /* The child process needs its own mirror to its own mm. */
+ child_fd = hmm_open(0);
+ ASSERT_GE(child_fd, 0);
+
+ /* Simulate a device writing system memory. */
+ ret = hmm_dmirror_cmd(child_fd, HMM_DMIRROR_WRITE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
- pid = fork();
- if (pid == -1)
- ASSERT_EQ(pid, 0);
- if (pid != 0) {
- waitpid(pid, &ret, 0);
- ASSERT_EQ(WIFEXITED(ret), 1);
+ /* Check what the device wrote. */
+ if (!migrate) {
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], -i);
+ }
- /* Check that the parent's buffer did not change. */
- for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
- ASSERT_EQ(ptr[i], i);
- return;
+ close(child_fd);
+ exit(0);
+ }
}
-
- /* Check that we see the parent's values. */
- for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
- ASSERT_EQ(ptr[i], i);
- for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
- ASSERT_EQ(ptr[i], -i);
-
- /* The child process needs its own mirror to its own mm. */
- child_fd = hmm_open(0);
- ASSERT_GE(child_fd, 0);
-
- /* Simulate a device writing system memory. */
- ret = hmm_dmirror_cmd(child_fd, HMM_DMIRROR_WRITE, buffer, npages);
- ASSERT_EQ(ret, 0);
- ASSERT_EQ(buffer->cpages, npages);
- ASSERT_EQ(buffer->faults, 1);
-
- /* Check what the device wrote. */
- for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
- ASSERT_EQ(ptr[i], -i);
-
- close(child_fd);
- exit(0);
}
/*
@@ -2055,4 +2091,765 @@ TEST_F(hmm, hmm_cow_in_device)
hmm_buffer_free(buffer);
}
+
+/*
+ * Migrate private anonymous huge empty page.
+ */
+TEST_F(hmm, migrate_anon_huge_empty)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ void *old_ptr;
+ void *map;
+ int *ptr;
+ int ret;
+
+ size = TWOMEG;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = 2 * size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+ memset(buffer->mirror, 0xFF, size);
+
+ buffer->ptr = mmap(NULL, 2 * size,
+ PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ npages = size >> self->page_shift;
+ map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ old_ptr = buffer->ptr;
+ buffer->ptr = map;
+
+ /* Migrate memory to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], 0);
+
+ buffer->ptr = old_ptr;
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Migrate private anonymous huge zero page.
+ */
+TEST_F(hmm, migrate_anon_huge_zero)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ void *old_ptr;
+ void *map;
+ int *ptr;
+ int ret;
+ int val;
+
+ size = TWOMEG;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = 2 * size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+ memset(buffer->mirror, 0xFF, size);
+
+ buffer->ptr = mmap(NULL, 2 * size,
+ PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ npages = size >> self->page_shift;
+ map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ old_ptr = buffer->ptr;
+ buffer->ptr = map;
+
+ /* Initialize a read-only zero huge page. */
+ val = *(int *)buffer->ptr;
+ ASSERT_EQ(val, 0);
+
+ /* Migrate memory to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], 0);
+
+ /* Fault pages back to system memory and check them. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) {
+ ASSERT_EQ(ptr[i], 0);
+ /* If it asserts once, it probably will 500,000 times */
+ if (ptr[i] != 0)
+ break;
+ }
+
+ buffer->ptr = old_ptr;
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Migrate private anonymous huge page and free.
+ */
+TEST_F(hmm, migrate_anon_huge_free)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ void *old_ptr;
+ void *map;
+ int *ptr;
+ int ret;
+
+ size = TWOMEG;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = 2 * size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+ memset(buffer->mirror, 0xFF, size);
+
+ buffer->ptr = mmap(NULL, 2 * size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ npages = size >> self->page_shift;
+ map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ old_ptr = buffer->ptr;
+ buffer->ptr = map;
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Try freeing it. */
+ ret = madvise(map, size, MADV_FREE);
+ ASSERT_EQ(ret, 0);
+
+ buffer->ptr = old_ptr;
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Migrate private anonymous huge page and fault back to sysmem.
+ */
+TEST_F(hmm, migrate_anon_huge_fault)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ void *old_ptr;
+ void *map;
+ int *ptr;
+ int ret;
+
+ size = TWOMEG;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = 2 * size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+ memset(buffer->mirror, 0xFF, size);
+
+ buffer->ptr = mmap(NULL, 2 * size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ npages = size >> self->page_shift;
+ map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ old_ptr = buffer->ptr;
+ buffer->ptr = map;
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Fault pages back to system memory and check them. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ buffer->ptr = old_ptr;
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Migrate memory and fault back to sysmem after partially unmapping.
+ */
+TEST_F(hmm, migrate_partial_unmap_fault)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size = TWOMEG;
+ unsigned long i;
+ void *old_ptr;
+ void *map;
+ int *ptr;
+ int ret, j, use_thp;
+ int offsets[] = { 0, 512 * ONEKB, ONEMEG };
+
+ for (use_thp = 0; use_thp < 2; ++use_thp) {
+ for (j = 0; j < ARRAY_SIZE(offsets); ++j) {
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = 2 * size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+ memset(buffer->mirror, 0xFF, size);
+
+ buffer->ptr = mmap(NULL, 2 * size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ npages = size >> self->page_shift;
+ map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
+ if (use_thp)
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ else
+ ret = madvise(map, size, MADV_NOHUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ old_ptr = buffer->ptr;
+ buffer->ptr = map;
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ munmap(buffer->ptr + offsets[j], ONEMEG);
+
+ /* Fault pages back to system memory and check them. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ if (i * sizeof(int) < offsets[j] ||
+ i * sizeof(int) >= offsets[j] + ONEMEG)
+ ASSERT_EQ(ptr[i], i);
+
+ buffer->ptr = old_ptr;
+ hmm_buffer_free(buffer);
+ }
+ }
+}
+
+TEST_F(hmm, migrate_remap_fault)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size = TWOMEG;
+ unsigned long i;
+ void *old_ptr, *new_ptr = NULL;
+ void *map;
+ int *ptr;
+ int ret, j, use_thp, dont_unmap, before;
+ int offsets[] = { 0, 512 * ONEKB, ONEMEG };
+
+ for (before = 0; before < 2; ++before) {
+ for (dont_unmap = 0; dont_unmap < 2; ++dont_unmap) {
+ for (use_thp = 0; use_thp < 2; ++use_thp) {
+ for (j = 0; j < ARRAY_SIZE(offsets); ++j) {
+ int flags = MREMAP_MAYMOVE | MREMAP_FIXED;
+
+ if (dont_unmap)
+ flags |= MREMAP_DONTUNMAP;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = 8 * size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+ memset(buffer->mirror, 0xFF, size);
+
+ buffer->ptr = mmap(NULL, buffer->size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ npages = size >> self->page_shift;
+ map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
+ if (use_thp)
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ else
+ ret = madvise(map, size, MADV_NOHUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ old_ptr = buffer->ptr;
+ munmap(map + size, size * 2);
+ buffer->ptr = map;
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr;
+ i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ if (before) {
+ new_ptr = mremap((void *)map, size, size, flags,
+ map + size + offsets[j]);
+ ASSERT_NE(new_ptr, MAP_FAILED);
+ buffer->ptr = new_ptr;
+ }
+
+ /* Migrate memory to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror;
+ i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ if (!before) {
+ new_ptr = mremap((void *)map, size, size, flags,
+ map + size + offsets[j]);
+ ASSERT_NE(new_ptr, MAP_FAILED);
+ buffer->ptr = new_ptr;
+ }
+
+ /* Fault pages back to system memory and check them. */
+ for (i = 0, ptr = buffer->ptr;
+ i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ munmap(new_ptr, size);
+ buffer->ptr = old_ptr;
+ hmm_buffer_free(buffer);
+ }
+ }
+ }
+ }
+}
+
+/*
+ * Migrate private anonymous huge page with allocation errors.
+ */
+TEST_F(hmm, migrate_anon_huge_err)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ void *old_ptr;
+ void *map;
+ int *ptr;
+ int ret;
+
+ size = TWOMEG;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = 2 * size;
+ buffer->mirror = malloc(2 * size);
+ ASSERT_NE(buffer->mirror, NULL);
+ memset(buffer->mirror, 0xFF, 2 * size);
+
+ old_ptr = mmap(NULL, 2 * size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0);
+ ASSERT_NE(old_ptr, MAP_FAILED);
+
+ npages = size >> self->page_shift;
+ map = (void *)ALIGN((uintptr_t)old_ptr, size);
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ buffer->ptr = map;
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device but force a THP allocation error. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_FLAGS, buffer,
+ HMM_DMIRROR_FLAG_FAIL_ALLOC);
+ ASSERT_EQ(ret, 0);
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) {
+ ASSERT_EQ(ptr[i], i);
+ if (ptr[i] != i)
+ break;
+ }
+
+ /* Try faulting back a single (PAGE_SIZE) page. */
+ ptr = buffer->ptr;
+ ASSERT_EQ(ptr[2048], 2048);
+
+ /* Unmap and remap the region to reset things. */
+ ret = munmap(old_ptr, 2 * size);
+ ASSERT_EQ(ret, 0);
+ old_ptr = mmap(NULL, 2 * size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0);
+ ASSERT_NE(old_ptr, MAP_FAILED);
+ map = (void *)ALIGN((uintptr_t)old_ptr, size);
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ buffer->ptr = map;
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate THP to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /*
+ * Force an allocation error when faulting back a THP resident in the
+ * device.
+ */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_FLAGS, buffer,
+ HMM_DMIRROR_FLAG_FAIL_ALLOC);
+ ASSERT_EQ(ret, 0);
+
+ ret = hmm_migrate_dev_to_sys(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ptr = buffer->ptr;
+ ASSERT_EQ(ptr[2048], 2048);
+
+ buffer->ptr = old_ptr;
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Migrate private anonymous huge zero page with allocation errors.
+ */
+TEST_F(hmm, migrate_anon_huge_zero_err)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ void *old_ptr;
+ void *map;
+ int *ptr;
+ int ret;
+
+ size = TWOMEG;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = 2 * size;
+ buffer->mirror = malloc(2 * size);
+ ASSERT_NE(buffer->mirror, NULL);
+ memset(buffer->mirror, 0xFF, 2 * size);
+
+ old_ptr = mmap(NULL, 2 * size, PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0);
+ ASSERT_NE(old_ptr, MAP_FAILED);
+
+ npages = size >> self->page_shift;
+ map = (void *)ALIGN((uintptr_t)old_ptr, size);
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ buffer->ptr = map;
+
+ /* Migrate memory to device but force a THP allocation error. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_FLAGS, buffer,
+ HMM_DMIRROR_FLAG_FAIL_ALLOC);
+ ASSERT_EQ(ret, 0);
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], 0);
+
+ /* Try faulting back a single (PAGE_SIZE) page. */
+ ptr = buffer->ptr;
+ ASSERT_EQ(ptr[2048], 0);
+
+ /* Unmap and remap the region to reset things. */
+ ret = munmap(old_ptr, 2 * size);
+ ASSERT_EQ(ret, 0);
+ old_ptr = mmap(NULL, 2 * size, PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0);
+ ASSERT_NE(old_ptr, MAP_FAILED);
+ map = (void *)ALIGN((uintptr_t)old_ptr, size);
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ buffer->ptr = map;
+
+ /* Initialize buffer in system memory (zero THP page). */
+ ret = ptr[0];
+ ASSERT_EQ(ret, 0);
+
+ /* Migrate memory to device but force a THP allocation error. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_FLAGS, buffer,
+ HMM_DMIRROR_FLAG_FAIL_ALLOC);
+ ASSERT_EQ(ret, 0);
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Fault the device memory back and check it. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], 0);
+
+ buffer->ptr = old_ptr;
+ hmm_buffer_free(buffer);
+}
+
+struct benchmark_results {
+ double sys_to_dev_time;
+ double dev_to_sys_time;
+ double throughput_s2d;
+ double throughput_d2s;
+};
+
+static double get_time_ms(void)
+{
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL);
+ return (tv.tv_sec * 1000.0) + (tv.tv_usec / 1000.0);
+}
+
+static inline struct hmm_buffer *hmm_buffer_alloc(unsigned long size)
+{
+ struct hmm_buffer *buffer;
+
+ buffer = malloc(sizeof(*buffer));
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ memset(buffer->mirror, 0xFF, size);
+ return buffer;
+}
+
+static void print_benchmark_results(const char *test_name, size_t buffer_size,
+ struct benchmark_results *thp,
+ struct benchmark_results *regular)
+{
+ double s2d_improvement = ((regular->sys_to_dev_time - thp->sys_to_dev_time) /
+ regular->sys_to_dev_time) * 100.0;
+ double d2s_improvement = ((regular->dev_to_sys_time - thp->dev_to_sys_time) /
+ regular->dev_to_sys_time) * 100.0;
+ double throughput_s2d_improvement = ((thp->throughput_s2d - regular->throughput_s2d) /
+ regular->throughput_s2d) * 100.0;
+ double throughput_d2s_improvement = ((thp->throughput_d2s - regular->throughput_d2s) /
+ regular->throughput_d2s) * 100.0;
+
+ printf("\n=== %s (%.1f MB) ===\n", test_name, buffer_size / (1024.0 * 1024.0));
+ printf(" | With THP | Without THP | Improvement\n");
+ printf("---------------------------------------------------------------------\n");
+ printf("Sys->Dev Migration | %.3f ms | %.3f ms | %.1f%%\n",
+ thp->sys_to_dev_time, regular->sys_to_dev_time, s2d_improvement);
+ printf("Dev->Sys Migration | %.3f ms | %.3f ms | %.1f%%\n",
+ thp->dev_to_sys_time, regular->dev_to_sys_time, d2s_improvement);
+ printf("S->D Throughput | %.2f GB/s | %.2f GB/s | %.1f%%\n",
+ thp->throughput_s2d, regular->throughput_s2d, throughput_s2d_improvement);
+ printf("D->S Throughput | %.2f GB/s | %.2f GB/s | %.1f%%\n",
+ thp->throughput_d2s, regular->throughput_d2s, throughput_d2s_improvement);
+}
+
+/*
+ * Run a single migration benchmark
+ * fd: file descriptor for hmm device
+ * use_thp: whether to use THP
+ * buffer_size: size of buffer to allocate
+ * iterations: number of iterations
+ * results: where to store results
+ */
+static inline int run_migration_benchmark(int fd, int use_thp, size_t buffer_size,
+ int iterations, struct benchmark_results *results)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages = buffer_size / sysconf(_SC_PAGESIZE);
+ double start, end;
+ double s2d_total = 0, d2s_total = 0;
+ int ret, i;
+ int *ptr;
+
+ buffer = hmm_buffer_alloc(buffer_size);
+
+ /* Map memory */
+ buffer->ptr = mmap(NULL, buffer_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+ if (!buffer->ptr)
+ return -1;
+
+ /* Apply THP hint if requested */
+ if (use_thp)
+ ret = madvise(buffer->ptr, buffer_size, MADV_HUGEPAGE);
+ else
+ ret = madvise(buffer->ptr, buffer_size, MADV_NOHUGEPAGE);
+
+ if (ret)
+ return ret;
+
+ /* Initialize memory to make sure pages are allocated */
+ ptr = (int *)buffer->ptr;
+ for (i = 0; i < buffer_size / sizeof(int); i++)
+ ptr[i] = i & 0xFF;
+
+ /* Warmup iteration */
+ ret = hmm_migrate_sys_to_dev(fd, buffer, npages);
+ if (ret)
+ return ret;
+
+ ret = hmm_migrate_dev_to_sys(fd, buffer, npages);
+ if (ret)
+ return ret;
+
+ /* Benchmark iterations */
+ for (i = 0; i < iterations; i++) {
+ /* System to device migration */
+ start = get_time_ms();
+
+ ret = hmm_migrate_sys_to_dev(fd, buffer, npages);
+ if (ret)
+ return ret;
+
+ end = get_time_ms();
+ s2d_total += (end - start);
+
+ /* Device to system migration */
+ start = get_time_ms();
+
+ ret = hmm_migrate_dev_to_sys(fd, buffer, npages);
+ if (ret)
+ return ret;
+
+ end = get_time_ms();
+ d2s_total += (end - start);
+ }
+
+ /* Calculate average times and throughput */
+ results->sys_to_dev_time = s2d_total / iterations;
+ results->dev_to_sys_time = d2s_total / iterations;
+ results->throughput_s2d = (buffer_size / (1024.0 * 1024.0 * 1024.0)) /
+ (results->sys_to_dev_time / 1000.0);
+ results->throughput_d2s = (buffer_size / (1024.0 * 1024.0 * 1024.0)) /
+ (results->dev_to_sys_time / 1000.0);
+
+ /* Cleanup */
+ hmm_buffer_free(buffer);
+ return 0;
+}
+
+/*
+ * Benchmark THP migration with different buffer sizes
+ */
+TEST_F_TIMEOUT(hmm, benchmark_thp_migration, 120)
+{
+ struct benchmark_results thp_results, regular_results;
+ size_t thp_size = 2 * 1024 * 1024; /* 2MB - typical THP size */
+ int iterations = 5;
+
+ printf("\nHMM THP Migration Benchmark\n");
+ printf("---------------------------\n");
+ printf("System page size: %ld bytes\n", sysconf(_SC_PAGESIZE));
+
+ /* Test different buffer sizes */
+ size_t test_sizes[] = {
+ thp_size / 4, /* 512KB - smaller than THP */
+ thp_size / 2, /* 1MB - half THP */
+ thp_size, /* 2MB - single THP */
+ thp_size * 2, /* 4MB - two THPs */
+ thp_size * 4, /* 8MB - four THPs */
+ thp_size * 8, /* 16MB - eight THPs */
+ thp_size * 128, /* 256MB - 128 THPs */
+ };
+
+ static const char *const test_names[] = {
+ "Small Buffer (512KB)",
+ "Half THP Size (1MB)",
+ "Single THP Size (2MB)",
+ "Two THP Size (4MB)",
+ "Four THP Size (8MB)",
+ "Eight THP Size (16MB)",
+ "One twenty eight THP Size (256MB)"
+ };
+
+ int num_tests = ARRAY_SIZE(test_sizes);
+
+ /* Run all tests */
+ for (int i = 0; i < num_tests; i++) {
+ /* Test with THP */
+ ASSERT_EQ(run_migration_benchmark(self->fd, 1, test_sizes[i],
+ iterations, &thp_results), 0);
+
+ /* Test without THP */
+ ASSERT_EQ(run_migration_benchmark(self->fd, 0, test_sizes[i],
+ iterations, &regular_results), 0);
+
+ /* Print results */
+ print_benchmark_results(test_names[i], test_sizes[i],
+ &thp_results, &regular_results);
+ }
+}
TEST_HARNESS_MAIN
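
The printed throughput follows directly from the averaged migration time. A worked instance of the formula in run_migration_benchmark() (numbers are illustrative, not measured):

    /* throughput (GB/s) = (bytes / 2^30) / (avg_ms / 1000)
     *
     * e.g. migrating a 2 MB buffer in an average of 0.25 ms:
     *   (2 * 2^20 / 2^30) / (0.25 / 1000.0) ~= 7.8 GB/s
     */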
diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c
index ac136f04b8d6..95afa5cfc062 100644
--- a/tools/testing/selftests/mm/ksm_functional_tests.c
+++ b/tools/testing/selftests/mm/ksm_functional_tests.c
@@ -38,6 +38,8 @@ enum ksm_merge_mode {
};
static int mem_fd;
+static int pages_to_scan_fd;
+static int sleep_millisecs_fd;
static int pagemap_fd;
static size_t pagesize;
@@ -493,6 +495,46 @@ static void test_prctl_fork(void)
ksft_test_result_pass("PR_SET_MEMORY_MERGE value is inherited\n");
}
+static int start_ksmd_and_set_frequency(char *pages_to_scan, char *sleep_ms)
+{
+ int ksm_fd;
+
+ ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR);
+ if (ksm_fd < 0)
+ return -errno;
+
+ if (write(ksm_fd, "1", 1) != 1)
+ return -errno;
+
+ if (write(pages_to_scan_fd, pages_to_scan, strlen(pages_to_scan)) <= 0)
+ return -errno;
+
+ if (write(sleep_millisecs_fd, sleep_ms, strlen(sleep_ms)) <= 0)
+ return -errno;
+
+ return 0;
+}
+
+static int stop_ksmd_and_restore_frequency(void)
+{
+ int ksm_fd;
+
+ ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR);
+ if (ksm_fd < 0)
+ return -errno;
+
+ if (write(ksm_fd, "2", 1) != 1)
+ return -errno;
+
+ if (write(pages_to_scan_fd, "100", 3) <= 0)
+ return -errno;
+
+ if (write(sleep_millisecs_fd, "20", 2) <= 0)
+ return -errno;
+
+ return 0;
+}
+
static void test_prctl_fork_exec(void)
{
int ret, status;
@@ -500,6 +542,9 @@ static void test_prctl_fork_exec(void)
ksft_print_msg("[RUN] %s\n", __func__);
+ if (start_ksmd_and_set_frequency("2000", "0"))
+ ksft_test_result_fail("set ksmd's scanning frequency failed\n");
+
ret = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0);
if (ret < 0 && errno == EINVAL) {
ksft_test_result_skip("PR_SET_MEMORY_MERGE not supported\n");
@@ -542,6 +587,11 @@ static void test_prctl_fork_exec(void)
return;
}
+ if (stop_ksmd_and_restore_frequency()) {
+ ksft_test_result_fail("restore ksmd frequency failed\n");
+ return;
+ }
+
ksft_test_result_pass("PR_SET_MEMORY_MERGE value is inherited\n");
}
@@ -656,6 +706,13 @@ static void init_global_file_handles(void)
ksft_exit_skip("open(\"/proc/self/pagemap\") failed\n");
if (ksm_get_self_merging_pages() < 0)
ksft_exit_skip("accessing \"/proc/self/ksm_merging_pages\") failed\n");
+
+ pages_to_scan_fd = open("/sys/kernel/mm/ksm/pages_to_scan", O_RDWR);
+ if (pages_to_scan_fd < 0)
+ ksft_exit_fail_msg("opening /sys/kernel/mm/ksm/pages_to_scan failed\n");
+ sleep_millisecs_fd = open("/sys/kernel/mm/ksm/sleep_millisecs", O_RDWR);
+ if (sleep_millisecs_fd < 0)
+ ksft_exit_fail_msg("opening /sys/kernel/mm/ksm/sleep_millisecs failed\n");
}
int main(int argc, char **argv)
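
The new fixture plumbing reduces to writing short strings into KSM's sysfs knobs. A stand-alone sketch of the same interaction, with the open() folded into a helper rather than cached fds (paths are the standard /sys/kernel/mm/ksm files; error handling trimmed):

    static int write_ksm_knob(const char *knob, const char *val)
    {
        char path[128];
        int fd, ret;

        snprintf(path, sizeof(path), "/sys/kernel/mm/ksm/%s", knob);
        fd = open(path, O_WRONLY);
        if (fd < 0)
            return -errno;
        ret = write(fd, val, strlen(val)) > 0 ? 0 : -errno;
        close(fd);
        return ret;
    }

    /* e.g. write_ksm_knob("run", "1"); write_ksm_knob("pages_to_scan", "2000"); */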
diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c
index bf2863b102e3..5f073504e0b1 100644
--- a/tools/testing/selftests/mm/mremap_test.c
+++ b/tools/testing/selftests/mm/mremap_test.c
@@ -994,7 +994,7 @@ static void mremap_move_multi_invalid_vmas(FILE *maps_fp, unsigned long page_siz
static long long remap_region(struct config c, unsigned int threshold_mb,
char *rand_addr)
{
- void *addr, *src_addr, *dest_addr, *dest_preamble_addr = NULL;
+ void *addr, *tmp_addr, *src_addr, *dest_addr, *dest_preamble_addr = NULL;
unsigned long long t, d;
struct timespec t_start = {0, 0}, t_end = {0, 0};
long long start_ns, end_ns, align_mask, ret, offset;
@@ -1032,7 +1032,8 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
/* Don't destroy existing mappings unless expected to overlap */
while (!is_remap_region_valid(addr, c.region_size) && !c.overlapping) {
/* Check for unsigned overflow */
- if (addr + c.dest_alignment < addr) {
+ tmp_addr = addr + c.dest_alignment;
+ if (tmp_addr < addr) {
ksft_print_msg("Couldn't find a valid region to remap to\n");
ret = -1;
goto clean_up_src;
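
The two-step form above keeps the overflow check from being folded away, but wrapping pointer arithmetic is still undefined behaviour in C. A fully portable variant (a sketch, not what the patch does) would compare in integer space, where unsigned wrap-around is well-defined:

    uintptr_t base = (uintptr_t)addr;

    /* Unsigned overflow is defined, so this check cannot be optimised away. */
    if (base + c.dest_alignment < base) {
        ksft_print_msg("Couldn't find a valid region to remap to\n");
        ret = -1;
        goto clean_up_src;
    }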
diff --git a/tools/testing/selftests/mm/soft-dirty.c b/tools/testing/selftests/mm/soft-dirty.c
index 4ee4db3750c1..c3a9585de98c 100644
--- a/tools/testing/selftests/mm/soft-dirty.c
+++ b/tools/testing/selftests/mm/soft-dirty.c
@@ -184,6 +184,130 @@ static void test_mprotect(int pagemap_fd, int pagesize, bool anon)
close(test_fd);
}
+static void test_merge(int pagemap_fd, int pagesize)
+{
+ char *reserved, *map, *map2;
+
+ /*
+ * Reserve space for tests:
+ *
+ * ---padding to ---
+ * | avoid adj. |
+ * v merge v
+ * |---|---|---|---|---|
+ * | | 1 | 2 | 3 | |
+ * |---|---|---|---|---|
+ */
+ reserved = mmap(NULL, 5 * pagesize, PROT_NONE,
+ MAP_ANON | MAP_PRIVATE, -1, 0);
+ if (reserved == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed\n");
+ munmap(reserved, 4 * pagesize);
+
+ /*
+ * Establish initial VMA:
+ *
+ * S/D
+ * |---|---|---|---|---|
+ * | | 1 | | | |
+ * |---|---|---|---|---|
+ */
+ map = mmap(&reserved[pagesize], pagesize, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ if (map == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed\n");
+
+ /* This will clear VM_SOFTDIRTY too. */
+ clear_softdirty();
+
+ /*
+ * Now place a new mapping which will be marked VM_SOFTDIRTY. Away from
+ * map:
+ *
+ * - S/D
+ * |---|---|---|---|---|
+ * | | 1 | | 2 | |
+ * |---|---|---|---|---|
+ */
+ map2 = mmap(&reserved[3 * pagesize], pagesize, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ if (map2 == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed\n");
+
+ /*
+ * Now remap it immediately adjacent to map, if the merge correctly
+ * propagates VM_SOFTDIRTY, we should then observe the VMA as a whole
+ * being marked soft-dirty:
+ *
+ * merge
+ * S/D
+ * |---|-------|---|---|
+ * | | 1 | | |
+ * |---|-------|---|---|
+ */
+ map2 = mremap(map2, pagesize, pagesize, MREMAP_FIXED | MREMAP_MAYMOVE,
+ &reserved[2 * pagesize]);
+ if (map2 == MAP_FAILED)
+ ksft_exit_fail_msg("mremap failed\n");
+ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 1,
+ "Test %s-anon soft-dirty after remap merge 1st pg\n",
+ __func__);
+ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map2) == 1,
+ "Test %s-anon soft-dirty after remap merge 2nd pg\n",
+ __func__);
+
+ munmap(map, 2 * pagesize);
+
+ /*
+ * Now establish another VMA:
+ *
+ * S/D
+ * |---|---|---|---|---|
+ * | | 1 | | | |
+ * |---|---|---|---|---|
+ */
+ map = mmap(&reserved[pagesize], pagesize, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ if (map == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed\n");
+
+ /* Clear VM_SOFTDIRTY... */
+ clear_softdirty();
+ /* ...and establish incompatible adjacent VMA:
+ *
+ * - S/D
+ * |---|---|---|---|---|
+ * | | 1 | 2 | | |
+ * |---|---|---|---|---|
+ */
+ map2 = mmap(&reserved[2 * pagesize], pagesize,
+ PROT_READ | PROT_WRITE | PROT_EXEC,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ if (map2 == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed\n");
+
+ /*
+ * Now mprotect() VMA 1 so it's compatible with 2 and therefore merges:
+ *
+ * merge
+ * S/D
+ * |---|-------|---|---|
+ * | | 1 | | |
+ * |---|-------|---|---|
+ */
+ if (mprotect(map, pagesize, PROT_READ | PROT_WRITE | PROT_EXEC))
+ ksft_exit_fail_msg("mprotect failed\n");
+
+ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 1,
+ "Test %s-anon soft-dirty after mprotect merge 1st pg\n",
+ __func__);
+ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map2) == 1,
+ "Test %s-anon soft-dirty after mprotect merge 2nd pg\n",
+ __func__);
+
+ munmap(map, 2 * pagesize);
+}
+
static void test_mprotect_anon(int pagemap_fd, int pagesize)
{
test_mprotect(pagemap_fd, pagesize, true);
@@ -204,7 +328,7 @@ int main(int argc, char **argv)
if (!softdirty_supported())
ksft_exit_skip("soft-dirty is not support\n");
- ksft_set_plan(15);
+ ksft_set_plan(19);
pagemap_fd = open(PAGEMAP_FILE_PATH, O_RDONLY);
if (pagemap_fd < 0)
ksft_exit_fail_msg("Failed to open %s\n", PAGEMAP_FILE_PATH);
@@ -216,6 +340,7 @@ int main(int argc, char **argv)
test_hugepage(pagemap_fd, pagesize);
test_mprotect_anon(pagemap_fd, pagesize);
test_mprotect_file(pagemap_fd, pagesize);
+ test_merge(pagemap_fd, pagesize);
close(pagemap_fd);
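
pagemap_is_softdirty() (from vm_util.c) amounts to reading the 64-bit pagemap entry for the page and testing bit 55, which Documentation/admin-guide/mm/pagemap.rst defines as the soft-dirty bit. A stand-alone sketch of that check:

    static int page_is_softdirty(int pagemap_fd, void *addr, int pagesize)
    {
        uint64_t entry;
        off_t off = ((uintptr_t)addr / pagesize) * sizeof(entry);

        if (pread(pagemap_fd, &entry, sizeof(entry), off) != sizeof(entry))
            return -1;
        return !!(entry & (1ULL << 55));    /* bit 55: soft-dirty */
    }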
diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c
index 994fe8c03923..edd02328f77b 100644
--- a/tools/testing/selftests/mm/uffd-common.c
+++ b/tools/testing/selftests/mm/uffd-common.c
@@ -10,7 +10,6 @@
uffd_test_ops_t *uffd_test_ops;
uffd_test_case_ops_t *uffd_test_case_ops;
-#define BASE_PMD_ADDR ((void *)(1UL << 30))
/* pthread_mutex_t starts at page offset 0 */
pthread_mutex_t *area_mutex(char *area, unsigned long nr, uffd_global_test_opts_t *gopts)
@@ -142,30 +141,37 @@ static int shmem_allocate_area(uffd_global_test_opts_t *gopts, void **alloc_area
unsigned long offset = is_src ? 0 : bytes;
char *p = NULL, *p_alias = NULL;
int mem_fd = uffd_mem_fd_create(bytes * 2, false);
+ size_t region_size = bytes * 2 + hpage_size;
- /* TODO: clean this up. Use a static addr is ugly */
- p = BASE_PMD_ADDR;
- if (!is_src)
- /* src map + alias + interleaved hpages */
- p += 2 * (bytes + hpage_size);
+ void *reserve = mmap(NULL, region_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS,
+ -1, 0);
+ if (reserve == MAP_FAILED) {
+ close(mem_fd);
+ return -errno;
+ }
+
+ p = reserve;
p_alias = p;
p_alias += bytes;
p_alias += hpage_size; /* Prevent src/dst VMA merge */
- *alloc_area = mmap(p, bytes, PROT_READ | PROT_WRITE, MAP_SHARED,
+ *alloc_area = mmap(p, bytes, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED,
mem_fd, offset);
if (*alloc_area == MAP_FAILED) {
*alloc_area = NULL;
+ munmap(reserve, region_size);
+ close(mem_fd);
return -errno;
}
if (*alloc_area != p)
err("mmap of memfd failed at %p", p);
- area_alias = mmap(p_alias, bytes, PROT_READ | PROT_WRITE, MAP_SHARED,
+ area_alias = mmap(p_alias, bytes, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED,
mem_fd, offset);
if (area_alias == MAP_FAILED) {
- munmap(*alloc_area, bytes);
*alloc_area = NULL;
+ munmap(reserve, region_size);
+ close(mem_fd);
return -errno;
}
if (area_alias != p_alias)
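
The rewrite replaces the hard-coded BASE_PMD_ADDR with the standard reserve-then-carve pattern, in which MAP_FIXED is safe because it can only replace the caller's own PROT_NONE reservation. In general form (a sketch, with region_size/bytes/mem_fd standing in for the test's own values):

    /* 1) Reserve address space without committing memory. */
    void *reserve = mmap(NULL, region_size, PROT_NONE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

    /* 2) Carve real mappings out of the reservation; MAP_FIXED cannot
     *    clobber unrelated mappings since we own this range.
     */
    void *area = mmap(reserve, bytes, PROT_READ | PROT_WRITE,
                      MAP_FIXED | MAP_SHARED, mem_fd, 0);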
diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c
index b51c89e1cd1a..700fbaa18d44 100644
--- a/tools/testing/selftests/mm/uffd-stress.c
+++ b/tools/testing/selftests/mm/uffd-stress.c
@@ -241,7 +241,7 @@ static int stress(struct uffd_args *args)
return 1;
for (cpu = 0; cpu < gopts->nr_parallel; cpu++) {
- char c;
+ char c = '\0';
if (bounces & BOUNCE_POLL) {
if (write(gopts->pipefd[cpu*2+1], &c, 1) != 1)
err("pipefd write error");
diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c
index f917b4c4c943..f4807242c5b2 100644
--- a/tools/testing/selftests/mm/uffd-unit-tests.c
+++ b/tools/testing/selftests/mm/uffd-unit-tests.c
@@ -543,7 +543,7 @@ static void uffd_minor_test_common(uffd_global_test_opts_t *gopts, bool test_col
{
unsigned long p;
pthread_t uffd_mon;
- char c;
+ char c = '\0';
struct uffd_args args = { 0 };
args.gopts = gopts;
@@ -759,7 +759,7 @@ static void uffd_sigbus_test_common(uffd_global_test_opts_t *gopts, bool wp)
pthread_t uffd_mon;
pid_t pid;
int err;
- char c;
+ char c = '\0';
struct uffd_args args = { 0 };
args.gopts = gopts;
@@ -819,7 +819,7 @@ static void uffd_events_test_common(uffd_global_test_opts_t *gopts, bool wp)
pthread_t uffd_mon;
pid_t pid;
int err;
- char c;
+ char c = '\0';
struct uffd_args args = { 0 };
args.gopts = gopts;
@@ -1125,7 +1125,7 @@ uffd_move_test_common(uffd_global_test_opts_t *gopts,
{
unsigned long nr;
pthread_t uffd_mon;
- char c;
+ char c = '\0';
unsigned long long count;
struct uffd_args args = { 0 };
char *orig_area_src = NULL, *orig_area_dst = NULL;
diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c
index e33cda301dad..605cb58ea5c3 100644
--- a/tools/testing/selftests/mm/vm_util.c
+++ b/tools/testing/selftests/mm/vm_util.c
@@ -449,6 +449,11 @@ bool check_vmflag_pfnmap(void *addr)
return check_vmflag(addr, "pf");
}
+bool check_vmflag_guard(void *addr)
+{
+ return check_vmflag(addr, "gu");
+}
+
bool softdirty_supported(void)
{
char *addr;
diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h
index 26c30fdc0241..a8abdf414d46 100644
--- a/tools/testing/selftests/mm/vm_util.h
+++ b/tools/testing/selftests/mm/vm_util.h
@@ -98,6 +98,7 @@ int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len,
unsigned long get_free_hugepages(void);
bool check_vmflag_io(void *addr);
bool check_vmflag_pfnmap(void *addr);
+bool check_vmflag_guard(void *addr);
int open_procmap(pid_t pid, struct procmap_fd *procmap_out);
int query_procmap(struct procmap_fd *procmap);
bool find_vma_procmap(struct procmap_fd *procmap, void *address);
diff --git a/tools/testing/selftests/riscv/hwprobe/cbo.c b/tools/testing/selftests/riscv/hwprobe/cbo.c
index 5e96ef785d0d..6d99726aceac 100644
--- a/tools/testing/selftests/riscv/hwprobe/cbo.c
+++ b/tools/testing/selftests/riscv/hwprobe/cbo.c
@@ -15,24 +15,31 @@
#include <linux/compiler.h>
#include <linux/kernel.h>
#include <asm/ucontext.h>
+#include <getopt.h>
#include "hwprobe.h"
#include "../../kselftest.h"
#define MK_CBO(fn) le32_bswap((uint32_t)(fn) << 20 | 10 << 15 | 2 << 12 | 0 << 7 | 15)
+#define MK_PREFETCH(fn) \
+ le32_bswap(0 << 25 | (uint32_t)(fn) << 20 | 10 << 15 | 6 << 12 | 0 << 7 | 19)
static char mem[4096] __aligned(4096) = { [0 ... 4095] = 0xa5 };
-static bool illegal_insn;
+static bool got_fault;
-static void sigill_handler(int sig, siginfo_t *info, void *context)
+static void fault_handler(int sig, siginfo_t *info, void *context)
{
unsigned long *regs = (unsigned long *)&((ucontext_t *)context)->uc_mcontext;
uint32_t insn = *(uint32_t *)regs[0];
- assert(insn == MK_CBO(regs[11]));
+ if (sig == SIGILL)
+ assert(insn == MK_CBO(regs[11]));
- illegal_insn = true;
+ if (sig == SIGSEGV || sig == SIGBUS)
+ assert(insn == MK_PREFETCH(regs[11]));
+
+ got_fault = true;
regs[0] += 4;
}
@@ -45,39 +52,51 @@ static void sigill_handler(int sig, siginfo_t *info, void *context)
: : "r" (base), "i" (fn), "i" (MK_CBO(fn)) : "a0", "a1", "memory"); \
})
+#define prefetch_insn(base, fn) \
+({ \
+ asm volatile( \
+ "mv a0, %0\n" \
+ "li a1, %1\n" \
+ ".4byte %2\n" \
+ : : "r" (base), "i" (fn), "i" (MK_PREFETCH(fn)) : "a0", "a1"); \
+})
+
static void cbo_inval(char *base) { cbo_insn(base, 0); }
static void cbo_clean(char *base) { cbo_insn(base, 1); }
static void cbo_flush(char *base) { cbo_insn(base, 2); }
static void cbo_zero(char *base) { cbo_insn(base, 4); }
+static void prefetch_i(char *base) { prefetch_insn(base, 0); }
+static void prefetch_r(char *base) { prefetch_insn(base, 1); }
+static void prefetch_w(char *base) { prefetch_insn(base, 3); }
static void test_no_cbo_inval(void *arg)
{
ksft_print_msg("Testing cbo.inval instruction remain privileged\n");
- illegal_insn = false;
+ got_fault = false;
cbo_inval(&mem[0]);
- ksft_test_result(illegal_insn, "No cbo.inval\n");
+ ksft_test_result(got_fault, "No cbo.inval\n");
}
static void test_no_zicbom(void *arg)
{
ksft_print_msg("Testing Zicbom instructions remain privileged\n");
- illegal_insn = false;
+ got_fault = false;
cbo_clean(&mem[0]);
- ksft_test_result(illegal_insn, "No cbo.clean\n");
+ ksft_test_result(got_fault, "No cbo.clean\n");
- illegal_insn = false;
+ got_fault = false;
cbo_flush(&mem[0]);
- ksft_test_result(illegal_insn, "No cbo.flush\n");
+ ksft_test_result(got_fault, "No cbo.flush\n");
}
static void test_no_zicboz(void *arg)
{
ksft_print_msg("No Zicboz, testing cbo.zero remains privileged\n");
- illegal_insn = false;
+ got_fault = false;
cbo_zero(&mem[0]);
- ksft_test_result(illegal_insn, "No cbo.zero\n");
+ ksft_test_result(got_fault, "No cbo.zero\n");
}
static bool is_power_of_2(__u64 n)
@@ -85,6 +104,51 @@ static bool is_power_of_2(__u64 n)
return n != 0 && (n & (n - 1)) == 0;
}
+static void test_zicbop(void *arg)
+{
+ struct riscv_hwprobe pair = {
+ .key = RISCV_HWPROBE_KEY_ZICBOP_BLOCK_SIZE,
+ };
+ struct sigaction act = {
+ .sa_sigaction = &fault_handler,
+ .sa_flags = SA_SIGINFO
+ };
+ struct sigaction dfl = {
+ .sa_handler = SIG_DFL
+ };
+ cpu_set_t *cpus = (cpu_set_t *)arg;
+ __u64 block_size;
+ long rc;
+
+ rc = sigaction(SIGSEGV, &act, NULL);
+ assert(rc == 0);
+ rc = sigaction(SIGBUS, &act, NULL);
+ assert(rc == 0);
+
+ rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)cpus, 0);
+ block_size = pair.value;
+ ksft_test_result(rc == 0 && pair.key == RISCV_HWPROBE_KEY_ZICBOP_BLOCK_SIZE &&
+ is_power_of_2(block_size), "Zicbop block size\n");
+ ksft_print_msg("Zicbop block size: %llu\n", block_size);
+
+ got_fault = false;
+ prefetch_i(&mem[0]);
+ prefetch_r(&mem[0]);
+ prefetch_w(&mem[0]);
+ ksft_test_result(!got_fault, "Zicbop prefetch.* on valid address\n");
+
+ got_fault = false;
+ prefetch_i(NULL);
+ prefetch_r(NULL);
+ prefetch_w(NULL);
+ ksft_test_result(!got_fault, "Zicbop prefetch.* on NULL\n");
+
+ rc = sigaction(SIGBUS, &dfl, NULL);
+ assert(rc == 0);
+ rc = sigaction(SIGSEGV, &dfl, NULL);
+ assert(rc == 0);
+}
+
static void test_zicbom(void *arg)
{
struct riscv_hwprobe pair = {
@@ -100,13 +164,13 @@ static void test_zicbom(void *arg)
is_power_of_2(block_size), "Zicbom block size\n");
ksft_print_msg("Zicbom block size: %llu\n", block_size);
- illegal_insn = false;
+ got_fault = false;
cbo_clean(&mem[block_size]);
- ksft_test_result(!illegal_insn, "cbo.clean\n");
+ ksft_test_result(!got_fault, "cbo.clean\n");
- illegal_insn = false;
+ got_fault = false;
cbo_flush(&mem[block_size]);
- ksft_test_result(!illegal_insn, "cbo.flush\n");
+ ksft_test_result(!got_fault, "cbo.flush\n");
}
static void test_zicboz(void *arg)
@@ -125,11 +189,11 @@ static void test_zicboz(void *arg)
is_power_of_2(block_size), "Zicboz block size\n");
ksft_print_msg("Zicboz block size: %llu\n", block_size);
- illegal_insn = false;
+ got_fault = false;
cbo_zero(&mem[block_size]);
- ksft_test_result(!illegal_insn, "cbo.zero\n");
+ ksft_test_result(!got_fault, "cbo.zero\n");
- if (illegal_insn || !is_power_of_2(block_size)) {
+ if (got_fault || !is_power_of_2(block_size)) {
ksft_test_result_skip("cbo.zero check\n");
return;
}
@@ -177,7 +241,19 @@ static void check_no_zicbo_cpus(cpu_set_t *cpus, __u64 cbo)
rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)&one_cpu, 0);
assert(rc == 0 && pair.key == RISCV_HWPROBE_KEY_IMA_EXT_0);
- cbostr = cbo == RISCV_HWPROBE_EXT_ZICBOZ ? "Zicboz" : "Zicbom";
+ switch (cbo) {
+ case RISCV_HWPROBE_EXT_ZICBOZ:
+ cbostr = "Zicboz";
+ break;
+ case RISCV_HWPROBE_EXT_ZICBOM:
+ cbostr = "Zicbom";
+ break;
+ case RISCV_HWPROBE_EXT_ZICBOP:
+ cbostr = "Zicbop";
+ break;
+ default:
+ ksft_exit_fail_msg("Internal error: invalid cbo %llu\n", cbo);
+ }
if (pair.value & cbo)
ksft_exit_fail_msg("%s is only present on a subset of harts.\n"
@@ -194,6 +270,7 @@ enum {
TEST_ZICBOM,
TEST_NO_ZICBOM,
TEST_NO_CBO_INVAL,
+ TEST_ZICBOP,
};
static struct test_info {
@@ -206,26 +283,51 @@ static struct test_info {
[TEST_ZICBOM] = { .nr_tests = 3, test_zicbom },
[TEST_NO_ZICBOM] = { .nr_tests = 2, test_no_zicbom },
[TEST_NO_CBO_INVAL] = { .nr_tests = 1, test_no_cbo_inval },
+ [TEST_ZICBOP] = { .nr_tests = 3, test_zicbop },
+};
+
+static const struct option long_opts[] = {
+ {"zicbom-raises-sigill", no_argument, 0, 'm'},
+ {"zicboz-raises-sigill", no_argument, 0, 'z'},
+ {0, 0, 0, 0}
};
int main(int argc, char **argv)
{
struct sigaction act = {
- .sa_sigaction = &sigill_handler,
+ .sa_sigaction = &fault_handler,
.sa_flags = SA_SIGINFO,
};
struct riscv_hwprobe pair;
unsigned int plan = 0;
cpu_set_t cpus;
long rc;
- int i;
-
- if (argc > 1 && !strcmp(argv[1], "--sigill")) {
- rc = sigaction(SIGILL, &act, NULL);
- assert(rc == 0);
- tests[TEST_NO_ZICBOZ].enabled = true;
- tests[TEST_NO_ZICBOM].enabled = true;
- tests[TEST_NO_CBO_INVAL].enabled = true;
+ int i, opt, long_index;
+
+ long_index = 0;
+
+ while ((opt = getopt_long(argc, argv, "mz", long_opts, &long_index)) != -1) {
+ switch (opt) {
+ case 'm':
+ tests[TEST_NO_ZICBOM].enabled = true;
+ tests[TEST_NO_CBO_INVAL].enabled = true;
+ rc = sigaction(SIGILL, &act, NULL);
+ assert(rc == 0);
+ break;
+ case 'z':
+ tests[TEST_NO_ZICBOZ].enabled = true;
+ tests[TEST_NO_CBO_INVAL].enabled = true;
+ rc = sigaction(SIGILL, &act, NULL);
+ assert(rc == 0);
+ break;
+ case '?':
+ fprintf(stderr,
+ "Usage: %s [--zicbom-raises-sigill|-m] [--zicboz-raises-sigill|-z]\n",
+ argv[0]);
+ exit(1);
+ default:
+ break;
+ }
}
rc = sched_getaffinity(0, sizeof(cpu_set_t), &cpus);
@@ -253,6 +355,11 @@ int main(int argc, char **argv)
check_no_zicbo_cpus(&cpus, RISCV_HWPROBE_EXT_ZICBOM);
}
+ if (pair.value & RISCV_HWPROBE_EXT_ZICBOP)
+ tests[TEST_ZICBOP].enabled = true;
+ else
+ check_no_zicbo_cpus(&cpus, RISCV_HWPROBE_EXT_ZICBOP);
+
for (i = 0; i < ARRAY_SIZE(tests); ++i)
plan += tests[i].enabled ? tests[i].nr_tests : 0;
diff --git a/tools/testing/selftests/riscv/vector/Makefile b/tools/testing/selftests/riscv/vector/Makefile
index 6f7497f4e7b3..2c2a33fc083e 100644
--- a/tools/testing/selftests/riscv/vector/Makefile
+++ b/tools/testing/selftests/riscv/vector/Makefile
@@ -2,7 +2,7 @@
# Copyright (C) 2021 ARM Limited
# Originally tools/testing/arm64/abi/Makefile
-TEST_GEN_PROGS := v_initval vstate_prctl
+TEST_GEN_PROGS := v_initval vstate_prctl vstate_ptrace
TEST_GEN_PROGS_EXTENDED := vstate_exec_nolibc v_exec_initval_nolibc
include ../../lib.mk
@@ -26,3 +26,6 @@ $(OUTPUT)/v_initval: v_initval.c $(OUTPUT)/sys_hwprobe.o $(OUTPUT)/v_helpers.o
$(OUTPUT)/v_exec_initval_nolibc: v_exec_initval_nolibc.c
$(CC) -nostdlib -static -include ../../../../include/nolibc/nolibc.h \
-Wall $(CFLAGS) $(LDFLAGS) $^ -o $@ -lgcc
+
+$(OUTPUT)/vstate_ptrace: vstate_ptrace.c $(OUTPUT)/sys_hwprobe.o $(OUTPUT)/v_helpers.o
+	$(CC) -static -o $@ $(CFLAGS) $(LDFLAGS) $^
diff --git a/tools/testing/selftests/riscv/vector/vstate_ptrace.c b/tools/testing/selftests/riscv/vector/vstate_ptrace.c
new file mode 100644
index 000000000000..1479abc0c9cb
--- /dev/null
+++ b/tools/testing/selftests/riscv/vector/vstate_ptrace.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <stdio.h>
+#include <stdlib.h>
+#include <asm/ptrace.h>
+#include <linux/elf.h>
+#include <sys/ptrace.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+#include "../../kselftest.h"
+#include "v_helpers.h"
+
+int parent_set_val, child_set_val;
+
+static long do_ptrace(enum __ptrace_request op, pid_t pid, long type, size_t size, void *data)
+{
+ struct iovec v_iovec = {
+ .iov_len = size,
+ .iov_base = data
+ };
+
+ return ptrace(op, pid, type, &v_iovec);
+}
+
+static int do_child(void)
+{
+ int out;
+
+ if (ptrace(PTRACE_TRACEME, -1, NULL, NULL)) {
+		ksft_perror("PTRACE_TRACEME failed");
+ return EXIT_FAILURE;
+ }
+
+ asm volatile (".option push\n\t"
+ ".option arch, +v\n\t"
+ ".option norvc\n\t"
+ "vsetivli x0, 1, e32, m1, ta, ma\n\t"
+ "vmv.s.x v31, %[in]\n\t"
+ "ebreak\n\t"
+ "vmv.x.s %[out], v31\n\t"
+ ".option pop\n\t"
+ : [out] "=r" (out)
+ : [in] "r" (child_set_val));
+
+ if (out != parent_set_val)
+ return EXIT_FAILURE;
+
+ return EXIT_SUCCESS;
+}
+
+static void do_parent(pid_t child)
+{
+ int status;
+ void *data = NULL;
+
+	/* Wait for the child to stop at the breakpoint or to exit */
+	while (waitpid(child, &status, 0) > 0) {
+ if (WIFEXITED(status)) {
+ ksft_test_result(WEXITSTATUS(status) == 0, "SETREGSET vector\n");
+ goto out;
+ } else if (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGTRAP)) {
+ size_t size;
+			void *v31;
+ struct __riscv_v_regset_state *v_regset_hdr;
+ struct user_regs_struct *gpreg;
+
+ size = sizeof(*v_regset_hdr);
+ data = malloc(size);
+ if (!data)
+ goto out;
+ v_regset_hdr = (struct __riscv_v_regset_state *)data;
+
+ if (do_ptrace(PTRACE_GETREGSET, child, NT_RISCV_VECTOR, size, data))
+ goto out;
+
+ ksft_print_msg("vlenb %ld\n", v_regset_hdr->vlenb);
+ data = realloc(data, size + v_regset_hdr->vlenb * 32);
+ if (!data)
+ goto out;
+ v_regset_hdr = (struct __riscv_v_regset_state *)data;
+ v31 = (void *)(data + size + v_regset_hdr->vlenb * 31);
+ size += v_regset_hdr->vlenb * 32;
+
+ if (do_ptrace(PTRACE_GETREGSET, child, NT_RISCV_VECTOR, size, data))
+ goto out;
+
+ ksft_test_result(*(int *)v31 == child_set_val, "GETREGSET vector\n");
+
+ *(int *)v31 = parent_set_val;
+ if (do_ptrace(PTRACE_SETREGSET, child, NT_RISCV_VECTOR, size, data))
+ goto out;
+
+			/* Move the pc past the 4-byte ebreak (.option norvc) */
+ size = sizeof(*gpreg);
+			data = realloc(data, size);
+			if (!data)
+				goto out;
+			gpreg = (struct user_regs_struct *)data;
+
+ if (do_ptrace(PTRACE_GETREGSET, child, NT_PRSTATUS, size, data))
+ goto out;
+
+ gpreg->pc += 4;
+ if (do_ptrace(PTRACE_SETREGSET, child, NT_PRSTATUS, size, data))
+ goto out;
+ }
+
+ ptrace(PTRACE_CONT, child, NULL, NULL);
+ }
+
+out:
+ free(data);
+}
+
+int main(void)
+{
+ pid_t child;
+
+ ksft_set_plan(2);
+ if (!is_vector_supported() && !is_xtheadvector_supported())
+ ksft_exit_skip("Vector not supported\n");
+
+ srandom(getpid());
+ parent_set_val = rand();
+ child_set_val = rand();
+
+ child = fork();
+ if (child < 0)
+ ksft_exit_fail_msg("Fork failed %d\n", child);
+
+ if (!child)
+ return do_child();
+
+ do_parent(child);
+
+ ksft_finished();
+}
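For reference, the NT_RISCV_VECTOR payload walked above is a struct __riscv_v_regset_state header followed by the 32 vector registers, each vlenb bytes wide; the v31 pointer computed in do_parent() follows from that layout. A minimal sketch of the offset arithmetic (the helper name is illustrative, not part of the test):

	/* Hypothetical helper: byte offset of vN within the regset buffer. */
	static inline size_t v_reg_offset(const struct __riscv_v_regset_state *hdr,
					  unsigned int n)
	{
		/* Header first, then v0..v31 back to back, vlenb bytes each. */
		return sizeof(*hdr) + (size_t)hdr->vlenb * n;
	}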
diff --git a/tools/testing/selftests/tpm2/tpm2.py b/tools/testing/selftests/tpm2/tpm2.py
index bba8cb54548e..3d130c30bc7c 100644
--- a/tools/testing/selftests/tpm2/tpm2.py
+++ b/tools/testing/selftests/tpm2/tpm2.py
@@ -437,7 +437,7 @@ class Client:
def extend_pcr(self, i, dig, bank_alg = TPM2_ALG_SHA1):
ds = get_digest_size(bank_alg)
- assert(ds == len(dig))
+ assert ds == len(dig)
auth_cmd = AuthCommand()
@@ -589,7 +589,7 @@ class Client:
def seal(self, parent_key, data, auth_value, policy_dig,
name_alg = TPM2_ALG_SHA1):
ds = get_digest_size(name_alg)
- assert(not policy_dig or ds == len(policy_dig))
+ assert not policy_dig or ds == len(policy_dig)
attributes = 0
if not policy_dig:
diff --git a/tools/testing/selftests/verification/.gitignore b/tools/testing/selftests/verification/.gitignore
new file mode 100644
index 000000000000..2659417cb2c7
--- /dev/null
+++ b/tools/testing/selftests/verification/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+logs
diff --git a/tools/testing/selftests/verification/Makefile b/tools/testing/selftests/verification/Makefile
new file mode 100644
index 000000000000..aa8790c22a71
--- /dev/null
+++ b/tools/testing/selftests/verification/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+all:
+
+TEST_PROGS := verificationtest-ktap
+TEST_FILES := test.d settings
+EXTRA_CLEAN := $(OUTPUT)/logs/*
+
+include ../lib.mk
diff --git a/tools/testing/selftests/verification/config b/tools/testing/selftests/verification/config
new file mode 100644
index 000000000000..43072c1c38f4
--- /dev/null
+++ b/tools/testing/selftests/verification/config
@@ -0,0 +1 @@
+CONFIG_RV=y
diff --git a/tools/testing/selftests/verification/settings b/tools/testing/selftests/verification/settings
new file mode 100644
index 000000000000..e7b9417537fb
--- /dev/null
+++ b/tools/testing/selftests/verification/settings
@@ -0,0 +1 @@
+timeout=0
diff --git a/tools/testing/selftests/verification/test.d/functions b/tools/testing/selftests/verification/test.d/functions
new file mode 100644
index 000000000000..ec36a092f56e
--- /dev/null
+++ b/tools/testing/selftests/verification/test.d/functions
@@ -0,0 +1,39 @@
+check_requires() { # Check required files, monitors and reactors
+ for i in "$@" ; do
+ p=${i%:program}
+ m=${i%:monitor}
+ r=${i%:reactor}
+ if [ $p != $i ]; then
+ if ! which $p ; then
+ echo "Required program $p is not found."
+ exit_unresolved
+ fi
+ elif [ $m != $i ]; then
+ if ! grep -wq $m available_monitors ; then
+ echo "Required monitor $m is not configured."
+ exit_unsupported
+ fi
+ elif [ $r != $i ]; then
+ if ! grep -wq $r available_reactors ; then
+ echo "Required reactor $r is not configured."
+ exit_unsupported
+ fi
+ elif [ ! -e $i ]; then
+ echo "Required feature interface $i doesn't exist."
+ exit_unsupported
+ fi
+ done
+}
+
+initialize_system() { # Reset RV to initial-state
+ echo > enabled_monitors
+ for m in monitors/*; do
+ echo nop > $m/reactors || true
+ done
+ echo 1 > monitoring_on
+ echo 1 > reacting_on || true
+}
+
+finish_system() {
+ initialize_system
+}
diff --git a/tools/testing/selftests/verification/test.d/rv_monitor_enable_disable.tc b/tools/testing/selftests/verification/test.d/rv_monitor_enable_disable.tc
new file mode 100644
index 000000000000..f29236defb5a
--- /dev/null
+++ b/tools/testing/selftests/verification/test.d/rv_monitor_enable_disable.tc
@@ -0,0 +1,75 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+# description: Test monitor enable/disable
+
+test_simple_monitor() {
+ local monitor="$1"
+ local prefix="$2" # nested monitors
+
+ echo 1 > "monitors/$prefix$monitor/enable"
+ grep -q "$monitor$" enabled_monitors
+
+ echo 0 > "monitors/$prefix$monitor/enable"
+ ! grep -q "$monitor$" enabled_monitors
+
+ echo "$monitor" >> enabled_monitors
+ grep -q 1 "monitors/$prefix$monitor/enable"
+
+ echo "!$monitor" >> enabled_monitors
+ grep -q 0 "monitors/$prefix$monitor/enable"
+}
+
+test_container_monitor() {
+ local monitor="$1"
+ local nested
+
+ echo 1 > "monitors/$monitor/enable"
+ grep -q "^$monitor$" enabled_monitors
+
+ for nested_dir in "monitors/$monitor"/*; do
+ [ -d "$nested_dir" ] || continue
+ nested=$(basename "$nested_dir")
+ grep -q "^$monitor:$nested$" enabled_monitors
+ done
+ test -n "$nested"
+
+ echo 0 > "monitors/$monitor/enable"
+ ! grep -q "^$monitor$" enabled_monitors
+
+ for nested_dir in "monitors/$monitor"/*; do
+ [ -d "$nested_dir" ] || continue
+ nested=$(basename "$nested_dir")
+ ! grep -q "^$monitor:$nested$" enabled_monitors
+ done
+
+ echo "$monitor" >> enabled_monitors
+ grep -q 1 "monitors/$monitor/enable"
+
+ for nested_dir in "monitors/$monitor"/*; do
+ [ -d "$nested_dir" ] || continue
+ nested=$(basename "$nested_dir")
+ grep -q "^$monitor:$nested$" enabled_monitors
+ done
+
+ echo "!$monitor" >> enabled_monitors
+ grep -q 0 "monitors/$monitor/enable"
+
+ for nested_dir in "monitors/$monitor"/*; do
+ [ -d "$nested_dir" ] || continue
+ nested=$(basename "$nested_dir")
+ test_simple_monitor "$nested" "$monitor/"
+ done
+}
+
+for monitor_dir in monitors/*; do
+ monitor=$(basename "$monitor_dir")
+
+ if find "$monitor_dir" -mindepth 1 -type d | grep -q .; then
+ test_container_monitor "$monitor"
+ else
+ test_simple_monitor "$monitor"
+ fi
+done
+
+! echo non_existent_monitor > enabled_monitors
+! grep -q "^non_existent_monitor$" enabled_monitors
diff --git a/tools/testing/selftests/verification/test.d/rv_monitor_reactor.tc b/tools/testing/selftests/verification/test.d/rv_monitor_reactor.tc
new file mode 100644
index 000000000000..2958bf849338
--- /dev/null
+++ b/tools/testing/selftests/verification/test.d/rv_monitor_reactor.tc
@@ -0,0 +1,68 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+# description: Test monitor reactor setting
+# requires: available_reactors
+
+test_monitor_reactor() {
+ local monitor="$1"
+ local prefix="$2" # nested monitors
+
+ while read -r reactor; do
+ [ "$reactor" = nop ] && continue
+
+ echo "$reactor" > "monitors/$prefix$monitor/reactors"
+ grep -q "\\[$reactor\\]" "monitors/$prefix$monitor/reactors"
+ done < available_reactors
+
+ echo nop > "monitors/$prefix$monitor/reactors"
+ grep -q "\\[nop\\]" "monitors/$prefix$monitor/reactors"
+}
+
+test_container_monitor() {
+ local monitor="$1"
+ local nested
+
+ while read -r reactor; do
+ [ "$reactor" = nop ] && continue
+
+ echo "$reactor" > "monitors/$monitor/reactors"
+ grep -q "\\[$reactor\\]" "monitors/$monitor/reactors"
+
+ for nested_dir in "monitors/$monitor"/*; do
+ [ -d "$nested_dir" ] || continue
+ nested=$(basename "$nested_dir")
+ grep -q "\\[$reactor\\]" "monitors/$monitor/$nested/reactors"
+ done
+ done < available_reactors
+ test -n "$nested"
+
+ echo nop > "monitors/$monitor/reactors"
+ grep -q "\\[nop\\]" "monitors/$monitor/reactors"
+
+ for nested_dir in "monitors/$monitor"/*; do
+ [ -d "$nested_dir" ] || continue
+ nested=$(basename "$nested_dir")
+ grep -q "\\[nop\\]" "monitors/$monitor/$nested/reactors"
+ done
+
+ for nested_dir in "monitors/$monitor"/*; do
+ [ -d "$nested_dir" ] || continue
+ nested=$(basename "$nested_dir")
+ test_monitor_reactor "$nested" "$monitor/"
+ done
+}
+
+for monitor_dir in monitors/*; do
+ monitor=$(basename "$monitor_dir")
+
+ if find "$monitor_dir" -mindepth 1 -type d | grep -q .; then
+ test_container_monitor "$monitor"
+ else
+ test_monitor_reactor "$monitor"
+ fi
+done
+
+monitor=$(ls /sys/kernel/tracing/rv/monitors -1 | head -n 1)
+test -f "monitors/$monitor/reactors"
+! echo non_existent_reactor > "monitors/$monitor/reactors"
+! grep -q "\\[non_existent_reactor\\]" "monitors/$monitor/reactors"
diff --git a/tools/testing/selftests/verification/test.d/rv_monitors_available.tc b/tools/testing/selftests/verification/test.d/rv_monitors_available.tc
new file mode 100644
index 000000000000..e6a4a1410690
--- /dev/null
+++ b/tools/testing/selftests/verification/test.d/rv_monitors_available.tc
@@ -0,0 +1,18 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+# description: Check available monitors
+
+for monitor_dir in monitors/*; do
+ monitor=$(basename "$monitor_dir")
+
+ grep -q "^$monitor$" available_monitors
+ grep -q . "$monitor_dir"/desc
+
+ for nested_dir in "$monitor_dir"/*; do
+ [ -d "$nested_dir" ] || continue
+ nested=$(basename "$nested_dir")
+
+ grep -q "^$monitor:$nested$" available_monitors
+ grep -q . "$nested_dir"/desc
+ done
+done
diff --git a/tools/testing/selftests/verification/test.d/rv_wwnr_printk.tc b/tools/testing/selftests/verification/test.d/rv_wwnr_printk.tc
new file mode 100644
index 000000000000..5a59432b1d93
--- /dev/null
+++ b/tools/testing/selftests/verification/test.d/rv_wwnr_printk.tc
@@ -0,0 +1,30 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+# description: Test wwnr monitor with printk reactor
+# requires: available_reactors wwnr:monitor printk:reactor stress-ng:program
+
+load() { # returns true if there was a reaction
+ local lines_before num
+ num=$((($(nproc) + 1) / 2))
+ lines_before=$(dmesg | wc -l)
+ stress-ng --cpu-sched "$num" --timer "$num" -t 5 -q
+	dmesg | tail -n +$((lines_before + 1)) | grep -q "rv: monitor wwnr does not allow event"
+}
+
+echo 1 > monitors/wwnr/enable
+echo printk > monitors/wwnr/reactors
+
+load
+
+echo 0 > monitoring_on
+! load
+echo 1 > monitoring_on
+
+load
+
+echo 0 > reacting_on
+! load
+echo 1 > reacting_on
+
+echo nop > monitors/wwnr/reactors
+echo 0 > monitors/wwnr/enable
diff --git a/tools/testing/selftests/verification/verificationtest-ktap b/tools/testing/selftests/verification/verificationtest-ktap
new file mode 100755
index 000000000000..18f7fe324e2f
--- /dev/null
+++ b/tools/testing/selftests/verification/verificationtest-ktap
@@ -0,0 +1,8 @@
+#!/bin/sh -e
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# verificationtest-ktap: Wrapper to integrate the RV selftests with the kselftest runner
+#
+# Copyright (C) Arm Ltd., 2023
+
+../ftrace/ftracetest -K -v --rv ../verification
diff --git a/tools/testing/selftests/vfio/Makefile b/tools/testing/selftests/vfio/Makefile
index 324ba0175a33..3c796ca99a50 100644
--- a/tools/testing/selftests/vfio/Makefile
+++ b/tools/testing/selftests/vfio/Makefile
@@ -2,8 +2,14 @@ CFLAGS = $(KHDR_INCLUDES)
TEST_GEN_PROGS += vfio_dma_mapping_test
TEST_GEN_PROGS += vfio_iommufd_setup_test
TEST_GEN_PROGS += vfio_pci_device_test
+TEST_GEN_PROGS += vfio_pci_device_init_perf_test
TEST_GEN_PROGS += vfio_pci_driver_test
-TEST_PROGS_EXTENDED := run.sh
+
+TEST_FILES += scripts/cleanup.sh
+TEST_FILES += scripts/lib.sh
+TEST_FILES += scripts/run.sh
+TEST_FILES += scripts/setup.sh
+
include ../lib.mk
include lib/libvfio.mk
@@ -11,6 +17,8 @@ CFLAGS += -I$(top_srcdir)/tools/include
CFLAGS += -MD
CFLAGS += $(EXTRA_CFLAGS)
+LDFLAGS += -pthread
+
$(TEST_GEN_PROGS): %: %.o $(LIBVFIO_O)
$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $< $(LIBVFIO_O) $(LDLIBS) -o $@
diff --git a/tools/testing/selftests/vfio/lib/drivers/dsa/dsa.c b/tools/testing/selftests/vfio/lib/drivers/dsa/dsa.c
index 0ca2cbc2a316..c75045bcab79 100644
--- a/tools/testing/selftests/vfio/lib/drivers/dsa/dsa.c
+++ b/tools/testing/selftests/vfio/lib/drivers/dsa/dsa.c
@@ -9,7 +9,7 @@
#include <linux/pci_ids.h>
#include <linux/sizes.h>
-#include <vfio_util.h>
+#include <libvfio.h>
#include "registers.h"
@@ -70,7 +70,7 @@ static int dsa_probe(struct vfio_pci_device *device)
return -EINVAL;
if (dsa_int_handle_request_required(device)) {
- printf("Device requires requesting interrupt handles\n");
+ dev_err(device, "Device requires requesting interrupt handles\n");
return -EINVAL;
}
@@ -91,23 +91,23 @@ static void dsa_check_sw_err(struct vfio_pci_device *device)
return;
}
- fprintf(stderr, "SWERR: 0x%016lx 0x%016lx 0x%016lx 0x%016lx\n",
+ dev_err(device, "SWERR: 0x%016lx 0x%016lx 0x%016lx 0x%016lx\n",
err.bits[0], err.bits[1], err.bits[2], err.bits[3]);
- fprintf(stderr, " valid: 0x%x\n", err.valid);
- fprintf(stderr, " overflow: 0x%x\n", err.overflow);
- fprintf(stderr, " desc_valid: 0x%x\n", err.desc_valid);
- fprintf(stderr, " wq_idx_valid: 0x%x\n", err.wq_idx_valid);
- fprintf(stderr, " batch: 0x%x\n", err.batch);
- fprintf(stderr, " fault_rw: 0x%x\n", err.fault_rw);
- fprintf(stderr, " priv: 0x%x\n", err.priv);
- fprintf(stderr, " error: 0x%x\n", err.error);
- fprintf(stderr, " wq_idx: 0x%x\n", err.wq_idx);
- fprintf(stderr, " operation: 0x%x\n", err.operation);
- fprintf(stderr, " pasid: 0x%x\n", err.pasid);
- fprintf(stderr, " batch_idx: 0x%x\n", err.batch_idx);
- fprintf(stderr, " invalid_flags: 0x%x\n", err.invalid_flags);
- fprintf(stderr, " fault_addr: 0x%lx\n", err.fault_addr);
+ dev_err(device, " valid: 0x%x\n", err.valid);
+ dev_err(device, " overflow: 0x%x\n", err.overflow);
+ dev_err(device, " desc_valid: 0x%x\n", err.desc_valid);
+ dev_err(device, " wq_idx_valid: 0x%x\n", err.wq_idx_valid);
+ dev_err(device, " batch: 0x%x\n", err.batch);
+ dev_err(device, " fault_rw: 0x%x\n", err.fault_rw);
+ dev_err(device, " priv: 0x%x\n", err.priv);
+ dev_err(device, " error: 0x%x\n", err.error);
+ dev_err(device, " wq_idx: 0x%x\n", err.wq_idx);
+ dev_err(device, " operation: 0x%x\n", err.operation);
+ dev_err(device, " pasid: 0x%x\n", err.pasid);
+ dev_err(device, " batch_idx: 0x%x\n", err.batch_idx);
+ dev_err(device, " invalid_flags: 0x%x\n", err.invalid_flags);
+ dev_err(device, " fault_addr: 0x%lx\n", err.fault_addr);
VFIO_FAIL("Software Error Detected!\n");
}
@@ -256,7 +256,7 @@ static int dsa_completion_wait(struct vfio_pci_device *device,
if (status == DSA_COMP_SUCCESS)
return 0;
- printf("Error detected during memcpy operation: 0x%x\n", status);
+ dev_err(device, "Error detected during memcpy operation: 0x%x\n", status);
return -1;
}
diff --git a/tools/testing/selftests/vfio/lib/drivers/ioat/ioat.c b/tools/testing/selftests/vfio/lib/drivers/ioat/ioat.c
index c3b91d9b1f59..a871b935542b 100644
--- a/tools/testing/selftests/vfio/lib/drivers/ioat/ioat.c
+++ b/tools/testing/selftests/vfio/lib/drivers/ioat/ioat.c
@@ -7,7 +7,7 @@
#include <linux/pci_ids.h>
#include <linux/sizes.h>
-#include <vfio_util.h>
+#include <libvfio.h>
#include "hw.h"
#include "registers.h"
@@ -51,7 +51,7 @@ static int ioat_probe(struct vfio_pci_device *device)
r = 0;
break;
default:
- printf("ioat: Unsupported version: 0x%x\n", version);
+ dev_err(device, "ioat: Unsupported version: 0x%x\n", version);
r = -EINVAL;
}
return r;
@@ -135,13 +135,13 @@ static void ioat_handle_error(struct vfio_pci_device *device)
{
void *registers = ioat_channel_registers(device);
- printf("Error detected during memcpy operation!\n"
- " CHANERR: 0x%x\n"
- " CHANERR_INT: 0x%x\n"
- " DMAUNCERRSTS: 0x%x\n",
- readl(registers + IOAT_CHANERR_OFFSET),
- vfio_pci_config_readl(device, IOAT_PCI_CHANERR_INT_OFFSET),
- vfio_pci_config_readl(device, IOAT_PCI_DMAUNCERRSTS_OFFSET));
+ dev_err(device, "Error detected during memcpy operation!\n"
+ " CHANERR: 0x%x\n"
+ " CHANERR_INT: 0x%x\n"
+ " DMAUNCERRSTS: 0x%x\n",
+ readl(registers + IOAT_CHANERR_OFFSET),
+ vfio_pci_config_readl(device, IOAT_PCI_CHANERR_INT_OFFSET),
+ vfio_pci_config_readl(device, IOAT_PCI_DMAUNCERRSTS_OFFSET));
ioat_reset(device);
}
diff --git a/tools/testing/selftests/vfio/lib/include/libvfio.h b/tools/testing/selftests/vfio/lib/include/libvfio.h
new file mode 100644
index 000000000000..279ddcd70194
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/include/libvfio.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_H
+#define SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_H
+
+#include <libvfio/assert.h>
+#include <libvfio/iommu.h>
+#include <libvfio/iova_allocator.h>
+#include <libvfio/vfio_pci_device.h>
+#include <libvfio/vfio_pci_driver.h>
+
+/*
+ * Return the BDF string of the device that the test should use.
+ *
+ * If a BDF string is provided by the user on the command line (as the last
+ * element of argv[]), then this function will return that and decrement argc
+ * by 1.
+ *
+ * Otherwise this function will attempt to use the environment variable
+ * $VFIO_SELFTESTS_BDF.
+ *
+ * If BDF cannot be determined then the test will exit with KSFT_SKIP.
+ */
+const char *vfio_selftests_get_bdf(int *argc, char *argv[]);
+char **vfio_selftests_get_bdfs(int *argc, char *argv[], int *nr_bdfs);
+
+#endif /* SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_H */
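A minimal usage sketch for the BDF helper declared above, assuming the libvfio include path is set up as in the Makefile (the printf is purely illustrative):

	#include <stdio.h>
	#include <libvfio.h>

	int main(int argc, char *argv[])
	{
		/* Consumes a trailing BDF argument, else $VFIO_SELFTESTS_BDF;
		 * exits with KSFT_SKIP when neither is available.
		 */
		const char *bdf = vfio_selftests_get_bdf(&argc, argv);

		printf("testing device %s\n", bdf);
		return 0;
	}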
diff --git a/tools/testing/selftests/vfio/lib/include/libvfio/assert.h b/tools/testing/selftests/vfio/lib/include/libvfio/assert.h
new file mode 100644
index 000000000000..f4ebd122d9b6
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/include/libvfio/assert.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_ASSERT_H
+#define SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_ASSERT_H
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "../../../../kselftest.h"
+
+#define VFIO_LOG_AND_EXIT(...) do { \
+ fprintf(stderr, " " __VA_ARGS__); \
+ fprintf(stderr, "\n"); \
+ exit(KSFT_FAIL); \
+} while (0)
+
+#define VFIO_ASSERT_OP(_lhs, _rhs, _op, ...) do { \
+ typeof(_lhs) __lhs = (_lhs); \
+ typeof(_rhs) __rhs = (_rhs); \
+ \
+ if (__lhs _op __rhs) \
+ break; \
+ \
+ fprintf(stderr, "%s:%u: Assertion Failure\n\n", __FILE__, __LINE__); \
+ fprintf(stderr, " Expression: " #_lhs " " #_op " " #_rhs "\n"); \
+ fprintf(stderr, " Observed: %#lx %s %#lx\n", \
+ (u64)__lhs, #_op, (u64)__rhs); \
+ fprintf(stderr, " [errno: %d - %s]\n", errno, strerror(errno)); \
+ VFIO_LOG_AND_EXIT(__VA_ARGS__); \
+} while (0)
+
+#define VFIO_ASSERT_EQ(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, ==, ##__VA_ARGS__)
+#define VFIO_ASSERT_NE(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, !=, ##__VA_ARGS__)
+#define VFIO_ASSERT_LT(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, <, ##__VA_ARGS__)
+#define VFIO_ASSERT_LE(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, <=, ##__VA_ARGS__)
+#define VFIO_ASSERT_GT(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, >, ##__VA_ARGS__)
+#define VFIO_ASSERT_GE(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, >=, ##__VA_ARGS__)
+#define VFIO_ASSERT_TRUE(_a, ...) VFIO_ASSERT_NE(false, (_a), ##__VA_ARGS__)
+#define VFIO_ASSERT_FALSE(_a, ...) VFIO_ASSERT_EQ(false, (_a), ##__VA_ARGS__)
+#define VFIO_ASSERT_NULL(_a, ...) VFIO_ASSERT_EQ(NULL, _a, ##__VA_ARGS__)
+#define VFIO_ASSERT_NOT_NULL(_a, ...) VFIO_ASSERT_NE(NULL, _a, ##__VA_ARGS__)
+
+#define VFIO_FAIL(_fmt, ...) do { \
+ fprintf(stderr, "%s:%u: FAIL\n\n", __FILE__, __LINE__); \
+ VFIO_LOG_AND_EXIT(_fmt, ##__VA_ARGS__); \
+} while (0)
+
+#define ioctl_assert(_fd, _op, _arg) do { \
+ void *__arg = (_arg); \
+ int __ret = ioctl((_fd), (_op), (__arg)); \
+ VFIO_ASSERT_EQ(__ret, 0, "ioctl(%s, %s, %s) returned %d\n", #_fd, #_op, #_arg, __ret); \
+} while (0)
+
+#endif /* SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_ASSERT_H */
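As a hedged sketch of how these assertions read at a call site (the fd and the region check are placeholders, not taken from the tests):

	/* On mismatch the macros print the expression, the observed values
	 * and errno, then exit with KSFT_FAIL.
	 */
	static void example(int device_fd)
	{
		struct vfio_device_info info = { .argsz = sizeof(info) };

		ioctl_assert(device_fd, VFIO_DEVICE_GET_INFO, &info);
		VFIO_ASSERT_GT(info.num_regions, 0, "device has no regions\n");
	}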
diff --git a/tools/testing/selftests/vfio/lib/include/libvfio/iommu.h b/tools/testing/selftests/vfio/lib/include/libvfio/iommu.h
new file mode 100644
index 000000000000..5c9b9dc6d993
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/include/libvfio/iommu.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_IOMMU_H
+#define SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_IOMMU_H
+
+#include <linux/list.h>
+#include <linux/types.h>
+
+#include <libvfio/assert.h>
+
+typedef u64 iova_t;
+
+struct iommu_mode {
+ const char *name;
+ const char *container_path;
+ unsigned long iommu_type;
+};
+
+extern const char *default_iommu_mode;
+
+struct dma_region {
+ struct list_head link;
+ void *vaddr;
+ iova_t iova;
+ u64 size;
+};
+
+struct iommu {
+ const struct iommu_mode *mode;
+ int container_fd;
+ int iommufd;
+ u32 ioas_id;
+ struct list_head dma_regions;
+};
+
+struct iommu *iommu_init(const char *iommu_mode);
+void iommu_cleanup(struct iommu *iommu);
+
+int __iommu_map(struct iommu *iommu, struct dma_region *region);
+
+static inline void iommu_map(struct iommu *iommu, struct dma_region *region)
+{
+ VFIO_ASSERT_EQ(__iommu_map(iommu, region), 0);
+}
+
+int __iommu_unmap(struct iommu *iommu, struct dma_region *region, u64 *unmapped);
+
+static inline void iommu_unmap(struct iommu *iommu, struct dma_region *region)
+{
+ VFIO_ASSERT_EQ(__iommu_unmap(iommu, region, NULL), 0);
+}
+
+int __iommu_unmap_all(struct iommu *iommu, u64 *unmapped);
+
+static inline void iommu_unmap_all(struct iommu *iommu)
+{
+ VFIO_ASSERT_EQ(__iommu_unmap_all(iommu, NULL), 0);
+}
+
+int __iommu_hva2iova(struct iommu *iommu, void *vaddr, iova_t *iova);
+iova_t iommu_hva2iova(struct iommu *iommu, void *vaddr);
+
+struct iommu_iova_range *iommu_iova_ranges(struct iommu *iommu, u32 *nranges);
+
+/*
+ * Generator for VFIO selftests fixture variants that replicate across all
+ * possible IOMMU modes. Tests must define FIXTURE_VARIANT_ADD_IOMMU_MODE()
+ * which should then use FIXTURE_VARIANT_ADD() to create the variant.
+ */
+#define FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(...) \
+FIXTURE_VARIANT_ADD_IOMMU_MODE(vfio_type1_iommu, ##__VA_ARGS__); \
+FIXTURE_VARIANT_ADD_IOMMU_MODE(vfio_type1v2_iommu, ##__VA_ARGS__); \
+FIXTURE_VARIANT_ADD_IOMMU_MODE(iommufd_compat_type1, ##__VA_ARGS__); \
+FIXTURE_VARIANT_ADD_IOMMU_MODE(iommufd_compat_type1v2, ##__VA_ARGS__); \
+FIXTURE_VARIANT_ADD_IOMMU_MODE(iommufd, ##__VA_ARGS__)
+
+#endif /* SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_IOMMU_H */
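A sketch of the pattern the comment above describes, assuming the kselftest harness is included; the fixture name and field are illustrative:

	FIXTURE_VARIANT(vfio_test) {
		const char *iommu_mode;
	};

	#define FIXTURE_VARIANT_ADD_IOMMU_MODE(_name)	\
	FIXTURE_VARIANT_ADD(vfio_test, _name) {		\
		.iommu_mode = #_name,			\
	}

	FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES();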
diff --git a/tools/testing/selftests/vfio/lib/include/libvfio/iova_allocator.h b/tools/testing/selftests/vfio/lib/include/libvfio/iova_allocator.h
new file mode 100644
index 000000000000..8f1d994e9ea2
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/include/libvfio/iova_allocator.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_IOVA_ALLOCATOR_H
+#define SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_IOVA_ALLOCATOR_H
+
+#include <uapi/linux/types.h>
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/iommufd.h>
+
+#include <libvfio/iommu.h>
+
+struct iova_allocator {
+ struct iommu_iova_range *ranges;
+ u32 nranges;
+ u32 range_idx;
+ u64 range_offset;
+};
+
+struct iova_allocator *iova_allocator_init(struct iommu *iommu);
+void iova_allocator_cleanup(struct iova_allocator *allocator);
+iova_t iova_allocator_alloc(struct iova_allocator *allocator, size_t size);
+
+#endif /* SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_IOVA_ALLOCATOR_H */
diff --git a/tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_device.h b/tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_device.h
new file mode 100644
index 000000000000..2858885a89bb
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_device.h
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_VFIO_PCI_DEVICE_H
+#define SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_VFIO_PCI_DEVICE_H
+
+#include <fcntl.h>
+#include <linux/vfio.h>
+#include <linux/pci_regs.h>
+
+#include <libvfio/assert.h>
+#include <libvfio/iommu.h>
+#include <libvfio/vfio_pci_driver.h>
+
+struct vfio_pci_bar {
+ struct vfio_region_info info;
+ void *vaddr;
+};
+
+struct vfio_pci_device {
+ const char *bdf;
+ int fd;
+ int group_fd;
+
+ struct iommu *iommu;
+
+ struct vfio_device_info info;
+ struct vfio_region_info config_space;
+ struct vfio_pci_bar bars[PCI_STD_NUM_BARS];
+
+ struct vfio_irq_info msi_info;
+ struct vfio_irq_info msix_info;
+
+ /* eventfds for MSI and MSI-x interrupts */
+ int msi_eventfds[PCI_MSIX_FLAGS_QSIZE + 1];
+
+ struct vfio_pci_driver driver;
+};
+
+#define dev_info(_dev, _fmt, ...) printf("%s: " _fmt, (_dev)->bdf, ##__VA_ARGS__)
+#define dev_err(_dev, _fmt, ...) fprintf(stderr, "%s: " _fmt, (_dev)->bdf, ##__VA_ARGS__)
+
+struct vfio_pci_device *vfio_pci_device_init(const char *bdf, struct iommu *iommu);
+void vfio_pci_device_cleanup(struct vfio_pci_device *device);
+
+void vfio_pci_device_reset(struct vfio_pci_device *device);
+
+void vfio_pci_config_access(struct vfio_pci_device *device, bool write,
+ size_t config, size_t size, void *data);
+
+#define vfio_pci_config_read(_device, _offset, _type) ({ \
+ _type __data; \
+ vfio_pci_config_access((_device), false, _offset, sizeof(__data), &__data); \
+ __data; \
+})
+
+#define vfio_pci_config_readb(_d, _o) vfio_pci_config_read(_d, _o, u8)
+#define vfio_pci_config_readw(_d, _o) vfio_pci_config_read(_d, _o, u16)
+#define vfio_pci_config_readl(_d, _o) vfio_pci_config_read(_d, _o, u32)
+
+#define vfio_pci_config_write(_device, _offset, _value, _type) do { \
+ _type __data = (_value); \
+ vfio_pci_config_access((_device), true, _offset, sizeof(_type), &__data); \
+} while (0)
+
+#define vfio_pci_config_writeb(_d, _o, _v) vfio_pci_config_write(_d, _o, _v, u8)
+#define vfio_pci_config_writew(_d, _o, _v) vfio_pci_config_write(_d, _o, _v, u16)
+#define vfio_pci_config_writel(_d, _o, _v) vfio_pci_config_write(_d, _o, _v, u32)
+
+void vfio_pci_irq_enable(struct vfio_pci_device *device, u32 index,
+ u32 vector, int count);
+void vfio_pci_irq_disable(struct vfio_pci_device *device, u32 index);
+void vfio_pci_irq_trigger(struct vfio_pci_device *device, u32 index, u32 vector);
+
+static inline void fcntl_set_nonblock(int fd)
+{
+ int r;
+
+ r = fcntl(fd, F_GETFL, 0);
+ VFIO_ASSERT_NE(r, -1, "F_GETFL failed for fd %d\n", fd);
+
+ r = fcntl(fd, F_SETFL, r | O_NONBLOCK);
+ VFIO_ASSERT_NE(r, -1, "F_SETFL O_NONBLOCK failed for fd %d\n", fd);
+}
+
+static inline void vfio_pci_msi_enable(struct vfio_pci_device *device,
+ u32 vector, int count)
+{
+ vfio_pci_irq_enable(device, VFIO_PCI_MSI_IRQ_INDEX, vector, count);
+}
+
+static inline void vfio_pci_msi_disable(struct vfio_pci_device *device)
+{
+ vfio_pci_irq_disable(device, VFIO_PCI_MSI_IRQ_INDEX);
+}
+
+static inline void vfio_pci_msix_enable(struct vfio_pci_device *device,
+ u32 vector, int count)
+{
+ vfio_pci_irq_enable(device, VFIO_PCI_MSIX_IRQ_INDEX, vector, count);
+}
+
+static inline void vfio_pci_msix_disable(struct vfio_pci_device *device)
+{
+ vfio_pci_irq_disable(device, VFIO_PCI_MSIX_IRQ_INDEX);
+}
+
+static inline int __to_iova(struct vfio_pci_device *device, void *vaddr, iova_t *iova)
+{
+ return __iommu_hva2iova(device->iommu, vaddr, iova);
+}
+
+static inline iova_t to_iova(struct vfio_pci_device *device, void *vaddr)
+{
+ return iommu_hva2iova(device->iommu, vaddr);
+}
+
+static inline bool vfio_pci_device_match(struct vfio_pci_device *device,
+ u16 vendor_id, u16 device_id)
+{
+ return (vendor_id == vfio_pci_config_readw(device, PCI_VENDOR_ID)) &&
+ (device_id == vfio_pci_config_readw(device, PCI_DEVICE_ID));
+}
+
+const char *vfio_pci_get_cdev_path(const char *bdf);
+
+#endif /* SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_VFIO_PCI_DEVICE_H */
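Tying the declarations above together, a minimal sketch that opens a device and reads its vendor ID; it assumes a BDF obtained from vfio_selftests_get_bdf() and the default iommufd mode:

	static void probe_vendor(const char *bdf)
	{
		struct iommu *iommu = iommu_init(default_iommu_mode);
		struct vfio_pci_device *device = vfio_pci_device_init(bdf, iommu);
		u16 vendor = vfio_pci_config_readw(device, PCI_VENDOR_ID);

		dev_info(device, "vendor id: 0x%04x\n", vendor);

		vfio_pci_device_cleanup(device);
		iommu_cleanup(iommu);
	}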
diff --git a/tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_driver.h b/tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_driver.h
new file mode 100644
index 000000000000..e5ada209b1d1
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_driver.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_VFIO_PCI_DRIVER_H
+#define SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_VFIO_PCI_DRIVER_H
+
+#include <libvfio/iommu.h>
+
+struct vfio_pci_device;
+
+struct vfio_pci_driver_ops {
+ const char *name;
+
+ /**
+ * @probe() - Check if the driver supports the given device.
+ *
+ * Return: 0 on success, non-0 on failure.
+ */
+ int (*probe)(struct vfio_pci_device *device);
+
+ /**
+ * @init() - Initialize the driver for @device.
+ *
+ * Must be called after device->driver.region has been initialized.
+ */
+ void (*init)(struct vfio_pci_device *device);
+
+ /**
+	 * @remove() - Deinitialize the driver for @device.
+ */
+ void (*remove)(struct vfio_pci_device *device);
+
+ /**
+	 * @memcpy_start() - Kick off @count repeated memcpy operations from
+ * [@src, @src + @size) to [@dst, @dst + @size).
+ *
+ * Guarantees:
+ * - The device will attempt DMA reads on [src, src + size).
+ * - The device will attempt DMA writes on [dst, dst + size).
+ * - The device will not generate any interrupts.
+ *
+ * memcpy_start() returns immediately, it does not wait for the
+ * copies to complete.
+ */
+ void (*memcpy_start)(struct vfio_pci_device *device,
+ iova_t src, iova_t dst, u64 size, u64 count);
+
+ /**
+	 * @memcpy_wait() - Wait until the memcpy operations started by
+ * memcpy_start() have finished.
+ *
+ * Guarantees:
+ * - All in-flight DMAs initiated by memcpy_start() are fully complete
+ * before memcpy_wait() returns.
+ *
+ * Returns non-0 if the driver detects that an error occurred during the
+ * memcpy, 0 otherwise.
+ */
+ int (*memcpy_wait)(struct vfio_pci_device *device);
+
+ /**
+	 * @send_msi() - Make the device send the MSI device->driver.msi.
+ *
+ * Guarantees:
+ * - The device will send the MSI once.
+ */
+ void (*send_msi)(struct vfio_pci_device *device);
+};
+
+struct vfio_pci_driver {
+ const struct vfio_pci_driver_ops *ops;
+ bool initialized;
+ bool memcpy_in_progress;
+
+ /* Region to be used by the driver (e.g. for in-memory descriptors) */
+ struct dma_region region;
+
+ /* The maximum size that can be passed to memcpy_start(). */
+ u64 max_memcpy_size;
+
+ /* The maximum count that can be passed to memcpy_start(). */
+ u64 max_memcpy_count;
+
+ /* The MSI vector the device will signal in ops->send_msi(). */
+ int msi;
+};
+
+void vfio_pci_driver_probe(struct vfio_pci_device *device);
+void vfio_pci_driver_init(struct vfio_pci_device *device);
+void vfio_pci_driver_remove(struct vfio_pci_device *device);
+int vfio_pci_driver_memcpy(struct vfio_pci_device *device,
+ iova_t src, iova_t dst, u64 size);
+void vfio_pci_driver_memcpy_start(struct vfio_pci_device *device,
+ iova_t src, iova_t dst, u64 size,
+ u64 count);
+int vfio_pci_driver_memcpy_wait(struct vfio_pci_device *device);
+void vfio_pci_driver_send_msi(struct vfio_pci_device *device);
+
+#endif /* SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_VFIO_PCI_DRIVER_H */
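The start/wait split documented above composes into a simple bounded copy; a hedged sketch, assuming the driver has already been probed and initialized and that src/dst are valid IOVAs from the allocator:

	static void dma_copy_once(struct vfio_pci_device *device,
				  iova_t src, iova_t dst, u64 size)
	{
		/* Kick off a single copy; memcpy_start() returns immediately. */
		vfio_pci_driver_memcpy_start(device, src, dst, size, 1);

		/* Block until the device reports completion; non-0 means error. */
		VFIO_ASSERT_EQ(vfio_pci_driver_memcpy_wait(device), 0);
	}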
diff --git a/tools/testing/selftests/vfio/lib/include/vfio_util.h b/tools/testing/selftests/vfio/lib/include/vfio_util.h
deleted file mode 100644
index 69ec0c856481..000000000000
--- a/tools/testing/selftests/vfio/lib/include/vfio_util.h
+++ /dev/null
@@ -1,331 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef SELFTESTS_VFIO_LIB_INCLUDE_VFIO_UTIL_H
-#define SELFTESTS_VFIO_LIB_INCLUDE_VFIO_UTIL_H
-
-#include <fcntl.h>
-#include <string.h>
-
-#include <uapi/linux/types.h>
-#include <linux/iommufd.h>
-#include <linux/list.h>
-#include <linux/pci_regs.h>
-#include <linux/vfio.h>
-
-#include "../../../kselftest.h"
-
-#define VFIO_LOG_AND_EXIT(...) do { \
- fprintf(stderr, " " __VA_ARGS__); \
- fprintf(stderr, "\n"); \
- exit(KSFT_FAIL); \
-} while (0)
-
-#define VFIO_ASSERT_OP(_lhs, _rhs, _op, ...) do { \
- typeof(_lhs) __lhs = (_lhs); \
- typeof(_rhs) __rhs = (_rhs); \
- \
- if (__lhs _op __rhs) \
- break; \
- \
- fprintf(stderr, "%s:%u: Assertion Failure\n\n", __FILE__, __LINE__); \
- fprintf(stderr, " Expression: " #_lhs " " #_op " " #_rhs "\n"); \
- fprintf(stderr, " Observed: %#lx %s %#lx\n", \
- (u64)__lhs, #_op, (u64)__rhs); \
- fprintf(stderr, " [errno: %d - %s]\n", errno, strerror(errno)); \
- VFIO_LOG_AND_EXIT(__VA_ARGS__); \
-} while (0)
-
-#define VFIO_ASSERT_EQ(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, ==, ##__VA_ARGS__)
-#define VFIO_ASSERT_NE(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, !=, ##__VA_ARGS__)
-#define VFIO_ASSERT_LT(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, <, ##__VA_ARGS__)
-#define VFIO_ASSERT_LE(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, <=, ##__VA_ARGS__)
-#define VFIO_ASSERT_GT(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, >, ##__VA_ARGS__)
-#define VFIO_ASSERT_GE(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, >=, ##__VA_ARGS__)
-#define VFIO_ASSERT_TRUE(_a, ...) VFIO_ASSERT_NE(false, (_a), ##__VA_ARGS__)
-#define VFIO_ASSERT_FALSE(_a, ...) VFIO_ASSERT_EQ(false, (_a), ##__VA_ARGS__)
-#define VFIO_ASSERT_NULL(_a, ...) VFIO_ASSERT_EQ(NULL, _a, ##__VA_ARGS__)
-#define VFIO_ASSERT_NOT_NULL(_a, ...) VFIO_ASSERT_NE(NULL, _a, ##__VA_ARGS__)
-
-#define VFIO_FAIL(_fmt, ...) do { \
- fprintf(stderr, "%s:%u: FAIL\n\n", __FILE__, __LINE__); \
- VFIO_LOG_AND_EXIT(_fmt, ##__VA_ARGS__); \
-} while (0)
-
-struct vfio_iommu_mode {
- const char *name;
- const char *container_path;
- unsigned long iommu_type;
-};
-
-/*
- * Generator for VFIO selftests fixture variants that replicate across all
- * possible IOMMU modes. Tests must define FIXTURE_VARIANT_ADD_IOMMU_MODE()
- * which should then use FIXTURE_VARIANT_ADD() to create the variant.
- */
-#define FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(...) \
-FIXTURE_VARIANT_ADD_IOMMU_MODE(vfio_type1_iommu, ##__VA_ARGS__); \
-FIXTURE_VARIANT_ADD_IOMMU_MODE(vfio_type1v2_iommu, ##__VA_ARGS__); \
-FIXTURE_VARIANT_ADD_IOMMU_MODE(iommufd_compat_type1, ##__VA_ARGS__); \
-FIXTURE_VARIANT_ADD_IOMMU_MODE(iommufd_compat_type1v2, ##__VA_ARGS__); \
-FIXTURE_VARIANT_ADD_IOMMU_MODE(iommufd, ##__VA_ARGS__)
-
-struct vfio_pci_bar {
- struct vfio_region_info info;
- void *vaddr;
-};
-
-typedef u64 iova_t;
-
-#define INVALID_IOVA UINT64_MAX
-
-struct vfio_dma_region {
- struct list_head link;
- void *vaddr;
- iova_t iova;
- u64 size;
-};
-
-struct vfio_pci_device;
-
-struct vfio_pci_driver_ops {
- const char *name;
-
- /**
- * @probe() - Check if the driver supports the given device.
- *
- * Return: 0 on success, non-0 on failure.
- */
- int (*probe)(struct vfio_pci_device *device);
-
- /**
- * @init() - Initialize the driver for @device.
- *
- * Must be called after device->driver.region has been initialized.
- */
- void (*init)(struct vfio_pci_device *device);
-
- /**
- * remove() - Deinitialize the driver for @device.
- */
- void (*remove)(struct vfio_pci_device *device);
-
- /**
- * memcpy_start() - Kick off @count repeated memcpy operations from
- * [@src, @src + @size) to [@dst, @dst + @size).
- *
- * Guarantees:
- * - The device will attempt DMA reads on [src, src + size).
- * - The device will attempt DMA writes on [dst, dst + size).
- * - The device will not generate any interrupts.
- *
- * memcpy_start() returns immediately, it does not wait for the
- * copies to complete.
- */
- void (*memcpy_start)(struct vfio_pci_device *device,
- iova_t src, iova_t dst, u64 size, u64 count);
-
- /**
- * memcpy_wait() - Wait until the memcpy operations started by
- * memcpy_start() have finished.
- *
- * Guarantees:
- * - All in-flight DMAs initiated by memcpy_start() are fully complete
- * before memcpy_wait() returns.
- *
- * Returns non-0 if the driver detects that an error occurred during the
- * memcpy, 0 otherwise.
- */
- int (*memcpy_wait)(struct vfio_pci_device *device);
-
- /**
- * send_msi() - Make the device send the MSI device->driver.msi.
- *
- * Guarantees:
- * - The device will send the MSI once.
- */
- void (*send_msi)(struct vfio_pci_device *device);
-};
-
-struct vfio_pci_driver {
- const struct vfio_pci_driver_ops *ops;
- bool initialized;
- bool memcpy_in_progress;
-
- /* Region to be used by the driver (e.g. for in-memory descriptors) */
- struct vfio_dma_region region;
-
- /* The maximum size that can be passed to memcpy_start(). */
- u64 max_memcpy_size;
-
- /* The maximum count that can be passed to memcpy_start(). */
- u64 max_memcpy_count;
-
- /* The MSI vector the device will signal in ops->send_msi(). */
- int msi;
-};
-
-struct vfio_pci_device {
- int fd;
-
- const struct vfio_iommu_mode *iommu_mode;
- int group_fd;
- int container_fd;
-
- int iommufd;
- u32 ioas_id;
-
- struct vfio_device_info info;
- struct vfio_region_info config_space;
- struct vfio_pci_bar bars[PCI_STD_NUM_BARS];
-
- struct vfio_irq_info msi_info;
- struct vfio_irq_info msix_info;
-
- struct list_head dma_regions;
-
- /* eventfds for MSI and MSI-x interrupts */
- int msi_eventfds[PCI_MSIX_FLAGS_QSIZE + 1];
-
- struct vfio_pci_driver driver;
-};
-
-struct iova_allocator {
- struct iommu_iova_range *ranges;
- u32 nranges;
- u32 range_idx;
- u64 range_offset;
-};
-
-/*
- * Return the BDF string of the device that the test should use.
- *
- * If a BDF string is provided by the user on the command line (as the last
- * element of argv[]), then this function will return that and decrement argc
- * by 1.
- *
- * Otherwise this function will attempt to use the environment variable
- * $VFIO_SELFTESTS_BDF.
- *
- * If BDF cannot be determined then the test will exit with KSFT_SKIP.
- */
-const char *vfio_selftests_get_bdf(int *argc, char *argv[]);
-const char *vfio_pci_get_cdev_path(const char *bdf);
-
-extern const char *default_iommu_mode;
-
-struct vfio_pci_device *vfio_pci_device_init(const char *bdf, const char *iommu_mode);
-void vfio_pci_device_cleanup(struct vfio_pci_device *device);
-void vfio_pci_device_reset(struct vfio_pci_device *device);
-
-struct iommu_iova_range *vfio_pci_iova_ranges(struct vfio_pci_device *device,
- u32 *nranges);
-
-struct iova_allocator *iova_allocator_init(struct vfio_pci_device *device);
-void iova_allocator_cleanup(struct iova_allocator *allocator);
-iova_t iova_allocator_alloc(struct iova_allocator *allocator, size_t size);
-
-int __vfio_pci_dma_map(struct vfio_pci_device *device,
- struct vfio_dma_region *region);
-int __vfio_pci_dma_unmap(struct vfio_pci_device *device,
- struct vfio_dma_region *region,
- u64 *unmapped);
-int __vfio_pci_dma_unmap_all(struct vfio_pci_device *device, u64 *unmapped);
-
-static inline void vfio_pci_dma_map(struct vfio_pci_device *device,
- struct vfio_dma_region *region)
-{
- VFIO_ASSERT_EQ(__vfio_pci_dma_map(device, region), 0);
-}
-
-static inline void vfio_pci_dma_unmap(struct vfio_pci_device *device,
- struct vfio_dma_region *region)
-{
- VFIO_ASSERT_EQ(__vfio_pci_dma_unmap(device, region, NULL), 0);
-}
-
-static inline void vfio_pci_dma_unmap_all(struct vfio_pci_device *device)
-{
- VFIO_ASSERT_EQ(__vfio_pci_dma_unmap_all(device, NULL), 0);
-}
-
-void vfio_pci_config_access(struct vfio_pci_device *device, bool write,
- size_t config, size_t size, void *data);
-
-#define vfio_pci_config_read(_device, _offset, _type) ({ \
- _type __data; \
- vfio_pci_config_access((_device), false, _offset, sizeof(__data), &__data); \
- __data; \
-})
-
-#define vfio_pci_config_readb(_d, _o) vfio_pci_config_read(_d, _o, u8)
-#define vfio_pci_config_readw(_d, _o) vfio_pci_config_read(_d, _o, u16)
-#define vfio_pci_config_readl(_d, _o) vfio_pci_config_read(_d, _o, u32)
-
-#define vfio_pci_config_write(_device, _offset, _value, _type) do { \
- _type __data = (_value); \
- vfio_pci_config_access((_device), true, _offset, sizeof(_type), &__data); \
-} while (0)
-
-#define vfio_pci_config_writeb(_d, _o, _v) vfio_pci_config_write(_d, _o, _v, u8)
-#define vfio_pci_config_writew(_d, _o, _v) vfio_pci_config_write(_d, _o, _v, u16)
-#define vfio_pci_config_writel(_d, _o, _v) vfio_pci_config_write(_d, _o, _v, u32)
-
-void vfio_pci_irq_enable(struct vfio_pci_device *device, u32 index,
- u32 vector, int count);
-void vfio_pci_irq_disable(struct vfio_pci_device *device, u32 index);
-void vfio_pci_irq_trigger(struct vfio_pci_device *device, u32 index, u32 vector);
-
-static inline void fcntl_set_nonblock(int fd)
-{
- int r;
-
- r = fcntl(fd, F_GETFL, 0);
- VFIO_ASSERT_NE(r, -1, "F_GETFL failed for fd %d\n", fd);
-
- r = fcntl(fd, F_SETFL, r | O_NONBLOCK);
- VFIO_ASSERT_NE(r, -1, "F_SETFL O_NONBLOCK failed for fd %d\n", fd);
-}
-
-static inline void vfio_pci_msi_enable(struct vfio_pci_device *device,
- u32 vector, int count)
-{
- vfio_pci_irq_enable(device, VFIO_PCI_MSI_IRQ_INDEX, vector, count);
-}
-
-static inline void vfio_pci_msi_disable(struct vfio_pci_device *device)
-{
- vfio_pci_irq_disable(device, VFIO_PCI_MSI_IRQ_INDEX);
-}
-
-static inline void vfio_pci_msix_enable(struct vfio_pci_device *device,
- u32 vector, int count)
-{
- vfio_pci_irq_enable(device, VFIO_PCI_MSIX_IRQ_INDEX, vector, count);
-}
-
-static inline void vfio_pci_msix_disable(struct vfio_pci_device *device)
-{
- vfio_pci_irq_disable(device, VFIO_PCI_MSIX_IRQ_INDEX);
-}
-
-iova_t __to_iova(struct vfio_pci_device *device, void *vaddr);
-iova_t to_iova(struct vfio_pci_device *device, void *vaddr);
-
-static inline bool vfio_pci_device_match(struct vfio_pci_device *device,
- u16 vendor_id, u16 device_id)
-{
- return (vendor_id == vfio_pci_config_readw(device, PCI_VENDOR_ID)) &&
- (device_id == vfio_pci_config_readw(device, PCI_DEVICE_ID));
-}
-
-void vfio_pci_driver_probe(struct vfio_pci_device *device);
-void vfio_pci_driver_init(struct vfio_pci_device *device);
-void vfio_pci_driver_remove(struct vfio_pci_device *device);
-int vfio_pci_driver_memcpy(struct vfio_pci_device *device,
- iova_t src, iova_t dst, u64 size);
-void vfio_pci_driver_memcpy_start(struct vfio_pci_device *device,
- iova_t src, iova_t dst, u64 size,
- u64 count);
-int vfio_pci_driver_memcpy_wait(struct vfio_pci_device *device);
-void vfio_pci_driver_send_msi(struct vfio_pci_device *device);
-
-#endif /* SELFTESTS_VFIO_LIB_INCLUDE_VFIO_UTIL_H */
diff --git a/tools/testing/selftests/vfio/lib/iommu.c b/tools/testing/selftests/vfio/lib/iommu.c
new file mode 100644
index 000000000000..8079d43523f3
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/iommu.c
@@ -0,0 +1,465 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <dirent.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include <uapi/linux/types.h>
+#include <linux/limits.h>
+#include <linux/mman.h>
+#include <linux/types.h>
+#include <linux/vfio.h>
+#include <linux/iommufd.h>
+
+#include "../../../kselftest.h"
+#include <libvfio.h>
+
+const char *default_iommu_mode = "iommufd";
+
+/* Reminder: Keep in sync with FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(). */
+static const struct iommu_mode iommu_modes[] = {
+ {
+ .name = "vfio_type1_iommu",
+ .container_path = "/dev/vfio/vfio",
+ .iommu_type = VFIO_TYPE1_IOMMU,
+ },
+ {
+ .name = "vfio_type1v2_iommu",
+ .container_path = "/dev/vfio/vfio",
+ .iommu_type = VFIO_TYPE1v2_IOMMU,
+ },
+ {
+ .name = "iommufd_compat_type1",
+ .container_path = "/dev/iommu",
+ .iommu_type = VFIO_TYPE1_IOMMU,
+ },
+ {
+ .name = "iommufd_compat_type1v2",
+ .container_path = "/dev/iommu",
+ .iommu_type = VFIO_TYPE1v2_IOMMU,
+ },
+ {
+ .name = "iommufd",
+ },
+};
+
+static const struct iommu_mode *lookup_iommu_mode(const char *iommu_mode)
+{
+ int i;
+
+ if (!iommu_mode)
+ iommu_mode = default_iommu_mode;
+
+ for (i = 0; i < ARRAY_SIZE(iommu_modes); i++) {
+ if (strcmp(iommu_mode, iommu_modes[i].name))
+ continue;
+
+ return &iommu_modes[i];
+ }
+
+ VFIO_FAIL("Unrecognized IOMMU mode: %s\n", iommu_mode);
+}
+
+int __iommu_hva2iova(struct iommu *iommu, void *vaddr, iova_t *iova)
+{
+ struct dma_region *region;
+
+ list_for_each_entry(region, &iommu->dma_regions, link) {
+ if (vaddr < region->vaddr)
+ continue;
+
+ if (vaddr >= region->vaddr + region->size)
+ continue;
+
+ if (iova)
+ *iova = region->iova + (vaddr - region->vaddr);
+
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+iova_t iommu_hva2iova(struct iommu *iommu, void *vaddr)
+{
+ iova_t iova;
+ int ret;
+
+ ret = __iommu_hva2iova(iommu, vaddr, &iova);
+ VFIO_ASSERT_EQ(ret, 0, "%p is not mapped into the iommu\n", vaddr);
+
+ return iova;
+}
+
+static int vfio_iommu_map(struct iommu *iommu, struct dma_region *region)
+{
+ struct vfio_iommu_type1_dma_map args = {
+ .argsz = sizeof(args),
+ .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
+ .vaddr = (u64)region->vaddr,
+ .iova = region->iova,
+ .size = region->size,
+ };
+
+ if (ioctl(iommu->container_fd, VFIO_IOMMU_MAP_DMA, &args))
+ return -errno;
+
+ return 0;
+}
+
+static int iommufd_map(struct iommu *iommu, struct dma_region *region)
+{
+ struct iommu_ioas_map args = {
+ .size = sizeof(args),
+ .flags = IOMMU_IOAS_MAP_READABLE |
+ IOMMU_IOAS_MAP_WRITEABLE |
+ IOMMU_IOAS_MAP_FIXED_IOVA,
+ .user_va = (u64)region->vaddr,
+ .iova = region->iova,
+ .length = region->size,
+ .ioas_id = iommu->ioas_id,
+ };
+
+ if (ioctl(iommu->iommufd, IOMMU_IOAS_MAP, &args))
+ return -errno;
+
+ return 0;
+}
+
+int __iommu_map(struct iommu *iommu, struct dma_region *region)
+{
+ int ret;
+
+ if (iommu->iommufd)
+ ret = iommufd_map(iommu, region);
+ else
+ ret = vfio_iommu_map(iommu, region);
+
+ if (ret)
+ return ret;
+
+ list_add(&region->link, &iommu->dma_regions);
+
+ return 0;
+}
+
+static int __vfio_iommu_unmap(int fd, u64 iova, u64 size, u32 flags, u64 *unmapped)
+{
+ struct vfio_iommu_type1_dma_unmap args = {
+ .argsz = sizeof(args),
+ .iova = iova,
+ .size = size,
+ .flags = flags,
+ };
+
+ if (ioctl(fd, VFIO_IOMMU_UNMAP_DMA, &args))
+ return -errno;
+
+ if (unmapped)
+ *unmapped = args.size;
+
+ return 0;
+}
+
+static int vfio_iommu_unmap(struct iommu *iommu, struct dma_region *region,
+ u64 *unmapped)
+{
+ return __vfio_iommu_unmap(iommu->container_fd, region->iova,
+ region->size, 0, unmapped);
+}
+
+static int __iommufd_unmap(int fd, u64 iova, u64 length, u32 ioas_id, u64 *unmapped)
+{
+ struct iommu_ioas_unmap args = {
+ .size = sizeof(args),
+ .iova = iova,
+ .length = length,
+ .ioas_id = ioas_id,
+ };
+
+ if (ioctl(fd, IOMMU_IOAS_UNMAP, &args))
+ return -errno;
+
+ if (unmapped)
+ *unmapped = args.length;
+
+ return 0;
+}
+
+static int iommufd_unmap(struct iommu *iommu, struct dma_region *region,
+ u64 *unmapped)
+{
+ return __iommufd_unmap(iommu->iommufd, region->iova, region->size,
+ iommu->ioas_id, unmapped);
+}
+
+int __iommu_unmap(struct iommu *iommu, struct dma_region *region, u64 *unmapped)
+{
+ int ret;
+
+ if (iommu->iommufd)
+ ret = iommufd_unmap(iommu, region, unmapped);
+ else
+ ret = vfio_iommu_unmap(iommu, region, unmapped);
+
+ if (ret)
+ return ret;
+
+ list_del_init(&region->link);
+
+ return 0;
+}
+
+int __iommu_unmap_all(struct iommu *iommu, u64 *unmapped)
+{
+ int ret;
+ struct dma_region *curr, *next;
+
+ if (iommu->iommufd)
+ ret = __iommufd_unmap(iommu->iommufd, 0, UINT64_MAX,
+ iommu->ioas_id, unmapped);
+ else
+ ret = __vfio_iommu_unmap(iommu->container_fd, 0, 0,
+ VFIO_DMA_UNMAP_FLAG_ALL, unmapped);
+
+ if (ret)
+ return ret;
+
+ list_for_each_entry_safe(curr, next, &iommu->dma_regions, link)
+ list_del_init(&curr->link);
+
+ return 0;
+}
+
+static struct vfio_info_cap_header *next_cap_hdr(void *buf, u32 bufsz,
+ u32 *cap_offset)
+{
+ struct vfio_info_cap_header *hdr;
+
+ if (!*cap_offset)
+ return NULL;
+
+ VFIO_ASSERT_LT(*cap_offset, bufsz);
+ VFIO_ASSERT_GE(bufsz - *cap_offset, sizeof(*hdr));
+
+ hdr = (struct vfio_info_cap_header *)((u8 *)buf + *cap_offset);
+ *cap_offset = hdr->next;
+
+ return hdr;
+}
+
+static struct vfio_info_cap_header *vfio_iommu_info_cap_hdr(struct vfio_iommu_type1_info *info,
+ u16 cap_id)
+{
+ struct vfio_info_cap_header *hdr;
+ u32 cap_offset = info->cap_offset;
+ u32 max_depth;
+ u32 depth = 0;
+
+ if (!(info->flags & VFIO_IOMMU_INFO_CAPS))
+ return NULL;
+
+ if (cap_offset)
+ VFIO_ASSERT_GE(cap_offset, sizeof(*info));
+
+ max_depth = (info->argsz - sizeof(*info)) / sizeof(*hdr);
+
+ while ((hdr = next_cap_hdr(info, info->argsz, &cap_offset))) {
+ depth++;
+ VFIO_ASSERT_LE(depth, max_depth, "Capability chain contains a cycle\n");
+
+ if (hdr->id == cap_id)
+ return hdr;
+ }
+
+ return NULL;
+}
+
+/* Return buffer including capability chain, if present. Free with free() */
+static struct vfio_iommu_type1_info *vfio_iommu_get_info(int container_fd)
+{
+ struct vfio_iommu_type1_info *info;
+
+ info = malloc(sizeof(*info));
+ VFIO_ASSERT_NOT_NULL(info);
+
+ *info = (struct vfio_iommu_type1_info) {
+ .argsz = sizeof(*info),
+ };
+
+ ioctl_assert(container_fd, VFIO_IOMMU_GET_INFO, info);
+ VFIO_ASSERT_GE(info->argsz, sizeof(*info));
+
+ info = realloc(info, info->argsz);
+ VFIO_ASSERT_NOT_NULL(info);
+
+ ioctl_assert(container_fd, VFIO_IOMMU_GET_INFO, info);
+ VFIO_ASSERT_GE(info->argsz, sizeof(*info));
+
+ return info;
+}
+
+/*
+ * Return iova ranges for the device's container. Normalize vfio_iommu_type1 to
+ * report iommufd's iommu_iova_range. Free with free().
+ */
+static struct iommu_iova_range *vfio_iommu_iova_ranges(struct iommu *iommu,
+ u32 *nranges)
+{
+ struct vfio_iommu_type1_info_cap_iova_range *cap_range;
+ struct vfio_iommu_type1_info *info;
+ struct vfio_info_cap_header *hdr;
+ struct iommu_iova_range *ranges = NULL;
+
+ info = vfio_iommu_get_info(iommu->container_fd);
+ hdr = vfio_iommu_info_cap_hdr(info, VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE);
+ VFIO_ASSERT_NOT_NULL(hdr);
+
+ cap_range = container_of(hdr, struct vfio_iommu_type1_info_cap_iova_range, header);
+ VFIO_ASSERT_GT(cap_range->nr_iovas, 0);
+
+ ranges = calloc(cap_range->nr_iovas, sizeof(*ranges));
+ VFIO_ASSERT_NOT_NULL(ranges);
+
+ for (u32 i = 0; i < cap_range->nr_iovas; i++) {
+ ranges[i] = (struct iommu_iova_range){
+ .start = cap_range->iova_ranges[i].start,
+ .last = cap_range->iova_ranges[i].end,
+ };
+ }
+
+ *nranges = cap_range->nr_iovas;
+
+ free(info);
+ return ranges;
+}
+
+/* Return iova ranges of the device's IOAS. Free with free() */
+static struct iommu_iova_range *iommufd_iova_ranges(struct iommu *iommu,
+ u32 *nranges)
+{
+ struct iommu_iova_range *ranges;
+ int ret;
+
+ struct iommu_ioas_iova_ranges query = {
+ .size = sizeof(query),
+ .ioas_id = iommu->ioas_id,
+ };
+
+ ret = ioctl(iommu->iommufd, IOMMU_IOAS_IOVA_RANGES, &query);
+ VFIO_ASSERT_EQ(ret, -1);
+ VFIO_ASSERT_EQ(errno, EMSGSIZE);
+ VFIO_ASSERT_GT(query.num_iovas, 0);
+
+ ranges = calloc(query.num_iovas, sizeof(*ranges));
+ VFIO_ASSERT_NOT_NULL(ranges);
+
+ query.allowed_iovas = (uintptr_t)ranges;
+
+ ioctl_assert(iommu->iommufd, IOMMU_IOAS_IOVA_RANGES, &query);
+ *nranges = query.num_iovas;
+
+ return ranges;
+}
+
+static int iova_range_comp(const void *a, const void *b)
+{
+ const struct iommu_iova_range *ra = a, *rb = b;
+
+ if (ra->start < rb->start)
+ return -1;
+
+ if (ra->start > rb->start)
+ return 1;
+
+ return 0;
+}
+
+/* Return sorted IOVA ranges of the device. Free with free(). */
+struct iommu_iova_range *iommu_iova_ranges(struct iommu *iommu, u32 *nranges)
+{
+ struct iommu_iova_range *ranges;
+
+ if (iommu->iommufd)
+ ranges = iommufd_iova_ranges(iommu, nranges);
+ else
+ ranges = vfio_iommu_iova_ranges(iommu, nranges);
+
+ if (!ranges)
+ return NULL;
+
+ VFIO_ASSERT_GT(*nranges, 0);
+
+ /* Sort and check that ranges are sane and non-overlapping */
+ qsort(ranges, *nranges, sizeof(*ranges), iova_range_comp);
+ VFIO_ASSERT_LT(ranges[0].start, ranges[0].last);
+
+ for (u32 i = 1; i < *nranges; i++) {
+ VFIO_ASSERT_LT(ranges[i].start, ranges[i].last);
+ VFIO_ASSERT_LT(ranges[i - 1].last, ranges[i].start);
+ }
+
+ return ranges;
+}
+
+static u32 iommufd_ioas_alloc(int iommufd)
+{
+ struct iommu_ioas_alloc args = {
+ .size = sizeof(args),
+ };
+
+ ioctl_assert(iommufd, IOMMU_IOAS_ALLOC, &args);
+ return args.out_ioas_id;
+}
+
+struct iommu *iommu_init(const char *iommu_mode)
+{
+ const char *container_path;
+ struct iommu *iommu;
+ int version;
+
+ iommu = calloc(1, sizeof(*iommu));
+ VFIO_ASSERT_NOT_NULL(iommu);
+
+ INIT_LIST_HEAD(&iommu->dma_regions);
+
+ iommu->mode = lookup_iommu_mode(iommu_mode);
+
+ container_path = iommu->mode->container_path;
+ if (container_path) {
+ iommu->container_fd = open(container_path, O_RDWR);
+ VFIO_ASSERT_GE(iommu->container_fd, 0, "open(%s) failed\n", container_path);
+
+ version = ioctl(iommu->container_fd, VFIO_GET_API_VERSION);
+ VFIO_ASSERT_EQ(version, VFIO_API_VERSION, "Unsupported version: %d\n", version);
+ } else {
+ /*
+		 * Require iommu->iommufd to be >0 so that a simple non-0 check can be
+ * used to check if iommufd is enabled. In practice open() will never
+ * return 0 unless stdin is closed.
+ */
+ iommu->iommufd = open("/dev/iommu", O_RDWR);
+ VFIO_ASSERT_GT(iommu->iommufd, 0);
+
+ iommu->ioas_id = iommufd_ioas_alloc(iommu->iommufd);
+ }
+
+ return iommu;
+}
+
+void iommu_cleanup(struct iommu *iommu)
+{
+ if (iommu->iommufd)
+ VFIO_ASSERT_EQ(close(iommu->iommufd), 0);
+ else
+ VFIO_ASSERT_EQ(close(iommu->container_fd), 0);
+
+ free(iommu);
+}
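
For reference, a minimal sketch of how the tests below drive the new struct iommu
API (error handling elided since every helper asserts internally; device_bdf is
assumed to hold a valid segment:bus:device.function string):

	struct iommu *iommu = iommu_init(default_iommu_mode);
	struct vfio_pci_device *device = vfio_pci_device_init(device_bdf, iommu);
	struct iova_allocator *allocator = iova_allocator_init(iommu);

	/* ... map DMA regions and exercise the device ... */

	iova_allocator_cleanup(allocator);
	vfio_pci_device_cleanup(device);
	iommu_cleanup(iommu);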
diff --git a/tools/testing/selftests/vfio/lib/iova_allocator.c b/tools/testing/selftests/vfio/lib/iova_allocator.c
new file mode 100644
index 000000000000..a12b0a51e9e6
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/iova_allocator.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <dirent.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include <uapi/linux/types.h>
+#include <linux/iommufd.h>
+#include <linux/limits.h>
+#include <linux/mman.h>
+#include <linux/overflow.h>
+#include <linux/types.h>
+#include <linux/vfio.h>
+
+#include <libvfio.h>
+
+struct iova_allocator *iova_allocator_init(struct iommu *iommu)
+{
+ struct iova_allocator *allocator;
+ struct iommu_iova_range *ranges;
+ u32 nranges;
+
+ ranges = iommu_iova_ranges(iommu, &nranges);
+ VFIO_ASSERT_NOT_NULL(ranges);
+
+ allocator = malloc(sizeof(*allocator));
+ VFIO_ASSERT_NOT_NULL(allocator);
+
+ *allocator = (struct iova_allocator){
+ .ranges = ranges,
+ .nranges = nranges,
+ .range_idx = 0,
+ .range_offset = 0,
+ };
+
+ return allocator;
+}
+
+void iova_allocator_cleanup(struct iova_allocator *allocator)
+{
+ free(allocator->ranges);
+ free(allocator);
+}
+
+iova_t iova_allocator_alloc(struct iova_allocator *allocator, size_t size)
+{
+	VFIO_ASSERT_GT(size, 0, "Invalid size, must be non-zero\n");
+	VFIO_ASSERT_EQ(size & (size - 1), 0, "Invalid size, must be a power of 2\n");
+
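+	/*
+	 * Worked example: given the range [0x1000, 0xafff] and size 0x4000,
+	 * the first call aligns IOVA 0x1000 up to 0x4000, returns 0x4000, and
+	 * advances range_offset to 0x7000 (one past the allocated last, 0x7fff).
+	 */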
+ for (;;) {
+ struct iommu_iova_range *range;
+ iova_t iova, last;
+
+ VFIO_ASSERT_LT(allocator->range_idx, allocator->nranges,
+ "IOVA allocator out of space\n");
+
+ range = &allocator->ranges[allocator->range_idx];
+ iova = range->start + allocator->range_offset;
+
+ /* Check for sufficient space at the current offset */
+ if (check_add_overflow(iova, size - 1, &last) ||
+ last > range->last)
+ goto next_range;
+
+		/*
+		 * Align iova up to size: last == iova + size - 1, so masking
+		 * off the low bits yields the smallest size-aligned IOVA >= iova.
+		 */
+ iova = last & ~(size - 1);
+
+ /* Check for sufficient space at the aligned iova */
+ if (check_add_overflow(iova, size - 1, &last) ||
+ last > range->last)
+ goto next_range;
+
+ if (last == range->last) {
+ allocator->range_idx++;
+ allocator->range_offset = 0;
+ } else {
+ allocator->range_offset = last - range->start + 1;
+ }
+
+ return iova;
+
+next_range:
+ allocator->range_idx++;
+ allocator->range_offset = 0;
+ }
+}
diff --git a/tools/testing/selftests/vfio/lib/libvfio.c b/tools/testing/selftests/vfio/lib/libvfio.c
new file mode 100644
index 000000000000..a23a3cc5be69
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/libvfio.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../../../kselftest.h"
+#include <libvfio.h>
+
+static bool is_bdf(const char *str)
+{
+ unsigned int s, b, d, f;
+ int length, count;
+
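+	/* %n records the characters consumed; matching strlen(str) rejects trailing garbage. */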
+ count = sscanf(str, "%4x:%2x:%2x.%2x%n", &s, &b, &d, &f, &length);
+ return count == 4 && length == strlen(str);
+}
+
+static char **get_bdfs_cmdline(int *argc, char *argv[], int *nr_bdfs)
+{
+ int i;
+
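+	/* Scan backwards from the end of argv to find the trailing run of BDF arguments. */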
+ for (i = *argc - 1; i > 0 && is_bdf(argv[i]); i--)
+ continue;
+
+ i++;
+ *nr_bdfs = *argc - i;
+ *argc -= *nr_bdfs;
+
+ return *nr_bdfs ? &argv[i] : NULL;
+}
+
+static char *get_bdf_env(void)
+{
+ char *bdf;
+
+ bdf = getenv("VFIO_SELFTESTS_BDF");
+ if (!bdf)
+ return NULL;
+
+ VFIO_ASSERT_TRUE(is_bdf(bdf), "Invalid BDF: %s\n", bdf);
+ return bdf;
+}
+
+char **vfio_selftests_get_bdfs(int *argc, char *argv[], int *nr_bdfs)
+{
+ static char *env_bdf;
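+	/* static so that &env_bdf remains valid after this function returns */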
+ char **bdfs;
+
+ bdfs = get_bdfs_cmdline(argc, argv, nr_bdfs);
+ if (bdfs)
+ return bdfs;
+
+ env_bdf = get_bdf_env();
+ if (env_bdf) {
+ *nr_bdfs = 1;
+ return &env_bdf;
+ }
+
+ fprintf(stderr, "Unable to determine which device(s) to use, skipping test.\n");
+ fprintf(stderr, "\n");
+ fprintf(stderr, "To pass the device address via environment variable:\n");
+ fprintf(stderr, "\n");
+ fprintf(stderr, " export VFIO_SELFTESTS_BDF=\"segment:bus:device.function\"\n");
+ fprintf(stderr, " %s [options]\n", argv[0]);
+ fprintf(stderr, "\n");
+ fprintf(stderr, "To pass the device address(es) via argv:\n");
+ fprintf(stderr, "\n");
+ fprintf(stderr, " %s [options] segment:bus:device.function ...\n", argv[0]);
+ fprintf(stderr, "\n");
+ exit(KSFT_SKIP);
+}
+
+const char *vfio_selftests_get_bdf(int *argc, char *argv[])
+{
+ int nr_bdfs;
+
+ return vfio_selftests_get_bdfs(argc, argv, &nr_bdfs)[0];
+}
diff --git a/tools/testing/selftests/vfio/lib/libvfio.mk b/tools/testing/selftests/vfio/lib/libvfio.mk
index 5d11c3a89a28..9f47bceed16f 100644
--- a/tools/testing/selftests/vfio/lib/libvfio.mk
+++ b/tools/testing/selftests/vfio/lib/libvfio.mk
@@ -1,24 +1,29 @@
include $(top_srcdir)/scripts/subarch.include
ARCH ?= $(SUBARCH)
-VFIO_DIR := $(selfdir)/vfio
+LIBVFIO_SRCDIR := $(selfdir)/vfio/lib
-LIBVFIO_C := lib/vfio_pci_device.c
-LIBVFIO_C += lib/vfio_pci_driver.c
+LIBVFIO_C := iommu.c
+LIBVFIO_C += iova_allocator.c
+LIBVFIO_C += libvfio.c
+LIBVFIO_C += vfio_pci_device.c
+LIBVFIO_C += vfio_pci_driver.c
ifeq ($(ARCH:x86_64=x86),x86)
-LIBVFIO_C += lib/drivers/ioat/ioat.c
-LIBVFIO_C += lib/drivers/dsa/dsa.c
+LIBVFIO_C += drivers/ioat/ioat.c
+LIBVFIO_C += drivers/dsa/dsa.c
endif
-LIBVFIO_O := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBVFIO_C))
+LIBVFIO_OUTPUT := $(OUTPUT)/libvfio
+
+LIBVFIO_O := $(patsubst %.c, $(LIBVFIO_OUTPUT)/%.o, $(LIBVFIO_C))
LIBVFIO_O_DIRS := $(shell dirname $(LIBVFIO_O) | uniq)
$(shell mkdir -p $(LIBVFIO_O_DIRS))
-CFLAGS += -I$(VFIO_DIR)/lib/include
+CFLAGS += -I$(LIBVFIO_SRCDIR)/include
-$(LIBVFIO_O): $(OUTPUT)/%.o : $(VFIO_DIR)/%.c
+$(LIBVFIO_O): $(LIBVFIO_OUTPUT)/%.o : $(LIBVFIO_SRCDIR)/%.c
$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
-EXTRA_CLEAN += $(LIBVFIO_O)
+EXTRA_CLEAN += $(LIBVFIO_OUTPUT)
diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_device.c b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
index b479a359da12..13fdb4b0b10f 100644
--- a/tools/testing/selftests/vfio/lib/vfio_pci_device.c
+++ b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
@@ -20,286 +20,10 @@
#include <linux/vfio.h>
#include "../../../kselftest.h"
-#include <vfio_util.h>
+#include <libvfio.h>
#define PCI_SYSFS_PATH "/sys/bus/pci/devices"
-#define ioctl_assert(_fd, _op, _arg) do { \
- void *__arg = (_arg); \
- int __ret = ioctl((_fd), (_op), (__arg)); \
- VFIO_ASSERT_EQ(__ret, 0, "ioctl(%s, %s, %s) returned %d\n", #_fd, #_op, #_arg, __ret); \
-} while (0)
-
-static struct vfio_info_cap_header *next_cap_hdr(void *buf, u32 bufsz,
- u32 *cap_offset)
-{
- struct vfio_info_cap_header *hdr;
-
- if (!*cap_offset)
- return NULL;
-
- VFIO_ASSERT_LT(*cap_offset, bufsz);
- VFIO_ASSERT_GE(bufsz - *cap_offset, sizeof(*hdr));
-
- hdr = (struct vfio_info_cap_header *)((u8 *)buf + *cap_offset);
- *cap_offset = hdr->next;
-
- return hdr;
-}
-
-static struct vfio_info_cap_header *vfio_iommu_info_cap_hdr(struct vfio_iommu_type1_info *info,
- u16 cap_id)
-{
- struct vfio_info_cap_header *hdr;
- u32 cap_offset = info->cap_offset;
- u32 max_depth;
- u32 depth = 0;
-
- if (!(info->flags & VFIO_IOMMU_INFO_CAPS))
- return NULL;
-
- if (cap_offset)
- VFIO_ASSERT_GE(cap_offset, sizeof(*info));
-
- max_depth = (info->argsz - sizeof(*info)) / sizeof(*hdr);
-
- while ((hdr = next_cap_hdr(info, info->argsz, &cap_offset))) {
- depth++;
- VFIO_ASSERT_LE(depth, max_depth, "Capability chain contains a cycle\n");
-
- if (hdr->id == cap_id)
- return hdr;
- }
-
- return NULL;
-}
-
-/* Return buffer including capability chain, if present. Free with free() */
-static struct vfio_iommu_type1_info *vfio_iommu_get_info(struct vfio_pci_device *device)
-{
- struct vfio_iommu_type1_info *info;
-
- info = malloc(sizeof(*info));
- VFIO_ASSERT_NOT_NULL(info);
-
- *info = (struct vfio_iommu_type1_info) {
- .argsz = sizeof(*info),
- };
-
- ioctl_assert(device->container_fd, VFIO_IOMMU_GET_INFO, info);
- VFIO_ASSERT_GE(info->argsz, sizeof(*info));
-
- info = realloc(info, info->argsz);
- VFIO_ASSERT_NOT_NULL(info);
-
- ioctl_assert(device->container_fd, VFIO_IOMMU_GET_INFO, info);
- VFIO_ASSERT_GE(info->argsz, sizeof(*info));
-
- return info;
-}
-
-/*
- * Return iova ranges for the device's container. Normalize vfio_iommu_type1 to
- * report iommufd's iommu_iova_range. Free with free().
- */
-static struct iommu_iova_range *vfio_iommu_iova_ranges(struct vfio_pci_device *device,
- u32 *nranges)
-{
- struct vfio_iommu_type1_info_cap_iova_range *cap_range;
- struct vfio_iommu_type1_info *info;
- struct vfio_info_cap_header *hdr;
- struct iommu_iova_range *ranges = NULL;
-
- info = vfio_iommu_get_info(device);
- hdr = vfio_iommu_info_cap_hdr(info, VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE);
- VFIO_ASSERT_NOT_NULL(hdr);
-
- cap_range = container_of(hdr, struct vfio_iommu_type1_info_cap_iova_range, header);
- VFIO_ASSERT_GT(cap_range->nr_iovas, 0);
-
- ranges = calloc(cap_range->nr_iovas, sizeof(*ranges));
- VFIO_ASSERT_NOT_NULL(ranges);
-
- for (u32 i = 0; i < cap_range->nr_iovas; i++) {
- ranges[i] = (struct iommu_iova_range){
- .start = cap_range->iova_ranges[i].start,
- .last = cap_range->iova_ranges[i].end,
- };
- }
-
- *nranges = cap_range->nr_iovas;
-
- free(info);
- return ranges;
-}
-
-/* Return iova ranges of the device's IOAS. Free with free() */
-static struct iommu_iova_range *iommufd_iova_ranges(struct vfio_pci_device *device,
- u32 *nranges)
-{
- struct iommu_iova_range *ranges;
- int ret;
-
- struct iommu_ioas_iova_ranges query = {
- .size = sizeof(query),
- .ioas_id = device->ioas_id,
- };
-
- ret = ioctl(device->iommufd, IOMMU_IOAS_IOVA_RANGES, &query);
- VFIO_ASSERT_EQ(ret, -1);
- VFIO_ASSERT_EQ(errno, EMSGSIZE);
- VFIO_ASSERT_GT(query.num_iovas, 0);
-
- ranges = calloc(query.num_iovas, sizeof(*ranges));
- VFIO_ASSERT_NOT_NULL(ranges);
-
- query.allowed_iovas = (uintptr_t)ranges;
-
- ioctl_assert(device->iommufd, IOMMU_IOAS_IOVA_RANGES, &query);
- *nranges = query.num_iovas;
-
- return ranges;
-}
-
-static int iova_range_comp(const void *a, const void *b)
-{
- const struct iommu_iova_range *ra = a, *rb = b;
-
- if (ra->start < rb->start)
- return -1;
-
- if (ra->start > rb->start)
- return 1;
-
- return 0;
-}
-
-/* Return sorted IOVA ranges of the device. Free with free(). */
-struct iommu_iova_range *vfio_pci_iova_ranges(struct vfio_pci_device *device,
- u32 *nranges)
-{
- struct iommu_iova_range *ranges;
-
- if (device->iommufd)
- ranges = iommufd_iova_ranges(device, nranges);
- else
- ranges = vfio_iommu_iova_ranges(device, nranges);
-
- if (!ranges)
- return NULL;
-
- VFIO_ASSERT_GT(*nranges, 0);
-
- /* Sort and check that ranges are sane and non-overlapping */
- qsort(ranges, *nranges, sizeof(*ranges), iova_range_comp);
- VFIO_ASSERT_LT(ranges[0].start, ranges[0].last);
-
- for (u32 i = 1; i < *nranges; i++) {
- VFIO_ASSERT_LT(ranges[i].start, ranges[i].last);
- VFIO_ASSERT_LT(ranges[i - 1].last, ranges[i].start);
- }
-
- return ranges;
-}
-
-struct iova_allocator *iova_allocator_init(struct vfio_pci_device *device)
-{
- struct iova_allocator *allocator;
- struct iommu_iova_range *ranges;
- u32 nranges;
-
- ranges = vfio_pci_iova_ranges(device, &nranges);
- VFIO_ASSERT_NOT_NULL(ranges);
-
- allocator = malloc(sizeof(*allocator));
- VFIO_ASSERT_NOT_NULL(allocator);
-
- *allocator = (struct iova_allocator){
- .ranges = ranges,
- .nranges = nranges,
- .range_idx = 0,
- .range_offset = 0,
- };
-
- return allocator;
-}
-
-void iova_allocator_cleanup(struct iova_allocator *allocator)
-{
- free(allocator->ranges);
- free(allocator);
-}
-
-iova_t iova_allocator_alloc(struct iova_allocator *allocator, size_t size)
-{
- VFIO_ASSERT_GT(size, 0, "Invalid size arg, zero\n");
- VFIO_ASSERT_EQ(size & (size - 1), 0, "Invalid size arg, non-power-of-2\n");
-
- for (;;) {
- struct iommu_iova_range *range;
- iova_t iova, last;
-
- VFIO_ASSERT_LT(allocator->range_idx, allocator->nranges,
- "IOVA allocator out of space\n");
-
- range = &allocator->ranges[allocator->range_idx];
- iova = range->start + allocator->range_offset;
-
- /* Check for sufficient space at the current offset */
- if (check_add_overflow(iova, size - 1, &last) ||
- last > range->last)
- goto next_range;
-
- /* Align iova to size */
- iova = last & ~(size - 1);
-
- /* Check for sufficient space at the aligned iova */
- if (check_add_overflow(iova, size - 1, &last) ||
- last > range->last)
- goto next_range;
-
- if (last == range->last) {
- allocator->range_idx++;
- allocator->range_offset = 0;
- } else {
- allocator->range_offset = last - range->start + 1;
- }
-
- return iova;
-
-next_range:
- allocator->range_idx++;
- allocator->range_offset = 0;
- }
-}
-
-iova_t __to_iova(struct vfio_pci_device *device, void *vaddr)
-{
- struct vfio_dma_region *region;
-
- list_for_each_entry(region, &device->dma_regions, link) {
- if (vaddr < region->vaddr)
- continue;
-
- if (vaddr >= region->vaddr + region->size)
- continue;
-
- return region->iova + (vaddr - region->vaddr);
- }
-
- return INVALID_IOVA;
-}
-
-iova_t to_iova(struct vfio_pci_device *device, void *vaddr)
-{
- iova_t iova;
-
- iova = __to_iova(device, vaddr);
- VFIO_ASSERT_NE(iova, INVALID_IOVA, "%p is not mapped into device.\n", vaddr);
-
- return iova;
-}
-
static void vfio_pci_irq_set(struct vfio_pci_device *device,
u32 index, u32 vector, u32 count, int *fds)
{
@@ -386,141 +110,6 @@ static void vfio_pci_irq_get(struct vfio_pci_device *device, u32 index,
ioctl_assert(device->fd, VFIO_DEVICE_GET_IRQ_INFO, irq_info);
}
-static int vfio_iommu_dma_map(struct vfio_pci_device *device,
- struct vfio_dma_region *region)
-{
- struct vfio_iommu_type1_dma_map args = {
- .argsz = sizeof(args),
- .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
- .vaddr = (u64)region->vaddr,
- .iova = region->iova,
- .size = region->size,
- };
-
- if (ioctl(device->container_fd, VFIO_IOMMU_MAP_DMA, &args))
- return -errno;
-
- return 0;
-}
-
-static int iommufd_dma_map(struct vfio_pci_device *device,
- struct vfio_dma_region *region)
-{
- struct iommu_ioas_map args = {
- .size = sizeof(args),
- .flags = IOMMU_IOAS_MAP_READABLE |
- IOMMU_IOAS_MAP_WRITEABLE |
- IOMMU_IOAS_MAP_FIXED_IOVA,
- .user_va = (u64)region->vaddr,
- .iova = region->iova,
- .length = region->size,
- .ioas_id = device->ioas_id,
- };
-
- if (ioctl(device->iommufd, IOMMU_IOAS_MAP, &args))
- return -errno;
-
- return 0;
-}
-
-int __vfio_pci_dma_map(struct vfio_pci_device *device,
- struct vfio_dma_region *region)
-{
- int ret;
-
- if (device->iommufd)
- ret = iommufd_dma_map(device, region);
- else
- ret = vfio_iommu_dma_map(device, region);
-
- if (ret)
- return ret;
-
- list_add(&region->link, &device->dma_regions);
-
- return 0;
-}
-
-static int vfio_iommu_dma_unmap(int fd, u64 iova, u64 size, u32 flags,
- u64 *unmapped)
-{
- struct vfio_iommu_type1_dma_unmap args = {
- .argsz = sizeof(args),
- .iova = iova,
- .size = size,
- .flags = flags,
- };
-
- if (ioctl(fd, VFIO_IOMMU_UNMAP_DMA, &args))
- return -errno;
-
- if (unmapped)
- *unmapped = args.size;
-
- return 0;
-}
-
-static int iommufd_dma_unmap(int fd, u64 iova, u64 length, u32 ioas_id,
- u64 *unmapped)
-{
- struct iommu_ioas_unmap args = {
- .size = sizeof(args),
- .iova = iova,
- .length = length,
- .ioas_id = ioas_id,
- };
-
- if (ioctl(fd, IOMMU_IOAS_UNMAP, &args))
- return -errno;
-
- if (unmapped)
- *unmapped = args.length;
-
- return 0;
-}
-
-int __vfio_pci_dma_unmap(struct vfio_pci_device *device,
- struct vfio_dma_region *region, u64 *unmapped)
-{
- int ret;
-
- if (device->iommufd)
- ret = iommufd_dma_unmap(device->iommufd, region->iova,
- region->size, device->ioas_id,
- unmapped);
- else
- ret = vfio_iommu_dma_unmap(device->container_fd, region->iova,
- region->size, 0, unmapped);
-
- if (ret)
- return ret;
-
- list_del_init(&region->link);
-
- return 0;
-}
-
-int __vfio_pci_dma_unmap_all(struct vfio_pci_device *device, u64 *unmapped)
-{
- int ret;
- struct vfio_dma_region *curr, *next;
-
- if (device->iommufd)
- ret = iommufd_dma_unmap(device->iommufd, 0, UINT64_MAX,
- device->ioas_id, unmapped);
- else
- ret = vfio_iommu_dma_unmap(device->container_fd, 0, 0,
- VFIO_DMA_UNMAP_FLAG_ALL, unmapped);
-
- if (ret)
- return ret;
-
- list_for_each_entry_safe(curr, next, &device->dma_regions, link)
- list_del_init(&curr->link);
-
- return 0;
-}
-
static void vfio_pci_region_get(struct vfio_pci_device *device, int index,
struct vfio_region_info *info)
{
@@ -627,28 +216,26 @@ static void vfio_pci_group_setup(struct vfio_pci_device *device, const char *bdf
ioctl_assert(device->group_fd, VFIO_GROUP_GET_STATUS, &group_status);
VFIO_ASSERT_TRUE(group_status.flags & VFIO_GROUP_FLAGS_VIABLE);
- ioctl_assert(device->group_fd, VFIO_GROUP_SET_CONTAINER, &device->container_fd);
+ ioctl_assert(device->group_fd, VFIO_GROUP_SET_CONTAINER, &device->iommu->container_fd);
}
static void vfio_pci_container_setup(struct vfio_pci_device *device, const char *bdf)
{
- unsigned long iommu_type = device->iommu_mode->iommu_type;
- const char *path = device->iommu_mode->container_path;
- int version;
+ struct iommu *iommu = device->iommu;
+ unsigned long iommu_type = iommu->mode->iommu_type;
int ret;
- device->container_fd = open(path, O_RDWR);
- VFIO_ASSERT_GE(device->container_fd, 0, "open(%s) failed\n", path);
-
- version = ioctl(device->container_fd, VFIO_GET_API_VERSION);
- VFIO_ASSERT_EQ(version, VFIO_API_VERSION, "Unsupported version: %d\n", version);
-
vfio_pci_group_setup(device, bdf);
- ret = ioctl(device->container_fd, VFIO_CHECK_EXTENSION, iommu_type);
+ ret = ioctl(iommu->container_fd, VFIO_CHECK_EXTENSION, iommu_type);
VFIO_ASSERT_GT(ret, 0, "VFIO IOMMU type %lu not supported\n", iommu_type);
- ioctl_assert(device->container_fd, VFIO_SET_IOMMU, (void *)iommu_type);
+ /*
+ * Allow multiple threads to race to set the IOMMU type on the
+ * container. The first will succeed and the rest should fail
+ * because the IOMMU type is already set.
+ */
+ (void)ioctl(iommu->container_fd, VFIO_SET_IOMMU, (void *)iommu_type);
device->fd = ioctl(device->group_fd, VFIO_GROUP_GET_DEVICE_FD, bdf);
VFIO_ASSERT_GE(device->fd, 0);
@@ -712,52 +299,6 @@ const char *vfio_pci_get_cdev_path(const char *bdf)
return cdev_path;
}
-/* Reminder: Keep in sync with FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(). */
-static const struct vfio_iommu_mode iommu_modes[] = {
- {
- .name = "vfio_type1_iommu",
- .container_path = "/dev/vfio/vfio",
- .iommu_type = VFIO_TYPE1_IOMMU,
- },
- {
- .name = "vfio_type1v2_iommu",
- .container_path = "/dev/vfio/vfio",
- .iommu_type = VFIO_TYPE1v2_IOMMU,
- },
- {
- .name = "iommufd_compat_type1",
- .container_path = "/dev/iommu",
- .iommu_type = VFIO_TYPE1_IOMMU,
- },
- {
- .name = "iommufd_compat_type1v2",
- .container_path = "/dev/iommu",
- .iommu_type = VFIO_TYPE1v2_IOMMU,
- },
- {
- .name = "iommufd",
- },
-};
-
-const char *default_iommu_mode = "iommufd";
-
-static const struct vfio_iommu_mode *lookup_iommu_mode(const char *iommu_mode)
-{
- int i;
-
- if (!iommu_mode)
- iommu_mode = default_iommu_mode;
-
- for (i = 0; i < ARRAY_SIZE(iommu_modes); i++) {
- if (strcmp(iommu_mode, iommu_modes[i].name))
- continue;
-
- return &iommu_modes[i];
- }
-
- VFIO_FAIL("Unrecognized IOMMU mode: %s\n", iommu_mode);
-}
-
static void vfio_device_bind_iommufd(int device_fd, int iommufd)
{
struct vfio_device_bind_iommufd args = {
@@ -768,16 +309,6 @@ static void vfio_device_bind_iommufd(int device_fd, int iommufd)
ioctl_assert(device_fd, VFIO_DEVICE_BIND_IOMMUFD, &args);
}
-static u32 iommufd_ioas_alloc(int iommufd)
-{
- struct iommu_ioas_alloc args = {
- .size = sizeof(args),
- };
-
- ioctl_assert(iommufd, IOMMU_IOAS_ALLOC, &args);
- return args.out_ioas_id;
-}
-
static void vfio_device_attach_iommufd_pt(int device_fd, u32 pt_id)
{
struct vfio_device_attach_iommufd_pt args = {
@@ -796,31 +327,22 @@ static void vfio_pci_iommufd_setup(struct vfio_pci_device *device, const char *b
VFIO_ASSERT_GE(device->fd, 0);
free((void *)cdev_path);
- /*
- * Require device->iommufd to be >0 so that a simple non-0 check can be
- * used to check if iommufd is enabled. In practice open() will never
- * return 0 unless stdin is closed.
- */
- device->iommufd = open("/dev/iommu", O_RDWR);
- VFIO_ASSERT_GT(device->iommufd, 0);
-
- vfio_device_bind_iommufd(device->fd, device->iommufd);
- device->ioas_id = iommufd_ioas_alloc(device->iommufd);
- vfio_device_attach_iommufd_pt(device->fd, device->ioas_id);
+ vfio_device_bind_iommufd(device->fd, device->iommu->iommufd);
+ vfio_device_attach_iommufd_pt(device->fd, device->iommu->ioas_id);
}
-struct vfio_pci_device *vfio_pci_device_init(const char *bdf, const char *iommu_mode)
+struct vfio_pci_device *vfio_pci_device_init(const char *bdf, struct iommu *iommu)
{
struct vfio_pci_device *device;
device = calloc(1, sizeof(*device));
VFIO_ASSERT_NOT_NULL(device);
- INIT_LIST_HEAD(&device->dma_regions);
-
- device->iommu_mode = lookup_iommu_mode(iommu_mode);
+ VFIO_ASSERT_NOT_NULL(iommu);
+ device->iommu = iommu;
+ device->bdf = bdf;
- if (device->iommu_mode->container_path)
+ if (iommu->mode->container_path)
vfio_pci_container_setup(device, bdf);
else
vfio_pci_iommufd_setup(device, bdf);
@@ -849,48 +371,8 @@ void vfio_pci_device_cleanup(struct vfio_pci_device *device)
VFIO_ASSERT_EQ(close(device->msi_eventfds[i]), 0);
}
- if (device->iommufd) {
- VFIO_ASSERT_EQ(close(device->iommufd), 0);
- } else {
+ if (device->group_fd)
VFIO_ASSERT_EQ(close(device->group_fd), 0);
- VFIO_ASSERT_EQ(close(device->container_fd), 0);
- }
free(device);
}
-
-static bool is_bdf(const char *str)
-{
- unsigned int s, b, d, f;
- int length, count;
-
- count = sscanf(str, "%4x:%2x:%2x.%2x%n", &s, &b, &d, &f, &length);
- return count == 4 && length == strlen(str);
-}
-
-const char *vfio_selftests_get_bdf(int *argc, char *argv[])
-{
- char *bdf;
-
- if (*argc > 1 && is_bdf(argv[*argc - 1]))
- return argv[--(*argc)];
-
- bdf = getenv("VFIO_SELFTESTS_BDF");
- if (bdf) {
- VFIO_ASSERT_TRUE(is_bdf(bdf), "Invalid BDF: %s\n", bdf);
- return bdf;
- }
-
- fprintf(stderr, "Unable to determine which device to use, skipping test.\n");
- fprintf(stderr, "\n");
- fprintf(stderr, "To pass the device address via environment variable:\n");
- fprintf(stderr, "\n");
- fprintf(stderr, " export VFIO_SELFTESTS_BDF=segment:bus:device.function\n");
- fprintf(stderr, " %s [options]\n", argv[0]);
- fprintf(stderr, "\n");
- fprintf(stderr, "To pass the device address via argv:\n");
- fprintf(stderr, "\n");
- fprintf(stderr, " %s [options] segment:bus:device.function\n", argv[0]);
- fprintf(stderr, "\n");
- exit(KSFT_SKIP);
-}
diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_driver.c b/tools/testing/selftests/vfio/lib/vfio_pci_driver.c
index e5e8723ecb41..ca0e25efbfa1 100644
--- a/tools/testing/selftests/vfio/lib/vfio_pci_driver.c
+++ b/tools/testing/selftests/vfio/lib/vfio_pci_driver.c
@@ -1,8 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
-#include <stdio.h>
-
#include "../../../kselftest.h"
-#include <vfio_util.h>
+#include <libvfio.h>
#ifdef __x86_64__
extern struct vfio_pci_driver_ops dsa_ops;
@@ -29,7 +27,6 @@ void vfio_pci_driver_probe(struct vfio_pci_device *device)
if (ops->probe(device))
continue;
- printf("Driver found: %s\n", ops->name);
device->driver.ops = ops;
}
}
@@ -58,17 +55,6 @@ void vfio_pci_driver_init(struct vfio_pci_device *device)
driver->ops->init(device);
driver->initialized = true;
-
- printf("%s: region: vaddr %p, iova 0x%lx, size 0x%lx\n",
- driver->ops->name,
- driver->region.vaddr,
- driver->region.iova,
- driver->region.size);
-
- printf("%s: max_memcpy_size 0x%lx, max_memcpy_count 0x%lx\n",
- driver->ops->name,
- driver->max_memcpy_size,
- driver->max_memcpy_count);
}
void vfio_pci_driver_remove(struct vfio_pci_device *device)
diff --git a/tools/testing/selftests/vfio/run.sh b/tools/testing/selftests/vfio/run.sh
deleted file mode 100755
index 0476b6d7adc3..000000000000
--- a/tools/testing/selftests/vfio/run.sh
+++ /dev/null
@@ -1,109 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-or-later
-
-# Global variables initialized in main() and then used during cleanup() when
-# the script exits.
-declare DEVICE_BDF
-declare NEW_DRIVER
-declare OLD_DRIVER
-declare OLD_NUMVFS
-declare DRIVER_OVERRIDE
-
-function write_to() {
- # Unfortunately set -x does not show redirects so use echo to manually
- # tell the user what commands are being run.
- echo "+ echo \"${2}\" > ${1}"
- echo "${2}" > ${1}
-}
-
-function bind() {
- write_to /sys/bus/pci/drivers/${2}/bind ${1}
-}
-
-function unbind() {
- write_to /sys/bus/pci/drivers/${2}/unbind ${1}
-}
-
-function set_sriov_numvfs() {
- write_to /sys/bus/pci/devices/${1}/sriov_numvfs ${2}
-}
-
-function set_driver_override() {
- write_to /sys/bus/pci/devices/${1}/driver_override ${2}
-}
-
-function clear_driver_override() {
- set_driver_override ${1} ""
-}
-
-function cleanup() {
- if [ "${NEW_DRIVER}" ]; then unbind ${DEVICE_BDF} ${NEW_DRIVER} ; fi
- if [ "${DRIVER_OVERRIDE}" ]; then clear_driver_override ${DEVICE_BDF} ; fi
- if [ "${OLD_DRIVER}" ]; then bind ${DEVICE_BDF} ${OLD_DRIVER} ; fi
- if [ "${OLD_NUMVFS}" ]; then set_sriov_numvfs ${DEVICE_BDF} ${OLD_NUMVFS} ; fi
-}
-
-function usage() {
- echo "usage: $0 [-d segment:bus:device.function] [-s] [-h] [cmd ...]" >&2
- echo >&2
- echo " -d: The BDF of the device to use for the test (required)" >&2
- echo " -h: Show this help message" >&2
- echo " -s: Drop into a shell rather than running a command" >&2
- echo >&2
- echo " cmd: The command to run and arguments to pass to it." >&2
- echo " Required when not using -s. The SBDF will be " >&2
- echo " appended to the argument list." >&2
- exit 1
-}
-
-function main() {
- local shell
-
- while getopts "d:hs" opt; do
- case $opt in
- d) DEVICE_BDF="$OPTARG" ;;
- s) shell=true ;;
- *) usage ;;
- esac
- done
-
- # Shift past all optional arguments.
- shift $((OPTIND - 1))
-
- # Check that the user passed in the command to run.
- [ ! "${shell}" ] && [ $# = 0 ] && usage
-
- # Check that the user passed in a BDF.
- [ "${DEVICE_BDF}" ] || usage
-
- trap cleanup EXIT
- set -e
-
- test -d /sys/bus/pci/devices/${DEVICE_BDF}
-
- if [ -f /sys/bus/pci/devices/${DEVICE_BDF}/sriov_numvfs ]; then
- OLD_NUMVFS=$(cat /sys/bus/pci/devices/${DEVICE_BDF}/sriov_numvfs)
- set_sriov_numvfs ${DEVICE_BDF} 0
- fi
-
- if [ -L /sys/bus/pci/devices/${DEVICE_BDF}/driver ]; then
- OLD_DRIVER=$(basename $(readlink -m /sys/bus/pci/devices/${DEVICE_BDF}/driver))
- unbind ${DEVICE_BDF} ${OLD_DRIVER}
- fi
-
- set_driver_override ${DEVICE_BDF} vfio-pci
- DRIVER_OVERRIDE=true
-
- bind ${DEVICE_BDF} vfio-pci
- NEW_DRIVER=vfio-pci
-
- echo
- if [ "${shell}" ]; then
- echo "Dropping into ${SHELL} with VFIO_SELFTESTS_BDF=${DEVICE_BDF}"
- VFIO_SELFTESTS_BDF=${DEVICE_BDF} ${SHELL}
- else
- "$@" ${DEVICE_BDF}
- fi
- echo
-}
-
-main "$@"
diff --git a/tools/testing/selftests/vfio/scripts/cleanup.sh b/tools/testing/selftests/vfio/scripts/cleanup.sh
new file mode 100755
index 000000000000..69c922d8aafb
--- /dev/null
+++ b/tools/testing/selftests/vfio/scripts/cleanup.sh
@@ -0,0 +1,41 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+source $(dirname -- "${BASH_SOURCE[0]}")/lib.sh
+
+function cleanup_devices() {
+ local device_bdf
+ local device_dir
+
+ for device_bdf in "$@"; do
+ device_dir=${DEVICES_DIR}/${device_bdf}
+
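+		# Undo setup.sh in reverse order: unbind vfio-pci, clear the
+		# override, rebind the original driver, restore sriov_numvfs.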
+ if [ -f ${device_dir}/vfio-pci ]; then
+ unbind ${device_bdf} vfio-pci
+ fi
+
+ if [ -f ${device_dir}/driver_override ]; then
+ clear_driver_override ${device_bdf}
+ fi
+
+ if [ -f ${device_dir}/driver ]; then
+ bind ${device_bdf} $(cat ${device_dir}/driver)
+ fi
+
+ if [ -f ${device_dir}/sriov_numvfs ]; then
+ set_sriov_numvfs ${device_bdf} $(cat ${device_dir}/sriov_numvfs)
+ fi
+
+ rm -rf ${device_dir}
+ done
+}
+
+function main() {
+ if [ $# = 0 ]; then
+ cleanup_devices $(ls ${DEVICES_DIR})
+ rmdir ${DEVICES_DIR}
+ else
+ cleanup_devices "$@"
+ fi
+}
+
+main "$@"
diff --git a/tools/testing/selftests/vfio/scripts/lib.sh b/tools/testing/selftests/vfio/scripts/lib.sh
new file mode 100755
index 000000000000..9f05f29c7b86
--- /dev/null
+++ b/tools/testing/selftests/vfio/scripts/lib.sh
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+readonly DEVICES_DIR="${TMPDIR:-/tmp}/vfio-selftests-devices"
+
+function write_to() {
+	# Unfortunately, set -x does not show redirects, so use echo to manually
+	# tell the user what commands are being run.
+ echo "+ echo \"${2}\" > ${1}"
+ echo "${2}" > ${1}
+}
+
+function get_driver() {
+ if [ -L /sys/bus/pci/devices/${1}/driver ]; then
+ basename $(readlink -m /sys/bus/pci/devices/${1}/driver)
+ fi
+}
+
+function bind() {
+ write_to /sys/bus/pci/drivers/${2}/bind ${1}
+}
+
+function unbind() {
+ write_to /sys/bus/pci/drivers/${2}/unbind ${1}
+}
+
+function set_sriov_numvfs() {
+ write_to /sys/bus/pci/devices/${1}/sriov_numvfs ${2}
+}
+
+function get_sriov_numvfs() {
+ if [ -f /sys/bus/pci/devices/${1}/sriov_numvfs ]; then
+ cat /sys/bus/pci/devices/${1}/sriov_numvfs
+ fi
+}
+
+function set_driver_override() {
+ write_to /sys/bus/pci/devices/${1}/driver_override ${2}
+}
+
+function clear_driver_override() {
+ set_driver_override ${1} ""
+}
diff --git a/tools/testing/selftests/vfio/scripts/run.sh b/tools/testing/selftests/vfio/scripts/run.sh
new file mode 100755
index 000000000000..91fd38f9f6f6
--- /dev/null
+++ b/tools/testing/selftests/vfio/scripts/run.sh
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+source $(dirname -- "${BASH_SOURCE[0]}")/lib.sh
+
+function main() {
+ local device_bdfs=$(ls ${DEVICES_DIR})
+
+ if [ -z "${device_bdfs}" ]; then
+ echo "No devices found, skipping."
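+		# Exit code 4 is KSFT_SKIP, so the kselftest runner reports a skip, not a failure.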
+ exit 4
+ fi
+
+ "$@" ${device_bdfs}
+}
+
+main "$@"
diff --git a/tools/testing/selftests/vfio/scripts/setup.sh b/tools/testing/selftests/vfio/scripts/setup.sh
new file mode 100755
index 000000000000..49a499e51cbe
--- /dev/null
+++ b/tools/testing/selftests/vfio/scripts/setup.sh
@@ -0,0 +1,48 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+set -e
+
+source $(dirname -- "${BASH_SOURCE[0]}")/lib.sh
+
+function main() {
+ local device_bdf
+ local device_dir
+ local numvfs
+ local driver
+
+ if [ $# = 0 ]; then
+ echo "usage: $0 segment:bus:device.function ..." >&2
+ exit 1
+ fi
+
+ for device_bdf in "$@"; do
+ test -d /sys/bus/pci/devices/${device_bdf}
+
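+		# Save the device's original state under DEVICES_DIR so cleanup.sh can restore it.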
+ device_dir=${DEVICES_DIR}/${device_bdf}
+ if [ -d "${device_dir}" ]; then
+ echo "${device_bdf} has already been set up, exiting."
+ exit 0
+ fi
+
+ mkdir -p ${device_dir}
+
+ numvfs=$(get_sriov_numvfs ${device_bdf})
+ if [ "${numvfs}" ]; then
+ set_sriov_numvfs ${device_bdf} 0
+ echo ${numvfs} > ${device_dir}/sriov_numvfs
+ fi
+
+ driver=$(get_driver ${device_bdf})
+ if [ "${driver}" ]; then
+ unbind ${device_bdf} ${driver}
+ echo ${driver} > ${device_dir}/driver
+ fi
+
+ set_driver_override ${device_bdf} vfio-pci
+ touch ${device_dir}/driver_override
+
+ bind ${device_bdf} vfio-pci
+ touch ${device_dir}/vfio-pci
+ done
+}
+
+main "$@"
diff --git a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c
index 102603d4407d..5397822c3dd4 100644
--- a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c
+++ b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c
@@ -10,7 +10,7 @@
#include <linux/sizes.h>
#include <linux/vfio.h>
-#include <vfio_util.h>
+#include <libvfio.h>
#include "../kselftest_harness.h"
@@ -94,6 +94,7 @@ static int iommu_mapping_get(const char *bdf, u64 iova,
}
FIXTURE(vfio_dma_mapping_test) {
+ struct iommu *iommu;
struct vfio_pci_device *device;
struct iova_allocator *iova_allocator;
};
@@ -119,21 +120,23 @@ FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(anonymous_hugetlb_1gb, SZ_1G, MAP_HUGETLB |
FIXTURE_SETUP(vfio_dma_mapping_test)
{
- self->device = vfio_pci_device_init(device_bdf, variant->iommu_mode);
- self->iova_allocator = iova_allocator_init(self->device);
+ self->iommu = iommu_init(variant->iommu_mode);
+ self->device = vfio_pci_device_init(device_bdf, self->iommu);
+ self->iova_allocator = iova_allocator_init(self->iommu);
}
FIXTURE_TEARDOWN(vfio_dma_mapping_test)
{
iova_allocator_cleanup(self->iova_allocator);
vfio_pci_device_cleanup(self->device);
+ iommu_cleanup(self->iommu);
}
TEST_F(vfio_dma_mapping_test, dma_map_unmap)
{
const u64 size = variant->size ?: getpagesize();
const int flags = variant->mmap_flags;
- struct vfio_dma_region region;
+ struct dma_region region;
struct iommu_mapping mapping;
u64 mapping_size = size;
u64 unmapped;
@@ -150,7 +153,7 @@ TEST_F(vfio_dma_mapping_test, dma_map_unmap)
region.iova = iova_allocator_alloc(self->iova_allocator, size);
region.size = size;
- vfio_pci_dma_map(self->device, &region);
+ iommu_map(self->iommu, &region);
printf("Mapped HVA %p (size 0x%lx) at IOVA 0x%lx\n", region.vaddr, size, region.iova);
ASSERT_EQ(region.iova, to_iova(self->device, region.vaddr));
@@ -192,19 +195,20 @@ TEST_F(vfio_dma_mapping_test, dma_map_unmap)
}
unmap:
- rc = __vfio_pci_dma_unmap(self->device, &region, &unmapped);
+ rc = __iommu_unmap(self->iommu, &region, &unmapped);
ASSERT_EQ(rc, 0);
ASSERT_EQ(unmapped, region.size);
printf("Unmapped IOVA 0x%lx\n", region.iova);
- ASSERT_EQ(INVALID_IOVA, __to_iova(self->device, region.vaddr));
+ ASSERT_NE(0, __to_iova(self->device, region.vaddr, NULL));
ASSERT_NE(0, iommu_mapping_get(device_bdf, region.iova, &mapping));
ASSERT_TRUE(!munmap(region.vaddr, size));
}
FIXTURE(vfio_dma_map_limit_test) {
+ struct iommu *iommu;
struct vfio_pci_device *device;
- struct vfio_dma_region region;
+ struct dma_region region;
size_t mmap_size;
};
@@ -223,7 +227,7 @@ FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES();
FIXTURE_SETUP(vfio_dma_map_limit_test)
{
- struct vfio_dma_region *region = &self->region;
+ struct dma_region *region = &self->region;
struct iommu_iova_range *ranges;
u64 region_size = getpagesize();
iova_t last_iova;
@@ -235,12 +239,13 @@ FIXTURE_SETUP(vfio_dma_map_limit_test)
*/
self->mmap_size = 2 * region_size;
- self->device = vfio_pci_device_init(device_bdf, variant->iommu_mode);
+ self->iommu = iommu_init(variant->iommu_mode);
+ self->device = vfio_pci_device_init(device_bdf, self->iommu);
region->vaddr = mmap(NULL, self->mmap_size, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
ASSERT_NE(region->vaddr, MAP_FAILED);
- ranges = vfio_pci_iova_ranges(self->device, &nranges);
+ ranges = iommu_iova_ranges(self->iommu, &nranges);
VFIO_ASSERT_NOT_NULL(ranges);
last_iova = ranges[nranges - 1].last;
free(ranges);
@@ -253,49 +258,50 @@ FIXTURE_SETUP(vfio_dma_map_limit_test)
FIXTURE_TEARDOWN(vfio_dma_map_limit_test)
{
vfio_pci_device_cleanup(self->device);
+ iommu_cleanup(self->iommu);
ASSERT_EQ(munmap(self->region.vaddr, self->mmap_size), 0);
}
TEST_F(vfio_dma_map_limit_test, unmap_range)
{
- struct vfio_dma_region *region = &self->region;
+ struct dma_region *region = &self->region;
u64 unmapped;
int rc;
- vfio_pci_dma_map(self->device, region);
+ iommu_map(self->iommu, region);
ASSERT_EQ(region->iova, to_iova(self->device, region->vaddr));
- rc = __vfio_pci_dma_unmap(self->device, region, &unmapped);
+ rc = __iommu_unmap(self->iommu, region, &unmapped);
ASSERT_EQ(rc, 0);
ASSERT_EQ(unmapped, region->size);
}
TEST_F(vfio_dma_map_limit_test, unmap_all)
{
- struct vfio_dma_region *region = &self->region;
+ struct dma_region *region = &self->region;
u64 unmapped;
int rc;
- vfio_pci_dma_map(self->device, region);
+ iommu_map(self->iommu, region);
ASSERT_EQ(region->iova, to_iova(self->device, region->vaddr));
- rc = __vfio_pci_dma_unmap_all(self->device, &unmapped);
+ rc = __iommu_unmap_all(self->iommu, &unmapped);
ASSERT_EQ(rc, 0);
ASSERT_EQ(unmapped, region->size);
}
TEST_F(vfio_dma_map_limit_test, overflow)
{
- struct vfio_dma_region *region = &self->region;
+ struct dma_region *region = &self->region;
int rc;
region->iova = ~(iova_t)0 & ~(region->size - 1);
region->size = self->mmap_size;
- rc = __vfio_pci_dma_map(self->device, region);
+ rc = __iommu_map(self->iommu, region);
ASSERT_EQ(rc, -EOVERFLOW);
- rc = __vfio_pci_dma_unmap(self->device, region, NULL);
+ rc = __iommu_unmap(self->iommu, region, NULL);
ASSERT_EQ(rc, -EOVERFLOW);
}
diff --git a/tools/testing/selftests/vfio/vfio_iommufd_setup_test.c b/tools/testing/selftests/vfio/vfio_iommufd_setup_test.c
index 3655106b912d..caf1c6291f3d 100644
--- a/tools/testing/selftests/vfio/vfio_iommufd_setup_test.c
+++ b/tools/testing/selftests/vfio/vfio_iommufd_setup_test.c
@@ -10,7 +10,7 @@
#include <sys/ioctl.h>
#include <unistd.h>
-#include <vfio_util.h>
+#include <libvfio.h>
#include "../kselftest_harness.h"
static const char iommu_dev_path[] = "/dev/iommu";
diff --git a/tools/testing/selftests/vfio/vfio_pci_device_init_perf_test.c b/tools/testing/selftests/vfio/vfio_pci_device_init_perf_test.c
new file mode 100644
index 000000000000..33b0c31fe2ed
--- /dev/null
+++ b/tools/testing/selftests/vfio/vfio_pci_device_init_perf_test.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <pthread.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include <linux/sizes.h>
+#include <linux/time64.h>
+#include <linux/vfio.h>
+
+#include <libvfio.h>
+
+#include "../kselftest_harness.h"
+
+static char **device_bdfs;
+static int nr_devices;
+
+struct thread_args {
+ struct iommu *iommu;
+ int device_index;
+ struct timespec start;
+ struct timespec end;
+ pthread_barrier_t *barrier;
+};
+
+FIXTURE(vfio_pci_device_init_perf_test) {
+ pthread_t *threads;
+ pthread_barrier_t barrier;
+ struct thread_args *thread_args;
+ struct iommu *iommu;
+};
+
+FIXTURE_VARIANT(vfio_pci_device_init_perf_test) {
+ const char *iommu_mode;
+};
+
+#define FIXTURE_VARIANT_ADD_IOMMU_MODE(_iommu_mode) \
+FIXTURE_VARIANT_ADD(vfio_pci_device_init_perf_test, _iommu_mode) { \
+ .iommu_mode = #_iommu_mode, \
+}
+
+FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES();
+
+FIXTURE_SETUP(vfio_pci_device_init_perf_test)
+{
+ int i;
+
+ self->iommu = iommu_init(variant->iommu_mode);
+ self->threads = calloc(nr_devices, sizeof(self->threads[0]));
+ self->thread_args = calloc(nr_devices, sizeof(self->thread_args[0]));
+
+ pthread_barrier_init(&self->barrier, NULL, nr_devices);
+
+ for (i = 0; i < nr_devices; i++) {
+ self->thread_args[i].iommu = self->iommu;
+ self->thread_args[i].barrier = &self->barrier;
+ self->thread_args[i].device_index = i;
+ }
+}
+
+FIXTURE_TEARDOWN(vfio_pci_device_init_perf_test)
+{
+ iommu_cleanup(self->iommu);
+ free(self->threads);
+ free(self->thread_args);
+}
+
+static s64 to_ns(struct timespec ts)
+{
+ return (s64)ts.tv_nsec + NSEC_PER_SEC * (s64)ts.tv_sec;
+}
+
+static struct timespec to_timespec(s64 ns)
+{
+ struct timespec ts = {
+ .tv_nsec = ns % NSEC_PER_SEC,
+ .tv_sec = ns / NSEC_PER_SEC,
+ };
+
+ return ts;
+}
+
+static struct timespec timespec_sub(struct timespec a, struct timespec b)
+{
+ return to_timespec(to_ns(a) - to_ns(b));
+}
+
+static struct timespec timespec_min(struct timespec a, struct timespec b)
+{
+ return to_ns(a) < to_ns(b) ? a : b;
+}
+
+static struct timespec timespec_max(struct timespec a, struct timespec b)
+{
+ return to_ns(a) > to_ns(b) ? a : b;
+}
+
+static void *thread_main(void *__args)
+{
+ struct thread_args *args = __args;
+ struct vfio_pci_device *device;
+
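+	/*
+	 * Rendezvous with the other threads so that every
+	 * vfio_pci_device_init() call runs, and is timed, concurrently.
+	 */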
+ pthread_barrier_wait(args->barrier);
+
+ clock_gettime(CLOCK_MONOTONIC, &args->start);
+ device = vfio_pci_device_init(device_bdfs[args->device_index], args->iommu);
+ clock_gettime(CLOCK_MONOTONIC, &args->end);
+
+ pthread_barrier_wait(args->barrier);
+
+ vfio_pci_device_cleanup(device);
+ return NULL;
+}
+
+TEST_F(vfio_pci_device_init_perf_test, init)
+{
+ struct timespec start = to_timespec(INT64_MAX), end = {};
+ struct timespec min = to_timespec(INT64_MAX);
+ struct timespec max = {};
+ struct timespec avg = {};
+ struct timespec wall_time;
+ s64 thread_ns = 0;
+ int i;
+
+ for (i = 0; i < nr_devices; i++) {
+ pthread_create(&self->threads[i], NULL, thread_main,
+ &self->thread_args[i]);
+ }
+
+ for (i = 0; i < nr_devices; i++) {
+ struct thread_args *args = &self->thread_args[i];
+ struct timespec init_time;
+
+ pthread_join(self->threads[i], NULL);
+
+ start = timespec_min(start, args->start);
+ end = timespec_max(end, args->end);
+
+ init_time = timespec_sub(args->end, args->start);
+ min = timespec_min(min, init_time);
+ max = timespec_max(max, init_time);
+ thread_ns += to_ns(init_time);
+ }
+
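+	/* avg is the mean per-thread init time; wall_time spans the entire parallel run. */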
+ avg = to_timespec(thread_ns / nr_devices);
+ wall_time = timespec_sub(end, start);
+
+	printf("Wall time: %ld.%09lds\n",
+	       wall_time.tv_sec, wall_time.tv_nsec);
+	printf("Min init time (per device): %ld.%09lds\n",
+	       min.tv_sec, min.tv_nsec);
+	printf("Max init time (per device): %ld.%09lds\n",
+	       max.tv_sec, max.tv_nsec);
+	printf("Avg init time (per device): %ld.%09lds\n",
+	       avg.tv_sec, avg.tv_nsec);
+}
+
+int main(int argc, char *argv[])
+{
+ int i;
+
+ device_bdfs = vfio_selftests_get_bdfs(&argc, argv, &nr_devices);
+
+ printf("Testing parallel initialization of %d devices:\n", nr_devices);
+ for (i = 0; i < nr_devices; i++)
+ printf(" %s\n", device_bdfs[i]);
+
+ return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/vfio/vfio_pci_device_test.c b/tools/testing/selftests/vfio/vfio_pci_device_test.c
index 7a270698e4d2..ecbb669b3765 100644
--- a/tools/testing/selftests/vfio/vfio_pci_device_test.c
+++ b/tools/testing/selftests/vfio/vfio_pci_device_test.c
@@ -10,7 +10,7 @@
#include <linux/sizes.h>
#include <linux/vfio.h>
-#include <vfio_util.h>
+#include <libvfio.h>
#include "../kselftest_harness.h"
@@ -23,17 +23,20 @@ static const char *device_bdf;
#define MAX_TEST_MSI 16U
FIXTURE(vfio_pci_device_test) {
+ struct iommu *iommu;
struct vfio_pci_device *device;
};
FIXTURE_SETUP(vfio_pci_device_test)
{
- self->device = vfio_pci_device_init(device_bdf, default_iommu_mode);
+ self->iommu = iommu_init(default_iommu_mode);
+ self->device = vfio_pci_device_init(device_bdf, self->iommu);
}
FIXTURE_TEARDOWN(vfio_pci_device_test)
{
vfio_pci_device_cleanup(self->device);
+ iommu_cleanup(self->iommu);
}
#define read_pci_id_from_sysfs(_file) ({ \
@@ -99,6 +102,7 @@ TEST_F(vfio_pci_device_test, validate_bars)
}
FIXTURE(vfio_pci_irq_test) {
+ struct iommu *iommu;
struct vfio_pci_device *device;
};
@@ -116,12 +120,14 @@ FIXTURE_VARIANT_ADD(vfio_pci_irq_test, msix) {
FIXTURE_SETUP(vfio_pci_irq_test)
{
- self->device = vfio_pci_device_init(device_bdf, default_iommu_mode);
+ self->iommu = iommu_init(default_iommu_mode);
+ self->device = vfio_pci_device_init(device_bdf, self->iommu);
}
FIXTURE_TEARDOWN(vfio_pci_irq_test)
{
vfio_pci_device_cleanup(self->device);
+ iommu_cleanup(self->iommu);
}
TEST_F(vfio_pci_irq_test, enable_trigger_disable)
diff --git a/tools/testing/selftests/vfio/vfio_pci_driver_test.c b/tools/testing/selftests/vfio/vfio_pci_driver_test.c
index f69eec8b928d..f0ca8310d6a8 100644
--- a/tools/testing/selftests/vfio/vfio_pci_driver_test.c
+++ b/tools/testing/selftests/vfio/vfio_pci_driver_test.c
@@ -5,7 +5,7 @@
#include <linux/sizes.h>
#include <linux/vfio.h>
-#include <vfio_util.h>
+#include <libvfio.h>
#include "../kselftest_harness.h"
@@ -18,9 +18,9 @@ static const char *device_bdf;
ASSERT_EQ(EAGAIN, errno); \
} while (0)
-static void region_setup(struct vfio_pci_device *device,
+static void region_setup(struct iommu *iommu,
struct iova_allocator *iova_allocator,
- struct vfio_dma_region *region, u64 size)
+ struct dma_region *region, u64 size)
{
const int flags = MAP_SHARED | MAP_ANONYMOUS;
const int prot = PROT_READ | PROT_WRITE;
@@ -33,20 +33,20 @@ static void region_setup(struct vfio_pci_device *device,
region->iova = iova_allocator_alloc(iova_allocator, size);
region->size = size;
- vfio_pci_dma_map(device, region);
+ iommu_map(iommu, region);
}
-static void region_teardown(struct vfio_pci_device *device,
- struct vfio_dma_region *region)
+static void region_teardown(struct iommu *iommu, struct dma_region *region)
{
- vfio_pci_dma_unmap(device, region);
+ iommu_unmap(iommu, region);
VFIO_ASSERT_EQ(munmap(region->vaddr, region->size), 0);
}
FIXTURE(vfio_pci_driver_test) {
+ struct iommu *iommu;
struct vfio_pci_device *device;
struct iova_allocator *iova_allocator;
- struct vfio_dma_region memcpy_region;
+ struct dma_region memcpy_region;
void *vaddr;
int msi_fd;
@@ -73,13 +73,14 @@ FIXTURE_SETUP(vfio_pci_driver_test)
{
struct vfio_pci_driver *driver;
- self->device = vfio_pci_device_init(device_bdf, variant->iommu_mode);
- self->iova_allocator = iova_allocator_init(self->device);
+ self->iommu = iommu_init(variant->iommu_mode);
+ self->device = vfio_pci_device_init(device_bdf, self->iommu);
+ self->iova_allocator = iova_allocator_init(self->iommu);
driver = &self->device->driver;
- region_setup(self->device, self->iova_allocator, &self->memcpy_region, SZ_1G);
- region_setup(self->device, self->iova_allocator, &driver->region, SZ_2M);
+ region_setup(self->iommu, self->iova_allocator, &self->memcpy_region, SZ_1G);
+ region_setup(self->iommu, self->iova_allocator, &driver->region, SZ_2M);
/* Any IOVA that doesn't overlap memcpy_region and driver->region. */
self->unmapped_iova = iova_allocator_alloc(self->iova_allocator, SZ_1G);
@@ -108,11 +109,12 @@ FIXTURE_TEARDOWN(vfio_pci_driver_test)
vfio_pci_driver_remove(self->device);
- region_teardown(self->device, &self->memcpy_region);
- region_teardown(self->device, &driver->region);
+ region_teardown(self->iommu, &self->memcpy_region);
+ region_teardown(self->iommu, &driver->region);
iova_allocator_cleanup(self->iova_allocator);
vfio_pci_device_cleanup(self->device);
+ iommu_cleanup(self->iommu);
}
TEST_F(vfio_pci_driver_test, init_remove)
@@ -231,18 +233,31 @@ TEST_F_TIMEOUT(vfio_pci_driver_test, memcpy_storm, 60)
ASSERT_NO_MSI(self->msi_fd);
}
-int main(int argc, char *argv[])
+static bool device_has_selftests_driver(const char *bdf)
{
struct vfio_pci_device *device;
+ struct iommu *iommu;
+ bool has_driver;
+
+ iommu = iommu_init(default_iommu_mode);
+ device = vfio_pci_device_init(device_bdf, iommu);
+
+ has_driver = !!device->driver.ops;
+
+ vfio_pci_device_cleanup(device);
+ iommu_cleanup(iommu);
+ return has_driver;
+}
+
+int main(int argc, char *argv[])
+{
device_bdf = vfio_selftests_get_bdf(&argc, argv);
- device = vfio_pci_device_init(device_bdf, default_iommu_mode);
- if (!device->driver.ops) {
+ if (!device_has_selftests_driver(device_bdf)) {
fprintf(stderr, "No driver found for device %s\n", device_bdf);
return KSFT_SKIP;
}
- vfio_pci_device_cleanup(device);
return test_harness_run(argc, argv);
}
diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c
index 656e1c75b711..93d21bc7e112 100644
--- a/tools/testing/vma/vma.c
+++ b/tools/testing/vma/vma.c
@@ -48,6 +48,8 @@ static struct anon_vma dummy_anon_vma;
#define ASSERT_EQ(_val1, _val2) ASSERT_TRUE((_val1) == (_val2))
#define ASSERT_NE(_val1, _val2) ASSERT_TRUE((_val1) != (_val2))
+#define IS_SET(_val, _flags) (((_val) & (_flags)) == (_flags))
+
static struct task_struct __current;
struct task_struct *get_current(void)
@@ -67,18 +69,18 @@ static struct vm_area_struct *alloc_vma(struct mm_struct *mm,
pgoff_t pgoff,
vm_flags_t vm_flags)
{
- struct vm_area_struct *ret = vm_area_alloc(mm);
+ struct vm_area_struct *vma = vm_area_alloc(mm);
- if (ret == NULL)
+ if (vma == NULL)
return NULL;
- ret->vm_start = start;
- ret->vm_end = end;
- ret->vm_pgoff = pgoff;
- ret->__vm_flags = vm_flags;
- vma_assert_detached(ret);
+ vma->vm_start = start;
+ vma->vm_end = end;
+ vma->vm_pgoff = pgoff;
+ vm_flags_reset(vma, vm_flags);
+ vma_assert_detached(vma);
- return ret;
+ return vma;
}
/* Helper function to allocate a VMA and link it to the tree. */
@@ -339,6 +341,7 @@ static bool test_simple_modify(void)
struct mm_struct mm = {};
struct vm_area_struct *init_vma = alloc_vma(&mm, 0, 0x3000, 0, vm_flags);
VMA_ITERATOR(vmi, &mm, 0x1000);
+ vm_flags_t flags = VM_READ | VM_MAYREAD;
ASSERT_FALSE(attach_vma(&mm, init_vma));
@@ -347,7 +350,7 @@ static bool test_simple_modify(void)
* performs the merge/split only.
*/
vma = vma_modify_flags(&vmi, init_vma, init_vma,
- 0x1000, 0x2000, VM_READ | VM_MAYREAD);
+ 0x1000, 0x2000, &flags);
ASSERT_NE(vma, NULL);
/* We modify the provided VMA, and on split allocate new VMAs. */
ASSERT_EQ(vma, init_vma);
@@ -441,7 +444,7 @@ static bool test_simple_shrink(void)
return true;
}
-static bool test_merge_new(void)
+static bool __test_merge_new(bool is_sticky, bool a_is_sticky, bool b_is_sticky, bool c_is_sticky)
{
vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE;
struct mm_struct mm = {};
@@ -469,23 +472,32 @@ static bool test_merge_new(void)
struct vm_area_struct *vma, *vma_a, *vma_b, *vma_c, *vma_d;
bool merged;
+ if (is_sticky)
+ vm_flags |= VM_STICKY;
+
/*
* 0123456789abc
* AA B CC
*/
vma_a = alloc_and_link_vma(&mm, 0, 0x2000, 0, vm_flags);
ASSERT_NE(vma_a, NULL);
+ if (a_is_sticky)
+ vm_flags_set(vma_a, VM_STICKY);
/* We give each VMA a single avc so we can test anon_vma duplication. */
INIT_LIST_HEAD(&vma_a->anon_vma_chain);
list_add(&dummy_anon_vma_chain_a.same_vma, &vma_a->anon_vma_chain);
vma_b = alloc_and_link_vma(&mm, 0x3000, 0x4000, 3, vm_flags);
ASSERT_NE(vma_b, NULL);
+ if (b_is_sticky)
+ vm_flags_set(vma_b, VM_STICKY);
INIT_LIST_HEAD(&vma_b->anon_vma_chain);
list_add(&dummy_anon_vma_chain_b.same_vma, &vma_b->anon_vma_chain);
vma_c = alloc_and_link_vma(&mm, 0xb000, 0xc000, 0xb, vm_flags);
ASSERT_NE(vma_c, NULL);
+ if (c_is_sticky)
+ vm_flags_set(vma_c, VM_STICKY);
INIT_LIST_HEAD(&vma_c->anon_vma_chain);
list_add(&dummy_anon_vma_chain_c.same_vma, &vma_c->anon_vma_chain);
@@ -520,6 +532,8 @@ static bool test_merge_new(void)
ASSERT_EQ(vma->anon_vma, &dummy_anon_vma);
ASSERT_TRUE(vma_write_started(vma));
ASSERT_EQ(mm.map_count, 3);
+ if (is_sticky || a_is_sticky || b_is_sticky)
+ ASSERT_TRUE(IS_SET(vma->vm_flags, VM_STICKY));
/*
* Merge to PREVIOUS VMA.
@@ -537,6 +551,8 @@ static bool test_merge_new(void)
ASSERT_EQ(vma->anon_vma, &dummy_anon_vma);
ASSERT_TRUE(vma_write_started(vma));
ASSERT_EQ(mm.map_count, 3);
+ if (is_sticky || a_is_sticky)
+ ASSERT_TRUE(IS_SET(vma->vm_flags, VM_STICKY));
/*
* Merge to NEXT VMA.
@@ -556,6 +572,8 @@ static bool test_merge_new(void)
ASSERT_EQ(vma->anon_vma, &dummy_anon_vma);
ASSERT_TRUE(vma_write_started(vma));
ASSERT_EQ(mm.map_count, 3);
+ if (is_sticky) /* D uses is_sticky. */
+ ASSERT_TRUE(IS_SET(vma->vm_flags, VM_STICKY));
/*
* Merge BOTH sides.
@@ -574,6 +592,8 @@ static bool test_merge_new(void)
ASSERT_EQ(vma->anon_vma, &dummy_anon_vma);
ASSERT_TRUE(vma_write_started(vma));
ASSERT_EQ(mm.map_count, 2);
+ if (is_sticky || a_is_sticky)
+ ASSERT_TRUE(IS_SET(vma->vm_flags, VM_STICKY));
/*
* Merge to NEXT VMA.
@@ -592,6 +612,8 @@ static bool test_merge_new(void)
ASSERT_EQ(vma->anon_vma, &dummy_anon_vma);
ASSERT_TRUE(vma_write_started(vma));
ASSERT_EQ(mm.map_count, 2);
+ if (is_sticky || c_is_sticky)
+ ASSERT_TRUE(IS_SET(vma->vm_flags, VM_STICKY));
/*
* Merge BOTH sides.
@@ -609,6 +631,8 @@ static bool test_merge_new(void)
ASSERT_EQ(vma->anon_vma, &dummy_anon_vma);
ASSERT_TRUE(vma_write_started(vma));
ASSERT_EQ(mm.map_count, 1);
+ if (is_sticky || a_is_sticky || c_is_sticky)
+ ASSERT_TRUE(IS_SET(vma->vm_flags, VM_STICKY));
/*
* Final state.
@@ -637,6 +661,20 @@ static bool test_merge_new(void)
return true;
}
+static bool test_merge_new(void)
+{
+ int i, j, k, l;
+
+	/* Generate every possible combination of sticky flags. */
+ for (i = 0; i < 2; i++)
+ for (j = 0; j < 2; j++)
+ for (k = 0; k < 2; k++)
+ for (l = 0; l < 2; l++)
+ ASSERT_TRUE(__test_merge_new(i, j, k, l));
+
+ return true;
+}
+
static bool test_vma_merge_special_flags(void)
{
vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE;
@@ -676,7 +714,7 @@ static bool test_vma_merge_special_flags(void)
for (i = 0; i < ARRAY_SIZE(special_flags); i++) {
vm_flags_t special_flag = special_flags[i];
- vma_left->__vm_flags = vm_flags | special_flag;
+ vm_flags_reset(vma_left, vm_flags | special_flag);
vmg.vm_flags = vm_flags | special_flag;
vma = merge_new(&vmg);
ASSERT_EQ(vma, NULL);
@@ -698,7 +736,7 @@ static bool test_vma_merge_special_flags(void)
for (i = 0; i < ARRAY_SIZE(special_flags); i++) {
vm_flags_t special_flag = special_flags[i];
- vma_left->__vm_flags = vm_flags | special_flag;
+ vm_flags_reset(vma_left, vm_flags | special_flag);
vmg.vm_flags = vm_flags | special_flag;
vma = merge_existing(&vmg);
ASSERT_EQ(vma, NULL);
@@ -973,9 +1011,11 @@ static bool test_vma_merge_new_with_close(void)
return true;
}
-static bool test_merge_existing(void)
+static bool __test_merge_existing(bool prev_is_sticky, bool middle_is_sticky, bool next_is_sticky)
{
vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE;
+ vm_flags_t prev_flags = vm_flags;
+ vm_flags_t next_flags = vm_flags;
struct mm_struct mm = {};
VMA_ITERATOR(vmi, &mm, 0);
struct vm_area_struct *vma, *vma_prev, *vma_next;
@@ -988,6 +1028,13 @@ static bool test_merge_existing(void)
};
struct anon_vma_chain avc = {};
+ if (prev_is_sticky)
+ prev_flags |= VM_STICKY;
+ if (middle_is_sticky)
+ vm_flags |= VM_STICKY;
+ if (next_is_sticky)
+ next_flags |= VM_STICKY;
+
/*
* Merge right case - partial span.
*
@@ -1000,7 +1047,7 @@ static bool test_merge_existing(void)
*/
vma = alloc_and_link_vma(&mm, 0x2000, 0x6000, 2, vm_flags);
vma->vm_ops = &vm_ops; /* This should have no impact. */
- vma_next = alloc_and_link_vma(&mm, 0x6000, 0x9000, 6, vm_flags);
+ vma_next = alloc_and_link_vma(&mm, 0x6000, 0x9000, 6, next_flags);
vma_next->vm_ops = &vm_ops; /* This should have no impact. */
vmg_set_range_anon_vma(&vmg, 0x3000, 0x6000, 3, vm_flags, &dummy_anon_vma);
vmg.middle = vma;
@@ -1018,6 +1065,8 @@ static bool test_merge_existing(void)
ASSERT_TRUE(vma_write_started(vma));
ASSERT_TRUE(vma_write_started(vma_next));
ASSERT_EQ(mm.map_count, 2);
+ if (middle_is_sticky || next_is_sticky)
+ ASSERT_TRUE(IS_SET(vma_next->vm_flags, VM_STICKY));
/* Clear down and reset. */
ASSERT_EQ(cleanup_mm(&mm, &vmi), 2);
@@ -1033,7 +1082,7 @@ static bool test_merge_existing(void)
* NNNNNNN
*/
vma = alloc_and_link_vma(&mm, 0x2000, 0x6000, 2, vm_flags);
- vma_next = alloc_and_link_vma(&mm, 0x6000, 0x9000, 6, vm_flags);
+ vma_next = alloc_and_link_vma(&mm, 0x6000, 0x9000, 6, next_flags);
vma_next->vm_ops = &vm_ops; /* This should have no impact. */
vmg_set_range_anon_vma(&vmg, 0x2000, 0x6000, 2, vm_flags, &dummy_anon_vma);
vmg.middle = vma;
@@ -1046,6 +1095,8 @@ static bool test_merge_existing(void)
ASSERT_EQ(vma_next->anon_vma, &dummy_anon_vma);
ASSERT_TRUE(vma_write_started(vma_next));
ASSERT_EQ(mm.map_count, 1);
+ if (middle_is_sticky || next_is_sticky)
+ ASSERT_TRUE(IS_SET(vma_next->vm_flags, VM_STICKY));
/* Clear down and reset. We should have deleted vma. */
ASSERT_EQ(cleanup_mm(&mm, &vmi), 1);
@@ -1060,7 +1111,7 @@ static bool test_merge_existing(void)
* 0123456789
* PPPPPPV
*/
- vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags);
+ vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, prev_flags);
vma_prev->vm_ops = &vm_ops; /* This should have no impact. */
vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, vm_flags);
vma->vm_ops = &vm_ops; /* This should have no impact. */
@@ -1080,6 +1131,8 @@ static bool test_merge_existing(void)
ASSERT_TRUE(vma_write_started(vma_prev));
ASSERT_TRUE(vma_write_started(vma));
ASSERT_EQ(mm.map_count, 2);
+ if (prev_is_sticky || middle_is_sticky)
+ ASSERT_TRUE(IS_SET(vma_prev->vm_flags, VM_STICKY));
/* Clear down and reset. */
ASSERT_EQ(cleanup_mm(&mm, &vmi), 2);
@@ -1094,7 +1147,7 @@ static bool test_merge_existing(void)
* 0123456789
* PPPPPPP
*/
- vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags);
+ vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, prev_flags);
vma_prev->vm_ops = &vm_ops; /* This should have no impact. */
vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, vm_flags);
vmg_set_range_anon_vma(&vmg, 0x3000, 0x7000, 3, vm_flags, &dummy_anon_vma);
@@ -1109,6 +1162,8 @@ static bool test_merge_existing(void)
ASSERT_EQ(vma_prev->anon_vma, &dummy_anon_vma);
ASSERT_TRUE(vma_write_started(vma_prev));
ASSERT_EQ(mm.map_count, 1);
+ if (prev_is_sticky || middle_is_sticky)
+ ASSERT_TRUE(IS_SET(vma_prev->vm_flags, VM_STICKY));
/* Clear down and reset. We should have deleted vma. */
ASSERT_EQ(cleanup_mm(&mm, &vmi), 1);
@@ -1123,10 +1178,10 @@ static bool test_merge_existing(void)
* 0123456789
* PPPPPPPPPP
*/
- vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags);
+ vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, prev_flags);
vma_prev->vm_ops = &vm_ops; /* This should have no impact. */
vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, vm_flags);
- vma_next = alloc_and_link_vma(&mm, 0x7000, 0x9000, 7, vm_flags);
+ vma_next = alloc_and_link_vma(&mm, 0x7000, 0x9000, 7, next_flags);
vmg_set_range_anon_vma(&vmg, 0x3000, 0x7000, 3, vm_flags, &dummy_anon_vma);
vmg.prev = vma_prev;
vmg.middle = vma;
@@ -1139,6 +1194,8 @@ static bool test_merge_existing(void)
ASSERT_EQ(vma_prev->anon_vma, &dummy_anon_vma);
ASSERT_TRUE(vma_write_started(vma_prev));
ASSERT_EQ(mm.map_count, 1);
+ if (prev_is_sticky || middle_is_sticky || next_is_sticky)
+ ASSERT_TRUE(IS_SET(vma_prev->vm_flags, VM_STICKY));
/* Clear down and reset. We should have deleted prev and next. */
ASSERT_EQ(cleanup_mm(&mm, &vmi), 1);
@@ -1158,9 +1215,9 @@ static bool test_merge_existing(void)
* PPPVVVVVNNN
*/
- vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags);
+ vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, prev_flags);
vma = alloc_and_link_vma(&mm, 0x3000, 0x8000, 3, vm_flags);
- vma_next = alloc_and_link_vma(&mm, 0x8000, 0xa000, 8, vm_flags);
+ vma_next = alloc_and_link_vma(&mm, 0x8000, 0xa000, 8, next_flags);
vmg_set_range(&vmg, 0x4000, 0x5000, 4, vm_flags);
vmg.prev = vma;
@@ -1203,6 +1260,19 @@ static bool test_merge_existing(void)
return true;
}
+static bool test_merge_existing(void)
+{
+ int i, j, k;
+
+	/* Generate every possible combination of sticky flags. */
+ for (i = 0; i < 2; i++)
+ for (j = 0; j < 2; j++)
+ for (k = 0; k < 2; k++)
+ ASSERT_TRUE(__test_merge_existing(i, j, k));
+
+ return true;
+}
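For illustration, the nested loops above are equivalent to iterating a single three-bit mask. A sketch only, assuming __test_merge_existing() takes the prev/middle/next sticky booleans in that order, as above:

static bool test_merge_existing_bitmask(void)
{
	unsigned int mask;

	/* Bit 0: prev sticky, bit 1: middle sticky, bit 2: next sticky. */
	for (mask = 0; mask < 8; mask++)
		ASSERT_TRUE(__test_merge_existing(mask & 1,
						  !!(mask & 2),
						  !!(mask & 4)));

	return true;
}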
+
static bool test_anon_vma_non_mergeable(void)
{
vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE;
diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
index dc976a285ad2..9f0a9f5ed0fe 100644
--- a/tools/testing/vma/vma_internal.h
+++ b/tools/testing/vma/vma_internal.h
@@ -46,42 +46,272 @@ extern unsigned long dac_mmap_min_addr;
#define MMF_HAS_MDWE 28
+/*
+ * vm_flags in vm_area_struct, see mm_types.h.
+ * When changing, also update include/trace/events/mmflags.h.
+ */
+
#define VM_NONE 0x00000000
-#define VM_READ 0x00000001
-#define VM_WRITE 0x00000002
-#define VM_EXEC 0x00000004
-#define VM_SHARED 0x00000008
-#define VM_MAYREAD 0x00000010
-#define VM_MAYWRITE 0x00000020
-#define VM_MAYEXEC 0x00000040
-#define VM_GROWSDOWN 0x00000100
-#define VM_PFNMAP 0x00000400
-#define VM_LOCKED 0x00002000
-#define VM_IO 0x00004000
-#define VM_SEQ_READ 0x00008000 /* App will access data sequentially */
-#define VM_RAND_READ 0x00010000 /* App will not benefit from clustered reads */
-#define VM_DONTEXPAND 0x00040000
-#define VM_LOCKONFAULT 0x00080000
-#define VM_ACCOUNT 0x00100000
-#define VM_NORESERVE 0x00200000
-#define VM_MIXEDMAP 0x10000000
-#define VM_STACK VM_GROWSDOWN
-#define VM_SHADOW_STACK VM_NONE
-#define VM_SOFTDIRTY 0
-#define VM_ARCH_1 0x01000000 /* Architecture-specific flag */
-#define VM_GROWSUP VM_NONE
-#define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC)
-#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP)
+/**
+ * typedef vma_flag_t - specifies an individual VMA flag by bit number.
+ *
+ * This value is made type safe by sparse to avoid passing invalid flag values
+ * around.
+ */
+typedef int __bitwise vma_flag_t;
+#define DECLARE_VMA_BIT(name, bitnum) \
+ VMA_ ## name ## _BIT = ((__force vma_flag_t)bitnum)
+#define DECLARE_VMA_BIT_ALIAS(name, aliased) \
+ VMA_ ## name ## _BIT = VMA_ ## aliased ## _BIT
+enum {
+ DECLARE_VMA_BIT(READ, 0),
+ DECLARE_VMA_BIT(WRITE, 1),
+ DECLARE_VMA_BIT(EXEC, 2),
+ DECLARE_VMA_BIT(SHARED, 3),
+	/* mprotect() hardcodes VM_MAYREAD >> 4 == VM_READ, and likewise for the w/x bits. */
+ DECLARE_VMA_BIT(MAYREAD, 4), /* limits for mprotect() etc. */
+ DECLARE_VMA_BIT(MAYWRITE, 5),
+ DECLARE_VMA_BIT(MAYEXEC, 6),
+ DECLARE_VMA_BIT(MAYSHARE, 7),
+ DECLARE_VMA_BIT(GROWSDOWN, 8), /* general info on the segment */
+#ifdef CONFIG_MMU
+ DECLARE_VMA_BIT(UFFD_MISSING, 9),/* missing pages tracking */
+#else
+ /* nommu: R/O MAP_PRIVATE mapping that might overlay a file mapping */
+ DECLARE_VMA_BIT(MAYOVERLAY, 9),
+#endif /* CONFIG_MMU */
+ /* Page-ranges managed without "struct page", just pure PFN */
+ DECLARE_VMA_BIT(PFNMAP, 10),
+ DECLARE_VMA_BIT(MAYBE_GUARD, 11),
+ DECLARE_VMA_BIT(UFFD_WP, 12), /* wrprotect pages tracking */
+ DECLARE_VMA_BIT(LOCKED, 13),
+ DECLARE_VMA_BIT(IO, 14), /* Memory mapped I/O or similar */
+ DECLARE_VMA_BIT(SEQ_READ, 15), /* App will access data sequentially */
+ DECLARE_VMA_BIT(RAND_READ, 16), /* App will not benefit from clustered reads */
+ DECLARE_VMA_BIT(DONTCOPY, 17), /* Do not copy this vma on fork */
+ DECLARE_VMA_BIT(DONTEXPAND, 18),/* Cannot expand with mremap() */
+ DECLARE_VMA_BIT(LOCKONFAULT, 19),/* Lock pages covered when faulted in */
+ DECLARE_VMA_BIT(ACCOUNT, 20), /* Is a VM accounted object */
+ DECLARE_VMA_BIT(NORESERVE, 21), /* should the VM suppress accounting */
+ DECLARE_VMA_BIT(HUGETLB, 22), /* Huge TLB Page VM */
+ DECLARE_VMA_BIT(SYNC, 23), /* Synchronous page faults */
+ DECLARE_VMA_BIT(ARCH_1, 24), /* Architecture-specific flag */
+ DECLARE_VMA_BIT(WIPEONFORK, 25),/* Wipe VMA contents in child. */
+ DECLARE_VMA_BIT(DONTDUMP, 26), /* Do not include in the core dump */
+ DECLARE_VMA_BIT(SOFTDIRTY, 27), /* NOT soft dirty clean area */
+ DECLARE_VMA_BIT(MIXEDMAP, 28), /* Can contain struct page and pure PFN pages */
+ DECLARE_VMA_BIT(HUGEPAGE, 29), /* MADV_HUGEPAGE marked this vma */
+ DECLARE_VMA_BIT(NOHUGEPAGE, 30),/* MADV_NOHUGEPAGE marked this vma */
+ DECLARE_VMA_BIT(MERGEABLE, 31), /* KSM may merge identical pages */
+	/* These bits are reused; we define specific uses below. */
+ DECLARE_VMA_BIT(HIGH_ARCH_0, 32),
+ DECLARE_VMA_BIT(HIGH_ARCH_1, 33),
+ DECLARE_VMA_BIT(HIGH_ARCH_2, 34),
+ DECLARE_VMA_BIT(HIGH_ARCH_3, 35),
+ DECLARE_VMA_BIT(HIGH_ARCH_4, 36),
+ DECLARE_VMA_BIT(HIGH_ARCH_5, 37),
+ DECLARE_VMA_BIT(HIGH_ARCH_6, 38),
+ /*
+ * This flag is used to connect VFIO to arch specific KVM code. It
+ * indicates that the memory under this VMA is safe for use with any
+	 * non-cacheable memory type inside KVM. Some VFIO devices, on some
+ * platforms, are thought to be unsafe and can cause machine crashes
+ * if KVM does not lock down the memory type.
+ */
+ DECLARE_VMA_BIT(ALLOW_ANY_UNCACHED, 39),
+#ifdef CONFIG_PPC32
+ DECLARE_VMA_BIT_ALIAS(DROPPABLE, ARCH_1),
+#else
+ DECLARE_VMA_BIT(DROPPABLE, 40),
+#endif
+ DECLARE_VMA_BIT(UFFD_MINOR, 41),
+ DECLARE_VMA_BIT(SEALED, 42),
+ /* Flags that reuse flags above. */
+ DECLARE_VMA_BIT_ALIAS(PKEY_BIT0, HIGH_ARCH_0),
+ DECLARE_VMA_BIT_ALIAS(PKEY_BIT1, HIGH_ARCH_1),
+ DECLARE_VMA_BIT_ALIAS(PKEY_BIT2, HIGH_ARCH_2),
+ DECLARE_VMA_BIT_ALIAS(PKEY_BIT3, HIGH_ARCH_3),
+ DECLARE_VMA_BIT_ALIAS(PKEY_BIT4, HIGH_ARCH_4),
+#if defined(CONFIG_X86_USER_SHADOW_STACK)
+ /*
+	 * VM_SHADOW_STACK should not be set with VM_SHARED because of lack of
+	 * support in core mm.
+ *
+ * These VMAs will get a single end guard page. This helps userspace
+ * protect itself from attacks. A single page is enough for current
+ * shadow stack archs (x86). See the comments near alloc_shstk() in
+ * arch/x86/kernel/shstk.c for more details on the guard size.
+ */
+ DECLARE_VMA_BIT_ALIAS(SHADOW_STACK, HIGH_ARCH_5),
+#elif defined(CONFIG_ARM64_GCS)
+ /*
+ * arm64's Guarded Control Stack implements similar functionality and
+ * has similar constraints to shadow stacks.
+ */
+ DECLARE_VMA_BIT_ALIAS(SHADOW_STACK, HIGH_ARCH_6),
+#endif
+ DECLARE_VMA_BIT_ALIAS(SAO, ARCH_1), /* Strong Access Ordering (powerpc) */
+ DECLARE_VMA_BIT_ALIAS(GROWSUP, ARCH_1), /* parisc */
+ DECLARE_VMA_BIT_ALIAS(SPARC_ADI, ARCH_1), /* sparc64 */
+ DECLARE_VMA_BIT_ALIAS(ARM64_BTI, ARCH_1), /* arm64 */
+ DECLARE_VMA_BIT_ALIAS(ARCH_CLEAR, ARCH_1), /* sparc64, arm64 */
+ DECLARE_VMA_BIT_ALIAS(MAPPED_COPY, ARCH_1), /* !CONFIG_MMU */
+ DECLARE_VMA_BIT_ALIAS(MTE, HIGH_ARCH_4), /* arm64 */
+ DECLARE_VMA_BIT_ALIAS(MTE_ALLOWED, HIGH_ARCH_5),/* arm64 */
#ifdef CONFIG_STACK_GROWSUP
-#define VM_STACK VM_GROWSUP
-#define VM_STACK_EARLY VM_GROWSDOWN
+ DECLARE_VMA_BIT_ALIAS(STACK, GROWSUP),
+ DECLARE_VMA_BIT_ALIAS(STACK_EARLY, GROWSDOWN),
+#else
+ DECLARE_VMA_BIT_ALIAS(STACK, GROWSDOWN),
+#endif
+};
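For clarity, a sketch of what the declaration macros above expand to (the sparse __bitwise/__force annotations compile away in normal builds):

/* DECLARE_VMA_BIT(READ, 0) expands to: */
VMA_READ_BIT = ((__force vma_flag_t)0),

/* DECLARE_VMA_BIT_ALIAS(STACK, GROWSDOWN) expands to: */
VMA_STACK_BIT = VMA_GROWSDOWN_BIT,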
+
+#define INIT_VM_FLAG(name) BIT((__force int) VMA_ ## name ## _BIT)
+#define VM_READ INIT_VM_FLAG(READ)
+#define VM_WRITE INIT_VM_FLAG(WRITE)
+#define VM_EXEC INIT_VM_FLAG(EXEC)
+#define VM_SHARED INIT_VM_FLAG(SHARED)
+#define VM_MAYREAD INIT_VM_FLAG(MAYREAD)
+#define VM_MAYWRITE INIT_VM_FLAG(MAYWRITE)
+#define VM_MAYEXEC INIT_VM_FLAG(MAYEXEC)
+#define VM_MAYSHARE INIT_VM_FLAG(MAYSHARE)
+#define VM_GROWSDOWN INIT_VM_FLAG(GROWSDOWN)
+#ifdef CONFIG_MMU
+#define VM_UFFD_MISSING INIT_VM_FLAG(UFFD_MISSING)
+#else
+#define VM_UFFD_MISSING VM_NONE
+#define VM_MAYOVERLAY INIT_VM_FLAG(MAYOVERLAY)
+#endif
+#define VM_PFNMAP INIT_VM_FLAG(PFNMAP)
+#define VM_MAYBE_GUARD INIT_VM_FLAG(MAYBE_GUARD)
+#define VM_UFFD_WP INIT_VM_FLAG(UFFD_WP)
+#define VM_LOCKED INIT_VM_FLAG(LOCKED)
+#define VM_IO INIT_VM_FLAG(IO)
+#define VM_SEQ_READ INIT_VM_FLAG(SEQ_READ)
+#define VM_RAND_READ INIT_VM_FLAG(RAND_READ)
+#define VM_DONTCOPY INIT_VM_FLAG(DONTCOPY)
+#define VM_DONTEXPAND INIT_VM_FLAG(DONTEXPAND)
+#define VM_LOCKONFAULT INIT_VM_FLAG(LOCKONFAULT)
+#define VM_ACCOUNT INIT_VM_FLAG(ACCOUNT)
+#define VM_NORESERVE INIT_VM_FLAG(NORESERVE)
+#define VM_HUGETLB INIT_VM_FLAG(HUGETLB)
+#define VM_SYNC INIT_VM_FLAG(SYNC)
+#define VM_ARCH_1 INIT_VM_FLAG(ARCH_1)
+#define VM_WIPEONFORK INIT_VM_FLAG(WIPEONFORK)
+#define VM_DONTDUMP INIT_VM_FLAG(DONTDUMP)
+#ifdef CONFIG_MEM_SOFT_DIRTY
+#define VM_SOFTDIRTY INIT_VM_FLAG(SOFTDIRTY)
+#else
+#define VM_SOFTDIRTY VM_NONE
+#endif
+#define VM_MIXEDMAP INIT_VM_FLAG(MIXEDMAP)
+#define VM_HUGEPAGE INIT_VM_FLAG(HUGEPAGE)
+#define VM_NOHUGEPAGE INIT_VM_FLAG(NOHUGEPAGE)
+#define VM_MERGEABLE INIT_VM_FLAG(MERGEABLE)
+#define VM_STACK INIT_VM_FLAG(STACK)
+#ifdef CONFIG_STACK_GROWSUP
+#define VM_STACK_EARLY INIT_VM_FLAG(STACK_EARLY)
+#else
+#define VM_STACK_EARLY VM_NONE
+#endif
+#ifdef CONFIG_ARCH_HAS_PKEYS
+#define VM_PKEY_SHIFT ((__force int)VMA_HIGH_ARCH_0_BIT)
+/* Despite the naming, these are FLAGS not bits. */
+#define VM_PKEY_BIT0 INIT_VM_FLAG(PKEY_BIT0)
+#define VM_PKEY_BIT1 INIT_VM_FLAG(PKEY_BIT1)
+#define VM_PKEY_BIT2 INIT_VM_FLAG(PKEY_BIT2)
+#if CONFIG_ARCH_PKEY_BITS > 3
+#define VM_PKEY_BIT3 INIT_VM_FLAG(PKEY_BIT3)
+#else
+#define VM_PKEY_BIT3 VM_NONE
+#endif /* CONFIG_ARCH_PKEY_BITS > 3 */
+#if CONFIG_ARCH_PKEY_BITS > 4
+#define VM_PKEY_BIT4 INIT_VM_FLAG(PKEY_BIT4)
+#else
+#define VM_PKEY_BIT4 VM_NONE
+#endif /* CONFIG_ARCH_PKEY_BITS > 4 */
+#endif /* CONFIG_ARCH_HAS_PKEYS */
+#if defined(CONFIG_X86_USER_SHADOW_STACK) || defined(CONFIG_ARM64_GCS)
+#define VM_SHADOW_STACK INIT_VM_FLAG(SHADOW_STACK)
+#else
+#define VM_SHADOW_STACK VM_NONE
+#endif
+#if defined(CONFIG_PPC64)
+#define VM_SAO INIT_VM_FLAG(SAO)
+#elif defined(CONFIG_PARISC)
+#define VM_GROWSUP INIT_VM_FLAG(GROWSUP)
+#elif defined(CONFIG_SPARC64)
+#define VM_SPARC_ADI INIT_VM_FLAG(SPARC_ADI)
+#define VM_ARCH_CLEAR INIT_VM_FLAG(ARCH_CLEAR)
+#elif defined(CONFIG_ARM64)
+#define VM_ARM64_BTI INIT_VM_FLAG(ARM64_BTI)
+#define VM_ARCH_CLEAR INIT_VM_FLAG(ARCH_CLEAR)
+#elif !defined(CONFIG_MMU)
+#define VM_MAPPED_COPY INIT_VM_FLAG(MAPPED_COPY)
+#endif
+#ifndef VM_GROWSUP
+#define VM_GROWSUP VM_NONE
+#endif
+#ifdef CONFIG_ARM64_MTE
+#define VM_MTE INIT_VM_FLAG(MTE)
+#define VM_MTE_ALLOWED INIT_VM_FLAG(MTE_ALLOWED)
+#else
+#define VM_MTE VM_NONE
+#define VM_MTE_ALLOWED VM_NONE
+#endif
+#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
+#define VM_UFFD_MINOR INIT_VM_FLAG(UFFD_MINOR)
#else
-#define VM_STACK VM_GROWSDOWN
-#define VM_STACK_EARLY 0
+#define VM_UFFD_MINOR VM_NONE
+#endif
+#ifdef CONFIG_64BIT
+#define VM_ALLOW_ANY_UNCACHED INIT_VM_FLAG(ALLOW_ANY_UNCACHED)
+#define VM_SEALED INIT_VM_FLAG(SEALED)
+#else
+#define VM_ALLOW_ANY_UNCACHED VM_NONE
+#define VM_SEALED VM_NONE
+#endif
+#if defined(CONFIG_64BIT) || defined(CONFIG_PPC32)
+#define VM_DROPPABLE INIT_VM_FLAG(DROPPABLE)
+#else
+#define VM_DROPPABLE VM_NONE
+#endif
+
+/* Bits set in the VMA until the stack is in its final location */
+#define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY)
+
+#define TASK_EXEC ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0)
+
+/* Common data flag combinations */
+#define VM_DATA_FLAGS_TSK_EXEC (VM_READ | VM_WRITE | TASK_EXEC | \
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+#define VM_DATA_FLAGS_NON_EXEC (VM_READ | VM_WRITE | VM_MAYREAD | \
+ VM_MAYWRITE | VM_MAYEXEC)
+#define VM_DATA_FLAGS_EXEC (VM_READ | VM_WRITE | VM_EXEC | \
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#ifndef VM_DATA_DEFAULT_FLAGS /* arch can override this */
+#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_EXEC
+#endif
+
+#ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */
+#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
#endif
+#define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK)
+
+#define VM_STACK_FLAGS (VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
+
+/* VMA basic access permission flags */
+#define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC)
+
+/*
+ * Special vmas that are non-mergable, non-mlock()able.
+ */
+#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP)
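Since INIT_VM_FLAG() just turns a bit number into a mask, the regenerated constants remain numerically identical to the old literal definitions. A sketch of that invariant, using C11 _Static_assert for brevity (kernel code would use static_assert()/BUILD_BUG_ON()):

_Static_assert(VM_READ   == 0x00000001, "VM_READ changed value");
_Static_assert(VM_WRITE  == 0x00000002, "VM_WRITE changed value");
_Static_assert(VM_LOCKED == 0x00002000, "VM_LOCKED changed value");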
+
#define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE)
#define TASK_SIZE_LOW DEFAULT_MAP_WINDOW
#define TASK_SIZE_MAX DEFAULT_MAP_WINDOW
@@ -96,25 +326,58 @@ extern unsigned long dac_mmap_min_addr;
#define VM_DATA_FLAGS_TSK_EXEC (VM_READ | VM_WRITE | TASK_EXEC | \
VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC
-
-#define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK)
-
-#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
-#define VM_STACK_FLAGS (VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
-#define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY)
-
#define RLIMIT_STACK 3 /* max stack size */
#define RLIMIT_MEMLOCK 8 /* max locked-in-memory address space */
#define CAP_IPC_LOCK 14
-#ifdef CONFIG_64BIT
-#define VM_SEALED_BIT 42
-#define VM_SEALED BIT(VM_SEALED_BIT)
-#else
-#define VM_SEALED VM_NONE
-#endif
+/*
+ * Flags which should be 'sticky' on merge - that is, flags which, when one VMA
+ * possesses them but the other does not, should nonetheless be applied to the
+ * merged VMA:
+ *
+ * VM_SOFTDIRTY - if a VMA is marked soft-dirty, that is, has not had its
+ *                references cleared via /proc/$pid/clear_refs, any merged VMA
+ *                should be considered soft-dirty also, as the flag operates at
+ *                a VMA granularity.
+ *
+ * VM_MAYBE_GUARD - if a VMA may contain guard regions, any VMA merged with it
+ *                  must be assumed to as well, as the flag operates at a VMA
+ *                  granularity.
+ */
+#define VM_STICKY (VM_SOFTDIRTY | VM_MAYBE_GUARD)
+
+/*
+ * VMA flags we ignore for the purposes of merge - i.e. one VMA possessing one
+ * of these flags while the other does not will not preclude a merge.
+ *
+ * VM_STICKY - When merging VMAs, VMA flags must match, unless they are
+ * 'sticky'. If any sticky flags exist in either VMA, we simply
+ * set all of them on the merged VMA.
+ */
+#define VM_IGNORE_MERGE VM_STICKY
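A minimal sketch of how these two masks might feed a merge check; the helper names here are illustrative, not the kernel's:

/* Flags must match exactly, apart from those we deliberately ignore. */
static bool flags_mergeable(vm_flags_t a, vm_flags_t b)
{
	return !((a ^ b) & ~VM_IGNORE_MERGE);
}

/* Sticky flags present in either VMA propagate to the merged VMA. */
static vm_flags_t merged_flags(vm_flags_t a, vm_flags_t b)
{
	return a | (b & VM_STICKY);
}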
+
+/*
+ * Flags which should result in page tables being copied on fork. These are
+ * flags which indicate that the VMA's page tables contain state which cannot
+ * be reconstituted upon page fault, and so necessitate page table copying on
+ * fork:
+ *
+ * VM_PFNMAP / VM_MIXEDMAP - These contain kernel-mapped data which cannot be
+ * reasonably reconstructed on page fault.
+ *
+ * VM_UFFD_WP - Encodes metadata about an installed uffd
+ * write protect handler, which cannot be
+ * reconstructed on page fault.
+ *
+ *                  We always copy page tables when dst_vma has uffd-wp
+ *                  enabled, even if it is file-backed (e.g. shmem): when
+ *                  uffd-wp is enabled, the page tables contain uffd-wp
+ *                  protection information which cannot be retrieved from
+ *                  the page cache, so skipping the copy would lose it.
+ *
+ * VM_MAYBE_GUARD - Could contain page guard region markers which
+ * by design are a property of the page tables
+ * only and thus cannot be reconstructed on page
+ * fault.
+ */
+#define VM_COPY_ON_FORK (VM_PFNMAP | VM_MIXEDMAP | VM_UFFD_WP | VM_MAYBE_GUARD)
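A sketch of the fork-time decision this mask feeds into. The helper name is illustrative; the real decision also considers, e.g., whether the VMA has an anon_vma attached:

static bool needs_page_table_copy(const struct vm_area_struct *vma)
{
	/* Anything reconstructible on fault can simply be faulted in. */
	return vma->vm_flags & VM_COPY_ON_FORK;
}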
#define FIRST_USER_ADDRESS 0UL
#define USER_PGTABLES_CEILING 0UL
@@ -163,6 +426,8 @@ typedef __bitwise unsigned int vm_fault_t;
#define ASSERT_EXCLUSIVE_WRITER(x)
+#define pgtable_supports_soft_dirty() 1
+
/**
* swap - swap values of @a and @b
* @a: first value
@@ -259,6 +524,15 @@ typedef struct {
__private DECLARE_BITMAP(__mm_flags, NUM_MM_FLAG_BITS);
} mm_flags_t;
+/*
+ * Opaque type representing current VMA (vm_area_struct) flag state. Must be
+ * accessed via vma_flags_xxx() helper functions.
+ */
+#define NUM_VMA_FLAG_BITS BITS_PER_LONG
+typedef struct {
+ DECLARE_BITMAP(__vma_flags, NUM_VMA_FLAG_BITS);
+} __private vma_flags_t;
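The union in vm_area_struct below relies on vm_flags_t aliasing the first word of this bitmap. A sketch of that invariant, using C11 _Static_assert for brevity:

_Static_assert(sizeof(vm_flags_t) == sizeof(unsigned long),
	       "vm_flags_t must alias bitmap word zero exactly");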
+
struct mm_struct {
struct maple_tree mm_mt;
int map_count; /* number of VMAs */
@@ -275,6 +549,57 @@ struct mm_struct {
struct vm_area_struct;
+
+/* What action should be taken after an .mmap_prepare call is complete? */
+enum mmap_action_type {
+ MMAP_NOTHING, /* Mapping is complete, no further action. */
+ MMAP_REMAP_PFN, /* Remap PFN range. */
+ MMAP_IO_REMAP_PFN, /* I/O remap PFN range. */
+};
+
+/*
+ * Describes an action which an mmap_prepare hook can request be taken to
+ * complete the mapping of a VMA. Specified in vm_area_desc.
+ */
+struct mmap_action {
+ union {
+ /* Remap range. */
+ struct {
+ unsigned long start;
+ unsigned long start_pfn;
+ unsigned long size;
+ pgprot_t pgprot;
+ } remap;
+ };
+ enum mmap_action_type type;
+
+ /*
+ * If specified, this hook is invoked after the selected action has been
+	 * successfully completed. Note that the VMA write lock is still held.
+ *
+ * The absolute minimum ought to be done here.
+ *
+ * Returns 0 on success, or an error code.
+ */
+ int (*success_hook)(const struct vm_area_struct *vma);
+
+ /*
+	 * If specified, this hook is invoked when an error occurs while
+	 * attempting the selected action.
+ *
+ * The hook can return an error code in order to filter the error, but
+ * it is not valid to clear the error here.
+ */
+ int (*error_hook)(int err);
+
+ /*
+	 * This should be set in rare instances where the operation requires
+	 * that rmap not be able to access the VMA until it is completely
+	 * set up.
+ */
+ bool hide_from_rmap_until_complete :1;
+};
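For illustration, a driver's mmap_prepare hook could fill in an action rather than remapping directly. A sketch assuming a hypothetical device with a fixed physical base address:

#define MYDEV_PHYS_BASE 0xfd000000UL	/* hypothetical */

static int mydev_mmap_prepare(struct vm_area_desc *desc)
{
	/* Ask the core to perform the PFN remap once the VMA exists. */
	desc->action.type = MMAP_REMAP_PFN;
	desc->action.remap.start = desc->start;
	desc->action.remap.start_pfn = MYDEV_PHYS_BASE >> PAGE_SHIFT;
	desc->action.remap.size = desc->end - desc->start;
	desc->action.remap.pgprot = desc->page_prot;
	return 0;
}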
+
/*
* Describes a VMA that is about to be mmap()'ed. Drivers may choose to
* manipulate mutable fields which will cause those fields to be updated in the
@@ -292,12 +617,18 @@ struct vm_area_desc {
/* Mutable fields. Populated with initial state. */
pgoff_t pgoff;
struct file *vm_file;
- vm_flags_t vm_flags;
+ union {
+ vm_flags_t vm_flags;
+ vma_flags_t vma_flags;
+ };
pgprot_t page_prot;
/* Write-only fields. */
const struct vm_operations_struct *vm_ops;
void *private_data;
+
+ /* Take further action? */
+ struct mmap_action action;
};
struct file_operations {
@@ -335,7 +666,7 @@ struct vm_area_struct {
*/
union {
const vm_flags_t vm_flags;
- vm_flags_t __private __vm_flags;
+ vma_flags_t flags;
};
#ifdef CONFIG_PER_VMA_LOCK
@@ -794,8 +1125,7 @@ static inline void update_hiwater_vm(struct mm_struct *mm)
static inline void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas,
struct vm_area_struct *vma, unsigned long start_addr,
- unsigned long end_addr, unsigned long tree_end,
- bool mm_wr_locked)
+ unsigned long end_addr, unsigned long tree_end)
{
}
@@ -844,6 +1174,14 @@ static inline void vma_start_write(struct vm_area_struct *vma)
vma->vm_lock_seq++;
}
+static inline __must_check
+int vma_start_write_killable(struct vm_area_struct *vma)
+{
+ /* Used to indicate to tests that a write operation has begun. */
+ vma->vm_lock_seq++;
+ return 0;
+}
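Being __must_check, the killable variant is expected to be used along these lines (a sketch; the real implementation can fail on a fatal signal, whereas this stub never does):

	int err = vma_start_write_killable(vma);

	if (err)
		return err;	/* Fatal signal while waiting for the lock. */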
+
static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
unsigned long start,
unsigned long end,
@@ -1042,26 +1380,6 @@ static inline bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags,
return true;
}
-static inline void vm_flags_init(struct vm_area_struct *vma,
- vm_flags_t flags)
-{
- vma->__vm_flags = flags;
-}
-
-static inline void vm_flags_set(struct vm_area_struct *vma,
- vm_flags_t flags)
-{
- vma_start_write(vma);
- vma->__vm_flags |= flags;
-}
-
-static inline void vm_flags_clear(struct vm_area_struct *vma,
- vm_flags_t flags)
-{
- vma_start_write(vma);
- vma->__vm_flags &= ~flags;
-}
-
static inline int shmem_zero_setup(struct vm_area_struct *vma)
{
return 0;
@@ -1218,13 +1536,118 @@ static inline void userfaultfd_unmap_complete(struct mm_struct *mm,
{
}
-# define ACCESS_PRIVATE(p, member) ((p)->member)
+#define ACCESS_PRIVATE(p, member) ((p)->member)
+
+#define bitmap_size(nbits) (ALIGN(nbits, BITS_PER_LONG) / BITS_PER_BYTE)
+
+static __always_inline void bitmap_zero(unsigned long *dst, unsigned int nbits)
+{
+ unsigned int len = bitmap_size(nbits);
+
+ if (small_const_nbits(nbits))
+ *dst = 0;
+ else
+ memset(dst, 0, len);
+}
static inline bool mm_flags_test(int flag, const struct mm_struct *mm)
{
return test_bit(flag, ACCESS_PRIVATE(&mm->flags, __mm_flags));
}
+/* Clears all bits in the VMA flags bitmap, non-atomically. */
+static inline void vma_flags_clear_all(vma_flags_t *flags)
+{
+ bitmap_zero(ACCESS_PRIVATE(flags, __vma_flags), NUM_VMA_FLAG_BITS);
+}
+
+/*
+ * Copy value to the first system word of VMA flags, non-atomically.
+ *
+ * IMPORTANT: This does not overwrite bytes past the first system word. The
+ * caller must account for this.
+ */
+static inline void vma_flags_overwrite_word(vma_flags_t *flags, unsigned long value)
+{
+ *ACCESS_PRIVATE(flags, __vma_flags) = value;
+}
+
+/*
+ * Copy value to the first system word of VMA flags ONCE, non-atomically.
+ *
+ * IMPORTANT: This does not overwrite bytes past the first system word. The
+ * caller must account for this.
+ */
+static inline void vma_flags_overwrite_word_once(vma_flags_t *flags, unsigned long value)
+{
+ unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);
+
+ WRITE_ONCE(*bitmap, value);
+}
+
+/* Update the first system word of VMA flags setting bits, non-atomically. */
+static inline void vma_flags_set_word(vma_flags_t *flags, unsigned long value)
+{
+ unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);
+
+ *bitmap |= value;
+}
+
+/* Update the first system word of VMA flags clearing bits, non-atomically. */
+static inline void vma_flags_clear_word(vma_flags_t *flags, unsigned long value)
+{
+ unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);
+
+ *bitmap &= ~value;
+}
+
+/* Use when VMA is not part of the VMA tree and needs no locking */
+static inline void vm_flags_init(struct vm_area_struct *vma,
+ vm_flags_t flags)
+{
+ vma_flags_clear_all(&vma->flags);
+ vma_flags_overwrite_word(&vma->flags, flags);
+}
+
+/*
+ * Use when the VMA is part of the VMA tree and modifications need coordination.
+ * Note: vm_flags_reset() and vm_flags_reset_once() do not lock the VMA, so it
+ * should be locked explicitly beforehand.
+ */
+static inline void vm_flags_reset(struct vm_area_struct *vma,
+ vm_flags_t flags)
+{
+ vma_assert_write_locked(vma);
+ vm_flags_init(vma, flags);
+}
+
+static inline void vm_flags_reset_once(struct vm_area_struct *vma,
+ vm_flags_t flags)
+{
+ vma_assert_write_locked(vma);
+ /*
+	 * Callers are only expected to care about avoiding reordering of
+	 * the assignment to the first word.
+ */
+ vma_flags_clear_all(&vma->flags);
+ vma_flags_overwrite_word_once(&vma->flags, flags);
+}
+
+static inline void vm_flags_set(struct vm_area_struct *vma,
+ vm_flags_t flags)
+{
+ vma_start_write(vma);
+ vma_flags_set_word(&vma->flags, flags);
+}
+
+static inline void vm_flags_clear(struct vm_area_struct *vma,
+ vm_flags_t flags)
+{
+ vma_start_write(vma);
+ vma_flags_clear_word(&vma->flags, flags);
+}
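Taken together, these stubs mirror the kernel's locking discipline for flag updates; illustrative usage (a sketch):

	/* Freshly allocated, unlinked VMA: no locking required. */
	vm_flags_init(vma, VM_READ | VM_MAYREAD);

	/* VMA already in the tree: these take the VMA write lock. */
	vm_flags_set(vma, VM_LOCKED);
	vm_flags_clear(vma, VM_LOCKED);

	/* Wholesale reset: the caller must hold the write lock already. */
	vma_start_write(vma);
	vm_flags_reset(vma, VM_READ | VM_WRITE);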
+
/*
* Denies creating a writable executable mapping or gaining executable permissions.
*
@@ -1326,12 +1749,23 @@ static inline void free_anon_vma_name(struct vm_area_struct *vma)
static inline void set_vma_from_desc(struct vm_area_struct *vma,
struct vm_area_desc *desc);
-static inline int __compat_vma_mmap_prepare(const struct file_operations *f_op,
+static inline void mmap_action_prepare(struct mmap_action *action,
+ struct vm_area_desc *desc)
+{
+}
+
+static inline int mmap_action_complete(struct mmap_action *action,
+ struct vm_area_struct *vma)
+{
+ return 0;
+}
+
+static inline int __compat_vma_mmap(const struct file_operations *f_op,
struct file *file, struct vm_area_struct *vma)
{
struct vm_area_desc desc = {
.mm = vma->vm_mm,
- .file = vma->vm_file,
+ .file = file,
.start = vma->vm_start,
.end = vma->vm_end,
@@ -1339,21 +1773,24 @@ static inline int __compat_vma_mmap_prepare(const struct file_operations *f_op,
.vm_file = vma->vm_file,
.vm_flags = vma->vm_flags,
.page_prot = vma->vm_page_prot,
+
+ .action.type = MMAP_NOTHING, /* Default */
};
int err;
err = f_op->mmap_prepare(&desc);
if (err)
return err;
- set_vma_from_desc(vma, &desc);
- return 0;
+ mmap_action_prepare(&desc.action, &desc);
+ set_vma_from_desc(vma, &desc);
+ return mmap_action_complete(&desc.action, vma);
}
-static inline int compat_vma_mmap_prepare(struct file *file,
+static inline int compat_vma_mmap(struct file *file,
struct vm_area_struct *vma)
{
- return __compat_vma_mmap_prepare(file->f_op, file, vma);
+ return __compat_vma_mmap(file->f_op, file, vma);
}
/* Did the driver provide valid mmap hook configuration? */
@@ -1374,7 +1811,7 @@ static inline bool can_mmap_file(struct file *file)
static inline int vfs_mmap(struct file *file, struct vm_area_struct *vma)
{
if (file->f_op->mmap_prepare)
- return compat_vma_mmap_prepare(file, vma);
+ return compat_vma_mmap(file, vma);
return file->f_op->mmap(file, vma);
}
@@ -1407,4 +1844,20 @@ static inline vm_flags_t ksm_vma_flags(const struct mm_struct *mm,
return vm_flags;
}
+static inline void remap_pfn_range_prepare(struct vm_area_desc *desc, unsigned long pfn)
+{
+}
+
+static inline int remap_pfn_range_complete(struct vm_area_struct *vma, unsigned long addr,
+ unsigned long pfn, unsigned long size, pgprot_t pgprot)
+{
+ return 0;
+}
+
+static inline int do_munmap(struct mm_struct *mm, unsigned long start,
+			    size_t len, struct list_head *uf)
+{
+ return 0;
+}
+
#endif /* __MM_VMA_INTERNAL_H */