Diffstat (limited to 'tools/testing')
-rwxr-xr-x  tools/testing/ktest/config-bisect.pl | 4
-rw-r--r--  tools/testing/selftests/damon/_damon_sysfs.py | 11
-rwxr-xr-x  tools/testing/selftests/damon/drgn_dump_damon_status.py | 9
-rwxr-xr-x  tools/testing/selftests/damon/sysfs.py | 71
-rwxr-xr-x  tools/testing/selftests/ftrace/ftracetest | 34
-rw-r--r--  tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc | 2
-rw-r--r--  tools/testing/selftests/ftrace/test.d/functions | 6
-rw-r--r--  tools/testing/selftests/kvm/Makefile | 2
-rw-r--r--  tools/testing/selftests/kvm/Makefile.kvm | 12
-rw-r--r--  tools/testing/selftests/kvm/arm64/at.c | 166
-rw-r--r--  tools/testing/selftests/kvm/arm64/sea_to_user.c | 331
-rw-r--r--  tools/testing/selftests/kvm/arm64/vgic_irq.c | 287
-rw-r--r--  tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c | 4
-rw-r--r--  tools/testing/selftests/kvm/guest_memfd_test.c | 98
-rw-r--r--  tools/testing/selftests/kvm/include/arm64/gic.h | 1
-rw-r--r--  tools/testing/selftests/kvm/include/arm64/gic_v3_its.h | 1
-rw-r--r--  tools/testing/selftests/kvm/include/kvm_syscalls.h | 81
-rw-r--r--  tools/testing/selftests/kvm/include/kvm_util.h | 45
-rw-r--r--  tools/testing/selftests/kvm/include/loongarch/arch_timer.h | 85
-rw-r--r--  tools/testing/selftests/kvm/include/loongarch/processor.h | 81
-rw-r--r--  tools/testing/selftests/kvm/include/numaif.h | 110
-rw-r--r--  tools/testing/selftests/kvm/include/x86/processor.h | 2
-rw-r--r--  tools/testing/selftests/kvm/include/x86/vmx.h | 3
-rw-r--r--  tools/testing/selftests/kvm/kvm_binary_stats_test.c | 4
-rw-r--r--  tools/testing/selftests/kvm/lib/arm64/gic.c | 6
-rw-r--r--  tools/testing/selftests/kvm/lib/arm64/gic_private.h | 1
-rw-r--r--  tools/testing/selftests/kvm/lib/arm64/gic_v3.c | 22
-rw-r--r--  tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c | 10
-rw-r--r--  tools/testing/selftests/kvm/lib/arm64/processor.c | 2
-rw-r--r--  tools/testing/selftests/kvm/lib/kvm_util.c | 145
-rw-r--r--  tools/testing/selftests/kvm/lib/loongarch/exception.S | 6
-rw-r--r--  tools/testing/selftests/kvm/lib/loongarch/processor.c | 47
-rw-r--r--  tools/testing/selftests/kvm/lib/x86/memstress.c | 2
-rw-r--r--  tools/testing/selftests/kvm/lib/x86/processor.c | 82
-rw-r--r--  tools/testing/selftests/kvm/lib/x86/vmx.c | 9
-rw-r--r--  tools/testing/selftests/kvm/loongarch/arch_timer.c | 200
-rw-r--r--  tools/testing/selftests/kvm/mmu_stress_test.c | 10
-rw-r--r--  tools/testing/selftests/kvm/pre_fault_memory_test.c | 32
-rw-r--r--  tools/testing/selftests/kvm/riscv/get-reg-list.c | 4
-rw-r--r--  tools/testing/selftests/kvm/s390/user_operexec.c | 140
-rw-r--r--  tools/testing/selftests/kvm/x86/hyperv_features.c | 2
-rw-r--r--  tools/testing/selftests/kvm/x86/hyperv_ipi.c | 18
-rw-r--r--  tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c | 2
-rw-r--r--  tools/testing/selftests/kvm/x86/nested_close_kvm_test.c (renamed from tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c) | 42
-rw-r--r--  tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c | 116
-rw-r--r--  tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c (renamed from tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c) | 79
-rw-r--r--  tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c (renamed from tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c) | 48
-rw-r--r--  tools/testing/selftests/kvm/x86/private_mem_conversions_test.c | 9
-rw-r--r--  tools/testing/selftests/kvm/x86/sev_smoke_test.c | 2
-rw-r--r--  tools/testing/selftests/kvm/x86/state_test.c | 2
-rw-r--r--  tools/testing/selftests/kvm/x86/userspace_io_test.c | 2
-rw-r--r--  tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c | 12
-rw-r--r--  tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c | 132
-rw-r--r--  tools/testing/selftests/kvm/x86/xapic_ipi_test.c | 5
-rw-r--r--  tools/testing/selftests/mm/guard-regions.c | 185
-rw-r--r--  tools/testing/selftests/mm/gup_test.c | 24
-rw-r--r--  tools/testing/selftests/mm/hmm-tests.c | 919
-rw-r--r--  tools/testing/selftests/mm/ksm_functional_tests.c | 57
-rw-r--r--  tools/testing/selftests/mm/mremap_test.c | 5
-rw-r--r--  tools/testing/selftests/mm/soft-dirty.c | 127
-rw-r--r--  tools/testing/selftests/mm/uffd-common.c | 24
-rw-r--r--  tools/testing/selftests/mm/uffd-stress.c | 2
-rw-r--r--  tools/testing/selftests/mm/uffd-unit-tests.c | 8
-rw-r--r--  tools/testing/selftests/mm/vm_util.c | 5
-rw-r--r--  tools/testing/selftests/mm/vm_util.h | 1
-rw-r--r--  tools/testing/selftests/riscv/hwprobe/cbo.c | 165
-rw-r--r--  tools/testing/selftests/riscv/vector/Makefile | 5
-rw-r--r--  tools/testing/selftests/riscv/vector/vstate_ptrace.c | 134
-rw-r--r--  tools/testing/selftests/tpm2/tpm2.py | 4
-rw-r--r--  tools/testing/selftests/verification/.gitignore | 2
-rw-r--r--  tools/testing/selftests/verification/Makefile | 8
-rw-r--r--  tools/testing/selftests/verification/config | 1
-rw-r--r--  tools/testing/selftests/verification/settings | 1
-rw-r--r--  tools/testing/selftests/verification/test.d/functions | 39
-rw-r--r--  tools/testing/selftests/verification/test.d/rv_monitor_enable_disable.tc | 75
-rw-r--r--  tools/testing/selftests/verification/test.d/rv_monitor_reactor.tc | 68
-rw-r--r--  tools/testing/selftests/verification/test.d/rv_monitors_available.tc | 18
-rw-r--r--  tools/testing/selftests/verification/test.d/rv_wwnr_printk.tc | 30
-rwxr-xr-x  tools/testing/selftests/verification/verificationtest-ktap | 8
-rw-r--r--  tools/testing/vma/vma.c | 112
-rw-r--r--  tools/testing/vma/vma_internal.h | 603
81 files changed, 4677 insertions, 592 deletions
diff --git a/tools/testing/ktest/config-bisect.pl b/tools/testing/ktest/config-bisect.pl
index 6fd864935319..bee7cb34a289 100755
--- a/tools/testing/ktest/config-bisect.pl
+++ b/tools/testing/ktest/config-bisect.pl
@@ -741,9 +741,9 @@ if ($start) {
die "Can not find file $bad\n";
}
if ($val eq "good") {
- run_command "cp $output_config $good" or die "failed to copy $config to $good\n";
+ run_command "cp $output_config $good" or die "failed to copy $output_config to $good\n";
} elsif ($val eq "bad") {
- run_command "cp $output_config $bad" or die "failed to copy $config to $bad\n";
+ run_command "cp $output_config $bad" or die "failed to copy $output_config to $bad\n";
}
}
diff --git a/tools/testing/selftests/damon/_damon_sysfs.py b/tools/testing/selftests/damon/_damon_sysfs.py
index a0e6290833fb..748778b563cd 100644
--- a/tools/testing/selftests/damon/_damon_sysfs.py
+++ b/tools/testing/selftests/damon/_damon_sysfs.py
@@ -475,12 +475,14 @@ class Damos:
class DamonTarget:
pid = None
+ obsolete = None
# todo: Support target regions if test is made
idx = None
context = None
- def __init__(self, pid):
+ def __init__(self, pid, obsolete=False):
self.pid = pid
+ self.obsolete = obsolete
def sysfs_dir(self):
return os.path.join(
@@ -491,8 +493,13 @@ class DamonTarget:
os.path.join(self.sysfs_dir(), 'regions', 'nr_regions'), '0')
if err is not None:
return err
- return write_file(
+ err = write_file(
os.path.join(self.sysfs_dir(), 'pid_target'), self.pid)
+ if err is not None:
+ return err
+ return write_file(
+ os.path.join(self.sysfs_dir(), 'obsolete_target'),
+ 'Y' if self.obsolete else 'N')
class IntervalsGoal:
access_bp = None
diff --git a/tools/testing/selftests/damon/drgn_dump_damon_status.py b/tools/testing/selftests/damon/drgn_dump_damon_status.py
index 7233369a3a44..5374d18d1fa8 100755
--- a/tools/testing/selftests/damon/drgn_dump_damon_status.py
+++ b/tools/testing/selftests/damon/drgn_dump_damon_status.py
@@ -73,6 +73,7 @@ def target_to_dict(target):
['pid', int],
['nr_regions', int],
['regions_list', regions_to_list],
+ ['obsolete', bool],
])
def targets_to_list(targets):
@@ -174,11 +175,11 @@ def scheme_to_dict(scheme):
['target_nid', int],
['migrate_dests', damos_migrate_dests_to_dict],
])
- filters = []
+ core_filters = []
for f in list_for_each_entry(
- 'struct damos_filter', scheme.filters.address_of_(), 'list'):
- filters.append(damos_filter_to_dict(f))
- dict_['filters'] = filters
+ 'struct damos_filter', scheme.core_filters.address_of_(), 'list'):
+ core_filters.append(damos_filter_to_dict(f))
+ dict_['core_filters'] = core_filters
ops_filters = []
for f in list_for_each_entry(
'struct damos_filter', scheme.ops_filters.address_of_(), 'list'):
diff --git a/tools/testing/selftests/damon/sysfs.py b/tools/testing/selftests/damon/sysfs.py
index 2666c6f0f1a5..9cca71eb0325 100755
--- a/tools/testing/selftests/damon/sysfs.py
+++ b/tools/testing/selftests/damon/sysfs.py
@@ -132,7 +132,7 @@ def assert_scheme_committed(scheme, dump):
assert_watermarks_committed(scheme.watermarks, dump['wmarks'])
# TODO: test filters directory
for idx, f in enumerate(scheme.core_filters.filters):
- assert_filter_committed(f, dump['filters'][idx])
+ assert_filter_committed(f, dump['core_filters'][idx])
for idx, f in enumerate(scheme.ops_filters.filters):
assert_filter_committed(f, dump['ops_filters'][idx])
@@ -164,6 +164,16 @@ def assert_monitoring_attrs_committed(attrs, dump):
assert_true(dump['max_nr_regions'] == attrs.max_nr_regions,
'max_nr_regions', dump)
+def assert_monitoring_target_committed(target, dump):
+ # target.pid is the pid "number", while dump['pid'] is 'struct pid'
+ # pointer, and hence cannot be compared.
+ assert_true(dump['obsolete'] == target.obsolete, 'target obsolete', dump)
+
+def assert_monitoring_targets_committed(targets, dump):
+ assert_true(len(targets) == len(dump), 'len_targets', dump)
+ for idx, target in enumerate(targets):
+ assert_monitoring_target_committed(target, dump[idx])
+
def assert_ctx_committed(ctx, dump):
ops_val = {
'vaddr': 0,
@@ -172,9 +182,18 @@ def assert_ctx_committed(ctx, dump):
}
assert_true(dump['ops']['id'] == ops_val[ctx.ops], 'ops_id', dump)
assert_monitoring_attrs_committed(ctx.monitoring_attrs, dump['attrs'])
+ assert_monitoring_targets_committed(ctx.targets, dump['adaptive_targets'])
assert_schemes_committed(ctx.schemes, dump['schemes'])
-def assert_ctxs_committed(ctxs, dump):
+def assert_ctxs_committed(kdamonds):
+ status, err = dump_damon_status_dict(kdamonds.kdamonds[0].pid)
+ if err is not None:
+ print(err)
+ kdamonds.stop()
+ exit(1)
+
+ ctxs = kdamonds.kdamonds[0].contexts
+ dump = status['contexts']
assert_true(len(ctxs) == len(dump), 'ctxs length', dump)
for idx, ctx in enumerate(ctxs):
assert_ctx_committed(ctx, dump[idx])
@@ -191,13 +210,7 @@ def main():
print('kdamond start failed: %s' % err)
exit(1)
- status, err = dump_damon_status_dict(kdamonds.kdamonds[0].pid)
- if err is not None:
- print(err)
- kdamonds.stop()
- exit(1)
-
- assert_ctxs_committed(kdamonds.kdamonds[0].contexts, status['contexts'])
+ assert_ctxs_committed(kdamonds)
context = _damon_sysfs.DamonCtx(
monitoring_attrs=_damon_sysfs.DamonAttrs(
@@ -245,12 +258,7 @@ def main():
kdamonds.kdamonds[0].contexts = [context]
kdamonds.kdamonds[0].commit()
- status, err = dump_damon_status_dict(kdamonds.kdamonds[0].pid)
- if err is not None:
- print(err)
- exit(1)
-
- assert_ctxs_committed(kdamonds.kdamonds[0].contexts, status['contexts'])
+ assert_ctxs_committed(kdamonds)
# test online commitment of minimum context.
context = _damon_sysfs.DamonCtx()
@@ -259,13 +267,36 @@ def main():
kdamonds.kdamonds[0].contexts = [context]
kdamonds.kdamonds[0].commit()
- status, err = dump_damon_status_dict(kdamonds.kdamonds[0].pid)
- if err is not None:
- print(err)
- exit(1)
+ assert_ctxs_committed(kdamonds)
- assert_ctxs_committed(kdamonds.kdamonds[0].contexts, status['contexts'])
+ kdamonds.stop()
+ # test obsolete_target.
+ proc1 = subprocess.Popen(['sh'], stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ proc2 = subprocess.Popen(['sh'], stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ proc3 = subprocess.Popen(['sh'], stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ kdamonds = _damon_sysfs.Kdamonds(
+ [_damon_sysfs.Kdamond(
+ contexts=[_damon_sysfs.DamonCtx(
+ ops='vaddr',
+ targets=[
+ _damon_sysfs.DamonTarget(pid=proc1.pid),
+ _damon_sysfs.DamonTarget(pid=proc2.pid),
+ _damon_sysfs.DamonTarget(pid=proc3.pid),
+ ],
+ schemes=[_damon_sysfs.Damos()],
+ )])])
+ err = kdamonds.start()
+ if err is not None:
+ print('kdamond start failed: %s' % err)
+ exit(1)
+ kdamonds.kdamonds[0].contexts[0].targets[1].obsolete = True
+ kdamonds.kdamonds[0].commit()
+ del kdamonds.kdamonds[0].contexts[0].targets[1]
+ assert_ctxs_committed(kdamonds)
kdamonds.stop()
if __name__ == '__main__':
diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest
index cce72f8b03dc..3230bd54dba8 100755
--- a/tools/testing/selftests/ftrace/ftracetest
+++ b/tools/testing/selftests/ftrace/ftracetest
@@ -22,6 +22,7 @@ echo " --fail-unresolved Treat UNRESOLVED as a failure"
echo " -d|--debug Debug mode (trace all shell commands)"
echo " -l|--logdir <dir> Save logs on the <dir>"
echo " If <dir> is -, all logs output in console only"
+echo " --rv Run RV selftests instead of ftrace ones"
exit $1
}
@@ -133,6 +134,10 @@ parse_opts() { # opts
LINK_PTR=
shift 2
;;
+ --rv)
+ RV_TEST=1
+ shift 1
+ ;;
*.tc)
if [ -f "$1" ]; then
OPT_TEST_CASES="$OPT_TEST_CASES `abspath $1`"
@@ -152,9 +157,13 @@ parse_opts() { # opts
;;
esac
done
- if [ ! -z "$OPT_TEST_CASES" ]; then
+ if [ -n "$OPT_TEST_CASES" ]; then
TEST_CASES=$OPT_TEST_CASES
fi
+ if [ -n "$OPT_TEST_DIR" -a -f "$OPT_TEST_DIR"/test.d/functions ]; then
+ TOP_DIR=$OPT_TEST_DIR
+ TEST_DIR=$TOP_DIR/test.d
+ fi
}
# Parameters
@@ -190,10 +199,6 @@ fi
TOP_DIR=`absdir $0`
TEST_DIR=$TOP_DIR/test.d
TEST_CASES=`find_testcases $TEST_DIR`
-LOG_TOP_DIR=$TOP_DIR/logs
-LOG_DATE=`date +%Y%m%d-%H%M%S`
-LOG_DIR=$LOG_TOP_DIR/$LOG_DATE/
-LINK_PTR=$LOG_TOP_DIR/latest
KEEP_LOG=0
KTAP=0
DEBUG=0
@@ -201,14 +206,23 @@ VERBOSE=0
UNSUPPORTED_RESULT=0
UNRESOLVED_RESULT=0
STOP_FAILURE=0
+RV_TEST=0
# Parse command-line options
parse_opts $*
+LOG_TOP_DIR=$TOP_DIR/logs
+LOG_DATE=`date +%Y%m%d-%H%M%S`
+LOG_DIR=$LOG_TOP_DIR/$LOG_DATE/
+LINK_PTR=$LOG_TOP_DIR/latest
+
[ $DEBUG -ne 0 ] && set -x
-# Verify parameters
-if [ -z "$TRACING_DIR" -o ! -d "$TRACING_DIR" ]; then
- errexit "No ftrace directory found"
+if [ $RV_TEST -ne 0 ]; then
+ TRACING_DIR=$TRACING_DIR/rv
+ if [ ! -d "$TRACING_DIR" ]; then
+ err_ret=$err_skip
+ errexit "rv is not configured in this kernel"
+ fi
fi
# Preparing logs
@@ -419,7 +433,7 @@ trap 'SIG_RESULT=$XFAIL' $SIG_XFAIL
__run_test() { # testfile
# setup PID and PPID, $$ is not updated.
(cd $TRACING_DIR; read PID _ < /proc/self/stat; set -e; set -x;
- checkreq $1; initialize_ftrace; . $1)
+ checkreq $1; initialize_system; . $1)
[ $? -ne 0 ] && kill -s $SIG_FAIL $SIG_PID
}
@@ -496,7 +510,7 @@ for t in $TEST_CASES; do
exit 1
fi
done
-(cd $TRACING_DIR; finish_ftrace) # for cleanup
+(cd $TRACING_DIR; finish_system) # for cleanup
prlog ""
prlog "# of passed: " `echo $PASSED_CASES | wc -w`
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc b/tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc
index 8a7ce647a60d..318939451caf 100644
--- a/tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc
+++ b/tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc
@@ -28,7 +28,7 @@ unmount_tracefs() {
local mount_point="$1"
# Need to make sure the mount isn't busy so that we can umount it
- (cd $mount_point; finish_ftrace;)
+ (cd $mount_point; finish_system;)
cleanup
}
diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions
index a1052bf460fc..e8e718139294 100644
--- a/tools/testing/selftests/ftrace/test.d/functions
+++ b/tools/testing/selftests/ftrace/test.d/functions
@@ -104,7 +104,7 @@ clear_dynamic_events() { # reset all current dynamic events
done
}
-initialize_ftrace() { # Reset ftrace to initial-state
+initialize_system() { # Reset ftrace to initial-state
# As the initial state, ftrace will be set to nop tracer,
# no events, no triggers, no filters, no function filters,
# no probes, and tracing on.
@@ -134,8 +134,8 @@ initialize_ftrace() { # Reset ftrace to initial-state
enable_tracing
}
-finish_ftrace() {
- initialize_ftrace
+finish_system() {
+ initialize_system
# And recover it to default.
[ -f options/pause-on-trace ] && echo 0 > options/pause-on-trace
}
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index d9fffe06d3ea..f2b223072b62 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -6,7 +6,7 @@ ARCH ?= $(SUBARCH)
ifeq ($(ARCH),$(filter $(ARCH),arm64 s390 riscv x86 x86_64 loongarch))
# Top-level selftests allows ARCH=x86_64 :-(
ifeq ($(ARCH),x86_64)
- ARCH := x86
+ override ARCH := x86
endif
include Makefile.kvm
else
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index 148d427ff24b..ba5c2b643efa 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -88,8 +88,12 @@ TEST_GEN_PROGS_x86 += x86/kvm_pv_test
TEST_GEN_PROGS_x86 += x86/kvm_buslock_test
TEST_GEN_PROGS_x86 += x86/monitor_mwait_test
TEST_GEN_PROGS_x86 += x86/msrs_test
+TEST_GEN_PROGS_x86 += x86/nested_close_kvm_test
TEST_GEN_PROGS_x86 += x86/nested_emulation_test
TEST_GEN_PROGS_x86 += x86/nested_exceptions_test
+TEST_GEN_PROGS_x86 += x86/nested_invalid_cr3_test
+TEST_GEN_PROGS_x86 += x86/nested_tsc_adjust_test
+TEST_GEN_PROGS_x86 += x86/nested_tsc_scaling_test
TEST_GEN_PROGS_x86 += x86/platform_info_test
TEST_GEN_PROGS_x86 += x86/pmu_counters_test
TEST_GEN_PROGS_x86 += x86/pmu_event_filter_test
@@ -111,14 +115,12 @@ TEST_GEN_PROGS_x86 += x86/ucna_injection_test
TEST_GEN_PROGS_x86 += x86/userspace_io_test
TEST_GEN_PROGS_x86 += x86/userspace_msr_exit_test
TEST_GEN_PROGS_x86 += x86/vmx_apic_access_test
-TEST_GEN_PROGS_x86 += x86/vmx_close_while_nested_test
TEST_GEN_PROGS_x86 += x86/vmx_dirty_log_test
TEST_GEN_PROGS_x86 += x86/vmx_exception_with_invalid_guest_state
TEST_GEN_PROGS_x86 += x86/vmx_msrs_test
TEST_GEN_PROGS_x86 += x86/vmx_invalid_nested_guest_state
+TEST_GEN_PROGS_x86 += x86/vmx_nested_la57_state_test
TEST_GEN_PROGS_x86 += x86/vmx_set_nested_state_test
-TEST_GEN_PROGS_x86 += x86/vmx_tsc_adjust_test
-TEST_GEN_PROGS_x86 += x86/vmx_nested_tsc_scaling_test
TEST_GEN_PROGS_x86 += x86/apic_bus_clock_test
TEST_GEN_PROGS_x86 += x86/xapic_ipi_test
TEST_GEN_PROGS_x86 += x86/xapic_state_test
@@ -156,6 +158,7 @@ TEST_GEN_PROGS_EXTENDED_x86 += x86/nx_huge_pages_test
TEST_GEN_PROGS_arm64 = $(TEST_GEN_PROGS_COMMON)
TEST_GEN_PROGS_arm64 += arm64/aarch32_id_regs
TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases
+TEST_GEN_PROGS_arm64 += arm64/at
TEST_GEN_PROGS_arm64 += arm64/debug-exceptions
TEST_GEN_PROGS_arm64 += arm64/hello_el2
TEST_GEN_PROGS_arm64 += arm64/host_sve
@@ -163,6 +166,7 @@ TEST_GEN_PROGS_arm64 += arm64/hypercalls
TEST_GEN_PROGS_arm64 += arm64/external_aborts
TEST_GEN_PROGS_arm64 += arm64/page_fault_test
TEST_GEN_PROGS_arm64 += arm64/psci_test
+TEST_GEN_PROGS_arm64 += arm64/sea_to_user
TEST_GEN_PROGS_arm64 += arm64/set_id_regs
TEST_GEN_PROGS_arm64 += arm64/smccc_filter
TEST_GEN_PROGS_arm64 += arm64/vcpu_width_config
@@ -194,6 +198,7 @@ TEST_GEN_PROGS_s390 += s390/debug_test
TEST_GEN_PROGS_s390 += s390/cpumodel_subfuncs_test
TEST_GEN_PROGS_s390 += s390/shared_zeropage_test
TEST_GEN_PROGS_s390 += s390/ucontrol_test
+TEST_GEN_PROGS_s390 += s390/user_operexec
TEST_GEN_PROGS_s390 += rseq_test
TEST_GEN_PROGS_riscv = $(TEST_GEN_PROGS_COMMON)
@@ -210,6 +215,7 @@ TEST_GEN_PROGS_riscv += mmu_stress_test
TEST_GEN_PROGS_riscv += rseq_test
TEST_GEN_PROGS_riscv += steal_time
+TEST_GEN_PROGS_loongarch = arch_timer
TEST_GEN_PROGS_loongarch += coalesced_io_test
TEST_GEN_PROGS_loongarch += demand_paging_test
TEST_GEN_PROGS_loongarch += dirty_log_perf_test
diff --git a/tools/testing/selftests/kvm/arm64/at.c b/tools/testing/selftests/kvm/arm64/at.c
new file mode 100644
index 000000000000..c8ee6f520734
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/at.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * at - Test for KVM's AT emulation in the EL2&0 and EL1&0 translation regimes.
+ */
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+#include "ucall.h"
+
+#include <asm/sysreg.h>
+
+#define TEST_ADDR 0x80000000
+
+enum {
+ CLEAR_ACCESS_FLAG,
+ TEST_ACCESS_FLAG,
+};
+
+static u64 *ptep_hva;
+
+#define copy_el2_to_el1(reg) \
+ write_sysreg_s(read_sysreg_s(SYS_##reg##_EL1), SYS_##reg##_EL12)
+
+/* Yes, this is an ugly hack */
+#define __at(op, addr) write_sysreg_s(addr, op)
+
+#define test_at_insn(op, expect_fault) \
+do { \
+ u64 par, fsc; \
+ bool fault; \
+ \
+ GUEST_SYNC(CLEAR_ACCESS_FLAG); \
+ \
+ __at(OP_AT_##op, TEST_ADDR); \
+ isb(); \
+ par = read_sysreg(par_el1); \
+ \
+ fault = par & SYS_PAR_EL1_F; \
+ fsc = FIELD_GET(SYS_PAR_EL1_FST, par); \
+ \
+ __GUEST_ASSERT((expect_fault) == fault, \
+ "AT "#op": %sexpected fault (par: %lx)1", \
+ (expect_fault) ? "" : "un", par); \
+ if ((expect_fault)) { \
+ __GUEST_ASSERT(fsc == ESR_ELx_FSC_ACCESS_L(3), \
+ "AT "#op": expected access flag fault (par: %lx)", \
+ par); \
+ } else { \
+ GUEST_ASSERT_EQ(FIELD_GET(SYS_PAR_EL1_ATTR, par), MAIR_ATTR_NORMAL); \
+ GUEST_ASSERT_EQ(FIELD_GET(SYS_PAR_EL1_SH, par), PTE_SHARED >> 8); \
+ GUEST_ASSERT_EQ(par & SYS_PAR_EL1_PA, TEST_ADDR); \
+ GUEST_SYNC(TEST_ACCESS_FLAG); \
+ } \
+} while (0)
+
+static void test_at(bool expect_fault)
+{
+ test_at_insn(S1E2R, expect_fault);
+ test_at_insn(S1E2W, expect_fault);
+
+ /* Reuse the stage-1 MMU context from EL2 at EL1 */
+ copy_el2_to_el1(SCTLR);
+ copy_el2_to_el1(MAIR);
+ copy_el2_to_el1(TCR);
+ copy_el2_to_el1(TTBR0);
+ copy_el2_to_el1(TTBR1);
+
+ /* Disable stage-2 translation and enter a non-host context */
+ write_sysreg(0, vtcr_el2);
+ write_sysreg(0, vttbr_el2);
+ sysreg_clear_set(hcr_el2, HCR_EL2_TGE | HCR_EL2_VM, 0);
+ isb();
+
+ test_at_insn(S1E1R, expect_fault);
+ test_at_insn(S1E1W, expect_fault);
+}
+
+static void guest_code(void)
+{
+ sysreg_clear_set(tcr_el1, TCR_HA, 0);
+ isb();
+
+ test_at(true);
+
+ if (!SYS_FIELD_GET(ID_AA64MMFR1_EL1, HAFDBS, read_sysreg(id_aa64mmfr1_el1)))
+ GUEST_DONE();
+
+ /*
+ * KVM's software PTW makes the implementation choice that the AT
+ * instruction sets the access flag.
+ */
+ sysreg_clear_set(tcr_el1, 0, TCR_HA);
+ isb();
+ test_at(false);
+
+ GUEST_DONE();
+}
+
+static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc)
+{
+ switch (uc->args[1]) {
+ case CLEAR_ACCESS_FLAG:
+ /*
+ * Delete + reinstall the memslot to invalidate stage-2
+ * mappings of the stage-1 page tables, forcing KVM to
+ * use the 'slow' AT emulation path.
+ *
+ * This and clearing the access flag from host userspace
+ * ensures that the access flag cannot be set speculatively
+ * and is reliably cleared at the time of the AT instruction.
+ */
+ clear_bit(__ffs(PTE_AF), ptep_hva);
+ vm_mem_region_reload(vcpu->vm, vcpu->vm->memslots[MEM_REGION_PT]);
+ break;
+ case TEST_ACCESS_FLAG:
+ TEST_ASSERT(test_bit(__ffs(PTE_AF), ptep_hva),
+ "Expected access flag to be set (desc: %lu)", *ptep_hva);
+ break;
+ default:
+ TEST_FAIL("Unexpected SYNC arg: %lu", uc->args[1]);
+ }
+}
+
+static void run_test(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ while (true) {
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_DONE:
+ return;
+ case UCALL_SYNC:
+ handle_sync(vcpu, &uc);
+ continue;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ return;
+ default:
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+ }
+}
+
+int main(void)
+{
+ struct kvm_vcpu_init init;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(kvm_check_cap(KVM_CAP_ARM_EL2));
+
+ vm = vm_create(1);
+
+ kvm_get_default_vcpu_target(vm, &init);
+ init.features[0] |= BIT(KVM_ARM_VCPU_HAS_EL2);
+ vcpu = aarch64_vcpu_add(vm, 0, &init, guest_code);
+ kvm_arch_vm_finalize_vcpus(vm);
+
+ virt_map(vm, TEST_ADDR, TEST_ADDR, 1);
+ ptep_hva = virt_get_pte_hva_at_level(vm, TEST_ADDR, 3);
+ run_test(vcpu);
+
+ kvm_vm_free(vm);
+ return 0;
+}
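
As a rough sketch of what the PAR_EL1 checks in test_at_insn() boil down to, assuming the SYS_PAR_EL1_* field definitions from <asm/sysreg.h> that the test already relies on:

/* Hypothetical helper: decode PAR_EL1 after an AT instruction + ISB. */
static inline bool at_translation_ok(u64 par, u64 *pa_or_fsc)
{
	if (par & SYS_PAR_EL1_F) {
		/* Translation aborted: FST holds the fault status code. */
		*pa_or_fsc = FIELD_GET(SYS_PAR_EL1_FST, par);
		return false;
	}
	/* Translation succeeded: PA holds the output address bits. */
	*pa_or_fsc = par & SYS_PAR_EL1_PA;
	return true;
}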
diff --git a/tools/testing/selftests/kvm/arm64/sea_to_user.c b/tools/testing/selftests/kvm/arm64/sea_to_user.c
new file mode 100644
index 000000000000..573dd790aeb8
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/sea_to_user.c
@@ -0,0 +1,331 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test KVM returns to userspace with KVM_EXIT_ARM_SEA if host APEI fails
+ * to handle the SEA and userspace has opted in to KVM_CAP_ARM_SEA_TO_USER.
+ *
+ * After reaching userspace with expected arm_sea info, also test userspace
+ * injecting a synchronous external data abort into the guest.
+ *
+ * This test utilizes EINJ to generate a REAL synchronous external data
+ * abort by consuming a recoverable uncorrectable memory error. Therefore
+ * the device under test must support EINJ in both firmware and host kernel,
+ * including the notrigger feature. Otherwise the test will be skipped.
+ * The under-test platform's APEI should be unable to claim SEA. Otherwise
+ * the test will also be skipped.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "guest_modes.h"
+
+#define PAGE_PRESENT (1ULL << 63)
+#define PAGE_PHYSICAL 0x007fffffffffffffULL
+#define PAGE_ADDR_MASK (~(0xfffULL))
+
+/* Group ISV and ISS[23:14]. */
+#define ESR_ELx_INST_SYNDROME ((ESR_ELx_ISV) | (ESR_ELx_SAS) | \
+ (ESR_ELx_SSE) | (ESR_ELx_SRT_MASK) | \
+ (ESR_ELx_SF) | (ESR_ELx_AR))
+
+#define EINJ_ETYPE "/sys/kernel/debug/apei/einj/error_type"
+#define EINJ_ADDR "/sys/kernel/debug/apei/einj/param1"
+#define EINJ_MASK "/sys/kernel/debug/apei/einj/param2"
+#define EINJ_FLAGS "/sys/kernel/debug/apei/einj/flags"
+#define EINJ_NOTRIGGER "/sys/kernel/debug/apei/einj/notrigger"
+#define EINJ_DOIT "/sys/kernel/debug/apei/einj/error_inject"
+/* Memory Uncorrectable non-fatal. */
+#define ERROR_TYPE_MEMORY_UER 0x10
+/* Memory address and mask valid (param1 and param2). */
+#define MASK_MEMORY_UER 0b10
+
+/* Guest virtual address region = [2G, 3G). */
+#define START_GVA 0x80000000UL
+#define VM_MEM_SIZE 0x40000000UL
+/* Note: EINJ_OFFSET must be < VM_MEM_SIZE. */
+#define EINJ_OFFSET 0x01234badUL
+#define EINJ_GVA ((START_GVA) + (EINJ_OFFSET))
+
+static vm_paddr_t einj_gpa;
+static void *einj_hva;
+static uint64_t einj_hpa;
+static bool far_invalid;
+
+static uint64_t translate_to_host_paddr(unsigned long vaddr)
+{
+ uint64_t pinfo;
+ int64_t offset = vaddr / getpagesize() * sizeof(pinfo);
+ int fd;
+ uint64_t page_addr;
+ uint64_t paddr;
+
+ fd = open("/proc/self/pagemap", O_RDONLY);
+ if (fd < 0)
+ ksft_exit_fail_perror("Failed to open /proc/self/pagemap");
+ if (pread(fd, &pinfo, sizeof(pinfo), offset) != sizeof(pinfo)) {
+ close(fd);
+ ksft_exit_fail_perror("Failed to read /proc/self/pagemap");
+ }
+
+ close(fd);
+
+ if ((pinfo & PAGE_PRESENT) == 0)
+ ksft_exit_fail_perror("Page not present");
+
+ page_addr = (pinfo & PAGE_PHYSICAL) << MIN_PAGE_SHIFT;
+ paddr = page_addr + (vaddr & (getpagesize() - 1));
+ return paddr;
+}
+
+static void write_einj_entry(const char *einj_path, uint64_t val)
+{
+ char cmd[256] = {0};
+ FILE *cmdfile = NULL;
+
+ sprintf(cmd, "echo %#lx > %s", val, einj_path);
+ cmdfile = popen(cmd, "r");
+
+ if (pclose(cmdfile) == 0)
+ ksft_print_msg("echo %#lx > %s - done\n", val, einj_path);
+ else
+ ksft_exit_fail_perror("Failed to write EINJ entry");
+}
+
+static void inject_uer(uint64_t paddr)
+{
+ if (access("/sys/firmware/acpi/tables/EINJ", R_OK) == -1)
+ ksft_test_result_skip("EINJ table no available in firmware");
+
+ if (access(EINJ_ETYPE, R_OK | W_OK) == -1)
+ ksft_test_result_skip("EINJ module probably not loaded?");
+
+ write_einj_entry(EINJ_ETYPE, ERROR_TYPE_MEMORY_UER);
+ write_einj_entry(EINJ_FLAGS, MASK_MEMORY_UER);
+ write_einj_entry(EINJ_ADDR, paddr);
+ write_einj_entry(EINJ_MASK, ~0x0UL);
+ write_einj_entry(EINJ_NOTRIGGER, 1);
+ write_einj_entry(EINJ_DOIT, 1);
+}
+
+/*
+ * When host APEI successfully claims the SEA caused by guest_code, kernel
+ * will send SIGBUS signal with BUS_MCEERR_AR to test thread.
+ *
+ * We set up this SIGBUS handler to skip the test for that case.
+ */
+static void sigbus_signal_handler(int sig, siginfo_t *si, void *v)
+{
+ ksft_print_msg("SIGBUS (%d) received, dumping siginfo...\n", sig);
+ ksft_print_msg("si_signo=%d, si_errno=%d, si_code=%d, si_addr=%p\n",
+ si->si_signo, si->si_errno, si->si_code, si->si_addr);
+ if (si->si_code == BUS_MCEERR_AR)
+ ksft_test_result_skip("SEA is claimed by host APEI\n");
+ else
+ ksft_test_result_fail("Exit with signal unhandled\n");
+
+ exit(0);
+}
+
+static void setup_sigbus_handler(void)
+{
+ struct sigaction act;
+
+ memset(&act, 0, sizeof(act));
+ sigemptyset(&act.sa_mask);
+ act.sa_sigaction = sigbus_signal_handler;
+ act.sa_flags = SA_SIGINFO;
+ TEST_ASSERT(sigaction(SIGBUS, &act, NULL) == 0,
+ "Failed to setup SIGBUS handler");
+}
+
+static void guest_code(void)
+{
+ uint64_t guest_data;
+
+ /* Consuming the error will cause a SEA. */
+ guest_data = *(uint64_t *)EINJ_GVA;
+
+ GUEST_FAIL("Poison not protected by SEA: gva=%#lx, guest_data=%#lx\n",
+ EINJ_GVA, guest_data);
+}
+
+static void expect_sea_handler(struct ex_regs *regs)
+{
+ u64 esr = read_sysreg(esr_el1);
+ u64 far = read_sysreg(far_el1);
+ bool expect_far_invalid = far_invalid;
+
+ GUEST_PRINTF("Handling Guest SEA\n");
+ GUEST_PRINTF("ESR_EL1=%#lx, FAR_EL1=%#lx\n", esr, far);
+
+ GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR);
+ GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT);
+
+ if (expect_far_invalid) {
+ GUEST_ASSERT_EQ(esr & ESR_ELx_FnV, ESR_ELx_FnV);
+ GUEST_PRINTF("Guest observed garbage value in FAR\n");
+ } else {
+ GUEST_ASSERT_EQ(esr & ESR_ELx_FnV, 0);
+ GUEST_ASSERT_EQ(far, EINJ_GVA);
+ }
+
+ GUEST_DONE();
+}
+
+static void vcpu_inject_sea(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_events events = {};
+
+ events.exception.ext_dabt_pending = true;
+ vcpu_events_set(vcpu, &events);
+}
+
+static void run_vm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+ bool guest_done = false;
+ struct kvm_run *run = vcpu->run;
+ u64 esr;
+
+ /* Resume the vCPU after error injection to consume the error. */
+ vcpu_run(vcpu);
+
+ ksft_print_msg("Dump kvm_run info about KVM_EXIT_%s\n",
+ exit_reason_str(run->exit_reason));
+ ksft_print_msg("kvm_run.arm_sea: esr=%#llx, flags=%#llx\n",
+ run->arm_sea.esr, run->arm_sea.flags);
+ ksft_print_msg("kvm_run.arm_sea: gva=%#llx, gpa=%#llx\n",
+ run->arm_sea.gva, run->arm_sea.gpa);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_SEA);
+
+ esr = run->arm_sea.esr;
+ TEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_LOW);
+ TEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT);
+ TEST_ASSERT_EQ(ESR_ELx_ISS2(esr), 0);
+ TEST_ASSERT_EQ((esr & ESR_ELx_INST_SYNDROME), 0);
+ TEST_ASSERT_EQ(esr & ESR_ELx_VNCR, 0);
+
+ if (!(esr & ESR_ELx_FnV)) {
+ ksft_print_msg("Expect gva to match given FnV bit is 0\n");
+ TEST_ASSERT_EQ(run->arm_sea.gva, EINJ_GVA);
+ }
+
+ if (run->arm_sea.flags & KVM_EXIT_ARM_SEA_FLAG_GPA_VALID) {
+ ksft_print_msg("Expect gpa to match given KVM_EXIT_ARM_SEA_FLAG_GPA_VALID is set\n");
+ TEST_ASSERT_EQ(run->arm_sea.gpa, einj_gpa & PAGE_ADDR_MASK);
+ }
+
+ far_invalid = esr & ESR_ELx_FnV;
+
+ /* Inject a SEA into guest and expect handled in SEA handler. */
+ vcpu_inject_sea(vcpu);
+
+ /* Expect the guest to reach GUEST_DONE gracefully. */
+ do {
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_PRINTF:
+ ksft_print_msg("From guest: %s", uc.buffer);
+ break;
+ case UCALL_DONE:
+ ksft_print_msg("Guest done gracefully!\n");
+ guest_done = 1;
+ break;
+ case UCALL_ABORT:
+ ksft_print_msg("Guest aborted!\n");
+ guest_done = 1;
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ default:
+ TEST_FAIL("Unexpected ucall: %lu\n", uc.cmd);
+ }
+ } while (!guest_done);
+}
+
+static struct kvm_vm *vm_create_with_sea_handler(struct kvm_vcpu **vcpu)
+{
+ size_t backing_page_size;
+ size_t guest_page_size;
+ size_t alignment;
+ uint64_t num_guest_pages;
+ vm_paddr_t start_gpa;
+ enum vm_mem_backing_src_type src_type = VM_MEM_SRC_ANONYMOUS_HUGETLB_1GB;
+ struct kvm_vm *vm;
+
+ backing_page_size = get_backing_src_pagesz(src_type);
+ guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size;
+ alignment = max(backing_page_size, guest_page_size);
+ num_guest_pages = VM_MEM_SIZE / guest_page_size;
+
+ vm = __vm_create_with_one_vcpu(vcpu, num_guest_pages, guest_code);
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(*vcpu);
+
+ vm_install_sync_handler(vm,
+ /*vector=*/VECTOR_SYNC_CURRENT,
+ /*ec=*/ESR_ELx_EC_DABT_CUR,
+ /*handler=*/expect_sea_handler);
+
+ start_gpa = (vm->max_gfn - num_guest_pages) * guest_page_size;
+ start_gpa = align_down(start_gpa, alignment);
+
+ vm_userspace_mem_region_add(
+ /*vm=*/vm,
+ /*src_type=*/src_type,
+ /*guest_paddr=*/start_gpa,
+ /*slot=*/1,
+ /*npages=*/num_guest_pages,
+ /*flags=*/0);
+
+ virt_map(vm, START_GVA, start_gpa, num_guest_pages);
+
+ ksft_print_msg("Mapped %#lx pages: gva=%#lx to gpa=%#lx\n",
+ num_guest_pages, START_GVA, start_gpa);
+ return vm;
+}
+
+static void vm_inject_memory_uer(struct kvm_vm *vm)
+{
+ uint64_t guest_data;
+
+ einj_gpa = addr_gva2gpa(vm, EINJ_GVA);
+ einj_hva = addr_gva2hva(vm, EINJ_GVA);
+
+ /* Populate certain data before injecting UER. */
+ *(uint64_t *)einj_hva = 0xBAADCAFE;
+ guest_data = *(uint64_t *)einj_hva;
+ ksft_print_msg("Before EINJect: data=%#lx\n",
+ guest_data);
+
+ einj_hpa = translate_to_host_paddr((unsigned long)einj_hva);
+
+ ksft_print_msg("EINJ_GVA=%#lx, einj_gpa=%#lx, einj_hva=%p, einj_hpa=%#lx\n",
+ EINJ_GVA, einj_gpa, einj_hva, einj_hpa);
+
+ inject_uer(einj_hpa);
+ ksft_print_msg("Memory UER EINJected\n");
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SEA_TO_USER));
+
+ setup_sigbus_handler();
+
+ vm = vm_create_with_sea_handler(&vcpu);
+ vm_enable_cap(vm, KVM_CAP_ARM_SEA_TO_USER, 0);
+ vm_inject_memory_uer(vm);
+ run_vm(vm, vcpu);
+ kvm_vm_free(vm);
+
+ return 0;
+}
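
Outside the selftest harness, a minimal sketch of how a VMM might consume the new KVM_EXIT_ARM_SEA exit and then re-inject an external data abort, mirroring vcpu_inject_sea() above (error handling omitted; assumes <linux/kvm.h> and an open vCPU fd):

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static void handle_arm_sea(int vcpu_fd, struct kvm_run *run)
{
	struct kvm_vcpu_events events = {};

	/* run->arm_sea.gpa is only meaningful when GPA_VALID is set. */
	if (run->arm_sea.flags & KVM_EXIT_ARM_SEA_FLAG_GPA_VALID)
		fprintf(stderr, "SEA at gpa=%#llx (esr=%#llx)\n",
			run->arm_sea.gpa, run->arm_sea.esr);

	/* Forward the abort to the guest as a synchronous external abort. */
	events.exception.ext_dabt_pending = true;
	ioctl(vcpu_fd, KVM_SET_VCPU_EVENTS, &events);
}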
diff --git a/tools/testing/selftests/kvm/arm64/vgic_irq.c b/tools/testing/selftests/kvm/arm64/vgic_irq.c
index 6338f5bbdb70..2fb2c7939fe9 100644
--- a/tools/testing/selftests/kvm/arm64/vgic_irq.c
+++ b/tools/testing/selftests/kvm/arm64/vgic_irq.c
@@ -29,6 +29,7 @@ struct test_args {
bool level_sensitive; /* 1 is level, 0 is edge */
int kvm_max_routes; /* output of KVM_CAP_IRQ_ROUTING */
bool kvm_supports_irqfd; /* output of KVM_CAP_IRQFD */
+ uint32_t shared_data;
};
/*
@@ -205,7 +206,7 @@ static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid,
do { \
uint32_t _intid; \
_intid = gic_get_and_ack_irq(); \
- GUEST_ASSERT(_intid == 0 || _intid == IAR_SPURIOUS); \
+ GUEST_ASSERT(_intid == IAR_SPURIOUS); \
} while (0)
#define CAT_HELPER(a, b) a ## b
@@ -359,8 +360,9 @@ static uint32_t wait_for_and_activate_irq(void)
* interrupts for the whole test.
*/
static void test_inject_preemption(struct test_args *args,
- uint32_t first_intid, int num,
- kvm_inject_cmd cmd)
+ uint32_t first_intid, int num,
+ const unsigned long *exclude,
+ kvm_inject_cmd cmd)
{
uint32_t intid, prio, step = KVM_PRIO_STEPS;
int i;
@@ -379,6 +381,10 @@ static void test_inject_preemption(struct test_args *args,
for (i = 0; i < num; i++) {
uint32_t tmp;
intid = i + first_intid;
+
+ if (exclude && test_bit(i, exclude))
+ continue;
+
KVM_INJECT(cmd, intid);
/* Each successive IRQ will preempt the previous one. */
tmp = wait_for_and_activate_irq();
@@ -390,15 +396,33 @@ static void test_inject_preemption(struct test_args *args,
/* finish handling the IRQs starting with the highest priority one. */
for (i = 0; i < num; i++) {
intid = num - i - 1 + first_intid;
+
+ if (exclude && test_bit(intid - first_intid, exclude))
+ continue;
+
gic_set_eoi(intid);
- if (args->eoi_split)
- gic_set_dir(intid);
+ }
+
+ if (args->eoi_split) {
+ for (i = 0; i < num; i++) {
+ intid = i + first_intid;
+
+ if (exclude && test_bit(i, exclude))
+ continue;
+
+ if (args->eoi_split)
+ gic_set_dir(intid);
+ }
}
local_irq_enable();
- for (i = 0; i < num; i++)
+ for (i = 0; i < num; i++) {
+ if (exclude && test_bit(i, exclude))
+ continue;
+
GUEST_ASSERT(!gic_irq_get_active(i + first_intid));
+ }
GUEST_ASSERT_EQ(gic_read_ap1r0(), 0);
GUEST_ASSERT_IAR_EMPTY();
@@ -436,33 +460,32 @@ static void test_injection_failure(struct test_args *args,
static void test_preemption(struct test_args *args, struct kvm_inject_desc *f)
{
- /*
- * Test up to 4 levels of preemption. The reason is that KVM doesn't
- * currently implement the ability to have more than the number-of-LRs
- * number of concurrently active IRQs. The number of LRs implemented is
- * IMPLEMENTATION DEFINED, however, it seems that most implement 4.
- */
+ /* Timer PPIs cannot be injected from userspace */
+ static const unsigned long ppi_exclude = (BIT(27 - MIN_PPI) |
+ BIT(30 - MIN_PPI) |
+ BIT(28 - MIN_PPI) |
+ BIT(26 - MIN_PPI));
+
if (f->sgi)
- test_inject_preemption(args, MIN_SGI, 4, f->cmd);
+ test_inject_preemption(args, MIN_SGI, 16, NULL, f->cmd);
if (f->ppi)
- test_inject_preemption(args, MIN_PPI, 4, f->cmd);
+ test_inject_preemption(args, MIN_PPI, 16, &ppi_exclude, f->cmd);
if (f->spi)
- test_inject_preemption(args, MIN_SPI, 4, f->cmd);
+ test_inject_preemption(args, MIN_SPI, 31, NULL, f->cmd);
}
static void test_restore_active(struct test_args *args, struct kvm_inject_desc *f)
{
- /* Test up to 4 active IRQs. Same reason as in test_preemption. */
if (f->sgi)
- guest_restore_active(args, MIN_SGI, 4, f->cmd);
+ guest_restore_active(args, MIN_SGI, 16, f->cmd);
if (f->ppi)
- guest_restore_active(args, MIN_PPI, 4, f->cmd);
+ guest_restore_active(args, MIN_PPI, 16, f->cmd);
if (f->spi)
- guest_restore_active(args, MIN_SPI, 4, f->cmd);
+ guest_restore_active(args, MIN_SPI, 31, f->cmd);
}
static void guest_code(struct test_args *args)
@@ -473,12 +496,12 @@ static void guest_code(struct test_args *args)
gic_init(GIC_V3, 1);
- for (i = 0; i < nr_irqs; i++)
- gic_irq_enable(i);
-
for (i = MIN_SPI; i < nr_irqs; i++)
gic_irq_set_config(i, !level_sensitive);
+ for (i = 0; i < nr_irqs; i++)
+ gic_irq_enable(i);
+
gic_set_eoi_split(args->eoi_split);
reset_priorities(args);
@@ -636,7 +659,7 @@ static void kvm_routing_and_irqfd_check(struct kvm_vm *vm,
}
for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++)
- close(fd[f]);
+ kvm_close(fd[f]);
}
/* handles the valid case: intid=0xffffffff num=1 */
@@ -779,6 +802,221 @@ done:
kvm_vm_free(vm);
}
+static void guest_code_asym_dir(struct test_args *args, int cpuid)
+{
+ gic_init(GIC_V3, 2);
+
+ gic_set_eoi_split(1);
+ gic_set_priority_mask(CPU_PRIO_MASK);
+
+ if (cpuid == 0) {
+ uint32_t intid;
+
+ local_irq_disable();
+
+ gic_set_priority(MIN_PPI, IRQ_DEFAULT_PRIO);
+ gic_irq_enable(MIN_SPI);
+ gic_irq_set_pending(MIN_SPI);
+
+ intid = wait_for_and_activate_irq();
+ GUEST_ASSERT_EQ(intid, MIN_SPI);
+
+ gic_set_eoi(intid);
+ isb();
+
+ WRITE_ONCE(args->shared_data, MIN_SPI);
+ dsb(ishst);
+
+ do {
+ dsb(ishld);
+ } while (READ_ONCE(args->shared_data) == MIN_SPI);
+ GUEST_ASSERT(!gic_irq_get_active(MIN_SPI));
+ } else {
+ do {
+ dsb(ishld);
+ } while (READ_ONCE(args->shared_data) != MIN_SPI);
+
+ gic_set_dir(MIN_SPI);
+ isb();
+
+ WRITE_ONCE(args->shared_data, 0);
+ dsb(ishst);
+ }
+
+ GUEST_DONE();
+}
+
+static void guest_code_group_en(struct test_args *args, int cpuid)
+{
+ uint32_t intid;
+
+ gic_init(GIC_V3, 2);
+
+ gic_set_eoi_split(0);
+ gic_set_priority_mask(CPU_PRIO_MASK);
+ /* SGI0 is G0, which is disabled */
+ gic_irq_set_group(0, 0);
+
+ /* Configure all SGIs with decreasing priority */
+ for (intid = 0; intid < MIN_PPI; intid++) {
+ gic_set_priority(intid, (intid + 1) * 8);
+ gic_irq_enable(intid);
+ gic_irq_set_pending(intid);
+ }
+
+ /* Ack and EOI all G1 interrupts */
+ for (int i = 1; i < MIN_PPI; i++) {
+ intid = wait_for_and_activate_irq();
+
+ GUEST_ASSERT(intid < MIN_PPI);
+ gic_set_eoi(intid);
+ isb();
+ }
+
+ /*
+ * Check that SGI0 is still pending, inactive, and that we cannot
+ * ack anything.
+ */
+ GUEST_ASSERT(gic_irq_get_pending(0));
+ GUEST_ASSERT(!gic_irq_get_active(0));
+ GUEST_ASSERT_IAR_EMPTY();
+ GUEST_ASSERT(read_sysreg_s(SYS_ICC_IAR0_EL1) == IAR_SPURIOUS);
+
+ /* Open the G0 gates, and verify we can ack SGI0 */
+ write_sysreg_s(1, SYS_ICC_IGRPEN0_EL1);
+ isb();
+
+ do {
+ intid = read_sysreg_s(SYS_ICC_IAR0_EL1);
+ } while (intid == IAR_SPURIOUS);
+
+ GUEST_ASSERT(intid == 0);
+ GUEST_DONE();
+}
+
+static void guest_code_timer_spi(struct test_args *args, int cpuid)
+{
+ uint32_t intid;
+ u64 val;
+
+ gic_init(GIC_V3, 2);
+
+ gic_set_eoi_split(1);
+ gic_set_priority_mask(CPU_PRIO_MASK);
+
+ /* Add a pending SPI so that KVM starts trapping DIR */
+ gic_set_priority(MIN_SPI + cpuid, IRQ_DEFAULT_PRIO);
+ gic_irq_set_pending(MIN_SPI + cpuid);
+
+ /* Configure the timer with a higher priority, make it pending */
+ gic_set_priority(27, IRQ_DEFAULT_PRIO - 8);
+
+ isb();
+ val = read_sysreg(cntvct_el0);
+ write_sysreg(val, cntv_cval_el0);
+ write_sysreg(1, cntv_ctl_el0);
+ isb();
+
+ GUEST_ASSERT(gic_irq_get_pending(27));
+
+ /* Enable both interrupts */
+ gic_irq_enable(MIN_SPI + cpuid);
+ gic_irq_enable(27);
+
+ /* The timer must fire */
+ intid = wait_for_and_activate_irq();
+ GUEST_ASSERT(intid == 27);
+
+ /* Check that we can deassert it */
+ write_sysreg(0, cntv_ctl_el0);
+ isb();
+
+ GUEST_ASSERT(!gic_irq_get_pending(27));
+
+ /*
+ * Priority drop, deactivation -- we expect that the host
+ * deactivation will have been effective
+ */
+ gic_set_eoi(27);
+ gic_set_dir(27);
+
+ GUEST_ASSERT(!gic_irq_get_active(27));
+
+ /* Do it one more time */
+ isb();
+ val = read_sysreg(cntvct_el0);
+ write_sysreg(val, cntv_cval_el0);
+ write_sysreg(1, cntv_ctl_el0);
+ isb();
+
+ GUEST_ASSERT(gic_irq_get_pending(27));
+
+ /* The timer must fire again */
+ intid = wait_for_and_activate_irq();
+ GUEST_ASSERT(intid == 27);
+
+ GUEST_DONE();
+}
+
+static void *test_vcpu_run(void *arg)
+{
+ struct kvm_vcpu *vcpu = arg;
+ struct ucall uc;
+
+ while (1) {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ return NULL;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+ return NULL;
+}
+
+static void test_vgic_two_cpus(void *gcode)
+{
+ pthread_t thr[2];
+ struct kvm_vcpu *vcpus[2];
+ struct test_args args = {};
+ struct kvm_vm *vm;
+ vm_vaddr_t args_gva;
+ int gic_fd, ret;
+
+ vm = vm_create_with_vcpus(2, gcode, vcpus);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpus[0]);
+ vcpu_init_descriptor_tables(vcpus[1]);
+
+ /* Setup the guest args page (so it gets the args). */
+ args_gva = vm_vaddr_alloc_page(vm);
+ memcpy(addr_gva2hva(vm, args_gva), &args, sizeof(args));
+ vcpu_args_set(vcpus[0], 2, args_gva, 0);
+ vcpu_args_set(vcpus[1], 2, args_gva, 1);
+
+ gic_fd = vgic_v3_setup(vm, 2, 64);
+
+ ret = pthread_create(&thr[0], NULL, test_vcpu_run, vcpus[0]);
+ if (ret)
+ TEST_FAIL("Can't create thread for vcpu 0 (%d)\n", ret);
+ ret = pthread_create(&thr[1], NULL, test_vcpu_run, vcpus[1]);
+ if (ret)
+ TEST_FAIL("Can't create thread for vcpu 1 (%d)\n", ret);
+
+ pthread_join(thr[0], NULL);
+ pthread_join(thr[1], NULL);
+
+ close(gic_fd);
+ kvm_vm_free(vm);
+}
+
static void help(const char *name)
{
printf(
@@ -835,6 +1073,9 @@ int main(int argc, char **argv)
test_vgic(nr_irqs, false /* level */, true /* eoi_split */);
test_vgic(nr_irqs, true /* level */, false /* eoi_split */);
test_vgic(nr_irqs, true /* level */, true /* eoi_split */);
+ test_vgic_two_cpus(guest_code_asym_dir);
+ test_vgic_two_cpus(guest_code_group_en);
+ test_vgic_two_cpus(guest_code_timer_spi);
} else {
test_vgic(nr_irqs, level_sensitive, eoi_split);
}
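
For reference, the new ppi_exclude bitmap above maps timer PPI INTIDs to bit positions relative to MIN_PPI; a small sketch, assuming MIN_PPI is 16 (the first PPI INTID on GICv3):

/* The four architectural timer PPIs (26, 27, 28, 30) cannot be injected
 * from userspace, so the preemption test skips them. */
static unsigned long build_ppi_exclude(void)
{
	static const unsigned int timer_ppis[] = { 26, 27, 28, 30 };
	unsigned long mask = 0;
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(timer_ppis); i++)
		mask |= BIT(timer_ppis[i] - MIN_PPI);

	return mask;	/* bits 10, 11, 12 and 14 */
}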
diff --git a/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c
index 687d04463983..e857a605f577 100644
--- a/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c
+++ b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c
@@ -118,6 +118,10 @@ static void guest_setup_gic(void)
guest_setup_its_mappings();
guest_invalidate_all_rdists();
+
+ /* SYNC to ensure ITS setup is complete */
+ for (cpuid = 0; cpuid < test_data.nr_cpus; cpuid++)
+ its_send_sync_cmd(test_data.cmdq_base_va, cpuid);
}
static void guest_code(size_t nr_lpis)
diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c
index e7d9aeb418d3..618c937f3c90 100644
--- a/tools/testing/selftests/kvm/guest_memfd_test.c
+++ b/tools/testing/selftests/kvm/guest_memfd_test.c
@@ -19,6 +19,7 @@
#include <sys/stat.h>
#include "kvm_util.h"
+#include "numaif.h"
#include "test_util.h"
#include "ucall_common.h"
@@ -75,6 +76,101 @@ static void test_mmap_supported(int fd, size_t total_size)
kvm_munmap(mem, total_size);
}
+static void test_mbind(int fd, size_t total_size)
+{
+ const unsigned long nodemask_0 = 1; /* nid: 0 */
+ unsigned long nodemask = 0;
+ unsigned long maxnode = 8;
+ int policy;
+ char *mem;
+ int ret;
+
+ if (!is_multi_numa_node_system())
+ return;
+
+ mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
+
+ /* Test MPOL_INTERLEAVE policy */
+ kvm_mbind(mem, page_size * 2, MPOL_INTERLEAVE, &nodemask_0, maxnode, 0);
+ kvm_get_mempolicy(&policy, &nodemask, maxnode, mem, MPOL_F_ADDR);
+ TEST_ASSERT(policy == MPOL_INTERLEAVE && nodemask == nodemask_0,
+ "Wanted MPOL_INTERLEAVE (%u) and nodemask 0x%lx, got %u and 0x%lx",
+ MPOL_INTERLEAVE, nodemask_0, policy, nodemask);
+
+ /* Test basic MPOL_BIND policy */
+ kvm_mbind(mem + page_size * 2, page_size * 2, MPOL_BIND, &nodemask_0, maxnode, 0);
+ kvm_get_mempolicy(&policy, &nodemask, maxnode, mem + page_size * 2, MPOL_F_ADDR);
+ TEST_ASSERT(policy == MPOL_BIND && nodemask == nodemask_0,
+ "Wanted MPOL_BIND (%u) and nodemask 0x%lx, got %u and 0x%lx",
+ MPOL_BIND, nodemask_0, policy, nodemask);
+
+ /* Test MPOL_DEFAULT policy */
+ kvm_mbind(mem, total_size, MPOL_DEFAULT, NULL, 0, 0);
+ kvm_get_mempolicy(&policy, &nodemask, maxnode, mem, MPOL_F_ADDR);
+ TEST_ASSERT(policy == MPOL_DEFAULT && !nodemask,
+ "Wanted MPOL_DEFAULT (%u) and nodemask 0x0, got %u and 0x%lx",
+ MPOL_DEFAULT, policy, nodemask);
+
+ /* Test with invalid policy */
+ ret = mbind(mem, page_size, 999, &nodemask_0, maxnode, 0);
+ TEST_ASSERT(ret == -1 && errno == EINVAL,
+ "mbind with invalid policy should fail with EINVAL");
+
+ kvm_munmap(mem, total_size);
+}
+
+static void test_numa_allocation(int fd, size_t total_size)
+{
+ unsigned long node0_mask = 1; /* Node 0 */
+ unsigned long node1_mask = 2; /* Node 1 */
+ unsigned long maxnode = 8;
+ void *pages[4];
+ int status[4];
+ char *mem;
+ int i;
+
+ if (!is_multi_numa_node_system())
+ return;
+
+ mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
+
+ for (i = 0; i < 4; i++)
+ pages[i] = (char *)mem + page_size * i;
+
+ /* Set NUMA policy after allocation */
+ memset(mem, 0xaa, page_size);
+ kvm_mbind(pages[0], page_size, MPOL_BIND, &node0_mask, maxnode, 0);
+ kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, page_size);
+
+ /* Set NUMA policy before allocation */
+ kvm_mbind(pages[0], page_size * 2, MPOL_BIND, &node1_mask, maxnode, 0);
+ kvm_mbind(pages[2], page_size * 2, MPOL_BIND, &node0_mask, maxnode, 0);
+ memset(mem, 0xaa, total_size);
+
+ /* Validate if pages are allocated on specified NUMA nodes */
+ kvm_move_pages(0, 4, pages, NULL, status, 0);
+ TEST_ASSERT(status[0] == 1, "Expected page 0 on node 1, got it on node %d", status[0]);
+ TEST_ASSERT(status[1] == 1, "Expected page 1 on node 1, got it on node %d", status[1]);
+ TEST_ASSERT(status[2] == 0, "Expected page 2 on node 0, got it on node %d", status[2]);
+ TEST_ASSERT(status[3] == 0, "Expected page 3 on node 0, got it on node %d", status[3]);
+
+ /* Punch hole for all pages */
+ kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, total_size);
+
+ /* Change NUMA policy nodes and reallocate */
+ kvm_mbind(pages[0], page_size * 2, MPOL_BIND, &node0_mask, maxnode, 0);
+ kvm_mbind(pages[2], page_size * 2, MPOL_BIND, &node1_mask, maxnode, 0);
+ memset(mem, 0xaa, total_size);
+
+ kvm_move_pages(0, 4, pages, NULL, status, 0);
+ TEST_ASSERT(status[0] == 0, "Expected page 0 on node 0, got it on node %d", status[0]);
+ TEST_ASSERT(status[1] == 0, "Expected page 1 on node 0, got it on node %d", status[1]);
+ TEST_ASSERT(status[2] == 1, "Expected page 2 on node 1, got it on node %d", status[2]);
+ TEST_ASSERT(status[3] == 1, "Expected page 3 on node 1, got it on node %d", status[3]);
+
+ kvm_munmap(mem, total_size);
+}
+
static void test_fault_sigbus(int fd, size_t accessible_size, size_t map_size)
{
const char val = 0xaa;
@@ -273,11 +369,13 @@ static void __test_guest_memfd(struct kvm_vm *vm, uint64_t flags)
if (flags & GUEST_MEMFD_FLAG_INIT_SHARED) {
gmem_test(mmap_supported, vm, flags);
gmem_test(fault_overflow, vm, flags);
+ gmem_test(numa_allocation, vm, flags);
} else {
gmem_test(fault_private, vm, flags);
}
gmem_test(mmap_cow, vm, flags);
+ gmem_test(mbind, vm, flags);
} else {
gmem_test(mmap_not_supported, vm, flags);
}
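
The nodemask arguments used by test_mbind() and test_numa_allocation() are plain bitmaps of NUMA node IDs, which is why node0_mask is 1 and node1_mask is 2; a one-line sketch:

/* Bind-to-a-single-node mask: bit 'nid' selects node 'nid'. */
static unsigned long nodemask_for(int nid)
{
	return 1UL << nid;	/* node 0 -> 0x1, node 1 -> 0x2, ... */
}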
diff --git a/tools/testing/selftests/kvm/include/arm64/gic.h b/tools/testing/selftests/kvm/include/arm64/gic.h
index baeb3c859389..cc7a7f34ed37 100644
--- a/tools/testing/selftests/kvm/include/arm64/gic.h
+++ b/tools/testing/selftests/kvm/include/arm64/gic.h
@@ -57,6 +57,7 @@ void gic_irq_set_pending(unsigned int intid);
void gic_irq_clear_pending(unsigned int intid);
bool gic_irq_get_pending(unsigned int intid);
void gic_irq_set_config(unsigned int intid, bool is_edge);
+void gic_irq_set_group(unsigned int intid, bool group);
void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size,
vm_paddr_t pend_table);
diff --git a/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h b/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h
index 3722ed9c8f96..58feef3eb386 100644
--- a/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h
+++ b/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h
@@ -15,5 +15,6 @@ void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool val
void its_send_mapti_cmd(void *cmdq_base, u32 device_id, u32 event_id,
u32 collection_id, u32 intid);
void its_send_invall_cmd(void *cmdq_base, u32 collection_id);
+void its_send_sync_cmd(void *cmdq_base, u32 vcpu_id);
#endif // __SELFTESTS_GIC_V3_ITS_H__
diff --git a/tools/testing/selftests/kvm/include/kvm_syscalls.h b/tools/testing/selftests/kvm/include/kvm_syscalls.h
new file mode 100644
index 000000000000..d4e613162bba
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/kvm_syscalls.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_SYSCALLS_H
+#define SELFTEST_KVM_SYSCALLS_H
+
+#include <sys/syscall.h>
+
+#define MAP_ARGS0(m,...)
+#define MAP_ARGS1(m,t,a,...) m(t,a)
+#define MAP_ARGS2(m,t,a,...) m(t,a), MAP_ARGS1(m,__VA_ARGS__)
+#define MAP_ARGS3(m,t,a,...) m(t,a), MAP_ARGS2(m,__VA_ARGS__)
+#define MAP_ARGS4(m,t,a,...) m(t,a), MAP_ARGS3(m,__VA_ARGS__)
+#define MAP_ARGS5(m,t,a,...) m(t,a), MAP_ARGS4(m,__VA_ARGS__)
+#define MAP_ARGS6(m,t,a,...) m(t,a), MAP_ARGS5(m,__VA_ARGS__)
+#define MAP_ARGS(n,...) MAP_ARGS##n(__VA_ARGS__)
+
+#define __DECLARE_ARGS(t, a) t a
+#define __UNPACK_ARGS(t, a) a
+
+#define DECLARE_ARGS(nr_args, args...) MAP_ARGS(nr_args, __DECLARE_ARGS, args)
+#define UNPACK_ARGS(nr_args, args...) MAP_ARGS(nr_args, __UNPACK_ARGS, args)
+
+#define __KVM_SYSCALL_ERROR(_name, _ret) \
+ "%s failed, rc: %i errno: %i (%s)", (_name), (_ret), errno, strerror(errno)
+
+/* Define a kvm_<syscall>() API to assert success. */
+#define __KVM_SYSCALL_DEFINE(name, nr_args, args...) \
+static inline void kvm_##name(DECLARE_ARGS(nr_args, args)) \
+{ \
+ int r; \
+ \
+ r = name(UNPACK_ARGS(nr_args, args)); \
+ TEST_ASSERT(!r, __KVM_SYSCALL_ERROR(#name, r)); \
+}
+
+/*
+ * Macro to define syscall APIs, either because KVM selftests doesn't link to
+ * the standard library, e.g. libnuma, or because there is no library that yet
+ * provides the syscall. These also get an asserting kvm_<name>() wrapper.
+ */
+#define KVM_SYSCALL_DEFINE(name, nr_args, args...) \
+static inline long name(DECLARE_ARGS(nr_args, args)) \
+{ \
+ return syscall(__NR_##name, UNPACK_ARGS(nr_args, args)); \
+} \
+__KVM_SYSCALL_DEFINE(name, nr_args, args)
+
+/*
+ * Special case mmap(), as KVM selftests rarely/never specify an address,
+ * rarely specify an offset, and because the unique return code requires
+ * special handling anyways.
+ */
+static inline void *__kvm_mmap(size_t size, int prot, int flags, int fd,
+ off_t offset)
+{
+ void *mem;
+
+ mem = mmap(NULL, size, prot, flags, fd, offset);
+ TEST_ASSERT(mem != MAP_FAILED, __KVM_SYSCALL_ERROR("mmap()",
+ (int)(unsigned long)MAP_FAILED));
+ return mem;
+}
+
+static inline void *kvm_mmap(size_t size, int prot, int flags, int fd)
+{
+ return __kvm_mmap(size, prot, flags, fd, 0);
+}
+
+static inline int kvm_dup(int fd)
+{
+ int new_fd = dup(fd);
+
+ TEST_ASSERT(new_fd >= 0, __KVM_SYSCALL_ERROR("dup()", new_fd));
+ return new_fd;
+}
+
+__KVM_SYSCALL_DEFINE(munmap, 2, void *, mem, size_t, size);
+__KVM_SYSCALL_DEFINE(close, 1, int, fd);
+__KVM_SYSCALL_DEFINE(fallocate, 4, int, fd, int, mode, loff_t, offset, loff_t, len);
+__KVM_SYSCALL_DEFINE(ftruncate, 2, unsigned int, fd, off_t, length);
+
+#endif /* SELFTEST_KVM_SYSCALLS_H */
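
For reference, __KVM_SYSCALL_DEFINE(munmap, 2, void *, mem, size_t, size) above hand-expands to roughly the following asserting wrapper:

static inline void kvm_munmap(void *mem, size_t size)
{
	int r;

	r = munmap(mem, size);
	TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("munmap", r));
}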
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index d3f3e455c031..81f4355ff28a 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -23,6 +23,7 @@
#include <pthread.h>
+#include "kvm_syscalls.h"
#include "kvm_util_arch.h"
#include "kvm_util_types.h"
#include "sparsebit.h"
@@ -177,7 +178,7 @@ enum vm_guest_mode {
VM_MODE_P40V48_4K,
VM_MODE_P40V48_16K,
VM_MODE_P40V48_64K,
- VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */
+ VM_MODE_PXXVYY_4K, /* For 48-bit or 57-bit VA, depending on host support */
VM_MODE_P47V64_4K,
VM_MODE_P44V64_4K,
VM_MODE_P36V48_4K,
@@ -219,7 +220,7 @@ extern enum vm_guest_mode vm_mode_default;
#elif defined(__x86_64__)
-#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K
+#define VM_MODE_DEFAULT VM_MODE_PXXVYY_4K
#define MIN_PAGE_SHIFT 12U
#define ptes_per_page(page_size) ((page_size) / 8)
@@ -283,34 +284,6 @@ static inline bool kvm_has_cap(long cap)
return kvm_check_cap(cap);
}
-#define __KVM_SYSCALL_ERROR(_name, _ret) \
- "%s failed, rc: %i errno: %i (%s)", (_name), (_ret), errno, strerror(errno)
-
-static inline void *__kvm_mmap(size_t size, int prot, int flags, int fd,
- off_t offset)
-{
- void *mem;
-
- mem = mmap(NULL, size, prot, flags, fd, offset);
- TEST_ASSERT(mem != MAP_FAILED, __KVM_SYSCALL_ERROR("mmap()",
- (int)(unsigned long)MAP_FAILED));
-
- return mem;
-}
-
-static inline void *kvm_mmap(size_t size, int prot, int flags, int fd)
-{
- return __kvm_mmap(size, prot, flags, fd, 0);
-}
-
-static inline void kvm_munmap(void *mem, size_t size)
-{
- int ret;
-
- ret = munmap(mem, size);
- TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
-}
-
/*
* Use the "inner", double-underscore macro when reporting errors from within
* other macros so that the name of ioctl() and not its literal numeric value
@@ -700,12 +673,12 @@ int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flag
uint32_t guest_memfd, uint64_t guest_memfd_offset);
void vm_userspace_mem_region_add(struct kvm_vm *vm,
- enum vm_mem_backing_src_type src_type,
- uint64_t guest_paddr, uint32_t slot, uint64_t npages,
- uint32_t flags);
+ enum vm_mem_backing_src_type src_type,
+ uint64_t gpa, uint32_t slot, uint64_t npages,
+ uint32_t flags);
void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
- uint64_t guest_paddr, uint32_t slot, uint64_t npages,
- uint32_t flags, int guest_memfd_fd, uint64_t guest_memfd_offset);
+ uint64_t gpa, uint32_t slot, uint64_t npages, uint32_t flags,
+ int guest_memfd_fd, uint64_t guest_memfd_offset);
#ifndef vm_arch_has_protected_memory
static inline bool vm_arch_has_protected_memory(struct kvm_vm *vm)
@@ -715,6 +688,7 @@ static inline bool vm_arch_has_protected_memory(struct kvm_vm *vm)
#endif
void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
+void vm_mem_region_reload(struct kvm_vm *vm, uint32_t slot);
void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id);
@@ -1230,6 +1204,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr);
static inline void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
{
virt_arch_pg_map(vm, vaddr, paddr);
+ sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift);
}
diff --git a/tools/testing/selftests/kvm/include/loongarch/arch_timer.h b/tools/testing/selftests/kvm/include/loongarch/arch_timer.h
new file mode 100644
index 000000000000..2ed106b32c81
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/loongarch/arch_timer.h
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * LoongArch Constant Timer specific interface
+ */
+#ifndef SELFTEST_KVM_ARCH_TIMER_H
+#define SELFTEST_KVM_ARCH_TIMER_H
+
+#include "processor.h"
+
+/* LoongArch timer frequency is constant 100MHZ */
+#define TIMER_FREQ (100UL << 20)
+#define msec_to_cycles(msec) (TIMER_FREQ * (unsigned long)(msec) / 1000)
+#define usec_to_cycles(usec) (TIMER_FREQ * (unsigned long)(usec) / 1000000)
+#define cycles_to_usec(cycles) ((unsigned long)(cycles) * 1000000 / TIMER_FREQ)
+
+static inline unsigned long timer_get_cycles(void)
+{
+ unsigned long val = 0;
+
+ __asm__ __volatile__(
+ "rdtime.d %0, $zero\n\t"
+ : "=r"(val)
+ :
+ );
+
+ return val;
+}
+
+static inline unsigned long timer_get_cfg(void)
+{
+ return csr_read(LOONGARCH_CSR_TCFG);
+}
+
+static inline unsigned long timer_get_val(void)
+{
+ return csr_read(LOONGARCH_CSR_TVAL);
+}
+
+static inline void disable_timer(void)
+{
+ csr_write(0, LOONGARCH_CSR_TCFG);
+}
+
+static inline void timer_irq_enable(void)
+{
+ unsigned long val;
+
+ val = csr_read(LOONGARCH_CSR_ECFG);
+ val |= ECFGF_TIMER;
+ csr_write(val, LOONGARCH_CSR_ECFG);
+}
+
+static inline void timer_irq_disable(void)
+{
+ unsigned long val;
+
+ val = csr_read(LOONGARCH_CSR_ECFG);
+ val &= ~ECFGF_TIMER;
+ csr_write(val, LOONGARCH_CSR_ECFG);
+}
+
+static inline void timer_set_next_cmp_ms(unsigned int msec, bool period)
+{
+ unsigned long val;
+
+ val = msec_to_cycles(msec) & CSR_TCFG_VAL;
+ val |= CSR_TCFG_EN;
+ if (period)
+ val |= CSR_TCFG_PERIOD;
+ csr_write(val, LOONGARCH_CSR_TCFG);
+}
+
+static inline void __delay(uint64_t cycles)
+{
+ uint64_t start = timer_get_cycles();
+
+ while ((timer_get_cycles() - start) < cycles)
+ cpu_relax();
+}
+
+static inline void udelay(unsigned long usec)
+{
+ __delay(usec_to_cycles(usec));
+}
+#endif /* SELFTEST_KVM_ARCH_TIMER_H */
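
For reference, a minimal standalone sketch of the cycle-conversion macros above (the same definitions exercised on the host; an illustration only, not part of the selftest):

#include <assert.h>

#define TIMER_FREQ (100UL << 20)
#define msec_to_cycles(msec) (TIMER_FREQ * (unsigned long)(msec) / 1000)
#define cycles_to_usec(cycles) ((unsigned long)(cycles) * 1000000 / TIMER_FREQ)

int main(void)
{
	/* At 100UL << 20 Hz, 10 ms corresponds to exactly 1,048,576 cycles. */
	assert(msec_to_cycles(10) == 1048576);
	/* Converting back to microseconds round-trips exactly for this value. */
	assert(cycles_to_usec(msec_to_cycles(10)) == 10000);
	return 0;
}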
diff --git a/tools/testing/selftests/kvm/include/loongarch/processor.h b/tools/testing/selftests/kvm/include/loongarch/processor.h
index 6427a3275e6a..76840ddda57d 100644
--- a/tools/testing/selftests/kvm/include/loongarch/processor.h
+++ b/tools/testing/selftests/kvm/include/loongarch/processor.h
@@ -83,7 +83,14 @@
#define LOONGARCH_CSR_PRMD 0x1
#define LOONGARCH_CSR_EUEN 0x2
#define LOONGARCH_CSR_ECFG 0x4
+#define ECFGB_TIMER 11
+#define ECFGF_TIMER (BIT_ULL(ECFGB_TIMER))
#define LOONGARCH_CSR_ESTAT 0x5 /* Exception status */
+#define CSR_ESTAT_EXC_SHIFT 16
+#define CSR_ESTAT_EXC_WIDTH 6
+#define CSR_ESTAT_EXC (0x3f << CSR_ESTAT_EXC_SHIFT)
+#define EXCCODE_INT 0 /* Interrupt */
+#define INT_TI 11 /* Timer interrupt */
#define LOONGARCH_CSR_ERA 0x6 /* ERA */
#define LOONGARCH_CSR_BADV 0x7 /* Bad virtual address */
#define LOONGARCH_CSR_EENTRY 0xc
@@ -106,6 +113,14 @@
#define LOONGARCH_CSR_KS1 0x31
#define LOONGARCH_CSR_TMID 0x40
#define LOONGARCH_CSR_TCFG 0x41
+#define CSR_TCFG_VAL (BIT_ULL(48) - BIT_ULL(2))
+#define CSR_TCFG_PERIOD_SHIFT 1
+#define CSR_TCFG_PERIOD (0x1UL << CSR_TCFG_PERIOD_SHIFT)
+#define CSR_TCFG_EN (0x1UL)
+#define LOONGARCH_CSR_TVAL 0x42
+#define LOONGARCH_CSR_TINTCLR 0x44 /* Timer interrupt clear */
+#define CSR_TINTCLR_TI_SHIFT 0
+#define CSR_TINTCLR_TI (1 << CSR_TINTCLR_TI_SHIFT)
/* TLB refill exception entry */
#define LOONGARCH_CSR_TLBRENTRY 0x88
#define LOONGARCH_CSR_TLBRSAVE 0x8b
@@ -113,6 +128,28 @@
#define CSR_TLBREHI_PS_SHIFT 0
#define CSR_TLBREHI_PS (0x3fUL << CSR_TLBREHI_PS_SHIFT)
+#define csr_read(csr) \
+({ \
+ register unsigned long __v; \
+ __asm__ __volatile__( \
+ "csrrd %[val], %[reg]\n\t" \
+ : [val] "=r" (__v) \
+ : [reg] "i" (csr) \
+ : "memory"); \
+ __v; \
+})
+
+#define csr_write(v, csr) \
+({ \
+ register unsigned long __v = v; \
+ __asm__ __volatile__ ( \
+ "csrwr %[val], %[reg]\n\t" \
+ : [val] "+r" (__v) \
+ : [reg] "i" (csr) \
+ : "memory"); \
+ __v; \
+})
+
#define EXREGS_GPRS (32)
#ifndef __ASSEMBLER__
@@ -124,18 +161,60 @@ struct ex_regs {
unsigned long pc;
unsigned long estat;
unsigned long badv;
+ unsigned long prmd;
};
#define PC_OFFSET_EXREGS offsetof(struct ex_regs, pc)
#define ESTAT_OFFSET_EXREGS offsetof(struct ex_regs, estat)
#define BADV_OFFSET_EXREGS offsetof(struct ex_regs, badv)
+#define PRMD_OFFSET_EXREGS offsetof(struct ex_regs, prmd)
#define EXREGS_SIZE sizeof(struct ex_regs)
+#define VECTOR_NUM 64
+
+typedef void(*handler_fn)(struct ex_regs *);
+
+struct handlers {
+ handler_fn exception_handlers[VECTOR_NUM];
+};
+
+void vm_init_descriptor_tables(struct kvm_vm *vm);
+void vm_install_exception_handler(struct kvm_vm *vm, int vector, handler_fn handler);
+
+static inline void cpu_relax(void)
+{
+ asm volatile("nop" ::: "memory");
+}
+
+static inline void local_irq_enable(void)
+{
+ unsigned int flags = CSR_CRMD_IE;
+ register unsigned int mask asm("$t0") = CSR_CRMD_IE;
+
+ __asm__ __volatile__(
+ "csrxchg %[val], %[mask], %[reg]\n\t"
+ : [val] "+r" (flags)
+ : [mask] "r" (mask), [reg] "i" (LOONGARCH_CSR_CRMD)
+ : "memory");
+}
+
+static inline void local_irq_disable(void)
+{
+ unsigned int flags = 0;
+ register unsigned int mask asm("$t0") = CSR_CRMD_IE;
+
+ __asm__ __volatile__(
+ "csrxchg %[val], %[mask], %[reg]\n\t"
+ : [val] "+r" (flags)
+ : [mask] "r" (mask), [reg] "i" (LOONGARCH_CSR_CRMD)
+ : "memory");
+}
#else
#define PC_OFFSET_EXREGS ((EXREGS_GPRS + 0) * 8)
#define ESTAT_OFFSET_EXREGS ((EXREGS_GPRS + 1) * 8)
#define BADV_OFFSET_EXREGS ((EXREGS_GPRS + 2) * 8)
-#define EXREGS_SIZE ((EXREGS_GPRS + 3) * 8)
+#define PRMD_OFFSET_EXREGS ((EXREGS_GPRS + 3) * 8)
+#define EXREGS_SIZE ((EXREGS_GPRS + 4) * 8)
#endif
#endif /* SELFTEST_KVM_PROCESSOR_H */
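
As an illustration of the new ESTAT fields, here is a standalone sketch of how an exception vector and a pending timer interrupt can be decoded from an ESTAT value, mirroring what route_exception() and the timer test's IRQ handler do later in this series (the ESTAT value is fabricated):

#include <stdint.h>
#include <stdio.h>

#define CSR_ESTAT_EXC_SHIFT	16
#define CSR_ESTAT_EXC		(0x3f << CSR_ESTAT_EXC_SHIFT)
#define EXCCODE_INT		0	/* Ecode 0 == interrupt */
#define INT_TI			11	/* IS bit 11 == constant timer */

static int decode_vector(uint64_t estat)
{
	return (estat & CSR_ESTAT_EXC) >> CSR_ESTAT_EXC_SHIFT;
}

int main(void)
{
	/* A fabricated ESTAT: interrupt exception with the timer IRQ pending. */
	uint64_t estat = ((uint64_t)EXCCODE_INT << CSR_ESTAT_EXC_SHIFT) | (1u << INT_TI);

	printf("vector=%d timer_pending=%d\n",
	       decode_vector(estat), !!(estat & (1u << INT_TI)));
	return 0;
}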
diff --git a/tools/testing/selftests/kvm/include/numaif.h b/tools/testing/selftests/kvm/include/numaif.h
index b020547403fd..29572a6d789c 100644
--- a/tools/testing/selftests/kvm/include/numaif.h
+++ b/tools/testing/selftests/kvm/include/numaif.h
@@ -1,55 +1,83 @@
/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * tools/testing/selftests/kvm/include/numaif.h
- *
- * Copyright (C) 2020, Google LLC.
- *
- * This work is licensed under the terms of the GNU GPL, version 2.
- *
- * Header file that provides access to NUMA API functions not explicitly
- * exported to user space.
- */
+/* Copyright (C) 2020, Google LLC. */
#ifndef SELFTEST_KVM_NUMAIF_H
#define SELFTEST_KVM_NUMAIF_H
-#define __NR_get_mempolicy 239
-#define __NR_migrate_pages 256
+#include <dirent.h>
-/* System calls */
-long get_mempolicy(int *policy, const unsigned long *nmask,
- unsigned long maxnode, void *addr, int flags)
+#include <linux/mempolicy.h>
+
+#include "kvm_syscalls.h"
+
+KVM_SYSCALL_DEFINE(get_mempolicy, 5, int *, policy, const unsigned long *, nmask,
+ unsigned long, maxnode, void *, addr, int, flags);
+
+KVM_SYSCALL_DEFINE(set_mempolicy, 3, int, mode, const unsigned long *, nmask,
+ unsigned long, maxnode);
+
+KVM_SYSCALL_DEFINE(set_mempolicy_home_node, 4, unsigned long, start,
+ unsigned long, len, unsigned long, home_node,
+ unsigned long, flags);
+
+KVM_SYSCALL_DEFINE(migrate_pages, 4, int, pid, unsigned long, maxnode,
+ const unsigned long *, frommask, const unsigned long *, tomask);
+
+KVM_SYSCALL_DEFINE(move_pages, 6, int, pid, unsigned long, count, void *, pages,
+ const int *, nodes, int *, status, int, flags);
+
+KVM_SYSCALL_DEFINE(mbind, 6, void *, addr, unsigned long, size, int, mode,
+ const unsigned long *, nodemask, unsigned long, maxnode,
+ unsigned int, flags);
+
+static inline int get_max_numa_node(void)
{
- return syscall(__NR_get_mempolicy, policy, nmask,
- maxnode, addr, flags);
+ struct dirent *de;
+ int max_node = 0;
+ DIR *d;
+
+ /*
+ * Assume there's a single node if the kernel doesn't support NUMA,
+ * or if no nodes are found.
+ */
+ d = opendir("/sys/devices/system/node");
+ if (!d)
+ return 0;
+
+ while ((de = readdir(d)) != NULL) {
+ int node_id;
+ char *endptr;
+
+ if (strncmp(de->d_name, "node", 4) != 0)
+ continue;
+
+ node_id = strtol(de->d_name + 4, &endptr, 10);
+ if (*endptr != '\0')
+ continue;
+
+ if (node_id > max_node)
+ max_node = node_id;
+ }
+ closedir(d);
+
+ return max_node;
}
-long migrate_pages(int pid, unsigned long maxnode,
- const unsigned long *frommask,
- const unsigned long *tomask)
+static bool is_numa_available(void)
{
- return syscall(__NR_migrate_pages, pid, maxnode, frommask, tomask);
+ /*
+ * Probe for NUMA by doing a dummy get_mempolicy(). If the syscall
+ * fails with ENOSYS, then the kernel was built without NUMA support.
+ * If the syscall fails with EPERM, then the process/user lacks the
+ * necessary capabilities (CAP_SYS_NICE).
+ */
+ return !get_mempolicy(NULL, NULL, 0, NULL, 0) ||
+ (errno != ENOSYS && errno != EPERM);
}
-/* Policies */
-#define MPOL_DEFAULT 0
-#define MPOL_PREFERRED 1
-#define MPOL_BIND 2
-#define MPOL_INTERLEAVE 3
-
-#define MPOL_MAX MPOL_INTERLEAVE
-
-/* Flags for get_mem_policy */
-#define MPOL_F_NODE (1<<0) /* return next il node or node of address */
- /* Warning: MPOL_F_NODE is unsupported and
- * subject to change. Don't use.
- */
-#define MPOL_F_ADDR (1<<1) /* look up vma using address */
-#define MPOL_F_MEMS_ALLOWED (1<<2) /* query nodes allowed in cpuset */
-
-/* Flags for mbind */
-#define MPOL_MF_STRICT (1<<0) /* Verify existing pages in the mapping */
-#define MPOL_MF_MOVE (1<<1) /* Move pages owned by this process to conform to mapping */
-#define MPOL_MF_MOVE_ALL (1<<2) /* Move every page to conform to mapping */
+static inline bool is_multi_numa_node_system(void)
+{
+ return is_numa_available() && get_max_numa_node() >= 1;
+}
#endif /* SELFTEST_KVM_NUMAIF_H */
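
The probe in is_numa_available() boils down to a dummy get_mempolicy() call. A standalone sketch of the same idea using raw syscall(2), so it does not depend on the KVM_SYSCALL_DEFINE() wrapper (whose definition lives in the new kvm_syscalls.h, not shown in this diff):

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

static bool numa_available(void)
{
	/* ENOSYS: kernel built without NUMA; EPERM: missing CAP_SYS_NICE. */
	if (!syscall(SYS_get_mempolicy, NULL, NULL, 0UL, NULL, 0))
		return true;
	return errno != ENOSYS && errno != EPERM;
}

int main(void)
{
	printf("NUMA available: %s\n", numa_available() ? "yes" : "no");
	return 0;
}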
diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h
index 51cd84b9ca66..57d62a425109 100644
--- a/tools/testing/selftests/kvm/include/x86/processor.h
+++ b/tools/testing/selftests/kvm/include/x86/processor.h
@@ -1441,7 +1441,7 @@ enum pg_level {
PG_LEVEL_2M,
PG_LEVEL_1G,
PG_LEVEL_512G,
- PG_LEVEL_NUM
+ PG_LEVEL_256T
};
#define PG_LEVEL_SHIFT(_level) ((_level - 1) * 9 + 12)
diff --git a/tools/testing/selftests/kvm/include/x86/vmx.h b/tools/testing/selftests/kvm/include/x86/vmx.h
index edb3c391b982..96e2b4c630a9 100644
--- a/tools/testing/selftests/kvm/include/x86/vmx.h
+++ b/tools/testing/selftests/kvm/include/x86/vmx.h
@@ -568,8 +568,7 @@ void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
uint64_t addr, uint64_t size);
bool kvm_cpu_has_ept(void);
-void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint32_t eptp_memslot);
+void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm);
void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm);
#endif /* SELFTEST_KVM_VMX_H */
diff --git a/tools/testing/selftests/kvm/kvm_binary_stats_test.c b/tools/testing/selftests/kvm/kvm_binary_stats_test.c
index f02355c3c4c2..b7dbde9c0843 100644
--- a/tools/testing/selftests/kvm/kvm_binary_stats_test.c
+++ b/tools/testing/selftests/kvm/kvm_binary_stats_test.c
@@ -239,14 +239,14 @@ int main(int argc, char *argv[])
* single stats file works and doesn't cause explosions.
*/
vm_stats_fds = vm_get_stats_fd(vms[i]);
- stats_test(dup(vm_stats_fds));
+ stats_test(kvm_dup(vm_stats_fds));
/* Verify userspace can instantiate multiple stats files. */
stats_test(vm_get_stats_fd(vms[i]));
for (j = 0; j < max_vcpu; ++j) {
vcpu_stats_fds[j] = vcpu_get_stats_fd(vcpus[i * max_vcpu + j]);
- stats_test(dup(vcpu_stats_fds[j]));
+ stats_test(kvm_dup(vcpu_stats_fds[j]));
stats_test(vcpu_get_stats_fd(vcpus[i * max_vcpu + j]));
}
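
kvm_dup() and kvm_close() come from the new kvm_syscalls.h, which is not part of this diff; presumably they are thin asserting wrappers around the underlying syscalls. A hypothetical sketch of what such a wrapper might look like (the real helpers may differ):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/* Hypothetical example; the real kvm_close() is defined in kvm_syscalls.h. */
static inline void example_kvm_close(int fd)
{
	int ret = close(fd);

	if (ret) {
		fprintf(stderr, "close() failed, rc: %i errno: %i (%s)\n",
			ret, errno, strerror(errno));
		exit(1);
	}
}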
diff --git a/tools/testing/selftests/kvm/lib/arm64/gic.c b/tools/testing/selftests/kvm/lib/arm64/gic.c
index 7abbf8866512..b023868fe0b8 100644
--- a/tools/testing/selftests/kvm/lib/arm64/gic.c
+++ b/tools/testing/selftests/kvm/lib/arm64/gic.c
@@ -155,3 +155,9 @@ void gic_irq_set_config(unsigned int intid, bool is_edge)
GUEST_ASSERT(gic_common_ops);
gic_common_ops->gic_irq_set_config(intid, is_edge);
}
+
+void gic_irq_set_group(unsigned int intid, bool group)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_set_group(intid, group);
+}
diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_private.h b/tools/testing/selftests/kvm/lib/arm64/gic_private.h
index d24e9ecc96c6..b6a7e30c3eb1 100644
--- a/tools/testing/selftests/kvm/lib/arm64/gic_private.h
+++ b/tools/testing/selftests/kvm/lib/arm64/gic_private.h
@@ -25,6 +25,7 @@ struct gic_common_ops {
void (*gic_irq_clear_pending)(uint32_t intid);
bool (*gic_irq_get_pending)(uint32_t intid);
void (*gic_irq_set_config)(uint32_t intid, bool is_edge);
+ void (*gic_irq_set_group)(uint32_t intid, bool group);
};
extern const struct gic_common_ops gicv3_ops;
diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_v3.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3.c
index 66d05506f78b..50754a27f493 100644
--- a/tools/testing/selftests/kvm/lib/arm64/gic_v3.c
+++ b/tools/testing/selftests/kvm/lib/arm64/gic_v3.c
@@ -293,17 +293,36 @@ static void gicv3_enable_redist(volatile void *redist_base)
}
}
+static void gicv3_set_group(uint32_t intid, bool grp)
+{
+ uint32_t cpu_or_dist;
+ uint32_t val;
+
+ cpu_or_dist = (get_intid_range(intid) == SPI_RANGE) ? DIST_BIT : guest_get_vcpuid();
+ val = gicv3_reg_readl(cpu_or_dist, GICD_IGROUPR + (intid / 32) * 4);
+ if (grp)
+ val |= BIT(intid % 32);
+ else
+ val &= ~BIT(intid % 32);
+ gicv3_reg_writel(cpu_or_dist, GICD_IGROUPR + (intid / 32) * 4, val);
+}
+
static void gicv3_cpu_init(unsigned int cpu)
{
volatile void *sgi_base;
unsigned int i;
volatile void *redist_base_cpu;
+ u64 typer;
GUEST_ASSERT(cpu < gicv3_data.nr_cpus);
redist_base_cpu = gicr_base_cpu(cpu);
sgi_base = sgi_base_from_redist(redist_base_cpu);
+ /* Verify assumption that GICR_TYPER.Processor_number == cpu */
+ typer = readq_relaxed(redist_base_cpu + GICR_TYPER);
+ GUEST_ASSERT_EQ(GICR_TYPER_CPU_NUMBER(typer), cpu);
+
gicv3_enable_redist(redist_base_cpu);
/*
@@ -328,6 +347,8 @@ static void gicv3_cpu_init(unsigned int cpu)
/* Set a default priority threshold */
write_sysreg_s(ICC_PMR_DEF_PRIO, SYS_ICC_PMR_EL1);
+ /* Enable Group-0 interrupts */
+ write_sysreg_s(ICC_IGRPEN0_EL1_MASK, SYS_ICC_IGRPEN0_EL1);
/* Enable non-secure Group-1 interrupts */
write_sysreg_s(ICC_IGRPEN1_EL1_MASK, SYS_ICC_IGRPEN1_EL1);
}
@@ -400,6 +421,7 @@ const struct gic_common_ops gicv3_ops = {
.gic_irq_clear_pending = gicv3_irq_clear_pending,
.gic_irq_get_pending = gicv3_irq_get_pending,
.gic_irq_set_config = gicv3_irq_set_config,
+ .gic_irq_set_group = gicv3_set_group,
};
void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size,
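
gicv3_set_group() follows the usual GIC register-banking pattern: each IGROUPR word covers 32 INTIDs. A tiny standalone sketch of the offset/bit arithmetic (the INTID is hypothetical):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t intid = 42;			/* hypothetical SPI */
	uint32_t offset = (intid / 32) * 4;	/* byte offset from GICD_IGROUPR */
	uint32_t val = 0;			/* pretend register contents */

	val |= 1u << (intid % 32);		/* route to Group 1 */
	printf("IGROUPR+0x%x = 0x%08x\n", offset, val);
	val &= ~(1u << (intid % 32));		/* route to Group 0 */
	printf("IGROUPR+0x%x = 0x%08x\n", offset, val);
	return 0;
}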
diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c
index 0e2f8ed90f30..7f9fdcf42ae6 100644
--- a/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c
+++ b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c
@@ -253,3 +253,13 @@ void its_send_invall_cmd(void *cmdq_base, u32 collection_id)
its_send_cmd(cmdq_base, &cmd);
}
+
+void its_send_sync_cmd(void *cmdq_base, u32 vcpu_id)
+{
+ struct its_cmd_block cmd = {};
+
+ its_encode_cmd(&cmd, GITS_CMD_SYNC);
+ its_encode_target(&cmd, procnum_to_rdbase(vcpu_id));
+
+ its_send_cmd(cmdq_base, &cmd);
+}
diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c
index 54f6d17c78f7..d46e4b13b92c 100644
--- a/tools/testing/selftests/kvm/lib/arm64/processor.c
+++ b/tools/testing/selftests/kvm/lib/arm64/processor.c
@@ -324,7 +324,7 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init)
/* Configure base granule size */
switch (vm->mode) {
- case VM_MODE_PXXV48_4K:
+ case VM_MODE_PXXVYY_4K:
TEST_FAIL("AArch64 does not support 4K sized pages "
"with ANY-bit physical address ranges");
case VM_MODE_P52V48_64K:
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 1a93d6361671..8279b6ced8d2 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -201,7 +201,7 @@ const char *vm_guest_mode_string(uint32_t i)
[VM_MODE_P40V48_4K] = "PA-bits:40, VA-bits:48, 4K pages",
[VM_MODE_P40V48_16K] = "PA-bits:40, VA-bits:48, 16K pages",
[VM_MODE_P40V48_64K] = "PA-bits:40, VA-bits:48, 64K pages",
- [VM_MODE_PXXV48_4K] = "PA-bits:ANY, VA-bits:48, 4K pages",
+ [VM_MODE_PXXVYY_4K] = "PA-bits:ANY, VA-bits:48 or 57, 4K pages",
[VM_MODE_P47V64_4K] = "PA-bits:47, VA-bits:64, 4K pages",
[VM_MODE_P44V64_4K] = "PA-bits:44, VA-bits:64, 4K pages",
[VM_MODE_P36V48_4K] = "PA-bits:36, VA-bits:48, 4K pages",
@@ -228,7 +228,7 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = {
[VM_MODE_P40V48_4K] = { 40, 48, 0x1000, 12 },
[VM_MODE_P40V48_16K] = { 40, 48, 0x4000, 14 },
[VM_MODE_P40V48_64K] = { 40, 48, 0x10000, 16 },
- [VM_MODE_PXXV48_4K] = { 0, 0, 0x1000, 12 },
+ [VM_MODE_PXXVYY_4K] = { 0, 0, 0x1000, 12 },
[VM_MODE_P47V64_4K] = { 47, 64, 0x1000, 12 },
[VM_MODE_P44V64_4K] = { 44, 64, 0x1000, 12 },
[VM_MODE_P36V48_4K] = { 36, 48, 0x1000, 12 },
@@ -310,24 +310,26 @@ struct kvm_vm *____vm_create(struct vm_shape shape)
case VM_MODE_P36V47_16K:
vm->pgtable_levels = 3;
break;
- case VM_MODE_PXXV48_4K:
+ case VM_MODE_PXXVYY_4K:
#ifdef __x86_64__
kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits);
kvm_init_vm_address_properties(vm);
- /*
- * Ignore KVM support for 5-level paging (vm->va_bits == 57),
- * it doesn't take effect unless a CR4.LA57 is set, which it
- * isn't for this mode (48-bit virtual address space).
- */
- TEST_ASSERT(vm->va_bits == 48 || vm->va_bits == 57,
- "Linear address width (%d bits) not supported",
- vm->va_bits);
+
pr_debug("Guest physical address width detected: %d\n",
vm->pa_bits);
- vm->pgtable_levels = 4;
- vm->va_bits = 48;
+ pr_debug("Guest virtual address width detected: %d\n",
+ vm->va_bits);
+
+ if (vm->va_bits == 57) {
+ vm->pgtable_levels = 5;
+ } else {
+ TEST_ASSERT(vm->va_bits == 48,
+ "Unexpected guest virtual address width: %d",
+ vm->va_bits);
+ vm->pgtable_levels = 4;
+ }
#else
- TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms");
+ TEST_FAIL("VM_MODE_PXXVYY_4K not supported on non-x86 platforms");
#endif
break;
case VM_MODE_P47V64_4K:
@@ -704,8 +706,6 @@ userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
static void kvm_stats_release(struct kvm_binary_stats *stats)
{
- int ret;
-
if (stats->fd < 0)
return;
@@ -714,8 +714,7 @@ static void kvm_stats_release(struct kvm_binary_stats *stats)
stats->desc = NULL;
}
- ret = close(stats->fd);
- TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
+ kvm_close(stats->fd);
stats->fd = -1;
}
@@ -738,8 +737,6 @@ __weak void vcpu_arch_free(struct kvm_vcpu *vcpu)
*/
static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
- int ret;
-
if (vcpu->dirty_gfns) {
kvm_munmap(vcpu->dirty_gfns, vm->dirty_ring_size);
vcpu->dirty_gfns = NULL;
@@ -747,9 +744,7 @@ static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
kvm_munmap(vcpu->run, vcpu_mmap_sz());
- ret = close(vcpu->fd);
- TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
-
+ kvm_close(vcpu->fd);
kvm_stats_release(&vcpu->stats);
list_del(&vcpu->list);
@@ -761,16 +756,12 @@ static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
void kvm_vm_release(struct kvm_vm *vmp)
{
struct kvm_vcpu *vcpu, *tmp;
- int ret;
list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list)
vm_vcpu_rm(vmp, vcpu);
- ret = close(vmp->fd);
- TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
-
- ret = close(vmp->kvm_fd);
- TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
+ kvm_close(vmp->fd);
+ kvm_close(vmp->kvm_fd);
/* Free cached stats metadata and close FD */
kvm_stats_release(&vmp->stats);
@@ -828,7 +819,7 @@ void kvm_vm_free(struct kvm_vm *vmp)
int kvm_memfd_alloc(size_t size, bool hugepages)
{
int memfd_flags = MFD_CLOEXEC;
- int fd, r;
+ int fd;
if (hugepages)
memfd_flags |= MFD_HUGETLB;
@@ -836,11 +827,8 @@ int kvm_memfd_alloc(size_t size, bool hugepages)
fd = memfd_create("kvm_selftest", memfd_flags);
TEST_ASSERT(fd != -1, __KVM_SYSCALL_ERROR("memfd_create()", fd));
- r = ftruncate(fd, size);
- TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("ftruncate()", r));
-
- r = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size);
- TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
+ kvm_ftruncate(fd, size);
+ kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size);
return fd;
}
@@ -957,8 +945,8 @@ void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags
/* FIXME: This thing needs to be ripped apart and rewritten. */
void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
- uint64_t guest_paddr, uint32_t slot, uint64_t npages,
- uint32_t flags, int guest_memfd, uint64_t guest_memfd_offset)
+ uint64_t gpa, uint32_t slot, uint64_t npages, uint32_t flags,
+ int guest_memfd, uint64_t guest_memfd_offset)
{
int ret;
struct userspace_mem_region *region;
@@ -972,30 +960,29 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
"Number of guest pages is not compatible with the host. "
"Try npages=%d", vm_adjust_num_guest_pages(vm->mode, npages));
- TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical "
+ TEST_ASSERT((gpa % vm->page_size) == 0, "Guest physical "
"address not on a page boundary.\n"
- " guest_paddr: 0x%lx vm->page_size: 0x%x",
- guest_paddr, vm->page_size);
- TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1)
+ " gpa: 0x%lx vm->page_size: 0x%x",
+ gpa, vm->page_size);
+ TEST_ASSERT((((gpa >> vm->page_shift) + npages) - 1)
<= vm->max_gfn, "Physical range beyond maximum "
"supported physical address,\n"
- " guest_paddr: 0x%lx npages: 0x%lx\n"
+ " gpa: 0x%lx npages: 0x%lx\n"
" vm->max_gfn: 0x%lx vm->page_size: 0x%x",
- guest_paddr, npages, vm->max_gfn, vm->page_size);
+ gpa, npages, vm->max_gfn, vm->page_size);
/*
* Confirm a mem region with an overlapping address doesn't
* already exist.
*/
region = (struct userspace_mem_region *) userspace_mem_region_find(
- vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1);
+ vm, gpa, (gpa + npages * vm->page_size) - 1);
if (region != NULL)
TEST_FAIL("overlapping userspace_mem_region already "
"exists\n"
- " requested guest_paddr: 0x%lx npages: 0x%lx "
- "page_size: 0x%x\n"
- " existing guest_paddr: 0x%lx size: 0x%lx",
- guest_paddr, npages, vm->page_size,
+ " requested gpa: 0x%lx npages: 0x%lx page_size: 0x%x\n"
+ " existing gpa: 0x%lx size: 0x%lx",
+ gpa, npages, vm->page_size,
(uint64_t) region->region.guest_phys_addr,
(uint64_t) region->region.memory_size);
@@ -1009,8 +996,7 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
"already exists.\n"
" requested slot: %u paddr: 0x%lx npages: 0x%lx\n"
" existing slot: %u paddr: 0x%lx size: 0x%lx",
- slot, guest_paddr, npages,
- region->region.slot,
+ slot, gpa, npages, region->region.slot,
(uint64_t) region->region.guest_phys_addr,
(uint64_t) region->region.memory_size);
}
@@ -1036,7 +1022,7 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
alignment = max(backing_src_pagesz, alignment);
- TEST_ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz));
+ TEST_ASSERT_EQ(gpa, align_up(gpa, backing_src_pagesz));
/* Add enough memory to align up if necessary */
if (alignment > 1)
@@ -1084,8 +1070,7 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
* needing to track if the fd is owned by the framework
* or by the caller.
*/
- guest_memfd = dup(guest_memfd);
- TEST_ASSERT(guest_memfd >= 0, __KVM_SYSCALL_ERROR("dup()", guest_memfd));
+ guest_memfd = kvm_dup(guest_memfd);
}
region->region.guest_memfd = guest_memfd;
@@ -1097,20 +1082,18 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
region->unused_phy_pages = sparsebit_alloc();
if (vm_arch_has_protected_memory(vm))
region->protected_phy_pages = sparsebit_alloc();
- sparsebit_set_num(region->unused_phy_pages,
- guest_paddr >> vm->page_shift, npages);
+ sparsebit_set_num(region->unused_phy_pages, gpa >> vm->page_shift, npages);
region->region.slot = slot;
region->region.flags = flags;
- region->region.guest_phys_addr = guest_paddr;
+ region->region.guest_phys_addr = gpa;
region->region.memory_size = npages * vm->page_size;
region->region.userspace_addr = (uintptr_t) region->host_mem;
ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);
TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n"
" rc: %i errno: %i\n"
" slot: %u flags: 0x%x\n"
- " guest_phys_addr: 0x%lx size: 0x%lx guest_memfd: %d",
- ret, errno, slot, flags,
- guest_paddr, (uint64_t) region->region.memory_size,
+ " guest_phys_addr: 0x%lx size: 0x%llx guest_memfd: %d",
+ ret, errno, slot, flags, gpa, region->region.memory_size,
region->region.guest_memfd);
/* Add to quick lookup data structures */
@@ -1132,10 +1115,10 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
void vm_userspace_mem_region_add(struct kvm_vm *vm,
enum vm_mem_backing_src_type src_type,
- uint64_t guest_paddr, uint32_t slot,
- uint64_t npages, uint32_t flags)
+ uint64_t gpa, uint32_t slot, uint64_t npages,
+ uint32_t flags)
{
- vm_mem_add(vm, src_type, guest_paddr, slot, npages, flags, -1, 0);
+ vm_mem_add(vm, src_type, gpa, slot, npages, flags, -1, 0);
}
/*
@@ -1201,6 +1184,16 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
ret, errno, slot, flags);
}
+void vm_mem_region_reload(struct kvm_vm *vm, uint32_t slot)
+{
+ struct userspace_mem_region *region = memslot2region(vm, slot);
+ struct kvm_userspace_memory_region2 tmp = region->region;
+
+ tmp.memory_size = 0;
+ vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &tmp);
+ vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);
+}
+
/*
* VM Memory Region Move
*
@@ -1456,8 +1449,6 @@ static vm_vaddr_t ____vm_vaddr_alloc(struct kvm_vm *vm, size_t sz,
pages--, vaddr += vm->page_size, paddr += vm->page_size) {
virt_pg_map(vm, vaddr, paddr);
-
- sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift);
}
return vaddr_start;
@@ -1571,7 +1562,6 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
while (npages--) {
virt_pg_map(vm, vaddr, paddr);
- sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift);
vaddr += page_size;
paddr += page_size;
@@ -2025,6 +2015,7 @@ static struct exit_reason {
KVM_EXIT_STRING(NOTIFY),
KVM_EXIT_STRING(LOONGARCH_IOCSR),
KVM_EXIT_STRING(MEMORY_FAULT),
+ KVM_EXIT_STRING(ARM_SEA),
};
/*
@@ -2305,11 +2296,35 @@ __weak void kvm_selftest_arch_init(void)
{
}
+static void report_unexpected_signal(int signum)
+{
+#define KVM_CASE_SIGNUM(sig) \
+ case sig: TEST_FAIL("Unexpected " #sig " (%d)\n", signum)
+
+ switch (signum) {
+ KVM_CASE_SIGNUM(SIGBUS);
+ KVM_CASE_SIGNUM(SIGSEGV);
+ KVM_CASE_SIGNUM(SIGILL);
+ KVM_CASE_SIGNUM(SIGFPE);
+ default:
+ TEST_FAIL("Unexpected signal %d\n", signum);
+ }
+}
+
void __attribute((constructor)) kvm_selftest_init(void)
{
+ struct sigaction sig_sa = {
+ .sa_handler = report_unexpected_signal,
+ };
+
/* Tell stdout not to buffer its content. */
setbuf(stdout, NULL);
+ sigaction(SIGBUS, &sig_sa, NULL);
+ sigaction(SIGSEGV, &sig_sa, NULL);
+ sigaction(SIGILL, &sig_sa, NULL);
+ sigaction(SIGFPE, &sig_sa, NULL);
+
guest_random_seed = last_guest_seed = random();
pr_info("Random seed: 0x%x\n", guest_random_seed);
diff --git a/tools/testing/selftests/kvm/lib/loongarch/exception.S b/tools/testing/selftests/kvm/lib/loongarch/exception.S
index 88bfa505c6f5..3f1e4b67c5ae 100644
--- a/tools/testing/selftests/kvm/lib/loongarch/exception.S
+++ b/tools/testing/selftests/kvm/lib/loongarch/exception.S
@@ -51,9 +51,15 @@ handle_exception:
st.d t0, sp, ESTAT_OFFSET_EXREGS
csrrd t0, LOONGARCH_CSR_BADV
st.d t0, sp, BADV_OFFSET_EXREGS
+ csrrd t0, LOONGARCH_CSR_PRMD
+ st.d t0, sp, PRMD_OFFSET_EXREGS
or a0, sp, zero
bl route_exception
+ ld.d t0, sp, PC_OFFSET_EXREGS
+ csrwr t0, LOONGARCH_CSR_ERA
+ ld.d t0, sp, PRMD_OFFSET_EXREGS
+ csrwr t0, LOONGARCH_CSR_PRMD
restore_gprs sp
csrrd sp, LOONGARCH_CSR_KS0
ertn
diff --git a/tools/testing/selftests/kvm/lib/loongarch/processor.c b/tools/testing/selftests/kvm/lib/loongarch/processor.c
index 0ac1abcb71cb..07c103369ddb 100644
--- a/tools/testing/selftests/kvm/lib/loongarch/processor.c
+++ b/tools/testing/selftests/kvm/lib/loongarch/processor.c
@@ -3,6 +3,7 @@
#include <assert.h>
#include <linux/compiler.h>
+#include <asm/kvm.h>
#include "kvm_util.h"
#include "processor.h"
#include "ucall_common.h"
@@ -11,6 +12,7 @@
#define LOONGARCH_GUEST_STACK_VADDR_MIN 0x200000
static vm_paddr_t invalid_pgtable[4];
+static vm_vaddr_t exception_handlers;
static uint64_t virt_pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level)
{
@@ -183,7 +185,14 @@ void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
void route_exception(struct ex_regs *regs)
{
+ int vector;
unsigned long pc, estat, badv;
+ struct handlers *handlers;
+
+ handlers = (struct handlers *)exception_handlers;
+ vector = (regs->estat & CSR_ESTAT_EXC) >> CSR_ESTAT_EXC_SHIFT;
+ if (handlers && handlers->exception_handlers[vector])
+ return handlers->exception_handlers[vector](regs);
pc = regs->pc;
badv = regs->badv;
@@ -192,6 +201,32 @@ void route_exception(struct ex_regs *regs)
while (1) ;
}
+void vm_init_descriptor_tables(struct kvm_vm *vm)
+{
+ void *addr;
+
+ vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers),
+ LOONGARCH_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA);
+
+ addr = addr_gva2hva(vm, vm->handlers);
+ memset(addr, 0, vm->page_size);
+ exception_handlers = vm->handlers;
+ sync_global_to_guest(vm, exception_handlers);
+}
+
+void vm_install_exception_handler(struct kvm_vm *vm, int vector, handler_fn handler)
+{
+ struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
+
+ assert(vector < VECTOR_NUM);
+ handlers->exception_handlers[vector] = handler;
+}
+
+uint32_t guest_get_vcpuid(void)
+{
+ return csr_read(LOONGARCH_CSR_CPUID);
+}
+
void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
{
int i;
@@ -211,6 +246,11 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
vcpu_regs_set(vcpu, &regs);
}
+static void loongarch_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val)
+{
+ __vcpu_set_reg(vcpu, id, val);
+}
+
static void loongarch_get_csr(struct kvm_vcpu *vcpu, uint64_t id, void *addr)
{
uint64_t csrid;
@@ -242,8 +282,8 @@ static void loongarch_vcpu_setup(struct kvm_vcpu *vcpu)
TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
}
- /* user mode and page enable mode */
- val = PLV_USER | CSR_CRMD_PG;
+ /* Kernel mode with paging enabled */
+ val = PLV_KERN | CSR_CRMD_PG;
loongarch_set_csr(vcpu, LOONGARCH_CSR_CRMD, val);
loongarch_set_csr(vcpu, LOONGARCH_CSR_PRMD, val);
loongarch_set_csr(vcpu, LOONGARCH_CSR_EUEN, 1);
@@ -251,7 +291,10 @@ static void loongarch_vcpu_setup(struct kvm_vcpu *vcpu)
loongarch_set_csr(vcpu, LOONGARCH_CSR_TCFG, 0);
loongarch_set_csr(vcpu, LOONGARCH_CSR_ASID, 1);
+ /* The timer counter starts from 0 */
val = 0;
+ loongarch_set_reg(vcpu, KVM_REG_LOONGARCH_COUNTER, val);
+
width = vm->page_shift - 3;
switch (vm->pgtable_levels) {
diff --git a/tools/testing/selftests/kvm/lib/x86/memstress.c b/tools/testing/selftests/kvm/lib/x86/memstress.c
index 7f5d62a65c68..0b1f288ad556 100644
--- a/tools/testing/selftests/kvm/lib/x86/memstress.c
+++ b/tools/testing/selftests/kvm/lib/x86/memstress.c
@@ -63,7 +63,7 @@ void memstress_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm)
{
uint64_t start, end;
- prepare_eptp(vmx, vm, 0);
+ prepare_eptp(vmx, vm);
/*
* Identity map the first 4G and the test region with 1G pages so that
diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c
index b418502c5ecc..36104d27f3d9 100644
--- a/tools/testing/selftests/kvm/lib/x86/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86/processor.c
@@ -158,10 +158,10 @@ bool kvm_is_tdp_enabled(void)
void virt_arch_pgd_alloc(struct kvm_vm *vm)
{
- TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
- "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+ TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K,
+ "Unknown or unsupported guest mode: 0x%x", vm->mode);
- /* If needed, create page map l4 table. */
+ /* If needed, create the top-level page table. */
if (!vm->pgd_created) {
vm->pgd = vm_alloc_page_table(vm);
vm->pgd_created = true;
@@ -218,11 +218,11 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level)
{
const uint64_t pg_size = PG_LEVEL_SIZE(level);
- uint64_t *pml4e, *pdpe, *pde;
- uint64_t *pte;
+ uint64_t *pte = &vm->pgd;
+ int current_level;
- TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K,
- "Unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+ TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K,
+ "Unknown or unsupported guest mode: 0x%x", vm->mode);
TEST_ASSERT((vaddr % pg_size) == 0,
"Virtual address not aligned,\n"
@@ -243,20 +243,17 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level)
* Allocate upper level page tables, if not already present. Return
* early if a hugepage was created.
*/
- pml4e = virt_create_upper_pte(vm, &vm->pgd, vaddr, paddr, PG_LEVEL_512G, level);
- if (*pml4e & PTE_LARGE_MASK)
- return;
-
- pdpe = virt_create_upper_pte(vm, pml4e, vaddr, paddr, PG_LEVEL_1G, level);
- if (*pdpe & PTE_LARGE_MASK)
- return;
-
- pde = virt_create_upper_pte(vm, pdpe, vaddr, paddr, PG_LEVEL_2M, level);
- if (*pde & PTE_LARGE_MASK)
- return;
+ for (current_level = vm->pgtable_levels;
+ current_level > PG_LEVEL_4K;
+ current_level--) {
+ pte = virt_create_upper_pte(vm, pte, vaddr, paddr,
+ current_level, level);
+ if (*pte & PTE_LARGE_MASK)
+ return;
+ }
/* Fill in page table entry. */
- pte = virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K);
+ pte = virt_get_pte(vm, pte, vaddr, PG_LEVEL_4K);
TEST_ASSERT(!(*pte & PTE_PRESENT_MASK),
"PTE already present for 4k page at vaddr: 0x%lx", vaddr);
*pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK);
@@ -289,6 +286,8 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
for (i = 0; i < nr_pages; i++) {
__virt_pg_map(vm, vaddr, paddr, level);
+ sparsebit_set_num(vm->vpages_mapped, vaddr >> vm->page_shift,
+ nr_bytes / PAGE_SIZE);
vaddr += pg_size;
paddr += pg_size;
@@ -310,40 +309,38 @@ static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level)
uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr,
int *level)
{
- uint64_t *pml4e, *pdpe, *pde;
+ int va_width = 12 + (vm->pgtable_levels) * 9;
+ uint64_t *pte = &vm->pgd;
+ int current_level;
TEST_ASSERT(!vm->arch.is_pt_protected,
"Walking page tables of protected guests is impossible");
- TEST_ASSERT(*level >= PG_LEVEL_NONE && *level < PG_LEVEL_NUM,
+ TEST_ASSERT(*level >= PG_LEVEL_NONE && *level <= vm->pgtable_levels,
"Invalid PG_LEVEL_* '%d'", *level);
- TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
- "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+ TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K,
+ "Unknown or unsupported guest mode: 0x%x", vm->mode);
TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
(vaddr >> vm->page_shift)),
"Invalid virtual address, vaddr: 0x%lx",
vaddr);
/*
- * Based on the mode check above there are 48 bits in the vaddr, so
- * shift 16 to sign extend the last bit (bit-47),
+ * Check that the vaddr is canonical, i.e. a sign-extended va_width-bit value.
*/
- TEST_ASSERT(vaddr == (((int64_t)vaddr << 16) >> 16),
- "Canonical check failed. The virtual address is invalid.");
-
- pml4e = virt_get_pte(vm, &vm->pgd, vaddr, PG_LEVEL_512G);
- if (vm_is_target_pte(pml4e, level, PG_LEVEL_512G))
- return pml4e;
-
- pdpe = virt_get_pte(vm, pml4e, vaddr, PG_LEVEL_1G);
- if (vm_is_target_pte(pdpe, level, PG_LEVEL_1G))
- return pdpe;
-
- pde = virt_get_pte(vm, pdpe, vaddr, PG_LEVEL_2M);
- if (vm_is_target_pte(pde, level, PG_LEVEL_2M))
- return pde;
+ TEST_ASSERT(vaddr ==
+ (((int64_t)vaddr << (64 - va_width) >> (64 - va_width))),
+ "Canonical check failed. The virtual address is invalid.");
+
+ for (current_level = vm->pgtable_levels;
+ current_level > PG_LEVEL_4K;
+ current_level--) {
+ pte = virt_get_pte(vm, pte, vaddr, current_level);
+ if (vm_is_target_pte(pte, level, current_level))
+ return pte;
+ }
- return virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K);
+ return virt_get_pte(vm, pte, vaddr, PG_LEVEL_4K);
}
uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr)
@@ -526,7 +523,8 @@ static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
struct kvm_sregs sregs;
- TEST_ASSERT_EQ(vm->mode, VM_MODE_PXXV48_4K);
+ TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K,
+ "Unknown or unsupported guest mode: 0x%x", vm->mode);
/* Set mode specific system register values. */
vcpu_sregs_get(vcpu, &sregs);
@@ -540,6 +538,8 @@ static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR;
if (kvm_cpu_has(X86_FEATURE_XSAVE))
sregs.cr4 |= X86_CR4_OSXSAVE;
+ if (vm->pgtable_levels == 5)
+ sregs.cr4 |= X86_CR4_LA57;
sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
kvm_seg_set_unusable(&sregs.ldt);
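
The generalized canonical-address check above derives the virtual-address width from the number of page-table levels (va_width = 12 + 9 * levels, i.e. 48 for 4-level and 57 for 5-level paging) and requires the address to equal its own sign-extension from that width. A standalone sketch with hypothetical addresses:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool is_canonical(uint64_t vaddr, int pgtable_levels)
{
	int va_width = 12 + pgtable_levels * 9;

	return vaddr == (uint64_t)(((int64_t)vaddr << (64 - va_width)) >> (64 - va_width));
}

int main(void)
{
	/* Canonical for 48-bit VAs (bits 63:47 all set)... */
	printf("%d\n", is_canonical(0xffff800000000000ULL, 4));
	/* ...not canonical for 48-bit VAs, but canonical once LA57 gives 57-bit VAs. */
	printf("%d\n", is_canonical(0xff00000000000000ULL, 4));
	printf("%d\n", is_canonical(0xff00000000000000ULL, 5));
	return 0;
}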
diff --git a/tools/testing/selftests/kvm/lib/x86/vmx.c b/tools/testing/selftests/kvm/lib/x86/vmx.c
index d4d1208dd023..29b082a58daa 100644
--- a/tools/testing/selftests/kvm/lib/x86/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86/vmx.c
@@ -401,11 +401,11 @@ void __nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
struct eptPageTableEntry *pt = vmx->eptp_hva, *pte;
uint16_t index;
- TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
- "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+ TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K,
+ "Unknown or unsupported guest mode: 0x%x", vm->mode);
TEST_ASSERT((nested_paddr >> 48) == 0,
- "Nested physical address 0x%lx requires 5-level paging",
+ "Nested physical address 0x%lx is > 48-bits and requires 5-level EPT",
nested_paddr);
TEST_ASSERT((nested_paddr % page_size) == 0,
"Nested physical address not on page boundary,\n"
@@ -534,8 +534,7 @@ bool kvm_cpu_has_ept(void)
return ctrl & SECONDARY_EXEC_ENABLE_EPT;
}
-void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint32_t eptp_memslot)
+void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm)
{
TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT");
diff --git a/tools/testing/selftests/kvm/loongarch/arch_timer.c b/tools/testing/selftests/kvm/loongarch/arch_timer.c
new file mode 100644
index 000000000000..355ecac30954
--- /dev/null
+++ b/tools/testing/selftests/kvm/loongarch/arch_timer.c
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This test validates periodic and one-shot constant timer IRQs using the
+ * CSR.TCFG and CSR.TVAL registers.
+ */
+#include "arch_timer.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "timer_test.h"
+#include "ucall_common.h"
+
+static void do_idle(void)
+{
+ unsigned int intid;
+ unsigned long estat;
+
+ __asm__ __volatile__("idle 0" : : : "memory");
+
+ estat = csr_read(LOONGARCH_CSR_ESTAT);
+ intid = !!(estat & BIT(INT_TI));
+
+ /* Make sure pending timer IRQ arrived */
+ GUEST_ASSERT_EQ(intid, 1);
+ csr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR);
+}
+
+static void guest_irq_handler(struct ex_regs *regs)
+{
+ unsigned int intid;
+ uint32_t cpu = guest_get_vcpuid();
+ uint64_t xcnt, val, cfg, xcnt_diff_us;
+ struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+ intid = !!(regs->estat & BIT(INT_TI));
+
+ /* Make sure we are dealing with the correct timer IRQ */
+ GUEST_ASSERT_EQ(intid, 1);
+
+ cfg = timer_get_cfg();
+ if (cfg & CSR_TCFG_PERIOD) {
+ WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter - 1);
+ if (shared_data->nr_iter == 0)
+ disable_timer();
+ csr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR);
+ return;
+ }
+
+ /*
+ * On a real machine, the value of LOONGARCH_CSR_TVAL is BIT_ULL(48) - 1.
+ * On a virtual machine, its value counts down from BIT_ULL(48) - 1.
+ */
+ val = timer_get_val();
+ xcnt = timer_get_cycles();
+ xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt);
+
+ /* Basic 'timer condition met' check */
+ __GUEST_ASSERT(val > cfg,
+ "val = 0x%lx, cfg = 0x%lx, xcnt_diff_us = 0x%lx",
+ val, cfg, xcnt_diff_us);
+
+ csr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR);
+ WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1);
+}
+
+static void guest_test_period_timer(uint32_t cpu)
+{
+ uint32_t irq_iter, config_iter;
+ uint64_t us;
+ struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+ shared_data->nr_iter = test_args.nr_iter;
+ shared_data->xcnt = timer_get_cycles();
+ us = msecs_to_usecs(test_args.timer_period_ms) + test_args.timer_err_margin_us;
+ timer_set_next_cmp_ms(test_args.timer_period_ms, true);
+
+ for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) {
+ /* Set up a timeout for the interrupt to arrive */
+ udelay(us);
+ }
+
+ irq_iter = READ_ONCE(shared_data->nr_iter);
+ __GUEST_ASSERT(irq_iter == 0,
+ "irq_iter = 0x%x.\n"
+ " Guest period timer interrupt was not triggered within the specified\n"
+ " interval, try to increase the error margin by [-e] option.\n",
+ irq_iter);
+}
+
+static void guest_test_oneshot_timer(uint32_t cpu)
+{
+ uint32_t irq_iter, config_iter;
+ uint64_t us;
+ struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+ shared_data->nr_iter = 0;
+ shared_data->guest_stage = 0;
+ us = msecs_to_usecs(test_args.timer_period_ms) + test_args.timer_err_margin_us;
+ for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) {
+ shared_data->xcnt = timer_get_cycles();
+
+ /* Set up the next interrupt */
+ timer_set_next_cmp_ms(test_args.timer_period_ms, false);
+ /* Set up a timeout for the interrupt to arrive */
+ udelay(us);
+
+ irq_iter = READ_ONCE(shared_data->nr_iter);
+ __GUEST_ASSERT(config_iter + 1 == irq_iter,
+ "config_iter + 1 = 0x%x, irq_iter = 0x%x.\n"
+ " Guest timer interrupt was not triggered within the specified\n"
+ " interval, try to increase the error margin by [-e] option.\n",
+ config_iter + 1, irq_iter);
+ }
+}
+
+static void guest_test_emulate_timer(uint32_t cpu)
+{
+ uint32_t config_iter;
+ uint64_t xcnt_diff_us, us;
+ struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+ local_irq_disable();
+ shared_data->nr_iter = 0;
+ us = msecs_to_usecs(test_args.timer_period_ms);
+ for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) {
+ shared_data->xcnt = timer_get_cycles();
+
+ /* Set up the next interrupt */
+ timer_set_next_cmp_ms(test_args.timer_period_ms, false);
+ do_idle();
+
+ xcnt_diff_us = cycles_to_usec(timer_get_cycles() - shared_data->xcnt);
+ __GUEST_ASSERT(xcnt_diff_us >= us,
+ "xcnt_diff_us = 0x%lx, us = 0x%lx.\n",
+ xcnt_diff_us, us);
+ }
+ local_irq_enable();
+}
+
+static void guest_time_count_test(uint32_t cpu)
+{
+ uint32_t config_iter;
+ unsigned long start, end, prev, us;
+
+ /* Assume the test case starts running within 1 second */
+ start = timer_get_cycles();
+ us = msec_to_cycles(1000);
+ __GUEST_ASSERT(start <= us,
+ "start = 0x%lx, us = 0x%lx.\n",
+ start, us);
+
+ us = msec_to_cycles(test_args.timer_period_ms);
+ for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) {
+ start = timer_get_cycles();
+ end = start + us;
+ /* Verify that the time count is always increasing */
+ while (start < end) {
+ prev = start;
+ start = timer_get_cycles();
+ __GUEST_ASSERT(prev <= start,
+ "prev = 0x%lx, start = 0x%lx.\n",
+ prev, start);
+ }
+ }
+}
+
+static void guest_code(void)
+{
+ uint32_t cpu = guest_get_vcpuid();
+
+ /* Must run first */
+ guest_time_count_test(cpu);
+
+ timer_irq_enable();
+ local_irq_enable();
+ guest_test_period_timer(cpu);
+ guest_test_oneshot_timer(cpu);
+ guest_test_emulate_timer(cpu);
+
+ GUEST_DONE();
+}
+
+struct kvm_vm *test_vm_create(void)
+{
+ struct kvm_vm *vm;
+ int nr_vcpus = test_args.nr_vcpus;
+
+ vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
+ vm_init_descriptor_tables(vm);
+ vm_install_exception_handler(vm, EXCCODE_INT, guest_irq_handler);
+
+ /* Make all the test's cmdline args visible to the guest */
+ sync_global_to_guest(vm, test_args);
+
+ return vm;
+}
+
+void test_vm_cleanup(struct kvm_vm *vm)
+{
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/mmu_stress_test.c b/tools/testing/selftests/kvm/mmu_stress_test.c
index 37b7e6524533..51c070556f3e 100644
--- a/tools/testing/selftests/kvm/mmu_stress_test.c
+++ b/tools/testing/selftests/kvm/mmu_stress_test.c
@@ -263,8 +263,10 @@ static void calc_default_nr_vcpus(void)
TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)",
errno, strerror(errno));
- nr_vcpus = CPU_COUNT(&possible_mask) * 3/4;
+ nr_vcpus = CPU_COUNT(&possible_mask);
TEST_ASSERT(nr_vcpus > 0, "Uh, no CPUs?");
+ if (nr_vcpus >= 2)
+ nr_vcpus = nr_vcpus * 3/4;
}
int main(int argc, char *argv[])
@@ -360,11 +362,9 @@ int main(int argc, char *argv[])
#ifdef __x86_64__
/* Identity map memory in the guest using 1gb pages. */
- for (i = 0; i < slot_size; i += SZ_1G)
- __virt_pg_map(vm, gpa + i, gpa + i, PG_LEVEL_1G);
+ virt_map_level(vm, gpa, gpa, slot_size, PG_LEVEL_1G);
#else
- for (i = 0; i < slot_size; i += vm->page_size)
- virt_pg_map(vm, gpa + i, gpa + i);
+ virt_map(vm, gpa, gpa, slot_size >> vm->page_shift);
#endif
}
diff --git a/tools/testing/selftests/kvm/pre_fault_memory_test.c b/tools/testing/selftests/kvm/pre_fault_memory_test.c
index f04768c1d2e4..93e603d91311 100644
--- a/tools/testing/selftests/kvm/pre_fault_memory_test.c
+++ b/tools/testing/selftests/kvm/pre_fault_memory_test.c
@@ -17,13 +17,13 @@
#define TEST_NPAGES (TEST_SIZE / PAGE_SIZE)
#define TEST_SLOT 10
-static void guest_code(uint64_t base_gpa)
+static void guest_code(uint64_t base_gva)
{
volatile uint64_t val __used;
int i;
for (i = 0; i < TEST_NPAGES; i++) {
- uint64_t *src = (uint64_t *)(base_gpa + i * PAGE_SIZE);
+ uint64_t *src = (uint64_t *)(base_gva + i * PAGE_SIZE);
val = *src;
}
@@ -161,6 +161,7 @@ static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 base_gpa, u64 offset,
static void __test_pre_fault_memory(unsigned long vm_type, bool private)
{
+ uint64_t gpa, gva, alignment, guest_page_size;
const struct vm_shape shape = {
.mode = VM_MODE_DEFAULT,
.type = vm_type,
@@ -170,35 +171,30 @@ static void __test_pre_fault_memory(unsigned long vm_type, bool private)
struct kvm_vm *vm;
struct ucall uc;
- uint64_t guest_test_phys_mem;
- uint64_t guest_test_virt_mem;
- uint64_t alignment, guest_page_size;
-
vm = vm_create_shape_with_one_vcpu(shape, &vcpu, guest_code);
alignment = guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size;
- guest_test_phys_mem = (vm->max_gfn - TEST_NPAGES) * guest_page_size;
+ gpa = (vm->max_gfn - TEST_NPAGES) * guest_page_size;
#ifdef __s390x__
alignment = max(0x100000UL, guest_page_size);
#else
alignment = SZ_2M;
#endif
- guest_test_phys_mem = align_down(guest_test_phys_mem, alignment);
- guest_test_virt_mem = guest_test_phys_mem & ((1ULL << (vm->va_bits - 1)) - 1);
+ gpa = align_down(gpa, alignment);
+ gva = gpa & ((1ULL << (vm->va_bits - 1)) - 1);
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
- guest_test_phys_mem, TEST_SLOT, TEST_NPAGES,
- private ? KVM_MEM_GUEST_MEMFD : 0);
- virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, TEST_NPAGES);
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa, TEST_SLOT,
+ TEST_NPAGES, private ? KVM_MEM_GUEST_MEMFD : 0);
+ virt_map(vm, gva, gpa, TEST_NPAGES);
if (private)
- vm_mem_set_private(vm, guest_test_phys_mem, TEST_SIZE);
+ vm_mem_set_private(vm, gpa, TEST_SIZE);
- pre_fault_memory(vcpu, guest_test_phys_mem, 0, SZ_2M, 0, private);
- pre_fault_memory(vcpu, guest_test_phys_mem, SZ_2M, PAGE_SIZE * 2, PAGE_SIZE, private);
- pre_fault_memory(vcpu, guest_test_phys_mem, TEST_SIZE, PAGE_SIZE, PAGE_SIZE, private);
+ pre_fault_memory(vcpu, gpa, 0, SZ_2M, 0, private);
+ pre_fault_memory(vcpu, gpa, SZ_2M, PAGE_SIZE * 2, PAGE_SIZE, private);
+ pre_fault_memory(vcpu, gpa, TEST_SIZE, PAGE_SIZE, PAGE_SIZE, private);
- vcpu_args_set(vcpu, 1, guest_test_virt_mem);
+ vcpu_args_set(vcpu, 1, gva);
vcpu_run(vcpu);
run = vcpu->run;
diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c
index 705ab3d7778b..cb54a56990a0 100644
--- a/tools/testing/selftests/kvm/riscv/get-reg-list.c
+++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c
@@ -133,6 +133,7 @@ bool filter_reg(__u64 reg)
case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_SUSP:
case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_STA:
case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_FWFT:
+ case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_MPXY:
case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_EXPERIMENTAL:
case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_VENDOR:
return true;
@@ -639,6 +640,7 @@ static const char *sbi_ext_single_id_to_str(__u64 reg_off)
KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_SUSP),
KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_STA),
KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_FWFT),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_MPXY),
KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_EXPERIMENTAL),
KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_VENDOR),
};
@@ -1142,6 +1144,7 @@ KVM_SBI_EXT_SUBLIST_CONFIG(sta, STA);
KVM_SBI_EXT_SIMPLE_CONFIG(pmu, PMU);
KVM_SBI_EXT_SIMPLE_CONFIG(dbcn, DBCN);
KVM_SBI_EXT_SIMPLE_CONFIG(susp, SUSP);
+KVM_SBI_EXT_SIMPLE_CONFIG(mpxy, MPXY);
KVM_SBI_EXT_SUBLIST_CONFIG(fwft, FWFT);
KVM_ISA_EXT_SUBLIST_CONFIG(aia, AIA);
@@ -1222,6 +1225,7 @@ struct vcpu_reg_list *vcpu_configs[] = {
&config_sbi_pmu,
&config_sbi_dbcn,
&config_sbi_susp,
+ &config_sbi_mpxy,
&config_sbi_fwft,
&config_aia,
&config_fp_f,
diff --git a/tools/testing/selftests/kvm/s390/user_operexec.c b/tools/testing/selftests/kvm/s390/user_operexec.c
new file mode 100644
index 000000000000..714906c1d12a
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390/user_operexec.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Test operation exception forwarding.
+ *
+ * Copyright IBM Corp. 2025
+ *
+ * Authors:
+ * Janosch Frank <frankja@linux.ibm.com>
+ */
+#include "kselftest.h"
+#include "kvm_util.h"
+#include "test_util.h"
+#include "sie.h"
+
+#include <linux/kvm.h>
+
+static void guest_code_instr0(void)
+{
+ asm(".word 0x0000");
+}
+
+static void test_user_instr0(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int rc;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_instr0);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_INSTR0, 0);
+ TEST_ASSERT_EQ(0, rc);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0);
+
+ kvm_vm_free(vm);
+}
+
+static void guest_code_user_operexec(void)
+{
+ asm(".word 0x0807");
+}
+
+static void test_user_operexec(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int rc;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_user_operexec);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0);
+ TEST_ASSERT_EQ(0, rc);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x0807);
+
+ kvm_vm_free(vm);
+
+ /*
+ * Since user_operexec is the superset, it can also be used for the
+ * 0x0000 instruction.
+ */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_instr0);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0);
+ TEST_ASSERT_EQ(0, rc);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0);
+
+ kvm_vm_free(vm);
+}
+
+/* Combine user_instr0 and user_operexec */
+static void test_user_operexec_combined(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int rc;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_user_operexec);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_INSTR0, 0);
+ TEST_ASSERT_EQ(0, rc);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0);
+ TEST_ASSERT_EQ(0, rc);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x0807);
+
+ kvm_vm_free(vm);
+
+ /* Reverse enablement order */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_user_operexec);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0);
+ TEST_ASSERT_EQ(0, rc);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_INSTR0, 0);
+ TEST_ASSERT_EQ(0, rc);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x0807);
+
+ kvm_vm_free(vm);
+}
+
+/*
+ * Run all tests above.
+ *
+ * Enabling the capabilities after a vCPU has been added is implicitly
+ * tested, since we enable them after vCPU creation.
+ */
+static struct testdef {
+ const char *name;
+ void (*test)(void);
+} testlist[] = {
+ { "instr0", test_user_instr0 },
+ { "operexec", test_user_operexec },
+ { "operexec_combined", test_user_operexec_combined},
+};
+
+int main(int argc, char *argv[])
+{
+ int idx;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_USER_INSTR0));
+
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(testlist));
+ for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+ testlist[idx].test();
+ ksft_test_result_pass("%s\n", testlist[idx].name);
+ }
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/kvm/x86/hyperv_features.c b/tools/testing/selftests/kvm/x86/hyperv_features.c
index 99d327084172..130b9ce7e5dd 100644
--- a/tools/testing/selftests/kvm/x86/hyperv_features.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_features.c
@@ -94,7 +94,7 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) {
input = pgs_gpa;
- output = pgs_gpa + 4096;
+ output = pgs_gpa + PAGE_SIZE;
} else {
input = output = 0;
}
diff --git a/tools/testing/selftests/kvm/x86/hyperv_ipi.c b/tools/testing/selftests/kvm/x86/hyperv_ipi.c
index 2b5b4bc6ef7e..ca61836c4e32 100644
--- a/tools/testing/selftests/kvm/x86/hyperv_ipi.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_ipi.c
@@ -102,7 +102,7 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
/* 'Slow' HvCallSendSyntheticClusterIpi to RECEIVER_VCPU_ID_1 */
ipi->vector = IPI_VECTOR;
ipi->cpu_mask = 1 << RECEIVER_VCPU_ID_1;
- hyperv_hypercall(HVCALL_SEND_IPI, pgs_gpa, pgs_gpa + 4096);
+ hyperv_hypercall(HVCALL_SEND_IPI, pgs_gpa, pgs_gpa + PAGE_SIZE);
nop_loop();
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
@@ -116,13 +116,13 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
GUEST_SYNC(stage++);
/* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_1 */
- memset(hcall_page, 0, 4096);
+ memset(hcall_page, 0, PAGE_SIZE);
ipi_ex->vector = IPI_VECTOR;
ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
ipi_ex->vp_set.valid_bank_mask = 1 << 0;
ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1);
hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET),
- pgs_gpa, pgs_gpa + 4096);
+ pgs_gpa, pgs_gpa + PAGE_SIZE);
nop_loop();
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
@@ -138,13 +138,13 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
GUEST_SYNC(stage++);
/* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_2 */
- memset(hcall_page, 0, 4096);
+ memset(hcall_page, 0, PAGE_SIZE);
ipi_ex->vector = IPI_VECTOR;
ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
ipi_ex->vp_set.valid_bank_mask = 1 << 1;
ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_2 - 64);
hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET),
- pgs_gpa, pgs_gpa + 4096);
+ pgs_gpa, pgs_gpa + PAGE_SIZE);
nop_loop();
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ipis_expected[0]);
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
@@ -160,14 +160,14 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
GUEST_SYNC(stage++);
/* 'Slow' HvCallSendSyntheticClusterIpiEx to both RECEIVER_VCPU_ID_{1,2} */
- memset(hcall_page, 0, 4096);
+ memset(hcall_page, 0, PAGE_SIZE);
ipi_ex->vector = IPI_VECTOR;
ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
ipi_ex->vp_set.valid_bank_mask = 1 << 1 | 1;
ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1);
ipi_ex->vp_set.bank_contents[1] = BIT(RECEIVER_VCPU_ID_2 - 64);
hyperv_hypercall(HVCALL_SEND_IPI_EX | (2 << HV_HYPERCALL_VARHEAD_OFFSET),
- pgs_gpa, pgs_gpa + 4096);
+ pgs_gpa, pgs_gpa + PAGE_SIZE);
nop_loop();
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
@@ -183,10 +183,10 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
GUEST_SYNC(stage++);
/* 'Slow' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL */
- memset(hcall_page, 0, 4096);
+ memset(hcall_page, 0, PAGE_SIZE);
ipi_ex->vector = IPI_VECTOR;
ipi_ex->vp_set.format = HV_GENERIC_SET_ALL;
- hyperv_hypercall(HVCALL_SEND_IPI_EX, pgs_gpa, pgs_gpa + 4096);
+ hyperv_hypercall(HVCALL_SEND_IPI_EX, pgs_gpa, pgs_gpa + PAGE_SIZE);
nop_loop();
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
diff --git a/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c
index 077cd0ec3040..a3b7ce155981 100644
--- a/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c
@@ -621,7 +621,7 @@ int main(int argc, char *argv[])
for (i = 0; i < NTEST_PAGES; i++) {
pte = vm_get_page_table_entry(vm, data->test_pages + i * PAGE_SIZE);
gpa = addr_hva2gpa(vm, pte);
- __virt_pg_map(vm, gva + PAGE_SIZE * i, gpa & PAGE_MASK, PG_LEVEL_4K);
+ virt_pg_map(vm, gva + PAGE_SIZE * i, gpa & PAGE_MASK);
data->test_pages_pte[i] = gva + (gpa & ~PAGE_MASK);
}
diff --git a/tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86/nested_close_kvm_test.c
index dad988351493..f001cb836bfa 100644
--- a/tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c
+++ b/tools/testing/selftests/kvm/x86/nested_close_kvm_test.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * vmx_close_while_nested
- *
* Copyright (C) 2019, Red Hat, Inc.
*
* Verify that nothing bad happens if a KVM user exits with open
@@ -12,6 +10,7 @@
#include "kvm_util.h"
#include "processor.h"
#include "vmx.h"
+#include "svm_util.h"
#include <string.h>
#include <sys/ioctl.h>
@@ -22,6 +21,8 @@ enum {
PORT_L0_EXIT = 0x2000,
};
+#define L2_GUEST_STACK_SIZE 64
+
static void l2_guest_code(void)
{
/* Exit to L0 */
@@ -29,9 +30,8 @@ static void l2_guest_code(void)
: : [port] "d" (PORT_L0_EXIT) : "rax");
}
-static void l1_guest_code(struct vmx_pages *vmx_pages)
+static void l1_vmx_code(struct vmx_pages *vmx_pages)
{
-#define L2_GUEST_STACK_SIZE 64
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
@@ -45,19 +45,43 @@ static void l1_guest_code(struct vmx_pages *vmx_pages)
GUEST_ASSERT(0);
}
+static void l1_svm_code(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ /* Prepare the VMCB for L2 execution. */
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(0);
+}
+
+static void l1_guest_code(void *data)
+{
+ if (this_cpu_has(X86_FEATURE_VMX))
+ l1_vmx_code(data);
+ else
+ l1_svm_code(data);
+}
+
int main(int argc, char *argv[])
{
- vm_vaddr_t vmx_pages_gva;
+ vm_vaddr_t guest_gva;
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) ||
+ kvm_cpu_has(X86_FEATURE_SVM));
vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
- /* Allocate VMX pages and shared descriptors (vmx_pages). */
- vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vcpu, 1, vmx_pages_gva);
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ vcpu_alloc_vmx(vm, &guest_gva);
+ else
+ vcpu_alloc_svm(vm, &guest_gva);
+
+ vcpu_args_set(vcpu, 1, guest_gva);
for (;;) {
volatile struct kvm_run *run = vcpu->run;
diff --git a/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c b/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c
new file mode 100644
index 000000000000..a6b6da9cf7fe
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025, Google LLC.
+ *
+ * This test verifies that L1 fails to enter L2 with an invalid CR3, and
+ * succeeds once a valid CR3 is restored.
+ */
+#include "kvm_util.h"
+#include "vmx.h"
+#include "svm_util.h"
+#include "kselftest.h"
+
+
+#define L2_GUEST_STACK_SIZE 64
+
+static void l2_guest_code(void)
+{
+ vmcall();
+}
+
+static void l1_svm_code(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ uintptr_t save_cr3;
+
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* Try to run L2 with invalid CR3 and make sure it fails */
+ save_cr3 = svm->vmcb->save.cr3;
+ svm->vmcb->save.cr3 = -1ull;
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_ERR);
+
+ /* Now restore CR3 and make sure L2 runs successfully */
+ svm->vmcb->save.cr3 = save_cr3;
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+
+ GUEST_DONE();
+}
+
+static void l1_vmx_code(struct vmx_pages *vmx_pages)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ uintptr_t save_cr3;
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* Try to run L2 with invalid CR3 and make sure it fails */
+ save_cr3 = vmreadz(GUEST_CR3);
+ vmwrite(GUEST_CR3, -1ull);
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) ==
+ (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE));
+
+ /* Now restore CR3 and make sure L2 runs successfully */
+ vmwrite(GUEST_CR3, save_cr3);
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ GUEST_DONE();
+}
+
+static void l1_guest_code(void *data)
+{
+ if (this_cpu_has(X86_FEATURE_VMX))
+ l1_vmx_code(data);
+ else
+ l1_svm_code(data);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ vm_vaddr_t guest_gva = 0;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) ||
+ kvm_cpu_has(X86_FEATURE_SVM));
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ vcpu_alloc_vmx(vm, &guest_gva);
+ else
+ vcpu_alloc_svm(vm, &guest_gva);
+
+ vcpu_args_set(vcpu, 1, guest_gva);
+
+ for (;;) {
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c
index 2ceb5c78c442..2839f650e5c9 100644
--- a/tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c
+++ b/tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * vmx_tsc_adjust_test
- *
* Copyright (C) 2018, Google LLC.
*
* IA32_TSC_ADJUST test
@@ -22,6 +20,7 @@
#include "kvm_util.h"
#include "processor.h"
#include "vmx.h"
+#include "svm_util.h"
#include <string.h>
#include <sys/ioctl.h>
@@ -35,6 +34,8 @@
#define TSC_ADJUST_VALUE (1ll << 32)
#define TSC_OFFSET_VALUE -(1ll << 48)
+#define L2_GUEST_STACK_SIZE 64
+
enum {
PORT_ABORT = 0x1000,
PORT_REPORT,
@@ -72,42 +73,47 @@ static void l2_guest_code(void)
__asm__ __volatile__("vmcall");
}
-static void l1_guest_code(struct vmx_pages *vmx_pages)
+static void l1_guest_code(void *data)
{
-#define L2_GUEST_STACK_SIZE 64
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
- uint32_t control;
- uintptr_t save_cr3;
+ /* Set TSC from L1 and make sure TSC_ADJUST is updated correctly */
GUEST_ASSERT(rdtsc() < TSC_ADJUST_VALUE);
wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE);
check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
- GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
- GUEST_ASSERT(load_vmcs(vmx_pages));
-
- /* Prepare the VMCS for L2 execution. */
- prepare_vmcs(vmx_pages, l2_guest_code,
- &l2_guest_stack[L2_GUEST_STACK_SIZE]);
- control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
- control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
- vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
- vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE);
-
- /* Jump into L2. First, test failure to load guest CR3. */
- save_cr3 = vmreadz(GUEST_CR3);
- vmwrite(GUEST_CR3, -1ull);
- GUEST_ASSERT(!vmlaunch());
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) ==
- (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE));
- check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
- vmwrite(GUEST_CR3, save_cr3);
-
- GUEST_ASSERT(!vmlaunch());
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ /*
+ * Run L2 with TSC_OFFSET. L2 will write to TSC, and L1 is not
+ * intercepting the write so it should update L1's TSC_ADJUST.
+ */
+ if (this_cpu_has(X86_FEATURE_VMX)) {
+ struct vmx_pages *vmx_pages = data;
+ uint32_t control;
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+ control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+ vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE);
+
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ } else {
+ struct svm_test_data *svm = data;
+
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ svm->vmcb->control.tsc_offset = TSC_OFFSET_VALUE;
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ }
check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE);
-
GUEST_DONE();
}
@@ -119,16 +125,19 @@ static void report(int64_t val)
int main(int argc, char *argv[])
{
- vm_vaddr_t vmx_pages_gva;
+ vm_vaddr_t nested_gva;
struct kvm_vcpu *vcpu;
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) ||
+ kvm_cpu_has(X86_FEATURE_SVM));
- vm = vm_create_with_one_vcpu(&vcpu, (void *) l1_guest_code);
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ vcpu_alloc_vmx(vm, &nested_gva);
+ else
+ vcpu_alloc_svm(vm, &nested_gva);
- /* Allocate VMX pages and shared descriptors (vmx_pages). */
- vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vcpu, 1, vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, nested_gva);
for (;;) {
struct ucall uc;
diff --git a/tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c
index 1759fa5cb3f2..4260c9e4f489 100644
--- a/tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c
+++ b/tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c
@@ -13,6 +13,7 @@
#include "kvm_util.h"
#include "vmx.h"
+#include "svm_util.h"
#include "kselftest.h"
/* L2 is scaled up (from L1's perspective) by this factor */
@@ -79,7 +80,30 @@ static void l2_guest_code(void)
__asm__ __volatile__("vmcall");
}
-static void l1_guest_code(struct vmx_pages *vmx_pages)
+static void l1_svm_code(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ /* check that L1's frequency looks alright before launching L2 */
+ check_tsc_freq(UCHECK_L1);
+
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* enable TSC scaling for L2 */
+ wrmsr(MSR_AMD64_TSC_RATIO, L2_SCALE_FACTOR << 32);
+
+ /* launch L2 */
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+
+ /* check that L1's frequency still looks good */
+ check_tsc_freq(UCHECK_L1);
+
+ GUEST_DONE();
+}
+
+static void l1_vmx_code(struct vmx_pages *vmx_pages)
{
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
uint32_t control;
@@ -116,11 +140,19 @@ static void l1_guest_code(struct vmx_pages *vmx_pages)
GUEST_DONE();
}
+static void l1_guest_code(void *data)
+{
+ if (this_cpu_has(X86_FEATURE_VMX))
+ l1_vmx_code(data);
+ else
+ l1_svm_code(data);
+}
+
int main(int argc, char *argv[])
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
- vm_vaddr_t vmx_pages_gva;
+ vm_vaddr_t guest_gva = 0;
uint64_t tsc_start, tsc_end;
uint64_t tsc_khz;
@@ -129,7 +161,8 @@ int main(int argc, char *argv[])
uint64_t l1_tsc_freq = 0;
uint64_t l2_tsc_freq = 0;
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) ||
+ kvm_cpu_has(X86_FEATURE_SVM));
TEST_REQUIRE(kvm_has_cap(KVM_CAP_TSC_CONTROL));
TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
@@ -152,8 +185,13 @@ int main(int argc, char *argv[])
printf("real TSC frequency is around: %"PRIu64"\n", l0_tsc_freq);
vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
- vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ vcpu_alloc_vmx(vm, &guest_gva);
+ else
+ vcpu_alloc_svm(vm, &guest_gva);
+
+ vcpu_args_set(vcpu, 1, guest_gva);
tsc_khz = __vcpu_ioctl(vcpu, KVM_GET_TSC_KHZ, NULL);
TEST_ASSERT(tsc_khz != -1, "vcpu ioctl KVM_GET_TSC_KHZ failed");
diff --git a/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c
index 82a8d88b5338..1969f4ab9b28 100644
--- a/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c
+++ b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c
@@ -380,7 +380,7 @@ static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t
struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
pthread_t threads[KVM_MAX_VCPUS];
struct kvm_vm *vm;
- int memfd, i, r;
+ int memfd, i;
const struct vm_shape shape = {
.mode = VM_MODE_DEFAULT,
@@ -428,11 +428,8 @@ static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t
* should prevent the VM from being fully destroyed until the last
* reference to the guest_memfd is also put.
*/
- r = fallocate(memfd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, memfd_size);
- TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
-
- r = fallocate(memfd, FALLOC_FL_KEEP_SIZE, 0, memfd_size);
- TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
+ kvm_fallocate(memfd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, memfd_size);
+ kvm_fallocate(memfd, FALLOC_FL_KEEP_SIZE, 0, memfd_size);
close(memfd);
}
diff --git a/tools/testing/selftests/kvm/x86/sev_smoke_test.c b/tools/testing/selftests/kvm/x86/sev_smoke_test.c
index 77256c89bb8d..86ad1c7d068f 100644
--- a/tools/testing/selftests/kvm/x86/sev_smoke_test.c
+++ b/tools/testing/selftests/kvm/x86/sev_smoke_test.c
@@ -104,7 +104,7 @@ static void test_sync_vmsa(uint32_t type, uint64_t policy)
vm_sev_launch(vm, policy, NULL);
/* This page is shared, so make it decrypted. */
- memset(hva, 0, 4096);
+ memset(hva, 0, PAGE_SIZE);
vcpu_run(vcpu);
diff --git a/tools/testing/selftests/kvm/x86/state_test.c b/tools/testing/selftests/kvm/x86/state_test.c
index 141b7fc0c965..f2c7a1c297e3 100644
--- a/tools/testing/selftests/kvm/x86/state_test.c
+++ b/tools/testing/selftests/kvm/x86/state_test.c
@@ -141,7 +141,7 @@ static void __attribute__((__flatten__)) guest_code(void *arg)
if (this_cpu_has(X86_FEATURE_XSAVE)) {
uint64_t supported_xcr0 = this_cpu_supported_xcr0();
- uint8_t buffer[4096];
+ uint8_t buffer[PAGE_SIZE];
memset(buffer, 0xcc, sizeof(buffer));
diff --git a/tools/testing/selftests/kvm/x86/userspace_io_test.c b/tools/testing/selftests/kvm/x86/userspace_io_test.c
index 9481cbcf284f..be7d72f3c029 100644
--- a/tools/testing/selftests/kvm/x86/userspace_io_test.c
+++ b/tools/testing/selftests/kvm/x86/userspace_io_test.c
@@ -85,7 +85,7 @@ int main(int argc, char *argv[])
regs.rcx = 1;
if (regs.rcx == 3)
regs.rcx = 8192;
- memset((void *)run + run->io.data_offset, 0xaa, 4096);
+ memset((void *)run + run->io.data_offset, 0xaa, PAGE_SIZE);
vcpu_regs_set(vcpu, &regs);
}
diff --git a/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c
index fa512d033205..98cb6bdab3e6 100644
--- a/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c
+++ b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c
@@ -120,17 +120,17 @@ static void test_vmx_dirty_log(bool enable_ept)
* GPAs as the EPT enabled case.
*/
if (enable_ept) {
- prepare_eptp(vmx, vm, 0);
+ prepare_eptp(vmx, vm);
nested_map_memslot(vmx, vm, 0);
- nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096);
- nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096);
+ nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, PAGE_SIZE);
+ nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, PAGE_SIZE);
}
bmap = bitmap_zalloc(TEST_MEM_PAGES);
host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM);
while (!done) {
- memset(host_test_mem, 0xaa, TEST_MEM_PAGES * 4096);
+ memset(host_test_mem, 0xaa, TEST_MEM_PAGES * PAGE_SIZE);
vcpu_run(vcpu);
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
@@ -153,9 +153,9 @@ static void test_vmx_dirty_log(bool enable_ept)
}
TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty");
- TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest");
+ TEST_ASSERT(host_test_mem[PAGE_SIZE / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest");
TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty");
- TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest");
+ TEST_ASSERT(host_test_mem[PAGE_SIZE * 2 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest");
break;
case UCALL_DONE:
done = true;
diff --git a/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c b/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c
new file mode 100644
index 000000000000..cf1d2d1f2a8f
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025, Google LLC.
+ *
+ * Test KVM's ability to save and restore nested state when the L1 guest
+ * is using 5-level paging and the L2 guest is using 4-level paging.
+ *
+ * This test would have failed prior to commit 9245fd6b8531 ("KVM: x86:
+ * model canonical checks more precisely").
+ */
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#define LA57_GS_BASE 0xff2bc0311fb00000ull
+
+static void l2_guest_code(void)
+{
+ /*
+ * Sync with L0 to trigger save/restore. After
+ * resuming, execute VMCALL to exit back to L1.
+ */
+ GUEST_SYNC(1);
+ vmcall();
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ u64 guest_cr4;
+ vm_paddr_t pml5_pa, pml4_pa;
+ u64 *pml5;
+ u64 exit_reason;
+
+ /* Set GS_BASE to a value that is only canonical with LA57. */
+ wrmsr(MSR_GS_BASE, LA57_GS_BASE);
+ GUEST_ASSERT(rdmsr(MSR_GS_BASE) == LA57_GS_BASE);
+
+ GUEST_ASSERT(vmx_pages->vmcs_gpa);
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /*
+ * Set up L2 with a 4-level page table by pointing its CR3 to
+ * L1's first PML4 table and clearing CR4.LA57. This creates
+ * the CR4.LA57 mismatch that exercises the bug.
+ */
+ pml5_pa = get_cr3() & PHYSICAL_PAGE_MASK;
+ pml5 = (u64 *)pml5_pa;
+ pml4_pa = pml5[0] & PHYSICAL_PAGE_MASK;
+ vmwrite(GUEST_CR3, pml4_pa);
+
+ guest_cr4 = vmreadz(GUEST_CR4);
+ guest_cr4 &= ~X86_CR4_LA57;
+ vmwrite(GUEST_CR4, guest_cr4);
+
+ GUEST_ASSERT(!vmlaunch());
+
+ exit_reason = vmreadz(VM_EXIT_REASON);
+ GUEST_ASSERT(exit_reason == EXIT_REASON_VMCALL);
+}
+
+void guest_code(struct vmx_pages *vmx_pages)
+{
+ l1_guest_code(vmx_pages);
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t vmx_pages_gva = 0;
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ struct kvm_x86_state *state;
+ struct ucall uc;
+ int stage;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_LA57));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ /*
+ * L1 needs to read its own PML5 table to set up L2. Identity map
+ * the PML5 table to facilitate this.
+ */
+ virt_map(vm, vm->pgd, vm->pgd, 1);
+
+ vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+ for (stage = 1;; stage++) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ TEST_ASSERT(uc.args[1] == stage,
+ "Expected stage %d, got stage %lu", stage, (ulong)uc.args[1]);
+ if (stage == 1) {
+ pr_info("L2 is active; performing save/restore.\n");
+ state = vcpu_save_state(vcpu);
+
+ kvm_vm_release(vm);
+
+ /* Restore state in a new VM. */
+ vcpu = vm_recreate_with_one_vcpu(vm);
+ vcpu_load_state(vcpu, state);
+ kvm_x86_state_cleanup(state);
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c
index 35cb9de54a82..ae4a4b6c05ca 100644
--- a/tools/testing/selftests/kvm/x86/xapic_ipi_test.c
+++ b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c
@@ -256,7 +256,7 @@ void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
int nodes = 0;
time_t start_time, last_update, now;
time_t interval_secs = 1;
- int i, r;
+ int i;
int from, to;
unsigned long bit;
uint64_t hlt_count;
@@ -267,9 +267,8 @@ void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
delay_usecs);
/* Get set of first 64 numa nodes available */
- r = get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8,
+ kvm_get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8,
0, MPOL_F_MEMS_ALLOWED);
- TEST_ASSERT(r == 0, "get_mempolicy failed errno=%d", errno);
fprintf(stderr, "Numa nodes found amongst first %lu possible nodes "
"(each 1-bit indicates node is present): %#lx\n",
diff --git a/tools/testing/selftests/mm/guard-regions.c b/tools/testing/selftests/mm/guard-regions.c
index 8dd81c0a4a5a..795bf3f39f44 100644
--- a/tools/testing/selftests/mm/guard-regions.c
+++ b/tools/testing/selftests/mm/guard-regions.c
@@ -94,6 +94,7 @@ static void *mmap_(FIXTURE_DATA(guard_regions) * self,
case ANON_BACKED:
flags |= MAP_PRIVATE | MAP_ANON;
fd = -1;
+ offset = 0;
break;
case SHMEM_BACKED:
case LOCAL_FILE_BACKED:
@@ -260,6 +261,54 @@ static bool is_buf_eq(char *buf, size_t size, char chr)
return true;
}
+/*
+ * Some file systems have issues with merging because they change
+ * merge-sensitive parameters in the .mmap callback; until .mmap_prepare is
+ * implemented everywhere, this results in an unexpected failure to merge
+ * (e.g. overlayfs).
+ *
+ * Perform a simple test to see if the local file system suffers from this;
+ * if it does, we can skip test logic that assumes local file system merging
+ * is sane.
+ */
+static bool local_fs_has_sane_mmap(FIXTURE_DATA(guard_regions) * self,
+ const FIXTURE_VARIANT(guard_regions) * variant)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr, *ptr2;
+ struct procmap_fd procmap;
+
+ if (variant->backing != LOCAL_FILE_BACKED)
+ return true;
+
+ /* Map 10 pages. */
+ ptr = mmap_(self, variant, NULL, 10 * page_size, PROT_READ | PROT_WRITE, 0, 0);
+ if (ptr == MAP_FAILED)
+ return false;
+ /* Unmap the middle. */
+ munmap(&ptr[5 * page_size], page_size);
+
+ /* Map again. */
+ ptr2 = mmap_(self, variant, &ptr[5 * page_size], page_size, PROT_READ | PROT_WRITE,
+ MAP_FIXED, 5 * page_size);
+
+ if (ptr2 == MAP_FAILED)
+ return false;
+
+ /* Now make sure they all merged. */
+ if (open_self_procmap(&procmap) != 0)
+ return false;
+ if (!find_vma_procmap(&procmap, ptr))
+ return false;
+ if (procmap.query.vma_start != (unsigned long)ptr)
+ return false;
+ if (procmap.query.vma_end != (unsigned long)ptr + 10 * page_size)
+ return false;
+ close_procmap(&procmap);
+
+ return true;
+}
+
FIXTURE_SETUP(guard_regions)
{
self->page_size = (unsigned long)sysconf(_SC_PAGESIZE);
@@ -2138,4 +2187,140 @@ TEST_F(guard_regions, pagemap_scan)
ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
}
+TEST_F(guard_regions, collapse)
+{
+ const unsigned long page_size = self->page_size;
+ const unsigned long size = 2 * HPAGE_SIZE;
+ const unsigned long num_pages = size / page_size;
+ char *ptr;
+ int i;
+
+ /* Need file to be correct size for tests for non-anon. */
+ if (variant->backing != ANON_BACKED)
+ ASSERT_EQ(ftruncate(self->fd, size), 0);
+
+ /*
+ * We must close and re-open local-file backed as read-only for
+ * CONFIG_READ_ONLY_THP_FOR_FS to work.
+ */
+ if (variant->backing == LOCAL_FILE_BACKED) {
+ ASSERT_EQ(close(self->fd), 0);
+
+ self->fd = open(self->path, O_RDONLY);
+ ASSERT_GE(self->fd, 0);
+ }
+
+ ptr = mmap_(self, variant, NULL, size, PROT_READ, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Prevent being faulted-in as huge. */
+ ASSERT_EQ(madvise(ptr, size, MADV_NOHUGEPAGE), 0);
+ /* Fault in. */
+ ASSERT_EQ(madvise(ptr, size, MADV_POPULATE_READ), 0);
+
+ /* Install guard regions in every other page. */
+ for (i = 0; i < num_pages; i += 2) {
+ char *ptr_page = &ptr[i * page_size];
+
+ ASSERT_EQ(madvise(ptr_page, page_size, MADV_GUARD_INSTALL), 0);
+ /* Accesses should now fail. */
+ ASSERT_FALSE(try_read_buf(ptr_page));
+ }
+
+ /* Allow huge page throughout region. */
+ ASSERT_EQ(madvise(ptr, size, MADV_HUGEPAGE), 0);
+
+ /*
+ * Now collapse the entire region. This should fail in all cases.
+ *
+ * The madvise() call will also fail if CONFIG_READ_ONLY_THP_FOR_FS is
+ * not set for the local file case, but we can't differentiate whether
+ * this occurred or if the collapse was rightly rejected.
+ */
+ EXPECT_NE(madvise(ptr, size, MADV_COLLAPSE), 0);
+
+ /*
+ * If we introduce a bug that causes the collapse to succeed, gather
+ * data on whether guard regions are at least preserved. The test will
+ * fail at this point in any case.
+ */
+ for (i = 0; i < num_pages; i += 2) {
+ char *ptr_page = &ptr[i * page_size];
+
+ /* Accesses should still fail. */
+ ASSERT_FALSE(try_read_buf(ptr_page));
+ }
+}
+
+TEST_F(guard_regions, smaps)
+{
+ const unsigned long page_size = self->page_size;
+ struct procmap_fd procmap;
+ char *ptr, *ptr2;
+ int i;
+
+ /* Map a region. */
+ ptr = mmap_(self, variant, NULL, 10 * page_size, PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* We shouldn't yet see a guard flag. */
+ ASSERT_FALSE(check_vmflag_guard(ptr));
+
+ /* Install a single guard region. */
+ ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0);
+
+ /* Now we should see a guard flag. */
+ ASSERT_TRUE(check_vmflag_guard(ptr));
+
+ /*
+ * Removing the guard region should not change things because we simply
+ * cannot accurately track whether a given VMA has had all of its guard
+ * regions removed.
+ */
+ ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_REMOVE), 0);
+ ASSERT_TRUE(check_vmflag_guard(ptr));
+
+ /* Install guard regions throughout. */
+ for (i = 0; i < 10; i++) {
+ ASSERT_EQ(madvise(&ptr[i * page_size], page_size, MADV_GUARD_INSTALL), 0);
+ /* We should always see the guard region flag. */
+ ASSERT_TRUE(check_vmflag_guard(ptr));
+ }
+
+ /* Split into two VMAs. */
+ ASSERT_EQ(munmap(&ptr[4 * page_size], page_size), 0);
+
+ /* Both VMAs should have the guard flag set. */
+ ASSERT_TRUE(check_vmflag_guard(ptr));
+ ASSERT_TRUE(check_vmflag_guard(&ptr[5 * page_size]));
+
+ /*
+ * If the local file system is unable to merge VMAs due to having
+ * unusual characteristics, there is no point in asserting merge
+ * behaviour.
+ */
+ if (!local_fs_has_sane_mmap(self, variant)) {
+ TH_LOG("local filesystem does not support sane merging skipping merge test");
+ return;
+ }
+
+ /* Map a fresh VMA between the two split VMAs. */
+ ptr2 = mmap_(self, variant, &ptr[4 * page_size], page_size,
+ PROT_READ | PROT_WRITE, MAP_FIXED, 4 * page_size);
+ ASSERT_NE(ptr2, MAP_FAILED);
+
+ /*
+ * Check the procmap to ensure that this VMA merged with the adjacent
+ * two. The guard region flag is 'sticky' so should not preclude
+ * merging.
+ */
+ ASSERT_EQ(open_self_procmap(&procmap), 0);
+ ASSERT_TRUE(find_vma_procmap(&procmap, ptr));
+ ASSERT_EQ(procmap.query.vma_start, (unsigned long)ptr);
+ ASSERT_EQ(procmap.query.vma_end, (unsigned long)ptr + 10 * page_size);
+ ASSERT_EQ(close_procmap(&procmap), 0);
+ /* And, of course, this VMA should have the guard flag set. */
+ ASSERT_TRUE(check_vmflag_guard(ptr));
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/mm/gup_test.c b/tools/testing/selftests/mm/gup_test.c
index 8900b840c17a..40c1538a17b4 100644
--- a/tools/testing/selftests/mm/gup_test.c
+++ b/tools/testing/selftests/mm/gup_test.c
@@ -17,9 +17,8 @@
#define MB (1UL << 20)
-/* Just the flags we need, copied from mm.h: */
+/* Just the flags we need, copied from the kernel internals. */
#define FOLL_WRITE 0x01 /* check pte is writable */
-#define FOLL_TOUCH 0x02 /* mark page accessed */
#define GUP_TEST_FILE "/sys/kernel/debug/gup_test"
@@ -93,7 +92,7 @@ int main(int argc, char **argv)
{
struct gup_test gup = { 0 };
int filed, i, opt, nr_pages = 1, thp = -1, write = 1, nthreads = 1, ret;
- int flags = MAP_PRIVATE, touch = 0;
+ int flags = MAP_PRIVATE;
char *file = "/dev/zero";
pthread_t *tid;
char *p;
@@ -170,10 +169,6 @@ int main(int argc, char **argv)
case 'H':
flags |= (MAP_HUGETLB | MAP_ANONYMOUS);
break;
- case 'z':
- /* fault pages in gup, do not fault in userland */
- touch = 1;
- break;
default:
ksft_exit_fail_msg("Wrong argument\n");
}
@@ -244,18 +239,9 @@ int main(int argc, char **argv)
else if (thp == 0)
madvise(p, size, MADV_NOHUGEPAGE);
- /*
- * FOLL_TOUCH, in gup_test, is used as an either/or case: either
- * fault pages in from the kernel via FOLL_TOUCH, or fault them
- * in here, from user space. This allows comparison of performance
- * between those two cases.
- */
- if (touch) {
- gup.gup_flags |= FOLL_TOUCH;
- } else {
- for (; (unsigned long)p < gup.addr + size; p += psize())
- p[0] = 0;
- }
+ /* Fault them in here, from user space. */
+ for (; (unsigned long)p < gup.addr + size; p += psize())
+ p[0] = 0;
tid = malloc(sizeof(pthread_t) * nthreads);
assert(tid);
diff --git a/tools/testing/selftests/mm/hmm-tests.c b/tools/testing/selftests/mm/hmm-tests.c
index 15aadaf24a66..5a1525f72daa 100644
--- a/tools/testing/selftests/mm/hmm-tests.c
+++ b/tools/testing/selftests/mm/hmm-tests.c
@@ -25,6 +25,7 @@
#include <sys/stat.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
+#include <sys/time.h>
/*
@@ -50,6 +51,8 @@ enum {
HMM_COHERENCE_DEVICE_TWO,
};
+#define ONEKB (1 << 10)
+#define ONEMEG (1 << 20)
#define TWOMEG (1 << 21)
#define HMM_BUFFER_SIZE (1024 << 12)
#define HMM_PATH_MAX 64
@@ -207,8 +210,10 @@ static void hmm_buffer_free(struct hmm_buffer *buffer)
if (buffer == NULL)
return;
- if (buffer->ptr)
+ if (buffer->ptr) {
munmap(buffer->ptr, buffer->size);
+ buffer->ptr = NULL;
+ }
free(buffer->mirror);
free(buffer);
}
@@ -525,6 +530,8 @@ TEST_F(hmm, anon_write_prot)
/*
* Check that a device writing an anonymous private mapping
* will copy-on-write if a child process inherits the mapping.
+ *
+ * Also verifies that, after fork(), device memory can be read by the child.
*/
TEST_F(hmm, anon_write_child)
{
@@ -532,72 +539,101 @@ TEST_F(hmm, anon_write_child)
unsigned long npages;
unsigned long size;
unsigned long i;
+ void *old_ptr;
+ void *map;
int *ptr;
pid_t pid;
int child_fd;
- int ret;
-
- npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
- ASSERT_NE(npages, 0);
- size = npages << self->page_shift;
-
- buffer = malloc(sizeof(*buffer));
- ASSERT_NE(buffer, NULL);
-
- buffer->fd = -1;
- buffer->size = size;
- buffer->mirror = malloc(size);
- ASSERT_NE(buffer->mirror, NULL);
-
- buffer->ptr = mmap(NULL, size,
- PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS,
- buffer->fd, 0);
- ASSERT_NE(buffer->ptr, MAP_FAILED);
-
- /* Initialize buffer->ptr so we can tell if it is written. */
- for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
- ptr[i] = i;
-
- /* Initialize data that the device will write to buffer->ptr. */
- for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
- ptr[i] = -i;
+ int ret, use_thp, migrate;
+
+ for (migrate = 0; migrate < 2; ++migrate) {
+ for (use_thp = 0; use_thp < 2; ++use_thp) {
+ npages = ALIGN(use_thp ? TWOMEG : HMM_BUFFER_SIZE,
+ self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size * 2;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size * 2,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ old_ptr = buffer->ptr;
+ if (use_thp) {
+ map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ buffer->ptr = map;
+ }
+
+ /* Initialize buffer->ptr so we can tell if it is written. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Initialize data that the device will write to buffer->ptr. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ptr[i] = -i;
+
+ if (migrate) {
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ }
+
+ pid = fork();
+ if (pid == -1)
+ ASSERT_EQ(pid, 0);
+ if (pid != 0) {
+ waitpid(pid, &ret, 0);
+ ASSERT_EQ(WIFEXITED(ret), 1);
+
+ /* Check that the parent's buffer did not change. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ buffer->ptr = old_ptr;
+ hmm_buffer_free(buffer);
+ continue;
+ }
+
+ /* Check that we see the parent's values. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+ if (!migrate) {
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], -i);
+ }
+
+ /* The child process needs its own mirror to its own mm. */
+ child_fd = hmm_open(0);
+ ASSERT_GE(child_fd, 0);
+
+ /* Simulate a device writing system memory. */
+ ret = hmm_dmirror_cmd(child_fd, HMM_DMIRROR_WRITE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
- pid = fork();
- if (pid == -1)
- ASSERT_EQ(pid, 0);
- if (pid != 0) {
- waitpid(pid, &ret, 0);
- ASSERT_EQ(WIFEXITED(ret), 1);
+ /* Check what the device wrote. */
+ if (!migrate) {
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], -i);
+ }
- /* Check that the parent's buffer did not change. */
- for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
- ASSERT_EQ(ptr[i], i);
- return;
+ close(child_fd);
+ exit(0);
+ }
}
-
- /* Check that we see the parent's values. */
- for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
- ASSERT_EQ(ptr[i], i);
- for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
- ASSERT_EQ(ptr[i], -i);
-
- /* The child process needs its own mirror to its own mm. */
- child_fd = hmm_open(0);
- ASSERT_GE(child_fd, 0);
-
- /* Simulate a device writing system memory. */
- ret = hmm_dmirror_cmd(child_fd, HMM_DMIRROR_WRITE, buffer, npages);
- ASSERT_EQ(ret, 0);
- ASSERT_EQ(buffer->cpages, npages);
- ASSERT_EQ(buffer->faults, 1);
-
- /* Check what the device wrote. */
- for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
- ASSERT_EQ(ptr[i], -i);
-
- close(child_fd);
- exit(0);
}
/*
@@ -2055,4 +2091,765 @@ TEST_F(hmm, hmm_cow_in_device)
hmm_buffer_free(buffer);
}
+
+/*
+ * Migrate private anonymous huge empty page.
+ */
+TEST_F(hmm, migrate_anon_huge_empty)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ void *old_ptr;
+ void *map;
+ int *ptr;
+ int ret;
+
+ size = TWOMEG;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = 2 * size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+ memset(buffer->mirror, 0xFF, size);
+
+ buffer->ptr = mmap(NULL, 2 * size,
+ PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ npages = size >> self->page_shift;
+ map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ old_ptr = buffer->ptr;
+ buffer->ptr = map;
+
+ /* Migrate memory to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], 0);
+
+ buffer->ptr = old_ptr;
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Migrate private anonymous huge zero page.
+ */
+TEST_F(hmm, migrate_anon_huge_zero)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ void *old_ptr;
+ void *map;
+ int *ptr;
+ int ret;
+ int val;
+
+ size = TWOMEG;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = 2 * size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+ memset(buffer->mirror, 0xFF, size);
+
+ buffer->ptr = mmap(NULL, 2 * size,
+ PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ npages = size >> self->page_shift;
+ map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ old_ptr = buffer->ptr;
+ buffer->ptr = map;
+
+ /* Initialize a read-only zero huge page. */
+ val = *(int *)buffer->ptr;
+ ASSERT_EQ(val, 0);
+
+ /* Migrate memory to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], 0);
+
+ /* Fault pages back to system memory and check them. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) {
+ ASSERT_EQ(ptr[i], 0);
+ /* If it asserts once, it probably will 500,000 times */
+ if (ptr[i] != 0)
+ break;
+ }
+
+ buffer->ptr = old_ptr;
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Migrate private anonymous huge page and free.
+ */
+TEST_F(hmm, migrate_anon_huge_free)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ void *old_ptr;
+ void *map;
+ int *ptr;
+ int ret;
+
+ size = TWOMEG;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = 2 * size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+ memset(buffer->mirror, 0xFF, size);
+
+ buffer->ptr = mmap(NULL, 2 * size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ npages = size >> self->page_shift;
+ map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ old_ptr = buffer->ptr;
+ buffer->ptr = map;
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Try freeing it. */
+ ret = madvise(map, size, MADV_FREE);
+ ASSERT_EQ(ret, 0);
+
+ buffer->ptr = old_ptr;
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Migrate private anonymous huge page and fault back to sysmem.
+ */
+TEST_F(hmm, migrate_anon_huge_fault)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ void *old_ptr;
+ void *map;
+ int *ptr;
+ int ret;
+
+ size = TWOMEG;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = 2 * size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+ memset(buffer->mirror, 0xFF, size);
+
+ buffer->ptr = mmap(NULL, 2 * size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ npages = size >> self->page_shift;
+ map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ old_ptr = buffer->ptr;
+ buffer->ptr = map;
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Fault pages back to system memory and check them. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ buffer->ptr = old_ptr;
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Migrate memory and fault back to sysmem after partially unmapping.
+ */
+TEST_F(hmm, migrate_partial_unmap_fault)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size = TWOMEG;
+ unsigned long i;
+ void *old_ptr;
+ void *map;
+ int *ptr;
+ int ret, j, use_thp;
+ int offsets[] = { 0, 512 * ONEKB, ONEMEG };
+
+ for (use_thp = 0; use_thp < 2; ++use_thp) {
+ for (j = 0; j < ARRAY_SIZE(offsets); ++j) {
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = 2 * size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+ memset(buffer->mirror, 0xFF, size);
+
+ buffer->ptr = mmap(NULL, 2 * size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ npages = size >> self->page_shift;
+ map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
+ if (use_thp)
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ else
+ ret = madvise(map, size, MADV_NOHUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ old_ptr = buffer->ptr;
+ buffer->ptr = map;
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ munmap(buffer->ptr + offsets[j], ONEMEG);
+
+ /* Fault pages back to system memory and check them. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ if (i * sizeof(int) < offsets[j] ||
+ i * sizeof(int) >= offsets[j] + ONEMEG)
+ ASSERT_EQ(ptr[i], i);
+
+ buffer->ptr = old_ptr;
+ hmm_buffer_free(buffer);
+ }
+ }
+}
+
+TEST_F(hmm, migrate_remap_fault)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size = TWOMEG;
+ unsigned long i;
+ void *old_ptr, *new_ptr = NULL;
+ void *map;
+ int *ptr;
+ int ret, j, use_thp, dont_unmap, before;
+ int offsets[] = { 0, 512 * ONEKB, ONEMEG };
+
+ for (before = 0; before < 2; ++before) {
+ for (dont_unmap = 0; dont_unmap < 2; ++dont_unmap) {
+ for (use_thp = 0; use_thp < 2; ++use_thp) {
+ for (j = 0; j < ARRAY_SIZE(offsets); ++j) {
+ int flags = MREMAP_MAYMOVE | MREMAP_FIXED;
+
+ if (dont_unmap)
+ flags |= MREMAP_DONTUNMAP;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = 8 * size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+ memset(buffer->mirror, 0xFF, size);
+
+ buffer->ptr = mmap(NULL, buffer->size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ npages = size >> self->page_shift;
+ map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
+ if (use_thp)
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ else
+ ret = madvise(map, size, MADV_NOHUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ old_ptr = buffer->ptr;
+ munmap(map + size, size * 2);
+ buffer->ptr = map;
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr;
+ i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ if (before) {
+ new_ptr = mremap((void *)map, size, size, flags,
+ map + size + offsets[j]);
+ ASSERT_NE(new_ptr, MAP_FAILED);
+ buffer->ptr = new_ptr;
+ }
+
+ /* Migrate memory to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror;
+ i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ if (!before) {
+ new_ptr = mremap((void *)map, size, size, flags,
+ map + size + offsets[j]);
+ ASSERT_NE(new_ptr, MAP_FAILED);
+ buffer->ptr = new_ptr;
+ }
+
+ /* Fault pages back to system memory and check them. */
+ for (i = 0, ptr = buffer->ptr;
+ i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ munmap(new_ptr, size);
+ buffer->ptr = old_ptr;
+ hmm_buffer_free(buffer);
+ }
+ }
+ }
+ }
+}
+
+/*
+ * Migrate private anonymous huge page with allocation errors.
+ */
+TEST_F(hmm, migrate_anon_huge_err)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ void *old_ptr;
+ void *map;
+ int *ptr;
+ int ret;
+
+ size = TWOMEG;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = 2 * size;
+ buffer->mirror = malloc(2 * size);
+ ASSERT_NE(buffer->mirror, NULL);
+ memset(buffer->mirror, 0xFF, 2 * size);
+
+ old_ptr = mmap(NULL, 2 * size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0);
+ ASSERT_NE(old_ptr, MAP_FAILED);
+
+ npages = size >> self->page_shift;
+ map = (void *)ALIGN((uintptr_t)old_ptr, size);
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ buffer->ptr = map;
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device but force a THP allocation error. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_FLAGS, buffer,
+ HMM_DMIRROR_FLAG_FAIL_ALLOC);
+ ASSERT_EQ(ret, 0);
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) {
+ ASSERT_EQ(ptr[i], i);
+ if (ptr[i] != i)
+ break;
+ }
+
+ /* Try faulting back a single (PAGE_SIZE) page. */
+ ptr = buffer->ptr;
+ ASSERT_EQ(ptr[2048], 2048);
+
+ /* unmap and remap the region to reset things. */
+ ret = munmap(old_ptr, 2 * size);
+ ASSERT_EQ(ret, 0);
+ old_ptr = mmap(NULL, 2 * size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0);
+ ASSERT_NE(old_ptr, MAP_FAILED);
+ map = (void *)ALIGN((uintptr_t)old_ptr, size);
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ buffer->ptr = map;
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate THP to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /*
+ * Force an allocation error when faulting back a THP resident in the
+ * device.
+ */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_FLAGS, buffer,
+ HMM_DMIRROR_FLAG_FAIL_ALLOC);
+ ASSERT_EQ(ret, 0);
+
+ ret = hmm_migrate_dev_to_sys(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ptr = buffer->ptr;
+ ASSERT_EQ(ptr[2048], 2048);
+
+ buffer->ptr = old_ptr;
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Migrate private anonymous huge zero page with allocation errors.
+ */
+TEST_F(hmm, migrate_anon_huge_zero_err)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ void *old_ptr;
+ void *map;
+ int *ptr;
+ int ret;
+
+ size = TWOMEG;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = 2 * size;
+ buffer->mirror = malloc(2 * size);
+ ASSERT_NE(buffer->mirror, NULL);
+ memset(buffer->mirror, 0xFF, 2 * size);
+
+ old_ptr = mmap(NULL, 2 * size, PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0);
+ ASSERT_NE(old_ptr, MAP_FAILED);
+
+ npages = size >> self->page_shift;
+ map = (void *)ALIGN((uintptr_t)old_ptr, size);
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ buffer->ptr = map;
+
+ /* Migrate memory to device but force a THP allocation error. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_FLAGS, buffer,
+ HMM_DMIRROR_FLAG_FAIL_ALLOC);
+ ASSERT_EQ(ret, 0);
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], 0);
+
+ /* Try faulting back a single (PAGE_SIZE) page. */
+ ptr = buffer->ptr;
+ ASSERT_EQ(ptr[2048], 0);
+
+ /* unmap and remap the region to reset things. */
+ ret = munmap(old_ptr, 2 * size);
+ ASSERT_EQ(ret, 0);
+ old_ptr = mmap(NULL, 2 * size, PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0);
+ ASSERT_NE(old_ptr, MAP_FAILED);
+ map = (void *)ALIGN((uintptr_t)old_ptr, size);
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ buffer->ptr = map;
+
+ /* Initialize buffer in system memory (zero THP page). */
+ ret = ptr[0];
+ ASSERT_EQ(ret, 0);
+
+ /* Migrate memory to device but force a THP allocation error. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_FLAGS, buffer,
+ HMM_DMIRROR_FLAG_FAIL_ALLOC);
+ ASSERT_EQ(ret, 0);
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Fault the device memory back and check it. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], 0);
+
+ buffer->ptr = old_ptr;
+ hmm_buffer_free(buffer);
+}
+
+struct benchmark_results {
+ double sys_to_dev_time;
+ double dev_to_sys_time;
+ double throughput_s2d;
+ double throughput_d2s;
+};
+
+static double get_time_ms(void)
+{
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL);
+ return (tv.tv_sec * 1000.0) + (tv.tv_usec / 1000.0);
+}
+
+static inline struct hmm_buffer *hmm_buffer_alloc(unsigned long size)
+{
+ struct hmm_buffer *buffer;
+
+ buffer = malloc(sizeof(*buffer));
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ memset(buffer->mirror, 0xFF, size);
+ return buffer;
+}
+
+static void print_benchmark_results(const char *test_name, size_t buffer_size,
+ struct benchmark_results *thp,
+ struct benchmark_results *regular)
+{
+ double s2d_improvement = ((regular->sys_to_dev_time - thp->sys_to_dev_time) /
+ regular->sys_to_dev_time) * 100.0;
+ double d2s_improvement = ((regular->dev_to_sys_time - thp->dev_to_sys_time) /
+ regular->dev_to_sys_time) * 100.0;
+ double throughput_s2d_improvement = ((thp->throughput_s2d - regular->throughput_s2d) /
+ regular->throughput_s2d) * 100.0;
+ double throughput_d2s_improvement = ((thp->throughput_d2s - regular->throughput_d2s) /
+ regular->throughput_d2s) * 100.0;
+
+ printf("\n=== %s (%.1f MB) ===\n", test_name, buffer_size / (1024.0 * 1024.0));
+ printf(" | With THP | Without THP | Improvement\n");
+ printf("---------------------------------------------------------------------\n");
+ printf("Sys->Dev Migration | %.3f ms | %.3f ms | %.1f%%\n",
+ thp->sys_to_dev_time, regular->sys_to_dev_time, s2d_improvement);
+ printf("Dev->Sys Migration | %.3f ms | %.3f ms | %.1f%%\n",
+ thp->dev_to_sys_time, regular->dev_to_sys_time, d2s_improvement);
+ printf("S->D Throughput | %.2f GB/s | %.2f GB/s | %.1f%%\n",
+ thp->throughput_s2d, regular->throughput_s2d, throughput_s2d_improvement);
+ printf("D->S Throughput | %.2f GB/s | %.2f GB/s | %.1f%%\n",
+ thp->throughput_d2s, regular->throughput_d2s, throughput_d2s_improvement);
+}
+
+/*
+ * Run a single migration benchmark
+ * fd: file descriptor for hmm device
+ * use_thp: whether to use THP
+ * buffer_size: size of buffer to allocate
+ * iterations: number of iterations
+ * results: where to store results
+ */
+static inline int run_migration_benchmark(int fd, int use_thp, size_t buffer_size,
+ int iterations, struct benchmark_results *results)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages = buffer_size / sysconf(_SC_PAGESIZE);
+ double start, end;
+ double s2d_total = 0, d2s_total = 0;
+ int ret, i;
+ int *ptr;
+
+ buffer = hmm_buffer_alloc(buffer_size);
+
+ /* Map memory */
+ buffer->ptr = mmap(NULL, buffer_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+ if (!buffer->ptr)
+ return -1;
+
+ /* Apply THP hint if requested */
+ if (use_thp)
+ ret = madvise(buffer->ptr, buffer_size, MADV_HUGEPAGE);
+ else
+ ret = madvise(buffer->ptr, buffer_size, MADV_NOHUGEPAGE);
+
+ if (ret)
+ return ret;
+
+ /* Initialize memory to make sure pages are allocated */
+ ptr = (int *)buffer->ptr;
+ for (i = 0; i < buffer_size / sizeof(int); i++)
+ ptr[i] = i & 0xFF;
+
+ /* Warmup iteration */
+ ret = hmm_migrate_sys_to_dev(fd, buffer, npages);
+ if (ret)
+ return ret;
+
+ ret = hmm_migrate_dev_to_sys(fd, buffer, npages);
+ if (ret)
+ return ret;
+
+ /* Benchmark iterations */
+ for (i = 0; i < iterations; i++) {
+ /* System to device migration */
+ start = get_time_ms();
+
+ ret = hmm_migrate_sys_to_dev(fd, buffer, npages);
+ if (ret)
+ return ret;
+
+ end = get_time_ms();
+ s2d_total += (end - start);
+
+ /* Device to system migration */
+ start = get_time_ms();
+
+ ret = hmm_migrate_dev_to_sys(fd, buffer, npages);
+ if (ret)
+ return ret;
+
+ end = get_time_ms();
+ d2s_total += (end - start);
+ }
+
+ /* Calculate average times and throughput */
+ results->sys_to_dev_time = s2d_total / iterations;
+ results->dev_to_sys_time = d2s_total / iterations;
+ results->throughput_s2d = (buffer_size / (1024.0 * 1024.0 * 1024.0)) /
+ (results->sys_to_dev_time / 1000.0);
+ results->throughput_d2s = (buffer_size / (1024.0 * 1024.0 * 1024.0)) /
+ (results->dev_to_sys_time / 1000.0);
+
+ /* Cleanup */
+ hmm_buffer_free(buffer);
+ return 0;
+}
+
+/*
+ * Benchmark THP migration with different buffer sizes
+ */
+TEST_F_TIMEOUT(hmm, benchmark_thp_migration, 120)
+{
+ struct benchmark_results thp_results, regular_results;
+ size_t thp_size = 2 * 1024 * 1024; /* 2MB - typical THP size */
+ int iterations = 5;
+
+ printf("\nHMM THP Migration Benchmark\n");
+ printf("---------------------------\n");
+ printf("System page size: %ld bytes\n", sysconf(_SC_PAGESIZE));
+
+ /* Test different buffer sizes */
+ size_t test_sizes[] = {
+ thp_size / 4, /* 512KB - smaller than THP */
+ thp_size / 2, /* 1MB - half THP */
+ thp_size, /* 2MB - single THP */
+ thp_size * 2, /* 4MB - two THPs */
+ thp_size * 4, /* 8MB - four THPs */
+ thp_size * 8, /* 16MB - eight THPs */
+ thp_size * 128, /* 256MB - one hundred twenty-eight THPs */
+ };
+
+ static const char *const test_names[] = {
+ "Small Buffer (512KB)",
+ "Half THP Size (1MB)",
+ "Single THP Size (2MB)",
+ "Two THP Size (4MB)",
+ "Four THP Size (8MB)",
+ "Eight THP Size (16MB)",
+ "One twenty eight THP Size (256MB)"
+ };
+
+ int num_tests = ARRAY_SIZE(test_sizes);
+
+ /* Run all tests */
+ for (int i = 0; i < num_tests; i++) {
+ /* Test with THP */
+ ASSERT_EQ(run_migration_benchmark(self->fd, 1, test_sizes[i],
+ iterations, &thp_results), 0);
+
+ /* Test without THP */
+ ASSERT_EQ(run_migration_benchmark(self->fd, 0, test_sizes[i],
+ iterations, &regular_results), 0);
+
+ /* Print results */
+ print_benchmark_results(test_names[i], test_sizes[i],
+ &thp_results, &regular_results);
+ }
+}
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c
index ac136f04b8d6..95afa5cfc062 100644
--- a/tools/testing/selftests/mm/ksm_functional_tests.c
+++ b/tools/testing/selftests/mm/ksm_functional_tests.c
@@ -38,6 +38,8 @@ enum ksm_merge_mode {
};
static int mem_fd;
+static int pages_to_scan_fd;
+static int sleep_millisecs_fd;
static int pagemap_fd;
static size_t pagesize;
@@ -493,6 +495,46 @@ static void test_prctl_fork(void)
ksft_test_result_pass("PR_SET_MEMORY_MERGE value is inherited\n");
}
+static int start_ksmd_and_set_frequency(char *pages_to_scan, char *sleep_ms)
+{
+ int ksm_fd;
+
+ ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR);
+ if (ksm_fd < 0)
+ return -errno;
+
+ if (write(ksm_fd, "1", 1) != 1)
+ return -errno;
+
+ if (write(pages_to_scan_fd, pages_to_scan, strlen(pages_to_scan)) <= 0)
+ return -errno;
+
+ if (write(sleep_millisecs_fd, sleep_ms, strlen(sleep_ms)) <= 0)
+ return -errno;
+
+ return 0;
+}
+
+static int stop_ksmd_and_restore_frequency(void)
+{
+ int ksm_fd;
+
+ ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR);
+ if (ksm_fd < 0)
+ return -errno;
+
+ if (write(ksm_fd, "2", 1) != 1)
+ return -errno;
+
+ if (write(pages_to_scan_fd, "100", 3) <= 0)
+ return -errno;
+
+ if (write(sleep_millisecs_fd, "20", 2) <= 0)
+ return -errno;
+
+ return 0;
+}
+
static void test_prctl_fork_exec(void)
{
int ret, status;
@@ -500,6 +542,9 @@ static void test_prctl_fork_exec(void)
ksft_print_msg("[RUN] %s\n", __func__);
+ if (start_ksmd_and_set_frequency("2000", "0"))
+ ksft_test_result_fail("set ksmd's scanning frequency failed\n");
+
ret = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0);
if (ret < 0 && errno == EINVAL) {
ksft_test_result_skip("PR_SET_MEMORY_MERGE not supported\n");
@@ -542,6 +587,11 @@ static void test_prctl_fork_exec(void)
return;
}
+ if (stop_ksmd_and_restore_frequency()) {
+ ksft_test_result_fail("restore ksmd frequency failed\n");
+ return;
+ }
+
ksft_test_result_pass("PR_SET_MEMORY_MERGE value is inherited\n");
}
@@ -656,6 +706,13 @@ static void init_global_file_handles(void)
ksft_exit_skip("open(\"/proc/self/pagemap\") failed\n");
if (ksm_get_self_merging_pages() < 0)
ksft_exit_skip("accessing \"/proc/self/ksm_merging_pages\") failed\n");
+
+ pages_to_scan_fd = open("/sys/kernel/mm/ksm/pages_to_scan", O_RDWR);
+ if (pages_to_scan_fd < 0)
+ ksft_exit_fail_msg("opening /sys/kernel/mm/ksm/pages_to_scan failed\n");
+ sleep_millisecs_fd = open("/sys/kernel/mm/ksm/sleep_millisecs", O_RDWR);
+ if (sleep_millisecs_fd < 0)
+ ksft_exit_fail_msg("opening /sys/kernel/mm/ksm/sleep_millisecs failed\n");
}
int main(int argc, char **argv)
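The helpers added above drive KSM through its sysfs interface: /sys/kernel/mm/ksm/run accepts 0 (stop ksmd), 1 (run ksmd) or 2 (unmerge all pages and stop), while pages_to_scan and sleep_millisecs control how aggressively ksmd scans; the restore path simply writes back the documented defaults of 100 and 20. A minimal sketch of an alternative that saves whatever values were configured and restores them afterwards (helper names here are illustrative, not part of the selftest):

#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static char saved_pages[32], saved_sleep[32];

/* Read the current value of a KSM tunable so it can be restored later. */
static int save_tunable(const char *path, char *buf, size_t len)
{
	int fd = open(path, O_RDONLY);
	ssize_t ret;

	if (fd < 0)
		return -1;
	ret = read(fd, buf, len - 1);
	close(fd);
	if (ret <= 0)
		return -1;
	buf[ret] = '\0';
	return 0;
}

/* Write a string value to a KSM tunable. */
static int write_tunable(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);
	ssize_t ret;

	if (fd < 0)
		return -1;
	ret = write(fd, val, strlen(val));
	close(fd);
	return ret == (ssize_t)strlen(val) ? 0 : -1;
}

int main(void)
{
	save_tunable("/sys/kernel/mm/ksm/pages_to_scan", saved_pages, sizeof(saved_pages));
	save_tunable("/sys/kernel/mm/ksm/sleep_millisecs", saved_sleep, sizeof(saved_sleep));

	/* ... crank up the scan rate and run the KSM-dependent test ... */
	write_tunable("/sys/kernel/mm/ksm/pages_to_scan", "2000");
	write_tunable("/sys/kernel/mm/ksm/sleep_millisecs", "0");

	/* Restore whatever was configured before the test ran. */
	write_tunable("/sys/kernel/mm/ksm/pages_to_scan", saved_pages);
	write_tunable("/sys/kernel/mm/ksm/sleep_millisecs", saved_sleep);
	return 0;
}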
diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c
index bf2863b102e3..5f073504e0b1 100644
--- a/tools/testing/selftests/mm/mremap_test.c
+++ b/tools/testing/selftests/mm/mremap_test.c
@@ -994,7 +994,7 @@ static void mremap_move_multi_invalid_vmas(FILE *maps_fp, unsigned long page_siz
static long long remap_region(struct config c, unsigned int threshold_mb,
char *rand_addr)
{
- void *addr, *src_addr, *dest_addr, *dest_preamble_addr = NULL;
+ void *addr, *tmp_addr, *src_addr, *dest_addr, *dest_preamble_addr = NULL;
unsigned long long t, d;
struct timespec t_start = {0, 0}, t_end = {0, 0};
long long start_ns, end_ns, align_mask, ret, offset;
@@ -1032,7 +1032,8 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
/* Don't destroy existing mappings unless expected to overlap */
while (!is_remap_region_valid(addr, c.region_size) && !c.overlapping) {
/* Check for unsigned overflow */
- if (addr + c.dest_alignment < addr) {
+ tmp_addr = addr + c.dest_alignment;
+ if (tmp_addr < addr) {
ksft_print_msg("Couldn't find a valid region to remap to\n");
ret = -1;
goto clean_up_src;
diff --git a/tools/testing/selftests/mm/soft-dirty.c b/tools/testing/selftests/mm/soft-dirty.c
index 4ee4db3750c1..c3a9585de98c 100644
--- a/tools/testing/selftests/mm/soft-dirty.c
+++ b/tools/testing/selftests/mm/soft-dirty.c
@@ -184,6 +184,130 @@ static void test_mprotect(int pagemap_fd, int pagesize, bool anon)
close(test_fd);
}
+static void test_merge(int pagemap_fd, int pagesize)
+{
+ char *reserved, *map, *map2;
+
+ /*
+ * Reserve space for tests:
+ *
+ * ---padding to ---
+ * | avoid adj. |
+ * v merge v
+ * |---|---|---|---|---|
+ * | | 1 | 2 | 3 | |
+ * |---|---|---|---|---|
+ */
+ reserved = mmap(NULL, 5 * pagesize, PROT_NONE,
+ MAP_ANON | MAP_PRIVATE, -1, 0);
+ if (reserved == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed\n");
+ munmap(reserved, 4 * pagesize);
+
+ /*
+ * Establish initial VMA:
+ *
+ * S/D
+ * |---|---|---|---|---|
+ * | | 1 | | | |
+ * |---|---|---|---|---|
+ */
+ map = mmap(&reserved[pagesize], pagesize, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ if (map == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed\n");
+
+ /* This will clear VM_SOFTDIRTY too. */
+ clear_softdirty();
+
+ /*
+ * Now place a new mapping which will be marked VM_SOFTDIRTY, away from
+ * map:
+ *
+ * - S/D
+ * |---|---|---|---|---|
+ * | | 1 | | 2 | |
+ * |---|---|---|---|---|
+ */
+ map2 = mmap(&reserved[3 * pagesize], pagesize, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ if (map2 == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed\n");
+
+ /*
+ * Now remap it immediately adjacent to map. If the merge correctly
+ * propagates VM_SOFTDIRTY, we should then observe the VMA as a whole
+ * being marked soft-dirty:
+ *
+ * merge
+ * S/D
+ * |---|-------|---|---|
+ * | | 1 | | |
+ * |---|-------|---|---|
+ */
+ map2 = mremap(map2, pagesize, pagesize, MREMAP_FIXED | MREMAP_MAYMOVE,
+ &reserved[2 * pagesize]);
+ if (map2 == MAP_FAILED)
+ ksft_exit_fail_msg("mremap failed\n");
+ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 1,
+ "Test %s-anon soft-dirty after remap merge 1st pg\n",
+ __func__);
+ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map2) == 1,
+ "Test %s-anon soft-dirty after remap merge 2nd pg\n",
+ __func__);
+
+ munmap(map, 2 * pagesize);
+
+ /*
+ * Now establish another VMA:
+ *
+ * S/D
+ * |---|---|---|---|---|
+ * | | 1 | | | |
+ * |---|---|---|---|---|
+ */
+ map = mmap(&reserved[pagesize], pagesize, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ if (map == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed\n");
+
+ /* Clear VM_SOFTDIRTY... */
+ clear_softdirty();
+ /* ...and establish an incompatible adjacent VMA:
+ *
+ * - S/D
+ * |---|---|---|---|---|
+ * | | 1 | 2 | | |
+ * |---|---|---|---|---|
+ */
+ map2 = mmap(&reserved[2 * pagesize], pagesize,
+ PROT_READ | PROT_WRITE | PROT_EXEC,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ if (map2 == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed\n");
+
+ /*
+ * Now mprotect() VMA 1 so it's compatible with 2 and therefore merges:
+ *
+ * merge
+ * S/D
+ * |---|-------|---|---|
+ * | | 1 | | |
+ * |---|-------|---|---|
+ */
+ if (mprotect(map, pagesize, PROT_READ | PROT_WRITE | PROT_EXEC))
+ ksft_exit_fail_msg("mprotect failed\n");
+
+ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 1,
+ "Test %s-anon soft-dirty after mprotect merge 1st pg\n",
+ __func__);
+ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map2) == 1,
+ "Test %s-anon soft-dirty after mprotect merge 2nd pg\n",
+ __func__);
+
+ munmap(map, 2 * pagesize);
+}
+
static void test_mprotect_anon(int pagemap_fd, int pagesize)
{
test_mprotect(pagemap_fd, pagesize, true);
@@ -204,7 +328,7 @@ int main(int argc, char **argv)
if (!softdirty_supported())
ksft_exit_skip("soft-dirty is not support\n");
- ksft_set_plan(15);
+ ksft_set_plan(19);
pagemap_fd = open(PAGEMAP_FILE_PATH, O_RDONLY);
if (pagemap_fd < 0)
ksft_exit_fail_msg("Failed to open %s\n", PAGEMAP_FILE_PATH);
@@ -216,6 +340,7 @@ int main(int argc, char **argv)
test_hugepage(pagemap_fd, pagesize);
test_mprotect_anon(pagemap_fd, pagesize);
test_mprotect_file(pagemap_fd, pagesize);
+ test_merge(pagemap_fd, pagesize);
close(pagemap_fd);
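The merge checks above go through pagemap_is_softdirty() from vm_util; under the hood that is a read of the 64-bit /proc/pid/pagemap entry for the page, where bit 55 carries the soft-dirty state. A minimal standalone sketch of that check (error handling reduced to a false return):

#include <fcntl.h>
#include <stdbool.h>
#include <stdint.h>
#include <unistd.h>

/* Return true if the page backing vaddr is soft-dirty (pagemap bit 55). */
static bool page_is_softdirty(int pagemap_fd, void *vaddr, size_t pagesize)
{
	uint64_t entry;
	off_t offset = ((uintptr_t)vaddr / pagesize) * sizeof(entry);

	if (pread(pagemap_fd, &entry, sizeof(entry), offset) != sizeof(entry))
		return false;

	return entry & (1ULL << 55);
}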
diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c
index 994fe8c03923..edd02328f77b 100644
--- a/tools/testing/selftests/mm/uffd-common.c
+++ b/tools/testing/selftests/mm/uffd-common.c
@@ -10,7 +10,6 @@
uffd_test_ops_t *uffd_test_ops;
uffd_test_case_ops_t *uffd_test_case_ops;
-#define BASE_PMD_ADDR ((void *)(1UL << 30))
/* pthread_mutex_t starts at page offset 0 */
pthread_mutex_t *area_mutex(char *area, unsigned long nr, uffd_global_test_opts_t *gopts)
@@ -142,30 +141,37 @@ static int shmem_allocate_area(uffd_global_test_opts_t *gopts, void **alloc_area
unsigned long offset = is_src ? 0 : bytes;
char *p = NULL, *p_alias = NULL;
int mem_fd = uffd_mem_fd_create(bytes * 2, false);
+ size_t region_size = bytes * 2 + hpage_size;
- /* TODO: clean this up. Use a static addr is ugly */
- p = BASE_PMD_ADDR;
- if (!is_src)
- /* src map + alias + interleaved hpages */
- p += 2 * (bytes + hpage_size);
+ void *reserve = mmap(NULL, region_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS,
+ -1, 0);
+ if (reserve == MAP_FAILED) {
+ close(mem_fd);
+ return -errno;
+ }
+
+ p = reserve;
p_alias = p;
p_alias += bytes;
p_alias += hpage_size; /* Prevent src/dst VMA merge */
- *alloc_area = mmap(p, bytes, PROT_READ | PROT_WRITE, MAP_SHARED,
+ *alloc_area = mmap(p, bytes, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED,
mem_fd, offset);
if (*alloc_area == MAP_FAILED) {
*alloc_area = NULL;
+ munmap(reserve, region_size);
+ close(mem_fd);
return -errno;
}
if (*alloc_area != p)
err("mmap of memfd failed at %p", p);
- area_alias = mmap(p_alias, bytes, PROT_READ | PROT_WRITE, MAP_SHARED,
+ area_alias = mmap(p_alias, bytes, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED,
mem_fd, offset);
if (area_alias == MAP_FAILED) {
- munmap(*alloc_area, bytes);
*alloc_area = NULL;
+ munmap(reserve, region_size);
+ close(mem_fd);
return -errno;
}
if (area_alias != p_alias)
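The shmem_allocate_area() change above swaps a hard-coded BASE_PMD_ADDR for the reserve-then-carve pattern: reserve one large PROT_NONE mapping so the kernel chooses a free range, then place the real mappings inside it with MAP_FIXED, which can only ever replace the reservation itself. A minimal sketch of the pattern in isolation (sizes are illustrative):

#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	size_t part = 2 * 1024 * 1024;
	size_t total = 3 * part;	/* two chunks plus a gap to keep them from merging */
	char *base, *a, *b;

	/* Reserve address space only; nothing is accessible yet. */
	base = mmap(NULL, total, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (base == MAP_FAILED)
		return 1;

	/* Carve real mappings out of the reservation with MAP_FIXED. */
	a = mmap(base, part, PROT_READ | PROT_WRITE,
		 MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	b = mmap(base + 2 * part, part, PROT_READ | PROT_WRITE,
		 MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (a == MAP_FAILED || b == MAP_FAILED) {
		munmap(base, total);
		return 1;
	}

	printf("reservation %p, chunks at %p and %p\n",
	       (void *)base, (void *)a, (void *)b);
	munmap(base, total);	/* also tears down the carved mappings */
	return 0;
}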
diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c
index b51c89e1cd1a..700fbaa18d44 100644
--- a/tools/testing/selftests/mm/uffd-stress.c
+++ b/tools/testing/selftests/mm/uffd-stress.c
@@ -241,7 +241,7 @@ static int stress(struct uffd_args *args)
return 1;
for (cpu = 0; cpu < gopts->nr_parallel; cpu++) {
- char c;
+ char c = '\0';
if (bounces & BOUNCE_POLL) {
if (write(gopts->pipefd[cpu*2+1], &c, 1) != 1)
err("pipefd write error");
diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c
index f917b4c4c943..f4807242c5b2 100644
--- a/tools/testing/selftests/mm/uffd-unit-tests.c
+++ b/tools/testing/selftests/mm/uffd-unit-tests.c
@@ -543,7 +543,7 @@ static void uffd_minor_test_common(uffd_global_test_opts_t *gopts, bool test_col
{
unsigned long p;
pthread_t uffd_mon;
- char c;
+ char c = '\0';
struct uffd_args args = { 0 };
args.gopts = gopts;
@@ -759,7 +759,7 @@ static void uffd_sigbus_test_common(uffd_global_test_opts_t *gopts, bool wp)
pthread_t uffd_mon;
pid_t pid;
int err;
- char c;
+ char c = '\0';
struct uffd_args args = { 0 };
args.gopts = gopts;
@@ -819,7 +819,7 @@ static void uffd_events_test_common(uffd_global_test_opts_t *gopts, bool wp)
pthread_t uffd_mon;
pid_t pid;
int err;
- char c;
+ char c = '\0';
struct uffd_args args = { 0 };
args.gopts = gopts;
@@ -1125,7 +1125,7 @@ uffd_move_test_common(uffd_global_test_opts_t *gopts,
{
unsigned long nr;
pthread_t uffd_mon;
- char c;
+ char c = '\0';
unsigned long long count;
struct uffd_args args = { 0 };
char *orig_area_src = NULL, *orig_area_dst = NULL;
diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c
index e33cda301dad..605cb58ea5c3 100644
--- a/tools/testing/selftests/mm/vm_util.c
+++ b/tools/testing/selftests/mm/vm_util.c
@@ -449,6 +449,11 @@ bool check_vmflag_pfnmap(void *addr)
return check_vmflag(addr, "pf");
}
+bool check_vmflag_guard(void *addr)
+{
+ return check_vmflag(addr, "gu");
+}
+
bool softdirty_supported(void)
{
char *addr;
diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h
index 26c30fdc0241..a8abdf414d46 100644
--- a/tools/testing/selftests/mm/vm_util.h
+++ b/tools/testing/selftests/mm/vm_util.h
@@ -98,6 +98,7 @@ int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len,
unsigned long get_free_hugepages(void);
bool check_vmflag_io(void *addr);
bool check_vmflag_pfnmap(void *addr);
+bool check_vmflag_guard(void *addr);
int open_procmap(pid_t pid, struct procmap_fd *procmap_out);
int query_procmap(struct procmap_fd *procmap);
bool find_vma_procmap(struct procmap_fd *procmap, void *address);
diff --git a/tools/testing/selftests/riscv/hwprobe/cbo.c b/tools/testing/selftests/riscv/hwprobe/cbo.c
index 5e96ef785d0d..6d99726aceac 100644
--- a/tools/testing/selftests/riscv/hwprobe/cbo.c
+++ b/tools/testing/selftests/riscv/hwprobe/cbo.c
@@ -15,24 +15,31 @@
#include <linux/compiler.h>
#include <linux/kernel.h>
#include <asm/ucontext.h>
+#include <getopt.h>
#include "hwprobe.h"
#include "../../kselftest.h"
#define MK_CBO(fn) le32_bswap((uint32_t)(fn) << 20 | 10 << 15 | 2 << 12 | 0 << 7 | 15)
+#define MK_PREFETCH(fn) \
+ le32_bswap(0 << 25 | (uint32_t)(fn) << 20 | 10 << 15 | 6 << 12 | 0 << 7 | 19)
static char mem[4096] __aligned(4096) = { [0 ... 4095] = 0xa5 };
-static bool illegal_insn;
+static bool got_fault;
-static void sigill_handler(int sig, siginfo_t *info, void *context)
+static void fault_handler(int sig, siginfo_t *info, void *context)
{
unsigned long *regs = (unsigned long *)&((ucontext_t *)context)->uc_mcontext;
uint32_t insn = *(uint32_t *)regs[0];
- assert(insn == MK_CBO(regs[11]));
+ if (sig == SIGILL)
+ assert(insn == MK_CBO(regs[11]));
- illegal_insn = true;
+ if (sig == SIGSEGV || sig == SIGBUS)
+ assert(insn == MK_PREFETCH(regs[11]));
+
+ got_fault = true;
regs[0] += 4;
}
@@ -45,39 +52,51 @@ static void sigill_handler(int sig, siginfo_t *info, void *context)
: : "r" (base), "i" (fn), "i" (MK_CBO(fn)) : "a0", "a1", "memory"); \
})
+#define prefetch_insn(base, fn) \
+({ \
+ asm volatile( \
+ "mv a0, %0\n" \
+ "li a1, %1\n" \
+ ".4byte %2\n" \
+ : : "r" (base), "i" (fn), "i" (MK_PREFETCH(fn)) : "a0", "a1"); \
+})
+
static void cbo_inval(char *base) { cbo_insn(base, 0); }
static void cbo_clean(char *base) { cbo_insn(base, 1); }
static void cbo_flush(char *base) { cbo_insn(base, 2); }
static void cbo_zero(char *base) { cbo_insn(base, 4); }
+static void prefetch_i(char *base) { prefetch_insn(base, 0); }
+static void prefetch_r(char *base) { prefetch_insn(base, 1); }
+static void prefetch_w(char *base) { prefetch_insn(base, 3); }
static void test_no_cbo_inval(void *arg)
{
ksft_print_msg("Testing cbo.inval instruction remain privileged\n");
- illegal_insn = false;
+ got_fault = false;
cbo_inval(&mem[0]);
- ksft_test_result(illegal_insn, "No cbo.inval\n");
+ ksft_test_result(got_fault, "No cbo.inval\n");
}
static void test_no_zicbom(void *arg)
{
ksft_print_msg("Testing Zicbom instructions remain privileged\n");
- illegal_insn = false;
+ got_fault = false;
cbo_clean(&mem[0]);
- ksft_test_result(illegal_insn, "No cbo.clean\n");
+ ksft_test_result(got_fault, "No cbo.clean\n");
- illegal_insn = false;
+ got_fault = false;
cbo_flush(&mem[0]);
- ksft_test_result(illegal_insn, "No cbo.flush\n");
+ ksft_test_result(got_fault, "No cbo.flush\n");
}
static void test_no_zicboz(void *arg)
{
ksft_print_msg("No Zicboz, testing cbo.zero remains privileged\n");
- illegal_insn = false;
+ got_fault = false;
cbo_zero(&mem[0]);
- ksft_test_result(illegal_insn, "No cbo.zero\n");
+ ksft_test_result(got_fault, "No cbo.zero\n");
}
static bool is_power_of_2(__u64 n)
@@ -85,6 +104,51 @@ static bool is_power_of_2(__u64 n)
return n != 0 && (n & (n - 1)) == 0;
}
+static void test_zicbop(void *arg)
+{
+ struct riscv_hwprobe pair = {
+ .key = RISCV_HWPROBE_KEY_ZICBOP_BLOCK_SIZE,
+ };
+ struct sigaction act = {
+ .sa_sigaction = &fault_handler,
+ .sa_flags = SA_SIGINFO
+ };
+ struct sigaction dfl = {
+ .sa_handler = SIG_DFL
+ };
+ cpu_set_t *cpus = (cpu_set_t *)arg;
+ __u64 block_size;
+ long rc;
+
+ rc = sigaction(SIGSEGV, &act, NULL);
+ assert(rc == 0);
+ rc = sigaction(SIGBUS, &act, NULL);
+ assert(rc == 0);
+
+ rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)cpus, 0);
+ block_size = pair.value;
+ ksft_test_result(rc == 0 && pair.key == RISCV_HWPROBE_KEY_ZICBOP_BLOCK_SIZE &&
+ is_power_of_2(block_size), "Zicbop block size\n");
+ ksft_print_msg("Zicbop block size: %llu\n", block_size);
+
+ got_fault = false;
+ prefetch_i(&mem[0]);
+ prefetch_r(&mem[0]);
+ prefetch_w(&mem[0]);
+ ksft_test_result(!got_fault, "Zicbop prefetch.* on valid address\n");
+
+ got_fault = false;
+ prefetch_i(NULL);
+ prefetch_r(NULL);
+ prefetch_w(NULL);
+ ksft_test_result(!got_fault, "Zicbop prefetch.* on NULL\n");
+
+ rc = sigaction(SIGBUS, &dfl, NULL);
+ assert(rc == 0);
+ rc = sigaction(SIGSEGV, &dfl, NULL);
+ assert(rc == 0);
+}
+
static void test_zicbom(void *arg)
{
struct riscv_hwprobe pair = {
@@ -100,13 +164,13 @@ static void test_zicbom(void *arg)
is_power_of_2(block_size), "Zicbom block size\n");
ksft_print_msg("Zicbom block size: %llu\n", block_size);
- illegal_insn = false;
+ got_fault = false;
cbo_clean(&mem[block_size]);
- ksft_test_result(!illegal_insn, "cbo.clean\n");
+ ksft_test_result(!got_fault, "cbo.clean\n");
- illegal_insn = false;
+ got_fault = false;
cbo_flush(&mem[block_size]);
- ksft_test_result(!illegal_insn, "cbo.flush\n");
+ ksft_test_result(!got_fault, "cbo.flush\n");
}
static void test_zicboz(void *arg)
@@ -125,11 +189,11 @@ static void test_zicboz(void *arg)
is_power_of_2(block_size), "Zicboz block size\n");
ksft_print_msg("Zicboz block size: %llu\n", block_size);
- illegal_insn = false;
+ got_fault = false;
cbo_zero(&mem[block_size]);
- ksft_test_result(!illegal_insn, "cbo.zero\n");
+ ksft_test_result(!got_fault, "cbo.zero\n");
- if (illegal_insn || !is_power_of_2(block_size)) {
+ if (got_fault || !is_power_of_2(block_size)) {
ksft_test_result_skip("cbo.zero check\n");
return;
}
@@ -177,7 +241,19 @@ static void check_no_zicbo_cpus(cpu_set_t *cpus, __u64 cbo)
rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)&one_cpu, 0);
assert(rc == 0 && pair.key == RISCV_HWPROBE_KEY_IMA_EXT_0);
- cbostr = cbo == RISCV_HWPROBE_EXT_ZICBOZ ? "Zicboz" : "Zicbom";
+ switch (cbo) {
+ case RISCV_HWPROBE_EXT_ZICBOZ:
+ cbostr = "Zicboz";
+ break;
+ case RISCV_HWPROBE_EXT_ZICBOM:
+ cbostr = "Zicbom";
+ break;
+ case RISCV_HWPROBE_EXT_ZICBOP:
+ cbostr = "Zicbop";
+ break;
+ default:
+ ksft_exit_fail_msg("Internal error: invalid cbo %llu\n", cbo);
+ }
if (pair.value & cbo)
ksft_exit_fail_msg("%s is only present on a subset of harts.\n"
@@ -194,6 +270,7 @@ enum {
TEST_ZICBOM,
TEST_NO_ZICBOM,
TEST_NO_CBO_INVAL,
+ TEST_ZICBOP,
};
static struct test_info {
@@ -206,26 +283,51 @@ static struct test_info {
[TEST_ZICBOM] = { .nr_tests = 3, test_zicbom },
[TEST_NO_ZICBOM] = { .nr_tests = 2, test_no_zicbom },
[TEST_NO_CBO_INVAL] = { .nr_tests = 1, test_no_cbo_inval },
+ [TEST_ZICBOP] = { .nr_tests = 3, test_zicbop },
+};
+
+static const struct option long_opts[] = {
+ {"zicbom-raises-sigill", no_argument, 0, 'm'},
+ {"zicboz-raises-sigill", no_argument, 0, 'z'},
+ {0, 0, 0, 0}
};
int main(int argc, char **argv)
{
struct sigaction act = {
- .sa_sigaction = &sigill_handler,
+ .sa_sigaction = &fault_handler,
.sa_flags = SA_SIGINFO,
};
struct riscv_hwprobe pair;
unsigned int plan = 0;
cpu_set_t cpus;
long rc;
- int i;
-
- if (argc > 1 && !strcmp(argv[1], "--sigill")) {
- rc = sigaction(SIGILL, &act, NULL);
- assert(rc == 0);
- tests[TEST_NO_ZICBOZ].enabled = true;
- tests[TEST_NO_ZICBOM].enabled = true;
- tests[TEST_NO_CBO_INVAL].enabled = true;
+ int i, opt, long_index;
+
+ long_index = 0;
+
+ while ((opt = getopt_long(argc, argv, "mz", long_opts, &long_index)) != -1) {
+ switch (opt) {
+ case 'm':
+ tests[TEST_NO_ZICBOM].enabled = true;
+ tests[TEST_NO_CBO_INVAL].enabled = true;
+ rc = sigaction(SIGILL, &act, NULL);
+ assert(rc == 0);
+ break;
+ case 'z':
+ tests[TEST_NO_ZICBOZ].enabled = true;
+ tests[TEST_NO_CBO_INVAL].enabled = true;
+ rc = sigaction(SIGILL, &act, NULL);
+ assert(rc == 0);
+ break;
+ case '?':
+ fprintf(stderr,
+ "Usage: %s [--zicbom-raises-sigill|-m] [--zicboz-raises-sigill|-z]\n",
+ argv[0]);
+ exit(1);
+ default:
+ break;
+ }
}
rc = sched_getaffinity(0, sizeof(cpu_set_t), &cpus);
@@ -253,6 +355,11 @@ int main(int argc, char **argv)
check_no_zicbo_cpus(&cpus, RISCV_HWPROBE_EXT_ZICBOM);
}
+ if (pair.value & RISCV_HWPROBE_EXT_ZICBOP)
+ tests[TEST_ZICBOP].enabled = true;
+ else
+ check_no_zicbo_cpus(&cpus, RISCV_HWPROBE_EXT_ZICBOP);
+
for (i = 0; i < ARRAY_SIZE(tests); ++i)
plan += tests[i].enabled ? tests[i].nr_tests : 0;
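MK_PREFETCH() above hand-assembles the Zicbop prefetch instructions; as I read the RISC-V CMO spec, these reuse the OP-IMM/ORI encoding with rd=x0 as a hint: opcode 0x13 in bits [6:0], funct3 6, the base register in rs1 (a0 here), the operation in bits [24:20] (0 prefetch.i, 1 prefetch.r, 3 prefetch.w) and the offset, a multiple of 32 bytes, in bits [31:25] (zero in this test). A small host-side sketch that rebuilds the word the same way (ignoring the le32_bswap() wrapper):

#include <stdint.h>
#include <stdio.h>

/* Rebuild the encoding produced by MK_PREFETCH() for a given operation. */
static uint32_t mk_prefetch(uint32_t op, uint32_t rs1, uint32_t offset)
{
	return ((offset >> 5) & 0x7f) << 25 |	/* offset[11:5]; offset must be 32-byte aligned */
	       (op & 0x1f) << 20 |		/* 0 = prefetch.i, 1 = prefetch.r, 3 = prefetch.w */
	       (rs1 & 0x1f) << 15 |		/* base address register */
	       6 << 12 |			/* funct3 = ORI */
	       0 << 7 |				/* rd = x0: a hint, no destination */
	       0x13;				/* OP-IMM opcode */
}

int main(void)
{
	/* prefetch.w 0(a0) */
	printf("0x%08x\n", mk_prefetch(3, 10, 0));	/* prints 0x00356013 */
	return 0;
}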
diff --git a/tools/testing/selftests/riscv/vector/Makefile b/tools/testing/selftests/riscv/vector/Makefile
index 6f7497f4e7b3..2c2a33fc083e 100644
--- a/tools/testing/selftests/riscv/vector/Makefile
+++ b/tools/testing/selftests/riscv/vector/Makefile
@@ -2,7 +2,7 @@
# Copyright (C) 2021 ARM Limited
# Originally tools/testing/arm64/abi/Makefile
-TEST_GEN_PROGS := v_initval vstate_prctl
+TEST_GEN_PROGS := v_initval vstate_prctl vstate_ptrace
TEST_GEN_PROGS_EXTENDED := vstate_exec_nolibc v_exec_initval_nolibc
include ../../lib.mk
@@ -26,3 +26,6 @@ $(OUTPUT)/v_initval: v_initval.c $(OUTPUT)/sys_hwprobe.o $(OUTPUT)/v_helpers.o
$(OUTPUT)/v_exec_initval_nolibc: v_exec_initval_nolibc.c
$(CC) -nostdlib -static -include ../../../../include/nolibc/nolibc.h \
-Wall $(CFLAGS) $(LDFLAGS) $^ -o $@ -lgcc
+
+$(OUTPUT)/vstate_ptrace: vstate_ptrace.c $(OUTPUT)/sys_hwprobe.o $(OUTPUT)/v_helpers.o
+ $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^
diff --git a/tools/testing/selftests/riscv/vector/vstate_ptrace.c b/tools/testing/selftests/riscv/vector/vstate_ptrace.c
new file mode 100644
index 000000000000..1479abc0c9cb
--- /dev/null
+++ b/tools/testing/selftests/riscv/vector/vstate_ptrace.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <stdio.h>
+#include <stdlib.h>
+#include <asm/ptrace.h>
+#include <linux/elf.h>
+#include <sys/ptrace.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+#include "../../kselftest.h"
+#include "v_helpers.h"
+
+int parent_set_val, child_set_val;
+
+static long do_ptrace(enum __ptrace_request op, pid_t pid, long type, size_t size, void *data)
+{
+ struct iovec v_iovec = {
+ .iov_len = size,
+ .iov_base = data
+ };
+
+ return ptrace(op, pid, type, &v_iovec);
+}
+
+static int do_child(void)
+{
+ int out;
+
+ if (ptrace(PTRACE_TRACEME, -1, NULL, NULL)) {
+ ksft_perror("PTRACE_TRACEME failed\n");
+ return EXIT_FAILURE;
+ }
+
+ asm volatile (".option push\n\t"
+ ".option arch, +v\n\t"
+ ".option norvc\n\t"
+ "vsetivli x0, 1, e32, m1, ta, ma\n\t"
+ "vmv.s.x v31, %[in]\n\t"
+ "ebreak\n\t"
+ "vmv.x.s %[out], v31\n\t"
+ ".option pop\n\t"
+ : [out] "=r" (out)
+ : [in] "r" (child_set_val));
+
+ if (out != parent_set_val)
+ return EXIT_FAILURE;
+
+ return EXIT_SUCCESS;
+}
+
+static void do_parent(pid_t child)
+{
+ int status;
+ void *data = NULL;
+
+ /* Attach to the child */
+ while (waitpid(child, &status, 0)) {
+ if (WIFEXITED(status)) {
+ ksft_test_result(WEXITSTATUS(status) == 0, "SETREGSET vector\n");
+ goto out;
+ } else if (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGTRAP)) {
+ size_t size;
+ void *data, *v31;
+ struct __riscv_v_regset_state *v_regset_hdr;
+ struct user_regs_struct *gpreg;
+
+ size = sizeof(*v_regset_hdr);
+ data = malloc(size);
+ if (!data)
+ goto out;
+ v_regset_hdr = (struct __riscv_v_regset_state *)data;
+
+ if (do_ptrace(PTRACE_GETREGSET, child, NT_RISCV_VECTOR, size, data))
+ goto out;
+
+ ksft_print_msg("vlenb %ld\n", v_regset_hdr->vlenb);
+ data = realloc(data, size + v_regset_hdr->vlenb * 32);
+ if (!data)
+ goto out;
+ v_regset_hdr = (struct __riscv_v_regset_state *)data;
+ v31 = (void *)(data + size + v_regset_hdr->vlenb * 31);
+ size += v_regset_hdr->vlenb * 32;
+
+ if (do_ptrace(PTRACE_GETREGSET, child, NT_RISCV_VECTOR, size, data))
+ goto out;
+
+ ksft_test_result(*(int *)v31 == child_set_val, "GETREGSET vector\n");
+
+ *(int *)v31 = parent_set_val;
+ if (do_ptrace(PTRACE_SETREGSET, child, NT_RISCV_VECTOR, size, data))
+ goto out;
+
+ /* move the pc forward */
+ size = sizeof(*gpreg);
+ data = realloc(data, size);
+ gpreg = (struct user_regs_struct *)data;
+
+ if (do_ptrace(PTRACE_GETREGSET, child, NT_PRSTATUS, size, data))
+ goto out;
+
+ gpreg->pc += 4;
+ if (do_ptrace(PTRACE_SETREGSET, child, NT_PRSTATUS, size, data))
+ goto out;
+ }
+
+ ptrace(PTRACE_CONT, child, NULL, NULL);
+ }
+
+out:
+ free(data);
+}
+
+int main(void)
+{
+ pid_t child;
+
+ ksft_set_plan(2);
+ if (!is_vector_supported() && !is_xtheadvector_supported())
+ ksft_exit_skip("Vector not supported\n");
+
+ srandom(getpid());
+ parent_set_val = rand();
+ child_set_val = rand();
+
+ child = fork();
+ if (child < 0)
+ ksft_exit_fail_msg("Fork failed %d\n", child);
+
+ if (!child)
+ return do_child();
+
+ do_parent(child);
+
+ ksft_finished();
+}
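do_parent() above relies on the layout of the NT_RISCV_VECTOR regset: a struct __riscv_v_regset_state header immediately followed by the 32 vector registers, each vlenb bytes, so v31 lives at header size + 31 * vlenb, which is exactly the offset the test computes. A minimal offset-arithmetic sketch (the header struct here is a stand-in with hypothetical fields, and vlenb of 16 assumes a 128-bit VLEN):

#include <stddef.h>
#include <stdio.h>

/* Stand-in for struct __riscv_v_regset_state from <asm/ptrace.h>. */
struct v_regset_hdr {
	unsigned long vstart, vl, vtype, vcsr, vlenb;
};

int main(void)
{
	size_t vlenb = 16;				/* hypothetical: VLEN = 128 bits */
	size_t hdr = sizeof(struct v_regset_hdr);
	size_t regset_size = hdr + 32 * vlenb;		/* header + v0..v31 */
	size_t v31_off = hdr + 31 * vlenb;		/* last vector register */

	printf("regset %zu bytes, v31 at offset %zu\n", regset_size, v31_off);
	return 0;
}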
diff --git a/tools/testing/selftests/tpm2/tpm2.py b/tools/testing/selftests/tpm2/tpm2.py
index bba8cb54548e..3d130c30bc7c 100644
--- a/tools/testing/selftests/tpm2/tpm2.py
+++ b/tools/testing/selftests/tpm2/tpm2.py
@@ -437,7 +437,7 @@ class Client:
def extend_pcr(self, i, dig, bank_alg = TPM2_ALG_SHA1):
ds = get_digest_size(bank_alg)
- assert(ds == len(dig))
+ assert ds == len(dig)
auth_cmd = AuthCommand()
@@ -589,7 +589,7 @@ class Client:
def seal(self, parent_key, data, auth_value, policy_dig,
name_alg = TPM2_ALG_SHA1):
ds = get_digest_size(name_alg)
- assert(not policy_dig or ds == len(policy_dig))
+ assert not policy_dig or ds == len(policy_dig)
attributes = 0
if not policy_dig:
diff --git a/tools/testing/selftests/verification/.gitignore b/tools/testing/selftests/verification/.gitignore
new file mode 100644
index 000000000000..2659417cb2c7
--- /dev/null
+++ b/tools/testing/selftests/verification/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+logs
diff --git a/tools/testing/selftests/verification/Makefile b/tools/testing/selftests/verification/Makefile
new file mode 100644
index 000000000000..aa8790c22a71
--- /dev/null
+++ b/tools/testing/selftests/verification/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+all:
+
+TEST_PROGS := verificationtest-ktap
+TEST_FILES := test.d settings
+EXTRA_CLEAN := $(OUTPUT)/logs/*
+
+include ../lib.mk
diff --git a/tools/testing/selftests/verification/config b/tools/testing/selftests/verification/config
new file mode 100644
index 000000000000..43072c1c38f4
--- /dev/null
+++ b/tools/testing/selftests/verification/config
@@ -0,0 +1 @@
+CONFIG_RV=y
diff --git a/tools/testing/selftests/verification/settings b/tools/testing/selftests/verification/settings
new file mode 100644
index 000000000000..e7b9417537fb
--- /dev/null
+++ b/tools/testing/selftests/verification/settings
@@ -0,0 +1 @@
+timeout=0
diff --git a/tools/testing/selftests/verification/test.d/functions b/tools/testing/selftests/verification/test.d/functions
new file mode 100644
index 000000000000..ec36a092f56e
--- /dev/null
+++ b/tools/testing/selftests/verification/test.d/functions
@@ -0,0 +1,39 @@
+check_requires() { # Check required files, monitors and reactors
+ for i in "$@" ; do
+ p=${i%:program}
+ m=${i%:monitor}
+ r=${i%:reactor}
+ if [ $p != $i ]; then
+ if ! which $p ; then
+ echo "Required program $p is not found."
+ exit_unresolved
+ fi
+ elif [ $m != $i ]; then
+ if ! grep -wq $m available_monitors ; then
+ echo "Required monitor $m is not configured."
+ exit_unsupported
+ fi
+ elif [ $r != $i ]; then
+ if ! grep -wq $r available_reactors ; then
+ echo "Required reactor $r is not configured."
+ exit_unsupported
+ fi
+ elif [ ! -e $i ]; then
+ echo "Required feature interface $i doesn't exist."
+ exit_unsupported
+ fi
+ done
+}
+
+initialize_system() { # Reset RV to initial-state
+ echo > enabled_monitors
+ for m in monitors/*; do
+ echo nop > $m/reactors || true
+ done
+ echo 1 > monitoring_on
+ echo 1 > reacting_on || true
+}
+
+finish_system() {
+ initialize_system
+}
diff --git a/tools/testing/selftests/verification/test.d/rv_monitor_enable_disable.tc b/tools/testing/selftests/verification/test.d/rv_monitor_enable_disable.tc
new file mode 100644
index 000000000000..f29236defb5a
--- /dev/null
+++ b/tools/testing/selftests/verification/test.d/rv_monitor_enable_disable.tc
@@ -0,0 +1,75 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+# description: Test monitor enable/disable
+
+test_simple_monitor() {
+ local monitor="$1"
+ local prefix="$2" # nested monitors
+
+ echo 1 > "monitors/$prefix$monitor/enable"
+ grep -q "$monitor$" enabled_monitors
+
+ echo 0 > "monitors/$prefix$monitor/enable"
+ ! grep -q "$monitor$" enabled_monitors
+
+ echo "$monitor" >> enabled_monitors
+ grep -q 1 "monitors/$prefix$monitor/enable"
+
+ echo "!$monitor" >> enabled_monitors
+ grep -q 0 "monitors/$prefix$monitor/enable"
+}
+
+test_container_monitor() {
+ local monitor="$1"
+ local nested
+
+ echo 1 > "monitors/$monitor/enable"
+ grep -q "^$monitor$" enabled_monitors
+
+ for nested_dir in "monitors/$monitor"/*; do
+ [ -d "$nested_dir" ] || continue
+ nested=$(basename "$nested_dir")
+ grep -q "^$monitor:$nested$" enabled_monitors
+ done
+ test -n "$nested"
+
+ echo 0 > "monitors/$monitor/enable"
+ ! grep -q "^$monitor$" enabled_monitors
+
+ for nested_dir in "monitors/$monitor"/*; do
+ [ -d "$nested_dir" ] || continue
+ nested=$(basename "$nested_dir")
+ ! grep -q "^$monitor:$nested$" enabled_monitors
+ done
+
+ echo "$monitor" >> enabled_monitors
+ grep -q 1 "monitors/$monitor/enable"
+
+ for nested_dir in "monitors/$monitor"/*; do
+ [ -d "$nested_dir" ] || continue
+ nested=$(basename "$nested_dir")
+ grep -q "^$monitor:$nested$" enabled_monitors
+ done
+
+ echo "!$monitor" >> enabled_monitors
+ grep -q 0 "monitors/$monitor/enable"
+
+ for nested_dir in "monitors/$monitor"/*; do
+ [ -d "$nested_dir" ] || continue
+ nested=$(basename "$nested_dir")
+ test_simple_monitor "$nested" "$monitor/"
+ done
+}
+
+for monitor_dir in monitors/*; do
+ monitor=$(basename "$monitor_dir")
+
+ if find "$monitor_dir" -mindepth 1 -type d | grep -q .; then
+ test_container_monitor "$monitor"
+ else
+ test_simple_monitor "$monitor"
+ fi
+done
+
+! echo non_existent_monitor > enabled_monitors
+! grep -q "^non_existent_monitor$" enabled_monitors
diff --git a/tools/testing/selftests/verification/test.d/rv_monitor_reactor.tc b/tools/testing/selftests/verification/test.d/rv_monitor_reactor.tc
new file mode 100644
index 000000000000..2958bf849338
--- /dev/null
+++ b/tools/testing/selftests/verification/test.d/rv_monitor_reactor.tc
@@ -0,0 +1,68 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+# description: Test monitor reactor setting
+# requires: available_reactors
+
+test_monitor_reactor() {
+ local monitor="$1"
+ local prefix="$2" # nested monitors
+
+ while read -r reactor; do
+ [ "$reactor" = nop ] && continue
+
+ echo "$reactor" > "monitors/$prefix$monitor/reactors"
+ grep -q "\\[$reactor\\]" "monitors/$prefix$monitor/reactors"
+ done < available_reactors
+
+ echo nop > "monitors/$prefix$monitor/reactors"
+ grep -q "\\[nop\\]" "monitors/$prefix$monitor/reactors"
+}
+
+test_container_monitor() {
+ local monitor="$1"
+ local nested
+
+ while read -r reactor; do
+ [ "$reactor" = nop ] && continue
+
+ echo "$reactor" > "monitors/$monitor/reactors"
+ grep -q "\\[$reactor\\]" "monitors/$monitor/reactors"
+
+ for nested_dir in "monitors/$monitor"/*; do
+ [ -d "$nested_dir" ] || continue
+ nested=$(basename "$nested_dir")
+ grep -q "\\[$reactor\\]" "monitors/$monitor/$nested/reactors"
+ done
+ done < available_reactors
+ test -n "$nested"
+
+ echo nop > "monitors/$monitor/reactors"
+ grep -q "\\[nop\\]" "monitors/$monitor/reactors"
+
+ for nested_dir in "monitors/$monitor"/*; do
+ [ -d "$nested_dir" ] || continue
+ nested=$(basename "$nested_dir")
+ grep -q "\\[nop\\]" "monitors/$monitor/$nested/reactors"
+ done
+
+ for nested_dir in "monitors/$monitor"/*; do
+ [ -d "$nested_dir" ] || continue
+ nested=$(basename "$nested_dir")
+ test_monitor_reactor "$nested" "$monitor/"
+ done
+}
+
+for monitor_dir in monitors/*; do
+ monitor=$(basename "$monitor_dir")
+
+ if find "$monitor_dir" -mindepth 1 -type d | grep -q .; then
+ test_container_monitor "$monitor"
+ else
+ test_monitor_reactor "$monitor"
+ fi
+done
+
+monitor=$(ls /sys/kernel/tracing/rv/monitors -1 | head -n 1)
+test -f "monitors/$monitor/reactors"
+! echo non_existent_reactor > "monitors/$monitor/reactors"
+! grep -q "\\[non_existent_reactor\\]" "monitors/$monitor/reactors"
diff --git a/tools/testing/selftests/verification/test.d/rv_monitors_available.tc b/tools/testing/selftests/verification/test.d/rv_monitors_available.tc
new file mode 100644
index 000000000000..e6a4a1410690
--- /dev/null
+++ b/tools/testing/selftests/verification/test.d/rv_monitors_available.tc
@@ -0,0 +1,18 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+# description: Check available monitors
+
+for monitor_dir in monitors/*; do
+ monitor=$(basename "$monitor_dir")
+
+ grep -q "^$monitor$" available_monitors
+ grep -q . "$monitor_dir"/desc
+
+ for nested_dir in "$monitor_dir"/*; do
+ [ -d "$nested_dir" ] || continue
+ nested=$(basename "$nested_dir")
+
+ grep -q "^$monitor:$nested$" available_monitors
+ grep -q . "$nested_dir"/desc
+ done
+done
diff --git a/tools/testing/selftests/verification/test.d/rv_wwnr_printk.tc b/tools/testing/selftests/verification/test.d/rv_wwnr_printk.tc
new file mode 100644
index 000000000000..5a59432b1d93
--- /dev/null
+++ b/tools/testing/selftests/verification/test.d/rv_wwnr_printk.tc
@@ -0,0 +1,30 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+# description: Test wwnr monitor with printk reactor
+# requires: available_reactors wwnr:monitor printk:reactor stress-ng:program
+
+load() { # returns true if there was a reaction
+ local lines_before num
+ num=$((($(nproc) + 1) / 2))
+ lines_before=$(dmesg | wc -l)
+ stress-ng --cpu-sched "$num" --timer "$num" -t 5 -q
+ dmesg | tail -n +$((lines_before + 1)) | grep -q "rv: monitor wwnr does not allow event"
+}
+
+echo 1 > monitors/wwnr/enable
+echo printk > monitors/wwnr/reactors
+
+load
+
+echo 0 > monitoring_on
+! load
+echo 1 > monitoring_on
+
+load
+
+echo 0 > reacting_on
+! load
+echo 1 > reacting_on
+
+echo nop > monitors/wwnr/reactors
+echo 0 > monitors/wwnr/enable
diff --git a/tools/testing/selftests/verification/verificationtest-ktap b/tools/testing/selftests/verification/verificationtest-ktap
new file mode 100755
index 000000000000..18f7fe324e2f
--- /dev/null
+++ b/tools/testing/selftests/verification/verificationtest-ktap
@@ -0,0 +1,8 @@
+#!/bin/sh -e
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# verificationtest-ktap: Wrapper to integrate the RV tests with the kselftest runner
+#
+# Copyright (C) Arm Ltd., 2023
+
+../ftrace/ftracetest -K -v --rv ../verification
diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c
index 656e1c75b711..93d21bc7e112 100644
--- a/tools/testing/vma/vma.c
+++ b/tools/testing/vma/vma.c
@@ -48,6 +48,8 @@ static struct anon_vma dummy_anon_vma;
#define ASSERT_EQ(_val1, _val2) ASSERT_TRUE((_val1) == (_val2))
#define ASSERT_NE(_val1, _val2) ASSERT_TRUE((_val1) != (_val2))
+#define IS_SET(_val, _flags) ((_val & _flags) == _flags)
+
static struct task_struct __current;
struct task_struct *get_current(void)
@@ -67,18 +69,18 @@ static struct vm_area_struct *alloc_vma(struct mm_struct *mm,
pgoff_t pgoff,
vm_flags_t vm_flags)
{
- struct vm_area_struct *ret = vm_area_alloc(mm);
+ struct vm_area_struct *vma = vm_area_alloc(mm);
- if (ret == NULL)
+ if (vma == NULL)
return NULL;
- ret->vm_start = start;
- ret->vm_end = end;
- ret->vm_pgoff = pgoff;
- ret->__vm_flags = vm_flags;
- vma_assert_detached(ret);
+ vma->vm_start = start;
+ vma->vm_end = end;
+ vma->vm_pgoff = pgoff;
+ vm_flags_reset(vma, vm_flags);
+ vma_assert_detached(vma);
- return ret;
+ return vma;
}
/* Helper function to allocate a VMA and link it to the tree. */
@@ -339,6 +341,7 @@ static bool test_simple_modify(void)
struct mm_struct mm = {};
struct vm_area_struct *init_vma = alloc_vma(&mm, 0, 0x3000, 0, vm_flags);
VMA_ITERATOR(vmi, &mm, 0x1000);
+ vm_flags_t flags = VM_READ | VM_MAYREAD;
ASSERT_FALSE(attach_vma(&mm, init_vma));
@@ -347,7 +350,7 @@ static bool test_simple_modify(void)
* performs the merge/split only.
*/
vma = vma_modify_flags(&vmi, init_vma, init_vma,
- 0x1000, 0x2000, VM_READ | VM_MAYREAD);
+ 0x1000, 0x2000, &flags);
ASSERT_NE(vma, NULL);
/* We modify the provided VMA, and on split allocate new VMAs. */
ASSERT_EQ(vma, init_vma);
@@ -441,7 +444,7 @@ static bool test_simple_shrink(void)
return true;
}
-static bool test_merge_new(void)
+static bool __test_merge_new(bool is_sticky, bool a_is_sticky, bool b_is_sticky, bool c_is_sticky)
{
vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE;
struct mm_struct mm = {};
@@ -469,23 +472,32 @@ static bool test_merge_new(void)
struct vm_area_struct *vma, *vma_a, *vma_b, *vma_c, *vma_d;
bool merged;
+ if (is_sticky)
+ vm_flags |= VM_STICKY;
+
/*
* 0123456789abc
* AA B CC
*/
vma_a = alloc_and_link_vma(&mm, 0, 0x2000, 0, vm_flags);
ASSERT_NE(vma_a, NULL);
+ if (a_is_sticky)
+ vm_flags_set(vma_a, VM_STICKY);
/* We give each VMA a single avc so we can test anon_vma duplication. */
INIT_LIST_HEAD(&vma_a->anon_vma_chain);
list_add(&dummy_anon_vma_chain_a.same_vma, &vma_a->anon_vma_chain);
vma_b = alloc_and_link_vma(&mm, 0x3000, 0x4000, 3, vm_flags);
ASSERT_NE(vma_b, NULL);
+ if (b_is_sticky)
+ vm_flags_set(vma_b, VM_STICKY);
INIT_LIST_HEAD(&vma_b->anon_vma_chain);
list_add(&dummy_anon_vma_chain_b.same_vma, &vma_b->anon_vma_chain);
vma_c = alloc_and_link_vma(&mm, 0xb000, 0xc000, 0xb, vm_flags);
ASSERT_NE(vma_c, NULL);
+ if (c_is_sticky)
+ vm_flags_set(vma_c, VM_STICKY);
INIT_LIST_HEAD(&vma_c->anon_vma_chain);
list_add(&dummy_anon_vma_chain_c.same_vma, &vma_c->anon_vma_chain);
@@ -520,6 +532,8 @@ static bool test_merge_new(void)
ASSERT_EQ(vma->anon_vma, &dummy_anon_vma);
ASSERT_TRUE(vma_write_started(vma));
ASSERT_EQ(mm.map_count, 3);
+ if (is_sticky || a_is_sticky || b_is_sticky)
+ ASSERT_TRUE(IS_SET(vma->vm_flags, VM_STICKY));
/*
* Merge to PREVIOUS VMA.
@@ -537,6 +551,8 @@ static bool test_merge_new(void)
ASSERT_EQ(vma->anon_vma, &dummy_anon_vma);
ASSERT_TRUE(vma_write_started(vma));
ASSERT_EQ(mm.map_count, 3);
+ if (is_sticky || a_is_sticky)
+ ASSERT_TRUE(IS_SET(vma->vm_flags, VM_STICKY));
/*
* Merge to NEXT VMA.
@@ -556,6 +572,8 @@ static bool test_merge_new(void)
ASSERT_EQ(vma->anon_vma, &dummy_anon_vma);
ASSERT_TRUE(vma_write_started(vma));
ASSERT_EQ(mm.map_count, 3);
+ if (is_sticky) /* D uses is_sticky. */
+ ASSERT_TRUE(IS_SET(vma->vm_flags, VM_STICKY));
/*
* Merge BOTH sides.
@@ -574,6 +592,8 @@ static bool test_merge_new(void)
ASSERT_EQ(vma->anon_vma, &dummy_anon_vma);
ASSERT_TRUE(vma_write_started(vma));
ASSERT_EQ(mm.map_count, 2);
+ if (is_sticky || a_is_sticky)
+ ASSERT_TRUE(IS_SET(vma->vm_flags, VM_STICKY));
/*
* Merge to NEXT VMA.
@@ -592,6 +612,8 @@ static bool test_merge_new(void)
ASSERT_EQ(vma->anon_vma, &dummy_anon_vma);
ASSERT_TRUE(vma_write_started(vma));
ASSERT_EQ(mm.map_count, 2);
+ if (is_sticky || c_is_sticky)
+ ASSERT_TRUE(IS_SET(vma->vm_flags, VM_STICKY));
/*
* Merge BOTH sides.
@@ -609,6 +631,8 @@ static bool test_merge_new(void)
ASSERT_EQ(vma->anon_vma, &dummy_anon_vma);
ASSERT_TRUE(vma_write_started(vma));
ASSERT_EQ(mm.map_count, 1);
+ if (is_sticky || a_is_sticky || c_is_sticky)
+ ASSERT_TRUE(IS_SET(vma->vm_flags, VM_STICKY));
/*
* Final state.
@@ -637,6 +661,20 @@ static bool test_merge_new(void)
return true;
}
+static bool test_merge_new(void)
+{
+ int i, j, k, l;
+
+ /* Generate every possible permutation of sticky flags. */
+ for (i = 0; i < 2; i++)
+ for (j = 0; j < 2; j++)
+ for (k = 0; k < 2; k++)
+ for (l = 0; l < 2; l++)
+ ASSERT_TRUE(__test_merge_new(i, j, k, l));
+
+ return true;
+}
+
static bool test_vma_merge_special_flags(void)
{
vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE;
@@ -676,7 +714,7 @@ static bool test_vma_merge_special_flags(void)
for (i = 0; i < ARRAY_SIZE(special_flags); i++) {
vm_flags_t special_flag = special_flags[i];
- vma_left->__vm_flags = vm_flags | special_flag;
+ vm_flags_reset(vma_left, vm_flags | special_flag);
vmg.vm_flags = vm_flags | special_flag;
vma = merge_new(&vmg);
ASSERT_EQ(vma, NULL);
@@ -698,7 +736,7 @@ static bool test_vma_merge_special_flags(void)
for (i = 0; i < ARRAY_SIZE(special_flags); i++) {
vm_flags_t special_flag = special_flags[i];
- vma_left->__vm_flags = vm_flags | special_flag;
+ vm_flags_reset(vma_left, vm_flags | special_flag);
vmg.vm_flags = vm_flags | special_flag;
vma = merge_existing(&vmg);
ASSERT_EQ(vma, NULL);
@@ -973,9 +1011,11 @@ static bool test_vma_merge_new_with_close(void)
return true;
}
-static bool test_merge_existing(void)
+static bool __test_merge_existing(bool prev_is_sticky, bool middle_is_sticky, bool next_is_sticky)
{
vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE;
+ vm_flags_t prev_flags = vm_flags;
+ vm_flags_t next_flags = vm_flags;
struct mm_struct mm = {};
VMA_ITERATOR(vmi, &mm, 0);
struct vm_area_struct *vma, *vma_prev, *vma_next;
@@ -988,6 +1028,13 @@ static bool test_merge_existing(void)
};
struct anon_vma_chain avc = {};
+ if (prev_is_sticky)
+ prev_flags |= VM_STICKY;
+ if (middle_is_sticky)
+ vm_flags |= VM_STICKY;
+ if (next_is_sticky)
+ next_flags |= VM_STICKY;
+
/*
* Merge right case - partial span.
*
@@ -1000,7 +1047,7 @@ static bool test_merge_existing(void)
*/
vma = alloc_and_link_vma(&mm, 0x2000, 0x6000, 2, vm_flags);
vma->vm_ops = &vm_ops; /* This should have no impact. */
- vma_next = alloc_and_link_vma(&mm, 0x6000, 0x9000, 6, vm_flags);
+ vma_next = alloc_and_link_vma(&mm, 0x6000, 0x9000, 6, next_flags);
vma_next->vm_ops = &vm_ops; /* This should have no impact. */
vmg_set_range_anon_vma(&vmg, 0x3000, 0x6000, 3, vm_flags, &dummy_anon_vma);
vmg.middle = vma;
@@ -1018,6 +1065,8 @@ static bool test_merge_existing(void)
ASSERT_TRUE(vma_write_started(vma));
ASSERT_TRUE(vma_write_started(vma_next));
ASSERT_EQ(mm.map_count, 2);
+ if (middle_is_sticky || next_is_sticky)
+ ASSERT_TRUE(IS_SET(vma_next->vm_flags, VM_STICKY));
/* Clear down and reset. */
ASSERT_EQ(cleanup_mm(&mm, &vmi), 2);
@@ -1033,7 +1082,7 @@ static bool test_merge_existing(void)
* NNNNNNN
*/
vma = alloc_and_link_vma(&mm, 0x2000, 0x6000, 2, vm_flags);
- vma_next = alloc_and_link_vma(&mm, 0x6000, 0x9000, 6, vm_flags);
+ vma_next = alloc_and_link_vma(&mm, 0x6000, 0x9000, 6, next_flags);
vma_next->vm_ops = &vm_ops; /* This should have no impact. */
vmg_set_range_anon_vma(&vmg, 0x2000, 0x6000, 2, vm_flags, &dummy_anon_vma);
vmg.middle = vma;
@@ -1046,6 +1095,8 @@ static bool test_merge_existing(void)
ASSERT_EQ(vma_next->anon_vma, &dummy_anon_vma);
ASSERT_TRUE(vma_write_started(vma_next));
ASSERT_EQ(mm.map_count, 1);
+ if (middle_is_sticky || next_is_sticky)
+ ASSERT_TRUE(IS_SET(vma_next->vm_flags, VM_STICKY));
/* Clear down and reset. We should have deleted vma. */
ASSERT_EQ(cleanup_mm(&mm, &vmi), 1);
@@ -1060,7 +1111,7 @@ static bool test_merge_existing(void)
* 0123456789
* PPPPPPV
*/
- vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags);
+ vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, prev_flags);
vma_prev->vm_ops = &vm_ops; /* This should have no impact. */
vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, vm_flags);
vma->vm_ops = &vm_ops; /* This should have no impact. */
@@ -1080,6 +1131,8 @@ static bool test_merge_existing(void)
ASSERT_TRUE(vma_write_started(vma_prev));
ASSERT_TRUE(vma_write_started(vma));
ASSERT_EQ(mm.map_count, 2);
+ if (prev_is_sticky || middle_is_sticky)
+ ASSERT_TRUE(IS_SET(vma_prev->vm_flags, VM_STICKY));
/* Clear down and reset. */
ASSERT_EQ(cleanup_mm(&mm, &vmi), 2);
@@ -1094,7 +1147,7 @@ static bool test_merge_existing(void)
* 0123456789
* PPPPPPP
*/
- vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags);
+ vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, prev_flags);
vma_prev->vm_ops = &vm_ops; /* This should have no impact. */
vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, vm_flags);
vmg_set_range_anon_vma(&vmg, 0x3000, 0x7000, 3, vm_flags, &dummy_anon_vma);
@@ -1109,6 +1162,8 @@ static bool test_merge_existing(void)
ASSERT_EQ(vma_prev->anon_vma, &dummy_anon_vma);
ASSERT_TRUE(vma_write_started(vma_prev));
ASSERT_EQ(mm.map_count, 1);
+ if (prev_is_sticky || middle_is_sticky)
+ ASSERT_TRUE(IS_SET(vma_prev->vm_flags, VM_STICKY));
/* Clear down and reset. We should have deleted vma. */
ASSERT_EQ(cleanup_mm(&mm, &vmi), 1);
@@ -1123,10 +1178,10 @@ static bool test_merge_existing(void)
* 0123456789
* PPPPPPPPPP
*/
- vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags);
+ vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, prev_flags);
vma_prev->vm_ops = &vm_ops; /* This should have no impact. */
vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, vm_flags);
- vma_next = alloc_and_link_vma(&mm, 0x7000, 0x9000, 7, vm_flags);
+ vma_next = alloc_and_link_vma(&mm, 0x7000, 0x9000, 7, next_flags);
vmg_set_range_anon_vma(&vmg, 0x3000, 0x7000, 3, vm_flags, &dummy_anon_vma);
vmg.prev = vma_prev;
vmg.middle = vma;
@@ -1139,6 +1194,8 @@ static bool test_merge_existing(void)
ASSERT_EQ(vma_prev->anon_vma, &dummy_anon_vma);
ASSERT_TRUE(vma_write_started(vma_prev));
ASSERT_EQ(mm.map_count, 1);
+ if (prev_is_sticky || middle_is_sticky || next_is_sticky)
+ ASSERT_TRUE(IS_SET(vma_prev->vm_flags, VM_STICKY));
/* Clear down and reset. We should have deleted prev and next. */
ASSERT_EQ(cleanup_mm(&mm, &vmi), 1);
@@ -1158,9 +1215,9 @@ static bool test_merge_existing(void)
* PPPVVVVVNNN
*/
- vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags);
+ vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, prev_flags);
vma = alloc_and_link_vma(&mm, 0x3000, 0x8000, 3, vm_flags);
- vma_next = alloc_and_link_vma(&mm, 0x8000, 0xa000, 8, vm_flags);
+ vma_next = alloc_and_link_vma(&mm, 0x8000, 0xa000, 8, next_flags);
vmg_set_range(&vmg, 0x4000, 0x5000, 4, vm_flags);
vmg.prev = vma;
@@ -1203,6 +1260,19 @@ static bool test_merge_existing(void)
return true;
}
+static bool test_merge_existing(void)
+{
+ int i, j, k;
+
+ /* Generate every possible permutation of sticky flags. */
+ for (i = 0; i < 2; i++)
+ for (j = 0; j < 2; j++)
+ for (k = 0; k < 2; k++)
+ ASSERT_TRUE(__test_merge_existing(i, j, k));
+
+ return true;
+}
+
static bool test_anon_vma_non_mergeable(void)
{
vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE;
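The sticky-flag permutations exercised above by __test_merge_new() and __test_merge_existing() reduce to two rules, spelled out by the VM_STICKY/VM_IGNORE_MERGE comments added to vma_internal.h below: flags must match for a merge once the VM_IGNORE_MERGE bits are masked off, and any sticky flag present on either side ends up set on the merged VMA. A minimal sketch of those rules as plain helpers (names are illustrative and assume the definitions from vma_internal.h below):

/* Can two VMAs' flags merge, ignoring the bits we deliberately overlook? */
static bool flags_mergeable(vm_flags_t a, vm_flags_t b)
{
	return (a & ~VM_IGNORE_MERGE) == (b & ~VM_IGNORE_MERGE);
}

/* Flags of the merged VMA: sticky bits from either side are propagated. */
static vm_flags_t flags_after_merge(vm_flags_t a, vm_flags_t b)
{
	return a | (b & VM_STICKY);
}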
diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
index dc976a285ad2..9f0a9f5ed0fe 100644
--- a/tools/testing/vma/vma_internal.h
+++ b/tools/testing/vma/vma_internal.h
@@ -46,42 +46,272 @@ extern unsigned long dac_mmap_min_addr;
#define MMF_HAS_MDWE 28
+/*
+ * vm_flags in vm_area_struct, see mm_types.h.
+ * When changing, update also include/trace/events/mmflags.h
+ */
+
#define VM_NONE 0x00000000
-#define VM_READ 0x00000001
-#define VM_WRITE 0x00000002
-#define VM_EXEC 0x00000004
-#define VM_SHARED 0x00000008
-#define VM_MAYREAD 0x00000010
-#define VM_MAYWRITE 0x00000020
-#define VM_MAYEXEC 0x00000040
-#define VM_GROWSDOWN 0x00000100
-#define VM_PFNMAP 0x00000400
-#define VM_LOCKED 0x00002000
-#define VM_IO 0x00004000
-#define VM_SEQ_READ 0x00008000 /* App will access data sequentially */
-#define VM_RAND_READ 0x00010000 /* App will not benefit from clustered reads */
-#define VM_DONTEXPAND 0x00040000
-#define VM_LOCKONFAULT 0x00080000
-#define VM_ACCOUNT 0x00100000
-#define VM_NORESERVE 0x00200000
-#define VM_MIXEDMAP 0x10000000
-#define VM_STACK VM_GROWSDOWN
-#define VM_SHADOW_STACK VM_NONE
-#define VM_SOFTDIRTY 0
-#define VM_ARCH_1 0x01000000 /* Architecture-specific flag */
-#define VM_GROWSUP VM_NONE
-#define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC)
-#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP)
+/**
+ * typedef vma_flag_t - specifies an individual VMA flag by bit number.
+ *
+ * This value is made type safe by sparse to avoid passing invalid flag values
+ * around.
+ */
+typedef int __bitwise vma_flag_t;
+#define DECLARE_VMA_BIT(name, bitnum) \
+ VMA_ ## name ## _BIT = ((__force vma_flag_t)bitnum)
+#define DECLARE_VMA_BIT_ALIAS(name, aliased) \
+ VMA_ ## name ## _BIT = VMA_ ## aliased ## _BIT
+enum {
+ DECLARE_VMA_BIT(READ, 0),
+ DECLARE_VMA_BIT(WRITE, 1),
+ DECLARE_VMA_BIT(EXEC, 2),
+ DECLARE_VMA_BIT(SHARED, 3),
+ /* mprotect() hardcodes VM_MAYREAD >> 4 == VM_READ, and so for r/w/x bits. */
+ DECLARE_VMA_BIT(MAYREAD, 4), /* limits for mprotect() etc. */
+ DECLARE_VMA_BIT(MAYWRITE, 5),
+ DECLARE_VMA_BIT(MAYEXEC, 6),
+ DECLARE_VMA_BIT(MAYSHARE, 7),
+ DECLARE_VMA_BIT(GROWSDOWN, 8), /* general info on the segment */
+#ifdef CONFIG_MMU
+ DECLARE_VMA_BIT(UFFD_MISSING, 9),/* missing pages tracking */
+#else
+ /* nommu: R/O MAP_PRIVATE mapping that might overlay a file mapping */
+ DECLARE_VMA_BIT(MAYOVERLAY, 9),
+#endif /* CONFIG_MMU */
+ /* Page-ranges managed without "struct page", just pure PFN */
+ DECLARE_VMA_BIT(PFNMAP, 10),
+ DECLARE_VMA_BIT(MAYBE_GUARD, 11),
+ DECLARE_VMA_BIT(UFFD_WP, 12), /* wrprotect pages tracking */
+ DECLARE_VMA_BIT(LOCKED, 13),
+ DECLARE_VMA_BIT(IO, 14), /* Memory mapped I/O or similar */
+ DECLARE_VMA_BIT(SEQ_READ, 15), /* App will access data sequentially */
+ DECLARE_VMA_BIT(RAND_READ, 16), /* App will not benefit from clustered reads */
+ DECLARE_VMA_BIT(DONTCOPY, 17), /* Do not copy this vma on fork */
+ DECLARE_VMA_BIT(DONTEXPAND, 18),/* Cannot expand with mremap() */
+ DECLARE_VMA_BIT(LOCKONFAULT, 19),/* Lock pages covered when faulted in */
+ DECLARE_VMA_BIT(ACCOUNT, 20), /* Is a VM accounted object */
+ DECLARE_VMA_BIT(NORESERVE, 21), /* should the VM suppress accounting */
+ DECLARE_VMA_BIT(HUGETLB, 22), /* Huge TLB Page VM */
+ DECLARE_VMA_BIT(SYNC, 23), /* Synchronous page faults */
+ DECLARE_VMA_BIT(ARCH_1, 24), /* Architecture-specific flag */
+ DECLARE_VMA_BIT(WIPEONFORK, 25),/* Wipe VMA contents in child. */
+ DECLARE_VMA_BIT(DONTDUMP, 26), /* Do not include in the core dump */
+ DECLARE_VMA_BIT(SOFTDIRTY, 27), /* NOT soft dirty clean area */
+ DECLARE_VMA_BIT(MIXEDMAP, 28), /* Can contain struct page and pure PFN pages */
+ DECLARE_VMA_BIT(HUGEPAGE, 29), /* MADV_HUGEPAGE marked this vma */
+ DECLARE_VMA_BIT(NOHUGEPAGE, 30),/* MADV_NOHUGEPAGE marked this vma */
+ DECLARE_VMA_BIT(MERGEABLE, 31), /* KSM may merge identical pages */
+ /* These bits are reused, we define specific uses below. */
+ DECLARE_VMA_BIT(HIGH_ARCH_0, 32),
+ DECLARE_VMA_BIT(HIGH_ARCH_1, 33),
+ DECLARE_VMA_BIT(HIGH_ARCH_2, 34),
+ DECLARE_VMA_BIT(HIGH_ARCH_3, 35),
+ DECLARE_VMA_BIT(HIGH_ARCH_4, 36),
+ DECLARE_VMA_BIT(HIGH_ARCH_5, 37),
+ DECLARE_VMA_BIT(HIGH_ARCH_6, 38),
+ /*
+ * This flag is used to connect VFIO to arch specific KVM code. It
+ * indicates that the memory under this VMA is safe for use with any
+ * non-cachable memory type inside KVM. Some VFIO devices, on some
+ * platforms, are thought to be unsafe and can cause machine crashes
+ * if KVM does not lock down the memory type.
+ */
+ DECLARE_VMA_BIT(ALLOW_ANY_UNCACHED, 39),
+#ifdef CONFIG_PPC32
+ DECLARE_VMA_BIT_ALIAS(DROPPABLE, ARCH_1),
+#else
+ DECLARE_VMA_BIT(DROPPABLE, 40),
+#endif
+ DECLARE_VMA_BIT(UFFD_MINOR, 41),
+ DECLARE_VMA_BIT(SEALED, 42),
+ /* Flags that reuse flags above. */
+ DECLARE_VMA_BIT_ALIAS(PKEY_BIT0, HIGH_ARCH_0),
+ DECLARE_VMA_BIT_ALIAS(PKEY_BIT1, HIGH_ARCH_1),
+ DECLARE_VMA_BIT_ALIAS(PKEY_BIT2, HIGH_ARCH_2),
+ DECLARE_VMA_BIT_ALIAS(PKEY_BIT3, HIGH_ARCH_3),
+ DECLARE_VMA_BIT_ALIAS(PKEY_BIT4, HIGH_ARCH_4),
+#if defined(CONFIG_X86_USER_SHADOW_STACK)
+ /*
+ * VM_SHADOW_STACK should not be set with VM_SHARED because of lack of
+ * support in core mm.
+ *
+ * These VMAs will get a single end guard page. This helps userspace
+ * protect itself from attacks. A single page is enough for current
+ * shadow stack archs (x86). See the comments near alloc_shstk() in
+ * arch/x86/kernel/shstk.c for more details on the guard size.
+ */
+ DECLARE_VMA_BIT_ALIAS(SHADOW_STACK, HIGH_ARCH_5),
+#elif defined(CONFIG_ARM64_GCS)
+ /*
+ * arm64's Guarded Control Stack implements similar functionality and
+ * has similar constraints to shadow stacks.
+ */
+ DECLARE_VMA_BIT_ALIAS(SHADOW_STACK, HIGH_ARCH_6),
+#endif
+ DECLARE_VMA_BIT_ALIAS(SAO, ARCH_1), /* Strong Access Ordering (powerpc) */
+ DECLARE_VMA_BIT_ALIAS(GROWSUP, ARCH_1), /* parisc */
+ DECLARE_VMA_BIT_ALIAS(SPARC_ADI, ARCH_1), /* sparc64 */
+ DECLARE_VMA_BIT_ALIAS(ARM64_BTI, ARCH_1), /* arm64 */
+ DECLARE_VMA_BIT_ALIAS(ARCH_CLEAR, ARCH_1), /* sparc64, arm64 */
+ DECLARE_VMA_BIT_ALIAS(MAPPED_COPY, ARCH_1), /* !CONFIG_MMU */
+ DECLARE_VMA_BIT_ALIAS(MTE, HIGH_ARCH_4), /* arm64 */
+ DECLARE_VMA_BIT_ALIAS(MTE_ALLOWED, HIGH_ARCH_5),/* arm64 */
#ifdef CONFIG_STACK_GROWSUP
-#define VM_STACK VM_GROWSUP
-#define VM_STACK_EARLY VM_GROWSDOWN
+ DECLARE_VMA_BIT_ALIAS(STACK, GROWSUP),
+ DECLARE_VMA_BIT_ALIAS(STACK_EARLY, GROWSDOWN),
+#else
+ DECLARE_VMA_BIT_ALIAS(STACK, GROWSDOWN),
+#endif
+};
+
+#define INIT_VM_FLAG(name) BIT((__force int) VMA_ ## name ## _BIT)
+#define VM_READ INIT_VM_FLAG(READ)
+#define VM_WRITE INIT_VM_FLAG(WRITE)
+#define VM_EXEC INIT_VM_FLAG(EXEC)
+#define VM_SHARED INIT_VM_FLAG(SHARED)
+#define VM_MAYREAD INIT_VM_FLAG(MAYREAD)
+#define VM_MAYWRITE INIT_VM_FLAG(MAYWRITE)
+#define VM_MAYEXEC INIT_VM_FLAG(MAYEXEC)
+#define VM_MAYSHARE INIT_VM_FLAG(MAYSHARE)
+#define VM_GROWSDOWN INIT_VM_FLAG(GROWSDOWN)
+#ifdef CONFIG_MMU
+#define VM_UFFD_MISSING INIT_VM_FLAG(UFFD_MISSING)
+#else
+#define VM_UFFD_MISSING VM_NONE
+#define VM_MAYOVERLAY INIT_VM_FLAG(MAYOVERLAY)
+#endif
+#define VM_PFNMAP INIT_VM_FLAG(PFNMAP)
+#define VM_MAYBE_GUARD INIT_VM_FLAG(MAYBE_GUARD)
+#define VM_UFFD_WP INIT_VM_FLAG(UFFD_WP)
+#define VM_LOCKED INIT_VM_FLAG(LOCKED)
+#define VM_IO INIT_VM_FLAG(IO)
+#define VM_SEQ_READ INIT_VM_FLAG(SEQ_READ)
+#define VM_RAND_READ INIT_VM_FLAG(RAND_READ)
+#define VM_DONTCOPY INIT_VM_FLAG(DONTCOPY)
+#define VM_DONTEXPAND INIT_VM_FLAG(DONTEXPAND)
+#define VM_LOCKONFAULT INIT_VM_FLAG(LOCKONFAULT)
+#define VM_ACCOUNT INIT_VM_FLAG(ACCOUNT)
+#define VM_NORESERVE INIT_VM_FLAG(NORESERVE)
+#define VM_HUGETLB INIT_VM_FLAG(HUGETLB)
+#define VM_SYNC INIT_VM_FLAG(SYNC)
+#define VM_ARCH_1 INIT_VM_FLAG(ARCH_1)
+#define VM_WIPEONFORK INIT_VM_FLAG(WIPEONFORK)
+#define VM_DONTDUMP INIT_VM_FLAG(DONTDUMP)
+#ifdef CONFIG_MEM_SOFT_DIRTY
+#define VM_SOFTDIRTY INIT_VM_FLAG(SOFTDIRTY)
+#else
+#define VM_SOFTDIRTY VM_NONE
+#endif
+#define VM_MIXEDMAP INIT_VM_FLAG(MIXEDMAP)
+#define VM_HUGEPAGE INIT_VM_FLAG(HUGEPAGE)
+#define VM_NOHUGEPAGE INIT_VM_FLAG(NOHUGEPAGE)
+#define VM_MERGEABLE INIT_VM_FLAG(MERGEABLE)
+#define VM_STACK INIT_VM_FLAG(STACK)
+#ifdef CONFIG_STACK_GROWSUP
+#define VM_STACK_EARLY INIT_VM_FLAG(STACK_EARLY)
+#else
+#define VM_STACK_EARLY VM_NONE
+#endif
+#ifdef CONFIG_ARCH_HAS_PKEYS
+#define VM_PKEY_SHIFT ((__force int)VMA_HIGH_ARCH_0_BIT)
+/* Despite the naming, these are FLAGS not bits. */
+#define VM_PKEY_BIT0 INIT_VM_FLAG(PKEY_BIT0)
+#define VM_PKEY_BIT1 INIT_VM_FLAG(PKEY_BIT1)
+#define VM_PKEY_BIT2 INIT_VM_FLAG(PKEY_BIT2)
+#if CONFIG_ARCH_PKEY_BITS > 3
+#define VM_PKEY_BIT3 INIT_VM_FLAG(PKEY_BIT3)
+#else
+#define VM_PKEY_BIT3 VM_NONE
+#endif /* CONFIG_ARCH_PKEY_BITS > 3 */
+#if CONFIG_ARCH_PKEY_BITS > 4
+#define VM_PKEY_BIT4 INIT_VM_FLAG(PKEY_BIT4)
+#else
+#define VM_PKEY_BIT4 VM_NONE
+#endif /* CONFIG_ARCH_PKEY_BITS > 4 */
+#endif /* CONFIG_ARCH_HAS_PKEYS */
+#if defined(CONFIG_X86_USER_SHADOW_STACK) || defined(CONFIG_ARM64_GCS)
+#define VM_SHADOW_STACK INIT_VM_FLAG(SHADOW_STACK)
+#else
+#define VM_SHADOW_STACK VM_NONE
+#endif
+#if defined(CONFIG_PPC64)
+#define VM_SAO INIT_VM_FLAG(SAO)
+#elif defined(CONFIG_PARISC)
+#define VM_GROWSUP INIT_VM_FLAG(GROWSUP)
+#elif defined(CONFIG_SPARC64)
+#define VM_SPARC_ADI INIT_VM_FLAG(SPARC_ADI)
+#define VM_ARCH_CLEAR INIT_VM_FLAG(ARCH_CLEAR)
+#elif defined(CONFIG_ARM64)
+#define VM_ARM64_BTI INIT_VM_FLAG(ARM64_BTI)
+#define VM_ARCH_CLEAR INIT_VM_FLAG(ARCH_CLEAR)
+#elif !defined(CONFIG_MMU)
+#define VM_MAPPED_COPY INIT_VM_FLAG(MAPPED_COPY)
+#endif
+#ifndef VM_GROWSUP
+#define VM_GROWSUP VM_NONE
+#endif
+#ifdef CONFIG_ARM64_MTE
+#define VM_MTE INIT_VM_FLAG(MTE)
+#define VM_MTE_ALLOWED INIT_VM_FLAG(MTE_ALLOWED)
+#else
+#define VM_MTE VM_NONE
+#define VM_MTE_ALLOWED VM_NONE
+#endif
+#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
+#define VM_UFFD_MINOR INIT_VM_FLAG(UFFD_MINOR)
#else
-#define VM_STACK VM_GROWSDOWN
-#define VM_STACK_EARLY 0
+#define VM_UFFD_MINOR VM_NONE
+#endif
+#ifdef CONFIG_64BIT
+#define VM_ALLOW_ANY_UNCACHED INIT_VM_FLAG(ALLOW_ANY_UNCACHED)
+#define VM_SEALED INIT_VM_FLAG(SEALED)
+#else
+#define VM_ALLOW_ANY_UNCACHED VM_NONE
+#define VM_SEALED VM_NONE
+#endif
+#if defined(CONFIG_64BIT) || defined(CONFIG_PPC32)
+#define VM_DROPPABLE INIT_VM_FLAG(DROPPABLE)
+#else
+#define VM_DROPPABLE VM_NONE
+#endif
+
+/* Bits set in the VMA until the stack is in its final location */
+#define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY)
+
+#define TASK_EXEC ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0)
+
+/* Common data flag combinations */
+#define VM_DATA_FLAGS_TSK_EXEC (VM_READ | VM_WRITE | TASK_EXEC | \
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+#define VM_DATA_FLAGS_NON_EXEC (VM_READ | VM_WRITE | VM_MAYREAD | \
+ VM_MAYWRITE | VM_MAYEXEC)
+#define VM_DATA_FLAGS_EXEC (VM_READ | VM_WRITE | VM_EXEC | \
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#ifndef VM_DATA_DEFAULT_FLAGS /* arch can override this */
+#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_EXEC
+#endif
+
+#ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */
+#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
#endif
+#define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK)
+
+#define VM_STACK_FLAGS (VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
+
+/* VMA basic access permission flags */
+#define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC)
+
+/*
+ * Special vmas that are non-mergable, non-mlock()able.
+ */
+#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP)
+
#define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE)
#define TASK_SIZE_LOW DEFAULT_MAP_WINDOW
#define TASK_SIZE_MAX DEFAULT_MAP_WINDOW
@@ -96,25 +326,58 @@ extern unsigned long dac_mmap_min_addr;
#define VM_DATA_FLAGS_TSK_EXEC (VM_READ | VM_WRITE | TASK_EXEC | \
VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC
-
-#define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK)
-
-#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
-#define VM_STACK_FLAGS (VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
-#define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY)
-
#define RLIMIT_STACK 3 /* max stack size */
#define RLIMIT_MEMLOCK 8 /* max locked-in-memory address space */
#define CAP_IPC_LOCK 14
-#ifdef CONFIG_64BIT
-#define VM_SEALED_BIT 42
-#define VM_SEALED BIT(VM_SEALED_BIT)
-#else
-#define VM_SEALED VM_NONE
-#endif
+/*
+ * Flags which should be 'sticky' on merge - that is, flags which, when one VMA
+ * possesses them but the other does not, should nonetheless be applied to the
+ * merged VMA:
+ *
+ * VM_SOFTDIRTY - If a VMA is marked soft-dirty, that is, has not had its
+ *                references cleared via /proc/$pid/clear_refs, any merged VMA
+ *                should be considered soft-dirty also, as the flag operates at
+ *                VMA granularity.
+ */
+#define VM_STICKY (VM_SOFTDIRTY | VM_MAYBE_GUARD)
+
+/*
+ * VMA flags we ignore for the purposes of merge, i.e. one VMA possessing one
+ * of these flags and the other not does not preclude a merge.
+ *
+ * VM_STICKY - When merging VMAs, VMA flags must match, unless they are
+ *             'sticky'. If any sticky flags exist in either VMA, we simply
+ *             set all of them on the merged VMA.
+ */
+#define VM_IGNORE_MERGE VM_STICKY
+
+/*
+ * Flags which should result in page tables being copied on fork. These are
+ * flags which indicate that the VMA's page tables contain state which cannot
+ * be reconstituted upon page fault, and so necessitate page table copying upon
+ * fork.
+ *
+ * VM_PFNMAP / VM_MIXEDMAP - These contain kernel-mapped data which cannot be
+ *                  reasonably reconstructed on page fault.
+ *
+ * VM_UFFD_WP - Encodes metadata about an installed uffd
+ * write protect handler, which cannot be
+ * reconstructed on page fault.
+ *
+ *                  We always copy page tables when dst_vma has uffd-wp
+ *                  enabled, even if it is file-backed (e.g. shmem): with
+ *                  uffd-wp enabled, the page tables contain uffd-wp
+ *                  protection information which cannot be retrieved from
+ *                  the page cache, so skipping the copy would lose it.
+ *
+ * VM_MAYBE_GUARD - Could contain page guard region markers which
+ * by design are a property of the page tables
+ * only and thus cannot be reconstructed on page
+ * fault.
+ */
+#define VM_COPY_ON_FORK (VM_PFNMAP | VM_MIXEDMAP | VM_UFFD_WP | VM_MAYBE_GUARD)
#define FIRST_USER_ADDRESS 0UL
#define USER_PGTABLES_CEILING 0UL
@@ -163,6 +426,8 @@ typedef __bitwise unsigned int vm_fault_t;
#define ASSERT_EXCLUSIVE_WRITER(x)
+#define pgtable_supports_soft_dirty() 1
+
/**
* swap - swap values of @a and @b
* @a: first value
@@ -259,6 +524,15 @@ typedef struct {
__private DECLARE_BITMAP(__mm_flags, NUM_MM_FLAG_BITS);
} mm_flags_t;
+/*
+ * Opaque type representing current VMA (vm_area_struct) flag state. Must be
+ * accessed via vma_flags_xxx() helper functions.
+ */
+#define NUM_VMA_FLAG_BITS BITS_PER_LONG
+typedef struct {
+	__private DECLARE_BITMAP(__vma_flags, NUM_VMA_FLAG_BITS);
+} vma_flags_t;
+
struct mm_struct {
struct maple_tree mm_mt;
int map_count; /* number of VMAs */
@@ -275,6 +549,57 @@ struct mm_struct {
struct vm_area_struct;
+
+/* What action should be taken after an .mmap_prepare call is complete? */
+enum mmap_action_type {
+ MMAP_NOTHING, /* Mapping is complete, no further action. */
+ MMAP_REMAP_PFN, /* Remap PFN range. */
+ MMAP_IO_REMAP_PFN, /* I/O remap PFN range. */
+};
+
+/*
+ * Describes an action which an mmap_prepare hook can request be taken to
+ * complete the mapping of a VMA. Specified in vm_area_desc.
+ */
+struct mmap_action {
+ union {
+ /* Remap range. */
+ struct {
+ unsigned long start;
+ unsigned long start_pfn;
+ unsigned long size;
+ pgprot_t pgprot;
+ } remap;
+ };
+ enum mmap_action_type type;
+
+ /*
+ * If specified, this hook is invoked after the selected action has been
+	 * successfully completed. Note that the VMA write lock is still held.
+ *
+ * The absolute minimum ought to be done here.
+ *
+ * Returns 0 on success, or an error code.
+ */
+ int (*success_hook)(const struct vm_area_struct *vma);
+
+ /*
+	 * If specified, this hook is invoked if an error occurs while
+	 * attempting the selected action.
+ *
+ * The hook can return an error code in order to filter the error, but
+ * it is not valid to clear the error here.
+ */
+ int (*error_hook)(int err);
+
+ /*
+	 * This should be set in the rare instances where the operation
+	 * requires that rmap not be able to access the VMA until it is
+	 * completely set up.
+ */
+ bool hide_from_rmap_until_complete :1;
+};
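+
+/*
+ * Illustrative sketch only (not part of the header proper): a driver
+ * .mmap_prepare hook might request a PFN remap by filling in the action
+ * embedded in its vm_area_desc. foo_mmap_prepare() and foo_base_pfn are
+ * hypothetical names used purely for illustration:
+ *
+ *	static int foo_mmap_prepare(struct vm_area_desc *desc)
+ *	{
+ *		desc->action.type = MMAP_REMAP_PFN;
+ *		desc->action.remap.start = desc->start;
+ *		desc->action.remap.start_pfn = foo_base_pfn;
+ *		desc->action.remap.size = desc->end - desc->start;
+ *		desc->action.remap.pgprot = desc->page_prot;
+ *		return 0;
+ *	}
+ *
+ * The caller then completes the requested action via mmap_action_complete()
+ * (a no-op stub in this test header).
+ */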
+
/*
* Describes a VMA that is about to be mmap()'ed. Drivers may choose to
* manipulate mutable fields which will cause those fields to be updated in the
@@ -292,12 +617,18 @@ struct vm_area_desc {
/* Mutable fields. Populated with initial state. */
pgoff_t pgoff;
struct file *vm_file;
- vm_flags_t vm_flags;
+ union {
+ vm_flags_t vm_flags;
+ vma_flags_t vma_flags;
+ };
pgprot_t page_prot;
/* Write-only fields. */
const struct vm_operations_struct *vm_ops;
void *private_data;
+
+ /* Take further action? */
+ struct mmap_action action;
};
struct file_operations {
@@ -335,7 +666,7 @@ struct vm_area_struct {
*/
union {
const vm_flags_t vm_flags;
- vm_flags_t __private __vm_flags;
+ vma_flags_t flags;
};
#ifdef CONFIG_PER_VMA_LOCK
@@ -794,8 +1125,7 @@ static inline void update_hiwater_vm(struct mm_struct *mm)
static inline void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas,
struct vm_area_struct *vma, unsigned long start_addr,
- unsigned long end_addr, unsigned long tree_end,
- bool mm_wr_locked)
+ unsigned long end_addr, unsigned long tree_end)
{
}
@@ -844,6 +1174,14 @@ static inline void vma_start_write(struct vm_area_struct *vma)
vma->vm_lock_seq++;
}
+static inline __must_check
+int vma_start_write_killable(struct vm_area_struct *vma)
+{
+ /* Used to indicate to tests that a write operation has begun. */
+ vma->vm_lock_seq++;
+ return 0;
+}
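+
+/*
+ * Illustrative sketch only: callers of vma_start_write_killable() are
+ * expected to propagate failure (this test stub always succeeds), e.g.:
+ *
+ *	int err = vma_start_write_killable(vma);
+ *
+ *	if (err)
+ *		return err;
+ */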
+
static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
unsigned long start,
unsigned long end,
@@ -1042,26 +1380,6 @@ static inline bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags,
return true;
}
-static inline void vm_flags_init(struct vm_area_struct *vma,
- vm_flags_t flags)
-{
- vma->__vm_flags = flags;
-}
-
-static inline void vm_flags_set(struct vm_area_struct *vma,
- vm_flags_t flags)
-{
- vma_start_write(vma);
- vma->__vm_flags |= flags;
-}
-
-static inline void vm_flags_clear(struct vm_area_struct *vma,
- vm_flags_t flags)
-{
- vma_start_write(vma);
- vma->__vm_flags &= ~flags;
-}
-
static inline int shmem_zero_setup(struct vm_area_struct *vma)
{
return 0;
@@ -1218,13 +1536,118 @@ static inline void userfaultfd_unmap_complete(struct mm_struct *mm,
{
}
-# define ACCESS_PRIVATE(p, member) ((p)->member)
+#define ACCESS_PRIVATE(p, member) ((p)->member)
+
+#define bitmap_size(nbits) (ALIGN(nbits, BITS_PER_LONG) / BITS_PER_BYTE)
+
+static __always_inline void bitmap_zero(unsigned long *dst, unsigned int nbits)
+{
+ unsigned int len = bitmap_size(nbits);
+
+ if (small_const_nbits(nbits))
+ *dst = 0;
+ else
+ memset(dst, 0, len);
+}
static inline bool mm_flags_test(int flag, const struct mm_struct *mm)
{
return test_bit(flag, ACCESS_PRIVATE(&mm->flags, __mm_flags));
}
+/* Clears all bits in the VMA flags bitmap, non-atomically. */
+static inline void vma_flags_clear_all(vma_flags_t *flags)
+{
+ bitmap_zero(ACCESS_PRIVATE(flags, __vma_flags), NUM_VMA_FLAG_BITS);
+}
+
+/*
+ * Copy value to the first system word of VMA flags, non-atomically.
+ *
+ * IMPORTANT: This does not overwrite bytes past the first system word. The
+ * caller must account for this.
+ */
+static inline void vma_flags_overwrite_word(vma_flags_t *flags, unsigned long value)
+{
+ *ACCESS_PRIVATE(flags, __vma_flags) = value;
+}
+
+/*
+ * Copy value to the first system word of VMA flags ONCE, non-atomically.
+ *
+ * IMPORTANT: This does not overwrite bytes past the first system word. The
+ * caller must account for this.
+ */
+static inline void vma_flags_overwrite_word_once(vma_flags_t *flags, unsigned long value)
+{
+ unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);
+
+ WRITE_ONCE(*bitmap, value);
+}
+
+/* Update the first system word of VMA flags setting bits, non-atomically. */
+static inline void vma_flags_set_word(vma_flags_t *flags, unsigned long value)
+{
+ unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);
+
+ *bitmap |= value;
+}
+
+/* Update the first system word of VMA flags clearing bits, non-atomically. */
+static inline void vma_flags_clear_word(vma_flags_t *flags, unsigned long value)
+{
+ unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);
+
+ *bitmap &= ~value;
+}
+
+/* Use when VMA is not part of the VMA tree and needs no locking */
+static inline void vm_flags_init(struct vm_area_struct *vma,
+ vm_flags_t flags)
+{
+ vma_flags_clear_all(&vma->flags);
+ vma_flags_overwrite_word(&vma->flags, flags);
+}
+
+/*
+ * Use when VMA is part of the VMA tree and modifications need coordination
+ * Note: vm_flags_reset and vm_flags_reset_once do not lock the vma and
+ * it should be locked explicitly beforehand.
+ */
+static inline void vm_flags_reset(struct vm_area_struct *vma,
+ vm_flags_t flags)
+{
+ vma_assert_write_locked(vma);
+ vm_flags_init(vma, flags);
+}
+
+static inline void vm_flags_reset_once(struct vm_area_struct *vma,
+ vm_flags_t flags)
+{
+ vma_assert_write_locked(vma);
+ /*
+	 * Callers should only care about avoiding reordering of the
+	 * assignment to the first word; only that word is written using
+	 * WRITE_ONCE().
+ */
+ vma_flags_clear_all(&vma->flags);
+ vma_flags_overwrite_word_once(&vma->flags, flags);
+}
+
+static inline void vm_flags_set(struct vm_area_struct *vma,
+ vm_flags_t flags)
+{
+ vma_start_write(vma);
+ vma_flags_set_word(&vma->flags, flags);
+}
+
+static inline void vm_flags_clear(struct vm_area_struct *vma,
+ vm_flags_t flags)
+{
+ vma_start_write(vma);
+ vma_flags_clear_word(&vma->flags, flags);
+}
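+
+/*
+ * Illustrative sketch only: typical usage of the flag helpers above. For a
+ * VMA already in the VMA tree, the set/clear helpers take the VMA write lock
+ * themselves:
+ *
+ *	vm_flags_set(vma, VM_SOFTDIRTY);
+ *	vm_flags_clear(vma, VM_SOFTDIRTY);
+ *
+ * A freshly allocated VMA not yet visible in the tree can be initialised
+ * without locking:
+ *
+ *	vm_flags_init(vma, VM_READ | VM_MAYREAD);
+ */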
+
/*
* Denies creating a writable executable mapping or gaining executable permissions.
*
@@ -1326,12 +1749,23 @@ static inline void free_anon_vma_name(struct vm_area_struct *vma)
static inline void set_vma_from_desc(struct vm_area_struct *vma,
struct vm_area_desc *desc);
-static inline int __compat_vma_mmap_prepare(const struct file_operations *f_op,
+static inline void mmap_action_prepare(struct mmap_action *action,
+ struct vm_area_desc *desc)
+{
+}
+
+static inline int mmap_action_complete(struct mmap_action *action,
+ struct vm_area_struct *vma)
+{
+ return 0;
+}
+
+static inline int __compat_vma_mmap(const struct file_operations *f_op,
struct file *file, struct vm_area_struct *vma)
{
struct vm_area_desc desc = {
.mm = vma->vm_mm,
- .file = vma->vm_file,
+ .file = file,
.start = vma->vm_start,
.end = vma->vm_end,
@@ -1339,21 +1773,24 @@ static inline int __compat_vma_mmap_prepare(const struct file_operations *f_op,
.vm_file = vma->vm_file,
.vm_flags = vma->vm_flags,
.page_prot = vma->vm_page_prot,
+
+ .action.type = MMAP_NOTHING, /* Default */
};
int err;
err = f_op->mmap_prepare(&desc);
if (err)
return err;
- set_vma_from_desc(vma, &desc);
- return 0;
+ mmap_action_prepare(&desc.action, &desc);
+ set_vma_from_desc(vma, &desc);
+ return mmap_action_complete(&desc.action, vma);
}
-static inline int compat_vma_mmap_prepare(struct file *file,
+static inline int compat_vma_mmap(struct file *file,
struct vm_area_struct *vma)
{
- return __compat_vma_mmap_prepare(file->f_op, file, vma);
+ return __compat_vma_mmap(file->f_op, file, vma);
}
/* Did the driver provide valid mmap hook configuration? */
@@ -1374,7 +1811,7 @@ static inline bool can_mmap_file(struct file *file)
static inline int vfs_mmap(struct file *file, struct vm_area_struct *vma)
{
if (file->f_op->mmap_prepare)
- return compat_vma_mmap_prepare(file, vma);
+ return compat_vma_mmap(file, vma);
return file->f_op->mmap(file, vma);
}
@@ -1407,4 +1844,20 @@ static inline vm_flags_t ksm_vma_flags(const struct mm_struct *mm,
return vm_flags;
}
+static inline void remap_pfn_range_prepare(struct vm_area_desc *desc, unsigned long pfn)
+{
+}
+
+static inline int remap_pfn_range_complete(struct vm_area_struct *vma, unsigned long addr,
+ unsigned long pfn, unsigned long size, pgprot_t pgprot)
+{
+ return 0;
+}
+
+static inline int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
+ struct list_head *uf)
+{
+ return 0;
+}
+
#endif /* __MM_VMA_INTERNAL_H */