Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r--  arch/x86/kernel/cpu/Makefile                      |    1
-rw-r--r--  arch/x86/kernel/cpu/bhyve.c                       |   66
-rw-r--r--  arch/x86/kernel/cpu/bugs.c                        |  689
-rw-r--r--  arch/x86/kernel/cpu/cacheinfo.c                   |   48
-rw-r--r--  arch/x86/kernel/cpu/common.c                      |   87
-rw-r--r--  arch/x86/kernel/cpu/hypervisor.c                  |    3
-rw-r--r--  arch/x86/kernel/cpu/mce/amd.c                     |  163
-rw-r--r--  arch/x86/kernel/cpu/mce/core.c                    |  315
-rw-r--r--  arch/x86/kernel/cpu/mce/intel.c                   |   18
-rw-r--r--  arch/x86/kernel/cpu/mce/internal.h                |    9
-rw-r--r--  arch/x86/kernel/cpu/microcode/amd.c               |   73
-rw-r--r--  arch/x86/kernel/cpu/microcode/core.c              |   47
-rw-r--r--  arch/x86/kernel/cpu/microcode/intel-ucode-defs.h  |   86
-rw-r--r--  arch/x86/kernel/cpu/microcode/internal.h          |    9
-rw-r--r--  arch/x86/kernel/cpu/resctrl/core.c                |   81
-rw-r--r--  arch/x86/kernel/cpu/resctrl/internal.h            |   56
-rw-r--r--  arch/x86/kernel/cpu/resctrl/monitor.c             |  248
-rw-r--r--  arch/x86/kernel/cpu/scattered.c                   |    1
-rw-r--r--  arch/x86/kernel/cpu/topology.c                    |   13
-rw-r--r--  arch/x86/kernel/cpu/topology_amd.c                |   39
20 files changed, 1244 insertions, 808 deletions
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 1e26179ff18c..2f8a58ef690e 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -58,6 +58,7 @@ obj-$(CONFIG_X86_SGX) += sgx/
obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
obj-$(CONFIG_HYPERVISOR_GUEST) += vmware.o hypervisor.o mshyperv.o
+obj-$(CONFIG_BHYVE_GUEST) += bhyve.o
obj-$(CONFIG_ACRN_GUEST) += acrn.o
obj-$(CONFIG_DEBUG_FS) += debugfs.o
diff --git a/arch/x86/kernel/cpu/bhyve.c b/arch/x86/kernel/cpu/bhyve.c
new file mode 100644
index 000000000000..f1a8ca3dd1ed
--- /dev/null
+++ b/arch/x86/kernel/cpu/bhyve.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * FreeBSD Bhyve guest enlightenments
+ *
+ * Copyright © 2025 Amazon.com, Inc. or its affiliates.
+ *
+ * Author: David Woodhouse <dwmw2@infradead.org>
+ */
+
+#include <linux/init.h>
+#include <linux/export.h>
+#include <asm/processor.h>
+#include <asm/hypervisor.h>
+
+static uint32_t bhyve_cpuid_base;
+static uint32_t bhyve_cpuid_max;
+
+#define BHYVE_SIGNATURE "bhyve bhyve "
+
+#define CPUID_BHYVE_FEATURES 0x40000001
+
+/* Features advertised in CPUID_BHYVE_FEATURES %eax */
+
+/* MSI Extended Dest ID */
+#define CPUID_BHYVE_FEAT_EXT_DEST_ID (1UL << 0)
+
+static uint32_t __init bhyve_detect(void)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
+ return 0;
+
+ bhyve_cpuid_base = cpuid_base_hypervisor(BHYVE_SIGNATURE, 0);
+ if (!bhyve_cpuid_base)
+ return 0;
+
+ bhyve_cpuid_max = cpuid_eax(bhyve_cpuid_base);
+ return bhyve_cpuid_max;
+}
+
+static uint32_t bhyve_features(void)
+{
+ unsigned int cpuid_leaf = bhyve_cpuid_base | CPUID_BHYVE_FEATURES;
+
+ if (bhyve_cpuid_max < cpuid_leaf)
+ return 0;
+
+ return cpuid_eax(cpuid_leaf);
+}
+
+static bool __init bhyve_ext_dest_id(void)
+{
+ return !!(bhyve_features() & CPUID_BHYVE_FEAT_EXT_DEST_ID);
+}
+
+static bool __init bhyve_x2apic_available(void)
+{
+ return true;
+}
+
+const struct hypervisor_x86 x86_hyper_bhyve __refconst = {
+ .name = "Bhyve",
+ .detect = bhyve_detect,
+ .init.init_platform = x86_init_noop,
+ .init.x2apic_available = bhyve_x2apic_available,
+ .init.msi_ext_dest_id = bhyve_ext_dest_id,
+};
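
Note: bhyve_detect() above relies on cpuid_base_hypervisor() scanning the conventional hypervisor CPUID range (leaves starting at 0x40000000, spaced 0x100 apart) for the 12-byte vendor signature returned in EBX/ECX/EDX. The following is a rough standalone userspace illustration of that lookup, not kernel code; find_hypervisor_base() is a hypothetical name and the scan bounds simply mirror the usual layout:

#include <cpuid.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define BHYVE_SIGNATURE	"bhyve bhyve "

/* Walk the hypervisor CPUID leaves and compare the vendor signature. */
static uint32_t find_hypervisor_base(const char *sig)
{
	uint32_t base, eax, signature[3];

	for (base = 0x40000000; base < 0x40010000; base += 0x100) {
		__cpuid(base, eax, signature[0], signature[1], signature[2]);
		if (!memcmp(sig, signature, 12))
			return base;
	}
	return 0;
}

int main(void)
{
	unsigned int eax, ebx, ecx, edx;
	uint32_t base;

	/* Equivalent of the X86_FEATURE_HYPERVISOR check: CPUID.1:ECX[31]. */
	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx) || !(ecx & (1u << 31))) {
		printf("no hypervisor present\n");
		return 0;
	}

	base = find_hypervisor_base(BHYVE_SIGNATURE);
	if (base)
		printf("bhyve CPUID leaves start at 0x%x\n", base);
	else
		printf("bhyve signature not found\n");
	return 0;
}
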
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index af838b8d845c..6a526ae1fe99 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -96,6 +96,9 @@ static void __init its_update_mitigation(void);
static void __init its_apply_mitigation(void);
static void __init tsa_select_mitigation(void);
static void __init tsa_apply_mitigation(void);
+static void __init vmscape_select_mitigation(void);
+static void __init vmscape_update_mitigation(void);
+static void __init vmscape_apply_mitigation(void);
/* The base value of the SPEC_CTRL MSR without task-specific bits set */
u64 x86_spec_ctrl_base;
@@ -105,6 +108,14 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
DEFINE_PER_CPU(u64, x86_spec_ctrl_current);
EXPORT_PER_CPU_SYMBOL_GPL(x86_spec_ctrl_current);
+/*
+ * Set when the CPU has run a potentially malicious guest. An IBPB will
+ * be needed before running userspace. That IBPB will flush the branch
+ * predictor content.
+ */
+DEFINE_PER_CPU(bool, x86_ibpb_exit_to_user);
+EXPORT_PER_CPU_SYMBOL_GPL(x86_ibpb_exit_to_user);
+
u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB;
static u64 __ro_after_init x86_arch_cap_msr;
@@ -262,6 +273,7 @@ void __init cpu_select_mitigations(void)
its_select_mitigation();
bhi_select_mitigation();
tsa_select_mitigation();
+ vmscape_select_mitigation();
/*
* After mitigations are selected, some may need to update their
@@ -293,6 +305,7 @@ void __init cpu_select_mitigations(void)
bhi_update_mitigation();
/* srso_update_mitigation() depends on retbleed_update_mitigation(). */
srso_update_mitigation();
+ vmscape_update_mitigation();
spectre_v1_apply_mitigation();
spectre_v2_apply_mitigation();
@@ -310,6 +323,7 @@ void __init cpu_select_mitigations(void)
its_apply_mitigation();
bhi_apply_mitigation();
tsa_apply_mitigation();
+ vmscape_apply_mitigation();
}
/*
@@ -420,6 +434,9 @@ static bool __init should_mitigate_vuln(unsigned int bug)
case X86_BUG_SPEC_STORE_BYPASS:
return cpu_attack_vector_mitigated(CPU_MITIGATE_USER_USER);
+ case X86_BUG_VMSCAPE:
+ return cpu_attack_vector_mitigated(CPU_MITIGATE_GUEST_HOST);
+
default:
WARN(1, "Unknown bug %x\n", bug);
return false;
@@ -670,8 +687,7 @@ static const char * const mmio_strings[] = {
static void __init mmio_select_mitigation(void)
{
- if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) ||
- cpu_mitigations_off()) {
+ if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) {
mmio_mitigation = MMIO_MITIGATION_OFF;
return;
}
@@ -1446,8 +1462,10 @@ static void __init retbleed_update_mitigation(void)
retbleed_mitigation = RETBLEED_MITIGATION_EIBRS;
break;
default:
- if (retbleed_mitigation != RETBLEED_MITIGATION_STUFF)
+ if (retbleed_mitigation != RETBLEED_MITIGATION_STUFF) {
pr_err(RETBLEED_INTEL_MSG);
+ retbleed_mitigation = RETBLEED_MITIGATION_NONE;
+ }
}
}
@@ -1828,9 +1846,10 @@ enum spectre_v2_mitigation_cmd {
SPECTRE_V2_CMD_IBRS,
};
-static enum spectre_v2_mitigation_cmd spectre_v2_cmd __ro_after_init = SPECTRE_V2_CMD_AUTO;
+static enum spectre_v2_mitigation_cmd spectre_v2_cmd __ro_after_init =
+ IS_ENABLED(CONFIG_MITIGATION_SPECTRE_V2) ? SPECTRE_V2_CMD_AUTO : SPECTRE_V2_CMD_NONE;
-enum spectre_v2_user_cmd {
+enum spectre_v2_user_mitigation_cmd {
SPECTRE_V2_USER_CMD_NONE,
SPECTRE_V2_USER_CMD_AUTO,
SPECTRE_V2_USER_CMD_FORCE,
@@ -1840,6 +1859,9 @@ enum spectre_v2_user_cmd {
SPECTRE_V2_USER_CMD_SECCOMP_IBPB,
};
+static enum spectre_v2_user_mitigation_cmd spectre_v2_user_cmd __ro_after_init =
+ IS_ENABLED(CONFIG_MITIGATION_SPECTRE_V2) ? SPECTRE_V2_USER_CMD_AUTO : SPECTRE_V2_USER_CMD_NONE;
+
static const char * const spectre_v2_user_strings[] = {
[SPECTRE_V2_USER_NONE] = "User space: Vulnerable",
[SPECTRE_V2_USER_STRICT] = "User space: Mitigation: STIBP protection",
@@ -1848,50 +1870,31 @@ static const char * const spectre_v2_user_strings[] = {
[SPECTRE_V2_USER_SECCOMP] = "User space: Mitigation: STIBP via seccomp and prctl",
};
-static const struct {
- const char *option;
- enum spectre_v2_user_cmd cmd;
- bool secure;
-} v2_user_options[] __initconst = {
- { "auto", SPECTRE_V2_USER_CMD_AUTO, false },
- { "off", SPECTRE_V2_USER_CMD_NONE, false },
- { "on", SPECTRE_V2_USER_CMD_FORCE, true },
- { "prctl", SPECTRE_V2_USER_CMD_PRCTL, false },
- { "prctl,ibpb", SPECTRE_V2_USER_CMD_PRCTL_IBPB, false },
- { "seccomp", SPECTRE_V2_USER_CMD_SECCOMP, false },
- { "seccomp,ibpb", SPECTRE_V2_USER_CMD_SECCOMP_IBPB, false },
-};
-
-static void __init spec_v2_user_print_cond(const char *reason, bool secure)
+static int __init spectre_v2_user_parse_cmdline(char *str)
{
- if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) != secure)
- pr_info("spectre_v2_user=%s forced on command line.\n", reason);
-}
-
-static enum spectre_v2_user_cmd __init spectre_v2_parse_user_cmdline(void)
-{
- char arg[20];
- int ret, i;
-
- if (!IS_ENABLED(CONFIG_MITIGATION_SPECTRE_V2))
- return SPECTRE_V2_USER_CMD_NONE;
-
- ret = cmdline_find_option(boot_command_line, "spectre_v2_user",
- arg, sizeof(arg));
- if (ret < 0)
- return SPECTRE_V2_USER_CMD_AUTO;
+ if (!str)
+ return -EINVAL;
- for (i = 0; i < ARRAY_SIZE(v2_user_options); i++) {
- if (match_option(arg, ret, v2_user_options[i].option)) {
- spec_v2_user_print_cond(v2_user_options[i].option,
- v2_user_options[i].secure);
- return v2_user_options[i].cmd;
- }
- }
+ if (!strcmp(str, "auto"))
+ spectre_v2_user_cmd = SPECTRE_V2_USER_CMD_AUTO;
+ else if (!strcmp(str, "off"))
+ spectre_v2_user_cmd = SPECTRE_V2_USER_CMD_NONE;
+ else if (!strcmp(str, "on"))
+ spectre_v2_user_cmd = SPECTRE_V2_USER_CMD_FORCE;
+ else if (!strcmp(str, "prctl"))
+ spectre_v2_user_cmd = SPECTRE_V2_USER_CMD_PRCTL;
+ else if (!strcmp(str, "prctl,ibpb"))
+ spectre_v2_user_cmd = SPECTRE_V2_USER_CMD_PRCTL_IBPB;
+ else if (!strcmp(str, "seccomp"))
+ spectre_v2_user_cmd = SPECTRE_V2_USER_CMD_SECCOMP;
+ else if (!strcmp(str, "seccomp,ibpb"))
+ spectre_v2_user_cmd = SPECTRE_V2_USER_CMD_SECCOMP_IBPB;
+ else
+ pr_err("Ignoring unknown spectre_v2_user option (%s).", str);
- pr_err("Unknown user space protection option (%s). Switching to default\n", arg);
- return SPECTRE_V2_USER_CMD_AUTO;
+ return 0;
}
+early_param("spectre_v2_user", spectre_v2_user_parse_cmdline);
static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode)
{
@@ -1903,7 +1906,7 @@ static void __init spectre_v2_user_select_mitigation(void)
if (!boot_cpu_has(X86_FEATURE_IBPB) && !boot_cpu_has(X86_FEATURE_STIBP))
return;
- switch (spectre_v2_parse_user_cmdline()) {
+ switch (spectre_v2_user_cmd) {
case SPECTRE_V2_USER_CMD_NONE:
return;
case SPECTRE_V2_USER_CMD_FORCE:
@@ -2031,119 +2034,61 @@ static void __init spectre_v2_user_apply_mitigation(void)
static const char * const spectre_v2_strings[] = {
[SPECTRE_V2_NONE] = "Vulnerable",
[SPECTRE_V2_RETPOLINE] = "Mitigation: Retpolines",
- [SPECTRE_V2_LFENCE] = "Mitigation: LFENCE",
+ [SPECTRE_V2_LFENCE] = "Vulnerable: LFENCE",
[SPECTRE_V2_EIBRS] = "Mitigation: Enhanced / Automatic IBRS",
[SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced / Automatic IBRS + LFENCE",
[SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced / Automatic IBRS + Retpolines",
[SPECTRE_V2_IBRS] = "Mitigation: IBRS",
};
-static const struct {
- const char *option;
- enum spectre_v2_mitigation_cmd cmd;
- bool secure;
-} mitigation_options[] __initconst = {
- { "off", SPECTRE_V2_CMD_NONE, false },
- { "on", SPECTRE_V2_CMD_FORCE, true },
- { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false },
- { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_LFENCE, false },
- { "retpoline,lfence", SPECTRE_V2_CMD_RETPOLINE_LFENCE, false },
- { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false },
- { "eibrs", SPECTRE_V2_CMD_EIBRS, false },
- { "eibrs,lfence", SPECTRE_V2_CMD_EIBRS_LFENCE, false },
- { "eibrs,retpoline", SPECTRE_V2_CMD_EIBRS_RETPOLINE, false },
- { "auto", SPECTRE_V2_CMD_AUTO, false },
- { "ibrs", SPECTRE_V2_CMD_IBRS, false },
-};
+static bool nospectre_v2 __ro_after_init;
-static void __init spec_v2_print_cond(const char *reason, bool secure)
+static int __init nospectre_v2_parse_cmdline(char *str)
{
- if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) != secure)
- pr_info("%s selected on command line.\n", reason);
+ nospectre_v2 = true;
+ spectre_v2_cmd = SPECTRE_V2_CMD_NONE;
+ return 0;
}
+early_param("nospectre_v2", nospectre_v2_parse_cmdline);
-static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
+static int __init spectre_v2_parse_cmdline(char *str)
{
- enum spectre_v2_mitigation_cmd cmd;
- char arg[20];
- int ret, i;
-
- cmd = IS_ENABLED(CONFIG_MITIGATION_SPECTRE_V2) ? SPECTRE_V2_CMD_AUTO : SPECTRE_V2_CMD_NONE;
- if (cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
- return SPECTRE_V2_CMD_NONE;
-
- ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg));
- if (ret < 0)
- return cmd;
-
- for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) {
- if (!match_option(arg, ret, mitigation_options[i].option))
- continue;
- cmd = mitigation_options[i].cmd;
- break;
- }
-
- if (i >= ARRAY_SIZE(mitigation_options)) {
- pr_err("unknown option (%s). Switching to default mode\n", arg);
- return cmd;
- }
-
- if ((cmd == SPECTRE_V2_CMD_RETPOLINE ||
- cmd == SPECTRE_V2_CMD_RETPOLINE_LFENCE ||
- cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC ||
- cmd == SPECTRE_V2_CMD_EIBRS_LFENCE ||
- cmd == SPECTRE_V2_CMD_EIBRS_RETPOLINE) &&
- !IS_ENABLED(CONFIG_MITIGATION_RETPOLINE)) {
- pr_err("%s selected but not compiled in. Switching to AUTO select\n",
- mitigation_options[i].option);
- return SPECTRE_V2_CMD_AUTO;
- }
-
- if ((cmd == SPECTRE_V2_CMD_EIBRS ||
- cmd == SPECTRE_V2_CMD_EIBRS_LFENCE ||
- cmd == SPECTRE_V2_CMD_EIBRS_RETPOLINE) &&
- !boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) {
- pr_err("%s selected but CPU doesn't have Enhanced or Automatic IBRS. Switching to AUTO select\n",
- mitigation_options[i].option);
- return SPECTRE_V2_CMD_AUTO;
- }
-
- if ((cmd == SPECTRE_V2_CMD_RETPOLINE_LFENCE ||
- cmd == SPECTRE_V2_CMD_EIBRS_LFENCE) &&
- !boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
- pr_err("%s selected, but CPU doesn't have a serializing LFENCE. Switching to AUTO select\n",
- mitigation_options[i].option);
- return SPECTRE_V2_CMD_AUTO;
- }
-
- if (cmd == SPECTRE_V2_CMD_IBRS && !IS_ENABLED(CONFIG_MITIGATION_IBRS_ENTRY)) {
- pr_err("%s selected but not compiled in. Switching to AUTO select\n",
- mitigation_options[i].option);
- return SPECTRE_V2_CMD_AUTO;
- }
-
- if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
- pr_err("%s selected but not Intel CPU. Switching to AUTO select\n",
- mitigation_options[i].option);
- return SPECTRE_V2_CMD_AUTO;
- }
+ if (!str)
+ return -EINVAL;
- if (cmd == SPECTRE_V2_CMD_IBRS && !boot_cpu_has(X86_FEATURE_IBRS)) {
- pr_err("%s selected but CPU doesn't have IBRS. Switching to AUTO select\n",
- mitigation_options[i].option);
- return SPECTRE_V2_CMD_AUTO;
- }
+ if (nospectre_v2)
+ return 0;
- if (cmd == SPECTRE_V2_CMD_IBRS && cpu_feature_enabled(X86_FEATURE_XENPV)) {
- pr_err("%s selected but running as XenPV guest. Switching to AUTO select\n",
- mitigation_options[i].option);
- return SPECTRE_V2_CMD_AUTO;
+ if (!strcmp(str, "off")) {
+ spectre_v2_cmd = SPECTRE_V2_CMD_NONE;
+ } else if (!strcmp(str, "on")) {
+ spectre_v2_cmd = SPECTRE_V2_CMD_FORCE;
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V2_USER);
+ } else if (!strcmp(str, "retpoline")) {
+ spectre_v2_cmd = SPECTRE_V2_CMD_RETPOLINE;
+ } else if (!strcmp(str, "retpoline,amd") ||
+ !strcmp(str, "retpoline,lfence")) {
+ spectre_v2_cmd = SPECTRE_V2_CMD_RETPOLINE_LFENCE;
+ } else if (!strcmp(str, "retpoline,generic")) {
+ spectre_v2_cmd = SPECTRE_V2_CMD_RETPOLINE_GENERIC;
+ } else if (!strcmp(str, "eibrs")) {
+ spectre_v2_cmd = SPECTRE_V2_CMD_EIBRS;
+ } else if (!strcmp(str, "eibrs,lfence")) {
+ spectre_v2_cmd = SPECTRE_V2_CMD_EIBRS_LFENCE;
+ } else if (!strcmp(str, "eibrs,retpoline")) {
+ spectre_v2_cmd = SPECTRE_V2_CMD_EIBRS_RETPOLINE;
+ } else if (!strcmp(str, "auto")) {
+ spectre_v2_cmd = SPECTRE_V2_CMD_AUTO;
+ } else if (!strcmp(str, "ibrs")) {
+ spectre_v2_cmd = SPECTRE_V2_CMD_IBRS;
+ } else {
+ pr_err("Ignoring unknown spectre_v2 option (%s).", str);
}
- spec_v2_print_cond(mitigation_options[i].option,
- mitigation_options[i].secure);
- return cmd;
+ return 0;
}
+early_param("spectre_v2", spectre_v2_parse_cmdline);
static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void)
{
@@ -2292,10 +2237,6 @@ static void __init bhi_update_mitigation(void)
{
if (spectre_v2_cmd == SPECTRE_V2_CMD_NONE)
bhi_mitigation = BHI_MITIGATION_OFF;
-
- if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2) &&
- spectre_v2_cmd == SPECTRE_V2_CMD_AUTO)
- bhi_mitigation = BHI_MITIGATION_OFF;
}
static void __init bhi_apply_mitigation(void)
@@ -2331,11 +2272,55 @@ static void __init bhi_apply_mitigation(void)
static void __init spectre_v2_select_mitigation(void)
{
- spectre_v2_cmd = spectre_v2_parse_cmdline();
+ if ((spectre_v2_cmd == SPECTRE_V2_CMD_RETPOLINE ||
+ spectre_v2_cmd == SPECTRE_V2_CMD_RETPOLINE_LFENCE ||
+ spectre_v2_cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC ||
+ spectre_v2_cmd == SPECTRE_V2_CMD_EIBRS_LFENCE ||
+ spectre_v2_cmd == SPECTRE_V2_CMD_EIBRS_RETPOLINE) &&
+ !IS_ENABLED(CONFIG_MITIGATION_RETPOLINE)) {
+ pr_err("RETPOLINE selected but not compiled in. Switching to AUTO select\n");
+ spectre_v2_cmd = SPECTRE_V2_CMD_AUTO;
+ }
+
+ if ((spectre_v2_cmd == SPECTRE_V2_CMD_EIBRS ||
+ spectre_v2_cmd == SPECTRE_V2_CMD_EIBRS_LFENCE ||
+ spectre_v2_cmd == SPECTRE_V2_CMD_EIBRS_RETPOLINE) &&
+ !boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) {
+ pr_err("EIBRS selected but CPU doesn't have Enhanced or Automatic IBRS. Switching to AUTO select\n");
+ spectre_v2_cmd = SPECTRE_V2_CMD_AUTO;
+ }
+
+ if ((spectre_v2_cmd == SPECTRE_V2_CMD_RETPOLINE_LFENCE ||
+ spectre_v2_cmd == SPECTRE_V2_CMD_EIBRS_LFENCE) &&
+ !boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
+ pr_err("LFENCE selected, but CPU doesn't have a serializing LFENCE. Switching to AUTO select\n");
+ spectre_v2_cmd = SPECTRE_V2_CMD_AUTO;
+ }
+
+ if (spectre_v2_cmd == SPECTRE_V2_CMD_IBRS && !IS_ENABLED(CONFIG_MITIGATION_IBRS_ENTRY)) {
+ pr_err("IBRS selected but not compiled in. Switching to AUTO select\n");
+ spectre_v2_cmd = SPECTRE_V2_CMD_AUTO;
+ }
- if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2) &&
- (spectre_v2_cmd == SPECTRE_V2_CMD_NONE || spectre_v2_cmd == SPECTRE_V2_CMD_AUTO))
+ if (spectre_v2_cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
+ pr_err("IBRS selected but not Intel CPU. Switching to AUTO select\n");
+ spectre_v2_cmd = SPECTRE_V2_CMD_AUTO;
+ }
+
+ if (spectre_v2_cmd == SPECTRE_V2_CMD_IBRS && !boot_cpu_has(X86_FEATURE_IBRS)) {
+ pr_err("IBRS selected but CPU doesn't have IBRS. Switching to AUTO select\n");
+ spectre_v2_cmd = SPECTRE_V2_CMD_AUTO;
+ }
+
+ if (spectre_v2_cmd == SPECTRE_V2_CMD_IBRS && cpu_feature_enabled(X86_FEATURE_XENPV)) {
+ pr_err("IBRS selected but running as XenPV guest. Switching to AUTO select\n");
+ spectre_v2_cmd = SPECTRE_V2_CMD_AUTO;
+ }
+
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) {
+ spectre_v2_cmd = SPECTRE_V2_CMD_NONE;
return;
+ }
switch (spectre_v2_cmd) {
case SPECTRE_V2_CMD_NONE:
@@ -2538,101 +2523,11 @@ static void update_mds_branch_idle(void)
}
}
-#define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n"
-#define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n"
-#define MMIO_MSG_SMT "MMIO Stale Data CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/processor_mmio_stale_data.html for more details.\n"
-
-void cpu_bugs_smt_update(void)
-{
- mutex_lock(&spec_ctrl_mutex);
-
- if (sched_smt_active() && unprivileged_ebpf_enabled() &&
- spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
- pr_warn_once(SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG);
-
- switch (spectre_v2_user_stibp) {
- case SPECTRE_V2_USER_NONE:
- break;
- case SPECTRE_V2_USER_STRICT:
- case SPECTRE_V2_USER_STRICT_PREFERRED:
- update_stibp_strict();
- break;
- case SPECTRE_V2_USER_PRCTL:
- case SPECTRE_V2_USER_SECCOMP:
- update_indir_branch_cond();
- break;
- }
-
- switch (mds_mitigation) {
- case MDS_MITIGATION_FULL:
- case MDS_MITIGATION_AUTO:
- case MDS_MITIGATION_VMWERV:
- if (sched_smt_active() && !boot_cpu_has(X86_BUG_MSBDS_ONLY))
- pr_warn_once(MDS_MSG_SMT);
- update_mds_branch_idle();
- break;
- case MDS_MITIGATION_OFF:
- break;
- }
-
- switch (taa_mitigation) {
- case TAA_MITIGATION_VERW:
- case TAA_MITIGATION_AUTO:
- case TAA_MITIGATION_UCODE_NEEDED:
- if (sched_smt_active())
- pr_warn_once(TAA_MSG_SMT);
- break;
- case TAA_MITIGATION_TSX_DISABLED:
- case TAA_MITIGATION_OFF:
- break;
- }
-
- switch (mmio_mitigation) {
- case MMIO_MITIGATION_VERW:
- case MMIO_MITIGATION_AUTO:
- case MMIO_MITIGATION_UCODE_NEEDED:
- if (sched_smt_active())
- pr_warn_once(MMIO_MSG_SMT);
- break;
- case MMIO_MITIGATION_OFF:
- break;
- }
-
- switch (tsa_mitigation) {
- case TSA_MITIGATION_USER_KERNEL:
- case TSA_MITIGATION_VM:
- case TSA_MITIGATION_AUTO:
- case TSA_MITIGATION_FULL:
- /*
- * TSA-SQ can potentially lead to info leakage between
- * SMT threads.
- */
- if (sched_smt_active())
- static_branch_enable(&cpu_buf_idle_clear);
- else
- static_branch_disable(&cpu_buf_idle_clear);
- break;
- case TSA_MITIGATION_NONE:
- case TSA_MITIGATION_UCODE_NEEDED:
- break;
- }
-
- mutex_unlock(&spec_ctrl_mutex);
-}
-
#undef pr_fmt
#define pr_fmt(fmt) "Speculative Store Bypass: " fmt
-static enum ssb_mitigation ssb_mode __ro_after_init = SPEC_STORE_BYPASS_NONE;
-
-/* The kernel command line selection */
-enum ssb_mitigation_cmd {
- SPEC_STORE_BYPASS_CMD_NONE,
- SPEC_STORE_BYPASS_CMD_AUTO,
- SPEC_STORE_BYPASS_CMD_ON,
- SPEC_STORE_BYPASS_CMD_PRCTL,
- SPEC_STORE_BYPASS_CMD_SECCOMP,
-};
+static enum ssb_mitigation ssb_mode __ro_after_init =
+ IS_ENABLED(CONFIG_MITIGATION_SSB) ? SPEC_STORE_BYPASS_AUTO : SPEC_STORE_BYPASS_NONE;
static const char * const ssb_strings[] = {
[SPEC_STORE_BYPASS_NONE] = "Vulnerable",
@@ -2641,94 +2536,61 @@ static const char * const ssb_strings[] = {
[SPEC_STORE_BYPASS_SECCOMP] = "Mitigation: Speculative Store Bypass disabled via prctl and seccomp",
};
-static const struct {
- const char *option;
- enum ssb_mitigation_cmd cmd;
-} ssb_mitigation_options[] __initconst = {
- { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */
- { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */
- { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */
- { "prctl", SPEC_STORE_BYPASS_CMD_PRCTL }, /* Disable Speculative Store Bypass via prctl */
- { "seccomp", SPEC_STORE_BYPASS_CMD_SECCOMP }, /* Disable Speculative Store Bypass via prctl and seccomp */
-};
+static bool nossb __ro_after_init;
-static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void)
+static int __init nossb_parse_cmdline(char *str)
{
- enum ssb_mitigation_cmd cmd;
- char arg[20];
- int ret, i;
-
- cmd = IS_ENABLED(CONFIG_MITIGATION_SSB) ?
- SPEC_STORE_BYPASS_CMD_AUTO : SPEC_STORE_BYPASS_CMD_NONE;
- if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable") ||
- cpu_mitigations_off()) {
- return SPEC_STORE_BYPASS_CMD_NONE;
- } else {
- ret = cmdline_find_option(boot_command_line, "spec_store_bypass_disable",
- arg, sizeof(arg));
- if (ret < 0)
- return cmd;
+ nossb = true;
+ ssb_mode = SPEC_STORE_BYPASS_NONE;
+ return 0;
+}
+early_param("nospec_store_bypass_disable", nossb_parse_cmdline);
- for (i = 0; i < ARRAY_SIZE(ssb_mitigation_options); i++) {
- if (!match_option(arg, ret, ssb_mitigation_options[i].option))
- continue;
+static int __init ssb_parse_cmdline(char *str)
+{
+ if (!str)
+ return -EINVAL;
- cmd = ssb_mitigation_options[i].cmd;
- break;
- }
+ if (nossb)
+ return 0;
- if (i >= ARRAY_SIZE(ssb_mitigation_options)) {
- pr_err("unknown option (%s). Switching to default mode\n", arg);
- return cmd;
- }
- }
+ if (!strcmp(str, "auto"))
+ ssb_mode = SPEC_STORE_BYPASS_AUTO;
+ else if (!strcmp(str, "on"))
+ ssb_mode = SPEC_STORE_BYPASS_DISABLE;
+ else if (!strcmp(str, "off"))
+ ssb_mode = SPEC_STORE_BYPASS_NONE;
+ else if (!strcmp(str, "prctl"))
+ ssb_mode = SPEC_STORE_BYPASS_PRCTL;
+ else if (!strcmp(str, "seccomp"))
+ ssb_mode = IS_ENABLED(CONFIG_SECCOMP) ?
+ SPEC_STORE_BYPASS_SECCOMP : SPEC_STORE_BYPASS_PRCTL;
+ else
+ pr_err("Ignoring unknown spec_store_bypass_disable option (%s).\n",
+ str);
- return cmd;
+ return 0;
}
+early_param("spec_store_bypass_disable", ssb_parse_cmdline);
static void __init ssb_select_mitigation(void)
{
- enum ssb_mitigation_cmd cmd;
-
- if (!boot_cpu_has(X86_FEATURE_SSBD))
- goto out;
-
- cmd = ssb_parse_cmdline();
- if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS) &&
- (cmd == SPEC_STORE_BYPASS_CMD_NONE ||
- cmd == SPEC_STORE_BYPASS_CMD_AUTO))
+ if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS)) {
+ ssb_mode = SPEC_STORE_BYPASS_NONE;
return;
+ }
- switch (cmd) {
- case SPEC_STORE_BYPASS_CMD_SECCOMP:
- /*
- * Choose prctl+seccomp as the default mode if seccomp is
- * enabled.
- */
- if (IS_ENABLED(CONFIG_SECCOMP))
- ssb_mode = SPEC_STORE_BYPASS_SECCOMP;
- else
- ssb_mode = SPEC_STORE_BYPASS_PRCTL;
- break;
- case SPEC_STORE_BYPASS_CMD_ON:
- ssb_mode = SPEC_STORE_BYPASS_DISABLE;
- break;
- case SPEC_STORE_BYPASS_CMD_AUTO:
+ if (ssb_mode == SPEC_STORE_BYPASS_AUTO) {
if (should_mitigate_vuln(X86_BUG_SPEC_STORE_BYPASS))
ssb_mode = SPEC_STORE_BYPASS_PRCTL;
else
ssb_mode = SPEC_STORE_BYPASS_NONE;
- break;
- case SPEC_STORE_BYPASS_CMD_PRCTL:
- ssb_mode = SPEC_STORE_BYPASS_PRCTL;
- break;
- case SPEC_STORE_BYPASS_CMD_NONE:
- break;
}
-out:
- if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
- pr_info("%s\n", ssb_strings[ssb_mode]);
+ if (!boot_cpu_has(X86_FEATURE_SSBD))
+ ssb_mode = SPEC_STORE_BYPASS_NONE;
+
+ pr_info("%s\n", ssb_strings[ssb_mode]);
}
static void __init ssb_apply_mitigation(void)
@@ -2944,6 +2806,7 @@ static int ssb_prctl_get(struct task_struct *task)
return PR_SPEC_DISABLE;
case SPEC_STORE_BYPASS_SECCOMP:
case SPEC_STORE_BYPASS_PRCTL:
+ case SPEC_STORE_BYPASS_AUTO:
if (task_spec_ssb_force_disable(task))
return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
if (task_spec_ssb_noexec(task))
@@ -3263,14 +3126,15 @@ ibpb_on_vmexit:
static void __init srso_update_mitigation(void)
{
+ if (!boot_cpu_has_bug(X86_BUG_SRSO))
+ return;
+
/* If retbleed is using IBPB, that works for SRSO as well */
if (retbleed_mitigation == RETBLEED_MITIGATION_IBPB &&
boot_cpu_has(X86_FEATURE_IBPB_BRTYPE))
srso_mitigation = SRSO_MITIGATION_IBPB;
- if (boot_cpu_has_bug(X86_BUG_SRSO) &&
- !cpu_mitigations_off())
- pr_info("%s\n", srso_strings[srso_mitigation]);
+ pr_info("%s\n", srso_strings[srso_mitigation]);
}
static void __init srso_apply_mitigation(void)
@@ -3331,8 +3195,187 @@ static void __init srso_apply_mitigation(void)
}
#undef pr_fmt
+#define pr_fmt(fmt) "VMSCAPE: " fmt
+
+enum vmscape_mitigations {
+ VMSCAPE_MITIGATION_NONE,
+ VMSCAPE_MITIGATION_AUTO,
+ VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER,
+ VMSCAPE_MITIGATION_IBPB_ON_VMEXIT,
+};
+
+static const char * const vmscape_strings[] = {
+ [VMSCAPE_MITIGATION_NONE] = "Vulnerable",
+ /* [VMSCAPE_MITIGATION_AUTO] */
+ [VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER] = "Mitigation: IBPB before exit to userspace",
+ [VMSCAPE_MITIGATION_IBPB_ON_VMEXIT] = "Mitigation: IBPB on VMEXIT",
+};
+
+static enum vmscape_mitigations vmscape_mitigation __ro_after_init =
+ IS_ENABLED(CONFIG_MITIGATION_VMSCAPE) ? VMSCAPE_MITIGATION_AUTO : VMSCAPE_MITIGATION_NONE;
+
+static int __init vmscape_parse_cmdline(char *str)
+{
+ if (!str)
+ return -EINVAL;
+
+ if (!strcmp(str, "off")) {
+ vmscape_mitigation = VMSCAPE_MITIGATION_NONE;
+ } else if (!strcmp(str, "ibpb")) {
+ vmscape_mitigation = VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER;
+ } else if (!strcmp(str, "force")) {
+ setup_force_cpu_bug(X86_BUG_VMSCAPE);
+ vmscape_mitigation = VMSCAPE_MITIGATION_AUTO;
+ } else {
+ pr_err("Ignoring unknown vmscape=%s option.\n", str);
+ }
+
+ return 0;
+}
+early_param("vmscape", vmscape_parse_cmdline);
+
+static void __init vmscape_select_mitigation(void)
+{
+ if (!boot_cpu_has_bug(X86_BUG_VMSCAPE) ||
+ !boot_cpu_has(X86_FEATURE_IBPB)) {
+ vmscape_mitigation = VMSCAPE_MITIGATION_NONE;
+ return;
+ }
+
+ if (vmscape_mitigation == VMSCAPE_MITIGATION_AUTO) {
+ if (should_mitigate_vuln(X86_BUG_VMSCAPE))
+ vmscape_mitigation = VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER;
+ else
+ vmscape_mitigation = VMSCAPE_MITIGATION_NONE;
+ }
+}
+
+static void __init vmscape_update_mitigation(void)
+{
+ if (!boot_cpu_has_bug(X86_BUG_VMSCAPE))
+ return;
+
+ if (retbleed_mitigation == RETBLEED_MITIGATION_IBPB ||
+ srso_mitigation == SRSO_MITIGATION_IBPB_ON_VMEXIT)
+ vmscape_mitigation = VMSCAPE_MITIGATION_IBPB_ON_VMEXIT;
+
+ pr_info("%s\n", vmscape_strings[vmscape_mitigation]);
+}
+
+static void __init vmscape_apply_mitigation(void)
+{
+ if (vmscape_mitigation == VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER)
+ setup_force_cpu_cap(X86_FEATURE_IBPB_EXIT_TO_USER);
+}
+
+#undef pr_fmt
#define pr_fmt(fmt) fmt
+#define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n"
+#define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n"
+#define MMIO_MSG_SMT "MMIO Stale Data CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/processor_mmio_stale_data.html for more details.\n"
+#define VMSCAPE_MSG_SMT "VMSCAPE: SMT on, STIBP is required for full protection. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/vmscape.html for more details.\n"
+
+void cpu_bugs_smt_update(void)
+{
+ mutex_lock(&spec_ctrl_mutex);
+
+ if (sched_smt_active() && unprivileged_ebpf_enabled() &&
+ spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
+ pr_warn_once(SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG);
+
+ switch (spectre_v2_user_stibp) {
+ case SPECTRE_V2_USER_NONE:
+ break;
+ case SPECTRE_V2_USER_STRICT:
+ case SPECTRE_V2_USER_STRICT_PREFERRED:
+ update_stibp_strict();
+ break;
+ case SPECTRE_V2_USER_PRCTL:
+ case SPECTRE_V2_USER_SECCOMP:
+ update_indir_branch_cond();
+ break;
+ }
+
+ switch (mds_mitigation) {
+ case MDS_MITIGATION_FULL:
+ case MDS_MITIGATION_AUTO:
+ case MDS_MITIGATION_VMWERV:
+ if (sched_smt_active() && !boot_cpu_has(X86_BUG_MSBDS_ONLY))
+ pr_warn_once(MDS_MSG_SMT);
+ update_mds_branch_idle();
+ break;
+ case MDS_MITIGATION_OFF:
+ break;
+ }
+
+ switch (taa_mitigation) {
+ case TAA_MITIGATION_VERW:
+ case TAA_MITIGATION_AUTO:
+ case TAA_MITIGATION_UCODE_NEEDED:
+ if (sched_smt_active())
+ pr_warn_once(TAA_MSG_SMT);
+ break;
+ case TAA_MITIGATION_TSX_DISABLED:
+ case TAA_MITIGATION_OFF:
+ break;
+ }
+
+ switch (mmio_mitigation) {
+ case MMIO_MITIGATION_VERW:
+ case MMIO_MITIGATION_AUTO:
+ case MMIO_MITIGATION_UCODE_NEEDED:
+ if (sched_smt_active())
+ pr_warn_once(MMIO_MSG_SMT);
+ break;
+ case MMIO_MITIGATION_OFF:
+ break;
+ }
+
+ switch (tsa_mitigation) {
+ case TSA_MITIGATION_USER_KERNEL:
+ case TSA_MITIGATION_VM:
+ case TSA_MITIGATION_AUTO:
+ case TSA_MITIGATION_FULL:
+ /*
+ * TSA-SQ can potentially lead to info leakage between
+ * SMT threads.
+ */
+ if (sched_smt_active())
+ static_branch_enable(&cpu_buf_idle_clear);
+ else
+ static_branch_disable(&cpu_buf_idle_clear);
+ break;
+ case TSA_MITIGATION_NONE:
+ case TSA_MITIGATION_UCODE_NEEDED:
+ break;
+ }
+
+ switch (vmscape_mitigation) {
+ case VMSCAPE_MITIGATION_NONE:
+ case VMSCAPE_MITIGATION_AUTO:
+ break;
+ case VMSCAPE_MITIGATION_IBPB_ON_VMEXIT:
+ case VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER:
+ /*
+ * Hypervisors can be attacked across-threads, warn for SMT when
+ * STIBP is not already enabled system-wide.
+ *
+ * Intel eIBRS (!AUTOIBRS) implies STIBP on.
+ */
+ if (!sched_smt_active() ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ||
+ (spectre_v2_in_eibrs_mode(spectre_v2_enabled) &&
+ !boot_cpu_has(X86_FEATURE_AUTOIBRS)))
+ break;
+ pr_warn_once(VMSCAPE_MSG_SMT);
+ break;
+ }
+
+ mutex_unlock(&spec_ctrl_mutex);
+}
+
#ifdef CONFIG_SYSFS
#define L1TF_DEFAULT_MSG "Mitigation: PTE Inversion"
@@ -3518,9 +3561,6 @@ static const char *spectre_bhi_state(void)
static ssize_t spectre_v2_show_state(char *buf)
{
- if (spectre_v2_enabled == SPECTRE_V2_LFENCE)
- return sysfs_emit(buf, "Vulnerable: LFENCE\n");
-
if (spectre_v2_enabled == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled())
return sysfs_emit(buf, "Vulnerable: eIBRS with unprivileged eBPF\n");
@@ -3578,6 +3618,11 @@ static ssize_t tsa_show_state(char *buf)
return sysfs_emit(buf, "%s\n", tsa_strings[tsa_mitigation]);
}
+static ssize_t vmscape_show_state(char *buf)
+{
+ return sysfs_emit(buf, "%s\n", vmscape_strings[vmscape_mitigation]);
+}
+
static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
char *buf, unsigned int bug)
{
@@ -3644,6 +3689,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
case X86_BUG_TSA:
return tsa_show_state(buf);
+ case X86_BUG_VMSCAPE:
+ return vmscape_show_state(buf);
+
default:
break;
}
@@ -3735,6 +3783,11 @@ ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *bu
{
return cpu_show_common(dev, attr, buf, X86_BUG_TSA);
}
+
+ssize_t cpu_show_vmscape(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_common(dev, attr, buf, X86_BUG_VMSCAPE);
+}
#endif
void __warn_thunk(void)
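
Note: the VMSCAPE changes above only add the infrastructure, i.e. the per-CPU x86_ibpb_exit_to_user flag and the X86_FEATURE_IBPB_EXIT_TO_USER capability; the code that sets and consumes them lives in KVM and the exit-to-userspace path, which are outside this directory and not visible in this diff. A simplified sketch of the intended flow, using hypothetical stub functions rather than the real call sites, and assuming the per-CPU variable is declared in a header such as asm/nospec-branch.h:

#include <linux/percpu.h>
#include <asm/cpufeature.h>
#include <asm/nospec-branch.h>

/* Hypothetical stand-in for the KVM vcpu-run path. */
static void run_guest_stub(void)
{
	/* ... VMLAUNCH/VMRUN returns here on VM-exit ... */

	/* Remember that this CPU has run a potentially malicious guest. */
	if (cpu_feature_enabled(X86_FEATURE_IBPB_EXIT_TO_USER))
		this_cpu_write(x86_ibpb_exit_to_user, true);
}

/* Hypothetical stand-in for the return-to-userspace path. */
static void exit_to_user_stub(void)
{
	/* Flush branch predictor state once before running host userspace. */
	if (cpu_feature_enabled(X86_FEATURE_IBPB_EXIT_TO_USER) &&
	    this_cpu_read(x86_ibpb_exit_to_user)) {
		indirect_branch_prediction_barrier();
		this_cpu_write(x86_ibpb_exit_to_user, false);
	}
}
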
diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
index adfa7e8bb865..51a95b07831f 100644
--- a/arch/x86/kernel/cpu/cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -290,6 +290,22 @@ static int find_num_cache_leaves(struct cpuinfo_x86 *c)
}
/*
+ * The max shared threads number comes from CPUID(0x4) EAX[25-14] with input
+ * ECX as cache index. Then right shift apicid by the number's order to get
+ * cache id for this cache node.
+ */
+static unsigned int get_cache_id(u32 apicid, const struct _cpuid4_info *id4)
+{
+ unsigned long num_threads_sharing;
+ int index_msb;
+
+ num_threads_sharing = 1 + id4->eax.split.num_threads_sharing;
+ index_msb = get_count_order(num_threads_sharing);
+
+ return apicid >> index_msb;
+}
+
+/*
* AMD/Hygon CPUs may have multiple LLCs if L3 caches exist.
*/
@@ -312,18 +328,11 @@ void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id)
* Newer families: LLC ID is calculated from the number
* of threads sharing the L3 cache.
*/
- u32 eax, ebx, ecx, edx, num_sharing_cache = 0;
u32 llc_index = find_num_cache_leaves(c) - 1;
+ struct _cpuid4_info id4 = {};
- cpuid_count(0x8000001d, llc_index, &eax, &ebx, &ecx, &edx);
- if (eax)
- num_sharing_cache = ((eax >> 14) & 0xfff) + 1;
-
- if (num_sharing_cache) {
- int index_msb = get_count_order(num_sharing_cache);
-
- c->topo.llc_id = c->topo.apicid >> index_msb;
- }
+ if (!amd_fill_cpuid4_info(llc_index, &id4))
+ c->topo.llc_id = get_cache_id(c->topo.apicid, &id4);
}
}
@@ -598,27 +607,12 @@ int init_cache_level(unsigned int cpu)
return 0;
}
-/*
- * The max shared threads number comes from CPUID(0x4) EAX[25-14] with input
- * ECX as cache index. Then right shift apicid by the number's order to get
- * cache id for this cache node.
- */
-static void get_cache_id(int cpu, struct _cpuid4_info *id4)
-{
- struct cpuinfo_x86 *c = &cpu_data(cpu);
- unsigned long num_threads_sharing;
- int index_msb;
-
- num_threads_sharing = 1 + id4->eax.split.num_threads_sharing;
- index_msb = get_count_order(num_threads_sharing);
- id4->id = c->topo.apicid >> index_msb;
-}
-
int populate_cache_leaves(unsigned int cpu)
{
struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
struct cacheinfo *ci = this_cpu_ci->info_list;
u8 cpu_vendor = boot_cpu_data.x86_vendor;
+ u32 apicid = cpu_data(cpu).topo.apicid;
struct amd_northbridge *nb = NULL;
struct _cpuid4_info id4 = {};
int idx, ret;
@@ -628,7 +622,7 @@ int populate_cache_leaves(unsigned int cpu)
if (ret)
return ret;
- get_cache_id(cpu, &id4);
+ id4.id = get_cache_id(apicid, &id4);
if (cpu_vendor == X86_VENDOR_AMD || cpu_vendor == X86_VENDOR_HYGON)
nb = amd_init_l3_cache(idx);
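
As a worked example of the get_cache_id() computation reused above: if CPUID reports EAX[25:14] = 11 for a cache level, num_threads_sharing is 12, get_count_order(12) is 4, and an APIC ID of 0x35 maps to cache id 0x35 >> 4 = 3. A tiny standalone version of the same arithmetic, with made-up sample values:

#include <stdio.h>

/*
 * Minimal stand-in for the kernel's get_count_order(): the smallest b
 * such that (1 << b) >= n, for n >= 1.
 */
static unsigned int count_order(unsigned long n)
{
	unsigned int bits = 0;

	while ((1UL << bits) < n)
		bits++;
	return bits;
}

int main(void)
{
	unsigned long num_threads_sharing = 1 + 11;	/* CPUID leaf 4: EAX[25:14] = 11 */
	unsigned int apicid = 0x35;			/* made-up APIC ID */

	/* 12 threads share this cache -> shift the APIC ID by 4 bits. */
	printf("cache id = %u\n", apicid >> count_order(num_threads_sharing));
	return 0;
}
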
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 34a054181c4d..c7d3512914ca 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1236,55 +1236,71 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
#define ITS_NATIVE_ONLY BIT(9)
/* CPU is affected by Transient Scheduler Attacks */
#define TSA BIT(10)
+/* CPU is affected by VMSCAPE */
+#define VMSCAPE BIT(11)
static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
- VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE, X86_STEP_MAX, SRBDS),
- VULNBL_INTEL_STEPS(INTEL_HASWELL, X86_STEP_MAX, SRBDS),
- VULNBL_INTEL_STEPS(INTEL_HASWELL_L, X86_STEP_MAX, SRBDS),
- VULNBL_INTEL_STEPS(INTEL_HASWELL_G, X86_STEP_MAX, SRBDS),
- VULNBL_INTEL_STEPS(INTEL_HASWELL_X, X86_STEP_MAX, MMIO),
- VULNBL_INTEL_STEPS(INTEL_BROADWELL_D, X86_STEP_MAX, MMIO),
- VULNBL_INTEL_STEPS(INTEL_BROADWELL_G, X86_STEP_MAX, SRBDS),
- VULNBL_INTEL_STEPS(INTEL_BROADWELL_X, X86_STEP_MAX, MMIO),
- VULNBL_INTEL_STEPS(INTEL_BROADWELL, X86_STEP_MAX, SRBDS),
- VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, 0x5, MMIO | RETBLEED | GDS),
- VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, X86_STEP_MAX, MMIO | RETBLEED | GDS | ITS),
- VULNBL_INTEL_STEPS(INTEL_SKYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS),
- VULNBL_INTEL_STEPS(INTEL_SKYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS),
- VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, 0xb, MMIO | RETBLEED | GDS | SRBDS),
- VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS),
- VULNBL_INTEL_STEPS(INTEL_KABYLAKE, 0xc, MMIO | RETBLEED | GDS | SRBDS),
- VULNBL_INTEL_STEPS(INTEL_KABYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS),
- VULNBL_INTEL_STEPS(INTEL_CANNONLAKE_L, X86_STEP_MAX, RETBLEED),
+ VULNBL_INTEL_STEPS(INTEL_SANDYBRIDGE_X, X86_STEP_MAX, VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_SANDYBRIDGE, X86_STEP_MAX, VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE_X, X86_STEP_MAX, VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE, X86_STEP_MAX, SRBDS | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_HASWELL, X86_STEP_MAX, SRBDS | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_HASWELL_L, X86_STEP_MAX, SRBDS | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_HASWELL_G, X86_STEP_MAX, SRBDS | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_HASWELL_X, X86_STEP_MAX, MMIO | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_BROADWELL_D, X86_STEP_MAX, MMIO | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_BROADWELL_X, X86_STEP_MAX, MMIO | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_BROADWELL_G, X86_STEP_MAX, SRBDS | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_BROADWELL, X86_STEP_MAX, SRBDS | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, 0x5, MMIO | RETBLEED | GDS | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, X86_STEP_MAX, MMIO | RETBLEED | GDS | ITS | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_SKYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_SKYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, 0xb, MMIO | RETBLEED | GDS | SRBDS | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_KABYLAKE, 0xc, MMIO | RETBLEED | GDS | SRBDS | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_KABYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_CANNONLAKE_L, X86_STEP_MAX, RETBLEED | VMSCAPE),
VULNBL_INTEL_STEPS(INTEL_ICELAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS | ITS_NATIVE_ONLY),
VULNBL_INTEL_STEPS(INTEL_ICELAKE_D, X86_STEP_MAX, MMIO | GDS | ITS | ITS_NATIVE_ONLY),
VULNBL_INTEL_STEPS(INTEL_ICELAKE_X, X86_STEP_MAX, MMIO | GDS | ITS | ITS_NATIVE_ONLY),
- VULNBL_INTEL_STEPS(INTEL_COMETLAKE, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS),
- VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, 0x0, MMIO | RETBLEED | ITS),
- VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS),
+ VULNBL_INTEL_STEPS(INTEL_COMETLAKE, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, 0x0, MMIO | RETBLEED | ITS | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS | VMSCAPE),
VULNBL_INTEL_STEPS(INTEL_TIGERLAKE_L, X86_STEP_MAX, GDS | ITS | ITS_NATIVE_ONLY),
VULNBL_INTEL_STEPS(INTEL_TIGERLAKE, X86_STEP_MAX, GDS | ITS | ITS_NATIVE_ONLY),
VULNBL_INTEL_STEPS(INTEL_LAKEFIELD, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED),
VULNBL_INTEL_STEPS(INTEL_ROCKETLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | ITS | ITS_NATIVE_ONLY),
- VULNBL_INTEL_TYPE(INTEL_ALDERLAKE, ATOM, RFDS),
- VULNBL_INTEL_STEPS(INTEL_ALDERLAKE_L, X86_STEP_MAX, RFDS),
- VULNBL_INTEL_TYPE(INTEL_RAPTORLAKE, ATOM, RFDS),
- VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_P, X86_STEP_MAX, RFDS),
- VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_S, X86_STEP_MAX, RFDS),
- VULNBL_INTEL_STEPS(INTEL_ATOM_GRACEMONT, X86_STEP_MAX, RFDS),
+ VULNBL_INTEL_TYPE(INTEL_ALDERLAKE, ATOM, RFDS | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_ALDERLAKE, X86_STEP_MAX, VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_ALDERLAKE_L, X86_STEP_MAX, RFDS | VMSCAPE),
+ VULNBL_INTEL_TYPE(INTEL_RAPTORLAKE, ATOM, RFDS | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE, X86_STEP_MAX, VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_P, X86_STEP_MAX, RFDS | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_S, X86_STEP_MAX, RFDS | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_METEORLAKE_L, X86_STEP_MAX, VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_ARROWLAKE_H, X86_STEP_MAX, VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_ARROWLAKE, X86_STEP_MAX, VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_ARROWLAKE_U, X86_STEP_MAX, VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_LUNARLAKE_M, X86_STEP_MAX, VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_SAPPHIRERAPIDS_X, X86_STEP_MAX, VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_GRANITERAPIDS_X, X86_STEP_MAX, VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_EMERALDRAPIDS_X, X86_STEP_MAX, VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_ATOM_GRACEMONT, X86_STEP_MAX, RFDS | VMSCAPE),
VULNBL_INTEL_STEPS(INTEL_ATOM_TREMONT, X86_STEP_MAX, MMIO | MMIO_SBDS | RFDS),
VULNBL_INTEL_STEPS(INTEL_ATOM_TREMONT_D, X86_STEP_MAX, MMIO | RFDS),
VULNBL_INTEL_STEPS(INTEL_ATOM_TREMONT_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RFDS),
VULNBL_INTEL_STEPS(INTEL_ATOM_GOLDMONT, X86_STEP_MAX, RFDS),
VULNBL_INTEL_STEPS(INTEL_ATOM_GOLDMONT_D, X86_STEP_MAX, RFDS),
VULNBL_INTEL_STEPS(INTEL_ATOM_GOLDMONT_PLUS, X86_STEP_MAX, RFDS),
+ VULNBL_INTEL_STEPS(INTEL_ATOM_CRESTMONT_X, X86_STEP_MAX, VMSCAPE),
VULNBL_AMD(0x15, RETBLEED),
VULNBL_AMD(0x16, RETBLEED),
- VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO),
- VULNBL_HYGON(0x18, RETBLEED | SMT_RSB | SRSO),
- VULNBL_AMD(0x19, SRSO | TSA),
- VULNBL_AMD(0x1a, SRSO),
+ VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO | VMSCAPE),
+ VULNBL_HYGON(0x18, RETBLEED | SMT_RSB | SRSO | VMSCAPE),
+ VULNBL_AMD(0x19, SRSO | TSA | VMSCAPE),
+ VULNBL_AMD(0x1a, SRSO | VMSCAPE),
{}
};
@@ -1543,6 +1559,14 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
}
}
+ /*
+ * Set the bug only on bare-metal. A nested hypervisor should already be
+ * deploying IBPB to isolate itself from nested guests.
+ */
+ if (cpu_matches(cpu_vuln_blacklist, VMSCAPE) &&
+ !boot_cpu_has(X86_FEATURE_HYPERVISOR))
+ setup_force_cpu_bug(X86_BUG_VMSCAPE);
+
if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
return;
@@ -1784,6 +1808,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
setup_clear_cpu_cap(X86_FEATURE_LA57);
detect_nopl();
+ mca_bsp_init(c);
}
void __init init_cpu_devs(void)
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index 553bfbfc3a1b..f3e9219845e8 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -45,6 +45,9 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] =
#ifdef CONFIG_ACRN_GUEST
&x86_hyper_acrn,
#endif
+#ifdef CONFIG_BHYVE_GUEST
+ &x86_hyper_bhyve,
+#endif
};
enum x86_hypervisor_type x86_hyper_type;
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 5c4eb28c3ac9..d6906442f49b 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -241,7 +241,8 @@ struct threshold_block {
struct threshold_bank {
struct kobject *kobj;
- struct threshold_block *blocks;
+ /* List of threshold blocks within this MCA bank. */
+ struct list_head miscj;
};
static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
@@ -252,9 +253,6 @@ static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
*/
static DEFINE_PER_CPU(u64, bank_map);
-/* Map of banks that have more than MCA_MISC0 available. */
-static DEFINE_PER_CPU(u64, smca_misc_banks_map);
-
static void amd_threshold_interrupt(void);
static void amd_deferred_error_interrupt(void);
@@ -264,28 +262,6 @@ static void default_deferred_error_interrupt(void)
}
void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;
-static void smca_set_misc_banks_map(unsigned int bank, unsigned int cpu)
-{
- u32 low, high;
-
- /*
- * For SMCA enabled processors, BLKPTR field of the first MISC register
- * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4).
- */
- if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
- return;
-
- if (!(low & MCI_CONFIG_MCAX))
- return;
-
- if (rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high))
- return;
-
- if (low & MASK_BLKPTR_LO)
- per_cpu(smca_misc_banks_map, cpu) |= BIT_ULL(bank);
-
-}
-
static void smca_configure(unsigned int bank, unsigned int cpu)
{
u8 *bank_counts = this_cpu_ptr(smca_bank_counts);
@@ -326,8 +302,6 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
wrmsr(smca_config, low, high);
}
- smca_set_misc_banks_map(bank, cpu);
-
if (rdmsr_safe(MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) {
pr_warn("Failed to read MCA_IPID for bank %d\n", bank);
return;
@@ -419,8 +393,8 @@ static bool lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
return true;
};
-/* Reprogram MCx_MISC MSR behind this threshold bank. */
-static void threshold_restart_bank(void *_tr)
+/* Reprogram MCx_MISC MSR behind this threshold block. */
+static void threshold_restart_block(void *_tr)
{
struct thresh_restart *tr = _tr;
u32 hi, lo;
@@ -478,7 +452,7 @@ static void mce_threshold_block_init(struct threshold_block *b, int offset)
};
b->threshold_limit = THRESHOLD_MAX;
- threshold_restart_bank(&tr);
+ threshold_restart_block(&tr);
};
static int setup_APIC_mce_threshold(int reserved, int new)
@@ -525,18 +499,6 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
wrmsr(MSR_CU_DEF_ERR, low, high);
}
-static u32 smca_get_block_address(unsigned int bank, unsigned int block,
- unsigned int cpu)
-{
- if (!block)
- return MSR_AMD64_SMCA_MCx_MISC(bank);
-
- if (!(per_cpu(smca_misc_banks_map, cpu) & BIT_ULL(bank)))
- return 0;
-
- return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
-}
-
static u32 get_block_address(u32 current_addr, u32 low, u32 high,
unsigned int bank, unsigned int block,
unsigned int cpu)
@@ -546,8 +508,15 @@ static u32 get_block_address(u32 current_addr, u32 low, u32 high,
if ((bank >= per_cpu(mce_num_banks, cpu)) || (block >= NR_BLOCKS))
return addr;
- if (mce_flags.smca)
- return smca_get_block_address(bank, block, cpu);
+ if (mce_flags.smca) {
+ if (!block)
+ return MSR_AMD64_SMCA_MCx_MISC(bank);
+
+ if (!(low & MASK_BLKPTR_LO))
+ return 0;
+
+ return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
+ }
/* Fall back to method we used for older processors: */
switch (block) {
@@ -677,6 +646,28 @@ static void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank)
wrmsrq(MSR_K7_HWCR, hwcr);
}
+static void amd_apply_cpu_quirks(struct cpuinfo_x86 *c)
+{
+ struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
+
+ /* This should be disabled by the BIOS, but isn't always */
+ if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) {
+ /*
+ * disable GART TBL walk error reporting, which
+ * trips off incorrectly with the IOMMU & 3ware
+ * & Cerberus:
+ */
+ clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
+ }
+
+ /*
+ * Various K7s with broken bank 0 around. Always disable
+ * by default.
+ */
+ if (c->x86 == 6 && this_cpu_read(mce_num_banks))
+ mce_banks[0].ctl = 0;
+}
+
/* cpu init entry point, called from mce.c with preempt off */
void mce_amd_feature_init(struct cpuinfo_x86 *c)
{
@@ -684,6 +675,9 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
u32 low = 0, high = 0, address = 0;
int offset = -1;
+ amd_apply_cpu_quirks(c);
+
+ mce_flags.amd_threshold = 1;
for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
if (mce_flags.smca)
@@ -714,6 +708,12 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
deferred_error_interrupt_enable(c);
}
+void smca_bsp_init(void)
+{
+ mce_threshold_vector = amd_threshold_interrupt;
+ deferred_error_int_vector = amd_deferred_error_interrupt;
+}
+
/*
* DRAM ECC errors are reported in the Northbridge (bank 4) with
* Extended Error Code 8.
@@ -921,7 +921,7 @@ static void log_and_reset_block(struct threshold_block *block)
/* Reset threshold block after logging error. */
memset(&tr, 0, sizeof(tr));
tr.b = block;
- threshold_restart_bank(&tr);
+ threshold_restart_block(&tr);
}
/*
@@ -930,9 +930,9 @@ static void log_and_reset_block(struct threshold_block *block)
*/
static void amd_threshold_interrupt(void)
{
- struct threshold_block *first_block = NULL, *block = NULL, *tmp = NULL;
- struct threshold_bank **bp = this_cpu_read(threshold_banks);
+ struct threshold_bank **bp = this_cpu_read(threshold_banks), *thr_bank;
unsigned int bank, cpu = smp_processor_id();
+ struct threshold_block *block, *tmp;
/*
* Validate that the threshold bank has been initialized already. The
@@ -946,20 +946,20 @@ static void amd_threshold_interrupt(void)
if (!(per_cpu(bank_map, cpu) & BIT_ULL(bank)))
continue;
- first_block = bp[bank]->blocks;
- if (!first_block)
+ thr_bank = bp[bank];
+ if (!thr_bank)
continue;
- /*
- * The first block is also the head of the list. Check it first
- * before iterating over the rest.
- */
- log_and_reset_block(first_block);
- list_for_each_entry_safe(block, tmp, &first_block->miscj, miscj)
+ list_for_each_entry_safe(block, tmp, &thr_bank->miscj, miscj)
log_and_reset_block(block);
}
}
+void amd_clear_bank(struct mce *m)
+{
+ mce_wrmsrq(mca_msr_reg(m->bank, MCA_STATUS), 0);
+}
+
/*
* Sysfs Interface
*/
@@ -995,7 +995,7 @@ store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
memset(&tr, 0, sizeof(tr));
tr.b = b;
- if (smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1))
+ if (smp_call_function_single(b->cpu, threshold_restart_block, &tr, 1))
return -ENODEV;
return size;
@@ -1020,7 +1020,7 @@ store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
b->threshold_limit = new;
tr.b = b;
- if (smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1))
+ if (smp_call_function_single(b->cpu, threshold_restart_block, &tr, 1))
return -ENODEV;
return size;
@@ -1181,13 +1181,7 @@ static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb
default_attrs[2] = NULL;
}
- INIT_LIST_HEAD(&b->miscj);
-
- /* This is safe as @tb is not visible yet */
- if (tb->blocks)
- list_add(&b->miscj, &tb->blocks->miscj);
- else
- tb->blocks = b;
+ list_add(&b->miscj, &tb->miscj);
err = kobject_init_and_add(&b->kobj, &threshold_ktype, tb->kobj, get_name(cpu, bank, b));
if (err)
@@ -1238,6 +1232,8 @@ static int threshold_create_bank(struct threshold_bank **bp, unsigned int cpu,
goto out_free;
}
+ INIT_LIST_HEAD(&b->miscj);
+
err = allocate_threshold_blocks(cpu, b, bank, 0, mca_msr_reg(bank, MCA_MISC));
if (err)
goto out_kobj;
@@ -1258,26 +1254,15 @@ static void threshold_block_release(struct kobject *kobj)
kfree(to_block(kobj));
}
-static void deallocate_threshold_blocks(struct threshold_bank *bank)
+static void threshold_remove_bank(struct threshold_bank *bank)
{
struct threshold_block *pos, *tmp;
- list_for_each_entry_safe(pos, tmp, &bank->blocks->miscj, miscj) {
+ list_for_each_entry_safe(pos, tmp, &bank->miscj, miscj) {
list_del(&pos->miscj);
kobject_put(&pos->kobj);
}
- kobject_put(&bank->blocks->kobj);
-}
-
-static void threshold_remove_bank(struct threshold_bank *bank)
-{
- if (!bank->blocks)
- goto out_free;
-
- deallocate_threshold_blocks(bank);
-
-out_free:
kobject_put(bank->kobj);
kfree(bank);
}
@@ -1296,12 +1281,12 @@ static void __threshold_remove_device(struct threshold_bank **bp)
kfree(bp);
}
-int mce_threshold_remove_device(unsigned int cpu)
+void mce_threshold_remove_device(unsigned int cpu)
{
struct threshold_bank **bp = this_cpu_read(threshold_banks);
if (!bp)
- return 0;
+ return;
/*
* Clear the pointer before cleaning up, so that the interrupt won't
@@ -1310,7 +1295,7 @@ int mce_threshold_remove_device(unsigned int cpu)
this_cpu_write(threshold_banks, NULL);
__threshold_remove_device(bp);
- return 0;
+ return;
}
/**
@@ -1324,36 +1309,34 @@ int mce_threshold_remove_device(unsigned int cpu)
* thread running on @cpu. The callback is invoked on all CPUs which are
* online when the callback is installed or during a real hotplug event.
*/
-int mce_threshold_create_device(unsigned int cpu)
+void mce_threshold_create_device(unsigned int cpu)
{
unsigned int numbanks, bank;
struct threshold_bank **bp;
- int err;
if (!mce_flags.amd_threshold)
- return 0;
+ return;
bp = this_cpu_read(threshold_banks);
if (bp)
- return 0;
+ return;
numbanks = this_cpu_read(mce_num_banks);
bp = kcalloc(numbanks, sizeof(*bp), GFP_KERNEL);
if (!bp)
- return -ENOMEM;
+ return;
for (bank = 0; bank < numbanks; ++bank) {
if (!(this_cpu_read(bank_map) & BIT_ULL(bank)))
continue;
- err = threshold_create_bank(bp, cpu, bank);
- if (err) {
+ if (threshold_create_bank(bp, cpu, bank)) {
__threshold_remove_device(bp);
- return err;
+ return;
}
}
this_cpu_write(threshold_banks, bp);
if (thresholding_irq_en)
mce_threshold_vector = amd_threshold_interrupt;
- return 0;
+ return;
}
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 4da4eab56c81..460e90a1a0b1 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -423,7 +423,7 @@ noinstr u64 mce_rdmsrq(u32 msr)
return EAX_EDX_VAL(val, low, high);
}
-static noinstr void mce_wrmsrq(u32 msr, u64 v)
+noinstr void mce_wrmsrq(u32 msr, u64 v)
{
u32 low, high;
@@ -715,6 +715,60 @@ static noinstr void mce_read_aux(struct mce_hw_err *err, int i)
DEFINE_PER_CPU(unsigned, mce_poll_count);
/*
+ * Newer Intel systems that support software error
+ * recovery need to make additional checks. Other
+ * CPUs should skip over uncorrected errors, but log
+ * everything else.
+ */
+static bool ser_should_log_poll_error(struct mce *m)
+{
+ /* Log "not enabled" (speculative) errors */
+ if (!(m->status & MCI_STATUS_EN))
+ return true;
+
+ /*
+ * Log UCNA (SDM: 15.6.3 "UCR Error Classification")
+ * UC == 1 && PCC == 0 && S == 0
+ */
+ if (!(m->status & MCI_STATUS_PCC) && !(m->status & MCI_STATUS_S))
+ return true;
+
+ return false;
+}
+
+static bool should_log_poll_error(enum mcp_flags flags, struct mce_hw_err *err)
+{
+ struct mce *m = &err->m;
+
+ /* If this entry is not valid, ignore it. */
+ if (!(m->status & MCI_STATUS_VAL))
+ return false;
+
+ /*
+ * If we are logging everything (at CPU online) or this
+ * is a corrected error, then we must log it.
+ */
+ if ((flags & MCP_UC) || !(m->status & MCI_STATUS_UC))
+ return true;
+
+ if (mca_cfg.ser)
+ return ser_should_log_poll_error(m);
+
+ if (m->status & MCI_STATUS_UC)
+ return false;
+
+ return true;
+}
+
+static void clear_bank(struct mce *m)
+{
+ if (m->cpuvendor == X86_VENDOR_AMD)
+ return amd_clear_bank(m);
+
+ mce_wrmsrq(mca_msr_reg(m->bank, MCA_STATUS), 0);
+}
+
+/*
* Poll for corrected events or events that happened before reset.
* Those are just logged through /dev/mcelog.
*
@@ -765,51 +819,10 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
if (!mca_cfg.cmci_disabled)
mce_track_storm(m);
- /* If this entry is not valid, ignore it */
- if (!(m->status & MCI_STATUS_VAL))
+ /* Verify that the error should be logged based on hardware conditions. */
+ if (!should_log_poll_error(flags, &err))
continue;
- /*
- * If we are logging everything (at CPU online) or this
- * is a corrected error, then we must log it.
- */
- if ((flags & MCP_UC) || !(m->status & MCI_STATUS_UC))
- goto log_it;
-
- /*
- * Newer Intel systems that support software error
- * recovery need to make additional checks. Other
- * CPUs should skip over uncorrected errors, but log
- * everything else.
- */
- if (!mca_cfg.ser) {
- if (m->status & MCI_STATUS_UC)
- continue;
- goto log_it;
- }
-
- /* Log "not enabled" (speculative) errors */
- if (!(m->status & MCI_STATUS_EN))
- goto log_it;
-
- /*
- * Log UCNA (SDM: 15.6.3 "UCR Error Classification")
- * UC == 1 && PCC == 0 && S == 0
- */
- if (!(m->status & MCI_STATUS_PCC) && !(m->status & MCI_STATUS_S))
- goto log_it;
-
- /*
- * Skip anything else. Presumption is that our read of this
- * bank is racing with a machine check. Leave the log alone
- * for do_machine_check() to deal with it.
- */
- continue;
-
-log_it:
- if (flags & MCP_DONTLOG)
- goto clear_it;
-
mce_read_aux(&err, i);
m->severity = mce_severity(m, NULL, NULL, false);
/*
@@ -826,10 +839,7 @@ log_it:
mce_log(&err);
clear_it:
- /*
- * Clear state for this bank.
- */
- mce_wrmsrq(mca_msr_reg(i, MCA_STATUS), 0);
+ clear_bank(m);
}
/*
@@ -1810,9 +1820,10 @@ static void __mcheck_cpu_mce_banks_init(void)
struct mce_bank *b = &mce_banks[i];
/*
- * Init them all, __mcheck_cpu_apply_quirks() is going to apply
- * the required vendor quirks before
- * __mcheck_cpu_init_clear_banks() does the final bank setup.
+ * Init them all by default.
+ *
+ * The required vendor quirks will be applied before
+ * __mcheck_cpu_init_prepare_banks() does the final bank setup.
*/
b->ctl = -1ULL;
b->init = true;
@@ -1840,69 +1851,34 @@ static void __mcheck_cpu_cap_init(void)
this_cpu_write(mce_num_banks, b);
__mcheck_cpu_mce_banks_init();
-
- /* Use accurate RIP reporting if available. */
- if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
- mca_cfg.rip_msr = MSR_IA32_MCG_EIP;
-
- if (cap & MCG_SER_P)
- mca_cfg.ser = 1;
}
static void __mcheck_cpu_init_generic(void)
{
- enum mcp_flags m_fl = 0;
- mce_banks_t all_banks;
u64 cap;
- if (!mca_cfg.bootlog)
- m_fl = MCP_DONTLOG;
-
- /*
- * Log the machine checks left over from the previous reset. Log them
- * only, do not start processing them. That will happen in mcheck_late_init()
- * when all consumers have been registered on the notifier chain.
- */
- bitmap_fill(all_banks, MAX_NR_BANKS);
- machine_check_poll(MCP_UC | MCP_QUEUE_LOG | m_fl, &all_banks);
-
- cr4_set_bits(X86_CR4_MCE);
-
rdmsrq(MSR_IA32_MCG_CAP, cap);
if (cap & MCG_CTL_P)
wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
}
-static void __mcheck_cpu_init_clear_banks(void)
+static void __mcheck_cpu_init_prepare_banks(void)
{
struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
+ u64 msrval;
int i;
- for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
- struct mce_bank *b = &mce_banks[i];
+ /*
+ * Log the machine checks left over from the previous reset. Log them
+ * only, do not start processing them. That will happen in mcheck_late_init()
+ * when all consumers have been registered on the notifier chain.
+ */
+ if (mca_cfg.bootlog) {
+ mce_banks_t all_banks;
- if (!b->init)
- continue;
- wrmsrq(mca_msr_reg(i, MCA_CTL), b->ctl);
- wrmsrq(mca_msr_reg(i, MCA_STATUS), 0);
+ bitmap_fill(all_banks, MAX_NR_BANKS);
+ machine_check_poll(MCP_UC | MCP_QUEUE_LOG, &all_banks);
}
-}
-
-/*
- * Do a final check to see if there are any unused/RAZ banks.
- *
- * This must be done after the banks have been initialized and any quirks have
- * been applied.
- *
- * Do not call this from any user-initiated flows, e.g. CPU hotplug or sysfs.
- * Otherwise, a user who disables a bank will not be able to re-enable it
- * without a system reboot.
- */
-static void __mcheck_cpu_check_banks(void)
-{
- struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
- u64 msrval;
- int i;
for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
struct mce_bank *b = &mce_banks[i];
@@ -1910,25 +1886,16 @@ static void __mcheck_cpu_check_banks(void)
if (!b->init)
continue;
+ wrmsrq(mca_msr_reg(i, MCA_CTL), b->ctl);
+ wrmsrq(mca_msr_reg(i, MCA_STATUS), 0);
+
rdmsrq(mca_msr_reg(i, MCA_CTL), msrval);
b->init = !!msrval;
}
}
-static void apply_quirks_amd(struct cpuinfo_x86 *c)
+static void amd_apply_global_quirks(struct cpuinfo_x86 *c)
{
- struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
-
- /* This should be disabled by the BIOS, but isn't always */
- if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) {
- /*
- * disable GART TBL walk error reporting, which
- * trips off incorrectly with the IOMMU & 3ware
- * & Cerberus:
- */
- clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
- }
-
if (c->x86 < 0x11 && mca_cfg.bootlog < 0) {
/*
* Lots of broken BIOS around that don't clear them
@@ -1938,13 +1905,6 @@ static void apply_quirks_amd(struct cpuinfo_x86 *c)
}
/*
- * Various K7s with broken bank 0 around. Always disable
- * by default.
- */
- if (c->x86 == 6 && this_cpu_read(mce_num_banks))
- mce_banks[0].ctl = 0;
-
- /*
* overflow_recov is supported for F15h Models 00h-0fh
* even though we don't have a CPUID bit for it.
*/
@@ -1955,26 +1915,13 @@ static void apply_quirks_amd(struct cpuinfo_x86 *c)
mce_flags.zen_ifu_quirk = 1;
}
-static void apply_quirks_intel(struct cpuinfo_x86 *c)
+static void intel_apply_global_quirks(struct cpuinfo_x86 *c)
{
- struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
-
/* Older CPUs (prior to family 6) don't need quirks. */
if (c->x86_vfm < INTEL_PENTIUM_PRO)
return;
/*
- * SDM documents that on family 6 bank 0 should not be written
- * because it aliases to another special BIOS controlled
- * register.
- * But it's not aliased anymore on model 0x1a+
- * Don't ignore bank 0 completely because there could be a
- * valid event later, merely don't write CTL0.
- */
- if (c->x86_vfm < INTEL_NEHALEM_EP && this_cpu_read(mce_num_banks))
- mce_banks[0].init = false;
-
- /*
* All newer Intel systems support MCE broadcasting. Enable
* synchronization with a one second timeout.
*/
@@ -1999,7 +1946,7 @@ static void apply_quirks_intel(struct cpuinfo_x86 *c)
mce_flags.skx_repmov_quirk = 1;
}
-static void apply_quirks_zhaoxin(struct cpuinfo_x86 *c)
+static void zhaoxin_apply_global_quirks(struct cpuinfo_x86 *c)
{
/*
* All newer Zhaoxin CPUs support MCE broadcasting. Enable
@@ -2011,34 +1958,6 @@ static void apply_quirks_zhaoxin(struct cpuinfo_x86 *c)
}
}
-/* Add per CPU specific workarounds here */
-static bool __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
-{
- struct mca_config *cfg = &mca_cfg;
-
- switch (c->x86_vendor) {
- case X86_VENDOR_UNKNOWN:
- pr_info("unknown CPU type - not enabling MCE support\n");
- return false;
- case X86_VENDOR_AMD:
- apply_quirks_amd(c);
- break;
- case X86_VENDOR_INTEL:
- apply_quirks_intel(c);
- break;
- case X86_VENDOR_ZHAOXIN:
- apply_quirks_zhaoxin(c);
- break;
- }
-
- if (cfg->monarch_timeout < 0)
- cfg->monarch_timeout = 0;
- if (cfg->bootlog != 0)
- cfg->panic_timeout = 30;
-
- return true;
-}
-
static bool __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
{
if (c->x86 != 5)
@@ -2060,19 +1979,6 @@ static bool __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
return false;
}
-/*
- * Init basic CPU features needed for early decoding of MCEs.
- */
-static void __mcheck_cpu_init_early(struct cpuinfo_x86 *c)
-{
- if (c->x86_vendor == X86_VENDOR_AMD || c->x86_vendor == X86_VENDOR_HYGON) {
- mce_flags.overflow_recov = !!cpu_has(c, X86_FEATURE_OVERFLOW_RECOV);
- mce_flags.succor = !!cpu_has(c, X86_FEATURE_SUCCOR);
- mce_flags.smca = !!cpu_has(c, X86_FEATURE_SMCA);
- mce_flags.amd_threshold = 1;
- }
-}
-
static void mce_centaur_feature_init(struct cpuinfo_x86 *c)
{
struct mca_config *cfg = &mca_cfg;
@@ -2281,6 +2187,53 @@ DEFINE_IDTENTRY_RAW(exc_machine_check)
}
#endif
+void mca_bsp_init(struct cpuinfo_x86 *c)
+{
+ u64 cap;
+
+ if (!mce_available(c))
+ return;
+
+ if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
+ mca_cfg.disabled = 1;
+ pr_info("unknown CPU type - not enabling MCE support\n");
+ return;
+ }
+
+ mce_flags.overflow_recov = cpu_feature_enabled(X86_FEATURE_OVERFLOW_RECOV);
+ mce_flags.succor = cpu_feature_enabled(X86_FEATURE_SUCCOR);
+ mce_flags.smca = cpu_feature_enabled(X86_FEATURE_SMCA);
+
+ if (mce_flags.smca)
+ smca_bsp_init();
+
+ rdmsrq(MSR_IA32_MCG_CAP, cap);
+
+ /* Use accurate RIP reporting if available. */
+ if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
+ mca_cfg.rip_msr = MSR_IA32_MCG_EIP;
+
+ if (cap & MCG_SER_P)
+ mca_cfg.ser = 1;
+
+ switch (c->x86_vendor) {
+ case X86_VENDOR_AMD:
+ amd_apply_global_quirks(c);
+ break;
+ case X86_VENDOR_INTEL:
+ intel_apply_global_quirks(c);
+ break;
+ case X86_VENDOR_ZHAOXIN:
+ zhaoxin_apply_global_quirks(c);
+ break;
+ }
+
+ if (mca_cfg.monarch_timeout < 0)
+ mca_cfg.monarch_timeout = 0;
+ if (mca_cfg.bootlog != 0)
+ mca_cfg.panic_timeout = 30;
+}
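For readers following the new BSP-time flow: mca_bsp_init() derives rip_msr and ser purely from IA32_MCG_CAP. Below is a hedged, standalone decoding sketch; the macro names, helper and example value are ours, with bit positions per the usual MCG_CAP layout.

#include <stdint.h>
#include <stdio.h>

#define CAP_BANKCNT(c)	((c) & 0xff)		/* number of MCA banks */
#define CAP_CTL_P	(1ULL << 8)		/* MCG_CTL MSR present */
#define CAP_EXT_P	(1ULL << 9)		/* extended state registers present */
#define CAP_EXT_CNT(c)	(((c) >> 16) & 0xff)	/* number of extended registers */
#define CAP_SER_P	(1ULL << 24)		/* software error recovery supported */

static void decode_mcg_cap(uint64_t cap)
{
	printf("banks=%llu mcg_ctl=%d ext_p=%d ext_cnt=%llu ser=%d\n",
	       (unsigned long long)CAP_BANKCNT(cap),
	       !!(cap & CAP_CTL_P), !!(cap & CAP_EXT_P),
	       (unsigned long long)CAP_EXT_CNT(cap),
	       !!(cap & CAP_SER_P));
}

int main(void)
{
	decode_mcg_cap(0x0000000001090f16ULL);	/* hypothetical CAP value */
	return 0;
}

With EXT_P set and at least nine extended registers the code above selects MSR_IA32_MCG_EIP for RIP reporting, and SER_P feeds the mca_cfg.ser check used by the poll path earlier in this patch.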
+
/*
* Called for each booted CPU to set up machine checks.
* Must be called with preempt off:
@@ -2298,11 +2251,6 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
__mcheck_cpu_cap_init();
- if (!__mcheck_cpu_apply_quirks(c)) {
- mca_cfg.disabled = 1;
- return;
- }
-
if (!mce_gen_pool_init()) {
mca_cfg.disabled = 1;
pr_emerg("Couldn't allocate MCE records pool!\n");
@@ -2311,12 +2259,11 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
mca_cfg.initialized = 1;
- __mcheck_cpu_init_early(c);
__mcheck_cpu_init_generic();
__mcheck_cpu_init_vendor(c);
- __mcheck_cpu_init_clear_banks();
- __mcheck_cpu_check_banks();
+ __mcheck_cpu_init_prepare_banks();
__mcheck_cpu_setup_timer();
+ cr4_set_bits(X86_CR4_MCE);
}
/*
@@ -2483,7 +2430,8 @@ static void mce_syscore_resume(void)
{
__mcheck_cpu_init_generic();
__mcheck_cpu_init_vendor(raw_cpu_ptr(&cpu_info));
- __mcheck_cpu_init_clear_banks();
+ __mcheck_cpu_init_prepare_banks();
+ cr4_set_bits(X86_CR4_MCE);
}
static struct syscore_ops mce_syscore_ops = {
@@ -2501,8 +2449,9 @@ static void mce_cpu_restart(void *data)
if (!mce_available(raw_cpu_ptr(&cpu_info)))
return;
__mcheck_cpu_init_generic();
- __mcheck_cpu_init_clear_banks();
+ __mcheck_cpu_init_prepare_banks();
__mcheck_cpu_init_timer();
+ cr4_set_bits(X86_CR4_MCE);
}
/* Reinit MCEs after user configuration changes */
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index 9b149b9c4109..4655223ba560 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -468,8 +468,26 @@ static void intel_imc_init(struct cpuinfo_x86 *c)
}
}
+static void intel_apply_cpu_quirks(struct cpuinfo_x86 *c)
+{
+ /*
+ * SDM documents that on family 6 bank 0 should not be written
+ * because it aliases to another special BIOS controlled
+ * register.
+ * But it's not aliased anymore on model 0x1a+.
+ * Don't ignore bank 0 completely because there could be a
+ * valid event later, merely don't write CTL0.
+ *
+ * Older CPUs (prior to family 6) can't reach this point and already
+ * return early due to the check of __mcheck_cpu_ancient_init().
+ */
+ if (c->x86_vfm < INTEL_NEHALEM_EP && this_cpu_read(mce_num_banks))
+ this_cpu_ptr(mce_banks_array)[0].init = false;
+}
+
void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
+ intel_apply_cpu_quirks(c);
intel_init_cmci();
intel_init_lmce();
intel_imc_init(c);
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index b5ba598e54cb..b0e00ec5cc8c 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -265,8 +265,11 @@ void mce_prep_record_common(struct mce *m);
void mce_prep_record_per_cpu(unsigned int cpu, struct mce *m);
#ifdef CONFIG_X86_MCE_AMD
+void mce_threshold_create_device(unsigned int cpu);
+void mce_threshold_remove_device(unsigned int cpu);
extern bool amd_filter_mce(struct mce *m);
bool amd_mce_usable_address(struct mce *m);
+void amd_clear_bank(struct mce *m);
/*
* If MCA_CONFIG[McaLsbInStatusSupported] is set, extract ErrAddr in bits
@@ -292,10 +295,15 @@ static __always_inline void smca_extract_err_addr(struct mce *m)
m->addr &= GENMASK_ULL(55, lsb);
}
+void smca_bsp_init(void);
#else
+static inline void mce_threshold_create_device(unsigned int cpu) { }
+static inline void mce_threshold_remove_device(unsigned int cpu) { }
static inline bool amd_filter_mce(struct mce *m) { return false; }
static inline bool amd_mce_usable_address(struct mce *m) { return false; }
+static inline void amd_clear_bank(struct mce *m) { }
static inline void smca_extract_err_addr(struct mce *m) { }
+static inline void smca_bsp_init(void) { }
#endif
#ifdef CONFIG_X86_ANCIENT_MCE
@@ -313,6 +321,7 @@ static __always_inline void winchip_machine_check(struct pt_regs *regs) {}
#endif
noinstr u64 mce_rdmsrq(u32 msr);
+noinstr void mce_wrmsrq(u32 msr, u64 v);
static __always_inline u32 mca_msr_reg(int bank, enum mca_msr reg)
{
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 514f63340880..cdce885e2fd5 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -269,15 +269,6 @@ static bool verify_sha256_digest(u32 patch_id, u32 cur_rev, const u8 *data, unsi
return true;
}
-static u32 get_patch_level(void)
-{
- u32 rev, dummy __always_unused;
-
- native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
-
- return rev;
-}
-
static union cpuid_1_eax ucode_rev_to_cpuid(unsigned int val)
{
union zen_patch_rev p;
@@ -295,6 +286,30 @@ static union cpuid_1_eax ucode_rev_to_cpuid(unsigned int val)
return c;
}
+static u32 get_patch_level(void)
+{
+ u32 rev, dummy __always_unused;
+
+ if (IS_ENABLED(CONFIG_MICROCODE_DBG)) {
+ int cpu = smp_processor_id();
+
+ if (!microcode_rev[cpu]) {
+ if (!base_rev)
+ base_rev = cpuid_to_ucode_rev(bsp_cpuid_1_eax);
+
+ microcode_rev[cpu] = base_rev;
+
+ ucode_dbg("CPU%d, base_rev: 0x%x\n", cpu, base_rev);
+ }
+
+ return microcode_rev[cpu];
+ }
+
+ native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
+
+ return rev;
+}
+
static u16 find_equiv_id(struct equiv_cpu_table *et, u32 sig)
{
unsigned int i;
@@ -324,13 +339,13 @@ static bool verify_container(const u8 *buf, size_t buf_size)
u32 cont_magic;
if (buf_size <= CONTAINER_HDR_SZ) {
- pr_debug("Truncated microcode container header.\n");
+ ucode_dbg("Truncated microcode container header.\n");
return false;
}
cont_magic = *(const u32 *)buf;
if (cont_magic != UCODE_MAGIC) {
- pr_debug("Invalid magic value (0x%08x).\n", cont_magic);
+ ucode_dbg("Invalid magic value (0x%08x).\n", cont_magic);
return false;
}
@@ -355,8 +370,8 @@ static bool verify_equivalence_table(const u8 *buf, size_t buf_size)
cont_type = hdr[1];
if (cont_type != UCODE_EQUIV_CPU_TABLE_TYPE) {
- pr_debug("Wrong microcode container equivalence table type: %u.\n",
- cont_type);
+ ucode_dbg("Wrong microcode container equivalence table type: %u.\n",
+ cont_type);
return false;
}
@@ -365,7 +380,7 @@ static bool verify_equivalence_table(const u8 *buf, size_t buf_size)
equiv_tbl_len = hdr[2];
if (equiv_tbl_len < sizeof(struct equiv_cpu_entry) ||
buf_size < equiv_tbl_len) {
- pr_debug("Truncated equivalence table.\n");
+ ucode_dbg("Truncated equivalence table.\n");
return false;
}
@@ -385,7 +400,7 @@ static bool __verify_patch_section(const u8 *buf, size_t buf_size, u32 *sh_psize
const u32 *hdr;
if (buf_size < SECTION_HDR_SIZE) {
- pr_debug("Truncated patch section.\n");
+ ucode_dbg("Truncated patch section.\n");
return false;
}
@@ -394,13 +409,13 @@ static bool __verify_patch_section(const u8 *buf, size_t buf_size, u32 *sh_psize
p_size = hdr[1];
if (p_type != UCODE_UCODE_TYPE) {
- pr_debug("Invalid type field (0x%x) in container file section header.\n",
- p_type);
+ ucode_dbg("Invalid type field (0x%x) in container file section header.\n",
+ p_type);
return false;
}
if (p_size < sizeof(struct microcode_header_amd)) {
- pr_debug("Patch of size %u too short.\n", p_size);
+ ucode_dbg("Patch of size %u too short.\n", p_size);
return false;
}
@@ -477,12 +492,12 @@ static int verify_patch(const u8 *buf, size_t buf_size, u32 *patch_size)
* size sh_psize, as the section claims.
*/
if (buf_size < sh_psize) {
- pr_debug("Patch of size %u truncated.\n", sh_psize);
+ ucode_dbg("Patch of size %u truncated.\n", sh_psize);
return -1;
}
if (!__verify_patch_size(sh_psize, buf_size)) {
- pr_debug("Per-family patch size mismatch.\n");
+ ucode_dbg("Per-family patch size mismatch.\n");
return -1;
}
@@ -496,6 +511,9 @@ static int verify_patch(const u8 *buf, size_t buf_size, u32 *patch_size)
proc_id = mc_hdr->processor_rev_id;
patch_fam = 0xf + (proc_id >> 12);
+
+ ucode_dbg("Patch-ID 0x%08x: family: 0x%x\n", mc_hdr->patch_id, patch_fam);
+
if (patch_fam != family)
return 1;
@@ -566,9 +584,14 @@ static size_t parse_container(u8 *ucode, size_t size, struct cont_desc *desc)
}
mc = (struct microcode_amd *)(buf + SECTION_HDR_SIZE);
+
+ ucode_dbg("patch_id: 0x%x\n", mc->hdr.patch_id);
+
if (mc_patch_matches(mc, eq_id)) {
desc->psize = patch_size;
desc->mc = mc;
+
+ ucode_dbg(" match: size: %d\n", patch_size);
}
skip:
@@ -639,8 +662,14 @@ static bool __apply_microcode_amd(struct microcode_amd *mc, u32 *cur_rev,
invlpg(p_addr_end);
}
+ if (IS_ENABLED(CONFIG_MICROCODE_DBG))
+ microcode_rev[smp_processor_id()] = mc->hdr.patch_id;
+
/* verify patch application was successful */
*cur_rev = get_patch_level();
+
+ ucode_dbg("updated rev: 0x%x\n", *cur_rev);
+
if (*cur_rev != mc->hdr.patch_id)
return false;
@@ -1026,7 +1055,7 @@ static int verify_and_add_patch(u8 family, u8 *fw, unsigned int leftover,
patch->patch_id = mc_hdr->patch_id;
patch->equiv_cpu = proc_id;
- pr_debug("%s: Adding patch_id: 0x%08x, proc_id: 0x%04x\n",
+ ucode_dbg("%s: Adding patch_id: 0x%08x, proc_id: 0x%04x\n",
__func__, patch->patch_id, proc_id);
/* ... and add to cache. */
@@ -1169,7 +1198,7 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device)
snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86);
if (request_firmware_direct(&fw, (const char *)fw_name, device)) {
- pr_debug("failed to load file %s\n", fw_name);
+ ucode_dbg("failed to load file %s\n", fw_name);
goto out;
}
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index b92e09a87c69..f75c140906d0 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -43,10 +43,19 @@
#include "internal.h"
static struct microcode_ops *microcode_ops;
-static bool dis_ucode_ldr = false;
+static bool dis_ucode_ldr;
bool force_minrev = IS_ENABLED(CONFIG_MICROCODE_LATE_FORCE_MINREV);
-module_param(force_minrev, bool, S_IRUSR | S_IWUSR);
+
+/*
+ * The variables below should be hidden behind CONFIG_MICROCODE_DBG
+ * ifdeffery but, in order not to uglify the code, they are always
+ * defined and IS_ENABLED() is used instead. When microcode debugging
+ * is not enabled, they are meaningless anyway.
+ */
+/* base microcode revision for debugging */
+u32 base_rev;
+u32 microcode_rev[NR_CPUS] = {};
/*
* Synchronization.
@@ -119,20 +128,48 @@ bool __init microcode_loader_disabled(void)
* overwritten.
*/
if (!cpuid_feature() ||
- native_cpuid_ecx(1) & BIT(31) ||
+ ((native_cpuid_ecx(1) & BIT(31)) &&
+ !IS_ENABLED(CONFIG_MICROCODE_DBG)) ||
amd_check_current_patch_level())
dis_ucode_ldr = true;
return dis_ucode_ldr;
}
+static void early_parse_cmdline(void)
+{
+ char cmd_buf[64] = {};
+ char *s, *p = cmd_buf;
+
+ if (cmdline_find_option(boot_command_line, "microcode", cmd_buf, sizeof(cmd_buf)) > 0) {
+ while ((s = strsep(&p, ","))) {
+ if (IS_ENABLED(CONFIG_MICROCODE_DBG)) {
+ if (strstr(s, "base_rev=")) {
+ /* advance to the option arg */
+ strsep(&s, "=");
+ if (kstrtouint(s, 16, &base_rev)) { ; }
+ }
+ }
+
+ if (!strcmp("force_minrev", s))
+ force_minrev = true;
+
+ if (!strcmp(s, "dis_ucode_ldr"))
+ dis_ucode_ldr = true;
+ }
+ }
+
+ /* old, compat option */
+ if (cmdline_find_option_bool(boot_command_line, "dis_ucode_ldr") > 0)
+ dis_ucode_ldr = true;
+}
+
void __init load_ucode_bsp(void)
{
unsigned int cpuid_1_eax;
bool intel = true;
- if (cmdline_find_option_bool(boot_command_line, "dis_ucode_ldr") > 0)
- dis_ucode_ldr = true;
+ early_parse_cmdline();
if (microcode_loader_disabled())
return;
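Taken together, the hunk above means the loader now understands a combined option of the form microcode=base_rev=<hex>,force_minrev,dis_ucode_ldr (base_rev only takes effect with CONFIG_MICROCODE_DBG). A standalone sketch of the comma-separated parsing follows; it is an illustration only, the function and variable names are ours and strtoul stands in for kstrtouint.

#define _GNU_SOURCE
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static unsigned int base_rev;
static bool force_minrev, dis_ucode_ldr;

/* Model of splitting "microcode=" into its comma-separated sub-options. */
static void parse_microcode_opt(char *p)
{
	char *s;

	while ((s = strsep(&p, ","))) {
		if (!strncmp(s, "base_rev=", 9))
			base_rev = (unsigned int)strtoul(s + 9, NULL, 16);
		else if (!strcmp(s, "force_minrev"))
			force_minrev = true;
		else if (!strcmp(s, "dis_ucode_ldr"))
			dis_ucode_ldr = true;
	}
}

int main(void)
{
	char arg[] = "base_rev=0x0a20102e,force_minrev";	/* hypothetical values */

	parse_microcode_opt(arg);
	printf("base_rev=0x%x force_minrev=%d dis_ucode_ldr=%d\n",
	       base_rev, force_minrev, dis_ucode_ldr);
	return 0;
}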
diff --git a/arch/x86/kernel/cpu/microcode/intel-ucode-defs.h b/arch/x86/kernel/cpu/microcode/intel-ucode-defs.h
index cb6e601701ab..2d48e6593540 100644
--- a/arch/x86/kernel/cpu/microcode/intel-ucode-defs.h
+++ b/arch/x86/kernel/cpu/microcode/intel-ucode-defs.h
@@ -67,9 +67,8 @@
{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x55, .steppings = 0x0008, .driver_data = 0x1000191 },
{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x55, .steppings = 0x0010, .driver_data = 0x2007006 },
{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x55, .steppings = 0x0020, .driver_data = 0x3000010 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x55, .steppings = 0x0040, .driver_data = 0x4003605 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x55, .steppings = 0x0080, .driver_data = 0x5003707 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x55, .steppings = 0x0800, .driver_data = 0x7002904 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x55, .steppings = 0x0080, .driver_data = 0x5003901 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x55, .steppings = 0x0800, .driver_data = 0x7002b01 },
{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x56, .steppings = 0x0004, .driver_data = 0x1c },
{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x56, .steppings = 0x0008, .driver_data = 0x700001c },
{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x56, .steppings = 0x0010, .driver_data = 0xf00001a },
@@ -81,51 +80,62 @@
{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x5f, .steppings = 0x0002, .driver_data = 0x3e },
{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x66, .steppings = 0x0008, .driver_data = 0x2a },
{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x6a, .steppings = 0x0020, .driver_data = 0xc0002f0 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x6a, .steppings = 0x0040, .driver_data = 0xd0003e7 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x6c, .steppings = 0x0002, .driver_data = 0x10002b0 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x6a, .steppings = 0x0040, .driver_data = 0xd000404 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x6c, .steppings = 0x0002, .driver_data = 0x10002d0 },
{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x7a, .steppings = 0x0002, .driver_data = 0x42 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x7a, .steppings = 0x0100, .driver_data = 0x24 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x7e, .steppings = 0x0020, .driver_data = 0xc6 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x7a, .steppings = 0x0100, .driver_data = 0x26 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x7e, .steppings = 0x0020, .driver_data = 0xca },
{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8a, .steppings = 0x0002, .driver_data = 0x33 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8c, .steppings = 0x0002, .driver_data = 0xb8 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8c, .steppings = 0x0004, .driver_data = 0x38 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8d, .steppings = 0x0002, .driver_data = 0x52 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8c, .steppings = 0x0002, .driver_data = 0xbc },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8c, .steppings = 0x0004, .driver_data = 0x3c },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8d, .steppings = 0x0002, .driver_data = 0x56 },
{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8e, .steppings = 0x0200, .driver_data = 0xf6 },
{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8e, .steppings = 0x0400, .driver_data = 0xf6 },
{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8e, .steppings = 0x0800, .driver_data = 0xf6 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8e, .steppings = 0x1000, .driver_data = 0xfc },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8f, .steppings = 0x0100, .driver_data = 0x2c000390 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8f, .steppings = 0x0080, .driver_data = 0x2b000603 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8f, .steppings = 0x0040, .driver_data = 0x2c000390 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8f, .steppings = 0x0020, .driver_data = 0x2c000390 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8f, .steppings = 0x0010, .driver_data = 0x2c000390 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8e, .steppings = 0x1000, .driver_data = 0x100 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8f, .steppings = 0x0010, .driver_data = 0x2c0003f7 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8f, .steppings = 0x0020, .driver_data = 0x2c0003f7 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8f, .steppings = 0x0040, .driver_data = 0x2c0003f7 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8f, .steppings = 0x0080, .driver_data = 0x2b000639 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x8f, .steppings = 0x0100, .driver_data = 0x2c0003f7 },
{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x96, .steppings = 0x0002, .driver_data = 0x1a },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x97, .steppings = 0x0004, .driver_data = 0x37 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x97, .steppings = 0x0020, .driver_data = 0x37 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xbf, .steppings = 0x0004, .driver_data = 0x37 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xbf, .steppings = 0x0020, .driver_data = 0x37 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9a, .steppings = 0x0008, .driver_data = 0x435 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9a, .steppings = 0x0010, .driver_data = 0x435 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x97, .steppings = 0x0004, .driver_data = 0x3a },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x97, .steppings = 0x0020, .driver_data = 0x3a },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9a, .steppings = 0x0008, .driver_data = 0x437 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9a, .steppings = 0x0010, .driver_data = 0x437 },
{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9c, .steppings = 0x0001, .driver_data = 0x24000026 },
{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9e, .steppings = 0x0200, .driver_data = 0xf8 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9e, .steppings = 0x0400, .driver_data = 0xf8 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9e, .steppings = 0x0400, .driver_data = 0xfa },
{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9e, .steppings = 0x0800, .driver_data = 0xf6 },
{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9e, .steppings = 0x1000, .driver_data = 0xf8 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9e, .steppings = 0x2000, .driver_data = 0x100 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xa5, .steppings = 0x0004, .driver_data = 0xfc },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xa5, .steppings = 0x0008, .driver_data = 0xfc },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xa5, .steppings = 0x0020, .driver_data = 0xfc },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xa6, .steppings = 0x0001, .driver_data = 0xfe },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xa6, .steppings = 0x0002, .driver_data = 0xfc },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xa7, .steppings = 0x0002, .driver_data = 0x62 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xaa, .steppings = 0x0010, .driver_data = 0x20 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xb7, .steppings = 0x0002, .driver_data = 0x12b },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xba, .steppings = 0x0004, .driver_data = 0x4123 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xba, .steppings = 0x0008, .driver_data = 0x4123 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xba, .steppings = 0x0100, .driver_data = 0x4123 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xbe, .steppings = 0x0001, .driver_data = 0x1a },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xcf, .steppings = 0x0004, .driver_data = 0x21000283 },
-{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xcf, .steppings = 0x0002, .driver_data = 0x21000283 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0x9e, .steppings = 0x2000, .driver_data = 0x104 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xa5, .steppings = 0x0004, .driver_data = 0x100 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xa5, .steppings = 0x0008, .driver_data = 0x100 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xa5, .steppings = 0x0020, .driver_data = 0x100 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xa6, .steppings = 0x0001, .driver_data = 0x102 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xa6, .steppings = 0x0002, .driver_data = 0x100 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xa7, .steppings = 0x0002, .driver_data = 0x64 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xaa, .steppings = 0x0010, .driver_data = 0x24 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xad, .steppings = 0x0002, .driver_data = 0xa0000d1 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xaf, .steppings = 0x0008, .driver_data = 0x3000341 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xb5, .steppings = 0x0001, .driver_data = 0xa },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xb7, .steppings = 0x0002, .driver_data = 0x12f },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xb7, .steppings = 0x0010, .driver_data = 0x12f },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xba, .steppings = 0x0004, .driver_data = 0x4128 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xba, .steppings = 0x0008, .driver_data = 0x4128 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xba, .steppings = 0x0100, .driver_data = 0x4128 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xbd, .steppings = 0x0002, .driver_data = 0x11f },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xbe, .steppings = 0x0001, .driver_data = 0x1d },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xbf, .steppings = 0x0004, .driver_data = 0x3a },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xbf, .steppings = 0x0020, .driver_data = 0x3a },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xbf, .steppings = 0x0040, .driver_data = 0x3a },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xbf, .steppings = 0x0080, .driver_data = 0x3a },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xc5, .steppings = 0x0004, .driver_data = 0x118 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xc6, .steppings = 0x0004, .driver_data = 0x118 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xc6, .steppings = 0x0010, .driver_data = 0x118 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xca, .steppings = 0x0004, .driver_data = 0x118 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xcf, .steppings = 0x0002, .driver_data = 0x210002a9 },
+{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0x6, .model = 0xcf, .steppings = 0x0004, .driver_data = 0x210002a9 },
{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x00, .steppings = 0x0080, .driver_data = 0x12 },
{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x00, .steppings = 0x0400, .driver_data = 0x15 },
{ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, .vendor = X86_VENDOR_INTEL, .family = 0xf, .model = 0x01, .steppings = 0x0004, .driver_data = 0x2e },
diff --git a/arch/x86/kernel/cpu/microcode/internal.h b/arch/x86/kernel/cpu/microcode/internal.h
index 50a9702ae4e2..ae8dbc2b908d 100644
--- a/arch/x86/kernel/cpu/microcode/internal.h
+++ b/arch/x86/kernel/cpu/microcode/internal.h
@@ -44,6 +44,9 @@ struct early_load_data {
extern struct early_load_data early_data;
extern struct ucode_cpu_info ucode_cpu_info[];
+extern u32 microcode_rev[NR_CPUS];
+extern u32 base_rev;
+
struct cpio_data find_microcode_in_initrd(const char *path);
#define MAX_UCODE_COUNT 128
@@ -122,4 +125,10 @@ static inline void reload_ucode_intel(void) { }
static inline struct microcode_ops *init_intel_microcode(void) { return NULL; }
#endif /* !CONFIG_CPU_SUP_INTEL */
+#define ucode_dbg(fmt, ...) \
+({ \
+ if (IS_ENABLED(CONFIG_MICROCODE_DBG)) \
+ pr_info(fmt, ##__VA_ARGS__); \
+})
+
#endif /* _X86_MICROCODE_INTERNAL_H */
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 187d527ef73b..06ca5a30140c 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -107,7 +107,7 @@ u32 resctrl_arch_system_num_rmid_idx(void)
struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
/* RMID are independent numbers for x86. num_rmid_idx == num_rmid */
- return r->num_rmid;
+ return r->mon.num_rmid;
}
struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l)
@@ -365,8 +365,10 @@ static void ctrl_domain_free(struct rdt_hw_ctrl_domain *hw_dom)
static void mon_domain_free(struct rdt_hw_mon_domain *hw_dom)
{
- kfree(hw_dom->arch_mbm_total);
- kfree(hw_dom->arch_mbm_local);
+ int idx;
+
+ for_each_mbm_idx(idx)
+ kfree(hw_dom->arch_mbm_states[idx]);
kfree(hw_dom);
}
@@ -400,25 +402,27 @@ static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_ctrl_domain *
*/
static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_mon_domain *hw_dom)
{
- size_t tsize;
-
- if (resctrl_arch_is_mbm_total_enabled()) {
- tsize = sizeof(*hw_dom->arch_mbm_total);
- hw_dom->arch_mbm_total = kcalloc(num_rmid, tsize, GFP_KERNEL);
- if (!hw_dom->arch_mbm_total)
- return -ENOMEM;
- }
- if (resctrl_arch_is_mbm_local_enabled()) {
- tsize = sizeof(*hw_dom->arch_mbm_local);
- hw_dom->arch_mbm_local = kcalloc(num_rmid, tsize, GFP_KERNEL);
- if (!hw_dom->arch_mbm_local) {
- kfree(hw_dom->arch_mbm_total);
- hw_dom->arch_mbm_total = NULL;
- return -ENOMEM;
- }
+ size_t tsize = sizeof(*hw_dom->arch_mbm_states[0]);
+ enum resctrl_event_id eventid;
+ int idx;
+
+ for_each_mbm_event_id(eventid) {
+ if (!resctrl_is_mon_event_enabled(eventid))
+ continue;
+ idx = MBM_STATE_IDX(eventid);
+ hw_dom->arch_mbm_states[idx] = kcalloc(num_rmid, tsize, GFP_KERNEL);
+ if (!hw_dom->arch_mbm_states[idx])
+ goto cleanup;
}
return 0;
+cleanup:
+ for_each_mbm_idx(idx) {
+ kfree(hw_dom->arch_mbm_states[idx]);
+ hw_dom->arch_mbm_states[idx] = NULL;
+ }
+
+ return -ENOMEM;
}
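The shape of the new per-event state can be sketched in isolation. This is an illustration only: the real MBM_STATE_IDX() and for_each_mbm_idx() helpers live in the resctrl headers, and the total=0/local=1 index mapping here is an assumption.

#include <stdlib.h>

enum { MBM_TOTAL_IDX, MBM_LOCAL_IDX, NUM_MBM_STATES };	/* assumed mapping */

struct mbm_state { unsigned long long chunks, prev_msr; };

/* One array of per-RMID state per MBM event, allocated all-or-nothing. */
static struct mbm_state *mbm_states[NUM_MBM_STATES];

static int mbm_alloc(unsigned int num_rmid)
{
	for (int idx = 0; idx < NUM_MBM_STATES; idx++) {
		mbm_states[idx] = calloc(num_rmid, sizeof(*mbm_states[idx]));
		if (!mbm_states[idx])
			goto cleanup;
	}
	return 0;

cleanup:
	for (int idx = 0; idx < NUM_MBM_STATES; idx++) {
		free(mbm_states[idx]);
		mbm_states[idx] = NULL;
	}
	return -1;
}

/* Lookup mirrors get_arch_mbm_state(): event index first, then RMID. */
static struct mbm_state *mbm_lookup(int idx, unsigned int rmid)
{
	return mbm_states[idx] ? &mbm_states[idx][rmid] : NULL;
}

int main(void)
{
	if (mbm_alloc(64))
		return 1;
	return mbm_lookup(MBM_TOTAL_IDX, 3) == NULL;	/* 0 on success */
}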
static int get_domain_id_from_scope(int cpu, enum resctrl_scope scope)
@@ -516,6 +520,9 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
d = container_of(hdr, struct rdt_mon_domain, hdr);
cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
+ /* Update the mbm_assign_mode state for the CPU if supported */
+ if (r->mon.mbm_cntr_assignable)
+ resctrl_arch_mbm_cntr_assign_set_one(r);
return;
}
@@ -535,9 +542,13 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
d->ci_id = ci->id;
cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
+ /* Update the mbm_assign_mode state for the CPU if supported */
+ if (r->mon.mbm_cntr_assignable)
+ resctrl_arch_mbm_cntr_assign_set_one(r);
+
arch_mon_domain_online(r, d);
- if (arch_domain_mbm_alloc(r->num_rmid, hw_dom)) {
+ if (arch_domain_mbm_alloc(r->mon.num_rmid, hw_dom)) {
mon_domain_free(hw_dom);
return;
}
@@ -707,6 +718,7 @@ enum {
RDT_FLAG_MBA,
RDT_FLAG_SMBA,
RDT_FLAG_BMEC,
+ RDT_FLAG_ABMC,
};
#define RDT_OPT(idx, n, f) \
@@ -732,6 +744,7 @@ static struct rdt_options rdt_options[] __ro_after_init = {
RDT_OPT(RDT_FLAG_MBA, "mba", X86_FEATURE_MBA),
RDT_OPT(RDT_FLAG_SMBA, "smba", X86_FEATURE_SMBA),
RDT_OPT(RDT_FLAG_BMEC, "bmec", X86_FEATURE_BMEC),
+ RDT_OPT(RDT_FLAG_ABMC, "abmc", X86_FEATURE_ABMC),
};
#define NUM_RDT_OPTIONS ARRAY_SIZE(rdt_options)
@@ -863,15 +876,24 @@ static __init bool get_rdt_alloc_resources(void)
static __init bool get_rdt_mon_resources(void)
{
struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+ bool ret = false;
- if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC))
- rdt_mon_features |= (1 << QOS_L3_OCCUP_EVENT_ID);
- if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL))
- rdt_mon_features |= (1 << QOS_L3_MBM_TOTAL_EVENT_ID);
- if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL))
- rdt_mon_features |= (1 << QOS_L3_MBM_LOCAL_EVENT_ID);
+ if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC)) {
+ resctrl_enable_mon_event(QOS_L3_OCCUP_EVENT_ID);
+ ret = true;
+ }
+ if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL)) {
+ resctrl_enable_mon_event(QOS_L3_MBM_TOTAL_EVENT_ID);
+ ret = true;
+ }
+ if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL)) {
+ resctrl_enable_mon_event(QOS_L3_MBM_LOCAL_EVENT_ID);
+ ret = true;
+ }
+ if (rdt_cpu_has(X86_FEATURE_ABMC))
+ ret = true;
- if (!rdt_mon_features)
+ if (!ret)
return false;
return !rdt_get_mon_l3_config(r);
@@ -965,7 +987,7 @@ static enum cpuhp_state rdt_online;
/* Runs once on the BSP during boot. */
void resctrl_cpu_detect(struct cpuinfo_x86 *c)
{
- if (!cpu_has(c, X86_FEATURE_CQM_LLC)) {
+ if (!cpu_has(c, X86_FEATURE_CQM_LLC) && !cpu_has(c, X86_FEATURE_ABMC)) {
c->x86_cache_max_rmid = -1;
c->x86_cache_occ_scale = -1;
c->x86_cache_mbm_width_offset = -1;
@@ -977,7 +999,8 @@ void resctrl_cpu_detect(struct cpuinfo_x86 *c)
if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) ||
cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) ||
- cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) {
+ cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL) ||
+ cpu_has(c, X86_FEATURE_ABMC)) {
u32 eax, ebx, ecx, edx;
/* QoS sub-leaf, EAX=0Fh, ECX=1 */
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 5e3c41b36437..9f4c2f0aaf5c 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -37,6 +37,15 @@ struct arch_mbm_state {
u64 prev_msr;
};
+/* Setting bit 0 in L3_QOS_EXT_CFG enables the ABMC feature. */
+#define ABMC_ENABLE_BIT 0
+
+/*
+ * QoS Event Identifiers.
+ */
+#define ABMC_EXTENDED_EVT_ID BIT(31)
+#define ABMC_EVT_ID BIT(0)
+
/**
* struct rdt_hw_ctrl_domain - Arch private attributes of a set of CPUs that share
* a resource for a control function
@@ -54,15 +63,15 @@ struct rdt_hw_ctrl_domain {
* struct rdt_hw_mon_domain - Arch private attributes of a set of CPUs that share
* a resource for a monitor function
* @d_resctrl: Properties exposed to the resctrl file system
- * @arch_mbm_total: arch private state for MBM total bandwidth
- * @arch_mbm_local: arch private state for MBM local bandwidth
+ * @arch_mbm_states: Per-event pointer to the MBM event's saved state.
+ * An MBM event's state is an array of struct arch_mbm_state
+ * indexed by RMID on x86.
*
* Members of this structure are accessed via helpers that provide abstraction.
*/
struct rdt_hw_mon_domain {
struct rdt_mon_domain d_resctrl;
- struct arch_mbm_state *arch_mbm_total;
- struct arch_mbm_state *arch_mbm_local;
+ struct arch_mbm_state *arch_mbm_states[QOS_NUM_L3_MBM_EVENTS];
};
static inline struct rdt_hw_ctrl_domain *resctrl_to_arch_ctrl_dom(struct rdt_ctrl_domain *r)
@@ -102,6 +111,7 @@ struct msr_param {
* @mon_scale: cqm counter * mon_scale = occupancy in bytes
* @mbm_width: Monitor width, to detect and correct for overflow.
* @cdp_enabled: CDP state of this resource
+ * @mbm_cntr_assign_enabled: ABMC feature is enabled
*
* Members of this structure are either private to the architecture
* e.g. mbm_width, or accessed via helpers that provide abstraction. e.g.
@@ -115,6 +125,7 @@ struct rdt_hw_resource {
unsigned int mon_scale;
unsigned int mbm_width;
bool cdp_enabled;
+ bool mbm_cntr_assign_enabled;
};
static inline struct rdt_hw_resource *resctrl_to_arch_res(struct rdt_resource *r)
@@ -159,6 +170,42 @@ union cpuid_0x10_x_edx {
unsigned int full;
};
+/*
+ * ABMC counters are configured by writing to MSR_IA32_L3_QOS_ABMC_CFG.
+ *
+ * @bw_type : Event configuration that represents the memory
+ * transactions being tracked by the @cntr_id.
+ * @bw_src : Bandwidth source (RMID or CLOSID).
+ * @reserved1 : Reserved.
+ * @is_clos : @bw_src field is a CLOSID (not an RMID).
+ * @cntr_id : Counter identifier.
+ * @reserved : Reserved.
+ * @cntr_en : Counting enable bit.
+ * @cfg_en : Configuration enable bit.
+ *
+ * Configuration and counting:
+ * Counter can be configured across multiple writes to MSR. Configuration
+ * is applied only when @cfg_en = 1. Counter @cntr_id is reset when the
+ * configuration is applied.
+ * @cfg_en = 1, @cntr_en = 0 : Apply @cntr_id configuration but do not
+ * count events.
+ * @cfg_en = 1, @cntr_en = 1 : Apply @cntr_id configuration and start
+ * counting events.
+ */
+union l3_qos_abmc_cfg {
+ struct {
+ unsigned long bw_type :32,
+ bw_src :12,
+ reserved1: 3,
+ is_clos : 1,
+ cntr_id : 5,
+ reserved : 9,
+ cntr_en : 1,
+ cfg_en : 1;
+ } split;
+ unsigned long full;
+};
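To make the bit packing above concrete, here is a hedged standalone example that builds one configuration value for an assignment (counter 3 tracking RMID 10). The 0x3 bw_type value is purely hypothetical, as the real event configuration comes from resctrl_get_mon_evt_cfg(), and the bit-field layout is as produced by GCC/Clang on x86-64.

#include <stdint.h>
#include <stdio.h>

/* Same layout as union l3_qos_abmc_cfg above, repeated for a standalone build. */
union abmc_cfg {
	struct {
		uint64_t bw_type	:32,
			 bw_src		:12,
			 reserved1	: 3,
			 is_clos	: 1,
			 cntr_id	: 5,
			 reserved	: 9,
			 cntr_en	: 1,
			 cfg_en		: 1;
	} split;
	uint64_t full;
};

int main(void)
{
	union abmc_cfg cfg = { 0 };

	cfg.split.cfg_en  = 1;		/* apply the configuration ... */
	cfg.split.cntr_en = 1;		/* ... and start counting */
	cfg.split.cntr_id = 3;		/* counter being assigned */
	cfg.split.bw_src  = 10;		/* RMID (is_clos == 0) */
	cfg.split.bw_type = 0x3;	/* hypothetical bandwidth-type mask */

	printf("MSR value to write: 0x%016llx\n", (unsigned long long)cfg.full);
	return 0;
}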
+
void rdt_ctrl_update(void *arg);
int rdt_get_mon_l3_config(struct rdt_resource *r);
@@ -168,5 +215,6 @@ bool rdt_cpu_has(int flag);
void __init intel_rdt_mbm_apply_quirk(void);
void rdt_domain_reconfigure_cdp(struct rdt_resource *r);
+void resctrl_arch_mbm_cntr_assign_set_one(struct rdt_resource *r);
#endif /* _ASM_X86_RESCTRL_INTERNAL_H */
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index c261558276cd..c8945610d455 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -31,11 +31,6 @@
*/
bool rdt_mon_capable;
-/*
- * Global to indicate which monitoring events are enabled.
- */
-unsigned int rdt_mon_features;
-
#define CF(cf) ((unsigned long)(1048576 * (cf) + 0.5))
static int snc_nodes_per_l3_cache = 1;
@@ -135,7 +130,7 @@ static int logical_rmid_to_physical_rmid(int cpu, int lrmid)
if (snc_nodes_per_l3_cache == 1)
return lrmid;
- return lrmid + (cpu_to_node(cpu) % snc_nodes_per_l3_cache) * r->num_rmid;
+ return lrmid + (cpu_to_node(cpu) % snc_nodes_per_l3_cache) * r->mon.num_rmid;
}
static int __rmid_read_phys(u32 prmid, enum resctrl_event_id eventid, u64 *val)
@@ -166,18 +161,14 @@ static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_mon_domain *hw_do
u32 rmid,
enum resctrl_event_id eventid)
{
- switch (eventid) {
- case QOS_L3_OCCUP_EVENT_ID:
- return NULL;
- case QOS_L3_MBM_TOTAL_EVENT_ID:
- return &hw_dom->arch_mbm_total[rmid];
- case QOS_L3_MBM_LOCAL_EVENT_ID:
- return &hw_dom->arch_mbm_local[rmid];
- default:
- /* Never expect to get here */
- WARN_ON_ONCE(1);
+ struct arch_mbm_state *state;
+
+ if (!resctrl_is_mbm_event(eventid))
return NULL;
- }
+
+ state = hw_dom->arch_mbm_states[MBM_STATE_IDX(eventid)];
+
+ return state ? &state[rmid] : NULL;
}
void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d,
@@ -206,14 +197,16 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d,
void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *d)
{
struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
-
- if (resctrl_arch_is_mbm_total_enabled())
- memset(hw_dom->arch_mbm_total, 0,
- sizeof(*hw_dom->arch_mbm_total) * r->num_rmid);
-
- if (resctrl_arch_is_mbm_local_enabled())
- memset(hw_dom->arch_mbm_local, 0,
- sizeof(*hw_dom->arch_mbm_local) * r->num_rmid);
+ enum resctrl_event_id eventid;
+ int idx;
+
+ for_each_mbm_event_id(eventid) {
+ if (!resctrl_is_mon_event_enabled(eventid))
+ continue;
+ idx = MBM_STATE_IDX(eventid);
+ memset(hw_dom->arch_mbm_states[idx], 0,
+ sizeof(*hw_dom->arch_mbm_states[0]) * r->mon.num_rmid);
+ }
}
static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
@@ -224,15 +217,33 @@ static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
return chunks >> shift;
}
+static u64 get_corrected_val(struct rdt_resource *r, struct rdt_mon_domain *d,
+ u32 rmid, enum resctrl_event_id eventid, u64 msr_val)
+{
+ struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
+ struct arch_mbm_state *am;
+ u64 chunks;
+
+ am = get_arch_mbm_state(hw_dom, rmid, eventid);
+ if (am) {
+ am->chunks += mbm_overflow_count(am->prev_msr, msr_val,
+ hw_res->mbm_width);
+ chunks = get_corrected_mbm_count(rmid, am->chunks);
+ am->prev_msr = msr_val;
+ } else {
+ chunks = msr_val;
+ }
+
+ return chunks * hw_res->mon_scale;
+}
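The correction step factored out above is modular-wraparound accounting on a counter of known width; get_corrected_mbm_count() additionally applies the Intel correction-factor quirk. A hedged standalone sketch of the overflow handling (names are ours, and the 24-bit width is only an example; the real width is MBM_CNTR_WIDTH_BASE plus the CPUID offset):

#include <stdint.h>
#include <stdio.h>

/* Chunks elapsed between two reads of a counter that is 'width' bits wide. */
static uint64_t overflow_count(uint64_t prev, uint64_t cur, unsigned int width)
{
	unsigned int shift = 64 - width;

	return ((cur << shift) - (prev << shift)) >> shift;
}

int main(void)
{
	/* Example: a 24-bit counter that wrapped between the two reads. */
	uint64_t prev = 0xfffff0, cur = 0x000010;

	printf("chunks = %llu\n",
	       (unsigned long long)overflow_count(prev, cur, 24));	/* prints 32 */
	return 0;
}

resctrl_arch_rmid_read() and the new resctrl_arch_cntr_read() both funnel through this path, differing only in whether the raw value came from QM_CTR by RMID or by ABMC counter ID.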
+
int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
u32 unused, u32 rmid, enum resctrl_event_id eventid,
u64 *val, void *ignored)
{
- struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
- struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
int cpu = cpumask_any(&d->hdr.cpu_mask);
- struct arch_mbm_state *am;
- u64 msr_val, chunks;
+ u64 msr_val;
u32 prmid;
int ret;
@@ -243,17 +254,76 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
if (ret)
return ret;
+ *val = get_corrected_val(r, d, rmid, eventid, msr_val);
+
+ return 0;
+}
+
+static int __cntr_id_read(u32 cntr_id, u64 *val)
+{
+ u64 msr_val;
+
+ /*
+ * QM_EVTSEL Register definition:
+ * =======================================================
+ * Bits Mnemonic Description
+ * =======================================================
+ * 63:44 -- Reserved
+ * 43:32 RMID RMID or counter ID in ABMC mode
+ * when reading an MBM event
+ * 31 ExtendedEvtID Extended Event Identifier
+ * 30:8 -- Reserved
+ * 7:0 EvtID Event Identifier
+ * =======================================================
+ * The contents of a specific counter can be read by setting
+ * QM_EVTSEL.ExtendedEvtID = 1 and QM_EVTSEL.EvtID = L3CacheABMC (= 1),
+ * and setting QM_EVTSEL.RMID to the desired counter ID. Reading
+ * the QM_CTR then returns the
+ * contents of the specified counter. The RMID_VAL_ERROR bit is set
+ * if the counter configuration is invalid, or if an invalid counter
+ * ID is set in the QM_EVTSEL.RMID field. The RMID_VAL_UNAVAIL bit
+ * is set if the counter data is unavailable.
+ */
+ wrmsr(MSR_IA32_QM_EVTSEL, ABMC_EXTENDED_EVT_ID | ABMC_EVT_ID, cntr_id);
+ rdmsrl(MSR_IA32_QM_CTR, msr_val);
+
+ if (msr_val & RMID_VAL_ERROR)
+ return -EIO;
+ if (msr_val & RMID_VAL_UNAVAIL)
+ return -EINVAL;
+
+ *val = msr_val;
+ return 0;
+}
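A worked example of the QM_EVTSEL encoding described in the comment above, as a hedged standalone sketch; the helper name is ours and the field positions follow the table in the comment.

#include <stdint.h>
#include <stdio.h>

#define EXT_EVT_ID	(1ULL << 31)	/* ExtendedEvtID */
#define L3_ABMC_EVT	0x01ULL		/* EvtID = L3CacheABMC */

/* 64-bit QM_EVTSEL value selecting ABMC counter 'cntr_id' for reading. */
static uint64_t qm_evtsel_for_cntr(uint32_t cntr_id)
{
	return ((uint64_t)cntr_id << 32) | EXT_EVT_ID | L3_ABMC_EVT;
}

int main(void)
{
	/* Counter 5: bits 43:32 = 5, bit 31 = 1, bits 7:0 = 1. */
	printf("QM_EVTSEL = 0x%016llx\n",
	       (unsigned long long)qm_evtsel_for_cntr(5));	/* 0x0000000580000001 */
	return 0;
}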
+
+void resctrl_arch_reset_cntr(struct rdt_resource *r, struct rdt_mon_domain *d,
+ u32 unused, u32 rmid, int cntr_id,
+ enum resctrl_event_id eventid)
+{
+ struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
+ struct arch_mbm_state *am;
+
am = get_arch_mbm_state(hw_dom, rmid, eventid);
if (am) {
- am->chunks += mbm_overflow_count(am->prev_msr, msr_val,
- hw_res->mbm_width);
- chunks = get_corrected_mbm_count(rmid, am->chunks);
- am->prev_msr = msr_val;
- } else {
- chunks = msr_val;
+ memset(am, 0, sizeof(*am));
+
+ /* Record any initial, non-zero count value. */
+ __cntr_id_read(cntr_id, &am->prev_msr);
}
+}
+
+int resctrl_arch_cntr_read(struct rdt_resource *r, struct rdt_mon_domain *d,
+ u32 unused, u32 rmid, int cntr_id,
+ enum resctrl_event_id eventid, u64 *val)
+{
+ u64 msr_val;
+ int ret;
+
+ ret = __cntr_id_read(cntr_id, &msr_val);
+ if (ret)
+ return ret;
- *val = chunks * hw_res->mon_scale;
+ *val = get_corrected_val(r, d, rmid, eventid, msr_val);
return 0;
}
@@ -346,12 +416,13 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r)
unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset;
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
unsigned int threshold;
+ u32 eax, ebx, ecx, edx;
snc_nodes_per_l3_cache = snc_get_config();
resctrl_rmid_realloc_limit = boot_cpu_data.x86_cache_size * 1024;
hw_res->mon_scale = boot_cpu_data.x86_cache_occ_scale / snc_nodes_per_l3_cache;
- r->num_rmid = (boot_cpu_data.x86_cache_max_rmid + 1) / snc_nodes_per_l3_cache;
+ r->mon.num_rmid = (boot_cpu_data.x86_cache_max_rmid + 1) / snc_nodes_per_l3_cache;
hw_res->mbm_width = MBM_CNTR_WIDTH_BASE;
if (mbm_offset > 0 && mbm_offset <= MBM_CNTR_WIDTH_OFFSET_MAX)
@@ -366,7 +437,7 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r)
*
* For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC.
*/
- threshold = resctrl_rmid_realloc_limit / r->num_rmid;
+ threshold = resctrl_rmid_realloc_limit / r->mon.num_rmid;
/*
* Because num_rmid may not be a power of two, round the value
@@ -375,12 +446,17 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r)
*/
resctrl_rmid_realloc_threshold = resctrl_arch_round_mon_val(threshold);
- if (rdt_cpu_has(X86_FEATURE_BMEC)) {
- u32 eax, ebx, ecx, edx;
-
+ if (rdt_cpu_has(X86_FEATURE_BMEC) || rdt_cpu_has(X86_FEATURE_ABMC)) {
/* Detect list of bandwidth sources that can be tracked */
cpuid_count(0x80000020, 3, &eax, &ebx, &ecx, &edx);
- r->mbm_cfg_mask = ecx & MAX_EVT_CONFIG_BITS;
+ r->mon.mbm_cfg_mask = ecx & MAX_EVT_CONFIG_BITS;
+ }
+
+ if (rdt_cpu_has(X86_FEATURE_ABMC)) {
+ r->mon.mbm_cntr_assignable = true;
+ cpuid_count(0x80000020, 5, &eax, &ebx, &ecx, &edx);
+ r->mon.num_mbm_cntrs = (ebx & GENMASK(15, 0)) + 1;
+ hw_res->mbm_cntr_assign_enabled = true;
}
r->mon_capable = true;
@@ -401,3 +477,91 @@ void __init intel_rdt_mbm_apply_quirk(void)
mbm_cf_rmidthreshold = mbm_cf_table[cf_index].rmidthreshold;
mbm_cf = mbm_cf_table[cf_index].cf;
}
+
+static void resctrl_abmc_set_one_amd(void *arg)
+{
+ bool *enable = arg;
+
+ if (*enable)
+ msr_set_bit(MSR_IA32_L3_QOS_EXT_CFG, ABMC_ENABLE_BIT);
+ else
+ msr_clear_bit(MSR_IA32_L3_QOS_EXT_CFG, ABMC_ENABLE_BIT);
+}
+
+/*
+ * Enabling or disabling ABMC requires updating the L3_QOS_EXT_CFG MSR on
+ * all CPUs associated with all monitor domains.
+ */
+static void _resctrl_abmc_enable(struct rdt_resource *r, bool enable)
+{
+ struct rdt_mon_domain *d;
+
+ lockdep_assert_cpus_held();
+
+ list_for_each_entry(d, &r->mon_domains, hdr.list) {
+ on_each_cpu_mask(&d->hdr.cpu_mask, resctrl_abmc_set_one_amd,
+ &enable, 1);
+ resctrl_arch_reset_rmid_all(r, d);
+ }
+}
+
+int resctrl_arch_mbm_cntr_assign_set(struct rdt_resource *r, bool enable)
+{
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
+
+ if (r->mon.mbm_cntr_assignable &&
+ hw_res->mbm_cntr_assign_enabled != enable) {
+ _resctrl_abmc_enable(r, enable);
+ hw_res->mbm_cntr_assign_enabled = enable;
+ }
+
+ return 0;
+}
+
+bool resctrl_arch_mbm_cntr_assign_enabled(struct rdt_resource *r)
+{
+ return resctrl_to_arch_res(r)->mbm_cntr_assign_enabled;
+}
+
+static void resctrl_abmc_config_one_amd(void *info)
+{
+ union l3_qos_abmc_cfg *abmc_cfg = info;
+
+ wrmsrl(MSR_IA32_L3_QOS_ABMC_CFG, abmc_cfg->full);
+}
+
+/*
+ * Send an IPI to the domain to assign the counter to the (RMID, event) pair.
+ */
+void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_mon_domain *d,
+ enum resctrl_event_id evtid, u32 rmid, u32 closid,
+ u32 cntr_id, bool assign)
+{
+ struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
+ union l3_qos_abmc_cfg abmc_cfg = { 0 };
+ struct arch_mbm_state *am;
+
+ abmc_cfg.split.cfg_en = 1;
+ abmc_cfg.split.cntr_en = assign ? 1 : 0;
+ abmc_cfg.split.cntr_id = cntr_id;
+ abmc_cfg.split.bw_src = rmid;
+ if (assign)
+ abmc_cfg.split.bw_type = resctrl_get_mon_evt_cfg(evtid);
+
+ smp_call_function_any(&d->hdr.cpu_mask, resctrl_abmc_config_one_amd, &abmc_cfg, 1);
+
+ /*
+ * The hardware counter is reset (because cfg_en == 1) so there is no
+ * need to record initial non-zero counts.
+ */
+ am = get_arch_mbm_state(hw_dom, rmid, evtid);
+ if (am)
+ memset(am, 0, sizeof(*am));
+}
+
+void resctrl_arch_mbm_cntr_assign_set_one(struct rdt_resource *r)
+{
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
+
+ resctrl_abmc_set_one_amd(&hw_res->mbm_cntr_assign_enabled);
+}
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 6b868afb26c3..4cee6213d667 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -51,6 +51,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_COHERENCY_SFW_NO, CPUID_EBX, 31, 0x8000001f, 0 },
{ X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 },
{ X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 },
+ { X86_FEATURE_ABMC, CPUID_EBX, 5, 0x80000020, 0 },
{ X86_FEATURE_TSA_SQ_NO, CPUID_ECX, 1, 0x80000021, 0 },
{ X86_FEATURE_TSA_L1_NO, CPUID_ECX, 2, 0x80000021, 0 },
{ X86_FEATURE_AMD_WORKLOAD_CLASS, CPUID_EAX, 22, 0x80000021, 0 },
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index e35ccdc84910..6073a16628f9 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -372,6 +372,19 @@ unsigned int topology_unit_count(u32 apicid, enum x86_topology_domains which_uni
return topo_unit_count(lvlid, at_level, apic_maps[which_units].map);
}
+#ifdef CONFIG_SMP
+int topology_get_primary_thread(unsigned int cpu)
+{
+ u32 apic_id = cpuid_to_apicid[cpu];
+
+ /*
+ * Get the core domain level APIC id, which is the primary thread
+ * and return the CPU number assigned to it.
+ */
+ return topo_lookup_cpuid(topo_apicid(apic_id, TOPO_CORE_DOMAIN));
+}
+#endif
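A small illustration of what "core domain level APIC id" means here, under the assumption that topo_apicid() simply masks off the SMT-level bits of the APIC ID; the helper name and the two-threads-per-core example are ours.

#include <stdint.h>
#include <stdio.h>

/* APIC ID of thread 0 of the core, assuming 'smt_shift' SMT bits. */
static uint32_t primary_thread_apicid(uint32_t apicid, unsigned int smt_shift)
{
	return apicid & ~((1u << smt_shift) - 1);
}

int main(void)
{
	/* 2 threads per core -> smt_shift == 1: APIC IDs 6 and 7 share a core. */
	printf("0x%x -> 0x%x\n", 7, primary_thread_apicid(7, 1));	/* 0x7 -> 0x6 */
	return 0;
}

topology_get_primary_thread() then maps that masked APIC ID back to a Linux CPU number via topo_lookup_cpuid().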
+
#ifdef CONFIG_ACPI_HOTPLUG_CPU
/**
* topology_hotplug_apic - Handle a physical hotplugged APIC after boot
diff --git a/arch/x86/kernel/cpu/topology_amd.c b/arch/x86/kernel/cpu/topology_amd.c
index 827dd0dbb6e9..6ac097e13106 100644
--- a/arch/x86/kernel/cpu/topology_amd.c
+++ b/arch/x86/kernel/cpu/topology_amd.c
@@ -59,7 +59,7 @@ static void store_node(struct topo_scan *tscan, u16 nr_nodes, u16 node_id)
tscan->amd_node_id = node_id;
}
-static bool parse_8000_001e(struct topo_scan *tscan, bool has_topoext)
+static bool parse_8000_001e(struct topo_scan *tscan)
{
struct {
// eax
@@ -85,7 +85,7 @@ static bool parse_8000_001e(struct topo_scan *tscan, bool has_topoext)
* If leaf 0xb/0x26 is available, then the APIC ID and the domain
* shifts are set already.
*/
- if (!has_topoext) {
+ if (!cpu_feature_enabled(X86_FEATURE_XTOPOLOGY)) {
tscan->c->topo.initial_apicid = leaf.ext_apic_id;
/*
@@ -163,11 +163,12 @@ static void topoext_fixup(struct topo_scan *tscan)
c->x86 != 0x15 || c->x86_model < 0x10 || c->x86_model > 0x6f)
return;
- if (msr_set_bit(0xc0011005, 54) <= 0)
+ if (msr_set_bit(MSR_AMD64_CPUID_EXT_FEAT,
+ MSR_AMD64_CPUID_EXT_FEAT_TOPOEXT_BIT) <= 0)
return;
- rdmsrq(0xc0011005, msrval);
- if (msrval & BIT_64(54)) {
+ rdmsrq(MSR_AMD64_CPUID_EXT_FEAT, msrval);
+ if (msrval & MSR_AMD64_CPUID_EXT_FEAT_TOPOEXT) {
set_cpu_cap(c, X86_FEATURE_TOPOEXT);
pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n");
}
@@ -175,27 +176,27 @@ static void topoext_fixup(struct topo_scan *tscan)
static void parse_topology_amd(struct topo_scan *tscan)
{
- bool has_topoext = false;
+ if (cpu_feature_enabled(X86_FEATURE_AMD_HTR_CORES))
+ tscan->c->topo.cpu_type = cpuid_ebx(0x80000026);
/*
- * If the extended topology leaf 0x8000_001e is available
- * try to get SMT, CORE, TILE, and DIE shifts from extended
+ * Try to get SMT, CORE, TILE, and DIE shifts from extended
* CPUID leaf 0x8000_0026 on supported processors first. If
* extended CPUID leaf 0x8000_0026 is not supported, try to
- * get SMT and CORE shift from leaf 0xb first, then try to
- * get the CORE shift from leaf 0x8000_0008.
+ * get SMT and CORE shift from leaf 0xb. If either leaf is
+ * available, cpu_parse_topology_ext() will return true.
+ *
+ * If XTOPOLOGY leaves (0x26/0xb) are not available, try to
+ * get the CORE shift from leaf 0x8000_0008 first.
*/
- if (cpu_feature_enabled(X86_FEATURE_TOPOEXT))
- has_topoext = cpu_parse_topology_ext(tscan);
-
- if (cpu_feature_enabled(X86_FEATURE_AMD_HTR_CORES))
- tscan->c->topo.cpu_type = cpuid_ebx(0x80000026);
-
- if (!has_topoext && !parse_8000_0008(tscan))
+ if (!cpu_parse_topology_ext(tscan) && !parse_8000_0008(tscan))
return;
- /* Prefer leaf 0x8000001e if available */
- if (parse_8000_001e(tscan, has_topoext))
+ /*
+ * Prefer leaf 0x8000001e if available; when the XTOPOLOGY leaves are
+ * not available, it also supplies the SMT shift and the initial
+ * APIC ID.
+ */
+ if (parse_8000_001e(tscan))
return;
/* Try the NODEID MSR */