diff options
Diffstat (limited to 'arch')
74 files changed, 735 insertions, 378 deletions
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index ea5a1dcb133b..4f2eeda907ec 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -6,6 +6,7 @@ config ARC def_bool y select ARC_TIMERS + select ARCH_HAS_CPU_CACHE_ALIASING select ARCH_HAS_CACHE_LINE_SIZE select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DMA_PREP_COHERENT diff --git a/arch/arc/include/asm/cachetype.h b/arch/arc/include/asm/cachetype.h new file mode 100644 index 000000000000..acd3b6cb4bf5 --- /dev/null +++ b/arch/arc/include/asm/cachetype.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_ARC_CACHETYPE_H +#define __ASM_ARC_CACHETYPE_H + +#define cpu_dcache_is_aliasing() false +#define cpu_icache_is_aliasing() true + +#endif diff --git a/arch/arm/boot/dts/nxp/imx/imxrt1050.dtsi b/arch/arm/boot/dts/nxp/imx/imxrt1050.dtsi index dd714d235d5f..b0bad0d1ba36 100644 --- a/arch/arm/boot/dts/nxp/imx/imxrt1050.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imxrt1050.dtsi @@ -87,7 +87,7 @@ reg = <0x402c0000 0x4000>; interrupts = <110>; clocks = <&clks IMXRT1050_CLK_IPG_PDOF>, - <&clks IMXRT1050_CLK_OSC>, + <&clks IMXRT1050_CLK_AHB_PODF>, <&clks IMXRT1050_CLK_USDHC1>; clock-names = "ipg", "ahb", "per"; bus-width = <4>; diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig index 0beecdde55f5..f25eadcba5e6 100644 --- a/arch/arm/configs/imx_v6_v7_defconfig +++ b/arch/arm/configs/imx_v6_v7_defconfig @@ -323,6 +323,7 @@ CONFIG_SND_SOC_IMX_SGTL5000=y CONFIG_SND_SOC_FSL_ASOC_CARD=y CONFIG_SND_SOC_AC97_CODEC=y CONFIG_SND_SOC_CS42XX8_I2C=y +CONFIG_SND_SOC_SPDIF=y CONFIG_SND_SOC_TLV320AIC3X_I2C=y CONFIG_SND_SOC_WM8960=y CONFIG_SND_SOC_WM8962=y diff --git a/arch/arm/mach-imx/Kconfig b/arch/arm/mach-imx/Kconfig index e4fe059cd861..dc47b2312127 100644 --- a/arch/arm/mach-imx/Kconfig +++ b/arch/arm/mach-imx/Kconfig @@ -6,6 +6,7 @@ menuconfig ARCH_MXC select CLKSRC_IMX_GPT select GENERIC_IRQ_CHIP select GPIOLIB + select PINCTRL select PM_OPP if PM select SOC_BUS select SRAM diff --git a/arch/arm64/boot/dts/arm/fvp-base-revc.dts b/arch/arm64/boot/dts/arm/fvp-base-revc.dts index 19973ab4ea6b..9e10d7a6b5a2 100644 --- a/arch/arm64/boot/dts/arm/fvp-base-revc.dts +++ b/arch/arm64/boot/dts/arm/fvp-base-revc.dts @@ -233,7 +233,7 @@ #interrupt-cells = <0x1>; compatible = "pci-host-ecam-generic"; device_type = "pci"; - bus-range = <0x0 0x1>; + bus-range = <0x0 0xff>; reg = <0x0 0x40000000 0x0 0x10000000>; ranges = <0x2000000 0x0 0x50000000 0x0 0x50000000 0x0 0x10000000>; interrupt-map = <0 0 0 1 &gic 0 0 GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>, diff --git a/arch/arm64/boot/dts/broadcom/bcm2712.dtsi b/arch/arm64/boot/dts/broadcom/bcm2712.dtsi index 6e5a984c1d4e..26a29e5e5078 100644 --- a/arch/arm64/boot/dts/broadcom/bcm2712.dtsi +++ b/arch/arm64/boot/dts/broadcom/bcm2712.dtsi @@ -67,7 +67,7 @@ l2_cache_l0: l2-cache-l0 { compatible = "cache"; cache-size = <0x80000>; - cache-line-size = <128>; + cache-line-size = <64>; cache-sets = <1024>; //512KiB(size)/64(line-size)=8192ways/8-way set cache-level = <2>; cache-unified; @@ -91,7 +91,7 @@ l2_cache_l1: l2-cache-l1 { compatible = "cache"; cache-size = <0x80000>; - cache-line-size = <128>; + cache-line-size = <64>; cache-sets = <1024>; //512KiB(size)/64(line-size)=8192ways/8-way set cache-level = <2>; cache-unified; @@ -115,7 +115,7 @@ l2_cache_l2: l2-cache-l2 { compatible = "cache"; cache-size = <0x80000>; - cache-line-size = <128>; + cache-line-size = <64>; cache-sets = <1024>; //512KiB(size)/64(line-size)=8192ways/8-way set cache-level = <2>; cache-unified; @@ -139,7 +139,7 @@ l2_cache_l3: l2-cache-l3 { compatible = "cache"; cache-size = <0x80000>; - cache-line-size = <128>; + cache-line-size = <64>; cache-sets = <1024>; //512KiB(size)/64(line-size)=8192ways/8-way set cache-level = <2>; cache-unified; diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-audio.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-audio.dtsi index a60ebb718789..c32a6947ae9c 100644 --- a/arch/arm64/boot/dts/freescale/imx8-ss-audio.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-ss-audio.dtsi @@ -165,7 +165,7 @@ audio_subsys: bus@59000000 { }; esai0: esai@59010000 { - compatible = "fsl,imx8qm-esai"; + compatible = "fsl,imx8qm-esai", "fsl,imx6ull-esai"; reg = <0x59010000 0x10000>; interrupts = <GIC_SPI 409 IRQ_TYPE_LEVEL_HIGH>; clocks = <&esai0_lpcg IMX_LPCG_CLK_4>, diff --git a/arch/arm64/boot/dts/freescale/imx8qm-ss-audio.dtsi b/arch/arm64/boot/dts/freescale/imx8qm-ss-audio.dtsi index e24e639b98ee..c9b55f02497a 100644 --- a/arch/arm64/boot/dts/freescale/imx8qm-ss-audio.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8qm-ss-audio.dtsi @@ -134,7 +134,7 @@ }; esai1: esai@59810000 { - compatible = "fsl,imx8qm-esai"; + compatible = "fsl,imx8qm-esai", "fsl,imx6ull-esai"; reg = <0x59810000 0x10000>; interrupts = <GIC_SPI 411 IRQ_TYPE_LEVEL_HIGH>; clocks = <&esai1_lpcg IMX_LPCG_CLK_0>, diff --git a/arch/arm64/boot/dts/freescale/imx95.dtsi b/arch/arm64/boot/dts/freescale/imx95.dtsi index d10f62eacfe0..e9c7a8265d71 100644 --- a/arch/arm64/boot/dts/freescale/imx95.dtsi +++ b/arch/arm64/boot/dts/freescale/imx95.dtsi @@ -1673,7 +1673,7 @@ netcmix_blk_ctrl: syscon@4c810000 { compatible = "nxp,imx95-netcmix-blk-ctrl", "syscon"; - reg = <0x0 0x4c810000 0x0 0x10000>; + reg = <0x0 0x4c810000 0x0 0x8>; #clock-cells = <1>; clocks = <&scmi_clk IMX95_CLK_BUSNETCMIX>; assigned-clocks = <&scmi_clk IMX95_CLK_BUSNETCMIX>; diff --git a/arch/arm64/boot/dts/qcom/sa8775p.dtsi b/arch/arm64/boot/dts/qcom/sa8775p.dtsi index 9f315a51a7c1..9da62d7c4d27 100644 --- a/arch/arm64/boot/dts/qcom/sa8775p.dtsi +++ b/arch/arm64/boot/dts/qcom/sa8775p.dtsi @@ -2440,6 +2440,7 @@ qcom,cmb-element-bits = <32>; qcom,cmb-msrs-num = <32>; + status = "disabled"; out-ports { port { @@ -6092,7 +6093,7 @@ <0x0 0x40000000 0x0 0xf20>, <0x0 0x40000f20 0x0 0xa8>, <0x0 0x40001000 0x0 0x4000>, - <0x0 0x40200000 0x0 0x100000>, + <0x0 0x40200000 0x0 0x1fe00000>, <0x0 0x01c03000 0x0 0x1000>, <0x0 0x40005000 0x0 0x2000>; reg-names = "parf", "dbi", "elbi", "atu", "addr_space", @@ -6250,7 +6251,7 @@ <0x0 0x60000000 0x0 0xf20>, <0x0 0x60000f20 0x0 0xa8>, <0x0 0x60001000 0x0 0x4000>, - <0x0 0x60200000 0x0 0x100000>, + <0x0 0x60200000 0x0 0x1fe00000>, <0x0 0x01c13000 0x0 0x1000>, <0x0 0x60005000 0x0 0x2000>; reg-names = "parf", "dbi", "elbi", "atu", "addr_space", diff --git a/arch/arm64/boot/dts/qcom/x1e78100-lenovo-thinkpad-t14s.dts b/arch/arm64/boot/dts/qcom/x1e78100-lenovo-thinkpad-t14s.dts index 975550139e10..66513fc8e67a 100644 --- a/arch/arm64/boot/dts/qcom/x1e78100-lenovo-thinkpad-t14s.dts +++ b/arch/arm64/boot/dts/qcom/x1e78100-lenovo-thinkpad-t14s.dts @@ -773,6 +773,10 @@ status = "okay"; }; +&usb_1_ss0_dwc3 { + dr_mode = "host"; +}; + &usb_1_ss0_dwc3_hs { remote-endpoint = <&pmic_glink_ss0_hs_in>; }; @@ -801,6 +805,10 @@ status = "okay"; }; +&usb_1_ss1_dwc3 { + dr_mode = "host"; +}; + &usb_1_ss1_dwc3_hs { remote-endpoint = <&pmic_glink_ss1_hs_in>; }; diff --git a/arch/arm64/boot/dts/qcom/x1e80100-crd.dts b/arch/arm64/boot/dts/qcom/x1e80100-crd.dts index 39f9d9cdc10d..d51a9bdcf67f 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-crd.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-crd.dts @@ -1197,6 +1197,10 @@ status = "okay"; }; +&usb_1_ss0_dwc3 { + dr_mode = "host"; +}; + &usb_1_ss0_dwc3_hs { remote-endpoint = <&pmic_glink_ss0_hs_in>; }; @@ -1225,6 +1229,10 @@ status = "okay"; }; +&usb_1_ss1_dwc3 { + dr_mode = "host"; +}; + &usb_1_ss1_dwc3_hs { remote-endpoint = <&pmic_glink_ss1_hs_in>; }; @@ -1253,6 +1261,10 @@ status = "okay"; }; +&usb_1_ss2_dwc3 { + dr_mode = "host"; +}; + &usb_1_ss2_dwc3_hs { remote-endpoint = <&pmic_glink_ss2_hs_in>; }; diff --git a/arch/arm64/boot/dts/qcom/x1e80100.dtsi b/arch/arm64/boot/dts/qcom/x1e80100.dtsi index 88805629ed2b..7e4f46ad8edd 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100.dtsi +++ b/arch/arm64/boot/dts/qcom/x1e80100.dtsi @@ -2924,7 +2924,7 @@ #address-cells = <3>; #size-cells = <2>; ranges = <0x01000000 0x0 0x00000000 0x0 0x70200000 0x0 0x100000>, - <0x02000000 0x0 0x70300000 0x0 0x70300000 0x0 0x1d00000>; + <0x02000000 0x0 0x70300000 0x0 0x70300000 0x0 0x3d00000>; bus-range = <0x00 0xff>; dma-coherent; @@ -4066,8 +4066,6 @@ dma-coherent; - usb-role-switch; - ports { #address-cells = <1>; #size-cells = <0>; @@ -4321,8 +4319,6 @@ dma-coherent; - usb-role-switch; - ports { #address-cells = <1>; #size-cells = <0>; @@ -4421,8 +4417,6 @@ dma-coherent; - usb-role-switch; - ports { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index 0597de415fe0..7d992c3c01ce 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -333,6 +333,7 @@ power-domain@RK3328_PD_HEVC { reg = <RK3328_PD_HEVC>; + clocks = <&cru SCLK_VENC_CORE>; #power-domain-cells = <0>; }; power-domain@RK3328_PD_VIDEO { diff --git a/arch/arm64/boot/dts/rockchip/rk3568.dtsi b/arch/arm64/boot/dts/rockchip/rk3568.dtsi index ecaefe208e3e..695cccbdab0f 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3568.dtsi @@ -350,6 +350,7 @@ assigned-clocks = <&pmucru CLK_PCIEPHY0_REF>; assigned-clock-rates = <100000000>; resets = <&cru SRST_PIPEPHY0>; + reset-names = "phy"; rockchip,pipe-grf = <&pipegrf>; rockchip,pipe-phy-grf = <&pipe_phy_grf0>; #phy-cells = <1>; diff --git a/arch/arm64/boot/dts/rockchip/rk356x-base.dtsi b/arch/arm64/boot/dts/rockchip/rk356x-base.dtsi index 62be06f3b863..e55390629114 100644 --- a/arch/arm64/boot/dts/rockchip/rk356x-base.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk356x-base.dtsi @@ -1681,6 +1681,7 @@ assigned-clocks = <&pmucru CLK_PCIEPHY1_REF>; assigned-clock-rates = <100000000>; resets = <&cru SRST_PIPEPHY1>; + reset-names = "phy"; rockchip,pipe-grf = <&pipegrf>; rockchip,pipe-phy-grf = <&pipe_phy_grf1>; #phy-cells = <1>; @@ -1697,6 +1698,7 @@ assigned-clocks = <&pmucru CLK_PCIEPHY2_REF>; assigned-clock-rates = <100000000>; resets = <&cru SRST_PIPEPHY2>; + reset-names = "phy"; rockchip,pipe-grf = <&pipegrf>; rockchip,pipe-phy-grf = <&pipe_phy_grf2>; #phy-cells = <1>; diff --git a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts index c44d001da169..d597112f1d5b 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts @@ -72,7 +72,7 @@ rfkill { compatible = "rfkill-gpio"; - label = "rfkill-pcie-wlan"; + label = "rfkill-m2-wlan"; radio-type = "wlan"; shutdown-gpios = <&gpio4 RK_PA2 GPIO_ACTIVE_HIGH>; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-nanopi-r6.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s-nanopi-r6.dtsi index 76a6e8e517e9..c9749cb50076 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s-nanopi-r6.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588s-nanopi-r6.dtsi @@ -434,6 +434,7 @@ &sdmmc { bus-width = <4>; cap-sd-highspeed; + cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>; disable-wp; max-frequency = <150000000>; no-mmc; diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 37e24f1bd227..99ea26d400ff 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -36,15 +36,8 @@ #include <asm/traps.h> #include <asm/vdso.h> -#ifdef CONFIG_ARM64_GCS #define GCS_SIGNAL_CAP(addr) (((unsigned long)addr) & GCS_CAP_ADDR_MASK) -static bool gcs_signal_cap_valid(u64 addr, u64 val) -{ - return val == GCS_SIGNAL_CAP(addr); -} -#endif - /* * Do a signal return; undo the signal stack. These are aligned to 128-bit. */ @@ -1062,8 +1055,7 @@ static int restore_sigframe(struct pt_regs *regs, #ifdef CONFIG_ARM64_GCS static int gcs_restore_signal(void) { - unsigned long __user *gcspr_el0; - u64 cap; + u64 gcspr_el0, cap; int ret; if (!system_supports_gcs()) @@ -1072,7 +1064,7 @@ static int gcs_restore_signal(void) if (!(current->thread.gcs_el0_mode & PR_SHADOW_STACK_ENABLE)) return 0; - gcspr_el0 = (unsigned long __user *)read_sysreg_s(SYS_GCSPR_EL0); + gcspr_el0 = read_sysreg_s(SYS_GCSPR_EL0); /* * Ensure that any changes to the GCS done via GCS operations @@ -1087,22 +1079,23 @@ static int gcs_restore_signal(void) * then faults will be generated on GCS operations - the main * concern is to protect GCS pages. */ - ret = copy_from_user(&cap, gcspr_el0, sizeof(cap)); + ret = copy_from_user(&cap, (unsigned long __user *)gcspr_el0, + sizeof(cap)); if (ret) return -EFAULT; /* * Check that the cap is the actual GCS before replacing it. */ - if (!gcs_signal_cap_valid((u64)gcspr_el0, cap)) + if (cap != GCS_SIGNAL_CAP(gcspr_el0)) return -EINVAL; /* Invalidate the token to prevent reuse */ - put_user_gcs(0, (__user void*)gcspr_el0, &ret); + put_user_gcs(0, (unsigned long __user *)gcspr_el0, &ret); if (ret != 0) return -EFAULT; - write_sysreg_s(gcspr_el0 + 1, SYS_GCSPR_EL0); + write_sysreg_s(gcspr_el0 + 8, SYS_GCSPR_EL0); return 0; } @@ -1421,7 +1414,7 @@ static int get_sigframe(struct rt_sigframe_user_layout *user, static int gcs_signal_entry(__sigrestore_t sigtramp, struct ksignal *ksig) { - unsigned long __user *gcspr_el0; + u64 gcspr_el0; int ret = 0; if (!system_supports_gcs()) @@ -1434,18 +1427,20 @@ static int gcs_signal_entry(__sigrestore_t sigtramp, struct ksignal *ksig) * We are entering a signal handler, current register state is * active. */ - gcspr_el0 = (unsigned long __user *)read_sysreg_s(SYS_GCSPR_EL0); + gcspr_el0 = read_sysreg_s(SYS_GCSPR_EL0); /* * Push a cap and the GCS entry for the trampoline onto the GCS. */ - put_user_gcs((unsigned long)sigtramp, gcspr_el0 - 2, &ret); - put_user_gcs(GCS_SIGNAL_CAP(gcspr_el0 - 1), gcspr_el0 - 1, &ret); + put_user_gcs((unsigned long)sigtramp, + (unsigned long __user *)(gcspr_el0 - 16), &ret); + put_user_gcs(GCS_SIGNAL_CAP(gcspr_el0 - 8), + (unsigned long __user *)(gcspr_el0 - 8), &ret); if (ret != 0) return ret; - gcspr_el0 -= 2; - write_sysreg_s((unsigned long)gcspr_el0, SYS_GCSPR_EL0); + gcspr_el0 -= 16; + write_sysreg_s(gcspr_el0, SYS_GCSPR_EL0); return 0; } diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index caba3e4bd09e..e75374d682f4 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -783,9 +783,6 @@ static int hyp_ack_unshare(u64 addr, const struct pkvm_mem_transition *tx) if (tx->initiator.id == PKVM_ID_HOST && hyp_page_count((void *)addr)) return -EBUSY; - if (__hyp_ack_skip_pgtable_check(tx)) - return 0; - return __hyp_check_page_state_range(addr, size, PKVM_PAGE_SHARED_BORROWED); } diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 456102bc0b55..6c5950b9ceac 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -24,6 +24,7 @@ static DEFINE_MUTEX(arm_pmus_lock); static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc); static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc); +static bool kvm_pmu_counter_is_enabled(struct kvm_pmc *pmc); static struct kvm_vcpu *kvm_pmc_to_vcpu(const struct kvm_pmc *pmc) { @@ -327,48 +328,25 @@ u64 kvm_pmu_implemented_counter_mask(struct kvm_vcpu *vcpu) return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX); } -/** - * kvm_pmu_enable_counter_mask - enable selected PMU counters - * @vcpu: The vcpu pointer - * @val: the value guest writes to PMCNTENSET register - * - * Call perf_event_enable to start counting the perf event - */ -void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val) +static void kvm_pmc_enable_perf_event(struct kvm_pmc *pmc) { - int i; - if (!kvm_vcpu_has_pmu(vcpu)) - return; - - if (!(kvm_vcpu_read_pmcr(vcpu) & ARMV8_PMU_PMCR_E) || !val) + if (!pmc->perf_event) { + kvm_pmu_create_perf_event(pmc); return; + } - for (i = 0; i < KVM_ARMV8_PMU_MAX_COUNTERS; i++) { - struct kvm_pmc *pmc; - - if (!(val & BIT(i))) - continue; - - pmc = kvm_vcpu_idx_to_pmc(vcpu, i); + perf_event_enable(pmc->perf_event); + if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE) + kvm_debug("fail to enable perf event\n"); +} - if (!pmc->perf_event) { - kvm_pmu_create_perf_event(pmc); - } else { - perf_event_enable(pmc->perf_event); - if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE) - kvm_debug("fail to enable perf event\n"); - } - } +static void kvm_pmc_disable_perf_event(struct kvm_pmc *pmc) +{ + if (pmc->perf_event) + perf_event_disable(pmc->perf_event); } -/** - * kvm_pmu_disable_counter_mask - disable selected PMU counters - * @vcpu: The vcpu pointer - * @val: the value guest writes to PMCNTENCLR register - * - * Call perf_event_disable to stop counting the perf event - */ -void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val) +void kvm_pmu_reprogram_counter_mask(struct kvm_vcpu *vcpu, u64 val) { int i; @@ -376,16 +354,18 @@ void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val) return; for (i = 0; i < KVM_ARMV8_PMU_MAX_COUNTERS; i++) { - struct kvm_pmc *pmc; + struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, i); if (!(val & BIT(i))) continue; - pmc = kvm_vcpu_idx_to_pmc(vcpu, i); - - if (pmc->perf_event) - perf_event_disable(pmc->perf_event); + if (kvm_pmu_counter_is_enabled(pmc)) + kvm_pmc_enable_perf_event(pmc); + else + kvm_pmc_disable_perf_event(pmc); } + + kvm_vcpu_pmu_restore_guest(vcpu); } /* @@ -626,27 +606,28 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) if (!kvm_has_feat(vcpu->kvm, ID_AA64DFR0_EL1, PMUVer, V3P5)) val &= ~ARMV8_PMU_PMCR_LP; + /* Request a reload of the PMU to enable/disable affected counters */ + if ((__vcpu_sys_reg(vcpu, PMCR_EL0) ^ val) & ARMV8_PMU_PMCR_E) + kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu); + /* The reset bits don't indicate any state, and shouldn't be saved. */ __vcpu_sys_reg(vcpu, PMCR_EL0) = val & ~(ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_P); - if (val & ARMV8_PMU_PMCR_E) { - kvm_pmu_enable_counter_mask(vcpu, - __vcpu_sys_reg(vcpu, PMCNTENSET_EL0)); - } else { - kvm_pmu_disable_counter_mask(vcpu, - __vcpu_sys_reg(vcpu, PMCNTENSET_EL0)); - } - if (val & ARMV8_PMU_PMCR_C) kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0); if (val & ARMV8_PMU_PMCR_P) { - unsigned long mask = kvm_pmu_accessible_counter_mask(vcpu); - mask &= ~BIT(ARMV8_PMU_CYCLE_IDX); + /* + * Unlike other PMU sysregs, the controls in PMCR_EL0 always apply + * to the 'guest' range of counters and never the 'hyp' range. + */ + unsigned long mask = kvm_pmu_implemented_counter_mask(vcpu) & + ~kvm_pmu_hyp_counter_mask(vcpu) & + ~BIT(ARMV8_PMU_CYCLE_IDX); + for_each_set_bit(i, &mask, 32) kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, i), 0, true); } - kvm_vcpu_pmu_restore_guest(vcpu); } static bool kvm_pmu_counter_is_enabled(struct kvm_pmc *pmc) @@ -910,11 +891,11 @@ void kvm_vcpu_reload_pmu(struct kvm_vcpu *vcpu) { u64 mask = kvm_pmu_implemented_counter_mask(vcpu); - kvm_pmu_handle_pmcr(vcpu, kvm_vcpu_read_pmcr(vcpu)); - __vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= mask; __vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= mask; __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= mask; + + kvm_pmu_reprogram_counter_mask(vcpu, mask); } int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index e2a5c2918d9e..634ff18a59a1 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1208,16 +1208,14 @@ static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, mask = kvm_pmu_accessible_counter_mask(vcpu); if (p->is_write) { val = p->regval & mask; - if (r->Op2 & 0x1) { + if (r->Op2 & 0x1) /* accessing PMCNTENSET_EL0 */ __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val; - kvm_pmu_enable_counter_mask(vcpu, val); - kvm_vcpu_pmu_restore_guest(vcpu); - } else { + else /* accessing PMCNTENCLR_EL0 */ __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val; - kvm_pmu_disable_counter_mask(vcpu, val); - } + + kvm_pmu_reprogram_counter_mask(vcpu, val); } else { p->regval = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); } @@ -2450,6 +2448,26 @@ static unsigned int s1pie_el2_visibility(const struct kvm_vcpu *vcpu, return __el2_visibility(vcpu, rd, s1pie_visibility); } +static bool access_mdcr(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + u64 old = __vcpu_sys_reg(vcpu, MDCR_EL2); + + if (!access_rw(vcpu, p, r)) + return false; + + /* + * Request a reload of the PMU to enable/disable the counters affected + * by HPME. + */ + if ((old ^ __vcpu_sys_reg(vcpu, MDCR_EL2)) & MDCR_EL2_HPME) + kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu); + + return true; +} + + /* * Architected system registers. * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 @@ -2983,7 +3001,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { EL2_REG(SCTLR_EL2, access_rw, reset_val, SCTLR_EL2_RES1), EL2_REG(ACTLR_EL2, access_rw, reset_val, 0), EL2_REG_VNCR(HCR_EL2, reset_hcr, 0), - EL2_REG(MDCR_EL2, access_rw, reset_val, 0), + EL2_REG(MDCR_EL2, access_mdcr, reset_val, 0), EL2_REG(CPTR_EL2, access_rw, reset_val, CPTR_NVHE_EL2_RES1), EL2_REG_VNCR(HSTR_EL2, reset_val, 0), EL2_REG_VNCR(HFGRTR_EL2, reset_val, 0), diff --git a/arch/hexagon/Makefile b/arch/hexagon/Makefile index 92d005958dfb..ff172cbe5881 100644 --- a/arch/hexagon/Makefile +++ b/arch/hexagon/Makefile @@ -32,3 +32,9 @@ KBUILD_LDFLAGS += $(ldflags-y) TIR_NAME := r19 KBUILD_CFLAGS += -ffixed-$(TIR_NAME) -DTHREADINFO_REG=$(TIR_NAME) -D__linux__ KBUILD_AFLAGS += -DTHREADINFO_REG=$(TIR_NAME) + +# Disable HexagonConstExtenders pass for LLVM versions prior to 19.1.0 +# https://github.com/llvm/llvm-project/issues/99714 +ifneq ($(call clang-min-version, 190100),y) +KBUILD_CFLAGS += -mllvm -hexagon-cext=false +endif diff --git a/arch/nios2/kernel/cpuinfo.c b/arch/nios2/kernel/cpuinfo.c index 338849c430a5..7b1e8f9128e9 100644 --- a/arch/nios2/kernel/cpuinfo.c +++ b/arch/nios2/kernel/cpuinfo.c @@ -143,11 +143,11 @@ static int show_cpuinfo(struct seq_file *m, void *v) " DIV:\t\t%s\n" " BMX:\t\t%s\n" " CDX:\t\t%s\n", - cpuinfo.has_mul ? "yes" : "no", - cpuinfo.has_mulx ? "yes" : "no", - cpuinfo.has_div ? "yes" : "no", - cpuinfo.has_bmx ? "yes" : "no", - cpuinfo.has_cdx ? "yes" : "no"); + str_yes_no(cpuinfo.has_mul), + str_yes_no(cpuinfo.has_mulx), + str_yes_no(cpuinfo.has_div), + str_yes_no(cpuinfo.has_bmx), + str_yes_no(cpuinfo.has_cdx)); seq_printf(m, "Icache:\t\t%ukB, line length: %u\n", diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig index 57ded82c2840..e8b3f67bf3f5 100644 --- a/arch/powerpc/configs/pmac32_defconfig +++ b/arch/powerpc/configs/pmac32_defconfig @@ -208,6 +208,7 @@ CONFIG_FB_ATY=y CONFIG_FB_ATY_CT=y CONFIG_FB_ATY_GX=y CONFIG_FB_3DFX=y +CONFIG_BACKLIGHT_CLASS_DEVICE=y # CONFIG_VGA_CONSOLE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index 4d77e17541e9..ca0c90e95837 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -716,6 +716,7 @@ CONFIG_FB_TRIDENT=m CONFIG_FB_SM501=m CONFIG_FB_IBM_GXT4500=y CONFIG_LCD_PLATFORM=m +CONFIG_BACKLIGHT_CLASS_DEVICE=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y CONFIG_LOGO=y diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index 6d0d329cbb35..f9acf866c709 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h @@ -34,6 +34,8 @@ enum vcpu_ftr { #define E500_TLB_BITMAP (1 << 30) /* TLB1 entry is mapped by host TLB0 */ #define E500_TLB_TLB0 (1 << 29) +/* entry is writable on the host */ +#define E500_TLB_WRITABLE (1 << 28) /* bits [6-5] MAS2_X1 and MAS2_X0 and [4-0] bits for WIMGE */ #define E500_TLB_MAS2_ATTR (0x7f) diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index e5a145b578a4..06caf8bbbe2b 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -45,11 +45,14 @@ static inline unsigned int tlb1_max_shadow_size(void) return host_tlb_params[1].entries - tlbcam_index - 1; } -static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode) +static inline u32 e500_shadow_mas3_attrib(u32 mas3, bool writable, int usermode) { /* Mask off reserved bits. */ mas3 &= MAS3_ATTRIB_MASK; + if (!writable) + mas3 &= ~(MAS3_UW|MAS3_SW); + #ifndef CONFIG_KVM_BOOKE_HV if (!usermode) { /* Guest is in supervisor mode, @@ -242,17 +245,18 @@ static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe) return tlbe->mas7_3 & (MAS3_SW|MAS3_UW); } -static inline bool kvmppc_e500_ref_setup(struct tlbe_ref *ref, +static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref, struct kvm_book3e_206_tlb_entry *gtlbe, - kvm_pfn_t pfn, unsigned int wimg) + kvm_pfn_t pfn, unsigned int wimg, + bool writable) { ref->pfn = pfn; ref->flags = E500_TLB_VALID; + if (writable) + ref->flags |= E500_TLB_WRITABLE; /* Use guest supplied MAS2_G and MAS2_E */ ref->flags |= (gtlbe->mas2 & MAS2_ATTRIB_MASK) | wimg; - - return tlbe_is_writable(gtlbe); } static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref) @@ -305,6 +309,7 @@ static void kvmppc_e500_setup_stlbe( { kvm_pfn_t pfn = ref->pfn; u32 pr = vcpu->arch.shared->msr & MSR_PR; + bool writable = !!(ref->flags & E500_TLB_WRITABLE); BUG_ON(!(ref->flags & E500_TLB_VALID)); @@ -312,7 +317,7 @@ static void kvmppc_e500_setup_stlbe( stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID; stlbe->mas2 = (gvaddr & MAS2_EPN) | (ref->flags & E500_TLB_MAS2_ATTR); stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) | - e500_shadow_mas3_attrib(gtlbe->mas7_3, pr); + e500_shadow_mas3_attrib(gtlbe->mas7_3, writable, pr); } static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, @@ -321,15 +326,14 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, struct tlbe_ref *ref) { struct kvm_memory_slot *slot; - unsigned long pfn = 0; /* silence GCC warning */ + unsigned int psize; + unsigned long pfn; struct page *page = NULL; unsigned long hva; - int pfnmap = 0; int tsize = BOOK3E_PAGESZ_4K; int ret = 0; unsigned long mmu_seq; struct kvm *kvm = vcpu_e500->vcpu.kvm; - unsigned long tsize_pages = 0; pte_t *ptep; unsigned int wimg = 0; pgd_t *pgdir; @@ -351,110 +355,12 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, slot = gfn_to_memslot(vcpu_e500->vcpu.kvm, gfn); hva = gfn_to_hva_memslot(slot, gfn); - if (tlbsel == 1) { - struct vm_area_struct *vma; - mmap_read_lock(kvm->mm); - - vma = find_vma(kvm->mm, hva); - if (vma && hva >= vma->vm_start && - (vma->vm_flags & VM_PFNMAP)) { - /* - * This VMA is a physically contiguous region (e.g. - * /dev/mem) that bypasses normal Linux page - * management. Find the overlap between the - * vma and the memslot. - */ - - unsigned long start, end; - unsigned long slot_start, slot_end; - - pfnmap = 1; - - start = vma->vm_pgoff; - end = start + - vma_pages(vma); - - pfn = start + ((hva - vma->vm_start) >> PAGE_SHIFT); - - slot_start = pfn - (gfn - slot->base_gfn); - slot_end = slot_start + slot->npages; - - if (start < slot_start) - start = slot_start; - if (end > slot_end) - end = slot_end; - - tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >> - MAS1_TSIZE_SHIFT; - - /* - * e500 doesn't implement the lowest tsize bit, - * or 1K pages. - */ - tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1); - - /* - * Now find the largest tsize (up to what the guest - * requested) that will cover gfn, stay within the - * range, and for which gfn and pfn are mutually - * aligned. - */ - - for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) { - unsigned long gfn_start, gfn_end; - tsize_pages = 1UL << (tsize - 2); - - gfn_start = gfn & ~(tsize_pages - 1); - gfn_end = gfn_start + tsize_pages; - - if (gfn_start + pfn - gfn < start) - continue; - if (gfn_end + pfn - gfn > end) - continue; - if ((gfn & (tsize_pages - 1)) != - (pfn & (tsize_pages - 1))) - continue; - - gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1); - pfn &= ~(tsize_pages - 1); - break; - } - } else if (vma && hva >= vma->vm_start && - is_vm_hugetlb_page(vma)) { - unsigned long psize = vma_kernel_pagesize(vma); - - tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >> - MAS1_TSIZE_SHIFT; - - /* - * Take the largest page size that satisfies both host - * and guest mapping - */ - tsize = min(__ilog2(psize) - 10, tsize); - - /* - * e500 doesn't implement the lowest tsize bit, - * or 1K pages. - */ - tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1); - } - - mmap_read_unlock(kvm->mm); - } - - if (likely(!pfnmap)) { - tsize_pages = 1UL << (tsize + 10 - PAGE_SHIFT); - pfn = __kvm_faultin_pfn(slot, gfn, FOLL_WRITE, NULL, &page); - if (is_error_noslot_pfn(pfn)) { - if (printk_ratelimit()) - pr_err("%s: real page not found for gfn %lx\n", - __func__, (long)gfn); - return -EINVAL; - } - - /* Align guest and physical address to page map boundaries */ - pfn &= ~(tsize_pages - 1); - gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1); + pfn = __kvm_faultin_pfn(slot, gfn, FOLL_WRITE, &writable, &page); + if (is_error_noslot_pfn(pfn)) { + if (printk_ratelimit()) + pr_err("%s: real page not found for gfn %lx\n", + __func__, (long)gfn); + return -EINVAL; } spin_lock(&kvm->mmu_lock); @@ -472,14 +378,13 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, * can't run hence pfn won't change. */ local_irq_save(flags); - ptep = find_linux_pte(pgdir, hva, NULL, NULL); + ptep = find_linux_pte(pgdir, hva, NULL, &psize); if (ptep) { pte_t pte = READ_ONCE(*ptep); if (pte_present(pte)) { wimg = (pte_val(pte) >> PTE_WIMGE_SHIFT) & MAS2_WIMGE_MASK; - local_irq_restore(flags); } else { local_irq_restore(flags); pr_err_ratelimited("%s: pte not present: gfn %lx,pfn %lx\n", @@ -488,10 +393,72 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, goto out; } } - writable = kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg); + local_irq_restore(flags); + + if (psize && tlbsel == 1) { + unsigned long psize_pages, tsize_pages; + unsigned long start, end; + unsigned long slot_start, slot_end; + + psize_pages = 1UL << (psize - PAGE_SHIFT); + start = pfn & ~(psize_pages - 1); + end = start + psize_pages; + + slot_start = pfn - (gfn - slot->base_gfn); + slot_end = slot_start + slot->npages; + + if (start < slot_start) + start = slot_start; + if (end > slot_end) + end = slot_end; + + tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >> + MAS1_TSIZE_SHIFT; + + /* + * Any page size that doesn't satisfy the host mapping + * will fail the start and end tests. + */ + tsize = min(psize - PAGE_SHIFT + BOOK3E_PAGESZ_4K, tsize); + + /* + * e500 doesn't implement the lowest tsize bit, + * or 1K pages. + */ + tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1); + + /* + * Now find the largest tsize (up to what the guest + * requested) that will cover gfn, stay within the + * range, and for which gfn and pfn are mutually + * aligned. + */ + + for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) { + unsigned long gfn_start, gfn_end; + tsize_pages = 1UL << (tsize - 2); + + gfn_start = gfn & ~(tsize_pages - 1); + gfn_end = gfn_start + tsize_pages; + + if (gfn_start + pfn - gfn < start) + continue; + if (gfn_end + pfn - gfn > end) + continue; + if ((gfn & (tsize_pages - 1)) != + (pfn & (tsize_pages - 1))) + continue; + + gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1); + pfn &= ~(tsize_pages - 1); + break; + } + } + kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg, writable); kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize, ref, gvaddr, stlbe); + writable = tlbe_is_writable(stlbe); /* Clear i-cache for new pages */ kvmppc_mmu_flush_icache(pfn); diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c index f381b177ea06..0b6365d85d11 100644 --- a/arch/powerpc/platforms/book3s/vas-api.c +++ b/arch/powerpc/platforms/book3s/vas-api.c @@ -464,7 +464,43 @@ static vm_fault_t vas_mmap_fault(struct vm_fault *vmf) return VM_FAULT_SIGBUS; } +/* + * During mmap() paste address, mapping VMA is saved in VAS window + * struct which is used to unmap during migration if the window is + * still open. But the user space can remove this mapping with + * munmap() before closing the window and the VMA address will + * be invalid. Set VAS window VMA to NULL in this function which + * is called before VMA free. + */ +static void vas_mmap_close(struct vm_area_struct *vma) +{ + struct file *fp = vma->vm_file; + struct coproc_instance *cp_inst = fp->private_data; + struct vas_window *txwin; + + /* Should not happen */ + if (!cp_inst || !cp_inst->txwin) { + pr_err("No attached VAS window for the paste address mmap\n"); + return; + } + + txwin = cp_inst->txwin; + /* + * task_ref.vma is set in coproc_mmap() during mmap paste + * address. So it has to be the same VMA that is getting freed. + */ + if (WARN_ON(txwin->task_ref.vma != vma)) { + pr_err("Invalid paste address mmaping\n"); + return; + } + + mutex_lock(&txwin->task_ref.mmap_mutex); + txwin->task_ref.vma = NULL; + mutex_unlock(&txwin->task_ref.mmap_mutex); +} + static const struct vm_operations_struct vas_vm_ops = { + .close = vas_mmap_close, .fault = vas_mmap_fault, }; diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 71aabc5c6713..125f5ecd9565 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -122,6 +122,7 @@ struct kernel_mapping { extern struct kernel_mapping kernel_map; extern phys_addr_t phys_ram_base; +extern unsigned long vmemmap_start_pfn; #define is_kernel_mapping(x) \ ((x) >= kernel_map.virt_addr && (x) < (kernel_map.virt_addr + kernel_map.size)) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index d4e99eef90ac..050fdc49b5ad 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -87,7 +87,7 @@ * Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel * is configured with CONFIG_SPARSEMEM_VMEMMAP enabled. */ -#define vmemmap ((struct page *)VMEMMAP_START - (phys_ram_base >> PAGE_SHIFT)) +#define vmemmap ((struct page *)VMEMMAP_START - vmemmap_start_pfn) #define PCI_IO_SIZE SZ_16M #define PCI_IO_END VMEMMAP_START diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index 6c82318065cf..3d250824178b 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -159,6 +159,7 @@ struct riscv_pmu_snapshot_data { }; #define RISCV_PMU_RAW_EVENT_MASK GENMASK_ULL(47, 0) +#define RISCV_PMU_PLAT_FW_EVENT_MASK GENMASK_ULL(61, 0) #define RISCV_PMU_RAW_EVENT_IDX 0x20000 #define RISCV_PLAT_FW_EVENT 0xFFFF diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h index e5121b89acea..52f11bfd0079 100644 --- a/arch/riscv/include/asm/spinlock.h +++ b/arch/riscv/include/asm/spinlock.h @@ -3,8 +3,11 @@ #ifndef __ASM_RISCV_SPINLOCK_H #define __ASM_RISCV_SPINLOCK_H -#ifdef CONFIG_RISCV_COMBO_SPINLOCKS +#ifdef CONFIG_QUEUED_SPINLOCKS #define _Q_PENDING_LOOPS (1 << 9) +#endif + +#ifdef CONFIG_RISCV_COMBO_SPINLOCKS #define __no_arch_spinlock_redefine #include <asm/ticket_spinlock.h> diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index c200d329d4bd..33a5a9f2a0d4 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -23,21 +23,21 @@ REG_S a0, TASK_TI_A0(tp) csrr a0, CSR_CAUSE /* Exclude IRQs */ - blt a0, zero, _new_vmalloc_restore_context_a0 + blt a0, zero, .Lnew_vmalloc_restore_context_a0 REG_S a1, TASK_TI_A1(tp) /* Only check new_vmalloc if we are in page/protection fault */ li a1, EXC_LOAD_PAGE_FAULT - beq a0, a1, _new_vmalloc_kernel_address + beq a0, a1, .Lnew_vmalloc_kernel_address li a1, EXC_STORE_PAGE_FAULT - beq a0, a1, _new_vmalloc_kernel_address + beq a0, a1, .Lnew_vmalloc_kernel_address li a1, EXC_INST_PAGE_FAULT - bne a0, a1, _new_vmalloc_restore_context_a1 + bne a0, a1, .Lnew_vmalloc_restore_context_a1 -_new_vmalloc_kernel_address: +.Lnew_vmalloc_kernel_address: /* Is it a kernel address? */ csrr a0, CSR_TVAL - bge a0, zero, _new_vmalloc_restore_context_a1 + bge a0, zero, .Lnew_vmalloc_restore_context_a1 /* Check if a new vmalloc mapping appeared that could explain the trap */ REG_S a2, TASK_TI_A2(tp) @@ -69,7 +69,7 @@ _new_vmalloc_kernel_address: /* Check the value of new_vmalloc for this cpu */ REG_L a2, 0(a0) and a2, a2, a1 - beq a2, zero, _new_vmalloc_restore_context + beq a2, zero, .Lnew_vmalloc_restore_context /* Atomically reset the current cpu bit in new_vmalloc */ amoxor.d a0, a1, (a0) @@ -83,11 +83,11 @@ _new_vmalloc_kernel_address: csrw CSR_SCRATCH, x0 sret -_new_vmalloc_restore_context: +.Lnew_vmalloc_restore_context: REG_L a2, TASK_TI_A2(tp) -_new_vmalloc_restore_context_a1: +.Lnew_vmalloc_restore_context_a1: REG_L a1, TASK_TI_A1(tp) -_new_vmalloc_restore_context_a0: +.Lnew_vmalloc_restore_context_a0: REG_L a0, TASK_TI_A0(tp) .endm @@ -278,6 +278,7 @@ SYM_CODE_START_NOALIGN(ret_from_exception) #else sret #endif +SYM_INNER_LABEL(ret_from_exception_end, SYM_L_GLOBAL) SYM_CODE_END(ret_from_exception) ASM_NOKPROBE(ret_from_exception) diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 1cd461f3d872..47d0ebeec93c 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -23,7 +23,7 @@ struct used_bucket { struct relocation_head { struct hlist_node node; - struct list_head *rel_entry; + struct list_head rel_entry; void *location; }; @@ -634,7 +634,7 @@ process_accumulated_relocations(struct module *me, location = rel_head_iter->location; list_for_each_entry_safe(rel_entry_iter, rel_entry_iter_tmp, - rel_head_iter->rel_entry, + &rel_head_iter->rel_entry, head) { curr_type = rel_entry_iter->type; reloc_handlers[curr_type].reloc_handler( @@ -704,16 +704,7 @@ static int add_relocation_to_accumulate(struct module *me, int type, return -ENOMEM; } - rel_head->rel_entry = - kmalloc(sizeof(struct list_head), GFP_KERNEL); - - if (!rel_head->rel_entry) { - kfree(entry); - kfree(rel_head); - return -ENOMEM; - } - - INIT_LIST_HEAD(rel_head->rel_entry); + INIT_LIST_HEAD(&rel_head->rel_entry); rel_head->location = location; INIT_HLIST_NODE(&rel_head->node); if (!current_head->first) { @@ -722,7 +713,6 @@ static int add_relocation_to_accumulate(struct module *me, int type, if (!bucket) { kfree(entry); - kfree(rel_head->rel_entry); kfree(rel_head); return -ENOMEM; } @@ -735,7 +725,7 @@ static int add_relocation_to_accumulate(struct module *me, int type, } /* Add relocation to head of discovered rel_head */ - list_add_tail(&entry->head, rel_head->rel_entry); + list_add_tail(&entry->head, &rel_head->rel_entry); return 0; } diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c index 380a0e8cecc0..c0738d6c6498 100644 --- a/arch/riscv/kernel/probes/kprobes.c +++ b/arch/riscv/kernel/probes/kprobes.c @@ -30,7 +30,7 @@ static void __kprobes arch_prepare_ss_slot(struct kprobe *p) p->ainsn.api.restore = (unsigned long)p->addr + len; patch_text_nosync(p->ainsn.api.insn, &p->opcode, len); - patch_text_nosync(p->ainsn.api.insn + len, &insn, GET_INSN_LENGTH(insn)); + patch_text_nosync((void *)p->ainsn.api.insn + len, &insn, GET_INSN_LENGTH(insn)); } static void __kprobes arch_prepare_simulate(struct kprobe *p) diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 153a2db4c5fa..d4355c770c36 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -17,6 +17,7 @@ #ifdef CONFIG_FRAME_POINTER extern asmlinkage void handle_exception(void); +extern unsigned long ret_from_exception_end; static inline int fp_is_valid(unsigned long fp, unsigned long sp) { @@ -71,7 +72,8 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, fp = frame->fp; pc = ftrace_graph_ret_addr(current, &graph_idx, frame->ra, &frame->ra); - if (pc == (unsigned long)handle_exception) { + if (pc >= (unsigned long)handle_exception && + pc < (unsigned long)&ret_from_exception_end) { if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc))) break; diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 51ebfd23e007..8ff8e8b36524 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -35,7 +35,7 @@ int show_unhandled_signals = 1; -static DEFINE_SPINLOCK(die_lock); +static DEFINE_RAW_SPINLOCK(die_lock); static int copy_code(struct pt_regs *regs, u16 *val, const u16 *insns) { @@ -81,7 +81,7 @@ void die(struct pt_regs *regs, const char *str) oops_enter(); - spin_lock_irqsave(&die_lock, flags); + raw_spin_lock_irqsave(&die_lock, flags); console_verbose(); bust_spinlocks(1); @@ -100,7 +100,7 @@ void die(struct pt_regs *regs, const char *str) bust_spinlocks(0); add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); - spin_unlock_irqrestore(&die_lock, flags); + raw_spin_unlock_irqrestore(&die_lock, flags); oops_exit(); if (in_interrupt()) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index fc53ce748c80..8d167e09f1fe 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -33,6 +33,7 @@ #include <asm/pgtable.h> #include <asm/sections.h> #include <asm/soc.h> +#include <asm/sparsemem.h> #include <asm/tlbflush.h> #include "../kernel/head.h" @@ -62,6 +63,13 @@ EXPORT_SYMBOL(pgtable_l5_enabled); phys_addr_t phys_ram_base __ro_after_init; EXPORT_SYMBOL(phys_ram_base); +#ifdef CONFIG_SPARSEMEM_VMEMMAP +#define VMEMMAP_ADDR_ALIGN (1ULL << SECTION_SIZE_BITS) + +unsigned long vmemmap_start_pfn __ro_after_init; +EXPORT_SYMBOL(vmemmap_start_pfn); +#endif + unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); @@ -240,8 +248,12 @@ static void __init setup_bootmem(void) * Make sure we align the start of the memory on a PMD boundary so that * at worst, we map the linear mapping with PMD mappings. */ - if (!IS_ENABLED(CONFIG_XIP_KERNEL)) + if (!IS_ENABLED(CONFIG_XIP_KERNEL)) { phys_ram_base = memblock_start_of_DRAM() & PMD_MASK; +#ifdef CONFIG_SPARSEMEM_VMEMMAP + vmemmap_start_pfn = round_down(phys_ram_base, VMEMMAP_ADDR_ALIGN) >> PAGE_SHIFT; +#endif + } /* * In 64-bit, any use of __va/__pa before this point is wrong as we @@ -1101,6 +1113,9 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) kernel_map.xiprom_sz = (uintptr_t)(&_exiprom) - (uintptr_t)(&_xiprom); phys_ram_base = CONFIG_PHYS_RAM_BASE; +#ifdef CONFIG_SPARSEMEM_VMEMMAP + vmemmap_start_pfn = round_down(phys_ram_base, VMEMMAP_ADDR_ALIGN) >> PAGE_SHIFT; +#endif kernel_map.phys_addr = (uintptr_t)CONFIG_PHYS_RAM_BASE; kernel_map.size = (uintptr_t)(&_end) - (uintptr_t)(&_start); diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index abe6e6c0ab98..6087d38c7235 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -234,6 +234,8 @@ static unsigned long get_vmem_size(unsigned long identity_size, vsize = round_up(SZ_2G + max_mappable, rte_size) + round_up(vmemmap_size, rte_size) + FIXMAP_SIZE + MODULES_LEN + KASLR_LEN; + if (IS_ENABLED(CONFIG_KMSAN)) + vsize += MODULES_LEN * 2; return size_add(vsize, vmalloc_size); } diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index 145035f84a0e..3fa28db2fe59 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -306,7 +306,7 @@ static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long e pages++; } } - if (mode == POPULATE_DIRECT) + if (mode == POPULATE_IDENTITY) update_page_count(PG_DIRECT_MAP_4K, pages); } @@ -339,7 +339,7 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e } pgtable_pte_populate(pmd, addr, next, mode); } - if (mode == POPULATE_DIRECT) + if (mode == POPULATE_IDENTITY) update_page_count(PG_DIRECT_MAP_1M, pages); } @@ -372,7 +372,7 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e } pgtable_pmd_populate(pud, addr, next, mode); } - if (mode == POPULATE_DIRECT) + if (mode == POPULATE_IDENTITY) update_page_count(PG_DIRECT_MAP_2G, pages); } diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index edbb52ce3f1e..7d12a1305fc9 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -270,7 +270,7 @@ static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \ if (len >= sizeof(_value)) \ return -E2BIG; \ len = strscpy(_value, buf, sizeof(_value)); \ - if (len < 0) \ + if ((ssize_t)len < 0) \ return len; \ strim(_value); \ return len; \ diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index ea8dce299954..d4f031e086fc 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -2678,9 +2678,13 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) kvm_s390_clear_float_irqs(dev->kvm); break; case KVM_DEV_FLIC_APF_ENABLE: + if (kvm_is_ucontrol(dev->kvm)) + return -EINVAL; dev->kvm->arch.gmap->pfault_enabled = 1; break; case KVM_DEV_FLIC_APF_DISABLE_WAIT: + if (kvm_is_ucontrol(dev->kvm)) + return -EINVAL; dev->kvm->arch.gmap->pfault_enabled = 0; /* * Make sure no async faults are in transition when @@ -2894,6 +2898,8 @@ int kvm_set_routing_entry(struct kvm *kvm, switch (ue->type) { /* we store the userspace addresses instead of the guest addresses */ case KVM_IRQ_ROUTING_S390_ADAPTER: + if (kvm_is_ucontrol(kvm)) + return -EINVAL; e->set = set_adapter_int; uaddr = gmap_translate(kvm->arch.gmap, ue->u.adapter.summary_addr); if (uaddr == -EFAULT) diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 150b9387860a..a687695d8f68 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -854,7 +854,7 @@ unpin: static void unpin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, gpa_t gpa) { - hpa_t hpa = (hpa_t) vsie_page->scb_o; + hpa_t hpa = virt_to_phys(vsie_page->scb_o); if (hpa) unpin_guest_page(vcpu->kvm, gpa, hpa); diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 2e1e26846050..99c590da0ae2 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -429,6 +429,16 @@ static struct event_constraint intel_lnc_event_constraints[] = { EVENT_CONSTRAINT_END }; +static struct extra_reg intel_lnc_extra_regs[] __read_mostly = { + INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0xfffffffffffull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0xfffffffffffull, RSP_1), + INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), + INTEL_UEVENT_EXTRA_REG(0x02c6, MSR_PEBS_FRONTEND, 0x9, FE), + INTEL_UEVENT_EXTRA_REG(0x03c6, MSR_PEBS_FRONTEND, 0x7fff1f, FE), + INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0xf, FE), + INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE), + EVENT_EXTRA_END +}; EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3"); EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3"); @@ -6422,7 +6432,7 @@ static __always_inline void intel_pmu_init_lnc(struct pmu *pmu) intel_pmu_init_glc(pmu); hybrid(pmu, event_constraints) = intel_lnc_event_constraints; hybrid(pmu, pebs_constraints) = intel_lnc_pebs_event_constraints; - hybrid(pmu, extra_regs) = intel_rwc_extra_regs; + hybrid(pmu, extra_regs) = intel_lnc_extra_regs; } static __always_inline void intel_pmu_init_skt(struct pmu *pmu) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 1a4b326ca2ce..6ba6549f26fa 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -2517,6 +2517,7 @@ void __init intel_ds_init(void) x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME; break; + case 6: case 5: x86_pmu.pebs_ept = 1; fallthrough; diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index d98fac567684..e7aba7349231 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1910,6 +1910,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, &adl_uncore_init), X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, &gnr_uncore_init), X86_MATCH_VFM(INTEL_ATOM_CRESTMONT, &gnr_uncore_init), + X86_MATCH_VFM(INTEL_ATOM_DARKMONT_X, &gnr_uncore_init), {}, }; MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match); diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 17b6590748c0..645aa360628d 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -452,6 +452,7 @@ #define X86_FEATURE_SME_COHERENT (19*32+10) /* AMD hardware-enforced cache coherency */ #define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" AMD SEV-ES full debug state swap support */ #define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */ +#define X86_FEATURE_HV_INUSE_WR_ALLOWED (19*32+30) /* Allow Write to in-use hypervisor-owned pages */ /* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */ #define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c0975815980c..20e6009381ed 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -230,6 +230,8 @@ static inline unsigned long long l1tf_pfn_limit(void) return BIT_ULL(boot_cpu_data.x86_cache_bits - 1 - PAGE_SHIFT); } +void init_cpu_devs(void); +void get_cpu_vendor(struct cpuinfo_x86 *c); extern void early_cpu_init(void); extern void identify_secondary_cpu(struct cpuinfo_x86 *); extern void print_cpu_info(struct cpuinfo_x86 *); diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h index 125c407e2abe..41502bd2afd6 100644 --- a/arch/x86/include/asm/static_call.h +++ b/arch/x86/include/asm/static_call.h @@ -65,4 +65,19 @@ extern bool __static_call_fixup(void *tramp, u8 op, void *dest); +extern void __static_call_update_early(void *tramp, void *func); + +#define static_call_update_early(name, _func) \ +({ \ + typeof(&STATIC_CALL_TRAMP(name)) __F = (_func); \ + if (static_call_initialized) { \ + __static_call_update(&STATIC_CALL_KEY(name), \ + STATIC_CALL_TRAMP_ADDR(name), __F);\ + } else { \ + WRITE_ONCE(STATIC_CALL_KEY(name).func, _func); \ + __static_call_update_early(STATIC_CALL_TRAMP_ADDR(name),\ + __F); \ + } \ +}) + #endif /* _ASM_STATIC_CALL_H */ diff --git a/arch/x86/include/asm/sync_core.h b/arch/x86/include/asm/sync_core.h index ab7382f92aff..96bda43538ee 100644 --- a/arch/x86/include/asm/sync_core.h +++ b/arch/x86/include/asm/sync_core.h @@ -8,7 +8,7 @@ #include <asm/special_insns.h> #ifdef CONFIG_X86_32 -static inline void iret_to_self(void) +static __always_inline void iret_to_self(void) { asm volatile ( "pushfl\n\t" @@ -19,7 +19,7 @@ static inline void iret_to_self(void) : ASM_CALL_CONSTRAINT : : "memory"); } #else -static inline void iret_to_self(void) +static __always_inline void iret_to_self(void) { unsigned int tmp; @@ -55,7 +55,7 @@ static inline void iret_to_self(void) * Like all of Linux's memory ordering operations, this is a * compiler barrier as well. */ -static inline void sync_core(void) +static __always_inline void sync_core(void) { /* * The SERIALIZE instruction is the most straightforward way to diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index a2dd24947eb8..97771b9d33af 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -39,9 +39,11 @@ #include <linux/string.h> #include <linux/types.h> #include <linux/pgtable.h> +#include <linux/instrumentation.h> #include <trace/events/xen.h> +#include <asm/alternative.h> #include <asm/page.h> #include <asm/smap.h> #include <asm/nospec-branch.h> @@ -86,11 +88,20 @@ struct xen_dm_op_buf; * there aren't more than 5 arguments...) */ -extern struct { char _entry[32]; } hypercall_page[]; +void xen_hypercall_func(void); +DECLARE_STATIC_CALL(xen_hypercall, xen_hypercall_func); -#define __HYPERCALL "call hypercall_page+%c[offset]" -#define __HYPERCALL_ENTRY(x) \ - [offset] "i" (__HYPERVISOR_##x * sizeof(hypercall_page[0])) +#ifdef MODULE +#define __ADDRESSABLE_xen_hypercall +#else +#define __ADDRESSABLE_xen_hypercall __ADDRESSABLE_ASM_STR(__SCK__xen_hypercall) +#endif + +#define __HYPERCALL \ + __ADDRESSABLE_xen_hypercall \ + "call __SCT__xen_hypercall" + +#define __HYPERCALL_ENTRY(x) "a" (x) #ifdef CONFIG_X86_32 #define __HYPERCALL_RETREG "eax" @@ -148,7 +159,7 @@ extern struct { char _entry[32]; } hypercall_page[]; __HYPERCALL_0ARG(); \ asm volatile (__HYPERCALL \ : __HYPERCALL_0PARAM \ - : __HYPERCALL_ENTRY(name) \ + : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name) \ : __HYPERCALL_CLOBBER0); \ (type)__res; \ }) @@ -159,7 +170,7 @@ extern struct { char _entry[32]; } hypercall_page[]; __HYPERCALL_1ARG(a1); \ asm volatile (__HYPERCALL \ : __HYPERCALL_1PARAM \ - : __HYPERCALL_ENTRY(name) \ + : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name) \ : __HYPERCALL_CLOBBER1); \ (type)__res; \ }) @@ -170,7 +181,7 @@ extern struct { char _entry[32]; } hypercall_page[]; __HYPERCALL_2ARG(a1, a2); \ asm volatile (__HYPERCALL \ : __HYPERCALL_2PARAM \ - : __HYPERCALL_ENTRY(name) \ + : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name) \ : __HYPERCALL_CLOBBER2); \ (type)__res; \ }) @@ -181,7 +192,7 @@ extern struct { char _entry[32]; } hypercall_page[]; __HYPERCALL_3ARG(a1, a2, a3); \ asm volatile (__HYPERCALL \ : __HYPERCALL_3PARAM \ - : __HYPERCALL_ENTRY(name) \ + : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name) \ : __HYPERCALL_CLOBBER3); \ (type)__res; \ }) @@ -192,7 +203,7 @@ extern struct { char _entry[32]; } hypercall_page[]; __HYPERCALL_4ARG(a1, a2, a3, a4); \ asm volatile (__HYPERCALL \ : __HYPERCALL_4PARAM \ - : __HYPERCALL_ENTRY(name) \ + : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name) \ : __HYPERCALL_CLOBBER4); \ (type)__res; \ }) @@ -206,12 +217,9 @@ xen_single_call(unsigned int call, __HYPERCALL_DECLS; __HYPERCALL_5ARG(a1, a2, a3, a4, a5); - if (call >= PAGE_SIZE / sizeof(hypercall_page[0])) - return -EINVAL; - - asm volatile(CALL_NOSPEC + asm volatile(__HYPERCALL : __HYPERCALL_5PARAM - : [thunk_target] "a" (&hypercall_page[call]) + : __HYPERCALL_ENTRY(call) : __HYPERCALL_CLOBBER5); return (long)__res; diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c index 465647456753..f17d16607882 100644 --- a/arch/x86/kernel/callthunks.c +++ b/arch/x86/kernel/callthunks.c @@ -143,11 +143,6 @@ static bool skip_addr(void *dest) dest < (void*)relocate_kernel + KEXEC_CONTROL_CODE_MAX_SIZE) return true; #endif -#ifdef CONFIG_XEN - if (dest >= (void *)hypercall_page && - dest < (void*)hypercall_page + PAGE_SIZE) - return true; -#endif return false; } diff --git a/arch/x86/kernel/cet.c b/arch/x86/kernel/cet.c index d2c732a34e5d..303bf74d175b 100644 --- a/arch/x86/kernel/cet.c +++ b/arch/x86/kernel/cet.c @@ -81,6 +81,34 @@ static void do_user_cp_fault(struct pt_regs *regs, unsigned long error_code) static __ro_after_init bool ibt_fatal = true; +/* + * By definition, all missing-ENDBRANCH #CPs are a result of WFE && !ENDBR. + * + * For the kernel IBT no ENDBR selftest where #CPs are deliberately triggered, + * the WFE state of the interrupted context needs to be cleared to let execution + * continue. Otherwise when the CPU resumes from the instruction that just + * caused the previous #CP, another missing-ENDBRANCH #CP is raised and the CPU + * enters a dead loop. + * + * This is not a problem with IDT because it doesn't preserve WFE and IRET doesn't + * set WFE. But FRED provides space on the entry stack (in an expanded CS area) + * to save and restore the WFE state, thus the WFE state is no longer clobbered, + * so software must clear it. + */ +static void ibt_clear_fred_wfe(struct pt_regs *regs) +{ + /* + * No need to do any FRED checks. + * + * For IDT event delivery, the high-order 48 bits of CS are pushed + * as 0s into the stack, and later IRET ignores these bits. + * + * For FRED, a test to check if fred_cs.wfe is set would be dropped + * by compilers. + */ + regs->fred_cs.wfe = 0; +} + static void do_kernel_cp_fault(struct pt_regs *regs, unsigned long error_code) { if ((error_code & CP_EC) != CP_ENDBR) { @@ -90,6 +118,7 @@ static void do_kernel_cp_fault(struct pt_regs *regs, unsigned long error_code) if (unlikely(regs->ip == (unsigned long)&ibt_selftest_noendbr)) { regs->ax = 0; + ibt_clear_fred_wfe(regs); return; } @@ -97,6 +126,7 @@ static void do_kernel_cp_fault(struct pt_regs *regs, unsigned long error_code) if (!ibt_fatal) { printk(KERN_DEFAULT CUT_HERE); __warn(__FILE__, __LINE__, (void *)regs->ip, TAINT_WARN, regs, NULL); + ibt_clear_fred_wfe(regs); return; } BUG(); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a5c28975c608..3e9037690814 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -867,7 +867,7 @@ static void cpu_detect_tlb(struct cpuinfo_x86 *c) tlb_lld_4m[ENTRIES], tlb_lld_1g[ENTRIES]); } -static void get_cpu_vendor(struct cpuinfo_x86 *c) +void get_cpu_vendor(struct cpuinfo_x86 *c) { char *v = c->x86_vendor_id; int i; @@ -1649,15 +1649,11 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) detect_nopl(); } -void __init early_cpu_init(void) +void __init init_cpu_devs(void) { const struct cpu_dev *const *cdev; int count = 0; -#ifdef CONFIG_PROCESSOR_SELECT - pr_info("KERNEL supported cpus:\n"); -#endif - for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { const struct cpu_dev *cpudev = *cdev; @@ -1665,20 +1661,30 @@ void __init early_cpu_init(void) break; cpu_devs[count] = cpudev; count++; + } +} +void __init early_cpu_init(void) +{ #ifdef CONFIG_PROCESSOR_SELECT - { - unsigned int j; - - for (j = 0; j < 2; j++) { - if (!cpudev->c_ident[j]) - continue; - pr_info(" %s %s\n", cpudev->c_vendor, - cpudev->c_ident[j]); - } - } + unsigned int i, j; + + pr_info("KERNEL supported cpus:\n"); #endif + + init_cpu_devs(); + +#ifdef CONFIG_PROCESSOR_SELECT + for (i = 0; i < X86_VENDOR_NUM && cpu_devs[i]; i++) { + for (j = 0; j < 2; j++) { + if (!cpu_devs[i]->c_ident[j]) + continue; + pr_info(" %s %s\n", cpu_devs[i]->c_vendor, + cpu_devs[i]->c_ident[j]); + } } +#endif + early_identify_cpu(&boot_cpu_data); } diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index d18078834ded..dc12fe5ef3ca 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -223,6 +223,63 @@ static void hv_machine_crash_shutdown(struct pt_regs *regs) hyperv_cleanup(); } #endif /* CONFIG_CRASH_DUMP */ + +static u64 hv_ref_counter_at_suspend; +static void (*old_save_sched_clock_state)(void); +static void (*old_restore_sched_clock_state)(void); + +/* + * Hyper-V clock counter resets during hibernation. Save and restore clock + * offset during suspend/resume, while also considering the time passed + * before suspend. This is to make sure that sched_clock using hv tsc page + * based clocksource, proceeds from where it left off during suspend and + * it shows correct time for the timestamps of kernel messages after resume. + */ +static void save_hv_clock_tsc_state(void) +{ + hv_ref_counter_at_suspend = hv_read_reference_counter(); +} + +static void restore_hv_clock_tsc_state(void) +{ + /* + * Adjust the offsets used by hv tsc clocksource to + * account for the time spent before hibernation. + * adjusted value = reference counter (time) at suspend + * - reference counter (time) now. + */ + hv_adj_sched_clock_offset(hv_ref_counter_at_suspend - hv_read_reference_counter()); +} + +/* + * Functions to override save_sched_clock_state and restore_sched_clock_state + * functions of x86_platform. The Hyper-V clock counter is reset during + * suspend-resume and the offset used to measure time needs to be + * corrected, post resume. + */ +static void hv_save_sched_clock_state(void) +{ + old_save_sched_clock_state(); + save_hv_clock_tsc_state(); +} + +static void hv_restore_sched_clock_state(void) +{ + restore_hv_clock_tsc_state(); + old_restore_sched_clock_state(); +} + +static void __init x86_setup_ops_for_tsc_pg_clock(void) +{ + if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE)) + return; + + old_save_sched_clock_state = x86_platform.save_sched_clock_state; + x86_platform.save_sched_clock_state = hv_save_sched_clock_state; + + old_restore_sched_clock_state = x86_platform.restore_sched_clock_state; + x86_platform.restore_sched_clock_state = hv_restore_sched_clock_state; +} #endif /* CONFIG_HYPERV */ static uint32_t __init ms_hyperv_platform(void) @@ -579,6 +636,7 @@ static void __init ms_hyperv_init_platform(void) /* Register Hyper-V specific clocksource */ hv_init_clocksource(); + x86_setup_ops_for_tsc_pg_clock(); hv_vtl_init_platform(); #endif /* diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 6bc1eb2a21bd..887b0b8e21e3 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -190,7 +190,8 @@ int ssp_get(struct task_struct *target, const struct user_regset *regset, struct fpu *fpu = &target->thread.fpu; struct cet_user_state *cetregs; - if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK)) + if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) || + !ssp_active(target, regset)) return -ENODEV; sync_fpstate(fpu); diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index 4eefaac64c6c..9e51242ed125 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -172,6 +172,14 @@ void arch_static_call_transform(void *site, void *tramp, void *func, bool tail) } EXPORT_SYMBOL_GPL(arch_static_call_transform); +noinstr void __static_call_update_early(void *tramp, void *func) +{ + BUG_ON(system_state != SYSTEM_BOOTING); + BUG_ON(static_call_initialized); + __text_gen_insn(tramp, JMP32_INSN_OPCODE, tramp, func, JMP32_INSN_SIZE); + sync_core(); +} + #ifdef CONFIG_MITIGATION_RETHUNK /* * This is called by apply_returns() to fix up static call trampolines, diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index fab3ac9a4574..6a17396c8174 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -519,14 +519,10 @@ INIT_PER_CPU(irq_stack_backing_store); * linker will never mark as relocatable. (Using just ABSOLUTE() is not * sufficient for that). */ -#ifdef CONFIG_XEN #ifdef CONFIG_XEN_PV xen_elfnote_entry_value = ABSOLUTE(xen_elfnote_entry) + ABSOLUTE(startup_xen); #endif -xen_elfnote_hypercall_page_value = - ABSOLUTE(xen_elfnote_hypercall_page) + ABSOLUTE(hypercall_page); -#endif #ifdef CONFIG_PVH xen_elfnote_phys32_entry_value = ABSOLUTE(xen_elfnote_phys32_entry) + ABSOLUTE(pvh_start_xen - LOAD_OFFSET); diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 22e7ad235123..2401606db260 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3364,18 +3364,6 @@ static bool fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, return true; } -static bool is_access_allowed(struct kvm_page_fault *fault, u64 spte) -{ - if (fault->exec) - return is_executable_pte(spte); - - if (fault->write) - return is_writable_pte(spte); - - /* Fault was on Read access */ - return spte & PT_PRESENT_MASK; -} - /* * Returns the last level spte pointer of the shadow page walk for the given * gpa, and sets *spte to the spte value. This spte may be non-preset. If no diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h index f332b33bc817..af10bc0380a3 100644 --- a/arch/x86/kvm/mmu/spte.h +++ b/arch/x86/kvm/mmu/spte.h @@ -462,6 +462,23 @@ static inline bool is_mmu_writable_spte(u64 spte) } /* + * Returns true if the access indicated by @fault is allowed by the existing + * SPTE protections. Note, the caller is responsible for checking that the + * SPTE is a shadow-present, leaf SPTE (either before or after). + */ +static inline bool is_access_allowed(struct kvm_page_fault *fault, u64 spte) +{ + if (fault->exec) + return is_executable_pte(spte); + + if (fault->write) + return is_writable_pte(spte); + + /* Fault was on Read access */ + return spte & PT_PRESENT_MASK; +} + +/* * If the MMU-writable flag is cleared, i.e. the SPTE is write-protected for * write-tracking, remote TLBs must be flushed, even if the SPTE was read-only, * as KVM allows stale Writable TLB entries to exist. When dirty logging, KVM diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 4508d868f1cd..2f15e0e33903 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -985,6 +985,11 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, if (fault->prefetch && is_shadow_present_pte(iter->old_spte)) return RET_PF_SPURIOUS; + if (is_shadow_present_pte(iter->old_spte) && + is_access_allowed(fault, iter->old_spte) && + is_last_spte(iter->old_spte, iter->level)) + return RET_PF_SPURIOUS; + if (unlikely(!fault->slot)) new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL); else diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 4b74ea91f4e6..65fd245a9953 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -1199,6 +1199,12 @@ bool avic_hardware_setup(void) return false; } + if (cc_platform_has(CC_ATTR_HOST_SEV_SNP) && + !boot_cpu_has(X86_FEATURE_HV_INUSE_WR_ALLOWED)) { + pr_warn("AVIC disabled: missing HvInUseWrAllowed on SNP-enabled system\n"); + return false; + } + if (boot_cpu_has(X86_FEATURE_AVIC)) { pr_info("AVIC enabled\n"); } else if (force_avic) { diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index dd15cc635655..21dacd312779 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3201,15 +3201,6 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) if (data & ~supported_de_cfg) return 1; - /* - * Don't let the guest change the host-programmed value. The - * MSR is very model specific, i.e. contains multiple bits that - * are completely unknown to KVM, and the one bit known to KVM - * is simply a reflection of hardware capabilities. - */ - if (!msr->host_initiated && data != svm->msr_decfg) - return 1; - svm->msr_decfg = data; break; } diff --git a/arch/x86/kvm/vmx/posted_intr.h b/arch/x86/kvm/vmx/posted_intr.h index 1715d2ab07be..ad9116a99bcc 100644 --- a/arch/x86/kvm/vmx/posted_intr.h +++ b/arch/x86/kvm/vmx/posted_intr.h @@ -2,7 +2,7 @@ #ifndef __KVM_X86_VMX_POSTED_INTR_H #define __KVM_X86_VMX_POSTED_INTR_H -#include <linux/find.h> +#include <linux/bitmap.h> #include <asm/posted_intr.h> void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c8160baf3838..c79a8cc57ba4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9976,7 +9976,7 @@ static int complete_hypercall_exit(struct kvm_vcpu *vcpu) { u64 ret = vcpu->run->hypercall.ret; - if (!is_64_bit_mode(vcpu)) + if (!is_64_bit_hypercall(vcpu)) ret = (u32)ret; kvm_rax_write(vcpu, ret); ++vcpu->stat.hypercalls; @@ -12724,6 +12724,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm_hv_init_vm(kvm); kvm_xen_init_vm(kvm); + if (ignore_msrs && !report_ignored_msrs) { + pr_warn_once("Running KVM with ignore_msrs=1 and report_ignored_msrs=0 is not a\n" + "a supported configuration. Lying to the guest about the existence of MSRs\n" + "may cause the guest operating system to hang or produce errors. If a guest\n" + "does not run without ignore_msrs=1, please report it to kvm@vger.kernel.org.\n"); + } + return 0; out_uninit_mmu: diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 84e5adbd0925..43dcd8c7badc 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -2,6 +2,7 @@ #include <linux/console.h> #include <linux/cpu.h> +#include <linux/instrumentation.h> #include <linux/kexec.h> #include <linux/memblock.h> #include <linux/slab.h> @@ -21,7 +22,8 @@ #include "xen-ops.h" -EXPORT_SYMBOL_GPL(hypercall_page); +DEFINE_STATIC_CALL(xen_hypercall, xen_hypercall_hvm); +EXPORT_STATIC_CALL_TRAMP(xen_hypercall); /* * Pointer to the xen_vcpu_info structure or @@ -68,6 +70,67 @@ EXPORT_SYMBOL(xen_start_flags); */ struct shared_info *HYPERVISOR_shared_info = &xen_dummy_shared_info; +static __ref void xen_get_vendor(void) +{ + init_cpu_devs(); + cpu_detect(&boot_cpu_data); + get_cpu_vendor(&boot_cpu_data); +} + +void xen_hypercall_setfunc(void) +{ + if (static_call_query(xen_hypercall) != xen_hypercall_hvm) + return; + + if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD || + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)) + static_call_update(xen_hypercall, xen_hypercall_amd); + else + static_call_update(xen_hypercall, xen_hypercall_intel); +} + +/* + * Evaluate processor vendor in order to select the correct hypercall + * function for HVM/PVH guests. + * Might be called very early in boot before vendor has been set by + * early_cpu_init(). + */ +noinstr void *__xen_hypercall_setfunc(void) +{ + void (*func)(void); + + /* + * Xen is supported only on CPUs with CPUID, so testing for + * X86_FEATURE_CPUID is a test for early_cpu_init() having been + * run. + * + * Note that __xen_hypercall_setfunc() is noinstr only due to a nasty + * dependency chain: it is being called via the xen_hypercall static + * call when running as a PVH or HVM guest. Hypercalls need to be + * noinstr due to PV guests using hypercalls in noinstr code. So we + * can safely tag the function body as "instrumentation ok", since + * the PV guest requirement is not of interest here (xen_get_vendor() + * calls noinstr functions, and static_call_update_early() might do + * so, too). + */ + instrumentation_begin(); + + if (!boot_cpu_has(X86_FEATURE_CPUID)) + xen_get_vendor(); + + if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD || + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)) + func = xen_hypercall_amd; + else + func = xen_hypercall_intel; + + static_call_update_early(xen_hypercall, func); + + instrumentation_end(); + + return func; +} + static int xen_cpu_up_online(unsigned int cpu) { xen_init_lock_cpu(cpu); diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index 24d2957a4726..fe57ff85d004 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -106,15 +106,8 @@ static void __init init_hvm_pv_info(void) /* PVH set up hypercall page in xen_prepare_pvh(). */ if (xen_pvh_domain()) pv_info.name = "Xen PVH"; - else { - u64 pfn; - uint32_t msr; - + else pv_info.name = "Xen HVM"; - msr = cpuid_ebx(base + 2); - pfn = __pa(hypercall_page); - wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32)); - } xen_setup_features(); @@ -300,6 +293,10 @@ static uint32_t __init xen_platform_hvm(void) if (xen_pv_domain()) return 0; + /* Set correct hypercall function. */ + if (xen_domain) + xen_hypercall_setfunc(); + if (xen_pvh_domain() && nopv) { /* Guest booting via the Xen-PVH boot entry goes here */ pr_info("\"nopv\" parameter is ignored in PVH guest\n"); diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index d6818c6cafda..a8eb7e0c473c 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1341,6 +1341,9 @@ asmlinkage __visible void __init xen_start_kernel(struct start_info *si) xen_domain_type = XEN_PV_DOMAIN; xen_start_flags = xen_start_info->flags; + /* Interrupts are guaranteed to be off initially. */ + early_boot_irqs_disabled = true; + static_call_update_early(xen_hypercall, xen_hypercall_pv); xen_setup_features(); @@ -1431,7 +1434,6 @@ asmlinkage __visible void __init xen_start_kernel(struct start_info *si) WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_pv, xen_cpu_dead_pv)); local_irq_disable(); - early_boot_irqs_disabled = true; xen_raw_console_write("mapping kernel into physical memory\n"); xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base, diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c index bf68c329fc01..0e3d930bcb89 100644 --- a/arch/x86/xen/enlighten_pvh.c +++ b/arch/x86/xen/enlighten_pvh.c @@ -129,17 +129,10 @@ static void __init pvh_arch_setup(void) void __init xen_pvh_init(struct boot_params *boot_params) { - u32 msr; - u64 pfn; - xen_pvh = 1; xen_domain_type = XEN_HVM_DOMAIN; xen_start_flags = pvh_start_info.flags; - msr = cpuid_ebx(xen_cpuid_base() + 2); - pfn = __pa(hypercall_page); - wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32)); - x86_init.oem.arch_setup = pvh_arch_setup; x86_init.oem.banner = xen_banner; diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 83189cf5cdce..b518f36d1ca2 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -20,10 +20,33 @@ #include <linux/init.h> #include <linux/linkage.h> +#include <linux/objtool.h> #include <../entry/calling.h> .pushsection .noinstr.text, "ax" /* + * PV hypercall interface to the hypervisor. + * + * Called via inline asm(), so better preserve %rcx and %r11. + * + * Input: + * %eax: hypercall number + * %rdi, %rsi, %rdx, %r10, %r8: args 1..5 for the hypercall + * Output: %rax + */ +SYM_FUNC_START(xen_hypercall_pv) + ANNOTATE_NOENDBR + push %rcx + push %r11 + UNWIND_HINT_SAVE + syscall + UNWIND_HINT_RESTORE + pop %r11 + pop %rcx + RET +SYM_FUNC_END(xen_hypercall_pv) + +/* * Disabling events is simply a matter of making the event mask * non-zero. */ @@ -176,7 +199,6 @@ SYM_CODE_START(xen_early_idt_handler_array) SYM_CODE_END(xen_early_idt_handler_array) __FINIT -hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32 /* * Xen64 iret frame: * @@ -186,17 +208,28 @@ hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32 * cs * rip <-- standard iret frame * - * flags + * flags <-- xen_iret must push from here on * - * rcx } - * r11 }<-- pushed by hypercall page - * rsp->rax } + * rcx + * r11 + * rsp->rax */ +.macro xen_hypercall_iret + pushq $0 /* Flags */ + push %rcx + push %r11 + push %rax + mov $__HYPERVISOR_iret, %eax + syscall /* Do the IRET. */ +#ifdef CONFIG_MITIGATION_SLS + int3 +#endif +.endm + SYM_CODE_START(xen_iret) UNWIND_HINT_UNDEFINED ANNOTATE_NOENDBR - pushq $0 - jmp hypercall_iret + xen_hypercall_iret SYM_CODE_END(xen_iret) /* @@ -301,8 +334,7 @@ SYM_CODE_START(xen_entry_SYSENTER_compat) ENDBR lea 16(%rsp), %rsp /* strip %rcx, %r11 */ mov $-ENOSYS, %rax - pushq $0 - jmp hypercall_iret + xen_hypercall_iret SYM_CODE_END(xen_entry_SYSENTER_compat) SYM_CODE_END(xen_entry_SYSCALL_compat) diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 7f6c69dbb816..9252652afe59 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -6,9 +6,11 @@ #include <linux/elfnote.h> #include <linux/init.h> +#include <linux/instrumentation.h> #include <asm/boot.h> #include <asm/asm.h> +#include <asm/frame.h> #include <asm/msr.h> #include <asm/page_types.h> #include <asm/percpu.h> @@ -20,28 +22,6 @@ #include <xen/interface/xen-mca.h> #include <asm/xen/interface.h> -.pushsection .noinstr.text, "ax" - .balign PAGE_SIZE -SYM_CODE_START(hypercall_page) - .rept (PAGE_SIZE / 32) - UNWIND_HINT_FUNC - ANNOTATE_NOENDBR - ANNOTATE_UNRET_SAFE - ret - /* - * Xen will write the hypercall page, and sort out ENDBR. - */ - .skip 31, 0xcc - .endr - -#define HYPERCALL(n) \ - .equ xen_hypercall_##n, hypercall_page + __HYPERVISOR_##n * 32; \ - .type xen_hypercall_##n, @function; .size xen_hypercall_##n, 32 -#include <asm/xen-hypercalls.h> -#undef HYPERCALL -SYM_CODE_END(hypercall_page) -.popsection - #ifdef CONFIG_XEN_PV __INIT SYM_CODE_START(startup_xen) @@ -87,6 +67,87 @@ SYM_CODE_END(xen_cpu_bringup_again) #endif #endif + .pushsection .noinstr.text, "ax" +/* + * Xen hypercall interface to the hypervisor. + * + * Input: + * %eax: hypercall number + * 32-bit: + * %ebx, %ecx, %edx, %esi, %edi: args 1..5 for the hypercall + * 64-bit: + * %rdi, %rsi, %rdx, %r10, %r8: args 1..5 for the hypercall + * Output: %[er]ax + */ +SYM_FUNC_START(xen_hypercall_hvm) + ENDBR + FRAME_BEGIN + /* Save all relevant registers (caller save and arguments). */ +#ifdef CONFIG_X86_32 + push %eax + push %ebx + push %ecx + push %edx + push %esi + push %edi +#else + push %rax + push %rcx + push %rdx + push %rdi + push %rsi + push %r11 + push %r10 + push %r9 + push %r8 +#ifdef CONFIG_FRAME_POINTER + pushq $0 /* Dummy push for stack alignment. */ +#endif +#endif + /* Set the vendor specific function. */ + call __xen_hypercall_setfunc + /* Set ZF = 1 if AMD, Restore saved registers. */ +#ifdef CONFIG_X86_32 + lea xen_hypercall_amd, %ebx + cmp %eax, %ebx + pop %edi + pop %esi + pop %edx + pop %ecx + pop %ebx + pop %eax +#else + lea xen_hypercall_amd(%rip), %rbx + cmp %rax, %rbx +#ifdef CONFIG_FRAME_POINTER + pop %rax /* Dummy pop. */ +#endif + pop %r8 + pop %r9 + pop %r10 + pop %r11 + pop %rsi + pop %rdi + pop %rdx + pop %rcx + pop %rax +#endif + /* Use correct hypercall function. */ + jz xen_hypercall_amd + jmp xen_hypercall_intel +SYM_FUNC_END(xen_hypercall_hvm) + +SYM_FUNC_START(xen_hypercall_amd) + vmmcall + RET +SYM_FUNC_END(xen_hypercall_amd) + +SYM_FUNC_START(xen_hypercall_intel) + vmcall + RET +SYM_FUNC_END(xen_hypercall_intel) + .popsection + ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz "2.6") ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz "xen-3.0") @@ -116,8 +177,6 @@ SYM_CODE_END(xen_cpu_bringup_again) #else # define FEATURES_DOM0 0 #endif - ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .globl xen_elfnote_hypercall_page; - xen_elfnote_hypercall_page: _ASM_PTR xen_elfnote_hypercall_page_value - .) ELFNOTE(Xen, XEN_ELFNOTE_SUPPORTED_FEATURES, .long FEATURES_PV | FEATURES_PVH | FEATURES_DOM0) ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index e1b782e823e6..63c13a2ccf55 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -326,4 +326,13 @@ static inline void xen_smp_intr_free_pv(unsigned int cpu) {} static inline void xen_smp_count_cpus(void) { } #endif /* CONFIG_SMP */ +#ifdef CONFIG_XEN_PV +void xen_hypercall_pv(void); +#endif +void xen_hypercall_hvm(void); +void xen_hypercall_amd(void); +void xen_hypercall_intel(void); +void xen_hypercall_setfunc(void); +void *__xen_hypercall_setfunc(void); + #endif /* XEN_OPS_H */ |
