diff options
Diffstat (limited to 'arch')
456 files changed, 5117 insertions, 2768 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index ebe08b9186ad..61130b88964b 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -917,6 +917,13 @@ config ARCH_USES_CFI_TRAPS An architecture should select this option if it requires the .kcfi_traps section for KCFI trap handling. +config ARCH_USES_CFI_GENERIC_LLVM_PASS + bool + help + An architecture should select this option if it uses the generic + KCFIPass in LLVM to expand kCFI bundles instead of architecture-specific + lowering. + config CFI bool "Use Kernel Control Flow Integrity (kCFI)" default CFI_CLANG @@ -965,6 +972,7 @@ config HAVE_CFI_ICALL_NORMALIZE_INTEGERS_RUSTC def_bool y depends on HAVE_CFI_ICALL_NORMALIZE_INTEGERS depends on RUSTC_VERSION >= 107900 + depends on ARM64 || X86_64 # With GCOV/KASAN we need this fix: https://github.com/rust-lang/rust/pull/129373 depends on (RUSTC_LLVM_VERSION >= 190103 && RUSTC_VERSION >= 108200) || \ (!GCOV_KERNEL && !KASAN_GENERIC && !KASAN_SW_TAGS) diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index 16dca28ebf17..3fed97478058 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -509,3 +509,4 @@ 577 common open_tree_attr sys_open_tree_attr 578 common file_getattr sys_file_getattr 579 common file_setattr sys_file_setattr +580 common listns sys_listns diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig index a7cd526dd7ca..f930396d9dae 100644 --- a/arch/arc/configs/axs101_defconfig +++ b/arch/arc/configs/axs101_defconfig @@ -88,7 +88,7 @@ CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_DW=y # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig index afa6a348f444..6b779dee5ea0 100644 --- a/arch/arc/configs/axs103_defconfig +++ b/arch/arc/configs/axs103_defconfig @@ -86,7 +86,7 @@ CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_DW=y # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig index 2bfa6371953c..a89b50d5369d 100644 --- a/arch/arc/configs/axs103_smp_defconfig +++ b/arch/arc/configs/axs103_smp_defconfig @@ -88,7 +88,7 @@ CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_DW=y # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig index 1558e8e87767..1b8b2a098cda 100644 --- a/arch/arc/configs/hsdk_defconfig +++ b/arch/arc/configs/hsdk_defconfig @@ -77,7 +77,7 @@ CONFIG_DMADEVICES=y CONFIG_DW_AXI_DMAC=y CONFIG_IIO=y CONFIG_TI_ADC108S102=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y CONFIG_NFS_FS=y diff --git a/arch/arc/configs/vdk_hs38_defconfig b/arch/arc/configs/vdk_hs38_defconfig index 03d9ac20baa9..b7120523e09a 100644 --- a/arch/arc/configs/vdk_hs38_defconfig +++ b/arch/arc/configs/vdk_hs38_defconfig @@ -74,7 +74,7 @@ CONFIG_USB_OHCI_HCD_PLATFORM=y CONFIG_USB_STORAGE=y CONFIG_USB_SERIAL=y # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/arc/configs/vdk_hs38_smp_defconfig b/arch/arc/configs/vdk_hs38_smp_defconfig index c09488992f13..4077abd5980c 100644 --- a/arch/arc/configs/vdk_hs38_smp_defconfig +++ b/arch/arc/configs/vdk_hs38_smp_defconfig @@ -81,7 +81,7 @@ CONFIG_MMC_DW=y CONFIG_UIO=y CONFIG_UIO_PDRV_GENIRQ=y # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 2e3f93b690f4..4fb985b76e97 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -44,6 +44,8 @@ config ARM select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF select ARCH_USE_MEMTEST + # https://github.com/llvm/llvm-project/commit/d130f402642fba3d065aacb506cb061c899558de + select ARCH_USES_CFI_GENERIC_LLVM_PASS if CLANG_VERSION < 220000 select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU select ARCH_WANT_GENERAL_HUGETLB select ARCH_WANT_IPC_PARSE_VERSION diff --git a/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-fuji-data64.dts b/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-fuji-data64.dts index aa9576d8ab56..48ca25f57ef6 100644 --- a/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-fuji-data64.dts +++ b/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-fuji-data64.dts @@ -1254,3 +1254,17 @@ max-frequency = <25000000>; bus-width = <4>; }; + +/* + * FIXME: rgmii delay is introduced by MAC (configured in u-boot now) + * instead of PCB on fuji board, so the "phy-mode" should be updated to + * "rgmii-[tx|rx]id" when the aspeed-mac driver can handle the delay + * properly. + */ +&mac3 { + status = "okay"; + phy-mode = "rgmii"; + phy-handle = <ðphy3>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_rgmii4_default>; +}; diff --git a/arch/arm/boot/dts/broadcom/bcm2711-rpi.dtsi b/arch/arm/boot/dts/broadcom/bcm2711-rpi.dtsi index c78ed064d166..1eb6406449d1 100644 --- a/arch/arm/boot/dts/broadcom/bcm2711-rpi.dtsi +++ b/arch/arm/boot/dts/broadcom/bcm2711-rpi.dtsi @@ -77,6 +77,14 @@ /delete-property/ pinctrl-0; }; +&pm { + clocks = <&firmware_clocks 5>, + <&clocks BCM2835_CLOCK_PERI_IMAGE>, + <&clocks BCM2835_CLOCK_H264>, + <&clocks BCM2835_CLOCK_ISP>; + clock-names = "v3d", "peri_image", "h264", "isp"; +}; + &rmem { /* * RPi4's co-processor will copy the board's bootloader configuration diff --git a/arch/arm/boot/dts/broadcom/bcm2835-rpi-common.dtsi b/arch/arm/boot/dts/broadcom/bcm2835-rpi-common.dtsi index 8b3c21d9f333..fa9d784c88b6 100644 --- a/arch/arm/boot/dts/broadcom/bcm2835-rpi-common.dtsi +++ b/arch/arm/boot/dts/broadcom/bcm2835-rpi-common.dtsi @@ -13,7 +13,16 @@ clock-names = "pixel", "hdmi"; }; +&pm { + clocks = <&firmware_clocks 5>, + <&clocks BCM2835_CLOCK_PERI_IMAGE>, + <&clocks BCM2835_CLOCK_H264>, + <&clocks BCM2835_CLOCK_ISP>; + clock-names = "v3d", "peri_image", "h264", "isp"; +}; + &v3d { + clocks = <&firmware_clocks 5>; power-domains = <&power RPI_POWER_DOMAIN_V3D>; }; diff --git a/arch/arm/boot/dts/broadcom/bcm47189-luxul-xap-1440.dts b/arch/arm/boot/dts/broadcom/bcm47189-luxul-xap-1440.dts index ac44c745bdf8..a39a021a3910 100644 --- a/arch/arm/boot/dts/broadcom/bcm47189-luxul-xap-1440.dts +++ b/arch/arm/boot/dts/broadcom/bcm47189-luxul-xap-1440.dts @@ -55,8 +55,8 @@ mdio { /delete-node/ switch@1e; - bcm54210e: ethernet-phy@0 { - reg = <0>; + bcm54210e: ethernet-phy@25 { + reg = <25>; }; }; }; diff --git a/arch/arm/boot/dts/nxp/imx/imx51-zii-rdu1.dts b/arch/arm/boot/dts/nxp/imx/imx51-zii-rdu1.dts index 06545a6052f7..43ff5eafb2bb 100644 --- a/arch/arm/boot/dts/nxp/imx/imx51-zii-rdu1.dts +++ b/arch/arm/boot/dts/nxp/imx/imx51-zii-rdu1.dts @@ -259,7 +259,7 @@ pinctrl-0 = <&pinctrl_audmux>; status = "okay"; - ssi2 { + mux-ssi2 { fsl,audmux-port = <1>; fsl,port-config = < (IMX_AUDMUX_V2_PTCR_SYN | @@ -271,7 +271,7 @@ >; }; - aud3 { + mux-aud3 { fsl,audmux-port = <2>; fsl,port-config = < IMX_AUDMUX_V2_PTCR_SYN diff --git a/arch/arm/boot/dts/nxp/imx/imx6ul.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ul.dtsi index 6de224dd2bb9..6eb80f867f50 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6ul.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6ul.dtsi @@ -339,7 +339,7 @@ #sound-dai-cells = <0>; compatible = "fsl,imx6ul-sai", "fsl,imx6sx-sai"; reg = <0x02030000 0x4000>; - interrupts = <GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 25 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX6UL_CLK_SAI3_IPG>, <&clks IMX6UL_CLK_SAI3>, <&clks IMX6UL_CLK_DUMMY>, <&clks IMX6UL_CLK_DUMMY>; diff --git a/arch/arm/boot/dts/nxp/imx/imx6ull-engicam-microgea-rmm.dts b/arch/arm/boot/dts/nxp/imx/imx6ull-engicam-microgea-rmm.dts index 107b00b9a939..540642e99a41 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6ull-engicam-microgea-rmm.dts +++ b/arch/arm/boot/dts/nxp/imx/imx6ull-engicam-microgea-rmm.dts @@ -136,7 +136,7 @@ interrupt-parent = <&gpio2>; interrupts = <8 IRQ_TYPE_EDGE_FALLING>; reset-gpios = <&gpio2 14 GPIO_ACTIVE_LOW>; - report-rate-hz = <6>; + report-rate-hz = <60>; /* settings valid only for Hycon touchscreen */ touchscreen-size-x = <1280>; touchscreen-size-y = <800>; diff --git a/arch/arm/configs/axm55xx_defconfig b/arch/arm/configs/axm55xx_defconfig index 516689dc6cf1..242a61208a0f 100644 --- a/arch/arm/configs/axm55xx_defconfig +++ b/arch/arm/configs/axm55xx_defconfig @@ -194,8 +194,7 @@ CONFIG_MAILBOX=y CONFIG_PL320_MBOX=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_EXT4_FS=y CONFIG_AUTOFS_FS=y CONFIG_FUSE_FS=y diff --git a/arch/arm/configs/bcm2835_defconfig b/arch/arm/configs/bcm2835_defconfig index 27dc3bf6b124..4a8ac09843d7 100644 --- a/arch/arm/configs/bcm2835_defconfig +++ b/arch/arm/configs/bcm2835_defconfig @@ -154,8 +154,8 @@ CONFIG_PWM_BCM2835=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_FANOTIFY=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/arm/configs/davinci_all_defconfig b/arch/arm/configs/davinci_all_defconfig index e2ddaca0f89d..673408a10888 100644 --- a/arch/arm/configs/davinci_all_defconfig +++ b/arch/arm/configs/davinci_all_defconfig @@ -228,7 +228,7 @@ CONFIG_PWM=y CONFIG_PWM_TIECAP=m CONFIG_PWM_TIEHRPWM=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_XFS_FS=m CONFIG_AUTOFS_FS=m diff --git a/arch/arm/configs/dove_defconfig b/arch/arm/configs/dove_defconfig index d76eb12d29a7..bb6c4748bfc8 100644 --- a/arch/arm/configs/dove_defconfig +++ b/arch/arm/configs/dove_defconfig @@ -95,8 +95,8 @@ CONFIG_RTC_DRV_MV=y CONFIG_DMADEVICES=y CONFIG_MV_XOR=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_FS_XATTR is not set +CONFIG_EXT4_FS=y +# CONFIG_EXT4_FS_XATTR is not set CONFIG_EXT4_FS=y CONFIG_ISO9660_FS=y CONFIG_JOLIET=y diff --git a/arch/arm/configs/ep93xx_defconfig b/arch/arm/configs/ep93xx_defconfig index 2248afaf35b5..7f3756d8b086 100644 --- a/arch/arm/configs/ep93xx_defconfig +++ b/arch/arm/configs/ep93xx_defconfig @@ -103,8 +103,8 @@ CONFIG_RTC_DRV_EP93XX=y CONFIG_DMADEVICES=y CONFIG_EP93XX_DMA=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_FS_XATTR is not set +CONFIG_EXT4_FS=y +# CONFIG_EXT4_FS_XATTR is not set CONFIG_EXT4_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig index 9a57763a8d38..0d55056c6f82 100644 --- a/arch/arm/configs/imx_v6_v7_defconfig +++ b/arch/arm/configs/imx_v6_v7_defconfig @@ -436,9 +436,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_QUOTA=y CONFIG_QUOTA_NETLINK_INTERFACE=y CONFIG_AUTOFS_FS=y diff --git a/arch/arm/configs/ixp4xx_defconfig b/arch/arm/configs/ixp4xx_defconfig index 3cb995b9616a..81199dddcde7 100644 --- a/arch/arm/configs/ixp4xx_defconfig +++ b/arch/arm/configs/ixp4xx_defconfig @@ -158,8 +158,8 @@ CONFIG_IXP4XX_NPE=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_OVERLAY_FS=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y diff --git a/arch/arm/configs/mmp2_defconfig b/arch/arm/configs/mmp2_defconfig index 842a989baa27..f67e9cda73e2 100644 --- a/arch/arm/configs/mmp2_defconfig +++ b/arch/arm/configs/mmp2_defconfig @@ -53,7 +53,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_MAX8925=y # CONFIG_RESET_CONTROLLER is not set CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_EXT4_FS=y # CONFIG_DNOTIFY is not set CONFIG_MSDOS_FS=y diff --git a/arch/arm/configs/moxart_defconfig b/arch/arm/configs/moxart_defconfig index fa06d98e43fc..e2d9f3610063 100644 --- a/arch/arm/configs/moxart_defconfig +++ b/arch/arm/configs/moxart_defconfig @@ -113,7 +113,7 @@ CONFIG_RTC_DRV_MOXART=y CONFIG_DMADEVICES=y CONFIG_MOXART_DMA=y # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_TMPFS=y CONFIG_CONFIGFS_FS=y CONFIG_JFFS2_FS=y diff --git a/arch/arm/configs/multi_v5_defconfig b/arch/arm/configs/multi_v5_defconfig index b523bc246c09..59b020e66a0b 100644 --- a/arch/arm/configs/multi_v5_defconfig +++ b/arch/arm/configs/multi_v5_defconfig @@ -268,7 +268,7 @@ CONFIG_PWM_ATMEL=m CONFIG_PWM_ATMEL_HLCDC_PWM=m CONFIG_PWM_ATMEL_TCB=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_ISO9660_FS=m CONFIG_JOLIET=y CONFIG_UDF_FS=m diff --git a/arch/arm/configs/mv78xx0_defconfig b/arch/arm/configs/mv78xx0_defconfig index 3343f72de7ea..55f4ab67a306 100644 --- a/arch/arm/configs/mv78xx0_defconfig +++ b/arch/arm/configs/mv78xx0_defconfig @@ -91,8 +91,8 @@ CONFIG_RTC_DRV_DS1307=y CONFIG_RTC_DRV_RS5C372=y CONFIG_RTC_DRV_M41T80=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_FS_XATTR is not set +CONFIG_EXT4_FS=y +# CONFIG_EXT4_FS_XATTR is not set CONFIG_EXT4_FS=m CONFIG_ISO9660_FS=m CONFIG_JOLIET=y diff --git a/arch/arm/configs/mvebu_v5_defconfig b/arch/arm/configs/mvebu_v5_defconfig index 23dbb80fcc2e..d1742a7cae6a 100644 --- a/arch/arm/configs/mvebu_v5_defconfig +++ b/arch/arm/configs/mvebu_v5_defconfig @@ -168,7 +168,7 @@ CONFIG_MV_XOR=y CONFIG_STAGING=y CONFIG_FB_XGI=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_ISO9660_FS=m CONFIG_JOLIET=y CONFIG_UDF_FS=m diff --git a/arch/arm/configs/nhk8815_defconfig b/arch/arm/configs/nhk8815_defconfig index ea28ed8991b4..696b4fbc2412 100644 --- a/arch/arm/configs/nhk8815_defconfig +++ b/arch/arm/configs/nhk8815_defconfig @@ -116,7 +116,7 @@ CONFIG_IIO_ST_ACCEL_3AXIS=y CONFIG_PWM=y CONFIG_PWM_STMPE=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_FUSE_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/arm/configs/omap1_defconfig b/arch/arm/configs/omap1_defconfig index 661e5d6894bd..24c54bf1e243 100644 --- a/arch/arm/configs/omap1_defconfig +++ b/arch/arm/configs/omap1_defconfig @@ -184,7 +184,7 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_OMAP=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_DNOTIFY is not set CONFIG_AUTOFS_FS=y CONFIG_ISO9660_FS=y diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 1d5f75241739..4e53c331cd84 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -679,7 +679,7 @@ CONFIG_TWL4030_USB=m CONFIG_COUNTER=m CONFIG_TI_EQEP=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_EXT4_FS_SECURITY=y CONFIG_FANOTIFY=y CONFIG_QUOTA=y diff --git a/arch/arm/configs/orion5x_defconfig b/arch/arm/configs/orion5x_defconfig index 62b9c6102789..c28426250ec3 100644 --- a/arch/arm/configs/orion5x_defconfig +++ b/arch/arm/configs/orion5x_defconfig @@ -115,8 +115,8 @@ CONFIG_RTC_DRV_M48T86=y CONFIG_DMADEVICES=y CONFIG_MV_XOR=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_FS_XATTR is not set +CONFIG_EXT4_FS=y +# CONFIG_EXT4_FS_XATTR is not set CONFIG_EXT4_FS=m CONFIG_ISO9660_FS=m CONFIG_JOLIET=y diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig index 70489f3555d0..3ea189f1f42f 100644 --- a/arch/arm/configs/pxa_defconfig +++ b/arch/arm/configs/pxa_defconfig @@ -579,9 +579,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_XFS_FS=m CONFIG_AUTOFS_FS=m CONFIG_FUSE_FS=m diff --git a/arch/arm/configs/qcom_defconfig b/arch/arm/configs/qcom_defconfig index fa681a7a49c2..29a1dea500f0 100644 --- a/arch/arm/configs/qcom_defconfig +++ b/arch/arm/configs/qcom_defconfig @@ -291,7 +291,7 @@ CONFIG_INTERCONNECT_QCOM_MSM8974=m CONFIG_INTERCONNECT_QCOM_SDX55=m CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_FUSE_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y diff --git a/arch/arm/configs/rpc_defconfig b/arch/arm/configs/rpc_defconfig index 24f1fa868230..46df453e224e 100644 --- a/arch/arm/configs/rpc_defconfig +++ b/arch/arm/configs/rpc_defconfig @@ -77,7 +77,7 @@ CONFIG_SOUND_VIDC=m CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_PCF8583=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_AUTOFS_FS=m CONFIG_ISO9660_FS=y CONFIG_JOLIET=y diff --git a/arch/arm/configs/s3c6400_defconfig b/arch/arm/configs/s3c6400_defconfig index 967b1cb22136..7bf28a83946a 100644 --- a/arch/arm/configs/s3c6400_defconfig +++ b/arch/arm/configs/s3c6400_defconfig @@ -52,9 +52,9 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_S3C=y CONFIG_PWM=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_CRAMFS=y diff --git a/arch/arm/configs/sama7_defconfig b/arch/arm/configs/sama7_defconfig index e14720a9a5ac..e2ad9a05566f 100644 --- a/arch/arm/configs/sama7_defconfig +++ b/arch/arm/configs/sama7_defconfig @@ -201,7 +201,7 @@ CONFIG_MCHP_EIC=y CONFIG_RESET_CONTROLLER=y CONFIG_NVMEM_MICROCHIP_OTPC=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_FANOTIFY=y CONFIG_AUTOFS_FS=m CONFIG_VFAT_FS=y diff --git a/arch/arm/configs/socfpga_defconfig b/arch/arm/configs/socfpga_defconfig index 294906c8f16e..f2e42846b116 100644 --- a/arch/arm/configs/socfpga_defconfig +++ b/arch/arm/configs/socfpga_defconfig @@ -136,7 +136,7 @@ CONFIG_FPGA_REGION=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_AUTOFS_FS=y CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y diff --git a/arch/arm/configs/spear13xx_defconfig b/arch/arm/configs/spear13xx_defconfig index a8f992fdb30d..8b19af1ea67c 100644 --- a/arch/arm/configs/spear13xx_defconfig +++ b/arch/arm/configs/spear13xx_defconfig @@ -84,8 +84,8 @@ CONFIG_DMATEST=m CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_AUTOFS_FS=m CONFIG_FUSE_FS=y CONFIG_MSDOS_FS=m diff --git a/arch/arm/configs/spear3xx_defconfig b/arch/arm/configs/spear3xx_defconfig index 8dc5a388759c..b4e4b96a98af 100644 --- a/arch/arm/configs/spear3xx_defconfig +++ b/arch/arm/configs/spear3xx_defconfig @@ -67,8 +67,8 @@ CONFIG_DMATEST=m CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_AUTOFS_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=m diff --git a/arch/arm/configs/spear6xx_defconfig b/arch/arm/configs/spear6xx_defconfig index 4e9e1a6ff381..7083b1bd8573 100644 --- a/arch/arm/configs/spear6xx_defconfig +++ b/arch/arm/configs/spear6xx_defconfig @@ -53,8 +53,8 @@ CONFIG_DMATEST=m CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_AUTOFS_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=m diff --git a/arch/arm/configs/spitz_defconfig b/arch/arm/configs/spitz_defconfig index ac2a0f998c73..395df2f9dc8e 100644 --- a/arch/arm/configs/spitz_defconfig +++ b/arch/arm/configs/spitz_defconfig @@ -193,8 +193,8 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_FS_XATTR is not set +CONFIG_EXT4_FS=y +# CONFIG_EXT4_FS_XATTR is not set CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y diff --git a/arch/arm/configs/stm32_defconfig b/arch/arm/configs/stm32_defconfig index dcd9c316072e..82190b155b14 100644 --- a/arch/arm/configs/stm32_defconfig +++ b/arch/arm/configs/stm32_defconfig @@ -69,7 +69,7 @@ CONFIG_STM32_MDMA=y CONFIG_IIO=y CONFIG_STM32_ADC_CORE=y CONFIG_STM32_ADC=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_FILE_LOCKING is not set # CONFIG_DNOTIFY is not set # CONFIG_INOTIFY_USER is not set diff --git a/arch/arm/configs/tegra_defconfig b/arch/arm/configs/tegra_defconfig index ba863b445417..ab477ca13f89 100644 --- a/arch/arm/configs/tegra_defconfig +++ b/arch/arm/configs/tegra_defconfig @@ -319,9 +319,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y # CONFIG_DNOTIFY is not set CONFIG_VFAT_FS=y CONFIG_TMPFS=y diff --git a/arch/arm/configs/u8500_defconfig b/arch/arm/configs/u8500_defconfig index 9c8dc6dd5fe3..e88533b78327 100644 --- a/arch/arm/configs/u8500_defconfig +++ b/arch/arm/configs/u8500_defconfig @@ -175,7 +175,7 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y diff --git a/arch/arm/configs/vexpress_defconfig b/arch/arm/configs/vexpress_defconfig index cdb6065e04fd..b9454f6954f8 100644 --- a/arch/arm/configs/vexpress_defconfig +++ b/arch/arm/configs/vexpress_defconfig @@ -120,7 +120,7 @@ CONFIG_VIRTIO_BALLOON=y CONFIG_VIRTIO_MMIO=y CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index f90be312418e..d6ae80b5df36 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -283,10 +283,17 @@ extern int __put_user_8(void *, unsigned long long); __gu_err; \ }) +/* + * This is a type: either unsigned long, if the argument fits into + * that type, or otherwise unsigned long long. + */ +#define __long_type(x) \ + __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) + #define __get_user_err(x, ptr, err, __t) \ do { \ unsigned long __gu_addr = (unsigned long)(ptr); \ - unsigned long __gu_val; \ + __long_type(x) __gu_val; \ unsigned int __ua_flags; \ __chk_user_ptr(ptr); \ might_fault(); \ @@ -295,6 +302,7 @@ do { \ case 1: __get_user_asm_byte(__gu_val, __gu_addr, err, __t); break; \ case 2: __get_user_asm_half(__gu_val, __gu_addr, err, __t); break; \ case 4: __get_user_asm_word(__gu_val, __gu_addr, err, __t); break; \ + case 8: __get_user_asm_dword(__gu_val, __gu_addr, err, __t); break; \ default: (__gu_val) = __get_user_bad(); \ } \ uaccess_restore(__ua_flags); \ @@ -353,6 +361,22 @@ do { \ #define __get_user_asm_word(x, addr, err, __t) \ __get_user_asm(x, addr, err, "ldr" __t) +#ifdef __ARMEB__ +#define __WORD0_OFFS 4 +#define __WORD1_OFFS 0 +#else +#define __WORD0_OFFS 0 +#define __WORD1_OFFS 4 +#endif + +#define __get_user_asm_dword(x, addr, err, __t) \ + ({ \ + unsigned long __w0, __w1; \ + __get_user_asm(__w0, addr + __WORD0_OFFS, err, "ldr" __t); \ + __get_user_asm(__w1, addr + __WORD1_OFFS, err, "ldr" __t); \ + (x) = ((u64)__w1 << 32) | (u64) __w0; \ +}) + #define __put_user_switch(x, ptr, __err, __fn) \ do { \ const __typeof__(*(ptr)) __user *__pu_ptr = (ptr); \ diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index b07e699aaa3c..fd09afae72a2 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -484,3 +484,4 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common listns sys_listns diff --git a/arch/arm64/boot/dts/broadcom/bcm2712-rpi-5-b.dts b/arch/arm64/boot/dts/broadcom/bcm2712-rpi-5-b.dts index b8f256545022..3e0319fdb93f 100644 --- a/arch/arm64/boot/dts/broadcom/bcm2712-rpi-5-b.dts +++ b/arch/arm64/boot/dts/broadcom/bcm2712-rpi-5-b.dts @@ -18,11 +18,21 @@ #include "bcm2712-rpi-5-b-ovl-rp1.dts" +/ { + aliases { + ethernet0 = &rp1_eth; + }; +}; + &pcie2 { #include "rp1-nexus.dtsi" }; &rp1_eth { + assigned-clocks = <&rp1_clocks RP1_CLK_ETH_TSU>, + <&rp1_clocks RP1_CLK_ETH>; + assigned-clock-rates = <50000000>, + <125000000>; status = "okay"; phy-mode = "rgmii-id"; phy-handle = <&phy1>; diff --git a/arch/arm64/boot/dts/broadcom/bcm2712.dtsi b/arch/arm64/boot/dts/broadcom/bcm2712.dtsi index e77a66adc22a..205b87f557d6 100644 --- a/arch/arm64/boot/dts/broadcom/bcm2712.dtsi +++ b/arch/arm64/boot/dts/broadcom/bcm2712.dtsi @@ -326,6 +326,8 @@ <0x7fffe000 0x2000>; interrupt-controller; #address-cells = <0>; + interrupts = <GIC_PPI 9 (GIC_CPU_MASK_SIMPLE(4) | + IRQ_TYPE_LEVEL_HIGH)>; #interrupt-cells = <3>; }; diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-img.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-img.dtsi index 2cf0f7208350..a72b2f1c4a1b 100644 --- a/arch/arm64/boot/dts/freescale/imx8-ss-img.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-ss-img.dtsi @@ -67,7 +67,6 @@ img_subsys: bus@58000000 { power-domains = <&pd IMX_SC_R_CSI_0>; fsl,channel = <0>; fsl,num-irqs = <32>; - status = "disabled"; }; gpio0_mipi_csi0: gpio@58222000 { @@ -144,7 +143,6 @@ img_subsys: bus@58000000 { power-domains = <&pd IMX_SC_R_CSI_1>; fsl,channel = <0>; fsl,num-irqs = <32>; - status = "disabled"; }; gpio0_mipi_csi1: gpio@58242000 { diff --git a/arch/arm64/boot/dts/freescale/imx8dxl-ss-conn.dtsi b/arch/arm64/boot/dts/freescale/imx8dxl-ss-conn.dtsi index a66ba6d0a8c0..da33a35c6d46 100644 --- a/arch/arm64/boot/dts/freescale/imx8dxl-ss-conn.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8dxl-ss-conn.dtsi @@ -29,8 +29,8 @@ compatible = "nxp,imx8dxl-dwmac-eqos", "snps,dwmac-5.10a"; reg = <0x5b050000 0x10000>; interrupt-parent = <&gic>; - interrupts = <GIC_SPI 163 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 162 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 162 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 163 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "macirq", "eth_wake_irq"; clocks = <&eqos_lpcg IMX_LPCG_CLK_4>, <&eqos_lpcg IMX_LPCG_CLK_6>, diff --git a/arch/arm64/boot/dts/freescale/imx8dxl-ss-hsio.dtsi b/arch/arm64/boot/dts/freescale/imx8dxl-ss-hsio.dtsi index ec466e4d7df5..5c0d09c5c086 100644 --- a/arch/arm64/boot/dts/freescale/imx8dxl-ss-hsio.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8dxl-ss-hsio.dtsi @@ -54,3 +54,8 @@ interrupt-names = "dma"; }; }; + +&pcieb_ep { + interrupts = <GIC_SPI 46 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "dma"; +}; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-kontron-bl-osm-s.dts b/arch/arm64/boot/dts/freescale/imx8mp-kontron-bl-osm-s.dts index 614b4ce330b1..0924ac50fd2d 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-kontron-bl-osm-s.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-kontron-bl-osm-s.dts @@ -16,11 +16,20 @@ ethernet1 = &eqos; }; - extcon_usbc: usbc { - compatible = "linux,extcon-usb-gpio"; + connector { + compatible = "gpio-usb-b-connector", "usb-b-connector"; + id-gpios = <&gpio1 10 GPIO_ACTIVE_HIGH>; + label = "Type-C"; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usb1_id>; - id-gpios = <&gpio1 10 GPIO_ACTIVE_HIGH>; + type = "micro"; + vbus-supply = <®_usb1_vbus>; + + port { + usb_dr_connector: endpoint { + remote-endpoint = <&usb3_dwc>; + }; + }; }; leds { @@ -244,9 +253,15 @@ hnp-disable; srp-disable; dr_mode = "otg"; - extcon = <&extcon_usbc>; usb-role-switch; + role-switch-default-mode = "peripheral"; status = "okay"; + + port { + usb3_dwc: endpoint { + remote-endpoint = <&usb_dr_connector>; + }; + }; }; &usb_dwc3_1 { @@ -273,7 +288,6 @@ }; &usb3_phy0 { - vbus-supply = <®_usb1_vbus>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/freescale/imx8qm-mek.dts b/arch/arm64/boot/dts/freescale/imx8qm-mek.dts index 202d5c67ac40..9c0b6b8d6459 100644 --- a/arch/arm64/boot/dts/freescale/imx8qm-mek.dts +++ b/arch/arm64/boot/dts/freescale/imx8qm-mek.dts @@ -217,8 +217,8 @@ compatible = "nxp,cbdtu02043", "gpio-sbu-mux"; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_typec_mux>; - select-gpios = <&lsio_gpio4 6 GPIO_ACTIVE_LOW>; - enable-gpios = <&lsio_gpio4 19 GPIO_ACTIVE_HIGH>; + select-gpios = <&lsio_gpio4 6 GPIO_ACTIVE_HIGH>; + enable-gpios = <&lsio_gpio4 19 GPIO_ACTIVE_LOW>; orientation-switch; port { diff --git a/arch/arm64/boot/dts/freescale/imx95.dtsi b/arch/arm64/boot/dts/freescale/imx95.dtsi index 1292677cbe4e..6da961eb3fe5 100644 --- a/arch/arm64/boot/dts/freescale/imx95.dtsi +++ b/arch/arm64/boot/dts/freescale/imx95.dtsi @@ -1886,7 +1886,7 @@ assigned-clock-rates = <3600000000>, <100000000>, <10000000>; assigned-clock-parents = <0>, <0>, <&scmi_clk IMX95_CLK_SYSPLL1_PFD1_DIV2>; - msi-map = <0x0 &its 0x98 0x1>; + msi-map = <0x0 &its 0x10 0x1>; power-domains = <&scmi_devpd IMX95_PD_HSIO_TOP>; status = "disabled"; }; @@ -1963,6 +1963,7 @@ assigned-clock-rates = <3600000000>, <100000000>, <10000000>; assigned-clock-parents = <0>, <0>, <&scmi_clk IMX95_CLK_SYSPLL1_PFD1_DIV2>; + msi-map = <0x0 &its 0x98 0x1>; power-domains = <&scmi_devpd IMX95_PD_HSIO_TOP>; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/nvidia/tegra194-p3668.dtsi b/arch/arm64/boot/dts/nvidia/tegra194-p3668.dtsi index a410fc335fa3..c0f17f8189fa 100644 --- a/arch/arm64/boot/dts/nvidia/tegra194-p3668.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra194-p3668.dtsi @@ -42,6 +42,7 @@ interrupt-parent = <&gpio>; interrupts = <TEGRA194_MAIN_GPIO(G, 4) IRQ_TYPE_LEVEL_LOW>; #phy-cells = <0>; + wakeup-source; }; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index 283d9cbc4368..03b7c4313750 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -598,7 +598,6 @@ pinctrl-2 = <&otp_pin>; resets = <&cru SRST_TSADC>; reset-names = "tsadc-apb"; - rockchip,grf = <&grf>; rockchip,hw-tshut-temp = <100000>; #thermal-sensor-cells = <1>; status = "disabled"; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-op1.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-op1.dtsi index c4f4f1ff6117..9da6fd82e46b 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-op1.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-op1.dtsi @@ -3,7 +3,7 @@ * Copyright (c) 2016-2017 Fuzhou Rockchip Electronics Co., Ltd */ -#include "rk3399.dtsi" +#include "rk3399-base.dtsi" / { cluster0_opp: opp-table-0 { diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou-video-demo.dtso b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou-video-demo.dtso index 5e8f729c2cf2..141a921a06e4 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou-video-demo.dtso +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou-video-demo.dtso @@ -45,11 +45,11 @@ cam_dovdd_1v8: regulator-cam-dovdd-1v8 { compatible = "regulator-fixed"; - gpio = <&pca9670 3 GPIO_ACTIVE_LOW>; - regulator-max-microvolt = <1800000>; - regulator-min-microvolt = <1800000>; - regulator-name = "cam-dovdd-1v8"; - vin-supply = <&vcc1v8_video>; + gpio = <&pca9670 3 GPIO_ACTIVE_LOW>; + regulator-max-microvolt = <1800000>; + regulator-min-microvolt = <1800000>; + regulator-name = "cam-dovdd-1v8"; + vin-supply = <&vcc1v8_video>; }; cam_dvdd_1v2: regulator-cam-dvdd-1v2 { diff --git a/arch/arm64/boot/dts/rockchip/rk3566-bigtreetech-cb2.dtsi b/arch/arm64/boot/dts/rockchip/rk3566-bigtreetech-cb2.dtsi index 7f578c50b4ad..b6cf03a7ba66 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-bigtreetech-cb2.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3566-bigtreetech-cb2.dtsi @@ -120,7 +120,7 @@ compatible = "regulator-fixed"; regulator-name = "vcc3v3_pcie"; enable-active-high; - gpios = <&gpio0 RK_PB1 GPIO_ACTIVE_HIGH>; + gpios = <&gpio4 RK_PB1 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&pcie_drv>; regulator-always-on; @@ -187,7 +187,7 @@ vcc5v0_usb2b: regulator-vcc5v0-usb2b { compatible = "regulator-fixed"; enable-active-high; - gpio = <&gpio0 RK_PC4 GPIO_ACTIVE_HIGH>; + gpio = <&gpio4 RK_PC4 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&vcc5v0_usb2b_en>; regulator-name = "vcc5v0_usb2b"; @@ -199,7 +199,7 @@ vcc5v0_usb2t: regulator-vcc5v0-usb2t { compatible = "regulator-fixed"; enable-active-high; - gpios = <&gpio0 RK_PD5 GPIO_ACTIVE_HIGH>; + gpios = <&gpio3 RK_PD5 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&vcc5v0_usb2t_en>; regulator-name = "vcc5v0_usb2t"; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-pinetab2.dtsi b/arch/arm64/boot/dts/rockchip/rk3566-pinetab2.dtsi index d0e38412d56a..08bf40de17ea 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-pinetab2.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3566-pinetab2.dtsi @@ -789,7 +789,7 @@ vccio1-supply = <&vccio_acodec>; vccio2-supply = <&vcc_1v8>; vccio3-supply = <&vccio_sd>; - vccio4-supply = <&vcc_1v8>; + vccio4-supply = <&vcca1v8_pmu>; vccio5-supply = <&vcc_1v8>; vccio6-supply = <&vcc1v8_dvp>; vccio7-supply = <&vcc_3v3>; diff --git a/arch/arm64/boot/dts/rockchip/rk3568-odroid-m1.dts b/arch/arm64/boot/dts/rockchip/rk3568-odroid-m1.dts index 0f844806ec54..442a2bc43ba8 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-odroid-m1.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-odroid-m1.dts @@ -482,6 +482,8 @@ }; &i2s1_8ch { + pinctrl-names = "default"; + pinctrl-0 = <&i2s1m0_sclktx &i2s1m0_lrcktx &i2s1m0_sdi0 &i2s1m0_sdo0>; rockchip,trcm-sync-tx-only; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3576.dtsi b/arch/arm64/boot/dts/rockchip/rk3576.dtsi index fc4e9e07f1cf..a86fc6b4e8c4 100644 --- a/arch/arm64/boot/dts/rockchip/rk3576.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3576.dtsi @@ -276,12 +276,6 @@ opp-microvolt = <900000 900000 950000>; clock-latency-ns = <40000>; }; - - opp-2208000000 { - opp-hz = /bits/ 64 <2208000000>; - opp-microvolt = <950000 950000 950000>; - clock-latency-ns = <40000>; - }; }; cluster1_opp_table: opp-table-cluster1 { @@ -348,12 +342,6 @@ opp-microvolt = <925000 925000 950000>; clock-latency-ns = <40000>; }; - - opp-2304000000 { - opp-hz = /bits/ 64 <2304000000>; - opp-microvolt = <950000 950000 950000>; - clock-latency-ns = <40000>; - }; }; gpu_opp_table: opp-table-gpu { @@ -2561,8 +2549,6 @@ interrupts = <GIC_SPI 97 IRQ_TYPE_LEVEL_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&i2c9m0_xfer>; - resets = <&cru SRST_I2C9>, <&cru SRST_P_I2C9>; - reset-names = "i2c", "apb"; #address-cells = <1>; #size-cells = <0>; status = "disabled"; diff --git a/arch/arm64/boot/dts/rockchip/rk3588-opp.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-opp.dtsi index 0f1a77697351..b5d630d2c879 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-opp.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-opp.dtsi @@ -115,7 +115,7 @@ }; }; - gpu_opp_table: opp-table { + gpu_opp_table: opp-table-gpu { compatible = "operating-points-v2"; opp-300000000 { diff --git a/arch/arm64/boot/dts/rockchip/rk3588-tiger.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-tiger.dtsi index b44e89e1bb15..365c1d958f2d 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-tiger.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-tiger.dtsi @@ -382,14 +382,12 @@ cap-mmc-highspeed; mmc-ddr-1_8v; mmc-hs200-1_8v; - mmc-hs400-1_8v; - mmc-hs400-enhanced-strobe; mmc-pwrseq = <&emmc_pwrseq>; no-sdio; no-sd; non-removable; pinctrl-names = "default"; - pinctrl-0 = <&emmc_bus8 &emmc_cmd &emmc_clk &emmc_data_strobe>; + pinctrl-0 = <&emmc_bus8 &emmc_cmd &emmc_clk>; vmmc-supply = <&vcc_3v3_s3>; vqmmc-supply = <&vcc_1v8_s3>; status = "okay"; diff --git a/arch/arm64/boot/dts/rockchip/rk3588j.dtsi b/arch/arm64/boot/dts/rockchip/rk3588j.dtsi index 9884a5df47df..e1e0e3fc0ca7 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588j.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588j.dtsi @@ -66,7 +66,7 @@ }; }; - gpu_opp_table: opp-table { + gpu_opp_table: opp-table-gpu { compatible = "operating-points-v2"; opp-300000000 { diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts b/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts index ad6d04793b0a..83b9b6645a1e 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts @@ -14,8 +14,8 @@ gpios = <&gpio0 RK_PC5 GPIO_ACTIVE_HIGH>; regulator-name = "vcc3v3_pcie20"; regulator-boot-on; - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; startup-delay-us = <50000>; vin-supply = <&vcc5v0_sys>; }; diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index e3a2d37bd104..1a48faad2473 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -1341,7 +1341,7 @@ CONFIG_COMMON_CLK_RS9_PCIE=y CONFIG_COMMON_CLK_VC3=y CONFIG_COMMON_CLK_VC5=y CONFIG_COMMON_CLK_BD718XX=m -CONFIG_CLK_RASPBERRYPI=m +CONFIG_CLK_RASPBERRYPI=y CONFIG_CLK_IMX8MM=y CONFIG_CLK_IMX8MN=y CONFIG_CLK_IMX8MP=y diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index 00d97b8a757f..51746005239b 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -26,9 +26,12 @@ void __init apply_alternatives_all(void); bool alternative_is_applied(u16 cpucap); #ifdef CONFIG_MODULES -void apply_alternatives_module(void *start, size_t length); +int apply_alternatives_module(void *start, size_t length); #else -static inline void apply_alternatives_module(void *start, size_t length) { } +static inline int apply_alternatives_module(void *start, size_t length) +{ + return 0; +} #endif void alt_cb_patch_nops(struct alt_instr *alt, __le32 *origptr, diff --git a/arch/arm64/include/asm/bug.h b/arch/arm64/include/asm/bug.h index 28be048db3f6..bceeaec21fb9 100644 --- a/arch/arm64/include/asm/bug.h +++ b/arch/arm64/include/asm/bug.h @@ -19,7 +19,7 @@ unreachable(); \ } while (0) -#define __WARN_FLAGS(flags) __BUG_FLAGS(BUGFLAG_WARNING|(flags)) +#define __WARN_FLAGS(cond_str, flags) __BUG_FLAGS(BUGFLAG_WARNING|(flags)) #define HAVE_ARCH_BUG diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index b37da3ee8529..99a7c0235e6d 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -24,22 +24,48 @@ * ID_AA64MMFR4_EL1.E2H0 < 0. On such CPUs HCR_EL2.E2H is RES1, but it * can reset into an UNKNOWN state and might not read as 1 until it has * been initialized explicitly. - * - * Fruity CPUs seem to have HCR_EL2.E2H set to RAO/WI, but - * don't advertise it (they predate this relaxation). - * * Initalize HCR_EL2.E2H so that later code can rely upon HCR_EL2.E2H * indicating whether the CPU is running in E2H mode. */ mrs_s x1, SYS_ID_AA64MMFR4_EL1 sbfx x1, x1, #ID_AA64MMFR4_EL1_E2H0_SHIFT, #ID_AA64MMFR4_EL1_E2H0_WIDTH cmp x1, #0 - b.ge .LnVHE_\@ + b.lt .LnE2H0_\@ + /* + * Unfortunately, HCR_EL2.E2H can be RES1 even if not advertised + * as such via ID_AA64MMFR4_EL1.E2H0: + * + * - Fruity CPUs predate the !FEAT_E2H0 relaxation, and seem to + * have HCR_EL2.E2H implemented as RAO/WI. + * + * - On CPUs that lack FEAT_FGT, a hypervisor can't trap guest + * reads of ID_AA64MMFR4_EL1 to advertise !FEAT_E2H0. NV + * guests on these hosts can write to HCR_EL2.E2H without + * trapping to the hypervisor, but these writes have no + * functional effect. + * + * Handle both cases by checking for an essential VHE property + * (system register remapping) to decide whether we're + * effectively VHE-only or not. + */ + msr_hcr_el2 x0 // Setup HCR_EL2 as nVHE + isb + mov x1, #1 // Write something to FAR_EL1 + msr far_el1, x1 + isb + mov x1, #2 // Try to overwrite it via FAR_EL2 + msr far_el2, x1 + isb + mrs x1, far_el1 // If we see the latest write in FAR_EL1, + cmp x1, #2 // we can safely assume we are VHE only. + b.ne .LnVHE_\@ // Otherwise, we know that nVHE works. + +.LnE2H0_\@: orr x0, x0, #HCR_E2H -.LnVHE_\@: msr_hcr_el2 x0 isb +.LnVHE_\@: .endm .macro __init_el2_sctlr diff --git a/arch/arm64/include/asm/kfence.h b/arch/arm64/include/asm/kfence.h index a81937fae9f6..21dbc9dda747 100644 --- a/arch/arm64/include/asm/kfence.h +++ b/arch/arm64/include/asm/kfence.h @@ -10,8 +10,6 @@ #include <asm/set_memory.h> -static inline bool arch_kfence_init_pool(void) { return true; } - static inline bool kfence_protect_page(unsigned long addr, bool protect) { set_memory_valid(addr, 1, !protect); @@ -25,6 +23,7 @@ static inline bool arm64_kfence_can_set_direct_map(void) { return !kfence_early_init; } +bool arch_kfence_init_pool(void); #else /* CONFIG_KFENCE */ static inline bool arm64_kfence_can_set_direct_map(void) { return false; } #endif /* CONFIG_KFENCE */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index b763293281c8..64302c438355 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -816,6 +816,11 @@ struct kvm_vcpu_arch { u64 hcrx_el2; u64 mdcr_el2; + struct { + u64 r; + u64 w; + } fgt[__NR_FGT_GROUP_IDS__]; + /* Exception Information */ struct kvm_vcpu_fault_info fault; @@ -1600,6 +1605,51 @@ static inline bool kvm_arch_has_irq_bypass(void) void compute_fgu(struct kvm *kvm, enum fgt_group_id fgt); void get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg, u64 *res0, u64 *res1); void check_feature_map(void); +void kvm_vcpu_load_fgt(struct kvm_vcpu *vcpu); + +static __always_inline enum fgt_group_id __fgt_reg_to_group_id(enum vcpu_sysreg reg) +{ + switch (reg) { + case HFGRTR_EL2: + case HFGWTR_EL2: + return HFGRTR_GROUP; + case HFGITR_EL2: + return HFGITR_GROUP; + case HDFGRTR_EL2: + case HDFGWTR_EL2: + return HDFGRTR_GROUP; + case HAFGRTR_EL2: + return HAFGRTR_GROUP; + case HFGRTR2_EL2: + case HFGWTR2_EL2: + return HFGRTR2_GROUP; + case HFGITR2_EL2: + return HFGITR2_GROUP; + case HDFGRTR2_EL2: + case HDFGWTR2_EL2: + return HDFGRTR2_GROUP; + default: + BUILD_BUG_ON(1); + } +} +#define vcpu_fgt(vcpu, reg) \ + ({ \ + enum fgt_group_id id = __fgt_reg_to_group_id(reg); \ + u64 *p; \ + switch (reg) { \ + case HFGWTR_EL2: \ + case HDFGWTR_EL2: \ + case HFGWTR2_EL2: \ + case HDFGWTR2_EL2: \ + p = &(vcpu)->arch.fgt[id].w; \ + break; \ + default: \ + p = &(vcpu)->arch.fgt[id].r; \ + break; \ + } \ + \ + p; \ + }) #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index 2312e6ee595f..258cca4b4873 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -33,8 +33,8 @@ struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma, unsigned long vaddr); #define vma_alloc_zeroed_movable_folio vma_alloc_zeroed_movable_folio -void tag_clear_highpage(struct page *to); -#define __HAVE_ARCH_TAG_CLEAR_HIGHPAGE +bool tag_clear_highpages(struct page *to, int numpages); +#define __HAVE_ARCH_TAG_CLEAR_HIGHPAGES #define clear_user_page(page, vaddr, pg) clear_page(page) #define copy_user_page(to, from, vaddr, pg) copy_page(to, from) diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index 9abcc8ef3087..b57b2bb00967 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -77,7 +77,7 @@ __percpu_##name##_case_##sz(void *ptr, unsigned long val) \ " stxr" #sfx "\t%w[loop], %" #w "[tmp], %[ptr]\n" \ " cbnz %w[loop], 1b", \ /* LSE atomics */ \ - #op_lse "\t%" #w "[val], %[ptr]\n" \ + #op_lse "\t%" #w "[val], %" #w "[tmp], %[ptr]\n" \ __nops(3)) \ : [loop] "=&r" (loop), [tmp] "=&r" (tmp), \ [ptr] "+Q"(*(u##sz *)ptr) \ @@ -124,9 +124,16 @@ PERCPU_RW_OPS(8) PERCPU_RW_OPS(16) PERCPU_RW_OPS(32) PERCPU_RW_OPS(64) -PERCPU_OP(add, add, stadd) -PERCPU_OP(andnot, bic, stclr) -PERCPU_OP(or, orr, stset) + +/* + * Use value-returning atomics for CPU-local ops as they are more likely + * to execute "near" to the CPU (e.g. in L1$). + * + * https://lore.kernel.org/r/e7d539ed-ced0-4b96-8ecd-048a5b803b85@paulmck-laptop + */ +PERCPU_OP(add, add, ldadd) +PERCPU_OP(andnot, bic, ldclr) +PERCPU_OP(or, orr, ldset) PERCPU_RET_OP(add, add, ldadd) #undef PERCPU_RW_OPS diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index aa89c2e67ebc..0944e296dd4a 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -293,7 +293,8 @@ static inline pmd_t set_pmd_bit(pmd_t pmd, pgprot_t prot) static inline pte_t pte_mkwrite_novma(pte_t pte) { pte = set_pte_bit(pte, __pgprot(PTE_WRITE)); - pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY)); + if (pte_sw_dirty(pte)) + pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY)); return pte; } diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h index a76f9b387a26..c59f6324f2bb 100644 --- a/arch/arm64/include/asm/scs.h +++ b/arch/arm64/include/asm/scs.h @@ -53,7 +53,7 @@ enum { EDYNSCS_INVALID_CFA_OPCODE = 4, }; -int __pi_scs_patch(const u8 eh_frame[], int size); +int __pi_scs_patch(const u8 eh_frame[], int size, bool skip_dry_run); #endif /* __ASSEMBLY __ */ diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index 8fef12626090..900454aaa292 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -117,6 +117,7 @@ void spectre_bhb_patch_wa3(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst); void spectre_bhb_patch_clearbhb(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst); +void spectre_print_disabled_mitigations(void); #endif /* __ASSEMBLY__ */ #endif /* __ASM_SPECTRE_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 6455db1b54fd..c231d2a3e515 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1220,10 +1220,19 @@ __val; \ }) +/* + * The "Z" constraint combined with the "%x0" template should be enough + * to force XZR generation if (v) is a constant 0 value but LLVM does not + * yet understand that modifier/constraint combo so a conditional is required + * to nudge the compiler into using XZR as a source for a 0 constant value. + */ #define write_sysreg_s(v, r) do { \ u64 __val = (u64)(v); \ u32 __maybe_unused __check_r = (u32)(r); \ - asm volatile(__msr_s(r, "%x0") : : "rZ" (__val)); \ + if (__builtin_constant_p(__val) && __val == 0) \ + asm volatile(__msr_s(r, "xzr")); \ + else \ + asm volatile(__msr_s(r, "%x0") : : "r" (__val)); \ } while (0) /* diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 1aa4ecb73429..6490930deef8 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -422,9 +422,9 @@ static __must_check __always_inline bool user_access_begin(const void __user *pt } #define user_access_begin(a,b) user_access_begin(a,b) #define user_access_end() uaccess_ttbr0_disable() -#define unsafe_put_user(x, ptr, label) \ +#define arch_unsafe_put_user(x, ptr, label) \ __raw_put_mem("sttr", x, uaccess_mask_ptr(ptr), label, U) -#define unsafe_get_user(x, ptr, label) \ +#define arch_unsafe_get_user(x, ptr, label) \ __raw_get_mem("ldtr", x, uaccess_mask_ptr(ptr), label, U) /* diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index 7aca29e1d30b..f1cb2447afc9 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -197,8 +197,6 @@ out: */ void __init acpi_boot_table_init(void) { - int ret; - /* * Enable ACPI instead of device tree unless * - ACPI has been disabled explicitly (acpi=off), or @@ -252,12 +250,8 @@ done: * behaviour, use acpi=nospcr to disable console in ACPI SPCR * table as default serial console. */ - ret = acpi_parse_spcr(earlycon_acpi_spcr_enable, + acpi_parse_spcr(earlycon_acpi_spcr_enable, !param_acpi_nospcr); - if (!ret || param_acpi_nospcr || !IS_ENABLED(CONFIG_ACPI_SPCR_TABLE)) - pr_info("Use ACPI SPCR as default console: No\n"); - else - pr_info("Use ACPI SPCR as default console: Yes\n"); if (IS_ENABLED(CONFIG_ACPI_BGRT)) acpi_table_parse(ACPI_SIG_BGRT, acpi_parse_bgrt); @@ -357,16 +351,6 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) * as long as we take care not to create a writable * mapping for executable code. */ - fallthrough; - - case EFI_ACPI_MEMORY_NVS: - /* - * ACPI NVS marks an area reserved for use by the - * firmware, even after exiting the boot service. - * This may be used by the firmware for sharing dynamic - * tables/data (e.g., ACPI CCEL) with the OS. Map it - * as read-only. - */ prot = PAGE_KERNEL_RO; break; diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 8ff6610af496..f5ec7e7c1d3f 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -139,9 +139,9 @@ static noinstr void clean_dcache_range_nopatch(u64 start, u64 end) } while (cur += d_size, cur < end); } -static void __apply_alternatives(const struct alt_region *region, - bool is_module, - unsigned long *cpucap_mask) +static int __apply_alternatives(const struct alt_region *region, + bool is_module, + unsigned long *cpucap_mask) { struct alt_instr *alt; __le32 *origptr, *updptr; @@ -166,10 +166,13 @@ static void __apply_alternatives(const struct alt_region *region, updptr = is_module ? origptr : lm_alias(origptr); nr_inst = alt->orig_len / AARCH64_INSN_SIZE; - if (ALT_HAS_CB(alt)) + if (ALT_HAS_CB(alt)) { alt_cb = ALT_REPL_PTR(alt); - else + if (is_module && !core_kernel_text((unsigned long)alt_cb)) + return -ENOEXEC; + } else { alt_cb = patch_alternative; + } alt_cb(alt, origptr, updptr, nr_inst); @@ -193,6 +196,8 @@ static void __apply_alternatives(const struct alt_region *region, bitmap_and(applied_alternatives, applied_alternatives, system_cpucaps, ARM64_NCAPS); } + + return 0; } static void __init apply_alternatives_vdso(void) @@ -277,7 +282,7 @@ void __init apply_boot_alternatives(void) } #ifdef CONFIG_MODULES -void apply_alternatives_module(void *start, size_t length) +int apply_alternatives_module(void *start, size_t length) { struct alt_region region = { .begin = start, @@ -287,7 +292,7 @@ void apply_alternatives_module(void *start, size_t length) bitmap_fill(all_capabilities, ARM64_NCAPS); - __apply_alternatives(®ion, true, &all_capabilities[0]); + return __apply_alternatives(®ion, true, &all_capabilities[0]); } #endif diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 5ed401ff79e3..e25b0f84a22d 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -95,6 +95,7 @@ #include <asm/vectors.h> #include <asm/virt.h> +#include <asm/spectre.h> /* Kernel representation of AT_HWCAP and AT_HWCAP2 */ static DECLARE_BITMAP(elf_hwcap, MAX_CPU_FEATURES) __read_mostly; @@ -3875,6 +3876,11 @@ static void __init setup_system_capabilities(void) */ if (system_uses_ttbr0_pan()) pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n"); + + /* + * Report Spectre mitigations status. + */ + spectre_print_disabled_mitigations(); } void __init setup_system_features(void) diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index f546a914f041..0a97e2621f60 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -100,7 +100,7 @@ static __always_inline void arm64_enter_from_user_mode(struct pt_regs *regs) static __always_inline void arm64_exit_to_user_mode(struct pt_regs *regs) { local_irq_disable(); - exit_to_user_mode_prepare(regs); + exit_to_user_mode_prepare_legacy(regs); local_daif_mask(); mte_check_tfsr_exit(); exit_to_user_mode(); @@ -697,6 +697,8 @@ static void noinstr el0_breakpt(struct pt_regs *regs, unsigned long esr) static void noinstr el0_softstp(struct pt_regs *regs, unsigned long esr) { + bool step_done; + if (!is_ttbr0_addr(regs->pc)) arm64_apply_bp_hardening(); @@ -707,10 +709,10 @@ static void noinstr el0_softstp(struct pt_regs *regs, unsigned long esr) * If we are stepping a suspended breakpoint there's nothing more to do: * the single-step is complete. */ - if (!try_step_suspended_breakpoints(regs)) { - local_daif_restore(DAIF_PROCCTX); + step_done = try_step_suspended_breakpoints(regs); + local_daif_restore(DAIF_PROCCTX); + if (!step_done) do_el0_softstep(esr, regs); - } arm64_exit_to_user_mode(regs); } diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index d6d443c4a01a..24adb581af0e 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -489,16 +489,29 @@ int module_finalize(const Elf_Ehdr *hdr, int ret; s = find_section(hdr, sechdrs, ".altinstructions"); - if (s) - apply_alternatives_module((void *)s->sh_addr, s->sh_size); + if (s) { + ret = apply_alternatives_module((void *)s->sh_addr, s->sh_size); + if (ret < 0) { + pr_err("module %s: error occurred when applying alternatives\n", me->name); + return ret; + } + } if (scs_is_dynamic()) { s = find_section(hdr, sechdrs, ".init.eh_frame"); if (s) { - ret = __pi_scs_patch((void *)s->sh_addr, s->sh_size); - if (ret) + /* + * Because we can reject modules that are malformed + * so SCS patching fails, skip dry run and try to patch + * it in place. If patching fails, the module would not + * be loaded anyway. + */ + ret = __pi_scs_patch((void *)s->sh_addr, s->sh_size, true); + if (ret) { pr_err("module %s: error occurred during dynamic SCS patching (%d)\n", me->name, ret); + return -ENOEXEC; + } } } diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index 43f7a2f39403..32148bf09c1d 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -476,7 +476,8 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr, folio = page_folio(page); if (folio_test_hugetlb(folio)) - WARN_ON_ONCE(!folio_test_hugetlb_mte_tagged(folio)); + WARN_ON_ONCE(!folio_test_hugetlb_mte_tagged(folio) && + !is_huge_zero_folio(folio)); else WARN_ON_ONCE(!page_mte_tagged(page) && !is_zero_page(page)); diff --git a/arch/arm64/kernel/pi/map_kernel.c b/arch/arm64/kernel/pi/map_kernel.c index e8ddbde31a83..659297f87cfa 100644 --- a/arch/arm64/kernel/pi/map_kernel.c +++ b/arch/arm64/kernel/pi/map_kernel.c @@ -104,7 +104,7 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level) if (enable_scs) { scs_patch(__eh_frame_start + va_offset, - __eh_frame_end - __eh_frame_start); + __eh_frame_end - __eh_frame_start, false); asm("ic ialluis"); dynamic_scs_is_enabled = true; diff --git a/arch/arm64/kernel/pi/patch-scs.c b/arch/arm64/kernel/pi/patch-scs.c index 55d0cd64ef71..bbe7d30ed12b 100644 --- a/arch/arm64/kernel/pi/patch-scs.c +++ b/arch/arm64/kernel/pi/patch-scs.c @@ -225,7 +225,7 @@ static int scs_handle_fde_frame(const struct eh_frame *frame, return 0; } -int scs_patch(const u8 eh_frame[], int size) +int scs_patch(const u8 eh_frame[], int size, bool skip_dry_run) { int code_alignment_factor = 1; bool fde_use_sdata8 = false; @@ -277,11 +277,13 @@ int scs_patch(const u8 eh_frame[], int size) } } else { ret = scs_handle_fde_frame(frame, code_alignment_factor, - fde_use_sdata8, true); + fde_use_sdata8, !skip_dry_run); if (ret) return ret; - scs_handle_fde_frame(frame, code_alignment_factor, - fde_use_sdata8, false); + + if (!skip_dry_run) + scs_handle_fde_frame(frame, code_alignment_factor, + fde_use_sdata8, false); } p += sizeof(frame->size) + frame->size; diff --git a/arch/arm64/kernel/pi/pi.h b/arch/arm64/kernel/pi/pi.h index 08ef9f80456b..aec3172d4003 100644 --- a/arch/arm64/kernel/pi/pi.h +++ b/arch/arm64/kernel/pi/pi.h @@ -27,7 +27,7 @@ extern pgd_t init_pg_dir[], init_pg_end[]; void init_feature_override(u64 boot_status, const void *fdt, int chosen); u64 kaslr_early_init(void *fdt, int chosen); void relocate_kernel(u64 offset); -int scs_patch(const u8 eh_frame[], int size); +int scs_patch(const u8 eh_frame[], int size, bool skip_dry_run); void map_range(phys_addr_t *pte, u64 start, u64 end, phys_addr_t pa, pgprot_t prot, int level, pte_t *tbl, bool may_use_cont, diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 8ab6104a4883..43a0361a8bf0 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -49,7 +49,10 @@ void *alloc_insn_page(void) addr = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE); if (!addr) return NULL; - set_memory_rox((unsigned long)addr, 1); + if (set_memory_rox((unsigned long)addr, 1)) { + execmem_free(addr); + return NULL; + } return addr; } diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index f9a32dfde006..80a580e019c5 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -91,12 +91,7 @@ early_param("nospectre_v2", parse_spectre_v2_param); static bool spectre_v2_mitigations_off(void) { - bool ret = __nospectre_v2 || cpu_mitigations_off(); - - if (ret) - pr_info_once("spectre-v2 mitigation disabled by command line option\n"); - - return ret; + return __nospectre_v2 || cpu_mitigations_off(); } static const char *get_bhb_affected_string(enum mitigation_state bhb_state) @@ -421,13 +416,8 @@ early_param("ssbd", parse_spectre_v4_param); */ static bool spectre_v4_mitigations_off(void) { - bool ret = cpu_mitigations_off() || - __spectre_v4_policy == SPECTRE_V4_POLICY_MITIGATION_DISABLED; - - if (ret) - pr_info_once("spectre-v4 mitigation disabled by command-line option\n"); - - return ret; + return cpu_mitigations_off() || + __spectre_v4_policy == SPECTRE_V4_POLICY_MITIGATION_DISABLED; } /* Do we need to toggle the mitigation state on entry to/exit from the kernel? */ @@ -1043,9 +1033,7 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry) if (arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE) { /* No point mitigating Spectre-BHB alone. */ } else if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY)) { - pr_info_once("spectre-bhb mitigation disabled by compile time option\n"); - } else if (cpu_mitigations_off() || __nospectre_bhb) { - pr_info_once("spectre-bhb mitigation disabled by command line option\n"); + /* Do nothing */ } else if (supports_ecbhb(SCOPE_LOCAL_CPU)) { state = SPECTRE_MITIGATED; set_bit(BHB_HW, &system_bhb_mitigations); @@ -1199,3 +1187,18 @@ void unpriv_ebpf_notify(int new_state) pr_err("WARNING: %s", EBPF_WARN); } #endif + +void spectre_print_disabled_mitigations(void) +{ + /* Keep a single copy of the common message suffix to avoid duplication. */ + const char *spectre_disabled_suffix = "mitigation disabled by command-line option\n"; + + if (spectre_v2_mitigations_off()) + pr_info("spectre-v2 %s", spectre_disabled_suffix); + + if (spectre_v4_mitigations_off()) + pr_info("spectre-v4 %s", spectre_disabled_suffix); + + if (__nospectre_bhb || cpu_mitigations_off()) + pr_info("spectre-bhb %s", spectre_disabled_suffix); +} diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 68cea3a4a35c..6fb838eee2e7 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -1094,7 +1094,7 @@ static void ipi_setup_sgi(int ipi) irq = ipi_irq_base + ipi; if (ipi_should_be_nmi(ipi)) { - err = request_percpu_nmi(irq, ipi_handler, "IPI", &irq_stat); + err = request_percpu_nmi(irq, ipi_handler, "IPI", NULL, &irq_stat); WARN(err, "Could not request IRQ %d as NMI, err=%d\n", irq, err); } else { err = request_percpu_irq(irq, ipi_handler, "IPI", &irq_stat); diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index ffa3536581f6..9d0efed91414 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -63,7 +63,7 @@ VDSO_CFLAGS += -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ $(filter -Werror,$(KBUILD_CPPFLAGS)) \ -Werror-implicit-function-declaration \ -Wno-format-security \ - -std=gnu11 + -std=gnu11 -fms-extensions VDSO_CFLAGS += -O2 # Some useful compiler-dependent flags from top-level Makefile VDSO_CFLAGS += $(call cc32-option,-Wno-pointer-sign) @@ -71,6 +71,7 @@ VDSO_CFLAGS += -fno-strict-overflow VDSO_CFLAGS += $(call cc32-option,-Werror=strict-prototypes) VDSO_CFLAGS += -Werror=date-time VDSO_CFLAGS += $(call cc32-option,-Werror=incompatible-pointer-types) +VDSO_CFLAGS += $(if $(CONFIG_CC_IS_CLANG),-Wno-microsoft-anon-tag) # Compile as THUMB2 or ARM. Unwinding via frame-pointers in THUMB2 is # unreliable. diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index dbd74e4885e2..3f675875abea 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -66,7 +66,7 @@ static int nr_timers(struct kvm_vcpu *vcpu) u32 timer_get_ctl(struct arch_timer_context *ctxt) { - struct kvm_vcpu *vcpu = ctxt->vcpu; + struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt); switch(arch_timer_ctx_index(ctxt)) { case TIMER_VTIMER: @@ -85,7 +85,7 @@ u32 timer_get_ctl(struct arch_timer_context *ctxt) u64 timer_get_cval(struct arch_timer_context *ctxt) { - struct kvm_vcpu *vcpu = ctxt->vcpu; + struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt); switch(arch_timer_ctx_index(ctxt)) { case TIMER_VTIMER: @@ -104,7 +104,7 @@ u64 timer_get_cval(struct arch_timer_context *ctxt) static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl) { - struct kvm_vcpu *vcpu = ctxt->vcpu; + struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt); switch(arch_timer_ctx_index(ctxt)) { case TIMER_VTIMER: @@ -126,7 +126,7 @@ static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl) static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval) { - struct kvm_vcpu *vcpu = ctxt->vcpu; + struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt); switch(arch_timer_ctx_index(ctxt)) { case TIMER_VTIMER: @@ -146,16 +146,6 @@ static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval) } } -static void timer_set_offset(struct arch_timer_context *ctxt, u64 offset) -{ - if (!ctxt->offset.vm_offset) { - WARN(offset, "timer %ld\n", arch_timer_ctx_index(ctxt)); - return; - } - - WRITE_ONCE(*ctxt->offset.vm_offset, offset); -} - u64 kvm_phys_timer_read(void) { return timecounter->cc->read(timecounter->cc); @@ -343,7 +333,7 @@ static enum hrtimer_restart kvm_hrtimer_expire(struct hrtimer *hrt) u64 ns; ctx = container_of(hrt, struct arch_timer_context, hrtimer); - vcpu = ctx->vcpu; + vcpu = timer_context_to_vcpu(ctx); trace_kvm_timer_hrtimer_expire(ctx); @@ -436,8 +426,9 @@ static void kvm_timer_update_status(struct arch_timer_context *ctx, bool level) * * But hey, it's fast, right? */ - if (is_hyp_ctxt(ctx->vcpu) && - (ctx == vcpu_vtimer(ctx->vcpu) || ctx == vcpu_ptimer(ctx->vcpu))) { + struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctx); + if (is_hyp_ctxt(vcpu) && + (ctx == vcpu_vtimer(vcpu) || ctx == vcpu_ptimer(vcpu))) { unsigned long val = timer_get_ctl(ctx); __assign_bit(__ffs(ARCH_TIMER_CTRL_IT_STAT), &val, level); timer_set_ctl(ctx, val); @@ -470,7 +461,7 @@ static void timer_emulate(struct arch_timer_context *ctx) trace_kvm_timer_emulate(ctx, should_fire); if (should_fire != ctx->irq.level) - kvm_timer_update_irq(ctx->vcpu, should_fire, ctx); + kvm_timer_update_irq(timer_context_to_vcpu(ctx), should_fire, ctx); kvm_timer_update_status(ctx, should_fire); @@ -498,7 +489,7 @@ static void set_cntpoff(u64 cntpoff) static void timer_save_state(struct arch_timer_context *ctx) { - struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu); + struct arch_timer_cpu *timer = vcpu_timer(timer_context_to_vcpu(ctx)); enum kvm_arch_timers index = arch_timer_ctx_index(ctx); unsigned long flags; @@ -609,7 +600,7 @@ static void kvm_timer_unblocking(struct kvm_vcpu *vcpu) static void timer_restore_state(struct arch_timer_context *ctx) { - struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu); + struct arch_timer_cpu *timer = vcpu_timer(timer_context_to_vcpu(ctx)); enum kvm_arch_timers index = arch_timer_ctx_index(ctx); unsigned long flags; @@ -668,7 +659,7 @@ static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, boo static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx) { - struct kvm_vcpu *vcpu = ctx->vcpu; + struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctx); bool phys_active = false; /* @@ -677,7 +668,7 @@ static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx) * this point and the register restoration, we'll take the * interrupt anyway. */ - kvm_timer_update_irq(ctx->vcpu, kvm_timer_should_fire(ctx), ctx); + kvm_timer_update_irq(vcpu, kvm_timer_should_fire(ctx), ctx); if (irqchip_in_kernel(vcpu->kvm)) phys_active = kvm_vgic_map_is_active(vcpu, timer_irq(ctx)); @@ -1063,7 +1054,7 @@ static void timer_context_init(struct kvm_vcpu *vcpu, int timerid) struct arch_timer_context *ctxt = vcpu_get_timer(vcpu, timerid); struct kvm *kvm = vcpu->kvm; - ctxt->vcpu = vcpu; + ctxt->timer_id = timerid; if (timerid == TIMER_VTIMER) ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset; @@ -1121,49 +1112,6 @@ void kvm_timer_cpu_down(void) disable_percpu_irq(host_ptimer_irq); } -int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) -{ - struct arch_timer_context *timer; - - switch (regid) { - case KVM_REG_ARM_TIMER_CTL: - timer = vcpu_vtimer(vcpu); - kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value); - break; - case KVM_REG_ARM_TIMER_CNT: - if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, - &vcpu->kvm->arch.flags)) { - timer = vcpu_vtimer(vcpu); - timer_set_offset(timer, kvm_phys_timer_read() - value); - } - break; - case KVM_REG_ARM_TIMER_CVAL: - timer = vcpu_vtimer(vcpu); - kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value); - break; - case KVM_REG_ARM_PTIMER_CTL: - timer = vcpu_ptimer(vcpu); - kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value); - break; - case KVM_REG_ARM_PTIMER_CNT: - if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, - &vcpu->kvm->arch.flags)) { - timer = vcpu_ptimer(vcpu); - timer_set_offset(timer, kvm_phys_timer_read() - value); - } - break; - case KVM_REG_ARM_PTIMER_CVAL: - timer = vcpu_ptimer(vcpu); - kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value); - break; - - default: - return -1; - } - - return 0; -} - static u64 read_timer_ctl(struct arch_timer_context *timer) { /* @@ -1180,31 +1128,6 @@ static u64 read_timer_ctl(struct arch_timer_context *timer) return ctl; } -u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) -{ - switch (regid) { - case KVM_REG_ARM_TIMER_CTL: - return kvm_arm_timer_read(vcpu, - vcpu_vtimer(vcpu), TIMER_REG_CTL); - case KVM_REG_ARM_TIMER_CNT: - return kvm_arm_timer_read(vcpu, - vcpu_vtimer(vcpu), TIMER_REG_CNT); - case KVM_REG_ARM_TIMER_CVAL: - return kvm_arm_timer_read(vcpu, - vcpu_vtimer(vcpu), TIMER_REG_CVAL); - case KVM_REG_ARM_PTIMER_CTL: - return kvm_arm_timer_read(vcpu, - vcpu_ptimer(vcpu), TIMER_REG_CTL); - case KVM_REG_ARM_PTIMER_CNT: - return kvm_arm_timer_read(vcpu, - vcpu_ptimer(vcpu), TIMER_REG_CNT); - case KVM_REG_ARM_PTIMER_CVAL: - return kvm_arm_timer_read(vcpu, - vcpu_ptimer(vcpu), TIMER_REG_CVAL); - } - return (u64)-1; -} - static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, struct arch_timer_context *timer, enum kvm_arch_timer_regs treg) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index f21d1b7f20f8..052bf0d4d0b0 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -624,6 +624,7 @@ nommu: kvm_timer_vcpu_load(vcpu); kvm_vgic_load(vcpu); kvm_vcpu_load_debug(vcpu); + kvm_vcpu_load_fgt(vcpu); if (has_vhe()) kvm_vcpu_load_vhe(vcpu); kvm_arch_vcpu_load_fp(vcpu); @@ -1794,6 +1795,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, case KVM_GET_VCPU_EVENTS: { struct kvm_vcpu_events events; + if (!kvm_vcpu_initialized(vcpu)) + return -ENOEXEC; + if (kvm_arm_vcpu_get_events(vcpu, &events)) return -EINVAL; @@ -1805,6 +1809,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, case KVM_SET_VCPU_EVENTS: { struct kvm_vcpu_events events; + if (!kvm_vcpu_initialized(vcpu)) + return -ENOEXEC; + if (copy_from_user(&events, argp, sizeof(events))) return -EFAULT; diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index 20bb9af125b1..be26d5aa668c 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -91,7 +91,6 @@ static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 o case OP_AT_S1E2W: case OP_AT_S1E2A: return vcpu_el2_e2h_is_set(vcpu) ? TR_EL20 : TR_EL2; - break; default: return (vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)) ? TR_EL20 : TR_EL10; @@ -1602,13 +1601,17 @@ int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa, int *level) .fn = match_s1_desc, .priv = &dm, }, - .regime = TR_EL10, .as_el0 = false, .pan = false, }; struct s1_walk_result wr = {}; int ret; + if (is_hyp_ctxt(vcpu)) + wi.regime = vcpu_el2_e2h_is_set(vcpu) ? TR_EL20 : TR_EL2; + else + wi.regime = TR_EL10; + ret = setup_s1_walk(vcpu, &wi, &wr, va); if (ret) return ret; diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c index fbd8944a3dea..24bb3f36e9d5 100644 --- a/arch/arm64/kvm/config.c +++ b/arch/arm64/kvm/config.c @@ -5,6 +5,8 @@ */ #include <linux/kvm_host.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_nested.h> #include <asm/sysreg.h> /* @@ -1428,3 +1430,91 @@ void get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg, u64 *res0, u64 *r break; } } + +static __always_inline struct fgt_masks *__fgt_reg_to_masks(enum vcpu_sysreg reg) +{ + switch (reg) { + case HFGRTR_EL2: + return &hfgrtr_masks; + case HFGWTR_EL2: + return &hfgwtr_masks; + case HFGITR_EL2: + return &hfgitr_masks; + case HDFGRTR_EL2: + return &hdfgrtr_masks; + case HDFGWTR_EL2: + return &hdfgwtr_masks; + case HAFGRTR_EL2: + return &hafgrtr_masks; + case HFGRTR2_EL2: + return &hfgrtr2_masks; + case HFGWTR2_EL2: + return &hfgwtr2_masks; + case HFGITR2_EL2: + return &hfgitr2_masks; + case HDFGRTR2_EL2: + return &hdfgrtr2_masks; + case HDFGWTR2_EL2: + return &hdfgwtr2_masks; + default: + BUILD_BUG_ON(1); + } +} + +static __always_inline void __compute_fgt(struct kvm_vcpu *vcpu, enum vcpu_sysreg reg) +{ + u64 fgu = vcpu->kvm->arch.fgu[__fgt_reg_to_group_id(reg)]; + struct fgt_masks *m = __fgt_reg_to_masks(reg); + u64 clear = 0, set = 0, val = m->nmask; + + set |= fgu & m->mask; + clear |= fgu & m->nmask; + + if (is_nested_ctxt(vcpu)) { + u64 nested = __vcpu_sys_reg(vcpu, reg); + set |= nested & m->mask; + clear |= ~nested & m->nmask; + } + + val |= set; + val &= ~clear; + *vcpu_fgt(vcpu, reg) = val; +} + +static void __compute_hfgwtr(struct kvm_vcpu *vcpu) +{ + __compute_fgt(vcpu, HFGWTR_EL2); + + if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38)) + *vcpu_fgt(vcpu, HFGWTR_EL2) |= HFGWTR_EL2_TCR_EL1; +} + +static void __compute_hdfgwtr(struct kvm_vcpu *vcpu) +{ + __compute_fgt(vcpu, HDFGWTR_EL2); + + if (is_hyp_ctxt(vcpu)) + *vcpu_fgt(vcpu, HDFGWTR_EL2) |= HDFGWTR_EL2_MDSCR_EL1; +} + +void kvm_vcpu_load_fgt(struct kvm_vcpu *vcpu) +{ + if (!cpus_have_final_cap(ARM64_HAS_FGT)) + return; + + __compute_fgt(vcpu, HFGRTR_EL2); + __compute_hfgwtr(vcpu); + __compute_fgt(vcpu, HFGITR_EL2); + __compute_fgt(vcpu, HDFGRTR_EL2); + __compute_hdfgwtr(vcpu); + __compute_fgt(vcpu, HAFGRTR_EL2); + + if (!cpus_have_final_cap(ARM64_HAS_FGT2)) + return; + + __compute_fgt(vcpu, HFGRTR2_EL2); + __compute_fgt(vcpu, HFGWTR2_EL2); + __compute_fgt(vcpu, HFGITR2_EL2); + __compute_fgt(vcpu, HDFGRTR2_EL2); + __compute_fgt(vcpu, HDFGWTR2_EL2); +} diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index 3515a273eaa2..3ad6b7c6e4ba 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -15,6 +15,12 @@ #include <asm/kvm_arm.h> #include <asm/kvm_emulate.h> +static int cpu_has_spe(u64 dfr0) +{ + return cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_PMSVer_SHIFT) && + !(read_sysreg_s(SYS_PMBIDR_EL1) & PMBIDR_EL1_P); +} + /** * kvm_arm_setup_mdcr_el2 - configure vcpu mdcr_el2 value * @@ -77,13 +83,12 @@ void kvm_init_host_debug_data(void) *host_data_ptr(debug_brps) = SYS_FIELD_GET(ID_AA64DFR0_EL1, BRPs, dfr0); *host_data_ptr(debug_wrps) = SYS_FIELD_GET(ID_AA64DFR0_EL1, WRPs, dfr0); + if (cpu_has_spe(dfr0)) + host_data_set_flag(HAS_SPE); + if (has_vhe()) return; - if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_PMSVer_SHIFT) && - !(read_sysreg_s(SYS_PMBIDR_EL1) & PMBIDR_EL1_P)) - host_data_set_flag(HAS_SPE); - /* Check if we have BRBE implemented and available at the host */ if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_BRBE_SHIFT)) host_data_set_flag(HAS_BRBE); @@ -102,7 +107,7 @@ void kvm_init_host_debug_data(void) void kvm_debug_init_vhe(void) { /* Clear PMSCR_EL1.E{0,1}SPE which reset to UNKNOWN values. */ - if (SYS_FIELD_GET(ID_AA64DFR0_EL1, PMSVer, read_sysreg(id_aa64dfr0_el1))) + if (host_data_test_flag(HAS_SPE)) write_sysreg_el1(0, SYS_PMSCR); } diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 16ba5e9ac86c..1c87699fd886 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -591,64 +591,6 @@ static unsigned long num_core_regs(const struct kvm_vcpu *vcpu) return copy_core_reg_indices(vcpu, NULL); } -static const u64 timer_reg_list[] = { - KVM_REG_ARM_TIMER_CTL, - KVM_REG_ARM_TIMER_CNT, - KVM_REG_ARM_TIMER_CVAL, - KVM_REG_ARM_PTIMER_CTL, - KVM_REG_ARM_PTIMER_CNT, - KVM_REG_ARM_PTIMER_CVAL, -}; - -#define NUM_TIMER_REGS ARRAY_SIZE(timer_reg_list) - -static bool is_timer_reg(u64 index) -{ - switch (index) { - case KVM_REG_ARM_TIMER_CTL: - case KVM_REG_ARM_TIMER_CNT: - case KVM_REG_ARM_TIMER_CVAL: - case KVM_REG_ARM_PTIMER_CTL: - case KVM_REG_ARM_PTIMER_CNT: - case KVM_REG_ARM_PTIMER_CVAL: - return true; - } - return false; -} - -static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) -{ - for (int i = 0; i < NUM_TIMER_REGS; i++) { - if (put_user(timer_reg_list[i], uindices)) - return -EFAULT; - uindices++; - } - - return 0; -} - -static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) -{ - void __user *uaddr = (void __user *)(long)reg->addr; - u64 val; - int ret; - - ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)); - if (ret != 0) - return -EFAULT; - - return kvm_arm_timer_set_reg(vcpu, reg->id, val); -} - -static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) -{ - void __user *uaddr = (void __user *)(long)reg->addr; - u64 val; - - val = kvm_arm_timer_get_reg(vcpu, reg->id); - return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? -EFAULT : 0; -} - static unsigned long num_sve_regs(const struct kvm_vcpu *vcpu) { const unsigned int slices = vcpu_sve_slices(vcpu); @@ -724,7 +666,6 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu) res += num_sve_regs(vcpu); res += kvm_arm_num_sys_reg_descs(vcpu); res += kvm_arm_get_fw_num_regs(vcpu); - res += NUM_TIMER_REGS; return res; } @@ -755,11 +696,6 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) return ret; uindices += kvm_arm_get_fw_num_regs(vcpu); - ret = copy_timer_indices(vcpu, uindices); - if (ret < 0) - return ret; - uindices += NUM_TIMER_REGS; - return kvm_arm_copy_sys_reg_indices(vcpu, uindices); } @@ -777,9 +713,6 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) case KVM_REG_ARM64_SVE: return get_sve_reg(vcpu, reg); } - if (is_timer_reg(reg->id)) - return get_timer_reg(vcpu, reg); - return kvm_arm_sys_reg_get_reg(vcpu, reg); } @@ -797,9 +730,6 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) case KVM_REG_ARM64_SVE: return set_sve_reg(vcpu, reg); } - if (is_timer_reg(reg->id)) - return set_timer_reg(vcpu, reg); - return kvm_arm_sys_reg_set_reg(vcpu, reg); } diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index bca8c80e11da..cc7d5d1709cb 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -147,7 +147,12 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu) if (esr & ESR_ELx_WFx_ISS_RV) { u64 val, now; - now = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_TIMER_CNT); + now = kvm_phys_timer_read(); + if (is_hyp_ctxt(vcpu) && vcpu_el2_e2h_is_set(vcpu)) + now -= timer_get_offset(vcpu_hvtimer(vcpu)); + else + now -= timer_get_offset(vcpu_vtimer(vcpu)); + val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu)); if (now >= val) diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index b6682202edf3..c5d5e5b86eaf 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -195,123 +195,6 @@ static inline void __deactivate_cptr_traps(struct kvm_vcpu *vcpu) __deactivate_cptr_traps_nvhe(vcpu); } -#define reg_to_fgt_masks(reg) \ - ({ \ - struct fgt_masks *m; \ - switch(reg) { \ - case HFGRTR_EL2: \ - m = &hfgrtr_masks; \ - break; \ - case HFGWTR_EL2: \ - m = &hfgwtr_masks; \ - break; \ - case HFGITR_EL2: \ - m = &hfgitr_masks; \ - break; \ - case HDFGRTR_EL2: \ - m = &hdfgrtr_masks; \ - break; \ - case HDFGWTR_EL2: \ - m = &hdfgwtr_masks; \ - break; \ - case HAFGRTR_EL2: \ - m = &hafgrtr_masks; \ - break; \ - case HFGRTR2_EL2: \ - m = &hfgrtr2_masks; \ - break; \ - case HFGWTR2_EL2: \ - m = &hfgwtr2_masks; \ - break; \ - case HFGITR2_EL2: \ - m = &hfgitr2_masks; \ - break; \ - case HDFGRTR2_EL2: \ - m = &hdfgrtr2_masks; \ - break; \ - case HDFGWTR2_EL2: \ - m = &hdfgwtr2_masks; \ - break; \ - default: \ - BUILD_BUG_ON(1); \ - } \ - \ - m; \ - }) - -#define compute_clr_set(vcpu, reg, clr, set) \ - do { \ - u64 hfg = __vcpu_sys_reg(vcpu, reg); \ - struct fgt_masks *m = reg_to_fgt_masks(reg); \ - set |= hfg & m->mask; \ - clr |= ~hfg & m->nmask; \ - } while(0) - -#define reg_to_fgt_group_id(reg) \ - ({ \ - enum fgt_group_id id; \ - switch(reg) { \ - case HFGRTR_EL2: \ - case HFGWTR_EL2: \ - id = HFGRTR_GROUP; \ - break; \ - case HFGITR_EL2: \ - id = HFGITR_GROUP; \ - break; \ - case HDFGRTR_EL2: \ - case HDFGWTR_EL2: \ - id = HDFGRTR_GROUP; \ - break; \ - case HAFGRTR_EL2: \ - id = HAFGRTR_GROUP; \ - break; \ - case HFGRTR2_EL2: \ - case HFGWTR2_EL2: \ - id = HFGRTR2_GROUP; \ - break; \ - case HFGITR2_EL2: \ - id = HFGITR2_GROUP; \ - break; \ - case HDFGRTR2_EL2: \ - case HDFGWTR2_EL2: \ - id = HDFGRTR2_GROUP; \ - break; \ - default: \ - BUILD_BUG_ON(1); \ - } \ - \ - id; \ - }) - -#define compute_undef_clr_set(vcpu, kvm, reg, clr, set) \ - do { \ - u64 hfg = kvm->arch.fgu[reg_to_fgt_group_id(reg)]; \ - struct fgt_masks *m = reg_to_fgt_masks(reg); \ - set |= hfg & m->mask; \ - clr |= hfg & m->nmask; \ - } while(0) - -#define update_fgt_traps_cs(hctxt, vcpu, kvm, reg, clr, set) \ - do { \ - struct fgt_masks *m = reg_to_fgt_masks(reg); \ - u64 c = clr, s = set; \ - u64 val; \ - \ - ctxt_sys_reg(hctxt, reg) = read_sysreg_s(SYS_ ## reg); \ - if (is_nested_ctxt(vcpu)) \ - compute_clr_set(vcpu, reg, c, s); \ - \ - compute_undef_clr_set(vcpu, kvm, reg, c, s); \ - \ - val = m->nmask; \ - val |= s; \ - val &= ~c; \ - write_sysreg_s(val, SYS_ ## reg); \ - } while(0) - -#define update_fgt_traps(hctxt, vcpu, kvm, reg) \ - update_fgt_traps_cs(hctxt, vcpu, kvm, reg, 0, 0) - static inline bool cpu_has_amu(void) { u64 pfr0 = read_sysreg_s(SYS_ID_AA64PFR0_EL1); @@ -320,33 +203,36 @@ static inline bool cpu_has_amu(void) ID_AA64PFR0_EL1_AMU_SHIFT); } +#define __activate_fgt(hctxt, vcpu, reg) \ + do { \ + ctxt_sys_reg(hctxt, reg) = read_sysreg_s(SYS_ ## reg); \ + write_sysreg_s(*vcpu_fgt(vcpu, reg), SYS_ ## reg); \ + } while (0) + static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt); - struct kvm *kvm = kern_hyp_va(vcpu->kvm); if (!cpus_have_final_cap(ARM64_HAS_FGT)) return; - update_fgt_traps(hctxt, vcpu, kvm, HFGRTR_EL2); - update_fgt_traps_cs(hctxt, vcpu, kvm, HFGWTR_EL2, 0, - cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38) ? - HFGWTR_EL2_TCR_EL1_MASK : 0); - update_fgt_traps(hctxt, vcpu, kvm, HFGITR_EL2); - update_fgt_traps(hctxt, vcpu, kvm, HDFGRTR_EL2); - update_fgt_traps(hctxt, vcpu, kvm, HDFGWTR_EL2); + __activate_fgt(hctxt, vcpu, HFGRTR_EL2); + __activate_fgt(hctxt, vcpu, HFGWTR_EL2); + __activate_fgt(hctxt, vcpu, HFGITR_EL2); + __activate_fgt(hctxt, vcpu, HDFGRTR_EL2); + __activate_fgt(hctxt, vcpu, HDFGWTR_EL2); if (cpu_has_amu()) - update_fgt_traps(hctxt, vcpu, kvm, HAFGRTR_EL2); + __activate_fgt(hctxt, vcpu, HAFGRTR_EL2); if (!cpus_have_final_cap(ARM64_HAS_FGT2)) return; - update_fgt_traps(hctxt, vcpu, kvm, HFGRTR2_EL2); - update_fgt_traps(hctxt, vcpu, kvm, HFGWTR2_EL2); - update_fgt_traps(hctxt, vcpu, kvm, HFGITR2_EL2); - update_fgt_traps(hctxt, vcpu, kvm, HDFGRTR2_EL2); - update_fgt_traps(hctxt, vcpu, kvm, HDFGWTR2_EL2); + __activate_fgt(hctxt, vcpu, HFGRTR2_EL2); + __activate_fgt(hctxt, vcpu, HFGWTR2_EL2); + __activate_fgt(hctxt, vcpu, HFGITR2_EL2); + __activate_fgt(hctxt, vcpu, HDFGRTR2_EL2); + __activate_fgt(hctxt, vcpu, HDFGWTR2_EL2); } #define __deactivate_fgt(htcxt, vcpu, reg) \ diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c index 4e16f9b96f63..58b7d0c477d7 100644 --- a/arch/arm64/kvm/hyp/nvhe/ffa.c +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c @@ -479,7 +479,7 @@ static void __do_ffa_mem_xfer(const u64 func_id, struct ffa_mem_region_attributes *ep_mem_access; struct ffa_composite_mem_region *reg; struct ffa_mem_region *buf; - u32 offset, nr_ranges; + u32 offset, nr_ranges, checked_offset; int ret = 0; if (addr_mbz || npages_mbz || fraglen > len || @@ -516,7 +516,12 @@ static void __do_ffa_mem_xfer(const u64 func_id, goto out_unlock; } - if (fraglen < offset + sizeof(struct ffa_composite_mem_region)) { + if (check_add_overflow(offset, sizeof(struct ffa_composite_mem_region), &checked_offset)) { + ret = FFA_RET_INVALID_PARAMETERS; + goto out_unlock; + } + + if (fraglen < checked_offset) { ret = FFA_RET_INVALID_PARAMETERS; goto out_unlock; } diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index ddc8beb55eee..49db32f3ddf7 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -367,6 +367,19 @@ static int host_stage2_unmap_dev_all(void) return kvm_pgtable_stage2_unmap(pgt, addr, BIT(pgt->ia_bits) - addr); } +/* + * Ensure the PFN range is contained within PA-range. + * + * This check is also robust to overflows and is therefore a requirement before + * using a pfn/nr_pages pair from an untrusted source. + */ +static bool pfn_range_is_valid(u64 pfn, u64 nr_pages) +{ + u64 limit = BIT(kvm_phys_shift(&host_mmu.arch.mmu) - PAGE_SHIFT); + + return pfn < limit && ((limit - pfn) >= nr_pages); +} + struct kvm_mem_range { u64 start; u64 end; @@ -776,6 +789,9 @@ int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages) void *virt = __hyp_va(phys); int ret; + if (!pfn_range_is_valid(pfn, nr_pages)) + return -EINVAL; + host_lock_component(); hyp_lock_component(); @@ -804,6 +820,9 @@ int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages) u64 virt = (u64)__hyp_va(phys); int ret; + if (!pfn_range_is_valid(pfn, nr_pages)) + return -EINVAL; + host_lock_component(); hyp_lock_component(); @@ -887,6 +906,9 @@ int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages) u64 size = PAGE_SIZE * nr_pages; int ret; + if (!pfn_range_is_valid(pfn, nr_pages)) + return -EINVAL; + host_lock_component(); ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED); if (!ret) @@ -902,6 +924,9 @@ int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages) u64 size = PAGE_SIZE * nr_pages; int ret; + if (!pfn_range_is_valid(pfn, nr_pages)) + return -EINVAL; + host_lock_component(); ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED); if (!ret) @@ -945,6 +970,9 @@ int __pkvm_host_share_guest(u64 pfn, u64 gfn, u64 nr_pages, struct pkvm_hyp_vcpu if (prot & ~KVM_PGTABLE_PROT_RWX) return -EINVAL; + if (!pfn_range_is_valid(pfn, nr_pages)) + return -EINVAL; + ret = __guest_check_transition_size(phys, ipa, nr_pages, &size); if (ret) return ret; diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 05774aed09cb..43bde061b65d 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -172,6 +172,7 @@ static int pkvm_vcpu_init_traps(struct pkvm_hyp_vcpu *hyp_vcpu) /* Trust the host for non-protected vcpu features. */ vcpu->arch.hcrx_el2 = host_vcpu->arch.hcrx_el2; + memcpy(vcpu->arch.fgt, host_vcpu->arch.fgt, sizeof(vcpu->arch.fgt)); return 0; } diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 7a045cad6bdf..f04cda40545b 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -1859,13 +1859,16 @@ void kvm_nested_setup_mdcr_el2(struct kvm_vcpu *vcpu) { u64 guest_mdcr = __vcpu_sys_reg(vcpu, MDCR_EL2); + if (is_nested_ctxt(vcpu)) + vcpu->arch.mdcr_el2 |= (guest_mdcr & NV_MDCR_GUEST_INCLUDE); /* * In yet another example where FEAT_NV2 is fscking broken, accesses * to MDSCR_EL1 are redirected to the VNCR despite having an effect * at EL2. Use a big hammer to apply sanity. + * + * Unless of course we have FEAT_FGT, in which case we can precisely + * trap MDSCR_EL1. */ - if (is_hyp_ctxt(vcpu)) + else if (!cpus_have_final_cap(ARM64_HAS_FGT)) vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA; - else - vcpu->arch.mdcr_el2 |= (guest_mdcr & NV_MDCR_GUEST_INCLUDE); } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 91053aa832d0..ec3fbe0b8d52 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -203,7 +203,6 @@ static void locate_register(const struct kvm_vcpu *vcpu, enum vcpu_sysreg reg, MAPPED_EL2_SYSREG(AMAIR_EL2, AMAIR_EL1, NULL ); MAPPED_EL2_SYSREG(ELR_EL2, ELR_EL1, NULL ); MAPPED_EL2_SYSREG(SPSR_EL2, SPSR_EL1, NULL ); - MAPPED_EL2_SYSREG(ZCR_EL2, ZCR_EL1, NULL ); MAPPED_EL2_SYSREG(CONTEXTIDR_EL2, CONTEXTIDR_EL1, NULL ); MAPPED_EL2_SYSREG(SCTLR2_EL2, SCTLR2_EL1, NULL ); case CNTHCTL_EL2: @@ -1595,14 +1594,47 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu, return true; } -static bool access_hv_timer(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *r) +static int arch_timer_set_user(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd, + u64 val) { - if (!vcpu_el2_e2h_is_set(vcpu)) - return undef_access(vcpu, p, r); + switch (reg_to_encoding(rd)) { + case SYS_CNTV_CTL_EL0: + case SYS_CNTP_CTL_EL0: + case SYS_CNTHV_CTL_EL2: + case SYS_CNTHP_CTL_EL2: + val &= ~ARCH_TIMER_CTRL_IT_STAT; + break; + case SYS_CNTVCT_EL0: + if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) + timer_set_offset(vcpu_vtimer(vcpu), kvm_phys_timer_read() - val); + return 0; + case SYS_CNTPCT_EL0: + if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) + timer_set_offset(vcpu_ptimer(vcpu), kvm_phys_timer_read() - val); + return 0; + } + + __vcpu_assign_sys_reg(vcpu, rd->reg, val); + return 0; +} + +static int arch_timer_get_user(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd, + u64 *val) +{ + switch (reg_to_encoding(rd)) { + case SYS_CNTVCT_EL0: + *val = kvm_phys_timer_read() - timer_get_offset(vcpu_vtimer(vcpu)); + break; + case SYS_CNTPCT_EL0: + *val = kvm_phys_timer_read() - timer_get_offset(vcpu_ptimer(vcpu)); + break; + default: + *val = __vcpu_sys_reg(vcpu, rd->reg); + } - return access_arch_timer(vcpu, p, r); + return 0; } static s64 kvm_arm64_ftr_safe_value(u32 id, const struct arm64_ftr_bits *ftrp, @@ -2507,15 +2539,20 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu, "trap of EL2 register redirected to EL1"); } -#define EL2_REG_FILTERED(name, acc, rst, v, filter) { \ +#define SYS_REG_USER_FILTER(name, acc, rst, v, gu, su, filter) { \ SYS_DESC(SYS_##name), \ .access = acc, \ .reset = rst, \ .reg = name, \ + .get_user = gu, \ + .set_user = su, \ .visibility = filter, \ .val = v, \ } +#define EL2_REG_FILTERED(name, acc, rst, v, filter) \ + SYS_REG_USER_FILTER(name, acc, rst, v, NULL, NULL, filter) + #define EL2_REG(name, acc, rst, v) \ EL2_REG_FILTERED(name, acc, rst, v, el2_visibility) @@ -2526,6 +2563,10 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu, EL2_REG_VNCR_FILT(name, hidden_visibility) #define EL2_REG_REDIR(name, rst, v) EL2_REG(name, bad_redir_trap, rst, v) +#define TIMER_REG(name, vis) \ + SYS_REG_USER_FILTER(name, access_arch_timer, reset_val, 0, \ + arch_timer_get_user, arch_timer_set_user, vis) + /* * Since reset() callback and field val are not used for idregs, they will be * used for specific purposes for idregs. @@ -2554,19 +2595,23 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu, .val = 0, \ } -/* sys_reg_desc initialiser for known cpufeature ID registers */ -#define AA32_ID_SANITISED(name) { \ - ID_DESC(name), \ - .visibility = aa32_id_visibility, \ - .val = 0, \ -} - /* sys_reg_desc initialiser for writable ID registers */ #define ID_WRITABLE(name, mask) { \ ID_DESC(name), \ .val = mask, \ } +/* + * 32bit ID regs are fully writable when the guest is 32bit + * capable. Nothing in the KVM code should rely on 32bit features + * anyway, only 64bit, so let the VMM do its worse. + */ +#define AA32_ID_WRITABLE(name) { \ + ID_DESC(name), \ + .visibility = aa32_id_visibility, \ + .val = GENMASK(31, 0), \ +} + /* sys_reg_desc initialiser for cpufeature ID registers that need filtering */ #define ID_FILTERED(sysreg, name, mask) { \ ID_DESC(sysreg), \ @@ -2705,18 +2750,17 @@ static bool access_zcr_el2(struct kvm_vcpu *vcpu, if (guest_hyp_sve_traps_enabled(vcpu)) { kvm_inject_nested_sve_trap(vcpu); - return true; + return false; } if (!p->is_write) { - p->regval = vcpu_read_sys_reg(vcpu, ZCR_EL2); + p->regval = __vcpu_sys_reg(vcpu, ZCR_EL2); return true; } vq = SYS_FIELD_GET(ZCR_ELx, LEN, p->regval) + 1; vq = min(vq, vcpu_sve_max_vq(vcpu)); - vcpu_write_sys_reg(vcpu, vq - 1, ZCR_EL2); - + __vcpu_assign_sys_reg(vcpu, ZCR_EL2, vq - 1); return true; } @@ -2833,6 +2877,16 @@ static unsigned int s1pie_el2_visibility(const struct kvm_vcpu *vcpu, return __el2_visibility(vcpu, rd, s1pie_visibility); } +static unsigned int cnthv_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + if (vcpu_has_nv(vcpu) && + !vcpu_has_feature(vcpu, KVM_ARM_VCPU_HAS_EL2_E2H0)) + return 0; + + return REG_HIDDEN; +} + static bool access_mdcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) @@ -3078,40 +3132,39 @@ static const struct sys_reg_desc sys_reg_descs[] = { /* AArch64 mappings of the AArch32 ID registers */ /* CRm=1 */ - AA32_ID_SANITISED(ID_PFR0_EL1), - AA32_ID_SANITISED(ID_PFR1_EL1), + AA32_ID_WRITABLE(ID_PFR0_EL1), + AA32_ID_WRITABLE(ID_PFR1_EL1), { SYS_DESC(SYS_ID_DFR0_EL1), .access = access_id_reg, .get_user = get_id_reg, .set_user = set_id_dfr0_el1, .visibility = aa32_id_visibility, .reset = read_sanitised_id_dfr0_el1, - .val = ID_DFR0_EL1_PerfMon_MASK | - ID_DFR0_EL1_CopDbg_MASK, }, + .val = GENMASK(31, 0) }, ID_HIDDEN(ID_AFR0_EL1), - AA32_ID_SANITISED(ID_MMFR0_EL1), - AA32_ID_SANITISED(ID_MMFR1_EL1), - AA32_ID_SANITISED(ID_MMFR2_EL1), - AA32_ID_SANITISED(ID_MMFR3_EL1), + AA32_ID_WRITABLE(ID_MMFR0_EL1), + AA32_ID_WRITABLE(ID_MMFR1_EL1), + AA32_ID_WRITABLE(ID_MMFR2_EL1), + AA32_ID_WRITABLE(ID_MMFR3_EL1), /* CRm=2 */ - AA32_ID_SANITISED(ID_ISAR0_EL1), - AA32_ID_SANITISED(ID_ISAR1_EL1), - AA32_ID_SANITISED(ID_ISAR2_EL1), - AA32_ID_SANITISED(ID_ISAR3_EL1), - AA32_ID_SANITISED(ID_ISAR4_EL1), - AA32_ID_SANITISED(ID_ISAR5_EL1), - AA32_ID_SANITISED(ID_MMFR4_EL1), - AA32_ID_SANITISED(ID_ISAR6_EL1), + AA32_ID_WRITABLE(ID_ISAR0_EL1), + AA32_ID_WRITABLE(ID_ISAR1_EL1), + AA32_ID_WRITABLE(ID_ISAR2_EL1), + AA32_ID_WRITABLE(ID_ISAR3_EL1), + AA32_ID_WRITABLE(ID_ISAR4_EL1), + AA32_ID_WRITABLE(ID_ISAR5_EL1), + AA32_ID_WRITABLE(ID_MMFR4_EL1), + AA32_ID_WRITABLE(ID_ISAR6_EL1), /* CRm=3 */ - AA32_ID_SANITISED(MVFR0_EL1), - AA32_ID_SANITISED(MVFR1_EL1), - AA32_ID_SANITISED(MVFR2_EL1), + AA32_ID_WRITABLE(MVFR0_EL1), + AA32_ID_WRITABLE(MVFR1_EL1), + AA32_ID_WRITABLE(MVFR2_EL1), ID_UNALLOCATED(3,3), - AA32_ID_SANITISED(ID_PFR2_EL1), + AA32_ID_WRITABLE(ID_PFR2_EL1), ID_HIDDEN(ID_DFR1_EL1), - AA32_ID_SANITISED(ID_MMFR5_EL1), + AA32_ID_WRITABLE(ID_MMFR5_EL1), ID_UNALLOCATED(3,7), /* AArch64 ID registers */ @@ -3482,17 +3535,19 @@ static const struct sys_reg_desc sys_reg_descs[] = { AMU_AMEVTYPER1_EL0(14), AMU_AMEVTYPER1_EL0(15), - { SYS_DESC(SYS_CNTPCT_EL0), access_arch_timer }, - { SYS_DESC(SYS_CNTVCT_EL0), access_arch_timer }, + { SYS_DESC(SYS_CNTPCT_EL0), .access = access_arch_timer, + .get_user = arch_timer_get_user, .set_user = arch_timer_set_user }, + { SYS_DESC(SYS_CNTVCT_EL0), .access = access_arch_timer, + .get_user = arch_timer_get_user, .set_user = arch_timer_set_user }, { SYS_DESC(SYS_CNTPCTSS_EL0), access_arch_timer }, { SYS_DESC(SYS_CNTVCTSS_EL0), access_arch_timer }, { SYS_DESC(SYS_CNTP_TVAL_EL0), access_arch_timer }, - { SYS_DESC(SYS_CNTP_CTL_EL0), access_arch_timer }, - { SYS_DESC(SYS_CNTP_CVAL_EL0), access_arch_timer }, + TIMER_REG(CNTP_CTL_EL0, NULL), + TIMER_REG(CNTP_CVAL_EL0, NULL), { SYS_DESC(SYS_CNTV_TVAL_EL0), access_arch_timer }, - { SYS_DESC(SYS_CNTV_CTL_EL0), access_arch_timer }, - { SYS_DESC(SYS_CNTV_CVAL_EL0), access_arch_timer }, + TIMER_REG(CNTV_CTL_EL0, NULL), + TIMER_REG(CNTV_CVAL_EL0, NULL), /* PMEVCNTRn_EL0 */ PMU_PMEVCNTR_EL0(0), @@ -3690,12 +3745,12 @@ static const struct sys_reg_desc sys_reg_descs[] = { EL2_REG_VNCR(CNTVOFF_EL2, reset_val, 0), EL2_REG(CNTHCTL_EL2, access_rw, reset_val, 0), { SYS_DESC(SYS_CNTHP_TVAL_EL2), access_arch_timer }, - EL2_REG(CNTHP_CTL_EL2, access_arch_timer, reset_val, 0), - EL2_REG(CNTHP_CVAL_EL2, access_arch_timer, reset_val, 0), + TIMER_REG(CNTHP_CTL_EL2, el2_visibility), + TIMER_REG(CNTHP_CVAL_EL2, el2_visibility), - { SYS_DESC(SYS_CNTHV_TVAL_EL2), access_hv_timer }, - EL2_REG(CNTHV_CTL_EL2, access_hv_timer, reset_val, 0), - EL2_REG(CNTHV_CVAL_EL2, access_hv_timer, reset_val, 0), + { SYS_DESC(SYS_CNTHV_TVAL_EL2), access_arch_timer, .visibility = cnthv_visibility }, + TIMER_REG(CNTHV_CTL_EL2, cnthv_visibility), + TIMER_REG(CNTHV_CVAL_EL2, cnthv_visibility), { SYS_DESC(SYS_CNTKCTL_EL12), access_cntkctl_el12 }, @@ -5233,15 +5288,28 @@ static int demux_c15_set(struct kvm_vcpu *vcpu, u64 id, void __user *uaddr) } } +static u64 kvm_one_reg_to_id(const struct kvm_one_reg *reg) +{ + switch(reg->id) { + case KVM_REG_ARM_TIMER_CVAL: + return TO_ARM64_SYS_REG(CNTV_CVAL_EL0); + case KVM_REG_ARM_TIMER_CNT: + return TO_ARM64_SYS_REG(CNTVCT_EL0); + default: + return reg->id; + } +} + int kvm_sys_reg_get_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, const struct sys_reg_desc table[], unsigned int num) { u64 __user *uaddr = (u64 __user *)(unsigned long)reg->addr; const struct sys_reg_desc *r; + u64 id = kvm_one_reg_to_id(reg); u64 val; int ret; - r = id_to_sys_reg_desc(vcpu, reg->id, table, num); + r = id_to_sys_reg_desc(vcpu, id, table, num); if (!r || sysreg_hidden(vcpu, r)) return -ENOENT; @@ -5274,13 +5342,14 @@ int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, { u64 __user *uaddr = (u64 __user *)(unsigned long)reg->addr; const struct sys_reg_desc *r; + u64 id = kvm_one_reg_to_id(reg); u64 val; int ret; if (get_user(val, uaddr)) return -EFAULT; - r = id_to_sys_reg_desc(vcpu, reg->id, table, num); + r = id_to_sys_reg_desc(vcpu, id, table, num); if (!r || sysreg_hidden(vcpu, r)) return -ENOENT; @@ -5340,10 +5409,23 @@ static u64 sys_reg_to_index(const struct sys_reg_desc *reg) static bool copy_reg_to_user(const struct sys_reg_desc *reg, u64 __user **uind) { + u64 idx; + if (!*uind) return true; - if (put_user(sys_reg_to_index(reg), *uind)) + switch (reg_to_encoding(reg)) { + case SYS_CNTV_CVAL_EL0: + idx = KVM_REG_ARM_TIMER_CVAL; + break; + case SYS_CNTVCT_EL0: + idx = KVM_REG_ARM_TIMER_CNT; + break; + default: + idx = sys_reg_to_index(reg); + } + + if (put_user(idx, *uind)) return false; (*uind)++; @@ -5527,11 +5609,17 @@ int kvm_finalize_sys_regs(struct kvm_vcpu *vcpu) guard(mutex)(&kvm->arch.config_lock); - if (!(static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) && - irqchip_in_kernel(kvm) && - kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)) { - kvm->arch.id_regs[IDREG_IDX(SYS_ID_AA64PFR0_EL1)] &= ~ID_AA64PFR0_EL1_GIC_MASK; - kvm->arch.id_regs[IDREG_IDX(SYS_ID_PFR1_EL1)] &= ~ID_PFR1_EL1_GIC_MASK; + /* + * This hacks into the ID registers, so only perform it when the + * first vcpu runs, or the kvm_set_vm_id_reg() helper will scream. + */ + if (!irqchip_in_kernel(kvm) && !kvm_vm_has_ran_once(kvm)) { + u64 val; + + val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1) & ~ID_AA64PFR0_EL1_GIC; + kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1, val); + val = kvm_read_vm_id_reg(kvm, SYS_ID_PFR1_EL1) & ~ID_PFR1_EL1_GIC; + kvm_set_vm_id_reg(kvm, SYS_ID_PFR1_EL1, val); } if (vcpu_has_nv(vcpu)) { diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index 317abc490368..b3f904472fac 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -257,4 +257,10 @@ int kvm_finalize_sys_regs(struct kvm_vcpu *vcpu); (val); \ }) +#define TO_ARM64_SYS_REG(r) ARM64_SYS_REG(sys_reg_Op0(SYS_ ## r), \ + sys_reg_Op1(SYS_ ## r), \ + sys_reg_CRn(SYS_ ## r), \ + sys_reg_CRm(SYS_ ## r), \ + sys_reg_Op2(SYS_ ## r)) + #endif /* __ARM64_KVM_SYS_REGS_LOCAL_H__ */ diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c index 4c1209261b65..bb92853d1fd3 100644 --- a/arch/arm64/kvm/vgic/vgic-debug.c +++ b/arch/arm64/kvm/vgic/vgic-debug.c @@ -64,29 +64,37 @@ static void iter_next(struct kvm *kvm, struct vgic_state_iter *iter) static int iter_mark_lpis(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; + unsigned long intid, flags; struct vgic_irq *irq; - unsigned long intid; int nr_lpis = 0; + xa_lock_irqsave(&dist->lpi_xa, flags); + xa_for_each(&dist->lpi_xa, intid, irq) { if (!vgic_try_get_irq_ref(irq)) continue; - xa_set_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER); + __xa_set_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER); nr_lpis++; } + xa_unlock_irqrestore(&dist->lpi_xa, flags); + return nr_lpis; } static void iter_unmark_lpis(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; + unsigned long intid, flags; struct vgic_irq *irq; - unsigned long intid; xa_for_each_marked(&dist->lpi_xa, intid, irq, LPI_XA_MARK_DEBUG_ITER) { - xa_clear_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER); + xa_lock_irqsave(&dist->lpi_xa, flags); + __xa_clear_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER); + xa_unlock_irqrestore(&dist->lpi_xa, flags); + + /* vgic_put_irq() expects to be called outside of the xa_lock */ vgic_put_irq(kvm, irq); } } diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 1796b1a22a72..da62edbc1205 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -53,7 +53,7 @@ void kvm_vgic_early_init(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; - xa_init(&dist->lpi_xa); + xa_init_flags(&dist->lpi_xa, XA_FLAGS_LOCK_IRQ); } /* CREATION */ @@ -71,6 +71,7 @@ static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu, u32 type); int kvm_vgic_create(struct kvm *kvm, u32 type) { struct kvm_vcpu *vcpu; + u64 aa64pfr0, pfr1; unsigned long i; int ret; @@ -161,10 +162,19 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; - if (type == KVM_DEV_TYPE_ARM_VGIC_V2) + aa64pfr0 = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1) & ~ID_AA64PFR0_EL1_GIC; + pfr1 = kvm_read_vm_id_reg(kvm, SYS_ID_PFR1_EL1) & ~ID_PFR1_EL1_GIC; + + if (type == KVM_DEV_TYPE_ARM_VGIC_V2) { kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; - else + } else { INIT_LIST_HEAD(&kvm->arch.vgic.rd_regions); + aa64pfr0 |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, GIC, IMP); + pfr1 |= SYS_FIELD_PREP_ENUM(ID_PFR1_EL1, GIC, GICv3); + } + + kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1, aa64pfr0); + kvm_set_vm_id_reg(kvm, SYS_ID_PFR1_EL1, pfr1); if (type == KVM_DEV_TYPE_ARM_VGIC_V3) kvm->arch.vgic.nassgicap = system_supports_direct_sgis(); diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index ce3e3ed3f29f..3f1c4b10fed9 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -78,6 +78,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, { struct vgic_dist *dist = &kvm->arch.vgic; struct vgic_irq *irq = vgic_get_irq(kvm, intid), *oldirq; + unsigned long flags; int ret; /* In this case there is no put, since we keep the reference. */ @@ -88,7 +89,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, if (!irq) return ERR_PTR(-ENOMEM); - ret = xa_reserve(&dist->lpi_xa, intid, GFP_KERNEL_ACCOUNT); + ret = xa_reserve_irq(&dist->lpi_xa, intid, GFP_KERNEL_ACCOUNT); if (ret) { kfree(irq); return ERR_PTR(ret); @@ -103,7 +104,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, irq->target_vcpu = vcpu; irq->group = 1; - xa_lock(&dist->lpi_xa); + xa_lock_irqsave(&dist->lpi_xa, flags); /* * There could be a race with another vgic_add_lpi(), so we need to @@ -114,21 +115,18 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, /* Someone was faster with adding this LPI, lets use that. */ kfree(irq); irq = oldirq; - - goto out_unlock; + } else { + ret = xa_err(__xa_store(&dist->lpi_xa, intid, irq, 0)); } - ret = xa_err(__xa_store(&dist->lpi_xa, intid, irq, 0)); + xa_unlock_irqrestore(&dist->lpi_xa, flags); + if (ret) { xa_release(&dist->lpi_xa, intid); kfree(irq); - } - -out_unlock: - xa_unlock(&dist->lpi_xa); - if (ret) return ERR_PTR(ret); + } /* * We "cache" the configuration table entries in our struct vgic_irq's. diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index f1c153106c56..2f75ef14d339 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -297,8 +297,12 @@ void vcpu_set_ich_hcr(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3; + if (!vgic_is_v3(vcpu->kvm)) + return; + /* Hide GICv3 sysreg if necessary */ - if (!kvm_has_gicv3(vcpu->kvm)) { + if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2 || + !irqchip_in_kernel(vcpu->kvm)) { vgic_v3->vgic_hcr |= (ICH_HCR_EL2_TALL0 | ICH_HCR_EL2_TALL1 | ICH_HCR_EL2_TC); return; diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index 6dd5a10081e2..8d20c53faef0 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -28,7 +28,7 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = { * kvm->arch.config_lock (mutex) * its->cmd_lock (mutex) * its->its_lock (mutex) - * vgic_dist->lpi_xa.xa_lock + * vgic_dist->lpi_xa.xa_lock must be taken with IRQs disabled * vgic_cpu->ap_list_lock must be taken with IRQs disabled * vgic_irq->irq_lock must be taken with IRQs disabled * @@ -141,32 +141,39 @@ static __must_check bool vgic_put_irq_norelease(struct kvm *kvm, struct vgic_irq void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq) { struct vgic_dist *dist = &kvm->arch.vgic; + unsigned long flags; - if (irq->intid >= VGIC_MIN_LPI) - might_lock(&dist->lpi_xa.xa_lock); + /* + * Normally the lock is only taken when the refcount drops to 0. + * Acquire/release it early on lockdep kernels to make locking issues + * in rare release paths a bit more obvious. + */ + if (IS_ENABLED(CONFIG_LOCKDEP) && irq->intid >= VGIC_MIN_LPI) { + guard(spinlock_irqsave)(&dist->lpi_xa.xa_lock); + } if (!__vgic_put_irq(kvm, irq)) return; - xa_lock(&dist->lpi_xa); + xa_lock_irqsave(&dist->lpi_xa, flags); vgic_release_lpi_locked(dist, irq); - xa_unlock(&dist->lpi_xa); + xa_unlock_irqrestore(&dist->lpi_xa, flags); } static void vgic_release_deleted_lpis(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; - unsigned long intid; + unsigned long flags, intid; struct vgic_irq *irq; - xa_lock(&dist->lpi_xa); + xa_lock_irqsave(&dist->lpi_xa, flags); xa_for_each(&dist->lpi_xa, intid, irq) { if (irq->pending_release) vgic_release_lpi_locked(dist, irq); } - xa_unlock(&dist->lpi_xa); + xa_unlock_irqrestore(&dist->lpi_xa, flags); } void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c index a86c897017df..cd5912ba617b 100644 --- a/arch/arm64/mm/copypage.c +++ b/arch/arm64/mm/copypage.c @@ -35,7 +35,7 @@ void copy_highpage(struct page *to, struct page *from) from != folio_page(src, 0)) return; - WARN_ON_ONCE(!folio_try_hugetlb_mte_tagging(dst)); + folio_try_hugetlb_mte_tagging(dst); /* * Populate tags for all subpages. @@ -51,8 +51,13 @@ void copy_highpage(struct page *to, struct page *from) } folio_set_hugetlb_mte_tagged(dst); } else if (page_mte_tagged(from)) { - /* It's a new page, shouldn't have been tagged yet */ - WARN_ON_ONCE(!try_page_mte_tagging(to)); + /* + * Most of the time it's a new page that shouldn't have been + * tagged yet. However, folio migration can end up reusing the + * same page without untagging it. Ignore the warning if the + * page is already tagged. + */ + try_page_mte_tagging(to); mte_copy_page_tags(kto, kfrom); set_page_mte_tagged(to); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index d816ff44faff..a193b6a5d1e6 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -967,10 +967,21 @@ struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma, return vma_alloc_folio(flags, 0, vma, vaddr); } -void tag_clear_highpage(struct page *page) +bool tag_clear_highpages(struct page *page, int numpages) { - /* Newly allocated page, shouldn't have been tagged yet */ - WARN_ON_ONCE(!try_page_mte_tagging(page)); - mte_zero_clear_page_tags(page_address(page)); - set_page_mte_tagged(page); + /* + * Check if MTE is supported and fall back to clear_highpage(). + * get_huge_zero_folio() unconditionally passes __GFP_ZEROTAGS and + * post_alloc_hook() will invoke tag_clear_highpages(). + */ + if (!system_supports_mte()) + return false; + + /* Newly allocated pages, shouldn't have been tagged yet */ + for (int i = 0; i < numpages; i++, page++) { + WARN_ON_ONCE(!try_page_mte_tagging(page)); + mte_zero_clear_page_tags(page_address(page)); + set_page_mte_tagged(page); + } + return true; } diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index b8d37eb037fc..2ba01dc8ef82 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -708,6 +708,30 @@ out: return ret; } +static inline bool force_pte_mapping(void) +{ + const bool bbml2 = system_capabilities_finalized() ? + system_supports_bbml2_noabort() : cpu_supports_bbml2_noabort(); + + if (debug_pagealloc_enabled()) + return true; + if (bbml2) + return false; + return rodata_full || arm64_kfence_can_set_direct_map() || is_realm_world(); +} + +static inline bool split_leaf_mapping_possible(void) +{ + /* + * !BBML2_NOABORT systems should never run into scenarios where we would + * have to split. So exit early and let calling code detect it and raise + * a warning. + */ + if (!system_supports_bbml2_noabort()) + return false; + return !force_pte_mapping(); +} + static DEFINE_MUTEX(pgtable_split_lock); int split_kernel_leaf_mapping(unsigned long start, unsigned long end) @@ -715,12 +739,11 @@ int split_kernel_leaf_mapping(unsigned long start, unsigned long end) int ret; /* - * !BBML2_NOABORT systems should not be trying to change permissions on - * anything that is not pte-mapped in the first place. Just return early - * and let the permission change code raise a warning if not already - * pte-mapped. + * Exit early if the region is within a pte-mapped area or if we can't + * split. For the latter case, the permission change code will raise a + * warning if not already pte-mapped. */ - if (!system_supports_bbml2_noabort()) + if (!split_leaf_mapping_possible() || is_kfence_address((void *)start)) return 0; /* @@ -758,30 +781,30 @@ int split_kernel_leaf_mapping(unsigned long start, unsigned long end) return ret; } -static int __init split_to_ptes_pud_entry(pud_t *pudp, unsigned long addr, - unsigned long next, - struct mm_walk *walk) +static int split_to_ptes_pud_entry(pud_t *pudp, unsigned long addr, + unsigned long next, struct mm_walk *walk) { + gfp_t gfp = *(gfp_t *)walk->private; pud_t pud = pudp_get(pudp); int ret = 0; if (pud_leaf(pud)) - ret = split_pud(pudp, pud, GFP_ATOMIC, false); + ret = split_pud(pudp, pud, gfp, false); return ret; } -static int __init split_to_ptes_pmd_entry(pmd_t *pmdp, unsigned long addr, - unsigned long next, - struct mm_walk *walk) +static int split_to_ptes_pmd_entry(pmd_t *pmdp, unsigned long addr, + unsigned long next, struct mm_walk *walk) { + gfp_t gfp = *(gfp_t *)walk->private; pmd_t pmd = pmdp_get(pmdp); int ret = 0; if (pmd_leaf(pmd)) { if (pmd_cont(pmd)) split_contpmd(pmdp); - ret = split_pmd(pmdp, pmd, GFP_ATOMIC, false); + ret = split_pmd(pmdp, pmd, gfp, false); /* * We have split the pmd directly to ptes so there is no need to @@ -793,9 +816,8 @@ static int __init split_to_ptes_pmd_entry(pmd_t *pmdp, unsigned long addr, return ret; } -static int __init split_to_ptes_pte_entry(pte_t *ptep, unsigned long addr, - unsigned long next, - struct mm_walk *walk) +static int split_to_ptes_pte_entry(pte_t *ptep, unsigned long addr, + unsigned long next, struct mm_walk *walk) { pte_t pte = __ptep_get(ptep); @@ -805,12 +827,24 @@ static int __init split_to_ptes_pte_entry(pte_t *ptep, unsigned long addr, return 0; } -static const struct mm_walk_ops split_to_ptes_ops __initconst = { +static const struct mm_walk_ops split_to_ptes_ops = { .pud_entry = split_to_ptes_pud_entry, .pmd_entry = split_to_ptes_pmd_entry, .pte_entry = split_to_ptes_pte_entry, }; +static int range_split_to_ptes(unsigned long start, unsigned long end, gfp_t gfp) +{ + int ret; + + arch_enter_lazy_mmu_mode(); + ret = walk_kernel_page_table_range_lockless(start, end, + &split_to_ptes_ops, NULL, &gfp); + arch_leave_lazy_mmu_mode(); + + return ret; +} + static bool linear_map_requires_bbml2 __initdata; u32 idmap_kpti_bbml2_flag; @@ -847,11 +881,9 @@ static int __init linear_map_split_to_ptes(void *__unused) * PTE. The kernel alias remains static throughout runtime so * can continue to be safely mapped with large mappings. */ - ret = walk_kernel_page_table_range_lockless(lstart, kstart, - &split_to_ptes_ops, NULL, NULL); + ret = range_split_to_ptes(lstart, kstart, GFP_ATOMIC); if (!ret) - ret = walk_kernel_page_table_range_lockless(kend, lend, - &split_to_ptes_ops, NULL, NULL); + ret = range_split_to_ptes(kend, lend, GFP_ATOMIC); if (ret) panic("Failed to split linear map\n"); flush_tlb_kernel_range(lstart, lend); @@ -1002,6 +1034,33 @@ static void __init arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp) memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE); __kfence_pool = phys_to_virt(kfence_pool); } + +bool arch_kfence_init_pool(void) +{ + unsigned long start = (unsigned long)__kfence_pool; + unsigned long end = start + KFENCE_POOL_SIZE; + int ret; + + /* Exit early if we know the linear map is already pte-mapped. */ + if (!split_leaf_mapping_possible()) + return true; + + /* Kfence pool is already pte-mapped for the early init case. */ + if (kfence_early_init) + return true; + + mutex_lock(&pgtable_split_lock); + ret = range_split_to_ptes(start, end, GFP_PGTABLE_KERNEL); + mutex_unlock(&pgtable_split_lock); + + /* + * Since the system supports bbml2_noabort, tlb invalidation is not + * required here; the pgtable mappings have been split to pte but larger + * entries may safely linger in the TLB. + */ + + return !ret; +} #else /* CONFIG_KFENCE */ static inline phys_addr_t arm64_kfence_alloc_pool(void) { return 0; } @@ -1009,16 +1068,6 @@ static inline void arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp) { #endif /* CONFIG_KFENCE */ -static inline bool force_pte_mapping(void) -{ - bool bbml2 = system_capabilities_finalized() ? - system_supports_bbml2_noabort() : cpu_supports_bbml2_noabort(); - - return (!bbml2 && (rodata_full || arm64_kfence_can_set_direct_map() || - is_realm_world())) || - debug_pagealloc_enabled(); -} - static void __init map_mem(pgd_t *pgdp) { static const u64 direct_map_end = _PAGE_END(VA_BITS_MIN); diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index ab83089c3d8f..0c9a50a1e73e 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1213,6 +1213,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, u8 src = bpf2a64[insn->src_reg]; const u8 tmp = bpf2a64[TMP_REG_1]; const u8 tmp2 = bpf2a64[TMP_REG_2]; + const u8 tmp3 = bpf2a64[TMP_REG_3]; const u8 fp = bpf2a64[BPF_REG_FP]; const u8 arena_vm_base = bpf2a64[ARENA_VM_START]; const u8 priv_sp = bpf2a64[PRIVATE_SP]; @@ -1757,8 +1758,8 @@ emit_cond_jmp: case BPF_ST | BPF_PROBE_MEM32 | BPF_W: case BPF_ST | BPF_PROBE_MEM32 | BPF_DW: if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) { - emit(A64_ADD(1, tmp2, dst, arena_vm_base), ctx); - dst = tmp2; + emit(A64_ADD(1, tmp3, dst, arena_vm_base), ctx); + dst = tmp3; } if (dst == fp) { dst_adj = ctx->priv_sp_used ? priv_sp : A64_SP; diff --git a/arch/arm64/tools/syscall_32.tbl b/arch/arm64/tools/syscall_32.tbl index 8d9088bc577d..8cdfe5d4dac9 100644 --- a/arch/arm64/tools/syscall_32.tbl +++ b/arch/arm64/tools/syscall_32.tbl @@ -481,3 +481,4 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common listns sys_listns diff --git a/arch/csky/abiv2/cacheflush.c b/arch/csky/abiv2/cacheflush.c index 876028b1083f..064b0f0f95ca 100644 --- a/arch/csky/abiv2/cacheflush.c +++ b/arch/csky/abiv2/cacheflush.c @@ -21,7 +21,7 @@ void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma, folio = page_folio(pfn_to_page(pfn)); - if (test_and_set_bit(PG_dcache_clean, &folio->flags)) + if (test_and_set_bit(PG_dcache_clean, &folio->flags.f)) return; icache_inv_range(address, address + nr*PAGE_SIZE); diff --git a/arch/csky/abiv2/inc/abi/cacheflush.h b/arch/csky/abiv2/inc/abi/cacheflush.h index 6513ac5d2578..da51a0f02391 100644 --- a/arch/csky/abiv2/inc/abi/cacheflush.h +++ b/arch/csky/abiv2/inc/abi/cacheflush.h @@ -20,8 +20,8 @@ static inline void flush_dcache_folio(struct folio *folio) { - if (test_bit(PG_dcache_clean, &folio->flags)) - clear_bit(PG_dcache_clean, &folio->flags); + if (test_bit(PG_dcache_clean, &folio->flags.f)) + clear_bit(PG_dcache_clean, &folio->flags.f); } #define flush_dcache_folio flush_dcache_folio diff --git a/arch/hexagon/configs/comet_defconfig b/arch/hexagon/configs/comet_defconfig index c6108f000288..22d7f8ac58a3 100644 --- a/arch/hexagon/configs/comet_defconfig +++ b/arch/hexagon/configs/comet_defconfig @@ -46,10 +46,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_QUOTA=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index dc5bd3f1b8d2..96ca1a688984 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -109,7 +109,7 @@ endif ifdef CONFIG_RUSTC_HAS_ANNOTATE_TABLEJUMP KBUILD_RUSTFLAGS += -Cllvm-args=--loongarch-annotate-tablejump else -KBUILD_RUSTFLAGS += -Zno-jump-tables # keep compatibility with older compilers +KBUILD_RUSTFLAGS += $(if $(call rustc-min-version,109300),-Cjump-tables=n,-Zno-jump-tables) # keep compatibility with older compilers endif ifdef CONFIG_LTO_CLANG # The annotate-tablejump option can not be passed to LLVM backend when LTO is enabled. diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index 3e838c229cd5..50e1304e7a6f 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -917,7 +917,6 @@ CONFIG_MMC=y CONFIG_MMC_LOONGSON2=m CONFIG_INFINIBAND=m CONFIG_EDAC=y -# CONFIG_EDAC_LEGACY_SYSFS is not set CONFIG_EDAC_LOONGSON=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_EFI=y diff --git a/arch/loongarch/include/asm/bug.h b/arch/loongarch/include/asm/bug.h index f6f254f2c5db..d090a5bec5eb 100644 --- a/arch/loongarch/include/asm/bug.h +++ b/arch/loongarch/include/asm/bug.h @@ -11,7 +11,7 @@ #else #define __BUGVERBOSE_LOCATION(file, line) \ .pushsection .rodata.str, "aMS", @progbits, 1; \ - 10002: .string file; \ + 10002: .ascii file "\0"; \ .popsection; \ \ .long 10002b - .; \ @@ -20,39 +20,38 @@ #endif #ifndef CONFIG_GENERIC_BUG -#define __BUG_ENTRY(flags) +#define __BUG_ENTRY(cond_str, flags) #else -#define __BUG_ENTRY(flags) \ +#define __BUG_ENTRY(cond_str, flags) \ .pushsection __bug_table, "aw"; \ .align 2; \ 10000: .long 10001f - .; \ - _BUGVERBOSE_LOCATION(__FILE__, __LINE__) \ - .short flags; \ + _BUGVERBOSE_LOCATION(WARN_CONDITION_STR(cond_str) __FILE__, __LINE__) \ + .short flags; \ .popsection; \ 10001: #endif -#define ASM_BUG_FLAGS(flags) \ - __BUG_ENTRY(flags) \ +#define ASM_BUG_FLAGS(cond_str, flags) \ + __BUG_ENTRY(cond_str, flags) \ break BRK_BUG; -#define ASM_BUG() ASM_BUG_FLAGS(0) +#define ASM_BUG() ASM_BUG_FLAGS("", 0) -#define __BUG_FLAGS(flags, extra) \ - asm_inline volatile (__stringify(ASM_BUG_FLAGS(flags)) \ - extra); +#define __BUG_FLAGS(cond_str, flags, extra) \ + asm_inline volatile (__stringify(ASM_BUG_FLAGS(cond_str, flags)) extra); -#define __WARN_FLAGS(flags) \ +#define __WARN_FLAGS(cond_str, flags) \ do { \ instrumentation_begin(); \ - __BUG_FLAGS(BUGFLAG_WARNING|(flags), ANNOTATE_REACHABLE(10001b));\ + __BUG_FLAGS(cond_str, BUGFLAG_WARNING|(flags), ANNOTATE_REACHABLE(10001b));\ instrumentation_end(); \ } while (0) #define BUG() \ do { \ instrumentation_begin(); \ - __BUG_FLAGS(0, ""); \ + __BUG_FLAGS("", 0, ""); \ unreachable(); \ } while (0) diff --git a/arch/loongarch/include/asm/cpu-features.h b/arch/loongarch/include/asm/cpu-features.h index fc83bb32f9f0..bd5f0457ad21 100644 --- a/arch/loongarch/include/asm/cpu-features.h +++ b/arch/loongarch/include/asm/cpu-features.h @@ -67,6 +67,8 @@ #define cpu_has_hypervisor cpu_opt(LOONGARCH_CPU_HYPERVISOR) #define cpu_has_ptw cpu_opt(LOONGARCH_CPU_PTW) #define cpu_has_lspw cpu_opt(LOONGARCH_CPU_LSPW) +#define cpu_has_msgint cpu_opt(LOONGARCH_CPU_MSGINT) #define cpu_has_avecint cpu_opt(LOONGARCH_CPU_AVECINT) +#define cpu_has_redirectint cpu_opt(LOONGARCH_CPU_REDIRECTINT) #endif /* __ASM_CPU_FEATURES_H */ diff --git a/arch/loongarch/include/asm/cpu.h b/arch/loongarch/include/asm/cpu.h index dfb982fe8701..f3efb00b6141 100644 --- a/arch/loongarch/include/asm/cpu.h +++ b/arch/loongarch/include/asm/cpu.h @@ -55,6 +55,27 @@ enum cpu_type_enum { CPU_LAST }; +static inline char *id_to_core_name(unsigned int id) +{ + if ((id & PRID_COMP_MASK) != PRID_COMP_LOONGSON) + return "Unknown"; + + switch (id & PRID_SERIES_MASK) { + case PRID_SERIES_LA132: + return "LA132"; + case PRID_SERIES_LA264: + return "LA264"; + case PRID_SERIES_LA364: + return "LA364"; + case PRID_SERIES_LA464: + return "LA464"; + case PRID_SERIES_LA664: + return "LA664"; + default: + return "Unknown"; + } +} + #endif /* !__ASSEMBLER__ */ /* @@ -101,7 +122,9 @@ enum cpu_type_enum { #define CPU_FEATURE_HYPERVISOR 26 /* CPU has hypervisor (running in VM) */ #define CPU_FEATURE_PTW 27 /* CPU has hardware page table walker */ #define CPU_FEATURE_LSPW 28 /* CPU has LSPW (lddir/ldpte instructions) */ -#define CPU_FEATURE_AVECINT 29 /* CPU has AVEC interrupt */ +#define CPU_FEATURE_MSGINT 29 /* CPU has MSG interrupt */ +#define CPU_FEATURE_AVECINT 30 /* CPU has AVEC interrupt */ +#define CPU_FEATURE_REDIRECTINT 31 /* CPU has interrupt remapping */ #define LOONGARCH_CPU_CPUCFG BIT_ULL(CPU_FEATURE_CPUCFG) #define LOONGARCH_CPU_LAM BIT_ULL(CPU_FEATURE_LAM) @@ -132,6 +155,8 @@ enum cpu_type_enum { #define LOONGARCH_CPU_HYPERVISOR BIT_ULL(CPU_FEATURE_HYPERVISOR) #define LOONGARCH_CPU_PTW BIT_ULL(CPU_FEATURE_PTW) #define LOONGARCH_CPU_LSPW BIT_ULL(CPU_FEATURE_LSPW) +#define LOONGARCH_CPU_MSGINT BIT_ULL(CPU_FEATURE_MSGINT) #define LOONGARCH_CPU_AVECINT BIT_ULL(CPU_FEATURE_AVECINT) +#define LOONGARCH_CPU_REDIRECTINT BIT_ULL(CPU_FEATURE_REDIRECTINT) #endif /* _ASM_CPU_H */ diff --git a/arch/loongarch/include/asm/hw_breakpoint.h b/arch/loongarch/include/asm/hw_breakpoint.h index 13b2462f3d8c..5faa97a87a9e 100644 --- a/arch/loongarch/include/asm/hw_breakpoint.h +++ b/arch/loongarch/include/asm/hw_breakpoint.h @@ -134,13 +134,13 @@ static inline void hw_breakpoint_thread_switch(struct task_struct *next) /* Determine number of BRP registers available. */ static inline int get_num_brps(void) { - return csr_read64(LOONGARCH_CSR_FWPC) & CSR_FWPC_NUM; + return csr_read32(LOONGARCH_CSR_FWPC) & CSR_FWPC_NUM; } /* Determine number of WRP registers available. */ static inline int get_num_wrps(void) { - return csr_read64(LOONGARCH_CSR_MWPC) & CSR_MWPC_NUM; + return csr_read32(LOONGARCH_CSR_MWPC) & CSR_MWPC_NUM; } #endif /* __KERNEL__ */ diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h index eaff72b38dc8..0130185e0349 100644 --- a/arch/loongarch/include/asm/io.h +++ b/arch/loongarch/include/asm/io.h @@ -14,7 +14,7 @@ #include <asm/pgtable-bits.h> #include <asm/string.h> -extern void __init __iomem *early_ioremap(u64 phys_addr, unsigned long size); +extern void __init __iomem *early_ioremap(phys_addr_t phys_addr, unsigned long size); extern void __init early_iounmap(void __iomem *addr, unsigned long size); #define early_memremap early_ioremap @@ -25,6 +25,9 @@ extern void __init early_iounmap(void __iomem *addr, unsigned long size); static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, pgprot_t prot) { + if (offset > TO_PHYS_MASK) + return NULL; + switch (pgprot_val(prot) & _CACHE_MASK) { case _CACHE_CC: return (void __iomem *)(unsigned long)(CACHE_BASE + offset); diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index 09dfd7eb406e..3de03cb864b2 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -128,6 +128,7 @@ #define CPUCFG6_PMNUM GENMASK(7, 4) #define CPUCFG6_PMNUM_SHIFT 4 #define CPUCFG6_PMBITS GENMASK(13, 8) +#define CPUCFG6_PMBITS_SHIFT 8 #define CPUCFG6_UPM BIT(14) #define LOONGARCH_CPUCFG16 0x10 @@ -1137,6 +1138,7 @@ #define IOCSRF_FLATMODE BIT_ULL(10) #define IOCSRF_VM BIT_ULL(11) #define IOCSRF_AVEC BIT_ULL(15) +#define IOCSRF_REDIRECT BIT_ULL(16) #define LOONGARCH_IOCSR_VENDOR 0x10 diff --git a/arch/loongarch/include/asm/pgalloc.h b/arch/loongarch/include/asm/pgalloc.h index 1c63a9d9a6d3..08dcc698ec18 100644 --- a/arch/loongarch/include/asm/pgalloc.h +++ b/arch/loongarch/include/asm/pgalloc.h @@ -88,7 +88,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) { pud_t *pud; - struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM, 0); + struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, 0); if (!ptdesc) return NULL; diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index bd128696e96d..03fb60432fde 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -424,6 +424,9 @@ static inline unsigned long pte_accessible(struct mm_struct *mm, pte_t a) static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { + if (pte_val(pte) & _PAGE_DIRTY) + pte_val(pte) |= _PAGE_MODIFIED; + return __pte((pte_val(pte) & _PAGE_CHG_MASK) | (pgprot_val(newprot) & ~_PAGE_CHG_MASK)); } @@ -547,9 +550,11 @@ static inline struct page *pmd_page(pmd_t pmd) static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) { - pmd_val(pmd) = (pmd_val(pmd) & _HPAGE_CHG_MASK) | - (pgprot_val(newprot) & ~_HPAGE_CHG_MASK); - return pmd; + if (pmd_val(pmd) & _PAGE_DIRTY) + pmd_val(pmd) |= _PAGE_MODIFIED; + + return __pmd((pmd_val(pmd) & _HPAGE_CHG_MASK) | + (pgprot_val(newprot) & ~_HPAGE_CHG_MASK)); } static inline pmd_t pmd_mkinvalid(pmd_t pmd) diff --git a/arch/loongarch/include/uapi/asm/ptrace.h b/arch/loongarch/include/uapi/asm/ptrace.h index aafb3cd9e943..215e0f9e8aa3 100644 --- a/arch/loongarch/include/uapi/asm/ptrace.h +++ b/arch/loongarch/include/uapi/asm/ptrace.h @@ -10,10 +10,6 @@ #include <linux/types.h> -#ifndef __KERNEL__ -#include <stdint.h> -#endif - /* * For PTRACE_{POKE,PEEK}USR. 0 - 31 are GPRs, * 32 is syscall's original ARG0, 33 is PC, 34 is BADVADDR. @@ -41,44 +37,44 @@ struct user_pt_regs { } __attribute__((aligned(8))); struct user_fp_state { - uint64_t fpr[32]; - uint64_t fcc; - uint32_t fcsr; + __u64 fpr[32]; + __u64 fcc; + __u32 fcsr; }; struct user_lsx_state { /* 32 registers, 128 bits width per register. */ - uint64_t vregs[32*2]; + __u64 vregs[32*2]; }; struct user_lasx_state { /* 32 registers, 256 bits width per register. */ - uint64_t vregs[32*4]; + __u64 vregs[32*4]; }; struct user_lbt_state { - uint64_t scr[4]; - uint32_t eflags; - uint32_t ftop; + __u64 scr[4]; + __u32 eflags; + __u32 ftop; }; struct user_watch_state { - uint64_t dbg_info; + __u64 dbg_info; struct { - uint64_t addr; - uint64_t mask; - uint32_t ctrl; - uint32_t pad; + __u64 addr; + __u64 mask; + __u32 ctrl; + __u32 pad; } dbg_regs[8]; }; struct user_watch_state_v2 { - uint64_t dbg_info; + __u64 dbg_info; struct { - uint64_t addr; - uint64_t mask; - uint32_t ctrl; - uint32_t pad; + __u64 addr; + __u64 mask; + __u32 ctrl; + __u32 pad; } dbg_regs[14]; }; diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c index cbfce2872d71..a2060a24b39f 100644 --- a/arch/loongarch/kernel/cpu-probe.c +++ b/arch/loongarch/kernel/cpu-probe.c @@ -157,6 +157,8 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c) c->options |= LOONGARCH_CPU_TLB; if (config & CPUCFG1_IOCSR) c->options |= LOONGARCH_CPU_IOCSR; + if (config & CPUCFG1_MSGINT) + c->options |= LOONGARCH_CPU_MSGINT; if (config & CPUCFG1_UAL) { c->options |= LOONGARCH_CPU_UAL; elf_hwcap |= HWCAP_LOONGARCH_UAL; @@ -275,7 +277,7 @@ static inline void cpu_probe_loongson(struct cpuinfo_loongarch *c, unsigned int uint32_t config; uint64_t *vendor = (void *)(&cpu_full_name[VENDOR_OFFSET]); uint64_t *cpuname = (void *)(&cpu_full_name[CPUNAME_OFFSET]); - const char *core_name = "Unknown"; + const char *core_name = id_to_core_name(c->processor_id); switch (BIT(fls(c->isa_level) - 1)) { case LOONGARCH_CPU_ISA_LA32R: @@ -289,35 +291,23 @@ static inline void cpu_probe_loongson(struct cpuinfo_loongarch *c, unsigned int break; } - switch (c->processor_id & PRID_SERIES_MASK) { - case PRID_SERIES_LA132: - core_name = "LA132"; - break; - case PRID_SERIES_LA264: - core_name = "LA264"; - break; - case PRID_SERIES_LA364: - core_name = "LA364"; - break; - case PRID_SERIES_LA464: - core_name = "LA464"; - break; - case PRID_SERIES_LA664: - core_name = "LA664"; - break; - } - pr_info("%s Processor probed (%s Core)\n", __cpu_family[cpu], core_name); - if (!cpu_has_iocsr) + if (!cpu_has_iocsr) { + __cpu_full_name[cpu] = "Unknown"; return; - - if (!__cpu_full_name[cpu]) - __cpu_full_name[cpu] = cpu_full_name; + } *vendor = iocsr_read64(LOONGARCH_IOCSR_VENDOR); *cpuname = iocsr_read64(LOONGARCH_IOCSR_CPUNAME); + if (!__cpu_full_name[cpu]) { + if (((char *)vendor)[0] == 0) + __cpu_full_name[cpu] = "Unknown"; + else + __cpu_full_name[cpu] = cpu_full_name; + } + config = iocsr_read32(LOONGARCH_IOCSR_FEATURES); if (config & IOCSRF_CSRIPI) c->options |= LOONGARCH_CPU_CSRIPI; @@ -331,6 +321,8 @@ static inline void cpu_probe_loongson(struct cpuinfo_loongarch *c, unsigned int c->options |= LOONGARCH_CPU_EIODECODE; if (config & IOCSRF_AVEC) c->options |= LOONGARCH_CPU_AVECINT; + if (config & IOCSRF_REDIRECT) + c->options |= LOONGARCH_CPU_REDIRECTINT; if (config & IOCSRF_VM) c->options |= LOONGARCH_CPU_HYPERVISOR; } diff --git a/arch/loongarch/kernel/kexec_efi.c b/arch/loongarch/kernel/kexec_efi.c index 45121b914f8f..5ee78ebb1546 100644 --- a/arch/loongarch/kernel/kexec_efi.c +++ b/arch/loongarch/kernel/kexec_efi.c @@ -42,7 +42,7 @@ static void *efi_kexec_load(struct kimage *image, { int ret; unsigned long text_offset, kernel_segment_number; - struct kexec_buf kbuf; + struct kexec_buf kbuf = {}; struct kexec_segment *kernel_segment; struct loongarch_image_header *h; diff --git a/arch/loongarch/kernel/kexec_elf.c b/arch/loongarch/kernel/kexec_elf.c index 97b2f049801a..1b6b64744c7f 100644 --- a/arch/loongarch/kernel/kexec_elf.c +++ b/arch/loongarch/kernel/kexec_elf.c @@ -59,7 +59,7 @@ static void *elf_kexec_load(struct kimage *image, int ret; unsigned long text_offset, kernel_segment_number; struct elfhdr ehdr; - struct kexec_buf kbuf; + struct kexec_buf kbuf = {}; struct kexec_elf_info elf_info; struct kexec_segment *kernel_segment; diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c index e4b2bbc47e62..d7fafda1d541 100644 --- a/arch/loongarch/kernel/machine_kexec.c +++ b/arch/loongarch/kernel/machine_kexec.c @@ -39,34 +39,12 @@ static unsigned long systable_ptr; static unsigned long start_addr; static unsigned long first_ind_entry; -static void kexec_image_info(const struct kimage *kimage) -{ - unsigned long i; - - pr_debug("kexec kimage info:\n"); - pr_debug("\ttype: %d\n", kimage->type); - pr_debug("\tstart: %lx\n", kimage->start); - pr_debug("\thead: %lx\n", kimage->head); - pr_debug("\tnr_segments: %lu\n", kimage->nr_segments); - - for (i = 0; i < kimage->nr_segments; i++) { - pr_debug("\t segment[%lu]: %016lx - %016lx", i, - kimage->segment[i].mem, - kimage->segment[i].mem + kimage->segment[i].memsz); - pr_debug("\t\t0x%lx bytes, %lu pages\n", - (unsigned long)kimage->segment[i].memsz, - (unsigned long)kimage->segment[i].memsz / PAGE_SIZE); - } -} - int machine_kexec_prepare(struct kimage *kimage) { int i; char *bootloader = "kexec"; void *cmdline_ptr = (void *)KEXEC_CMDLINE_ADDR; - kexec_image_info(kimage); - kimage->arch.efi_boot = fw_arg0; kimage->arch.systable_ptr = fw_arg2; @@ -259,6 +237,7 @@ void machine_crash_shutdown(struct pt_regs *regs) #ifdef CONFIG_SMP crash_smp_send_stop(); #endif + machine_kexec_mask_interrupts(); cpumask_set_cpu(crashing_cpu, &cpus_in_crash); pr_info("Starting crashdump kernel...\n"); @@ -296,6 +275,7 @@ void machine_kexec(struct kimage *image) /* We do not want to be bothered. */ local_irq_disable(); + machine_kexec_mask_interrupts(); pr_notice("EFI boot flag: 0x%lx\n", efi_boot); pr_notice("Command line addr: 0x%lx\n", cmdline_ptr); diff --git a/arch/loongarch/kernel/machine_kexec_file.c b/arch/loongarch/kernel/machine_kexec_file.c index dda236b51a88..fb57026f5f25 100644 --- a/arch/loongarch/kernel/machine_kexec_file.c +++ b/arch/loongarch/kernel/machine_kexec_file.c @@ -143,7 +143,7 @@ int load_other_segments(struct kimage *image, unsigned long initrd_load_addr = 0; unsigned long orig_segments = image->nr_segments; char *modified_cmdline = NULL; - struct kexec_buf kbuf; + struct kexec_buf kbuf = {}; kbuf.image = image; /* Don't allocate anything below the kernel */ diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c index aed901c57fb4..8ab1ffedc52c 100644 --- a/arch/loongarch/kernel/mem.c +++ b/arch/loongarch/kernel/mem.c @@ -13,7 +13,7 @@ void __init memblock_init(void) { u32 mem_type; - u64 mem_start, mem_end, mem_size; + u64 mem_start, mem_size; efi_memory_desc_t *md; /* Parse memory information */ @@ -21,7 +21,6 @@ void __init memblock_init(void) mem_type = md->type; mem_start = md->phys_addr; mem_size = md->num_pages << EFI_PAGE_SHIFT; - mem_end = mem_start + mem_size; switch (mem_type) { case EFI_LOADER_CODE: @@ -31,8 +30,6 @@ void __init memblock_init(void) case EFI_PERSISTENT_MEMORY: case EFI_CONVENTIONAL_MEMORY: memblock_add(mem_start, mem_size); - if (max_low_pfn < (mem_end >> PAGE_SHIFT)) - max_low_pfn = mem_end >> PAGE_SHIFT; break; case EFI_PAL_CODE: case EFI_UNUSABLE_MEMORY: @@ -49,6 +46,8 @@ void __init memblock_init(void) } } + max_pfn = PFN_DOWN(memblock_end_of_DRAM()); + max_low_pfn = min(PFN_DOWN(HIGHMEM_START), max_pfn); memblock_set_current_limit(PFN_PHYS(max_low_pfn)); /* Reserve the first 2MB */ diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c index d6e73e8f9c0b..8b89898e20df 100644 --- a/arch/loongarch/kernel/numa.c +++ b/arch/loongarch/kernel/numa.c @@ -158,35 +158,9 @@ static void __init node_mem_init(unsigned int node) #ifdef CONFIG_ACPI_NUMA -/* - * add_numamem_region - * - * Add a uasable memory region described by BIOS. The - * routine gets each intersection between BIOS's region - * and node's region, and adds them into node's memblock - * pool. - * - */ -static void __init add_numamem_region(u64 start, u64 end, u32 type) -{ - u32 node = pa_to_nid(start); - u64 size = end - start; - static unsigned long num_physpages; - - if (start >= end) { - pr_debug("Invalid region: %016llx-%016llx\n", start, end); - return; - } - - num_physpages += (size >> PAGE_SHIFT); - pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx Bytes\n", - node, type, start, size); - pr_info(" start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n", - start >> PAGE_SHIFT, end >> PAGE_SHIFT, num_physpages); - memblock_set_node(start, size, &memblock.memory, node); -} +static unsigned long num_physpages; -static void __init init_node_memblock(void) +static void __init info_node_memblock(void) { u32 mem_type; u64 mem_end, mem_start, mem_size; @@ -206,12 +180,20 @@ static void __init init_node_memblock(void) case EFI_BOOT_SERVICES_DATA: case EFI_PERSISTENT_MEMORY: case EFI_CONVENTIONAL_MEMORY: - add_numamem_region(mem_start, mem_end, mem_type); + num_physpages += (mem_size >> PAGE_SHIFT); + pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx Bytes\n", + (u32)pa_to_nid(mem_start), mem_type, mem_start, mem_size); + pr_info(" start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n", + mem_start >> PAGE_SHIFT, mem_end >> PAGE_SHIFT, num_physpages); break; case EFI_PAL_CODE: case EFI_UNUSABLE_MEMORY: case EFI_ACPI_RECLAIM_MEMORY: - add_numamem_region(mem_start, mem_end, mem_type); + num_physpages += (mem_size >> PAGE_SHIFT); + pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx Bytes\n", + (u32)pa_to_nid(mem_start), mem_type, mem_start, mem_size); + pr_info(" start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n", + mem_start >> PAGE_SHIFT, mem_end >> PAGE_SHIFT, num_physpages); fallthrough; case EFI_RESERVED_TYPE: case EFI_RUNTIME_SERVICES_CODE: @@ -249,22 +231,16 @@ int __init init_numa_memory(void) for (i = 0; i < NR_CPUS; i++) set_cpuid_to_node(i, NUMA_NO_NODE); - numa_reset_distance(); - nodes_clear(numa_nodes_parsed); - nodes_clear(node_possible_map); - nodes_clear(node_online_map); - WARN_ON(memblock_clear_hotplug(0, PHYS_ADDR_MAX)); - /* Parse SRAT and SLIT if provided by firmware. */ - ret = acpi_disabled ? fake_numa_init() : acpi_numa_init(); + if (!acpi_disabled) + ret = numa_memblks_init(acpi_numa_init, false); + else + ret = numa_memblks_init(fake_numa_init, false); + if (ret < 0) return ret; - node_possible_map = numa_nodes_parsed; - if (WARN_ON(nodes_empty(node_possible_map))) - return -EINVAL; - - init_node_memblock(); + info_node_memblock(); if (!memblock_validate_numa_coverage(SZ_1M)) return -EINVAL; @@ -272,7 +248,8 @@ int __init init_numa_memory(void) node_mem_init(node); node_set_online(node); } - max_low_pfn = PHYS_PFN(memblock_end_of_DRAM()); + max_pfn = PFN_DOWN(memblock_end_of_DRAM()); + max_low_pfn = min(PFN_DOWN(HIGHMEM_START), max_pfn); setup_nr_node_ids(); loongson_sysconf.nr_nodes = nr_node_ids; @@ -283,26 +260,6 @@ int __init init_numa_memory(void) #endif -void __init paging_init(void) -{ - unsigned int node; - unsigned long zones_size[MAX_NR_ZONES] = {0, }; - - for_each_online_node(node) { - unsigned long start_pfn, end_pfn; - - get_pfn_range_for_nid(node, &start_pfn, &end_pfn); - - if (end_pfn > max_low_pfn) - max_low_pfn = end_pfn; - } -#ifdef CONFIG_ZONE_DMA32 - zones_size[ZONE_DMA32] = MAX_DMA32_PFN; -#endif - zones_size[ZONE_NORMAL] = max_low_pfn; - free_area_init(zones_size); -} - int pcibus_to_node(struct pci_bus *bus) { return dev_to_node(&bus->dev); diff --git a/arch/loongarch/kernel/perf_event.c b/arch/loongarch/kernel/perf_event.c index 8ad098703488..9d257c8519c9 100644 --- a/arch/loongarch/kernel/perf_event.c +++ b/arch/loongarch/kernel/perf_event.c @@ -845,13 +845,14 @@ static const struct loongarch_perf_event *loongarch_pmu_map_raw_event(u64 config static int __init init_hw_perf_events(void) { - int counters; + int bits, counters; if (!cpu_has_pmp) return -ENODEV; pr_info("Performance counters: "); - counters = ((read_cpucfg(LOONGARCH_CPUCFG6) & CPUCFG6_PMNUM) >> 4) + 1; + bits = ((read_cpucfg(LOONGARCH_CPUCFG6) & CPUCFG6_PMBITS) >> CPUCFG6_PMBITS_SHIFT) + 1; + counters = ((read_cpucfg(LOONGARCH_CPUCFG6) & CPUCFG6_PMNUM) >> CPUCFG6_PMNUM_SHIFT) + 1; loongarch_pmu.num_counters = counters; loongarch_pmu.max_period = (1ULL << 63) - 1; @@ -867,7 +868,7 @@ static int __init init_hw_perf_events(void) on_each_cpu(reset_counters, NULL, 1); pr_cont("%s PMU enabled, %d %d-bit counters available to each CPU.\n", - loongarch_pmu.name, counters, 64); + loongarch_pmu.name, counters, bits); perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); diff --git a/arch/loongarch/kernel/proc.c b/arch/loongarch/kernel/proc.c index cea30768ae92..63d2b7e7e844 100644 --- a/arch/loongarch/kernel/proc.c +++ b/arch/loongarch/kernel/proc.c @@ -17,6 +17,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) { unsigned long n = (unsigned long) v - 1; unsigned int isa = cpu_data[n].isa_level; + unsigned int prid = cpu_data[n].processor_id; unsigned int version = cpu_data[n].processor_id & 0xff; unsigned int fp_version = cpu_data[n].fpu_vers; @@ -37,6 +38,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "global_id\t\t: %d\n", cpu_data[n].global_id); seq_printf(m, "CPU Family\t\t: %s\n", __cpu_family[n]); seq_printf(m, "Model Name\t\t: %s\n", __cpu_full_name[n]); + seq_printf(m, "PRID\t\t\t: %s (%08x)\n", id_to_core_name(prid), prid); seq_printf(m, "CPU Revision\t\t: 0x%02x\n", version); seq_printf(m, "FPU Revision\t\t: 0x%02x\n", fp_version); seq_printf(m, "CPU MHz\t\t\t: %llu.%02llu\n", diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 69c17d162fff..25a87378e48e 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -294,8 +294,6 @@ static void __init fdt_setup(void) early_init_dt_scan(fdt_pointer, __pa(fdt_pointer)); early_init_fdt_reserve_self(); - - max_low_pfn = PFN_PHYS(memblock_end_of_DRAM()); #endif } @@ -390,7 +388,8 @@ static void __init check_kernel_sections_mem(void) static void __init arch_mem_init(char **cmdline_p) { /* Recalculate max_low_pfn for "mem=xxx" */ - max_pfn = max_low_pfn = PHYS_PFN(memblock_end_of_DRAM()); + max_pfn = PFN_DOWN(memblock_end_of_DRAM()); + max_low_pfn = min(PFN_DOWN(HIGHMEM_START), max_pfn); if (usermem) pr_info("User-defined physical RAM map overwrite\n"); diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index 3d9be6ca7ec5..da5926fead4a 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -1131,8 +1131,8 @@ static void configure_exception_vector(void) tlbrentry = (unsigned long)exception_handlers + 80*VECSIZE; csr_write64(eentry, LOONGARCH_CSR_EENTRY); - csr_write64(eentry, LOONGARCH_CSR_MERRENTRY); - csr_write64(tlbrentry, LOONGARCH_CSR_TLBRENTRY); + csr_write64(__pa(eentry), LOONGARCH_CSR_MERRENTRY); + csr_write64(__pa(tlbrentry), LOONGARCH_CSR_TLBRENTRY); } void per_cpu_trap_init(int cpu) diff --git a/arch/loongarch/kvm/intc/eiointc.c b/arch/loongarch/kvm/intc/eiointc.c index c32333695381..a1cc116b4dac 100644 --- a/arch/loongarch/kvm/intc/eiointc.c +++ b/arch/loongarch/kvm/intc/eiointc.c @@ -439,7 +439,7 @@ static int kvm_eiointc_ctrl_access(struct kvm_device *dev, spin_lock_irqsave(&s->lock, flags); switch (type) { case KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_NUM_CPU: - if (val >= EIOINTC_ROUTE_MAX_VCPUS) + if (val > EIOINTC_ROUTE_MAX_VCPUS) ret = -EINVAL; else s->num_cpu = val; diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c index 7c8143e79c12..a7fa458e3360 100644 --- a/arch/loongarch/kvm/mmu.c +++ b/arch/loongarch/kvm/mmu.c @@ -857,7 +857,7 @@ retry: if (writeable) { prot_bits = kvm_pte_mkwriteable(prot_bits); - if (write) + if (write || !kvm_slot_dirty_track_enabled(memslot)) prot_bits = kvm_pte_mkdirty(prot_bits); } diff --git a/arch/loongarch/kvm/timer.c b/arch/loongarch/kvm/timer.c index 32dc213374be..29c2aaba63c3 100644 --- a/arch/loongarch/kvm/timer.c +++ b/arch/loongarch/kvm/timer.c @@ -4,6 +4,7 @@ */ #include <linux/kvm_host.h> +#include <asm/delay.h> #include <asm/kvm_csr.h> #include <asm/kvm_vcpu.h> @@ -95,6 +96,7 @@ void kvm_restore_timer(struct kvm_vcpu *vcpu) * and set CSR TVAL with -1 */ write_gcsr_timertick(0); + __delay(2); /* Wait cycles until timer interrupt injected */ /* * Writing CSR_TINTCLR_TI to LOONGARCH_CSR_TINTCLR will clear diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 30e3b089a596..1245a6b35896 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -132,6 +132,9 @@ static void kvm_lose_pmu(struct kvm_vcpu *vcpu) * Clear KVM_LARCH_PMU if the guest is not using PMU CSRs when * exiting the guest, so that the next time trap into the guest. * We don't need to deal with PMU CSRs contexts. + * + * Otherwise set the request bit KVM_REQ_PMU to restore guest PMU + * before entering guest VM */ val = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0); val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1); @@ -139,16 +142,12 @@ static void kvm_lose_pmu(struct kvm_vcpu *vcpu) val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3); if (!(val & KVM_PMU_EVENT_ENABLED)) vcpu->arch.aux_inuse &= ~KVM_LARCH_PMU; + else + kvm_make_request(KVM_REQ_PMU, vcpu); kvm_restore_host_pmu(vcpu); } -static void kvm_restore_pmu(struct kvm_vcpu *vcpu) -{ - if ((vcpu->arch.aux_inuse & KVM_LARCH_PMU)) - kvm_make_request(KVM_REQ_PMU, vcpu); -} - static void kvm_check_pmu(struct kvm_vcpu *vcpu) { if (kvm_check_request(KVM_REQ_PMU, vcpu)) { @@ -299,7 +298,10 @@ static int kvm_pre_enter_guest(struct kvm_vcpu *vcpu) vcpu->arch.aux_inuse &= ~KVM_LARCH_SWCSR_LATEST; if (kvm_request_pending(vcpu) || xfer_to_guest_mode_work_pending()) { - kvm_lose_pmu(vcpu); + if (vcpu->arch.aux_inuse & KVM_LARCH_PMU) { + kvm_lose_pmu(vcpu); + kvm_make_request(KVM_REQ_PMU, vcpu); + } /* make sure the vcpu mode has been written */ smp_store_mb(vcpu->mode, OUTSIDE_GUEST_MODE); local_irq_enable(); @@ -1604,9 +1606,6 @@ static int _kvm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_restore_timer(vcpu); kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); - /* Restore hardware PMU CSRs */ - kvm_restore_pmu(vcpu); - /* Don't bother restoring registers multiple times unless necessary */ if (vcpu->arch.aux_inuse & KVM_LARCH_HWCSR_USABLE) return 0; diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index c3e4586a7975..6bfd4b8dad1b 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -60,7 +60,6 @@ int __ref page_is_ram(unsigned long pfn) return memblock_is_memory(addr) && !memblock_is_reserved(addr); } -#ifndef CONFIG_NUMA void __init paging_init(void) { unsigned long max_zone_pfns[MAX_NR_ZONES]; @@ -72,7 +71,6 @@ void __init paging_init(void) free_area_init(max_zone_pfns); } -#endif /* !CONFIG_NUMA */ void __ref free_initmem(void) { diff --git a/arch/loongarch/mm/ioremap.c b/arch/loongarch/mm/ioremap.c index df949a3d0f34..27c336959fe8 100644 --- a/arch/loongarch/mm/ioremap.c +++ b/arch/loongarch/mm/ioremap.c @@ -6,7 +6,7 @@ #include <asm/io.h> #include <asm-generic/early_ioremap.h> -void __init __iomem *early_ioremap(u64 phys_addr, unsigned long size) +void __init __iomem *early_ioremap(phys_addr_t phys_addr, unsigned long size) { return ((void __iomem *)TO_CACHE(phys_addr)); } diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index cbe53d0b7fb0..f97dc9936401 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -1624,6 +1624,9 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i /* Direct jump skips 5 NOP instructions */ else if (is_bpf_text_address((unsigned long)orig_call)) orig_call += LOONGARCH_BPF_FENTRY_NBYTES; + /* Module tracing not supported - cause kernel lockups */ + else if (is_module_text_address((unsigned long)orig_call)) + return -ENOTSUPP; if (flags & BPF_TRAMP_F_CALL_ORIG) { move_addr(ctx, LOONGARCH_GPR_A0, (const u64)im); diff --git a/arch/loongarch/pci/pci.c b/arch/loongarch/pci/pci.c index 5bc9627a6cf9..d9fc5d520b37 100644 --- a/arch/loongarch/pci/pci.c +++ b/arch/loongarch/pci/pci.c @@ -50,11 +50,11 @@ static int __init pcibios_init(void) */ lsize = cpu_last_level_cache_line_size(); - BUG_ON(!lsize); + if (lsize) { + pci_dfl_cache_line_size = lsize >> 2; - pci_dfl_cache_line_size = lsize >> 2; - - pr_debug("PCI: pci_cache_line_size set to %d bytes\n", lsize); + pr_debug("PCI: pci_cache_line_size set to %d bytes\n", lsize); + } return 0; } diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile index d8316f993482..c0cc3ca5da9f 100644 --- a/arch/loongarch/vdso/Makefile +++ b/arch/loongarch/vdso/Makefile @@ -19,7 +19,7 @@ ccflags-vdso := \ cflags-vdso := $(ccflags-vdso) \ -isystem $(shell $(CC) -print-file-name=include) \ $(filter -W%,$(filter-out -Wa$(comma)%,$(KBUILD_CFLAGS))) \ - -std=gnu11 -O2 -g -fno-strict-aliasing -fno-common -fno-builtin \ + -std=gnu11 -fms-extensions -O2 -g -fno-strict-aliasing -fno-common -fno-builtin \ -fno-stack-protector -fno-jump-tables -DDISABLE_BRANCH_PROFILING \ $(call cc-option, -fno-asynchronous-unwind-tables) \ $(call cc-option, -fno-stack-protector) diff --git a/arch/m68k/configs/stmark2_defconfig b/arch/m68k/configs/stmark2_defconfig index 7787a4dd7c3c..f3268fed02fc 100644 --- a/arch/m68k/configs/stmark2_defconfig +++ b/arch/m68k/configs/stmark2_defconfig @@ -72,9 +72,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y # CONFIG_FILE_LOCKING is not set # CONFIG_DNOTIFY is not set # CONFIG_INOTIFY_USER is not set diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index f41d38dfbf13..871a5d67bf41 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -469,3 +469,4 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common listns sys_listns diff --git a/arch/microblaze/configs/mmu_defconfig b/arch/microblaze/configs/mmu_defconfig index 176314f3c9aa..fbbdcb394ca2 100644 --- a/arch/microblaze/configs/mmu_defconfig +++ b/arch/microblaze/configs/mmu_defconfig @@ -73,7 +73,7 @@ CONFIG_FB_XILINX=y CONFIG_UIO=y CONFIG_UIO_PDRV_GENIRQ=y CONFIG_UIO_DMEM_GENIRQ=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_DNOTIFY is not set CONFIG_TMPFS=y CONFIG_CRAMFS=y diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index 580af574fe73..022fc85d94b3 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -475,3 +475,4 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common listns sys_listns diff --git a/arch/mips/boot/dts/econet/en751221.dtsi b/arch/mips/boot/dts/econet/en751221.dtsi index 66197e73d4f0..2abeef5b744a 100644 --- a/arch/mips/boot/dts/econet/en751221.dtsi +++ b/arch/mips/boot/dts/econet/en751221.dtsi @@ -18,7 +18,7 @@ cpu@0 { device_type = "cpu"; - compatible = "mips,mips24KEc"; + compatible = "mips,mips34Kc"; reg = <0>; }; }; diff --git a/arch/mips/configs/bigsur_defconfig b/arch/mips/configs/bigsur_defconfig index 97d2cd997285..349e9e0b4f54 100644 --- a/arch/mips/configs/bigsur_defconfig +++ b/arch/mips/configs/bigsur_defconfig @@ -144,9 +144,9 @@ CONFIG_EXT2_FS=m CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=m -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=m +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_EXT4_FS=y CONFIG_QUOTA=y CONFIG_QUOTA_NETLINK_INTERFACE=y diff --git a/arch/mips/configs/cobalt_defconfig b/arch/mips/configs/cobalt_defconfig index b0b551efac7c..6ee9ee391fdc 100644 --- a/arch/mips/configs/cobalt_defconfig +++ b/arch/mips/configs/cobalt_defconfig @@ -59,9 +59,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y diff --git a/arch/mips/configs/decstation_64_defconfig b/arch/mips/configs/decstation_64_defconfig index 85a4472cb058..52a63dd7aac7 100644 --- a/arch/mips/configs/decstation_64_defconfig +++ b/arch/mips/configs/decstation_64_defconfig @@ -133,9 +133,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_ISO9660_FS=y CONFIG_JOLIET=y CONFIG_PROC_KCORE=y diff --git a/arch/mips/configs/decstation_defconfig b/arch/mips/configs/decstation_defconfig index a3b2c8da2dde..59fb7ee5eeb0 100644 --- a/arch/mips/configs/decstation_defconfig +++ b/arch/mips/configs/decstation_defconfig @@ -129,9 +129,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_ISO9660_FS=y CONFIG_JOLIET=y CONFIG_PROC_KCORE=y diff --git a/arch/mips/configs/decstation_r4k_defconfig b/arch/mips/configs/decstation_r4k_defconfig index a476717b8a6a..8be1cb433e95 100644 --- a/arch/mips/configs/decstation_r4k_defconfig +++ b/arch/mips/configs/decstation_r4k_defconfig @@ -129,9 +129,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_ISO9660_FS=y CONFIG_JOLIET=y CONFIG_PROC_KCORE=y diff --git a/arch/mips/configs/fuloong2e_defconfig b/arch/mips/configs/fuloong2e_defconfig index cdedbb8a8f53..b6fe3c962464 100644 --- a/arch/mips/configs/fuloong2e_defconfig +++ b/arch/mips/configs/fuloong2e_defconfig @@ -173,7 +173,7 @@ CONFIG_USB_ISIGHTFW=m CONFIG_UIO=m CONFIG_UIO_CIF=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y CONFIG_AUTOFS_FS=y diff --git a/arch/mips/configs/ip22_defconfig b/arch/mips/configs/ip22_defconfig index 2decf8b98d31..e123848f94ab 100644 --- a/arch/mips/configs/ip22_defconfig +++ b/arch/mips/configs/ip22_defconfig @@ -232,9 +232,9 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_INTF_DEV_UIE_EMUL=y CONFIG_RTC_DRV_DS1286=y CONFIG_EXT2_FS=m -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_XFS_FS=m CONFIG_XFS_QUOTA=y CONFIG_QUOTA=y diff --git a/arch/mips/configs/ip27_defconfig b/arch/mips/configs/ip27_defconfig index 5d079941fd20..1c10242b148b 100644 --- a/arch/mips/configs/ip27_defconfig +++ b/arch/mips/configs/ip27_defconfig @@ -272,9 +272,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_XFS_FS=m CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y diff --git a/arch/mips/configs/ip28_defconfig b/arch/mips/configs/ip28_defconfig index 6db21e498faa..755cbf20f5a5 100644 --- a/arch/mips/configs/ip28_defconfig +++ b/arch/mips/configs/ip28_defconfig @@ -49,9 +49,9 @@ CONFIG_WATCHDOG=y CONFIG_INDYDOG=y # CONFIG_VGA_CONSOLE is not set CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_QUOTA=y CONFIG_PROC_KCORE=y # CONFIG_PROC_PAGE_MONITOR is not set diff --git a/arch/mips/configs/ip30_defconfig b/arch/mips/configs/ip30_defconfig index a4524e785469..718f3060d9fa 100644 --- a/arch/mips/configs/ip30_defconfig +++ b/arch/mips/configs/ip30_defconfig @@ -143,9 +143,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_XFS_FS=m CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y diff --git a/arch/mips/configs/ip32_defconfig b/arch/mips/configs/ip32_defconfig index d8ac11427f69..7568838eb08b 100644 --- a/arch/mips/configs/ip32_defconfig +++ b/arch/mips/configs/ip32_defconfig @@ -89,9 +89,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_QUOTA=y CONFIG_QFMT_V1=m CONFIG_QFMT_V2=m diff --git a/arch/mips/configs/jazz_defconfig b/arch/mips/configs/jazz_defconfig index 65adb538030d..a790c2610fd3 100644 --- a/arch/mips/configs/jazz_defconfig +++ b/arch/mips/configs/jazz_defconfig @@ -69,7 +69,7 @@ CONFIG_FB_G364=y CONFIG_FRAMEBUFFER_CONSOLE=y # CONFIG_HWMON is not set CONFIG_EXT2_FS=m -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_XFS_FS=m CONFIG_XFS_QUOTA=y CONFIG_AUTOFS_FS=m diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig index 5038a27d035f..8d3f20ed19b5 100644 --- a/arch/mips/configs/lemote2f_defconfig +++ b/arch/mips/configs/lemote2f_defconfig @@ -226,9 +226,9 @@ CONFIG_MMC=m CONFIG_LEDS_CLASS=y CONFIG_STAGING=y CONFIG_EXT2_FS=m -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y CONFIG_XFS_FS=m diff --git a/arch/mips/configs/loongson2k_defconfig b/arch/mips/configs/loongson2k_defconfig index 0cc665d3ea34..aec1fd1902eb 100644 --- a/arch/mips/configs/loongson2k_defconfig +++ b/arch/mips/configs/loongson2k_defconfig @@ -298,9 +298,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_XFS_FS=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y diff --git a/arch/mips/configs/loongson3_defconfig b/arch/mips/configs/loongson3_defconfig index 240efff37d98..575aaf242361 100644 --- a/arch/mips/configs/loongson3_defconfig +++ b/arch/mips/configs/loongson3_defconfig @@ -348,9 +348,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_XFS_FS=y CONFIG_XFS_POSIX_ACL=y CONFIG_QUOTA=y diff --git a/arch/mips/configs/malta_defconfig b/arch/mips/configs/malta_defconfig index 9fcbac829920..81704ec67f09 100644 --- a/arch/mips/configs/malta_defconfig +++ b/arch/mips/configs/malta_defconfig @@ -313,7 +313,7 @@ CONFIG_RTC_DRV_CMOS=y CONFIG_UIO=m CONFIG_UIO_CIF=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y CONFIG_JFS_SECURITY=y diff --git a/arch/mips/configs/malta_kvm_defconfig b/arch/mips/configs/malta_kvm_defconfig index 19102386a81c..82a97f58bce1 100644 --- a/arch/mips/configs/malta_kvm_defconfig +++ b/arch/mips/configs/malta_kvm_defconfig @@ -319,7 +319,7 @@ CONFIG_RTC_DRV_CMOS=y CONFIG_UIO=m CONFIG_UIO_CIF=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y CONFIG_JFS_SECURITY=y diff --git a/arch/mips/configs/malta_qemu_32r6_defconfig b/arch/mips/configs/malta_qemu_32r6_defconfig index 1b98f6945c2d..accb471a1d93 100644 --- a/arch/mips/configs/malta_qemu_32r6_defconfig +++ b/arch/mips/configs/malta_qemu_32r6_defconfig @@ -148,7 +148,7 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_CMOS=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_XFS_FS=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y diff --git a/arch/mips/configs/maltaaprp_defconfig b/arch/mips/configs/maltaaprp_defconfig index 7b8905cb3400..6bda67c5f68f 100644 --- a/arch/mips/configs/maltaaprp_defconfig +++ b/arch/mips/configs/maltaaprp_defconfig @@ -149,7 +149,7 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_CMOS=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_XFS_FS=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y diff --git a/arch/mips/configs/maltasmvp_defconfig b/arch/mips/configs/maltasmvp_defconfig index 8249f6a51895..e4082537f80f 100644 --- a/arch/mips/configs/maltasmvp_defconfig +++ b/arch/mips/configs/maltasmvp_defconfig @@ -148,9 +148,9 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_CMOS=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_XFS_FS=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y diff --git a/arch/mips/configs/maltasmvp_eva_defconfig b/arch/mips/configs/maltasmvp_eva_defconfig index 21cb37668763..58f5af45fa98 100644 --- a/arch/mips/configs/maltasmvp_eva_defconfig +++ b/arch/mips/configs/maltasmvp_eva_defconfig @@ -152,7 +152,7 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_CMOS=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_XFS_FS=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y diff --git a/arch/mips/configs/maltaup_defconfig b/arch/mips/configs/maltaup_defconfig index 3df9cd669683..9bfef7de0d1c 100644 --- a/arch/mips/configs/maltaup_defconfig +++ b/arch/mips/configs/maltaup_defconfig @@ -148,7 +148,7 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_CMOS=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_XFS_FS=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y diff --git a/arch/mips/configs/maltaup_xpa_defconfig b/arch/mips/configs/maltaup_xpa_defconfig index 1dd07c9d1812..0f9ef20744f9 100644 --- a/arch/mips/configs/maltaup_xpa_defconfig +++ b/arch/mips/configs/maltaup_xpa_defconfig @@ -319,7 +319,7 @@ CONFIG_RTC_DRV_CMOS=y CONFIG_UIO=m CONFIG_UIO_CIF=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y CONFIG_JFS_SECURITY=y diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig index 2707ab134639..c58d1a61d528 100644 --- a/arch/mips/configs/mtx1_defconfig +++ b/arch/mips/configs/mtx1_defconfig @@ -595,9 +595,9 @@ CONFIG_EXT2_FS=m CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=m -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=m +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_QUOTA=y CONFIG_AUTOFS_FS=y CONFIG_FUSE_FS=m diff --git a/arch/mips/configs/rm200_defconfig b/arch/mips/configs/rm200_defconfig index 39a2419e1f3e..b507dc4dddd4 100644 --- a/arch/mips/configs/rm200_defconfig +++ b/arch/mips/configs/rm200_defconfig @@ -307,7 +307,7 @@ CONFIG_USB_SISUSBVGA=m CONFIG_USB_LD=m CONFIG_USB_TEST=m CONFIG_EXT2_FS=m -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_XFS_FS=m CONFIG_XFS_QUOTA=y CONFIG_AUTOFS_FS=m diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 29191fa1801e..a3101f2268c6 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -692,7 +692,7 @@ unsigned long mips_stack_top(void) /* Space for the VDSO, data page & GIC user page */ if (current->thread.abi) { top -= PAGE_ALIGN(current->thread.abi->vdso->size); - top -= PAGE_SIZE; + top -= VDSO_NR_PAGES * PAGE_SIZE; top -= mips_gic_present() ? PAGE_SIZE : 0; /* Space to randomize the VDSO base */ diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index d824ffe9a014..8cedc83c3266 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -408,3 +408,4 @@ 467 n32 open_tree_attr sys_open_tree_attr 468 n32 file_getattr sys_file_getattr 469 n32 file_setattr sys_file_setattr +470 n32 listns sys_listns diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index 7a7049c2c307..9b92bddf06b5 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -384,3 +384,4 @@ 467 n64 open_tree_attr sys_open_tree_attr 468 n64 file_getattr sys_file_getattr 469 n64 file_setattr sys_file_setattr +470 n64 listns sys_listns diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index d330274f0601..f810b8a55716 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -457,3 +457,4 @@ 467 o32 open_tree_attr sys_open_tree_attr 468 o32 file_getattr sys_file_getattr 469 o32 file_setattr sys_file_setattr +470 o32 listns sys_listns diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c index 347126dc010d..44a662536148 100644 --- a/arch/mips/mm/tlb-r4k.c +++ b/arch/mips/mm/tlb-r4k.c @@ -12,9 +12,11 @@ #include <linux/init.h> #include <linux/sched.h> #include <linux/smp.h> +#include <linux/memblock.h> #include <linux/mm.h> #include <linux/hugetlb.h> #include <linux/export.h> +#include <linux/sort.h> #include <asm/cpu.h> #include <asm/cpu-type.h> @@ -508,58 +510,95 @@ static int __init set_ntlb(char *str) __setup("ntlb=", set_ntlb); -/* Initialise all TLB entries with unique values */ -static void r4k_tlb_uniquify(void) + +/* Comparison function for EntryHi VPN fields. */ +static int r4k_vpn_cmp(const void *a, const void *b) { - int entry = num_wired_entries(); + long v = *(unsigned long *)a - *(unsigned long *)b; + int s = sizeof(long) > sizeof(int) ? sizeof(long) * 8 - 1: 0; + return s ? (v != 0) | v >> s : v; +} + +/* + * Initialise all TLB entries with unique values that do not clash with + * what we have been handed over and what we'll be using ourselves. + */ +static void __ref r4k_tlb_uniquify(void) +{ + int tlbsize = current_cpu_data.tlbsize; + bool use_slab = slab_is_available(); + int start = num_wired_entries(); + phys_addr_t tlb_vpn_size; + unsigned long *tlb_vpns; + unsigned long vpn_mask; + int cnt, ent, idx, i; + + vpn_mask = GENMASK(cpu_vmbits - 1, 13); + vpn_mask |= IS_ENABLED(CONFIG_64BIT) ? 3ULL << 62 : 1 << 31; + + tlb_vpn_size = tlbsize * sizeof(*tlb_vpns); + tlb_vpns = (use_slab ? + kmalloc(tlb_vpn_size, GFP_KERNEL) : + memblock_alloc_raw(tlb_vpn_size, sizeof(*tlb_vpns))); + if (WARN_ON(!tlb_vpns)) + return; /* Pray local_flush_tlb_all() is good enough. */ htw_stop(); + + for (i = start, cnt = 0; i < tlbsize; i++, cnt++) { + unsigned long vpn; + + write_c0_index(i); + mtc0_tlbr_hazard(); + tlb_read(); + tlb_read_hazard(); + vpn = read_c0_entryhi(); + vpn &= vpn_mask & PAGE_MASK; + tlb_vpns[cnt] = vpn; + + /* Prevent any large pages from overlapping regular ones. */ + write_c0_pagemask(read_c0_pagemask() & PM_DEFAULT_MASK); + mtc0_tlbw_hazard(); + tlb_write_indexed(); + tlbw_use_hazard(); + } + + sort(tlb_vpns, cnt, sizeof(tlb_vpns[0]), r4k_vpn_cmp, NULL); + + write_c0_pagemask(PM_DEFAULT_MASK); write_c0_entrylo0(0); write_c0_entrylo1(0); - while (entry < current_cpu_data.tlbsize) { - unsigned long asid_mask = cpu_asid_mask(¤t_cpu_data); - unsigned long asid = 0; - int idx; + idx = 0; + ent = tlbsize; + for (i = start; i < tlbsize; i++) + while (1) { + unsigned long entryhi, vpn; - /* Skip wired MMID to make ginvt_mmid work */ - if (cpu_has_mmid) - asid = MMID_KERNEL_WIRED + 1; + entryhi = UNIQUE_ENTRYHI(ent); + vpn = entryhi & vpn_mask & PAGE_MASK; - /* Check for match before using UNIQUE_ENTRYHI */ - do { - if (cpu_has_mmid) { - write_c0_memorymapid(asid); - write_c0_entryhi(UNIQUE_ENTRYHI(entry)); + if (idx >= cnt || vpn < tlb_vpns[idx]) { + write_c0_entryhi(entryhi); + write_c0_index(i); + mtc0_tlbw_hazard(); + tlb_write_indexed(); + ent++; + break; + } else if (vpn == tlb_vpns[idx]) { + ent++; } else { - write_c0_entryhi(UNIQUE_ENTRYHI(entry) | asid); + idx++; } - mtc0_tlbw_hazard(); - tlb_probe(); - tlb_probe_hazard(); - idx = read_c0_index(); - /* No match or match is on current entry */ - if (idx < 0 || idx == entry) - break; - /* - * If we hit a match, we need to try again with - * a different ASID. - */ - asid++; - } while (asid < asid_mask); - - if (idx >= 0 && idx != entry) - panic("Unable to uniquify TLB entry %d", idx); - - write_c0_index(entry); - mtc0_tlbw_hazard(); - tlb_write_indexed(); - entry++; - } + } tlbw_use_hazard(); htw_start(); flush_micro_tlb(); + if (use_slab) + kfree(tlb_vpns); + else + memblock_free(tlb_vpns, tlb_vpn_size); } /* @@ -602,6 +641,7 @@ static void r4k_tlb_configure(void) /* From this point on the ARC firmware is dead. */ r4k_tlb_uniquify(); + local_flush_tlb_all(); /* Did I tell you that ARC SUCKS? */ } diff --git a/arch/mips/mti-malta/malta-init.c b/arch/mips/mti-malta/malta-init.c index 000d6d50520a..82b0fd8576a2 100644 --- a/arch/mips/mti-malta/malta-init.c +++ b/arch/mips/mti-malta/malta-init.c @@ -241,16 +241,22 @@ mips_pci_controller: #endif /* - * Setup the Malta max (2GB) memory for PCI DMA in host bridge - * in transparent addressing mode. + * Set up memory mapping in host bridge for PCI DMA masters, + * in transparent addressing mode. For EVA use the Malta + * maximum of 2 GiB memory in the alias space at 0x80000000 + * as per PHYS_OFFSET. Otherwise use 256 MiB of memory in + * the regular space, avoiding mapping the PCI MMIO window + * for DMA as it seems to confuse the system controller's + * logic, causing PCI MMIO to stop working. */ - mask = PHYS_OFFSET | PCI_BASE_ADDRESS_MEM_PREFETCH; - MSC_WRITE(MSC01_PCI_BAR0, mask); - MSC_WRITE(MSC01_PCI_HEAD4, mask); + mask = PHYS_OFFSET ? PHYS_OFFSET : 0xf0000000; + MSC_WRITE(MSC01_PCI_BAR0, + mask | PCI_BASE_ADDRESS_MEM_PREFETCH); + MSC_WRITE(MSC01_PCI_HEAD4, + PHYS_OFFSET | PCI_BASE_ADDRESS_MEM_PREFETCH); - mask &= MSC01_PCI_BAR0_SIZE_MSK; MSC_WRITE(MSC01_PCI_P2SCMSKL, mask); - MSC_WRITE(MSC01_PCI_P2SCMAPL, mask); + MSC_WRITE(MSC01_PCI_P2SCMAPL, PHYS_OFFSET); /* Don't handle target retries indefinitely. */ if ((data & MSC01_PCI_CFG_MAXRTRY_MSK) == diff --git a/arch/mips/mti-malta/malta-setup.c b/arch/mips/mti-malta/malta-setup.c index 3a2836e9d856..816570514c37 100644 --- a/arch/mips/mti-malta/malta-setup.c +++ b/arch/mips/mti-malta/malta-setup.c @@ -47,7 +47,7 @@ static struct resource standard_io_resources[] = { .name = "keyboard", .start = 0x60, .end = 0x6f, - .flags = IORESOURCE_IO | IORESOURCE_BUSY + .flags = IORESOURCE_IO }, { .name = "dma page reg", @@ -213,7 +213,7 @@ void __init plat_mem_setup(void) /* Request I/O space for devices used on the Malta board. */ for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) - request_resource(&ioport_resource, standard_io_resources+i); + insert_resource(&ioport_resource, standard_io_resources + i); /* * Enable DMA channel 4 (cascade channel) in the PIIX4 south bridge. diff --git a/arch/mips/pci/pci-malta.c b/arch/mips/pci/pci-malta.c index 6aefdf20ca05..2e35aeba45bc 100644 --- a/arch/mips/pci/pci-malta.c +++ b/arch/mips/pci/pci-malta.c @@ -230,8 +230,7 @@ void __init mips_pcibios_init(void) } /* PIIX4 ACPI starts at 0x1000 */ - if (controller->io_resource->start < 0x00001000UL) - controller->io_resource->start = 0x00001000UL; + PCIBIOS_MIN_IO = 0x1000; iomem_resource.end &= 0xfffffffffULL; /* 64 GB */ ioport_resource.end = controller->io_resource->end; diff --git a/arch/openrisc/configs/or1klitex_defconfig b/arch/openrisc/configs/or1klitex_defconfig index 3e849d25838a..fb1eb9a68bd6 100644 --- a/arch/openrisc/configs/or1klitex_defconfig +++ b/arch/openrisc/configs/or1klitex_defconfig @@ -38,7 +38,7 @@ CONFIG_MMC_LITEX=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_LITEX_SOC_CONTROLLER=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_EXFAT_FS=y diff --git a/arch/openrisc/configs/virt_defconfig b/arch/openrisc/configs/virt_defconfig index a93a3e1e4f87..0b9979b35ca8 100644 --- a/arch/openrisc/configs/virt_defconfig +++ b/arch/openrisc/configs/virt_defconfig @@ -94,8 +94,8 @@ CONFIG_VIRTIO_PCI=y CONFIG_VIRTIO_INPUT=y CONFIG_VIRTIO_MMIO=y CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y # CONFIG_DNOTIFY is not set CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/parisc/boot/compressed/Makefile b/arch/parisc/boot/compressed/Makefile index 17c42d718eb3..f8481e4e9d21 100644 --- a/arch/parisc/boot/compressed/Makefile +++ b/arch/parisc/boot/compressed/Makefile @@ -18,7 +18,7 @@ KBUILD_CFLAGS += -fno-PIE -mno-space-regs -mdisable-fpregs -Os ifndef CONFIG_64BIT KBUILD_CFLAGS += -mfast-indirect-calls endif -KBUILD_CFLAGS += -std=gnu11 +KBUILD_CFLAGS += -std=gnu11 -fms-extensions LDFLAGS_vmlinux := -X -e startup --as-needed -T $(obj)/vmlinux: $(obj)/vmlinux.lds $(addprefix $(obj)/, $(OBJECTS)) $(LIBGCC) FORCE diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig index 94928d114d4c..52031bde9f17 100644 --- a/arch/parisc/configs/generic-32bit_defconfig +++ b/arch/parisc/configs/generic-32bit_defconfig @@ -232,8 +232,8 @@ CONFIG_AUXDISPLAY=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_QUOTA=y CONFIG_QUOTA_NETLINK_INTERFACE=y CONFIG_QFMT_V2=y diff --git a/arch/parisc/configs/generic-64bit_defconfig b/arch/parisc/configs/generic-64bit_defconfig index d8cd7f858b2a..1aec04c09d0b 100644 --- a/arch/parisc/configs/generic-64bit_defconfig +++ b/arch/parisc/configs/generic-64bit_defconfig @@ -251,8 +251,8 @@ CONFIG_STAGING=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_XFS_FS=m CONFIG_BTRFS_FS=m CONFIG_QUOTA=y diff --git a/arch/parisc/include/asm/bug.h b/arch/parisc/include/asm/bug.h index 833555f74ffa..5aa1623e4f2f 100644 --- a/arch/parisc/include/asm/bug.h +++ b/arch/parisc/include/asm/bug.h @@ -50,7 +50,7 @@ #endif #ifdef CONFIG_DEBUG_BUGVERBOSE -#define __WARN_FLAGS(flags) \ +#define __WARN_FLAGS(cond_str, flags) \ do { \ asm volatile("\n" \ "1:\t" PARISC_BUG_BREAK_ASM "\n" \ @@ -61,12 +61,12 @@ "\t.short %1, %2\n" \ "\t.blockz %3-2*4-2*2\n" \ "\t.popsection" \ - : : "i" (__FILE__), "i" (__LINE__), \ + : : "i" (WARN_CONDITION_STR(cond_str) __FILE__), "i" (__LINE__), \ "i" (BUGFLAG_WARNING|(flags)), \ "i" (sizeof(struct bug_entry)) ); \ } while(0) #else -#define __WARN_FLAGS(flags) \ +#define __WARN_FLAGS(cond_str, flags) \ do { \ asm volatile("\n" \ "1:\t" PARISC_BUG_BREAK_ASM "\n" \ diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 88a788a7b18d..39bdacaa530b 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -468,3 +468,4 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common listns sys_listns diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c index f7e0fee5ee55..7ac88ff13d3c 100644 --- a/arch/parisc/kernel/unwind.c +++ b/arch/parisc/kernel/unwind.c @@ -35,6 +35,8 @@ #define KERNEL_START (KERNEL_BINARY_TEXT_START) +#define ALIGNMENT_OK(ptr, type) (((ptr) & (sizeof(type) - 1)) == 0) + extern struct unwind_table_entry __start___unwind[]; extern struct unwind_table_entry __stop___unwind[]; @@ -257,12 +259,15 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int if (pc_is_kernel_fn(pc, _switch_to) || pc == (unsigned long)&_switch_to_ret) { info->prev_sp = info->sp - CALLEE_SAVE_FRAME_SIZE; - info->prev_ip = *(unsigned long *)(info->prev_sp - RP_OFFSET); + if (ALIGNMENT_OK(info->prev_sp, long)) + info->prev_ip = *(unsigned long *)(info->prev_sp - RP_OFFSET); + else + info->prev_ip = info->prev_sp = 0; return 1; } #ifdef CONFIG_IRQSTACKS - if (pc == (unsigned long)&_call_on_stack) { + if (pc == (unsigned long)&_call_on_stack && ALIGNMENT_OK(info->sp, long)) { info->prev_sp = *(unsigned long *)(info->sp - FRAME_SIZE - REG_SZ); info->prev_ip = *(unsigned long *)(info->sp - FRAME_SIZE - RP_OFFSET); return 1; @@ -370,8 +375,10 @@ static void unwind_frame_regs(struct unwind_frame_info *info) info->prev_sp = info->sp - frame_size; if (e->Millicode) info->rp = info->r31; - else if (rpoffset) + else if (rpoffset && ALIGNMENT_OK(info->prev_sp, long)) info->rp = *(unsigned long *)(info->prev_sp - rpoffset); + else + info->rp = 0; info->prev_ip = info->rp; info->rp = 0; } diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index e24f4d88885a..9537a61ebae0 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -137,6 +137,7 @@ config PPC select ARCH_HAS_DMA_OPS if PPC64 select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL + select ARCH_HAS_GIGANTIC_PAGE if ARCH_SUPPORTS_HUGETLBFS select ARCH_HAS_KCOV select ARCH_HAS_KERNEL_FPU_SUPPORT if PPC64 && PPC_FPU select ARCH_HAS_MEMBARRIER_CALLBACKS diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index c47b78c1d3e7..f1a4761ebd44 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -70,7 +70,7 @@ BOOTCPPFLAGS := -nostdinc $(LINUXINCLUDE) BOOTCPPFLAGS += -isystem $(shell $(BOOTCC) -print-file-name=include) BOOTCFLAGS := $(BOOTTARGETFLAGS) \ - -std=gnu11 \ + -std=gnu11 -fms-extensions \ -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ -fno-strict-aliasing -O2 \ -msoft-float -mno-altivec -mno-vsx \ @@ -86,6 +86,7 @@ BOOTARFLAGS := -crD ifdef CONFIG_CC_IS_CLANG BOOTCFLAGS += $(CLANG_FLAGS) +BOOTCFLAGS += -Wno-microsoft-anon-tag BOOTAFLAGS += $(CLANG_FLAGS) endif diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index bbaa7e81f821..0db48977c70c 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -51,11 +51,11 @@ ".previous\n" #endif -#define BUG_ENTRY(insn, flags, ...) \ +#define BUG_ENTRY(cond_str, insn, flags, ...) \ __asm__ __volatile__( \ "1: " insn "\n" \ _EMIT_BUG_ENTRY \ - : : "i" (__FILE__), "i" (__LINE__), \ + : : "i" (WARN_CONDITION_STR(cond_str) __FILE__), "i" (__LINE__), \ "i" (flags), \ "i" (sizeof(struct bug_entry)), \ ##__VA_ARGS__) @@ -67,12 +67,12 @@ */ #define BUG() do { \ - BUG_ENTRY("twi 31, 0, 0", 0); \ + BUG_ENTRY("", "twi 31, 0, 0", 0); \ unreachable(); \ } while (0) #define HAVE_ARCH_BUG -#define __WARN_FLAGS(flags) BUG_ENTRY("twi 31, 0, 0", BUGFLAG_WARNING | (flags)) +#define __WARN_FLAGS(cond_str, flags) BUG_ENTRY(cond_str, "twi 31, 0, 0", BUGFLAG_WARNING | (flags)) #ifdef CONFIG_PPC64 #define BUG_ON(x) do { \ @@ -80,7 +80,7 @@ if (x) \ BUG(); \ } else { \ - BUG_ENTRY(PPC_TLNEI " %4, 0", 0, "r" ((__force long)(x))); \ + BUG_ENTRY(#x, PPC_TLNEI " %4, 0", 0, "r" ((__force long)(x))); \ } \ } while (0) @@ -90,7 +90,7 @@ if (__ret_warn_on) \ __WARN(); \ } else { \ - BUG_ENTRY(PPC_TLNEI " %4, 0", \ + BUG_ENTRY(#x, PPC_TLNEI " %4, 0", \ BUGFLAG_WARNING | BUGFLAG_TAINT(TAINT_WARN), \ "r" (__ret_warn_on)); \ } \ diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 4f5a46a77fa2..784a00e681fa 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -451,7 +451,7 @@ user_write_access_begin(const void __user *ptr, size_t len) #define user_write_access_begin user_write_access_begin #define user_write_access_end prevent_current_write_to_user -#define unsafe_get_user(x, p, e) do { \ +#define arch_unsafe_get_user(x, p, e) do { \ __long_type(*(p)) __gu_val; \ __typeof__(*(p)) __user *__gu_addr = (p); \ \ @@ -459,7 +459,7 @@ user_write_access_begin(const void __user *ptr, size_t len) (x) = (__typeof__(*(p)))__gu_val; \ } while (0) -#define unsafe_put_user(x, p, e) \ +#define arch_unsafe_put_user(x, p, e) \ __put_user_size_goto((__typeof__(*(p)))(x), (p), sizeof(*(p)), e) #define unsafe_copy_from_user(d, s, l, e) \ @@ -504,11 +504,11 @@ do { \ unsafe_put_user(*(u8*)(_src + _i), (u8 __user *)(_dst + _i), e); \ } while (0) -#define __get_kernel_nofault(dst, src, type, err_label) \ +#define arch_get_kernel_nofault(dst, src, type, err_label) \ __get_user_size_goto(*((type *)(dst)), \ (__force type __user *)(src), sizeof(type), err_label) -#define __put_kernel_nofault(dst, src, type, err_label) \ +#define arch_put_kernel_nofault(dst, src, type, err_label) \ __put_user_size_goto(*((type *)(src)), \ (__force type __user *)(dst), sizeof(type), err_label) diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 5782e743fd27..4ebc333dd786 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -1747,6 +1747,9 @@ void __init fadump_setup_param_area(void) { phys_addr_t range_start, range_end; + if (!fw_dump.fadump_enabled) + return; + if (!fw_dump.param_area_supported || fw_dump.dump_active) return; diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index b453e80dfc00..ec4458cdb97b 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -560,3 +560,4 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common listns sys_listns diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 1302b5ac5672..89a1b8c21ab4 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -916,8 +916,7 @@ int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio, * it fires once. */ if (single_escalation) { - struct irq_data *d = irq_get_irq_data(xc->esc_virq[prio]); - struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); + struct xive_irq_data *xd = irq_get_chip_data(xc->esc_virq[prio]); xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01); vcpu->arch.xive_esc_raddr = xd->eoi_page; @@ -1612,7 +1611,7 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq, /* Grab info about irq */ state->pt_number = hw_irq; - state->pt_data = irq_data_get_irq_handler_data(host_data); + state->pt_data = irq_data_get_irq_chip_data(host_data); /* * Configure the IRQ to match the existing configuration of @@ -1787,8 +1786,7 @@ void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu) */ void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, int irq) { - struct irq_data *d = irq_get_irq_data(irq); - struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); + struct xive_irq_data *xd = irq_get_chip_data(irq); /* * This slightly odd sequence gives the right result @@ -2827,9 +2825,7 @@ int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu) i0, i1); } if (xc->esc_virq[i]) { - struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]); - struct xive_irq_data *xd = - irq_data_get_irq_handler_data(d); + struct xive_irq_data *xd = irq_get_chip_data(xc->esc_virq[i]); u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET); seq_printf(m, " ESC %d %c%c EOI @%llx", diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 7b527d18aa5e..4c321a8ea896 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -423,7 +423,6 @@ config PPC_64S_HASH_MMU config PPC_RADIX_MMU bool "Radix MMU Support" depends on PPC_BOOK3S_64 - select ARCH_HAS_GIGANTIC_PAGE default y help Enable support for the Power ISA 3.0 Radix style MMU. Currently this diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 7ec60290abe6..78c4b6ce5f13 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -267,22 +267,11 @@ spufs_mkdir(struct inode *dir, struct dentry *dentry, unsigned int flags, static int spufs_context_open(const struct path *path) { - int ret; - struct file *filp; - - ret = get_unused_fd_flags(0); - if (ret < 0) - return ret; - - filp = dentry_open(path, O_RDONLY, current_cred()); - if (IS_ERR(filp)) { - put_unused_fd(ret); - return PTR_ERR(filp); - } - - filp->f_op = &spufs_context_fops; - fd_install(ret, filp); - return ret; + FD_PREPARE(fdf, 0, dentry_open(path, O_RDONLY, current_cred())); + if (fdf.err) + return fdf.err; + fd_prepare_file(fdf)->f_op = &spufs_context_fops; + return fd_publish(fdf); } static struct spu_context * @@ -508,26 +497,15 @@ static const struct file_operations spufs_gang_fops = { static int spufs_gang_open(const struct path *path) { - int ret; - struct file *filp; - - ret = get_unused_fd_flags(0); - if (ret < 0) - return ret; - /* * get references for dget and mntget, will be released * in error path of *_open(). */ - filp = dentry_open(path, O_RDONLY, current_cred()); - if (IS_ERR(filp)) { - put_unused_fd(ret); - return PTR_ERR(filp); - } - - filp->f_op = &spufs_gang_fops; - fd_install(ret, filp); - return ret; + FD_PREPARE(fdf, 0, dentry_open(path, O_RDONLY, current_cred())); + if (fdf.err) + return fdf.err; + fd_prepare_file(fdf)->f_op = &spufs_gang_fops; + return fd_publish(fdf); } static int spufs_create_gang(struct inode *inode, diff --git a/arch/powerpc/platforms/powernv/vas.c b/arch/powerpc/platforms/powernv/vas.c index b65256a63e87..9c9650319f3b 100644 --- a/arch/powerpc/platforms/powernv/vas.c +++ b/arch/powerpc/platforms/powernv/vas.c @@ -121,7 +121,7 @@ static int init_vas_instance(struct platform_device *pdev) return -EINVAL; } - xd = irq_get_handler_data(vinst->virq); + xd = irq_get_chip_data(vinst->virq); if (!xd) { pr_err("Inst%d: Invalid virq %d\n", vinst->vas_id, vinst->virq); diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 825f9432e03d..a82aaa786e9e 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -443,8 +443,7 @@ static int pseries_msi_ops_prepare(struct irq_domain *domain, struct device *dev */ static void pseries_msi_ops_teardown(struct irq_domain *domain, msi_alloc_info_t *arg) { - struct msi_desc *desc = arg->desc; - struct pci_dev *pdev = msi_desc_to_pci_dev(desc); + struct pci_dev *pdev = to_pci_dev(domain->dev); rtas_disable_msi(pdev); } diff --git a/arch/powerpc/platforms/pseries/papr-hvpipe.c b/arch/powerpc/platforms/pseries/papr-hvpipe.c index 21a2f447c43f..dd7b668799d9 100644 --- a/arch/powerpc/platforms/pseries/papr-hvpipe.c +++ b/arch/powerpc/platforms/pseries/papr-hvpipe.c @@ -479,10 +479,7 @@ static const struct file_operations papr_hvpipe_handle_ops = { static int papr_hvpipe_dev_create_handle(u32 srcID) { - struct hvpipe_source_info *src_info; - struct file *file; - long err; - int fd; + struct hvpipe_source_info *src_info __free(kfree) = NULL; spin_lock(&hvpipe_src_list_lock); /* @@ -506,20 +503,13 @@ static int papr_hvpipe_dev_create_handle(u32 srcID) src_info->tsk = current; init_waitqueue_head(&src_info->recv_wqh); - fd = get_unused_fd_flags(O_RDONLY | O_CLOEXEC); - if (fd < 0) { - err = fd; - goto free_buf; - } - - file = anon_inode_getfile("[papr-hvpipe]", - &papr_hvpipe_handle_ops, (void *)src_info, - O_RDWR); - if (IS_ERR(file)) { - err = PTR_ERR(file); - goto free_fd; - } + FD_PREPARE(fdf, O_RDONLY | O_CLOEXEC, + anon_inode_getfile("[papr-hvpipe]", &papr_hvpipe_handle_ops, + (void *)src_info, O_RDWR)); + if (fdf.err) + return fdf.err; + retain_and_null_ptr(src_info); spin_lock(&hvpipe_src_list_lock); /* * If two processes are executing ioctl() for the same @@ -528,22 +518,11 @@ static int papr_hvpipe_dev_create_handle(u32 srcID) */ if (hvpipe_find_source(srcID)) { spin_unlock(&hvpipe_src_list_lock); - err = -EALREADY; - goto free_file; + return -EALREADY; } list_add(&src_info->list, &hvpipe_src_list); spin_unlock(&hvpipe_src_list_lock); - - fd_install(fd, file); - return fd; - -free_file: - fput(file); -free_fd: - put_unused_fd(fd); -free_buf: - kfree(src_info); - return err; + return fd_publish(fdf); } /* diff --git a/arch/powerpc/platforms/pseries/papr-platform-dump.c b/arch/powerpc/platforms/pseries/papr-platform-dump.c index f8d55eccdb6b..be633c9a0e75 100644 --- a/arch/powerpc/platforms/pseries/papr-platform-dump.c +++ b/arch/powerpc/platforms/pseries/papr-platform-dump.c @@ -303,8 +303,6 @@ static long papr_platform_dump_create_handle(u64 dump_tag) { struct ibm_platform_dump_params *params; u64 param_dump_tag; - struct file *file; - long err; int fd; /* @@ -334,34 +332,22 @@ static long papr_platform_dump_create_handle(u64 dump_tag) params->dump_tag_lo = (u32)(dump_tag & 0x00000000ffffffffULL); params->status = RTAS_IBM_PLATFORM_DUMP_START; - fd = get_unused_fd_flags(O_RDONLY | O_CLOEXEC); + fd = FD_ADD(O_RDONLY | O_CLOEXEC, + anon_inode_getfile_fmode("[papr-platform-dump]", + &papr_platform_dump_handle_ops, + (void *)params, O_RDONLY, + FMODE_LSEEK | FMODE_PREAD)); if (fd < 0) { - err = fd; - goto free_area; - } - - file = anon_inode_getfile_fmode("[papr-platform-dump]", - &papr_platform_dump_handle_ops, - (void *)params, O_RDONLY, - FMODE_LSEEK | FMODE_PREAD); - if (IS_ERR(file)) { - err = PTR_ERR(file); - goto put_fd; + rtas_work_area_free(params->work_area); + kfree(params); + return fd; } - fd_install(fd, file); - list_add(¶ms->list, &platform_dump_list); pr_info("%s (%d) initiated platform dump for dump tag %llu\n", current->comm, current->pid, dump_tag); return fd; -put_fd: - put_unused_fd(fd); -free_area: - rtas_work_area_free(params->work_area); - kfree(params); - return err; } /* diff --git a/arch/powerpc/platforms/pseries/papr-rtas-common.c b/arch/powerpc/platforms/pseries/papr-rtas-common.c index 33c606e3378a..1630e0cd210c 100644 --- a/arch/powerpc/platforms/pseries/papr-rtas-common.c +++ b/arch/powerpc/platforms/pseries/papr-rtas-common.c @@ -205,35 +205,18 @@ long papr_rtas_setup_file_interface(struct papr_rtas_sequence *seq, char *name) { const struct papr_rtas_blob *blob; - struct file *file; - long ret; int fd; blob = papr_rtas_retrieve(seq); if (IS_ERR(blob)) return PTR_ERR(blob); - fd = get_unused_fd_flags(O_RDONLY | O_CLOEXEC); - if (fd < 0) { - ret = fd; - goto free_blob; - } - - file = anon_inode_getfile_fmode(name, fops, (void *)blob, - O_RDONLY, FMODE_LSEEK | FMODE_PREAD); - if (IS_ERR(file)) { - ret = PTR_ERR(file); - goto put_fd; - } - - fd_install(fd, file); + fd = FD_ADD(O_RDONLY | O_CLOEXEC, + anon_inode_getfile_fmode(name, fops, (void *)blob, O_RDONLY, + FMODE_LSEEK | FMODE_PREAD)); + if (fd < 0) + papr_rtas_blob_free(blob); return fd; - -put_fd: - put_unused_fd(fd); -free_blob: - papr_rtas_blob_free(blob); - return ret; } /* diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 625361a15424..8d0123b0ae84 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1580,7 +1580,7 @@ static void xive_flush_cpu_queue(unsigned int cpu, struct xive_cpu *xc) cpu, irq); #endif raw_spin_lock(&desc->lock); - xd = irq_desc_get_handler_data(desc); + xd = irq_desc_get_chip_data(desc); /* * Clear saved_p to indicate that it's no longer pending diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 0c6038dc5dfd..fadec20b87a8 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -29,7 +29,7 @@ config RISCV select ARCH_HAS_DEBUG_VIRTUAL if MMU select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEBUG_WX - select ARCH_HAS_ELF_CORE_EFLAGS + select ARCH_HAS_ELF_CORE_EFLAGS if BINFMT_ELF && ELF_CORE select ARCH_HAS_FAST_MULTIPLIER select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL @@ -367,7 +367,7 @@ config RISCV_NONSTANDARD_CACHE_OPS systems to handle cache management. config AS_HAS_INSN - def_bool $(as-instr,.insn r 51$(comma) 0$(comma) 0$(comma) t0$(comma) t0$(comma) zero) + def_bool $(as-instr,.insn 0x100000f) config AS_HAS_OPTION_ARCH # https://github.com/llvm/llvm-project/commit/9e8ed3403c191ab9c4903e8eeb8f732ff8a43cb4 diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index ecf2fcce2d92..4c6de57f65ef 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -134,21 +134,6 @@ endif CHECKFLAGS += -D__riscv -D__riscv_xlen=$(BITS) # Default target when executing plain make -boot := arch/riscv/boot -ifeq ($(CONFIG_XIP_KERNEL),y) -KBUILD_IMAGE := $(boot)/xipImage -else -ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_SOC_CANAAN_K210),yy) -KBUILD_IMAGE := $(boot)/loader.bin -else -ifeq ($(CONFIG_EFI_ZBOOT),) -KBUILD_IMAGE := $(boot)/Image.gz -else -KBUILD_IMAGE := $(boot)/vmlinuz.efi -endif -endif -endif - boot := arch/riscv/boot boot-image-y := Image boot-image-$(CONFIG_KERNEL_BZIP2) := Image.bz2 @@ -159,7 +144,7 @@ boot-image-$(CONFIG_KERNEL_LZO) := Image.lzo boot-image-$(CONFIG_KERNEL_ZSTD) := Image.zst boot-image-$(CONFIG_KERNEL_XZ) := Image.xz ifdef CONFIG_RISCV_M_MODE -boot-image-$(CONFIG_ARCH_CANAAN) := loader.bin +boot-image-$(CONFIG_SOC_CANAAN_K210) := loader.bin endif boot-image-$(CONFIG_EFI_ZBOOT) := vmlinuz.efi boot-image-$(CONFIG_XIP_KERNEL) := xipImage diff --git a/arch/riscv/boot/dts/allwinner/sun20i-d1s.dtsi b/arch/riscv/boot/dts/allwinner/sun20i-d1s.dtsi index 6367112e614a..a7442a508433 100644 --- a/arch/riscv/boot/dts/allwinner/sun20i-d1s.dtsi +++ b/arch/riscv/boot/dts/allwinner/sun20i-d1s.dtsi @@ -28,7 +28,7 @@ riscv,isa-base = "rv64i"; riscv,isa-extensions = "i", "m", "a", "f", "d", "c", "zicntr", "zicsr", "zifencei", "zihpm", "xtheadvector"; - thead,vlenb = <128>; + thead,vlenb = <16>; #cooling-cells = <2>; cpu0_intc: interrupt-controller { diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h index 8bd2a11382a3..e9e8ba83e632 100644 --- a/arch/riscv/include/asm/asm.h +++ b/arch/riscv/include/asm/asm.h @@ -12,6 +12,12 @@ #define __ASM_STR(x) #x #endif +#ifdef CONFIG_AS_HAS_INSN +#define ASM_INSN_I(__x) ".insn " __x +#else +#define ASM_INSN_I(__x) ".4byte " __x +#endif + #if __riscv_xlen == 64 #define __REG_SEL(a, b) __ASM_STR(a) #elif __riscv_xlen == 32 @@ -84,15 +90,9 @@ .endm #ifdef CONFIG_SMP -#ifdef CONFIG_32BIT -#define PER_CPU_OFFSET_SHIFT 2 -#else -#define PER_CPU_OFFSET_SHIFT 3 -#endif - .macro asm_per_cpu dst sym tmp lw \tmp, TASK_TI_CPU_NUM(tp) - slli \tmp, \tmp, PER_CPU_OFFSET_SHIFT + slli \tmp, \tmp, RISCV_LGPTR la \dst, __per_cpu_offset add \dst, \dst, \tmp REG_L \tmp, 0(\dst) diff --git a/arch/riscv/include/asm/bug.h b/arch/riscv/include/asm/bug.h index 4c03e20ad11f..6f581b84d8fc 100644 --- a/arch/riscv/include/asm/bug.h +++ b/arch/riscv/include/asm/bug.h @@ -60,28 +60,28 @@ typedef u32 bug_insn_t; ".org 2b + " size "\n\t" \ ".popsection" \ -#define __BUG_FLAGS(flags) \ +#define __BUG_FLAGS(cond_str, flags) \ do { \ __asm__ __volatile__ ( \ ARCH_WARN_ASM("%0", "%1", "%2", "%3") \ : \ - : "i" (__FILE__), "i" (__LINE__), \ + : "i" (WARN_CONDITION_STR(cond_str) __FILE__), "i" (__LINE__), \ "i" (flags), \ "i" (sizeof(struct bug_entry))); \ } while (0) #else /* CONFIG_GENERIC_BUG */ -#define __BUG_FLAGS(flags) do { \ +#define __BUG_FLAGS(cond_str, flags) do { \ __asm__ __volatile__ ("ebreak\n"); \ } while (0) #endif /* CONFIG_GENERIC_BUG */ #define BUG() do { \ - __BUG_FLAGS(0); \ + __BUG_FLAGS("", 0); \ unreachable(); \ } while (0) -#define __WARN_FLAGS(flags) __BUG_FLAGS(BUGFLAG_WARNING|(flags)) +#define __WARN_FLAGS(cond_str, flags) __BUG_FLAGS(cond_str, BUGFLAG_WARNING|(flags)) #define ARCH_WARN_REACHABLE diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index fbd0e4306c93..62837fa981e8 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -31,6 +31,8 @@ struct riscv_isainfo { DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo); +extern const struct seq_operations cpuinfo_op; + /* Per-cpu ISA extensions. */ extern struct riscv_isainfo hart_isa[NR_CPUS]; diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h index 948d2b34e94e..58f8dda73259 100644 --- a/arch/riscv/include/asm/hwprobe.h +++ b/arch/riscv/include/asm/hwprobe.h @@ -42,4 +42,11 @@ static inline bool riscv_hwprobe_pair_cmp(struct riscv_hwprobe *pair, return pair->value == other_pair->value; } +#ifdef CONFIG_MMU +void riscv_hwprobe_register_async_probe(void); +void riscv_hwprobe_complete_async_probe(void); +#else +static inline void riscv_hwprobe_register_async_probe(void) {} +static inline void riscv_hwprobe_complete_async_probe(void) {} +#endif #endif diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h index c9cfcea52cbb..d29da6ccd3dd 100644 --- a/arch/riscv/include/asm/insn-def.h +++ b/arch/riscv/include/asm/insn-def.h @@ -256,10 +256,10 @@ INSN_S(OPCODE_OP_IMM, FUNC3(6), __RS2(3), \ SIMM12((offset) & 0xfe0), RS1(base)) -#define RISCV_PAUSE ".4byte 0x100000f" -#define ZAWRS_WRS_NTO ".4byte 0x00d00073" -#define ZAWRS_WRS_STO ".4byte 0x01d00073" -#define RISCV_NOP4 ".4byte 0x00000013" +#define RISCV_PAUSE ASM_INSN_I("0x100000f") +#define ZAWRS_WRS_NTO ASM_INSN_I("0x00d00073") +#define ZAWRS_WRS_STO ASM_INSN_I("0x01d00073") +#define RISCV_NOP4 ASM_INSN_I("0x00000013") #define RISCV_INSN_NOP4 _AC(0x00000013, U) diff --git a/arch/riscv/include/asm/kgdb.h b/arch/riscv/include/asm/kgdb.h index 7559d728c5ff..78b18e2fd771 100644 --- a/arch/riscv/include/asm/kgdb.h +++ b/arch/riscv/include/asm/kgdb.h @@ -3,14 +3,18 @@ #ifndef __ASM_KGDB_H_ #define __ASM_KGDB_H_ +#include <linux/build_bug.h> + #ifdef __KERNEL__ #define GDB_SIZEOF_REG sizeof(unsigned long) -#define DBG_MAX_REG_NUM (36) -#define NUMREGBYTES ((DBG_MAX_REG_NUM) * GDB_SIZEOF_REG) +#define DBG_MAX_REG_NUM 36 +#define NUMREGBYTES (DBG_MAX_REG_NUM * GDB_SIZEOF_REG) #define CACHE_FLUSH_IS_SAFE 1 #define BUFMAX 2048 +static_assert(BUFMAX > NUMREGBYTES, + "As per KGDB documentation, BUFMAX must be larger than NUMREGBYTES"); #ifdef CONFIG_RISCV_ISA_C #define BREAK_INSTR_SIZE 2 #else @@ -97,6 +101,7 @@ extern unsigned long kgdb_compiled_break; #define DBG_REG_STATUS_OFF 33 #define DBG_REG_BADADDR_OFF 34 #define DBG_REG_CAUSE_OFF 35 +/* NOTE: increase DBG_MAX_REG_NUM if you add more values here. */ extern const char riscv_gdb_stub_feature[64]; diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index 1018d2216901..6e789fa58514 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -69,6 +69,8 @@ typedef struct { #define PTRS_PER_PMD (PAGE_SIZE / sizeof(pmd_t)) +#define MAX_POSSIBLE_PHYSMEM_BITS 56 + /* * rv64 PTE format: * | 63 | 62 61 | 60 54 | 53 10 | 9 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 29e994a9afb6..5a08eb5fe99f 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -654,6 +654,8 @@ static inline pgprot_t pgprot_writecombine(pgprot_t _prot) return __pgprot(prot); } +#define pgprot_dmacoherent pgprot_writecombine + /* * Both Svade and Svadu control the hardware behavior when the PTE A/D bits need to be set. By * default the M-mode firmware enables the hardware updating scheme when only Svadu is present in diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index f5f4f7f85543..36bba6720c26 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -437,10 +437,10 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n) __clear_user(untagged_addr(to), n) : n; } -#define __get_kernel_nofault(dst, src, type, err_label) \ +#define arch_get_kernel_nofault(dst, src, type, err_label) \ __get_user_nocheck(*((type *)(dst)), (__force __user type *)(src), err_label) -#define __put_kernel_nofault(dst, src, type, err_label) \ +#define arch_put_kernel_nofault(dst, src, type, err_label) \ __put_user_nocheck(*((type *)(src)), (__force __user type *)(dst), err_label) static __must_check __always_inline bool user_access_begin(const void __user *ptr, size_t len) @@ -460,10 +460,10 @@ static inline void user_access_restore(unsigned long enabled) { } * We want the unsafe accessors to always be inlined and use * the error labels - thus the macro games. */ -#define unsafe_put_user(x, ptr, label) \ +#define arch_unsafe_put_user(x, ptr, label) \ __put_user_nocheck(x, (ptr), label) -#define unsafe_get_user(x, ptr, label) do { \ +#define arch_unsafe_get_user(x, ptr, label) do { \ __inttype(*(ptr)) __gu_val; \ __get_user_nocheck(__gu_val, (ptr), label); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ diff --git a/arch/riscv/include/asm/vdso/arch_data.h b/arch/riscv/include/asm/vdso/arch_data.h index da57a3786f7a..88b37af55175 100644 --- a/arch/riscv/include/asm/vdso/arch_data.h +++ b/arch/riscv/include/asm/vdso/arch_data.h @@ -12,6 +12,12 @@ struct vdso_arch_data { /* Boolean indicating all CPUs have the same static hwprobe values. */ __u8 homogeneous_cpus; + + /* + * A gate to check and see if the hwprobe data is actually ready, as + * probing is deferred to avoid boot slowdowns. + */ + __u8 ready; }; #endif /* __RISCV_ASM_VDSO_ARCH_DATA_H */ diff --git a/arch/riscv/include/asm/vendor_extensions/mips.h b/arch/riscv/include/asm/vendor_extensions/mips.h index ea8ca747d691..ffeb12dc17a3 100644 --- a/arch/riscv/include/asm/vendor_extensions/mips.h +++ b/arch/riscv/include/asm/vendor_extensions/mips.h @@ -30,8 +30,8 @@ extern struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_mips; * allowing any subsequent instructions to fetch. */ -#define MIPS_PAUSE ".4byte 0x00501013\n\t" -#define MIPS_EHB ".4byte 0x00301013\n\t" -#define MIPS_IHB ".4byte 0x00101013\n\t" +#define MIPS_PAUSE ASM_INSN_I("0x00501013\n\t") +#define MIPS_EHB ASM_INSN_I("0x00301013\n\t") +#define MIPS_IHB ASM_INSN_I("0x00101013\n\t") #endif // _ASM_RISCV_VENDOR_EXTENSIONS_MIPS_H diff --git a/arch/riscv/include/asm/vendorid_list.h b/arch/riscv/include/asm/vendorid_list.h index 3b09874d7a6d..7f5030ee1fcf 100644 --- a/arch/riscv/include/asm/vendorid_list.h +++ b/arch/riscv/include/asm/vendorid_list.h @@ -7,8 +7,8 @@ #define ANDES_VENDOR_ID 0x31e #define MICROCHIP_VENDOR_ID 0x029 +#define MIPS_VENDOR_ID 0x127 #define SIFIVE_VENDOR_ID 0x489 #define THEAD_VENDOR_ID 0x5b7 -#define MIPS_VENDOR_ID 0x722 #endif diff --git a/arch/riscv/kernel/cpu-hotplug.c b/arch/riscv/kernel/cpu-hotplug.c index a1e38ecfc8be..3f50d3dd76c6 100644 --- a/arch/riscv/kernel/cpu-hotplug.c +++ b/arch/riscv/kernel/cpu-hotplug.c @@ -54,6 +54,7 @@ void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) pr_notice("CPU%u: off\n", cpu); + clear_tasks_mm_cpumask(cpu); /* Verify from the firmware if the cpu is really stopped*/ if (cpu_ops->cpu_is_stopped) ret = cpu_ops->cpu_is_stopped(cpu); diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index f6b13e9f5e6c..3dbc8cc557dd 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -62,10 +62,8 @@ int __init riscv_early_of_processor_hartid(struct device_node *node, unsigned lo return -ENODEV; } - if (!of_device_is_available(node)) { - pr_info("CPU with hartid=%lu is not available\n", *hart); + if (!of_device_is_available(node)) return -ENODEV; - } if (of_property_read_string(node, "riscv,isa-base", &isa)) goto old_interface; diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 67b59699357d..72ca768f4e91 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -932,9 +932,9 @@ static int has_thead_homogeneous_vlenb(void) { int cpu; u32 prev_vlenb = 0; - u32 vlenb; + u32 vlenb = 0; - /* Ignore thead,vlenb property if xtheavector is not enabled in the kernel */ + /* Ignore thead,vlenb property if xtheadvector is not enabled in the kernel */ if (!IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR)) return 0; diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index d3d92a4becc7..9b9dec6893b8 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -455,7 +455,7 @@ SYM_DATA_START_LOCAL(excp_vect_table) RISCV_PTR do_trap_ecall_s RISCV_PTR do_trap_unknown RISCV_PTR do_trap_ecall_m - /* instruciton page fault */ + /* instruction page fault */ ALT_PAGE_FAULT(RISCV_PTR do_page_fault) RISCV_PTR do_page_fault /* load page fault */ RISCV_PTR do_trap_unknown diff --git a/arch/riscv/kernel/kgdb.c b/arch/riscv/kernel/kgdb.c index 9f3db3503dab..15fec5d1e6de 100644 --- a/arch/riscv/kernel/kgdb.c +++ b/arch/riscv/kernel/kgdb.c @@ -265,10 +265,10 @@ void kgdb_arch_handle_qxfer_pkt(char *remcom_in_buffer, { if (!strncmp(remcom_in_buffer, gdb_xfer_read_target, sizeof(gdb_xfer_read_target))) - strcpy(remcom_out_buffer, riscv_gdb_stub_target_desc); + strscpy(remcom_out_buffer, riscv_gdb_stub_target_desc, BUFMAX); else if (!strncmp(remcom_in_buffer, gdb_xfer_read_cpuxml, sizeof(gdb_xfer_read_cpuxml))) - strcpy(remcom_out_buffer, riscv_gdb_stub_cpuxml); + strscpy(remcom_out_buffer, riscv_gdb_stub_cpuxml, BUFMAX); } static inline void kgdb_arch_update_addr(struct pt_regs *regs, diff --git a/arch/riscv/kernel/module-sections.c b/arch/riscv/kernel/module-sections.c index 75551ac6504c..1675cbad8619 100644 --- a/arch/riscv/kernel/module-sections.c +++ b/arch/riscv/kernel/module-sections.c @@ -119,6 +119,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, unsigned int num_plts = 0; unsigned int num_gots = 0; Elf_Rela *scratch = NULL; + Elf_Rela *new_scratch; size_t scratch_size = 0; int i; @@ -168,9 +169,12 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, scratch_size_needed = (num_scratch_relas + num_relas) * sizeof(*scratch); if (scratch_size_needed > scratch_size) { scratch_size = scratch_size_needed; - scratch = kvrealloc(scratch, scratch_size, GFP_KERNEL); - if (!scratch) + new_scratch = kvrealloc(scratch, scratch_size, GFP_KERNEL); + if (!new_scratch) { + kvfree(scratch); return -ENOMEM; + } + scratch = new_scratch; } for (size_t j = 0; j < num_relas; j++) diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c index c0738d6c6498..8723390c7cad 100644 --- a/arch/riscv/kernel/probes/kprobes.c +++ b/arch/riscv/kernel/probes/kprobes.c @@ -49,10 +49,15 @@ static void __kprobes arch_simulate_insn(struct kprobe *p, struct pt_regs *regs) post_kprobe_handler(p, kcb, regs); } -static bool __kprobes arch_check_kprobe(struct kprobe *p) +static bool __kprobes arch_check_kprobe(unsigned long addr) { - unsigned long tmp = (unsigned long)p->addr - p->offset; - unsigned long addr = (unsigned long)p->addr; + unsigned long tmp, offset; + + /* start iterating at the closest preceding symbol */ + if (!kallsyms_lookup_size_offset(addr, NULL, &offset)) + return false; + + tmp = addr - offset; while (tmp <= addr) { if (tmp == addr) @@ -71,7 +76,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) if ((unsigned long)insn & 0x1) return -EILSEQ; - if (!arch_check_kprobe(p)) + if (!arch_check_kprobe((unsigned long)p->addr)) return -EILSEQ; /* copy instruction */ diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c index 5e8cde055264..c443337056ab 100644 --- a/arch/riscv/kernel/sbi.c +++ b/arch/riscv/kernel/sbi.c @@ -648,9 +648,9 @@ int sbi_debug_console_read(char *bytes, unsigned int num_bytes) void __init sbi_init(void) { + bool srst_power_off = false; int ret; - sbi_set_power_off(); ret = sbi_get_spec_version(); if (ret > 0) sbi_spec_version = ret; @@ -683,6 +683,7 @@ void __init sbi_init(void) sbi_probe_extension(SBI_EXT_SRST)) { pr_info("SBI SRST extension detected\n"); register_platform_power_off(sbi_srst_power_off); + srst_power_off = true; sbi_srst_reboot_nb.notifier_call = sbi_srst_reboot; sbi_srst_reboot_nb.priority = 192; register_restart_handler(&sbi_srst_reboot_nb); @@ -702,4 +703,7 @@ void __init sbi_init(void) __sbi_send_ipi = __sbi_send_ipi_v01; __sbi_rfence = __sbi_rfence_v01; } + + if (!srst_power_off) + sbi_set_power_off(); } diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 14235e58c539..b5bc5fc65cea 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -331,11 +331,14 @@ void __init setup_arch(char **cmdline_p) /* Parse the ACPI tables for possible boot-time configuration */ acpi_boot_table_init(); + if (acpi_disabled) { #if IS_ENABLED(CONFIG_BUILTIN_DTB) - unflatten_and_copy_device_tree(); + unflatten_and_copy_device_tree(); #else - unflatten_device_tree(); + unflatten_device_tree(); #endif + } + misc_mem_init(); init_resources(); diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index e650dec44817..5ed5095320e6 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -40,6 +40,17 @@ enum ipi_message_type { IPI_MAX }; +static const char * const ipi_names[] = { + [IPI_RESCHEDULE] = "Rescheduling interrupts", + [IPI_CALL_FUNC] = "Function call interrupts", + [IPI_CPU_STOP] = "CPU stop interrupts", + [IPI_CPU_CRASH_STOP] = "CPU stop (for crash dump) interrupts", + [IPI_IRQ_WORK] = "IRQ work interrupts", + [IPI_TIMER] = "Timer broadcast interrupts", + [IPI_CPU_BACKTRACE] = "CPU backtrace interrupts", + [IPI_KGDB_ROUNDUP] = "KGDB roundup interrupts", +}; + unsigned long __cpuid_to_hartid_map[NR_CPUS] __ro_after_init = { [0 ... NR_CPUS-1] = INVALID_HARTID }; @@ -199,7 +210,7 @@ void riscv_ipi_set_virq_range(int virq, int nr) /* Request IPIs */ for (i = 0; i < nr_ipi; i++) { err = request_percpu_irq(ipi_virq_base + i, handle_IPI, - "IPI", &ipi_dummy_dev); + ipi_names[i], &ipi_dummy_dev); WARN_ON(err); ipi_desc[i] = irq_to_desc(ipi_virq_base + i); @@ -210,17 +221,6 @@ void riscv_ipi_set_virq_range(int virq, int nr) riscv_ipi_enable(); } -static const char * const ipi_names[] = { - [IPI_RESCHEDULE] = "Rescheduling interrupts", - [IPI_CALL_FUNC] = "Function call interrupts", - [IPI_CPU_STOP] = "CPU stop interrupts", - [IPI_CPU_CRASH_STOP] = "CPU stop (for crash dump) interrupts", - [IPI_IRQ_WORK] = "IRQ work interrupts", - [IPI_TIMER] = "Timer broadcast interrupts", - [IPI_CPU_BACKTRACE] = "CPU backtrace interrupts", - [IPI_KGDB_ROUNDUP] = "KGDB roundup interrupts", -}; - void show_ipi_stats(struct seq_file *p, int prec) { unsigned int cpu, i; diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 3fe9e6edef8f..b41b6255751c 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -16,6 +16,22 @@ #ifdef CONFIG_FRAME_POINTER +/* + * This disables KASAN checking when reading a value from another task's stack, + * since the other task could be running on another CPU and could have poisoned + * the stack in the meantime. + */ +#define READ_ONCE_TASK_STACK(task, x) \ +({ \ + unsigned long val; \ + unsigned long addr = x; \ + if ((task) == current) \ + val = READ_ONCE(addr); \ + else \ + val = READ_ONCE_NOCHECK(addr); \ + val; \ +}) + extern asmlinkage void handle_exception(void); extern unsigned long ret_from_exception_end; @@ -69,8 +85,9 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, fp = frame->ra; pc = regs->ra; } else { - fp = frame->fp; - pc = ftrace_graph_ret_addr(current, &graph_idx, frame->ra, + fp = READ_ONCE_TASK_STACK(task, frame->fp); + pc = READ_ONCE_TASK_STACK(task, frame->ra); + pc = ftrace_graph_ret_addr(current, &graph_idx, pc, &frame->ra); if (pc >= (unsigned long)handle_exception && pc < (unsigned long)&ret_from_exception_end) { diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index 000f4451a9d8..199d13f86f31 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -5,6 +5,9 @@ * more details. */ #include <linux/syscalls.h> +#include <linux/completion.h> +#include <linux/atomic.h> +#include <linux/once.h> #include <asm/cacheflush.h> #include <asm/cpufeature.h> #include <asm/hwprobe.h> @@ -28,6 +31,11 @@ static void hwprobe_arch_id(struct riscv_hwprobe *pair, bool first = true; int cpu; + if (pair->key != RISCV_HWPROBE_KEY_MVENDORID && + pair->key != RISCV_HWPROBE_KEY_MIMPID && + pair->key != RISCV_HWPROBE_KEY_MARCHID) + goto out; + for_each_cpu(cpu, cpus) { u64 cpu_id; @@ -58,6 +66,7 @@ static void hwprobe_arch_id(struct riscv_hwprobe *pair, } } +out: pair->value = id; } @@ -454,28 +463,32 @@ static int hwprobe_get_cpus(struct riscv_hwprobe __user *pairs, return 0; } -static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs, - size_t pair_count, size_t cpusetsize, - unsigned long __user *cpus_user, - unsigned int flags) -{ - if (flags & RISCV_HWPROBE_WHICH_CPUS) - return hwprobe_get_cpus(pairs, pair_count, cpusetsize, - cpus_user, flags); +#ifdef CONFIG_MMU - return hwprobe_get_values(pairs, pair_count, cpusetsize, - cpus_user, flags); +static DECLARE_COMPLETION(boot_probes_done); +static atomic_t pending_boot_probes = ATOMIC_INIT(1); + +void riscv_hwprobe_register_async_probe(void) +{ + atomic_inc(&pending_boot_probes); } -#ifdef CONFIG_MMU +void riscv_hwprobe_complete_async_probe(void) +{ + if (atomic_dec_and_test(&pending_boot_probes)) + complete(&boot_probes_done); +} -static int __init init_hwprobe_vdso_data(void) +static int complete_hwprobe_vdso_data(void) { struct vdso_arch_data *avd = vdso_k_arch_data; u64 id_bitsmash = 0; struct riscv_hwprobe pair; int key; + if (unlikely(!atomic_dec_and_test(&pending_boot_probes))) + wait_for_completion(&boot_probes_done); + /* * Initialize vDSO data with the answers for the "all CPUs" case, to * save a syscall in the common case. @@ -503,13 +516,52 @@ static int __init init_hwprobe_vdso_data(void) * vDSO should defer to the kernel for exotic cpu masks. */ avd->homogeneous_cpus = id_bitsmash != 0 && id_bitsmash != -1; + + /* + * Make sure all the VDSO values are visible before we look at them. + * This pairs with the implicit "no speculativly visible accesses" + * barrier in the VDSO hwprobe code. + */ + smp_wmb(); + avd->ready = true; + return 0; +} + +static int __init init_hwprobe_vdso_data(void) +{ + struct vdso_arch_data *avd = vdso_k_arch_data; + + /* + * Prevent the vDSO cached values from being used, as they're not ready + * yet. + */ + avd->ready = false; return 0; } arch_initcall_sync(init_hwprobe_vdso_data); +#else + +static int complete_hwprobe_vdso_data(void) { return 0; } + #endif /* CONFIG_MMU */ +static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs, + size_t pair_count, size_t cpusetsize, + unsigned long __user *cpus_user, + unsigned int flags) +{ + DO_ONCE_SLEEPABLE(complete_hwprobe_vdso_data); + + if (flags & RISCV_HWPROBE_WHICH_CPUS) + return hwprobe_get_cpus(pairs, pair_count, cpusetsize, + cpus_user, flags); + + return hwprobe_get_values(pairs, pair_count, cpusetsize, + cpus_user, flags); +} + SYSCALL_DEFINE5(riscv_hwprobe, struct riscv_hwprobe __user *, pairs, size_t, pair_count, size_t, cpusetsize, unsigned long __user *, cpus, unsigned int, flags) diff --git a/arch/riscv/kernel/tests/Kconfig.debug b/arch/riscv/kernel/tests/Kconfig.debug index 5db4df44279e..40f8dafffa0a 100644 --- a/arch/riscv/kernel/tests/Kconfig.debug +++ b/arch/riscv/kernel/tests/Kconfig.debug @@ -31,7 +31,7 @@ config RISCV_MODULE_LINKING_KUNIT If unsure, say N. config RISCV_KPROBES_KUNIT - bool "KUnit test for riscv kprobes" if !KUNIT_ALL_TESTS + tristate "KUnit test for riscv kprobes" if !KUNIT_ALL_TESTS depends on KUNIT depends on KPROBES default KUNIT_ALL_TESTS diff --git a/arch/riscv/kernel/tests/kprobes/Makefile b/arch/riscv/kernel/tests/kprobes/Makefile index 4cb6c66a98e8..df7256f62313 100644 --- a/arch/riscv/kernel/tests/kprobes/Makefile +++ b/arch/riscv/kernel/tests/kprobes/Makefile @@ -1 +1,3 @@ -obj-y += test-kprobes.o test-kprobes-asm.o +obj-$(CONFIG_RISCV_KPROBES_KUNIT) += kprobes_riscv_kunit.o + +kprobes_riscv_kunit-objs := test-kprobes.o test-kprobes-asm.o diff --git a/arch/riscv/kernel/tests/kprobes/test-kprobes.c b/arch/riscv/kernel/tests/kprobes/test-kprobes.c index 6f6cdfbf5a95..664535ca0a98 100644 --- a/arch/riscv/kernel/tests/kprobes/test-kprobes.c +++ b/arch/riscv/kernel/tests/kprobes/test-kprobes.c @@ -49,8 +49,11 @@ static struct kunit_case kprobes_testcases[] = { }; static struct kunit_suite kprobes_test_suite = { - .name = "kprobes_test_riscv", + .name = "kprobes_riscv", .test_cases = kprobes_testcases, }; kunit_test_suites(&kprobes_test_suite); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("KUnit test for riscv kprobes"); diff --git a/arch/riscv/kernel/tests/kprobes/test-kprobes.h b/arch/riscv/kernel/tests/kprobes/test-kprobes.h index 3886ab491ecb..537f44aa9d3f 100644 --- a/arch/riscv/kernel/tests/kprobes/test-kprobes.h +++ b/arch/riscv/kernel/tests/kprobes/test-kprobes.h @@ -11,7 +11,7 @@ #define KPROBE_TEST_MAGIC_LOWER 0x0000babe #define KPROBE_TEST_MAGIC_UPPER 0xcafe0000 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* array of addresses to install kprobes */ extern void *test_kprobes_addresses[]; @@ -19,6 +19,6 @@ extern void *test_kprobes_addresses[]; /* array of functions that return KPROBE_TEST_MAGIC */ extern long (*test_kprobes_functions[])(void); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* TEST_KPROBES_H */ diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c index ae2068425fbc..70b5e6927620 100644 --- a/arch/riscv/kernel/unaligned_access_speed.c +++ b/arch/riscv/kernel/unaligned_access_speed.c @@ -379,6 +379,7 @@ free: static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused) { schedule_on_each_cpu(check_vector_unaligned_access); + riscv_hwprobe_complete_async_probe(); return 0; } @@ -473,8 +474,12 @@ static int __init check_unaligned_access_all_cpus(void) per_cpu(vector_misaligned_access, cpu) = unaligned_vector_speed_param; } else if (!check_vector_unaligned_access_emulated_all_cpus() && IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) { - kthread_run(vec_check_unaligned_access_speed_all_cpus, - NULL, "vec_check_unaligned_access_speed_all_cpus"); + riscv_hwprobe_register_async_probe(); + if (IS_ERR(kthread_run(vec_check_unaligned_access_speed_all_cpus, + NULL, "vec_check_unaligned_access_speed_all_cpus"))) { + pr_warn("Failed to create vec_unalign_check kthread\n"); + riscv_hwprobe_complete_async_probe(); + } } /* diff --git a/arch/riscv/kernel/vdso/hwprobe.c b/arch/riscv/kernel/vdso/hwprobe.c index 2ddeba6c68dd..8f45500d0a6e 100644 --- a/arch/riscv/kernel/vdso/hwprobe.c +++ b/arch/riscv/kernel/vdso/hwprobe.c @@ -27,7 +27,7 @@ static int riscv_vdso_get_values(struct riscv_hwprobe *pairs, size_t pair_count, * homogeneous, then this function can handle requests for arbitrary * masks. */ - if ((flags != 0) || (!all_cpus && !avd->homogeneous_cpus)) + if (flags != 0 || (!all_cpus && !avd->homogeneous_cpus) || unlikely(!avd->ready)) return riscv_hwprobe(pairs, pair_count, cpusetsize, cpus, flags); /* This is something we can handle, fill out the pairs. */ diff --git a/arch/riscv/kvm/aia_imsic.c b/arch/riscv/kvm/aia_imsic.c index fda0346f0ea1..11422cb95a64 100644 --- a/arch/riscv/kvm/aia_imsic.c +++ b/arch/riscv/kvm/aia_imsic.c @@ -689,8 +689,20 @@ bool kvm_riscv_vcpu_aia_imsic_has_interrupt(struct kvm_vcpu *vcpu) */ read_lock_irqsave(&imsic->vsfile_lock, flags); - if (imsic->vsfile_cpu > -1) - ret = !!(csr_read(CSR_HGEIP) & BIT(imsic->vsfile_hgei)); + if (imsic->vsfile_cpu > -1) { + /* + * This function is typically called from kvm_vcpu_block() via + * kvm_arch_vcpu_runnable() upon WFI trap. The kvm_vcpu_block() + * can be preempted and the blocking VCPU might resume on a + * different CPU. This means it is possible that current CPU + * does not match the imsic->vsfile_cpu hence this function + * must check imsic->vsfile_cpu before accessing HGEIP CSR. + */ + if (imsic->vsfile_cpu != vcpu->cpu) + ret = true; + else + ret = !!(csr_read(CSR_HGEIP) & BIT(imsic->vsfile_hgei)); + } read_unlock_irqrestore(&imsic->vsfile_lock, flags); return ret; diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c index 525fb5a330c0..58f5f3536ffd 100644 --- a/arch/riscv/kvm/mmu.c +++ b/arch/riscv/kvm/mmu.c @@ -171,7 +171,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, enum kvm_mr_change change) { hva_t hva, reg_end, size; - gpa_t base_gpa; bool writable; int ret = 0; @@ -190,15 +189,13 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, hva = new->userspace_addr; size = new->npages << PAGE_SHIFT; reg_end = hva + size; - base_gpa = new->base_gfn << PAGE_SHIFT; writable = !(new->flags & KVM_MEM_READONLY); mmap_read_lock(current->mm); /* * A memory region could potentially cover multiple VMAs, and - * any holes between them, so iterate over all of them to find - * out if we can map any of them right now. + * any holes between them, so iterate over all of them. * * +--------------------------------------------+ * +---------------+----------------+ +----------------+ @@ -209,7 +206,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, */ do { struct vm_area_struct *vma; - hva_t vm_start, vm_end; + hva_t vm_end; vma = find_vma_intersection(current->mm, hva, reg_end); if (!vma) @@ -225,36 +222,18 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, } /* Take the intersection of this VMA with the memory region */ - vm_start = max(hva, vma->vm_start); vm_end = min(reg_end, vma->vm_end); if (vma->vm_flags & VM_PFNMAP) { - gpa_t gpa = base_gpa + (vm_start - hva); - phys_addr_t pa; - - pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT; - pa += vm_start - vma->vm_start; - /* IO region dirty page logging not allowed */ if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) { ret = -EINVAL; goto out; } - - ret = kvm_riscv_mmu_ioremap(kvm, gpa, pa, vm_end - vm_start, - writable, false); - if (ret) - break; } hva = vm_end; } while (hva < reg_end); - if (change == KVM_MR_FLAGS_ONLY) - goto out; - - if (ret) - kvm_riscv_mmu_iounmap(kvm, base_gpa, size); - out: mmap_read_unlock(current->mm); return ret; diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index bccb919ca615..5ce35aba6069 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -212,7 +212,7 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { - return (kvm_riscv_vcpu_has_interrupts(vcpu, -1UL) && + return (kvm_riscv_vcpu_has_interrupts(vcpu, -1ULL) && !kvm_riscv_vcpu_stopped(vcpu) && !vcpu->arch.pause); } diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c index 3b51690cc876..34299c2b231f 100644 --- a/arch/riscv/mm/ptdump.c +++ b/arch/riscv/mm/ptdump.c @@ -21,7 +21,7 @@ #define pt_dump_seq_puts(m, fmt) \ ({ \ if (m) \ - seq_printf(m, fmt); \ + seq_puts(m, fmt); \ }) /* diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index c4145672ca34..df22b10d9141 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -158,7 +158,6 @@ config S390 select ARCH_WANT_IRQS_OFF_ACTIVATE_MM select ARCH_WANT_KERNEL_PMD_MKWRITE select ARCH_WANT_LD_ORPHAN_WARN - select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP select ARCH_WANTS_THP_SWAP select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS2 diff --git a/arch/s390/Makefile b/arch/s390/Makefile index b4769241332b..8578361133a4 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -22,7 +22,7 @@ KBUILD_AFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -D__ASSEMBLY__ ifndef CONFIG_AS_IS_LLVM KBUILD_AFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),$(aflags_dwarf)) endif -KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 -mpacked-stack -std=gnu11 +KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 -mpacked-stack -std=gnu11 -fms-extensions KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY KBUILD_CFLAGS_DECOMPRESSOR += -D__DECOMPRESSOR KBUILD_CFLAGS_DECOMPRESSOR += -Wno-pointer-sign @@ -35,6 +35,7 @@ KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-disable-warning, address-of-packed-membe KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),-g) KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO_DWARF4), $(call cc-option, -gdwarf-4,)) KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_CC_NO_ARRAY_BOUNDS),-Wno-array-bounds) +KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_CC_IS_CLANG),-Wno-microsoft-anon-tag) UTS_MACHINE := s390x STACK_SIZE := $(if $(CONFIG_KASAN),65536,$(if $(CONFIG_KMSAN),65536,16384)) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index b31c1df90257..8433f769f7e1 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -101,6 +101,7 @@ CONFIG_SLUB_STATS=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y +CONFIG_PERSISTENT_HUGE_ZERO_FOLIO=y CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_CMA_DEBUGFS=y CONFIG_CMA_SYSFS=y @@ -123,12 +124,12 @@ CONFIG_TLS_DEVICE=y CONFIG_TLS_TOE=y CONFIG_XFRM_USER=m CONFIG_NET_KEY=m -CONFIG_XDP_SOCKETS=y -CONFIG_XDP_SOCKETS_DIAG=m -CONFIG_DIBS=y -CONFIG_DIBS_LO=y CONFIG_SMC=m CONFIG_SMC_DIAG=m +CONFIG_DIBS=y +CONFIG_DIBS_LO=y +CONFIG_XDP_SOCKETS=y +CONFIG_XDP_SOCKETS_DIAG=m CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_ADVANCED_ROUTER=y @@ -472,6 +473,7 @@ CONFIG_SCSI_DH_EMC=m CONFIG_SCSI_DH_ALUA=m CONFIG_MD=y CONFIG_BLK_DEV_MD=y +CONFIG_MD_LLBITMAP=y # CONFIG_MD_BITMAP_FILE is not set CONFIG_MD_LINEAR=m CONFIG_MD_CLUSTER=m @@ -654,9 +656,12 @@ CONFIG_JFS_POSIX_ACL=y CONFIG_JFS_SECURITY=y CONFIG_JFS_STATISTICS=y CONFIG_XFS_FS=y +CONFIG_XFS_SUPPORT_V4=y +CONFIG_XFS_SUPPORT_ASCII_CI=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y CONFIG_XFS_RT=y +# CONFIG_XFS_ONLINE_SCRUB is not set CONFIG_XFS_DEBUG=y CONFIG_GFS2_FS=m CONFIG_GFS2_FS_LOCKING_DLM=y @@ -666,7 +671,6 @@ CONFIG_BTRFS_FS_POSIX_ACL=y CONFIG_BTRFS_DEBUG=y CONFIG_BTRFS_ASSERT=y CONFIG_NILFS2_FS=m -CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y CONFIG_FS_ENCRYPTION=y CONFIG_FS_VERITY=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 161dad7ef211..4414dabd04a6 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -94,6 +94,7 @@ CONFIG_SLAB_BUCKETS=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y +CONFIG_PERSISTENT_HUGE_ZERO_FOLIO=y CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_CMA_SYSFS=y CONFIG_CMA_AREAS=7 @@ -114,12 +115,12 @@ CONFIG_TLS_DEVICE=y CONFIG_TLS_TOE=y CONFIG_XFRM_USER=m CONFIG_NET_KEY=m -CONFIG_XDP_SOCKETS=y -CONFIG_XDP_SOCKETS_DIAG=m -CONFIG_DIBS=y -CONFIG_DIBS_LO=y CONFIG_SMC=m CONFIG_SMC_DIAG=m +CONFIG_DIBS=y +CONFIG_DIBS_LO=y +CONFIG_XDP_SOCKETS=y +CONFIG_XDP_SOCKETS_DIAG=m CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_ADVANCED_ROUTER=y @@ -462,6 +463,7 @@ CONFIG_SCSI_DH_EMC=m CONFIG_SCSI_DH_ALUA=m CONFIG_MD=y CONFIG_BLK_DEV_MD=y +CONFIG_MD_LLBITMAP=y # CONFIG_MD_BITMAP_FILE is not set CONFIG_MD_LINEAR=m CONFIG_MD_CLUSTER=m @@ -644,16 +646,18 @@ CONFIG_JFS_POSIX_ACL=y CONFIG_JFS_SECURITY=y CONFIG_JFS_STATISTICS=y CONFIG_XFS_FS=y +CONFIG_XFS_SUPPORT_V4=y +CONFIG_XFS_SUPPORT_ASCII_CI=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y CONFIG_XFS_RT=y +# CONFIG_XFS_ONLINE_SCRUB is not set CONFIG_GFS2_FS=m CONFIG_GFS2_FS_LOCKING_DLM=y CONFIG_OCFS2_FS=m CONFIG_BTRFS_FS=y CONFIG_BTRFS_FS_POSIX_ACL=y CONFIG_NILFS2_FS=m -CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y CONFIG_FS_ENCRYPTION=y CONFIG_FS_VERITY=y diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index ed0b137353ad..b5478267d6a7 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -33,7 +33,6 @@ CONFIG_NET=y CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_SAFE=y CONFIG_BLK_DEV_RAM=y -# CONFIG_DCSSBLK is not set # CONFIG_DASD is not set CONFIG_ENCLOSURE_SERVICES=y CONFIG_SCSI=y diff --git a/arch/s390/crypto/phmac_s390.c b/arch/s390/crypto/phmac_s390.c index 7ecfdc4fba2d..89f3e6d8fd89 100644 --- a/arch/s390/crypto/phmac_s390.c +++ b/arch/s390/crypto/phmac_s390.c @@ -169,11 +169,18 @@ struct kmac_sha2_ctx { u64 buflen[2]; }; +enum async_op { + OP_NOP = 0, + OP_UPDATE, + OP_FINAL, + OP_FINUP, +}; + /* phmac request context */ struct phmac_req_ctx { struct hash_walk_helper hwh; struct kmac_sha2_ctx kmac_ctx; - bool final; + enum async_op async_op; }; /* @@ -610,6 +617,7 @@ static int phmac_update(struct ahash_request *req) * using engine to serialize requests. */ if (rc == 0 || rc == -EKEYEXPIRED) { + req_ctx->async_op = OP_UPDATE; atomic_inc(&tfm_ctx->via_engine_ctr); rc = crypto_transfer_hash_request_to_engine(phmac_crypto_engine, req); if (rc != -EINPROGRESS) @@ -647,8 +655,7 @@ static int phmac_final(struct ahash_request *req) * using engine to serialize requests. */ if (rc == 0 || rc == -EKEYEXPIRED) { - req->nbytes = 0; - req_ctx->final = true; + req_ctx->async_op = OP_FINAL; atomic_inc(&tfm_ctx->via_engine_ctr); rc = crypto_transfer_hash_request_to_engine(phmac_crypto_engine, req); if (rc != -EINPROGRESS) @@ -676,13 +683,16 @@ static int phmac_finup(struct ahash_request *req) if (rc) goto out; + req_ctx->async_op = OP_FINUP; + /* Try synchronous operations if no active engine usage */ if (!atomic_read(&tfm_ctx->via_engine_ctr)) { rc = phmac_kmac_update(req, false); if (rc == 0) - req->nbytes = 0; + req_ctx->async_op = OP_FINAL; } - if (!rc && !req->nbytes && !atomic_read(&tfm_ctx->via_engine_ctr)) { + if (!rc && req_ctx->async_op == OP_FINAL && + !atomic_read(&tfm_ctx->via_engine_ctr)) { rc = phmac_kmac_final(req, false); if (rc == 0) goto out; @@ -694,7 +704,7 @@ static int phmac_finup(struct ahash_request *req) * using engine to serialize requests. */ if (rc == 0 || rc == -EKEYEXPIRED) { - req_ctx->final = true; + /* req->async_op has been set to either OP_FINUP or OP_FINAL */ atomic_inc(&tfm_ctx->via_engine_ctr); rc = crypto_transfer_hash_request_to_engine(phmac_crypto_engine, req); if (rc != -EINPROGRESS) @@ -855,15 +865,16 @@ static int phmac_do_one_request(struct crypto_engine *engine, void *areq) /* * Three kinds of requests come in here: - * update when req->nbytes > 0 and req_ctx->final is false - * final when req->nbytes = 0 and req_ctx->final is true - * finup when req->nbytes > 0 and req_ctx->final is true - * For update and finup the hwh walk needs to be prepared and - * up to date but the actual nr of bytes in req->nbytes may be - * any non zero number. For final there is no hwh walk needed. + * 1. req->async_op == OP_UPDATE with req->nbytes > 0 + * 2. req->async_op == OP_FINUP with req->nbytes > 0 + * 3. req->async_op == OP_FINAL + * For update and finup the hwh walk has already been prepared + * by the caller. For final there is no hwh walk needed. */ - if (req->nbytes) { + switch (req_ctx->async_op) { + case OP_UPDATE: + case OP_FINUP: rc = phmac_kmac_update(req, true); if (rc == -EKEYEXPIRED) { /* @@ -880,10 +891,11 @@ static int phmac_do_one_request(struct crypto_engine *engine, void *areq) hwh_advance(hwh, rc); goto out; } - req->nbytes = 0; - } - - if (req_ctx->final) { + if (req_ctx->async_op == OP_UPDATE) + break; + req_ctx->async_op = OP_FINAL; + fallthrough; + case OP_FINAL: rc = phmac_kmac_final(req, true); if (rc == -EKEYEXPIRED) { /* @@ -897,10 +909,14 @@ static int phmac_do_one_request(struct crypto_engine *engine, void *areq) cond_resched(); return -ENOSPC; } + break; + default: + /* unknown/unsupported/unimplemented asynch op */ + return -EOPNOTSUPP; } out: - if (rc || req_ctx->final) + if (rc || req_ctx->async_op == OP_FINAL) memzero_explicit(kmac_ctx, sizeof(*kmac_ctx)); pr_debug("request complete with rc=%d\n", rc); local_bh_disable(); diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h index c500d45fb465..acb4b13d98c5 100644 --- a/arch/s390/include/asm/bug.h +++ b/arch/s390/include/asm/bug.h @@ -2,69 +2,55 @@ #ifndef _ASM_S390_BUG_H #define _ASM_S390_BUG_H -#include <linux/compiler.h> - -#ifdef CONFIG_BUG - -#ifdef CONFIG_DEBUG_BUGVERBOSE - -#define __EMIT_BUG(x) do { \ - asm_inline volatile( \ - "0: mc 0,0\n" \ - ".section .rodata.str,\"aMS\",@progbits,1\n" \ - "1: .asciz \""__FILE__"\"\n" \ - ".previous\n" \ - ".section __bug_table,\"aw\"\n" \ - "2: .long 0b-.\n" \ - " .long 1b-.\n" \ - " .short %0,%1\n" \ - " .org 2b+%2\n" \ - ".previous\n" \ - : : "i" (__LINE__), \ - "i" (x), \ - "i" (sizeof(struct bug_entry))); \ -} while (0) - -#else /* CONFIG_DEBUG_BUGVERBOSE */ - -#define __EMIT_BUG(x) do { \ - asm_inline volatile( \ - "0: mc 0,0\n" \ - ".section __bug_table,\"aw\"\n" \ - "1: .long 0b-.\n" \ - " .short %0\n" \ - " .org 1b+%1\n" \ - ".previous\n" \ - : : "i" (x), \ - "i" (sizeof(struct bug_entry))); \ +#include <linux/stringify.h> + +#ifndef CONFIG_DEBUG_BUGVERBOSE +#define _BUGVERBOSE_LOCATION(file, line) +#else +#define __BUGVERBOSE_LOCATION(file, line) \ + .pushsection .rodata.str, "aMS", @progbits, 1; \ + 10002: .ascii file "\0"; \ + .popsection; \ + \ + .long 10002b - .; \ + .short line; +#define _BUGVERBOSE_LOCATION(file, line) __BUGVERBOSE_LOCATION(file, line) +#endif + +#ifndef CONFIG_GENERIC_BUG +#define __BUG_ENTRY(cond_str, flags) +#else +#define __BUG_ENTRY(cond_str, flags) \ + .pushsection __bug_table, "aw"; \ + .align 4; \ + 10000: .long 10001f - .; \ + _BUGVERBOSE_LOCATION(WARN_CONDITION_STR(cond_str) __FILE__, __LINE__) \ + .short flags; \ + .popsection; \ + 10001: +#endif + +#define ASM_BUG_FLAGS(cond_str, flags) \ + __BUG_ENTRY(cond_str, flags) \ + mc 0,0 + +#define ASM_BUG() ASM_BUG_FLAGS("", 0) + +#define __BUG_FLAGS(cond_str, flags) \ + asm_inline volatile(__stringify(ASM_BUG_FLAGS(cond_str, flags))); + +#define __WARN_FLAGS(cond_str, flags) \ +do { \ + __BUG_FLAGS(cond_str, BUGFLAG_WARNING|(flags)); \ } while (0) -#endif /* CONFIG_DEBUG_BUGVERBOSE */ - -#define BUG() do { \ - __EMIT_BUG(0); \ - unreachable(); \ +#define BUG() \ +do { \ + __BUG_FLAGS("", 0); \ + unreachable(); \ } while (0) -#define __WARN_FLAGS(flags) do { \ - __EMIT_BUG(BUGFLAG_WARNING|(flags)); \ -} while (0) - -#define WARN_ON(x) ({ \ - int __ret_warn_on = !!(x); \ - if (__builtin_constant_p(__ret_warn_on)) { \ - if (__ret_warn_on) \ - __WARN(); \ - } else { \ - if (unlikely(__ret_warn_on)) \ - __WARN(); \ - } \ - unlikely(__ret_warn_on); \ -}) - #define HAVE_ARCH_BUG -#define HAVE_ARCH_WARN_ON -#endif /* CONFIG_BUG */ #include <asm-generic/bug.h> diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h index 6ce6b56e282b..46f92bb4c9e5 100644 --- a/arch/s390/include/asm/nospec-insn.h +++ b/arch/s390/include/asm/nospec-insn.h @@ -19,7 +19,7 @@ #ifdef CONFIG_EXPOLINE_EXTERN SYM_CODE_START(\name) #else - .pushsection .text.\name,"axG",@progbits,\name,comdat + .pushsection .text..\name,"axG",@progbits,\name,comdat .globl \name .hidden \name .type \name,@function diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 6890925d5587..a32f465ecf73 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -145,7 +145,6 @@ struct zpci_dev { u8 has_resources : 1; u8 is_physfn : 1; u8 util_str_avail : 1; - u8 irqs_registered : 1; u8 tid_avail : 1; u8 rtr_avail : 1; /* Relaxed translation allowed */ unsigned int devfn; /* DEVFN part of the RID*/ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index b7100c6a4054..6663f1619abb 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1154,17 +1154,15 @@ static inline pte_t pte_mkhuge(pte_t pte) #define IPTE_NODAT 0x400 #define IPTE_GUEST_ASCE 0x800 -static __always_inline void __ptep_rdp(unsigned long addr, pte_t *ptep, - unsigned long opt, unsigned long asce, - int local) +static __always_inline void __ptep_rdp(unsigned long addr, pte_t *ptep, int local) { unsigned long pto; pto = __pa(ptep) & ~(PTRS_PER_PTE * sizeof(pte_t) - 1); - asm volatile(".insn rrf,0xb98b0000,%[r1],%[r2],%[asce],%[m4]" + asm volatile(".insn rrf,0xb98b0000,%[r1],%[r2],%%r0,%[m4]" : "+m" (*ptep) - : [r1] "a" (pto), [r2] "a" ((addr & PAGE_MASK) | opt), - [asce] "a" (asce), [m4] "i" (local)); + : [r1] "a" (pto), [r2] "a" (addr & PAGE_MASK), + [m4] "i" (local)); } static __always_inline void __ptep_ipte(unsigned long address, pte_t *ptep, @@ -1348,7 +1346,7 @@ static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma, * A local RDP can be used to do the flush. */ if (cpu_has_rdp() && !(pte_val(*ptep) & _PAGE_PROTECT)) - __ptep_rdp(address, ptep, 0, 0, 1); + __ptep_rdp(address, ptep, 1); } #define flush_tlb_fix_spurious_fault flush_tlb_fix_spurious_fault diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 3e5b8b677057..c5e02addcd67 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -468,8 +468,8 @@ do { \ #endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT && CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ -#define __get_kernel_nofault __mvc_kernel_nofault -#define __put_kernel_nofault __mvc_kernel_nofault +#define arch_get_kernel_nofault __mvc_kernel_nofault +#define arch_put_kernel_nofault __mvc_kernel_nofault void __cmpxchg_user_key_called_with_bad_pointer(void); diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 8a6744d658db..5863787ab036 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -472,3 +472,4 @@ 467 common open_tree_attr sys_open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr sys_file_setattr +470 common listns sys_listns sys_listns diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index d74d4c52ccd0..8609126961dc 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -51,7 +51,7 @@ SECTIONS IRQENTRY_TEXT SOFTIRQENTRY_TEXT FTRACE_HOTPATCH_TRAMPOLINES_TEXT - *(.text.*_indirect_*) + *(.text..*_indirect_*) *(.gnu.warning) . = ALIGN(PAGE_SIZE); _etext = .; /* End of text section */ diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 9af2aae0a515..528d7c70979f 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -291,16 +291,14 @@ static int ptdump_cmp(const void *a, const void *b) static int add_marker(unsigned long start, unsigned long end, const char *name) { - size_t oldsize, newsize; - - oldsize = markers_cnt * sizeof(*markers); - newsize = oldsize + 2 * sizeof(*markers); - if (!oldsize) - markers = kvmalloc(newsize, GFP_KERNEL); - else - markers = kvrealloc(markers, newsize, GFP_KERNEL); - if (!markers) - goto error; + struct addr_marker *new; + size_t newsize; + + newsize = (markers_cnt + 2) * sizeof(*markers); + new = kvrealloc(markers, newsize, GFP_KERNEL); + if (!new) + return -ENOMEM; + markers = new; markers[markers_cnt].is_start = 1; markers[markers_cnt].start_address = start; markers[markers_cnt].size = end - start; @@ -312,9 +310,6 @@ static int add_marker(unsigned long start, unsigned long end, const char *name) markers[markers_cnt].name = name; markers_cnt++; return 0; -error: - markers_cnt = 0; - return -ENOMEM; } static int pt_dump_init(void) diff --git a/arch/s390/mm/pfault.c b/arch/s390/mm/pfault.c index e6175d75e4b0..2f829448c719 100644 --- a/arch/s390/mm/pfault.c +++ b/arch/s390/mm/pfault.c @@ -199,8 +199,7 @@ block: * return to userspace schedule() to block. */ __set_current_state(TASK_UNINTERRUPTIBLE); - set_tsk_need_resched(tsk); - set_preempt_need_resched(); + set_need_resched_current(); } } out: diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 0fde20bbc50b..05974304d622 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -274,9 +274,9 @@ void ptep_reset_dat_prot(struct mm_struct *mm, unsigned long addr, pte_t *ptep, preempt_disable(); atomic_inc(&mm->context.flush_count); if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) - __ptep_rdp(addr, ptep, 0, 0, 1); + __ptep_rdp(addr, ptep, 1); else - __ptep_rdp(addr, ptep, 0, 0, 0); + __ptep_rdp(addr, ptep, 0); /* * PTE is not invalidated by RDP, only _PAGE_PROTECT is cleared. That * means it is still valid and active, and must not be changed according diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index b95376041501..27db1e72c623 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -188,7 +188,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) * is unbound or probed and that userspace can't access its * configuration space while we perform recovery. */ - pci_dev_lock(pdev); + device_lock(&pdev->dev); if (pdev->error_state == pci_channel_io_perm_failure) { ers_res = PCI_ERS_RESULT_DISCONNECT; goto out_unlock; @@ -257,7 +257,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) driver->err_handler->resume(pdev); pci_uevent_ers(pdev, PCI_ERS_RESULT_RECOVERED); out_unlock: - pci_dev_unlock(pdev); + device_unlock(&pdev->dev); zpci_report_status(zdev, "recovery", status_str); return ers_res; diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c index 84482a921332..e73be96ce5fe 100644 --- a/arch/s390/pci/pci_irq.c +++ b/arch/s390/pci/pci_irq.c @@ -107,9 +107,6 @@ static int zpci_set_irq(struct zpci_dev *zdev) else rc = zpci_set_airq(zdev); - if (!rc) - zdev->irqs_registered = 1; - return rc; } @@ -123,9 +120,6 @@ static int zpci_clear_irq(struct zpci_dev *zdev) else rc = zpci_clear_airq(zdev); - if (!rc) - zdev->irqs_registered = 0; - return rc; } @@ -427,8 +421,7 @@ bool arch_restore_msi_irqs(struct pci_dev *pdev) { struct zpci_dev *zdev = to_zpci(pdev); - if (!zdev->irqs_registered) - zpci_set_irq(zdev); + zpci_set_irq(zdev); return true; } diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile index bd39b36e7bd6..0c196a5b194a 100644 --- a/arch/s390/purgatory/Makefile +++ b/arch/s390/purgatory/Makefile @@ -13,7 +13,7 @@ CFLAGS_sha256.o := -D__NO_FORTIFY $(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S FORCE $(call if_changed_rule,as_o_S) -KBUILD_CFLAGS := -std=gnu11 -fno-strict-aliasing -Wall -Wstrict-prototypes +KBUILD_CFLAGS := -std=gnu11 -fms-extensions -fno-strict-aliasing -Wall -Wstrict-prototypes KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding KBUILD_CFLAGS += -Os -m64 -msoft-float -fno-common @@ -21,6 +21,7 @@ KBUILD_CFLAGS += -fno-stack-protector KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING KBUILD_CFLAGS += -D__DISABLE_EXPORTS KBUILD_CFLAGS += $(CLANG_FLAGS) +KBUILD_CFLAGS += $(if $(CONFIG_CC_IS_CLANG),-Wno-microsoft-anon-tag) KBUILD_CFLAGS += $(call cc-option,-fno-PIE) KBUILD_AFLAGS := $(filter-out -DCC_USING_EXPOLINE,$(KBUILD_AFLAGS)) KBUILD_AFLAGS += -D__DISABLE_EXPORTS diff --git a/arch/sh/configs/ap325rxa_defconfig b/arch/sh/configs/ap325rxa_defconfig index b6f36c938f1d..48b2e97114f9 100644 --- a/arch/sh/configs/ap325rxa_defconfig +++ b/arch/sh/configs/ap325rxa_defconfig @@ -81,10 +81,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y diff --git a/arch/sh/configs/apsh4a3a_defconfig b/arch/sh/configs/apsh4a3a_defconfig index 9c2644443c4d..85db9ce42d1a 100644 --- a/arch/sh/configs/apsh4a3a_defconfig +++ b/arch/sh/configs/apsh4a3a_defconfig @@ -60,8 +60,7 @@ CONFIG_FONT_8x16=y CONFIG_LOGO=y # CONFIG_USB_SUPPORT is not set CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y diff --git a/arch/sh/configs/apsh4ad0a_defconfig b/arch/sh/configs/apsh4ad0a_defconfig index 137573610ec4..e8b3b720578b 100644 --- a/arch/sh/configs/apsh4ad0a_defconfig +++ b/arch/sh/configs/apsh4ad0a_defconfig @@ -88,8 +88,7 @@ CONFIG_USB_MON=y CONFIG_USB_OHCI_HCD=y CONFIG_USB_STORAGE=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y diff --git a/arch/sh/configs/ecovec24_defconfig b/arch/sh/configs/ecovec24_defconfig index e76694aace25..fcca7cc5a75a 100644 --- a/arch/sh/configs/ecovec24_defconfig +++ b/arch/sh/configs/ecovec24_defconfig @@ -109,10 +109,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y diff --git a/arch/sh/configs/edosk7760_defconfig b/arch/sh/configs/edosk7760_defconfig index f427a95bcd21..98f4611ba553 100644 --- a/arch/sh/configs/edosk7760_defconfig +++ b/arch/sh/configs/edosk7760_defconfig @@ -87,8 +87,7 @@ CONFIG_SND_SOC=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_XIP=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_NFS_FS=y diff --git a/arch/sh/configs/espt_defconfig b/arch/sh/configs/espt_defconfig index da176f100e00..e5d102cbff89 100644 --- a/arch/sh/configs/espt_defconfig +++ b/arch/sh/configs/espt_defconfig @@ -59,8 +59,7 @@ CONFIG_USB_MON=y CONFIG_USB_OHCI_HCD=y CONFIG_USB_STORAGE=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_AUTOFS_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y diff --git a/arch/sh/configs/landisk_defconfig b/arch/sh/configs/landisk_defconfig index 924bb3233b0b..22177aa8f961 100644 --- a/arch/sh/configs/landisk_defconfig +++ b/arch/sh/configs/landisk_defconfig @@ -93,8 +93,7 @@ CONFIG_USB_EMI62=m CONFIG_USB_EMI26=m CONFIG_USB_SISUSBVGA=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_ISO9660_FS=m CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/sh/configs/lboxre2_defconfig b/arch/sh/configs/lboxre2_defconfig index 0307bb2be79f..ff992301622b 100644 --- a/arch/sh/configs/lboxre2_defconfig +++ b/arch/sh/configs/lboxre2_defconfig @@ -49,8 +49,7 @@ CONFIG_SERIAL_SH_SCI_CONSOLE=y CONFIG_HW_RANDOM=y CONFIG_RTC_CLASS=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y diff --git a/arch/sh/configs/magicpanelr2_defconfig b/arch/sh/configs/magicpanelr2_defconfig index 93b9aa32dc7c..a29fb912a242 100644 --- a/arch/sh/configs/magicpanelr2_defconfig +++ b/arch/sh/configs/magicpanelr2_defconfig @@ -64,9 +64,8 @@ CONFIG_RTC_CLASS=y # CONFIG_RTC_HCTOSYS is not set CONFIG_RTC_DRV_SH=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -# CONFIG_EXT3_FS_XATTR is not set +CONFIG_EXT4_FS=y +# CONFIG_EXT4_FS_XATTR is not set # CONFIG_DNOTIFY is not set CONFIG_PROC_KCORE=y CONFIG_TMPFS=y diff --git a/arch/sh/configs/r7780mp_defconfig b/arch/sh/configs/r7780mp_defconfig index f28b8c4181c2..58b792dacfec 100644 --- a/arch/sh/configs/r7780mp_defconfig +++ b/arch/sh/configs/r7780mp_defconfig @@ -74,8 +74,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_RS5C372=y CONFIG_RTC_DRV_SH=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_FUSE_FS=m CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/sh/configs/r7785rp_defconfig b/arch/sh/configs/r7785rp_defconfig index 3a4239f20ff1..7edf18451158 100644 --- a/arch/sh/configs/r7785rp_defconfig +++ b/arch/sh/configs/r7785rp_defconfig @@ -69,8 +69,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_RS5C372=y CONFIG_RTC_DRV_SH=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_FUSE_FS=m CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/sh/configs/rsk7264_defconfig b/arch/sh/configs/rsk7264_defconfig index e4ef259425c4..28a81efefb02 100644 --- a/arch/sh/configs/rsk7264_defconfig +++ b/arch/sh/configs/rsk7264_defconfig @@ -59,8 +59,7 @@ CONFIG_USB_R8A66597_HCD=y CONFIG_USB_STORAGE=y CONFIG_USB_STORAGE_DEBUG=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_VFAT_FS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y diff --git a/arch/sh/configs/rsk7269_defconfig b/arch/sh/configs/rsk7269_defconfig index e0d1560b2bfd..f8bfa46643ff 100644 --- a/arch/sh/configs/rsk7269_defconfig +++ b/arch/sh/configs/rsk7269_defconfig @@ -43,8 +43,7 @@ CONFIG_USB_R8A66597_HCD=y CONFIG_USB_STORAGE=y CONFIG_USB_STORAGE_DEBUG=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_VFAT_FS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y diff --git a/arch/sh/configs/sdk7780_defconfig b/arch/sh/configs/sdk7780_defconfig index 9870d16d9711..311817161afb 100644 --- a/arch/sh/configs/sdk7780_defconfig +++ b/arch/sh/configs/sdk7780_defconfig @@ -102,9 +102,8 @@ CONFIG_LEDS_CLASS=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_AUTOFS_FS=y CONFIG_ISO9660_FS=y CONFIG_MSDOS_FS=y diff --git a/arch/sh/configs/sdk7786_defconfig b/arch/sh/configs/sdk7786_defconfig index 07894f13441e..2433aa5f44a8 100644 --- a/arch/sh/configs/sdk7786_defconfig +++ b/arch/sh/configs/sdk7786_defconfig @@ -161,8 +161,7 @@ CONFIG_STAGING=y # CONFIG_STAGING_EXCLUDE_BUILD is not set CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_EXT4_FS=y CONFIG_XFS_FS=y CONFIG_BTRFS_FS=y diff --git a/arch/sh/configs/se7343_defconfig b/arch/sh/configs/se7343_defconfig index 75db12fb9ad1..b0baa5771c26 100644 --- a/arch/sh/configs/se7343_defconfig +++ b/arch/sh/configs/se7343_defconfig @@ -84,8 +84,7 @@ CONFIG_USB_ANNOUNCE_NEW_DEVICES=y CONFIG_USB_ISP116X_HCD=y CONFIG_UIO=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y # CONFIG_DNOTIFY is not set CONFIG_JFFS2_FS=y CONFIG_CRAMFS=y diff --git a/arch/sh/configs/se7712_defconfig b/arch/sh/configs/se7712_defconfig index 8770a72e6a63..1078c286a610 100644 --- a/arch/sh/configs/se7712_defconfig +++ b/arch/sh/configs/se7712_defconfig @@ -83,8 +83,7 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y # CONFIG_DNOTIFY is not set CONFIG_JFFS2_FS=y CONFIG_CRAMFS=y diff --git a/arch/sh/configs/se7721_defconfig b/arch/sh/configs/se7721_defconfig index b15c6406a0e8..edb9e0d2dce5 100644 --- a/arch/sh/configs/se7721_defconfig +++ b/arch/sh/configs/se7721_defconfig @@ -107,8 +107,7 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y # CONFIG_DNOTIFY is not set CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/sh/configs/se7722_defconfig b/arch/sh/configs/se7722_defconfig index 5327a2f70980..33daa0a17a32 100644 --- a/arch/sh/configs/se7722_defconfig +++ b/arch/sh/configs/se7722_defconfig @@ -44,8 +44,7 @@ CONFIG_HW_RANDOM=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_SH=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_HUGETLBFS=y diff --git a/arch/sh/configs/se7724_defconfig b/arch/sh/configs/se7724_defconfig index 9501e69eb886..d572655f842d 100644 --- a/arch/sh/configs/se7724_defconfig +++ b/arch/sh/configs/se7724_defconfig @@ -110,10 +110,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y diff --git a/arch/sh/configs/sh03_defconfig b/arch/sh/configs/sh03_defconfig index 4d75c92cac10..3d194d81c92b 100644 --- a/arch/sh/configs/sh03_defconfig +++ b/arch/sh/configs/sh03_defconfig @@ -57,9 +57,8 @@ CONFIG_WATCHDOG=y CONFIG_SH_WDT=m CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_AUTOFS_FS=y CONFIG_ISO9660_FS=m CONFIG_JOLIET=y diff --git a/arch/sh/configs/sh2007_defconfig b/arch/sh/configs/sh2007_defconfig index cc6292b3235a..889daa5d2faa 100644 --- a/arch/sh/configs/sh2007_defconfig +++ b/arch/sh/configs/sh2007_defconfig @@ -95,7 +95,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_INTF_DEV_UIE_EMUL=y CONFIG_DMADEVICES=y CONFIG_TIMB_DMA=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_ISO9660_FS=y CONFIG_JOLIET=y CONFIG_ZISOFS=y diff --git a/arch/sh/configs/sh7757lcr_defconfig b/arch/sh/configs/sh7757lcr_defconfig index 48a0f9beb116..25e9d22779b3 100644 --- a/arch/sh/configs/sh7757lcr_defconfig +++ b/arch/sh/configs/sh7757lcr_defconfig @@ -64,7 +64,7 @@ CONFIG_MMC=y CONFIG_MMC_SDHI=y CONFIG_MMC_SH_MMCIF=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_ISO9660_FS=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y diff --git a/arch/sh/configs/sh7763rdp_defconfig b/arch/sh/configs/sh7763rdp_defconfig index b77b3313157e..e7b72ff377a8 100644 --- a/arch/sh/configs/sh7763rdp_defconfig +++ b/arch/sh/configs/sh7763rdp_defconfig @@ -61,8 +61,7 @@ CONFIG_USB_OHCI_HCD=y CONFIG_USB_STORAGE=y CONFIG_MMC=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_AUTOFS_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/sh/configs/sh7785lcr_32bit_defconfig b/arch/sh/configs/sh7785lcr_32bit_defconfig index 44f9b2317f09..17d2471d8e51 100644 --- a/arch/sh/configs/sh7785lcr_32bit_defconfig +++ b/arch/sh/configs/sh7785lcr_32bit_defconfig @@ -113,8 +113,7 @@ CONFIG_RTC_DRV_RS5C372=y CONFIG_DMADEVICES=y CONFIG_UIO=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y diff --git a/arch/sh/configs/sh7785lcr_defconfig b/arch/sh/configs/sh7785lcr_defconfig index aec74b0e7003..34c8fe755add 100644 --- a/arch/sh/configs/sh7785lcr_defconfig +++ b/arch/sh/configs/sh7785lcr_defconfig @@ -90,8 +90,7 @@ CONFIG_USB_TEST=m CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_RS5C372=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y diff --git a/arch/sh/configs/shx3_defconfig b/arch/sh/configs/shx3_defconfig index 9a0df5ea3866..52e7a42d66c7 100644 --- a/arch/sh/configs/shx3_defconfig +++ b/arch/sh/configs/shx3_defconfig @@ -84,8 +84,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_SH=y CONFIG_UIO=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_HUGETLBFS=y diff --git a/arch/sh/configs/titan_defconfig b/arch/sh/configs/titan_defconfig index 8ef72b8dbcd3..2c474645ec36 100644 --- a/arch/sh/configs/titan_defconfig +++ b/arch/sh/configs/titan_defconfig @@ -215,9 +215,8 @@ CONFIG_USB_SERIAL_PL2303=m CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_SH=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -# CONFIG_EXT3_FS_XATTR is not set +CONFIG_EXT4_FS=y +# CONFIG_EXT4_FS_XATTR is not set CONFIG_XFS_FS=m CONFIG_FUSE_FS=m CONFIG_ISO9660_FS=m diff --git a/arch/sh/configs/ul2_defconfig b/arch/sh/configs/ul2_defconfig index 103b81ec1ffb..b0c2ba478353 100644 --- a/arch/sh/configs/ul2_defconfig +++ b/arch/sh/configs/ul2_defconfig @@ -66,8 +66,7 @@ CONFIG_USB_R8A66597_HCD=y CONFIG_USB_STORAGE=y CONFIG_MMC=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y diff --git a/arch/sh/configs/urquell_defconfig b/arch/sh/configs/urquell_defconfig index 00ef62133b04..e6d807f52253 100644 --- a/arch/sh/configs/urquell_defconfig +++ b/arch/sh/configs/urquell_defconfig @@ -114,8 +114,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_SH=y CONFIG_RTC_DRV_GENERIC=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_EXT4_FS=y CONFIG_BTRFS_FS=y CONFIG_MSDOS_FS=y diff --git a/arch/sh/include/asm/bug.h b/arch/sh/include/asm/bug.h index 05a485c4fabc..891276687355 100644 --- a/arch/sh/include/asm/bug.h +++ b/arch/sh/include/asm/bug.h @@ -52,14 +52,14 @@ do { \ unreachable(); \ } while (0) -#define __WARN_FLAGS(flags) \ +#define __WARN_FLAGS(cond_str, flags) \ do { \ __asm__ __volatile__ ( \ "1:\t.short %O0\n" \ _EMIT_BUG_ENTRY \ : \ : "n" (TRAPA_BUG_OPCODE), \ - "i" (__FILE__), \ + "i" (WARN_CONDITION_STR(cond_str) __FILE__), \ "i" (__LINE__), \ "i" (BUGFLAG_WARNING|(flags)), \ "i" (sizeof(struct bug_entry))); \ diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index 5e9c9eff5539..969c11325ade 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -473,3 +473,4 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common listns sys_listns diff --git a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig index 7a7c4dec2925..127940aafc39 100644 --- a/arch/sparc/configs/sparc64_defconfig +++ b/arch/sparc/configs/sparc64_defconfig @@ -187,10 +187,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_HUGETLBFS=y diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index ebb7d06d1044..39aa26b6a50b 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -515,3 +515,4 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common listns sys_listns diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index fa3b616af03a..34fb46d5341b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -261,6 +261,7 @@ config X86 select HAVE_FUNCTION_ERROR_INJECTION select HAVE_KRETPROBES select HAVE_RETHOOK + select HAVE_KLP_BUILD if X86_64 select HAVE_LIVEPATCH if X86_64 select HAVE_MIXED_BREAKPOINTS_REGS select HAVE_MOD_ARCH_SPECIFIC @@ -297,6 +298,7 @@ config X86 select HAVE_SYSCALL_TRACEPOINTS select HAVE_UACCESS_VALIDATION if HAVE_OBJTOOL select HAVE_UNSTABLE_SCHED_CLOCK + select HAVE_UNWIND_USER_FP if X86_64 select HAVE_USER_RETURN_NOTIFIER select HAVE_GENERIC_VDSO select VDSO_GETRANDOM if X86_64 @@ -379,7 +381,7 @@ config GENERIC_CSUM config GENERIC_BUG def_bool y depends on BUG - select GENERIC_BUG_RELATIVE_POINTERS if X86_64 + select GENERIC_BUG_RELATIVE_POINTERS config GENERIC_BUG_RELATIVE_POINTERS bool diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 4db7e4bf69f5..1d403a3612ea 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -48,7 +48,8 @@ endif # How to compile the 16-bit code. Note we always compile for -march=i386; # that way we can complain to the user if the CPU is insufficient. -REALMODE_CFLAGS := -std=gnu11 -m16 -g -Os -DDISABLE_BRANCH_PROFILING -D__DISABLE_EXPORTS \ +REALMODE_CFLAGS := -std=gnu11 -fms-extensions -m16 -g -Os \ + -DDISABLE_BRANCH_PROFILING -D__DISABLE_EXPORTS \ -Wall -Wstrict-prototypes -march=i386 -mregparm=3 \ -fno-strict-aliasing -fomit-frame-pointer -fno-pic \ -mno-mmx -mno-sse $(call cc-option,-fcf-protection=none) @@ -60,6 +61,7 @@ REALMODE_CFLAGS += $(cc_stack_align4) REALMODE_CFLAGS += $(CLANG_FLAGS) ifdef CONFIG_CC_IS_CLANG REALMODE_CFLAGS += -Wno-gnu +REALMODE_CFLAGS += -Wno-microsoft-anon-tag endif export REALMODE_CFLAGS @@ -75,7 +77,7 @@ export BITS # # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383 # -KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx +KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -mno-sse4a KBUILD_RUSTFLAGS += --target=$(objtree)/scripts/target.json KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2 @@ -98,7 +100,7 @@ ifeq ($(CONFIG_X86_KERNEL_IBT),y) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104816 # KBUILD_CFLAGS += $(call cc-option,-fcf-protection=branch -fno-jump-tables) -KBUILD_RUSTFLAGS += -Zcf-protection=branch -Zno-jump-tables +KBUILD_RUSTFLAGS += -Zcf-protection=branch $(if $(call rustc-min-version,109300),-Cjump-tables=n,-Zno-jump-tables) else KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none) endif diff --git a/arch/x86/boot/a20.c b/arch/x86/boot/a20.c index a2b6b428922a..bda042933a05 100644 --- a/arch/x86/boot/a20.c +++ b/arch/x86/boot/a20.c @@ -135,29 +135,29 @@ int enable_a20(void) (legacy free, etc.) */ if (a20_test_short()) return 0; - + /* Next, try the BIOS (INT 0x15, AX=0x2401) */ enable_a20_bios(); if (a20_test_short()) return 0; - + /* Try enabling A20 through the keyboard controller */ kbc_err = empty_8042(); if (a20_test_short()) return 0; /* BIOS worked, but with delayed reaction */ - + if (!kbc_err) { enable_a20_kbc(); if (a20_test_long()) return 0; } - + /* Finally, try enabling the "fast A20 gate" */ enable_a20_fast(); if (a20_test_long()) return 0; } - + return -1; } diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index a3c58ebe3662..8e3eab34dff4 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -193,8 +193,6 @@ static inline bool heap_free(size_t n) void copy_to_fs(addr_t dst, void *src, size_t len); void *copy_from_fs(void *dst, addr_t src, size_t len); -void copy_to_gs(addr_t dst, void *src, size_t len); -void *copy_from_gs(void *dst, addr_t src, size_t len); /* a20.c */ int enable_a20(void); diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 74657589264d..68f9d7a1683b 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -25,7 +25,7 @@ targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \ # avoid errors with '-march=i386', and future flags may depend on the target to # be valid. KBUILD_CFLAGS := -m$(BITS) -O2 $(CLANG_FLAGS) -KBUILD_CFLAGS += -std=gnu11 +KBUILD_CFLAGS += -std=gnu11 -fms-extensions KBUILD_CFLAGS += -fno-strict-aliasing -fPIE KBUILD_CFLAGS += -Wundef KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING @@ -36,7 +36,10 @@ KBUILD_CFLAGS += -mno-mmx -mno-sse KBUILD_CFLAGS += -ffreestanding -fshort-wchar KBUILD_CFLAGS += -fno-stack-protector KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) -KBUILD_CFLAGS += $(call cc-disable-warning, gnu) +ifdef CONFIG_CC_IS_CLANG +KBUILD_CFLAGS += -Wno-gnu +KBUILD_CFLAGS += -Wno-microsoft-anon-tag +endif KBUILD_CFLAGS += -Wno-pointer-sign KBUILD_CFLAGS += -fno-asynchronous-unwind-tables KBUILD_CFLAGS += -D__DISABLE_EXPORTS diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index db1048621ea2..fd855e32c9b9 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -152,17 +152,6 @@ bool insn_has_rep_prefix(struct insn *insn); void sev_insn_decode_init(void); bool early_setup_ghcb(void); #else -static inline void sev_enable(struct boot_params *bp) -{ - /* - * bp->cc_blob_address should only be set by boot/compressed kernel. - * Initialize it to 0 unconditionally (thus here in this stub too) to - * ensure that uninitialized values from buggy bootloaders aren't - * propagated. - */ - if (bp) - bp->cc_blob_address = 0; -} static inline void snp_check_features(void) { } static inline void sev_es_shutdown_ghcb(void) { } static inline bool sev_es_check_ghcb_fault(unsigned long address) diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index bdd26050dff7..0e89e197e112 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -3,6 +3,7 @@ #include <asm/bootparam.h> #include <asm/bootparam_utils.h> #include <asm/e820/types.h> +#include <asm/pgtable.h> #include <asm/processor.h> #include "../string.h" #include "efi.h" @@ -168,9 +169,10 @@ asmlinkage void configure_5level_paging(struct boot_params *bp, void *pgtable) * For 4- to 5-level paging transition, set up current CR3 as * the first and the only entry in a new top-level page table. */ - *trampoline_32bit = __native_read_cr3() | _PAGE_TABLE_NOENC; + *trampoline_32bit = native_read_cr3_pa() | _PAGE_TABLE_NOENC; } else { - unsigned long src; + u64 *new_cr3; + pgd_t *pgdp; /* * For 5- to 4-level paging transition, copy page table pointed @@ -180,8 +182,9 @@ asmlinkage void configure_5level_paging(struct boot_params *bp, void *pgtable) * We cannot just point to the page table from trampoline as it * may be above 4G. */ - src = *(unsigned long *)__native_read_cr3() & PAGE_MASK; - memcpy(trampoline_32bit, (void *)src, PAGE_SIZE); + pgdp = (pgd_t *)native_read_cr3_pa(); + new_cr3 = (u64 *)(native_pgd_val(pgdp[0]) & PTE_PFN_MASK); + memcpy(trampoline_32bit, new_cr3, PAGE_SIZE); } toggle_la57(trampoline_32bit); diff --git a/arch/x86/boot/compressed/sev-handle-vc.c b/arch/x86/boot/compressed/sev-handle-vc.c index 7530ad8b768b..030001b46554 100644 --- a/arch/x86/boot/compressed/sev-handle-vc.c +++ b/arch/x86/boot/compressed/sev-handle-vc.c @@ -29,11 +29,10 @@ bool insn_has_rep_prefix(struct insn *insn) { insn_byte_t p; - int i; insn_get_prefixes(insn); - for_each_insn_prefix(insn, i, p) { + for_each_insn_prefix(insn, p) { if (p == 0xf2 || p == 0xf3) return true; } diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index 6e5c32a53d03..c8c1464b3a56 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -14,6 +14,7 @@ #include <asm/bootparam.h> #include <asm/pgtable_types.h> +#include <asm/shared/msr.h> #include <asm/sev.h> #include <asm/trapnr.h> #include <asm/trap_pf.h> @@ -397,7 +398,7 @@ void sev_enable(struct boot_params *bp) } /* Set the SME mask if this is an SEV guest. */ - boot_rdmsr(MSR_AMD64_SEV, &m); + raw_rdmsr(MSR_AMD64_SEV, &m); sev_status = m.q; if (!(sev_status & MSR_AMD64_SEV_ENABLED)) return; @@ -446,7 +447,7 @@ u64 sev_get_status(void) if (sev_check_cpu_support() < 0) return 0; - boot_rdmsr(MSR_AMD64_SEV, &m); + raw_rdmsr(MSR_AMD64_SEV, &m); return m.q; } @@ -496,7 +497,7 @@ bool early_is_sevsnp_guest(void) struct msr m; /* Obtain the address of the calling area to use */ - boot_rdmsr(MSR_SVSM_CAA, &m); + raw_rdmsr(MSR_SVSM_CAA, &m); boot_svsm_caa_pa = m.q; /* diff --git a/arch/x86/boot/compressed/sev.h b/arch/x86/boot/compressed/sev.h index 92f79c21939c..22637b416b46 100644 --- a/arch/x86/boot/compressed/sev.h +++ b/arch/x86/boot/compressed/sev.h @@ -10,7 +10,7 @@ #ifdef CONFIG_AMD_MEM_ENCRYPT -#include "../msr.h" +#include <asm/shared/msr.h> void snp_accept_memory(phys_addr_t start, phys_addr_t end); u64 sev_get_status(void); @@ -20,7 +20,7 @@ static inline u64 sev_es_rd_ghcb_msr(void) { struct msr m; - boot_rdmsr(MSR_AMD64_SEV_ES_GHCB, &m); + raw_rdmsr(MSR_AMD64_SEV_ES_GHCB, &m); return m.q; } @@ -30,7 +30,7 @@ static inline void sev_es_wr_ghcb_msr(u64 val) struct msr m; m.q = val; - boot_wrmsr(MSR_AMD64_SEV_ES_GHCB, &m); + raw_wrmsr(MSR_AMD64_SEV_ES_GHCB, &m); } #else diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c index f82de8de5dc6..2e1bb936cba2 100644 --- a/arch/x86/boot/cpucheck.c +++ b/arch/x86/boot/cpucheck.c @@ -26,9 +26,9 @@ #include <asm/intel-family.h> #include <asm/processor-flags.h> #include <asm/msr-index.h> +#include <asm/shared/msr.h> #include "string.h" -#include "msr.h" static u32 err_flags[NCAPINTS]; @@ -134,9 +134,9 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr) struct msr m; - boot_rdmsr(MSR_K7_HWCR, &m); + raw_rdmsr(MSR_K7_HWCR, &m); m.l &= ~(1 << 15); - boot_wrmsr(MSR_K7_HWCR, &m); + raw_wrmsr(MSR_K7_HWCR, &m); get_cpuflags(); /* Make sure it really did something */ err = check_cpuflags(); @@ -148,9 +148,9 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr) struct msr m; - boot_rdmsr(MSR_VIA_FCR, &m); + raw_rdmsr(MSR_VIA_FCR, &m); m.l |= (1 << 1) | (1 << 7); - boot_wrmsr(MSR_VIA_FCR, &m); + raw_wrmsr(MSR_VIA_FCR, &m); set_bit(X86_FEATURE_CX8, cpu.flags); err = check_cpuflags(); @@ -160,14 +160,14 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr) struct msr m, m_tmp; u32 level = 1; - boot_rdmsr(0x80860004, &m); + raw_rdmsr(0x80860004, &m); m_tmp = m; m_tmp.l = ~0; - boot_wrmsr(0x80860004, &m_tmp); + raw_wrmsr(0x80860004, &m_tmp); asm("cpuid" : "+a" (level), "=d" (cpu.flags[0]) : : "ecx", "ebx"); - boot_wrmsr(0x80860004, &m); + raw_wrmsr(0x80860004, &m); err = check_cpuflags(); } else if (err == 0x01 && diff --git a/arch/x86/boot/msr.h b/arch/x86/boot/msr.h deleted file mode 100644 index aed66f7ae199..000000000000 --- a/arch/x86/boot/msr.h +++ /dev/null @@ -1,26 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Helpers/definitions related to MSR access. - */ - -#ifndef BOOT_MSR_H -#define BOOT_MSR_H - -#include <asm/shared/msr.h> - -/* - * The kernel proper already defines rdmsr()/wrmsr(), but they are not for the - * boot kernel since they rely on tracepoint/exception handling infrastructure - * that's not available here. - */ -static inline void boot_rdmsr(unsigned int reg, struct msr *m) -{ - asm volatile("rdmsr" : "=a" (m->l), "=d" (m->h) : "c" (reg)); -} - -static inline void boot_wrmsr(unsigned int reg, const struct msr *m) -{ - asm volatile("wrmsr" : : "c" (reg), "a"(m->l), "d" (m->h) : "memory"); -} - -#endif /* BOOT_MSR_H */ diff --git a/arch/x86/boot/startup/Makefile b/arch/x86/boot/startup/Makefile index e8fdf020b422..5e499cfb29b5 100644 --- a/arch/x86/boot/startup/Makefile +++ b/arch/x86/boot/startup/Makefile @@ -36,7 +36,7 @@ $(patsubst %.o,$(obj)/%.o,$(lib-y)): OBJECT_FILES_NON_STANDARD := y # relocations, even if other objtool actions are being deferred. # $(pi-objs): objtool-enabled = 1 -$(pi-objs): objtool-args = $(if $(delay-objtool),,$(objtool-args-y)) --noabs +$(pi-objs): objtool-args = $(if $(delay-objtool),--dry-run,$(objtool-args-y)) --noabs # # Confine the startup code by prefixing all symbols with __pi_ (for position diff --git a/arch/x86/boot/startup/sev-shared.c b/arch/x86/boot/startup/sev-shared.c index 4e22ffd73516..a0fa8bb2b945 100644 --- a/arch/x86/boot/startup/sev-shared.c +++ b/arch/x86/boot/startup/sev-shared.c @@ -12,7 +12,7 @@ #include <asm/setup_data.h> #ifndef __BOOT_COMPRESSED -#define has_cpuflag(f) boot_cpu_has(f) +#define has_cpuflag(f) cpu_feature_enabled(f) #else #undef WARN #define WARN(condition, format...) (!!(condition)) diff --git a/arch/x86/coco/sev/vc-handle.c b/arch/x86/coco/sev/vc-handle.c index 7fc136a35334..f08c7505ed82 100644 --- a/arch/x86/coco/sev/vc-handle.c +++ b/arch/x86/coco/sev/vc-handle.c @@ -352,7 +352,6 @@ fault: #define sev_printk(fmt, ...) printk(fmt, ##__VA_ARGS__) #define error(v) -#define has_cpuflag(f) boot_cpu_has(f) #include "vc-shared.c" diff --git a/arch/x86/coco/sev/vc-shared.c b/arch/x86/coco/sev/vc-shared.c index 9b01c9ad81be..58b2f985d546 100644 --- a/arch/x86/coco/sev/vc-shared.c +++ b/arch/x86/coco/sev/vc-shared.c @@ -1,5 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 +#ifndef __BOOT_COMPRESSED +#define has_cpuflag(f) cpu_feature_enabled(f) +#endif + static enum es_result vc_check_opcode_bytes(struct es_em_ctxt *ctxt, unsigned long exit_code) { @@ -546,6 +550,13 @@ static enum es_result vc_handle_cpuid(struct ghcb *ghcb, /* xgetbv will cause #GP - use reset value for xcr0 */ ghcb_set_xcr0(ghcb, 1); + if (has_cpuflag(X86_FEATURE_SHSTK) && regs->ax == 0xd && regs->cx == 1) { + struct msr m; + + raw_rdmsr(MSR_IA32_XSS, &m); + ghcb_set_xss(ghcb, m.q); + } + ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0); if (ret != ES_OK) return ret; diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S index 8e9a0cc20a4a..6ba2b3adcef0 100644 --- a/arch/x86/entry/entry.S +++ b/arch/x86/entry/entry.S @@ -4,6 +4,7 @@ */ #include <linux/export.h> +#include <linux/kvm_types.h> #include <linux/linkage.h> #include <linux/objtool.h> #include <asm/msr-index.h> @@ -29,8 +30,15 @@ SYM_FUNC_START(write_ibpb) FILL_RETURN_BUFFER %rax, RSB_CLEAR_LOOPS, X86_BUG_IBPB_NO_RET RET SYM_FUNC_END(write_ibpb) -/* For KVM */ -EXPORT_SYMBOL_GPL(write_ibpb); +EXPORT_SYMBOL_FOR_KVM(write_ibpb); + +SYM_FUNC_START(__WARN_trap) + ANNOTATE_NOENDBR + ANNOTATE_REACHABLE + ud1 (%edx), %_ASM_ARG1 + RET +SYM_FUNC_END(__WARN_trap) +EXPORT_SYMBOL(__WARN_trap) .popsection @@ -48,8 +56,7 @@ SYM_CODE_START_NOALIGN(x86_verw_sel) .word __KERNEL_DS .align L1_CACHE_BYTES, 0xcc SYM_CODE_END(x86_verw_sel); -/* For KVM */ -EXPORT_SYMBOL_GPL(x86_verw_sel); +EXPORT_SYMBOL_FOR_KVM(x86_verw_sel); .popsection diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index ed04a968cc7d..f9983a1907bf 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -19,6 +19,7 @@ * - idtentry: Define exception entry points. */ #include <linux/export.h> +#include <linux/kvm_types.h> #include <linux/linkage.h> #include <asm/segment.h> #include <asm/cache.h> @@ -1566,5 +1567,5 @@ SYM_FUNC_START(clear_bhb_loop) pop %rbp RET SYM_FUNC_END(clear_bhb_loop) -EXPORT_SYMBOL_GPL(clear_bhb_loop) +EXPORT_SYMBOL_FOR_KVM(clear_bhb_loop) STACK_FRAME_NON_STANDARD(clear_bhb_loop) diff --git a/arch/x86/entry/entry_64_fred.S b/arch/x86/entry/entry_64_fred.S index fafbd3e68cb8..894f7f16eb80 100644 --- a/arch/x86/entry/entry_64_fred.S +++ b/arch/x86/entry/entry_64_fred.S @@ -4,6 +4,7 @@ */ #include <linux/export.h> +#include <linux/kvm_types.h> #include <asm/asm.h> #include <asm/fred.h> @@ -146,5 +147,5 @@ SYM_FUNC_START(asm_fred_entry_from_kvm) RET SYM_FUNC_END(asm_fred_entry_from_kvm) -EXPORT_SYMBOL_GPL(asm_fred_entry_from_kvm); +EXPORT_SYMBOL_FOR_KVM(asm_fred_entry_from_kvm); #endif diff --git a/arch/x86/entry/entry_fred.c b/arch/x86/entry/entry_fred.c index f004a4dc74c2..94e626cc6a07 100644 --- a/arch/x86/entry/entry_fred.c +++ b/arch/x86/entry/entry_fred.c @@ -78,13 +78,13 @@ static noinstr void fred_intx(struct pt_regs *regs) static __always_inline void fred_other(struct pt_regs *regs) { /* The compiler can fold these conditions into a single test */ - if (likely(regs->fred_ss.vector == FRED_SYSCALL && regs->fred_ss.lm)) { + if (likely(regs->fred_ss.vector == FRED_SYSCALL && regs->fred_ss.l)) { regs->orig_ax = regs->ax; regs->ax = -ENOSYS; do_syscall_64(regs, regs->orig_ax); return; } else if (ia32_enabled() && - likely(regs->fred_ss.vector == FRED_SYSENTER && !regs->fred_ss.lm)) { + likely(regs->fred_ss.vector == FRED_SYSENTER && !regs->fred_ss.l)) { regs->orig_ax = regs->ax; regs->ax = -ENOSYS; do_fast_syscall_32(regs); diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c index 2b15ea17bb7c..a67a644d0cfe 100644 --- a/arch/x86/entry/syscall_32.c +++ b/arch/x86/entry/syscall_32.c @@ -274,9 +274,10 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) * fetch EBP before invoking any of the syscall entry work * functions. */ - syscall_enter_from_user_mode_prepare(regs); + enter_from_user_mode(regs); instrumentation_begin(); + local_irq_enable(); /* Fetch EBP from where the vDSO stashed it. */ if (IS_ENABLED(CONFIG_X86_64)) { /* diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 4877e16da69a..e979a3eac7a3 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -475,3 +475,4 @@ 467 i386 open_tree_attr sys_open_tree_attr 468 i386 file_getattr sys_file_getattr 469 i386 file_setattr sys_file_setattr +470 i386 listns sys_listns diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index ced2a1deecd7..8a4ac4841be6 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -394,6 +394,7 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common listns sys_listns # # Due to a historical design error, certain syscalls are numbered differently diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index b20661b8621d..44656d2fb555 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -2,6 +2,7 @@ #include <linux/perf_event.h> #include <linux/jump_label.h> #include <linux/export.h> +#include <linux/kvm_types.h> #include <linux/types.h> #include <linux/init.h> #include <linux/slab.h> @@ -763,7 +764,12 @@ static void amd_pmu_enable_all(int added) if (!test_bit(idx, cpuc->active_mask)) continue; - amd_pmu_enable_event(cpuc->events[idx]); + /* + * FIXME: cpuc->events[idx] can become NULL in a subtle race + * condition with NMI->throttle->x86_pmu_stop(). + */ + if (cpuc->events[idx]) + amd_pmu_enable_event(cpuc->events[idx]); } } @@ -1569,7 +1575,7 @@ void amd_pmu_enable_virt(void) /* Reload all events */ amd_pmu_reload_virt(); } -EXPORT_SYMBOL_GPL(amd_pmu_enable_virt); +EXPORT_SYMBOL_FOR_KVM(amd_pmu_enable_virt); void amd_pmu_disable_virt(void) { @@ -1586,4 +1592,4 @@ void amd_pmu_disable_virt(void) /* Reload all events */ amd_pmu_reload_virt(); } -EXPORT_SYMBOL_GPL(amd_pmu_disable_virt); +EXPORT_SYMBOL_FOR_KVM(amd_pmu_disable_virt); diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 745caa6c15a3..0c38a31d5fc7 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -20,6 +20,7 @@ #include <linux/export.h> #include <linux/init.h> #include <linux/kdebug.h> +#include <linux/kvm_types.h> #include <linux/sched/mm.h> #include <linux/sched/clock.h> #include <linux/uaccess.h> @@ -554,14 +555,22 @@ static inline int precise_br_compat(struct perf_event *event) return m == b; } -int x86_pmu_max_precise(void) +int x86_pmu_max_precise(struct pmu *pmu) { int precise = 0; - /* Support for constant skid */ if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) { - precise++; + /* arch PEBS */ + if (x86_pmu.arch_pebs) { + precise = 2; + if (hybrid(pmu, arch_pebs_cap).pdists) + precise++; + + return precise; + } + /* legacy PEBS - support for constant skid */ + precise++; /* Support for IP fixup */ if (x86_pmu.lbr_nr || x86_pmu.intel_cap.pebs_format >= 2) precise++; @@ -569,13 +578,14 @@ int x86_pmu_max_precise(void) if (x86_pmu.pebs_prec_dist) precise++; } + return precise; } int x86_pmu_hw_config(struct perf_event *event) { if (event->attr.precise_ip) { - int precise = x86_pmu_max_precise(); + int precise = x86_pmu_max_precise(event->pmu); if (event->attr.precise_ip > precise) return -EOPNOTSUPP; @@ -714,7 +724,7 @@ struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr, void *data) { return static_call(x86_pmu_guest_get_msrs)(nr, data); } -EXPORT_SYMBOL_GPL(perf_guest_get_msrs); +EXPORT_SYMBOL_FOR_KVM(perf_guest_get_msrs); /* * There may be PMI landing after enabled=0. The PMI hitting could be before or @@ -1344,6 +1354,7 @@ static void x86_pmu_enable(struct pmu *pmu) hwc->state |= PERF_HES_ARCH; x86_pmu_stop(event, PERF_EF_UPDATE); + cpuc->events[hwc->idx] = NULL; } /* @@ -1365,6 +1376,7 @@ static void x86_pmu_enable(struct pmu *pmu) * if cpuc->enabled = 0, then no wrmsr as * per x86_pmu_enable_event() */ + cpuc->events[hwc->idx] = event; x86_pmu_start(event, PERF_EF_RELOAD); } cpuc->n_added = 0; @@ -1531,7 +1543,6 @@ static void x86_pmu_start(struct perf_event *event, int flags) event->hw.state = 0; - cpuc->events[idx] = event; __set_bit(idx, cpuc->active_mask); static_call(x86_pmu_enable)(event); perf_event_update_userpage(event); @@ -1610,7 +1621,6 @@ void x86_pmu_stop(struct perf_event *event, int flags) if (test_bit(hwc->idx, cpuc->active_mask)) { static_call(x86_pmu_disable)(event); __clear_bit(hwc->idx, cpuc->active_mask); - cpuc->events[hwc->idx] = NULL; WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); hwc->state |= PERF_HES_STOPPED; } @@ -1648,6 +1658,7 @@ static void x86_pmu_del(struct perf_event *event, int flags) * Not a TXN, therefore cleanup properly. */ x86_pmu_stop(event, PERF_EF_UPDATE); + cpuc->events[event->hw.idx] = NULL; for (i = 0; i < cpuc->n_events; i++) { if (event == cpuc->event_list[i]) @@ -2629,7 +2640,9 @@ static ssize_t max_precise_show(struct device *cdev, struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu_max_precise()); + struct pmu *pmu = dev_get_drvdata(cdev); + + return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu_max_precise(pmu)); } static DEVICE_ATTR_RO(max_precise); @@ -2789,13 +2802,13 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re return; } - if (perf_callchain_store(entry, regs->ip)) - return; - - if (perf_hw_regs(regs)) + if (perf_hw_regs(regs)) { + if (perf_callchain_store(entry, regs->ip)) + return; unwind_start(&state, current, regs, NULL); - else + } else { unwind_start(&state, current, NULL, (void *)regs->sp); + } for (; !unwind_done(&state); unwind_next_frame(&state)) { addr = unwind_get_return_address(&state); @@ -2845,46 +2858,6 @@ static unsigned long get_segment_base(unsigned int segment) return get_desc_base(desc); } -#ifdef CONFIG_UPROBES -/* - * Heuristic-based check if uprobe is installed at the function entry. - * - * Under assumption of user code being compiled with frame pointers, - * `push %rbp/%ebp` is a good indicator that we indeed are. - * - * Similarly, `endbr64` (assuming 64-bit mode) is also a common pattern. - * If we get this wrong, captured stack trace might have one extra bogus - * entry, but the rest of stack trace will still be meaningful. - */ -static bool is_uprobe_at_func_entry(struct pt_regs *regs) -{ - struct arch_uprobe *auprobe; - - if (!current->utask) - return false; - - auprobe = current->utask->auprobe; - if (!auprobe) - return false; - - /* push %rbp/%ebp */ - if (auprobe->insn[0] == 0x55) - return true; - - /* endbr64 (64-bit only) */ - if (user_64bit_mode(regs) && is_endbr((u32 *)auprobe->insn)) - return true; - - return false; -} - -#else -static bool is_uprobe_at_func_entry(struct pt_regs *regs) -{ - return false; -} -#endif /* CONFIG_UPROBES */ - #ifdef CONFIG_IA32_EMULATION #include <linux/compat.h> @@ -3106,7 +3079,7 @@ void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap) cap->events_mask_len = x86_pmu.events_mask_len; cap->pebs_ept = x86_pmu.pebs_ept; } -EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability); +EXPORT_SYMBOL_FOR_KVM(perf_get_x86_pmu_capability); u64 perf_get_hw_event_config(int hw_event) { @@ -3117,4 +3090,4 @@ u64 perf_get_hw_event_config(int hw_event) return 0; } -EXPORT_SYMBOL_GPL(perf_get_hw_event_config); +EXPORT_SYMBOL_FOR_KVM(perf_get_hw_event_config); diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 28f5468a6ea3..853fe073bab3 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2563,6 +2563,44 @@ static void intel_pmu_disable_fixed(struct perf_event *event) cpuc->fixed_ctrl_val &= ~mask; } +static inline void __intel_pmu_update_event_ext(int idx, u64 ext) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + u32 msr; + + if (idx < INTEL_PMC_IDX_FIXED) { + msr = MSR_IA32_PMC_V6_GP0_CFG_C + + x86_pmu.addr_offset(idx, false); + } else { + msr = MSR_IA32_PMC_V6_FX0_CFG_C + + x86_pmu.addr_offset(idx - INTEL_PMC_IDX_FIXED, false); + } + + cpuc->cfg_c_val[idx] = ext; + wrmsrq(msr, ext); +} + +static void intel_pmu_disable_event_ext(struct perf_event *event) +{ + /* + * Only clear CFG_C MSR for PEBS counter group events, + * it avoids the HW counter's value to be added into + * other PEBS records incorrectly after PEBS counter + * group events are disabled. + * + * For other events, it's unnecessary to clear CFG_C MSRs + * since CFG_C doesn't take effect if counter is in + * disabled state. That helps to reduce the WRMSR overhead + * in context switches. + */ + if (!is_pebs_counter_event_group(event)) + return; + + __intel_pmu_update_event_ext(event->hw.idx, 0); +} + +DEFINE_STATIC_CALL_NULL(intel_pmu_disable_event_ext, intel_pmu_disable_event_ext); + static void intel_pmu_disable_event(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; @@ -2571,9 +2609,12 @@ static void intel_pmu_disable_event(struct perf_event *event) switch (idx) { case 0 ... INTEL_PMC_IDX_FIXED - 1: intel_clear_masks(event, idx); + static_call_cond(intel_pmu_disable_event_ext)(event); x86_pmu_disable_event(event); break; case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1: + static_call_cond(intel_pmu_disable_event_ext)(event); + fallthrough; case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END: intel_pmu_disable_fixed(event); break; @@ -2940,6 +2981,79 @@ static void intel_pmu_enable_acr(struct perf_event *event) DEFINE_STATIC_CALL_NULL(intel_pmu_enable_acr_event, intel_pmu_enable_acr); +static void intel_pmu_enable_event_ext(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + union arch_pebs_index old, new; + struct arch_pebs_cap cap; + u64 ext = 0; + + cap = hybrid(cpuc->pmu, arch_pebs_cap); + + if (event->attr.precise_ip) { + u64 pebs_data_cfg = intel_get_arch_pebs_data_config(event); + + ext |= ARCH_PEBS_EN; + if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) + ext |= (-hwc->sample_period) & ARCH_PEBS_RELOAD; + + if (pebs_data_cfg && cap.caps) { + if (pebs_data_cfg & PEBS_DATACFG_MEMINFO) + ext |= ARCH_PEBS_AUX & cap.caps; + + if (pebs_data_cfg & PEBS_DATACFG_GP) + ext |= ARCH_PEBS_GPR & cap.caps; + + if (pebs_data_cfg & PEBS_DATACFG_XMMS) + ext |= ARCH_PEBS_VECR_XMM & cap.caps; + + if (pebs_data_cfg & PEBS_DATACFG_LBRS) + ext |= ARCH_PEBS_LBR & cap.caps; + + if (pebs_data_cfg & + (PEBS_DATACFG_CNTR_MASK << PEBS_DATACFG_CNTR_SHIFT)) + ext |= ARCH_PEBS_CNTR_GP & cap.caps; + + if (pebs_data_cfg & + (PEBS_DATACFG_FIX_MASK << PEBS_DATACFG_FIX_SHIFT)) + ext |= ARCH_PEBS_CNTR_FIXED & cap.caps; + + if (pebs_data_cfg & PEBS_DATACFG_METRICS) + ext |= ARCH_PEBS_CNTR_METRICS & cap.caps; + } + + if (cpuc->n_pebs == cpuc->n_large_pebs) + new.thresh = ARCH_PEBS_THRESH_MULTI; + else + new.thresh = ARCH_PEBS_THRESH_SINGLE; + + rdmsrq(MSR_IA32_PEBS_INDEX, old.whole); + if (new.thresh != old.thresh || !old.en) { + if (old.thresh == ARCH_PEBS_THRESH_MULTI && old.wr > 0) { + /* + * Large PEBS was enabled. + * Drain PEBS buffer before applying the single PEBS. + */ + intel_pmu_drain_pebs_buffer(); + } else { + new.wr = 0; + new.full = 0; + new.en = 1; + wrmsrq(MSR_IA32_PEBS_INDEX, new.whole); + } + } + } + + if (is_pebs_counter_event_group(event)) + ext |= ARCH_PEBS_CNTR_ALLOW; + + if (cpuc->cfg_c_val[hwc->idx] != ext) + __intel_pmu_update_event_ext(hwc->idx, ext); +} + +DEFINE_STATIC_CALL_NULL(intel_pmu_enable_event_ext, intel_pmu_enable_event_ext); + static void intel_pmu_enable_event(struct perf_event *event) { u64 enable_mask = ARCH_PERFMON_EVENTSEL_ENABLE; @@ -2955,10 +3069,12 @@ static void intel_pmu_enable_event(struct perf_event *event) enable_mask |= ARCH_PERFMON_EVENTSEL_BR_CNTR; intel_set_masks(event, idx); static_call_cond(intel_pmu_enable_acr_event)(event); + static_call_cond(intel_pmu_enable_event_ext)(event); __x86_pmu_enable_event(hwc, enable_mask); break; case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1: static_call_cond(intel_pmu_enable_acr_event)(event); + static_call_cond(intel_pmu_enable_event_ext)(event); fallthrough; case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END: intel_pmu_enable_fixed(event); @@ -3216,6 +3332,19 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) } /* + * Arch PEBS sets bit 54 in the global status register + */ + if (__test_and_clear_bit(GLOBAL_STATUS_ARCH_PEBS_THRESHOLD_BIT, + (unsigned long *)&status)) { + handled++; + static_call(x86_pmu_drain_pebs)(regs, &data); + + if (cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS] && + is_pebs_counter_event_group(cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS])) + status &= ~GLOBAL_STATUS_PERF_METRICS_OVF_BIT; + } + + /* * Intel PT */ if (__test_and_clear_bit(GLOBAL_STATUS_TRACE_TOPAPMI_BIT, (unsigned long *)&status)) { @@ -3269,7 +3398,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) * The PEBS buffer has to be drained before handling the A-PMI */ if (is_pebs_counter_event_group(event)) - x86_pmu.drain_pebs(regs, &data); + static_call(x86_pmu_drain_pebs)(regs, &data); last_period = event->hw.last_period; @@ -4029,7 +4158,9 @@ static unsigned long intel_pmu_large_pebs_flags(struct perf_event *event) if (!event->attr.exclude_kernel) flags &= ~PERF_SAMPLE_REGS_USER; if (event->attr.sample_regs_user & ~PEBS_GP_REGS) - flags &= ~(PERF_SAMPLE_REGS_USER | PERF_SAMPLE_REGS_INTR); + flags &= ~PERF_SAMPLE_REGS_USER; + if (event->attr.sample_regs_intr & ~PEBS_GP_REGS) + flags &= ~PERF_SAMPLE_REGS_INTR; return flags; } @@ -4204,6 +4335,20 @@ static bool intel_pmu_is_acr_group(struct perf_event *event) return false; } +static inline bool intel_pmu_has_pebs_counter_group(struct pmu *pmu) +{ + u64 caps; + + if (x86_pmu.intel_cap.pebs_format >= 6 && x86_pmu.intel_cap.pebs_baseline) + return true; + + caps = hybrid(pmu, arch_pebs_cap).caps; + if (x86_pmu.arch_pebs && (caps & ARCH_PEBS_CNTR_MASK)) + return true; + + return false; +} + static inline void intel_pmu_set_acr_cntr_constr(struct perf_event *event, u64 *cause_mask, int *num) { @@ -4237,6 +4382,8 @@ static int intel_pmu_hw_config(struct perf_event *event) } if (event->attr.precise_ip) { + struct arch_pebs_cap pebs_cap = hybrid(event->pmu, arch_pebs_cap); + if ((event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_FIXED_VLBR_EVENT) return -EINVAL; @@ -4250,6 +4397,15 @@ static int intel_pmu_hw_config(struct perf_event *event) } if (x86_pmu.pebs_aliases) x86_pmu.pebs_aliases(event); + + if (x86_pmu.arch_pebs) { + u64 cntr_mask = hybrid(event->pmu, intel_ctrl) & + ~GLOBAL_CTRL_EN_PERF_METRICS; + u64 pebs_mask = event->attr.precise_ip >= 3 ? + pebs_cap.pdists : pebs_cap.counters; + if (cntr_mask != pebs_mask) + event->hw.dyn_constraint &= pebs_mask; + } } if (needs_branch_stack(event)) { @@ -4341,8 +4497,7 @@ static int intel_pmu_hw_config(struct perf_event *event) } if ((event->attr.sample_type & PERF_SAMPLE_READ) && - (x86_pmu.intel_cap.pebs_format >= 6) && - x86_pmu.intel_cap.pebs_baseline && + intel_pmu_has_pebs_counter_group(event->pmu) && is_sampling_event(event) && event->attr.precise_ip) event->group_leader->hw.flags |= PERF_X86_EVENT_PEBS_CNTR; @@ -5212,7 +5367,13 @@ err: static int intel_pmu_cpu_prepare(int cpu) { - return intel_cpuc_prepare(&per_cpu(cpu_hw_events, cpu), cpu); + int ret; + + ret = intel_cpuc_prepare(&per_cpu(cpu_hw_events, cpu), cpu); + if (ret) + return ret; + + return alloc_arch_pebs_buf_on_cpu(cpu); } static void flip_smm_bit(void *data) @@ -5257,6 +5418,163 @@ static void intel_pmu_check_event_constraints(struct event_constraint *event_con u64 fixed_cntr_mask, u64 intel_ctrl); +enum dyn_constr_type { + DYN_CONSTR_NONE, + DYN_CONSTR_BR_CNTR, + DYN_CONSTR_ACR_CNTR, + DYN_CONSTR_ACR_CAUSE, + DYN_CONSTR_PEBS, + DYN_CONSTR_PDIST, + + DYN_CONSTR_MAX, +}; + +static const char * const dyn_constr_type_name[] = { + [DYN_CONSTR_NONE] = "a normal event", + [DYN_CONSTR_BR_CNTR] = "a branch counter logging event", + [DYN_CONSTR_ACR_CNTR] = "an auto-counter reload event", + [DYN_CONSTR_ACR_CAUSE] = "an auto-counter reload cause event", + [DYN_CONSTR_PEBS] = "a PEBS event", + [DYN_CONSTR_PDIST] = "a PEBS PDIST event", +}; + +static void __intel_pmu_check_dyn_constr(struct event_constraint *constr, + enum dyn_constr_type type, u64 mask) +{ + struct event_constraint *c1, *c2; + int new_weight, check_weight; + u64 new_mask, check_mask; + + for_each_event_constraint(c1, constr) { + new_mask = c1->idxmsk64 & mask; + new_weight = hweight64(new_mask); + + /* ignore topdown perf metrics event */ + if (c1->idxmsk64 & INTEL_PMC_MSK_TOPDOWN) + continue; + + if (!new_weight && fls64(c1->idxmsk64) < INTEL_PMC_IDX_FIXED) { + pr_info("The event 0x%llx is not supported as %s.\n", + c1->code, dyn_constr_type_name[type]); + } + + if (new_weight <= 1) + continue; + + for_each_event_constraint(c2, c1 + 1) { + bool check_fail = false; + + check_mask = c2->idxmsk64 & mask; + check_weight = hweight64(check_mask); + + if (c2->idxmsk64 & INTEL_PMC_MSK_TOPDOWN || + !check_weight) + continue; + + /* The same constraints or no overlap */ + if (new_mask == check_mask || + (new_mask ^ check_mask) == (new_mask | check_mask)) + continue; + + /* + * A scheduler issue may be triggered in the following cases. + * - Two overlap constraints have the same weight. + * E.g., A constraints: 0x3, B constraints: 0x6 + * event counter failure case + * B PMC[2:1] 1 + * A PMC[1:0] 0 + * A PMC[1:0] FAIL + * - Two overlap constraints have different weight. + * The constraint has a low weight, but has high last bit. + * E.g., A constraints: 0x7, B constraints: 0xC + * event counter failure case + * B PMC[3:2] 2 + * A PMC[2:0] 0 + * A PMC[2:0] 1 + * A PMC[2:0] FAIL + */ + if (new_weight == check_weight) { + check_fail = true; + } else if (new_weight < check_weight) { + if ((new_mask | check_mask) != check_mask && + fls64(new_mask) > fls64(check_mask)) + check_fail = true; + } else { + if ((new_mask | check_mask) != new_mask && + fls64(new_mask) < fls64(check_mask)) + check_fail = true; + } + + if (check_fail) { + pr_info("The two events 0x%llx and 0x%llx may not be " + "fully scheduled under some circumstances as " + "%s.\n", + c1->code, c2->code, dyn_constr_type_name[type]); + } + } + } +} + +static void intel_pmu_check_dyn_constr(struct pmu *pmu, + struct event_constraint *constr, + u64 cntr_mask) +{ + enum dyn_constr_type i; + u64 mask; + + for (i = DYN_CONSTR_NONE; i < DYN_CONSTR_MAX; i++) { + mask = 0; + switch (i) { + case DYN_CONSTR_NONE: + mask = cntr_mask; + break; + case DYN_CONSTR_BR_CNTR: + if (x86_pmu.flags & PMU_FL_BR_CNTR) + mask = x86_pmu.lbr_counters; + break; + case DYN_CONSTR_ACR_CNTR: + mask = hybrid(pmu, acr_cntr_mask64) & GENMASK_ULL(INTEL_PMC_MAX_GENERIC - 1, 0); + break; + case DYN_CONSTR_ACR_CAUSE: + if (hybrid(pmu, acr_cntr_mask64) == hybrid(pmu, acr_cause_mask64)) + continue; + mask = hybrid(pmu, acr_cause_mask64) & GENMASK_ULL(INTEL_PMC_MAX_GENERIC - 1, 0); + break; + case DYN_CONSTR_PEBS: + if (x86_pmu.arch_pebs) + mask = hybrid(pmu, arch_pebs_cap).counters; + break; + case DYN_CONSTR_PDIST: + if (x86_pmu.arch_pebs) + mask = hybrid(pmu, arch_pebs_cap).pdists; + break; + default: + pr_warn("Unsupported dynamic constraint type %d\n", i); + } + + if (mask) + __intel_pmu_check_dyn_constr(constr, i, mask); + } +} + +static void intel_pmu_check_event_constraints_all(struct pmu *pmu) +{ + struct event_constraint *event_constraints = hybrid(pmu, event_constraints); + struct event_constraint *pebs_constraints = hybrid(pmu, pebs_constraints); + u64 cntr_mask = hybrid(pmu, cntr_mask64); + u64 fixed_cntr_mask = hybrid(pmu, fixed_cntr_mask64); + u64 intel_ctrl = hybrid(pmu, intel_ctrl); + + intel_pmu_check_event_constraints(event_constraints, cntr_mask, + fixed_cntr_mask, intel_ctrl); + + if (event_constraints) + intel_pmu_check_dyn_constr(pmu, event_constraints, cntr_mask); + + if (pebs_constraints) + intel_pmu_check_dyn_constr(pmu, pebs_constraints, cntr_mask); +} + static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs); static inline bool intel_pmu_broken_perf_cap(void) @@ -5269,34 +5587,89 @@ static inline bool intel_pmu_broken_perf_cap(void) return false; } +static inline void __intel_update_pmu_caps(struct pmu *pmu) +{ + struct pmu *dest_pmu = pmu ? pmu : x86_get_pmu(smp_processor_id()); + + if (hybrid(pmu, arch_pebs_cap).caps & ARCH_PEBS_VECR_XMM) + dest_pmu->capabilities |= PERF_PMU_CAP_EXTENDED_REGS; +} + +static inline void __intel_update_large_pebs_flags(struct pmu *pmu) +{ + u64 caps = hybrid(pmu, arch_pebs_cap).caps; + + x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME; + if (caps & ARCH_PEBS_LBR) + x86_pmu.large_pebs_flags |= PERF_SAMPLE_BRANCH_STACK; + if (caps & ARCH_PEBS_CNTR_MASK) + x86_pmu.large_pebs_flags |= PERF_SAMPLE_READ; + + if (!(caps & ARCH_PEBS_AUX)) + x86_pmu.large_pebs_flags &= ~PERF_SAMPLE_DATA_SRC; + if (!(caps & ARCH_PEBS_GPR)) { + x86_pmu.large_pebs_flags &= + ~(PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER); + } +} + +#define counter_mask(_gp, _fixed) ((_gp) | ((u64)(_fixed) << INTEL_PMC_IDX_FIXED)) + static void update_pmu_cap(struct pmu *pmu) { - unsigned int cntr, fixed_cntr, ecx, edx; - union cpuid35_eax eax; - union cpuid35_ebx ebx; + unsigned int eax, ebx, ecx, edx; + union cpuid35_eax eax_0; + union cpuid35_ebx ebx_0; + u64 cntrs_mask = 0; + u64 pebs_mask = 0; + u64 pdists_mask = 0; - cpuid(ARCH_PERFMON_EXT_LEAF, &eax.full, &ebx.full, &ecx, &edx); + cpuid(ARCH_PERFMON_EXT_LEAF, &eax_0.full, &ebx_0.full, &ecx, &edx); - if (ebx.split.umask2) + if (ebx_0.split.umask2) hybrid(pmu, config_mask) |= ARCH_PERFMON_EVENTSEL_UMASK2; - if (ebx.split.eq) + if (ebx_0.split.eq) hybrid(pmu, config_mask) |= ARCH_PERFMON_EVENTSEL_EQ; - if (eax.split.cntr_subleaf) { + if (eax_0.split.cntr_subleaf) { cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_NUM_COUNTER_LEAF, - &cntr, &fixed_cntr, &ecx, &edx); - hybrid(pmu, cntr_mask64) = cntr; - hybrid(pmu, fixed_cntr_mask64) = fixed_cntr; + &eax, &ebx, &ecx, &edx); + hybrid(pmu, cntr_mask64) = eax; + hybrid(pmu, fixed_cntr_mask64) = ebx; + cntrs_mask = counter_mask(eax, ebx); } - if (eax.split.acr_subleaf) { + if (eax_0.split.acr_subleaf) { cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_ACR_LEAF, - &cntr, &fixed_cntr, &ecx, &edx); + &eax, &ebx, &ecx, &edx); /* The mask of the counters which can be reloaded */ - hybrid(pmu, acr_cntr_mask64) = cntr | ((u64)fixed_cntr << INTEL_PMC_IDX_FIXED); - + hybrid(pmu, acr_cntr_mask64) = counter_mask(eax, ebx); /* The mask of the counters which can cause a reload of reloadable counters */ - hybrid(pmu, acr_cause_mask64) = ecx | ((u64)edx << INTEL_PMC_IDX_FIXED); + hybrid(pmu, acr_cause_mask64) = counter_mask(ecx, edx); + } + + /* Bits[5:4] should be set simultaneously if arch-PEBS is supported */ + if (eax_0.split.pebs_caps_subleaf && eax_0.split.pebs_cnts_subleaf) { + cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_PEBS_CAP_LEAF, + &eax, &ebx, &ecx, &edx); + hybrid(pmu, arch_pebs_cap).caps = (u64)ebx << 32; + + cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_PEBS_COUNTER_LEAF, + &eax, &ebx, &ecx, &edx); + pebs_mask = counter_mask(eax, ecx); + pdists_mask = counter_mask(ebx, edx); + hybrid(pmu, arch_pebs_cap).counters = pebs_mask; + hybrid(pmu, arch_pebs_cap).pdists = pdists_mask; + + if (WARN_ON((pebs_mask | pdists_mask) & ~cntrs_mask)) { + x86_pmu.arch_pebs = 0; + } else { + __intel_update_pmu_caps(pmu); + __intel_update_large_pebs_flags(pmu); + } + } else { + WARN_ON(x86_pmu.arch_pebs == 1); + x86_pmu.arch_pebs = 0; } if (!intel_pmu_broken_perf_cap()) { @@ -5319,10 +5692,7 @@ static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu) else pmu->intel_ctrl &= ~GLOBAL_CTRL_EN_PERF_METRICS; - intel_pmu_check_event_constraints(pmu->event_constraints, - pmu->cntr_mask64, - pmu->fixed_cntr_mask64, - pmu->intel_ctrl); + intel_pmu_check_event_constraints_all(&pmu->pmu); intel_pmu_check_extra_regs(pmu->extra_regs); } @@ -5418,6 +5788,7 @@ static void intel_pmu_cpu_starting(int cpu) return; init_debug_store_on_cpu(cpu); + init_arch_pebs_on_cpu(cpu); /* * Deal with CPUs that don't clear their LBRs on power-up, and that may * even boot with LBRs enabled. @@ -5456,6 +5827,8 @@ static void intel_pmu_cpu_starting(int cpu) } } + __intel_update_pmu_caps(cpuc->pmu); + if (!cpuc->shared_regs) return; @@ -5515,6 +5888,7 @@ static void free_excl_cntrs(struct cpu_hw_events *cpuc) static void intel_pmu_cpu_dying(int cpu) { fini_debug_store_on_cpu(cpu); + fini_arch_pebs_on_cpu(cpu); } void intel_cpuc_finish(struct cpu_hw_events *cpuc) @@ -5535,6 +5909,7 @@ static void intel_pmu_cpu_dead(int cpu) { struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + release_arch_pebs_buf_on_cpu(cpu); intel_cpuc_finish(cpuc); if (is_hybrid() && cpuc->pmu) @@ -6250,7 +6625,7 @@ tsx_is_visible(struct kobject *kobj, struct attribute *attr, int i) static umode_t pebs_is_visible(struct kobject *kobj, struct attribute *attr, int i) { - return x86_pmu.ds_pebs ? attr->mode : 0; + return intel_pmu_has_pebs() ? attr->mode : 0; } static umode_t @@ -6940,8 +7315,11 @@ __init int intel_pmu_init(void) * Many features on and after V6 require dynamic constraint, * e.g., Arch PEBS, ACR. */ - if (version >= 6) + if (version >= 6) { x86_pmu.flags |= PMU_FL_DYN_CONSTRAINT; + x86_pmu.late_setup = intel_pmu_late_setup; + } + /* * Install the hw-cache-events table: */ @@ -7596,6 +7974,7 @@ __init int intel_pmu_init(void) break; case INTEL_PANTHERLAKE_L: + case INTEL_WILDCATLAKE_L: pr_cont("Pantherlake Hybrid events, "); name = "pantherlake_hybrid"; goto lnl_common; @@ -7726,6 +8105,14 @@ __init int intel_pmu_init(void) if (!is_hybrid() && boot_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT)) update_pmu_cap(NULL); + if (x86_pmu.arch_pebs) { + static_call_update(intel_pmu_disable_event_ext, + intel_pmu_disable_event_ext); + static_call_update(intel_pmu_enable_event_ext, + intel_pmu_enable_event_ext); + pr_cont("Architectural PEBS, "); + } + intel_pmu_check_counters_mask(&x86_pmu.cntr_mask64, &x86_pmu.fixed_cntr_mask64, &x86_pmu.intel_ctrl); @@ -7734,10 +8121,8 @@ __init int intel_pmu_init(void) if (x86_pmu.intel_cap.anythread_deprecated) x86_pmu.format_attrs = intel_arch_formats_attr; - intel_pmu_check_event_constraints(x86_pmu.event_constraints, - x86_pmu.cntr_mask64, - x86_pmu.fixed_cntr_mask64, - x86_pmu.intel_ctrl); + intel_pmu_check_event_constraints_all(NULL); + /* * Access LBR MSR may cause #GP under certain circumstances. * Check all LBR MSR here. diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index ec753e39b007..fa67fda6e45b 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -41,7 +41,7 @@ * MSR_CORE_C1_RES: CORE C1 Residency Counter * perf code: 0x00 * Available model: SLM,AMT,GLM,CNL,ICX,TNT,ADL,RPL - * MTL,SRF,GRR,ARL,LNL + * MTL,SRF,GRR,ARL,LNL,PTL * Scope: Core (each processor core has a MSR) * MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter * perf code: 0x01 @@ -53,31 +53,32 @@ * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, * SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX, * TGL,TNT,RKL,ADL,RPL,SPR,MTL,SRF, - * GRR,ARL,LNL + * GRR,ARL,LNL,PTL * Scope: Core * MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter * perf code: 0x03 * Available model: SNB,IVB,HSW,BDW,SKL,CNL,KBL,CML, - * ICL,TGL,RKL,ADL,RPL,MTL,ARL,LNL + * ICL,TGL,RKL,ADL,RPL,MTL,ARL,LNL, + * PTL * Scope: Core * MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter. * perf code: 0x00 * Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL, * KBL,CML,ICL,ICX,TGL,TNT,RKL,ADL, - * RPL,SPR,MTL,ARL,LNL,SRF + * RPL,SPR,MTL,ARL,LNL,SRF,PTL * Scope: Package (physical package) * MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter. * perf code: 0x01 * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL, * GLM,CNL,KBL,CML,ICL,TGL,TNT,RKL, - * ADL,RPL,MTL,ARL,LNL + * ADL,RPL,MTL,ARL * Scope: Package (physical package) * MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter. * perf code: 0x02 * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, * SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX, * TGL,TNT,RKL,ADL,RPL,SPR,MTL,SRF, - * ARL,LNL + * ARL,LNL,PTL * Scope: Package (physical package) * MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter. * perf code: 0x03 @@ -96,7 +97,7 @@ * MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter. * perf code: 0x06 * Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL, - * TNT,RKL,ADL,RPL,MTL,ARL,LNL + * TNT,RKL,ADL,RPL,MTL,ARL,LNL,PTL * Scope: Package (physical package) * MSR_MODULE_C6_RES_MS: Module C6 Residency Counter. * perf code: 0x00 @@ -522,7 +523,6 @@ static const struct cstate_model lnl_cstates __initconst = { BIT(PERF_CSTATE_CORE_C7_RES), .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) | - BIT(PERF_CSTATE_PKG_C3_RES) | BIT(PERF_CSTATE_PKG_C6_RES) | BIT(PERF_CSTATE_PKG_C10_RES), }; @@ -628,6 +628,7 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, &adl_cstates), X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, &srf_cstates), X86_MATCH_VFM(INTEL_ATOM_CRESTMONT, &grr_cstates), + X86_MATCH_VFM(INTEL_ATOM_DARKMONT_X, &srf_cstates), X86_MATCH_VFM(INTEL_ICELAKE_L, &icl_cstates), X86_MATCH_VFM(INTEL_ICELAKE, &icl_cstates), @@ -652,6 +653,7 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_MATCH_VFM(INTEL_ARROWLAKE_H, &adl_cstates), X86_MATCH_VFM(INTEL_ARROWLAKE_U, &adl_cstates), X86_MATCH_VFM(INTEL_LUNARLAKE_M, &lnl_cstates), + X86_MATCH_VFM(INTEL_PANTHERLAKE_L, &lnl_cstates), { }, }; MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index c0b7ac1c7594..feb1c3cf63e4 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -317,7 +317,8 @@ static u64 __grt_latency_data(struct perf_event *event, u64 status, { u64 val; - WARN_ON_ONCE(hybrid_pmu(event->pmu)->pmu_type == hybrid_big); + WARN_ON_ONCE(is_hybrid() && + hybrid_pmu(event->pmu)->pmu_type == hybrid_big); dse &= PERF_PEBS_DATA_SOURCE_GRT_MASK; val = hybrid_var(event->pmu, pebs_data_source)[dse]; @@ -625,13 +626,18 @@ static int alloc_pebs_buffer(int cpu) int max, node = cpu_to_node(cpu); void *buffer, *insn_buff, *cea; - if (!x86_pmu.ds_pebs) + if (!intel_pmu_has_pebs()) return 0; buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu); if (unlikely(!buffer)) return -ENOMEM; + if (x86_pmu.arch_pebs) { + hwev->pebs_vaddr = buffer; + return 0; + } + /* * HSW+ already provides us the eventing ip; no need to allocate this * buffer then. @@ -644,7 +650,7 @@ static int alloc_pebs_buffer(int cpu) } per_cpu(insn_buffer, cpu) = insn_buff; } - hwev->ds_pebs_vaddr = buffer; + hwev->pebs_vaddr = buffer; /* Update the cpu entry area mapping */ cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer; ds->pebs_buffer_base = (unsigned long) cea; @@ -660,17 +666,20 @@ static void release_pebs_buffer(int cpu) struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu); void *cea; - if (!x86_pmu.ds_pebs) + if (!intel_pmu_has_pebs()) return; - kfree(per_cpu(insn_buffer, cpu)); - per_cpu(insn_buffer, cpu) = NULL; + if (x86_pmu.ds_pebs) { + kfree(per_cpu(insn_buffer, cpu)); + per_cpu(insn_buffer, cpu) = NULL; - /* Clear the fixmap */ - cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer; - ds_clear_cea(cea, x86_pmu.pebs_buffer_size); - dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size); - hwev->ds_pebs_vaddr = NULL; + /* Clear the fixmap */ + cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer; + ds_clear_cea(cea, x86_pmu.pebs_buffer_size); + } + + dsfree_pages(hwev->pebs_vaddr, x86_pmu.pebs_buffer_size); + hwev->pebs_vaddr = NULL; } static int alloc_bts_buffer(int cpu) @@ -823,6 +832,56 @@ void reserve_ds_buffers(void) } } +inline int alloc_arch_pebs_buf_on_cpu(int cpu) +{ + if (!x86_pmu.arch_pebs) + return 0; + + return alloc_pebs_buffer(cpu); +} + +inline void release_arch_pebs_buf_on_cpu(int cpu) +{ + if (!x86_pmu.arch_pebs) + return; + + release_pebs_buffer(cpu); +} + +void init_arch_pebs_on_cpu(int cpu) +{ + struct cpu_hw_events *cpuc = per_cpu_ptr(&cpu_hw_events, cpu); + u64 arch_pebs_base; + + if (!x86_pmu.arch_pebs) + return; + + if (!cpuc->pebs_vaddr) { + WARN(1, "Fail to allocate PEBS buffer on CPU %d\n", cpu); + x86_pmu.pebs_active = 0; + return; + } + + /* + * 4KB-aligned pointer of the output buffer + * (__alloc_pages_node() return page aligned address) + * Buffer Size = 4KB * 2^SIZE + * contiguous physical buffer (__alloc_pages_node() with order) + */ + arch_pebs_base = virt_to_phys(cpuc->pebs_vaddr) | PEBS_BUFFER_SHIFT; + wrmsr_on_cpu(cpu, MSR_IA32_PEBS_BASE, (u32)arch_pebs_base, + (u32)(arch_pebs_base >> 32)); + x86_pmu.pebs_active = 1; +} + +inline void fini_arch_pebs_on_cpu(int cpu) +{ + if (!x86_pmu.arch_pebs) + return; + + wrmsr_on_cpu(cpu, MSR_IA32_PEBS_BASE, 0, 0); +} + /* * BTS */ @@ -1470,6 +1529,25 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, } } +u64 intel_get_arch_pebs_data_config(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + u64 pebs_data_cfg = 0; + u64 cntr_mask; + + if (WARN_ON(event->hw.idx < 0 || event->hw.idx >= X86_PMC_IDX_MAX)) + return 0; + + pebs_data_cfg |= pebs_update_adaptive_cfg(event); + + cntr_mask = (PEBS_DATACFG_CNTR_MASK << PEBS_DATACFG_CNTR_SHIFT) | + (PEBS_DATACFG_FIX_MASK << PEBS_DATACFG_FIX_SHIFT) | + PEBS_DATACFG_CNTR | PEBS_DATACFG_METRICS; + pebs_data_cfg |= cpuc->pebs_data_cfg & cntr_mask; + + return pebs_data_cfg; +} + void intel_pmu_pebs_add(struct perf_event *event) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); @@ -1531,6 +1609,15 @@ static inline void intel_pmu_drain_large_pebs(struct cpu_hw_events *cpuc) intel_pmu_drain_pebs_buffer(); } +static void __intel_pmu_pebs_enable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + + hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; + cpuc->pebs_enabled |= 1ULL << hwc->idx; +} + void intel_pmu_pebs_enable(struct perf_event *event) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); @@ -1539,9 +1626,7 @@ void intel_pmu_pebs_enable(struct perf_event *event) struct debug_store *ds = cpuc->ds; unsigned int idx = hwc->idx; - hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; - - cpuc->pebs_enabled |= 1ULL << hwc->idx; + __intel_pmu_pebs_enable(event); if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) && (x86_pmu.version < 5)) cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32); @@ -1603,14 +1688,22 @@ void intel_pmu_pebs_del(struct perf_event *event) pebs_update_state(needed_cb, cpuc, event, false); } -void intel_pmu_pebs_disable(struct perf_event *event) +static void __intel_pmu_pebs_disable(struct perf_event *event) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; intel_pmu_drain_large_pebs(cpuc); - cpuc->pebs_enabled &= ~(1ULL << hwc->idx); + hwc->config |= ARCH_PERFMON_EVENTSEL_INT; +} + +void intel_pmu_pebs_disable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + + __intel_pmu_pebs_disable(event); if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) && (x86_pmu.version < 5)) @@ -1622,8 +1715,6 @@ void intel_pmu_pebs_disable(struct perf_event *event) if (cpuc->enabled) wrmsrq(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); - - hwc->config |= ARCH_PERFMON_EVENTSEL_INT; } void intel_pmu_pebs_enable_all(void) @@ -2059,6 +2150,90 @@ static inline void __setup_pebs_counter_group(struct cpu_hw_events *cpuc, #define PEBS_LATENCY_MASK 0xffff +static inline void __setup_perf_sample_data(struct perf_event *event, + struct pt_regs *iregs, + struct perf_sample_data *data) +{ + perf_sample_data_init(data, 0, event->hw.last_period); + + /* + * We must however always use iregs for the unwinder to stay sane; the + * record BP,SP,IP can point into thin air when the record is from a + * previous PMI context or an (I)RET happened between the record and + * PMI. + */ + perf_sample_save_callchain(data, event, iregs); +} + +static inline void __setup_pebs_basic_group(struct perf_event *event, + struct pt_regs *regs, + struct perf_sample_data *data, + u64 sample_type, u64 ip, + u64 tsc, u16 retire) +{ + /* The ip in basic is EventingIP */ + set_linear_ip(regs, ip); + regs->flags = PERF_EFLAGS_EXACT; + setup_pebs_time(event, data, tsc); + + if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) + data->weight.var3_w = retire; +} + +static inline void __setup_pebs_gpr_group(struct perf_event *event, + struct pt_regs *regs, + struct pebs_gprs *gprs, + u64 sample_type) +{ + if (event->attr.precise_ip < 2) { + set_linear_ip(regs, gprs->ip); + regs->flags &= ~PERF_EFLAGS_EXACT; + } + + if (sample_type & (PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER)) + adaptive_pebs_save_regs(regs, gprs); +} + +static inline void __setup_pebs_meminfo_group(struct perf_event *event, + struct perf_sample_data *data, + u64 sample_type, u64 latency, + u16 instr_latency, u64 address, + u64 aux, u64 tsx_tuning, u64 ax) +{ + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) { + u64 tsx_latency = intel_get_tsx_weight(tsx_tuning); + + data->weight.var2_w = instr_latency; + + /* + * Although meminfo::latency is defined as a u64, + * only the lower 32 bits include the valid data + * in practice on Ice Lake and earlier platforms. + */ + if (sample_type & PERF_SAMPLE_WEIGHT) + data->weight.full = latency ?: tsx_latency; + else + data->weight.var1_dw = (u32)latency ?: tsx_latency; + + data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE; + } + + if (sample_type & PERF_SAMPLE_DATA_SRC) { + data->data_src.val = get_data_src(event, aux); + data->sample_flags |= PERF_SAMPLE_DATA_SRC; + } + + if (sample_type & PERF_SAMPLE_ADDR_TYPE) { + data->addr = address; + data->sample_flags |= PERF_SAMPLE_ADDR; + } + + if (sample_type & PERF_SAMPLE_TRANSACTION) { + data->txn = intel_get_tsx_transaction(tsx_tuning, ax); + data->sample_flags |= PERF_SAMPLE_TRANSACTION; + } +} + /* * With adaptive PEBS the layout depends on what fields are configured. */ @@ -2068,12 +2243,14 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, struct pt_regs *regs) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + u64 sample_type = event->attr.sample_type; struct pebs_basic *basic = __pebs; void *next_record = basic + 1; - u64 sample_type, format_group; struct pebs_meminfo *meminfo = NULL; struct pebs_gprs *gprs = NULL; struct x86_perf_regs *perf_regs; + u64 format_group; + u16 retire; if (basic == NULL) return; @@ -2081,31 +2258,17 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, perf_regs = container_of(regs, struct x86_perf_regs, regs); perf_regs->xmm_regs = NULL; - sample_type = event->attr.sample_type; format_group = basic->format_group; - perf_sample_data_init(data, 0, event->hw.last_period); - setup_pebs_time(event, data, basic->tsc); - - /* - * We must however always use iregs for the unwinder to stay sane; the - * record BP,SP,IP can point into thin air when the record is from a - * previous PMI context or an (I)RET happened between the record and - * PMI. - */ - perf_sample_save_callchain(data, event, iregs); + __setup_perf_sample_data(event, iregs, data); *regs = *iregs; - /* The ip in basic is EventingIP */ - set_linear_ip(regs, basic->ip); - regs->flags = PERF_EFLAGS_EXACT; - if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) { - if (x86_pmu.flags & PMU_FL_RETIRE_LATENCY) - data->weight.var3_w = basic->retire_latency; - else - data->weight.var3_w = 0; - } + /* basic group */ + retire = x86_pmu.flags & PMU_FL_RETIRE_LATENCY ? + basic->retire_latency : 0; + __setup_pebs_basic_group(event, regs, data, sample_type, + basic->ip, basic->tsc, retire); /* * The record for MEMINFO is in front of GP @@ -2121,54 +2284,20 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, gprs = next_record; next_record = gprs + 1; - if (event->attr.precise_ip < 2) { - set_linear_ip(regs, gprs->ip); - regs->flags &= ~PERF_EFLAGS_EXACT; - } - - if (sample_type & (PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER)) - adaptive_pebs_save_regs(regs, gprs); + __setup_pebs_gpr_group(event, regs, gprs, sample_type); } if (format_group & PEBS_DATACFG_MEMINFO) { - if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) { - u64 latency = x86_pmu.flags & PMU_FL_INSTR_LATENCY ? - meminfo->cache_latency : meminfo->mem_latency; - - if (x86_pmu.flags & PMU_FL_INSTR_LATENCY) - data->weight.var2_w = meminfo->instr_latency; - - /* - * Although meminfo::latency is defined as a u64, - * only the lower 32 bits include the valid data - * in practice on Ice Lake and earlier platforms. - */ - if (sample_type & PERF_SAMPLE_WEIGHT) { - data->weight.full = latency ?: - intel_get_tsx_weight(meminfo->tsx_tuning); - } else { - data->weight.var1_dw = (u32)latency ?: - intel_get_tsx_weight(meminfo->tsx_tuning); - } - - data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE; - } - - if (sample_type & PERF_SAMPLE_DATA_SRC) { - data->data_src.val = get_data_src(event, meminfo->aux); - data->sample_flags |= PERF_SAMPLE_DATA_SRC; - } - - if (sample_type & PERF_SAMPLE_ADDR_TYPE) { - data->addr = meminfo->address; - data->sample_flags |= PERF_SAMPLE_ADDR; - } - - if (sample_type & PERF_SAMPLE_TRANSACTION) { - data->txn = intel_get_tsx_transaction(meminfo->tsx_tuning, - gprs ? gprs->ax : 0); - data->sample_flags |= PERF_SAMPLE_TRANSACTION; - } + u64 latency = x86_pmu.flags & PMU_FL_INSTR_LATENCY ? + meminfo->cache_latency : meminfo->mem_latency; + u64 instr_latency = x86_pmu.flags & PMU_FL_INSTR_LATENCY ? + meminfo->instr_latency : 0; + u64 ax = gprs ? gprs->ax : 0; + + __setup_pebs_meminfo_group(event, data, sample_type, latency, + instr_latency, meminfo->address, + meminfo->aux, meminfo->tsx_tuning, + ax); } if (format_group & PEBS_DATACFG_XMMS) { @@ -2219,6 +2348,135 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, format_group); } +static inline bool arch_pebs_record_continued(struct arch_pebs_header *header) +{ + /* Continue bit or null PEBS record indicates fragment follows. */ + return header->cont || !(header->format & GENMASK_ULL(63, 16)); +} + +static void setup_arch_pebs_sample_data(struct perf_event *event, + struct pt_regs *iregs, + void *__pebs, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + u64 sample_type = event->attr.sample_type; + struct arch_pebs_header *header = NULL; + struct arch_pebs_aux *meminfo = NULL; + struct arch_pebs_gprs *gprs = NULL; + struct x86_perf_regs *perf_regs; + void *next_record; + void *at = __pebs; + + if (at == NULL) + return; + + perf_regs = container_of(regs, struct x86_perf_regs, regs); + perf_regs->xmm_regs = NULL; + + __setup_perf_sample_data(event, iregs, data); + + *regs = *iregs; + +again: + header = at; + next_record = at + sizeof(struct arch_pebs_header); + if (header->basic) { + struct arch_pebs_basic *basic = next_record; + u16 retire = 0; + + next_record = basic + 1; + + if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) + retire = basic->valid ? basic->retire : 0; + __setup_pebs_basic_group(event, regs, data, sample_type, + basic->ip, basic->tsc, retire); + } + + /* + * The record for MEMINFO is in front of GP + * But PERF_SAMPLE_TRANSACTION needs gprs->ax. + * Save the pointer here but process later. + */ + if (header->aux) { + meminfo = next_record; + next_record = meminfo + 1; + } + + if (header->gpr) { + gprs = next_record; + next_record = gprs + 1; + + __setup_pebs_gpr_group(event, regs, + (struct pebs_gprs *)gprs, + sample_type); + } + + if (header->aux) { + u64 ax = gprs ? gprs->ax : 0; + + __setup_pebs_meminfo_group(event, data, sample_type, + meminfo->cache_latency, + meminfo->instr_latency, + meminfo->address, meminfo->aux, + meminfo->tsx_tuning, ax); + } + + if (header->xmm) { + struct pebs_xmm *xmm; + + next_record += sizeof(struct arch_pebs_xer_header); + + xmm = next_record; + perf_regs->xmm_regs = xmm->xmm; + next_record = xmm + 1; + } + + if (header->lbr) { + struct arch_pebs_lbr_header *lbr_header = next_record; + struct lbr_entry *lbr; + int num_lbr; + + next_record = lbr_header + 1; + lbr = next_record; + + num_lbr = header->lbr == ARCH_PEBS_LBR_NUM_VAR ? + lbr_header->depth : + header->lbr * ARCH_PEBS_BASE_LBR_ENTRIES; + next_record += num_lbr * sizeof(struct lbr_entry); + + if (has_branch_stack(event)) { + intel_pmu_store_pebs_lbrs(lbr); + intel_pmu_lbr_save_brstack(data, cpuc, event); + } + } + + if (header->cntr) { + struct arch_pebs_cntr_header *cntr = next_record; + unsigned int nr; + + next_record += sizeof(struct arch_pebs_cntr_header); + + if (is_pebs_counter_event_group(event)) { + __setup_pebs_counter_group(cpuc, event, + (struct pebs_cntr_header *)cntr, next_record); + data->sample_flags |= PERF_SAMPLE_READ; + } + + nr = hweight32(cntr->cntr) + hweight32(cntr->fixed); + if (cntr->metrics == INTEL_CNTR_METRICS) + nr += 2; + next_record += nr * sizeof(u64); + } + + /* Parse followed fragments if there are. */ + if (arch_pebs_record_continued(header)) { + at = at + header->size; + goto again; + } +} + static inline void * get_next_pebs_record_by_bit(void *base, void *top, int bit) { @@ -2601,6 +2859,57 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d } } +static __always_inline void +__intel_pmu_handle_pebs_record(struct pt_regs *iregs, + struct pt_regs *regs, + struct perf_sample_data *data, + void *at, u64 pebs_status, + short *counts, void **last, + setup_fn setup_sample) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct perf_event *event; + int bit; + + for_each_set_bit(bit, (unsigned long *)&pebs_status, X86_PMC_IDX_MAX) { + event = cpuc->events[bit]; + + if (WARN_ON_ONCE(!event) || + WARN_ON_ONCE(!event->attr.precise_ip)) + continue; + + if (counts[bit]++) { + __intel_pmu_pebs_event(event, iregs, regs, data, + last[bit], setup_sample); + } + + last[bit] = at; + } +} + +static __always_inline void +__intel_pmu_handle_last_pebs_record(struct pt_regs *iregs, + struct pt_regs *regs, + struct perf_sample_data *data, + u64 mask, short *counts, void **last, + setup_fn setup_sample) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct perf_event *event; + int bit; + + for_each_set_bit(bit, (unsigned long *)&mask, X86_PMC_IDX_MAX) { + if (!counts[bit]) + continue; + + event = cpuc->events[bit]; + + __intel_pmu_pebs_last_event(event, iregs, regs, data, last[bit], + counts[bit], setup_sample); + } + +} + static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_data *data) { short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {}; @@ -2610,9 +2919,7 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d struct x86_perf_regs perf_regs; struct pt_regs *regs = &perf_regs.regs; struct pebs_basic *basic; - struct perf_event *event; void *base, *at, *top; - int bit; u64 mask; if (!x86_pmu.pebs_active) @@ -2625,6 +2932,7 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d mask = hybrid(cpuc->pmu, pebs_events_mask) | (hybrid(cpuc->pmu, fixed_cntr_mask64) << INTEL_PMC_IDX_FIXED); + mask &= cpuc->pebs_enabled; if (unlikely(base >= top)) { intel_pmu_pebs_event_update_no_drain(cpuc, mask); @@ -2642,38 +2950,114 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d if (basic->format_size != cpuc->pebs_record_size) continue; - pebs_status = basic->applicable_counters & cpuc->pebs_enabled & mask; - for_each_set_bit(bit, (unsigned long *)&pebs_status, X86_PMC_IDX_MAX) { - event = cpuc->events[bit]; + pebs_status = mask & basic->applicable_counters; + __intel_pmu_handle_pebs_record(iregs, regs, data, at, + pebs_status, counts, last, + setup_pebs_adaptive_sample_data); + } - if (WARN_ON_ONCE(!event) || - WARN_ON_ONCE(!event->attr.precise_ip)) - continue; + __intel_pmu_handle_last_pebs_record(iregs, regs, data, mask, counts, last, + setup_pebs_adaptive_sample_data); +} - if (counts[bit]++) { - __intel_pmu_pebs_event(event, iregs, regs, data, last[bit], - setup_pebs_adaptive_sample_data); - } - last[bit] = at; - } +static void intel_pmu_drain_arch_pebs(struct pt_regs *iregs, + struct perf_sample_data *data) +{ + short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {}; + void *last[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS]; + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + union arch_pebs_index index; + struct x86_perf_regs perf_regs; + struct pt_regs *regs = &perf_regs.regs; + void *base, *at, *top; + u64 mask; + + rdmsrq(MSR_IA32_PEBS_INDEX, index.whole); + + if (unlikely(!index.wr)) { + intel_pmu_pebs_event_update_no_drain(cpuc, X86_PMC_IDX_MAX); + return; } - for_each_set_bit(bit, (unsigned long *)&mask, X86_PMC_IDX_MAX) { - if (!counts[bit]) + base = cpuc->pebs_vaddr; + top = cpuc->pebs_vaddr + (index.wr << ARCH_PEBS_INDEX_WR_SHIFT); + + index.wr = 0; + index.full = 0; + index.en = 1; + if (cpuc->n_pebs == cpuc->n_large_pebs) + index.thresh = ARCH_PEBS_THRESH_MULTI; + else + index.thresh = ARCH_PEBS_THRESH_SINGLE; + wrmsrq(MSR_IA32_PEBS_INDEX, index.whole); + + mask = hybrid(cpuc->pmu, arch_pebs_cap).counters & cpuc->pebs_enabled; + + if (!iregs) + iregs = &dummy_iregs; + + /* Process all but the last event for each counter. */ + for (at = base; at < top;) { + struct arch_pebs_header *header; + struct arch_pebs_basic *basic; + u64 pebs_status; + + header = at; + + if (WARN_ON_ONCE(!header->size)) + break; + + /* 1st fragment or single record must have basic group */ + if (!header->basic) { + at += header->size; continue; + } - event = cpuc->events[bit]; + basic = at + sizeof(struct arch_pebs_header); + pebs_status = mask & basic->applicable_counters; + __intel_pmu_handle_pebs_record(iregs, regs, data, at, + pebs_status, counts, last, + setup_arch_pebs_sample_data); + + /* Skip non-last fragments */ + while (arch_pebs_record_continued(header)) { + if (!header->size) + break; + at += header->size; + header = at; + } - __intel_pmu_pebs_last_event(event, iregs, regs, data, last[bit], - counts[bit], setup_pebs_adaptive_sample_data); + /* Skip last fragment or the single record */ + at += header->size; } + + __intel_pmu_handle_last_pebs_record(iregs, regs, data, mask, + counts, last, + setup_arch_pebs_sample_data); +} + +static void __init intel_arch_pebs_init(void) +{ + /* + * Current hybrid platforms always both support arch-PEBS or not + * on all kinds of cores. So directly set x86_pmu.arch_pebs flag + * if boot cpu supports arch-PEBS. + */ + x86_pmu.arch_pebs = 1; + x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE; + x86_pmu.drain_pebs = intel_pmu_drain_arch_pebs; + x86_pmu.pebs_capable = ~0ULL; + x86_pmu.flags |= PMU_FL_PEBS_ALL; + + x86_pmu.pebs_enable = __intel_pmu_pebs_enable; + x86_pmu.pebs_disable = __intel_pmu_pebs_disable; } /* * PEBS probe and setup */ -void __init intel_pebs_init(void) +static void __init intel_ds_pebs_init(void) { /* * No support for 32bit formats @@ -2735,10 +3119,8 @@ void __init intel_pebs_init(void) break; case 6: - if (x86_pmu.intel_cap.pebs_baseline) { + if (x86_pmu.intel_cap.pebs_baseline) x86_pmu.large_pebs_flags |= PERF_SAMPLE_READ; - x86_pmu.late_setup = intel_pmu_late_setup; - } fallthrough; case 5: x86_pmu.pebs_ept = 1; @@ -2788,6 +3170,14 @@ void __init intel_pebs_init(void) } } +void __init intel_pebs_init(void) +{ + if (x86_pmu.intel_cap.pebs_format == 0xf) + intel_arch_pebs_init(); + else + intel_ds_pebs_init(); +} + void perf_restore_debug_store(void) { struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds); diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 7aa59966e7c3..72f2adcda7c6 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include <linux/kvm_types.h> #include <linux/perf_event.h> #include <linux/types.h> @@ -1705,7 +1706,7 @@ void x86_perf_get_lbr(struct x86_pmu_lbr *lbr) lbr->info = x86_pmu.lbr_info; lbr->has_callstack = x86_pmu_has_lbr_callstack(); } -EXPORT_SYMBOL_GPL(x86_perf_get_lbr); +EXPORT_SYMBOL_FOR_KVM(x86_perf_get_lbr); struct event_constraint vlbr_constraint = __EVENT_CONSTRAINT(INTEL_FIXED_VLBR_EVENT, (1ULL << INTEL_PMC_IDX_FIXED_VLBR), diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index e8cf29d2b10c..44524a387c58 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -17,6 +17,7 @@ #include <linux/limits.h> #include <linux/slab.h> #include <linux/device.h> +#include <linux/kvm_types.h> #include <asm/cpuid/api.h> #include <asm/perf_event.h> @@ -82,13 +83,13 @@ u32 intel_pt_validate_cap(u32 *caps, enum pt_capabilities capability) return (c & cd->mask) >> shift; } -EXPORT_SYMBOL_GPL(intel_pt_validate_cap); +EXPORT_SYMBOL_FOR_KVM(intel_pt_validate_cap); u32 intel_pt_validate_hw_cap(enum pt_capabilities cap) { return intel_pt_validate_cap(pt_pmu.caps, cap); } -EXPORT_SYMBOL_GPL(intel_pt_validate_hw_cap); +EXPORT_SYMBOL_FOR_KVM(intel_pt_validate_hw_cap); static ssize_t pt_cap_show(struct device *cdev, struct device_attribute *attr, @@ -1590,7 +1591,7 @@ void intel_pt_handle_vmx(int on) local_irq_restore(flags); } -EXPORT_SYMBOL_GPL(intel_pt_handle_vmx); +EXPORT_SYMBOL_FOR_KVM(intel_pt_handle_vmx); /* * PMU callbacks diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index a762f7f5b161..e228e564b15e 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1325,8 +1325,6 @@ static void uncore_pci_sub_driver_init(void) continue; pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)]; - if (!pmu) - continue; if (uncore_pci_get_dev_die_info(pci_sub_dev, &die)) continue; @@ -1895,6 +1893,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_MATCH_VFM(INTEL_ARROWLAKE_H, &mtl_uncore_init), X86_MATCH_VFM(INTEL_LUNARLAKE_M, &lnl_uncore_init), X86_MATCH_VFM(INTEL_PANTHERLAKE_L, &ptl_uncore_init), + X86_MATCH_VFM(INTEL_WILDCATLAKE_L, &ptl_uncore_init), X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, &spr_uncore_init), X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &spr_uncore_init), X86_MATCH_VFM(INTEL_GRANITERAPIDS_X, &gnr_uncore_init), diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 2b969386dcdd..3161ec0a3416 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -283,8 +283,9 @@ struct cpu_hw_events { * Intel DebugStore bits */ struct debug_store *ds; - void *ds_pebs_vaddr; void *ds_bts_vaddr; + /* DS based PEBS or arch-PEBS buffer address */ + void *pebs_vaddr; u64 pebs_enabled; int n_pebs; int n_large_pebs; @@ -303,6 +304,8 @@ struct cpu_hw_events { /* Intel ACR configuration */ u64 acr_cfg_b[X86_PMC_IDX_MAX]; u64 acr_cfg_c[X86_PMC_IDX_MAX]; + /* Cached CFG_C values */ + u64 cfg_c_val[X86_PMC_IDX_MAX]; /* * Intel LBR bits @@ -708,6 +711,12 @@ enum hybrid_pmu_type { hybrid_big_small_tiny = hybrid_big | hybrid_small_tiny, }; +struct arch_pebs_cap { + u64 caps; + u64 counters; + u64 pdists; +}; + struct x86_hybrid_pmu { struct pmu pmu; const char *name; @@ -752,6 +761,8 @@ struct x86_hybrid_pmu { mid_ack :1, enabled_ack :1; + struct arch_pebs_cap arch_pebs_cap; + u64 pebs_data_source[PERF_PEBS_DATA_SOURCE_MAX]; }; @@ -906,7 +917,7 @@ struct x86_pmu { union perf_capabilities intel_cap; /* - * Intel DebugStore bits + * Intel DebugStore and PEBS bits */ unsigned int bts :1, bts_active :1, @@ -917,7 +928,8 @@ struct x86_pmu { pebs_no_tlb :1, pebs_no_isolation :1, pebs_block :1, - pebs_ept :1; + pebs_ept :1, + arch_pebs :1; int pebs_record_size; int pebs_buffer_size; u64 pebs_events_mask; @@ -930,6 +942,11 @@ struct x86_pmu { u64 pebs_capable; /* + * Intel Architectural PEBS + */ + struct arch_pebs_cap arch_pebs_cap; + + /* * Intel LBR */ unsigned int lbr_tos, lbr_from, lbr_to, @@ -1124,7 +1141,6 @@ static struct perf_pmu_format_hybrid_attr format_attr_hybrid_##_name = {\ .pmu_type = _pmu, \ } -int is_x86_event(struct perf_event *event); struct pmu *x86_get_pmu(unsigned int cpu); extern struct x86_pmu x86_pmu __read_mostly; @@ -1217,7 +1233,7 @@ int x86_reserve_hardware(void); void x86_release_hardware(void); -int x86_pmu_max_precise(void); +int x86_pmu_max_precise(struct pmu *pmu); void hw_perf_lbr_event_destroy(struct perf_event *event); @@ -1604,6 +1620,14 @@ extern void intel_cpuc_finish(struct cpu_hw_events *cpuc); int intel_pmu_init(void); +int alloc_arch_pebs_buf_on_cpu(int cpu); + +void release_arch_pebs_buf_on_cpu(int cpu); + +void init_arch_pebs_on_cpu(int cpu); + +void fini_arch_pebs_on_cpu(int cpu); + void init_debug_store_on_cpu(int cpu); void fini_debug_store_on_cpu(int cpu); @@ -1760,6 +1784,8 @@ void intel_pmu_pebs_data_source_cmt(void); void intel_pmu_pebs_data_source_lnl(void); +u64 intel_get_arch_pebs_data_config(struct perf_event *event); + int intel_pmu_setup_lbr_filter(struct perf_event *event); void intel_pt_interrupt(void); @@ -1792,6 +1818,11 @@ static inline int intel_pmu_max_num_pebs(struct pmu *pmu) return fls((u32)hybrid(pmu, pebs_events_mask)); } +static inline bool intel_pmu_has_pebs(void) +{ + return x86_pmu.ds_pebs || x86_pmu.arch_pebs; +} + #else /* CONFIG_CPU_SUP_INTEL */ static inline void reserve_ds_buffers(void) diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 15bc07a5ebb3..b14c045679e1 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -198,6 +198,7 @@ static inline int alternatives_text_reserved(void *start, void *end) #define ALTINSTR_ENTRY(ft_flags) \ ".pushsection .altinstructions,\"a\"\n" \ + ANNOTATE_DATA_SPECIAL \ " .long 771b - .\n" /* label */ \ " .long 774f - .\n" /* new instruction */ \ " .4byte " __stringify(ft_flags) "\n" /* feature + flags */ \ @@ -207,6 +208,7 @@ static inline int alternatives_text_reserved(void *start, void *end) #define ALTINSTR_REPLACEMENT(newinstr) /* replacement */ \ ".pushsection .altinstr_replacement, \"ax\"\n" \ + ANNOTATE_DATA_SPECIAL \ "# ALT: replacement\n" \ "774:\n\t" newinstr "\n775:\n" \ ".popsection\n" @@ -337,6 +339,7 @@ void nop_func(void); * instruction. See apply_alternatives(). */ .macro altinstr_entry orig alt ft_flags orig_len alt_len + ANNOTATE_DATA_SPECIAL .long \orig - . .long \alt - . .4byte \ft_flags @@ -365,6 +368,7 @@ void nop_func(void); .popsection ; \ .pushsection .altinstr_replacement,"ax" ; \ 743: \ + ANNOTATE_DATA_SPECIAL ; \ newinst ; \ 744: \ .popsection ; diff --git a/arch/x86/include/asm/amd/node.h b/arch/x86/include/asm/amd/node.h index 23fe617898a8..a672b8765fa8 100644 --- a/arch/x86/include/asm/amd/node.h +++ b/arch/x86/include/asm/amd/node.h @@ -23,7 +23,6 @@ #define AMD_NODE0_PCI_SLOT 0x18 struct pci_dev *amd_node_get_func(u16 node, u8 func); -struct pci_dev *amd_node_get_root(u16 node); static inline u16 amd_num_nodes(void) { diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index d5c8d3afe196..bd62bd87a841 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_ASM_H #define _ASM_X86_ASM_H +#include <linux/annotate.h> + #ifdef __ASSEMBLER__ # define __ASM_FORM(x, ...) x,## __VA_ARGS__ # define __ASM_FORM_RAW(x, ...) x,## __VA_ARGS__ @@ -132,6 +134,7 @@ static __always_inline __pure void *rip_rel_ptr(void *p) # define _ASM_EXTABLE_TYPE(from, to, type) \ .pushsection "__ex_table","a" ; \ .balign 4 ; \ + ANNOTATE_DATA_SPECIAL ; \ .long (from) - . ; \ .long (to) - . ; \ .long type ; \ @@ -179,6 +182,7 @@ static __always_inline __pure void *rip_rel_ptr(void *p) # define _ASM_EXTABLE_TYPE(from, to, type) \ " .pushsection \"__ex_table\",\"a\"\n" \ " .balign 4\n" \ + ANNOTATE_DATA_SPECIAL \ " .long (" #from ") - .\n" \ " .long (" #to ") - .\n" \ " .long " __stringify(type) " \n" \ @@ -187,6 +191,7 @@ static __always_inline __pure void *rip_rel_ptr(void *p) # define _ASM_EXTABLE_TYPE_REG(from, to, type, reg) \ " .pushsection \"__ex_table\",\"a\"\n" \ " .balign 4\n" \ + ANNOTATE_DATA_SPECIAL \ " .long (" #from ") - .\n" \ " .long (" #to ") - .\n" \ DEFINE_EXTABLE_TYPE_REG \ diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index 880ca15073ed..ab5bba6cf7f5 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -7,6 +7,11 @@ #include <linux/objtool.h> #include <asm/asm.h> +#ifndef __ASSEMBLY__ +struct bug_entry; +extern void __WARN_trap(struct bug_entry *bug, ...); +#endif + /* * Despite that some emulators terminate on UD2, we use it for WARN(). */ @@ -31,52 +36,77 @@ #define BUG_UD2 0xfffe #define BUG_UD1 0xfffd #define BUG_UD1_UBSAN 0xfffc +#define BUG_UD1_WARN 0xfffb #define BUG_UDB 0xffd6 #define BUG_LOCK 0xfff0 #ifdef CONFIG_GENERIC_BUG -#ifdef CONFIG_X86_32 -# define __BUG_REL(val) ".long " val +#ifdef CONFIG_DEBUG_BUGVERBOSE +#define __BUG_ENTRY_VERBOSE(file, line) \ + "\t.long " file " - .\t# bug_entry::file\n" \ + "\t.word " line "\t# bug_entry::line\n" #else -# define __BUG_REL(val) ".long " val " - ." +#define __BUG_ENTRY_VERBOSE(file, line) #endif -#ifdef CONFIG_DEBUG_BUGVERBOSE -#define __BUG_ENTRY(file, line, flags) \ - "2:\t" __BUG_REL("1b") "\t# bug_entry::bug_addr\n" \ - "\t" __BUG_REL(file) "\t# bug_entry::file\n" \ - "\t.word " line "\t# bug_entry::line\n" \ - "\t.word " flags "\t# bug_entry::flags\n" +#if defined(CONFIG_X86_64) || defined(CONFIG_DEBUG_BUGVERBOSE_DETAILED) +#define HAVE_ARCH_BUG_FORMAT +#define __BUG_ENTRY_FORMAT(format) \ + "\t.long " format " - .\t# bug_entry::format\n" #else -#define __BUG_ENTRY(file, line, flags) \ - "2:\t" __BUG_REL("1b") "\t# bug_entry::bug_addr\n" \ - "\t.word " flags "\t# bug_entry::flags\n" +#define __BUG_ENTRY_FORMAT(format) +#endif + +#ifdef CONFIG_X86_64 +#define HAVE_ARCH_BUG_FORMAT_ARGS #endif -#define _BUG_FLAGS_ASM(ins, file, line, flags, size, extra) \ - "1:\t" ins "\n" \ - ".pushsection __bug_table,\"aw\"\n" \ - __BUG_ENTRY(file, line, flags) \ +#define __BUG_ENTRY(format, file, line, flags) \ + "\t.long 1b - ." "\t# bug_entry::bug_addr\n" \ + __BUG_ENTRY_FORMAT(format) \ + __BUG_ENTRY_VERBOSE(file, line) \ + "\t.word " flags "\t# bug_entry::flags\n" + +#define _BUG_FLAGS_ASM(format, file, line, flags, size, extra) \ + ".pushsection __bug_table,\"aw\"\n\t" \ + ANNOTATE_DATA_SPECIAL \ + "2:\n\t" \ + __BUG_ENTRY(format, file, line, flags) \ "\t.org 2b + " size "\n" \ ".popsection\n" \ extra -#define _BUG_FLAGS(ins, flags, extra) \ +#ifdef CONFIG_DEBUG_BUGVERBOSE_DETAILED +#define WARN_CONDITION_STR(cond_str) cond_str +#else +#define WARN_CONDITION_STR(cond_str) "" +#endif + +#define _BUG_FLAGS(cond_str, ins, flags, extra) \ do { \ - asm_inline volatile(_BUG_FLAGS_ASM(ins, "%c0", \ - "%c1", "%c2", "%c3", extra) \ - : : "i" (__FILE__), "i" (__LINE__), \ - "i" (flags), \ - "i" (sizeof(struct bug_entry))); \ + asm_inline volatile("1:\t" ins "\n" \ + _BUG_FLAGS_ASM("%c[fmt]", "%c[file]", \ + "%c[line]", "%c[fl]", \ + "%c[size]", extra) \ + : : [fmt] "i" (WARN_CONDITION_STR(cond_str)), \ + [file] "i" (__FILE__), \ + [line] "i" (__LINE__), \ + [fl] "i" (flags), \ + [size] "i" (sizeof(struct bug_entry))); \ } while (0) #define ARCH_WARN_ASM(file, line, flags, size) \ - _BUG_FLAGS_ASM(ASM_UD2, file, line, flags, size, "") + ".pushsection .rodata.str1.1, \"aMS\", @progbits, 1\n" \ + "99:\n" \ + "\t.string \"\"\n" \ + ".popsection\n" \ + "1:\t " ASM_UD2 "\n" \ + _BUG_FLAGS_ASM("99b", file, line, flags, size, "") #else -#define _BUG_FLAGS(ins, flags, extra) asm volatile(ins) +#define _BUG_FLAGS(cond_str, ins, flags, extra) asm volatile(ins) #endif /* CONFIG_GENERIC_BUG */ @@ -84,7 +114,7 @@ do { \ #define BUG() \ do { \ instrumentation_begin(); \ - _BUG_FLAGS(ASM_UD2, 0, ""); \ + _BUG_FLAGS("", ASM_UD2, 0, ""); \ __builtin_unreachable(); \ } while (0) @@ -97,14 +127,69 @@ do { \ #define ARCH_WARN_REACHABLE ANNOTATE_REACHABLE(1b) -#define __WARN_FLAGS(flags) \ -do { \ - __auto_type __flags = BUGFLAG_WARNING|(flags); \ - instrumentation_begin(); \ - _BUG_FLAGS(ASM_UD2, __flags, ARCH_WARN_REACHABLE); \ - instrumentation_end(); \ +#define __WARN_FLAGS(cond_str, flags) \ +do { \ + __auto_type __flags = BUGFLAG_WARNING|(flags); \ + instrumentation_begin(); \ + _BUG_FLAGS(cond_str, ASM_UD2, __flags, ARCH_WARN_REACHABLE); \ + instrumentation_end(); \ } while (0) +#ifdef HAVE_ARCH_BUG_FORMAT_ARGS + +#ifndef __ASSEMBLY__ +#include <linux/static_call_types.h> +DECLARE_STATIC_CALL(WARN_trap, __WARN_trap); + +struct pt_regs; +struct sysv_va_list { /* from AMD64 System V ABI */ + unsigned int gp_offset; + unsigned int fp_offset; + void *overflow_arg_area; + void *reg_save_area; +}; +struct arch_va_list { + unsigned long regs[6]; + struct sysv_va_list args; +}; +extern void *__warn_args(struct arch_va_list *args, struct pt_regs *regs); +#endif /* __ASSEMBLY__ */ + +#define __WARN_bug_entry(flags, format) ({ \ + struct bug_entry *bug; \ + asm_inline volatile("lea (2f)(%%rip), %[addr]\n1:\n" \ + _BUG_FLAGS_ASM("%c[fmt]", "%c[file]", \ + "%c[line]", "%c[fl]", \ + "%c[size]", "") \ + : [addr] "=r" (bug) \ + : [fmt] "i" (format), \ + [file] "i" (__FILE__), \ + [line] "i" (__LINE__), \ + [fl] "i" (flags), \ + [size] "i" (sizeof(struct bug_entry))); \ + bug; }) + +#define __WARN_print_arg(flags, format, arg...) \ +do { \ + int __flags = (flags) | BUGFLAG_WARNING | BUGFLAG_ARGS ; \ + static_call_mod(WARN_trap)(__WARN_bug_entry(__flags, format), ## arg); \ + asm (""); /* inhibit tail-call optimization */ \ +} while (0) + +#define __WARN_printf(taint, fmt, arg...) \ + __WARN_print_arg(BUGFLAG_TAINT(taint), fmt, ## arg) + +#define WARN_ONCE(cond, format, arg...) ({ \ + int __ret_warn_on = !!(cond); \ + if (unlikely(__ret_warn_on)) { \ + __WARN_print_arg(BUGFLAG_ONCE|BUGFLAG_TAINT(TAINT_WARN),\ + format, ## arg); \ + } \ + __ret_warn_on; \ +}) + +#endif /* HAVE_ARCH_BUG_FORMAT_ARGS */ + #include <asm-generic/bug.h> #endif /* _ASM_X86_BUG_H */ diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 96acb669bed4..4b1a6ade1700 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -101,6 +101,7 @@ static __always_inline bool _static_cpu_has(u16 bit) asm goto(ALTERNATIVE_TERNARY("jmp 6f", %c[feature], "", "jmp %l[t_no]") ".pushsection .altinstr_aux,\"ax\"\n" "6:\n" + ANNOTATE_DATA_SPECIAL " testb %[bitnum], %a[cap_byte]\n" " jnz %l[t_yes]\n" " jmp %l[t_no]\n" diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 6f82302991d0..d90ce601917c 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -321,7 +321,7 @@ #define X86_FEATURE_FSRS (12*32+11) /* Fast short REP STOSB */ #define X86_FEATURE_FSRC (12*32+12) /* Fast short REP {CMPSB,SCASB} */ #define X86_FEATURE_FRED (12*32+17) /* "fred" Flexible Return and Event Delivery */ -#define X86_FEATURE_LKGS (12*32+18) /* Load "kernel" (userspace) GS */ +#define X86_FEATURE_LKGS (12*32+18) /* Like MOV_GS except MSR_KERNEL_GS_BASE = GS.base */ #define X86_FEATURE_WRMSRNS (12*32+19) /* Non-serializing WRMSR */ #define X86_FEATURE_AMX_FP16 (12*32+21) /* AMX fp16 Support */ #define X86_FEATURE_AVX_IFMA (12*32+23) /* Support for VPMADD52[H,L]UQ */ @@ -503,6 +503,9 @@ #define X86_FEATURE_IBPB_EXIT_TO_USER (21*32+14) /* Use IBPB on exit-to-userspace, see VMSCAPE bug */ #define X86_FEATURE_ABMC (21*32+15) /* Assignable Bandwidth Monitoring Counters */ #define X86_FEATURE_MSR_IMM (21*32+16) /* MSR immediate form instructions */ +#define X86_FEATURE_SGX_EUPDATESVN (21*32+17) /* Support for ENCLS[EUPDATESVN] instruction */ + +#define X86_FEATURE_SDCIAE (21*32+18) /* L3 Smart Data Cache Injection Allocation Enforcement */ /* * BUG word(s) diff --git a/arch/x86/include/asm/fred.h b/arch/x86/include/asm/fred.h index 12b34d5b2953..2bb65677c079 100644 --- a/arch/x86/include/asm/fred.h +++ b/arch/x86/include/asm/fred.h @@ -79,7 +79,7 @@ static __always_inline void fred_entry_from_kvm(unsigned int type, unsigned int .type = type, .vector = vector, .nmi = type == EVENT_TYPE_NMI, - .lm = 1, + .l = 1, }; asm_fred_entry_from_kvm(ss); diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 93156ac4ffe0..b08c95872eed 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -56,6 +56,11 @@ arch_ftrace_get_regs(struct ftrace_regs *fregs) return &arch_ftrace_regs(fregs)->regs; } +#define arch_ftrace_partial_regs(regs) do { \ + regs->flags &= ~X86_EFLAGS_FIXED; \ + regs->cs = __KERNEL_CS; \ +} while (0) + #define arch_ftrace_fill_perf_regs(fregs, _regs) do { \ (_regs)->ip = arch_ftrace_regs(fregs)->regs.ip; \ (_regs)->sp = arch_ftrace_regs(fregs)->regs.sp; \ diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h index 6e2458088800..fe5d9a10d900 100644 --- a/arch/x86/include/asm/futex.h +++ b/arch/x86/include/asm/futex.h @@ -46,38 +46,31 @@ do { \ } while(0) static __always_inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, - u32 __user *uaddr) + u32 __user *uaddr) { - if (can_do_masked_user_access()) - uaddr = masked_user_access_begin(uaddr); - else if (!user_access_begin(uaddr, sizeof(u32))) - return -EFAULT; - - switch (op) { - case FUTEX_OP_SET: - unsafe_atomic_op1("xchgl %0, %2", oval, uaddr, oparg, Efault); - break; - case FUTEX_OP_ADD: - unsafe_atomic_op1(LOCK_PREFIX "xaddl %0, %2", oval, - uaddr, oparg, Efault); - break; - case FUTEX_OP_OR: - unsafe_atomic_op2("orl %4, %3", oval, uaddr, oparg, Efault); - break; - case FUTEX_OP_ANDN: - unsafe_atomic_op2("andl %4, %3", oval, uaddr, ~oparg, Efault); - break; - case FUTEX_OP_XOR: - unsafe_atomic_op2("xorl %4, %3", oval, uaddr, oparg, Efault); - break; - default: - user_access_end(); - return -ENOSYS; + scoped_user_rw_access(uaddr, Efault) { + switch (op) { + case FUTEX_OP_SET: + unsafe_atomic_op1("xchgl %0, %2", oval, uaddr, oparg, Efault); + break; + case FUTEX_OP_ADD: + unsafe_atomic_op1(LOCK_PREFIX "xaddl %0, %2", oval, uaddr, oparg, Efault); + break; + case FUTEX_OP_OR: + unsafe_atomic_op2("orl %4, %3", oval, uaddr, oparg, Efault); + break; + case FUTEX_OP_ANDN: + unsafe_atomic_op2("andl %4, %3", oval, uaddr, ~oparg, Efault); + break; + case FUTEX_OP_XOR: + unsafe_atomic_op2("xorl %4, %3", oval, uaddr, oparg, Efault); + break; + default: + return -ENOSYS; + } } - user_access_end(); return 0; Efault: - user_access_end(); return -EFAULT; } @@ -86,21 +79,19 @@ static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, { int ret = 0; - if (can_do_masked_user_access()) - uaddr = masked_user_access_begin(uaddr); - else if (!user_access_begin(uaddr, sizeof(u32))) - return -EFAULT; - asm volatile("\n" - "1:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" - "2:\n" - _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %0) \ - : "+r" (ret), "=a" (oldval), "+m" (*uaddr) - : "r" (newval), "1" (oldval) - : "memory" - ); - user_access_end(); - *uval = oldval; + scoped_user_rw_access(uaddr, Efault) { + asm_inline volatile("\n" + "1:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" + "2:\n" + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %0) + : "+r" (ret), "=a" (oldval), "+m" (*uaddr) + : "r" (newval), "1" (oldval) + : "memory"); + *uval = oldval; + } return ret; +Efault: + return -EFAULT; } #endif diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index abd637e54e94..3218770670d3 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -393,7 +393,7 @@ static __always_inline void __##func(struct pt_regs *regs) /** * DEFINE_IDTENTRY_VC_KERNEL - Emit code for VMM communication handler - when raised from kernel mode + * when raised from kernel mode * @func: Function name of the entry point * * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE @@ -403,7 +403,7 @@ static __always_inline void __##func(struct pt_regs *regs) /** * DEFINE_IDTENTRY_VC_USER - Emit code for VMM communication handler - when raised from user mode + * when raised from user mode * @func: Function name of the entry point * * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h index 54368a43abf6..4733e9064ee5 100644 --- a/arch/x86/include/asm/insn-eval.h +++ b/arch/x86/include/asm/insn-eval.h @@ -44,4 +44,6 @@ enum insn_mmio_type { enum insn_mmio_type insn_decode_mmio(struct insn *insn, int *bytes); +bool insn_is_nop(struct insn *insn); + #endif /* _ASM_X86_INSN_EVAL_H */ diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index 091f88c8254d..846d21c1a7f8 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h @@ -312,7 +312,6 @@ static inline int insn_offset_immediate(struct insn *insn) /** * for_each_insn_prefix() -- Iterate prefixes in the instruction * @insn: Pointer to struct insn. - * @idx: Index storage. * @prefix: Prefix byte. * * Iterate prefix bytes of given @insn. Each prefix byte is stored in @prefix @@ -321,8 +320,8 @@ static inline int insn_offset_immediate(struct insn *insn) * Since prefixes.nbytes can be bigger than 4 if some prefixes * are repeated, it cannot be used for looping over the prefixes. */ -#define for_each_insn_prefix(insn, idx, prefix) \ - for (idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++) +#define for_each_insn_prefix(insn, prefix) \ + for (int idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++) #define POP_SS_OPCODE 0x1f #define MOV_SREG_OPCODE 0x8e diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index f32a0eca2ae5..950bfd006905 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -150,12 +150,12 @@ #define INTEL_LUNARLAKE_M IFM(6, 0xBD) /* Lion Cove / Skymont */ -#define INTEL_PANTHERLAKE_L IFM(6, 0xCC) /* Cougar Cove / Crestmont */ +#define INTEL_PANTHERLAKE_L IFM(6, 0xCC) /* Cougar Cove / Darkmont */ #define INTEL_WILDCATLAKE_L IFM(6, 0xD5) -#define INTEL_NOVALAKE IFM(18, 0x01) -#define INTEL_NOVALAKE_L IFM(18, 0x03) +#define INTEL_NOVALAKE IFM(18, 0x01) /* Coyote Cove / Arctic Wolf */ +#define INTEL_NOVALAKE_L IFM(18, 0x03) /* Coyote Cove / Arctic Wolf */ /* "Small Core" Processors (Atom/E-Core) */ diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h index 5dbeac48a5b9..695f87efbeb8 100644 --- a/arch/x86/include/asm/intel_ds.h +++ b/arch/x86/include/asm/intel_ds.h @@ -4,7 +4,15 @@ #include <linux/percpu-defs.h> #define BTS_BUFFER_SIZE (PAGE_SIZE << 4) -#define PEBS_BUFFER_SIZE (PAGE_SIZE << 4) +#define PEBS_BUFFER_SHIFT 4 +#define PEBS_BUFFER_SIZE (PAGE_SIZE << PEBS_BUFFER_SHIFT) + +/* + * The largest PEBS record could consume a page, ensure + * a record at least can be written after triggering PMI. + */ +#define ARCH_PEBS_THRESH_MULTI ((PEBS_BUFFER_SIZE - PAGE_SIZE) >> PEBS_BUFFER_SHIFT) +#define ARCH_PEBS_THRESH_SINGLE 1 /* The maximal number of PEBS events: */ #define MAX_PEBS_EVENTS_FMT4 8 diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index 61dd1dee7812..e0a6930a4029 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -15,6 +15,7 @@ #define JUMP_TABLE_ENTRY(key, label) \ ".pushsection __jump_table, \"aw\" \n\t" \ _ASM_ALIGN "\n\t" \ + ANNOTATE_DATA_SPECIAL \ ".long 1b - . \n\t" \ ".long " label " - . \n\t" \ _ASM_PTR " " key " - . \n\t" \ diff --git a/arch/x86/include/asm/kvm_types.h b/arch/x86/include/asm/kvm_types.h index 23268a188e70..d7c704ed1be9 100644 --- a/arch/x86/include/asm/kvm_types.h +++ b/arch/x86/include/asm/kvm_types.h @@ -10,6 +10,11 @@ #define KVM_SUB_MODULES kvm-intel #else #undef KVM_SUB_MODULES +/* + * Don't export symbols for KVM without vendor modules, as kvm.ko is built iff + * at least one vendor module is enabled. + */ +#define EXPORT_SYMBOL_FOR_KVM(symbol) #endif #define KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE 40 diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 31e3cb550fb3..2d98886de09a 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -48,6 +48,7 @@ /* AMD-specific bits */ #define MCI_STATUS_TCC BIT_ULL(55) /* Task context corrupt */ +#define MCI_STATUS_PADDRV BIT_ULL(54) /* Valid System Physical Address */ #define MCI_STATUS_SYNDV BIT_ULL(53) /* synd reg. valid */ #define MCI_STATUS_DEFERRED BIT_ULL(44) /* uncorrected error, deferred exception */ #define MCI_STATUS_POISON BIT_ULL(43) /* access poisonous data */ @@ -62,6 +63,7 @@ */ #define MCI_CONFIG_MCAX 0x1 #define MCI_CONFIG_FRUTEXT BIT_ULL(9) +#define MCI_CONFIG_PADDRV BIT_ULL(11) #define MCI_IPID_MCATYPE 0xFFFF0000 #define MCI_IPID_HWID 0xFFF @@ -166,6 +168,12 @@ #define MCE_IN_KERNEL_COPYIN BIT_ULL(7) /* + * Indicates that handler should check and clear Deferred error registers + * rather than common ones. + */ +#define MCE_CHECK_DFR_REGS BIT_ULL(8) + +/* * This structure contains all data related to the MCE log. Also * carries a signature to make it easier to find from external * debugging tools. Each entry is only valid when its finished flag @@ -302,6 +310,12 @@ DECLARE_PER_CPU(struct mce, injectm); /* Disable CMCI/polling for MCA bank claimed by firmware */ extern void mce_disable_bank(int bank); +#ifdef CONFIG_X86_MCE_THRESHOLD +void mce_save_apei_thr_limit(u32 thr_limit); +#else +static inline void mce_save_apei_thr_limit(u32 thr_limit) { } +#endif /* CONFIG_X86_MCE_THRESHOLD */ + /* * Exception handler */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 9e1720d73244..3d0a0950d20a 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -166,6 +166,10 @@ * Processor MMIO stale data * vulnerabilities. */ +#define ARCH_CAP_MCU_ENUM BIT(16) /* + * Indicates the presence of microcode update + * feature enumeration and status information. + */ #define ARCH_CAP_FB_CLEAR BIT(17) /* * VERW clears CPU fill buffer * even on MDS_NO CPUs. @@ -327,6 +331,26 @@ PERF_CAP_PEBS_FORMAT | PERF_CAP_PEBS_BASELINE | \ PERF_CAP_PEBS_TIMING_INFO) +/* Arch PEBS */ +#define MSR_IA32_PEBS_BASE 0x000003f4 +#define MSR_IA32_PEBS_INDEX 0x000003f5 +#define ARCH_PEBS_OFFSET_MASK 0x7fffff +#define ARCH_PEBS_INDEX_WR_SHIFT 4 + +#define ARCH_PEBS_RELOAD 0xffffffff +#define ARCH_PEBS_CNTR_ALLOW BIT_ULL(35) +#define ARCH_PEBS_CNTR_GP BIT_ULL(36) +#define ARCH_PEBS_CNTR_FIXED BIT_ULL(37) +#define ARCH_PEBS_CNTR_METRICS BIT_ULL(38) +#define ARCH_PEBS_LBR_SHIFT 40 +#define ARCH_PEBS_LBR (0x3ull << ARCH_PEBS_LBR_SHIFT) +#define ARCH_PEBS_VECR_XMM BIT_ULL(49) +#define ARCH_PEBS_GPR BIT_ULL(61) +#define ARCH_PEBS_AUX BIT_ULL(62) +#define ARCH_PEBS_EN BIT_ULL(63) +#define ARCH_PEBS_CNTR_MASK (ARCH_PEBS_CNTR_GP | ARCH_PEBS_CNTR_FIXED | \ + ARCH_PEBS_CNTR_METRICS) + #define MSR_IA32_RTIT_CTL 0x00000570 #define RTIT_CTL_TRACEEN BIT(0) #define RTIT_CTL_CYCLEACC BIT(1) @@ -929,6 +953,10 @@ #define MSR_IA32_APICBASE_BASE (0xfffff<<12) #define MSR_IA32_UCODE_WRITE 0x00000079 + +#define MSR_IA32_MCU_ENUMERATION 0x0000007b +#define MCU_STAGING BIT(4) + #define MSR_IA32_UCODE_REV 0x0000008b /* Intel SGX Launch Enclave Public Key Hash MSRs */ @@ -1226,6 +1254,8 @@ #define MSR_IA32_VMX_VMFUNC 0x00000491 #define MSR_IA32_VMX_PROCBASED_CTLS3 0x00000492 +#define MSR_IA32_MCU_STAGING_MBOX_ADDR 0x000007a5 + /* Resctrl MSRs: */ /* - Intel: */ #define MSR_IA32_L3_QOS_CFG 0xc81 diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index 015d23f3e01f..2f0e47be79a4 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -9,6 +9,7 @@ #include <asm/alternative.h> #include <linux/kmsan-checks.h> +#include <linux/mmdebug.h> /* duplicated to the one in bootmem.h */ extern unsigned long max_pfn; @@ -31,18 +32,28 @@ static __always_inline unsigned long __phys_addr_nodebug(unsigned long x) #ifdef CONFIG_DEBUG_VIRTUAL extern unsigned long __phys_addr(unsigned long); -extern unsigned long __phys_addr_symbol(unsigned long); #else #define __phys_addr(x) __phys_addr_nodebug(x) -#define __phys_addr_symbol(x) \ - ((unsigned long)(x) - __START_KERNEL_map + phys_base) #endif +static inline unsigned long __phys_addr_symbol(unsigned long x) +{ + unsigned long y = x - __START_KERNEL_map; + + /* only check upper bounds since lower bounds will trigger carry */ + VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE); + + return y + phys_base; +} + #define __phys_reloc_hide(x) (x) void clear_page_orig(void *page); void clear_page_rep(void *page); void clear_page_erms(void *page); +KCFI_REFERENCE(clear_page_orig); +KCFI_REFERENCE(clear_page_rep); +KCFI_REFERENCE(clear_page_erms); static inline void clear_page(void *page) { diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 332428caaed2..725d0eff7acd 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -23,6 +23,7 @@ #else /* !__ASSEMBLY__: */ #include <linux/args.h> +#include <linux/bits.h> #include <linux/build_bug.h> #include <linux/stringify.h> #include <asm/asm.h> @@ -572,9 +573,9 @@ do { \ #define x86_this_cpu_constant_test_bit(_nr, _var) \ ({ \ unsigned long __percpu *addr__ = \ - (unsigned long __percpu *)&(_var) + ((_nr) / BITS_PER_LONG); \ + (unsigned long __percpu *)&(_var) + BIT_WORD(_nr); \ \ - !!((1UL << ((_nr) % BITS_PER_LONG)) & raw_cpu_read(*addr__)); \ + !!(BIT_MASK(_nr) & raw_cpu_read(*addr__)); \ }) #define x86_this_cpu_variable_test_bit(_nr, _var) \ diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 49a4d442f3fc..7276ba70c88a 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -141,16 +141,16 @@ #define ARCH_PERFMON_EVENTS_COUNT 7 #define PEBS_DATACFG_MEMINFO BIT_ULL(0) -#define PEBS_DATACFG_GP BIT_ULL(1) +#define PEBS_DATACFG_GP BIT_ULL(1) #define PEBS_DATACFG_XMMS BIT_ULL(2) #define PEBS_DATACFG_LBRS BIT_ULL(3) -#define PEBS_DATACFG_LBR_SHIFT 24 #define PEBS_DATACFG_CNTR BIT_ULL(4) +#define PEBS_DATACFG_METRICS BIT_ULL(5) +#define PEBS_DATACFG_LBR_SHIFT 24 #define PEBS_DATACFG_CNTR_SHIFT 32 #define PEBS_DATACFG_CNTR_MASK GENMASK_ULL(15, 0) #define PEBS_DATACFG_FIX_SHIFT 48 #define PEBS_DATACFG_FIX_MASK GENMASK_ULL(7, 0) -#define PEBS_DATACFG_METRICS BIT_ULL(5) /* Steal the highest bit of pebs_data_cfg for SW usage */ #define PEBS_UPDATE_DS_SW BIT_ULL(63) @@ -200,6 +200,8 @@ union cpuid10_edx { #define ARCH_PERFMON_EXT_LEAF 0x00000023 #define ARCH_PERFMON_NUM_COUNTER_LEAF 0x1 #define ARCH_PERFMON_ACR_LEAF 0x2 +#define ARCH_PERFMON_PEBS_CAP_LEAF 0x4 +#define ARCH_PERFMON_PEBS_COUNTER_LEAF 0x5 union cpuid35_eax { struct { @@ -210,7 +212,10 @@ union cpuid35_eax { unsigned int acr_subleaf:1; /* Events Sub-Leaf */ unsigned int events_subleaf:1; - unsigned int reserved:28; + /* arch-PEBS Sub-Leaves */ + unsigned int pebs_caps_subleaf:1; + unsigned int pebs_cnts_subleaf:1; + unsigned int reserved:26; } split; unsigned int full; }; @@ -432,6 +437,8 @@ static inline bool is_topdown_idx(int idx) #define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(GLOBAL_STATUS_LBRS_FROZEN_BIT) #define GLOBAL_STATUS_TRACE_TOPAPMI_BIT 55 #define GLOBAL_STATUS_TRACE_TOPAPMI BIT_ULL(GLOBAL_STATUS_TRACE_TOPAPMI_BIT) +#define GLOBAL_STATUS_ARCH_PEBS_THRESHOLD_BIT 54 +#define GLOBAL_STATUS_ARCH_PEBS_THRESHOLD BIT_ULL(GLOBAL_STATUS_ARCH_PEBS_THRESHOLD_BIT) #define GLOBAL_STATUS_PERF_METRICS_OVF_BIT 48 #define GLOBAL_CTRL_EN_PERF_METRICS BIT_ULL(48) @@ -503,6 +510,107 @@ struct pebs_cntr_header { #define INTEL_CNTR_METRICS 0x3 /* + * Arch PEBS + */ +union arch_pebs_index { + struct { + u64 rsvd:4, + wr:23, + rsvd2:4, + full:1, + en:1, + rsvd3:3, + thresh:23, + rsvd4:5; + }; + u64 whole; +}; + +struct arch_pebs_header { + union { + u64 format; + struct { + u64 size:16, /* Record size */ + rsvd:14, + mode:1, /* 64BIT_MODE */ + cont:1, + rsvd2:3, + cntr:5, + lbr:2, + rsvd3:7, + xmm:1, + ymmh:1, + rsvd4:2, + opmask:1, + zmmh:1, + h16zmm:1, + rsvd5:5, + gpr:1, + aux:1, + basic:1; + }; + }; + u64 rsvd6; +}; + +struct arch_pebs_basic { + u64 ip; + u64 applicable_counters; + u64 tsc; + u64 retire :16, /* Retire Latency */ + valid :1, + rsvd :47; + u64 rsvd2; + u64 rsvd3; +}; + +struct arch_pebs_aux { + u64 address; + u64 rsvd; + u64 rsvd2; + u64 rsvd3; + u64 rsvd4; + u64 aux; + u64 instr_latency :16, + pad2 :16, + cache_latency :16, + pad3 :16; + u64 tsx_tuning; +}; + +struct arch_pebs_gprs { + u64 flags, ip, ax, cx, dx, bx, sp, bp, si, di; + u64 r8, r9, r10, r11, r12, r13, r14, r15, ssp; + u64 rsvd; +}; + +struct arch_pebs_xer_header { + u64 xstate; + u64 rsvd; +}; + +#define ARCH_PEBS_LBR_NAN 0x0 +#define ARCH_PEBS_LBR_NUM_8 0x1 +#define ARCH_PEBS_LBR_NUM_16 0x2 +#define ARCH_PEBS_LBR_NUM_VAR 0x3 +#define ARCH_PEBS_BASE_LBR_ENTRIES 8 +struct arch_pebs_lbr_header { + u64 rsvd; + u64 ctl; + u64 depth; + u64 ler_from; + u64 ler_to; + u64 ler_info; +}; + +struct arch_pebs_cntr_header { + u32 cntr; + u32 fixed; + u32 metrics; + u32 reserved; +}; + +/* * AMD Extended Performance Monitoring and Debug cpuid feature detection */ #define EXT_PERFMON_DEBUG_FEATURES 0x80000022 diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 50f75467f73d..35d062a2e304 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -84,8 +84,8 @@ struct fred_ss { : 4, /* Event was incident to enclave execution */ enclave : 1, - /* CPU was in long mode */ - lm : 1, + /* CPU was in 64-bit mode */ + l : 1, /* * Nested exception during FRED delivery, not set * for #DF. @@ -187,12 +187,12 @@ convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs); extern void send_sigtrap(struct pt_regs *regs, int error_code, int si_code); -static inline unsigned long regs_return_value(struct pt_regs *regs) +static __always_inline unsigned long regs_return_value(struct pt_regs *regs) { return regs->ax; } -static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) +static __always_inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) { regs->ax = rc; } @@ -277,34 +277,34 @@ static __always_inline bool ip_within_syscall_gap(struct pt_regs *regs) } #endif -static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) +static __always_inline unsigned long kernel_stack_pointer(struct pt_regs *regs) { return regs->sp; } -static inline unsigned long instruction_pointer(struct pt_regs *regs) +static __always_inline unsigned long instruction_pointer(struct pt_regs *regs) { return regs->ip; } -static inline void instruction_pointer_set(struct pt_regs *regs, - unsigned long val) +static __always_inline +void instruction_pointer_set(struct pt_regs *regs, unsigned long val) { regs->ip = val; } -static inline unsigned long frame_pointer(struct pt_regs *regs) +static __always_inline unsigned long frame_pointer(struct pt_regs *regs) { return regs->bp; } -static inline unsigned long user_stack_pointer(struct pt_regs *regs) +static __always_inline unsigned long user_stack_pointer(struct pt_regs *regs) { return regs->sp; } -static inline void user_stack_pointer_set(struct pt_regs *regs, - unsigned long val) +static __always_inline +void user_stack_pointer_set(struct pt_regs *regs, unsigned long val) { regs->sp = val; } diff --git a/arch/x86/include/asm/runtime-const.h b/arch/x86/include/asm/runtime-const.h index 8d983cfd06ea..e5a13dc8816e 100644 --- a/arch/x86/include/asm/runtime-const.h +++ b/arch/x86/include/asm/runtime-const.h @@ -2,6 +2,10 @@ #ifndef _ASM_RUNTIME_CONST_H #define _ASM_RUNTIME_CONST_H +#ifdef MODULE + #error "Cannot use runtime-const infrastructure from modules" +#endif + #ifdef __ASSEMBLY__ .macro RUNTIME_CONST_PTR sym reg diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h index 6a0069761508..04958459a7ca 100644 --- a/arch/x86/include/asm/sgx.h +++ b/arch/x86/include/asm/sgx.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/** +/* * Copyright(c) 2016-20 Intel Corporation. * * Intel Software Guard Extensions (SGX) support. @@ -28,21 +28,22 @@ #define SGX_CPUID_EPC_MASK GENMASK(3, 0) enum sgx_encls_function { - ECREATE = 0x00, - EADD = 0x01, - EINIT = 0x02, - EREMOVE = 0x03, - EDGBRD = 0x04, - EDGBWR = 0x05, - EEXTEND = 0x06, - ELDU = 0x08, - EBLOCK = 0x09, - EPA = 0x0A, - EWB = 0x0B, - ETRACK = 0x0C, - EAUG = 0x0D, - EMODPR = 0x0E, - EMODT = 0x0F, + ECREATE = 0x00, + EADD = 0x01, + EINIT = 0x02, + EREMOVE = 0x03, + EDGBRD = 0x04, + EDGBWR = 0x05, + EEXTEND = 0x06, + ELDU = 0x08, + EBLOCK = 0x09, + EPA = 0x0A, + EWB = 0x0B, + ETRACK = 0x0C, + EAUG = 0x0D, + EMODPR = 0x0E, + EMODT = 0x0F, + EUPDATESVN = 0x18, }; /** @@ -65,15 +66,19 @@ enum sgx_encls_function { /** * enum sgx_return_code - The return code type for ENCLS, ENCLU and ENCLV - * %SGX_EPC_PAGE_CONFLICT: Page is being written by other ENCLS function. - * %SGX_NOT_TRACKED: Previous ETRACK's shootdown sequence has not + * @SGX_EPC_PAGE_CONFLICT: Page is being written by other ENCLS function. + * @SGX_NOT_TRACKED: Previous ETRACK's shootdown sequence has not * been completed yet. - * %SGX_CHILD_PRESENT SECS has child pages present in the EPC. - * %SGX_INVALID_EINITTOKEN: EINITTOKEN is invalid and enclave signer's + * @SGX_CHILD_PRESENT: SECS has child pages present in the EPC. + * @SGX_INVALID_EINITTOKEN: EINITTOKEN is invalid and enclave signer's * public key does not match IA32_SGXLEPUBKEYHASH. - * %SGX_PAGE_NOT_MODIFIABLE: The EPC page cannot be modified because it + * @SGX_PAGE_NOT_MODIFIABLE: The EPC page cannot be modified because it * is in the PENDING or MODIFIED state. - * %SGX_UNMASKED_EVENT: An unmasked event, e.g. INTR, was received + * @SGX_INSUFFICIENT_ENTROPY: Insufficient entropy in RNG. + * @SGX_NO_UPDATE: EUPDATESVN could not update the CPUSVN because the + * current SVN was not newer than CPUSVN. This is the most + * common error code returned by EUPDATESVN. + * @SGX_UNMASKED_EVENT: An unmasked event, e.g. INTR, was received */ enum sgx_return_code { SGX_EPC_PAGE_CONFLICT = 7, @@ -81,6 +86,8 @@ enum sgx_return_code { SGX_CHILD_PRESENT = 13, SGX_INVALID_EINITTOKEN = 16, SGX_PAGE_NOT_MODIFIABLE = 20, + SGX_INSUFFICIENT_ENTROPY = 29, + SGX_NO_UPDATE = 31, SGX_UNMASKED_EVENT = 128, }; @@ -89,7 +96,7 @@ enum sgx_return_code { /** * enum sgx_miscselect - additional information to an SSA frame - * %SGX_MISC_EXINFO: Report #PF or #GP to the SSA frame. + * @SGX_MISC_EXINFO: Report #PF or #GP to the SSA frame. * * Save State Area (SSA) is a stack inside the enclave used to store processor * state when an exception or interrupt occurs. This enum defines additional @@ -105,17 +112,17 @@ enum sgx_miscselect { #define SGX_SSA_MISC_EXINFO_SIZE 16 /** - * enum sgx_attributes - the attributes field in &struct sgx_secs - * %SGX_ATTR_INIT: Enclave can be entered (is initialized). - * %SGX_ATTR_DEBUG: Allow ENCLS(EDBGRD) and ENCLS(EDBGWR). - * %SGX_ATTR_MODE64BIT: Tell that this a 64-bit enclave. - * %SGX_ATTR_PROVISIONKEY: Allow to use provisioning keys for remote + * enum sgx_attribute - the attributes field in &struct sgx_secs + * @SGX_ATTR_INIT: Enclave can be entered (is initialized). + * @SGX_ATTR_DEBUG: Allow ENCLS(EDBGRD) and ENCLS(EDBGWR). + * @SGX_ATTR_MODE64BIT: Tell that this a 64-bit enclave. + * @SGX_ATTR_PROVISIONKEY: Allow to use provisioning keys for remote * attestation. - * %SGX_ATTR_KSS: Allow to use key separation and sharing (KSS). - * %SGX_ATTR_EINITTOKENKEY: Allow to use token signing key that is used to + * @SGX_ATTR_KSS: Allow to use key separation and sharing (KSS). + * @SGX_ATTR_EINITTOKENKEY: Allow to use token signing key that is used to * sign cryptographic tokens that can be passed to * EINIT as an authorization to run an enclave. - * %SGX_ATTR_ASYNC_EXIT_NOTIFY: Allow enclaves to be notified after an + * @SGX_ATTR_ASYNC_EXIT_NOTIFY: Allow enclaves to be notified after an * asynchronous exit has occurred. */ enum sgx_attribute { @@ -188,7 +195,7 @@ struct sgx_secs { /** * enum sgx_tcs_flags - execution flags for TCS - * %SGX_TCS_DBGOPTIN: If enabled allows single-stepping and breakpoints + * @SGX_TCS_DBGOPTIN: If enabled allows single-stepping and breakpoints * inside an enclave. It is cleared by EADD but can * be set later with EDBGWR. */ @@ -253,11 +260,11 @@ struct sgx_pageinfo { /** * enum sgx_page_type - bits in the SECINFO flags defining the page type - * %SGX_PAGE_TYPE_SECS: a SECS page - * %SGX_PAGE_TYPE_TCS: a TCS page - * %SGX_PAGE_TYPE_REG: a regular page - * %SGX_PAGE_TYPE_VA: a VA page - * %SGX_PAGE_TYPE_TRIM: a page in trimmed state + * @SGX_PAGE_TYPE_SECS: a SECS page + * @SGX_PAGE_TYPE_TCS: a TCS page + * @SGX_PAGE_TYPE_REG: a regular page + * @SGX_PAGE_TYPE_VA: a VA page + * @SGX_PAGE_TYPE_TRIM: a page in trimmed state * * Make sure when making changes to this enum that its values can still fit * in the bitfield within &struct sgx_encl_page @@ -275,14 +282,14 @@ enum sgx_page_type { /** * enum sgx_secinfo_flags - the flags field in &struct sgx_secinfo - * %SGX_SECINFO_R: allow read - * %SGX_SECINFO_W: allow write - * %SGX_SECINFO_X: allow execution - * %SGX_SECINFO_SECS: a SECS page - * %SGX_SECINFO_TCS: a TCS page - * %SGX_SECINFO_REG: a regular page - * %SGX_SECINFO_VA: a VA page - * %SGX_SECINFO_TRIM: a page in trimmed state + * @SGX_SECINFO_R: allow read + * @SGX_SECINFO_W: allow write + * @SGX_SECINFO_X: allow execution + * @SGX_SECINFO_SECS: a SECS page + * @SGX_SECINFO_TCS: a TCS page + * @SGX_SECINFO_REG: a regular page + * @SGX_SECINFO_VA: a VA page + * @SGX_SECINFO_TRIM: a page in trimmed state */ enum sgx_secinfo_flags { SGX_SECINFO_R = BIT(0), diff --git a/arch/x86/include/asm/shared/msr.h b/arch/x86/include/asm/shared/msr.h index 1e6ec10b3a15..a20b1c08c99f 100644 --- a/arch/x86/include/asm/shared/msr.h +++ b/arch/x86/include/asm/shared/msr.h @@ -12,4 +12,19 @@ struct msr { }; }; +/* + * The kernel proper already defines rdmsr()/wrmsr(), but they are not for the + * boot kernel since they rely on tracepoint/exception handling infrastructure + * that's not available here. + */ +static inline void raw_rdmsr(unsigned int reg, struct msr *m) +{ + asm volatile("rdmsr" : "=a" (m->l), "=d" (m->h) : "c" (reg)); +} + +static inline void raw_wrmsr(unsigned int reg, const struct msr *m) +{ + asm volatile("wrmsr" : : "c" (reg), "a"(m->l), "d" (m->h) : "memory"); +} + #endif /* _ASM_X86_SHARED_MSR_H */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 22bfebe6776d..84951572ab81 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -109,7 +109,7 @@ int common_cpu_up(unsigned int cpunum, struct task_struct *tidle); int native_kick_ap(unsigned int cpu, struct task_struct *tidle); int native_cpu_disable(void); void __noreturn hlt_play_dead(void); -void native_play_dead(void); +void __noreturn native_play_dead(void); void play_dead_common(void); void wbinvd_on_cpu(int cpu); void wbinvd_on_all_cpus(void); diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 17f6c3fedeee..0581c477d466 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -701,5 +701,6 @@ DEFINE_GHCB_ACCESSORS(sw_exit_info_1) DEFINE_GHCB_ACCESSORS(sw_exit_info_2) DEFINE_GHCB_ACCESSORS(sw_scratch) DEFINE_GHCB_ACCESSORS(xcr0) +DEFINE_GHCB_ACCESSORS(xss) #endif diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 21041898157a..1fadf0cf520c 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -218,6 +218,12 @@ static inline unsigned int topology_amd_nodes_per_pkg(void) return __amd_nodes_per_pkg; } +#else /* CONFIG_SMP */ +static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } +static inline int topology_max_smt_threads(void) { return 1; } +static inline unsigned int topology_amd_nodes_per_pkg(void) { return 1; } +#endif /* !CONFIG_SMP */ + extern struct cpumask __cpu_primary_thread_mask; #define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask) @@ -241,12 +247,6 @@ static inline bool topology_is_core_online(unsigned int cpu) } #define topology_is_core_online topology_is_core_online -#else /* CONFIG_SMP */ -static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } -static inline int topology_max_smt_threads(void) { return 1; } -static inline unsigned int topology_amd_nodes_per_pkg(void) { return 1; } -#endif /* !CONFIG_SMP */ - static inline void arch_fix_phys_package_id(int num, u32 slot) { } @@ -325,4 +325,6 @@ static inline void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled extern void arch_scale_freq_tick(void); #define arch_scale_freq_tick arch_scale_freq_tick +extern int arch_sched_node_distance(int from, int to); + #endif /* _ASM_X86_TOPOLOGY_H */ diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 91a3fb8ae7ff..367297b188c3 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -528,18 +528,18 @@ static __must_check __always_inline bool user_access_begin(const void __user *pt #define user_access_save() smap_save() #define user_access_restore(x) smap_restore(x) -#define unsafe_put_user(x, ptr, label) \ +#define arch_unsafe_put_user(x, ptr, label) \ __put_user_size((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), label) #ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT -#define unsafe_get_user(x, ptr, err_label) \ +#define arch_unsafe_get_user(x, ptr, err_label) \ do { \ __inttype(*(ptr)) __gu_val; \ __get_user_size(__gu_val, (ptr), sizeof(*(ptr)), err_label); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ } while (0) #else // !CONFIG_CC_HAS_ASM_GOTO_OUTPUT -#define unsafe_get_user(x, ptr, err_label) \ +#define arch_unsafe_get_user(x, ptr, err_label) \ do { \ int __gu_err; \ __inttype(*(ptr)) __gu_val; \ @@ -618,11 +618,11 @@ do { \ } while (0) #ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT -#define __get_kernel_nofault(dst, src, type, err_label) \ +#define arch_get_kernel_nofault(dst, src, type, err_label) \ __get_user_size(*((type *)(dst)), (__force type __user *)(src), \ sizeof(type), err_label) #else // !CONFIG_CC_HAS_ASM_GOTO_OUTPUT -#define __get_kernel_nofault(dst, src, type, err_label) \ +#define arch_get_kernel_nofault(dst, src, type, err_label) \ do { \ int __kr_err; \ \ @@ -633,7 +633,7 @@ do { \ } while (0) #endif // CONFIG_CC_HAS_ASM_GOTO_OUTPUT -#define __put_kernel_nofault(dst, src, type, err_label) \ +#define arch_put_kernel_nofault(dst, src, type, err_label) \ __put_user_size(*((type *)(src)), (__force type __user *)(dst), \ sizeof(type), err_label) diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index c8a5ae35c871..641f45c22f9d 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -12,12 +12,12 @@ #include <asm/cpufeatures.h> #include <asm/page.h> #include <asm/percpu.h> -#include <asm/runtime-const.h> -/* - * Virtual variable: there's no actual backing store for this, - * it can purely be used as 'runtime_const_ptr(USER_PTR_MAX)' - */ +#ifdef MODULE + #define runtime_const_ptr(sym) (sym) +#else + #include <asm/runtime-const.h> +#endif extern unsigned long USER_PTR_MAX; #ifdef CONFIG_ADDRESS_MASKING diff --git a/arch/x86/include/asm/unwind_user.h b/arch/x86/include/asm/unwind_user.h new file mode 100644 index 000000000000..12064284bc4e --- /dev/null +++ b/arch/x86/include/asm/unwind_user.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_UNWIND_USER_H +#define _ASM_X86_UNWIND_USER_H + +#ifdef CONFIG_HAVE_UNWIND_USER_FP + +#include <asm/ptrace.h> +#include <asm/uprobes.h> + +#define ARCH_INIT_USER_FP_FRAME(ws) \ + .cfa_off = 2*(ws), \ + .ra_off = -1*(ws), \ + .fp_off = -2*(ws), \ + .use_fp = true, + +#define ARCH_INIT_USER_FP_ENTRY_FRAME(ws) \ + .cfa_off = 1*(ws), \ + .ra_off = -1*(ws), \ + .fp_off = 0, \ + .use_fp = false, + +static inline int unwind_user_word_size(struct pt_regs *regs) +{ + /* We can't unwind VM86 stacks */ + if (regs->flags & X86_VM_MASK) + return 0; +#ifdef CONFIG_X86_64 + if (!user_64bit_mode(regs)) + return sizeof(int); +#endif + return sizeof(long); +} + +static inline bool unwind_user_at_function_start(struct pt_regs *regs) +{ + return is_uprobe_at_func_entry(regs); +} + +#endif /* CONFIG_HAVE_UNWIND_USER_FP */ + +#endif /* _ASM_X86_UNWIND_USER_H */ diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index 1ee2e5115955..362210c79998 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h @@ -62,4 +62,13 @@ struct arch_uprobe_task { unsigned int saved_tf; }; +#ifdef CONFIG_UPROBES +extern bool is_uprobe_at_func_entry(struct pt_regs *regs); +#else +static bool is_uprobe_at_func_entry(struct pt_regs *regs) +{ + return false; +} +#endif /* CONFIG_UPROBES */ + #endif /* _ASM_UPROBES_H */ diff --git a/arch/x86/include/uapi/asm/sgx.h b/arch/x86/include/uapi/asm/sgx.h index 2dd35bbdc822..3c4d52072189 100644 --- a/arch/x86/include/uapi/asm/sgx.h +++ b/arch/x86/include/uapi/asm/sgx.h @@ -10,7 +10,7 @@ /** * enum sgx_page_flags - page control flags - * %SGX_PAGE_MEASURE: Measure the page contents with a sequence of + * @SGX_PAGE_MEASURE: Measure the page contents with a sequence of * ENCLS[EEXTEND] operations. */ enum sgx_page_flags { @@ -143,6 +143,12 @@ struct sgx_enclave_run; /** * typedef sgx_enclave_user_handler_t - Exit handler function accepted by * __vdso_sgx_enter_enclave() + * @rdi: RDI at the time of EEXIT, undefined on AEX + * @rsi: RSI at the time of EEXIT, undefined on AEX + * @rdx: RDX at the time of EEXIT, undefined on AEX + * @rsp: RSP (untrusted) at the time of EEXIT or AEX + * @r8: R8 at the time of EEXIT, undefined on AEX + * @r9: R9 at the time of EEXIT, undefined on AEX * @run: The run instance given by the caller * * The register parameters contain the snapshot of their values at enclave @@ -166,7 +172,7 @@ typedef int (*sgx_enclave_user_handler_t)(long rdi, long rsi, long rdx, * @exception_addr: The address that triggered the exception * @user_handler: User provided callback run on exception * @user_data: Data passed to the user handler - * @reserved Reserved for future extensions + * @reserved: Reserved for future extensions * * If @user_handler is provided, the handler will be invoked on all return paths * of the normal flow. The user handler may transfer control, e.g. via a diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index 9792e329343e..1baa86dfe029 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -93,6 +93,7 @@ #define EXIT_REASON_TPAUSE 68 #define EXIT_REASON_BUS_LOCK 74 #define EXIT_REASON_NOTIFY 75 +#define EXIT_REASON_SEAMCALL 76 #define EXIT_REASON_TDCALL 77 #define EXIT_REASON_MSR_READ_IMM 84 #define EXIT_REASON_MSR_WRITE_IMM 85 diff --git a/arch/x86/kernel/acpi/apei.c b/arch/x86/kernel/acpi/apei.c index 0916f00a992e..e21419e686eb 100644 --- a/arch/x86/kernel/acpi/apei.c +++ b/arch/x86/kernel/acpi/apei.c @@ -19,6 +19,8 @@ int arch_apei_enable_cmcff(struct acpi_hest_header *hest_hdr, void *data) if (!cmc->enabled) return 0; + mce_save_apei_thr_limit(cmc->notify.error_threshold_value); + /* * We expect HEST to provide a list of MC banks that report errors * in firmware first mode. Otherwise, return non-zero value to diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c index 7047124490f6..d7c8ef1e354d 100644 --- a/arch/x86/kernel/acpi/cppc.c +++ b/arch/x86/kernel/acpi/cppc.c @@ -196,7 +196,7 @@ int amd_detect_prefcore(bool *detected) break; } - for_each_present_cpu(cpu) { + for_each_online_cpu(cpu) { u32 tmp; int ret; diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 5b09f89070f0..74f4c659f9c9 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -9,6 +9,7 @@ #include <asm/text-patching.h> #include <asm/insn.h> +#include <asm/insn-eval.h> #include <asm/ibt.h> #include <asm/set_memory.h> #include <asm/nmi.h> @@ -346,25 +347,6 @@ static void add_nop(u8 *buf, unsigned int len) } /* - * Matches NOP and NOPL, not any of the other possible NOPs. - */ -static bool insn_is_nop(struct insn *insn) -{ - /* Anything NOP, but no REP NOP */ - if (insn->opcode.bytes[0] == 0x90 && - (!insn->prefixes.nbytes || insn->prefixes.bytes[0] != 0xF3)) - return true; - - /* NOPL */ - if (insn->opcode.bytes[0] == 0x0F && insn->opcode.bytes[1] == 0x1F) - return true; - - /* TODO: more nops */ - - return false; -} - -/* * Find the offset of the first non-NOP instruction starting at @offset * but no further than @len. */ @@ -559,7 +541,7 @@ EXPORT_SYMBOL(BUG_func); * Rewrite the "call BUG_func" replacement to point to the target of the * indirect pv_ops call "call *disp(%ip)". */ -static int alt_replace_call(u8 *instr, u8 *insn_buff, struct alt_instr *a) +static unsigned int alt_replace_call(u8 *instr, u8 *insn_buff, struct alt_instr *a) { void *target, *bug = &BUG_func; s32 disp; @@ -643,7 +625,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, * order. */ for (a = start; a < end; a++) { - int insn_buff_sz = 0; + unsigned int insn_buff_sz = 0; /* * In case of nested ALTERNATIVE()s the outer alternative might @@ -683,11 +665,8 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, memcpy(insn_buff, replacement, a->replacementlen); insn_buff_sz = a->replacementlen; - if (a->flags & ALT_FLAG_DIRECT_CALL) { + if (a->flags & ALT_FLAG_DIRECT_CALL) insn_buff_sz = alt_replace_call(instr, insn_buff, a); - if (insn_buff_sz < 0) - continue; - } for (; insn_buff_sz < a->instrlen; insn_buff_sz++) insn_buff[insn_buff_sz] = 0x90; @@ -2244,21 +2223,34 @@ int alternatives_text_reserved(void *start, void *end) * See entry_{32,64}.S for more details. */ -/* - * We define the int3_magic() function in assembly to control the calling - * convention such that we can 'call' it from assembly. - */ +extern void int3_selftest_asm(unsigned int *ptr); -extern void int3_magic(unsigned int *ptr); /* defined in asm */ +asm ( +" .pushsection .init.text, \"ax\", @progbits\n" +" .type int3_selftest_asm, @function\n" +"int3_selftest_asm:\n" + ANNOTATE_NOENDBR + /* + * INT3 padded with NOP to CALL_INSN_SIZE. The INT3 triggers an + * exception, then the int3_exception_nb notifier emulates a call to + * int3_selftest_callee(). + */ +" int3; nop; nop; nop; nop\n" + ASM_RET +" .size int3_selftest_asm, . - int3_selftest_asm\n" +" .popsection\n" +); + +extern void int3_selftest_callee(unsigned int *ptr); asm ( " .pushsection .init.text, \"ax\", @progbits\n" -" .type int3_magic, @function\n" -"int3_magic:\n" +" .type int3_selftest_callee, @function\n" +"int3_selftest_callee:\n" ANNOTATE_NOENDBR -" movl $1, (%" _ASM_ARG1 ")\n" +" movl $0x1234, (%" _ASM_ARG1 ")\n" ASM_RET -" .size int3_magic, .-int3_magic\n" +" .size int3_selftest_callee, . - int3_selftest_callee\n" " .popsection\n" ); @@ -2267,7 +2259,7 @@ extern void int3_selftest_ip(void); /* defined in asm below */ static int __init int3_exception_notify(struct notifier_block *self, unsigned long val, void *data) { - unsigned long selftest = (unsigned long)&int3_selftest_ip; + unsigned long selftest = (unsigned long)&int3_selftest_asm; struct die_args *args = data; struct pt_regs *regs = args->regs; @@ -2282,7 +2274,7 @@ int3_exception_notify(struct notifier_block *self, unsigned long val, void *data if (regs->ip - INT3_INSN_SIZE != selftest) return NOTIFY_DONE; - int3_emulate_call(regs, (unsigned long)&int3_magic); + int3_emulate_call(regs, (unsigned long)&int3_selftest_callee); return NOTIFY_STOP; } @@ -2298,19 +2290,11 @@ static noinline void __init int3_selftest(void) BUG_ON(register_die_notifier(&int3_exception_nb)); /* - * Basically: int3_magic(&val); but really complicated :-) - * - * INT3 padded with NOP to CALL_INSN_SIZE. The int3_exception_nb - * notifier above will emulate CALL for us. + * Basically: int3_selftest_callee(&val); but really complicated :-) */ - asm volatile ("int3_selftest_ip:\n\t" - ANNOTATE_NOENDBR - " int3; nop; nop; nop; nop\n\t" - : ASM_CALL_CONSTRAINT - : __ASM_SEL_RAW(a, D) (&val) - : "memory"); - - BUG_ON(val != 1); + int3_selftest_asm(&val); + + BUG_ON(val != 0x1234); unregister_die_notifier(&int3_exception_nb); } diff --git a/arch/x86/kernel/amd_node.c b/arch/x86/kernel/amd_node.c index a40176b62eb5..3d0a4768d603 100644 --- a/arch/x86/kernel/amd_node.c +++ b/arch/x86/kernel/amd_node.c @@ -34,62 +34,6 @@ struct pci_dev *amd_node_get_func(u16 node, u8 func) return pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(AMD_NODE0_PCI_SLOT + node, func)); } -#define DF_BLK_INST_CNT 0x040 -#define DF_CFG_ADDR_CNTL_LEGACY 0x084 -#define DF_CFG_ADDR_CNTL_DF4 0xC04 - -#define DF_MAJOR_REVISION GENMASK(27, 24) - -static u16 get_cfg_addr_cntl_offset(struct pci_dev *df_f0) -{ - u32 reg; - - /* - * Revision fields added for DF4 and later. - * - * Major revision of '0' is found pre-DF4. Field is Read-as-Zero. - */ - if (pci_read_config_dword(df_f0, DF_BLK_INST_CNT, ®)) - return 0; - - if (reg & DF_MAJOR_REVISION) - return DF_CFG_ADDR_CNTL_DF4; - - return DF_CFG_ADDR_CNTL_LEGACY; -} - -struct pci_dev *amd_node_get_root(u16 node) -{ - struct pci_dev *root; - u16 cntl_off; - u8 bus; - - if (!cpu_feature_enabled(X86_FEATURE_ZEN)) - return NULL; - - /* - * D18F0xXXX [Config Address Control] (DF::CfgAddressCntl) - * Bits [7:0] (SecBusNum) holds the bus number of the root device for - * this Data Fabric instance. The segment, device, and function will be 0. - */ - struct pci_dev *df_f0 __free(pci_dev_put) = amd_node_get_func(node, 0); - if (!df_f0) - return NULL; - - cntl_off = get_cfg_addr_cntl_offset(df_f0); - if (!cntl_off) - return NULL; - - if (pci_read_config_byte(df_f0, cntl_off, &bus)) - return NULL; - - /* Grab the pointer for the actual root device instance. */ - root = pci_get_domain_bus_and_slot(0, bus, 0); - - pci_dbg(root, "is root for AMD node %u\n", node); - return root; -} - static struct pci_dev **amd_roots; /* Protect the PCI config register pairs used for SMN. */ @@ -274,51 +218,21 @@ DEFINE_SHOW_STORE_ATTRIBUTE(smn_node); DEFINE_SHOW_STORE_ATTRIBUTE(smn_address); DEFINE_SHOW_STORE_ATTRIBUTE(smn_value); -static int amd_cache_roots(void) -{ - u16 node, num_nodes = amd_num_nodes(); - - amd_roots = kcalloc(num_nodes, sizeof(*amd_roots), GFP_KERNEL); - if (!amd_roots) - return -ENOMEM; - - for (node = 0; node < num_nodes; node++) - amd_roots[node] = amd_node_get_root(node); - - return 0; -} - -static int reserve_root_config_spaces(void) +static struct pci_dev *get_next_root(struct pci_dev *root) { - struct pci_dev *root = NULL; - struct pci_bus *bus = NULL; - - while ((bus = pci_find_next_bus(bus))) { - /* Root device is Device 0 Function 0 on each Primary Bus. */ - root = pci_get_slot(bus, 0); - if (!root) + while ((root = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, root))) { + /* Root device is Device 0 Function 0. */ + if (root->devfn) continue; if (root->vendor != PCI_VENDOR_ID_AMD && root->vendor != PCI_VENDOR_ID_HYGON) continue; - pci_dbg(root, "Reserving PCI config space\n"); - - /* - * There are a few SMN index/data pairs and other registers - * that shouldn't be accessed by user space. - * So reserve the entire PCI config space for simplicity rather - * than covering specific registers piecemeal. - */ - if (!pci_request_config_region_exclusive(root, 0, PCI_CFG_SPACE_SIZE, NULL)) { - pci_err(root, "Failed to reserve config space\n"); - return -EEXIST; - } + break; } - smn_exclusive = true; - return 0; + return root; } static bool enable_dfs; @@ -332,7 +246,8 @@ __setup("amd_smn_debugfs_enable", amd_smn_enable_dfs); static int __init amd_smn_init(void) { - int err; + u16 count, num_roots, roots_per_node, node, num_nodes; + struct pci_dev *root; if (!cpu_feature_enabled(X86_FEATURE_ZEN)) return 0; @@ -342,13 +257,48 @@ static int __init amd_smn_init(void) if (amd_roots) return 0; - err = amd_cache_roots(); - if (err) - return err; + num_roots = 0; + root = NULL; + while ((root = get_next_root(root))) { + pci_dbg(root, "Reserving PCI config space\n"); - err = reserve_root_config_spaces(); - if (err) - return err; + /* + * There are a few SMN index/data pairs and other registers + * that shouldn't be accessed by user space. So reserve the + * entire PCI config space for simplicity rather than covering + * specific registers piecemeal. + */ + if (!pci_request_config_region_exclusive(root, 0, PCI_CFG_SPACE_SIZE, NULL)) { + pci_err(root, "Failed to reserve config space\n"); + return -EEXIST; + } + + num_roots++; + } + + pr_debug("Found %d AMD root devices\n", num_roots); + + if (!num_roots) + return -ENODEV; + + num_nodes = amd_num_nodes(); + amd_roots = kcalloc(num_nodes, sizeof(*amd_roots), GFP_KERNEL); + if (!amd_roots) + return -ENOMEM; + + roots_per_node = num_roots / num_nodes; + + count = 0; + node = 0; + root = NULL; + while (node < num_nodes && (root = get_next_root(root))) { + /* Use one root for each node and skip the rest. */ + if (count++ % roots_per_node) + continue; + + pci_dbg(root, "is root for AMD node %u\n", node); + amd_roots[node++] = root; + } if (enable_dfs) { debugfs_dir = debugfs_create_dir("amd_smn", arch_debugfs_dir); @@ -358,6 +308,8 @@ static int __init amd_smn_init(void) debugfs_create_file("value", 0600, debugfs_dir, NULL, &smn_value_fops); } + smn_exclusive = true; + return 0; } diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 680d305589a3..9c29e12b84e5 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -36,6 +36,7 @@ #include <linux/dmi.h> #include <linux/smp.h> #include <linux/mm.h> +#include <linux/kvm_types.h> #include <xen/xen.h> @@ -173,6 +174,7 @@ static struct resource lapic_resource = { .flags = IORESOURCE_MEM | IORESOURCE_BUSY, }; +/* Measured in ticks per HZ. */ unsigned int lapic_timer_period = 0; static void apic_pm_activate(void); @@ -792,6 +794,7 @@ static int __init calibrate_APIC_clock(void) { struct clock_event_device *levt = this_cpu_ptr(&lapic_events); u64 tsc_perj = 0, tsc_start = 0; + long delta_tsc_khz, bus_khz; unsigned long jif_start; unsigned long deltaj; long delta, deltatsc; @@ -894,14 +897,15 @@ static int __init calibrate_APIC_clock(void) apic_pr_verbose("..... calibration result: %u\n", lapic_timer_period); if (boot_cpu_has(X86_FEATURE_TSC)) { - apic_pr_verbose("..... CPU clock speed is %ld.%04ld MHz.\n", - (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ), - (deltatsc / LAPIC_CAL_LOOPS) % (1000000 / HZ)); + delta_tsc_khz = (deltatsc * HZ) / (1000 * LAPIC_CAL_LOOPS); + + apic_pr_verbose("..... CPU clock speed is %ld.%03ld MHz.\n", + delta_tsc_khz / 1000, delta_tsc_khz % 1000); } - apic_pr_verbose("..... host bus clock speed is %u.%04u MHz.\n", - lapic_timer_period / (1000000 / HZ), - lapic_timer_period % (1000000 / HZ)); + bus_khz = (long)lapic_timer_period * HZ / 1000; + apic_pr_verbose("..... host bus clock speed is %ld.%03ld MHz.\n", + bus_khz / 1000, bus_khz % 1000); /* * Do a sanity check on the APIC calibration result @@ -2316,7 +2320,7 @@ u32 x86_msi_msg_get_destid(struct msi_msg *msg, bool extid) dest |= msg->arch_addr_hi.destid_8_31 << 8; return dest; } -EXPORT_SYMBOL_GPL(x86_msi_msg_get_destid); +EXPORT_SYMBOL_FOR_KVM(x86_msi_msg_get_destid); static void __init apic_bsp_up_setup(void) { diff --git a/arch/x86/kernel/apic/apic_common.c b/arch/x86/kernel/apic/apic_common.c index 9ef3be866832..2ed3b5c88c7f 100644 --- a/arch/x86/kernel/apic/apic_common.c +++ b/arch/x86/kernel/apic/apic_common.c @@ -4,6 +4,7 @@ * SPDX-License-Identifier: GPL-2.0 */ #include <linux/irq.h> +#include <linux/kvm_types.h> #include <asm/apic.h> #include "local.h" @@ -25,7 +26,7 @@ u32 default_cpu_present_to_apicid(int mps_cpu) else return BAD_APICID; } -EXPORT_SYMBOL_GPL(default_cpu_present_to_apicid); +EXPORT_SYMBOL_FOR_KVM(default_cpu_present_to_apicid); /* * Set up the logical destination ID when the APIC operates in logical diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 5ba2feb2c04c..1e0442e867b1 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2864,7 +2864,7 @@ int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, ioapic = mp_irqdomain_ioapic_idx(domain); pin = info->ioapic.pin; - if (irq_find_mapping(domain, (irq_hw_number_t)pin) > 0) + if (irq_resolve_mapping(domain, (irq_hw_number_t)pin)) return -EEXIST; data = kzalloc(sizeof(*data), GFP_KERNEL); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 5398db4dedb4..bc94ff1e250a 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -3,7 +3,7 @@ #include <linux/bitops.h> #include <linux/elf.h> #include <linux/mm.h> - +#include <linux/kvm_types.h> #include <linux/io.h> #include <linux/sched.h> #include <linux/sched/clock.h> @@ -516,7 +516,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) setup_force_cpu_cap(X86_FEATURE_ZEN5); break; case 0x50 ... 0x5f: - case 0x90 ... 0xaf: + case 0x80 ... 0xaf: case 0xc0 ... 0xcf: setup_force_cpu_cap(X86_FEATURE_ZEN6); break; @@ -1035,8 +1035,26 @@ static void init_amd_zen4(struct cpuinfo_x86 *c) } } +static const struct x86_cpu_id zen5_rdseed_microcode[] = { + ZEN_MODEL_STEP_UCODE(0x1a, 0x02, 0x1, 0x0b00215a), + ZEN_MODEL_STEP_UCODE(0x1a, 0x08, 0x1, 0x0b008121), + ZEN_MODEL_STEP_UCODE(0x1a, 0x11, 0x0, 0x0b101054), + ZEN_MODEL_STEP_UCODE(0x1a, 0x24, 0x0, 0x0b204037), + ZEN_MODEL_STEP_UCODE(0x1a, 0x44, 0x0, 0x0b404035), + ZEN_MODEL_STEP_UCODE(0x1a, 0x44, 0x1, 0x0b404108), + ZEN_MODEL_STEP_UCODE(0x1a, 0x60, 0x0, 0x0b600037), + ZEN_MODEL_STEP_UCODE(0x1a, 0x68, 0x0, 0x0b608038), + ZEN_MODEL_STEP_UCODE(0x1a, 0x70, 0x0, 0x0b700037), + {}, +}; + static void init_amd_zen5(struct cpuinfo_x86 *c) { + if (!x86_match_min_microcode_rev(zen5_rdseed_microcode)) { + clear_cpu_cap(c, X86_FEATURE_RDSEED); + msr_clear_bit(MSR_AMD64_CPUID_FN_7, 18); + pr_emerg_once("RDSEED32 is broken. Disabling the corresponding CPUID bit.\n"); + } } static void init_amd(struct cpuinfo_x86 *c) @@ -1300,7 +1318,7 @@ unsigned long amd_get_dr_addr_mask(unsigned int dr) return per_cpu(amd_dr_addr_mask[dr], smp_processor_id()); } -EXPORT_SYMBOL_GPL(amd_get_dr_addr_mask); +EXPORT_SYMBOL_FOR_KVM(amd_get_dr_addr_mask); static void zenbleed_check_cpu(void *unused) { @@ -1355,11 +1373,23 @@ static __init int print_s5_reset_status_mmio(void) return 0; value = ioread32(addr); - iounmap(addr); /* Value with "all bits set" is an error response and should be ignored. */ - if (value == U32_MAX) + if (value == U32_MAX) { + iounmap(addr); return 0; + } + + /* + * Clear all reason bits so they won't be retained if the next reset + * does not update the register. Besides, some bits are never cleared by + * hardware so it's software's responsibility to clear them. + * + * Writing the value back effectively clears all reason bits as they are + * write-1-to-clear. + */ + iowrite32(value, addr); + iounmap(addr); for (i = 0; i < ARRAY_SIZE(s5_reset_reason_txt); i++) { if (!(value & BIT(i))) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 6a526ae1fe99..d8660770dc6a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -16,6 +16,7 @@ #include <linux/sched/smt.h> #include <linux/pgtable.h> #include <linux/bpf.h> +#include <linux/kvm_types.h> #include <asm/spec-ctrl.h> #include <asm/cmdline.h> @@ -53,56 +54,8 @@ * mitigation option. */ -static void __init spectre_v1_select_mitigation(void); -static void __init spectre_v1_apply_mitigation(void); -static void __init spectre_v2_select_mitigation(void); -static void __init spectre_v2_update_mitigation(void); -static void __init spectre_v2_apply_mitigation(void); -static void __init retbleed_select_mitigation(void); -static void __init retbleed_update_mitigation(void); -static void __init retbleed_apply_mitigation(void); -static void __init spectre_v2_user_select_mitigation(void); -static void __init spectre_v2_user_update_mitigation(void); -static void __init spectre_v2_user_apply_mitigation(void); -static void __init ssb_select_mitigation(void); -static void __init ssb_apply_mitigation(void); -static void __init l1tf_select_mitigation(void); -static void __init l1tf_apply_mitigation(void); -static void __init mds_select_mitigation(void); -static void __init mds_update_mitigation(void); -static void __init mds_apply_mitigation(void); -static void __init taa_select_mitigation(void); -static void __init taa_update_mitigation(void); -static void __init taa_apply_mitigation(void); -static void __init mmio_select_mitigation(void); -static void __init mmio_update_mitigation(void); -static void __init mmio_apply_mitigation(void); -static void __init rfds_select_mitigation(void); -static void __init rfds_update_mitigation(void); -static void __init rfds_apply_mitigation(void); -static void __init srbds_select_mitigation(void); -static void __init srbds_apply_mitigation(void); -static void __init l1d_flush_select_mitigation(void); -static void __init srso_select_mitigation(void); -static void __init srso_update_mitigation(void); -static void __init srso_apply_mitigation(void); -static void __init gds_select_mitigation(void); -static void __init gds_apply_mitigation(void); -static void __init bhi_select_mitigation(void); -static void __init bhi_update_mitigation(void); -static void __init bhi_apply_mitigation(void); -static void __init its_select_mitigation(void); -static void __init its_update_mitigation(void); -static void __init its_apply_mitigation(void); -static void __init tsa_select_mitigation(void); -static void __init tsa_apply_mitigation(void); -static void __init vmscape_select_mitigation(void); -static void __init vmscape_update_mitigation(void); -static void __init vmscape_apply_mitigation(void); - /* The base value of the SPEC_CTRL MSR without task-specific bits set */ u64 x86_spec_ctrl_base; -EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); /* The current value of the SPEC_CTRL MSR with task-specific bits set */ DEFINE_PER_CPU(u64, x86_spec_ctrl_current); @@ -179,7 +132,7 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); /* Control IBPB on vCPU load */ DEFINE_STATIC_KEY_FALSE(switch_vcpu_ibpb); -EXPORT_SYMBOL_GPL(switch_vcpu_ibpb); +EXPORT_SYMBOL_FOR_KVM(switch_vcpu_ibpb); /* Control CPU buffer clear before idling (halt, mwait) */ DEFINE_STATIC_KEY_FALSE(cpu_buf_idle_clear); @@ -198,7 +151,7 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); * mitigation is required. */ DEFINE_STATIC_KEY_FALSE(cpu_buf_vm_clear); -EXPORT_SYMBOL_GPL(cpu_buf_vm_clear); +EXPORT_SYMBOL_FOR_KVM(cpu_buf_vm_clear); #undef pr_fmt #define pr_fmt(fmt) "mitigations: " fmt @@ -233,99 +186,6 @@ static void __init cpu_print_attack_vectors(void) } } -void __init cpu_select_mitigations(void) -{ - /* - * Read the SPEC_CTRL MSR to account for reserved bits which may - * have unknown values. AMD64_LS_CFG MSR is cached in the early AMD - * init code as it is not enumerated and depends on the family. - */ - if (cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL)) { - rdmsrq(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); - - /* - * Previously running kernel (kexec), may have some controls - * turned ON. Clear them and let the mitigations setup below - * rediscover them based on configuration. - */ - x86_spec_ctrl_base &= ~SPEC_CTRL_MITIGATIONS_MASK; - } - - x86_arch_cap_msr = x86_read_arch_cap_msr(); - - cpu_print_attack_vectors(); - - /* Select the proper CPU mitigations before patching alternatives: */ - spectre_v1_select_mitigation(); - spectre_v2_select_mitigation(); - retbleed_select_mitigation(); - spectre_v2_user_select_mitigation(); - ssb_select_mitigation(); - l1tf_select_mitigation(); - mds_select_mitigation(); - taa_select_mitigation(); - mmio_select_mitigation(); - rfds_select_mitigation(); - srbds_select_mitigation(); - l1d_flush_select_mitigation(); - srso_select_mitigation(); - gds_select_mitigation(); - its_select_mitigation(); - bhi_select_mitigation(); - tsa_select_mitigation(); - vmscape_select_mitigation(); - - /* - * After mitigations are selected, some may need to update their - * choices. - */ - spectre_v2_update_mitigation(); - /* - * retbleed_update_mitigation() relies on the state set by - * spectre_v2_update_mitigation(); specifically it wants to know about - * spectre_v2=ibrs. - */ - retbleed_update_mitigation(); - /* - * its_update_mitigation() depends on spectre_v2_update_mitigation() - * and retbleed_update_mitigation(). - */ - its_update_mitigation(); - - /* - * spectre_v2_user_update_mitigation() depends on - * retbleed_update_mitigation(), specifically the STIBP - * selection is forced for UNRET or IBPB. - */ - spectre_v2_user_update_mitigation(); - mds_update_mitigation(); - taa_update_mitigation(); - mmio_update_mitigation(); - rfds_update_mitigation(); - bhi_update_mitigation(); - /* srso_update_mitigation() depends on retbleed_update_mitigation(). */ - srso_update_mitigation(); - vmscape_update_mitigation(); - - spectre_v1_apply_mitigation(); - spectre_v2_apply_mitigation(); - retbleed_apply_mitigation(); - spectre_v2_user_apply_mitigation(); - ssb_apply_mitigation(); - l1tf_apply_mitigation(); - mds_apply_mitigation(); - taa_apply_mitigation(); - mmio_apply_mitigation(); - rfds_apply_mitigation(); - srbds_apply_mitigation(); - srso_apply_mitigation(); - gds_apply_mitigation(); - its_apply_mitigation(); - bhi_apply_mitigation(); - tsa_apply_mitigation(); - vmscape_apply_mitigation(); -} - /* * NOTE: This function is *only* called for SVM, since Intel uses * MSR_IA32_SPEC_CTRL for SSBD. @@ -366,7 +226,7 @@ x86_virt_spec_ctrl(u64 guest_virt_spec_ctrl, bool setguest) speculation_ctrl_update(tif); } } -EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl); +EXPORT_SYMBOL_FOR_KVM(x86_virt_spec_ctrl); static void x86_amd_ssb_disable(void) { @@ -1032,7 +892,7 @@ bool gds_ucode_mitigated(void) return (gds_mitigation == GDS_MITIGATION_FULL || gds_mitigation == GDS_MITIGATION_FULL_LOCKED); } -EXPORT_SYMBOL_GPL(gds_ucode_mitigated); +EXPORT_SYMBOL_FOR_KVM(gds_ucode_mitigated); void update_gds_msr(void) { @@ -1463,7 +1323,9 @@ static void __init retbleed_update_mitigation(void) break; default: if (retbleed_mitigation != RETBLEED_MITIGATION_STUFF) { - pr_err(RETBLEED_INTEL_MSG); + if (retbleed_mitigation != RETBLEED_MITIGATION_NONE) + pr_err(RETBLEED_INTEL_MSG); + retbleed_mitigation = RETBLEED_MITIGATION_NONE; } } @@ -1825,13 +1687,6 @@ void unpriv_ebpf_notify(int new_state) } #endif -static inline bool match_option(const char *arg, int arglen, const char *opt) -{ - int len = strlen(opt); - - return len == arglen && !strncmp(arg, opt, len); -} - /* The kernel command line selection for spectre v2 */ enum spectre_v2_mitigation_cmd { SPECTRE_V2_CMD_NONE, @@ -2864,7 +2719,7 @@ void x86_spec_ctrl_setup_ap(void) } bool itlb_multihit_kvm_mitigation; -EXPORT_SYMBOL_GPL(itlb_multihit_kvm_mitigation); +EXPORT_SYMBOL_FOR_KVM(itlb_multihit_kvm_mitigation); #undef pr_fmt #define pr_fmt(fmt) "L1TF: " fmt @@ -2872,11 +2727,9 @@ EXPORT_SYMBOL_GPL(itlb_multihit_kvm_mitigation); /* Default mitigation for L1TF-affected CPUs */ enum l1tf_mitigations l1tf_mitigation __ro_after_init = IS_ENABLED(CONFIG_MITIGATION_L1TF) ? L1TF_MITIGATION_AUTO : L1TF_MITIGATION_OFF; -#if IS_ENABLED(CONFIG_KVM_INTEL) -EXPORT_SYMBOL_GPL(l1tf_mitigation); -#endif +EXPORT_SYMBOL_FOR_KVM(l1tf_mitigation); enum vmx_l1d_flush_state l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO; -EXPORT_SYMBOL_GPL(l1tf_vmx_mitigation); +EXPORT_SYMBOL_FOR_KVM(l1tf_vmx_mitigation); /* * These CPUs all support 44bits physical address space internally in the @@ -3376,6 +3229,99 @@ void cpu_bugs_smt_update(void) mutex_unlock(&spec_ctrl_mutex); } +void __init cpu_select_mitigations(void) +{ + /* + * Read the SPEC_CTRL MSR to account for reserved bits which may + * have unknown values. AMD64_LS_CFG MSR is cached in the early AMD + * init code as it is not enumerated and depends on the family. + */ + if (cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL)) { + rdmsrq(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + + /* + * Previously running kernel (kexec), may have some controls + * turned ON. Clear them and let the mitigations setup below + * rediscover them based on configuration. + */ + x86_spec_ctrl_base &= ~SPEC_CTRL_MITIGATIONS_MASK; + } + + x86_arch_cap_msr = x86_read_arch_cap_msr(); + + cpu_print_attack_vectors(); + + /* Select the proper CPU mitigations before patching alternatives: */ + spectre_v1_select_mitigation(); + spectre_v2_select_mitigation(); + retbleed_select_mitigation(); + spectre_v2_user_select_mitigation(); + ssb_select_mitigation(); + l1tf_select_mitigation(); + mds_select_mitigation(); + taa_select_mitigation(); + mmio_select_mitigation(); + rfds_select_mitigation(); + srbds_select_mitigation(); + l1d_flush_select_mitigation(); + srso_select_mitigation(); + gds_select_mitigation(); + its_select_mitigation(); + bhi_select_mitigation(); + tsa_select_mitigation(); + vmscape_select_mitigation(); + + /* + * After mitigations are selected, some may need to update their + * choices. + */ + spectre_v2_update_mitigation(); + /* + * retbleed_update_mitigation() relies on the state set by + * spectre_v2_update_mitigation(); specifically it wants to know about + * spectre_v2=ibrs. + */ + retbleed_update_mitigation(); + /* + * its_update_mitigation() depends on spectre_v2_update_mitigation() + * and retbleed_update_mitigation(). + */ + its_update_mitigation(); + + /* + * spectre_v2_user_update_mitigation() depends on + * retbleed_update_mitigation(), specifically the STIBP + * selection is forced for UNRET or IBPB. + */ + spectre_v2_user_update_mitigation(); + mds_update_mitigation(); + taa_update_mitigation(); + mmio_update_mitigation(); + rfds_update_mitigation(); + bhi_update_mitigation(); + /* srso_update_mitigation() depends on retbleed_update_mitigation(). */ + srso_update_mitigation(); + vmscape_update_mitigation(); + + spectre_v1_apply_mitigation(); + spectre_v2_apply_mitigation(); + retbleed_apply_mitigation(); + spectre_v2_user_apply_mitigation(); + ssb_apply_mitigation(); + l1tf_apply_mitigation(); + mds_apply_mitigation(); + taa_apply_mitigation(); + mmio_apply_mitigation(); + rfds_apply_mitigation(); + srbds_apply_mitigation(); + srso_apply_mitigation(); + gds_apply_mitigation(); + its_apply_mitigation(); + bhi_apply_mitigation(); + tsa_apply_mitigation(); + vmscape_apply_mitigation(); +} + #ifdef CONFIG_SYSFS #define L1TF_DEFAULT_MSG "Mitigation: PTE Inversion" diff --git a/arch/x86/kernel/cpu/bus_lock.c b/arch/x86/kernel/cpu/bus_lock.c index 981f8b1f0792..dbc99a47be45 100644 --- a/arch/x86/kernel/cpu/bus_lock.c +++ b/arch/x86/kernel/cpu/bus_lock.c @@ -6,6 +6,7 @@ #include <linux/workqueue.h> #include <linux/delay.h> #include <linux/cpuhotplug.h> +#include <linux/kvm_types.h> #include <asm/cpu_device_id.h> #include <asm/cmdline.h> #include <asm/traps.h> @@ -289,7 +290,7 @@ bool handle_guest_split_lock(unsigned long ip) force_sig_fault(SIGBUS, BUS_ADRALN, NULL); return false; } -EXPORT_SYMBOL_GPL(handle_guest_split_lock); +EXPORT_SYMBOL_FOR_KVM(handle_guest_split_lock); void bus_lock_init(void) { diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index d01dd88fae7d..e7ab22fce3b5 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -7,6 +7,7 @@ #include <linux/bitops.h> #include <linux/kernel.h> #include <linux/export.h> +#include <linux/kvm_types.h> #include <linux/percpu.h> #include <linux/string.h> #include <linux/ctype.h> @@ -78,6 +79,10 @@ DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); EXPORT_PER_CPU_SYMBOL(cpu_info); +/* Used for modules: built-in code uses runtime constants */ +unsigned long USER_PTR_MAX; +EXPORT_SYMBOL(USER_PTR_MAX); + u32 elf_hwcap2 __read_mostly; /* Number of siblings per CPU package */ @@ -482,14 +487,14 @@ void cr4_update_irqsoff(unsigned long set, unsigned long clear) __write_cr4(newval); } } -EXPORT_SYMBOL(cr4_update_irqsoff); +EXPORT_SYMBOL_FOR_KVM(cr4_update_irqsoff); /* Read the CR4 shadow. */ unsigned long cr4_read_shadow(void) { return this_cpu_read(cpu_tlbstate.cr4); } -EXPORT_SYMBOL_GPL(cr4_read_shadow); +EXPORT_SYMBOL_FOR_KVM(cr4_read_shadow); void cr4_init(void) { @@ -744,7 +749,7 @@ void load_direct_gdt(int cpu) gdt_descr.size = GDT_SIZE - 1; load_gdt(&gdt_descr); } -EXPORT_SYMBOL_GPL(load_direct_gdt); +EXPORT_SYMBOL_FOR_KVM(load_direct_gdt); /* Load a fixmap remapping of the per-cpu GDT */ void load_fixmap_gdt(int cpu) @@ -2597,7 +2602,7 @@ void __init arch_cpu_finalize_init(void) alternative_instructions(); if (IS_ENABLED(CONFIG_X86_64)) { - unsigned long USER_PTR_MAX = TASK_SIZE_MAX; + USER_PTR_MAX = TASK_SIZE_MAX; /* * Enable this when LAM is gated on LASS support diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index bc38b2d56f26..5c7a3a71191a 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -42,15 +42,6 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[], *const __x86_cpu_dev_end[]; #ifdef CONFIG_CPU_SUP_INTEL -enum tsx_ctrl_states { - TSX_CTRL_ENABLE, - TSX_CTRL_DISABLE, - TSX_CTRL_RTM_ALWAYS_ABORT, - TSX_CTRL_NOT_SUPPORTED, -}; - -extern __ro_after_init enum tsx_ctrl_states tsx_ctrl_state; - extern void __init tsx_init(void); void tsx_ap_init(void); void intel_unlock_cpuid_leafs(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c index 98d0cdd82574..146f6f8b0650 100644 --- a/arch/x86/kernel/cpu/cpuid-deps.c +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -72,6 +72,7 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_CQM_MBM_LOCAL, X86_FEATURE_CQM_LLC }, { X86_FEATURE_BMEC, X86_FEATURE_CQM_MBM_TOTAL }, { X86_FEATURE_BMEC, X86_FEATURE_CQM_MBM_LOCAL }, + { X86_FEATURE_SDCIAE, X86_FEATURE_CAT_L3 }, { X86_FEATURE_AVX512_BF16, X86_FEATURE_AVX512VL }, { X86_FEATURE_AVX512_FP16, X86_FEATURE_AVX512BW }, { X86_FEATURE_ENQCMD, X86_FEATURE_XSAVES }, @@ -79,6 +80,7 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_SGX_LC, X86_FEATURE_SGX }, { X86_FEATURE_SGX1, X86_FEATURE_SGX }, { X86_FEATURE_SGX2, X86_FEATURE_SGX1 }, + { X86_FEATURE_SGX_EUPDATESVN, X86_FEATURE_SGX1 }, { X86_FEATURE_SGX_EDECCSSA, X86_FEATURE_SGX1 }, { X86_FEATURE_XFD, X86_FEATURE_XSAVES }, { X86_FEATURE_XFD, X86_FEATURE_XGETBV1 }, diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index d6906442f49b..3f1dda355307 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -43,9 +43,6 @@ /* Deferred error settings */ #define MSR_CU_DEF_ERR 0xC0000410 #define MASK_DEF_LVTOFF 0x000000F0 -#define MASK_DEF_INT_TYPE 0x00000006 -#define DEF_LVT_OFF 0x2 -#define DEF_INT_TYPE_APIC 0x2 /* Scalable MCA: */ @@ -54,6 +51,17 @@ static bool thresholding_irq_en; +struct mce_amd_cpu_data { + mce_banks_t thr_intr_banks; + mce_banks_t dfr_intr_banks; + + u32 thr_intr_en: 1, + dfr_intr_en: 1, + __resv: 30; +}; + +static DEFINE_PER_CPU_READ_MOSTLY(struct mce_amd_cpu_data, mce_amd_data); + static const char * const th_names[] = { "load_store", "insn_fetch", @@ -79,6 +87,8 @@ struct smca_bank { const struct smca_hwid *hwid; u32 id; /* Value of MCA_IPID[InstanceId]. */ u8 sysfs_id; /* Value used for sysfs name. */ + u64 paddrv :1, /* Physical Address Valid bit in MCA_CONFIG */ + __reserved :63; }; static DEFINE_PER_CPU_READ_MOSTLY(struct smca_bank[MAX_NR_BANKS], smca_banks); @@ -264,6 +274,7 @@ void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt; static void smca_configure(unsigned int bank, unsigned int cpu) { + struct mce_amd_cpu_data *data = this_cpu_ptr(&mce_amd_data); u8 *bank_counts = this_cpu_ptr(smca_bank_counts); const struct smca_hwid *s_hwid; unsigned int i, hwid_mcatype; @@ -294,11 +305,33 @@ static void smca_configure(unsigned int bank, unsigned int cpu) * APIC based interrupt. First, check that no interrupt has been * set. */ - if ((low & BIT(5)) && !((high >> 5) & 0x3)) + if ((low & BIT(5)) && !((high >> 5) & 0x3) && data->dfr_intr_en) { + __set_bit(bank, data->dfr_intr_banks); high |= BIT(5); + } + + /* + * SMCA Corrected Error Interrupt + * + * MCA_CONFIG[IntPresent] is bit 10, and tells us if the bank can + * send an MCA Thresholding interrupt without the OS initializing + * this feature. This can be used if the threshold limit is managed + * by the platform. + * + * MCA_CONFIG[IntEn] is bit 40 (8 in the high portion of the MSR). + * The OS should set this to inform the platform that the OS is ready + * to handle the MCA Thresholding interrupt. + */ + if ((low & BIT(10)) && data->thr_intr_en) { + __set_bit(bank, data->thr_intr_banks); + high |= BIT(8); + } this_cpu_ptr(mce_banks_array)[bank].lsb_in_status = !!(low & BIT(8)); + if (low & MCI_CONFIG_PADDRV) + this_cpu_ptr(smca_banks)[bank].paddrv = 1; + wrmsr(smca_config, low, high); } @@ -368,6 +401,14 @@ static bool lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi) { int msr = (hi & MASK_LVTOFF_HI) >> 20; + /* + * On SMCA CPUs, LVT offset is programmed at a different MSR, and + * the BIOS provides the value. The original field where LVT offset + * was set is reserved. Return early here: + */ + if (mce_flags.smca) + return false; + if (apic < 0) { pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt " "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu, @@ -376,14 +417,6 @@ static bool lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi) } if (apic != msr) { - /* - * On SMCA CPUs, LVT offset is programmed at a different MSR, and - * the BIOS provides the value. The original field where LVT offset - * was set is reserved. Return early here: - */ - if (mce_flags.smca) - return false; - pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d " "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu, apic, b->bank, b->block, b->address, hi, lo); @@ -443,6 +476,36 @@ static void threshold_restart_block(void *_tr) wrmsr(tr->b->address, lo, hi); } +static void threshold_restart_bank(unsigned int bank, bool intr_en) +{ + struct threshold_bank **thr_banks = this_cpu_read(threshold_banks); + struct threshold_block *block, *tmp; + struct thresh_restart tr; + + if (!thr_banks || !thr_banks[bank]) + return; + + memset(&tr, 0, sizeof(tr)); + + list_for_each_entry_safe(block, tmp, &thr_banks[bank]->miscj, miscj) { + tr.b = block; + tr.b->interrupt_enable = intr_en; + threshold_restart_block(&tr); + } +} + +/* Try to use the threshold limit reported through APEI. */ +static u16 get_thr_limit(void) +{ + u32 thr_limit = mce_get_apei_thr_limit(); + + /* Fallback to old default if APEI limit is not available. */ + if (!thr_limit) + return THRESHOLD_MAX; + + return min(thr_limit, THRESHOLD_MAX); +} + static void mce_threshold_block_init(struct threshold_block *b, int offset) { struct thresh_restart tr = { @@ -451,7 +514,7 @@ static void mce_threshold_block_init(struct threshold_block *b, int offset) .lvt_off = offset, }; - b->threshold_limit = THRESHOLD_MAX; + b->threshold_limit = get_thr_limit(); threshold_restart_block(&tr); }; @@ -464,41 +527,6 @@ static int setup_APIC_mce_threshold(int reserved, int new) return reserved; } -static int setup_APIC_deferred_error(int reserved, int new) -{ - if (reserved < 0 && !setup_APIC_eilvt(new, DEFERRED_ERROR_VECTOR, - APIC_EILVT_MSG_FIX, 0)) - return new; - - return reserved; -} - -static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c) -{ - u32 low = 0, high = 0; - int def_offset = -1, def_new; - - if (rdmsr_safe(MSR_CU_DEF_ERR, &low, &high)) - return; - - def_new = (low & MASK_DEF_LVTOFF) >> 4; - if (!(low & MASK_DEF_LVTOFF)) { - pr_err(FW_BUG "Your BIOS is not setting up LVT offset 0x2 for deferred error IRQs correctly.\n"); - def_new = DEF_LVT_OFF; - low = (low & ~MASK_DEF_LVTOFF) | (DEF_LVT_OFF << 4); - } - - def_offset = setup_APIC_deferred_error(def_offset, def_new); - if ((def_offset == def_new) && - (deferred_error_int_vector != amd_deferred_error_interrupt)) - deferred_error_int_vector = amd_deferred_error_interrupt; - - if (!mce_flags.smca) - low = (low & ~MASK_DEF_INT_TYPE) | DEF_INT_TYPE_APIC; - - wrmsr(MSR_CU_DEF_ERR, low, high); -} - static u32 get_block_address(u32 current_addr, u32 low, u32 high, unsigned int bank, unsigned int block, unsigned int cpu) @@ -534,12 +562,10 @@ static u32 get_block_address(u32 current_addr, u32 low, u32 high, return addr; } -static int -prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr, - int offset, u32 misc_high) +static int prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr, + int offset, u32 misc_high) { unsigned int cpu = smp_processor_id(); - u32 smca_low, smca_high; struct threshold_block b; int new; @@ -556,20 +582,13 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr, if (!b.interrupt_capable) goto done; + __set_bit(bank, this_cpu_ptr(&mce_amd_data)->thr_intr_banks); b.interrupt_enable = 1; - if (!mce_flags.smca) { - new = (misc_high & MASK_LVTOFF_HI) >> 20; - goto set_offset; - } - - /* Gather LVT offset for thresholding: */ - if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high)) - goto out; - - new = (smca_low & SMCA_THR_LVT_OFF) >> 12; + if (mce_flags.smca) + goto done; -set_offset: + new = (misc_high & MASK_LVTOFF_HI) >> 20; offset = setup_APIC_mce_threshold(offset, new); if (offset == new) thresholding_irq_en = true; @@ -577,7 +596,6 @@ set_offset: done: mce_threshold_block_init(&b, offset); -out: return offset; } @@ -668,6 +686,32 @@ static void amd_apply_cpu_quirks(struct cpuinfo_x86 *c) mce_banks[0].ctl = 0; } +/* + * Enable the APIC LVT interrupt vectors once per-CPU. This should be done before hardware is + * ready to send interrupts. + * + * Individual error sources are enabled later during per-bank init. + */ +static void smca_enable_interrupt_vectors(void) +{ + struct mce_amd_cpu_data *data = this_cpu_ptr(&mce_amd_data); + u64 mca_intr_cfg, offset; + + if (!mce_flags.smca || !mce_flags.succor) + return; + + if (rdmsrq_safe(MSR_CU_DEF_ERR, &mca_intr_cfg)) + return; + + offset = (mca_intr_cfg & SMCA_THR_LVT_OFF) >> 12; + if (!setup_APIC_eilvt(offset, THRESHOLD_APIC_VECTOR, APIC_EILVT_MSG_FIX, 0)) + data->thr_intr_en = 1; + + offset = (mca_intr_cfg & MASK_DEF_LVTOFF) >> 4; + if (!setup_APIC_eilvt(offset, DEFERRED_ERROR_VECTOR, APIC_EILVT_MSG_FIX, 0)) + data->dfr_intr_en = 1; +} + /* cpu init entry point, called from mce.c with preempt off */ void mce_amd_feature_init(struct cpuinfo_x86 *c) { @@ -679,10 +723,16 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) mce_flags.amd_threshold = 1; + smca_enable_interrupt_vectors(); + for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) { - if (mce_flags.smca) + if (mce_flags.smca) { smca_configure(bank, cpu); + if (!this_cpu_ptr(&mce_amd_data)->thr_intr_en) + continue; + } + disable_err_thresholding(c, bank); for (block = 0; block < NR_BLOCKS; ++block) { @@ -703,9 +753,6 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) offset = prepare_threshold_block(bank, block, address, offset, high); } } - - if (mce_flags.succor) - deferred_error_interrupt_enable(c); } void smca_bsp_init(void) @@ -748,9 +795,9 @@ bool amd_mce_is_memory_error(struct mce *m) } /* - * AMD systems do not have an explicit indicator that the value in MCA_ADDR is - * a system physical address. Therefore, individual cases need to be detected. - * Future cases and checks will be added as needed. + * Some AMD systems have an explicit indicator that the value in MCA_ADDR is a + * system physical address. Individual cases though, need to be detected for + * other systems. Future cases will be added as needed. * * 1) General case * a) Assume address is not usable. @@ -764,6 +811,8 @@ bool amd_mce_is_memory_error(struct mce *m) * a) Reported in legacy bank 4 with extended error code (XEC) 8. * b) MCA_STATUS[43] is *not* defined as poison in legacy bank 4. Therefore, * this bit should not be checked. + * 4) MCI_STATUS_PADDRVAL is set + * a) Will provide a valid system physical address. * * NOTE: SMCA UMC memory errors fall into case #1. */ @@ -777,6 +826,9 @@ bool amd_mce_usable_address(struct mce *m) return false; } + if (this_cpu_ptr(smca_banks)[m->bank].paddrv) + return m->status & MCI_STATUS_PADDRV; + /* Check poison bit for all other bank types. */ if (m->status & MCI_STATUS_POISON) return true; @@ -785,37 +837,6 @@ bool amd_mce_usable_address(struct mce *m) return false; } -static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc) -{ - struct mce_hw_err err; - struct mce *m = &err.m; - - mce_prep_record(&err); - - m->status = status; - m->misc = misc; - m->bank = bank; - m->tsc = rdtsc(); - - if (m->status & MCI_STATUS_ADDRV) { - m->addr = addr; - - smca_extract_err_addr(m); - } - - if (mce_flags.smca) { - rdmsrq(MSR_AMD64_SMCA_MCx_IPID(bank), m->ipid); - - if (m->status & MCI_STATUS_SYNDV) { - rdmsrq(MSR_AMD64_SMCA_MCx_SYND(bank), m->synd); - rdmsrq(MSR_AMD64_SMCA_MCx_SYND1(bank), err.vendor.amd.synd1); - rdmsrq(MSR_AMD64_SMCA_MCx_SYND2(bank), err.vendor.amd.synd2); - } - } - - mce_log(&err); -} - DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error) { trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR); @@ -825,103 +846,20 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error) apic_eoi(); } -/* - * Returns true if the logged error is deferred. False, otherwise. - */ -static inline bool -_log_error_bank(unsigned int bank, u32 msr_stat, u32 msr_addr, u64 misc) -{ - u64 status, addr = 0; - - rdmsrq(msr_stat, status); - if (!(status & MCI_STATUS_VAL)) - return false; - - if (status & MCI_STATUS_ADDRV) - rdmsrq(msr_addr, addr); - - __log_error(bank, status, addr, misc); - - wrmsrq(msr_stat, 0); - - return status & MCI_STATUS_DEFERRED; -} - -static bool _log_error_deferred(unsigned int bank, u32 misc) -{ - if (!_log_error_bank(bank, mca_msr_reg(bank, MCA_STATUS), - mca_msr_reg(bank, MCA_ADDR), misc)) - return false; - - /* - * Non-SMCA systems don't have MCA_DESTAT/MCA_DEADDR registers. - * Return true here to avoid accessing these registers. - */ - if (!mce_flags.smca) - return true; - - /* Clear MCA_DESTAT if the deferred error was logged from MCA_STATUS. */ - wrmsrq(MSR_AMD64_SMCA_MCx_DESTAT(bank), 0); - return true; -} - -/* - * We have three scenarios for checking for Deferred errors: - * - * 1) Non-SMCA systems check MCA_STATUS and log error if found. - * 2) SMCA systems check MCA_STATUS. If error is found then log it and also - * clear MCA_DESTAT. - * 3) SMCA systems check MCA_DESTAT, if error was not found in MCA_STATUS, and - * log it. - */ -static void log_error_deferred(unsigned int bank) -{ - if (_log_error_deferred(bank, 0)) - return; - - /* - * Only deferred errors are logged in MCA_DE{STAT,ADDR} so just check - * for a valid error. - */ - _log_error_bank(bank, MSR_AMD64_SMCA_MCx_DESTAT(bank), - MSR_AMD64_SMCA_MCx_DEADDR(bank), 0); -} - /* APIC interrupt handler for deferred errors */ static void amd_deferred_error_interrupt(void) { - unsigned int bank; - - for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) - log_error_deferred(bank); + machine_check_poll(MCP_TIMESTAMP, &this_cpu_ptr(&mce_amd_data)->dfr_intr_banks); } -static void log_error_thresholding(unsigned int bank, u64 misc) +void mce_amd_handle_storm(unsigned int bank, bool on) { - _log_error_deferred(bank, misc); + threshold_restart_bank(bank, on); } -static void log_and_reset_block(struct threshold_block *block) +static void amd_reset_thr_limit(unsigned int bank) { - struct thresh_restart tr; - u32 low = 0, high = 0; - - if (!block) - return; - - if (rdmsr_safe(block->address, &low, &high)) - return; - - if (!(high & MASK_OVERFLOW_HI)) - return; - - /* Log the MCE which caused the threshold event. */ - log_error_thresholding(block->bank, ((u64)high << 32) | low); - - /* Reset threshold block after logging error. */ - memset(&tr, 0, sizeof(tr)); - tr.b = block; - threshold_restart_block(&tr); + threshold_restart_bank(bank, true); } /* @@ -930,33 +868,21 @@ static void log_and_reset_block(struct threshold_block *block) */ static void amd_threshold_interrupt(void) { - struct threshold_bank **bp = this_cpu_read(threshold_banks), *thr_bank; - unsigned int bank, cpu = smp_processor_id(); - struct threshold_block *block, *tmp; - - /* - * Validate that the threshold bank has been initialized already. The - * handler is installed at boot time, but on a hotplug event the - * interrupt might fire before the data has been initialized. - */ - if (!bp) - return; - - for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) { - if (!(per_cpu(bank_map, cpu) & BIT_ULL(bank))) - continue; - - thr_bank = bp[bank]; - if (!thr_bank) - continue; - - list_for_each_entry_safe(block, tmp, &thr_bank->miscj, miscj) - log_and_reset_block(block); - } + machine_check_poll(MCP_TIMESTAMP, &this_cpu_ptr(&mce_amd_data)->thr_intr_banks); } void amd_clear_bank(struct mce *m) { + amd_reset_thr_limit(m->bank); + + /* Clear MCA_DESTAT for all deferred errors even those logged in MCA_STATUS. */ + if (m->status & MCI_STATUS_DEFERRED) + mce_wrmsrq(MSR_AMD64_SMCA_MCx_DESTAT(m->bank), 0); + + /* Don't clear MCA_STATUS if MCA_DESTAT was used exclusively. */ + if (m->kflags & MCE_CHECK_DFR_REGS) + return; + mce_wrmsrq(mca_msr_reg(m->bank, MCA_STATUS), 0); } @@ -1172,7 +1098,7 @@ static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb b->address = address; b->interrupt_enable = 0; b->interrupt_capable = lvt_interrupt_supported(bank, high); - b->threshold_limit = THRESHOLD_MAX; + b->threshold_limit = get_thr_limit(); if (b->interrupt_capable) { default_attrs[2] = &interrupt_enable.attr; @@ -1183,6 +1109,8 @@ static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb list_add(&b->miscj, &tb->miscj); + mce_threshold_block_init(b, (high & MASK_LVTOFF_HI) >> 20); + err = kobject_init_and_add(&b->kobj, &threshold_ktype, tb->kobj, get_name(cpu, bank, b)); if (err) goto out_free; diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 460e90a1a0b1..4aff14e04287 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -687,7 +687,10 @@ static noinstr void mce_read_aux(struct mce_hw_err *err, int i) m->misc = mce_rdmsrq(mca_msr_reg(i, MCA_MISC)); if (m->status & MCI_STATUS_ADDRV) { - m->addr = mce_rdmsrq(mca_msr_reg(i, MCA_ADDR)); + if (m->kflags & MCE_CHECK_DFR_REGS) + m->addr = mce_rdmsrq(MSR_AMD64_SMCA_MCx_DEADDR(i)); + else + m->addr = mce_rdmsrq(mca_msr_reg(i, MCA_ADDR)); /* * Mask the reported address by the reported granularity. @@ -715,6 +718,29 @@ static noinstr void mce_read_aux(struct mce_hw_err *err, int i) DEFINE_PER_CPU(unsigned, mce_poll_count); /* + * We have three scenarios for checking for Deferred errors: + * + * 1) Non-SMCA systems check MCA_STATUS and log error if found. + * 2) SMCA systems check MCA_STATUS. If error is found then log it and also + * clear MCA_DESTAT. + * 3) SMCA systems check MCA_DESTAT, if error was not found in MCA_STATUS, and + * log it. + */ +static bool smca_should_log_poll_error(struct mce *m) +{ + if (m->status & MCI_STATUS_VAL) + return true; + + m->status = mce_rdmsrq(MSR_AMD64_SMCA_MCx_DESTAT(m->bank)); + if ((m->status & MCI_STATUS_VAL) && (m->status & MCI_STATUS_DEFERRED)) { + m->kflags |= MCE_CHECK_DFR_REGS; + return true; + } + + return false; +} + +/* * Newer Intel systems that support software error * recovery need to make additional checks. Other * CPUs should skip over uncorrected errors, but log @@ -740,6 +766,9 @@ static bool should_log_poll_error(enum mcp_flags flags, struct mce_hw_err *err) { struct mce *m = &err->m; + if (mce_flags.smca) + return smca_should_log_poll_error(m); + /* If this entry is not valid, ignore it. */ if (!(m->status & MCI_STATUS_VAL)) return false; diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h index b0e00ec5cc8c..a31cf984619c 100644 --- a/arch/x86/kernel/cpu/mce/internal.h +++ b/arch/x86/kernel/cpu/mce/internal.h @@ -67,6 +67,7 @@ void mce_track_storm(struct mce *mce); void mce_inherit_storm(unsigned int bank); bool mce_get_storm_mode(void); void mce_set_storm_mode(bool storm); +u32 mce_get_apei_thr_limit(void); #else static inline void cmci_storm_begin(unsigned int bank) {} static inline void cmci_storm_end(unsigned int bank) {} @@ -74,6 +75,7 @@ static inline void mce_track_storm(struct mce *mce) {} static inline void mce_inherit_storm(unsigned int bank) {} static inline bool mce_get_storm_mode(void) { return false; } static inline void mce_set_storm_mode(bool storm) {} +static inline u32 mce_get_apei_thr_limit(void) { return 0; } #endif /* @@ -267,6 +269,7 @@ void mce_prep_record_per_cpu(unsigned int cpu, struct mce *m); #ifdef CONFIG_X86_MCE_AMD void mce_threshold_create_device(unsigned int cpu); void mce_threshold_remove_device(unsigned int cpu); +void mce_amd_handle_storm(unsigned int bank, bool on); extern bool amd_filter_mce(struct mce *m); bool amd_mce_usable_address(struct mce *m); void amd_clear_bank(struct mce *m); @@ -299,6 +302,7 @@ void smca_bsp_init(void); #else static inline void mce_threshold_create_device(unsigned int cpu) { } static inline void mce_threshold_remove_device(unsigned int cpu) { } +static inline void mce_amd_handle_storm(unsigned int bank, bool on) { } static inline bool amd_filter_mce(struct mce *m) { return false; } static inline bool amd_mce_usable_address(struct mce *m) { return false; } static inline void amd_clear_bank(struct mce *m) { } diff --git a/arch/x86/kernel/cpu/mce/threshold.c b/arch/x86/kernel/cpu/mce/threshold.c index f4a007616468..0d13c9ffcba0 100644 --- a/arch/x86/kernel/cpu/mce/threshold.c +++ b/arch/x86/kernel/cpu/mce/threshold.c @@ -13,6 +13,19 @@ #include "internal.h" +static u32 mce_apei_thr_limit; + +void mce_save_apei_thr_limit(u32 thr_limit) +{ + mce_apei_thr_limit = thr_limit; + pr_info("HEST corrected error threshold limit: %u\n", thr_limit); +} + +u32 mce_get_apei_thr_limit(void) +{ + return mce_apei_thr_limit; +} + static void default_threshold_interrupt(void) { pr_err("Unexpected threshold interrupt at vector %x\n", @@ -63,6 +76,9 @@ static void mce_handle_storm(unsigned int bank, bool on) case X86_VENDOR_INTEL: mce_intel_handle_storm(bank, on); break; + case X86_VENDOR_AMD: + mce_amd_handle_storm(bank, on); + break; } } @@ -85,7 +101,8 @@ void cmci_storm_end(unsigned int bank) { struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc); - __clear_bit(bank, this_cpu_ptr(mce_poll_banks)); + if (!mce_flags.amd_threshold) + __clear_bit(bank, this_cpu_ptr(mce_poll_banks)); storm->banks[bank].history = 0; storm->banks[bank].in_storm_mode = false; diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index cdce885e2fd5..3821a985f4ff 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -186,60 +186,92 @@ static u32 cpuid_to_ucode_rev(unsigned int val) return p.ucode_rev; } +static u32 get_cutoff_revision(u32 rev) +{ + switch (rev >> 8) { + case 0x80012: return 0x8001277; break; + case 0x80082: return 0x800820f; break; + case 0x83010: return 0x830107c; break; + case 0x86001: return 0x860010e; break; + case 0x86081: return 0x8608108; break; + case 0x87010: return 0x8701034; break; + case 0x8a000: return 0x8a0000a; break; + case 0xa0010: return 0xa00107a; break; + case 0xa0011: return 0xa0011da; break; + case 0xa0012: return 0xa001243; break; + case 0xa0082: return 0xa00820e; break; + case 0xa1011: return 0xa101153; break; + case 0xa1012: return 0xa10124e; break; + case 0xa1081: return 0xa108109; break; + case 0xa2010: return 0xa20102f; break; + case 0xa2012: return 0xa201212; break; + case 0xa4041: return 0xa404109; break; + case 0xa5000: return 0xa500013; break; + case 0xa6012: return 0xa60120a; break; + case 0xa7041: return 0xa704109; break; + case 0xa7052: return 0xa705208; break; + case 0xa7080: return 0xa708009; break; + case 0xa70c0: return 0xa70C009; break; + case 0xaa001: return 0xaa00116; break; + case 0xaa002: return 0xaa00218; break; + case 0xb0021: return 0xb002146; break; + case 0xb0081: return 0xb008111; break; + case 0xb1010: return 0xb101046; break; + case 0xb2040: return 0xb204031; break; + case 0xb4040: return 0xb404031; break; + case 0xb4041: return 0xb404101; break; + case 0xb6000: return 0xb600031; break; + case 0xb6080: return 0xb608031; break; + case 0xb7000: return 0xb700031; break; + default: break; + + } + return 0; +} + static bool need_sha_check(u32 cur_rev) { + u32 cutoff; + if (!cur_rev) { cur_rev = cpuid_to_ucode_rev(bsp_cpuid_1_eax); pr_info_once("No current revision, generating the lowest one: 0x%x\n", cur_rev); } - switch (cur_rev >> 8) { - case 0x80012: return cur_rev <= 0x800126f; break; - case 0x80082: return cur_rev <= 0x800820f; break; - case 0x83010: return cur_rev <= 0x830107c; break; - case 0x86001: return cur_rev <= 0x860010e; break; - case 0x86081: return cur_rev <= 0x8608108; break; - case 0x87010: return cur_rev <= 0x8701034; break; - case 0x8a000: return cur_rev <= 0x8a0000a; break; - case 0xa0010: return cur_rev <= 0xa00107a; break; - case 0xa0011: return cur_rev <= 0xa0011da; break; - case 0xa0012: return cur_rev <= 0xa001243; break; - case 0xa0082: return cur_rev <= 0xa00820e; break; - case 0xa1011: return cur_rev <= 0xa101153; break; - case 0xa1012: return cur_rev <= 0xa10124e; break; - case 0xa1081: return cur_rev <= 0xa108109; break; - case 0xa2010: return cur_rev <= 0xa20102f; break; - case 0xa2012: return cur_rev <= 0xa201212; break; - case 0xa4041: return cur_rev <= 0xa404109; break; - case 0xa5000: return cur_rev <= 0xa500013; break; - case 0xa6012: return cur_rev <= 0xa60120a; break; - case 0xa7041: return cur_rev <= 0xa704109; break; - case 0xa7052: return cur_rev <= 0xa705208; break; - case 0xa7080: return cur_rev <= 0xa708009; break; - case 0xa70c0: return cur_rev <= 0xa70C009; break; - case 0xaa001: return cur_rev <= 0xaa00116; break; - case 0xaa002: return cur_rev <= 0xaa00218; break; - case 0xb0021: return cur_rev <= 0xb002146; break; - case 0xb1010: return cur_rev <= 0xb101046; break; - case 0xb2040: return cur_rev <= 0xb204031; break; - case 0xb4040: return cur_rev <= 0xb404031; break; - case 0xb6000: return cur_rev <= 0xb600031; break; - case 0xb7000: return cur_rev <= 0xb700031; break; - default: break; - } + cutoff = get_cutoff_revision(cur_rev); + if (cutoff) + return cur_rev <= cutoff; pr_info("You should not be seeing this. Please send the following couple of lines to x86-<at>-kernel.org\n"); pr_info("CPUID(1).EAX: 0x%x, current revision: 0x%x\n", bsp_cpuid_1_eax, cur_rev); return true; } +static bool cpu_has_entrysign(void) +{ + unsigned int fam = x86_family(bsp_cpuid_1_eax); + unsigned int model = x86_model(bsp_cpuid_1_eax); + + if (fam == 0x17 || fam == 0x19) + return true; + + if (fam == 0x1a) { + if (model <= 0x2f || + (0x40 <= model && model <= 0x4f) || + (0x60 <= model && model <= 0x6f)) + return true; + } + + return false; +} + static bool verify_sha256_digest(u32 patch_id, u32 cur_rev, const u8 *data, unsigned int len) { struct patch_digest *pd = NULL; u8 digest[SHA256_DIGEST_SIZE]; int i; - if (x86_family(bsp_cpuid_1_eax) < 0x17) + if (!cpu_has_entrysign()) return true; if (!need_sha_check(cur_rev)) @@ -473,6 +505,7 @@ static int verify_patch(const u8 *buf, size_t buf_size, u32 *patch_size) { u8 family = x86_family(bsp_cpuid_1_eax); struct microcode_header_amd *mc_hdr; + u32 cur_rev, cutoff, patch_rev; u32 sh_psize; u16 proc_id; u8 patch_fam; @@ -512,11 +545,32 @@ static int verify_patch(const u8 *buf, size_t buf_size, u32 *patch_size) proc_id = mc_hdr->processor_rev_id; patch_fam = 0xf + (proc_id >> 12); - ucode_dbg("Patch-ID 0x%08x: family: 0x%x\n", mc_hdr->patch_id, patch_fam); - if (patch_fam != family) return 1; + cur_rev = get_patch_level(); + + /* No cutoff revision means old/unaffected by signing algorithm weakness => matches */ + cutoff = get_cutoff_revision(cur_rev); + if (!cutoff) + goto ok; + + patch_rev = mc_hdr->patch_id; + + ucode_dbg("cur_rev: 0x%x, cutoff: 0x%x, patch_rev: 0x%x\n", + cur_rev, cutoff, patch_rev); + + if (cur_rev <= cutoff && patch_rev <= cutoff) + goto ok; + + if (cur_rev > cutoff && patch_rev > cutoff) + goto ok; + + return 1; + +ok: + ucode_dbg("Patch-ID 0x%08x: family: 0x%x\n", mc_hdr->patch_id, patch_fam); + return 0; } @@ -585,8 +639,6 @@ static size_t parse_container(u8 *ucode, size_t size, struct cont_desc *desc) mc = (struct microcode_amd *)(buf + SECTION_HDR_SIZE); - ucode_dbg("patch_id: 0x%x\n", mc->hdr.patch_id); - if (mc_patch_matches(mc, eq_id)) { desc->psize = patch_size; desc->mc = mc; diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index f75c140906d0..ccc83b0bf63c 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -136,7 +136,7 @@ bool __init microcode_loader_disabled(void) return dis_ucode_ldr; } -static void early_parse_cmdline(void) +static void __init early_parse_cmdline(void) { char cmd_buf[64] = {}; char *s, *p = cmd_buf; @@ -589,6 +589,17 @@ static int load_late_stop_cpus(bool is_safe) pr_err("You should switch to early loading, if possible.\n"); } + /* + * Pre-load the microcode image into a staging device. This + * process is preemptible and does not require stopping CPUs. + * Successful staging simplifies the subsequent late-loading + * process, reducing rendezvous time. + * + * Even if the transfer fails, the update will proceed as usual. + */ + if (microcode_ops->use_staging) + microcode_ops->stage_microcode(); + atomic_set(&late_cpus_in, num_online_cpus()); atomic_set(&offline_in_nmi, 0); loops_per_usec = loops_per_jiffy / (TICK_NSEC / 1000); diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 371ca6eac00e..8744f3adc2a0 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -13,12 +13,15 @@ #define pr_fmt(fmt) "microcode: " fmt #include <linux/earlycpio.h> #include <linux/firmware.h> +#include <linux/pci_ids.h> #include <linux/uaccess.h> #include <linux/initrd.h> #include <linux/kernel.h> +#include <linux/delay.h> #include <linux/slab.h> #include <linux/cpu.h> #include <linux/uio.h> +#include <linux/io.h> #include <linux/mm.h> #include <asm/cpu_device_id.h> @@ -33,6 +36,38 @@ static const char ucode_path[] = "kernel/x86/microcode/GenuineIntel.bin"; #define UCODE_BSP_LOADED ((struct microcode_intel *)0x1UL) +/* Defines for the microcode staging mailbox interface */ +#define MBOX_REG_NUM 4 +#define MBOX_REG_SIZE sizeof(u32) + +#define MBOX_CONTROL_OFFSET 0x0 +#define MBOX_STATUS_OFFSET 0x4 +#define MBOX_WRDATA_OFFSET 0x8 +#define MBOX_RDDATA_OFFSET 0xc + +#define MASK_MBOX_CTRL_ABORT BIT(0) +#define MASK_MBOX_CTRL_GO BIT(31) + +#define MASK_MBOX_STATUS_ERROR BIT(2) +#define MASK_MBOX_STATUS_READY BIT(31) + +#define MASK_MBOX_RESP_SUCCESS BIT(0) +#define MASK_MBOX_RESP_PROGRESS BIT(1) +#define MASK_MBOX_RESP_ERROR BIT(2) + +#define MBOX_CMD_LOAD 0x3 +#define MBOX_OBJ_STAGING 0xb +#define MBOX_HEADER(size) ((PCI_VENDOR_ID_INTEL) | \ + (MBOX_OBJ_STAGING << 16) | \ + ((u64)((size) / sizeof(u32)) << 32)) + +/* The size of each mailbox header */ +#define MBOX_HEADER_SIZE sizeof(u64) +/* The size of staging hardware response */ +#define MBOX_RESPONSE_SIZE sizeof(u64) + +#define MBOX_XACTION_TIMEOUT_MS (10 * MSEC_PER_SEC) + /* Current microcode patch used in early patching on the APs. */ static struct microcode_intel *ucode_patch_va __read_mostly; static struct microcode_intel *ucode_patch_late __read_mostly; @@ -54,6 +89,23 @@ struct extended_sigtable { struct extended_signature sigs[]; }; +/** + * struct staging_state - Track the current staging process state + * + * @mmio_base: MMIO base address for staging + * @ucode_len: Total size of the microcode image + * @chunk_size: Size of each data piece + * @bytes_sent: Total bytes transmitted so far + * @offset: Current offset in the microcode image + */ +struct staging_state { + void __iomem *mmio_base; + unsigned int ucode_len; + unsigned int chunk_size; + unsigned int bytes_sent; + unsigned int offset; +}; + #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) #define EXT_HEADER_SIZE (sizeof(struct extended_sigtable)) #define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature)) @@ -299,6 +351,298 @@ static __init struct microcode_intel *scan_microcode(void *data, size_t size, return size ? NULL : patch; } +static inline u32 read_mbox_dword(void __iomem *mmio_base) +{ + u32 dword = readl(mmio_base + MBOX_RDDATA_OFFSET); + + /* Acknowledge read completion to the staging hardware */ + writel(0, mmio_base + MBOX_RDDATA_OFFSET); + return dword; +} + +static inline void write_mbox_dword(void __iomem *mmio_base, u32 dword) +{ + writel(dword, mmio_base + MBOX_WRDATA_OFFSET); +} + +static inline u64 read_mbox_header(void __iomem *mmio_base) +{ + u32 high, low; + + low = read_mbox_dword(mmio_base); + high = read_mbox_dword(mmio_base); + + return ((u64)high << 32) | low; +} + +static inline void write_mbox_header(void __iomem *mmio_base, u64 value) +{ + write_mbox_dword(mmio_base, value); + write_mbox_dword(mmio_base, value >> 32); +} + +static void write_mbox_data(void __iomem *mmio_base, u32 *chunk, unsigned int chunk_bytes) +{ + int i; + + /* + * The MMIO space is mapped as Uncached (UC). Each write arrives + * at the device as an individual transaction in program order. + * The device can then reassemble the sequence accordingly. + */ + for (i = 0; i < chunk_bytes / sizeof(u32); i++) + write_mbox_dword(mmio_base, chunk[i]); +} + +/* + * Prepare for a new microcode transfer: reset hardware and record the + * image size. + */ +static void init_stage(struct staging_state *ss) +{ + ss->ucode_len = get_totalsize(&ucode_patch_late->hdr); + + /* + * Abort any ongoing process, effectively resetting the device. + * Unlike regular mailbox data processing requests, this + * operation does not require a status check. + */ + writel(MASK_MBOX_CTRL_ABORT, ss->mmio_base + MBOX_CONTROL_OFFSET); +} + +/* + * Update the chunk size and decide whether another chunk can be sent. + * This accounts for remaining data and retry limits. + */ +static bool can_send_next_chunk(struct staging_state *ss, int *err) +{ + /* A page size or remaining bytes if this is the final chunk */ + ss->chunk_size = min(PAGE_SIZE, ss->ucode_len - ss->offset); + + /* + * Each microcode image is divided into chunks, each at most + * one page size. A 10-chunk image would typically require 10 + * transactions. + * + * However, the hardware managing the mailbox has limited + * resources and may not cache the entire image, potentially + * requesting the same chunk multiple times. + * + * To tolerate this behavior, allow up to twice the expected + * number of transactions (i.e., a 10-chunk image can take up to + * 20 attempts). + * + * If the number of attempts exceeds this limit, treat it as + * exceeding the maximum allowed transfer size. + */ + if (ss->bytes_sent + ss->chunk_size > ss->ucode_len * 2) { + *err = -EMSGSIZE; + return false; + } + + *err = 0; + return true; +} + +/* + * The hardware indicates completion by returning a sentinel end offset. + */ +static inline bool is_end_offset(u32 offset) +{ + return offset == UINT_MAX; +} + +/* + * Determine whether staging is complete: either the hardware signaled + * the end offset, or no more transactions are permitted (retry limit + * reached). + */ +static inline bool staging_is_complete(struct staging_state *ss, int *err) +{ + return is_end_offset(ss->offset) || !can_send_next_chunk(ss, err); +} + +/* + * Wait for the hardware to complete a transaction. + * Return 0 on success, or an error code on failure. + */ +static int wait_for_transaction(struct staging_state *ss) +{ + u32 timeout, status; + + /* Allow time for hardware to complete the operation: */ + for (timeout = 0; timeout < MBOX_XACTION_TIMEOUT_MS; timeout++) { + msleep(1); + + status = readl(ss->mmio_base + MBOX_STATUS_OFFSET); + /* Break out early if the hardware is ready: */ + if (status & MASK_MBOX_STATUS_READY) + break; + } + + /* Check for explicit error response */ + if (status & MASK_MBOX_STATUS_ERROR) + return -EIO; + + /* + * Hardware has neither responded to the action nor signaled any + * error. Treat this as a timeout. + */ + if (!(status & MASK_MBOX_STATUS_READY)) + return -ETIMEDOUT; + + return 0; +} + +/* + * Transmit a chunk of the microcode image to the hardware. + * Return 0 on success, or an error code on failure. + */ +static int send_data_chunk(struct staging_state *ss, void *ucode_ptr) +{ + u32 *src_chunk = ucode_ptr + ss->offset; + u16 mbox_size; + + /* + * Write a 'request' mailbox object in this order: + * 1. Mailbox header includes total size + * 2. Command header specifies the load operation + * 3. Data section contains a microcode chunk + * + * Thus, the mailbox size is two headers plus the chunk size. + */ + mbox_size = MBOX_HEADER_SIZE * 2 + ss->chunk_size; + write_mbox_header(ss->mmio_base, MBOX_HEADER(mbox_size)); + write_mbox_header(ss->mmio_base, MBOX_CMD_LOAD); + write_mbox_data(ss->mmio_base, src_chunk, ss->chunk_size); + ss->bytes_sent += ss->chunk_size; + + /* Notify the hardware that the mailbox is ready for processing. */ + writel(MASK_MBOX_CTRL_GO, ss->mmio_base + MBOX_CONTROL_OFFSET); + + return wait_for_transaction(ss); +} + +/* + * Retrieve the next offset from the hardware response. + * Return 0 on success, or an error code on failure. + */ +static int fetch_next_offset(struct staging_state *ss) +{ + const u64 expected_header = MBOX_HEADER(MBOX_HEADER_SIZE + MBOX_RESPONSE_SIZE); + u32 offset, status; + u64 header; + + /* + * The 'response' mailbox returns three fields, in order: + * 1. Header + * 2. Next offset in the microcode image + * 3. Status flags + */ + header = read_mbox_header(ss->mmio_base); + offset = read_mbox_dword(ss->mmio_base); + status = read_mbox_dword(ss->mmio_base); + + /* All valid responses must start with the expected header. */ + if (header != expected_header) { + pr_err_once("staging: invalid response header (0x%llx)\n", header); + return -EBADR; + } + + /* + * Verify the offset: If not at the end marker, it must not + * exceed the microcode image length. + */ + if (!is_end_offset(offset) && offset > ss->ucode_len) { + pr_err_once("staging: invalid offset (%u) past the image end (%u)\n", + offset, ss->ucode_len); + return -EINVAL; + } + + /* Hardware may report errors explicitly in the status field */ + if (status & MASK_MBOX_RESP_ERROR) + return -EPROTO; + + ss->offset = offset; + return 0; +} + +/* + * Handle the staging process using the mailbox MMIO interface. The + * microcode image is transferred in chunks until completion. + * Return 0 on success or an error code on failure. + */ +static int do_stage(u64 mmio_pa) +{ + struct staging_state ss = {}; + int err; + + ss.mmio_base = ioremap(mmio_pa, MBOX_REG_NUM * MBOX_REG_SIZE); + if (WARN_ON_ONCE(!ss.mmio_base)) + return -EADDRNOTAVAIL; + + init_stage(&ss); + + /* Perform the staging process while within the retry limit */ + while (!staging_is_complete(&ss, &err)) { + /* Send a chunk of microcode each time: */ + err = send_data_chunk(&ss, ucode_patch_late); + if (err) + break; + /* + * Then, ask the hardware which piece of the image it + * needs next. The same piece may be sent more than once. + */ + err = fetch_next_offset(&ss); + if (err) + break; + } + + iounmap(ss.mmio_base); + + return err; +} + +static void stage_microcode(void) +{ + unsigned int pkg_id = UINT_MAX; + int cpu, err; + u64 mmio_pa; + + if (!IS_ALIGNED(get_totalsize(&ucode_patch_late->hdr), sizeof(u32))) { + pr_err("Microcode image 32-bit misaligned (0x%x), staging failed.\n", + get_totalsize(&ucode_patch_late->hdr)); + return; + } + + lockdep_assert_cpus_held(); + + /* + * The MMIO address is unique per package, and all the SMT + * primary threads are online here. Find each MMIO space by + * their package IDs to avoid duplicate staging. + */ + for_each_cpu(cpu, cpu_primary_thread_mask) { + if (topology_logical_package_id(cpu) == pkg_id) + continue; + + pkg_id = topology_logical_package_id(cpu); + + err = rdmsrq_on_cpu(cpu, MSR_IA32_MCU_STAGING_MBOX_ADDR, &mmio_pa); + if (WARN_ON_ONCE(err)) + return; + + err = do_stage(mmio_pa); + if (err) { + pr_err("Error: staging failed (%d) for CPU%d at package %u.\n", + err, cpu, pkg_id); + return; + } + } + + pr_info("Staging of patch revision 0x%x succeeded.\n", ucode_patch_late->hdr.rev); +} + static enum ucode_state __apply_microcode(struct ucode_cpu_info *uci, struct microcode_intel *mc, u32 *cur_rev) @@ -627,6 +971,7 @@ static struct microcode_ops microcode_intel_ops = { .collect_cpu_info = collect_cpu_info, .apply_microcode = apply_microcode_late, .finalize_late_load = finalize_late_load, + .stage_microcode = stage_microcode, .use_nmi = IS_ENABLED(CONFIG_X86_64), }; @@ -638,6 +983,18 @@ static __init void calc_llc_size_per_core(struct cpuinfo_x86 *c) llc_size_per_core = (unsigned int)llc_size; } +static __init bool staging_available(void) +{ + u64 val; + + val = x86_read_arch_cap_msr(); + if (!(val & ARCH_CAP_MCU_ENUM)) + return false; + + rdmsrq(MSR_IA32_MCU_ENUMERATION, val); + return !!(val & MCU_STAGING); +} + struct microcode_ops * __init init_intel_microcode(void) { struct cpuinfo_x86 *c = &boot_cpu_data; @@ -648,6 +1005,11 @@ struct microcode_ops * __init init_intel_microcode(void) return NULL; } + if (staging_available()) { + microcode_intel_ops.use_staging = true; + pr_info("Enabled staging feature.\n"); + } + calc_llc_size_per_core(c); return µcode_intel_ops; diff --git a/arch/x86/kernel/cpu/microcode/internal.h b/arch/x86/kernel/cpu/microcode/internal.h index ae8dbc2b908d..a10b547eda1e 100644 --- a/arch/x86/kernel/cpu/microcode/internal.h +++ b/arch/x86/kernel/cpu/microcode/internal.h @@ -31,10 +31,12 @@ struct microcode_ops { * See also the "Synchronization" section in microcode_core.c. */ enum ucode_state (*apply_microcode)(int cpu); + void (*stage_microcode)(void); int (*collect_cpu_info)(int cpu, struct cpu_signature *csig); void (*finalize_late_load)(int result); unsigned int nmi_safe : 1, - use_nmi : 1; + use_nmi : 1, + use_staging : 1; }; struct early_load_data { diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 8c18327eb10b..0863733858dc 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -89,7 +89,6 @@ static int mtrr_state_set; u64 mtrr_tom2; struct mtrr_state_type mtrr_state; -EXPORT_SYMBOL_GPL(mtrr_state); /* Reserved bits in the high portion of the MTRRphysBaseN MSR. */ u32 phys_hi_rsvd; diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index 5655f253d929..2de3bd2f95d1 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h @@ -46,10 +46,6 @@ struct set_mtrr_context { u32 ccr3; }; -void set_mtrr_done(struct set_mtrr_context *ctxt); -void set_mtrr_cache_disable(struct set_mtrr_context *ctxt); -void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); - void fill_mtrr_var_range(unsigned int index, u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi); bool get_mtrr_state(void); diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 06ca5a30140c..3792ab4819dc 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -274,6 +274,11 @@ static void rdt_get_cdp_config(int level) rdt_resources_all[level].r_resctrl.cdp_capable = true; } +static void rdt_set_io_alloc_capable(struct rdt_resource *r) +{ + r->cache.io_alloc_capable = true; +} + static void rdt_get_cdp_l3_config(void) { rdt_get_cdp_config(RDT_RESOURCE_L3); @@ -719,6 +724,7 @@ enum { RDT_FLAG_SMBA, RDT_FLAG_BMEC, RDT_FLAG_ABMC, + RDT_FLAG_SDCIAE, }; #define RDT_OPT(idx, n, f) \ @@ -745,6 +751,7 @@ static struct rdt_options rdt_options[] __ro_after_init = { RDT_OPT(RDT_FLAG_SMBA, "smba", X86_FEATURE_SMBA), RDT_OPT(RDT_FLAG_BMEC, "bmec", X86_FEATURE_BMEC), RDT_OPT(RDT_FLAG_ABMC, "abmc", X86_FEATURE_ABMC), + RDT_OPT(RDT_FLAG_SDCIAE, "sdciae", X86_FEATURE_SDCIAE), }; #define NUM_RDT_OPTIONS ARRAY_SIZE(rdt_options) @@ -853,6 +860,8 @@ static __init bool get_rdt_alloc_resources(void) rdt_get_cache_alloc_cfg(1, r); if (rdt_cpu_has(X86_FEATURE_CDP_L3)) rdt_get_cdp_l3_config(); + if (rdt_cpu_has(X86_FEATURE_SDCIAE)) + rdt_set_io_alloc_capable(r); ret = true; } if (rdt_cpu_has(X86_FEATURE_CAT_L2)) { diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index 1189c0df4ad7..b20e705606b8 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -91,3 +91,43 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d, return hw_dom->ctrl_val[idx]; } + +bool resctrl_arch_get_io_alloc_enabled(struct rdt_resource *r) +{ + return resctrl_to_arch_res(r)->sdciae_enabled; +} + +static void resctrl_sdciae_set_one_amd(void *arg) +{ + bool *enable = arg; + + if (*enable) + msr_set_bit(MSR_IA32_L3_QOS_EXT_CFG, SDCIAE_ENABLE_BIT); + else + msr_clear_bit(MSR_IA32_L3_QOS_EXT_CFG, SDCIAE_ENABLE_BIT); +} + +static void _resctrl_sdciae_enable(struct rdt_resource *r, bool enable) +{ + struct rdt_ctrl_domain *d; + + /* Walking r->ctrl_domains, ensure it can't race with cpuhp */ + lockdep_assert_cpus_held(); + + /* Update MSR_IA32_L3_QOS_EXT_CFG MSR on all the CPUs in all domains */ + list_for_each_entry(d, &r->ctrl_domains, hdr.list) + on_each_cpu_mask(&d->hdr.cpu_mask, resctrl_sdciae_set_one_amd, &enable, 1); +} + +int resctrl_arch_io_alloc_enable(struct rdt_resource *r, bool enable) +{ + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); + + if (hw_res->r_resctrl.cache.io_alloc_capable && + hw_res->sdciae_enabled != enable) { + _resctrl_sdciae_enable(r, enable); + hw_res->sdciae_enabled = enable; + } + + return 0; +} diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 9f4c2f0aaf5c..4a916c84a322 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -46,6 +46,9 @@ struct arch_mbm_state { #define ABMC_EXTENDED_EVT_ID BIT(31) #define ABMC_EVT_ID BIT(0) +/* Setting bit 1 in MSR_IA32_L3_QOS_EXT_CFG enables the SDCIAE feature. */ +#define SDCIAE_ENABLE_BIT 1 + /** * struct rdt_hw_ctrl_domain - Arch private attributes of a set of CPUs that share * a resource for a control function @@ -112,6 +115,7 @@ struct msr_param { * @mbm_width: Monitor width, to detect and correct for overflow. * @cdp_enabled: CDP state of this resource * @mbm_cntr_assign_enabled: ABMC feature is enabled + * @sdciae_enabled: SDCIAE feature (backing "io_alloc") is enabled. * * Members of this structure are either private to the architecture * e.g. mbm_width, or accessed via helpers that provide abstraction. e.g. @@ -126,6 +130,7 @@ struct rdt_hw_resource { unsigned int mbm_width; bool cdp_enabled; bool mbm_cntr_assign_enabled; + bool sdciae_enabled; }; static inline struct rdt_hw_resource *resctrl_to_arch_res(struct rdt_resource *r) diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index c8945610d455..dffcc8307500 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -242,7 +242,9 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d, u32 unused, u32 rmid, enum resctrl_event_id eventid, u64 *val, void *ignored) { + struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); int cpu = cpumask_any(&d->hdr.cpu_mask); + struct arch_mbm_state *am; u64 msr_val; u32 prmid; int ret; @@ -251,12 +253,16 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d, prmid = logical_rmid_to_physical_rmid(cpu, rmid); ret = __rmid_read_phys(prmid, eventid, &msr_val); - if (ret) - return ret; - *val = get_corrected_val(r, d, rmid, eventid, msr_val); + if (!ret) { + *val = get_corrected_val(r, d, rmid, eventid, msr_val); + } else if (ret == -EINVAL) { + am = get_arch_mbm_state(hw_dom, rmid, eventid); + if (am) + am->prev_msr = 0; + } - return 0; + return ret; } static int __cntr_id_read(u32 cntr_id, u64 *val) @@ -355,6 +361,7 @@ static const struct x86_cpu_id snc_cpu_ids[] __initconst = { X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, 0), X86_MATCH_VFM(INTEL_GRANITERAPIDS_X, 0), X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, 0), + X86_MATCH_VFM(INTEL_ATOM_DARKMONT_X, 0), {} }; @@ -452,7 +459,16 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r) r->mon.mbm_cfg_mask = ecx & MAX_EVT_CONFIG_BITS; } - if (rdt_cpu_has(X86_FEATURE_ABMC)) { + /* + * resctrl assumes a system that supports assignable counters can + * switch to "default" mode. Ensure that there is a "default" mode + * to switch to. This enforces a dependency between the independent + * X86_FEATURE_ABMC and X86_FEATURE_CQM_MBM_TOTAL/X86_FEATURE_CQM_MBM_LOCAL + * hardware features. + */ + if (rdt_cpu_has(X86_FEATURE_ABMC) && + (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL) || + rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL))) { r->mon.mbm_cntr_assignable = true; cpuid_count(0x80000020, 5, &eax, &ebx, &ecx, &edx); r->mon.num_mbm_cntrs = (ebx & GENMASK(15, 0)) + 1; diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 271f548ad156..cde4b6cd3471 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -43,6 +43,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_PER_THREAD_MBA, CPUID_ECX, 0, 0x00000010, 3 }, { X86_FEATURE_SGX1, CPUID_EAX, 0, 0x00000012, 0 }, { X86_FEATURE_SGX2, CPUID_EAX, 1, 0x00000012, 0 }, + { X86_FEATURE_SGX_EUPDATESVN, CPUID_EAX, 10, 0x00000012, 0 }, { X86_FEATURE_SGX_EDECCSSA, CPUID_EAX, 11, 0x00000012, 0 }, { X86_FEATURE_OVERFLOW_RECOV, CPUID_EBX, 0, 0x80000007, 0 }, { X86_FEATURE_SUCCOR, CPUID_EBX, 1, 0x80000007, 0 }, @@ -56,6 +57,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 }, { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 }, { X86_FEATURE_ABMC, CPUID_EBX, 5, 0x80000020, 0 }, + { X86_FEATURE_SDCIAE, CPUID_EBX, 6, 0x80000020, 0 }, { X86_FEATURE_TSA_SQ_NO, CPUID_ECX, 1, 0x80000021, 0 }, { X86_FEATURE_TSA_L1_NO, CPUID_ECX, 2, 0x80000021, 0 }, { X86_FEATURE_AMD_WORKLOAD_CLASS, CPUID_EAX, 22, 0x80000021, 0 }, diff --git a/arch/x86/kernel/cpu/sgx/driver.c b/arch/x86/kernel/cpu/sgx/driver.c index 7f8d1e11dbee..79d6020dfe9c 100644 --- a/arch/x86/kernel/cpu/sgx/driver.c +++ b/arch/x86/kernel/cpu/sgx/driver.c @@ -14,7 +14,7 @@ u64 sgx_attributes_reserved_mask; u64 sgx_xfrm_reserved_mask = ~0x3; u32 sgx_misc_reserved_mask; -static int sgx_open(struct inode *inode, struct file *file) +static int __sgx_open(struct inode *inode, struct file *file) { struct sgx_encl *encl; int ret; @@ -41,6 +41,23 @@ static int sgx_open(struct inode *inode, struct file *file) return 0; } +static int sgx_open(struct inode *inode, struct file *file) +{ + int ret; + + ret = sgx_inc_usage_count(); + if (ret) + return ret; + + ret = __sgx_open(inode, file); + if (ret) { + sgx_dec_usage_count(); + return ret; + } + + return 0; +} + static int sgx_release(struct inode *inode, struct file *file) { struct sgx_encl *encl = file->private_data; diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c index 308dbbae6c6e..cf149b9f4916 100644 --- a/arch/x86/kernel/cpu/sgx/encl.c +++ b/arch/x86/kernel/cpu/sgx/encl.c @@ -765,6 +765,7 @@ void sgx_encl_release(struct kref *ref) WARN_ON_ONCE(encl->secs.epc_page); kfree(encl); + sgx_dec_usage_count(); } /* diff --git a/arch/x86/kernel/cpu/sgx/encls.h b/arch/x86/kernel/cpu/sgx/encls.h index 42a088a337c5..74be751199a4 100644 --- a/arch/x86/kernel/cpu/sgx/encls.h +++ b/arch/x86/kernel/cpu/sgx/encls.h @@ -233,4 +233,9 @@ static inline int __eaug(struct sgx_pageinfo *pginfo, void *addr) return __encls_2(EAUG, pginfo, addr); } +/* Attempt to update CPUSVN at runtime. */ +static inline int __eupdatesvn(void) +{ + return __encls_ret_1(EUPDATESVN, ""); +} #endif /* _X86_ENCLS_H */ diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c index 2de01b379aa3..dc73194416ac 100644 --- a/arch/x86/kernel/cpu/sgx/main.c +++ b/arch/x86/kernel/cpu/sgx/main.c @@ -5,6 +5,7 @@ #include <linux/freezer.h> #include <linux/highmem.h> #include <linux/kthread.h> +#include <linux/kvm_types.h> #include <linux/miscdevice.h> #include <linux/node.h> #include <linux/pagemap.h> @@ -16,6 +17,7 @@ #include <linux/vmalloc.h> #include <asm/msr.h> #include <asm/sgx.h> +#include <asm/archrandom.h> #include "driver.h" #include "encl.h" #include "encls.h" @@ -915,7 +917,107 @@ int sgx_set_attribute(unsigned long *allowed_attributes, *allowed_attributes |= SGX_ATTR_PROVISIONKEY; return 0; } -EXPORT_SYMBOL_GPL(sgx_set_attribute); +EXPORT_SYMBOL_FOR_KVM(sgx_set_attribute); + +/* Counter to count the active SGX users */ +static int sgx_usage_count; + +/** + * sgx_update_svn() - Attempt to call ENCLS[EUPDATESVN]. + * + * This instruction attempts to update CPUSVN to the + * currently loaded microcode update SVN and generate new + * cryptographic assets. + * + * Return: + * * %0: - Success or not supported + * * %-EAGAIN: - Can be safely retried, failure is due to lack of + * * entropy in RNG + * * %-EIO: - Unexpected error, retries are not advisable + */ +static int sgx_update_svn(void) +{ + int ret; + + /* + * If EUPDATESVN is not available, it is ok to + * silently skip it to comply with legacy behavior. + */ + if (!cpu_feature_enabled(X86_FEATURE_SGX_EUPDATESVN)) + return 0; + + /* + * EPC is guaranteed to be empty when there are no users. + * Ensure we are on our first user before proceeding further. + */ + WARN(sgx_usage_count, "Elevated usage count when calling EUPDATESVN\n"); + + for (int i = 0; i < RDRAND_RETRY_LOOPS; i++) { + ret = __eupdatesvn(); + + /* Stop on success or unexpected errors: */ + if (ret != SGX_INSUFFICIENT_ENTROPY) + break; + } + + switch (ret) { + case 0: + /* + * SVN successfully updated. + * Let users know when the update was successful. + */ + pr_info("SVN updated successfully\n"); + return 0; + case SGX_NO_UPDATE: + /* + * SVN update failed since the current SVN is + * not newer than CPUSVN. This is the most + * common case and indicates no harm. + */ + return 0; + case SGX_INSUFFICIENT_ENTROPY: + /* + * SVN update failed due to lack of entropy in DRNG. + * Indicate to userspace that it should retry. + */ + return -EAGAIN; + default: + break; + } + + /* + * EUPDATESVN was called when EPC is empty, all other error + * codes are unexpected. + */ + ENCLS_WARN(ret, "EUPDATESVN"); + return -EIO; +} + +/* Mutex to ensure no concurrent EPC accesses during EUPDATESVN */ +static DEFINE_MUTEX(sgx_svn_lock); + +int sgx_inc_usage_count(void) +{ + int ret; + + guard(mutex)(&sgx_svn_lock); + + if (!sgx_usage_count) { + ret = sgx_update_svn(); + if (ret) + return ret; + } + + sgx_usage_count++; + + return 0; +} + +void sgx_dec_usage_count(void) +{ + guard(mutex)(&sgx_svn_lock); + sgx_usage_count--; +} static int __init sgx_init(void) { diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h index d2dad21259a8..f5940393d9bd 100644 --- a/arch/x86/kernel/cpu/sgx/sgx.h +++ b/arch/x86/kernel/cpu/sgx/sgx.h @@ -102,6 +102,9 @@ static inline int __init sgx_vepc_init(void) } #endif +int sgx_inc_usage_count(void); +void sgx_dec_usage_count(void); + void sgx_update_lepubkeyhash(u64 *lepubkeyhash); #endif /* _X86_SGX_H */ diff --git a/arch/x86/kernel/cpu/sgx/virt.c b/arch/x86/kernel/cpu/sgx/virt.c index 7aaa3652e31d..8de1f1a755f2 100644 --- a/arch/x86/kernel/cpu/sgx/virt.c +++ b/arch/x86/kernel/cpu/sgx/virt.c @@ -5,6 +5,7 @@ * Copyright(c) 2021 Intel Corporation. */ +#include <linux/kvm_types.h> #include <linux/miscdevice.h> #include <linux/mm.h> #include <linux/mman.h> @@ -255,10 +256,11 @@ static int sgx_vepc_release(struct inode *inode, struct file *file) xa_destroy(&vepc->page_array); kfree(vepc); + sgx_dec_usage_count(); return 0; } -static int sgx_vepc_open(struct inode *inode, struct file *file) +static int __sgx_vepc_open(struct inode *inode, struct file *file) { struct sgx_vepc *vepc; @@ -273,6 +275,23 @@ static int sgx_vepc_open(struct inode *inode, struct file *file) return 0; } +static int sgx_vepc_open(struct inode *inode, struct file *file) +{ + int ret; + + ret = sgx_inc_usage_count(); + if (ret) + return ret; + + ret = __sgx_vepc_open(inode, file); + if (ret) { + sgx_dec_usage_count(); + return ret; + } + + return 0; +} + static long sgx_vepc_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -363,7 +382,7 @@ int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs, WARN_ON_ONCE(ret); return 0; } -EXPORT_SYMBOL_GPL(sgx_virt_ecreate); +EXPORT_SYMBOL_FOR_KVM(sgx_virt_ecreate); static int __sgx_virt_einit(void __user *sigstruct, void __user *token, void __user *secs) @@ -432,4 +451,4 @@ int sgx_virt_einit(void __user *sigstruct, void __user *token, return ret; } -EXPORT_SYMBOL_GPL(sgx_virt_einit); +EXPORT_SYMBOL_FOR_KVM(sgx_virt_einit); diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 6073a16628f9..f55ea3cdbf88 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -75,15 +75,11 @@ bool arch_match_cpu_phys_id(int cpu, u64 phys_id) return phys_id == (u64)cpuid_to_apicid[cpu]; } -#ifdef CONFIG_SMP static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { if (!(apicid & (__max_threads_per_core - 1))) cpumask_set_cpu(cpu, &__cpu_primary_thread_mask); } -#else -static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { } -#endif /* * Convert the APIC ID to a domain level ID by masking out the low bits diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index b5a5e1411469..71625795d711 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -16,6 +16,9 @@ EXPORT_SYMBOL_GPL(x86_topo_system); unsigned int __amd_nodes_per_pkg __ro_after_init; EXPORT_SYMBOL_GPL(__amd_nodes_per_pkg); +/* CPUs which are the primary SMT threads */ +struct cpumask __cpu_primary_thread_mask __read_mostly; + void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom, unsigned int shift, unsigned int ncpus) { diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c index 49782724a943..209b5a22d880 100644 --- a/arch/x86/kernel/cpu/tsx.c +++ b/arch/x86/kernel/cpu/tsx.c @@ -19,7 +19,17 @@ #undef pr_fmt #define pr_fmt(fmt) "tsx: " fmt -enum tsx_ctrl_states tsx_ctrl_state __ro_after_init = TSX_CTRL_NOT_SUPPORTED; +enum tsx_ctrl_states { + TSX_CTRL_AUTO, + TSX_CTRL_ENABLE, + TSX_CTRL_DISABLE, + TSX_CTRL_RTM_ALWAYS_ABORT, + TSX_CTRL_NOT_SUPPORTED, +}; + +static enum tsx_ctrl_states tsx_ctrl_state __ro_after_init = + IS_ENABLED(CONFIG_X86_INTEL_TSX_MODE_AUTO) ? TSX_CTRL_AUTO : + IS_ENABLED(CONFIG_X86_INTEL_TSX_MODE_OFF) ? TSX_CTRL_DISABLE : TSX_CTRL_ENABLE; static void tsx_disable(void) { @@ -156,11 +166,28 @@ static void tsx_dev_mode_disable(void) } } -void __init tsx_init(void) +static int __init tsx_parse_cmdline(char *str) { - char arg[5] = {}; - int ret; + if (!str) + return -EINVAL; + + if (!strcmp(str, "on")) { + tsx_ctrl_state = TSX_CTRL_ENABLE; + } else if (!strcmp(str, "off")) { + tsx_ctrl_state = TSX_CTRL_DISABLE; + } else if (!strcmp(str, "auto")) { + tsx_ctrl_state = TSX_CTRL_AUTO; + } else { + tsx_ctrl_state = TSX_CTRL_DISABLE; + pr_err("invalid option, defaulting to off\n"); + } + + return 0; +} +early_param("tsx", tsx_parse_cmdline); +void __init tsx_init(void) +{ tsx_dev_mode_disable(); /* @@ -194,27 +221,8 @@ void __init tsx_init(void) return; } - ret = cmdline_find_option(boot_command_line, "tsx", arg, sizeof(arg)); - if (ret >= 0) { - if (!strcmp(arg, "on")) { - tsx_ctrl_state = TSX_CTRL_ENABLE; - } else if (!strcmp(arg, "off")) { - tsx_ctrl_state = TSX_CTRL_DISABLE; - } else if (!strcmp(arg, "auto")) { - tsx_ctrl_state = x86_get_tsx_auto_mode(); - } else { - tsx_ctrl_state = TSX_CTRL_DISABLE; - pr_err("invalid option, defaulting to off\n"); - } - } else { - /* tsx= not provided */ - if (IS_ENABLED(CONFIG_X86_INTEL_TSX_MODE_AUTO)) - tsx_ctrl_state = x86_get_tsx_auto_mode(); - else if (IS_ENABLED(CONFIG_X86_INTEL_TSX_MODE_OFF)) - tsx_ctrl_state = TSX_CTRL_DISABLE; - else - tsx_ctrl_state = TSX_CTRL_ENABLE; - } + if (tsx_ctrl_state == TSX_CTRL_AUTO) + tsx_ctrl_state = x86_get_tsx_auto_mode(); if (tsx_ctrl_state == TSX_CTRL_DISABLE) { tsx_disable(); diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 71ee20102a8a..b10684dedc58 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -181,8 +181,8 @@ static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs, * in false positive reports. Disable instrumentation to avoid those. */ __no_kmsan_checks -static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, const char *log_lvl) +static void __show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, + unsigned long *stack, const char *log_lvl) { struct unwind_state state; struct stack_info stack_info = {0}; @@ -303,6 +303,25 @@ next: } } +static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, + unsigned long *stack, const char *log_lvl) +{ + /* + * Disable KASAN to avoid false positives during walking another + * task's stacks, as values on these stacks may change concurrently + * with task execution. + */ + bool disable_kasan = task && task != current; + + if (disable_kasan) + kasan_disable_current(); + + __show_trace_log_lvl(task, regs, stack, log_lvl); + + if (disable_kasan) + kasan_enable_current(); +} + void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) { diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index c3acbd26408b..b15b97d3cb52 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -16,6 +16,7 @@ #include <linux/firmware-map.h> #include <linux/sort.h> #include <linux/memory_hotplug.h> +#include <linux/kvm_types.h> #include <asm/e820/api.h> #include <asm/setup.h> @@ -95,7 +96,7 @@ bool e820__mapped_raw_any(u64 start, u64 end, enum e820_type type) { return _e820__mapped_any(e820_table_firmware, start, end, type); } -EXPORT_SYMBOL_GPL(e820__mapped_raw_any); +EXPORT_SYMBOL_FOR_KVM(e820__mapped_raw_any); bool e820__mapped_any(u64 start, u64 end, enum e820_type type) { diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 1f71cc135e9a..da233f20ae6f 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -18,6 +18,7 @@ #include <uapi/asm/kvm.h> #include <linux/hardirq.h> +#include <linux/kvm_types.h> #include <linux/pkeys.h> #include <linux/vmalloc.h> @@ -276,7 +277,7 @@ bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu) return true; } -EXPORT_SYMBOL_GPL(fpu_alloc_guest_fpstate); +EXPORT_SYMBOL_FOR_KVM(fpu_alloc_guest_fpstate); void fpu_free_guest_fpstate(struct fpu_guest *gfpu) { @@ -291,7 +292,7 @@ void fpu_free_guest_fpstate(struct fpu_guest *gfpu) gfpu->fpstate = NULL; vfree(fpstate); } -EXPORT_SYMBOL_GPL(fpu_free_guest_fpstate); +EXPORT_SYMBOL_FOR_KVM(fpu_free_guest_fpstate); /* * fpu_enable_guest_xfd_features - Check xfeatures against guest perm and enable @@ -313,7 +314,7 @@ int fpu_enable_guest_xfd_features(struct fpu_guest *guest_fpu, u64 xfeatures) return __xfd_enable_feature(xfeatures, guest_fpu); } -EXPORT_SYMBOL_GPL(fpu_enable_guest_xfd_features); +EXPORT_SYMBOL_FOR_KVM(fpu_enable_guest_xfd_features); #ifdef CONFIG_X86_64 void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd) @@ -324,7 +325,7 @@ void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd) xfd_update_state(guest_fpu->fpstate); fpregs_unlock(); } -EXPORT_SYMBOL_GPL(fpu_update_guest_xfd); +EXPORT_SYMBOL_FOR_KVM(fpu_update_guest_xfd); /** * fpu_sync_guest_vmexit_xfd_state - Synchronize XFD MSR and software state @@ -348,7 +349,7 @@ void fpu_sync_guest_vmexit_xfd_state(void) __this_cpu_write(xfd_state, fpstate->xfd); } } -EXPORT_SYMBOL_GPL(fpu_sync_guest_vmexit_xfd_state); +EXPORT_SYMBOL_FOR_KVM(fpu_sync_guest_vmexit_xfd_state); #endif /* CONFIG_X86_64 */ int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest) @@ -390,7 +391,7 @@ int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest) fpregs_unlock(); return 0; } -EXPORT_SYMBOL_GPL(fpu_swap_kvm_fpstate); +EXPORT_SYMBOL_FOR_KVM(fpu_swap_kvm_fpstate); void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, unsigned int size, u64 xfeatures, u32 pkru) @@ -409,7 +410,7 @@ void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, ustate->xsave.header.xfeatures = XFEATURE_MASK_FPSSE; } } -EXPORT_SYMBOL_GPL(fpu_copy_guest_fpstate_to_uabi); +EXPORT_SYMBOL_FOR_KVM(fpu_copy_guest_fpstate_to_uabi); int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, u64 xcr0, u32 *vpkru) @@ -439,7 +440,7 @@ int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, return copy_uabi_from_kernel_to_xstate(kstate, ustate, vpkru); } -EXPORT_SYMBOL_GPL(fpu_copy_uabi_to_guest_fpstate); +EXPORT_SYMBOL_FOR_KVM(fpu_copy_uabi_to_guest_fpstate); #endif /* CONFIG_KVM */ void kernel_fpu_begin_mask(unsigned int kfpu_mask) @@ -825,6 +826,9 @@ void fpu__clear_user_states(struct fpu *fpu) !fpregs_state_valid(fpu, smp_processor_id())) os_xrstor_supervisor(fpu->fpstate); + /* Ensure XFD state is in sync before reloading XSTATE */ + xfd_update_state(fpu->fpstate); + /* Reset user states in registers. */ restore_fpregs_from_init_fpstate(XFEATURE_MASK_USER_RESTORE); @@ -854,7 +858,7 @@ void switch_fpu_return(void) fpregs_restore_userregs(); } -EXPORT_SYMBOL_GPL(switch_fpu_return); +EXPORT_SYMBOL_FOR_KVM(switch_fpu_return); void fpregs_lock_and_load(void) { @@ -889,7 +893,7 @@ void fpregs_assert_state_consistent(void) WARN_ON_FPU(!fpregs_state_valid(fpu, smp_processor_id())); } -EXPORT_SYMBOL_GPL(fpregs_assert_state_consistent); +EXPORT_SYMBOL_FOR_KVM(fpregs_assert_state_consistent); #endif void fpregs_mark_activate(void) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 28e4fd65c9da..48113c5193aa 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -8,6 +8,7 @@ #include <linux/compat.h> #include <linux/cpu.h> #include <linux/mman.h> +#include <linux/kvm_types.h> #include <linux/nospec.h> #include <linux/pkeys.h> #include <linux/seq_file.h> @@ -1058,7 +1059,7 @@ void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr) return __raw_xsave_addr(xsave, xfeature_nr); } -EXPORT_SYMBOL_GPL(get_xsave_addr); +EXPORT_SYMBOL_FOR_KVM(get_xsave_addr); /* * Given an xstate feature nr, calculate where in the xsave buffer the state is. @@ -1482,7 +1483,7 @@ void fpstate_clear_xstate_component(struct fpstate *fpstate, unsigned int xfeatu if (addr) memset(addr, 0, xstate_sizes[xfeature]); } -EXPORT_SYMBOL_GPL(fpstate_clear_xstate_component); +EXPORT_SYMBOL_FOR_KVM(fpstate_clear_xstate_component); #endif #ifdef CONFIG_X86_64 @@ -1818,7 +1819,7 @@ u64 xstate_get_guest_group_perm(void) { return xstate_get_group_perm(true); } -EXPORT_SYMBOL_GPL(xstate_get_guest_group_perm); +EXPORT_SYMBOL_FOR_KVM(xstate_get_guest_group_perm); /** * fpu_xstate_prctl - xstate permission operations diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index 367da3638167..823dbdd0eb41 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -354,12 +354,17 @@ SYM_CODE_START(return_to_handler) UNWIND_HINT_UNDEFINED ANNOTATE_NOENDBR + /* Restore return_to_handler value that got eaten by previous ret instruction. */ + subq $8, %rsp + UNWIND_HINT_FUNC + /* Save ftrace_regs for function exit context */ subq $(FRAME_SIZE), %rsp movq %rax, RAX(%rsp) movq %rdx, RDX(%rsp) movq %rbp, RBP(%rsp) + movq %rsp, RSP(%rsp) movq %rsp, %rdi call ftrace_return_to_handler @@ -368,7 +373,8 @@ SYM_CODE_START(return_to_handler) movq RDX(%rsp), %rdx movq RAX(%rsp), %rax - addq $(FRAME_SIZE), %rsp + addq $(FRAME_SIZE) + 8, %rsp + /* * Jump back to the old return address. This cannot be JMP_NOSPEC rdi * since IBT would demand that contain ENDBR, which simply isn't so for diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index b01644c949b2..f846c15f21ca 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -24,6 +24,7 @@ #include <linux/percpu.h> #include <linux/kdebug.h> #include <linux/kernel.h> +#include <linux/kvm_types.h> #include <linux/export.h> #include <linux/sched.h> #include <linux/smp.h> @@ -489,7 +490,7 @@ void hw_breakpoint_restore(void) set_debugreg(DR6_RESERVED, 6); set_debugreg(__this_cpu_read(cpu_dr7), 7); } -EXPORT_SYMBOL_GPL(hw_breakpoint_restore); +EXPORT_SYMBOL_FOR_KVM(hw_breakpoint_restore); /* * Handle debug exception notifications. diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 10721a125226..86f4e574de02 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -12,6 +12,7 @@ #include <linux/delay.h> #include <linux/export.h> #include <linux/irq.h> +#include <linux/kvm_types.h> #include <asm/irq_stack.h> #include <asm/apic.h> @@ -361,7 +362,7 @@ void kvm_set_posted_intr_wakeup_handler(void (*handler)(void)) synchronize_rcu(); } } -EXPORT_SYMBOL_GPL(kvm_set_posted_intr_wakeup_handler); +EXPORT_SYMBOL_FOR_KVM(kvm_set_posted_intr_wakeup_handler); /* * Handler for POSTED_INTERRUPT_VECTOR. diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 3863d7709386..c1fac3a9fecc 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -141,7 +141,6 @@ bool can_boost(struct insn *insn, void *addr) { kprobe_opcode_t opcode; insn_byte_t prefix; - int i; if (search_exception_tables((unsigned long)addr)) return false; /* Page fault may occur on this address. */ @@ -154,7 +153,7 @@ bool can_boost(struct insn *insn, void *addr) if (insn->opcode.nbytes != 1) return false; - for_each_insn_prefix(insn, i, prefix) { + for_each_insn_prefix(insn, prefix) { insn_attr_t attr; attr = inat_get_opcode_attribute(prefix); diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 0aabd4c4e2c4..6f826a00eca2 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -103,7 +103,6 @@ static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val) asm ( ".pushsection .rodata\n" - "optprobe_template_func:\n" ".global optprobe_template_entry\n" "optprobe_template_entry:\n" #ifdef CONFIG_X86_64 @@ -160,9 +159,6 @@ asm ( "optprobe_template_end:\n" ".popsection\n"); -void optprobe_template_func(void); -STACK_FRAME_NON_STANDARD(optprobe_template_func); - #define TMPL_CLAC_IDX \ ((long)optprobe_template_clac - (long)optprobe_template_entry) #define TMPL_MOVE_IDX \ diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index b67d7c59dca0..204765004c72 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -29,6 +29,7 @@ #include <linux/syscore_ops.h> #include <linux/cc_platform.h> #include <linux/efi.h> +#include <linux/kvm_types.h> #include <asm/timer.h> #include <asm/cpu.h> #include <asm/traps.h> @@ -162,7 +163,7 @@ void kvm_async_pf_task_wait_schedule(u32 token) } finish_swait(&n.wq, &wait); } -EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait_schedule); +EXPORT_SYMBOL_FOR_KVM(kvm_async_pf_task_wait_schedule); static void apf_task_wake_one(struct kvm_task_sleep_node *n) { @@ -253,7 +254,7 @@ noinstr u32 kvm_read_and_reset_apf_flags(void) return flags; } -EXPORT_SYMBOL_GPL(kvm_read_and_reset_apf_flags); +EXPORT_SYMBOL_FOR_KVM(kvm_read_and_reset_apf_flags); noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token) { diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 0ffbae902e2f..11c45ce42694 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -97,6 +97,7 @@ static int __write_relocate_add(Elf64_Shdr *sechdrs, DEBUGP("%s relocate section %u to %u\n", apply ? "Applying" : "Clearing", relsec, sechdrs[relsec].sh_info); + for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { size_t size; @@ -162,15 +163,17 @@ static int __write_relocate_add(Elf64_Shdr *sechdrs, if (apply) { if (memcmp(loc, &zero, size)) { - pr_err("x86/modules: Invalid relocation target, existing value is nonzero for type %d, loc %p, val %Lx\n", - (int)ELF64_R_TYPE(rel[i].r_info), loc, val); + pr_err("x86/modules: Invalid relocation target, existing value is nonzero for sec %u, idx %u, type %d, loc %lx, val %llx\n", + relsec, i, (int)ELF64_R_TYPE(rel[i].r_info), + (unsigned long)loc, val); return -ENOEXEC; } write(loc, &val, size); } else { if (memcmp(loc, &val, size)) { - pr_warn("x86/modules: Invalid relocation target, existing value does not match expected value for type %d, loc %p, val %Lx\n", - (int)ELF64_R_TYPE(rel[i].r_info), loc, val); + pr_warn("x86/modules: Invalid relocation target, existing value does not match expected value for sec %u, idx %u, type %d, loc %lx, val %llx\n", + relsec, i, (int)ELF64_R_TYPE(rel[i].r_info), + (unsigned long)loc, val); return -ENOEXEC; } write(loc, &zero, size); @@ -179,8 +182,8 @@ static int __write_relocate_add(Elf64_Shdr *sechdrs, return 0; overflow: - pr_err("overflow in relocation type %d val %Lx\n", - (int)ELF64_R_TYPE(rel[i].r_info), val); + pr_err("overflow in relocation type %d val %llx sec %u idx %d\n", + (int)ELF64_R_TYPE(rel[i].r_info), val, relsec, i); pr_err("`%s' likely not compiled with -mcmodel=kernel\n", me->name); return -ENOEXEC; diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index e17c16c54a37..4469c784eaa0 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -98,7 +98,7 @@ static int filter_write(u32 reg) if (!__ratelimit(&fw_rs)) return 0; - pr_warn("Write to unrecognized MSR 0x%x by %s (pid: %d).\n", + pr_warn("Write to unrecognized MSR 0x%x by %s (pid: %d), tainting CPU_OUT_OF_SPEC.\n", reg, current->comm, current->pid); pr_warn("See https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git/about for details.\n"); diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index be93ec7255bf..3d239ed12744 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -24,6 +24,7 @@ #include <linux/export.h> #include <linux/atomic.h> #include <linux/sched/clock.h> +#include <linux/kvm_types.h> #include <asm/cpu_entry_area.h> #include <asm/traps.h> @@ -613,9 +614,7 @@ DEFINE_IDTENTRY_RAW(exc_nmi_kvm_vmx) { exc_nmi(regs); } -#if IS_MODULE(CONFIG_KVM_INTEL) -EXPORT_SYMBOL_GPL(asm_exc_nmi_kvm_vmx); -#endif +EXPORT_SYMBOL_FOR_KVM(asm_exc_nmi_kvm_vmx); #endif #ifdef CONFIG_NMI_CHECK_CPU diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 52a5c03c353c..432c0a004c60 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -30,6 +30,7 @@ #include <linux/interrupt.h> #include <linux/delay.h> #include <linux/export.h> +#include <linux/kvm_types.h> #include <linux/ptrace.h> #include <linux/notifier.h> #include <linux/kprobes.h> @@ -303,9 +304,7 @@ void current_save_fsgs(void) save_fsgs(current); local_irq_restore(flags); } -#if IS_ENABLED(CONFIG_KVM) -EXPORT_SYMBOL_GPL(current_save_fsgs); -#endif +EXPORT_SYMBOL_FOR_KVM(current_save_fsgs); static __always_inline void loadseg(enum which_selector which, unsigned short sel) diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 964f6b0a3d68..6032fa9ec753 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -13,6 +13,7 @@ #include <linux/objtool.h> #include <linux/pgtable.h> #include <linux/kexec.h> +#include <linux/kvm_types.h> #include <acpi/reboot.h> #include <asm/io.h> #include <asm/apic.h> @@ -541,7 +542,7 @@ void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback) rcu_assign_pointer(cpu_emergency_virt_callback, callback); } -EXPORT_SYMBOL_GPL(cpu_emergency_register_virt_callback); +EXPORT_SYMBOL_FOR_KVM(cpu_emergency_register_virt_callback); void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback) { @@ -551,7 +552,7 @@ void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback) rcu_assign_pointer(cpu_emergency_virt_callback, NULL); synchronize_rcu(); } -EXPORT_SYMBOL_GPL(cpu_emergency_unregister_virt_callback); +EXPORT_SYMBOL_FOR_KVM(cpu_emergency_unregister_virt_callback); /* * Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index eb289abece23..5cd6950ab672 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -103,9 +103,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_core_map); DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map); EXPORT_PER_CPU_SYMBOL(cpu_die_map); -/* CPUs which are the primary SMT threads */ -struct cpumask __cpu_primary_thread_mask __read_mostly; - /* Representing CPUs for which sibling maps can be computed */ static cpumask_var_t cpu_sibling_setup_mask; @@ -515,6 +512,76 @@ static void __init build_sched_topology(void) set_sched_topology(topology); } +#ifdef CONFIG_NUMA +static int sched_avg_remote_distance; +static int avg_remote_numa_distance(void) +{ + int i, j; + int distance, nr_remote, total_distance; + + if (sched_avg_remote_distance > 0) + return sched_avg_remote_distance; + + nr_remote = 0; + total_distance = 0; + for_each_node_state(i, N_CPU) { + for_each_node_state(j, N_CPU) { + distance = node_distance(i, j); + + if (distance >= REMOTE_DISTANCE) { + nr_remote++; + total_distance += distance; + } + } + } + if (nr_remote) + sched_avg_remote_distance = total_distance / nr_remote; + else + sched_avg_remote_distance = REMOTE_DISTANCE; + + return sched_avg_remote_distance; +} + +int arch_sched_node_distance(int from, int to) +{ + int d = node_distance(from, to); + + switch (boot_cpu_data.x86_vfm) { + case INTEL_GRANITERAPIDS_X: + case INTEL_ATOM_DARKMONT_X: + + if (!x86_has_numa_in_package || topology_max_packages() == 1 || + d < REMOTE_DISTANCE) + return d; + + /* + * With SNC enabled, there could be too many levels of remote + * NUMA node distances, creating NUMA domain levels + * including local nodes and partial remote nodes. + * + * Trim finer distance tuning for NUMA nodes in remote package + * for the purpose of building sched domains. Group NUMA nodes + * in the remote package in the same sched group. + * Simplify NUMA domains and avoid extra NUMA levels including + * different remote NUMA nodes and local nodes. + * + * GNR and CWF don't expect systems with more than 2 packages + * and more than 2 hops between packages. Single average remote + * distance won't be appropriate if there are more than 2 + * packages as average distance to different remote packages + * could be different. + */ + WARN_ONCE(topology_max_packages() > 2, + "sched: Expect only up to 2 packages for GNR or CWF, " + "but saw %d packages when building sched domains.", + topology_max_packages()); + + d = avg_remote_numa_distance(); + } + return d; +} +#endif /* CONFIG_NUMA */ + void set_cpu_sibling_map(int cpu) { bool has_smt = __max_threads_per_core > 1; @@ -1328,11 +1395,7 @@ void __noreturn hlt_play_dead(void) native_halt(); } -/* - * native_play_dead() is essentially a __noreturn function, but it can't - * be marked as such as the compiler may complain about it. - */ -void native_play_dead(void) +void __noreturn native_play_dead(void) { if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS)) __update_spec_ctrl(0); @@ -1351,7 +1414,7 @@ int native_cpu_disable(void) return -ENOSYS; } -void native_play_dead(void) +void __noreturn native_play_dead(void) { BUG(); } diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index 378c388d1b31..2892cdb14563 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -26,6 +26,11 @@ static const u8 xor5rax[] = { 0x2e, 0x2e, 0x2e, 0x31, 0xc0 }; static const u8 retinsn[] = { RET_INSN_OPCODE, 0xcc, 0xcc, 0xcc, 0xcc }; +/* + * ud1 (%edx),%rdi -- see __WARN_trap() / decode_bug() + */ +static const u8 warninsn[] = { 0x67, 0x48, 0x0f, 0xb9, 0x3a }; + static u8 __is_Jcc(u8 *insn) /* Jcc.d32 */ { u8 ret = 0; @@ -69,7 +74,10 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, emulate = code; code = &xor5rax; } - + if (func == &__WARN_trap) { + emulate = code; + code = &warninsn; + } break; case NOP: @@ -128,7 +136,8 @@ static void __static_call_validate(u8 *insn, bool tail, bool tramp) } else { if (opcode == CALL_INSN_OPCODE || !memcmp(insn, x86_nops[5], 5) || - !memcmp(insn, xor5rax, 5)) + !memcmp(insn, xor5rax, 5) || + !memcmp(insn, warninsn, 5)) return; } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 1b9177b93433..bcf1dedc1d00 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -31,6 +31,7 @@ #include <linux/kexec.h> #include <linux/sched.h> #include <linux/sched/task_stack.h> +#include <linux/static_call.h> #include <linux/timer.h> #include <linux/init.h> #include <linux/bug.h> @@ -102,25 +103,37 @@ __always_inline int is_valid_bugaddr(unsigned long addr) * UBSan{0}: 67 0f b9 00 ud1 (%eax),%eax * UBSan{10}: 67 0f b9 40 10 ud1 0x10(%eax),%eax * static_call: 0f b9 cc ud1 %esp,%ecx + * __WARN_trap: 67 48 0f b9 3a ud1 (%edx),%reg * - * Notably UBSAN uses EAX, static_call uses ECX. + * Notable, since __WARN_trap can use all registers, the distinction between + * UD1 users is through R/M. */ __always_inline int decode_bug(unsigned long addr, s32 *imm, int *len) { unsigned long start = addr; + u8 v, reg, rm, rex = 0; + int type = BUG_UD1; bool lock = false; - u8 v; if (addr < TASK_SIZE_MAX) return BUG_NONE; - v = *(u8 *)(addr++); - if (v == INSN_ASOP) + for (;;) { v = *(u8 *)(addr++); + if (v == INSN_ASOP) + continue; - if (v == INSN_LOCK) { - lock = true; - v = *(u8 *)(addr++); + if (v == INSN_LOCK) { + lock = true; + continue; + } + + if ((v & 0xf0) == 0x40) { + rex = v; + continue; + } + + break; } switch (v) { @@ -156,18 +169,33 @@ __always_inline int decode_bug(unsigned long addr, s32 *imm, int *len) if (X86_MODRM_MOD(v) != 3 && X86_MODRM_RM(v) == 4) addr++; /* SIB */ + reg = X86_MODRM_REG(v) + 8*!!X86_REX_R(rex); + rm = X86_MODRM_RM(v) + 8*!!X86_REX_B(rex); + /* Decode immediate, if present */ switch (X86_MODRM_MOD(v)) { case 0: if (X86_MODRM_RM(v) == 5) - addr += 4; /* RIP + disp32 */ + addr += 4; /* RIP + disp32 */ + + if (rm == 0) /* (%eax) */ + type = BUG_UD1_UBSAN; + + if (rm == 2) { /* (%edx) */ + *imm = reg; + type = BUG_UD1_WARN; + } break; case 1: *imm = *(s8 *)addr; addr += 1; + if (rm == 0) /* (%eax) */ + type = BUG_UD1_UBSAN; break; case 2: *imm = *(s32 *)addr; addr += 4; + if (rm == 0) /* (%eax) */ + type = BUG_UD1_UBSAN; break; case 3: break; @@ -176,12 +204,76 @@ __always_inline int decode_bug(unsigned long addr, s32 *imm, int *len) /* record instruction length */ *len = addr - start; - if (X86_MODRM_REG(v) == 0) /* EAX */ - return BUG_UD1_UBSAN; + return type; +} - return BUG_UD1; +static inline unsigned long pt_regs_val(struct pt_regs *regs, int nr) +{ + int offset = pt_regs_offset(regs, nr); + if (WARN_ON_ONCE(offset < -0)) + return 0; + return *((unsigned long *)((void *)regs + offset)); } +#ifdef HAVE_ARCH_BUG_FORMAT_ARGS +DEFINE_STATIC_CALL(WARN_trap, __WARN_trap); +EXPORT_STATIC_CALL_TRAMP(WARN_trap); + +/* + * Create a va_list from an exception context. + */ +void *__warn_args(struct arch_va_list *args, struct pt_regs *regs) +{ + /* + * Register save area; populate with function call argument registers + */ + args->regs[0] = regs->di; + args->regs[1] = regs->si; + args->regs[2] = regs->dx; + args->regs[3] = regs->cx; + args->regs[4] = regs->r8; + args->regs[5] = regs->r9; + + /* + * From the ABI document: + * + * @gp_offset - the element holds the offset in bytes from + * reg_save_area to the place where the next available general purpose + * argument register is saved. In case all argument registers have + * been exhausted, it is set to the value 48 (6*8). + * + * @fp_offset - the element holds the offset in bytes from + * reg_save_area to the place where the next available floating point + * argument is saved. In case all argument registers have been + * exhausted, it is set to the value 176 (6*8 + 8*16) + * + * @overflow_arg_area - this pointer is used to fetch arguments passed + * on the stack. It is initialized with the address of the first + * argument passed on the stack, if any, and then always updated to + * point to the start of the next argument on the stack. + * + * @reg_save_area - the element points to the start of the register + * save area. + * + * Notably the vararg starts with the second argument and there are no + * floating point arguments in the kernel. + */ + args->args.gp_offset = 1*8; + args->args.fp_offset = 6*8 + 8*16; + args->args.reg_save_area = &args->regs; + args->args.overflow_arg_area = (void *)regs->sp; + + /* + * If the exception came from __WARN_trap, there is a return + * address on the stack, skip that. This is why any __WARN_trap() + * caller must inhibit tail-call optimization. + */ + if ((void *)regs->ip == &__WARN_trap) + args->args.overflow_arg_area += 8; + + return &args->args; +} +#endif /* HAVE_ARCH_BUG_FORMAT */ static nokprobe_inline int do_trap_no_signal(struct task_struct *tsk, int trapnr, const char *str, @@ -334,6 +426,11 @@ static noinstr bool handle_bug(struct pt_regs *regs) raw_local_irq_enable(); switch (ud_type) { + case BUG_UD1_WARN: + if (report_bug_entry((void *)pt_regs_val(regs, ud_imm), regs) == BUG_TRAP_TYPE_WARN) + handled = true; + break; + case BUG_UD2: if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN) { handled = true; diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 87e749106dda..7d3e13e14eab 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -11,6 +11,7 @@ #include <linux/cpufreq.h> #include <linux/delay.h> #include <linux/clocksource.h> +#include <linux/kvm_types.h> #include <linux/percpu.h> #include <linux/timex.h> #include <linux/static_key.h> diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 845aeaf36b8d..7be8e361ca55 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -17,6 +17,7 @@ #include <linux/kdebug.h> #include <asm/processor.h> #include <asm/insn.h> +#include <asm/insn-eval.h> #include <asm/mmu_context.h> #include <asm/nops.h> @@ -258,9 +259,8 @@ static volatile u32 good_2byte_insns[256 / 32] = { static bool is_prefix_bad(struct insn *insn) { insn_byte_t p; - int i; - for_each_insn_prefix(insn, i, p) { + for_each_insn_prefix(insn, p) { insn_attr_t attr; attr = inat_get_opcode_attribute(p); @@ -1158,35 +1158,12 @@ unlock: mmap_write_unlock(mm); } -static bool insn_is_nop(struct insn *insn) -{ - return insn->opcode.nbytes == 1 && insn->opcode.bytes[0] == 0x90; -} - -static bool insn_is_nopl(struct insn *insn) -{ - if (insn->opcode.nbytes != 2) - return false; - - if (insn->opcode.bytes[0] != 0x0f || insn->opcode.bytes[1] != 0x1f) - return false; - - if (!insn->modrm.nbytes) - return false; - - if (X86_MODRM_REG(insn->modrm.bytes[0]) != 0) - return false; - - /* 0f 1f /0 - NOPL */ - return true; -} - static bool can_optimize(struct insn *insn, unsigned long vaddr) { if (!insn->x86_64 || insn->length != 5) return false; - if (!insn_is_nop(insn) && !insn_is_nopl(insn)) + if (!insn_is_nop(insn)) return false; /* We can't do cross page atomic writes yet. */ @@ -1426,19 +1403,14 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) { u8 opc1 = OPCODE1(insn); insn_byte_t p; - int i; - /* x86_nops[insn->length]; same as jmp with .offs = 0 */ - if (insn->length <= ASM_NOP_MAX && - !memcmp(insn->kaddr, x86_nops[insn->length], insn->length)) + if (insn_is_nop(insn)) goto setup; switch (opc1) { case 0xeb: /* jmp 8 */ case 0xe9: /* jmp 32 */ break; - case 0x90: /* prefix* + nop; same as jmp with .offs = 0 */ - goto setup; case 0xe8: /* call relative */ branch_clear_offset(auprobe, insn); @@ -1463,7 +1435,7 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) * Intel and AMD behavior differ in 64-bit mode: Intel ignores 66 prefix. * No one uses these insns, reject any branch insns with such prefix. */ - for_each_insn_prefix(insn, i, p) { + for_each_insn_prefix(insn, p) { if (p == 0x66) return -ENOTSUPP; } @@ -1819,3 +1791,35 @@ bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx, else return regs->sp <= ret->stack; } + +/* + * Heuristic-based check if uprobe is installed at the function entry. + * + * Under assumption of user code being compiled with frame pointers, + * `push %rbp/%ebp` is a good indicator that we indeed are. + * + * Similarly, `endbr64` (assuming 64-bit mode) is also a common pattern. + * If we get this wrong, captured stack trace might have one extra bogus + * entry, but the rest of stack trace will still be meaningful. + */ +bool is_uprobe_at_func_entry(struct pt_regs *regs) +{ + struct arch_uprobe *auprobe; + + if (!current->utask) + return false; + + auprobe = current->utask->auprobe; + if (!auprobe) + return false; + + /* push %rbp/%ebp */ + if (auprobe->insn[0] == 0x55) + return true; + + /* endbr64 (64-bit only) */ + if (user_64bit_mode(regs) && is_endbr((u32 *)auprobe->insn)) + return true; + + return false; +} diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 40ac4cb44ed2..487ad19a236e 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -108,16 +108,18 @@ void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops) bool is_intel = boot_cpu_data.x86_vendor == X86_VENDOR_INTEL; int min_nr_gp_ctrs = pmu_ops->MIN_NR_GP_COUNTERS; - perf_get_x86_pmu_capability(&kvm_host_pmu); - /* * Hybrid PMUs don't play nice with virtualization without careful * configuration by userspace, and KVM's APIs for reporting supported * vPMU features do not account for hybrid PMUs. Disable vPMU support * for hybrid PMUs until KVM gains a way to let userspace opt-in. */ - if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) + if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) { enable_pmu = false; + memset(&kvm_host_pmu, 0, sizeof(kvm_host_pmu)); + } else { + perf_get_x86_pmu_capability(&kvm_host_pmu); + } if (enable_pmu) { /* diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index f286b5706d7c..fef00546c885 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -216,7 +216,7 @@ static void avic_deactivate_vmcb(struct vcpu_svm *svm) * This function is called from IOMMU driver to notify * SVM to schedule in a particular vCPU of a particular VM. */ -int avic_ga_log_notifier(u32 ga_tag) +static int avic_ga_log_notifier(u32 ga_tag) { unsigned long flags; struct kvm_svm *kvm_svm; @@ -788,7 +788,7 @@ int avic_init_vcpu(struct vcpu_svm *svm) struct kvm_vcpu *vcpu = &svm->vcpu; INIT_LIST_HEAD(&svm->ir_list); - spin_lock_init(&svm->ir_list_lock); + raw_spin_lock_init(&svm->ir_list_lock); if (!enable_apicv || !irqchip_in_kernel(vcpu->kvm)) return 0; @@ -816,9 +816,9 @@ static void svm_ir_list_del(struct kvm_kernel_irqfd *irqfd) if (!vcpu) return; - spin_lock_irqsave(&to_svm(vcpu)->ir_list_lock, flags); + raw_spin_lock_irqsave(&to_svm(vcpu)->ir_list_lock, flags); list_del(&irqfd->vcpu_list); - spin_unlock_irqrestore(&to_svm(vcpu)->ir_list_lock, flags); + raw_spin_unlock_irqrestore(&to_svm(vcpu)->ir_list_lock, flags); } int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm, @@ -855,7 +855,7 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm, * list of IRQs being posted to the vCPU, to ensure the IRTE * isn't programmed with stale pCPU/IsRunning information. */ - guard(spinlock_irqsave)(&svm->ir_list_lock); + guard(raw_spinlock_irqsave)(&svm->ir_list_lock); /* * Update the target pCPU for IOMMU doorbells if the vCPU is @@ -972,7 +972,7 @@ static void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu, * up-to-date entry information, or that this task will wait until * svm_ir_list_add() completes to set the new target pCPU. */ - spin_lock_irqsave(&svm->ir_list_lock, flags); + raw_spin_lock_irqsave(&svm->ir_list_lock, flags); entry = svm->avic_physical_id_entry; WARN_ON_ONCE(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK); @@ -997,7 +997,7 @@ static void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu, avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, action); - spin_unlock_irqrestore(&svm->ir_list_lock, flags); + raw_spin_unlock_irqrestore(&svm->ir_list_lock, flags); } void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) @@ -1035,7 +1035,7 @@ static void __avic_vcpu_put(struct kvm_vcpu *vcpu, enum avic_vcpu_action action) * or that this task will wait until svm_ir_list_add() completes to * mark the vCPU as not running. */ - spin_lock_irqsave(&svm->ir_list_lock, flags); + raw_spin_lock_irqsave(&svm->ir_list_lock, flags); avic_update_iommu_vcpu_affinity(vcpu, -1, action); @@ -1059,7 +1059,7 @@ static void __avic_vcpu_put(struct kvm_vcpu *vcpu, enum avic_vcpu_action action) svm->avic_physical_id_entry = entry; - spin_unlock_irqrestore(&svm->ir_list_lock, flags); + raw_spin_unlock_irqrestore(&svm->ir_list_lock, flags); } void avic_vcpu_put(struct kvm_vcpu *vcpu) @@ -1243,3 +1243,9 @@ bool __init avic_hardware_setup(void) return true; } + +void avic_hardware_unsetup(void) +{ + if (avic) + amd_iommu_register_ga_log_notifier(NULL); +} diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index a6443feab252..da6e80b3ac35 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -677,11 +677,10 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12 */ svm_copy_lbrs(vmcb02, vmcb12); vmcb02->save.dbgctl &= ~DEBUGCTL_RESERVED_BITS; - svm_update_lbrv(&svm->vcpu); - - } else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) { + } else { svm_copy_lbrs(vmcb02, vmcb01); } + svm_update_lbrv(&svm->vcpu); } static inline bool is_evtinj_soft(u32 evtinj) @@ -833,11 +832,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, svm->soft_int_next_rip = vmcb12_rip; } - vmcb02->control.virt_ext = vmcb01->control.virt_ext & - LBR_CTL_ENABLE_MASK; - if (guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV)) - vmcb02->control.virt_ext |= - (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK); + /* LBR_CTL_ENABLE_MASK is controlled by svm_update_lbrv() */ if (!nested_vmcb_needs_vls_intercept(svm)) vmcb02->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK; @@ -1189,13 +1184,12 @@ int nested_svm_vmexit(struct vcpu_svm *svm) kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); if (unlikely(guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) && - (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) { + (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) svm_copy_lbrs(vmcb12, vmcb02); - svm_update_lbrv(vcpu); - } else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) { + else svm_copy_lbrs(vmcb01, vmcb02); - svm_update_lbrv(vcpu); - } + + svm_update_lbrv(vcpu); if (vnmi) { if (vmcb02->control.int_ctl & V_NMI_BLOCKING_MASK) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 153c12dbf3eb..9d29b2e7e855 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -705,7 +705,11 @@ void *svm_alloc_permissions_map(unsigned long size, gfp_t gfp_mask) static void svm_recalc_lbr_msr_intercepts(struct kvm_vcpu *vcpu) { - bool intercept = !(to_svm(vcpu)->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK); + struct vcpu_svm *svm = to_svm(vcpu); + bool intercept = !(svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK); + + if (intercept == svm->lbr_msrs_intercepted) + return; svm_set_intercept_for_msr(vcpu, MSR_IA32_LASTBRANCHFROMIP, MSR_TYPE_RW, intercept); svm_set_intercept_for_msr(vcpu, MSR_IA32_LASTBRANCHTOIP, MSR_TYPE_RW, intercept); @@ -714,6 +718,8 @@ static void svm_recalc_lbr_msr_intercepts(struct kvm_vcpu *vcpu) if (sev_es_guest(vcpu->kvm)) svm_set_intercept_for_msr(vcpu, MSR_IA32_DEBUGCTLMSR, MSR_TYPE_RW, intercept); + + svm->lbr_msrs_intercepted = intercept; } void svm_vcpu_free_msrpm(void *msrpm) @@ -806,60 +812,43 @@ void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb) vmcb_mark_dirty(to_vmcb, VMCB_LBR); } -void svm_enable_lbrv(struct kvm_vcpu *vcpu) +static void __svm_enable_lbrv(struct kvm_vcpu *vcpu) { - struct vcpu_svm *svm = to_svm(vcpu); - - svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK; - svm_recalc_lbr_msr_intercepts(vcpu); - - /* Move the LBR msrs to the vmcb02 so that the guest can see them. */ - if (is_guest_mode(vcpu)) - svm_copy_lbrs(svm->vmcb, svm->vmcb01.ptr); + to_svm(vcpu)->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK; } -static void svm_disable_lbrv(struct kvm_vcpu *vcpu) +void svm_enable_lbrv(struct kvm_vcpu *vcpu) { - struct vcpu_svm *svm = to_svm(vcpu); - - KVM_BUG_ON(sev_es_guest(vcpu->kvm), vcpu->kvm); - svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK; + __svm_enable_lbrv(vcpu); svm_recalc_lbr_msr_intercepts(vcpu); - - /* - * Move the LBR msrs back to the vmcb01 to avoid copying them - * on nested guest entries. - */ - if (is_guest_mode(vcpu)) - svm_copy_lbrs(svm->vmcb01.ptr, svm->vmcb); } -static struct vmcb *svm_get_lbr_vmcb(struct vcpu_svm *svm) +static void __svm_disable_lbrv(struct kvm_vcpu *vcpu) { - /* - * If LBR virtualization is disabled, the LBR MSRs are always kept in - * vmcb01. If LBR virtualization is enabled and L1 is running VMs of - * its own, the MSRs are moved between vmcb01 and vmcb02 as needed. - */ - return svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK ? svm->vmcb : - svm->vmcb01.ptr; + KVM_BUG_ON(sev_es_guest(vcpu->kvm), vcpu->kvm); + to_svm(vcpu)->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK; } void svm_update_lbrv(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); bool current_enable_lbrv = svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK; - bool enable_lbrv = (svm_get_lbr_vmcb(svm)->save.dbgctl & DEBUGCTLMSR_LBR) || + bool enable_lbrv = (svm->vmcb->save.dbgctl & DEBUGCTLMSR_LBR) || (is_guest_mode(vcpu) && guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK)); - if (enable_lbrv == current_enable_lbrv) - return; + if (enable_lbrv && !current_enable_lbrv) + __svm_enable_lbrv(vcpu); + else if (!enable_lbrv && current_enable_lbrv) + __svm_disable_lbrv(vcpu); - if (enable_lbrv) - svm_enable_lbrv(vcpu); - else - svm_disable_lbrv(vcpu); + /* + * During nested transitions, it is possible that the current VMCB has + * LBR_CTL set, but the previous LBR_CTL had it cleared (or vice versa). + * In this case, even though LBR_CTL does not need an update, intercepts + * do, so always recalculate the intercepts here. + */ + svm_recalc_lbr_msr_intercepts(vcpu); } void disable_nmi_singlestep(struct vcpu_svm *svm) @@ -921,6 +910,8 @@ static void svm_hardware_unsetup(void) { int cpu; + avic_hardware_unsetup(); + sev_hardware_unsetup(); for_each_possible_cpu(cpu) @@ -1236,6 +1227,7 @@ static int svm_vcpu_create(struct kvm_vcpu *vcpu) } svm->x2avic_msrs_intercepted = true; + svm->lbr_msrs_intercepted = true; svm->vmcb01.ptr = page_address(vmcb01_page); svm->vmcb01.pa = __sme_set(page_to_pfn(vmcb01_page) << PAGE_SHIFT); @@ -2722,19 +2714,19 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = svm->tsc_aux; break; case MSR_IA32_DEBUGCTLMSR: - msr_info->data = svm_get_lbr_vmcb(svm)->save.dbgctl; + msr_info->data = svm->vmcb->save.dbgctl; break; case MSR_IA32_LASTBRANCHFROMIP: - msr_info->data = svm_get_lbr_vmcb(svm)->save.br_from; + msr_info->data = svm->vmcb->save.br_from; break; case MSR_IA32_LASTBRANCHTOIP: - msr_info->data = svm_get_lbr_vmcb(svm)->save.br_to; + msr_info->data = svm->vmcb->save.br_to; break; case MSR_IA32_LASTINTFROMIP: - msr_info->data = svm_get_lbr_vmcb(svm)->save.last_excp_from; + msr_info->data = svm->vmcb->save.last_excp_from; break; case MSR_IA32_LASTINTTOIP: - msr_info->data = svm_get_lbr_vmcb(svm)->save.last_excp_to; + msr_info->data = svm->vmcb->save.last_excp_to; break; case MSR_VM_HSAVE_PA: msr_info->data = svm->nested.hsave_msr; @@ -3002,7 +2994,11 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) if (data & DEBUGCTL_RESERVED_BITS) return 1; - svm_get_lbr_vmcb(svm)->save.dbgctl = data; + if (svm->vmcb->save.dbgctl == data) + break; + + svm->vmcb->save.dbgctl = data; + vmcb_mark_dirty(svm->vmcb, VMCB_LBR); svm_update_lbrv(vcpu); break; case MSR_VM_HSAVE_PA: @@ -5386,12 +5382,6 @@ static __init int svm_hardware_setup(void) svm_hv_hardware_setup(); - for_each_possible_cpu(cpu) { - r = svm_cpu_init(cpu); - if (r) - goto err; - } - enable_apicv = avic_hardware_setup(); if (!enable_apicv) { enable_ipiv = false; @@ -5435,6 +5425,13 @@ static __init int svm_hardware_setup(void) svm_set_cpu_caps(); kvm_caps.inapplicable_quirks &= ~KVM_X86_QUIRK_CD_NW_CLEARED; + + for_each_possible_cpu(cpu) { + r = svm_cpu_init(cpu); + if (r) + goto err; + } + return 0; err: diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index e4b04f435b3d..dd78e6402345 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -329,13 +329,14 @@ struct vcpu_svm { * back into remapped mode). */ struct list_head ir_list; - spinlock_t ir_list_lock; + raw_spinlock_t ir_list_lock; struct vcpu_sev_es_state sev_es; bool guest_state_loaded; bool x2avic_msrs_intercepted; + bool lbr_msrs_intercepted; /* Guest GIF value, used when vGIF is not enabled */ bool guest_gif; @@ -805,7 +806,7 @@ extern struct kvm_x86_nested_ops svm_nested_ops; ) bool __init avic_hardware_setup(void); -int avic_ga_log_notifier(u32 ga_tag); +void avic_hardware_unsetup(void); void avic_vm_destroy(struct kvm *kvm); int avic_vm_init(struct kvm *kvm); void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb); diff --git a/arch/x86/kvm/vmx/common.h b/arch/x86/kvm/vmx/common.h index bc5ece76533a..412d0829d7a2 100644 --- a/arch/x86/kvm/vmx/common.h +++ b/arch/x86/kvm/vmx/common.h @@ -98,7 +98,7 @@ static inline int __vmx_handle_ept_violation(struct kvm_vcpu *vcpu, gpa_t gpa, error_code |= (exit_qualification & EPT_VIOLATION_PROT_MASK) ? PFERR_PRESENT_MASK : 0; - if (error_code & EPT_VIOLATION_GVA_IS_VALID) + if (exit_qualification & EPT_VIOLATION_GVA_IS_VALID) error_code |= (exit_qualification & EPT_VIOLATION_GVA_TRANSLATED) ? PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK; diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 76271962cb70..bcea087b642f 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -6728,6 +6728,14 @@ static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu, case EXIT_REASON_NOTIFY: /* Notify VM exit is not exposed to L1 */ return false; + case EXIT_REASON_SEAMCALL: + case EXIT_REASON_TDCALL: + /* + * SEAMCALL and TDCALL unconditionally VM-Exit, but aren't + * virtualized by KVM for L1 hypervisors, i.e. L1 should + * never want or expect such an exit. + */ + return false; default: return true; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index f87c216d976d..91b6f2f3edc2 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6032,6 +6032,12 @@ static int handle_vmx_instruction(struct kvm_vcpu *vcpu) return 1; } +static int handle_tdx_instruction(struct kvm_vcpu *vcpu) +{ + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; +} + #ifndef CONFIG_X86_SGX_KVM static int handle_encls(struct kvm_vcpu *vcpu) { @@ -6157,6 +6163,8 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_ENCLS] = handle_encls, [EXIT_REASON_BUS_LOCK] = handle_bus_lock_vmexit, [EXIT_REASON_NOTIFY] = handle_notify, + [EXIT_REASON_SEAMCALL] = handle_tdx_instruction, + [EXIT_REASON_TDCALL] = handle_tdx_instruction, [EXIT_REASON_MSR_READ_IMM] = handle_rdmsr_imm, [EXIT_REASON_MSR_WRITE_IMM] = handle_wrmsr_imm, }; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 42ecd093bb4c..c9c2aa6f4705 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3874,15 +3874,9 @@ static void record_steal_time(struct kvm_vcpu *vcpu) /* * Returns true if the MSR in question is managed via XSTATE, i.e. is context - * switched with the rest of guest FPU state. Note! S_CET is _not_ context - * switched via XSTATE even though it _is_ saved/restored via XSAVES/XRSTORS. - * Because S_CET is loaded on VM-Enter and VM-Exit via dedicated VMCS fields, - * the value saved/restored via XSTATE is always the host's value. That detail - * is _extremely_ important, as the guest's S_CET must _never_ be resident in - * hardware while executing in the host. Loading guest values for U_CET and - * PL[0-3]_SSP while executing in the kernel is safe, as U_CET is specific to - * userspace, and PL[0-3]_SSP are only consumed when transitioning to lower - * privilege levels, i.e. are effectively only consumed by userspace as well. + * switched with the rest of guest FPU state. + * + * Note, S_CET is _not_ saved/restored via XSAVES/XRSTORS. */ static bool is_xstate_managed_msr(struct kvm_vcpu *vcpu, u32 msr) { @@ -3905,6 +3899,11 @@ static bool is_xstate_managed_msr(struct kvm_vcpu *vcpu, u32 msr) * MSR that is managed via XSTATE. Note, the caller is responsible for doing * the initial FPU load, this helper only ensures that guest state is resident * in hardware (the kernel can load its FPU state in IRQ context). + * + * Note, loading guest values for U_CET and PL[0-3]_SSP while executing in the + * kernel is safe, as U_CET is specific to userspace, and PL[0-3]_SSP are only + * consumed when transitioning to lower privilege levels, i.e. are effectively + * only consumed by userspace as well. */ static __always_inline void kvm_access_xstate_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info, @@ -11807,6 +11806,9 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu) /* Swap (qemu) user FPU context for the guest FPU context. */ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) { + if (KVM_BUG_ON(vcpu->arch.guest_fpu.fpstate->in_use, vcpu->kvm)) + return; + /* Exclude PKRU, it's restored separately immediately after VM-Exit. */ fpu_swap_kvm_fpstate(&vcpu->arch.guest_fpu, true); trace_kvm_fpu(1); @@ -11815,6 +11817,9 @@ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) /* When vcpu_run ends, restore user space FPU context. */ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) { + if (KVM_BUG_ON(!vcpu->arch.guest_fpu.fpstate->in_use, vcpu->kvm)) + return; + fpu_swap_kvm_fpstate(&vcpu->arch.guest_fpu, false); ++vcpu->stat.fpu_reload; trace_kvm_fpu(0); @@ -12137,9 +12142,6 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, int r; vcpu_load(vcpu); - if (kvm_mpx_supported()) - kvm_load_guest_fpu(vcpu); - kvm_vcpu_srcu_read_lock(vcpu); r = kvm_apic_accept_events(vcpu); @@ -12156,9 +12158,6 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, out: kvm_vcpu_srcu_read_unlock(vcpu); - - if (kvm_mpx_supported()) - kvm_put_guest_fpu(vcpu); vcpu_put(vcpu); return r; } @@ -12788,6 +12787,7 @@ static void kvm_xstate_reset(struct kvm_vcpu *vcpu, bool init_event) { struct fpstate *fpstate = vcpu->arch.guest_fpu.fpstate; u64 xfeatures_mask; + bool fpu_in_use; int i; /* @@ -12811,13 +12811,23 @@ static void kvm_xstate_reset(struct kvm_vcpu *vcpu, bool init_event) BUILD_BUG_ON(sizeof(xfeatures_mask) * BITS_PER_BYTE <= XFEATURE_MAX); /* - * All paths that lead to INIT are required to load the guest's FPU - * state (because most paths are buried in KVM_RUN). - */ - kvm_put_guest_fpu(vcpu); + * Unload guest FPU state (if necessary) before zeroing XSTATE fields + * as the kernel can only modify the state when its resident in memory, + * i.e. when it's not loaded into hardware. + * + * WARN if the vCPU's desire to run, i.e. whether or not its in KVM_RUN, + * doesn't match the loaded/in-use state of the FPU, as KVM_RUN is the + * only path that can trigger INIT emulation _and_ loads FPU state, and + * KVM_RUN should _always_ load FPU state. + */ + WARN_ON_ONCE(vcpu->wants_to_run != fpstate->in_use); + fpu_in_use = fpstate->in_use; + if (fpu_in_use) + kvm_put_guest_fpu(vcpu); for_each_set_bit(i, (unsigned long *)&xfeatures_mask, XFEATURE_MAX) fpstate_clear_xstate_component(fpstate, i); - kvm_load_guest_fpu(vcpu); + if (fpu_in_use) + kvm_load_guest_fpu(vcpu); } void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) @@ -13941,10 +13951,11 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) #ifdef CONFIG_KVM_GUEST_MEMFD /* - * KVM doesn't yet support mmap() on guest_memfd for VMs with private memory - * (the private vs. shared tracking needs to be moved into guest_memfd). + * KVM doesn't yet support initializing guest_memfd memory as shared for VMs + * with private memory (the private vs. shared tracking needs to be moved into + * guest_memfd). */ -bool kvm_arch_supports_gmem_mmap(struct kvm *kvm) +bool kvm_arch_supports_gmem_init_shared(struct kvm *kvm) { return !kvm_arch_has_private_mem(kvm); } diff --git a/arch/x86/lib/cache-smp.c b/arch/x86/lib/cache-smp.c index c5c60d07308c..824664c0ecbd 100644 --- a/arch/x86/lib/cache-smp.c +++ b/arch/x86/lib/cache-smp.c @@ -2,6 +2,7 @@ #include <asm/paravirt.h> #include <linux/smp.h> #include <linux/export.h> +#include <linux/kvm_types.h> static void __wbinvd(void *dummy) { @@ -12,7 +13,7 @@ void wbinvd_on_cpu(int cpu) { smp_call_function_single(cpu, __wbinvd, NULL, 1); } -EXPORT_SYMBOL(wbinvd_on_cpu); +EXPORT_SYMBOL_FOR_KVM(wbinvd_on_cpu); void wbinvd_on_all_cpus(void) { @@ -24,7 +25,7 @@ void wbinvd_on_cpus_mask(struct cpumask *cpus) { on_each_cpu_mask(cpus, __wbinvd, NULL, 1); } -EXPORT_SYMBOL_GPL(wbinvd_on_cpus_mask); +EXPORT_SYMBOL_FOR_KVM(wbinvd_on_cpus_mask); static void __wbnoinvd(void *dummy) { @@ -35,10 +36,10 @@ void wbnoinvd_on_all_cpus(void) { on_each_cpu(__wbnoinvd, NULL, 1); } -EXPORT_SYMBOL_GPL(wbnoinvd_on_all_cpus); +EXPORT_SYMBOL_FOR_KVM(wbnoinvd_on_all_cpus); void wbnoinvd_on_cpus_mask(struct cpumask *cpus) { on_each_cpu_mask(cpus, __wbnoinvd, NULL, 1); } -EXPORT_SYMBOL_GPL(wbnoinvd_on_cpus_mask); +EXPORT_SYMBOL_FOR_KVM(wbnoinvd_on_cpus_mask); diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c index 4e385cbfd444..e03eeec55cfe 100644 --- a/arch/x86/lib/insn-eval.c +++ b/arch/x86/lib/insn-eval.c @@ -63,11 +63,10 @@ static bool is_string_insn(struct insn *insn) bool insn_has_rep_prefix(struct insn *insn) { insn_byte_t p; - int i; insn_get_prefixes(insn); - for_each_insn_prefix(insn, i, p) { + for_each_insn_prefix(insn, p) { if (p == 0xf2 || p == 0xf3) return true; } @@ -92,13 +91,13 @@ bool insn_has_rep_prefix(struct insn *insn) static int get_seg_reg_override_idx(struct insn *insn) { int idx = INAT_SEG_REG_DEFAULT; - int num_overrides = 0, i; + int num_overrides = 0; insn_byte_t p; insn_get_prefixes(insn); /* Look for any segment override prefixes. */ - for_each_insn_prefix(insn, i, p) { + for_each_insn_prefix(insn, p) { insn_attr_t attr; attr = inat_get_opcode_attribute(p); @@ -1676,3 +1675,147 @@ enum insn_mmio_type insn_decode_mmio(struct insn *insn, int *bytes) return type; } + +/* + * Recognise typical NOP patterns for both 32bit and 64bit. + * + * Notably: + * - NOP, but not: REP NOP aka PAUSE + * - NOPL + * - MOV %reg, %reg + * - LEA 0(%reg),%reg + * - JMP +0 + * + * Must not have false-positives; instructions identified as a NOP might be + * emulated as a NOP (uprobe) or Run Length Encoded in a larger NOP + * (alternatives). + * + * False-negatives are fine; need not be exhaustive. + */ +bool insn_is_nop(struct insn *insn) +{ + u8 b3 = 0, x3 = 0, r3 = 0; + u8 b4 = 0, x4 = 0, r4 = 0, m = 0; + u8 modrm, modrm_mod, modrm_reg, modrm_rm; + u8 sib = 0, sib_scale, sib_index, sib_base; + u8 nrex, rex; + u8 p, rep = 0; + + if ((nrex = insn->rex_prefix.nbytes)) { + rex = insn->rex_prefix.bytes[nrex-1]; + + r3 = !!X86_REX_R(rex); + x3 = !!X86_REX_X(rex); + b3 = !!X86_REX_B(rex); + if (nrex > 1) { + r4 = !!X86_REX2_R(rex); + x4 = !!X86_REX2_X(rex); + b4 = !!X86_REX2_B(rex); + m = !!X86_REX2_M(rex); + } + + } else if (insn->vex_prefix.nbytes) { + /* + * Ignore VEX encoded NOPs + */ + return false; + } + + if (insn->modrm.nbytes) { + modrm = insn->modrm.bytes[0]; + modrm_mod = X86_MODRM_MOD(modrm); + modrm_reg = X86_MODRM_REG(modrm) + 8*r3 + 16*r4; + modrm_rm = X86_MODRM_RM(modrm) + 8*b3 + 16*b4; + modrm = 1; + } + + if (insn->sib.nbytes) { + sib = insn->sib.bytes[0]; + sib_scale = X86_SIB_SCALE(sib); + sib_index = X86_SIB_INDEX(sib) + 8*x3 + 16*x4; + sib_base = X86_SIB_BASE(sib) + 8*b3 + 16*b4; + sib = 1; + + modrm_rm = sib_base; + } + + for_each_insn_prefix(insn, p) { + if (p == 0xf3) /* REPE */ + rep = 1; + } + + /* + * Opcode map munging: + * + * REX2: 0 - single byte opcode + * 1 - 0f second byte opcode + */ + switch (m) { + case 0: break; + case 1: insn->opcode.value <<= 8; + insn->opcode.value |= 0x0f; + break; + default: + return false; + } + + switch (insn->opcode.bytes[0]) { + case 0x0f: /* 2nd byte */ + break; + + case 0x89: /* MOV */ + if (modrm_mod != 3) /* register-direct */ + return false; + + /* native size */ + if (insn->opnd_bytes != 4 * (1 + insn->x86_64)) + return false; + + return modrm_reg == modrm_rm; /* MOV %reg, %reg */ + + case 0x8d: /* LEA */ + if (modrm_mod == 0 || modrm_mod == 3) /* register-indirect with disp */ + return false; + + /* native size */ + if (insn->opnd_bytes != 4 * (1 + insn->x86_64)) + return false; + + if (insn->displacement.value != 0) + return false; + + if (sib && (sib_scale != 0 || sib_index != 4)) /* (%reg, %eiz, 1) */ + return false; + + for_each_insn_prefix(insn, p) { + if (p != 0x3e) /* DS */ + return false; + } + + return modrm_reg == modrm_rm; /* LEA 0(%reg), %reg */ + + case 0x90: /* NOP */ + if (b3 || b4) /* XCHG %r{8,16,24},%rax */ + return false; + + if (rep) /* REP NOP := PAUSE */ + return false; + + return true; + + case 0xe9: /* JMP.d32 */ + case 0xeb: /* JMP.d8 */ + return insn->immediate.value == 0; /* JMP +0 */ + + default: + return false; + } + + switch (insn->opcode.bytes[1]) { + case 0x1f: + return modrm_reg == 0; /* 0f 1f /0 -- NOPL */ + + default: + return false; + } +} diff --git a/arch/x86/lib/kaslr.c b/arch/x86/lib/kaslr.c index b5893928d55c..8c7cd115b484 100644 --- a/arch/x86/lib/kaslr.c +++ b/arch/x86/lib/kaslr.c @@ -22,7 +22,7 @@ #include <asm/setup.h> #define debug_putstr(v) early_printk("%s", v) -#define has_cpuflag(f) boot_cpu_has(f) +#define has_cpuflag(f) cpu_feature_enabled(f) #define get_boot_seed() kaslr_offset() #endif diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c index 4ef7c6dcbea6..dfdd1da89f36 100644 --- a/arch/x86/lib/msr.c +++ b/arch/x86/lib/msr.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/export.h> +#include <linux/kvm_types.h> #include <linux/percpu.h> #include <linux/preempt.h> #include <asm/msr.h> @@ -103,7 +104,7 @@ int msr_set_bit(u32 msr, u8 bit) { return __flip_bit(msr, bit, true); } -EXPORT_SYMBOL_GPL(msr_set_bit); +EXPORT_SYMBOL_FOR_KVM(msr_set_bit); /** * msr_clear_bit - Clear @bit in a MSR @msr. @@ -119,7 +120,7 @@ int msr_clear_bit(u32 msr, u8 bit) { return __flip_bit(msr, bit, false); } -EXPORT_SYMBOL_GPL(msr_clear_bit); +EXPORT_SYMBOL_FOR_KVM(msr_clear_bit); #ifdef CONFIG_TRACEPOINTS void do_trace_write_msr(u32 msr, u64 val, int failed) diff --git a/arch/x86/math-emu/poly.h b/arch/x86/math-emu/poly.h index fc1c887ca073..654bfe4e29a0 100644 --- a/arch/x86/math-emu/poly.h +++ b/arch/x86/math-emu/poly.h @@ -39,7 +39,7 @@ asmlinkage void mul_Xsig_Xsig(Xsig *dest, const Xsig *mult); asmlinkage void shr_Xsig(Xsig *, const int n); asmlinkage int round_Xsig(Xsig *); asmlinkage int norm_Xsig(Xsig *); -asmlinkage void div_Xsig(Xsig *x1, const Xsig *x2, const Xsig *dest); +asmlinkage void div_Xsig(Xsig *x1, const Xsig *x2, Xsig *dest); /* Macro to extract the most significant 32 bits from a long long */ #define LL_MSW(x) (((unsigned long *)&x)[1]) diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index a4700ef6eb64..2afa7a23340e 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -486,7 +486,6 @@ void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm, #endif ptdump_walk_pgd_level_core(m, mm, pgd, false, false); } -EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level_debugfs); void ptdump_walk_user_pgd_level_checkwx(void) { diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 0e4270e20fad..1044aafd5d94 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -504,9 +504,6 @@ phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end, continue; } - if (0) - pr_info(" pte=%p addr=%lx pte=%016lx\n", pte, paddr, - pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL).pte); pages++; set_pte_init(pte, pfn_pte(paddr >> PAGE_SHIFT, prot), init); paddr_last = (paddr & PAGE_MASK) + PAGE_SIZE; diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c index b68200a0e0c6..8a3d9722f602 100644 --- a/arch/x86/mm/pat/memtype.c +++ b/arch/x86/mm/pat/memtype.c @@ -42,6 +42,7 @@ #include <linux/highmem.h> #include <linux/fs.h> #include <linux/rbtree.h> +#include <linux/kvm_types.h> #include <asm/cpu_device_id.h> #include <asm/cacheflush.h> @@ -697,7 +698,7 @@ bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn) cm == _PAGE_CACHE_MODE_UC_MINUS || cm == _PAGE_CACHE_MODE_WC; } -EXPORT_SYMBOL_GPL(pat_pfn_immune_to_uc_mtrr); +EXPORT_SYMBOL_FOR_KVM(pat_pfn_immune_to_uc_mtrr); /** * memtype_reserve_io - Request a memory type mapping for a region of memory diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index d2d54b8c4dbb..970981893c9b 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -446,7 +446,7 @@ static void cpa_flush(struct cpa_data *cpa, int cache) } start = fix_addr(__cpa_addr(cpa, 0)); - end = fix_addr(__cpa_addr(cpa, cpa->numpages)); + end = start + cpa->numpages * PAGE_SIZE; if (cpa->force_flush_all) end = TLB_FLUSH_ALL; diff --git a/arch/x86/mm/physaddr.c b/arch/x86/mm/physaddr.c index fc3f3d3e2ef2..8d31c6b9e184 100644 --- a/arch/x86/mm/physaddr.c +++ b/arch/x86/mm/physaddr.c @@ -31,17 +31,6 @@ unsigned long __phys_addr(unsigned long x) return x; } EXPORT_SYMBOL(__phys_addr); - -unsigned long __phys_addr_symbol(unsigned long x) -{ - unsigned long y = x - __START_KERNEL_map; - - /* only check upper bounds since lower bounds will trigger carry */ - VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE); - - return y + phys_base; -} -EXPORT_SYMBOL(__phys_addr_symbol); #endif bool __virt_addr_valid(unsigned long x) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 39f80111e6f1..f5b93e01e347 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -12,6 +12,7 @@ #include <linux/task_work.h> #include <linux/mmu_notifier.h> #include <linux/mmu_context.h> +#include <linux/kvm_types.h> #include <asm/tlbflush.h> #include <asm/mmu_context.h> @@ -911,11 +912,31 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, * CR3 and cpu_tlbstate.loaded_mm are not all in sync. */ this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING); - barrier(); - /* Start receiving IPIs and then read tlb_gen (and LAM below) */ + /* + * Make sure this CPU is set in mm_cpumask() such that we'll + * receive invalidation IPIs. + * + * Rely on the smp_mb() implied by cpumask_set_cpu()'s atomic + * operation, or explicitly provide one. Such that: + * + * switch_mm_irqs_off() flush_tlb_mm_range() + * smp_store_release(loaded_mm, SWITCHING); atomic64_inc_return(tlb_gen) + * smp_mb(); // here // smp_mb() implied + * atomic64_read(tlb_gen); this_cpu_read(loaded_mm); + * + * we properly order against flush_tlb_mm_range(), where the + * loaded_mm load can happen in mative_flush_tlb_multi() -> + * should_flush_tlb(). + * + * This way switch_mm() must see the new tlb_gen or + * flush_tlb_mm_range() must see the new loaded_mm, or both. + */ if (next != &init_mm && !cpumask_test_cpu(cpu, mm_cpumask(next))) cpumask_set_cpu(cpu, mm_cpumask(next)); + else + smp_mb(); + next_tlb_gen = atomic64_read(&next->context.tlb_gen); ns = choose_new_asid(next, next_tlb_gen); @@ -1562,7 +1583,7 @@ unsigned long __get_current_cr3_fast(void) VM_BUG_ON(cr3 != __read_cr3()); return cr3; } -EXPORT_SYMBOL_GPL(__get_current_cr3_fast); +EXPORT_SYMBOL_FOR_KVM(__get_current_cr3_fast); /* * Flush one page in the kernel mapping @@ -1703,7 +1724,7 @@ void __flush_tlb_all(void) flush_tlb_local(); } } -EXPORT_SYMBOL_GPL(__flush_tlb_all); +EXPORT_SYMBOL_FOR_KVM(__flush_tlb_all); void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) { diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index d4c93d9e73e4..de5083cb1d37 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -2701,7 +2701,7 @@ emit_jmp: /* Update cleanup_addr */ ctx->cleanup_addr = proglen; if (bpf_prog_was_classic(bpf_prog) && - !capable(CAP_SYS_ADMIN)) { + !ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN)) { u8 *ip = image + addrs[i - 1]; if (emit_spectre_bhb_barrier(&prog, ip, bpf_prog)) diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c index eac403248462..5ce4ebe99774 100644 --- a/arch/x86/virt/vmx/tdx/tdx.c +++ b/arch/x86/virt/vmx/tdx/tdx.c @@ -29,6 +29,7 @@ #include <linux/acpi.h> #include <linux/suspend.h> #include <linux/idr.h> +#include <linux/kvm_types.h> #include <asm/page.h> #include <asm/special_insns.h> #include <asm/msr-index.h> @@ -181,7 +182,7 @@ int tdx_cpu_enable(void) return 0; } -EXPORT_SYMBOL_GPL(tdx_cpu_enable); +EXPORT_SYMBOL_FOR_KVM(tdx_cpu_enable); /* * Add a memory region as a TDX memory block. The caller must make sure @@ -662,7 +663,7 @@ void tdx_quirk_reset_page(struct page *page) { tdx_quirk_reset_paddr(page_to_phys(page), PAGE_SIZE); } -EXPORT_SYMBOL_GPL(tdx_quirk_reset_page); +EXPORT_SYMBOL_FOR_KVM(tdx_quirk_reset_page); static void tdmr_quirk_reset_pamt(struct tdmr_info *tdmr) { @@ -1216,7 +1217,7 @@ int tdx_enable(void) return ret; } -EXPORT_SYMBOL_GPL(tdx_enable); +EXPORT_SYMBOL_FOR_KVM(tdx_enable); static bool is_pamt_page(unsigned long phys) { @@ -1477,13 +1478,13 @@ const struct tdx_sys_info *tdx_get_sysinfo(void) return p; } -EXPORT_SYMBOL_GPL(tdx_get_sysinfo); +EXPORT_SYMBOL_FOR_KVM(tdx_get_sysinfo); u32 tdx_get_nr_guest_keyids(void) { return tdx_nr_guest_keyids; } -EXPORT_SYMBOL_GPL(tdx_get_nr_guest_keyids); +EXPORT_SYMBOL_FOR_KVM(tdx_get_nr_guest_keyids); int tdx_guest_keyid_alloc(void) { @@ -1491,13 +1492,13 @@ int tdx_guest_keyid_alloc(void) tdx_guest_keyid_start + tdx_nr_guest_keyids - 1, GFP_KERNEL); } -EXPORT_SYMBOL_GPL(tdx_guest_keyid_alloc); +EXPORT_SYMBOL_FOR_KVM(tdx_guest_keyid_alloc); void tdx_guest_keyid_free(unsigned int keyid) { ida_free(&tdx_guest_keyid_pool, keyid); } -EXPORT_SYMBOL_GPL(tdx_guest_keyid_free); +EXPORT_SYMBOL_FOR_KVM(tdx_guest_keyid_free); static inline u64 tdx_tdr_pa(struct tdx_td *td) { @@ -1521,7 +1522,7 @@ noinstr u64 tdh_vp_enter(struct tdx_vp *td, struct tdx_module_args *args) return __seamcall_dirty_cache(__seamcall_saved_ret, TDH_VP_ENTER, args); } -EXPORT_SYMBOL_GPL(tdh_vp_enter); +EXPORT_SYMBOL_FOR_KVM(tdh_vp_enter); u64 tdh_mng_addcx(struct tdx_td *td, struct page *tdcs_page) { @@ -1533,7 +1534,7 @@ u64 tdh_mng_addcx(struct tdx_td *td, struct page *tdcs_page) tdx_clflush_page(tdcs_page); return seamcall(TDH_MNG_ADDCX, &args); } -EXPORT_SYMBOL_GPL(tdh_mng_addcx); +EXPORT_SYMBOL_FOR_KVM(tdh_mng_addcx); u64 tdh_mem_page_add(struct tdx_td *td, u64 gpa, struct page *page, struct page *source, u64 *ext_err1, u64 *ext_err2) { @@ -1553,7 +1554,7 @@ u64 tdh_mem_page_add(struct tdx_td *td, u64 gpa, struct page *page, struct page return ret; } -EXPORT_SYMBOL_GPL(tdh_mem_page_add); +EXPORT_SYMBOL_FOR_KVM(tdh_mem_page_add); u64 tdh_mem_sept_add(struct tdx_td *td, u64 gpa, int level, struct page *page, u64 *ext_err1, u64 *ext_err2) { @@ -1572,7 +1573,7 @@ u64 tdh_mem_sept_add(struct tdx_td *td, u64 gpa, int level, struct page *page, u return ret; } -EXPORT_SYMBOL_GPL(tdh_mem_sept_add); +EXPORT_SYMBOL_FOR_KVM(tdh_mem_sept_add); u64 tdh_vp_addcx(struct tdx_vp *vp, struct page *tdcx_page) { @@ -1584,7 +1585,7 @@ u64 tdh_vp_addcx(struct tdx_vp *vp, struct page *tdcx_page) tdx_clflush_page(tdcx_page); return seamcall(TDH_VP_ADDCX, &args); } -EXPORT_SYMBOL_GPL(tdh_vp_addcx); +EXPORT_SYMBOL_FOR_KVM(tdh_vp_addcx); u64 tdh_mem_page_aug(struct tdx_td *td, u64 gpa, int level, struct page *page, u64 *ext_err1, u64 *ext_err2) { @@ -1603,7 +1604,7 @@ u64 tdh_mem_page_aug(struct tdx_td *td, u64 gpa, int level, struct page *page, u return ret; } -EXPORT_SYMBOL_GPL(tdh_mem_page_aug); +EXPORT_SYMBOL_FOR_KVM(tdh_mem_page_aug); u64 tdh_mem_range_block(struct tdx_td *td, u64 gpa, int level, u64 *ext_err1, u64 *ext_err2) { @@ -1620,7 +1621,7 @@ u64 tdh_mem_range_block(struct tdx_td *td, u64 gpa, int level, u64 *ext_err1, u6 return ret; } -EXPORT_SYMBOL_GPL(tdh_mem_range_block); +EXPORT_SYMBOL_FOR_KVM(tdh_mem_range_block); u64 tdh_mng_key_config(struct tdx_td *td) { @@ -1630,7 +1631,7 @@ u64 tdh_mng_key_config(struct tdx_td *td) return seamcall(TDH_MNG_KEY_CONFIG, &args); } -EXPORT_SYMBOL_GPL(tdh_mng_key_config); +EXPORT_SYMBOL_FOR_KVM(tdh_mng_key_config); u64 tdh_mng_create(struct tdx_td *td, u16 hkid) { @@ -1642,7 +1643,7 @@ u64 tdh_mng_create(struct tdx_td *td, u16 hkid) tdx_clflush_page(td->tdr_page); return seamcall(TDH_MNG_CREATE, &args); } -EXPORT_SYMBOL_GPL(tdh_mng_create); +EXPORT_SYMBOL_FOR_KVM(tdh_mng_create); u64 tdh_vp_create(struct tdx_td *td, struct tdx_vp *vp) { @@ -1654,7 +1655,7 @@ u64 tdh_vp_create(struct tdx_td *td, struct tdx_vp *vp) tdx_clflush_page(vp->tdvpr_page); return seamcall(TDH_VP_CREATE, &args); } -EXPORT_SYMBOL_GPL(tdh_vp_create); +EXPORT_SYMBOL_FOR_KVM(tdh_vp_create); u64 tdh_mng_rd(struct tdx_td *td, u64 field, u64 *data) { @@ -1671,7 +1672,7 @@ u64 tdh_mng_rd(struct tdx_td *td, u64 field, u64 *data) return ret; } -EXPORT_SYMBOL_GPL(tdh_mng_rd); +EXPORT_SYMBOL_FOR_KVM(tdh_mng_rd); u64 tdh_mr_extend(struct tdx_td *td, u64 gpa, u64 *ext_err1, u64 *ext_err2) { @@ -1688,7 +1689,7 @@ u64 tdh_mr_extend(struct tdx_td *td, u64 gpa, u64 *ext_err1, u64 *ext_err2) return ret; } -EXPORT_SYMBOL_GPL(tdh_mr_extend); +EXPORT_SYMBOL_FOR_KVM(tdh_mr_extend); u64 tdh_mr_finalize(struct tdx_td *td) { @@ -1698,7 +1699,7 @@ u64 tdh_mr_finalize(struct tdx_td *td) return seamcall(TDH_MR_FINALIZE, &args); } -EXPORT_SYMBOL_GPL(tdh_mr_finalize); +EXPORT_SYMBOL_FOR_KVM(tdh_mr_finalize); u64 tdh_vp_flush(struct tdx_vp *vp) { @@ -1708,7 +1709,7 @@ u64 tdh_vp_flush(struct tdx_vp *vp) return seamcall(TDH_VP_FLUSH, &args); } -EXPORT_SYMBOL_GPL(tdh_vp_flush); +EXPORT_SYMBOL_FOR_KVM(tdh_vp_flush); u64 tdh_mng_vpflushdone(struct tdx_td *td) { @@ -1718,7 +1719,7 @@ u64 tdh_mng_vpflushdone(struct tdx_td *td) return seamcall(TDH_MNG_VPFLUSHDONE, &args); } -EXPORT_SYMBOL_GPL(tdh_mng_vpflushdone); +EXPORT_SYMBOL_FOR_KVM(tdh_mng_vpflushdone); u64 tdh_mng_key_freeid(struct tdx_td *td) { @@ -1728,7 +1729,7 @@ u64 tdh_mng_key_freeid(struct tdx_td *td) return seamcall(TDH_MNG_KEY_FREEID, &args); } -EXPORT_SYMBOL_GPL(tdh_mng_key_freeid); +EXPORT_SYMBOL_FOR_KVM(tdh_mng_key_freeid); u64 tdh_mng_init(struct tdx_td *td, u64 td_params, u64 *extended_err) { @@ -1744,7 +1745,7 @@ u64 tdh_mng_init(struct tdx_td *td, u64 td_params, u64 *extended_err) return ret; } -EXPORT_SYMBOL_GPL(tdh_mng_init); +EXPORT_SYMBOL_FOR_KVM(tdh_mng_init); u64 tdh_vp_rd(struct tdx_vp *vp, u64 field, u64 *data) { @@ -1761,7 +1762,7 @@ u64 tdh_vp_rd(struct tdx_vp *vp, u64 field, u64 *data) return ret; } -EXPORT_SYMBOL_GPL(tdh_vp_rd); +EXPORT_SYMBOL_FOR_KVM(tdh_vp_rd); u64 tdh_vp_wr(struct tdx_vp *vp, u64 field, u64 data, u64 mask) { @@ -1774,7 +1775,7 @@ u64 tdh_vp_wr(struct tdx_vp *vp, u64 field, u64 data, u64 mask) return seamcall(TDH_VP_WR, &args); } -EXPORT_SYMBOL_GPL(tdh_vp_wr); +EXPORT_SYMBOL_FOR_KVM(tdh_vp_wr); u64 tdh_vp_init(struct tdx_vp *vp, u64 initial_rcx, u32 x2apicid) { @@ -1787,7 +1788,7 @@ u64 tdh_vp_init(struct tdx_vp *vp, u64 initial_rcx, u32 x2apicid) /* apicid requires version == 1. */ return seamcall(TDH_VP_INIT | (1ULL << TDX_VERSION_SHIFT), &args); } -EXPORT_SYMBOL_GPL(tdh_vp_init); +EXPORT_SYMBOL_FOR_KVM(tdh_vp_init); /* * TDX ABI defines output operands as PT, OWNER and SIZE. These are TDX defined fomats. @@ -1809,7 +1810,7 @@ u64 tdh_phymem_page_reclaim(struct page *page, u64 *tdx_pt, u64 *tdx_owner, u64 return ret; } -EXPORT_SYMBOL_GPL(tdh_phymem_page_reclaim); +EXPORT_SYMBOL_FOR_KVM(tdh_phymem_page_reclaim); u64 tdh_mem_track(struct tdx_td *td) { @@ -1819,7 +1820,7 @@ u64 tdh_mem_track(struct tdx_td *td) return seamcall(TDH_MEM_TRACK, &args); } -EXPORT_SYMBOL_GPL(tdh_mem_track); +EXPORT_SYMBOL_FOR_KVM(tdh_mem_track); u64 tdh_mem_page_remove(struct tdx_td *td, u64 gpa, u64 level, u64 *ext_err1, u64 *ext_err2) { @@ -1836,7 +1837,7 @@ u64 tdh_mem_page_remove(struct tdx_td *td, u64 gpa, u64 level, u64 *ext_err1, u6 return ret; } -EXPORT_SYMBOL_GPL(tdh_mem_page_remove); +EXPORT_SYMBOL_FOR_KVM(tdh_mem_page_remove); u64 tdh_phymem_cache_wb(bool resume) { @@ -1846,7 +1847,7 @@ u64 tdh_phymem_cache_wb(bool resume) return seamcall(TDH_PHYMEM_CACHE_WB, &args); } -EXPORT_SYMBOL_GPL(tdh_phymem_cache_wb); +EXPORT_SYMBOL_FOR_KVM(tdh_phymem_cache_wb); u64 tdh_phymem_page_wbinvd_tdr(struct tdx_td *td) { @@ -1856,7 +1857,7 @@ u64 tdh_phymem_page_wbinvd_tdr(struct tdx_td *td) return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args); } -EXPORT_SYMBOL_GPL(tdh_phymem_page_wbinvd_tdr); +EXPORT_SYMBOL_FOR_KVM(tdh_phymem_page_wbinvd_tdr); u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, struct page *page) { @@ -1866,7 +1867,7 @@ u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, struct page *page) return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args); } -EXPORT_SYMBOL_GPL(tdh_phymem_page_wbinvd_hkid); +EXPORT_SYMBOL_FOR_KVM(tdh_phymem_page_wbinvd_hkid); #ifdef CONFIG_KEXEC_CORE void tdx_cpu_flush_cache_for_kexec(void) @@ -1884,5 +1885,5 @@ void tdx_cpu_flush_cache_for_kexec(void) wbinvd(); this_cpu_write(cache_state_incoherent, false); } -EXPORT_SYMBOL_GPL(tdx_cpu_flush_cache_for_kexec); +EXPORT_SYMBOL_FOR_KVM(tdx_cpu_flush_cache_for_kexec); #endif diff --git a/arch/xtensa/configs/audio_kc705_defconfig b/arch/xtensa/configs/audio_kc705_defconfig index f2af1a32c9c7..dc942bbac69f 100644 --- a/arch/xtensa/configs/audio_kc705_defconfig +++ b/arch/xtensa/configs/audio_kc705_defconfig @@ -103,7 +103,7 @@ CONFIG_SND_SIMPLE_CARD=y # CONFIG_USB_SUPPORT is not set CONFIG_COMMON_CLK_CDCE706=y # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_EXT4_FS=y CONFIG_FANOTIFY=y CONFIG_VFAT_FS=y diff --git a/arch/xtensa/configs/cadence_csp_defconfig b/arch/xtensa/configs/cadence_csp_defconfig index 88ed5284e21c..81a057f25f21 100644 --- a/arch/xtensa/configs/cadence_csp_defconfig +++ b/arch/xtensa/configs/cadence_csp_defconfig @@ -80,7 +80,7 @@ CONFIG_SOFT_WATCHDOG=y # CONFIG_VGA_CONSOLE is not set # CONFIG_USB_SUPPORT is not set # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_FANOTIFY=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y diff --git a/arch/xtensa/configs/generic_kc705_defconfig b/arch/xtensa/configs/generic_kc705_defconfig index 4427907becca..3ee7e1c56556 100644 --- a/arch/xtensa/configs/generic_kc705_defconfig +++ b/arch/xtensa/configs/generic_kc705_defconfig @@ -90,7 +90,7 @@ CONFIG_SOFT_WATCHDOG=y # CONFIG_VGA_CONSOLE is not set # CONFIG_USB_SUPPORT is not set # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_EXT4_FS=y CONFIG_FANOTIFY=y CONFIG_VFAT_FS=y diff --git a/arch/xtensa/configs/nommu_kc705_defconfig b/arch/xtensa/configs/nommu_kc705_defconfig index 5828228522ba..c6e96f0aa700 100644 --- a/arch/xtensa/configs/nommu_kc705_defconfig +++ b/arch/xtensa/configs/nommu_kc705_defconfig @@ -91,7 +91,7 @@ CONFIG_WATCHDOG_NOWAYOUT=y CONFIG_SOFT_WATCHDOG=y # CONFIG_VGA_CONSOLE is not set # CONFIG_USB_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_EXT4_FS=y CONFIG_FANOTIFY=y CONFIG_VFAT_FS=y diff --git a/arch/xtensa/configs/smp_lx200_defconfig b/arch/xtensa/configs/smp_lx200_defconfig index 326966ca7831..373d42b9e510 100644 --- a/arch/xtensa/configs/smp_lx200_defconfig +++ b/arch/xtensa/configs/smp_lx200_defconfig @@ -94,7 +94,7 @@ CONFIG_SOFT_WATCHDOG=y # CONFIG_VGA_CONSOLE is not set # CONFIG_USB_SUPPORT is not set # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_EXT4_FS=y CONFIG_FANOTIFY=y CONFIG_VFAT_FS=y diff --git a/arch/xtensa/configs/virt_defconfig b/arch/xtensa/configs/virt_defconfig index e37048985b47..72628d31e87a 100644 --- a/arch/xtensa/configs/virt_defconfig +++ b/arch/xtensa/configs/virt_defconfig @@ -76,7 +76,7 @@ CONFIG_LOGO=y CONFIG_VIRTIO_PCI=y CONFIG_VIRTIO_INPUT=y # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_FANOTIFY=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y diff --git a/arch/xtensa/configs/xip_kc705_defconfig b/arch/xtensa/configs/xip_kc705_defconfig index ee47438f9b51..5d6013ea70fc 100644 --- a/arch/xtensa/configs/xip_kc705_defconfig +++ b/arch/xtensa/configs/xip_kc705_defconfig @@ -82,7 +82,7 @@ CONFIG_SOFT_WATCHDOG=y # CONFIG_VGA_CONSOLE is not set # CONFIG_USB_SUPPORT is not set # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_FANOTIFY=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index 374e4cb788d8..438a3b170402 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -440,3 +440,4 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common listns sys_listns |
