Diffstat (limited to 'arch')
641 files changed, 10660 insertions, 6137 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index ebe08b9186ad..74ff01133532 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -965,6 +965,7 @@ config HAVE_CFI_ICALL_NORMALIZE_INTEGERS_RUSTC def_bool y depends on HAVE_CFI_ICALL_NORMALIZE_INTEGERS depends on RUSTC_VERSION >= 107900 + depends on ARM64 || X86_64 # With GCOV/KASAN we need this fix: https://github.com/rust-lang/rust/pull/129373 depends on (RUSTC_LLVM_VERSION >= 190103 && RUSTC_VERSION >= 108200) || \ (!GCOV_KERNEL && !KASAN_GENERIC && !KASAN_SW_TAGS) diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h index 44e7aedac6e8..90e7a9539102 100644 --- a/arch/alpha/include/asm/pgtable.h +++ b/arch/alpha/include/asm/pgtable.h @@ -107,7 +107,7 @@ struct vm_area_struct; #define _PAGE_NORMAL(x) __pgprot(_PAGE_VALID | __ACCESS_BITS | (x)) -#define _PAGE_P(x) _PAGE_NORMAL((x) | (((x) & _PAGE_FOW)?0:_PAGE_FOW)) +#define _PAGE_P(x) _PAGE_NORMAL((x) | _PAGE_FOW) #define _PAGE_S(x) _PAGE_NORMAL(x) /* @@ -126,34 +126,11 @@ struct vm_area_struct; #define pgprot_noncached(prot) (prot) /* - * BAD_PAGETABLE is used when we need a bogus page-table, while - * BAD_PAGE is used for a bogus page. - * * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. */ -extern pte_t __bad_page(void); -extern pmd_t * __bad_pagetable(void); - -extern unsigned long __zero_page(void); - -#define BAD_PAGETABLE __bad_pagetable() -#define BAD_PAGE __bad_page() #define ZERO_PAGE(vaddr) (virt_to_page(ZERO_PGE)) -/* number of bits that fit into a memory pointer */ -#define BITS_PER_PTR (8*sizeof(unsigned long)) - -/* to align the pointer to a pointer address */ -#define PTR_MASK (~(sizeof(void*)-1)) - -/* sizeof(void*)==1<<SIZEOF_PTR_LOG2 */ -#define SIZEOF_PTR_LOG2 3 - -/* to find an entry in a page-table */ -#define PAGE_PTR(address) \ - ((unsigned long)(address)>>(PAGE_SHIFT-SIZEOF_PTR_LOG2)&PTR_MASK&~PAGE_MASK) - /* * On certain platforms whose physical address space can overlap KSEG, * namely EV6 and above, we must re-twiddle the physaddr to restore the diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c index 2d491b8cdab9..4c5ab9cd8a0a 100644 --- a/arch/alpha/mm/init.c +++ b/arch/alpha/mm/init.c @@ -60,33 +60,6 @@ pgd_alloc(struct mm_struct *mm) } -/* - * BAD_PAGE is the page that is used for page faults when linux - * is out-of-memory. Older versions of linux just did a - * do_exit(), but using this instead means there is less risk - * for a process dying in kernel mode, possibly leaving an inode - * unused etc.. - * - * BAD_PAGETABLE is the accompanying page-table: it is initialized - * to point to BAD_PAGE entries. - * - * ZERO_PAGE is a special page that is used for zero-initialized - * data and COW. 
- */ -pmd_t * -__bad_pagetable(void) -{ - memset(absolute_pointer(EMPTY_PGT), 0, PAGE_SIZE); - return (pmd_t *) EMPTY_PGT; -} - -pte_t -__bad_page(void) -{ - memset(absolute_pointer(EMPTY_PGE), 0, PAGE_SIZE); - return pte_mkdirty(mk_pte(virt_to_page(EMPTY_PGE), PAGE_SHARED)); -} - static inline unsigned long load_PCB(struct pcb_struct *pcb) { diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig index a7cd526dd7ca..f930396d9dae 100644 --- a/arch/arc/configs/axs101_defconfig +++ b/arch/arc/configs/axs101_defconfig @@ -88,7 +88,7 @@ CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_DW=y # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig index afa6a348f444..6b779dee5ea0 100644 --- a/arch/arc/configs/axs103_defconfig +++ b/arch/arc/configs/axs103_defconfig @@ -86,7 +86,7 @@ CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_DW=y # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig index 2bfa6371953c..a89b50d5369d 100644 --- a/arch/arc/configs/axs103_smp_defconfig +++ b/arch/arc/configs/axs103_smp_defconfig @@ -88,7 +88,7 @@ CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_DW=y # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig index 1558e8e87767..1b8b2a098cda 100644 --- a/arch/arc/configs/hsdk_defconfig +++ b/arch/arc/configs/hsdk_defconfig @@ -77,7 +77,7 @@ CONFIG_DMADEVICES=y CONFIG_DW_AXI_DMAC=y CONFIG_IIO=y CONFIG_TI_ADC108S102=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y CONFIG_NFS_FS=y diff --git a/arch/arc/configs/vdk_hs38_defconfig b/arch/arc/configs/vdk_hs38_defconfig index 03d9ac20baa9..b7120523e09a 100644 --- a/arch/arc/configs/vdk_hs38_defconfig +++ b/arch/arc/configs/vdk_hs38_defconfig @@ -74,7 +74,7 @@ CONFIG_USB_OHCI_HCD_PLATFORM=y CONFIG_USB_STORAGE=y CONFIG_USB_SERIAL=y # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/arc/configs/vdk_hs38_smp_defconfig b/arch/arc/configs/vdk_hs38_smp_defconfig index c09488992f13..4077abd5980c 100644 --- a/arch/arc/configs/vdk_hs38_smp_defconfig +++ b/arch/arc/configs/vdk_hs38_smp_defconfig @@ -81,7 +81,7 @@ CONFIG_MMC_DW=y CONFIG_UIO=y CONFIG_UIO_PDRV_GENIRQ=y # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 2a124c92e4f6..2e3f93b690f4 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -108,6 +108,7 @@ config ARM select HAVE_GUP_FAST if ARM_LPAE select HAVE_FUNCTION_ERROR_INJECTION select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_GRAPH_FREGS select HAVE_FUNCTION_TRACER if !XIP_KERNEL select HAVE_GCC_PLUGINS select HAVE_HW_BREAKPOINT if PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7) diff --git a/arch/arm/configs/axm55xx_defconfig b/arch/arm/configs/axm55xx_defconfig index 516689dc6cf1..242a61208a0f 100644 --- a/arch/arm/configs/axm55xx_defconfig +++ b/arch/arm/configs/axm55xx_defconfig @@ -194,8 +194,7 @@ CONFIG_MAILBOX=y CONFIG_PL320_MBOX=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_EXT2_FS=y 
-CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_EXT4_FS=y CONFIG_AUTOFS_FS=y CONFIG_FUSE_FS=y diff --git a/arch/arm/configs/bcm2835_defconfig b/arch/arm/configs/bcm2835_defconfig index 27dc3bf6b124..4a8ac09843d7 100644 --- a/arch/arm/configs/bcm2835_defconfig +++ b/arch/arm/configs/bcm2835_defconfig @@ -154,8 +154,8 @@ CONFIG_PWM_BCM2835=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_FANOTIFY=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/arm/configs/davinci_all_defconfig b/arch/arm/configs/davinci_all_defconfig index e2ddaca0f89d..673408a10888 100644 --- a/arch/arm/configs/davinci_all_defconfig +++ b/arch/arm/configs/davinci_all_defconfig @@ -228,7 +228,7 @@ CONFIG_PWM=y CONFIG_PWM_TIECAP=m CONFIG_PWM_TIEHRPWM=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_XFS_FS=m CONFIG_AUTOFS_FS=m diff --git a/arch/arm/configs/dove_defconfig b/arch/arm/configs/dove_defconfig index d76eb12d29a7..bb6c4748bfc8 100644 --- a/arch/arm/configs/dove_defconfig +++ b/arch/arm/configs/dove_defconfig @@ -95,8 +95,8 @@ CONFIG_RTC_DRV_MV=y CONFIG_DMADEVICES=y CONFIG_MV_XOR=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_FS_XATTR is not set +CONFIG_EXT4_FS=y +# CONFIG_EXT4_FS_XATTR is not set CONFIG_EXT4_FS=y CONFIG_ISO9660_FS=y CONFIG_JOLIET=y diff --git a/arch/arm/configs/ep93xx_defconfig b/arch/arm/configs/ep93xx_defconfig index 2248afaf35b5..7f3756d8b086 100644 --- a/arch/arm/configs/ep93xx_defconfig +++ b/arch/arm/configs/ep93xx_defconfig @@ -103,8 +103,8 @@ CONFIG_RTC_DRV_EP93XX=y CONFIG_DMADEVICES=y CONFIG_EP93XX_DMA=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_FS_XATTR is not set +CONFIG_EXT4_FS=y +# CONFIG_EXT4_FS_XATTR is not set CONFIG_EXT4_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig index 9a57763a8d38..0d55056c6f82 100644 --- a/arch/arm/configs/imx_v6_v7_defconfig +++ b/arch/arm/configs/imx_v6_v7_defconfig @@ -436,9 +436,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_QUOTA=y CONFIG_QUOTA_NETLINK_INTERFACE=y CONFIG_AUTOFS_FS=y diff --git a/arch/arm/configs/ixp4xx_defconfig b/arch/arm/configs/ixp4xx_defconfig index 3cb995b9616a..81199dddcde7 100644 --- a/arch/arm/configs/ixp4xx_defconfig +++ b/arch/arm/configs/ixp4xx_defconfig @@ -158,8 +158,8 @@ CONFIG_IXP4XX_NPE=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_OVERLAY_FS=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y diff --git a/arch/arm/configs/mmp2_defconfig b/arch/arm/configs/mmp2_defconfig index 842a989baa27..f67e9cda73e2 100644 --- a/arch/arm/configs/mmp2_defconfig +++ b/arch/arm/configs/mmp2_defconfig @@ -53,7 +53,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_MAX8925=y # CONFIG_RESET_CONTROLLER is not set CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_EXT4_FS=y # CONFIG_DNOTIFY is not set CONFIG_MSDOS_FS=y diff --git a/arch/arm/configs/moxart_defconfig b/arch/arm/configs/moxart_defconfig index fa06d98e43fc..e2d9f3610063 100644 --- a/arch/arm/configs/moxart_defconfig +++ b/arch/arm/configs/moxart_defconfig @@ -113,7 +113,7 @@ 
CONFIG_RTC_DRV_MOXART=y CONFIG_DMADEVICES=y CONFIG_MOXART_DMA=y # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_TMPFS=y CONFIG_CONFIGFS_FS=y CONFIG_JFFS2_FS=y diff --git a/arch/arm/configs/multi_v5_defconfig b/arch/arm/configs/multi_v5_defconfig index b523bc246c09..59b020e66a0b 100644 --- a/arch/arm/configs/multi_v5_defconfig +++ b/arch/arm/configs/multi_v5_defconfig @@ -268,7 +268,7 @@ CONFIG_PWM_ATMEL=m CONFIG_PWM_ATMEL_HLCDC_PWM=m CONFIG_PWM_ATMEL_TCB=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_ISO9660_FS=m CONFIG_JOLIET=y CONFIG_UDF_FS=m diff --git a/arch/arm/configs/mv78xx0_defconfig b/arch/arm/configs/mv78xx0_defconfig index 3343f72de7ea..55f4ab67a306 100644 --- a/arch/arm/configs/mv78xx0_defconfig +++ b/arch/arm/configs/mv78xx0_defconfig @@ -91,8 +91,8 @@ CONFIG_RTC_DRV_DS1307=y CONFIG_RTC_DRV_RS5C372=y CONFIG_RTC_DRV_M41T80=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_FS_XATTR is not set +CONFIG_EXT4_FS=y +# CONFIG_EXT4_FS_XATTR is not set CONFIG_EXT4_FS=m CONFIG_ISO9660_FS=m CONFIG_JOLIET=y diff --git a/arch/arm/configs/mvebu_v5_defconfig b/arch/arm/configs/mvebu_v5_defconfig index 23dbb80fcc2e..d1742a7cae6a 100644 --- a/arch/arm/configs/mvebu_v5_defconfig +++ b/arch/arm/configs/mvebu_v5_defconfig @@ -168,7 +168,7 @@ CONFIG_MV_XOR=y CONFIG_STAGING=y CONFIG_FB_XGI=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_ISO9660_FS=m CONFIG_JOLIET=y CONFIG_UDF_FS=m diff --git a/arch/arm/configs/nhk8815_defconfig b/arch/arm/configs/nhk8815_defconfig index ea28ed8991b4..696b4fbc2412 100644 --- a/arch/arm/configs/nhk8815_defconfig +++ b/arch/arm/configs/nhk8815_defconfig @@ -116,7 +116,7 @@ CONFIG_IIO_ST_ACCEL_3AXIS=y CONFIG_PWM=y CONFIG_PWM_STMPE=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_FUSE_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/arm/configs/omap1_defconfig b/arch/arm/configs/omap1_defconfig index 661e5d6894bd..24c54bf1e243 100644 --- a/arch/arm/configs/omap1_defconfig +++ b/arch/arm/configs/omap1_defconfig @@ -184,7 +184,7 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_OMAP=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_DNOTIFY is not set CONFIG_AUTOFS_FS=y CONFIG_ISO9660_FS=y diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 1d5f75241739..4e53c331cd84 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -679,7 +679,7 @@ CONFIG_TWL4030_USB=m CONFIG_COUNTER=m CONFIG_TI_EQEP=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_EXT4_FS_SECURITY=y CONFIG_FANOTIFY=y CONFIG_QUOTA=y diff --git a/arch/arm/configs/orion5x_defconfig b/arch/arm/configs/orion5x_defconfig index 62b9c6102789..c28426250ec3 100644 --- a/arch/arm/configs/orion5x_defconfig +++ b/arch/arm/configs/orion5x_defconfig @@ -115,8 +115,8 @@ CONFIG_RTC_DRV_M48T86=y CONFIG_DMADEVICES=y CONFIG_MV_XOR=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_FS_XATTR is not set +CONFIG_EXT4_FS=y +# CONFIG_EXT4_FS_XATTR is not set CONFIG_EXT4_FS=m CONFIG_ISO9660_FS=m CONFIG_JOLIET=y diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig index 70489f3555d0..3ea189f1f42f 100644 --- a/arch/arm/configs/pxa_defconfig +++ b/arch/arm/configs/pxa_defconfig @@ -579,9 +579,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y 
+CONFIG_EXT4_FS_SECURITY=y CONFIG_XFS_FS=m CONFIG_AUTOFS_FS=m CONFIG_FUSE_FS=m diff --git a/arch/arm/configs/qcom_defconfig b/arch/arm/configs/qcom_defconfig index fa681a7a49c2..29a1dea500f0 100644 --- a/arch/arm/configs/qcom_defconfig +++ b/arch/arm/configs/qcom_defconfig @@ -291,7 +291,7 @@ CONFIG_INTERCONNECT_QCOM_MSM8974=m CONFIG_INTERCONNECT_QCOM_SDX55=m CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_FUSE_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y diff --git a/arch/arm/configs/rpc_defconfig b/arch/arm/configs/rpc_defconfig index 24f1fa868230..46df453e224e 100644 --- a/arch/arm/configs/rpc_defconfig +++ b/arch/arm/configs/rpc_defconfig @@ -77,7 +77,7 @@ CONFIG_SOUND_VIDC=m CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_PCF8583=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_AUTOFS_FS=m CONFIG_ISO9660_FS=y CONFIG_JOLIET=y diff --git a/arch/arm/configs/s3c6400_defconfig b/arch/arm/configs/s3c6400_defconfig index 967b1cb22136..7bf28a83946a 100644 --- a/arch/arm/configs/s3c6400_defconfig +++ b/arch/arm/configs/s3c6400_defconfig @@ -52,9 +52,9 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_S3C=y CONFIG_PWM=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_CRAMFS=y diff --git a/arch/arm/configs/sama7_defconfig b/arch/arm/configs/sama7_defconfig index e14720a9a5ac..e2ad9a05566f 100644 --- a/arch/arm/configs/sama7_defconfig +++ b/arch/arm/configs/sama7_defconfig @@ -201,7 +201,7 @@ CONFIG_MCHP_EIC=y CONFIG_RESET_CONTROLLER=y CONFIG_NVMEM_MICROCHIP_OTPC=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_FANOTIFY=y CONFIG_AUTOFS_FS=m CONFIG_VFAT_FS=y diff --git a/arch/arm/configs/socfpga_defconfig b/arch/arm/configs/socfpga_defconfig index 294906c8f16e..f2e42846b116 100644 --- a/arch/arm/configs/socfpga_defconfig +++ b/arch/arm/configs/socfpga_defconfig @@ -136,7 +136,7 @@ CONFIG_FPGA_REGION=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_AUTOFS_FS=y CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y diff --git a/arch/arm/configs/spear13xx_defconfig b/arch/arm/configs/spear13xx_defconfig index a8f992fdb30d..8b19af1ea67c 100644 --- a/arch/arm/configs/spear13xx_defconfig +++ b/arch/arm/configs/spear13xx_defconfig @@ -84,8 +84,8 @@ CONFIG_DMATEST=m CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_AUTOFS_FS=m CONFIG_FUSE_FS=y CONFIG_MSDOS_FS=m diff --git a/arch/arm/configs/spear3xx_defconfig b/arch/arm/configs/spear3xx_defconfig index 8dc5a388759c..b4e4b96a98af 100644 --- a/arch/arm/configs/spear3xx_defconfig +++ b/arch/arm/configs/spear3xx_defconfig @@ -67,8 +67,8 @@ CONFIG_DMATEST=m CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_AUTOFS_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=m diff --git a/arch/arm/configs/spear6xx_defconfig b/arch/arm/configs/spear6xx_defconfig index 4e9e1a6ff381..7083b1bd8573 100644 --- a/arch/arm/configs/spear6xx_defconfig +++ b/arch/arm/configs/spear6xx_defconfig @@ -53,8 +53,8 @@ CONFIG_DMATEST=m CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_AUTOFS_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=m diff --git 
a/arch/arm/configs/spitz_defconfig b/arch/arm/configs/spitz_defconfig index ac2a0f998c73..395df2f9dc8e 100644 --- a/arch/arm/configs/spitz_defconfig +++ b/arch/arm/configs/spitz_defconfig @@ -193,8 +193,8 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_FS_XATTR is not set +CONFIG_EXT4_FS=y +# CONFIG_EXT4_FS_XATTR is not set CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y diff --git a/arch/arm/configs/stm32_defconfig b/arch/arm/configs/stm32_defconfig index dcd9c316072e..82190b155b14 100644 --- a/arch/arm/configs/stm32_defconfig +++ b/arch/arm/configs/stm32_defconfig @@ -69,7 +69,7 @@ CONFIG_STM32_MDMA=y CONFIG_IIO=y CONFIG_STM32_ADC_CORE=y CONFIG_STM32_ADC=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_FILE_LOCKING is not set # CONFIG_DNOTIFY is not set # CONFIG_INOTIFY_USER is not set diff --git a/arch/arm/configs/tegra_defconfig b/arch/arm/configs/tegra_defconfig index ba863b445417..ab477ca13f89 100644 --- a/arch/arm/configs/tegra_defconfig +++ b/arch/arm/configs/tegra_defconfig @@ -319,9 +319,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y # CONFIG_DNOTIFY is not set CONFIG_VFAT_FS=y CONFIG_TMPFS=y diff --git a/arch/arm/configs/u8500_defconfig b/arch/arm/configs/u8500_defconfig index 9c8dc6dd5fe3..e88533b78327 100644 --- a/arch/arm/configs/u8500_defconfig +++ b/arch/arm/configs/u8500_defconfig @@ -175,7 +175,7 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y diff --git a/arch/arm/configs/vexpress_defconfig b/arch/arm/configs/vexpress_defconfig index cdb6065e04fd..b9454f6954f8 100644 --- a/arch/arm/configs/vexpress_defconfig +++ b/arch/arm/configs/vexpress_defconfig @@ -120,7 +120,7 @@ CONFIG_VIRTIO_BALLOON=y CONFIG_VIRTIO_MMIO=y CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c index d334c7fb672b..b5793e8fbdc1 100644 --- a/arch/arm/kernel/bios32.c +++ b/arch/arm/kernel/bios32.c @@ -10,6 +10,7 @@ #include <linux/kernel.h> #include <linux/pci.h> #include <linux/slab.h> +#include <linux/string_choices.h> #include <linux/init.h> #include <linux/io.h> @@ -337,8 +338,8 @@ void pcibios_fixup_bus(struct pci_bus *bus) /* * Report what we did for this bus */ - pr_info("PCI: bus%d: Fast back to back transfers %sabled\n", - bus->number, (features & PCI_COMMAND_FAST_BACK) ? 
"en" : "dis"); + pr_info("PCI: bus%d: Fast back to back transfers %s\n", + bus->number, str_enabled_disabled(features & PCI_COMMAND_FAST_BACK)); } EXPORT_SYMBOL(pcibios_fixup_bus); diff --git a/arch/arm/kernel/entry-ftrace.S b/arch/arm/kernel/entry-ftrace.S index bc598e3d8dd2..e24ee559af81 100644 --- a/arch/arm/kernel/entry-ftrace.S +++ b/arch/arm/kernel/entry-ftrace.S @@ -257,11 +257,21 @@ ENDPROC(ftrace_graph_regs_caller) #ifdef CONFIG_FUNCTION_GRAPH_TRACER ENTRY(return_to_handler) - stmdb sp!, {r0-r3} - add r0, sp, #16 @ sp at exit of instrumented routine + mov ip, sp @ sp at exit of instrumented routine + sub sp, #PT_REGS_SIZE + str r0, [sp, #S_R0] + str r1, [sp, #S_R1] + str r2, [sp, #S_R2] + str r3, [sp, #S_R3] + str ip, [sp, #S_FP] + mov r0, sp bl ftrace_return_to_handler - mov lr, r0 @ r0 has real ret addr - ldmia sp!, {r0-r3} + mov lr, r0 @ r0 has real ret addr + ldr r3, [sp, #S_R3] + ldr r2, [sp, #S_R2] + ldr r1, [sp, #S_R1] + ldr r0, [sp, #S_R0] + add sp, sp, #PT_REGS_SIZE @ restore stack pointer ret lr ENDPROC(return_to_handler) #endif diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index 43d91bfd2360..470867160076 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -13,6 +13,7 @@ #include <linux/io.h> #include <linux/of.h> #include <linux/of_address.h> +#include <linux/string_choices.h> #include <asm/cacheflush.h> #include <asm/cp15.h> @@ -667,9 +668,9 @@ static void __init l2c310_enable(void __iomem *base, unsigned num_lock) u32 power_ctrl; power_ctrl = readl_relaxed(base + L310_POWER_CTRL); - pr_info("L2C-310 dynamic clock gating %sabled, standby mode %sabled\n", - power_ctrl & L310_DYNAMIC_CLK_GATING_EN ? "en" : "dis", - power_ctrl & L310_STNDBY_MODE_EN ? "en" : "dis"); + pr_info("L2C-310 dynamic clock gating %s, standby mode %s\n", + str_enabled_disabled(power_ctrl & L310_DYNAMIC_CLK_GATING_EN), + str_enabled_disabled(power_ctrl & L310_STNDBY_MODE_EN)); } if (aux & L310_AUX_CTRL_FULL_LINE_ZERO) diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 46169fe42c61..2bc828a1940c 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -135,8 +135,7 @@ static void die_kernel_fault(const char *msg, struct mm_struct *mm, bust_spinlocks(1); pr_alert("8<--- cut here ---\n"); pr_alert("Unable to handle kernel %s at virtual address %08lx when %s\n", - msg, addr, fsr & FSR_LNX_PF ? "execute" : - fsr & FSR_WRITE ? "write" : "read"); + msg, addr, fsr & FSR_LNX_PF ? "execute" : str_write_read(fsr & FSR_WRITE)); show_pte(KERN_ALERT, mm, addr); die("Oops", regs, fsr); diff --git a/arch/arm64/boot/dts/qcom/x1e78100-lenovo-thinkpad-t14s.dtsi b/arch/arm64/boot/dts/qcom/x1e78100-lenovo-thinkpad-t14s.dtsi index 0a989e9d3d23..654cbce9d6ec 100644 --- a/arch/arm64/boot/dts/qcom/x1e78100-lenovo-thinkpad-t14s.dtsi +++ b/arch/arm64/boot/dts/qcom/x1e78100-lenovo-thinkpad-t14s.dtsi @@ -887,6 +887,24 @@ }; }; +&i2c6 { + clock-frequency = <400000>; + + status = "okay"; + + embedded-controller@28 { + compatible = "lenovo,thinkpad-t14s-ec"; + reg = <0x28>; + + interrupts-extended = <&tlmm 66 IRQ_TYPE_LEVEL_LOW>; + + pinctrl-0 = <&ec_int_n_default>; + pinctrl-names = "default"; + + wakeup-source; + }; +}; + &i2c7 { clock-frequency = <400000>; @@ -1269,6 +1287,12 @@ <72 2>, /* Secure EC I2C connection (?) 
*/ <238 1>; /* UFS Reset */ + ec_int_n_default: ec-int-n-state { + pins = "gpio66"; + function = "gpio"; + bias-disable; + }; + eusb3_reset_n: eusb3-reset-n-state { pins = "gpio6"; function = "gpio"; diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 3bb5b513d5ae..91f3093eee6a 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -71,6 +71,7 @@ config CRYPTO_POLYVAL_ARM64_CE config CRYPTO_AES_ARM64 tristate "Ciphers: AES, modes: ECB, CBC, CTR, CTS, XCTR, XTS" select CRYPTO_AES + select CRYPTO_LIB_SHA256 help Block ciphers: AES cipher algorithms (FIPS-197) Length-preserving ciphers: AES with ECB, CBC, CTR, CTS, diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 81560f722b9d..5e207ff34482 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -122,7 +122,6 @@ struct crypto_aes_xts_ctx { struct crypto_aes_essiv_cbc_ctx { struct crypto_aes_ctx key1; struct crypto_aes_ctx __aligned(8) key2; - struct crypto_shash *hash; }; struct mac_tfm_ctx { @@ -171,7 +170,7 @@ static int __maybe_unused essiv_cbc_set_key(struct crypto_skcipher *tfm, if (ret) return ret; - crypto_shash_tfm_digest(ctx->hash, in_key, key_len, digest); + sha256(in_key, key_len, digest); return aes_expandkey(&ctx->key2, digest, sizeof(digest)); } @@ -388,22 +387,6 @@ static int cts_cbc_decrypt(struct skcipher_request *req) return skcipher_walk_done(&walk, 0); } -static int __maybe_unused essiv_cbc_init_tfm(struct crypto_skcipher *tfm) -{ - struct crypto_aes_essiv_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); - - ctx->hash = crypto_alloc_shash("sha256", 0, 0); - - return PTR_ERR_OR_ZERO(ctx->hash); -} - -static void __maybe_unused essiv_cbc_exit_tfm(struct crypto_skcipher *tfm) -{ - struct crypto_aes_essiv_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); - - crypto_free_shash(ctx->hash); -} - static int __maybe_unused essiv_cbc_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); @@ -793,8 +776,6 @@ static struct skcipher_alg aes_algs[] = { { .setkey = essiv_cbc_set_key, .encrypt = essiv_cbc_encrypt, .decrypt = essiv_cbc_decrypt, - .init = essiv_cbc_init_tfm, - .exit = essiv_cbc_exit_tfm, } }; static int cbcmac_setkey(struct crypto_shash *tfm, const u8 *in_key, diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index b37da3ee8529..99a7c0235e6d 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -24,22 +24,48 @@ * ID_AA64MMFR4_EL1.E2H0 < 0. On such CPUs HCR_EL2.E2H is RES1, but it * can reset into an UNKNOWN state and might not read as 1 until it has * been initialized explicitly. - * - * Fruity CPUs seem to have HCR_EL2.E2H set to RAO/WI, but - * don't advertise it (they predate this relaxation). - * * Initalize HCR_EL2.E2H so that later code can rely upon HCR_EL2.E2H * indicating whether the CPU is running in E2H mode. */ mrs_s x1, SYS_ID_AA64MMFR4_EL1 sbfx x1, x1, #ID_AA64MMFR4_EL1_E2H0_SHIFT, #ID_AA64MMFR4_EL1_E2H0_WIDTH cmp x1, #0 - b.ge .LnVHE_\@ + b.lt .LnE2H0_\@ + /* + * Unfortunately, HCR_EL2.E2H can be RES1 even if not advertised + * as such via ID_AA64MMFR4_EL1.E2H0: + * + * - Fruity CPUs predate the !FEAT_E2H0 relaxation, and seem to + * have HCR_EL2.E2H implemented as RAO/WI. + * + * - On CPUs that lack FEAT_FGT, a hypervisor can't trap guest + * reads of ID_AA64MMFR4_EL1 to advertise !FEAT_E2H0. 
NV + * guests on these hosts can write to HCR_EL2.E2H without + * trapping to the hypervisor, but these writes have no + * functional effect. + * + * Handle both cases by checking for an essential VHE property + * (system register remapping) to decide whether we're + * effectively VHE-only or not. + */ + msr_hcr_el2 x0 // Setup HCR_EL2 as nVHE + isb + mov x1, #1 // Write something to FAR_EL1 + msr far_el1, x1 + isb + mov x1, #2 // Try to overwrite it via FAR_EL2 + msr far_el2, x1 + isb + mrs x1, far_el1 // If we see the latest write in FAR_EL1, + cmp x1, #2 // we can safely assume we are VHE only. + b.ne .LnVHE_\@ // Otherwise, we know that nVHE works. + +.LnE2H0_\@: orr x0, x0, #HCR_E2H -.LnVHE_\@: msr_hcr_el2 x0 isb +.LnVHE_\@: .endm .macro __init_el2_sctlr diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h index bfe3ce9df197..ba7cf7fec5e9 100644 --- a/arch/arm64/include/asm/ftrace.h +++ b/arch/arm64/include/asm/ftrace.h @@ -153,6 +153,7 @@ ftrace_partial_regs(const struct ftrace_regs *fregs, struct pt_regs *regs) regs->pc = afregs->pc; regs->regs[29] = afregs->fp; regs->regs[30] = afregs->lr; + regs->pstate = PSR_MODE_EL1h; return regs; } diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index bec227f9500a..9da54d4ee49e 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -81,6 +81,8 @@ enum __kvm_host_smccc_func { __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff, __KVM_HOST_SMCCC_FUNC___vgic_v3_save_vmcr_aprs, __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_vmcr_aprs, + __KVM_HOST_SMCCC_FUNC___pkvm_reserve_vm, + __KVM_HOST_SMCCC_FUNC___pkvm_unreserve_vm, __KVM_HOST_SMCCC_FUNC___pkvm_init_vm, __KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu, __KVM_HOST_SMCCC_FUNC___pkvm_teardown_vm, diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index fa8a08a1ccd5..c9eab316398e 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -220,6 +220,20 @@ static inline bool vcpu_el2_tge_is_set(const struct kvm_vcpu *vcpu) static inline bool vcpu_el2_amo_is_set(const struct kvm_vcpu *vcpu) { + /* + * DDI0487L.b Known Issue D22105 + * + * When executing at EL2 and HCR_EL2.{E2H,TGE} = {1, 0} it is + * IMPLEMENTATION DEFINED whether the effective value of HCR_EL2.AMO + * is the value programmed or 1. + * + * Make the implementation choice of treating the effective value as 1 as + * we cannot subsequently catch changes to TGE or AMO that would + * otherwise lead to the SError becoming deliverable. + */ + if (vcpu_is_el2(vcpu) && vcpu_el2_e2h_is_set(vcpu) && !vcpu_el2_tge_is_set(vcpu)) + return true; + return ctxt_sys_reg(&vcpu->arch.ctxt, HCR_EL2) & HCR_AMO; } @@ -511,21 +525,29 @@ static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) if (vcpu_mode_is_32bit(vcpu)) { *vcpu_cpsr(vcpu) |= PSR_AA32_E_BIT; } else { - u64 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1); + enum vcpu_sysreg r; + u64 sctlr; + + r = vcpu_has_nv(vcpu) ? SCTLR_EL2 : SCTLR_EL1; + + sctlr = vcpu_read_sys_reg(vcpu, r); sctlr |= SCTLR_ELx_EE; - vcpu_write_sys_reg(vcpu, sctlr, SCTLR_EL1); + vcpu_write_sys_reg(vcpu, sctlr, r); } } static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu) { + enum vcpu_sysreg r; + u64 bit; + if (vcpu_mode_is_32bit(vcpu)) return !!(*vcpu_cpsr(vcpu) & PSR_AA32_E_BIT); - if (vcpu_mode_priv(vcpu)) - return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_EE); - else - return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & SCTLR_EL1_E0E); + r = is_hyp_ctxt(vcpu) ? 
SCTLR_EL2 : SCTLR_EL1; + bit = vcpu_mode_priv(vcpu) ? SCTLR_ELx_EE : SCTLR_EL1_E0E; + + return vcpu_read_sys_reg(vcpu, r) & bit; } static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu, diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 0ee4f6fa3a17..64302c438355 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -252,7 +252,8 @@ struct kvm_protected_vm { pkvm_handle_t handle; struct kvm_hyp_memcache teardown_mc; struct kvm_hyp_memcache stage2_teardown_mc; - bool enabled; + bool is_protected; + bool is_created; }; struct kvm_mpidr_data { @@ -815,6 +816,11 @@ struct kvm_vcpu_arch { u64 hcrx_el2; u64 mdcr_el2; + struct { + u64 r; + u64 w; + } fgt[__NR_FGT_GROUP_IDS__]; + /* Exception Information */ struct kvm_vcpu_fault_info fault; @@ -1442,7 +1448,7 @@ struct kvm *kvm_arch_alloc_vm(void); #define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE -#define kvm_vm_is_protected(kvm) (is_protected_kvm_enabled() && (kvm)->arch.pkvm.enabled) +#define kvm_vm_is_protected(kvm) (is_protected_kvm_enabled() && (kvm)->arch.pkvm.is_protected) #define vcpu_is_protected(vcpu) kvm_vm_is_protected((vcpu)->kvm) @@ -1599,6 +1605,51 @@ static inline bool kvm_arch_has_irq_bypass(void) void compute_fgu(struct kvm *kvm, enum fgt_group_id fgt); void get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg, u64 *res0, u64 *res1); void check_feature_map(void); +void kvm_vcpu_load_fgt(struct kvm_vcpu *vcpu); + +static __always_inline enum fgt_group_id __fgt_reg_to_group_id(enum vcpu_sysreg reg) +{ + switch (reg) { + case HFGRTR_EL2: + case HFGWTR_EL2: + return HFGRTR_GROUP; + case HFGITR_EL2: + return HFGITR_GROUP; + case HDFGRTR_EL2: + case HDFGWTR_EL2: + return HDFGRTR_GROUP; + case HAFGRTR_EL2: + return HAFGRTR_GROUP; + case HFGRTR2_EL2: + case HFGWTR2_EL2: + return HFGRTR2_GROUP; + case HFGITR2_EL2: + return HFGITR2_GROUP; + case HDFGRTR2_EL2: + case HDFGWTR2_EL2: + return HDFGRTR2_GROUP; + default: + BUILD_BUG_ON(1); + } +} +#define vcpu_fgt(vcpu, reg) \ + ({ \ + enum fgt_group_id id = __fgt_reg_to_group_id(reg); \ + u64 *p; \ + switch (reg) { \ + case HFGWTR_EL2: \ + case HDFGWTR_EL2: \ + case HFGWTR2_EL2: \ + case HDFGWTR2_EL2: \ + p = &(vcpu)->arch.fgt[id].w; \ + break; \ + default: \ + p = &(vcpu)->arch.fgt[id].r; \ + break; \ + } \ + \ + p; \ + }) #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index 7fd76f41c296..f7c06a840963 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -83,6 +83,8 @@ extern void check_nested_vcpu_requests(struct kvm_vcpu *vcpu); extern void kvm_nested_flush_hwstate(struct kvm_vcpu *vcpu); extern void kvm_nested_sync_hwstate(struct kvm_vcpu *vcpu); +extern void kvm_nested_setup_mdcr_el2(struct kvm_vcpu *vcpu); + struct kvm_s2_trans { phys_addr_t output; unsigned long block_size; @@ -265,7 +267,7 @@ static inline u64 decode_range_tlbi(u64 val, u64 *range, u16 *asid) return base; } -static inline unsigned int ps_to_output_size(unsigned int ps) +static inline unsigned int ps_to_output_size(unsigned int ps, bool pa52bit) { switch (ps) { case 0: return 32; @@ -273,7 +275,10 @@ static inline unsigned int ps_to_output_size(unsigned int ps) case 2: return 40; case 3: return 42; case 4: return 44; - case 5: + case 5: return 48; + case 6: if (pa52bit) + return 52; + fallthrough; default: return 48; } @@ -285,13 +290,28 @@ enum trans_regime { TR_EL2, }; +struct s1_walk_info; + +struct 
s1_walk_context { + struct s1_walk_info *wi; + u64 table_ipa; + int level; +}; + +struct s1_walk_filter { + int (*fn)(struct s1_walk_context *, void *); + void *priv; +}; + struct s1_walk_info { + struct s1_walk_filter *filter; u64 baddr; enum trans_regime regime; unsigned int max_oa_bits; unsigned int pgshift; unsigned int txsz; int sl; + u8 sh; bool as_el0; bool hpd; bool e0poe; @@ -299,6 +319,7 @@ struct s1_walk_info { bool pan; bool be; bool s2; + bool pa52bit; }; struct s1_walk_result { @@ -334,6 +355,8 @@ struct s1_walk_result { int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, struct s1_walk_result *wr, u64 va); +int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa, + int *level); /* VNCR management */ int kvm_vcpu_allocate_vncr_tlb(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h index ea58282f59bb..08be89c95466 100644 --- a/arch/arm64/include/asm/kvm_pkvm.h +++ b/arch/arm64/include/asm/kvm_pkvm.h @@ -18,6 +18,7 @@ int pkvm_init_host_vm(struct kvm *kvm); int pkvm_create_hyp_vm(struct kvm *kvm); +bool pkvm_hyp_vm_is_created(struct kvm *kvm); void pkvm_destroy_hyp_vm(struct kvm *kvm); int pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index ff6fd0bbd7d2..78a4dbf75e60 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -79,7 +79,6 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, extern void *fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot); extern void mark_linear_text_alias_ro(void); extern int split_kernel_leaf_mapping(unsigned long start, unsigned long end); -extern void init_idmap_kpti_bbml2_flag(void); extern void linear_map_maybe_split_to_ptes(void); /* @@ -107,5 +106,11 @@ static inline bool kaslr_requires_kpti(void) return true; } +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +void kpti_install_ng_mappings(void); +#else +static inline void kpti_install_ng_mappings(void) {} +#endif + #endif /* !__ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 6455db1b54fd..c231d2a3e515 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1220,10 +1220,19 @@ __val; \ }) +/* + * The "Z" constraint combined with the "%x0" template should be enough + * to force XZR generation if (v) is a constant 0 value but LLVM does not + * yet understand that modifier/constraint combo so a conditional is required + * to nudge the compiler into using XZR as a source for a 0 constant value. 
+ */ #define write_sysreg_s(v, r) do { \ u64 __val = (u64)(v); \ u32 __maybe_unused __check_r = (u32)(r); \ - asm volatile(__msr_s(r, "%x0") : : "rZ" (__val)); \ + if (__builtin_constant_p(__val) && __val == 0) \ + asm volatile(__msr_s(r, "xzr")); \ + else \ + asm volatile(__msr_s(r, "%x0") : : "r" (__val)); \ } while (0) /* diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h index e3e8944a71c3..e92e4a0e48fc 100644 --- a/arch/arm64/include/asm/traps.h +++ b/arch/arm64/include/asm/traps.h @@ -36,6 +36,7 @@ int kasan_brk_handler(struct pt_regs *regs, unsigned long esr); int ubsan_brk_handler(struct pt_regs *regs, unsigned long esr); int early_brk64(unsigned long addr, unsigned long esr, struct pt_regs *regs); +void dump_kernel_instr(unsigned long kaddr); /* * Move regs->pc to next instruction and do necessary setup before it diff --git a/arch/arm64/include/asm/vncr_mapping.h b/arch/arm64/include/asm/vncr_mapping.h index f6ec500ad3fa..c2485a862e69 100644 --- a/arch/arm64/include/asm/vncr_mapping.h +++ b/arch/arm64/include/asm/vncr_mapping.h @@ -94,6 +94,8 @@ #define VNCR_PMSICR_EL1 0x838 #define VNCR_PMSIRR_EL1 0x840 #define VNCR_PMSLATFR_EL1 0x848 +#define VNCR_PMSNEVFR_EL1 0x850 +#define VNCR_PMSDSFR_EL1 0x858 #define VNCR_TRFCR_EL1 0x880 #define VNCR_MPAM1_EL1 0x900 #define VNCR_MPAMHCR_EL2 0x930 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index af6fd64a8a19..5ed401ff79e3 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1941,104 +1941,6 @@ static bool has_pmuv3(const struct arm64_cpu_capabilities *entry, int scope) } #endif -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -#define KPTI_NG_TEMP_VA (-(1UL << PMD_SHIFT)) - -extern -void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, - phys_addr_t size, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(enum pgtable_type), int flags); - -static phys_addr_t __initdata kpti_ng_temp_alloc; - -static phys_addr_t __init kpti_ng_pgd_alloc(enum pgtable_type type) -{ - kpti_ng_temp_alloc -= PAGE_SIZE; - return kpti_ng_temp_alloc; -} - -static int __init __kpti_install_ng_mappings(void *__unused) -{ - typedef void (kpti_remap_fn)(int, int, phys_addr_t, unsigned long); - extern kpti_remap_fn idmap_kpti_install_ng_mappings; - kpti_remap_fn *remap_fn; - - int cpu = smp_processor_id(); - int levels = CONFIG_PGTABLE_LEVELS; - int order = order_base_2(levels); - u64 kpti_ng_temp_pgd_pa = 0; - pgd_t *kpti_ng_temp_pgd; - u64 alloc = 0; - - if (levels == 5 && !pgtable_l5_enabled()) - levels = 4; - else if (levels == 4 && !pgtable_l4_enabled()) - levels = 3; - - remap_fn = (void *)__pa_symbol(idmap_kpti_install_ng_mappings); - - if (!cpu) { - alloc = __get_free_pages(GFP_ATOMIC | __GFP_ZERO, order); - kpti_ng_temp_pgd = (pgd_t *)(alloc + (levels - 1) * PAGE_SIZE); - kpti_ng_temp_alloc = kpti_ng_temp_pgd_pa = __pa(kpti_ng_temp_pgd); - - // - // Create a minimal page table hierarchy that permits us to map - // the swapper page tables temporarily as we traverse them. - // - // The physical pages are laid out as follows: - // - // +--------+-/-------+-/------ +-/------ +-\\\--------+ - // : PTE[] : | PMD[] : | PUD[] : | P4D[] : ||| PGD[] : - // +--------+-\-------+-\------ +-\------ +-///--------+ - // ^ - // The first page is mapped into this hierarchy at a PMD_SHIFT - // aligned virtual address, so that we can manipulate the PTE - // level entries while the mapping is active. 
The first entry - // covers the PTE[] page itself, the remaining entries are free - // to be used as a ad-hoc fixmap. - // - create_kpti_ng_temp_pgd(kpti_ng_temp_pgd, __pa(alloc), - KPTI_NG_TEMP_VA, PAGE_SIZE, PAGE_KERNEL, - kpti_ng_pgd_alloc, 0); - } - - cpu_install_idmap(); - remap_fn(cpu, num_online_cpus(), kpti_ng_temp_pgd_pa, KPTI_NG_TEMP_VA); - cpu_uninstall_idmap(); - - if (!cpu) { - free_pages(alloc, order); - arm64_use_ng_mappings = true; - } - - return 0; -} - -static void __init kpti_install_ng_mappings(void) -{ - /* Check whether KPTI is going to be used */ - if (!arm64_kernel_unmapped_at_el0()) - return; - - /* - * We don't need to rewrite the page-tables if either we've done - * it already or we have KASLR enabled and therefore have not - * created any global mappings at all. - */ - if (arm64_use_ng_mappings) - return; - - init_idmap_kpti_bbml2_flag(); - stop_machine(__kpti_install_ng_mappings, NULL, cpu_online_mask); -} - -#else -static inline void kpti_install_ng_mappings(void) -{ -} -#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ - static void cpu_enable_kpti(struct arm64_cpu_capabilities const *cap) { if (__this_cpu_read(this_cpu_vector) == vectors) { @@ -2419,17 +2321,21 @@ static void bti_enable(const struct arm64_cpu_capabilities *__unused) #ifdef CONFIG_ARM64_MTE static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap) { + static bool cleared_zero_page = false; + sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_ATA | SCTLR_EL1_ATA0); mte_cpu_setup(); /* * Clear the tags in the zero page. This needs to be done via the - * linear map which has the Tagged attribute. + * linear map which has the Tagged attribute. Since this page is + * always mapped as pte_special(), set_pte_at() will not attempt to + * clear the tags or set PG_mte_tagged. */ - if (try_page_mte_tagging(ZERO_PAGE(0))) { + if (!cleared_zero_page) { + cleared_zero_page = true; mte_clear_page_tags(lm_alias(empty_zero_page)); - set_page_mte_tagged(ZERO_PAGE(0)); } kasan_init_hw_tags_cpu(); @@ -2550,6 +2456,15 @@ test_has_mpam_hcr(const struct arm64_cpu_capabilities *entry, int scope) return idr & MPAMIDR_EL1_HAS_HCR; } +static bool +test_has_gicv5_legacy(const struct arm64_cpu_capabilities *entry, int scope) +{ + if (!this_cpu_has_cap(ARM64_HAS_GICV5_CPUIF)) + return false; + + return !!(read_sysreg_s(SYS_ICC_IDR0_EL1) & ICC_IDR0_EL1_GCIE_LEGACY); +} + static const struct arm64_cpu_capabilities arm64_features[] = { { .capability = ARM64_ALWAYS_BOOT, @@ -3167,6 +3082,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64PFR2_EL1, GCIE, IMP) }, + { + .desc = "GICv5 Legacy vCPU interface", + .type = ARM64_CPUCAP_EARLY_LOCAL_CPU_FEATURE, + .capability = ARM64_HAS_GICV5_LEGACY, + .matches = test_has_gicv5_legacy, + }, {}, }; diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index f546a914f041..a9c81715ce59 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -697,6 +697,8 @@ static void noinstr el0_breakpt(struct pt_regs *regs, unsigned long esr) static void noinstr el0_softstp(struct pt_regs *regs, unsigned long esr) { + bool step_done; + if (!is_ttbr0_addr(regs->pc)) arm64_apply_bp_hardening(); @@ -707,10 +709,10 @@ static void noinstr el0_softstp(struct pt_regs *regs, unsigned long esr) * If we are stepping a suspended breakpoint there's nothing more to do: * the single-step is complete. 
*/ - if (!try_step_suspended_breakpoints(regs)) { - local_daif_restore(DAIF_PROCCTX); + step_done = try_step_suspended_breakpoints(regs); + local_daif_restore(DAIF_PROCCTX); + if (!step_done) do_el0_softstep(esr, regs); - } arm64_exit_to_user_mode(regs); } diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 714b0b5ec5ac..5369763606e7 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -105,6 +105,9 @@ KVM_NVHE_ALIAS(__hyp_stub_vectors); KVM_NVHE_ALIAS(vgic_v2_cpuif_trap); KVM_NVHE_ALIAS(vgic_v3_cpuif_trap); +/* Static key indicating whether GICv3 has GICv2 compatibility */ +KVM_NVHE_ALIAS(vgic_v3_has_v2_compat); + /* Static key which is set if CNTVOFF_EL2 is unusable */ KVM_NVHE_ALIAS(broken_cntvoff_key); diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index 54a52dc5c1ae..43f7a2f39403 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -478,7 +478,7 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr, if (folio_test_hugetlb(folio)) WARN_ON_ONCE(!folio_test_hugetlb_mte_tagged(folio)); else - WARN_ON_ONCE(!page_mte_tagged(page)); + WARN_ON_ONCE(!page_mte_tagged(page) && !is_zero_page(page)); /* limit access to the end of the page */ offset = offset_in_page(addr); diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 0c5d408afd95..8ab6104a4883 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -10,6 +10,7 @@ #define pr_fmt(fmt) "kprobes: " fmt +#include <linux/execmem.h> #include <linux/extable.h> #include <linux/kasan.h> #include <linux/kernel.h> @@ -41,6 +42,17 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); static void __kprobes post_kprobe_handler(struct kprobe *, struct kprobe_ctlblk *, struct pt_regs *); +void *alloc_insn_page(void) +{ + void *addr; + + addr = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE); + if (!addr) + return NULL; + set_memory_rox((unsigned long)addr, 1); + return addr; +} + static void __kprobes arch_prepare_ss_slot(struct kprobe *p) { kprobe_opcode_t *addr = p->ainsn.xol_insn; diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 5041817af267..681939ef5d16 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -149,19 +149,18 @@ pstate_check_t * const aarch32_opcode_cond_checks[16] = { int show_unhandled_signals = 0; -static void dump_kernel_instr(const char *lvl, struct pt_regs *regs) +void dump_kernel_instr(unsigned long kaddr) { - unsigned long addr = instruction_pointer(regs); char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str; int i; - if (user_mode(regs)) + if (!is_ttbr1_addr(kaddr)) return; for (i = -4; i < 1; i++) { unsigned int val, bad; - bad = aarch64_insn_read(&((u32 *)addr)[i], &val); + bad = aarch64_insn_read(&((u32 *)kaddr)[i], &val); if (!bad) p += sprintf(p, i == 0 ? "(%08x) " : "%08x ", val); @@ -169,7 +168,7 @@ static void dump_kernel_instr(const char *lvl, struct pt_regs *regs) p += sprintf(p, i == 0 ? "(????????) " : "???????? 
"); } - printk("%sCode: %s\n", lvl, str); + printk(KERN_EMERG "Code: %s\n", str); } #define S_SMP " SMP" @@ -178,6 +177,7 @@ static int __die(const char *str, long err, struct pt_regs *regs) { static int die_counter; int ret; + unsigned long addr = instruction_pointer(regs); pr_emerg("Internal error: %s: %016lx [#%d] " S_SMP "\n", str, err, ++die_counter); @@ -190,7 +190,10 @@ static int __die(const char *str, long err, struct pt_regs *regs) print_modules(); show_regs(regs); - dump_kernel_instr(KERN_EMERG, regs); + if (user_mode(regs)) + return ret; + + dump_kernel_instr(addr); return ret; } diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 713248f240e0..4f803fd1c99a 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -25,7 +25,7 @@ menuconfig KVM select HAVE_KVM_CPU_RELAX_INTERCEPT select KVM_MMIO select KVM_GENERIC_DIRTYLOG_READ_PROTECT - select KVM_XFER_TO_GUEST_WORK + select VIRT_XFER_TO_GUEST_WORK select KVM_VFIO select HAVE_KVM_DIRTY_RING_ACQ_REL select NEED_KVM_DIRTY_RING_WITH_BITMAP @@ -37,6 +37,7 @@ menuconfig KVM select HAVE_KVM_VCPU_RUN_PID_CHANGE select SCHED_INFO select GUEST_PERF_EVENTS if PERF_EVENTS + select KVM_GUEST_MEMFD help Support hosting virtualized guest machines. diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index dbd74e4885e2..3f675875abea 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -66,7 +66,7 @@ static int nr_timers(struct kvm_vcpu *vcpu) u32 timer_get_ctl(struct arch_timer_context *ctxt) { - struct kvm_vcpu *vcpu = ctxt->vcpu; + struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt); switch(arch_timer_ctx_index(ctxt)) { case TIMER_VTIMER: @@ -85,7 +85,7 @@ u32 timer_get_ctl(struct arch_timer_context *ctxt) u64 timer_get_cval(struct arch_timer_context *ctxt) { - struct kvm_vcpu *vcpu = ctxt->vcpu; + struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt); switch(arch_timer_ctx_index(ctxt)) { case TIMER_VTIMER: @@ -104,7 +104,7 @@ u64 timer_get_cval(struct arch_timer_context *ctxt) static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl) { - struct kvm_vcpu *vcpu = ctxt->vcpu; + struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt); switch(arch_timer_ctx_index(ctxt)) { case TIMER_VTIMER: @@ -126,7 +126,7 @@ static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl) static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval) { - struct kvm_vcpu *vcpu = ctxt->vcpu; + struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt); switch(arch_timer_ctx_index(ctxt)) { case TIMER_VTIMER: @@ -146,16 +146,6 @@ static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval) } } -static void timer_set_offset(struct arch_timer_context *ctxt, u64 offset) -{ - if (!ctxt->offset.vm_offset) { - WARN(offset, "timer %ld\n", arch_timer_ctx_index(ctxt)); - return; - } - - WRITE_ONCE(*ctxt->offset.vm_offset, offset); -} - u64 kvm_phys_timer_read(void) { return timecounter->cc->read(timecounter->cc); @@ -343,7 +333,7 @@ static enum hrtimer_restart kvm_hrtimer_expire(struct hrtimer *hrt) u64 ns; ctx = container_of(hrt, struct arch_timer_context, hrtimer); - vcpu = ctx->vcpu; + vcpu = timer_context_to_vcpu(ctx); trace_kvm_timer_hrtimer_expire(ctx); @@ -436,8 +426,9 @@ static void kvm_timer_update_status(struct arch_timer_context *ctx, bool level) * * But hey, it's fast, right? 
*/ - if (is_hyp_ctxt(ctx->vcpu) && - (ctx == vcpu_vtimer(ctx->vcpu) || ctx == vcpu_ptimer(ctx->vcpu))) { + struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctx); + if (is_hyp_ctxt(vcpu) && + (ctx == vcpu_vtimer(vcpu) || ctx == vcpu_ptimer(vcpu))) { unsigned long val = timer_get_ctl(ctx); __assign_bit(__ffs(ARCH_TIMER_CTRL_IT_STAT), &val, level); timer_set_ctl(ctx, val); @@ -470,7 +461,7 @@ static void timer_emulate(struct arch_timer_context *ctx) trace_kvm_timer_emulate(ctx, should_fire); if (should_fire != ctx->irq.level) - kvm_timer_update_irq(ctx->vcpu, should_fire, ctx); + kvm_timer_update_irq(timer_context_to_vcpu(ctx), should_fire, ctx); kvm_timer_update_status(ctx, should_fire); @@ -498,7 +489,7 @@ static void set_cntpoff(u64 cntpoff) static void timer_save_state(struct arch_timer_context *ctx) { - struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu); + struct arch_timer_cpu *timer = vcpu_timer(timer_context_to_vcpu(ctx)); enum kvm_arch_timers index = arch_timer_ctx_index(ctx); unsigned long flags; @@ -609,7 +600,7 @@ static void kvm_timer_unblocking(struct kvm_vcpu *vcpu) static void timer_restore_state(struct arch_timer_context *ctx) { - struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu); + struct arch_timer_cpu *timer = vcpu_timer(timer_context_to_vcpu(ctx)); enum kvm_arch_timers index = arch_timer_ctx_index(ctx); unsigned long flags; @@ -668,7 +659,7 @@ static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, boo static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx) { - struct kvm_vcpu *vcpu = ctx->vcpu; + struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctx); bool phys_active = false; /* @@ -677,7 +668,7 @@ static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx) * this point and the register restoration, we'll take the * interrupt anyway. 
*/ - kvm_timer_update_irq(ctx->vcpu, kvm_timer_should_fire(ctx), ctx); + kvm_timer_update_irq(vcpu, kvm_timer_should_fire(ctx), ctx); if (irqchip_in_kernel(vcpu->kvm)) phys_active = kvm_vgic_map_is_active(vcpu, timer_irq(ctx)); @@ -1063,7 +1054,7 @@ static void timer_context_init(struct kvm_vcpu *vcpu, int timerid) struct arch_timer_context *ctxt = vcpu_get_timer(vcpu, timerid); struct kvm *kvm = vcpu->kvm; - ctxt->vcpu = vcpu; + ctxt->timer_id = timerid; if (timerid == TIMER_VTIMER) ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset; @@ -1121,49 +1112,6 @@ void kvm_timer_cpu_down(void) disable_percpu_irq(host_ptimer_irq); } -int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) -{ - struct arch_timer_context *timer; - - switch (regid) { - case KVM_REG_ARM_TIMER_CTL: - timer = vcpu_vtimer(vcpu); - kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value); - break; - case KVM_REG_ARM_TIMER_CNT: - if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, - &vcpu->kvm->arch.flags)) { - timer = vcpu_vtimer(vcpu); - timer_set_offset(timer, kvm_phys_timer_read() - value); - } - break; - case KVM_REG_ARM_TIMER_CVAL: - timer = vcpu_vtimer(vcpu); - kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value); - break; - case KVM_REG_ARM_PTIMER_CTL: - timer = vcpu_ptimer(vcpu); - kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value); - break; - case KVM_REG_ARM_PTIMER_CNT: - if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, - &vcpu->kvm->arch.flags)) { - timer = vcpu_ptimer(vcpu); - timer_set_offset(timer, kvm_phys_timer_read() - value); - } - break; - case KVM_REG_ARM_PTIMER_CVAL: - timer = vcpu_ptimer(vcpu); - kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value); - break; - - default: - return -1; - } - - return 0; -} - static u64 read_timer_ctl(struct arch_timer_context *timer) { /* @@ -1180,31 +1128,6 @@ static u64 read_timer_ctl(struct arch_timer_context *timer) return ctl; } -u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) -{ - switch (regid) { - case KVM_REG_ARM_TIMER_CTL: - return kvm_arm_timer_read(vcpu, - vcpu_vtimer(vcpu), TIMER_REG_CTL); - case KVM_REG_ARM_TIMER_CNT: - return kvm_arm_timer_read(vcpu, - vcpu_vtimer(vcpu), TIMER_REG_CNT); - case KVM_REG_ARM_TIMER_CVAL: - return kvm_arm_timer_read(vcpu, - vcpu_vtimer(vcpu), TIMER_REG_CVAL); - case KVM_REG_ARM_PTIMER_CTL: - return kvm_arm_timer_read(vcpu, - vcpu_ptimer(vcpu), TIMER_REG_CTL); - case KVM_REG_ARM_PTIMER_CNT: - return kvm_arm_timer_read(vcpu, - vcpu_ptimer(vcpu), TIMER_REG_CNT); - case KVM_REG_ARM_PTIMER_CVAL: - return kvm_arm_timer_read(vcpu, - vcpu_ptimer(vcpu), TIMER_REG_CVAL); - } - return (u64)-1; -} - static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, struct arch_timer_context *timer, enum kvm_arch_timer_regs treg) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index bd6b6a620a09..870953b4a8a7 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -6,7 +6,6 @@ #include <linux/bug.h> #include <linux/cpu_pm.h> -#include <linux/entry-kvm.h> #include <linux/errno.h> #include <linux/err.h> #include <linux/kvm_host.h> @@ -170,10 +169,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (ret) return ret; - ret = pkvm_init_host_vm(kvm); - if (ret) - goto err_unshare_kvm; - if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL_ACCOUNT)) { ret = -ENOMEM; goto err_unshare_kvm; @@ -184,6 +179,16 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (ret) goto err_free_cpumask; + if (is_protected_kvm_enabled()) { + /* + * If any failures occur after this is successful, 
make sure to + * call __pkvm_unreserve_vm to unreserve the VM in hyp. + */ + ret = pkvm_init_host_vm(kvm); + if (ret) + goto err_free_cpumask; + } + kvm_vgic_early_init(kvm); kvm_timer_init_vm(kvm); @@ -637,6 +642,7 @@ nommu: vcpu->arch.hcr_el2 |= HCR_TWI; vcpu_set_pauth_traps(vcpu); + kvm_vcpu_load_fgt(vcpu); if (is_protected_kvm_enabled()) { kvm_call_hyp_nvhe(__pkvm_vcpu_load, @@ -1177,7 +1183,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) /* * Check conditions before entering the guest */ - ret = xfer_to_guest_mode_handle_work(vcpu); + ret = kvm_xfer_to_guest_mode_handle_work(vcpu); if (!ret) ret = 1; @@ -1789,6 +1795,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, case KVM_GET_VCPU_EVENTS: { struct kvm_vcpu_events events; + if (!kvm_vcpu_initialized(vcpu)) + return -ENOEXEC; + if (kvm_arm_vcpu_get_events(vcpu, &events)) return -EINVAL; @@ -1800,6 +1809,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, case KVM_SET_VCPU_EVENTS: { struct kvm_vcpu_events events; + if (!kvm_vcpu_initialized(vcpu)) + return -ENOEXEC; + if (copy_from_user(&events, argp, sizeof(events))) return -EFAULT; @@ -2317,8 +2329,9 @@ static int __init init_subsystems(void) } if (kvm_mode == KVM_MODE_NV && - !(vgic_present && kvm_vgic_global_state.type == VGIC_V3)) { - kvm_err("NV support requires GICv3, giving up\n"); + !(vgic_present && (kvm_vgic_global_state.type == VGIC_V3 || + kvm_vgic_global_state.has_gcie_v3_compat))) { + kvm_err("NV support requires GICv3 or GICv5 with legacy support, giving up\n"); err = -EINVAL; goto out; } diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index d71ca4ddc9d1..be26d5aa668c 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -28,9 +28,57 @@ static int get_ia_size(struct s1_walk_info *wi) /* Return true if the IPA is out of the OA range */ static bool check_output_size(u64 ipa, struct s1_walk_info *wi) { + if (wi->pa52bit) + return wi->max_oa_bits < 52 && (ipa & GENMASK_ULL(51, wi->max_oa_bits)); return wi->max_oa_bits < 48 && (ipa & GENMASK_ULL(47, wi->max_oa_bits)); } +static bool has_52bit_pa(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, u64 tcr) +{ + switch (BIT(wi->pgshift)) { + case SZ_64K: + default: /* IMPDEF: treat any other value as 64k */ + if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, PARANGE, 52)) + return false; + return ((wi->regime == TR_EL2 ? + FIELD_GET(TCR_EL2_PS_MASK, tcr) : + FIELD_GET(TCR_IPS_MASK, tcr)) == 0b0110); + case SZ_16K: + if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT)) + return false; + break; + case SZ_4K: + if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT)) + return false; + break; + } + + return (tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS)); +} + +static u64 desc_to_oa(struct s1_walk_info *wi, u64 desc) +{ + u64 addr; + + if (!wi->pa52bit) + return desc & GENMASK_ULL(47, wi->pgshift); + + switch (BIT(wi->pgshift)) { + case SZ_4K: + case SZ_16K: + addr = desc & GENMASK_ULL(49, wi->pgshift); + addr |= FIELD_GET(KVM_PTE_ADDR_51_50_LPA2, desc) << 50; + break; + case SZ_64K: + default: /* IMPDEF: treat any other value as 64k */ + addr = desc & GENMASK_ULL(47, wi->pgshift); + addr |= FIELD_GET(KVM_PTE_ADDR_51_48, desc) << 48; + break; + } + + return addr; +} + /* Return the translation regime that applies to an AT instruction */ static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 op) { @@ -43,28 +91,32 @@ static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 o case OP_AT_S1E2W: case OP_AT_S1E2A: return vcpu_el2_e2h_is_set(vcpu) ? 
TR_EL20 : TR_EL2; - break; default: return (vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)) ? TR_EL20 : TR_EL10; } } +static u64 effective_tcr2(struct kvm_vcpu *vcpu, enum trans_regime regime) +{ + if (regime == TR_EL10) { + if (vcpu_has_nv(vcpu) && + !(__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En)) + return 0; + + return vcpu_read_sys_reg(vcpu, TCR2_EL1); + } + + return vcpu_read_sys_reg(vcpu, TCR2_EL2); +} + static bool s1pie_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime) { if (!kvm_has_s1pie(vcpu->kvm)) return false; - switch (regime) { - case TR_EL2: - case TR_EL20: - return vcpu_read_sys_reg(vcpu, TCR2_EL2) & TCR2_EL2_PIE; - case TR_EL10: - return (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En) && - (__vcpu_sys_reg(vcpu, TCR2_EL1) & TCR2_EL1_PIE); - default: - BUG(); - } + /* Abuse TCR2_EL1_PIE and use it for EL2 as well */ + return effective_tcr2(vcpu, regime) & TCR2_EL1_PIE; } static void compute_s1poe(struct kvm_vcpu *vcpu, struct s1_walk_info *wi) @@ -76,23 +128,11 @@ static void compute_s1poe(struct kvm_vcpu *vcpu, struct s1_walk_info *wi) return; } - switch (wi->regime) { - case TR_EL2: - case TR_EL20: - val = vcpu_read_sys_reg(vcpu, TCR2_EL2); - wi->poe = val & TCR2_EL2_POE; - wi->e0poe = (wi->regime == TR_EL20) && (val & TCR2_EL2_E0POE); - break; - case TR_EL10: - if (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En) { - wi->poe = wi->e0poe = false; - return; - } + val = effective_tcr2(vcpu, wi->regime); - val = __vcpu_sys_reg(vcpu, TCR2_EL1); - wi->poe = val & TCR2_EL1_POE; - wi->e0poe = val & TCR2_EL1_E0POE; - } + /* Abuse TCR2_EL1_* for EL2 */ + wi->poe = val & TCR2_EL1_POE; + wi->e0poe = (wi->regime != TR_EL2) && (val & TCR2_EL1_E0POE); } static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, @@ -102,14 +142,16 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, unsigned int stride, x; bool va55, tbi, lva; - hcr = __vcpu_sys_reg(vcpu, HCR_EL2); - va55 = va & BIT(55); - if (wi->regime == TR_EL2 && va55) - goto addrsz; - - wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC)); + if (vcpu_has_nv(vcpu)) { + hcr = __vcpu_sys_reg(vcpu, HCR_EL2); + wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC)); + } else { + WARN_ON_ONCE(wi->regime != TR_EL10); + wi->s2 = false; + hcr = 0; + } switch (wi->regime) { case TR_EL10: @@ -131,6 +173,46 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, BUG(); } + /* Someone was silly enough to encode TG0/TG1 differently */ + if (va55 && wi->regime != TR_EL2) { + wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr); + tg = FIELD_GET(TCR_TG1_MASK, tcr); + + switch (tg << TCR_TG1_SHIFT) { + case TCR_TG1_4K: + wi->pgshift = 12; break; + case TCR_TG1_16K: + wi->pgshift = 14; break; + case TCR_TG1_64K: + default: /* IMPDEF: treat any other value as 64k */ + wi->pgshift = 16; break; + } + } else { + wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr); + tg = FIELD_GET(TCR_TG0_MASK, tcr); + + switch (tg << TCR_TG0_SHIFT) { + case TCR_TG0_4K: + wi->pgshift = 12; break; + case TCR_TG0_16K: + wi->pgshift = 14; break; + case TCR_TG0_64K: + default: /* IMPDEF: treat any other value as 64k */ + wi->pgshift = 16; break; + } + } + + wi->pa52bit = has_52bit_pa(vcpu, wi, tcr); + + ia_bits = get_ia_size(wi); + + /* AArch64.S1StartLevel() */ + stride = wi->pgshift - 3; + wi->sl = 3 - (((ia_bits - 1) - wi->pgshift) / stride); + + if (wi->regime == TR_EL2 && va55) + goto addrsz; + tbi = (wi->regime == TR_EL2 ? FIELD_GET(TCR_EL2_TBI, tcr) : (va55 ? 
@@ -140,6 +222,12 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, if (!tbi && (u64)sign_extend64(va, 55) != va) goto addrsz; + wi->sh = (wi->regime == TR_EL2 ? + FIELD_GET(TCR_EL2_SH0_MASK, tcr) : + (va55 ? + FIELD_GET(TCR_SH1_MASK, tcr) : + FIELD_GET(TCR_SH0_MASK, tcr))); + va = (u64)sign_extend64(va, 55); /* Let's put the MMU disabled case aside immediately */ @@ -194,53 +282,20 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, /* R_BVXDG */ wi->hpd |= (wi->poe || wi->e0poe); - /* Someone was silly enough to encode TG0/TG1 differently */ - if (va55) { - wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr); - tg = FIELD_GET(TCR_TG1_MASK, tcr); - - switch (tg << TCR_TG1_SHIFT) { - case TCR_TG1_4K: - wi->pgshift = 12; break; - case TCR_TG1_16K: - wi->pgshift = 14; break; - case TCR_TG1_64K: - default: /* IMPDEF: treat any other value as 64k */ - wi->pgshift = 16; break; - } - } else { - wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr); - tg = FIELD_GET(TCR_TG0_MASK, tcr); - - switch (tg << TCR_TG0_SHIFT) { - case TCR_TG0_4K: - wi->pgshift = 12; break; - case TCR_TG0_16K: - wi->pgshift = 14; break; - case TCR_TG0_64K: - default: /* IMPDEF: treat any other value as 64k */ - wi->pgshift = 16; break; - } - } - /* R_PLCGL, R_YXNYW */ if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR2_EL1, ST, 48_47)) { if (wi->txsz > 39) - goto transfault_l0; + goto transfault; } else { if (wi->txsz > 48 || (BIT(wi->pgshift) == SZ_64K && wi->txsz > 47)) - goto transfault_l0; + goto transfault; } /* R_GTJBY, R_SXWGM */ switch (BIT(wi->pgshift)) { case SZ_4K: - lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT); - lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS); - break; case SZ_16K: - lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT); - lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS); + lva = wi->pa52bit; break; case SZ_64K: lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, VARange, 52); @@ -248,38 +303,42 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, } if ((lva && wi->txsz < 12) || (!lva && wi->txsz < 16)) - goto transfault_l0; - - ia_bits = get_ia_size(wi); + goto transfault; /* R_YYVYV, I_THCZK */ if ((!va55 && va > GENMASK(ia_bits - 1, 0)) || (va55 && va < GENMASK(63, ia_bits))) - goto transfault_l0; + goto transfault; /* I_ZFSYQ */ if (wi->regime != TR_EL2 && (tcr & (va55 ? TCR_EPD1_MASK : TCR_EPD0_MASK))) - goto transfault_l0; + goto transfault; /* R_BNDVG and following statements */ if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, E0PD, IMP) && wi->as_el0 && (tcr & (va55 ? TCR_E0PD1 : TCR_E0PD0))) - goto transfault_l0; - - /* AArch64.S1StartLevel() */ - stride = wi->pgshift - 3; - wi->sl = 3 - (((ia_bits - 1) - wi->pgshift) / stride); + goto transfault; ps = (wi->regime == TR_EL2 ? FIELD_GET(TCR_EL2_PS_MASK, tcr) : FIELD_GET(TCR_IPS_MASK, tcr)); - wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(ps)); + wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(ps, wi->pa52bit)); /* Compute minimal alignment */ x = 3 + ia_bits - ((3 - wi->sl) * stride + wi->pgshift); wi->baddr = ttbr & TTBRx_EL1_BADDR; + if (wi->pa52bit) { + /* + * Force the alignment on 64 bytes for top-level tables + * smaller than 8 entries, since TTBR.BADDR[5:2] are used to + * store bits [51:48] of the first level of lookup. 
+ */ + x = max(x, 6); + + wi->baddr |= FIELD_GET(GENMASK_ULL(5, 2), ttbr) << 48; + } /* R_VPBBF */ if (check_output_size(wi->baddr, wi)) @@ -289,12 +348,17 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, return 0; -addrsz: /* Address Size Fault level 0 */ +addrsz: + /* + * Address Size Fault level 0 to indicate it comes from TTBR. + * yes, this is an oddity. + */ fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(0), false); return -EFAULT; -transfault_l0: /* Translation Fault level 0 */ - fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(0), false); +transfault: + /* Translation Fault on start level */ + fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(wi->sl), false); return -EFAULT; } @@ -339,6 +403,17 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, ipa = kvm_s2_trans_output(&s2_trans); } + if (wi->filter) { + ret = wi->filter->fn(&(struct s1_walk_context) + { + .wi = wi, + .table_ipa = baddr, + .level = level, + }, wi->filter->priv); + if (ret) + return ret; + } + ret = kvm_read_guest(vcpu->kvm, ipa, &desc, sizeof(desc)); if (ret) { fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level), false); @@ -369,7 +444,7 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, wr->PXNTable |= FIELD_GET(PMD_TABLE_PXN, desc); } - baddr = desc & GENMASK_ULL(47, wi->pgshift); + baddr = desc_to_oa(wi, desc); /* Check for out-of-range OA */ if (check_output_size(baddr, wi)) @@ -386,11 +461,11 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, switch (BIT(wi->pgshift)) { case SZ_4K: - valid_block = level == 1 || level == 2; + valid_block = level == 1 || level == 2 || (wi->pa52bit && level == 0); break; case SZ_16K: case SZ_64K: - valid_block = level == 2; + valid_block = level == 2 || (wi->pa52bit && level == 1); break; } @@ -398,7 +473,8 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, goto transfault; } - if (check_output_size(desc & GENMASK(47, va_bottom), wi)) + baddr = desc_to_oa(wi, desc); + if (check_output_size(baddr & GENMASK(52, va_bottom), wi)) goto addrsz; if (!(desc & PTE_AF)) { @@ -411,7 +487,7 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, wr->failed = false; wr->level = level; wr->desc = desc; - wr->pa = desc & GENMASK(47, va_bottom); + wr->pa = baddr & GENMASK(52, va_bottom); wr->pa |= va & GENMASK_ULL(va_bottom - 1, 0); wr->nG = (wi->regime != TR_EL2) && (desc & PTE_NG); @@ -640,21 +716,36 @@ static u8 combine_s1_s2_attr(u8 s1, u8 s2) #define ATTR_OSH 0b10 #define ATTR_ISH 0b11 -static u8 compute_sh(u8 attr, u64 desc) +static u8 compute_final_sh(u8 attr, u8 sh) { - u8 sh; - /* Any form of device, as well as NC has SH[1:0]=0b10 */ if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC)) return ATTR_OSH; - sh = FIELD_GET(PTE_SHARED, desc); if (sh == ATTR_RSV) /* Reserved, mapped to NSH */ sh = ATTR_NSH; return sh; } +static u8 compute_s1_sh(struct s1_walk_info *wi, struct s1_walk_result *wr, + u8 attr) +{ + u8 sh; + + /* + * non-52bit and LPA have their basic shareability described in the + * descriptor. LPA2 gets it from the corresponding field in TCR, + * conveniently recorded in the walk info. 
+ */ + if (!wi->pa52bit || BIT(wi->pgshift) == SZ_64K) + sh = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S1_SH, wr->desc); + else + sh = wi->sh; + + return compute_final_sh(attr, sh); +} + static u8 combine_sh(u8 s1_sh, u8 s2_sh) { if (s1_sh == ATTR_OSH || s2_sh == ATTR_OSH) @@ -668,7 +759,7 @@ static u8 combine_sh(u8 s1_sh, u8 s2_sh) static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par, struct kvm_s2_trans *tr) { - u8 s1_parattr, s2_memattr, final_attr; + u8 s1_parattr, s2_memattr, final_attr, s2_sh; u64 par; /* If S2 has failed to translate, report the damage */ @@ -741,17 +832,19 @@ static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par, !MEMATTR_IS_DEVICE(final_attr)) final_attr = MEMATTR(NC, NC); + s2_sh = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S2_SH, tr->desc); + par = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr); par |= tr->output & GENMASK(47, 12); par |= FIELD_PREP(SYS_PAR_EL1_SH, combine_sh(FIELD_GET(SYS_PAR_EL1_SH, s1_par), - compute_sh(final_attr, tr->desc))); + compute_final_sh(final_attr, s2_sh))); return par; } -static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr, - enum trans_regime regime) +static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, + struct s1_walk_result *wr) { u64 par; @@ -764,9 +857,9 @@ static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr, } else if (wr->level == S1_MMU_DISABLED) { /* MMU off or HCR_EL2.DC == 1 */ par = SYS_PAR_EL1_NSE; - par |= wr->pa & GENMASK_ULL(47, 12); + par |= wr->pa & SYS_PAR_EL1_PA; - if (regime == TR_EL10 && + if (wi->regime == TR_EL10 && vcpu_has_nv(vcpu) && (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_DC)) { par |= FIELD_PREP(SYS_PAR_EL1_ATTR, MEMATTR(WbRaWa, WbRaWa)); @@ -781,14 +874,14 @@ static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr, par = SYS_PAR_EL1_NSE; - mair = (regime == TR_EL10 ? + mair = (wi->regime == TR_EL10 ? vcpu_read_sys_reg(vcpu, MAIR_EL1) : vcpu_read_sys_reg(vcpu, MAIR_EL2)); mair >>= FIELD_GET(PTE_ATTRINDX_MASK, wr->desc) * 8; mair &= 0xff; - sctlr = (regime == TR_EL10 ? + sctlr = (wi->regime == TR_EL10 ? vcpu_read_sys_reg(vcpu, SCTLR_EL1) : vcpu_read_sys_reg(vcpu, SCTLR_EL2)); @@ -797,9 +890,9 @@ static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr, mair = MEMATTR(NC, NC); par |= FIELD_PREP(SYS_PAR_EL1_ATTR, mair); - par |= wr->pa & GENMASK_ULL(47, 12); + par |= wr->pa & SYS_PAR_EL1_PA; - sh = compute_sh(mair, wr->desc); + sh = compute_s1_sh(wi, wr, mair); par |= FIELD_PREP(SYS_PAR_EL1_SH, sh); } @@ -873,7 +966,7 @@ static void compute_s1_direct_permissions(struct kvm_vcpu *vcpu, wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_WXN); break; case TR_EL10: - wxn = (__vcpu_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_WXN); + wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_WXN); break; } @@ -1186,7 +1279,7 @@ static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false); compute_par: - return compute_par_s1(vcpu, &wr, wi.regime); + return compute_par_s1(vcpu, &wi, &wr); } /* @@ -1202,7 +1295,7 @@ static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) { struct mmu_config config; struct kvm_s2_mmu *mmu; - bool fail; + bool fail, mmu_cs; u64 par; par = SYS_PAR_EL1_F; @@ -1218,8 +1311,13 @@ static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) * If HCR_EL2.{E2H,TGE} == {1,1}, the MMU context is already * the right one (as we trapped from vEL2). If not, save the * full MMU context. 
+ * + * We are also guaranteed to be in the correct context if + * we're not in a nested VM. */ - if (vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)) + mmu_cs = (vcpu_has_nv(vcpu) && + !(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu))); + if (!mmu_cs) goto skip_mmu_switch; /* @@ -1287,7 +1385,7 @@ skip_mmu_switch: write_sysreg_hcr(HCR_HOST_VHE_FLAGS); - if (!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu))) + if (mmu_cs) __mmu_config_restore(&config); return par; @@ -1470,3 +1568,72 @@ int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, return 0; } + +struct desc_match { + u64 ipa; + int level; +}; + +static int match_s1_desc(struct s1_walk_context *ctxt, void *priv) +{ + struct desc_match *dm = priv; + u64 ipa = dm->ipa; + + /* Use S1 granule alignment */ + ipa &= GENMASK(51, ctxt->wi->pgshift); + + /* Not the IPA we're looking for? Continue. */ + if (ipa != ctxt->table_ipa) + return 0; + + /* Note the level and interrupt the walk */ + dm->level = ctxt->level; + return -EINTR; +} + +int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa, int *level) +{ + struct desc_match dm = { + .ipa = ipa, + }; + struct s1_walk_info wi = { + .filter = &(struct s1_walk_filter){ + .fn = match_s1_desc, + .priv = &dm, + }, + .as_el0 = false, + .pan = false, + }; + struct s1_walk_result wr = {}; + int ret; + + if (is_hyp_ctxt(vcpu)) + wi.regime = vcpu_el2_e2h_is_set(vcpu) ? TR_EL20 : TR_EL2; + else + wi.regime = TR_EL10; + + ret = setup_s1_walk(vcpu, &wi, &wr, va); + if (ret) + return ret; + + /* We really expect the S1 MMU to be on here... */ + if (WARN_ON_ONCE(wr.level == S1_MMU_DISABLED)) { + *level = 0; + return 0; + } + + /* Walk the guest's PT, looking for a match along the way */ + ret = walk_s1(vcpu, &wi, &wr, va); + switch (ret) { + case -EINTR: + /* We interrupted the walk on a match, return the level */ + *level = dm.level; + return 0; + case 0: + /* The walk completed, we failed to find the entry */ + return -ENOENT; + default: + /* Any other error... */ + return ret; + } +} diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c index da66c4a14775..24bb3f36e9d5 100644 --- a/arch/arm64/kvm/config.c +++ b/arch/arm64/kvm/config.c @@ -5,14 +5,26 @@ */ #include <linux/kvm_host.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_nested.h> #include <asm/sysreg.h> +/* + * Describes the dependencies between a set of bits (or the negation + * of a set of RES0 bits) and a feature. The flags indicate how the + * data is interpreted. + */ struct reg_bits_to_feat_map { - u64 bits; + union { + u64 bits; + u64 *res0p; + }; #define NEVER_FGU BIT(0) /* Can trap, but never UNDEF */ #define CALL_FUNC BIT(1) /* Needs to evaluate tons of crap */ #define FIXED_VALUE BIT(2) /* RAZ/WI or RAO/WI in KVM */ +#define RES0_POINTER BIT(3) /* Pointer to RES0 value instead of bits */ + unsigned long flags; union { @@ -28,9 +40,27 @@ struct reg_bits_to_feat_map { }; }; -#define __NEEDS_FEAT_3(m, f, id, fld, lim) \ +/* + * Describes the dependencies for a given register: + * + * @feat_map describes the dependency for the whole register. If the + * features the register depends on are not present, the whole + * register is effectively RES0. + * + * @bit_feat_map describes the dependencies for a set of bits in that + * register. If the features these bits depend on are not present, the + * bits are effectively RES0. 
+ */ +struct reg_feat_map_desc { + const char *name; + const struct reg_bits_to_feat_map feat_map; + const struct reg_bits_to_feat_map *bit_feat_map; + const unsigned int bit_feat_map_sz; +}; + +#define __NEEDS_FEAT_3(m, f, w, id, fld, lim) \ { \ - .bits = (m), \ + .w = (m), \ .flags = (f), \ .regidx = IDREG_IDX(SYS_ ## id), \ .shift = id ##_## fld ## _SHIFT, \ @@ -39,28 +69,63 @@ struct reg_bits_to_feat_map { .lo_lim = id ##_## fld ##_## lim \ } -#define __NEEDS_FEAT_2(m, f, fun, dummy) \ +#define __NEEDS_FEAT_2(m, f, w, fun, dummy) \ { \ - .bits = (m), \ + .w = (m), \ .flags = (f) | CALL_FUNC, \ .fval = (fun), \ } -#define __NEEDS_FEAT_1(m, f, fun) \ +#define __NEEDS_FEAT_1(m, f, w, fun) \ { \ - .bits = (m), \ + .w = (m), \ .flags = (f) | CALL_FUNC, \ .match = (fun), \ } +#define __NEEDS_FEAT_FLAG(m, f, w, ...) \ + CONCATENATE(__NEEDS_FEAT_, COUNT_ARGS(__VA_ARGS__))(m, f, w, __VA_ARGS__) + #define NEEDS_FEAT_FLAG(m, f, ...) \ - CONCATENATE(__NEEDS_FEAT_, COUNT_ARGS(__VA_ARGS__))(m, f, __VA_ARGS__) + __NEEDS_FEAT_FLAG(m, f, bits, __VA_ARGS__) #define NEEDS_FEAT_FIXED(m, ...) \ - NEEDS_FEAT_FLAG(m, FIXED_VALUE, __VA_ARGS__, 0) + __NEEDS_FEAT_FLAG(m, FIXED_VALUE, bits, __VA_ARGS__, 0) + +#define NEEDS_FEAT_RES0(p, ...) \ + __NEEDS_FEAT_FLAG(p, RES0_POINTER, res0p, __VA_ARGS__) +/* + * Declare the dependency between a set of bits and a set of features, + * generating a struct reg_bits_to_feat_map. + */ #define NEEDS_FEAT(m, ...) NEEDS_FEAT_FLAG(m, 0, __VA_ARGS__) +/* + * Declare the dependency between a non-FGT register, a set of + * features, and the set of individual bits it contains. This generates + * a struct reg_feat_map_desc. + */ +#define DECLARE_FEAT_MAP(n, r, m, f) \ + struct reg_feat_map_desc n = { \ + .name = #r, \ + .feat_map = NEEDS_FEAT(~r##_RES0, f), \ + .bit_feat_map = m, \ + .bit_feat_map_sz = ARRAY_SIZE(m), \ + } + +/* + * Specialised version of the above for FGT registers that have their + * RES0 masks described as struct fgt_masks.
+ */ +#define DECLARE_FEAT_MAP_FGT(n, msk, m, f) \ + struct reg_feat_map_desc n = { \ + .name = #msk, \ + .feat_map = NEEDS_FEAT_RES0(&msk.res0, f),\ + .bit_feat_map = m, \ + .bit_feat_map_sz = ARRAY_SIZE(m), \ + } + #define FEAT_SPE ID_AA64DFR0_EL1, PMSVer, IMP #define FEAT_SPE_FnE ID_AA64DFR0_EL1, PMSVer, V1P2 #define FEAT_BRBE ID_AA64DFR0_EL1, BRBE, IMP @@ -73,6 +138,7 @@ struct reg_bits_to_feat_map { #define FEAT_AA32EL0 ID_AA64PFR0_EL1, EL0, AARCH32 #define FEAT_AA32EL1 ID_AA64PFR0_EL1, EL1, AARCH32 #define FEAT_AA64EL1 ID_AA64PFR0_EL1, EL1, IMP +#define FEAT_AA64EL2 ID_AA64PFR0_EL1, EL2, IMP #define FEAT_AA64EL3 ID_AA64PFR0_EL1, EL3, IMP #define FEAT_AIE ID_AA64MMFR3_EL1, AIE, IMP #define FEAT_S2POE ID_AA64MMFR3_EL1, S2POE, IMP @@ -131,7 +197,6 @@ struct reg_bits_to_feat_map { #define FEAT_SPMU ID_AA64DFR1_EL1, SPMU, IMP #define FEAT_SPE_nVM ID_AA64DFR2_EL1, SPE_nVM, IMP #define FEAT_STEP2 ID_AA64DFR2_EL1, STEP, IMP -#define FEAT_SYSREG128 ID_AA64ISAR2_EL1, SYSREG_128, IMP #define FEAT_CPA2 ID_AA64ISAR3_EL1, CPA, CPA2 #define FEAT_ASID2 ID_AA64MMFR4_EL1, ASID2, IMP #define FEAT_MEC ID_AA64MMFR3_EL1, MEC, IMP @@ -143,7 +208,6 @@ struct reg_bits_to_feat_map { #define FEAT_LSMAOC ID_AA64MMFR2_EL1, LSM, IMP #define FEAT_MixedEnd ID_AA64MMFR0_EL1, BIGEND, IMP #define FEAT_MixedEndEL0 ID_AA64MMFR0_EL1, BIGENDEL0, IMP -#define FEAT_MTE2 ID_AA64PFR1_EL1, MTE, MTE2 #define FEAT_MTE_ASYNC ID_AA64PFR1_EL1, MTE_frac, ASYNC #define FEAT_MTE_STORE_ONLY ID_AA64PFR2_EL1, MTESTOREONLY, IMP #define FEAT_PAN ID_AA64MMFR1_EL1, PAN, IMP @@ -151,7 +215,9 @@ struct reg_bits_to_feat_map { #define FEAT_SSBS ID_AA64PFR1_EL1, SSBS, IMP #define FEAT_TIDCP1 ID_AA64MMFR1_EL1, TIDCP1, IMP #define FEAT_FGT ID_AA64MMFR0_EL1, FGT, IMP +#define FEAT_FGT2 ID_AA64MMFR0_EL1, FGT, FGT2 #define FEAT_MTPMU ID_AA64DFR0_EL1, MTPMU, IMP +#define FEAT_HCX ID_AA64MMFR1_EL1, HCX, IMP static bool not_feat_aa64el3(struct kvm *kvm) { @@ -397,6 +463,10 @@ static const struct reg_bits_to_feat_map hfgrtr_feat_map[] = { NEVER_FGU, FEAT_AA64EL1), }; + +static const DECLARE_FEAT_MAP_FGT(hfgrtr_desc, hfgrtr_masks, + hfgrtr_feat_map, FEAT_FGT); + static const struct reg_bits_to_feat_map hfgwtr_feat_map[] = { NEEDS_FEAT(HFGWTR_EL2_nAMAIR2_EL1 | HFGWTR_EL2_nMAIR2_EL1, @@ -461,6 +531,9 @@ static const struct reg_bits_to_feat_map hfgwtr_feat_map[] = { NEVER_FGU, FEAT_AA64EL1), }; +static const DECLARE_FEAT_MAP_FGT(hfgwtr_desc, hfgwtr_masks, + hfgwtr_feat_map, FEAT_FGT); + static const struct reg_bits_to_feat_map hdfgrtr_feat_map[] = { NEEDS_FEAT(HDFGRTR_EL2_PMBIDR_EL1 | HDFGRTR_EL2_PMSLATFR_EL1 | @@ -528,6 +601,9 @@ static const struct reg_bits_to_feat_map hdfgrtr_feat_map[] = { NEVER_FGU, FEAT_AA64EL1) }; +static const DECLARE_FEAT_MAP_FGT(hdfgrtr_desc, hdfgrtr_masks, + hdfgrtr_feat_map, FEAT_FGT); + static const struct reg_bits_to_feat_map hdfgwtr_feat_map[] = { NEEDS_FEAT(HDFGWTR_EL2_PMSLATFR_EL1 | HDFGWTR_EL2_PMSIRR_EL1 | @@ -588,6 +664,8 @@ static const struct reg_bits_to_feat_map hdfgwtr_feat_map[] = { NEEDS_FEAT(HDFGWTR_EL2_TRFCR_EL1, FEAT_TRF), }; +static const DECLARE_FEAT_MAP_FGT(hdfgwtr_desc, hdfgwtr_masks, + hdfgwtr_feat_map, FEAT_FGT); static const struct reg_bits_to_feat_map hfgitr_feat_map[] = { NEEDS_FEAT(HFGITR_EL2_PSBCSYNC, FEAT_SPEv1p5), @@ -662,6 +740,9 @@ static const struct reg_bits_to_feat_map hfgitr_feat_map[] = { NEVER_FGU, FEAT_AA64EL1), }; +static const DECLARE_FEAT_MAP_FGT(hfgitr_desc, hfgitr_masks, + hfgitr_feat_map, FEAT_FGT); + static const struct reg_bits_to_feat_map hafgrtr_feat_map[] = { 
NEEDS_FEAT(HAFGRTR_EL2_AMEVTYPER115_EL0 | HAFGRTR_EL2_AMEVTYPER114_EL0 | @@ -704,11 +785,17 @@ static const struct reg_bits_to_feat_map hafgrtr_feat_map[] = { FEAT_AMUv1), }; +static const DECLARE_FEAT_MAP_FGT(hafgrtr_desc, hafgrtr_masks, + hafgrtr_feat_map, FEAT_FGT); + static const struct reg_bits_to_feat_map hfgitr2_feat_map[] = { NEEDS_FEAT(HFGITR2_EL2_nDCCIVAPS, FEAT_PoPS), NEEDS_FEAT(HFGITR2_EL2_TSBCSYNC, FEAT_TRBEv1p1) }; +static const DECLARE_FEAT_MAP_FGT(hfgitr2_desc, hfgitr2_masks, + hfgitr2_feat_map, FEAT_FGT2); + static const struct reg_bits_to_feat_map hfgrtr2_feat_map[] = { NEEDS_FEAT(HFGRTR2_EL2_nPFAR_EL1, FEAT_PFAR), NEEDS_FEAT(HFGRTR2_EL2_nERXGSR_EL1, FEAT_RASv2), @@ -728,6 +815,9 @@ static const struct reg_bits_to_feat_map hfgrtr2_feat_map[] = { NEEDS_FEAT(HFGRTR2_EL2_nRCWSMASK_EL1, FEAT_THE), }; +static const DECLARE_FEAT_MAP_FGT(hfgrtr2_desc, hfgrtr2_masks, + hfgrtr2_feat_map, FEAT_FGT2); + static const struct reg_bits_to_feat_map hfgwtr2_feat_map[] = { NEEDS_FEAT(HFGWTR2_EL2_nPFAR_EL1, FEAT_PFAR), NEEDS_FEAT(HFGWTR2_EL2_nACTLRALIAS_EL1 | @@ -746,6 +836,9 @@ static const struct reg_bits_to_feat_map hfgwtr2_feat_map[] = { NEEDS_FEAT(HFGWTR2_EL2_nRCWSMASK_EL1, FEAT_THE), }; +static const DECLARE_FEAT_MAP_FGT(hfgwtr2_desc, hfgwtr2_masks, + hfgwtr2_feat_map, FEAT_FGT2); + static const struct reg_bits_to_feat_map hdfgrtr2_feat_map[] = { NEEDS_FEAT(HDFGRTR2_EL2_nMDSELR_EL1, FEAT_Debugv8p9), NEEDS_FEAT(HDFGRTR2_EL2_nPMECR_EL1, feat_ebep_pmuv3_ss), @@ -776,6 +869,9 @@ static const struct reg_bits_to_feat_map hdfgrtr2_feat_map[] = { NEEDS_FEAT(HDFGRTR2_EL2_nTRBMPAM_EL1, feat_trbe_mpam), }; +static const DECLARE_FEAT_MAP_FGT(hdfgrtr2_desc, hdfgrtr2_masks, + hdfgrtr2_feat_map, FEAT_FGT2); + static const struct reg_bits_to_feat_map hdfgwtr2_feat_map[] = { NEEDS_FEAT(HDFGWTR2_EL2_nMDSELR_EL1, FEAT_Debugv8p9), NEEDS_FEAT(HDFGWTR2_EL2_nPMECR_EL1, feat_ebep_pmuv3_ss), @@ -804,6 +900,10 @@ static const struct reg_bits_to_feat_map hdfgwtr2_feat_map[] = { NEEDS_FEAT(HDFGWTR2_EL2_nTRBMPAM_EL1, feat_trbe_mpam), }; +static const DECLARE_FEAT_MAP_FGT(hdfgwtr2_desc, hdfgwtr2_masks, + hdfgwtr2_feat_map, FEAT_FGT2); + + static const struct reg_bits_to_feat_map hcrx_feat_map[] = { NEEDS_FEAT(HCRX_EL2_PACMEn, feat_pauth_lr), NEEDS_FEAT(HCRX_EL2_EnFPM, FEAT_FPMR), @@ -833,6 +933,10 @@ static const struct reg_bits_to_feat_map hcrx_feat_map[] = { NEEDS_FEAT(HCRX_EL2_EnAS0, FEAT_LS64_ACCDATA), }; + +static const DECLARE_FEAT_MAP(hcrx_desc, __HCRX_EL2, + hcrx_feat_map, FEAT_HCX); + static const struct reg_bits_to_feat_map hcr_feat_map[] = { NEEDS_FEAT(HCR_EL2_TID0, FEAT_AA32EL0), NEEDS_FEAT_FIXED(HCR_EL2_RW, compute_hcr_rw), @@ -904,6 +1008,9 @@ static const struct reg_bits_to_feat_map hcr_feat_map[] = { NEEDS_FEAT_FIXED(HCR_EL2_E2H, compute_hcr_e2h), }; +static const DECLARE_FEAT_MAP(hcr_desc, HCR_EL2, + hcr_feat_map, FEAT_AA64EL2); + static const struct reg_bits_to_feat_map sctlr2_feat_map[] = { NEEDS_FEAT(SCTLR2_EL1_NMEA | SCTLR2_EL1_EASE, @@ -921,6 +1028,9 @@ static const struct reg_bits_to_feat_map sctlr2_feat_map[] = { FEAT_CPA2), }; +static const DECLARE_FEAT_MAP(sctlr2_desc, SCTLR2_EL1, + sctlr2_feat_map, FEAT_SCTLR2); + static const struct reg_bits_to_feat_map tcr2_el2_feat_map[] = { NEEDS_FEAT(TCR2_EL2_FNG1 | TCR2_EL2_FNG0 | @@ -943,6 +1053,9 @@ static const struct reg_bits_to_feat_map tcr2_el2_feat_map[] = { NEEDS_FEAT(TCR2_EL2_PIE, FEAT_S1PIE), }; +static const DECLARE_FEAT_MAP(tcr2_el2_desc, TCR2_EL2, + tcr2_el2_feat_map, FEAT_TCR2); + static const struct reg_bits_to_feat_map 
sctlr_el1_feat_map[] = { NEEDS_FEAT(SCTLR_EL1_CP15BEN | SCTLR_EL1_ITD | @@ -1017,6 +1130,9 @@ static const struct reg_bits_to_feat_map sctlr_el1_feat_map[] = { FEAT_AA64EL1), }; +static const DECLARE_FEAT_MAP(sctlr_el1_desc, SCTLR_EL1, + sctlr_el1_feat_map, FEAT_AA64EL1); + static const struct reg_bits_to_feat_map mdcr_el2_feat_map[] = { NEEDS_FEAT(MDCR_EL2_EBWE, FEAT_Debugv8p9), NEEDS_FEAT(MDCR_EL2_TDOSA, FEAT_DoubleLock), @@ -1048,6 +1164,9 @@ static const struct reg_bits_to_feat_map mdcr_el2_feat_map[] = { FEAT_AA64EL1), }; +static const DECLARE_FEAT_MAP(mdcr_el2_desc, MDCR_EL2, + mdcr_el2_feat_map, FEAT_AA64EL2); + static void __init check_feat_map(const struct reg_bits_to_feat_map *map, int map_size, u64 res0, const char *str) { @@ -1061,32 +1180,36 @@ static void __init check_feat_map(const struct reg_bits_to_feat_map *map, str, mask ^ ~res0); } +static u64 reg_feat_map_bits(const struct reg_bits_to_feat_map *map) +{ + return map->flags & RES0_POINTER ? ~(*map->res0p) : map->bits; +} + +static void __init check_reg_desc(const struct reg_feat_map_desc *r) +{ + check_feat_map(r->bit_feat_map, r->bit_feat_map_sz, + ~reg_feat_map_bits(&r->feat_map), r->name); +} + void __init check_feature_map(void) { - check_feat_map(hfgrtr_feat_map, ARRAY_SIZE(hfgrtr_feat_map), - hfgrtr_masks.res0, hfgrtr_masks.str); - check_feat_map(hfgwtr_feat_map, ARRAY_SIZE(hfgwtr_feat_map), - hfgwtr_masks.res0, hfgwtr_masks.str); - check_feat_map(hfgitr_feat_map, ARRAY_SIZE(hfgitr_feat_map), - hfgitr_masks.res0, hfgitr_masks.str); - check_feat_map(hdfgrtr_feat_map, ARRAY_SIZE(hdfgrtr_feat_map), - hdfgrtr_masks.res0, hdfgrtr_masks.str); - check_feat_map(hdfgwtr_feat_map, ARRAY_SIZE(hdfgwtr_feat_map), - hdfgwtr_masks.res0, hdfgwtr_masks.str); - check_feat_map(hafgrtr_feat_map, ARRAY_SIZE(hafgrtr_feat_map), - hafgrtr_masks.res0, hafgrtr_masks.str); - check_feat_map(hcrx_feat_map, ARRAY_SIZE(hcrx_feat_map), - __HCRX_EL2_RES0, "HCRX_EL2"); - check_feat_map(hcr_feat_map, ARRAY_SIZE(hcr_feat_map), - HCR_EL2_RES0, "HCR_EL2"); - check_feat_map(sctlr2_feat_map, ARRAY_SIZE(sctlr2_feat_map), - SCTLR2_EL1_RES0, "SCTLR2_EL1"); - check_feat_map(tcr2_el2_feat_map, ARRAY_SIZE(tcr2_el2_feat_map), - TCR2_EL2_RES0, "TCR2_EL2"); - check_feat_map(sctlr_el1_feat_map, ARRAY_SIZE(sctlr_el1_feat_map), - SCTLR_EL1_RES0, "SCTLR_EL1"); - check_feat_map(mdcr_el2_feat_map, ARRAY_SIZE(mdcr_el2_feat_map), - MDCR_EL2_RES0, "MDCR_EL2"); + check_reg_desc(&hfgrtr_desc); + check_reg_desc(&hfgwtr_desc); + check_reg_desc(&hfgitr_desc); + check_reg_desc(&hdfgrtr_desc); + check_reg_desc(&hdfgwtr_desc); + check_reg_desc(&hafgrtr_desc); + check_reg_desc(&hfgrtr2_desc); + check_reg_desc(&hfgwtr2_desc); + check_reg_desc(&hfgitr2_desc); + check_reg_desc(&hdfgrtr2_desc); + check_reg_desc(&hdfgwtr2_desc); + check_reg_desc(&hcrx_desc); + check_reg_desc(&hcr_desc); + check_reg_desc(&sctlr2_desc); + check_reg_desc(&tcr2_el2_desc); + check_reg_desc(&sctlr_el1_desc); + check_reg_desc(&mdcr_el2_desc); } static bool idreg_feat_match(struct kvm *kvm, const struct reg_bits_to_feat_map *map) @@ -1129,7 +1252,7 @@ static u64 __compute_fixed_bits(struct kvm *kvm, match = idreg_feat_match(kvm, &map[i]); if (!match || (map[i].flags & FIXED_VALUE)) - val |= map[i].bits; + val |= reg_feat_map_bits(&map[i]); } return val; @@ -1145,15 +1268,36 @@ static u64 compute_res0_bits(struct kvm *kvm, require, exclude | FIXED_VALUE); } -static u64 compute_fixed_bits(struct kvm *kvm, - const struct reg_bits_to_feat_map *map, - int map_size, - u64 *fixed_bits, - unsigned long require, - 
unsigned long exclude) +static u64 compute_reg_res0_bits(struct kvm *kvm, + const struct reg_feat_map_desc *r, + unsigned long require, unsigned long exclude) + +{ + u64 res0; + + res0 = compute_res0_bits(kvm, r->bit_feat_map, r->bit_feat_map_sz, + require, exclude); + + /* + * If computing FGUs, don't take RES0 or register existence + * into account -- we're not computing bits for the register + * itself. + */ + if (!(exclude & NEVER_FGU)) { + res0 |= compute_res0_bits(kvm, &r->feat_map, 1, require, exclude); + res0 |= ~reg_feat_map_bits(&r->feat_map); + } + + return res0; +} + +static u64 compute_reg_fixed_bits(struct kvm *kvm, + const struct reg_feat_map_desc *r, + u64 *fixed_bits, unsigned long require, + unsigned long exclude) { - return __compute_fixed_bits(kvm, map, map_size, fixed_bits, - require | FIXED_VALUE, exclude); + return __compute_fixed_bits(kvm, r->bit_feat_map, r->bit_feat_map_sz, + fixed_bits, require | FIXED_VALUE, exclude); } void compute_fgu(struct kvm *kvm, enum fgt_group_id fgt) @@ -1162,51 +1306,40 @@ void compute_fgu(struct kvm *kvm, enum fgt_group_id fgt) switch (fgt) { case HFGRTR_GROUP: - val |= compute_res0_bits(kvm, hfgrtr_feat_map, - ARRAY_SIZE(hfgrtr_feat_map), - 0, NEVER_FGU); - val |= compute_res0_bits(kvm, hfgwtr_feat_map, - ARRAY_SIZE(hfgwtr_feat_map), - 0, NEVER_FGU); + val |= compute_reg_res0_bits(kvm, &hfgrtr_desc, + 0, NEVER_FGU); + val |= compute_reg_res0_bits(kvm, &hfgwtr_desc, + 0, NEVER_FGU); break; case HFGITR_GROUP: - val |= compute_res0_bits(kvm, hfgitr_feat_map, - ARRAY_SIZE(hfgitr_feat_map), - 0, NEVER_FGU); + val |= compute_reg_res0_bits(kvm, &hfgitr_desc, + 0, NEVER_FGU); break; case HDFGRTR_GROUP: - val |= compute_res0_bits(kvm, hdfgrtr_feat_map, - ARRAY_SIZE(hdfgrtr_feat_map), - 0, NEVER_FGU); - val |= compute_res0_bits(kvm, hdfgwtr_feat_map, - ARRAY_SIZE(hdfgwtr_feat_map), - 0, NEVER_FGU); + val |= compute_reg_res0_bits(kvm, &hdfgrtr_desc, + 0, NEVER_FGU); + val |= compute_reg_res0_bits(kvm, &hdfgwtr_desc, + 0, NEVER_FGU); break; case HAFGRTR_GROUP: - val |= compute_res0_bits(kvm, hafgrtr_feat_map, - ARRAY_SIZE(hafgrtr_feat_map), - 0, NEVER_FGU); + val |= compute_reg_res0_bits(kvm, &hafgrtr_desc, + 0, NEVER_FGU); break; case HFGRTR2_GROUP: - val |= compute_res0_bits(kvm, hfgrtr2_feat_map, - ARRAY_SIZE(hfgrtr2_feat_map), - 0, NEVER_FGU); - val |= compute_res0_bits(kvm, hfgwtr2_feat_map, - ARRAY_SIZE(hfgwtr2_feat_map), - 0, NEVER_FGU); + val |= compute_reg_res0_bits(kvm, &hfgrtr2_desc, + 0, NEVER_FGU); + val |= compute_reg_res0_bits(kvm, &hfgwtr2_desc, + 0, NEVER_FGU); break; case HFGITR2_GROUP: - val |= compute_res0_bits(kvm, hfgitr2_feat_map, - ARRAY_SIZE(hfgitr2_feat_map), - 0, NEVER_FGU); + val |= compute_reg_res0_bits(kvm, &hfgitr2_desc, + 0, NEVER_FGU); break; case HDFGRTR2_GROUP: - val |= compute_res0_bits(kvm, hdfgrtr2_feat_map, - ARRAY_SIZE(hdfgrtr2_feat_map), - 0, NEVER_FGU); - val |= compute_res0_bits(kvm, hdfgwtr2_feat_map, - ARRAY_SIZE(hdfgwtr2_feat_map), - 0, NEVER_FGU); + val |= compute_reg_res0_bits(kvm, &hdfgrtr2_desc, + 0, NEVER_FGU); + val |= compute_reg_res0_bits(kvm, &hdfgwtr2_desc, + 0, NEVER_FGU); break; default: BUG(); @@ -1221,109 +1354,74 @@ void get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg, u64 *res0, u64 *r switch (reg) { case HFGRTR_EL2: - *res0 = compute_res0_bits(kvm, hfgrtr_feat_map, - ARRAY_SIZE(hfgrtr_feat_map), 0, 0); - *res0 |= hfgrtr_masks.res0; + *res0 = compute_reg_res0_bits(kvm, &hfgrtr_desc, 0, 0); *res1 = HFGRTR_EL2_RES1; break; case HFGWTR_EL2: - *res0 = compute_res0_bits(kvm, 
hfgwtr_feat_map, - ARRAY_SIZE(hfgwtr_feat_map), 0, 0); - *res0 |= hfgwtr_masks.res0; + *res0 = compute_reg_res0_bits(kvm, &hfgwtr_desc, 0, 0); *res1 = HFGWTR_EL2_RES1; break; case HFGITR_EL2: - *res0 = compute_res0_bits(kvm, hfgitr_feat_map, - ARRAY_SIZE(hfgitr_feat_map), 0, 0); - *res0 |= hfgitr_masks.res0; + *res0 = compute_reg_res0_bits(kvm, &hfgitr_desc, 0, 0); *res1 = HFGITR_EL2_RES1; break; case HDFGRTR_EL2: - *res0 = compute_res0_bits(kvm, hdfgrtr_feat_map, - ARRAY_SIZE(hdfgrtr_feat_map), 0, 0); - *res0 |= hdfgrtr_masks.res0; + *res0 = compute_reg_res0_bits(kvm, &hdfgrtr_desc, 0, 0); *res1 = HDFGRTR_EL2_RES1; break; case HDFGWTR_EL2: - *res0 = compute_res0_bits(kvm, hdfgwtr_feat_map, - ARRAY_SIZE(hdfgwtr_feat_map), 0, 0); - *res0 |= hdfgwtr_masks.res0; + *res0 = compute_reg_res0_bits(kvm, &hdfgwtr_desc, 0, 0); *res1 = HDFGWTR_EL2_RES1; break; case HAFGRTR_EL2: - *res0 = compute_res0_bits(kvm, hafgrtr_feat_map, - ARRAY_SIZE(hafgrtr_feat_map), 0, 0); - *res0 |= hafgrtr_masks.res0; + *res0 = compute_reg_res0_bits(kvm, &hafgrtr_desc, 0, 0); *res1 = HAFGRTR_EL2_RES1; break; case HFGRTR2_EL2: - *res0 = compute_res0_bits(kvm, hfgrtr2_feat_map, - ARRAY_SIZE(hfgrtr2_feat_map), 0, 0); - *res0 |= hfgrtr2_masks.res0; + *res0 = compute_reg_res0_bits(kvm, &hfgrtr2_desc, 0, 0); *res1 = HFGRTR2_EL2_RES1; break; case HFGWTR2_EL2: - *res0 = compute_res0_bits(kvm, hfgwtr2_feat_map, - ARRAY_SIZE(hfgwtr2_feat_map), 0, 0); - *res0 |= hfgwtr2_masks.res0; + *res0 = compute_reg_res0_bits(kvm, &hfgwtr2_desc, 0, 0); *res1 = HFGWTR2_EL2_RES1; break; case HFGITR2_EL2: - *res0 = compute_res0_bits(kvm, hfgitr2_feat_map, - ARRAY_SIZE(hfgitr2_feat_map), 0, 0); - *res0 |= hfgitr2_masks.res0; + *res0 = compute_reg_res0_bits(kvm, &hfgitr2_desc, 0, 0); *res1 = HFGITR2_EL2_RES1; break; case HDFGRTR2_EL2: - *res0 = compute_res0_bits(kvm, hdfgrtr2_feat_map, - ARRAY_SIZE(hdfgrtr2_feat_map), 0, 0); - *res0 |= hdfgrtr2_masks.res0; + *res0 = compute_reg_res0_bits(kvm, &hdfgrtr2_desc, 0, 0); *res1 = HDFGRTR2_EL2_RES1; break; case HDFGWTR2_EL2: - *res0 = compute_res0_bits(kvm, hdfgwtr2_feat_map, - ARRAY_SIZE(hdfgwtr2_feat_map), 0, 0); - *res0 |= hdfgwtr2_masks.res0; + *res0 = compute_reg_res0_bits(kvm, &hdfgwtr2_desc, 0, 0); *res1 = HDFGWTR2_EL2_RES1; break; case HCRX_EL2: - *res0 = compute_res0_bits(kvm, hcrx_feat_map, - ARRAY_SIZE(hcrx_feat_map), 0, 0); - *res0 |= __HCRX_EL2_RES0; + *res0 = compute_reg_res0_bits(kvm, &hcrx_desc, 0, 0); *res1 = __HCRX_EL2_RES1; break; case HCR_EL2: - mask = compute_fixed_bits(kvm, hcr_feat_map, - ARRAY_SIZE(hcr_feat_map), &fixed, - 0, 0); - *res0 = compute_res0_bits(kvm, hcr_feat_map, - ARRAY_SIZE(hcr_feat_map), 0, 0); - *res0 |= HCR_EL2_RES0 | (mask & ~fixed); + mask = compute_reg_fixed_bits(kvm, &hcr_desc, &fixed, 0, 0); + *res0 = compute_reg_res0_bits(kvm, &hcr_desc, 0, 0); + *res0 |= (mask & ~fixed); *res1 = HCR_EL2_RES1 | (mask & fixed); break; case SCTLR2_EL1: case SCTLR2_EL2: - *res0 = compute_res0_bits(kvm, sctlr2_feat_map, - ARRAY_SIZE(sctlr2_feat_map), 0, 0); - *res0 |= SCTLR2_EL1_RES0; + *res0 = compute_reg_res0_bits(kvm, &sctlr2_desc, 0, 0); *res1 = SCTLR2_EL1_RES1; break; case TCR2_EL2: - *res0 = compute_res0_bits(kvm, tcr2_el2_feat_map, - ARRAY_SIZE(tcr2_el2_feat_map), 0, 0); - *res0 |= TCR2_EL2_RES0; + *res0 = compute_reg_res0_bits(kvm, &tcr2_el2_desc, 0, 0); *res1 = TCR2_EL2_RES1; break; case SCTLR_EL1: - *res0 = compute_res0_bits(kvm, sctlr_el1_feat_map, - ARRAY_SIZE(sctlr_el1_feat_map), 0, 0); - *res0 |= SCTLR_EL1_RES0; + *res0 = compute_reg_res0_bits(kvm, &sctlr_el1_desc, 0, 
0); *res1 = SCTLR_EL1_RES1; break; case MDCR_EL2: - *res0 = compute_res0_bits(kvm, mdcr_el2_feat_map, - ARRAY_SIZE(mdcr_el2_feat_map), 0, 0); - *res0 |= MDCR_EL2_RES0; + *res0 = compute_reg_res0_bits(kvm, &mdcr_el2_desc, 0, 0); *res1 = MDCR_EL2_RES1; break; default: @@ -1332,3 +1430,91 @@ void get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg, u64 *res0, u64 *r break; } } + +static __always_inline struct fgt_masks *__fgt_reg_to_masks(enum vcpu_sysreg reg) +{ + switch (reg) { + case HFGRTR_EL2: + return &hfgrtr_masks; + case HFGWTR_EL2: + return &hfgwtr_masks; + case HFGITR_EL2: + return &hfgitr_masks; + case HDFGRTR_EL2: + return &hdfgrtr_masks; + case HDFGWTR_EL2: + return &hdfgwtr_masks; + case HAFGRTR_EL2: + return &hafgrtr_masks; + case HFGRTR2_EL2: + return &hfgrtr2_masks; + case HFGWTR2_EL2: + return &hfgwtr2_masks; + case HFGITR2_EL2: + return &hfgitr2_masks; + case HDFGRTR2_EL2: + return &hdfgrtr2_masks; + case HDFGWTR2_EL2: + return &hdfgwtr2_masks; + default: + BUILD_BUG_ON(1); + } +} + +static __always_inline void __compute_fgt(struct kvm_vcpu *vcpu, enum vcpu_sysreg reg) +{ + u64 fgu = vcpu->kvm->arch.fgu[__fgt_reg_to_group_id(reg)]; + struct fgt_masks *m = __fgt_reg_to_masks(reg); + u64 clear = 0, set = 0, val = m->nmask; + + set |= fgu & m->mask; + clear |= fgu & m->nmask; + + if (is_nested_ctxt(vcpu)) { + u64 nested = __vcpu_sys_reg(vcpu, reg); + set |= nested & m->mask; + clear |= ~nested & m->nmask; + } + + val |= set; + val &= ~clear; + *vcpu_fgt(vcpu, reg) = val; +} + +static void __compute_hfgwtr(struct kvm_vcpu *vcpu) +{ + __compute_fgt(vcpu, HFGWTR_EL2); + + if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38)) + *vcpu_fgt(vcpu, HFGWTR_EL2) |= HFGWTR_EL2_TCR_EL1; +} + +static void __compute_hdfgwtr(struct kvm_vcpu *vcpu) +{ + __compute_fgt(vcpu, HDFGWTR_EL2); + + if (is_hyp_ctxt(vcpu)) + *vcpu_fgt(vcpu, HDFGWTR_EL2) |= HDFGWTR_EL2_MDSCR_EL1; +} + +void kvm_vcpu_load_fgt(struct kvm_vcpu *vcpu) +{ + if (!cpus_have_final_cap(ARM64_HAS_FGT)) + return; + + __compute_fgt(vcpu, HFGRTR_EL2); + __compute_hfgwtr(vcpu); + __compute_fgt(vcpu, HFGITR_EL2); + __compute_fgt(vcpu, HDFGRTR_EL2); + __compute_hdfgwtr(vcpu); + __compute_fgt(vcpu, HAFGRTR_EL2); + + if (!cpus_have_final_cap(ARM64_HAS_FGT2)) + return; + + __compute_fgt(vcpu, HFGRTR2_EL2); + __compute_fgt(vcpu, HFGWTR2_EL2); + __compute_fgt(vcpu, HFGITR2_EL2); + __compute_fgt(vcpu, HDFGRTR2_EL2); + __compute_fgt(vcpu, HDFGWTR2_EL2); +} diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index e027d9c32b0d..3ad6b7c6e4ba 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -15,6 +15,12 @@ #include <asm/kvm_arm.h> #include <asm/kvm_emulate.h> +static int cpu_has_spe(u64 dfr0) +{ + return cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_PMSVer_SHIFT) && + !(read_sysreg_s(SYS_PMBIDR_EL1) & PMBIDR_EL1_P); +} + /** * kvm_arm_setup_mdcr_el2 - configure vcpu mdcr_el2 value * @@ -56,6 +62,9 @@ static void kvm_arm_setup_mdcr_el2(struct kvm_vcpu *vcpu) if (!kvm_guest_owns_debug_regs(vcpu)) vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA; + if (vcpu_has_nv(vcpu)) + kvm_nested_setup_mdcr_el2(vcpu); + /* Write MDCR_EL2 directly if we're already at EL2 */ if (has_vhe()) write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); @@ -74,13 +83,12 @@ void kvm_init_host_debug_data(void) *host_data_ptr(debug_brps) = SYS_FIELD_GET(ID_AA64DFR0_EL1, BRPs, dfr0); *host_data_ptr(debug_wrps) = SYS_FIELD_GET(ID_AA64DFR0_EL1, WRPs, dfr0); + if (cpu_has_spe(dfr0)) + host_data_set_flag(HAS_SPE); + if (has_vhe()) return; - if 
(cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_PMSVer_SHIFT) && - !(read_sysreg_s(SYS_PMBIDR_EL1) & PMBIDR_EL1_P)) - host_data_set_flag(HAS_SPE); - /* Check if we have BRBE implemented and available at the host */ if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_BRBE_SHIFT)) host_data_set_flag(HAS_BRBE); @@ -99,7 +107,7 @@ void kvm_init_host_debug_data(void) void kvm_debug_init_vhe(void) { /* Clear PMSCR_EL1.E{0,1}SPE which reset to UNKNOWN values. */ - if (SYS_FIELD_GET(ID_AA64DFR0_EL1, PMSVer, read_sysreg(id_aa64dfr0_el1))) + if (host_data_test_flag(HAS_SPE)) write_sysreg_el1(0, SYS_PMSCR); } @@ -243,29 +251,29 @@ void kvm_debug_handle_oslar(struct kvm_vcpu *vcpu, u64 val) preempt_enable(); } -void kvm_enable_trbe(void) +static bool skip_trbe_access(bool skip_condition) { - if (has_vhe() || is_protected_kvm_enabled() || - WARN_ON_ONCE(preemptible())) - return; + return (WARN_ON_ONCE(preemptible()) || skip_condition || + is_protected_kvm_enabled() || !is_kvm_arm_initialised()); +} - host_data_set_flag(TRBE_ENABLED); +void kvm_enable_trbe(void) +{ + if (!skip_trbe_access(has_vhe())) + host_data_set_flag(TRBE_ENABLED); } EXPORT_SYMBOL_GPL(kvm_enable_trbe); void kvm_disable_trbe(void) { - if (has_vhe() || is_protected_kvm_enabled() || - WARN_ON_ONCE(preemptible())) - return; - - host_data_clear_flag(TRBE_ENABLED); + if (!skip_trbe_access(has_vhe())) + host_data_clear_flag(TRBE_ENABLED); } EXPORT_SYMBOL_GPL(kvm_disable_trbe); void kvm_tracing_set_el1_configuration(u64 trfcr_while_in_guest) { - if (is_protected_kvm_enabled() || WARN_ON_ONCE(preemptible())) + if (skip_trbe_access(false)) return; if (has_vhe()) { diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c index af69c897c2c3..834f13fb1fb7 100644 --- a/arch/arm64/kvm/emulate-nested.c +++ b/arch/arm64/kvm/emulate-nested.c @@ -1185,6 +1185,7 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = { SR_TRAP(SYS_PMSIRR_EL1, CGT_MDCR_TPMS), SR_TRAP(SYS_PMSLATFR_EL1, CGT_MDCR_TPMS), SR_TRAP(SYS_PMSNEVFR_EL1, CGT_MDCR_TPMS), + SR_TRAP(SYS_PMSDSFR_EL1, CGT_MDCR_TPMS), SR_TRAP(SYS_TRFCR_EL1, CGT_MDCR_TTRF), SR_TRAP(SYS_TRBBASER_EL1, CGT_MDCR_E2TB), SR_TRAP(SYS_TRBLIMITR_EL1, CGT_MDCR_E2TB), diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 16ba5e9ac86c..1c87699fd886 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -591,64 +591,6 @@ static unsigned long num_core_regs(const struct kvm_vcpu *vcpu) return copy_core_reg_indices(vcpu, NULL); } -static const u64 timer_reg_list[] = { - KVM_REG_ARM_TIMER_CTL, - KVM_REG_ARM_TIMER_CNT, - KVM_REG_ARM_TIMER_CVAL, - KVM_REG_ARM_PTIMER_CTL, - KVM_REG_ARM_PTIMER_CNT, - KVM_REG_ARM_PTIMER_CVAL, -}; - -#define NUM_TIMER_REGS ARRAY_SIZE(timer_reg_list) - -static bool is_timer_reg(u64 index) -{ - switch (index) { - case KVM_REG_ARM_TIMER_CTL: - case KVM_REG_ARM_TIMER_CNT: - case KVM_REG_ARM_TIMER_CVAL: - case KVM_REG_ARM_PTIMER_CTL: - case KVM_REG_ARM_PTIMER_CNT: - case KVM_REG_ARM_PTIMER_CVAL: - return true; - } - return false; -} - -static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) -{ - for (int i = 0; i < NUM_TIMER_REGS; i++) { - if (put_user(timer_reg_list[i], uindices)) - return -EFAULT; - uindices++; - } - - return 0; -} - -static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) -{ - void __user *uaddr = (void __user *)(long)reg->addr; - u64 val; - int ret; - - ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)); - if (ret != 0) - return -EFAULT; - 
- return kvm_arm_timer_set_reg(vcpu, reg->id, val); -} - -static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) -{ - void __user *uaddr = (void __user *)(long)reg->addr; - u64 val; - - val = kvm_arm_timer_get_reg(vcpu, reg->id); - return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? -EFAULT : 0; -} - static unsigned long num_sve_regs(const struct kvm_vcpu *vcpu) { const unsigned int slices = vcpu_sve_slices(vcpu); @@ -724,7 +666,6 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu) res += num_sve_regs(vcpu); res += kvm_arm_num_sys_reg_descs(vcpu); res += kvm_arm_get_fw_num_regs(vcpu); - res += NUM_TIMER_REGS; return res; } @@ -755,11 +696,6 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) return ret; uindices += kvm_arm_get_fw_num_regs(vcpu); - ret = copy_timer_indices(vcpu, uindices); - if (ret < 0) - return ret; - uindices += NUM_TIMER_REGS; - return kvm_arm_copy_sys_reg_indices(vcpu, uindices); } @@ -777,9 +713,6 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) case KVM_REG_ARM64_SVE: return get_sve_reg(vcpu, reg); } - if (is_timer_reg(reg->id)) - return get_timer_reg(vcpu, reg); - return kvm_arm_sys_reg_get_reg(vcpu, reg); } @@ -797,9 +730,6 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) case KVM_REG_ARM64_SVE: return set_sve_reg(vcpu, reg); } - if (is_timer_reg(reg->id)) - return set_timer_reg(vcpu, reg); - return kvm_arm_sys_reg_set_reg(vcpu, reg); } diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 8bdb1eed090a..cc7d5d1709cb 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -147,7 +147,12 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu) if (esr & ESR_ELx_WFx_ISS_RV) { u64 val, now; - now = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_TIMER_CNT); + now = kvm_phys_timer_read(); + if (is_hyp_ctxt(vcpu) && vcpu_el2_e2h_is_set(vcpu)) + now -= timer_get_offset(vcpu_hvtimer(vcpu)); + else + now -= timer_get_offset(vcpu_vtimer(vcpu)); + val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu)); if (now >= val) @@ -559,6 +564,9 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, /* Dump the nVHE hypervisor backtrace */ kvm_nvhe_dump_backtrace(hyp_offset); + /* Dump the faulting instruction */ + dump_kernel_instr(panic_addr + kaslr_offset()); + /* * Hyp has panicked and we're going to handle that by panicking the * kernel. 
The kernel offset will be revealed in the panic so we're diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index b6682202edf3..c5d5e5b86eaf 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -195,123 +195,6 @@ static inline void __deactivate_cptr_traps(struct kvm_vcpu *vcpu) __deactivate_cptr_traps_nvhe(vcpu); } -#define reg_to_fgt_masks(reg) \ - ({ \ - struct fgt_masks *m; \ - switch(reg) { \ - case HFGRTR_EL2: \ - m = &hfgrtr_masks; \ - break; \ - case HFGWTR_EL2: \ - m = &hfgwtr_masks; \ - break; \ - case HFGITR_EL2: \ - m = &hfgitr_masks; \ - break; \ - case HDFGRTR_EL2: \ - m = &hdfgrtr_masks; \ - break; \ - case HDFGWTR_EL2: \ - m = &hdfgwtr_masks; \ - break; \ - case HAFGRTR_EL2: \ - m = &hafgrtr_masks; \ - break; \ - case HFGRTR2_EL2: \ - m = &hfgrtr2_masks; \ - break; \ - case HFGWTR2_EL2: \ - m = &hfgwtr2_masks; \ - break; \ - case HFGITR2_EL2: \ - m = &hfgitr2_masks; \ - break; \ - case HDFGRTR2_EL2: \ - m = &hdfgrtr2_masks; \ - break; \ - case HDFGWTR2_EL2: \ - m = &hdfgwtr2_masks; \ - break; \ - default: \ - BUILD_BUG_ON(1); \ - } \ - \ - m; \ - }) - -#define compute_clr_set(vcpu, reg, clr, set) \ - do { \ - u64 hfg = __vcpu_sys_reg(vcpu, reg); \ - struct fgt_masks *m = reg_to_fgt_masks(reg); \ - set |= hfg & m->mask; \ - clr |= ~hfg & m->nmask; \ - } while(0) - -#define reg_to_fgt_group_id(reg) \ - ({ \ - enum fgt_group_id id; \ - switch(reg) { \ - case HFGRTR_EL2: \ - case HFGWTR_EL2: \ - id = HFGRTR_GROUP; \ - break; \ - case HFGITR_EL2: \ - id = HFGITR_GROUP; \ - break; \ - case HDFGRTR_EL2: \ - case HDFGWTR_EL2: \ - id = HDFGRTR_GROUP; \ - break; \ - case HAFGRTR_EL2: \ - id = HAFGRTR_GROUP; \ - break; \ - case HFGRTR2_EL2: \ - case HFGWTR2_EL2: \ - id = HFGRTR2_GROUP; \ - break; \ - case HFGITR2_EL2: \ - id = HFGITR2_GROUP; \ - break; \ - case HDFGRTR2_EL2: \ - case HDFGWTR2_EL2: \ - id = HDFGRTR2_GROUP; \ - break; \ - default: \ - BUILD_BUG_ON(1); \ - } \ - \ - id; \ - }) - -#define compute_undef_clr_set(vcpu, kvm, reg, clr, set) \ - do { \ - u64 hfg = kvm->arch.fgu[reg_to_fgt_group_id(reg)]; \ - struct fgt_masks *m = reg_to_fgt_masks(reg); \ - set |= hfg & m->mask; \ - clr |= hfg & m->nmask; \ - } while(0) - -#define update_fgt_traps_cs(hctxt, vcpu, kvm, reg, clr, set) \ - do { \ - struct fgt_masks *m = reg_to_fgt_masks(reg); \ - u64 c = clr, s = set; \ - u64 val; \ - \ - ctxt_sys_reg(hctxt, reg) = read_sysreg_s(SYS_ ## reg); \ - if (is_nested_ctxt(vcpu)) \ - compute_clr_set(vcpu, reg, c, s); \ - \ - compute_undef_clr_set(vcpu, kvm, reg, c, s); \ - \ - val = m->nmask; \ - val |= s; \ - val &= ~c; \ - write_sysreg_s(val, SYS_ ## reg); \ - } while(0) - -#define update_fgt_traps(hctxt, vcpu, kvm, reg) \ - update_fgt_traps_cs(hctxt, vcpu, kvm, reg, 0, 0) - static inline bool cpu_has_amu(void) { u64 pfr0 = read_sysreg_s(SYS_ID_AA64PFR0_EL1); @@ -320,33 +203,36 @@ static inline bool cpu_has_amu(void) ID_AA64PFR0_EL1_AMU_SHIFT); } +#define __activate_fgt(hctxt, vcpu, reg) \ + do { \ + ctxt_sys_reg(hctxt, reg) = read_sysreg_s(SYS_ ## reg); \ + write_sysreg_s(*vcpu_fgt(vcpu, reg), SYS_ ## reg); \ + } while (0) + static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt); - struct kvm *kvm = kern_hyp_va(vcpu->kvm); if (!cpus_have_final_cap(ARM64_HAS_FGT)) return; - update_fgt_traps(hctxt, vcpu, kvm, HFGRTR_EL2); - update_fgt_traps_cs(hctxt, vcpu, kvm, HFGWTR_EL2, 0, - 
cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38) ? - HFGWTR_EL2_TCR_EL1_MASK : 0); - update_fgt_traps(hctxt, vcpu, kvm, HFGITR_EL2); - update_fgt_traps(hctxt, vcpu, kvm, HDFGRTR_EL2); - update_fgt_traps(hctxt, vcpu, kvm, HDFGWTR_EL2); + __activate_fgt(hctxt, vcpu, HFGRTR_EL2); + __activate_fgt(hctxt, vcpu, HFGWTR_EL2); + __activate_fgt(hctxt, vcpu, HFGITR_EL2); + __activate_fgt(hctxt, vcpu, HDFGRTR_EL2); + __activate_fgt(hctxt, vcpu, HDFGWTR_EL2); if (cpu_has_amu()) - update_fgt_traps(hctxt, vcpu, kvm, HAFGRTR_EL2); + __activate_fgt(hctxt, vcpu, HAFGRTR_EL2); if (!cpus_have_final_cap(ARM64_HAS_FGT2)) return; - update_fgt_traps(hctxt, vcpu, kvm, HFGRTR2_EL2); - update_fgt_traps(hctxt, vcpu, kvm, HFGWTR2_EL2); - update_fgt_traps(hctxt, vcpu, kvm, HFGITR2_EL2); - update_fgt_traps(hctxt, vcpu, kvm, HDFGRTR2_EL2); - update_fgt_traps(hctxt, vcpu, kvm, HDFGWTR2_EL2); + __activate_fgt(hctxt, vcpu, HFGRTR2_EL2); + __activate_fgt(hctxt, vcpu, HFGWTR2_EL2); + __activate_fgt(hctxt, vcpu, HFGITR2_EL2); + __activate_fgt(hctxt, vcpu, HDFGRTR2_EL2); + __activate_fgt(hctxt, vcpu, HDFGWTR2_EL2); } #define __deactivate_fgt(htcxt, vcpu, reg) \ diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h index ce31d3b73603..184ad7a39950 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h @@ -29,7 +29,7 @@ struct pkvm_hyp_vcpu { }; /* - * Holds the relevant data for running a protected vm. + * Holds the relevant data for running a vm in protected mode. */ struct pkvm_hyp_vm { struct kvm kvm; @@ -67,6 +67,8 @@ static inline bool pkvm_hyp_vm_is_protected(struct pkvm_hyp_vm *hyp_vm) void pkvm_hyp_vm_table_init(void *tbl); +int __pkvm_reserve_vm(void); +void __pkvm_unreserve_vm(pkvm_handle_t handle); int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, unsigned long pgd_hva); int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu, diff --git a/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h b/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h index 1e6d995968a1..ba5382c12787 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h +++ b/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h @@ -12,7 +12,8 @@ #include <asm/kvm_host.h> #define cpu_reg(ctxt, r) (ctxt)->regs.regs[r] -#define DECLARE_REG(type, name, ctxt, reg) \ +#define DECLARE_REG(type, name, ctxt, reg) \ + __always_unused int ___check_reg_ ## reg; \ type name = (type)cpu_reg(ctxt, (reg)) #endif /* __ARM64_KVM_NVHE_TRAP_HANDLER_H__ */ diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index 0b0a68b663d4..a244ec25f8c5 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -27,6 +27,7 @@ hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o ffa.o hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o +hyp-obj-y += ../../../kernel/smccc-call.o hyp-obj-$(CONFIG_LIST_HARDENED) += list_debug.o hyp-obj-y += $(lib-objs) diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c index 3369dd0c4009..4e16f9b96f63 100644 --- a/arch/arm64/kvm/hyp/nvhe/ffa.c +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c @@ -71,36 +71,68 @@ static u32 hyp_ffa_version; static bool has_version_negotiated; static hyp_spinlock_t version_lock; -static void ffa_to_smccc_error(struct arm_smccc_res *res, u64 ffa_errno) +static void 
ffa_to_smccc_error(struct arm_smccc_1_2_regs *res, u64 ffa_errno) { - *res = (struct arm_smccc_res) { + *res = (struct arm_smccc_1_2_regs) { .a0 = FFA_ERROR, .a2 = ffa_errno, }; } -static void ffa_to_smccc_res_prop(struct arm_smccc_res *res, int ret, u64 prop) +static void ffa_to_smccc_res_prop(struct arm_smccc_1_2_regs *res, int ret, u64 prop) { if (ret == FFA_RET_SUCCESS) { - *res = (struct arm_smccc_res) { .a0 = FFA_SUCCESS, - .a2 = prop }; + *res = (struct arm_smccc_1_2_regs) { .a0 = FFA_SUCCESS, + .a2 = prop }; } else { ffa_to_smccc_error(res, ret); } } -static void ffa_to_smccc_res(struct arm_smccc_res *res, int ret) +static void ffa_to_smccc_res(struct arm_smccc_1_2_regs *res, int ret) { ffa_to_smccc_res_prop(res, ret, 0); } static void ffa_set_retval(struct kvm_cpu_context *ctxt, - struct arm_smccc_res *res) + struct arm_smccc_1_2_regs *res) { cpu_reg(ctxt, 0) = res->a0; cpu_reg(ctxt, 1) = res->a1; cpu_reg(ctxt, 2) = res->a2; cpu_reg(ctxt, 3) = res->a3; + cpu_reg(ctxt, 4) = res->a4; + cpu_reg(ctxt, 5) = res->a5; + cpu_reg(ctxt, 6) = res->a6; + cpu_reg(ctxt, 7) = res->a7; + + /* + * DEN0028C 2.6: SMC32/HVC32 call from aarch64 must preserve x8-x30. + * + * In FF-A 1.2, we cannot rely on the function ID sent by the caller to + * detect 32-bit calls because the CPU cycle management interfaces (e.g. + * FFA_MSG_WAIT, FFA_RUN) are 32-bit only but can have 64-bit responses. + * + * FFA-1.3 introduces 64-bit variants of the CPU cycle management + * interfaces. Moreover, FF-A 1.3 clarifies that SMC32 direct requests + * complete with SMC32 direct responses which *should* allow us to use the + * function ID sent by the caller to determine whether to return x8-x17. + * + * Note that we also cannot rely on function IDs in the response. + * + * Given the above, assume SMC64 and send back x0-x17 unconditionally + * as the passthrough code (__kvm_hyp_host_forward_smc) does the same. + */ + cpu_reg(ctxt, 8) = res->a8; + cpu_reg(ctxt, 9) = res->a9; + cpu_reg(ctxt, 10) = res->a10; + cpu_reg(ctxt, 11) = res->a11; + cpu_reg(ctxt, 12) = res->a12; + cpu_reg(ctxt, 13) = res->a13; + cpu_reg(ctxt, 14) = res->a14; + cpu_reg(ctxt, 15) = res->a15; + cpu_reg(ctxt, 16) = res->a16; + cpu_reg(ctxt, 17) = res->a17; } static bool is_ffa_call(u64 func_id) @@ -113,82 +145,92 @@ static bool is_ffa_call(u64 func_id) static int ffa_map_hyp_buffers(u64 ffa_page_count) { - struct arm_smccc_res res; + struct arm_smccc_1_2_regs res; - arm_smccc_1_1_smc(FFA_FN64_RXTX_MAP, - hyp_virt_to_phys(hyp_buffers.tx), - hyp_virt_to_phys(hyp_buffers.rx), - ffa_page_count, - 0, 0, 0, 0, - &res); + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { + .a0 = FFA_FN64_RXTX_MAP, + .a1 = hyp_virt_to_phys(hyp_buffers.tx), + .a2 = hyp_virt_to_phys(hyp_buffers.rx), + .a3 = ffa_page_count, + }, &res); return res.a0 == FFA_SUCCESS ? FFA_RET_SUCCESS : res.a2; } static int ffa_unmap_hyp_buffers(void) { - struct arm_smccc_res res; + struct arm_smccc_1_2_regs res; - arm_smccc_1_1_smc(FFA_RXTX_UNMAP, - HOST_FFA_ID, - 0, 0, 0, 0, 0, 0, - &res); + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { + .a0 = FFA_RXTX_UNMAP, + .a1 = HOST_FFA_ID, + }, &res); return res.a0 == FFA_SUCCESS ?
FFA_RET_SUCCESS : res.a2; } -static void ffa_mem_frag_tx(struct arm_smccc_res *res, u32 handle_lo, +static void ffa_mem_frag_tx(struct arm_smccc_1_2_regs *res, u32 handle_lo, u32 handle_hi, u32 fraglen, u32 endpoint_id) { - arm_smccc_1_1_smc(FFA_MEM_FRAG_TX, - handle_lo, handle_hi, fraglen, endpoint_id, - 0, 0, 0, - res); + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { + .a0 = FFA_MEM_FRAG_TX, + .a1 = handle_lo, + .a2 = handle_hi, + .a3 = fraglen, + .a4 = endpoint_id, + }, res); } -static void ffa_mem_frag_rx(struct arm_smccc_res *res, u32 handle_lo, +static void ffa_mem_frag_rx(struct arm_smccc_1_2_regs *res, u32 handle_lo, u32 handle_hi, u32 fragoff) { - arm_smccc_1_1_smc(FFA_MEM_FRAG_RX, - handle_lo, handle_hi, fragoff, HOST_FFA_ID, - 0, 0, 0, - res); + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { + .a0 = FFA_MEM_FRAG_RX, + .a1 = handle_lo, + .a2 = handle_hi, + .a3 = fragoff, + .a4 = HOST_FFA_ID, + }, res); } -static void ffa_mem_xfer(struct arm_smccc_res *res, u64 func_id, u32 len, +static void ffa_mem_xfer(struct arm_smccc_1_2_regs *res, u64 func_id, u32 len, u32 fraglen) { - arm_smccc_1_1_smc(func_id, len, fraglen, - 0, 0, 0, 0, 0, - res); + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { + .a0 = func_id, + .a1 = len, + .a2 = fraglen, + }, res); } -static void ffa_mem_reclaim(struct arm_smccc_res *res, u32 handle_lo, +static void ffa_mem_reclaim(struct arm_smccc_1_2_regs *res, u32 handle_lo, u32 handle_hi, u32 flags) { - arm_smccc_1_1_smc(FFA_MEM_RECLAIM, - handle_lo, handle_hi, flags, - 0, 0, 0, 0, - res); + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { + .a0 = FFA_MEM_RECLAIM, + .a1 = handle_lo, + .a2 = handle_hi, + .a3 = flags, + }, res); } -static void ffa_retrieve_req(struct arm_smccc_res *res, u32 len) +static void ffa_retrieve_req(struct arm_smccc_1_2_regs *res, u32 len) { - arm_smccc_1_1_smc(FFA_FN64_MEM_RETRIEVE_REQ, - len, len, - 0, 0, 0, 0, 0, - res); + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { + .a0 = FFA_FN64_MEM_RETRIEVE_REQ, + .a1 = len, + .a2 = len, + }, res); } -static void ffa_rx_release(struct arm_smccc_res *res) +static void ffa_rx_release(struct arm_smccc_1_2_regs *res) { - arm_smccc_1_1_smc(FFA_RX_RELEASE, - 0, 0, - 0, 0, 0, 0, 0, - res); + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { + .a0 = FFA_RX_RELEASE, + }, res); } -static void do_ffa_rxtx_map(struct arm_smccc_res *res, +static void do_ffa_rxtx_map(struct arm_smccc_1_2_regs *res, struct kvm_cpu_context *ctxt) { DECLARE_REG(phys_addr_t, tx, ctxt, 1); @@ -267,7 +309,7 @@ err_unmap: goto out_unlock; } -static void do_ffa_rxtx_unmap(struct arm_smccc_res *res, +static void do_ffa_rxtx_unmap(struct arm_smccc_1_2_regs *res, struct kvm_cpu_context *ctxt) { DECLARE_REG(u32, id, ctxt, 1); @@ -368,7 +410,7 @@ static int ffa_host_unshare_ranges(struct ffa_mem_region_addr_range *ranges, return ret; } -static void do_ffa_mem_frag_tx(struct arm_smccc_res *res, +static void do_ffa_mem_frag_tx(struct arm_smccc_1_2_regs *res, struct kvm_cpu_context *ctxt) { DECLARE_REG(u32, handle_lo, ctxt, 1); @@ -427,7 +469,7 @@ out: } static void __do_ffa_mem_xfer(const u64 func_id, - struct arm_smccc_res *res, + struct arm_smccc_1_2_regs *res, struct kvm_cpu_context *ctxt) { DECLARE_REG(u32, len, ctxt, 1); @@ -521,7 +563,7 @@ err_unshare: __do_ffa_mem_xfer((fid), (res), (ctxt)); \ } while (0); -static void do_ffa_mem_reclaim(struct arm_smccc_res *res, +static void do_ffa_mem_reclaim(struct arm_smccc_1_2_regs *res, struct kvm_cpu_context *ctxt) { DECLARE_REG(u32, handle_lo, ctxt, 1); @@ -628,13 +670,26 @@ static 
bool ffa_call_supported(u64 func_id) case FFA_RXTX_MAP: case FFA_MEM_DONATE: case FFA_MEM_RETRIEVE_REQ: + /* Optional notification interfaces added in FF-A 1.1 */ + case FFA_NOTIFICATION_BITMAP_CREATE: + case FFA_NOTIFICATION_BITMAP_DESTROY: + case FFA_NOTIFICATION_BIND: + case FFA_NOTIFICATION_UNBIND: + case FFA_NOTIFICATION_SET: + case FFA_NOTIFICATION_GET: + case FFA_NOTIFICATION_INFO_GET: + /* Optional interfaces added in FF-A 1.2 */ + case FFA_MSG_SEND_DIRECT_REQ2: /* Optional per 7.5.1 */ + case FFA_MSG_SEND_DIRECT_RESP2: /* Optional per 7.5.1 */ + case FFA_CONSOLE_LOG: /* Optional per 13.1: not in Table 13.1 */ + case FFA_PARTITION_INFO_GET_REGS: /* Optional for virtual instances per 13.1 */ return false; } return true; } -static bool do_ffa_features(struct arm_smccc_res *res, +static bool do_ffa_features(struct arm_smccc_1_2_regs *res, struct kvm_cpu_context *ctxt) { DECLARE_REG(u32, id, ctxt, 1); @@ -666,21 +721,25 @@ out_handled: static int hyp_ffa_post_init(void) { size_t min_rxtx_sz; - struct arm_smccc_res res; + struct arm_smccc_1_2_regs res; - arm_smccc_1_1_smc(FFA_ID_GET, 0, 0, 0, 0, 0, 0, 0, &res); + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs){ + .a0 = FFA_ID_GET, + }, &res); if (res.a0 != FFA_SUCCESS) return -EOPNOTSUPP; if (res.a2 != HOST_FFA_ID) return -EINVAL; - arm_smccc_1_1_smc(FFA_FEATURES, FFA_FN64_RXTX_MAP, - 0, 0, 0, 0, 0, 0, &res); + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs){ + .a0 = FFA_FEATURES, + .a1 = FFA_FN64_RXTX_MAP, + }, &res); if (res.a0 != FFA_SUCCESS) return -EOPNOTSUPP; - switch (res.a2) { + switch (res.a2 & FFA_FEAT_RXTX_MIN_SZ_MASK) { case FFA_FEAT_RXTX_MIN_SZ_4K: min_rxtx_sz = SZ_4K; break; @@ -700,7 +759,7 @@ static int hyp_ffa_post_init(void) return 0; } -static void do_ffa_version(struct arm_smccc_res *res, +static void do_ffa_version(struct arm_smccc_1_2_regs *res, struct kvm_cpu_context *ctxt) { DECLARE_REG(u32, ffa_req_version, ctxt, 1); @@ -712,7 +771,10 @@ static void do_ffa_version(struct arm_smccc_res *res, hyp_spin_lock(&version_lock); if (has_version_negotiated) { - res->a0 = hyp_ffa_version; + if (FFA_MINOR_VERSION(ffa_req_version) < FFA_MINOR_VERSION(hyp_ffa_version)) + res->a0 = FFA_RET_NOT_SUPPORTED; + else + res->a0 = hyp_ffa_version; goto unlock; } @@ -721,9 +783,10 @@ static void do_ffa_version(struct arm_smccc_res *res, * first if TEE supports it. 
*/ if (FFA_MINOR_VERSION(ffa_req_version) < FFA_MINOR_VERSION(hyp_ffa_version)) { - arm_smccc_1_1_smc(FFA_VERSION, ffa_req_version, 0, - 0, 0, 0, 0, 0, - res); + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { + .a0 = FFA_VERSION, + .a1 = ffa_req_version, + }, res); if (res->a0 == FFA_RET_NOT_SUPPORTED) goto unlock; @@ -740,7 +803,7 @@ unlock: hyp_spin_unlock(&version_lock); } -static void do_ffa_part_get(struct arm_smccc_res *res, +static void do_ffa_part_get(struct arm_smccc_1_2_regs *res, struct kvm_cpu_context *ctxt) { DECLARE_REG(u32, uuid0, ctxt, 1); @@ -756,9 +819,14 @@ static void do_ffa_part_get(struct arm_smccc_res *res, goto out_unlock; } - arm_smccc_1_1_smc(FFA_PARTITION_INFO_GET, uuid0, uuid1, - uuid2, uuid3, flags, 0, 0, - res); + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { + .a0 = FFA_PARTITION_INFO_GET, + .a1 = uuid0, + .a2 = uuid1, + .a3 = uuid2, + .a4 = uuid3, + .a5 = flags, + }, res); if (res->a0 != FFA_SUCCESS) goto out_unlock; @@ -791,7 +859,7 @@ out_unlock: bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id) { - struct arm_smccc_res res; + struct arm_smccc_1_2_regs res; /* * There's no way we can tell what a non-standard SMC call might @@ -860,13 +928,16 @@ out_handled: int hyp_ffa_init(void *pages) { - struct arm_smccc_res res; + struct arm_smccc_1_2_regs res; void *tx, *rx; if (kvm_host_psci_config.smccc_version < ARM_SMCCC_VERSION_1_2) return 0; - arm_smccc_1_1_smc(FFA_VERSION, FFA_VERSION_1_1, 0, 0, 0, 0, 0, 0, &res); + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { + .a0 = FFA_VERSION, + .a1 = FFA_VERSION_1_2, + }, &res); if (res.a0 == FFA_RET_NOT_SUPPORTED) return 0; @@ -886,10 +957,10 @@ int hyp_ffa_init(void *pages) if (FFA_MAJOR_VERSION(res.a0) != 1) return -EOPNOTSUPP; - if (FFA_MINOR_VERSION(res.a0) < FFA_MINOR_VERSION(FFA_VERSION_1_1)) + if (FFA_MINOR_VERSION(res.a0) < FFA_MINOR_VERSION(FFA_VERSION_1_2)) hyp_ffa_version = res.a0; else - hyp_ffa_version = FFA_VERSION_1_1; + hyp_ffa_version = FFA_VERSION_1_2; tx = pages; pages += KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE; diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 3206b2c07f82..29430c031095 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -546,6 +546,18 @@ static void handle___pkvm_prot_finalize(struct kvm_cpu_context *host_ctxt) cpu_reg(host_ctxt, 1) = __pkvm_prot_finalize(); } +static void handle___pkvm_reserve_vm(struct kvm_cpu_context *host_ctxt) +{ + cpu_reg(host_ctxt, 1) = __pkvm_reserve_vm(); +} + +static void handle___pkvm_unreserve_vm(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1); + + __pkvm_unreserve_vm(handle); +} + static void handle___pkvm_init_vm(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(struct kvm *, host_kvm, host_ctxt, 1); @@ -606,6 +618,8 @@ static const hcall_t host_hcall[] = { HANDLE_FUNC(__kvm_timer_set_cntvoff), HANDLE_FUNC(__vgic_v3_save_vmcr_aprs), HANDLE_FUNC(__vgic_v3_restore_vmcr_aprs), + HANDLE_FUNC(__pkvm_reserve_vm), + HANDLE_FUNC(__pkvm_unreserve_vm), HANDLE_FUNC(__pkvm_init_vm), HANDLE_FUNC(__pkvm_init_vcpu), HANDLE_FUNC(__pkvm_teardown_vm), diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 8957734d6183..ddc8beb55eee 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -1010,9 +1010,12 @@ static int __check_host_shared_guest(struct pkvm_hyp_vm *vm, u64 *__phys, u64 ip return ret; if (!kvm_pte_valid(pte)) return -ENOENT; - if 
(kvm_granule_size(level) != size) + if (size && kvm_granule_size(level) != size) return -E2BIG; + if (!size) + size = kvm_granule_size(level); + state = guest_get_page_state(pte, ipa); if (state != PKVM_PAGE_SHARED_BORROWED) return -EPERM; @@ -1100,7 +1103,7 @@ int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_ if (prot & ~KVM_PGTABLE_PROT_RWX) return -EINVAL; - assert_host_shared_guest(vm, ipa, PAGE_SIZE); + assert_host_shared_guest(vm, ipa, 0); guest_lock_component(vm); ret = kvm_pgtable_stage2_relax_perms(&vm->pgt, ipa, prot, 0); guest_unlock_component(vm); @@ -1156,7 +1159,7 @@ int __pkvm_host_mkyoung_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu) if (pkvm_hyp_vm_is_protected(vm)) return -EPERM; - assert_host_shared_guest(vm, ipa, PAGE_SIZE); + assert_host_shared_guest(vm, ipa, 0); guest_lock_component(vm); kvm_pgtable_stage2_mkyoung(&vm->pgt, ipa, 0); guest_unlock_component(vm); diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 338505cb0171..43bde061b65d 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -23,8 +23,8 @@ unsigned int kvm_arm_vmid_bits; unsigned int kvm_host_sve_max_vl; /* - * The currently loaded hyp vCPU for each physical CPU. Used only when - * protected KVM is enabled, but for both protected and non-protected VMs. + * The currently loaded hyp vCPU for each physical CPU. Used in protected mode + * for both protected and non-protected VMs. */ static DEFINE_PER_CPU(struct pkvm_hyp_vcpu *, loaded_hyp_vcpu); @@ -135,7 +135,7 @@ static int pkvm_check_pvm_cpu_features(struct kvm_vcpu *vcpu) { struct kvm *kvm = vcpu->kvm; - /* Protected KVM does not support AArch32 guests. */ + /* No AArch32 support for protected guests. */ if (kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL0, AARCH32) || kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL1, AARCH32)) return -EINVAL; @@ -172,6 +172,7 @@ static int pkvm_vcpu_init_traps(struct pkvm_hyp_vcpu *hyp_vcpu) /* Trust the host for non-protected vcpu features. */ vcpu->arch.hcrx_el2 = host_vcpu->arch.hcrx_el2; + memcpy(vcpu->arch.fgt, host_vcpu->arch.fgt, sizeof(vcpu->arch.fgt)); return 0; } @@ -192,6 +193,11 @@ static int pkvm_vcpu_init_traps(struct pkvm_hyp_vcpu *hyp_vcpu) */ #define HANDLE_OFFSET 0x1000 +/* + * Marks a reserved but not yet used entry in the VM table. + */ +#define RESERVED_ENTRY ((void *)0xa110ca7ed) + static unsigned int vm_handle_to_idx(pkvm_handle_t handle) { return handle - HANDLE_OFFSET; @@ -210,8 +216,8 @@ static pkvm_handle_t idx_to_vm_handle(unsigned int idx) DEFINE_HYP_SPINLOCK(vm_table_lock); /* - * The table of VM entries for protected VMs in hyp. - * Allocated at hyp initialization and setup. + * A table that tracks all VMs in protected mode. + * Allocated during hyp initialization and setup. */ static struct pkvm_hyp_vm **vm_table; @@ -231,6 +237,10 @@ static struct pkvm_hyp_vm *get_vm_by_handle(pkvm_handle_t handle) if (unlikely(idx >= KVM_MAX_PVMS)) return NULL; + /* A reserved entry doesn't represent an initialized VM. 
*/ + if (unlikely(vm_table[idx] == RESERVED_ENTRY)) + return NULL; + return vm_table[idx]; } @@ -401,14 +411,26 @@ static void unpin_host_vcpus(struct pkvm_hyp_vcpu *hyp_vcpus[], } static void init_pkvm_hyp_vm(struct kvm *host_kvm, struct pkvm_hyp_vm *hyp_vm, - unsigned int nr_vcpus) + unsigned int nr_vcpus, pkvm_handle_t handle) { + struct kvm_s2_mmu *mmu = &hyp_vm->kvm.arch.mmu; + int idx = vm_handle_to_idx(handle); + + hyp_vm->kvm.arch.pkvm.handle = handle; + hyp_vm->host_kvm = host_kvm; hyp_vm->kvm.created_vcpus = nr_vcpus; - hyp_vm->kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr; - hyp_vm->kvm.arch.pkvm.enabled = READ_ONCE(host_kvm->arch.pkvm.enabled); + hyp_vm->kvm.arch.pkvm.is_protected = READ_ONCE(host_kvm->arch.pkvm.is_protected); + hyp_vm->kvm.arch.pkvm.is_created = true; hyp_vm->kvm.arch.flags = 0; pkvm_init_features_from_host(hyp_vm, host_kvm); + + /* VMID 0 is reserved for the host */ + atomic64_set(&mmu->vmid.id, idx + 1); + + mmu->vtcr = host_mmu.arch.mmu.vtcr; + mmu->arch = &hyp_vm->kvm.arch; + mmu->pgt = &hyp_vm->pgt; } static int pkvm_vcpu_init_sve(struct pkvm_hyp_vcpu *hyp_vcpu, struct kvm_vcpu *host_vcpu) @@ -480,7 +502,7 @@ done: return ret; } -static int find_free_vm_table_entry(struct kvm *host_kvm) +static int find_free_vm_table_entry(void) { int i; @@ -493,15 +515,13 @@ static int find_free_vm_table_entry(struct kvm *host_kvm) } /* - * Allocate a VM table entry and insert a pointer to the new vm. + * Reserve a VM table entry. * - * Return a unique handle to the protected VM on success, + * Return a unique handle to the VM on success, * negative error code on failure. */ -static pkvm_handle_t insert_vm_table_entry(struct kvm *host_kvm, - struct pkvm_hyp_vm *hyp_vm) +static int allocate_vm_table_entry(void) { - struct kvm_s2_mmu *mmu = &hyp_vm->kvm.arch.mmu; int idx; hyp_assert_lock_held(&vm_table_lock); @@ -514,20 +534,57 @@ static pkvm_handle_t insert_vm_table_entry(struct kvm *host_kvm, if (unlikely(!vm_table)) return -EINVAL; - idx = find_free_vm_table_entry(host_kvm); - if (idx < 0) + idx = find_free_vm_table_entry(); + if (unlikely(idx < 0)) return idx; - hyp_vm->kvm.arch.pkvm.handle = idx_to_vm_handle(idx); + vm_table[idx] = RESERVED_ENTRY; - /* VMID 0 is reserved for the host */ - atomic64_set(&mmu->vmid.id, idx + 1); + return idx; +} - mmu->arch = &hyp_vm->kvm.arch; - mmu->pgt = &hyp_vm->pgt; +static int __insert_vm_table_entry(pkvm_handle_t handle, + struct pkvm_hyp_vm *hyp_vm) +{ + unsigned int idx; + + hyp_assert_lock_held(&vm_table_lock); + + /* + * Initializing protected state might have failed, yet a malicious + * host could trigger this function. Thus, ensure that 'vm_table' + * exists. + */ + if (unlikely(!vm_table)) + return -EINVAL; + + idx = vm_handle_to_idx(handle); + if (unlikely(idx >= KVM_MAX_PVMS)) + return -EINVAL; + + if (unlikely(vm_table[idx] != RESERVED_ENTRY)) + return -EINVAL; vm_table[idx] = hyp_vm; - return hyp_vm->kvm.arch.pkvm.handle; + + return 0; +} + +/* + * Insert a pointer to the initialized VM into the VM table. + * + * Return 0 on success, or negative error code on failure. + */ +static int insert_vm_table_entry(pkvm_handle_t handle, + struct pkvm_hyp_vm *hyp_vm) +{ + int ret; + + hyp_spin_lock(&vm_table_lock); + ret = __insert_vm_table_entry(handle, hyp_vm); + hyp_spin_unlock(&vm_table_lock); + + return ret; } /* @@ -594,10 +651,45 @@ static void unmap_donated_memory_noclear(void *va, size_t size) } /* - * Initialize the hypervisor copy of the protected VM state using the - * memory donated by the host. 
+ * Reserves an entry in the hypervisor for a new VM in protected mode. * - * Unmaps the donated memory from the host at stage 2. + * Return a unique handle to the VM on success, negative error code on failure. + */ +int __pkvm_reserve_vm(void) +{ + int ret; + + hyp_spin_lock(&vm_table_lock); + ret = allocate_vm_table_entry(); + hyp_spin_unlock(&vm_table_lock); + + if (ret < 0) + return ret; + + return idx_to_vm_handle(ret); +} + +/* + * Removes a reserved entry, but only if is hasn't been used yet. + * Otherwise, the VM needs to be destroyed. + */ +void __pkvm_unreserve_vm(pkvm_handle_t handle) +{ + unsigned int idx = vm_handle_to_idx(handle); + + if (unlikely(!vm_table)) + return; + + hyp_spin_lock(&vm_table_lock); + if (likely(idx < KVM_MAX_PVMS && vm_table[idx] == RESERVED_ENTRY)) + remove_vm_table_entry(handle); + hyp_spin_unlock(&vm_table_lock); +} + +/* + * Initialize the hypervisor copy of the VM state using host-donated memory. + * + * Unmap the donated memory from the host at stage 2. * * host_kvm: A pointer to the host's struct kvm. * vm_hva: The host va of the area being donated for the VM state. @@ -606,8 +698,7 @@ static void unmap_donated_memory_noclear(void *va, size_t size) * the VM. Must be page aligned. Its size is implied by the VM's * VTCR. * - * Return a unique handle to the protected VM on success, - * negative error code on failure. + * Return 0 success, negative error code on failure. */ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, unsigned long pgd_hva) @@ -615,6 +706,7 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, struct pkvm_hyp_vm *hyp_vm = NULL; size_t vm_size, pgd_size; unsigned int nr_vcpus; + pkvm_handle_t handle; void *pgd = NULL; int ret; @@ -628,6 +720,12 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, goto err_unpin_kvm; } + handle = READ_ONCE(host_kvm->arch.pkvm.handle); + if (unlikely(handle < HANDLE_OFFSET)) { + ret = -EINVAL; + goto err_unpin_kvm; + } + vm_size = pkvm_get_hyp_vm_size(nr_vcpus); pgd_size = kvm_pgtable_stage2_pgd_size(host_mmu.arch.mmu.vtcr); @@ -641,24 +739,19 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, if (!pgd) goto err_remove_mappings; - init_pkvm_hyp_vm(host_kvm, hyp_vm, nr_vcpus); - - hyp_spin_lock(&vm_table_lock); - ret = insert_vm_table_entry(host_kvm, hyp_vm); - if (ret < 0) - goto err_unlock; + init_pkvm_hyp_vm(host_kvm, hyp_vm, nr_vcpus, handle); ret = kvm_guest_prepare_stage2(hyp_vm, pgd); if (ret) - goto err_remove_vm_table_entry; - hyp_spin_unlock(&vm_table_lock); + goto err_remove_mappings; - return hyp_vm->kvm.arch.pkvm.handle; + /* Must be called last since this publishes the VM. */ + ret = insert_vm_table_entry(handle, hyp_vm); + if (ret) + goto err_remove_mappings; + + return 0; -err_remove_vm_table_entry: - remove_vm_table_entry(hyp_vm->kvm.arch.pkvm.handle); -err_unlock: - hyp_spin_unlock(&vm_table_lock); err_remove_mappings: unmap_donated_memory(hyp_vm, vm_size); unmap_donated_memory(pgd, pgd_size); @@ -668,10 +761,9 @@ err_unpin_kvm: } /* - * Initialize the hypervisor copy of the protected vCPU state using the - * memory donated by the host. + * Initialize the hypervisor copy of the vCPU state using host-donated memory. * - * handle: The handle for the protected vm. + * handle: The hypervisor handle for the vm. * host_vcpu: A pointer to the corresponding host vcpu. * vcpu_hva: The host va of the area being donated for the vcpu state. * Must be page aligned. 
The size of the area must be equal to diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index a48d3f5a5afb..90bd014e952f 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -192,6 +192,7 @@ static int fix_host_ownership_walker(const struct kvm_pgtable_visit_ctx *ctx, enum pkvm_page_state state; struct hyp_page *page; phys_addr_t phys; + enum kvm_pgtable_prot prot; if (!kvm_pte_valid(ctx->old)) return 0; @@ -210,11 +211,18 @@ static int fix_host_ownership_walker(const struct kvm_pgtable_visit_ctx *ctx, * configured in the hypervisor stage-1, and make sure to propagate them * to the hyp_vmemmap state. */ - state = pkvm_getstate(kvm_pgtable_hyp_pte_prot(ctx->old)); + prot = kvm_pgtable_hyp_pte_prot(ctx->old); + state = pkvm_getstate(prot); switch (state) { case PKVM_PAGE_OWNED: set_hyp_state(page, PKVM_PAGE_OWNED); - return host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HYP); + /* hyp text is RO in the host stage-2 to be inspected on panic. */ + if (prot == PAGE_HYP_EXEC) { + set_host_state(page, PKVM_NOPAGE); + return host_stage2_idmap_locked(phys, PAGE_SIZE, KVM_PGTABLE_PROT_R); + } else { + return host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HYP); + } case PKVM_PAGE_SHARED_OWNED: set_hyp_state(page, PKVM_PAGE_SHARED_OWNED); set_host_state(page, PKVM_PAGE_SHARED_BORROWED); diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index d81275790e69..acd909b7f225 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -295,12 +295,8 @@ void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if) } } - /* - * GICv5 BET0 FEAT_GCIE_LEGACY doesn't include ICC_SRE_EL2. This is due - * to be relaxed in a future spec release, at which point this in - * condition can be dropped. - */ - if (!cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF)) { + /* Only disable SRE if the host implements the GICv2 interface */ + if (static_branch_unlikely(&vgic_v3_has_v2_compat)) { /* * Prevent the guest from touching the ICC_SRE_EL1 system * register. Note that this may not have any effect, as @@ -329,19 +325,16 @@ void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if) cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2); } - /* - * Can be dropped in the future when GICv5 spec is relaxed. See comment - * above. - */ - if (!cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF)) { + /* Only restore SRE if the host implements the GICv2 interface */ + if (static_branch_unlikely(&vgic_v3_has_v2_compat)) { val = read_gicreg(ICC_SRE_EL2); write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2); - } - if (!cpu_if->vgic_sre) { - /* Make sure ENABLE is set at EL2 before setting SRE at EL1 */ - isb(); - write_gicreg(1, ICC_SRE_EL1); + if (!cpu_if->vgic_sre) { + /* Make sure ENABLE is set at EL2 before setting SRE at EL1 */ + isb(); + write_gicreg(1, ICC_SRE_EL1); + } } /* diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 0998ad4a2552..9984c492305a 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -95,6 +95,13 @@ static u64 __compute_hcr(struct kvm_vcpu *vcpu) /* Force NV2 in case the guest is forgetful... */ guest_hcr |= HCR_NV2; } + + /* + * Exclude the guest's TWED configuration if it hasn't set TWE + * to avoid potentially delaying traps for the host. 
+ */ + if (!(guest_hcr & HCR_TWE)) + guest_hcr &= ~(HCR_EL2_TWEDEn | HCR_EL2_TWEDEL); } BUG_ON(host_data_test_flag(VCPU_IN_HYP_CONTEXT) && diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index 6745f38b64f9..dfcd66c65517 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -106,7 +106,30 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr { unsigned long cpsr = *vcpu_cpsr(vcpu); bool is_aarch32 = vcpu_mode_is_32bit(vcpu); - u64 esr = 0; + u64 esr = 0, fsc; + int level; + + /* + * If injecting an abort from a failed S1PTW, rewalk the S1 PTs to + * find the failing level. If we can't find it, assume the error was + * transient and restart without changing the state. + */ + if (kvm_vcpu_abt_iss1tw(vcpu)) { + u64 hpfar = kvm_vcpu_get_fault_ipa(vcpu); + int ret; + + if (hpfar == INVALID_GPA) + return; + + ret = __kvm_find_s1_desc_level(vcpu, addr, hpfar, &level); + if (ret) + return; + + WARN_ON_ONCE(level < -1 || level > 3); + fsc = ESR_ELx_FSC_SEA_TTW(level); + } else { + fsc = ESR_ELx_FSC_EXTABT; + } /* This delight is brought to you by FEAT_DoubleFault2. */ if (effective_sctlr2_ease(vcpu)) @@ -133,7 +156,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr if (!is_iabt) esr |= ESR_ELx_EC_DABT_LOW << ESR_ELx_EC_SHIFT; - esr |= ESR_ELx_FSC_EXTABT; + esr |= fsc; vcpu_write_sys_reg(vcpu, addr, exception_far_elx(vcpu)); vcpu_write_sys_reg(vcpu, esr, exception_esr_elx(vcpu)); diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 736394292503..7cc964af8d30 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1431,11 +1431,8 @@ static int get_vma_page_shift(struct vm_area_struct *vma, unsigned long hva) * able to see the page's tags and therefore they must be initialised first. If * PG_mte_tagged is set, tags have already been initialised. * - * The race in the test/set of the PG_mte_tagged flag is handled by: - * - preventing VM_SHARED mappings in a memslot with MTE preventing two VMs - * racing to santise the same page - * - mmap_lock protects between a VM faulting a page in and the VMM performing - * an mprotect() to add VM_MTE + * Must be called with kvm->mmu_lock held to ensure the memory remains mapped + * while the tags are zeroed. */ static void sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn, unsigned long size) @@ -1482,13 +1479,132 @@ static bool kvm_vma_is_cacheable(struct vm_area_struct *vma) } } +static int prepare_mmu_memcache(struct kvm_vcpu *vcpu, bool topup_memcache, + void **memcache) +{ + int min_pages; + + if (!is_protected_kvm_enabled()) + *memcache = &vcpu->arch.mmu_page_cache; + else + *memcache = &vcpu->arch.pkvm_memcache; + + if (!topup_memcache) + return 0; + + min_pages = kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu); + + if (!is_protected_kvm_enabled()) + return kvm_mmu_topup_memory_cache(*memcache, min_pages); + + return topup_hyp_memcache(*memcache, min_pages); +} + +/* + * Potentially reduce shadow S2 permissions to match the guest's own S2. For + * exec faults, we'd only reach this point if the guest actually allowed it (see + * kvm_s2_handle_perm_fault). + * + * Also encode the level of the original translation in the SW bits of the leaf + * entry as a proxy for the span of that translation. This will be retrieved on + * TLB invalidation from the guest and used to limit the invalidation scope if a + * TTL hint or a range isn't provided. 
+ */ +static void adjust_nested_fault_perms(struct kvm_s2_trans *nested, + enum kvm_pgtable_prot *prot, + bool *writable) +{ + *writable &= kvm_s2_trans_writable(nested); + if (!kvm_s2_trans_readable(nested)) + *prot &= ~KVM_PGTABLE_PROT_R; + + *prot |= kvm_encode_nested_level(nested); +} + +#define KVM_PGTABLE_WALK_MEMABORT_FLAGS (KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED) + +static int gmem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, + struct kvm_s2_trans *nested, + struct kvm_memory_slot *memslot, bool is_perm) +{ + bool write_fault, exec_fault, writable; + enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_MEMABORT_FLAGS; + enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R; + struct kvm_pgtable *pgt = vcpu->arch.hw_mmu->pgt; + unsigned long mmu_seq; + struct page *page; + struct kvm *kvm = vcpu->kvm; + void *memcache; + kvm_pfn_t pfn; + gfn_t gfn; + int ret; + + ret = prepare_mmu_memcache(vcpu, true, &memcache); + if (ret) + return ret; + + if (nested) + gfn = kvm_s2_trans_output(nested) >> PAGE_SHIFT; + else + gfn = fault_ipa >> PAGE_SHIFT; + + write_fault = kvm_is_write_fault(vcpu); + exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu); + + VM_WARN_ON_ONCE(write_fault && exec_fault); + + mmu_seq = kvm->mmu_invalidate_seq; + /* Pairs with the smp_wmb() in kvm_mmu_invalidate_end(). */ + smp_rmb(); + + ret = kvm_gmem_get_pfn(kvm, memslot, gfn, &pfn, &page, NULL); + if (ret) { + kvm_prepare_memory_fault_exit(vcpu, fault_ipa, PAGE_SIZE, + write_fault, exec_fault, false); + return ret; + } + + writable = !(memslot->flags & KVM_MEM_READONLY); + + if (nested) + adjust_nested_fault_perms(nested, &prot, &writable); + + if (writable) + prot |= KVM_PGTABLE_PROT_W; + + if (exec_fault || + (cpus_have_final_cap(ARM64_HAS_CACHE_DIC) && + (!nested || kvm_s2_trans_executable(nested)))) + prot |= KVM_PGTABLE_PROT_X; + + kvm_fault_lock(kvm); + if (mmu_invalidate_retry(kvm, mmu_seq)) { + ret = -EAGAIN; + goto out_unlock; + } + + ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, fault_ipa, PAGE_SIZE, + __pfn_to_phys(pfn), prot, + memcache, flags); + +out_unlock: + kvm_release_faultin_page(kvm, page, !!ret, writable); + kvm_fault_unlock(kvm); + + if (writable && !ret) + mark_page_dirty_in_slot(kvm, memslot, gfn); + + return ret != -EAGAIN ? 
ret : 0; +} + static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_s2_trans *nested, struct kvm_memory_slot *memslot, unsigned long hva, bool fault_is_perm) { int ret = 0; - bool write_fault, writable, force_pte = false; + bool topup_memcache; + bool write_fault, writable; bool exec_fault, mte_allowed, is_vma_cacheable; bool s2_force_noncacheable = false, vfio_allow_any_uc = false; unsigned long mmu_seq; @@ -1500,23 +1616,19 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, gfn_t gfn; kvm_pfn_t pfn; bool logging_active = memslot_is_logging(memslot); + bool force_pte = logging_active; long vma_pagesize, fault_granule; enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R; struct kvm_pgtable *pgt; struct page *page; vm_flags_t vm_flags; - enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED; + enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_MEMABORT_FLAGS; if (fault_is_perm) fault_granule = kvm_vcpu_trap_get_perm_fault_granule(vcpu); write_fault = kvm_is_write_fault(vcpu); exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu); - VM_BUG_ON(write_fault && exec_fault); - - if (!is_protected_kvm_enabled()) - memcache = &vcpu->arch.mmu_page_cache; - else - memcache = &vcpu->arch.pkvm_memcache; + VM_WARN_ON_ONCE(write_fault && exec_fault); /* * Permission faults just need to update the existing leaf entry, @@ -1524,17 +1636,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * only exception to this is when dirty logging is enabled at runtime * and a write fault needs to collapse a block entry into a table. */ - if (!fault_is_perm || (logging_active && write_fault)) { - int min_pages = kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu); - - if (!is_protected_kvm_enabled()) - ret = kvm_mmu_topup_memory_cache(memcache, min_pages); - else - ret = topup_hyp_memcache(memcache, min_pages); - - if (ret) - return ret; - } + topup_memcache = !fault_is_perm || (logging_active && write_fault); + ret = prepare_mmu_memcache(vcpu, topup_memcache, &memcache); + if (ret) + return ret; /* * Let's check if we will get back a huge page backed by hugetlbfs, or @@ -1548,16 +1653,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return -EFAULT; } - /* - * logging_active is guaranteed to never be true for VM_PFNMAP - * memslots. - */ - if (logging_active) { - force_pte = true; + if (force_pte) vma_shift = PAGE_SHIFT; - } else { + else vma_shift = get_vma_page_shift(vma, hva); - } switch (vma_shift) { #ifndef __PAGETABLE_PMD_FOLDED @@ -1609,7 +1708,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, max_map_size = PAGE_SIZE; force_pte = (max_map_size == PAGE_SIZE); - vma_pagesize = min(vma_pagesize, (long)max_map_size); + vma_pagesize = min_t(long, vma_pagesize, max_map_size); } /* @@ -1642,7 +1741,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * Rely on mmap_read_unlock() for an implicit smp_rmb(), which pairs * with the smp_wmb() in kvm_mmu_invalidate_end(). */ - mmu_seq = vcpu->kvm->mmu_invalidate_seq; + mmu_seq = kvm->mmu_invalidate_seq; mmap_read_unlock(current->mm); pfn = __kvm_faultin_pfn(memslot, gfn, write_fault ? FOLL_WRITE : 0, @@ -1673,7 +1772,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * cache maintenance. 
*/ if (!kvm_supports_cacheable_pfnmap()) - return -EFAULT; + ret = -EFAULT; } else { /* * If the page was identified as device early by looking at @@ -1696,27 +1795,16 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, } if (exec_fault && s2_force_noncacheable) - return -ENOEXEC; + ret = -ENOEXEC; - /* - * Potentially reduce shadow S2 permissions to match the guest's own - * S2. For exec faults, we'd only reach this point if the guest - * actually allowed it (see kvm_s2_handle_perm_fault). - * - * Also encode the level of the original translation in the SW bits - * of the leaf entry as a proxy for the span of that translation. - * This will be retrieved on TLB invalidation from the guest and - * used to limit the invalidation scope if a TTL hint or a range - * isn't provided. - */ - if (nested) { - writable &= kvm_s2_trans_writable(nested); - if (!kvm_s2_trans_readable(nested)) - prot &= ~KVM_PGTABLE_PROT_R; - - prot |= kvm_encode_nested_level(nested); + if (ret) { + kvm_release_page_unused(page); + return ret; } + if (nested) + adjust_nested_fault_perms(nested, &prot, &writable); + kvm_fault_lock(kvm); pgt = vcpu->arch.hw_mmu->pgt; if (mmu_invalidate_retry(kvm, mmu_seq)) { @@ -1985,8 +2073,15 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) goto out_unlock; } - ret = user_mem_abort(vcpu, fault_ipa, nested, memslot, hva, - esr_fsc_is_permission_fault(esr)); + VM_WARN_ON_ONCE(kvm_vcpu_trap_is_permission_fault(vcpu) && + !write_fault && !kvm_vcpu_trap_is_exec_fault(vcpu)); + + if (kvm_slot_has_gmem(memslot)) + ret = gmem_abort(vcpu, fault_ipa, nested, memslot, + esr_fsc_is_permission_fault(esr)); + else + ret = user_mem_abort(vcpu, fault_ipa, nested, memslot, hva, + esr_fsc_is_permission_fault(esr)); if (ret == 0) ret = 1; out: @@ -2218,6 +2313,13 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, if ((new->base_gfn + new->npages) > (kvm_phys_size(&kvm->arch.mmu) >> PAGE_SHIFT)) return -EFAULT; + /* + * Only support guest_memfd backed memslots with mappable memory, since + * there aren't any CoCo VMs that support only private memory on arm64. 
+ */ + if (kvm_slot_has_gmem(new) && !kvm_memslot_is_gmem_only(new)) + return -EINVAL; + hva = new->userspace_addr; reg_end = hva + (new->npages << PAGE_SHIFT); diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 50d559248a1f..f04cda40545b 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -349,7 +349,7 @@ static void vtcr_to_walk_info(u64 vtcr, struct s2_walk_info *wi) wi->sl = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr); /* Global limit for now, should eventually be per-VM */ wi->max_oa_bits = min(get_kvm_ipa_limit(), - ps_to_output_size(FIELD_GET(VTCR_EL2_PS_MASK, vtcr))); + ps_to_output_size(FIELD_GET(VTCR_EL2_PS_MASK, vtcr), false)); } int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa, @@ -1172,8 +1172,9 @@ static u64 read_vncr_el2(struct kvm_vcpu *vcpu) return (u64)sign_extend64(__vcpu_sys_reg(vcpu, VNCR_EL2), 48); } -static int kvm_translate_vncr(struct kvm_vcpu *vcpu) +static int kvm_translate_vncr(struct kvm_vcpu *vcpu, bool *is_gmem) { + struct kvm_memory_slot *memslot; bool write_fault, writable; unsigned long mmu_seq; struct vncr_tlb *vt; @@ -1216,10 +1217,25 @@ static int kvm_translate_vncr(struct kvm_vcpu *vcpu) smp_rmb(); gfn = vt->wr.pa >> PAGE_SHIFT; - pfn = kvm_faultin_pfn(vcpu, gfn, write_fault, &writable, &page); - if (is_error_noslot_pfn(pfn) || (write_fault && !writable)) + memslot = gfn_to_memslot(vcpu->kvm, gfn); + if (!memslot) return -EFAULT; + *is_gmem = kvm_slot_has_gmem(memslot); + if (!*is_gmem) { + pfn = __kvm_faultin_pfn(memslot, gfn, write_fault ? FOLL_WRITE : 0, + &writable, &page); + if (is_error_noslot_pfn(pfn) || (write_fault && !writable)) + return -EFAULT; + } else { + ret = kvm_gmem_get_pfn(vcpu->kvm, memslot, gfn, &pfn, &page, NULL); + if (ret) { + kvm_prepare_memory_fault_exit(vcpu, vt->wr.pa, PAGE_SIZE, + write_fault, false, false); + return ret; + } + } + scoped_guard(write_lock, &vcpu->kvm->mmu_lock) { if (mmu_invalidate_retry(vcpu->kvm, mmu_seq)) return -EAGAIN; @@ -1295,23 +1311,36 @@ int kvm_handle_vncr_abort(struct kvm_vcpu *vcpu) if (esr_fsc_is_permission_fault(esr)) { inject_vncr_perm(vcpu); } else if (esr_fsc_is_translation_fault(esr)) { - bool valid; + bool valid, is_gmem = false; int ret; scoped_guard(read_lock, &vcpu->kvm->mmu_lock) valid = kvm_vncr_tlb_lookup(vcpu); if (!valid) - ret = kvm_translate_vncr(vcpu); + ret = kvm_translate_vncr(vcpu, &is_gmem); else ret = -EPERM; switch (ret) { case -EAGAIN: - case -ENOMEM: /* Let's try again... */ break; + case -ENOMEM: + /* + * For guest_memfd, this indicates that it failed to + * create a folio to back the memory. Inform userspace. + */ + if (is_gmem) + return 0; + /* Otherwise, let's try again... 
*/ + break; case -EFAULT: + case -EIO: + case -EHWPOISON: + if (is_gmem) + return 0; + fallthrough; case -EINVAL: case -ENOENT: case -EACCES: @@ -1462,9 +1491,16 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val) case SYS_ID_AA64PFR1_EL1: /* Only support BTI, SSBS, CSV2_frac */ - val &= (ID_AA64PFR1_EL1_BT | - ID_AA64PFR1_EL1_SSBS | - ID_AA64PFR1_EL1_CSV2_frac); + val &= ~(ID_AA64PFR1_EL1_PFAR | + ID_AA64PFR1_EL1_MTEX | + ID_AA64PFR1_EL1_THE | + ID_AA64PFR1_EL1_GCS | + ID_AA64PFR1_EL1_MTE_frac | + ID_AA64PFR1_EL1_NMI | + ID_AA64PFR1_EL1_SME | + ID_AA64PFR1_EL1_RES0 | + ID_AA64PFR1_EL1_MPAM_frac | + ID_AA64PFR1_EL1_MTE); break; case SYS_ID_AA64MMFR0_EL1: @@ -1517,12 +1553,11 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val) break; case SYS_ID_AA64MMFR1_EL1: - val &= (ID_AA64MMFR1_EL1_HCX | - ID_AA64MMFR1_EL1_PAN | - ID_AA64MMFR1_EL1_LO | - ID_AA64MMFR1_EL1_HPDS | - ID_AA64MMFR1_EL1_VH | - ID_AA64MMFR1_EL1_VMIDBits); + val &= ~(ID_AA64MMFR1_EL1_CMOW | + ID_AA64MMFR1_EL1_nTLBPA | + ID_AA64MMFR1_EL1_ETS | + ID_AA64MMFR1_EL1_XNX | + ID_AA64MMFR1_EL1_HAFDBS); /* FEAT_E2H0 implies no VHE */ if (test_bit(KVM_ARM_VCPU_HAS_EL2_E2H0, kvm->arch.vcpu_features)) val &= ~ID_AA64MMFR1_EL1_VH; @@ -1564,14 +1599,22 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val) case SYS_ID_AA64DFR0_EL1: /* Only limited support for PMU, Debug, BPs, WPs, and HPMN0 */ - val &= (ID_AA64DFR0_EL1_PMUVer | - ID_AA64DFR0_EL1_WRPs | - ID_AA64DFR0_EL1_BRPs | - ID_AA64DFR0_EL1_DebugVer| - ID_AA64DFR0_EL1_HPMN0); - - /* Cap Debug to ARMv8.1 */ - val = ID_REG_LIMIT_FIELD_ENUM(val, ID_AA64DFR0_EL1, DebugVer, VHE); + val &= ~(ID_AA64DFR0_EL1_ExtTrcBuff | + ID_AA64DFR0_EL1_BRBE | + ID_AA64DFR0_EL1_MTPMU | + ID_AA64DFR0_EL1_TraceBuffer | + ID_AA64DFR0_EL1_TraceFilt | + ID_AA64DFR0_EL1_PMSVer | + ID_AA64DFR0_EL1_CTX_CMPs | + ID_AA64DFR0_EL1_SEBEP | + ID_AA64DFR0_EL1_PMSS | + ID_AA64DFR0_EL1_TraceVer); + + /* + * FEAT_Debugv8p9 requires support for extended breakpoints / + * watchpoints. + */ + val = ID_REG_LIMIT_FIELD_ENUM(val, ID_AA64DFR0_EL1, DebugVer, V8P8); break; } @@ -1796,3 +1839,36 @@ void kvm_nested_sync_hwstate(struct kvm_vcpu *vcpu) if (unlikely(vcpu_test_and_clear_flag(vcpu, NESTED_SERROR_PENDING))) kvm_inject_serror_esr(vcpu, vcpu_get_vsesr(vcpu)); } + +/* + * KVM unconditionally sets most of these traps anyway but use an allowlist + * to document the guest hypervisor traps that may take precedence and guard + * against future changes to the non-nested trap configuration. + */ +#define NV_MDCR_GUEST_INCLUDE (MDCR_EL2_TDE | \ + MDCR_EL2_TDA | \ + MDCR_EL2_TDRA | \ + MDCR_EL2_TTRF | \ + MDCR_EL2_TPMS | \ + MDCR_EL2_TPM | \ + MDCR_EL2_TPMCR | \ + MDCR_EL2_TDCC | \ + MDCR_EL2_TDOSA) + +void kvm_nested_setup_mdcr_el2(struct kvm_vcpu *vcpu) +{ + u64 guest_mdcr = __vcpu_sys_reg(vcpu, MDCR_EL2); + + if (is_nested_ctxt(vcpu)) + vcpu->arch.mdcr_el2 |= (guest_mdcr & NV_MDCR_GUEST_INCLUDE); + /* + * In yet another example where FEAT_NV2 is fscking broken, accesses + * to MDSCR_EL1 are redirected to the VNCR despite having an effect + * at EL2. Use a big hammer to apply sanity. + * + * Unless of course we have FEAT_FGT, in which case we can precisely + * trap MDSCR_EL1. 
+ */ + else if (!cpus_have_final_cap(ARM64_HAS_FGT)) + vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA; +} diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c index fcd70bfe44fb..24f0f8a8c943 100644 --- a/arch/arm64/kvm/pkvm.c +++ b/arch/arm64/kvm/pkvm.c @@ -85,16 +85,23 @@ void __init kvm_hyp_reserve(void) hyp_mem_base); } -static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm) +static void __pkvm_destroy_hyp_vm(struct kvm *kvm) { - if (host_kvm->arch.pkvm.handle) { + if (pkvm_hyp_vm_is_created(kvm)) { WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm, - host_kvm->arch.pkvm.handle)); + kvm->arch.pkvm.handle)); + } else if (kvm->arch.pkvm.handle) { + /* + * The VM could have been reserved but hyp initialization has + * failed. Make sure to unreserve it. + */ + kvm_call_hyp_nvhe(__pkvm_unreserve_vm, kvm->arch.pkvm.handle); } - host_kvm->arch.pkvm.handle = 0; - free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc); - free_hyp_memcache(&host_kvm->arch.pkvm.stage2_teardown_mc); + kvm->arch.pkvm.handle = 0; + kvm->arch.pkvm.is_created = false; + free_hyp_memcache(&kvm->arch.pkvm.teardown_mc); + free_hyp_memcache(&kvm->arch.pkvm.stage2_teardown_mc); } static int __pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu) @@ -129,16 +136,16 @@ static int __pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu) * * Return 0 on success, negative error code on failure. */ -static int __pkvm_create_hyp_vm(struct kvm *host_kvm) +static int __pkvm_create_hyp_vm(struct kvm *kvm) { size_t pgd_sz, hyp_vm_sz; void *pgd, *hyp_vm; int ret; - if (host_kvm->created_vcpus < 1) + if (kvm->created_vcpus < 1) return -EINVAL; - pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.mmu.vtcr); + pgd_sz = kvm_pgtable_stage2_pgd_size(kvm->arch.mmu.vtcr); /* * The PGD pages will be reclaimed using a hyp_memcache which implies @@ -152,7 +159,7 @@ static int __pkvm_create_hyp_vm(struct kvm *host_kvm) /* Allocate memory to donate to hyp for vm and vcpu pointers. */ hyp_vm_sz = PAGE_ALIGN(size_add(PKVM_HYP_VM_SIZE, size_mul(sizeof(void *), - host_kvm->created_vcpus))); + kvm->created_vcpus))); hyp_vm = alloc_pages_exact(hyp_vm_sz, GFP_KERNEL_ACCOUNT); if (!hyp_vm) { ret = -ENOMEM; @@ -160,12 +167,12 @@ static int __pkvm_create_hyp_vm(struct kvm *host_kvm) } /* Donate the VM memory to hyp and let hyp initialize it. 
*/ - ret = kvm_call_hyp_nvhe(__pkvm_init_vm, host_kvm, hyp_vm, pgd); - if (ret < 0) + ret = kvm_call_hyp_nvhe(__pkvm_init_vm, kvm, hyp_vm, pgd); + if (ret) goto free_vm; - host_kvm->arch.pkvm.handle = ret; - host_kvm->arch.pkvm.stage2_teardown_mc.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2; + kvm->arch.pkvm.is_created = true; + kvm->arch.pkvm.stage2_teardown_mc.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2; kvm_account_pgtable_pages(pgd, pgd_sz / PAGE_SIZE); return 0; @@ -176,14 +183,19 @@ free_pgd: return ret; } -int pkvm_create_hyp_vm(struct kvm *host_kvm) +bool pkvm_hyp_vm_is_created(struct kvm *kvm) +{ + return READ_ONCE(kvm->arch.pkvm.is_created); +} + +int pkvm_create_hyp_vm(struct kvm *kvm) { int ret = 0; - mutex_lock(&host_kvm->arch.config_lock); - if (!host_kvm->arch.pkvm.handle) - ret = __pkvm_create_hyp_vm(host_kvm); - mutex_unlock(&host_kvm->arch.config_lock); + mutex_lock(&kvm->arch.config_lock); + if (!pkvm_hyp_vm_is_created(kvm)) + ret = __pkvm_create_hyp_vm(kvm); + mutex_unlock(&kvm->arch.config_lock); return ret; } @@ -200,15 +212,31 @@ int pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu) return ret; } -void pkvm_destroy_hyp_vm(struct kvm *host_kvm) +void pkvm_destroy_hyp_vm(struct kvm *kvm) { - mutex_lock(&host_kvm->arch.config_lock); - __pkvm_destroy_hyp_vm(host_kvm); - mutex_unlock(&host_kvm->arch.config_lock); + mutex_lock(&kvm->arch.config_lock); + __pkvm_destroy_hyp_vm(kvm); + mutex_unlock(&kvm->arch.config_lock); } -int pkvm_init_host_vm(struct kvm *host_kvm) +int pkvm_init_host_vm(struct kvm *kvm) { + int ret; + + if (pkvm_hyp_vm_is_created(kvm)) + return -EINVAL; + + /* VM is already reserved, no need to proceed. */ + if (kvm->arch.pkvm.handle) + return 0; + + /* Reserve the VM in hyp and obtain a hyp handle for the VM. */ + ret = kvm_call_hyp_nvhe(__pkvm_reserve_vm); + if (ret < 0) + return ret; + + kvm->arch.pkvm.handle = ret; + return 0; } diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c index 098416d7e5c2..dc5acfb00af9 100644 --- a/arch/arm64/kvm/ptdump.c +++ b/arch/arm64/kvm/ptdump.c @@ -32,23 +32,23 @@ static const struct ptdump_prot_bits stage2_pte_bits[] = { .set = " ", .clear = "F", }, { - .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID, - .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID, + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R, + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R, .set = "R", .clear = " ", }, { - .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID, - .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID, + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W, + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W, .set = "W", .clear = " ", }, { - .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID, - .val = PTE_VALID, - .set = " ", - .clear = "X", + .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN, + .val = KVM_PTE_LEAF_ATTR_HI_S2_XN, + .set = "NX", + .clear = "x ", }, { - .mask = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID, - .val = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID, + .mask = KVM_PTE_LEAF_ATTR_LO_S2_AF, + .val = KVM_PTE_LEAF_ATTR_LO_S2_AF, .set = "AF", .clear = " ", }, { diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index b29f72478a50..e67eb39ddc11 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -203,7 +203,6 @@ static void locate_register(const struct kvm_vcpu *vcpu, enum vcpu_sysreg reg, MAPPED_EL2_SYSREG(AMAIR_EL2, AMAIR_EL1, NULL ); MAPPED_EL2_SYSREG(ELR_EL2, ELR_EL1, NULL ); MAPPED_EL2_SYSREG(SPSR_EL2, SPSR_EL1, NULL ); - MAPPED_EL2_SYSREG(ZCR_EL2, ZCR_EL1, NULL ); MAPPED_EL2_SYSREG(CONTEXTIDR_EL2, CONTEXTIDR_EL1, NULL ); MAPPED_EL2_SYSREG(SCTLR2_EL2, 
SCTLR2_EL1, NULL ); case CNTHCTL_EL2: @@ -1595,14 +1594,47 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu, return true; } -static bool access_hv_timer(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *r) +static int arch_timer_set_user(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd, + u64 val) { - if (!vcpu_el2_e2h_is_set(vcpu)) - return undef_access(vcpu, p, r); + switch (reg_to_encoding(rd)) { + case SYS_CNTV_CTL_EL0: + case SYS_CNTP_CTL_EL0: + case SYS_CNTHV_CTL_EL2: + case SYS_CNTHP_CTL_EL2: + val &= ~ARCH_TIMER_CTRL_IT_STAT; + break; + case SYS_CNTVCT_EL0: + if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) + timer_set_offset(vcpu_vtimer(vcpu), kvm_phys_timer_read() - val); + return 0; + case SYS_CNTPCT_EL0: + if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) + timer_set_offset(vcpu_ptimer(vcpu), kvm_phys_timer_read() - val); + return 0; + } - return access_arch_timer(vcpu, p, r); + __vcpu_assign_sys_reg(vcpu, rd->reg, val); + return 0; +} + +static int arch_timer_get_user(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd, + u64 *val) +{ + switch (reg_to_encoding(rd)) { + case SYS_CNTVCT_EL0: + *val = kvm_phys_timer_read() - timer_get_offset(vcpu_vtimer(vcpu)); + break; + case SYS_CNTPCT_EL0: + *val = kvm_phys_timer_read() - timer_get_offset(vcpu_ptimer(vcpu)); + break; + default: + *val = __vcpu_sys_reg(vcpu, rd->reg); + } + + return 0; } static s64 kvm_arm64_ftr_safe_value(u32 id, const struct arm64_ftr_bits *ftrp, @@ -1757,7 +1789,8 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu, val &= ~ID_AA64ISAR2_EL1_WFxT; break; case SYS_ID_AA64ISAR3_EL1: - val &= ID_AA64ISAR3_EL1_FPRCVT | ID_AA64ISAR3_EL1_FAMINMAX; + val &= ID_AA64ISAR3_EL1_FPRCVT | ID_AA64ISAR3_EL1_LSFE | + ID_AA64ISAR3_EL1_FAMINMAX; break; case SYS_ID_AA64MMFR2_EL1: val &= ~ID_AA64MMFR2_EL1_CCIDX_MASK; @@ -1997,6 +2030,26 @@ static u64 sanitise_id_aa64dfr0_el1(const struct kvm_vcpu *vcpu, u64 val) return val; } +/* + * Older versions of KVM erroneously claim support for FEAT_DoubleLock with + * NV-enabled VMs on unsupporting hardware. Silently ignore the incorrect + * value if it is consistent with the bug. + */ +static bool ignore_feat_doublelock(struct kvm_vcpu *vcpu, u64 val) +{ + u8 host, user; + + if (!vcpu_has_nv(vcpu)) + return false; + + host = SYS_FIELD_GET(ID_AA64DFR0_EL1, DoubleLock, + read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1)); + user = SYS_FIELD_GET(ID_AA64DFR0_EL1, DoubleLock, val); + + return host == ID_AA64DFR0_EL1_DoubleLock_NI && + user == ID_AA64DFR0_EL1_DoubleLock_IMP; +} + static int set_id_aa64dfr0_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, u64 val) @@ -2028,6 +2081,11 @@ static int set_id_aa64dfr0_el1(struct kvm_vcpu *vcpu, if (debugver < ID_AA64DFR0_EL1_DebugVer_IMP) return -EINVAL; + if (ignore_feat_doublelock(vcpu, val)) { + val &= ~ID_AA64DFR0_EL1_DoubleLock; + val |= SYS_FIELD_PREP_ENUM(ID_AA64DFR0_EL1, DoubleLock, NI); + } + return set_id_reg(vcpu, rd, val); } @@ -2148,16 +2206,29 @@ static int set_id_aa64pfr1_el1(struct kvm_vcpu *vcpu, return set_id_reg(vcpu, rd, user_val); } +/* + * Allow userspace to de-feature a stage-2 translation granule but prevent it + * from claiming the impossible. 
+ */ +#define tgran2_val_allowed(tg, safe, user) \ +({ \ + u8 __s = SYS_FIELD_GET(ID_AA64MMFR0_EL1, tg, safe); \ + u8 __u = SYS_FIELD_GET(ID_AA64MMFR0_EL1, tg, user); \ + \ + __s == __u || __u == ID_AA64MMFR0_EL1_##tg##_NI; \ +}) + static int set_id_aa64mmfr0_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, u64 user_val) { u64 sanitized_val = kvm_read_sanitised_id_reg(vcpu, rd); - u64 tgran2_mask = ID_AA64MMFR0_EL1_TGRAN4_2_MASK | - ID_AA64MMFR0_EL1_TGRAN16_2_MASK | - ID_AA64MMFR0_EL1_TGRAN64_2_MASK; - if (vcpu_has_nv(vcpu) && - ((sanitized_val & tgran2_mask) != (user_val & tgran2_mask))) + if (!vcpu_has_nv(vcpu)) + return set_id_reg(vcpu, rd, user_val); + + if (!tgran2_val_allowed(TGRAN4_2, sanitized_val, user_val) || + !tgran2_val_allowed(TGRAN16_2, sanitized_val, user_val) || + !tgran2_val_allowed(TGRAN64_2, sanitized_val, user_val)) return -EINVAL; return set_id_reg(vcpu, rd, user_val); @@ -2468,15 +2539,20 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu, "trap of EL2 register redirected to EL1"); } -#define EL2_REG_FILTERED(name, acc, rst, v, filter) { \ +#define SYS_REG_USER_FILTER(name, acc, rst, v, gu, su, filter) { \ SYS_DESC(SYS_##name), \ .access = acc, \ .reset = rst, \ .reg = name, \ + .get_user = gu, \ + .set_user = su, \ .visibility = filter, \ .val = v, \ } +#define EL2_REG_FILTERED(name, acc, rst, v, filter) \ + SYS_REG_USER_FILTER(name, acc, rst, v, NULL, NULL, filter) + #define EL2_REG(name, acc, rst, v) \ EL2_REG_FILTERED(name, acc, rst, v, el2_visibility) @@ -2487,6 +2563,10 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu, EL2_REG_VNCR_FILT(name, hidden_visibility) #define EL2_REG_REDIR(name, rst, v) EL2_REG(name, bad_redir_trap, rst, v) +#define TIMER_REG(name, vis) \ + SYS_REG_USER_FILTER(name, access_arch_timer, reset_val, 0, \ + arch_timer_get_user, arch_timer_set_user, vis) + /* * Since reset() callback and field val are not used for idregs, they will be * used for specific purposes for idregs. 
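[Editor's note, illustrative only and not part of the commit: the new TIMER_REG() wrapper introduced in the sys_regs.c hunk above is SYS_REG_USER_FILTER() with the arch-timer userspace accessors plugged in. Under that assumption, an entry such as TIMER_REG(CNTV_CTL_EL0, NULL) would expand to roughly the sys_reg_desc initializer sketched below; field names come from the macros shown in this hunk, and the SYS_DESC() encoding boilerplate is elided.]

	/*
	 * Approximate expansion of TIMER_REG(CNTV_CTL_EL0, NULL),
	 * per the SYS_REG_USER_FILTER() definition in this patch.
	 */
	{
		SYS_DESC(SYS_CNTV_CTL_EL0),		/* register encoding/name */
		.access     = access_arch_timer,	/* guest MSR/MRS trap handler */
		.reset      = reset_val,
		.reg        = CNTV_CTL_EL0,
		.get_user   = arch_timer_get_user,	/* KVM_GET_ONE_REG path */
		.set_user   = arch_timer_set_user,	/* KVM_SET_ONE_REG path */
		.visibility = NULL,			/* always visible */
		.val        = 0,
	}

[The point of the change is visible here: userspace save/restore now goes through dedicated get_user/set_user callbacks that mask ISTATUS and convert counter reads/writes into offsets, instead of reusing the trap handler.]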
@@ -2666,18 +2746,17 @@ static bool access_zcr_el2(struct kvm_vcpu *vcpu, if (guest_hyp_sve_traps_enabled(vcpu)) { kvm_inject_nested_sve_trap(vcpu); - return true; + return false; } if (!p->is_write) { - p->regval = vcpu_read_sys_reg(vcpu, ZCR_EL2); + p->regval = __vcpu_sys_reg(vcpu, ZCR_EL2); return true; } vq = SYS_FIELD_GET(ZCR_ELx, LEN, p->regval) + 1; vq = min(vq, vcpu_sve_max_vq(vcpu)); - vcpu_write_sys_reg(vcpu, vq - 1, ZCR_EL2); - + __vcpu_assign_sys_reg(vcpu, ZCR_EL2, vq - 1); return true; } @@ -2794,6 +2873,16 @@ static unsigned int s1pie_el2_visibility(const struct kvm_vcpu *vcpu, return __el2_visibility(vcpu, rd, s1pie_visibility); } +static unsigned int cnthv_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + if (vcpu_has_nv(vcpu) && + !vcpu_has_feature(vcpu, KVM_ARM_VCPU_HAS_EL2_E2H0)) + return 0; + + return REG_HIDDEN; +} + static bool access_mdcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) @@ -3141,6 +3230,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_AA64ISAR2_EL1_APA3 | ID_AA64ISAR2_EL1_GPA3)), ID_WRITABLE(ID_AA64ISAR3_EL1, (ID_AA64ISAR3_EL1_FPRCVT | + ID_AA64ISAR3_EL1_LSFE | ID_AA64ISAR3_EL1_FAMINMAX)), ID_UNALLOCATED(6,4), ID_UNALLOCATED(6,5), @@ -3152,8 +3242,6 @@ static const struct sys_reg_desc sys_reg_descs[] = { ~(ID_AA64MMFR0_EL1_RES0 | ID_AA64MMFR0_EL1_ASIDBITS)), ID_WRITABLE(ID_AA64MMFR1_EL1, ~(ID_AA64MMFR1_EL1_RES0 | - ID_AA64MMFR1_EL1_HCX | - ID_AA64MMFR1_EL1_TWED | ID_AA64MMFR1_EL1_XNX | ID_AA64MMFR1_EL1_VH | ID_AA64MMFR1_EL1_VMIDBits)), @@ -3238,6 +3326,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_PMBLIMITR_EL1), undef_access }, { SYS_DESC(SYS_PMBPTR_EL1), undef_access }, { SYS_DESC(SYS_PMBSR_EL1), undef_access }, + { SYS_DESC(SYS_PMSDSFR_EL1), undef_access }, /* PMBIDR_EL1 is not trapped */ { PMU_SYS_REG(PMINTENSET_EL1), @@ -3443,17 +3532,19 @@ static const struct sys_reg_desc sys_reg_descs[] = { AMU_AMEVTYPER1_EL0(14), AMU_AMEVTYPER1_EL0(15), - { SYS_DESC(SYS_CNTPCT_EL0), access_arch_timer }, - { SYS_DESC(SYS_CNTVCT_EL0), access_arch_timer }, + { SYS_DESC(SYS_CNTPCT_EL0), .access = access_arch_timer, + .get_user = arch_timer_get_user, .set_user = arch_timer_set_user }, + { SYS_DESC(SYS_CNTVCT_EL0), .access = access_arch_timer, + .get_user = arch_timer_get_user, .set_user = arch_timer_set_user }, { SYS_DESC(SYS_CNTPCTSS_EL0), access_arch_timer }, { SYS_DESC(SYS_CNTVCTSS_EL0), access_arch_timer }, { SYS_DESC(SYS_CNTP_TVAL_EL0), access_arch_timer }, - { SYS_DESC(SYS_CNTP_CTL_EL0), access_arch_timer }, - { SYS_DESC(SYS_CNTP_CVAL_EL0), access_arch_timer }, + TIMER_REG(CNTP_CTL_EL0, NULL), + TIMER_REG(CNTP_CVAL_EL0, NULL), { SYS_DESC(SYS_CNTV_TVAL_EL0), access_arch_timer }, - { SYS_DESC(SYS_CNTV_CTL_EL0), access_arch_timer }, - { SYS_DESC(SYS_CNTV_CVAL_EL0), access_arch_timer }, + TIMER_REG(CNTV_CTL_EL0, NULL), + TIMER_REG(CNTV_CVAL_EL0, NULL), /* PMEVCNTRn_EL0 */ PMU_PMEVCNTR_EL0(0), @@ -3651,12 +3742,12 @@ static const struct sys_reg_desc sys_reg_descs[] = { EL2_REG_VNCR(CNTVOFF_EL2, reset_val, 0), EL2_REG(CNTHCTL_EL2, access_rw, reset_val, 0), { SYS_DESC(SYS_CNTHP_TVAL_EL2), access_arch_timer }, - EL2_REG(CNTHP_CTL_EL2, access_arch_timer, reset_val, 0), - EL2_REG(CNTHP_CVAL_EL2, access_arch_timer, reset_val, 0), + TIMER_REG(CNTHP_CTL_EL2, el2_visibility), + TIMER_REG(CNTHP_CVAL_EL2, el2_visibility), - { SYS_DESC(SYS_CNTHV_TVAL_EL2), access_hv_timer }, - EL2_REG(CNTHV_CTL_EL2, access_hv_timer, reset_val, 0), - EL2_REG(CNTHV_CVAL_EL2, access_hv_timer, 
reset_val, 0), + { SYS_DESC(SYS_CNTHV_TVAL_EL2), access_arch_timer, .visibility = cnthv_visibility }, + TIMER_REG(CNTHV_CTL_EL2, cnthv_visibility), + TIMER_REG(CNTHV_CVAL_EL2, cnthv_visibility), { SYS_DESC(SYS_CNTKCTL_EL12), access_cntkctl_el12 }, @@ -5194,15 +5285,28 @@ static int demux_c15_set(struct kvm_vcpu *vcpu, u64 id, void __user *uaddr) } } +static u64 kvm_one_reg_to_id(const struct kvm_one_reg *reg) +{ + switch(reg->id) { + case KVM_REG_ARM_TIMER_CVAL: + return TO_ARM64_SYS_REG(CNTV_CVAL_EL0); + case KVM_REG_ARM_TIMER_CNT: + return TO_ARM64_SYS_REG(CNTVCT_EL0); + default: + return reg->id; + } +} + int kvm_sys_reg_get_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, const struct sys_reg_desc table[], unsigned int num) { u64 __user *uaddr = (u64 __user *)(unsigned long)reg->addr; const struct sys_reg_desc *r; + u64 id = kvm_one_reg_to_id(reg); u64 val; int ret; - r = id_to_sys_reg_desc(vcpu, reg->id, table, num); + r = id_to_sys_reg_desc(vcpu, id, table, num); if (!r || sysreg_hidden(vcpu, r)) return -ENOENT; @@ -5235,13 +5339,14 @@ int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, { u64 __user *uaddr = (u64 __user *)(unsigned long)reg->addr; const struct sys_reg_desc *r; + u64 id = kvm_one_reg_to_id(reg); u64 val; int ret; if (get_user(val, uaddr)) return -EFAULT; - r = id_to_sys_reg_desc(vcpu, reg->id, table, num); + r = id_to_sys_reg_desc(vcpu, id, table, num); if (!r || sysreg_hidden(vcpu, r)) return -ENOENT; @@ -5301,10 +5406,23 @@ static u64 sys_reg_to_index(const struct sys_reg_desc *reg) static bool copy_reg_to_user(const struct sys_reg_desc *reg, u64 __user **uind) { + u64 idx; + if (!*uind) return true; - if (put_user(sys_reg_to_index(reg), *uind)) + switch (reg_to_encoding(reg)) { + case SYS_CNTV_CVAL_EL0: + idx = KVM_REG_ARM_TIMER_CVAL; + break; + case SYS_CNTVCT_EL0: + idx = KVM_REG_ARM_TIMER_CNT; + break; + default: + idx = sys_reg_to_index(reg); + } + + if (put_user(idx, *uind)) return false; (*uind)++; diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index 317abc490368..b3f904472fac 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -257,4 +257,10 @@ int kvm_finalize_sys_regs(struct kvm_vcpu *vcpu); (val); \ }) +#define TO_ARM64_SYS_REG(r) ARM64_SYS_REG(sys_reg_Op0(SYS_ ## r), \ + sys_reg_Op1(SYS_ ## r), \ + sys_reg_CRn(SYS_ ## r), \ + sys_reg_CRm(SYS_ ## r), \ + sys_reg_Op2(SYS_ ## r)) + #endif /* __ARM64_KVM_SYS_REGS_LOCAL_H__ */ diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 4c3c0d82e476..1796b1a22a72 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -554,7 +554,6 @@ int vgic_lazy_init(struct kvm *kvm) * Also map the virtual CPU interface into the VM. * v2 calls vgic_init() if not already done. * v3 and derivatives return an error if the VGIC is not initialized. - * vgic_ready() returns true if this function has succeeded. 
*/ int kvm_vgic_map_resources(struct kvm *kvm) { @@ -563,12 +562,12 @@ int kvm_vgic_map_resources(struct kvm *kvm) gpa_t dist_base; int ret = 0; - if (likely(vgic_ready(kvm))) + if (likely(smp_load_acquire(&dist->ready))) return 0; mutex_lock(&kvm->slots_lock); mutex_lock(&kvm->arch.config_lock); - if (vgic_ready(kvm)) + if (dist->ready) goto out; if (!irqchip_in_kernel(kvm)) @@ -594,14 +593,7 @@ int kvm_vgic_map_resources(struct kvm *kvm) goto out_slots; } - /* - * kvm_io_bus_register_dev() guarantees all readers see the new MMIO - * registration before returning through synchronize_srcu(), which also - * implies a full memory barrier. As such, marking the distributor as - * 'ready' here is guaranteed to be ordered after all vCPUs having seen - * a completely configured distributor. - */ - dist->ready = true; + smp_store_release(&dist->ready, true); goto out_slots; out: mutex_unlock(&kvm->arch.config_lock); diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index b9ad7c42c5b0..6fbb4b099855 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -297,8 +297,11 @@ void vcpu_set_ich_hcr(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3; + if (!vgic_is_v3(vcpu->kvm)) + return; + /* Hide GICv3 sysreg if necessary */ - if (!kvm_has_gicv3(vcpu->kvm)) { + if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) { vgic_v3->vgic_hcr |= (ICH_HCR_EL2_TALL0 | ICH_HCR_EL2_TALL1 | ICH_HCR_EL2_TC); return; @@ -588,6 +591,7 @@ int vgic_v3_map_resources(struct kvm *kvm) } DEFINE_STATIC_KEY_FALSE(vgic_v3_cpuif_trap); +DEFINE_STATIC_KEY_FALSE(vgic_v3_has_v2_compat); static int __init early_group0_trap_cfg(char *buf) { @@ -697,6 +701,13 @@ int vgic_v3_probe(const struct gic_kvm_info *info) if (kvm_vgic_global_state.vcpu_base == 0) kvm_info("disabling GICv2 emulation\n"); + /* + * Flip the static branch if the HW supports v2, even if we're + * not using it (such as in protected mode). + */ + if (has_v2) + static_branch_enable(&vgic_v3_has_v2_compat); + if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_30115)) { group0_trap = true; group1_trap = true; diff --git a/arch/arm64/kvm/vgic/vgic-v5.c b/arch/arm64/kvm/vgic/vgic-v5.c index 6bdbb221bcde..2d3811f4e117 100644 --- a/arch/arm64/kvm/vgic/vgic-v5.c +++ b/arch/arm64/kvm/vgic/vgic-v5.c @@ -15,7 +15,7 @@ int vgic_v5_probe(const struct gic_kvm_info *info) u64 ich_vtr_el2; int ret; - if (!info->has_gcie_v3_compat) + if (!cpus_have_final_cap(ARM64_HAS_GICV5_LEGACY)) return -ENODEV; kvm_vgic_global_state.type = VGIC_V5; diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index b3d8c3de4149..b8d37eb037fc 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -470,14 +470,6 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, mutex_unlock(&fixmap_lock); } -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -extern __alias(__create_pgd_mapping_locked) -void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, - phys_addr_t size, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(enum pgtable_type), - int flags); -#endif - #define INVALID_PHYS_ADDR (-1ULL) static phys_addr_t __pgd_pgtable_alloc(struct mm_struct *mm, gfp_t gfp, @@ -823,7 +815,7 @@ static bool linear_map_requires_bbml2 __initdata; u32 idmap_kpti_bbml2_flag; -void __init init_idmap_kpti_bbml2_flag(void) +static void __init init_idmap_kpti_bbml2_flag(void) { WRITE_ONCE(idmap_kpti_bbml2_flag, 1); /* Must be visible to other CPUs before stop_machine() is called. 
*/ @@ -1135,7 +1127,93 @@ static void __init declare_vma(struct vm_struct *vma, } #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -static pgprot_t kernel_exec_prot(void) +#define KPTI_NG_TEMP_VA (-(1UL << PMD_SHIFT)) + +static phys_addr_t kpti_ng_temp_alloc __initdata; + +static phys_addr_t __init kpti_ng_pgd_alloc(enum pgtable_type type) +{ + kpti_ng_temp_alloc -= PAGE_SIZE; + return kpti_ng_temp_alloc; +} + +static int __init __kpti_install_ng_mappings(void *__unused) +{ + typedef void (kpti_remap_fn)(int, int, phys_addr_t, unsigned long); + extern kpti_remap_fn idmap_kpti_install_ng_mappings; + kpti_remap_fn *remap_fn; + + int cpu = smp_processor_id(); + int levels = CONFIG_PGTABLE_LEVELS; + int order = order_base_2(levels); + u64 kpti_ng_temp_pgd_pa = 0; + pgd_t *kpti_ng_temp_pgd; + u64 alloc = 0; + + if (levels == 5 && !pgtable_l5_enabled()) + levels = 4; + else if (levels == 4 && !pgtable_l4_enabled()) + levels = 3; + + remap_fn = (void *)__pa_symbol(idmap_kpti_install_ng_mappings); + + if (!cpu) { + alloc = __get_free_pages(GFP_ATOMIC | __GFP_ZERO, order); + kpti_ng_temp_pgd = (pgd_t *)(alloc + (levels - 1) * PAGE_SIZE); + kpti_ng_temp_alloc = kpti_ng_temp_pgd_pa = __pa(kpti_ng_temp_pgd); + + // + // Create a minimal page table hierarchy that permits us to map + // the swapper page tables temporarily as we traverse them. + // + // The physical pages are laid out as follows: + // + // +--------+-/-------+-/------ +-/------ +-\\\--------+ + // : PTE[] : | PMD[] : | PUD[] : | P4D[] : ||| PGD[] : + // +--------+-\-------+-\------ +-\------ +-///--------+ + // ^ + // The first page is mapped into this hierarchy at a PMD_SHIFT + // aligned virtual address, so that we can manipulate the PTE + // level entries while the mapping is active. The first entry + // covers the PTE[] page itself, the remaining entries are free + // to be used as a ad-hoc fixmap. + // + __create_pgd_mapping_locked(kpti_ng_temp_pgd, __pa(alloc), + KPTI_NG_TEMP_VA, PAGE_SIZE, PAGE_KERNEL, + kpti_ng_pgd_alloc, 0); + } + + cpu_install_idmap(); + remap_fn(cpu, num_online_cpus(), kpti_ng_temp_pgd_pa, KPTI_NG_TEMP_VA); + cpu_uninstall_idmap(); + + if (!cpu) { + free_pages(alloc, order); + arm64_use_ng_mappings = true; + } + + return 0; +} + +void __init kpti_install_ng_mappings(void) +{ + /* Check whether KPTI is going to be used */ + if (!arm64_kernel_unmapped_at_el0()) + return; + + /* + * We don't need to rewrite the page-tables if either we've done + * it already or we have KASLR enabled and therefore have not + * created any global mappings at all. + */ + if (arm64_use_ng_mappings) + return; + + init_idmap_kpti_bbml2_flag(); + stop_machine(__kpti_install_ng_mappings, NULL, cpu_online_mask); +} + +static pgprot_t __init kernel_exec_prot(void) { return rodata_enabled ? 
PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC; } diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 9ff5cdbd2759..1b32c1232d28 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -37,6 +37,7 @@ HAS_GENERIC_AUTH_ARCH_QARMA5 HAS_GENERIC_AUTH_IMP_DEF HAS_GICV3_CPUIF HAS_GICV5_CPUIF +HAS_GICV5_LEGACY HAS_GIC_PRIO_MASKING HAS_GIC_PRIO_RELAXED_SYNC HAS_HCR_NV1 diff --git a/arch/csky/mm/fault.c b/arch/csky/mm/fault.c index a885518ce1dd..a6ca7dff4215 100644 --- a/arch/csky/mm/fault.c +++ b/arch/csky/mm/fault.c @@ -277,7 +277,7 @@ retry: if (fault & VM_FAULT_COMPLETED) return; - if (unlikely((fault & VM_FAULT_RETRY) && (flags & FAULT_FLAG_ALLOW_RETRY))) { + if (unlikely(fault & VM_FAULT_RETRY)) { flags |= FAULT_FLAG_TRIED; /* diff --git a/arch/hexagon/configs/comet_defconfig b/arch/hexagon/configs/comet_defconfig index c6108f000288..22d7f8ac58a3 100644 --- a/arch/hexagon/configs/comet_defconfig +++ b/arch/hexagon/configs/comet_defconfig @@ -46,10 +46,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_QUOTA=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index ea683bcea14c..5b1116733d88 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -70,6 +70,7 @@ config LOONGARCH select ARCH_SUPPORTS_LTO_CLANG_THIN select ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS select ARCH_SUPPORTS_NUMA_BALANCING + select ARCH_SUPPORTS_PER_VMA_LOCK select ARCH_SUPPORTS_RT select ARCH_SUPPORTS_SCHED_SMT if SMP select ARCH_SUPPORTS_SCHED_MC if SMP @@ -618,6 +619,16 @@ config CPU_HAS_PREFETCH config ARCH_SUPPORTS_KEXEC def_bool y +config ARCH_SUPPORTS_KEXEC_FILE + def_bool 64BIT + +config ARCH_SELECTS_KEXEC_FILE + def_bool 64BIT + depends on KEXEC_FILE + select KEXEC_ELF + select RELOCATABLE + select HAVE_IMA_KEXEC if IMA + config ARCH_SUPPORTS_CRASH_DUMP def_bool y diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index ae419e32f22e..dc5bd3f1b8d2 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -115,7 +115,7 @@ ifdef CONFIG_LTO_CLANG # The annotate-tablejump option can not be passed to LLVM backend when LTO is enabled. # Ensure it is aware of linker with LTO, '--loongarch-annotate-tablejump' also needs to # be passed via '-mllvm' to ld.lld. 
-KBUILD_LDFLAGS += -mllvm --loongarch-annotate-tablejump +KBUILD_LDFLAGS += $(call ld-option,-mllvm --loongarch-annotate-tablejump) endif endif @@ -129,7 +129,7 @@ KBUILD_RUSTFLAGS_KERNEL += -Crelocation-model=pie LDFLAGS_vmlinux += -static -pie --no-dynamic-linker -z notext $(call ld-option, --apply-dynamic-relocs) endif -cflags-y += $(call cc-option, -mno-check-zero-division) +cflags-y += $(call cc-option, -mno-check-zero-division -fno-isolate-erroneous-paths-dereference) ifndef CONFIG_KASAN cflags-y += -fno-builtin-memcpy -fno-builtin-memmove -fno-builtin-memset diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index 2b8df0e9e42a..3e838c229cd5 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -45,6 +45,7 @@ CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y CONFIG_PERF_EVENTS=y CONFIG_KEXEC=y +CONFIG_KEXEC_FILE=y CONFIG_CRASH_DUMP=y CONFIG_LOONGARCH=y CONFIG_64BIT=y @@ -55,7 +56,7 @@ CONFIG_DMI=y CONFIG_EFI=y CONFIG_SMP=y CONFIG_HOTPLUG_CPU=y -CONFIG_NR_CPUS=256 +CONFIG_NR_CPUS=2048 CONFIG_NUMA=y CONFIG_CPU_HAS_FPU=y CONFIG_CPU_HAS_LSX=y @@ -154,7 +155,16 @@ CONFIG_INET_ESPINTCP=y CONFIG_INET_IPCOMP=m CONFIG_INET_UDP_DIAG=y CONFIG_TCP_CONG_ADVANCED=y -CONFIG_TCP_CONG_BBR=m +CONFIG_TCP_CONG_BIC=y +CONFIG_TCP_CONG_HSTCP=m +CONFIG_TCP_CONG_HYBLA=m +CONFIG_TCP_CONG_VEGAS=m +CONFIG_TCP_CONG_NV=m +CONFIG_TCP_CONG_SCALABLE=m +CONFIG_TCP_CONG_VENO=m +CONFIG_TCP_CONG_DCTCP=m +CONFIG_TCP_CONG_CDG=m +CONFIG_TCP_CONG_BBR=y CONFIG_IPV6_ROUTER_PREF=y CONFIG_IPV6_ROUTE_INFO=y CONFIG_INET6_AH=m @@ -331,15 +341,33 @@ CONFIG_LLC2=m CONFIG_NET_SCHED=y CONFIG_NET_SCH_HTB=m CONFIG_NET_SCH_PRIO=m +CONFIG_NET_SCH_MULTIQ=m +CONFIG_NET_SCH_RED=m +CONFIG_NET_SCH_SFB=m CONFIG_NET_SCH_SFQ=m CONFIG_NET_SCH_TBF=m +CONFIG_NET_SCH_CBS=m +CONFIG_NET_SCH_GRED=m CONFIG_NET_SCH_NETEM=m +CONFIG_NET_SCH_MQPRIO=m +CONFIG_NET_SCH_SKBPRIO=m +CONFIG_NET_SCH_QFQ=m +CONFIG_NET_SCH_CODEL=m +CONFIG_NET_SCH_FQ_CODEL=m +CONFIG_NET_SCH_CAKE=m +CONFIG_NET_SCH_FQ=m +CONFIG_NET_SCH_PIE=m +CONFIG_NET_SCH_FQ_PIE=m CONFIG_NET_SCH_INGRESS=m +CONFIG_NET_SCH_DEFAULT=y CONFIG_NET_CLS_BASIC=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_U32=m +CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_CGROUP=m CONFIG_NET_CLS_BPF=m +CONFIG_NET_CLS_FLOWER=m +CONFIG_NET_CLS_MATCHALL=m CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=m CONFIG_NET_ACT_GACT=m @@ -407,6 +435,7 @@ CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_FW_LOADER_COMPRESS=y CONFIG_FW_LOADER_COMPRESS_ZSTD=y +CONFIG_SYSFB_SIMPLEFB=y CONFIG_EFI_ZBOOT=y CONFIG_EFI_BOOTLOADER_CONTROL=m CONFIG_EFI_CAPSULE_LOADER=m @@ -420,6 +449,11 @@ CONFIG_MTD_CFI_AMDSTD=m CONFIG_MTD_CFI_STAA=m CONFIG_MTD_RAM=m CONFIG_MTD_ROM=m +CONFIG_MTD_RAW_NAND=m +CONFIG_MTD_NAND_PLATFORM=m +CONFIG_MTD_NAND_LOONGSON=m +CONFIG_MTD_NAND_ECC_SW_HAMMING_SMC=y +CONFIG_MTD_NAND_ECC_SW_BCH=y CONFIG_MTD_UBI=m CONFIG_MTD_UBI_BLOCK=y CONFIG_PARPORT=y @@ -575,6 +609,11 @@ CONFIG_E1000=y CONFIG_E1000E=y CONFIG_IGB=y CONFIG_IXGBE=y +CONFIG_I40E=y +CONFIG_ICE=y +CONFIG_FM10K=y +CONFIG_IGC=y +CONFIG_IDPF=y # CONFIG_NET_VENDOR_MARVELL is not set # CONFIG_NET_VENDOR_MELLANOX is not set # CONFIG_NET_VENDOR_MICREL is not set @@ -679,6 +718,9 @@ CONFIG_USB4_NET=m CONFIG_INPUT_MOUSEDEV=y CONFIG_INPUT_MOUSEDEV_PSAUX=y CONFIG_INPUT_EVDEV=y +CONFIG_KEYBOARD_GPIO=m +CONFIG_KEYBOARD_GPIO_POLLED=m +CONFIG_KEYBOARD_MATRIX=m CONFIG_KEYBOARD_XTKBD=m CONFIG_MOUSE_PS2_ELANTECH=y CONFIG_MOUSE_PS2_SENTELIC=y @@ -703,8 +745,11 @@ CONFIG_VIRTIO_CONSOLE=y CONFIG_IPMI_HANDLER=m 
CONFIG_IPMI_DEVICE_INTERFACE=m CONFIG_IPMI_SI=m +CONFIG_IPMI_LS2K=y CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM_VIRTIO=m +CONFIG_TCG_TPM=m +CONFIG_TCG_LOONGSON=m CONFIG_I2C_CHARDEV=y CONFIG_I2C_PIIX4=y CONFIG_I2C_DESIGNWARE_CORE=y @@ -720,6 +765,10 @@ CONFIG_PINCTRL_LOONGSON2=y CONFIG_GPIO_SYSFS=y CONFIG_GPIO_LOONGSON=y CONFIG_GPIO_LOONGSON_64BIT=y +CONFIG_GPIO_PCA953X=m +CONFIG_GPIO_PCA953X_IRQ=y +CONFIG_GPIO_PCA9570=m +CONFIG_GPIO_PCF857X=m CONFIG_POWER_RESET=y CONFIG_POWER_RESET_RESTART=y CONFIG_POWER_RESET_SYSCON=y @@ -730,6 +779,7 @@ CONFIG_SENSORS_LM93=m CONFIG_SENSORS_W83795=m CONFIG_SENSORS_W83627HF=m CONFIG_LOONGSON2_THERMAL=m +CONFIG_MFD_LOONGSON_SE=m CONFIG_RC_CORE=m CONFIG_LIRC=y CONFIG_RC_DECODERS=y @@ -761,6 +811,7 @@ CONFIG_DRM_AST=y CONFIG_DRM_QXL=m CONFIG_DRM_VIRTIO_GPU=m CONFIG_DRM_LOONGSON=y +CONFIG_DRM_SIMPLEDRM=y CONFIG_FB=y CONFIG_FB_EFI=y CONFIG_FB_RADEON=y @@ -801,6 +852,7 @@ CONFIG_SND_HDA_CODEC_HDMI_ATI=y CONFIG_SND_HDA_CODEC_HDMI_NVIDIA=y CONFIG_SND_HDA_CODEC_CONEXANT=y CONFIG_SND_USB_AUDIO=m +CONFIG_SND_USB_AUDIO_MIDI_V2=y CONFIG_SND_SOC=m CONFIG_SND_SOC_LOONGSON_CARD=m CONFIG_SND_SOC_ES7134=m @@ -861,6 +913,8 @@ CONFIG_TYPEC_TCPM=m CONFIG_TYPEC_TCPCI=m CONFIG_TYPEC_UCSI=m CONFIG_UCSI_ACPI=m +CONFIG_MMC=y +CONFIG_MMC_LOONGSON2=m CONFIG_INFINIBAND=m CONFIG_EDAC=y # CONFIG_EDAC_LEGACY_SYSFS is not set @@ -922,19 +976,22 @@ CONFIG_NTB_SWITCHTEC=m CONFIG_NTB_PERF=m CONFIG_NTB_TRANSPORT=m CONFIG_PWM=y +CONFIG_PWM_LOONGSON=y CONFIG_GENERIC_PHY=y CONFIG_USB4=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y CONFIG_JFS_SECURITY=y CONFIG_XFS_FS=y +CONFIG_XFS_SUPPORT_V4=y +CONFIG_XFS_SUPPORT_ASCII_CI=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y CONFIG_GFS2_FS=m @@ -1026,9 +1083,12 @@ CONFIG_CEPH_FS_SECURITY_LABEL=y CONFIG_CIFS=m # CONFIG_CIFS_DEBUG is not set CONFIG_9P_FS=y +CONFIG_NLS_DEFAULT="utf8" CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_936=y +CONFIG_NLS_CODEPAGE_950=y CONFIG_NLS_ASCII=y +CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y CONFIG_DLM=m CONFIG_KEY_DH_OPERATIONS=y @@ -1049,9 +1109,11 @@ CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_SM4_GENERIC=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_CHACHA20POLY1305=m +CONFIG_CRYPTO_SM3_GENERIC=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_DEFLATE=m CONFIG_CRYPTO_LZO=m @@ -1063,6 +1125,7 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_CRYPTO_DEV_VIRTIO=m +CONFIG_CRYPTO_DEV_LOONGSON_RNG=m CONFIG_DMA_CMA=y CONFIG_DMA_NUMA_CMA=y CONFIG_CMA_SIZE_MBYTES=0 diff --git a/arch/loongarch/include/asm/image.h b/arch/loongarch/include/asm/image.h new file mode 100644 index 000000000000..cab981cdb72a --- /dev/null +++ b/arch/loongarch/include/asm/image.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * LoongArch binary image header for EFI(PE/COFF) format. + * + * Author: Youling Tang <tangyouling@kylinos.cn> + * Copyright (C) 2025 KylinSoft Corporation. + */ + +#ifndef __ASM_IMAGE_H +#define __ASM_IMAGE_H + +#ifndef __ASSEMBLER__ + +/** + * struct loongarch_image_header + * + * @dos_sig: Optional PE format 'MZ' signature. + * @padding_1: Reserved. + * @kernel_entry: Kernel image entry pointer. + * @kernel_asize: An estimated size of the memory image size in LSB byte order. 
+ * @text_offset: The image load offset in LSB byte order. + * @padding_2: Reserved. + * @pe_header: Optional offset to a PE format header. + **/ + +struct loongarch_image_header { + uint8_t dos_sig[2]; + uint16_t padding_1[3]; + uint64_t kernel_entry; + uint64_t kernel_asize; + uint64_t text_offset; + uint32_t padding_2[7]; + uint32_t pe_header; +}; + +/* + * loongarch_header_check_dos_sig - Helper to check the header + * + * Returns true (non-zero) if 'MZ' signature is found. + */ + +static inline int loongarch_header_check_dos_sig(const struct loongarch_image_header *h) +{ + if (!h) + return 0; + + return (h->dos_sig[0] == 'M' && h->dos_sig[1] == 'Z'); +} + +#endif /* __ASSEMBLER__ */ + +#endif /* __ASM_IMAGE_H */ diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h index 277d2140676b..55e64a12a124 100644 --- a/arch/loongarch/include/asm/inst.h +++ b/arch/loongarch/include/asm/inst.h @@ -77,6 +77,10 @@ enum reg2_op { iocsrwrh_op = 0x19205, iocsrwrw_op = 0x19206, iocsrwrd_op = 0x19207, + llacqw_op = 0xe15e0, + screlw_op = 0xe15e1, + llacqd_op = 0xe15e2, + screld_op = 0xe15e3, }; enum reg2i5_op { @@ -189,6 +193,7 @@ enum reg3_op { fldxd_op = 0x7068, fstxs_op = 0x7070, fstxd_op = 0x7078, + scq_op = 0x70ae, amswapw_op = 0x70c0, amswapd_op = 0x70c1, amaddw_op = 0x70c2, diff --git a/arch/loongarch/include/asm/kexec.h b/arch/loongarch/include/asm/kexec.h index cf95cd3eb2de..209fa43222e1 100644 --- a/arch/loongarch/include/asm/kexec.h +++ b/arch/loongarch/include/asm/kexec.h @@ -41,6 +41,18 @@ struct kimage_arch { unsigned long systable_ptr; }; +#ifdef CONFIG_KEXEC_FILE +extern const struct kexec_file_ops kexec_efi_ops; +extern const struct kexec_file_ops kexec_elf_ops; + +int arch_kimage_file_post_load_cleanup(struct kimage *image); +#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup + +extern int load_other_segments(struct kimage *image, + unsigned long kernel_load_addr, unsigned long kernel_size, + char *initrd, unsigned long initrd_len, char *cmdline, unsigned long cmdline_len); +#endif + typedef void (*do_kexec_t)(unsigned long efi_boot, unsigned long cmdline_ptr, unsigned long systable_ptr, diff --git a/arch/loongarch/include/asm/kvm_pch_pic.h b/arch/loongarch/include/asm/kvm_pch_pic.h index e6df6a4c1c70..7f33a3039272 100644 --- a/arch/loongarch/include/asm/kvm_pch_pic.h +++ b/arch/loongarch/include/asm/kvm_pch_pic.h @@ -34,13 +34,26 @@ #define PCH_PIC_INT_ISR_END 0x3af #define PCH_PIC_POLARITY_START 0x3e0 #define PCH_PIC_POLARITY_END 0x3e7 -#define PCH_PIC_INT_ID_VAL 0x7000000UL +#define PCH_PIC_INT_ID_VAL 0x7UL #define PCH_PIC_INT_ID_VER 0x1UL +union pch_pic_id { + struct { + uint8_t reserved_0[3]; + uint8_t id; + uint8_t version; + uint8_t reserved_1; + uint8_t irq_num; + uint8_t reserved_2; + } desc; + uint64_t data; +}; + struct loongarch_pch_pic { spinlock_t lock; struct kvm *kvm; struct kvm_io_device device; + union pch_pic_id id; uint64_t mask; /* 1:disable irq, 0:enable irq */ uint64_t htmsi_en; /* 1:msi */ uint64_t edge; /* 1:edge triggered, 0:level triggered */ diff --git a/arch/loongarch/include/uapi/asm/kvm.h b/arch/loongarch/include/uapi/asm/kvm.h index 5f354f5c6847..57ba1a563bb1 100644 --- a/arch/loongarch/include/uapi/asm/kvm.h +++ b/arch/loongarch/include/uapi/asm/kvm.h @@ -103,6 +103,7 @@ struct kvm_fpu { #define KVM_LOONGARCH_VM_FEAT_PMU 5 #define KVM_LOONGARCH_VM_FEAT_PV_IPI 6 #define KVM_LOONGARCH_VM_FEAT_PV_STEALTIME 7 +#define KVM_LOONGARCH_VM_FEAT_PTW 8 /* Device Control API on vcpu fd */ #define 
KVM_LOONGARCH_VCPU_CPUCFG 0 diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 6f5a4574a911..001924877772 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -62,6 +62,7 @@ obj-$(CONFIG_MAGIC_SYSRQ) += sysrq.o obj-$(CONFIG_RELOCATABLE) += relocate.o obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o +obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o kexec_efi.o kexec_elf.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c index fedaa67cde41..cbfce2872d71 100644 --- a/arch/loongarch/kernel/cpu-probe.c +++ b/arch/loongarch/kernel/cpu-probe.c @@ -52,6 +52,48 @@ static inline void cpu_set_fpu_fcsr_mask(struct cpuinfo_loongarch *c) c->fpu_mask = ~(fcsr0 ^ fcsr1) & ~mask; } +/* simd = -1/0/128/256 */ +static unsigned int simd = -1U; + +static int __init cpu_setup_simd(char *str) +{ + get_option(&str, &simd); + pr_info("Set SIMD width = %u\n", simd); + + return 0; +} + +early_param("simd", cpu_setup_simd); + +static int __init cpu_final_simd(void) +{ + struct cpuinfo_loongarch *c = &cpu_data[0]; + + if (simd < 128) { + c->options &= ~LOONGARCH_CPU_LSX; + elf_hwcap &= ~HWCAP_LOONGARCH_LSX; + } + + if (simd < 256) { + c->options &= ~LOONGARCH_CPU_LASX; + elf_hwcap &= ~HWCAP_LOONGARCH_LASX; + } + + simd = 0; + + if (c->options & LOONGARCH_CPU_LSX) + simd = 128; + + if (c->options & LOONGARCH_CPU_LASX) + simd = 256; + + pr_info("Final SIMD width = %u\n", simd); + + return 0; +} + +arch_initcall(cpu_final_simd); + static inline void set_elf_platform(int cpu, const char *plat) { if (cpu == 0) @@ -134,13 +176,13 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c) elf_hwcap |= HWCAP_LOONGARCH_FPU; } #ifdef CONFIG_CPU_HAS_LSX - if (config & CPUCFG2_LSX) { + if ((config & CPUCFG2_LSX) && (simd >= 128)) { c->options |= LOONGARCH_CPU_LSX; elf_hwcap |= HWCAP_LOONGARCH_LSX; } #endif #ifdef CONFIG_CPU_HAS_LASX - if (config & CPUCFG2_LASX) { + if ((config & CPUCFG2_LASX) && (simd >= 256)) { c->options |= LOONGARCH_CPU_LASX; elf_hwcap |= HWCAP_LOONGARCH_LASX; } diff --git a/arch/loongarch/kernel/inst.c b/arch/loongarch/kernel/inst.c index 72ecfed29d55..bf037f0c6b26 100644 --- a/arch/loongarch/kernel/inst.c +++ b/arch/loongarch/kernel/inst.c @@ -141,6 +141,9 @@ bool insns_not_supported(union loongarch_instruction insn) case amswapw_op ... ammindbdu_op: pr_notice("atomic memory access instructions are not supported\n"); return true; + case scq_op: + pr_notice("sc.q instruction is not supported\n"); + return true; } switch (insn.reg2i14_format.opcode) { @@ -152,6 +155,15 @@ bool insns_not_supported(union loongarch_instruction insn) return true; } + switch (insn.reg2_format.opcode) { + case llacqw_op: + case llacqd_op: + case screlw_op: + case screld_op: + pr_notice("llacq and screl instructions are not supported\n"); + return true; + } + switch (insn.reg1i21_format.opcode) { case bceqz_op: pr_notice("bceqz and bcnez instructions are not supported\n"); diff --git a/arch/loongarch/kernel/kexec_efi.c b/arch/loongarch/kernel/kexec_efi.c new file mode 100644 index 000000000000..45121b914f8f --- /dev/null +++ b/arch/loongarch/kernel/kexec_efi.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Load EFI vmlinux file for the kexec_file_load syscall. + * + * Author: Youling Tang <tangyouling@kylinos.cn> + * Copyright (C) 2025 KylinSoft Corporation. 
+ */ + +#define pr_fmt(fmt) "kexec_file(EFI): " fmt + +#include <linux/err.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/kexec.h> +#include <linux/pe.h> +#include <linux/string.h> +#include <asm/byteorder.h> +#include <asm/cpufeature.h> +#include <asm/image.h> + +static int efi_kexec_probe(const char *kernel_buf, unsigned long kernel_len) +{ + const struct loongarch_image_header *h = (const struct loongarch_image_header *)kernel_buf; + + if (!h || (kernel_len < sizeof(*h))) { + kexec_dprintk("No LoongArch image header.\n"); + return -EINVAL; + } + + if (!loongarch_header_check_dos_sig(h)) { + kexec_dprintk("No LoongArch PE image header.\n"); + return -EINVAL; + } + + return 0; +} + +static void *efi_kexec_load(struct kimage *image, + char *kernel, unsigned long kernel_len, + char *initrd, unsigned long initrd_len, + char *cmdline, unsigned long cmdline_len) +{ + int ret; + unsigned long text_offset, kernel_segment_number; + struct kexec_buf kbuf; + struct kexec_segment *kernel_segment; + struct loongarch_image_header *h; + + h = (struct loongarch_image_header *)kernel; + if (!h->kernel_asize) + return ERR_PTR(-EINVAL); + + /* + * Load the kernel + * FIXME: Non-relocatable kernel rejected for kexec_file (require CONFIG_RELOCATABLE) + */ + kbuf.image = image; + kbuf.buf_max = ULONG_MAX; + kbuf.top_down = false; + + kbuf.buffer = kernel; + kbuf.bufsz = kernel_len; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + kbuf.memsz = le64_to_cpu(h->kernel_asize); + text_offset = le64_to_cpu(h->text_offset); + kbuf.buf_min = text_offset; + kbuf.buf_align = SZ_2M; + + kernel_segment_number = image->nr_segments; + + /* + * The location of the kernel segment may make it impossible to + * satisfy the other segment requirements, so we try repeatedly + * to find a location that will work. + */ + while ((ret = kexec_add_buffer(&kbuf)) == 0) { + /* Try to load additional data */ + kernel_segment = &image->segment[kernel_segment_number]; + ret = load_other_segments(image, kernel_segment->mem, + kernel_segment->memsz, initrd, + initrd_len, cmdline, cmdline_len); + if (!ret) + break; + + /* + * We couldn't find space for the other segments; erase the + * kernel segment and try the next available hole. + */ + image->nr_segments -= 1; + kbuf.buf_min = kernel_segment->mem + kernel_segment->memsz; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + } + + if (ret < 0) { + pr_err("Could not find any suitable kernel location!"); + return ERR_PTR(ret); + } + + kernel_segment = &image->segment[kernel_segment_number]; + + /* Make sure the second kernel jumps to the correct "kernel_entry" */ + image->start = kernel_segment->mem + h->kernel_entry - text_offset; + + kexec_dprintk("Loaded kernel at 0x%lx bufsz=0x%lx memsz=0x%lx\n", + kernel_segment->mem, kbuf.bufsz, kernel_segment->memsz); + + return NULL; +} + +const struct kexec_file_ops kexec_efi_ops = { + .probe = efi_kexec_probe, + .load = efi_kexec_load, +}; diff --git a/arch/loongarch/kernel/kexec_elf.c b/arch/loongarch/kernel/kexec_elf.c new file mode 100644 index 000000000000..97b2f049801a --- /dev/null +++ b/arch/loongarch/kernel/kexec_elf.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Load ELF vmlinux file for the kexec_file_load syscall. + * + * Author: Youling Tang <tangyouling@kylinos.cn> + * Copyright (C) 2025 KylinSoft Corporation. 
+ */ + +#define pr_fmt(fmt) "kexec_file(ELF): " fmt + +#include <linux/elf.h> +#include <linux/kexec.h> +#include <linux/slab.h> +#include <linux/types.h> +#include <linux/memblock.h> +#include <asm/setup.h> + +#define elf_kexec_probe kexec_elf_probe + +static int _elf_kexec_load(struct kimage *image, + struct elfhdr *ehdr, struct kexec_elf_info *elf_info, + struct kexec_buf *kbuf, unsigned long *text_offset) +{ + int i, ret = -1; + + /* Read in the PT_LOAD segments. */ + for (i = 0; i < ehdr->e_phnum; i++) { + size_t size; + const struct elf_phdr *phdr; + + phdr = &elf_info->proghdrs[i]; + if (phdr->p_type != PT_LOAD) + continue; + + size = phdr->p_filesz; + if (size > phdr->p_memsz) + size = phdr->p_memsz; + + kbuf->buffer = (void *)elf_info->buffer + phdr->p_offset; + kbuf->bufsz = size; + kbuf->buf_align = phdr->p_align; + *text_offset = __pa(phdr->p_paddr); + kbuf->buf_min = *text_offset; + kbuf->memsz = ALIGN(phdr->p_memsz, SZ_64K); + kbuf->mem = KEXEC_BUF_MEM_UNKNOWN; + ret = kexec_add_buffer(kbuf); + if (ret < 0) + break; + } + + return ret; +} + +static void *elf_kexec_load(struct kimage *image, + char *kernel, unsigned long kernel_len, + char *initrd, unsigned long initrd_len, + char *cmdline, unsigned long cmdline_len) +{ + int ret; + unsigned long text_offset, kernel_segment_number; + struct elfhdr ehdr; + struct kexec_buf kbuf; + struct kexec_elf_info elf_info; + struct kexec_segment *kernel_segment; + + ret = kexec_build_elf_info(kernel, kernel_len, &ehdr, &elf_info); + if (ret < 0) + return ERR_PTR(ret); + + /* + * Load the kernel + * FIXME: Non-relocatable kernel rejected for kexec_file (require CONFIG_RELOCATABLE) + */ + kbuf.image = image; + kbuf.buf_max = ULONG_MAX; + kbuf.top_down = false; + + kernel_segment_number = image->nr_segments; + + ret = _elf_kexec_load(image, &ehdr, &elf_info, &kbuf, &text_offset); + if (ret < 0) + goto out; + + /* Load additional data */ + kernel_segment = &image->segment[kernel_segment_number]; + ret = load_other_segments(image, kernel_segment->mem, kernel_segment->memsz, + initrd, initrd_len, cmdline, cmdline_len); + if (ret < 0) + goto out; + + /* Make sure the second kernel jumps to the correct "kernel_entry". */ + image->start = kernel_segment->mem + __pa(ehdr.e_entry) - text_offset; + + kexec_dprintk("Loaded kernel at 0x%lx bufsz=0x%lx memsz=0x%lx\n", + kernel_segment->mem, kbuf.bufsz, kernel_segment->memsz); + +out: + kexec_free_elf_info(&elf_info); + return ret ? ERR_PTR(ret) : NULL; +} + +const struct kexec_file_ops kexec_elf_ops = { + .probe = elf_kexec_probe, + .load = elf_kexec_load, +}; diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c index f9381800e291..e4b2bbc47e62 100644 --- a/arch/loongarch/kernel/machine_kexec.c +++ b/arch/loongarch/kernel/machine_kexec.c @@ -70,18 +70,28 @@ int machine_kexec_prepare(struct kimage *kimage) kimage->arch.efi_boot = fw_arg0; kimage->arch.systable_ptr = fw_arg2; - /* Find the command line */ - for (i = 0; i < kimage->nr_segments; i++) { - if (!strncmp(bootloader, (char __user *)kimage->segment[i].buf, strlen(bootloader))) { - if (!copy_from_user(cmdline_ptr, kimage->segment[i].buf, COMMAND_LINE_SIZE)) - kimage->arch.cmdline_ptr = (unsigned long)cmdline_ptr; - break; + if (kimage->file_mode == 1) { + /* + * kimage->cmdline_buf will be released in kexec_file_load, so copy + * to the KEXEC_CMDLINE_ADDR safe area. 
+ */ + memcpy((void *)KEXEC_CMDLINE_ADDR, (void *)kimage->arch.cmdline_ptr, + strlen((char *)kimage->arch.cmdline_ptr) + 1); + kimage->arch.cmdline_ptr = (unsigned long)KEXEC_CMDLINE_ADDR; + } else { + /* Find the command line */ + for (i = 0; i < kimage->nr_segments; i++) { + if (!strncmp(bootloader, (char __user *)kimage->segment[i].buf, strlen(bootloader))) { + if (!copy_from_user(cmdline_ptr, kimage->segment[i].buf, COMMAND_LINE_SIZE)) + kimage->arch.cmdline_ptr = (unsigned long)cmdline_ptr; + break; + } } - } - if (!kimage->arch.cmdline_ptr) { - pr_err("Command line not included in the provided image\n"); - return -EINVAL; + if (!kimage->arch.cmdline_ptr) { + pr_err("Command line not included in the provided image\n"); + return -EINVAL; + } } /* kexec/kdump need a safe page to save reboot_code_buffer */ @@ -287,9 +297,10 @@ void machine_kexec(struct kimage *image) /* We do not want to be bothered. */ local_irq_disable(); - pr_notice("EFI boot flag 0x%lx\n", efi_boot); - pr_notice("Command line at 0x%lx\n", cmdline_ptr); - pr_notice("System table at 0x%lx\n", systable_ptr); + pr_notice("EFI boot flag: 0x%lx\n", efi_boot); + pr_notice("Command line addr: 0x%lx\n", cmdline_ptr); + pr_notice("Command line string: %s\n", (char *)cmdline_ptr); + pr_notice("System table addr: 0x%lx\n", systable_ptr); pr_notice("We will call new kernel at 0x%lx\n", start_addr); pr_notice("Bye ...\n"); diff --git a/arch/loongarch/kernel/machine_kexec_file.c b/arch/loongarch/kernel/machine_kexec_file.c new file mode 100644 index 000000000000..dda236b51a88 --- /dev/null +++ b/arch/loongarch/kernel/machine_kexec_file.c @@ -0,0 +1,239 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * kexec_file for LoongArch + * + * Author: Youling Tang <tangyouling@kylinos.cn> + * Copyright (C) 2025 KylinSoft Corporation. + * + * Most code is derived from LoongArch port of kexec-tools + */ + +#define pr_fmt(fmt) "kexec_file: " fmt + +#include <linux/ioport.h> +#include <linux/kernel.h> +#include <linux/kexec.h> +#include <linux/memblock.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/vmalloc.h> +#include <asm/bootinfo.h> + +const struct kexec_file_ops * const kexec_file_loaders[] = { + &kexec_efi_ops, + &kexec_elf_ops, + NULL +}; + +int arch_kimage_file_post_load_cleanup(struct kimage *image) +{ + vfree(image->elf_headers); + image->elf_headers = NULL; + image->elf_headers_sz = 0; + + return kexec_image_post_load_cleanup_default(image); +} + +/* Add the "kexec_file" command line parameter to command line. */ +static void cmdline_add_loader(unsigned long *cmdline_tmplen, char *modified_cmdline) +{ + int loader_strlen; + + loader_strlen = sprintf(modified_cmdline + (*cmdline_tmplen), "kexec_file "); + *cmdline_tmplen += loader_strlen; +} + +/* Add the "initrd=start,size" command line parameter to command line. 
*/ +static void cmdline_add_initrd(struct kimage *image, unsigned long *cmdline_tmplen, + char *modified_cmdline, unsigned long initrd) +{ + int initrd_strlen; + + initrd_strlen = sprintf(modified_cmdline + (*cmdline_tmplen), "initrd=0x%lx,0x%lx ", + initrd, image->initrd_buf_len); + *cmdline_tmplen += initrd_strlen; +} + +#ifdef CONFIG_CRASH_DUMP + +static int prepare_elf_headers(void **addr, unsigned long *sz) +{ + int ret, nr_ranges; + uint64_t i; + phys_addr_t start, end; + struct crash_mem *cmem; + + nr_ranges = 2; /* for exclusion of crashkernel region */ + for_each_mem_range(i, &start, &end) + nr_ranges++; + + cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL); + if (!cmem) + return -ENOMEM; + + cmem->max_nr_ranges = nr_ranges; + cmem->nr_ranges = 0; + for_each_mem_range(i, &start, &end) { + cmem->ranges[cmem->nr_ranges].start = start; + cmem->ranges[cmem->nr_ranges].end = end - 1; + cmem->nr_ranges++; + } + + /* Exclude crashkernel region */ + ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); + if (ret < 0) + goto out; + + if (crashk_low_res.end) { + ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end); + if (ret < 0) + goto out; + } + + ret = crash_prepare_elf64_headers(cmem, true, addr, sz); + +out: + kfree(cmem); + return ret; +} + +/* + * Add the "mem=size@start" command line parameter to command line, indicating the + * memory region the new kernel can use to boot into. + */ +static void cmdline_add_mem(unsigned long *cmdline_tmplen, char *modified_cmdline) +{ + int mem_strlen = 0; + + mem_strlen = sprintf(modified_cmdline + (*cmdline_tmplen), "mem=0x%llx@0x%llx ", + crashk_res.end - crashk_res.start + 1, crashk_res.start); + *cmdline_tmplen += mem_strlen; + + if (crashk_low_res.end) { + mem_strlen = sprintf(modified_cmdline + (*cmdline_tmplen), "mem=0x%llx@0x%llx ", + crashk_low_res.end - crashk_low_res.start + 1, crashk_low_res.start); + *cmdline_tmplen += mem_strlen; + } +} + +/* Add the "elfcorehdr=size@start" command line parameter to command line. */ +static void cmdline_add_elfcorehdr(struct kimage *image, unsigned long *cmdline_tmplen, + char *modified_cmdline, unsigned long elfcorehdr_sz) +{ + int elfcorehdr_strlen = 0; + + elfcorehdr_strlen = sprintf(modified_cmdline + (*cmdline_tmplen), "elfcorehdr=0x%lx@0x%lx ", + elfcorehdr_sz, image->elf_load_addr); + *cmdline_tmplen += elfcorehdr_strlen; +} + +#endif + +/* + * Try to add the initrd to the image. If it is not possible to find valid + * locations, this function will undo changes to the image and return non zero. 
+ */ +int load_other_segments(struct kimage *image, + unsigned long kernel_load_addr, unsigned long kernel_size, + char *initrd, unsigned long initrd_len, char *cmdline, unsigned long cmdline_len) +{ + int ret = 0; + unsigned long cmdline_tmplen = 0; + unsigned long initrd_load_addr = 0; + unsigned long orig_segments = image->nr_segments; + char *modified_cmdline = NULL; + struct kexec_buf kbuf; + + kbuf.image = image; + /* Don't allocate anything below the kernel */ + kbuf.buf_min = kernel_load_addr + kernel_size; + + modified_cmdline = kzalloc(COMMAND_LINE_SIZE, GFP_KERNEL); + if (!modified_cmdline) + return -EINVAL; + + cmdline_add_loader(&cmdline_tmplen, modified_cmdline); + /* Ensure it's null terminated */ + modified_cmdline[COMMAND_LINE_SIZE - 1] = '\0'; + +#ifdef CONFIG_CRASH_DUMP + /* Load elf core header */ + if (image->type == KEXEC_TYPE_CRASH) { + void *headers; + unsigned long headers_sz; + + ret = prepare_elf_headers(&headers, &headers_sz); + if (ret < 0) { + pr_err("Preparing elf core header failed\n"); + goto out_err; + } + + kbuf.buffer = headers; + kbuf.bufsz = headers_sz; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + kbuf.memsz = headers_sz; + kbuf.buf_align = SZ_64K; /* largest supported page size */ + kbuf.buf_max = ULONG_MAX; + kbuf.top_down = true; + + ret = kexec_add_buffer(&kbuf); + if (ret < 0) { + vfree(headers); + goto out_err; + } + image->elf_headers = headers; + image->elf_load_addr = kbuf.mem; + image->elf_headers_sz = headers_sz; + + kexec_dprintk("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n", + image->elf_load_addr, kbuf.bufsz, kbuf.memsz); + + /* Add the mem=size@start parameter to the command line */ + cmdline_add_mem(&cmdline_tmplen, modified_cmdline); + + /* Add the elfcorehdr=size@start parameter to the command line */ + cmdline_add_elfcorehdr(image, &cmdline_tmplen, modified_cmdline, headers_sz); + } +#endif + + /* Load initrd */ + if (initrd) { + kbuf.buffer = initrd; + kbuf.bufsz = initrd_len; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + kbuf.memsz = initrd_len; + kbuf.buf_align = 0; + /* within 1GB-aligned window of up to 32GB in size */ + kbuf.buf_max = round_down(kernel_load_addr, SZ_1G) + (unsigned long)SZ_1G * 32; + kbuf.top_down = false; + + ret = kexec_add_buffer(&kbuf); + if (ret < 0) + goto out_err; + initrd_load_addr = kbuf.mem; + + kexec_dprintk("Loaded initrd at 0x%lx bufsz=0x%lx memsz=0x%lx\n", + initrd_load_addr, kbuf.bufsz, kbuf.memsz); + + /* Add the initrd=start,size parameter to the command line */ + cmdline_add_initrd(image, &cmdline_tmplen, modified_cmdline, initrd_load_addr); + } + + if (cmdline_len + cmdline_tmplen > COMMAND_LINE_SIZE) { + pr_err("Appending command line exceeds COMMAND_LINE_SIZE\n"); + ret = -EINVAL; + goto out_err; + } + + memcpy(modified_cmdline + cmdline_tmplen, cmdline, cmdline_len); + cmdline = modified_cmdline; + image->arch.cmdline_ptr = (unsigned long)cmdline; + + return 0; + +out_err: + image->nr_segments = orig_segments; + kfree(modified_cmdline); + return ret; +} diff --git a/arch/loongarch/kernel/relocate.c b/arch/loongarch/kernel/relocate.c index 50c469067f3a..b5e2312a2fca 100644 --- a/arch/loongarch/kernel/relocate.c +++ b/arch/loongarch/kernel/relocate.c @@ -166,6 +166,10 @@ static inline __init bool kaslr_disabled(void) return true; #endif + str = strstr(boot_command_line, "kexec_file"); + if (str == boot_command_line || (str > boot_command_line && *(str - 1) == ' ')) + return true; + return false; } diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 
075b79b2c1d3..69c17d162fff 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -355,6 +355,7 @@ void __init platform_init(void) #ifdef CONFIG_ACPI acpi_table_upgrade(); + acpi_gbl_use_global_lock = false; acpi_gbl_use_default_register_widths = false; acpi_boot_table_init(); #endif diff --git a/arch/loongarch/kvm/Kconfig b/arch/loongarch/kvm/Kconfig index 40eea6da7c25..ae64bbdf83a7 100644 --- a/arch/loongarch/kvm/Kconfig +++ b/arch/loongarch/kvm/Kconfig @@ -31,7 +31,7 @@ config KVM select KVM_GENERIC_HARDWARE_ENABLING select KVM_GENERIC_MMU_NOTIFIER select KVM_MMIO - select KVM_XFER_TO_GUEST_WORK + select VIRT_XFER_TO_GUEST_WORK select SCHED_INFO select GUEST_PERF_EVENTS if PERF_EVENTS help diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c index 6c9c7de7226b..cb493980d874 100644 --- a/arch/loongarch/kvm/exit.c +++ b/arch/loongarch/kvm/exit.c @@ -218,16 +218,16 @@ int kvm_emu_iocsr(larch_inst inst, struct kvm_run *run, struct kvm_vcpu *vcpu) } trace_kvm_iocsr(KVM_TRACE_IOCSR_WRITE, run->iocsr_io.len, addr, val); } else { + vcpu->arch.io_gpr = rd; /* Set register id for iocsr read completion */ idx = srcu_read_lock(&vcpu->kvm->srcu); - ret = kvm_io_bus_read(vcpu, KVM_IOCSR_BUS, addr, run->iocsr_io.len, val); + ret = kvm_io_bus_read(vcpu, KVM_IOCSR_BUS, addr, + run->iocsr_io.len, run->iocsr_io.data); srcu_read_unlock(&vcpu->kvm->srcu, idx); - if (ret == 0) + if (ret == 0) { + kvm_complete_iocsr_read(vcpu, run); ret = EMULATE_DONE; - else { + } else ret = EMULATE_DO_IOCSR; - /* Save register id for iocsr read completion */ - vcpu->arch.io_gpr = rd; - } trace_kvm_iocsr(KVM_TRACE_IOCSR_READ, run->iocsr_io.len, addr, NULL); } @@ -468,6 +468,8 @@ int kvm_emu_mmio_read(struct kvm_vcpu *vcpu, larch_inst inst) if (ret == EMULATE_DO_MMIO) { trace_kvm_mmio(KVM_TRACE_MMIO_READ, run->mmio.len, run->mmio.phys_addr, NULL); + vcpu->arch.io_gpr = rd; /* Set for kvm_complete_mmio_read() use */ + /* * If mmio device such as PCH-PIC is emulated in KVM, * it need not return to user space to handle the mmio @@ -475,16 +477,15 @@ int kvm_emu_mmio_read(struct kvm_vcpu *vcpu, larch_inst inst) */ idx = srcu_read_lock(&vcpu->kvm->srcu); ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, vcpu->arch.badv, - run->mmio.len, &vcpu->arch.gprs[rd]); + run->mmio.len, run->mmio.data); srcu_read_unlock(&vcpu->kvm->srcu, idx); if (!ret) { + kvm_complete_mmio_read(vcpu, run); update_pc(&vcpu->arch); vcpu->mmio_needed = 0; return EMULATE_DONE; } - /* Set for kvm_complete_mmio_read() use */ - vcpu->arch.io_gpr = rd; run->mmio.is_write = 0; vcpu->mmio_is_write = 0; return EMULATE_DO_MMIO; diff --git a/arch/loongarch/kvm/intc/ipi.c b/arch/loongarch/kvm/intc/ipi.c index 5a8481dda052..05cefd29282e 100644 --- a/arch/loongarch/kvm/intc/ipi.c +++ b/arch/loongarch/kvm/intc/ipi.c @@ -7,13 +7,26 @@ #include <asm/kvm_ipi.h> #include <asm/kvm_vcpu.h> -static void ipi_send(struct kvm *kvm, uint64_t data) +static void ipi_set(struct kvm_vcpu *vcpu, uint32_t data) { - int cpu, action; uint32_t status; - struct kvm_vcpu *vcpu; struct kvm_interrupt irq; + spin_lock(&vcpu->arch.ipi_state.lock); + status = vcpu->arch.ipi_state.status; + vcpu->arch.ipi_state.status |= data; + spin_unlock(&vcpu->arch.ipi_state.lock); + if ((status == 0) && data) { + irq.irq = LARCH_INT_IPI; + kvm_vcpu_ioctl_interrupt(vcpu, &irq); + } +} + +static void ipi_send(struct kvm *kvm, uint64_t data) +{ + int cpu; + struct kvm_vcpu *vcpu; + cpu = ((data & 0xffffffff) >> 16) & 0x3ff; vcpu = kvm_get_vcpu_by_cpuid(kvm, cpu); if (unlikely(vcpu 
== NULL)) { @@ -21,15 +34,7 @@ static void ipi_send(struct kvm *kvm, uint64_t data) return; } - action = BIT(data & 0x1f); - spin_lock(&vcpu->arch.ipi_state.lock); - status = vcpu->arch.ipi_state.status; - vcpu->arch.ipi_state.status |= action; - spin_unlock(&vcpu->arch.ipi_state.lock); - if (status == 0) { - irq.irq = LARCH_INT_IPI; - kvm_vcpu_ioctl_interrupt(vcpu, &irq); - } + ipi_set(vcpu, BIT(data & 0x1f)); } static void ipi_clear(struct kvm_vcpu *vcpu, uint64_t data) @@ -96,6 +101,34 @@ static void write_mailbox(struct kvm_vcpu *vcpu, int offset, uint64_t data, int spin_unlock(&vcpu->arch.ipi_state.lock); } +static int mail_send(struct kvm *kvm, uint64_t data) +{ + int i, cpu, mailbox, offset; + uint32_t val = 0, mask = 0; + struct kvm_vcpu *vcpu; + + cpu = ((data & 0xffffffff) >> 16) & 0x3ff; + vcpu = kvm_get_vcpu_by_cpuid(kvm, cpu); + if (unlikely(vcpu == NULL)) { + kvm_err("%s: invalid target cpu: %d\n", __func__, cpu); + return -EINVAL; + } + mailbox = ((data & 0xffffffff) >> 2) & 0x7; + offset = IOCSR_IPI_BUF_20 + mailbox * 4; + if ((data >> 27) & 0xf) { + val = read_mailbox(vcpu, offset, 4); + for (i = 0; i < 4; i++) + if (data & (BIT(27 + i))) + mask |= (0xff << (i * 8)); + val &= mask; + } + + val |= ((uint32_t)(data >> 32) & ~mask); + write_mailbox(vcpu, offset, val, 4); + + return 0; +} + static int send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) { int i, idx, ret; @@ -132,23 +165,6 @@ static int send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) return ret; } -static int mail_send(struct kvm *kvm, uint64_t data) -{ - int cpu, mailbox, offset; - struct kvm_vcpu *vcpu; - - cpu = ((data & 0xffffffff) >> 16) & 0x3ff; - vcpu = kvm_get_vcpu_by_cpuid(kvm, cpu); - if (unlikely(vcpu == NULL)) { - kvm_err("%s: invalid target cpu: %d\n", __func__, cpu); - return -EINVAL; - } - mailbox = ((data & 0xffffffff) >> 2) & 0x7; - offset = IOCSR_IPI_BASE + IOCSR_IPI_BUF_20 + mailbox * 4; - - return send_ipi_data(vcpu, offset, data); -} - static int any_send(struct kvm *kvm, uint64_t data) { int cpu, offset; @@ -231,7 +247,7 @@ static int loongarch_ipi_writel(struct kvm_vcpu *vcpu, gpa_t addr, int len, cons spin_unlock(&vcpu->arch.ipi_state.lock); break; case IOCSR_IPI_SET: - ret = -EINVAL; + ipi_set(vcpu, data); break; case IOCSR_IPI_CLEAR: /* Just clear the status of the current vcpu */ @@ -250,10 +266,10 @@ static int loongarch_ipi_writel(struct kvm_vcpu *vcpu, gpa_t addr, int len, cons ipi_send(vcpu->kvm, data); break; case IOCSR_MAIL_SEND: - ret = mail_send(vcpu->kvm, *(uint64_t *)val); + ret = mail_send(vcpu->kvm, data); break; case IOCSR_ANY_SEND: - ret = any_send(vcpu->kvm, *(uint64_t *)val); + ret = any_send(vcpu->kvm, data); break; default: kvm_err("%s: unknown addr: %llx\n", __func__, addr); diff --git a/arch/loongarch/kvm/intc/pch_pic.c b/arch/loongarch/kvm/intc/pch_pic.c index baf3b4faf7ea..a698a73de399 100644 --- a/arch/loongarch/kvm/intc/pch_pic.c +++ b/arch/loongarch/kvm/intc/pch_pic.c @@ -35,16 +35,11 @@ static void pch_pic_update_irq(struct loongarch_pch_pic *s, int irq, int level) /* update batch irqs, the irq_mask is a bitmap of irqs */ static void pch_pic_update_batch_irqs(struct loongarch_pch_pic *s, u64 irq_mask, int level) { - int irq, bits; + unsigned int irq; + DECLARE_BITMAP(irqs, 64) = { BITMAP_FROM_U64(irq_mask) }; - /* find each irq by irqs bitmap and update each irq */ - bits = sizeof(irq_mask) * 8; - irq = find_first_bit((void *)&irq_mask, bits); - while (irq < bits) { + for_each_set_bit(irq, irqs, 64) pch_pic_update_irq(s, irq, 
level); - bitmap_clear((void *)&irq_mask, irq, 1); - irq = find_first_bit((void *)&irq_mask, bits); - } } /* called when a irq is triggered in pch pic */ @@ -77,109 +72,65 @@ void pch_msi_set_irq(struct kvm *kvm, int irq, int level) eiointc_set_irq(kvm->arch.eiointc, irq, level); } -/* - * pch pic register is 64-bit, but it is accessed by 32-bit, - * so we use high to get whether low or high 32 bits we want - * to read. - */ -static u32 pch_pic_read_reg(u64 *s, int high) -{ - u64 val = *s; - - /* read the high 32 bits when high is 1 */ - return high ? (u32)(val >> 32) : (u32)val; -} - -/* - * pch pic register is 64-bit, but it is accessed by 32-bit, - * so we use high to get whether low or high 32 bits we want - * to write. - */ -static u32 pch_pic_write_reg(u64 *s, int high, u32 v) -{ - u64 val = *s, data = v; - - if (high) { - /* - * Clear val high 32 bits - * Write the high 32 bits when the high is 1 - */ - *s = (val << 32 >> 32) | (data << 32); - val >>= 32; - } else - /* - * Clear val low 32 bits - * Write the low 32 bits when the high is 0 - */ - *s = (val >> 32 << 32) | v; - - return (u32)val; -} - static int loongarch_pch_pic_read(struct loongarch_pch_pic *s, gpa_t addr, int len, void *val) { - int offset, index, ret = 0; - u32 data = 0; - u64 int_id = 0; + int ret = 0, offset; + u64 data = 0; + void *ptemp; offset = addr - s->pch_pic_base; + offset -= offset & 7; spin_lock(&s->lock); switch (offset) { case PCH_PIC_INT_ID_START ... PCH_PIC_INT_ID_END: - /* int id version */ - int_id |= (u64)PCH_PIC_INT_ID_VER << 32; - /* irq number */ - int_id |= (u64)31 << (32 + 16); - /* int id value */ - int_id |= PCH_PIC_INT_ID_VAL; - *(u64 *)val = int_id; + data = s->id.data; break; case PCH_PIC_MASK_START ... PCH_PIC_MASK_END: - offset -= PCH_PIC_MASK_START; - index = offset >> 2; - /* read mask reg */ - data = pch_pic_read_reg(&s->mask, index); - *(u32 *)val = data; + data = s->mask; break; case PCH_PIC_HTMSI_EN_START ... PCH_PIC_HTMSI_EN_END: - offset -= PCH_PIC_HTMSI_EN_START; - index = offset >> 2; /* read htmsi enable reg */ - data = pch_pic_read_reg(&s->htmsi_en, index); - *(u32 *)val = data; + data = s->htmsi_en; break; case PCH_PIC_EDGE_START ... PCH_PIC_EDGE_END: - offset -= PCH_PIC_EDGE_START; - index = offset >> 2; /* read edge enable reg */ - data = pch_pic_read_reg(&s->edge, index); - *(u32 *)val = data; + data = s->edge; break; case PCH_PIC_AUTO_CTRL0_START ... PCH_PIC_AUTO_CTRL0_END: case PCH_PIC_AUTO_CTRL1_START ... PCH_PIC_AUTO_CTRL1_END: /* we only use default mode: fixed interrupt distribution mode */ - *(u32 *)val = 0; break; case PCH_PIC_ROUTE_ENTRY_START ... PCH_PIC_ROUTE_ENTRY_END: /* only route to int0: eiointc */ - *(u8 *)val = 1; + ptemp = s->route_entry + (offset - PCH_PIC_ROUTE_ENTRY_START); + data = *(u64 *)ptemp; break; case PCH_PIC_HTMSI_VEC_START ... PCH_PIC_HTMSI_VEC_END: - offset -= PCH_PIC_HTMSI_VEC_START; /* read htmsi vector */ - data = s->htmsi_vector[offset]; - *(u8 *)val = data; + ptemp = s->htmsi_vector + (offset - PCH_PIC_HTMSI_VEC_START); + data = *(u64 *)ptemp; break; case PCH_PIC_POLARITY_START ... 
PCH_PIC_POLARITY_END: - /* we only use defalut value 0: high level triggered */ - *(u32 *)val = 0; + data = s->polarity; + break; + case PCH_PIC_INT_IRR_START: + data = s->irr; + break; + case PCH_PIC_INT_ISR_START: + data = s->isr; break; default: ret = -EINVAL; } spin_unlock(&s->lock); + if (ret == 0) { + offset = (addr - s->pch_pic_base) & 7; + data = data >> (offset * 8); + memcpy(val, &data, len); + } + return ret; } @@ -210,81 +161,69 @@ static int kvm_pch_pic_read(struct kvm_vcpu *vcpu, static int loongarch_pch_pic_write(struct loongarch_pch_pic *s, gpa_t addr, int len, const void *val) { - int ret; - u32 old, data, offset, index; - u64 irq; + int ret = 0, offset; + u64 old, data, mask; + void *ptemp; + + switch (len) { + case 1: + data = *(u8 *)val; + mask = 0xFF; + break; + case 2: + data = *(u16 *)val; + mask = USHRT_MAX; + break; + case 4: + data = *(u32 *)val; + mask = UINT_MAX; + break; + case 8: + default: + data = *(u64 *)val; + mask = ULONG_MAX; + break; + } - ret = 0; - data = *(u32 *)val; - offset = addr - s->pch_pic_base; + offset = (addr - s->pch_pic_base) & 7; + mask = mask << (offset * 8); + data = data << (offset * 8); + offset = (addr - s->pch_pic_base) - offset; spin_lock(&s->lock); switch (offset) { - case PCH_PIC_MASK_START ... PCH_PIC_MASK_END: - offset -= PCH_PIC_MASK_START; - /* get whether high or low 32 bits we want to write */ - index = offset >> 2; - old = pch_pic_write_reg(&s->mask, index, data); - /* enable irq when mask value change to 0 */ - irq = (old & ~data) << (32 * index); - pch_pic_update_batch_irqs(s, irq, 1); - /* disable irq when mask value change to 1 */ - irq = (~old & data) << (32 * index); - pch_pic_update_batch_irqs(s, irq, 0); - break; - case PCH_PIC_HTMSI_EN_START ... PCH_PIC_HTMSI_EN_END: - offset -= PCH_PIC_HTMSI_EN_START; - index = offset >> 2; - pch_pic_write_reg(&s->htmsi_en, index, data); + case PCH_PIC_MASK_START: + old = s->mask; + s->mask = (old & ~mask) | data; + if (old & ~data) + pch_pic_update_batch_irqs(s, old & ~data, 1); + if (~old & data) + pch_pic_update_batch_irqs(s, ~old & data, 0); break; - case PCH_PIC_EDGE_START ... PCH_PIC_EDGE_END: - offset -= PCH_PIC_EDGE_START; - index = offset >> 2; - /* 1: edge triggered, 0: level triggered */ - pch_pic_write_reg(&s->edge, index, data); - break; - case PCH_PIC_CLEAR_START ... PCH_PIC_CLEAR_END: - offset -= PCH_PIC_CLEAR_START; - index = offset >> 2; - /* write 1 to clear edge irq */ - old = pch_pic_read_reg(&s->irr, index); - /* - * get the irq bitmap which is edge triggered and - * already set and to be cleared - */ - irq = old & pch_pic_read_reg(&s->edge, index) & data; - /* write irr to the new state where irqs have been cleared */ - pch_pic_write_reg(&s->irr, index, old & ~irq); - /* update cleared irqs */ - pch_pic_update_batch_irqs(s, irq, 0); + case PCH_PIC_HTMSI_EN_START: + s->htmsi_en = (s->htmsi_en & ~mask) | data; break; - case PCH_PIC_AUTO_CTRL0_START ... PCH_PIC_AUTO_CTRL0_END: - offset -= PCH_PIC_AUTO_CTRL0_START; - index = offset >> 2; - /* we only use default mode: fixed interrupt distribution mode */ - pch_pic_write_reg(&s->auto_ctrl0, index, 0); + case PCH_PIC_EDGE_START: + s->edge = (s->edge & ~mask) | data; break; - case PCH_PIC_AUTO_CTRL1_START ... 
PCH_PIC_AUTO_CTRL1_END: - offset -= PCH_PIC_AUTO_CTRL1_START; - index = offset >> 2; - /* we only use default mode: fixed interrupt distribution mode */ - pch_pic_write_reg(&s->auto_ctrl1, index, 0); + case PCH_PIC_POLARITY_START: + s->polarity = (s->polarity & ~mask) | data; break; - case PCH_PIC_ROUTE_ENTRY_START ... PCH_PIC_ROUTE_ENTRY_END: - offset -= PCH_PIC_ROUTE_ENTRY_START; - /* only route to int0: eiointc */ - s->route_entry[offset] = 1; + case PCH_PIC_CLEAR_START: + old = s->irr & s->edge & data; + if (old) { + s->irr &= ~old; + pch_pic_update_batch_irqs(s, old, 0); + } break; case PCH_PIC_HTMSI_VEC_START ... PCH_PIC_HTMSI_VEC_END: - /* route table to eiointc */ - offset -= PCH_PIC_HTMSI_VEC_START; - s->htmsi_vector[offset] = (u8)data; + ptemp = s->htmsi_vector + (offset - PCH_PIC_HTMSI_VEC_START); + *(u64 *)ptemp = (*(u64 *)ptemp & ~mask) | data; break; - case PCH_PIC_POLARITY_START ... PCH_PIC_POLARITY_END: - offset -= PCH_PIC_POLARITY_START; - index = offset >> 2; - /* we only use defalut value 0: high level triggered */ - pch_pic_write_reg(&s->polarity, index, 0); + /* Not implemented */ + case PCH_PIC_AUTO_CTRL0_START: + case PCH_PIC_AUTO_CTRL1_START: + case PCH_PIC_ROUTE_ENTRY_START ... PCH_PIC_ROUTE_ENTRY_END: break; default: ret = -EINVAL; @@ -484,7 +423,7 @@ static int kvm_setup_default_irq_routing(struct kvm *kvm) static int kvm_pch_pic_create(struct kvm_device *dev, u32 type) { - int ret; + int i, ret, irq_num; struct kvm *kvm = dev->kvm; struct loongarch_pch_pic *s; @@ -500,6 +439,22 @@ static int kvm_pch_pic_create(struct kvm_device *dev, u32 type) if (!s) return -ENOMEM; + /* + * Interrupt controller identification register 1 + * Bit 24-31 Interrupt Controller ID + * Interrupt controller identification register 2 + * Bit 0-7 Interrupt Controller version number + * Bit 16-23 The number of interrupt sources supported + */ + irq_num = 32; + s->mask = -1UL; + s->id.desc.id = PCH_PIC_INT_ID_VAL; + s->id.desc.version = PCH_PIC_INT_ID_VER; + s->id.desc.irq_num = irq_num - 1; + for (i = 0; i < irq_num; i++) { + s->route_entry[i] = 1; + s->htmsi_vector[i] = i; + } spin_lock_init(&s->lock); s->kvm = kvm; kvm->arch.pch_pic = s; diff --git a/arch/loongarch/kvm/trace.h b/arch/loongarch/kvm/trace.h index 145514dab6d5..3467ee22b704 100644 --- a/arch/loongarch/kvm/trace.h +++ b/arch/loongarch/kvm/trace.h @@ -161,6 +161,41 @@ TRACE_EVENT(kvm_aux, __entry->pc) ); +#define KVM_TRACE_IOCSR_READ_UNSATISFIED 0 +#define KVM_TRACE_IOCSR_READ 1 +#define KVM_TRACE_IOCSR_WRITE 2 + +#define kvm_trace_symbol_iocsr \ + { KVM_TRACE_IOCSR_READ_UNSATISFIED, "unsatisfied-read" }, \ + { KVM_TRACE_IOCSR_READ, "read" }, \ + { KVM_TRACE_IOCSR_WRITE, "write" } + +TRACE_EVENT(kvm_iocsr, + TP_PROTO(int type, int len, u64 gpa, void *val), + TP_ARGS(type, len, gpa, val), + + TP_STRUCT__entry( + __field( u32, type ) + __field( u32, len ) + __field( u64, gpa ) + __field( u64, val ) + ), + + TP_fast_assign( + __entry->type = type; + __entry->len = len; + __entry->gpa = gpa; + __entry->val = 0; + if (val) + memcpy(&__entry->val, val, + min_t(u32, sizeof(__entry->val), len)); + ), + + TP_printk("iocsr %s len %u gpa 0x%llx val 0x%llx", + __print_symbolic(__entry->type, kvm_trace_symbol_iocsr), + __entry->len, __entry->gpa, __entry->val) +); + TRACE_EVENT(kvm_vpid_change, TP_PROTO(struct kvm_vcpu *vcpu, unsigned long vpid), TP_ARGS(vcpu, vpid), diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index ce478151466c..30e3b089a596 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ 
-4,7 +4,6 @@ */ #include <linux/kvm_host.h> -#include <linux/entry-kvm.h> #include <asm/fpu.h> #include <asm/lbt.h> #include <asm/loongarch.h> @@ -251,7 +250,7 @@ static int kvm_enter_guest_check(struct kvm_vcpu *vcpu) /* * Check conditions before entering the guest */ - ret = xfer_to_guest_mode_handle_work(vcpu); + ret = kvm_xfer_to_guest_mode_handle_work(vcpu); if (ret < 0) return ret; @@ -680,6 +679,8 @@ static int _kvm_get_cpucfg_mask(int id, u64 *v) *v |= CPUCFG2_ARMBT; if (cpu_has_lbt_mips) *v |= CPUCFG2_MIPSBT; + if (cpu_has_ptw) + *v |= CPUCFG2_PTW; return 0; case LOONGARCH_CPUCFG3: diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c index edccfc8c9cd8..a49b1c1a3dd1 100644 --- a/arch/loongarch/kvm/vm.c +++ b/arch/loongarch/kvm/vm.c @@ -146,6 +146,10 @@ static int kvm_vm_feature_has_attr(struct kvm *kvm, struct kvm_device_attr *attr if (kvm_pvtime_supported()) return 0; return -ENXIO; + case KVM_LOONGARCH_VM_FEAT_PTW: + if (cpu_has_ptw) + return 0; + return -ENXIO; default: return -ENXIO; } diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c index deefd9617d00..2c93d33356e5 100644 --- a/arch/loongarch/mm/fault.c +++ b/arch/loongarch/mm/fault.c @@ -215,6 +215,58 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, flags |= FAULT_FLAG_USER; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + + if (!(flags & FAULT_FLAG_USER)) + goto lock_mmap; + + vma = lock_vma_under_rcu(mm, address); + if (!vma) + goto lock_mmap; + + if (write) { + flags |= FAULT_FLAG_WRITE; + if (!(vma->vm_flags & VM_WRITE)) { + vma_end_read(vma); + si_code = SEGV_ACCERR; + count_vm_vma_lock_event(VMA_LOCK_SUCCESS); + goto bad_area_nosemaphore; + } + } else { + if (!(vma->vm_flags & VM_EXEC) && address == exception_era(regs)) { + vma_end_read(vma); + si_code = SEGV_ACCERR; + count_vm_vma_lock_event(VMA_LOCK_SUCCESS); + goto bad_area_nosemaphore; + } + if (!(vma->vm_flags & (VM_READ | VM_WRITE)) && address != exception_era(regs)) { + vma_end_read(vma); + si_code = SEGV_ACCERR; + count_vm_vma_lock_event(VMA_LOCK_SUCCESS); + goto bad_area_nosemaphore; + } + } + + fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs); + if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED))) + vma_end_read(vma); + + if (!(fault & VM_FAULT_RETRY)) { + count_vm_vma_lock_event(VMA_LOCK_SUCCESS); + goto done; + } + + count_vm_vma_lock_event(VMA_LOCK_RETRY); + if (fault & VM_FAULT_MAJOR) + flags |= FAULT_FLAG_TRIED; + + /* Quick path to respond to signals */ + if (fault_signal_pending(fault, regs)) { + if (!user_mode(regs)) + no_context(regs, write, address); + return; + } +lock_mmap: + retry: vma = lock_mm_and_find_vma(mm, address, regs); if (unlikely(!vma)) @@ -276,8 +328,10 @@ good_area: */ goto retry; } + mmap_read_unlock(mm); + +done: if (unlikely(fault & VM_FAULT_ERROR)) { - mmap_read_unlock(mm); if (fault & VM_FAULT_OOM) { do_out_of_memory(regs, write, address); return; @@ -290,8 +344,6 @@ good_area: } BUG(); } - - mmap_read_unlock(mm); } asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index abfdb6bb5c38..cbe53d0b7fb0 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -527,13 +527,11 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext emit_zext_32(ctx, dst, is32); break; case 8: - move_reg(ctx, t1, src); - emit_insn(ctx, extwb, dst, t1); + emit_insn(ctx, extwb, dst, src); emit_zext_32(ctx, dst, is32); break; case 16: - 
move_reg(ctx, t1, src); - emit_insn(ctx, extwh, dst, t1); + emit_insn(ctx, extwh, dst, src); emit_zext_32(ctx, dst, is32); break; case 32: @@ -1294,8 +1292,10 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type, u32 old_insns[LOONGARCH_LONG_JUMP_NINSNS] = {[0 ... 4] = INSN_NOP}; u32 new_insns[LOONGARCH_LONG_JUMP_NINSNS] = {[0 ... 4] = INSN_NOP}; - if (!is_kernel_text((unsigned long)ip) && - !is_bpf_text_address((unsigned long)ip)) + /* Only poking bpf text is supported. Since kernel function entry + * is set up by ftrace, we rely on ftrace to poke kernel functions. + */ + if (!is_bpf_text_address((unsigned long)ip)) return -ENOTSUPP; ret = emit_jump_or_nops(old_addr, ip, old_insns, is_call); @@ -1448,12 +1448,43 @@ void arch_free_bpf_trampoline(void *image, unsigned int size) bpf_prog_pack_free(image, size); } +/* + * Sign-extend the register if necessary + */ +static void sign_extend(struct jit_ctx *ctx, int rd, int rj, u8 size, bool sign) +{ + /* ABI requires unsigned char/short to be zero-extended */ + if (!sign && (size == 1 || size == 2)) { + if (rd != rj) + move_reg(ctx, rd, rj); + return; + } + + switch (size) { + case 1: + emit_insn(ctx, extwb, rd, rj); + break; + case 2: + emit_insn(ctx, extwh, rd, rj); + break; + case 4: + emit_insn(ctx, addiw, rd, rj, 0); + break; + case 8: + if (rd != rj) + move_reg(ctx, rd, rj); + break; + default: + pr_warn("bpf_jit: invalid size %d for sign_extend\n", size); + } +} + static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, const struct btf_func_model *m, struct bpf_tramp_links *tlinks, void *func_addr, u32 flags) { int i, ret, save_ret; - int stack_size = 0, nargs = 0; + int stack_size, nargs; int retval_off, args_off, nargs_off, ip_off, run_ctx_off, sreg_off, tcc_ptr_off; bool is_struct_ops = flags & BPF_TRAMP_F_INDIRECT; void *orig_call = func_addr; @@ -1462,9 +1493,6 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; u32 **branches = NULL; - if (flags & (BPF_TRAMP_F_ORIG_STACK | BPF_TRAMP_F_SHARE_IPMODIFY)) - return -ENOTSUPP; - /* * FP + 8 [ RA to parent func ] return address to parent * function @@ -1495,20 +1523,23 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i if (m->nr_args > LOONGARCH_MAX_REG_ARGS) return -ENOTSUPP; + /* FIXME: No support of struct argument */ + for (i = 0; i < m->nr_args; i++) { + if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG) + return -ENOTSUPP; + } + if (flags & (BPF_TRAMP_F_ORIG_STACK | BPF_TRAMP_F_SHARE_IPMODIFY)) return -ENOTSUPP; - stack_size = 0; - /* Room of trampoline frame to store return address and frame pointer */ - stack_size += 16; + stack_size = 16; save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET); - if (save_ret) { - /* Save BPF R0 and A0 */ - stack_size += 16; - retval_off = stack_size; - } + if (save_ret) + stack_size += 16; /* Save BPF R0 and A0 */ + + retval_off = stack_size; /* Room of trampoline frame to store args */ nargs = m->nr_args; @@ -1595,7 +1626,7 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i orig_call += LOONGARCH_BPF_FENTRY_NBYTES; if (flags & BPF_TRAMP_F_CALL_ORIG) { - move_imm(ctx, LOONGARCH_GPR_A0, (const s64)im, false); + move_addr(ctx, LOONGARCH_GPR_A0, (const u64)im); ret = emit_call(ctx, (const u64)__bpf_tramp_enter); if (ret) return ret; @@ -1645,7 +1676,7 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct 
bpf_tramp_i if (flags & BPF_TRAMP_F_CALL_ORIG) { im->ip_epilogue = ctx->ro_image + ctx->idx; - move_imm(ctx, LOONGARCH_GPR_A0, (const s64)im, false); + move_addr(ctx, LOONGARCH_GPR_A0, (const u64)im); ret = emit_call(ctx, (const u64)__bpf_tramp_exit); if (ret) goto out; @@ -1655,8 +1686,12 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i restore_args(ctx, m->nr_args, args_off); if (save_ret) { - emit_insn(ctx, ldd, LOONGARCH_GPR_A0, LOONGARCH_GPR_FP, -retval_off); emit_insn(ctx, ldd, regmap[BPF_REG_0], LOONGARCH_GPR_FP, -(retval_off - 8)); + if (is_struct_ops) + sign_extend(ctx, LOONGARCH_GPR_A0, regmap[BPF_REG_0], + m->ret_size, m->ret_flags & BTF_FMODEL_SIGNED_ARG); + else + emit_insn(ctx, ldd, LOONGARCH_GPR_A0, LOONGARCH_GPR_FP, -retval_off); } emit_insn(ctx, ldd, LOONGARCH_GPR_S1, LOONGARCH_GPR_FP, -sreg_off); @@ -1715,7 +1750,10 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image, jit_fill_hole(image, (unsigned int)(ro_image_end - ro_image)); ret = __arch_prepare_bpf_trampoline(&ctx, im, m, tlinks, func_addr, flags); - if (ret > 0 && validate_code(&ctx) < 0) { + if (ret < 0) + goto out; + + if (validate_code(&ctx) < 0) { ret = -EINVAL; goto out; } @@ -1726,7 +1764,6 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image, goto out; } - bpf_flush_icache(ro_image, ro_image_end); out: kvfree(image); return ret < 0 ? ret : size; @@ -1744,8 +1781,7 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, ret = __arch_prepare_bpf_trampoline(&ctx, &im, m, tlinks, func_addr, flags); - /* Page align */ - return ret < 0 ? ret : round_up(ret * LOONGARCH_INSN_SIZE, PAGE_SIZE); + return ret < 0 ? ret : ret * LOONGARCH_INSN_SIZE; } struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) diff --git a/arch/m68k/configs/stmark2_defconfig b/arch/m68k/configs/stmark2_defconfig index 7787a4dd7c3c..f3268fed02fc 100644 --- a/arch/m68k/configs/stmark2_defconfig +++ b/arch/m68k/configs/stmark2_defconfig @@ -72,9 +72,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y # CONFIG_FILE_LOCKING is not set # CONFIG_DNOTIFY is not set # CONFIG_INOTIFY_USER is not set diff --git a/arch/m68k/include/asm/pgtable_mm.h b/arch/m68k/include/asm/pgtable_mm.h index 62f2ff4e6799..bba64a9c49ac 100644 --- a/arch/m68k/include/asm/pgtable_mm.h +++ b/arch/m68k/include/asm/pgtable_mm.h @@ -119,16 +119,6 @@ extern void *empty_zero_page; */ #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) -/* number of bits that fit into a memory pointer */ -#define BITS_PER_PTR (8*sizeof(unsigned long)) - -/* to align the pointer to a pointer address */ -#define PTR_MASK (~(sizeof(void*)-1)) - -/* sizeof(void*)==1<<SIZEOF_PTR_LOG2 */ -/* 64-bit machines, beware! SRB. 
*/ -#define SIZEOF_PTR_LOG2 2 - extern void kernel_set_cachemode(void *addr, unsigned long size, int cmode); /* diff --git a/arch/m68k/kernel/pcibios.c b/arch/m68k/kernel/pcibios.c index 9504eb19d73a..e6ab3f9ff5d8 100644 --- a/arch/m68k/kernel/pcibios.c +++ b/arch/m68k/kernel/pcibios.c @@ -44,41 +44,24 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res, */ int pcibios_enable_device(struct pci_dev *dev, int mask) { - struct resource *r; u16 cmd, newcmd; - int idx; + int ret; - pci_read_config_word(dev, PCI_COMMAND, &cmd); - newcmd = cmd; - - for (idx = 0; idx < 6; idx++) { - /* Only set up the requested stuff */ - if (!(mask & (1 << idx))) - continue; - - r = dev->resource + idx; - if (!r->start && r->end) { - pr_err("PCI: Device %s not available because of resource collisions\n", - pci_name(dev)); - return -EINVAL; - } - if (r->flags & IORESOURCE_IO) - newcmd |= PCI_COMMAND_IO; - if (r->flags & IORESOURCE_MEM) - newcmd |= PCI_COMMAND_MEMORY; - } + ret = pci_enable_resources(dev, mask); + if (ret < 0) + return ret; /* * Bridges (eg, cardbus bridges) need to be fully enabled */ - if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE) + if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE) { + pci_read_config_word(dev, PCI_COMMAND, &cmd); newcmd |= PCI_COMMAND_IO | PCI_COMMAND_MEMORY; - - - if (newcmd != cmd) { - pr_info("PCI: enabling device %s (0x%04x -> 0x%04x)\n", - pci_name(dev), cmd, newcmd); - pci_write_config_word(dev, PCI_COMMAND, newcmd); + if (newcmd != cmd) { + pr_info("PCI: enabling bridge %s (0x%04x -> 0x%04x)\n", + pci_name(dev), cmd, newcmd); + pci_write_config_word(dev, PCI_COMMAND, newcmd); + } } return 0; } diff --git a/arch/microblaze/configs/mmu_defconfig b/arch/microblaze/configs/mmu_defconfig index 176314f3c9aa..fbbdcb394ca2 100644 --- a/arch/microblaze/configs/mmu_defconfig +++ b/arch/microblaze/configs/mmu_defconfig @@ -73,7 +73,7 @@ CONFIG_FB_XILINX=y CONFIG_UIO=y CONFIG_UIO_PDRV_GENIRQ=y CONFIG_UIO_DMEM_GENIRQ=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_DNOTIFY is not set CONFIG_TMPFS=y CONFIG_CRAMFS=y diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h index a60e8d895102..4eb76de6be4a 100644 --- a/arch/microblaze/include/asm/pgtable.h +++ b/arch/microblaze/include/asm/pgtable.h @@ -99,7 +99,6 @@ extern pte_t *va_to_pte(unsigned long address); #define PTRS_PER_PGD (1 << (32 - PGDIR_SHIFT)) #define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) -#define FIRST_USER_PGD_NR 0 #define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT) #define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 32d2d701c684..e8683f58fd3e 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -479,6 +479,23 @@ config LANTIQ config MACH_LOONGSON32 bool "Loongson 32-bit family of machines" + select MACH_GENERIC_CORE + select USE_OF + select BUILTIN_DTB + select BOOT_ELF32 + select CEVT_R4K + select CSRC_R4K + select COMMON_CLK + select DMA_NONCOHERENT + select GENERIC_IRQ_SHOW_LEVEL + select IRQ_MIPS_CPU + select LS1X_IRQ + select SYS_HAS_CPU_LOONGSON32 + select SYS_HAS_EARLY_PRINTK + select USE_GENERIC_EARLY_PRINTK_8250 + select SYS_SUPPORTS_32BIT_KERNEL + select SYS_SUPPORTS_LITTLE_ENDIAN + select SYS_SUPPORTS_HIGHMEM select SYS_SUPPORTS_ZBOOT help This enables support for the Loongson-1 family of machines. @@ -1385,25 +1402,20 @@ config CPU_LOONGSON2F have a similar programming interface with FPGA northbridge used in Loongson2E. 
-config CPU_LOONGSON1B - bool "Loongson 1B" - depends on SYS_HAS_CPU_LOONGSON1B - select CPU_LOONGSON32 - select LEDS_GPIO_REGISTER - help - The Loongson 1B is a 32-bit SoC, which implements the MIPS32 - Release 1 instruction set and part of the MIPS32 Release 2 - instruction set. - -config CPU_LOONGSON1C - bool "Loongson 1C" - depends on SYS_HAS_CPU_LOONGSON1C - select CPU_LOONGSON32 +config CPU_LOONGSON32 + bool "Loongson 32-bit CPU" + depends on SYS_HAS_CPU_LOONGSON32 + select CPU_MIPS32 + select CPU_MIPSR2 + select CPU_HAS_PREFETCH + select CPU_HAS_LOAD_STORE_LR + select CPU_SUPPORTS_32BIT_KERNEL + select CPU_SUPPORTS_HIGHMEM + select CPU_SUPPORTS_CPUFREQ select LEDS_GPIO_REGISTER help - The Loongson 1C is a 32-bit SoC, which implements the MIPS32 - Release 1 instruction set and part of the MIPS32 Release 2 - instruction set. + The Loongson GS232 microarchitecture implements the MIPS32 Release 1 + instruction set and part of the MIPS32 Release 2 instruction set. config CPU_MIPS32_R1 bool "MIPS32 Release 1" @@ -1838,15 +1850,6 @@ config CPU_LOONGSON2EF select CPU_SUPPORTS_HUGEPAGES select RTC_MC146818_LIB -config CPU_LOONGSON32 - bool - select CPU_MIPS32 - select CPU_MIPSR2 - select CPU_HAS_PREFETCH - select CPU_SUPPORTS_32BIT_KERNEL - select CPU_SUPPORTS_HIGHMEM - select CPU_SUPPORTS_CPUFREQ - config CPU_BMIPS32_3300 select SMP_UP if SMP bool @@ -1884,10 +1887,7 @@ config SYS_HAS_CPU_LOONGSON2F select CPU_SUPPORTS_CPUFREQ select CPU_SUPPORTS_ADDRWINCFG if 64BIT -config SYS_HAS_CPU_LOONGSON1B - bool - -config SYS_HAS_CPU_LOONGSON1C +config SYS_HAS_CPU_LOONGSON32 bool config SYS_HAS_CPU_MIPS32_R1 @@ -2986,8 +2986,8 @@ choice prompt "Kernel command line type" depends on !CMDLINE_OVERRIDE default MIPS_CMDLINE_FROM_DTB if USE_OF && !ATH79 && !MACH_INGENIC && \ - !MACH_LOONGSON64 && !MIPS_MALTA && \ - !CAVIUM_OCTEON_SOC + !MACH_LOONGSON64 && !MACH_LOONGSON32 && \ + !MIPS_MALTA && !CAVIUM_OCTEON_SOC default MIPS_CMDLINE_FROM_BOOTLOADER config MIPS_CMDLINE_FROM_DTB diff --git a/arch/mips/alchemy/common/clock.c b/arch/mips/alchemy/common/clock.c index 6c8996e20a7d..551b0d21d9dc 100644 --- a/arch/mips/alchemy/common/clock.c +++ b/arch/mips/alchemy/common/clock.c @@ -211,30 +211,34 @@ static int alchemy_clk_aux_setr(struct clk_hw *hw, return 0; } -static long alchemy_clk_aux_roundr(struct clk_hw *hw, - unsigned long rate, - unsigned long *parent_rate) +static int alchemy_clk_aux_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { struct alchemy_auxpll_clk *a = to_auxpll_clk(hw); unsigned long mult; - if (!rate || !*parent_rate) + if (!req->rate || !req->best_parent_rate) { + req->rate = 0; + return 0; + } - mult = rate / (*parent_rate); + mult = req->rate / req->best_parent_rate; if (mult && (mult < 7)) mult = 7; if (mult > a->maxmult) mult = a->maxmult; - return (*parent_rate) * mult; + req->rate = req->best_parent_rate * mult; + + return 0; } static const struct clk_ops alchemy_clkops_aux = { .recalc_rate = alchemy_clk_aux_recalc, .set_rate = alchemy_clk_aux_setr, - .round_rate = alchemy_clk_aux_roundr, + .determine_rate = alchemy_clk_aux_determine_rate, }; static struct clk __init *alchemy_clk_setup_aux(const char *parent_name, diff --git a/arch/mips/boot/dts/Makefile b/arch/mips/boot/dts/Makefile index 7375c6ced82b..6d9dbe945541 100644 --- a/arch/mips/boot/dts/Makefile +++ b/arch/mips/boot/dts/Makefile @@ -8,6 +8,7 @@ subdir-$(CONFIG_FIT_IMAGE_FDT_BOSTON) += img subdir-$(CONFIG_MACH_INGENIC) += ingenic subdir-$(CONFIG_LANTIQ) += lantiq subdir-$(CONFIG_MACH_LOONGSON64) += 
loongson +subdir-$(CONFIG_MACH_LOONGSON32) += loongson subdir-$(CONFIG_SOC_VCOREIII) += mscc subdir-$(CONFIG_MIPS_MALTA) += mti subdir-$(CONFIG_LEGACY_BOARD_SEAD3) += mti diff --git a/arch/mips/boot/dts/brcm/bcm7346.dtsi b/arch/mips/boot/dts/brcm/bcm7346.dtsi index 2afa0dada575..9d6f97e02ff9 100644 --- a/arch/mips/boot/dts/brcm/bcm7346.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7346.dtsi @@ -531,7 +531,8 @@ }; memc-ddr@2000 { - compatible = "brcm,brcmstb-memc-ddr"; + compatible = "brcm,brcmstb-memc-ddr-rev-a.0.0", + "brcm,brcmstb-memc-ddr"; reg = <0x2000 0x300>; }; diff --git a/arch/mips/boot/dts/brcm/bcm7360.dtsi b/arch/mips/boot/dts/brcm/bcm7360.dtsi index a57cacea91cf..a7f60f059e50 100644 --- a/arch/mips/boot/dts/brcm/bcm7360.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7360.dtsi @@ -450,7 +450,8 @@ }; memc-ddr@2000 { - compatible = "brcm,brcmstb-memc-ddr"; + compatible = "brcm,brcmstb-memc-ddr-rev-a.0.0", + "brcm,brcmstb-memc-ddr"; reg = <0x2000 0x300>; }; diff --git a/arch/mips/boot/dts/brcm/bcm7362.dtsi b/arch/mips/boot/dts/brcm/bcm7362.dtsi index 728b9e9f84b8..2d483cbf254f 100644 --- a/arch/mips/boot/dts/brcm/bcm7362.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7362.dtsi @@ -446,7 +446,8 @@ }; memc-ddr@2000 { - compatible = "brcm,brcmstb-memc-ddr"; + compatible = "brcm,brcmstb-memc-ddr-rev-a.0.0", + "brcm,brcmstb-memc-ddr"; reg = <0x2000 0x300>; }; diff --git a/arch/mips/boot/dts/brcm/bcm7425.dtsi b/arch/mips/boot/dts/brcm/bcm7425.dtsi index 62588c53d356..c3bb020ff2b5 100644 --- a/arch/mips/boot/dts/brcm/bcm7425.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7425.dtsi @@ -542,7 +542,8 @@ }; memc-ddr@2000 { - compatible = "brcm,brcmstb-memc-ddr"; + compatible = "brcm,brcmstb-memc-ddr-rev-a.0.0", + "brcm,brcmstb-memc-ddr"; reg = <0x2000 0x300>; }; @@ -569,7 +570,8 @@ }; memc-ddr@2000 { - compatible = "brcm,brcmstb-memc-ddr"; + compatible = "brcm,brcmstb-memc-ddr-rev-a.0.0", + "brcm,brcmstb-memc-ddr"; reg = <0x2000 0x300>; }; diff --git a/arch/mips/boot/dts/brcm/bcm7435.dtsi b/arch/mips/boot/dts/brcm/bcm7435.dtsi index cfdf9804e126..60cfa4074cce 100644 --- a/arch/mips/boot/dts/brcm/bcm7435.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7435.dtsi @@ -558,7 +558,8 @@ }; memc-ddr@2000 { - compatible = "brcm,brcmstb-memc-ddr"; + compatible = "brcm,brcmstb-memc-ddr-rev-a.0.0", + "brcm,brcmstb-memc-ddr"; reg = <0x2000 0x300>; }; @@ -585,7 +586,8 @@ }; memc-ddr@2000 { - compatible = "brcm,brcmstb-memc-ddr"; + compatible = "brcm,brcmstb-memc-ddr-rev-a.0.0", + "brcm,brcmstb-memc-ddr"; reg = <0x2000 0x300>; }; diff --git a/arch/mips/boot/dts/lantiq/danube.dtsi b/arch/mips/boot/dts/lantiq/danube.dtsi index 7a7ba66aa534..650400bd5725 100644 --- a/arch/mips/boot/dts/lantiq/danube.dtsi +++ b/arch/mips/boot/dts/lantiq/danube.dtsi @@ -5,8 +5,12 @@ compatible = "lantiq,xway", "lantiq,danube"; cpus { + #address-cells = <1>; + #size-cells = <0>; + cpu@0 { compatible = "mips,mips24Kc"; + reg = <0>; }; }; @@ -100,6 +104,8 @@ 0x1000000 0 0x00000000 0xae00000 0 0x200000>; /* io space */ reg = <0x7000000 0x8000 /* config space */ 0xe105400 0x400>; /* pci bridge */ + + device_type = "pci"; }; }; }; diff --git a/arch/mips/boot/dts/lantiq/danube_easy50712.dts b/arch/mips/boot/dts/lantiq/danube_easy50712.dts index c4d7aa5753b0..c9f7886f57b8 100644 --- a/arch/mips/boot/dts/lantiq/danube_easy50712.dts +++ b/arch/mips/boot/dts/lantiq/danube_easy50712.dts @@ -4,6 +4,8 @@ /include/ "danube.dtsi" / { + model = "Intel EASY50712"; + chosen { bootargs = "console=ttyLTQ0,115200 init=/etc/preinit"; }; @@ -94,7 +96,7 @@ lantiq,tx-burst-length = <4>; }; - stp0: 
stp@e100bb0 { + stp0: gpio@e100bb0 { #gpio-cells = <2>; compatible = "lantiq,gpio-stp-xway"; gpio-controller; diff --git a/arch/mips/boot/dts/loongson/Makefile b/arch/mips/boot/dts/loongson/Makefile index 5e3ab984d70f..8ee12504d353 100644 --- a/arch/mips/boot/dts/loongson/Makefile +++ b/arch/mips/boot/dts/loongson/Makefile @@ -1,7 +1,17 @@ # SPDX-License-Identifier: GPL-2.0 + +ifneq ($(CONFIG_BUILTIN_DTB_NAME),) +dtb-y := $(addsuffix .dtb, $(CONFIG_BUILTIN_DTB_NAME)) +else dtb-$(CONFIG_MACH_LOONGSON64) += loongson64_2core_2k1000.dtb dtb-$(CONFIG_MACH_LOONGSON64) += loongson64c_4core_ls7a.dtb dtb-$(CONFIG_MACH_LOONGSON64) += loongson64c_4core_rs780e.dtb dtb-$(CONFIG_MACH_LOONGSON64) += loongson64c_8core_rs780e.dtb dtb-$(CONFIG_MACH_LOONGSON64) += loongson64g_4core_ls7a.dtb dtb-$(CONFIG_MACH_LOONGSON64) += loongson64v_4core_virtio.dtb + +dtb-$(CONFIG_MACH_LOONGSON32) += cq-t300b.dtb +dtb-$(CONFIG_MACH_LOONGSON32) += ls1b-demo.dtb +dtb-$(CONFIG_MACH_LOONGSON32) += lsgz_1b_dev.dtb +dtb-$(CONFIG_MACH_LOONGSON32) += smartloong-1c.dtb +endif diff --git a/arch/mips/boot/dts/loongson/cq-t300b.dts b/arch/mips/boot/dts/loongson/cq-t300b.dts new file mode 100644 index 000000000000..5244fab2496d --- /dev/null +++ b/arch/mips/boot/dts/loongson/cq-t300b.dts @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2023-2025 Keguang Zhang <keguang.zhang@gmail.com> + */ + +/dts-v1/; + +#include <dt-bindings/gpio/gpio.h> + +#include "loongson1c.dtsi" + +/ { + compatible = "loongson,cq-t300b", "loongson,ls1c"; + model = "CQ-T300B Board"; + + memory@0 { + device_type = "memory"; + reg = <0x0 0x8000000>; + }; + + aliases { + gpio0 = &gpio0; + gpio1 = &gpio1; + gpio2 = &gpio2; + gpio3 = &gpio3; + serial0 = &uart2; + }; + + chosen { + stdout-path = "serial0:115200n8"; + }; + + leds { + compatible = "gpio-leds"; + + led0 { + label = "led0"; + gpios = <&gpio1 20 GPIO_ACTIVE_LOW>; + linux,default-trigger = "heartbeat"; + }; + + led1 { + label = "led1"; + gpios = <&gpio1 21 GPIO_ACTIVE_LOW>; + linux,default-trigger = "nand-disk"; + }; + }; +}; + +&xtal { + clock-frequency = <24000000>; +}; + +&emac { + phy-handle = <&phy0>; + phy-mode = "rmii"; + status = "okay"; + + mdio { + compatible = "snps,dwmac-mdio"; + #address-cells = <1>; + #size-cells = <0>; + + phy0: ethernet-phy@13 { + reg = <0x13>; + }; + }; +}; + +&nand { + status = "okay"; + + nand@0 { + partitions { + compatible = "fixed-partitions"; + #address-cells = <1>; + #size-cells = <1>; + + partition@0 { + label = "kernel"; + reg = <0x0 0x1000000>; + }; + + partition@1000000 { + label = "rootfs"; + reg = <0x1000000 0x3f000000>; + }; + }; + }; +}; + +&ehci { + status = "okay"; +}; + +&ohci { + status = "okay"; +}; + +&rtc { + status = "okay"; +}; + +&uart2 { + status = "okay"; +}; + +&watchdog { + status = "okay"; +}; diff --git a/arch/mips/boot/dts/loongson/loongson1.dtsi b/arch/mips/boot/dts/loongson/loongson1.dtsi new file mode 100644 index 000000000000..5ba5a5d131ba --- /dev/null +++ b/arch/mips/boot/dts/loongson/loongson1.dtsi @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2023-2025 Keguang Zhang <keguang.zhang@gmail.com> + */ + +/dts-v1/; + +#include <dt-bindings/clock/loongson,ls1x-clk.h> +#include <dt-bindings/interrupt-controller/irq.h> + +/ { + #address-cells = <1>; + #size-cells = <1>; + + xtal: clock { + compatible = "fixed-clock"; + clock-output-names = "xtal"; + #clock-cells = <0>; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu0: cpu@0 { + reg = <0>; + device_type = "cpu"; + 
clocks = <&clkc LS1X_CLKID_CPU>; + #clock-cells = <1>; + }; + }; + + cpu_intc: interrupt-controller { + compatible = "mti,cpu-interrupt-controller"; + interrupt-controller; + #interrupt-cells = <1>; + #address-cells = <0>; + }; + + soc: bus@1fd00000 { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0x0 0x1fd00000 0x130000>; + + intc0: interrupt-controller@1040 { + compatible = "loongson,ls1x-intc"; + reg = <0x1040 0x18>; + interrupt-controller; + interrupt-parent = <&cpu_intc>; + interrupts = <2>; + #interrupt-cells = <2>; + }; + + intc1: interrupt-controller@1058 { + compatible = "loongson,ls1x-intc"; + reg = <0x1058 0x18>; + interrupt-controller; + interrupt-parent = <&cpu_intc>; + interrupts = <3>; + #interrupt-cells = <2>; + }; + + intc2: interrupt-controller@1070 { + compatible = "loongson,ls1x-intc"; + reg = <0x1070 0x18>; + interrupt-controller; + interrupt-parent = <&cpu_intc>; + interrupts = <4>; + #interrupt-cells = <2>; + }; + + intc3: interrupt-controller@1088 { + compatible = "loongson,ls1x-intc"; + reg = <0x1088 0x18>; + interrupt-controller; + interrupt-parent = <&cpu_intc>; + interrupts = <5>; + #interrupt-cells = <2>; + }; + + gpio0: gpio@10c0 { + compatible = "loongson,ls1x-gpio"; + reg = <0x10c0 0x4>; + gpio-controller; + #gpio-cells = <2>; + }; + + gpio1: gpio@10c4 { + compatible = "loongson,ls1x-gpio"; + reg = <0x10c4 0x4>; + gpio-controller; + #gpio-cells = <2>; + }; + }; + + apb: bus@1fe40000 { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0x0 0x1fe40000 0xc0000>; + + uart0: serial@0 { + compatible = "ns16550a"; + reg = <0x0 0x8>; + clocks = <&clkc LS1X_CLKID_APB>; + interrupt-parent = <&intc0>; + interrupts = <2 IRQ_TYPE_LEVEL_HIGH>; + status = "disabled"; + }; + + uart1: serial@4000 { + compatible = "ns16550a"; + reg = <0x4000 0x8>; + clocks = <&clkc LS1X_CLKID_APB>; + interrupt-parent = <&intc0>; + status = "disabled"; + }; + + uart2: serial@8000 { + compatible = "ns16550a"; + reg = <0x8000 0x8>; + clocks = <&clkc LS1X_CLKID_APB>; + interrupt-parent = <&intc0>; + status = "disabled"; + }; + + uart3: serial@c000 { + compatible = "ns16550a"; + reg = <0xc000 0x8>; + clocks = <&clkc LS1X_CLKID_APB>; + interrupt-parent = <&intc0>; + status = "disabled"; + }; + }; +}; diff --git a/arch/mips/boot/dts/loongson/loongson1b.dtsi b/arch/mips/boot/dts/loongson/loongson1b.dtsi new file mode 100644 index 000000000000..776d272b0f43 --- /dev/null +++ b/arch/mips/boot/dts/loongson/loongson1b.dtsi @@ -0,0 +1,198 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2023-2025 Keguang Zhang <keguang.zhang@gmail.com> + */ + +/dts-v1/; +#include "loongson1.dtsi" + +/ { + cpu_opp_table: opp-table { + compatible = "operating-points-v2"; + opp-shared; + + opp-44000000 { + opp-hz = /bits/ 64 <44000000>; + }; + opp-47142000 { + opp-hz = /bits/ 64 <47142000>; + }; + opp-50769000 { + opp-hz = /bits/ 64 <50769000>; + }; + opp-55000000 { + opp-hz = /bits/ 64 <55000000>; + }; + opp-60000000 { + opp-hz = /bits/ 64 <60000000>; + }; + opp-66000000 { + opp-hz = /bits/ 64 <66000000>; + }; + opp-73333000 { + opp-hz = /bits/ 64 <73333000>; + }; + opp-82500000 { + opp-hz = /bits/ 64 <82500000>; + }; + opp-94285000 { + opp-hz = /bits/ 64 <94285000>; + }; + opp-110000000 { + opp-hz = /bits/ 64 <110000000>; + }; + opp-132000000 { + opp-hz = /bits/ 64 <132000000>; + }; + opp-165000000 { + opp-hz = /bits/ 64 <165000000>; + }; + opp-220000000 { + opp-hz = /bits/ 64 <220000000>; + }; + }; + + clkc: clock-controller@1fe78030 { 
+ compatible = "loongson,ls1b-clk"; + reg = <0x1fe78030 0x8>; + clocks = <&xtal>; + #clock-cells = <1>; + }; +}; + +&soc { + syscon: syscon@420 { + compatible = "loongson,ls1b-syscon", "syscon"; + reg = <0x420 0x8>; + }; + + dma: dma-controller@1160 { + compatible = "loongson,ls1b-apbdma"; + reg = <0x1160 0x4>; + interrupt-parent = <&intc0>; + interrupts = <13 IRQ_TYPE_EDGE_RISING>, + <14 IRQ_TYPE_EDGE_RISING>, + <15 IRQ_TYPE_EDGE_RISING>; + interrupt-names = "ch0", "ch1", "ch2"; + #dma-cells = <1>; + }; + + ehci: usb@100000 { + compatible = "generic-ehci"; + reg = <0x100000 0x100>; + interrupt-parent = <&intc1>; + interrupts = <0 IRQ_TYPE_LEVEL_HIGH>; + status = "disabled"; + }; + + ohci: usb@108000 { + compatible = "generic-ohci"; + reg = <0x108000 0x100>; + interrupt-parent = <&intc1>; + interrupts = <1 IRQ_TYPE_LEVEL_HIGH>; + status = "disabled"; + }; + + gmac0: ethernet@110000 { + compatible = "loongson,ls1b-gmac", "snps,dwmac-3.50a"; + reg = <0x110000 0x10000>; + clocks = <&clkc LS1X_CLKID_AHB>; + clock-names = "stmmaceth"; + interrupt-parent = <&intc1>; + interrupts = <2 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "macirq"; + loongson,ls1-syscon = <&syscon>; + snps,pbl = <1>; + status = "disabled"; + }; + + gmac1: ethernet@120000 { + compatible = "loongson,ls1b-gmac", "snps,dwmac-3.50a"; + reg = <0x120000 0x10000>; + clocks = <&clkc LS1X_CLKID_AHB>; + clock-names = "stmmaceth"; + interrupt-parent = <&intc1>; + interrupts = <3 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "macirq"; + loongson,ls1-syscon = <&syscon>; + snps,pbl = <1>; + status = "disabled"; + }; +}; + +&apb { + clocksource: timer@1c030 { + compatible = "loongson,ls1b-pwmtimer"; + reg = <0x1c030 0x10>; + clocks = <&clkc LS1X_CLKID_APB>; + interrupt-parent = <&intc0>; + interrupts = <20 IRQ_TYPE_LEVEL_HIGH>; + }; + + watchdog: watchdog@1c060 { + compatible = "loongson,ls1b-wdt"; + reg = <0x1c060 0xc>; + clocks = <&clkc LS1X_CLKID_APB>; + status = "disabled"; + }; + + rtc: rtc@24000 { + compatible = "loongson,ls1b-rtc"; + reg = <0x24000 0x78>; + interrupt-parent = <&intc0>; + interrupts = <24 IRQ_TYPE_LEVEL_HIGH>; + status = "disabled"; + }; + + ac97: audio-controller@34000 { + compatible = "loongson,ls1b-ac97"; + reg = <0x34000 0x60>, <0x32420 0x4>, <0x34c4c 0x4>; + reg-names = "ac97", "audio-tx", "audio-rx"; + dmas = <&dma 1>, <&dma 2>; + dma-names = "tx", "rx"; + #sound-dai-cells = <0>; + status = "disabled"; + }; + + nand: nand-controller@38000 { + compatible = "loongson,ls1b-nand-controller"; + reg = <0x38000 0x24>, <0x38040 0x4>; + reg-names = "nand", "nand-dma"; + dmas = <&dma 0>; + dma-names = "rxtx"; + #address-cells = <1>; + #size-cells = <0>; + status = "disabled"; + + nand@0 { + reg = <0>; + label = "ls1x-nand"; + nand-use-soft-ecc-engine; + nand-ecc-algo = "hamming"; + }; + }; +}; + +&cpu0 { + operating-points-v2 = <&cpu_opp_table>; +}; + +&gpio0 { + ngpios = <31>; +}; + +&gpio1 { + ngpios = <30>; +}; + +&uart1 { + interrupts = <3 IRQ_TYPE_LEVEL_HIGH>; +}; + +&uart2 { + interrupts = <4 IRQ_TYPE_LEVEL_HIGH>; +}; + +&uart3 { + interrupts = <5 IRQ_TYPE_LEVEL_HIGH>; +}; diff --git a/arch/mips/boot/dts/loongson/loongson1c.dtsi b/arch/mips/boot/dts/loongson/loongson1c.dtsi new file mode 100644 index 000000000000..5e80c6a657af --- /dev/null +++ b/arch/mips/boot/dts/loongson/loongson1c.dtsi @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2023-2025 Keguang Zhang <keguang.zhang@gmail.com> + */ + +/dts-v1/; +#include "loongson1.dtsi" + +/ { + clkc: clock-controller@1fe78030 { + compatible = 
"loongson,ls1c-clk"; + reg = <0x1fe78030 0x8>; + clocks = <&xtal>; + #clock-cells = <1>; + }; +}; + +&soc { + syscon: syscon@420 { + compatible = "loongson,ls1c-syscon", "syscon"; + reg = <0x420 0x8>; + }; + + intc4: interrupt-controller@10a0 { + compatible = "loongson,ls1x-intc"; + reg = <0x10a0 0x18>; + interrupt-controller; + interrupt-parent = <&cpu_intc>; + interrupts = <6>; + #interrupt-cells = <2>; + }; + + gpio2: gpio@10c8 { + compatible = "loongson,ls1x-gpio"; + reg = <0x10c8 0x4>; + gpio-controller; + ngpios = <32>; + #gpio-cells = <2>; + }; + + gpio3: gpio@10cc { + compatible = "loongson,ls1x-gpio"; + reg = <0x10cc 0x4>; + gpio-controller; + ngpios = <32>; + #gpio-cells = <2>; + }; + + dma: dma-controller@1160 { + compatible = "loongson,ls1c-apbdma", "loongson,ls1b-apbdma"; + reg = <0x1160 0x4>; + interrupt-parent = <&intc0>; + interrupts = <13 IRQ_TYPE_EDGE_RISING>, + <14 IRQ_TYPE_EDGE_RISING>, + <15 IRQ_TYPE_EDGE_RISING>; + interrupt-names = "ch0", "ch1", "ch2"; + #dma-cells = <1>; + }; + + emac: ethernet@110000 { + compatible = "loongson,ls1c-emac", "snps,dwmac-3.50a"; + reg = <0x110000 0x10000>; + clocks = <&clkc LS1X_CLKID_AHB>; + clock-names = "stmmaceth"; + interrupt-parent = <&intc1>; + interrupts = <3 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "macirq"; + loongson,ls1-syscon = <&syscon>; + snps,pbl = <1>; + status = "disabled"; + }; + + ehci: usb@120000 { + compatible = "generic-ehci"; + reg = <0x120000 0x100>; + interrupt-parent = <&intc1>; + interrupts = <0 IRQ_TYPE_LEVEL_HIGH>; + status = "disabled"; + }; + + ohci: usb@128000 { + compatible = "generic-ohci"; + reg = <0x128000 0x100>; + interrupt-parent = <&intc1>; + interrupts = <1 IRQ_TYPE_LEVEL_HIGH>; + status = "disabled"; + }; +}; + +&apb { + watchdog: watchdog@1c060 { + compatible = "loongson,ls1c-wdt"; + reg = <0x1c060 0xc>; + clocks = <&clkc LS1X_CLKID_APB>; + status = "disabled"; + }; + + rtc: rtc@24000 { + compatible = "loongson,ls1c-rtc"; + reg = <0x24000 0x78>; + status = "disabled"; + }; + + nand: nand-controller@38000 { + compatible = "loongson,ls1c-nand-controller"; + reg = <0x38000 0x24>, <0x38040 0x4>; + reg-names = "nand", "nand-dma"; + dmas = <&dma 0>; + dma-names = "rxtx"; + #address-cells = <1>; + #size-cells = <0>; + status = "disabled"; + + nand@0 { + reg = <0>; + label = "ls1x-nand"; + nand-use-soft-ecc-engine; + nand-ecc-algo = "hamming"; + }; + }; +}; + +&gpio0 { + ngpios = <32>; +}; + +&gpio1 { + ngpios = <32>; +}; + +&uart1 { + interrupts = <4 IRQ_TYPE_LEVEL_HIGH>; +}; + +&uart2 { + interrupts = <5 IRQ_TYPE_LEVEL_HIGH>; +}; + +&uart3 { + interrupts = <29 IRQ_TYPE_LEVEL_HIGH>; +}; diff --git a/arch/mips/boot/dts/loongson/ls1b-demo.dts b/arch/mips/boot/dts/loongson/ls1b-demo.dts new file mode 100644 index 000000000000..13f8b102e100 --- /dev/null +++ b/arch/mips/boot/dts/loongson/ls1b-demo.dts @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2023-2025 Keguang Zhang <keguang.zhang@gmail.com> + */ + +/dts-v1/; + +#include <dt-bindings/gpio/gpio.h> + +#include "loongson1b.dtsi" + +/ { + compatible = "loongson,ls1b-demo", "loongson,ls1b"; + model = "LS1B-DEMO Board"; + + memory@0 { + device_type = "memory"; + reg = <0x0 0x10000000>; + }; + + aliases { + ethernet0 = &gmac0; + ethernet1 = &gmac1; + gpio0 = &gpio0; + gpio1 = &gpio1; + serial0 = &uart0; + }; + + chosen { + stdout-path = "serial0:38400n8"; + }; + + codec: audio-codec { + compatible = "realtek,alc203"; + #sound-dai-cells = <0>; + }; + + sound { + compatible = "simple-audio-card"; + simple-audio-card,name = 
"ls1b-alc203"; + simple-audio-card,format = "ac97"; + simple-audio-card,widgets = + "Speaker", "Line Out Jack", + "Headphone", "Headphone Jack", + "Microphone", "Microphone Jack"; + simple-audio-card,routing = + "Line Out Jack", "TX", + "Headphone Jack", "TX", + "RX", "Microphone Jack"; + + simple-audio-card,cpu { + sound-dai = <&ac97>; + }; + + simple-audio-card,codec { + sound-dai = <&codec>; + }; + }; +}; + +&xtal { + clock-frequency = <33000000>; +}; + +&gmac0 { + phy-handle = <&phy0>; + phy-mode = "rgmii-id"; + status = "okay"; + + mdio { + compatible = "snps,dwmac-mdio"; + #address-cells = <1>; + #size-cells = <0>; + + phy0: ethernet-phy@0 { + reg = <0x0>; + }; + }; +}; + +&nand { + status = "okay"; + + nand@0 { + partitions { + compatible = "fixed-partitions"; + #address-cells = <1>; + #size-cells = <1>; + + partition@0 { + label = "kernel"; + reg = <0x0 0x1000000>; + }; + + partition@1000000 { + label = "rootfs"; + reg = <0x1000000 0x7000000>; + }; + }; + }; +}; + +&ac97 { + status = "okay"; +}; + +&ehci { + status = "okay"; +}; + +&ohci { + status = "okay"; +}; + +&rtc { + status = "okay"; +}; + +&uart0 { + status = "okay"; +}; + +&watchdog { + status = "okay"; +}; diff --git a/arch/mips/boot/dts/loongson/lsgz_1b_dev.dts b/arch/mips/boot/dts/loongson/lsgz_1b_dev.dts new file mode 100644 index 000000000000..94ec151c0a94 --- /dev/null +++ b/arch/mips/boot/dts/loongson/lsgz_1b_dev.dts @@ -0,0 +1,162 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2023-2025 Keguang Zhang <keguang.zhang@gmail.com> + */ + +/dts-v1/; + +#include <dt-bindings/gpio/gpio.h> + +#include "loongson1b.dtsi" + +/ { + compatible = "loongson,lsgz-1b-dev", "loongson,ls1b"; + model = "LSGZ_1B_DEV Board"; + + memory@0 { + device_type = "memory"; + reg = <0x0 0x4000000>; + }; + + aliases { + ethernet0 = &gmac0; + ethernet1 = &gmac1; + gpio0 = &gpio0; + gpio1 = &gpio1; + serial0 = &uart2; + serial1 = &uart3; + }; + + chosen { + stdout-path = "serial0:115200n8"; + }; + + leds { + compatible = "gpio-leds"; + + led9 { + label = "led9"; + gpios = <&gpio1 6 GPIO_ACTIVE_LOW>; + linux,default-trigger = "heartbeat"; + }; + + led6 { + label = "led6"; + gpios = <&gpio1 7 GPIO_ACTIVE_LOW>; + linux,default-trigger = "nand-disk"; + }; + }; + + codec: audio-codec { + compatible = "realtek,alc203"; + #sound-dai-cells = <0>; + }; + + sound { + compatible = "simple-audio-card"; + simple-audio-card,name = "ls1b-alc655"; + simple-audio-card,format = "ac97"; + simple-audio-card,widgets = + "Speaker", "Line Out Jack", + "Line", "Line In Jack", + "Microphone", "Microphone Jack"; + simple-audio-card,routing = + "Line Out Jack", "TX", + "RX", "Line In Jack", + "RX", "Microphone Jack"; + + simple-audio-card,cpu { + sound-dai = <&ac97>; + }; + + simple-audio-card,codec { + sound-dai = <&codec>; + }; + }; +}; + +&xtal { + clock-frequency = <33000000>; +}; + +&gmac0 { + phy-handle = <&phy0>; + phy-mode = "mii"; + status = "okay"; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + compatible = "snps,dwmac-mdio"; + + phy0: ethernet-phy@0 { + reg = <0x0>; + }; + }; +}; + +&gmac1 { + phy-handle = <&phy1>; + phy-mode = "mii"; + status = "okay"; + + mdio { + compatible = "snps,dwmac-mdio"; + #address-cells = <1>; + #size-cells = <0>; + + phy1: ethernet-phy@0 { + reg = <0x0>; + }; + }; +}; + +&nand { + status = "okay"; + + nand@0 { + partitions { + compatible = "fixed-partitions"; + #address-cells = <1>; + #size-cells = <1>; + + partition@0 { + label = "kernel"; + reg = <0x0 0x1000000>; + }; + + partition@1000000 { + label = 
"rootfs"; + reg = <0x1000000 0x7000000>; + }; + }; + }; +}; + +&ac97 { + status = "okay"; +}; + +&ehci { + status = "okay"; +}; + +&ohci { + status = "okay"; +}; + +&rtc { + status = "okay"; +}; + +&uart2 { + status = "okay"; +}; + +&uart3 { + status = "okay"; +}; + +&watchdog { + status = "okay"; +}; diff --git a/arch/mips/boot/dts/loongson/smartloong-1c.dts b/arch/mips/boot/dts/loongson/smartloong-1c.dts new file mode 100644 index 000000000000..e6c6c2f00c42 --- /dev/null +++ b/arch/mips/boot/dts/loongson/smartloong-1c.dts @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2023-2025 Keguang Zhang <keguang.zhang@gmail.com> + */ + +/dts-v1/; + +#include <dt-bindings/gpio/gpio.h> + +#include "loongson1c.dtsi" + +/ { + compatible = "loongmasses,smartloong-1c", "loongson,ls1c"; + model = "Smartloong-1C Board"; + + memory@0 { + device_type = "memory"; + reg = <0x0 0x4000000>; + }; + + aliases { + gpio0 = &gpio0; + gpio1 = &gpio1; + gpio2 = &gpio2; + gpio3 = &gpio3; + serial0 = &uart2; + }; + + chosen { + stdout-path = "serial0:115200n8"; + }; + + leds { + compatible = "gpio-leds"; + + led0 { + label = "led0"; + gpios = <&gpio1 20 GPIO_ACTIVE_LOW>; + linux,default-trigger = "heartbeat"; + }; + + led1 { + label = "led1"; + gpios = <&gpio1 21 GPIO_ACTIVE_LOW>; + linux,default-trigger = "nand-disk"; + }; + }; +}; + +&xtal { + clock-frequency = <24000000>; +}; + +&emac { + phy-handle = <&phy0>; + phy-mode = "rmii"; + status = "okay"; + + mdio { + compatible = "snps,dwmac-mdio"; + #address-cells = <1>; + #size-cells = <0>; + + phy0: ethernet-phy@13 { + reg = <0x13>; + }; + }; +}; + +&nand { + status = "okay"; + + nand@0 { + partitions { + compatible = "fixed-partitions"; + #address-cells = <1>; + #size-cells = <1>; + + partition@0 { + label = "kernel"; + reg = <0x0 0x1000000>; + }; + + partition@1000000 { + label = "rootfs"; + reg = <0x1000000 0x7000000>; + }; + }; + }; +}; + +&ehci { + status = "okay"; +}; + +&ohci { + status = "okay"; +}; + +&rtc { + status = "okay"; +}; + +&uart2 { + status = "okay"; +}; + +&watchdog { + status = "okay"; +}; diff --git a/arch/mips/cavium-octeon/executive/octeon-model.c b/arch/mips/cavium-octeon/executive/octeon-model.c index 657dbad9644e..98996cc0857e 100644 --- a/arch/mips/cavium-octeon/executive/octeon-model.c +++ b/arch/mips/cavium-octeon/executive/octeon-model.c @@ -25,6 +25,7 @@ * Contact Cavium Networks for more information ***********************license end**************************************/ +#include <linux/string.h> #include <asm/octeon/octeon.h> enum octeon_feature_bits __octeon_feature_bits __read_mostly; @@ -208,16 +209,16 @@ static const char *__init octeon_model_get_string_buffer(uint32_t chip_id, */ switch (chip_id & 0xf) { case 0: - strcpy(pass, "1.X"); + strscpy(pass, "1.X"); break; case 1: - strcpy(pass, "2.X"); + strscpy(pass, "2.X"); break; case 3: - strcpy(pass, "3.X"); + strscpy(pass, "3.X"); break; default: - strcpy(pass, "X.X"); + strscpy(pass, "X.X"); break; } break; @@ -232,13 +233,13 @@ static const char *__init octeon_model_get_string_buffer(uint32_t chip_id, */ switch (chip_id & 0xf) { case 0: - strcpy(pass, "1.0"); + strscpy(pass, "1.0"); break; case 2: - strcpy(pass, "1.1"); + strscpy(pass, "1.1"); break; default: - strcpy(pass, "X.X"); + strscpy(pass, "X.X"); break; } break; @@ -253,13 +254,13 @@ static const char *__init octeon_model_get_string_buffer(uint32_t chip_id, */ switch (chip_id & 0xf) { case 0: - strcpy(pass, "1.0"); + strscpy(pass, "1.0"); break; case 2: - strcpy(pass, "1.1"); + 
strscpy(pass, "1.1"); break; default: - strcpy(pass, "X.X"); + strscpy(pass, "X.X"); break; } break; @@ -273,16 +274,16 @@ static const char *__init octeon_model_get_string_buffer(uint32_t chip_id, if ((chip_id & 0xFF) < 0x8) { switch (chip_id & 0x3) { case 0: - strcpy(pass, "1.0"); + strscpy(pass, "1.0"); break; case 1: - strcpy(pass, "1.1"); + strscpy(pass, "1.1"); break; case 3: - strcpy(pass, "1.2"); + strscpy(pass, "1.2"); break; default: - strcpy(pass, "1.X"); + strscpy(pass, "1.X"); break; } } @@ -447,7 +448,7 @@ static const char *__init octeon_model_get_string_buffer(uint32_t chip_id, default: family = "XX"; core_model = "XX"; - strcpy(pass, "X.X"); + strscpy(pass, "X.X"); suffix = "XXX"; break; } diff --git a/arch/mips/cavium-octeon/octeon-platform.c b/arch/mips/cavium-octeon/octeon-platform.c index 5e1dd4e6e82f..47677b5d7ed0 100644 --- a/arch/mips/cavium-octeon/octeon-platform.c +++ b/arch/mips/cavium-octeon/octeon-platform.c @@ -13,6 +13,7 @@ #include <linux/of_fdt.h> #include <linux/platform_device.h> #include <linux/libfdt.h> +#include <linux/string.h> #include <asm/octeon/octeon.h> #include <asm/octeon/cvmx-helper-board.h> @@ -538,8 +539,7 @@ static void __init octeon_fdt_set_phy(int eth, int phy_addr) if (octeon_has_88e1145()) { fdt_nop_property(initial_boot_params, phy, "marvell,reg-init"); - memset(new_name, 0, sizeof(new_name)); - strcpy(new_name, "marvell,88e1145"); + strscpy_pad(new_name, "marvell,88e1145"); p = fdt_getprop(initial_boot_params, phy, "compatible", ¤t_len); if (p && current_len >= strlen(new_name)) diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c index 08ea2cde1eb5..054e331b3202 100644 --- a/arch/mips/cavium-octeon/smp.c +++ b/arch/mips/cavium-octeon/smp.c @@ -334,7 +334,7 @@ static void octeon_cpu_die(unsigned int cpu) new_mask = *p; } - pr_info("Reset core %d. Available Coremask = 0x%x \n", coreid, new_mask); + pr_info("Reset core %d. 
Available Coremask = 0x%x\n", coreid, new_mask); mb(); cvmx_write_csr(CVMX_CIU_PP_RST, 1 << coreid); cvmx_write_csr(CVMX_CIU_PP_RST, 0); diff --git a/arch/mips/configs/bigsur_defconfig b/arch/mips/configs/bigsur_defconfig index 97d2cd997285..349e9e0b4f54 100644 --- a/arch/mips/configs/bigsur_defconfig +++ b/arch/mips/configs/bigsur_defconfig @@ -144,9 +144,9 @@ CONFIG_EXT2_FS=m CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=m -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=m +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_EXT4_FS=y CONFIG_QUOTA=y CONFIG_QUOTA_NETLINK_INTERFACE=y diff --git a/arch/mips/configs/cobalt_defconfig b/arch/mips/configs/cobalt_defconfig index b0b551efac7c..6ee9ee391fdc 100644 --- a/arch/mips/configs/cobalt_defconfig +++ b/arch/mips/configs/cobalt_defconfig @@ -59,9 +59,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y diff --git a/arch/mips/configs/decstation_64_defconfig b/arch/mips/configs/decstation_64_defconfig index 85a4472cb058..52a63dd7aac7 100644 --- a/arch/mips/configs/decstation_64_defconfig +++ b/arch/mips/configs/decstation_64_defconfig @@ -133,9 +133,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_ISO9660_FS=y CONFIG_JOLIET=y CONFIG_PROC_KCORE=y diff --git a/arch/mips/configs/decstation_defconfig b/arch/mips/configs/decstation_defconfig index a3b2c8da2dde..59fb7ee5eeb0 100644 --- a/arch/mips/configs/decstation_defconfig +++ b/arch/mips/configs/decstation_defconfig @@ -129,9 +129,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_ISO9660_FS=y CONFIG_JOLIET=y CONFIG_PROC_KCORE=y diff --git a/arch/mips/configs/decstation_r4k_defconfig b/arch/mips/configs/decstation_r4k_defconfig index a476717b8a6a..8be1cb433e95 100644 --- a/arch/mips/configs/decstation_r4k_defconfig +++ b/arch/mips/configs/decstation_r4k_defconfig @@ -129,9 +129,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_ISO9660_FS=y CONFIG_JOLIET=y CONFIG_PROC_KCORE=y diff --git a/arch/mips/configs/fuloong2e_defconfig b/arch/mips/configs/fuloong2e_defconfig index cdedbb8a8f53..b6fe3c962464 100644 --- a/arch/mips/configs/fuloong2e_defconfig +++ b/arch/mips/configs/fuloong2e_defconfig @@ -173,7 +173,7 @@ CONFIG_USB_ISIGHTFW=m CONFIG_UIO=m CONFIG_UIO_CIF=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y CONFIG_AUTOFS_FS=y diff --git a/arch/mips/configs/ip22_defconfig b/arch/mips/configs/ip22_defconfig index 2decf8b98d31..e123848f94ab 100644 --- a/arch/mips/configs/ip22_defconfig +++ b/arch/mips/configs/ip22_defconfig @@ -232,9 +232,9 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_INTF_DEV_UIE_EMUL=y CONFIG_RTC_DRV_DS1286=y 
CONFIG_EXT2_FS=m -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_XFS_FS=m CONFIG_XFS_QUOTA=y CONFIG_QUOTA=y diff --git a/arch/mips/configs/ip27_defconfig b/arch/mips/configs/ip27_defconfig index 5d079941fd20..1c10242b148b 100644 --- a/arch/mips/configs/ip27_defconfig +++ b/arch/mips/configs/ip27_defconfig @@ -272,9 +272,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_XFS_FS=m CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y diff --git a/arch/mips/configs/ip28_defconfig b/arch/mips/configs/ip28_defconfig index 6db21e498faa..755cbf20f5a5 100644 --- a/arch/mips/configs/ip28_defconfig +++ b/arch/mips/configs/ip28_defconfig @@ -49,9 +49,9 @@ CONFIG_WATCHDOG=y CONFIG_INDYDOG=y # CONFIG_VGA_CONSOLE is not set CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_QUOTA=y CONFIG_PROC_KCORE=y # CONFIG_PROC_PAGE_MONITOR is not set diff --git a/arch/mips/configs/ip30_defconfig b/arch/mips/configs/ip30_defconfig index a4524e785469..718f3060d9fa 100644 --- a/arch/mips/configs/ip30_defconfig +++ b/arch/mips/configs/ip30_defconfig @@ -143,9 +143,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_XFS_FS=m CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y diff --git a/arch/mips/configs/ip32_defconfig b/arch/mips/configs/ip32_defconfig index d8ac11427f69..7568838eb08b 100644 --- a/arch/mips/configs/ip32_defconfig +++ b/arch/mips/configs/ip32_defconfig @@ -89,9 +89,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_QUOTA=y CONFIG_QFMT_V1=m CONFIG_QFMT_V2=m diff --git a/arch/mips/configs/jazz_defconfig b/arch/mips/configs/jazz_defconfig index 65adb538030d..a790c2610fd3 100644 --- a/arch/mips/configs/jazz_defconfig +++ b/arch/mips/configs/jazz_defconfig @@ -69,7 +69,7 @@ CONFIG_FB_G364=y CONFIG_FRAMEBUFFER_CONSOLE=y # CONFIG_HWMON is not set CONFIG_EXT2_FS=m -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_XFS_FS=m CONFIG_XFS_QUOTA=y CONFIG_AUTOFS_FS=m diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig index 5038a27d035f..8d3f20ed19b5 100644 --- a/arch/mips/configs/lemote2f_defconfig +++ b/arch/mips/configs/lemote2f_defconfig @@ -226,9 +226,9 @@ CONFIG_MMC=m CONFIG_LEDS_CLASS=y CONFIG_STAGING=y CONFIG_EXT2_FS=m -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y CONFIG_XFS_FS=m diff --git a/arch/mips/configs/loongson1b_defconfig b/arch/mips/configs/loongson1_defconfig index 68207b31dc20..81acae6f61c8 100644 --- a/arch/mips/configs/loongson1b_defconfig +++ b/arch/mips/configs/loongson1_defconfig @@ -1,7 +1,6 @@ # CONFIG_LOCALVERSION_AUTO is not set CONFIG_KERNEL_XZ=y CONFIG_SYSVIPC=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_PREEMPT=y CONFIG_BSD_PROCESS_ACCT=y 
CONFIG_BSD_PROCESS_ACCT_V3=y @@ -12,15 +11,16 @@ CONFIG_NAMESPACES=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_EXPERT=y CONFIG_PERF_EVENTS=y -# CONFIG_COMPAT_BRK is not set CONFIG_MACH_LOONGSON32=y -# CONFIG_SECCOMP is not set # CONFIG_SUSPEND is not set +# CONFIG_SECCOMP is not set +# CONFIG_GCC_PLUGINS is not set CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y -# CONFIG_BLK_DEV_BSG is not set +# CONFIG_BLOCK_LEGACY_AUTOLOAD is not set # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +# CONFIG_COMPAT_BRK is not set CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -31,6 +31,7 @@ CONFIG_SYN_COOKIES=y # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set # CONFIG_WIRELESS is not set +# CONFIG_ETHTOOL_NETLINK is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y # CONFIG_STANDALONE is not set @@ -38,32 +39,75 @@ CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_BLOCK=y CONFIG_MTD_RAW_NAND=y +CONFIG_MTD_NAND_LOONGSON1=y CONFIG_MTD_UBI=y CONFIG_BLK_DEV_LOOP=y CONFIG_SCSI=m # CONFIG_SCSI_PROC_FS is not set CONFIG_BLK_DEV_SD=m +# CONFIG_BLK_DEV_BSG is not set # CONFIG_SCSI_LOWLEVEL is not set CONFIG_NETDEVICES=y +# CONFIG_NET_VENDOR_ALACRITECH is not set +# CONFIG_NET_VENDOR_AMAZON is not set +# CONFIG_NET_VENDOR_AQUANTIA is not set +# CONFIG_NET_VENDOR_ARC is not set +# CONFIG_NET_VENDOR_ASIX is not set # CONFIG_NET_VENDOR_BROADCOM is not set +# CONFIG_NET_VENDOR_CADENCE is not set +# CONFIG_NET_VENDOR_CAVIUM is not set +# CONFIG_NET_VENDOR_CORTINA is not set +# CONFIG_NET_VENDOR_DAVICOM is not set +# CONFIG_NET_VENDOR_ENGLEDER is not set +# CONFIG_NET_VENDOR_EZCHIP is not set +# CONFIG_NET_VENDOR_FUNGIBLE is not set +# CONFIG_NET_VENDOR_GOOGLE is not set +# CONFIG_NET_VENDOR_HISILICON is not set +# CONFIG_NET_VENDOR_HUAWEI is not set # CONFIG_NET_VENDOR_INTEL is not set +# CONFIG_NET_VENDOR_LITEX is not set # CONFIG_NET_VENDOR_MARVELL is not set +# CONFIG_NET_VENDOR_META is not set # CONFIG_NET_VENDOR_MICREL is not set +# CONFIG_NET_VENDOR_MICROCHIP is not set +# CONFIG_NET_VENDOR_MICROSEMI is not set +# CONFIG_NET_VENDOR_MICROSOFT is not set +# CONFIG_NET_VENDOR_NI is not set # CONFIG_NET_VENDOR_NATSEMI is not set +# CONFIG_NET_VENDOR_NETRONOME is not set +# CONFIG_NET_VENDOR_PENSANDO is not set +# CONFIG_NET_VENDOR_QUALCOMM is not set +# CONFIG_NET_VENDOR_RENESAS is not set +# CONFIG_NET_VENDOR_ROCKER is not set +# CONFIG_NET_VENDOR_SAMSUNG is not set # CONFIG_NET_VENDOR_SEEQ is not set +# CONFIG_NET_VENDOR_SOLARFLARE is not set # CONFIG_NET_VENDOR_SMSC is not set +# CONFIG_NET_VENDOR_SOCIONEXT is not set CONFIG_STMMAC_ETH=y +# CONFIG_DWMAC_GENERIC is not set +# CONFIG_NET_VENDOR_SYNOPSYS is not set +# CONFIG_NET_VENDOR_VERTEXCOM is not set +# CONFIG_NET_VENDOR_VIA is not set +# CONFIG_NET_VENDOR_WANGXUN is not set # CONFIG_NET_VENDOR_WIZNET is not set +# CONFIG_NET_VENDOR_XILINX is not set +CONFIG_DAVICOM_PHY=y +CONFIG_REALTEK_PHY=y +# CONFIG_USB_NET_DRIVERS is not set # CONFIG_WLAN is not set CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set +# CONFIG_VT_CONSOLE is not set CONFIG_VT_HW_CONSOLE_BINDING=y CONFIG_LEGACY_PTY_COUNT=8 CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_OF_PLATFORM=y # CONFIG_HW_RANDOM is not set +# CONFIG_PTP_1588_CLOCK is not set CONFIG_GPIOLIB=y CONFIG_GPIO_LOONGSON1=y # CONFIG_HWMON is not set @@ -71,7 +115,15 @@ CONFIG_WATCHDOG=y CONFIG_WATCHDOG_NOWAYOUT=y CONFIG_WATCHDOG_SYSFS=y CONFIG_LOONGSON1_WDT=y -# CONFIG_VGA_CONSOLE is not set +CONFIG_SOUND=y +CONFIG_SND=y +# 
CONFIG_SND_SUPPORT_OLD_API is not set +# CONFIG_SND_DRIVERS is not set +# CONFIG_SND_MIPS is not set +# CONFIG_SND_USB is not set +CONFIG_SND_SOC=y +CONFIG_SND_LOONGSON1_AC97=y +CONFIG_SND_SIMPLE_CARD=y CONFIG_HID_GENERIC=m CONFIG_USB_HID=m CONFIG_USB=y @@ -86,17 +138,20 @@ CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y CONFIG_LEDS_GPIO=y CONFIG_LEDS_TRIGGERS=y +CONFIG_LEDS_TRIGGER_MTD=y CONFIG_LEDS_TRIGGER_HEARTBEAT=y CONFIG_RTC_CLASS=y -CONFIG_RTC_DRV_LOONGSON1=y +# CONFIG_RTC_NVMEM is not set +CONFIG_RTC_DRV_LOONGSON=y +CONFIG_DMADEVICES=y +CONFIG_LOONGSON1_APB_DMA=y +# CONFIG_VIRTIO_MENU is not set +# CONFIG_VHOST_MENU is not set +# CONFIG_MIPS_PLATFORM_DEVICES is not set # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +# CONFIG_NVMEM is not set +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y # CONFIG_DNOTIFY is not set CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y @@ -105,16 +160,21 @@ CONFIG_TMPFS_POSIX_ACL=y CONFIG_UBIFS_FS=y CONFIG_UBIFS_FS_ADVANCED_COMPR=y CONFIG_UBIFS_ATIME_SUPPORT=y +# CONFIG_UBIFS_FS_SECURITY is not set CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_ISO8859_1=m -# CONFIG_CRYPTO_ECHAINIV is not set # CONFIG_CRYPTO_HW is not set +# CONFIG_XZ_DEC_X86 is not set +# CONFIG_XZ_DEC_POWERPC is not set +# CONFIG_XZ_DEC_ARM is not set +# CONFIG_XZ_DEC_ARMTHUMB is not set +# CONFIG_XZ_DEC_ARM64 is not set +# CONFIG_XZ_DEC_SPARC is not set +# CONFIG_XZ_DEC_RISCV is not set CONFIG_DYNAMIC_DEBUG=y -CONFIG_DEBUG_FS=y +# CONFIG_DEBUG_MISC is not set CONFIG_MAGIC_SYSRQ=y -# CONFIG_SCHED_DEBUG is not set -# CONFIG_DEBUG_PREEMPT is not set # CONFIG_FTRACE is not set # CONFIG_EARLY_PRINTK is not set diff --git a/arch/mips/configs/loongson1c_defconfig b/arch/mips/configs/loongson1c_defconfig deleted file mode 100644 index c3910a9dee9e..000000000000 --- a/arch/mips/configs/loongson1c_defconfig +++ /dev/null @@ -1,121 +0,0 @@ -# CONFIG_LOCALVERSION_AUTO is not set -CONFIG_KERNEL_XZ=y -CONFIG_SYSVIPC=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_PREEMPT=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_BSD_PROCESS_ACCT_V3=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_LOG_BUF_SHIFT=16 -CONFIG_NAMESPACES=y -CONFIG_CC_OPTIMIZE_FOR_SIZE=y -CONFIG_EXPERT=y -CONFIG_PERF_EVENTS=y -# CONFIG_COMPAT_BRK is not set -CONFIG_MACH_LOONGSON32=y -CONFIG_LOONGSON1_LS1C=y -# CONFIG_SECCOMP is not set -# CONFIG_SUSPEND is not set -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODVERSIONS=y -# CONFIG_BLK_DEV_BSG is not set -# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_INET=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -CONFIG_SYN_COOKIES=y -# CONFIG_INET_DIAG is not set -# CONFIG_IPV6 is not set -# CONFIG_WIRELESS is not set -CONFIG_DEVTMPFS=y -CONFIG_DEVTMPFS_MOUNT=y -# CONFIG_STANDALONE is not set -CONFIG_MTD=y -CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_BLOCK=y -CONFIG_MTD_RAW_NAND=y -CONFIG_MTD_UBI=y -CONFIG_BLK_DEV_LOOP=y -CONFIG_SCSI=m -# CONFIG_SCSI_PROC_FS is not set -CONFIG_BLK_DEV_SD=m -# CONFIG_SCSI_LOWLEVEL is not set -CONFIG_NETDEVICES=y -# CONFIG_NET_VENDOR_BROADCOM is not set -# CONFIG_NET_VENDOR_INTEL is not set -# CONFIG_NET_VENDOR_MARVELL is not set -# CONFIG_NET_VENDOR_MICREL is not set -# CONFIG_NET_VENDOR_NATSEMI is not set -# CONFIG_NET_VENDOR_SEEQ is not set -# CONFIG_NET_VENDOR_SMSC is not set -CONFIG_STMMAC_ETH=y -# CONFIG_NET_VENDOR_WIZNET is not set -# CONFIG_WLAN is not set 
-CONFIG_INPUT_EVDEV=y -# CONFIG_INPUT_KEYBOARD is not set -# CONFIG_INPUT_MOUSE is not set -# CONFIG_SERIO is not set -CONFIG_VT_HW_CONSOLE_BINDING=y -CONFIG_LEGACY_PTY_COUNT=8 -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -# CONFIG_HW_RANDOM is not set -CONFIG_GPIOLIB=y -CONFIG_GPIO_LOONGSON1=y -# CONFIG_HWMON is not set -CONFIG_WATCHDOG=y -CONFIG_WATCHDOG_NOWAYOUT=y -CONFIG_WATCHDOG_SYSFS=y -CONFIG_LOONGSON1_WDT=y -# CONFIG_VGA_CONSOLE is not set -CONFIG_HID_GENERIC=m -CONFIG_USB_HID=m -CONFIG_USB=y -CONFIG_USB_ANNOUNCE_NEW_DEVICES=y -CONFIG_USB_EHCI_HCD=y -# CONFIG_USB_EHCI_TT_NEWSCHED is not set -CONFIG_USB_EHCI_HCD_PLATFORM=y -CONFIG_USB_STORAGE=m -CONFIG_USB_SERIAL=m -CONFIG_USB_SERIAL_PL2303=m -CONFIG_NEW_LEDS=y -CONFIG_LEDS_CLASS=y -CONFIG_LEDS_GPIO=y -CONFIG_LEDS_TRIGGERS=y -CONFIG_LEDS_TRIGGER_HEARTBEAT=y -CONFIG_RTC_CLASS=y -CONFIG_RTC_DRV_LOONGSON1=y -# CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y -# CONFIG_DNOTIFY is not set -CONFIG_VFAT_FS=y -CONFIG_PROC_KCORE=y -CONFIG_TMPFS=y -CONFIG_TMPFS_POSIX_ACL=y -CONFIG_UBIFS_FS=y -CONFIG_UBIFS_FS_ADVANCED_COMPR=y -CONFIG_UBIFS_ATIME_SUPPORT=y -CONFIG_NFS_FS=y -CONFIG_ROOT_NFS=y -CONFIG_NLS_CODEPAGE_437=m -CONFIG_NLS_ISO8859_1=m -# CONFIG_CRYPTO_ECHAINIV is not set -# CONFIG_CRYPTO_HW is not set -CONFIG_DYNAMIC_DEBUG=y -CONFIG_DEBUG_FS=y -CONFIG_MAGIC_SYSRQ=y -# CONFIG_SCHED_DEBUG is not set -# CONFIG_DEBUG_PREEMPT is not set -# CONFIG_FTRACE is not set -# CONFIG_EARLY_PRINTK is not set diff --git a/arch/mips/configs/loongson2k_defconfig b/arch/mips/configs/loongson2k_defconfig index 0cc665d3ea34..aec1fd1902eb 100644 --- a/arch/mips/configs/loongson2k_defconfig +++ b/arch/mips/configs/loongson2k_defconfig @@ -298,9 +298,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_XFS_FS=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y diff --git a/arch/mips/configs/loongson3_defconfig b/arch/mips/configs/loongson3_defconfig index 240efff37d98..575aaf242361 100644 --- a/arch/mips/configs/loongson3_defconfig +++ b/arch/mips/configs/loongson3_defconfig @@ -348,9 +348,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_XFS_FS=y CONFIG_XFS_POSIX_ACL=y CONFIG_QUOTA=y diff --git a/arch/mips/configs/malta_defconfig b/arch/mips/configs/malta_defconfig index 9fcbac829920..81704ec67f09 100644 --- a/arch/mips/configs/malta_defconfig +++ b/arch/mips/configs/malta_defconfig @@ -313,7 +313,7 @@ CONFIG_RTC_DRV_CMOS=y CONFIG_UIO=m CONFIG_UIO_CIF=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y CONFIG_JFS_SECURITY=y diff --git a/arch/mips/configs/malta_kvm_defconfig b/arch/mips/configs/malta_kvm_defconfig index 19102386a81c..82a97f58bce1 100644 --- a/arch/mips/configs/malta_kvm_defconfig +++ b/arch/mips/configs/malta_kvm_defconfig @@ -319,7 +319,7 @@ CONFIG_RTC_DRV_CMOS=y CONFIG_UIO=m CONFIG_UIO_CIF=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y CONFIG_JFS_SECURITY=y diff --git a/arch/mips/configs/malta_qemu_32r6_defconfig 
b/arch/mips/configs/malta_qemu_32r6_defconfig index 1b98f6945c2d..accb471a1d93 100644 --- a/arch/mips/configs/malta_qemu_32r6_defconfig +++ b/arch/mips/configs/malta_qemu_32r6_defconfig @@ -148,7 +148,7 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_CMOS=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_XFS_FS=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y diff --git a/arch/mips/configs/maltaaprp_defconfig b/arch/mips/configs/maltaaprp_defconfig index 7b8905cb3400..6bda67c5f68f 100644 --- a/arch/mips/configs/maltaaprp_defconfig +++ b/arch/mips/configs/maltaaprp_defconfig @@ -149,7 +149,7 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_CMOS=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_XFS_FS=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y diff --git a/arch/mips/configs/maltasmvp_defconfig b/arch/mips/configs/maltasmvp_defconfig index 8249f6a51895..e4082537f80f 100644 --- a/arch/mips/configs/maltasmvp_defconfig +++ b/arch/mips/configs/maltasmvp_defconfig @@ -148,9 +148,9 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_CMOS=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_XFS_FS=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y diff --git a/arch/mips/configs/maltasmvp_eva_defconfig b/arch/mips/configs/maltasmvp_eva_defconfig index 21cb37668763..58f5af45fa98 100644 --- a/arch/mips/configs/maltasmvp_eva_defconfig +++ b/arch/mips/configs/maltasmvp_eva_defconfig @@ -152,7 +152,7 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_CMOS=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_XFS_FS=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y diff --git a/arch/mips/configs/maltaup_defconfig b/arch/mips/configs/maltaup_defconfig index 3df9cd669683..9bfef7de0d1c 100644 --- a/arch/mips/configs/maltaup_defconfig +++ b/arch/mips/configs/maltaup_defconfig @@ -148,7 +148,7 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_CMOS=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_XFS_FS=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y diff --git a/arch/mips/configs/maltaup_xpa_defconfig b/arch/mips/configs/maltaup_xpa_defconfig index 1dd07c9d1812..0f9ef20744f9 100644 --- a/arch/mips/configs/maltaup_xpa_defconfig +++ b/arch/mips/configs/maltaup_xpa_defconfig @@ -319,7 +319,7 @@ CONFIG_RTC_DRV_CMOS=y CONFIG_UIO=m CONFIG_UIO_CIF=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y CONFIG_JFS_SECURITY=y diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig index 2707ab134639..c58d1a61d528 100644 --- a/arch/mips/configs/mtx1_defconfig +++ b/arch/mips/configs/mtx1_defconfig @@ -595,9 +595,9 @@ CONFIG_EXT2_FS=m CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=m -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=m +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_QUOTA=y CONFIG_AUTOFS_FS=y CONFIG_FUSE_FS=m diff --git a/arch/mips/configs/rm200_defconfig b/arch/mips/configs/rm200_defconfig index 39a2419e1f3e..b507dc4dddd4 100644 --- a/arch/mips/configs/rm200_defconfig +++ b/arch/mips/configs/rm200_defconfig @@ -307,7 +307,7 @@ CONFIG_USB_SISUSBVGA=m CONFIG_USB_LD=m CONFIG_USB_TEST=m CONFIG_EXT2_FS=m -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_XFS_FS=m CONFIG_XFS_QUOTA=y CONFIG_AUTOFS_FS=m diff --git a/arch/mips/fw/arc/cmdline.c 
b/arch/mips/fw/arc/cmdline.c index 155c5e911723..86b0e377b713 100644 --- a/arch/mips/fw/arc/cmdline.c +++ b/arch/mips/fw/arc/cmdline.c @@ -42,12 +42,13 @@ static char __init *move_firmware_args(int argc, LONG *argv, char *cp) { char *s; int actr, i; + size_t len; actr = 1; /* Always ignore argv[0] */ while (actr < argc) { - for(i = 0; i < ARRAY_SIZE(used_arc); i++) { - int len = strlen(used_arc[i][0]); + for (i = 0; i < ARRAY_SIZE(used_arc); i++) { + len = strlen(used_arc[i][0]); if (!strncmp(prom_argv(actr), used_arc[i][0], len)) { /* Ok, we want it. First append the replacement... */ @@ -57,8 +58,9 @@ static char __init *move_firmware_args(int argc, LONG *argv, char *cp) s = strchr(prom_argv(actr), '='); if (s) { s++; - strcpy(cp, s); - cp += strlen(s); + len = strlen(s); + memcpy(cp, s, len + 1); + cp += len; } *cp++ = ' '; break; @@ -74,6 +76,7 @@ void __init prom_init_cmdline(int argc, LONG *argv) { char *cp; int actr, i; + size_t len; actr = 1; /* Always ignore argv[0] */ @@ -86,14 +89,15 @@ void __init prom_init_cmdline(int argc, LONG *argv) while (actr < argc) { for (i = 0; i < ARRAY_SIZE(ignored); i++) { - int len = strlen(ignored[i]); - + len = strlen(ignored[i]); if (!strncmp(prom_argv(actr), ignored[i], len)) goto pic_cont; } + /* Ok, we want it. */ - strcpy(cp, prom_argv(actr)); - cp += strlen(prom_argv(actr)); + len = strlen(prom_argv(actr)); + memcpy(cp, prom_argv(actr), len + 1); + cp += len; *cp++ = ' '; pic_cont: @@ -105,6 +109,6 @@ void __init prom_init_cmdline(int argc, LONG *argv) *cp = '\0'; #ifdef DEBUG_CMDLINE - printk(KERN_DEBUG "prom cmdline: %s\n", arcs_cmdline); + pr_debug("prom cmdline: %s\n", arcs_cmdline); #endif } diff --git a/arch/mips/generic/board-ocelot.c b/arch/mips/generic/board-ocelot.c index 7115410acb4f..59a0fb243582 100644 --- a/arch/mips/generic/board-ocelot.c +++ b/arch/mips/generic/board-ocelot.c @@ -4,6 +4,7 @@ * * Copyright (c) 2017 Microsemi Corporation */ +#include <linux/string.h> #include <asm/machine.h> #include <asm/prom.h> @@ -41,7 +42,7 @@ static __init bool ocelot_detect(void) if (prom_argc > 1 && strlen(prom_argv[1]) > 0) /* ignore all built-in args if any f/w args given */ - strcpy(arcs_cmdline, prom_argv[1]); + strscpy(arcs_cmdline, prom_argv[1]); } return true; diff --git a/arch/mips/include/asm/addrspace.h b/arch/mips/include/asm/addrspace.h index 7e9ef01cb182..e2354e9b0ee2 100644 --- a/arch/mips/include/asm/addrspace.h +++ b/arch/mips/include/asm/addrspace.h @@ -15,7 +15,7 @@ /* * Configure language */ -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define _ATYPE_ #define _ATYPE32_ #define _ATYPE64_ @@ -34,7 +34,7 @@ /* * 32-bit MIPS address spaces */ -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define _ACAST32_ #define _ACAST64_ #else diff --git a/arch/mips/include/asm/asm-eva.h b/arch/mips/include/asm/asm-eva.h index e327ebc76753..220431d00ee9 100644 --- a/arch/mips/include/asm/asm-eva.h +++ b/arch/mips/include/asm/asm-eva.h @@ -10,7 +10,7 @@ #ifndef __ASM_ASM_EVA_H #define __ASM_ASM_EVA_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* Kernel variants */ @@ -99,7 +99,7 @@ #endif /* CONFIG_EVA */ -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ #define kernel_cache(op, base) cache op, base #define kernel_pref(hint, base) pref hint, base @@ -185,6 +185,6 @@ #endif /* CONFIG_EVA */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_ASM_EVA_H */ diff --git a/arch/mips/include/asm/asm.h b/arch/mips/include/asm/asm.h index 87ff609b53fe..0ed19ffed076 100644 --- a/arch/mips/include/asm/asm.h +++ 
b/arch/mips/include/asm/asm.h @@ -37,7 +37,7 @@ #define CFI_SECTIONS #endif -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ /* * LEAF - declare leaf routine */ @@ -123,7 +123,7 @@ symbol = value #define ASM_PRINT(string) #endif -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ /* * Stack alignment @@ -228,7 +228,7 @@ symbol = value #define LONG_INS ins #define LONG_EXT ext -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define LONG .word #endif #define LONGSIZE 4 @@ -257,7 +257,7 @@ symbol = value #define LONG_INS dins #define LONG_EXT dext -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define LONG .dword #endif #define LONGSIZE 8 diff --git a/arch/mips/include/asm/bmips.h b/arch/mips/include/asm/bmips.h index 3a1cdfddb987..0eee81be9e2b 100644 --- a/arch/mips/include/asm/bmips.h +++ b/arch/mips/include/asm/bmips.h @@ -42,7 +42,7 @@ #define ZSCM_REG_BASE 0x97000000 -#if !defined(__ASSEMBLY__) +#if !defined(__ASSEMBLER__) #include <linux/cpumask.h> #include <asm/r4kcache.h> @@ -124,6 +124,6 @@ static inline void bmips_write_zscm_reg(unsigned int offset, unsigned long data) barrier(); } -#endif /* !defined(__ASSEMBLY__) */ +#endif /* !defined(__ASSEMBLER__) */ #endif /* _ASM_BMIPS_H */ diff --git a/arch/mips/include/asm/cpu-type.h b/arch/mips/include/asm/cpu-type.h index a4a66bd93748..fd37a44a2f19 100644 --- a/arch/mips/include/asm/cpu-type.h +++ b/arch/mips/include/asm/cpu-type.h @@ -24,8 +24,7 @@ static inline int __pure __get_cpu_type(const int cpu_type) case CPU_LOONGSON64: #endif -#if defined(CONFIG_SYS_HAS_CPU_LOONGSON1B) || \ - defined(CONFIG_SYS_HAS_CPU_LOONGSON1C) +#ifdef CONFIG_SYS_HAS_CPU_LOONGSON32 case CPU_LOONGSON32: #endif diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h index ecb9854cb432..0fd9f9bbd21f 100644 --- a/arch/mips/include/asm/cpu.h +++ b/arch/mips/include/asm/cpu.h @@ -248,8 +248,7 @@ #define PRID_REV_VR4181A 0x0070 /* Same as VR4122 */ #define PRID_REV_VR4130 0x0080 #define PRID_REV_34K_V1_0_2 0x0022 -#define PRID_REV_LOONGSON1B 0x0020 -#define PRID_REV_LOONGSON1C 0x0020 /* Same as Loongson-1B */ +#define PRID_REV_LOONGSON1 0x0020 #define PRID_REV_LOONGSON2E 0x0002 #define PRID_REV_LOONGSON2F 0x0003 #define PRID_REV_LOONGSON2K_R1_0 0x0000 @@ -288,7 +287,7 @@ #define FPIR_IMP_NONE 0x0000 -#if !defined(__ASSEMBLY__) +#if !defined(__ASSEMBLER__) enum cpu_type_enum { CPU_UNKNOWN, @@ -329,7 +328,7 @@ enum cpu_type_enum { CPU_LAST }; -#endif /* !__ASSEMBLY */ +#endif /* !__ASSEMBLER__ */ /* * ISA Level encodings diff --git a/arch/mips/include/asm/dec/ecc.h b/arch/mips/include/asm/dec/ecc.h index c3a3f71f1a54..dbc39643c31c 100644 --- a/arch/mips/include/asm/dec/ecc.h +++ b/arch/mips/include/asm/dec/ecc.h @@ -37,7 +37,7 @@ #define KN0X_ESR_SYNLO (0x7f<<0) /* syndrome from ECC logic */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/interrupt.h> diff --git a/arch/mips/include/asm/dec/interrupts.h b/arch/mips/include/asm/dec/interrupts.h index e10d341067c8..c1cd36c04b6c 100644 --- a/arch/mips/include/asm/dec/interrupts.h +++ b/arch/mips/include/asm/dec/interrupts.h @@ -95,7 +95,7 @@ #define DEC_CPU_IRQ_ALL (0xff << CAUSEB_IP) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * Interrupt table structures to hide differences between systems. 
@@ -121,6 +121,6 @@ extern void cpu_all_int(void); extern void dec_intr_unimplemented(void); extern void asic_intr_unimplemented(void); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/mips/include/asm/dec/kn01.h b/arch/mips/include/asm/dec/kn01.h index 88d9ffd74258..6c074b93a7db 100644 --- a/arch/mips/include/asm/dec/kn01.h +++ b/arch/mips/include/asm/dec/kn01.h @@ -71,7 +71,7 @@ #define KN01_CSR_LEDS (0xff<<0) /* ~diagnostic LEDs (w/o) */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/interrupt.h> #include <linux/spinlock.h> diff --git a/arch/mips/include/asm/dec/kn02.h b/arch/mips/include/asm/dec/kn02.h index 93430b5f4724..9fea17020079 100644 --- a/arch/mips/include/asm/dec/kn02.h +++ b/arch/mips/include/asm/dec/kn02.h @@ -80,7 +80,7 @@ #define KN02_IRQ_ALL 0xff -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> diff --git a/arch/mips/include/asm/dec/kn02xa.h b/arch/mips/include/asm/dec/kn02xa.h index b56b4577f6ef..3580d78b906f 100644 --- a/arch/mips/include/asm/dec/kn02xa.h +++ b/arch/mips/include/asm/dec/kn02xa.h @@ -70,7 +70,7 @@ #define KN02XA_EAR_RES_0 (0x3<<0) /* unused */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/interrupt.h> diff --git a/arch/mips/include/asm/eva.h b/arch/mips/include/asm/eva.h index a3d1807f227c..c7b39f38634b 100644 --- a/arch/mips/include/asm/eva.h +++ b/arch/mips/include/asm/eva.h @@ -13,7 +13,7 @@ #include <kernel-entry-init.h> -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #ifdef CONFIG_EVA @@ -38,6 +38,6 @@ platform_eva_init #endif /* CONFIG_EVA */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/mips/include/asm/ftrace.h b/arch/mips/include/asm/ftrace.h index b41fc1044668..7d557f03188f 100644 --- a/arch/mips/include/asm/ftrace.h +++ b/arch/mips/include/asm/ftrace.h @@ -15,7 +15,7 @@ #define MCOUNT_ADDR ((unsigned long)(_mcount)) #define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern void _mcount(void); #define mcount _mcount @@ -89,11 +89,11 @@ struct dyn_arch_ftrace { void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra, unsigned long fp); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_FTRACE_SYSCALLS -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * Some syscall entry functions on mips start with "__sys_" (fork and clone, * for instance). We should also match the sys_ variant with those. 
@@ -105,6 +105,6 @@ static inline bool arch_syscall_match_sym_name(const char *sym, return !strcmp(sym, name) || (!strncmp(sym, "__sys_", 6) && !strcmp(sym + 6, name + 4)); } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* CONFIG_FTRACE_SYSCALLS */ #endif /* _ASM_MIPS_FTRACE_H */ diff --git a/arch/mips/include/asm/hazards.h b/arch/mips/include/asm/hazards.h index cb16be93b048..a084b3b3bc81 100644 --- a/arch/mips/include/asm/hazards.h +++ b/arch/mips/include/asm/hazards.h @@ -301,7 +301,7 @@ do { \ #endif -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define _ssnop ___ssnop #define _ehb ___ehb @@ -417,6 +417,6 @@ do { \ */ extern void mips_ihb(void); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_HAZARDS_H */ diff --git a/arch/mips/include/asm/irqflags.h b/arch/mips/include/asm/irqflags.h index f5b8300f4573..70e5b05fd88b 100644 --- a/arch/mips/include/asm/irqflags.h +++ b/arch/mips/include/asm/irqflags.h @@ -11,7 +11,7 @@ #ifndef _ASM_IRQFLAGS_H #define _ASM_IRQFLAGS_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/compiler.h> #include <linux/stringify.h> @@ -142,7 +142,7 @@ static inline int arch_irqs_disabled(void) return arch_irqs_disabled_flags(arch_local_save_flags()); } -#endif /* #ifndef __ASSEMBLY__ */ +#endif /* #ifndef __ASSEMBLER__ */ /* * Do the CPU's IRQ-state tracing from assembly code. diff --git a/arch/mips/include/asm/jazz.h b/arch/mips/include/asm/jazz.h index a61970d01a81..9356e87dd64b 100644 --- a/arch/mips/include/asm/jazz.h +++ b/arch/mips/include/asm/jazz.h @@ -70,7 +70,7 @@ #define LED_E 0x9e #define LED_F 0x8e -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ static __inline__ void pica_set_led(unsigned int bits) { @@ -79,7 +79,7 @@ static __inline__ void pica_set_led(unsigned int bits) *led_register = bits; } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* * Base address of the Sonic Ethernet adapter in Jazz machines. @@ -100,7 +100,7 @@ static __inline__ void pica_set_led(unsigned int bits) #define JAZZ_KEYBOARD_DATA 0xe0005000 #define JAZZ_KEYBOARD_COMMAND 0xe0005001 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ typedef struct { unsigned char data; @@ -121,7 +121,7 @@ typedef struct { */ #define keyboard_hardware jazz_keyboard_hardware -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* * i8042 keyboard controller for most other Mips machines. 
@@ -154,7 +154,7 @@ typedef struct { /* * DRAM configuration register */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifdef __MIPSEL__ typedef struct { unsigned int bank2 : 3; @@ -174,7 +174,7 @@ typedef struct { unsigned int bank2 : 3; } dram_configuration; #endif -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #define PICA_DRAM_CONFIG 0xe00fffe0 @@ -260,7 +260,7 @@ typedef struct { /* * Access the R4030 DMA and I/O Controller */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ static inline void r4030_delay(void) { @@ -299,7 +299,7 @@ static inline void r4030_write_reg32(unsigned long addr, unsigned val) r4030_delay(); } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #define JAZZ_FDC_BASE 0xe0003000 #define JAZZ_RTC_BASE 0xe0004000 diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h index ff5d388502d4..c1508f88e03e 100644 --- a/arch/mips/include/asm/jump_label.h +++ b/arch/mips/include/asm/jump_label.h @@ -10,7 +10,7 @@ #define arch_jump_label_transform_static arch_jump_label_transform -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> #include <asm/isa-rev.h> @@ -76,5 +76,5 @@ struct jump_entry { jump_label_t key; }; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_MIPS_JUMP_LABEL_H */ diff --git a/arch/mips/include/asm/linkage.h b/arch/mips/include/asm/linkage.h index 1829c2b6da6c..fd44ba754f1a 100644 --- a/arch/mips/include/asm/linkage.h +++ b/arch/mips/include/asm/linkage.h @@ -2,7 +2,7 @@ #ifndef __ASM_LINKAGE_H #define __ASM_LINKAGE_H -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #include <asm/asm.h> #endif diff --git a/arch/mips/include/asm/mach-generic/spaces.h b/arch/mips/include/asm/mach-generic/spaces.h index f8783d339fb0..6332b6cbf7ee 100644 --- a/arch/mips/include/asm/mach-generic/spaces.h +++ b/arch/mips/include/asm/mach-generic/spaces.h @@ -21,13 +21,13 @@ /* * This gives the physical RAM offset. 
*/ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ # if defined(CONFIG_MIPS_AUTO_PFN_OFFSET) # define PHYS_OFFSET ((unsigned long)PFN_PHYS(ARCH_PFN_OFFSET)) # elif !defined(PHYS_OFFSET) # define PHYS_OFFSET _AC(0, UL) # endif -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #ifdef CONFIG_32BIT #define CAC_BASE _AC(0x80000000, UL) diff --git a/arch/mips/include/asm/mach-loongson32/irq.h b/arch/mips/include/asm/mach-loongson32/irq.h deleted file mode 100644 index 6115f025ba21..000000000000 --- a/arch/mips/include/asm/mach-loongson32/irq.h +++ /dev/null @@ -1,107 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 2011 Zhang, Keguang <keguang.zhang@gmail.com> - * - * IRQ mappings for Loongson 1 - */ - -#ifndef __ASM_MACH_LOONGSON32_IRQ_H -#define __ASM_MACH_LOONGSON32_IRQ_H - -/* - * CPU core Interrupt Numbers - */ -#define MIPS_CPU_IRQ_BASE 0 -#define MIPS_CPU_IRQ(x) (MIPS_CPU_IRQ_BASE + (x)) - -#define SOFTINT0_IRQ MIPS_CPU_IRQ(0) -#define SOFTINT1_IRQ MIPS_CPU_IRQ(1) -#define INT0_IRQ MIPS_CPU_IRQ(2) -#define INT1_IRQ MIPS_CPU_IRQ(3) -#define INT2_IRQ MIPS_CPU_IRQ(4) -#define INT3_IRQ MIPS_CPU_IRQ(5) -#define INT4_IRQ MIPS_CPU_IRQ(6) -#define TIMER_IRQ MIPS_CPU_IRQ(7) /* cpu timer */ - -#define MIPS_CPU_IRQS (MIPS_CPU_IRQ(7) + 1 - MIPS_CPU_IRQ_BASE) - -/* - * INT0~3 Interrupt Numbers - */ -#define LS1X_IRQ_BASE MIPS_CPU_IRQS -#define LS1X_IRQ(n, x) (LS1X_IRQ_BASE + (n << 5) + (x)) - -#define LS1X_UART0_IRQ LS1X_IRQ(0, 2) -#if defined(CONFIG_LOONGSON1_LS1B) -#define LS1X_UART1_IRQ LS1X_IRQ(0, 3) -#define LS1X_UART2_IRQ LS1X_IRQ(0, 4) -#define LS1X_UART3_IRQ LS1X_IRQ(0, 5) -#elif defined(CONFIG_LOONGSON1_LS1C) -#define LS1X_UART1_IRQ LS1X_IRQ(0, 4) -#define LS1X_UART2_IRQ LS1X_IRQ(0, 5) -#endif -#define LS1X_CAN0_IRQ LS1X_IRQ(0, 6) -#define LS1X_CAN1_IRQ LS1X_IRQ(0, 7) -#define LS1X_SPI0_IRQ LS1X_IRQ(0, 8) -#define LS1X_SPI1_IRQ LS1X_IRQ(0, 9) -#define LS1X_AC97_IRQ LS1X_IRQ(0, 10) -#define LS1X_DMA0_IRQ LS1X_IRQ(0, 13) -#define LS1X_DMA1_IRQ LS1X_IRQ(0, 14) -#define LS1X_DMA2_IRQ LS1X_IRQ(0, 15) -#if defined(CONFIG_LOONGSON1_LS1C) -#define LS1X_NAND_IRQ LS1X_IRQ(0, 16) -#endif -#define LS1X_PWM0_IRQ LS1X_IRQ(0, 17) -#define LS1X_PWM1_IRQ LS1X_IRQ(0, 18) -#define LS1X_PWM2_IRQ LS1X_IRQ(0, 19) -#define LS1X_PWM3_IRQ LS1X_IRQ(0, 20) -#define LS1X_RTC_INT0_IRQ LS1X_IRQ(0, 21) -#define LS1X_RTC_INT1_IRQ LS1X_IRQ(0, 22) -#define LS1X_RTC_INT2_IRQ LS1X_IRQ(0, 23) -#if defined(CONFIG_LOONGSON1_LS1B) -#define LS1X_TOY_INT0_IRQ LS1X_IRQ(0, 24) -#define LS1X_TOY_INT1_IRQ LS1X_IRQ(0, 25) -#define LS1X_TOY_INT2_IRQ LS1X_IRQ(0, 26) -#define LS1X_RTC_TICK_IRQ LS1X_IRQ(0, 27) -#define LS1X_TOY_TICK_IRQ LS1X_IRQ(0, 28) -#define LS1X_UART4_IRQ LS1X_IRQ(0, 29) -#define LS1X_UART5_IRQ LS1X_IRQ(0, 30) -#elif defined(CONFIG_LOONGSON1_LS1C) -#define LS1X_UART3_IRQ LS1X_IRQ(0, 29) -#define LS1X_ADC_IRQ LS1X_IRQ(0, 30) -#define LS1X_SDIO_IRQ LS1X_IRQ(0, 31) -#endif - -#define LS1X_EHCI_IRQ LS1X_IRQ(1, 0) -#define LS1X_OHCI_IRQ LS1X_IRQ(1, 1) -#if defined(CONFIG_LOONGSON1_LS1B) -#define LS1X_GMAC0_IRQ LS1X_IRQ(1, 2) -#define LS1X_GMAC1_IRQ LS1X_IRQ(1, 3) -#elif defined(CONFIG_LOONGSON1_LS1C) -#define LS1X_OTG_IRQ LS1X_IRQ(1, 2) -#define LS1X_GMAC0_IRQ LS1X_IRQ(1, 3) -#define LS1X_CAM_IRQ LS1X_IRQ(1, 4) -#define LS1X_UART4_IRQ LS1X_IRQ(1, 5) -#define LS1X_UART5_IRQ LS1X_IRQ(1, 6) -#define LS1X_UART6_IRQ LS1X_IRQ(1, 7) -#define LS1X_UART7_IRQ LS1X_IRQ(1, 8) -#define LS1X_UART8_IRQ LS1X_IRQ(1, 9) -#define LS1X_UART9_IRQ LS1X_IRQ(1, 13) -#define LS1X_UART10_IRQ LS1X_IRQ(1, 14) -#define 
LS1X_UART11_IRQ LS1X_IRQ(1, 15) -#define LS1X_I2C0_IRQ LS1X_IRQ(1, 17) -#define LS1X_I2C1_IRQ LS1X_IRQ(1, 18) -#define LS1X_I2C2_IRQ LS1X_IRQ(1, 19) -#endif - -#if defined(CONFIG_LOONGSON1_LS1B) -#define INTN 4 -#elif defined(CONFIG_LOONGSON1_LS1C) -#define INTN 5 -#endif - -#define LS1X_IRQS (LS1X_IRQ(INTN, 31) + 1 - LS1X_IRQ_BASE) - -#define NR_IRQS (MIPS_CPU_IRQS + LS1X_IRQS) - -#endif /* __ASM_MACH_LOONGSON32_IRQ_H */ diff --git a/arch/mips/include/asm/mach-loongson32/loongson1.h b/arch/mips/include/asm/mach-loongson32/loongson1.h deleted file mode 100644 index 84f45461c832..000000000000 --- a/arch/mips/include/asm/mach-loongson32/loongson1.h +++ /dev/null @@ -1,50 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 2011 Zhang, Keguang <keguang.zhang@gmail.com> - * - * Register mappings for Loongson 1 - */ - -#ifndef __ASM_MACH_LOONGSON32_LOONGSON1_H -#define __ASM_MACH_LOONGSON32_LOONGSON1_H - -#if defined(CONFIG_LOONGSON1_LS1B) -#define DEFAULT_MEMSIZE 64 /* If no memsize provided */ -#elif defined(CONFIG_LOONGSON1_LS1C) -#define DEFAULT_MEMSIZE 32 -#endif - -/* Loongson 1 Register Bases */ -#define LS1X_MUX_BASE 0x1fd00420 -#define LS1X_INTC_BASE 0x1fd01040 -#define LS1X_GPIO0_BASE 0x1fd010c0 -#define LS1X_GPIO1_BASE 0x1fd010c4 -#define LS1X_DMAC_BASE 0x1fd01160 -#define LS1X_CBUS_BASE 0x1fd011c0 -#define LS1X_EHCI_BASE 0x1fe00000 -#define LS1X_OHCI_BASE 0x1fe08000 -#define LS1X_GMAC0_BASE 0x1fe10000 -#define LS1X_GMAC1_BASE 0x1fe20000 - -#define LS1X_UART0_BASE 0x1fe40000 -#define LS1X_UART1_BASE 0x1fe44000 -#define LS1X_UART2_BASE 0x1fe48000 -#define LS1X_UART3_BASE 0x1fe4c000 -#define LS1X_CAN0_BASE 0x1fe50000 -#define LS1X_CAN1_BASE 0x1fe54000 -#define LS1X_I2C0_BASE 0x1fe58000 -#define LS1X_I2C1_BASE 0x1fe68000 -#define LS1X_I2C2_BASE 0x1fe70000 -#define LS1X_PWM0_BASE 0x1fe5c000 -#define LS1X_PWM1_BASE 0x1fe5c010 -#define LS1X_PWM2_BASE 0x1fe5c020 -#define LS1X_PWM3_BASE 0x1fe5c030 -#define LS1X_WDT_BASE 0x1fe5c060 -#define LS1X_RTC_BASE 0x1fe64000 -#define LS1X_AC97_BASE 0x1fe74000 -#define LS1X_NAND_BASE 0x1fe78000 -#define LS1X_CLK_BASE 0x1fe78030 - -#include <regs-mux.h> - -#endif /* __ASM_MACH_LOONGSON32_LOONGSON1_H */ diff --git a/arch/mips/include/asm/mach-loongson32/platform.h b/arch/mips/include/asm/mach-loongson32/platform.h deleted file mode 100644 index f74292b13bc3..000000000000 --- a/arch/mips/include/asm/mach-loongson32/platform.h +++ /dev/null @@ -1,23 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 2011 Zhang, Keguang <keguang.zhang@gmail.com> - */ - -#ifndef __ASM_MACH_LOONGSON32_PLATFORM_H -#define __ASM_MACH_LOONGSON32_PLATFORM_H - -#include <linux/platform_device.h> - -extern struct platform_device ls1x_uart_pdev; -extern struct platform_device ls1x_eth0_pdev; -extern struct platform_device ls1x_eth1_pdev; -extern struct platform_device ls1x_ehci_pdev; -extern struct platform_device ls1x_gpio0_pdev; -extern struct platform_device ls1x_gpio1_pdev; -extern struct platform_device ls1x_rtc_pdev; -extern struct platform_device ls1x_wdt_pdev; - -void __init ls1x_rtc_set_extclk(struct platform_device *pdev); -void __init ls1x_serial_set_uartclk(struct platform_device *pdev); - -#endif /* __ASM_MACH_LOONGSON32_PLATFORM_H */ diff --git a/arch/mips/include/asm/mach-loongson32/regs-mux.h b/arch/mips/include/asm/mach-loongson32/regs-mux.h deleted file mode 100644 index 95788a4f03a0..000000000000 --- a/arch/mips/include/asm/mach-loongson32/regs-mux.h +++ /dev/null @@ -1,124 +0,0 @@ -/* SPDX-License-Identifier: 
GPL-2.0-or-later */ -/* - * Copyright (c) 2014 Zhang, Keguang <keguang.zhang@gmail.com> - * - * Loongson 1 MUX Register Definitions. - */ - -#ifndef __ASM_MACH_LOONGSON32_REGS_MUX_H -#define __ASM_MACH_LOONGSON32_REGS_MUX_H - -#define LS1X_MUX_REG(x) \ - ((void __iomem *)KSEG1ADDR(LS1X_MUX_BASE + (x))) - -#define LS1X_MUX_CTRL0 LS1X_MUX_REG(0x0) -#define LS1X_MUX_CTRL1 LS1X_MUX_REG(0x4) - -#if defined(CONFIG_LOONGSON1_LS1B) -/* MUX CTRL0 Register Bits */ -#define UART0_USE_PWM23 BIT(28) -#define UART0_USE_PWM01 BIT(27) -#define UART1_USE_LCD0_5_6_11 BIT(26) -#define I2C2_USE_CAN1 BIT(25) -#define I2C1_USE_CAN0 BIT(24) -#define NAND3_USE_UART5 BIT(23) -#define NAND3_USE_UART4 BIT(22) -#define NAND3_USE_UART1_DAT BIT(21) -#define NAND3_USE_UART1_CTS BIT(20) -#define NAND3_USE_PWM23 BIT(19) -#define NAND3_USE_PWM01 BIT(18) -#define NAND2_USE_UART5 BIT(17) -#define NAND2_USE_UART4 BIT(16) -#define NAND2_USE_UART1_DAT BIT(15) -#define NAND2_USE_UART1_CTS BIT(14) -#define NAND2_USE_PWM23 BIT(13) -#define NAND2_USE_PWM01 BIT(12) -#define NAND1_USE_UART5 BIT(11) -#define NAND1_USE_UART4 BIT(10) -#define NAND1_USE_UART1_DAT BIT(9) -#define NAND1_USE_UART1_CTS BIT(8) -#define NAND1_USE_PWM23 BIT(7) -#define NAND1_USE_PWM01 BIT(6) -#define GMAC1_USE_UART1 BIT(4) -#define GMAC1_USE_UART0 BIT(3) -#define LCD_USE_UART0_DAT BIT(2) -#define LCD_USE_UART15 BIT(1) -#define LCD_USE_UART0 BIT(0) - -/* MUX CTRL1 Register Bits */ -#define USB_RESET BIT(31) -#define SPI1_CS_USE_PWM01 BIT(24) -#define SPI1_USE_CAN BIT(23) -#define DISABLE_DDR_CONFSPACE BIT(20) -#define DDR32TO16EN BIT(16) -#define GMAC1_SHUT BIT(13) -#define GMAC0_SHUT BIT(12) -#define USB_SHUT BIT(11) -#define UART1_3_USE_CAN1 BIT(5) -#define UART1_2_USE_CAN0 BIT(4) -#define GMAC1_USE_TXCLK BIT(3) -#define GMAC0_USE_TXCLK BIT(2) -#define GMAC1_USE_PWM23 BIT(1) -#define GMAC0_USE_PWM01 BIT(0) - -#elif defined(CONFIG_LOONGSON1_LS1C) - -/* SHUT_CTRL Register Bits */ -#define UART_SPLIT GENMASK(31, 30) -#define OUTPUT_CLK GENMASK(29, 26) -#define ADC_SHUT BIT(25) -#define SDIO_SHUT BIT(24) -#define DMA2_SHUT BIT(23) -#define DMA1_SHUT BIT(22) -#define DMA0_SHUT BIT(21) -#define SPI1_SHUT BIT(20) -#define SPI0_SHUT BIT(19) -#define I2C2_SHUT BIT(18) -#define I2C1_SHUT BIT(17) -#define I2C0_SHUT BIT(16) -#define AC97_SHUT BIT(15) -#define I2S_SHUT BIT(14) -#define UART3_SHUT BIT(13) -#define UART2_SHUT BIT(12) -#define UART1_SHUT BIT(11) -#define UART0_SHUT BIT(10) -#define CAN1_SHUT BIT(9) -#define CAN0_SHUT BIT(8) -#define ECC_SHUT BIT(7) -#define GMAC_SHUT BIT(6) -#define USBHOST_SHUT BIT(5) -#define USBOTG_SHUT BIT(4) -#define SDRAM_SHUT BIT(3) -#define SRAM_SHUT BIT(2) -#define CAM_SHUT BIT(1) -#define LCD_SHUT BIT(0) - -#define UART_SPLIT_SHIFT 30 -#define OUTPUT_CLK_SHIFT 26 - -/* MISC_CTRL Register Bits */ -#define USBHOST_RSTN BIT(31) -#define PHY_INTF_SELI GENMASK(30, 28) -#define AC97_EN BIT(25) -#define SDIO_DMA_EN GENMASK(24, 23) -#define ADC_DMA_EN BIT(22) -#define SDIO_USE_SPI1 BIT(17) -#define SDIO_USE_SPI0 BIT(16) -#define SRAM_CTRL GENMASK(15, 0) - -#define PHY_INTF_SELI_SHIFT 28 -#define SDIO_DMA_EN_SHIFT 23 -#define SRAM_CTRL_SHIFT 0 - -#define LS1X_CBUS_REG(n, x) \ - ((void __iomem *)KSEG1ADDR(LS1X_CBUS_BASE + (n * 0x04) + (x))) - -#define LS1X_CBUS_FIRST(n) LS1X_CBUS_REG(n, 0x00) -#define LS1X_CBUS_SECOND(n) LS1X_CBUS_REG(n, 0x10) -#define LS1X_CBUS_THIRD(n) LS1X_CBUS_REG(n, 0x20) -#define LS1X_CBUS_FOURTHT(n) LS1X_CBUS_REG(n, 0x30) -#define LS1X_CBUS_FIFTHT(n) LS1X_CBUS_REG(n, 0x40) - -#endif - -#endif /* 
__ASM_MACH_LOONGSON32_REGS_MUX_H */ diff --git a/arch/mips/include/asm/mips-boards/bonito64.h b/arch/mips/include/asm/mips-boards/bonito64.h index 31a31fe78d77..74c5fc0fc6c0 100644 --- a/arch/mips/include/asm/mips-boards/bonito64.h +++ b/arch/mips/include/asm/mips-boards/bonito64.h @@ -21,7 +21,7 @@ #ifndef _ASM_MIPS_BOARDS_BONITO64_H #define _ASM_MIPS_BOARDS_BONITO64_H -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ /* offsets from base register */ #define BONITO(x) (x) @@ -36,7 +36,7 @@ extern unsigned long _pcictrl_bonito_pcicfg; #define BONITO(x) *(volatile u32 *)(_pcictrl_bonito + (x)) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #define BONITO_BOOT_BASE 0x1fc00000 diff --git a/arch/mips/include/asm/mipsmtregs.h b/arch/mips/include/asm/mipsmtregs.h index b1ee3c48e84b..cab7582010e8 100644 --- a/arch/mips/include/asm/mipsmtregs.h +++ b/arch/mips/include/asm/mipsmtregs.h @@ -10,7 +10,7 @@ #include <asm/mipsregs.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * C macros @@ -176,7 +176,7 @@ /* TCHalt */ #define TCHALT_H (_ULCAST_(1)) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ static inline unsigned core_nvpes(void) { @@ -469,6 +469,6 @@ do { \ __BUILD_SET_C0(mvpcontrol) -#endif /* Not __ASSEMBLY__ */ +#endif /* Not __ASSEMBLER__ */ #endif diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index c025558754d5..f799c0d723da 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -32,7 +32,7 @@ /* * Configure language */ -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define _ULCAST_ #define _U64CAST_ #else @@ -1346,7 +1346,7 @@ #define FPU_CSR_RD 0x3 /* towards -Infinity */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * Macros for handling the ISA mode bit for MIPS16 and microMIPS. @@ -3095,6 +3095,6 @@ static inline unsigned int get_ebase_cpunum(void) return read_c0_ebase() & MIPS_EBASE_CPUNUM; } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_MIPSREGS_H */ diff --git a/arch/mips/include/asm/msa.h b/arch/mips/include/asm/msa.h index 236a49ee2e3e..c6077f5fa4b1 100644 --- a/arch/mips/include/asm/msa.h +++ b/arch/mips/include/asm/msa.h @@ -8,7 +8,7 @@ #include <asm/mipsregs.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/inst.h> @@ -218,7 +218,7 @@ __BUILD_MSA_CTL_REG(request, 5) __BUILD_MSA_CTL_REG(map, 6) __BUILD_MSA_CTL_REG(unmap, 7) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #define MSA_IR 0 #define MSA_CSR 1 diff --git a/arch/mips/include/asm/pci/bridge.h b/arch/mips/include/asm/pci/bridge.h index 9c476a0400e0..eaeafccd82c7 100644 --- a/arch/mips/include/asm/pci/bridge.h +++ b/arch/mips/include/asm/pci/bridge.h @@ -43,7 +43,7 @@ * Bridge address map */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define ATE_V 0x01 #define ATE_CO 0x02 @@ -288,7 +288,7 @@ struct bridge_err_cmdword { }; #define berr_field berr_un.berr_st -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* * The values of these macros can and should be crosschecked diff --git a/arch/mips/include/asm/pm.h b/arch/mips/include/asm/pm.h index 7ecd4dfe3846..52f3d64c5f34 100644 --- a/arch/mips/include/asm/pm.h +++ b/arch/mips/include/asm/pm.h @@ -8,7 +8,7 @@ #ifndef __ASM_PM_H #define __ASM_PM_H -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #include <asm/asm-offsets.h> #include <asm/asm.h> @@ -130,7 +130,7 @@ RESUME_RESTORE_REGS_RETURN .endm -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ /** * struct mips_static_suspend_state - Core saved CPU state across S2R. 
@@ -150,6 +150,6 @@ struct mips_static_suspend_state { unsigned long sp; }; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_PM_HELPERS_H */ diff --git a/arch/mips/include/asm/prefetch.h b/arch/mips/include/asm/prefetch.h index a56594f360ee..4bd359fa3d97 100644 --- a/arch/mips/include/asm/prefetch.h +++ b/arch/mips/include/asm/prefetch.h @@ -42,7 +42,7 @@ #define Pref_WriteBackInvalidate 25 #define Pref_PrepareForStore 30 -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ .macro __pref hint addr #ifdef CONFIG_CPU_HAS_PREFETCH diff --git a/arch/mips/include/asm/regdef.h b/arch/mips/include/asm/regdef.h index 236051364f78..dd0b558c9767 100644 --- a/arch/mips/include/asm/regdef.h +++ b/arch/mips/include/asm/regdef.h @@ -103,7 +103,7 @@ #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 || _MIPS_SIM == _MIPS_SIM_NABI32 */ -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #if _MIPS_SIM == _MIPS_SIM_ABI32 /* @@ -192,6 +192,6 @@ #define ra $31 /* return address */ #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 || _MIPS_SIM == _MIPS_SIM_NABI32 */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_REGDEF_H */ diff --git a/arch/mips/include/asm/sibyte/board.h b/arch/mips/include/asm/sibyte/board.h index 03463faa4244..d29c1c013dc5 100644 --- a/arch/mips/include/asm/sibyte/board.h +++ b/arch/mips/include/asm/sibyte/board.h @@ -19,7 +19,7 @@ #include <asm/sibyte/bigsur.h> #endif -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #ifdef LEDS_PHYS #define setleds(t0, t1, c0, c1, c2, c3) \ @@ -46,6 +46,6 @@ extern void setleds(char *str); #define setleds(s) do { } while (0) #endif /* LEDS_PHYS */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _SIBYTE_BOARD_H */ diff --git a/arch/mips/include/asm/sibyte/sb1250.h b/arch/mips/include/asm/sibyte/sb1250.h index 495b31925ed7..de4b352256c8 100644 --- a/arch/mips/include/asm/sibyte/sb1250.h +++ b/arch/mips/include/asm/sibyte/sb1250.h @@ -19,7 +19,7 @@ #define SB1250_DUART_MINOR_BASE 64 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/addrspace.h> diff --git a/arch/mips/include/asm/sibyte/sb1250_defs.h b/arch/mips/include/asm/sibyte/sb1250_defs.h index 68cd7c0b37ea..98cbb65cce0a 100644 --- a/arch/mips/include/asm/sibyte/sb1250_defs.h +++ b/arch/mips/include/asm/sibyte/sb1250_defs.h @@ -199,7 +199,7 @@ * Note: you'll need to define uint32_t and uint64_t in your headers. 
*/ -#if !defined(__ASSEMBLY__) +#if !defined(__ASSEMBLER__) #define _SB_MAKE64(x) ((uint64_t)(x)) #define _SB_MAKE32(x) ((uint32_t)(x)) #else @@ -238,9 +238,9 @@ */ -#if defined(__mips64) && !defined(__ASSEMBLY__) +#if defined(__mips64) && !defined(__ASSEMBLER__) #define SBWRITECSR(csr, val) *((volatile uint64_t *) PHYS_TO_K1(csr)) = (val) #define SBREADCSR(csr) (*((volatile uint64_t *) PHYS_TO_K1(csr))) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/mips/include/asm/smp-cps.h b/arch/mips/include/asm/smp-cps.h index 88cfae5d22c8..63620abbd067 100644 --- a/arch/mips/include/asm/smp-cps.h +++ b/arch/mips/include/asm/smp-cps.h @@ -9,7 +9,7 @@ #define CPS_ENTRY_PATCH_INSNS 6 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct vpe_boot_config { unsigned long pc; @@ -55,9 +55,9 @@ static inline bool mips_cps_smp_in_use(void) { return false; } #endif /* !CONFIG_MIPS_CPS */ -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ .extern mips_cps_bootcfg; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __MIPS_ASM_SMP_CPS_H__ */ diff --git a/arch/mips/include/asm/sn/addrs.h b/arch/mips/include/asm/sn/addrs.h index 837d23e24976..7c675fecbf9a 100644 --- a/arch/mips/include/asm/sn/addrs.h +++ b/arch/mips/include/asm/sn/addrs.h @@ -10,10 +10,10 @@ #define _ASM_SN_ADDRS_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/smp.h> #include <linux/types.h> -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #include <asm/addrspace.h> #include <asm/sn/kldir.h> @@ -25,15 +25,15 @@ #endif -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define UINT64_CAST (unsigned long) -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ #define UINT64_CAST -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #define NASID_GET_META(_n) ((_n) >> NASID_LOCAL_BITS) @@ -254,7 +254,7 @@ #define LOCAL_HUB_ADDR(_x) (IALIAS_BASE + (_x)) #define REMOTE_HUB_ADDR(_n, _x) ((NODE_SWIN_BASE(_n, 1) + 0x800000 + (_x))) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define LOCAL_HUB_PTR(_x) ((u64 *)LOCAL_HUB_ADDR((_x))) #define REMOTE_HUB_PTR(_n, _x) ((u64 *)REMOTE_HUB_ADDR((_n), (_x))) @@ -265,7 +265,7 @@ #define REMOTE_HUB_S(_n, _r, _d) __raw_writeq((_d), \ REMOTE_HUB_PTR((_n), (_r))) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* * Software structure locations -- permanently fixed @@ -315,7 +315,7 @@ #define KLI_KERN_XP 8 #define KLI_KERN_PARTID 9 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define KLD_BASE(nasid) ((kldir_ent_t *) KLDIR_ADDR(nasid)) #define KLD_LAUNCH(nasid) (KLD_BASE(nasid) + KLI_LAUNCH) @@ -371,7 +371,7 @@ #define KERN_VARS_ADDR(nasid) KLD_KERN_VARS(nasid)->pointer #define KERN_VARS_SIZE(nasid) KLD_KERN_VARS(nasid)->size -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_SN_ADDRS_H */ diff --git a/arch/mips/include/asm/sn/gda.h b/arch/mips/include/asm/sn/gda.h index 5b8c96d5b587..d8fd80137206 100644 --- a/arch/mips/include/asm/sn/gda.h +++ b/arch/mips/include/asm/sn/gda.h @@ -39,7 +39,7 @@ #define G_PARTIDOFF 40 #define G_TABLEOFF 128 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ typedef struct gda { u32 g_magic; /* GDA magic number */ @@ -63,7 +63,7 @@ typedef struct gda { #define GDA ((gda_t*) GDA_ADDR(get_nasid())) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* * Define: PART_GDA_VERSION * Purpose: Define the minimum version of the GDA required, lower diff --git a/arch/mips/include/asm/sn/kldir.h b/arch/mips/include/asm/sn/kldir.h index 245f59bf3845..f394b1e0c956 100644 --- a/arch/mips/include/asm/sn/kldir.h +++ 
b/arch/mips/include/asm/sn/kldir.h @@ -15,7 +15,7 @@ #define KLDIR_ENT_SIZE 0x40 #define KLDIR_MAX_ENTRIES (0x400 / 0x40) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ typedef struct kldir_ent_s { u64 magic; /* Indicates validity of entry */ off_t offset; /* Offset from start of node space */ @@ -27,7 +27,7 @@ typedef struct kldir_ent_s { /* NOTE: These 16 bytes are used in the Partition KLDIR entry to store partition info. Refer to klpart.h for this. */ } kldir_ent_t; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #ifdef CONFIG_SGI_IP27 #include <asm/sn/sn0/kldir.h> diff --git a/arch/mips/include/asm/sn/klkernvars.h b/arch/mips/include/asm/sn/klkernvars.h index ea6b21795163..bb7a6c36f6e7 100644 --- a/arch/mips/include/asm/sn/klkernvars.h +++ b/arch/mips/include/asm/sn/klkernvars.h @@ -12,7 +12,7 @@ #define KV_MAGIC 0x5f4b565f -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/sn/types.h> @@ -24,6 +24,6 @@ typedef struct kern_vars_s { unsigned long kv_rw_baseaddr; } kern_vars_t; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_SN_KLKERNVARS_H */ diff --git a/arch/mips/include/asm/sn/launch.h b/arch/mips/include/asm/sn/launch.h index 04226d8d30c4..ce95187362e7 100644 --- a/arch/mips/include/asm/sn/launch.h +++ b/arch/mips/include/asm/sn/launch.h @@ -59,7 +59,7 @@ * clears the BUSY flag after control is returned to it. */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ typedef int launch_state_t; typedef void (*launch_proc_t)(u64 call_parm); @@ -101,6 +101,6 @@ typedef struct launch_s { #define LAUNCH_FLASH (*(void (*)(void)) \ IP27PROM_FLASHLEDS) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_SN_LAUNCH_H */ diff --git a/arch/mips/include/asm/sn/nmi.h b/arch/mips/include/asm/sn/nmi.h index 12ac210f12a1..eff51606bbce 100644 --- a/arch/mips/include/asm/sn/nmi.h +++ b/arch/mips/include/asm/sn/nmi.h @@ -48,7 +48,7 @@ * */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ typedef struct nmi_s { volatile unsigned long magic; /* Magic number */ @@ -59,13 +59,13 @@ typedef struct nmi_s { volatile unsigned long gmaster; /* Flag true only on global master*/ } nmi_t; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* Following definitions are needed both in the prom & the kernel * to identify the format of the nmi cpu register save area in the * low memory on each node. */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct reg_struct { unsigned long gpr[32]; @@ -78,7 +78,7 @@ struct reg_struct { unsigned long nmi_sr; }; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* These are the assembly language offsets into the reg_struct structure */ diff --git a/arch/mips/include/asm/sn/sn0/addrs.h b/arch/mips/include/asm/sn/sn0/addrs.h index f13df84edfdd..a28158a91ecf 100644 --- a/arch/mips/include/asm/sn/sn0/addrs.h +++ b/arch/mips/include/asm/sn/sn0/addrs.h @@ -84,15 +84,15 @@ #define NASID_GET(_pa) (int) ((UINT64_CAST (_pa) >> \ NASID_SHFT) & NASID_BITMASK) -#if !defined(__ASSEMBLY__) +#if !defined(__ASSEMBLER__) #define NODE_SWIN_BASE(nasid, widget) \ ((widget == 0) ? NODE_BWIN_BASE((nasid), SWIN0_BIGWIN) \ : RAW_NODE_SWIN_BASE(nasid, widget)) -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ #define NODE_SWIN_BASE(nasid, widget) \ (NODE_IO_BASE(nasid) + (UINT64_CAST(widget) << SWIN_SIZE_BITS)) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ /* * The following definitions pertain to the IO special address @@ -139,11 +139,11 @@ /* Turn on sable logging for the processors whose bits are set. 
*/ #define SABLE_LOG_TRIGGER(_map) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define KERN_NMI_ADDR(nasid, slice) \ TO_NODE_UNCAC((nasid), IP27_NMI_KREGS_OFFSET + \ (IP27_NMI_KREGS_CPU_SIZE * (slice))) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #ifdef PROM @@ -248,7 +248,7 @@ #define KL_UART_DATA LOCAL_HUB_ADDR(MD_UREG0_1) /* UART data reg */ #define KL_I2C_REG MD_UREG0_0 /* I2C reg */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* Address 0x400 to 0x1000 ualias points to cache error eframe + misc * CACHE_ERR_SP_PTR could either contain an address to the stack, or @@ -266,7 +266,7 @@ #define CACHE_ERR_SP (CACHE_ERR_SP_PTR - 16) #define CACHE_ERR_AREA_SIZE (ARCS_SPB_OFFSET - CACHE_ERR_EFRAME) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #define _ARCSPROM diff --git a/arch/mips/include/asm/sn/sn0/hub.h b/arch/mips/include/asm/sn/sn0/hub.h index c84adde36d41..916394319af5 100644 --- a/arch/mips/include/asm/sn/sn0/hub.h +++ b/arch/mips/include/asm/sn/sn0/hub.h @@ -37,7 +37,7 @@ #define UATTR_MSPEC 2 #define UATTR_UNCAC 3 -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ /* * Returns the local nasid into res. */ diff --git a/arch/mips/include/asm/sn/sn0/hubio.h b/arch/mips/include/asm/sn/sn0/hubio.h index 57ece90f8cf1..c489426f8f9e 100644 --- a/arch/mips/include/asm/sn/sn0/hubio.h +++ b/arch/mips/include/asm/sn/sn0/hubio.h @@ -169,7 +169,7 @@ /* * The IO LLP control status register and widget control register */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ typedef union hubii_wid_u { u64 wid_reg_value; @@ -292,7 +292,7 @@ typedef union io_perf_cnt { } perf_cnt_bits; } io_perf_cnt_t; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #define LNK_STAT_WORKING 0x2 @@ -440,7 +440,7 @@ typedef union io_perf_cnt { /* * Fields in CRB Register A */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ typedef union icrba_u { u64 reg_value; struct { @@ -486,7 +486,7 @@ typedef union h1_icrba_u { #define ICRBN_A_CERR_SHFT 54 #define ICRBN_A_ERR_MASK 0x3ff -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #define IIO_ICRB_ADDR_SHFT 2 /* Shift to get proper address */ @@ -509,7 +509,7 @@ typedef union h1_icrba_u { /* * Fields in CRB Register B */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ typedef union icrbb_u { u64 reg_value; struct { @@ -608,7 +608,7 @@ typedef union h1_icrbb_u { #define b_imsg icrbb_field_s.imsg #define b_initiator icrbb_field_s.initiator -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* * values for field xtsize @@ -666,7 +666,7 @@ typedef union h1_icrbb_u { * Fields in CRB Register C */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ typedef union icrbc_s { u64 reg_value; @@ -698,13 +698,13 @@ typedef union icrbc_s { #define c_barrop icrbc_field_s.barrop #define c_doresp icrbc_field_s.doresp #define c_gbr icrbc_field_s.gbr -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* * Fields in CRB Register D */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ typedef union icrbd_s { u64 reg_value; struct { @@ -737,7 +737,7 @@ typedef union hubii_ifdr_u { } hi_ifdr_fields; } hubii_ifdr_t; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* * Hardware designed names for the BTE control registers. 
@@ -784,7 +784,7 @@ typedef union hubii_ifdr_u { * IO PIO Read Table Entry format */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ typedef union iprte_a { u64 entry; @@ -806,7 +806,7 @@ typedef union iprte_a { #define iprte_init iprte_fields.initiator #define iprte_addr iprte_fields.addr -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #define IPRTE_ADDRSHFT 3 @@ -814,7 +814,7 @@ typedef union iprte_a { * Hub IIO PRB Register format. */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * Note: Fields bnakctr, anakctr, xtalkctrmode, ovflow fields are * "Status" fields, and should only be used in case of clean up after errors. @@ -846,7 +846,7 @@ typedef union iprb_u { #define iprb_anakctr iprb_fields_s.anakctr #define iprb_xtalkctr iprb_fields_s.xtalkctr -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* * values for mode field in iprb_t. @@ -861,7 +861,7 @@ typedef union iprb_u { /* * IO CRB entry C_A to E_A : Partial (cache) CRBS */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ typedef union icrbp_a { u64 ip_reg; /* the entire register value */ struct { @@ -895,7 +895,7 @@ typedef union icrbp_a { } ip_fmt; } icrbp_a_t; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* * A couple of defines to go with the above structure. @@ -903,7 +903,7 @@ typedef union icrbp_a { #define ICRBP_A_CERR_SHFT 54 #define ICRBP_A_ERR_MASK 0x3ff -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ typedef union hubii_idsr { u64 iin_reg; struct { @@ -917,7 +917,7 @@ typedef union hubii_idsr { level : 7; } iin_fmt; } hubii_idsr_t; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* * IO BTE Length/Status (IIO_IBLS) register bit field definitions diff --git a/arch/mips/include/asm/sn/sn0/hubmd.h b/arch/mips/include/asm/sn/sn0/hubmd.h index 305d002be182..97d9cbbf9f4c 100644 --- a/arch/mips/include/asm/sn/sn0/hubmd.h +++ b/arch/mips/include/asm/sn/sn0/hubmd.h @@ -423,7 +423,7 @@ * Operations on page migration threshold register */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * LED register macros @@ -735,7 +735,7 @@ typedef union md_perf_cnt { } md_perf_cnt_t; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #define DIR_ERROR_VALID_MASK 0xe000000000000000 diff --git a/arch/mips/include/asm/sn/sn0/hubni.h b/arch/mips/include/asm/sn/sn0/hubni.h index b8253142cb83..4830bae723e4 100644 --- a/arch/mips/include/asm/sn/sn0/hubni.h +++ b/arch/mips/include/asm/sn/sn0/hubni.h @@ -11,7 +11,7 @@ #ifndef _ASM_SGI_SN0_HUBNI_H #define _ASM_SGI_SN0_HUBNI_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> #endif @@ -226,7 +226,7 @@ #define NLT_EXIT_PORT_MASK (UINT64_CAST 0xf) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ typedef union hubni_port_error_u { u64 nipe_reg_value; @@ -258,6 +258,6 @@ static inline int get_region_shift(void) return NASID_TO_COARSEREG_SHFT; } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_SGI_SN0_HUBNI_H */ diff --git a/arch/mips/include/asm/sn/sn0/hubpi.h b/arch/mips/include/asm/sn/sn0/hubpi.h index 7b83655913c5..a4fe0feeef0c 100644 --- a/arch/mips/include/asm/sn/sn0/hubpi.h +++ b/arch/mips/include/asm/sn/sn0/hubpi.h @@ -306,7 +306,7 @@ #define ERR_STACK_SIZE_BYTES(_sz) \ ((_sz) ? (PI_MIN_STACK_SIZE << ((_sz) - 1)) : 0) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * format of error stack and error status registers. 
*/ @@ -359,7 +359,7 @@ typedef union pi_err_stat1 { typedef u64 rtc_time_t; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* Bits in PI_SYSAD_ERRCHK_EN */ diff --git a/arch/mips/include/asm/sn/types.h b/arch/mips/include/asm/sn/types.h index 451ba1ee41ad..53d04c04d6f5 100644 --- a/arch/mips/include/asm/sn/types.h +++ b/arch/mips/include/asm/sn/types.h @@ -11,7 +11,7 @@ #include <linux/types.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ typedef unsigned long cpuid_t; typedef signed short nasid_t; /* node id in numa-as-id space */ diff --git a/arch/mips/include/asm/sync.h b/arch/mips/include/asm/sync.h index 44c04a82d0b7..d7873e8d7e6f 100644 --- a/arch/mips/include/asm/sync.h +++ b/arch/mips/include/asm/sync.h @@ -193,7 +193,7 @@ * Preprocessor magic to expand macros used as arguments before we insert them * into assembly code. */ -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ # define ___SYNC(type, reason, else) \ ____SYNC(type, reason, else) #else diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h index b9d76e8ac5a2..2707dad260dd 100644 --- a/arch/mips/include/asm/thread_info.h +++ b/arch/mips/include/asm/thread_info.h @@ -11,7 +11,7 @@ #ifdef __KERNEL__ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/processor.h> @@ -73,7 +73,7 @@ static inline struct thread_info *current_thread_info(void) register unsigned long current_stack_pointer __asm__("sp"); #endif -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* thread information allocation */ #if defined(CONFIG_PAGE_SIZE_4KB) && defined(CONFIG_32BIT) diff --git a/arch/mips/include/asm/unistd.h b/arch/mips/include/asm/unistd.h index ba83d3fb0a84..6a974b990f4b 100644 --- a/arch/mips/include/asm/unistd.h +++ b/arch/mips/include/asm/unistd.h @@ -29,7 +29,7 @@ #define NR_syscalls (__NR_O32_Linux + __NR_O32_Linux_syscalls) #endif -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define __ARCH_WANT_NEW_STAT #define __ARCH_WANT_OLD_READDIR @@ -62,6 +62,6 @@ /* whitelists for checksyscalls */ #define __IGNORE_fadvise64_64 -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_UNISTD_H */ diff --git a/arch/mips/include/asm/vdso/gettimeofday.h b/arch/mips/include/asm/vdso/gettimeofday.h index fd32baa30e17..32d2d173fdc0 100644 --- a/arch/mips/include/asm/vdso/gettimeofday.h +++ b/arch/mips/include/asm/vdso/gettimeofday.h @@ -11,7 +11,7 @@ #ifndef __ASM_VDSO_GETTIMEOFDAY_H #define __ASM_VDSO_GETTIMEOFDAY_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/vdso/vdso.h> #include <asm/clocksource.h> @@ -215,6 +215,6 @@ static __always_inline const struct vdso_time_data *__arch_get_vdso_u_time_data( } #define __arch_get_vdso_u_time_data __arch_get_vdso_u_time_data -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/mips/include/asm/vdso/processor.h b/arch/mips/include/asm/vdso/processor.h index 511c95d735e6..05cdb366dc21 100644 --- a/arch/mips/include/asm/vdso/processor.h +++ b/arch/mips/include/asm/vdso/processor.h @@ -5,7 +5,7 @@ #ifndef __ASM_VDSO_PROCESSOR_H #define __ASM_VDSO_PROCESSOR_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifdef CONFIG_CPU_LOONGSON64 /* @@ -22,6 +22,6 @@ #define cpu_relax() barrier() #endif -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_VDSO_PROCESSOR_H */ diff --git a/arch/mips/include/asm/vdso/vdso.h b/arch/mips/include/asm/vdso/vdso.h index acd0efcd3d93..6889e0f2e5db 100644 --- a/arch/mips/include/asm/vdso/vdso.h +++ b/arch/mips/include/asm/vdso/vdso.h @@ 
-9,7 +9,7 @@ #define __VDSO_PAGES 4 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/asm.h> #include <asm/vdso.h> @@ -69,4 +69,4 @@ static inline void __iomem *get_gic(const struct vdso_time_data *data) #endif /* CONFIG_CLKSRC_MIPS_GIC */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ diff --git a/arch/mips/include/asm/vdso/vsyscall.h b/arch/mips/include/asm/vdso/vsyscall.h index 2b1debb62dee..0f061a9babd1 100644 --- a/arch/mips/include/asm/vdso/vsyscall.h +++ b/arch/mips/include/asm/vdso/vsyscall.h @@ -4,13 +4,13 @@ #include <asm/page.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <vdso/datapage.h> /* The asm-generic header needs to be included after the definitions above */ #include <asm-generic/vdso/vsyscall.h> -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_VDSO_VSYSCALL_H */ diff --git a/arch/mips/include/asm/xtalk/xtalk.h b/arch/mips/include/asm/xtalk/xtalk.h index 680e7efebbaf..dfe6a3fce65a 100644 --- a/arch/mips/include/asm/xtalk/xtalk.h +++ b/arch/mips/include/asm/xtalk/xtalk.h @@ -12,7 +12,7 @@ #ifndef _ASM_XTALK_XTALK_H #define _ASM_XTALK_XTALK_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * User-level device driver visible types */ @@ -47,6 +47,6 @@ typedef struct xtalk_piomap_s *xtalk_piomap_t; #define XIO_PORT(x) ((xwidgetnum_t)(((x)&XIO_PORT_BITS) >> XIO_PORT_SHIFT)) #define XIO_PACK(p, o) ((((uint64_t)(p))<<XIO_PORT_SHIFT) | ((o)&XIO_ADDR_BITS)) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_XTALK_XTALK_H */ diff --git a/arch/mips/include/asm/xtalk/xwidget.h b/arch/mips/include/asm/xtalk/xwidget.h index 24f121da6a1d..efcfe4494576 100644 --- a/arch/mips/include/asm/xtalk/xwidget.h +++ b/arch/mips/include/asm/xtalk/xwidget.h @@ -203,7 +203,7 @@ static const struct widget_ident __initconst widget_idents[] = { * widget target flush register are widget dependent thus will not be * defined here */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ typedef u32 widgetreg_t; /* widget configuration registers */ @@ -274,6 +274,6 @@ typedef struct xwidget_hwid_s { ((hwid2)->mfg_num == XWIDGET_MFG_NUM_NONE) || \ ((hwid1)->mfg_num == (hwid2)->mfg_num))) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_XTALK_XWIDGET_H */ diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 04dc9ab55524..1e49e05ac8b1 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -1288,14 +1288,14 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) set_cpu_asid_mask(c, MIPS_ENTRYHI_ASID); c->writecombine = _CACHE_UNCACHED_ACCELERATED; break; - case PRID_IMP_LOONGSON_32: /* Loongson-1 */ + case PRID_IMP_LOONGSON_32: decode_configs(c); c->cputype = CPU_LOONGSON32; switch (c->processor_id & PRID_REV_MASK) { - case PRID_REV_LOONGSON1B: - __cpu_name[cpu] = "Loongson 1B"; + case PRID_REV_LOONGSON1: + __cpu_name[cpu] = "ICT Loongson-1"; break; } diff --git a/arch/mips/lantiq/xway/sysctrl.c b/arch/mips/lantiq/xway/sysctrl.c index 6031a0272d87..d9aa80afdf9d 100644 --- a/arch/mips/lantiq/xway/sysctrl.c +++ b/arch/mips/lantiq/xway/sysctrl.c @@ -485,7 +485,7 @@ void __init ltq_soc_init(void) /* add our generic xway clocks */ clkdev_add_pmu("10000000.fpi", NULL, 0, 0, PMU_FPI); clkdev_add_pmu("1e100a00.gptu", NULL, 1, 0, PMU_GPT); - clkdev_add_pmu("1e100bb0.stp", NULL, 1, 0, PMU_STP); + clkdev_add_pmu("1e100bb0.gpio", NULL, 1, 0, PMU_STP); clkdev_add_pmu("1e100c00.serial", NULL, 0, 0, PMU_ASC1); clkdev_add_pmu("1e104100.dma", NULL, 1, 0, PMU_DMA); 
clkdev_add_pmu("1e100800.spi", NULL, 1, 0, PMU_SPI); diff --git a/arch/mips/loongson32/Kconfig b/arch/mips/loongson32/Kconfig index a7c500959577..461d518b0033 100644 --- a/arch/mips/loongson32/Kconfig +++ b/arch/mips/loongson32/Kconfig @@ -1,38 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 -if MACH_LOONGSON32 -choice - prompt "Machine Type" - -config LOONGSON1_LS1B - bool "Loongson LS1B board" - select CEVT_R4K if !MIPS_EXTERNAL_TIMER - select CSRC_R4K if !MIPS_EXTERNAL_TIMER - select SYS_HAS_CPU_LOONGSON1B - select DMA_NONCOHERENT - select BOOT_ELF32 - select IRQ_MIPS_CPU - select SYS_SUPPORTS_32BIT_KERNEL - select SYS_SUPPORTS_LITTLE_ENDIAN - select SYS_SUPPORTS_HIGHMEM - select SYS_HAS_EARLY_PRINTK - select USE_GENERIC_EARLY_PRINTK_8250 - select COMMON_CLK - -config LOONGSON1_LS1C - bool "Loongson LS1C board" - select CEVT_R4K if !MIPS_EXTERNAL_TIMER - select CSRC_R4K if !MIPS_EXTERNAL_TIMER - select SYS_HAS_CPU_LOONGSON1C - select DMA_NONCOHERENT - select BOOT_ELF32 - select IRQ_MIPS_CPU - select SYS_SUPPORTS_32BIT_KERNEL - select SYS_SUPPORTS_LITTLE_ENDIAN - select SYS_SUPPORTS_HIGHMEM - select SYS_HAS_EARLY_PRINTK - select USE_GENERIC_EARLY_PRINTK_8250 - select COMMON_CLK -endchoice - -endif # MACH_LOONGSON32 +config BUILTIN_DTB_NAME + string "Source file for built-in DTB" + depends on BUILTIN_DTB + help + Base name (without suffix, relative to arch/mips/boot/dts/loongson) + for the DTS file that will be used to produce the DTB linked into + the kernel. diff --git a/arch/mips/loongson32/Makefile b/arch/mips/loongson32/Makefile index ba10954b4b21..a4e40e534e6a 100644 --- a/arch/mips/loongson32/Makefile +++ b/arch/mips/loongson32/Makefile @@ -1,18 +1 @@ # SPDX-License-Identifier: GPL-2.0-only -# -# Common code for all Loongson 1 based systems -# - -obj-$(CONFIG_MACH_LOONGSON32) += common/ - -# -# Loongson LS1B board -# - -obj-$(CONFIG_LOONGSON1_LS1B) += ls1b/ - -# -# Loongson LS1C board -# - -obj-$(CONFIG_LOONGSON1_LS1C) += ls1c/ diff --git a/arch/mips/loongson32/Platform b/arch/mips/loongson32/Platform index 3b9673e7a2fa..67fd07450488 100644 --- a/arch/mips/loongson32/Platform +++ b/arch/mips/loongson32/Platform @@ -1,3 +1,2 @@ cflags-$(CONFIG_CPU_LOONGSON32) += -march=mips32r2 -Wa,--trap -cflags-$(CONFIG_MACH_LOONGSON32) += -I$(srctree)/arch/mips/include/asm/mach-loongson32 load-$(CONFIG_CPU_LOONGSON32) += 0xffffffff80200000 diff --git a/arch/mips/loongson32/common/Makefile b/arch/mips/loongson32/common/Makefile deleted file mode 100644 index f3950d308187..000000000000 --- a/arch/mips/loongson32/common/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# -# Makefile for common code of loongson1 based machines. 
-# - -obj-y += time.o irq.o platform.o prom.o setup.o diff --git a/arch/mips/loongson32/common/irq.c b/arch/mips/loongson32/common/irq.c deleted file mode 100644 index 9a50070f74f7..000000000000 --- a/arch/mips/loongson32/common/irq.c +++ /dev/null @@ -1,191 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (c) 2011 Zhang, Keguang <keguang.zhang@gmail.com> - */ - -#include <linux/interrupt.h> -#include <linux/irq.h> -#include <asm/irq_cpu.h> - -#include <loongson1.h> -#include <irq.h> - -#define LS1X_INTC_REG(n, x) \ - ((void __iomem *)KSEG1ADDR(LS1X_INTC_BASE + (n * 0x18) + (x))) - -#define LS1X_INTC_INTISR(n) LS1X_INTC_REG(n, 0x0) -#define LS1X_INTC_INTIEN(n) LS1X_INTC_REG(n, 0x4) -#define LS1X_INTC_INTSET(n) LS1X_INTC_REG(n, 0x8) -#define LS1X_INTC_INTCLR(n) LS1X_INTC_REG(n, 0xc) -#define LS1X_INTC_INTPOL(n) LS1X_INTC_REG(n, 0x10) -#define LS1X_INTC_INTEDGE(n) LS1X_INTC_REG(n, 0x14) - -static void ls1x_irq_ack(struct irq_data *d) -{ - unsigned int bit = (d->irq - LS1X_IRQ_BASE) & 0x1f; - unsigned int n = (d->irq - LS1X_IRQ_BASE) >> 5; - - __raw_writel(__raw_readl(LS1X_INTC_INTCLR(n)) - | (1 << bit), LS1X_INTC_INTCLR(n)); -} - -static void ls1x_irq_mask(struct irq_data *d) -{ - unsigned int bit = (d->irq - LS1X_IRQ_BASE) & 0x1f; - unsigned int n = (d->irq - LS1X_IRQ_BASE) >> 5; - - __raw_writel(__raw_readl(LS1X_INTC_INTIEN(n)) - & ~(1 << bit), LS1X_INTC_INTIEN(n)); -} - -static void ls1x_irq_mask_ack(struct irq_data *d) -{ - unsigned int bit = (d->irq - LS1X_IRQ_BASE) & 0x1f; - unsigned int n = (d->irq - LS1X_IRQ_BASE) >> 5; - - __raw_writel(__raw_readl(LS1X_INTC_INTIEN(n)) - & ~(1 << bit), LS1X_INTC_INTIEN(n)); - __raw_writel(__raw_readl(LS1X_INTC_INTCLR(n)) - | (1 << bit), LS1X_INTC_INTCLR(n)); -} - -static void ls1x_irq_unmask(struct irq_data *d) -{ - unsigned int bit = (d->irq - LS1X_IRQ_BASE) & 0x1f; - unsigned int n = (d->irq - LS1X_IRQ_BASE) >> 5; - - __raw_writel(__raw_readl(LS1X_INTC_INTIEN(n)) - | (1 << bit), LS1X_INTC_INTIEN(n)); -} - -static int ls1x_irq_settype(struct irq_data *d, unsigned int type) -{ - unsigned int bit = (d->irq - LS1X_IRQ_BASE) & 0x1f; - unsigned int n = (d->irq - LS1X_IRQ_BASE) >> 5; - - switch (type) { - case IRQ_TYPE_LEVEL_HIGH: - __raw_writel(__raw_readl(LS1X_INTC_INTPOL(n)) - | (1 << bit), LS1X_INTC_INTPOL(n)); - __raw_writel(__raw_readl(LS1X_INTC_INTEDGE(n)) - & ~(1 << bit), LS1X_INTC_INTEDGE(n)); - break; - case IRQ_TYPE_LEVEL_LOW: - __raw_writel(__raw_readl(LS1X_INTC_INTPOL(n)) - & ~(1 << bit), LS1X_INTC_INTPOL(n)); - __raw_writel(__raw_readl(LS1X_INTC_INTEDGE(n)) - & ~(1 << bit), LS1X_INTC_INTEDGE(n)); - break; - case IRQ_TYPE_EDGE_RISING: - __raw_writel(__raw_readl(LS1X_INTC_INTPOL(n)) - | (1 << bit), LS1X_INTC_INTPOL(n)); - __raw_writel(__raw_readl(LS1X_INTC_INTEDGE(n)) - | (1 << bit), LS1X_INTC_INTEDGE(n)); - break; - case IRQ_TYPE_EDGE_FALLING: - __raw_writel(__raw_readl(LS1X_INTC_INTPOL(n)) - & ~(1 << bit), LS1X_INTC_INTPOL(n)); - __raw_writel(__raw_readl(LS1X_INTC_INTEDGE(n)) - | (1 << bit), LS1X_INTC_INTEDGE(n)); - break; - case IRQ_TYPE_EDGE_BOTH: - __raw_writel(__raw_readl(LS1X_INTC_INTPOL(n)) - & ~(1 << bit), LS1X_INTC_INTPOL(n)); - __raw_writel(__raw_readl(LS1X_INTC_INTEDGE(n)) - | (1 << bit), LS1X_INTC_INTEDGE(n)); - break; - case IRQ_TYPE_NONE: - break; - default: - return -EINVAL; - } - - return 0; -} - -static struct irq_chip ls1x_irq_chip = { - .name = "LS1X-INTC", - .irq_ack = ls1x_irq_ack, - .irq_mask = ls1x_irq_mask, - .irq_mask_ack = ls1x_irq_mask_ack, - .irq_unmask = ls1x_irq_unmask, - .irq_set_type = 
ls1x_irq_settype, -}; - -static void ls1x_irq_dispatch(int n) -{ - u32 int_status, irq; - - /* Get pending sources, masked by current enables */ - int_status = __raw_readl(LS1X_INTC_INTISR(n)) & - __raw_readl(LS1X_INTC_INTIEN(n)); - - if (int_status) { - irq = LS1X_IRQ(n, __ffs(int_status)); - do_IRQ(irq); - } -} - -asmlinkage void plat_irq_dispatch(void) -{ - unsigned int pending; - - pending = read_c0_cause() & read_c0_status() & ST0_IM; - - if (pending & CAUSEF_IP7) - do_IRQ(TIMER_IRQ); - else if (pending & CAUSEF_IP2) - ls1x_irq_dispatch(0); /* INT0 */ - else if (pending & CAUSEF_IP3) - ls1x_irq_dispatch(1); /* INT1 */ - else if (pending & CAUSEF_IP4) - ls1x_irq_dispatch(2); /* INT2 */ - else if (pending & CAUSEF_IP5) - ls1x_irq_dispatch(3); /* INT3 */ - else if (pending & CAUSEF_IP6) - ls1x_irq_dispatch(4); /* INT4 */ - else - spurious_interrupt(); - -} - -static void __init ls1x_irq_init(int base) -{ - int n; - - /* Disable interrupts and clear pending, - * setup all IRQs as high level triggered - */ - for (n = 0; n < INTN; n++) { - __raw_writel(0x0, LS1X_INTC_INTIEN(n)); - __raw_writel(0xffffffff, LS1X_INTC_INTCLR(n)); - __raw_writel(0xffffffff, LS1X_INTC_INTPOL(n)); - /* set DMA0, DMA1 and DMA2 to edge trigger */ - __raw_writel(n ? 0x0 : 0xe000, LS1X_INTC_INTEDGE(n)); - } - - - for (n = base; n < NR_IRQS; n++) { - irq_set_chip_and_handler(n, &ls1x_irq_chip, - handle_level_irq); - } - - if (request_irq(INT0_IRQ, no_action, IRQF_NO_THREAD, "cascade", NULL)) - pr_err("Failed to request irq %d (cascade)\n", INT0_IRQ); - if (request_irq(INT1_IRQ, no_action, IRQF_NO_THREAD, "cascade", NULL)) - pr_err("Failed to request irq %d (cascade)\n", INT1_IRQ); - if (request_irq(INT2_IRQ, no_action, IRQF_NO_THREAD, "cascade", NULL)) - pr_err("Failed to request irq %d (cascade)\n", INT2_IRQ); - if (request_irq(INT3_IRQ, no_action, IRQF_NO_THREAD, "cascade", NULL)) - pr_err("Failed to request irq %d (cascade)\n", INT3_IRQ); -#if defined(CONFIG_LOONGSON1_LS1C) - if (request_irq(INT4_IRQ, no_action, IRQF_NO_THREAD, "cascade", NULL)) - pr_err("Failed to request irq %d (cascade)\n", INT4_IRQ); -#endif -} - -void __init arch_init_irq(void) -{ - mips_cpu_irq_init(); - ls1x_irq_init(LS1X_IRQ_BASE); -} diff --git a/arch/mips/loongson32/common/platform.c b/arch/mips/loongson32/common/platform.c deleted file mode 100644 index 623eb4bc7b41..000000000000 --- a/arch/mips/loongson32/common/platform.c +++ /dev/null @@ -1,285 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (c) 2011-2016 Zhang, Keguang <keguang.zhang@gmail.com> - */ - -#include <linux/clk.h> -#include <linux/dma-mapping.h> -#include <linux/err.h> -#include <linux/mtd/partitions.h> -#include <linux/sizes.h> -#include <linux/phy.h> -#include <linux/serial_8250.h> -#include <linux/stmmac.h> -#include <linux/usb/ehci_pdriver.h> - -#include <platform.h> -#include <loongson1.h> - -/* 8250/16550 compatible UART */ -#define LS1X_UART(_id) \ - { \ - .mapbase = LS1X_UART ## _id ## _BASE, \ - .irq = LS1X_UART ## _id ## _IRQ, \ - .iotype = UPIO_MEM, \ - .flags = UPF_IOREMAP | UPF_FIXED_TYPE, \ - .type = PORT_16550A, \ - } - -static struct plat_serial8250_port ls1x_serial8250_pdata[] = { - LS1X_UART(0), - LS1X_UART(1), - LS1X_UART(2), - LS1X_UART(3), - {}, -}; - -struct platform_device ls1x_uart_pdev = { - .name = "serial8250", - .id = PLAT8250_DEV_PLATFORM, - .dev = { - .platform_data = ls1x_serial8250_pdata, - }, -}; - -void __init ls1x_serial_set_uartclk(struct platform_device *pdev) -{ - struct clk *clk; - struct plat_serial8250_port 
*p; - - clk = clk_get(&pdev->dev, pdev->name); - if (IS_ERR(clk)) { - pr_err("unable to get %s clock, err=%ld", - pdev->name, PTR_ERR(clk)); - return; - } - clk_prepare_enable(clk); - - for (p = pdev->dev.platform_data; p->flags != 0; ++p) - p->uartclk = clk_get_rate(clk); -} - -/* Synopsys Ethernet GMAC */ -static struct stmmac_mdio_bus_data ls1x_mdio_bus_data = { - .phy_mask = 0, -}; - -static struct stmmac_dma_cfg ls1x_eth_dma_cfg = { - .pbl = 1, -}; - -int ls1x_eth_mux_init(struct platform_device *pdev, void *priv) -{ - struct plat_stmmacenet_data *plat_dat = NULL; - u32 val; - - val = __raw_readl(LS1X_MUX_CTRL1); - -#if defined(CONFIG_LOONGSON1_LS1B) - plat_dat = dev_get_platdata(&pdev->dev); - if (plat_dat->bus_id) { - __raw_writel(__raw_readl(LS1X_MUX_CTRL0) | GMAC1_USE_UART1 | - GMAC1_USE_UART0, LS1X_MUX_CTRL0); - switch (plat_dat->phy_interface) { - case PHY_INTERFACE_MODE_RGMII: - val &= ~(GMAC1_USE_TXCLK | GMAC1_USE_PWM23); - break; - case PHY_INTERFACE_MODE_MII: - val |= (GMAC1_USE_TXCLK | GMAC1_USE_PWM23); - break; - default: - pr_err("unsupported mii mode %d\n", - plat_dat->phy_interface); - return -ENOTSUPP; - } - val &= ~GMAC1_SHUT; - } else { - switch (plat_dat->phy_interface) { - case PHY_INTERFACE_MODE_RGMII: - val &= ~(GMAC0_USE_TXCLK | GMAC0_USE_PWM01); - break; - case PHY_INTERFACE_MODE_MII: - val |= (GMAC0_USE_TXCLK | GMAC0_USE_PWM01); - break; - default: - pr_err("unsupported mii mode %d\n", - plat_dat->phy_interface); - return -ENOTSUPP; - } - val &= ~GMAC0_SHUT; - } - __raw_writel(val, LS1X_MUX_CTRL1); -#elif defined(CONFIG_LOONGSON1_LS1C) - plat_dat = dev_get_platdata(&pdev->dev); - - val &= ~PHY_INTF_SELI; - if (plat_dat->phy_interface == PHY_INTERFACE_MODE_RMII) - val |= 0x4 << PHY_INTF_SELI_SHIFT; - __raw_writel(val, LS1X_MUX_CTRL1); - - val = __raw_readl(LS1X_MUX_CTRL0); - __raw_writel(val & (~GMAC_SHUT), LS1X_MUX_CTRL0); -#endif - - return 0; -} - -static struct plat_stmmacenet_data ls1x_eth0_pdata = { - .bus_id = 0, - .phy_addr = -1, -#if defined(CONFIG_LOONGSON1_LS1B) - .phy_interface = PHY_INTERFACE_MODE_MII, -#elif defined(CONFIG_LOONGSON1_LS1C) - .phy_interface = PHY_INTERFACE_MODE_RMII, -#endif - .mdio_bus_data = &ls1x_mdio_bus_data, - .dma_cfg = &ls1x_eth_dma_cfg, - .has_gmac = 1, - .tx_coe = 1, - .rx_queues_to_use = 1, - .tx_queues_to_use = 1, - .init = ls1x_eth_mux_init, -}; - -static struct resource ls1x_eth0_resources[] = { - [0] = { - .start = LS1X_GMAC0_BASE, - .end = LS1X_GMAC0_BASE + SZ_64K - 1, - .flags = IORESOURCE_MEM, - }, - [1] = { - .name = "macirq", - .start = LS1X_GMAC0_IRQ, - .flags = IORESOURCE_IRQ, - }, -}; - -struct platform_device ls1x_eth0_pdev = { - .name = "stmmaceth", - .id = 0, - .num_resources = ARRAY_SIZE(ls1x_eth0_resources), - .resource = ls1x_eth0_resources, - .dev = { - .platform_data = &ls1x_eth0_pdata, - }, -}; - -#ifdef CONFIG_LOONGSON1_LS1B -static struct plat_stmmacenet_data ls1x_eth1_pdata = { - .bus_id = 1, - .phy_addr = -1, - .phy_interface = PHY_INTERFACE_MODE_MII, - .mdio_bus_data = &ls1x_mdio_bus_data, - .dma_cfg = &ls1x_eth_dma_cfg, - .has_gmac = 1, - .tx_coe = 1, - .rx_queues_to_use = 1, - .tx_queues_to_use = 1, - .init = ls1x_eth_mux_init, -}; - -static struct resource ls1x_eth1_resources[] = { - [0] = { - .start = LS1X_GMAC1_BASE, - .end = LS1X_GMAC1_BASE + SZ_64K - 1, - .flags = IORESOURCE_MEM, - }, - [1] = { - .name = "macirq", - .start = LS1X_GMAC1_IRQ, - .flags = IORESOURCE_IRQ, - }, -}; - -struct platform_device ls1x_eth1_pdev = { - .name = "stmmaceth", - .id = 1, - .num_resources = 
ARRAY_SIZE(ls1x_eth1_resources), - .resource = ls1x_eth1_resources, - .dev = { - .platform_data = &ls1x_eth1_pdata, - }, -}; -#endif /* CONFIG_LOONGSON1_LS1B */ - -/* GPIO */ -static struct resource ls1x_gpio0_resources[] = { - [0] = { - .start = LS1X_GPIO0_BASE, - .end = LS1X_GPIO0_BASE + SZ_4 - 1, - .flags = IORESOURCE_MEM, - }, -}; - -struct platform_device ls1x_gpio0_pdev = { - .name = "ls1x-gpio", - .id = 0, - .num_resources = ARRAY_SIZE(ls1x_gpio0_resources), - .resource = ls1x_gpio0_resources, -}; - -static struct resource ls1x_gpio1_resources[] = { - [0] = { - .start = LS1X_GPIO1_BASE, - .end = LS1X_GPIO1_BASE + SZ_4 - 1, - .flags = IORESOURCE_MEM, - }, -}; - -struct platform_device ls1x_gpio1_pdev = { - .name = "ls1x-gpio", - .id = 1, - .num_resources = ARRAY_SIZE(ls1x_gpio1_resources), - .resource = ls1x_gpio1_resources, -}; - -/* USB EHCI */ -static u64 ls1x_ehci_dmamask = DMA_BIT_MASK(32); - -static struct resource ls1x_ehci_resources[] = { - [0] = { - .start = LS1X_EHCI_BASE, - .end = LS1X_EHCI_BASE + SZ_32K - 1, - .flags = IORESOURCE_MEM, - }, - [1] = { - .start = LS1X_EHCI_IRQ, - .flags = IORESOURCE_IRQ, - }, -}; - -static struct usb_ehci_pdata ls1x_ehci_pdata = { -}; - -struct platform_device ls1x_ehci_pdev = { - .name = "ehci-platform", - .id = -1, - .num_resources = ARRAY_SIZE(ls1x_ehci_resources), - .resource = ls1x_ehci_resources, - .dev = { - .dma_mask = &ls1x_ehci_dmamask, - .platform_data = &ls1x_ehci_pdata, - }, -}; - -/* Real Time Clock */ -struct platform_device ls1x_rtc_pdev = { - .name = "ls1x-rtc", - .id = -1, -}; - -/* Watchdog */ -static struct resource ls1x_wdt_resources[] = { - { - .start = LS1X_WDT_BASE, - .end = LS1X_WDT_BASE + SZ_16 - 1, - .flags = IORESOURCE_MEM, - }, -}; - -struct platform_device ls1x_wdt_pdev = { - .name = "ls1x-wdt", - .id = -1, - .num_resources = ARRAY_SIZE(ls1x_wdt_resources), - .resource = ls1x_wdt_resources, -}; diff --git a/arch/mips/loongson32/common/prom.c b/arch/mips/loongson32/common/prom.c deleted file mode 100644 index fc580a22748e..000000000000 --- a/arch/mips/loongson32/common/prom.c +++ /dev/null @@ -1,42 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (c) 2011 Zhang, Keguang <keguang.zhang@gmail.com> - * - * Modified from arch/mips/pnx833x/common/prom.c. 
- */ - -#include <linux/io.h> -#include <linux/init.h> -#include <linux/memblock.h> -#include <linux/serial_reg.h> -#include <asm/fw/fw.h> - -#include <loongson1.h> - -unsigned long memsize; - -void __init prom_init(void) -{ - void __iomem *uart_base; - - fw_init_cmdline(); - - memsize = fw_getenvl("memsize"); - if(!memsize) - memsize = DEFAULT_MEMSIZE; - - if (strstr(arcs_cmdline, "console=ttyS3")) - uart_base = ioremap(LS1X_UART3_BASE, 0x0f); - else if (strstr(arcs_cmdline, "console=ttyS2")) - uart_base = ioremap(LS1X_UART2_BASE, 0x0f); - else if (strstr(arcs_cmdline, "console=ttyS1")) - uart_base = ioremap(LS1X_UART1_BASE, 0x0f); - else - uart_base = ioremap(LS1X_UART0_BASE, 0x0f); - setup_8250_early_printk_port((unsigned long)uart_base, 0, 0); -} - -void __init plat_mem_setup(void) -{ - memblock_add(0x0, (memsize << 20)); -} diff --git a/arch/mips/loongson32/common/setup.c b/arch/mips/loongson32/common/setup.c deleted file mode 100644 index 4733fe037176..000000000000 --- a/arch/mips/loongson32/common/setup.c +++ /dev/null @@ -1,26 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (c) 2011 Zhang, Keguang <keguang.zhang@gmail.com> - */ - -#include <linux/io.h> -#include <linux/init.h> -#include <linux/smp.h> -#include <asm/cpu-info.h> -#include <asm/bootinfo.h> - -const char *get_system_type(void) -{ - unsigned int processor_id = (&current_cpu_data)->processor_id; - - switch (processor_id & PRID_REV_MASK) { - case PRID_REV_LOONGSON1B: -#if defined(CONFIG_LOONGSON1_LS1B) - return "LOONGSON LS1B"; -#elif defined(CONFIG_LOONGSON1_LS1C) - return "LOONGSON LS1C"; -#endif - default: - return "LOONGSON (unknown)"; - } -} diff --git a/arch/mips/loongson32/common/time.c b/arch/mips/loongson32/common/time.c deleted file mode 100644 index 74ad2b17918d..000000000000 --- a/arch/mips/loongson32/common/time.c +++ /dev/null @@ -1,23 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (c) 2014 Zhang, Keguang <keguang.zhang@gmail.com> - */ - -#include <linux/clk.h> -#include <linux/of_clk.h> -#include <asm/time.h> - -void __init plat_time_init(void) -{ - struct clk *clk = NULL; - - /* initialize LS1X clocks */ - of_clk_init(NULL); - - /* setup mips r4k timer */ - clk = clk_get(NULL, "cpu_clk"); - if (IS_ERR(clk)) - panic("unable to get cpu clock, err=%ld", PTR_ERR(clk)); - - mips_hpt_frequency = clk_get_rate(clk) / 2; -} diff --git a/arch/mips/loongson32/ls1b/Makefile b/arch/mips/loongson32/ls1b/Makefile deleted file mode 100644 index 33c574dc0f7f..000000000000 --- a/arch/mips/loongson32/ls1b/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# -# Makefile for loongson1B based machines. 
-# - -obj-y += board.o diff --git a/arch/mips/loongson32/ls1b/board.c b/arch/mips/loongson32/ls1b/board.c deleted file mode 100644 index fe115bdcb22c..000000000000 --- a/arch/mips/loongson32/ls1b/board.c +++ /dev/null @@ -1,55 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (c) 2011-2016 Zhang, Keguang <keguang.zhang@gmail.com> - */ - -#include <linux/leds.h> -#include <linux/mtd/partitions.h> -#include <linux/sizes.h> - -#include <loongson1.h> -#include <platform.h> - -static const struct gpio_led ls1x_gpio_leds[] __initconst = { - { - .name = "LED9", - .default_trigger = "heartbeat", - .gpio = 38, - .active_low = 1, - .default_state = LEDS_GPIO_DEFSTATE_OFF, - }, { - .name = "LED6", - .default_trigger = "nand-disk", - .gpio = 39, - .active_low = 1, - .default_state = LEDS_GPIO_DEFSTATE_OFF, - }, -}; - -static const struct gpio_led_platform_data ls1x_led_pdata __initconst = { - .num_leds = ARRAY_SIZE(ls1x_gpio_leds), - .leds = ls1x_gpio_leds, -}; - -static struct platform_device *ls1b_platform_devices[] __initdata = { - &ls1x_uart_pdev, - &ls1x_eth0_pdev, - &ls1x_eth1_pdev, - &ls1x_ehci_pdev, - &ls1x_gpio0_pdev, - &ls1x_gpio1_pdev, - &ls1x_rtc_pdev, - &ls1x_wdt_pdev, -}; - -static int __init ls1b_platform_init(void) -{ - ls1x_serial_set_uartclk(&ls1x_uart_pdev); - - gpio_led_register_device(-1, &ls1x_led_pdata); - - return platform_add_devices(ls1b_platform_devices, - ARRAY_SIZE(ls1b_platform_devices)); -} - -arch_initcall(ls1b_platform_init); diff --git a/arch/mips/loongson32/ls1c/Makefile b/arch/mips/loongson32/ls1c/Makefile deleted file mode 100644 index 1cf3aa264d55..000000000000 --- a/arch/mips/loongson32/ls1c/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# -# Makefile for loongson1C based machines. 
-# - -obj-y += board.o diff --git a/arch/mips/loongson32/ls1c/board.c b/arch/mips/loongson32/ls1c/board.c deleted file mode 100644 index 9dcfe9de55b0..000000000000 --- a/arch/mips/loongson32/ls1c/board.c +++ /dev/null @@ -1,23 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (c) 2016 Yang Ling <gnaygnil@gmail.com> - */ - -#include <platform.h> - -static struct platform_device *ls1c_platform_devices[] __initdata = { - &ls1x_uart_pdev, - &ls1x_eth0_pdev, - &ls1x_rtc_pdev, - &ls1x_wdt_pdev, -}; - -static int __init ls1c_platform_init(void) -{ - ls1x_serial_set_uartclk(&ls1x_uart_pdev); - - return platform_add_devices(ls1c_platform_devices, - ARRAY_SIZE(ls1c_platform_devices)); -} - -arch_initcall(ls1c_platform_init); diff --git a/arch/mips/loongson64/boardinfo.c b/arch/mips/loongson64/boardinfo.c index 8bb275c93ac0..827ab94b98b3 100644 --- a/arch/mips/loongson64/boardinfo.c +++ b/arch/mips/loongson64/boardinfo.c @@ -1,17 +1,18 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/kobject.h> +#include <linux/string.h> #include <boot_param.h> static ssize_t boardinfo_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - char board_manufacturer[64] = {0}; + char board_manufacturer[64]; char *tmp_board_manufacturer = board_manufacturer; - char bios_vendor[64] = {0}; + char bios_vendor[64]; char *tmp_bios_vendor = bios_vendor; - strcpy(board_manufacturer, eboard->name); - strcpy(bios_vendor, einter->description); + strscpy_pad(board_manufacturer, eboard->name); + strscpy_pad(bios_vendor, einter->description); return sprintf(buf, "Board Info\n" diff --git a/arch/mips/math-emu/me-debugfs.c b/arch/mips/math-emu/me-debugfs.c index d5ad76b2bb67..aeddf7aecfc5 100644 --- a/arch/mips/math-emu/me-debugfs.c +++ b/arch/mips/math-emu/me-debugfs.c @@ -37,11 +37,11 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_fpuemu_stat, fpuemu_stat_get, NULL, "%llu\n"); * used in debugfs item names to be clearly associated to corresponding * MIPS FPU instructions. 
*/ -static void adjust_instruction_counter_name(char *out_name, char *in_name) +static void adjust_instruction_counter_name(char *out_name, char *in_name, size_t len) { int i = 0; - strcpy(out_name, in_name); + strscpy(out_name, in_name, len); while (in_name[i] != '\0') { if (out_name[i] == '_') out_name[i] = '.'; @@ -226,7 +226,7 @@ do { \ #define FPU_STAT_CREATE_EX(m) \ do { \ - adjust_instruction_counter_name(name, #m); \ + adjust_instruction_counter_name(name, #m, sizeof(name)); \ \ debugfs_create_file(name, 0444, fpuemu_debugfs_inst_dir, \ (void *)FPU_EMU_STAT_OFFSET(m), \ diff --git a/arch/mips/pci/pci-legacy.c b/arch/mips/pci/pci-legacy.c index 66898fd182dc..d04b7c1294b6 100644 --- a/arch/mips/pci/pci-legacy.c +++ b/arch/mips/pci/pci-legacy.c @@ -249,45 +249,11 @@ static int __init pcibios_init(void) subsys_initcall(pcibios_init); -static int pcibios_enable_resources(struct pci_dev *dev, int mask) -{ - u16 cmd, old_cmd; - int idx; - struct resource *r; - - pci_read_config_word(dev, PCI_COMMAND, &cmd); - old_cmd = cmd; - pci_dev_for_each_resource(dev, r, idx) { - /* Only set up the requested stuff */ - if (!(mask & (1<<idx))) - continue; - - if (!(r->flags & (IORESOURCE_IO | IORESOURCE_MEM))) - continue; - if ((idx == PCI_ROM_RESOURCE) && - (!(r->flags & IORESOURCE_ROM_ENABLE))) - continue; - if (!r->start && r->end) { - pci_err(dev, - "can't enable device: resource collisions\n"); - return -EINVAL; - } - if (r->flags & IORESOURCE_IO) - cmd |= PCI_COMMAND_IO; - if (r->flags & IORESOURCE_MEM) - cmd |= PCI_COMMAND_MEMORY; - } - if (cmd != old_cmd) { - pci_info(dev, "enabling device (%04x -> %04x)\n", old_cmd, cmd); - pci_write_config_word(dev, PCI_COMMAND, cmd); - } - return 0; -} - int pcibios_enable_device(struct pci_dev *dev, int mask) { - int err = pcibios_enable_resources(dev, mask); + int err; + err = pci_enable_resources(dev, mask); if (err < 0) return err; diff --git a/arch/mips/rb532/prom.c b/arch/mips/rb532/prom.c index b88e89ec5894..8c370eb180ef 100644 --- a/arch/mips/rb532/prom.c +++ b/arch/mips/rb532/prom.c @@ -53,6 +53,7 @@ static void __init prom_setup_cmdline(void) int prom_argc; char **prom_argv; int i; + size_t len; prom_argc = fw_arg0; prom_argv = (char **) fw_arg1; @@ -82,20 +83,20 @@ static void __init prom_setup_cmdline(void) mips_machtype = MACH_MIKROTIK_RB532; } - strcpy(cp, prom_argv[i]); - cp += strlen(prom_argv[i]); + len = strlen(prom_argv[i]); + memcpy(cp, prom_argv[i], len + 1); + cp += len; } *(cp++) = ' '; - i = strlen(arcs_cmdline); - if (i > 0) { + len = strlen(arcs_cmdline); + if (len > 0) { *(cp++) = ' '; - strcpy(cp, arcs_cmdline); - cp += strlen(arcs_cmdline); + memcpy(cp, arcs_cmdline, len + 1); + cp += len; } cmd_line[COMMAND_LINE_SIZE - 1] = '\0'; - - strcpy(arcs_cmdline, cmd_line); + strscpy(arcs_cmdline, cmd_line); } void __init prom_init(void) diff --git a/arch/mips/sgi-ip22/ip22-platform.c b/arch/mips/sgi-ip22/ip22-platform.c index 0b2002e02a47..3a53690b4b33 100644 --- a/arch/mips/sgi-ip22/ip22-platform.c +++ b/arch/mips/sgi-ip22/ip22-platform.c @@ -221,3 +221,35 @@ static int __init sgi_ds1286_devinit(void) } device_initcall(sgi_ds1286_devinit); + +#define SGI_ZILOG_BASE (HPC3_CHIP0_BASE + \ + offsetof(struct hpc3_regs, pbus_extregs[6]) + \ + offsetof(struct sgioc_regs, uart)) + +static struct resource sgi_zilog_resources[] = { + { + .start = SGI_ZILOG_BASE, + .end = SGI_ZILOG_BASE + 15, + .flags = IORESOURCE_MEM + }, + { + .start = SGI_SERIAL_IRQ, + .end = SGI_SERIAL_IRQ, + .flags = IORESOURCE_IRQ + } +}; + +static struct platform_device 
zilog_device = { + .name = "ip22zilog", + .id = 0, + .num_resources = ARRAY_SIZE(sgi_zilog_resources), + .resource = sgi_zilog_resources, +}; + + +static int __init sgi_zilog_devinit(void) +{ + return platform_device_register(&zilog_device); +} + +device_initcall(sgi_zilog_devinit); diff --git a/arch/mips/sgi-ip22/ip22-setup.c b/arch/mips/sgi-ip22/ip22-setup.c index e06a818fe792..f083b25be13b 100644 --- a/arch/mips/sgi-ip22/ip22-setup.c +++ b/arch/mips/sgi-ip22/ip22-setup.c @@ -11,6 +11,7 @@ #include <linux/types.h> #include <linux/console.h> #include <linux/sched.h> +#include <linux/string.h> #include <linux/tty.h> #include <asm/addrspace.h> @@ -65,7 +66,7 @@ void __init plat_mem_setup(void) static char options[8] __initdata; char *baud = ArcGetEnvironmentVariable("dbaud"); if (baud) - strcpy(options, baud); + strscpy(options, baud); add_preferred_console("ttyS", *(ctype + 1) == '2' ? 1 : 0, baud ? options : NULL); } else if (!ctype || *ctype != 'g') { diff --git a/arch/mips/sgi-ip32/ip32-setup.c b/arch/mips/sgi-ip32/ip32-setup.c index aeb0805aae57..c2ebc4bbd866 100644 --- a/arch/mips/sgi-ip32/ip32-setup.c +++ b/arch/mips/sgi-ip32/ip32-setup.c @@ -14,6 +14,7 @@ #include <linux/interrupt.h> #include <linux/param.h> #include <linux/sched.h> +#include <linux/string.h> #include <asm/bootinfo.h> #include <asm/mipsregs.h> @@ -90,7 +91,7 @@ void __init plat_mem_setup(void) static char options[8] __initdata; char *baud = ArcGetEnvironmentVariable("dbaud"); if (baud) - strcpy(options, baud); + strscpy(options, baud); add_preferred_console("ttyS", *(con + 1) == '2' ? 1 : 0, baud ? options : NULL); } diff --git a/arch/mips/sni/setup.c b/arch/mips/sni/setup.c index 03cb69937258..fc7da12284f5 100644 --- a/arch/mips/sni/setup.c +++ b/arch/mips/sni/setup.c @@ -13,6 +13,7 @@ #include <linux/export.h> #include <linux/console.h> #include <linux/screen_info.h> +#include <linux/string.h> #ifdef CONFIG_FW_ARC #include <asm/fw/arc/types.h> @@ -80,7 +81,7 @@ static void __init sni_console_setup(void) break; } if (baud) - strcpy(options, baud); + strscpy(options, baud); if (strncmp(cdev, "tty552", 6) == 0) add_preferred_console("ttyS", port, baud ? 
options : NULL); diff --git a/arch/mips/txx9/generic/setup.c b/arch/mips/txx9/generic/setup.c index 5dc867ea2c69..03f8a3a95637 100644 --- a/arch/mips/txx9/generic/setup.c +++ b/arch/mips/txx9/generic/setup.c @@ -200,7 +200,7 @@ static void __init preprocess_cmdline(void) static char cmdline[COMMAND_LINE_SIZE] __initdata; char *s; - strcpy(cmdline, arcs_cmdline); + strscpy(cmdline, arcs_cmdline); s = cmdline; arcs_cmdline[0] = '\0'; while (s && *s) { @@ -270,7 +270,7 @@ void __init prom_init(void) preprocess_cmdline(); select_board(); - strcpy(txx9_system_type, txx9_board_vec->system); + strscpy(txx9_system_type, txx9_board_vec->system); txx9_board_vec->prom_init(); } diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index b38fee299bc4..9156635dd264 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -24,6 +24,8 @@ config OPENRISC select GENERIC_PCI_IOMAP select GENERIC_IOREMAP select GENERIC_CPU_DEVICES + select HAVE_ARCH_JUMP_LABEL + select HAVE_ARCH_JUMP_LABEL_RELATIVE select HAVE_PCI select HAVE_UID16 select HAVE_PAGE_SIZE_8KB diff --git a/arch/openrisc/configs/or1klitex_defconfig b/arch/openrisc/configs/or1klitex_defconfig index 3e849d25838a..fb1eb9a68bd6 100644 --- a/arch/openrisc/configs/or1klitex_defconfig +++ b/arch/openrisc/configs/or1klitex_defconfig @@ -38,7 +38,7 @@ CONFIG_MMC_LITEX=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_LITEX_SOC_CONTROLLER=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_EXFAT_FS=y diff --git a/arch/openrisc/configs/or1ksim_defconfig b/arch/openrisc/configs/or1ksim_defconfig index 59fe33cefba2..769705ac24d5 100644 --- a/arch/openrisc/configs/or1ksim_defconfig +++ b/arch/openrisc/configs/or1ksim_defconfig @@ -3,26 +3,23 @@ CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_RD_GZIP is not set CONFIG_EXPERT=y -# CONFIG_KALLSYMS is not set # CONFIG_EPOLL is not set # CONFIG_TIMERFD is not set # CONFIG_EVENTFD is not set # CONFIG_AIO is not set -# CONFIG_VM_EVENT_COUNTERS is not set -# CONFIG_COMPAT_BRK is not set -CONFIG_SLUB=y -CONFIG_SLUB_TINY=y -CONFIG_MODULES=y -# CONFIG_BLOCK is not set +# CONFIG_KALLSYMS is not set CONFIG_BUILTIN_DTB_NAME="or1ksim" CONFIG_HZ_100=y +CONFIG_JUMP_LABEL=y +CONFIG_MODULES=y +# CONFIG_BLOCK is not set +CONFIG_SLUB_TINY=y +# CONFIG_COMPAT_BRK is not set +# CONFIG_VM_EVENT_COUNTERS is not set CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_INET=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_INET_DIAG is not set CONFIG_TCP_CONG_ADVANCED=y # CONFIG_TCP_CONG_BIC is not set @@ -35,7 +32,6 @@ CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y # CONFIG_PREVENT_FIRMWARE_BUILD is not set # CONFIG_FW_LOADER is not set -CONFIG_PROC_DEVICETREE=y CONFIG_NETDEVICES=y CONFIG_ETHOC=y CONFIG_MICREL_PHY=y @@ -53,4 +49,3 @@ CONFIG_SERIAL_OF_PLATFORM=y # CONFIG_DNOTIFY is not set CONFIG_TMPFS=y CONFIG_NFS_FS=y -# CONFIG_ENABLE_MUST_CHECK is not set diff --git a/arch/openrisc/configs/virt_defconfig b/arch/openrisc/configs/virt_defconfig index c1b69166c500..0b9979b35ca8 100644 --- a/arch/openrisc/configs/virt_defconfig +++ b/arch/openrisc/configs/virt_defconfig @@ -12,6 +12,7 @@ CONFIG_NR_CPUS=8 CONFIG_SMP=y CONFIG_HZ_100=y # CONFIG_OPENRISC_NO_SPR_SR_DSX is not set +CONFIG_JUMP_LABEL=y # CONFIG_COMPAT_BRK is not set CONFIG_NET=y CONFIG_PACKET=y @@ -55,7 +56,6 @@ CONFIG_DRM=y # CONFIG_DRM_FBDEV_EMULATION is not set CONFIG_DRM_VIRTIO_GPU=y CONFIG_FB=y -CONFIG_FIRMWARE_EDID=y 
CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y CONFIG_LOGO=y @@ -94,8 +94,8 @@ CONFIG_VIRTIO_PCI=y CONFIG_VIRTIO_INPUT=y CONFIG_VIRTIO_MMIO=y CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y # CONFIG_DNOTIFY is not set CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild index 2b1a6b00cdac..cef49d60d74c 100644 --- a/arch/openrisc/include/asm/Kbuild +++ b/arch/openrisc/include/asm/Kbuild @@ -9,4 +9,3 @@ generic-y += spinlock.h generic-y += qrwlock_types.h generic-y += qrwlock.h generic-y += user.h -generic-y += text-patching.h diff --git a/arch/openrisc/include/asm/fixmap.h b/arch/openrisc/include/asm/fixmap.h index aaa6a26a3e92..74000215064d 100644 --- a/arch/openrisc/include/asm/fixmap.h +++ b/arch/openrisc/include/asm/fixmap.h @@ -28,6 +28,7 @@ enum fixed_addresses { FIX_EARLYCON_MEM_BASE, + FIX_TEXT_POKE0, __end_of_fixed_addresses }; diff --git a/arch/openrisc/include/asm/insn-def.h b/arch/openrisc/include/asm/insn-def.h new file mode 100644 index 000000000000..1e0c028a5b95 --- /dev/null +++ b/arch/openrisc/include/asm/insn-def.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2025 Chen Miao + */ + +#ifndef __ASM_OPENRISC_INSN_DEF_H +#define __ASM_OPENRISC_INSN_DEF_H + +/* or1k instructions are always 32 bits. */ +#define OPENRISC_INSN_SIZE 4 + +/* or1k nop instruction code */ +#define OPENRISC_INSN_NOP 0x15000000U + +#endif /* __ASM_OPENRISC_INSN_DEF_H */ diff --git a/arch/openrisc/include/asm/jump_label.h b/arch/openrisc/include/asm/jump_label.h new file mode 100644 index 000000000000..3ec0f4e19f9c --- /dev/null +++ b/arch/openrisc/include/asm/jump_label.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2025 Chen Miao + * + * Based on arch/arm/include/asm/jump_label.h + */ +#ifndef __ASM_OPENRISC_JUMP_LABEL_H +#define __ASM_OPENRISC_JUMP_LABEL_H + +#ifndef __ASSEMBLER__ + +#include <linux/types.h> +#include <asm/insn-def.h> + +#define HAVE_JUMP_LABEL_BATCH + +#define JUMP_LABEL_NOP_SIZE OPENRISC_INSN_SIZE + +/** + * JUMP_TABLE_ENTRY - Create a jump table entry + * @key: Jump key identifier (typically a symbol address) + * @label: Target label address + * + * This macro creates a jump table entry in the dedicated kernel section (__jump_table). + * Each entry contains the following information: + * Offset from current instruction to jump instruction (1b - .) + * Offset from current instruction to target label (label - .) + * Offset from current instruction to key identifier (key - .) + */ +#define JUMP_TABLE_ENTRY(key, label) \ + ".pushsection __jump_table, \"aw\" \n\t" \ + ".align 4 \n\t" \ + ".long 1b - ., " label " - . \n\t" \ + ".long " key " - . 
\n\t" \ + ".popsection \n\t" + +#define ARCH_STATIC_BRANCH_ASM(key, label) \ + ".align 4 \n\t" \ + "1: l.nop \n\t" \ + " l.nop \n\t" \ + JUMP_TABLE_ENTRY(key, label) + +static __always_inline bool arch_static_branch(struct static_key *const key, + const bool branch) +{ + asm goto (ARCH_STATIC_BRANCH_ASM("%0", "%l[l_yes]") + ::"i"(&((char *)key)[branch])::l_yes); + + return false; +l_yes: + return true; +} + +#define ARCH_STATIC_BRANCH_JUMP_ASM(key, label) \ + ".align 4 \n\t" \ + "1: l.j " label " \n\t" \ + " l.nop \n\t" \ + JUMP_TABLE_ENTRY(key, label) + +static __always_inline bool +arch_static_branch_jump(struct static_key *const key, const bool branch) +{ + asm goto (ARCH_STATIC_BRANCH_JUMP_ASM("%0", "%l[l_yes]") + ::"i"(&((char *)key)[branch])::l_yes); + + return false; +l_yes: + return true; +} + +#endif /* __ASSEMBLER__ */ +#endif /* __ASM_OPENRISC_JUMP_LABEL_H */ diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h index d33702831505..b218050e2f6d 100644 --- a/arch/openrisc/include/asm/pgtable.h +++ b/arch/openrisc/include/asm/pgtable.h @@ -183,23 +183,6 @@ extern void paging_init(void); extern unsigned long empty_zero_page[2048]; #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) -/* number of bits that fit into a memory pointer */ -#define BITS_PER_PTR (8*sizeof(unsigned long)) - -/* to align the pointer to a pointer address */ -#define PTR_MASK (~(sizeof(void *)-1)) - -/* sizeof(void*)==1<<SIZEOF_PTR_LOG2 */ -/* 64-bit machines, beware! SRB. */ -#define SIZEOF_PTR_LOG2 2 - -/* to find an entry in a page-table */ -#define PAGE_PTR(address) \ -((unsigned long)(address)>>(PAGE_SHIFT-SIZEOF_PTR_LOG2)&PTR_MASK&~PAGE_MASK) - -/* to set the page-dir */ -#define SET_PAGE_DIR(tsk, pgdir) - #define pte_none(x) (!pte_val(x)) #define pte_present(x) (pte_val(x) & _PAGE_PRESENT) #define pte_clear(mm, addr, xp) do { pte_val(*(xp)) = 0; } while (0) diff --git a/arch/openrisc/include/asm/text-patching.h b/arch/openrisc/include/asm/text-patching.h new file mode 100644 index 000000000000..d19098dac0cc --- /dev/null +++ b/arch/openrisc/include/asm/text-patching.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2025 Chen Miao + */ + +#ifndef _ASM_OPENRISC_PATCHING_H +#define _ASM_OPENRISC_PATCHING_H + +#include <linux/types.h> + +int patch_insn_write(void *addr, u32 insn); + +#endif /* _ASM_OPENRISC_PATCHING_H */ diff --git a/arch/openrisc/kernel/Makefile b/arch/openrisc/kernel/Makefile index 58e6a1b525b7..19e0eb94f2eb 100644 --- a/arch/openrisc/kernel/Makefile +++ b/arch/openrisc/kernel/Makefile @@ -9,9 +9,11 @@ obj-y := head.o setup.o or32_ksyms.o process.o dma.o \ traps.o time.o irq.o entry.o ptrace.o signal.o \ sys_call_table.o unwinder.o cacheinfo.o +obj-$(CONFIG_JUMP_LABEL) += jump_label.o obj-$(CONFIG_SMP) += smp.o sync-timer.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_OF) += prom.o +obj-y += patching.o clean: diff --git a/arch/openrisc/kernel/jump_label.c b/arch/openrisc/kernel/jump_label.c new file mode 100644 index 000000000000..ab7137c23b46 --- /dev/null +++ b/arch/openrisc/kernel/jump_label.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2025 Chen Miao + * + * Based on arch/arm/kernel/jump_label.c + */ +#include <linux/jump_label.h> +#include <linux/kernel.h> +#include <linux/memory.h> +#include <asm/bug.h> +#include <asm/cacheflush.h> +#include <asm/text-patching.h> + +bool arch_jump_label_transform_queue(struct jump_entry *entry, + 
enum jump_label_type type) +{ + void *addr = (void *)jump_entry_code(entry); + u32 insn; + + if (type == JUMP_LABEL_JMP) { + long offset; + + offset = jump_entry_target(entry) - jump_entry_code(entry); + /* + * The actual maximum range of the l.j instruction's offset is -134,217,728 + * ~ 134,217,724 (sign 26-bit imm). + * For the original jump range, we need to right-shift N by 2 to obtain the + * instruction's offset. + */ + WARN_ON_ONCE(offset < -134217728 || offset > 134217724); + + /* 26bit imm mask */ + offset = (offset >> 2) & 0x03ffffff; + + insn = offset; + } else { + insn = OPENRISC_INSN_NOP; + } + + if (early_boot_irqs_disabled) + copy_to_kernel_nofault(addr, &insn, sizeof(insn)); + else + patch_insn_write(addr, insn); + + return true; +} + +void arch_jump_label_transform_apply(void) +{ + kick_all_cpus_sync(); +} diff --git a/arch/openrisc/kernel/module.c b/arch/openrisc/kernel/module.c index c9ff4c4a0b29..4ac4fbaa827c 100644 --- a/arch/openrisc/kernel/module.c +++ b/arch/openrisc/kernel/module.c @@ -55,6 +55,10 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, value |= *location & 0xfc000000; *location = value; break; + case R_OR1K_32_PCREL: + value -= (uint32_t)location; + *location = value; + break; case R_OR1K_AHI16: /* Adjust the operand to match with a signed LO16. */ value += 0x8000; diff --git a/arch/openrisc/kernel/patching.c b/arch/openrisc/kernel/patching.c new file mode 100644 index 000000000000..d186172beb33 --- /dev/null +++ b/arch/openrisc/kernel/patching.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2020 SiFive + * Copyright (C) 2025 Chen Miao + */ + +#include <linux/mm.h> +#include <linux/kernel.h> +#include <linux/spinlock.h> +#include <linux/uaccess.h> + +#include <asm/insn-def.h> +#include <asm/cacheflush.h> +#include <asm/page.h> +#include <asm/fixmap.h> +#include <asm/text-patching.h> +#include <asm/sections.h> + +static DEFINE_RAW_SPINLOCK(patch_lock); + +static __always_inline void *patch_map(void *addr, int fixmap) +{ + uintptr_t uaddr = (uintptr_t) addr; + phys_addr_t phys; + + if (core_kernel_text(uaddr)) { + phys = __pa_symbol(addr); + } else { + struct page *page = vmalloc_to_page(addr); + BUG_ON(!page); + phys = page_to_phys(page) + offset_in_page(addr); + } + + return (void *)set_fixmap_offset(fixmap, phys); +} + +static void patch_unmap(int fixmap) +{ + clear_fixmap(fixmap); +} + +static int __patch_insn_write(void *addr, u32 insn) +{ + void *waddr = addr; + unsigned long flags = 0; + int ret; + + raw_spin_lock_irqsave(&patch_lock, flags); + + waddr = patch_map(addr, FIX_TEXT_POKE0); + + ret = copy_to_kernel_nofault(waddr, &insn, OPENRISC_INSN_SIZE); + local_icache_range_inv((unsigned long)waddr, + (unsigned long)waddr + OPENRISC_INSN_SIZE); + + patch_unmap(FIX_TEXT_POKE0); + + raw_spin_unlock_irqrestore(&patch_lock, flags); + + return ret; +} + +/* + * patch_insn_write - Write a single instruction to a specified memory location + * This API provides a single-instruction patching, primarily used for runtime + * code modification. + * By the way, the insn size must be 4 bytes. 
+ */ +int patch_insn_write(void *addr, u32 insn) +{ + u32 *tp = addr; + int ret; + + if ((uintptr_t) tp & 0x3) + return -EINVAL; + + ret = __patch_insn_write(tp, insn); + + return ret; +} diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c index a9fb9cc6779e..000a9cc10e6f 100644 --- a/arch/openrisc/kernel/setup.c +++ b/arch/openrisc/kernel/setup.c @@ -249,6 +249,8 @@ void __init setup_arch(char **cmdline_p) initrd_below_start_ok = 1; } #endif + /* perform jump_table sorting before paging_init locks down read only memory */ + jump_label_init(); /* paging_init() sets up the MMU and marks all pages as reserved */ paging_init(); diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c index e4904ca6f0a0..9382d9a0ec78 100644 --- a/arch/openrisc/mm/init.c +++ b/arch/openrisc/mm/init.c @@ -226,7 +226,11 @@ static int __init map_page(unsigned long va, phys_addr_t pa, pgprot_t prot) return 0; } -void __init __set_fixmap(enum fixed_addresses idx, +/* + * __set_fix must now support both EARLYCON and TEXT_POKE mappings, + * which are used at different stages of kernel execution. + */ +void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) { unsigned long address = __fix_to_virt(idx); diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 0940c162f1f7..47fd9662d800 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -31,6 +31,9 @@ config PARISC select HAVE_KERNEL_UNCOMPRESSED select HAVE_PCI select HAVE_PERF_EVENTS + select HAVE_PERF_REGS + select HAVE_PERF_USER_STACK_DUMP + select PERF_USE_VMALLOC select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZ4 diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig index 94928d114d4c..52031bde9f17 100644 --- a/arch/parisc/configs/generic-32bit_defconfig +++ b/arch/parisc/configs/generic-32bit_defconfig @@ -232,8 +232,8 @@ CONFIG_AUXDISPLAY=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_QUOTA=y CONFIG_QUOTA_NETLINK_INTERFACE=y CONFIG_QFMT_V2=y diff --git a/arch/parisc/configs/generic-64bit_defconfig b/arch/parisc/configs/generic-64bit_defconfig index d8cd7f858b2a..1aec04c09d0b 100644 --- a/arch/parisc/configs/generic-64bit_defconfig +++ b/arch/parisc/configs/generic-64bit_defconfig @@ -251,8 +251,8 @@ CONFIG_STAGING=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_XFS_FS=m CONFIG_BTRFS_FS=m CONFIG_QUOTA=y diff --git a/arch/parisc/include/asm/perf_event.h b/arch/parisc/include/asm/perf_event.h index 1e0fd8ba6c03..8a2925029d15 100644 --- a/arch/parisc/include/asm/perf_event.h +++ b/arch/parisc/include/asm/perf_event.h @@ -1,6 +1,12 @@ #ifndef __ASM_PARISC_PERF_EVENT_H #define __ASM_PARISC_PERF_EVENT_H -/* Empty, just to avoid compiling error */ +#include <asm/psw.h> + +#define perf_arch_fetch_caller_regs(regs, __ip) { \ + (regs)->gr[0] = KERNEL_PSW; \ + (regs)->iaoq[0] = (__ip); \ + asm volatile("copy %%sp, %0\n":"=r"((regs)->gr[30])); \ +} #endif /* __ASM_PARISC_PERF_EVENT_H */ diff --git a/arch/parisc/include/uapi/asm/ioctls.h b/arch/parisc/include/uapi/asm/ioctls.h index 82d1148c6379..74b4027a4e80 100644 --- a/arch/parisc/include/uapi/asm/ioctls.h +++ b/arch/parisc/include/uapi/asm/ioctls.h @@ -10,10 +10,10 @@ #define TCSETS _IOW('T', 17, struct termios) /* TCSETATTR */ #define TCSETSW _IOW('T', 18, 
struct termios) /* TCSETATTRD */ #define TCSETSF _IOW('T', 19, struct termios) /* TCSETATTRF */ -#define TCGETA _IOR('T', 1, struct termio) -#define TCSETA _IOW('T', 2, struct termio) -#define TCSETAW _IOW('T', 3, struct termio) -#define TCSETAF _IOW('T', 4, struct termio) +#define TCGETA 0x40125401 +#define TCSETA 0x80125402 +#define TCSETAW 0x80125403 +#define TCSETAF 0x80125404 #define TCSBRK _IO('T', 5) #define TCXONC _IO('T', 6) #define TCFLSH _IO('T', 7) diff --git a/arch/parisc/include/uapi/asm/perf_regs.h b/arch/parisc/include/uapi/asm/perf_regs.h new file mode 100644 index 000000000000..1ae687bb3d3c --- /dev/null +++ b/arch/parisc/include/uapi/asm/perf_regs.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_ASM_PARISC_PERF_REGS_H +#define _UAPI_ASM_PARISC_PERF_REGS_H + +/* see struct user_regs_struct */ +enum perf_event_parisc_regs { + PERF_REG_PARISC_R0, /* PSW is in gr[0] */ + PERF_REG_PARISC_R1, + PERF_REG_PARISC_R2, + PERF_REG_PARISC_R3, + PERF_REG_PARISC_R4, + PERF_REG_PARISC_R5, + PERF_REG_PARISC_R6, + PERF_REG_PARISC_R7, + PERF_REG_PARISC_R8, + PERF_REG_PARISC_R9, + PERF_REG_PARISC_R10, + PERF_REG_PARISC_R11, + PERF_REG_PARISC_R12, + PERF_REG_PARISC_R13, + PERF_REG_PARISC_R14, + PERF_REG_PARISC_R15, + PERF_REG_PARISC_R16, + PERF_REG_PARISC_R17, + PERF_REG_PARISC_R18, + PERF_REG_PARISC_R19, + PERF_REG_PARISC_R20, + PERF_REG_PARISC_R21, + PERF_REG_PARISC_R22, + PERF_REG_PARISC_R23, + PERF_REG_PARISC_R24, + PERF_REG_PARISC_R25, + PERF_REG_PARISC_R26, + PERF_REG_PARISC_R27, + PERF_REG_PARISC_R28, + PERF_REG_PARISC_R29, + PERF_REG_PARISC_R30, + PERF_REG_PARISC_R31, + + PERF_REG_PARISC_SR0, + PERF_REG_PARISC_SR1, + PERF_REG_PARISC_SR2, + PERF_REG_PARISC_SR3, + PERF_REG_PARISC_SR4, + PERF_REG_PARISC_SR5, + PERF_REG_PARISC_SR6, + PERF_REG_PARISC_SR7, + + PERF_REG_PARISC_IAOQ0, + PERF_REG_PARISC_IAOQ1, + PERF_REG_PARISC_IASQ0, + PERF_REG_PARISC_IASQ1, + + PERF_REG_PARISC_SAR, /* CR11 */ + PERF_REG_PARISC_IIR, /* CR19 */ + PERF_REG_PARISC_ISR, /* CR20 */ + PERF_REG_PARISC_IOR, /* CR21 */ + PERF_REG_PARISC_IPSW, /* CR22 */ + + PERF_REG_PARISC_MAX +}; + +#endif /* _UAPI_ASM_PARISC_PERF_REGS_H */ diff --git a/arch/parisc/kernel/Makefile b/arch/parisc/kernel/Makefile index d5055ba33722..9157bc8bdf41 100644 --- a/arch/parisc/kernel/Makefile +++ b/arch/parisc/kernel/Makefile @@ -38,6 +38,7 @@ obj-$(CONFIG_GENERIC_ARCH_TOPOLOGY) += topology.o obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o +obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_regs.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_KEXEC_CORE) += kexec.o relocate_kernel.o diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index 1e793f770f71..1f8936fc2292 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -995,6 +995,7 @@ static __init int qemu_print_iodc_data(struct device *lin_dev, void *data) struct pdc_system_map_mod_info pdc_mod_info; struct pdc_module_path mod_path; + memset(&iodc_data, 0, sizeof(iodc_data)); status = pdc_iodc_read(&count, hpa, 0, &iodc_data, sizeof(iodc_data)); if (status != PDC_OK) { @@ -1012,6 +1013,11 @@ static __init int qemu_print_iodc_data(struct device *lin_dev, void *data) mod_index = 0; do { + /* initialize device path for old machines */ + memset(&mod_path, 0xff, sizeof(mod_path)); + get_node_path(dev->dev.parent, &mod_path.path); + mod_path.path.mod = dev->hw_path; + memset(&pdc_mod_info, 0, 
sizeof(pdc_mod_info)); status = pdc_system_map_find_mods(&pdc_mod_info, &mod_path, mod_index++); } while (status == PDC_OK && pdc_mod_info.mod_addr != hpa); diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c index c69f6d5946e9..042343492a28 100644 --- a/arch/parisc/kernel/firmware.c +++ b/arch/parisc/kernel/firmware.c @@ -464,7 +464,8 @@ int pdc_system_map_find_mods(struct pdc_system_map_mod_info *pdc_mod_info, unsigned long flags; spin_lock_irqsave(&pdc_lock, flags); - retval = mem_pdc_call(PDC_SYSTEM_MAP, PDC_FIND_MODULE, __pa(pdc_result), + memcpy(pdc_result2, mod_path, sizeof(*mod_path)); + retval = mem_pdc_call(PDC_SYSTEM_MAP, PDC_FIND_MODULE, __pa(pdc_result), __pa(pdc_result2), mod_index); convert_to_wide(pdc_result); memcpy(pdc_mod_info, pdc_result, sizeof(*pdc_mod_info)); diff --git a/arch/parisc/kernel/perf_event.c b/arch/parisc/kernel/perf_event.c new file mode 100644 index 000000000000..f90b83886ab4 --- /dev/null +++ b/arch/parisc/kernel/perf_event.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Performance event support for parisc + * + * Copyright (C) 2025 by Helge Deller <deller@gmx.de> + */ + +#include <linux/kernel.h> +#include <linux/perf_event.h> +#include <asm/unwind.h> + +void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, + struct pt_regs *regs) +{ + + struct unwind_frame_info info; + + unwind_frame_init_task(&info, current, NULL); + while (1) { + if (unwind_once(&info) < 0 || info.ip == 0) + break; + + if (!__kernel_text_address(info.ip) || + perf_callchain_store(entry, info.ip)) + return; + } +} diff --git a/arch/parisc/kernel/perf_regs.c b/arch/parisc/kernel/perf_regs.c new file mode 100644 index 000000000000..68458e2f6197 --- /dev/null +++ b/arch/parisc/kernel/perf_regs.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Copyright (C) 2025 by Helge Deller <deller@gmx.de> */ + +#include <linux/perf_event.h> +#include <linux/perf_regs.h> +#include <asm/ptrace.h> + +u64 perf_reg_value(struct pt_regs *regs, int idx) +{ + switch (idx) { + case PERF_REG_PARISC_R0 ... PERF_REG_PARISC_R31: + return regs->gr[idx - PERF_REG_PARISC_R0]; + case PERF_REG_PARISC_SR0 ... PERF_REG_PARISC_SR7: + return regs->sr[idx - PERF_REG_PARISC_SR0]; + case PERF_REG_PARISC_IASQ0 ... PERF_REG_PARISC_IASQ1: + return regs->iasq[idx - PERF_REG_PARISC_IASQ0]; + case PERF_REG_PARISC_IAOQ0 ... 
PERF_REG_PARISC_IAOQ1: + return regs->iasq[idx - PERF_REG_PARISC_IAOQ0]; + case PERF_REG_PARISC_SAR: /* CR11 */ + return regs->sar; + case PERF_REG_PARISC_IIR: /* CR19 */ + return regs->iir; + case PERF_REG_PARISC_ISR: /* CR20 */ + return regs->isr; + case PERF_REG_PARISC_IOR: /* CR21 */ + return regs->ior; + case PERF_REG_PARISC_IPSW: /* CR22 */ + return regs->ipsw; + }; + WARN_ON_ONCE((u32)idx >= PERF_REG_PARISC_MAX); + return 0; +} + +#define REG_RESERVED (~((1ULL << PERF_REG_PARISC_MAX) - 1)) + +int perf_reg_validate(u64 mask) +{ + if (!mask || mask & REG_RESERVED) + return -EINVAL; + + return 0; +} + +u64 perf_reg_abi(struct task_struct *task) +{ + if (!IS_ENABLED(CONFIG_64BIT)) + return PERF_SAMPLE_REGS_ABI_32; + + if (test_tsk_thread_flag(task, TIF_32BIT)) + return PERF_SAMPLE_REGS_ABI_32; + + return PERF_SAMPLE_REGS_ABI_64; +} + +void perf_get_regs_user(struct perf_regs *regs_user, + struct pt_regs *regs) +{ + regs_user->regs = task_pt_regs(current); + regs_user->abi = perf_reg_abi(current); +} diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index b9b3d527bc90..4c7c5df80bd0 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -31,6 +31,7 @@ #include <linux/uaccess.h> #include <linux/kdebug.h> #include <linux/kfence.h> +#include <linux/perf_event.h> #include <asm/assembly.h> #include <asm/io.h> @@ -633,6 +634,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs) /* Assist Exception Trap, i.e. floating point exception. */ die_if_kernel("Floating point exception", regs, 0); /* quiet */ __inc_irq_stat(irq_fpassist_count); + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0); handle_fpe(regs); return; diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index 00e97204783e..fb64d9ce0b17 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -13,6 +13,7 @@ #include <linux/uaccess.h> #include <linux/sysctl.h> #include <linux/unaligned.h> +#include <linux/perf_event.h> #include <asm/hardirq.h> #include <asm/traps.h> #include "unaligned.h" @@ -378,6 +379,7 @@ void handle_unaligned(struct pt_regs *regs) int ret = ERR_NOTHANDLED; __inc_irq_stat(irq_unaligned_count); + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, regs->ior); /* log a message with pacing */ if (user_mode(regs)) { diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c index 69d65ffab312..03165c82dfdb 100644 --- a/arch/parisc/lib/memcpy.c +++ b/arch/parisc/lib/memcpy.c @@ -41,7 +41,6 @@ unsigned long raw_copy_from_user(void *dst, const void __user *src, mtsp(get_kernel_space(), SR_TEMP2); /* Check region is user accessible */ - if (start) while (start < end) { if (!prober_user(SR_TEMP1, start)) { newlen = (start - (unsigned long) src); diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index e5fdc336c9b2..2e23533b67e3 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -3,7 +3,6 @@ generated-y += syscall_table_32.h generated-y += syscall_table_64.h generated-y += syscall_table_spu.h generic-y += agp.h -generic-y += kvm_types.h generic-y += mcs_spinlock.h generic-y += qrwlock.h generic-y += early_ioremap.h diff --git a/arch/powerpc/include/asm/kvm_types.h b/arch/powerpc/include/asm/kvm_types.h new file mode 100644 index 000000000000..5d4bffea7d47 --- /dev/null +++ b/arch/powerpc/include/asm/kvm_types.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_PPC_KVM_TYPES_H +#define _ASM_PPC_KVM_TYPES_H + +#if 
IS_MODULE(CONFIG_KVM_BOOK3S_64_PR) && IS_MODULE(CONFIG_KVM_BOOK3S_64_HV) +#define KVM_SUB_MODULES kvm-pr,kvm-hv +#elif IS_MODULE(CONFIG_KVM_BOOK3S_64_PR) +#define KVM_SUB_MODULES kvm-pr +#elif IS_MODULE(CONFIG_KVM_BOOK3S_64_HV) +#define KVM_SUB_MODULES kvm-hv +#else +#undef KVM_SUB_MODULES +#endif + +#endif diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index 4d64a5db50f3..0359ab72cd3b 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -14,7 +14,7 @@ #define can_map_direct(dev, addr) \ ((dev)->bus_dma_limit >= phys_to_dma((dev), (addr))) -bool arch_dma_map_page_direct(struct device *dev, phys_addr_t addr) +bool arch_dma_map_phys_direct(struct device *dev, phys_addr_t addr) { if (likely(!dev->bus_dma_limit)) return false; @@ -24,7 +24,7 @@ bool arch_dma_map_page_direct(struct device *dev, phys_addr_t addr) #define is_direct_handle(dev, h) ((h) >= (dev)->archdata.dma_offset) -bool arch_dma_unmap_page_direct(struct device *dev, dma_addr_t dma_handle) +bool arch_dma_unmap_phys_direct(struct device *dev, dma_addr_t dma_handle) { if (likely(!dev->bus_dma_limit)) return false; diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 48ad0116f359..ef78ff77cf8f 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -334,7 +334,7 @@ static enum pci_ers_result eeh_report_error(struct eeh_dev *edev, rc = driver->err_handler->error_detected(pdev, pci_channel_io_frozen); edev->in_error = true; - pci_uevent_ers(pdev, PCI_ERS_RESULT_NONE); + pci_uevent_ers(pdev, rc); return rc; } diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 5782e743fd27..4ebc333dd786 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -1747,6 +1747,9 @@ void __init fadump_setup_param_area(void) { phys_addr_t range_start, range_end; + if (!fw_dump.fadump_enabled) + return; + if (!fw_dump.param_area_supported || fw_dump.dump_active) return; diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 1302b5ac5672..89a1b8c21ab4 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -916,8 +916,7 @@ int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio, * it fires once. 
*/ if (single_escalation) { - struct irq_data *d = irq_get_irq_data(xc->esc_virq[prio]); - struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); + struct xive_irq_data *xd = irq_get_chip_data(xc->esc_virq[prio]); xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01); vcpu->arch.xive_esc_raddr = xd->eoi_page; @@ -1612,7 +1611,7 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq, /* Grab info about irq */ state->pt_number = hw_irq; - state->pt_data = irq_data_get_irq_handler_data(host_data); + state->pt_data = irq_data_get_irq_chip_data(host_data); /* * Configure the IRQ to match the existing configuration of @@ -1787,8 +1786,7 @@ void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu) */ void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, int irq) { - struct irq_data *d = irq_get_irq_data(irq); - struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); + struct xive_irq_data *xd = irq_get_chip_data(irq); /* * This slightly odd sequence gives the right result @@ -2827,9 +2825,7 @@ int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu) i0, i1); } if (xc->esc_virq[i]) { - struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]); - struct xive_irq_data *xd = - irq_data_get_irq_handler_data(d); + struct xive_irq_data *xd = irq_get_chip_data(xc->esc_virq[i]); u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET); seq_printf(m, " ESC %d %c%c EOI @%llx", diff --git a/arch/powerpc/platforms/powernv/vas.c b/arch/powerpc/platforms/powernv/vas.c index b65256a63e87..9c9650319f3b 100644 --- a/arch/powerpc/platforms/powernv/vas.c +++ b/arch/powerpc/platforms/powernv/vas.c @@ -121,7 +121,7 @@ static int init_vas_instance(struct platform_device *pdev) return -EINVAL; } - xd = irq_get_handler_data(vinst->virq); + xd = irq_get_chip_data(vinst->virq); if (!xd) { pr_err("Inst%d: Invalid virq %d\n", vinst->vas_id, vinst->virq); diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 825f9432e03d..a82aaa786e9e 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -443,8 +443,7 @@ static int pseries_msi_ops_prepare(struct irq_domain *domain, struct device *dev */ static void pseries_msi_ops_teardown(struct irq_domain *domain, msi_alloc_info_t *arg) { - struct msi_desc *desc = arg->desc; - struct pci_dev *pdev = msi_desc_to_pci_dev(desc); + struct pci_dev *pdev = to_pci_dev(domain->dev); rtas_disable_msi(pdev); } diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 625361a15424..8d0123b0ae84 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1580,7 +1580,7 @@ static void xive_flush_cpu_queue(unsigned int cpu, struct xive_cpu *xc) cpu, irq); #endif raw_spin_lock(&desc->lock); - xd = irq_desc_get_handler_data(desc); + xd = irq_desc_get_chip_data(desc); /* * Clear saved_p to indicate that it's no longer pending diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 6eb9db8017b8..22cda9c452d2 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -16,6 +16,7 @@ config RISCV select ACPI_MCFG if (ACPI && PCI) select ACPI_PPTT if ACPI select ACPI_REDUCED_HARDWARE_ONLY if ACPI + select ACPI_RIMT if ACPI select ACPI_SPCR_TABLE if ACPI select ARCH_DMA_DEFAULT_COHERENT select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION @@ -28,7 +29,7 @@ config RISCV select ARCH_HAS_DEBUG_VIRTUAL if MMU select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEBUG_WX - select ARCH_HAS_ELF_CORE_EFLAGS + select 
ARCH_HAS_ELF_CORE_EFLAGS if BINFMT_ELF && ELF_CORE select ARCH_HAS_FAST_MULTIPLIER select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index b9ef2da15fb2..fc2725cbca18 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -140,6 +140,8 @@ CONFIG_MICREL_PHY=y CONFIG_MICROSEMI_PHY=y CONFIG_MOTORCOMM_PHY=y CONFIG_INPUT_MOUSEDEV=y +CONFIG_INPUT_EVDEV=y +CONFIG_KEYBOARD_GPIO=y CONFIG_KEYBOARD_SUN4I_LRADC=m CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y diff --git a/arch/riscv/include/asm/irq.h b/arch/riscv/include/asm/irq.h index 59c975f750c9..e29ded3416b4 100644 --- a/arch/riscv/include/asm/irq.h +++ b/arch/riscv/include/asm/irq.h @@ -32,6 +32,7 @@ enum riscv_irqchip_type { ACPI_RISCV_IRQCHIP_IMSIC = 0x01, ACPI_RISCV_IRQCHIP_PLIC = 0x02, ACPI_RISCV_IRQCHIP_APLIC = 0x03, + ACPI_RISCV_IRQCHIP_SMSI = 0x04, }; int riscv_acpi_get_gsi_info(struct fwnode_handle *fwnode, u32 *gsi_base, @@ -42,6 +43,7 @@ unsigned long acpi_rintc_ext_parent_to_hartid(unsigned int plic_id, unsigned int unsigned int acpi_rintc_get_plic_nr_contexts(unsigned int plic_id); unsigned int acpi_rintc_get_plic_context(unsigned int plic_id, unsigned int ctxt_idx); int __init acpi_rintc_get_imsic_mmio_info(u32 index, struct resource *res); +int riscv_acpi_update_gsi_range(u32 gsi_base, u32 nr_irqs); #else static inline int riscv_acpi_get_gsi_info(struct fwnode_handle *fwnode, u32 *gsi_base, @@ -76,6 +78,10 @@ static inline int __init acpi_rintc_get_imsic_mmio_info(u32 index, struct resour return 0; } +static inline int riscv_acpi_update_gsi_range(u32 gsi_base, u32 nr_irqs) +{ + return -ENODEV; +} #endif /* CONFIG_ACPI */ #endif /* _ASM_RISCV_IRQ_H */ diff --git a/arch/riscv/include/asm/kgdb.h b/arch/riscv/include/asm/kgdb.h index 7559d728c5ff..78b18e2fd771 100644 --- a/arch/riscv/include/asm/kgdb.h +++ b/arch/riscv/include/asm/kgdb.h @@ -3,14 +3,18 @@ #ifndef __ASM_KGDB_H_ #define __ASM_KGDB_H_ +#include <linux/build_bug.h> + #ifdef __KERNEL__ #define GDB_SIZEOF_REG sizeof(unsigned long) -#define DBG_MAX_REG_NUM (36) -#define NUMREGBYTES ((DBG_MAX_REG_NUM) * GDB_SIZEOF_REG) +#define DBG_MAX_REG_NUM 36 +#define NUMREGBYTES (DBG_MAX_REG_NUM * GDB_SIZEOF_REG) #define CACHE_FLUSH_IS_SAFE 1 #define BUFMAX 2048 +static_assert(BUFMAX > NUMREGBYTES, + "As per KGDB documentation, BUFMAX must be larger than NUMREGBYTES"); #ifdef CONFIG_RISCV_ISA_C #define BREAK_INSTR_SIZE 2 #else @@ -97,6 +101,7 @@ extern unsigned long kgdb_compiled_break; #define DBG_REG_STATUS_OFF 33 #define DBG_REG_BADADDR_OFF 34 #define DBG_REG_CAUSE_OFF 35 +/* NOTE: increase DBG_MAX_REG_NUM if you add more values here. 
*/ extern const char riscv_gdb_stub_feature[64]; diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h index d71d3299a335..4d794573e3db 100644 --- a/arch/riscv/include/asm/kvm_host.h +++ b/arch/riscv/include/asm/kvm_host.h @@ -21,6 +21,7 @@ #include <asm/kvm_vcpu_fp.h> #include <asm/kvm_vcpu_insn.h> #include <asm/kvm_vcpu_sbi.h> +#include <asm/kvm_vcpu_sbi_fwft.h> #include <asm/kvm_vcpu_timer.h> #include <asm/kvm_vcpu_pmu.h> @@ -263,6 +264,9 @@ struct kvm_vcpu_arch { /* Performance monitoring context */ struct kvm_pmu pmu_context; + /* Firmware feature SBI extension context */ + struct kvm_sbi_fwft fwft_context; + /* 'static' configurations which are set only once */ struct kvm_vcpu_config cfg; diff --git a/arch/riscv/include/asm/kvm_vcpu_pmu.h b/arch/riscv/include/asm/kvm_vcpu_pmu.h index 1d85b6617508..9a930afc8f57 100644 --- a/arch/riscv/include/asm/kvm_vcpu_pmu.h +++ b/arch/riscv/include/asm/kvm_vcpu_pmu.h @@ -98,6 +98,9 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu); int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low, unsigned long saddr_high, unsigned long flags, struct kvm_vcpu_sbi_return *retdata); +int kvm_riscv_vcpu_pmu_event_info(struct kvm_vcpu *vcpu, unsigned long saddr_low, + unsigned long saddr_high, unsigned long num_events, + unsigned long flags, struct kvm_vcpu_sbi_return *retdata); void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu); void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu); diff --git a/arch/riscv/include/asm/kvm_vcpu_sbi.h b/arch/riscv/include/asm/kvm_vcpu_sbi.h index d678fd7e5973..3497489e04db 100644 --- a/arch/riscv/include/asm/kvm_vcpu_sbi.h +++ b/arch/riscv/include/asm/kvm_vcpu_sbi.h @@ -11,7 +11,7 @@ #define KVM_SBI_IMPID 3 -#define KVM_SBI_VERSION_MAJOR 2 +#define KVM_SBI_VERSION_MAJOR 3 #define KVM_SBI_VERSION_MINOR 0 enum kvm_riscv_sbi_ext_status { @@ -59,6 +59,14 @@ struct kvm_vcpu_sbi_extension { void (*deinit)(struct kvm_vcpu *vcpu); void (*reset)(struct kvm_vcpu *vcpu); + + unsigned long state_reg_subtype; + unsigned long (*get_state_reg_count)(struct kvm_vcpu *vcpu); + int (*get_state_reg_id)(struct kvm_vcpu *vcpu, int index, u64 *reg_id); + int (*get_state_reg)(struct kvm_vcpu *vcpu, unsigned long reg_num, + unsigned long reg_size, void *reg_val); + int (*set_state_reg)(struct kvm_vcpu *vcpu, unsigned long reg_num, + unsigned long reg_size, const void *reg_val); }; void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, struct kvm_run *run); @@ -69,27 +77,21 @@ void kvm_riscv_vcpu_sbi_request_reset(struct kvm_vcpu *vcpu, unsigned long pc, unsigned long a1); void kvm_riscv_vcpu_sbi_load_reset_state(struct kvm_vcpu *vcpu); int kvm_riscv_vcpu_sbi_return(struct kvm_vcpu *vcpu, struct kvm_run *run); +int kvm_riscv_vcpu_reg_indices_sbi_ext(struct kvm_vcpu *vcpu, u64 __user *uindices); int kvm_riscv_vcpu_set_reg_sbi_ext(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); int kvm_riscv_vcpu_get_reg_sbi_ext(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); -int kvm_riscv_vcpu_set_reg_sbi(struct kvm_vcpu *vcpu, - const struct kvm_one_reg *reg); -int kvm_riscv_vcpu_get_reg_sbi(struct kvm_vcpu *vcpu, - const struct kvm_one_reg *reg); +int kvm_riscv_vcpu_reg_indices_sbi(struct kvm_vcpu *vcpu, u64 __user *uindices); +int kvm_riscv_vcpu_set_reg_sbi(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); +int kvm_riscv_vcpu_get_reg_sbi(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext( struct kvm_vcpu *vcpu, 
unsigned long extid); -bool riscv_vcpu_supports_sbi_ext(struct kvm_vcpu *vcpu, int idx); int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run); void kvm_riscv_vcpu_sbi_init(struct kvm_vcpu *vcpu); void kvm_riscv_vcpu_sbi_deinit(struct kvm_vcpu *vcpu); void kvm_riscv_vcpu_sbi_reset(struct kvm_vcpu *vcpu); -int kvm_riscv_vcpu_get_reg_sbi_sta(struct kvm_vcpu *vcpu, unsigned long reg_num, - unsigned long *reg_val); -int kvm_riscv_vcpu_set_reg_sbi_sta(struct kvm_vcpu *vcpu, unsigned long reg_num, - unsigned long reg_val); - #ifdef CONFIG_RISCV_SBI_V01 extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01; #endif @@ -102,6 +104,7 @@ extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm; extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_dbcn; extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_susp; extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_sta; +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_fwft; extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental; extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor; diff --git a/arch/riscv/include/asm/kvm_vcpu_sbi_fwft.h b/arch/riscv/include/asm/kvm_vcpu_sbi_fwft.h new file mode 100644 index 000000000000..5604cec79902 --- /dev/null +++ b/arch/riscv/include/asm/kvm_vcpu_sbi_fwft.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2025 Rivos Inc. + * + * Authors: + * Clément Léger <cleger@rivosinc.com> + */ + +#ifndef __KVM_VCPU_RISCV_FWFT_H +#define __KVM_VCPU_RISCV_FWFT_H + +#include <asm/sbi.h> + +struct kvm_sbi_fwft_feature; + +struct kvm_sbi_fwft_config { + const struct kvm_sbi_fwft_feature *feature; + bool supported; + bool enabled; + unsigned long flags; +}; + +/* FWFT data structure per vcpu */ +struct kvm_sbi_fwft { + struct kvm_sbi_fwft_config *configs; +#ifndef CONFIG_32BIT + bool have_vs_pmlen_7; + bool have_vs_pmlen_16; +#endif +}; + +#define vcpu_to_fwft(vcpu) (&(vcpu)->arch.fwft_context) + +#endif /* !__KVM_VCPU_RISCV_FWFT_H */ diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index 341e74238aa0..ccc77a89b1e2 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -36,6 +36,7 @@ enum sbi_ext_id { SBI_EXT_STA = 0x535441, SBI_EXT_NACL = 0x4E41434C, SBI_EXT_FWFT = 0x46574654, + SBI_EXT_MPXY = 0x4D505859, /* Experimentals extensions must lie within this range */ SBI_EXT_EXPERIMENTAL_START = 0x08000000, @@ -136,6 +137,7 @@ enum sbi_ext_pmu_fid { SBI_EXT_PMU_COUNTER_FW_READ, SBI_EXT_PMU_COUNTER_FW_READ_HI, SBI_EXT_PMU_SNAPSHOT_SET_SHMEM, + SBI_EXT_PMU_EVENT_GET_INFO, }; union sbi_pmu_ctr_info { @@ -159,9 +161,20 @@ struct riscv_pmu_snapshot_data { u64 reserved[447]; }; +struct riscv_pmu_event_info { + u32 event_idx; + u32 output; + u64 event_data; +}; + +#define RISCV_PMU_EVENT_INFO_OUTPUT_MASK 0x01 + #define RISCV_PMU_RAW_EVENT_MASK GENMASK_ULL(47, 0) #define RISCV_PMU_PLAT_FW_EVENT_MASK GENMASK_ULL(61, 0) +/* SBI v3.0 allows extended hpmeventX width value */ +#define RISCV_PMU_RAW_EVENT_V2_MASK GENMASK_ULL(55, 0) #define RISCV_PMU_RAW_EVENT_IDX 0x20000 +#define RISCV_PMU_RAW_EVENT_V2_IDX 0x30000 #define RISCV_PLAT_FW_EVENT 0xFFFF /** General pmu event codes specified in SBI PMU extension */ @@ -219,6 +232,7 @@ enum sbi_pmu_event_type { SBI_PMU_EVENT_TYPE_HW = 0x0, SBI_PMU_EVENT_TYPE_CACHE = 0x1, SBI_PMU_EVENT_TYPE_RAW = 0x2, + SBI_PMU_EVENT_TYPE_RAW_V2 = 0x3, SBI_PMU_EVENT_TYPE_FW = 0xf, }; @@ -430,6 +444,67 @@ enum sbi_fwft_feature_t { #define SBI_FWFT_SET_FLAG_LOCK BIT(0) +enum 
sbi_ext_mpxy_fid { + SBI_EXT_MPXY_GET_SHMEM_SIZE, + SBI_EXT_MPXY_SET_SHMEM, + SBI_EXT_MPXY_GET_CHANNEL_IDS, + SBI_EXT_MPXY_READ_ATTRS, + SBI_EXT_MPXY_WRITE_ATTRS, + SBI_EXT_MPXY_SEND_MSG_WITH_RESP, + SBI_EXT_MPXY_SEND_MSG_WITHOUT_RESP, + SBI_EXT_MPXY_GET_NOTIFICATION_EVENTS, +}; + +enum sbi_mpxy_attribute_id { + /* Standard channel attributes managed by MPXY framework */ + SBI_MPXY_ATTR_MSG_PROT_ID = 0x00000000, + SBI_MPXY_ATTR_MSG_PROT_VER = 0x00000001, + SBI_MPXY_ATTR_MSG_MAX_LEN = 0x00000002, + SBI_MPXY_ATTR_MSG_SEND_TIMEOUT = 0x00000003, + SBI_MPXY_ATTR_MSG_COMPLETION_TIMEOUT = 0x00000004, + SBI_MPXY_ATTR_CHANNEL_CAPABILITY = 0x00000005, + SBI_MPXY_ATTR_SSE_EVENT_ID = 0x00000006, + SBI_MPXY_ATTR_MSI_CONTROL = 0x00000007, + SBI_MPXY_ATTR_MSI_ADDR_LO = 0x00000008, + SBI_MPXY_ATTR_MSI_ADDR_HI = 0x00000009, + SBI_MPXY_ATTR_MSI_DATA = 0x0000000A, + SBI_MPXY_ATTR_EVENTS_STATE_CONTROL = 0x0000000B, + SBI_MPXY_ATTR_STD_ATTR_MAX_IDX, + /* + * Message protocol specific attributes, managed by + * the message protocol specification. + */ + SBI_MPXY_ATTR_MSGPROTO_ATTR_START = 0x80000000, + SBI_MPXY_ATTR_MSGPROTO_ATTR_END = 0xffffffff +}; + +/* Possible values of MSG_PROT_ID attribute as-per SBI v3.0 (or higher) */ +enum sbi_mpxy_msgproto_id { + SBI_MPXY_MSGPROTO_RPMI_ID = 0x0, +}; + +/* RPMI message protocol specific MPXY attributes */ +enum sbi_mpxy_rpmi_attribute_id { + SBI_MPXY_RPMI_ATTR_SERVICEGROUP_ID = SBI_MPXY_ATTR_MSGPROTO_ATTR_START, + SBI_MPXY_RPMI_ATTR_SERVICEGROUP_VERSION, + SBI_MPXY_RPMI_ATTR_IMPL_ID, + SBI_MPXY_RPMI_ATTR_IMPL_VERSION, + SBI_MPXY_RPMI_ATTR_MAX_ID +}; + +/* Encoding of MSG_PROT_VER attribute */ +#define SBI_MPXY_MSG_PROT_VER_MAJOR(__ver) upper_16_bits(__ver) +#define SBI_MPXY_MSG_PROT_VER_MINOR(__ver) lower_16_bits(__ver) +#define SBI_MPXY_MSG_PROT_MKVER(__maj, __min) (((u32)(__maj) << 16) | (u16)(__min)) + +/* Capabilities available through CHANNEL_CAPABILITY attribute */ +#define SBI_MPXY_CHAN_CAP_MSI BIT(0) +#define SBI_MPXY_CHAN_CAP_SSE BIT(1) +#define SBI_MPXY_CHAN_CAP_EVENTS_STATE BIT(2) +#define SBI_MPXY_CHAN_CAP_SEND_WITH_RESP BIT(3) +#define SBI_MPXY_CHAN_CAP_SEND_WITHOUT_RESP BIT(4) +#define SBI_MPXY_CHAN_CAP_GET_NOTIFICATIONS BIT(5) + /* SBI spec version fields */ #define SBI_SPEC_VERSION_DEFAULT 0x1 #define SBI_SPEC_VERSION_MAJOR_SHIFT 24 diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h index 251099d860aa..759a4852c09a 100644 --- a/arch/riscv/include/uapi/asm/kvm.h +++ b/arch/riscv/include/uapi/asm/kvm.h @@ -56,6 +56,7 @@ struct kvm_riscv_config { unsigned long mimpid; unsigned long zicboz_block_size; unsigned long satp_mode; + unsigned long zicbop_block_size; }; /* CORE registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ @@ -185,6 +186,10 @@ enum KVM_RISCV_ISA_EXT_ID { KVM_RISCV_ISA_EXT_ZICCRSE, KVM_RISCV_ISA_EXT_ZAAMO, KVM_RISCV_ISA_EXT_ZALRSC, + KVM_RISCV_ISA_EXT_ZICBOP, + KVM_RISCV_ISA_EXT_ZFBFMIN, + KVM_RISCV_ISA_EXT_ZVFBFMIN, + KVM_RISCV_ISA_EXT_ZVFBFWMA, KVM_RISCV_ISA_EXT_MAX, }; @@ -205,6 +210,7 @@ enum KVM_RISCV_SBI_EXT_ID { KVM_RISCV_SBI_EXT_DBCN, KVM_RISCV_SBI_EXT_STA, KVM_RISCV_SBI_EXT_SUSP, + KVM_RISCV_SBI_EXT_FWFT, KVM_RISCV_SBI_EXT_MAX, }; @@ -214,6 +220,18 @@ struct kvm_riscv_sbi_sta { unsigned long shmem_hi; }; +struct kvm_riscv_sbi_fwft_feature { + unsigned long enable; + unsigned long flags; + unsigned long value; +}; + +/* SBI FWFT extension registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ +struct kvm_riscv_sbi_fwft { + struct kvm_riscv_sbi_fwft_feature misaligned_deleg; + struct 
kvm_riscv_sbi_fwft_feature pointer_masking; +}; + /* Possible states for kvm_riscv_timer */ #define KVM_RISCV_TIMER_STATE_OFF 0 #define KVM_RISCV_TIMER_STATE_ON 1 @@ -297,6 +315,9 @@ struct kvm_riscv_sbi_sta { #define KVM_REG_RISCV_SBI_STA (0x0 << KVM_REG_RISCV_SUBTYPE_SHIFT) #define KVM_REG_RISCV_SBI_STA_REG(name) \ (offsetof(struct kvm_riscv_sbi_sta, name) / sizeof(unsigned long)) +#define KVM_REG_RISCV_SBI_FWFT (0x1 << KVM_REG_RISCV_SUBTYPE_SHIFT) +#define KVM_REG_RISCV_SBI_FWFT_REG(name) \ + (offsetof(struct kvm_riscv_sbi_fwft, name) / sizeof(unsigned long)) /* Device Control API: RISC-V AIA */ #define KVM_DEV_RISCV_APLIC_ALIGN 0x1000 diff --git a/arch/riscv/kernel/cpu-hotplug.c b/arch/riscv/kernel/cpu-hotplug.c index a1e38ecfc8be..3f50d3dd76c6 100644 --- a/arch/riscv/kernel/cpu-hotplug.c +++ b/arch/riscv/kernel/cpu-hotplug.c @@ -54,6 +54,7 @@ void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) pr_notice("CPU%u: off\n", cpu); + clear_tasks_mm_cpumask(cpu); /* Verify from the firmware if the cpu is really stopped*/ if (cpu_ops->cpu_is_stopped) ret = cpu_ops->cpu_is_stopped(cpu); diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index d3d92a4becc7..9b9dec6893b8 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -455,7 +455,7 @@ SYM_DATA_START_LOCAL(excp_vect_table) RISCV_PTR do_trap_ecall_s RISCV_PTR do_trap_unknown RISCV_PTR do_trap_ecall_m - /* instruciton page fault */ + /* instruction page fault */ ALT_PAGE_FAULT(RISCV_PTR do_page_fault) RISCV_PTR do_page_fault /* load page fault */ RISCV_PTR do_trap_unknown diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c index c0738d6c6498..8723390c7cad 100644 --- a/arch/riscv/kernel/probes/kprobes.c +++ b/arch/riscv/kernel/probes/kprobes.c @@ -49,10 +49,15 @@ static void __kprobes arch_simulate_insn(struct kprobe *p, struct pt_regs *regs) post_kprobe_handler(p, kcb, regs); } -static bool __kprobes arch_check_kprobe(struct kprobe *p) +static bool __kprobes arch_check_kprobe(unsigned long addr) { - unsigned long tmp = (unsigned long)p->addr - p->offset; - unsigned long addr = (unsigned long)p->addr; + unsigned long tmp, offset; + + /* start iterating at the closest preceding symbol */ + if (!kallsyms_lookup_size_offset(addr, NULL, &offset)) + return false; + + tmp = addr - offset; while (tmp <= addr) { if (tmp == addr) @@ -71,7 +76,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) if ((unsigned long)insn & 0x1) return -EILSEQ; - if (!arch_check_kprobe(p)) + if (!arch_check_kprobe((unsigned long)p->addr)) return -EILSEQ; /* copy instruction */ diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 14235e58c539..b5bc5fc65cea 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -331,11 +331,14 @@ void __init setup_arch(char **cmdline_p) /* Parse the ACPI tables for possible boot-time configuration */ acpi_boot_table_init(); + if (acpi_disabled) { #if IS_ENABLED(CONFIG_BUILTIN_DTB) - unflatten_and_copy_device_tree(); + unflatten_and_copy_device_tree(); #else - unflatten_device_tree(); + unflatten_device_tree(); #endif + } + misc_mem_init(); init_resources(); diff --git a/arch/riscv/kernel/tests/kprobes/test-kprobes.h b/arch/riscv/kernel/tests/kprobes/test-kprobes.h index 3886ab491ecb..537f44aa9d3f 100644 --- a/arch/riscv/kernel/tests/kprobes/test-kprobes.h +++ b/arch/riscv/kernel/tests/kprobes/test-kprobes.h @@ -11,7 +11,7 @@ #define KPROBE_TEST_MAGIC_LOWER 0x0000babe #define KPROBE_TEST_MAGIC_UPPER 0xcafe0000 -#ifndef __ASSEMBLY__ 
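To illustrate the new SBI FWFT state registers exposed through the uapi additions above (struct kvm_riscv_sbi_fwft plus the KVM_REG_RISCV_SBI_FWFT and KVM_REG_RISCV_SBI_FWFT_REG() macros), here is a hypothetical userspace sketch, not part of the patch: it assumes a 64-bit host and an already-created vcpu fd, and reads the pointer-masking "value" register via KVM_GET_ONE_REG.

/*
 * Hypothetical userspace sketch (assumptions: rv64 host, vcpu fd already
 * set up, headers providing the new KVM_REG_RISCV_SBI_FWFT definitions).
 */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>
#include <asm/kvm.h>

static int get_fwft_pmlen(int vcpu_fd, uint64_t *value)
{
	struct kvm_one_reg reg = {
		.id = KVM_REG_RISCV | KVM_REG_SIZE_U64 |
		      KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT |
		      KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.value),
		.addr = (uint64_t)(unsigned long)value,
	};

	/* Returns 0 on success; -1 with errno (e.g. ENOENT when the
	 * feature is not supported on this vcpu) otherwise. */
	return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
}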
+#ifndef __ASSEMBLER__ /* array of addresses to install kprobes */ extern void *test_kprobes_addresses[]; @@ -19,6 +19,6 @@ extern void *test_kprobes_addresses[]; /* array of functions that return KPROBE_TEST_MAGIC */ extern long (*test_kprobes_functions[])(void); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* TEST_KPROBES_H */ diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig index 5a62091b0809..c50328212917 100644 --- a/arch/riscv/kvm/Kconfig +++ b/arch/riscv/kvm/Kconfig @@ -30,7 +30,7 @@ config KVM select KVM_GENERIC_DIRTYLOG_READ_PROTECT select KVM_GENERIC_HARDWARE_ENABLING select KVM_MMIO - select KVM_XFER_TO_GUEST_WORK + select VIRT_XFER_TO_GUEST_WORK select KVM_GENERIC_MMU_NOTIFIER select SCHED_INFO select GUEST_PERF_EVENTS if PERF_EVENTS diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile index 4b199dc3e58b..07197395750e 100644 --- a/arch/riscv/kvm/Makefile +++ b/arch/riscv/kvm/Makefile @@ -27,6 +27,7 @@ kvm-y += vcpu_onereg.o kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_pmu.o kvm-y += vcpu_sbi.o kvm-y += vcpu_sbi_base.o +kvm-y += vcpu_sbi_fwft.o kvm-y += vcpu_sbi_hsm.o kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_sbi_pmu.o kvm-y += vcpu_sbi_replace.o diff --git a/arch/riscv/kvm/gstage.c b/arch/riscv/kvm/gstage.c index 24c270d6d0e2..b67d60d722c2 100644 --- a/arch/riscv/kvm/gstage.c +++ b/arch/riscv/kvm/gstage.c @@ -321,7 +321,7 @@ void __init kvm_riscv_gstage_mode_detect(void) if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV57X4) { kvm_riscv_gstage_mode = HGATP_MODE_SV57X4; kvm_riscv_gstage_pgd_levels = 5; - goto skip_sv48x4_test; + goto done; } /* Try Sv48x4 G-stage mode */ @@ -329,10 +329,31 @@ void __init kvm_riscv_gstage_mode_detect(void) if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV48X4) { kvm_riscv_gstage_mode = HGATP_MODE_SV48X4; kvm_riscv_gstage_pgd_levels = 4; + goto done; } -skip_sv48x4_test: + /* Try Sv39x4 G-stage mode */ + csr_write(CSR_HGATP, HGATP_MODE_SV39X4 << HGATP_MODE_SHIFT); + if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV39X4) { + kvm_riscv_gstage_mode = HGATP_MODE_SV39X4; + kvm_riscv_gstage_pgd_levels = 3; + goto done; + } +#else /* CONFIG_32BIT */ + /* Try Sv32x4 G-stage mode */ + csr_write(CSR_HGATP, HGATP_MODE_SV32X4 << HGATP_MODE_SHIFT); + if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV32X4) { + kvm_riscv_gstage_mode = HGATP_MODE_SV32X4; + kvm_riscv_gstage_pgd_levels = 2; + goto done; + } +#endif + + /* KVM depends on !HGATP_MODE_OFF */ + kvm_riscv_gstage_mode = HGATP_MODE_OFF; + kvm_riscv_gstage_pgd_levels = 0; + +done: csr_write(CSR_HGATP, 0); kvm_riscv_local_hfence_gvma_all(); -#endif } diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c index 67c876de74ef..77dc1655b442 100644 --- a/arch/riscv/kvm/main.c +++ b/arch/riscv/kvm/main.c @@ -93,6 +93,23 @@ static int __init riscv_kvm_init(void) return rc; kvm_riscv_gstage_mode_detect(); + switch (kvm_riscv_gstage_mode) { + case HGATP_MODE_SV32X4: + str = "Sv32x4"; + break; + case HGATP_MODE_SV39X4: + str = "Sv39x4"; + break; + case HGATP_MODE_SV48X4: + str = "Sv48x4"; + break; + case HGATP_MODE_SV57X4: + str = "Sv57x4"; + break; + default: + kvm_riscv_nacl_exit(); + return -ENODEV; + } kvm_riscv_gstage_vmid_detect(); @@ -135,22 +152,6 @@ static int __init riscv_kvm_init(void) (rc) ? 
slist : "no features"); } - switch (kvm_riscv_gstage_mode) { - case HGATP_MODE_SV32X4: - str = "Sv32x4"; - break; - case HGATP_MODE_SV39X4: - str = "Sv39x4"; - break; - case HGATP_MODE_SV48X4: - str = "Sv48x4"; - break; - case HGATP_MODE_SV57X4: - str = "Sv57x4"; - break; - default: - return -ENODEV; - } kvm_info("using %s G-stage page table format\n", str); kvm_info("VMID %ld bits available\n", kvm_riscv_gstage_vmid_bits()); diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index 3ebcfffaa978..bccb919ca615 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -7,7 +7,6 @@ */ #include <linux/bitops.h> -#include <linux/entry-kvm.h> #include <linux/errno.h> #include <linux/err.h> #include <linux/kdebug.h> @@ -133,6 +132,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) /* Mark this VCPU never ran */ vcpu->arch.ran_atleast_once = false; + + vcpu->arch.cfg.hedeleg = KVM_HEDELEG_DEFAULT; vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO; bitmap_zero(vcpu->arch.isa, RISCV_ISA_EXT_MAX); @@ -570,7 +571,6 @@ static void kvm_riscv_vcpu_setup_config(struct kvm_vcpu *vcpu) cfg->hstateen0 |= SMSTATEEN0_SSTATEEN0; } - cfg->hedeleg = KVM_HEDELEG_DEFAULT; if (vcpu->guest_debug) cfg->hedeleg &= ~BIT(EXC_BREAKPOINT); } @@ -910,7 +910,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) run->exit_reason = KVM_EXIT_UNKNOWN; while (ret > 0) { /* Check conditions before entering the guest */ - ret = xfer_to_guest_mode_handle_work(vcpu); + ret = kvm_xfer_to_guest_mode_handle_work(vcpu); if (ret) continue; ret = 1; diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index cce6a38ea54f..865dae903aa0 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -65,9 +65,11 @@ static const unsigned long kvm_isa_ext_arr[] = { KVM_ISA_EXT_ARR(ZCF), KVM_ISA_EXT_ARR(ZCMOP), KVM_ISA_EXT_ARR(ZFA), + KVM_ISA_EXT_ARR(ZFBFMIN), KVM_ISA_EXT_ARR(ZFH), KVM_ISA_EXT_ARR(ZFHMIN), KVM_ISA_EXT_ARR(ZICBOM), + KVM_ISA_EXT_ARR(ZICBOP), KVM_ISA_EXT_ARR(ZICBOZ), KVM_ISA_EXT_ARR(ZICCRSE), KVM_ISA_EXT_ARR(ZICNTR), @@ -88,6 +90,8 @@ static const unsigned long kvm_isa_ext_arr[] = { KVM_ISA_EXT_ARR(ZTSO), KVM_ISA_EXT_ARR(ZVBB), KVM_ISA_EXT_ARR(ZVBC), + KVM_ISA_EXT_ARR(ZVFBFMIN), + KVM_ISA_EXT_ARR(ZVFBFWMA), KVM_ISA_EXT_ARR(ZVFH), KVM_ISA_EXT_ARR(ZVFHMIN), KVM_ISA_EXT_ARR(ZVKB), @@ -173,7 +177,6 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_C: case KVM_RISCV_ISA_EXT_I: case KVM_RISCV_ISA_EXT_M: - case KVM_RISCV_ISA_EXT_SMNPM: /* There is not architectural config bit to disable sscofpmf completely */ case KVM_RISCV_ISA_EXT_SSCOFPMF: case KVM_RISCV_ISA_EXT_SSNPM: @@ -199,8 +202,10 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_ZCF: case KVM_RISCV_ISA_EXT_ZCMOP: case KVM_RISCV_ISA_EXT_ZFA: + case KVM_RISCV_ISA_EXT_ZFBFMIN: case KVM_RISCV_ISA_EXT_ZFH: case KVM_RISCV_ISA_EXT_ZFHMIN: + case KVM_RISCV_ISA_EXT_ZICBOP: case KVM_RISCV_ISA_EXT_ZICCRSE: case KVM_RISCV_ISA_EXT_ZICNTR: case KVM_RISCV_ISA_EXT_ZICOND: @@ -220,6 +225,8 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_ZTSO: case KVM_RISCV_ISA_EXT_ZVBB: case KVM_RISCV_ISA_EXT_ZVBC: + case KVM_RISCV_ISA_EXT_ZVFBFMIN: + case KVM_RISCV_ISA_EXT_ZVFBFWMA: case KVM_RISCV_ISA_EXT_ZVFH: case KVM_RISCV_ISA_EXT_ZVFHMIN: case KVM_RISCV_ISA_EXT_ZVKB: @@ -277,15 +284,20 @@ static int kvm_riscv_vcpu_get_reg_config(struct kvm_vcpu *vcpu, reg_val = vcpu->arch.isa[0] & KVM_RISCV_BASE_ISA_MASK; break; case 
KVM_REG_RISCV_CONFIG_REG(zicbom_block_size): - if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOM)) + if (!riscv_isa_extension_available(NULL, ZICBOM)) return -ENOENT; reg_val = riscv_cbom_block_size; break; case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size): - if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOZ)) + if (!riscv_isa_extension_available(NULL, ZICBOZ)) return -ENOENT; reg_val = riscv_cboz_block_size; break; + case KVM_REG_RISCV_CONFIG_REG(zicbop_block_size): + if (!riscv_isa_extension_available(NULL, ZICBOP)) + return -ENOENT; + reg_val = riscv_cbop_block_size; + break; case KVM_REG_RISCV_CONFIG_REG(mvendorid): reg_val = vcpu->arch.mvendorid; break; @@ -366,17 +378,23 @@ static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu, } break; case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size): - if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOM)) + if (!riscv_isa_extension_available(NULL, ZICBOM)) return -ENOENT; if (reg_val != riscv_cbom_block_size) return -EINVAL; break; case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size): - if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOZ)) + if (!riscv_isa_extension_available(NULL, ZICBOZ)) return -ENOENT; if (reg_val != riscv_cboz_block_size) return -EINVAL; break; + case KVM_REG_RISCV_CONFIG_REG(zicbop_block_size): + if (!riscv_isa_extension_available(NULL, ZICBOP)) + return -ENOENT; + if (reg_val != riscv_cbop_block_size) + return -EINVAL; + break; case KVM_REG_RISCV_CONFIG_REG(mvendorid): if (reg_val == vcpu->arch.mvendorid) break; @@ -817,10 +835,13 @@ static int copy_config_reg_indices(const struct kvm_vcpu *vcpu, * was not available. */ if (i == KVM_REG_RISCV_CONFIG_REG(zicbom_block_size) && - !riscv_isa_extension_available(vcpu->arch.isa, ZICBOM)) + !riscv_isa_extension_available(NULL, ZICBOM)) continue; else if (i == KVM_REG_RISCV_CONFIG_REG(zicboz_block_size) && - !riscv_isa_extension_available(vcpu->arch.isa, ZICBOZ)) + !riscv_isa_extension_available(NULL, ZICBOZ)) + continue; + else if (i == KVM_REG_RISCV_CONFIG_REG(zicbop_block_size) && + !riscv_isa_extension_available(NULL, ZICBOP)) continue; size = IS_ENABLED(CONFIG_32BIT) ? KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64; @@ -1061,66 +1082,14 @@ static inline unsigned long num_isa_ext_regs(const struct kvm_vcpu *vcpu) return copy_isa_ext_reg_indices(vcpu, NULL); } -static int copy_sbi_ext_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) -{ - unsigned int n = 0; - - for (int i = 0; i < KVM_RISCV_SBI_EXT_MAX; i++) { - u64 size = IS_ENABLED(CONFIG_32BIT) ? - KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64; - u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_SBI_EXT | - KVM_REG_RISCV_SBI_SINGLE | i; - - if (!riscv_vcpu_supports_sbi_ext(vcpu, i)) - continue; - - if (uindices) { - if (put_user(reg, uindices)) - return -EFAULT; - uindices++; - } - - n++; - } - - return n; -} - static unsigned long num_sbi_ext_regs(struct kvm_vcpu *vcpu) { - return copy_sbi_ext_reg_indices(vcpu, NULL); -} - -static int copy_sbi_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) -{ - struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context; - int total = 0; - - if (scontext->ext_status[KVM_RISCV_SBI_EXT_STA] == KVM_RISCV_SBI_EXT_STATUS_ENABLED) { - u64 size = IS_ENABLED(CONFIG_32BIT) ? 
KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64; - int n = sizeof(struct kvm_riscv_sbi_sta) / sizeof(unsigned long); - - for (int i = 0; i < n; i++) { - u64 reg = KVM_REG_RISCV | size | - KVM_REG_RISCV_SBI_STATE | - KVM_REG_RISCV_SBI_STA | i; - - if (uindices) { - if (put_user(reg, uindices)) - return -EFAULT; - uindices++; - } - } - - total += n; - } - - return total; + return kvm_riscv_vcpu_reg_indices_sbi_ext(vcpu, NULL); } static inline unsigned long num_sbi_regs(struct kvm_vcpu *vcpu) { - return copy_sbi_reg_indices(vcpu, NULL); + return kvm_riscv_vcpu_reg_indices_sbi(vcpu, NULL); } static inline unsigned long num_vector_regs(const struct kvm_vcpu *vcpu) @@ -1243,12 +1212,12 @@ int kvm_riscv_vcpu_copy_reg_indices(struct kvm_vcpu *vcpu, return ret; uindices += ret; - ret = copy_sbi_ext_reg_indices(vcpu, uindices); + ret = kvm_riscv_vcpu_reg_indices_sbi_ext(vcpu, uindices); if (ret < 0) return ret; uindices += ret; - ret = copy_sbi_reg_indices(vcpu, uindices); + ret = kvm_riscv_vcpu_reg_indices_sbi(vcpu, uindices); if (ret < 0) return ret; uindices += ret; diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c index 78ac3216a54d..a2fae70ee174 100644 --- a/arch/riscv/kvm/vcpu_pmu.c +++ b/arch/riscv/kvm/vcpu_pmu.c @@ -60,6 +60,7 @@ static u32 kvm_pmu_get_perf_event_type(unsigned long eidx) type = PERF_TYPE_HW_CACHE; break; case SBI_PMU_EVENT_TYPE_RAW: + case SBI_PMU_EVENT_TYPE_RAW_V2: case SBI_PMU_EVENT_TYPE_FW: type = PERF_TYPE_RAW; break; @@ -128,6 +129,9 @@ static u64 kvm_pmu_get_perf_event_config(unsigned long eidx, uint64_t evt_data) case SBI_PMU_EVENT_TYPE_RAW: config = evt_data & RISCV_PMU_RAW_EVENT_MASK; break; + case SBI_PMU_EVENT_TYPE_RAW_V2: + config = evt_data & RISCV_PMU_RAW_EVENT_V2_MASK; + break; case SBI_PMU_EVENT_TYPE_FW: if (ecode < SBI_PMU_FW_MAX) config = (1ULL << 63) | ecode; @@ -405,8 +409,6 @@ int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long s int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data); int sbiret = 0; gpa_t saddr; - unsigned long hva; - bool writable; if (!kvpmu || flags) { sbiret = SBI_ERR_INVALID_PARAM; @@ -428,19 +430,14 @@ int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long s goto out; } - hva = kvm_vcpu_gfn_to_hva_prot(vcpu, saddr >> PAGE_SHIFT, &writable); - if (kvm_is_error_hva(hva) || !writable) { - sbiret = SBI_ERR_INVALID_ADDRESS; - goto out; - } - kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC); if (!kvpmu->sdata) return -ENOMEM; + /* No need to check writable slot explicitly as kvm_vcpu_write_guest does it internally */ if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) { kfree(kvpmu->sdata); - sbiret = SBI_ERR_FAILURE; + sbiret = SBI_ERR_INVALID_ADDRESS; goto out; } @@ -452,6 +449,65 @@ out: return 0; } +int kvm_riscv_vcpu_pmu_event_info(struct kvm_vcpu *vcpu, unsigned long saddr_low, + unsigned long saddr_high, unsigned long num_events, + unsigned long flags, struct kvm_vcpu_sbi_return *retdata) +{ + struct riscv_pmu_event_info *einfo = NULL; + int shmem_size = num_events * sizeof(*einfo); + gpa_t shmem; + u32 eidx, etype; + u64 econfig; + int ret; + + if (flags != 0 || (saddr_low & (SZ_16 - 1) || num_events == 0)) { + ret = SBI_ERR_INVALID_PARAM; + goto out; + } + + shmem = saddr_low; + if (saddr_high != 0) { + if (IS_ENABLED(CONFIG_32BIT)) { + shmem |= ((gpa_t)saddr_high << 32); + } else { + ret = SBI_ERR_INVALID_ADDRESS; + goto out; + } + } + + einfo = kzalloc(shmem_size, GFP_KERNEL); + if (!einfo) + return -ENOMEM; + + ret = 
kvm_vcpu_read_guest(vcpu, shmem, einfo, shmem_size); + if (ret) { + ret = SBI_ERR_FAILURE; + goto free_mem; + } + + for (int i = 0; i < num_events; i++) { + eidx = einfo[i].event_idx; + etype = kvm_pmu_get_perf_event_type(eidx); + econfig = kvm_pmu_get_perf_event_config(eidx, einfo[i].event_data); + ret = riscv_pmu_get_event_info(etype, econfig, NULL); + einfo[i].output = (ret > 0) ? 1 : 0; + } + + ret = kvm_vcpu_write_guest(vcpu, shmem, einfo, shmem_size); + if (ret) { + ret = SBI_ERR_INVALID_ADDRESS; + goto free_mem; + } + + ret = 0; +free_mem: + kfree(einfo); +out: + retdata->err_val = ret; + + return 0; +} + int kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu *vcpu, struct kvm_vcpu_sbi_return *retdata) { diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c index a56c4959f9ad..1b13623380e1 100644 --- a/arch/riscv/kvm/vcpu_sbi.c +++ b/arch/riscv/kvm/vcpu_sbi.c @@ -79,6 +79,10 @@ static const struct kvm_riscv_sbi_extension_entry sbi_ext[] = { .ext_ptr = &vcpu_sbi_ext_sta, }, { + .ext_idx = KVM_RISCV_SBI_EXT_FWFT, + .ext_ptr = &vcpu_sbi_ext_fwft, + }, + { .ext_idx = KVM_RISCV_SBI_EXT_EXPERIMENTAL, .ext_ptr = &vcpu_sbi_ext_experimental, }, @@ -106,7 +110,7 @@ riscv_vcpu_get_sbi_ext(struct kvm_vcpu *vcpu, unsigned long idx) return sext; } -bool riscv_vcpu_supports_sbi_ext(struct kvm_vcpu *vcpu, int idx) +static bool riscv_vcpu_supports_sbi_ext(struct kvm_vcpu *vcpu, int idx) { struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context; const struct kvm_riscv_sbi_extension_entry *sext; @@ -284,6 +288,31 @@ static int riscv_vcpu_get_sbi_ext_multi(struct kvm_vcpu *vcpu, return 0; } +int kvm_riscv_vcpu_reg_indices_sbi_ext(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + unsigned int n = 0; + + for (int i = 0; i < KVM_RISCV_SBI_EXT_MAX; i++) { + u64 size = IS_ENABLED(CONFIG_32BIT) ? + KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64; + u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_SBI_EXT | + KVM_REG_RISCV_SBI_SINGLE | i; + + if (!riscv_vcpu_supports_sbi_ext(vcpu, i)) + continue; + + if (uindices) { + if (put_user(reg, uindices)) + return -EFAULT; + uindices++; + } + + n++; + } + + return n; +} + int kvm_riscv_vcpu_set_reg_sbi_ext(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { @@ -360,64 +389,163 @@ int kvm_riscv_vcpu_get_reg_sbi_ext(struct kvm_vcpu *vcpu, return 0; } -int kvm_riscv_vcpu_set_reg_sbi(struct kvm_vcpu *vcpu, - const struct kvm_one_reg *reg) +int kvm_riscv_vcpu_reg_indices_sbi(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context; + const struct kvm_riscv_sbi_extension_entry *entry; + const struct kvm_vcpu_sbi_extension *ext; + unsigned long state_reg_count; + int i, j, rc, count = 0; + u64 reg; + + for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) { + entry = &sbi_ext[i]; + ext = entry->ext_ptr; + + if (!ext->get_state_reg_count || + scontext->ext_status[entry->ext_idx] != KVM_RISCV_SBI_EXT_STATUS_ENABLED) + continue; + + state_reg_count = ext->get_state_reg_count(vcpu); + if (!uindices) + goto skip_put_user; + + for (j = 0; j < state_reg_count; j++) { + if (ext->get_state_reg_id) { + rc = ext->get_state_reg_id(vcpu, j, ®); + if (rc) + return rc; + } else { + reg = KVM_REG_RISCV | + (IS_ENABLED(CONFIG_32BIT) ? 
+ KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64) | + KVM_REG_RISCV_SBI_STATE | + ext->state_reg_subtype | j; + } + + if (put_user(reg, uindices)) + return -EFAULT; + uindices++; + } + +skip_put_user: + count += state_reg_count; + } + + return count; +} + +static const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext_withstate(struct kvm_vcpu *vcpu, + unsigned long subtype) +{ + struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context; + const struct kvm_riscv_sbi_extension_entry *entry; + const struct kvm_vcpu_sbi_extension *ext; + int i; + + for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) { + entry = &sbi_ext[i]; + ext = entry->ext_ptr; + + if (ext->get_state_reg_count && + ext->state_reg_subtype == subtype && + scontext->ext_status[entry->ext_idx] == KVM_RISCV_SBI_EXT_STATUS_ENABLED) + return ext; + } + + return NULL; +} + +int kvm_riscv_vcpu_set_reg_sbi(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { unsigned long __user *uaddr = (unsigned long __user *)(unsigned long)reg->addr; unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_RISCV_SBI_STATE); - unsigned long reg_subtype, reg_val; - - if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) + const struct kvm_vcpu_sbi_extension *ext; + unsigned long reg_subtype; + void *reg_val; + u64 data64; + u32 data32; + u16 data16; + u8 data8; + + switch (KVM_REG_SIZE(reg->id)) { + case 1: + reg_val = &data8; + break; + case 2: + reg_val = &data16; + break; + case 4: + reg_val = &data32; + break; + case 8: + reg_val = &data64; + break; + default: return -EINVAL; + } - if (copy_from_user(®_val, uaddr, KVM_REG_SIZE(reg->id))) + if (copy_from_user(reg_val, uaddr, KVM_REG_SIZE(reg->id))) return -EFAULT; reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK; reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK; - switch (reg_subtype) { - case KVM_REG_RISCV_SBI_STA: - return kvm_riscv_vcpu_set_reg_sbi_sta(vcpu, reg_num, reg_val); - default: + ext = kvm_vcpu_sbi_find_ext_withstate(vcpu, reg_subtype); + if (!ext || !ext->set_state_reg) return -EINVAL; - } - return 0; + return ext->set_state_reg(vcpu, reg_num, KVM_REG_SIZE(reg->id), reg_val); } -int kvm_riscv_vcpu_get_reg_sbi(struct kvm_vcpu *vcpu, - const struct kvm_one_reg *reg) +int kvm_riscv_vcpu_get_reg_sbi(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { unsigned long __user *uaddr = (unsigned long __user *)(unsigned long)reg->addr; unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_RISCV_SBI_STATE); - unsigned long reg_subtype, reg_val; + const struct kvm_vcpu_sbi_extension *ext; + unsigned long reg_subtype; + void *reg_val; + u64 data64; + u32 data32; + u16 data16; + u8 data8; int ret; - if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) + switch (KVM_REG_SIZE(reg->id)) { + case 1: + reg_val = &data8; + break; + case 2: + reg_val = &data16; + break; + case 4: + reg_val = &data32; + break; + case 8: + reg_val = &data64; + break; + default: return -EINVAL; + } reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK; reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK; - switch (reg_subtype) { - case KVM_REG_RISCV_SBI_STA: - ret = kvm_riscv_vcpu_get_reg_sbi_sta(vcpu, reg_num, ®_val); - break; - default: + ext = kvm_vcpu_sbi_find_ext_withstate(vcpu, reg_subtype); + if (!ext || !ext->get_state_reg) return -EINVAL; - } + ret = ext->get_state_reg(vcpu, reg_num, KVM_REG_SIZE(reg->id), reg_val); if (ret) return ret; - if (copy_to_user(uaddr, ®_val, KVM_REG_SIZE(reg->id))) + if (copy_to_user(uaddr, reg_val, KVM_REG_SIZE(reg->id))) return -EFAULT; return 0; diff --git 
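For context on the extension emulated by the new vcpu_sbi_fwft.c file below, a guest-side sketch (not part of the patch) of how a kernel could ask the SBI implementation to delegate misaligned load/store exceptions, assuming the in-kernel sbi_ecall() helper and the SBI_EXT_FWFT_* / SBI_FWFT_* definitions from asm/sbi.h:

/* Illustrative guest-side FWFT call; error handling kept minimal. */
#include <asm/sbi.h>

static int fwft_enable_misaligned_deleg(void)
{
	struct sbiret ret;

	/* feature = MISALIGNED_EXC_DELEG, value = 1 (enable), flags = 0 */
	ret = sbi_ecall(SBI_EXT_FWFT, SBI_EXT_FWFT_SET,
			SBI_FWFT_MISALIGNED_EXC_DELEG, 1, 0, 0, 0, 0);
	if (ret.error)
		return sbi_err_map_linux_errno(ret.error);

	return 0;
}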
a/arch/riscv/kvm/vcpu_sbi_fwft.c b/arch/riscv/kvm/vcpu_sbi_fwft.c new file mode 100644 index 000000000000..62cc9c3d5759 --- /dev/null +++ b/arch/riscv/kvm/vcpu_sbi_fwft.c @@ -0,0 +1,544 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2025 Rivos Inc. + * + * Authors: + * Clément Léger <cleger@rivosinc.com> + */ + +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/kvm_host.h> +#include <asm/cpufeature.h> +#include <asm/sbi.h> +#include <asm/kvm_vcpu_sbi.h> +#include <asm/kvm_vcpu_sbi_fwft.h> + +#define MIS_DELEG (BIT_ULL(EXC_LOAD_MISALIGNED) | BIT_ULL(EXC_STORE_MISALIGNED)) + +struct kvm_sbi_fwft_feature { + /** + * @id: Feature ID + */ + enum sbi_fwft_feature_t id; + + /** + * @first_reg_num: ONE_REG index of the first ONE_REG register + */ + unsigned long first_reg_num; + + /** + * @supported: Check if the feature is supported on the vcpu + * + * This callback is optional, if not provided the feature is assumed to + * be supported + */ + bool (*supported)(struct kvm_vcpu *vcpu); + + /** + * @reset: Reset the feature value irrespective whether feature is supported or not + * + * This callback is mandatory + */ + void (*reset)(struct kvm_vcpu *vcpu); + + /** + * @set: Set the feature value + * + * Return SBI_SUCCESS on success or an SBI error (SBI_ERR_*) + * + * This callback is mandatory + */ + long (*set)(struct kvm_vcpu *vcpu, struct kvm_sbi_fwft_config *conf, + bool one_reg_access, unsigned long value); + + /** + * @get: Get the feature current value + * + * Return SBI_SUCCESS on success or an SBI error (SBI_ERR_*) + * + * This callback is mandatory + */ + long (*get)(struct kvm_vcpu *vcpu, struct kvm_sbi_fwft_config *conf, + bool one_reg_access, unsigned long *value); +}; + +static const enum sbi_fwft_feature_t kvm_fwft_defined_features[] = { + SBI_FWFT_MISALIGNED_EXC_DELEG, + SBI_FWFT_LANDING_PAD, + SBI_FWFT_SHADOW_STACK, + SBI_FWFT_DOUBLE_TRAP, + SBI_FWFT_PTE_AD_HW_UPDATING, + SBI_FWFT_POINTER_MASKING_PMLEN, +}; + +static bool kvm_fwft_is_defined_feature(enum sbi_fwft_feature_t feature) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(kvm_fwft_defined_features); i++) { + if (kvm_fwft_defined_features[i] == feature) + return true; + } + + return false; +} + +static bool kvm_sbi_fwft_misaligned_delegation_supported(struct kvm_vcpu *vcpu) +{ + return misaligned_traps_can_delegate(); +} + +static void kvm_sbi_fwft_reset_misaligned_delegation(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_config *cfg = &vcpu->arch.cfg; + + cfg->hedeleg &= ~MIS_DELEG; +} + +static long kvm_sbi_fwft_set_misaligned_delegation(struct kvm_vcpu *vcpu, + struct kvm_sbi_fwft_config *conf, + bool one_reg_access, unsigned long value) +{ + struct kvm_vcpu_config *cfg = &vcpu->arch.cfg; + + if (value == 1) { + cfg->hedeleg |= MIS_DELEG; + if (!one_reg_access) + csr_set(CSR_HEDELEG, MIS_DELEG); + } else if (value == 0) { + cfg->hedeleg &= ~MIS_DELEG; + if (!one_reg_access) + csr_clear(CSR_HEDELEG, MIS_DELEG); + } else { + return SBI_ERR_INVALID_PARAM; + } + + return SBI_SUCCESS; +} + +static long kvm_sbi_fwft_get_misaligned_delegation(struct kvm_vcpu *vcpu, + struct kvm_sbi_fwft_config *conf, + bool one_reg_access, unsigned long *value) +{ + struct kvm_vcpu_config *cfg = &vcpu->arch.cfg; + + *value = (cfg->hedeleg & MIS_DELEG) == MIS_DELEG; + return SBI_SUCCESS; +} + +#ifndef CONFIG_32BIT + +static bool try_to_set_pmm(unsigned long value) +{ + csr_set(CSR_HENVCFG, value); + return (csr_read_clear(CSR_HENVCFG, ENVCFG_PMM) & ENVCFG_PMM) == value; +} + +static bool 
kvm_sbi_fwft_pointer_masking_pmlen_supported(struct kvm_vcpu *vcpu) +{ + struct kvm_sbi_fwft *fwft = vcpu_to_fwft(vcpu); + + if (!riscv_isa_extension_available(vcpu->arch.isa, SMNPM)) + return false; + + fwft->have_vs_pmlen_7 = try_to_set_pmm(ENVCFG_PMM_PMLEN_7); + fwft->have_vs_pmlen_16 = try_to_set_pmm(ENVCFG_PMM_PMLEN_16); + + return fwft->have_vs_pmlen_7 || fwft->have_vs_pmlen_16; +} + +static void kvm_sbi_fwft_reset_pointer_masking_pmlen(struct kvm_vcpu *vcpu) +{ + vcpu->arch.cfg.henvcfg &= ~ENVCFG_PMM; +} + +static long kvm_sbi_fwft_set_pointer_masking_pmlen(struct kvm_vcpu *vcpu, + struct kvm_sbi_fwft_config *conf, + bool one_reg_access, unsigned long value) +{ + struct kvm_sbi_fwft *fwft = vcpu_to_fwft(vcpu); + unsigned long pmm; + + switch (value) { + case 0: + pmm = ENVCFG_PMM_PMLEN_0; + break; + case 7: + if (!fwft->have_vs_pmlen_7) + return SBI_ERR_INVALID_PARAM; + pmm = ENVCFG_PMM_PMLEN_7; + break; + case 16: + if (!fwft->have_vs_pmlen_16) + return SBI_ERR_INVALID_PARAM; + pmm = ENVCFG_PMM_PMLEN_16; + break; + default: + return SBI_ERR_INVALID_PARAM; + } + + vcpu->arch.cfg.henvcfg &= ~ENVCFG_PMM; + vcpu->arch.cfg.henvcfg |= pmm; + + /* + * Instead of waiting for vcpu_load/put() to update HENVCFG CSR, + * update here so that VCPU see's pointer masking mode change + * immediately. + */ + if (!one_reg_access) + csr_write(CSR_HENVCFG, vcpu->arch.cfg.henvcfg); + + return SBI_SUCCESS; +} + +static long kvm_sbi_fwft_get_pointer_masking_pmlen(struct kvm_vcpu *vcpu, + struct kvm_sbi_fwft_config *conf, + bool one_reg_access, unsigned long *value) +{ + switch (vcpu->arch.cfg.henvcfg & ENVCFG_PMM) { + case ENVCFG_PMM_PMLEN_0: + *value = 0; + break; + case ENVCFG_PMM_PMLEN_7: + *value = 7; + break; + case ENVCFG_PMM_PMLEN_16: + *value = 16; + break; + default: + return SBI_ERR_FAILURE; + } + + return SBI_SUCCESS; +} + +#endif + +static const struct kvm_sbi_fwft_feature features[] = { + { + .id = SBI_FWFT_MISALIGNED_EXC_DELEG, + .first_reg_num = offsetof(struct kvm_riscv_sbi_fwft, misaligned_deleg.enable) / + sizeof(unsigned long), + .supported = kvm_sbi_fwft_misaligned_delegation_supported, + .reset = kvm_sbi_fwft_reset_misaligned_delegation, + .set = kvm_sbi_fwft_set_misaligned_delegation, + .get = kvm_sbi_fwft_get_misaligned_delegation, + }, +#ifndef CONFIG_32BIT + { + .id = SBI_FWFT_POINTER_MASKING_PMLEN, + .first_reg_num = offsetof(struct kvm_riscv_sbi_fwft, pointer_masking.enable) / + sizeof(unsigned long), + .supported = kvm_sbi_fwft_pointer_masking_pmlen_supported, + .reset = kvm_sbi_fwft_reset_pointer_masking_pmlen, + .set = kvm_sbi_fwft_set_pointer_masking_pmlen, + .get = kvm_sbi_fwft_get_pointer_masking_pmlen, + }, +#endif +}; + +static const struct kvm_sbi_fwft_feature *kvm_sbi_fwft_regnum_to_feature(unsigned long reg_num) +{ + const struct kvm_sbi_fwft_feature *feature; + int i; + + for (i = 0; i < ARRAY_SIZE(features); i++) { + feature = &features[i]; + if (feature->first_reg_num <= reg_num && reg_num < (feature->first_reg_num + 3)) + return feature; + } + + return NULL; +} + +static struct kvm_sbi_fwft_config * +kvm_sbi_fwft_get_config(struct kvm_vcpu *vcpu, enum sbi_fwft_feature_t feature) +{ + int i; + struct kvm_sbi_fwft *fwft = vcpu_to_fwft(vcpu); + + for (i = 0; i < ARRAY_SIZE(features); i++) { + if (fwft->configs[i].feature->id == feature) + return &fwft->configs[i]; + } + + return NULL; +} + +static int kvm_fwft_get_feature(struct kvm_vcpu *vcpu, u32 feature, + struct kvm_sbi_fwft_config **conf) +{ + struct kvm_sbi_fwft_config *tconf; + + tconf = 
kvm_sbi_fwft_get_config(vcpu, feature); + if (!tconf) { + if (kvm_fwft_is_defined_feature(feature)) + return SBI_ERR_NOT_SUPPORTED; + + return SBI_ERR_DENIED; + } + + if (!tconf->supported || !tconf->enabled) + return SBI_ERR_NOT_SUPPORTED; + + *conf = tconf; + + return SBI_SUCCESS; +} + +static int kvm_sbi_fwft_set(struct kvm_vcpu *vcpu, u32 feature, + unsigned long value, unsigned long flags) +{ + int ret; + struct kvm_sbi_fwft_config *conf; + + ret = kvm_fwft_get_feature(vcpu, feature, &conf); + if (ret) + return ret; + + if ((flags & ~SBI_FWFT_SET_FLAG_LOCK) != 0) + return SBI_ERR_INVALID_PARAM; + + if (conf->flags & SBI_FWFT_SET_FLAG_LOCK) + return SBI_ERR_DENIED_LOCKED; + + conf->flags = flags; + + return conf->feature->set(vcpu, conf, false, value); +} + +static int kvm_sbi_fwft_get(struct kvm_vcpu *vcpu, unsigned long feature, + unsigned long *value) +{ + int ret; + struct kvm_sbi_fwft_config *conf; + + ret = kvm_fwft_get_feature(vcpu, feature, &conf); + if (ret) + return ret; + + return conf->feature->get(vcpu, conf, false, value); +} + +static int kvm_sbi_ext_fwft_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, + struct kvm_vcpu_sbi_return *retdata) +{ + int ret; + struct kvm_cpu_context *cp = &vcpu->arch.guest_context; + unsigned long funcid = cp->a6; + + switch (funcid) { + case SBI_EXT_FWFT_SET: + ret = kvm_sbi_fwft_set(vcpu, cp->a0, cp->a1, cp->a2); + break; + case SBI_EXT_FWFT_GET: + ret = kvm_sbi_fwft_get(vcpu, cp->a0, &retdata->out_val); + break; + default: + ret = SBI_ERR_NOT_SUPPORTED; + break; + } + + retdata->err_val = ret; + + return 0; +} + +static int kvm_sbi_ext_fwft_init(struct kvm_vcpu *vcpu) +{ + struct kvm_sbi_fwft *fwft = vcpu_to_fwft(vcpu); + const struct kvm_sbi_fwft_feature *feature; + struct kvm_sbi_fwft_config *conf; + int i; + + fwft->configs = kcalloc(ARRAY_SIZE(features), sizeof(struct kvm_sbi_fwft_config), + GFP_KERNEL); + if (!fwft->configs) + return -ENOMEM; + + for (i = 0; i < ARRAY_SIZE(features); i++) { + feature = &features[i]; + conf = &fwft->configs[i]; + if (feature->supported) + conf->supported = feature->supported(vcpu); + else + conf->supported = true; + + conf->enabled = conf->supported; + conf->feature = feature; + } + + return 0; +} + +static void kvm_sbi_ext_fwft_deinit(struct kvm_vcpu *vcpu) +{ + struct kvm_sbi_fwft *fwft = vcpu_to_fwft(vcpu); + + kfree(fwft->configs); +} + +static void kvm_sbi_ext_fwft_reset(struct kvm_vcpu *vcpu) +{ + struct kvm_sbi_fwft *fwft = vcpu_to_fwft(vcpu); + const struct kvm_sbi_fwft_feature *feature; + int i; + + for (i = 0; i < ARRAY_SIZE(features); i++) { + fwft->configs[i].flags = 0; + feature = &features[i]; + if (feature->reset) + feature->reset(vcpu); + } +} + +static unsigned long kvm_sbi_ext_fwft_get_reg_count(struct kvm_vcpu *vcpu) +{ + unsigned long max_reg_count = sizeof(struct kvm_riscv_sbi_fwft) / sizeof(unsigned long); + const struct kvm_sbi_fwft_feature *feature; + struct kvm_sbi_fwft_config *conf; + unsigned long reg, ret = 0; + + for (reg = 0; reg < max_reg_count; reg++) { + feature = kvm_sbi_fwft_regnum_to_feature(reg); + if (!feature) + continue; + + conf = kvm_sbi_fwft_get_config(vcpu, feature->id); + if (!conf || !conf->supported) + continue; + + ret++; + } + + return ret; +} + +static int kvm_sbi_ext_fwft_get_reg_id(struct kvm_vcpu *vcpu, int index, u64 *reg_id) +{ + int reg, max_reg_count = sizeof(struct kvm_riscv_sbi_fwft) / sizeof(unsigned long); + const struct kvm_sbi_fwft_feature *feature; + struct kvm_sbi_fwft_config *conf; + int idx = 0; + + for (reg = 0; reg < 
max_reg_count; reg++) { + feature = kvm_sbi_fwft_regnum_to_feature(reg); + if (!feature) + continue; + + conf = kvm_sbi_fwft_get_config(vcpu, feature->id); + if (!conf || !conf->supported) + continue; + + if (index == idx) { + *reg_id = KVM_REG_RISCV | + (IS_ENABLED(CONFIG_32BIT) ? + KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64) | + KVM_REG_RISCV_SBI_STATE | + KVM_REG_RISCV_SBI_FWFT | reg; + return 0; + } + + idx++; + } + + return -ENOENT; +} + +static int kvm_sbi_ext_fwft_get_reg(struct kvm_vcpu *vcpu, unsigned long reg_num, + unsigned long reg_size, void *reg_val) +{ + const struct kvm_sbi_fwft_feature *feature; + struct kvm_sbi_fwft_config *conf; + unsigned long *value; + int ret = 0; + + if (reg_size != sizeof(unsigned long)) + return -EINVAL; + value = reg_val; + + feature = kvm_sbi_fwft_regnum_to_feature(reg_num); + if (!feature) + return -ENOENT; + + conf = kvm_sbi_fwft_get_config(vcpu, feature->id); + if (!conf || !conf->supported) + return -ENOENT; + + switch (reg_num - feature->first_reg_num) { + case 0: + *value = conf->enabled; + break; + case 1: + *value = conf->flags; + break; + case 2: + ret = conf->feature->get(vcpu, conf, true, value); + break; + default: + return -ENOENT; + } + + return sbi_err_map_linux_errno(ret); +} + +static int kvm_sbi_ext_fwft_set_reg(struct kvm_vcpu *vcpu, unsigned long reg_num, + unsigned long reg_size, const void *reg_val) +{ + const struct kvm_sbi_fwft_feature *feature; + struct kvm_sbi_fwft_config *conf; + unsigned long value; + int ret = 0; + + if (reg_size != sizeof(unsigned long)) + return -EINVAL; + value = *(const unsigned long *)reg_val; + + feature = kvm_sbi_fwft_regnum_to_feature(reg_num); + if (!feature) + return -ENOENT; + + conf = kvm_sbi_fwft_get_config(vcpu, feature->id); + if (!conf || !conf->supported) + return -ENOENT; + + switch (reg_num - feature->first_reg_num) { + case 0: + switch (value) { + case 0: + conf->enabled = false; + break; + case 1: + conf->enabled = true; + break; + default: + return -EINVAL; + } + break; + case 1: + conf->flags = value & SBI_FWFT_SET_FLAG_LOCK; + break; + case 2: + ret = conf->feature->set(vcpu, conf, true, value); + break; + default: + return -ENOENT; + } + + return sbi_err_map_linux_errno(ret); +} + +const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_fwft = { + .extid_start = SBI_EXT_FWFT, + .extid_end = SBI_EXT_FWFT, + .handler = kvm_sbi_ext_fwft_handler, + .init = kvm_sbi_ext_fwft_init, + .deinit = kvm_sbi_ext_fwft_deinit, + .reset = kvm_sbi_ext_fwft_reset, + .state_reg_subtype = KVM_REG_RISCV_SBI_FWFT, + .get_state_reg_count = kvm_sbi_ext_fwft_get_reg_count, + .get_state_reg_id = kvm_sbi_ext_fwft_get_reg_id, + .get_state_reg = kvm_sbi_ext_fwft_get_reg, + .set_state_reg = kvm_sbi_ext_fwft_set_reg, +}; diff --git a/arch/riscv/kvm/vcpu_sbi_pmu.c b/arch/riscv/kvm/vcpu_sbi_pmu.c index e4be34e03e83..a020d979d179 100644 --- a/arch/riscv/kvm/vcpu_sbi_pmu.c +++ b/arch/riscv/kvm/vcpu_sbi_pmu.c @@ -73,6 +73,9 @@ static int kvm_sbi_ext_pmu_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, case SBI_EXT_PMU_SNAPSHOT_SET_SHMEM: ret = kvm_riscv_vcpu_pmu_snapshot_set_shmem(vcpu, cp->a0, cp->a1, cp->a2, retdata); break; + case SBI_EXT_PMU_EVENT_GET_INFO: + ret = kvm_riscv_vcpu_pmu_event_info(vcpu, cp->a0, cp->a1, cp->a2, cp->a3, retdata); + break; default: retdata->err_val = SBI_ERR_NOT_SUPPORTED; } diff --git a/arch/riscv/kvm/vcpu_sbi_sta.c b/arch/riscv/kvm/vcpu_sbi_sta.c index cc6cb7c8f0e4..afa0545c3bcf 100644 --- a/arch/riscv/kvm/vcpu_sbi_sta.c +++ b/arch/riscv/kvm/vcpu_sbi_sta.c @@ -85,8 +85,6 @@ static int 
kvm_sbi_sta_steal_time_set_shmem(struct kvm_vcpu *vcpu) unsigned long shmem_phys_hi = cp->a1; u32 flags = cp->a2; struct sbi_sta_struct zero_sta = {0}; - unsigned long hva; - bool writable; gpa_t shmem; int ret; @@ -111,13 +109,10 @@ static int kvm_sbi_sta_steal_time_set_shmem(struct kvm_vcpu *vcpu) return SBI_ERR_INVALID_ADDRESS; } - hva = kvm_vcpu_gfn_to_hva_prot(vcpu, shmem >> PAGE_SHIFT, &writable); - if (kvm_is_error_hva(hva) || !writable) - return SBI_ERR_INVALID_ADDRESS; - + /* No need to check writable slot explicitly as kvm_vcpu_write_guest does it internally */ ret = kvm_vcpu_write_guest(vcpu, shmem, &zero_sta, sizeof(zero_sta)); if (ret) - return SBI_ERR_FAILURE; + return SBI_ERR_INVALID_ADDRESS; vcpu->arch.sta.shmem = shmem; vcpu->arch.sta.last_steal = current->sched_info.run_delay; @@ -151,63 +146,82 @@ static unsigned long kvm_sbi_ext_sta_probe(struct kvm_vcpu *vcpu) return !!sched_info_on(); } -const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_sta = { - .extid_start = SBI_EXT_STA, - .extid_end = SBI_EXT_STA, - .handler = kvm_sbi_ext_sta_handler, - .probe = kvm_sbi_ext_sta_probe, - .reset = kvm_riscv_vcpu_sbi_sta_reset, -}; +static unsigned long kvm_sbi_ext_sta_get_state_reg_count(struct kvm_vcpu *vcpu) +{ + return sizeof(struct kvm_riscv_sbi_sta) / sizeof(unsigned long); +} -int kvm_riscv_vcpu_get_reg_sbi_sta(struct kvm_vcpu *vcpu, - unsigned long reg_num, - unsigned long *reg_val) +static int kvm_sbi_ext_sta_get_reg(struct kvm_vcpu *vcpu, unsigned long reg_num, + unsigned long reg_size, void *reg_val) { + unsigned long *value; + + if (reg_size != sizeof(unsigned long)) + return -EINVAL; + value = reg_val; + switch (reg_num) { case KVM_REG_RISCV_SBI_STA_REG(shmem_lo): - *reg_val = (unsigned long)vcpu->arch.sta.shmem; + *value = (unsigned long)vcpu->arch.sta.shmem; break; case KVM_REG_RISCV_SBI_STA_REG(shmem_hi): if (IS_ENABLED(CONFIG_32BIT)) - *reg_val = upper_32_bits(vcpu->arch.sta.shmem); + *value = upper_32_bits(vcpu->arch.sta.shmem); else - *reg_val = 0; + *value = 0; break; default: - return -EINVAL; + return -ENOENT; } return 0; } -int kvm_riscv_vcpu_set_reg_sbi_sta(struct kvm_vcpu *vcpu, - unsigned long reg_num, - unsigned long reg_val) +static int kvm_sbi_ext_sta_set_reg(struct kvm_vcpu *vcpu, unsigned long reg_num, + unsigned long reg_size, const void *reg_val) { + unsigned long value; + + if (reg_size != sizeof(unsigned long)) + return -EINVAL; + value = *(const unsigned long *)reg_val; + switch (reg_num) { case KVM_REG_RISCV_SBI_STA_REG(shmem_lo): if (IS_ENABLED(CONFIG_32BIT)) { gpa_t hi = upper_32_bits(vcpu->arch.sta.shmem); - vcpu->arch.sta.shmem = reg_val; + vcpu->arch.sta.shmem = value; vcpu->arch.sta.shmem |= hi << 32; } else { - vcpu->arch.sta.shmem = reg_val; + vcpu->arch.sta.shmem = value; } break; case KVM_REG_RISCV_SBI_STA_REG(shmem_hi): if (IS_ENABLED(CONFIG_32BIT)) { gpa_t lo = lower_32_bits(vcpu->arch.sta.shmem); - vcpu->arch.sta.shmem = ((gpa_t)reg_val << 32); + vcpu->arch.sta.shmem = ((gpa_t)value << 32); vcpu->arch.sta.shmem |= lo; - } else if (reg_val != 0) { + } else if (value != 0) { return -EINVAL; } break; default: - return -EINVAL; + return -ENOENT; } return 0; } + +const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_sta = { + .extid_start = SBI_EXT_STA, + .extid_end = SBI_EXT_STA, + .handler = kvm_sbi_ext_sta_handler, + .probe = kvm_sbi_ext_sta_probe, + .reset = kvm_riscv_vcpu_sbi_sta_reset, + .state_reg_subtype = KVM_REG_RISCV_SBI_STA, + .get_state_reg_count = kvm_sbi_ext_sta_get_state_reg_count, + .get_state_reg = kvm_sbi_ext_sta_get_reg, + 
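The get_state_reg_count()/get_state_reg_id() callbacks wired up for STA and FWFT are what feed KVM_GET_REG_LIST; a hypothetical userspace sketch (not part of the patch) that enumerates the resulting SBI state register IDs, assuming a vcpu fd and a 64-bit host:

/* Sketch only: list SBI state registers advertised by the vcpu. */
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>
#include <asm/kvm.h>

static void dump_sbi_state_regs(int vcpu_fd)
{
	struct kvm_reg_list probe = { .n = 0 }, *list;
	__u64 i;

	/* The first call fails with E2BIG but reports the register count. */
	if (ioctl(vcpu_fd, KVM_GET_REG_LIST, &probe) == 0 || errno != E2BIG)
		return;

	list = calloc(1, sizeof(*list) + probe.n * sizeof(__u64));
	if (!list)
		return;
	list->n = probe.n;

	if (ioctl(vcpu_fd, KVM_GET_REG_LIST, list) == 0) {
		for (i = 0; i < list->n; i++)
			if ((list->reg[i] & KVM_REG_RISCV_TYPE_MASK) ==
			    KVM_REG_RISCV_SBI_STATE)
				printf("SBI state reg id: 0x%llx\n",
				       (unsigned long long)list->reg[i]);
	}
	free(list);
}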
.set_state_reg = kvm_sbi_ext_sta_set_reg, +}; diff --git a/arch/riscv/kvm/vmid.c b/arch/riscv/kvm/vmid.c index 3b426c800480..abb1c2bf2542 100644 --- a/arch/riscv/kvm/vmid.c +++ b/arch/riscv/kvm/vmid.c @@ -14,6 +14,7 @@ #include <linux/smp.h> #include <linux/kvm_host.h> #include <asm/csr.h> +#include <asm/kvm_mmu.h> #include <asm/kvm_tlb.h> #include <asm/kvm_vmid.h> @@ -24,15 +25,12 @@ static DEFINE_SPINLOCK(vmid_lock); void __init kvm_riscv_gstage_vmid_detect(void) { - unsigned long old; - /* Figure-out number of VMID bits in HW */ - old = csr_read(CSR_HGATP); - csr_write(CSR_HGATP, old | HGATP_VMID); + csr_write(CSR_HGATP, (kvm_riscv_gstage_mode << HGATP_MODE_SHIFT) | HGATP_VMID); vmid_bits = csr_read(CSR_HGATP); vmid_bits = (vmid_bits & HGATP_VMID) >> HGATP_VMID_SHIFT; vmid_bits = fls_long(vmid_bits); - csr_write(CSR_HGATP, old); + csr_write(CSR_HGATP, 0); /* We polluted local TLB so flush all guest TLB */ kvm_riscv_local_hfence_gvma_all(); diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 7679bc16b692..b4769241332b 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -25,6 +25,7 @@ endif KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 -mpacked-stack -std=gnu11 KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY KBUILD_CFLAGS_DECOMPRESSOR += -D__DECOMPRESSOR +KBUILD_CFLAGS_DECOMPRESSOR += -Wno-pointer-sign KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float -mbackchain KBUILD_CFLAGS_DECOMPRESSOR += -fno-asynchronous-unwind-tables KBUILD_CFLAGS_DECOMPRESSOR += -ffreestanding diff --git a/arch/s390/crypto/sha.h b/arch/s390/crypto/sha.h index cadb4b13622a..b9cd9572dd35 100644 --- a/arch/s390/crypto/sha.h +++ b/arch/s390/crypto/sha.h @@ -10,14 +10,15 @@ #ifndef _CRYPTO_ARCH_S390_SHA_H #define _CRYPTO_ARCH_S390_SHA_H +#include <crypto/hash.h> #include <crypto/sha2.h> #include <crypto/sha3.h> +#include <linux/build_bug.h> #include <linux/types.h> /* must be big enough for the largest SHA variant */ #define CPACF_MAX_PARMBLOCK_SIZE SHA3_STATE_SIZE #define SHA_MAX_BLOCK_SIZE SHA3_224_BLOCK_SIZE -#define S390_SHA_CTX_SIZE sizeof(struct s390_sha_ctx) struct s390_sha_ctx { u64 count; /* message length in bytes */ @@ -42,4 +43,9 @@ int s390_sha_update_blocks(struct shash_desc *desc, const u8 *data, int s390_sha_finup(struct shash_desc *desc, const u8 *src, unsigned int len, u8 *out); +static inline void __check_s390_sha_ctx_size(void) +{ + BUILD_BUG_ON(S390_SHA_CTX_SIZE != sizeof(struct s390_sha_ctx)); +} + #endif diff --git a/arch/s390/hypfs/hypfs_sprp.c b/arch/s390/hypfs/hypfs_sprp.c index 9fc3f0dae8f0..a2952ed5518b 100644 --- a/arch/s390/hypfs/hypfs_sprp.c +++ b/arch/s390/hypfs/hypfs_sprp.c @@ -27,7 +27,7 @@ static inline unsigned long __hypfs_sprp_diag304(void *data, unsigned long cmd) { union register_pair r1 = { .even = virt_to_phys(data), }; - asm volatile("diag %[r1],%[r3],0x304\n" + asm volatile("diag %[r1],%[r3],0x304" : [r1] "+&d" (r1.pair) : [r3] "d" (cmd) : "memory"); diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h index 352108727d7e..56817990c73d 100644 --- a/arch/s390/include/asm/ap.h +++ b/arch/s390/include/asm/ap.h @@ -143,7 +143,7 @@ static inline struct ap_queue_status ap_tapq(ap_qid_t qid, " lghi 2,0\n" /* 0 into gr2 */ " .insn rre,0xb2af0000,0,0\n" /* PQAP(TAPQ) */ " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ - " lgr %[reg2],2\n" /* gr2 into reg2 */ + " lgr %[reg2],2" /* gr2 into reg2 */ : [reg1] "=&d" (reg1.value), [reg2] "=&d" (reg2) : [qid] "d" (qid) : "cc", "0", "1", "2"); @@ -186,7 
+186,7 @@ static inline struct ap_queue_status ap_rapq(ap_qid_t qid, int fbit) asm volatile( " lgr 0,%[reg0]\n" /* qid arg into gr0 */ " .insn rre,0xb2af0000,0,0\n" /* PQAP(RAPQ) */ - " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ + " lgr %[reg1],1" /* gr1 (status) into reg1 */ : [reg1] "=&d" (reg1.value) : [reg0] "d" (reg0) : "cc", "0", "1"); @@ -211,7 +211,7 @@ static inline struct ap_queue_status ap_zapq(ap_qid_t qid, int fbit) asm volatile( " lgr 0,%[reg0]\n" /* qid arg into gr0 */ " .insn rre,0xb2af0000,0,0\n" /* PQAP(ZAPQ) */ - " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ + " lgr %[reg1],1" /* gr1 (status) into reg1 */ : [reg1] "=&d" (reg1.value) : [reg0] "d" (reg0) : "cc", "0", "1"); @@ -315,7 +315,7 @@ static inline struct ap_queue_status ap_aqic(ap_qid_t qid, " lgr 1,%[reg1]\n" /* irq ctrl into gr1 */ " lgr 2,%[reg2]\n" /* ni addr into gr2 */ " .insn rre,0xb2af0000,0,0\n" /* PQAP(AQIC) */ - " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ + " lgr %[reg1],1" /* gr1 (status) into reg1 */ : [reg1] "+&d" (reg1.value) : [reg0] "d" (reg0), [reg2] "d" (reg2) : "cc", "memory", "0", "1", "2"); @@ -363,7 +363,7 @@ static inline struct ap_queue_status ap_qact(ap_qid_t qid, int ifbit, " lgr 1,%[reg1]\n" /* qact in info into gr1 */ " .insn rre,0xb2af0000,0,0\n" /* PQAP(QACT) */ " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ - " lgr %[reg2],2\n" /* qact out info into reg2 */ + " lgr %[reg2],2" /* qact out info into reg2 */ : [reg1] "+&d" (reg1.value), [reg2] "=&d" (reg2) : [reg0] "d" (reg0) : "cc", "0", "1", "2"); @@ -388,7 +388,7 @@ static inline struct ap_queue_status ap_bapq(ap_qid_t qid) asm volatile( " lgr 0,%[reg0]\n" /* qid arg into gr0 */ " .insn rre,0xb2af0000,0,0\n" /* PQAP(BAPQ) */ - " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ + " lgr %[reg1],1" /* gr1 (status) into reg1 */ : [reg1] "=&d" (reg1.value) : [reg0] "d" (reg0) : "cc", "0", "1"); @@ -416,7 +416,7 @@ static inline struct ap_queue_status ap_aapq(ap_qid_t qid, unsigned int sec_idx) " lgr 0,%[reg0]\n" /* qid arg into gr0 */ " lgr 2,%[reg2]\n" /* secret index into gr2 */ " .insn rre,0xb2af0000,0,0\n" /* PQAP(AAPQ) */ - " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ + " lgr %[reg1],1" /* gr1 (status) into reg1 */ : [reg1] "=&d" (reg1.value) : [reg0] "d" (reg0), [reg2] "d" (reg2) : "cc", "0", "1", "2"); @@ -453,7 +453,7 @@ static inline struct ap_queue_status ap_nqap(ap_qid_t qid, " lgr 0,%[reg0]\n" /* qid param in gr0 */ "0: .insn rre,0xb2ad0000,%[nqap_r1],%[nqap_r2]\n" " brc 2,0b\n" /* handle partial completion */ - " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ + " lgr %[reg1],1" /* gr1 (status) into reg1 */ : [reg0] "+&d" (reg0), [reg1] "=&d" (reg1.value), [nqap_r2] "+&d" (nqap_r2.pair) : [nqap_r1] "d" (nqap_r1.pair) @@ -518,7 +518,7 @@ static inline struct ap_queue_status ap_dqap(ap_qid_t qid, " brc 6,0b\n" /* handle partial complete */ "2: lgr %[reg0],0\n" /* gr0 (qid + info) into reg0 */ " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ - " lgr %[reg2],2\n" /* gr2 (res length) into reg2 */ + " lgr %[reg2],2" /* gr2 (res length) into reg2 */ : [reg0] "+&d" (reg0), [reg1] "=&d" (reg1.value), [reg2] "=&d" (reg2), [rp1] "+&d" (rp1.pair), [rp2] "+&d" (rp2.pair) diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h index 21c26d842832..845b77864412 100644 --- a/arch/s390/include/asm/atomic_ops.h +++ b/arch/s390/include/asm/atomic_ops.h @@ -17,7 +17,7 @@ static __always_inline int __atomic_read(const int *ptr) int val; asm volatile( - " l %[val],%[ptr]\n" + " l %[val],%[ptr]" : [val] "=d" 
(val) : [ptr] "R" (*ptr)); return val; } @@ -26,11 +26,11 @@ static __always_inline void __atomic_set(int *ptr, int val) { if (__builtin_constant_p(val) && val >= S16_MIN && val <= S16_MAX) { asm volatile( - " mvhi %[ptr],%[val]\n" + " mvhi %[ptr],%[val]" : [ptr] "=Q" (*ptr) : [val] "K" (val)); } else { asm volatile( - " st %[val],%[ptr]\n" + " st %[val],%[ptr]" : [ptr] "=R" (*ptr) : [val] "d" (val)); } } @@ -40,7 +40,7 @@ static __always_inline long __atomic64_read(const long *ptr) long val; asm volatile( - " lg %[val],%[ptr]\n" + " lg %[val],%[ptr]" : [val] "=d" (val) : [ptr] "RT" (*ptr)); return val; } @@ -49,11 +49,11 @@ static __always_inline void __atomic64_set(long *ptr, long val) { if (__builtin_constant_p(val) && val >= S16_MIN && val <= S16_MAX) { asm volatile( - " mvghi %[ptr],%[val]\n" + " mvghi %[ptr],%[val]" : [ptr] "=Q" (*ptr) : [val] "K" (val)); } else { asm volatile( - " stg %[val],%[ptr]\n" + " stg %[val],%[ptr]" : [ptr] "=RT" (*ptr) : [val] "d" (val)); } } @@ -66,7 +66,7 @@ static __always_inline op_type op_name(op_type val, op_type *ptr) \ op_type old; \ \ asm volatile( \ - op_string " %[old],%[val],%[ptr]\n" \ + op_string " %[old],%[val],%[ptr]" \ op_barrier \ : [old] "=d" (old), [ptr] "+QS" (*ptr) \ : [val] "d" (val) : "cc", "memory"); \ @@ -75,7 +75,7 @@ static __always_inline op_type op_name(op_type val, op_type *ptr) \ #define __ATOMIC_OPS(op_name, op_type, op_string) \ __ATOMIC_OP(op_name, op_type, op_string, "") \ - __ATOMIC_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n") + __ATOMIC_OP(op_name##_barrier, op_type, op_string, "\nbcr 14,0") __ATOMIC_OPS(__atomic_add, int, "laa") __ATOMIC_OPS(__atomic_and, int, "lan") @@ -94,14 +94,14 @@ __ATOMIC_OPS(__atomic64_xor, long, "laxg") static __always_inline void op_name(op_type val, op_type *ptr) \ { \ asm volatile( \ - op_string " %[ptr],%[val]\n" \ + op_string " %[ptr],%[val]" \ op_barrier \ : [ptr] "+QS" (*ptr) : [val] "i" (val) : "cc", "memory");\ } #define __ATOMIC_CONST_OPS(op_name, op_type, op_string) \ __ATOMIC_CONST_OP(op_name, op_type, op_string, "") \ - __ATOMIC_CONST_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n") + __ATOMIC_CONST_OP(op_name##_barrier, op_type, op_string, "\nbcr 14,0") __ATOMIC_CONST_OPS(__atomic_add_const, int, "asi") __ATOMIC_CONST_OPS(__atomic64_add_const, long, "agsi") @@ -179,7 +179,7 @@ static __always_inline bool op_name(op_type val, op_type *ptr) \ int cc; \ \ asm volatile( \ - op_string " %[tmp],%[val],%[ptr]\n" \ + op_string " %[tmp],%[val],%[ptr]" \ op_barrier \ : "=@cc" (cc), [tmp] "=d" (tmp), [ptr] "+QS" (*ptr) \ : [val] "d" (val) \ @@ -189,7 +189,7 @@ static __always_inline bool op_name(op_type val, op_type *ptr) \ #define __ATOMIC_TEST_OPS(op_name, op_type, op_string) \ __ATOMIC_TEST_OP(op_name, op_type, op_string, "") \ - __ATOMIC_TEST_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n") + __ATOMIC_TEST_OP(op_name##_barrier, op_type, op_string, "\nbcr 14,0") __ATOMIC_TEST_OPS(__atomic_add_and_test, int, "laal") __ATOMIC_TEST_OPS(__atomic64_add_and_test, long, "laalg") @@ -203,7 +203,7 @@ static __always_inline bool op_name(op_type val, op_type *ptr) \ int cc; \ \ asm volatile( \ - op_string " %[ptr],%[val]\n" \ + op_string " %[ptr],%[val]" \ op_barrier \ : "=@cc" (cc), [ptr] "+QS" (*ptr) \ : [val] "i" (val) \ @@ -213,7 +213,7 @@ static __always_inline bool op_name(op_type val, op_type *ptr) \ #define __ATOMIC_CONST_TEST_OPS(op_name, op_type, op_string) \ __ATOMIC_CONST_TEST_OP(op_name, op_type, op_string, "") \ - __ATOMIC_CONST_TEST_OP(op_name##_barrier, 
op_type, op_string, "bcr 14,0\n") + __ATOMIC_CONST_TEST_OP(op_name##_barrier, op_type, op_string, "\nbcr 14,0") __ATOMIC_CONST_TEST_OPS(__atomic_add_const_and_test, int, "alsi") __ATOMIC_CONST_TEST_OPS(__atomic64_add_const_and_test, long, "algsi") diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h index d82130d7f2b6..f3184073e754 100644 --- a/arch/s390/include/asm/barrier.h +++ b/arch/s390/include/asm/barrier.h @@ -18,9 +18,9 @@ #ifdef MARCH_HAS_Z196_FEATURES /* Fast-BCR without checkpoint synchronization */ -#define __ASM_BCR_SERIALIZE "bcr 14,0\n" +#define __ASM_BCR_SERIALIZE "bcr 14,0" #else -#define __ASM_BCR_SERIALIZE "bcr 15,0\n" +#define __ASM_BCR_SERIALIZE "bcr 15,0" #endif static __always_inline void bcr_serialize(void) @@ -69,12 +69,12 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, if (__builtin_constant_p(size) && size > 0) { asm(" clgr %2,%1\n" - " slbgr %0,%0\n" + " slbgr %0,%0" :"=d" (mask) : "d" (size-1), "d" (index) :"cc"); return mask; } asm(" clgr %1,%2\n" - " slbgr %0,%0\n" + " slbgr %0,%0" :"=d" (mask) : "d" (size), "d" (index) :"cc"); return ~mask; } diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index ec945fb60c02..5f10074665b0 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -62,7 +62,7 @@ static __always_inline bool arch_test_bit(unsigned long nr, const volatile unsig addr += (nr ^ (BITS_PER_LONG - BITS_PER_BYTE)) / BITS_PER_BYTE; mask = 1UL << (nr & (BITS_PER_BYTE - 1)); asm volatile( - " tm %[addr],%[mask]\n" + " tm %[addr],%[mask]" : "=@cc" (cc) : [addr] "Q" (*addr), [mask] "I" (mask) ); diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h index d86dea5900e7..7e83dc2d3b06 100644 --- a/arch/s390/include/asm/checksum.h +++ b/arch/s390/include/asm/checksum.h @@ -27,7 +27,7 @@ static inline __wsum cksm(const void *buff, int len, __wsum sum) kmsan_check_memory(buff, len); asm volatile( "0: cksm %[sum],%[rp]\n" - " jo 0b\n" + " jo 0b" : [sum] "+&d" (sum), [rp] "+&d" (rp.pair) : : "cc", "memory"); return sum; } diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h index a9e2006033b7..008357996262 100644 --- a/arch/s390/include/asm/cmpxchg.h +++ b/arch/s390/include/asm/cmpxchg.h @@ -18,7 +18,7 @@ void __cmpxchg_called_with_bad_pointer(void); static __always_inline u32 __cs_asm(u64 ptr, u32 old, u32 new) { asm volatile( - " cs %[old],%[new],%[ptr]\n" + " cs %[old],%[new],%[ptr]" : [old] "+d" (old), [ptr] "+Q" (*(u32 *)ptr) : [new] "d" (new) : "memory", "cc"); @@ -28,7 +28,7 @@ static __always_inline u32 __cs_asm(u64 ptr, u32 old, u32 new) static __always_inline u64 __csg_asm(u64 ptr, u64 old, u64 new) { asm volatile( - " csg %[old],%[new],%[ptr]\n" + " csg %[old],%[new],%[ptr]" : [old] "+d" (old), [ptr] "+QS" (*(u64 *)ptr) : [new] "d" (new) : "memory", "cc"); @@ -126,7 +126,7 @@ static __always_inline u64 __arch_cmpxchg(u64 ptr, u64 old, u64 new, int size) } \ case 4: { \ asm volatile( \ - " cs %[__old],%[__new],%[__ptr]\n" \ + " cs %[__old],%[__new],%[__ptr]" \ : [__old] "+d" (*__oldp), \ [__ptr] "+Q" (*(ptr)), \ "=@cc" (__cc) \ @@ -136,7 +136,7 @@ static __always_inline u64 __arch_cmpxchg(u64 ptr, u64 old, u64 new, int size) } \ case 8: { \ asm volatile( \ - " csg %[__old],%[__new],%[__ptr]\n" \ + " csg %[__old],%[__new],%[__ptr]" \ : [__old] "+d" (*__oldp), \ [__ptr] "+QS" (*(ptr)), \ "=@cc" (__cc) \ @@ -241,7 +241,7 @@ static __always_inline u64 __arch_xchg(u64 ptr, u64 x, int size) static 
__always_inline u128 arch_cmpxchg128(volatile u128 *ptr, u128 old, u128 new) { asm volatile( - " cdsg %[old],%[new],%[ptr]\n" + " cdsg %[old],%[new],%[ptr]" : [old] "+d" (old), [ptr] "+QS" (*ptr) : [new] "d" (new) : "memory", "cc"); @@ -258,7 +258,7 @@ static __always_inline bool arch_try_cmpxchg128(volatile u128 *ptr, u128 *oldp, int cc; asm volatile( - " cdsg %[old],%[new],%[ptr]\n" + " cdsg %[old],%[new],%[ptr]" : [old] "+d" (*oldp), [ptr] "+QS" (*ptr), "=@cc" (cc) : [new] "d" (new) : "memory"); diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h index 4bc5317fbb12..a83683169d98 100644 --- a/arch/s390/include/asm/cpacf.h +++ b/arch/s390/include/asm/cpacf.h @@ -229,7 +229,7 @@ static __always_inline void __cpacf_query_rre(u32 opc, u8 r1, u8 r2, asm volatile( " la %%r1,%[pb]\n" " lghi %%r0,%[fc]\n" - " .insn rre,%[opc] << 16,%[r1],%[r2]\n" + " .insn rre,%[opc] << 16,%[r1],%[r2]" : [pb] "=R" (*pb) : [opc] "i" (opc), [fc] "i" (fc), [r1] "i" (r1), [r2] "i" (r2) @@ -242,7 +242,7 @@ static __always_inline void __cpacf_query_rrf(u32 opc, u8 r1, u8 r2, u8 r3, asm volatile( " la %%r1,%[pb]\n" " lghi %%r0,%[fc]\n" - " .insn rrf,%[opc] << 16,%[r1],%[r2],%[r3],%[m4]\n" + " .insn rrf,%[opc] << 16,%[r1],%[r2],%[r3],%[m4]" : [pb] "=R" (*pb) : [opc] "i" (opc), [fc] "i" (fc), [r1] "i" (r1), [r2] "i" (r2), [r3] "i" (r3), [m4] "i" (m4) @@ -416,7 +416,7 @@ static inline int cpacf_km(unsigned long func, void *param, " lgr 0,%[fc]\n" " lgr 1,%[pba]\n" "0: .insn rre,%[opc] << 16,%[dst],%[src]\n" - " brc 1,0b\n" /* handle partial completion */ + " brc 1,0b" /* handle partial completion */ : [src] "+&d" (s.pair), [dst] "+&d" (d.pair) : [fc] "d" (func), [pba] "d" ((unsigned long)param), [opc] "i" (CPACF_KM) @@ -448,7 +448,7 @@ static inline int cpacf_kmc(unsigned long func, void *param, " lgr 0,%[fc]\n" " lgr 1,%[pba]\n" "0: .insn rre,%[opc] << 16,%[dst],%[src]\n" - " brc 1,0b\n" /* handle partial completion */ + " brc 1,0b" /* handle partial completion */ : [src] "+&d" (s.pair), [dst] "+&d" (d.pair) : [fc] "d" (func), [pba] "d" ((unsigned long)param), [opc] "i" (CPACF_KMC) @@ -476,7 +476,7 @@ static inline void cpacf_kimd(unsigned long func, void *param, " lgr 0,%[fc]\n" " lgr 1,%[pba]\n" "0: .insn rrf,%[opc] << 16,0,%[src],8,0\n" - " brc 1,0b\n" /* handle partial completion */ + " brc 1,0b" /* handle partial completion */ : [src] "+&d" (s.pair) : [fc] "d" (func), [pba] "d" ((unsigned long)(param)), [opc] "i" (CPACF_KIMD) @@ -501,7 +501,7 @@ static inline void cpacf_klmd(unsigned long func, void *param, " lgr 0,%[fc]\n" " lgr 1,%[pba]\n" "0: .insn rrf,%[opc] << 16,0,%[src],8,0\n" - " brc 1,0b\n" /* handle partial completion */ + " brc 1,0b" /* handle partial completion */ : [src] "+&d" (s.pair) : [fc] "d" (func), [pba] "d" ((unsigned long)param), [opc] "i" (CPACF_KLMD) @@ -530,7 +530,7 @@ static inline int _cpacf_kmac(unsigned long *gr0, void *param, " lgr 1,%[pba]\n" "0: .insn rre,%[opc] << 16,0,%[src]\n" " brc 1,0b\n" /* handle partial completion */ - " lgr %[r0],0\n" + " lgr %[r0],0" : [r0] "+d" (*gr0), [src] "+&d" (s.pair) : [pba] "d" ((unsigned long)param), [opc] "i" (CPACF_KMAC) @@ -580,7 +580,7 @@ static inline int cpacf_kmctr(unsigned long func, void *param, u8 *dest, " lgr 0,%[fc]\n" " lgr 1,%[pba]\n" "0: .insn rrf,%[opc] << 16,%[dst],%[src],%[ctr],0\n" - " brc 1,0b\n" /* handle partial completion */ + " brc 1,0b" /* handle partial completion */ : [src] "+&d" (s.pair), [dst] "+&d" (d.pair), [ctr] "+&d" (c.pair) : [fc] "d" (func), [pba] "d" ((unsigned long)param), @@ -614,7 +614,7 
@@ static inline void cpacf_prno(unsigned long func, void *param, " lgr 0,%[fc]\n" " lgr 1,%[pba]\n" "0: .insn rre,%[opc] << 16,%[dst],%[seed]\n" - " brc 1,0b\n" /* handle partial completion */ + " brc 1,0b" /* handle partial completion */ : [dst] "+&d" (d.pair) : [fc] "d" (func), [pba] "d" ((unsigned long)param), [seed] "d" (s.pair), [opc] "i" (CPACF_PRNO) @@ -640,7 +640,7 @@ static inline void cpacf_trng(u8 *ucbuf, unsigned long ucbuf_len, asm volatile ( " lghi 0,%[fc]\n" "0: .insn rre,%[opc] << 16,%[ucbuf],%[cbuf]\n" - " brc 1,0b\n" /* handle partial completion */ + " brc 1,0b" /* handle partial completion */ : [ucbuf] "+&d" (u.pair), [cbuf] "+&d" (c.pair) : [fc] "K" (CPACF_PRNO_TRNG), [opc] "i" (CPACF_PRNO) : "cc", "memory", "0"); @@ -692,7 +692,7 @@ static inline void cpacf_pckmo(long func, void *param) asm volatile( " lgr 0,%[fc]\n" " lgr 1,%[pba]\n" - " .insn rre,%[opc] << 16,0,0\n" /* PCKMO opcode */ + " .insn rre,%[opc] << 16,0,0" /* PCKMO opcode */ : : [fc] "d" (func), [pba] "d" ((unsigned long)param), [opc] "i" (CPACF_PCKMO) @@ -725,7 +725,7 @@ static inline void cpacf_kma(unsigned long func, void *param, u8 *dest, " lgr 0,%[fc]\n" " lgr 1,%[pba]\n" "0: .insn rrf,%[opc] << 16,%[dst],%[src],%[aad],0\n" - " brc 1,0b\n" /* handle partial completion */ + " brc 1,0b" /* handle partial completion */ : [dst] "+&d" (d.pair), [src] "+&d" (s.pair), [aad] "+&d" (a.pair) : [fc] "d" (func), [pba] "d" ((unsigned long)param), diff --git a/arch/s390/include/asm/ctlreg.h b/arch/s390/include/asm/ctlreg.h index e93cc240a1ed..1765a0320933 100644 --- a/arch/s390/include/asm/ctlreg.h +++ b/arch/s390/include/asm/ctlreg.h @@ -100,7 +100,7 @@ struct ctlreg { BUILD_BUG_ON(sizeof(struct addrtype) != _esize); \ typecheck(struct ctlreg, array[0]); \ asm volatile( \ - " lctlg %[_low],%[_high],%[_arr]\n" \ + " lctlg %[_low],%[_high],%[_arr]" \ : \ : [_arr] "Q" (*(struct addrtype *)(&array)), \ [_low] "i" (low), [_high] "i" (high) \ @@ -119,7 +119,7 @@ struct ctlreg { BUILD_BUG_ON(sizeof(struct addrtype) != _esize); \ typecheck(struct ctlreg, array[0]); \ asm volatile( \ - " stctg %[_low],%[_high],%[_arr]\n" \ + " stctg %[_low],%[_high],%[_arr]" \ : [_arr] "=Q" (*(struct addrtype *)(&array)) \ : [_low] "i" (low), [_high] "i" (high)); \ } while (0) @@ -127,7 +127,7 @@ struct ctlreg { static __always_inline void local_ctl_load(unsigned int cr, struct ctlreg *reg) { asm volatile( - " lctlg %[cr],%[cr],%[reg]\n" + " lctlg %[cr],%[cr],%[reg]" : : [reg] "Q" (*reg), [cr] "i" (cr) : "memory"); @@ -136,7 +136,7 @@ static __always_inline void local_ctl_load(unsigned int cr, struct ctlreg *reg) static __always_inline void local_ctl_store(unsigned int cr, struct ctlreg *reg) { asm volatile( - " stctg %[cr],%[cr],%[reg]\n" + " stctg %[cr],%[cr],%[reg]" : [reg] "=Q" (*reg) : [cr] "i" (cr)); } diff --git a/arch/s390/include/asm/fpu-insn.h b/arch/s390/include/asm/fpu-insn.h index 135bb89c0a89..e99f8bca8e08 100644 --- a/arch/s390/include/asm/fpu-insn.h +++ b/arch/s390/include/asm/fpu-insn.h @@ -38,7 +38,7 @@ asm(".include \"asm/fpu-insn-asm.h\"\n"); static __always_inline void fpu_cefbr(u8 f1, s32 val) { - asm volatile("cefbr %[f1],%[val]\n" + asm volatile("cefbr %[f1],%[val]" : : [f1] "I" (f1), [val] "d" (val) : "memory"); @@ -48,7 +48,7 @@ static __always_inline unsigned long fpu_cgebr(u8 f2, u8 mode) { unsigned long val; - asm volatile("cgebr %[val],%[mode],%[f2]\n" + asm volatile("cgebr %[val],%[mode],%[f2]" : [val] "=d" (val) : [f2] "I" (f2), [mode] "I" (mode) : "memory"); @@ -57,7 +57,7 @@ static __always_inline unsigned 
long fpu_cgebr(u8 f2, u8 mode) static __always_inline void fpu_debr(u8 f1, u8 f2) { - asm volatile("debr %[f1],%[f2]\n" + asm volatile("debr %[f1],%[f2]" : : [f1] "I" (f1), [f2] "I" (f2) : "memory"); @@ -66,7 +66,7 @@ static __always_inline void fpu_debr(u8 f1, u8 f2) static __always_inline void fpu_ld(unsigned short fpr, freg_t *reg) { instrument_read(reg, sizeof(*reg)); - asm volatile("ld %[fpr],%[reg]\n" + asm volatile("ld %[fpr],%[reg]" : : [fpr] "I" (fpr), [reg] "Q" (reg->ui) : "memory"); @@ -74,7 +74,7 @@ static __always_inline void fpu_ld(unsigned short fpr, freg_t *reg) static __always_inline void fpu_ldgr(u8 f1, u32 val) { - asm volatile("ldgr %[f1],%[val]\n" + asm volatile("ldgr %[f1],%[val]" : : [f1] "I" (f1), [val] "d" (val) : "memory"); @@ -113,7 +113,7 @@ static inline void fpu_lfpc_safe(unsigned int *fpc) static __always_inline void fpu_std(unsigned short fpr, freg_t *reg) { instrument_write(reg, sizeof(*reg)); - asm volatile("std %[fpr],%[reg]\n" + asm volatile("std %[fpr],%[reg]" : [reg] "=Q" (reg->ui) : [fpr] "I" (fpr) : "memory"); @@ -181,7 +181,7 @@ static __always_inline void fpu_vgfmg(u8 v1, u8 v2, u8 v3) static __always_inline void fpu_vl(u8 v1, const void *vxr) { instrument_read(vxr, sizeof(__vector128)); - asm volatile("VL %[v1],%O[vxr],,%R[vxr]\n" + asm volatile("VL %[v1],%O[vxr],,%R[vxr]" : : [vxr] "Q" (*(__vector128 *)vxr), [v1] "I" (v1) @@ -195,7 +195,7 @@ static __always_inline void fpu_vl(u8 v1, const void *vxr) instrument_read(vxr, sizeof(__vector128)); asm volatile( " la 1,%[vxr]\n" - " VL %[v1],0,,1\n" + " VL %[v1],0,,1" : : [vxr] "R" (*(__vector128 *)vxr), [v1] "I" (v1) @@ -239,7 +239,7 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr) size = min(index + 1, sizeof(__vector128)); instrument_read(vxr, size); - asm volatile("VLL %[v1],%[index],%O[vxr],%R[vxr]\n" + asm volatile("VLL %[v1],%[index],%O[vxr],%R[vxr]" : : [vxr] "Q" (*(u8 *)vxr), [index] "d" (index), @@ -257,7 +257,7 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr) instrument_read(vxr, size); asm volatile( " la 1,%[vxr]\n" - " VLL %[v1],%[index],0,1\n" + " VLL %[v1],%[index],0,1" : : [vxr] "R" (*(u8 *)vxr), [index] "d" (index), @@ -277,7 +277,7 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr) } *_v = (void *)(_vxrs); \ \ instrument_read(_v, size); \ - asm volatile("VLM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \ + asm volatile("VLM %[v1],%[v3],%O[vxrs],%R[vxrs]" \ : \ : [vxrs] "Q" (*_v), \ [v1] "I" (_v1), [v3] "I" (_v3) \ @@ -297,7 +297,7 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr) instrument_read(_v, size); \ asm volatile( \ " la 1,%[vxrs]\n" \ - " VLM %[v1],%[v3],0,1\n" \ + " VLM %[v1],%[v3],0,1" \ : \ : [vxrs] "R" (*_v), \ [v1] "I" (_v1), [v3] "I" (_v3) \ @@ -360,7 +360,7 @@ static __always_inline void fpu_vsrlb(u8 v1, u8 v2, u8 v3) static __always_inline void fpu_vst(u8 v1, const void *vxr) { instrument_write(vxr, sizeof(__vector128)); - asm volatile("VST %[v1],%O[vxr],,%R[vxr]\n" + asm volatile("VST %[v1],%O[vxr],,%R[vxr]" : [vxr] "=Q" (*(__vector128 *)vxr) : [v1] "I" (v1) : "memory"); @@ -373,7 +373,7 @@ static __always_inline void fpu_vst(u8 v1, const void *vxr) instrument_write(vxr, sizeof(__vector128)); asm volatile( " la 1,%[vxr]\n" - " VST %[v1],0,,1\n" + " VST %[v1],0,,1" : [vxr] "=R" (*(__vector128 *)vxr) : [v1] "I" (v1) : "memory", "1"); @@ -389,7 +389,7 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) size = min(index + 1, sizeof(__vector128)); 
instrument_write(vxr, size); - asm volatile("VSTL %[v1],%[index],%O[vxr],%R[vxr]\n" + asm volatile("VSTL %[v1],%[index],%O[vxr],%R[vxr]" : [vxr] "=Q" (*(u8 *)vxr) : [index] "d" (index), [v1] "I" (v1) : "memory"); @@ -405,7 +405,7 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) instrument_write(vxr, size); asm volatile( " la 1,%[vxr]\n" - " VSTL %[v1],%[index],0,1\n" + " VSTL %[v1],%[index],0,1" : [vxr] "=R" (*(u8 *)vxr) : [index] "d" (index), [v1] "I" (v1) : "memory", "1"); @@ -423,7 +423,7 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) } *_v = (void *)(_vxrs); \ \ instrument_write(_v, size); \ - asm volatile("VSTM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \ + asm volatile("VSTM %[v1],%[v3],%O[vxrs],%R[vxrs]" \ : [vxrs] "=Q" (*_v) \ : [v1] "I" (_v1), [v3] "I" (_v3) \ : "memory"); \ @@ -442,7 +442,7 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) instrument_write(_v, size); \ asm volatile( \ " la 1,%[vxrs]\n" \ - " VSTM %[v1],%[v3],0,1\n" \ + " VSTM %[v1],%[v3],0,1" \ : [vxrs] "=R" (*_v) \ : [v1] "I" (_v1), [v3] "I" (_v3) \ : "memory", "1"); \ diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index f870d09515cc..c2ba3d4398c5 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -356,7 +356,7 @@ struct kvm_s390_float_interrupt { int counters[FIRQ_MAX_COUNT]; struct kvm_s390_mchk_info mchk; struct kvm_s390_ext_info srv_signal; - int next_rr_cpu; + int last_sleep_cpu; struct mutex ais_lock; u8 simm; u8 nimm; @@ -722,6 +722,8 @@ extern int kvm_s390_enter_exit_sie(struct kvm_s390_sie_block *scb, extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc); extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc); +bool kvm_s390_is_gpa_in_memslot(struct kvm *kvm, gpa_t gpa); + static inline void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) {} static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {} diff --git a/arch/s390/include/asm/kvm_para.h b/arch/s390/include/asm/kvm_para.h index df73a052760c..00cc8c916cfb 100644 --- a/arch/s390/include/asm/kvm_para.h +++ b/arch/s390/include/asm/kvm_para.h @@ -76,7 +76,7 @@ long __kvm_hypercall##args(unsigned long nr HYPERCALL_PARM_##args) \ HYPERCALL_REGS_##args; \ \ asm volatile ( \ - " diag 2,4,0x500\n" \ + " diag 2,4,0x500" \ : "=d" (__rc) \ : "d" (__nr) HYPERCALL_FMT_##args \ : "memory", "cc"); \ diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 41f900f693d9..6890925d5587 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -246,6 +246,16 @@ int clp_refresh_fh(u32 fid, u32 *fh); /* UID */ void update_uid_checking(bool new); +/* Firmware Sysfs */ +int __init __zpci_fw_sysfs_init(void); + +static inline int __init zpci_fw_sysfs_init(void) +{ + if (IS_ENABLED(CONFIG_SYSFS)) + return __zpci_fw_sysfs_init(); + return 0; +} + /* IOMMU Interface */ int zpci_init_iommu(struct zpci_dev *zdev); void zpci_destroy_iommu(struct zpci_dev *zdev); diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h index 96af7d964014..965886dfe954 100644 --- a/arch/s390/include/asm/percpu.h +++ b/arch/s390/include/asm/percpu.h @@ -73,13 +73,13 @@ if (__builtin_constant_p(val__) && \ ((szcast)val__ > -129) && ((szcast)val__ < 128)) { \ asm volatile( \ - op2 " %[ptr__],%[val__]\n" \ + op2 " %[ptr__],%[val__]" \ : [ptr__] "+Q" (*ptr__) \ : [val__] "i" ((szcast)val__) \ : "cc"); \ } else { \ asm volatile( \ - op1 " 
%[old__],%[val__],%[ptr__]\n" \ + op1 " %[old__],%[val__],%[ptr__]" \ : [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \ : [val__] "d" (val__) \ : "cc"); \ @@ -98,7 +98,7 @@ preempt_disable_notrace(); \ ptr__ = raw_cpu_ptr(&(pcp)); \ asm volatile( \ - op " %[old__],%[val__],%[ptr__]\n" \ + op " %[old__],%[val__],%[ptr__]" \ : [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \ : [val__] "d" (val__) \ : "cc"); \ @@ -117,7 +117,7 @@ preempt_disable_notrace(); \ ptr__ = raw_cpu_ptr(&(pcp)); \ asm volatile( \ - op " %[old__],%[val__],%[ptr__]\n" \ + op " %[old__],%[val__],%[ptr__]" \ : [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \ : [val__] "d" (val__) \ : "cc"); \ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index c1a7a92f0575..b7100c6a4054 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -2055,4 +2055,26 @@ static inline unsigned long gmap_pgste_get_pgt_addr(unsigned long *pgt) return res; } +static inline pgste_t pgste_get_lock(pte_t *ptep) +{ + unsigned long value = 0; +#ifdef CONFIG_PGSTE + unsigned long *ptr = (unsigned long *)(ptep + PTRS_PER_PTE); + + do { + value = __atomic64_or_barrier(PGSTE_PCL_BIT, ptr); + } while (value & PGSTE_PCL_BIT); + value |= PGSTE_PCL_BIT; +#endif + return __pgste(value); +} + +static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste) +{ +#ifdef CONFIG_PGSTE + barrier(); + WRITE_ONCE(*(unsigned long *)(ptep + PTRS_PER_PTE), pgste_val(pgste) & ~PGSTE_PCL_BIT); +#endif +} + #endif /* _S390_PAGE_H */ diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 6a9c08b80eda..93e1034485d7 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -163,7 +163,7 @@ static __always_inline void __stackleak_poison(unsigned long erase_low, "2: stg %[poison],0(%[addr])\n" " j 4f\n" "3: mvc 8(1,%[addr]),0(%[addr])\n" - "4:\n" + "4:" : [addr] "+&a" (erase_low), [count] "+&d" (count), [tmp] "=&a" (tmp) : [poison] "d" (poison) : "memory", "cc" diff --git a/arch/s390/include/asm/rwonce.h b/arch/s390/include/asm/rwonce.h index 91fc24520e82..402325ec20f0 100644 --- a/arch/s390/include/asm/rwonce.h +++ b/arch/s390/include/asm/rwonce.h @@ -19,7 +19,7 @@ \ BUILD_BUG_ON(sizeof(x) != 16); \ asm volatile( \ - " lpq %[val],%[_x]\n" \ + " lpq %[val],%[_x]" \ : [val] "=d" (__u.val) \ : [_x] "QS" (x) \ : "memory"); \ diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index f9935db9fd76..b06b183b7246 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -98,7 +98,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lp) kcsan_release(); asm_inline volatile( ALTERNATIVE("nop", ".insn rre,0xb2fa0000,7,0", ALT_FACILITY(49)) /* NIAI 7 */ - " mvhhi %[lock],0\n" + " mvhhi %[lock],0" : [lock] "=Q" (((unsigned short *)&lp->lock)[1]) : : "memory"); diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h index 1d5ca13dc90f..810a6b9d9628 100644 --- a/arch/s390/include/asm/stacktrace.h +++ b/arch/s390/include/asm/stacktrace.h @@ -199,7 +199,7 @@ static __always_inline unsigned long get_stack_pointer(struct task_struct *task, " lg 15,%[_stack]\n" \ " stg %[_frame],%[_bc](15)\n" \ " brasl 14,%[_fn]\n" \ - " lgr 15,%[_prev]\n" \ + " lgr 15,%[_prev]" \ : [_prev] "=&d" (prev), CALL_FMT_##nr \ : [_stack] "R" (__stack), \ [_bc] "i" (offsetof(struct stack_frame, back_chain)), \ @@ -250,7 +250,7 @@ static __always_inline unsigned long get_stack_pointer(struct task_struct *task, " lra 
14,0(1)\n" \ " lpswe %[psw_enter]\n" \ "0: lpswe 0(7)\n" \ - "1:\n" \ + "1:" \ : CALL_FMT_##nr, [psw_leave] "=Q" (psw_leave) \ : [psw_enter] "Q" (psw_enter) \ : "7", CALL_CLOBBER_##nr); \ diff --git a/arch/s390/include/asm/string.h b/arch/s390/include/asm/string.h index f8f68f4ef255..238e721e5a22 100644 --- a/arch/s390/include/asm/string.h +++ b/arch/s390/include/asm/string.h @@ -125,7 +125,7 @@ static inline void *memscan(void *s, int c, size_t n) asm volatile( " lgr 0,%[c]\n" "0: srst %[ret],%[s]\n" - " jo 0b\n" + " jo 0b" : [ret] "+&a" (ret), [s] "+&a" (s) : [c] "d" (c) : "cc", "memory", "0"); diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index bd4cb00ccd5e..10ce5c4ccbd6 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -155,7 +155,7 @@ long syscall##nr(unsigned long syscall SYSCALL_PARM_##nr) \ SYSCALL_REGS_##nr; \ \ asm volatile ( \ - " svc 0\n" \ + " svc 0" \ : "=d" (rc) \ : "d" (r1) SYSCALL_FMT_##nr \ : "memory"); \ diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 59dfb8780f62..49447b40f038 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -81,7 +81,7 @@ static inline void set_tod_programmable_field(u16 val) { asm volatile( " lgr 0,%[val]\n" - " sckpf\n" + " sckpf" : : [val] "d" ((unsigned long)val) : "0"); diff --git a/arch/s390/kernel/diag/diag310.c b/arch/s390/kernel/diag/diag310.c index d6a34454aa5a..f411562aa7f6 100644 --- a/arch/s390/kernel/diag/diag310.c +++ b/arch/s390/kernel/diag/diag310.c @@ -66,7 +66,7 @@ static inline unsigned long diag310(unsigned long subcode, unsigned long size, v union register_pair rp = { .even = (unsigned long)addr, .odd = size }; diag_stat_inc(DIAG_STAT_X310); - asm volatile("diag %[rp],%[subcode],0x310\n" + asm volatile("diag %[rp],%[subcode],0x310" : [rp] "+d" (rp.pair) : [subcode] "d" (subcode) : "memory"); diff --git a/arch/s390/kernel/diag/diag324.c b/arch/s390/kernel/diag/diag324.c index f0a8b4841fb9..fe325c2a2d0d 100644 --- a/arch/s390/kernel/diag/diag324.c +++ b/arch/s390/kernel/diag/diag324.c @@ -101,7 +101,7 @@ static unsigned long diag324(unsigned long subcode, void *addr) union register_pair rp = { .even = (unsigned long)addr }; diag_stat_inc(DIAG_STAT_X324); - asm volatile("diag %[rp],%[subcode],0x324\n" + asm volatile("diag %[rp],%[subcode],0x324" : [rp] "+d" (rp.pair) : [subcode] "d" (subcode) : "memory"); diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index 9455f213dc20..62bf8a15bf32 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -80,6 +80,15 @@ static int paicrypt_root_alloc(void) /* Release the PMU if event is the last perf event */ static DEFINE_MUTEX(pai_reserve_mutex); +/* Free all memory allocated for event counting/sampling setup */ +static void paicrypt_free(struct paicrypt_mapptr *mp) +{ + free_page((unsigned long)mp->mapptr->page); + kvfree(mp->mapptr->save); + kfree(mp->mapptr); + mp->mapptr = NULL; +} + /* Adjust usage counters and remove allocated memory when all users are * gone. 
*/ @@ -93,15 +102,8 @@ static void paicrypt_event_destroy_cpu(struct perf_event *event, int cpu) "refcnt %u\n", __func__, event->attr.config, event->cpu, cpump->active_events, refcount_read(&cpump->refcnt)); - if (refcount_dec_and_test(&cpump->refcnt)) { - debug_sprintf_event(cfm_dbg, 4, "%s page %#lx save %p\n", - __func__, (unsigned long)cpump->page, - cpump->save); - free_page((unsigned long)cpump->page); - kvfree(cpump->save); - kfree(cpump); - mp->mapptr = NULL; - } + if (refcount_dec_and_test(&cpump->refcnt)) + paicrypt_free(mp); paicrypt_root_free(); mutex_unlock(&pai_reserve_mutex); } @@ -175,14 +177,13 @@ static u64 paicrypt_getall(struct perf_event *event) * * Allocate the memory for the event. */ -static struct paicrypt_map *paicrypt_busy(struct perf_event *event, int cpu) +static int paicrypt_alloc_cpu(struct perf_event *event, int cpu) { struct paicrypt_map *cpump = NULL; struct paicrypt_mapptr *mp; int rc; mutex_lock(&pai_reserve_mutex); - /* Allocate root node */ rc = paicrypt_root_alloc(); if (rc) @@ -192,58 +193,44 @@ static struct paicrypt_map *paicrypt_busy(struct perf_event *event, int cpu) mp = per_cpu_ptr(paicrypt_root.mapptr, cpu); cpump = mp->mapptr; if (!cpump) { /* Paicrypt_map allocated? */ + rc = -ENOMEM; cpump = kzalloc(sizeof(*cpump), GFP_KERNEL); - if (!cpump) { - rc = -ENOMEM; - goto free_root; + if (!cpump) + goto undo; + /* Allocate memory for counter page and counter extraction. + * Only the first counting event has to allocate a page. + */ + mp->mapptr = cpump; + cpump->page = (unsigned long *)get_zeroed_page(GFP_KERNEL); + cpump->save = kvmalloc_array(paicrypt_cnt + 1, + sizeof(struct pai_userdata), + GFP_KERNEL); + if (!cpump->page || !cpump->save) { + paicrypt_free(mp); + goto undo; } INIT_LIST_HEAD(&cpump->syswide_list); - } - - /* Allocate memory for counter page and counter extraction. - * Only the first counting event has to allocate a page. - */ - if (cpump->page) { + refcount_set(&cpump->refcnt, 1); + rc = 0; + } else { refcount_inc(&cpump->refcnt); - goto unlock; } - rc = -ENOMEM; - cpump->page = (unsigned long *)get_zeroed_page(GFP_KERNEL); - if (!cpump->page) - goto free_paicrypt_map; - cpump->save = kvmalloc_array(paicrypt_cnt + 1, - sizeof(struct pai_userdata), GFP_KERNEL); - if (!cpump->save) { - free_page((unsigned long)cpump->page); - cpump->page = NULL; - goto free_paicrypt_map; +undo: + if (rc) { + /* Error in allocation of event, decrement anchor. Since + * the event in not created, its destroy() function is never + * invoked. Adjust the reference counter for the anchor. + */ + paicrypt_root_free(); } - - /* Set mode and reference count */ - rc = 0; - refcount_set(&cpump->refcnt, 1); - mp->mapptr = cpump; - debug_sprintf_event(cfm_dbg, 5, "%s users %d refcnt %u page %#lx " - "save %p rc %d\n", __func__, cpump->active_events, - refcount_read(&cpump->refcnt), - (unsigned long)cpump->page, cpump->save, rc); - goto unlock; - -free_paicrypt_map: - /* Undo memory allocation */ - kfree(cpump); - mp->mapptr = NULL; -free_root: - paicrypt_root_free(); unlock: mutex_unlock(&pai_reserve_mutex); - return rc ? 
ERR_PTR(rc) : cpump; + return rc; } -static int paicrypt_event_init_all(struct perf_event *event) +static int paicrypt_alloc(struct perf_event *event) { - struct paicrypt_map *cpump; struct cpumask *maskptr; int cpu, rc = -ENOMEM; @@ -252,12 +239,11 @@ static int paicrypt_event_init_all(struct perf_event *event) goto out; for_each_online_cpu(cpu) { - cpump = paicrypt_busy(event, cpu); - if (IS_ERR(cpump)) { + rc = paicrypt_alloc_cpu(event, cpu); + if (rc) { for_each_cpu(cpu, maskptr) paicrypt_event_destroy_cpu(event, cpu); kfree(maskptr); - rc = PTR_ERR(cpump); goto out; } cpumask_set_cpu(cpu, maskptr); @@ -279,7 +265,6 @@ out: static int paicrypt_event_init(struct perf_event *event) { struct perf_event_attr *a = &event->attr; - struct paicrypt_map *cpump; int rc = 0; /* PAI crypto PMU registered as PERF_TYPE_RAW, check event type */ @@ -301,13 +286,10 @@ static int paicrypt_event_init(struct perf_event *event) } } - if (event->cpu >= 0) { - cpump = paicrypt_busy(event, event->cpu); - if (IS_ERR(cpump)) - rc = PTR_ERR(cpump); - } else { - rc = paicrypt_event_init_all(event); - } + if (event->cpu >= 0) + rc = paicrypt_alloc_cpu(event, event->cpu); + else + rc = paicrypt_alloc(event); if (rc) { free_page(PAI_SAVE_AREA(event)); goto out; diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 7b529868789f..892fce2b7549 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -839,7 +839,7 @@ static void __init setup_control_program_code(void) return; diag_stat_inc(DIAG_STAT_X318); - asm volatile("diag %0,0,0x318\n" : : "d" (diag318_info.val)); + asm volatile("diag %0,0,0x318" : : "d" (diag318_info.val)); } /* diff --git a/arch/s390/kernel/skey.c b/arch/s390/kernel/skey.c index ba049fd103c2..cc869de6e3a5 100644 --- a/arch/s390/kernel/skey.c +++ b/arch/s390/kernel/skey.c @@ -11,7 +11,7 @@ static inline unsigned long load_real_address(unsigned long address) unsigned long real; asm volatile( - " lra %[real],0(%[address])\n" + " lra %[real],0(%[address])" : [real] "=d" (real) : [address] "a" (address) : "cc"); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index e88ebe5339fc..da84c0dc6b7e 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -340,7 +340,7 @@ static void pcpu_delegate(struct pcpu *pcpu, int cpu, "0: sigp 0,%0,%2 # sigp restart to target cpu\n" " brc 2,0b # busy, try again\n" "1: sigp 0,%1,%3 # sigp stop to current cpu\n" - " brc 2,1b # busy, try again\n" + " brc 2,1b # busy, try again" : : "d" (pcpu->address), "d" (source_cpu), "K" (SIGP_RESTART), "K" (SIGP_STOP) : "0", "1", "cc"); diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 93b2a01bae40..5d17609bcfe1 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -866,8 +866,8 @@ static int find_secret_in_page(const u8 secret_id[UV_SECRET_ID_LEN], return -ENOENT; } -/* - * Do the actual search for `uv_get_secret_metadata`. +/** + * uv_find_secret() - search secret metadata for a given secret id. * @secret_id: search pattern. * @list: ephemeral buffer space * @secret: output data, containing the secret's metadata. diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index feecf1a6ddb4..d74d4c52ccd0 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -215,6 +215,28 @@ SECTIONS ELF_DETAILS /* + * Make sure that the .got.plt is either completely empty or it + * contains only the three reserved double words. 
+ */ + .got.plt : { + *(.got.plt) + } + ASSERT(SIZEOF(.got.plt) == 0 || SIZEOF(.got.plt) == 0x18, "Unexpected GOT/PLT entries detected!") + + /* + * Sections that should stay zero sized, which is safer to + * explicitly check instead of blindly discarding. + */ + .plt : { + *(.plt) *(.plt.*) *(.iplt) *(.igot .igot.plt) + } + ASSERT(SIZEOF(.plt) == 0, "Unexpected run-time procedure linkages detected!") + .rela.dyn : { + *(.rela.*) *(.rela_*) + } + ASSERT(SIZEOF(.rela.dyn) == 0, "Unexpected run-time relocations (.rela) detected!") + + /* * uncompressed image info used by the decompressor * it should match struct vmlinux_info */ @@ -244,28 +266,6 @@ SECTIONS #endif } :NONE - /* - * Make sure that the .got.plt is either completely empty or it - * contains only the three reserved double words. - */ - .got.plt : { - *(.got.plt) - } - ASSERT(SIZEOF(.got.plt) == 0 || SIZEOF(.got.plt) == 0x18, "Unexpected GOT/PLT entries detected!") - - /* - * Sections that should stay zero sized, which is safer to - * explicitly check instead of blindly discarding. - */ - .plt : { - *(.plt) *(.plt.*) *(.iplt) *(.igot .igot.plt) - } - ASSERT(SIZEOF(.plt) == 0, "Unexpected run-time procedure linkages detected!") - .rela.dyn : { - *(.rela.*) *(.rela_*) - } - ASSERT(SIZEOF(.rela.dyn) == 0, "Unexpected run-time relocations (.rela) detected!") - /* Sections to be discarded */ DISCARDS /DISCARD/ : { diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 9384572ffa7b..c62a868cf2b6 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1323,6 +1323,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) VCPU_EVENT(vcpu, 4, "enabled wait: %llu ns", sltime); no_timer: kvm_vcpu_srcu_read_unlock(vcpu); + vcpu->kvm->arch.float_int.last_sleep_cpu = vcpu->vcpu_idx; kvm_vcpu_halt(vcpu); vcpu->valid_wakeup = false; __unset_cpu_idle(vcpu); @@ -1949,18 +1950,15 @@ static void __floating_irq_kick(struct kvm *kvm, u64 type) if (!online_vcpus) return; - /* find idle VCPUs first, then round robin */ - sigcpu = find_first_bit(kvm->arch.idle_mask, online_vcpus); - if (sigcpu == online_vcpus) { - do { - sigcpu = kvm->arch.float_int.next_rr_cpu++; - kvm->arch.float_int.next_rr_cpu %= online_vcpus; - /* avoid endless loops if all vcpus are stopped */ - if (nr_tries++ >= online_vcpus) - return; - } while (is_vcpu_stopped(kvm_get_vcpu(kvm, sigcpu))); + for (sigcpu = kvm->arch.float_int.last_sleep_cpu; ; sigcpu++) { + sigcpu %= online_vcpus; + dst_vcpu = kvm_get_vcpu(kvm, sigcpu); + if (!is_vcpu_stopped(dst_vcpu)) + break; + /* avoid endless loops if all vcpus are stopped */ + if (nr_tries++ >= online_vcpus) + return; } - dst_vcpu = kvm_get_vcpu(kvm, sigcpu); /* make the VCPU drop out of the SIE, or wake it up if sleeping */ switch (type) { diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 6d51aa5f66be..16ba04062854 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -356,7 +356,7 @@ static __always_inline void pfcr_query(u8 (*query)[16]) { asm volatile( " lghi 0,0\n" - " .insn rsy,0xeb0000000016,0,0,%[query]\n" + " .insn rsy,0xeb0000000016,0,0,%[query]" : [query] "=QS" (*query) : : "cc", "0"); @@ -368,7 +368,7 @@ static __always_inline void __sortl_query(u8 (*query)[32]) " lghi 0,0\n" " la 1,%[query]\n" /* Parameter registers are ignored */ - " .insn rre,0xb9380000,2,4\n" + " .insn rre,0xb9380000,2,4" : [query] "=R" (*query) : : "cc", "0", "1"); @@ -380,7 +380,7 @@ static __always_inline void __dfltcc_query(u8 (*query)[32]) " lghi 0,0\n" " la 1,%[query]\n" /* Parameter 
registers are ignored */ - " .insn rrf,0xb9390000,2,4,6,0\n" + " .insn rrf,0xb9390000,2,4,6,0" : [query] "=R" (*query) : : "cc", "0", "1"); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 9253c70897a8..9a71b6e00948 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -605,6 +605,14 @@ static int handle_io_inst(struct kvm_vcpu *vcpu) } } +#if IS_ENABLED(CONFIG_VFIO_AP) +bool kvm_s390_is_gpa_in_memslot(struct kvm *kvm, gpa_t gpa) +{ + return kvm_is_gpa_in_memslot(kvm, gpa); +} +EXPORT_SYMBOL_FOR_MODULES(kvm_s390_is_gpa_in_memslot, "vfio_ap"); +#endif + /* * handle_pqap: Handling pqap interception * @vcpu: the vcpu having issue the pqap instruction diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index ad9da4038511..10db1e56a811 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -96,7 +96,7 @@ static inline int arch_load_niai4(int *lock) asm_inline volatile( ALTERNATIVE("nop", ".insn rre,0xb2fa0000,4,0", ALT_FACILITY(49)) /* NIAI 4 */ - " l %[owner],%[lock]\n" + " l %[owner],%[lock]" : [owner] "=d" (owner) : [lock] "R" (*lock) : "memory"); return owner; } @@ -109,7 +109,7 @@ static inline int arch_try_cmpxchg_niai8(int *lock, int old, int new) asm_inline volatile( ALTERNATIVE("nop", ".insn rre,0xb2fa0000,8,0", ALT_FACILITY(49)) /* NIAI 8 */ - " cs %[old],%[new],%[lock]\n" + " cs %[old],%[new],%[lock]" : [old] "+d" (old), [lock] "+Q" (*lock), "=@cc" (cc) : [new] "d" (new) : "memory"); @@ -124,7 +124,7 @@ static inline int arch_try_cmpxchg_niai8(int *lock, int old, int new) asm_inline volatile( ALTERNATIVE("nop", ".insn rre,0xb2fa0000,8,0", ALT_FACILITY(49)) /* NIAI 8 */ - " cs %[old],%[new],%[lock]\n" + " cs %[old],%[new],%[lock]" : [old] "+d" (old), [lock] "+Q" (*lock) : [new] "d" (new) : "cc", "memory"); diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c index 099de76e8b1a..757f58960198 100644 --- a/arch/s390/lib/string.c +++ b/arch/s390/lib/string.c @@ -27,7 +27,7 @@ static inline char *__strend(const char *s) asm volatile( " lghi 0,0\n" "0: srst %[e],%[s]\n" - " jo 0b\n" + " jo 0b" : [e] "+&a" (e), [s] "+&a" (s) : : "cc", "memory", "0"); @@ -41,7 +41,7 @@ static inline char *__strnend(const char *s, size_t n) asm volatile( " lghi 0,0\n" "0: srst %[p],%[s]\n" - " jo 0b\n" + " jo 0b" : [p] "+&d" (p), [s] "+&a" (s) : : "cc", "memory", "0"); @@ -95,7 +95,7 @@ char *strcat(char *dest, const char *src) "0: srst %[dummy],%[dest]\n" " jo 0b\n" "1: mvst %[dummy],%[src]\n" - " jo 1b\n" + " jo 1b" : [dummy] "+&a" (dummy), [dest] "+&a" (dest), [src] "+&a" (src) : : "cc", "memory", "0"); @@ -291,7 +291,7 @@ void *memscan(void *s, int c, size_t n) asm volatile( " lgr 0,%[c]\n" "0: srst %[ret],%[s]\n" - " jo 0b\n" + " jo 0b" : [ret] "+&a" (ret), [s] "+&a" (s) : [c] "d" (c) : "cc", "memory", "0"); diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index 6e42100875e7..6bb3fa5bf925 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -150,7 +150,7 @@ static __always_inline struct pt_regs fake_pt_regs(void) regs.gprs[15] = current_stack_pointer; asm volatile( - "basr %[psw_addr],0\n" + "basr %[psw_addr],0" : [psw_addr] "=d" (regs.psw.addr)); return regs; } @@ -232,7 +232,7 @@ static noinline void test_unwind_kprobed_func(void) asm volatile( " nopr %%r7\n" "test_unwind_kprobed_insn:\n" - " nopr %%r7\n" + " nopr %%r7" :); } diff --git a/arch/s390/lib/xor.c b/arch/s390/lib/xor.c index ce7bcf7c0032..1721b73b7803 100644 --- a/arch/s390/lib/xor.c +++ b/arch/s390/lib/xor.c @@ -27,7 +27,7 @@ static 
void xor_xc_2(unsigned long bytes, unsigned long * __restrict p1, "1: exrl %0,2f\n" " j 3f\n" "2: xc 0(1,%1),0(%2)\n" - "3:\n" + "3:" : : "d" (bytes), "a" (p1), "a" (p2) : "0", "cc", "memory"); } @@ -53,7 +53,7 @@ static void xor_xc_3(unsigned long bytes, unsigned long * __restrict p1, " j 4f\n" "2: xc 0(1,%1),0(%2)\n" "3: xc 0(1,%1),0(%3)\n" - "4:\n" + "4:" : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3) : : "0", "cc", "memory"); } @@ -84,7 +84,7 @@ static void xor_xc_4(unsigned long bytes, unsigned long * __restrict p1, "2: xc 0(1,%1),0(%2)\n" "3: xc 0(1,%1),0(%3)\n" "4: xc 0(1,%1),0(%4)\n" - "5:\n" + "5:" : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4) : : "0", "cc", "memory"); } @@ -121,7 +121,7 @@ static void xor_xc_5(unsigned long bytes, unsigned long * __restrict p1, "3: xc 0(1,%1),0(%3)\n" "4: xc 0(1,%1),0(%4)\n" "5: xc 0(1,%1),0(%5)\n" - "6:\n" + "6:" : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4), "+a" (p5) : : "0", "cc", "memory"); diff --git a/arch/s390/mm/gmap_helpers.c b/arch/s390/mm/gmap_helpers.c index b63f427e7289..d4c3c36855e2 100644 --- a/arch/s390/mm/gmap_helpers.c +++ b/arch/s390/mm/gmap_helpers.c @@ -15,6 +15,7 @@ #include <linux/pagewalk.h> #include <linux/ksm.h> #include <asm/gmap_helpers.h> +#include <asm/pgtable.h> /** * ptep_zap_swap_entry() - discard a swap entry. @@ -47,6 +48,7 @@ void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr) { struct vm_area_struct *vma; spinlock_t *ptl; + pgste_t pgste; pte_t *ptep; mmap_assert_locked(mm); @@ -60,8 +62,16 @@ void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr) ptep = get_locked_pte(mm, vmaddr, &ptl); if (unlikely(!ptep)) return; - if (pte_swap(*ptep)) + if (pte_swap(*ptep)) { + preempt_disable(); + pgste = pgste_get_lock(ptep); + ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep)); + pte_clear(mm, vmaddr, ptep); + + pgste_set_unlock(ptep, pgste); + preempt_enable(); + } pte_unmap_unlock(ptep, ptl); } EXPORT_SYMBOL_GPL(gmap_helper_zap_one_page); diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 44426e0f2944..cfd219fe495c 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -41,7 +41,7 @@ static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t siz " ex %1,0(1)\n" " lg %1,0(%3)\n" " lra %0,0(%0)\n" - " sturg %1,%0\n" + " sturg %1,%0" : "+&a" (aligned), "+&a" (count), "=m" (tmp) : "a" (&tmp), "a" (&tmp[offset]), "a" (src) : "cc", "memory", "1"); diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 76d92069799f..626fca116cd7 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -245,7 +245,7 @@ static inline unsigned long base_lra(unsigned long address) unsigned long real; asm volatile( - " lra %0,0(%1)\n" + " lra %0,0(%1)" : "=d" (real) : "a" (address) : "cc"); return real; } diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 50eb57c976bc..0fde20bbc50b 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -24,6 +24,7 @@ #include <asm/tlbflush.h> #include <asm/mmu_context.h> #include <asm/page-states.h> +#include <asm/pgtable.h> #include <asm/machine.h> pgprot_t pgprot_writecombine(pgprot_t prot) @@ -115,28 +116,6 @@ static inline pte_t ptep_flush_lazy(struct mm_struct *mm, return old; } -static inline pgste_t pgste_get_lock(pte_t *ptep) -{ - unsigned long value = 0; -#ifdef CONFIG_PGSTE - unsigned long *ptr = (unsigned long *)(ptep + PTRS_PER_PTE); - - do { - value = __atomic64_or_barrier(PGSTE_PCL_BIT, ptr); - } while (value & PGSTE_PCL_BIT); - value 
|= PGSTE_PCL_BIT; -#endif - return __pgste(value); -} - -static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste) -{ -#ifdef CONFIG_PGSTE - barrier(); - WRITE_ONCE(*(unsigned long *)(ptep + PTRS_PER_PTE), pgste_val(pgste) & ~PGSTE_PCL_BIT); -#endif -} - static inline pgste_t pgste_get(pte_t *ptep) { unsigned long pgste = 0; diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index cd6676c2d602..c82c577db2bc 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -1188,6 +1188,10 @@ static int __init pci_base_init(void) if (rc) goto out_find; + rc = zpci_fw_sysfs_init(); + if (rc) + goto out_find; + s390_pci_initialized = 1; return 0; diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index d930416d4c90..b95376041501 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -88,6 +88,7 @@ static pci_ers_result_t zpci_event_notify_error_detected(struct pci_dev *pdev, pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT; ers_res = driver->err_handler->error_detected(pdev, pdev->error_state); + pci_uevent_ers(pdev, ers_res); if (ers_result_indicates_abort(ers_res)) pr_info("%s: Automatic recovery failed after initial reporting\n", pci_name(pdev)); else if (ers_res == PCI_ERS_RESULT_NEED_RESET) @@ -244,6 +245,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) ers_res = PCI_ERS_RESULT_RECOVERED; if (ers_res != PCI_ERS_RESULT_RECOVERED) { + pci_uevent_ers(pdev, PCI_ERS_RESULT_DISCONNECT); pr_err("%s: Automatic recovery failed; operator intervention is required\n", pci_name(pdev)); status_str = "failed (driver can't recover)"; @@ -253,6 +255,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) pr_info("%s: The device is ready to resume operations\n", pci_name(pdev)); if (driver->err_handler->resume) driver->err_handler->resume(pdev); + pci_uevent_ers(pdev, PCI_ERS_RESULT_RECOVERED); out_unlock: pci_dev_unlock(pdev); zpci_report_status(zdev, "recovery", status_str); diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c index eb978c8012be..35ceb1bea1c6 100644 --- a/arch/s390/pci/pci_insn.c +++ b/arch/s390/pci/pci_insn.c @@ -145,7 +145,7 @@ int zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib) return -EIO; asm volatile( - ".insn rsy,0xeb00000000d1,%[ctl],%[isc],%[iib]\n" + ".insn rsy,0xeb00000000d1,%[ctl],%[isc],%[iib]" : : [ctl] "d" (ctl), [isc] "d" (isc << 27), [iib] "Q" (*iib)); return 0; @@ -442,7 +442,7 @@ EXPORT_SYMBOL_GPL(zpci_write_block); static inline void __pciwb_mio(void) { - asm volatile (".insn rre,0xb9d50000,0,0\n"); + asm volatile (".insn rre,0xb9d50000,0,0"); } void zpci_barrier(void) diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c index 0ee0924cfab7..12060870e2aa 100644 --- a/arch/s390/pci/pci_sysfs.c +++ b/arch/s390/pci/pci_sysfs.c @@ -41,6 +41,9 @@ zpci_attr(segment1, "0x%02x\n", pfip[1]); zpci_attr(segment2, "0x%02x\n", pfip[2]); zpci_attr(segment3, "0x%02x\n", pfip[3]); +#define ZPCI_FW_ATTR_RO(_name) \ + static struct kobj_attribute _name##_attr = __ATTR_RO(_name) + static ssize_t mio_enabled_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -164,6 +167,13 @@ static ssize_t uid_is_unique_show(struct device *dev, } static DEVICE_ATTR_RO(uid_is_unique); +static ssize_t uid_checking_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%d\n", zpci_unique_uid ? 
1 : 0); +} +ZPCI_FW_ATTR_RO(uid_checking); + /* analogous to smbios index */ static ssize_t index_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -233,3 +243,18 @@ const struct attribute_group pfip_attr_group = { .name = "pfip", .attrs = pfip_attrs, }; + +static struct attribute *clp_fw_attrs[] = { + &uid_checking_attr.attr, + NULL, +}; + +static struct attribute_group clp_fw_attr_group = { + .name = "clp", + .attrs = clp_fw_attrs, +}; + +int __init __zpci_fw_sysfs_init(void) +{ + return sysfs_create_group(firmware_kobj, &clp_fw_attr_group); +} diff --git a/arch/sh/configs/ap325rxa_defconfig b/arch/sh/configs/ap325rxa_defconfig index b6f36c938f1d..48b2e97114f9 100644 --- a/arch/sh/configs/ap325rxa_defconfig +++ b/arch/sh/configs/ap325rxa_defconfig @@ -81,10 +81,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y diff --git a/arch/sh/configs/apsh4a3a_defconfig b/arch/sh/configs/apsh4a3a_defconfig index 9c2644443c4d..85db9ce42d1a 100644 --- a/arch/sh/configs/apsh4a3a_defconfig +++ b/arch/sh/configs/apsh4a3a_defconfig @@ -60,8 +60,7 @@ CONFIG_FONT_8x16=y CONFIG_LOGO=y # CONFIG_USB_SUPPORT is not set CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y diff --git a/arch/sh/configs/apsh4ad0a_defconfig b/arch/sh/configs/apsh4ad0a_defconfig index 137573610ec4..e8b3b720578b 100644 --- a/arch/sh/configs/apsh4ad0a_defconfig +++ b/arch/sh/configs/apsh4ad0a_defconfig @@ -88,8 +88,7 @@ CONFIG_USB_MON=y CONFIG_USB_OHCI_HCD=y CONFIG_USB_STORAGE=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y diff --git a/arch/sh/configs/ecovec24_defconfig b/arch/sh/configs/ecovec24_defconfig index e76694aace25..fcca7cc5a75a 100644 --- a/arch/sh/configs/ecovec24_defconfig +++ b/arch/sh/configs/ecovec24_defconfig @@ -109,10 +109,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y diff --git a/arch/sh/configs/edosk7760_defconfig b/arch/sh/configs/edosk7760_defconfig index f427a95bcd21..98f4611ba553 100644 --- a/arch/sh/configs/edosk7760_defconfig +++ b/arch/sh/configs/edosk7760_defconfig @@ -87,8 +87,7 @@ CONFIG_SND_SOC=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_XIP=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_NFS_FS=y diff --git a/arch/sh/configs/espt_defconfig b/arch/sh/configs/espt_defconfig index da176f100e00..e5d102cbff89 100644 --- a/arch/sh/configs/espt_defconfig +++ b/arch/sh/configs/espt_defconfig @@ -59,8 +59,7 @@ CONFIG_USB_MON=y CONFIG_USB_OHCI_HCD=y CONFIG_USB_STORAGE=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_AUTOFS_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y diff --git a/arch/sh/configs/landisk_defconfig b/arch/sh/configs/landisk_defconfig index 
924bb3233b0b..22177aa8f961 100644 --- a/arch/sh/configs/landisk_defconfig +++ b/arch/sh/configs/landisk_defconfig @@ -93,8 +93,7 @@ CONFIG_USB_EMI62=m CONFIG_USB_EMI26=m CONFIG_USB_SISUSBVGA=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_ISO9660_FS=m CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/sh/configs/lboxre2_defconfig b/arch/sh/configs/lboxre2_defconfig index 0307bb2be79f..ff992301622b 100644 --- a/arch/sh/configs/lboxre2_defconfig +++ b/arch/sh/configs/lboxre2_defconfig @@ -49,8 +49,7 @@ CONFIG_SERIAL_SH_SCI_CONSOLE=y CONFIG_HW_RANDOM=y CONFIG_RTC_CLASS=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y diff --git a/arch/sh/configs/magicpanelr2_defconfig b/arch/sh/configs/magicpanelr2_defconfig index 93b9aa32dc7c..a29fb912a242 100644 --- a/arch/sh/configs/magicpanelr2_defconfig +++ b/arch/sh/configs/magicpanelr2_defconfig @@ -64,9 +64,8 @@ CONFIG_RTC_CLASS=y # CONFIG_RTC_HCTOSYS is not set CONFIG_RTC_DRV_SH=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -# CONFIG_EXT3_FS_XATTR is not set +CONFIG_EXT4_FS=y +# CONFIG_EXT4_FS_XATTR is not set # CONFIG_DNOTIFY is not set CONFIG_PROC_KCORE=y CONFIG_TMPFS=y diff --git a/arch/sh/configs/r7780mp_defconfig b/arch/sh/configs/r7780mp_defconfig index f28b8c4181c2..58b792dacfec 100644 --- a/arch/sh/configs/r7780mp_defconfig +++ b/arch/sh/configs/r7780mp_defconfig @@ -74,8 +74,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_RS5C372=y CONFIG_RTC_DRV_SH=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_FUSE_FS=m CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/sh/configs/r7785rp_defconfig b/arch/sh/configs/r7785rp_defconfig index 3a4239f20ff1..7edf18451158 100644 --- a/arch/sh/configs/r7785rp_defconfig +++ b/arch/sh/configs/r7785rp_defconfig @@ -69,8 +69,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_RS5C372=y CONFIG_RTC_DRV_SH=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_FUSE_FS=m CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/sh/configs/rsk7264_defconfig b/arch/sh/configs/rsk7264_defconfig index e4ef259425c4..28a81efefb02 100644 --- a/arch/sh/configs/rsk7264_defconfig +++ b/arch/sh/configs/rsk7264_defconfig @@ -59,8 +59,7 @@ CONFIG_USB_R8A66597_HCD=y CONFIG_USB_STORAGE=y CONFIG_USB_STORAGE_DEBUG=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_VFAT_FS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y diff --git a/arch/sh/configs/rsk7269_defconfig b/arch/sh/configs/rsk7269_defconfig index e0d1560b2bfd..f8bfa46643ff 100644 --- a/arch/sh/configs/rsk7269_defconfig +++ b/arch/sh/configs/rsk7269_defconfig @@ -43,8 +43,7 @@ CONFIG_USB_R8A66597_HCD=y CONFIG_USB_STORAGE=y CONFIG_USB_STORAGE_DEBUG=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_VFAT_FS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y diff --git a/arch/sh/configs/sdk7780_defconfig b/arch/sh/configs/sdk7780_defconfig index 9870d16d9711..311817161afb 100644 --- a/arch/sh/configs/sdk7780_defconfig +++ b/arch/sh/configs/sdk7780_defconfig @@ -102,9 +102,8 @@ CONFIG_LEDS_CLASS=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_AUTOFS_FS=y 
CONFIG_ISO9660_FS=y CONFIG_MSDOS_FS=y diff --git a/arch/sh/configs/sdk7786_defconfig b/arch/sh/configs/sdk7786_defconfig index 07894f13441e..2433aa5f44a8 100644 --- a/arch/sh/configs/sdk7786_defconfig +++ b/arch/sh/configs/sdk7786_defconfig @@ -161,8 +161,7 @@ CONFIG_STAGING=y # CONFIG_STAGING_EXCLUDE_BUILD is not set CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_EXT4_FS=y CONFIG_XFS_FS=y CONFIG_BTRFS_FS=y diff --git a/arch/sh/configs/se7343_defconfig b/arch/sh/configs/se7343_defconfig index 75db12fb9ad1..b0baa5771c26 100644 --- a/arch/sh/configs/se7343_defconfig +++ b/arch/sh/configs/se7343_defconfig @@ -84,8 +84,7 @@ CONFIG_USB_ANNOUNCE_NEW_DEVICES=y CONFIG_USB_ISP116X_HCD=y CONFIG_UIO=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y # CONFIG_DNOTIFY is not set CONFIG_JFFS2_FS=y CONFIG_CRAMFS=y diff --git a/arch/sh/configs/se7712_defconfig b/arch/sh/configs/se7712_defconfig index 8770a72e6a63..1078c286a610 100644 --- a/arch/sh/configs/se7712_defconfig +++ b/arch/sh/configs/se7712_defconfig @@ -83,8 +83,7 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y # CONFIG_DNOTIFY is not set CONFIG_JFFS2_FS=y CONFIG_CRAMFS=y diff --git a/arch/sh/configs/se7721_defconfig b/arch/sh/configs/se7721_defconfig index b15c6406a0e8..edb9e0d2dce5 100644 --- a/arch/sh/configs/se7721_defconfig +++ b/arch/sh/configs/se7721_defconfig @@ -107,8 +107,7 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y # CONFIG_DNOTIFY is not set CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/sh/configs/se7722_defconfig b/arch/sh/configs/se7722_defconfig index 5327a2f70980..33daa0a17a32 100644 --- a/arch/sh/configs/se7722_defconfig +++ b/arch/sh/configs/se7722_defconfig @@ -44,8 +44,7 @@ CONFIG_HW_RANDOM=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_SH=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_HUGETLBFS=y diff --git a/arch/sh/configs/se7724_defconfig b/arch/sh/configs/se7724_defconfig index 9501e69eb886..d572655f842d 100644 --- a/arch/sh/configs/se7724_defconfig +++ b/arch/sh/configs/se7724_defconfig @@ -110,10 +110,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y diff --git a/arch/sh/configs/sh03_defconfig b/arch/sh/configs/sh03_defconfig index 4d75c92cac10..3d194d81c92b 100644 --- a/arch/sh/configs/sh03_defconfig +++ b/arch/sh/configs/sh03_defconfig @@ -57,9 +57,8 @@ CONFIG_WATCHDOG=y CONFIG_SH_WDT=m CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_AUTOFS_FS=y CONFIG_ISO9660_FS=m CONFIG_JOLIET=y diff --git a/arch/sh/configs/sh2007_defconfig b/arch/sh/configs/sh2007_defconfig index cc6292b3235a..889daa5d2faa 100644 --- a/arch/sh/configs/sh2007_defconfig +++ b/arch/sh/configs/sh2007_defconfig @@ -95,7 +95,7 @@ CONFIG_RTC_CLASS=y 
CONFIG_RTC_INTF_DEV_UIE_EMUL=y CONFIG_DMADEVICES=y CONFIG_TIMB_DMA=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_ISO9660_FS=y CONFIG_JOLIET=y CONFIG_ZISOFS=y diff --git a/arch/sh/configs/sh7757lcr_defconfig b/arch/sh/configs/sh7757lcr_defconfig index 48a0f9beb116..25e9d22779b3 100644 --- a/arch/sh/configs/sh7757lcr_defconfig +++ b/arch/sh/configs/sh7757lcr_defconfig @@ -64,7 +64,7 @@ CONFIG_MMC=y CONFIG_MMC_SDHI=y CONFIG_MMC_SH_MMCIF=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_ISO9660_FS=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y diff --git a/arch/sh/configs/sh7763rdp_defconfig b/arch/sh/configs/sh7763rdp_defconfig index b77b3313157e..e7b72ff377a8 100644 --- a/arch/sh/configs/sh7763rdp_defconfig +++ b/arch/sh/configs/sh7763rdp_defconfig @@ -61,8 +61,7 @@ CONFIG_USB_OHCI_HCD=y CONFIG_USB_STORAGE=y CONFIG_MMC=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_AUTOFS_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/sh/configs/sh7785lcr_32bit_defconfig b/arch/sh/configs/sh7785lcr_32bit_defconfig index 44f9b2317f09..17d2471d8e51 100644 --- a/arch/sh/configs/sh7785lcr_32bit_defconfig +++ b/arch/sh/configs/sh7785lcr_32bit_defconfig @@ -113,8 +113,7 @@ CONFIG_RTC_DRV_RS5C372=y CONFIG_DMADEVICES=y CONFIG_UIO=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y diff --git a/arch/sh/configs/sh7785lcr_defconfig b/arch/sh/configs/sh7785lcr_defconfig index aec74b0e7003..34c8fe755add 100644 --- a/arch/sh/configs/sh7785lcr_defconfig +++ b/arch/sh/configs/sh7785lcr_defconfig @@ -90,8 +90,7 @@ CONFIG_USB_TEST=m CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_RS5C372=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y diff --git a/arch/sh/configs/shx3_defconfig b/arch/sh/configs/shx3_defconfig index 9a0df5ea3866..52e7a42d66c7 100644 --- a/arch/sh/configs/shx3_defconfig +++ b/arch/sh/configs/shx3_defconfig @@ -84,8 +84,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_SH=y CONFIG_UIO=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_HUGETLBFS=y diff --git a/arch/sh/configs/titan_defconfig b/arch/sh/configs/titan_defconfig index 8ef72b8dbcd3..2c474645ec36 100644 --- a/arch/sh/configs/titan_defconfig +++ b/arch/sh/configs/titan_defconfig @@ -215,9 +215,8 @@ CONFIG_USB_SERIAL_PL2303=m CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_SH=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -# CONFIG_EXT3_FS_XATTR is not set +CONFIG_EXT4_FS=y +# CONFIG_EXT4_FS_XATTR is not set CONFIG_XFS_FS=m CONFIG_FUSE_FS=m CONFIG_ISO9660_FS=m diff --git a/arch/sh/configs/ul2_defconfig b/arch/sh/configs/ul2_defconfig index 103b81ec1ffb..b0c2ba478353 100644 --- a/arch/sh/configs/ul2_defconfig +++ b/arch/sh/configs/ul2_defconfig @@ -66,8 +66,7 @@ CONFIG_USB_R8A66597_HCD=y CONFIG_USB_STORAGE=y CONFIG_MMC=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y diff --git a/arch/sh/configs/urquell_defconfig b/arch/sh/configs/urquell_defconfig index 00ef62133b04..e6d807f52253 100644 --- a/arch/sh/configs/urquell_defconfig +++ b/arch/sh/configs/urquell_defconfig @@ -114,8 +114,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_SH=y CONFIG_RTC_DRV_GENERIC=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# 
CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_EXT4_FS=y CONFIG_BTRFS_FS=y CONFIG_MSDOS_FS=y diff --git a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig index 7a7c4dec2925..127940aafc39 100644 --- a/arch/sparc/configs/sparc64_defconfig +++ b/arch/sparc/configs/sparc64_defconfig @@ -187,10 +187,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_HUGETLBFS=y diff --git a/arch/sparc/include/asm/adi_64.h b/arch/sparc/include/asm/adi_64.h index 4301c6fd87f7..0c066fdab696 100644 --- a/arch/sparc/include/asm/adi_64.h +++ b/arch/sparc/include/asm/adi_64.h @@ -9,7 +9,7 @@ #include <linux/types.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct adi_caps { __u64 blksz; @@ -41,6 +41,6 @@ static inline unsigned long adi_nbits(void) return adi_state.caps.nbits; } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* !(__ASM_SPARC64_ADI_H) */ diff --git a/arch/sparc/include/asm/auxio.h b/arch/sparc/include/asm/auxio.h index a2681052e900..d0a933ed0d04 100644 --- a/arch/sparc/include/asm/auxio.h +++ b/arch/sparc/include/asm/auxio.h @@ -2,11 +2,11 @@ #ifndef ___ASM_SPARC_AUXIO_H #define ___ASM_SPARC_AUXIO_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern void __iomem *auxio_register; -#endif /* ifndef __ASSEMBLY__ */ +#endif /* ifndef __ASSEMBLER__ */ #if defined(__sparc__) && defined(__arch64__) #include <asm/auxio_64.h> diff --git a/arch/sparc/include/asm/auxio_32.h b/arch/sparc/include/asm/auxio_32.h index 852457c7a265..db58fa28de9e 100644 --- a/arch/sparc/include/asm/auxio_32.h +++ b/arch/sparc/include/asm/auxio_32.h @@ -29,7 +29,7 @@ #define AUXIO_FLPY_EJCT 0x02 /* Eject floppy disk. Write only. */ #define AUXIO_LED 0x01 /* On if set, off if unset. 
Read/Write */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * NOTE: these routines are implementation dependent-- @@ -75,7 +75,7 @@ do { \ } \ } while (0) -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ /* AUXIO2 (Power Off Control) */ diff --git a/arch/sparc/include/asm/auxio_64.h b/arch/sparc/include/asm/auxio_64.h index ae1ed41987db..8a4ae07daf16 100644 --- a/arch/sparc/include/asm/auxio_64.h +++ b/arch/sparc/include/asm/auxio_64.h @@ -74,7 +74,7 @@ #define AUXIO_PCIO_CPWR_OFF 0x02 /* Courtesy Power Off */ #define AUXIO_PCIO_SPWR_OFF 0x01 /* System Power Off */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define AUXIO_LTE_ON 1 #define AUXIO_LTE_OFF 0 @@ -94,6 +94,6 @@ void auxio_set_lte(int on); */ void auxio_set_led(int on); -#endif /* ifndef __ASSEMBLY__ */ +#endif /* ifndef __ASSEMBLER__ */ #endif /* !(_SPARC64_AUXIO_H) */ diff --git a/arch/sparc/include/asm/cacheflush_64.h b/arch/sparc/include/asm/cacheflush_64.h index 2b1261b77ecd..06092572c045 100644 --- a/arch/sparc/include/asm/cacheflush_64.h +++ b/arch/sparc/include/asm/cacheflush_64.h @@ -4,7 +4,7 @@ #include <asm/page.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/mm.h> @@ -78,6 +78,6 @@ void flush_ptrace_access(struct vm_area_struct *, struct page *, #define flush_cache_vmap_early(start, end) do { } while (0) #define flush_cache_vunmap(start, end) do { } while (0) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _SPARC64_CACHEFLUSH_H */ diff --git a/arch/sparc/include/asm/cpudata.h b/arch/sparc/include/asm/cpudata.h index d213165ee713..67022a153023 100644 --- a/arch/sparc/include/asm/cpudata.h +++ b/arch/sparc/include/asm/cpudata.h @@ -2,14 +2,14 @@ #ifndef ___ASM_SPARC_CPUDATA_H #define ___ASM_SPARC_CPUDATA_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/threads.h> #include <linux/percpu.h> extern const struct seq_operations cpuinfo_op; -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #if defined(__sparc__) && defined(__arch64__) #include <asm/cpudata_64.h> diff --git a/arch/sparc/include/asm/cpudata_64.h b/arch/sparc/include/asm/cpudata_64.h index 9c3fc03abe9a..056b3c0e7ef9 100644 --- a/arch/sparc/include/asm/cpudata_64.h +++ b/arch/sparc/include/asm/cpudata_64.h @@ -7,7 +7,7 @@ #ifndef _SPARC64_CPUDATA_H #define _SPARC64_CPUDATA_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ typedef struct { /* Dcache line 1 */ @@ -35,7 +35,7 @@ DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data); #define cpu_data(__cpu) per_cpu(__cpu_data, (__cpu)) #define local_cpu_data() (*this_cpu_ptr(&__cpu_data)) -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #include <asm/trap_block.h> diff --git a/arch/sparc/include/asm/delay_64.h b/arch/sparc/include/asm/delay_64.h index 22213b1c119d..5de5b5f23188 100644 --- a/arch/sparc/include/asm/delay_64.h +++ b/arch/sparc/include/asm/delay_64.h @@ -7,12 +7,12 @@ #ifndef _SPARC64_DELAY_H #define _SPARC64_DELAY_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ void __delay(unsigned long loops); void udelay(unsigned long usecs); #define mdelay(n) udelay((n) * 1000) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _SPARC64_DELAY_H */ diff --git a/arch/sparc/include/asm/elf_64.h b/arch/sparc/include/asm/elf_64.h index 8fb09eec8c3e..694ed081cf8d 100644 --- a/arch/sparc/include/asm/elf_64.h +++ b/arch/sparc/include/asm/elf_64.h @@ -58,6 +58,7 @@ #define R_SPARC_7 43 #define R_SPARC_5 44 #define R_SPARC_6 45 +#define R_SPARC_UA64 54 /* Bits present in AT_HWCAP, primarily for Sparc32. 
*/ #define HWCAP_SPARC_FLUSH 0x00000001 diff --git a/arch/sparc/include/asm/floppy_64.h b/arch/sparc/include/asm/floppy_64.h index 135f9a49b6ba..d1bb0f13352c 100644 --- a/arch/sparc/include/asm/floppy_64.h +++ b/arch/sparc/include/asm/floppy_64.h @@ -13,6 +13,7 @@ #include <linux/of.h> #include <linux/of_platform.h> #include <linux/dma-mapping.h> +#include <linux/string.h> #include <asm/auxio.h> @@ -615,7 +616,7 @@ static unsigned long __init sun_floppy_init(void) sun_pci_fd_ebus_dma.callback = sun_pci_fd_dma_callback; sun_pci_fd_ebus_dma.client_cookie = NULL; sun_pci_fd_ebus_dma.irq = FLOPPY_IRQ; - strcpy(sun_pci_fd_ebus_dma.name, "floppy"); + strscpy(sun_pci_fd_ebus_dma.name, "floppy"); if (ebus_dma_register(&sun_pci_fd_ebus_dma)) return 0; diff --git a/arch/sparc/include/asm/ftrace.h b/arch/sparc/include/asm/ftrace.h index e284394cb3aa..f7c9036199c5 100644 --- a/arch/sparc/include/asm/ftrace.h +++ b/arch/sparc/include/asm/ftrace.h @@ -6,7 +6,7 @@ #define MCOUNT_ADDR ((unsigned long)(_mcount)) #define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ void _mcount(void); #endif diff --git a/arch/sparc/include/asm/hvtramp.h b/arch/sparc/include/asm/hvtramp.h index ce2453ea4f2b..8cf7a54fa528 100644 --- a/arch/sparc/include/asm/hvtramp.h +++ b/arch/sparc/include/asm/hvtramp.h @@ -2,7 +2,7 @@ #ifndef _SPARC64_HVTRAP_H #define _SPARC64_HVTRAP_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> diff --git a/arch/sparc/include/asm/hypervisor.h b/arch/sparc/include/asm/hypervisor.h index f220edcf17c7..94ac56d43746 100644 --- a/arch/sparc/include/asm/hypervisor.h +++ b/arch/sparc/include/asm/hypervisor.h @@ -102,7 +102,7 @@ */ #define HV_FAST_MACH_EXIT 0x00 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ void sun4v_mach_exit(unsigned long exit_code); #endif @@ -131,7 +131,7 @@ void sun4v_mach_exit(unsigned long exit_code); */ #define HV_FAST_MACH_DESC 0x01 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_mach_desc(unsigned long buffer_pa, unsigned long buf_len, unsigned long *real_buf_len); @@ -152,7 +152,7 @@ unsigned long sun4v_mach_desc(unsigned long buffer_pa, */ #define HV_FAST_MACH_SIR 0x02 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ void sun4v_mach_sir(void); #endif @@ -208,7 +208,7 @@ void sun4v_mach_sir(void); */ #define HV_FAST_MACH_SET_WATCHDOG 0x05 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_mach_set_watchdog(unsigned long timeout, unsigned long *orig_timeout); #endif @@ -254,7 +254,7 @@ unsigned long sun4v_mach_set_watchdog(unsigned long timeout, */ #define HV_FAST_CPU_START 0x10 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_cpu_start(unsigned long cpuid, unsigned long pc, unsigned long rtba, @@ -282,7 +282,7 @@ unsigned long sun4v_cpu_start(unsigned long cpuid, */ #define HV_FAST_CPU_STOP 0x11 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_cpu_stop(unsigned long cpuid); #endif @@ -299,7 +299,7 @@ unsigned long sun4v_cpu_stop(unsigned long cpuid); */ #define HV_FAST_CPU_YIELD 0x12 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_cpu_yield(void); #endif @@ -317,7 +317,7 @@ unsigned long sun4v_cpu_yield(void); */ #define HV_FAST_CPU_POKE 0x13 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_cpu_poke(unsigned long cpuid); #endif @@ -363,7 +363,7 @@ unsigned long sun4v_cpu_poke(unsigned long cpuid); #define HV_CPU_QUEUE_RES_ERROR 0x3e #define HV_CPU_QUEUE_NONRES_ERROR 0x3f -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned 
long sun4v_cpu_qconf(unsigned long type, unsigned long queue_paddr, unsigned long num_queue_entries); @@ -416,7 +416,7 @@ unsigned long sun4v_cpu_qconf(unsigned long type, */ #define HV_FAST_CPU_MONDO_SEND 0x42 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_cpu_mondo_send(unsigned long cpu_count, unsigned long cpu_list_pa, unsigned long mondo_block_pa); @@ -449,7 +449,7 @@ unsigned long sun4v_cpu_mondo_send(unsigned long cpu_count, #define HV_CPU_STATE_RUNNING 0x02 #define HV_CPU_STATE_ERROR 0x03 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ long sun4v_cpu_state(unsigned long cpuid); #endif @@ -485,7 +485,7 @@ long sun4v_cpu_state(unsigned long cpuid); * * Layout of a TSB description for mmu_tsb_ctx{,non}0() calls. */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct hv_tsb_descr { unsigned short pgsz_idx; unsigned short assoc; @@ -536,7 +536,7 @@ struct hv_tsb_descr { * The fault status block is a multiple of 64-bytes and must be aligned * on a 64-byte boundary. */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct hv_fault_status { unsigned long i_fault_type; unsigned long i_fault_addr; @@ -651,7 +651,7 @@ struct hv_fault_status { */ #define HV_FAST_MMU_TSB_CTX0 0x20 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_mmu_tsb_ctx0(unsigned long num_descriptions, unsigned long tsb_desc_ra); #endif @@ -736,7 +736,7 @@ unsigned long sun4v_mmu_tsb_ctx0(unsigned long num_descriptions, */ #define HV_FAST_MMU_DEMAP_ALL 0x24 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ void sun4v_mmu_demap_all(void); #endif @@ -766,7 +766,7 @@ void sun4v_mmu_demap_all(void); */ #define HV_FAST_MMU_MAP_PERM_ADDR 0x25 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_mmu_map_perm_addr(unsigned long vaddr, unsigned long set_to_zero, unsigned long tte, @@ -990,7 +990,7 @@ unsigned long sun4v_mmu_map_perm_addr(unsigned long vaddr, */ #define HV_CCB_SUBMIT 0x34 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_ccb_submit(unsigned long ccb_buf, unsigned long len, unsigned long flags, @@ -1035,7 +1035,7 @@ unsigned long sun4v_ccb_submit(unsigned long ccb_buf, */ #define HV_CCB_INFO 0x35 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_ccb_info(unsigned long ca, void *info_arr); #endif @@ -1069,7 +1069,7 @@ unsigned long sun4v_ccb_info(unsigned long ca, */ #define HV_CCB_KILL 0x36 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_ccb_kill(unsigned long ca, void *kill_status); #endif @@ -1104,7 +1104,7 @@ unsigned long sun4v_ccb_kill(unsigned long ca, */ #define HV_FAST_TOD_GET 0x50 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_tod_get(unsigned long *time); #endif @@ -1121,7 +1121,7 @@ unsigned long sun4v_tod_get(unsigned long *time); */ #define HV_FAST_TOD_SET 0x51 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_tod_set(unsigned long time); #endif @@ -1197,7 +1197,7 @@ unsigned long sun4v_tod_set(unsigned long time); */ #define HV_FAST_CONS_WRITE 0x63 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ long sun4v_con_getchar(long *status); long sun4v_con_putchar(long c); long sun4v_con_read(unsigned long buffer, @@ -1239,7 +1239,7 @@ unsigned long sun4v_con_write(unsigned long buffer, #define HV_SOFT_STATE_NORMAL 0x01 #define HV_SOFT_STATE_TRANSITION 0x02 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_mach_set_soft_state(unsigned long soft_state, unsigned long msg_string_ra); #endif @@ -1318,7 +1318,7 @@ unsigned long sun4v_mach_set_soft_state(unsigned long soft_state, */ #define 
HV_FAST_SVC_CLRSTATUS 0x84 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_svc_send(unsigned long svc_id, unsigned long buffer, unsigned long buffer_size, @@ -1348,7 +1348,7 @@ unsigned long sun4v_svc_clrstatus(unsigned long svc_id, * start (offset 0) of the trap trace buffer, and is described as * follows: */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct hv_trap_trace_control { unsigned long head_offset; unsigned long tail_offset; @@ -1367,7 +1367,7 @@ struct hv_trap_trace_control { * * Each trap trace buffer entry is laid out as follows: */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct hv_trap_trace_entry { unsigned char type; /* Hypervisor or guest entry? */ unsigned char hpstate; /* Hyper-privileged state */ @@ -1617,7 +1617,7 @@ struct hv_trap_trace_entry { */ #define HV_FAST_INTR_DEVINO2SYSINO 0xa0 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_devino_to_sysino(unsigned long devhandle, unsigned long devino); #endif @@ -1635,7 +1635,7 @@ unsigned long sun4v_devino_to_sysino(unsigned long devhandle, */ #define HV_FAST_INTR_GETENABLED 0xa1 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_intr_getenabled(unsigned long sysino); #endif @@ -1651,7 +1651,7 @@ unsigned long sun4v_intr_getenabled(unsigned long sysino); */ #define HV_FAST_INTR_SETENABLED 0xa2 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_intr_setenabled(unsigned long sysino, unsigned long intr_enabled); #endif @@ -1668,7 +1668,7 @@ unsigned long sun4v_intr_setenabled(unsigned long sysino, */ #define HV_FAST_INTR_GETSTATE 0xa3 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_intr_getstate(unsigned long sysino); #endif @@ -1688,7 +1688,7 @@ unsigned long sun4v_intr_getstate(unsigned long sysino); */ #define HV_FAST_INTR_SETSTATE 0xa4 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_intr_setstate(unsigned long sysino, unsigned long intr_state); #endif @@ -1706,7 +1706,7 @@ unsigned long sun4v_intr_setstate(unsigned long sysino, unsigned long intr_state */ #define HV_FAST_INTR_GETTARGET 0xa5 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_intr_gettarget(unsigned long sysino); #endif @@ -1723,7 +1723,7 @@ unsigned long sun4v_intr_gettarget(unsigned long sysino); */ #define HV_FAST_INTR_SETTARGET 0xa6 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_intr_settarget(unsigned long sysino, unsigned long cpuid); #endif @@ -1807,7 +1807,7 @@ unsigned long sun4v_intr_settarget(unsigned long sysino, unsigned long cpuid); */ #define HV_FAST_VINTR_SET_TARGET 0xae -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_vintr_get_cookie(unsigned long dev_handle, unsigned long dev_ino, unsigned long *cookie); @@ -3047,7 +3047,7 @@ unsigned long sun4v_vintr_set_target(unsigned long dev_handle, #define LDC_MTE_SZ64K 0x0000000000000001 /* 64K page */ #define LDC_MTE_SZ8K 0x0000000000000000 /* 8K page */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct ldc_mtable_entry { unsigned long mte; unsigned long cookie; @@ -3130,7 +3130,7 @@ struct ldc_mtable_entry { */ #define HV_FAST_LDC_REVOKE 0xef -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_ldc_tx_qconf(unsigned long channel, unsigned long ra, unsigned long num_entries); @@ -3230,7 +3230,7 @@ unsigned long sun4v_ldc_revoke(unsigned long channel, #define HV_FAST_N2_GET_PERFREG 0x104 #define HV_FAST_N2_SET_PERFREG 0x105 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_niagara_getperf(unsigned long reg, unsigned 
long *val); unsigned long sun4v_niagara_setperf(unsigned long reg, @@ -3247,7 +3247,7 @@ unsigned long sun4v_niagara2_setperf(unsigned long reg, * a buffer where these statistics can be collected. It is continually * updated once configured. The layout is as follows: */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct hv_mmu_statistics { unsigned long immu_tsb_hits_ctx0_8k_tte; unsigned long immu_tsb_ticks_ctx0_8k_tte; @@ -3332,7 +3332,7 @@ struct hv_mmu_statistics { */ #define HV_FAST_MMUSTAT_INFO 0x103 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_mmustat_conf(unsigned long ra, unsigned long *orig_ra); unsigned long sun4v_mmustat_info(unsigned long *ra); #endif @@ -3343,7 +3343,7 @@ unsigned long sun4v_mmustat_info(unsigned long *ra); #define HV_NCS_QCONF 0x01 #define HV_NCS_QTAIL_UPDATE 0x02 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct hv_ncs_queue_entry { /* MAU Control Register */ unsigned long mau_control; @@ -3422,7 +3422,7 @@ struct hv_ncs_qtail_update_arg { */ #define HV_FAST_NCS_REQUEST 0x110 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_ncs_request(unsigned long request, unsigned long arg_ra, unsigned long arg_size); @@ -3433,7 +3433,7 @@ unsigned long sun4v_ncs_request(unsigned long request, #define HV_FAST_REBOOT_DATA_SET 0x172 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_reboot_data_set(unsigned long ra, unsigned long len); #endif @@ -3441,7 +3441,7 @@ unsigned long sun4v_reboot_data_set(unsigned long ra, #define HV_FAST_VT_GET_PERFREG 0x184 #define HV_FAST_VT_SET_PERFREG 0x185 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_vt_get_perfreg(unsigned long reg_num, unsigned long *reg_val); unsigned long sun4v_vt_set_perfreg(unsigned long reg_num, @@ -3451,7 +3451,7 @@ unsigned long sun4v_vt_set_perfreg(unsigned long reg_num, #define HV_FAST_T5_GET_PERFREG 0x1a8 #define HV_FAST_T5_SET_PERFREG 0x1a9 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_t5_get_perfreg(unsigned long reg_num, unsigned long *reg_val); unsigned long sun4v_t5_set_perfreg(unsigned long reg_num, @@ -3462,7 +3462,7 @@ unsigned long sun4v_t5_set_perfreg(unsigned long reg_num, #define HV_FAST_M7_GET_PERFREG 0x43 #define HV_FAST_M7_SET_PERFREG 0x44 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_m7_get_perfreg(unsigned long reg_num, unsigned long *reg_val); unsigned long sun4v_m7_set_perfreg(unsigned long reg_num, @@ -3506,7 +3506,7 @@ unsigned long sun4v_m7_set_perfreg(unsigned long reg_num, #define HV_GRP_T5_CPU 0x0211 #define HV_GRP_DIAG 0x0300 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ unsigned long sun4v_get_version(unsigned long group, unsigned long *major, unsigned long *minor); diff --git a/arch/sparc/include/asm/io_64.h b/arch/sparc/include/asm/io_64.h index c9528e4719cd..d8ed296624af 100644 --- a/arch/sparc/include/asm/io_64.h +++ b/arch/sparc/include/asm/io_64.h @@ -250,19 +250,19 @@ void insl(unsigned long, void *, unsigned long); #define insw insw #define insl insl -static inline void readsb(void __iomem *port, void *buf, unsigned long count) +static inline void readsb(const volatile void __iomem *port, void *buf, unsigned long count) { insb((unsigned long __force)port, buf, count); } #define readsb readsb -static inline void readsw(void __iomem *port, void *buf, unsigned long count) +static inline void readsw(const volatile void __iomem *port, void *buf, unsigned long count) { insw((unsigned long __force)port, buf, count); } #define readsw readsw -static inline void readsl(void 
__iomem *port, void *buf, unsigned long count) +static inline void readsl(const volatile void __iomem *port, void *buf, unsigned long count) { insl((unsigned long __force)port, buf, count); } diff --git a/arch/sparc/include/asm/irqflags_32.h b/arch/sparc/include/asm/irqflags_32.h index 7ca3eaf3dace..f5f20774faac 100644 --- a/arch/sparc/include/asm/irqflags_32.h +++ b/arch/sparc/include/asm/irqflags_32.h @@ -11,7 +11,7 @@ #ifndef _ASM_IRQFLAGS_H #define _ASM_IRQFLAGS_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> #include <asm/psr.h> @@ -43,6 +43,6 @@ static inline notrace bool arch_irqs_disabled(void) return arch_irqs_disabled_flags(arch_local_save_flags()); } -#endif /* (__ASSEMBLY__) */ +#endif /* (__ASSEMBLER__) */ #endif /* !(_ASM_IRQFLAGS_H) */ diff --git a/arch/sparc/include/asm/irqflags_64.h b/arch/sparc/include/asm/irqflags_64.h index c29ed571ae49..0071566c2c22 100644 --- a/arch/sparc/include/asm/irqflags_64.h +++ b/arch/sparc/include/asm/irqflags_64.h @@ -13,7 +13,7 @@ #include <asm/pil.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ static inline notrace unsigned long arch_local_save_flags(void) { @@ -93,6 +93,6 @@ static inline notrace unsigned long arch_local_irq_save(void) return flags; } -#endif /* (__ASSEMBLY__) */ +#endif /* (__ASSEMBLER__) */ #endif /* !(_ASM_IRQFLAGS_H) */ diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h index 2718cbea826a..f49d1e6104e1 100644 --- a/arch/sparc/include/asm/jump_label.h +++ b/arch/sparc/include/asm/jump_label.h @@ -2,7 +2,7 @@ #ifndef _ASM_SPARC_JUMP_LABEL_H #define _ASM_SPARC_JUMP_LABEL_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> @@ -48,5 +48,5 @@ struct jump_entry { jump_label_t key; }; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/sparc/include/asm/kdebug_32.h b/arch/sparc/include/asm/kdebug_32.h index 763d423823bd..7627701a032c 100644 --- a/arch/sparc/include/asm/kdebug_32.h +++ b/arch/sparc/include/asm/kdebug_32.h @@ -19,7 +19,7 @@ #define DEBUG_BP_TRAP 126 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* The debug vector is passed in %o1 at boot time. It is a pointer to * a structure in the debuggers address space. Here is its format. */ @@ -64,7 +64,7 @@ enum die_val { DIE_OOPS, }; -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ /* Some nice offset defines for assembler code. */ #define KDEBUG_ENTRY_OFF 0x0 diff --git a/arch/sparc/include/asm/leon.h b/arch/sparc/include/asm/leon.h index c1e05e4ab9e3..053a24b67aed 100644 --- a/arch/sparc/include/asm/leon.h +++ b/arch/sparc/include/asm/leon.h @@ -59,7 +59,7 @@ #define ASI_LEON3_SYSCTRL_CFG_SNOOPING (1 << 27) #define ASI_LEON3_SYSCTRL_CFG_SSIZE(c) (1 << ((c >> 20) & 0xf)) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* do a physical address bypass write, i.e. 
for 0x80000000 */ static inline void leon_store_reg(unsigned long paddr, unsigned long value) @@ -132,7 +132,7 @@ static inline int sparc_leon3_cpuid(void) return sparc_leon3_asr17() >> 28; } -#endif /*!__ASSEMBLY__*/ +#endif /*!__ASSEMBLER__*/ #ifdef CONFIG_SMP # define LEON3_IRQ_IPI_DEFAULT 13 @@ -194,7 +194,7 @@ static inline int sparc_leon3_cpuid(void) #define LEON2_CCR_DSETS_MASK 0x03000000UL #define LEON2_CFG_SSIZE_MASK 0x00007000UL -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct vm_area_struct; unsigned long leon_swprobe(unsigned long vaddr, unsigned long *paddr); @@ -247,7 +247,7 @@ extern int leon_ipi_irq; #endif /* CONFIG_SMP */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ /* macros used in leon_mm.c */ #define PFN(x) ((x) >> PAGE_SHIFT) diff --git a/arch/sparc/include/asm/leon_amba.h b/arch/sparc/include/asm/leon_amba.h index 6433a93f5126..2ff5714d7a63 100644 --- a/arch/sparc/include/asm/leon_amba.h +++ b/arch/sparc/include/asm/leon_amba.h @@ -8,7 +8,7 @@ #ifndef LEON_AMBA_H_INCLUDE #define LEON_AMBA_H_INCLUDE -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct amba_prom_registers { unsigned int phys_addr; /* The physical address of this register */ @@ -89,7 +89,7 @@ struct amba_prom_registers { #define LEON3_GPTIMER_CONFIG_NRTIMERS(c) ((c)->config & 0x7) #define LEON3_GPTIMER_CTRL_ISPENDING(r) (((r)&LEON3_GPTIMER_CTRL_PENDING) ? 1 : 0) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct leon3_irqctrl_regs_map { u32 ilevel; @@ -189,7 +189,7 @@ extern int leon_debug_irqout; extern unsigned long leon3_gptimer_irq; extern unsigned int sparc_leon_eirq; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #define LEON3_IO_AREA 0xfff00000 #define LEON3_CONF_AREA 0xff000 diff --git a/arch/sparc/include/asm/mman.h b/arch/sparc/include/asm/mman.h index 3e4bac33be81..a8bae8ad243a 100644 --- a/arch/sparc/include/asm/mman.h +++ b/arch/sparc/include/asm/mman.h @@ -4,7 +4,7 @@ #include <uapi/asm/mman.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define arch_mmap_check(addr,len,flags) sparc_mmap_check(addr,len) int sparc_mmap_check(unsigned long addr, unsigned long len); @@ -87,5 +87,5 @@ static inline bool arch_validate_flags(vm_flags_t vm_flags) } #endif /* CONFIG_SPARC64 */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __SPARC_MMAN_H__ */ diff --git a/arch/sparc/include/asm/mmu_64.h b/arch/sparc/include/asm/mmu_64.h index 7e2704c770e9..4eeb938f3e61 100644 --- a/arch/sparc/include/asm/mmu_64.h +++ b/arch/sparc/include/asm/mmu_64.h @@ -59,7 +59,7 @@ #define CTX_HWBITS(__ctx) ((__ctx.sparc64_ctx_val) & CTX_HW_MASK) #define CTX_NRBITS(__ctx) ((__ctx.sparc64_ctx_val) & CTX_NR_MASK) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define TSB_ENTRY_ALIGNMENT 16 @@ -117,7 +117,7 @@ typedef struct { spinlock_t tag_lock; } mm_context_t; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #define TSB_CONFIG_TSB 0x00 #define TSB_CONFIG_RSS_LIMIT 0x08 diff --git a/arch/sparc/include/asm/mmu_context_32.h b/arch/sparc/include/asm/mmu_context_32.h index 509043f81560..d9ff73f776f9 100644 --- a/arch/sparc/include/asm/mmu_context_32.h +++ b/arch/sparc/include/asm/mmu_context_32.h @@ -2,7 +2,7 @@ #ifndef __SPARC_MMU_CONTEXT_H #define __SPARC_MMU_CONTEXT_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm-generic/mm_hooks.h> @@ -29,6 +29,6 @@ void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, #include <asm-generic/mmu_context.h> -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #endif /* !(__SPARC_MMU_CONTEXT_H) */ diff --git 
a/arch/sparc/include/asm/mmu_context_64.h b/arch/sparc/include/asm/mmu_context_64.h index 08160bf9a0f4..78bbacc14d2d 100644 --- a/arch/sparc/include/asm/mmu_context_64.h +++ b/arch/sparc/include/asm/mmu_context_64.h @@ -4,7 +4,7 @@ /* Derived heavily from Linus's Alpha/AXP ASN code... */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/spinlock.h> #include <linux/mm_types.h> @@ -193,6 +193,6 @@ static inline unsigned long mm_untag_mask(struct mm_struct *mm) #include <asm-generic/mmu_context.h> -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #endif /* !(__SPARC64_MMU_CONTEXT_H) */ diff --git a/arch/sparc/include/asm/mxcc.h b/arch/sparc/include/asm/mxcc.h index 3a2561bea4dd..bd6339dcf693 100644 --- a/arch/sparc/include/asm/mxcc.h +++ b/arch/sparc/include/asm/mxcc.h @@ -84,7 +84,7 @@ * MID: The moduleID of the cpu your read this from. */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ static inline void mxcc_set_stream_src(unsigned long *paddr) { @@ -133,6 +133,6 @@ static inline void mxcc_set_creg(unsigned long mxcc_control) "i" (ASI_M_MXCC)); } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* !(_SPARC_MXCC_H) */ diff --git a/arch/sparc/include/asm/obio.h b/arch/sparc/include/asm/obio.h index 1b151f738b00..f1ad7f7bcac2 100644 --- a/arch/sparc/include/asm/obio.h +++ b/arch/sparc/include/asm/obio.h @@ -97,7 +97,7 @@ #define CC_EREG 0x1F00E00 /* Error code register */ #define CC_CID 0x1F00F04 /* Component ID */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ static inline int bw_get_intr_mask(int sbus_level) { @@ -221,6 +221,6 @@ static inline void cc_set_igen(unsigned int gen) "i" (ASI_M_MXCC)); } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* !(_SPARC_OBIO_H) */ diff --git a/arch/sparc/include/asm/openprom.h b/arch/sparc/include/asm/openprom.h index 69545b3e5454..ce68000dffac 100644 --- a/arch/sparc/include/asm/openprom.h +++ b/arch/sparc/include/asm/openprom.h @@ -11,7 +11,7 @@ /* Empirical constants... */ #define LINUX_OPPROM_MAGIC 0x10010407 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/of.h> /* V0 prom device operations. 
*/ @@ -275,6 +275,6 @@ struct linux_prom_pci_intmask { unsigned int interrupt; }; -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #endif /* !(__SPARC_OPENPROM_H) */ diff --git a/arch/sparc/include/asm/page_32.h b/arch/sparc/include/asm/page_32.h index 9954254ea569..c1bccbedf567 100644 --- a/arch/sparc/include/asm/page_32.h +++ b/arch/sparc/include/asm/page_32.h @@ -13,7 +13,7 @@ #include <vdso/page.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define clear_page(page) memset((void *)(page), 0, PAGE_SIZE) #define copy_page(to,from) memcpy((void *)(to), (void *)(from), PAGE_SIZE) @@ -108,14 +108,14 @@ typedef pte_t *pgtable_t; #define TASK_UNMAPPED_BASE 0x50000000 -#else /* !(__ASSEMBLY__) */ +#else /* !(__ASSEMBLER__) */ #define __pgprot(x) (x) -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #define PAGE_OFFSET 0xf0000000 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern unsigned long phys_base; extern unsigned long pfn_base; #endif diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h index 2a68ff5b6eab..d764d8a8586b 100644 --- a/arch/sparc/include/asm/page_64.h +++ b/arch/sparc/include/asm/page_64.h @@ -30,7 +30,7 @@ #define HUGE_MAX_HSTATE 5 #endif -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) struct pt_regs; @@ -128,7 +128,7 @@ extern unsigned long sparc64_va_hole_bottom; extern unsigned long PAGE_OFFSET; -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ /* The maximum number of physical memory address bits we support. The * largest value we can support is whatever "KPGD_SHIFT + KPTE_BITS" @@ -139,7 +139,7 @@ extern unsigned long PAGE_OFFSET; #define ILOG2_4MB 22 #define ILOG2_256MB 28 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define __pa(x) ((unsigned long)(x) - PAGE_OFFSET) #define __va(x) ((void *)((unsigned long) (x) + PAGE_OFFSET)) @@ -153,7 +153,7 @@ extern unsigned long PAGE_OFFSET; #define virt_to_phys __pa #define phys_to_virt __va -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #include <asm-generic/getorder.h> diff --git a/arch/sparc/include/asm/parport_64.h b/arch/sparc/include/asm/parport_64.h index 3068809ef9ad..78f14d6620bf 100644 --- a/arch/sparc/include/asm/parport_64.h +++ b/arch/sparc/include/asm/parport_64.h @@ -9,6 +9,7 @@ #include <linux/of.h> #include <linux/platform_device.h> +#include <linux/string.h> #include <asm/ebus_dma.h> #include <asm/ns87303.h> @@ -149,7 +150,7 @@ static int ecpp_probe(struct platform_device *op) sparc_ebus_dmas[slot].info.callback = NULL; sparc_ebus_dmas[slot].info.client_cookie = NULL; sparc_ebus_dmas[slot].info.irq = 0xdeadbeef; - strcpy(sparc_ebus_dmas[slot].info.name, "parport"); + strscpy(sparc_ebus_dmas[slot].info.name, "parport"); if (ebus_dma_register(&sparc_ebus_dmas[slot].info)) goto out_unmap_regs; diff --git a/arch/sparc/include/asm/pcic.h b/arch/sparc/include/asm/pcic.h index 238376b1ffcc..fb5ed6a59535 100644 --- a/arch/sparc/include/asm/pcic.h +++ b/arch/sparc/include/asm/pcic.h @@ -8,7 +8,7 @@ #ifndef __SPARC_PCIC_H #define __SPARC_PCIC_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> #include <linux/smp.h> diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h index 7c199c003ffe..f1538a48484a 100644 --- a/arch/sparc/include/asm/pgtable_32.h +++ b/arch/sparc/include/asm/pgtable_32.h @@ -21,7 +21,7 @@ #define PGDIR_MASK (~(PGDIR_SIZE-1)) #define PGDIR_ALIGN(__addr) (((__addr) + ~PGDIR_MASK) & PGDIR_MASK) -#ifndef 
__ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm-generic/pgtable-nopud.h> #include <linux/spinlock.h> @@ -423,7 +423,7 @@ static inline int io_remap_pfn_range(struct vm_area_struct *vma, __changed; \ }) -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #define VMALLOC_START _AC(0xfe600000,UL) #define VMALLOC_END _AC(0xffc00000,UL) diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 669cd02469a1..64b85ff9c766 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -79,7 +79,7 @@ #error PMD_SHIFT must equal HPAGE_SHIFT for transparent huge pages. #endif -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern unsigned long VMALLOC_END; @@ -106,7 +106,7 @@ bool kern_addr_valid(unsigned long addr); pr_err("%s:%d: bad pgd %p(%016lx) seen at (%pS)\n", \ __FILE__, __LINE__, &(e), pgd_val(e), __builtin_return_address(0)) -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ /* PTE bits which are the same in SUN4U and SUN4V format. */ #define _PAGE_VALID _AC(0x8000000000000000,UL) /* Valid TTE */ @@ -191,7 +191,7 @@ bool kern_addr_valid(unsigned long addr); /* We borrow bit 20 to store the exclusive marker in swap PTEs. */ #define _PAGE_SWP_EXCLUSIVE _AC(0x0000000000100000, UL) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ pte_t mk_pte_io(unsigned long, pgprot_t, int, unsigned long); @@ -1177,6 +1177,6 @@ extern unsigned long pte_leaf_size(pte_t pte); #endif /* CONFIG_HUGETLB_PAGE */ -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #endif /* !(_SPARC64_PGTABLE_H) */ diff --git a/arch/sparc/include/asm/pgtsrmmu.h b/arch/sparc/include/asm/pgtsrmmu.h index 18e68d43f036..a265822a475e 100644 --- a/arch/sparc/include/asm/pgtsrmmu.h +++ b/arch/sparc/include/asm/pgtsrmmu.h @@ -10,7 +10,7 @@ #include <asm/page.h> -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #include <asm/thread_info.h> /* TI_UWINMASK for WINDOW_FLUSH */ #endif @@ -97,7 +97,7 @@ bne 99b; \ restore %g0, %g0, %g0; -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern unsigned long last_valid_pfn; /* This makes sense. Honest it does - Anton */ @@ -136,6 +136,6 @@ srmmu_get_pte (unsigned long addr) return entry; } -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #endif /* !(_SPARC_PGTSRMMU_H) */ diff --git a/arch/sparc/include/asm/processor_64.h b/arch/sparc/include/asm/processor_64.h index 0a0d5c3d184c..321859454ca4 100644 --- a/arch/sparc/include/asm/processor_64.h +++ b/arch/sparc/include/asm/processor_64.h @@ -21,7 +21,7 @@ * XXX No longer using virtual page tables, kill this upper limit... */ #define VA_BITS 44 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define VPTE_SIZE (1UL << (VA_BITS - PAGE_SHIFT + 3)) #else #define VPTE_SIZE (1 << (VA_BITS - PAGE_SHIFT + 3)) @@ -45,7 +45,7 @@ #endif -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* The Sparc processor specific thread struct. */ /* XXX This should die, everything can go into thread_info now. 
*/ @@ -62,7 +62,7 @@ struct thread_struct { #endif }; -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #ifndef CONFIG_DEBUG_SPINLOCK #define INIT_THREAD { \ @@ -75,7 +75,7 @@ struct thread_struct { } #endif /* !(CONFIG_DEBUG_SPINLOCK) */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> #include <asm/fpumacro.h> @@ -242,6 +242,6 @@ static inline void prefetchw(const void *x) int do_mathemu(struct pt_regs *regs, struct fpustate *f, bool illegal_insn_trap); -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #endif /* !(__ASM_SPARC64_PROCESSOR_H) */ diff --git a/arch/sparc/include/asm/psr.h b/arch/sparc/include/asm/psr.h index 65127ce565ab..5af50ccda023 100644 --- a/arch/sparc/include/asm/psr.h +++ b/arch/sparc/include/asm/psr.h @@ -14,7 +14,7 @@ #include <uapi/asm/psr.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* Get the %psr register. */ static inline unsigned int get_psr(void) { @@ -63,6 +63,6 @@ static inline unsigned int get_fsr(void) return fsr; } -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #endif /* !(__LINUX_SPARC_PSR_H) */ diff --git a/arch/sparc/include/asm/ptrace.h b/arch/sparc/include/asm/ptrace.h index d1419e669027..8adf3fd2f00f 100644 --- a/arch/sparc/include/asm/ptrace.h +++ b/arch/sparc/include/asm/ptrace.h @@ -5,7 +5,7 @@ #include <uapi/asm/ptrace.h> #if defined(__sparc__) && defined(__arch64__) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/compiler.h> #include <linux/threads.h> @@ -113,10 +113,10 @@ static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) { return regs->u_regs[UREG_I6]; } -#else /* __ASSEMBLY__ */ -#endif /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ +#endif /* __ASSEMBLER__ */ #else /* (defined(__sparc__) && defined(__arch64__)) */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/switch_to.h> static inline bool pt_regs_is_syscall(struct pt_regs *regs) @@ -144,8 +144,8 @@ static inline bool pt_regs_clear_syscall(struct pt_regs *regs) #define instruction_pointer(regs) ((regs)->pc) #define user_stack_pointer(regs) ((regs)->u_regs[UREG_FP]) unsigned long profile_pc(struct pt_regs *); -#else /* (!__ASSEMBLY__) */ -#endif /* (!__ASSEMBLY__) */ +#else /* (!__ASSEMBLER__) */ +#endif /* (!__ASSEMBLER__) */ #endif /* (defined(__sparc__) && defined(__arch64__)) */ #define STACK_BIAS 2047 diff --git a/arch/sparc/include/asm/ross.h b/arch/sparc/include/asm/ross.h index 79a54d66a2c0..53a42b37495d 100644 --- a/arch/sparc/include/asm/ross.h +++ b/arch/sparc/include/asm/ross.h @@ -95,7 +95,7 @@ #define HYPERSPARC_ICCR_FTD 0x00000002 #define HYPERSPARC_ICCR_ICE 0x00000001 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ static inline unsigned int get_ross_icr(void) { @@ -187,6 +187,6 @@ static inline void hyper_flush_cache_page(unsigned long page) } } -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #endif /* !(_SPARC_ROSS_H) */ diff --git a/arch/sparc/include/asm/sbi.h b/arch/sparc/include/asm/sbi.h index 4d6026c1e446..861f85b5bf9b 100644 --- a/arch/sparc/include/asm/sbi.h +++ b/arch/sparc/include/asm/sbi.h @@ -64,7 +64,7 @@ struct sbi_regs { */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ static inline int acquire_sbi(int devid, int mask) { @@ -111,6 +111,6 @@ static inline void set_sbi_ctl(int devid, int cfgno, int cfg) "i" (ASI_M_CTL)); } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* !(_SPARC_SBI_H) */ diff --git a/arch/sparc/include/asm/sigcontext.h b/arch/sparc/include/asm/sigcontext.h index ee05f9d2bcf2..200f95144fd2 100644 --- 
a/arch/sparc/include/asm/sigcontext.h +++ b/arch/sparc/include/asm/sigcontext.h @@ -5,7 +5,7 @@ #include <asm/ptrace.h> #include <uapi/asm/sigcontext.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define __SUNOS_MAXWIN 31 @@ -104,6 +104,6 @@ typedef struct { #endif /* (CONFIG_SPARC64) */ -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #endif /* !(__SPARC_SIGCONTEXT_H) */ diff --git a/arch/sparc/include/asm/signal.h b/arch/sparc/include/asm/signal.h index 28f81081e37d..d93fe93544ec 100644 --- a/arch/sparc/include/asm/signal.h +++ b/arch/sparc/include/asm/signal.h @@ -2,16 +2,16 @@ #ifndef __SPARC_SIGNAL_H #define __SPARC_SIGNAL_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/personality.h> #include <linux/types.h> #endif #include <uapi/asm/signal.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define __ARCH_HAS_KA_RESTORER #define __ARCH_HAS_SA_RESTORER -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #endif /* !(__SPARC_SIGNAL_H) */ diff --git a/arch/sparc/include/asm/smp_32.h b/arch/sparc/include/asm/smp_32.h index 2cf7971d7f6c..9c6ed98fbaf1 100644 --- a/arch/sparc/include/asm/smp_32.h +++ b/arch/sparc/include/asm/smp_32.h @@ -10,15 +10,15 @@ #include <linux/threads.h> #include <asm/head.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/cpumask.h> -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #ifdef CONFIG_SMP -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/ptrace.h> #include <asm/asi.h> @@ -105,7 +105,7 @@ int hard_smp_processor_id(void); void smp_setup_cpu_possible_map(void); -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ /* Sparc specific messages. */ #define MSG_CROSS_CALL 0x0005 /* run func on cpus */ diff --git a/arch/sparc/include/asm/smp_64.h b/arch/sparc/include/asm/smp_64.h index 0964fede0b2c..759fb4a9530e 100644 --- a/arch/sparc/include/asm/smp_64.h +++ b/arch/sparc/include/asm/smp_64.h @@ -12,16 +12,16 @@ #include <asm/starfire.h> #include <asm/spitfire.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/cpumask.h> #include <linux/cache.h> -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #ifdef CONFIG_SMP -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * Private routines/data @@ -68,7 +68,7 @@ int __cpu_disable(void); void __cpu_die(unsigned int cpu); #endif -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #else diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h index bc5aa6f61676..6d6d261bf8d2 100644 --- a/arch/sparc/include/asm/spinlock_32.h +++ b/arch/sparc/include/asm/spinlock_32.h @@ -7,7 +7,7 @@ #ifndef __SPARC_SPINLOCK_H #define __SPARC_SPINLOCK_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/psr.h> #include <asm/barrier.h> @@ -183,6 +183,6 @@ static inline int __arch_read_trylock(arch_rwlock_t *rw) res; \ }) -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #endif /* __SPARC_SPINLOCK_H */ diff --git a/arch/sparc/include/asm/spinlock_64.h b/arch/sparc/include/asm/spinlock_64.h index 3a9a0b0c7465..13cd15d346be 100644 --- a/arch/sparc/include/asm/spinlock_64.h +++ b/arch/sparc/include/asm/spinlock_64.h @@ -7,13 +7,13 @@ #ifndef __SPARC64_SPINLOCK_H #define __SPARC64_SPINLOCK_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/processor.h> #include <asm/barrier.h> #include <asm/qspinlock.h> #include <asm/qrwlock.h> -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #endif /* !(__SPARC64_SPINLOCK_H) */ diff --git a/arch/sparc/include/asm/spitfire.h 
b/arch/sparc/include/asm/spitfire.h index e9b7d25b29fa..79b9dd5e9ac6 100644 --- a/arch/sparc/include/asm/spitfire.h +++ b/arch/sparc/include/asm/spitfire.h @@ -68,7 +68,7 @@ #define CPU_ID_M8 ('8') #define CPU_ID_SONOMA1 ('N') -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ enum ultra_tlb_layout { spitfire = 0, @@ -363,6 +363,6 @@ static inline void cheetah_put_itlb_data(int entry, unsigned long data) "i" (ASI_ITLB_DATA_ACCESS)); } -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #endif /* CONFIG_SPARC64 */ #endif /* !(_SPARC64_SPITFIRE_H) */ diff --git a/arch/sparc/include/asm/starfire.h b/arch/sparc/include/asm/starfire.h index fb1a8c499cb0..8e511ed78775 100644 --- a/arch/sparc/include/asm/starfire.h +++ b/arch/sparc/include/asm/starfire.h @@ -8,7 +8,7 @@ #ifndef _SPARC64_STARFIRE_H #define _SPARC64_STARFIRE_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern int this_is_starfire; diff --git a/arch/sparc/include/asm/thread_info_32.h b/arch/sparc/include/asm/thread_info_32.h index 45b4955b253f..fdaf7b171e0a 100644 --- a/arch/sparc/include/asm/thread_info_32.h +++ b/arch/sparc/include/asm/thread_info_32.h @@ -14,7 +14,7 @@ #ifdef __KERNEL__ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/ptrace.h> #include <asm/page.h> @@ -72,7 +72,7 @@ register struct thread_info *current_thread_info_reg asm("g6"); */ #define THREAD_SIZE_ORDER 1 -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ /* Size of kernel stack for each process */ #define THREAD_SIZE (2 * PAGE_SIZE) diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h index 1a44372e2bc0..c8a73dff27f8 100644 --- a/arch/sparc/include/asm/thread_info_64.h +++ b/arch/sparc/include/asm/thread_info_64.h @@ -26,7 +26,7 @@ #include <asm/page.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/ptrace.h> #include <asm/types.h> @@ -64,7 +64,7 @@ struct thread_info { __attribute__ ((aligned(64))); }; -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ /* offsets into the thread_info struct for assembly code access */ #define TI_TASK 0x00000000 @@ -110,7 +110,7 @@ struct thread_info { /* * macros/functions for gaining access to the thread information structure */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define INIT_THREAD_INFO(tsk) \ { \ @@ -150,7 +150,7 @@ extern struct thread_info *current_thread_info(void); #define set_thread_fpdepth(val) (__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_FPDEPTH] = (val)) #define get_thread_wsaved() (__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_WSAVED]) #define set_thread_wsaved(val) (__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_WSAVED] = (val)) -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ /* * Thread information flags, only 16 bits are available as we encode @@ -228,14 +228,14 @@ extern struct thread_info *current_thread_info(void); * Note that there are only 8 bits available. */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define thread32_stack_is_64bit(__SP) (((__SP) & 0x1) != 0) #define test_thread_64bit_stack(__SP) \ ((test_thread_flag(TIF_32BIT) && !thread32_stack_is_64bit(__SP)) ? 
\ false : true) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __KERNEL__ */ diff --git a/arch/sparc/include/asm/trap_block.h b/arch/sparc/include/asm/trap_block.h index ace0d48e837e..6cf2a60a0156 100644 --- a/arch/sparc/include/asm/trap_block.h +++ b/arch/sparc/include/asm/trap_block.h @@ -7,7 +7,7 @@ #include <asm/hypervisor.h> #include <asm/asi.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* Trap handling code needs to get at a few critical values upon * trap entry and to process TSB misses. These cannot be in the @@ -91,7 +91,7 @@ extern struct sun4v_2insn_patch_entry __sun_m7_2insn_patch, __sun_m7_2insn_patch_end; -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #define TRAP_PER_CPU_THREAD 0x00 #define TRAP_PER_CPU_PGD_PADDR 0x08 diff --git a/arch/sparc/include/asm/traps.h b/arch/sparc/include/asm/traps.h index 2fba2602ba69..e4e10b0e7887 100644 --- a/arch/sparc/include/asm/traps.h +++ b/arch/sparc/include/asm/traps.h @@ -9,7 +9,7 @@ #include <uapi/asm/traps.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* This is for V8 compliant Sparc CPUS */ struct tt_entry { unsigned long inst_one; @@ -21,5 +21,5 @@ struct tt_entry { /* We set this to _start in system setup. */ extern struct tt_entry *sparc_ttable; -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #endif /* !(_SPARC_TRAPS_H) */ diff --git a/arch/sparc/include/asm/tsb.h b/arch/sparc/include/asm/tsb.h index 522a677e050d..239be259e166 100644 --- a/arch/sparc/include/asm/tsb.h +++ b/arch/sparc/include/asm/tsb.h @@ -59,7 +59,7 @@ * The kernel TSB is locked into the TLB by virtue of being in the * kernel image, so we don't play these games for swapper_tsb access. */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct tsb_ldquad_phys_patch_entry { unsigned int addr; unsigned int sun4u_insn; diff --git a/arch/sparc/include/asm/ttable.h b/arch/sparc/include/asm/ttable.h index 8f6469408019..b32d3068cce1 100644 --- a/arch/sparc/include/asm/ttable.h +++ b/arch/sparc/include/asm/ttable.h @@ -5,7 +5,7 @@ #include <asm/utrap.h> #include <asm/pil.h> -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #include <asm/thread_info.h> #endif diff --git a/arch/sparc/include/asm/turbosparc.h b/arch/sparc/include/asm/turbosparc.h index 23df777f9cea..5f73263b6ded 100644 --- a/arch/sparc/include/asm/turbosparc.h +++ b/arch/sparc/include/asm/turbosparc.h @@ -57,7 +57,7 @@ #define TURBOSPARC_WTENABLE 0x00000020 /* Write thru for dcache */ #define TURBOSPARC_SNENABLE 0x40000000 /* DVMA snoop enable */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* Bits [13:5] select one of 512 instruction cache tags */ static inline void turbosparc_inv_insn_tag(unsigned long addr) @@ -121,6 +121,6 @@ static inline unsigned long turbosparc_get_ccreg(void) return regval; } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* !(_SPARC_TURBOSPARC_H) */ diff --git a/arch/sparc/include/asm/upa.h b/arch/sparc/include/asm/upa.h index 782691b30f54..b1df3a7f40ed 100644 --- a/arch/sparc/include/asm/upa.h +++ b/arch/sparc/include/asm/upa.h @@ -24,7 +24,7 @@ #define UPA_PORTID_ID 0x000000000000ffff /* Module Identification bits */ /* UPA I/O space accessors */ -#if defined(__KERNEL__) && !defined(__ASSEMBLY__) +#if defined(__KERNEL__) && !defined(__ASSEMBLER__) static inline unsigned char _upa_readb(unsigned long addr) { unsigned char ret; @@ -105,6 +105,6 @@ static inline void _upa_writeq(unsigned long q, unsigned long addr) #define upa_writew(__w, __addr) (_upa_writew((__w), (unsigned long)(__addr))) #define upa_writel(__l, __addr) 
(_upa_writel((__l), (unsigned long)(__addr))) #define upa_writeq(__q, __addr) (_upa_writeq((__q), (unsigned long)(__addr))) -#endif /* __KERNEL__ && !__ASSEMBLY__ */ +#endif /* __KERNEL__ && !__ASSEMBLER__ */ #endif /* !(_SPARC64_UPA_H) */ diff --git a/arch/sparc/include/asm/vaddrs.h b/arch/sparc/include/asm/vaddrs.h index 4fec0341e2a8..da567600c897 100644 --- a/arch/sparc/include/asm/vaddrs.h +++ b/arch/sparc/include/asm/vaddrs.h @@ -31,7 +31,7 @@ */ #define SRMMU_NOCACHE_ALCRATIO 64 /* 256 pages per 64MB of system RAM */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/kmap_size.h> enum fixed_addresses { diff --git a/arch/sparc/include/asm/viking.h b/arch/sparc/include/asm/viking.h index 08ffc605035f..bbb714de43c4 100644 --- a/arch/sparc/include/asm/viking.h +++ b/arch/sparc/include/asm/viking.h @@ -110,7 +110,7 @@ #define VIKING_PTAG_DIRTY 0x00010000 /* Block has been modified */ #define VIKING_PTAG_SHARED 0x00000100 /* Shared with some other cache */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ static inline void viking_flush_icache(void) { @@ -250,6 +250,6 @@ static inline unsigned long viking_hwprobe(unsigned long vaddr) return val; } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* !(_SPARC_VIKING_H) */ diff --git a/arch/sparc/include/asm/visasm.h b/arch/sparc/include/asm/visasm.h index 7903e84e09e0..71eb4e9afb3e 100644 --- a/arch/sparc/include/asm/visasm.h +++ b/arch/sparc/include/asm/visasm.h @@ -45,7 +45,7 @@ #define VISExitHalfFast \ wr %o5, 0, %fprs; -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ static inline void save_and_clear_fpu(void) { __asm__ __volatile__ ( " rd %%fprs, %%o5\n" diff --git a/arch/sparc/include/uapi/asm/ptrace.h b/arch/sparc/include/uapi/asm/ptrace.h index abe640037a55..2eb677f4eb6a 100644 --- a/arch/sparc/include/uapi/asm/ptrace.h +++ b/arch/sparc/include/uapi/asm/ptrace.h @@ -15,7 +15,7 @@ */ #define PT_REGS_MAGIC 0x57ac6c00 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> @@ -88,7 +88,7 @@ struct sparc_trapf { unsigned long _unused; struct pt_regs *regs; }; -#endif /* (!__ASSEMBLY__) */ +#endif /* (!__ASSEMBLER__) */ #else /* 32 bit sparc */ @@ -97,7 +97,7 @@ struct sparc_trapf { /* This struct defines the way the registers are stored on the * stack during a system call and basically all traps. */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> @@ -125,11 +125,11 @@ struct sparc_stackf { unsigned long xargs[6]; unsigned long xxargs[1]; }; -#endif /* (!__ASSEMBLY__) */ +#endif /* (!__ASSEMBLER__) */ #endif /* (defined(__sparc__) && defined(__arch64__))*/ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define TRACEREG_SZ sizeof(struct pt_regs) #define STACKFRAME_SZ sizeof(struct sparc_stackf) @@ -137,7 +137,7 @@ struct sparc_stackf { #define TRACEREG32_SZ sizeof(struct pt_regs32) #define STACKFRAME32_SZ sizeof(struct sparc_stackf32) -#endif /* (!__ASSEMBLY__) */ +#endif /* (!__ASSEMBLER__) */ #define UREG_G0 0 #define UREG_G1 1 @@ -161,30 +161,30 @@ struct sparc_stackf { #if defined(__sparc__) && defined(__arch64__) /* 64 bit sparc */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ /* For assembly code. 
*/ #define TRACEREG_SZ 0xa0 #define STACKFRAME_SZ 0xc0 #define TRACEREG32_SZ 0x50 #define STACKFRAME32_SZ 0x60 -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #else /* (defined(__sparc__) && defined(__arch64__)) */ /* 32 bit sparc */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ -#else /* (!__ASSEMBLY__) */ +#else /* (!__ASSEMBLER__) */ /* For assembly code. */ #define TRACEREG_SZ 0x50 #define STACKFRAME_SZ 0x60 -#endif /* (!__ASSEMBLY__) */ +#endif /* (!__ASSEMBLER__) */ #endif /* (defined(__sparc__) && defined(__arch64__)) */ diff --git a/arch/sparc/include/uapi/asm/signal.h b/arch/sparc/include/uapi/asm/signal.h index b61382924725..9c64d7cb85c2 100644 --- a/arch/sparc/include/uapi/asm/signal.h +++ b/arch/sparc/include/uapi/asm/signal.h @@ -105,7 +105,7 @@ #define __old_sigaction32 sigaction32 #endif -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ typedef unsigned long __old_sigset_t; /* at least 32 bits */ @@ -176,6 +176,6 @@ typedef struct sigaltstack { } stack_t; -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #endif /* _UAPI__SPARC_SIGNAL_H */ diff --git a/arch/sparc/include/uapi/asm/traps.h b/arch/sparc/include/uapi/asm/traps.h index 930db746f8bd..43fe5b8fe8be 100644 --- a/arch/sparc/include/uapi/asm/traps.h +++ b/arch/sparc/include/uapi/asm/traps.h @@ -10,8 +10,8 @@ #define NUM_SPARC_TRAPS 255 -#ifndef __ASSEMBLY__ -#endif /* !(__ASSEMBLY__) */ +#ifndef __ASSEMBLER__ +#endif /* !(__ASSEMBLER__) */ /* For patching the trap table at boot time, we need to know how to * form various common Sparc instructions. Thus these macros... diff --git a/arch/sparc/include/uapi/asm/utrap.h b/arch/sparc/include/uapi/asm/utrap.h index d890b7fc6e83..a489b08b6a33 100644 --- a/arch/sparc/include/uapi/asm/utrap.h +++ b/arch/sparc/include/uapi/asm/utrap.h @@ -44,9 +44,9 @@ #define UTH_NOCHANGE (-1) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ typedef int utrap_entry_t; typedef void *utrap_handler_t; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* !(__ASM_SPARC64_PROCESSOR_H) */ diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index 36f2727e1445..22170d4f8e06 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -4,8 +4,6 @@ # Makefile for the linux kernel. 
# -asflags-y := -ansi - # Undefine sparc when processing vmlinux.lds - it is used # And teach CPP we are doing $(BITS) builds (for this case) CPPFLAGS_vmlinux.lds := -Usparc -m$(BITS) diff --git a/arch/sparc/kernel/adi_64.c b/arch/sparc/kernel/adi_64.c index e0e4fc527b24..18036a43cf56 100644 --- a/arch/sparc/kernel/adi_64.c +++ b/arch/sparc/kernel/adi_64.c @@ -202,7 +202,7 @@ static tag_storage_desc_t *alloc_tag_store(struct mm_struct *mm, } else { size = sizeof(tag_storage_desc_t)*max_desc; - mm->context.tag_store = kzalloc(size, GFP_NOWAIT|__GFP_NOWARN); + mm->context.tag_store = kzalloc(size, GFP_NOWAIT); if (mm->context.tag_store == NULL) { tag_desc = NULL; goto out; @@ -281,7 +281,7 @@ static tag_storage_desc_t *alloc_tag_store(struct mm_struct *mm, size = (size + (PAGE_SIZE-adi_blksize()))/PAGE_SIZE; size = size * PAGE_SIZE; } - tags = kzalloc(size, GFP_NOWAIT|__GFP_NOWARN); + tags = kzalloc(size, GFP_NOWAIT); if (tags == NULL) { tag_desc->tag_users = 0; tag_desc = NULL; diff --git a/arch/sparc/kernel/apc.c b/arch/sparc/kernel/apc.c index d44725d37e30..849db20e7165 100644 --- a/arch/sparc/kernel/apc.c +++ b/arch/sparc/kernel/apc.c @@ -28,7 +28,6 @@ * #define APC_DEBUG_LED */ -#define APC_MINOR MISC_DYNAMIC_MINOR #define APC_OBPNAME "power-management" #define APC_DEVNAME "apc" @@ -138,7 +137,7 @@ static const struct file_operations apc_fops = { .llseek = noop_llseek, }; -static struct miscdevice apc_miscdev = { APC_MINOR, APC_DEVNAME, &apc_fops }; +static struct miscdevice apc_miscdev = { MISC_DYNAMIC_MINOR, APC_DEVNAME, &apc_fops }; static int apc_probe(struct platform_device *op) { diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c index ffdc15588ac2..f7fc6f2af2f2 100644 --- a/arch/sparc/kernel/ds.c +++ b/arch/sparc/kernel/ds.c @@ -781,14 +781,17 @@ void ldom_set_var(const char *var, const char *value) } pkt; char *base, *p; int msg_len, loops; + size_t var_len, value_len; - if (strlen(var) + strlen(value) + 2 > - sizeof(pkt) - sizeof(pkt.header)) { - printk(KERN_ERR PFX - "contents length: %zu, which more than max: %lu," - "so could not set (%s) variable to (%s).\n", - strlen(var) + strlen(value) + 2, - sizeof(pkt) - sizeof(pkt.header), var, value); + var_len = strlen(var) + 1; + value_len = strlen(value) + 1; + + if (var_len + value_len > sizeof(pkt) - sizeof(pkt.header)) { + pr_err(PFX + "contents length: %zu, which more than max: %lu," + "so could not set (%s) variable to (%s).\n", + var_len + value_len, + sizeof(pkt) - sizeof(pkt.header), var, value); return; } @@ -797,10 +800,10 @@ void ldom_set_var(const char *var, const char *value) pkt.header.data.handle = cp->handle; pkt.header.msg.hdr.type = DS_VAR_SET_REQ; base = p = &pkt.header.msg.name_and_value[0]; - strcpy(p, var); - p += strlen(var) + 1; - strcpy(p, value); - p += strlen(value) + 1; + strscpy(p, var, var_len); + p += var_len; + strscpy(p, value, value_len); + p += value_len; msg_len = (sizeof(struct ds_data) + sizeof(struct ds_var_set_msg) + @@ -910,7 +913,7 @@ static int register_services(struct ds_info *dp) pbuf.req.handle = cp->handle; pbuf.req.major = 1; pbuf.req.minor = 0; - strcpy(pbuf.id_buf, cp->service_id); + strscpy(pbuf.id_buf, cp->service_id); err = __ds_send(lp, &pbuf, msg_len); if (err > 0) diff --git a/arch/sparc/kernel/leon_pci.c b/arch/sparc/kernel/leon_pci.c index 8de6646e9ce8..10934dfa987a 100644 --- a/arch/sparc/kernel/leon_pci.c +++ b/arch/sparc/kernel/leon_pci.c @@ -60,30 +60,3 @@ void leon_pci_init(struct platform_device *ofdev, struct leon_pci_info *info) 
pci_assign_unassigned_resources(); pci_bus_add_devices(root_bus); } - -int pcibios_enable_device(struct pci_dev *dev, int mask) -{ - struct resource *res; - u16 cmd, oldcmd; - int i; - - pci_read_config_word(dev, PCI_COMMAND, &cmd); - oldcmd = cmd; - - pci_dev_for_each_resource(dev, res, i) { - /* Only set up the requested stuff */ - if (!(mask & (1<<i))) - continue; - - if (res->flags & IORESOURCE_IO) - cmd |= PCI_COMMAND_IO; - if (res->flags & IORESOURCE_MEM) - cmd |= PCI_COMMAND_MEMORY; - } - - if (cmd != oldcmd) { - pci_info(dev, "enabling device (%04x -> %04x)\n", oldcmd, cmd); - pci_write_config_word(dev, PCI_COMMAND, cmd); - } - return 0; -} diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c index b8c51cc23d96..49740450a685 100644 --- a/arch/sparc/kernel/module.c +++ b/arch/sparc/kernel/module.c @@ -87,6 +87,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs, break; #ifdef CONFIG_SPARC64 case R_SPARC_64: + case R_SPARC_UA64: location[0] = v >> 56; location[1] = v >> 48; location[2] = v >> 40; @@ -141,7 +142,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs, break; default: - printk(KERN_ERR "module %s: Unknown relocation: %x\n", + printk(KERN_ERR "module %s: Unknown relocation: 0x%x\n", me->name, (int) (ELF_R_TYPE(rel[i].r_info) & 0xff)); return -ENOEXEC; diff --git a/arch/sparc/kernel/of_device_32.c b/arch/sparc/kernel/of_device_32.c index 06012e68bdca..284a4cafa432 100644 --- a/arch/sparc/kernel/of_device_32.c +++ b/arch/sparc/kernel/of_device_32.c @@ -387,6 +387,7 @@ static struct platform_device * __init scan_one_device(struct device_node *dp, if (of_device_register(op)) { printk("%pOF: Could not register of device.\n", dp); + put_device(&op->dev); kfree(op); op = NULL; } diff --git a/arch/sparc/kernel/of_device_64.c b/arch/sparc/kernel/of_device_64.c index f98c2901f335..f53092b07b9e 100644 --- a/arch/sparc/kernel/of_device_64.c +++ b/arch/sparc/kernel/of_device_64.c @@ -677,6 +677,7 @@ static struct platform_device * __init scan_one_device(struct device_node *dp, if (of_device_register(op)) { printk("%pOF: Could not register of device.\n", dp); + put_device(&op->dev); kfree(op); op = NULL; } diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index ddac216a2aff..a9448088e762 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -722,33 +722,6 @@ struct pci_bus *pci_scan_one_pbm(struct pci_pbm_info *pbm, return bus; } -int pcibios_enable_device(struct pci_dev *dev, int mask) -{ - struct resource *res; - u16 cmd, oldcmd; - int i; - - pci_read_config_word(dev, PCI_COMMAND, &cmd); - oldcmd = cmd; - - pci_dev_for_each_resource(dev, res, i) { - /* Only set up the requested stuff */ - if (!(mask & (1<<i))) - continue; - - if (res->flags & IORESOURCE_IO) - cmd |= PCI_COMMAND_IO; - if (res->flags & IORESOURCE_MEM) - cmd |= PCI_COMMAND_MEMORY; - } - - if (cmd != oldcmd) { - pci_info(dev, "enabling device (%04x -> %04x)\n", oldcmd, cmd); - pci_write_config_word(dev, PCI_COMMAND, cmd); - } - return 0; -} - /* Platform support for /proc/bus/pci/X/Y mmap()s. */ int pci_iobar_pfn(struct pci_dev *pdev, int bar, struct vm_area_struct *vma) { diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c index 25fe0a061732..d7c911724435 100644 --- a/arch/sparc/kernel/pcic.c +++ b/arch/sparc/kernel/pcic.c @@ -16,6 +16,7 @@ #include <linux/init.h> #include <linux/mm.h> #include <linux/slab.h> +#include <linux/string.h> #include <linux/jiffies.h> #include <asm/swift.h> /* for cache flushing. 
*/ @@ -352,7 +353,7 @@ int __init pcic_probe(void) pbm = &pcic->pbm; pbm->prom_node = node; prom_getstring(node, "name", namebuf, 63); namebuf[63] = 0; - strcpy(pbm->prom_name, namebuf); + strscpy(pbm->prom_name, namebuf); { extern int pcic_nmi_trap_patch[4]; @@ -477,7 +478,7 @@ static void pcic_map_pci_device(struct linux_pcic *pcic, int j; if (node == 0 || node == -1) { - strcpy(namebuf, "???"); + strscpy(namebuf, "???"); } else { prom_getstring(node, "name", namebuf, 63); namebuf[63] = 0; } @@ -536,7 +537,7 @@ pcic_fill_irq(struct linux_pcic *pcic, struct pci_dev *dev, int node) char namebuf[64]; if (node == 0 || node == -1) { - strcpy(namebuf, "???"); + strscpy(namebuf, "???"); } else { prom_getstring(node, "name", namebuf, sizeof(namebuf)); } @@ -641,33 +642,6 @@ void pcibios_fixup_bus(struct pci_bus *bus) } } -int pcibios_enable_device(struct pci_dev *dev, int mask) -{ - struct resource *res; - u16 cmd, oldcmd; - int i; - - pci_read_config_word(dev, PCI_COMMAND, &cmd); - oldcmd = cmd; - - pci_dev_for_each_resource(dev, res, i) { - /* Only set up the requested stuff */ - if (!(mask & (1<<i))) - continue; - - if (res->flags & IORESOURCE_IO) - cmd |= PCI_COMMAND_IO; - if (res->flags & IORESOURCE_MEM) - cmd |= PCI_COMMAND_MEMORY; - } - - if (cmd != oldcmd) { - pci_info(dev, "enabling device (%04x -> %04x)\n", oldcmd, cmd); - pci_write_config_word(dev, PCI_COMMAND, cmd); - } - return 0; -} - /* Makes compiler happy */ static volatile int pcic_timer_dummy; diff --git a/arch/sparc/kernel/prom_32.c b/arch/sparc/kernel/prom_32.c index a67dd67f10c8..cd94f1e8d644 100644 --- a/arch/sparc/kernel/prom_32.c +++ b/arch/sparc/kernel/prom_32.c @@ -187,14 +187,16 @@ char * __init build_path_component(struct device_node *dp) { const char *name = of_get_property(dp, "name", NULL); char tmp_buf[64], *n; + size_t n_sz; tmp_buf[0] = '\0'; __build_path_component(dp, tmp_buf); if (tmp_buf[0] == '\0') - strcpy(tmp_buf, name); + strscpy(tmp_buf, name); - n = prom_early_alloc(strlen(tmp_buf) + 1); - strcpy(n, tmp_buf); + n_sz = strlen(tmp_buf) + 1; + n = prom_early_alloc(n_sz); + strscpy(n, tmp_buf, n_sz); return n; } @@ -204,13 +206,14 @@ extern void restore_current(void); void __init of_console_init(void) { char *msg = "OF stdout device is: %s\n"; + const size_t of_console_path_sz = 256; struct device_node *dp; unsigned long flags; const char *type; phandle node; int skip, tmp, fd; - of_console_path = prom_early_alloc(256); + of_console_path = prom_early_alloc(of_console_path_sz); switch (prom_vers) { case PROM_V0: @@ -297,7 +300,7 @@ void __init of_console_init(void) prom_printf("No stdout-path in root node.\n"); prom_halt(); } - strcpy(of_console_path, path); + strscpy(of_console_path, path, of_console_path_sz); } break; } diff --git a/arch/sparc/kernel/prom_64.c b/arch/sparc/kernel/prom_64.c index ba82884cb92a..aa4799cbb9c1 100644 --- a/arch/sparc/kernel/prom_64.c +++ b/arch/sparc/kernel/prom_64.c @@ -361,14 +361,16 @@ char * __init build_path_component(struct device_node *dp) { const char *name = of_get_property(dp, "name", NULL); char tmp_buf[64], *n; + size_t n_sz; tmp_buf[0] = '\0'; __build_path_component(dp, tmp_buf); if (tmp_buf[0] == '\0') - strcpy(tmp_buf, name); + strscpy(tmp_buf, name); - n = prom_early_alloc(strlen(tmp_buf) + 1); - strcpy(n, tmp_buf); + n_sz = strlen(tmp_buf) + 1; + n = prom_early_alloc(n_sz); + strscpy(n, tmp_buf, n_sz); return n; } diff --git a/arch/sparc/kernel/prom_common.c b/arch/sparc/kernel/prom_common.c index c9ec70888a39..d258fd10db01 100644 --- 
a/arch/sparc/kernel/prom_common.c +++ b/arch/sparc/kernel/prom_common.c @@ -120,11 +120,14 @@ EXPORT_SYMBOL(of_find_in_proplist); */ static int __init handle_nextprop_quirks(char *buf, const char *name) { - if (!name || strlen(name) == 0) + size_t name_len; + + name_len = name ? strlen(name) : 0; + if (name_len == 0) return -1; #ifdef CONFIG_SPARC32 - strcpy(buf, name); + strscpy(buf, name, name_len + 1); #endif return 0; } diff --git a/arch/sparc/lib/M7memcpy.S b/arch/sparc/lib/M7memcpy.S index cbd42ea7c3f7..99357bfa8e82 100644 --- a/arch/sparc/lib/M7memcpy.S +++ b/arch/sparc/lib/M7memcpy.S @@ -696,16 +696,16 @@ FUNC_NAME: EX_LD_FP(LOAD(ldd, %o4+40, %f26), memcpy_retl_o2_plus_o5_plus_40) faligndata %f24, %f26, %f10 EX_ST_FP(STORE(std, %f6, %o0+24), memcpy_retl_o2_plus_o5_plus_40) - EX_LD_FP(LOAD(ldd, %o4+48, %f28), memcpy_retl_o2_plus_o5_plus_40) + EX_LD_FP(LOAD(ldd, %o4+48, %f28), memcpy_retl_o2_plus_o5_plus_32) faligndata %f26, %f28, %f12 - EX_ST_FP(STORE(std, %f8, %o0+32), memcpy_retl_o2_plus_o5_plus_40) + EX_ST_FP(STORE(std, %f8, %o0+32), memcpy_retl_o2_plus_o5_plus_32) add %o4, 64, %o4 - EX_LD_FP(LOAD(ldd, %o4-8, %f30), memcpy_retl_o2_plus_o5_plus_40) + EX_LD_FP(LOAD(ldd, %o4-8, %f30), memcpy_retl_o2_plus_o5_plus_24) faligndata %f28, %f30, %f14 - EX_ST_FP(STORE(std, %f10, %o0+40), memcpy_retl_o2_plus_o5_plus_40) - EX_ST_FP(STORE(std, %f12, %o0+48), memcpy_retl_o2_plus_o5_plus_40) + EX_ST_FP(STORE(std, %f10, %o0+40), memcpy_retl_o2_plus_o5_plus_24) + EX_ST_FP(STORE(std, %f12, %o0+48), memcpy_retl_o2_plus_o5_plus_16) add %o0, 64, %o0 - EX_ST_FP(STORE(std, %f14, %o0-8), memcpy_retl_o2_plus_o5_plus_40) + EX_ST_FP(STORE(std, %f14, %o0-8), memcpy_retl_o2_plus_o5_plus_8) fsrc2 %f30, %f14 bgu,pt %xcc, .Lunalign_sloop prefetch [%o4 + (8 * BLOCK_SIZE)], 20 @@ -728,7 +728,7 @@ FUNC_NAME: add %o4, 8, %o4 faligndata %f0, %f2, %f16 subcc %o5, 8, %o5 - EX_ST_FP(STORE(std, %f16, %o0), memcpy_retl_o2_plus_o5) + EX_ST_FP(STORE(std, %f16, %o0), memcpy_retl_o2_plus_o5_plus_8) fsrc2 %f2, %f0 bgu,pt %xcc, .Lunalign_by8 add %o0, 8, %o0 @@ -772,7 +772,7 @@ FUNC_NAME: subcc %o5, 0x20, %o5 EX_ST(STORE(stx, %o3, %o0 + 0x00), memcpy_retl_o2_plus_o5_plus_32) EX_ST(STORE(stx, %g2, %o0 + 0x08), memcpy_retl_o2_plus_o5_plus_24) - EX_ST(STORE(stx, %g7, %o0 + 0x10), memcpy_retl_o2_plus_o5_plus_24) + EX_ST(STORE(stx, %g7, %o0 + 0x10), memcpy_retl_o2_plus_o5_plus_16) EX_ST(STORE(stx, %o4, %o0 + 0x18), memcpy_retl_o2_plus_o5_plus_8) bne,pt %xcc, 1b add %o0, 0x20, %o0 @@ -804,12 +804,12 @@ FUNC_NAME: brz,pt %o3, 2f sub %o2, %o3, %o2 -1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), memcpy_retl_o2_plus_g1) +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), memcpy_retl_o2_plus_o3) add %o1, 1, %o1 subcc %o3, 1, %o3 add %o0, 1, %o0 bne,pt %xcc, 1b - EX_ST(STORE(stb, %g2, %o0 - 0x01), memcpy_retl_o2_plus_g1_plus_1) + EX_ST(STORE(stb, %g2, %o0 - 0x01), memcpy_retl_o2_plus_o3_plus_1) 2: and %o1, 0x7, %o3 brz,pn %o3, .Lmedium_noprefetch_cp diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index ee5091dd67ed..783bdec0d7be 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -2,7 +2,7 @@ # Makefile for Sparc library files.. 
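The strcpy() conversions in the sparc ds/prom/pcic hunks above all follow the same pattern: size the destination explicitly (or let it be inferred from an array) and let strscpy() guarantee NUL-termination. A minimal sketch of the idiom, with a hypothetical struct and function name rather than the kernel's own:

	#include <linux/string.h>
	#include <linux/printk.h>

	struct example {
		char name[64];
	};

	static void copy_name(struct example *ex, const char *src)
	{
		/*
		 * strscpy() copies at most size - 1 bytes, always terminates
		 * the destination, and returns the number of bytes copied or
		 * -E2BIG if the source had to be truncated.  When the
		 * destination is an array, the two-argument form (as used for
		 * pbuf.id_buf and pbm->prom_name above) infers the size.
		 */
		ssize_t ret = strscpy(ex->name, src, sizeof(ex->name));

		if (ret == -E2BIG)
			pr_warn("example: name truncated\n");
	}
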
# -asflags-y := -ansi -DST_DIV0=0x02 +asflags-y := -DST_DIV0=0x02 lib-$(CONFIG_SPARC32) += ashrdi3.o lib-$(CONFIG_SPARC32) += memcpy.o memset.o diff --git a/arch/sparc/lib/Memcpy_utils.S b/arch/sparc/lib/Memcpy_utils.S index 64fbac28b3db..207343367bb2 100644 --- a/arch/sparc/lib/Memcpy_utils.S +++ b/arch/sparc/lib/Memcpy_utils.S @@ -137,6 +137,15 @@ ENTRY(memcpy_retl_o2_plus_63_8) ba,pt %xcc, __restore_asi add %o2, 8, %o0 ENDPROC(memcpy_retl_o2_plus_63_8) +ENTRY(memcpy_retl_o2_plus_o3) + ba,pt %xcc, __restore_asi + add %o2, %o3, %o0 +ENDPROC(memcpy_retl_o2_plus_o3) +ENTRY(memcpy_retl_o2_plus_o3_plus_1) + add %o3, 1, %o3 + ba,pt %xcc, __restore_asi + add %o2, %o3, %o0 +ENDPROC(memcpy_retl_o2_plus_o3_plus_1) ENTRY(memcpy_retl_o2_plus_o5) ba,pt %xcc, __restore_asi add %o2, %o5, %o0 diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S index 7ad58ebe0d00..df0ec1bd1948 100644 --- a/arch/sparc/lib/NG4memcpy.S +++ b/arch/sparc/lib/NG4memcpy.S @@ -281,7 +281,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ subcc %o5, 0x20, %o5 EX_ST(STORE(stx, %g1, %o0 + 0x00), memcpy_retl_o2_plus_o5_plus_32) EX_ST(STORE(stx, %g2, %o0 + 0x08), memcpy_retl_o2_plus_o5_plus_24) - EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10), memcpy_retl_o2_plus_o5_plus_24) + EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10), memcpy_retl_o2_plus_o5_plus_16) EX_ST(STORE(stx, %o4, %o0 + 0x18), memcpy_retl_o2_plus_o5_plus_8) bne,pt %icc, 1b add %o0, 0x20, %o0 diff --git a/arch/sparc/lib/NGmemcpy.S b/arch/sparc/lib/NGmemcpy.S index ee51c1230689..bbd3ea0a6482 100644 --- a/arch/sparc/lib/NGmemcpy.S +++ b/arch/sparc/lib/NGmemcpy.S @@ -79,8 +79,8 @@ #ifndef EX_RETVAL #define EX_RETVAL(x) x __restore_asi: - ret wr %g0, ASI_AIUS, %asi + ret restore ENTRY(NG_ret_i2_plus_i4_plus_1) ba,pt %xcc, __restore_asi @@ -125,15 +125,16 @@ ENTRY(NG_ret_i2_plus_g1_minus_56) ba,pt %xcc, __restore_asi add %i2, %g1, %i0 ENDPROC(NG_ret_i2_plus_g1_minus_56) -ENTRY(NG_ret_i2_plus_i4) +ENTRY(NG_ret_i2_plus_i4_plus_16) + add %i4, 16, %i4 ba,pt %xcc, __restore_asi add %i2, %i4, %i0 -ENDPROC(NG_ret_i2_plus_i4) -ENTRY(NG_ret_i2_plus_i4_minus_8) - sub %i4, 8, %i4 +ENDPROC(NG_ret_i2_plus_i4_plus_16) +ENTRY(NG_ret_i2_plus_i4_plus_8) + add %i4, 8, %i4 ba,pt %xcc, __restore_asi add %i2, %i4, %i0 -ENDPROC(NG_ret_i2_plus_i4_minus_8) +ENDPROC(NG_ret_i2_plus_i4_plus_8) ENTRY(NG_ret_i2_plus_8) ba,pt %xcc, __restore_asi add %i2, 8, %i0 @@ -160,6 +161,12 @@ ENTRY(NG_ret_i2_and_7_plus_i4) ba,pt %xcc, __restore_asi add %i2, %i4, %i0 ENDPROC(NG_ret_i2_and_7_plus_i4) +ENTRY(NG_ret_i2_and_7_plus_i4_plus_8) + and %i2, 7, %i2 + add %i4, 8, %i4 + ba,pt %xcc, __restore_asi + add %i2, %i4, %i0 +ENDPROC(NG_ret_i2_and_7_plus_i4) #endif .align 64 @@ -405,13 +412,13 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ andn %i2, 0xf, %i4 and %i2, 0xf, %i2 1: subcc %i4, 0x10, %i4 - EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_i4) + EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_i4_plus_16) add %i1, 0x08, %i1 - EX_LD(LOAD(ldx, %i1, %g1), NG_ret_i2_plus_i4) + EX_LD(LOAD(ldx, %i1, %g1), NG_ret_i2_plus_i4_plus_16) sub %i1, 0x08, %i1 - EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_i4) + EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_i4_plus_16) add %i1, 0x8, %i1 - EX_ST(STORE(stx, %g1, %i1 + %i3), NG_ret_i2_plus_i4_minus_8) + EX_ST(STORE(stx, %g1, %i1 + %i3), NG_ret_i2_plus_i4_plus_8) bgu,pt %XCC, 1b add %i1, 0x8, %i1 73: andcc %i2, 0x8, %g0 @@ -468,7 +475,7 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ subcc %i4, 0x8, %i4 srlx %g3, %i3, %i5 or %i5, %g2, %i5 - EX_ST(STORE(stx, %i5, %o0), 
NG_ret_i2_and_7_plus_i4) + EX_ST(STORE(stx, %i5, %o0), NG_ret_i2_and_7_plus_i4_plus_8) add %o0, 0x8, %o0 bgu,pt %icc, 1b sllx %g3, %g1, %g2 diff --git a/arch/sparc/lib/U1memcpy.S b/arch/sparc/lib/U1memcpy.S index 635398ec7540..154fbd35400c 100644 --- a/arch/sparc/lib/U1memcpy.S +++ b/arch/sparc/lib/U1memcpy.S @@ -164,17 +164,18 @@ ENTRY(U1_gs_40_fp) retl add %o0, %o2, %o0 ENDPROC(U1_gs_40_fp) -ENTRY(U1_g3_0_fp) - VISExitHalf - retl - add %g3, %o2, %o0 -ENDPROC(U1_g3_0_fp) ENTRY(U1_g3_8_fp) VISExitHalf add %g3, 8, %g3 retl add %g3, %o2, %o0 ENDPROC(U1_g3_8_fp) +ENTRY(U1_g3_16_fp) + VISExitHalf + add %g3, 16, %g3 + retl + add %g3, %o2, %o0 +ENDPROC(U1_g3_16_fp) ENTRY(U1_o2_0_fp) VISExitHalf retl @@ -547,18 +548,18 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 62: FINISH_VISCHUNK(o0, f44, f46) 63: UNEVEN_VISCHUNK_LAST(o0, f46, f0) -93: EX_LD_FP(LOAD(ldd, %o1, %f2), U1_g3_0_fp) +93: EX_LD_FP(LOAD(ldd, %o1, %f2), U1_g3_8_fp) add %o1, 8, %o1 subcc %g3, 8, %g3 faligndata %f0, %f2, %f8 - EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp) + EX_ST_FP(STORE(std, %f8, %o0), U1_g3_16_fp) bl,pn %xcc, 95f add %o0, 8, %o0 - EX_LD_FP(LOAD(ldd, %o1, %f0), U1_g3_0_fp) + EX_LD_FP(LOAD(ldd, %o1, %f0), U1_g3_8_fp) add %o1, 8, %o1 subcc %g3, 8, %g3 faligndata %f2, %f0, %f8 - EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp) + EX_ST_FP(STORE(std, %f8, %o0), U1_g3_16_fp) bge,pt %xcc, 93b add %o0, 8, %o0 diff --git a/arch/sparc/lib/U3memcpy.S b/arch/sparc/lib/U3memcpy.S index 9248d59c734c..bace3a18f836 100644 --- a/arch/sparc/lib/U3memcpy.S +++ b/arch/sparc/lib/U3memcpy.S @@ -267,6 +267,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ faligndata %f10, %f12, %f26 EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2) + and %o2, 0x3f, %o2 subcc GLOBAL_SPARE, 0x80, GLOBAL_SPARE add %o1, 0x40, %o1 bgu,pt %XCC, 1f @@ -336,7 +337,6 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ * Also notice how this code is careful not to perform a * load past the end of the src buffer. */ - and %o2, 0x3f, %o2 andcc %o2, 0x38, %g2 be,pn %XCC, 2f subcc %g2, 0x8, %g2 diff --git a/arch/sparc/mm/Makefile b/arch/sparc/mm/Makefile index 2d1752108d77..e9d232561c82 100644 --- a/arch/sparc/mm/Makefile +++ b/arch/sparc/mm/Makefile @@ -2,8 +2,6 @@ # Makefile for the linux Sparc-specific parts of the memory manager. # -asflags-y := -ansi - obj-$(CONFIG_SPARC64) += ultra.o tlb.o tsb.o obj-y += fault_$(BITS).o obj-y += init_$(BITS).o diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c index 4b9431311e05..4652e868663b 100644 --- a/arch/sparc/mm/hugetlbpage.c +++ b/arch/sparc/mm/hugetlbpage.c @@ -22,6 +22,26 @@ static pte_t sun4u_hugepage_shift_to_tte(pte_t entry, unsigned int shift) { + unsigned long hugepage_size = _PAGE_SZ4MB_4U; + + pte_val(entry) = pte_val(entry) & ~_PAGE_SZALL_4U; + + switch (shift) { + case HPAGE_256MB_SHIFT: + hugepage_size = _PAGE_SZ256MB_4U; + pte_val(entry) |= _PAGE_PMD_HUGE; + break; + case HPAGE_SHIFT: + pte_val(entry) |= _PAGE_PMD_HUGE; + break; + case HPAGE_64K_SHIFT: + hugepage_size = _PAGE_SZ64K_4U; + break; + default: + WARN_ONCE(1, "unsupported hugepage shift=%u\n", shift); + } + + pte_val(entry) = pte_val(entry) | hugepage_size; return entry; } diff --git a/arch/sparc/prom/Makefile b/arch/sparc/prom/Makefile index a1adc75d8055..92db8bb4ad4c 100644 --- a/arch/sparc/prom/Makefile +++ b/arch/sparc/prom/Makefile @@ -2,7 +2,6 @@ # Makefile for the Sun Boot PROM interface library under # Linux. 
# -asflags := -ansi lib-y := bootstr_$(BITS).o lib-y += init_$(BITS).o diff --git a/arch/sparc/prom/tree_64.c b/arch/sparc/prom/tree_64.c index 88793e5b0ab5..7388339bbd7e 100644 --- a/arch/sparc/prom/tree_64.c +++ b/arch/sparc/prom/tree_64.c @@ -272,7 +272,7 @@ char *prom_nextprop(phandle node, const char *oprop, char *buffer) return buffer; } if (oprop == buffer) { - strcpy (buf, oprop); + strscpy(buf, oprop); oprop = buf; } diff --git a/arch/um/Kconfig b/arch/um/Kconfig index 1d4def0db841..49781bee7905 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -39,6 +39,7 @@ config UML select HAVE_ARCH_TRACEHOOK select HAVE_SYSCALL_TRACEPOINTS select THREAD_INFO_IN_TASK + select SPARSE_IRQ config MMU bool diff --git a/arch/um/drivers/ssl.c b/arch/um/drivers/ssl.c index 277cea3d30eb..8006a5bd578c 100644 --- a/arch/um/drivers/ssl.c +++ b/arch/um/drivers/ssl.c @@ -199,4 +199,7 @@ static int ssl_non_raw_setup(char *str) return 1; } __setup("ssl-non-raw", ssl_non_raw_setup); -__channel_help(ssl_non_raw_setup, "set serial lines to non-raw mode"); +__uml_help(ssl_non_raw_setup, +"ssl-non-raw\n" +" Set serial lines to non-raw mode.\n\n" +); diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index f2b2feeeb455..37455e74d314 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -370,7 +370,7 @@ __uml_help(ubd_setup, " useful when a unique number should be given to the device. Note when\n" " specifying a label, the filename2 must be also presented. It can be\n" " an empty string, in which case the backing file is not used:\n" -" ubd0=File,,Serial\n" +" ubd0=File,,Serial\n\n" ); static int udb_setup(char *str) diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c index 9bbbddfe866b..25d9258fa592 100644 --- a/arch/um/drivers/vector_kern.c +++ b/arch/um/drivers/vector_kern.c @@ -1721,7 +1721,7 @@ static int __init vector_setup(char *str) __setup("vec", vector_setup); __uml_help(vector_setup, "vec[0-9]+:<option>=<value>,<option>=<value>\n" -" Configure a vector io network device.\n\n" +" Configure a vector io network device.\n\n" ); late_initcall(vector_init); diff --git a/arch/um/drivers/virtio_pcidev.c b/arch/um/drivers/virtio_pcidev.c index e9e23cc3f357..f9b4b6f7582c 100644 --- a/arch/um/drivers/virtio_pcidev.c +++ b/arch/um/drivers/virtio_pcidev.c @@ -598,6 +598,11 @@ static void virtio_pcidev_virtio_remove(struct virtio_device *vdev) kfree(dev); } +static void virtio_pcidev_virtio_shutdown(struct virtio_device *vdev) +{ + /* nothing to do, we just don't want queue shutdown */ +} + static struct virtio_device_id id_table[] = { { CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID, VIRTIO_DEV_ANY_ID }, { 0 }, @@ -609,6 +614,7 @@ static struct virtio_driver virtio_pcidev_virtio_driver = { .id_table = id_table, .probe = virtio_pcidev_virtio_probe, .remove = virtio_pcidev_virtio_remove, + .shutdown = virtio_pcidev_virtio_shutdown, }; static int __init virtio_pcidev_init(void) diff --git a/arch/um/include/asm/mmu_context.h b/arch/um/include/asm/mmu_context.h index 0bbb24868557..c727e56ba116 100644 --- a/arch/um/include/asm/mmu_context.h +++ b/arch/um/include/asm/mmu_context.h @@ -13,20 +13,9 @@ #include <asm/mm_hooks.h> #include <asm/mmu.h> -#define activate_mm activate_mm -static inline void activate_mm(struct mm_struct *old, struct mm_struct *new) -{ -} - static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { - unsigned cpu = smp_processor_id(); - - if (prev != next) { - cpumask_clear_cpu(cpu, mm_cpumask(prev)); 
- cpumask_set_cpu(cpu, mm_cpumask(next)); - } } #define init_new_context init_new_context diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h index 8a789c17acd8..7854d51b6639 100644 --- a/arch/um/include/asm/processor-generic.h +++ b/arch/um/include/asm/processor-generic.h @@ -71,7 +71,6 @@ extern void start_thread(struct pt_regs *regs, unsigned long entry, struct cpuinfo_um { unsigned long loops_per_jiffy; - int ipi_pipe[2]; int cache_alignment; union { __u32 x86_capability[NCAPINTS + NBUGINTS]; @@ -81,8 +80,6 @@ struct cpuinfo_um { extern struct cpuinfo_um boot_cpu_data; -#define cpu_data(cpu) boot_cpu_data -#define current_cpu_data boot_cpu_data #define cache_line_size() (boot_cpu_data.cache_alignment) #define KSTK_REG(tsk, reg) get_thread_reg(reg, &tsk->thread.switch_buf) diff --git a/arch/um/include/shared/as-layout.h b/arch/um/include/shared/as-layout.h index 2f9bfd99460a..7c7e17bce403 100644 --- a/arch/um/include/shared/as-layout.h +++ b/arch/um/include/shared/as-layout.h @@ -23,8 +23,9 @@ #define STUB_START stub_start #define STUB_CODE STUB_START #define STUB_DATA (STUB_CODE + UM_KERN_PAGE_SIZE) -#define STUB_DATA_PAGES 2 /* must be a power of two */ -#define STUB_END (STUB_DATA + STUB_DATA_PAGES * UM_KERN_PAGE_SIZE) +#define STUB_DATA_PAGES 2 +#define STUB_SIZE ((1 + STUB_DATA_PAGES) * UM_KERN_PAGE_SIZE) +#define STUB_END (STUB_START + STUB_SIZE) #ifndef __ASSEMBLER__ diff --git a/arch/um/include/shared/skas/stub-data.h b/arch/um/include/shared/skas/stub-data.h index c261a77a32f6..27db38e95df9 100644 --- a/arch/um/include/shared/skas/stub-data.h +++ b/arch/um/include/shared/skas/stub-data.h @@ -53,8 +53,7 @@ struct stub_syscall { }; struct stub_data { - unsigned long offset; - long err, child_err; + long err; int syscall_data_len; /* 128 leaves enough room for additional fields in the struct */ diff --git a/arch/um/kernel/dtb.c b/arch/um/kernel/dtb.c index 15c342426489..47cd3d869fb2 100644 --- a/arch/um/kernel/dtb.c +++ b/arch/um/kernel/dtb.c @@ -38,5 +38,5 @@ static int __init uml_dtb_setup(char *line, int *add) __uml_setup("dtb=", uml_dtb_setup, "dtb=<file>\n" -" Boot the kernel with the devicetree blob from the specified file.\n" +" Boot the kernel with the devicetree blob from the specified file.\n\n" ); diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index 0dfaf96bb7da..d69d137a0334 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -691,6 +691,11 @@ void __init init_IRQ(void) os_setup_epoll(); } +int __init arch_probe_nr_irqs(void) +{ + return NR_IRQS; +} + void sigchld_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs, void *mc) { diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index ae0fa2173778..17da0a870650 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -986,26 +986,26 @@ static int setup_time_travel(char *str) __setup("time-travel", setup_time_travel); __uml_help(setup_time_travel, "time-travel\n" -"This option just enables basic time travel mode, in which the clock/timers\n" -"inside the UML instance skip forward when there's nothing to do, rather than\n" -"waiting for real time to elapse. However, instance CPU speed is limited by\n" -"the real CPU speed, so e.g. 
a 10ms timer will always fire after ~10ms wall\n" -"clock (but quicker when there's nothing to do).\n" +" This option just enables basic time travel mode, in which the clock/timers\n" +" inside the UML instance skip forward when there's nothing to do, rather than\n" +" waiting for real time to elapse. However, instance CPU speed is limited by\n" +" the real CPU speed, so e.g. a 10ms timer will always fire after ~10ms wall\n" +" clock (but quicker when there's nothing to do).\n" "\n" "time-travel=inf-cpu\n" -"This enables time travel mode with infinite processing power, in which there\n" -"are no wall clock timers, and any CPU processing happens - as seen from the\n" -"guest - instantly. This can be useful for accurate simulation regardless of\n" -"debug overhead, physical CPU speed, etc. but is somewhat dangerous as it can\n" -"easily lead to getting stuck (e.g. if anything in the system busy loops).\n" +" This enables time travel mode with infinite processing power, in which there\n" +" are no wall clock timers, and any CPU processing happens - as seen from the\n" +" guest - instantly. This can be useful for accurate simulation regardless of\n" +" debug overhead, physical CPU speed, etc. but is somewhat dangerous as it can\n" +" easily lead to getting stuck (e.g. if anything in the system busy loops).\n" "\n" "time-travel=ext:[ID:]/path/to/socket\n" -"This enables time travel mode similar to =inf-cpu, except the system will\n" -"use the given socket to coordinate with a central scheduler, in order to\n" -"have more than one system simultaneously be on simulated time. The virtio\n" -"driver code in UML knows about this so you can also simulate networks and\n" -"devices using it, assuming the device has the right capabilities.\n" -"The optional ID is a 64-bit integer that's sent to the central scheduler.\n"); +" This enables time travel mode similar to =inf-cpu, except the system will\n" +" use the given socket to coordinate with a central scheduler, in order to\n" +" have more than one system simultaneously be on simulated time. The virtio\n" +" driver code in UML knows about this so you can also simulate networks and\n" +" devices using it, assuming the device has the right capabilities.\n" +" The optional ID is a 64-bit integer that's sent to the central scheduler.\n\n"); static int setup_time_travel_start(char *str) { @@ -1022,8 +1022,9 @@ static int setup_time_travel_start(char *str) __setup("time-travel-start=", setup_time_travel_start); __uml_help(setup_time_travel_start, "time-travel-start=<nanoseconds>\n" -"Configure the UML instance's wall clock to start at this value rather than\n" -"the host's wall clock at the time of UML boot.\n"); +" Configure the UML instance's wall clock to start at this value rather than\n" +" the host's wall clock at the time of UML boot.\n\n"); + static struct kobject *bc_time_kobject; static ssize_t bc_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 2f5ee045bc7a..cfbbbf8500c3 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -54,12 +54,9 @@ static void __init add_arg(char *arg) /* * These fields are initialized at boot time and not changed. - * XXX This structure is used only in the non-SMP case. Maybe this - * should be moved to smp.c. 
*/ struct cpuinfo_um boot_cpu_data = { .loops_per_jiffy = 0, - .ipi_pipe = { -1, -1 }, .cache_alignment = L1_CACHE_BYTES, .x86_capability = { 0 } }; @@ -331,9 +328,7 @@ int __init linux_main(int argc, char **argv, char **envp) host_task_size = get_top_address(envp); /* reserve a few pages for the stubs */ - stub_start = host_task_size - STUB_DATA_PAGES * PAGE_SIZE; - /* another page for the code portion */ - stub_start -= PAGE_SIZE; + stub_start = host_task_size - STUB_SIZE; host_task_size = stub_start; /* Limit TASK_SIZE to what is addressable by the page table */ diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index 78f48fa9db8b..0bc10cd4cbed 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -895,7 +895,7 @@ __uml_setup("noreboot", noreboot_cmd_param, "noreboot\n" " Rather than rebooting, exit always, akin to QEMU's -no-reboot option.\n" " This is useful if you're using CONFIG_PANIC_TIMEOUT in order to catch\n" -" crashes in CI\n"); +" crashes in CI\n\n"); void reboot_skas(void) { diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e3cccf4256ca..fa3b616af03a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -412,10 +412,6 @@ config HAVE_INTEL_TXT def_bool y depends on INTEL_IOMMU && ACPI -config X86_64_SMP - def_bool y - depends on X86_64 && SMP - config ARCH_SUPPORTS_UPROBES def_bool y @@ -1902,7 +1898,6 @@ config INTEL_TDX_HOST depends on X86_X2APIC select ARCH_KEEP_MEMBLOCK depends on CONTIG_ALLOC - depends on !KEXEC_CORE depends on X86_MCE help Intel Trust Domain Extensions (TDX) protects guest VMs from malicious diff --git a/arch/x86/Kconfig.assembler b/arch/x86/Kconfig.assembler index c827f694fb72..b1c59fb0a4c9 100644 --- a/arch/x86/Kconfig.assembler +++ b/arch/x86/Kconfig.assembler @@ -1,26 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # Copyright (C) 2020 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 
-config AS_AVX512 - def_bool $(as-instr,vpmovm2b %k1$(comma)%zmm5) - help - Supported by binutils >= 2.25 and LLVM integrated assembler - -config AS_GFNI - def_bool $(as-instr,vgf2p8mulb %xmm0$(comma)%xmm1$(comma)%xmm2) - help - Supported by binutils >= 2.30 and LLVM integrated assembler - -config AS_VAES - def_bool $(as-instr,vaesenc %ymm0$(comma)%ymm1$(comma)%ymm2) - help - Supported by binutils >= 2.30 and LLVM integrated assembler - -config AS_VPCLMULQDQ - def_bool $(as-instr,vpclmulqdq \$0x10$(comma)%ymm0$(comma)%ymm1$(comma)%ymm2) - help - Supported by binutils >= 2.30 and LLVM integrated assembler - config AS_WRUSS def_bool $(as-instr64,wrussq %rax$(comma)(%rbx)) help diff --git a/arch/x86/boot/bitops.h b/arch/x86/boot/bitops.h index 8518ae214c9b..79e15971529d 100644 --- a/arch/x86/boot/bitops.h +++ b/arch/x86/boot/bitops.h @@ -27,7 +27,7 @@ static inline bool variable_test_bit(int nr, const void *addr) bool v; const u32 *p = addr; - asm("btl %2,%1" CC_SET(c) : CC_OUT(c) (v) : "m" (*p), "Ir" (nr)); + asm("btl %2,%1" : "=@ccc" (v) : "m" (*p), "Ir" (nr)); return v; } diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index 60580836daf7..a3c58ebe3662 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -155,15 +155,15 @@ static inline void wrgs32(u32 v, addr_t addr) static inline bool memcmp_fs(const void *s1, addr_t s2, size_t len) { bool diff; - asm volatile("fs repe cmpsb" CC_SET(nz) - : CC_OUT(nz) (diff), "+D" (s1), "+S" (s2), "+c" (len)); + asm volatile("fs repe cmpsb" + : "=@ccnz" (diff), "+D" (s1), "+S" (s2), "+c" (len)); return diff; } static inline bool memcmp_gs(const void *s1, addr_t s2, size_t len) { bool diff; - asm volatile("gs repe cmpsb" CC_SET(nz) - : CC_OUT(nz) (diff), "+D" (s1), "+S" (s2), "+c" (len)); + asm volatile("gs repe cmpsb" + : "=@ccnz" (diff), "+D" (s1), "+S" (s2), "+c" (len)); return diff; } diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c index f35369bb14c5..b25c6a9303b7 100644 --- a/arch/x86/boot/string.c +++ b/arch/x86/boot/string.c @@ -32,8 +32,8 @@ int memcmp(const void *s1, const void *s2, size_t len) { bool diff; - asm("repe cmpsb" CC_SET(nz) - : CC_OUT(nz) (diff), "+D" (s1), "+S" (s2), "+c" (len)); + asm("repe cmpsb" + : "=@ccnz" (diff), "+D" (s1), "+S" (s2), "+c" (len)); return diff; } diff --git a/arch/x86/crypto/Kconfig b/arch/x86/crypto/Kconfig index d9c6fc78cf33..48d3076b6053 100644 --- a/arch/x86/crypto/Kconfig +++ b/arch/x86/crypto/Kconfig @@ -306,7 +306,7 @@ config CRYPTO_ARIA_AESNI_AVX2_X86_64 config CRYPTO_ARIA_GFNI_AVX512_X86_64 tristate "Ciphers: ARIA with modes: ECB, CTR (AVX512/GFNI)" - depends on 64BIT && AS_GFNI + depends on 64BIT select CRYPTO_SKCIPHER select CRYPTO_ALGAPI select CRYPTO_ARIA diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index dfba7e5e88ea..2d30d5d36145 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -46,10 +46,8 @@ obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o aesni-intel-$(CONFIG_64BIT) += aes-ctr-avx-x86_64.o \ aes-gcm-aesni-x86_64.o \ - aes-xts-avx-x86_64.o -ifeq ($(CONFIG_AS_VAES)$(CONFIG_AS_VPCLMULQDQ),yy) -aesni-intel-$(CONFIG_64BIT) += aes-gcm-avx10-x86_64.o -endif + aes-xts-avx-x86_64.o \ + aes-gcm-avx10-x86_64.o obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o diff --git a/arch/x86/crypto/aes-ctr-avx-x86_64.S b/arch/x86/crypto/aes-ctr-avx-x86_64.S index bbbfd80f5a50..2745918f68ee 
100644 --- a/arch/x86/crypto/aes-ctr-avx-x86_64.S +++ b/arch/x86/crypto/aes-ctr-avx-x86_64.S @@ -552,7 +552,6 @@ SYM_TYPED_FUNC_START(aes_xctr_crypt_aesni_avx) _aes_ctr_crypt 1 SYM_FUNC_END(aes_xctr_crypt_aesni_avx) -#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) .set VL, 32 .set USE_AVX512, 0 SYM_TYPED_FUNC_START(aes_ctr64_crypt_vaes_avx2) @@ -570,4 +569,3 @@ SYM_FUNC_END(aes_ctr64_crypt_vaes_avx512) SYM_TYPED_FUNC_START(aes_xctr_crypt_vaes_avx512) _aes_ctr_crypt 1 SYM_FUNC_END(aes_xctr_crypt_vaes_avx512) -#endif // CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ diff --git a/arch/x86/crypto/aes-xts-avx-x86_64.S b/arch/x86/crypto/aes-xts-avx-x86_64.S index db79cdf81588..a30753a3e207 100644 --- a/arch/x86/crypto/aes-xts-avx-x86_64.S +++ b/arch/x86/crypto/aes-xts-avx-x86_64.S @@ -886,7 +886,6 @@ SYM_TYPED_FUNC_START(aes_xts_decrypt_aesni_avx) _aes_xts_crypt 0 SYM_FUNC_END(aes_xts_decrypt_aesni_avx) -#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) .set VL, 32 .set USE_AVX512, 0 SYM_TYPED_FUNC_START(aes_xts_encrypt_vaes_avx2) @@ -904,4 +903,3 @@ SYM_FUNC_END(aes_xts_encrypt_vaes_avx512) SYM_TYPED_FUNC_START(aes_xts_decrypt_vaes_avx512) _aes_xts_crypt 0 SYM_FUNC_END(aes_xts_decrypt_vaes_avx512) -#endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */ diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 061b1ced93c5..d953ac470aae 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -828,10 +828,8 @@ static struct skcipher_alg skcipher_algs_##suffix[] = {{ \ }} DEFINE_AVX_SKCIPHER_ALGS(aesni_avx, "aesni-avx", 500); -#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) DEFINE_AVX_SKCIPHER_ALGS(vaes_avx2, "vaes-avx2", 600); DEFINE_AVX_SKCIPHER_ALGS(vaes_avx512, "vaes-avx512", 800); -#endif /* The common part of the x86_64 AES-GCM key struct */ struct aes_gcm_key { @@ -912,17 +910,8 @@ struct aes_gcm_key_avx10 { #define FLAG_RFC4106 BIT(0) #define FLAG_ENC BIT(1) #define FLAG_AVX BIT(2) -#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) -# define FLAG_AVX10_256 BIT(3) -# define FLAG_AVX10_512 BIT(4) -#else - /* - * This should cause all calls to the AVX10 assembly functions to be - * optimized out, avoiding the need to ifdef each call individually. - */ -# define FLAG_AVX10_256 0 -# define FLAG_AVX10_512 0 -#endif +#define FLAG_AVX10_256 BIT(3) +#define FLAG_AVX10_512 BIT(4) static inline struct aes_gcm_key * aes_gcm_key_get(struct crypto_aead *tfm, int flags) @@ -1519,7 +1508,6 @@ DEFINE_GCM_ALGS(aesni_avx, FLAG_AVX, "generic-gcm-aesni-avx", "rfc4106-gcm-aesni-avx", AES_GCM_KEY_AESNI_SIZE, 500); -#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) /* aes_gcm_algs_vaes_avx10_256 */ DEFINE_GCM_ALGS(vaes_avx10_256, FLAG_AVX10_256, "generic-gcm-vaes-avx10_256", "rfc4106-gcm-vaes-avx10_256", @@ -1529,7 +1517,6 @@ DEFINE_GCM_ALGS(vaes_avx10_256, FLAG_AVX10_256, DEFINE_GCM_ALGS(vaes_avx10_512, FLAG_AVX10_512, "generic-gcm-vaes-avx10_512", "rfc4106-gcm-vaes-avx10_512", AES_GCM_KEY_AVX10_SIZE, 800); -#endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */ static int __init register_avx_algs(void) { @@ -1551,7 +1538,6 @@ static int __init register_avx_algs(void) * Similarly, the assembler support was added at about the same time. * For simplicity, just always check for VAES and VPCLMULQDQ together. 
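With the CONFIG_AS_VAES and CONFIG_AS_VPCLMULQDQ preprocessor guards removed here, whether the VAES/VPCLMULQDQ code paths are used is decided purely at boot time from CPUID-derived feature flags, as register_avx_algs() just below continues to do. A condensed sketch of that runtime gate (the function name is hypothetical, not the kernel's):

	#include <asm/cpufeature.h>

	static int __init register_vaes_algs(void)
	{
		/* Skip registration when the CPU cannot execute the code. */
		if (!boot_cpu_has(X86_FEATURE_AVX2) ||
		    !boot_cpu_has(X86_FEATURE_VAES) ||
		    !boot_cpu_has(X86_FEATURE_VPCLMULQDQ))
			return 0;

		/* ...register the VAES/VPCLMULQDQ skciphers and AEADs... */
		return 0;
	}
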
*/ -#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) if (!boot_cpu_has(X86_FEATURE_AVX2) || !boot_cpu_has(X86_FEATURE_VAES) || !boot_cpu_has(X86_FEATURE_VPCLMULQDQ) || @@ -1592,7 +1578,7 @@ static int __init register_avx_algs(void) ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512)); if (err) return err; -#endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */ + return 0; } @@ -1607,12 +1593,10 @@ static void unregister_avx_algs(void) { unregister_skciphers(skcipher_algs_aesni_avx); unregister_aeads(aes_gcm_algs_aesni_avx); -#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) unregister_skciphers(skcipher_algs_vaes_avx2); unregister_skciphers(skcipher_algs_vaes_avx512); unregister_aeads(aes_gcm_algs_vaes_avx10_256); unregister_aeads(aes_gcm_algs_vaes_avx10_512); -#endif } #else /* CONFIG_X86_64 */ static struct aead_alg aes_gcm_algs_aesni[0]; diff --git a/arch/x86/crypto/aria-aesni-avx-asm_64.S b/arch/x86/crypto/aria-aesni-avx-asm_64.S index 9556dacd9841..932fb17308e7 100644 --- a/arch/x86/crypto/aria-aesni-avx-asm_64.S +++ b/arch/x86/crypto/aria-aesni-avx-asm_64.S @@ -295,7 +295,6 @@ vpshufb t1, t0, t2; \ vpxor t2, x7, x7; -#ifdef CONFIG_AS_GFNI #define aria_sbox_8way_gfni(x0, x1, x2, x3, \ x4, x5, x6, x7, \ t0, t1, t2, t3, \ @@ -318,8 +317,6 @@ vgf2p8affineinvqb $0, t2, x3, x3; \ vgf2p8affineinvqb $0, t2, x7, x7 -#endif /* CONFIG_AS_GFNI */ - #define aria_sbox_8way(x0, x1, x2, x3, \ x4, x5, x6, x7, \ t0, t1, t2, t3, \ @@ -561,7 +558,6 @@ y4, y5, y6, y7, \ mem_tmp, 8); -#ifdef CONFIG_AS_GFNI #define aria_fe_gfni(x0, x1, x2, x3, \ x4, x5, x6, x7, \ y0, y1, y2, y3, \ @@ -719,8 +715,6 @@ y4, y5, y6, y7, \ mem_tmp, 8); -#endif /* CONFIG_AS_GFNI */ - /* NB: section is mergeable, all elements must be aligned 16-byte blocks */ .section .rodata.cst16, "aM", @progbits, 16 .align 16 @@ -772,7 +766,6 @@ .Ltf_hi__x2__and__fwd_aff: .octa 0x3F893781E95FE1576CDA64D2BA0CB204 -#ifdef CONFIG_AS_GFNI /* AES affine: */ #define tf_aff_const BV8(1, 1, 0, 0, 0, 1, 1, 0) .Ltf_aff_bitmatrix: @@ -871,7 +864,6 @@ BV8(0, 0, 0, 0, 0, 1, 0, 0), BV8(0, 0, 0, 0, 0, 0, 1, 0), BV8(0, 0, 0, 0, 0, 0, 0, 1)) -#endif /* CONFIG_AS_GFNI */ /* 4-bit mask */ .section .rodata.cst4.L0f0f0f0f, "aM", @progbits, 4 @@ -1140,7 +1132,6 @@ SYM_TYPED_FUNC_START(aria_aesni_avx_ctr_crypt_16way) RET; SYM_FUNC_END(aria_aesni_avx_ctr_crypt_16way) -#ifdef CONFIG_AS_GFNI SYM_FUNC_START_LOCAL(__aria_aesni_avx_gfni_crypt_16way) /* input: * %r9: rk @@ -1359,4 +1350,3 @@ SYM_TYPED_FUNC_START(aria_aesni_avx_gfni_ctr_crypt_16way) FRAME_END RET; SYM_FUNC_END(aria_aesni_avx_gfni_ctr_crypt_16way) -#endif /* CONFIG_AS_GFNI */ diff --git a/arch/x86/crypto/aria-aesni-avx2-asm_64.S b/arch/x86/crypto/aria-aesni-avx2-asm_64.S index c60fa2980630..ed53d4f46bd7 100644 --- a/arch/x86/crypto/aria-aesni-avx2-asm_64.S +++ b/arch/x86/crypto/aria-aesni-avx2-asm_64.S @@ -302,7 +302,6 @@ vpbroadcastb ((round * 16) + idx + 4)(rk), t0; \ vpxor t0, x7, x7; -#ifdef CONFIG_AS_GFNI #define aria_sbox_8way_gfni(x0, x1, x2, x3, \ x4, x5, x6, x7, \ t0, t1, t2, t3, \ @@ -325,7 +324,6 @@ vgf2p8affineinvqb $0, t2, x3, x3; \ vgf2p8affineinvqb $0, t2, x7, x7 -#endif /* CONFIG_AS_GFNI */ #define aria_sbox_8way(x0, x1, x2, x3, \ x4, x5, x6, x7, \ t0, t1, t2, t3, \ @@ -598,7 +596,7 @@ aria_load_state_8way(y0, y1, y2, y3, \ y4, y5, y6, y7, \ mem_tmp, 8); -#ifdef CONFIG_AS_GFNI + #define aria_fe_gfni(x0, x1, x2, x3, \ x4, x5, x6, x7, \ y0, y1, y2, y3, \ @@ -752,7 +750,6 @@ aria_load_state_8way(y0, y1, y2, y3, \ y4, y5, y6, y7, \ mem_tmp, 8); -#endif /* CONFIG_AS_GFNI */ .section 
.rodata.cst32.shufb_16x16b, "aM", @progbits, 32 .align 32 @@ -806,7 +803,6 @@ .Ltf_hi__x2__and__fwd_aff: .octa 0x3F893781E95FE1576CDA64D2BA0CB204 -#ifdef CONFIG_AS_GFNI .section .rodata.cst8, "aM", @progbits, 8 .align 8 /* AES affine: */ @@ -868,8 +864,6 @@ BV8(0, 0, 0, 0, 0, 0, 1, 0), BV8(0, 0, 0, 0, 0, 0, 0, 1)) -#endif /* CONFIG_AS_GFNI */ - /* 4-bit mask */ .section .rodata.cst4.L0f0f0f0f, "aM", @progbits, 4 .align 4 @@ -1219,7 +1213,6 @@ SYM_TYPED_FUNC_START(aria_aesni_avx2_ctr_crypt_32way) RET; SYM_FUNC_END(aria_aesni_avx2_ctr_crypt_32way) -#ifdef CONFIG_AS_GFNI SYM_FUNC_START_LOCAL(__aria_aesni_avx2_gfni_crypt_32way) /* input: * %r9: rk @@ -1438,4 +1431,3 @@ SYM_TYPED_FUNC_START(aria_aesni_avx2_gfni_ctr_crypt_32way) FRAME_END RET; SYM_FUNC_END(aria_aesni_avx2_gfni_ctr_crypt_32way) -#endif /* CONFIG_AS_GFNI */ diff --git a/arch/x86/crypto/aria_aesni_avx2_glue.c b/arch/x86/crypto/aria_aesni_avx2_glue.c index 007b250f774c..1487a49bfbac 100644 --- a/arch/x86/crypto/aria_aesni_avx2_glue.c +++ b/arch/x86/crypto/aria_aesni_avx2_glue.c @@ -26,7 +26,6 @@ asmlinkage void aria_aesni_avx2_ctr_crypt_32way(const void *ctx, u8 *dst, const u8 *src, u8 *keystream, u8 *iv); EXPORT_SYMBOL_GPL(aria_aesni_avx2_ctr_crypt_32way); -#ifdef CONFIG_AS_GFNI asmlinkage void aria_aesni_avx2_gfni_encrypt_32way(const void *ctx, u8 *dst, const u8 *src); EXPORT_SYMBOL_GPL(aria_aesni_avx2_gfni_encrypt_32way); @@ -37,7 +36,6 @@ asmlinkage void aria_aesni_avx2_gfni_ctr_crypt_32way(const void *ctx, u8 *dst, const u8 *src, u8 *keystream, u8 *iv); EXPORT_SYMBOL_GPL(aria_aesni_avx2_gfni_ctr_crypt_32way); -#endif /* CONFIG_AS_GFNI */ static struct aria_avx_ops aria_ops; @@ -213,7 +211,7 @@ static int __init aria_avx2_init(void) return -ENODEV; } - if (boot_cpu_has(X86_FEATURE_GFNI) && IS_ENABLED(CONFIG_AS_GFNI)) { + if (boot_cpu_has(X86_FEATURE_GFNI)) { aria_ops.aria_encrypt_16way = aria_aesni_avx_gfni_encrypt_16way; aria_ops.aria_decrypt_16way = aria_aesni_avx_gfni_decrypt_16way; aria_ops.aria_ctr_crypt_16way = aria_aesni_avx_gfni_ctr_crypt_16way; diff --git a/arch/x86/crypto/aria_aesni_avx_glue.c b/arch/x86/crypto/aria_aesni_avx_glue.c index 4c88ef4eba82..e4e3d78915a5 100644 --- a/arch/x86/crypto/aria_aesni_avx_glue.c +++ b/arch/x86/crypto/aria_aesni_avx_glue.c @@ -26,7 +26,6 @@ asmlinkage void aria_aesni_avx_ctr_crypt_16way(const void *ctx, u8 *dst, const u8 *src, u8 *keystream, u8 *iv); EXPORT_SYMBOL_GPL(aria_aesni_avx_ctr_crypt_16way); -#ifdef CONFIG_AS_GFNI asmlinkage void aria_aesni_avx_gfni_encrypt_16way(const void *ctx, u8 *dst, const u8 *src); EXPORT_SYMBOL_GPL(aria_aesni_avx_gfni_encrypt_16way); @@ -37,7 +36,6 @@ asmlinkage void aria_aesni_avx_gfni_ctr_crypt_16way(const void *ctx, u8 *dst, const u8 *src, u8 *keystream, u8 *iv); EXPORT_SYMBOL_GPL(aria_aesni_avx_gfni_ctr_crypt_16way); -#endif /* CONFIG_AS_GFNI */ static struct aria_avx_ops aria_ops; @@ -199,7 +197,7 @@ static int __init aria_avx_init(void) return -ENODEV; } - if (boot_cpu_has(X86_FEATURE_GFNI) && IS_ENABLED(CONFIG_AS_GFNI)) { + if (boot_cpu_has(X86_FEATURE_GFNI)) { aria_ops.aria_encrypt_16way = aria_aesni_avx_gfni_encrypt_16way; aria_ops.aria_decrypt_16way = aria_aesni_avx_gfni_decrypt_16way; aria_ops.aria_ctr_crypt_16way = aria_aesni_avx_gfni_ctr_crypt_16way; diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 94519688b007..77e2d920a640 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -99,7 +99,7 @@ For 32-bit we have the following conventions - kernel is built with .endif .endm -.macro CLEAR_REGS 
clear_bp=1 +.macro CLEAR_REGS clear_callee=1 /* * Sanitize registers of values that a speculation attack might * otherwise want to exploit. The lower registers are likely clobbered @@ -113,20 +113,19 @@ For 32-bit we have the following conventions - kernel is built with xorl %r9d, %r9d /* nospec r9 */ xorl %r10d, %r10d /* nospec r10 */ xorl %r11d, %r11d /* nospec r11 */ + .if \clear_callee xorl %ebx, %ebx /* nospec rbx */ - .if \clear_bp xorl %ebp, %ebp /* nospec rbp */ - .endif xorl %r12d, %r12d /* nospec r12 */ xorl %r13d, %r13d /* nospec r13 */ xorl %r14d, %r14d /* nospec r14 */ xorl %r15d, %r15d /* nospec r15 */ - + .endif .endm -.macro PUSH_AND_CLEAR_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0 clear_bp=1 unwind_hint=1 +.macro PUSH_AND_CLEAR_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0 clear_callee=1 unwind_hint=1 PUSH_REGS rdx=\rdx, rcx=\rcx, rax=\rax, save_ret=\save_ret unwind_hint=\unwind_hint - CLEAR_REGS clear_bp=\clear_bp + CLEAR_REGS clear_callee=\clear_callee .endm .macro POP_REGS pop_rdi=1 diff --git a/arch/x86/entry/entry_64_fred.S b/arch/x86/entry/entry_64_fred.S index 29c5c32c16c3..fafbd3e68cb8 100644 --- a/arch/x86/entry/entry_64_fred.S +++ b/arch/x86/entry/entry_64_fred.S @@ -16,7 +16,7 @@ .macro FRED_ENTER UNWIND_HINT_END_OF_STACK - ENDBR + ANNOTATE_NOENDBR PUSH_AND_CLEAR_REGS movq %rsp, %rdi /* %rdi -> pt_regs */ .endm @@ -97,8 +97,7 @@ SYM_FUNC_START(asm_fred_entry_from_kvm) push %rdi /* fred_ss handed in by the caller */ push %rbp pushf - mov $__KERNEL_CS, %rax - push %rax + push $__KERNEL_CS /* * Unlike the IDT event delivery, FRED _always_ pushes an error code @@ -112,18 +111,37 @@ SYM_FUNC_START(asm_fred_entry_from_kvm) push %rax /* Return RIP */ push $0 /* Error code, 0 for IRQ/NMI */ - PUSH_AND_CLEAR_REGS clear_bp=0 unwind_hint=0 + PUSH_AND_CLEAR_REGS clear_callee=0 unwind_hint=0 + movq %rsp, %rdi /* %rdi -> pt_regs */ + /* + * At this point: {rdi, rsi, rdx, rcx, r8, r9}, {r10, r11}, {rax, rdx} + * are clobbered, which corresponds to: arguments, extra caller-saved + * and return. All registers a C function is allowed to clobber. + * + * Notably, the callee-saved registers: {rbx, r12, r13, r14, r15} + * are untouched, with the exception of rbp, which carries the stack + * frame and will be restored before exit. + * + * Further calling another C function will not alter this state. + */ call __fred_entry_from_kvm /* Call the C entry point */ - POP_REGS - ERETS -1: + /* - * Objtool doesn't understand what ERETS does, this hint tells it that - * yes, we'll reach here and with what stack state. A save/restore pair - * isn't strictly needed, but it's the simplest form. + * When FRED, use ERETS to potentially clear NMIs, otherwise simply + * restore the stack pointer. + */ + ALTERNATIVE "nop; nop; mov %rbp, %rsp", \ + __stringify(add $C_PTREGS_SIZE, %rsp; ERETS), \ + X86_FEATURE_FRED + +1: /* + * Objtool doesn't understand ERETS, and the cfi register state is + * different from initial_func_cfi due to PUSH_REGS. Tell it the state + * is similar to where UNWIND_HINT_SAVE is. 
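The new exit path relies on ALTERNATIVE(): at boot, the first instruction sequence stays in place on CPUs without X86_FEATURE_FRED (restore %rsp from %rbp and fall through to the plain pop/RET), and it is rewritten to the second sequence (pop the pt_regs frame and ERETS) when FRED is present. A tiny, unrelated illustration of the same patching mechanism in C, assuming nothing beyond the standard alternative macros:

	#include <asm/alternative.h>
	#include <asm/cpufeatures.h>

	static inline void maybe_lfence(void)
	{
		/*
		 * The empty "oldinstr" is padded with NOPs; CPUs that set
		 * X86_FEATURE_LFENCE_RDTSC get it patched to an LFENCE.
		 */
		asm volatile(ALTERNATIVE("", "lfence", X86_FEATURE_LFENCE_RDTSC)
			     ::: "memory");
	}
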
*/ UNWIND_HINT_RESTORE + pop %rbp RET diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index c9103a6fa06e..6e6c0a740837 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -124,7 +124,12 @@ bool emulate_vsyscall(unsigned long error_code, if ((error_code & (X86_PF_WRITE | X86_PF_USER)) != X86_PF_USER) return false; - if (!(error_code & X86_PF_INSTR)) { + /* + * Assume that faults at regs->ip are because of an + * instruction fetch. Return early and avoid + * emulation for faults during data accesses: + */ + if (address != regs->ip) { /* Failed vsyscall read */ if (vsyscall_mode == EMULATE) return false; @@ -137,12 +142,18 @@ bool emulate_vsyscall(unsigned long error_code, } /* + * X86_PF_INSTR is only set when NX is supported. When + * available, use it to double-check that the emulation code + * is only being used for instruction fetches: + */ + if (cpu_feature_enabled(X86_FEATURE_NX)) + WARN_ON_ONCE(!(error_code & X86_PF_INSTR)); + + /* * No point in checking CS -- the only way to get here is a user mode * trap to a high address, which means that we're in 64-bit user code. */ - WARN_ON_ONCE(address != regs->ip); - if (vsyscall_mode == NONE) { warn_bad_vsyscall(KERN_INFO, regs, "vsyscall attempted with vsyscall=none"); diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index afdbda2dd7b7..e890fd37e9c2 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -17,7 +17,6 @@ #include <asm/desc.h> #include <asm/e820/api.h> #include <asm/sev.h> -#include <asm/ibt.h> #include <asm/hypervisor.h> #include <hyperv/hvhdk.h> #include <asm/mshyperv.h> @@ -37,7 +36,45 @@ #include <linux/export.h> void *hv_hypercall_pg; + +#ifdef CONFIG_X86_64 +static u64 __hv_hyperfail(u64 control, u64 param1, u64 param2) +{ + return U64_MAX; +} + +DEFINE_STATIC_CALL(__hv_hypercall, __hv_hyperfail); + +u64 hv_std_hypercall(u64 control, u64 param1, u64 param2) +{ + u64 hv_status; + + register u64 __r8 asm("r8") = param2; + asm volatile ("call " STATIC_CALL_TRAMP_STR(__hv_hypercall) + : "=a" (hv_status), ASM_CALL_CONSTRAINT, + "+c" (control), "+d" (param1), "+r" (__r8) + : : "cc", "memory", "r9", "r10", "r11"); + + return hv_status; +} + +typedef u64 (*hv_hypercall_f)(u64 control, u64 param1, u64 param2); + +static inline void hv_set_hypercall_pg(void *ptr) +{ + hv_hypercall_pg = ptr; + + if (!ptr) + ptr = &__hv_hyperfail; + static_call_update(__hv_hypercall, (hv_hypercall_f)ptr); +} +#else +static inline void hv_set_hypercall_pg(void *ptr) +{ + hv_hypercall_pg = ptr; +} EXPORT_SYMBOL_GPL(hv_hypercall_pg); +#endif union hv_ghcb * __percpu *hv_ghcb_pg; @@ -330,7 +367,7 @@ static int hv_suspend(void) * pointer is restored on resume. 
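The hv_init.c changes above turn the hypercall entry into a static call: __hv_hypercall initially targets __hv_hyperfail() (which just returns U64_MAX), and hv_set_hypercall_pg() retargets it once the hypercall page exists, so hv_std_hypercall() issues a patched direct call rather than an indirect one (presumably also why the IBT/ENDBR workaround for the hypercall page is removed further down). A minimal sketch of the static-call pattern with simplified, non-kernel names:

	#include <linux/types.h>
	#include <linux/limits.h>
	#include <linux/static_call.h>

	static u64 hypercall_fail(u64 control, u64 param1, u64 param2)
	{
		return U64_MAX;			/* mirrors __hv_hyperfail() */
	}

	DEFINE_STATIC_CALL(do_hypercall, hypercall_fail);

	static u64 issue_hypercall(u64 control, u64 param1, u64 param2)
	{
		/* Compiles to a patchable direct call, not an indirect branch. */
		return static_call(do_hypercall)(control, param1, param2);
	}

	static void set_hypercall_target(u64 (*fn)(u64, u64, u64))
	{
		static_call_update(do_hypercall, fn ? fn : hypercall_fail);
	}
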
*/ hv_hypercall_pg_saved = hv_hypercall_pg; - hv_hypercall_pg = NULL; + hv_set_hypercall_pg(NULL); /* Disable the hypercall page in the hypervisor */ rdmsrq(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); @@ -356,7 +393,7 @@ static void hv_resume(void) vmalloc_to_pfn(hv_hypercall_pg_saved); wrmsrq(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); - hv_hypercall_pg = hv_hypercall_pg_saved; + hv_set_hypercall_pg(hv_hypercall_pg_saved); hv_hypercall_pg_saved = NULL; /* @@ -476,8 +513,8 @@ void __init hyperv_init(void) if (hv_isolation_type_tdx() && !ms_hyperv.paravisor_present) goto skip_hypercall_pg_init; - hv_hypercall_pg = __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, - VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_ROX, + hv_hypercall_pg = __vmalloc_node_range(PAGE_SIZE, 1, MODULES_VADDR, + MODULES_END, GFP_KERNEL, PAGE_KERNEL_ROX, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, __builtin_return_address(0)); if (hv_hypercall_pg == NULL) @@ -515,27 +552,9 @@ void __init hyperv_init(void) wrmsrq(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); } -skip_hypercall_pg_init: - /* - * Some versions of Hyper-V that provide IBT in guest VMs have a bug - * in that there's no ENDBR64 instruction at the entry to the - * hypercall page. Because hypercalls are invoked via an indirect call - * to the hypercall page, all hypercall attempts fail when IBT is - * enabled, and Linux panics. For such buggy versions, disable IBT. - * - * Fixed versions of Hyper-V always provide ENDBR64 on the hypercall - * page, so if future Linux kernel versions enable IBT for 32-bit - * builds, additional hypercall page hackery will be required here - * to provide an ENDBR32. - */ -#ifdef CONFIG_X86_KERNEL_IBT - if (cpu_feature_enabled(X86_FEATURE_IBT) && - *(u32 *)hv_hypercall_pg != gen_endbr()) { - setup_clear_cpu_cap(X86_FEATURE_IBT); - pr_warn("Disabling IBT because of Hyper-V bug\n"); - } -#endif + hv_set_hypercall_pg(hv_hypercall_pg); +skip_hypercall_pg_init: /* * hyperv_init() is called before LAPIC is initialized: see * apic_intr_mode_init() -> x86_platform.apic_post_init() and diff --git a/arch/x86/hyperv/irqdomain.c b/arch/x86/hyperv/irqdomain.c index 090f5ac9f492..c3ba12b1bc07 100644 --- a/arch/x86/hyperv/irqdomain.c +++ b/arch/x86/hyperv/irqdomain.c @@ -11,6 +11,7 @@ #include <linux/pci.h> #include <linux/irq.h> #include <linux/export.h> +#include <linux/irqchip/irq-msi-lib.h> #include <asm/mshyperv.h> static int hv_map_interrupt(union hv_device_id device_id, bool level, @@ -289,59 +290,99 @@ static void hv_teardown_msi_irq(struct pci_dev *dev, struct irq_data *irqd) (void)hv_unmap_msi_interrupt(dev, &old_entry); } -static void hv_msi_free_irq(struct irq_domain *domain, - struct msi_domain_info *info, unsigned int virq) -{ - struct irq_data *irqd = irq_get_irq_data(virq); - struct msi_desc *desc; - - if (!irqd) - return; - - desc = irq_data_get_msi_desc(irqd); - if (!desc || !desc->irq || WARN_ON_ONCE(!dev_is_pci(desc->dev))) - return; - - hv_teardown_msi_irq(to_pci_dev(desc->dev), irqd); -} - /* * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, * which implement the MSI or MSI-X Capability Structure. 
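The irqdomain.c rework that follows moves Hyper-V PCI MSI from the msi_domain_info/pci_msi_create_irq_domain() setup to a parent MSI domain created with msi_create_parent_irq_domain(), with per-interrupt setup done in the domain's .alloc callback. A condensed sketch of that hierarchical alloc pattern (names invented; the real version is hv_msi_domain_alloc() below):

	#include <linux/irq.h>
	#include <linux/irqdomain.h>

	static struct irq_chip sketch_chip = {
		.name = "SKETCH-MSI",
	};

	static int sketch_msi_alloc(struct irq_domain *d, unsigned int virq,
				    unsigned int nr_irqs, void *arg)
	{
		/* Allocate the backing vectors from the parent domain first. */
		int ret = irq_domain_alloc_irqs_parent(d, virq, nr_irqs, arg);

		if (ret)
			return ret;

		/* Then describe each interrupt at this level of the hierarchy. */
		for (int i = 0; i < nr_irqs; i++)
			irq_domain_set_info(d, virq + i, 0, &sketch_chip, NULL,
					    handle_edge_irq, NULL, "edge");
		return 0;
	}
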
*/ static struct irq_chip hv_pci_msi_controller = { .name = "HV-PCI-MSI", - .irq_unmask = pci_msi_unmask_irq, - .irq_mask = pci_msi_mask_irq, .irq_ack = irq_chip_ack_parent, - .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_compose_msi_msg = hv_irq_compose_msi_msg, - .irq_set_affinity = msi_domain_set_affinity, - .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MOVE_DEFERRED, + .irq_set_affinity = irq_chip_set_affinity_parent, }; -static struct msi_domain_ops pci_msi_domain_ops = { - .msi_free = hv_msi_free_irq, - .msi_prepare = pci_msi_prepare, +static bool hv_init_dev_msi_info(struct device *dev, struct irq_domain *domain, + struct irq_domain *real_parent, struct msi_domain_info *info) +{ + struct irq_chip *chip = info->chip; + + if (!msi_lib_init_dev_msi_info(dev, domain, real_parent, info)) + return false; + + chip->flags |= IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MOVE_DEFERRED; + + info->ops->msi_prepare = pci_msi_prepare; + + return true; +} + +#define HV_MSI_FLAGS_SUPPORTED (MSI_GENERIC_FLAGS_MASK | MSI_FLAG_PCI_MSIX) +#define HV_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS) + +static struct msi_parent_ops hv_msi_parent_ops = { + .supported_flags = HV_MSI_FLAGS_SUPPORTED, + .required_flags = HV_MSI_FLAGS_REQUIRED, + .bus_select_token = DOMAIN_BUS_NEXUS, + .bus_select_mask = MATCH_PCI_MSI, + .chip_flags = MSI_CHIP_FLAG_SET_ACK, + .prefix = "HV-", + .init_dev_msi_info = hv_init_dev_msi_info, }; -static struct msi_domain_info hv_pci_msi_domain_info = { - .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | - MSI_FLAG_PCI_MSIX, - .ops = &pci_msi_domain_ops, - .chip = &hv_pci_msi_controller, - .handler = handle_edge_irq, - .handler_name = "edge", +static int hv_msi_domain_alloc(struct irq_domain *d, unsigned int virq, unsigned int nr_irqs, + void *arg) +{ + /* + * TODO: The allocation bits of hv_irq_compose_msi_msg(), i.e. everything except + * entry_to_msi_msg() should be in here. 
+ */ + + int ret; + + ret = irq_domain_alloc_irqs_parent(d, virq, nr_irqs, arg); + if (ret) + return ret; + + for (int i = 0; i < nr_irqs; ++i) { + irq_domain_set_info(d, virq + i, 0, &hv_pci_msi_controller, NULL, + handle_edge_irq, NULL, "edge"); + } + return 0; +} + +static void hv_msi_domain_free(struct irq_domain *d, unsigned int virq, unsigned int nr_irqs) +{ + for (int i = 0; i < nr_irqs; ++i) { + struct irq_data *irqd = irq_domain_get_irq_data(d, virq); + struct msi_desc *desc; + + desc = irq_data_get_msi_desc(irqd); + if (!desc || !desc->irq || WARN_ON_ONCE(!dev_is_pci(desc->dev))) + continue; + + hv_teardown_msi_irq(to_pci_dev(desc->dev), irqd); + } + irq_domain_free_irqs_top(d, virq, nr_irqs); +} + +static const struct irq_domain_ops hv_msi_domain_ops = { + .select = msi_lib_irq_domain_select, + .alloc = hv_msi_domain_alloc, + .free = hv_msi_domain_free, }; struct irq_domain * __init hv_create_pci_msi_domain(void) { struct irq_domain *d = NULL; - struct fwnode_handle *fn; - fn = irq_domain_alloc_named_fwnode("HV-PCI-MSI"); - if (fn) - d = pci_msi_create_irq_domain(fn, &hv_pci_msi_domain_info, x86_vector_domain); + struct irq_domain_info info = { + .fwnode = irq_domain_alloc_named_fwnode("HV-PCI-MSI"), + .ops = &hv_msi_domain_ops, + .parent = x86_vector_domain, + }; + + if (info.fwnode) + d = msi_create_parent_irq_domain(&info, &hv_msi_parent_ops); /* No point in going further if we can't get an irq domain */ BUG_ON(!d); diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index ade6c665c97e..651771534cae 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -385,9 +385,23 @@ int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip, unsigned int cpu) return ret; } +u64 hv_snp_hypercall(u64 control, u64 param1, u64 param2) +{ + u64 hv_status; + + register u64 __r8 asm("r8") = param2; + asm volatile("vmmcall" + : "=a" (hv_status), ASM_CALL_CONSTRAINT, + "+c" (control), "+d" (param1), "+r" (__r8) + : : "cc", "memory", "r9", "r10", "r11"); + + return hv_status; +} + #else static inline void hv_ghcb_msr_write(u64 msr, u64 value) {} static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {} +u64 hv_snp_hypercall(u64 control, u64 param1, u64 param2) { return U64_MAX; } #endif /* CONFIG_AMD_MEM_ENCRYPT */ #ifdef CONFIG_INTEL_TDX_GUEST @@ -437,6 +451,7 @@ u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2) #else static inline void hv_tdx_msr_write(u64 msr, u64 value) {} static inline void hv_tdx_msr_read(u64 msr, u64 *value) {} +u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2) { return U64_MAX; } #endif /* CONFIG_INTEL_TDX_GUEST */ #if defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST) @@ -463,6 +478,195 @@ void hv_ivm_msr_read(u64 msr, u64 *value) } /* + * Keep track of the PFN regions which were shared with the host. The access + * must be revoked upon kexec/kdump (see hv_ivm_clear_host_access()). 
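The hv_list_enc_add()/hv_list_enc_remove() helpers added below keep this bookkeeping as coalesced { pfn, count } regions rather than one list entry per page: adding an adjacent PFN grows an existing region, and removing a PFN from the middle splits one. A short illustration of how the list evolves (PFN numbers invented):

	/*
	 *   hv_list_enc_add({100, 101, 102})  ->  { pfn = 100, count = 3 }
	 *   hv_list_enc_add({103})            ->  { pfn = 100, count = 4 }  (grow up)
	 *   hv_list_enc_add({99})             ->  { pfn = 99,  count = 5 }  (grow down)
	 *   hv_list_enc_remove({101})         ->  { pfn = 99,  count = 2 }
	 *                                         { pfn = 102, count = 2 }  (split)
	 */
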
+ */ +struct hv_enc_pfn_region { + struct list_head list; + u64 pfn; + int count; +}; + +static LIST_HEAD(hv_list_enc); +static DEFINE_RAW_SPINLOCK(hv_list_enc_lock); + +static int hv_list_enc_add(const u64 *pfn_list, int count) +{ + struct hv_enc_pfn_region *ent; + unsigned long flags; + u64 pfn; + int i; + + for (i = 0; i < count; i++) { + pfn = pfn_list[i]; + + raw_spin_lock_irqsave(&hv_list_enc_lock, flags); + /* Check if the PFN already exists in some region first */ + list_for_each_entry(ent, &hv_list_enc, list) { + if ((ent->pfn <= pfn) && (ent->pfn + ent->count - 1 >= pfn)) + /* Nothing to do - pfn is already in the list */ + goto unlock_done; + } + + /* + * Check if the PFN is adjacent to an existing region. Growing + * a region can make it adjacent to another one but merging is + * not (yet) implemented for simplicity. A PFN cannot be added + * to two regions to keep the logic in hv_list_enc_remove() + * correct. + */ + list_for_each_entry(ent, &hv_list_enc, list) { + if (ent->pfn + ent->count == pfn) { + /* Grow existing region up */ + ent->count++; + goto unlock_done; + } else if (pfn + 1 == ent->pfn) { + /* Grow existing region down */ + ent->pfn--; + ent->count++; + goto unlock_done; + } + } + raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags); + + /* No adjacent region found -- create a new one */ + ent = kzalloc(sizeof(struct hv_enc_pfn_region), GFP_KERNEL); + if (!ent) + return -ENOMEM; + + ent->pfn = pfn; + ent->count = 1; + + raw_spin_lock_irqsave(&hv_list_enc_lock, flags); + list_add(&ent->list, &hv_list_enc); + +unlock_done: + raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags); + } + + return 0; +} + +static int hv_list_enc_remove(const u64 *pfn_list, int count) +{ + struct hv_enc_pfn_region *ent, *t; + struct hv_enc_pfn_region new_region; + unsigned long flags; + u64 pfn; + int i; + + for (i = 0; i < count; i++) { + pfn = pfn_list[i]; + + raw_spin_lock_irqsave(&hv_list_enc_lock, flags); + list_for_each_entry_safe(ent, t, &hv_list_enc, list) { + if (pfn == ent->pfn + ent->count - 1) { + /* Removing tail pfn */ + ent->count--; + if (!ent->count) { + list_del(&ent->list); + kfree(ent); + } + goto unlock_done; + } else if (pfn == ent->pfn) { + /* Removing head pfn */ + ent->count--; + ent->pfn++; + if (!ent->count) { + list_del(&ent->list); + kfree(ent); + } + goto unlock_done; + } else if (pfn > ent->pfn && pfn < ent->pfn + ent->count - 1) { + /* + * Removing a pfn in the middle. Cut off the tail + * of the existing region and create a template for + * the new one. + */ + new_region.pfn = pfn + 1; + new_region.count = ent->count - (pfn - ent->pfn + 1); + ent->count = pfn - ent->pfn; + goto unlock_split; + } + + } +unlock_done: + raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags); + continue; + +unlock_split: + raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags); + + ent = kzalloc(sizeof(struct hv_enc_pfn_region), GFP_KERNEL); + if (!ent) + return -ENOMEM; + + ent->pfn = new_region.pfn; + ent->count = new_region.count; + + raw_spin_lock_irqsave(&hv_list_enc_lock, flags); + list_add(&ent->list, &hv_list_enc); + raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags); + } + + return 0; +} + +/* Stop new private<->shared conversions */ +static void hv_vtom_kexec_begin(void) +{ + if (!IS_ENABLED(CONFIG_KEXEC_CORE)) + return; + + /* + * Crash kernel reaches here with interrupts disabled: can't wait for + * conversions to finish. + * + * If race happened, just report and proceed. 
+ */ + if (!set_memory_enc_stop_conversion()) + pr_warn("Failed to stop shared<->private conversions\n"); +} + +static void hv_vtom_kexec_finish(void) +{ + struct hv_gpa_range_for_visibility *input; + struct hv_enc_pfn_region *ent; + unsigned long flags; + u64 hv_status; + int cur, i; + + local_irq_save(flags); + input = *this_cpu_ptr(hyperv_pcpu_input_arg); + + if (unlikely(!input)) + goto out; + + list_for_each_entry(ent, &hv_list_enc, list) { + for (i = 0, cur = 0; i < ent->count; i++) { + input->gpa_page_list[cur] = ent->pfn + i; + cur++; + + if (cur == HV_MAX_MODIFY_GPA_REP_COUNT || i == ent->count - 1) { + input->partition_id = HV_PARTITION_ID_SELF; + input->host_visibility = VMBUS_PAGE_NOT_VISIBLE; + input->reserved0 = 0; + input->reserved1 = 0; + hv_status = hv_do_rep_hypercall( + HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY, + cur, 0, input, NULL); + WARN_ON_ONCE(!hv_result_success(hv_status)); + cur = 0; + } + } + + } + +out: + local_irq_restore(flags); +} + +/* * hv_mark_gpa_visibility - Set pages visible to host via hvcall. * * In Isolation VM, all guest memory is encrypted from host and guest @@ -475,6 +679,7 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[], struct hv_gpa_range_for_visibility *input; u64 hv_status; unsigned long flags; + int ret; /* no-op if partition isolation is not enabled */ if (!hv_is_isolation_supported()) @@ -486,6 +691,13 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[], return -EINVAL; } + if (visibility == VMBUS_PAGE_NOT_VISIBLE) + ret = hv_list_enc_remove(pfn, count); + else + ret = hv_list_enc_add(pfn, count); + if (ret) + return ret; + local_irq_save(flags); input = *this_cpu_ptr(hyperv_pcpu_input_arg); @@ -506,8 +718,18 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[], if (hv_result_success(hv_status)) return 0; + + if (visibility == VMBUS_PAGE_NOT_VISIBLE) + ret = hv_list_enc_add(pfn, count); else - return -EFAULT; + ret = hv_list_enc_remove(pfn, count); + /* + * There's no good way to recover from -ENOMEM here, the accounting is + * wrong either way. + */ + WARN_ON_ONCE(ret); + + return -EFAULT; } /* @@ -669,6 +891,8 @@ void __init hv_vtom_init(void) x86_platform.guest.enc_tlb_flush_required = hv_vtom_tlb_flush_required; x86_platform.guest.enc_status_change_prepare = hv_vtom_clear_present; x86_platform.guest.enc_status_change_finish = hv_vtom_set_host_visibility; + x86_platform.guest.enc_kexec_begin = hv_vtom_kexec_begin; + x86_platform.guest.enc_kexec_finish = hv_vtom_kexec_finish; /* Set WB as the default cache mode. 
*/ guest_force_mtrr_state(NULL, 0, MTRR_TYPE_WRBACK); diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h index 02bae8e0758b..4c305305871b 100644 --- a/arch/x86/include/asm/archrandom.h +++ b/arch/x86/include/asm/archrandom.h @@ -23,8 +23,7 @@ static inline bool __must_check rdrand_long(unsigned long *v) unsigned int retry = RDRAND_RETRY_LOOPS; do { asm volatile("rdrand %[out]" - CC_SET(c) - : CC_OUT(c) (ok), [out] "=r" (*v)); + : "=@ccc" (ok), [out] "=r" (*v)); if (ok) return true; } while (--retry); @@ -35,8 +34,7 @@ static inline bool __must_check rdseed_long(unsigned long *v) { bool ok; asm volatile("rdseed %[out]" - CC_SET(c) - : CC_OUT(c) (ok), [out] "=r" (*v)); + : "=@ccc" (ok), [out] "=r" (*v)); return ok; } diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index f963848024a5..d5c8d3afe196 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -122,18 +122,6 @@ static __always_inline __pure void *rip_rel_ptr(void *p) } #endif -/* - * Macros to generate condition code outputs from inline assembly, - * The output operand must be type "bool". - */ -#ifdef __GCC_ASM_FLAG_OUTPUTS__ -# define CC_SET(c) "\n\t/* output condition code " #c "*/\n" -# define CC_OUT(c) "=@cc" #c -#else -# define CC_SET(c) "\n\tset" #c " %[_cc_" #c "]\n" -# define CC_OUT(c) [_cc_ ## c] "=qm" -#endif - #ifdef __KERNEL__ # include <asm/extable_fixup_types.h> diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index a835f891164d..c2ce213f2b9b 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -99,8 +99,7 @@ static __always_inline bool arch_xor_unlock_is_negative_byte(unsigned long mask, { bool negative; asm_inline volatile(LOCK_PREFIX "xorb %2,%1" - CC_SET(s) - : CC_OUT(s) (negative), WBYTE_ADDR(addr) + : "=@ccs" (negative), WBYTE_ADDR(addr) : "iq" ((char)mask) : "memory"); return negative; } @@ -149,8 +148,7 @@ arch___test_and_set_bit(unsigned long nr, volatile unsigned long *addr) bool oldbit; asm(__ASM_SIZE(bts) " %2,%1" - CC_SET(c) - : CC_OUT(c) (oldbit) + : "=@ccc" (oldbit) : ADDR, "Ir" (nr) : "memory"); return oldbit; } @@ -175,8 +173,7 @@ arch___test_and_clear_bit(unsigned long nr, volatile unsigned long *addr) bool oldbit; asm volatile(__ASM_SIZE(btr) " %2,%1" - CC_SET(c) - : CC_OUT(c) (oldbit) + : "=@ccc" (oldbit) : ADDR, "Ir" (nr) : "memory"); return oldbit; } @@ -187,8 +184,7 @@ arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr) bool oldbit; asm volatile(__ASM_SIZE(btc) " %2,%1" - CC_SET(c) - : CC_OUT(c) (oldbit) + : "=@ccc" (oldbit) : ADDR, "Ir" (nr) : "memory"); return oldbit; @@ -211,8 +207,7 @@ static __always_inline bool constant_test_bit_acquire(long nr, const volatile un bool oldbit; asm volatile("testb %2,%1" - CC_SET(nz) - : CC_OUT(nz) (oldbit) + : "=@ccnz" (oldbit) : "m" (((unsigned char *)addr)[nr >> 3]), "i" (1 << (nr & 7)) :"memory"); @@ -225,8 +220,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l bool oldbit; asm volatile(__ASM_SIZE(bt) " %2,%1" - CC_SET(c) - : CC_OUT(c) (oldbit) + : "=@ccc" (oldbit) : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory"); return oldbit; diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index 20fcb8507ad1..880ca15073ed 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -5,14 +5,19 @@ #include <linux/stringify.h> #include <linux/instrumentation.h> #include <linux/objtool.h> +#include <asm/asm.h> /* * Despite that some emulators 
terminate on UD2, we use it for WARN(). */ -#define ASM_UD2 ".byte 0x0f, 0x0b" +#define ASM_UD2 _ASM_BYTES(0x0f, 0x0b) #define INSN_UD2 0x0b0f #define LEN_UD2 2 +#define ASM_UDB _ASM_BYTES(0xd6) +#define INSN_UDB 0xd6 +#define LEN_UDB 1 + /* * In clang we have UD1s reporting UBSAN failures on X86, 64 and 32bit. */ @@ -26,7 +31,7 @@ #define BUG_UD2 0xfffe #define BUG_UD1 0xfffd #define BUG_UD1_UBSAN 0xfffc -#define BUG_EA 0xffea +#define BUG_UDB 0xffd6 #define BUG_LOCK 0xfff0 #ifdef CONFIG_GENERIC_BUG diff --git a/arch/x86/include/asm/cfi.h b/arch/x86/include/asm/cfi.h index 976b90a3d190..c40b9ebc1fb4 100644 --- a/arch/x86/include/asm/cfi.h +++ b/arch/x86/include/asm/cfi.h @@ -71,12 +71,10 @@ * * __cfi_foo: * endbr64 - * subl 0x12345678, %r10d - * jz foo - * ud2 - * nop + * subl 0x12345678, %eax + * jne.32,pn foo+3 * foo: - * osp nop3 # was endbr64 + * nopl -42(%rax) # was endbr64 * ... code here ... * ret * @@ -86,9 +84,9 @@ * indirect caller: * lea foo(%rip), %r11 * ... - * movl $0x12345678, %r10d - * subl $16, %r11 - * nop4 + * movl $0x12345678, %eax + * lea -0x10(%r11), %r11 + * nop5 * call *%r11 * */ diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h index b61f32c3459f..a88b06f1c35e 100644 --- a/arch/x86/include/asm/cmpxchg.h +++ b/arch/x86/include/asm/cmpxchg.h @@ -166,8 +166,7 @@ extern void __add_wrong_size(void) { \ volatile u8 *__ptr = (volatile u8 *)(_ptr); \ asm_inline volatile(lock "cmpxchgb %[new], %[ptr]" \ - CC_SET(z) \ - : CC_OUT(z) (success), \ + : "=@ccz" (success), \ [ptr] "+m" (*__ptr), \ [old] "+a" (__old) \ : [new] "q" (__new) \ @@ -178,8 +177,7 @@ extern void __add_wrong_size(void) { \ volatile u16 *__ptr = (volatile u16 *)(_ptr); \ asm_inline volatile(lock "cmpxchgw %[new], %[ptr]" \ - CC_SET(z) \ - : CC_OUT(z) (success), \ + : "=@ccz" (success), \ [ptr] "+m" (*__ptr), \ [old] "+a" (__old) \ : [new] "r" (__new) \ @@ -190,8 +188,7 @@ extern void __add_wrong_size(void) { \ volatile u32 *__ptr = (volatile u32 *)(_ptr); \ asm_inline volatile(lock "cmpxchgl %[new], %[ptr]" \ - CC_SET(z) \ - : CC_OUT(z) (success), \ + : "=@ccz" (success), \ [ptr] "+m" (*__ptr), \ [old] "+a" (__old) \ : [new] "r" (__new) \ @@ -202,8 +199,7 @@ extern void __add_wrong_size(void) { \ volatile u64 *__ptr = (volatile u64 *)(_ptr); \ asm_inline volatile(lock "cmpxchgq %[new], %[ptr]" \ - CC_SET(z) \ - : CC_OUT(z) (success), \ + : "=@ccz" (success), \ [ptr] "+m" (*__ptr), \ [old] "+a" (__old) \ : [new] "r" (__new) \ diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index 371f7906019e..1f80a62be969 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -46,8 +46,7 @@ static __always_inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new bool ret; \ \ asm_inline volatile(_lock "cmpxchg8b %[ptr]" \ - CC_SET(e) \ - : CC_OUT(e) (ret), \ + : "=@ccz" (ret), \ [ptr] "+m" (*(_ptr)), \ "+a" (o.low), "+d" (o.high) \ : "b" (n.low), "c" (n.high) \ @@ -125,8 +124,7 @@ static __always_inline u64 arch_cmpxchg64_local(volatile u64 *ptr, u64 old, u64 ALTERNATIVE(_lock_loc \ "call cmpxchg8b_emu", \ _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \ - CC_SET(e) \ - : ALT_OUTPUT_SP(CC_OUT(e) (ret), \ + : ALT_OUTPUT_SP("=@ccz" (ret), \ "+a" (o.low), "+d" (o.high)) \ : "b" (n.low), "c" (n.high), \ [ptr] "S" (_ptr) \ diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h index 71d1e72ed879..5afea056fb20 100644 --- a/arch/x86/include/asm/cmpxchg_64.h +++ b/arch/x86/include/asm/cmpxchg_64.h @@ 
-66,8 +66,7 @@ static __always_inline u128 arch_cmpxchg128_local(volatile u128 *ptr, u128 old, bool ret; \ \ asm_inline volatile(_lock "cmpxchg16b %[ptr]" \ - CC_SET(e) \ - : CC_OUT(e) (ret), \ + : "=@ccz" (ret), \ [ptr] "+m" (*(_ptr)), \ "+a" (o.low), "+d" (o.high) \ : "b" (n.low), "c" (n.high) \ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index b2a562217d3f..4091a776e37a 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -444,6 +444,7 @@ #define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* VM Page Flush MSR is supported */ #define X86_FEATURE_SEV_ES (19*32+ 3) /* "sev_es" Secure Encrypted Virtualization - Encrypted State */ #define X86_FEATURE_SEV_SNP (19*32+ 4) /* "sev_snp" Secure Encrypted Virtualization - Secure Nested Paging */ +#define X86_FEATURE_SNP_SECURE_TSC (19*32+ 8) /* SEV-SNP Secure TSC */ #define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* Virtual TSC_AUX */ #define X86_FEATURE_SME_COHERENT (19*32+10) /* hardware-enforced cache coherency */ #define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" SEV-ES full debug state swap support */ @@ -497,6 +498,7 @@ #define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* Clear CPU buffers using VERW before VMRUN */ #define X86_FEATURE_IBPB_EXIT_TO_USER (21*32+14) /* Use IBPB on exit-to-userspace, see VMSCAPE bug */ #define X86_FEATURE_ABMC (21*32+15) /* Assignable Bandwidth Monitoring Counters */ +#define X86_FEATURE_MSR_IMM (21*32+16) /* MSR immediate form instructions */ /* * BUG word(s) diff --git a/arch/x86/include/asm/ibt.h b/arch/x86/include/asm/ibt.h index 28d845257303..5e45d6424722 100644 --- a/arch/x86/include/asm/ibt.h +++ b/arch/x86/include/asm/ibt.h @@ -59,10 +59,10 @@ static __always_inline __attribute_const__ u32 gen_endbr(void) static __always_inline __attribute_const__ u32 gen_endbr_poison(void) { /* - * 4 byte NOP that isn't NOP4 (in fact it is OSP NOP3), such that it - * will be unique to (former) ENDBR sites. + * 4 byte NOP that isn't NOP4, such that it will be unique to (former) + * ENDBR sites. Additionally it carries UDB as immediate. 
*/ - return 0x001f0f66; /* osp nopl (%rax) */ + return 0xd6401f0f; /* nopl -42(%rax) */ } static inline bool __is_endbr(u32 val) @@ -70,10 +70,6 @@ static inline bool __is_endbr(u32 val) if (val == gen_endbr_poison()) return true; - /* See cfi_fineibt_bhi_preamble() */ - if (IS_ENABLED(CONFIG_FINEIBT_BHI) && val == 0x001f0ff5) - return true; - val &= ~0x01000000U; /* ENDBR32 -> ENDBR64 */ return val == gen_endbr(); } diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index a4ec27c67988..abd637e54e94 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -460,17 +460,12 @@ __visible noinstr void func(struct pt_regs *regs, \ #endif void idt_install_sysvec(unsigned int n, const void *function); - -#ifdef CONFIG_X86_FRED void fred_install_sysvec(unsigned int vector, const idtentry_t function); -#else -static inline void fred_install_sysvec(unsigned int vector, const idtentry_t function) { } -#endif #define sysvec_install(vector, function) { \ - if (cpu_feature_enabled(X86_FEATURE_FRED)) \ + if (IS_ENABLED(CONFIG_X86_FRED)) \ fred_install_sysvec(vector, function); \ - else \ + if (!cpu_feature_enabled(X86_FEATURE_FRED)) \ idt_install_sysvec(vector, asm_##function); \ } diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index f2ad77929d6e..5cfb27f26583 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -13,6 +13,15 @@ # define KEXEC_DEBUG_EXC_HANDLER_SIZE 6 /* PUSHI, PUSHI, 2-byte JMP */ #endif +#ifdef CONFIG_X86_64 + +#include <linux/bits.h> + +#define RELOC_KERNEL_PRESERVE_CONTEXT BIT(0) +#define RELOC_KERNEL_CACHE_INCOHERENT BIT(1) + +#endif + # define KEXEC_CONTROL_PAGE_SIZE 4096 # define KEXEC_CONTROL_CODE_MAX_SIZE 2048 @@ -121,8 +130,7 @@ typedef unsigned long relocate_kernel_fn(unsigned long indirection_page, unsigned long pa_control_page, unsigned long start_address, - unsigned int preserve_context, - unsigned int host_mem_enc_active); + unsigned int flags); #endif extern relocate_kernel_fn relocate_kernel; #define ARCH_HAS_KIMAGE_ARCH diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index 18a5c3119e1a..fdf178443f85 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -138,14 +138,14 @@ KVM_X86_OP(check_emulate_instruction) KVM_X86_OP(apic_init_signal_blocked) KVM_X86_OP_OPTIONAL(enable_l2_tlb_flush) KVM_X86_OP_OPTIONAL(migrate_timers) -KVM_X86_OP(recalc_msr_intercepts) +KVM_X86_OP(recalc_intercepts) KVM_X86_OP(complete_emulated_msr) KVM_X86_OP(vcpu_deliver_sipi_vector) KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons); KVM_X86_OP_OPTIONAL(get_untagged_addr) KVM_X86_OP_OPTIONAL(alloc_apic_backing_page) KVM_X86_OP_OPTIONAL_RET0(gmem_prepare) -KVM_X86_OP_OPTIONAL_RET0(private_max_mapping_level) +KVM_X86_OP_OPTIONAL_RET0(gmem_max_mapping_level) KVM_X86_OP_OPTIONAL(gmem_invalidate) #undef KVM_X86_OP diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f19a76d3ca0e..48598d017d6f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -120,7 +120,7 @@ #define KVM_REQ_TLB_FLUSH_GUEST \ KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_APF_READY KVM_ARCH_REQ(28) -#define KVM_REQ_MSR_FILTER_CHANGED KVM_ARCH_REQ(29) +#define KVM_REQ_RECALC_INTERCEPTS KVM_ARCH_REQ(29) #define KVM_REQ_UPDATE_CPU_DIRTY_LOGGING \ KVM_ARCH_REQ_FLAGS(30, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_MMU_FREE_OBSOLETE_ROOTS \ @@ 
-142,7 +142,7 @@ | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \ | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \ | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP \ - | X86_CR4_LAM_SUP)) + | X86_CR4_LAM_SUP | X86_CR4_CET)) #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) @@ -267,6 +267,7 @@ enum x86_intercept_stage; #define PFERR_RSVD_MASK BIT(3) #define PFERR_FETCH_MASK BIT(4) #define PFERR_PK_MASK BIT(5) +#define PFERR_SS_MASK BIT(6) #define PFERR_SGX_MASK BIT(15) #define PFERR_GUEST_RMP_MASK BIT_ULL(31) #define PFERR_GUEST_FINAL_MASK BIT_ULL(32) @@ -545,10 +546,10 @@ struct kvm_pmc { #define KVM_MAX_NR_GP_COUNTERS KVM_MAX(KVM_MAX_NR_INTEL_GP_COUNTERS, \ KVM_MAX_NR_AMD_GP_COUNTERS) -#define KVM_MAX_NR_INTEL_FIXED_COUTNERS 3 -#define KVM_MAX_NR_AMD_FIXED_COUTNERS 0 -#define KVM_MAX_NR_FIXED_COUNTERS KVM_MAX(KVM_MAX_NR_INTEL_FIXED_COUTNERS, \ - KVM_MAX_NR_AMD_FIXED_COUTNERS) +#define KVM_MAX_NR_INTEL_FIXED_COUNTERS 3 +#define KVM_MAX_NR_AMD_FIXED_COUNTERS 0 +#define KVM_MAX_NR_FIXED_COUNTERS KVM_MAX(KVM_MAX_NR_INTEL_FIXED_COUNTERS, \ + KVM_MAX_NR_AMD_FIXED_COUNTERS) struct kvm_pmu { u8 version; @@ -579,6 +580,9 @@ struct kvm_pmu { DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX); DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX); + DECLARE_BITMAP(pmc_counting_instructions, X86_PMC_IDX_MAX); + DECLARE_BITMAP(pmc_counting_branches, X86_PMC_IDX_MAX); + u64 ds_area; u64 pebs_enable; u64 pebs_enable_rsvd; @@ -771,6 +775,7 @@ enum kvm_only_cpuid_leafs { CPUID_7_2_EDX, CPUID_24_0_EBX, CPUID_8000_0021_ECX, + CPUID_7_1_ECX, NR_KVM_CPU_CAPS, NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS, @@ -811,7 +816,6 @@ struct kvm_vcpu_arch { bool at_instruction_boundary; bool tpr_access_reporting; bool xfd_no_write_intercept; - u64 ia32_xss; u64 microcode_version; u64 arch_capabilities; u64 perf_capabilities; @@ -872,6 +876,8 @@ struct kvm_vcpu_arch { u64 xcr0; u64 guest_supported_xcr0; + u64 ia32_xss; + u64 guest_supported_xss; struct kvm_pio_request pio; void *pio_data; @@ -926,6 +932,7 @@ struct kvm_vcpu_arch { bool emulate_regs_need_sync_from_vcpu; int (*complete_userspace_io)(struct kvm_vcpu *vcpu); unsigned long cui_linear_rip; + int cui_rdmsr_imm_reg; gpa_t time; s8 pvclock_tsc_shift; @@ -1348,6 +1355,30 @@ enum kvm_apicv_inhibit { __APICV_INHIBIT_REASON(LOGICAL_ID_ALIASED), \ __APICV_INHIBIT_REASON(PHYSICAL_ID_TOO_BIG) +struct kvm_possible_nx_huge_pages { + /* + * A list of kvm_mmu_page structs that, if zapped, could possibly be + * replaced by an NX huge page. A shadow page is on this list if its + * existence disallows an NX huge page (nx_huge_page_disallowed is set) + * and there are no other conditions that prevent a huge page, e.g. + * the backing host page is huge, dirtly logging is not enabled for its + * memslot, etc... Note, zapping shadow pages on this list doesn't + * guarantee an NX huge page will be created in its stead, e.g. if the + * guest attempts to execute from the region then KVM obviously can't + * create an NX huge page (without hanging the guest). 
+ */ + struct list_head pages; + u64 nr_pages; +}; + +enum kvm_mmu_type { + KVM_SHADOW_MMU, +#ifdef CONFIG_X86_64 + KVM_TDP_MMU, +#endif + KVM_NR_MMU_TYPES, +}; + struct kvm_arch { unsigned long n_used_mmu_pages; unsigned long n_requested_mmu_pages; @@ -1357,21 +1388,11 @@ struct kvm_arch { u8 vm_type; bool has_private_mem; bool has_protected_state; + bool has_protected_eoi; bool pre_fault_allowed; struct hlist_head *mmu_page_hash; struct list_head active_mmu_pages; - /* - * A list of kvm_mmu_page structs that, if zapped, could possibly be - * replaced by an NX huge page. A shadow page is on this list if its - * existence disallows an NX huge page (nx_huge_page_disallowed is set) - * and there are no other conditions that prevent a huge page, e.g. - * the backing host page is huge, dirtly logging is not enabled for its - * memslot, etc... Note, zapping shadow pages on this list doesn't - * guarantee an NX huge page will be created in its stead, e.g. if the - * guest attempts to execute from the region then KVM obviously can't - * create an NX huge page (without hanging the guest). - */ - struct list_head possible_nx_huge_pages; + struct kvm_possible_nx_huge_pages possible_nx_huge_pages[KVM_NR_MMU_TYPES]; #ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING struct kvm_page_track_notifier_head track_notifier_head; #endif @@ -1526,7 +1547,7 @@ struct kvm_arch { * is held in read mode: * - tdp_mmu_roots (above) * - the link field of kvm_mmu_page structs used by the TDP MMU - * - possible_nx_huge_pages; + * - possible_nx_huge_pages[KVM_TDP_MMU]; * - the possible_nx_huge_page_link field of kvm_mmu_page structs used * by the TDP MMU * Because the lock is only taken within the MMU lock, strictly @@ -1908,7 +1929,7 @@ struct kvm_x86_ops { int (*enable_l2_tlb_flush)(struct kvm_vcpu *vcpu); void (*migrate_timers)(struct kvm_vcpu *vcpu); - void (*recalc_msr_intercepts)(struct kvm_vcpu *vcpu); + void (*recalc_intercepts)(struct kvm_vcpu *vcpu); int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err); void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector); @@ -1922,7 +1943,7 @@ struct kvm_x86_ops { void *(*alloc_apic_backing_page)(struct kvm_vcpu *vcpu); int (*gmem_prepare)(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order); void (*gmem_invalidate)(kvm_pfn_t start, kvm_pfn_t end); - int (*private_max_mapping_level)(struct kvm *kvm, kvm_pfn_t pfn); + int (*gmem_max_mapping_level)(struct kvm *kvm, kvm_pfn_t pfn, bool is_private); }; struct kvm_x86_nested_ops { @@ -2149,13 +2170,16 @@ void kvm_prepare_event_vectoring_exit(struct kvm_vcpu *vcpu, gpa_t gpa); void kvm_enable_efer_bits(u64); bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer); -int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data); -int kvm_set_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 data); -int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, bool host_initiated); -int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data); -int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data); +int kvm_emulate_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data); +int kvm_emulate_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data); +int __kvm_emulate_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data); +int __kvm_emulate_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data); +int kvm_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data); +int kvm_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data); int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu); +int kvm_emulate_rdmsr_imm(struct kvm_vcpu 
*vcpu, u32 msr, int reg); int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu); +int kvm_emulate_wrmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg); int kvm_emulate_as_nop(struct kvm_vcpu *vcpu); int kvm_emulate_invd(struct kvm_vcpu *vcpu); int kvm_emulate_mwait(struct kvm_vcpu *vcpu); @@ -2187,6 +2211,7 @@ int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val); unsigned long kvm_get_dr(struct kvm_vcpu *vcpu, int dr); unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu); void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw); +int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr); int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu); int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr); @@ -2276,10 +2301,8 @@ void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level, int tdp_max_root_level, int tdp_huge_page_level); -#ifdef CONFIG_KVM_PRIVATE_MEM +#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES #define kvm_arch_has_private_mem(kvm) ((kvm)->arch.has_private_mem) -#else -#define kvm_arch_has_private_mem(kvm) false #endif #define kvm_arch_has_readonly_mem(kvm) (!(kvm)->arch.has_protected_state) @@ -2356,6 +2379,7 @@ int kvm_add_user_return_msr(u32 msr); int kvm_find_user_return_msr(u32 msr); int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask); void kvm_user_return_msr_update_cache(unsigned int index, u64 val); +u64 kvm_get_user_return_msr(unsigned int slot); static inline bool kvm_is_supported_user_return_msr(u32 msr) { @@ -2392,9 +2416,6 @@ void __user *__x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu); bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu); -bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, - struct kvm_vcpu **dest_vcpu); - static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq) { /* We can only post Fixed and LowPrio IRQs */ diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 57bc74e112f2..4a47c16e2df8 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -124,7 +124,6 @@ bool kvm_para_available(void); unsigned int kvm_arch_para_features(void); unsigned int kvm_arch_para_hints(void); void kvm_async_pf_task_wait_schedule(u32 token); -void kvm_async_pf_task_wake(u32 token); u32 kvm_read_and_reset_apf_flags(void); bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token); @@ -148,7 +147,6 @@ static inline void kvm_spinlock_init(void) #else /* CONFIG_KVM_GUEST */ #define kvm_async_pf_task_wait_schedule(T) do {} while(0) -#define kvm_async_pf_task_wake(T) do {} while(0) static inline bool kvm_para_available(void) { diff --git a/arch/x86/include/asm/kvm_types.h b/arch/x86/include/asm/kvm_types.h index 08f1b57d3b62..23268a188e70 100644 --- a/arch/x86/include/asm/kvm_types.h +++ b/arch/x86/include/asm/kvm_types.h @@ -2,6 +2,16 @@ #ifndef _ASM_X86_KVM_TYPES_H #define _ASM_X86_KVM_TYPES_H +#if IS_MODULE(CONFIG_KVM_AMD) && IS_MODULE(CONFIG_KVM_INTEL) +#define KVM_SUB_MODULES kvm-amd,kvm-intel +#elif IS_MODULE(CONFIG_KVM_AMD) +#define KVM_SUB_MODULES kvm-amd +#elif IS_MODULE(CONFIG_KVM_INTEL) +#define KVM_SUB_MODULES kvm-intel +#else +#undef KVM_SUB_MODULES +#endif + #define KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE 40 #endif /* _ASM_X86_KVM_TYPES_H */ diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index abc4659f5809..605abd02158d 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -6,6 +6,7 @@ #include <linux/nmi.h> #include <linux/msi.h> #include 
<linux/io.h> +#include <linux/static_call.h> #include <asm/nospec-branch.h> #include <asm/paravirt.h> #include <asm/msr.h> @@ -39,16 +40,21 @@ static inline unsigned char hv_get_nmi_reason(void) return 0; } -#if IS_ENABLED(CONFIG_HYPERV) -extern bool hyperv_paravisor_present; +extern u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2); +extern u64 hv_snp_hypercall(u64 control, u64 param1, u64 param2); +extern u64 hv_std_hypercall(u64 control, u64 param1, u64 param2); +#if IS_ENABLED(CONFIG_HYPERV) extern void *hv_hypercall_pg; extern union hv_ghcb * __percpu *hv_ghcb_pg; bool hv_isolation_type_snp(void); bool hv_isolation_type_tdx(void); -u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2); + +#ifdef CONFIG_X86_64 +DECLARE_STATIC_CALL(hv_hypercall, hv_std_hypercall); +#endif /* * DEFAULT INIT GPAT and SEGMENT LIMIT value in struct VMSA @@ -65,37 +71,15 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output) { u64 input_address = input ? virt_to_phys(input) : 0; u64 output_address = output ? virt_to_phys(output) : 0; - u64 hv_status; #ifdef CONFIG_X86_64 - if (hv_isolation_type_tdx() && !hyperv_paravisor_present) - return hv_tdx_hypercall(control, input_address, output_address); - - if (hv_isolation_type_snp() && !hyperv_paravisor_present) { - __asm__ __volatile__("mov %[output_address], %%r8\n" - "vmmcall" - : "=a" (hv_status), ASM_CALL_CONSTRAINT, - "+c" (control), "+d" (input_address) - : [output_address] "r" (output_address) - : "cc", "memory", "r8", "r9", "r10", "r11"); - return hv_status; - } - - if (!hv_hypercall_pg) - return U64_MAX; - - __asm__ __volatile__("mov %[output_address], %%r8\n" - CALL_NOSPEC - : "=a" (hv_status), ASM_CALL_CONSTRAINT, - "+c" (control), "+d" (input_address) - : [output_address] "r" (output_address), - THUNK_TARGET(hv_hypercall_pg) - : "cc", "memory", "r8", "r9", "r10", "r11"); + return static_call_mod(hv_hypercall)(control, input_address, output_address); #else u32 input_address_hi = upper_32_bits(input_address); u32 input_address_lo = lower_32_bits(input_address); u32 output_address_hi = upper_32_bits(output_address); u32 output_address_lo = lower_32_bits(output_address); + u64 hv_status; if (!hv_hypercall_pg) return U64_MAX; @@ -108,48 +92,30 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output) "D"(output_address_hi), "S"(output_address_lo), THUNK_TARGET(hv_hypercall_pg) : "cc", "memory"); -#endif /* !x86_64 */ return hv_status; +#endif /* !x86_64 */ } /* Fast hypercall with 8 bytes of input and no output */ static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1) { - u64 hv_status; - #ifdef CONFIG_X86_64 - if (hv_isolation_type_tdx() && !hyperv_paravisor_present) - return hv_tdx_hypercall(control, input1, 0); - - if (hv_isolation_type_snp() && !hyperv_paravisor_present) { - __asm__ __volatile__( - "vmmcall" - : "=a" (hv_status), ASM_CALL_CONSTRAINT, - "+c" (control), "+d" (input1) - :: "cc", "r8", "r9", "r10", "r11"); - } else { - __asm__ __volatile__(CALL_NOSPEC - : "=a" (hv_status), ASM_CALL_CONSTRAINT, - "+c" (control), "+d" (input1) - : THUNK_TARGET(hv_hypercall_pg) - : "cc", "r8", "r9", "r10", "r11"); - } + return static_call_mod(hv_hypercall)(control, input1, 0); #else - { - u32 input1_hi = upper_32_bits(input1); - u32 input1_lo = lower_32_bits(input1); - - __asm__ __volatile__ (CALL_NOSPEC - : "=A"(hv_status), - "+c"(input1_lo), - ASM_CALL_CONSTRAINT - : "A" (control), - "b" (input1_hi), - THUNK_TARGET(hv_hypercall_pg) - : "cc", "edi", "esi"); - } -#endif + u32 input1_hi = 
upper_32_bits(input1); + u32 input1_lo = lower_32_bits(input1); + u64 hv_status; + + __asm__ __volatile__ (CALL_NOSPEC + : "=A"(hv_status), + "+c"(input1_lo), + ASM_CALL_CONSTRAINT + : "A" (control), + "b" (input1_hi), + THUNK_TARGET(hv_hypercall_pg) + : "cc", "edi", "esi"); return hv_status; +#endif } static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1) @@ -162,45 +128,24 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1) /* Fast hypercall with 16 bytes of input */ static inline u64 _hv_do_fast_hypercall16(u64 control, u64 input1, u64 input2) { - u64 hv_status; - #ifdef CONFIG_X86_64 - if (hv_isolation_type_tdx() && !hyperv_paravisor_present) - return hv_tdx_hypercall(control, input1, input2); - - if (hv_isolation_type_snp() && !hyperv_paravisor_present) { - __asm__ __volatile__("mov %[input2], %%r8\n" - "vmmcall" - : "=a" (hv_status), ASM_CALL_CONSTRAINT, - "+c" (control), "+d" (input1) - : [input2] "r" (input2) - : "cc", "r8", "r9", "r10", "r11"); - } else { - __asm__ __volatile__("mov %[input2], %%r8\n" - CALL_NOSPEC - : "=a" (hv_status), ASM_CALL_CONSTRAINT, - "+c" (control), "+d" (input1) - : [input2] "r" (input2), - THUNK_TARGET(hv_hypercall_pg) - : "cc", "r8", "r9", "r10", "r11"); - } + return static_call_mod(hv_hypercall)(control, input1, input2); #else - { - u32 input1_hi = upper_32_bits(input1); - u32 input1_lo = lower_32_bits(input1); - u32 input2_hi = upper_32_bits(input2); - u32 input2_lo = lower_32_bits(input2); - - __asm__ __volatile__ (CALL_NOSPEC - : "=A"(hv_status), - "+c"(input1_lo), ASM_CALL_CONSTRAINT - : "A" (control), "b" (input1_hi), - "D"(input2_hi), "S"(input2_lo), - THUNK_TARGET(hv_hypercall_pg) - : "cc"); - } -#endif + u32 input1_hi = upper_32_bits(input1); + u32 input1_lo = lower_32_bits(input1); + u32 input2_hi = upper_32_bits(input2); + u32 input2_lo = lower_32_bits(input2); + u64 hv_status; + + __asm__ __volatile__ (CALL_NOSPEC + : "=A"(hv_status), + "+c"(input1_lo), ASM_CALL_CONSTRAINT + : "A" (control), "b" (input1_hi), + "D"(input2_hi), "S"(input2_lo), + THUNK_TARGET(hv_hypercall_pg) + : "cc"); return hv_status; +#endif } static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 718a55d82fe4..9e1720d73244 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -315,9 +315,12 @@ #define PERF_CAP_PT_IDX 16 #define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6 + +#define PERF_CAP_LBR_FMT 0x3f #define PERF_CAP_PEBS_TRAP BIT_ULL(6) #define PERF_CAP_ARCH_REG BIT_ULL(7) #define PERF_CAP_PEBS_FORMAT 0xf00 +#define PERF_CAP_FW_WRITES BIT_ULL(13) #define PERF_CAP_PEBS_BASELINE BIT_ULL(14) #define PERF_CAP_PEBS_TIMING_INFO BIT_ULL(17) #define PERF_CAP_PEBS_MASK (PERF_CAP_PEBS_TRAP | PERF_CAP_ARCH_REG | \ @@ -747,6 +750,7 @@ #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300 #define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301 #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302 +#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET 0xc0000303 /* AMD Hardware Feedback Support MSRs */ #define MSR_AMD_WORKLOAD_CLASS_CONFIG 0xc0000500 diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index c69e269937c5..76b95bd1a405 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -1,21 +1,8 @@ +/* SPDX-License-Identifier: LGPL-2.0+ */ /* Generic MTRR (Memory Type Range Register) ioctls. 
Copyright (C) 1997-1999 Richard Gooch - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public - License as published by the Free Software Foundation; either - version 2 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public - License along with this library; if not, write to the Free - Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - Richard Gooch may be reached by email at rgooch@atnf.csiro.au The postal address is: Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia. diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index 6ca6516c7492..e4815e15dc9a 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -36,9 +36,7 @@ static __always_inline void __monitor(const void *eax, u32 ecx, u32 edx) static __always_inline void __monitorx(const void *eax, u32 ecx, u32 edx) { - /* "monitorx %eax, %ecx, %edx" */ - asm volatile(".byte 0x0f, 0x01, 0xfa" - :: "a" (eax), "c" (ecx), "d"(edx)); + asm volatile("monitorx" :: "a" (eax), "c" (ecx), "d"(edx)); } static __always_inline void __mwait(u32 eax, u32 ecx) @@ -80,9 +78,7 @@ static __always_inline void __mwaitx(u32 eax, u32 ebx, u32 ecx) { /* No need for TSA buffer clearing on AMD */ - /* "mwaitx %eax, %ebx, %ecx" */ - asm volatile(".byte 0x0f, 0x01, 0xfb" - :: "a" (eax), "b" (ebx), "c" (ecx)); + asm volatile("mwaitx" :: "a" (eax), "b" (ebx), "c" (ecx)); } /* diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index b0d03b6c279b..332428caaed2 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -309,8 +309,7 @@ do { \ \ asm qual (__pcpu_op_##size("cmpxchg") "%[nval], " \ __percpu_arg([var]) \ - CC_SET(z) \ - : CC_OUT(z) (success), \ + : "=@ccz" (success), \ [oval] "+a" (pco_old__), \ [var] "+m" (__my_cpu_var(_var)) \ : [nval] __pcpu_reg_##size(, pco_new__) \ @@ -367,8 +366,7 @@ do { \ asm_inline qual ( \ ALTERNATIVE("call this_cpu_cmpxchg8b_emu", \ "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \ - CC_SET(z) \ - : ALT_OUTPUT_SP(CC_OUT(z) (success), \ + : ALT_OUTPUT_SP("=@ccz" (success), \ [var] "+m" (__my_cpu_var(_var)), \ "+a" (old__.low), "+d" (old__.high)) \ : "b" (new__.low), "c" (new__.high), \ @@ -436,8 +434,7 @@ do { \ asm_inline qual ( \ ALTERNATIVE("call this_cpu_cmpxchg16b_emu", \ "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \ - CC_SET(z) \ - : ALT_OUTPUT_SP(CC_OUT(z) (success), \ + : ALT_OUTPUT_SP("=@ccz" (success), \ [var] "+m" (__my_cpu_var(_var)), \ "+a" (old__.low), "+d" (old__.high)) \ : "b" (new__.low), "c" (new__.high), \ @@ -585,8 +582,7 @@ do { \ bool oldbit; \ \ asm volatile("btl %[nr], " __percpu_arg([var]) \ - CC_SET(c) \ - : CC_OUT(c) (oldbit) \ + : "=@ccc" (oldbit) \ : [var] "m" (__my_cpu_var(_var)), \ [nr] "rI" (_nr)); \ oldbit; \ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index bde58f6510ac..a24c7805acdb 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -731,6 +731,8 @@ void __noreturn stop_this_cpu(void *dummy); void microcode_check(struct cpuinfo_x86 *prev_info); void store_cpu_caps(struct cpuinfo_x86 *info); 
+DECLARE_PER_CPU(bool, cache_state_incoherent); + enum l1tf_mitigations { L1TF_MITIGATION_OFF, L1TF_MITIGATION_AUTO, diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index 3821ee3fae35..54c8fc430684 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h @@ -6,37 +6,15 @@ #define __CLOBBERS_MEM(clb...) "memory", ## clb -#ifndef __GCC_ASM_FLAG_OUTPUTS__ - -/* Use asm goto */ - -#define __GEN_RMWcc(fullop, _var, cc, clobbers, ...) \ -({ \ - bool c = false; \ - asm goto (fullop "; j" #cc " %l[cc_label]" \ - : : [var] "m" (_var), ## __VA_ARGS__ \ - : clobbers : cc_label); \ - if (0) { \ -cc_label: c = true; \ - } \ - c; \ -}) - -#else /* defined(__GCC_ASM_FLAG_OUTPUTS__) */ - -/* Use flags output or a set instruction */ - #define __GEN_RMWcc(fullop, _var, cc, clobbers, ...) \ ({ \ bool c; \ - asm_inline volatile (fullop CC_SET(cc) \ - : [var] "+m" (_var), CC_OUT(cc) (c) \ + asm_inline volatile (fullop \ + : [var] "+m" (_var), "=@cc" #cc (c) \ : __VA_ARGS__ : clobbers); \ c; \ }) -#endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) */ - #define GEN_UNARY_RMWcc_4(op, var, cc, arg0) \ __GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM()) diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index f9046c4b9a2b..0e6c0940100f 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -491,8 +491,7 @@ static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) /* "pvalidate" mnemonic support in binutils 2.36 and newer */ asm volatile(".byte 0xF2, 0x0F, 0x01, 0xFF\n\t" - CC_SET(c) - : CC_OUT(c) (no_rmpupdate), "=a"(rc) + : "=@ccc"(no_rmpupdate), "=a"(rc) : "a"(vaddr), "c"(rmp_psize), "d"(validate) : "memory", "cc"); diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index c72d46175374..5c03aaa89014 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h @@ -83,8 +83,7 @@ static inline int __const_sigismember(sigset_t *set, int _sig) static inline int __gen_sigismember(sigset_t *set, int _sig) { bool ret; - asm("btl %2,%1" CC_SET(c) - : CC_OUT(c) (ret) : "m"(*set), "Ir"(_sig-1)); + asm("btl %2,%1" : "=@ccc"(ret) : "m"(*set), "Ir"(_sig-1)); return ret; } diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index fde2bd7af19e..46aa2c9c1bda 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -75,9 +75,7 @@ static inline u32 rdpkru(void) * "rdpkru" instruction. Places PKRU contents in to EAX, * clears EDX and requires that ecx=0. */ - asm volatile(".byte 0x0f,0x01,0xee\n\t" - : "=a" (pkru), "=d" (edx) - : "c" (ecx)); + asm volatile("rdpkru" : "=a" (pkru), "=d" (edx) : "c" (ecx)); return pkru; } @@ -89,8 +87,7 @@ static inline void wrpkru(u32 pkru) * "wrpkru" instruction. Loads contents in EAX to PKRU, * requires that ecx = edx = 0. */ - asm volatile(".byte 0x0f,0x01,0xef\n\t" - : : "a" (pkru), "c"(ecx), "d"(edx)); + asm volatile("wrpkru" : : "a" (pkru), "c"(ecx), "d"(edx)); } #else @@ -287,8 +284,7 @@ static inline int enqcmds(void __iomem *dst, const void *src) * See movdir64b()'s comment on operand specification. 
*/ asm volatile(".byte 0xf3, 0x0f, 0x38, 0xf8, 0x02, 0x66, 0x90" - CC_SET(z) - : CC_OUT(z) (zf), "+m" (*__dst) + : "=@ccz" (zf), "+m" (*__dst) : "m" (*__src), "a" (__dst), "d" (__src)); /* Submission failure is indicated via EFLAGS.ZF=1 */ diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index ffc27f676243..17f6c3fedeee 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -299,6 +299,7 @@ static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_ #define SVM_SEV_FEAT_RESTRICTED_INJECTION BIT(3) #define SVM_SEV_FEAT_ALTERNATE_INJECTION BIT(4) #define SVM_SEV_FEAT_DEBUG_SWAP BIT(5) +#define SVM_SEV_FEAT_SECURE_TSC BIT(9) #define VMCB_ALLOWED_SEV_FEATURES_VALID BIT_ULL(63) diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h index 7ddef3a69866..6b338d7f01b7 100644 --- a/arch/x86/include/asm/tdx.h +++ b/arch/x86/include/asm/tdx.h @@ -102,10 +102,31 @@ u64 __seamcall_ret(u64 fn, struct tdx_module_args *args); u64 __seamcall_saved_ret(u64 fn, struct tdx_module_args *args); void tdx_init(void); +#include <linux/preempt.h> #include <asm/archrandom.h> +#include <asm/processor.h> typedef u64 (*sc_func_t)(u64 fn, struct tdx_module_args *args); +static __always_inline u64 __seamcall_dirty_cache(sc_func_t func, u64 fn, + struct tdx_module_args *args) +{ + lockdep_assert_preemption_disabled(); + + /* + * SEAMCALLs are made to the TDX module and can generate dirty + * cachelines of TDX private memory. Mark cache state incoherent + * so that the cache can be flushed during kexec. + * + * This needs to be done before actually making the SEAMCALL, + * because kexec-ing CPU could send NMI to stop remote CPUs, + * in which case even disabling IRQ won't help here. + */ + this_cpu_write(cache_state_incoherent, true); + + return func(fn, args); +} + static __always_inline u64 sc_retry(sc_func_t func, u64 fn, struct tdx_module_args *args) { @@ -113,7 +134,9 @@ static __always_inline u64 sc_retry(sc_func_t func, u64 fn, u64 ret; do { - ret = func(fn, args); + preempt_disable(); + ret = __seamcall_dirty_cache(func, fn, args); + preempt_enable(); } while (ret == TDX_RND_NO_ENTROPY && --retry); return ret; @@ -131,6 +154,8 @@ int tdx_guest_keyid_alloc(void); u32 tdx_get_nr_guest_keyids(void); void tdx_guest_keyid_free(unsigned int keyid); +void tdx_quirk_reset_page(struct page *page); + struct tdx_td { /* TD root structure: */ struct page *tdr_page; @@ -146,6 +171,8 @@ struct tdx_td { struct tdx_vp { /* TDVP root page */ struct page *tdvpr_page; + /* precalculated page_to_phys(tdvpr_page) for use in noinstr code */ + phys_addr_t tdvpr_pa; /* TD vCPU control structure: */ struct page **tdcx_pages; @@ -203,5 +230,11 @@ static inline const char *tdx_dump_mce_info(struct mce *m) { return NULL; } static inline const struct tdx_sys_info *tdx_get_sysinfo(void) { return NULL; } #endif /* CONFIG_INTEL_TDX_HOST */ +#ifdef CONFIG_KEXEC_CORE +void tdx_cpu_flush_cache_for_kexec(void); +#else +static inline void tdx_cpu_flush_cache_for_kexec(void) { } +#endif + #endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_TDX_H */ diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index 5337f1be18f6..f2d142a0a862 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -178,9 +178,9 @@ void int3_emulate_ret(struct pt_regs *regs) } static __always_inline -void int3_emulate_jcc(struct pt_regs *regs, u8 cc, unsigned long ip, unsigned long disp) +bool __emulate_cc(unsigned long flags, u8 cc) { 
- static const unsigned long jcc_mask[6] = { + static const unsigned long cc_mask[6] = { [0] = X86_EFLAGS_OF, [1] = X86_EFLAGS_CF, [2] = X86_EFLAGS_ZF, @@ -193,15 +193,21 @@ void int3_emulate_jcc(struct pt_regs *regs, u8 cc, unsigned long ip, unsigned lo bool match; if (cc < 0xc) { - match = regs->flags & jcc_mask[cc >> 1]; + match = flags & cc_mask[cc >> 1]; } else { - match = ((regs->flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^ - ((regs->flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT); + match = ((flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^ + ((flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT); if (cc >= 0xe) - match = match || (regs->flags & X86_EFLAGS_ZF); + match = match || (flags & X86_EFLAGS_ZF); } - if ((match && !invert) || (!match && invert)) + return (match && !invert) || (!match && invert); +} + +static __always_inline +void int3_emulate_jcc(struct pt_regs *regs, u8 cc, unsigned long ip, unsigned long disp) +{ + if (__emulate_cc(regs->flags, cc)) ip += disp; int3_emulate_jmp(regs, ip); diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 3a7755c1a441..91a3fb8ae7ff 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -378,7 +378,7 @@ do { \ asm_goto_output("\n" \ "1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\ _ASM_EXTABLE_UA(1b, %l[label]) \ - : CC_OUT(z) (success), \ + : "=@ccz" (success), \ [ptr] "+m" (*_ptr), \ [old] "+a" (__old) \ : [new] ltype (__new) \ @@ -397,7 +397,7 @@ do { \ asm_goto_output("\n" \ "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \ _ASM_EXTABLE_UA(1b, %l[label]) \ - : CC_OUT(z) (success), \ + : "=@ccz" (success), \ "+A" (__old), \ [ptr] "+m" (*_ptr) \ : "b" ((u32)__new), \ @@ -417,11 +417,10 @@ do { \ __typeof__(*(_ptr)) __new = (_new); \ asm volatile("\n" \ "1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\ - CC_SET(z) \ "2:\n" \ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, \ %[errout]) \ - : CC_OUT(z) (success), \ + : "=@ccz" (success), \ [errout] "+r" (__err), \ [ptr] "+m" (*_ptr), \ [old] "+a" (__old) \ diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index cca7d6641287..c85c50019523 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -106,6 +106,7 @@ #define VM_EXIT_CLEAR_BNDCFGS 0x00800000 #define VM_EXIT_PT_CONCEAL_PIP 0x01000000 #define VM_EXIT_CLEAR_IA32_RTIT_CTL 0x02000000 +#define VM_EXIT_LOAD_CET_STATE 0x10000000 #define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff @@ -119,6 +120,7 @@ #define VM_ENTRY_LOAD_BNDCFGS 0x00010000 #define VM_ENTRY_PT_CONCEAL_PIP 0x00020000 #define VM_ENTRY_LOAD_IA32_RTIT_CTL 0x00040000 +#define VM_ENTRY_LOAD_CET_STATE 0x00100000 #define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff @@ -132,6 +134,7 @@ #define VMX_BASIC_DUAL_MONITOR_TREATMENT BIT_ULL(49) #define VMX_BASIC_INOUT BIT_ULL(54) #define VMX_BASIC_TRUE_CTLS BIT_ULL(55) +#define VMX_BASIC_NO_HW_ERROR_CODE_CC BIT_ULL(56) static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic) { @@ -369,6 +372,9 @@ enum vmcs_field { GUEST_PENDING_DBG_EXCEPTIONS = 0x00006822, GUEST_SYSENTER_ESP = 0x00006824, GUEST_SYSENTER_EIP = 0x00006826, + GUEST_S_CET = 0x00006828, + GUEST_SSP = 0x0000682a, + GUEST_INTR_SSP_TABLE = 0x0000682c, HOST_CR0 = 0x00006c00, HOST_CR3 = 0x00006c02, HOST_CR4 = 0x00006c04, @@ -381,6 +387,9 @@ enum vmcs_field { HOST_IA32_SYSENTER_EIP = 0x00006c12, HOST_RSP = 0x00006c14, HOST_RIP = 0x00006c16, + HOST_S_CET = 0x00006c18, + HOST_SSP = 0x00006c1a, + HOST_INTR_SSP_TABLE = 0x00006c1c }; /* diff --git a/arch/x86/include/uapi/asm/kvm.h 
b/arch/x86/include/uapi/asm/kvm.h index 0f15d683817d..d420c9c066d4 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -35,6 +35,11 @@ #define MC_VECTOR 18 #define XM_VECTOR 19 #define VE_VECTOR 20 +#define CP_VECTOR 21 + +#define HV_VECTOR 28 +#define VC_VECTOR 29 +#define SX_VECTOR 30 /* Select x86 specific features in <linux/kvm.h> */ #define __KVM_HAVE_PIT @@ -411,6 +416,35 @@ struct kvm_xcrs { __u64 padding[16]; }; +#define KVM_X86_REG_TYPE_MSR 2 +#define KVM_X86_REG_TYPE_KVM 3 + +#define KVM_X86_KVM_REG_SIZE(reg) \ +({ \ + reg == KVM_REG_GUEST_SSP ? KVM_REG_SIZE_U64 : 0; \ +}) + +#define KVM_X86_REG_TYPE_SIZE(type, reg) \ +({ \ + __u64 type_size = (__u64)type << 32; \ + \ + type_size |= type == KVM_X86_REG_TYPE_MSR ? KVM_REG_SIZE_U64 : \ + type == KVM_X86_REG_TYPE_KVM ? KVM_X86_KVM_REG_SIZE(reg) : \ + 0; \ + type_size; \ +}) + +#define KVM_X86_REG_ID(type, index) \ + (KVM_REG_X86 | KVM_X86_REG_TYPE_SIZE(type, index) | index) + +#define KVM_X86_REG_MSR(index) \ + KVM_X86_REG_ID(KVM_X86_REG_TYPE_MSR, index) +#define KVM_X86_REG_KVM(index) \ + KVM_X86_REG_ID(KVM_X86_REG_TYPE_KVM, index) + +/* KVM-defined registers starting from 0 */ +#define KVM_REG_GUEST_SSP 0 + #define KVM_SYNC_X86_REGS (1UL << 0) #define KVM_SYNC_X86_SREGS (1UL << 1) #define KVM_SYNC_X86_EVENTS (1UL << 2) diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index f0f4a4cf84a7..9792e329343e 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -94,6 +94,8 @@ #define EXIT_REASON_BUS_LOCK 74 #define EXIT_REASON_NOTIFY 75 #define EXIT_REASON_TDCALL 77 +#define EXIT_REASON_MSR_READ_IMM 84 +#define EXIT_REASON_MSR_WRITE_IMM 85 #define VMX_EXIT_REASONS \ { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ @@ -158,7 +160,9 @@ { EXIT_REASON_TPAUSE, "TPAUSE" }, \ { EXIT_REASON_BUS_LOCK, "BUS_LOCK" }, \ { EXIT_REASON_NOTIFY, "NOTIFY" }, \ - { EXIT_REASON_TDCALL, "TDCALL" } + { EXIT_REASON_TDCALL, "TDCALL" }, \ + { EXIT_REASON_MSR_READ_IMM, "MSR_READ_IMM" }, \ + { EXIT_REASON_MSR_WRITE_IMM, "MSR_WRITE_IMM" } #define VMX_EXIT_REASON_FLAGS \ { VMX_EXIT_REASONS_FAILED_VMENTRY, "FAILED_VMENTRY" } diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index 8698d66563ed..0281703da5e2 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -89,7 +89,7 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags, */ flags->bm_control = 0; } - if (c->x86_vendor == X86_VENDOR_AMD && c->x86 >= 0x17) { + if (cpu_feature_enabled(X86_FEATURE_ZEN)) { /* * For all AMD Zen or newer CPUs that support C3, caches * should not be flushed by software while entering C3 diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 79ae9cb50019..8ee5ff547357 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -147,10 +147,10 @@ static void *its_init_thunk(void *thunk, int reg) /* * When ITS uses indirect branch thunk the fineibt_paranoid * caller sequence doesn't fit in the caller site. So put the - * remaining part of the sequence (<ea> + JNE) into the ITS + * remaining part of the sequence (UDB + JNE) into the ITS * thunk. 
*/ - bytes[i++] = 0xea; /* invalid instruction */ + bytes[i++] = 0xd6; /* UDB */ bytes[i++] = 0x75; /* JNE */ bytes[i++] = 0xfd; @@ -163,7 +163,7 @@ static void *its_init_thunk(void *thunk, int reg) reg -= 8; } bytes[i++] = 0xff; - bytes[i++] = 0xe0 + reg; /* jmp *reg */ + bytes[i++] = 0xe0 + reg; /* JMP *reg */ bytes[i++] = 0xcc; return thunk + offset; @@ -713,20 +713,33 @@ static inline bool is_jcc32(struct insn *insn) #if defined(CONFIG_MITIGATION_RETPOLINE) && defined(CONFIG_OBJTOOL) /* - * CALL/JMP *%\reg + * [CS]{,3} CALL/JMP *%\reg [INT3]* */ -static int emit_indirect(int op, int reg, u8 *bytes) +static int emit_indirect(int op, int reg, u8 *bytes, int len) { + int cs = 0, bp = 0; int i = 0; u8 modrm; + /* + * Set @len to the excess bytes after writing the instruction. + */ + len -= 2 + (reg >= 8); + WARN_ON_ONCE(len < 0); + switch (op) { case CALL_INSN_OPCODE: modrm = 0x10; /* Reg = 2; CALL r/m */ + /* + * Additional NOP is better than prefix decode penalty. + */ + if (len <= 3) + cs = len; break; case JMP32_INSN_OPCODE: modrm = 0x20; /* Reg = 4; JMP r/m */ + bp = len; break; default: @@ -734,6 +747,9 @@ static int emit_indirect(int op, int reg, u8 *bytes) return -1; } + while (cs--) + bytes[i++] = 0x2e; /* CS-prefix */ + if (reg >= 8) { bytes[i++] = 0x41; /* REX.B prefix */ reg -= 8; @@ -745,6 +761,9 @@ static int emit_indirect(int op, int reg, u8 *bytes) bytes[i++] = 0xff; /* opcode */ bytes[i++] = modrm; + while (bp--) + bytes[i++] = 0xcc; /* INT3 */ + return i; } @@ -918,20 +937,11 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes) return emit_its_trampoline(addr, insn, reg, bytes); #endif - ret = emit_indirect(op, reg, bytes + i); + ret = emit_indirect(op, reg, bytes + i, insn->length - i); if (ret < 0) return ret; i += ret; - /* - * The compiler is supposed to EMIT an INT3 after every unconditional - * JMP instruction due to AMD BTC. However, if the compiler is too old - * or MITIGATION_SLS isn't enabled, we still need an INT3 after - * indirect JMPs even on Intel. 
- */ - if (op == JMP32_INSN_OPCODE && i < insn->length) - bytes[i++] = INT3_INSN_OPCODE; - for (; i < insn->length;) bytes[i++] = BYTES_NOP1; @@ -970,7 +980,7 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) case JMP32_INSN_OPCODE: /* Check for cfi_paranoid + ITS */ dest = addr + insn.length + insn.immediate.value; - if (dest[-1] == 0xea && (dest[0] & 0xf0) == 0x70) { + if (dest[-1] == 0xd6 && (dest[0] & 0xf0) == 0x70) { WARN_ON_ONCE(cfi_mode != CFI_FINEIBT); continue; } @@ -1177,6 +1187,7 @@ void __init_or_module apply_seal_endbr(s32 *start, s32 *end) { } #endif enum cfi_mode cfi_mode __ro_after_init = __CFI_DEFAULT; +static bool cfi_debug __ro_after_init; #ifdef CONFIG_FINEIBT_BHI bool cfi_bhi __ro_after_init = false; @@ -1259,6 +1270,8 @@ static __init int cfi_parse_cmdline(char *str) } else if (!strcmp(str, "off")) { cfi_mode = CFI_OFF; cfi_rand = false; + } else if (!strcmp(str, "debug")) { + cfi_debug = true; } else if (!strcmp(str, "kcfi")) { cfi_mode = CFI_KCFI; } else if (!strcmp(str, "fineibt")) { @@ -1266,26 +1279,26 @@ static __init int cfi_parse_cmdline(char *str) } else if (!strcmp(str, "norand")) { cfi_rand = false; } else if (!strcmp(str, "warn")) { - pr_alert("CFI mismatch non-fatal!\n"); + pr_alert("CFI: mismatch non-fatal!\n"); cfi_warn = true; } else if (!strcmp(str, "paranoid")) { if (cfi_mode == CFI_FINEIBT) { cfi_paranoid = true; } else { - pr_err("Ignoring paranoid; depends on fineibt.\n"); + pr_err("CFI: ignoring paranoid; depends on fineibt.\n"); } } else if (!strcmp(str, "bhi")) { #ifdef CONFIG_FINEIBT_BHI if (cfi_mode == CFI_FINEIBT) { cfi_bhi = true; } else { - pr_err("Ignoring bhi; depends on fineibt.\n"); + pr_err("CFI: ignoring bhi; depends on fineibt.\n"); } #else - pr_err("Ignoring bhi; depends on FINEIBT_BHI=y.\n"); + pr_err("CFI: ignoring bhi; depends on FINEIBT_BHI=y.\n"); #endif } else { - pr_err("Ignoring unknown cfi option (%s).", str); + pr_err("CFI: Ignoring unknown option (%s).", str); } str = next; @@ -1300,9 +1313,9 @@ early_param("cfi", cfi_parse_cmdline); * * __cfi_\func: __cfi_\func: * movl $0x12345678,%eax // 5 endbr64 // 4 - * nop subl $0x12345678,%r10d // 7 - * nop jne __cfi_\func+6 // 2 - * nop nop3 // 3 + * nop subl $0x12345678,%eax // 5 + * nop jne.d32,pn \func+3 // 7 + * nop * nop * nop * nop @@ -1311,34 +1324,44 @@ early_param("cfi", cfi_parse_cmdline); * nop * nop * nop + * \func: \func: + * endbr64 nopl -42(%rax) * * * caller: caller: - * movl $(-0x12345678),%r10d // 6 movl $0x12345678,%r10d // 6 + * movl $(-0x12345678),%r10d // 6 movl $0x12345678,%eax // 5 * addl $-15(%r11),%r10d // 4 lea -0x10(%r11),%r11 // 4 - * je 1f // 2 nop4 // 4 + * je 1f // 2 nop5 // 5 * ud2 // 2 * 1: cs call __x86_indirect_thunk_r11 // 6 call *%r11; nop3; // 6 * + * + * Notably, the FineIBT sequences are crafted such that branches are presumed + * non-taken. This is based on Agner Fog's optimization manual, which states: + * + * "Make conditional jumps most often not taken: The efficiency and throughput + * for not-taken branches is better than for taken branches on most + * processors. 
Therefore, it is good to place the most frequent branch first" */ /* * <fineibt_preamble_start>: * 0: f3 0f 1e fa endbr64 - * 4: 41 81 <ea> 78 56 34 12 sub $0x12345678, %r10d - * b: 75 f9 jne 6 <fineibt_preamble_start+0x6> - * d: 0f 1f 00 nopl (%rax) + * 4: 2d 78 56 34 12 sub $0x12345678, %eax + * 9: 2e 0f 85 03 00 00 00 jne,pn 13 <fineibt_preamble_start+0x13> + * 10: 0f 1f 40 d6 nopl -0x2a(%rax) * - * Note that the JNE target is the 0xEA byte inside the SUB, this decodes as - * (bad) on x86_64 and raises #UD. + * Note that the JNE target is the 0xD6 byte inside the NOPL, this decodes as + * UDB on x86_64 and raises #UD. */ asm( ".pushsection .rodata \n" "fineibt_preamble_start: \n" " endbr64 \n" - " subl $0x12345678, %r10d \n" + " subl $0x12345678, %eax \n" "fineibt_preamble_bhi: \n" - " jne fineibt_preamble_start+6 \n" - ASM_NOP3 + " cs jne.d32 fineibt_preamble_start+0x13 \n" + "#fineibt_func: \n" + " nopl -42(%rax) \n" "fineibt_preamble_end: \n" ".popsection\n" ); @@ -1349,20 +1372,20 @@ extern u8 fineibt_preamble_end[]; #define fineibt_preamble_size (fineibt_preamble_end - fineibt_preamble_start) #define fineibt_preamble_bhi (fineibt_preamble_bhi - fineibt_preamble_start) -#define fineibt_preamble_ud 6 -#define fineibt_preamble_hash 7 +#define fineibt_preamble_ud 0x13 +#define fineibt_preamble_hash 5 /* * <fineibt_caller_start>: - * 0: 41 ba 78 56 34 12 mov $0x12345678, %r10d - * 6: 4d 8d 5b f0 lea -0x10(%r11), %r11 - * a: 0f 1f 40 00 nopl 0x0(%rax) + * 0: b8 78 56 34 12 mov $0x12345678, %eax + * 5: 4d 8d 5b f0 lea -0x10(%r11), %r11 + * 9: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) */ asm( ".pushsection .rodata \n" "fineibt_caller_start: \n" - " movl $0x12345678, %r10d \n" + " movl $0x12345678, %eax \n" " lea -0x10(%r11), %r11 \n" - ASM_NOP4 + ASM_NOP5 "fineibt_caller_end: \n" ".popsection \n" ); @@ -1371,7 +1394,7 @@ extern u8 fineibt_caller_start[]; extern u8 fineibt_caller_end[]; #define fineibt_caller_size (fineibt_caller_end - fineibt_caller_start) -#define fineibt_caller_hash 2 +#define fineibt_caller_hash 1 #define fineibt_caller_jmp (fineibt_caller_size - 2) @@ -1388,9 +1411,9 @@ extern u8 fineibt_caller_end[]; * of adding a load. * * <fineibt_paranoid_start>: - * 0: 41 ba 78 56 34 12 mov $0x12345678, %r10d - * 6: 45 3b 53 f7 cmp -0x9(%r11), %r10d - * a: 4d 8d 5b <f0> lea -0x10(%r11), %r11 + * 0: b8 78 56 34 12 mov $0x12345678, %eax + * 5: 41 3b 43 f5 cmp -0x11(%r11), %eax + * 9: 2e 4d 8d 5b <f0> cs lea -0x10(%r11), %r11 * e: 75 fd jne d <fineibt_paranoid_start+0xd> * 10: 41 ff d3 call *%r11 * 13: 90 nop @@ -1402,13 +1425,13 @@ extern u8 fineibt_caller_end[]; */ asm( ".pushsection .rodata \n" "fineibt_paranoid_start: \n" - " movl $0x12345678, %r10d \n" - " cmpl -9(%r11), %r10d \n" - " lea -0x10(%r11), %r11 \n" + " mov $0x12345678, %eax \n" + " cmpl -11(%r11), %eax \n" + " cs lea -0x10(%r11), %r11 \n" + "#fineibt_caller_size: \n" " jne fineibt_paranoid_start+0xd \n" "fineibt_paranoid_ind: \n" - " call *%r11 \n" - " nop \n" + " cs call *%r11 \n" "fineibt_paranoid_end: \n" ".popsection \n" ); @@ -1520,51 +1543,67 @@ static int cfi_rand_preamble(s32 *start, s32 *end) return 0; } +/* + * Inline the bhi-arity 1 case: + * + * __cfi_foo: + * 0: f3 0f 1e fa endbr64 + * 4: 2d 78 56 34 12 sub $0x12345678, %eax + * 9: 49 0f 45 fa cmovne %rax, %rdi + * d: 2e 75 03 jne,pn foo+0x3 + * + * foo: + * 10: 0f 1f 40 <d6> nopl -42(%rax) + * + * Notably, this scheme is incompatible with permissive CFI + * because the CMOVcc is unconditional and RDI will have been + * clobbered. 
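 *
 * [Editor's note: not part of the patch.] A worked decode of the arity-1
 * layout above, to make the branch target explicit: the jne at 0xd is three
 * bytes (2e 75 03), so the next instruction starts at 0x10 (foo), and the
 * rel8 of 0x03 lands the mismatch path on 0x13, the <d6> byte inside foo's
 * leading "nopl -42(%rax)", which decodes as UDB and raises #UD.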
+ */ +asm( ".pushsection .rodata \n" + "fineibt_bhi1_start: \n" + " cmovne %rax, %rdi \n" + " cs jne fineibt_bhi1_func + 0x3 \n" + "fineibt_bhi1_func: \n" + " nopl -42(%rax) \n" + "fineibt_bhi1_end: \n" + ".popsection \n" +); + +extern u8 fineibt_bhi1_start[]; +extern u8 fineibt_bhi1_end[]; + +#define fineibt_bhi1_size (fineibt_bhi1_end - fineibt_bhi1_start) + static void cfi_fineibt_bhi_preamble(void *addr, int arity) { + u8 bytes[MAX_INSN_SIZE]; + if (!arity) return; if (!cfi_warn && arity == 1) { - /* - * Crazy scheme to allow arity-1 inline: - * - * __cfi_foo: - * 0: f3 0f 1e fa endbr64 - * 4: 41 81 <ea> 78 56 34 12 sub 0x12345678, %r10d - * b: 49 0f 45 fa cmovne %r10, %rdi - * f: 75 f5 jne __cfi_foo+6 - * 11: 0f 1f 00 nopl (%rax) - * - * Code that direct calls to foo()+0, decodes the tail end as: - * - * foo: - * 0: f5 cmc - * 1: 0f 1f 00 nopl (%rax) - * - * which clobbers CF, but does not affect anything ABI - * wise. - * - * Notably, this scheme is incompatible with permissive CFI - * because the CMOVcc is unconditional and RDI will have been - * clobbered. - */ - const u8 magic[9] = { - 0x49, 0x0f, 0x45, 0xfa, - 0x75, 0xf5, - BYTES_NOP3, - }; - - text_poke_early(addr + fineibt_preamble_bhi, magic, 9); - + text_poke_early(addr + fineibt_preamble_bhi, + fineibt_bhi1_start, fineibt_bhi1_size); return; } - text_poke_early(addr + fineibt_preamble_bhi, - text_gen_insn(CALL_INSN_OPCODE, - addr + fineibt_preamble_bhi, - __bhi_args[arity]), - CALL_INSN_SIZE); + /* + * Replace the bytes at fineibt_preamble_bhi with a CALL instruction + * that lines up exactly with the end of the preamble, such that the + * return address will be foo+0. + * + * __cfi_foo: + * 0: f3 0f 1e fa endbr64 + * 4: 2d 78 56 34 12 sub $0x12345678, %eax + * 9: 2e 2e e8 DD DD DD DD cs cs call __bhi_args[arity] + */ + bytes[0] = 0x2e; + bytes[1] = 0x2e; + __text_gen_insn(bytes + 2, CALL_INSN_OPCODE, + addr + fineibt_preamble_bhi + 2, + __bhi_args[arity], CALL_INSN_SIZE); + + text_poke_early(addr + fineibt_preamble_bhi, bytes, 7); } static int cfi_rewrite_preamble(s32 *start, s32 *end) @@ -1655,8 +1694,6 @@ static int cfi_rewrite_callers(s32 *start, s32 *end) { s32 *s; - BUG_ON(fineibt_paranoid_size != 20); - for (s = start; s < end; s++) { void *addr = (void *)s + *s; struct insn insn; @@ -1696,8 +1733,9 @@ static int cfi_rewrite_callers(s32 *start, s32 *end) emit_paranoid_trampoline(addr + fineibt_caller_size, &insn, 11, bytes + fineibt_caller_size); } else { - ret = emit_indirect(op, 11, bytes + fineibt_paranoid_ind); - if (WARN_ON_ONCE(ret != 3)) + int len = fineibt_paranoid_size - fineibt_paranoid_ind; + ret = emit_indirect(op, 11, bytes + fineibt_paranoid_ind, len); + if (WARN_ON_ONCE(ret != len)) continue; } @@ -1707,13 +1745,20 @@ static int cfi_rewrite_callers(s32 *start, s32 *end) return 0; } +#define pr_cfi_debug(X...) if (cfi_debug) pr_info(X) + +#define FINEIBT_WARN(_f, _v) \ + WARN_ONCE((_f) != (_v), "FineIBT: " #_f " %ld != %d\n", _f, _v) + static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, s32 *start_cfi, s32 *end_cfi, bool builtin) { int ret; - if (WARN_ONCE(fineibt_preamble_size != 16, - "FineIBT preamble wrong size: %ld", fineibt_preamble_size)) + if (FINEIBT_WARN(fineibt_preamble_size, 20) || + FINEIBT_WARN(fineibt_preamble_bhi + fineibt_bhi1_size, 20) || + FINEIBT_WARN(fineibt_caller_size, 14) || + FINEIBT_WARN(fineibt_paranoid_size, 20)) return; if (cfi_mode == CFI_AUTO) { @@ -1734,6 +1779,7 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, * rewrite them. 
This disables all CFI. If this succeeds but any of the * later stages fails, we're without CFI. */ + pr_cfi_debug("CFI: disabling all indirect call checking\n"); ret = cfi_disable_callers(start_retpoline, end_retpoline); if (ret) goto err; @@ -1744,43 +1790,53 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, cfi_bpf_hash = cfi_rehash(cfi_bpf_hash); cfi_bpf_subprog_hash = cfi_rehash(cfi_bpf_subprog_hash); } + pr_cfi_debug("CFI: cfi_seed: 0x%08x\n", cfi_seed); + pr_cfi_debug("CFI: rehashing all preambles\n"); ret = cfi_rand_preamble(start_cfi, end_cfi); if (ret) goto err; + pr_cfi_debug("CFI: rehashing all indirect calls\n"); ret = cfi_rand_callers(start_retpoline, end_retpoline); if (ret) goto err; + } else { + pr_cfi_debug("CFI: rehashing disabled\n"); } switch (cfi_mode) { case CFI_OFF: if (builtin) - pr_info("Disabling CFI\n"); + pr_info("CFI: disabled\n"); return; case CFI_KCFI: + pr_cfi_debug("CFI: re-enabling all indirect call checking\n"); ret = cfi_enable_callers(start_retpoline, end_retpoline); if (ret) goto err; if (builtin) - pr_info("Using kCFI\n"); + pr_info("CFI: Using %sretpoline kCFI\n", + cfi_rand ? "rehashed " : ""); return; case CFI_FINEIBT: + pr_cfi_debug("CFI: adding FineIBT to all preambles\n"); /* place the FineIBT preamble at func()-16 */ ret = cfi_rewrite_preamble(start_cfi, end_cfi); if (ret) goto err; /* rewrite the callers to target func()-16 */ + pr_cfi_debug("CFI: rewriting indirect call sites to use FineIBT\n"); ret = cfi_rewrite_callers(start_retpoline, end_retpoline); if (ret) goto err; /* now that nobody targets func()+0, remove ENDBR there */ + pr_cfi_debug("CFI: removing old endbr insns\n"); cfi_rewrite_endbr(start_cfi, end_cfi); if (builtin) { @@ -1823,11 +1879,11 @@ static void poison_cfi(void *addr) /* * __cfi_\func: - * osp nopl (%rax) - * subl $0, %r10d - * jz 1f - * ud2 - * 1: nop + * nopl -42(%rax) + * sub $0, %eax + * jne \func+3 + * \func: + * nopl -42(%rax) */ poison_endbr(addr); poison_hash(addr + fineibt_preamble_hash); @@ -1853,12 +1909,14 @@ static void poison_cfi(void *addr) } } +#define fineibt_prefix_size (fineibt_preamble_size - ENDBR_INSN_SIZE) + /* - * When regs->ip points to a 0xEA byte in the FineIBT preamble, + * When regs->ip points to a 0xD6 byte in the FineIBT preamble, * return true and fill out target and type. * * We check the preamble by checking for the ENDBR instruction relative to the - * 0xEA instruction. + * UDB instruction. 
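 *
 * [Editor's note: not part of the patch.] The arithmetic behind the target:
 * fineibt_prefix_size = fineibt_preamble_size - ENDBR_INSN_SIZE = 20 - 4 = 16.
 * Only the first 16 bytes of the new preamble sit in front of the function;
 * its trailing 4-byte "nopl -42(%rax)" is already the first instruction at
 * func+0, so the reported *target is addr + 16 rather than addr + 20.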
*/ static bool decode_fineibt_preamble(struct pt_regs *regs, unsigned long *target, u32 *type) { @@ -1868,10 +1926,10 @@ static bool decode_fineibt_preamble(struct pt_regs *regs, unsigned long *target, if (!exact_endbr((void *)addr)) return false; - *target = addr + fineibt_preamble_size; + *target = addr + fineibt_prefix_size; __get_kernel_nofault(&hash, addr + fineibt_preamble_hash, u32, Efault); - *type = (u32)regs->r10 + hash; + *type = (u32)regs->ax + hash; /* * Since regs->ip points to the middle of an instruction; it cannot @@ -1909,12 +1967,12 @@ static bool decode_fineibt_bhi(struct pt_regs *regs, unsigned long *target, u32 __get_kernel_nofault(&addr, regs->sp, unsigned long, Efault); *target = addr; - addr -= fineibt_preamble_size; + addr -= fineibt_prefix_size; if (!exact_endbr((void *)addr)) return false; __get_kernel_nofault(&hash, addr + fineibt_preamble_hash, u32, Efault); - *type = (u32)regs->r10 + hash; + *type = (u32)regs->ax + hash; /* * The UD2 sites are constructed with a RET immediately following, @@ -1931,7 +1989,7 @@ static bool is_paranoid_thunk(unsigned long addr) u32 thunk; __get_kernel_nofault(&thunk, (u32 *)addr, u32, Efault); - return (thunk & 0x00FFFFFF) == 0xfd75ea; + return (thunk & 0x00FFFFFF) == 0xfd75d6; Efault: return false; @@ -1939,8 +1997,7 @@ Efault: /* * regs->ip points to a LOCK Jcc.d8 instruction from the fineibt_paranoid_start[] - * sequence, or to an invalid instruction (0xea) + Jcc.d8 for cfi_paranoid + ITS - * thunk. + * sequence, or to UDB + Jcc.d8 for cfi_paranoid + ITS thunk. */ static bool decode_fineibt_paranoid(struct pt_regs *regs, unsigned long *target, u32 *type) { @@ -1950,8 +2007,8 @@ static bool decode_fineibt_paranoid(struct pt_regs *regs, unsigned long *target, return false; if (is_cfi_trap(addr + fineibt_caller_size - LEN_UD2)) { - *target = regs->r11 + fineibt_preamble_size; - *type = regs->r10; + *target = regs->r11 + fineibt_prefix_size; + *type = regs->ax; /* * Since the trapping instruction is the exact, but LOCK prefixed, @@ -1963,14 +2020,14 @@ static bool decode_fineibt_paranoid(struct pt_regs *regs, unsigned long *target, /* * The cfi_paranoid + ITS thunk combination results in: * - * 0: 41 ba 78 56 34 12 mov $0x12345678, %r10d - * 6: 45 3b 53 f7 cmp -0x9(%r11), %r10d - * a: 4d 8d 5b f0 lea -0x10(%r11), %r11 + * 0: b8 78 56 34 12 mov $0x12345678, %eax + * 5: 41 3b 43 f7 cmp -11(%r11), %eax + * a: 2e 3d 8d 5b f0 cs lea -0x10(%r11), %r11 * e: 2e e8 XX XX XX XX cs call __x86_indirect_paranoid_thunk_r11 * * Where the paranoid_thunk looks like: * - * 1d: <ea> (bad) + * 1d: <d6> udb * __x86_indirect_paranoid_thunk_r11: * 1e: 75 fd jne 1d * __x86_indirect_its_thunk_r11: @@ -1979,8 +2036,8 @@ static bool decode_fineibt_paranoid(struct pt_regs *regs, unsigned long *target, * */ if (is_paranoid_thunk(regs->ip)) { - *target = regs->r11 + fineibt_preamble_size; - *type = regs->r10; + *target = regs->r11 + fineibt_prefix_size; + *type = regs->ax; regs->ip = *target; return true; @@ -2005,6 +2062,8 @@ bool decode_fineibt_insn(struct pt_regs *regs, unsigned long *target, u32 *type) static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, s32 *start_cfi, s32 *end_cfi, bool builtin) { + if (IS_ENABLED(CONFIG_CFI) && builtin) + pr_info("CFI: Using standard kCFI\n"); } #ifdef CONFIG_X86_KERNEL_IBT @@ -2321,6 +2380,7 @@ void __init alternative_instructions(void) __apply_fineibt(__retpoline_sites, __retpoline_sites_end, __cfi_sites, __cfi_sites_end, true); + cfi_debug = false; /* * Rewrite the retpolines, must be done before 
alternatives since diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 6259b474073b..32ba599a51f8 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -102,6 +102,7 @@ static void __used common(void) BLANK(); DEFINE(PTREGS_SIZE, sizeof(struct pt_regs)); + OFFSET(C_PTREGS_SIZE, pt_regs, orig_ax); /* TLB state for the entry code */ OFFSET(TLB_STATE_user_pcid_flush_mask, tlb_state, user_pcid_flush_mask); diff --git a/arch/x86/kernel/cfi.c b/arch/x86/kernel/cfi.c index 77086cf565ec..638eb5c933e0 100644 --- a/arch/x86/kernel/cfi.c +++ b/arch/x86/kernel/cfi.c @@ -27,7 +27,7 @@ static bool decode_cfi_insn(struct pt_regs *regs, unsigned long *target, * for indirect call checks: * *  movl -<id>, %r10d ; 6 bytes - * addl -4(%reg), %r10d ; 4 bytes + * addl -<pos>(%reg), %r10d; 4 bytes * je .Ltmp1 ; 2 bytes * ud2 ; <- regs->ip * .Ltmp1: diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index a6f88ca1a6b4..ccaa51ce63f6 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -546,6 +546,23 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c) u64 msr; /* + * Mark that WBINVD is needed during kexec on processors that + * support SME. This provides support for performing a successful + * kexec when going from SME inactive to SME active (or vice-versa). + * + * The cache must be cleared so that if there are entries with the + * same physical address, both with and without the encryption bit, + * they don't race each other when flushed and potentially end up + * with the wrong entry being committed to memory. + * + * Test the CPUID bit directly because with mem_encrypt=off the + * BSP will clear the X86_FEATURE_SME bit and the APs will not + * see it set after that. + */ + if (c->extended_cpuid_level >= 0x8000001f && (cpuid_eax(0x8000001f) & BIT(0))) + __this_cpu_write(cache_state_incoherent, true); + + /* * BIOS support is required for SME and SEV. * For SME: If BIOS has enabled SME then adjust x86_phys_bits by * the SME physical address space reduction value. @@ -1338,11 +1355,23 @@ static __init int print_s5_reset_status_mmio(void) return 0; value = ioread32(addr); - iounmap(addr); /* Value with "all bits set" is an error response and should be ignored. */ - if (value == U32_MAX) + if (value == U32_MAX) { + iounmap(addr); return 0; + } + + /* + * Clear all reason bits so they won't be retained if the next reset + * does not update the register. Besides, some bits are never cleared by + * hardware, so it's software's responsibility to clear them. + * + * Writing the value back effectively clears all reason bits as they are + * write-1-to-clear.
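 *
 * [Editor's note: not part of the patch.] A concrete example of the
 * write-1-to-clear behaviour: if the register reads back 0x00000012 (bits 1
 * and 4 set), writing 0x00000012 back clears exactly those two bits, while
 * the 0 bits in the written value leave their positions unchanged.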
+ */ + iowrite32(value, addr); + iounmap(addr); for (i = 0; i < ARRAY_SIZE(s5_reset_reason_txt); i++) { if (!(value & BIT(i))) diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index c78f860419d6..c4febdbcfe4d 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -38,10 +38,6 @@ bool hv_nested; struct ms_hyperv_info ms_hyperv; -/* Used in modules via hv_do_hypercall(): see arch/x86/include/asm/mshyperv.h */ -bool hyperv_paravisor_present __ro_after_init; -EXPORT_SYMBOL_GPL(hyperv_paravisor_present); - #if IS_ENABLED(CONFIG_HYPERV) static inline unsigned int hv_get_nested_msr(unsigned int reg) { @@ -288,8 +284,18 @@ static void __init x86_setup_ops_for_tsc_pg_clock(void) old_restore_sched_clock_state = x86_platform.restore_sched_clock_state; x86_platform.restore_sched_clock_state = hv_restore_sched_clock_state; } + +#ifdef CONFIG_X86_64 +DEFINE_STATIC_CALL(hv_hypercall, hv_std_hypercall); +EXPORT_STATIC_CALL_TRAMP_GPL(hv_hypercall); +#define hypercall_update(hc) static_call_update(hv_hypercall, hc) +#endif #endif /* CONFIG_HYPERV */ +#ifndef hypercall_update +#define hypercall_update(hc) (void)hc +#endif + static uint32_t __init ms_hyperv_platform(void) { u32 eax; @@ -484,14 +490,14 @@ static void __init ms_hyperv_init_platform(void) ms_hyperv.shared_gpa_boundary = BIT_ULL(ms_hyperv.shared_gpa_boundary_bits); - hyperv_paravisor_present = !!ms_hyperv.paravisor_present; - pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B 0x%x\n", ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b); if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) { static_branch_enable(&isolation_type_snp); + if (!ms_hyperv.paravisor_present) + hypercall_update(hv_snp_hypercall); } else if (hv_get_isolation_type() == HV_ISOLATION_TYPE_TDX) { static_branch_enable(&isolation_type_tdx); @@ -499,6 +505,7 @@ static void __init ms_hyperv_init_platform(void) ms_hyperv.hints &= ~HV_X64_APIC_ACCESS_RECOMMENDED; if (!ms_hyperv.paravisor_present) { + hypercall_update(hv_tdx_hypercall); /* * Mark the Hyper-V TSC page feature as disabled * in a TDX VM without paravisor so that the @@ -565,6 +572,11 @@ static void __init ms_hyperv_init_platform(void) machine_ops.crash_shutdown = hv_machine_crash_shutdown; #endif #endif + /* + * HV_ACCESS_TSC_INVARIANT is always zero for the root partition. Root + * partition doesn't need to write to synthetic MSR to enable invariant + * TSC feature. It sees what the hardware provides. + */ if (ms_hyperv.features & HV_ACCESS_TSC_INVARIANT) { /* * Writing to synthetic MSR 0x40000118 updates/changes the @@ -636,8 +648,12 @@ static void __init ms_hyperv_init_platform(void) * TSC should be marked as unstable only after Hyper-V * clocksource has been initialized. This ensures that the * stability of the sched_clock is not altered. + * + * HV_ACCESS_TSC_INVARIANT is always zero for the root partition. No + * need to check for it. 
*/ - if (!(ms_hyperv.features & HV_ACCESS_TSC_INVARIANT)) + if (!hv_root_partition() && + !(ms_hyperv.features & HV_ACCESS_TSC_INVARIANT)) mark_tsc_unstable("running on Hyper-V"); hardlockup_detector_disable(); diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 18cf79d6e2c5..763534d77f59 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -1,21 +1,8 @@ +// SPDX-License-Identifier: LGPL-2.0+ /* * MTRR (Memory Type Range Register) cleanup * * Copyright (C) 2009 Yinghai Lu - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public - * License along with this library; if not, write to the Free - * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include <linux/init.h> #include <linux/pci.h> diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c index ecbda0341a8a..4b3d492afe17 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.c +++ b/arch/x86/kernel/cpu/mtrr/mtrr.c @@ -1,22 +1,9 @@ +// SPDX-License-Identifier: LGPL-2.0+ /* Generic MTRR (Memory Type Range Register) driver. Copyright (C) 1997-2000 Richard Gooch Copyright (c) 2002 Patrick Mochel - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public - License as published by the Free Software Foundation; either - version 2 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public - License along with this library; if not, write to the Free - Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - Richard Gooch may be reached by email at rgooch@atnf.csiro.au The postal address is: Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia. 
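/*
 * [Editor's sketch: not part of the patch.] The mshyperv.c hunk above removes
 * the exported hyperv_paravisor_present flag and introduces a static call,
 * hv_hypercall, that is retargeted once the isolation type is known.  A
 * minimal model of that pattern is sketched below; the hypercall signature
 * and the *_model names are illustrative assumptions, the real prototypes
 * live in arch/x86/include/asm/mshyperv.h.
 */
#include <linux/static_call.h>
#include <linux/types.h>

static u64 hv_std_hypercall_model(u64 control, u64 param1, u64 param2)
{
	return 0;	/* stand-in for the default hypercall path */
}

static u64 hv_snp_hypercall_model(u64 control, u64 param1, u64 param2)
{
	return 0;	/* stand-in for the SEV-SNP hypercall path */
}

/* Compile-time default target, as with DEFINE_STATIC_CALL(hv_hypercall, hv_std_hypercall). */
DEFINE_STATIC_CALL(hv_hypercall_model, hv_std_hypercall_model);

static void pick_hypercall_model(bool snp_without_paravisor)
{
	/* Mirrors hypercall_update(hv_snp_hypercall) in ms_hyperv_init_platform(). */
	if (snp_without_paravisor)
		static_call_update(hv_hypercall_model, hv_snp_hypercall_model);
}

static u64 do_hypercall_model(u64 control, u64 in, u64 out)
{
	/* Callers dispatch through the patched static-call site. */
	return static_call(hv_hypercall_model)(control, in, out);
}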
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index c8945610d455..2cd25a0d4637 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -242,7 +242,9 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d, u32 unused, u32 rmid, enum resctrl_event_id eventid, u64 *val, void *ignored) { + struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); int cpu = cpumask_any(&d->hdr.cpu_mask); + struct arch_mbm_state *am; u64 msr_val; u32 prmid; int ret; @@ -251,12 +253,16 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d, prmid = logical_rmid_to_physical_rmid(cpu, rmid); ret = __rmid_read_phys(prmid, eventid, &msr_val); - if (ret) - return ret; - *val = get_corrected_val(r, d, rmid, eventid, msr_val); + if (!ret) { + *val = get_corrected_val(r, d, rmid, eventid, msr_val); + } else if (ret == -EINVAL) { + am = get_arch_mbm_state(hw_dom, rmid, eventid); + if (am) + am->prev_msr = 0; + } - return 0; + return ret; } static int __cntr_id_read(u32 cntr_id, u64 *val) diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 4cee6213d667..caa4dc885c21 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -27,6 +27,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, { X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 }, + { X86_FEATURE_MSR_IMM, CPUID_ECX, 5, 0x00000007, 1 }, { X86_FEATURE_APX, CPUID_EDX, 21, 0x00000007, 1 }, { X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 }, { X86_FEATURE_BHI_CTRL, CPUID_EDX, 4, 0x00000007, 2 }, diff --git a/arch/x86/kernel/cpu/sgx/encls.h b/arch/x86/kernel/cpu/sgx/encls.h index 99004b02e2ed..42a088a337c5 100644 --- a/arch/x86/kernel/cpu/sgx/encls.h +++ b/arch/x86/kernel/cpu/sgx/encls.h @@ -68,7 +68,7 @@ static inline bool encls_failed(int ret) ({ \ int ret; \ asm volatile( \ - "1: .byte 0x0f, 0x01, 0xcf;\n\t" \ + "1: encls\n" \ "2:\n" \ _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FAULT_SGX) \ : "=a"(ret) \ @@ -111,8 +111,8 @@ static inline bool encls_failed(int ret) ({ \ int ret; \ asm volatile( \ - "1: .byte 0x0f, 0x01, 0xcf;\n\t" \ - " xor %%eax,%%eax;\n" \ + "1: encls\n\t" \ + "xor %%eax,%%eax\n" \ "2:\n" \ _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FAULT_SGX) \ : "=a"(ret), "=b"(rbx_out) \ diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index f79c5edc0b89..6ab9eac64670 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -97,9 +97,11 @@ void __init native_init_IRQ(void) /* Execute any quirks before the call gates are initialised: */ x86_init.irqs.pre_vector_init(); - if (cpu_feature_enabled(X86_FEATURE_FRED)) + /* FRED's IRQ path may be used even if FRED isn't fully enabled. 
*/ + if (IS_ENABLED(CONFIG_X86_FRED)) fred_complete_exception_setup(); - else + + if (!cpu_feature_enabled(X86_FEATURE_FRED)) idt_setup_apic_and_irq_gates(); lapic_assign_system_vectors(); diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 8ae750cde0c6..b67d7c59dca0 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -190,7 +190,7 @@ static void apf_task_wake_all(void) } } -void kvm_async_pf_task_wake(u32 token) +static void kvm_async_pf_task_wake(u32 token) { u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS); struct kvm_task_sleep_head *b = &async_pf_sleepers[key]; @@ -241,7 +241,6 @@ again: /* A dummy token might be allocated and ultimately not used. */ kfree(dummy); } -EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake); noinstr u32 kvm_read_and_reset_apf_flags(void) { @@ -933,6 +932,19 @@ static void kvm_sev_hc_page_enc_status(unsigned long pfn, int npages, bool enc) static void __init kvm_init_platform(void) { + u64 tolud = PFN_PHYS(e820__end_of_low_ram_pfn()); + /* + * Note, hardware requires variable MTRR ranges to be power-of-2 sized + * and naturally aligned. But when forcing guest MTRR state, Linux + * doesn't program the forced ranges into hardware. Don't bother doing + * the math to generate a technically-legal range. + */ + struct mtrr_var_range pci_hole = { + .base_lo = tolud | X86_MEMTYPE_UC, + .mask_lo = (u32)(~(SZ_4G - tolud - 1)) | MTRR_PHYSMASK_V, + .mask_hi = (BIT_ULL(boot_cpu_data.x86_phys_bits) - 1) >> 32, + }; + if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) && kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL)) { unsigned long nr_pages; @@ -982,8 +994,12 @@ static void __init kvm_init_platform(void) kvmclock_init(); x86_platform.apic_post_init = kvm_apic_init; - /* Set WB as the default cache mode for SEV-SNP and TDX */ - guest_force_mtrr_state(NULL, 0, MTRR_TYPE_WRBACK); + /* + * Set WB as the default cache mode for SEV-SNP and TDX, with a single + * UC range for the legacy PCI hole, e.g. so that devices that expect + * to get UC/WC mappings don't get surprised with WB. + */ + guest_force_mtrr_state(&pci_hole, 1, MTRR_TYPE_WRBACK); } #if defined(CONFIG_AMD_MEM_ENCRYPT) @@ -1073,16 +1089,6 @@ static void kvm_wait(u8 *ptr, u8 val) void __init kvm_spinlock_init(void) { /* - * In case host doesn't support KVM_FEATURE_PV_UNHALT there is still an - * advantage of keeping virt_spin_lock_key enabled: virt_spin_lock() is - * preferred over native qspinlock when vCPU is preempted. - */ - if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) { - pr_info("PV spinlocks disabled, no host support\n"); - return; - } - - /* * Disable PV spinlocks and use native qspinlock when dedicated pCPUs * are available. */ @@ -1101,6 +1107,16 @@ void __init kvm_spinlock_init(void) goto out; } + /* + * In case host doesn't support KVM_FEATURE_PV_UNHALT there is still an + * advantage of keeping virt_spin_lock_key enabled: virt_spin_lock() is + * preferred over native qspinlock when vCPU is preempted. 
+ */ + if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) { + pr_info("PV spinlocks disabled, no host support\n"); + return; + } + pr_info("PV spinlocks enabled\n"); __pv_init_lock_hash(); diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 697fb99406e6..201137b98fb8 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -29,6 +29,7 @@ #include <asm/set_memory.h> #include <asm/cpu.h> #include <asm/efi.h> +#include <asm/processor.h> #ifdef CONFIG_ACPI /* @@ -346,6 +347,22 @@ int machine_kexec_prepare(struct kimage *image) unsigned long reloc_end = (unsigned long)__relocate_kernel_end; int result; + /* + * Some early TDX-capable platforms have an erratum. A kernel + * partial write (a write transaction smaller than a cacheline + * arriving at the memory controller) to TDX private memory poisons that + * memory, and a subsequent read triggers a machine check. + * + * On those platforms the old kernel must reset TDX private + * memory before jumping to the new kernel, otherwise the new + * kernel may see an unexpected machine check. For simplicity, + * just fail kexec/kdump on those platforms. + */ + if (boot_cpu_has_bug(X86_BUG_TDX_PW_MCE)) { + pr_info_once("Not allowed on platform with tdx_pw_mce bug\n"); + return -EOPNOTSUPP; + } + /* Setup the identity mapped 64bit page table */ result = init_pgtable(image, __pa(control_page)); if (result) @@ -384,16 +401,10 @@ void __nocfi machine_kexec(struct kimage *image) { unsigned long reloc_start = (unsigned long)__relocate_kernel_start; relocate_kernel_fn *relocate_kernel_ptr; - unsigned int host_mem_enc_active; + unsigned int relocate_kernel_flags; int save_ftrace_enabled; void *control_page; - /* - * This must be done before load_segments() since if call depth tracking - * is used then GS must be valid to make any function calls. - */ - host_mem_enc_active = cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT); - #ifdef CONFIG_KEXEC_JUMP if (image->preserve_context) save_processor_state(); @@ -427,6 +438,17 @@ void __nocfi machine_kexec(struct kimage *image) */ relocate_kernel_ptr = control_page + (unsigned long)relocate_kernel - reloc_start; + relocate_kernel_flags = 0; + if (image->preserve_context) + relocate_kernel_flags |= RELOC_KERNEL_PRESERVE_CONTEXT; + + /* + * This must be done before load_segments() since it resets + * GS to 0 and percpu data needs the correct GS to work. + */ + if (this_cpu_read(cache_state_incoherent)) + relocate_kernel_flags |= RELOC_KERNEL_CACHE_INCOHERENT; + /* * The segment registers are funny things, they have both a * visible and an invisible part. Whenever the visible part is @@ -436,6 +458,11 @@ void __nocfi machine_kexec(struct kimage *image) * * Take advantage of this here by force loading the segments, * before the GDT is zapped with an invalid value. + * + * load_segments() resets GS to 0. Don't make any function calls + * after this point, since call depth tracking uses percpu variables to + * operate (relocate_kernel() is explicitly ignored by call depth + * tracking). */ load_segments(); @@ -443,8 +470,7 @@ void __nocfi machine_kexec(struct kimage *image) image->start = relocate_kernel_ptr((unsigned long)image->head, virt_to_phys(control_page), image->start, - image->preserve_context, - host_mem_enc_active); + relocate_kernel_flags); #ifdef CONFIG_KEXEC_JUMP if (image->preserve_context) @@ -453,6 +479,10 @@ void __nocfi machine_kexec(struct kimage *image) __ftrace_enabled_restore(save_ftrace_enabled); } +/* + * Handover to the next kernel, no CFI concern.
+ */ +ANNOTATE_NOCFI_SYM(machine_kexec); /* arch-dependent functionality related to kexec file-based syscall */ diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index e3a3987b0c4f..4c718f8adc59 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -89,6 +89,16 @@ DEFINE_PER_CPU(bool, __tss_limit_invalid); EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid); /* + * The cache may be in an incoherent state and needs flushing during kexec. + * E.g., on SME/TDX platforms, dirty cacheline aliases with and without + * encryption bit(s) can coexist and the cache needs to be flushed before + * booting to the new kernel to avoid silent memory corruption due to + * dirty cachelines with different encryption properties being written back + * to memory. + */ +DEFINE_PER_CPU(bool, cache_state_incoherent); + +/* * this gets called so that we can store lazy state into memory and copy the * current task into the new thread. */ @@ -827,19 +837,7 @@ void __noreturn stop_this_cpu(void *dummy) disable_local_APIC(); mcheck_cpu_clear(c); - /* - * Use wbinvd on processors that support SME. This provides support - * for performing a successful kexec when going from SME inactive - * to SME active (or vice-versa). The cache must be cleared so that - * if there are entries with the same physical address, both with and - * without the encryption bit, they don't race each other when flushed - * and potentially end up with the wrong entry being committed to - * memory. - * - * Test the CPUID bit directly because the machine might've cleared - * X86_FEATURE_SME due to cmdline options. - */ - if (c->extended_cpuid_level >= 0x8000001f && (cpuid_eax(0x8000001f) & BIT(0))) + if (this_cpu_read(cache_state_incoherent)) wbinvd(); /* diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index ea604f4d0b52..11e20bb13aca 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -66,8 +66,7 @@ SYM_CODE_START_NOALIGN(relocate_kernel) * %rdi indirection_page * %rsi pa_control_page * %rdx start address - * %rcx preserve_context - * %r8 host_mem_enc_active + * %rcx flags: RELOC_KERNEL_* */ /* Save the CPU context, used for jumping back */ @@ -111,7 +110,7 @@ SYM_CODE_START_NOALIGN(relocate_kernel) /* save indirection list for jumping back */ movq %rdi, pa_backup_pages_map(%rip) - /* Save the preserve_context to %r11 as swap_pages clobbers %rcx. */ + /* Save the flags to %r11 as swap_pages clobbers %rcx. */ movq %rcx, %r11 /* setup a new stack at the end of the physical control page */ @@ -129,9 +128,8 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) /* * %rdi indirection page * %rdx start address - * %r8 host_mem_enc_active * %r9 page table page - * %r11 preserve_context + * %r11 flags: RELOC_KERNEL_* * %r13 original CR4 when relocate_kernel() was invoked */ @@ -200,14 +198,21 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) movq %r9, %cr3 /* + * If the memory cache is in an incoherent state, e.g., due to + * memory encryption, do WBINVD to flush the cache. + * * If SME is active, there could be old encrypted cache line * entries that will conflict with the now unencrypted memory * used by kexec.  Flush the caches before copying the kernel. + * + * Note that SME sets this flag to true when the platform supports + * SME, so the WBINVD is performed even if SME is not activated + * by the kernel. But this does no harm.
*/ - testq %r8, %r8 - jz .Lsme_off + testb $RELOC_KERNEL_CACHE_INCOHERENT, %r11b + jz .Lnowbinvd wbinvd -.Lsme_off: +.Lnowbinvd: call swap_pages @@ -220,7 +225,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) movq %cr3, %rax movq %rax, %cr3 - testq %r11, %r11 /* preserve_context */ + testb $RELOC_KERNEL_PRESERVE_CONTEXT, %r11b jnz .Lrelocate /* @@ -273,7 +278,13 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) ANNOTATE_NOENDBR andq $PAGE_MASK, %r8 lea PAGE_SIZE(%r8), %rsp - movl $1, %r11d /* Ensure preserve_context flag is set */ + /* + * Ensure RELOC_KERNEL_PRESERVE_CONTEXT flag is set so that + * swap_pages() can swap pages correctly. Note all other + * RELOC_KERNEL_* flags passed to relocate_kernel() are not + * restored. + */ + movl $RELOC_KERNEL_PRESERVE_CONTEXT, %r11d call swap_pages movq kexec_va_control_page(%rip), %rax 0: addq $virtual_mapped - 0b, %rax @@ -321,7 +332,7 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages) UNWIND_HINT_END_OF_STACK /* * %rdi indirection page - * %r11 preserve_context + * %r11 flags: RELOC_KERNEL_* */ movq %rdi, %rcx /* Put the indirection_page in %rcx */ xorl %edi, %edi @@ -357,7 +368,8 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages) movq %rdi, %rdx /* Save destination page to %rdx */ movq %rsi, %rax /* Save source page to %rax */ - testq %r11, %r11 /* Only actually swap for ::preserve_context */ + /* Only actually swap for ::preserve_context */ + testb $RELOC_KERNEL_PRESERVE_CONTEXT, %r11b jz .Lnoswap /* copy source page to swap page */ diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 36354b470590..6b22611e69cc 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -97,7 +97,7 @@ __always_inline int is_valid_bugaddr(unsigned long addr) * Check for UD1 or UD2, accounting for Address Size Override Prefixes. * If it's a UD1, further decode to determine its use: * - * FineIBT: ea (bad) + * FineIBT: d6 udb * FineIBT: f0 75 f9 lock jne . - 6 * UBSan{0}: 67 0f b9 00 ud1 (%eax),%eax * UBSan{10}: 67 0f b9 40 10 ud1 0x10(%eax),%eax @@ -130,9 +130,9 @@ __always_inline int decode_bug(unsigned long addr, s32 *imm, int *len) WARN_ON_ONCE(!lock); return BUG_LOCK; - case 0xea: + case 0xd6: *len = addr - start; - return BUG_EA; + return BUG_UDB; case OPCODE_ESCAPE: break; @@ -341,7 +341,7 @@ static noinstr bool handle_bug(struct pt_regs *regs) } fallthrough; - case BUG_EA: + case BUG_UDB: case BUG_LOCK: if (handle_cfi_failure(regs) == BUG_TRAP_TYPE_WARN) { handled = true; diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 2c86673155c9..278f08194ec8 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -40,14 +40,14 @@ config KVM_X86 select HAVE_KVM_MSI select HAVE_KVM_CPU_RELAX_INTERCEPT select HAVE_KVM_NO_POLL - select KVM_XFER_TO_GUEST_WORK + select VIRT_XFER_TO_GUEST_WORK select KVM_GENERIC_DIRTYLOG_READ_PROTECT select KVM_VFIO select HAVE_KVM_PM_NOTIFIER if PM select KVM_GENERIC_HARDWARE_ENABLING select KVM_GENERIC_PRE_FAULT_MEMORY - select KVM_GENERIC_PRIVATE_MEM if KVM_SW_PROTECTED_VM select KVM_WERROR if WERROR + select KVM_GUEST_MEMFD if X86_64 config KVM tristate "Kernel-based Virtual Machine (KVM) support" @@ -74,7 +74,7 @@ config KVM_WERROR # FRAME_WARN, i.e. KVM_WERROR=y with KASAN=y requires special tuning. # Building KVM with -Werror and KASAN is still doable via enabling # the kernel-wide WERROR=y. - depends on KVM && ((EXPERT && !KASAN) || WERROR) + depends on KVM_X86 && ((EXPERT && !KASAN) || WERROR) help Add -Werror to the build flags for KVM. 
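/*
 * [Editor's sketch: not part of the patch.] The machine_kexec() and
 * relocate_kernel_64.S hunks above replace the old preserve_context /
 * host_mem_enc_active argument pair with a single flags word passed in %rcx
 * and saved in %r11.  The sketch below only illustrates the shape of that
 * interface; the bit values are assumptions for illustration, the real
 * RELOC_KERNEL_* definitions live in the kexec headers.
 */
#include <linux/types.h>

#define RELOC_KERNEL_PRESERVE_CONTEXT	(1 << 0)	/* assumed bit position */
#define RELOC_KERNEL_CACHE_INCOHERENT	(1 << 1)	/* assumed bit position */

static unsigned int build_relocate_kernel_flags(bool preserve_context,
						bool cache_incoherent)
{
	unsigned int flags = 0;

	if (preserve_context)
		flags |= RELOC_KERNEL_PRESERVE_CONTEXT;
	if (cache_incoherent)
		flags |= RELOC_KERNEL_CACHE_INCOHERENT;

	return flags;	/* passed to relocate_kernel() and tested via %r11b */
}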
@@ -83,7 +83,8 @@ config KVM_WERROR config KVM_SW_PROTECTED_VM bool "Enable support for KVM software-protected VMs" depends on EXPERT - depends on KVM && X86_64 + depends on KVM_X86 && X86_64 + select KVM_GENERIC_MEMORY_ATTRIBUTES help Enable support for KVM software-protected VMs. Currently, software- protected VMs are purely a development and testing vehicle for @@ -95,8 +96,7 @@ config KVM_SW_PROTECTED_VM config KVM_INTEL tristate "KVM for Intel (and compatible) processors support" depends on KVM && IA32_FEAT_CTL - select KVM_GENERIC_PRIVATE_MEM if INTEL_TDX_HOST - select KVM_GENERIC_MEMORY_ATTRIBUTES if INTEL_TDX_HOST + select X86_FRED if X86_64 help Provides support for KVM on processors equipped with Intel's VT extensions, a.k.a. Virtual Machine Extensions (VMX). @@ -135,6 +135,8 @@ config KVM_INTEL_TDX bool "Intel Trust Domain Extensions (TDX) support" default y depends on INTEL_TDX_HOST + select KVM_GENERIC_MEMORY_ATTRIBUTES + select HAVE_KVM_ARCH_GMEM_POPULATE help Provides support for launching Intel Trust Domain Extensions (TDX) confidential VMs on Intel processors. @@ -157,9 +159,10 @@ config KVM_AMD_SEV depends on KVM_AMD && X86_64 depends on CRYPTO_DEV_SP_PSP && !(KVM_AMD=y && CRYPTO_DEV_CCP_DD=m) select ARCH_HAS_CC_PLATFORM - select KVM_GENERIC_PRIVATE_MEM + select KVM_GENERIC_MEMORY_ATTRIBUTES select HAVE_KVM_ARCH_GMEM_PREPARE select HAVE_KVM_ARCH_GMEM_INVALIDATE + select HAVE_KVM_ARCH_GMEM_POPULATE help Provides support for launching encrypted VMs which use Secure Encrypted Virtualization (SEV), Secure Encrypted Virtualization with @@ -169,7 +172,7 @@ config KVM_AMD_SEV config KVM_IOAPIC bool "I/O APIC, PIC, and PIT emulation" default y - depends on KVM + depends on KVM_X86 help Provides support for KVM to emulate an I/O APIC, PIC, and PIT, i.e. for full in-kernel APIC emulation. @@ -179,7 +182,7 @@ config KVM_IOAPIC config KVM_SMM bool "System Management Mode emulation" default y - depends on KVM + depends on KVM_X86 help Provides support for KVM to emulate System Management Mode (SMM) in virtual machines. This can be used by the virtual machine @@ -189,7 +192,7 @@ config KVM_SMM config KVM_HYPERV bool "Support for Microsoft Hyper-V emulation" - depends on KVM + depends on KVM_X86 default y help Provides KVM support for emulating Microsoft Hyper-V. This allows KVM @@ -203,7 +206,7 @@ config KVM_HYPERV config KVM_XEN bool "Support for Xen hypercall interface" - depends on KVM + depends on KVM_X86 help Provides KVM support for the hosting Xen HVM guests and passing Xen hypercalls to userspace. @@ -213,7 +216,7 @@ config KVM_XEN config KVM_PROVE_MMU bool "Prove KVM MMU correctness" depends on DEBUG_KERNEL - depends on KVM + depends on KVM_X86 depends on EXPERT help Enables runtime assertions in KVM's MMU that are too costly to enable @@ -228,7 +231,7 @@ config KVM_EXTERNAL_WRITE_TRACKING config KVM_MAX_NR_VCPUS int "Maximum number of vCPUs per KVM guest" - depends on KVM + depends on KVM_X86 range 1024 4096 default 4096 if MAXSMP default 1024 diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index e2836a255b16..52524e0ca97f 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -34,7 +34,7 @@ * aligned to sizeof(unsigned long) because it's not accessed via bitops. 
*/ u32 kvm_cpu_caps[NR_KVM_CPU_CAPS] __read_mostly; -EXPORT_SYMBOL_GPL(kvm_cpu_caps); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_cpu_caps); struct cpuid_xstate_sizes { u32 eax; @@ -131,7 +131,7 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry2( return NULL; } -EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry2); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_find_cpuid_entry2); static int kvm_check_cpuid(struct kvm_vcpu *vcpu) { @@ -263,6 +263,17 @@ static u64 cpuid_get_supported_xcr0(struct kvm_vcpu *vcpu) return (best->eax | ((u64)best->edx << 32)) & kvm_caps.supported_xcr0; } +static u64 cpuid_get_supported_xss(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry_index(vcpu, 0xd, 1); + if (!best) + return 0; + + return (best->ecx | ((u64)best->edx << 32)) & kvm_caps.supported_xss; +} + static __always_inline void kvm_update_feature_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entry, unsigned int x86_feature, @@ -305,7 +316,8 @@ static void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu) best = kvm_find_cpuid_entry_index(vcpu, 0xD, 1); if (best && (cpuid_entry_has(best, X86_FEATURE_XSAVES) || cpuid_entry_has(best, X86_FEATURE_XSAVEC))) - best->ebx = xstate_required_size(vcpu->arch.xcr0, true); + best->ebx = xstate_required_size(vcpu->arch.xcr0 | + vcpu->arch.ia32_xss, true); } static bool kvm_cpuid_has_hyperv(struct kvm_vcpu *vcpu) @@ -424,6 +436,7 @@ void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) } vcpu->arch.guest_supported_xcr0 = cpuid_get_supported_xcr0(vcpu); + vcpu->arch.guest_supported_xss = cpuid_get_supported_xss(vcpu); vcpu->arch.pv_cpuid.features = kvm_apply_cpuid_pv_features_quirk(vcpu); @@ -448,6 +461,8 @@ void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) * adjustments to the reserved GPA bits. */ kvm_mmu_after_set_cpuid(vcpu); + + kvm_make_request(KVM_REQ_RECALC_INTERCEPTS, vcpu); } int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu) @@ -931,6 +946,7 @@ void kvm_set_cpu_caps(void) VENDOR_F(WAITPKG), F(SGX_LC), F(BUS_LOCK_DETECT), + X86_64_F(SHSTK), ); /* @@ -940,6 +956,14 @@ void kvm_set_cpu_caps(void) if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) kvm_cpu_cap_clear(X86_FEATURE_PKU); + /* + * Shadow Stacks aren't implemented in the Shadow MMU. Shadow Stack + * accesses require "magic" Writable=0,Dirty=1 protection, which KVM + * doesn't know how to emulate or map. + */ + if (!tdp_enabled) + kvm_cpu_cap_clear(X86_FEATURE_SHSTK); + kvm_cpu_cap_init(CPUID_7_EDX, F(AVX512_4VNNIW), F(AVX512_4FMAPS), @@ -957,8 +981,19 @@ void kvm_set_cpu_caps(void) F(AMX_INT8), F(AMX_BF16), F(FLUSH_L1D), + F(IBT), ); + /* + * Disable support for IBT and SHSTK if KVM is configured to emulate + * accesses to reserved GPAs, as KVM's emulator doesn't support IBT or + * SHSTK, nor does KVM handle Shadow Stack #PFs (see above). 
+ */ + if (allow_smaller_maxphyaddr) { + kvm_cpu_cap_clear(X86_FEATURE_SHSTK); + kvm_cpu_cap_clear(X86_FEATURE_IBT); + } + if (boot_cpu_has(X86_FEATURE_AMD_IBPB_RET) && boot_cpu_has(X86_FEATURE_AMD_IBPB) && boot_cpu_has(X86_FEATURE_AMD_IBRS)) @@ -985,6 +1020,10 @@ void kvm_set_cpu_caps(void) F(LAM), ); + kvm_cpu_cap_init(CPUID_7_1_ECX, + SCATTERED_F(MSR_IMM), + ); + kvm_cpu_cap_init(CPUID_7_1_EDX, F(AVX_VNNI_INT8), F(AVX_NE_CONVERT), @@ -1222,7 +1261,7 @@ void kvm_set_cpu_caps(void) kvm_cpu_cap_clear(X86_FEATURE_RDPID); } } -EXPORT_SYMBOL_GPL(kvm_set_cpu_caps); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_cpu_caps); #undef F #undef SCATTERED_F @@ -1411,9 +1450,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) goto out; cpuid_entry_override(entry, CPUID_7_1_EAX); + cpuid_entry_override(entry, CPUID_7_1_ECX); cpuid_entry_override(entry, CPUID_7_1_EDX); entry->ebx = 0; - entry->ecx = 0; } if (max_idx >= 2) { entry = do_host_cpuid(array, function, 2); @@ -1820,7 +1859,8 @@ static int get_cpuid_func(struct kvm_cpuid_array *array, u32 func, int r; if (func == CENTAUR_CPUID_SIGNATURE && - boot_cpu_data.x86_vendor != X86_VENDOR_CENTAUR) + boot_cpu_data.x86_vendor != X86_VENDOR_CENTAUR && + boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN) return 0; r = do_cpuid_func(array, func, type); @@ -2001,7 +2041,7 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, if (function == 7 && index == 0) { u64 data; if ((*ebx & (feature_bit(RTM) | feature_bit(HLE))) && - !__kvm_get_msr(vcpu, MSR_IA32_TSX_CTRL, &data, true) && + !kvm_msr_read(vcpu, MSR_IA32_TSX_CTRL, &data) && (data & TSX_CTRL_CPUID_CLEAR)) *ebx &= ~(feature_bit(RTM) | feature_bit(HLE)); } else if (function == 0x80000007) { @@ -2045,7 +2085,7 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, used_max_basic); return exact; } -EXPORT_SYMBOL_GPL(kvm_cpuid); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_cpuid); int kvm_emulate_cpuid(struct kvm_vcpu *vcpu) { @@ -2063,4 +2103,4 @@ int kvm_emulate_cpuid(struct kvm_vcpu *vcpu) kvm_rdx_write(vcpu, edx); return kvm_skip_emulated_instruction(vcpu); } -EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_cpuid); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 1349e278cd2a..4e3da5b497b8 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -26,6 +26,7 @@ #include <asm/debugreg.h> #include <asm/nospec-branch.h> #include <asm/ibt.h> +#include <asm/text-patching.h> #include "x86.h" #include "tss.h" @@ -166,7 +167,6 @@ #define Unaligned ((u64)2 << 41) /* Explicitly unaligned (e.g. MOVDQU) */ #define Avx ((u64)3 << 41) /* Advanced Vector Extensions */ #define Aligned16 ((u64)4 << 41) /* Aligned to 16 byte boundary (e.g. FXSAVE) */ -#define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */ #define NoWrite ((u64)1 << 45) /* No writeback */ #define SrcWrite ((u64)1 << 46) /* Write back src operand */ #define NoMod ((u64)1 << 47) /* Mod field is ignored */ @@ -178,6 +178,7 @@ #define IncSP ((u64)1 << 54) /* SP is incremented before ModRM calc */ #define TwoMemOp ((u64)1 << 55) /* Instruction has two memory operand */ #define IsBranch ((u64)1 << 56) /* Instruction is considered a branch. */ +#define ShadowStack ((u64)1 << 57) /* Instruction affects Shadow Stacks. 
*/ #define DstXacc (DstAccLo | SrcAccHi | SrcWrite) @@ -202,7 +203,6 @@ struct opcode { const struct escape *esc; const struct instr_dual *idual; const struct mode_dual *mdual; - void (*fastop)(struct fastop *fake); } u; int (*check_perm)(struct x86_emulate_ctxt *ctxt); }; @@ -266,186 +266,130 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt) X86_EFLAGS_PF|X86_EFLAGS_CF) #ifdef CONFIG_X86_64 -#define ON64(x) x +#define ON64(x...) x #else -#define ON64(x) +#define ON64(x...) #endif -/* - * fastop functions have a special calling convention: - * - * dst: rax (in/out) - * src: rdx (in/out) - * src2: rcx (in) - * flags: rflags (in/out) - * ex: rsi (in:fastop pointer, out:zero if exception) - * - * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for - * different operand sizes can be reached by calculation, rather than a jump - * table (which would be bigger than the code). - * - * The 16 byte alignment, considering 5 bytes for the RET thunk, 3 for ENDBR - * and 1 for the straight line speculation INT3, leaves 7 bytes for the - * body of the function. Currently none is larger than 4. - */ -static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); - -#define FASTOP_SIZE 16 - -#define __FOP_FUNC(name) \ - ".align " __stringify(FASTOP_SIZE) " \n\t" \ - ".type " name ", @function \n\t" \ - name ":\n\t" \ - ASM_ENDBR \ - IBT_NOSEAL(name) - -#define FOP_FUNC(name) \ - __FOP_FUNC(#name) - -#define __FOP_RET(name) \ - "11: " ASM_RET \ - ".size " name ", .-" name "\n\t" - -#define FOP_RET(name) \ - __FOP_RET(#name) - -#define __FOP_START(op, align) \ - extern void em_##op(struct fastop *fake); \ - asm(".pushsection .text, \"ax\" \n\t" \ - ".global em_" #op " \n\t" \ - ".align " __stringify(align) " \n\t" \ - "em_" #op ":\n\t" - -#define FOP_START(op) __FOP_START(op, FASTOP_SIZE) - -#define FOP_END \ - ".popsection") - -#define __FOPNOP(name) \ - __FOP_FUNC(name) \ - __FOP_RET(name) - -#define FOPNOP() \ - __FOPNOP(__stringify(__UNIQUE_ID(nop))) - -#define FOP1E(op, dst) \ - __FOP_FUNC(#op "_" #dst) \ - "10: " #op " %" #dst " \n\t" \ - __FOP_RET(#op "_" #dst) - -#define FOP1EEX(op, dst) \ - FOP1E(op, dst) _ASM_EXTABLE_TYPE_REG(10b, 11b, EX_TYPE_ZERO_REG, %%esi) - -#define FASTOP1(op) \ - FOP_START(op) \ - FOP1E(op##b, al) \ - FOP1E(op##w, ax) \ - FOP1E(op##l, eax) \ - ON64(FOP1E(op##q, rax)) \ - FOP_END - -/* 1-operand, using src2 (for MUL/DIV r/m) */ -#define FASTOP1SRC2(op, name) \ - FOP_START(name) \ - FOP1E(op, cl) \ - FOP1E(op, cx) \ - FOP1E(op, ecx) \ - ON64(FOP1E(op, rcx)) \ - FOP_END - -/* 1-operand, using src2 (for MUL/DIV r/m), with exceptions */ -#define FASTOP1SRC2EX(op, name) \ - FOP_START(name) \ - FOP1EEX(op, cl) \ - FOP1EEX(op, cx) \ - FOP1EEX(op, ecx) \ - ON64(FOP1EEX(op, rcx)) \ - FOP_END - -#define FOP2E(op, dst, src) \ - __FOP_FUNC(#op "_" #dst "_" #src) \ - #op " %" #src ", %" #dst " \n\t" \ - __FOP_RET(#op "_" #dst "_" #src) - -#define FASTOP2(op) \ - FOP_START(op) \ - FOP2E(op##b, al, dl) \ - FOP2E(op##w, ax, dx) \ - FOP2E(op##l, eax, edx) \ - ON64(FOP2E(op##q, rax, rdx)) \ - FOP_END - -/* 2 operand, word only */ -#define FASTOP2W(op) \ - FOP_START(op) \ - FOPNOP() \ - FOP2E(op##w, ax, dx) \ - FOP2E(op##l, eax, edx) \ - ON64(FOP2E(op##q, rax, rdx)) \ - FOP_END - -/* 2 operand, src is CL */ -#define FASTOP2CL(op) \ - FOP_START(op) \ - FOP2E(op##b, al, cl) \ - FOP2E(op##w, ax, cl) \ - FOP2E(op##l, eax, cl) \ - ON64(FOP2E(op##q, rax, cl)) \ - FOP_END - -/* 2 operand, src and dest are reversed */ -#define FASTOP2R(op, name) \ - FOP_START(name) 
\ - FOP2E(op##b, dl, al) \ - FOP2E(op##w, dx, ax) \ - FOP2E(op##l, edx, eax) \ - ON64(FOP2E(op##q, rdx, rax)) \ - FOP_END - -#define FOP3E(op, dst, src, src2) \ - __FOP_FUNC(#op "_" #dst "_" #src "_" #src2) \ - #op " %" #src2 ", %" #src ", %" #dst " \n\t"\ - __FOP_RET(#op "_" #dst "_" #src "_" #src2) - -/* 3-operand, word-only, src2=cl */ -#define FASTOP3WCL(op) \ - FOP_START(op) \ - FOPNOP() \ - FOP3E(op##w, ax, dx, cl) \ - FOP3E(op##l, eax, edx, cl) \ - ON64(FOP3E(op##q, rax, rdx, cl)) \ - FOP_END - -/* Special case for SETcc - 1 instruction per cc */ -#define FOP_SETCC(op) \ - FOP_FUNC(op) \ - #op " %al \n\t" \ - FOP_RET(op) - -FOP_START(setcc) -FOP_SETCC(seto) -FOP_SETCC(setno) -FOP_SETCC(setc) -FOP_SETCC(setnc) -FOP_SETCC(setz) -FOP_SETCC(setnz) -FOP_SETCC(setbe) -FOP_SETCC(setnbe) -FOP_SETCC(sets) -FOP_SETCC(setns) -FOP_SETCC(setp) -FOP_SETCC(setnp) -FOP_SETCC(setl) -FOP_SETCC(setnl) -FOP_SETCC(setle) -FOP_SETCC(setnle) -FOP_END; - -FOP_START(salc) -FOP_FUNC(salc) -"pushf; sbb %al, %al; popf \n\t" -FOP_RET(salc) -FOP_END; +#define EM_ASM_START(op) \ +static int em_##op(struct x86_emulate_ctxt *ctxt) \ +{ \ + unsigned long flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF; \ + int bytes = 1, ok = 1; \ + if (!(ctxt->d & ByteOp)) \ + bytes = ctxt->dst.bytes; \ + switch (bytes) { + +#define __EM_ASM(str) \ + asm("push %[flags]; popf \n\t" \ + "10: " str \ + "pushf; pop %[flags] \n\t" \ + "11: \n\t" \ + : "+a" (ctxt->dst.val), \ + "+d" (ctxt->src.val), \ + [flags] "+D" (flags), \ + "+S" (ok) \ + : "c" (ctxt->src2.val)) + +#define __EM_ASM_1(op, dst) \ + __EM_ASM(#op " %%" #dst " \n\t") + +#define __EM_ASM_1_EX(op, dst) \ + __EM_ASM(#op " %%" #dst " \n\t" \ + _ASM_EXTABLE_TYPE_REG(10b, 11f, EX_TYPE_ZERO_REG, %%esi)) + +#define __EM_ASM_2(op, dst, src) \ + __EM_ASM(#op " %%" #src ", %%" #dst " \n\t") + +#define __EM_ASM_3(op, dst, src, src2) \ + __EM_ASM(#op " %%" #src2 ", %%" #src ", %%" #dst " \n\t") + +#define EM_ASM_END \ + } \ + ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); \ + return !ok ? 
emulate_de(ctxt) : X86EMUL_CONTINUE; \ +} + +/* 1-operand, using "a" (dst) */ +#define EM_ASM_1(op) \ + EM_ASM_START(op) \ + case 1: __EM_ASM_1(op##b, al); break; \ + case 2: __EM_ASM_1(op##w, ax); break; \ + case 4: __EM_ASM_1(op##l, eax); break; \ + ON64(case 8: __EM_ASM_1(op##q, rax); break;) \ + EM_ASM_END + +/* 1-operand, using "c" (src2) */ +#define EM_ASM_1SRC2(op, name) \ + EM_ASM_START(name) \ + case 1: __EM_ASM_1(op##b, cl); break; \ + case 2: __EM_ASM_1(op##w, cx); break; \ + case 4: __EM_ASM_1(op##l, ecx); break; \ + ON64(case 8: __EM_ASM_1(op##q, rcx); break;) \ + EM_ASM_END + +/* 1-operand, using "c" (src2) with exception */ +#define EM_ASM_1SRC2EX(op, name) \ + EM_ASM_START(name) \ + case 1: __EM_ASM_1_EX(op##b, cl); break; \ + case 2: __EM_ASM_1_EX(op##w, cx); break; \ + case 4: __EM_ASM_1_EX(op##l, ecx); break; \ + ON64(case 8: __EM_ASM_1_EX(op##q, rcx); break;) \ + EM_ASM_END + +/* 2-operand, using "a" (dst), "d" (src) */ +#define EM_ASM_2(op) \ + EM_ASM_START(op) \ + case 1: __EM_ASM_2(op##b, al, dl); break; \ + case 2: __EM_ASM_2(op##w, ax, dx); break; \ + case 4: __EM_ASM_2(op##l, eax, edx); break; \ + ON64(case 8: __EM_ASM_2(op##q, rax, rdx); break;) \ + EM_ASM_END + +/* 2-operand, reversed */ +#define EM_ASM_2R(op, name) \ + EM_ASM_START(name) \ + case 1: __EM_ASM_2(op##b, dl, al); break; \ + case 2: __EM_ASM_2(op##w, dx, ax); break; \ + case 4: __EM_ASM_2(op##l, edx, eax); break; \ + ON64(case 8: __EM_ASM_2(op##q, rdx, rax); break;) \ + EM_ASM_END + +/* 2-operand, word only (no byte op) */ +#define EM_ASM_2W(op) \ + EM_ASM_START(op) \ + case 1: break; \ + case 2: __EM_ASM_2(op##w, ax, dx); break; \ + case 4: __EM_ASM_2(op##l, eax, edx); break; \ + ON64(case 8: __EM_ASM_2(op##q, rax, rdx); break;) \ + EM_ASM_END + +/* 2-operand, using "a" (dst) and CL (src2) */ +#define EM_ASM_2CL(op) \ + EM_ASM_START(op) \ + case 1: __EM_ASM_2(op##b, al, cl); break; \ + case 2: __EM_ASM_2(op##w, ax, cl); break; \ + case 4: __EM_ASM_2(op##l, eax, cl); break; \ + ON64(case 8: __EM_ASM_2(op##q, rax, cl); break;) \ + EM_ASM_END + +/* 3-operand, using "a" (dst), "d" (src) and CL (src2) */ +#define EM_ASM_3WCL(op) \ + EM_ASM_START(op) \ + case 1: break; \ + case 2: __EM_ASM_3(op##w, ax, dx, cl); break; \ + case 4: __EM_ASM_3(op##l, eax, edx, cl); break; \ + ON64(case 8: __EM_ASM_3(op##q, rax, rdx, cl); break;) \ + EM_ASM_END + +static int em_salc(struct x86_emulate_ctxt *ctxt) +{ + /* + * Set AL 0xFF if CF is set, or 0x00 when clear. + */ + ctxt->dst.val = 0xFF * !!(ctxt->eflags & X86_EFLAGS_CF); + return X86EMUL_CONTINUE; +} /* * XXX: inoutclob user must know where the argument is being expanded. 
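/*
 * [Editor's sketch: not part of the patch.] Roughly what the EM_ASM_2(add)
 * instantiation just below expands to for the 32-bit operand-size case; the
 * other case labels differ only in the instruction suffix and register names,
 * and "ok" is only ever cleared through the exception table in the div/idiv
 * variants.
 */
static int em_add_expanded(struct x86_emulate_ctxt *ctxt)
{
	unsigned long flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
	int bytes = 1, ok = 1;

	if (!(ctxt->d & ByteOp))
		bytes = ctxt->dst.bytes;

	switch (bytes) {
	case 4:
		/* Run the real ALU instruction on the guest's operand values. */
		asm("push %[flags]; popf \n\t"
		    "10: addl %%edx, %%eax \n\t"
		    "pushf; pop %[flags] \n\t"
		    "11: \n\t"
		    : "+a" (ctxt->dst.val), "+d" (ctxt->src.val),
		      [flags] "+D" (flags), "+S" (ok)
		    : "c" (ctxt->src2.val));
		break;
	/* cases 1, 2 and (on 64-bit builds) 8 follow the same pattern */
	}

	/* Fold the resulting arithmetic flags back into the guest EFLAGS. */
	ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
	return !ok ? emulate_de(ctxt) : X86EMUL_CONTINUE;
}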
@@ -1006,56 +950,55 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt, return rc; } -FASTOP2(add); -FASTOP2(or); -FASTOP2(adc); -FASTOP2(sbb); -FASTOP2(and); -FASTOP2(sub); -FASTOP2(xor); -FASTOP2(cmp); -FASTOP2(test); - -FASTOP1SRC2(mul, mul_ex); -FASTOP1SRC2(imul, imul_ex); -FASTOP1SRC2EX(div, div_ex); -FASTOP1SRC2EX(idiv, idiv_ex); - -FASTOP3WCL(shld); -FASTOP3WCL(shrd); - -FASTOP2W(imul); - -FASTOP1(not); -FASTOP1(neg); -FASTOP1(inc); -FASTOP1(dec); - -FASTOP2CL(rol); -FASTOP2CL(ror); -FASTOP2CL(rcl); -FASTOP2CL(rcr); -FASTOP2CL(shl); -FASTOP2CL(shr); -FASTOP2CL(sar); - -FASTOP2W(bsf); -FASTOP2W(bsr); -FASTOP2W(bt); -FASTOP2W(bts); -FASTOP2W(btr); -FASTOP2W(btc); - -FASTOP2(xadd); - -FASTOP2R(cmp, cmp_r); +EM_ASM_2(add); +EM_ASM_2(or); +EM_ASM_2(adc); +EM_ASM_2(sbb); +EM_ASM_2(and); +EM_ASM_2(sub); +EM_ASM_2(xor); +EM_ASM_2(cmp); +EM_ASM_2(test); +EM_ASM_2(xadd); + +EM_ASM_1SRC2(mul, mul_ex); +EM_ASM_1SRC2(imul, imul_ex); +EM_ASM_1SRC2EX(div, div_ex); +EM_ASM_1SRC2EX(idiv, idiv_ex); + +EM_ASM_3WCL(shld); +EM_ASM_3WCL(shrd); + +EM_ASM_2W(imul); + +EM_ASM_1(not); +EM_ASM_1(neg); +EM_ASM_1(inc); +EM_ASM_1(dec); + +EM_ASM_2CL(rol); +EM_ASM_2CL(ror); +EM_ASM_2CL(rcl); +EM_ASM_2CL(rcr); +EM_ASM_2CL(shl); +EM_ASM_2CL(shr); +EM_ASM_2CL(sar); + +EM_ASM_2W(bsf); +EM_ASM_2W(bsr); +EM_ASM_2W(bt); +EM_ASM_2W(bts); +EM_ASM_2W(btr); +EM_ASM_2W(btc); + +EM_ASM_2R(cmp, cmp_r); static int em_bsf_c(struct x86_emulate_ctxt *ctxt) { /* If src is zero, do not writeback, but update flags */ if (ctxt->src.val == 0) ctxt->dst.type = OP_NONE; - return fastop(ctxt, em_bsf); + return em_bsf(ctxt); } static int em_bsr_c(struct x86_emulate_ctxt *ctxt) @@ -1063,18 +1006,12 @@ static int em_bsr_c(struct x86_emulate_ctxt *ctxt) /* If src is zero, do not writeback, but update flags */ if (ctxt->src.val == 0) ctxt->dst.type = OP_NONE; - return fastop(ctxt, em_bsr); + return em_bsr(ctxt); } static __always_inline u8 test_cc(unsigned int condition, unsigned long flags) { - u8 rc; - void (*fop)(void) = (void *)em_setcc + FASTOP_SIZE * (condition & 0xf); - - flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF; - asm("push %[flags]; popf; " CALL_NOSPEC - : "=a"(rc), ASM_CALL_CONSTRAINT : [thunk_target]"r"(fop), [flags]"r"(flags)); - return rc; + return __emulate_cc(flags, condition & 0xf); } static void fetch_register_operand(struct operand *op) @@ -1553,6 +1490,37 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, return linear_write_system(ctxt, addr, desc, sizeof(*desc)); } +static bool emulator_is_ssp_invalid(struct x86_emulate_ctxt *ctxt, u8 cpl) +{ + const u32 MSR_IA32_X_CET = cpl == 3 ? MSR_IA32_U_CET : MSR_IA32_S_CET; + u64 efer = 0, cet = 0, ssp = 0; + + if (!(ctxt->ops->get_cr(ctxt, 4) & X86_CR4_CET)) + return false; + + if (ctxt->ops->get_msr(ctxt, MSR_EFER, &efer)) + return true; + + /* SSP is guaranteed to be valid if the vCPU was already in 32-bit mode. */ + if (!(efer & EFER_LMA)) + return false; + + if (ctxt->ops->get_msr(ctxt, MSR_IA32_X_CET, &cet)) + return true; + + if (!(cet & CET_SHSTK_EN)) + return false; + + if (ctxt->ops->get_msr(ctxt, MSR_KVM_INTERNAL_GUEST_SSP, &ssp)) + return true; + + /* + * On transfer from 64-bit mode to compatibility mode, SSP[63:32] must + * be 0, i.e. SSP must be a 32-bit value outside of 64-bit mode. 
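 *
 * [Editor's note: not part of the patch.] For example, with
 * SSP = 0x0000000100001000 the expression below is non-zero, so the far
 * transfer into a non-64-bit (!L) code segment is rejected, whereas
 * SSP = 0x0000000000001000 yields 0 and the transfer proceeds.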
+ */ + return ssp >> 32; +} + static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, u16 selector, int seg, u8 cpl, enum x86_transfer_type transfer, @@ -1693,6 +1661,10 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, if (efer & EFER_LMA) goto exception; } + if (!seg_desc.l && emulator_is_ssp_invalid(ctxt, cpl)) { + err_code = 0; + goto exception; + } /* CS(RPL) <- CPL */ selector = (selector & 0xfffc) | cpl; @@ -2289,7 +2261,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt) ctxt->dst.val = reg_read(ctxt, VCPU_REGS_RAX); ctxt->src.orig_val = ctxt->src.val; ctxt->src.val = ctxt->dst.orig_val; - fastop(ctxt, em_cmp); + em_cmp(ctxt); if (ctxt->eflags & X86_EFLAGS_ZF) { /* Success: write back to memory; no update of EAX */ @@ -3054,7 +3026,7 @@ static int em_das(struct x86_emulate_ctxt *ctxt) ctxt->src.type = OP_IMM; ctxt->src.val = 0; ctxt->src.bytes = 1; - fastop(ctxt, em_or); + em_or(ctxt); ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF); if (cf) ctxt->eflags |= X86_EFLAGS_CF; @@ -3080,7 +3052,7 @@ static int em_aam(struct x86_emulate_ctxt *ctxt) ctxt->src.type = OP_IMM; ctxt->src.val = 0; ctxt->src.bytes = 1; - fastop(ctxt, em_or); + em_or(ctxt); return X86EMUL_CONTINUE; } @@ -3098,7 +3070,7 @@ static int em_aad(struct x86_emulate_ctxt *ctxt) ctxt->src.type = OP_IMM; ctxt->src.val = 0; ctxt->src.bytes = 1; - fastop(ctxt, em_or); + em_or(ctxt); return X86EMUL_CONTINUE; } @@ -3189,7 +3161,7 @@ static int em_xchg(struct x86_emulate_ctxt *ctxt) static int em_imul_3op(struct x86_emulate_ctxt *ctxt) { ctxt->dst.val = ctxt->src2.val; - return fastop(ctxt, em_imul); + return em_imul(ctxt); } static int em_cwd(struct x86_emulate_ctxt *ctxt) @@ -3968,7 +3940,6 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) #define MD(_f, _m) { .flags = ((_f) | ModeDual), .u.mdual = (_m) } #define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) } #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } -#define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) } #define II(_f, _e, _i) \ { .flags = (_f)|Intercept, .u.execute = (_e), .intercept = x86_intercept_##_i } #define IIP(_f, _e, _i, _p) \ @@ -3983,9 +3954,9 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) #define I2bvIP(_f, _e, _i, _p) \ IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p) -#define F6ALU(_f, _e) F2bv((_f) | DstMem | SrcReg | ModRM, _e), \ - F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ - F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) +#define I6ALU(_f, _e) I2bv((_f) | DstMem | SrcReg | ModRM, _e), \ + I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ + I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) static const struct opcode group7_rm0[] = { N, @@ -4023,14 +3994,14 @@ static const struct opcode group7_rm7[] = { }; static const struct opcode group1[] = { - F(Lock, em_add), - F(Lock | PageTable, em_or), - F(Lock, em_adc), - F(Lock, em_sbb), - F(Lock | PageTable, em_and), - F(Lock, em_sub), - F(Lock, em_xor), - F(NoWrite, em_cmp), + I(Lock, em_add), + I(Lock | PageTable, em_or), + I(Lock, em_adc), + I(Lock, em_sbb), + I(Lock | PageTable, em_and), + I(Lock, em_sub), + I(Lock, em_xor), + I(NoWrite, em_cmp), }; static const struct opcode group1A[] = { @@ -4038,38 +4009,38 @@ static const struct opcode group1A[] = { }; static const struct opcode group2[] = { - F(DstMem | ModRM, em_rol), - F(DstMem | ModRM, em_ror), - F(DstMem | ModRM, em_rcl), - F(DstMem | ModRM, em_rcr), - F(DstMem | ModRM, em_shl), - F(DstMem | ModRM, em_shr), - F(DstMem | ModRM, em_shl), - F(DstMem 
| ModRM, em_sar), + I(DstMem | ModRM, em_rol), + I(DstMem | ModRM, em_ror), + I(DstMem | ModRM, em_rcl), + I(DstMem | ModRM, em_rcr), + I(DstMem | ModRM, em_shl), + I(DstMem | ModRM, em_shr), + I(DstMem | ModRM, em_shl), + I(DstMem | ModRM, em_sar), }; static const struct opcode group3[] = { - F(DstMem | SrcImm | NoWrite, em_test), - F(DstMem | SrcImm | NoWrite, em_test), - F(DstMem | SrcNone | Lock, em_not), - F(DstMem | SrcNone | Lock, em_neg), - F(DstXacc | Src2Mem, em_mul_ex), - F(DstXacc | Src2Mem, em_imul_ex), - F(DstXacc | Src2Mem, em_div_ex), - F(DstXacc | Src2Mem, em_idiv_ex), + I(DstMem | SrcImm | NoWrite, em_test), + I(DstMem | SrcImm | NoWrite, em_test), + I(DstMem | SrcNone | Lock, em_not), + I(DstMem | SrcNone | Lock, em_neg), + I(DstXacc | Src2Mem, em_mul_ex), + I(DstXacc | Src2Mem, em_imul_ex), + I(DstXacc | Src2Mem, em_div_ex), + I(DstXacc | Src2Mem, em_idiv_ex), }; static const struct opcode group4[] = { - F(ByteOp | DstMem | SrcNone | Lock, em_inc), - F(ByteOp | DstMem | SrcNone | Lock, em_dec), + I(ByteOp | DstMem | SrcNone | Lock, em_inc), + I(ByteOp | DstMem | SrcNone | Lock, em_dec), N, N, N, N, N, N, }; static const struct opcode group5[] = { - F(DstMem | SrcNone | Lock, em_inc), - F(DstMem | SrcNone | Lock, em_dec), - I(SrcMem | NearBranch | IsBranch, em_call_near_abs), - I(SrcMemFAddr | ImplicitOps | IsBranch, em_call_far), + I(DstMem | SrcNone | Lock, em_inc), + I(DstMem | SrcNone | Lock, em_dec), + I(SrcMem | NearBranch | IsBranch | ShadowStack, em_call_near_abs), + I(SrcMemFAddr | ImplicitOps | IsBranch | ShadowStack, em_call_far), I(SrcMem | NearBranch | IsBranch, em_jmp_abs), I(SrcMemFAddr | ImplicitOps | IsBranch, em_jmp_far), I(SrcMem | Stack | TwoMemOp, em_push), D(Undefined), @@ -4103,10 +4074,10 @@ static const struct group_dual group7 = { { static const struct opcode group8[] = { N, N, N, N, - F(DstMem | SrcImmByte | NoWrite, em_bt), - F(DstMem | SrcImmByte | Lock | PageTable, em_bts), - F(DstMem | SrcImmByte | Lock, em_btr), - F(DstMem | SrcImmByte | Lock | PageTable, em_btc), + I(DstMem | SrcImmByte | NoWrite, em_bt), + I(DstMem | SrcImmByte | Lock | PageTable, em_bts), + I(DstMem | SrcImmByte | Lock, em_btr), + I(DstMem | SrcImmByte | Lock | PageTable, em_btc), }; /* @@ -4243,31 +4214,31 @@ static const struct instr_dual instr_dual_8d = { static const struct opcode opcode_table[256] = { /* 0x00 - 0x07 */ - F6ALU(Lock, em_add), + I6ALU(Lock, em_add), I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg), I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg), /* 0x08 - 0x0F */ - F6ALU(Lock | PageTable, em_or), + I6ALU(Lock | PageTable, em_or), I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg), N, /* 0x10 - 0x17 */ - F6ALU(Lock, em_adc), + I6ALU(Lock, em_adc), I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg), I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg), /* 0x18 - 0x1F */ - F6ALU(Lock, em_sbb), + I6ALU(Lock, em_sbb), I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg), I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg), /* 0x20 - 0x27 */ - F6ALU(Lock | PageTable, em_and), N, N, + I6ALU(Lock | PageTable, em_and), N, N, /* 0x28 - 0x2F */ - F6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das), + I6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das), /* 0x30 - 0x37 */ - F6ALU(Lock, em_xor), N, N, + I6ALU(Lock, em_xor), N, N, /* 0x38 - 0x3F */ - F6ALU(NoWrite, em_cmp), N, N, + I6ALU(NoWrite, em_cmp), N, N, /* 0x40 - 0x4F */ - X8(F(DstReg, em_inc)), X8(F(DstReg, em_dec)), + X8(I(DstReg, em_inc)), X8(I(DstReg, em_dec)), /* 0x50 - 0x57 */ 
X8(I(SrcReg | Stack, em_push)), /* 0x58 - 0x5F */ @@ -4291,7 +4262,7 @@ static const struct opcode opcode_table[256] = { G(DstMem | SrcImm, group1), G(ByteOp | DstMem | SrcImm | No64, group1), G(DstMem | SrcImmByte, group1), - F2bv(DstMem | SrcReg | ModRM | NoWrite, em_test), + I2bv(DstMem | SrcReg | ModRM | NoWrite, em_test), I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg), /* 0x88 - 0x8F */ I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov), @@ -4304,7 +4275,7 @@ static const struct opcode opcode_table[256] = { DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)), /* 0x98 - 0x9F */ D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd), - I(SrcImmFAddr | No64 | IsBranch, em_call_far), N, + I(SrcImmFAddr | No64 | IsBranch | ShadowStack, em_call_far), N, II(ImplicitOps | Stack, em_pushf, pushf), II(ImplicitOps | Stack, em_popf, popf), I(ImplicitOps, em_sahf), I(ImplicitOps, em_lahf), @@ -4312,37 +4283,37 @@ static const struct opcode opcode_table[256] = { I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov), I2bv(SrcSI | DstDI | Mov | String | TwoMemOp, em_mov), - F2bv(SrcSI | DstDI | String | NoWrite | TwoMemOp, em_cmp_r), + I2bv(SrcSI | DstDI | String | NoWrite | TwoMemOp, em_cmp_r), /* 0xA8 - 0xAF */ - F2bv(DstAcc | SrcImm | NoWrite, em_test), + I2bv(DstAcc | SrcImm | NoWrite, em_test), I2bv(SrcAcc | DstDI | Mov | String, em_mov), I2bv(SrcSI | DstAcc | Mov | String, em_mov), - F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r), + I2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r), /* 0xB0 - 0xB7 */ X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)), /* 0xB8 - 0xBF */ X8(I(DstReg | SrcImm64 | Mov, em_mov)), /* 0xC0 - 0xC7 */ G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2), - I(ImplicitOps | NearBranch | SrcImmU16 | IsBranch, em_ret_near_imm), - I(ImplicitOps | NearBranch | IsBranch, em_ret), + I(ImplicitOps | NearBranch | SrcImmU16 | IsBranch | ShadowStack, em_ret_near_imm), + I(ImplicitOps | NearBranch | IsBranch | ShadowStack, em_ret), I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg), I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg), G(ByteOp, group11), G(0, group11), /* 0xC8 - 0xCF */ - I(Stack | SrcImmU16 | Src2ImmByte | IsBranch, em_enter), - I(Stack | IsBranch, em_leave), - I(ImplicitOps | SrcImmU16 | IsBranch, em_ret_far_imm), - I(ImplicitOps | IsBranch, em_ret_far), - D(ImplicitOps | IsBranch), DI(SrcImmByte | IsBranch, intn), + I(Stack | SrcImmU16 | Src2ImmByte, em_enter), + I(Stack, em_leave), + I(ImplicitOps | SrcImmU16 | IsBranch | ShadowStack, em_ret_far_imm), + I(ImplicitOps | IsBranch | ShadowStack, em_ret_far), + D(ImplicitOps | IsBranch), DI(SrcImmByte | IsBranch | ShadowStack, intn), D(ImplicitOps | No64 | IsBranch), - II(ImplicitOps | IsBranch, em_iret, iret), + II(ImplicitOps | IsBranch | ShadowStack, em_iret, iret), /* 0xD0 - 0xD7 */ G(Src2One | ByteOp, group2), G(Src2One, group2), G(Src2CL | ByteOp, group2), G(Src2CL, group2), I(DstAcc | SrcImmUByte | No64, em_aam), I(DstAcc | SrcImmUByte | No64, em_aad), - F(DstAcc | ByteOp | No64, em_salc), + I(DstAcc | ByteOp | No64, em_salc), I(DstAcc | SrcXLat | ByteOp, em_mov), /* 0xD8 - 0xDF */ N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N, @@ -4352,7 +4323,7 @@ static const struct opcode opcode_table[256] = { I2bvIP(SrcImmUByte | DstAcc, em_in, in, check_perm_in), I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out), /* 0xE8 - 0xEF */ - I(SrcImm | NearBranch | IsBranch, em_call), + I(SrcImm | NearBranch | IsBranch | 
ShadowStack, em_call), D(SrcImm | ImplicitOps | NearBranch | IsBranch), I(SrcImmFAddr | No64 | IsBranch, em_jmp_far), D(SrcImmByte | ImplicitOps | NearBranch | IsBranch), @@ -4371,7 +4342,7 @@ static const struct opcode opcode_table[256] = { static const struct opcode twobyte_table[256] = { /* 0x00 - 0x0F */ G(0, group6), GD(0, &group7), N, N, - N, I(ImplicitOps | EmulateOnUD | IsBranch, em_syscall), + N, I(ImplicitOps | EmulateOnUD | IsBranch | ShadowStack, em_syscall), II(ImplicitOps | Priv, em_clts, clts), N, DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N, N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N, @@ -4402,8 +4373,8 @@ static const struct opcode twobyte_table[256] = { IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc), II(ImplicitOps | Priv, em_rdmsr, rdmsr), IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc), - I(ImplicitOps | EmulateOnUD | IsBranch, em_sysenter), - I(ImplicitOps | Priv | EmulateOnUD | IsBranch, em_sysexit), + I(ImplicitOps | EmulateOnUD | IsBranch | ShadowStack, em_sysenter), + I(ImplicitOps | Priv | EmulateOnUD | IsBranch | ShadowStack, em_sysexit), N, N, N, N, N, N, N, N, N, N, /* 0x40 - 0x4F */ @@ -4427,32 +4398,32 @@ static const struct opcode twobyte_table[256] = { /* 0xA0 - 0xA7 */ I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg), II(ImplicitOps, em_cpuid, cpuid), - F(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt), - F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld), - F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N, + I(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt), + I(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld), + I(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N, /* 0xA8 - 0xAF */ I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg), II(EmulateOnUD | ImplicitOps, em_rsm, rsm), - F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts), - F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd), - F(DstMem | SrcReg | Src2CL | ModRM, em_shrd), - GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul), + I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts), + I(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd), + I(DstMem | SrcReg | Src2CL | ModRM, em_shrd), + GD(0, &group15), I(DstReg | SrcMem | ModRM, em_imul), /* 0xB0 - 0xB7 */ I2bv(DstMem | SrcReg | ModRM | Lock | PageTable | SrcWrite, em_cmpxchg), I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg), - F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr), + I(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr), I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg), I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg), D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), /* 0xB8 - 0xBF */ N, N, G(BitOp, group8), - F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc), + I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc), I(DstReg | SrcMem | ModRM, em_bsf_c), I(DstReg | SrcMem | ModRM, em_bsr_c), D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), /* 0xC0 - 0xC7 */ - F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd), + I2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd), N, ID(0, &instr_dual_0f_c3), N, N, N, GD(0, &group9), /* 0xC8 - 0xCF */ @@ -4514,6 +4485,60 @@ static const struct opcode opcode_map_0f_38[256] = { #undef I2bvIP #undef I6ALU +static bool is_shstk_instruction(struct x86_emulate_ctxt *ctxt) +{ + return ctxt->d & ShadowStack; +} + +static bool is_ibt_instruction(struct x86_emulate_ctxt *ctxt) +{ + u64 flags = ctxt->d; + + if (!(flags & IsBranch)) + return false; + + /* + * All 
far JMPs and CALLs (including SYSCALL, SYSENTER, and INTn) are + * indirect and thus affect IBT state. All far RETs (including SYSEXIT + * and IRET) are protected via Shadow Stacks and thus don't affect IBT + * state. IRET #GPs when returning to virtual-8086 and IBT or SHSTK is + * enabled, but that should be handled by IRET emulation (in the very + * unlikely scenario that KVM adds support for fully emulating IRET). + */ + if (!(flags & NearBranch)) + return ctxt->execute != em_iret && + ctxt->execute != em_ret_far && + ctxt->execute != em_ret_far_imm && + ctxt->execute != em_sysexit; + + switch (flags & SrcMask) { + case SrcReg: + case SrcMem: + case SrcMem16: + case SrcMem32: + return true; + case SrcMemFAddr: + case SrcImmFAddr: + /* Far branches should be handled above. */ + WARN_ON_ONCE(1); + return true; + case SrcNone: + case SrcImm: + case SrcImmByte: + /* + * Note, ImmU16 is used only for the stack adjustment operand on ENTER + * and RET instructions. ENTER isn't a branch and RET FAR is handled + * by the NearBranch check above. RET itself isn't an indirect branch. + */ + case SrcImmU16: + return false; + default: + WARN_ONCE(1, "Unexpected Src operand '%llx' on branch", + flags & SrcMask); + return false; + } +} + static unsigned imm_size(struct x86_emulate_ctxt *ctxt) { unsigned size; @@ -4943,6 +4968,40 @@ done_prefixes: ctxt->execute = opcode.u.execute; + /* + * Reject emulation if KVM might need to emulate shadow stack updates + * and/or indirect branch tracking enforcement, which the emulator + * doesn't support. + */ + if ((is_ibt_instruction(ctxt) || is_shstk_instruction(ctxt)) && + ctxt->ops->get_cr(ctxt, 4) & X86_CR4_CET) { + u64 u_cet = 0, s_cet = 0; + + /* + * Check both User and Supervisor on far transfers as inter- + * privilege level transfers are impacted by CET at the target + * privilege level, and that is not known at this time. The + * expectation is that the guest will not require emulation of + * any CET-affected instructions at any privilege level. + */ + if (!(ctxt->d & NearBranch)) + u_cet = s_cet = CET_SHSTK_EN | CET_ENDBR_EN; + else if (ctxt->ops->cpl(ctxt) == 3) + u_cet = CET_SHSTK_EN | CET_ENDBR_EN; + else + s_cet = CET_SHSTK_EN | CET_ENDBR_EN; + + if ((u_cet && ctxt->ops->get_msr(ctxt, MSR_IA32_U_CET, &u_cet)) || + (s_cet && ctxt->ops->get_msr(ctxt, MSR_IA32_S_CET, &s_cet))) + return EMULATION_FAILED; + + if ((u_cet | s_cet) & CET_SHSTK_EN && is_shstk_instruction(ctxt)) + return EMULATION_FAILED; + + if ((u_cet | s_cet) & CET_ENDBR_EN && is_ibt_instruction(ctxt)) + return EMULATION_FAILED; + } + if (unlikely(emulation_type & EMULTYPE_TRAP_UD) && likely(!(ctxt->d & EmulateOnUD))) return EMULATION_FAILED; @@ -5074,24 +5133,6 @@ static void fetch_possible_mmx_operand(struct operand *op) kvm_read_mmx_reg(op->addr.mm, &op->mm_val); } -static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop) -{ - ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF; - - if (!(ctxt->d & ByteOp)) - fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE; - - asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n" - : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags), - [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT - : "c"(ctxt->src2.val)); - - ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); - if (!fop) /* exception is returned in fop variable */ - return emulate_de(ctxt); - return X86EMUL_CONTINUE; -} - void init_decode_cache(struct x86_emulate_ctxt *ctxt) { /* Clear fields that are set conditionally but read without a guard. 
*/ @@ -5107,12 +5148,11 @@ void init_decode_cache(struct x86_emulate_ctxt *ctxt) ctxt->mem_read.end = 0; } -int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) +int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, bool check_intercepts) { const struct x86_emulate_ops *ops = ctxt->ops; int rc = X86EMUL_CONTINUE; int saved_dst_type = ctxt->dst.type; - bool is_guest_mode = ctxt->ops->is_guest_mode(ctxt); ctxt->mem_read.pos = 0; @@ -5160,7 +5200,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) fetch_possible_mmx_operand(&ctxt->dst); } - if (unlikely(is_guest_mode) && ctxt->intercept) { + if (unlikely(check_intercepts) && ctxt->intercept) { rc = emulator_check_intercept(ctxt, ctxt->intercept, X86_ICPT_PRE_EXCEPT); if (rc != X86EMUL_CONTINUE) @@ -5189,7 +5229,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) goto done; } - if (unlikely(is_guest_mode) && (ctxt->d & Intercept)) { + if (unlikely(check_intercepts) && (ctxt->d & Intercept)) { rc = emulator_check_intercept(ctxt, ctxt->intercept, X86_ICPT_POST_EXCEPT); if (rc != X86EMUL_CONTINUE) @@ -5243,7 +5283,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) special_insn: - if (unlikely(is_guest_mode) && (ctxt->d & Intercept)) { + if (unlikely(check_intercepts) && (ctxt->d & Intercept)) { rc = emulator_check_intercept(ctxt, ctxt->intercept, X86_ICPT_POST_MEMACCESS); if (rc != X86EMUL_CONTINUE) @@ -5256,10 +5296,7 @@ special_insn: ctxt->eflags &= ~X86_EFLAGS_RF; if (ctxt->execute) { - if (ctxt->d & Fastop) - rc = fastop(ctxt, ctxt->fop); - else - rc = ctxt->execute(ctxt); + rc = ctxt->execute(ctxt); if (rc != X86EMUL_CONTINUE) goto done; goto writeback; diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 72b19a88a776..38595ecb990d 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -923,7 +923,7 @@ bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu) return false; return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED; } -EXPORT_SYMBOL_GPL(kvm_hv_assist_page_enabled); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_hv_assist_page_enabled); int kvm_hv_get_assist_page(struct kvm_vcpu *vcpu) { @@ -935,7 +935,7 @@ int kvm_hv_get_assist_page(struct kvm_vcpu *vcpu) return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &hv_vcpu->vp_assist_page, sizeof(struct hv_vp_assist_page)); } -EXPORT_SYMBOL_GPL(kvm_hv_get_assist_page); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_hv_get_assist_page); static void stimer_prepare_msg(struct kvm_vcpu_hv_stimer *stimer) { @@ -1168,15 +1168,15 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm, BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence)); BUILD_BUG_ON(offsetof(struct ms_hyperv_tsc_page, tsc_sequence) != 0); - mutex_lock(&hv->hv_lock); + guard(mutex)(&hv->hv_lock); if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN || hv->hv_tsc_page_status == HV_TSC_PAGE_SET || hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET) - goto out_unlock; + return; if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)) - goto out_unlock; + return; gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; /* @@ -1192,7 +1192,7 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm, goto out_err; hv->hv_tsc_page_status = HV_TSC_PAGE_SET; - goto out_unlock; + return; } /* @@ -1228,12 +1228,10 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm, goto out_err; hv->hv_tsc_page_status = HV_TSC_PAGE_SET; - goto out_unlock; + return; out_err: hv->hv_tsc_page_status = HV_TSC_PAGE_BROKEN; -out_unlock: - mutex_unlock(&hv->hv_lock); } void kvm_hv_request_tsc_page_update(struct kvm *kvm) diff --git a/arch/x86/kvm/ioapic.c 
b/arch/x86/kvm/ioapic.c index 2b5d389bca5f..2c2783296aed 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: LGPL-2.1-or-later /* * Copyright (C) 2001 MandrakeSoft S.A. * Copyright 2010 Red Hat, Inc. and/or its affiliates. @@ -8,20 +9,6 @@ * http://www.linux-mandrake.com/ * http://www.mandrakesoft.com/ * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * * Yunhong Jiang <yunhong.jiang@intel.com> * Yaozu (Eddie) Dong <eddie.dong@intel.com> * Based on Xen 3.1 code. diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index 16da89259011..7cc8950005b6 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -103,7 +103,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) return kvm_apic_has_interrupt(v) != -1; /* LAPIC */ } -EXPORT_SYMBOL_GPL(kvm_cpu_has_injectable_intr); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_cpu_has_injectable_intr); /* * check if there is pending interrupt without @@ -119,7 +119,7 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v) return kvm_apic_has_interrupt(v) != -1; /* LAPIC */ } -EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_cpu_has_interrupt); /* * Read pending interrupt(from non-APIC source) @@ -148,7 +148,7 @@ int kvm_cpu_get_extint(struct kvm_vcpu *v) WARN_ON_ONCE(!irqchip_split(v->kvm)); return get_userspace_extint(v); } -EXPORT_SYMBOL_GPL(kvm_cpu_get_extint); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_cpu_get_extint); /* * Read pending interrupt vector and intack. 
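The kvm_hv_setup_tsc_page() change above (and the kvm_alloc_apic_access_page() change further down) replaces the goto-unlock pattern with the scope-based guard(mutex)() helper from linux/cleanup.h, so every early return drops the lock automatically. The userspace sketch below shows the underlying compiler mechanism, __attribute__((cleanup)); the scoped_guard_mutex name and the surrounding code are illustrative assumptions, not kernel API.

/* build: cc -pthread demo.c */
#include <pthread.h>
#include <stdio.h>

static void unlock_cleanup(pthread_mutex_t **m)
{
	pthread_mutex_unlock(*m);
}

/* Take the lock now; release it automatically when the scope is left. */
#define scoped_guard_mutex(lockp)					\
	pthread_mutex_t *__scope_guard					\
		__attribute__((cleanup(unlock_cleanup), unused)) =	\
		({ pthread_mutex_lock(lockp); (lockp); })

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int status;

static int update_status(int new_status)
{
	scoped_guard_mutex(&lock);

	if (status == new_status)
		return 0;	/* early return: the unlock still runs */

	status = new_status;
	return 1;
}

int main(void)
{
	int first = update_status(3);
	int second = update_status(3);

	printf("%d %d\n", first, second);	/* prints "1 0" */
	return 0;
}

As in the converted kernel functions, the unlock runs on every exit path, which is what lets the out_unlock labels and the explicit mutex_unlock() calls be removed.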
@@ -195,63 +195,6 @@ bool kvm_arch_irqchip_in_kernel(struct kvm *kvm) return irqchip_in_kernel(kvm); } -int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, - struct kvm_lapic_irq *irq, struct dest_map *dest_map) -{ - int r = -1; - struct kvm_vcpu *vcpu, *lowest = NULL; - unsigned long i, dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)]; - unsigned int dest_vcpus = 0; - - if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map)) - return r; - - if (irq->dest_mode == APIC_DEST_PHYSICAL && - irq->dest_id == 0xff && kvm_lowest_prio_delivery(irq)) { - pr_info("apic: phys broadcast and lowest prio\n"); - irq->delivery_mode = APIC_DM_FIXED; - } - - memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap)); - - kvm_for_each_vcpu(i, vcpu, kvm) { - if (!kvm_apic_present(vcpu)) - continue; - - if (!kvm_apic_match_dest(vcpu, src, irq->shorthand, - irq->dest_id, irq->dest_mode)) - continue; - - if (!kvm_lowest_prio_delivery(irq)) { - if (r < 0) - r = 0; - r += kvm_apic_set_irq(vcpu, irq, dest_map); - } else if (kvm_apic_sw_enabled(vcpu->arch.apic)) { - if (!kvm_vector_hashing_enabled()) { - if (!lowest) - lowest = vcpu; - else if (kvm_apic_compare_prio(vcpu, lowest) < 0) - lowest = vcpu; - } else { - __set_bit(i, dest_vcpu_bitmap); - dest_vcpus++; - } - } - } - - if (dest_vcpus != 0) { - int idx = kvm_vector_to_index(irq->vector, dest_vcpus, - dest_vcpu_bitmap, KVM_MAX_VCPUS); - - lowest = kvm_get_vcpu(kvm, idx); - } - - if (lowest) - r = kvm_apic_set_irq(lowest, irq, dest_map); - - return r; -} - static void kvm_msi_to_lapic_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, struct kvm_lapic_irq *irq) @@ -411,34 +354,6 @@ int kvm_set_routing_entry(struct kvm *kvm, return 0; } -bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, - struct kvm_vcpu **dest_vcpu) -{ - int r = 0; - unsigned long i; - struct kvm_vcpu *vcpu; - - if (kvm_intr_is_single_vcpu_fast(kvm, irq, dest_vcpu)) - return true; - - kvm_for_each_vcpu(i, vcpu, kvm) { - if (!kvm_apic_present(vcpu)) - continue; - - if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand, - irq->dest_id, irq->dest_mode)) - continue; - - if (++r == 2) - return false; - - *dest_vcpu = vcpu; - } - - return r == 1; -} -EXPORT_SYMBOL_GPL(kvm_intr_is_single_vcpu); - void kvm_scan_ioapic_irq(struct kvm_vcpu *vcpu, u32 dest_id, u16 dest_mode, u8 vector, unsigned long *ioapic_handled_vectors) { diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 5e62c1f79ce6..34f4a78a7a01 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -121,8 +121,4 @@ void __kvm_migrate_timers(struct kvm_vcpu *vcpu); int apic_has_pending_timer(struct kvm_vcpu *vcpu); -int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, - struct kvm_lapic_irq *irq, - struct dest_map *dest_map); - #endif diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index 36a8786db291..8ddb01191d6f 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h @@ -7,7 +7,8 @@ #define KVM_POSSIBLE_CR0_GUEST_BITS (X86_CR0_TS | X86_CR0_WP) #define KVM_POSSIBLE_CR4_GUEST_BITS \ (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ - | X86_CR4_OSXMMEXCPT | X86_CR4_PGE | X86_CR4_TSD | X86_CR4_FSGSBASE) + | X86_CR4_OSXMMEXCPT | X86_CR4_PGE | X86_CR4_TSD | X86_CR4_FSGSBASE \ + | X86_CR4_CET) #define X86_CR0_PDPTR_BITS (X86_CR0_CD | X86_CR0_NW | X86_CR0_PG) #define X86_CR4_TLBFLUSH_BITS (X86_CR4_PGE | X86_CR4_PCIDE | X86_CR4_PAE | X86_CR4_SMEP) diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h index 
c1df5acfacaf..7b5ddb787a25 100644 --- a/arch/x86/kvm/kvm_emulate.h +++ b/arch/x86/kvm/kvm_emulate.h @@ -235,7 +235,6 @@ struct x86_emulate_ops { void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked); bool (*is_smm)(struct x86_emulate_ctxt *ctxt); - bool (*is_guest_mode)(struct x86_emulate_ctxt *ctxt); int (*leave_smm)(struct x86_emulate_ctxt *ctxt); void (*triple_fault)(struct x86_emulate_ctxt *ctxt); int (*set_xcr)(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr); @@ -521,7 +520,7 @@ bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt); #define EMULATION_RESTART 1 #define EMULATION_INTERCEPTED 2 void init_decode_cache(struct x86_emulate_ctxt *ctxt); -int x86_emulate_insn(struct x86_emulate_ctxt *ctxt); +int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, bool check_intercepts); int emulator_task_switch(struct x86_emulate_ctxt *ctxt, u16 tss_selector, int idt_index, int reason, bool has_error_code, u32 error_code); diff --git a/arch/x86/kvm/kvm_onhyperv.c b/arch/x86/kvm/kvm_onhyperv.c index ded0bd688c65..ee53e75a60cb 100644 --- a/arch/x86/kvm/kvm_onhyperv.c +++ b/arch/x86/kvm/kvm_onhyperv.c @@ -101,13 +101,13 @@ int hv_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn, gfn_t nr_pages) return __hv_flush_remote_tlbs_range(kvm, &range); } -EXPORT_SYMBOL_GPL(hv_flush_remote_tlbs_range); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(hv_flush_remote_tlbs_range); int hv_flush_remote_tlbs(struct kvm *kvm) { return __hv_flush_remote_tlbs_range(kvm, NULL); } -EXPORT_SYMBOL_GPL(hv_flush_remote_tlbs); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(hv_flush_remote_tlbs); void hv_track_root_tdp(struct kvm_vcpu *vcpu, hpa_t root_tdp) { @@ -121,4 +121,4 @@ void hv_track_root_tdp(struct kvm_vcpu *vcpu, hpa_t root_tdp) spin_unlock(&kvm_arch->hv_root_tdp_lock); } } -EXPORT_SYMBOL_GPL(hv_track_root_tdp); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(hv_track_root_tdp); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 5fc437341e03..0ae7f913d782 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -74,6 +74,10 @@ module_param(lapic_timer_advance, bool, 0444); #define LAPIC_TIMER_ADVANCE_NS_MAX 5000 /* step-by-step approximation to mitigate fluctuation */ #define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8 + +static bool __read_mostly vector_hashing_enabled = true; +module_param_named(vector_hashing, vector_hashing_enabled, bool, 0444); + static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data); static int kvm_lapic_msr_write(struct kvm_lapic *apic, u32 reg, u64 data); @@ -102,7 +106,7 @@ bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector) } __read_mostly DEFINE_STATIC_KEY_FALSE(kvm_has_noapic_vcpu); -EXPORT_SYMBOL_GPL(kvm_has_noapic_vcpu); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_has_noapic_vcpu); __read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_hw_disabled, HZ); __read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_sw_disabled, HZ); @@ -130,7 +134,7 @@ static bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu) (kvm_mwait_in_guest(vcpu->kvm) || kvm_hlt_in_guest(vcpu->kvm)); } -bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu) +static bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu) { return kvm_x86_ops.set_hv_timer && !(kvm_mwait_in_guest(vcpu->kvm) || @@ -642,7 +646,7 @@ bool __kvm_apic_update_irr(unsigned long *pir, void *regs, int *max_irr) return ((max_updated_irr != -1) && (max_updated_irr == *max_irr)); } -EXPORT_SYMBOL_GPL(__kvm_apic_update_irr); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(__kvm_apic_update_irr); bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, unsigned long *pir, int 
*max_irr) { @@ -653,7 +657,7 @@ bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, unsigned long *pir, int *max_irr apic->irr_pending = true; return irr_updated; } -EXPORT_SYMBOL_GPL(kvm_apic_update_irr); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_update_irr); static inline int apic_search_irr(struct kvm_lapic *apic) { @@ -693,7 +697,7 @@ void kvm_apic_clear_irr(struct kvm_vcpu *vcpu, int vec) { apic_clear_irr(vec, vcpu->arch.apic); } -EXPORT_SYMBOL_GPL(kvm_apic_clear_irr); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_clear_irr); static void *apic_vector_to_isr(int vec, struct kvm_lapic *apic) { @@ -775,7 +779,7 @@ void kvm_apic_update_hwapic_isr(struct kvm_vcpu *vcpu) kvm_x86_call(hwapic_isr_update)(vcpu, apic_find_highest_isr(apic)); } -EXPORT_SYMBOL_GPL(kvm_apic_update_hwapic_isr); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_update_hwapic_isr); int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) { @@ -786,7 +790,7 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) */ return apic_find_highest_irr(vcpu->arch.apic); } -EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_lapic_find_highest_irr); static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, int vector, int level, int trig_mode, @@ -950,7 +954,7 @@ void kvm_apic_update_ppr(struct kvm_vcpu *vcpu) { apic_update_ppr(vcpu->arch.apic); } -EXPORT_SYMBOL_GPL(kvm_apic_update_ppr); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_update_ppr); static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr) { @@ -1061,21 +1065,14 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, return false; } } -EXPORT_SYMBOL_GPL(kvm_apic_match_dest); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_match_dest); -int kvm_vector_to_index(u32 vector, u32 dest_vcpus, - const unsigned long *bitmap, u32 bitmap_size) +static int kvm_vector_to_index(u32 vector, u32 dest_vcpus, + const unsigned long *bitmap, u32 bitmap_size) { - u32 mod; - int i, idx = -1; - - mod = vector % dest_vcpus; - - for (i = 0; i <= mod; i++) { - idx = find_next_bit(bitmap, bitmap_size, idx + 1); - BUG_ON(idx == bitmap_size); - } + int idx = find_nth_bit(bitmap, bitmap_size, vector % dest_vcpus); + BUG_ON(idx >= bitmap_size); return idx; } @@ -1106,6 +1103,16 @@ static bool kvm_apic_is_broadcast_dest(struct kvm *kvm, struct kvm_lapic **src, return false; } +static bool kvm_lowest_prio_delivery(struct kvm_lapic_irq *irq) +{ + return (irq->delivery_mode == APIC_DM_LOWEST || irq->msi_redir_hint); +} + +static int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) +{ + return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; +} + /* Return true if the interrupt can be handled by using *bitmap as index mask * for valid destinations in *dst array. * Return false if kvm_apic_map_get_dest_lapic did nothing useful. @@ -1149,7 +1156,7 @@ static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm, if (!kvm_lowest_prio_delivery(irq)) return true; - if (!kvm_vector_hashing_enabled()) { + if (!vector_hashing_enabled) { lowest = -1; for_each_set_bit(i, bitmap, 16) { if (!(*dst)[i]) @@ -1230,8 +1237,9 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, * interrupt. * - Otherwise, use remapped mode to inject the interrupt. 
*/ -bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, - struct kvm_vcpu **dest_vcpu) +static bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, + struct kvm_lapic_irq *irq, + struct kvm_vcpu **dest_vcpu) { struct kvm_apic_map *map; unsigned long bitmap; @@ -1258,6 +1266,91 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, return ret; } +bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, + struct kvm_vcpu **dest_vcpu) +{ + int r = 0; + unsigned long i; + struct kvm_vcpu *vcpu; + + if (kvm_intr_is_single_vcpu_fast(kvm, irq, dest_vcpu)) + return true; + + kvm_for_each_vcpu(i, vcpu, kvm) { + if (!kvm_apic_present(vcpu)) + continue; + + if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand, + irq->dest_id, irq->dest_mode)) + continue; + + if (++r == 2) + return false; + + *dest_vcpu = vcpu; + } + + return r == 1; +} +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_intr_is_single_vcpu); + +int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, + struct kvm_lapic_irq *irq, struct dest_map *dest_map) +{ + int r = -1; + struct kvm_vcpu *vcpu, *lowest = NULL; + unsigned long i, dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)]; + unsigned int dest_vcpus = 0; + + if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map)) + return r; + + if (irq->dest_mode == APIC_DEST_PHYSICAL && + irq->dest_id == 0xff && kvm_lowest_prio_delivery(irq)) { + pr_info("apic: phys broadcast and lowest prio\n"); + irq->delivery_mode = APIC_DM_FIXED; + } + + memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap)); + + kvm_for_each_vcpu(i, vcpu, kvm) { + if (!kvm_apic_present(vcpu)) + continue; + + if (!kvm_apic_match_dest(vcpu, src, irq->shorthand, + irq->dest_id, irq->dest_mode)) + continue; + + if (!kvm_lowest_prio_delivery(irq)) { + if (r < 0) + r = 0; + r += kvm_apic_set_irq(vcpu, irq, dest_map); + } else if (kvm_apic_sw_enabled(vcpu->arch.apic)) { + if (!vector_hashing_enabled) { + if (!lowest) + lowest = vcpu; + else if (kvm_apic_compare_prio(vcpu, lowest) < 0) + lowest = vcpu; + } else { + __set_bit(i, dest_vcpu_bitmap); + dest_vcpus++; + } + } + } + + if (dest_vcpus != 0) { + int idx = kvm_vector_to_index(irq->vector, dest_vcpus, + dest_vcpu_bitmap, KVM_MAX_VCPUS); + + lowest = kvm_get_vcpu(kvm, idx); + } + + if (lowest) + r = kvm_apic_set_irq(lowest, irq, dest_map); + + return r; +} + /* * Add a pending IRQ into lapic. * Return 1 if successfully added and 0 if discarded. @@ -1401,11 +1494,6 @@ void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq, rcu_read_unlock(); } -int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) -{ - return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; -} - static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector) { return test_bit(vector, apic->vcpu->arch.ioapic_handled_vectors); @@ -1481,32 +1569,38 @@ void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector) kvm_ioapic_send_eoi(apic, vector); kvm_make_request(KVM_REQ_EVENT, apic->vcpu); } -EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_set_eoi_accelerated); -void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high) +static void kvm_icr_to_lapic_irq(struct kvm_lapic *apic, u32 icr_low, + u32 icr_high, struct kvm_lapic_irq *irq) { - struct kvm_lapic_irq irq; - /* KVM has no delay and should always clear the BUSY/PENDING flag. 
*/ WARN_ON_ONCE(icr_low & APIC_ICR_BUSY); - irq.vector = icr_low & APIC_VECTOR_MASK; - irq.delivery_mode = icr_low & APIC_MODE_MASK; - irq.dest_mode = icr_low & APIC_DEST_MASK; - irq.level = (icr_low & APIC_INT_ASSERT) != 0; - irq.trig_mode = icr_low & APIC_INT_LEVELTRIG; - irq.shorthand = icr_low & APIC_SHORT_MASK; - irq.msi_redir_hint = false; + irq->vector = icr_low & APIC_VECTOR_MASK; + irq->delivery_mode = icr_low & APIC_MODE_MASK; + irq->dest_mode = icr_low & APIC_DEST_MASK; + irq->level = (icr_low & APIC_INT_ASSERT) != 0; + irq->trig_mode = icr_low & APIC_INT_LEVELTRIG; + irq->shorthand = icr_low & APIC_SHORT_MASK; + irq->msi_redir_hint = false; if (apic_x2apic_mode(apic)) - irq.dest_id = icr_high; + irq->dest_id = icr_high; else - irq.dest_id = GET_XAPIC_DEST_FIELD(icr_high); + irq->dest_id = GET_XAPIC_DEST_FIELD(icr_high); +} + +void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high) +{ + struct kvm_lapic_irq irq; + + kvm_icr_to_lapic_irq(apic, icr_low, icr_high, &irq); trace_kvm_apic_ipi(icr_low, irq.dest_id); kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL); } -EXPORT_SYMBOL_GPL(kvm_apic_send_ipi); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_send_ipi); static u32 apic_get_tmcct(struct kvm_lapic *apic) { @@ -1623,7 +1717,7 @@ u64 kvm_lapic_readable_reg_mask(struct kvm_lapic *apic) return valid_reg_mask; } -EXPORT_SYMBOL_GPL(kvm_lapic_readable_reg_mask); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_lapic_readable_reg_mask); static int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len, void *data) @@ -1864,7 +1958,7 @@ void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu) lapic_timer_int_injected(vcpu)) __kvm_wait_lapic_expire(vcpu); } -EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_wait_lapic_expire); static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic) { @@ -2178,7 +2272,7 @@ void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu) out: preempt_enable(); } -EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_lapic_expired_hv_timer); void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu) { @@ -2431,11 +2525,11 @@ void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu) { kvm_lapic_reg_write(vcpu->arch.apic, APIC_EOI, 0); } -EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_lapic_set_eoi); #define X2APIC_ICR_RESERVED_BITS (GENMASK_ULL(31, 20) | GENMASK_ULL(17, 16) | BIT(13)) -int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data) +static int __kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data, bool fast) { if (data & X2APIC_ICR_RESERVED_BITS) return 1; @@ -2450,7 +2544,20 @@ int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data) */ data &= ~APIC_ICR_BUSY; - kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32)); + if (fast) { + struct kvm_lapic_irq irq; + int ignored; + + kvm_icr_to_lapic_irq(apic, (u32)data, (u32)(data >> 32), &irq); + + if (!kvm_irq_delivery_to_apic_fast(apic->vcpu->kvm, apic, &irq, + &ignored, NULL)) + return -EWOULDBLOCK; + + trace_kvm_apic_ipi((u32)data, irq.dest_id); + } else { + kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32)); + } if (kvm_x86_ops.x2apic_icr_is_split) { kvm_lapic_set_reg(apic, APIC_ICR, data); kvm_lapic_set_reg(apic, APIC_ICR2, data >> 32); @@ -2461,6 +2568,16 @@ int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data) return 0; } +static int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data) +{ + return __kvm_x2apic_icr_write(apic, data, false); +} + +int kvm_x2apic_icr_write_fast(struct kvm_lapic 
*apic, u64 data) +{ + return __kvm_x2apic_icr_write(apic, data, true); +} + static u64 kvm_x2apic_icr_read(struct kvm_lapic *apic) { if (kvm_x86_ops.x2apic_icr_is_split) @@ -2491,7 +2608,7 @@ void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset) else kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset)); } -EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_write_nodecode); void kvm_free_lapic(struct kvm_vcpu *vcpu) { @@ -2629,7 +2746,7 @@ int kvm_apic_set_base(struct kvm_vcpu *vcpu, u64 value, bool host_initiated) kvm_recalculate_apic_map(vcpu->kvm); return 0; } -EXPORT_SYMBOL_GPL(kvm_apic_set_base); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_set_base); void kvm_apic_update_apicv(struct kvm_vcpu *vcpu) { @@ -2661,26 +2778,23 @@ void kvm_apic_update_apicv(struct kvm_vcpu *vcpu) int kvm_alloc_apic_access_page(struct kvm *kvm) { void __user *hva; - int ret = 0; - mutex_lock(&kvm->slots_lock); + guard(mutex)(&kvm->slots_lock); + if (kvm->arch.apic_access_memslot_enabled || kvm->arch.apic_access_memslot_inhibited) - goto out; + return 0; hva = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, APIC_DEFAULT_PHYS_BASE, PAGE_SIZE); - if (IS_ERR(hva)) { - ret = PTR_ERR(hva); - goto out; - } + if (IS_ERR(hva)) + return PTR_ERR(hva); kvm->arch.apic_access_memslot_enabled = true; -out: - mutex_unlock(&kvm->slots_lock); - return ret; + + return 0; } -EXPORT_SYMBOL_GPL(kvm_alloc_apic_access_page); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_alloc_apic_access_page); void kvm_inhibit_apic_access_page(struct kvm_vcpu *vcpu) { @@ -2944,7 +3058,7 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) __apic_update_ppr(apic, &ppr); return apic_has_interrupt_for_ppr(apic, ppr); } -EXPORT_SYMBOL_GPL(kvm_apic_has_interrupt); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_has_interrupt); int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu) { @@ -3003,7 +3117,7 @@ void kvm_apic_ack_interrupt(struct kvm_vcpu *vcpu, int vector) } } -EXPORT_SYMBOL_GPL(kvm_apic_ack_interrupt); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_ack_interrupt); static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s, bool set) diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 72de14527698..282b9b7da98c 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -105,7 +105,6 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu); void kvm_apic_after_set_mcg_cap(struct kvm_vcpu *vcpu); bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, int shorthand, unsigned int dest, int dest_mode); -int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); void kvm_apic_clear_irr(struct kvm_vcpu *vcpu, int vec); bool __kvm_apic_update_irr(unsigned long *pir, void *regs, int *max_irr); bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, unsigned long *pir, int *max_irr); @@ -119,6 +118,9 @@ void kvm_inhibit_apic_access_page(struct kvm_vcpu *vcpu); bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map); +int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, + struct kvm_lapic_irq *irq, + struct dest_map *dest_map); void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high); int kvm_apic_set_base(struct kvm_vcpu *vcpu, u64 value, bool host_initiated); @@ -137,7 +139,7 @@ int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu); void kvm_lapic_sync_to_vapic(struct 
kvm_vcpu *vcpu); -int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data); +int kvm_x2apic_icr_write_fast(struct kvm_lapic *apic, u64 data); int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data); int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data); @@ -222,12 +224,6 @@ static inline bool kvm_apic_init_sipi_allowed(struct kvm_vcpu *vcpu) !kvm_x86_call(apic_init_signal_blocked)(vcpu); } -static inline bool kvm_lowest_prio_delivery(struct kvm_lapic_irq *irq) -{ - return (irq->delivery_mode == APIC_DM_LOWEST || - irq->msi_redir_hint); -} - static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu) { return lapic_in_kernel(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); @@ -240,16 +236,13 @@ void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu); void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq, unsigned long *vcpu_bitmap); -bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, - struct kvm_vcpu **dest_vcpu); -int kvm_vector_to_index(u32 vector, u32 dest_vcpus, - const unsigned long *bitmap, u32 bitmap_size); +bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, + struct kvm_vcpu **dest_vcpu); void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu); void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu); void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu); bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu); void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu); -bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu); static inline enum lapic_mode kvm_apic_mode(u64 apic_base) { diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index b4b6860ab971..f63074048ec6 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -212,7 +212,7 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, fault = (mmu->permissions[index] >> pte_access) & 1; - WARN_ON(pfec & (PFERR_PK_MASK | PFERR_RSVD_MASK)); + WARN_ON_ONCE(pfec & (PFERR_PK_MASK | PFERR_SS_MASK | PFERR_RSVD_MASK)); if (unlikely(mmu->pkru_mask)) { u32 pkru_bits, offset; diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 6e838cb6c9e1..667d66cf76d5 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -110,7 +110,7 @@ static bool __ro_after_init tdp_mmu_allowed; #ifdef CONFIG_X86_64 bool __read_mostly tdp_mmu_enabled = true; module_param_named(tdp_mmu, tdp_mmu_enabled, bool, 0444); -EXPORT_SYMBOL_GPL(tdp_mmu_enabled); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(tdp_mmu_enabled); #endif static int max_huge_page_level __read_mostly; @@ -776,7 +776,8 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) kvm_flush_remote_tlbs_gfn(kvm, gfn, PG_LEVEL_4K); } -void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp) +void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp, + enum kvm_mmu_type mmu_type) { /* * If it's possible to replace the shadow page with an NX huge page, @@ -790,8 +791,9 @@ void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp) return; ++kvm->stat.nx_lpage_splits; + ++kvm->arch.possible_nx_huge_pages[mmu_type].nr_pages; list_add_tail(&sp->possible_nx_huge_page_link, - &kvm->arch.possible_nx_huge_pages); + &kvm->arch.possible_nx_huge_pages[mmu_type].pages); } static void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp, @@ -800,7 +802,7 @@ static void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp, sp->nx_huge_page_disallowed = true; if (nx_huge_page_possible) - 
track_possible_nx_huge_page(kvm, sp); + track_possible_nx_huge_page(kvm, sp, KVM_SHADOW_MMU); } static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) @@ -819,12 +821,14 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) kvm_mmu_gfn_allow_lpage(slot, gfn); } -void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp) +void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp, + enum kvm_mmu_type mmu_type) { if (list_empty(&sp->possible_nx_huge_page_link)) return; --kvm->stat.nx_lpage_splits; + --kvm->arch.possible_nx_huge_pages[mmu_type].nr_pages; list_del_init(&sp->possible_nx_huge_page_link); } @@ -832,7 +836,7 @@ static void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp) { sp->nx_huge_page_disallowed = false; - untrack_possible_nx_huge_page(kvm, sp); + untrack_possible_nx_huge_page(kvm, sp, KVM_SHADOW_MMU); } static struct kvm_memory_slot *gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, @@ -3285,12 +3289,72 @@ out: return level; } -static int __kvm_mmu_max_mapping_level(struct kvm *kvm, - const struct kvm_memory_slot *slot, - gfn_t gfn, int max_level, bool is_private) +static u8 kvm_max_level_for_order(int order) +{ + BUILD_BUG_ON(KVM_MAX_HUGEPAGE_LEVEL > PG_LEVEL_1G); + + KVM_MMU_WARN_ON(order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G) && + order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M) && + order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K)); + + if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G)) + return PG_LEVEL_1G; + + if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M)) + return PG_LEVEL_2M; + + return PG_LEVEL_4K; +} + +static u8 kvm_gmem_max_mapping_level(struct kvm *kvm, struct kvm_page_fault *fault, + const struct kvm_memory_slot *slot, gfn_t gfn, + bool is_private) +{ + u8 max_level, coco_level; + kvm_pfn_t pfn; + + /* For faults, use the gmem information that was resolved earlier. */ + if (fault) { + pfn = fault->pfn; + max_level = fault->max_level; + } else { + /* TODO: Call into guest_memfd once hugepages are supported. */ + WARN_ONCE(1, "Get pfn+order from guest_memfd"); + pfn = KVM_PFN_ERR_FAULT; + max_level = PG_LEVEL_4K; + } + + if (max_level == PG_LEVEL_4K) + return max_level; + + /* + * CoCo may influence the max mapping level, e.g. due to RMP or S-EPT + * restrictions. A return of '0' means "no additional restrictions", to + * allow for using an optional "ret0" static call. 
+ */ + coco_level = kvm_x86_call(gmem_max_mapping_level)(kvm, pfn, is_private); + if (coco_level) + max_level = min(max_level, coco_level); + + return max_level; +} + +int kvm_mmu_max_mapping_level(struct kvm *kvm, struct kvm_page_fault *fault, + const struct kvm_memory_slot *slot, gfn_t gfn) { struct kvm_lpage_info *linfo; - int host_level; + int host_level, max_level; + bool is_private; + + lockdep_assert_held(&kvm->mmu_lock); + + if (fault) { + max_level = fault->max_level; + is_private = fault->is_private; + } else { + max_level = PG_LEVEL_NUM; + is_private = kvm_mem_is_private(kvm, gfn); + } max_level = min(max_level, max_huge_page_level); for ( ; max_level > PG_LEVEL_4K; max_level--) { @@ -3299,25 +3363,17 @@ static int __kvm_mmu_max_mapping_level(struct kvm *kvm, break; } - if (is_private) - return max_level; - if (max_level == PG_LEVEL_4K) return PG_LEVEL_4K; - host_level = host_pfn_mapping_level(kvm, gfn, slot); + if (is_private || kvm_memslot_is_gmem_only(slot)) + host_level = kvm_gmem_max_mapping_level(kvm, fault, slot, gfn, + is_private); + else + host_level = host_pfn_mapping_level(kvm, gfn, slot); return min(host_level, max_level); } -int kvm_mmu_max_mapping_level(struct kvm *kvm, - const struct kvm_memory_slot *slot, gfn_t gfn) -{ - bool is_private = kvm_slot_can_be_private(slot) && - kvm_mem_is_private(kvm, gfn); - - return __kvm_mmu_max_mapping_level(kvm, slot, gfn, PG_LEVEL_NUM, is_private); -} - void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { struct kvm_memory_slot *slot = fault->slot; @@ -3338,9 +3394,8 @@ void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault * Enforce the iTLB multihit workaround after capturing the requested * level, which will be used to do precise, accurate accounting. 
*/ - fault->req_level = __kvm_mmu_max_mapping_level(vcpu->kvm, slot, - fault->gfn, fault->max_level, - fault->is_private); + fault->req_level = kvm_mmu_max_mapping_level(vcpu->kvm, fault, + fault->slot, fault->gfn); if (fault->req_level == PG_LEVEL_4K || fault->huge_page_disallowed) return; @@ -3810,7 +3865,7 @@ void kvm_mmu_free_roots(struct kvm *kvm, struct kvm_mmu *mmu, write_unlock(&kvm->mmu_lock); } } -EXPORT_SYMBOL_GPL(kvm_mmu_free_roots); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_free_roots); void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu) { @@ -3837,7 +3892,7 @@ void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu) kvm_mmu_free_roots(kvm, mmu, roots_to_free); } -EXPORT_SYMBOL_GPL(kvm_mmu_free_guest_mode_roots); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_free_guest_mode_roots); static hpa_t mmu_alloc_root(struct kvm_vcpu *vcpu, gfn_t gfn, int quadrant, u8 level) @@ -4503,42 +4558,6 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) vcpu->stat.pf_fixed++; } -static inline u8 kvm_max_level_for_order(int order) -{ - BUILD_BUG_ON(KVM_MAX_HUGEPAGE_LEVEL > PG_LEVEL_1G); - - KVM_MMU_WARN_ON(order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G) && - order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M) && - order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K)); - - if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G)) - return PG_LEVEL_1G; - - if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M)) - return PG_LEVEL_2M; - - return PG_LEVEL_4K; -} - -static u8 kvm_max_private_mapping_level(struct kvm *kvm, kvm_pfn_t pfn, - u8 max_level, int gmem_order) -{ - u8 req_max_level; - - if (max_level == PG_LEVEL_4K) - return PG_LEVEL_4K; - - max_level = min(kvm_max_level_for_order(gmem_order), max_level); - if (max_level == PG_LEVEL_4K) - return PG_LEVEL_4K; - - req_max_level = kvm_x86_call(private_max_mapping_level)(kvm, pfn); - if (req_max_level) - max_level = min(max_level, req_max_level); - - return max_level; -} - static void kvm_mmu_finish_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, int r) { @@ -4546,12 +4565,12 @@ static void kvm_mmu_finish_page_fault(struct kvm_vcpu *vcpu, r == RET_PF_RETRY, fault->map_writable); } -static int kvm_mmu_faultin_pfn_private(struct kvm_vcpu *vcpu, - struct kvm_page_fault *fault) +static int kvm_mmu_faultin_pfn_gmem(struct kvm_vcpu *vcpu, + struct kvm_page_fault *fault) { int max_order, r; - if (!kvm_slot_can_be_private(fault->slot)) { + if (!kvm_slot_has_gmem(fault->slot)) { kvm_mmu_prepare_memory_fault_exit(vcpu, fault); return -EFAULT; } @@ -4564,8 +4583,7 @@ static int kvm_mmu_faultin_pfn_private(struct kvm_vcpu *vcpu, } fault->map_writable = !(fault->slot->flags & KVM_MEM_READONLY); - fault->max_level = kvm_max_private_mapping_level(vcpu->kvm, fault->pfn, - fault->max_level, max_order); + fault->max_level = kvm_max_level_for_order(max_order); return RET_PF_CONTINUE; } @@ -4575,8 +4593,8 @@ static int __kvm_mmu_faultin_pfn(struct kvm_vcpu *vcpu, { unsigned int foll = fault->write ? FOLL_WRITE : 0; - if (fault->is_private) - return kvm_mmu_faultin_pfn_private(vcpu, fault); + if (fault->is_private || kvm_memslot_is_gmem_only(fault->slot)) + return kvm_mmu_faultin_pfn_gmem(vcpu, fault); foll |= FOLL_NOWAIT; fault->pfn = __kvm_faultin_pfn(fault->slot, fault->gfn, foll, @@ -4649,10 +4667,16 @@ static int kvm_mmu_faultin_pfn(struct kvm_vcpu *vcpu, /* * Retry the page fault if the gfn hit a memslot that is being deleted * or moved. 
This ensures any existing SPTEs for the old memslot will - * be zapped before KVM inserts a new MMIO SPTE for the gfn. + * be zapped before KVM inserts a new MMIO SPTE for the gfn. Punt the + * error to userspace if this is a prefault, as KVM's prefaulting ABI + * doesn't provide the same forward progress guarantees as KVM_RUN. */ - if (slot->flags & KVM_MEMSLOT_INVALID) + if (slot->flags & KVM_MEMSLOT_INVALID) { + if (fault->prefetch) + return -EAGAIN; + return RET_PF_RETRY; + } if (slot->id == APIC_ACCESS_PAGE_PRIVATE_MEMSLOT) { /* @@ -4852,7 +4876,7 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, return r; } -EXPORT_SYMBOL_GPL(kvm_handle_page_fault); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_handle_page_fault); #ifdef CONFIG_X86_64 static int kvm_tdp_mmu_page_fault(struct kvm_vcpu *vcpu, @@ -4942,7 +4966,7 @@ int kvm_tdp_map_page(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code, u8 *level return -EIO; } } -EXPORT_SYMBOL_GPL(kvm_tdp_map_page); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_tdp_map_page); long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu, struct kvm_pre_fault_memory *range) @@ -5138,7 +5162,7 @@ void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd) __clear_sp_write_flooding_count(sp); } } -EXPORT_SYMBOL_GPL(kvm_mmu_new_pgd); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_new_pgd); static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, unsigned int access) @@ -5784,7 +5808,7 @@ void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0, shadow_mmu_init_context(vcpu, context, cpu_role, root_role); kvm_mmu_new_pgd(vcpu, nested_cr3); } -EXPORT_SYMBOL_GPL(kvm_init_shadow_npt_mmu); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_init_shadow_npt_mmu); static union kvm_cpu_role kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty, @@ -5838,7 +5862,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, kvm_mmu_new_pgd(vcpu, new_eptp); } -EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_init_shadow_ept_mmu); static void init_kvm_softmmu(struct kvm_vcpu *vcpu, union kvm_cpu_role cpu_role) @@ -5903,7 +5927,7 @@ void kvm_init_mmu(struct kvm_vcpu *vcpu) else init_kvm_softmmu(vcpu, cpu_role); } -EXPORT_SYMBOL_GPL(kvm_init_mmu); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_init_mmu); void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu) { @@ -5939,7 +5963,7 @@ void kvm_mmu_reset_context(struct kvm_vcpu *vcpu) kvm_mmu_unload(vcpu); kvm_init_mmu(vcpu); } -EXPORT_SYMBOL_GPL(kvm_mmu_reset_context); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_reset_context); int kvm_mmu_load(struct kvm_vcpu *vcpu) { @@ -5973,7 +5997,7 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu) out: return r; } -EXPORT_SYMBOL_GPL(kvm_mmu_load); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_load); void kvm_mmu_unload(struct kvm_vcpu *vcpu) { @@ -6035,7 +6059,7 @@ void kvm_mmu_free_obsolete_roots(struct kvm_vcpu *vcpu) __kvm_mmu_free_obsolete_roots(vcpu->kvm, &vcpu->arch.root_mmu); __kvm_mmu_free_obsolete_roots(vcpu->kvm, &vcpu->arch.guest_mmu); } -EXPORT_SYMBOL_GPL(kvm_mmu_free_obsolete_roots); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_free_obsolete_roots); static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, int *bytes) @@ -6361,7 +6385,7 @@ emulate: return x86_emulate_instruction(vcpu, cr2_or_gpa, emulation_type, insn, insn_len); } -EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_page_fault); void kvm_mmu_print_sptes(struct kvm_vcpu *vcpu, gpa_t gpa, const char *msg) { @@ -6377,7 +6401,7 @@ void 
kvm_mmu_print_sptes(struct kvm_vcpu *vcpu, gpa_t gpa, const char *msg) pr_cont(", spte[%d] = 0x%llx", level, sptes[level]); pr_cont("\n"); } -EXPORT_SYMBOL_GPL(kvm_mmu_print_sptes); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_print_sptes); static void __kvm_mmu_invalidate_addr(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, u64 addr, hpa_t root_hpa) @@ -6443,7 +6467,7 @@ void kvm_mmu_invalidate_addr(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, __kvm_mmu_invalidate_addr(vcpu, mmu, addr, mmu->prev_roots[i].hpa); } } -EXPORT_SYMBOL_GPL(kvm_mmu_invalidate_addr); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_invalidate_addr); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) { @@ -6460,7 +6484,7 @@ void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) kvm_mmu_invalidate_addr(vcpu, vcpu->arch.walk_mmu, gva, KVM_MMU_ROOTS_ALL); ++vcpu->stat.invlpg; } -EXPORT_SYMBOL_GPL(kvm_mmu_invlpg); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_invlpg); void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid) @@ -6513,7 +6537,7 @@ void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level, else max_huge_page_level = PG_LEVEL_2M; } -EXPORT_SYMBOL_GPL(kvm_configure_mmu); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_configure_mmu); static void free_mmu_pages(struct kvm_mmu *mmu) { @@ -6737,11 +6761,12 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm) int kvm_mmu_init_vm(struct kvm *kvm) { - int r; + int r, i; kvm->arch.shadow_mmio_value = shadow_mmio_value; INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); - INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages); + for (i = 0; i < KVM_NR_MMU_TYPES; ++i) + INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages[i].pages); spin_lock_init(&kvm->arch.mmu_unsync_pages_lock); if (tdp_mmu_enabled) { @@ -7165,7 +7190,7 @@ restart: * mapping if the indirect sp has level = 1. */ if (sp->role.direct && - sp->role.level < kvm_mmu_max_mapping_level(kvm, slot, sp->gfn)) { + sp->role.level < kvm_mmu_max_mapping_level(kvm, NULL, slot, sp->gfn)) { kvm_zap_one_rmap_spte(kvm, rmap_head, sptep); if (kvm_available_flush_remote_tlbs_range()) @@ -7179,7 +7204,7 @@ restart: return need_tlb_flush; } -EXPORT_SYMBOL_GPL(kvm_zap_gfn_range); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_zap_gfn_range); static void kvm_rmap_zap_collapsible_sptes(struct kvm *kvm, const struct kvm_memory_slot *slot) @@ -7582,19 +7607,64 @@ static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel return err; } -static void kvm_recover_nx_huge_pages(struct kvm *kvm) +static unsigned long nx_huge_pages_to_zap(struct kvm *kvm, + enum kvm_mmu_type mmu_type) +{ + unsigned long pages = READ_ONCE(kvm->arch.possible_nx_huge_pages[mmu_type].nr_pages); + unsigned int ratio = READ_ONCE(nx_huge_pages_recovery_ratio); + + return ratio ? DIV_ROUND_UP(pages, ratio) : 0; +} + +static bool kvm_mmu_sp_dirty_logging_enabled(struct kvm *kvm, + struct kvm_mmu_page *sp) { - unsigned long nx_lpage_splits = kvm->stat.nx_lpage_splits; struct kvm_memory_slot *slot; - int rcu_idx; + + /* + * Skip the memslot lookup if dirty tracking can't possibly be enabled, + * as memslot lookups are relatively expensive. + * + * If a memslot update is in progress, reading an incorrect value of + * kvm->nr_memslots_dirty_logging is not a problem: if it is becoming + * zero, KVM will do an unnecessary memslot lookup; if it is becoming + * nonzero, the page will be zapped unnecessarily. Either way, this + * only affects efficiency in racy situations, and not correctness. 
+ */ + if (!atomic_read(&kvm->nr_memslots_dirty_logging)) + return false; + + slot = __gfn_to_memslot(kvm_memslots_for_spte_role(kvm, sp->role), sp->gfn); + if (WARN_ON_ONCE(!slot)) + return false; + + return kvm_slot_dirty_track_enabled(slot); +} + +static void kvm_recover_nx_huge_pages(struct kvm *kvm, + const enum kvm_mmu_type mmu_type) +{ +#ifdef CONFIG_X86_64 + const bool is_tdp_mmu = mmu_type == KVM_TDP_MMU; + spinlock_t *tdp_mmu_pages_lock = &kvm->arch.tdp_mmu_pages_lock; +#else + const bool is_tdp_mmu = false; + spinlock_t *tdp_mmu_pages_lock = NULL; +#endif + unsigned long to_zap = nx_huge_pages_to_zap(kvm, mmu_type); + struct list_head *nx_huge_pages; struct kvm_mmu_page *sp; - unsigned int ratio; LIST_HEAD(invalid_list); bool flush = false; - ulong to_zap; + int rcu_idx; + + nx_huge_pages = &kvm->arch.possible_nx_huge_pages[mmu_type].pages; rcu_idx = srcu_read_lock(&kvm->srcu); - write_lock(&kvm->mmu_lock); + if (is_tdp_mmu) + read_lock(&kvm->mmu_lock); + else + write_lock(&kvm->mmu_lock); /* * Zapping TDP MMU shadow pages, including the remote TLB flush, must @@ -7603,11 +7673,15 @@ static void kvm_recover_nx_huge_pages(struct kvm *kvm) */ rcu_read_lock(); - ratio = READ_ONCE(nx_huge_pages_recovery_ratio); - to_zap = ratio ? DIV_ROUND_UP(nx_lpage_splits, ratio) : 0; for ( ; to_zap; --to_zap) { - if (list_empty(&kvm->arch.possible_nx_huge_pages)) + if (is_tdp_mmu) + spin_lock(tdp_mmu_pages_lock); + + if (list_empty(nx_huge_pages)) { + if (is_tdp_mmu) + spin_unlock(tdp_mmu_pages_lock); break; + } /* * We use a separate list instead of just using active_mmu_pages @@ -7616,56 +7690,44 @@ static void kvm_recover_nx_huge_pages(struct kvm *kvm) * the total number of shadow pages. And because the TDP MMU * doesn't use active_mmu_pages. */ - sp = list_first_entry(&kvm->arch.possible_nx_huge_pages, + sp = list_first_entry(nx_huge_pages, struct kvm_mmu_page, possible_nx_huge_page_link); WARN_ON_ONCE(!sp->nx_huge_page_disallowed); WARN_ON_ONCE(!sp->role.direct); + unaccount_nx_huge_page(kvm, sp); + + if (is_tdp_mmu) + spin_unlock(tdp_mmu_pages_lock); + /* - * Unaccount and do not attempt to recover any NX Huge Pages - * that are being dirty tracked, as they would just be faulted - * back in as 4KiB pages. The NX Huge Pages in this slot will be - * recovered, along with all the other huge pages in the slot, - * when dirty logging is disabled. - * - * Since gfn_to_memslot() is relatively expensive, it helps to - * skip it if it the test cannot possibly return true. On the - * other hand, if any memslot has logging enabled, chances are - * good that all of them do, in which case unaccount_nx_huge_page() - * is much cheaper than zapping the page. - * - * If a memslot update is in progress, reading an incorrect value - * of kvm->nr_memslots_dirty_logging is not a problem: if it is - * becoming zero, gfn_to_memslot() will be done unnecessarily; if - * it is becoming nonzero, the page will be zapped unnecessarily. - * Either way, this only affects efficiency in racy situations, - * and not correctness. + * Do not attempt to recover any NX Huge Pages that are being + * dirty tracked, as they would just be faulted back in as 4KiB + * pages. The NX Huge Pages in this slot will be recovered, + * along with all the other huge pages in the slot, when dirty + * logging is disabled. 
*/ - slot = NULL; - if (atomic_read(&kvm->nr_memslots_dirty_logging)) { - struct kvm_memslots *slots; + if (!kvm_mmu_sp_dirty_logging_enabled(kvm, sp)) { + if (is_tdp_mmu) + flush |= kvm_tdp_mmu_zap_possible_nx_huge_page(kvm, sp); + else + kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); - slots = kvm_memslots_for_spte_role(kvm, sp->role); - slot = __gfn_to_memslot(slots, sp->gfn); - WARN_ON_ONCE(!slot); } - if (slot && kvm_slot_dirty_track_enabled(slot)) - unaccount_nx_huge_page(kvm, sp); - else if (is_tdp_mmu_page(sp)) - flush |= kvm_tdp_mmu_zap_sp(kvm, sp); - else - kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); WARN_ON_ONCE(sp->nx_huge_page_disallowed); if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) { kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush); rcu_read_unlock(); - cond_resched_rwlock_write(&kvm->mmu_lock); - flush = false; + if (is_tdp_mmu) + cond_resched_rwlock_read(&kvm->mmu_lock); + else + cond_resched_rwlock_write(&kvm->mmu_lock); + flush = false; rcu_read_lock(); } } @@ -7673,7 +7735,10 @@ static void kvm_recover_nx_huge_pages(struct kvm *kvm) rcu_read_unlock(); - write_unlock(&kvm->mmu_lock); + if (is_tdp_mmu) + read_unlock(&kvm->mmu_lock); + else + write_unlock(&kvm->mmu_lock); srcu_read_unlock(&kvm->srcu, rcu_idx); } @@ -7684,9 +7749,10 @@ static void kvm_nx_huge_page_recovery_worker_kill(void *data) static bool kvm_nx_huge_page_recovery_worker(void *data) { struct kvm *kvm = data; + long remaining_time; bool enabled; uint period; - long remaining_time; + int i; enabled = calc_nx_huge_pages_recovery_period(&period); if (!enabled) @@ -7701,7 +7767,8 @@ static bool kvm_nx_huge_page_recovery_worker(void *data) } __set_current_state(TASK_RUNNING); - kvm_recover_nx_huge_pages(kvm); + for (i = 0; i < KVM_NR_MMU_TYPES; ++i) + kvm_recover_nx_huge_pages(kvm, i); kvm->arch.nx_huge_page_last = get_jiffies_64(); return true; } diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index 65f3c89d7c5d..ed5c01df21ba 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -411,12 +411,14 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, return r; } -int kvm_mmu_max_mapping_level(struct kvm *kvm, +int kvm_mmu_max_mapping_level(struct kvm *kvm, struct kvm_page_fault *fault, const struct kvm_memory_slot *slot, gfn_t gfn); void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault); void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_level); -void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp); -void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp); +void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp, + enum kvm_mmu_type mmu_type); +void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp, + enum kvm_mmu_type mmu_type); #endif /* __KVM_X86_MMU_INTERNAL_H */ diff --git a/arch/x86/kvm/mmu/mmutrace.h b/arch/x86/kvm/mmu/mmutrace.h index f35a830ce469..764e3015d021 100644 --- a/arch/x86/kvm/mmu/mmutrace.h +++ b/arch/x86/kvm/mmu/mmutrace.h @@ -51,6 +51,9 @@ { PFERR_PRESENT_MASK, "P" }, \ { PFERR_WRITE_MASK, "W" }, \ { PFERR_USER_MASK, "U" }, \ + { PFERR_PK_MASK, "PK" }, \ + { PFERR_SS_MASK, "SS" }, \ + { PFERR_SGX_MASK, "SGX" }, \ { PFERR_RSVD_MASK, "RSVD" }, \ { PFERR_FETCH_MASK, "F" } diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c index df31039b5d63..37647afde7d3 100644 --- a/arch/x86/kvm/mmu/spte.c +++ b/arch/x86/kvm/mmu/spte.c @@ -22,7 
+22,7 @@ bool __read_mostly enable_mmio_caching = true; static bool __ro_after_init allow_mmio_caching; module_param_named(mmio_caching, enable_mmio_caching, bool, 0444); -EXPORT_SYMBOL_GPL(enable_mmio_caching); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_mmio_caching); bool __read_mostly kvm_ad_enabled; @@ -470,13 +470,13 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask) shadow_mmio_mask = mmio_mask; shadow_mmio_access_mask = access_mask; } -EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_set_mmio_spte_mask); void kvm_mmu_set_mmio_spte_value(struct kvm *kvm, u64 mmio_value) { kvm->arch.shadow_mmio_value = mmio_value; } -EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_value); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_set_mmio_spte_value); void kvm_mmu_set_me_spte_mask(u64 me_value, u64 me_mask) { @@ -487,7 +487,7 @@ void kvm_mmu_set_me_spte_mask(u64 me_value, u64 me_mask) shadow_me_value = me_value; shadow_me_mask = me_mask; } -EXPORT_SYMBOL_GPL(kvm_mmu_set_me_spte_mask); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_set_me_spte_mask); void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only) { @@ -513,7 +513,7 @@ void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only) kvm_mmu_set_mmio_spte_mask(VMX_EPT_MISCONFIG_WX_VALUE, VMX_EPT_RWX_MASK | VMX_EPT_SUPPRESS_VE_BIT, 0); } -EXPORT_SYMBOL_GPL(kvm_mmu_set_ept_masks); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_set_ept_masks); void kvm_mmu_reset_all_pte_masks(void) { diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 7f3d7229b2c1..c5734ca5c17d 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -355,7 +355,7 @@ static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp) spin_lock(&kvm->arch.tdp_mmu_pages_lock); sp->nx_huge_page_disallowed = false; - untrack_possible_nx_huge_page(kvm, sp); + untrack_possible_nx_huge_page(kvm, sp, KVM_TDP_MMU); spin_unlock(&kvm->arch.tdp_mmu_pages_lock); } @@ -925,23 +925,52 @@ static void tdp_mmu_zap_root(struct kvm *kvm, struct kvm_mmu_page *root, rcu_read_unlock(); } -bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp) +bool kvm_tdp_mmu_zap_possible_nx_huge_page(struct kvm *kvm, + struct kvm_mmu_page *sp) { - u64 old_spte; + struct tdp_iter iter = { + .old_spte = sp->ptep ? kvm_tdp_mmu_read_spte(sp->ptep) : 0, + .sptep = sp->ptep, + .level = sp->role.level + 1, + .gfn = sp->gfn, + .as_id = kvm_mmu_page_as_id(sp), + }; + + lockdep_assert_held_read(&kvm->mmu_lock); + + if (WARN_ON_ONCE(!is_tdp_mmu_page(sp))) + return false; /* - * This helper intentionally doesn't allow zapping a root shadow page, - * which doesn't have a parent page table and thus no associated entry. + * Root shadow pages don't have a parent page table and thus no + * associated entry, but they can never be possible NX huge pages. */ if (WARN_ON_ONCE(!sp->ptep)) return false; - old_spte = kvm_tdp_mmu_read_spte(sp->ptep); - if (WARN_ON_ONCE(!is_shadow_present_pte(old_spte))) + /* + * Since mmu_lock is held in read mode, it's possible another task has + * already modified the SPTE. Zap the SPTE if and only if the SPTE + * points at the SP's page table, as checking shadow-present isn't + * sufficient, e.g. the SPTE could be replaced by a leaf SPTE, or even + * another SP. Note, spte_to_child_pt() also checks that the SPTE is + * shadow-present, i.e. guards against zapping a frozen SPTE. 
+ */ + if ((tdp_ptep_t)sp->spt != spte_to_child_pt(iter.old_spte, iter.level)) return false; - tdp_mmu_set_spte(kvm, kvm_mmu_page_as_id(sp), sp->ptep, old_spte, - SHADOW_NONPRESENT_VALUE, sp->gfn, sp->role.level + 1); + /* + * If a different task modified the SPTE, then it should be impossible + * for the SPTE to still be used for the to-be-zapped SP. Non-leaf + * SPTEs don't have Dirty bits, KVM always sets the Accessed bit when + * creating non-leaf SPTEs, and all other bits are immutable for non- + * leaf SPTEs, i.e. the only legal operations for non-leaf SPTEs are + * zapping and replacement. + */ + if (tdp_mmu_set_spte_atomic(kvm, &iter, SHADOW_NONPRESENT_VALUE)) { + WARN_ON_ONCE((tdp_ptep_t)sp->spt == spte_to_child_pt(iter.old_spte, iter.level)); + return false; + } return true; } @@ -1303,7 +1332,7 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) fault->req_level >= iter.level) { spin_lock(&kvm->arch.tdp_mmu_pages_lock); if (sp->nx_huge_page_disallowed) - track_possible_nx_huge_page(kvm, sp); + track_possible_nx_huge_page(kvm, sp, KVM_TDP_MMU); spin_unlock(&kvm->arch.tdp_mmu_pages_lock); } } @@ -1813,7 +1842,7 @@ retry: if (iter.gfn < start || iter.gfn >= end) continue; - max_mapping_level = kvm_mmu_max_mapping_level(kvm, slot, iter.gfn); + max_mapping_level = kvm_mmu_max_mapping_level(kvm, NULL, slot, iter.gfn); if (max_mapping_level < iter.level) continue; @@ -1953,7 +1982,7 @@ bool kvm_tdp_mmu_gpa_is_mapped(struct kvm_vcpu *vcpu, u64 gpa) spte = sptes[leaf]; return is_shadow_present_pte(spte) && is_last_spte(spte, leaf); } -EXPORT_SYMBOL_GPL(kvm_tdp_mmu_gpa_is_mapped); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_tdp_mmu_gpa_is_mapped); /* * Returns the last level spte pointer of the shadow page walk for the given diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h index 52acf99d40a0..bd62977c9199 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.h +++ b/arch/x86/kvm/mmu/tdp_mmu.h @@ -64,7 +64,8 @@ static inline struct kvm_mmu_page *tdp_mmu_get_root(struct kvm_vcpu *vcpu, } bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end, bool flush); -bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp); +bool kvm_tdp_mmu_zap_possible_nx_huge_page(struct kvm *kvm, + struct kvm_mmu_page *sp); void kvm_tdp_mmu_zap_all(struct kvm *kvm); void kvm_tdp_mmu_invalidate_roots(struct kvm *kvm, enum kvm_tdp_mmu_root_types root_types); diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 75e9cfc689f8..487ad19a236e 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -26,11 +26,18 @@ /* This is enough to filter the vast majority of currently defined events. */ #define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300 +/* Unadultered PMU capabilities of the host, i.e. of hardware. */ +static struct x86_pmu_capability __read_mostly kvm_host_pmu; + +/* KVM's PMU capabilities, i.e. the intersection of KVM and hardware support. 
*/ struct x86_pmu_capability __read_mostly kvm_pmu_cap; -EXPORT_SYMBOL_GPL(kvm_pmu_cap); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_pmu_cap); -struct kvm_pmu_emulated_event_selectors __read_mostly kvm_pmu_eventsel; -EXPORT_SYMBOL_GPL(kvm_pmu_eventsel); +struct kvm_pmu_emulated_event_selectors { + u64 INSTRUCTIONS_RETIRED; + u64 BRANCH_INSTRUCTIONS_RETIRED; +}; +static struct kvm_pmu_emulated_event_selectors __read_mostly kvm_pmu_eventsel; /* Precise Distribution of Instructions Retired (PDIR) */ static const struct x86_cpu_id vmx_pebs_pdir_cpu[] = { @@ -96,6 +103,56 @@ void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops) #undef __KVM_X86_PMU_OP } +void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops) +{ + bool is_intel = boot_cpu_data.x86_vendor == X86_VENDOR_INTEL; + int min_nr_gp_ctrs = pmu_ops->MIN_NR_GP_COUNTERS; + + /* + * Hybrid PMUs don't play nice with virtualization without careful + * configuration by userspace, and KVM's APIs for reporting supported + * vPMU features do not account for hybrid PMUs. Disable vPMU support + * for hybrid PMUs until KVM gains a way to let userspace opt-in. + */ + if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) { + enable_pmu = false; + memset(&kvm_host_pmu, 0, sizeof(kvm_host_pmu)); + } else { + perf_get_x86_pmu_capability(&kvm_host_pmu); + } + + if (enable_pmu) { + /* + * WARN if perf did NOT disable hardware PMU if the number of + * architecturally required GP counters aren't present, i.e. if + * there are a non-zero number of counters, but fewer than what + * is architecturally required. + */ + if (!kvm_host_pmu.num_counters_gp || + WARN_ON_ONCE(kvm_host_pmu.num_counters_gp < min_nr_gp_ctrs)) + enable_pmu = false; + else if (is_intel && !kvm_host_pmu.version) + enable_pmu = false; + } + + if (!enable_pmu) { + memset(&kvm_pmu_cap, 0, sizeof(kvm_pmu_cap)); + return; + } + + memcpy(&kvm_pmu_cap, &kvm_host_pmu, sizeof(kvm_host_pmu)); + kvm_pmu_cap.version = min(kvm_pmu_cap.version, 2); + kvm_pmu_cap.num_counters_gp = min(kvm_pmu_cap.num_counters_gp, + pmu_ops->MAX_NR_GP_COUNTERS); + kvm_pmu_cap.num_counters_fixed = min(kvm_pmu_cap.num_counters_fixed, + KVM_MAX_NR_FIXED_COUNTERS); + + kvm_pmu_eventsel.INSTRUCTIONS_RETIRED = + perf_get_hw_event_config(PERF_COUNT_HW_INSTRUCTIONS); + kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED = + perf_get_hw_event_config(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); +} + static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi) { struct kvm_pmu *pmu = pmc_to_pmu(pmc); @@ -318,7 +375,7 @@ void pmc_write_counter(struct kvm_pmc *pmc, u64 val) pmc->counter &= pmc_bitmask(pmc); pmc_update_sample_period(pmc); } -EXPORT_SYMBOL_GPL(pmc_write_counter); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(pmc_write_counter); static int filter_cmp(const void *pa, const void *pb, u64 mask) { @@ -426,7 +483,7 @@ static bool is_fixed_event_allowed(struct kvm_x86_pmu_event_filter *filter, return true; } -static bool check_pmu_event_filter(struct kvm_pmc *pmc) +static bool pmc_is_event_allowed(struct kvm_pmc *pmc) { struct kvm_x86_pmu_event_filter *filter; struct kvm *kvm = pmc->vcpu->kvm; @@ -441,12 +498,6 @@ static bool check_pmu_event_filter(struct kvm_pmc *pmc) return is_fixed_event_allowed(filter, pmc->idx); } -static bool pmc_event_is_allowed(struct kvm_pmc *pmc) -{ - return pmc_is_globally_enabled(pmc) && pmc_speculative_in_use(pmc) && - check_pmu_event_filter(pmc); -} - static int reprogram_counter(struct kvm_pmc *pmc) { struct kvm_pmu *pmu = pmc_to_pmu(pmc); @@ -457,7 +508,8 @@ static int reprogram_counter(struct kvm_pmc *pmc) 
emulate_overflow = pmc_pause_counter(pmc); - if (!pmc_event_is_allowed(pmc)) + if (!pmc_is_globally_enabled(pmc) || !pmc_is_locally_enabled(pmc) || + !pmc_is_event_allowed(pmc)) return 0; if (emulate_overflow) @@ -492,6 +544,47 @@ static int reprogram_counter(struct kvm_pmc *pmc) eventsel & ARCH_PERFMON_EVENTSEL_INT); } +static bool pmc_is_event_match(struct kvm_pmc *pmc, u64 eventsel) +{ + /* + * Ignore checks for edge detect (all events currently emulated by KVM + * are always rising edges), pin control (unsupported by modern CPUs), + * and counter mask and its invert flag (KVM doesn't emulate multiple + * events in a single clock cycle). + * + * Note, the uppermost nibble of AMD's mask overlaps Intel's IN_TX (bit + * 32) and IN_TXCP (bit 33), as well as two reserved bits (bits 35:34). + * Checking the "in HLE/RTM transaction" flags is correct as the vCPU + * can't be in a transaction if KVM is emulating an instruction. + * + * Checking the reserved bits might be wrong if they are defined in the + * future, but so could ignoring them, so do the simple thing for now. + */ + return !((pmc->eventsel ^ eventsel) & AMD64_RAW_EVENT_MASK_NB); +} + +void kvm_pmu_recalc_pmc_emulation(struct kvm_pmu *pmu, struct kvm_pmc *pmc) +{ + bitmap_clear(pmu->pmc_counting_instructions, pmc->idx, 1); + bitmap_clear(pmu->pmc_counting_branches, pmc->idx, 1); + + /* + * Do NOT consult the PMU event filters, as the filters must be checked + * at the time of emulation to ensure KVM uses fresh information, e.g. + * omitting a PMC from a bitmap could result in a missed event if the + * filter is changed to allow counting the event. + */ + if (!pmc_is_locally_enabled(pmc)) + return; + + if (pmc_is_event_match(pmc, kvm_pmu_eventsel.INSTRUCTIONS_RETIRED)) + bitmap_set(pmu->pmc_counting_instructions, pmc->idx, 1); + + if (pmc_is_event_match(pmc, kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED)) + bitmap_set(pmu->pmc_counting_branches, pmc->idx, 1); +} +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_pmu_recalc_pmc_emulation); + void kvm_pmu_handle_event(struct kvm_vcpu *vcpu) { DECLARE_BITMAP(bitmap, X86_PMC_IDX_MAX); @@ -527,6 +620,9 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu) */ if (unlikely(pmu->need_cleanup)) kvm_pmu_cleanup(vcpu); + + kvm_for_each_pmc(pmu, pmc, bit, bitmap) + kvm_pmu_recalc_pmc_emulation(pmu, pmc); } int kvm_pmu_check_rdpmc_early(struct kvm_vcpu *vcpu, unsigned int idx) @@ -650,6 +746,7 @@ int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = pmu->global_ctrl; break; case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR: + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET: case MSR_CORE_PERF_GLOBAL_OVF_CTRL: msr_info->data = 0; break; @@ -711,6 +808,10 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (!msr_info->host_initiated) pmu->global_status &= ~data; break; + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET: + if (!msr_info->host_initiated) + pmu->global_status |= data & ~pmu->global_status_rsvd; + break; default: kvm_pmu_mark_pmc_in_use(vcpu, msr_info->index); return kvm_pmu_call(set_msr)(vcpu, msr_info); @@ -789,6 +890,10 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu) */ if (kvm_pmu_has_perf_global_ctrl(pmu) && pmu->nr_arch_gp_counters) pmu->global_ctrl = GENMASK_ULL(pmu->nr_arch_gp_counters - 1, 0); + + bitmap_set(pmu->all_valid_pmc_idx, 0, pmu->nr_arch_gp_counters); + bitmap_set(pmu->all_valid_pmc_idx, KVM_FIXED_PMC_BASE_IDX, + pmu->nr_arch_fixed_counters); } void kvm_pmu_init(struct kvm_vcpu *vcpu) @@ -813,7 +918,7 @@ void kvm_pmu_cleanup(struct kvm_vcpu 
*vcpu) pmu->pmc_in_use, X86_PMC_IDX_MAX); kvm_for_each_pmc(pmu, pmc, i, bitmask) { - if (pmc->perf_event && !pmc_speculative_in_use(pmc)) + if (pmc->perf_event && !pmc_is_locally_enabled(pmc)) pmc_stop_counter(pmc); } @@ -860,44 +965,46 @@ static inline bool cpl_is_matched(struct kvm_pmc *pmc) select_user; } -void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 eventsel) +static void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, + const unsigned long *event_pmcs) { DECLARE_BITMAP(bitmap, X86_PMC_IDX_MAX); struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); struct kvm_pmc *pmc; - int i; + int i, idx; BUILD_BUG_ON(sizeof(pmu->global_ctrl) * BITS_PER_BYTE != X86_PMC_IDX_MAX); + if (bitmap_empty(event_pmcs, X86_PMC_IDX_MAX)) + return; + if (!kvm_pmu_has_perf_global_ctrl(pmu)) - bitmap_copy(bitmap, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX); - else if (!bitmap_and(bitmap, pmu->all_valid_pmc_idx, + bitmap_copy(bitmap, event_pmcs, X86_PMC_IDX_MAX); + else if (!bitmap_and(bitmap, event_pmcs, (unsigned long *)&pmu->global_ctrl, X86_PMC_IDX_MAX)) return; + idx = srcu_read_lock(&vcpu->kvm->srcu); kvm_for_each_pmc(pmu, pmc, i, bitmap) { - /* - * Ignore checks for edge detect (all events currently emulated - * but KVM are always rising edges), pin control (unsupported - * by modern CPUs), and counter mask and its invert flag (KVM - * doesn't emulate multiple events in a single clock cycle). - * - * Note, the uppermost nibble of AMD's mask overlaps Intel's - * IN_TX (bit 32) and IN_TXCP (bit 33), as well as two reserved - * bits (bits 35:34). Checking the "in HLE/RTM transaction" - * flags is correct as the vCPU can't be in a transaction if - * KVM is emulating an instruction. Checking the reserved bits - * might be wrong if they are defined in the future, but so - * could ignoring them, so do the simple thing for now. 
- */ - if (((pmc->eventsel ^ eventsel) & AMD64_RAW_EVENT_MASK_NB) || - !pmc_event_is_allowed(pmc) || !cpl_is_matched(pmc)) + if (!pmc_is_event_allowed(pmc) || !cpl_is_matched(pmc)) continue; kvm_pmu_incr_counter(pmc); } + srcu_read_unlock(&vcpu->kvm->srcu, idx); +} + +void kvm_pmu_instruction_retired(struct kvm_vcpu *vcpu) +{ + kvm_pmu_trigger_event(vcpu, vcpu_to_pmu(vcpu)->pmc_counting_instructions); +} +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_pmu_instruction_retired); + +void kvm_pmu_branch_retired(struct kvm_vcpu *vcpu) +{ + kvm_pmu_trigger_event(vcpu, vcpu_to_pmu(vcpu)->pmc_counting_branches); } -EXPORT_SYMBOL_GPL(kvm_pmu_trigger_event); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_pmu_branch_retired); static bool is_masked_filter_valid(const struct kvm_x86_pmu_event_filter *filter) { diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index 103604c4b33b..5c3939e91f1d 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -23,11 +23,6 @@ #define KVM_FIXED_PMC_BASE_IDX INTEL_PMC_IDX_FIXED -struct kvm_pmu_emulated_event_selectors { - u64 INSTRUCTIONS_RETIRED; - u64 BRANCH_INSTRUCTIONS_RETIRED; -}; - struct kvm_pmu_ops { struct kvm_pmc *(*rdpmc_ecx_to_pmc)(struct kvm_vcpu *vcpu, unsigned int idx, u64 *mask); @@ -165,7 +160,7 @@ static inline struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu, u32 msr) return NULL; } -static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc) +static inline bool pmc_is_locally_enabled(struct kvm_pmc *pmc) { struct kvm_pmu *pmu = pmc_to_pmu(pmc); @@ -178,57 +173,15 @@ static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc) } extern struct x86_pmu_capability kvm_pmu_cap; -extern struct kvm_pmu_emulated_event_selectors kvm_pmu_eventsel; -static inline void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops) -{ - bool is_intel = boot_cpu_data.x86_vendor == X86_VENDOR_INTEL; - int min_nr_gp_ctrs = pmu_ops->MIN_NR_GP_COUNTERS; +void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops); - /* - * Hybrid PMUs don't play nice with virtualization without careful - * configuration by userspace, and KVM's APIs for reporting supported - * vPMU features do not account for hybrid PMUs. Disable vPMU support - * for hybrid PMUs until KVM gains a way to let userspace opt-in. - */ - if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) - enable_pmu = false; - - if (enable_pmu) { - perf_get_x86_pmu_capability(&kvm_pmu_cap); - - /* - * WARN if perf did NOT disable hardware PMU if the number of - * architecturally required GP counters aren't present, i.e. if - * there are a non-zero number of counters, but fewer than what - * is architecturally required. 
- */ - if (!kvm_pmu_cap.num_counters_gp || - WARN_ON_ONCE(kvm_pmu_cap.num_counters_gp < min_nr_gp_ctrs)) - enable_pmu = false; - else if (is_intel && !kvm_pmu_cap.version) - enable_pmu = false; - } - - if (!enable_pmu) { - memset(&kvm_pmu_cap, 0, sizeof(kvm_pmu_cap)); - return; - } - - kvm_pmu_cap.version = min(kvm_pmu_cap.version, 2); - kvm_pmu_cap.num_counters_gp = min(kvm_pmu_cap.num_counters_gp, - pmu_ops->MAX_NR_GP_COUNTERS); - kvm_pmu_cap.num_counters_fixed = min(kvm_pmu_cap.num_counters_fixed, - KVM_MAX_NR_FIXED_COUNTERS); - - kvm_pmu_eventsel.INSTRUCTIONS_RETIRED = - perf_get_hw_event_config(PERF_COUNT_HW_INSTRUCTIONS); - kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED = - perf_get_hw_event_config(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); -} +void kvm_pmu_recalc_pmc_emulation(struct kvm_pmu *pmu, struct kvm_pmc *pmc); static inline void kvm_pmu_request_counter_reprogram(struct kvm_pmc *pmc) { + kvm_pmu_recalc_pmc_emulation(pmc_to_pmu(pmc), pmc); + set_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi); kvm_make_request(KVM_REQ_PMU, pmc->vcpu); } @@ -272,7 +225,8 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu); void kvm_pmu_cleanup(struct kvm_vcpu *vcpu); void kvm_pmu_destroy(struct kvm_vcpu *vcpu); int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp); -void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 eventsel); +void kvm_pmu_instruction_retired(struct kvm_vcpu *vcpu); +void kvm_pmu_branch_retired(struct kvm_vcpu *vcpu); bool is_vmware_backdoor_pmc(u32 pmc_idx); diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index c53b92379e6e..743ab25ba787 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -25,6 +25,9 @@ #define KVM_X86_FEATURE_SGX2 KVM_X86_FEATURE(CPUID_12_EAX, 1) #define KVM_X86_FEATURE_SGX_EDECCSSA KVM_X86_FEATURE(CPUID_12_EAX, 11) +/* Intel-defined sub-features, CPUID level 0x00000007:1 (ECX) */ +#define KVM_X86_FEATURE_MSR_IMM KVM_X86_FEATURE(CPUID_7_1_ECX, 5) + /* Intel-defined sub-features, CPUID level 0x00000007:1 (EDX) */ #define X86_FEATURE_AVX_VNNI_INT8 KVM_X86_FEATURE(CPUID_7_1_EDX, 4) #define X86_FEATURE_AVX_NE_CONVERT KVM_X86_FEATURE(CPUID_7_1_EDX, 5) @@ -87,6 +90,7 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_7_2_EDX] = { 7, 2, CPUID_EDX}, [CPUID_24_0_EBX] = { 0x24, 0, CPUID_EBX}, [CPUID_8000_0021_ECX] = {0x80000021, 0, CPUID_ECX}, + [CPUID_7_1_ECX] = { 7, 1, CPUID_ECX}, }; /* @@ -128,6 +132,7 @@ static __always_inline u32 __feature_translate(int x86_feature) KVM_X86_TRANSLATE_FEATURE(BHI_CTRL); KVM_X86_TRANSLATE_FEATURE(TSA_SQ_NO); KVM_X86_TRANSLATE_FEATURE(TSA_L1_NO); + KVM_X86_TRANSLATE_FEATURE(MSR_IMM); default: return x86_feature; } diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c index 9864c057187d..f623c5986119 100644 --- a/arch/x86/kvm/smm.c +++ b/arch/x86/kvm/smm.c @@ -131,7 +131,7 @@ void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm) kvm_mmu_reset_context(vcpu); } -EXPORT_SYMBOL_GPL(kvm_smm_changed); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_smm_changed); void process_smi(struct kvm_vcpu *vcpu) { @@ -269,6 +269,10 @@ static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, enter_smm_save_seg_64(vcpu, &smram->gs, VCPU_SREG_GS); smram->int_shadow = kvm_x86_call(get_interrupt_shadow)(vcpu); + + if (guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) && + kvm_msr_read(vcpu, MSR_KVM_INTERNAL_GUEST_SSP, &smram->ssp)) + kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); } #endif @@ -529,7 +533,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, vcpu->arch.smbase = 
smstate->smbase; - if (kvm_set_msr(vcpu, MSR_EFER, smstate->efer & ~EFER_LMA)) + if (__kvm_emulate_msr_write(vcpu, MSR_EFER, smstate->efer & ~EFER_LMA)) return X86EMUL_UNHANDLEABLE; rsm_load_seg_64(vcpu, &smstate->tr, VCPU_SREG_TR); @@ -558,6 +562,10 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, kvm_x86_call(set_interrupt_shadow)(vcpu, 0); ctxt->interruptibility = (u8)smstate->int_shadow; + if (guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) && + kvm_msr_write(vcpu, MSR_KVM_INTERNAL_GUEST_SSP, smstate->ssp)) + return X86EMUL_UNHANDLEABLE; + return X86EMUL_CONTINUE; } #endif @@ -620,7 +628,7 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt) /* And finally go back to 32-bit mode. */ efer = 0; - kvm_set_msr(vcpu, MSR_EFER, efer); + __kvm_emulate_msr_write(vcpu, MSR_EFER, efer); } #endif diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h index 551703fbe200..db3c88f16138 100644 --- a/arch/x86/kvm/smm.h +++ b/arch/x86/kvm/smm.h @@ -116,8 +116,8 @@ struct kvm_smram_state_64 { u32 smbase; u32 reserved4[5]; - /* ssp and svm_* fields below are not implemented by KVM */ u64 ssp; + /* svm_* fields below are not implemented by KVM */ u64 svm_guest_pat; u64 svm_host_efer; u64 svm_host_cr4; diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index a34c5c3b164e..f286b5706d7c 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -64,6 +64,34 @@ static_assert(__AVIC_GATAG(AVIC_VM_ID_MASK, AVIC_VCPU_IDX_MASK) == -1u); +#define AVIC_AUTO_MODE -1 + +static int avic_param_set(const char *val, const struct kernel_param *kp) +{ + if (val && sysfs_streq(val, "auto")) { + *(int *)kp->arg = AVIC_AUTO_MODE; + return 0; + } + + return param_set_bint(val, kp); +} + +static const struct kernel_param_ops avic_ops = { + .flags = KERNEL_PARAM_OPS_FL_NOARG, + .set = avic_param_set, + .get = param_get_bool, +}; + +/* + * Enable / disable AVIC. In "auto" mode (default behavior), AVIC is enabled + * for Zen4+ CPUs with x2AVIC (and all other criteria for enablement are met). + */ +static int avic = AVIC_AUTO_MODE; +module_param_cb(avic, &avic_ops, &avic, 0444); +__MODULE_PARM_TYPE(avic, "bool"); + +module_param(enable_ipiv, bool, 0444); + static bool force_avic; module_param_unsafe(force_avic, bool, 0444); @@ -77,7 +105,58 @@ static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS); static u32 next_vm_id = 0; static bool next_vm_id_wrapped = 0; static DEFINE_SPINLOCK(svm_vm_data_hash_lock); -bool x2avic_enabled; +static bool x2avic_enabled; + + +static void avic_set_x2apic_msr_interception(struct vcpu_svm *svm, + bool intercept) +{ + static const u32 x2avic_passthrough_msrs[] = { + X2APIC_MSR(APIC_ID), + X2APIC_MSR(APIC_LVR), + X2APIC_MSR(APIC_TASKPRI), + X2APIC_MSR(APIC_ARBPRI), + X2APIC_MSR(APIC_PROCPRI), + X2APIC_MSR(APIC_EOI), + X2APIC_MSR(APIC_RRR), + X2APIC_MSR(APIC_LDR), + X2APIC_MSR(APIC_DFR), + X2APIC_MSR(APIC_SPIV), + X2APIC_MSR(APIC_ISR), + X2APIC_MSR(APIC_TMR), + X2APIC_MSR(APIC_IRR), + X2APIC_MSR(APIC_ESR), + X2APIC_MSR(APIC_ICR), + X2APIC_MSR(APIC_ICR2), + + /* + * Note! Always intercept LVTT, as TSC-deadline timer mode + * isn't virtualized by hardware, and the CPU will generate a + * #GP instead of a #VMEXIT. 
+ */ + X2APIC_MSR(APIC_LVTTHMR), + X2APIC_MSR(APIC_LVTPC), + X2APIC_MSR(APIC_LVT0), + X2APIC_MSR(APIC_LVT1), + X2APIC_MSR(APIC_LVTERR), + X2APIC_MSR(APIC_TMICT), + X2APIC_MSR(APIC_TMCCT), + X2APIC_MSR(APIC_TDCR), + }; + int i; + + if (intercept == svm->x2avic_msrs_intercepted) + return; + + if (!x2avic_enabled) + return; + + for (i = 0; i < ARRAY_SIZE(x2avic_passthrough_msrs); i++) + svm_set_intercept_for_msr(&svm->vcpu, x2avic_passthrough_msrs[i], + MSR_TYPE_RW, intercept); + + svm->x2avic_msrs_intercepted = intercept; +} static void avic_activate_vmcb(struct vcpu_svm *svm) { @@ -99,7 +178,7 @@ static void avic_activate_vmcb(struct vcpu_svm *svm) vmcb->control.int_ctl |= X2APIC_MODE_MASK; vmcb->control.avic_physical_id |= X2AVIC_MAX_PHYSICAL_ID; /* Disabling MSR intercept for x2APIC registers */ - svm_set_x2apic_msr_interception(svm, false); + avic_set_x2apic_msr_interception(svm, false); } else { /* * Flush the TLB, the guest may have inserted a non-APIC @@ -110,7 +189,7 @@ static void avic_activate_vmcb(struct vcpu_svm *svm) /* For xAVIC and hybrid-xAVIC modes */ vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID; /* Enabling MSR intercept for x2APIC registers */ - svm_set_x2apic_msr_interception(svm, true); + avic_set_x2apic_msr_interception(svm, true); } } @@ -130,7 +209,7 @@ static void avic_deactivate_vmcb(struct vcpu_svm *svm) return; /* Enabling MSR intercept for x2APIC registers */ - svm_set_x2apic_msr_interception(svm, true); + avic_set_x2apic_msr_interception(svm, true); } /* Note: @@ -1090,23 +1169,27 @@ void avic_vcpu_unblocking(struct kvm_vcpu *vcpu) avic_vcpu_load(vcpu, vcpu->cpu); } -/* - * Note: - * - The module param avic enable both xAPIC and x2APIC mode. - * - Hypervisor can support both xAVIC and x2AVIC in the same guest. - * - The mode can be switched at run-time. - */ -bool avic_hardware_setup(void) +static bool __init avic_want_avic_enabled(void) { - if (!npt_enabled) + /* + * In "auto" mode, enable AVIC by default for Zen4+ if x2AVIC is + * supported (to avoid enabling partial support by default, and because + * x2AVIC should be supported by all Zen4+ CPUs). Explicitly check for + * family 0x19 and later (Zen5+), as the kernel's synthetic ZenX flags + * aren't inclusive of previous generations, i.e. the kernel will set + * at most one ZenX feature flag. + */ + if (avic == AVIC_AUTO_MODE) + avic = boot_cpu_has(X86_FEATURE_X2AVIC) && + (boot_cpu_data.x86 > 0x19 || cpu_feature_enabled(X86_FEATURE_ZEN4)); + + if (!avic || !npt_enabled) return false; /* AVIC is a prerequisite for x2AVIC. */ if (!boot_cpu_has(X86_FEATURE_AVIC) && !force_avic) { - if (boot_cpu_has(X86_FEATURE_X2AVIC)) { - pr_warn(FW_BUG "Cannot support x2AVIC due to AVIC is disabled"); - pr_warn(FW_BUG "Try enable AVIC using force_avic option"); - } + if (boot_cpu_has(X86_FEATURE_X2AVIC)) + pr_warn(FW_BUG "Cannot enable x2AVIC, AVIC is unsupported\n"); return false; } @@ -1116,21 +1199,37 @@ bool avic_hardware_setup(void) return false; } - if (boot_cpu_has(X86_FEATURE_AVIC)) { - pr_info("AVIC enabled\n"); - } else if (force_avic) { - /* - * Some older systems does not advertise AVIC support. - * See Revision Guide for specific AMD processor for more detail. - */ - pr_warn("AVIC is not supported in CPUID but force enabled"); - pr_warn("Your system might crash and burn"); - } + /* + * Print a scary message if AVIC is force enabled to make it abundantly + * clear that ignoring CPUID could have repercussions. See Revision + * Guide for specific AMD processor for more details. 
+ */ + if (!boot_cpu_has(X86_FEATURE_AVIC)) + pr_warn("AVIC unsupported in CPUID but force enabled, your system might crash and burn\n"); + + return true; +} + +/* + * Note: + * - The module param avic enable both xAPIC and x2APIC mode. + * - Hypervisor can support both xAVIC and x2AVIC in the same guest. + * - The mode can be switched at run-time. + */ +bool __init avic_hardware_setup(void) +{ + avic = avic_want_avic_enabled(); + if (!avic) + return false; + + pr_info("AVIC enabled\n"); /* AVIC is a prerequisite for x2AVIC. */ x2avic_enabled = boot_cpu_has(X86_FEATURE_X2AVIC); if (x2avic_enabled) pr_info("x2AVIC enabled\n"); + else + svm_x86_ops.allow_apicv_in_x2apic_without_x2apic_virtualization = true; /* * Disable IPI virtualization for AMD Family 17h CPUs (Zen1 and Zen2) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index b7fd2e869998..a6443feab252 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -636,6 +636,14 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12 vmcb_mark_dirty(vmcb02, VMCB_DT); } + if (guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) && + (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_CET)))) { + vmcb02->save.s_cet = vmcb12->save.s_cet; + vmcb02->save.isst_addr = vmcb12->save.isst_addr; + vmcb02->save.ssp = vmcb12->save.ssp; + vmcb_mark_dirty(vmcb02, VMCB_CET); + } + kvm_set_rflags(vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED); svm_set_efer(vcpu, svm->nested.save.efer); @@ -1044,6 +1052,12 @@ void svm_copy_vmrun_state(struct vmcb_save_area *to_save, to_save->rsp = from_save->rsp; to_save->rip = from_save->rip; to_save->cpl = 0; + + if (kvm_cpu_cap_has(X86_FEATURE_SHSTK)) { + to_save->s_cet = from_save->s_cet; + to_save->isst_addr = from_save->isst_addr; + to_save->ssp = from_save->ssp; + } } void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb) @@ -1111,6 +1125,12 @@ int nested_svm_vmexit(struct vcpu_svm *svm) vmcb12->save.dr6 = svm->vcpu.arch.dr6; vmcb12->save.cpl = vmcb02->save.cpl; + if (guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK)) { + vmcb12->save.s_cet = vmcb02->save.s_cet; + vmcb12->save.isst_addr = vmcb02->save.isst_addr; + vmcb12->save.ssp = vmcb02->save.ssp; + } + vmcb12->control.int_state = vmcb02->control.int_state; vmcb12->control.exit_code = vmcb02->control.exit_code; vmcb12->control.exit_code_hi = vmcb02->control.exit_code_hi; @@ -1798,17 +1818,15 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, if (kvm_state->size < sizeof(*kvm_state) + KVM_STATE_NESTED_SVM_VMCB_SIZE) return -EINVAL; - ret = -ENOMEM; - ctl = kzalloc(sizeof(*ctl), GFP_KERNEL); - save = kzalloc(sizeof(*save), GFP_KERNEL); - if (!ctl || !save) - goto out_free; + ctl = memdup_user(&user_vmcb->control, sizeof(*ctl)); + if (IS_ERR(ctl)) + return PTR_ERR(ctl); - ret = -EFAULT; - if (copy_from_user(ctl, &user_vmcb->control, sizeof(*ctl))) - goto out_free; - if (copy_from_user(save, &user_vmcb->save, sizeof(*save))) - goto out_free; + save = memdup_user(&user_vmcb->save, sizeof(*save)); + if (IS_ERR(save)) { + kfree(ctl); + return PTR_ERR(save); + } ret = -EINVAL; __nested_copy_vmcb_control_to_cache(vcpu, &ctl_cached, ctl); diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c index 288f7f2a46f2..bc062285fbf5 100644 --- a/arch/x86/kvm/svm/pmu.c +++ b/arch/x86/kvm/svm/pmu.c @@ -41,7 +41,7 @@ static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr, struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu); unsigned int idx; - if (!vcpu->kvm->arch.enable_pmu) + if (!pmu->version) 
return NULL; switch (msr) { @@ -113,6 +113,7 @@ static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS: case MSR_AMD64_PERF_CNTR_GLOBAL_CTL: case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR: + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET: return pmu->version > 1; default: if (msr > MSR_F15H_PERF_CTR5 && @@ -199,17 +200,16 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu) kvm_pmu_cap.num_counters_gp); if (pmu->version > 1) { - pmu->global_ctrl_rsvd = ~((1ull << pmu->nr_arch_gp_counters) - 1); + pmu->global_ctrl_rsvd = ~(BIT_ULL(pmu->nr_arch_gp_counters) - 1); pmu->global_status_rsvd = pmu->global_ctrl_rsvd; } - pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1; + pmu->counter_bitmask[KVM_PMC_GP] = BIT_ULL(48) - 1; pmu->reserved_bits = 0xfffffff000280000ull; pmu->raw_event_mask = AMD64_RAW_EVENT_MASK; /* not applicable to AMD; but clean them to prevent any fall out */ pmu->counter_bitmask[KVM_PMC_FIXED] = 0; pmu->nr_arch_fixed_counters = 0; - bitmap_set(pmu->all_valid_pmc_idx, 0, pmu->nr_arch_gp_counters); } static void amd_pmu_init(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 0635bd71c10e..0835c664fbfd 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -37,7 +37,6 @@ #include "trace.h" #define GHCB_VERSION_MAX 2ULL -#define GHCB_VERSION_DEFAULT 2ULL #define GHCB_VERSION_MIN 1ULL #define GHCB_HV_FT_SUPPORTED (GHCB_HV_FT_SNP | GHCB_HV_FT_SNP_AP_CREATION) @@ -59,6 +58,9 @@ static bool sev_es_debug_swap_enabled = true; module_param_named(debug_swap, sev_es_debug_swap_enabled, bool, 0444); static u64 sev_supported_vmsa_features; +static unsigned int nr_ciphertext_hiding_asids; +module_param_named(ciphertext_hiding_asids, nr_ciphertext_hiding_asids, uint, 0444); + #define AP_RESET_HOLD_NONE 0 #define AP_RESET_HOLD_NAE_EVENT 1 #define AP_RESET_HOLD_MSR_PROTO 2 @@ -85,6 +87,10 @@ static DECLARE_RWSEM(sev_deactivate_lock); static DEFINE_MUTEX(sev_bitmap_lock); unsigned int max_sev_asid; static unsigned int min_sev_asid; +static unsigned int max_sev_es_asid; +static unsigned int min_sev_es_asid; +static unsigned int max_snp_asid; +static unsigned int min_snp_asid; static unsigned long sev_me_mask; static unsigned int nr_asids; static unsigned long *sev_asid_bitmap; @@ -147,6 +153,14 @@ static bool sev_vcpu_has_debug_swap(struct vcpu_svm *svm) return sev->vmsa_features & SVM_SEV_FEAT_DEBUG_SWAP; } +static bool snp_is_secure_tsc_enabled(struct kvm *kvm) +{ + struct kvm_sev_info *sev = to_kvm_sev_info(kvm); + + return (sev->vmsa_features & SVM_SEV_FEAT_SECURE_TSC) && + !WARN_ON_ONCE(!sev_snp_guest(kvm)); +} + /* Must be called with the sev_bitmap_lock held */ static bool __sev_recycle_asids(unsigned int min_asid, unsigned int max_asid) { @@ -173,20 +187,34 @@ static void sev_misc_cg_uncharge(struct kvm_sev_info *sev) misc_cg_uncharge(type, sev->misc_cg, 1); } -static int sev_asid_new(struct kvm_sev_info *sev) +static int sev_asid_new(struct kvm_sev_info *sev, unsigned long vm_type) { /* * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid. * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1. - * Note: min ASID can end up larger than the max if basic SEV support is - * effectively disabled by disallowing use of ASIDs for SEV guests. */ - unsigned int min_asid = sev->es_active ? 1 : min_sev_asid; - unsigned int max_asid = sev->es_active ? 
min_sev_asid - 1 : max_sev_asid; - unsigned int asid; + unsigned int min_asid, max_asid, asid; bool retry = true; int ret; + if (vm_type == KVM_X86_SNP_VM) { + min_asid = min_snp_asid; + max_asid = max_snp_asid; + } else if (sev->es_active) { + min_asid = min_sev_es_asid; + max_asid = max_sev_es_asid; + } else { + min_asid = min_sev_asid; + max_asid = max_sev_asid; + } + + /* + * The min ASID can end up larger than the max if basic SEV support is + * effectively disabled by disallowing use of ASIDs for SEV guests. + * Similarly for SEV-ES guests the min ASID can end up larger than the + * max when ciphertext hiding is enabled, effectively disabling SEV-ES + * support. + */ if (min_asid > max_asid) return -ENOTTY; @@ -406,6 +434,7 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp, struct kvm_sev_info *sev = to_kvm_sev_info(kvm); struct sev_platform_init_args init_args = {0}; bool es_active = vm_type != KVM_X86_SEV_VM; + bool snp_active = vm_type == KVM_X86_SNP_VM; u64 valid_vmsa_features = es_active ? sev_supported_vmsa_features : 0; int ret; @@ -415,12 +444,26 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp, if (data->flags) return -EINVAL; + if (!snp_active) + valid_vmsa_features &= ~SVM_SEV_FEAT_SECURE_TSC; + if (data->vmsa_features & ~valid_vmsa_features) return -EINVAL; if (data->ghcb_version > GHCB_VERSION_MAX || (!es_active && data->ghcb_version)) return -EINVAL; + /* + * KVM supports the full range of mandatory features defined by version + * 2 of the GHCB protocol, so default to that for SEV-ES guests created + * via KVM_SEV_INIT2 (KVM_SEV_INIT forces version 1). + */ + if (es_active && !data->ghcb_version) + data->ghcb_version = 2; + + if (snp_active && data->ghcb_version < 2) + return -EINVAL; + if (unlikely(sev->active)) return -EINVAL; @@ -429,18 +472,10 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp, sev->vmsa_features = data->vmsa_features; sev->ghcb_version = data->ghcb_version; - /* - * Currently KVM supports the full range of mandatory features defined - * by version 2 of the GHCB protocol, so default to that for SEV-ES - * guests created via KVM_SEV_INIT2. - */ - if (sev->es_active && !sev->ghcb_version) - sev->ghcb_version = GHCB_VERSION_DEFAULT; - - if (vm_type == KVM_X86_SNP_VM) + if (snp_active) sev->vmsa_features |= SVM_SEV_FEAT_SNP_ACTIVE; - ret = sev_asid_new(sev); + ret = sev_asid_new(sev, vm_type); if (ret) goto e_no_asid; @@ -455,7 +490,7 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp, } /* This needs to happen after SEV/SNP firmware initialization. 
*/ - if (vm_type == KVM_X86_SNP_VM) { + if (snp_active) { ret = snp_guest_req_init(kvm); if (ret) goto e_free; @@ -569,8 +604,6 @@ static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp) if (copy_from_user(¶ms, u64_to_user_ptr(argp->data), sizeof(params))) return -EFAULT; - sev->policy = params.policy; - memset(&start, 0, sizeof(start)); dh_blob = NULL; @@ -618,6 +651,7 @@ static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp) goto e_free_session; } + sev->policy = params.policy; sev->handle = start.handle; sev->fd = argp->sev_fd; @@ -1968,7 +2002,7 @@ static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm) kvm_for_each_vcpu(i, dst_vcpu, dst_kvm) { dst_svm = to_svm(dst_vcpu); - sev_init_vmcb(dst_svm); + sev_init_vmcb(dst_svm, false); if (!dst->es_active) continue; @@ -2180,7 +2214,12 @@ static int snp_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp) if (!(params.policy & SNP_POLICY_MASK_RSVD_MBO)) return -EINVAL; - sev->policy = params.policy; + if (snp_is_secure_tsc_enabled(kvm)) { + if (WARN_ON_ONCE(!kvm->arch.default_tsc_khz)) + return -EINVAL; + + start.desired_tsc_khz = kvm->arch.default_tsc_khz; + } sev->snp_context = snp_context_create(kvm, argp); if (!sev->snp_context) @@ -2188,6 +2227,7 @@ static int snp_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp) start.gctx_paddr = __psp_pa(sev->snp_context); start.policy = params.policy; + memcpy(start.gosvw, params.gosvw, sizeof(params.gosvw)); rc = __sev_issue_cmd(argp->sev_fd, SEV_CMD_SNP_LAUNCH_START, &start, &argp->error); if (rc) { @@ -2196,6 +2236,7 @@ static int snp_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp) goto e_free_context; } + sev->policy = params.policy; sev->fd = argp->sev_fd; rc = snp_bind_asid(kvm, &argp->error); if (rc) { @@ -2329,7 +2370,7 @@ static int snp_launch_update(struct kvm *kvm, struct kvm_sev_cmd *argp) pr_debug("%s: GFN start 0x%llx length 0x%llx type %d flags %d\n", __func__, params.gfn_start, params.len, params.type, params.flags); - if (!PAGE_ALIGNED(params.len) || params.flags || + if (!params.len || !PAGE_ALIGNED(params.len) || params.flags || (params.type != KVM_SEV_SNP_PAGE_TYPE_NORMAL && params.type != KVM_SEV_SNP_PAGE_TYPE_ZERO && params.type != KVM_SEV_SNP_PAGE_TYPE_UNMEASURED && @@ -2361,7 +2402,7 @@ static int snp_launch_update(struct kvm *kvm, struct kvm_sev_cmd *argp) mutex_lock(&kvm->slots_lock); memslot = gfn_to_memslot(kvm, params.gfn_start); - if (!kvm_slot_can_be_private(memslot)) { + if (!kvm_slot_has_gmem(memslot)) { ret = -EINVAL; goto out; } @@ -3038,6 +3079,9 @@ void __init sev_hardware_setup(void) if (min_sev_asid == 1) goto out; + min_sev_es_asid = min_snp_asid = 1; + max_sev_es_asid = max_snp_asid = min_sev_asid - 1; + sev_es_asid_count = min_sev_asid - 1; WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV_ES, sev_es_asid_count)); sev_es_supported = true; @@ -3046,10 +3090,32 @@ void __init sev_hardware_setup(void) out: if (sev_enabled) { init_args.probe = true; + + if (sev_is_snp_ciphertext_hiding_supported()) + init_args.max_snp_asid = min(nr_ciphertext_hiding_asids, + min_sev_asid - 1); + if (sev_platform_init(&init_args)) sev_supported = sev_es_supported = sev_snp_supported = false; else if (sev_snp_supported) sev_snp_supported = is_sev_snp_initialized(); + + if (sev_snp_supported) + nr_ciphertext_hiding_asids = init_args.max_snp_asid; + + /* + * If ciphertext hiding is enabled, the joint SEV-ES/SEV-SNP + * ASID range is partitioned into separate SEV-ES and SEV-SNP + * ASID ranges, with the SEV-SNP range 
being [1..max_snp_asid] + * and the SEV-ES range being (max_snp_asid..max_sev_es_asid]. + * Note, SEV-ES may effectively be disabled if all ASIDs from + * the joint range are assigned to SEV-SNP. + */ + if (nr_ciphertext_hiding_asids) { + max_snp_asid = nr_ciphertext_hiding_asids; + min_sev_es_asid = max_snp_asid + 1; + pr_info("SEV-SNP ciphertext hiding enabled\n"); + } } if (boot_cpu_has(X86_FEATURE_SEV)) @@ -3060,12 +3126,14 @@ out: min_sev_asid, max_sev_asid); if (boot_cpu_has(X86_FEATURE_SEV_ES)) pr_info("SEV-ES %s (ASIDs %u - %u)\n", - str_enabled_disabled(sev_es_supported), - min_sev_asid > 1 ? 1 : 0, min_sev_asid - 1); + sev_es_supported ? min_sev_es_asid <= max_sev_es_asid ? "enabled" : + "unusable" : + "disabled", + min_sev_es_asid, max_sev_es_asid); if (boot_cpu_has(X86_FEATURE_SEV_SNP)) pr_info("SEV-SNP %s (ASIDs %u - %u)\n", str_enabled_disabled(sev_snp_supported), - min_sev_asid > 1 ? 1 : 0, min_sev_asid - 1); + min_snp_asid, max_snp_asid); sev_enabled = sev_supported; sev_es_enabled = sev_es_supported; @@ -3078,6 +3146,9 @@ out: sev_supported_vmsa_features = 0; if (sev_es_debug_swap_enabled) sev_supported_vmsa_features |= SVM_SEV_FEAT_DEBUG_SWAP; + + if (sev_snp_enabled && tsc_khz && cpu_feature_enabled(X86_FEATURE_SNP_SECURE_TSC)) + sev_supported_vmsa_features |= SVM_SEV_FEAT_SECURE_TSC; } void sev_hardware_unsetup(void) @@ -3193,7 +3264,7 @@ skip_vmsa_free: kvfree(svm->sev_es.ghcb_sa); } -static u64 kvm_ghcb_get_sw_exit_code(struct vmcb_control_area *control) +static u64 kvm_get_cached_sw_exit_code(struct vmcb_control_area *control) { return (((u64)control->exit_code_hi) << 32) | control->exit_code; } @@ -3219,7 +3290,7 @@ static void dump_ghcb(struct vcpu_svm *svm) */ pr_err("GHCB (GPA=%016llx) snapshot:\n", svm->vmcb->control.ghcb_gpa); pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_code", - kvm_ghcb_get_sw_exit_code(control), kvm_ghcb_sw_exit_code_is_valid(svm)); + kvm_get_cached_sw_exit_code(control), kvm_ghcb_sw_exit_code_is_valid(svm)); pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_1", control->exit_info_1, kvm_ghcb_sw_exit_info_1_is_valid(svm)); pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_2", @@ -3272,26 +3343,27 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm) BUILD_BUG_ON(sizeof(svm->sev_es.valid_bitmap) != sizeof(ghcb->save.valid_bitmap)); memcpy(&svm->sev_es.valid_bitmap, &ghcb->save.valid_bitmap, sizeof(ghcb->save.valid_bitmap)); - vcpu->arch.regs[VCPU_REGS_RAX] = kvm_ghcb_get_rax_if_valid(svm, ghcb); - vcpu->arch.regs[VCPU_REGS_RBX] = kvm_ghcb_get_rbx_if_valid(svm, ghcb); - vcpu->arch.regs[VCPU_REGS_RCX] = kvm_ghcb_get_rcx_if_valid(svm, ghcb); - vcpu->arch.regs[VCPU_REGS_RDX] = kvm_ghcb_get_rdx_if_valid(svm, ghcb); - vcpu->arch.regs[VCPU_REGS_RSI] = kvm_ghcb_get_rsi_if_valid(svm, ghcb); + vcpu->arch.regs[VCPU_REGS_RAX] = kvm_ghcb_get_rax_if_valid(svm); + vcpu->arch.regs[VCPU_REGS_RBX] = kvm_ghcb_get_rbx_if_valid(svm); + vcpu->arch.regs[VCPU_REGS_RCX] = kvm_ghcb_get_rcx_if_valid(svm); + vcpu->arch.regs[VCPU_REGS_RDX] = kvm_ghcb_get_rdx_if_valid(svm); + vcpu->arch.regs[VCPU_REGS_RSI] = kvm_ghcb_get_rsi_if_valid(svm); - svm->vmcb->save.cpl = kvm_ghcb_get_cpl_if_valid(svm, ghcb); + svm->vmcb->save.cpl = kvm_ghcb_get_cpl_if_valid(svm); - if (kvm_ghcb_xcr0_is_valid(svm)) { - vcpu->arch.xcr0 = ghcb_get_xcr0(ghcb); - vcpu->arch.cpuid_dynamic_bits_dirty = true; - } + if (kvm_ghcb_xcr0_is_valid(svm)) + __kvm_set_xcr(vcpu, 0, kvm_ghcb_get_xcr0(svm)); + + if (kvm_ghcb_xss_is_valid(svm)) + __kvm_emulate_msr_write(vcpu, MSR_IA32_XSS, 
kvm_ghcb_get_xss(svm)); /* Copy the GHCB exit information into the VMCB fields */ - exit_code = ghcb_get_sw_exit_code(ghcb); + exit_code = kvm_ghcb_get_sw_exit_code(svm); control->exit_code = lower_32_bits(exit_code); control->exit_code_hi = upper_32_bits(exit_code); - control->exit_info_1 = ghcb_get_sw_exit_info_1(ghcb); - control->exit_info_2 = ghcb_get_sw_exit_info_2(ghcb); - svm->sev_es.sw_scratch = kvm_ghcb_get_sw_scratch_if_valid(svm, ghcb); + control->exit_info_1 = kvm_ghcb_get_sw_exit_info_1(svm); + control->exit_info_2 = kvm_ghcb_get_sw_exit_info_2(svm); + svm->sev_es.sw_scratch = kvm_ghcb_get_sw_scratch_if_valid(svm); /* Clear the valid entries fields */ memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap)); @@ -3308,7 +3380,7 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm) * Retrieve the exit code now even though it may not be marked valid * as it could help with debugging. */ - exit_code = kvm_ghcb_get_sw_exit_code(control); + exit_code = kvm_get_cached_sw_exit_code(control); /* Only GHCB Usage code 0 is supported */ if (svm->sev_es.ghcb->ghcb_usage) { @@ -3880,7 +3952,7 @@ next_range: /* * Invoked as part of svm_vcpu_reset() processing of an init event. */ -void sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu) +static void sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); struct kvm_memory_slot *slot; @@ -3888,9 +3960,6 @@ void sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu) kvm_pfn_t pfn; gfn_t gfn; - if (!sev_snp_guest(vcpu->kvm)) - return; - guard(mutex)(&svm->sev_es.snp_vmsa_mutex); if (!svm->sev_es.snp_ap_waiting_for_reset) @@ -4316,7 +4385,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) svm_vmgexit_success(svm, 0); - exit_code = kvm_ghcb_get_sw_exit_code(control); + exit_code = kvm_get_cached_sw_exit_code(control); switch (exit_code) { case SVM_VMGEXIT_MMIO_READ: ret = setup_vmgexit_scratch(svm, true, control->exit_info_2); @@ -4448,6 +4517,9 @@ void sev_es_recalc_msr_intercepts(struct kvm_vcpu *vcpu) !guest_cpu_cap_has(vcpu, X86_FEATURE_RDTSCP) && !guest_cpu_cap_has(vcpu, X86_FEATURE_RDPID)); + svm_set_intercept_for_msr(vcpu, MSR_AMD64_GUEST_TSC_FREQ, MSR_TYPE_R, + !snp_is_secure_tsc_enabled(vcpu->kvm)); + /* * For SEV-ES, accesses to MSR_IA32_XSS should not be intercepted if * the host/guest supports its use. @@ -4476,7 +4548,7 @@ void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm) vcpu->arch.reserved_gpa_bits &= ~(1UL << (best->ebx & 0x3f)); } -static void sev_es_init_vmcb(struct vcpu_svm *svm) +static void sev_es_init_vmcb(struct vcpu_svm *svm, bool init_event) { struct kvm_sev_info *sev = to_kvm_sev_info(svm->vcpu.kvm); struct vmcb *vmcb = svm->vmcb01.ptr; @@ -4537,10 +4609,21 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm) /* Can't intercept XSETBV, HV can't modify XCR0 directly */ svm_clr_intercept(svm, INTERCEPT_XSETBV); + + /* + * Set the GHCB MSR value as per the GHCB specification when emulating + * vCPU RESET for an SEV-ES guest. 
+ */ + if (!init_event) + set_ghcb_msr(svm, GHCB_MSR_SEV_INFO((__u64)sev->ghcb_version, + GHCB_VERSION_MIN, + sev_enc_bit)); } -void sev_init_vmcb(struct vcpu_svm *svm) +void sev_init_vmcb(struct vcpu_svm *svm, bool init_event) { + struct kvm_vcpu *vcpu = &svm->vcpu; + svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE; clr_exception_intercept(svm, UD_VECTOR); @@ -4550,24 +4633,36 @@ void sev_init_vmcb(struct vcpu_svm *svm) */ clr_exception_intercept(svm, GP_VECTOR); - if (sev_es_guest(svm->vcpu.kvm)) - sev_es_init_vmcb(svm); + if (init_event && sev_snp_guest(vcpu->kvm)) + sev_snp_init_protected_guest_state(vcpu); + + if (sev_es_guest(vcpu->kvm)) + sev_es_init_vmcb(svm, init_event); } -void sev_es_vcpu_reset(struct vcpu_svm *svm) +int sev_vcpu_create(struct kvm_vcpu *vcpu) { - struct kvm_vcpu *vcpu = &svm->vcpu; - struct kvm_sev_info *sev = to_kvm_sev_info(vcpu->kvm); + struct vcpu_svm *svm = to_svm(vcpu); + struct page *vmsa_page; + + mutex_init(&svm->sev_es.snp_vmsa_mutex); + + if (!sev_es_guest(vcpu->kvm)) + return 0; /* - * Set the GHCB MSR value as per the GHCB specification when emulating - * vCPU RESET for an SEV-ES guest. + * SEV-ES guests require a separate (from the VMCB) VMSA page used to + * contain the encrypted register state of the guest. */ - set_ghcb_msr(svm, GHCB_MSR_SEV_INFO((__u64)sev->ghcb_version, - GHCB_VERSION_MIN, - sev_enc_bit)); + vmsa_page = snp_safe_alloc_page(); + if (!vmsa_page) + return -ENOMEM; - mutex_init(&svm->sev_es.snp_vmsa_mutex); + svm->sev_es.vmsa = page_address(vmsa_page); + + vcpu->arch.guest_tsc_protected = snp_is_secure_tsc_enabled(vcpu->kvm); + + return 0; } void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_area *hostsa) @@ -4618,6 +4713,16 @@ void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_are hostsa->dr2_addr_mask = amd_get_dr_addr_mask(2); hostsa->dr3_addr_mask = amd_get_dr_addr_mask(3); } + + /* + * TSC_AUX is always virtualized for SEV-ES guests when the feature is + * available, i.e. TSC_AUX is loaded on #VMEXIT from the host save area. + * Set the save area to the current hardware value, i.e. the current + * user return value, so that the correct value is restored on #VMEXIT. + */ + if (cpu_feature_enabled(X86_FEATURE_V_TSC_AUX) && + !WARN_ON_ONCE(tsc_aux_uret_slot < 0)) + hostsa->tsc_aux = kvm_get_user_return_msr(tsc_aux_uret_slot); } void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) @@ -4715,7 +4820,7 @@ void sev_handle_rmp_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code) } slot = gfn_to_memslot(kvm, gfn); - if (!kvm_slot_can_be_private(slot)) { + if (!kvm_slot_has_gmem(slot)) { pr_warn_ratelimited("SEV: Unexpected RMP fault, non-private slot for GPA 0x%llx\n", gpa); return; @@ -4943,7 +5048,7 @@ next_pfn: } } -int sev_private_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn) +int sev_gmem_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn, bool is_private) { int level, rc; bool assigned; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 1bfebe40854f..153c12dbf3eb 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -158,14 +158,6 @@ module_param(lbrv, int, 0444); static int tsc_scaling = true; module_param(tsc_scaling, int, 0444); -/* - * enable / disable AVIC. Because the defaults differ for APICv - * support between VMX and SVM we cannot use module_param_named. 
- */ -static bool avic; -module_param(avic, bool, 0444); -module_param(enable_ipiv, bool, 0444); - module_param(enable_device_posted_irqs, bool, 0444); bool __read_mostly dump_invalid_vmcb; @@ -195,7 +187,7 @@ static DEFINE_MUTEX(vmcb_dump_mutex); * RDTSCP and RDPID are not used in the kernel, specifically to allow KVM to * defer the restoration of TSC_AUX until the CPU returns to userspace. */ -static int tsc_aux_uret_slot __read_mostly = -1; +int tsc_aux_uret_slot __ro_after_init = -1; static int get_npt_level(void) { @@ -577,18 +569,6 @@ static int svm_enable_virtualization_cpu(void) amd_pmu_enable_virt(); - /* - * If TSC_AUX virtualization is supported, TSC_AUX becomes a swap type - * "B" field (see sev_es_prepare_switch_to_guest()) for SEV-ES guests. - * Since Linux does not change the value of TSC_AUX once set, prime the - * TSC_AUX field now to avoid a RDMSR on every vCPU run. - */ - if (boot_cpu_has(X86_FEATURE_V_TSC_AUX)) { - u32 __maybe_unused msr_hi; - - rdmsr(MSR_TSC_AUX, sev_es_host_save_area(sd)->tsc_aux, msr_hi); - } - return 0; } @@ -736,55 +716,6 @@ static void svm_recalc_lbr_msr_intercepts(struct kvm_vcpu *vcpu) svm_set_intercept_for_msr(vcpu, MSR_IA32_DEBUGCTLMSR, MSR_TYPE_RW, intercept); } -void svm_set_x2apic_msr_interception(struct vcpu_svm *svm, bool intercept) -{ - static const u32 x2avic_passthrough_msrs[] = { - X2APIC_MSR(APIC_ID), - X2APIC_MSR(APIC_LVR), - X2APIC_MSR(APIC_TASKPRI), - X2APIC_MSR(APIC_ARBPRI), - X2APIC_MSR(APIC_PROCPRI), - X2APIC_MSR(APIC_EOI), - X2APIC_MSR(APIC_RRR), - X2APIC_MSR(APIC_LDR), - X2APIC_MSR(APIC_DFR), - X2APIC_MSR(APIC_SPIV), - X2APIC_MSR(APIC_ISR), - X2APIC_MSR(APIC_TMR), - X2APIC_MSR(APIC_IRR), - X2APIC_MSR(APIC_ESR), - X2APIC_MSR(APIC_ICR), - X2APIC_MSR(APIC_ICR2), - - /* - * Note! Always intercept LVTT, as TSC-deadline timer mode - * isn't virtualized by hardware, and the CPU will generate a - * #GP instead of a #VMEXIT. 
- */ - X2APIC_MSR(APIC_LVTTHMR), - X2APIC_MSR(APIC_LVTPC), - X2APIC_MSR(APIC_LVT0), - X2APIC_MSR(APIC_LVT1), - X2APIC_MSR(APIC_LVTERR), - X2APIC_MSR(APIC_TMICT), - X2APIC_MSR(APIC_TMCCT), - X2APIC_MSR(APIC_TDCR), - }; - int i; - - if (intercept == svm->x2avic_msrs_intercepted) - return; - - if (!x2avic_enabled) - return; - - for (i = 0; i < ARRAY_SIZE(x2avic_passthrough_msrs); i++) - svm_set_intercept_for_msr(&svm->vcpu, x2avic_passthrough_msrs[i], - MSR_TYPE_RW, intercept); - - svm->x2avic_msrs_intercepted = intercept; -} - void svm_vcpu_free_msrpm(void *msrpm) { __free_pages(virt_to_page(msrpm), get_order(MSRPM_SIZE)); @@ -844,6 +775,17 @@ static void svm_recalc_msr_intercepts(struct kvm_vcpu *vcpu) svm_disable_intercept_for_msr(vcpu, MSR_IA32_MPERF, MSR_TYPE_R); } + if (kvm_cpu_cap_has(X86_FEATURE_SHSTK)) { + bool shstk_enabled = guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK); + + svm_set_intercept_for_msr(vcpu, MSR_IA32_U_CET, MSR_TYPE_RW, !shstk_enabled); + svm_set_intercept_for_msr(vcpu, MSR_IA32_S_CET, MSR_TYPE_RW, !shstk_enabled); + svm_set_intercept_for_msr(vcpu, MSR_IA32_PL0_SSP, MSR_TYPE_RW, !shstk_enabled); + svm_set_intercept_for_msr(vcpu, MSR_IA32_PL1_SSP, MSR_TYPE_RW, !shstk_enabled); + svm_set_intercept_for_msr(vcpu, MSR_IA32_PL2_SSP, MSR_TYPE_RW, !shstk_enabled); + svm_set_intercept_for_msr(vcpu, MSR_IA32_PL3_SSP, MSR_TYPE_RW, !shstk_enabled); + } + if (sev_es_guest(vcpu->kvm)) sev_es_recalc_msr_intercepts(vcpu); @@ -1077,13 +1019,13 @@ static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu) } } -static void svm_recalc_intercepts_after_set_cpuid(struct kvm_vcpu *vcpu) +static void svm_recalc_intercepts(struct kvm_vcpu *vcpu) { svm_recalc_instruction_intercepts(vcpu); svm_recalc_msr_intercepts(vcpu); } -static void init_vmcb(struct kvm_vcpu *vcpu) +static void init_vmcb(struct kvm_vcpu *vcpu, bool init_event) { struct vcpu_svm *svm = to_svm(vcpu); struct vmcb *vmcb = svm->vmcb01.ptr; @@ -1221,11 +1163,11 @@ static void init_vmcb(struct kvm_vcpu *vcpu) svm_set_intercept(svm, INTERCEPT_BUSLOCK); if (sev_guest(vcpu->kvm)) - sev_init_vmcb(svm); + sev_init_vmcb(svm, init_event); svm_hv_init_vmcb(vmcb); - svm_recalc_intercepts_after_set_cpuid(vcpu); + kvm_make_request(KVM_REQ_RECALC_INTERCEPTS, vcpu); vmcb_mark_all_dirty(vmcb); @@ -1244,9 +1186,6 @@ static void __svm_vcpu_reset(struct kvm_vcpu *vcpu) svm->nmi_masked = false; svm->awaiting_iret_completion = false; - - if (sev_es_guest(vcpu->kvm)) - sev_es_vcpu_reset(svm); } static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) @@ -1256,10 +1195,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) svm->spec_ctrl = 0; svm->virt_spec_ctrl = 0; - if (init_event) - sev_snp_init_protected_guest_state(vcpu); - - init_vmcb(vcpu); + init_vmcb(vcpu, init_event); if (!init_event) __svm_vcpu_reset(vcpu); @@ -1275,7 +1211,6 @@ static int svm_vcpu_create(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm; struct page *vmcb01_page; - struct page *vmsa_page = NULL; int err; BUILD_BUG_ON(offsetof(struct vcpu_svm, vcpu) != 0); @@ -1286,24 +1221,18 @@ static int svm_vcpu_create(struct kvm_vcpu *vcpu) if (!vmcb01_page) goto out; - if (sev_es_guest(vcpu->kvm)) { - /* - * SEV-ES guests require a separate VMSA page used to contain - * the encrypted register state of the guest. 
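/*
 * Illustrative sketch, not part of this patch, referring back to the SHSTK
 * hunk added to svm_recalc_msr_intercepts() earlier in this diff: the six
 * shadow stack MSRs are toggled identically, so the same logic can be
 * written as a table walk.  The helper name and table are invented;
 * svm_set_intercept_for_msr() and the MSR_IA32_* constants are the ones
 * used in that hunk.
 */
static const u32 shstk_passthrough_msrs[] = {
	MSR_IA32_U_CET,   MSR_IA32_S_CET,
	MSR_IA32_PL0_SSP, MSR_IA32_PL1_SSP,
	MSR_IA32_PL2_SSP, MSR_IA32_PL3_SSP,
};

static void svm_toggle_shstk_intercepts(struct kvm_vcpu *vcpu, bool shstk_enabled)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(shstk_passthrough_msrs); i++)
		svm_set_intercept_for_msr(vcpu, shstk_passthrough_msrs[i],
					  MSR_TYPE_RW, !shstk_enabled);
}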
- */ - vmsa_page = snp_safe_alloc_page(); - if (!vmsa_page) - goto error_free_vmcb_page; - } + err = sev_vcpu_create(vcpu); + if (err) + goto error_free_vmcb_page; err = avic_init_vcpu(svm); if (err) - goto error_free_vmsa_page; + goto error_free_sev; svm->msrpm = svm_vcpu_alloc_msrpm(); if (!svm->msrpm) { err = -ENOMEM; - goto error_free_vmsa_page; + goto error_free_sev; } svm->x2avic_msrs_intercepted = true; @@ -1312,16 +1241,12 @@ static int svm_vcpu_create(struct kvm_vcpu *vcpu) svm->vmcb01.pa = __sme_set(page_to_pfn(vmcb01_page) << PAGE_SHIFT); svm_switch_vmcb(svm, &svm->vmcb01); - if (vmsa_page) - svm->sev_es.vmsa = page_address(vmsa_page); - svm->guest_state_loaded = false; return 0; -error_free_vmsa_page: - if (vmsa_page) - __free_page(vmsa_page); +error_free_sev: + sev_free_vcpu(vcpu); error_free_vmcb_page: __free_page(vmcb01_page); out: @@ -1423,10 +1348,10 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu) __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio); /* - * TSC_AUX is always virtualized for SEV-ES guests when the feature is - * available. The user return MSR support is not required in this case - * because TSC_AUX is restored on #VMEXIT from the host save area - * (which has been initialized in svm_enable_virtualization_cpu()). + * TSC_AUX is always virtualized (context switched by hardware) for + * SEV-ES guests when the feature is available. For non-SEV-ES guests, + * context switch TSC_AUX via the user_return MSR infrastructure (not + * all CPUs support TSC_AUX virtualization). */ if (likely(tsc_aux_uret_slot >= 0) && (!boot_cpu_has(X86_FEATURE_V_TSC_AUX) || !sev_es_guest(vcpu->kvm))) @@ -2727,8 +2652,8 @@ static int svm_get_feature_msr(u32 msr, u64 *data) static bool sev_es_prevent_msr_access(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { - return sev_es_guest(vcpu->kvm) && - vcpu->arch.guest_state_protected && + return sev_es_guest(vcpu->kvm) && vcpu->arch.guest_state_protected && + msr_info->index != MSR_IA32_XSS && !msr_write_intercepted(vcpu, msr_info->index); } @@ -2784,6 +2709,15 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (guest_cpuid_is_intel_compatible(vcpu)) msr_info->data |= (u64)svm->sysenter_esp_hi << 32; break; + case MSR_IA32_S_CET: + msr_info->data = svm->vmcb->save.s_cet; + break; + case MSR_IA32_INT_SSP_TAB: + msr_info->data = svm->vmcb->save.isst_addr; + break; + case MSR_KVM_INTERNAL_GUEST_SSP: + msr_info->data = svm->vmcb->save.ssp; + break; case MSR_TSC_AUX: msr_info->data = svm->tsc_aux; break; @@ -3016,13 +2950,24 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) svm->vmcb01.ptr->save.sysenter_esp = (u32)data; svm->sysenter_esp_hi = guest_cpuid_is_intel_compatible(vcpu) ? (data >> 32) : 0; break; + case MSR_IA32_S_CET: + svm->vmcb->save.s_cet = data; + vmcb_mark_dirty(svm->vmcb01.ptr, VMCB_CET); + break; + case MSR_IA32_INT_SSP_TAB: + svm->vmcb->save.isst_addr = data; + vmcb_mark_dirty(svm->vmcb01.ptr, VMCB_CET); + break; + case MSR_KVM_INTERNAL_GUEST_SSP: + svm->vmcb->save.ssp = data; + vmcb_mark_dirty(svm->vmcb01.ptr, VMCB_CET); + break; case MSR_TSC_AUX: /* * TSC_AUX is always virtualized for SEV-ES guests when the * feature is available. The user return MSR support is not * required in this case because TSC_AUX is restored on #VMEXIT - * from the host save area (which has been initialized in - * svm_enable_virtualization_cpu()). + * from the host save area. 
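/*
 * Illustrative sketch, not part of this patch: the condition implied by the
 * TSC_AUX comments above and below.  The user-return MSR slot matters only
 * when hardware will not context switch TSC_AUX for the vCPU, i.e. when
 * TSC_AUX virtualization is unavailable or the guest is not SEV-ES.  The
 * helper name is invented; tsc_aux_uret_slot, X86_FEATURE_V_TSC_AUX and
 * sev_es_guest() all appear in the hunks above.
 */
static bool tsc_aux_uses_user_return_msr(struct kvm_vcpu *vcpu)
{
	return tsc_aux_uret_slot >= 0 &&
	       (!boot_cpu_has(X86_FEATURE_V_TSC_AUX) || !sev_es_guest(vcpu->kvm));
}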
*/ if (boot_cpu_has(X86_FEATURE_V_TSC_AUX) && sev_es_guest(vcpu->kvm)) break; @@ -3407,6 +3352,10 @@ static void dump_vmcb(struct kvm_vcpu *vcpu) pr_err("%-15s %016llx %-13s %016llx\n", "rsp:", save->rsp, "rax:", save->rax); pr_err("%-15s %016llx %-13s %016llx\n", + "s_cet:", save->s_cet, "ssp:", save->ssp); + pr_err("%-15s %016llx\n", + "isst_addr:", save->isst_addr); + pr_err("%-15s %016llx %-13s %016llx\n", "star:", save01->star, "lstar:", save01->lstar); pr_err("%-15s %016llx %-13s %016llx\n", "cstar:", save01->cstar, "sfmask:", save01->sfmask); @@ -3431,6 +3380,13 @@ static void dump_vmcb(struct kvm_vcpu *vcpu) "sev_features", vmsa->sev_features); pr_err("%-15s %016llx %-13s %016llx\n", + "pl0_ssp:", vmsa->pl0_ssp, "pl1_ssp:", vmsa->pl1_ssp); + pr_err("%-15s %016llx %-13s %016llx\n", + "pl2_ssp:", vmsa->pl2_ssp, "pl3_ssp:", vmsa->pl3_ssp); + pr_err("%-15s %016llx\n", + "u_cet:", vmsa->u_cet); + + pr_err("%-15s %016llx %-13s %016llx\n", "rax:", vmsa->rax, "rbx:", vmsa->rbx); pr_err("%-15s %016llx %-13s %016llx\n", "rcx:", vmsa->rcx, "rdx:", vmsa->rdx); @@ -4180,17 +4136,27 @@ static int svm_vcpu_pre_run(struct kvm_vcpu *vcpu) static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + struct vmcb_control_area *control = &svm->vmcb->control; + + /* + * Next RIP must be provided as IRQs are disabled, and accessing guest + * memory to decode the instruction might fault, i.e. might sleep. + */ + if (!nrips || !control->next_rip) + return EXIT_FASTPATH_NONE; if (is_guest_mode(vcpu)) return EXIT_FASTPATH_NONE; - switch (svm->vmcb->control.exit_code) { + switch (control->exit_code) { case SVM_EXIT_MSR: - if (!svm->vmcb->control.exit_info_1) + if (!control->exit_info_1) break; - return handle_fastpath_set_msr_irqoff(vcpu); + return handle_fastpath_wrmsr(vcpu); case SVM_EXIT_HLT: return handle_fastpath_hlt(vcpu); + case SVM_EXIT_INVD: + return handle_fastpath_invd(vcpu); default: break; } @@ -4467,8 +4433,6 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) if (sev_guest(vcpu->kvm)) sev_vcpu_after_set_cpuid(svm); - - svm_recalc_intercepts_after_set_cpuid(vcpu); } static bool svm_has_wbinvd_exit(void) @@ -5041,7 +5005,7 @@ static void *svm_alloc_apic_backing_page(struct kvm_vcpu *vcpu) return page_address(page); } -static struct kvm_x86_ops svm_x86_ops __initdata = { +struct kvm_x86_ops svm_x86_ops __initdata = { .name = KBUILD_MODNAME, .check_processor_compatibility = svm_check_processor_compat, @@ -5170,7 +5134,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .apic_init_signal_blocked = svm_apic_init_signal_blocked, - .recalc_msr_intercepts = svm_recalc_msr_intercepts, + .recalc_intercepts = svm_recalc_intercepts, .complete_emulated_msr = svm_complete_emulated_msr, .vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector, @@ -5179,7 +5143,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .gmem_prepare = sev_gmem_prepare, .gmem_invalidate = sev_gmem_invalidate, - .private_max_mapping_level = sev_private_max_mapping_level, + .gmem_max_mapping_level = sev_gmem_max_mapping_level, }; /* @@ -5228,7 +5192,8 @@ static __init void svm_set_cpu_caps(void) kvm_set_cpu_caps(); kvm_caps.supported_perf_cap = 0; - kvm_caps.supported_xss = 0; + + kvm_cpu_cap_clear(X86_FEATURE_IBT); /* CPUID 0x80000001 and 0x8000000A (SVM features) */ if (nested) { @@ -5300,8 +5265,12 @@ static __init void svm_set_cpu_caps(void) /* CPUID 0x8000001F (SME/SEV features) */ sev_set_cpu_caps(); - /* Don't advertise Bus Lock Detect to guest if SVM support 
is absent */ + /* + * Clear capabilities that are automatically configured by common code, + * but that require explicit SVM support (that isn't yet implemented). + */ kvm_cpu_cap_clear(X86_FEATURE_BUS_LOCK_DETECT); + kvm_cpu_cap_clear(X86_FEATURE_MSR_IMM); } static __init int svm_hardware_setup(void) @@ -5374,6 +5343,21 @@ static __init int svm_hardware_setup(void) get_npt_level(), PG_LEVEL_1G); pr_info("Nested Paging %s\n", str_enabled_disabled(npt_enabled)); + /* + * It seems that on AMD processors PTE's accessed bit is + * being set by the CPU hardware before the NPF vmexit. + * This is not expected behaviour and our tests fail because + * of it. + * A workaround here is to disable support for + * GUEST_MAXPHYADDR < HOST_MAXPHYADDR if NPT is enabled. + * In this case userspace can know if there is support using + * KVM_CAP_SMALLER_MAXPHYADDR extension and decide how to handle + * it + * If future AMD CPU models change the behaviour described above, + * this variable can be changed accordingly + */ + allow_smaller_maxphyaddr = !npt_enabled; + /* Setup shadow_me_value and shadow_me_mask */ kvm_mmu_set_me_spte_mask(sme_me_mask, sme_me_mask); @@ -5408,15 +5392,12 @@ static __init int svm_hardware_setup(void) goto err; } - enable_apicv = avic = avic && avic_hardware_setup(); - + enable_apicv = avic_hardware_setup(); if (!enable_apicv) { enable_ipiv = false; svm_x86_ops.vcpu_blocking = NULL; svm_x86_ops.vcpu_unblocking = NULL; svm_x86_ops.vcpu_get_apicv_inhibit_reasons = NULL; - } else if (!x2avic_enabled) { - svm_x86_ops.allow_apicv_in_x2apic_without_x2apic_virtualization = true; } if (vls) { @@ -5453,21 +5434,6 @@ static __init int svm_hardware_setup(void) svm_set_cpu_caps(); - /* - * It seems that on AMD processors PTE's accessed bit is - * being set by the CPU hardware before the NPF vmexit. - * This is not expected behaviour and our tests fail because - * of it. - * A workaround here is to disable support for - * GUEST_MAXPHYADDR < HOST_MAXPHYADDR if NPT is enabled. - * In this case userspace can know if there is support using - * KVM_CAP_SMALLER_MAXPHYADDR extension and decide how to handle - * it - * If future AMD CPU models change the behaviour described above, - * this variable can be changed accordingly - */ - allow_smaller_maxphyaddr = !npt_enabled; - kvm_caps.inapplicable_quirks &= ~KVM_X86_QUIRK_CD_NW_CLEARED; return 0; diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 58b9d168e0c8..e4b04f435b3d 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -48,10 +48,13 @@ extern bool npt_enabled; extern int nrips; extern int vgif; extern bool intercept_smi; -extern bool x2avic_enabled; extern bool vnmi; extern int lbrv; +extern int tsc_aux_uret_slot __ro_after_init; + +extern struct kvm_x86_ops svm_x86_ops __initdata; + /* * Clean bits in VMCB. 
* VMCB_ALL_CLEAN_MASK might also need to @@ -74,6 +77,7 @@ enum { * AVIC PHYSICAL_TABLE pointer, * AVIC LOGICAL_TABLE pointer */ + VMCB_CET, /* S_CET, SSP, ISST_ADDR */ VMCB_SW = 31, /* Reserved for hypervisor/software use */ }; @@ -82,7 +86,7 @@ enum { (1U << VMCB_ASID) | (1U << VMCB_INTR) | \ (1U << VMCB_NPT) | (1U << VMCB_CR) | (1U << VMCB_DR) | \ (1U << VMCB_DT) | (1U << VMCB_SEG) | (1U << VMCB_CR2) | \ - (1U << VMCB_LBR) | (1U << VMCB_AVIC) | \ + (1U << VMCB_LBR) | (1U << VMCB_AVIC) | (1U << VMCB_CET) | \ (1U << VMCB_SW)) /* TPR and CR2 are always written before VMRUN */ @@ -699,7 +703,6 @@ void svm_set_gif(struct vcpu_svm *svm, bool value); int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code); void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr, int read, int write); -void svm_set_x2apic_msr_interception(struct vcpu_svm *svm, bool disable); void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode, int trig_mode, int vec); @@ -801,7 +804,7 @@ extern struct kvm_x86_nested_ops svm_nested_ops; BIT(APICV_INHIBIT_REASON_PHYSICAL_ID_TOO_BIG) \ ) -bool avic_hardware_setup(void); +bool __init avic_hardware_setup(void); int avic_ga_log_notifier(u32 ga_tag); void avic_vm_destroy(struct kvm *kvm); int avic_vm_init(struct kvm *kvm); @@ -826,10 +829,9 @@ void avic_refresh_virtual_apic_mode(struct kvm_vcpu *vcpu); /* sev.c */ int pre_sev_run(struct vcpu_svm *svm, int cpu); -void sev_init_vmcb(struct vcpu_svm *svm); +void sev_init_vmcb(struct vcpu_svm *svm, bool init_event); void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm); int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in); -void sev_es_vcpu_reset(struct vcpu_svm *svm); void sev_es_recalc_msr_intercepts(struct kvm_vcpu *vcpu); void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector); void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_area *hostsa); @@ -854,6 +856,7 @@ static inline struct page *snp_safe_alloc_page(void) return snp_safe_alloc_page_node(numa_node_id(), GFP_KERNEL_ACCOUNT); } +int sev_vcpu_create(struct kvm_vcpu *vcpu); void sev_free_vcpu(struct kvm_vcpu *vcpu); void sev_vm_destroy(struct kvm *kvm); void __init sev_set_cpu_caps(void); @@ -863,10 +866,9 @@ int sev_cpu_init(struct svm_cpu_data *sd); int sev_dev_get_attr(u32 group, u64 attr, u64 *val); extern unsigned int max_sev_asid; void sev_handle_rmp_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code); -void sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu); int sev_gmem_prepare(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order); void sev_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end); -int sev_private_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn); +int sev_gmem_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn, bool is_private); struct vmcb_save_area *sev_decrypt_vmsa(struct kvm_vcpu *vcpu); void sev_free_decrypted_vmsa(struct kvm_vcpu *vcpu, struct vmcb_save_area *vmsa); #else @@ -880,6 +882,7 @@ static inline struct page *snp_safe_alloc_page(void) return snp_safe_alloc_page_node(numa_node_id(), GFP_KERNEL_ACCOUNT); } +static inline int sev_vcpu_create(struct kvm_vcpu *vcpu) { return 0; } static inline void sev_free_vcpu(struct kvm_vcpu *vcpu) {} static inline void sev_vm_destroy(struct kvm *kvm) {} static inline void __init sev_set_cpu_caps(void) {} @@ -889,13 +892,12 @@ static inline int sev_cpu_init(struct svm_cpu_data *sd) { return 0; } static inline int sev_dev_get_attr(u32 group, u64 attr, u64 *val) { return -ENXIO; } #define 
max_sev_asid 0 static inline void sev_handle_rmp_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code) {} -static inline void sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu) {} static inline int sev_gmem_prepare(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order) { return 0; } static inline void sev_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end) {} -static inline int sev_private_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn) +static inline int sev_gmem_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn, bool is_private) { return 0; } @@ -914,16 +916,21 @@ void __svm_sev_es_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted, void __svm_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted); #define DEFINE_KVM_GHCB_ACCESSORS(field) \ - static __always_inline bool kvm_ghcb_##field##_is_valid(const struct vcpu_svm *svm) \ - { \ - return test_bit(GHCB_BITMAP_IDX(field), \ - (unsigned long *)&svm->sev_es.valid_bitmap); \ - } \ - \ - static __always_inline u64 kvm_ghcb_get_##field##_if_valid(struct vcpu_svm *svm, struct ghcb *ghcb) \ - { \ - return kvm_ghcb_##field##_is_valid(svm) ? ghcb->save.field : 0; \ - } \ +static __always_inline u64 kvm_ghcb_get_##field(struct vcpu_svm *svm) \ +{ \ + return READ_ONCE(svm->sev_es.ghcb->save.field); \ +} \ + \ +static __always_inline bool kvm_ghcb_##field##_is_valid(const struct vcpu_svm *svm) \ +{ \ + return test_bit(GHCB_BITMAP_IDX(field), \ + (unsigned long *)&svm->sev_es.valid_bitmap); \ +} \ + \ +static __always_inline u64 kvm_ghcb_get_##field##_if_valid(struct vcpu_svm *svm) \ +{ \ + return kvm_ghcb_##field##_is_valid(svm) ? kvm_ghcb_get_##field(svm) : 0; \ +} DEFINE_KVM_GHCB_ACCESSORS(cpl) DEFINE_KVM_GHCB_ACCESSORS(rax) @@ -936,5 +943,6 @@ DEFINE_KVM_GHCB_ACCESSORS(sw_exit_info_1) DEFINE_KVM_GHCB_ACCESSORS(sw_exit_info_2) DEFINE_KVM_GHCB_ACCESSORS(sw_scratch) DEFINE_KVM_GHCB_ACCESSORS(xcr0) +DEFINE_KVM_GHCB_ACCESSORS(xss) #endif diff --git a/arch/x86/kvm/svm/svm_onhyperv.c b/arch/x86/kvm/svm/svm_onhyperv.c index 3971b3ea5d04..a8e78c0e5956 100644 --- a/arch/x86/kvm/svm/svm_onhyperv.c +++ b/arch/x86/kvm/svm/svm_onhyperv.c @@ -15,7 +15,7 @@ #include "kvm_onhyperv.h" #include "svm_onhyperv.h" -int svm_hv_enable_l2_tlb_flush(struct kvm_vcpu *vcpu) +static int svm_hv_enable_l2_tlb_flush(struct kvm_vcpu *vcpu) { struct hv_vmcb_enlightenments *hve; hpa_t partition_assist_page = hv_get_partition_assist_page(vcpu); @@ -35,3 +35,29 @@ int svm_hv_enable_l2_tlb_flush(struct kvm_vcpu *vcpu) return 0; } +__init void svm_hv_hardware_setup(void) +{ + if (npt_enabled && + ms_hyperv.nested_features & HV_X64_NESTED_ENLIGHTENED_TLB) { + pr_info(KBUILD_MODNAME ": Hyper-V enlightened NPT TLB flush enabled\n"); + svm_x86_ops.flush_remote_tlbs = hv_flush_remote_tlbs; + svm_x86_ops.flush_remote_tlbs_range = hv_flush_remote_tlbs_range; + } + + if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH) { + int cpu; + + pr_info(KBUILD_MODNAME ": Hyper-V Direct TLB Flush enabled\n"); + for_each_online_cpu(cpu) { + struct hv_vp_assist_page *vp_ap = + hv_get_vp_assist_page(cpu); + + if (!vp_ap) + continue; + + vp_ap->nested_control.features.directhypercall = 1; + } + svm_x86_ops.enable_l2_tlb_flush = + svm_hv_enable_l2_tlb_flush; + } +} diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h index f85bc617ffe4..08f14e6f195c 100644 --- a/arch/x86/kvm/svm/svm_onhyperv.h +++ b/arch/x86/kvm/svm/svm_onhyperv.h @@ -13,9 +13,7 @@ #include "kvm_onhyperv.h" #include "svm/hyperv.h" -static struct kvm_x86_ops svm_x86_ops; - -int 
svm_hv_enable_l2_tlb_flush(struct kvm_vcpu *vcpu); +__init void svm_hv_hardware_setup(void); static inline bool svm_hv_is_enlightened_tlb_enabled(struct kvm_vcpu *vcpu) { @@ -40,33 +38,6 @@ static inline void svm_hv_init_vmcb(struct vmcb *vmcb) hve->hv_enlightenments_control.msr_bitmap = 1; } -static inline __init void svm_hv_hardware_setup(void) -{ - if (npt_enabled && - ms_hyperv.nested_features & HV_X64_NESTED_ENLIGHTENED_TLB) { - pr_info(KBUILD_MODNAME ": Hyper-V enlightened NPT TLB flush enabled\n"); - svm_x86_ops.flush_remote_tlbs = hv_flush_remote_tlbs; - svm_x86_ops.flush_remote_tlbs_range = hv_flush_remote_tlbs_range; - } - - if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH) { - int cpu; - - pr_info(KBUILD_MODNAME ": Hyper-V Direct TLB Flush enabled\n"); - for_each_online_cpu(cpu) { - struct hv_vp_assist_page *vp_ap = - hv_get_vp_assist_page(cpu); - - if (!vp_ap) - continue; - - vp_ap->nested_control.features.directhypercall = 1; - } - svm_x86_ops.enable_l2_tlb_flush = - svm_hv_enable_l2_tlb_flush; - } -} - static inline void svm_hv_vmcb_dirty_nested_enlightenments( struct kvm_vcpu *vcpu) { diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 57d79fd31df0..e79bc9cb7162 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -461,8 +461,9 @@ TRACE_EVENT(kvm_inj_virq, #define kvm_trace_sym_exc \ EXS(DE), EXS(DB), EXS(BP), EXS(OF), EXS(BR), EXS(UD), EXS(NM), \ - EXS(DF), EXS(TS), EXS(NP), EXS(SS), EXS(GP), EXS(PF), \ - EXS(MF), EXS(AC), EXS(MC) + EXS(DF), EXS(TS), EXS(NP), EXS(SS), EXS(GP), EXS(PF), EXS(MF), \ + EXS(AC), EXS(MC), EXS(XM), EXS(VE), EXS(CP), \ + EXS(HV), EXS(VC), EXS(SX) /* * Tracepoint for kvm interrupt injection: diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 5316c27f6099..02aadb9d730e 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -20,9 +20,6 @@ extern int __read_mostly pt_mode; #define PT_MODE_SYSTEM 0 #define PT_MODE_HOST_GUEST 1 -#define PMU_CAP_FW_WRITES (1ULL << 13) -#define PMU_CAP_LBR_FMT 0x3f - struct nested_vmx_msrs { /* * We only store the "true" versions of the VMX capability MSRs. We @@ -76,6 +73,11 @@ static inline bool cpu_has_vmx_basic_inout(void) return vmcs_config.basic & VMX_BASIC_INOUT; } +static inline bool cpu_has_vmx_basic_no_hw_errcode_cc(void) +{ + return vmcs_config.basic & VMX_BASIC_NO_HW_ERROR_CODE_CC; +} + static inline bool cpu_has_virtual_nmis(void) { return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS && @@ -103,6 +105,10 @@ static inline bool cpu_has_load_perf_global_ctrl(void) return vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL; } +static inline bool cpu_has_load_cet_ctrl(void) +{ + return (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_CET_STATE); +} static inline bool cpu_has_vmx_mpx(void) { return vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS; diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c index dbab1c15b0cd..0eb2773b2ae2 100644 --- a/arch/x86/kvm/vmx/main.c +++ b/arch/x86/kvm/vmx/main.c @@ -188,18 +188,18 @@ static int vt_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return vmx_get_msr(vcpu, msr_info); } -static void vt_recalc_msr_intercepts(struct kvm_vcpu *vcpu) +static void vt_recalc_intercepts(struct kvm_vcpu *vcpu) { /* - * TDX doesn't allow VMM to configure interception of MSR accesses. - * TDX guest requests MSR accesses by calling TDVMCALL. The MSR - * filters will be applied when handling the TDVMCALL for RDMSR/WRMSR - * if the userspace has set any. 
+ * TDX doesn't allow VMM to configure interception of instructions or + * MSR accesses. TDX guest requests MSR accesses by calling TDVMCALL. + * The MSR filters will be applied when handling the TDVMCALL for + * RDMSR/WRMSR if the userspace has set any. */ if (is_td_vcpu(vcpu)) return; - vmx_recalc_msr_intercepts(vcpu); + vmx_recalc_intercepts(vcpu); } static int vt_complete_emulated_msr(struct kvm_vcpu *vcpu, int err) @@ -831,10 +831,11 @@ static int vt_vcpu_mem_enc_ioctl(struct kvm_vcpu *vcpu, void __user *argp) return tdx_vcpu_ioctl(vcpu, argp); } -static int vt_gmem_private_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn) +static int vt_gmem_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn, + bool is_private) { if (is_td(kvm)) - return tdx_gmem_private_max_mapping_level(kvm, pfn); + return tdx_gmem_max_mapping_level(kvm, pfn, is_private); return 0; } @@ -995,7 +996,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = { .apic_init_signal_blocked = vt_op(apic_init_signal_blocked), .migrate_timers = vmx_migrate_timers, - .recalc_msr_intercepts = vt_op(recalc_msr_intercepts), + .recalc_intercepts = vt_op(recalc_intercepts), .complete_emulated_msr = vt_op(complete_emulated_msr), .vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector, @@ -1005,7 +1006,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = { .mem_enc_ioctl = vt_op_tdx_only(mem_enc_ioctl), .vcpu_mem_enc_ioctl = vt_op_tdx_only(vcpu_mem_enc_ioctl), - .private_max_mapping_level = vt_op_tdx_only(gmem_private_max_mapping_level) + .gmem_max_mapping_level = vt_op_tdx_only(gmem_max_mapping_level) }; struct kvm_x86_init_ops vt_init_ops __initdata = { diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index b8ea1969113d..76271962cb70 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -721,6 +721,24 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, MSR_IA32_MPERF, MSR_TYPE_R); + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, + MSR_IA32_U_CET, MSR_TYPE_RW); + + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, + MSR_IA32_S_CET, MSR_TYPE_RW); + + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, + MSR_IA32_PL0_SSP, MSR_TYPE_RW); + + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, + MSR_IA32_PL1_SSP, MSR_TYPE_RW); + + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, + MSR_IA32_PL2_SSP, MSR_TYPE_RW); + + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, + MSR_IA32_PL3_SSP, MSR_TYPE_RW); + kvm_vcpu_unmap(vcpu, &map); vmx->nested.force_msr_bitmap_recalc = false; @@ -997,7 +1015,7 @@ static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) __func__, i, e.index, e.reserved); goto fail; } - if (kvm_set_msr_with_filter(vcpu, e.index, e.value)) { + if (kvm_emulate_msr_write(vcpu, e.index, e.value)) { pr_debug_ratelimited( "%s cannot write MSR (%u, 0x%x, 0x%llx)\n", __func__, i, e.index, e.value); @@ -1033,7 +1051,7 @@ static bool nested_vmx_get_vmexit_msr_value(struct kvm_vcpu *vcpu, } } - if (kvm_get_msr_with_filter(vcpu, msr_index, data)) { + if (kvm_emulate_msr_read(vcpu, msr_index, data)) { pr_debug_ratelimited("%s cannot read MSR (0x%x)\n", __func__, msr_index); return false; @@ -1272,9 +1290,10 @@ static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data) { const u64 feature_bits = VMX_BASIC_DUAL_MONITOR_TREATMENT | VMX_BASIC_INOUT | - VMX_BASIC_TRUE_CTLS; + VMX_BASIC_TRUE_CTLS | + 
VMX_BASIC_NO_HW_ERROR_CODE_CC; - const u64 reserved_bits = GENMASK_ULL(63, 56) | + const u64 reserved_bits = GENMASK_ULL(63, 57) | GENMASK_ULL(47, 45) | BIT_ULL(31); @@ -2520,6 +2539,32 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0 } } +static void vmcs_read_cet_state(struct kvm_vcpu *vcpu, u64 *s_cet, + u64 *ssp, u64 *ssp_tbl) +{ + if (guest_cpu_cap_has(vcpu, X86_FEATURE_IBT) || + guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK)) + *s_cet = vmcs_readl(GUEST_S_CET); + + if (guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK)) { + *ssp = vmcs_readl(GUEST_SSP); + *ssp_tbl = vmcs_readl(GUEST_INTR_SSP_TABLE); + } +} + +static void vmcs_write_cet_state(struct kvm_vcpu *vcpu, u64 s_cet, + u64 ssp, u64 ssp_tbl) +{ + if (guest_cpu_cap_has(vcpu, X86_FEATURE_IBT) || + guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK)) + vmcs_writel(GUEST_S_CET, s_cet); + + if (guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK)) { + vmcs_writel(GUEST_SSP, ssp); + vmcs_writel(GUEST_INTR_SSP_TABLE, ssp_tbl); + } +} + static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) { struct hv_enlightened_vmcs *hv_evmcs = nested_vmx_evmcs(vmx); @@ -2636,6 +2681,10 @@ static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); + if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_CET_STATE) + vmcs_write_cet_state(&vmx->vcpu, vmcs12->guest_s_cet, + vmcs12->guest_ssp, vmcs12->guest_ssp_tbl); + set_cr4_guest_host_mask(vmx); } @@ -2675,6 +2724,13 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, kvm_set_dr(vcpu, 7, vcpu->arch.dr7); vmx_guest_debugctl_write(vcpu, vmx->nested.pre_vmenter_debugctl); } + + if (!vmx->nested.nested_run_pending || + !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_CET_STATE)) + vmcs_write_cet_state(vcpu, vmx->nested.pre_vmenter_s_cet, + vmx->nested.pre_vmenter_ssp, + vmx->nested.pre_vmenter_ssp_tbl); + if (kvm_mpx_supported() && (!vmx->nested.nested_run_pending || !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))) vmcs_write64(GUEST_BNDCFGS, vmx->nested.pre_vmenter_bndcfgs); @@ -2770,8 +2826,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) && kvm_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu)) && - WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL, - vmcs12->guest_ia32_perf_global_ctrl))) { + WARN_ON_ONCE(__kvm_emulate_msr_write(vcpu, MSR_CORE_PERF_GLOBAL_CTRL, + vmcs12->guest_ia32_perf_global_ctrl))) { *entry_failure_code = ENTRY_FAIL_DEFAULT; return -EINVAL; } @@ -2949,7 +3005,6 @@ static int nested_check_vm_entry_controls(struct kvm_vcpu *vcpu, u8 vector = intr_info & INTR_INFO_VECTOR_MASK; u32 intr_type = intr_info & INTR_INFO_INTR_TYPE_MASK; bool has_error_code = intr_info & INTR_INFO_DELIVER_CODE_MASK; - bool should_have_error_code; bool urg = nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST); bool prot_mode = !urg || vmcs12->guest_cr0 & X86_CR0_PE; @@ -2966,12 +3021,19 @@ static int nested_check_vm_entry_controls(struct kvm_vcpu *vcpu, CC(intr_type == INTR_TYPE_OTHER_EVENT && vector != 0)) return -EINVAL; - /* VM-entry interruption-info field: deliver error code */ - should_have_error_code = - intr_type == INTR_TYPE_HARD_EXCEPTION && prot_mode && - x86_exception_has_error_code(vector); - if (CC(has_error_code != should_have_error_code)) - return -EINVAL; + /* + * Cannot deliver error code in real 
mode or if the interrupt + * type is not hardware exception. For other cases, do the + * consistency check only if the vCPU doesn't enumerate + * VMX_BASIC_NO_HW_ERROR_CODE_CC. + */ + if (!prot_mode || intr_type != INTR_TYPE_HARD_EXCEPTION) { + if (CC(has_error_code)) + return -EINVAL; + } else if (!nested_cpu_has_no_hw_errcode_cc(vcpu)) { + if (CC(has_error_code != x86_exception_has_error_code(vector))) + return -EINVAL; + } /* VM-entry exception error code */ if (CC(has_error_code && @@ -3038,6 +3100,16 @@ static bool is_l1_noncanonical_address_on_vmexit(u64 la, struct vmcs12 *vmcs12) return !__is_canonical_address(la, l1_address_bits_on_exit); } +static int nested_vmx_check_cet_state_common(struct kvm_vcpu *vcpu, u64 s_cet, + u64 ssp, u64 ssp_tbl) +{ + if (CC(!kvm_is_valid_u_s_cet(vcpu, s_cet)) || CC(!IS_ALIGNED(ssp, 4)) || + CC(is_noncanonical_msr_address(ssp_tbl, vcpu))) + return -EINVAL; + + return 0; +} + static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { @@ -3048,6 +3120,9 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, CC(!kvm_vcpu_is_legal_cr3(vcpu, vmcs12->host_cr3))) return -EINVAL; + if (CC(vmcs12->host_cr4 & X86_CR4_CET && !(vmcs12->host_cr0 & X86_CR0_WP))) + return -EINVAL; + if (CC(is_noncanonical_msr_address(vmcs12->host_ia32_sysenter_esp, vcpu)) || CC(is_noncanonical_msr_address(vmcs12->host_ia32_sysenter_eip, vcpu))) return -EINVAL; @@ -3104,6 +3179,27 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, return -EINVAL; } + if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_CET_STATE) { + if (nested_vmx_check_cet_state_common(vcpu, vmcs12->host_s_cet, + vmcs12->host_ssp, + vmcs12->host_ssp_tbl)) + return -EINVAL; + + /* + * IA32_S_CET and SSP must be canonical if the host will + * enter 64-bit mode after VM-exit; otherwise, higher + * 32-bits must be all 0s. + */ + if (ia32e) { + if (CC(is_noncanonical_msr_address(vmcs12->host_s_cet, vcpu)) || + CC(is_noncanonical_msr_address(vmcs12->host_ssp, vcpu))) + return -EINVAL; + } else { + if (CC(vmcs12->host_s_cet >> 32) || CC(vmcs12->host_ssp >> 32)) + return -EINVAL; + } + } + return 0; } @@ -3162,6 +3258,9 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, CC(!nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4))) return -EINVAL; + if (CC(vmcs12->guest_cr4 & X86_CR4_CET && !(vmcs12->guest_cr0 & X86_CR0_WP))) + return -EINVAL; + if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) && (CC(!kvm_dr7_valid(vmcs12->guest_dr7)) || CC(!vmx_is_valid_debugctl(vcpu, vmcs12->guest_ia32_debugctl, false)))) @@ -3211,6 +3310,23 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, CC((vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD)))) return -EINVAL; + if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_CET_STATE) { + if (nested_vmx_check_cet_state_common(vcpu, vmcs12->guest_s_cet, + vmcs12->guest_ssp, + vmcs12->guest_ssp_tbl)) + return -EINVAL; + + /* + * Guest SSP must have 63:N bits identical, rather than + * be canonical (i.e., 63:N-1 bits identical), where N is + * the CPU's maximum linear-address width. Similar to + * is_noncanonical_msr_address(), use the host's + * linear-address width. 
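/*
 * Illustrative sketch, not part of this patch: the "bits 63:N identical"
 * requirement checked just below is sign extension from bit N, which is
 * what __is_canonical_address(ssp, N + 1) tests.  Written out by hand for
 * clarity; the helper name is invented.
 */
static bool ssp_bits_63_to_n_identical(u64 ssp, unsigned int n)
{
	u64 shifted = ssp << (63 - n);

	/* The arithmetic right shift replicates bit N into bits 63:N+1. */
	return (u64)((s64)shifted >> (63 - n)) == ssp;
}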
+ */ + if (CC(!__is_canonical_address(vmcs12->guest_ssp, max_host_virt_addr_bits() + 1))) + return -EINVAL; + } + if (nested_check_guest_non_reg_state(vmcs12)) return -EINVAL; @@ -3544,6 +3660,12 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))) vmx->nested.pre_vmenter_bndcfgs = vmcs_read64(GUEST_BNDCFGS); + if (!vmx->nested.nested_run_pending || + !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_CET_STATE)) + vmcs_read_cet_state(vcpu, &vmx->nested.pre_vmenter_s_cet, + &vmx->nested.pre_vmenter_ssp, + &vmx->nested.pre_vmenter_ssp_tbl); + /* * Overwrite vmcs01.GUEST_CR3 with L1's CR3 if EPT is disabled *and* * nested early checks are disabled. In the event of a "late" VM-Fail, @@ -3690,7 +3812,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) return 1; } - kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED); + kvm_pmu_branch_retired(vcpu); if (CC(evmptrld_status == EVMPTRLD_VMFAIL)) return nested_vmx_failInvalid(vcpu); @@ -4627,6 +4749,10 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER) vmcs12->guest_ia32_efer = vcpu->arch.efer; + + vmcs_read_cet_state(&vmx->vcpu, &vmcs12->guest_s_cet, + &vmcs12->guest_ssp, + &vmcs12->guest_ssp_tbl); } /* @@ -4752,14 +4878,26 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS) vmcs_write64(GUEST_BNDCFGS, 0); + /* + * Load CET state from host state if VM_EXIT_LOAD_CET_STATE is set. + * otherwise CET state should be retained across VM-exit, i.e., + * guest values should be propagated from vmcs12 to vmcs01. + */ + if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_CET_STATE) + vmcs_write_cet_state(vcpu, vmcs12->host_s_cet, vmcs12->host_ssp, + vmcs12->host_ssp_tbl); + else + vmcs_write_cet_state(vcpu, vmcs12->guest_s_cet, vmcs12->guest_ssp, + vmcs12->guest_ssp_tbl); + if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) { vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat); vcpu->arch.pat = vmcs12->host_ia32_pat; } if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) && kvm_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu))) - WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL, - vmcs12->host_ia32_perf_global_ctrl)); + WARN_ON_ONCE(__kvm_emulate_msr_write(vcpu, MSR_CORE_PERF_GLOBAL_CTRL, + vmcs12->host_ia32_perf_global_ctrl)); /* Set L1 segment info according to Intel SDM 27.5.2 Loading Host Segment and Descriptor-Table Registers */ @@ -4937,7 +5075,7 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) goto vmabort; } - if (kvm_set_msr_with_filter(vcpu, h.index, h.value)) { + if (kvm_emulate_msr_write(vcpu, h.index, h.value)) { pr_debug_ratelimited( "%s WRMSR failed (%u, 0x%x, 0x%llx)\n", __func__, j, h.index, h.value); @@ -6216,19 +6354,26 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, union vmx_exit_reason exit_reason) { - u32 msr_index = kvm_rcx_read(vcpu); + u32 msr_index; gpa_t bitmap; if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS)) return true; + if (exit_reason.basic == EXIT_REASON_MSR_READ_IMM || + exit_reason.basic == EXIT_REASON_MSR_WRITE_IMM) + msr_index = vmx_get_exit_qual(vcpu); + else + msr_index = kvm_rcx_read(vcpu); + /* * The MSR_BITMAP page is divided into four 1024-byte bitmaps, * for the four combinations of read/write and low/high MSR numbers. 
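/*
 * Illustrative sketch, not part of this patch: byte layout of the VMX MSR
 * bitmap described by the surrounding comment -- read-low at offset 0,
 * read-high at 1024, write-low at 2048, write-high at 3072 -- with the
 * MSR's bit located within the selected 1024-byte region.  The helper name
 * is invented for illustration.
 */
static void msr_bitmap_locate(u32 msr, bool write, unsigned int *byte, unsigned int *bit)
{
	unsigned int base = write ? 2048 : 0;

	if (msr >= 0xc0000000) {
		base += 1024;
		msr -= 0xc0000000;
	}
	*byte = base + msr / 8;
	*bit = msr % 8;
}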
* First we need to figure out which of the four to use: */ bitmap = vmcs12->msr_bitmap; - if (exit_reason.basic == EXIT_REASON_MSR_WRITE) + if (exit_reason.basic == EXIT_REASON_MSR_WRITE || + exit_reason.basic == EXIT_REASON_MSR_WRITE_IMM) bitmap += 2048; if (msr_index >= 0xc0000000) { msr_index -= 0xc0000000; @@ -6527,6 +6672,8 @@ static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu, return nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC); case EXIT_REASON_MSR_READ: case EXIT_REASON_MSR_WRITE: + case EXIT_REASON_MSR_READ_IMM: + case EXIT_REASON_MSR_WRITE_IMM: return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason); case EXIT_REASON_INVALID_STATE: return true; @@ -6561,14 +6708,17 @@ static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu, return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING); case EXIT_REASON_XSETBV: return true; - case EXIT_REASON_XSAVES: case EXIT_REASON_XRSTORS: + case EXIT_REASON_XSAVES: + case EXIT_REASON_XRSTORS: /* - * This should never happen, since it is not possible to - * set XSS to a non-zero value---neither in L1 nor in L2. - * If if it were, XSS would have to be checked against - * the XSS exit bitmap in vmcs12. + * Always forward XSAVES/XRSTORS to L1 as KVM doesn't utilize + * XSS-bitmap, and always loads vmcs02 with vmcs12's XSS-bitmap + * verbatim, i.e. any exit is due to L1's bitmap. WARN if + * XSAVES isn't enabled, as the CPU is supposed to inject #UD + * in that case, before consulting the XSS-bitmap. */ - return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_XSAVES); + WARN_ON_ONCE(!nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_XSAVES)); + return true; case EXIT_REASON_UMWAIT: case EXIT_REASON_TPAUSE: return nested_cpu_has2(vmcs12, @@ -7029,13 +7179,17 @@ static void nested_vmx_setup_exit_ctls(struct vmcs_config *vmcs_conf, VM_EXIT_HOST_ADDR_SPACE_SIZE | #endif VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT | - VM_EXIT_CLEAR_BNDCFGS; + VM_EXIT_CLEAR_BNDCFGS | VM_EXIT_LOAD_CET_STATE; msrs->exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER | VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT | VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL; + if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK) && + !kvm_cpu_cap_has(X86_FEATURE_IBT)) + msrs->exit_ctls_high &= ~VM_EXIT_LOAD_CET_STATE; + /* We support free control of debug control saving. */ msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS; } @@ -7051,11 +7205,16 @@ static void nested_vmx_setup_entry_ctls(struct vmcs_config *vmcs_conf, #ifdef CONFIG_X86_64 VM_ENTRY_IA32E_MODE | #endif - VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS; + VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS | + VM_ENTRY_LOAD_CET_STATE; msrs->entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER | VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL); + if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK) && + !kvm_cpu_cap_has(X86_FEATURE_IBT)) + msrs->entry_ctls_high &= ~VM_ENTRY_LOAD_CET_STATE; + /* We support free control of debug control loading. 
*/ msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS; } @@ -7205,6 +7364,8 @@ static void nested_vmx_setup_basic(struct nested_vmx_msrs *msrs) msrs->basic |= VMX_BASIC_TRUE_CTLS; if (cpu_has_vmx_basic_inout()) msrs->basic |= VMX_BASIC_INOUT; + if (cpu_has_vmx_basic_no_hw_errcode_cc()) + msrs->basic |= VMX_BASIC_NO_HW_ERROR_CODE_CC; } static void nested_vmx_setup_cr_fixed(struct nested_vmx_msrs *msrs) diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h index 6eedcfc91070..983484d42ebf 100644 --- a/arch/x86/kvm/vmx/nested.h +++ b/arch/x86/kvm/vmx/nested.h @@ -309,6 +309,11 @@ static inline bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val) __kvm_is_valid_cr4(vcpu, val); } +static inline bool nested_cpu_has_no_hw_errcode_cc(struct kvm_vcpu *vcpu) +{ + return to_vmx(vcpu)->nested.msrs.basic & VMX_BASIC_NO_HW_ERROR_CODE_CC; +} + /* No difference in the restrictions on guest and host CR4 in VMX operation. */ #define nested_guest_cr4_valid nested_cr4_valid #define nested_host_cr4_valid nested_cr4_valid diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 0b173602821b..de1d9785c01f 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -138,7 +138,7 @@ static inline u64 vcpu_get_perf_capabilities(struct kvm_vcpu *vcpu) static inline bool fw_writes_is_enabled(struct kvm_vcpu *vcpu) { - return (vcpu_get_perf_capabilities(vcpu) & PMU_CAP_FW_WRITES) != 0; + return (vcpu_get_perf_capabilities(vcpu) & PERF_CAP_FW_WRITES) != 0; } static inline struct kvm_pmc *get_fw_gp_pmc(struct kvm_pmu *pmu, u32 msr) @@ -478,8 +478,8 @@ static __always_inline u64 intel_get_fixed_pmc_eventsel(unsigned int index) }; u64 eventsel; - BUILD_BUG_ON(ARRAY_SIZE(fixed_pmc_perf_ids) != KVM_MAX_NR_INTEL_FIXED_COUTNERS); - BUILD_BUG_ON(index >= KVM_MAX_NR_INTEL_FIXED_COUTNERS); + BUILD_BUG_ON(ARRAY_SIZE(fixed_pmc_perf_ids) != KVM_MAX_NR_INTEL_FIXED_COUNTERS); + BUILD_BUG_ON(index >= KVM_MAX_NR_INTEL_FIXED_COUNTERS); /* * Yell if perf reports support for a fixed counter but perf doesn't @@ -536,29 +536,44 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) kvm_pmu_cap.num_counters_gp); eax.split.bit_width = min_t(int, eax.split.bit_width, kvm_pmu_cap.bit_width_gp); - pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << eax.split.bit_width) - 1; + pmu->counter_bitmask[KVM_PMC_GP] = BIT_ULL(eax.split.bit_width) - 1; eax.split.mask_length = min_t(int, eax.split.mask_length, kvm_pmu_cap.events_mask_len); - pmu->available_event_types = ~entry->ebx & - ((1ull << eax.split.mask_length) - 1); - - if (pmu->version == 1) { - pmu->nr_arch_fixed_counters = 0; - } else { - pmu->nr_arch_fixed_counters = min_t(int, edx.split.num_counters_fixed, - kvm_pmu_cap.num_counters_fixed); - edx.split.bit_width_fixed = min_t(int, edx.split.bit_width_fixed, - kvm_pmu_cap.bit_width_fixed); - pmu->counter_bitmask[KVM_PMC_FIXED] = - ((u64)1 << edx.split.bit_width_fixed) - 1; + pmu->available_event_types = ~entry->ebx & (BIT_ULL(eax.split.mask_length) - 1); + + entry = kvm_find_cpuid_entry_index(vcpu, 7, 0); + if (entry && + (boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) && + (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM))) { + pmu->reserved_bits ^= HSW_IN_TX; + pmu->raw_event_mask |= (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED); } + perf_capabilities = vcpu_get_perf_capabilities(vcpu); + if (intel_pmu_lbr_is_compatible(vcpu) && + (perf_capabilities & PERF_CAP_LBR_FMT)) + memcpy(&lbr_desc->records, &vmx_lbr_caps, sizeof(vmx_lbr_caps)); + else + lbr_desc->records.nr = 0; + + 
if (lbr_desc->records.nr) + bitmap_set(pmu->all_valid_pmc_idx, INTEL_PMC_IDX_FIXED_VLBR, 1); + + if (pmu->version == 1) + return; + + pmu->nr_arch_fixed_counters = min_t(int, edx.split.num_counters_fixed, + kvm_pmu_cap.num_counters_fixed); + edx.split.bit_width_fixed = min_t(int, edx.split.bit_width_fixed, + kvm_pmu_cap.bit_width_fixed); + pmu->counter_bitmask[KVM_PMC_FIXED] = BIT_ULL(edx.split.bit_width_fixed) - 1; + intel_pmu_enable_fixed_counter_bits(pmu, INTEL_FIXED_0_KERNEL | INTEL_FIXED_0_USER | INTEL_FIXED_0_ENABLE_PMI); - counter_rsvd = ~(((1ull << pmu->nr_arch_gp_counters) - 1) | - (((1ull << pmu->nr_arch_fixed_counters) - 1) << KVM_FIXED_PMC_BASE_IDX)); + counter_rsvd = ~((BIT_ULL(pmu->nr_arch_gp_counters) - 1) | + ((BIT_ULL(pmu->nr_arch_fixed_counters) - 1) << KVM_FIXED_PMC_BASE_IDX)); pmu->global_ctrl_rsvd = counter_rsvd; /* @@ -573,29 +588,6 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) pmu->global_status_rsvd &= ~MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI; - entry = kvm_find_cpuid_entry_index(vcpu, 7, 0); - if (entry && - (boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) && - (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM))) { - pmu->reserved_bits ^= HSW_IN_TX; - pmu->raw_event_mask |= (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED); - } - - bitmap_set(pmu->all_valid_pmc_idx, - 0, pmu->nr_arch_gp_counters); - bitmap_set(pmu->all_valid_pmc_idx, - INTEL_PMC_MAX_GENERIC, pmu->nr_arch_fixed_counters); - - perf_capabilities = vcpu_get_perf_capabilities(vcpu); - if (intel_pmu_lbr_is_compatible(vcpu) && - (perf_capabilities & PMU_CAP_LBR_FMT)) - memcpy(&lbr_desc->records, &vmx_lbr_caps, sizeof(vmx_lbr_caps)); - else - lbr_desc->records.nr = 0; - - if (lbr_desc->records.nr) - bitmap_set(pmu->all_valid_pmc_idx, INTEL_PMC_IDX_FIXED_VLBR, 1); - if (perf_capabilities & PERF_CAP_PEBS_FORMAT) { if (perf_capabilities & PERF_CAP_PEBS_BASELINE) { pmu->pebs_enable_rsvd = counter_rsvd; @@ -603,8 +595,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) pmu->pebs_data_cfg_rsvd = ~0xff00000full; intel_pmu_enable_fixed_counter_bits(pmu, ICL_FIXED_0_ADAPTIVE); } else { - pmu->pebs_enable_rsvd = - ~((1ull << pmu->nr_arch_gp_counters) - 1); + pmu->pebs_enable_rsvd = ~(BIT_ULL(pmu->nr_arch_gp_counters) - 1); } } } @@ -625,7 +616,7 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu) pmu->gp_counters[i].current_config = 0; } - for (i = 0; i < KVM_MAX_NR_INTEL_FIXED_COUTNERS; i++) { + for (i = 0; i < KVM_MAX_NR_INTEL_FIXED_COUNTERS; i++) { pmu->fixed_counters[i].type = KVM_PMC_FIXED; pmu->fixed_counters[i].vcpu = vcpu; pmu->fixed_counters[i].idx = i + KVM_FIXED_PMC_BASE_IDX; @@ -762,7 +753,7 @@ void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu) int bit, hw_idx; kvm_for_each_pmc(pmu, pmc, bit, (unsigned long *)&pmu->global_ctrl) { - if (!pmc_speculative_in_use(pmc) || + if (!pmc_is_locally_enabled(pmc) || !pmc_is_globally_enabled(pmc) || !pmc->perf_event) continue; diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c index 66744f5768c8..0a49c863c811 100644 --- a/arch/x86/kvm/vmx/tdx.c +++ b/arch/x86/kvm/vmx/tdx.c @@ -281,25 +281,6 @@ static inline void tdx_disassociate_vp(struct kvm_vcpu *vcpu) vcpu->cpu = -1; } -static void tdx_clear_page(struct page *page) -{ - const void *zero_page = (const void *) page_to_virt(ZERO_PAGE(0)); - void *dest = page_to_virt(page); - unsigned long i; - - /* - * The page could have been poisoned. MOVDIR64B also clears - * the poison bit so the kernel can safely use the page again. 
- */ - for (i = 0; i < PAGE_SIZE; i += 64) - movdir64b(dest + i, zero_page); - /* - * MOVDIR64B store uses WC buffer. Prevent following memory reads - * from seeing potentially poisoned cache. - */ - __mb(); -} - static void tdx_no_vcpus_enter_start(struct kvm *kvm) { struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm); @@ -345,7 +326,7 @@ static int tdx_reclaim_page(struct page *page) r = __tdx_reclaim_page(page); if (!r) - tdx_clear_page(page); + tdx_quirk_reset_page(page); return r; } @@ -442,6 +423,16 @@ void tdx_disable_virtualization_cpu(void) tdx_flush_vp(&arg); } local_irq_restore(flags); + + /* + * Flush cache now if kexec is possible: this is necessary to avoid + * having dirty private memory cachelines when the new kernel boots, + * but WBINVD is a relatively expensive operation and doing it during + * kexec can exacerbate races in native_stop_other_cpus(). Do it + * now, since this is a safe moment and there is going to be no more + * TDX activity on this CPU from this point on. + */ + tdx_cpu_flush_cache_for_kexec(); } #define TDX_SEAMCALL_RETRIES 10000 @@ -593,7 +584,7 @@ static void tdx_reclaim_td_control_pages(struct kvm *kvm) pr_tdx_error(TDH_PHYMEM_PAGE_WBINVD, err); return; } - tdx_clear_page(kvm_tdx->td.tdr_page); + tdx_quirk_reset_page(kvm_tdx->td.tdr_page); __free_page(kvm_tdx->td.tdr_page); kvm_tdx->td.tdr_page = NULL; @@ -629,6 +620,11 @@ int tdx_vm_init(struct kvm *kvm) struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm); kvm->arch.has_protected_state = true; + /* + * TDX Module doesn't allow the hypervisor to modify the EOI-bitmap, + * i.e. all EOIs are accelerated and never trigger exits. + */ + kvm->arch.has_protected_eoi = true; kvm->arch.has_private_mem = true; kvm->arch.disabled_quirks |= KVM_X86_QUIRK_IGNORE_GUEST_PAT; @@ -861,6 +857,7 @@ void tdx_vcpu_free(struct kvm_vcpu *vcpu) if (tdx->vp.tdvpr_page) { tdx_reclaim_control_page(tdx->vp.tdvpr_page); tdx->vp.tdvpr_page = 0; + tdx->vp.tdvpr_pa = 0; } tdx->state = VCPU_TD_STATE_UNINITIALIZED; @@ -1714,7 +1711,7 @@ static int tdx_sept_drop_private_spte(struct kvm *kvm, gfn_t gfn, pr_tdx_error(TDH_PHYMEM_PAGE_WBINVD, err); return -EIO; } - tdx_clear_page(page); + tdx_quirk_reset_page(page); tdx_unpin(kvm, page); return 0; } @@ -2002,6 +1999,8 @@ static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu) * handle retries locally in their EPT violation handlers. */ while (1) { + struct kvm_memory_slot *slot; + ret = __vmx_handle_ept_violation(vcpu, gpa, exit_qual); if (ret != RET_PF_RETRY || !local_retry) @@ -2015,6 +2014,15 @@ static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu) break; } + /* + * Bail if the memslot is invalid, i.e. is being deleted, as + * faulting in will never succeed and this task needs to drop + * SRCU in order to let memslot deletion complete. + */ + slot = kvm_vcpu_gfn_to_memslot(vcpu, gpa_to_gfn(gpa)); + if (slot && slot->flags & KVM_MEMSLOT_INVALID) + break; + cond_resched(); } return ret; @@ -2480,7 +2488,7 @@ static int __tdx_td_init(struct kvm *kvm, struct td_params *td_params, /* TDVPS = TDVPR(4K page) + TDCX(multiple 4K pages), -1 for TDVPR. 
*/ kvm_tdx->td.tdcx_nr_pages = tdx_sysinfo->td_ctrl.tdvps_base_size / PAGE_SIZE - 1; tdcs_pages = kcalloc(kvm_tdx->td.tdcs_nr_pages, sizeof(*kvm_tdx->td.tdcs_pages), - GFP_KERNEL | __GFP_ZERO); + GFP_KERNEL); if (!tdcs_pages) goto free_tdr; @@ -2940,6 +2948,13 @@ static int tdx_td_vcpu_init(struct kvm_vcpu *vcpu, u64 vcpu_rcx) return -ENOMEM; tdx->vp.tdvpr_page = page; + /* + * page_to_phys() does not work in 'noinstr' code, like guest + * entry via tdh_vp_enter(). Precalculate and store it instead + * of doing it at runtime later. + */ + tdx->vp.tdvpr_pa = page_to_phys(tdx->vp.tdvpr_page); + tdx->vp.tdcx_pages = kcalloc(kvm_tdx->td.tdcx_nr_pages, sizeof(*tdx->vp.tdcx_pages), GFP_KERNEL); if (!tdx->vp.tdcx_pages) { @@ -3002,6 +3017,7 @@ free_tdvpr: if (tdx->vp.tdvpr_page) __free_page(tdx->vp.tdvpr_page); tdx->vp.tdvpr_page = 0; + tdx->vp.tdvpr_pa = 0; return ret; } @@ -3318,8 +3334,11 @@ int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp) return ret; } -int tdx_gmem_private_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn) +int tdx_gmem_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn, bool is_private) { + if (!is_private) + return 0; + return PG_LEVEL_4K; } @@ -3457,12 +3476,11 @@ static int __init __tdx_bringup(void) if (r) goto tdx_bringup_err; + r = -EINVAL; /* Get TDX global information for later use */ tdx_sysinfo = tdx_get_sysinfo(); - if (WARN_ON_ONCE(!tdx_sysinfo)) { - r = -EINVAL; + if (WARN_ON_ONCE(!tdx_sysinfo)) goto get_sysinfo_err; - } /* Check TDX module and KVM capabilities */ if (!tdx_get_supported_attrs(&tdx_sysinfo->td_conf) || @@ -3505,14 +3523,11 @@ static int __init __tdx_bringup(void) if (td_conf->max_vcpus_per_td < num_present_cpus()) { pr_err("Disable TDX: MAX_VCPU_PER_TD (%u) smaller than number of logical CPUs (%u).\n", td_conf->max_vcpus_per_td, num_present_cpus()); - r = -EINVAL; goto get_sysinfo_err; } - if (misc_cg_set_capacity(MISC_CG_RES_TDX, tdx_get_nr_guest_keyids())) { - r = -EINVAL; + if (misc_cg_set_capacity(MISC_CG_RES_TDX, tdx_get_nr_guest_keyids())) goto get_sysinfo_err; - } /* * Leave hardware virtualization enabled after TDX is enabled diff --git a/arch/x86/kvm/vmx/vmcs12.c b/arch/x86/kvm/vmx/vmcs12.c index 106a72c923ca..4233b5ca9461 100644 --- a/arch/x86/kvm/vmx/vmcs12.c +++ b/arch/x86/kvm/vmx/vmcs12.c @@ -139,6 +139,9 @@ const unsigned short vmcs12_field_offsets[] = { FIELD(GUEST_PENDING_DBG_EXCEPTIONS, guest_pending_dbg_exceptions), FIELD(GUEST_SYSENTER_ESP, guest_sysenter_esp), FIELD(GUEST_SYSENTER_EIP, guest_sysenter_eip), + FIELD(GUEST_S_CET, guest_s_cet), + FIELD(GUEST_SSP, guest_ssp), + FIELD(GUEST_INTR_SSP_TABLE, guest_ssp_tbl), FIELD(HOST_CR0, host_cr0), FIELD(HOST_CR3, host_cr3), FIELD(HOST_CR4, host_cr4), @@ -151,5 +154,8 @@ const unsigned short vmcs12_field_offsets[] = { FIELD(HOST_IA32_SYSENTER_EIP, host_ia32_sysenter_eip), FIELD(HOST_RSP, host_rsp), FIELD(HOST_RIP, host_rip), + FIELD(HOST_S_CET, host_s_cet), + FIELD(HOST_SSP, host_ssp), + FIELD(HOST_INTR_SSP_TABLE, host_ssp_tbl), }; const unsigned int nr_vmcs12_fields = ARRAY_SIZE(vmcs12_field_offsets); diff --git a/arch/x86/kvm/vmx/vmcs12.h b/arch/x86/kvm/vmx/vmcs12.h index 56fd150a6f24..4ad6b16525b9 100644 --- a/arch/x86/kvm/vmx/vmcs12.h +++ b/arch/x86/kvm/vmx/vmcs12.h @@ -117,7 +117,13 @@ struct __packed vmcs12 { natural_width host_ia32_sysenter_eip; natural_width host_rsp; natural_width host_rip; - natural_width paddingl[8]; /* room for future expansion */ + natural_width host_s_cet; + natural_width host_ssp; + natural_width host_ssp_tbl; + natural_width guest_s_cet; + 
natural_width guest_ssp; + natural_width guest_ssp_tbl; + natural_width paddingl[2]; /* room for future expansion */ u32 pin_based_vm_exec_control; u32 cpu_based_vm_exec_control; u32 exception_bitmap; @@ -294,6 +300,12 @@ static inline void vmx_check_vmcs12_offsets(void) CHECK_OFFSET(host_ia32_sysenter_eip, 656); CHECK_OFFSET(host_rsp, 664); CHECK_OFFSET(host_rip, 672); + CHECK_OFFSET(host_s_cet, 680); + CHECK_OFFSET(host_ssp, 688); + CHECK_OFFSET(host_ssp_tbl, 696); + CHECK_OFFSET(guest_s_cet, 704); + CHECK_OFFSET(guest_ssp, 712); + CHECK_OFFSET(guest_ssp_tbl, 720); CHECK_OFFSET(pin_based_vm_exec_control, 744); CHECK_OFFSET(cpu_based_vm_exec_control, 748); CHECK_OFFSET(exception_bitmap, 752); diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 0a6cf5bff2aa..bc255d709d8a 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -361,6 +361,10 @@ SYM_FUNC_END(vmread_error_trampoline) .section .text, "ax" +#ifndef CONFIG_X86_FRED + SYM_FUNC_START(vmx_do_interrupt_irqoff) VMX_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1 SYM_FUNC_END(vmx_do_interrupt_irqoff) + +#endif diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index aa157fe5b7b3..f87c216d976d 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -28,7 +28,6 @@ #include <linux/slab.h> #include <linux/tboot.h> #include <linux/trace_events.h> -#include <linux/entry-kvm.h> #include <asm/apic.h> #include <asm/asm.h> @@ -1344,22 +1343,35 @@ static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx) } #ifdef CONFIG_X86_64 -static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx) +static u64 vmx_read_guest_host_msr(struct vcpu_vmx *vmx, u32 msr, u64 *cache) { preempt_disable(); if (vmx->vt.guest_state_loaded) - rdmsrq(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); + *cache = read_msr(msr); preempt_enable(); - return vmx->msr_guest_kernel_gs_base; + return *cache; } -static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data) +static void vmx_write_guest_host_msr(struct vcpu_vmx *vmx, u32 msr, u64 data, + u64 *cache) { preempt_disable(); if (vmx->vt.guest_state_loaded) - wrmsrq(MSR_KERNEL_GS_BASE, data); + wrmsrns(msr, data); preempt_enable(); - vmx->msr_guest_kernel_gs_base = data; + *cache = data; +} + +static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx) +{ + return vmx_read_guest_host_msr(vmx, MSR_KERNEL_GS_BASE, + &vmx->msr_guest_kernel_gs_base); +} + +static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data) +{ + vmx_write_guest_host_msr(vmx, MSR_KERNEL_GS_BASE, data, + &vmx->msr_guest_kernel_gs_base); } #endif @@ -2093,6 +2105,15 @@ int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) else msr_info->data = vmx->pt_desc.guest.addr_a[index / 2]; break; + case MSR_IA32_S_CET: + msr_info->data = vmcs_readl(GUEST_S_CET); + break; + case MSR_KVM_INTERNAL_GUEST_SSP: + msr_info->data = vmcs_readl(GUEST_SSP); + break; + case MSR_IA32_INT_SSP_TAB: + msr_info->data = vmcs_readl(GUEST_INTR_SSP_TABLE); + break; case MSR_IA32_DEBUGCTLMSR: msr_info->data = vmx_guest_debugctl_read(); break; @@ -2127,7 +2148,7 @@ u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated) (host_initiated || guest_cpu_cap_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))) debugctl |= DEBUGCTLMSR_BUS_LOCK_DETECT; - if ((kvm_caps.supported_perf_cap & PMU_CAP_LBR_FMT) && + if ((kvm_caps.supported_perf_cap & PERF_CAP_LBR_FMT) && (host_initiated || intel_pmu_lbr_is_enabled(vcpu))) debugctl |= DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI; @@ 
-2411,10 +2432,19 @@ int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) else vmx->pt_desc.guest.addr_a[index / 2] = data; break; + case MSR_IA32_S_CET: + vmcs_writel(GUEST_S_CET, data); + break; + case MSR_KVM_INTERNAL_GUEST_SSP: + vmcs_writel(GUEST_SSP, data); + break; + case MSR_IA32_INT_SSP_TAB: + vmcs_writel(GUEST_INTR_SSP_TABLE, data); + break; case MSR_IA32_PERF_CAPABILITIES: - if (data & PMU_CAP_LBR_FMT) { - if ((data & PMU_CAP_LBR_FMT) != - (kvm_caps.supported_perf_cap & PMU_CAP_LBR_FMT)) + if (data & PERF_CAP_LBR_FMT) { + if ((data & PERF_CAP_LBR_FMT) != + (kvm_caps.supported_perf_cap & PERF_CAP_LBR_FMT)) return 1; if (!cpuid_model_is_consistent(vcpu)) return 1; @@ -2584,6 +2614,7 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf, { VM_ENTRY_LOAD_IA32_EFER, VM_EXIT_LOAD_IA32_EFER }, { VM_ENTRY_LOAD_BNDCFGS, VM_EXIT_CLEAR_BNDCFGS }, { VM_ENTRY_LOAD_IA32_RTIT_CTL, VM_EXIT_CLEAR_IA32_RTIT_CTL }, + { VM_ENTRY_LOAD_CET_STATE, VM_EXIT_LOAD_CET_STATE }, }; memset(vmcs_conf, 0, sizeof(*vmcs_conf)); @@ -4068,8 +4099,10 @@ void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu) } } -void vmx_recalc_msr_intercepts(struct kvm_vcpu *vcpu) +static void vmx_recalc_msr_intercepts(struct kvm_vcpu *vcpu) { + bool intercept; + if (!cpu_has_vmx_msr_bitmap()) return; @@ -4115,12 +4148,34 @@ void vmx_recalc_msr_intercepts(struct kvm_vcpu *vcpu) vmx_set_intercept_for_msr(vcpu, MSR_IA32_FLUSH_CMD, MSR_TYPE_W, !guest_cpu_cap_has(vcpu, X86_FEATURE_FLUSH_L1D)); + if (kvm_cpu_cap_has(X86_FEATURE_SHSTK)) { + intercept = !guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK); + + vmx_set_intercept_for_msr(vcpu, MSR_IA32_PL0_SSP, MSR_TYPE_RW, intercept); + vmx_set_intercept_for_msr(vcpu, MSR_IA32_PL1_SSP, MSR_TYPE_RW, intercept); + vmx_set_intercept_for_msr(vcpu, MSR_IA32_PL2_SSP, MSR_TYPE_RW, intercept); + vmx_set_intercept_for_msr(vcpu, MSR_IA32_PL3_SSP, MSR_TYPE_RW, intercept); + } + + if (kvm_cpu_cap_has(X86_FEATURE_SHSTK) || kvm_cpu_cap_has(X86_FEATURE_IBT)) { + intercept = !guest_cpu_cap_has(vcpu, X86_FEATURE_IBT) && + !guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK); + + vmx_set_intercept_for_msr(vcpu, MSR_IA32_U_CET, MSR_TYPE_RW, intercept); + vmx_set_intercept_for_msr(vcpu, MSR_IA32_S_CET, MSR_TYPE_RW, intercept); + } + /* * x2APIC and LBR MSR intercepts are modified on-demand and cannot be * filtered by userspace. */ } +void vmx_recalc_intercepts(struct kvm_vcpu *vcpu) +{ + vmx_recalc_msr_intercepts(vcpu); +} + static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu, int vector) { @@ -4270,6 +4325,21 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx) if (cpu_has_load_ia32_efer()) vmcs_write64(HOST_IA32_EFER, kvm_host.efer); + + /* + * Supervisor shadow stack is not enabled on host side, i.e., + * host IA32_S_CET.SHSTK_EN bit is guaranteed to 0 now, per SDM + * description(RDSSP instruction), SSP is not readable in CPL0, + * so resetting the two registers to 0s at VM-Exit does no harm + * to kernel execution. When execution flow exits to userspace, + * SSP is reloaded from IA32_PL3_SSP. Check SDM Vol.2A/B Chapter + * 3 and 4 for details. 
+ */ + if (cpu_has_load_cet_ctrl()) { + vmcs_writel(HOST_S_CET, kvm_host.s_cet); + vmcs_writel(HOST_SSP, 0); + vmcs_writel(HOST_INTR_SSP_TABLE, 0); + } } void set_cr4_guest_host_mask(struct vcpu_vmx *vmx) @@ -4304,7 +4374,7 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) return pin_based_exec_ctrl; } -static u32 vmx_vmentry_ctrl(void) +static u32 vmx_get_initial_vmentry_ctrl(void) { u32 vmentry_ctrl = vmcs_config.vmentry_ctrl; @@ -4321,7 +4391,7 @@ static u32 vmx_vmentry_ctrl(void) return vmentry_ctrl; } -static u32 vmx_vmexit_ctrl(void) +static u32 vmx_get_initial_vmexit_ctrl(void) { u32 vmexit_ctrl = vmcs_config.vmexit_ctrl; @@ -4351,19 +4421,13 @@ void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx)); - if (kvm_vcpu_apicv_active(vcpu)) { - secondary_exec_controls_setbit(vmx, - SECONDARY_EXEC_APIC_REGISTER_VIRT | - SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); - if (enable_ipiv) - tertiary_exec_controls_setbit(vmx, TERTIARY_EXEC_IPI_VIRT); - } else { - secondary_exec_controls_clearbit(vmx, - SECONDARY_EXEC_APIC_REGISTER_VIRT | - SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); - if (enable_ipiv) - tertiary_exec_controls_clearbit(vmx, TERTIARY_EXEC_IPI_VIRT); - } + secondary_exec_controls_changebit(vmx, + SECONDARY_EXEC_APIC_REGISTER_VIRT | + SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY, + kvm_vcpu_apicv_active(vcpu)); + if (enable_ipiv) + tertiary_exec_controls_changebit(vmx, TERTIARY_EXEC_IPI_VIRT, + kvm_vcpu_apicv_active(vcpu)); vmx_update_msr_bitmap_x2apic(vcpu); } @@ -4686,10 +4750,10 @@ static void init_vmcs(struct vcpu_vmx *vmx) if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat); - vm_exit_controls_set(vmx, vmx_vmexit_ctrl()); + vm_exit_controls_set(vmx, vmx_get_initial_vmexit_ctrl()); /* 22.2.1, 20.8.1 */ - vm_entry_controls_set(vmx, vmx_vmentry_ctrl()); + vm_entry_controls_set(vmx, vmx_get_initial_vmentry_ctrl()); vmx->vcpu.arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits(); vmcs_writel(CR0_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr0_guest_owned_bits); @@ -4817,6 +4881,14 @@ void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */ + if (kvm_cpu_cap_has(X86_FEATURE_SHSTK)) { + vmcs_writel(GUEST_SSP, 0); + vmcs_writel(GUEST_INTR_SSP_TABLE, 0); + } + if (kvm_cpu_cap_has(X86_FEATURE_IBT) || + kvm_cpu_cap_has(X86_FEATURE_SHSTK)) + vmcs_writel(GUEST_S_CET, 0); + kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); vpid_sync_context(vmx->vpid); @@ -5785,6 +5857,13 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) if (kvm_test_request(KVM_REQ_EVENT, vcpu)) return 1; + /* + * Ensure that any updates to kvm->buses[] observed by the + * previous instruction (emulated or otherwise) are also + * visible to the instruction KVM is about to emulate. + */ + smp_rmb(); + if (!kvm_emulate_instruction(vcpu, 0)) return 0; @@ -6003,6 +6082,23 @@ static int handle_notify(struct kvm_vcpu *vcpu) return 1; } +static int vmx_get_msr_imm_reg(struct kvm_vcpu *vcpu) +{ + return vmx_get_instr_info_reg(vmcs_read32(VMX_INSTRUCTION_INFO)); +} + +static int handle_rdmsr_imm(struct kvm_vcpu *vcpu) +{ + return kvm_emulate_rdmsr_imm(vcpu, vmx_get_exit_qual(vcpu), + vmx_get_msr_imm_reg(vcpu)); +} + +static int handle_wrmsr_imm(struct kvm_vcpu *vcpu) +{ + return kvm_emulate_wrmsr_imm(vcpu, vmx_get_exit_qual(vcpu), + vmx_get_msr_imm_reg(vcpu)); +} + /* * The exit handlers return 1 if the exit was handled fully and guest execution * may resume. 
Otherwise they set the kvm_run parameter to indicate what needs @@ -6061,6 +6157,8 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_ENCLS] = handle_encls, [EXIT_REASON_BUS_LOCK] = handle_bus_lock_vmexit, [EXIT_REASON_NOTIFY] = handle_notify, + [EXIT_REASON_MSR_READ_IMM] = handle_rdmsr_imm, + [EXIT_REASON_MSR_WRITE_IMM] = handle_wrmsr_imm, }; static const int kvm_vmx_max_exit_handlers = @@ -6265,6 +6363,10 @@ void dump_vmcs(struct kvm_vcpu *vcpu) if (vmcs_read32(VM_EXIT_MSR_STORE_COUNT) > 0) vmx_dump_msrs("guest autostore", &vmx->msr_autostore.guest); + if (vmentry_ctl & VM_ENTRY_LOAD_CET_STATE) + pr_err("S_CET = 0x%016lx, SSP = 0x%016lx, SSP TABLE = 0x%016lx\n", + vmcs_readl(GUEST_S_CET), vmcs_readl(GUEST_SSP), + vmcs_readl(GUEST_INTR_SSP_TABLE)); pr_err("*** Host State ***\n"); pr_err("RIP = 0x%016lx RSP = 0x%016lx\n", vmcs_readl(HOST_RIP), vmcs_readl(HOST_RSP)); @@ -6295,6 +6397,10 @@ void dump_vmcs(struct kvm_vcpu *vcpu) vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL)); if (vmcs_read32(VM_EXIT_MSR_LOAD_COUNT) > 0) vmx_dump_msrs("host autoload", &vmx->msr_autoload.host); + if (vmexit_ctl & VM_EXIT_LOAD_CET_STATE) + pr_err("S_CET = 0x%016lx, SSP = 0x%016lx, SSP TABLE = 0x%016lx\n", + vmcs_readl(HOST_S_CET), vmcs_readl(HOST_SSP), + vmcs_readl(HOST_INTR_SSP_TABLE)); pr_err("*** Control State ***\n"); pr_err("CPUBased=0x%08x SecondaryExec=0x%08x TertiaryExec=0x%016llx\n", @@ -6495,6 +6601,8 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) #ifdef CONFIG_MITIGATION_RETPOLINE if (exit_reason.basic == EXIT_REASON_MSR_WRITE) return kvm_emulate_wrmsr(vcpu); + else if (exit_reason.basic == EXIT_REASON_MSR_WRITE_IMM) + return handle_wrmsr_imm(vcpu); else if (exit_reason.basic == EXIT_REASON_PREEMPTION_TIMER) return handle_preemption_timer(vcpu); else if (exit_reason.basic == EXIT_REASON_INTERRUPT_WINDOW) @@ -6913,8 +7021,14 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu, "unexpected VM-Exit interrupt info: 0x%x", intr_info)) return; + /* + * Invoke the kernel's IRQ handler for the vector. Use the FRED path + * when it's available even if FRED isn't fully enabled, e.g. even if + * FRED isn't supported in hardware, in order to avoid the indirect + * CALL in the non-FRED path. 
+ */ kvm_before_interrupt(vcpu, KVM_HANDLING_IRQ); - if (cpu_feature_enabled(X86_FEATURE_FRED)) + if (IS_ENABLED(CONFIG_X86_FRED)) fred_entry_from_kvm(EVENT_TYPE_EXTINT, vector); else vmx_do_interrupt_irqoff(gate_offset((gate_desc *)host_idt_base + vector)); @@ -7170,11 +7284,16 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu, switch (vmx_get_exit_reason(vcpu).basic) { case EXIT_REASON_MSR_WRITE: - return handle_fastpath_set_msr_irqoff(vcpu); + return handle_fastpath_wrmsr(vcpu); + case EXIT_REASON_MSR_WRITE_IMM: + return handle_fastpath_wrmsr_imm(vcpu, vmx_get_exit_qual(vcpu), + vmx_get_msr_imm_reg(vcpu)); case EXIT_REASON_PREEMPTION_TIMER: return handle_fastpath_preemption_timer(vcpu, force_immediate_exit); case EXIT_REASON_HLT: return handle_fastpath_hlt(vcpu); + case EXIT_REASON_INVD: + return handle_fastpath_invd(vcpu); default: return EXIT_FASTPATH_NONE; } @@ -7641,6 +7760,8 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu) cr4_fixed1_update(X86_CR4_PKE, ecx, feature_bit(PKU)); cr4_fixed1_update(X86_CR4_UMIP, ecx, feature_bit(UMIP)); cr4_fixed1_update(X86_CR4_LA57, ecx, feature_bit(LA57)); + cr4_fixed1_update(X86_CR4_CET, ecx, feature_bit(SHSTK)); + cr4_fixed1_update(X86_CR4_CET, edx, feature_bit(IBT)); entry = kvm_find_cpuid_entry_index(vcpu, 0x7, 1); cr4_fixed1_update(X86_CR4_LAM_SUP, eax, feature_bit(LAM)); @@ -7775,16 +7896,13 @@ void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) vmx->msr_ia32_feature_control_valid_bits &= ~FEAT_CTL_SGX_LC_ENABLED; - /* Recalc MSR interception to account for feature changes. */ - vmx_recalc_msr_intercepts(vcpu); - /* Refresh #PF interception to account for MAXPHYADDR changes. */ vmx_update_exception_bitmap(vcpu); } static __init u64 vmx_get_perf_capabilities(void) { - u64 perf_cap = PMU_CAP_FW_WRITES; + u64 perf_cap = PERF_CAP_FW_WRITES; u64 host_perf_cap = 0; if (!enable_pmu) @@ -7804,7 +7922,7 @@ static __init u64 vmx_get_perf_capabilities(void) if (!vmx_lbr_caps.has_callstack) memset(&vmx_lbr_caps, 0, sizeof(vmx_lbr_caps)); else if (vmx_lbr_caps.nr) - perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT; + perf_cap |= host_perf_cap & PERF_CAP_LBR_FMT; } if (vmx_pebs_supported()) { @@ -7872,7 +7990,6 @@ static __init void vmx_set_cpu_caps(void) kvm_cpu_cap_set(X86_FEATURE_UMIP); /* CPUID 0xD.1 */ - kvm_caps.supported_xss = 0; if (!cpu_has_vmx_xsaves()) kvm_cpu_cap_clear(X86_FEATURE_XSAVES); @@ -7884,6 +8001,18 @@ static __init void vmx_set_cpu_caps(void) if (cpu_has_vmx_waitpkg()) kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG); + + /* + * Disable CET if unrestricted_guest is unsupported as KVM doesn't + * enforce CET HW behaviors in emulator. On platforms with + * VMX_BASIC[bit56] == 0, inject #CP at VMX entry with error code + * fails, so disable CET in this case too. 
+ */ + if (!cpu_has_load_cet_ctrl() || !enable_unrestricted_guest || + !cpu_has_vmx_basic_no_hw_errcode_cc()) { + kvm_cpu_cap_clear(X86_FEATURE_SHSTK); + kvm_cpu_cap_clear(X86_FEATURE_IBT); + } } static bool vmx_is_io_intercepted(struct kvm_vcpu *vcpu, @@ -8333,8 +8462,6 @@ __init int vmx_hardware_setup(void) vmx_setup_user_return_msrs(); - if (setup_vmcs_config(&vmcs_config, &vmx_capability) < 0) - return -EIO; if (boot_cpu_has(X86_FEATURE_NX)) kvm_enable_efer_bits(EFER_NX); @@ -8364,6 +8491,14 @@ __init int vmx_hardware_setup(void) return -EOPNOTSUPP; } + /* + * Shadow paging doesn't have a (further) performance penalty + * from GUEST_MAXPHYADDR < HOST_MAXPHYADDR so enable it + * by default + */ + if (!enable_ept) + allow_smaller_maxphyaddr = true; + if (!cpu_has_vmx_ept_ad_bits() || !enable_ept) enable_ept_ad_bits = 0; @@ -8489,6 +8624,13 @@ __init int vmx_hardware_setup(void) setup_default_sgx_lepubkeyhash(); + vmx_set_cpu_caps(); + + /* + * Configure nested capabilities after core CPU capabilities so that + * nested support can be conditional on base support, e.g. so that KVM + * can hide/show features based on kvm_cpu_cap_has(). + */ if (nested) { nested_vmx_setup_ctls_msrs(&vmcs_config, vmx_capability.ept); @@ -8497,8 +8639,6 @@ __init int vmx_hardware_setup(void) return r; } - vmx_set_cpu_caps(); - r = alloc_kvm_area(); if (r && nested) nested_vmx_hardware_unsetup(); @@ -8525,7 +8665,9 @@ __init int vmx_hardware_setup(void) */ if (!static_cpu_has(X86_FEATURE_SELFSNOOP)) kvm_caps.supported_quirks &= ~KVM_X86_QUIRK_IGNORE_GUEST_PAT; - kvm_caps.inapplicable_quirks &= ~KVM_X86_QUIRK_IGNORE_GUEST_PAT; + + kvm_caps.inapplicable_quirks &= ~KVM_X86_QUIRK_IGNORE_GUEST_PAT; + return r; } @@ -8558,11 +8700,18 @@ int __init vmx_init(void) return -EOPNOTSUPP; /* - * Note, hv_init_evmcs() touches only VMX knobs, i.e. there's nothing - * to unwind if a later step fails. + * Note, VMCS and eVMCS configuration only touch VMX knobs/variables, + * i.e. there's nothing to unwind if a later step fails. */ hv_init_evmcs(); + /* + * Parse the VMCS config and VMX capabilities before anything else, so + * that the information is available to all setup flows. 
+ */ + if (setup_vmcs_config(&vmcs_config, &vmx_capability) < 0) + return -EIO; + r = kvm_x86_vendor_init(&vt_init_ops); if (r) return r; @@ -8586,14 +8735,6 @@ int __init vmx_init(void) vmx_check_vmcs12_offsets(); - /* - * Shadow paging doesn't have a (further) performance penalty - * from GUEST_MAXPHYADDR < HOST_MAXPHYADDR so enable it - * by default - */ - if (!enable_ept) - allow_smaller_maxphyaddr = true; - return 0; err_l1d_flush: diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index d3389baf3ab3..ea93121029f9 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -181,6 +181,9 @@ struct nested_vmx { */ u64 pre_vmenter_debugctl; u64 pre_vmenter_bndcfgs; + u64 pre_vmenter_s_cet; + u64 pre_vmenter_ssp; + u64 pre_vmenter_ssp_tbl; /* to migrate it to L1 if L2 writes to L1's CR8 directly */ int l1_tpr_threshold; @@ -484,7 +487,8 @@ static inline u8 vmx_get_rvi(void) VM_ENTRY_LOAD_IA32_EFER | \ VM_ENTRY_LOAD_BNDCFGS | \ VM_ENTRY_PT_CONCEAL_PIP | \ - VM_ENTRY_LOAD_IA32_RTIT_CTL) + VM_ENTRY_LOAD_IA32_RTIT_CTL | \ + VM_ENTRY_LOAD_CET_STATE) #define __KVM_REQUIRED_VMX_VM_EXIT_CONTROLS \ (VM_EXIT_SAVE_DEBUG_CONTROLS | \ @@ -506,7 +510,8 @@ static inline u8 vmx_get_rvi(void) VM_EXIT_LOAD_IA32_EFER | \ VM_EXIT_CLEAR_BNDCFGS | \ VM_EXIT_PT_CONCEAL_PIP | \ - VM_EXIT_CLEAR_IA32_RTIT_CTL) + VM_EXIT_CLEAR_IA32_RTIT_CTL | \ + VM_EXIT_LOAD_CET_STATE) #define KVM_REQUIRED_VMX_PIN_BASED_VM_EXEC_CONTROL \ (PIN_BASED_EXT_INTR_MASK | \ @@ -608,6 +613,14 @@ static __always_inline void lname##_controls_clearbit(struct vcpu_vmx *vmx, u##b { \ BUILD_BUG_ON(!(val & (KVM_REQUIRED_VMX_##uname | KVM_OPTIONAL_VMX_##uname))); \ lname##_controls_set(vmx, lname##_controls_get(vmx) & ~val); \ +} \ +static __always_inline void lname##_controls_changebit(struct vcpu_vmx *vmx, u##bits val, \ + bool set) \ +{ \ + if (set) \ + lname##_controls_setbit(vmx, val); \ + else \ + lname##_controls_clearbit(vmx, val); \ } BUILD_CONTROLS_SHADOW(vm_entry, VM_ENTRY_CONTROLS, 32) BUILD_CONTROLS_SHADOW(vm_exit, VM_EXIT_CONTROLS, 32) @@ -706,6 +719,11 @@ static inline bool vmx_guest_state_valid(struct kvm_vcpu *vcpu) void dump_vmcs(struct kvm_vcpu *vcpu); +static inline int vmx_get_instr_info_reg(u32 vmx_instr_info) +{ + return (vmx_instr_info >> 3) & 0xf; +} + static inline int vmx_get_instr_info_reg2(u32 vmx_instr_info) { return (vmx_instr_info >> 28) & 0xf; diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h index 2b3424f638db..9697368d65b3 100644 --- a/arch/x86/kvm/vmx/x86_ops.h +++ b/arch/x86/kvm/vmx/x86_ops.h @@ -52,7 +52,7 @@ void vmx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode, int trig_mode, int vector); void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu); bool vmx_has_emulated_msr(struct kvm *kvm, u32 index); -void vmx_recalc_msr_intercepts(struct kvm_vcpu *vcpu); +void vmx_recalc_intercepts(struct kvm_vcpu *vcpu); void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu); void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu); int vmx_get_feature_msr(u32 msr, u64 *data); @@ -153,7 +153,7 @@ int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp); void tdx_flush_tlb_current(struct kvm_vcpu *vcpu); void tdx_flush_tlb_all(struct kvm_vcpu *vcpu); void tdx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level); -int tdx_gmem_private_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn); +int tdx_gmem_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn, bool is_private); #endif #endif /* __KVM_X86_VMX_X86_OPS_H */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c 
index 706b6fd56d3c..b4b5d2d09634 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -59,7 +59,6 @@ #include <linux/sched/stat.h> #include <linux/sched/isolation.h> #include <linux/mem_encrypt.h> -#include <linux/entry-kvm.h> #include <linux/suspend.h> #include <linux/smp.h> @@ -97,10 +96,10 @@ * vendor module being reloaded with different module parameters. */ struct kvm_caps kvm_caps __read_mostly; -EXPORT_SYMBOL_GPL(kvm_caps); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_caps); struct kvm_host_values kvm_host __read_mostly; -EXPORT_SYMBOL_GPL(kvm_host); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_host); #define ERR_PTR_USR(e) ((void __user *)ERR_PTR(e)) @@ -136,6 +135,9 @@ static int __set_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2); static void __get_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2); static DEFINE_MUTEX(vendor_module_lock); +static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); +static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); + struct kvm_x86_ops kvm_x86_ops __read_mostly; #define KVM_X86_OP(func) \ @@ -152,7 +154,7 @@ module_param(ignore_msrs, bool, 0644); bool __read_mostly report_ignored_msrs = true; module_param(report_ignored_msrs, bool, 0644); -EXPORT_SYMBOL_GPL(report_ignored_msrs); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(report_ignored_msrs); unsigned int min_timer_period_us = 200; module_param(min_timer_period_us, uint, 0644); @@ -164,12 +166,9 @@ module_param(kvmclock_periodic_sync, bool, 0444); static u32 __read_mostly tsc_tolerance_ppm = 250; module_param(tsc_tolerance_ppm, uint, 0644); -static bool __read_mostly vector_hashing = true; -module_param(vector_hashing, bool, 0444); - bool __read_mostly enable_vmware_backdoor = false; module_param(enable_vmware_backdoor, bool, 0444); -EXPORT_SYMBOL_GPL(enable_vmware_backdoor); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_vmware_backdoor); /* * Flags to manipulate forced emulation behavior (any non-zero value will @@ -184,7 +183,7 @@ module_param(pi_inject_timer, bint, 0644); /* Enable/disable PMU virtualization */ bool __read_mostly enable_pmu = true; -EXPORT_SYMBOL_GPL(enable_pmu); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_pmu); module_param(enable_pmu, bool, 0444); bool __read_mostly eager_page_split = true; @@ -211,7 +210,7 @@ struct kvm_user_return_msrs { }; u32 __read_mostly kvm_nr_uret_msrs; -EXPORT_SYMBOL_GPL(kvm_nr_uret_msrs); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_nr_uret_msrs); static u32 __read_mostly kvm_uret_msrs_list[KVM_MAX_NR_USER_RETURN_MSRS]; static struct kvm_user_return_msrs __percpu *user_return_msrs; @@ -220,17 +219,26 @@ static struct kvm_user_return_msrs __percpu *user_return_msrs; | XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512 \ | XFEATURE_MASK_PKRU | XFEATURE_MASK_XTILE) +#define XFEATURE_MASK_CET_ALL (XFEATURE_MASK_CET_USER | XFEATURE_MASK_CET_KERNEL) +/* + * Note, KVM supports exposing PT to the guest, but does not support context + * switching PT via XSTATE (KVM's PT virtualization relies on perf; swapping + * PT via guest XSTATE would clobber perf state), i.e. KVM doesn't support + * IA32_XSS[bit 8] (guests can/must use RDMSR/WRMSR to save/restore PT MSRs). 
+ */ +#define KVM_SUPPORTED_XSS (XFEATURE_MASK_CET_ALL) + bool __read_mostly allow_smaller_maxphyaddr = 0; -EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(allow_smaller_maxphyaddr); bool __read_mostly enable_apicv = true; -EXPORT_SYMBOL_GPL(enable_apicv); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_apicv); bool __read_mostly enable_ipiv = true; -EXPORT_SYMBOL_GPL(enable_ipiv); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_ipiv); bool __read_mostly enable_device_posted_irqs = true; -EXPORT_SYMBOL_GPL(enable_device_posted_irqs); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_device_posted_irqs); const struct _kvm_stats_desc kvm_vm_stats_desc[] = { KVM_GENERIC_VM_STATS(), @@ -335,7 +343,11 @@ static const u32 msrs_to_save_base[] = { MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B, MSR_IA32_UMWAIT_CONTROL, - MSR_IA32_XFD, MSR_IA32_XFD_ERR, + MSR_IA32_XFD, MSR_IA32_XFD_ERR, MSR_IA32_XSS, + + MSR_IA32_U_CET, MSR_IA32_S_CET, + MSR_IA32_PL0_SSP, MSR_IA32_PL1_SSP, MSR_IA32_PL2_SSP, + MSR_IA32_PL3_SSP, MSR_IA32_INT_SSP_TAB, }; static const u32 msrs_to_save_pmu[] = { @@ -367,6 +379,7 @@ static const u32 msrs_to_save_pmu[] = { MSR_AMD64_PERF_CNTR_GLOBAL_CTL, MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, + MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET, }; static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_base) + @@ -614,7 +627,7 @@ int kvm_add_user_return_msr(u32 msr) kvm_uret_msrs_list[kvm_nr_uret_msrs] = msr; return kvm_nr_uret_msrs++; } -EXPORT_SYMBOL_GPL(kvm_add_user_return_msr); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_add_user_return_msr); int kvm_find_user_return_msr(u32 msr) { @@ -626,7 +639,7 @@ int kvm_find_user_return_msr(u32 msr) } return -1; } -EXPORT_SYMBOL_GPL(kvm_find_user_return_msr); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_find_user_return_msr); static void kvm_user_return_msr_cpu_online(void) { @@ -666,7 +679,7 @@ int kvm_set_user_return_msr(unsigned slot, u64 value, u64 mask) kvm_user_return_register_notifier(msrs); return 0; } -EXPORT_SYMBOL_GPL(kvm_set_user_return_msr); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_user_return_msr); void kvm_user_return_msr_update_cache(unsigned int slot, u64 value) { @@ -675,7 +688,13 @@ void kvm_user_return_msr_update_cache(unsigned int slot, u64 value) msrs->values[slot].curr = value; kvm_user_return_register_notifier(msrs); } -EXPORT_SYMBOL_GPL(kvm_user_return_msr_update_cache); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_user_return_msr_update_cache); + +u64 kvm_get_user_return_msr(unsigned int slot) +{ + return this_cpu_ptr(user_return_msrs)->values[slot].curr; +} +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_get_user_return_msr); static void drop_user_return_notifiers(void) { @@ -697,7 +716,7 @@ noinstr void kvm_spurious_fault(void) /* Fault while not rebooting. We want the trace. 
*/ BUG_ON(!kvm_rebooting); } -EXPORT_SYMBOL_GPL(kvm_spurious_fault); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_spurious_fault); #define EXCPT_BENIGN 0 #define EXCPT_CONTRIBUTORY 1 @@ -802,7 +821,7 @@ void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu, ex->has_payload = false; ex->payload = 0; } -EXPORT_SYMBOL_GPL(kvm_deliver_exception_payload); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_deliver_exception_payload); static void kvm_queue_exception_vmexit(struct kvm_vcpu *vcpu, unsigned int vector, bool has_error_code, u32 error_code, @@ -886,7 +905,7 @@ void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr) { kvm_multiple_exception(vcpu, nr, false, 0, false, 0); } -EXPORT_SYMBOL_GPL(kvm_queue_exception); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_queue_exception); void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, @@ -894,7 +913,7 @@ void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, { kvm_multiple_exception(vcpu, nr, false, 0, true, payload); } -EXPORT_SYMBOL_GPL(kvm_queue_exception_p); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_queue_exception_p); static void kvm_queue_exception_e_p(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code, unsigned long payload) @@ -929,7 +948,7 @@ void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned int nr, vcpu->arch.exception.has_payload = false; vcpu->arch.exception.payload = 0; } -EXPORT_SYMBOL_GPL(kvm_requeue_exception); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_requeue_exception); int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err) { @@ -940,7 +959,7 @@ int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err) return 1; } -EXPORT_SYMBOL_GPL(kvm_complete_insn_gp); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_complete_insn_gp); static int complete_emulated_insn_gp(struct kvm_vcpu *vcpu, int err) { @@ -990,7 +1009,7 @@ void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu, fault_mmu->inject_page_fault(vcpu, fault); } -EXPORT_SYMBOL_GPL(kvm_inject_emulated_page_fault); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_inject_emulated_page_fault); void kvm_inject_nmi(struct kvm_vcpu *vcpu) { @@ -1002,7 +1021,7 @@ void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) { kvm_multiple_exception(vcpu, nr, true, error_code, false, 0); } -EXPORT_SYMBOL_GPL(kvm_queue_exception_e); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_queue_exception_e); /* * Checks if cpl <= required_cpl; if true, return true. 
Otherwise queue @@ -1024,7 +1043,7 @@ bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr) kvm_queue_exception(vcpu, UD_VECTOR); return false; } -EXPORT_SYMBOL_GPL(kvm_require_dr); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_require_dr); static inline u64 pdptr_rsvd_bits(struct kvm_vcpu *vcpu) { @@ -1079,7 +1098,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) return 1; } -EXPORT_SYMBOL_GPL(load_pdptrs); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(load_pdptrs); static bool kvm_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { @@ -1132,7 +1151,7 @@ void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned lon if ((cr0 ^ old_cr0) & KVM_MMU_CR0_ROLE_BITS) kvm_mmu_reset_context(vcpu); } -EXPORT_SYMBOL_GPL(kvm_post_set_cr0); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_post_set_cr0); int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { @@ -1167,19 +1186,22 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) (is_64_bit_mode(vcpu) || kvm_is_cr4_bit_set(vcpu, X86_CR4_PCIDE))) return 1; + if (!(cr0 & X86_CR0_WP) && kvm_is_cr4_bit_set(vcpu, X86_CR4_CET)) + return 1; + kvm_x86_call(set_cr0)(vcpu, cr0); kvm_post_set_cr0(vcpu, old_cr0, cr0); return 0; } -EXPORT_SYMBOL_GPL(kvm_set_cr0); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_cr0); void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) { (void)kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f)); } -EXPORT_SYMBOL_GPL(kvm_lmsw); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_lmsw); void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu) { @@ -1202,7 +1224,7 @@ void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu) kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE))) wrpkru(vcpu->arch.pkru); } -EXPORT_SYMBOL_GPL(kvm_load_guest_xsave_state); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_load_guest_xsave_state); void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu) { @@ -1228,7 +1250,7 @@ void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu) } } -EXPORT_SYMBOL_GPL(kvm_load_host_xsave_state); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_load_host_xsave_state); #ifdef CONFIG_X86_64 static inline u64 kvm_guest_supported_xfd(struct kvm_vcpu *vcpu) @@ -1237,7 +1259,7 @@ static inline u64 kvm_guest_supported_xfd(struct kvm_vcpu *vcpu) } #endif -static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) +int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) { u64 xcr0 = xcr; u64 old_xcr0 = vcpu->arch.xcr0; @@ -1281,6 +1303,7 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) vcpu->arch.cpuid_dynamic_bits_dirty = true; return 0; } +EXPORT_SYMBOL_FOR_KVM_INTERNAL(__kvm_set_xcr); int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu) { @@ -1293,7 +1316,7 @@ int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu) return kvm_skip_emulated_instruction(vcpu); } -EXPORT_SYMBOL_GPL(kvm_emulate_xsetbv); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_xsetbv); static bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { @@ -1341,7 +1364,7 @@ void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned lon kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); } -EXPORT_SYMBOL_GPL(kvm_post_set_cr4); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_post_set_cr4); int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { @@ -1366,13 +1389,16 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) return 1; } + if ((cr4 & X86_CR4_CET) && !kvm_is_cr0_bit_set(vcpu, X86_CR0_WP)) + return 1; + kvm_x86_call(set_cr4)(vcpu, cr4); kvm_post_set_cr4(vcpu, old_cr4, cr4); return 0; } -EXPORT_SYMBOL_GPL(kvm_set_cr4); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_cr4); 
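For context on the paired checks added to kvm_set_cr0() and kvm_set_cr4() above: architecturally, CR4.CET may only be set while CR0.WP is set, and CR0.WP may not be cleared while CR4.CET is set, so a violation in either direction is rejected (the "return 1" paths, i.e. #GP). A minimal standalone C sketch of that invariant follows; it is illustrative only and not part of the patch, the helper name is hypothetical, and only the bit positions are taken from the architecture.

#include <stdbool.h>
#include <stdint.h>

#define X86_CR0_WP	(1ULL << 16)	/* CR0.WP  - write protect */
#define X86_CR4_CET	(1ULL << 23)	/* CR4.CET - control-flow enforcement */

/*
 * A CR0/CR4 pair is valid with respect to CET only if CR4.CET implies
 * CR0.WP.  Both new checks in kvm_set_cr0() and kvm_set_cr4() reduce to
 * this single predicate, each evaluated against the register that is not
 * being written.
 */
static bool cet_cr_pair_is_valid(uint64_t cr0, uint64_t cr4)
{
	return !(cr4 & X86_CR4_CET) || (cr0 & X86_CR0_WP);
}

For example, cet_cr_pair_is_valid(cr0 & ~X86_CR0_WP, cr4 | X86_CR4_CET) is false, which corresponds to the rejected transitions in the two hunks above.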
static void kvm_invalidate_pcid(struct kvm_vcpu *vcpu, unsigned long pcid) { @@ -1464,7 +1490,7 @@ handle_tlb_flush: return 0; } -EXPORT_SYMBOL_GPL(kvm_set_cr3); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_cr3); int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) { @@ -1476,7 +1502,7 @@ int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) vcpu->arch.cr8 = cr8; return 0; } -EXPORT_SYMBOL_GPL(kvm_set_cr8); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_cr8); unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) { @@ -1485,7 +1511,7 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) else return vcpu->arch.cr8; } -EXPORT_SYMBOL_GPL(kvm_get_cr8); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_get_cr8); static void kvm_update_dr0123(struct kvm_vcpu *vcpu) { @@ -1510,7 +1536,7 @@ void kvm_update_dr7(struct kvm_vcpu *vcpu) if (dr7 & DR7_BP_EN_MASK) vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED; } -EXPORT_SYMBOL_GPL(kvm_update_dr7); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_update_dr7); static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu) { @@ -1551,7 +1577,7 @@ int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) return 0; } -EXPORT_SYMBOL_GPL(kvm_set_dr); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_dr); unsigned long kvm_get_dr(struct kvm_vcpu *vcpu, int dr) { @@ -1568,14 +1594,14 @@ unsigned long kvm_get_dr(struct kvm_vcpu *vcpu, int dr) return vcpu->arch.dr7; } } -EXPORT_SYMBOL_GPL(kvm_get_dr); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_get_dr); int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu) { - u32 ecx = kvm_rcx_read(vcpu); + u32 pmc = kvm_rcx_read(vcpu); u64 data; - if (kvm_pmu_rdpmc(vcpu, ecx, &data)) { + if (kvm_pmu_rdpmc(vcpu, pmc, &data)) { kvm_inject_gp(vcpu, 0); return 1; } @@ -1584,7 +1610,7 @@ int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu) kvm_rdx_write(vcpu, data >> 32); return kvm_skip_emulated_instruction(vcpu); } -EXPORT_SYMBOL_GPL(kvm_emulate_rdpmc); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_rdpmc); /* * Some IA32_ARCH_CAPABILITIES bits have dependencies on MSRs that KVM @@ -1723,7 +1749,7 @@ bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) return __kvm_valid_efer(vcpu, efer); } -EXPORT_SYMBOL_GPL(kvm_valid_efer); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_valid_efer); static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { @@ -1766,7 +1792,7 @@ void kvm_enable_efer_bits(u64 mask) { efer_reserved_bits &= ~mask; } -EXPORT_SYMBOL_GPL(kvm_enable_efer_bits); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_enable_efer_bits); bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type) { @@ -1809,7 +1835,7 @@ out: return allowed; } -EXPORT_SYMBOL_GPL(kvm_msr_allowed); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_msr_allowed); /* * Write @data into the MSR specified by @index. Select MSR specific fault @@ -1870,6 +1896,44 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data, data = (u32)data; break; + case MSR_IA32_U_CET: + case MSR_IA32_S_CET: + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) && + !guest_cpu_cap_has(vcpu, X86_FEATURE_IBT)) + return KVM_MSR_RET_UNSUPPORTED; + if (!kvm_is_valid_u_s_cet(vcpu, data)) + return 1; + break; + case MSR_KVM_INTERNAL_GUEST_SSP: + if (!host_initiated) + return 1; + fallthrough; + /* + * Note that the MSR emulation here is flawed when a vCPU + * doesn't support the Intel 64 architecture. The expected + * architectural behavior in this case is that the upper 32 + * bits do not exist and should always read '0'. 
However, + * because the actual hardware on which the virtual CPU is + * running does support Intel 64, XRSTORS/XSAVES in the + * guest could observe behavior that violates the + * architecture. Intercepting XRSTORS/XSAVES for this + * special case isn't deemed worthwhile. + */ + case MSR_IA32_PL0_SSP ... MSR_IA32_INT_SSP_TAB: + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK)) + return KVM_MSR_RET_UNSUPPORTED; + /* + * MSR_IA32_INT_SSP_TAB is not present on processors that do + * not support Intel 64 architecture. + */ + if (index == MSR_IA32_INT_SSP_TAB && !guest_cpu_cap_has(vcpu, X86_FEATURE_LM)) + return KVM_MSR_RET_UNSUPPORTED; + if (is_noncanonical_msr_address(data, vcpu)) + return 1; + /* All SSP MSRs except MSR_IA32_INT_SSP_TAB must be 4-byte aligned */ + if (index != MSR_IA32_INT_SSP_TAB && !IS_ALIGNED(data, 4)) + return 1; + break; } msr.data = data; @@ -1898,8 +1962,8 @@ static int kvm_set_msr_ignored_check(struct kvm_vcpu *vcpu, * Returns 0 on success, non-0 otherwise. * Assumes vcpu_load() was already called. */ -int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, - bool host_initiated) +static int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, + bool host_initiated) { struct msr_data msr; int ret; @@ -1914,6 +1978,20 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, !guest_cpu_cap_has(vcpu, X86_FEATURE_RDPID)) return 1; break; + case MSR_IA32_U_CET: + case MSR_IA32_S_CET: + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) && + !guest_cpu_cap_has(vcpu, X86_FEATURE_IBT)) + return KVM_MSR_RET_UNSUPPORTED; + break; + case MSR_KVM_INTERNAL_GUEST_SSP: + if (!host_initiated) + return 1; + fallthrough; + case MSR_IA32_PL0_SSP ... MSR_IA32_INT_SSP_TAB: + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK)) + return KVM_MSR_RET_UNSUPPORTED; + break; } msr.index = index; @@ -1925,6 +2003,16 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, return ret; } +int kvm_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data) +{ + return __kvm_set_msr(vcpu, index, data, true); +} + +int kvm_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data) +{ + return __kvm_get_msr(vcpu, index, data, true); +} + static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu, u32 index, u64 *data, bool host_initiated) { @@ -1932,33 +2020,36 @@ static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu, __kvm_get_msr); } -int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data) +int __kvm_emulate_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data) { - if (!kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ)) - return KVM_MSR_RET_FILTERED; return kvm_get_msr_ignored_check(vcpu, index, data, false); } -EXPORT_SYMBOL_GPL(kvm_get_msr_with_filter); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(__kvm_emulate_msr_read); -int kvm_set_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 data) +int __kvm_emulate_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data) { - if (!kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_WRITE)) - return KVM_MSR_RET_FILTERED; return kvm_set_msr_ignored_check(vcpu, index, data, false); } -EXPORT_SYMBOL_GPL(kvm_set_msr_with_filter); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(__kvm_emulate_msr_write); -int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data) +int kvm_emulate_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data) { - return kvm_get_msr_ignored_check(vcpu, index, data, false); + if (!kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ)) + return KVM_MSR_RET_FILTERED; + + return __kvm_emulate_msr_read(vcpu, index, data); } 
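For the shadow-stack MSR handling added to __kvm_set_msr() above, the value checks for MSR_IA32_PL[0-3]_SSP and MSR_IA32_INT_SSP_TAB reduce to "canonical virtual address, and 4-byte aligned for everything except MSR_IA32_INT_SSP_TAB". The sketch below is an illustrative, self-contained approximation and not part of the patch; the helper name is hypothetical, and it takes the virtual-address width as an explicit parameter, whereas KVM derives it from the vCPU via is_noncanonical_msr_address().

#include <stdbool.h>
#include <stdint.h>

/*
 * Return true if @data is acceptable as a shadow-stack pointer value:
 * canonical for a @va_bits-wide virtual address space (e.g. 48 or 57),
 * and 4-byte aligned when @needs_alignment is set (all SSP MSRs except
 * MSR_IA32_INT_SSP_TAB).
 */
static bool ssp_msr_value_is_valid(uint64_t data, unsigned int va_bits,
				   bool needs_alignment)
{
	/* Bits [63:va_bits-1] must all be copies of bit (va_bits - 1). */
	uint64_t high_bits = ~0ULL << (va_bits - 1);

	if ((data & high_bits) != 0 && (data & high_bits) != high_bits)
		return false;

	return !needs_alignment || (data & 0x3) == 0;
}

Note that host-initiated writes to MSR_KVM_INTERNAL_GUEST_SSP fall through to the same SSP case in __kvm_set_msr(), so they are subject to the same canonicality and alignment requirements.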
-EXPORT_SYMBOL_GPL(kvm_get_msr); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_msr_read); -int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) +int kvm_emulate_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data) { - return kvm_set_msr_ignored_check(vcpu, index, data, false); + if (!kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_WRITE)) + return KVM_MSR_RET_FILTERED; + + return __kvm_emulate_msr_write(vcpu, index, data); } -EXPORT_SYMBOL_GPL(kvm_set_msr); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_msr_write); + static void complete_userspace_rdmsr(struct kvm_vcpu *vcpu) { @@ -1990,6 +2081,15 @@ static int complete_fast_rdmsr(struct kvm_vcpu *vcpu) return complete_fast_msr_access(vcpu); } +static int complete_fast_rdmsr_imm(struct kvm_vcpu *vcpu) +{ + if (!vcpu->run->msr.error) + kvm_register_write(vcpu, vcpu->arch.cui_rdmsr_imm_reg, + vcpu->run->msr.data); + + return complete_fast_msr_access(vcpu); +} + static u64 kvm_msr_reason(int r) { switch (r) { @@ -2024,55 +2124,82 @@ static int kvm_msr_user_space(struct kvm_vcpu *vcpu, u32 index, return 1; } -int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu) +static int __kvm_emulate_rdmsr(struct kvm_vcpu *vcpu, u32 msr, int reg, + int (*complete_rdmsr)(struct kvm_vcpu *)) { - u32 ecx = kvm_rcx_read(vcpu); u64 data; int r; - r = kvm_get_msr_with_filter(vcpu, ecx, &data); + r = kvm_emulate_msr_read(vcpu, msr, &data); if (!r) { - trace_kvm_msr_read(ecx, data); + trace_kvm_msr_read(msr, data); - kvm_rax_write(vcpu, data & -1u); - kvm_rdx_write(vcpu, (data >> 32) & -1u); + if (reg < 0) { + kvm_rax_write(vcpu, data & -1u); + kvm_rdx_write(vcpu, (data >> 32) & -1u); + } else { + kvm_register_write(vcpu, reg, data); + } } else { /* MSR read failed? See if we should ask user space */ - if (kvm_msr_user_space(vcpu, ecx, KVM_EXIT_X86_RDMSR, 0, - complete_fast_rdmsr, r)) + if (kvm_msr_user_space(vcpu, msr, KVM_EXIT_X86_RDMSR, 0, + complete_rdmsr, r)) return 0; - trace_kvm_msr_read_ex(ecx); + trace_kvm_msr_read_ex(msr); } return kvm_x86_call(complete_emulated_msr)(vcpu, r); } -EXPORT_SYMBOL_GPL(kvm_emulate_rdmsr); -int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu) +int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu) { - u32 ecx = kvm_rcx_read(vcpu); - u64 data = kvm_read_edx_eax(vcpu); - int r; + return __kvm_emulate_rdmsr(vcpu, kvm_rcx_read(vcpu), -1, + complete_fast_rdmsr); +} +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_rdmsr); - r = kvm_set_msr_with_filter(vcpu, ecx, data); +int kvm_emulate_rdmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg) +{ + vcpu->arch.cui_rdmsr_imm_reg = reg; + + return __kvm_emulate_rdmsr(vcpu, msr, reg, complete_fast_rdmsr_imm); +} +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_rdmsr_imm); + +static int __kvm_emulate_wrmsr(struct kvm_vcpu *vcpu, u32 msr, u64 data) +{ + int r; + r = kvm_emulate_msr_write(vcpu, msr, data); if (!r) { - trace_kvm_msr_write(ecx, data); + trace_kvm_msr_write(msr, data); } else { /* MSR write failed? 
See if we should ask user space */ - if (kvm_msr_user_space(vcpu, ecx, KVM_EXIT_X86_WRMSR, data, + if (kvm_msr_user_space(vcpu, msr, KVM_EXIT_X86_WRMSR, data, complete_fast_msr_access, r)) return 0; /* Signal all other negative errors to userspace */ if (r < 0) return r; - trace_kvm_msr_write_ex(ecx, data); + trace_kvm_msr_write_ex(msr, data); } return kvm_x86_call(complete_emulated_msr)(vcpu, r); } -EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr); + +int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu) +{ + return __kvm_emulate_wrmsr(vcpu, kvm_rcx_read(vcpu), + kvm_read_edx_eax(vcpu)); +} +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_wrmsr); + +int kvm_emulate_wrmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg) +{ + return __kvm_emulate_wrmsr(vcpu, msr, kvm_register_read(vcpu, reg)); +} +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_wrmsr_imm); int kvm_emulate_as_nop(struct kvm_vcpu *vcpu) { @@ -2084,14 +2211,23 @@ int kvm_emulate_invd(struct kvm_vcpu *vcpu) /* Treat an INVD instruction as a NOP and just skip it. */ return kvm_emulate_as_nop(vcpu); } -EXPORT_SYMBOL_GPL(kvm_emulate_invd); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_invd); + +fastpath_t handle_fastpath_invd(struct kvm_vcpu *vcpu) +{ + if (!kvm_emulate_invd(vcpu)) + return EXIT_FASTPATH_EXIT_USERSPACE; + + return EXIT_FASTPATH_REENTER_GUEST; +} +EXPORT_SYMBOL_FOR_KVM_INTERNAL(handle_fastpath_invd); int kvm_handle_invalid_op(struct kvm_vcpu *vcpu) { kvm_queue_exception(vcpu, UD_VECTOR); return 1; } -EXPORT_SYMBOL_GPL(kvm_handle_invalid_op); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_handle_invalid_op); static int kvm_emulate_monitor_mwait(struct kvm_vcpu *vcpu, const char *insn) @@ -2117,13 +2253,13 @@ int kvm_emulate_mwait(struct kvm_vcpu *vcpu) { return kvm_emulate_monitor_mwait(vcpu, "MWAIT"); } -EXPORT_SYMBOL_GPL(kvm_emulate_mwait); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_mwait); int kvm_emulate_monitor(struct kvm_vcpu *vcpu) { return kvm_emulate_monitor_mwait(vcpu, "MONITOR"); } -EXPORT_SYMBOL_GPL(kvm_emulate_monitor); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_monitor); static inline bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu) { @@ -2133,74 +2269,41 @@ static inline bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu) kvm_request_pending(vcpu) || xfer_to_guest_mode_work_pending(); } -/* - * The fast path for frequent and performance sensitive wrmsr emulation, - * i.e. the sending of IPI, sending IPI early in the VM-Exit flow reduces - * the latency of virtual IPI by avoiding the expensive bits of transitioning - * from guest to host, e.g. reacquiring KVM's SRCU lock. In contrast to the - * other cases which must be called after interrupts are enabled on the host. 
- */ -static int handle_fastpath_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u64 data) +static fastpath_t __handle_fastpath_wrmsr(struct kvm_vcpu *vcpu, u32 msr, u64 data) { - if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(vcpu->arch.apic)) - return 1; - - if (((data & APIC_SHORT_MASK) == APIC_DEST_NOSHORT) && - ((data & APIC_DEST_MASK) == APIC_DEST_PHYSICAL) && - ((data & APIC_MODE_MASK) == APIC_DM_FIXED) && - ((u32)(data >> 32) != X2APIC_BROADCAST)) - return kvm_x2apic_icr_write(vcpu->arch.apic, data); - - return 1; -} - -static int handle_fastpath_set_tscdeadline(struct kvm_vcpu *vcpu, u64 data) -{ - if (!kvm_can_use_hv_timer(vcpu)) - return 1; - - kvm_set_lapic_tscdeadline_msr(vcpu, data); - return 0; -} - -fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu) -{ - u32 msr = kvm_rcx_read(vcpu); - u64 data; - fastpath_t ret; - bool handled; - - kvm_vcpu_srcu_read_lock(vcpu); - switch (msr) { case APIC_BASE_MSR + (APIC_ICR >> 4): - data = kvm_read_edx_eax(vcpu); - handled = !handle_fastpath_set_x2apic_icr_irqoff(vcpu, data); + if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(vcpu->arch.apic) || + kvm_x2apic_icr_write_fast(vcpu->arch.apic, data)) + return EXIT_FASTPATH_NONE; break; case MSR_IA32_TSC_DEADLINE: - data = kvm_read_edx_eax(vcpu); - handled = !handle_fastpath_set_tscdeadline(vcpu, data); + kvm_set_lapic_tscdeadline_msr(vcpu, data); break; default: - handled = false; - break; + return EXIT_FASTPATH_NONE; } - if (handled) { - if (!kvm_skip_emulated_instruction(vcpu)) - ret = EXIT_FASTPATH_EXIT_USERSPACE; - else - ret = EXIT_FASTPATH_REENTER_GUEST; - trace_kvm_msr_write(msr, data); - } else { - ret = EXIT_FASTPATH_NONE; - } + trace_kvm_msr_write(msr, data); - kvm_vcpu_srcu_read_unlock(vcpu); + if (!kvm_skip_emulated_instruction(vcpu)) + return EXIT_FASTPATH_EXIT_USERSPACE; - return ret; + return EXIT_FASTPATH_REENTER_GUEST; +} + +fastpath_t handle_fastpath_wrmsr(struct kvm_vcpu *vcpu) +{ + return __handle_fastpath_wrmsr(vcpu, kvm_rcx_read(vcpu), + kvm_read_edx_eax(vcpu)); +} +EXPORT_SYMBOL_FOR_KVM_INTERNAL(handle_fastpath_wrmsr); + +fastpath_t handle_fastpath_wrmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg) +{ + return __handle_fastpath_wrmsr(vcpu, msr, kvm_register_read(vcpu, reg)); } -EXPORT_SYMBOL_GPL(handle_fastpath_set_msr_irqoff); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(handle_fastpath_wrmsr_imm); /* * Adapt set_msr() to msr_io()'s calling convention @@ -2566,7 +2669,7 @@ u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) return vcpu->arch.l1_tsc_offset + kvm_scale_tsc(host_tsc, vcpu->arch.l1_tsc_scaling_ratio); } -EXPORT_SYMBOL_GPL(kvm_read_l1_tsc); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_read_l1_tsc); u64 kvm_calc_nested_tsc_offset(u64 l1_offset, u64 l2_offset, u64 l2_multiplier) { @@ -2581,7 +2684,7 @@ u64 kvm_calc_nested_tsc_offset(u64 l1_offset, u64 l2_offset, u64 l2_multiplier) nested_offset += l2_offset; return nested_offset; } -EXPORT_SYMBOL_GPL(kvm_calc_nested_tsc_offset); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_calc_nested_tsc_offset); u64 kvm_calc_nested_tsc_multiplier(u64 l1_multiplier, u64 l2_multiplier) { @@ -2591,7 +2694,7 @@ u64 kvm_calc_nested_tsc_multiplier(u64 l1_multiplier, u64 l2_multiplier) return l1_multiplier; } -EXPORT_SYMBOL_GPL(kvm_calc_nested_tsc_multiplier); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_calc_nested_tsc_multiplier); static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 l1_offset) { @@ -3669,7 +3772,7 @@ void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu) if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu)) 
kvm_vcpu_flush_tlb_guest(vcpu); } -EXPORT_SYMBOL_GPL(kvm_service_local_tlb_flush_requests); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_service_local_tlb_flush_requests); static void record_steal_time(struct kvm_vcpu *vcpu) { @@ -3769,6 +3872,67 @@ static void record_steal_time(struct kvm_vcpu *vcpu) mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa)); } +/* + * Returns true if the MSR in question is managed via XSTATE, i.e. is context + * switched with the rest of guest FPU state. Note! S_CET is _not_ context + * switched via XSTATE even though it _is_ saved/restored via XSAVES/XRSTORS. + * Because S_CET is loaded on VM-Enter and VM-Exit via dedicated VMCS fields, + * the value saved/restored via XSTATE is always the host's value. That detail + * is _extremely_ important, as the guest's S_CET must _never_ be resident in + * hardware while executing in the host. Loading guest values for U_CET and + * PL[0-3]_SSP while executing in the kernel is safe, as U_CET is specific to + * userspace, and PL[0-3]_SSP are only consumed when transitioning to lower + * privilege levels, i.e. are effectively only consumed by userspace as well. + */ +static bool is_xstate_managed_msr(struct kvm_vcpu *vcpu, u32 msr) +{ + if (!vcpu) + return false; + + switch (msr) { + case MSR_IA32_U_CET: + return guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) || + guest_cpu_cap_has(vcpu, X86_FEATURE_IBT); + case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP: + return guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK); + default: + return false; + } +} + +/* + * Lock (and if necessary, re-load) the guest FPU, i.e. XSTATE, and access an + * MSR that is managed via XSTATE. Note, the caller is responsible for doing + * the initial FPU load, this helper only ensures that guest state is resident + * in hardware (the kernel can load its FPU state in IRQ context). + */ +static __always_inline void kvm_access_xstate_msr(struct kvm_vcpu *vcpu, + struct msr_data *msr_info, + int access) +{ + BUILD_BUG_ON(access != MSR_TYPE_R && access != MSR_TYPE_W); + + KVM_BUG_ON(!is_xstate_managed_msr(vcpu, msr_info->index), vcpu->kvm); + KVM_BUG_ON(!vcpu->arch.guest_fpu.fpstate->in_use, vcpu->kvm); + + kvm_fpu_get(); + if (access == MSR_TYPE_R) + rdmsrq(msr_info->index, msr_info->data); + else + wrmsrq(msr_info->index, msr_info->data); + kvm_fpu_put(); +} + +static void kvm_set_xstate_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) +{ + kvm_access_xstate_msr(vcpu, msr_info, MSR_TYPE_W); +} + +static void kvm_get_xstate_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) +{ + kvm_access_xstate_msr(vcpu, msr_info, MSR_TYPE_R); +} + int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { u32 msr = msr_info->index; @@ -3960,16 +4124,13 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) } break; case MSR_IA32_XSS: - if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)) - return 1; - /* - * KVM supports exposing PT to the guest, but does not support - * IA32_XSS[bit 8]. Guests have to use RDMSR/WRMSR rather than - * XSAVES/XRSTORS to save/restore PT MSRs. 
- */ - if (data & ~kvm_caps.supported_xss) + if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)) + return KVM_MSR_RET_UNSUPPORTED; + + if (data & ~vcpu->arch.guest_supported_xss) return 1; + if (vcpu->arch.ia32_xss == data) + break; vcpu->arch.ia32_xss = data; vcpu->arch.cpuid_dynamic_bits_dirty = true; break; @@ -4153,6 +4314,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vcpu->arch.guest_fpu.xfd_err = data; break; #endif + case MSR_IA32_U_CET: + case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP: + kvm_set_xstate_msr(vcpu, msr_info); + break; default: if (kvm_pmu_is_valid_msr(vcpu, msr)) return kvm_pmu_set_msr(vcpu, msr_info); @@ -4161,7 +4326,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) } return 0; } -EXPORT_SYMBOL_GPL(kvm_set_msr_common); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_msr_common); static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host) { @@ -4502,6 +4667,10 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = vcpu->arch.guest_fpu.xfd_err; break; #endif + case MSR_IA32_U_CET: + case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP: + kvm_get_xstate_msr(vcpu, msr_info); + break; default: if (kvm_pmu_is_valid_msr(vcpu, msr_info->index)) return kvm_pmu_get_msr(vcpu, msr_info); @@ -4510,7 +4679,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) } return 0; } -EXPORT_SYMBOL_GPL(kvm_get_msr_common); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_get_msr_common); /* * Read or write a bunch of msrs. All parameters are kernel addresses. @@ -4522,11 +4691,25 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs, int (*do_msr)(struct kvm_vcpu *vcpu, unsigned index, u64 *data)) { + bool fpu_loaded = false; int i; - for (i = 0; i < msrs->nmsrs; ++i) + for (i = 0; i < msrs->nmsrs; ++i) { + /* + * If userspace is accessing one or more XSTATE-managed MSRs, + * temporarily load the guest's FPU state so that the guest's + * MSR value(s) is resident in hardware and thus can be accessed + * via RDMSR/WRMSR. + */ + if (!fpu_loaded && is_xstate_managed_msr(vcpu, entries[i].index)) { + kvm_load_guest_fpu(vcpu); + fpu_loaded = true; + } if (do_msr(vcpu, entries[i].index, &entries[i].data)) break; + } + if (fpu_loaded) + kvm_put_guest_fpu(vcpu); return i; } @@ -4711,6 +4894,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_IRQFD_RESAMPLE: case KVM_CAP_MEMORY_FAULT_INFO: case KVM_CAP_X86_GUEST_MODE: + case KVM_CAP_ONE_REG: r = 1; break; case KVM_CAP_PRE_FAULT_MEMORY: @@ -5889,6 +6073,134 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, } } +struct kvm_x86_reg_id { + __u32 index; + __u8 type; + __u8 rsvd1; + __u8 rsvd2:4; + __u8 size:4; + __u8 x86; +}; + +static int kvm_translate_kvm_reg(struct kvm_vcpu *vcpu, + struct kvm_x86_reg_id *reg) +{ + switch (reg->index) { + case KVM_REG_GUEST_SSP: + /* + * FIXME: If host-initiated accesses are ever exempted from + * ignore_msrs (in kvm_do_msr_access()), drop this manual check + * and rely on KVM's standard checks to reject accesses to regs + * that don't exist. 
+ */ + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK)) + return -EINVAL; + + reg->type = KVM_X86_REG_TYPE_MSR; + reg->index = MSR_KVM_INTERNAL_GUEST_SSP; + break; + default: + return -EINVAL; + } + return 0; +} + +static int kvm_get_one_msr(struct kvm_vcpu *vcpu, u32 msr, u64 __user *user_val) +{ + u64 val; + + if (do_get_msr(vcpu, msr, &val)) + return -EINVAL; + + if (put_user(val, user_val)) + return -EFAULT; + + return 0; +} + +static int kvm_set_one_msr(struct kvm_vcpu *vcpu, u32 msr, u64 __user *user_val) +{ + u64 val; + + if (get_user(val, user_val)) + return -EFAULT; + + if (do_set_msr(vcpu, msr, &val)) + return -EINVAL; + + return 0; +} + +static int kvm_get_set_one_reg(struct kvm_vcpu *vcpu, unsigned int ioctl, + void __user *argp) +{ + struct kvm_one_reg one_reg; + struct kvm_x86_reg_id *reg; + u64 __user *user_val; + bool load_fpu; + int r; + + if (copy_from_user(&one_reg, argp, sizeof(one_reg))) + return -EFAULT; + + if ((one_reg.id & KVM_REG_ARCH_MASK) != KVM_REG_X86) + return -EINVAL; + + reg = (struct kvm_x86_reg_id *)&one_reg.id; + if (reg->rsvd1 || reg->rsvd2) + return -EINVAL; + + if (reg->type == KVM_X86_REG_TYPE_KVM) { + r = kvm_translate_kvm_reg(vcpu, reg); + if (r) + return r; + } + + if (reg->type != KVM_X86_REG_TYPE_MSR) + return -EINVAL; + + if ((one_reg.id & KVM_REG_SIZE_MASK) != KVM_REG_SIZE_U64) + return -EINVAL; + + guard(srcu)(&vcpu->kvm->srcu); + + load_fpu = is_xstate_managed_msr(vcpu, reg->index); + if (load_fpu) + kvm_load_guest_fpu(vcpu); + + user_val = u64_to_user_ptr(one_reg.addr); + if (ioctl == KVM_GET_ONE_REG) + r = kvm_get_one_msr(vcpu, reg->index, user_val); + else + r = kvm_set_one_msr(vcpu, reg->index, user_val); + + if (load_fpu) + kvm_put_guest_fpu(vcpu); + return r; +} + +static int kvm_get_reg_list(struct kvm_vcpu *vcpu, + struct kvm_reg_list __user *user_list) +{ + u64 nr_regs = guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) ? 1 : 0; + u64 user_nr_regs; + + if (get_user(user_nr_regs, &user_list->n)) + return -EFAULT; + + if (put_user(nr_regs, &user_list->n)) + return -EFAULT; + + if (user_nr_regs < nr_regs) + return -E2BIG; + + if (nr_regs && + put_user(KVM_X86_REG_KVM(KVM_REG_GUEST_SSP), &user_list->reg[0])) + return -EFAULT; + + return 0; +} + long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -6005,6 +6317,13 @@ long kvm_arch_vcpu_ioctl(struct file *filp, srcu_read_unlock(&vcpu->kvm->srcu, idx); break; } + case KVM_GET_ONE_REG: + case KVM_SET_ONE_REG: + r = kvm_get_set_one_reg(vcpu, ioctl, argp); + break; + case KVM_GET_REG_LIST: + r = kvm_get_reg_list(vcpu, argp); + break; case KVM_TPR_ACCESS_REPORTING: { struct kvm_tpr_access_ctl tac; @@ -6771,7 +7090,11 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, kvm_free_msr_filter(old_filter); - kvm_make_all_cpus_request(kvm, KVM_REQ_MSR_FILTER_CHANGED); + /* + * Recalc MSR intercepts as userspace may want to intercept accesses to + * MSRs that KVM would otherwise pass through to the guest. + */ + kvm_make_all_cpus_request(kvm, KVM_REQ_RECALC_INTERCEPTS); return 0; } @@ -6966,6 +7289,15 @@ set_identity_unlock: if (irqchip_in_kernel(kvm)) goto create_irqchip_unlock; + /* + * Disallow an in-kernel I/O APIC if the VM has protected EOIs, + * i.e. if KVM can't intercept EOIs and thus can't properly + * emulate level-triggered interrupts. 
+ */ + r = -ENOTTY; + if (kvm->arch.has_protected_eoi) + goto create_irqchip_unlock; + r = -EINVAL; if (kvm->created_vcpus) goto create_irqchip_unlock; @@ -7353,6 +7685,7 @@ static void kvm_probe_msr_to_save(u32 msr_index) case MSR_AMD64_PERF_CNTR_GLOBAL_CTL: case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS: case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR: + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET: if (!kvm_cpu_cap_has(X86_FEATURE_PERFMON_V2)) return; break; @@ -7365,6 +7698,24 @@ static void kvm_probe_msr_to_save(u32 msr_index) if (!(kvm_get_arch_capabilities() & ARCH_CAP_TSX_CTRL_MSR)) return; break; + case MSR_IA32_XSS: + if (!kvm_caps.supported_xss) + return; + break; + case MSR_IA32_U_CET: + case MSR_IA32_S_CET: + if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK) && + !kvm_cpu_cap_has(X86_FEATURE_IBT)) + return; + break; + case MSR_IA32_INT_SSP_TAB: + if (!kvm_cpu_cap_has(X86_FEATURE_LM)) + return; + fallthrough; + case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP: + if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK)) + return; + break; default: break; } @@ -7484,7 +7835,7 @@ gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u64 access = (kvm_x86_call(get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0; return mmu->gva_to_gpa(vcpu, mmu, gva, access, exception); } -EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_read); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_gva_to_gpa_read); gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, struct x86_exception *exception) @@ -7495,7 +7846,7 @@ gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, access |= PFERR_WRITE_MASK; return mmu->gva_to_gpa(vcpu, mmu, gva, access, exception); } -EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_write); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_gva_to_gpa_write); /* uses this to access any guest's mapped memory without checking CPL */ gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, @@ -7581,7 +7932,7 @@ int kvm_read_guest_virt(struct kvm_vcpu *vcpu, return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, exception); } -EXPORT_SYMBOL_GPL(kvm_read_guest_virt); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_read_guest_virt); static int emulator_read_std(struct x86_emulate_ctxt *ctxt, gva_t addr, void *val, unsigned int bytes, @@ -7653,7 +8004,7 @@ int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val, return kvm_write_guest_virt_helper(addr, val, bytes, vcpu, PFERR_WRITE_MASK, exception); } -EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_write_guest_virt_system); static int kvm_check_emulate_insn(struct kvm_vcpu *vcpu, int emul_type, void *insn, int insn_len) @@ -7687,7 +8038,7 @@ int handle_ud(struct kvm_vcpu *vcpu) return kvm_emulate_instruction(vcpu, emul_type); } -EXPORT_SYMBOL_GPL(handle_ud); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(handle_ud); static int vcpu_is_mmio_gpa(struct kvm_vcpu *vcpu, unsigned long gva, gpa_t gpa, bool write) @@ -8166,7 +8517,7 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) kvm_emulate_wbinvd_noskip(vcpu); return kvm_skip_emulated_instruction(vcpu); } -EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_wbinvd); @@ -8353,7 +8704,7 @@ static int emulator_get_msr_with_filter(struct x86_emulate_ctxt *ctxt, struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); int r; - r = kvm_get_msr_with_filter(vcpu, msr_index, pdata); + r = kvm_emulate_msr_read(vcpu, msr_index, pdata); if (r < 0) return X86EMUL_UNHANDLEABLE; @@ -8376,7 +8727,7 @@ static int emulator_set_msr_with_filter(struct x86_emulate_ctxt *ctxt, struct kvm_vcpu *vcpu = 
emul_to_vcpu(ctxt); int r; - r = kvm_set_msr_with_filter(vcpu, msr_index, data); + r = kvm_emulate_msr_write(vcpu, msr_index, data); if (r < 0) return X86EMUL_UNHANDLEABLE; @@ -8396,7 +8747,16 @@ static int emulator_set_msr_with_filter(struct x86_emulate_ctxt *ctxt, static int emulator_get_msr(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata) { - return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata); + /* + * Treat emulator accesses to the current shadow stack pointer as host- + * initiated, as they aren't true MSR accesses (SSP is a "just a reg"), + * and this API is used only for implicit accesses, i.e. not RDMSR, and + * so the index is fully KVM-controlled. + */ + if (unlikely(msr_index == MSR_KVM_INTERNAL_GUEST_SSP)) + return kvm_msr_read(emul_to_vcpu(ctxt), msr_index, pdata); + + return __kvm_emulate_msr_read(emul_to_vcpu(ctxt), msr_index, pdata); } static int emulator_check_rdpmc_early(struct x86_emulate_ctxt *ctxt, u32 pmc) @@ -8470,11 +8830,6 @@ static bool emulator_is_smm(struct x86_emulate_ctxt *ctxt) return is_smm(emul_to_vcpu(ctxt)); } -static bool emulator_is_guest_mode(struct x86_emulate_ctxt *ctxt) -{ - return is_guest_mode(emul_to_vcpu(ctxt)); -} - #ifndef CONFIG_KVM_SMM static int emulator_leave_smm(struct x86_emulate_ctxt *ctxt) { @@ -8558,7 +8913,6 @@ static const struct x86_emulate_ops emulate_ops = { .guest_cpuid_is_intel_compatible = emulator_guest_cpuid_is_intel_compatible, .set_nmi_mask = emulator_set_nmi_mask, .is_smm = emulator_is_smm, - .is_guest_mode = emulator_is_guest_mode, .leave_smm = emulator_leave_smm, .triple_fault = emulator_triple_fault, .set_xcr = emulator_set_xcr, @@ -8661,7 +9015,7 @@ void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) kvm_set_rflags(vcpu, ctxt->eflags); } } -EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_inject_realmode_interrupt); static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, u64 *data, u8 ndata, u8 *insn_bytes, u8 insn_size) @@ -8726,13 +9080,13 @@ void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, u64 *data, { prepare_emulation_failure_exit(vcpu, data, ndata, NULL, 0); } -EXPORT_SYMBOL_GPL(__kvm_prepare_emulation_failure_exit); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(__kvm_prepare_emulation_failure_exit); void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu) { __kvm_prepare_emulation_failure_exit(vcpu, NULL, 0); } -EXPORT_SYMBOL_GPL(kvm_prepare_emulation_failure_exit); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_prepare_emulation_failure_exit); void kvm_prepare_event_vectoring_exit(struct kvm_vcpu *vcpu, gpa_t gpa) { @@ -8754,7 +9108,7 @@ void kvm_prepare_event_vectoring_exit(struct kvm_vcpu *vcpu, gpa_t gpa) run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV; run->internal.ndata = ndata; } -EXPORT_SYMBOL_GPL(kvm_prepare_event_vectoring_exit); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_prepare_event_vectoring_exit); static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type) { @@ -8864,7 +9218,7 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu) if (unlikely(!r)) return 0; - kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.INSTRUCTIONS_RETIRED); + kvm_pmu_instruction_retired(vcpu); /* * rflags is the old, "raw" value of the flags. 
The new value has @@ -8878,7 +9232,7 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu) r = kvm_vcpu_do_singlestep(vcpu); return r; } -EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_skip_emulated_instruction); static bool kvm_is_code_breakpoint_inhibited(struct kvm_vcpu *vcpu) { @@ -9009,7 +9363,7 @@ int x86_decode_emulated_instruction(struct kvm_vcpu *vcpu, int emulation_type, return r; } -EXPORT_SYMBOL_GPL(x86_decode_emulated_instruction); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(x86_decode_emulated_instruction); int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, int emulation_type, void *insn, int insn_len) @@ -9143,7 +9497,14 @@ restart: ctxt->exception.address = 0; } - r = x86_emulate_insn(ctxt); + /* + * Check L1's instruction intercepts when emulating instructions for + * L2, unless KVM is re-emulating a previously decoded instruction, + * e.g. to complete userspace I/O, in which case KVM has already + * checked the intercepts. + */ + r = x86_emulate_insn(ctxt, is_guest_mode(vcpu) && + !(emulation_type & EMULTYPE_NO_DECODE)); if (r == EMULATION_INTERCEPTED) return 1; @@ -9198,9 +9559,9 @@ writeback: */ if (!ctxt->have_exception || exception_type(ctxt->exception.vector) == EXCPT_TRAP) { - kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.INSTRUCTIONS_RETIRED); + kvm_pmu_instruction_retired(vcpu); if (ctxt->is_branch) - kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED); + kvm_pmu_branch_retired(vcpu); kvm_rip_write(vcpu, ctxt->eip); if (r && (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP))) r = kvm_vcpu_do_singlestep(vcpu); @@ -9226,14 +9587,14 @@ int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type) { return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0); } -EXPORT_SYMBOL_GPL(kvm_emulate_instruction); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_instruction); int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, void *insn, int insn_len) { return x86_emulate_instruction(vcpu, 0, 0, insn, insn_len); } -EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_instruction_from_buffer); static int complete_fast_pio_out_port_0x7e(struct kvm_vcpu *vcpu) { @@ -9328,7 +9689,7 @@ int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in) ret = kvm_fast_pio_out(vcpu, size, port); return ret && kvm_skip_emulated_instruction(vcpu); } -EXPORT_SYMBOL_GPL(kvm_fast_pio); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_fast_pio); static int kvmclock_cpu_down_prep(unsigned int cpu) { @@ -9651,6 +10012,18 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops) return -EIO; } + if (boot_cpu_has(X86_FEATURE_SHSTK) || boot_cpu_has(X86_FEATURE_IBT)) { + rdmsrq(MSR_IA32_S_CET, kvm_host.s_cet); + /* + * Linux doesn't yet support supervisor shadow stacks (SSS), so + * KVM doesn't save/restore the associated MSRs, i.e. KVM may + * clobber the host values. Yell and refuse to load if SSS is + * unexpectedly enabled, e.g. to avoid crashing the host. 
+ */ + if (WARN_ON_ONCE(kvm_host.s_cet & CET_SHSTK_EN)) + return -EIO; + } + memset(&kvm_caps, 0, sizeof(kvm_caps)); x86_emulator_cache = kvm_alloc_emulator_cache(); @@ -9678,14 +10051,17 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops) kvm_host.xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); kvm_caps.supported_xcr0 = kvm_host.xcr0 & KVM_SUPPORTED_XCR0; } + + if (boot_cpu_has(X86_FEATURE_XSAVES)) { + rdmsrq(MSR_IA32_XSS, kvm_host.xss); + kvm_caps.supported_xss = kvm_host.xss & KVM_SUPPORTED_XSS; + } + kvm_caps.supported_quirks = KVM_X86_VALID_QUIRKS; kvm_caps.inapplicable_quirks = KVM_X86_CONDITIONAL_QUIRKS; rdmsrq_safe(MSR_EFER, &kvm_host.efer); - if (boot_cpu_has(X86_FEATURE_XSAVES)) - rdmsrq(MSR_IA32_XSS, kvm_host.xss); - kvm_init_pmu_capability(ops->pmu_ops); if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) @@ -9734,6 +10110,16 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops) if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES)) kvm_caps.supported_xss = 0; + if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK) && + !kvm_cpu_cap_has(X86_FEATURE_IBT)) + kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL; + + if ((kvm_caps.supported_xss & XFEATURE_MASK_CET_ALL) != XFEATURE_MASK_CET_ALL) { + kvm_cpu_cap_clear(X86_FEATURE_SHSTK); + kvm_cpu_cap_clear(X86_FEATURE_IBT); + kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL; + } + if (kvm_caps.has_tsc_control) { /* * Make sure the user can only configure tsc_khz values that @@ -9760,7 +10146,7 @@ out_free_x86_emulator_cache: kmem_cache_destroy(x86_emulator_cache); return r; } -EXPORT_SYMBOL_GPL(kvm_x86_vendor_init); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_x86_vendor_init); void kvm_x86_vendor_exit(void) { @@ -9794,7 +10180,7 @@ void kvm_x86_vendor_exit(void) kvm_x86_ops.enable_virtualization_cpu = NULL; mutex_unlock(&vendor_module_lock); } -EXPORT_SYMBOL_GPL(kvm_x86_vendor_exit); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_x86_vendor_exit); #ifdef CONFIG_X86_64 static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, @@ -9858,7 +10244,7 @@ bool kvm_apicv_activated(struct kvm *kvm) { return (READ_ONCE(kvm->arch.apicv_inhibit_reasons) == 0); } -EXPORT_SYMBOL_GPL(kvm_apicv_activated); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apicv_activated); bool kvm_vcpu_apicv_activated(struct kvm_vcpu *vcpu) { @@ -9868,7 +10254,7 @@ bool kvm_vcpu_apicv_activated(struct kvm_vcpu *vcpu) return (vm_reasons | vcpu_reasons) == 0; } -EXPORT_SYMBOL_GPL(kvm_vcpu_apicv_activated); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_apicv_activated); static void set_or_clear_apicv_inhibit(unsigned long *inhibits, enum kvm_apicv_inhibit reason, bool set) @@ -10044,7 +10430,7 @@ out: vcpu->run->hypercall.ret = ret; return 1; } -EXPORT_SYMBOL_GPL(____kvm_emulate_hypercall); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(____kvm_emulate_hypercall); int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) { @@ -10057,7 +10443,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) return __kvm_emulate_hypercall(vcpu, kvm_x86_call(get_cpl)(vcpu), complete_hypercall_exit); } -EXPORT_SYMBOL_GPL(kvm_emulate_hypercall); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_hypercall); static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt) { @@ -10500,7 +10886,7 @@ out: preempt_enable(); up_read(&vcpu->kvm->arch.apicv_update_lock); } -EXPORT_SYMBOL_GPL(__kvm_vcpu_update_apicv); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(__kvm_vcpu_update_apicv); static void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu) { @@ -10576,7 +10962,7 @@ void kvm_set_or_clear_apicv_inhibit(struct kvm *kvm, __kvm_set_or_clear_apicv_inhibit(kvm, reason, set); 
up_write(&kvm->arch.apicv_update_lock); } -EXPORT_SYMBOL_GPL(kvm_set_or_clear_apicv_inhibit); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_or_clear_apicv_inhibit); static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) { @@ -10796,13 +11182,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (kvm_check_request(KVM_REQ_APF_READY, vcpu)) kvm_check_async_pf_completion(vcpu); - /* - * Recalc MSR intercepts as userspace may want to intercept - * accesses to MSRs that KVM would otherwise pass through to - * the guest. - */ - if (kvm_check_request(KVM_REQ_MSR_FILTER_CHANGED, vcpu)) - kvm_x86_call(recalc_msr_intercepts)(vcpu); + if (kvm_check_request(KVM_REQ_RECALC_INTERCEPTS, vcpu)) + kvm_x86_call(recalc_intercepts)(vcpu); if (kvm_check_request(KVM_REQ_UPDATE_CPU_DIRTY_LOGGING, vcpu)) kvm_x86_call(update_cpu_dirty_logging)(vcpu); @@ -11135,7 +11516,7 @@ bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) return false; } -EXPORT_SYMBOL_GPL(kvm_vcpu_has_events); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_has_events); int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { @@ -11253,7 +11634,7 @@ static int vcpu_run(struct kvm_vcpu *vcpu) if (__xfer_to_guest_mode_work_pending()) { kvm_vcpu_srcu_read_unlock(vcpu); - r = xfer_to_guest_mode_handle_work(vcpu); + r = kvm_xfer_to_guest_mode_handle_work(vcpu); kvm_vcpu_srcu_read_lock(vcpu); if (r) return r; @@ -11288,7 +11669,7 @@ int kvm_emulate_halt_noskip(struct kvm_vcpu *vcpu) { return __kvm_emulate_halt(vcpu, KVM_MP_STATE_HALTED, KVM_EXIT_HLT); } -EXPORT_SYMBOL_GPL(kvm_emulate_halt_noskip); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_halt_noskip); int kvm_emulate_halt(struct kvm_vcpu *vcpu) { @@ -11299,17 +11680,11 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu) */ return kvm_emulate_halt_noskip(vcpu) && ret; } -EXPORT_SYMBOL_GPL(kvm_emulate_halt); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_halt); fastpath_t handle_fastpath_hlt(struct kvm_vcpu *vcpu) { - int ret; - - kvm_vcpu_srcu_read_lock(vcpu); - ret = kvm_emulate_halt(vcpu); - kvm_vcpu_srcu_read_unlock(vcpu); - - if (!ret) + if (!kvm_emulate_halt(vcpu)) return EXIT_FASTPATH_EXIT_USERSPACE; if (kvm_vcpu_running(vcpu)) @@ -11317,7 +11692,7 @@ fastpath_t handle_fastpath_hlt(struct kvm_vcpu *vcpu) return EXIT_FASTPATH_EXIT_HANDLED; } -EXPORT_SYMBOL_GPL(handle_fastpath_hlt); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(handle_fastpath_hlt); int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu) { @@ -11326,7 +11701,7 @@ int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu) return __kvm_emulate_halt(vcpu, KVM_MP_STATE_AP_RESET_HOLD, KVM_EXIT_AP_RESET_HOLD) && ret; } -EXPORT_SYMBOL_GPL(kvm_emulate_ap_reset_hold); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_ap_reset_hold); bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu) { @@ -11837,6 +12212,25 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt; int ret; + if (kvm_is_cr4_bit_set(vcpu, X86_CR4_CET)) { + u64 u_cet, s_cet; + + /* + * Check both User and Supervisor on task switches as inter- + * privilege level task switches are impacted by CET at both + * the current privilege level and the new privilege level, and + * that information is not known at this time. The expectation + * is that the guest won't require emulation of task switches + * while using IBT or Shadow Stacks. 
+ */ + if (__kvm_emulate_msr_read(vcpu, MSR_IA32_U_CET, &u_cet) || + __kvm_emulate_msr_read(vcpu, MSR_IA32_S_CET, &s_cet)) + goto unhandled_task_switch; + + if ((u_cet | s_cet) & (CET_ENDBR_EN | CET_SHSTK_EN)) + goto unhandled_task_switch; + } + init_emulate_ctxt(vcpu); ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason, @@ -11846,19 +12240,21 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, * Report an error userspace if MMIO is needed, as KVM doesn't support * MMIO during a task switch (or any other complex operation). */ - if (ret || vcpu->mmio_needed) { - vcpu->mmio_needed = false; - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; - vcpu->run->internal.ndata = 0; - return 0; - } + if (ret || vcpu->mmio_needed) + goto unhandled_task_switch; kvm_rip_write(vcpu, ctxt->eip); kvm_set_rflags(vcpu, ctxt->eflags); return 1; + +unhandled_task_switch: + vcpu->mmio_needed = false; + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->internal.ndata = 0; + return 0; } -EXPORT_SYMBOL_GPL(kvm_task_switch); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_task_switch); static bool kvm_is_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { @@ -12388,6 +12784,42 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) kvfree(vcpu->arch.cpuid_entries); } +static void kvm_xstate_reset(struct kvm_vcpu *vcpu, bool init_event) +{ + struct fpstate *fpstate = vcpu->arch.guest_fpu.fpstate; + u64 xfeatures_mask; + int i; + + /* + * Guest FPU state is zero allocated and so doesn't need to be manually + * cleared on RESET, i.e. during vCPU creation. + */ + if (!init_event || !fpstate) + return; + + /* + * On INIT, only select XSTATE components are zeroed, most components + * are unchanged. Currently, the only components that are zeroed and + * supported by KVM are MPX and CET related. + */ + xfeatures_mask = (kvm_caps.supported_xcr0 | kvm_caps.supported_xss) & + (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR | + XFEATURE_MASK_CET_ALL); + if (!xfeatures_mask) + return; + + BUILD_BUG_ON(sizeof(xfeatures_mask) * BITS_PER_BYTE <= XFEATURE_MAX); + + /* + * All paths that lead to INIT are required to load the guest's FPU + * state (because most paths are buried in KVM_RUN). + */ + kvm_put_guest_fpu(vcpu); + for_each_set_bit(i, (unsigned long *)&xfeatures_mask, XFEATURE_MAX) + fpstate_clear_xstate_component(fpstate, i); + kvm_load_guest_fpu(vcpu); +} + void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) { struct kvm_cpuid_entry2 *cpuid_0x1; @@ -12445,22 +12877,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) kvm_async_pf_hash_reset(vcpu); vcpu->arch.apf.halted = false; - if (vcpu->arch.guest_fpu.fpstate && kvm_mpx_supported()) { - struct fpstate *fpstate = vcpu->arch.guest_fpu.fpstate; - - /* - * All paths that lead to INIT are required to load the guest's - * FPU state (because most paths are buried in KVM_RUN). 
- */ - if (init_event) - kvm_put_guest_fpu(vcpu); - - fpstate_clear_xstate_component(fpstate, XFEATURE_BNDREGS); - fpstate_clear_xstate_component(fpstate, XFEATURE_BNDCSR); - - if (init_event) - kvm_load_guest_fpu(vcpu); - } + kvm_xstate_reset(vcpu, init_event); if (!init_event) { vcpu->arch.smbase = 0x30000; @@ -12472,7 +12889,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) MSR_IA32_MISC_ENABLE_BTS_UNAVAIL; __kvm_set_xcr(vcpu, 0, XFEATURE_MASK_FP); - __kvm_set_msr(vcpu, MSR_IA32_XSS, 0, true); + kvm_msr_write(vcpu, MSR_IA32_XSS, 0); } /* All GPRs except RDX (handled below) are zeroed on RESET/INIT. */ @@ -12538,7 +12955,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) if (init_event) kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu); } -EXPORT_SYMBOL_GPL(kvm_vcpu_reset); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_reset); void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) { @@ -12550,7 +12967,7 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) kvm_set_segment(vcpu, &cs, VCPU_SREG_CS); kvm_rip_write(vcpu, 0); } -EXPORT_SYMBOL_GPL(kvm_vcpu_deliver_sipi_vector); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_deliver_sipi_vector); void kvm_arch_enable_virtualization(void) { @@ -12668,7 +13085,7 @@ bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu) { return vcpu->kvm->arch.bsp_vcpu_id == vcpu->vcpu_id; } -EXPORT_SYMBOL_GPL(kvm_vcpu_is_reset_bsp); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_is_reset_bsp); bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu) { @@ -12832,7 +13249,7 @@ void __user * __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, return (void __user *)hva; } -EXPORT_SYMBOL_GPL(__x86_set_memory_region); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(__x86_set_memory_region); void kvm_arch_pre_destroy_vm(struct kvm *kvm) { @@ -13240,13 +13657,13 @@ unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu) return (u32)(get_segment_base(vcpu, VCPU_SREG_CS) + kvm_rip_read(vcpu)); } -EXPORT_SYMBOL_GPL(kvm_get_linear_rip); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_get_linear_rip); bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip) { return kvm_get_linear_rip(vcpu) == linear_rip; } -EXPORT_SYMBOL_GPL(kvm_is_linear_rip); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_is_linear_rip); unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu) { @@ -13257,7 +13674,7 @@ unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu) rflags &= ~X86_EFLAGS_TF; return rflags; } -EXPORT_SYMBOL_GPL(kvm_get_rflags); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_get_rflags); static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) { @@ -13272,7 +13689,7 @@ void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) __kvm_set_rflags(vcpu, rflags); kvm_make_request(KVM_REQ_EVENT, vcpu); } -EXPORT_SYMBOL_GPL(kvm_set_rflags); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_rflags); static inline u32 kvm_async_pf_hash_fn(gfn_t gfn) { @@ -13504,31 +13921,34 @@ void kvm_arch_register_noncoherent_dma(struct kvm *kvm) if (atomic_inc_return(&kvm->arch.noncoherent_dma_count) == 1) kvm_noncoherent_dma_assignment_start_or_stop(kvm); } -EXPORT_SYMBOL_GPL(kvm_arch_register_noncoherent_dma); void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm) { if (!atomic_dec_return(&kvm->arch.noncoherent_dma_count)) kvm_noncoherent_dma_assignment_start_or_stop(kvm); } -EXPORT_SYMBOL_GPL(kvm_arch_unregister_noncoherent_dma); bool kvm_arch_has_noncoherent_dma(struct kvm *kvm) { return atomic_read(&kvm->arch.noncoherent_dma_count); } -EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma); 
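As a rough illustration of the KVM_GET_ONE_REG/KVM_SET_ONE_REG plumbing added earlier in this x86.c diff, userspace could read the guest shadow-stack pointer roughly as sketched below. This is a minimal sketch, not the series' actual selftest code: KVM_X86_REG_KVM() and KVM_REG_GUEST_SSP are assumed to come from the accompanying uapi header changes (not shown here), and error handling is elided.

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/*
 * Hypothetical userspace helper: fetch the guest SSP through the one-reg
 * interface.  KVM copies the 64-bit register value to the address in
 * reg.addr on success.
 */
static int read_guest_ssp(int vcpu_fd, uint64_t *ssp)
{
	struct kvm_one_reg reg = {
		.id   = KVM_X86_REG_KVM(KVM_REG_GUEST_SSP),	/* assumed uapi macro */
		.addr = (uint64_t)(uintptr_t)ssp,
	};

	return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
}

A KVM_GET_REG_LIST query beforehand (as implemented in kvm_get_reg_list() above) tells userspace whether the register is advertised at all, which in this patch depends on the vCPU exposing X86_FEATURE_SHSTK.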
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_arch_has_noncoherent_dma); -bool kvm_vector_hashing_enabled(void) +bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) { - return vector_hashing; + return (vcpu->arch.msr_kvm_poll_control & 1) == 0; } -bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) +#ifdef CONFIG_KVM_GUEST_MEMFD +/* + * KVM doesn't yet support initializing guest_memfd memory as shared for VMs + * with private memory (the private vs. shared tracking needs to be moved into + * guest_memfd). + */ +bool kvm_arch_supports_gmem_init_shared(struct kvm *kvm) { - return (vcpu->arch.msr_kvm_poll_control & 1) == 0; + return !kvm_arch_has_private_mem(kvm); } -EXPORT_SYMBOL_GPL(kvm_arch_no_poll); #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order) @@ -13543,6 +13963,7 @@ void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end) kvm_x86_call(gmem_invalidate)(start, end); } #endif +#endif int kvm_spec_ctrl_test_value(u64 value) { @@ -13568,7 +13989,7 @@ int kvm_spec_ctrl_test_value(u64 value) return ret; } -EXPORT_SYMBOL_GPL(kvm_spec_ctrl_test_value); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_spec_ctrl_test_value); void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_code) { @@ -13593,7 +14014,7 @@ void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_c } vcpu->arch.walk_mmu->inject_page_fault(vcpu, &fault); } -EXPORT_SYMBOL_GPL(kvm_fixup_and_inject_pf_error); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_fixup_and_inject_pf_error); /* * Handles kvm_read/write_guest_virt*() result and either injects #PF or returns @@ -13622,7 +14043,7 @@ int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r, return 0; } -EXPORT_SYMBOL_GPL(kvm_handle_memory_failure); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_handle_memory_failure); int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva) { @@ -13686,7 +14107,7 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva) return 1; } } -EXPORT_SYMBOL_GPL(kvm_handle_invpcid); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_handle_invpcid); static int complete_sev_es_emulated_mmio(struct kvm_vcpu *vcpu) { @@ -13771,7 +14192,7 @@ int kvm_sev_es_mmio_write(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes, return 0; } -EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_write); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_sev_es_mmio_write); int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes, void *data) @@ -13809,7 +14230,7 @@ int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes, return 0; } -EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_read); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_sev_es_mmio_read); static void advance_sev_es_emulated_pio(struct kvm_vcpu *vcpu, unsigned count, int size) { @@ -13897,7 +14318,7 @@ int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size, return in ? 
kvm_sev_es_ins(vcpu, size, port) : kvm_sev_es_outs(vcpu, size, port); } -EXPORT_SYMBOL_GPL(kvm_sev_es_string_io); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_sev_es_string_io); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_entry); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index bcfd9b719ada..f3dc77f006f9 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -50,6 +50,7 @@ struct kvm_host_values { u64 efer; u64 xcr0; u64 xss; + u64 s_cet; u64 arch_capabilities; }; @@ -101,6 +102,16 @@ do { \ #define KVM_SVM_DEFAULT_PLE_WINDOW_MAX USHRT_MAX #define KVM_SVM_DEFAULT_PLE_WINDOW 3000 +/* + * KVM's internal, non-ABI indices for synthetic MSRs. The values themselves + * are arbitrary and have no meaning, the only requirement is that they don't + * conflict with "real" MSRs that KVM supports. Use values at the upper end + * of KVM's reserved paravirtual MSR range to minimize churn, i.e. these values + * will be usable until KVM exhausts its supply of paravirtual MSR indices. + */ + +#define MSR_KVM_INTERNAL_GUEST_SSP 0x4b564dff + static inline unsigned int __grow_ple_window(unsigned int val, unsigned int base, unsigned int modifier, unsigned int max) { @@ -431,14 +442,15 @@ void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu, int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data); int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); -bool kvm_vector_hashing_enabled(void); void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_code); int x86_decode_emulated_instruction(struct kvm_vcpu *vcpu, int emulation_type, void *insn, int insn_len); int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, int emulation_type, void *insn, int insn_len); -fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu); +fastpath_t handle_fastpath_wrmsr(struct kvm_vcpu *vcpu); +fastpath_t handle_fastpath_wrmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg); fastpath_t handle_fastpath_hlt(struct kvm_vcpu *vcpu); +fastpath_t handle_fastpath_invd(struct kvm_vcpu *vcpu); extern struct kvm_caps kvm_caps; extern struct kvm_host_values kvm_host; @@ -668,6 +680,9 @@ static inline bool __kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) __reserved_bits |= X86_CR4_PCIDE; \ if (!__cpu_has(__c, X86_FEATURE_LAM)) \ __reserved_bits |= X86_CR4_LAM_SUP; \ + if (!__cpu_has(__c, X86_FEATURE_SHSTK) && \ + !__cpu_has(__c, X86_FEATURE_IBT)) \ + __reserved_bits |= X86_CR4_CET; \ __reserved_bits; \ }) @@ -699,4 +714,27 @@ int ____kvm_emulate_hypercall(struct kvm_vcpu *vcpu, int cpl, int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); +#define CET_US_RESERVED_BITS GENMASK(9, 6) +#define CET_US_SHSTK_MASK_BITS GENMASK(1, 0) +#define CET_US_IBT_MASK_BITS (GENMASK_ULL(5, 2) | GENMASK_ULL(63, 10)) +#define CET_US_LEGACY_BITMAP_BASE(data) ((data) >> 12) + +static inline bool kvm_is_valid_u_s_cet(struct kvm_vcpu *vcpu, u64 data) +{ + if (data & CET_US_RESERVED_BITS) + return false; + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) && + (data & CET_US_SHSTK_MASK_BITS)) + return false; + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_IBT) && + (data & CET_US_IBT_MASK_BITS)) + return false; + if (!IS_ALIGNED(CET_US_LEGACY_BITMAP_BASE(data), 4)) + return false; + /* IBT can be suppressed iff the TRACKER isn't WAIT_ENDBR. 
*/ + if ((data & CET_SUPPRESS) && (data & CET_WAIT_ENDBR)) + return false; + + return true; +} #endif diff --git a/arch/x86/lib/bhi.S b/arch/x86/lib/bhi.S index 58891681261b..aad1e5839202 100644 --- a/arch/x86/lib/bhi.S +++ b/arch/x86/lib/bhi.S @@ -5,7 +5,7 @@ #include <asm/nospec-branch.h> /* - * Notably, the FineIBT preamble calling these will have ZF set and r10 zero. + * Notably, the FineIBT preamble calling these will have ZF set and eax zero. * * The very last element is in fact larger than 32 bytes, but since its the * last element, this does not matter, @@ -36,7 +36,7 @@ SYM_INNER_LABEL(__bhi_args_1, SYM_L_LOCAL) ANNOTATE_NOENDBR UNWIND_HINT_FUNC jne .Lud_1 - cmovne %r10, %rdi + cmovne %rax, %rdi ANNOTATE_UNRET_SAFE ret int3 @@ -53,8 +53,8 @@ SYM_INNER_LABEL(__bhi_args_2, SYM_L_LOCAL) ANNOTATE_NOENDBR UNWIND_HINT_FUNC jne .Lud_1 - cmovne %r10, %rdi - cmovne %r10, %rsi + cmovne %rax, %rdi + cmovne %rax, %rsi ANNOTATE_UNRET_SAFE ret int3 @@ -64,9 +64,9 @@ SYM_INNER_LABEL(__bhi_args_3, SYM_L_LOCAL) ANNOTATE_NOENDBR UNWIND_HINT_FUNC jne .Lud_1 - cmovne %r10, %rdi - cmovne %r10, %rsi - cmovne %r10, %rdx + cmovne %rax, %rdi + cmovne %rax, %rsi + cmovne %rax, %rdx ANNOTATE_UNRET_SAFE ret int3 @@ -76,10 +76,10 @@ SYM_INNER_LABEL(__bhi_args_4, SYM_L_LOCAL) ANNOTATE_NOENDBR UNWIND_HINT_FUNC jne .Lud_2 - cmovne %r10, %rdi - cmovne %r10, %rsi - cmovne %r10, %rdx - cmovne %r10, %rcx + cmovne %rax, %rdi + cmovne %rax, %rsi + cmovne %rax, %rdx + cmovne %rax, %rcx ANNOTATE_UNRET_SAFE ret int3 @@ -89,11 +89,11 @@ SYM_INNER_LABEL(__bhi_args_5, SYM_L_LOCAL) ANNOTATE_NOENDBR UNWIND_HINT_FUNC jne .Lud_2 - cmovne %r10, %rdi - cmovne %r10, %rsi - cmovne %r10, %rdx - cmovne %r10, %rcx - cmovne %r10, %r8 + cmovne %rax, %rdi + cmovne %rax, %rsi + cmovne %rax, %rdx + cmovne %rax, %rcx + cmovne %rax, %r8 ANNOTATE_UNRET_SAFE ret int3 @@ -110,12 +110,12 @@ SYM_INNER_LABEL(__bhi_args_6, SYM_L_LOCAL) ANNOTATE_NOENDBR UNWIND_HINT_FUNC jne .Lud_2 - cmovne %r10, %rdi - cmovne %r10, %rsi - cmovne %r10, %rdx - cmovne %r10, %rcx - cmovne %r10, %r8 - cmovne %r10, %r9 + cmovne %rax, %rdi + cmovne %rax, %rsi + cmovne %rax, %rdx + cmovne %rax, %rcx + cmovne %rax, %r8 + cmovne %rax, %r9 ANNOTATE_UNRET_SAFE ret int3 @@ -125,13 +125,13 @@ SYM_INNER_LABEL(__bhi_args_7, SYM_L_LOCAL) ANNOTATE_NOENDBR UNWIND_HINT_FUNC jne .Lud_2 - cmovne %r10, %rdi - cmovne %r10, %rsi - cmovne %r10, %rdx - cmovne %r10, %rcx - cmovne %r10, %r8 - cmovne %r10, %r9 - cmovne %r10, %rsp + cmovne %rax, %rdi + cmovne %rax, %rsi + cmovne %rax, %rdx + cmovne %rax, %rcx + cmovne %rax, %r8 + cmovne %rax, %r9 + cmovne %rax, %rsp ANNOTATE_UNRET_SAFE ret int3 diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index f513d33b6d37..8f1fed0c3b83 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -134,10 +134,10 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array) .macro ITS_THUNK reg /* - * If CFI paranoid is used then the ITS thunk starts with opcodes (0xea; jne 1b) + * If CFI paranoid is used then the ITS thunk starts with opcodes (1: udb; jne 1b) * that complete the fineibt_paranoid caller sequence. 
*/ -1: .byte 0xea +1: ASM_UDB SYM_INNER_LABEL(__x86_indirect_paranoid_thunk_\reg, SYM_L_GLOBAL) UNWIND_HINT_UNDEFINED ANNOTATE_NOENDBR diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 8834c76f91c9..970981893c9b 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -399,15 +399,6 @@ static void cpa_flush_all(unsigned long cache) on_each_cpu(__cpa_flush_all, (void *) cache, 1); } -static void __cpa_flush_tlb(void *data) -{ - struct cpa_data *cpa = data; - unsigned int i; - - for (i = 0; i < cpa->numpages; i++) - flush_tlb_one_kernel(fix_addr(__cpa_addr(cpa, i))); -} - static int collapse_large_pages(unsigned long addr, struct list_head *pgtables); static void cpa_collapse_large_pages(struct cpa_data *cpa) @@ -444,6 +435,7 @@ static void cpa_collapse_large_pages(struct cpa_data *cpa) static void cpa_flush(struct cpa_data *cpa, int cache) { + unsigned long start, end; unsigned int i; BUG_ON(irqs_disabled() && !early_boot_irqs_disabled); @@ -453,10 +445,12 @@ static void cpa_flush(struct cpa_data *cpa, int cache) goto collapse_large_pages; } - if (cpa->force_flush_all || cpa->numpages > tlb_single_page_flush_ceiling) - flush_tlb_all(); - else - on_each_cpu(__cpa_flush_tlb, cpa, 1); + start = fix_addr(__cpa_addr(cpa, 0)); + end = start + cpa->numpages * PAGE_SIZE; + if (cpa->force_flush_all) + end = TLB_FLUSH_ALL; + + flush_tlb_kernel_range(start, end); if (!cache) goto collapse_large_pages; diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 39f80111e6f1..5d221709353e 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -911,11 +911,31 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, * CR3 and cpu_tlbstate.loaded_mm are not all in sync. */ this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING); - barrier(); - /* Start receiving IPIs and then read tlb_gen (and LAM below) */ + /* + * Make sure this CPU is set in mm_cpumask() such that we'll + * receive invalidation IPIs. + * + * Rely on the smp_mb() implied by cpumask_set_cpu()'s atomic + * operation, or explicitly provide one. Such that: + * + * switch_mm_irqs_off() flush_tlb_mm_range() + * smp_store_release(loaded_mm, SWITCHING); atomic64_inc_return(tlb_gen) + * smp_mb(); // here // smp_mb() implied + * atomic64_read(tlb_gen); this_cpu_read(loaded_mm); + * + * we properly order against flush_tlb_mm_range(), where the + * loaded_mm load can happen in native_flush_tlb_multi() -> + * should_flush_tlb(). + * + * This way switch_mm() must see the new tlb_gen or + * flush_tlb_mm_range() must see the new loaded_mm, or both.
+ */ if (next != &init_mm && !cpumask_test_cpu(cpu, mm_cpumask(next))) cpumask_set_cpu(cpu, mm_cpumask(next)); + else + smp_mb(); + next_tlb_gen = atomic64_read(&next->context.tlb_gen); ns = choose_new_asid(next, next_tlb_gen); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index fc13306af15f..d4c93d9e73e4 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -420,12 +420,12 @@ static void emit_fineibt(u8 **pprog, u8 *ip, u32 hash, int arity) u8 *prog = *pprog; EMIT_ENDBR(); - EMIT3_off32(0x41, 0x81, 0xea, hash); /* subl $hash, %r10d */ + EMIT1_off32(0x2d, hash); /* subl $hash, %eax */ if (cfi_bhi) { + EMIT2(0x2e, 0x2e); /* cs cs */ emit_call(&prog, __bhi_args[arity], ip + 11); } else { - EMIT2(0x75, 0xf9); /* jne.d8 .-7 */ - EMIT3(0x0f, 0x1f, 0x00); /* nop3 */ + EMIT3_off32(0x2e, 0x0f, 0x85, 3); /* jne.d32,pn 3 */ } EMIT_ENDBR_POISON(); diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index e7e71490bd25..25076a5acd96 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -295,6 +295,46 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PC, pcie_ro DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PC1, pcie_rootport_aspm_quirk); /* + * PCIe devices underneath Xeon 6 PCIe Root Port bifurcated to x2 have lower + * performance with Extended Tags and MRRS > 128B. Work around the performance + * problems by disabling Extended Tags and limiting MRRS to 128B. + * + * https://cdrdv2.intel.com/v1/dl/getContent/837176 + */ +static int limit_mrrs_to_128(struct pci_host_bridge *b, struct pci_dev *pdev) +{ + int readrq = pcie_get_readrq(pdev); + + if (readrq > 128) + pcie_set_readrq(pdev, 128); + + return 0; +} + +static void pci_xeon_x2_bifurc_quirk(struct pci_dev *pdev) +{ + struct pci_host_bridge *bridge = pci_find_host_bridge(pdev->bus); + u32 linkcap; + + pcie_capability_read_dword(pdev, PCI_EXP_LNKCAP, &linkcap); + if (FIELD_GET(PCI_EXP_LNKCAP_MLW, linkcap) != 0x2) + return; + + bridge->no_ext_tags = 1; + bridge->enable_device = limit_mrrs_to_128; + pci_info(pdev, "Disabling Extended Tags and limiting MRRS to 128B (performance reasons due to x2 PCIe link)\n"); +} + +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0db0, pci_xeon_x2_bifurc_quirk); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0db1, pci_xeon_x2_bifurc_quirk); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0db2, pci_xeon_x2_bifurc_quirk); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0db3, pci_xeon_x2_bifurc_quirk); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0db6, pci_xeon_x2_bifurc_quirk); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0db7, pci_xeon_x2_bifurc_quirk); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0db8, pci_xeon_x2_bifurc_quirk); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0db9, pci_xeon_x2_bifurc_quirk); + +/* * Fixup to mark boot BIOS video selected by BIOS before it changes * * From information provided by "Jon Smirl" <jonsmirl@gmail.com> diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S index 2206b8bc47b8..f0a5fba0717e 100644 --- a/arch/x86/platform/efi/efi_stub_64.S +++ b/arch/x86/platform/efi/efi_stub_64.S @@ -11,6 +11,10 @@ #include <asm/nospec-branch.h> SYM_FUNC_START(__efi_call) + /* + * The EFI code doesn't have any CFI, annotate away the CFI violation. 
+ */ + ANNOTATE_NOCFI_SYM pushq %rbp movq %rsp, %rbp and $~0xf, %rsp diff --git a/arch/x86/um/shared/sysdep/stub_32.h b/arch/x86/um/shared/sysdep/stub_32.h index df568fc3ceb4..9dc2efaf5df1 100644 --- a/arch/x86/um/shared/sysdep/stub_32.h +++ b/arch/x86/um/shared/sysdep/stub_32.h @@ -129,7 +129,7 @@ static __always_inline void *get_stub_data(void) "subl %0,%%esp ;" \ "movl %1, %%eax ; " \ "call *%%eax ;" \ - :: "i" ((1 + STUB_DATA_PAGES) * UM_KERN_PAGE_SIZE), \ + :: "i" (STUB_SIZE), \ "i" (&fn)) static __always_inline void diff --git a/arch/x86/um/shared/sysdep/stub_64.h b/arch/x86/um/shared/sysdep/stub_64.h index 9cfd31afa769..9fd56954e2e0 100644 --- a/arch/x86/um/shared/sysdep/stub_64.h +++ b/arch/x86/um/shared/sysdep/stub_64.h @@ -133,7 +133,7 @@ static __always_inline void *get_stub_data(void) "subq %0,%%rsp ;" \ "movq %1,%%rax ;" \ "call *%%rax ;" \ - :: "i" ((1 + STUB_DATA_PAGES) * UM_KERN_PAGE_SIZE), \ + :: "i" (STUB_SIZE), \ "i" (&fn)) static __always_inline void diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c index c7a9a087ccaf..eac403248462 100644 --- a/arch/x86/virt/vmx/tdx/tdx.c +++ b/arch/x86/virt/vmx/tdx/tdx.c @@ -633,15 +633,19 @@ err: } /* - * Convert TDX private pages back to normal by using MOVDIR64B to - * clear these pages. Note this function doesn't flush cache of - * these TDX private pages. The caller should make sure of that. + * Convert TDX private pages back to normal by using MOVDIR64B to clear these + * pages. Typically, any write to the page will convert it from TDX private back + * to normal kernel memory. Systems with the X86_BUG_TDX_PW_MCE erratum need to + * do the conversion explicitly via MOVDIR64B. */ -static void reset_tdx_pages(unsigned long base, unsigned long size) +static void tdx_quirk_reset_paddr(unsigned long base, unsigned long size) { const void *zero_page = (const void *)page_address(ZERO_PAGE(0)); unsigned long phys, end; + if (!boot_cpu_has_bug(X86_BUG_TDX_PW_MCE)) + return; + end = base + size; for (phys = base; phys < end; phys += 64) movdir64b(__va(phys), zero_page); @@ -654,17 +658,23 @@ static void reset_tdx_pages(unsigned long base, unsigned long size) mb(); } -static void tdmr_reset_pamt(struct tdmr_info *tdmr) +void tdx_quirk_reset_page(struct page *page) +{ + tdx_quirk_reset_paddr(page_to_phys(page), PAGE_SIZE); +} +EXPORT_SYMBOL_GPL(tdx_quirk_reset_page); + +static void tdmr_quirk_reset_pamt(struct tdmr_info *tdmr) { - tdmr_do_pamt_func(tdmr, reset_tdx_pages); + tdmr_do_pamt_func(tdmr, tdx_quirk_reset_paddr); } -static void tdmrs_reset_pamt_all(struct tdmr_info_list *tdmr_list) +static void tdmrs_quirk_reset_pamt_all(struct tdmr_info_list *tdmr_list) { int i; for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) - tdmr_reset_pamt(tdmr_entry(tdmr_list, i)); + tdmr_quirk_reset_pamt(tdmr_entry(tdmr_list, i)); } static unsigned long tdmrs_count_pamt_kb(struct tdmr_info_list *tdmr_list) @@ -1136,15 +1146,7 @@ err_reset_pamts: * to the kernel. */ wbinvd_on_all_cpus(); - /* - * According to the TDX hardware spec, if the platform - * doesn't have the "partial write machine check" - * erratum, any kernel read/write will never cause #MC - * in kernel space, thus it's OK to not convert PAMTs - * back to normal. But do the conversion anyway here - * as suggested by the TDX spec. 
- */ - tdmrs_reset_pamt_all(&tdx_tdmr_list); + tdmrs_quirk_reset_pamt_all(&tdx_tdmr_list); err_free_pamts: tdmrs_free_pamt_all(&tdx_tdmr_list); err_free_tdmrs: @@ -1266,7 +1268,7 @@ static bool paddr_is_tdx_private(unsigned long phys) return false; /* Get page type from the TDX module */ - sret = __seamcall_ret(TDH_PHYMEM_PAGE_RDMD, &args); + sret = __seamcall_dirty_cache(__seamcall_ret, TDH_PHYMEM_PAGE_RDMD, &args); /* * The SEAMCALL will not return success unless there is a @@ -1502,11 +1504,6 @@ static inline u64 tdx_tdr_pa(struct tdx_td *td) return page_to_phys(td->tdr_page); } -static inline u64 tdx_tdvpr_pa(struct tdx_vp *td) -{ - return page_to_phys(td->tdvpr_page); -} - /* * The TDX module exposes a CLFLUSH_BEFORE_ALLOC bit to specify whether * a CLFLUSH of pages is required before handing them to the TDX module. @@ -1518,11 +1515,11 @@ static void tdx_clflush_page(struct page *page) clflush_cache_range(page_to_virt(page), PAGE_SIZE); } -noinstr __flatten u64 tdh_vp_enter(struct tdx_vp *td, struct tdx_module_args *args) +noinstr u64 tdh_vp_enter(struct tdx_vp *td, struct tdx_module_args *args) { - args->rcx = tdx_tdvpr_pa(td); + args->rcx = td->tdvpr_pa; - return __seamcall_saved_ret(TDH_VP_ENTER, args); + return __seamcall_dirty_cache(__seamcall_saved_ret, TDH_VP_ENTER, args); } EXPORT_SYMBOL_GPL(tdh_vp_enter); @@ -1581,7 +1578,7 @@ u64 tdh_vp_addcx(struct tdx_vp *vp, struct page *tdcx_page) { struct tdx_module_args args = { .rcx = page_to_phys(tdcx_page), - .rdx = tdx_tdvpr_pa(vp), + .rdx = vp->tdvpr_pa, }; tdx_clflush_page(tdcx_page); @@ -1650,7 +1647,7 @@ EXPORT_SYMBOL_GPL(tdh_mng_create); u64 tdh_vp_create(struct tdx_td *td, struct tdx_vp *vp) { struct tdx_module_args args = { - .rcx = tdx_tdvpr_pa(vp), + .rcx = vp->tdvpr_pa, .rdx = tdx_tdr_pa(td), }; @@ -1706,7 +1703,7 @@ EXPORT_SYMBOL_GPL(tdh_mr_finalize); u64 tdh_vp_flush(struct tdx_vp *vp) { struct tdx_module_args args = { - .rcx = tdx_tdvpr_pa(vp), + .rcx = vp->tdvpr_pa, }; return seamcall(TDH_VP_FLUSH, &args); @@ -1752,7 +1749,7 @@ EXPORT_SYMBOL_GPL(tdh_mng_init); u64 tdh_vp_rd(struct tdx_vp *vp, u64 field, u64 *data) { struct tdx_module_args args = { - .rcx = tdx_tdvpr_pa(vp), + .rcx = vp->tdvpr_pa, .rdx = field, }; u64 ret; @@ -1769,7 +1766,7 @@ EXPORT_SYMBOL_GPL(tdh_vp_rd); u64 tdh_vp_wr(struct tdx_vp *vp, u64 field, u64 data, u64 mask) { struct tdx_module_args args = { - .rcx = tdx_tdvpr_pa(vp), + .rcx = vp->tdvpr_pa, .rdx = field, .r8 = data, .r9 = mask, @@ -1782,7 +1779,7 @@ EXPORT_SYMBOL_GPL(tdh_vp_wr); u64 tdh_vp_init(struct tdx_vp *vp, u64 initial_rcx, u32 x2apicid) { struct tdx_module_args args = { - .rcx = tdx_tdvpr_pa(vp), + .rcx = vp->tdvpr_pa, .rdx = initial_rcx, .r8 = x2apicid, }; @@ -1870,3 +1867,22 @@ u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, struct page *page) return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args); } EXPORT_SYMBOL_GPL(tdh_phymem_page_wbinvd_hkid); + +#ifdef CONFIG_KEXEC_CORE +void tdx_cpu_flush_cache_for_kexec(void) +{ + lockdep_assert_preemption_disabled(); + + if (!this_cpu_read(cache_state_incoherent)) + return; + + /* + * Private memory cachelines need to be clean at the time of + * kexec. Write them back now, as the caller promises that + * there should be no more SEAMCALLs on this CPU. 
+ */ + wbinvd(); + this_cpu_write(cache_state_incoherent, false); +} +EXPORT_SYMBOL_GPL(tdx_cpu_flush_cache_for_kexec); +#endif diff --git a/arch/xtensa/configs/audio_kc705_defconfig b/arch/xtensa/configs/audio_kc705_defconfig index f2af1a32c9c7..dc942bbac69f 100644 --- a/arch/xtensa/configs/audio_kc705_defconfig +++ b/arch/xtensa/configs/audio_kc705_defconfig @@ -103,7 +103,7 @@ CONFIG_SND_SIMPLE_CARD=y # CONFIG_USB_SUPPORT is not set CONFIG_COMMON_CLK_CDCE706=y # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_EXT4_FS=y CONFIG_FANOTIFY=y CONFIG_VFAT_FS=y diff --git a/arch/xtensa/configs/cadence_csp_defconfig b/arch/xtensa/configs/cadence_csp_defconfig index 88ed5284e21c..81a057f25f21 100644 --- a/arch/xtensa/configs/cadence_csp_defconfig +++ b/arch/xtensa/configs/cadence_csp_defconfig @@ -80,7 +80,7 @@ CONFIG_SOFT_WATCHDOG=y # CONFIG_VGA_CONSOLE is not set # CONFIG_USB_SUPPORT is not set # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_FANOTIFY=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y diff --git a/arch/xtensa/configs/generic_kc705_defconfig b/arch/xtensa/configs/generic_kc705_defconfig index 4427907becca..3ee7e1c56556 100644 --- a/arch/xtensa/configs/generic_kc705_defconfig +++ b/arch/xtensa/configs/generic_kc705_defconfig @@ -90,7 +90,7 @@ CONFIG_SOFT_WATCHDOG=y # CONFIG_VGA_CONSOLE is not set # CONFIG_USB_SUPPORT is not set # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_EXT4_FS=y CONFIG_FANOTIFY=y CONFIG_VFAT_FS=y diff --git a/arch/xtensa/configs/nommu_kc705_defconfig b/arch/xtensa/configs/nommu_kc705_defconfig index 5828228522ba..c6e96f0aa700 100644 --- a/arch/xtensa/configs/nommu_kc705_defconfig +++ b/arch/xtensa/configs/nommu_kc705_defconfig @@ -91,7 +91,7 @@ CONFIG_WATCHDOG_NOWAYOUT=y CONFIG_SOFT_WATCHDOG=y # CONFIG_VGA_CONSOLE is not set # CONFIG_USB_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_EXT4_FS=y CONFIG_FANOTIFY=y CONFIG_VFAT_FS=y diff --git a/arch/xtensa/configs/smp_lx200_defconfig b/arch/xtensa/configs/smp_lx200_defconfig index 326966ca7831..373d42b9e510 100644 --- a/arch/xtensa/configs/smp_lx200_defconfig +++ b/arch/xtensa/configs/smp_lx200_defconfig @@ -94,7 +94,7 @@ CONFIG_SOFT_WATCHDOG=y # CONFIG_VGA_CONSOLE is not set # CONFIG_USB_SUPPORT is not set # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_EXT4_FS=y CONFIG_FANOTIFY=y CONFIG_VFAT_FS=y diff --git a/arch/xtensa/configs/virt_defconfig b/arch/xtensa/configs/virt_defconfig index e37048985b47..72628d31e87a 100644 --- a/arch/xtensa/configs/virt_defconfig +++ b/arch/xtensa/configs/virt_defconfig @@ -76,7 +76,7 @@ CONFIG_LOGO=y CONFIG_VIRTIO_PCI=y CONFIG_VIRTIO_INPUT=y # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_FANOTIFY=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y diff --git a/arch/xtensa/configs/xip_kc705_defconfig b/arch/xtensa/configs/xip_kc705_defconfig index ee47438f9b51..5d6013ea70fc 100644 --- a/arch/xtensa/configs/xip_kc705_defconfig +++ b/arch/xtensa/configs/xip_kc705_defconfig @@ -82,7 +82,7 @@ CONFIG_SOFT_WATCHDOG=y # CONFIG_VGA_CONSOLE is not set # CONFIG_USB_SUPPORT is not set # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_FANOTIFY=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h index d6eb695f2b26..50a136213b2b 100644 --- a/arch/xtensa/include/asm/pgtable.h +++ b/arch/xtensa/include/asm/pgtable.h @@ -58,7 +58,6 @@ #define PTRS_PER_PTE_SHIFT 10 
#define PTRS_PER_PGD 1024 #define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE) -#define FIRST_USER_PGD_NR (FIRST_USER_ADDRESS >> PGDIR_SHIFT) #ifdef CONFIG_MMU /* diff --git a/arch/xtensa/kernel/platform.c b/arch/xtensa/kernel/platform.c index 926b8bf0f14c..f14713060fd4 100644 --- a/arch/xtensa/kernel/platform.c +++ b/arch/xtensa/kernel/platform.c @@ -14,6 +14,7 @@ #include <linux/printk.h> #include <linux/types.h> +#include <linux/units.h> #include <asm/platform.h> #include <asm/timex.h> @@ -38,7 +39,7 @@ void __weak platform_idle(void) #ifdef CONFIG_XTENSA_CALIBRATE_CCOUNT void __weak platform_calibrate_ccount(void) { - pr_err("ERROR: Cannot calibrate cpu frequency! Assuming 10MHz.\n"); - ccount_freq = 10 * 1000000UL; + pr_err("ERROR: Cannot calibrate cpu frequency! Assuming 10 MHz.\n"); + ccount_freq = 10 * HZ_PER_MHZ; } #endif diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c index 6ed009318d24..3cafc8feddee 100644 --- a/arch/xtensa/platforms/iss/simdisk.c +++ b/arch/xtensa/platforms/iss/simdisk.c @@ -231,10 +231,14 @@ static ssize_t proc_read_simdisk(struct file *file, char __user *buf, static ssize_t proc_write_simdisk(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - char *tmp = memdup_user_nul(buf, count); + char *tmp; struct simdisk *dev = pde_data(file_inode(file)); int err; + if (count == 0 || count > PAGE_SIZE) + return -EINVAL; + + tmp = memdup_user_nul(buf, count); if (IS_ERR(tmp)) return PTR_ERR(tmp);
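The simdisk change just above follows a common pattern for procfs write handlers: reject empty or oversized writes before copying from userspace, so memdup_user_nul() is never asked for a zero-length or unbounded allocation. A minimal sketch of that pattern, with illustrative names rather than the driver's actual code:

#include <linux/err.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/string.h>

static ssize_t example_proc_write(struct file *file, const char __user *buf,
				  size_t count, loff_t *ppos)
{
	char *tmp;

	/* Bound the copy before allocating, as the fix above does. */
	if (count == 0 || count > PAGE_SIZE)
		return -EINVAL;

	tmp = memdup_user_nul(buf, count);	/* allocates count + 1 bytes and NUL-terminates */
	if (IS_ERR(tmp))
		return PTR_ERR(tmp);

	/* ... parse the NUL-terminated command in tmp ... */

	kfree(tmp);
	return count;
}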