diff options
Diffstat (limited to 'arch/arm64')
130 files changed, 3065 insertions, 956 deletions
| diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index a93339f5178f..b488076d63c2 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -522,20 +522,13 @@ config CAVIUM_ERRATUM_30115  config QCOM_FALKOR_ERRATUM_1003  	bool "Falkor E1003: Incorrect translation due to ASID change"  	default y -	select ARM64_PAN if ARM64_SW_TTBR0_PAN  	help  	  On Falkor v1, an incorrect ASID may be cached in the TLB when ASID -	  and BADDR are changed together in TTBRx_EL1. The workaround for this -	  issue is to use a reserved ASID in cpu_do_switch_mm() before -	  switching to the new ASID. Saying Y here selects ARM64_PAN if -	  ARM64_SW_TTBR0_PAN is selected. This is done because implementing and -	  maintaining the E1003 workaround in the software PAN emulation code -	  would be an unnecessary complication. The affected Falkor v1 CPU -	  implements ARMv8.1 hardware PAN support and using hardware PAN -	  support versus software PAN emulation is mutually exclusive at -	  runtime. - -	  If unsure, say Y. +	  and BADDR are changed together in TTBRx_EL1. Since we keep the ASID +	  in TTBR1_EL1, this situation only occurs in the entry trampoline and +	  then only for entries in the walk cache, since the leaf translation +	  is unchanged. Work around the erratum by invalidating the walk cache +	  entries for the trampoline before entering the kernel proper.  config QCOM_FALKOR_ERRATUM_1009  	bool "Falkor E1009: Prematurely complete a DSB after a TLBI" @@ -557,7 +550,6 @@ config QCOM_QDF2400_ERRATUM_0065  	  If unsure, say Y. -  config SOCIONEXT_SYNQUACER_PREITS  	bool "Socionext Synquacer: Workaround for GICv3 pre-ITS"  	default y @@ -576,6 +568,17 @@ config HISILICON_ERRATUM_161600802  	  a 128kB offset to be applied to the target address in this commands.  	  If unsure, say Y. 
+ +config QCOM_FALKOR_ERRATUM_E1041 +	bool "Falkor E1041: Speculative instruction fetches might cause errant memory access" +	default y +	help +	  Falkor CPU may speculatively fetch instructions from an improper +	  memory location when MMU translation is changed from SCTLR_ELn[M]=1 +	  to SCTLR_ELn[M]=0. Prefix an ISB instruction to fix the problem. + +	  If unsure, say Y. +  endmenu @@ -646,6 +649,35 @@ config ARM64_VA_BITS  	default 47 if ARM64_VA_BITS_47  	default 48 if ARM64_VA_BITS_48 +choice +	prompt "Physical address space size" +	default ARM64_PA_BITS_48 +	help +	  Choose the maximum physical address range that the kernel will +	  support. + +config ARM64_PA_BITS_48 +	bool "48-bit" + +config ARM64_PA_BITS_52 +	bool "52-bit (ARMv8.2)" +	depends on ARM64_64K_PAGES +	depends on ARM64_PAN || !ARM64_SW_TTBR0_PAN +	help +	  Enable support for a 52-bit physical address space, introduced as +	  part of the ARMv8.2-LPA extension. + +	  With this enabled, the kernel will also continue to work on CPUs that +	  do not support ARMv8.2-LPA, but with some added memory overhead (and +	  minor performance overhead). + +endchoice + +config ARM64_PA_BITS +	int +	default 48 if ARM64_PA_BITS_48 +	default 52 if ARM64_PA_BITS_52 +  config CPU_BIG_ENDIAN         bool "Build big-endian kernel"         help @@ -840,6 +872,35 @@ config FORCE_MAX_ZONEORDER  	  However for 4K, we choose a higher default value, 11 as opposed to 10, giving us  	  4M allocations matching the default size used by generic code. +config UNMAP_KERNEL_AT_EL0 +	bool "Unmap kernel when running in userspace (aka \"KAISER\")" if EXPERT +	default y +	help +	  Speculation attacks against some high-performance processors can +	  be used to bypass MMU permission checks and leak kernel data to +	  userspace. This can be defended against by unmapping the kernel +	  when running in userspace, mapping it back in on exception entry +	  via a trampoline page in the vector table. + +	  If unsure, say Y. 
+ +config HARDEN_BRANCH_PREDICTOR +	bool "Harden the branch predictor against aliasing attacks" if EXPERT +	default y +	help +	  Speculation attacks against some high-performance processors rely on +	  being able to manipulate the branch predictor for a victim context by +	  executing aliasing branches in the attacker context.  Such attacks +	  can be partially mitigated against by clearing internal branch +	  predictor state and limiting the prediction logic in some situations. + +	  This config option will take CPU-specific actions to harden the +	  branch predictor against aliasing attacks and may rely on specific +	  instruction sequences or control bits being set by the system +	  firmware. + +	  If unsure, say Y. +  menuconfig ARMV8_DEPRECATED  	bool "Emulate deprecated/obsolete ARMv8 instructions"  	depends on COMPAT @@ -1011,6 +1072,22 @@ config ARM64_PMEM  	  operations if DC CVAP is not supported (following the behaviour of  	  DC CVAP itself if the system does not define a point of persistence). +config ARM64_RAS_EXTN +	bool "Enable support for RAS CPU Extensions" +	default y +	help +	  CPUs that support the Reliability, Availability and Serviceability +	  (RAS) Extensions, part of ARMv8.2 are able to track faults and +	  errors, classify them and report them to software. + +	  On CPUs with these extensions system software can use additional +	  barriers to determine if faults are pending and read the +	  classification from a new set of registers. + +	  Selecting this feature will allow the kernel to use these barriers +	  and access the new registers if the system supports the extension. +	  Platform RAS features may additionally depend on firmware support. 
+  endmenu  config ARM64_SVE diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index b35788c909f1..b481b4a7c011 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -83,9 +83,6 @@ endif  ifeq ($(CONFIG_ARM64_MODULE_PLTS),y)  KBUILD_LDFLAGS_MODULE	+= -T $(srctree)/arch/arm64/kernel/module.lds -ifeq ($(CONFIG_DYNAMIC_FTRACE),y) -KBUILD_LDFLAGS_MODULE	+= $(objtree)/arch/arm64/kernel/ftrace-mod.o -endif  endif  # Default value diff --git a/arch/arm64/boot/dts/Makefile b/arch/arm64/boot/dts/Makefile index d7c22d51bc50..4aa50b9b26bc 100644 --- a/arch/arm64/boot/dts/Makefile +++ b/arch/arm64/boot/dts/Makefile @@ -12,6 +12,7 @@ subdir-y += cavium  subdir-y += exynos  subdir-y += freescale  subdir-y += hisilicon +subdir-y += lg  subdir-y += marvell  subdir-y += mediatek  subdir-y += nvidia @@ -22,5 +23,4 @@ subdir-y += rockchip  subdir-y += socionext  subdir-y += sprd  subdir-y += xilinx -subdir-y += lg  subdir-y += zte diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts index 45bdbfb96126..4a8d3f83a36e 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts @@ -75,6 +75,7 @@  	pinctrl-0 = <&rgmii_pins>;  	phy-mode = "rgmii";  	phy-handle = <&ext_rgmii_phy>; +	phy-supply = <®_dc1sw>;  	status = "okay";  }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts index 806442d3e846..604cdaedac38 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts @@ -77,6 +77,7 @@  	pinctrl-0 = <&rmii_pins>;  	phy-mode = "rmii";  	phy-handle = <&ext_rmii_phy1>; +	phy-supply = <®_dc1sw>;  	status = "okay";  }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts index 0eb2acedf8c3..abe179de35d7 100644 --- 
a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts @@ -82,6 +82,7 @@  	pinctrl-0 = <&rgmii_pins>;  	phy-mode = "rgmii";  	phy-handle = <&ext_rgmii_phy>; +	phy-supply = <®_dc1sw>;  	status = "okay";  }; @@ -95,7 +96,7 @@  &mmc2 {  	pinctrl-names = "default";  	pinctrl-0 = <&mmc2_pins>; -	vmmc-supply = <®_vcc3v3>; +	vmmc-supply = <®_dcdc1>;  	vqmmc-supply = <®_vcc1v8>;  	bus-width = <8>;  	non-removable; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi index a5da18a6f286..43418bd881d8 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi @@ -45,19 +45,10 @@  #include "sun50i-a64.dtsi" -/ { -	reg_vcc3v3: vcc3v3 { -		compatible = "regulator-fixed"; -		regulator-name = "vcc3v3"; -		regulator-min-microvolt = <3300000>; -		regulator-max-microvolt = <3300000>; -	}; -}; -  &mmc0 {  	pinctrl-names = "default";  	pinctrl-0 = <&mmc0_pins>; -	vmmc-supply = <®_vcc3v3>; +	vmmc-supply = <®_dcdc1>;  	non-removable;  	disable-wp;  	bus-width = <4>; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus2.dts index b6b7a561df8c..a42fd79a62a3 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus2.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus2.dts @@ -71,7 +71,7 @@  	pinctrl-0 = <&mmc0_pins_a>, <&mmc0_cd_pin>;  	vmmc-supply = <®_vcc3v3>;  	bus-width = <4>; -	cd-gpios = <&pio 5 6 GPIO_ACTIVE_HIGH>; +	cd-gpios = <&pio 5 6 GPIO_ACTIVE_LOW>;  	status = "okay";  }; diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi index 7c9bdc7ab50b..9db19314c60c 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi @@ -66,6 +66,7 @@  				 
    <&cpu1>,  				     <&cpu2>,  				     <&cpu3>; +		interrupt-parent = <&intc>;  	};  	psci { diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi index ead895a4e9a5..1fb8b9d6cb4e 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi @@ -753,12 +753,12 @@  &uart_B {  	clocks = <&xtal>, <&clkc CLKID_UART1>, <&xtal>; -	clock-names = "xtal", "core", "baud"; +	clock-names = "xtal", "pclk", "baud";  };  &uart_C {  	clocks = <&xtal>, <&clkc CLKID_UART2>, <&xtal>; -	clock-names = "xtal", "core", "baud"; +	clock-names = "xtal", "pclk", "baud";  };  &vpu { diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi index 8ed981f59e5a..6524b89e7115 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi @@ -688,7 +688,7 @@  &uart_A {  	clocks = <&xtal>, <&clkc CLKID_UART0>, <&xtal>; -	clock-names = "xtal", "core", "baud"; +	clock-names = "xtal", "pclk", "baud";  };  &uart_AO { @@ -703,12 +703,12 @@  &uart_B {  	clocks = <&xtal>, <&clkc CLKID_UART1>, <&xtal>; -	clock-names = "xtal", "core", "baud"; +	clock-names = "xtal", "pclk", "baud";  };  &uart_C {  	clocks = <&xtal>, <&clkc CLKID_UART2>, <&xtal>; -	clock-names = "xtal", "core", "baud"; +	clock-names = "xtal", "pclk", "baud";  };  &vpu { diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi index e3b64d03fbd8..9c7724e82aff 100644 --- a/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi @@ -63,8 +63,10 @@  			cpm_ethernet: ethernet@0 {  				compatible = "marvell,armada-7k-pp22";  				reg = <0x0 0x100000>, <0x129000 0xb000>; -				clocks = <&cpm_clk 1 3>, <&cpm_clk 1 9>, <&cpm_clk 1 5>; -				clock-names = "pp_clk", "gop_clk", "mg_clk"; +				clocks = <&cpm_clk 1 3>, <&cpm_clk 1 9>, +					 <&cpm_clk 1 5>, 
<&cpm_clk 1 18>; +				clock-names = "pp_clk", "gop_clk", +					      "mg_clk","axi_clk";  				marvell,system-controller = <&cpm_syscon0>;  				status = "disabled";  				dma-coherent; @@ -155,7 +157,8 @@  				#size-cells = <0>;  				compatible = "marvell,orion-mdio";  				reg = <0x12a200 0x10>; -				clocks = <&cpm_clk 1 9>, <&cpm_clk 1 5>; +				clocks = <&cpm_clk 1 9>, <&cpm_clk 1 5>, +					 <&cpm_clk 1 6>, <&cpm_clk 1 18>;  				status = "disabled";  			}; @@ -338,8 +341,8 @@  				compatible = "marvell,armada-cp110-sdhci";  				reg = <0x780000 0x300>;  				interrupts = <ICU_GRP_NSR 27 IRQ_TYPE_LEVEL_HIGH>; -				clock-names = "core"; -				clocks = <&cpm_clk 1 4>; +				clock-names = "core","axi"; +				clocks = <&cpm_clk 1 4>, <&cpm_clk 1 18>;  				dma-coherent;  				status = "disabled";  			}; diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi index 0d51096c69f8..87ac68b2cf37 100644 --- a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi @@ -63,8 +63,10 @@  			cps_ethernet: ethernet@0 {  				compatible = "marvell,armada-7k-pp22";  				reg = <0x0 0x100000>, <0x129000 0xb000>; -				clocks = <&cps_clk 1 3>, <&cps_clk 1 9>, <&cps_clk 1 5>; -				clock-names = "pp_clk", "gop_clk", "mg_clk"; +				clocks = <&cps_clk 1 3>, <&cps_clk 1 9>, +					 <&cps_clk 1 5>, <&cps_clk 1 18>; +				clock-names = "pp_clk", "gop_clk", +					      "mg_clk", "axi_clk";  				marvell,system-controller = <&cps_syscon0>;  				status = "disabled";  				dma-coherent; @@ -155,7 +157,8 @@  				#size-cells = <0>;  				compatible = "marvell,orion-mdio";  				reg = <0x12a200 0x10>; -				clocks = <&cps_clk 1 9>, <&cps_clk 1 5>; +				clocks = <&cps_clk 1 9>, <&cps_clk 1 5>, +					 <&cps_clk 1 6>, <&cps_clk 1 18>;  				status = "disabled";  			}; diff --git a/arch/arm64/boot/dts/renesas/salvator-common.dtsi b/arch/arm64/boot/dts/renesas/salvator-common.dtsi index 
a298df74ca6c..dbe2648649db 100644 --- a/arch/arm64/boot/dts/renesas/salvator-common.dtsi +++ b/arch/arm64/boot/dts/renesas/salvator-common.dtsi @@ -255,7 +255,6 @@  &avb {  	pinctrl-0 = <&avb_pins>;  	pinctrl-names = "default"; -	renesas,no-ether-link;  	phy-handle = <&phy0>;  	status = "okay"; diff --git a/arch/arm64/boot/dts/renesas/ulcb.dtsi b/arch/arm64/boot/dts/renesas/ulcb.dtsi index 0d85b315ce71..73439cf48659 100644 --- a/arch/arm64/boot/dts/renesas/ulcb.dtsi +++ b/arch/arm64/boot/dts/renesas/ulcb.dtsi @@ -145,7 +145,6 @@  &avb {  	pinctrl-0 = <&avb_pins>;  	pinctrl-names = "default"; -	renesas,no-ether-link;  	phy-handle = <&phy0>;  	status = "okay"; diff --git a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts index d4f80786e7c2..3890468678ce 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts @@ -132,6 +132,8 @@  	assigned-clocks = <&cru SCLK_MAC2IO>, <&cru SCLK_MAC2IO_EXT>;  	assigned-clock-parents = <&gmac_clkin>, <&gmac_clkin>;  	clock_in_out = "input"; +	/* shows instability at 1GBit right now */ +	max-speed = <100>;  	phy-supply = <&vcc_io>;  	phy-mode = "rgmii";  	pinctrl-names = "default"; diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index 41d61840fb99..2426da631938 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -514,7 +514,7 @@  	tsadc: tsadc@ff250000 {  		compatible = "rockchip,rk3328-tsadc";  		reg = <0x0 0xff250000 0x0 0x100>; -		interrupts = <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH 0>; +		interrupts = <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH>;  		assigned-clocks = <&cru SCLK_TSADC>;  		assigned-clock-rates = <50000>;  		clocks = <&cru SCLK_TSADC>, <&cru PCLK_TSADC>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi index 910628d18add..1fc5060d7027 100644 --- 
a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi @@ -155,17 +155,6 @@  		regulator-min-microvolt = <5000000>;  		regulator-max-microvolt = <5000000>;  	}; - -	vdd_log: vdd-log { -		compatible = "pwm-regulator"; -		pwms = <&pwm2 0 25000 0>; -		regulator-name = "vdd_log"; -		regulator-min-microvolt = <800000>; -		regulator-max-microvolt = <1400000>; -		regulator-always-on; -		regulator-boot-on; -		status = "okay"; -	};  };  &cpu_b0 { diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld11-ref.dts b/arch/arm64/boot/dts/socionext/uniphier-ld11-ref.dts index dd7193acc7df..6bdefb26b329 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-ld11-ref.dts +++ b/arch/arm64/boot/dts/socionext/uniphier-ld11-ref.dts @@ -40,7 +40,6 @@  };  ðsc { -	interrupt-parent = <&gpio>;  	interrupts = <0 8>;  }; diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts b/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts index d99e3731358c..254d6795c67e 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts +++ b/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts @@ -40,7 +40,6 @@  };  ðsc { -	interrupt-parent = <&gpio>;  	interrupts = <0 8>;  }; diff --git a/arch/arm64/boot/dts/socionext/uniphier-pxs3-ref.dts b/arch/arm64/boot/dts/socionext/uniphier-pxs3-ref.dts index 864feeb35180..f9f06fcfb94a 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-pxs3-ref.dts +++ b/arch/arm64/boot/dts/socionext/uniphier-pxs3-ref.dts @@ -38,8 +38,7 @@  };  ðsc { -	interrupt-parent = <&gpio>; -	interrupts = <0 8>; +	interrupts = <4 8>;  };  &serial0 { diff --git a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi index 48e733136db4..0ac2ace82435 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi +++ b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi @@ -198,8 +198,8 @@  			gpio-controller;  			#gpio-cells = <2>;  			gpio-ranges = <&pinctrl 0 0 0>, -				      <&pinctrl 96 0 0>, -				 
     <&pinctrl 160 0 0>; +				      <&pinctrl 104 0 0>, +				      <&pinctrl 168 0 0>;  			gpio-ranges-group-names = "gpio_range0",  						  "gpio_range1",  						  "gpio_range2"; diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 6356c6da34ea..b20fa9b31efe 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -161,7 +161,7 @@ CONFIG_MTD_BLOCK=y  CONFIG_MTD_M25P80=y  CONFIG_MTD_NAND=y  CONFIG_MTD_NAND_DENALI_DT=y -CONFIG_MTD_NAND_PXA3xx=y +CONFIG_MTD_NAND_MARVELL=y  CONFIG_MTD_SPI_NOR=y  CONFIG_BLK_DEV_LOOP=y  CONFIG_BLK_DEV_NBD=m diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index 4a85c6952a22..669028172fd6 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -12,6 +12,8 @@  #include <linux/stddef.h>  #include <linux/stringify.h> +extern int alternatives_applied; +  struct alt_instr {  	s32 orig_offset;	/* offset to original instruction */  	s32 alt_offset;		/* offset to replacement instruction */ diff --git a/arch/arm64/include/asm/arm_dsu_pmu.h b/arch/arm64/include/asm/arm_dsu_pmu.h new file mode 100644 index 000000000000..82e5cc3356bf --- /dev/null +++ b/arch/arm64/include/asm/arm_dsu_pmu.h @@ -0,0 +1,129 @@ +/* + * ARM DynamIQ Shared Unit (DSU) PMU Low level register access routines. + * + * Copyright (C) ARM Limited, 2017. + * + * Author: Suzuki K Poulose <suzuki.poulose@arm.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2, as published by the Free Software Foundation. 
+ */ + +#include <linux/bitops.h> +#include <linux/build_bug.h> +#include <linux/compiler.h> +#include <linux/types.h> +#include <asm/barrier.h> +#include <asm/sysreg.h> + + +#define CLUSTERPMCR_EL1			sys_reg(3, 0, 15, 5, 0) +#define CLUSTERPMCNTENSET_EL1		sys_reg(3, 0, 15, 5, 1) +#define CLUSTERPMCNTENCLR_EL1		sys_reg(3, 0, 15, 5, 2) +#define CLUSTERPMOVSSET_EL1		sys_reg(3, 0, 15, 5, 3) +#define CLUSTERPMOVSCLR_EL1		sys_reg(3, 0, 15, 5, 4) +#define CLUSTERPMSELR_EL1		sys_reg(3, 0, 15, 5, 5) +#define CLUSTERPMINTENSET_EL1		sys_reg(3, 0, 15, 5, 6) +#define CLUSTERPMINTENCLR_EL1		sys_reg(3, 0, 15, 5, 7) +#define CLUSTERPMCCNTR_EL1		sys_reg(3, 0, 15, 6, 0) +#define CLUSTERPMXEVTYPER_EL1		sys_reg(3, 0, 15, 6, 1) +#define CLUSTERPMXEVCNTR_EL1		sys_reg(3, 0, 15, 6, 2) +#define CLUSTERPMMDCR_EL1		sys_reg(3, 0, 15, 6, 3) +#define CLUSTERPMCEID0_EL1		sys_reg(3, 0, 15, 6, 4) +#define CLUSTERPMCEID1_EL1		sys_reg(3, 0, 15, 6, 5) + +static inline u32 __dsu_pmu_read_pmcr(void) +{ +	return read_sysreg_s(CLUSTERPMCR_EL1); +} + +static inline void __dsu_pmu_write_pmcr(u32 val) +{ +	write_sysreg_s(val, CLUSTERPMCR_EL1); +	isb(); +} + +static inline u32 __dsu_pmu_get_reset_overflow(void) +{ +	u32 val = read_sysreg_s(CLUSTERPMOVSCLR_EL1); +	/* Clear the bit */ +	write_sysreg_s(val, CLUSTERPMOVSCLR_EL1); +	isb(); +	return val; +} + +static inline void __dsu_pmu_select_counter(int counter) +{ +	write_sysreg_s(counter, CLUSTERPMSELR_EL1); +	isb(); +} + +static inline u64 __dsu_pmu_read_counter(int counter) +{ +	__dsu_pmu_select_counter(counter); +	return read_sysreg_s(CLUSTERPMXEVCNTR_EL1); +} + +static inline void __dsu_pmu_write_counter(int counter, u64 val) +{ +	__dsu_pmu_select_counter(counter); +	write_sysreg_s(val, CLUSTERPMXEVCNTR_EL1); +	isb(); +} + +static inline void __dsu_pmu_set_event(int counter, u32 event) +{ +	__dsu_pmu_select_counter(counter); +	write_sysreg_s(event, CLUSTERPMXEVTYPER_EL1); +	isb(); +} + +static inline u64 __dsu_pmu_read_pmccntr(void) +{ +	return 
read_sysreg_s(CLUSTERPMCCNTR_EL1); +} + +static inline void __dsu_pmu_write_pmccntr(u64 val) +{ +	write_sysreg_s(val, CLUSTERPMCCNTR_EL1); +	isb(); +} + +static inline void __dsu_pmu_disable_counter(int counter) +{ +	write_sysreg_s(BIT(counter), CLUSTERPMCNTENCLR_EL1); +	isb(); +} + +static inline void __dsu_pmu_enable_counter(int counter) +{ +	write_sysreg_s(BIT(counter), CLUSTERPMCNTENSET_EL1); +	isb(); +} + +static inline void __dsu_pmu_counter_interrupt_enable(int counter) +{ +	write_sysreg_s(BIT(counter), CLUSTERPMINTENSET_EL1); +	isb(); +} + +static inline void __dsu_pmu_counter_interrupt_disable(int counter) +{ +	write_sysreg_s(BIT(counter), CLUSTERPMINTENCLR_EL1); +	isb(); +} + + +static inline u32 __dsu_pmu_read_pmceid(int n) +{ +	switch (n) { +	case 0: +		return read_sysreg_s(CLUSTERPMCEID0_EL1); +	case 1: +		return read_sysreg_s(CLUSTERPMCEID1_EL1); +	default: +		BUILD_BUG(); +		return 0; +	} +} diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h index b3da6c886835..4128bec033f6 100644 --- a/arch/arm64/include/asm/asm-uaccess.h +++ b/arch/arm64/include/asm/asm-uaccess.h @@ -4,6 +4,7 @@  #include <asm/alternative.h>  #include <asm/kernel-pgtable.h> +#include <asm/mmu.h>  #include <asm/sysreg.h>  #include <asm/assembler.h> @@ -12,52 +13,63 @@   */  #ifdef CONFIG_ARM64_SW_TTBR0_PAN  	.macro	__uaccess_ttbr0_disable, tmp1 -	mrs	\tmp1, ttbr1_el1		// swapper_pg_dir -	add	\tmp1, \tmp1, #SWAPPER_DIR_SIZE	// reserved_ttbr0 at the end of swapper_pg_dir -	msr	ttbr0_el1, \tmp1		// set reserved TTBR0_EL1 +	mrs	\tmp1, ttbr1_el1			// swapper_pg_dir +	bic	\tmp1, \tmp1, #TTBR_ASID_MASK +	sub	\tmp1, \tmp1, #RESERVED_TTBR0_SIZE	// reserved_ttbr0 just before swapper_pg_dir +	msr	ttbr0_el1, \tmp1			// set reserved TTBR0_EL1 +	isb +	add	\tmp1, \tmp1, #RESERVED_TTBR0_SIZE +	msr	ttbr1_el1, \tmp1		// set reserved ASID  	isb  	.endm -	.macro	__uaccess_ttbr0_enable, tmp1 +	.macro	__uaccess_ttbr0_enable, tmp1, tmp2  	get_thread_info \tmp1  	ldr	
\tmp1, [\tmp1, #TSK_TI_TTBR0]	// load saved TTBR0_EL1 +	mrs	\tmp2, ttbr1_el1 +	extr    \tmp2, \tmp2, \tmp1, #48 +	ror     \tmp2, \tmp2, #16 +	msr	ttbr1_el1, \tmp2		// set the active ASID +	isb  	msr	ttbr0_el1, \tmp1		// set the non-PAN TTBR0_EL1  	isb  	.endm -	.macro	uaccess_ttbr0_disable, tmp1 +	.macro	uaccess_ttbr0_disable, tmp1, tmp2  alternative_if_not ARM64_HAS_PAN +	save_and_disable_irq \tmp2		// avoid preemption  	__uaccess_ttbr0_disable \tmp1 +	restore_irq \tmp2  alternative_else_nop_endif  	.endm -	.macro	uaccess_ttbr0_enable, tmp1, tmp2 +	.macro	uaccess_ttbr0_enable, tmp1, tmp2, tmp3  alternative_if_not ARM64_HAS_PAN -	save_and_disable_irq \tmp2		// avoid preemption -	__uaccess_ttbr0_enable \tmp1 -	restore_irq \tmp2 +	save_and_disable_irq \tmp3		// avoid preemption +	__uaccess_ttbr0_enable \tmp1, \tmp2 +	restore_irq \tmp3  alternative_else_nop_endif  	.endm  #else -	.macro	uaccess_ttbr0_disable, tmp1 +	.macro	uaccess_ttbr0_disable, tmp1, tmp2  	.endm -	.macro	uaccess_ttbr0_enable, tmp1, tmp2 +	.macro	uaccess_ttbr0_enable, tmp1, tmp2, tmp3  	.endm  #endif  /*   * These macros are no-ops when UAO is present.   
*/ -	.macro	uaccess_disable_not_uao, tmp1 -	uaccess_ttbr0_disable \tmp1 +	.macro	uaccess_disable_not_uao, tmp1, tmp2 +	uaccess_ttbr0_disable \tmp1, \tmp2  alternative_if ARM64_ALT_PAN_NOT_UAO  	SET_PSTATE_PAN(1)  alternative_else_nop_endif  	.endm -	.macro	uaccess_enable_not_uao, tmp1, tmp2 -	uaccess_ttbr0_enable \tmp1, \tmp2 +	.macro	uaccess_enable_not_uao, tmp1, tmp2, tmp3 +	uaccess_ttbr0_enable \tmp1, \tmp2, \tmp3  alternative_if ARM64_ALT_PAN_NOT_UAO  	SET_PSTATE_PAN(0)  alternative_else_nop_endif diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index aef72d886677..3873dd7b5a32 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -26,7 +26,6 @@  #include <asm/asm-offsets.h>  #include <asm/cpufeature.h>  #include <asm/debug-monitors.h> -#include <asm/mmu_context.h>  #include <asm/page.h>  #include <asm/pgtable-hwdef.h>  #include <asm/ptrace.h> @@ -110,6 +109,13 @@  	.endm  /* + * RAS Error Synchronization barrier + */ +	.macro  esb +	hint    #16 +	.endm + +/*   * NOP sequence   */  	.macro	nops, num @@ -255,7 +261,11 @@ lr	.req	x30		// link register  #else  	adr_l	\dst, \sym  #endif +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN  	mrs	\tmp, tpidr_el1 +alternative_else +	mrs	\tmp, tpidr_el2 +alternative_endif  	add	\dst, \dst, \tmp  	.endm @@ -266,7 +276,11 @@ lr	.req	x30		// link register  	 */  	.macro ldr_this_cpu dst, sym, tmp  	adr_l	\dst, \sym +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN  	mrs	\tmp, tpidr_el1 +alternative_else +	mrs	\tmp, tpidr_el2 +alternative_endif  	ldr	\dst, [\dst, \tmp]  	.endm @@ -344,10 +358,26 @@ alternative_endif   * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map   */  	.macro	tcr_set_idmap_t0sz, valreg, tmpreg -#ifndef CONFIG_ARM64_VA_BITS_48  	ldr_l	\tmpreg, idmap_t0sz  	bfi	\valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH -#endif +	.endm + +/* + * tcr_compute_pa_size - set TCR.(I)PS to the highest supported + * 
ID_AA64MMFR0_EL1.PARange value + * + *	tcr:		register with the TCR_ELx value to be updated + *	pos:		IPS or PS bitfield position + *	tmp{0,1}:	temporary registers + */ +	.macro	tcr_compute_pa_size, tcr, pos, tmp0, tmp1 +	mrs	\tmp0, ID_AA64MMFR0_EL1 +	// Narrow PARange to fit the PS field in TCR_ELx +	ubfx	\tmp0, \tmp0, #ID_AA64MMFR0_PARANGE_SHIFT, #3 +	mov	\tmp1, #ID_AA64MMFR0_PARANGE_MAX +	cmp	\tmp0, \tmp1 +	csel	\tmp0, \tmp1, \tmp0, hi +	bfi	\tcr, \tmp0, \pos, #3  	.endm  /* @@ -478,37 +508,28 @@ alternative_endif  	.endm  /* - * Errata workaround prior to TTBR0_EL1 update + * Arrange a physical address in a TTBR register, taking care of 52-bit + * addresses.   * - * 	val:	TTBR value with new BADDR, preserved - * 	tmp0:	temporary register, clobbered - * 	tmp1:	other temporary register, clobbered + * 	phys:	physical address, preserved + * 	ttbr:	returns the TTBR value   */ -	.macro	pre_ttbr0_update_workaround, val, tmp0, tmp1 -#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003 -alternative_if ARM64_WORKAROUND_QCOM_FALKOR_E1003 -	mrs	\tmp0, ttbr0_el1 -	mov	\tmp1, #FALKOR_RESERVED_ASID -	bfi	\tmp0, \tmp1, #48, #16		// reserved ASID + old BADDR -	msr	ttbr0_el1, \tmp0 -	isb -	bfi	\tmp0, \val, #0, #48		// reserved ASID + new BADDR -	msr	ttbr0_el1, \tmp0 -	isb -alternative_else_nop_endif +	.macro	phys_to_ttbr, phys, ttbr +#ifdef CONFIG_ARM64_PA_BITS_52 +	orr	\ttbr, \phys, \phys, lsr #46 +	and	\ttbr, \ttbr, #TTBR_BADDR_MASK_52 +#else +	mov	\ttbr, \phys  #endif  	.endm -/* - * Errata workaround post TTBR0_EL1 update. +/** + * Errata workaround prior to disable MMU. Insert an ISB immediately prior + * to executing the MSR that will change SCTLR_ELn[M] from a value of 1 to 0.   
*/ -	.macro	post_ttbr0_update_workaround -#ifdef CONFIG_CAVIUM_ERRATUM_27456 -alternative_if ARM64_WORKAROUND_CAVIUM_27456 -	ic	iallu -	dsb	nsh +	.macro pre_disable_mmu_workaround +#ifdef CONFIG_QCOM_FALKOR_ERRATUM_E1041  	isb -alternative_else_nop_endif  #endif  	.endm diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 76d1cc85d5b1..955130762a3c 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -38,7 +38,7 @@   *   *	See Documentation/cachetlb.txt for more information. Please note that   *	the implementation assumes non-aliasing VIPT D-cache and (aliasing) - *	VIPT or ASID-tagged VIVT I-cache. + *	VIPT I-cache.   *   *	flush_cache_mm(mm)   * diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index a3c7f271ad4c..c00c62e1a4a3 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -150,70 +150,6 @@ typedef u32		compat_old_sigset_t;  typedef u32		compat_sigset_word; -typedef union compat_sigval { -	compat_int_t	sival_int; -	compat_uptr_t	sival_ptr; -} compat_sigval_t; - -typedef struct compat_siginfo { -	int si_signo; -	int si_errno; -	int si_code; - -	union { -		int _pad[128/sizeof(int) - 3]; - -		/* kill() */ -		struct { -			compat_pid_t _pid;	/* sender's pid */ -			__compat_uid32_t _uid;	/* sender's uid */ -		} _kill; - -		/* POSIX.1b timers */ -		struct { -			compat_timer_t _tid;	/* timer id */ -			int _overrun;		/* overrun count */ -			compat_sigval_t _sigval;	/* same as below */ -			int _sys_private;       /* not to be passed to user */ -		} _timer; - -		/* POSIX.1b signals */ -		struct { -			compat_pid_t _pid;	/* sender's pid */ -			__compat_uid32_t _uid;	/* sender's uid */ -			compat_sigval_t _sigval; -		} _rt; - -		/* SIGCHLD */ -		struct { -			compat_pid_t _pid;	/* which child */ -			__compat_uid32_t _uid;	/* sender's uid */ -			int _status;		/* exit code */ -			compat_clock_t _utime; -			compat_clock_t _stime; -		} 
_sigchld; - -		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ -		struct { -			compat_uptr_t _addr; /* faulting insn/memory ref. */ -			short _addr_lsb; /* LSB of the reported address */ -		} _sigfault; - -		/* SIGPOLL */ -		struct { -			compat_long_t _band;	/* POLL_IN, POLL_OUT, POLL_MSG */ -			int _fd; -		} _sigpoll; - -		/* SIGSYS */ -		struct { -			compat_uptr_t _call_addr; /* calling user insn */ -			int _syscall;	/* triggering system call number */ -			compat_uint_t _arch;	/* AUDIT_ARCH_* of syscall */ -		} _sigsys; -	} _sifields; -} compat_siginfo_t; -  #define COMPAT_OFF_T_MAX	0x7fffffff  /* diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 2ff7c5e8efab..bb263820de13 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -41,7 +41,11 @@  #define ARM64_WORKAROUND_CAVIUM_30115		20  #define ARM64_HAS_DCPOP				21  #define ARM64_SVE				22 +#define ARM64_UNMAP_KERNEL_AT_EL0		23 +#define ARM64_HARDEN_BRANCH_PREDICTOR		24 +#define ARM64_HARDEN_BP_POST_GUEST_EXIT		25 +#define ARM64_HAS_RAS_EXTN			26 -#define ARM64_NCAPS				23 +#define ARM64_NCAPS				27  #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index ac67cfc2585a..060e3a4008ab 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -60,6 +60,9 @@ enum ftr_type {  #define FTR_VISIBLE	true	/* Feature visible to the user space */  #define FTR_HIDDEN	false	/* Feature is hidden from the user */ +#define FTR_VISIBLE_IF_IS_ENABLED(config)		\ +	(IS_ENABLED(config) ? FTR_VISIBLE : FTR_HIDDEN) +  struct arm64_ftr_bits {  	bool		sign;	/* Value is signed ? 
*/  	bool		visible; diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 235e77d98261..be7bd19c87ec 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -79,26 +79,37 @@  #define ARM_CPU_PART_AEM_V8		0xD0F  #define ARM_CPU_PART_FOUNDATION		0xD00  #define ARM_CPU_PART_CORTEX_A57		0xD07 +#define ARM_CPU_PART_CORTEX_A72		0xD08  #define ARM_CPU_PART_CORTEX_A53		0xD03  #define ARM_CPU_PART_CORTEX_A73		0xD09 +#define ARM_CPU_PART_CORTEX_A75		0xD0A  #define APM_CPU_PART_POTENZA		0x000  #define CAVIUM_CPU_PART_THUNDERX	0x0A1  #define CAVIUM_CPU_PART_THUNDERX_81XX	0x0A2  #define CAVIUM_CPU_PART_THUNDERX_83XX	0x0A3 +#define CAVIUM_CPU_PART_THUNDERX2	0x0AF  #define BRCM_CPU_PART_VULCAN		0x516  #define QCOM_CPU_PART_FALKOR_V1		0x800 +#define QCOM_CPU_PART_FALKOR		0xC00 +#define QCOM_CPU_PART_KRYO		0x200  #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)  #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) +#define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72)  #define MIDR_CORTEX_A73 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A73) +#define MIDR_CORTEX_A75 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A75)  #define MIDR_THUNDERX	MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)  #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)  #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) +#define MIDR_CAVIUM_THUNDERX2 MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX2) +#define MIDR_BRCM_VULCAN MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_VULCAN)  #define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1) +#define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR) +#define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO)  #ifndef __ASSEMBLY__ diff 
--git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 650344d01124..8389050328bb 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -121,22 +121,22 @@ static inline void efi_set_pgd(struct mm_struct *mm)  		if (mm != current->active_mm) {  			/*  			 * Update the current thread's saved ttbr0 since it is -			 * restored as part of a return from exception. Set -			 * the hardware TTBR0_EL1 using cpu_switch_mm() -			 * directly to enable potential errata workarounds. +			 * restored as part of a return from exception. Enable +			 * access to the valid TTBR0_EL1 and invoke the errata +			 * workaround directly since there is no return from +			 * exception when invoking the EFI run-time services.  			 */  			update_saved_ttbr0(current, mm); -			cpu_switch_mm(mm->pgd, mm); +			uaccess_ttbr0_enable(); +			post_ttbr_update_workaround();  		} else {  			/*  			 * Defer the switch to the current thread's TTBR0_EL1  			 * until uaccess_enable(). Restore the current  			 * thread's saved ttbr0 corresponding to its active_mm -			 * (if different from init_mm).  			 
*/ -			cpu_set_reserved_ttbr0(); -			if (current->active_mm != &init_mm) -				update_saved_ttbr0(current, current->active_mm); +			uaccess_ttbr0_disable(); +			update_saved_ttbr0(current, current->active_mm);  		}  	}  } diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 014d7d8edcf9..803443d74926 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -86,6 +86,18 @@  #define ESR_ELx_WNR_SHIFT	(6)  #define ESR_ELx_WNR		(UL(1) << ESR_ELx_WNR_SHIFT) +/* Asynchronous Error Type */ +#define ESR_ELx_IDS_SHIFT	(24) +#define ESR_ELx_IDS		(UL(1) << ESR_ELx_IDS_SHIFT) +#define ESR_ELx_AET_SHIFT	(10) +#define ESR_ELx_AET		(UL(0x7) << ESR_ELx_AET_SHIFT) + +#define ESR_ELx_AET_UC		(UL(0) << ESR_ELx_AET_SHIFT) +#define ESR_ELx_AET_UEU		(UL(1) << ESR_ELx_AET_SHIFT) +#define ESR_ELx_AET_UEO		(UL(2) << ESR_ELx_AET_SHIFT) +#define ESR_ELx_AET_UER		(UL(3) << ESR_ELx_AET_SHIFT) +#define ESR_ELx_AET_CE		(UL(6) << ESR_ELx_AET_SHIFT) +  /* Shared ISS field definitions for Data/Instruction aborts */  #define ESR_ELx_SET_SHIFT	(11)  #define ESR_ELx_SET_MASK	(UL(3) << ESR_ELx_SET_SHIFT) @@ -100,6 +112,7 @@  #define ESR_ELx_FSC		(0x3F)  #define ESR_ELx_FSC_TYPE	(0x3C)  #define ESR_ELx_FSC_EXTABT	(0x10) +#define ESR_ELx_FSC_SERROR	(0x11)  #define ESR_ELx_FSC_ACCESS	(0x08)  #define ESR_ELx_FSC_FAULT	(0x04)  #define ESR_ELx_FSC_PERM	(0x0C) @@ -127,6 +140,13 @@  #define ESR_ELx_WFx_ISS_WFE	(UL(1) << 0)  #define ESR_ELx_xVC_IMM_MASK	((1UL << 16) - 1) +#define DISR_EL1_IDS		(UL(1) << 24) +/* + * DISR_EL1 and ESR_ELx share the bottom 13 bits, but the RES0 bits may mean + * different things in the future... 
+ */ +#define DISR_EL1_ESR_MASK	(ESR_ELx_AET | ESR_ELx_EA | ESR_ELx_FSC) +  /* ESR value templates for specific events */  /* BRK instruction trap from AArch64 state */ diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index 0c2eec490abf..bc30429d8e91 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -18,6 +18,8 @@  #ifndef __ASM_EXCEPTION_H  #define __ASM_EXCEPTION_H +#include <asm/esr.h> +  #include <linux/interrupt.h>  #define __exception	__attribute__((section(".exception.text"))) @@ -27,4 +29,16 @@  #define __exception_irq_entry	__exception  #endif +static inline u32 disr_to_esr(u64 disr) +{ +	unsigned int esr = ESR_ELx_EC_SERROR << ESR_ELx_EC_SHIFT; + +	if ((disr & DISR_EL1_IDS) == 0) +		esr |= (disr & DISR_EL1_ESR_MASK); +	else +		esr |= (disr & ESR_ELx_ISS_MASK); + +	return esr; +} +  #endif	/* __ASM_EXCEPTION_H */ diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index 4052ec39e8db..ec1e6d6fa14c 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -58,6 +58,11 @@ enum fixed_addresses {  	FIX_APEI_GHES_NMI,  #endif /* CONFIG_ACPI_APEI_GHES */ +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +	FIX_ENTRY_TRAMP_DATA, +	FIX_ENTRY_TRAMP_TEXT, +#define TRAMP_VALIAS		(__fix_to_virt(FIX_ENTRY_TRAMP_TEXT)) +#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */  	__end_of_permanent_fixed_addresses,  	/* diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 74f34392a531..8857a0f0d0f7 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -71,7 +71,7 @@ extern void fpsimd_flush_thread(void);  extern void fpsimd_signal_preserve_current_state(void);  extern void fpsimd_preserve_current_state(void);  extern void fpsimd_restore_current_state(void); -extern void fpsimd_update_current_state(struct fpsimd_state *state); +extern void fpsimd_update_current_state(struct user_fpsimd_state const *state);  
extern void fpsimd_flush_task_state(struct task_struct *target);  extern void sve_flush_cpu_state(void); diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index 7803343e5881..82386e860dd2 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -52,7 +52,52 @@  #define IDMAP_PGTABLE_LEVELS	(ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT))  #endif -#define SWAPPER_DIR_SIZE	(SWAPPER_PGTABLE_LEVELS * PAGE_SIZE) + +/* + * If KASLR is enabled, then an offset K is added to the kernel address + * space. The bottom 21 bits of this offset are zero to guarantee 2MB + * alignment for PA and VA. + * + * For each pagetable level of the swapper, we know that the shift will + * be larger than 21 (for the 4KB granule case we use section maps thus + * the smallest shift is actually 30) thus there is the possibility that + * KASLR can increase the number of pagetable entries by 1, so we make + * room for this extra entry. + * + * Note KASLR cannot increase the number of required entries for a level + * by more than one because it increments both the virtual start and end + * addresses equally (the extra entry comes from the case where the end + * address is just pushed over a boundary and the start address isn't). 
+ */ + +#ifdef CONFIG_RANDOMIZE_BASE +#define EARLY_KASLR	(1) +#else +#define EARLY_KASLR	(0) +#endif + +#define EARLY_ENTRIES(vstart, vend, shift) (((vend) >> (shift)) \ +					- ((vstart) >> (shift)) + 1 + EARLY_KASLR) + +#define EARLY_PGDS(vstart, vend) (EARLY_ENTRIES(vstart, vend, PGDIR_SHIFT)) + +#if SWAPPER_PGTABLE_LEVELS > 3 +#define EARLY_PUDS(vstart, vend) (EARLY_ENTRIES(vstart, vend, PUD_SHIFT)) +#else +#define EARLY_PUDS(vstart, vend) (0) +#endif + +#if SWAPPER_PGTABLE_LEVELS > 2 +#define EARLY_PMDS(vstart, vend) (EARLY_ENTRIES(vstart, vend, SWAPPER_TABLE_SHIFT)) +#else +#define EARLY_PMDS(vstart, vend) (0) +#endif + +#define EARLY_PAGES(vstart, vend) ( 1 			/* PGDIR page */				\ +			+ EARLY_PGDS((vstart), (vend)) 	/* each PGDIR needs a next level page table */	\ +			+ EARLY_PUDS((vstart), (vend))	/* each PUD needs a next level page table */	\ +			+ EARLY_PMDS((vstart), (vend)))	/* each PMD needs a next level page table */ +#define SWAPPER_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR + TEXT_OFFSET, _end))  #define IDMAP_DIR_SIZE		(IDMAP_PGTABLE_LEVELS * PAGE_SIZE)  #ifdef CONFIG_ARM64_SW_TTBR0_PAN @@ -78,8 +123,16 @@  /*   * Initial memory map attributes.   
*/ -#define SWAPPER_PTE_FLAGS	(PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) -#define SWAPPER_PMD_FLAGS	(PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) +#define _SWAPPER_PTE_FLAGS	(PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) +#define _SWAPPER_PMD_FLAGS	(PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) + +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +#define SWAPPER_PTE_FLAGS	(_SWAPPER_PTE_FLAGS | PTE_NG) +#define SWAPPER_PMD_FLAGS	(_SWAPPER_PMD_FLAGS | PMD_SECT_NG) +#else +#define SWAPPER_PTE_FLAGS	_SWAPPER_PTE_FLAGS +#define SWAPPER_PMD_FLAGS	_SWAPPER_PMD_FLAGS +#endif  #if ARM64_SWAPPER_USES_SECTION_MAPS  #define SWAPPER_MM_MMUFLAGS	(PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 7f069ff37f06..b0c84171e6a3 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -23,6 +23,8 @@  #include <asm/types.h>  /* Hyp Configuration Register (HCR) bits */ +#define HCR_TEA		(UL(1) << 37) +#define HCR_TERR	(UL(1) << 36)  #define HCR_E2H		(UL(1) << 34)  #define HCR_ID		(UL(1) << 33)  #define HCR_CD		(UL(1) << 32) @@ -170,8 +172,7 @@  #define VTCR_EL2_FLAGS			(VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN_FLAGS)  #define VTTBR_X				(VTTBR_X_TGRAN_MAGIC - VTCR_EL2_T0SZ_IPA) -#define VTTBR_BADDR_SHIFT (VTTBR_X - 1) -#define VTTBR_BADDR_MASK  (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) +#define VTTBR_BADDR_MASK  (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_X)  #define VTTBR_VMID_SHIFT  (UL(48))  #define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index ab4d0a926043..24961b732e65 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -68,6 +68,8 @@ extern u32 __kvm_get_mdcr_el2(void);  extern u32 __init_stage2_translation(void); +extern void __qcom_hyp_sanitize_btac_predictors(void); +  #endif  #endif /* __ARM_KVM_ASM_H__ */ diff --git 
a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 5f28dfa14cee..413dc82b1e89 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -50,6 +50,13 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)  	vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;  	if (is_kernel_in_hyp_mode())  		vcpu->arch.hcr_el2 |= HCR_E2H; +	if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) { +		/* route synchronous external abort exceptions to EL2 */ +		vcpu->arch.hcr_el2 |= HCR_TEA; +		/* trap error record accesses */ +		vcpu->arch.hcr_el2 |= HCR_TERR; +	} +  	if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features))  		vcpu->arch.hcr_el2 &= ~HCR_RW;  } @@ -64,6 +71,11 @@ static inline void vcpu_set_hcr(struct kvm_vcpu *vcpu, unsigned long hcr)  	vcpu->arch.hcr_el2 = hcr;  } +static inline void vcpu_set_vsesr(struct kvm_vcpu *vcpu, u64 vsesr) +{ +	vcpu->arch.vsesr_el2 = vsesr; +} +  static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu)  {  	return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc; @@ -171,6 +183,11 @@ static inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu)  	return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8;  } +static inline u64 kvm_vcpu_get_disr(const struct kvm_vcpu *vcpu) +{ +	return vcpu->arch.fault.disr_el1; +} +  static inline u32 kvm_vcpu_hvc_get_imm(const struct kvm_vcpu *vcpu)  {  	return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_xVC_IMM_MASK; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 674912d7a571..4485ae8e98de 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -25,6 +25,7 @@  #include <linux/types.h>  #include <linux/kvm_types.h>  #include <asm/cpufeature.h> +#include <asm/daifflags.h>  #include <asm/fpsimd.h>  #include <asm/kvm.h>  #include <asm/kvm_asm.h> @@ -89,6 +90,7 @@ struct kvm_vcpu_fault_info {  	u32 esr_el2;		/* Hyp Syndrom Register */  	u64 far_el2;		/* Hyp Fault 
Address Register */  	u64 hpfar_el2;		/* Hyp IPA Fault Address Register */ +	u64 disr_el1;		/* Deferred [SError] Status Register */  };  /* @@ -120,6 +122,7 @@ enum vcpu_sysreg {  	PAR_EL1,	/* Physical Address Register */  	MDSCR_EL1,	/* Monitor Debug System Control Register */  	MDCCINT_EL1,	/* Monitor Debug Comms Channel Interrupt Enable Reg */ +	DISR_EL1,	/* Deferred Interrupt Status Register */  	/* Performance Monitors Registers */  	PMCR_EL0,	/* Control Register */ @@ -192,6 +195,8 @@ struct kvm_cpu_context {  		u64 sys_regs[NR_SYS_REGS];  		u32 copro[NR_COPRO_REGS];  	}; + +	struct kvm_vcpu *__hyp_running_vcpu;  };  typedef struct kvm_cpu_context kvm_cpu_context_t; @@ -277,6 +282,9 @@ struct kvm_vcpu_arch {  	/* Detect first run of a vcpu */  	bool has_run_once; + +	/* Virtual SError ESR to restore when HCR_EL2.VSE is set */ +	u64 vsesr_el2;  };  #define vcpu_gp_regs(v)		(&(v)->arch.ctxt.gp_regs) @@ -340,6 +348,8 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);  int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,  		int exception_index); +void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run, +		       int exception_index);  int kvm_perf_init(void);  int kvm_perf_teardown(void); @@ -370,6 +380,7 @@ void kvm_arm_init_debug(void);  void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);  void kvm_arm_clear_debug(struct kvm_vcpu *vcpu);  void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu); +bool kvm_arm_handle_step_debug(struct kvm_vcpu *vcpu, struct kvm_run *run);  int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,  			       struct kvm_device_attr *attr);  int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, @@ -395,4 +406,13 @@ static inline void kvm_fpsimd_flush_cpu_state(void)  		sve_flush_cpu_state();  } +static inline void kvm_arm_vhe_guest_enter(void) +{ +	local_daif_mask(); +} + +static inline void kvm_arm_vhe_guest_exit(void) +{ +	local_daif_restore(DAIF_PROCCTX_NOIRQ); +}  #endif /* __ARM64_KVM_HOST_H__ */ diff --git 
a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 672c8684d5c2..72e279dbae5f 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -273,15 +273,26 @@ void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled);  static inline bool __kvm_cpu_uses_extended_idmap(void)  { -	return __cpu_uses_extended_idmap(); +	return __cpu_uses_extended_idmap_level();  } +static inline unsigned long __kvm_idmap_ptrs_per_pgd(void) +{ +	return idmap_ptrs_per_pgd; +} + +/* + * Can't use pgd_populate here, because the extended idmap adds an extra level + * above CONFIG_PGTABLE_LEVELS (which is 2 or 3 if we're using the extended + * idmap), and pgd_populate is only available if CONFIG_PGTABLE_LEVELS = 4. + */  static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd,  				       pgd_t *hyp_pgd,  				       pgd_t *merged_hyp_pgd,  				       unsigned long hyp_idmap_start)  {  	int idmap_idx; +	u64 pgd_addr;  	/*  	 * Use the first entry to access the HYP mappings. It is @@ -289,7 +300,8 @@ static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd,  	 * extended idmap.  	 */  	VM_BUG_ON(pgd_val(merged_hyp_pgd[0])); -	merged_hyp_pgd[0] = __pgd(__pa(hyp_pgd) | PMD_TYPE_TABLE); +	pgd_addr = __phys_to_pgd_val(__pa(hyp_pgd)); +	merged_hyp_pgd[0] = __pgd(pgd_addr | PMD_TYPE_TABLE);  	/*  	 * Create another extended level entry that points to the boot HYP map, @@ -299,7 +311,8 @@ static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd,  	 */  	idmap_idx = hyp_idmap_start >> VA_BITS;  	VM_BUG_ON(pgd_val(merged_hyp_pgd[idmap_idx])); -	merged_hyp_pgd[idmap_idx] = __pgd(__pa(boot_hyp_pgd) | PMD_TYPE_TABLE); +	pgd_addr = __phys_to_pgd_val(__pa(boot_hyp_pgd)); +	merged_hyp_pgd[idmap_idx] = __pgd(pgd_addr | PMD_TYPE_TABLE);  }  static inline unsigned int kvm_get_vmid_bits(void) @@ -309,5 +322,45 @@ static inline unsigned int kvm_get_vmid_bits(void)  	return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 
16 : 8;  } +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR +#include <asm/mmu.h> + +static inline void *kvm_get_hyp_vector(void) +{ +	struct bp_hardening_data *data = arm64_get_bp_hardening_data(); +	void *vect = kvm_ksym_ref(__kvm_hyp_vector); + +	if (data->fn) { +		vect = __bp_harden_hyp_vecs_start + +		       data->hyp_vectors_slot * SZ_2K; + +		if (!has_vhe()) +			vect = lm_alias(vect); +	} + +	return vect; +} + +static inline int kvm_map_vectors(void) +{ +	return create_hyp_mappings(kvm_ksym_ref(__bp_harden_hyp_vecs_start), +				   kvm_ksym_ref(__bp_harden_hyp_vecs_end), +				   PAGE_HYP_EXEC); +} + +#else +static inline void *kvm_get_hyp_vector(void) +{ +	return kvm_ksym_ref(__kvm_hyp_vector); +} + +static inline int kvm_map_vectors(void) +{ +	return 0; +} +#endif + +#define kvm_phys_to_vttbr(addr)		phys_to_ttbr(addr) +  #endif /* __ASSEMBLY__ */  #endif /* __ARM64_KVM_MMU_H__ */ diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 0d34bf0a89c7..a050d4f3615d 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -17,6 +17,11 @@  #define __ASM_MMU_H  #define MMCF_AARCH32	0x1	/* mm context flag for AArch32 executables */ +#define USER_ASID_BIT	48 +#define USER_ASID_FLAG	(UL(1) << USER_ASID_BIT) +#define TTBR_ASID_MASK	(UL(0xffff) << 48) + +#ifndef __ASSEMBLY__  typedef struct {  	atomic64_t	id; @@ -31,6 +36,49 @@ typedef struct {   */  #define ASID(mm)	((mm)->context.id.counter & 0xffff) +static inline bool arm64_kernel_unmapped_at_el0(void) +{ +	return IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0) && +	       cpus_have_const_cap(ARM64_UNMAP_KERNEL_AT_EL0); +} + +typedef void (*bp_hardening_cb_t)(void); + +struct bp_hardening_data { +	int			hyp_vectors_slot; +	bp_hardening_cb_t	fn; +}; + +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR +extern char __bp_harden_hyp_vecs_start[], __bp_harden_hyp_vecs_end[]; + +DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); + +static inline struct bp_hardening_data 
*arm64_get_bp_hardening_data(void) +{ +	return this_cpu_ptr(&bp_hardening_data); +} + +static inline void arm64_apply_bp_hardening(void) +{ +	struct bp_hardening_data *d; + +	if (!cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR)) +		return; + +	d = arm64_get_bp_hardening_data(); +	if (d->fn) +		d->fn(); +} +#else +static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void) +{ +	return NULL; +} + +static inline void arm64_apply_bp_hardening(void)	{ } +#endif	/* CONFIG_HARDEN_BRANCH_PREDICTOR */ +  extern void paging_init(void);  extern void bootmem_init(void);  extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt); @@ -41,4 +89,5 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,  extern void *fixmap_remap_fdt(phys_addr_t dt_phys);  extern void mark_linear_text_alias_ro(void); +#endif	/* !__ASSEMBLY__ */  #endif diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 3257895a9b5e..8d3331985d2e 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -19,8 +19,6 @@  #ifndef __ASM_MMU_CONTEXT_H  #define __ASM_MMU_CONTEXT_H -#define FALKOR_RESERVED_ASID	1 -  #ifndef __ASSEMBLY__  #include <linux/compiler.h> @@ -51,23 +49,39 @@ static inline void contextidr_thread_switch(struct task_struct *next)   */  static inline void cpu_set_reserved_ttbr0(void)  { -	unsigned long ttbr = __pa_symbol(empty_zero_page); +	unsigned long ttbr = phys_to_ttbr(__pa_symbol(empty_zero_page));  	write_sysreg(ttbr, ttbr0_el1);  	isb();  } +static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm) +{ +	BUG_ON(pgd == swapper_pg_dir); +	cpu_set_reserved_ttbr0(); +	cpu_do_switch_mm(virt_to_phys(pgd),mm); +} +  /*   * TCR.T0SZ value to use when the ID map is active. Usually equals   * TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in   * physical memory, in which case it will be smaller.   
*/  extern u64 idmap_t0sz; +extern u64 idmap_ptrs_per_pgd;  static inline bool __cpu_uses_extended_idmap(void)  { -	return (!IS_ENABLED(CONFIG_ARM64_VA_BITS_48) && -		unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS))); +	return unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS)); +} + +/* + * True if the extended ID map requires an extra level of translation table + * to be configured. + */ +static inline bool __cpu_uses_extended_idmap_level(void) +{ +	return ARM64_HW_PGTABLE_LEVELS(64 - idmap_t0sz) > CONFIG_PGTABLE_LEVELS;  }  /* @@ -156,29 +170,21 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu);  #define init_new_context(tsk,mm)	({ atomic64_set(&(mm)->context.id, 0); 0; }) -/* - * This is called when "tsk" is about to enter lazy TLB mode. - * - * mm:  describes the currently active mm context - * tsk: task which is entering lazy tlb - * cpu: cpu number which is entering lazy tlb - * - * tsk->mm will be NULL - */ -static inline void -enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) -{ -} -  #ifdef CONFIG_ARM64_SW_TTBR0_PAN  static inline void update_saved_ttbr0(struct task_struct *tsk,  				      struct mm_struct *mm)  { -	if (system_uses_ttbr0_pan()) { -		BUG_ON(mm->pgd == swapper_pg_dir); -		task_thread_info(tsk)->ttbr0 = -			virt_to_phys(mm->pgd) | ASID(mm) << 48; -	} +	u64 ttbr; + +	if (!system_uses_ttbr0_pan()) +		return; + +	if (mm == &init_mm) +		ttbr = __pa_symbol(empty_zero_page); +	else +		ttbr = virt_to_phys(mm->pgd) | ASID(mm) << 48; + +	WRITE_ONCE(task_thread_info(tsk)->ttbr0, ttbr);  }  #else  static inline void update_saved_ttbr0(struct task_struct *tsk, @@ -187,6 +193,16 @@ static inline void update_saved_ttbr0(struct task_struct *tsk,  }  #endif +static inline void +enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) +{ +	/* +	 * We don't actually care about the ttbr0 mapping, so point it at the +	 * zero page. 
+	 */ +	update_saved_ttbr0(tsk, &init_mm); +} +  static inline void __switch_mm(struct mm_struct *next)  {  	unsigned int cpu = smp_processor_id(); @@ -214,17 +230,16 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,  	 * Update the saved TTBR0_EL1 of the scheduled-in task as the previous  	 * value may have not been initialised yet (activate_mm caller) or the  	 * ASID has changed since the last run (following the context switch -	 * of another thread of the same process). Avoid setting the reserved -	 * TTBR0_EL1 to swapper_pg_dir (init_mm; e.g. via idle_task_exit). +	 * of another thread of the same process).  	 */ -	if (next != &init_mm) -		update_saved_ttbr0(tsk, next); +	update_saved_ttbr0(tsk, next);  }  #define deactivate_mm(tsk,mm)	do { } while (0)  #define activate_mm(prev,next)	switch_mm(prev, next, current)  void verify_cpu_asid_bits(void); +void post_ttbr_update_workaround(void);  #endif /* !__ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index 19bd97671bb8..4f766178fa6f 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -32,7 +32,7 @@ struct mod_arch_specific {  	struct mod_plt_sec	init;  	/* for CONFIG_DYNAMIC_FTRACE */ -	void			*ftrace_trampoline; +	struct plt_entry 	*ftrace_trampoline;  };  #endif @@ -45,4 +45,48 @@ extern u64 module_alloc_base;  #define module_alloc_base	((u64)_etext - MODULES_VSIZE)  #endif +struct plt_entry { +	/* +	 * A program that conforms to the AArch64 Procedure Call Standard +	 * (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or +	 * IP1 (x17) may be inserted at any branch instruction that is +	 * exposed to a relocation that supports long branches. Since that +	 * is exactly what we are dealing with here, we are free to use x16 +	 * as a scratch register in the PLT veneers. +	 */ +	__le32	mov0;	/* movn	x16, #0x....			
*/ +	__le32	mov1;	/* movk	x16, #0x...., lsl #16		*/ +	__le32	mov2;	/* movk	x16, #0x...., lsl #32		*/ +	__le32	br;	/* br	x16				*/ +}; + +static inline struct plt_entry get_plt_entry(u64 val) +{ +	/* +	 * MOVK/MOVN/MOVZ opcode: +	 * +--------+------------+--------+-----------+-------------+---------+ +	 * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] | +	 * +--------+------------+--------+-----------+-------------+---------+ +	 * +	 * Rd     := 0x10 (x16) +	 * hw     := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32) +	 * opc    := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ) +	 * sf     := 1 (64-bit variant) +	 */ +	return (struct plt_entry){ +		cpu_to_le32(0x92800010 | (((~val      ) & 0xffff)) << 5), +		cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5), +		cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5), +		cpu_to_le32(0xd61f0200) +	}; +} + +static inline bool plt_entries_equal(const struct plt_entry *a, +				     const struct plt_entry *b) +{ +	return a->mov0 == b->mov0 && +	       a->mov1 == b->mov1 && +	       a->mov2 == b->mov2; +} +  #endif /* __ASM_MODULE_H */ diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index 3bd498e4de4c..43393208229e 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -16,11 +16,15 @@  #ifndef __ASM_PERCPU_H  #define __ASM_PERCPU_H +#include <asm/alternative.h>  #include <asm/stack_pointer.h>  static inline void set_my_cpu_offset(unsigned long off)  { -	asm volatile("msr tpidr_el1, %0" :: "r" (off) : "memory"); +	asm volatile(ALTERNATIVE("msr tpidr_el1, %0", +				 "msr tpidr_el2, %0", +				 ARM64_HAS_VIRT_HOST_EXTN) +			:: "r" (off) : "memory");  }  static inline unsigned long __my_cpu_offset(void) @@ -31,7 +35,10 @@ static inline unsigned long __my_cpu_offset(void)  	 * We want to allow caching the value, so avoid using volatile and  	 * instead use a fake stack read to hazard against barrier().  	 
*/ -	asm("mrs %0, tpidr_el1" : "=r" (off) : +	asm(ALTERNATIVE("mrs %0, tpidr_el1", +			"mrs %0, tpidr_el2", +			ARM64_HAS_VIRT_HOST_EXTN) +		: "=r" (off) :  		"Q" (*(const unsigned long *)current_stack_pointer));  	return off; diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h index 8d5cbec17d80..f9ccc36d3dc3 100644 --- a/arch/arm64/include/asm/perf_event.h +++ b/arch/arm64/include/asm/perf_event.h @@ -18,6 +18,7 @@  #define __ASM_PERF_EVENT_H  #include <asm/stack_pointer.h> +#include <asm/ptrace.h>  #define	ARMV8_PMU_MAX_COUNTERS	32  #define	ARMV8_PMU_COUNTER_MASK	(ARMV8_PMU_MAX_COUNTERS - 1) @@ -79,6 +80,7 @@ struct pt_regs;  extern unsigned long perf_instruction_pointer(struct pt_regs *regs);  extern unsigned long perf_misc_flags(struct pt_regs *regs);  #define perf_misc_flags(regs)	perf_misc_flags(regs) +#define perf_arch_bpf_user_pt_regs(regs) &regs->user_regs  #endif  #define perf_arch_fetch_caller_regs(regs, __ip) { \ diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index 5ca6a573a701..e9d9f1b006ef 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -44,7 +44,7 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)  static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot)  { -	set_pud(pud, __pud(pmd | prot)); +	set_pud(pud, __pud(__phys_to_pud_val(pmd) | prot));  }  static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) @@ -73,7 +73,7 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud)  static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot)  { -	set_pgd(pgdp, __pgd(pud | prot)); +	set_pgd(pgdp, __pgd(__phys_to_pgd_val(pud) | prot));  }  static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) @@ -129,7 +129,7 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t pte)  static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t pte,  				  
pmdval_t prot)  { -	set_pmd(pmdp, __pmd(pte | prot)); +	set_pmd(pmdp, __pmd(__phys_to_pmd_val(pte) | prot));  }  /* diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index eb0c2bd90de9..f42836da8723 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -16,6 +16,8 @@  #ifndef __ASM_PGTABLE_HWDEF_H  #define __ASM_PGTABLE_HWDEF_H +#include <asm/memory.h> +  /*   * Number of page-table levels required to address 'va_bits' wide   * address, without section mapping. We resolve the top (va_bits - PAGE_SHIFT) @@ -116,9 +118,9 @@   * Level 1 descriptor (PUD).   */  #define PUD_TYPE_TABLE		(_AT(pudval_t, 3) << 0) -#define PUD_TABLE_BIT		(_AT(pgdval_t, 1) << 1) -#define PUD_TYPE_MASK		(_AT(pgdval_t, 3) << 0) -#define PUD_TYPE_SECT		(_AT(pgdval_t, 1) << 0) +#define PUD_TABLE_BIT		(_AT(pudval_t, 1) << 1) +#define PUD_TYPE_MASK		(_AT(pudval_t, 3) << 0) +#define PUD_TYPE_SECT		(_AT(pudval_t, 1) << 0)  /*   * Level 2 descriptor (PMD). @@ -166,6 +168,14 @@  #define PTE_UXN			(_AT(pteval_t, 1) << 54)	/* User XN */  #define PTE_HYP_XN		(_AT(pteval_t, 1) << 54)	/* HYP XN */ +#define PTE_ADDR_LOW		(((_AT(pteval_t, 1) << (48 - PAGE_SHIFT)) - 1) << PAGE_SHIFT) +#ifdef CONFIG_ARM64_PA_BITS_52 +#define PTE_ADDR_HIGH		(_AT(pteval_t, 0xf) << 12) +#define PTE_ADDR_MASK		(PTE_ADDR_LOW | PTE_ADDR_HIGH) +#else +#define PTE_ADDR_MASK		PTE_ADDR_LOW +#endif +  /*   * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers).   */ @@ -196,7 +206,7 @@  /*   * Highest possible physical address supported.   
*/ -#define PHYS_MASK_SHIFT		(48) +#define PHYS_MASK_SHIFT		(CONFIG_ARM64_PA_BITS)  #define PHYS_MASK		((UL(1) << PHYS_MASK_SHIFT) - 1)  /* @@ -272,9 +282,23 @@  #define TCR_TG1_4K		(UL(2) << TCR_TG1_SHIFT)  #define TCR_TG1_64K		(UL(3) << TCR_TG1_SHIFT) +#define TCR_IPS_SHIFT		32 +#define TCR_IPS_MASK		(UL(7) << TCR_IPS_SHIFT) +#define TCR_A1			(UL(1) << 22)  #define TCR_ASID16		(UL(1) << 36)  #define TCR_TBI0		(UL(1) << 37)  #define TCR_HA			(UL(1) << 39)  #define TCR_HD			(UL(1) << 40) +/* + * TTBR. + */ +#ifdef CONFIG_ARM64_PA_BITS_52 +/* + * This should be GENMASK_ULL(47, 2). + * TTBR_ELx[1] is RES0 in this configuration. + */ +#define TTBR_BADDR_MASK_52	(((UL(1) << 46) - 1) << 2) +#endif +  #endif diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 0a5635fb0ef9..22a926825e3f 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -34,8 +34,16 @@  #include <asm/pgtable-types.h> -#define PROT_DEFAULT		(PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) -#define PROT_SECT_DEFAULT	(PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) +#define _PROT_DEFAULT		(PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) +#define _PROT_SECT_DEFAULT	(PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) + +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +#define PROT_DEFAULT		(_PROT_DEFAULT | PTE_NG) +#define PROT_SECT_DEFAULT	(_PROT_SECT_DEFAULT | PMD_SECT_NG) +#else +#define PROT_DEFAULT		_PROT_DEFAULT +#define PROT_SECT_DEFAULT	_PROT_SECT_DEFAULT +#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */  #define PROT_DEVICE_nGnRnE	(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))  #define PROT_DEVICE_nGnRE	(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE)) @@ -48,6 +56,7 @@  #define PROT_SECT_NORMAL_EXEC	(PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))  #define _PAGE_DEFAULT		(PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL)) +#define _HYP_PAGE_DEFAULT	(_PAGE_DEFAULT & ~PTE_NG)  #define 
PAGE_KERNEL		__pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)  #define PAGE_KERNEL_RO		__pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY) @@ -55,15 +64,15 @@  #define PAGE_KERNEL_EXEC	__pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)  #define PAGE_KERNEL_EXEC_CONT	__pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT) -#define PAGE_HYP		__pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_HYP_XN) -#define PAGE_HYP_EXEC		__pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY) -#define PAGE_HYP_RO		__pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN) +#define PAGE_HYP		__pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_HYP_XN) +#define PAGE_HYP_EXEC		__pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY) +#define PAGE_HYP_RO		__pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN)  #define PAGE_HYP_DEVICE		__pgprot(PROT_DEVICE_nGnRE | PTE_HYP)  #define PAGE_S2			__pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY)  #define PAGE_S2_DEVICE		__pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN) -#define PAGE_NONE		__pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN) +#define PAGE_NONE		__pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)  #define PAGE_SHARED		__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)  #define PAGE_SHARED_EXEC	__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE)  #define PAGE_READONLY		__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index c9530b5b5ca8..89167c43ebb5 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -42,6 +42,8 @@  #include <asm/cmpxchg.h>  #include <asm/fixmap.h>  #include <linux/mmdebug.h> +#include <linux/mm_types.h> +#include <linux/sched.h>  extern void __pte_error(const char 
*file, int line, unsigned long val);  extern void __pmd_error(const char *file, int line, unsigned long val); @@ -57,9 +59,22 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];  #define pte_ERROR(pte)		__pte_error(__FILE__, __LINE__, pte_val(pte)) -#define pte_pfn(pte)		((pte_val(pte) & PHYS_MASK) >> PAGE_SHIFT) +/* + * Macros to convert between a physical address and its placement in a + * page table entry, taking care of 52-bit addresses. + */ +#ifdef CONFIG_ARM64_PA_BITS_52 +#define __pte_to_phys(pte)	\ +	((pte_val(pte) & PTE_ADDR_LOW) | ((pte_val(pte) & PTE_ADDR_HIGH) << 36)) +#define __phys_to_pte_val(phys)	(((phys) | ((phys) >> 36)) & PTE_ADDR_MASK) +#else +#define __pte_to_phys(pte)	(pte_val(pte) & PTE_ADDR_MASK) +#define __phys_to_pte_val(phys)	(phys) +#endif -#define pfn_pte(pfn,prot)	(__pte(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))) +#define pte_pfn(pte)		(__pte_to_phys(pte) >> PAGE_SHIFT) +#define pfn_pte(pfn,prot)	\ +	__pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))  #define pte_none(pte)		(!pte_val(pte))  #define pte_clear(mm,addr,ptep)	set_pte(ptep, __pte(0)) @@ -149,12 +164,20 @@ static inline pte_t pte_mkwrite(pte_t pte)  static inline pte_t pte_mkclean(pte_t pte)  { -	return clear_pte_bit(pte, __pgprot(PTE_DIRTY)); +	pte = clear_pte_bit(pte, __pgprot(PTE_DIRTY)); +	pte = set_pte_bit(pte, __pgprot(PTE_RDONLY)); + +	return pte;  }  static inline pte_t pte_mkdirty(pte_t pte)  { -	return set_pte_bit(pte, __pgprot(PTE_DIRTY)); +	pte = set_pte_bit(pte, __pgprot(PTE_DIRTY)); + +	if (pte_write(pte)) +		pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY)); + +	return pte;  }  static inline pte_t pte_mkold(pte_t pte) @@ -207,9 +230,6 @@ static inline void set_pte(pte_t *ptep, pte_t pte)  	}  } -struct mm_struct; -struct vm_area_struct; -  extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);  /* @@ -238,7 +258,8 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long 
addr,  	 * hardware updates of the pte (ptep_set_access_flags safely changes  	 * valid ptes without going through an invalid entry).  	 */ -	if (pte_valid(*ptep) && pte_valid(pte)) { +	if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(*ptep) && pte_valid(pte) && +	   (mm == current->active_mm || atomic_read(&mm->mm_users) > 1)) {  		VM_WARN_ONCE(!pte_young(pte),  			     "%s: racy access flag clearing: 0x%016llx -> 0x%016llx",  			     __func__, pte_val(*ptep), pte_val(pte)); @@ -284,6 +305,11 @@ static inline int pte_same(pte_t pte_a, pte_t pte_b)  #define __HAVE_ARCH_PTE_SPECIAL +static inline pte_t pgd_pte(pgd_t pgd) +{ +	return __pte(pgd_val(pgd)); +} +  static inline pte_t pud_pte(pud_t pud)  {  	return __pte(pud_val(pud)); @@ -345,20 +371,28 @@ static inline int pmd_protnone(pmd_t pmd)  #define pmd_thp_or_huge(pmd)	(pmd_huge(pmd) || pmd_trans_huge(pmd)) -#define __HAVE_ARCH_PMD_WRITE  #define pmd_write(pmd)		pte_write(pmd_pte(pmd))  #define pmd_mkhuge(pmd)		(__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT)) -#define pmd_pfn(pmd)		(((pmd_val(pmd) & PMD_MASK) & PHYS_MASK) >> PAGE_SHIFT) -#define pfn_pmd(pfn,prot)	(__pmd(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))) +#define __pmd_to_phys(pmd)	__pte_to_phys(pmd_pte(pmd)) +#define __phys_to_pmd_val(phys)	__phys_to_pte_val(phys) +#define pmd_pfn(pmd)		((__pmd_to_phys(pmd) & PMD_MASK) >> PAGE_SHIFT) +#define pfn_pmd(pfn,prot)	__pmd(__phys_to_pmd_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))  #define mk_pmd(page,prot)	pfn_pmd(page_to_pfn(page),prot)  #define pud_write(pud)		pte_write(pud_pte(pud)) -#define pud_pfn(pud)		(((pud_val(pud) & PUD_MASK) & PHYS_MASK) >> PAGE_SHIFT) + +#define __pud_to_phys(pud)	__pte_to_phys(pud_pte(pud)) +#define __phys_to_pud_val(phys)	__phys_to_pte_val(phys) +#define pud_pfn(pud)		((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT) +#define pfn_pud(pfn,prot)	__pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))  #define set_pmd_at(mm, addr, pmdp, pmd)	
set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd)) +#define __pgd_to_phys(pgd)	__pte_to_phys(pgd_pte(pgd)) +#define __phys_to_pgd_val(phys)	__phys_to_pte_val(phys) +  #define __pgprot_modify(prot,mask,bits) \  	__pgprot((pgprot_val(prot) & ~(mask)) | (bits)) @@ -409,7 +443,7 @@ static inline void pmd_clear(pmd_t *pmdp)  static inline phys_addr_t pmd_page_paddr(pmd_t pmd)  { -	return pmd_val(pmd) & PHYS_MASK & (s32)PAGE_MASK; +	return __pmd_to_phys(pmd);  }  /* Find an entry in the third-level page table. */ @@ -427,7 +461,7 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)  #define pte_set_fixmap_offset(pmd, addr)	pte_set_fixmap(pte_offset_phys(pmd, addr))  #define pte_clear_fixmap()		clear_fixmap(FIX_PTE) -#define pmd_page(pmd)		pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK)) +#define pmd_page(pmd)		pfn_to_page(__phys_to_pfn(__pmd_to_phys(pmd)))  /* use ONLY for statically allocated translation tables */  #define pte_offset_kimg(dir,addr)	((pte_t *)__phys_to_kimg(pte_offset_phys((dir), (addr)))) @@ -460,7 +494,7 @@ static inline void pud_clear(pud_t *pudp)  static inline phys_addr_t pud_page_paddr(pud_t pud)  { -	return pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK; +	return __pud_to_phys(pud);  }  /* Find an entry in the second-level page table. 
*/ @@ -473,7 +507,7 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)  #define pmd_set_fixmap_offset(pud, addr)	pmd_set_fixmap(pmd_offset_phys(pud, addr))  #define pmd_clear_fixmap()		clear_fixmap(FIX_PMD) -#define pud_page(pud)		pfn_to_page(__phys_to_pfn(pud_val(pud) & PHYS_MASK)) +#define pud_page(pud)		pfn_to_page(__phys_to_pfn(__pud_to_phys(pud)))  /* use ONLY for statically allocated translation tables */  #define pmd_offset_kimg(dir,addr)	((pmd_t *)__phys_to_kimg(pmd_offset_phys((dir), (addr)))) @@ -512,7 +546,7 @@ static inline void pgd_clear(pgd_t *pgdp)  static inline phys_addr_t pgd_page_paddr(pgd_t pgd)  { -	return pgd_val(pgd) & PHYS_MASK & (s32)PAGE_MASK; +	return __pgd_to_phys(pgd);  }  /* Find an entry in the frst-level page table. */ @@ -525,7 +559,7 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd)  #define pud_set_fixmap_offset(pgd, addr)	pud_set_fixmap(pud_offset_phys(pgd, addr))  #define pud_clear_fixmap()		clear_fixmap(FIX_PUD) -#define pgd_page(pgd)		pfn_to_page(__phys_to_pfn(pgd_val(pgd) & PHYS_MASK)) +#define pgd_page(pgd)		pfn_to_page(__phys_to_pfn(__pgd_to_phys(pgd)))  /* use ONLY for statically allocated translation tables */  #define pud_offset_kimg(dir,addr)	((pud_t *)__phys_to_kimg(pud_offset_phys((dir), (addr)))) @@ -642,28 +676,23 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,  #endif /* CONFIG_TRANSPARENT_HUGEPAGE */  /* - * ptep_set_wrprotect - mark read-only while preserving the hardware update of - * the Access Flag. + * ptep_set_wrprotect - mark read-only while trasferring potential hardware + * dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit.   
*/  #define __HAVE_ARCH_PTEP_SET_WRPROTECT  static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)  {  	pte_t old_pte, pte; -	/* -	 * ptep_set_wrprotect() is only called on CoW mappings which are -	 * private (!VM_SHARED) with the pte either read-only (!PTE_WRITE && -	 * PTE_RDONLY) or writable and software-dirty (PTE_WRITE && -	 * !PTE_RDONLY && PTE_DIRTY); see is_cow_mapping() and -	 * protection_map[]. There is no race with the hardware update of the -	 * dirty state: clearing of PTE_RDONLY when PTE_WRITE (a.k.a. PTE_DBM) -	 * is set. -	 */ -	VM_WARN_ONCE(pte_write(*ptep) && !pte_dirty(*ptep), -		     "%s: potential race with hardware DBM", __func__);  	pte = READ_ONCE(*ptep);  	do {  		old_pte = pte; +		/* +		 * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY +		 * clear), set the PTE_DIRTY bit. +		 */ +		if (pte_hw_dirty(pte)) +			pte = pte_mkdirty(pte);  		pte = pte_wrprotect(pte);  		pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),  					       pte_val(old_pte), pte_val(pte)); @@ -680,7 +709,9 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,  #endif  extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; +extern pgd_t swapper_pg_end[];  extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; +extern pgd_t tramp_pg_dir[PTRS_PER_PGD];  /*   * Encode and decode a swap entry: @@ -734,6 +765,12 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,  #define kc_vaddr_to_offset(v)	((v) & ~VA_START)  #define kc_offset_to_vaddr(o)	((o) | VA_START) +#ifdef CONFIG_ARM64_PA_BITS_52 +#define phys_to_ttbr(addr)	(((addr) | ((addr) >> 46)) & TTBR_BADDR_MASK_52) +#else +#define phys_to_ttbr(addr)	(addr) +#endif +  #endif /* !__ASSEMBLY__ */  #endif /* __ASM_PGTABLE_H */ diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h index 14ad6e4e87d1..16cef2e8449e 100644 --- a/arch/arm64/include/asm/proc-fns.h +++ b/arch/arm64/include/asm/proc-fns.h @@ -35,12 +35,6 @@ extern u64 cpu_do_resume(phys_addr_t ptr, 
u64 idmap_ttbr);  #include <asm/memory.h> -#define cpu_switch_mm(pgd,mm)				\ -do {							\ -	BUG_ON(pgd == swapper_pg_dir);			\ -	cpu_do_switch_mm(virt_to_phys(pgd),mm);		\ -} while (0) -  #endif /* __ASSEMBLY__ */  #endif /* __KERNEL__ */  #endif /* __ASM_PROCFNS_H */ diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 023cacb946c3..cee4ae25a5d1 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -216,6 +216,7 @@ static inline void spin_lock_prefetch(const void *ptr)  int cpu_enable_pan(void *__unused);  int cpu_enable_cache_maint_trap(void *__unused); +int cpu_clear_disr(void *__unused);  /* Userspace interface for PR_SVE_{SET,GET}_VL prctl()s: */  #define SVE_SET_VL(arg)	sve_set_current_vl(arg) diff --git a/arch/arm64/include/asm/sdei.h b/arch/arm64/include/asm/sdei.h new file mode 100644 index 000000000000..e073e6886685 --- /dev/null +++ b/arch/arm64/include/asm/sdei.h @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2017 Arm Ltd. +#ifndef __ASM_SDEI_H +#define __ASM_SDEI_H + +/* Values for sdei_exit_mode */ +#define SDEI_EXIT_HVC  0 +#define SDEI_EXIT_SMC  1 + +#define SDEI_STACK_SIZE		IRQ_STACK_SIZE + +#ifndef __ASSEMBLY__ + +#include <linux/linkage.h> +#include <linux/preempt.h> +#include <linux/types.h> + +#include <asm/virt.h> + +extern unsigned long sdei_exit_mode; + +/* Software Delegated Exception entry point from firmware*/ +asmlinkage void __sdei_asm_handler(unsigned long event_num, unsigned long arg, +				   unsigned long pc, unsigned long pstate); + +/* and its CONFIG_UNMAP_KERNEL_AT_EL0 trampoline */ +asmlinkage void __sdei_asm_entry_trampoline(unsigned long event_num, +						   unsigned long arg, +						   unsigned long pc, +						   unsigned long pstate); + +/* + * The above entry point does the minimum to call C code. This function does + * anything else, before calling the driver. 
+ */ +struct sdei_registered_event; +asmlinkage unsigned long __sdei_handler(struct pt_regs *regs, +					struct sdei_registered_event *arg); + +unsigned long sdei_arch_get_entry_point(int conduit); +#define sdei_arch_get_entry_point(x)	sdei_arch_get_entry_point(x) + +bool _on_sdei_stack(unsigned long sp); +static inline bool on_sdei_stack(unsigned long sp) +{ +	if (!IS_ENABLED(CONFIG_VMAP_STACK)) +		return false; +	if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE)) +		return false; +	if (in_nmi()) +		return _on_sdei_stack(sp); + +	return false; +} + +#endif /* __ASSEMBLY__ */ +#endif	/* __ASM_SDEI_H */ diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h index 941267caa39c..caab039d6305 100644 --- a/arch/arm64/include/asm/sections.h +++ b/arch/arm64/include/asm/sections.h @@ -28,5 +28,6 @@ extern char __initdata_begin[], __initdata_end[];  extern char __inittext_begin[], __inittext_end[];  extern char __irqentry_text_start[], __irqentry_text_end[];  extern char __mmuoff_data_start[], __mmuoff_data_end[]; +extern char __entry_tramp_text_start[], __entry_tramp_text_end[];  #endif /* __ASM_SECTIONS_H */ diff --git a/arch/arm64/include/asm/sparsemem.h b/arch/arm64/include/asm/sparsemem.h index 74a9d301819f..b299929fe56c 100644 --- a/arch/arm64/include/asm/sparsemem.h +++ b/arch/arm64/include/asm/sparsemem.h @@ -17,7 +17,7 @@  #define __ASM_SPARSEMEM_H  #ifdef CONFIG_SPARSEMEM -#define MAX_PHYSMEM_BITS	48 +#define MAX_PHYSMEM_BITS	CONFIG_ARM64_PA_BITS  #define SECTION_SIZE_BITS	30  #endif diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 6ad30776e984..472ef944e932 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -22,6 +22,7 @@  #include <asm/memory.h>  #include <asm/ptrace.h> +#include <asm/sdei.h>  struct stackframe {  	unsigned long fp; @@ -85,6 +86,8 @@ static inline bool on_accessible_stack(struct task_struct *tsk, unsigned long sp  		return true;  	if 
(on_overflow_stack(sp))  		return true; +	if (on_sdei_stack(sp)) +		return true;  	return false;  } diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 08cc88574659..0e1960c59197 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -20,6 +20,7 @@  #ifndef __ASM_SYSREG_H  #define __ASM_SYSREG_H +#include <asm/compiler.h>  #include <linux/stringify.h>  /* @@ -175,6 +176,16 @@  #define SYS_AFSR0_EL1			sys_reg(3, 0, 5, 1, 0)  #define SYS_AFSR1_EL1			sys_reg(3, 0, 5, 1, 1)  #define SYS_ESR_EL1			sys_reg(3, 0, 5, 2, 0) + +#define SYS_ERRIDR_EL1			sys_reg(3, 0, 5, 3, 0) +#define SYS_ERRSELR_EL1			sys_reg(3, 0, 5, 3, 1) +#define SYS_ERXFR_EL1			sys_reg(3, 0, 5, 4, 0) +#define SYS_ERXCTLR_EL1			sys_reg(3, 0, 5, 4, 1) +#define SYS_ERXSTATUS_EL1		sys_reg(3, 0, 5, 4, 2) +#define SYS_ERXADDR_EL1			sys_reg(3, 0, 5, 4, 3) +#define SYS_ERXMISC0_EL1		sys_reg(3, 0, 5, 5, 0) +#define SYS_ERXMISC1_EL1		sys_reg(3, 0, 5, 5, 1) +  #define SYS_FAR_EL1			sys_reg(3, 0, 6, 0, 0)  #define SYS_PAR_EL1			sys_reg(3, 0, 7, 4, 0) @@ -278,6 +289,7 @@  #define SYS_AMAIR_EL1			sys_reg(3, 0, 10, 3, 0)  #define SYS_VBAR_EL1			sys_reg(3, 0, 12, 0, 0) +#define SYS_DISR_EL1			sys_reg(3, 0, 12, 1, 1)  #define SYS_ICC_IAR0_EL1		sys_reg(3, 0, 12, 8, 0)  #define SYS_ICC_EOIR0_EL1		sys_reg(3, 0, 12, 8, 1) @@ -353,8 +365,10 @@  #define SYS_DACR32_EL2			sys_reg(3, 4, 3, 0, 0)  #define SYS_IFSR32_EL2			sys_reg(3, 4, 5, 0, 1) +#define SYS_VSESR_EL2			sys_reg(3, 4, 5, 2, 3)  #define SYS_FPEXC32_EL2			sys_reg(3, 4, 5, 3, 0) +#define SYS_VDISR_EL2			sys_reg(3, 4, 12, 1,  1)  #define __SYS__AP0Rx_EL2(x)		sys_reg(3, 4, 12, 8, x)  #define SYS_ICH_AP0R0_EL2		__SYS__AP0Rx_EL2(0)  #define SYS_ICH_AP0R1_EL2		__SYS__AP0Rx_EL2(1) @@ -398,27 +412,85 @@  /* Common SCTLR_ELx flags. 
*/  #define SCTLR_ELx_EE    (1 << 25) +#define SCTLR_ELx_IESB	(1 << 21) +#define SCTLR_ELx_WXN	(1 << 19)  #define SCTLR_ELx_I	(1 << 12)  #define SCTLR_ELx_SA	(1 << 3)  #define SCTLR_ELx_C	(1 << 2)  #define SCTLR_ELx_A	(1 << 1)  #define SCTLR_ELx_M	1 +#define SCTLR_ELx_FLAGS	(SCTLR_ELx_M  | SCTLR_ELx_A | SCTLR_ELx_C | \ +			 SCTLR_ELx_SA | SCTLR_ELx_I | SCTLR_ELx_IESB) + +/* SCTLR_EL2 specific flags. */  #define SCTLR_EL2_RES1	((1 << 4)  | (1 << 5)  | (1 << 11) | (1 << 16) | \  			 (1 << 18) | (1 << 22) | (1 << 23) | (1 << 28) | \  			 (1 << 29)) +#define SCTLR_EL2_RES0	((1 << 6)  | (1 << 7)  | (1 << 8)  | (1 << 9)  | \ +			 (1 << 10) | (1 << 13) | (1 << 14) | (1 << 15) | \ +			 (1 << 17) | (1 << 20) | (1 << 24) | (1 << 26) | \ +			 (1 << 27) | (1 << 30) | (1 << 31)) + +#ifdef CONFIG_CPU_BIG_ENDIAN +#define ENDIAN_SET_EL2		SCTLR_ELx_EE +#define ENDIAN_CLEAR_EL2	0 +#else +#define ENDIAN_SET_EL2		0 +#define ENDIAN_CLEAR_EL2	SCTLR_ELx_EE +#endif + +/* SCTLR_EL2 value used for the hyp-stub */ +#define SCTLR_EL2_SET	(SCTLR_ELx_IESB   | ENDIAN_SET_EL2   | SCTLR_EL2_RES1) +#define SCTLR_EL2_CLEAR	(SCTLR_ELx_M      | SCTLR_ELx_A    | SCTLR_ELx_C   | \ +			 SCTLR_ELx_SA     | SCTLR_ELx_I    | SCTLR_ELx_WXN | \ +			 ENDIAN_CLEAR_EL2 | SCTLR_EL2_RES0) + +/* Check all the bits are accounted for */ +#define SCTLR_EL2_BUILD_BUG_ON_MISSING_BITS	BUILD_BUG_ON((SCTLR_EL2_SET ^ SCTLR_EL2_CLEAR) != ~0) -#define SCTLR_ELx_FLAGS	(SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \ -			 SCTLR_ELx_SA | SCTLR_ELx_I)  /* SCTLR_EL1 specific flags. 
*/  #define SCTLR_EL1_UCI		(1 << 26) +#define SCTLR_EL1_E0E		(1 << 24)  #define SCTLR_EL1_SPAN		(1 << 23) +#define SCTLR_EL1_NTWE		(1 << 18) +#define SCTLR_EL1_NTWI		(1 << 16)  #define SCTLR_EL1_UCT		(1 << 15) +#define SCTLR_EL1_DZE		(1 << 14) +#define SCTLR_EL1_UMA		(1 << 9)  #define SCTLR_EL1_SED		(1 << 8) +#define SCTLR_EL1_ITD		(1 << 7)  #define SCTLR_EL1_CP15BEN	(1 << 5) +#define SCTLR_EL1_SA0		(1 << 4) + +#define SCTLR_EL1_RES1	((1 << 11) | (1 << 20) | (1 << 22) | (1 << 28) | \ +			 (1 << 29)) +#define SCTLR_EL1_RES0  ((1 << 6)  | (1 << 10) | (1 << 13) | (1 << 17) | \ +			 (1 << 27) | (1 << 30) | (1 << 31)) + +#ifdef CONFIG_CPU_BIG_ENDIAN +#define ENDIAN_SET_EL1		(SCTLR_EL1_E0E | SCTLR_ELx_EE) +#define ENDIAN_CLEAR_EL1	0 +#else +#define ENDIAN_SET_EL1		0 +#define ENDIAN_CLEAR_EL1	(SCTLR_EL1_E0E | SCTLR_ELx_EE) +#endif + +#define SCTLR_EL1_SET	(SCTLR_ELx_M    | SCTLR_ELx_C    | SCTLR_ELx_SA   |\ +			 SCTLR_EL1_SA0  | SCTLR_EL1_SED  | SCTLR_ELx_I    |\ +			 SCTLR_EL1_DZE  | SCTLR_EL1_UCT  | SCTLR_EL1_NTWI |\ +			 SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN |\ +			 ENDIAN_SET_EL1 | SCTLR_EL1_UCI  | SCTLR_EL1_RES1) +#define SCTLR_EL1_CLEAR	(SCTLR_ELx_A   | SCTLR_EL1_CP15BEN | SCTLR_EL1_ITD    |\ +			 SCTLR_EL1_UMA | SCTLR_ELx_WXN     | ENDIAN_CLEAR_EL1 |\ +			 SCTLR_EL1_RES0) + +/* Check all the bits are accounted for */ +#define SCTLR_EL1_BUILD_BUG_ON_MISSING_BITS	BUILD_BUG_ON((SCTLR_EL1_SET ^ SCTLR_EL1_CLEAR) != ~0)  /* id_aa64isar0 */ +#define ID_AA64ISAR0_FHM_SHIFT		48  #define ID_AA64ISAR0_DP_SHIFT		44  #define ID_AA64ISAR0_SM4_SHIFT		40  #define ID_AA64ISAR0_SM3_SHIFT		36 @@ -437,7 +509,10 @@  #define ID_AA64ISAR1_DPB_SHIFT		0  /* id_aa64pfr0 */ +#define ID_AA64PFR0_CSV3_SHIFT		60 +#define ID_AA64PFR0_CSV2_SHIFT		56  #define ID_AA64PFR0_SVE_SHIFT		32 +#define ID_AA64PFR0_RAS_SHIFT		28  #define ID_AA64PFR0_GIC_SHIFT		24  #define ID_AA64PFR0_ASIMD_SHIFT		20  #define ID_AA64PFR0_FP_SHIFT		16 @@ -447,6 +522,7 @@  #define ID_AA64PFR0_EL0_SHIFT		0  #define 
ID_AA64PFR0_SVE			0x1 +#define ID_AA64PFR0_RAS_V1		0x1  #define ID_AA64PFR0_FP_NI		0xf  #define ID_AA64PFR0_FP_SUPPORTED	0x0  #define ID_AA64PFR0_ASIMD_NI		0xf @@ -471,6 +547,14 @@  #define ID_AA64MMFR0_TGRAN64_SUPPORTED	0x0  #define ID_AA64MMFR0_TGRAN16_NI		0x0  #define ID_AA64MMFR0_TGRAN16_SUPPORTED	0x1 +#define ID_AA64MMFR0_PARANGE_48		0x5 +#define ID_AA64MMFR0_PARANGE_52		0x6 + +#ifdef CONFIG_ARM64_PA_BITS_52 +#define ID_AA64MMFR0_PARANGE_MAX	ID_AA64MMFR0_PARANGE_52 +#else +#define ID_AA64MMFR0_PARANGE_MAX	ID_AA64MMFR0_PARANGE_48 +#endif  /* id_aa64mmfr1 */  #define ID_AA64MMFR1_PAN_SHIFT		20 @@ -582,6 +666,7 @@  #else +#include <linux/build_bug.h>  #include <linux/types.h>  asm( @@ -638,6 +723,9 @@ static inline void config_sctlr_el1(u32 clear, u32 set)  {  	u32 val; +	SCTLR_EL2_BUILD_BUG_ON_MISSING_BITS; +	SCTLR_EL1_BUILD_BUG_ON_MISSING_BITS; +  	val = read_sysreg(sctlr_el1);  	val &= ~clear;  	val |= set; diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index eb431286bacd..740aa03c5f0d 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -51,8 +51,6 @@ struct thread_info {  	.addr_limit	= KERNEL_DS,					\  } -#define init_stack		(init_thread_union.stack) -  #define thread_saved_pc(tsk)	\  	((unsigned long)(tsk->thread.cpu_context.pc))  #define thread_saved_sp(tsk)	\ diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index af1c76981911..9e82dd79c7db 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -23,6 +23,7 @@  #include <linux/sched.h>  #include <asm/cputype.h> +#include <asm/mmu.h>  /*   * Raw TLBI operations. @@ -54,6 +55,11 @@  #define __tlbi(op, ...)		
__TLBI_N(op, ##__VA_ARGS__, 1, 0) +#define __tlbi_user(op, arg) do {						\ +	if (arm64_kernel_unmapped_at_el0())					\ +		__tlbi(op, (arg) | USER_ASID_FLAG);				\ +} while (0) +  /*   *	TLB Management   *	============== @@ -115,6 +121,7 @@ static inline void flush_tlb_mm(struct mm_struct *mm)  	dsb(ishst);  	__tlbi(aside1is, asid); +	__tlbi_user(aside1is, asid);  	dsb(ish);  } @@ -125,6 +132,7 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,  	dsb(ishst);  	__tlbi(vale1is, addr); +	__tlbi_user(vale1is, addr);  	dsb(ish);  } @@ -151,10 +159,13 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,  	dsb(ishst);  	for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) { -		if (last_level) +		if (last_level) {  			__tlbi(vale1is, addr); -		else +			__tlbi_user(vale1is, addr); +		} else {  			__tlbi(vae1is, addr); +			__tlbi_user(vae1is, addr); +		}  	}  	dsb(ish);  } @@ -194,6 +205,7 @@ static inline void __flush_tlb_pgtable(struct mm_struct *mm,  	unsigned long addr = uaddr >> 12 | (ASID(mm) << 48);  	__tlbi(vae1is, addr); +	__tlbi_user(vae1is, addr);  	dsb(ish);  } diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h index 1696f9de9359..178e338d2889 100644 --- a/arch/arm64/include/asm/traps.h +++ b/arch/arm64/include/asm/traps.h @@ -19,6 +19,7 @@  #define __ASM_TRAP_H  #include <linux/list.h> +#include <asm/esr.h>  #include <asm/sections.h>  struct pt_regs; @@ -66,4 +67,57 @@ static inline int in_entry_text(unsigned long ptr)  	return ptr >= (unsigned long)&__entry_text_start &&  	       ptr < (unsigned long)&__entry_text_end;  } + +/* + * CPUs with the RAS extensions have an Implementation-Defined-Syndrome bit + * to indicate whether this ESR has a RAS encoding. CPUs without this feature + * have a ISS-Valid bit in the same position. + * If this bit is set, we know its not a RAS SError. + * If its clear, we need to know if the CPU supports RAS. 
Uncategorized RAS + * errors share the same encoding as an all-zeros encoding from a CPU that + * doesn't support RAS. + */ +static inline bool arm64_is_ras_serror(u32 esr) +{ +	WARN_ON(preemptible()); + +	if (esr & ESR_ELx_IDS) +		return false; + +	if (this_cpu_has_cap(ARM64_HAS_RAS_EXTN)) +		return true; +	else +		return false; +} + +/* + * Return the AET bits from a RAS SError's ESR. + * + * It is implementation defined whether Uncategorized errors are containable. + * We treat them as Uncontainable. + * Non-RAS SError's are reported as Uncontained/Uncategorized. + */ +static inline u32 arm64_ras_serror_get_severity(u32 esr) +{ +	u32 aet = esr & ESR_ELx_AET; + +	if (!arm64_is_ras_serror(esr)) { +		/* Not a RAS error, we can't interpret the ESR. */ +		return ESR_ELx_AET_UC; +	} + +	/* +	 * AET is RES0 if 'the value returned in the DFSC field is not +	 * [ESR_ELx_FSC_SERROR]' +	 */ +	if ((esr & ESR_ELx_FSC) != ESR_ELx_FSC_SERROR) { +		/* No severity information : Uncategorized */ +		return ESR_ELx_AET_UC; +	} + +	return aet; +} + +bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr); +void __noreturn arm64_serror_panic(struct pt_regs *regs, u32 esr);  #endif diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index fc0f9eb66039..59fda5292936 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -105,17 +105,23 @@ static inline void set_fs(mm_segment_t fs)  #ifdef CONFIG_ARM64_SW_TTBR0_PAN  static inline void __uaccess_ttbr0_disable(void)  { -	unsigned long ttbr; +	unsigned long flags, ttbr; -	/* reserved_ttbr0 placed at the end of swapper_pg_dir */ -	ttbr = read_sysreg(ttbr1_el1) + SWAPPER_DIR_SIZE; -	write_sysreg(ttbr, ttbr0_el1); +	local_irq_save(flags); +	ttbr = read_sysreg(ttbr1_el1); +	ttbr &= ~TTBR_ASID_MASK; +	/* reserved_ttbr0 placed before swapper_pg_dir */ +	write_sysreg(ttbr - RESERVED_TTBR0_SIZE, ttbr0_el1); +	isb(); +	/* Set reserved ASID */ +	write_sysreg(ttbr, 
ttbr1_el1);  	isb(); +	local_irq_restore(flags);  }  static inline void __uaccess_ttbr0_enable(void)  { -	unsigned long flags; +	unsigned long flags, ttbr0, ttbr1;  	/*  	 * Disable interrupts to avoid preemption between reading the 'ttbr0' @@ -123,7 +129,17 @@ static inline void __uaccess_ttbr0_enable(void)  	 * roll-over and an update of 'ttbr0'.  	 */  	local_irq_save(flags); -	write_sysreg(current_thread_info()->ttbr0, ttbr0_el1); +	ttbr0 = READ_ONCE(current_thread_info()->ttbr0); + +	/* Restore active ASID */ +	ttbr1 = read_sysreg(ttbr1_el1); +	ttbr1 &= ~TTBR_ASID_MASK;		/* safety measure */ +	ttbr1 |= ttbr0 & TTBR_ASID_MASK; +	write_sysreg(ttbr1, ttbr1_el1); +	isb(); + +	/* Restore user page table */ +	write_sysreg(ttbr0, ttbr0_el1);  	isb();  	local_irq_restore(flags);  } @@ -155,6 +171,18 @@ static inline bool uaccess_ttbr0_enable(void)  }  #endif +static inline void __uaccess_disable_hw_pan(void) +{ +	asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, +			CONFIG_ARM64_PAN)); +} + +static inline void __uaccess_enable_hw_pan(void) +{ +	asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, +			CONFIG_ARM64_PAN)); +} +  #define __uaccess_disable(alt)						\  do {									\  	if (!uaccess_ttbr0_disable())					\ diff --git a/arch/arm64/include/asm/vmap_stack.h b/arch/arm64/include/asm/vmap_stack.h new file mode 100644 index 000000000000..0b5ec6e08c10 --- /dev/null +++ b/arch/arm64/include/asm/vmap_stack.h @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2017 Arm Ltd. +#ifndef __ASM_VMAP_STACK_H +#define __ASM_VMAP_STACK_H + +#include <linux/bug.h> +#include <linux/gfp.h> +#include <linux/kconfig.h> +#include <linux/vmalloc.h> +#include <asm/memory.h> +#include <asm/pgtable.h> +#include <asm/thread_info.h> + +/* + * To ensure that VMAP'd stack overflow detection works correctly, all VMAP'd + * stacks need to have the same alignment. 
+ */ +static inline unsigned long *arch_alloc_vmap_stack(size_t stack_size, int node) +{ +	BUILD_BUG_ON(!IS_ENABLED(CONFIG_VMAP_STACK)); + +	return __vmalloc_node_range(stack_size, THREAD_ALIGN, +				    VMALLOC_START, VMALLOC_END, +				    THREADINFO_GFP, PAGE_KERNEL, 0, node, +				    __builtin_return_address(0)); +} + +#endif /* __ASM_VMAP_STACK_H */ diff --git a/arch/arm64/include/uapi/asm/bpf_perf_event.h b/arch/arm64/include/uapi/asm/bpf_perf_event.h new file mode 100644 index 000000000000..b551b741653d --- /dev/null +++ b/arch/arm64/include/uapi/asm/bpf_perf_event.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__ +#define _UAPI__ASM_BPF_PERF_EVENT_H__ + +#include <asm/ptrace.h> + +typedef struct user_pt_regs bpf_user_pt_regs_t; + +#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */ diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index cda76fa8b9b2..f018c3deea3b 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -43,5 +43,6 @@  #define HWCAP_ASIMDDP		(1 << 20)  #define HWCAP_SHA512		(1 << 21)  #define HWCAP_SVE		(1 << 22) +#define HWCAP_ASIMDFHM		(1 << 23)  #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/include/uapi/asm/siginfo.h b/arch/arm64/include/uapi/asm/siginfo.h index 574d12f86039..9b4d91277742 100644 --- a/arch/arm64/include/uapi/asm/siginfo.h +++ b/arch/arm64/include/uapi/asm/siginfo.h @@ -21,4 +21,25 @@  #include <asm-generic/siginfo.h> +/* + * SIGFPE si_codes + */ +#ifdef __KERNEL__ +#define FPE_FIXME	0	/* Broken dup of SI_USER */ +#endif /* __KERNEL__ */ + +/* + * SIGBUS si_codes + */ +#ifdef __KERNEL__ +#define BUS_FIXME	0	/* Broken dup of SI_USER */ +#endif /* __KERNEL__ */ + +/* + * SIGTRAP si_codes + */ +#ifdef __KERNEL__ +#define TRAP_FIXME	0	/* Broken dup of SI_USER */ +#endif /* __KERNEL__ */ +  #endif diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 8265dd790895..b87541360f43 100644 --- 
a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -52,6 +52,11 @@ arm64-obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o	\  arm64-obj-$(CONFIG_ARM64_RELOC_TEST)	+= arm64-reloc-test.o  arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o  arm64-obj-$(CONFIG_CRASH_DUMP)		+= crash_dump.o +arm64-obj-$(CONFIG_ARM_SDE_INTERFACE)	+= sdei.o + +ifeq ($(CONFIG_KVM),y) +arm64-obj-$(CONFIG_HARDEN_BRANCH_PREDICTOR)	+= bpi.o +endif  obj-y					+= $(arm64-obj-y) vdso/ probes/  obj-m					+= $(arm64-obj-m) @@ -61,6 +66,3 @@ extra-y					+= $(head-y) vmlinux.lds  ifeq ($(CONFIG_DEBUG_EFI),y)  AFLAGS_head.o += -DVMLINUX_PATH="\"$(realpath $(objtree)/vmlinux)\""  endif - -# will be included by each individual module but not by the core kernel itself -extra-$(CONFIG_DYNAMIC_FTRACE) += ftrace-mod.o diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index b3162715ed78..252396a96c78 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -117,7 +117,7 @@ bool __init acpi_psci_present(void)  }  /* Whether HVC must be used instead of SMC as the PSCI conduit */ -bool __init acpi_psci_use_hvc(void) +bool acpi_psci_use_hvc(void)  {  	return acpi_gbl_FADT.arm_boot_flags & ACPI_FADT_PSCI_USE_HVC;  } diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 6dd0a3a3e5c9..414288a558c8 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -32,6 +32,8 @@  #define ALT_ORIG_PTR(a)		__ALT_PTR(a, orig_offset)  #define ALT_REPL_PTR(a)		__ALT_PTR(a, alt_offset) +int alternatives_applied; +  struct alt_region {  	struct alt_instr *begin;  	struct alt_instr *end; @@ -143,7 +145,6 @@ static void __apply_alternatives(void *alt_region, bool use_linear_alias)   */  static int __apply_alternatives_multi_stop(void *unused)  { -	static int patched = 0;  	struct alt_region region = {  		.begin	= (struct alt_instr *)__alt_instructions,  		.end	= (struct alt_instr *)__alt_instructions_end, @@ -151,14 
+152,14 @@ static int __apply_alternatives_multi_stop(void *unused)  	/* We always have a CPU 0 at this point (__init) */  	if (smp_processor_id()) { -		while (!READ_ONCE(patched)) +		while (!READ_ONCE(alternatives_applied))  			cpu_relax();  		isb();  	} else { -		BUG_ON(patched); +		BUG_ON(alternatives_applied);  		__apply_alternatives(&region, true);  		/* Barriers provided by the cache flushing */ -		WRITE_ONCE(patched, 1); +		WRITE_ONCE(alternatives_applied, 1);  	}  	return 0; diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 71bf088f1e4b..1303e04110cd 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -18,12 +18,14 @@   * along with this program.  If not, see <http://www.gnu.org/licenses/>.   */ +#include <linux/arm_sdei.h>  #include <linux/sched.h>  #include <linux/mm.h>  #include <linux/dma-mapping.h>  #include <linux/kvm_host.h>  #include <linux/suspend.h>  #include <asm/cpufeature.h> +#include <asm/fixmap.h>  #include <asm/thread_info.h>  #include <asm/memory.h>  #include <asm/smp_plat.h> @@ -130,6 +132,7 @@ int main(void)    BLANK();  #ifdef CONFIG_KVM_ARM_HOST    DEFINE(VCPU_CONTEXT,		offsetof(struct kvm_vcpu, arch.ctxt)); +  DEFINE(VCPU_FAULT_DISR,	offsetof(struct kvm_vcpu, arch.fault.disr_el1));    DEFINE(CPU_GP_REGS,		offsetof(struct kvm_cpu_context, gp_regs));    DEFINE(CPU_USER_PT_REGS,	offsetof(struct kvm_regs, regs));    DEFINE(CPU_FP_REGS,		offsetof(struct kvm_regs, fp_regs)); @@ -148,11 +151,18 @@ int main(void)    DEFINE(ARM_SMCCC_RES_X2_OFFS,		offsetof(struct arm_smccc_res, a2));    DEFINE(ARM_SMCCC_QUIRK_ID_OFFS,	offsetof(struct arm_smccc_quirk, id));    DEFINE(ARM_SMCCC_QUIRK_STATE_OFFS,	offsetof(struct arm_smccc_quirk, state)); -    BLANK();    DEFINE(HIBERN_PBE_ORIG,	offsetof(struct pbe, orig_address));    DEFINE(HIBERN_PBE_ADDR,	offsetof(struct pbe, address));    DEFINE(HIBERN_PBE_NEXT,	offsetof(struct pbe, next));    DEFINE(ARM64_FTR_SYSVAL,	offsetof(struct 
arm64_ftr_reg, sys_val)); +  BLANK(); +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +  DEFINE(TRAMP_VALIAS,		TRAMP_VALIAS); +#endif +#ifdef CONFIG_ARM_SDE_INTERFACE +  DEFINE(SDEI_EVENT_INTREGS,	offsetof(struct sdei_registered_event, interrupted_regs)); +  DEFINE(SDEI_EVENT_PRIORITY,	offsetof(struct sdei_registered_event, priority)); +#endif    return 0;  } diff --git a/arch/arm64/kernel/bpi.S b/arch/arm64/kernel/bpi.S new file mode 100644 index 000000000000..76225c2611ea --- /dev/null +++ b/arch/arm64/kernel/bpi.S @@ -0,0 +1,87 @@ +/* + * Contains CPU specific branch predictor invalidation sequences + * + * Copyright (C) 2018 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <linux/linkage.h> + +.macro ventry target +	.rept 31 +	nop +	.endr +	b	\target +.endm + +.macro vectors target +	ventry \target + 0x000 +	ventry \target + 0x080 +	ventry \target + 0x100 +	ventry \target + 0x180 + +	ventry \target + 0x200 +	ventry \target + 0x280 +	ventry \target + 0x300 +	ventry \target + 0x380 + +	ventry \target + 0x400 +	ventry \target + 0x480 +	ventry \target + 0x500 +	ventry \target + 0x580 + +	ventry \target + 0x600 +	ventry \target + 0x680 +	ventry \target + 0x700 +	ventry \target + 0x780 +.endm + +	.align	11 +ENTRY(__bp_harden_hyp_vecs_start) +	.rept 4 +	vectors __kvm_hyp_vector +	.endr +ENTRY(__bp_harden_hyp_vecs_end) +ENTRY(__psci_hyp_bp_inval_start) +	sub	sp, sp, #(8 * 18) +	stp	x16, x17, [sp, #(16 * 0)] +	stp	x14, x15, [sp, #(16 * 1)] +	stp	x12, x13, [sp, #(16 * 2)] +	stp	x10, x11, [sp, #(16 * 3)] +	stp	x8, x9, [sp, #(16 * 4)] +	stp	x6, x7, [sp, #(16 * 5)] +	stp	x4, x5, [sp, #(16 * 6)] +	stp	x2, x3, [sp, #(16 * 7)] +	stp	x0, x1, [sp, #(16 * 8)] +	mov	x0, #0x84000000 +	smc	#0 +	ldp	x16, x17, [sp, #(16 * 0)] +	ldp	x14, x15, [sp, #(16 * 1)] +	ldp	x12, x13, [sp, #(16 * 2)] +	ldp	x10, x11, [sp, #(16 * 3)] +	ldp	x8, x9, [sp, #(16 * 4)] +	ldp	x6, x7, [sp, #(16 * 5)] +	ldp	x4, x5, [sp, #(16 * 6)] +	ldp	x2, x3, [sp, #(16 * 7)] +	ldp	x0, x1, [sp, #(16 * 8)] +	add	sp, sp, #(8 * 18) +ENTRY(__psci_hyp_bp_inval_end) + +ENTRY(__qcom_hyp_sanitize_link_stack_start) +	stp     x29, x30, [sp, #-16]! +	.rept	16 +	bl	. 
+ 4 +	.endr +	ldp	x29, x30, [sp], #16 +ENTRY(__qcom_hyp_sanitize_link_stack_end) diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S index 65f42d257414..2a752cb2a0f3 100644 --- a/arch/arm64/kernel/cpu-reset.S +++ b/arch/arm64/kernel/cpu-reset.S @@ -37,6 +37,7 @@ ENTRY(__cpu_soft_restart)  	mrs	x12, sctlr_el1  	ldr	x13, =SCTLR_ELx_FLAGS  	bic	x12, x12, x13 +	pre_disable_mmu_workaround  	msr	sctlr_el1, x12  	isb diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 0e27f86ee709..ed6881882231 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -30,6 +30,20 @@ is_affected_midr_range(const struct arm64_cpu_capabilities *entry, int scope)  				       entry->midr_range_max);  } +static bool __maybe_unused +is_kryo_midr(const struct arm64_cpu_capabilities *entry, int scope) +{ +	u32 model; + +	WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + +	model = read_cpuid_id(); +	model &= MIDR_IMPLEMENTOR_MASK | (0xf00 << MIDR_PARTNUM_SHIFT) | +		 MIDR_ARCHITECTURE_MASK; + +	return model == entry->midr_model; +} +  static bool  has_mismatched_cache_line_size(const struct arm64_cpu_capabilities *entry,  				int scope) @@ -46,6 +60,127 @@ static int cpu_enable_trap_ctr_access(void *__unused)  	return 0;  } +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR +#include <asm/mmu_context.h> +#include <asm/cacheflush.h> + +DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); + +#ifdef CONFIG_KVM +extern char __psci_hyp_bp_inval_start[], __psci_hyp_bp_inval_end[]; +extern char __qcom_hyp_sanitize_link_stack_start[]; +extern char __qcom_hyp_sanitize_link_stack_end[]; + +static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start, +				const char *hyp_vecs_end) +{ +	void *dst = lm_alias(__bp_harden_hyp_vecs_start + slot * SZ_2K); +	int i; + +	for (i = 0; i < SZ_2K; i += 0x80) +		memcpy(dst + i, hyp_vecs_start, hyp_vecs_end - hyp_vecs_start); + +	flush_icache_range((uintptr_t)dst, 
(uintptr_t)dst + SZ_2K); +} + +static void __install_bp_hardening_cb(bp_hardening_cb_t fn, +				      const char *hyp_vecs_start, +				      const char *hyp_vecs_end) +{ +	static int last_slot = -1; +	static DEFINE_SPINLOCK(bp_lock); +	int cpu, slot = -1; + +	spin_lock(&bp_lock); +	for_each_possible_cpu(cpu) { +		if (per_cpu(bp_hardening_data.fn, cpu) == fn) { +			slot = per_cpu(bp_hardening_data.hyp_vectors_slot, cpu); +			break; +		} +	} + +	if (slot == -1) { +		last_slot++; +		BUG_ON(((__bp_harden_hyp_vecs_end - __bp_harden_hyp_vecs_start) +			/ SZ_2K) <= last_slot); +		slot = last_slot; +		__copy_hyp_vect_bpi(slot, hyp_vecs_start, hyp_vecs_end); +	} + +	__this_cpu_write(bp_hardening_data.hyp_vectors_slot, slot); +	__this_cpu_write(bp_hardening_data.fn, fn); +	spin_unlock(&bp_lock); +} +#else +#define __psci_hyp_bp_inval_start		NULL +#define __psci_hyp_bp_inval_end			NULL +#define __qcom_hyp_sanitize_link_stack_start	NULL +#define __qcom_hyp_sanitize_link_stack_end	NULL + +static void __install_bp_hardening_cb(bp_hardening_cb_t fn, +				      const char *hyp_vecs_start, +				      const char *hyp_vecs_end) +{ +	__this_cpu_write(bp_hardening_data.fn, fn); +} +#endif	/* CONFIG_KVM */ + +static void  install_bp_hardening_cb(const struct arm64_cpu_capabilities *entry, +				     bp_hardening_cb_t fn, +				     const char *hyp_vecs_start, +				     const char *hyp_vecs_end) +{ +	u64 pfr0; + +	if (!entry->matches(entry, SCOPE_LOCAL_CPU)) +		return; + +	pfr0 = read_cpuid(ID_AA64PFR0_EL1); +	if (cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_CSV2_SHIFT)) +		return; + +	__install_bp_hardening_cb(fn, hyp_vecs_start, hyp_vecs_end); +} + +#include <linux/psci.h> + +static int enable_psci_bp_hardening(void *data) +{ +	const struct arm64_cpu_capabilities *entry = data; + +	if (psci_ops.get_version) +		install_bp_hardening_cb(entry, +				       (bp_hardening_cb_t)psci_ops.get_version, +				       __psci_hyp_bp_inval_start, +				       __psci_hyp_bp_inval_end); + +	
return 0; +} + +static void qcom_link_stack_sanitization(void) +{ +	u64 tmp; + +	asm volatile("mov	%0, x30		\n" +		     ".rept	16		\n" +		     "bl	. + 4		\n" +		     ".endr			\n" +		     "mov	x30, %0		\n" +		     : "=&r" (tmp)); +} + +static int qcom_enable_link_stack_sanitization(void *data) +{ +	const struct arm64_cpu_capabilities *entry = data; + +	install_bp_hardening_cb(entry, qcom_link_stack_sanitization, +				__qcom_hyp_sanitize_link_stack_start, +				__qcom_hyp_sanitize_link_stack_end); + +	return 0; +} +#endif	/* CONFIG_HARDEN_BRANCH_PREDICTOR */ +  #define MIDR_RANGE(model, min, max) \  	.def_scope = SCOPE_LOCAL_CPU, \  	.matches = is_affected_midr_range, \ @@ -169,6 +304,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = {  			   MIDR_CPU_VAR_REV(0, 0),  			   MIDR_CPU_VAR_REV(0, 0)),  	}, +	{ +		.desc = "Qualcomm Technologies Kryo erratum 1003", +		.capability = ARM64_WORKAROUND_QCOM_FALKOR_E1003, +		.def_scope = SCOPE_LOCAL_CPU, +		.midr_model = MIDR_QCOM_KRYO, +		.matches = is_kryo_midr, +	},  #endif  #ifdef CONFIG_QCOM_FALKOR_ERRATUM_1009  	{ @@ -187,6 +329,47 @@ const struct arm64_cpu_capabilities arm64_errata[] = {  		MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),  	},  #endif +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR +	{ +		.capability = ARM64_HARDEN_BRANCH_PREDICTOR, +		MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), +		.enable = enable_psci_bp_hardening, +	}, +	{ +		.capability = ARM64_HARDEN_BRANCH_PREDICTOR, +		MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), +		.enable = enable_psci_bp_hardening, +	}, +	{ +		.capability = ARM64_HARDEN_BRANCH_PREDICTOR, +		MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), +		.enable = enable_psci_bp_hardening, +	}, +	{ +		.capability = ARM64_HARDEN_BRANCH_PREDICTOR, +		MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), +		.enable = enable_psci_bp_hardening, +	}, +	{ +		.capability = ARM64_HARDEN_BRANCH_PREDICTOR, +		MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1), +		.enable = qcom_enable_link_stack_sanitization, +	}, +	{ +		.capability = ARM64_HARDEN_BP_POST_GUEST_EXIT, +		
MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1), +	}, +	{ +		.capability = ARM64_HARDEN_BRANCH_PREDICTOR, +		MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), +		.enable = enable_psci_bp_hardening, +	}, +	{ +		.capability = ARM64_HARDEN_BRANCH_PREDICTOR, +		MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), +		.enable = enable_psci_bp_hardening, +	}, +#endif  	{  	}  }; @@ -200,15 +383,18 @@ void verify_local_cpu_errata_workarounds(void)  {  	const struct arm64_cpu_capabilities *caps = arm64_errata; -	for (; caps->matches; caps++) -		if (!cpus_have_cap(caps->capability) && -			caps->matches(caps, SCOPE_LOCAL_CPU)) { +	for (; caps->matches; caps++) { +		if (cpus_have_cap(caps->capability)) { +			if (caps->enable) +				caps->enable((void *)caps); +		} else if (caps->matches(caps, SCOPE_LOCAL_CPU)) {  			pr_crit("CPU%d: Requires work around for %s, not detected"  					" at boot time\n",  				smp_processor_id(),  				caps->desc ? : "an erratum");  			cpu_die_early();  		} +	}  }  void update_cpu_errata_workarounds(void) diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c index d16978213c5b..ea001241bdd4 100644 --- a/arch/arm64/kernel/cpu_ops.c +++ b/arch/arm64/kernel/cpu_ops.c @@ -31,13 +31,13 @@ extern const struct cpu_operations cpu_psci_ops;  const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init; -static const struct cpu_operations *dt_supported_cpu_ops[] __initconst = { +static const struct cpu_operations *const dt_supported_cpu_ops[] __initconst = {  	&smp_spin_table_ops,  	&cpu_psci_ops,  	NULL,  }; -static const struct cpu_operations *acpi_supported_cpu_ops[] __initconst = { +static const struct cpu_operations *const acpi_supported_cpu_ops[] __initconst = {  #ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL  	&acpi_parking_protocol_ops,  #endif @@ -47,7 +47,7 @@ static const struct cpu_operations *acpi_supported_cpu_ops[] __initconst = {  static const struct cpu_operations * __init cpu_get_ops(const char *name)  { -	const struct cpu_operations **ops; +	const struct 
cpu_operations *const *ops;  	ops = acpi_disabled ? dt_supported_cpu_ops : acpi_supported_cpu_ops; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index c5ba0097887f..0fb6a3151443 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -123,6 +123,7 @@ cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused)   * sync with the documentation of the CPU feature register ABI.   */  static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { +	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_FHM_SHIFT, 4, 0),  	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_DP_SHIFT, 4, 0),  	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SM4_SHIFT, 4, 0),  	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SM3_SHIFT, 4, 0), @@ -145,7 +146,11 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {  };  static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { -	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0), +	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV3_SHIFT, 4, 0), +	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV2_SHIFT, 4, 0), +	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), +				   FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0), +	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_RAS_SHIFT, 4, 0),  	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_GIC_SHIFT, 4, 0),  	S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),  	S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI), @@ -845,6 +850,67 @@ static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unus  					ID_AA64PFR0_FP_SHIFT) < 0;  } +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +static int 
__kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */ + +static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, +				int __unused) +{ +	u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); + +	/* Forced on command line? */ +	if (__kpti_forced) { +		pr_info_once("kernel page table isolation forced %s by command line option\n", +			     __kpti_forced > 0 ? "ON" : "OFF"); +		return __kpti_forced > 0; +	} + +	/* Useful for KASLR robustness */ +	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) +		return true; + +	/* Don't force KPTI for CPUs that are not vulnerable */ +	switch (read_cpuid_id() & MIDR_CPU_MODEL_MASK) { +	case MIDR_CAVIUM_THUNDERX2: +	case MIDR_BRCM_VULCAN: +		return false; +	} + +	/* Defer to CPU feature registers */ +	return !cpuid_feature_extract_unsigned_field(pfr0, +						     ID_AA64PFR0_CSV3_SHIFT); +} + +static int __init parse_kpti(char *str) +{ +	bool enabled; +	int ret = strtobool(str, &enabled); + +	if (ret) +		return ret; + +	__kpti_forced = enabled ? 1 : -1; +	return 0; +} +__setup("kpti=", parse_kpti); +#endif	/* CONFIG_UNMAP_KERNEL_AT_EL0 */ + +static int cpu_copy_el2regs(void *__unused) +{ +	/* +	 * Copy register values that aren't redirected by hardware. +	 * +	 * Before code patching, we only set tpidr_el1, all CPUs need to copy +	 * this value to tpidr_el2 before we patch the code. Once we've done +	 * that, freshly-onlined CPUs will set tpidr_el2, so we don't need to +	 * do anything here. 
+	 */ +	if (!alternatives_applied) +		write_sysreg(read_sysreg(tpidr_el1), tpidr_el2); + +	return 0; +} +  static const struct arm64_cpu_capabilities arm64_features[] = {  	{  		.desc = "GIC system register CPU interface", @@ -914,6 +980,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {  		.capability = ARM64_HAS_VIRT_HOST_EXTN,  		.def_scope = SCOPE_SYSTEM,  		.matches = runs_at_el2, +		.enable = cpu_copy_el2regs,  	},  	{  		.desc = "32-bit EL0 Support", @@ -931,6 +998,14 @@ static const struct arm64_cpu_capabilities arm64_features[] = {  		.def_scope = SCOPE_SYSTEM,  		.matches = hyp_offset_low,  	}, +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +	{ +		.desc = "Kernel page table isolation (KPTI)", +		.capability = ARM64_UNMAP_KERNEL_AT_EL0, +		.def_scope = SCOPE_SYSTEM, +		.matches = unmap_kernel_at_el0, +	}, +#endif  	{  		/* FP/SIMD is not implemented */  		.capability = ARM64_HAS_NO_FPSIMD, @@ -962,6 +1037,19 @@ static const struct arm64_cpu_capabilities arm64_features[] = {  		.enable = sve_kernel_enable,  	},  #endif /* CONFIG_ARM64_SVE */ +#ifdef CONFIG_ARM64_RAS_EXTN +	{ +		.desc = "RAS Extension Support", +		.capability = ARM64_HAS_RAS_EXTN, +		.def_scope = SCOPE_SYSTEM, +		.matches = has_cpuid_feature, +		.sys_reg = SYS_ID_AA64PFR0_EL1, +		.sign = FTR_UNSIGNED, +		.field_pos = ID_AA64PFR0_RAS_SHIFT, +		.min_field_value = ID_AA64PFR0_RAS_V1, +		.enable = cpu_clear_disr, +	}, +#endif /* CONFIG_ARM64_RAS_EXTN */  	{},  }; @@ -991,6 +1079,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {  	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM3),  	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM4),  	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDDP), +	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDFHM),  	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, 
ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_FP),  	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP),  	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD), @@ -1070,6 +1159,25 @@ static void __init setup_elf_hwcaps(const struct arm64_cpu_capabilities *hwcaps)  			cap_set_elf_hwcap(hwcaps);  } +/* + * Check if the current CPU has a given feature capability. + * Should be called from non-preemptible context. + */ +static bool __this_cpu_has_cap(const struct arm64_cpu_capabilities *cap_array, +			       unsigned int cap) +{ +	const struct arm64_cpu_capabilities *caps; + +	if (WARN_ON(preemptible())) +		return false; + +	for (caps = cap_array; caps->matches; caps++) +		if (caps->capability == cap && +		    caps->matches(caps, SCOPE_LOCAL_CPU)) +			return true; +	return false; +} +  void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,  			    const char *info)  { @@ -1105,7 +1213,7 @@ void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)  			 * uses an IPI, giving us a PSTATE that disappears when  			 * we return.  			 */ -			stop_machine(caps->enable, NULL, cpu_online_mask); +			stop_machine(caps->enable, (void *)caps, cpu_online_mask);  		}  	}  } @@ -1133,8 +1241,9 @@ verify_local_elf_hwcaps(const struct arm64_cpu_capabilities *caps)  }  static void -verify_local_cpu_features(const struct arm64_cpu_capabilities *caps) +verify_local_cpu_features(const struct arm64_cpu_capabilities *caps_list)  { +	const struct arm64_cpu_capabilities *caps = caps_list;  	for (; caps->matches; caps++) {  		if (!cpus_have_cap(caps->capability))  			continue; @@ -1142,13 +1251,13 @@ verify_local_cpu_features(const struct arm64_cpu_capabilities *caps)  		 * If the new CPU misses an advertised feature, we cannot proceed  		 * further, park the cpu.  		 
*/ -		if (!caps->matches(caps, SCOPE_LOCAL_CPU)) { +		if (!__this_cpu_has_cap(caps_list, caps->capability)) {  			pr_crit("CPU%d: missing feature: %s\n",  					smp_processor_id(), caps->desc);  			cpu_die_early();  		}  		if (caps->enable) -			caps->enable(NULL); +			caps->enable((void *)caps);  	}  } @@ -1188,6 +1297,9 @@ static void verify_local_cpu_capabilities(void)  	if (system_supports_sve())  		verify_sve_features(); + +	if (system_uses_ttbr0_pan()) +		pr_info("Emulating Privileged Access Never (PAN) using TTBR0_EL1 switching\n");  }  void check_local_cpu_capabilities(void) @@ -1224,25 +1336,6 @@ static void __init mark_const_caps_ready(void)  	static_branch_enable(&arm64_const_caps_ready);  } -/* - * Check if the current CPU has a given feature capability. - * Should be called from non-preemptible context. - */ -static bool __this_cpu_has_cap(const struct arm64_cpu_capabilities *cap_array, -			       unsigned int cap) -{ -	const struct arm64_cpu_capabilities *caps; - -	if (WARN_ON(preemptible())) -		return false; - -	for (caps = cap_array; caps->desc; caps++) -		if (caps->capability == cap && caps->matches) -			return caps->matches(caps, SCOPE_LOCAL_CPU); - -	return false; -} -  extern const struct arm64_cpu_capabilities arm64_errata[];  bool this_cpu_has_cap(unsigned int cap) @@ -1386,3 +1479,11 @@ static int __init enable_mrs_emulation(void)  }  core_initcall(enable_mrs_emulation); + +int cpu_clear_disr(void *__unused) +{ +	/* Firmware may have left a deferred SError in this register. 
*/ +	write_sysreg_s(0, SYS_DISR_EL1); + +	return 0; +} diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c index fd691087dc9a..f2d13810daa8 100644 --- a/arch/arm64/kernel/cpuidle.c +++ b/arch/arm64/kernel/cpuidle.c @@ -47,6 +47,8 @@ int arm_cpuidle_suspend(int index)  #include <acpi/processor.h> +#define ARM64_LPI_IS_RETENTION_STATE(arch_flags) (!(arch_flags)) +  int acpi_processor_ffh_lpi_probe(unsigned int cpu)  {  	return arm_cpuidle_init(cpu); @@ -54,6 +56,10 @@ int acpi_processor_ffh_lpi_probe(unsigned int cpu)  int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi)  { -	return CPU_PM_CPU_IDLE_ENTER(arm_cpuidle_suspend, lpi->index); +	if (ARM64_LPI_IS_RETENTION_STATE(lpi->arch_flags)) +		return CPU_PM_CPU_IDLE_ENTER_RETENTION(arm_cpuidle_suspend, +						lpi->index); +	else +		return CPU_PM_CPU_IDLE_ENTER(arm_cpuidle_suspend, lpi->index);  }  #endif diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 1e2554543506..7f94623df8a5 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -76,6 +76,7 @@ static const char *const hwcap_str[] = {  	"asimddp",  	"sha512",  	"sve", +	"asimdfhm",  	NULL  }; diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index a88b6ccebbb4..53781f5687c5 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -209,12 +209,13 @@ NOKPROBE_SYMBOL(call_step_hook);  static void send_user_sigtrap(int si_code)  {  	struct pt_regs *regs = current_pt_regs(); -	siginfo_t info = { -		.si_signo	= SIGTRAP, -		.si_errno	= 0, -		.si_code	= si_code, -		.si_addr	= (void __user *)instruction_pointer(regs), -	}; +	siginfo_t info; + +	clear_siginfo(&info); +	info.si_signo	= SIGTRAP; +	info.si_errno	= 0; +	info.si_code	= si_code; +	info.si_addr	= (void __user *)instruction_pointer(regs);  	if (WARN_ON(!user_mode(regs)))  		return; diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S index 
4e6ad355bd05..6b9736c3fb56 100644 --- a/arch/arm64/kernel/efi-entry.S +++ b/arch/arm64/kernel/efi-entry.S @@ -96,6 +96,7 @@ ENTRY(entry)  	mrs	x0, sctlr_el2  	bic	x0, x0, #1 << 0	// clear SCTLR.M  	bic	x0, x0, #1 << 2	// clear SCTLR.C +	pre_disable_mmu_workaround  	msr	sctlr_el2, x0  	isb  	b	2f @@ -103,6 +104,7 @@ ENTRY(entry)  	mrs	x0, sctlr_el1  	bic	x0, x0, #1 << 0	// clear SCTLR.M  	bic	x0, x0, #1 << 2	// clear SCTLR.C +	pre_disable_mmu_workaround  	msr	sctlr_el1, x0  	isb  2: diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 82cd07592519..f85ac58d08a3 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -48,7 +48,9 @@ static __init pteval_t create_mapping_protection(efi_memory_desc_t *md)  		return pgprot_val(PAGE_KERNEL_ROX);  	/* RW- */ -	if (attr & EFI_MEMORY_XP || type != EFI_RUNTIME_SERVICES_CODE) +	if (((attr & (EFI_MEMORY_RP | EFI_MEMORY_WP | EFI_MEMORY_XP)) == +	     EFI_MEMORY_XP) || +	    type != EFI_RUNTIME_SERVICES_CODE)  		return pgprot_val(PAGE_KERNEL);  	/* RWX */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 6d14b8f29b5f..b34e717d7597 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -28,6 +28,8 @@  #include <asm/errno.h>  #include <asm/esr.h>  #include <asm/irq.h> +#include <asm/memory.h> +#include <asm/mmu.h>  #include <asm/processor.h>  #include <asm/ptrace.h>  #include <asm/thread_info.h> @@ -69,8 +71,21 @@  #define BAD_FIQ		2  #define BAD_ERROR	3 -	.macro kernel_ventry	label +	.macro kernel_ventry, el, label, regsize = 64  	.align 7 +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +alternative_if ARM64_UNMAP_KERNEL_AT_EL0 +	.if	\el == 0 +	.if	\regsize == 64 +	mrs	x30, tpidrro_el0 +	msr	tpidrro_el0, xzr +	.else +	mov	x30, xzr +	.endif +	.endif +alternative_else_nop_endif +#endif +  	sub	sp, sp, #S_FRAME_SIZE  #ifdef CONFIG_VMAP_STACK  	/* @@ -82,7 +97,7 @@  	tbnz	x0, #THREAD_SHIFT, 0f  	sub	x0, sp, x0			// x0'' = sp' - x0' = (sp + x0) - sp = x0  	sub	sp, sp, x0			// 
sp'' = sp' - x0 = (sp + x0) - x0 = sp -	b	\label +	b	el\()\el\()_\label  0:  	/* @@ -114,7 +129,12 @@  	sub	sp, sp, x0  	mrs	x0, tpidrro_el0  #endif -	b	\label +	b	el\()\el\()_\label +	.endm + +	.macro tramp_alias, dst, sym +	mov_q	\dst, TRAMP_VALIAS +	add	\dst, \dst, #(\sym - .entry.tramp.text)  	.endm  	.macro	kernel_entry, el, regsize = 64 @@ -185,7 +205,7 @@ alternative_else_nop_endif  	.if	\el != 0  	mrs	x21, ttbr0_el1 -	tst	x21, #0xffff << 48		// Check for the reserved ASID +	tst	x21, #TTBR_ASID_MASK		// Check for the reserved ASID  	orr	x23, x23, #PSR_PAN_BIT		// Set the emulated PAN in the saved SPSR  	b.eq	1f				// TTBR0 access already disabled  	and	x23, x23, #~PSR_PAN_BIT		// Clear the emulated PAN in the saved SPSR @@ -248,7 +268,7 @@ alternative_else_nop_endif  	tbnz	x22, #22, 1f			// Skip re-enabling TTBR0 access if the PSR_PAN_BIT is set  	.endif -	__uaccess_ttbr0_enable x0 +	__uaccess_ttbr0_enable x0, x1  	.if	\el == 0  	/* @@ -257,7 +277,7 @@ alternative_else_nop_endif  	 * Cavium erratum 27456 (broadcast TLBI instructions may cause I-cache  	 * corruption).  	 */ -	post_ttbr0_update_workaround +	bl	post_ttbr_update_workaround  	.endif  1:  	.if	\el != 0 @@ -269,18 +289,20 @@ alternative_else_nop_endif  	.if	\el == 0  	ldr	x23, [sp, #S_SP]		// load return stack pointer  	msr	sp_el0, x23 +	tst	x22, #PSR_MODE32_BIT		// native task? 
+	b.eq	3f +  #ifdef CONFIG_ARM64_ERRATUM_845719  alternative_if ARM64_WORKAROUND_845719 -	tbz	x22, #4, 1f  #ifdef CONFIG_PID_IN_CONTEXTIDR  	mrs	x29, contextidr_el1  	msr	contextidr_el1, x29  #else  	msr contextidr_el1, xzr  #endif -1:  alternative_else_nop_endif  #endif +3:  	.endif  	msr	elr_el1, x21			// set up the return data @@ -302,7 +324,21 @@ alternative_else_nop_endif  	ldp	x28, x29, [sp, #16 * 14]  	ldr	lr, [sp, #S_LR]  	add	sp, sp, #S_FRAME_SIZE		// restore sp -	eret					// return to kernel + +	.if	\el == 0 +alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0 +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +	bne	4f +	msr	far_el1, x30 +	tramp_alias	x30, tramp_exit_native +	br	x30 +4: +	tramp_alias	x30, tramp_exit_compat +	br	x30 +#endif +	.else +	eret +	.endif  	.endm  	.macro	irq_stack_entry @@ -367,31 +403,31 @@ tsk	.req	x28		// current thread_info  	.align	11  ENTRY(vectors) -	kernel_ventry	el1_sync_invalid		// Synchronous EL1t -	kernel_ventry	el1_irq_invalid			// IRQ EL1t -	kernel_ventry	el1_fiq_invalid			// FIQ EL1t -	kernel_ventry	el1_error_invalid		// Error EL1t +	kernel_ventry	1, sync_invalid			// Synchronous EL1t +	kernel_ventry	1, irq_invalid			// IRQ EL1t +	kernel_ventry	1, fiq_invalid			// FIQ EL1t +	kernel_ventry	1, error_invalid		// Error EL1t -	kernel_ventry	el1_sync			// Synchronous EL1h -	kernel_ventry	el1_irq				// IRQ EL1h -	kernel_ventry	el1_fiq_invalid			// FIQ EL1h -	kernel_ventry	el1_error			// Error EL1h +	kernel_ventry	1, sync				// Synchronous EL1h +	kernel_ventry	1, irq				// IRQ EL1h +	kernel_ventry	1, fiq_invalid			// FIQ EL1h +	kernel_ventry	1, error			// Error EL1h -	kernel_ventry	el0_sync			// Synchronous 64-bit EL0 -	kernel_ventry	el0_irq				// IRQ 64-bit EL0 -	kernel_ventry	el0_fiq_invalid			// FIQ 64-bit EL0 -	kernel_ventry	el0_error			// Error 64-bit EL0 +	kernel_ventry	0, sync				// Synchronous 64-bit EL0 +	kernel_ventry	0, irq				// IRQ 64-bit EL0 +	kernel_ventry	0, fiq_invalid			// FIQ 64-bit EL0 +	kernel_ventry	0, error			// 
Error 64-bit EL0  #ifdef CONFIG_COMPAT -	kernel_ventry	el0_sync_compat			// Synchronous 32-bit EL0 -	kernel_ventry	el0_irq_compat			// IRQ 32-bit EL0 -	kernel_ventry	el0_fiq_invalid_compat		// FIQ 32-bit EL0 -	kernel_ventry	el0_error_compat		// Error 32-bit EL0 +	kernel_ventry	0, sync_compat, 32		// Synchronous 32-bit EL0 +	kernel_ventry	0, irq_compat, 32		// IRQ 32-bit EL0 +	kernel_ventry	0, fiq_invalid_compat, 32	// FIQ 32-bit EL0 +	kernel_ventry	0, error_compat, 32		// Error 32-bit EL0  #else -	kernel_ventry	el0_sync_invalid		// Synchronous 32-bit EL0 -	kernel_ventry	el0_irq_invalid			// IRQ 32-bit EL0 -	kernel_ventry	el0_fiq_invalid			// FIQ 32-bit EL0 -	kernel_ventry	el0_error_invalid		// Error 32-bit EL0 +	kernel_ventry	0, sync_invalid, 32		// Synchronous 32-bit EL0 +	kernel_ventry	0, irq_invalid, 32		// IRQ 32-bit EL0 +	kernel_ventry	0, fiq_invalid, 32		// FIQ 32-bit EL0 +	kernel_ventry	0, error_invalid, 32		// Error 32-bit EL0  #endif  END(vectors) @@ -685,12 +721,15 @@ el0_ia:  	 * Instruction abort handling  	 */  	mrs	x26, far_el1 -	enable_daif +	enable_da_f +#ifdef CONFIG_TRACE_IRQFLAGS +	bl	trace_hardirqs_off +#endif  	ct_user_exit  	mov	x0, x26  	mov	x1, x25  	mov	x2, sp -	bl	do_mem_abort +	bl	do_el0_ia_bp_hardening  	b	ret_to_user  el0_fpsimd_acc:  	/* @@ -943,6 +982,124 @@ __ni_sys_trace:  	.popsection				// .entry.text +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +/* + * Exception vectors trampoline. 
+ */ +	.pushsection ".entry.tramp.text", "ax" + +	.macro tramp_map_kernel, tmp +	mrs	\tmp, ttbr1_el1 +	add	\tmp, \tmp, #(PAGE_SIZE + RESERVED_TTBR0_SIZE) +	bic	\tmp, \tmp, #USER_ASID_FLAG +	msr	ttbr1_el1, \tmp +#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003 +alternative_if ARM64_WORKAROUND_QCOM_FALKOR_E1003 +	/* ASID already in \tmp[63:48] */ +	movk	\tmp, #:abs_g2_nc:(TRAMP_VALIAS >> 12) +	movk	\tmp, #:abs_g1_nc:(TRAMP_VALIAS >> 12) +	/* 2MB boundary containing the vectors, so we nobble the walk cache */ +	movk	\tmp, #:abs_g0_nc:((TRAMP_VALIAS & ~(SZ_2M - 1)) >> 12) +	isb +	tlbi	vae1, \tmp +	dsb	nsh +alternative_else_nop_endif +#endif /* CONFIG_QCOM_FALKOR_ERRATUM_1003 */ +	.endm + +	.macro tramp_unmap_kernel, tmp +	mrs	\tmp, ttbr1_el1 +	sub	\tmp, \tmp, #(PAGE_SIZE + RESERVED_TTBR0_SIZE) +	orr	\tmp, \tmp, #USER_ASID_FLAG +	msr	ttbr1_el1, \tmp +	/* +	 * We avoid running the post_ttbr_update_workaround here because the +	 * user and kernel ASIDs don't have conflicting mappings, so any +	 * "blessing" as described in: +	 * +	 *   http://lkml.kernel.org/r/56BB848A.6060603@caviumnetworks.com +	 * +	 * will not hurt correctness. Whilst this may partially defeat the +	 * point of using split ASIDs in the first place, it avoids +	 * the hit of invalidating the entire I-cache on every return to +	 * userspace. +	 */ +	.endm + +	.macro tramp_ventry, regsize = 64 +	.align	7 +1: +	.if	\regsize == 64 +	msr	tpidrro_el0, x30	// Restored in kernel_ventry +	.endif +	/* +	 * Defend against branch aliasing attacks by pushing a dummy +	 * entry onto the return stack and using a RET instruction to +	 * enter the full-fat kernel vectors. +	 */ +	bl	2f +	b	. 
+2: +	tramp_map_kernel	x30 +#ifdef CONFIG_RANDOMIZE_BASE +	adr	x30, tramp_vectors + PAGE_SIZE +alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003 +	ldr	x30, [x30] +#else +	ldr	x30, =vectors +#endif +	prfm	plil1strm, [x30, #(1b - tramp_vectors)] +	msr	vbar_el1, x30 +	add	x30, x30, #(1b - tramp_vectors) +	isb +	ret +	.endm + +	.macro tramp_exit, regsize = 64 +	adr	x30, tramp_vectors +	msr	vbar_el1, x30 +	tramp_unmap_kernel	x30 +	.if	\regsize == 64 +	mrs	x30, far_el1 +	.endif +	eret +	.endm + +	.align	11 +ENTRY(tramp_vectors) +	.space	0x400 + +	tramp_ventry +	tramp_ventry +	tramp_ventry +	tramp_ventry + +	tramp_ventry	32 +	tramp_ventry	32 +	tramp_ventry	32 +	tramp_ventry	32 +END(tramp_vectors) + +ENTRY(tramp_exit_native) +	tramp_exit +END(tramp_exit_native) + +ENTRY(tramp_exit_compat) +	tramp_exit	32 +END(tramp_exit_compat) + +	.ltorg +	.popsection				// .entry.tramp.text +#ifdef CONFIG_RANDOMIZE_BASE +	.pushsection ".rodata", "a" +	.align PAGE_SHIFT +	.globl	__entry_tramp_data_start +__entry_tramp_data_start: +	.quad	vectors +	.popsection				// .rodata +#endif /* CONFIG_RANDOMIZE_BASE */ +#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ +  /*   * Special system call wrappers.   */ @@ -996,3 +1153,180 @@ ENTRY(ret_from_fork)  	b	ret_to_user  ENDPROC(ret_from_fork)  NOKPROBE(ret_from_fork) + +#ifdef CONFIG_ARM_SDE_INTERFACE + +#include <asm/sdei.h> +#include <uapi/linux/arm_sdei.h> + +.macro sdei_handler_exit exit_mode +	/* On success, this call never returns... */ +	cmp	\exit_mode, #SDEI_EXIT_SMC +	b.ne	99f +	smc	#0 +	b	. +99:	hvc	#0 +	b	. +.endm + +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +/* + * The regular SDEI entry point may have been unmapped along with the rest of + * the kernel. This trampoline restores the kernel mapping to make the x1 memory + * argument accessible. + * + * This clobbers x4, __sdei_handler() will restore this from firmware's + * copy. 
+ */ +.ltorg +.pushsection ".entry.tramp.text", "ax" +ENTRY(__sdei_asm_entry_trampoline) +	mrs	x4, ttbr1_el1 +	tbz	x4, #USER_ASID_BIT, 1f + +	tramp_map_kernel tmp=x4 +	isb +	mov	x4, xzr + +	/* +	 * Use reg->interrupted_regs.addr_limit to remember whether to unmap +	 * the kernel on exit. +	 */ +1:	str	x4, [x1, #(SDEI_EVENT_INTREGS + S_ORIG_ADDR_LIMIT)] + +#ifdef CONFIG_RANDOMIZE_BASE +	adr	x4, tramp_vectors + PAGE_SIZE +	add	x4, x4, #:lo12:__sdei_asm_trampoline_next_handler +	ldr	x4, [x4] +#else +	ldr	x4, =__sdei_asm_handler +#endif +	br	x4 +ENDPROC(__sdei_asm_entry_trampoline) +NOKPROBE(__sdei_asm_entry_trampoline) + +/* + * Make the exit call and restore the original ttbr1_el1 + * + * x0 & x1: setup for the exit API call + * x2: exit_mode + * x4: struct sdei_registered_event argument from registration time. + */ +ENTRY(__sdei_asm_exit_trampoline) +	ldr	x4, [x4, #(SDEI_EVENT_INTREGS + S_ORIG_ADDR_LIMIT)] +	cbnz	x4, 1f + +	tramp_unmap_kernel	tmp=x4 + +1:	sdei_handler_exit exit_mode=x2 +ENDPROC(__sdei_asm_exit_trampoline) +NOKPROBE(__sdei_asm_exit_trampoline) +	.ltorg +.popsection		// .entry.tramp.text +#ifdef CONFIG_RANDOMIZE_BASE +.pushsection ".rodata", "a" +__sdei_asm_trampoline_next_handler: +	.quad	__sdei_asm_handler +.popsection		// .rodata +#endif /* CONFIG_RANDOMIZE_BASE */ +#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ + +/* + * Software Delegated Exception entry point. + * + * x0: Event number + * x1: struct sdei_registered_event argument from registration time. + * x2: interrupted PC + * x3: interrupted PSTATE + * x4: maybe clobbered by the trampoline + * + * Firmware has preserved x0->x17 for us, we must save/restore the rest to + * follow SMC-CC. We save (or retrieve) all the registers as the handler may + * want them. 
+ */ +ENTRY(__sdei_asm_handler) +	stp     x2, x3, [x1, #SDEI_EVENT_INTREGS + S_PC] +	stp     x4, x5, [x1, #SDEI_EVENT_INTREGS + 16 * 2] +	stp     x6, x7, [x1, #SDEI_EVENT_INTREGS + 16 * 3] +	stp     x8, x9, [x1, #SDEI_EVENT_INTREGS + 16 * 4] +	stp     x10, x11, [x1, #SDEI_EVENT_INTREGS + 16 * 5] +	stp     x12, x13, [x1, #SDEI_EVENT_INTREGS + 16 * 6] +	stp     x14, x15, [x1, #SDEI_EVENT_INTREGS + 16 * 7] +	stp     x16, x17, [x1, #SDEI_EVENT_INTREGS + 16 * 8] +	stp     x18, x19, [x1, #SDEI_EVENT_INTREGS + 16 * 9] +	stp     x20, x21, [x1, #SDEI_EVENT_INTREGS + 16 * 10] +	stp     x22, x23, [x1, #SDEI_EVENT_INTREGS + 16 * 11] +	stp     x24, x25, [x1, #SDEI_EVENT_INTREGS + 16 * 12] +	stp     x26, x27, [x1, #SDEI_EVENT_INTREGS + 16 * 13] +	stp     x28, x29, [x1, #SDEI_EVENT_INTREGS + 16 * 14] +	mov	x4, sp +	stp     lr, x4, [x1, #SDEI_EVENT_INTREGS + S_LR] + +	mov	x19, x1 + +#ifdef CONFIG_VMAP_STACK +	/* +	 * entry.S may have been using sp as a scratch register, find whether +	 * this is a normal or critical event and switch to the appropriate +	 * stack for this CPU. +	 */ +	ldrb	w4, [x19, #SDEI_EVENT_PRIORITY] +	cbnz	w4, 1f +	ldr_this_cpu dst=x5, sym=sdei_stack_normal_ptr, tmp=x6 +	b	2f +1:	ldr_this_cpu dst=x5, sym=sdei_stack_critical_ptr, tmp=x6 +2:	mov	x6, #SDEI_STACK_SIZE +	add	x5, x5, x6 +	mov	sp, x5 +#endif + +	/* +	 * We may have interrupted userspace, or a guest, or exit-from or +	 * return-to either of these. We can't trust sp_el0, restore it. +	 */ +	mrs	x28, sp_el0 +	ldr_this_cpu	dst=x0, sym=__entry_task, tmp=x1 +	msr	sp_el0, x0 + +	/* If we interrupted the kernel point to the previous stack/frame. */ +	and     x0, x3, #0xc +	mrs     x1, CurrentEL +	cmp     x0, x1 +	csel	x29, x29, xzr, eq	// fp, or zero +	csel	x4, x2, xzr, eq		// elr, or zero + +	stp	x29, x4, [sp, #-16]! 
+	mov	x29, sp + +	add	x0, x19, #SDEI_EVENT_INTREGS +	mov	x1, x19 +	bl	__sdei_handler + +	msr	sp_el0, x28 +	/* restore regs >x17 that we clobbered */ +	mov	x4, x19         // keep x4 for __sdei_asm_exit_trampoline +	ldp	x28, x29, [x4, #SDEI_EVENT_INTREGS + 16 * 14] +	ldp	x18, x19, [x4, #SDEI_EVENT_INTREGS + 16 * 9] +	ldp	lr, x1, [x4, #SDEI_EVENT_INTREGS + S_LR] +	mov	sp, x1 + +	mov	x1, x0			// address to complete_and_resume +	/* x0 = (x0 <= 1) ? EVENT_COMPLETE:EVENT_COMPLETE_AND_RESUME */ +	cmp	x0, #1 +	mov_q	x2, SDEI_1_0_FN_SDEI_EVENT_COMPLETE +	mov_q	x3, SDEI_1_0_FN_SDEI_EVENT_COMPLETE_AND_RESUME +	csel	x0, x2, x3, ls + +	ldr_l	x2, sdei_exit_mode + +alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0 +	sdei_handler_exit exit_mode=x2 +alternative_else_nop_endif + +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +	tramp_alias	dst=x5, sym=__sdei_asm_exit_trampoline +	br	x5 +#endif +ENDPROC(__sdei_asm_handler) +NOKPROBE(__sdei_asm_handler) +#endif /* CONFIG_ARM_SDE_INTERFACE */ diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 143b3e72c25e..e7226c4c7493 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -114,7 +114,12 @@   *   returned from the 2nd syscall yet, TIF_FOREIGN_FPSTATE is still set so   *   whatever is in the FPSIMD registers is not saved to memory, but discarded.   
*/ -static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_last_state); +struct fpsimd_last_state_struct { +	struct fpsimd_state *st; +	bool sve_in_use; +}; + +static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state);  /* Default VL for tasks that don't set it explicitly: */  static int sve_default_vl = -1; @@ -862,7 +867,7 @@ asmlinkage void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)  asmlinkage void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)  {  	siginfo_t info; -	unsigned int si_code = 0; +	unsigned int si_code = FPE_FIXME;  	if (esr & FPEXC_IOF)  		si_code = FPE_FLTINV; @@ -905,7 +910,7 @@ void fpsimd_thread_switch(struct task_struct *next)  		 */  		struct fpsimd_state *st = &next->thread.fpsimd_state; -		if (__this_cpu_read(fpsimd_last_state) == st +		if (__this_cpu_read(fpsimd_last_state.st) == st  		    && st->cpu == smp_processor_id())  			clear_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);  		else @@ -992,6 +997,21 @@ void fpsimd_signal_preserve_current_state(void)  }  /* + * Associate current's FPSIMD context with this cpu + * Preemption must be disabled when calling this function. 
+ */ +static void fpsimd_bind_to_cpu(void) +{ +	struct fpsimd_last_state_struct *last = +		this_cpu_ptr(&fpsimd_last_state); +	struct fpsimd_state *st = ¤t->thread.fpsimd_state; + +	last->st = st; +	last->sve_in_use = test_thread_flag(TIF_SVE); +	st->cpu = smp_processor_id(); +} + +/*   * Load the userland FPSIMD state of 'current' from memory, but only if the   * FPSIMD state already held in the registers is /not/ the most recent FPSIMD   * state of 'current' @@ -1004,11 +1024,8 @@ void fpsimd_restore_current_state(void)  	local_bh_disable();  	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { -		struct fpsimd_state *st = ¤t->thread.fpsimd_state; -  		task_fpsimd_load(); -		__this_cpu_write(fpsimd_last_state, st); -		st->cpu = smp_processor_id(); +		fpsimd_bind_to_cpu();  	}  	local_bh_enable(); @@ -1019,25 +1036,21 @@ void fpsimd_restore_current_state(void)   * flag that indicates that the FPSIMD register contents are the most recent   * FPSIMD state of 'current'   */ -void fpsimd_update_current_state(struct fpsimd_state *state) +void fpsimd_update_current_state(struct user_fpsimd_state const *state)  {  	if (!system_supports_fpsimd())  		return;  	local_bh_disable(); -	if (system_supports_sve() && test_thread_flag(TIF_SVE)) { -		current->thread.fpsimd_state = *state; +	current->thread.fpsimd_state.user_fpsimd = *state; +	if (system_supports_sve() && test_thread_flag(TIF_SVE))  		fpsimd_to_sve(current); -	} -	task_fpsimd_load(); -	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { -		struct fpsimd_state *st = ¤t->thread.fpsimd_state; +	task_fpsimd_load(); -		__this_cpu_write(fpsimd_last_state, st); -		st->cpu = smp_processor_id(); -	} +	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) +		fpsimd_bind_to_cpu();  	local_bh_enable();  } @@ -1052,7 +1065,7 @@ void fpsimd_flush_task_state(struct task_struct *t)  static inline void fpsimd_flush_cpu_state(void)  { -	__this_cpu_write(fpsimd_last_state, NULL); +	__this_cpu_write(fpsimd_last_state.st, NULL);  } 
 /* @@ -1065,14 +1078,10 @@ static inline void fpsimd_flush_cpu_state(void)  #ifdef CONFIG_ARM64_SVE  void sve_flush_cpu_state(void)  { -	struct fpsimd_state *const fpstate = __this_cpu_read(fpsimd_last_state); -	struct task_struct *tsk; - -	if (!fpstate) -		return; +	struct fpsimd_last_state_struct const *last = +		this_cpu_ptr(&fpsimd_last_state); -	tsk = container_of(fpstate, struct task_struct, thread.fpsimd_state); -	if (test_tsk_thread_flag(tsk, TIF_SVE)) +	if (last->st && last->sve_in_use)  		fpsimd_flush_cpu_state();  }  #endif /* CONFIG_ARM64_SVE */ @@ -1267,7 +1276,7 @@ static inline void fpsimd_pm_init(void) { }  #ifdef CONFIG_HOTPLUG_CPU  static int fpsimd_cpu_dead(unsigned int cpu)  { -	per_cpu(fpsimd_last_state, cpu) = NULL; +	per_cpu(fpsimd_last_state.st, cpu) = NULL;  	return 0;  } diff --git a/arch/arm64/kernel/ftrace-mod.S b/arch/arm64/kernel/ftrace-mod.S deleted file mode 100644 index 00c4025be4ff..000000000000 --- a/arch/arm64/kernel/ftrace-mod.S +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ - -#include <linux/linkage.h> -#include <asm/assembler.h> - -	.section	".text.ftrace_trampoline", "ax" -	.align		3 -0:	.quad		0 -__ftrace_trampoline: -	ldr		x16, 0b -	br		x16 -ENDPROC(__ftrace_trampoline) diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index c13b1fca0e5b..50986e388d2b 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -76,7 +76,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)  	if (offset < -SZ_128M || offset >= SZ_128M) {  #ifdef CONFIG_ARM64_MODULE_PLTS -		unsigned long *trampoline; +		struct plt_entry trampoline;  		struct module *mod;  		/* @@ -104,22 +104,24 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)  		 * is added in the future, but for now, the pr_err() below  		 * deals with a theoretical issue only.  		 */ -		trampoline = (unsigned long *)mod->arch.ftrace_trampoline; -		if (trampoline[0] != addr) { -			if (trampoline[0] != 0) { +		trampoline = get_plt_entry(addr); +		if (!plt_entries_equal(mod->arch.ftrace_trampoline, +				       &trampoline)) { +			if (!plt_entries_equal(mod->arch.ftrace_trampoline, +					       &(struct plt_entry){})) {  				pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n");  				return -EINVAL;  			}  			/* point the trampoline to our ftrace entry point */  			module_disable_ro(mod); -			trampoline[0] = addr; +			*mod->arch.ftrace_trampoline = trampoline;  			module_enable_ro(mod, true);  			/* update trampoline before patching in the branch */  			smp_wmb();  		} -		addr = (unsigned long)&trampoline[1]; +		addr = (unsigned long)(void *)mod->arch.ftrace_trampoline;  #else /* CONFIG_ARM64_MODULE_PLTS */  		return -EINVAL;  #endif /* CONFIG_ARM64_MODULE_PLTS */ diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 67e86a0f57ac..ba3ab04788dc 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -148,6 +148,26 @@ preserve_boot_args:  
ENDPROC(preserve_boot_args)  /* + * Macro to arrange a physical address in a page table entry, taking care of + * 52-bit addresses. + * + * Preserves:	phys + * Returns:	pte + */ +	.macro	phys_to_pte, phys, pte +#ifdef CONFIG_ARM64_PA_BITS_52 +	/* +	 * We assume \phys is 64K aligned and this is guaranteed by only +	 * supporting this configuration with 64K pages. +	 */ +	orr	\pte, \phys, \phys, lsr #36 +	and	\pte, \pte, #PTE_ADDR_MASK +#else +	mov	\pte, \phys +#endif +	.endm + +/*   * Macro to create a table entry to the next page.   *   *	tbl:	page table address @@ -156,54 +176,124 @@ ENDPROC(preserve_boot_args)   *	ptrs:	#imm pointers per table page   *   * Preserves:	virt - * Corrupts:	tmp1, tmp2 + * Corrupts:	ptrs, tmp1, tmp2   * Returns:	tbl -> next level table page address   */  	.macro	create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2 -	lsr	\tmp1, \virt, #\shift -	and	\tmp1, \tmp1, #\ptrs - 1	// table index -	add	\tmp2, \tbl, #PAGE_SIZE +	add	\tmp1, \tbl, #PAGE_SIZE +	phys_to_pte \tmp1, \tmp2  	orr	\tmp2, \tmp2, #PMD_TYPE_TABLE	// address of next table and entry type +	lsr	\tmp1, \virt, #\shift +	sub	\ptrs, \ptrs, #1 +	and	\tmp1, \tmp1, \ptrs		// table index  	str	\tmp2, [\tbl, \tmp1, lsl #3]  	add	\tbl, \tbl, #PAGE_SIZE		// next level table page  	.endm  /* - * Macro to populate the PGD (and possibily PUD) for the corresponding - * block entry in the next level (tbl) for the given virtual address. + * Macro to populate page table entries, these entries can be pointers to the next level + * or last level entries pointing to physical memory. 
+ * + *	tbl:	page table address + *	rtbl:	pointer to page table or physical memory + *	index:	start index to write + *	eindex:	end index to write - [index, eindex] written to + *	flags:	flags for pagetable entry to or in + *	inc:	increment to rtbl between each entry + *	tmp1:	temporary variable   * - * Preserves:	tbl, next, virt - * Corrupts:	tmp1, tmp2 + * Preserves:	tbl, eindex, flags, inc + * Corrupts:	index, tmp1 + * Returns:	rtbl   */ -	.macro	create_pgd_entry, tbl, virt, tmp1, tmp2 -	create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2 -#if SWAPPER_PGTABLE_LEVELS > 3 -	create_table_entry \tbl, \virt, PUD_SHIFT, PTRS_PER_PUD, \tmp1, \tmp2 -#endif -#if SWAPPER_PGTABLE_LEVELS > 2 -	create_table_entry \tbl, \virt, SWAPPER_TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2 -#endif +	.macro populate_entries, tbl, rtbl, index, eindex, flags, inc, tmp1 +.Lpe\@:	phys_to_pte \rtbl, \tmp1 +	orr	\tmp1, \tmp1, \flags	// tmp1 = table entry +	str	\tmp1, [\tbl, \index, lsl #3] +	add	\rtbl, \rtbl, \inc	// rtbl = pa next level +	add	\index, \index, #1 +	cmp	\index, \eindex +	b.ls	.Lpe\@ +	.endm + +/* + * Compute indices of table entries from virtual address range. If multiple entries + * were needed in the previous page table level then the next page table level is assumed + * to be composed of multiple pages. (This effectively scales the end index). + * + *	vstart:	virtual address of start of range + *	vend:	virtual address of end of range + *	shift:	shift used to transform virtual address into index + *	ptrs:	number of entries in page table + *	istart:	index in table corresponding to vstart + *	iend:	index in table corresponding to vend + *	count:	On entry: how many extra entries were required in previous level, scales + *			  our end index. 
+ *		On exit: returns how many extra entries required for next page table level + * + * Preserves:	vstart, vend, shift, ptrs + * Returns:	istart, iend, count + */ +	.macro compute_indices, vstart, vend, shift, ptrs, istart, iend, count +	lsr	\iend, \vend, \shift +	mov	\istart, \ptrs +	sub	\istart, \istart, #1 +	and	\iend, \iend, \istart	// iend = (vend >> shift) & (ptrs - 1) +	mov	\istart, \ptrs +	mul	\istart, \istart, \count +	add	\iend, \iend, \istart	// iend += (count - 1) * ptrs +					// our entries span multiple tables + +	lsr	\istart, \vstart, \shift +	mov	\count, \ptrs +	sub	\count, \count, #1 +	and	\istart, \istart, \count + +	sub	\count, \iend, \istart  	.endm  /* - * Macro to populate block entries in the page table for the start..end - * virtual range (inclusive). + * Map memory for specified virtual address range. Each level of page table needed supports + * multiple entries. If a level requires n entries the next page table level is assumed to be + * formed from n pages. + * + *	tbl:	location of page table + *	rtbl:	address to be used for first level page table entry (typically tbl + PAGE_SIZE) + *	vstart:	start address to map + *	vend:	end address to map - we map [vstart, vend] + *	flags:	flags to use to map last level entries + *	phys:	physical address corresponding to vstart - physical memory is contiguous + *	pgds:	the number of pgd entries   * - * Preserves:	tbl, flags - * Corrupts:	phys, start, end, pstate + * Temporaries:	istart, iend, tmp, count, sv - these need to be different registers + * Preserves:	vstart, vend, flags + * Corrupts:	tbl, rtbl, istart, iend, tmp, count, sv   */ -	.macro	create_block_map, tbl, flags, phys, start, end -	lsr	\phys, \phys, #SWAPPER_BLOCK_SHIFT -	lsr	\start, \start, #SWAPPER_BLOCK_SHIFT -	and	\start, \start, #PTRS_PER_PTE - 1	// table index -	orr	\phys, \flags, \phys, lsl #SWAPPER_BLOCK_SHIFT	// table entry -	lsr	\end, \end, #SWAPPER_BLOCK_SHIFT -	and	\end, \end, #PTRS_PER_PTE - 1		// table end index -9999:	str	
\phys, [\tbl, \start, lsl #3]		// store the entry -	add	\start, \start, #1			// next entry -	add	\phys, \phys, #SWAPPER_BLOCK_SIZE		// next block -	cmp	\start, \end -	b.ls	9999b +	.macro map_memory, tbl, rtbl, vstart, vend, flags, phys, pgds, istart, iend, tmp, count, sv +	add \rtbl, \tbl, #PAGE_SIZE +	mov \sv, \rtbl +	mov \count, #0 +	compute_indices \vstart, \vend, #PGDIR_SHIFT, \pgds, \istart, \iend, \count +	populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp +	mov \tbl, \sv +	mov \sv, \rtbl + +#if SWAPPER_PGTABLE_LEVELS > 3 +	compute_indices \vstart, \vend, #PUD_SHIFT, #PTRS_PER_PUD, \istart, \iend, \count +	populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp +	mov \tbl, \sv +	mov \sv, \rtbl +#endif + +#if SWAPPER_PGTABLE_LEVELS > 2 +	compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #PTRS_PER_PMD, \istart, \iend, \count +	populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp +	mov \tbl, \sv +#endif + +	compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #PTRS_PER_PTE, \istart, \iend, \count +	bic \count, \phys, #SWAPPER_BLOCK_SIZE - 1 +	populate_entries \tbl, \count, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp  	.endm  /* @@ -221,14 +311,16 @@ __create_page_tables:  	 * dirty cache lines being evicted.  	 */  	adrp	x0, idmap_pg_dir -	ldr	x1, =(IDMAP_DIR_SIZE + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE) +	adrp	x1, swapper_pg_end +	sub	x1, x1, x0  	bl	__inval_dcache_area  	/*  	 * Clear the idmap and swapper page tables.  	 
*/  	adrp	x0, idmap_pg_dir -	ldr	x1, =(IDMAP_DIR_SIZE + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE) +	adrp	x1, swapper_pg_end +	sub	x1, x1, x0  1:	stp	xzr, xzr, [x0], #16  	stp	xzr, xzr, [x0], #16  	stp	xzr, xzr, [x0], #16 @@ -244,26 +336,13 @@ __create_page_tables:  	adrp	x0, idmap_pg_dir  	adrp	x3, __idmap_text_start		// __pa(__idmap_text_start) -#ifndef CONFIG_ARM64_VA_BITS_48 -#define EXTRA_SHIFT	(PGDIR_SHIFT + PAGE_SHIFT - 3) -#define EXTRA_PTRS	(1 << (48 - EXTRA_SHIFT)) - -	/* -	 * If VA_BITS < 48, it may be too small to allow for an ID mapping to be -	 * created that covers system RAM if that is located sufficiently high -	 * in the physical address space. So for the ID map, use an extended -	 * virtual range in that case, by configuring an additional translation -	 * level. -	 * First, we have to verify our assumption that the current value of -	 * VA_BITS was chosen such that all translation levels are fully -	 * utilised, and that lowering T0SZ will always result in an additional -	 * translation level to be configured. -	 */ -#if VA_BITS != EXTRA_SHIFT -#error "Mismatch between VA_BITS and page size/number of translation levels" -#endif -  	/* +	 * VA_BITS may be too small to allow for an ID mapping to be created +	 * that covers system RAM if that is located sufficiently high in the +	 * physical address space. So for the ID map, use an extended virtual +	 * range in that case, and configure an additional translation level +	 * if needed. +	 *  	 * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the  	 * entire ID map region can be mapped. As T0SZ == (64 - #bits used),  	 * this number conveniently equals the number of leading zeroes in @@ -272,21 +351,44 @@ __create_page_tables:  	adrp	x5, __idmap_text_end  	clz	x5, x5  	cmp	x5, TCR_T0SZ(VA_BITS)	// default T0SZ small enough? -	b.ge	1f			// .. then skip additional level +	b.ge	1f			// .. 
then skip VA range extension  	adr_l	x6, idmap_t0sz  	str	x5, [x6]  	dmb	sy  	dc	ivac, x6		// Invalidate potentially stale cache line -	create_table_entry x0, x3, EXTRA_SHIFT, EXTRA_PTRS, x5, x6 -1: +#if (VA_BITS < 48) +#define EXTRA_SHIFT	(PGDIR_SHIFT + PAGE_SHIFT - 3) +#define EXTRA_PTRS	(1 << (PHYS_MASK_SHIFT - EXTRA_SHIFT)) + +	/* +	 * If VA_BITS < 48, we have to configure an additional table level. +	 * First, we have to verify our assumption that the current value of +	 * VA_BITS was chosen such that all translation levels are fully +	 * utilised, and that lowering T0SZ will always result in an additional +	 * translation level to be configured. +	 */ +#if VA_BITS != EXTRA_SHIFT +#error "Mismatch between VA_BITS and page size/number of translation levels"  #endif -	create_pgd_entry x0, x3, x5, x6 +	mov	x4, EXTRA_PTRS +	create_table_entry x0, x3, EXTRA_SHIFT, x4, x5, x6 +#else +	/* +	 * If VA_BITS == 48, we don't have to configure an additional +	 * translation level, but the top-level table has more entries. +	 */ +	mov	x4, #1 << (PHYS_MASK_SHIFT - PGDIR_SHIFT) +	str_l	x4, idmap_ptrs_per_pgd, x5 +#endif +1: +	ldr_l	x4, idmap_ptrs_per_pgd  	mov	x5, x3				// __pa(__idmap_text_start)  	adr_l	x6, __idmap_text_end		// __pa(__idmap_text_end) -	create_block_map x0, x7, x3, x5, x6 + +	map_memory x0, x1, x3, x6, x7, x3, x4, x10, x11, x12, x13, x14  	/*  	 * Map the kernel image (starting with PHYS_OFFSET). 
@@ -294,12 +396,13 @@ __create_page_tables:  	adrp	x0, swapper_pg_dir  	mov_q	x5, KIMAGE_VADDR + TEXT_OFFSET	// compile time __va(_text)  	add	x5, x5, x23			// add KASLR displacement -	create_pgd_entry x0, x5, x3, x6 +	mov	x4, PTRS_PER_PGD  	adrp	x6, _end			// runtime __pa(_end)  	adrp	x3, _text			// runtime __pa(_text)  	sub	x6, x6, x3			// _end - _text  	add	x6, x6, x5			// runtime __va(_end) -	create_block_map x0, x7, x3, x5, x6 + +	map_memory x0, x1, x5, x6, x7, x3, x4, x10, x11, x12, x13, x14  	/*  	 * Since the page tables have been populated with non-cacheable @@ -307,7 +410,8 @@ __create_page_tables:  	 * tables again to remove any speculatively loaded cache lines.  	 */  	adrp	x0, idmap_pg_dir -	ldr	x1, =(IDMAP_DIR_SIZE + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE) +	adrp	x1, swapper_pg_end +	sub	x1, x1, x0  	dmb	sy  	bl	__inval_dcache_area @@ -388,17 +492,13 @@ ENTRY(el2_setup)  	mrs	x0, CurrentEL  	cmp	x0, #CurrentEL_EL2  	b.eq	1f -	mrs	x0, sctlr_el1 -CPU_BE(	orr	x0, x0, #(3 << 24)	)	// Set the EE and E0E bits for EL1 -CPU_LE(	bic	x0, x0, #(3 << 24)	)	// Clear the EE and E0E bits for EL1 +	mov_q	x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1)  	msr	sctlr_el1, x0  	mov	w0, #BOOT_CPU_MODE_EL1		// This cpu booted in EL1  	isb  	ret -1:	mrs	x0, sctlr_el2 -CPU_BE(	orr	x0, x0, #(1 << 25)	)	// Set the EE bit for EL2 -CPU_LE(	bic	x0, x0, #(1 << 25)	)	// Clear the EE bit for EL2 +1:	mov_q	x0, (SCTLR_EL2_RES1 | ENDIAN_SET_EL2)  	msr	sctlr_el2, x0  #ifdef CONFIG_ARM64_VHE @@ -514,10 +614,7 @@ install_el2_stub:  	 * requires no configuration, and all non-hyp-specific EL2 setup  	 * will be done via the _EL1 system register aliases in __cpu_setup.  	 */ -	/* sctlr_el1 */ -	mov	x0, #0x0800			// Set/clear RES{1,0} bits -CPU_BE(	movk	x0, #0x33d0, lsl #16	)	// Set EE and E0E on BE systems -CPU_LE(	movk	x0, #0x30d0, lsl #16	)	// Clear EE and E0E on LE systems +	mov_q	x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1)  	msr	sctlr_el1, x0  	/* Coprocessor traps. 
*/ @@ -679,8 +776,10 @@ ENTRY(__enable_mmu)  	update_early_cpu_boot_status 0, x1, x2  	adrp	x1, idmap_pg_dir  	adrp	x2, swapper_pg_dir -	msr	ttbr0_el1, x1			// load TTBR0 -	msr	ttbr1_el1, x2			// load TTBR1 +	phys_to_ttbr x1, x3 +	phys_to_ttbr x2, x4 +	msr	ttbr0_el1, x3			// load TTBR0 +	msr	ttbr1_el1, x4			// load TTBR1  	isb  	msr	sctlr_el1, x0  	isb @@ -750,6 +849,7 @@ __primary_switch:  	 * to take into account by discarding the current kernel mapping and  	 * creating a new one.  	 */ +	pre_disable_mmu_workaround  	msr	sctlr_el1, x20			// disable the MMU  	isb  	bl	__create_page_tables		// recreate kernel mapping diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S index e56d848b6466..84f5d52fddda 100644 --- a/arch/arm64/kernel/hibernate-asm.S +++ b/arch/arm64/kernel/hibernate-asm.S @@ -33,12 +33,14 @@   * Even switching to our copied tables will cause a changed output address at   * each stage of the walk.   */ -.macro break_before_make_ttbr_switch zero_page, page_table -	msr	ttbr1_el1, \zero_page +.macro break_before_make_ttbr_switch zero_page, page_table, tmp +	phys_to_ttbr \zero_page, \tmp +	msr	ttbr1_el1, \tmp  	isb  	tlbi	vmalle1  	dsb	nsh -	msr	ttbr1_el1, \page_table +	phys_to_ttbr \page_table, \tmp +	msr	ttbr1_el1, \tmp  	isb  .endm @@ -78,7 +80,7 @@ ENTRY(swsusp_arch_suspend_exit)  	 * We execute from ttbr0, change ttbr1 to our copied linear map tables  	 * with a break-before-make via the zero page  	 */ -	break_before_make_ttbr_switch	x5, x0 +	break_before_make_ttbr_switch	x5, x0, x6  	mov	x21, x1  	mov	x30, x2 @@ -109,7 +111,7 @@ ENTRY(swsusp_arch_suspend_exit)  	dsb	ish		/* wait for PoU cleaning to finish */  	/* switch to the restored kernels page tables */ -	break_before_make_ttbr_switch	x25, x21 +	break_before_make_ttbr_switch	x25, x21, x6  	ic	ialluis  	dsb	ish diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 3009b8b80f08..f20cf7e99249 100644 --- a/arch/arm64/kernel/hibernate.c +++ 
b/arch/arm64/kernel/hibernate.c @@ -247,8 +247,7 @@ static int create_safe_exec_page(void *src_start, size_t length,  	}  	pte = pte_offset_kernel(pmd, dst_addr); -	set_pte(pte, __pte(virt_to_phys((void *)dst) | -			 pgprot_val(PAGE_KERNEL_EXEC))); +	set_pte(pte, pfn_pte(virt_to_pfn(dst), PAGE_KERNEL_EXEC));  	/*  	 * Load our new page tables. A strict BBM approach requires that we @@ -264,7 +263,7 @@ static int create_safe_exec_page(void *src_start, size_t length,  	 */  	cpu_set_reserved_ttbr0();  	local_flush_tlb_all(); -	write_sysreg(virt_to_phys(pgd), ttbr0_el1); +	write_sysreg(phys_to_ttbr(virt_to_phys(pgd)), ttbr0_el1);  	isb();  	*phys_dst_addr = virt_to_phys((void *)dst); diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 749f81779420..74bb56f656ef 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -28,6 +28,7 @@  #include <linux/perf_event.h>  #include <linux/ptrace.h>  #include <linux/smp.h> +#include <linux/uaccess.h>  #include <asm/compat.h>  #include <asm/current.h> @@ -36,7 +37,6 @@  #include <asm/traps.h>  #include <asm/cputype.h>  #include <asm/system_misc.h> -#include <asm/uaccess.h>  /* Breakpoint currently in use for each BRP. */  static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[ARM_MAX_BRP]); diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 713561e5bcab..60e5fc661f74 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -29,6 +29,7 @@  #include <linux/irqchip.h>  #include <linux/seq_file.h>  #include <linux/vmalloc.h> +#include <asm/vmap_stack.h>  unsigned long irq_err_count; @@ -58,17 +59,7 @@ static void init_irq_stacks(void)  	unsigned long *p;  	for_each_possible_cpu(cpu) { -		/* -		* To ensure that VMAP'd stack overflow detection works -		* correctly, the IRQ stacks need to have the same -		* alignment as other stacks. 
-		*/ -		p = __vmalloc_node_range(IRQ_STACK_SIZE, THREAD_ALIGN, -					 VMALLOC_START, VMALLOC_END, -					 THREADINFO_GFP, PAGE_KERNEL, -					 0, cpu_to_node(cpu), -					 __builtin_return_address(0)); - +		p = arch_alloc_vmap_stack(IRQ_STACK_SIZE, cpu_to_node(cpu));  		per_cpu(irq_stack_ptr, cpu) = p;  	}  } diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index d05dbe658409..ea640f92fe5a 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -11,21 +11,6 @@  #include <linux/module.h>  #include <linux/sort.h> -struct plt_entry { -	/* -	 * A program that conforms to the AArch64 Procedure Call Standard -	 * (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or -	 * IP1 (x17) may be inserted at any branch instruction that is -	 * exposed to a relocation that supports long branches. Since that -	 * is exactly what we are dealing with here, we are free to use x16 -	 * as a scratch register in the PLT veneers. -	 */ -	__le32	mov0;	/* movn	x16, #0x....			
*/ -	__le32	mov1;	/* movk	x16, #0x...., lsl #16		*/ -	__le32	mov2;	/* movk	x16, #0x...., lsl #32		*/ -	__le32	br;	/* br	x16				*/ -}; -  static bool in_init(const struct module *mod, void *loc)  {  	return (u64)loc - (u64)mod->init_layout.base < mod->init_layout.size; @@ -40,33 +25,14 @@ u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela,  	int i = pltsec->plt_num_entries;  	u64 val = sym->st_value + rela->r_addend; -	/* -	 * MOVK/MOVN/MOVZ opcode: -	 * +--------+------------+--------+-----------+-------------+---------+ -	 * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] | -	 * +--------+------------+--------+-----------+-------------+---------+ -	 * -	 * Rd     := 0x10 (x16) -	 * hw     := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32) -	 * opc    := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ) -	 * sf     := 1 (64-bit variant) -	 */ -	plt[i] = (struct plt_entry){ -		cpu_to_le32(0x92800010 | (((~val      ) & 0xffff)) << 5), -		cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5), -		cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5), -		cpu_to_le32(0xd61f0200) -	}; +	plt[i] = get_plt_entry(val);  	/*  	 * Check if the entry we just created is a duplicate. Given that the  	 * relocations are sorted, this will be the last entry we allocated.  	 * (if one exists).  	 
*/ -	if (i > 0 && -	    plt[i].mov0 == plt[i - 1].mov0 && -	    plt[i].mov1 == plt[i - 1].mov1 && -	    plt[i].mov2 == plt[i - 1].mov2) +	if (i > 0 && plt_entries_equal(plt + i, plt + i - 1))  		return (u64)&plt[i - 1];  	pltsec->plt_num_entries++; @@ -154,6 +120,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,  	unsigned long core_plts = 0;  	unsigned long init_plts = 0;  	Elf64_Sym *syms = NULL; +	Elf_Shdr *tramp = NULL;  	int i;  	/* @@ -165,6 +132,10 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,  			mod->arch.core.plt = sechdrs + i;  		else if (!strcmp(secstrings + sechdrs[i].sh_name, ".init.plt"))  			mod->arch.init.plt = sechdrs + i; +		else if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) && +			 !strcmp(secstrings + sechdrs[i].sh_name, +				 ".text.ftrace_trampoline")) +			tramp = sechdrs + i;  		else if (sechdrs[i].sh_type == SHT_SYMTAB)  			syms = (Elf64_Sym *)sechdrs[i].sh_addr;  	} @@ -215,5 +186,12 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,  	mod->arch.init.plt_num_entries = 0;  	mod->arch.init.plt_max_entries = init_plts; +	if (tramp) { +		tramp->sh_type = SHT_NOBITS; +		tramp->sh_flags = SHF_EXECINSTR | SHF_ALLOC; +		tramp->sh_addralign = __alignof__(struct plt_entry); +		tramp->sh_size = sizeof(struct plt_entry); +	} +  	return 0;  } diff --git a/arch/arm64/kernel/module.lds b/arch/arm64/kernel/module.lds index f7c9781a9d48..22e36a21c113 100644 --- a/arch/arm64/kernel/module.lds +++ b/arch/arm64/kernel/module.lds @@ -1,4 +1,5 @@  SECTIONS {  	.plt (NOLOAD) : { BYTE(0) }  	.init.plt (NOLOAD) : { BYTE(0) } +	.text.ftrace_trampoline (NOLOAD) : { BYTE(0) }  } diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 9eaef51f83ff..3affca3dd96a 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -262,12 +262,6 @@ static const unsigned armv8_a73_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]  	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]	= 
ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD,  	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR, - -	[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, -	[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, - -	[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, -	[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,  };  static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index b2adcce7bc18..583fd8154695 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -314,6 +314,15 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,  	clear_tsk_thread_flag(p, TIF_SVE);  	p->thread.sve_state = NULL; +	/* +	 * In case p was allocated the same task_struct pointer as some +	 * other recently-exited task, make sure p is disassociated from +	 * any cpu that may have run that now-exited task recently. +	 * Otherwise we could erroneously skip reloading the FPSIMD +	 * registers for p. +	 */ +	fpsimd_flush_task_state(p); +  	if (likely(!(p->flags & PF_KTHREAD))) {  		*childregs = *current_pt_regs();  		childregs->regs[0] = 0; @@ -361,16 +370,14 @@ void tls_preserve_current_state(void)  static void tls_thread_switch(struct task_struct *next)  { -	unsigned long tpidr, tpidrro; -  	tls_preserve_current_state(); -	tpidr = *task_user_tls(next); -	tpidrro = is_compat_thread(task_thread_info(next)) ? 
-		  next->thread.tp_value : 0; +	if (is_compat_thread(task_thread_info(next))) +		write_sysreg(next->thread.tp_value, tpidrro_el0); +	else if (!arm64_kernel_unmapped_at_el0()) +		write_sysreg(0, tpidrro_el0); -	write_sysreg(tpidr, tpidr_el0); -	write_sysreg(tpidrro, tpidrro_el0); +	write_sysreg(*task_user_tls(next), tpidr_el0);  }  /* Restore the UAO state depending on next's addr_limit */ diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 7c44658b316d..6618036ae6d4 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -180,34 +180,34 @@ static void ptrace_hbptriggered(struct perf_event *bp,  				struct pt_regs *regs)  {  	struct arch_hw_breakpoint *bkpt = counter_arch_bp(bp); -	siginfo_t info = { -		.si_signo	= SIGTRAP, -		.si_errno	= 0, -		.si_code	= TRAP_HWBKPT, -		.si_addr	= (void __user *)(bkpt->trigger), -	}; +	siginfo_t info; -#ifdef CONFIG_COMPAT -	int i; +	clear_siginfo(&info); +	info.si_signo	= SIGTRAP; +	info.si_errno	= 0; +	info.si_code	= TRAP_HWBKPT; +	info.si_addr	= (void __user *)(bkpt->trigger); -	if (!is_compat_task()) -		goto send_sig; +#ifdef CONFIG_COMPAT +	if (is_compat_task()) { +		int si_errno = 0; +		int i; -	for (i = 0; i < ARM_MAX_BRP; ++i) { -		if (current->thread.debug.hbp_break[i] == bp) { -			info.si_errno = (i << 1) + 1; -			break; +		for (i = 0; i < ARM_MAX_BRP; ++i) { +			if (current->thread.debug.hbp_break[i] == bp) { +				si_errno = (i << 1) + 1; +				break; +			}  		} -	} -	for (i = 0; i < ARM_MAX_WRP; ++i) { -		if (current->thread.debug.hbp_watch[i] == bp) { -			info.si_errno = -((i << 1) + 1); -			break; +		for (i = 0; i < ARM_MAX_WRP; ++i) { +			if (current->thread.debug.hbp_watch[i] == bp) { +				si_errno = -((i << 1) + 1); +				break; +			}  		} +		force_sig_ptrace_errno_trap(si_errno, (void __user *)bkpt->trigger);  	} - -send_sig:  #endif  	force_sig_info(SIGTRAP, &info, current);  } diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S index 
ce704a4aeadd..f407e422a720 100644 --- a/arch/arm64/kernel/relocate_kernel.S +++ b/arch/arm64/kernel/relocate_kernel.S @@ -45,6 +45,7 @@ ENTRY(arm64_relocate_new_kernel)  	mrs	x0, sctlr_el2  	ldr	x1, =SCTLR_ELx_FLAGS  	bic	x0, x0, x1 +	pre_disable_mmu_workaround  	msr	sctlr_el2, x0  	isb  1: diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c new file mode 100644 index 000000000000..6b8d90d5ceae --- /dev/null +++ b/arch/arm64/kernel/sdei.c @@ -0,0 +1,235 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2017 Arm Ltd. +#define pr_fmt(fmt) "sdei: " fmt + +#include <linux/arm_sdei.h> +#include <linux/hardirq.h> +#include <linux/irqflags.h> +#include <linux/sched/task_stack.h> +#include <linux/uaccess.h> + +#include <asm/alternative.h> +#include <asm/kprobes.h> +#include <asm/mmu.h> +#include <asm/ptrace.h> +#include <asm/sections.h> +#include <asm/sysreg.h> +#include <asm/vmap_stack.h> + +unsigned long sdei_exit_mode; + +/* + * VMAP'd stacks checking for stack overflow on exception using sp as a scratch + * register, meaning SDEI has to switch to its own stack. We need two stacks as + * a critical event may interrupt a normal event that has just taken a + * synchronous exception, and is using sp as scratch register. For a critical + * event interrupting a normal event, we can't reliably tell if we were on the + * sdei stack. + * For now, we allocate stacks when the driver is probed. 
+ */ +DECLARE_PER_CPU(unsigned long *, sdei_stack_normal_ptr); +DECLARE_PER_CPU(unsigned long *, sdei_stack_critical_ptr); + +#ifdef CONFIG_VMAP_STACK +DEFINE_PER_CPU(unsigned long *, sdei_stack_normal_ptr); +DEFINE_PER_CPU(unsigned long *, sdei_stack_critical_ptr); +#endif + +static void _free_sdei_stack(unsigned long * __percpu *ptr, int cpu) +{ +	unsigned long *p; + +	p = per_cpu(*ptr, cpu); +	if (p) { +		per_cpu(*ptr, cpu) = NULL; +		vfree(p); +	} +} + +static void free_sdei_stacks(void) +{ +	int cpu; + +	for_each_possible_cpu(cpu) { +		_free_sdei_stack(&sdei_stack_normal_ptr, cpu); +		_free_sdei_stack(&sdei_stack_critical_ptr, cpu); +	} +} + +static int _init_sdei_stack(unsigned long * __percpu *ptr, int cpu) +{ +	unsigned long *p; + +	p = arch_alloc_vmap_stack(SDEI_STACK_SIZE, cpu_to_node(cpu)); +	if (!p) +		return -ENOMEM; +	per_cpu(*ptr, cpu) = p; + +	return 0; +} + +static int init_sdei_stacks(void) +{ +	int cpu; +	int err = 0; + +	for_each_possible_cpu(cpu) { +		err = _init_sdei_stack(&sdei_stack_normal_ptr, cpu); +		if (err) +			break; +		err = _init_sdei_stack(&sdei_stack_critical_ptr, cpu); +		if (err) +			break; +	} + +	if (err) +		free_sdei_stacks(); + +	return err; +} + +bool _on_sdei_stack(unsigned long sp) +{ +	unsigned long low, high; + +	if (!IS_ENABLED(CONFIG_VMAP_STACK)) +		return false; + +	low = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr); +	high = low + SDEI_STACK_SIZE; + +	if (low <= sp && sp < high) +		return true; + +	low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr); +	high = low + SDEI_STACK_SIZE; + +	return (low <= sp && sp < high); +} + +unsigned long sdei_arch_get_entry_point(int conduit) +{ +	/* +	 * SDEI works between adjacent exception levels. If we booted at EL1 we +	 * assume a hypervisor is marshalling events. If we booted at EL2 and +	 * dropped to EL1 because we don't support VHE, then we can't support +	 * SDEI. 
+	 */ +	if (is_hyp_mode_available() && !is_kernel_in_hyp_mode()) { +		pr_err("Not supported on this hardware/boot configuration\n"); +		return 0; +	} + +	if (IS_ENABLED(CONFIG_VMAP_STACK)) { +		if (init_sdei_stacks()) +			return 0; +	} + +	sdei_exit_mode = (conduit == CONDUIT_HVC) ? SDEI_EXIT_HVC : SDEI_EXIT_SMC; + +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +	if (arm64_kernel_unmapped_at_el0()) { +		unsigned long offset; + +		offset = (unsigned long)__sdei_asm_entry_trampoline - +			 (unsigned long)__entry_tramp_text_start; +		return TRAMP_VALIAS + offset; +	} else +#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ +		return (unsigned long)__sdei_asm_handler; + +} + +/* + * __sdei_handler() returns one of: + *  SDEI_EV_HANDLED -  success, return to the interrupted context. + *  SDEI_EV_FAILED  -  failure, return this error code to firmare. + *  virtual-address -  success, return to this address. + */ +static __kprobes unsigned long _sdei_handler(struct pt_regs *regs, +					     struct sdei_registered_event *arg) +{ +	u32 mode; +	int i, err = 0; +	int clobbered_registers = 4; +	u64 elr = read_sysreg(elr_el1); +	u32 kernel_mode = read_sysreg(CurrentEL) | 1;	/* +SPSel */ +	unsigned long vbar = read_sysreg(vbar_el1); + +	if (arm64_kernel_unmapped_at_el0()) +		clobbered_registers++; + +	/* Retrieve the missing registers values */ +	for (i = 0; i < clobbered_registers; i++) { +		/* from within the handler, this call always succeeds */ +		sdei_api_event_context(i, &regs->regs[i]); +	} + +	/* +	 * We didn't take an exception to get here, set PAN. UAO will be cleared +	 * by sdei_event_handler()s set_fs(USER_DS) call. +	 */ +	__uaccess_enable_hw_pan(); + +	err = sdei_event_handler(regs, arg); +	if (err) +		return SDEI_EV_FAILED; + +	if (elr != read_sysreg(elr_el1)) { +		/* +		 * We took a synchronous exception from the SDEI handler. +		 * This could deadlock, and if you interrupt KVM it will +		 * hyp-panic instead. 
+		 */ +		pr_warn("unsafe: exception during handler\n"); +	} + +	mode = regs->pstate & (PSR_MODE32_BIT | PSR_MODE_MASK); + +	/* +	 * If we interrupted the kernel with interrupts masked, we always go +	 * back to wherever we came from. +	 */ +	if (mode == kernel_mode && !interrupts_enabled(regs)) +		return SDEI_EV_HANDLED; + +	/* +	 * Otherwise, we pretend this was an IRQ. This lets user space tasks +	 * receive signals before we return to them, and KVM to invoke it's +	 * world switch to do the same. +	 * +	 * See DDI0487B.a Table D1-7 'Vector offsets from vector table base +	 * address'. +	 */ +	if (mode == kernel_mode) +		return vbar + 0x280; +	else if (mode & PSR_MODE32_BIT) +		return vbar + 0x680; + +	return vbar + 0x480; +} + + +asmlinkage __kprobes notrace unsigned long +__sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg) +{ +	unsigned long ret; +	bool do_nmi_exit = false; + +	/* +	 * nmi_enter() deals with printk() re-entrance and use of RCU when +	 * RCU believed this CPU was idle. Because critical events can +	 * interrupt normal events, we may already be in_nmi(). 
+	 */ +	if (!in_nmi()) { +		nmi_enter(); +		do_nmi_exit = true; +	} + +	ret = _sdei_handler(regs, arg); + +	if (do_nmi_exit) +		nmi_exit(); + +	return ret; +} diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index b120111a46be..f60c052e8d1c 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -178,7 +178,8 @@ static void __user *apply_user_offset(  static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)  { -	struct fpsimd_state *fpsimd = &current->thread.fpsimd_state; +	struct user_fpsimd_state const *fpsimd = +		&current->thread.fpsimd_state.user_fpsimd;  	int err;  	/* copy the FP and status/control registers */ @@ -195,7 +196,7 @@ static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)  static int restore_fpsimd_context(struct fpsimd_context __user *ctx)  { -	struct fpsimd_state fpsimd; +	struct user_fpsimd_state fpsimd;  	__u32 magic, size;  	int err = 0; @@ -266,7 +267,7 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)  {  	int err;  	unsigned int vq; -	struct fpsimd_state fpsimd; +	struct user_fpsimd_state fpsimd;  	struct sve_context sve;  	if (__copy_from_user(&sve, user->sve, sizeof(sve))) diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 22711ee8e36c..79feb861929b 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -125,86 +125,6 @@ static inline int get_sigset_t(sigset_t *set,  	return 0;  } -int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) -{ -	int err; - -	if (!access_ok(VERIFY_WRITE, to, sizeof(*to))) -		return -EFAULT; - -	/* If you change siginfo_t structure, please be sure -	 * this code is fixed accordingly. -	 * It should never copy any pad contained in the structure -	 * to avoid security leaks, but must copy the generic -	 * 3 ints plus the relevant union member. -	 * This routine must convert siginfo from 64bit to 32bit as well -	 * at the same time. 
-	 */ -	err = __put_user(from->si_signo, &to->si_signo); -	err |= __put_user(from->si_errno, &to->si_errno); -	err |= __put_user(from->si_code, &to->si_code); -	if (from->si_code < 0) -		err |= __copy_to_user(&to->_sifields._pad, &from->_sifields._pad, -				      SI_PAD_SIZE); -	else switch (siginfo_layout(from->si_signo, from->si_code)) { -	case SIL_KILL: -		err |= __put_user(from->si_pid, &to->si_pid); -		err |= __put_user(from->si_uid, &to->si_uid); -		break; -	case SIL_TIMER: -		 err |= __put_user(from->si_tid, &to->si_tid); -		 err |= __put_user(from->si_overrun, &to->si_overrun); -		 err |= __put_user(from->si_int, &to->si_int); -		break; -	case SIL_POLL: -		err |= __put_user(from->si_band, &to->si_band); -		err |= __put_user(from->si_fd, &to->si_fd); -		break; -	case SIL_FAULT: -		err |= __put_user((compat_uptr_t)(unsigned long)from->si_addr, -				  &to->si_addr); -#ifdef BUS_MCEERR_AO -		/* -		 * Other callers might not initialize the si_lsb field, -		 * so check explicitly for the right codes here. 
-		 */ -		if (from->si_signo == SIGBUS && -		    (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO)) -			err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb); -#endif -		break; -	case SIL_CHLD: -		err |= __put_user(from->si_pid, &to->si_pid); -		err |= __put_user(from->si_uid, &to->si_uid); -		err |= __put_user(from->si_status, &to->si_status); -		err |= __put_user(from->si_utime, &to->si_utime); -		err |= __put_user(from->si_stime, &to->si_stime); -		break; -	case SIL_RT: -		err |= __put_user(from->si_pid, &to->si_pid); -		err |= __put_user(from->si_uid, &to->si_uid); -		err |= __put_user(from->si_int, &to->si_int); -		break; -	case SIL_SYS: -		err |= __put_user((compat_uptr_t)(unsigned long) -				from->si_call_addr, &to->si_call_addr); -		err |= __put_user(from->si_syscall, &to->si_syscall); -		err |= __put_user(from->si_arch, &to->si_arch); -		break; -	} -	return err; -} - -int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) -{ -	if (copy_from_user(to, from, __ARCH_SI_PREAMBLE_SIZE) || -	    copy_from_user(to->_sifields._pad, -			   from->_sifields._pad, SI_PAD_SIZE)) -		return -EFAULT; - -	return 0; -} -  /*   * VFP save/restore code.   
* @@ -228,7 +148,8 @@ union __fpsimd_vreg {  static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame)  { -	struct fpsimd_state *fpsimd = &current->thread.fpsimd_state; +	struct user_fpsimd_state const *fpsimd = +		&current->thread.fpsimd_state.user_fpsimd;  	compat_ulong_t magic = VFP_MAGIC;  	compat_ulong_t size = VFP_STORAGE_SIZE;  	compat_ulong_t fpscr, fpexc; @@ -277,7 +198,7 @@ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame)  static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame)  { -	struct fpsimd_state fpsimd; +	struct user_fpsimd_state fpsimd;  	compat_ulong_t magic = VFP_MAGIC;  	compat_ulong_t size = VFP_STORAGE_SIZE;  	compat_ulong_t fpscr; diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 551eb07c53b6..3b8ad7be9c33 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -18,6 +18,7 @@   */  #include <linux/acpi.h> +#include <linux/arm_sdei.h>  #include <linux/delay.h>  #include <linux/init.h>  #include <linux/spinlock.h> @@ -836,6 +837,7 @@ static void ipi_cpu_stop(unsigned int cpu)  	set_cpu_online(cpu, false);  	local_daif_mask(); +	sdei_mask_local_cpu();  	while (1)  		cpu_relax(); @@ -853,6 +855,7 @@ static void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)  	atomic_dec(&waiting_for_crash_ipi);  	local_irq_disable(); +	sdei_mask_local_cpu();  #ifdef CONFIG_HOTPLUG_CPU  	if (cpu_ops[cpu]->cpu_die) @@ -972,6 +975,8 @@ void smp_send_stop(void)  	if (num_online_cpus() > 1)  		pr_warning("SMP: failed to stop secondary CPUs %*pbl\n",  			   cpumask_pr_args(cpu_online_mask)); + +	sdei_mask_local_cpu();  }  #ifdef CONFIG_KEXEC_CORE @@ -990,8 +995,10 @@ void crash_smp_send_stop(void)  	cpus_stopped = 1; -	if (num_online_cpus() == 1) +	if (num_online_cpus() == 1) { +		sdei_mask_local_cpu();  		return; +	}  	cpumask_copy(&mask, cpu_online_mask);  	cpumask_clear_cpu(smp_processor_id(), &mask); @@ -1009,6 +1016,8 @@ void crash_smp_send_stop(void)  	
if (atomic_read(&waiting_for_crash_ipi) > 0)  		pr_warning("SMP: failed to stop secondary CPUs %*pbl\n",  			   cpumask_pr_args(&mask)); + +	sdei_mask_local_cpu();  }  bool smp_crash_stop_failed(void) diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 3fe5ad884418..a307b9e13392 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -2,6 +2,7 @@  #include <linux/ftrace.h>  #include <linux/percpu.h>  #include <linux/slab.h> +#include <linux/uaccess.h>  #include <asm/alternative.h>  #include <asm/cacheflush.h>  #include <asm/cpufeature.h> @@ -51,8 +52,7 @@ void notrace __cpu_suspend_exit(void)  	 * PSTATE was not saved over suspend/resume, re-enable any detected  	 * features that might not have been set correctly.  	 */ -	asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, -			CONFIG_ARM64_PAN)); +	__uaccess_enable_hw_pan();  	uao_thread_switch(current);  	/* diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 8d48b233e6ce..21868530018e 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -37,18 +37,14 @@ static int __init get_cpu_for_node(struct device_node *node)  	if (!cpu_node)  		return -1; -	for_each_possible_cpu(cpu) { -		if (of_get_cpu_node(cpu, NULL) == cpu_node) { -			topology_parse_cpu_capacity(cpu_node, cpu); -			of_node_put(cpu_node); -			return cpu; -		} -	} - -	pr_crit("Unable to find CPU node for %pOF\n", cpu_node); +	cpu = of_cpu_node_to_id(cpu_node); +	if (cpu >= 0) +		topology_parse_cpu_capacity(cpu_node, cpu); +	else +		pr_crit("Unable to find CPU node for %pOF\n", cpu_node);  	of_node_put(cpu_node); -	return -1; +	return cpu;  }  static int __init parse_core(struct device_node *core, int cluster_id, diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 3d3588fcd1c7..bbb0fde2780e 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -662,17 +662,58 @@ asmlinkage void handle_bad_stack(struct pt_regs 
*regs)  }  #endif -asmlinkage void do_serror(struct pt_regs *regs, unsigned int esr) +void __noreturn arm64_serror_panic(struct pt_regs *regs, u32 esr)  { -	nmi_enter(); -  	console_verbose();  	pr_crit("SError Interrupt on CPU%d, code 0x%08x -- %s\n",  		smp_processor_id(), esr, esr_get_class_string(esr)); -	__show_regs(regs); +	if (regs) +		__show_regs(regs); + +	nmi_panic(regs, "Asynchronous SError Interrupt"); + +	cpu_park_loop(); +	unreachable(); +} + +bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr) +{ +	u32 aet = arm64_ras_serror_get_severity(esr); + +	switch (aet) { +	case ESR_ELx_AET_CE:	/* corrected error */ +	case ESR_ELx_AET_UEO:	/* restartable, not yet consumed */ +		/* +		 * The CPU can make progress. We may take UEO again as +		 * a more severe error. +		 */ +		return false; + +	case ESR_ELx_AET_UEU:	/* Uncorrected Unrecoverable */ +	case ESR_ELx_AET_UER:	/* Uncorrected Recoverable */ +		/* +		 * The CPU can't make progress. The exception may have +		 * been imprecise. +		 */ +		return true; + +	case ESR_ELx_AET_UC:	/* Uncontainable or Uncategorized error */ +	default: +		/* Error has been silently propagated */ +		arm64_serror_panic(regs, esr); +	} +} + +asmlinkage void do_serror(struct pt_regs *regs, unsigned int esr) +{ +	nmi_enter(); + +	/* non-RAS errors are not containable */ +	if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(regs, esr)) +		arm64_serror_panic(regs, esr); -	panic("Asynchronous SError Interrupt"); +	nmi_exit();  }  void __pte_error(const char *file, int line, unsigned long val) diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 7da3e5c366a0..0221aca6493d 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -57,6 +57,17 @@ jiffies = jiffies_64;  #define HIBERNATE_TEXT  #endif +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +#define TRAMP_TEXT					\ +	. 
= ALIGN(PAGE_SIZE);				\ +	VMLINUX_SYMBOL(__entry_tramp_text_start) = .;	\ +	*(.entry.tramp.text)				\ +	. = ALIGN(PAGE_SIZE);				\ +	VMLINUX_SYMBOL(__entry_tramp_text_end) = .; +#else +#define TRAMP_TEXT +#endif +  /*   * The size of the PE/COFF section that covers the kernel image, which   * runs from stext to _edata, must be a round multiple of the PE/COFF @@ -113,6 +124,7 @@ SECTIONS  			HYPERVISOR_TEXT  			IDMAP_TEXT  			HIBERNATE_TEXT +			TRAMP_TEXT  			*(.fixup)  			*(.gnu.warning)  		. = ALIGN(16); @@ -206,13 +218,19 @@ SECTIONS  	. = ALIGN(PAGE_SIZE);  	idmap_pg_dir = .;  	. += IDMAP_DIR_SIZE; -	swapper_pg_dir = .; -	. += SWAPPER_DIR_SIZE; + +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +	tramp_pg_dir = .; +	. += PAGE_SIZE; +#endif  #ifdef CONFIG_ARM64_SW_TTBR0_PAN  	reserved_ttbr0 = .;  	. += RESERVED_TTBR0_SIZE;  #endif +	swapper_pg_dir = .; +	. += SWAPPER_DIR_SIZE; +	swapper_pg_end = .;  	__pecoff_data_size = ABSOLUTE(. - __initdata_begin);  	_end = .; @@ -234,7 +252,10 @@ ASSERT(__idmap_text_end - (__idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K,  ASSERT(__hibernate_exit_text_end - (__hibernate_exit_text_start & ~(SZ_4K - 1))  	<= SZ_4K, "Hibernate exit text too big or misaligned")  #endif - +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) == PAGE_SIZE, +	"Entry trampoline text too big") +#endif  /*   * If padding is applied before .head.text, virt<->phys conversions will fail.   */ diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index dbadfaf850a7..fa63b28c65e0 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -221,3 +221,24 @@ void kvm_arm_clear_debug(struct kvm_vcpu *vcpu)  		}  	}  } + + +/* + * After successfully emulating an instruction, we might want to + * return to user space with a KVM_EXIT_DEBUG. We can only do this + * once the emulation is complete, though, so for userspace emulations + * we have to wait until we have re-entered KVM before calling this + * helper. 
+ * + * Return true (and set exit_reason) to return to userspace or false + * if no further action is required. + */ +bool kvm_arm_handle_step_debug(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ +	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { +		run->exit_reason = KVM_EXIT_DEBUG; +		run->debug.arch.hsr = ESR_ELx_EC_SOFTSTP_LOW << ESR_ELx_EC_SHIFT; +		return true; +	} +	return false; +} diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index b71247995469..520b0dad3c62 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -23,17 +23,26 @@  #include <linux/kvm_host.h>  #include <asm/esr.h> +#include <asm/exception.h>  #include <asm/kvm_asm.h>  #include <asm/kvm_coproc.h>  #include <asm/kvm_emulate.h>  #include <asm/kvm_mmu.h>  #include <asm/kvm_psci.h> +#include <asm/debug-monitors.h> +#include <asm/traps.h>  #define CREATE_TRACE_POINTS  #include "trace.h"  typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *); +static void kvm_handle_guest_serror(struct kvm_vcpu *vcpu, u32 esr) +{ +	if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(NULL, esr)) +		kvm_inject_vabt(vcpu); +} +  static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)  {  	int ret; @@ -44,7 +53,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)  	ret = kvm_psci_call(vcpu);  	if (ret < 0) { -		kvm_inject_undefined(vcpu); +		vcpu_set_reg(vcpu, 0, ~0UL);  		return 1;  	} @@ -53,7 +62,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)  static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)  { -	kvm_inject_undefined(vcpu); +	vcpu_set_reg(vcpu, 0, ~0UL);  	return 1;  } @@ -187,14 +196,46 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)  }  /* + * We may be single-stepping an emulated instruction. 
If the emulation + * has been completed in the kernel, we can return to userspace with a + * KVM_EXIT_DEBUG, otherwise userspace needs to complete its + * emulation first. + */ +static int handle_trap_exceptions(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ +	int handled; + +	/* +	 * See ARM ARM B1.14.1: "Hyp traps on instructions +	 * that fail their condition code check" +	 */ +	if (!kvm_condition_valid(vcpu)) { +		kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); +		handled = 1; +	} else { +		exit_handle_fn exit_handler; + +		exit_handler = kvm_get_exit_handler(vcpu); +		handled = exit_handler(vcpu, run); +	} + +	/* +	 * kvm_arm_handle_step_debug() sets the exit_reason on the kvm_run +	 * structure if we need to return to userspace. +	 */ +	if (handled > 0 && kvm_arm_handle_step_debug(vcpu, run)) +		handled = 0; + +	return handled; +} + +/*   * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on   * proper exit to userspace.   */  int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,  		       int exception_index)  { -	exit_handle_fn exit_handler; -  	if (ARM_SERROR_PENDING(exception_index)) {  		u8 hsr_ec = ESR_ELx_EC(kvm_vcpu_get_hsr(vcpu)); @@ -209,7 +250,6 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,  			*vcpu_pc(vcpu) -= adj;  		} -		kvm_inject_vabt(vcpu);  		return 1;  	} @@ -219,21 +259,14 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,  	case ARM_EXCEPTION_IRQ:  		return 1;  	case ARM_EXCEPTION_EL1_SERROR: -		kvm_inject_vabt(vcpu); -		return 1; -	case ARM_EXCEPTION_TRAP: -		/* -		 * See ARM ARM B1.14.1: "Hyp traps on instructions -		 * that fail their condition code check" -		 */ -		if (!kvm_condition_valid(vcpu)) { -			kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); +		/* We may still need to return for single-step */ +		if (!(*vcpu_cpsr(vcpu) & DBG_SPSR_SS) +			&& kvm_arm_handle_step_debug(vcpu, run)) +			return 0; +		else  			return 1; -		} - -		exit_handler = 
kvm_get_exit_handler(vcpu); - -		return exit_handler(vcpu, run); +	case ARM_EXCEPTION_TRAP: +		return handle_trap_exceptions(vcpu, run);  	case ARM_EXCEPTION_HYP_GONE:  		/*  		 * EL2 has been reset to the hyp-stub. This happens when a guest @@ -248,3 +281,25 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,  		return 0;  	}  } + +/* For exit types that need handling before we can be preempted */ +void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run, +		       int exception_index) +{ +	if (ARM_SERROR_PENDING(exception_index)) { +		if (this_cpu_has_cap(ARM64_HAS_RAS_EXTN)) { +			u64 disr = kvm_vcpu_get_disr(vcpu); + +			kvm_handle_guest_serror(vcpu, disr_to_esr(disr)); +		} else { +			kvm_inject_vabt(vcpu); +		} + +		return; +	} + +	exception_index = ARM_EXCEPTION_CODE(exception_index); + +	if (exception_index == ARM_EXCEPTION_EL1_SERROR) +		kvm_handle_guest_serror(vcpu, kvm_vcpu_get_hsr(vcpu)); +} diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index 3f9615582377..e086c6eff8c6 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -63,7 +63,8 @@ __do_hyp_init:  	cmp	x0, #HVC_STUB_HCALL_NR  	b.lo	__kvm_handle_stub_hvc -	msr	ttbr0_el2, x0 +	phys_to_ttbr x0, x4 +	msr	ttbr0_el2, x4  	mrs	x4, tcr_el1  	ldr	x5, =TCR_EL2_MASK @@ -71,30 +72,27 @@ __do_hyp_init:  	mov	x5, #TCR_EL2_RES1  	orr	x4, x4, x5 -#ifndef CONFIG_ARM64_VA_BITS_48  	/* -	 * If we are running with VA_BITS < 48, we may be running with an extra -	 * level of translation in the ID map. This is only the case if system -	 * RAM is out of range for the currently configured page size and number -	 * of translation levels, in which case we will also need the extra -	 * level for the HYP ID map, or we won't be able to enable the EL2 MMU. +	 * The ID map may be configured to use an extended virtual address +	 * range. 
This is only the case if system RAM is out of range for the +	 * currently configured page size and VA_BITS, in which case we will +	 * also need the extended virtual range for the HYP ID map, or we won't +	 * be able to enable the EL2 MMU.  	 *  	 * However, at EL2, there is only one TTBR register, and we can't switch  	 * between translation tables *and* update TCR_EL2.T0SZ at the same -	 * time. Bottom line: we need the extra level in *both* our translation -	 * tables. +	 * time. Bottom line: we need to use the extended range with *both* our +	 * translation tables.  	 *  	 * So use the same T0SZ value we use for the ID map.  	 */  	ldr_l	x5, idmap_t0sz  	bfi	x4, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH -#endif +  	/* -	 * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS bits in -	 * TCR_EL2. +	 * Set the PS bits in TCR_EL2.  	 */ -	mrs	x5, ID_AA64MMFR0_EL1 -	bfi	x4, x5, #16, #3 +	tcr_compute_pa_size x4, #TCR_EL2_PS_SHIFT, x5, x6  	msr	tcr_el2, x4 @@ -122,6 +120,10 @@ CPU_BE(	orr	x4, x4, #SCTLR_ELx_EE)  	kern_hyp_va	x2  	msr	vbar_el2, x2 +	/* copy tpidr_el1 into tpidr_el2 for use by HYP */ +	mrs	x1, tpidr_el1 +	msr	tpidr_el2, x1 +  	/* Hello, World! */  	eret  ENDPROC(__kvm_hyp_init) @@ -151,6 +153,7 @@ reset:  	mrs	x5, sctlr_el2  	ldr	x6, =SCTLR_ELx_FLAGS  	bic	x5, x5, x6		// Clear SCTL_M and etc +	pre_disable_mmu_workaround  	msr	sctlr_el2, x5  	isb diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c index 321c9c05dd9e..f4363d40e2cd 100644 --- a/arch/arm64/kvm/hyp/debug-sr.c +++ b/arch/arm64/kvm/hyp/debug-sr.c @@ -74,6 +74,9 @@ static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1)  {  	u64 reg; +	/* Clear pmscr in case of early return */ +	*pmscr_el1 = 0; +  	/* SPE present on this CPU? 
*/  	if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1),  						  ID_AA64DFR0_PMSVER_SHIFT)) diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index 12ee62d6d410..fdd1068ee3a5 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -62,8 +62,8 @@ ENTRY(__guest_enter)  	// Store the host regs  	save_callee_saved_regs x1 -	// Store the host_ctxt for use at exit time -	str	x1, [sp, #-16]! +	// Store host_ctxt and vcpu for use at exit time +	stp	x1, x0, [sp, #-16]!  	add	x18, x0, #VCPU_CONTEXT @@ -124,6 +124,17 @@ ENTRY(__guest_exit)  	// Now restore the host regs  	restore_callee_saved_regs x2 +alternative_if ARM64_HAS_RAS_EXTN +	// If we have the RAS extensions we can consume a pending error +	// without an unmask-SError and isb. +	esb +	mrs_s	x2, SYS_DISR_EL1 +	str	x2, [x1, #(VCPU_FAULT_DISR - VCPU_CONTEXT)] +	cbz	x2, 1f +	msr_s	SYS_DISR_EL1, xzr +	orr	x0, x0, #(1<<ARM_EXIT_WITH_SERROR_BIT) +1:	ret +alternative_else  	// If we have a pending asynchronous abort, now is the  	// time to find out. From your VAXorcist book, page 666:  	// "Threaten me not, oh Evil one!  For I speak with @@ -134,7 +145,9 @@ ENTRY(__guest_exit)  	mov	x5, x0  	dsb	sy		// Synchronize against in-flight ld/st +	nop  	msr	daifclr, #4	// Unmask aborts +alternative_endif  	// This is our single instruction exception window. A pending  	// SError is guaranteed to occur at the earliest when we unmask @@ -159,6 +172,10 @@ abort_guest_exit_end:  ENDPROC(__guest_exit)  ENTRY(__fpsimd_guest_restore) +	// x0: esr +	// x1: vcpu +	// x2-x29,lr: vcpu regs +	// vcpu x0-x1 on the stack  	stp	x2, x3, [sp, #-16]!  	stp	x4, lr, [sp, #-16]! 
@@ -173,7 +190,7 @@ alternative_else  alternative_endif  	isb -	mrs	x3, tpidr_el2 +	mov	x3, x1  	ldr	x0, [x3, #VCPU_HOST_CONTEXT]  	kern_hyp_va x0 @@ -196,3 +213,15 @@ alternative_endif  	eret  ENDPROC(__fpsimd_guest_restore) + +ENTRY(__qcom_hyp_sanitize_btac_predictors) +	/** +	 * Call SMC64 with Silicon provider serviceID 23<<8 (0xc2001700) +	 * 0xC2000000-0xC200FFFF: assigned to SiP Service Calls +	 * b15-b0: contains SiP functionID +	 */ +	movz    x0, #0x1700 +	movk    x0, #0xc200, lsl #16 +	smc     #0 +	ret +ENDPROC(__qcom_hyp_sanitize_btac_predictors) diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 5170ce1021da..e4f37b9dd47c 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -104,6 +104,7 @@ el1_trap:  	/*  	 * x0: ESR_EC  	 */ +	ldr	x1, [sp, #16 + 8]	// vcpu stored by __guest_enter  	/*  	 * We trap the first access to the FP/SIMD to save the host context @@ -116,19 +117,18 @@ alternative_if_not ARM64_HAS_NO_FPSIMD  	b.eq	__fpsimd_guest_restore  alternative_else_nop_endif -	mrs	x1, tpidr_el2  	mov	x0, #ARM_EXCEPTION_TRAP  	b	__guest_exit  el1_irq:  	stp     x0, x1, [sp, #-16]! -	mrs	x1, tpidr_el2 +	ldr	x1, [sp, #16 + 8]  	mov	x0, #ARM_EXCEPTION_IRQ  	b	__guest_exit  el1_error:  	stp     x0, x1, [sp, #-16]! -	mrs	x1, tpidr_el2 +	ldr	x1, [sp, #16 + 8]  	mov	x0, #ARM_EXCEPTION_EL1_SERROR  	b	__guest_exit @@ -163,6 +163,18 @@ ENTRY(__hyp_do_panic)  	eret  ENDPROC(__hyp_do_panic) +ENTRY(__hyp_panic) +	/* +	 * '=kvm_host_cpu_state' is a host VA from the constant pool, it may +	 * not be accessible by this address from EL2, hyp_panic() converts +	 * it with kern_hyp_va() before use. 
+	 */ +	ldr	x0, =kvm_host_cpu_state +	mrs	x1, tpidr_el2 +	add	x0, x0, x1 +	b	hyp_panic +ENDPROC(__hyp_panic) +  .macro invalid_vector	label, target = __hyp_panic  	.align	2  \label: diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c index a81f5e10fc8c..603e1ee83e89 100644 --- a/arch/arm64/kvm/hyp/s2-setup.c +++ b/arch/arm64/kvm/hyp/s2-setup.c @@ -32,6 +32,8 @@ u32 __hyp_text __init_stage2_translation(void)  	 * PS is only 3. Fortunately, bit 19 is RES0 in VTCR_EL2...  	 */  	parange = read_sysreg(id_aa64mmfr0_el1) & 7; +	if (parange > ID_AA64MMFR0_PARANGE_MAX) +		parange = ID_AA64MMFR0_PARANGE_MAX;  	val |= parange << 16;  	/* Compute the actual PARange... */ diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 525c01f48867..036e1f3d77a6 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -17,11 +17,13 @@  #include <linux/types.h>  #include <linux/jump_label.h> +#include <uapi/linux/psci.h>  #include <asm/kvm_asm.h>  #include <asm/kvm_emulate.h>  #include <asm/kvm_hyp.h>  #include <asm/fpsimd.h> +#include <asm/debug-monitors.h>  static bool __hyp_text __fpsimd_enabled_nvhe(void)  { @@ -51,7 +53,7 @@ static void __hyp_text __activate_traps_vhe(void)  	val &= ~(CPACR_EL1_FPEN | CPACR_EL1_ZEN);  	write_sysreg(val, cpacr_el1); -	write_sysreg(__kvm_hyp_vector, vbar_el1); +	write_sysreg(kvm_get_hyp_vector(), vbar_el1);  }  static void __hyp_text __activate_traps_nvhe(void) @@ -92,6 +94,9 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)  	write_sysreg(val, hcr_el2); +	if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN) && (val & HCR_VSE)) +		write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2); +  	/* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */  	write_sysreg(1 << 15, hstr_el2);  	/* @@ -234,11 +239,12 @@ static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar)  static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)  { -	u64 esr = read_sysreg_el2(esr); -	u8 ec 
= ESR_ELx_EC(esr); +	u8 ec; +	u64 esr;  	u64 hpfar, far; -	vcpu->arch.fault.esr_el2 = esr; +	esr = vcpu->arch.fault.esr_el2; +	ec = ESR_ELx_EC(esr);  	if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW)  		return true; @@ -269,7 +275,11 @@ static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)  	return true;  } -static void __hyp_text __skip_instr(struct kvm_vcpu *vcpu) +/* Skip an instruction which has been emulated. Returns true if + * execution can continue or false if we need to exit hyp mode because + * single-step was in effect. + */ +static bool __hyp_text __skip_instr(struct kvm_vcpu *vcpu)  {  	*vcpu_pc(vcpu) = read_sysreg_el2(elr); @@ -282,6 +292,14 @@ static void __hyp_text __skip_instr(struct kvm_vcpu *vcpu)  	}  	write_sysreg_el2(*vcpu_pc(vcpu), elr); + +	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { +		vcpu->arch.fault.esr_el2 = +			(ESR_ELx_EC_SOFTSTP_LOW << ESR_ELx_EC_SHIFT) | 0x22; +		return false; +	} else { +		return true; +	}  }  int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu) @@ -292,9 +310,9 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)  	u64 exit_code;  	vcpu = kern_hyp_va(vcpu); -	write_sysreg(vcpu, tpidr_el2);  	host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); +	host_ctxt->__hyp_running_vcpu = vcpu;  	guest_ctxt = &vcpu->arch.ctxt;  	__sysreg_save_host_state(host_ctxt); @@ -319,6 +337,8 @@ again:  	exit_code = __guest_enter(vcpu, host_ctxt);  	/* And we're baaack! */ +	if (ARM_EXCEPTION_CODE(exit_code) != ARM_EXCEPTION_IRQ) +		vcpu->arch.fault.esr_el2 = read_sysreg_el2(esr);  	/*  	 * We're using the raw exception code in order to only process  	 * the trap if no SError is pending. 
We will come back to the @@ -328,6 +348,18 @@ again:  	if (exit_code == ARM_EXCEPTION_TRAP && !__populate_fault_info(vcpu))  		goto again; +	if (exit_code == ARM_EXCEPTION_TRAP && +	    (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_HVC64 || +	     kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_HVC32) && +	    vcpu_get_reg(vcpu, 0) == PSCI_0_2_FN_PSCI_VERSION) { +		u64 val = PSCI_RET_NOT_SUPPORTED; +		if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features)) +			val = 2; + +		vcpu_set_reg(vcpu, 0, val); +		goto again; +	} +  	if (static_branch_unlikely(&vgic_v2_cpuif_trap) &&  	    exit_code == ARM_EXCEPTION_TRAP) {  		bool valid; @@ -342,13 +374,21 @@ again:  			int ret = __vgic_v2_perform_cpuif_access(vcpu);  			if (ret == 1) { -				__skip_instr(vcpu); -				goto again; +				if (__skip_instr(vcpu)) +					goto again; +				else +					exit_code = ARM_EXCEPTION_TRAP;  			}  			if (ret == -1) { -				/* Promote an illegal access to an SError */ -				__skip_instr(vcpu); +				/* Promote an illegal access to an +				 * SError. If we would be returning +				 * due to single-step clear the SS +				 * bit so handle_exit knows what to +				 * do after dealing with the error. 
+				 */ +				if (!__skip_instr(vcpu)) +					*vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;  				exit_code = ARM_EXCEPTION_EL1_SERROR;  			} @@ -363,13 +403,23 @@ again:  		int ret = __vgic_v3_perform_cpuif_access(vcpu);  		if (ret == 1) { -			__skip_instr(vcpu); -			goto again; +			if (__skip_instr(vcpu)) +				goto again; +			else +				exit_code = ARM_EXCEPTION_TRAP;  		}  		/* 0 falls through to be handled out of EL2 */  	} +	if (cpus_have_const_cap(ARM64_HARDEN_BP_POST_GUEST_EXIT)) { +		u32 midr = read_cpuid_id(); + +		/* Apply BTAC predictors mitigation to all Falkor chips */ +		if ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1) +			__qcom_hyp_sanitize_btac_predictors(); +	} +  	fp_enabled = __fpsimd_enabled();  	__sysreg_save_guest_state(guest_ctxt); @@ -399,7 +449,8 @@ again:  static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n"; -static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par) +static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par, +					     struct kvm_vcpu *vcpu)  {  	unsigned long str_va; @@ -413,35 +464,35 @@ static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par)  	__hyp_do_panic(str_va,  		       spsr,  elr,  		       read_sysreg(esr_el2),   read_sysreg_el2(far), -		       read_sysreg(hpfar_el2), par, -		       (void *)read_sysreg(tpidr_el2)); +		       read_sysreg(hpfar_el2), par, vcpu);  } -static void __hyp_text __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par) +static void __hyp_text __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par, +					    struct kvm_vcpu *vcpu)  {  	panic(__hyp_panic_string,  	      spsr,  elr,  	      read_sysreg_el2(esr),   read_sysreg_el2(far), -	      read_sysreg(hpfar_el2), par, -	      (void *)read_sysreg(tpidr_el2)); +	      read_sysreg(hpfar_el2), par, vcpu);  }  static hyp_alternate_select(__hyp_call_panic,  			    __hyp_call_panic_nvhe, __hyp_call_panic_vhe,  			    
ARM64_HAS_VIRT_HOST_EXTN); -void __hyp_text __noreturn __hyp_panic(void) +void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *__host_ctxt)  { +	struct kvm_vcpu *vcpu = NULL; +  	u64 spsr = read_sysreg_el2(spsr);  	u64 elr = read_sysreg_el2(elr);  	u64 par = read_sysreg(par_el1);  	if (read_sysreg(vttbr_el2)) { -		struct kvm_vcpu *vcpu;  		struct kvm_cpu_context *host_ctxt; -		vcpu = (struct kvm_vcpu *)read_sysreg(tpidr_el2); -		host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); +		host_ctxt = kern_hyp_va(__host_ctxt); +		vcpu = host_ctxt->__hyp_running_vcpu;  		__timer_disable_traps(vcpu);  		__deactivate_traps(vcpu);  		__deactivate_vm(vcpu); @@ -449,7 +500,7 @@ void __hyp_text __noreturn __hyp_panic(void)  	}  	/* Call panic for real */ -	__hyp_call_panic()(spsr, elr, par); +	__hyp_call_panic()(spsr, elr, par, vcpu);  	unreachable();  } diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index 934137647837..2c17afd2be96 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -27,8 +27,8 @@ static void __hyp_text __sysreg_do_nothing(struct kvm_cpu_context *ctxt) { }  /*   * Non-VHE: Both host and guest must save everything.   * - * VHE: Host must save tpidr*_el[01], actlr_el1, mdscr_el1, sp0, pc, - * pstate, and guest must save everything. + * VHE: Host must save tpidr*_el0, actlr_el1, mdscr_el1, sp_el0, + * and guest must save everything.   
*/  static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt) @@ -36,11 +36,8 @@ static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt)  	ctxt->sys_regs[ACTLR_EL1]	= read_sysreg(actlr_el1);  	ctxt->sys_regs[TPIDR_EL0]	= read_sysreg(tpidr_el0);  	ctxt->sys_regs[TPIDRRO_EL0]	= read_sysreg(tpidrro_el0); -	ctxt->sys_regs[TPIDR_EL1]	= read_sysreg(tpidr_el1);  	ctxt->sys_regs[MDSCR_EL1]	= read_sysreg(mdscr_el1);  	ctxt->gp_regs.regs.sp		= read_sysreg(sp_el0); -	ctxt->gp_regs.regs.pc		= read_sysreg_el2(elr); -	ctxt->gp_regs.regs.pstate	= read_sysreg_el2(spsr);  }  static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt) @@ -62,10 +59,16 @@ static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)  	ctxt->sys_regs[AMAIR_EL1]	= read_sysreg_el1(amair);  	ctxt->sys_regs[CNTKCTL_EL1]	= read_sysreg_el1(cntkctl);  	ctxt->sys_regs[PAR_EL1]		= read_sysreg(par_el1); +	ctxt->sys_regs[TPIDR_EL1]	= read_sysreg(tpidr_el1);  	ctxt->gp_regs.sp_el1		= read_sysreg(sp_el1);  	ctxt->gp_regs.elr_el1		= read_sysreg_el1(elr);  	ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(spsr); +	ctxt->gp_regs.regs.pc		= read_sysreg_el2(elr); +	ctxt->gp_regs.regs.pstate	= read_sysreg_el2(spsr); + +	if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) +		ctxt->sys_regs[DISR_EL1] = read_sysreg_s(SYS_VDISR_EL2);  }  static hyp_alternate_select(__sysreg_call_save_host_state, @@ -89,11 +92,8 @@ static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctx  	write_sysreg(ctxt->sys_regs[ACTLR_EL1],	  actlr_el1);  	write_sysreg(ctxt->sys_regs[TPIDR_EL0],	  tpidr_el0);  	write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0); -	write_sysreg(ctxt->sys_regs[TPIDR_EL1],	  tpidr_el1);  	write_sysreg(ctxt->sys_regs[MDSCR_EL1],	  mdscr_el1);  	write_sysreg(ctxt->gp_regs.regs.sp,	  sp_el0); -	write_sysreg_el2(ctxt->gp_regs.regs.pc,	  elr); -	write_sysreg_el2(ctxt->gp_regs.regs.pstate, spsr);  }  static void __hyp_text 
__sysreg_restore_state(struct kvm_cpu_context *ctxt) @@ -115,10 +115,16 @@ static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)  	write_sysreg_el1(ctxt->sys_regs[AMAIR_EL1],	amair);  	write_sysreg_el1(ctxt->sys_regs[CNTKCTL_EL1], 	cntkctl);  	write_sysreg(ctxt->sys_regs[PAR_EL1],		par_el1); +	write_sysreg(ctxt->sys_regs[TPIDR_EL1],		tpidr_el1);  	write_sysreg(ctxt->gp_regs.sp_el1,		sp_el1);  	write_sysreg_el1(ctxt->gp_regs.elr_el1,		elr);  	write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],spsr); +	write_sysreg_el2(ctxt->gp_regs.regs.pc,		elr); +	write_sysreg_el2(ctxt->gp_regs.regs.pstate,	spsr); + +	if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) +		write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2);  }  static hyp_alternate_select(__sysreg_call_restore_host_state, diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index 8ecbcb40e317..60666a056944 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -164,14 +164,25 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu)  		inject_undef64(vcpu);  } +static void pend_guest_serror(struct kvm_vcpu *vcpu, u64 esr) +{ +	vcpu_set_vsesr(vcpu, esr); +	vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) | HCR_VSE); +} +  /**   * kvm_inject_vabt - inject an async abort / SError into the guest   * @vcpu: The VCPU to receive the exception   *   * It is assumed that this code is called from the VCPU thread and that the   * VCPU therefore is not currently executing guest code. + * + * Systems with the RAS Extensions specify an imp-def ESR (ISV/IDS = 1) with + * the remaining ISS all-zeros so that this error is not interpreted as an + * uncategorized RAS error. Without the RAS Extensions we can't specify an ESR + * value, so the CPU generates an imp-def value.   
*/  void kvm_inject_vabt(struct kvm_vcpu *vcpu)  { -	vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) | HCR_VSE); +	pend_guest_serror(vcpu, ESR_ELx_ISV);  } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 1830ebc227d1..50a43c7b97ca 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1159,6 +1159,16 @@ static const struct sys_reg_desc sys_reg_descs[] = {  	{ SYS_DESC(SYS_AFSR0_EL1), access_vm_reg, reset_unknown, AFSR0_EL1 },  	{ SYS_DESC(SYS_AFSR1_EL1), access_vm_reg, reset_unknown, AFSR1_EL1 },  	{ SYS_DESC(SYS_ESR_EL1), access_vm_reg, reset_unknown, ESR_EL1 }, + +	{ SYS_DESC(SYS_ERRIDR_EL1), trap_raz_wi }, +	{ SYS_DESC(SYS_ERRSELR_EL1), trap_raz_wi }, +	{ SYS_DESC(SYS_ERXFR_EL1), trap_raz_wi }, +	{ SYS_DESC(SYS_ERXCTLR_EL1), trap_raz_wi }, +	{ SYS_DESC(SYS_ERXSTATUS_EL1), trap_raz_wi }, +	{ SYS_DESC(SYS_ERXADDR_EL1), trap_raz_wi }, +	{ SYS_DESC(SYS_ERXMISC0_EL1), trap_raz_wi }, +	{ SYS_DESC(SYS_ERXMISC1_EL1), trap_raz_wi }, +  	{ SYS_DESC(SYS_FAR_EL1), access_vm_reg, reset_unknown, FAR_EL1 },  	{ SYS_DESC(SYS_PAR_EL1), NULL, reset_unknown, PAR_EL1 }, @@ -1169,6 +1179,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {  	{ SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 },  	{ SYS_DESC(SYS_VBAR_EL1), NULL, reset_val, VBAR_EL1, 0 }, +	{ SYS_DESC(SYS_DISR_EL1), NULL, reset_val, DISR_EL1, 0 },  	{ SYS_DESC(SYS_ICC_IAR0_EL1), write_to_read_only },  	{ SYS_DESC(SYS_ICC_EOIR0_EL1), read_from_write_only }, diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index e88fb99c1561..3d69a8d41fa5 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -30,7 +30,7 @@   * Alignment fixed up by hardware.   
*/  ENTRY(__clear_user) -	uaccess_enable_not_uao x2, x3 +	uaccess_enable_not_uao x2, x3, x4  	mov	x2, x1			// save the size for fixup return  	subs	x1, x1, #8  	b.mi	2f @@ -50,7 +50,7 @@ uao_user_alternative 9f, strh, sttrh, wzr, x0, 2  	b.mi	5f  uao_user_alternative 9f, strb, sttrb, wzr, x0, 0  5:	mov	x0, #0 -	uaccess_disable_not_uao x2 +	uaccess_disable_not_uao x2, x3  	ret  ENDPROC(__clear_user) diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 4b5d826895ff..20305d485046 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -64,10 +64,10 @@  end	.req	x5  ENTRY(__arch_copy_from_user) -	uaccess_enable_not_uao x3, x4 +	uaccess_enable_not_uao x3, x4, x5  	add	end, x0, x2  #include "copy_template.S" -	uaccess_disable_not_uao x3 +	uaccess_disable_not_uao x3, x4  	mov	x0, #0				// Nothing to copy  	ret  ENDPROC(__arch_copy_from_user) diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index b24a830419ad..fbb090f431a5 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -65,10 +65,10 @@  end	.req	x5  ENTRY(raw_copy_in_user) -	uaccess_enable_not_uao x3, x4 +	uaccess_enable_not_uao x3, x4, x5  	add	end, x0, x2  #include "copy_template.S" -	uaccess_disable_not_uao x3 +	uaccess_disable_not_uao x3, x4  	mov	x0, #0  	ret  ENDPROC(raw_copy_in_user) diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index 351f0766f7a6..fda6172d6b88 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -63,10 +63,10 @@  end	.req	x5  ENTRY(__arch_copy_to_user) -	uaccess_enable_not_uao x3, x4 +	uaccess_enable_not_uao x3, x4, x5  	add	end, x0, x2  #include "copy_template.S" -	uaccess_disable_not_uao x3 +	uaccess_disable_not_uao x3, x4  	mov	x0, #0  	ret  ENDPROC(__arch_copy_to_user) diff --git a/arch/arm64/lib/tishift.S b/arch/arm64/lib/tishift.S index 0179a43cc045..d3db9b2cd479 100644 --- a/arch/arm64/lib/tishift.S +++ 
b/arch/arm64/lib/tishift.S @@ -38,19 +38,19 @@ ENTRY(__ashlti3)  ENDPROC(__ashlti3)  ENTRY(__ashrti3) -	cbz	x2, 3f +	cbz	x2, 1f  	mov	x3, #64  	sub	x3, x3, x2  	cmp	x3, #0 -	b.le	4f +	b.le	2f  	lsr	x0, x0, x2  	lsl	x3, x1, x3  	asr	x2, x1, x2  	orr	x0, x0, x3  	mov	x1, x2 -3: +1:  	ret -4: +2:  	neg	w0, w3  	asr	x2, x1, #63  	asr	x0, x1, x0 diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 7f1dbe962cf5..91464e7f77cc 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -49,7 +49,7 @@ ENTRY(flush_icache_range)   *	- end     - virtual end address of region   */  ENTRY(__flush_cache_user_range) -	uaccess_ttbr0_enable x2, x3 +	uaccess_ttbr0_enable x2, x3, x4  	dcache_line_size x2, x3  	sub	x3, x2, #1  	bic	x4, x0, x3 @@ -72,7 +72,7 @@ USER(9f, ic	ivau, x4	)		// invalidate I line PoU  	isb  	mov	x0, #0  1: -	uaccess_ttbr0_disable x1 +	uaccess_ttbr0_disable x1, x2  	ret  9:  	mov	x0, #-EFAULT diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index ab9f5f0fb2c7..301417ae2ba8 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -39,7 +39,16 @@ static cpumask_t tlb_flush_pending;  #define ASID_MASK		(~GENMASK(asid_bits - 1, 0))  #define ASID_FIRST_VERSION	(1UL << asid_bits) -#define NUM_USER_ASIDS		ASID_FIRST_VERSION + +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +#define NUM_USER_ASIDS		(ASID_FIRST_VERSION >> 1) +#define asid2idx(asid)		(((asid) & ~ASID_MASK) >> 1) +#define idx2asid(idx)		(((idx) << 1) & ~ASID_MASK) +#else +#define NUM_USER_ASIDS		(ASID_FIRST_VERSION) +#define asid2idx(asid)		((asid) & ~ASID_MASK) +#define idx2asid(idx)		asid2idx(idx) +#endif  /* Get the ASIDBits supported by the current CPU */  static u32 get_cpu_asid_bits(void) @@ -79,13 +88,6 @@ void verify_cpu_asid_bits(void)  	}  } -static void set_reserved_asid_bits(void) -{ -	if (IS_ENABLED(CONFIG_QCOM_FALKOR_ERRATUM_1003) && -	    cpus_have_const_cap(ARM64_WORKAROUND_QCOM_FALKOR_E1003)) -		__set_bit(FALKOR_RESERVED_ASID, asid_map); -} -  static void 
flush_context(unsigned int cpu)  {  	int i; @@ -94,14 +96,6 @@ static void flush_context(unsigned int cpu)  	/* Update the list of reserved ASIDs and the ASID bitmap. */  	bitmap_clear(asid_map, 0, NUM_USER_ASIDS); -	set_reserved_asid_bits(); - -	/* -	 * Ensure the generation bump is observed before we xchg the -	 * active_asids. -	 */ -	smp_wmb(); -  	for_each_possible_cpu(i) {  		asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0);  		/* @@ -113,11 +107,14 @@ static void flush_context(unsigned int cpu)  		 */  		if (asid == 0)  			asid = per_cpu(reserved_asids, i); -		__set_bit(asid & ~ASID_MASK, asid_map); +		__set_bit(asid2idx(asid), asid_map);  		per_cpu(reserved_asids, i) = asid;  	} -	/* Queue a TLB invalidate and flush the I-cache if necessary. */ +	/* +	 * Queue a TLB invalidation for each CPU to perform on next +	 * context-switch +	 */  	cpumask_setall(&tlb_flush_pending);  } @@ -165,16 +162,16 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu)  		 * We had a valid ASID in a previous life, so try to re-use  		 * it if possible.  		 */ -		asid &= ~ASID_MASK; -		if (!__test_and_set_bit(asid, asid_map)) +		if (!__test_and_set_bit(asid2idx(asid), asid_map))  			return newasid;  	}  	/*  	 * Allocate a free ASID. If we can't find one, take a note of the -	 * currently active ASIDs and mark the TLBs as requiring flushes. -	 * We always count from ASID #1, as we use ASID #0 when setting a -	 * reserved TTBR0 for the init_mm. +	 * currently active ASIDs and mark the TLBs as requiring flushes.  We +	 * always count from ASID #2 (index 1), as we use ASID #0 when setting +	 * a reserved TTBR0 for the init_mm and we allocate ASIDs in even/odd +	 * pairs.  	 
*/  	asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx);  	if (asid != NUM_USER_ASIDS) @@ -191,25 +188,35 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu)  set_asid:  	__set_bit(asid, asid_map);  	cur_idx = asid; -	return asid | generation; +	return idx2asid(asid) | generation;  }  void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)  {  	unsigned long flags; -	u64 asid; +	u64 asid, old_active_asid;  	asid = atomic64_read(&mm->context.id);  	/* -	 * The memory ordering here is subtle. We rely on the control -	 * dependency between the generation read and the update of -	 * active_asids to ensure that we are synchronised with a -	 * parallel rollover (i.e. this pairs with the smp_wmb() in -	 * flush_context). +	 * The memory ordering here is subtle. +	 * If our active_asids is non-zero and the ASID matches the current +	 * generation, then we update the active_asids entry with a relaxed +	 * cmpxchg. Racing with a concurrent rollover means that either: +	 * +	 * - We get a zero back from the cmpxchg and end up waiting on the +	 *   lock. Taking the lock synchronises with the rollover and so +	 *   we are forced to see the updated generation. +	 * +	 * - We get a valid ASID back from the cmpxchg, which means the +	 *   relaxed xchg in flush_context will treat us as reserved +	 *   because atomic RmWs are totally ordered for a given location.  	 
*/ -	if (!((asid ^ atomic64_read(&asid_generation)) >> asid_bits) -	    && atomic64_xchg_relaxed(&per_cpu(active_asids, cpu), asid)) +	old_active_asid = atomic64_read(&per_cpu(active_asids, cpu)); +	if (old_active_asid && +	    !((asid ^ atomic64_read(&asid_generation)) >> asid_bits) && +	    atomic64_cmpxchg_relaxed(&per_cpu(active_asids, cpu), +				     old_active_asid, asid))  		goto switch_mm_fastpath;  	raw_spin_lock_irqsave(&cpu_asid_lock, flags); @@ -227,6 +234,9 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)  	raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);  switch_mm_fastpath: + +	arm64_apply_bp_hardening(); +  	/*  	 * Defer TTBR0_EL1 setting for user threads to uaccess_enable() when  	 * emulating PAN. @@ -235,6 +245,15 @@ switch_mm_fastpath:  		cpu_switch_mm(mm->pgd, mm);  } +/* Errata workaround post TTBRx_EL1 update. */ +asmlinkage void post_ttbr_update_workaround(void) +{ +	asm(ALTERNATIVE("nop; nop; nop", +			"ic iallu; dsb nsh; isb", +			ARM64_WORKAROUND_CAVIUM_27456, +			CONFIG_CAVIUM_ERRATUM_27456)); +} +  static int asids_init(void)  {  	asid_bits = get_cpu_asid_bits(); @@ -250,8 +269,6 @@ static int asids_init(void)  		panic("Failed to allocate bitmap for %lu ASIDs\n",  		      NUM_USER_ASIDS); -	set_reserved_asid_bits(); -  	pr_info("ASID allocator initialised with %lu entries\n", NUM_USER_ASIDS);  	return 0;  } diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index ca74a2aace42..7b60d62ac593 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -389,7 +389,7 @@ void ptdump_check_wx(void)  		.check_wx = true,  	}; -	walk_pgd(&st, &init_mm, 0); +	walk_pgd(&st, &init_mm, VA_START);  	note_page(&st, 0, 0, 0);  	if (st.wx_pages || st.uxn_pages)  		pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found, %lu non-UXN pages found\n", diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 22168cd0dde7..ce441d29e7f6 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -574,7 
+574,6 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)  {  	struct siginfo info;  	const struct fault_info *inf; -	int ret = 0;  	inf = esr_to_fault_info(esr);  	pr_err("Synchronous External Abort: %s (0x%08x) at 0x%016lx\n", @@ -589,7 +588,7 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)  		if (interrupts_enabled(regs))  			nmi_enter(); -		ret = ghes_notify_sea(); +		ghes_notify_sea();  		if (interrupts_enabled(regs))  			nmi_exit(); @@ -597,81 +596,81 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)  	info.si_signo = SIGBUS;  	info.si_errno = 0; -	info.si_code  = 0; +	info.si_code  = BUS_FIXME;  	if (esr & ESR_ELx_FnV)  		info.si_addr = NULL;  	else  		info.si_addr  = (void __user *)addr;  	arm64_notify_die("", regs, &info, esr); -	return ret; +	return 0;  }  static const struct fault_info fault_info[] = { -	{ do_bad,		SIGBUS,  0,		"ttbr address size fault"	}, -	{ do_bad,		SIGBUS,  0,		"level 1 address size fault"	}, -	{ do_bad,		SIGBUS,  0,		"level 2 address size fault"	}, -	{ do_bad,		SIGBUS,  0,		"level 3 address size fault"	}, +	{ do_bad,		SIGBUS,  BUS_FIXME,	"ttbr address size fault"	}, +	{ do_bad,		SIGBUS,  BUS_FIXME,	"level 1 address size fault"	}, +	{ do_bad,		SIGBUS,  BUS_FIXME,	"level 2 address size fault"	}, +	{ do_bad,		SIGBUS,  BUS_FIXME,	"level 3 address size fault"	},  	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 0 translation fault"	},  	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 1 translation fault"	},  	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 2 translation fault"	},  	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 3 translation fault"	}, -	{ do_bad,		SIGBUS,  0,		"unknown 8"			}, +	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 8"			},  	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 1 access flag fault"	},  	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 2 access flag fault"	},  	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	
"level 3 access flag fault"	}, -	{ do_bad,		SIGBUS,  0,		"unknown 12"			}, +	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 12"			},  	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 1 permission fault"	},  	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 2 permission fault"	},  	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 3 permission fault"	}, -	{ do_sea,		SIGBUS,  0,		"synchronous external abort"	}, -	{ do_bad,		SIGBUS,  0,		"unknown 17"			}, -	{ do_bad,		SIGBUS,  0,		"unknown 18"			}, -	{ do_bad,		SIGBUS,  0,		"unknown 19"			}, -	{ do_sea,		SIGBUS,  0,		"level 0 (translation table walk)"	}, -	{ do_sea,		SIGBUS,  0,		"level 1 (translation table walk)"	}, -	{ do_sea,		SIGBUS,  0,		"level 2 (translation table walk)"	}, -	{ do_sea,		SIGBUS,  0,		"level 3 (translation table walk)"	}, -	{ do_sea,		SIGBUS,  0,		"synchronous parity or ECC error" },	// Reserved when RAS is implemented -	{ do_bad,		SIGBUS,  0,		"unknown 25"			}, -	{ do_bad,		SIGBUS,  0,		"unknown 26"			}, -	{ do_bad,		SIGBUS,  0,		"unknown 27"			}, -	{ do_sea,		SIGBUS,  0,		"level 0 synchronous parity error (translation table walk)"	},	// Reserved when RAS is implemented -	{ do_sea,		SIGBUS,  0,		"level 1 synchronous parity error (translation table walk)"	},	// Reserved when RAS is implemented -	{ do_sea,		SIGBUS,  0,		"level 2 synchronous parity error (translation table walk)"	},	// Reserved when RAS is implemented -	{ do_sea,		SIGBUS,  0,		"level 3 synchronous parity error (translation table walk)"	},	// Reserved when RAS is implemented -	{ do_bad,		SIGBUS,  0,		"unknown 32"			}, +	{ do_sea,		SIGBUS,  BUS_FIXME,	"synchronous external abort"	}, +	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 17"			}, +	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 18"			}, +	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 19"			}, +	{ do_sea,		SIGBUS,  BUS_FIXME,	"level 0 (translation table walk)"	}, +	{ do_sea,		SIGBUS,  BUS_FIXME,	"level 1 (translation table walk)"	}, +	{ do_sea,		SIGBUS,  BUS_FIXME,	"level 2 (translation table walk)"	}, +	{ 
do_sea,		SIGBUS,  BUS_FIXME,	"level 3 (translation table walk)"	}, +	{ do_sea,		SIGBUS,  BUS_FIXME,	"synchronous parity or ECC error" },	// Reserved when RAS is implemented +	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 25"			}, +	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 26"			}, +	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 27"			}, +	{ do_sea,		SIGBUS,  BUS_FIXME,	"level 0 synchronous parity error (translation table walk)"	},	// Reserved when RAS is implemented +	{ do_sea,		SIGBUS,  BUS_FIXME,	"level 1 synchronous parity error (translation table walk)"	},	// Reserved when RAS is implemented +	{ do_sea,		SIGBUS,  BUS_FIXME,	"level 2 synchronous parity error (translation table walk)"	},	// Reserved when RAS is implemented +	{ do_sea,		SIGBUS,  BUS_FIXME,	"level 3 synchronous parity error (translation table walk)"	},	// Reserved when RAS is implemented +	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 32"			},  	{ do_alignment_fault,	SIGBUS,  BUS_ADRALN,	"alignment fault"		}, -	{ do_bad,		SIGBUS,  0,		"unknown 34"			}, -	{ do_bad,		SIGBUS,  0,		"unknown 35"			}, -	{ do_bad,		SIGBUS,  0,		"unknown 36"			}, -	{ do_bad,		SIGBUS,  0,		"unknown 37"			}, -	{ do_bad,		SIGBUS,  0,		"unknown 38"			}, -	{ do_bad,		SIGBUS,  0,		"unknown 39"			}, -	{ do_bad,		SIGBUS,  0,		"unknown 40"			}, -	{ do_bad,		SIGBUS,  0,		"unknown 41"			}, -	{ do_bad,		SIGBUS,  0,		"unknown 42"			}, -	{ do_bad,		SIGBUS,  0,		"unknown 43"			}, -	{ do_bad,		SIGBUS,  0,		"unknown 44"			}, -	{ do_bad,		SIGBUS,  0,		"unknown 45"			}, -	{ do_bad,		SIGBUS,  0,		"unknown 46"			}, -	{ do_bad,		SIGBUS,  0,		"unknown 47"			}, -	{ do_bad,		SIGBUS,  0,		"TLB conflict abort"		}, -	{ do_bad,		SIGBUS,  0,		"Unsupported atomic hardware update fault"	}, -	{ do_bad,		SIGBUS,  0,		"unknown 50"			}, -	{ do_bad,		SIGBUS,  0,		"unknown 51"			}, -	{ do_bad,		SIGBUS,  0,		"implementation fault (lockdown abort)" }, -	{ do_bad,		SIGBUS,  0,		"implementation fault (unsupported exclusive)" }, -	{ do_bad,		SIGBUS,  0,		"unknown 54"			}, -	{ do_bad,	
	SIGBUS,  0,		"unknown 55"			}, -	{ do_bad,		SIGBUS,  0,		"unknown 56"			}, -	{ do_bad,		SIGBUS,  0,		"unknown 57"			}, -	{ do_bad,		SIGBUS,  0,		"unknown 58" 			}, -	{ do_bad,		SIGBUS,  0,		"unknown 59"			}, -	{ do_bad,		SIGBUS,  0,		"unknown 60"			}, -	{ do_bad,		SIGBUS,  0,		"section domain fault"		}, -	{ do_bad,		SIGBUS,  0,		"page domain fault"		}, -	{ do_bad,		SIGBUS,  0,		"unknown 63"			}, +	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 34"			}, +	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 35"			}, +	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 36"			}, +	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 37"			}, +	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 38"			}, +	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 39"			}, +	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 40"			}, +	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 41"			}, +	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 42"			}, +	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 43"			}, +	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 44"			}, +	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 45"			}, +	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 46"			}, +	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 47"			}, +	{ do_bad,		SIGBUS,  BUS_FIXME,	"TLB conflict abort"		}, +	{ do_bad,		SIGBUS,  BUS_FIXME,	"Unsupported atomic hardware update fault"	}, +	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 50"			}, +	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 51"			}, +	{ do_bad,		SIGBUS,  BUS_FIXME,	"implementation fault (lockdown abort)" }, +	{ do_bad,		SIGBUS,  BUS_FIXME,	"implementation fault (unsupported exclusive)" }, +	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 54"			}, +	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 55"			}, +	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 56"			}, +	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 57"			}, +	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 58" 			}, +	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 59"			}, +	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 60"			}, +	{ do_bad,		SIGBUS,  BUS_FIXME,	"section domain fault"		}, +	{ do_bad,		SIGBUS,  BUS_FIXME,	"page domain fault"		}, +	{ do_bad,		
SIGBUS,  BUS_FIXME,	"unknown 63"			},  };  int handle_guest_sea(phys_addr_t addr, unsigned int esr) @@ -708,6 +707,23 @@ asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,  	arm64_notify_die("", regs, &info, esr);  } +asmlinkage void __exception do_el0_ia_bp_hardening(unsigned long addr, +						   unsigned int esr, +						   struct pt_regs *regs) +{ +	/* +	 * We've taken an instruction abort from userspace and not yet +	 * re-enabled IRQs. If the address is a kernel address, apply +	 * BP hardening prior to enabling IRQs and pre-emption. +	 */ +	if (addr > TASK_SIZE) +		arm64_apply_bp_hardening(); + +	local_irq_enable(); +	do_mem_abort(addr, esr, regs); +} + +  asmlinkage void __exception do_sp_pc_abort(unsigned long addr,  					   unsigned int esr,  					   struct pt_regs *regs) @@ -740,11 +756,11 @@ static struct fault_info __refdata debug_fault_info[] = {  	{ do_bad,	SIGTRAP,	TRAP_HWBKPT,	"hardware breakpoint"	},  	{ do_bad,	SIGTRAP,	TRAP_HWBKPT,	"hardware single-step"	},  	{ do_bad,	SIGTRAP,	TRAP_HWBKPT,	"hardware watchpoint"	}, -	{ do_bad,	SIGBUS,		0,		"unknown 3"		}, +	{ do_bad,	SIGBUS,		BUS_FIXME,	"unknown 3"		},  	{ do_bad,	SIGTRAP,	TRAP_BRKPT,	"aarch32 BKPT"		}, -	{ do_bad,	SIGTRAP,	0,		"aarch32 vector catch"	}, +	{ do_bad,	SIGTRAP,	TRAP_FIXME,	"aarch32 vector catch"	},  	{ early_brk64,	SIGTRAP,	TRAP_BRKPT,	"aarch64 BRK"		}, -	{ do_bad,	SIGBUS,		0,		"unknown 7"		}, +	{ do_bad,	SIGBUS,		BUS_FIXME,	"unknown 7"		},  };  void __init hook_debug_fault_code(int nr, diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 5960bef0170d..c903f7ccbdd2 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -366,6 +366,9 @@ void __init arm64_memblock_init(void)  	/* Handle linux,usable-memory-range property */  	fdt_enforce_memory_region(); +	/* Remove memory above our supported physical address size */ +	memblock_remove(1ULL << PHYS_MASK_SHIFT, ULLONG_MAX); +  	/*  	 * Ensure that the linear region takes up exactly half of 
the kernel  	 * virtual address space. This way, we can distinguish a linear address @@ -476,6 +479,8 @@ void __init arm64_memblock_init(void)  	reserve_elfcorehdr(); +	high_memory = __va(memblock_end_of_DRAM() - 1) + 1; +  	dma_contiguous_reserve(arm64_dma_phys_limit);  	memblock_allow_resize(); @@ -502,7 +507,6 @@ void __init bootmem_init(void)  	sparse_init();  	zone_sizes_init(min, max); -	high_memory = __va((max << PAGE_SHIFT) - 1) + 1;  	memblock_dump_all();  } @@ -599,49 +603,6 @@ void __init mem_init(void)  	mem_init_print_info(NULL); -#define MLK(b, t) b, t, ((t) - (b)) >> 10 -#define MLM(b, t) b, t, ((t) - (b)) >> 20 -#define MLG(b, t) b, t, ((t) - (b)) >> 30 -#define MLK_ROUNDUP(b, t) b, t, DIV_ROUND_UP(((t) - (b)), SZ_1K) - -	pr_notice("Virtual kernel memory layout:\n"); -#ifdef CONFIG_KASAN -	pr_notice("    kasan   : 0x%16lx - 0x%16lx   (%6ld GB)\n", -		MLG(KASAN_SHADOW_START, KASAN_SHADOW_END)); -#endif -	pr_notice("    modules : 0x%16lx - 0x%16lx   (%6ld MB)\n", -		MLM(MODULES_VADDR, MODULES_END)); -	pr_notice("    vmalloc : 0x%16lx - 0x%16lx   (%6ld GB)\n", -		MLG(VMALLOC_START, VMALLOC_END)); -	pr_notice("      .text : 0x%p" " - 0x%p" "   (%6ld KB)\n", -		MLK_ROUNDUP(_text, _etext)); -	pr_notice("    .rodata : 0x%p" " - 0x%p" "   (%6ld KB)\n", -		MLK_ROUNDUP(__start_rodata, __init_begin)); -	pr_notice("      .init : 0x%p" " - 0x%p" "   (%6ld KB)\n", -		MLK_ROUNDUP(__init_begin, __init_end)); -	pr_notice("      .data : 0x%p" " - 0x%p" "   (%6ld KB)\n", -		MLK_ROUNDUP(_sdata, _edata)); -	pr_notice("       .bss : 0x%p" " - 0x%p" "   (%6ld KB)\n", -		MLK_ROUNDUP(__bss_start, __bss_stop)); -	pr_notice("    fixed   : 0x%16lx - 0x%16lx   (%6ld KB)\n", -		MLK(FIXADDR_START, FIXADDR_TOP)); -	pr_notice("    PCI I/O : 0x%16lx - 0x%16lx   (%6ld MB)\n", -		MLM(PCI_IO_START, PCI_IO_END)); -#ifdef CONFIG_SPARSEMEM_VMEMMAP -	pr_notice("    vmemmap : 0x%16lx - 0x%16lx   (%6ld GB maximum)\n", -		MLG(VMEMMAP_START, VMEMMAP_START + VMEMMAP_SIZE)); -	pr_notice("        
      0x%16lx - 0x%16lx   (%6ld MB actual)\n", -		MLM((unsigned long)phys_to_page(memblock_start_of_DRAM()), -		    (unsigned long)virt_to_page(high_memory))); -#endif -	pr_notice("    memory  : 0x%16lx - 0x%16lx   (%6ld MB)\n", -		MLM(__phys_to_virt(memblock_start_of_DRAM()), -		    (unsigned long)high_memory)); - -#undef MLK -#undef MLM -#undef MLK_ROUNDUP -  	/*  	 * Check boundaries twice: Some fundamental inconsistencies can be  	 * detected at build time already. diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 267d2b79d52d..b44992ec9643 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -50,6 +50,7 @@  #define NO_CONT_MAPPINGS	BIT(1)  u64 idmap_t0sz = TCR_T0SZ(VA_BITS); +u64 idmap_ptrs_per_pgd = PTRS_PER_PGD;  u64 kimage_voffset __ro_after_init;  EXPORT_SYMBOL(kimage_voffset); @@ -525,6 +526,35 @@ static int __init parse_rodata(char *arg)  }  early_param("rodata", parse_rodata); +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +static int __init map_entry_trampoline(void) +{ +	pgprot_t prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC; +	phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start); + +	/* The trampoline is always mapped and can therefore be global */ +	pgprot_val(prot) &= ~PTE_NG; + +	/* Map only the text into the trampoline page table */ +	memset(tramp_pg_dir, 0, PGD_SIZE); +	__create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, PAGE_SIZE, +			     prot, pgd_pgtable_alloc, 0); + +	/* Map both the text and data into the kernel page table */ +	__set_fixmap(FIX_ENTRY_TRAMP_TEXT, pa_start, prot); +	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { +		extern char __entry_tramp_data_start[]; + +		__set_fixmap(FIX_ENTRY_TRAMP_DATA, +			     __pa_symbol(__entry_tramp_data_start), +			     PAGE_KERNEL_RO); +	} + +	return 0; +} +core_initcall(map_entry_trampoline); +#endif +  /*   * Create fine-grained mappings for the kernel.   */ @@ -570,8 +600,8 @@ static void __init map_kernel(pgd_t *pgd)  		 * entry instead.  		 
*/  		BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES)); -		set_pud(pud_set_fixmap_offset(pgd, FIXADDR_START), -			__pud(__pa_symbol(bm_pmd) | PUD_TYPE_TABLE)); +		pud_populate(&init_mm, pud_set_fixmap_offset(pgd, FIXADDR_START), +			     lm_alias(bm_pmd));  		pud_clear_fixmap();  	} else {  		BUG(); @@ -612,7 +642,8 @@ void __init paging_init(void)  	 * allocated with it.  	 */  	memblock_free(__pa_symbol(swapper_pg_dir) + PAGE_SIZE, -		      SWAPPER_DIR_SIZE - PAGE_SIZE); +		      __pa_symbol(swapper_pg_end) - __pa_symbol(swapper_pg_dir) +		      - PAGE_SIZE);  }  /* @@ -686,7 +717,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)  			if (!p)  				return -ENOMEM; -			set_pmd(pmd, __pmd(__pa(p) | PROT_SECT_NORMAL)); +			pmd_set_huge(pmd, __pa(p), __pgprot(PROT_SECT_NORMAL));  		} else  			vmemmap_verify((pte_t *)pmd, node, addr, next);  	} while (addr = next, addr != end); @@ -879,15 +910,19 @@ int __init arch_ioremap_pmd_supported(void)  int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot)  { +	pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT | +					pgprot_val(mk_sect_prot(prot)));  	BUG_ON(phys & ~PUD_MASK); -	set_pud(pud, __pud(phys | PUD_TYPE_SECT | pgprot_val(mk_sect_prot(prot)))); +	set_pud(pud, pfn_pud(__phys_to_pfn(phys), sect_prot));  	return 1;  }  int pmd_set_huge(pmd_t *pmd, phys_addr_t phys, pgprot_t prot)  { +	pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT | +					pgprot_val(mk_sect_prot(prot)));  	BUG_ON(phys & ~PMD_MASK); -	set_pmd(pmd, __pmd(phys | PMD_TYPE_SECT | pgprot_val(mk_sect_prot(prot)))); +	set_pmd(pmd, pfn_pmd(__phys_to_pfn(phys), sect_prot));  	return 1;  } diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c index 371c5f03a170..289f9113a27a 100644 --- a/arch/arm64/mm/pgd.c +++ b/arch/arm64/mm/pgd.c @@ -26,7 +26,7 @@  #include <asm/page.h>  #include <asm/tlbflush.h> -static struct kmem_cache *pgd_cache; +static struct kmem_cache *pgd_cache __ro_after_init;  pgd_t *pgd_alloc(struct mm_struct *mm)  { @@ 
-49,6 +49,14 @@ void __init pgd_cache_init(void)  	if (PGD_SIZE == PAGE_SIZE)  		return; +#ifdef CONFIG_ARM64_PA_BITS_52 +	/* +	 * With 52-bit physical addresses, the architecture requires the +	 * top-level table to be aligned to at least 64 bytes. +	 */ +	BUILD_BUG_ON(PGD_SIZE < 64); +#endif +  	/*  	 * Naturally aligned pgds required by the architecture.  	 */ diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 95233dfc4c39..9f177aac6390 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -70,7 +70,11 @@ ENTRY(cpu_do_suspend)  	mrs	x8, mdscr_el1  	mrs	x9, oslsr_el1  	mrs	x10, sctlr_el1 +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN  	mrs	x11, tpidr_el1 +alternative_else +	mrs	x11, tpidr_el2 +alternative_endif  	mrs	x12, sp_el0  	stp	x2, x3, [x0]  	stp	x4, xzr, [x0, #16] @@ -116,7 +120,11 @@ ENTRY(cpu_do_resume)  	msr	mdscr_el1, x10  	msr	sctlr_el1, x12 +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN  	msr	tpidr_el1, x13 +alternative_else +	msr	tpidr_el2, x13 +alternative_endif  	msr	sp_el0, x14  	/*  	 * Restore oslsr_el1 by writing oslar_el1 @@ -124,6 +132,11 @@ ENTRY(cpu_do_resume)  	ubfx	x11, x11, #1, #1  	msr	oslar_el1, x11  	reset_pmuserenr_el0 x0			// Disable PMU access from EL0 + +alternative_if ARM64_HAS_RAS_EXTN +	msr_s	SYS_DISR_EL1, xzr +alternative_else_nop_endif +  	isb  	ret  ENDPROC(cpu_do_resume) @@ -138,13 +151,18 @@ ENDPROC(cpu_do_resume)   *	- pgd_phys - physical address of new TTB   */  ENTRY(cpu_do_switch_mm) -	pre_ttbr0_update_workaround x0, x2, x3 +	mrs	x2, ttbr1_el1  	mmid	x1, x1				// get mm->context.id -	bfi	x0, x1, #48, #16		// set the ASID -	msr	ttbr0_el1, x0			// set TTBR0 +	phys_to_ttbr x0, x3 +#ifdef CONFIG_ARM64_SW_TTBR0_PAN +	bfi	x3, x1, #48, #16		// set the ASID field in TTBR0 +#endif +	bfi	x2, x1, #48, #16		// set the ASID +	msr	ttbr1_el1, x2			// in TTBR1 (since TCR.A1 is set)  	isb -	post_ttbr0_update_workaround -	ret +	msr	ttbr0_el1, x3			// now update TTBR0 +	isb +	b	post_ttbr_update_workaround	// Back to C 
code...  ENDPROC(cpu_do_switch_mm)  	.pushsection ".idmap.text", "ax" @@ -158,14 +176,16 @@ ENTRY(idmap_cpu_replace_ttbr1)  	save_and_disable_daif flags=x2  	adrp	x1, empty_zero_page -	msr	ttbr1_el1, x1 +	phys_to_ttbr x1, x3 +	msr	ttbr1_el1, x3  	isb  	tlbi	vmalle1  	dsb	nsh  	isb -	msr	ttbr1_el1, x0 +	phys_to_ttbr x0, x3 +	msr	ttbr1_el1, x3  	isb  	restore_daif x2 @@ -214,25 +234,19 @@ ENTRY(__cpu_setup)  	/*  	 * Prepare SCTLR  	 */ -	adr	x5, crval -	ldp	w5, w6, [x5] -	mrs	x0, sctlr_el1 -	bic	x0, x0, x5			// clear bits -	orr	x0, x0, x6			// set bits +	mov_q	x0, SCTLR_EL1_SET  	/*  	 * Set/prepare TCR and TTBR. We use 512GB (39-bit) address range for  	 * both user and kernel.  	 */  	ldr	x10, =TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \ -			TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0 +			TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0 | TCR_A1  	tcr_set_idmap_t0sz	x10, x9  	/* -	 * Read the PARange bits from ID_AA64MMFR0_EL1 and set the IPS bits in -	 * TCR_EL1. +	 * Set the IPS bits in TCR_EL1.  	 */ -	mrs	x9, ID_AA64MMFR0_EL1 -	bfi	x10, x9, #32, #3 +	tcr_compute_pa_size x10, #TCR_IPS_SHIFT, x5, x6  #ifdef CONFIG_ARM64_HW_AFDBM  	/*  	 * Hardware update of the Access and Dirty bits. @@ -249,21 +263,3 @@ ENTRY(__cpu_setup)  	msr	tcr_el1, x10  	ret					// return to head.S  ENDPROC(__cpu_setup) - -	/* -	 * We set the desired value explicitly, including those of the -	 * reserved bits. The values of bits EE & E0E were set early in -	 * el2_setup, which are left untouched below. -	 * -	 *                 n n            T -	 *       U E      WT T UD     US IHBS -	 *       CE0      XWHW CZ     ME TEEA S -	 * .... .IEE .... NEAI TE.I ..AD DEN0 ACAM -	 * 0011 0... 1101 ..0. ..0. 10.. .0.. .... < hardware reserved -	 * .... .1.. .... 
01.1 11.1 ..01 0.01 1101 < software settings -	 */ -	.type	crval, #object -crval: -	.word	0xfcffffff			// clear -	.word	0x34d5d91d			// set -	.popsection diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index ba38d403abb2..bb32f7f6dd0f 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -148,7 +148,8 @@ static inline int epilogue_offset(const struct jit_ctx *ctx)  /* Stack must be multiples of 16B */  #define STACK_ALIGN(sz) (((sz) + 15) & ~15) -#define PROLOGUE_OFFSET 8 +/* Tail call offset to jump into */ +#define PROLOGUE_OFFSET 7  static int build_prologue(struct jit_ctx *ctx)  { @@ -200,19 +201,19 @@ static int build_prologue(struct jit_ctx *ctx)  	/* Initialize tail_call_cnt */  	emit(A64_MOVZ(1, tcc, 0, 0), ctx); -	/* 4 byte extra for skb_copy_bits buffer */ -	ctx->stack_size = prog->aux->stack_depth + 4; -	ctx->stack_size = STACK_ALIGN(ctx->stack_size); - -	/* Set up function call stack */ -	emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx); -  	cur_offset = ctx->idx - idx0;  	if (cur_offset != PROLOGUE_OFFSET) {  		pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n",  			    cur_offset, PROLOGUE_OFFSET);  		return -1;  	} + +	/* 4 byte extra for skb_copy_bits buffer */ +	ctx->stack_size = prog->aux->stack_depth + 4; +	ctx->stack_size = STACK_ALIGN(ctx->stack_size); + +	/* Set up function call stack */ +	emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);  	return 0;  } @@ -260,11 +261,12 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)  	emit(A64_LDR64(prg, tmp, prg), ctx);  	emit(A64_CBZ(1, prg, jmp_offset), ctx); -	/* goto *(prog->bpf_func + prologue_size); */ +	/* goto *(prog->bpf_func + prologue_offset); */  	off = offsetof(struct bpf_prog, bpf_func);  	emit_a64_mov_i64(tmp, off, ctx);  	emit(A64_LDR64(tmp, prg, tmp), ctx);  	emit(A64_ADD_I(1, tmp, tmp, sizeof(u32) * PROLOGUE_OFFSET), ctx); +	emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);  	emit(A64_BR(tmp), ctx);  	/* 
out: */ diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S index 401ceb71540c..c5f05c4a4d00 100644 --- a/arch/arm64/xen/hypercall.S +++ b/arch/arm64/xen/hypercall.S @@ -101,12 +101,12 @@ ENTRY(privcmd_call)  	 * need the explicit uaccess_enable/disable if the TTBR0 PAN emulation  	 * is enabled (it implies that hardware UAO and PAN disabled).  	 */ -	uaccess_ttbr0_enable x6, x7 +	uaccess_ttbr0_enable x6, x7, x8  	hvc XEN_IMM  	/*  	 * Disable userspace access from kernel once the hyp call completed.  	 */ -	uaccess_ttbr0_disable x6 +	uaccess_ttbr0_disable x6, x7  	ret  ENDPROC(privcmd_call); | 
