From 5e2feac330953fe75197aecb20c781400e2bf606 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 16 Jun 2017 01:41:20 +0000 Subject: arm64: renesas: salvator-common: sound clock-frequency needs descending order It will be used ADG clock initial settings, and will be sound codec's initial system clock which needs maximum clock frequency. Thus, descending order is required Fixes: d37d2b3c0ec2708a ("arm64: dts: salvator-x: add 12288000 for sound ADG") Fixes: 0b03c32db03d63de ("arm64: dts: r8a7795: salvator-x: Add support for R-Car H3 ES2.0") Signed-off-by: Kuninori Morimoto Signed-off-by: Simon Horman --- arch/arm64/boot/dts/renesas/salvator-common.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/renesas/salvator-common.dtsi b/arch/arm64/boot/dts/renesas/salvator-common.dtsi index aef35e0b685a..a451996f590a 100644 --- a/arch/arm64/boot/dts/renesas/salvator-common.dtsi +++ b/arch/arm64/boot/dts/renesas/salvator-common.dtsi @@ -508,7 +508,7 @@ /* audio_clkout0/1/2/3 */ #clock-cells = <1>; - clock-frequency = <11289600 12288000>; + clock-frequency = <12288000 11289600>; status = "okay"; -- cgit v1.2.3 From 324dd7a6ac27b388e605ef136f23c88a5e49edbe Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 27 Jun 2017 01:48:38 -0700 Subject: ARM: OMAP2+: Fix omap3 prm shared irq Shared interrupts with IRQ_NOAUTOEN got a warning added with commit 04c848d39879 ("genirq: Warn when IRQ_NOAUTOEN is used with shared interrupts"). Let's just drop the IRQ_NOAUTOEN use for omap3 PRM shared interrupt as it does not seem to cause any other issues based on my testing. We have moved a lot of the code to initialize later, and whatever problems the legacy booting had seem to be gone now with pinctrl driver and device tree based booting. Otherwise we will get: WARNING: CPU: 0 PID: 1 at kernel/irq/manage.c:1348 __setup_irq+0x5d0/0x64c [] (__setup_irq) from [] (request_threaded_irq+0xdc/0x188) [] (request_threaded_irq) from [] (pcs_probe+0x6ec/0x8a4) [] (pcs_probe) from [] (platform_drv_probe+0x50/0xb0) [] (platform_drv_probe) from [] (driver_probe_device+0x33c/0x478) Note that we also need to remove the related enable_irq() to avoid getting the following: WARNING: CPU: 0 PID: 1 at kernel/irq/manage.c:529 enable_irq+0x34/0x70 [] (enable_irq) from [] (omap3_pm_init+0x118/0x3f8) [] (omap3_pm_init) from [] (am35xx_init_late+0x10/0x18) Cc: Kevin Hilman Cc: Tero Kristo Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/pm34xx.c | 1 - arch/arm/mach-omap2/prm3xxx.c | 7 +------ 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c index d44e0e2f1106..841ba19d64a6 100644 --- a/arch/arm/mach-omap2/pm34xx.c +++ b/arch/arm/mach-omap2/pm34xx.c @@ -486,7 +486,6 @@ int __init omap3_pm_init(void) ret = request_irq(omap_prcm_event_to_irq("io"), _prcm_int_handle_io, IRQF_SHARED | IRQF_NO_SUSPEND, "pm_io", omap3_pm_init); - enable_irq(omap_prcm_event_to_irq("io")); if (ret) { pr_err("pm: Failed to request pm_io irq\n"); diff --git a/arch/arm/mach-omap2/prm3xxx.c b/arch/arm/mach-omap2/prm3xxx.c index 382e236fbfd9..64f6451499a7 100644 --- a/arch/arm/mach-omap2/prm3xxx.c +++ b/arch/arm/mach-omap2/prm3xxx.c @@ -692,7 +692,6 @@ static int omap3xxx_prm_late_init(void) { struct device_node *np; int irq_num; - int ret; if (!(prm_features & PRM_HAS_IO_WAKEUP)) return 0; @@ -712,12 +711,8 @@ static int omap3xxx_prm_late_init(void) } omap3xxx_prm_enable_io_wakeup(); - ret = omap_prcm_register_chain_handler(&omap3_prcm_irq_setup); - if (!ret) - irq_set_status_flags(omap_prcm_event_to_irq("io"), - IRQ_NOAUTOEN); - return ret; + return omap_prcm_register_chain_handler(&omap3_prcm_irq_setup); } static void __exit omap3xxx_prm_exit(void) -- cgit v1.2.3 From c5b3955828baa4d7ae44ec075529d66fa7bdf903 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 30 Jun 2017 03:37:03 -0700 Subject: ARM: OMAP4: Fix legacy code clean-up regression Commit 2a26d31b1bae ("ARM: OMAP2+: Remove unused legacy code for PRM") removed PRM platform init code that I thought is unused. Turns out omap4 still needs this code, so let's do a partial revert to add it back. I probably missed this earlier as the comments used to say "OMAP4+ is DT only now" for !of_have_populated_dt() to exit early and missed the negative test. Let's not add those lines back as they are confusing and no longer needed as we only boot in device tree mode. Without things things can mysterious fail for i2c, for example LM75 I2C temperature sensor can stop working as the PRM interrupts won't work. Fixes: 2a26d31b1bae ("ARM: OMAP2+: Remove unused legacy code for PRM") Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/prm44xx.c | 55 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) (limited to 'arch') diff --git a/arch/arm/mach-omap2/prm44xx.c b/arch/arm/mach-omap2/prm44xx.c index 87e86a4a9ead..3ab5df1ce900 100644 --- a/arch/arm/mach-omap2/prm44xx.c +++ b/arch/arm/mach-omap2/prm44xx.c @@ -336,6 +336,27 @@ static void omap44xx_prm_reconfigure_io_chain(void) return; } +/** + * omap44xx_prm_enable_io_wakeup - enable wakeup events from I/O wakeup latches + * + * Activates the I/O wakeup event latches and allows events logged by + * those latches to signal a wakeup event to the PRCM. For I/O wakeups + * to occur, WAKEUPENABLE bits must be set in the pad mux registers, and + * omap44xx_prm_reconfigure_io_chain() must be called. No return value. + */ +static void __init omap44xx_prm_enable_io_wakeup(void) +{ + s32 inst = omap4_prmst_get_prm_dev_inst(); + + if (inst == PRM_INSTANCE_UNKNOWN) + return; + + omap4_prm_rmw_inst_reg_bits(OMAP4430_GLOBAL_WUEN_MASK, + OMAP4430_GLOBAL_WUEN_MASK, + inst, + omap4_prcm_irq_setup.pm_ctrl); +} + /** * omap44xx_prm_read_reset_sources - return the last SoC reset source * @@ -668,6 +689,8 @@ struct pwrdm_ops omap4_pwrdm_operations = { .pwrdm_has_voltdm = omap4_check_vcvp, }; +static int omap44xx_prm_late_init(void); + /* * XXX document */ @@ -675,6 +698,7 @@ static struct prm_ll_data omap44xx_prm_ll_data = { .read_reset_sources = &omap44xx_prm_read_reset_sources, .was_any_context_lost_old = &omap44xx_prm_was_any_context_lost_old, .clear_context_loss_flags_old = &omap44xx_prm_clear_context_loss_flags_old, + .late_init = &omap44xx_prm_late_init, .assert_hardreset = omap4_prminst_assert_hardreset, .deassert_hardreset = omap4_prminst_deassert_hardreset, .is_hardreset_asserted = omap4_prminst_is_hardreset_asserted, @@ -711,6 +735,37 @@ int __init omap44xx_prm_init(const struct omap_prcm_init_data *data) return prm_register(&omap44xx_prm_ll_data); } +static int omap44xx_prm_late_init(void) +{ + int irq_num; + + if (!(prm_features & PRM_HAS_IO_WAKEUP)) + return 0; + + irq_num = of_irq_get(prm_init_data->np, 0); + /* + * Already have OMAP4 IRQ num. For all other platforms, we need + * IRQ numbers from DT + */ + if (irq_num < 0 && !(prm_init_data->flags & PRM_IRQ_DEFAULT)) { + if (irq_num == -EPROBE_DEFER) + return irq_num; + + /* Have nothing to do */ + return 0; + } + + /* Once OMAP4 DT is filled as well */ + if (irq_num >= 0) { + omap4_prcm_irq_setup.irq = irq_num; + omap4_prcm_irq_setup.xlate_irq = NULL; + } + + omap44xx_prm_enable_io_wakeup(); + + return omap_prcm_register_chain_handler(&omap4_prcm_irq_setup); +} + static void __exit omap44xx_prm_exit(void) { prm_unregister(&omap44xx_prm_ll_data); -- cgit v1.2.3 From 0c88e963a3090099725a5edd3b65afb4c9cf7858 Mon Sep 17 00:00:00 2001 From: Tom Rini Date: Wed, 5 Jul 2017 00:54:36 -0700 Subject: ARM: dts: dm816x: Correct NAND support nodes The ELM node in dm816x.dtsi needs to declare the correct compatible value here as per the binding only one value is correct, and the current driver handles it correctly. We then add pinmux information for the NAND found on the EVM so that we do not rely on the ROM to do this for us, and also so that we do not try and probe NAND before we probe the ELM. Cc: Rob Herring Cc: Mark Rutland Cc: Russell King Cc: Roger Quadros Cc: Tony Lindgren Cc: Mihail Grigorov Signed-off-by: Tom Rini Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/dm8168-evm.dts | 32 ++++++++++++++++++++++++++++++++ arch/arm/boot/dts/dm816x.dtsi | 2 +- 2 files changed, 33 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/dm8168-evm.dts b/arch/arm/boot/dts/dm8168-evm.dts index 1865976db5f9..996eba0c2e7a 100644 --- a/arch/arm/boot/dts/dm8168-evm.dts +++ b/arch/arm/boot/dts/dm8168-evm.dts @@ -68,6 +68,34 @@ DM816X_IOPAD(0x0d08, MUX_MODE0) /* USB1_DRVVBUS */ >; }; + + nandflash_pins: nandflash_pins { + pinctrl-single,pins = < + DM816X_IOPAD(0x0b38, PULL_UP | MUX_MODE0) /* PINCTRL207 GPMC_CS0*/ + DM816X_IOPAD(0x0b60, PULL_ENA | MUX_MODE0) /* PINCTRL217 GPMC_ADV_ALE */ + DM816X_IOPAD(0x0b54, PULL_UP | PULL_ENA | MUX_MODE0) /* PINCTRL214 GPMC_OE_RE */ + DM816X_IOPAD(0x0b58, PULL_ENA | MUX_MODE0) /* PINCTRL215 GPMC_BE0_CLE */ + DM816X_IOPAD(0x0b50, PULL_UP | MUX_MODE0) /* PINCTRL213 GPMC_WE */ + DM816X_IOPAD(0x0b6c, MUX_MODE0) /* PINCTRL220 GPMC_WAIT */ + DM816X_IOPAD(0x0be4, PULL_ENA | MUX_MODE0) /* PINCTRL250 GPMC_CLK */ + DM816X_IOPAD(0x0ba4, MUX_MODE0) /* PINCTRL234 GPMC_D0 */ + DM816X_IOPAD(0x0ba8, MUX_MODE0) /* PINCTRL234 GPMC_D1 */ + DM816X_IOPAD(0x0bac, MUX_MODE0) /* PINCTRL234 GPMC_D2 */ + DM816X_IOPAD(0x0bb0, MUX_MODE0) /* PINCTRL234 GPMC_D3 */ + DM816X_IOPAD(0x0bb4, MUX_MODE0) /* PINCTRL234 GPMC_D4 */ + DM816X_IOPAD(0x0bb8, MUX_MODE0) /* PINCTRL234 GPMC_D5 */ + DM816X_IOPAD(0x0bbc, MUX_MODE0) /* PINCTRL234 GPMC_D6 */ + DM816X_IOPAD(0x0bc0, MUX_MODE0) /* PINCTRL234 GPMC_D7 */ + DM816X_IOPAD(0x0bc4, MUX_MODE0) /* PINCTRL234 GPMC_D8 */ + DM816X_IOPAD(0x0bc8, MUX_MODE0) /* PINCTRL234 GPMC_D9 */ + DM816X_IOPAD(0x0bcc, MUX_MODE0) /* PINCTRL234 GPMC_D10 */ + DM816X_IOPAD(0x0bd0, MUX_MODE0) /* PINCTRL234 GPMC_D11 */ + DM816X_IOPAD(0x0bd4, MUX_MODE0) /* PINCTRL234 GPMC_D12 */ + DM816X_IOPAD(0x0bd8, MUX_MODE0) /* PINCTRL234 GPMC_D13 */ + DM816X_IOPAD(0x0bdc, MUX_MODE0) /* PINCTRL234 GPMC_D14 */ + DM816X_IOPAD(0x0be0, MUX_MODE0) /* PINCTRL234 GPMC_D15 */ + >; + }; }; &i2c1 { @@ -90,6 +118,8 @@ &gpmc { ranges = <0 0 0x04000000 0x01000000>; /* CS0: 16MB for NAND */ + pinctrl-names = "default"; + pinctrl-0 = <&nandflash_pins>; nand@0,0 { compatible = "ti,omap2-nand"; @@ -98,9 +128,11 @@ interrupt-parent = <&gpmc>; interrupts = <0 IRQ_TYPE_NONE>, /* fifoevent */ <1 IRQ_TYPE_NONE>; /* termcount */ + rb-gpios = <&gpmc 0 GPIO_ACTIVE_HIGH>; /* gpmc_wait0 */ #address-cells = <1>; #size-cells = <1>; ti,nand-ecc-opt = "bch8"; + ti,elm-id = <&elm>; nand-bus-width = <16>; gpmc,device-width = <2>; gpmc,sync-clk-ps = <0>; diff --git a/arch/arm/boot/dts/dm816x.dtsi b/arch/arm/boot/dts/dm816x.dtsi index 59cbf958fcc3..566b2a8c8b96 100644 --- a/arch/arm/boot/dts/dm816x.dtsi +++ b/arch/arm/boot/dts/dm816x.dtsi @@ -145,7 +145,7 @@ }; elm: elm@48080000 { - compatible = "ti,816-elm"; + compatible = "ti,am3352-elm"; ti,hwmods = "elm"; reg = <0x48080000 0x2000>; interrupts = <4>; -- cgit v1.2.3 From 5601ca471f0892acc36e0316844fa6bd774bfc7e Mon Sep 17 00:00:00 2001 From: Mihail Grigorov Date: Wed, 5 Jul 2017 00:54:45 -0700 Subject: ARM: dts: dm816x: Correct the state of the write protect pin Commit 599c376c4932 ("ARM: dts: Fix gpio interrupts for dm816x") corrected some problems with the MMC. However, it gets the write protect pin backwards. It needs to be ACTIVE_HIGH not ACTIVE_LOW. Cc: Rob Herring Cc: Mark Rutland Cc: Russell King Cc: Tony Lindgren Signed-off-by: Mihail Grigorov Signed-off-by: Tom Rini Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/dm8168-evm.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/dm8168-evm.dts b/arch/arm/boot/dts/dm8168-evm.dts index 996eba0c2e7a..c72a2132aa82 100644 --- a/arch/arm/boot/dts/dm8168-evm.dts +++ b/arch/arm/boot/dts/dm8168-evm.dts @@ -196,7 +196,7 @@ vmmc-supply = <&vmmcsd_fixed>; bus-width = <4>; cd-gpios = <&gpio2 7 GPIO_ACTIVE_LOW>; - wp-gpios = <&gpio2 8 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio2 8 GPIO_ACTIVE_HIGH>; }; /* At least dm8168-evm rev c won't support multipoint, later may */ -- cgit v1.2.3 From ff570a01b1bd15cf8a87c9ea774fc9c52ea07f15 Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Wed, 5 Jul 2017 00:54:52 -0700 Subject: ARM: dts: dra71-evm: mdio: Fix impedance values v1 series[1] for dp83867 phy impedance-control support, specifies to use ti,impedance-control with a value. These properties got updated iduring review to specify whether min or max impedance. But the DT still uses the old values which never takes effect. Update the DT node by using the proper DT properties. [1] https://patchwork.kernel.org/patch/9239729/ Fixes: 9868bc585ae2c ("ARM: dts: Add support for dra718-evm") Signed-off-by: Lokesh Vutla Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/dra71-evm.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/dra71-evm.dts b/arch/arm/boot/dts/dra71-evm.dts index 4d57a55473af..a6298eb56978 100644 --- a/arch/arm/boot/dts/dra71-evm.dts +++ b/arch/arm/boot/dts/dra71-evm.dts @@ -190,7 +190,7 @@ ti,rx-internal-delay = ; ti,tx-internal-delay = ; ti,fifo-depth = ; - ti,impedance-control = <0x1f>; + ti,min-output-impedance; }; dp83867_1: ethernet-phy@3 { @@ -198,7 +198,7 @@ ti,rx-internal-delay = ; ti,tx-internal-delay = ; ti,fifo-depth = ; - ti,impedance-control = <0x1f>; + ti,min-output-impedance; }; }; -- cgit v1.2.3 From 2752660a37aed65b1e00fd4563d9f152eefb8200 Mon Sep 17 00:00:00 2001 From: Vladimir Barinov Date: Fri, 7 Jul 2017 05:09:33 +0300 Subject: arm64: dts: renesas: ulcb: sound clock-frequency needs descending order Correct order of sound clock frequencies for ULCB boards used with r8a7795 and r8a7796 SoCs. These sounds clock frequencies are used as the ADG clock (output clocks for audio module) initial setting and sound codec's initial system clock which needs the maximum clock frequency. Thus, descending order is required. Fixes: 9f22774c214ada7b ("arm64: dts: ulcb: add 12288000 for sound ADG") Signed-off-by: Vladimir Barinov Reviewed-by: Geert Uytterhoeven [simon: rewrote changelog] Signed-off-by: Simon Horman --- arch/arm64/boot/dts/renesas/ulcb.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/renesas/ulcb.dtsi b/arch/arm64/boot/dts/renesas/ulcb.dtsi index b5c6ee07d7f9..d1a3f3b7a0ab 100644 --- a/arch/arm64/boot/dts/renesas/ulcb.dtsi +++ b/arch/arm64/boot/dts/renesas/ulcb.dtsi @@ -281,7 +281,7 @@ /* audio_clkout0/1/2/3 */ #clock-cells = <1>; - clock-frequency = <11289600 12288000>; + clock-frequency = <12288000 11289600>; status = "okay"; -- cgit v1.2.3 From b855629b9a4107c37999723c09cc9fd357b53caf Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 6 Jul 2017 14:08:25 +0200 Subject: s390/perf: fix problem state detection The P sample bit indicates problem state and not PER. Fixes: commit a752598254 ("s390: rename struct psw_bits members") Cc: Heiko Carstens Signed-off-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/perf_cpum_sf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 0c82f7903fc7..c1bf75ffb875 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -998,7 +998,7 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr) psw_bits(regs.psw).ia = sfr->basic.ia; psw_bits(regs.psw).dat = sfr->basic.T; psw_bits(regs.psw).wait = sfr->basic.W; - psw_bits(regs.psw).per = sfr->basic.P; + psw_bits(regs.psw).pstate = sfr->basic.P; psw_bits(regs.psw).as = sfr->basic.AS; /* -- cgit v1.2.3 From 97ca7bfc19605bc08e9183441b8b8545e84032d6 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 6 Jul 2017 10:12:58 +0200 Subject: s390/mm: set change and reference bit on lazy key enablement When we enable storage keys for a guest lazily, we reset the ACC and F values. That is correct assuming that these are 0 on a clear reset and the guest obviously has not used any key setting instruction. We also zero out the change and reference bit. This is not correct as the architecture prefers over-indication instead of under-indication for the keyless->keyed transition. This patch fixes the behaviour and always sets guest change and guest reference for all guest storage keys on the keyless -> keyed switch. Signed-off-by: Christian Borntraeger Reviewed-by: Claudio Imbrenda Signed-off-by: Martin Schwidefsky --- arch/s390/mm/pgtable.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index d4d409ba206b..4a1f7366b17a 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -591,11 +591,11 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) unsigned long ptev; pgste_t pgste; - /* Clear storage key */ + /* Clear storage key ACC and F, but set R/C */ preempt_disable(); pgste = pgste_get_lock(ptep); - pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT | - PGSTE_GR_BIT | PGSTE_GC_BIT); + pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT); + pgste_val(pgste) |= PGSTE_GR_BIT | PGSTE_GC_BIT; ptev = pte_val(*ptep); if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE)) page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1); -- cgit v1.2.3 From ca12437303f35b0a5b97c856d88886ce9df6a0d7 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Sat, 1 Jul 2017 18:20:35 +0200 Subject: ARM: dts: rockchip: fix mali gpu node on rk3288 The binding specifies the actual implementations only (mali-t760 for example) but not the arm,mali-midgard used in some vendor kernels. So drop that compatible property from the rk3288 where it had slipped in. Also fix the node name which should be a generic gpu@... Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rk3288.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index 2484f11761ea..858e1fed762a 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -1126,8 +1126,8 @@ }; }; - gpu: mali@ffa30000 { - compatible = "rockchip,rk3288-mali", "arm,mali-t760", "arm,mali-midgard"; + gpu: gpu@ffa30000 { + compatible = "rockchip,rk3288-mali", "arm,mali-t760"; reg = <0xffa30000 0x10000>; interrupts = , , -- cgit v1.2.3 From ba64792ff9b4612ab20623118e0b616e760ecdf7 Mon Sep 17 00:00:00 2001 From: Faiz Abbas Date: Fri, 14 Jul 2017 18:16:43 +0530 Subject: ARM: OMAP2+: hsmmc.c: Remove dead code Most platforms using OMAP hsmmc driver have switched to device tree for passing platform data to omap_hsmmc.c driver. The hsmmc.c file in mach-omap2 exists only to support pandora board which uses wl1251 driver in legacy platform data mode. Hence, remove the dead code not used by the pandora board. Signed-off-by: Faiz Abbas Acked-by: Tony Lindgren Signed-off-by: Ulf Hansson --- arch/arm/mach-omap2/hsmmc.c | 239 +------------------------------------------- arch/arm/mach-omap2/hsmmc.h | 9 -- 2 files changed, 2 insertions(+), 246 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-omap2/hsmmc.c b/arch/arm/mach-omap2/hsmmc.c index be517b048762..5b614388d72f 100644 --- a/arch/arm/mach-omap2/hsmmc.c +++ b/arch/arm/mach-omap2/hsmmc.c @@ -32,120 +32,6 @@ static u16 control_devconf1_offset; #define HSMMC_NAME_LEN 9 -static void omap_hsmmc1_before_set_reg(struct device *dev, - int power_on, int vdd) -{ - u32 reg, prog_io; - struct omap_hsmmc_platform_data *mmc = dev->platform_data; - - if (mmc->remux) - mmc->remux(dev, power_on); - - /* - * Assume we power both OMAP VMMC1 (for CMD, CLK, DAT0..3) and the - * card with Vcc regulator (from twl4030 or whatever). OMAP has both - * 1.8V and 3.0V modes, controlled by the PBIAS register. - * - * In 8-bit modes, OMAP VMMC1A (for DAT4..7) needs a supply, which - * is most naturally TWL VSIM; those pins also use PBIAS. - * - * FIXME handle VMMC1A as needed ... - */ - if (power_on) { - if (cpu_is_omap2430()) { - reg = omap_ctrl_readl(OMAP243X_CONTROL_DEVCONF1); - if ((1 << vdd) >= MMC_VDD_30_31) - reg |= OMAP243X_MMC1_ACTIVE_OVERWRITE; - else - reg &= ~OMAP243X_MMC1_ACTIVE_OVERWRITE; - omap_ctrl_writel(reg, OMAP243X_CONTROL_DEVCONF1); - } - - if (mmc->internal_clock) { - reg = omap_ctrl_readl(OMAP2_CONTROL_DEVCONF0); - reg |= OMAP2_MMCSDIO1ADPCLKISEL; - omap_ctrl_writel(reg, OMAP2_CONTROL_DEVCONF0); - } - - reg = omap_ctrl_readl(control_pbias_offset); - if (cpu_is_omap3630()) { - /* Set MMC I/O to 52MHz */ - prog_io = omap_ctrl_readl(OMAP343X_CONTROL_PROG_IO1); - prog_io |= OMAP3630_PRG_SDMMC1_SPEEDCTRL; - omap_ctrl_writel(prog_io, OMAP343X_CONTROL_PROG_IO1); - } else { - reg |= OMAP2_PBIASSPEEDCTRL0; - } - reg &= ~OMAP2_PBIASLITEPWRDNZ0; - omap_ctrl_writel(reg, control_pbias_offset); - } else { - reg = omap_ctrl_readl(control_pbias_offset); - reg &= ~OMAP2_PBIASLITEPWRDNZ0; - omap_ctrl_writel(reg, control_pbias_offset); - } -} - -static void omap_hsmmc1_after_set_reg(struct device *dev, int power_on, int vdd) -{ - u32 reg; - - /* 100ms delay required for PBIAS configuration */ - msleep(100); - - if (power_on) { - reg = omap_ctrl_readl(control_pbias_offset); - reg |= (OMAP2_PBIASLITEPWRDNZ0 | OMAP2_PBIASSPEEDCTRL0); - if ((1 << vdd) <= MMC_VDD_165_195) - reg &= ~OMAP2_PBIASLITEVMODE0; - else - reg |= OMAP2_PBIASLITEVMODE0; - omap_ctrl_writel(reg, control_pbias_offset); - } else { - reg = omap_ctrl_readl(control_pbias_offset); - reg |= (OMAP2_PBIASSPEEDCTRL0 | OMAP2_PBIASLITEPWRDNZ0 | - OMAP2_PBIASLITEVMODE0); - omap_ctrl_writel(reg, control_pbias_offset); - } -} - -static void hsmmc2_select_input_clk_src(struct omap_hsmmc_platform_data *mmc) -{ - u32 reg; - - reg = omap_ctrl_readl(control_devconf1_offset); - if (mmc->internal_clock) - reg |= OMAP2_MMCSDIO2ADPCLKISEL; - else - reg &= ~OMAP2_MMCSDIO2ADPCLKISEL; - omap_ctrl_writel(reg, control_devconf1_offset); -} - -static void hsmmc2_before_set_reg(struct device *dev, int power_on, int vdd) -{ - struct omap_hsmmc_platform_data *mmc = dev->platform_data; - - if (mmc->remux) - mmc->remux(dev, power_on); - - if (power_on) - hsmmc2_select_input_clk_src(mmc); -} - -static int am35x_hsmmc2_set_power(struct device *dev, int power_on, int vdd) -{ - struct omap_hsmmc_platform_data *mmc = dev->platform_data; - - if (power_on) - hsmmc2_select_input_clk_src(mmc); - - return 0; -} - -static int nop_mmc_set_power(struct device *dev, int power_on, int vdd) -{ - return 0; -} - static int __init omap_hsmmc_pdata_init(struct omap2_hsmmc_info *c, struct omap_hsmmc_platform_data *mmc) { @@ -157,101 +43,11 @@ static int __init omap_hsmmc_pdata_init(struct omap2_hsmmc_info *c, return -ENOMEM; } - if (c->name) - strncpy(hc_name, c->name, HSMMC_NAME_LEN); - else - snprintf(hc_name, (HSMMC_NAME_LEN + 1), "mmc%islot%i", - c->mmc, 1); + snprintf(hc_name, (HSMMC_NAME_LEN + 1), "mmc%islot%i", c->mmc, 1); mmc->name = hc_name; mmc->caps = c->caps; - mmc->internal_clock = !c->ext_clock; mmc->reg_offset = 0; - if (c->cover_only) { - /* detect if mobile phone cover removed */ - mmc->gpio_cd = -EINVAL; - mmc->gpio_cod = c->gpio_cd; - } else { - /* card detect pin on the mmc socket itself */ - mmc->gpio_cd = c->gpio_cd; - mmc->gpio_cod = -EINVAL; - } - mmc->gpio_wp = c->gpio_wp; - - mmc->remux = c->remux; - mmc->init_card = c->init_card; - - if (c->nonremovable) - mmc->nonremovable = 1; - - /* - * NOTE: MMC slots should have a Vcc regulator set up. - * This may be from a TWL4030-family chip, another - * controllable regulator, or a fixed supply. - * - * temporary HACK: ocr_mask instead of fixed supply - */ - if (soc_is_am35xx()) - mmc->ocr_mask = MMC_VDD_165_195 | - MMC_VDD_26_27 | - MMC_VDD_27_28 | - MMC_VDD_29_30 | - MMC_VDD_30_31 | - MMC_VDD_31_32; - else - mmc->ocr_mask = c->ocr_mask; - - if (!soc_is_am35xx()) - mmc->features |= HSMMC_HAS_PBIAS; - - switch (c->mmc) { - case 1: - if (mmc->features & HSMMC_HAS_PBIAS) { - /* on-chip level shifting via PBIAS0/PBIAS1 */ - mmc->before_set_reg = - omap_hsmmc1_before_set_reg; - mmc->after_set_reg = - omap_hsmmc1_after_set_reg; - } - - if (soc_is_am35xx()) - mmc->set_power = nop_mmc_set_power; - - /* OMAP3630 HSMMC1 supports only 4-bit */ - if (cpu_is_omap3630() && - (c->caps & MMC_CAP_8_BIT_DATA)) { - c->caps &= ~MMC_CAP_8_BIT_DATA; - c->caps |= MMC_CAP_4_BIT_DATA; - mmc->caps = c->caps; - } - break; - case 2: - if (soc_is_am35xx()) - mmc->set_power = am35x_hsmmc2_set_power; - - if (c->ext_clock) - c->transceiver = 1; - if (c->transceiver && (c->caps & MMC_CAP_8_BIT_DATA)) { - c->caps &= ~MMC_CAP_8_BIT_DATA; - c->caps |= MMC_CAP_4_BIT_DATA; - } - if (mmc->features & HSMMC_HAS_PBIAS) { - /* off-chip level shifting, or none */ - mmc->before_set_reg = hsmmc2_before_set_reg; - mmc->after_set_reg = NULL; - } - break; - case 3: - case 4: - case 5: - mmc->before_set_reg = NULL; - mmc->after_set_reg = NULL; - break; - default: - pr_err("MMC%d configuration not supported!\n", c->mmc); - kfree(hc_name); - return -ENODEV; - } return 0; } @@ -260,7 +56,6 @@ static int omap_hsmmc_done; void omap_hsmmc_late_init(struct omap2_hsmmc_info *c) { struct platform_device *pdev; - struct omap_hsmmc_platform_data *mmc_pdata; int res; if (omap_hsmmc_done != 1) @@ -269,32 +64,12 @@ void omap_hsmmc_late_init(struct omap2_hsmmc_info *c) omap_hsmmc_done++; for (; c->mmc; c++) { - if (!c->deferred) - continue; - pdev = c->pdev; if (!pdev) continue; - - mmc_pdata = pdev->dev.platform_data; - if (!mmc_pdata) - continue; - - if (c->cover_only) { - /* detect if mobile phone cover removed */ - mmc_pdata->gpio_cd = -EINVAL; - mmc_pdata->gpio_cod = c->gpio_cd; - } else { - /* card detect pin on the mmc socket itself */ - mmc_pdata->gpio_cd = c->gpio_cd; - mmc_pdata->gpio_cod = -EINVAL; - } - mmc_pdata->gpio_wp = c->gpio_wp; - res = omap_device_register(pdev); if (res) - pr_err("Could not late init MMC %s\n", - c->name); + pr_err("Could not late init MMC\n"); } } @@ -336,13 +111,6 @@ static void __init omap_hsmmc_init_one(struct omap2_hsmmc_info *hsmmcinfo, if (oh->dev_attr != NULL) { mmc_dev_attr = oh->dev_attr; mmc_data->controller_flags = mmc_dev_attr->flags; - /* - * erratum 2.1.1.128 doesn't apply if board has - * a transceiver is attached - */ - if (hsmmcinfo->transceiver) - mmc_data->controller_flags &= - ~OMAP_HSMMC_BROKEN_MULTIBLOCK_READ; } pdev = platform_device_alloc(name, ctrl_nr - 1); @@ -367,9 +135,6 @@ static void __init omap_hsmmc_init_one(struct omap2_hsmmc_info *hsmmcinfo, hsmmcinfo->pdev = pdev; - if (hsmmcinfo->deferred) - goto free_mmc; - res = omap_device_register(pdev); if (res) { pr_err("Could not register od for %s\n", name); diff --git a/arch/arm/mach-omap2/hsmmc.h b/arch/arm/mach-omap2/hsmmc.h index 69b619ddc765..af9af5094ec3 100644 --- a/arch/arm/mach-omap2/hsmmc.h +++ b/arch/arm/mach-omap2/hsmmc.h @@ -12,18 +12,9 @@ struct omap2_hsmmc_info { u8 mmc; /* controller 1/2/3 */ u32 caps; /* 4/8 wires and any additional host * capabilities OR'd (ref. linux/mmc/host.h) */ - bool transceiver; /* MMC-2 option */ - bool ext_clock; /* use external pin for input clock */ - bool cover_only; /* No card detect - just cover switch */ - bool nonremovable; /* Nonremovable e.g. eMMC */ - bool deferred; /* mmc needs a deferred probe */ int gpio_cd; /* or -EINVAL */ int gpio_wp; /* or -EINVAL */ - char *name; /* or NULL for default */ struct platform_device *pdev; /* mmc controller instance */ - int ocr_mask; /* temporary HACK */ - /* Remux (pad configuration) when powering on/off */ - void (*remux)(struct device *dev, int power_on); /* init some special card */ void (*init_card)(struct mmc_card *card); }; -- cgit v1.2.3 From 76127d6fe00062bddb25515d8a4f44633c41fe14 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Fri, 7 Jul 2017 09:59:28 +0200 Subject: ARM: mvebu: use __pa_symbol in the mv98dx3236 platform SMP code As we already did for Armada XP switch from virt_to_phys() to __pa_symbol(). The reason for it was well explained by Mark Rutland so let's quote him: "virt_to_phys() is intended to operate on the linear/direct mapping of RAM. __pa_symbol() is intended to operate on the kernel mapping, which may not be in the linear/direct mapping on all architectures. e.g. arm64 and x86_64 map the kernel image and RAM separately. On 32-bit ARM the kernel image mapping is tied to the linear/direct mapping, so that works, but as it's semantically wrong (and broken for generic code), the DEBUG_VIRTUAL checks complain." Fixes: db88977894ab ("arm: mvebu: support for SMP on 98DX3336 SoC") Cc: Reviewed-by: Florian Fainelli Tested-by: Chris Packham Signed-off-by: Gregory CLEMENT --- arch/arm/mach-mvebu/platsmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-mvebu/platsmp.c b/arch/arm/mach-mvebu/platsmp.c index e62273aacb43..4ffbbd217e82 100644 --- a/arch/arm/mach-mvebu/platsmp.c +++ b/arch/arm/mach-mvebu/platsmp.c @@ -211,7 +211,7 @@ static int mv98dx3236_resume_set_cpu_boot_addr(int hw_cpu, void *boot_addr) return PTR_ERR(base); writel(0, base + MV98DX3236_CPU_RESUME_CTRL_REG); - writel(virt_to_phys(boot_addr), base + MV98DX3236_CPU_RESUME_ADDR_REG); + writel(__pa_symbol(boot_addr), base + MV98DX3236_CPU_RESUME_ADDR_REG); iounmap(base); -- cgit v1.2.3 From 8d4514173211586c6238629b1ef1e071927735f5 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Wed, 12 Jul 2017 13:23:11 +0200 Subject: ARM: dts: armada-38x: Fix irq type for pca955 As written in the datasheet the PCA955 can only handle low level irq and not edge irq. Without this fix the interrupt is not usable for pca955: the gpio-pca953x driver already set the irq type as low level which is incompatible with edge type, then the kernel prevents using the interrupt: "irq: type mismatch, failed to map hwirq-18 for /soc/internal-regs/gpio@18100!" Fixes: 928413bd859c ("ARM: mvebu: Add Armada 388 General Purpose Development Board support") Cc: stable@vger.kernel.org Signed-off-by: Gregory CLEMENT --- arch/arm/boot/dts/armada-388-gp.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/armada-388-gp.dts b/arch/arm/boot/dts/armada-388-gp.dts index 895fa6cfa15a..563901e0ec07 100644 --- a/arch/arm/boot/dts/armada-388-gp.dts +++ b/arch/arm/boot/dts/armada-388-gp.dts @@ -75,7 +75,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pca0_pins>; interrupt-parent = <&gpio0>; - interrupts = <18 IRQ_TYPE_EDGE_FALLING>; + interrupts = <18 IRQ_TYPE_LEVEL_LOW>; gpio-controller; #gpio-cells = <2>; interrupt-controller; @@ -87,7 +87,7 @@ compatible = "nxp,pca9555"; pinctrl-names = "default"; interrupt-parent = <&gpio0>; - interrupts = <18 IRQ_TYPE_EDGE_FALLING>; + interrupts = <18 IRQ_TYPE_LEVEL_LOW>; gpio-controller; #gpio-cells = <2>; interrupt-controller; -- cgit v1.2.3 From 579c183e4fe01d493f2efef8a335a9607376f754 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 18 Jul 2017 15:10:32 +0200 Subject: arm64: dts: marvell: use ICU for the CP110 slave RTC When the conversion of the Marvell CP110 Device Tree description from using GIC interrupts to using ICU interrupts was done, the RTC on the slave CP110 was left unchanged. This commit fixes that, so that all devices on the CP properly get their interrupt through the ICU. Fixes: 6ef84a827c375 ("arm64: dts: marvell: enable GICP and ICU on Armada 7K/8K") Signed-off-by: Thomas Petazzoni Signed-off-by: Gregory CLEMENT --- arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi index 95f8e5f607f6..471aaf8dc750 100644 --- a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi @@ -64,7 +64,7 @@ compatible = "marvell,armada-8k-rtc"; reg = <0x284000 0x20>, <0x284080 0x24>; reg-names = "rtc", "rtc-soc"; - interrupts = ; + interrupts = ; }; cps_ethernet: ethernet@0 { -- cgit v1.2.3 From fc290a114fc6034b0f6a5a46e2fb7d54976cf87a Mon Sep 17 00:00:00 2001 From: Rob Gardner Date: Mon, 17 Jul 2017 09:22:27 -0600 Subject: sparc64: Prevent perf from running during super critical sections This fixes another cause of random segfaults and bus errors that may occur while running perf with the callgraph option. Critical sections beginning with spin_lock_irqsave() raise the interrupt level to PIL_NORMAL_MAX (14) and intentionally do not block performance counter interrupts, which arrive at PIL_NMI (15). But some sections of code are "super critical" with respect to perf because the perf_callchain_user() path accesses user space and may cause TLB activity as well as faults as it unwinds the user stack. One particular critical section occurs in switch_mm: spin_lock_irqsave(&mm->context.lock, flags); ... load_secondary_context(mm); tsb_context_switch(mm); ... spin_unlock_irqrestore(&mm->context.lock, flags); If a perf interrupt arrives in between load_secondary_context() and tsb_context_switch(), then perf_callchain_user() could execute with the context ID of one process, but with an active TSB for a different process. When the user stack is accessed, it is very likely to incur a TLB miss, since the h/w context ID has been changed. The TLB will then be reloaded with a translation from the TSB for one process, but using a context ID for another process. This exposes memory from one process to another, and since it is a mapping for stack memory, this usually causes the new process to crash quickly. This super critical section needs more protection than is provided by spin_lock_irqsave() since perf interrupts must not be allowed in. Since __tsb_context_switch already goes through the trouble of disabling interrupts completely, we fix this by moving the secondary context load down into this better protected region. Orabug: 25577560 Signed-off-by: Dave Aldridge Signed-off-by: Rob Gardner Signed-off-by: David S. Miller --- arch/sparc/include/asm/mmu_context_64.h | 14 +++++++++----- arch/sparc/kernel/tsb.S | 12 ++++++++++++ arch/sparc/power/hibernate.c | 3 +-- 3 files changed, 22 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/sparc/include/asm/mmu_context_64.h b/arch/sparc/include/asm/mmu_context_64.h index 2cddcda4f85f..87841d687f8d 100644 --- a/arch/sparc/include/asm/mmu_context_64.h +++ b/arch/sparc/include/asm/mmu_context_64.h @@ -27,9 +27,11 @@ void destroy_context(struct mm_struct *mm); void __tsb_context_switch(unsigned long pgd_pa, struct tsb_config *tsb_base, struct tsb_config *tsb_huge, - unsigned long tsb_descr_pa); + unsigned long tsb_descr_pa, + unsigned long secondary_ctx); -static inline void tsb_context_switch(struct mm_struct *mm) +static inline void tsb_context_switch_ctx(struct mm_struct *mm, + unsigned long ctx) { __tsb_context_switch(__pa(mm->pgd), &mm->context.tsb_block[MM_TSB_BASE], @@ -40,9 +42,12 @@ static inline void tsb_context_switch(struct mm_struct *mm) #else NULL #endif - , __pa(&mm->context.tsb_descr[MM_TSB_BASE])); + , __pa(&mm->context.tsb_descr[MM_TSB_BASE]), + ctx); } +#define tsb_context_switch(X) tsb_context_switch_ctx(X, 0) + void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long mm_rss); @@ -112,8 +117,7 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str * cpu0 to update it's TSB because at that point the cpu_vm_mask * only had cpu1 set in it. */ - load_secondary_context(mm); - tsb_context_switch(mm); + tsb_context_switch_ctx(mm, CTX_HWBITS(mm->context)); /* Any time a processor runs a context on an address space * for the first time, we must flush that context out of the diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S index 07c0df924960..db872dbfafe9 100644 --- a/arch/sparc/kernel/tsb.S +++ b/arch/sparc/kernel/tsb.S @@ -360,6 +360,7 @@ tsb_flush: * %o1: TSB base config pointer * %o2: TSB huge config pointer, or NULL if none * %o3: Hypervisor TSB descriptor physical address + * %o4: Secondary context to load, if non-zero * * We have to run this whole thing with interrupts * disabled so that the current cpu doesn't change @@ -372,6 +373,17 @@ __tsb_context_switch: rdpr %pstate, %g1 wrpr %g1, PSTATE_IE, %pstate + brz,pn %o4, 1f + mov SECONDARY_CONTEXT, %o5 + +661: stxa %o4, [%o5] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %o4, [%o5] ASI_MMU + .previous + flush %g6 + +1: TRAP_LOAD_TRAP_BLOCK(%g2, %g3) stx %o0, [%g2 + TRAP_PER_CPU_PGD_PADDR] diff --git a/arch/sparc/power/hibernate.c b/arch/sparc/power/hibernate.c index 17bd2e167e07..df707a8ad311 100644 --- a/arch/sparc/power/hibernate.c +++ b/arch/sparc/power/hibernate.c @@ -35,6 +35,5 @@ void restore_processor_state(void) { struct mm_struct *mm = current->active_mm; - load_secondary_context(mm); - tsb_context_switch(mm); + tsb_context_switch_ctx(mm, CTX_HWBITS(mm->context)); } -- cgit v1.2.3 From e3ccf1d1dee5129beb839fe05c61eb134131bdd6 Mon Sep 17 00:00:00 2001 From: Harvey Hunt Date: Tue, 18 Jul 2017 14:25:45 +0100 Subject: MIPS: ralink: Fix build error due to missing header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, was included before ralink_regs.h in all ralink files - leading to being implicitly included. After commit 26dd3e4ff9ac ("MIPS: Audit and remove any unnecessary uses of module.h") removed the inclusion of module.h from multiple places, some ralink platforms failed to build with the following error: In file included from arch/mips/ralink/mt7620.c:17:0: ./arch/mips/include/asm/mach-ralink/ralink_regs.h: In function ‘rt_sysc_w32’: ./arch/mips/include/asm/mach-ralink/ralink_regs.h:38:2: error: implicit declaration of function ‘__raw_writel’ [-Werror=implicit-function-declaration] __raw_writel(val, rt_sysc_membase + reg); ^ ./arch/mips/include/asm/mach-ralink/ralink_regs.h: In function ‘rt_sysc_r32’: ./arch/mips/include/asm/mach-ralink/ralink_regs.h:43:2: error: implicit declaration of function ‘__raw_readl’ [-Werror=implicit-function-declaration] return __raw_readl(rt_sysc_membase + reg); Fix this by including . Signed-off-by: Harvey Hunt Fixes: 26dd3e4ff9ac ("MIPS: Audit and remove any unnecessary uses of module.h") Cc: John Crispin Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: #4.11+ Patchwork: https://patchwork.linux-mips.org/patch/16780/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/mach-ralink/ralink_regs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/mips/include/asm/mach-ralink/ralink_regs.h b/arch/mips/include/asm/mach-ralink/ralink_regs.h index 9df1a53bcb36..b4e7dfa214eb 100644 --- a/arch/mips/include/asm/mach-ralink/ralink_regs.h +++ b/arch/mips/include/asm/mach-ralink/ralink_regs.h @@ -13,6 +13,8 @@ #ifndef _RALINK_REGS_H_ #define _RALINK_REGS_H_ +#include + enum ralink_soc_type { RALINK_UNKNOWN = 0, RT2880_SOC, -- cgit v1.2.3 From f13343e87713959437a4a50a730de1736f23fc20 Mon Sep 17 00:00:00 2001 From: Harvey Hunt Date: Tue, 18 Jul 2017 14:25:46 +0100 Subject: MIPS: ralink: mt7620: Add missing header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a build error caused by not including . The following compilation errors are caused by the missing header: arch/mips/ralink/mt7620.c: In function ‘mt7620_get_cpu_pll_rate’: arch/mips/ralink/mt7620.c:431:2: error: implicit declaration of function ‘WARN_ON’ [-Werror=implicit-function-declaration] WARN_ON(div >= ARRAY_SIZE(mt7620_clk_divider)); ^ arch/mips/ralink/mt7620.c: In function ‘mt7620_get_sys_rate’: arch/mips/ralink/mt7620.c:500:2: error: implicit declaration of function ‘WARN’ [-Werror=implicit-function-declaration] if (WARN(!div, "invalid divider for OCP ratio %u", ocp_ratio)) ^ arch/mips/ralink/mt7620.c: In function ‘mt7620_dram_init’: arch/mips/ralink/mt7620.c:619:3: error: implicit declaration of function ‘BUG’ [-Werror=implicit-function-declaration] BUG(); ^ cc1: some warnings being treated as errors scripts/Makefile.build:302: recipe for target 'arch/mips/ralink/mt7620.o' failed Signed-off-by: Harvey Hunt Cc: John Crispin Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/16781/ Signed-off-by: Ralf Baechle --- arch/mips/ralink/mt7620.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/mips/ralink/mt7620.c b/arch/mips/ralink/mt7620.c index 094a0ee4af46..9be8b08ae46b 100644 --- a/arch/mips/ralink/mt7620.c +++ b/arch/mips/ralink/mt7620.c @@ -12,6 +12,7 @@ #include #include +#include #include #include -- cgit v1.2.3 From ec0aef9881d18aa781268ec9cba8eba5b202f5b4 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Wed, 19 Jul 2017 11:04:26 +0200 Subject: arm64: dts: marvell: mark the cp110 crypto engine as dma coherent The crypto engines found on the cp110 master and slave are dma coherent. This patch adds the relevant property to their dt nodes. Cc: stable@vger.kernel.org # v4.12+ Fixes: 973020fd9498 ("arm64: marvell: dts: add crypto engine description for 7k/8k") Signed-off-by: Antoine Tenart Signed-off-by: Gregory CLEMENT --- arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi | 1 + arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi | 1 + 2 files changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi index 726528ce54e9..4c68605675a8 100644 --- a/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi @@ -270,6 +270,7 @@ interrupt-names = "mem", "ring0", "ring1", "ring2", "ring3", "eip"; clocks = <&cpm_clk 1 26>; + dma-coherent; }; }; diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi index 471aaf8dc750..923f354b02f0 100644 --- a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi @@ -261,6 +261,7 @@ interrupt-names = "mem", "ring0", "ring1", "ring2", "ring3", "eip"; clocks = <&cps_clk 1 26>; + dma-coherent; /* * The cryptographic engine found on the cp110 * master is enabled by default at the SoC -- cgit v1.2.3 From c396fe7f0c2efdf6c02d723f7bd492c58725c822 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 3 Jul 2017 14:37:46 +0100 Subject: arm64: uaccess: Remove redundant __force from addr cast in __range_ok Casting a pointer to an integral type doesn't require a __force attribute, because you'll need to cast back to a pointer in order to dereference the thing anyway. This patch removes the redundant __force cast from __range_ok. Reported-by: Luc Van Oostenryck Signed-off-by: Will Deacon --- arch/arm64/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 8f0a1de11e4a..fab46a0ea223 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -69,7 +69,7 @@ static inline void set_fs(mm_segment_t fs) */ #define __range_ok(addr, size) \ ({ \ - unsigned long __addr = (unsigned long __force)(addr); \ + unsigned long __addr = (unsigned long)(addr); \ unsigned long flag, roksum; \ __chk_user_ptr(addr); \ asm("adds %1, %1, %3; ccmp %1, %4, #2, cc; cset %0, ls" \ -- cgit v1.2.3 From 32fb5d73c98b079e7c815b62e9d88a39ff8ce509 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 7 Jul 2017 17:01:50 +0100 Subject: arm64: atomics: Remove '&' from '+&' asm constraint in lse atomics The lse implementation of atomic64_dec_if_positive uses the '+&' constraint, but the '&' is redundant and confusing in this case, since early clobber on a read/write operand is a strange concept. Replace the constraint with '+'. Signed-off-by: Will Deacon --- arch/arm64/include/asm/atomic_lse.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 99fa69c9c3cf..9ef0797380cb 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -435,7 +435,7 @@ static inline long atomic64_dec_if_positive(atomic64_t *v) " sub x30, x30, %[ret]\n" " cbnz x30, 1b\n" "2:") - : [ret] "+&r" (x0), [v] "+Q" (v->counter) + : [ret] "+r" (x0), [v] "+Q" (v->counter) : : __LL_SC_CLOBBERS, "cc", "memory"); -- cgit v1.2.3 From 6f44a0bacb79a03972c83759711832b382b1b8ac Mon Sep 17 00:00:00 2001 From: Qiao Zhou Date: Fri, 7 Jul 2017 17:29:34 +0800 Subject: arm64: traps: disable irq in die() In current die(), the irq is disabled for __die() handle, not including the possible panic() handling. Since the log in __die() can take several hundreds ms, new irq might come and interrupt current die(). If the process calling die() holds some critical resource, and some other process scheduled later also needs it, then it would deadlock. The first panic will not be executed. So here disable irq for the whole flow of die(). Signed-off-by: Qiao Zhou Signed-off-by: Will Deacon --- arch/arm64/kernel/traps.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index c7c7088097be..d48f47080213 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -274,10 +274,12 @@ static DEFINE_RAW_SPINLOCK(die_lock); void die(const char *str, struct pt_regs *regs, int err) { int ret; + unsigned long flags; + + raw_spin_lock_irqsave(&die_lock, flags); oops_enter(); - raw_spin_lock_irq(&die_lock); console_verbose(); bust_spinlocks(1); ret = __die(str, err, regs); @@ -287,13 +289,15 @@ void die(const char *str, struct pt_regs *regs, int err) bust_spinlocks(0); add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); - raw_spin_unlock_irq(&die_lock); oops_exit(); if (in_interrupt()) panic("Fatal exception in interrupt"); if (panic_on_oops) panic("Fatal exception"); + + raw_spin_unlock_irqrestore(&die_lock, flags); + if (ret != NOTIFY_STOP) do_exit(SIGSEGV); } -- cgit v1.2.3 From a270f32735a20affe325c351c359f13603537d05 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 18 Jul 2017 16:42:42 -0500 Subject: arm64: Convert to using %pOF instead of full_name Now that we have a custom printf format specifier, convert users of full_name to use %pOF instead. This is preparation to remove storing of the full path string for each node. Signed-off-by: Rob Herring Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_ops.c | 4 ++-- arch/arm64/kernel/smp.c | 12 ++++++------ arch/arm64/kernel/topology.c | 22 +++++++++++----------- 3 files changed, 19 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c index e137ceaf5016..d16978213c5b 100644 --- a/arch/arm64/kernel/cpu_ops.c +++ b/arch/arm64/kernel/cpu_ops.c @@ -82,8 +82,8 @@ static const char *__init cpu_read_enable_method(int cpu) * Don't warn spuriously. */ if (cpu != 0) - pr_err("%s: missing enable-method property\n", - dn->full_name); + pr_err("%pOF: missing enable-method property\n", + dn); } } else { enable_method = acpi_get_enable_method(cpu); diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 321119881abf..dc66e6ec3a99 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -469,7 +469,7 @@ static u64 __init of_get_cpu_mpidr(struct device_node *dn) */ cell = of_get_property(dn, "reg", NULL); if (!cell) { - pr_err("%s: missing reg property\n", dn->full_name); + pr_err("%pOF: missing reg property\n", dn); return INVALID_HWID; } @@ -478,7 +478,7 @@ static u64 __init of_get_cpu_mpidr(struct device_node *dn) * Non affinity bits must be set to 0 in the DT */ if (hwid & ~MPIDR_HWID_BITMASK) { - pr_err("%s: invalid reg property\n", dn->full_name); + pr_err("%pOF: invalid reg property\n", dn); return INVALID_HWID; } return hwid; @@ -627,8 +627,8 @@ static void __init of_parse_and_init_cpus(void) goto next; if (is_mpidr_duplicate(cpu_count, hwid)) { - pr_err("%s: duplicate cpu reg properties in the DT\n", - dn->full_name); + pr_err("%pOF: duplicate cpu reg properties in the DT\n", + dn); goto next; } @@ -640,8 +640,8 @@ static void __init of_parse_and_init_cpus(void) */ if (hwid == cpu_logical_map(0)) { if (bootcpu_valid) { - pr_err("%s: duplicate boot cpu reg property in DT\n", - dn->full_name); + pr_err("%pOF: duplicate boot cpu reg property in DT\n", + dn); goto next; } diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 79244c75eaec..8d48b233e6ce 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -45,7 +45,7 @@ static int __init get_cpu_for_node(struct device_node *node) } } - pr_crit("Unable to find CPU node for %s\n", cpu_node->full_name); + pr_crit("Unable to find CPU node for %pOF\n", cpu_node); of_node_put(cpu_node); return -1; @@ -71,8 +71,8 @@ static int __init parse_core(struct device_node *core, int cluster_id, cpu_topology[cpu].core_id = core_id; cpu_topology[cpu].thread_id = i; } else { - pr_err("%s: Can't get CPU for thread\n", - t->full_name); + pr_err("%pOF: Can't get CPU for thread\n", + t); of_node_put(t); return -EINVAL; } @@ -84,15 +84,15 @@ static int __init parse_core(struct device_node *core, int cluster_id, cpu = get_cpu_for_node(core); if (cpu >= 0) { if (!leaf) { - pr_err("%s: Core has both threads and CPU\n", - core->full_name); + pr_err("%pOF: Core has both threads and CPU\n", + core); return -EINVAL; } cpu_topology[cpu].cluster_id = cluster_id; cpu_topology[cpu].core_id = core_id; } else if (leaf) { - pr_err("%s: Can't get CPU for leaf core\n", core->full_name); + pr_err("%pOF: Can't get CPU for leaf core\n", core); return -EINVAL; } @@ -137,8 +137,8 @@ static int __init parse_cluster(struct device_node *cluster, int depth) has_cores = true; if (depth == 0) { - pr_err("%s: cpu-map children should be clusters\n", - c->full_name); + pr_err("%pOF: cpu-map children should be clusters\n", + c); of_node_put(c); return -EINVAL; } @@ -146,8 +146,8 @@ static int __init parse_cluster(struct device_node *cluster, int depth) if (leaf) { ret = parse_core(c, cluster_id, core_id++); } else { - pr_err("%s: Non-leaf cluster with core %s\n", - cluster->full_name, name); + pr_err("%pOF: Non-leaf cluster with core %s\n", + cluster, name); ret = -EINVAL; } @@ -159,7 +159,7 @@ static int __init parse_cluster(struct device_node *cluster, int depth) } while (c); if (leaf && !has_cores) - pr_warn("%s: empty cluster\n", cluster->full_name); + pr_warn("%pOF: empty cluster\n", cluster); if (leaf) cluster_id++; -- cgit v1.2.3 From 67556d7a851c20116923c23f1d49ecdec954e3a0 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 19 Jul 2017 23:01:38 +0100 Subject: ARM: kexec: avoid allocating crashkernel region outside lowmem Allocating the crashkernel region outside lowmem causes the kernel to oops while trying to kexec into the new kernel: Loading crashdump kernel... Unable to handle kernel NULL pointer dereference at virtual address 00000000 pgd = edd70000 [00000000] *pgd=de19e835 Internal error: Oops: 817 [#2] SMP ARM Modules linked in: ... CPU: 0 PID: 689 Comm: sh Not tainted 4.12.0-rc3-next-20170601-04015-gc3a5a20 Hardware name: Generic DRA74X (Flattened Device Tree) task: edb32f00 task.stack: edf18000 PC is at memcpy+0x50/0x330 LR is at 0xe3c34001 pc : [] lr : [] psr: 800c0193 sp : edf19c2c ip : 0a000001 fp : c0553170 r10: c055316e r9 : 00000001 r8 : e3130001 r7 : e4903004 r6 : 0a000014 r5 : e3500000 r4 : e59f106c r3 : e59f0074 r2 : ffffffe8 r1 : c010fb88 r0 : 00000000 Flags: Nzcv IRQs off FIQs on Mode SVC_32 ISA ARM Segment none Control: 10c5387d Table: add7006a DAC: 00000051 Process sh (pid: 689, stack limit = 0xedf18218) Stack: (0xedf19c2c to 0xedf1a000) ... [] (memcpy) from [] (machine_kexec+0xa8/0x12c) [] (machine_kexec) from [] (__crash_kexec+0x5c/0x98) [] (__crash_kexec) from [] (crash_kexec+0x5c/0x68) [] (crash_kexec) from [] (die+0x228/0x490) [] (die) from [] (__do_kernel_fault.part.0+0x54/0x1e4) [] (__do_kernel_fault.part.0) from [] (do_page_fault+0x1e8/0x400) [] (do_page_fault) from [] (do_DataAbort+0x38/0xb8) [] (do_DataAbort) from [] (__dabt_svc+0x64/0xa0) This is caused by image->control_code_page being a highmem page, so page_address(image->control_code_page) returns NULL. In any case, we don't want the control page to be a highmem page. We already limit the crash kernel region to the top of 32-bit physical memory space. Also limit it to the top of lowmem in physical space. Reported-by: Keerthy Tested-by: Keerthy Signed-off-by: Russell King --- arch/arm/kernel/setup.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 4e80bf7420d4..8e9a3e40d949 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -987,6 +987,9 @@ static void __init reserve_crashkernel(void) if (crash_base <= 0) { unsigned long long crash_max = idmap_to_phys((u32)~0); + unsigned long long lowmem_max = __pa(high_memory - 1) + 1; + if (crash_max > lowmem_max) + crash_max = lowmem_max; crash_base = memblock_find_in_range(CRASH_ALIGN, crash_max, crash_size, CRASH_ALIGN); if (!crash_base) { -- cgit v1.2.3 From 0d70262a2d60886da6fe5b1fc8bbcd76cbbc306d Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 19 Jul 2017 23:09:58 +0100 Subject: ARM: kexec: fix failure to boot crash kernel When kexec was converted to DTB, the dtb address was passed between machine_kexec_prepare() and machine_kexec() using a static variable. This is bad news if you load a crash kernel followed by a normal kernel or vice versa - the last loaded kernel overwrites the dtb address. This can result in kexec failures, as (eg) we try to boot the crash kernel with the last loaded dtb. For example, with: the crash kernel fails to find the dtb. Avoid this by defining a kimage architecture structure, and store the address to be passed in r2 there, which will either be the ATAGs or the dtb blob. Fixes: 4cabd1d9625c ("ARM: 7539/1: kexec: scan for dtb magic in segments") Fixes: 42d720d1731a ("ARM: kexec: Make .text R/W in machine_kexec") Reported-by: Keerthy Tested-by: Keerthy Signed-off-by: Russell King --- arch/arm/include/asm/kexec.h | 5 +++++ arch/arm/kernel/machine_kexec.c | 11 ++++++----- 2 files changed, 11 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/kexec.h b/arch/arm/include/asm/kexec.h index 1869af6bac5c..25021b798a1e 100644 --- a/arch/arm/include/asm/kexec.h +++ b/arch/arm/include/asm/kexec.h @@ -19,6 +19,11 @@ #ifndef __ASSEMBLY__ +#define ARCH_HAS_KIMAGE_ARCH +struct kimage_arch { + u32 kernel_r2; +}; + /** * crash_setup_regs() - save registers for the panic kernel * @newregs: registers are saved here diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c index 15495887ca14..fe1419eeb932 100644 --- a/arch/arm/kernel/machine_kexec.c +++ b/arch/arm/kernel/machine_kexec.c @@ -30,7 +30,6 @@ extern unsigned long kexec_boot_atags; static atomic_t waiting_for_crash_ipi; -static unsigned long dt_mem; /* * Provide a dummy crash_notes definition while crash dump arrives to arm. * This prevents breakage of crash_notes attribute in kernel/ksysfs.c. @@ -42,6 +41,9 @@ int machine_kexec_prepare(struct kimage *image) __be32 header; int i, err; + image->arch.kernel_r2 = image->start - KEXEC_ARM_ZIMAGE_OFFSET + + KEXEC_ARM_ATAGS_OFFSET; + /* * Validate that if the current HW supports SMP, then the SW supports * and implements CPU hotplug for the current HW. If not, we won't be @@ -66,8 +68,8 @@ int machine_kexec_prepare(struct kimage *image) if (err) return err; - if (be32_to_cpu(header) == OF_DT_HEADER) - dt_mem = current_segment->mem; + if (header == cpu_to_be32(OF_DT_HEADER)) + image->arch.kernel_r2 = current_segment->mem; } return 0; } @@ -165,8 +167,7 @@ void machine_kexec(struct kimage *image) kexec_start_address = image->start; kexec_indirection_page = page_list; kexec_mach_type = machine_arch_type; - kexec_boot_atags = dt_mem ?: image->start - KEXEC_ARM_ZIMAGE_OFFSET - + KEXEC_ARM_ATAGS_OFFSET; + kexec_boot_atags = image->arch.kernel_r2; /* copy our kernel relocation code to the control code page */ reboot_entry = fncpy(reboot_code_buffer, -- cgit v1.2.3 From 43fc509c3efb5c973991ee24c449ab2a0d71dd1e Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Thu, 20 Jul 2017 11:19:58 +0100 Subject: dma-coherent: introduce interface for default DMA pool Christoph noticed [1] that default DMA pool in current form overload the DMA coherent infrastructure. In reply, Robin suggested [2] to split the per-device vs. global pool interfaces, so allocation/release from default DMA pool is driven by dma ops implementation. This patch implements Robin's idea and provide interface to allocate/release/mmap the default (aka global) DMA pool. To make it clear that existing *_from_coherent routines work on per-device pool rename them to *_from_dev_coherent. [1] https://lkml.org/lkml/2017/7/7/370 [2] https://lkml.org/lkml/2017/7/7/431 Cc: Vineet Gupta Cc: Russell King Cc: Catalin Marinas Cc: Will Deacon Cc: Ralf Baechle Suggested-by: Robin Murphy Tested-by: Andras Szemzo Reviewed-by: Robin Murphy Signed-off-by: Vladimir Murzin Signed-off-by: Christoph Hellwig --- arch/arc/mm/dma.c | 2 +- arch/arm/mm/dma-mapping.c | 2 +- arch/arm64/mm/dma-mapping.c | 4 +- arch/mips/mm/dma-default.c | 2 +- drivers/base/dma-coherent.c | 164 ++++++++++++++++++++++++++++---------------- drivers/base/dma-mapping.c | 2 +- include/linux/dma-mapping.h | 40 ++++++++--- 7 files changed, 144 insertions(+), 72 deletions(-) (limited to 'arch') diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index 2a07e6ecafbd..71d3efff99d3 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -117,7 +117,7 @@ static int arc_dma_mmap(struct device *dev, struct vm_area_struct *vma, vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) + if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) return ret; if (off < count && user_count <= (count - off)) { diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index e7380bafbfa6..fcf1473d6fed 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -851,7 +851,7 @@ static int __arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, unsigned long pfn = dma_to_pfn(dev, dma_addr); unsigned long off = vma->vm_pgoff; - if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) + if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) return ret; if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) { diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index e90cd1db42a8..f27d4dd04384 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -329,7 +329,7 @@ static int __swiotlb_mmap(struct device *dev, vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, is_device_dma_coherent(dev)); - if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) + if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) return ret; return __swiotlb_mmap_pfn(vma, pfn, size); @@ -706,7 +706,7 @@ static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, is_device_dma_coherent(dev)); - if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) + if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) return ret; if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c index e08598c70b3e..8e78251eccc2 100644 --- a/arch/mips/mm/dma-default.c +++ b/arch/mips/mm/dma-default.c @@ -232,7 +232,7 @@ static int mips_dma_mmap(struct device *dev, struct vm_area_struct *vma, else vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) + if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) return ret; if (off < count && user_count <= (count - off)) { diff --git a/drivers/base/dma-coherent.c b/drivers/base/dma-coherent.c index 2ae24c28e70c..1c152aed6b82 100644 --- a/drivers/base/dma-coherent.c +++ b/drivers/base/dma-coherent.c @@ -25,7 +25,7 @@ static inline struct dma_coherent_mem *dev_get_coherent_memory(struct device *de { if (dev && dev->dma_mem) return dev->dma_mem; - return dma_coherent_default_memory; + return NULL; } static inline dma_addr_t dma_get_device_base(struct device *dev, @@ -165,34 +165,15 @@ void *dma_mark_declared_memory_occupied(struct device *dev, } EXPORT_SYMBOL(dma_mark_declared_memory_occupied); -/** - * dma_alloc_from_coherent() - try to allocate memory from the per-device coherent area - * - * @dev: device from which we allocate memory - * @size: size of requested memory area - * @dma_handle: This will be filled with the correct dma handle - * @ret: This pointer will be filled with the virtual address - * to allocated area. - * - * This function should be only called from per-arch dma_alloc_coherent() - * to support allocation from per-device coherent memory pools. - * - * Returns 0 if dma_alloc_coherent should continue with allocating from - * generic memory areas, or !0 if dma_alloc_coherent should return @ret. - */ -int dma_alloc_from_coherent(struct device *dev, ssize_t size, - dma_addr_t *dma_handle, void **ret) +static void *__dma_alloc_from_coherent(struct dma_coherent_mem *mem, + ssize_t size, dma_addr_t *dma_handle) { - struct dma_coherent_mem *mem = dev_get_coherent_memory(dev); int order = get_order(size); unsigned long flags; int pageno; int dma_memory_map; + void *ret; - if (!mem) - return 0; - - *ret = NULL; spin_lock_irqsave(&mem->spinlock, flags); if (unlikely(size > (mem->size << PAGE_SHIFT))) @@ -203,21 +184,50 @@ int dma_alloc_from_coherent(struct device *dev, ssize_t size, goto err; /* - * Memory was found in the per-device area. + * Memory was found in the coherent area. */ - *dma_handle = dma_get_device_base(dev, mem) + (pageno << PAGE_SHIFT); - *ret = mem->virt_base + (pageno << PAGE_SHIFT); + *dma_handle = mem->device_base + (pageno << PAGE_SHIFT); + ret = mem->virt_base + (pageno << PAGE_SHIFT); dma_memory_map = (mem->flags & DMA_MEMORY_MAP); spin_unlock_irqrestore(&mem->spinlock, flags); if (dma_memory_map) - memset(*ret, 0, size); + memset(ret, 0, size); else - memset_io(*ret, 0, size); + memset_io(ret, 0, size); - return 1; + return ret; err: spin_unlock_irqrestore(&mem->spinlock, flags); + return NULL; +} + +/** + * dma_alloc_from_dev_coherent() - allocate memory from device coherent pool + * @dev: device from which we allocate memory + * @size: size of requested memory area + * @dma_handle: This will be filled with the correct dma handle + * @ret: This pointer will be filled with the virtual address + * to allocated area. + * + * This function should be only called from per-arch dma_alloc_coherent() + * to support allocation from per-device coherent memory pools. + * + * Returns 0 if dma_alloc_coherent should continue with allocating from + * generic memory areas, or !0 if dma_alloc_coherent should return @ret. + */ +int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size, + dma_addr_t *dma_handle, void **ret) +{ + struct dma_coherent_mem *mem = dev_get_coherent_memory(dev); + + if (!mem) + return 0; + + *ret = __dma_alloc_from_coherent(mem, size, dma_handle); + if (*ret) + return 1; + /* * In the case where the allocation can not be satisfied from the * per-device area, try to fall back to generic memory if the @@ -225,25 +235,20 @@ err: */ return mem->flags & DMA_MEMORY_EXCLUSIVE; } -EXPORT_SYMBOL(dma_alloc_from_coherent); +EXPORT_SYMBOL(dma_alloc_from_dev_coherent); -/** - * dma_release_from_coherent() - try to free the memory allocated from per-device coherent memory pool - * @dev: device from which the memory was allocated - * @order: the order of pages allocated - * @vaddr: virtual address of allocated pages - * - * This checks whether the memory was allocated from the per-device - * coherent memory pool and if so, releases that memory. - * - * Returns 1 if we correctly released the memory, or 0 if - * dma_release_coherent() should proceed with releasing memory from - * generic pools. - */ -int dma_release_from_coherent(struct device *dev, int order, void *vaddr) +void *dma_alloc_from_global_coherent(ssize_t size, dma_addr_t *dma_handle) { - struct dma_coherent_mem *mem = dev_get_coherent_memory(dev); + if (!dma_coherent_default_memory) + return NULL; + + return __dma_alloc_from_coherent(dma_coherent_default_memory, size, + dma_handle); +} +static int __dma_release_from_coherent(struct dma_coherent_mem *mem, + int order, void *vaddr) +{ if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) { int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; @@ -256,28 +261,39 @@ int dma_release_from_coherent(struct device *dev, int order, void *vaddr) } return 0; } -EXPORT_SYMBOL(dma_release_from_coherent); /** - * dma_mmap_from_coherent() - try to mmap the memory allocated from - * per-device coherent memory pool to userspace + * dma_release_from_dev_coherent() - free memory to device coherent memory pool * @dev: device from which the memory was allocated - * @vma: vm_area for the userspace memory - * @vaddr: cpu address returned by dma_alloc_from_coherent - * @size: size of the memory buffer allocated by dma_alloc_from_coherent - * @ret: result from remap_pfn_range() + * @order: the order of pages allocated + * @vaddr: virtual address of allocated pages * * This checks whether the memory was allocated from the per-device - * coherent memory pool and if so, maps that memory to the provided vma. + * coherent memory pool and if so, releases that memory. * - * Returns 1 if we correctly mapped the memory, or 0 if the caller should - * proceed with mapping memory from generic pools. + * Returns 1 if we correctly released the memory, or 0 if the caller should + * proceed with releasing memory from generic pools. */ -int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma, - void *vaddr, size_t size, int *ret) +int dma_release_from_dev_coherent(struct device *dev, int order, void *vaddr) { struct dma_coherent_mem *mem = dev_get_coherent_memory(dev); + return __dma_release_from_coherent(mem, order, vaddr); +} +EXPORT_SYMBOL(dma_release_from_dev_coherent); + +int dma_release_from_global_coherent(int order, void *vaddr) +{ + if (!dma_coherent_default_memory) + return 0; + + return __dma_release_from_coherent(dma_coherent_default_memory, order, + vaddr); +} + +static int __dma_mmap_from_coherent(struct dma_coherent_mem *mem, + struct vm_area_struct *vma, void *vaddr, size_t size, int *ret) +{ if (mem && vaddr >= mem->virt_base && vaddr + size <= (mem->virt_base + (mem->size << PAGE_SHIFT))) { unsigned long off = vma->vm_pgoff; @@ -296,7 +312,39 @@ int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma, } return 0; } -EXPORT_SYMBOL(dma_mmap_from_coherent); + +/** + * dma_mmap_from_dev_coherent() - mmap memory from the device coherent pool + * @dev: device from which the memory was allocated + * @vma: vm_area for the userspace memory + * @vaddr: cpu address returned by dma_alloc_from_dev_coherent + * @size: size of the memory buffer allocated + * @ret: result from remap_pfn_range() + * + * This checks whether the memory was allocated from the per-device + * coherent memory pool and if so, maps that memory to the provided vma. + * + * Returns 1 if we correctly mapped the memory, or 0 if the caller should + * proceed with mapping memory from generic pools. + */ +int dma_mmap_from_dev_coherent(struct device *dev, struct vm_area_struct *vma, + void *vaddr, size_t size, int *ret) +{ + struct dma_coherent_mem *mem = dev_get_coherent_memory(dev); + + return __dma_mmap_from_coherent(mem, vma, vaddr, size, ret); +} +EXPORT_SYMBOL(dma_mmap_from_dev_coherent); + +int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *vaddr, + size_t size, int *ret) +{ + if (!dma_coherent_default_memory) + return 0; + + return __dma_mmap_from_coherent(dma_coherent_default_memory, vma, + vaddr, size, ret); +} /* * Support for reserved memory regions defined in device tree diff --git a/drivers/base/dma-mapping.c b/drivers/base/dma-mapping.c index 5096755d185e..b555ff9dd8fc 100644 --- a/drivers/base/dma-mapping.c +++ b/drivers/base/dma-mapping.c @@ -235,7 +235,7 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) + if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) return ret; if (off < count && user_count <= (count - off)) { diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 843ab866e0f4..03c0196a6f24 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -157,16 +157,40 @@ static inline int is_device_dma_capable(struct device *dev) * These three functions are only for dma allocator. * Don't use them in device drivers. */ -int dma_alloc_from_coherent(struct device *dev, ssize_t size, +int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size, dma_addr_t *dma_handle, void **ret); -int dma_release_from_coherent(struct device *dev, int order, void *vaddr); +int dma_release_from_dev_coherent(struct device *dev, int order, void *vaddr); -int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma, +int dma_mmap_from_dev_coherent(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, size_t size, int *ret); + +void *dma_alloc_from_global_coherent(ssize_t size, dma_addr_t *dma_handle); +int dma_release_from_global_coherent(int order, void *vaddr); +int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *cpu_addr, + size_t size, int *ret); + #else -#define dma_alloc_from_coherent(dev, size, handle, ret) (0) -#define dma_release_from_coherent(dev, order, vaddr) (0) -#define dma_mmap_from_coherent(dev, vma, vaddr, order, ret) (0) +#define dma_alloc_from_dev_coherent(dev, size, handle, ret) (0) +#define dma_release_from_dev_coherent(dev, order, vaddr) (0) +#define dma_mmap_from_dev_coherent(dev, vma, vaddr, order, ret) (0) + +static inline void *dma_alloc_from_global_coherent(ssize_t size, + dma_addr_t *dma_handle) +{ + return NULL; +} + +static inline int dma_release_from_global_coherent(int order, void *vaddr) +{ + return 0; +} + +static inline int dma_mmap_from_global_coherent(struct vm_area_struct *vma, + void *cpu_addr, size_t size, + int *ret) +{ + return 0; +} #endif /* CONFIG_HAVE_GENERIC_DMA_COHERENT */ #ifdef CONFIG_HAS_DMA @@ -481,7 +505,7 @@ static inline void *dma_alloc_attrs(struct device *dev, size_t size, BUG_ON(!ops); - if (dma_alloc_from_coherent(dev, size, dma_handle, &cpu_addr)) + if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr)) return cpu_addr; if (!arch_dma_alloc_attrs(&dev, &flag)) @@ -503,7 +527,7 @@ static inline void dma_free_attrs(struct device *dev, size_t size, BUG_ON(!ops); WARN_ON(irqs_disabled()); - if (dma_release_from_coherent(dev, get_order(size), cpu_addr)) + if (dma_release_from_dev_coherent(dev, get_order(size), cpu_addr)) return; if (!ops->free || !cpu_addr) -- cgit v1.2.3 From 878ec36765fa71ffe656bc003d25bb9f8bad28f9 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Thu, 20 Jul 2017 11:19:59 +0100 Subject: ARM: NOMMU: Wire-up default DMA interface The way how default DMA pool is exposed has changed and now we need to use dedicated interface to work with it. This patch makes alloc/release operations to use such interface. Since, default DMA pool is not handled by generic code anymore we have to implement our own mmap operation. Tested-by: Andras Szemzo Reviewed-by: Robin Murphy Signed-off-by: Vladimir Murzin Signed-off-by: Christoph Hellwig --- arch/arm/mm/dma-mapping-nommu.c | 45 ++++++++++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c index 90ee354d803e..6db5fc26d154 100644 --- a/arch/arm/mm/dma-mapping-nommu.c +++ b/arch/arm/mm/dma-mapping-nommu.c @@ -40,9 +40,21 @@ static void *arm_nommu_dma_alloc(struct device *dev, size_t size, { const struct dma_map_ops *ops = &dma_noop_ops; + void *ret; /* - * We are here because: + * Try generic allocator first if we are advertised that + * consistency is not required. + */ + + if (attrs & DMA_ATTR_NON_CONSISTENT) + return ops->alloc(dev, size, dma_handle, gfp, attrs); + + ret = dma_alloc_from_global_coherent(size, dma_handle); + + /* + * dma_alloc_from_global_coherent() may fail because: + * * - no consistent DMA region has been defined, so we can't * continue. * - there is no space left in consistent DMA region, so we @@ -50,11 +62,8 @@ static void *arm_nommu_dma_alloc(struct device *dev, size_t size, * advertised that consistency is not required. */ - if (attrs & DMA_ATTR_NON_CONSISTENT) - return ops->alloc(dev, size, dma_handle, gfp, attrs); - - WARN_ON_ONCE(1); - return NULL; + WARN_ON_ONCE(ret == NULL); + return ret; } static void arm_nommu_dma_free(struct device *dev, size_t size, @@ -63,14 +72,31 @@ static void arm_nommu_dma_free(struct device *dev, size_t size, { const struct dma_map_ops *ops = &dma_noop_ops; - if (attrs & DMA_ATTR_NON_CONSISTENT) + if (attrs & DMA_ATTR_NON_CONSISTENT) { ops->free(dev, size, cpu_addr, dma_addr, attrs); - else - WARN_ON_ONCE(1); + } else { + int ret = dma_release_from_global_coherent(get_order(size), + cpu_addr); + + WARN_ON_ONCE(ret == 0); + } return; } +static int arm_nommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs) +{ + int ret; + + if (dma_mmap_from_global_coherent(vma, cpu_addr, size, &ret)) + return ret; + + return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size); +} + + static void __dma_page_cpu_to_dev(phys_addr_t paddr, size_t size, enum dma_data_direction dir) { @@ -173,6 +199,7 @@ static void arm_nommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist const struct dma_map_ops arm_nommu_dma_ops = { .alloc = arm_nommu_dma_alloc, .free = arm_nommu_dma_free, + .mmap = arm_nommu_dma_mmap, .map_page = arm_nommu_dma_map_page, .unmap_page = arm_nommu_dma_unmap_page, .map_sg = arm_nommu_dma_map_sg, -- cgit v1.2.3 From ece4b206be9934b3bb32adb1261545ead831c993 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Thu, 20 Jul 2017 12:03:59 +0100 Subject: arm64/numa: Drop duplicate message When booting linux on a system without CONFIG_NUMA enabled, the following messages are printed during boot - NUMA: Faking a node at [mem 0x0000000000000000-0x00000083ffffffff] NUMA: Adding memblock [0x8000000000 - 0x8000e7ffff] on node 0 NUMA: Adding memblock [0x8000e80000 - 0x83f65cffff] on node 0 NUMA: Adding memblock [0x83f65d0000 - 0x83f665ffff] on node 0 NUMA: Adding memblock [0x83f6660000 - 0x83f676ffff] on node 0 NUMA: Adding memblock [0x83f6770000 - 0x83f678ffff] on node 0 NUMA: Adding memblock [0x83f6790000 - 0x83fb82ffff] on node 0 NUMA: Adding memblock [0x83fb830000 - 0x83fbc0ffff] on node 0 NUMA: Adding memblock [0x83fbc10000 - 0x83fbdfffff] on node 0 NUMA: Adding memblock [0x83fbe00000 - 0x83fbffffff] on node 0 NUMA: Adding memblock [0x83fc000000 - 0x83fffbffff] on node 0 NUMA: Adding memblock [0x83fffc0000 - 0x83fffdffff] on node 0 NUMA: Adding memblock [0x83fffe0000 - 0x83ffffffff] on node 0 NUMA: Initmem setup node 0 [mem 0x8000000000-0x83ffffffff] NUMA: NODE_DATA [mem 0x83fffec500-0x83fffedfff] The information is then duplicated by core kernel messages right after the above output. Early memory node ranges node 0: [mem 0x0000008000000000-0x0000008000e7ffff] node 0: [mem 0x0000008000e80000-0x00000083f65cffff] node 0: [mem 0x00000083f65d0000-0x00000083f665ffff] node 0: [mem 0x00000083f6660000-0x00000083f676ffff] node 0: [mem 0x00000083f6770000-0x00000083f678ffff] node 0: [mem 0x00000083f6790000-0x00000083fb82ffff] node 0: [mem 0x00000083fb830000-0x00000083fbc0ffff] node 0: [mem 0x00000083fbc10000-0x00000083fbdfffff] node 0: [mem 0x00000083fbe00000-0x00000083fbffffff] node 0: [mem 0x00000083fc000000-0x00000083fffbffff] node 0: [mem 0x00000083fffc0000-0x00000083fffdffff] node 0: [mem 0x00000083fffe0000-0x00000083ffffffff] Initmem setup node 0 [mem 0x0000008000000000-0x00000083ffffffff] Remove the duplication of memblock layout information printed during boot by dropping the messages from arm64 numa initialisation. Signed-off-by: Punit Agrawal Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/mm/numa.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index b388a99fea7b..dad128ba98bf 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -208,8 +208,6 @@ int __init numa_add_memblk(int nid, u64 start, u64 end) } node_set(nid, numa_nodes_parsed); - pr_info("Adding memblock [0x%llx - 0x%llx] on node %d\n", - start, (end - 1), nid); return ret; } @@ -223,10 +221,7 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) void *nd; int tnid; - if (start_pfn < end_pfn) - pr_info("Initmem setup node %d [mem %#010Lx-%#010Lx]\n", nid, - start_pfn << PAGE_SHIFT, (end_pfn << PAGE_SHIFT) - 1); - else + if (start_pfn >= end_pfn) pr_info("Initmem setup node %d []\n", nid); nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid); -- cgit v1.2.3 From 8399e4b88a93fc7bc00fff3b8da9b2e718b7f45e Mon Sep 17 00:00:00 2001 From: Nitin Gupta Date: Wed, 19 Jul 2017 17:12:54 -0700 Subject: sparc64: Register hugepages during arch init Add hstate for each supported hugepage size using arch initcall. This change fixes some hugepage parameter parsing inconsistencies: case 1: no hugepage parameters Without hugepage parameters, only a hugepages-8192kB entry is visible in sysfs. It's different from x86_64 where both 2M and 1G hugepage sizes are available. case 2: default_hugepagesz=[64K|256M|2G] When specifying only a default_hugepagesz parameter, the default hugepage size isn't really changed and it stays at 8M. This is again different from x86_64. Orabug: 25869946 Reviewed-by: Bob Picco Signed-off-by: Nitin Gupta Signed-off-by: David S. Miller --- arch/sparc/mm/init_64.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 3c40ebd50f92..fed73f14aa49 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -325,6 +325,29 @@ static void __update_mmu_tsb_insert(struct mm_struct *mm, unsigned long tsb_inde } #ifdef CONFIG_HUGETLB_PAGE +static void __init add_huge_page_size(unsigned long size) +{ + unsigned int order; + + if (size_to_hstate(size)) + return; + + order = ilog2(size) - PAGE_SHIFT; + hugetlb_add_hstate(order); +} + +static int __init hugetlbpage_init(void) +{ + add_huge_page_size(1UL << HPAGE_64K_SHIFT); + add_huge_page_size(1UL << HPAGE_SHIFT); + add_huge_page_size(1UL << HPAGE_256MB_SHIFT); + add_huge_page_size(1UL << HPAGE_2GB_SHIFT); + + return 0; +} + +arch_initcall(hugetlbpage_init); + static int __init setup_hugepagesz(char *string) { unsigned long long hugepage_size; @@ -364,7 +387,7 @@ static int __init setup_hugepagesz(char *string) goto out; } - hugetlb_add_hstate(hugepage_shift - PAGE_SHIFT); + add_huge_page_size(hugepage_size); rc = 1; out: -- cgit v1.2.3 From 45fc0113ccee2352ec470a48687c906edc37eca3 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 20 Jul 2017 07:00:55 +0200 Subject: sparc: defconfig: Cleanup from old Kconfig options Remove old, dead Kconfig options (in order appearing in this commit): - EXPERIMENTAL is gone since v3.9; - INET_LRO: commit 7bbf3cae65b6 ("ipv4: Remove inet_lro library"); - AUTOFS_FS: commit 561c5cf9236a ("staging: Remove autofs3"); - RCU_CPU_STALL_DETECTOR: commit a00e0d714fbd ("rcu: Remove conditional compilation for RCU CPU stall warnings"); - USB_DEVICE_CLASS: commit 007bab91324e ("USB: remove CONFIG_USB_DEVICE_CLASS"); - SYSCTL_SYSCALL_CHECK: commit 7c60c48f58a7 ("sysctl: Improve the sysctl sanity checks"); Signed-off-by: Krzysztof Kozlowski Acked-by: David S. Miller Signed-off-by: David S. Miller --- arch/sparc/configs/sparc32_defconfig | 4 ---- arch/sparc/configs/sparc64_defconfig | 4 ---- 2 files changed, 8 deletions(-) (limited to 'arch') diff --git a/arch/sparc/configs/sparc32_defconfig b/arch/sparc/configs/sparc32_defconfig index c74d3701ad68..207a43a2d8b3 100644 --- a/arch/sparc/configs/sparc32_defconfig +++ b/arch/sparc/configs/sparc32_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 @@ -23,7 +22,6 @@ CONFIG_IP_PNP_DHCP=y CONFIG_INET_AH=y CONFIG_INET_ESP=y CONFIG_INET_IPCOMP=y -# CONFIG_INET_LRO is not set CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m @@ -69,7 +67,6 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_AUTOFS_FS=m CONFIG_AUTOFS4_FS=m CONFIG_ISO9660_FS=m CONFIG_PROC_KCORE=y @@ -82,7 +79,6 @@ CONFIG_NLS=y CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_SCHED_DEBUG is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_KGDB=y CONFIG_KGDB_TESTS=y CONFIG_CRYPTO_NULL=m diff --git a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig index b2e650d1764f..ca8609d7292f 100644 --- a/arch/sparc/configs/sparc64_defconfig +++ b/arch/sparc/configs/sparc64_defconfig @@ -1,5 +1,4 @@ CONFIG_64BIT=y -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -184,7 +183,6 @@ CONFIG_HID_TOPSEED=y CONFIG_HID_THRUSTMASTER=y CONFIG_HID_ZEROPLUS=y CONFIG_USB=y -# CONFIG_USB_DEVICE_CLASS is not set CONFIG_USB_EHCI_HCD=m # CONFIG_USB_EHCI_TT_NEWSCHED is not set CONFIG_USB_OHCI_HCD=y @@ -210,8 +208,6 @@ CONFIG_LOCKUP_DETECTOR=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_SCHED_DEBUG is not set CONFIG_SCHEDSTATS=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_UPROBE_EVENTS=y CONFIG_KEYS=y -- cgit v1.2.3 From 6620f146849d15d35f3a2f6f5c5d5f2ff13a6b58 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Thu, 8 Jun 2017 14:39:55 +0200 Subject: ARM64: dts: meson-gx: use specific compatible for the AO pwms Use the specific compatible for AO pwms so the pwms input can be correctly set FDIV4 is not present on the pwm A0, so change kadhas vim input clocks to xtal. Signed-off-by: Jerome Brunet Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-gx.dtsi | 2 +- arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi index 35b8c88c3220..738ed689ff69 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi @@ -400,7 +400,7 @@ }; pwm_AO_ab: pwm@550 { - compatible = "amlogic,meson-gx-pwm", "amlogic,meson-gxbb-pwm"; + compatible = "amlogic,meson-gx-ao-pwm", "amlogic,meson-gxbb-ao-pwm"; reg = <0x0 0x00550 0x0 0x10>; #pwm-cells = <3>; status = "disabled"; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts index 72c5a9f64ca8..94567eb17875 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts @@ -109,8 +109,8 @@ status = "okay"; pinctrl-0 = <&pwm_ao_a_3_pins>, <&pwm_ao_b_pins>; pinctrl-names = "default"; - clocks = <&clkc CLKID_FCLK_DIV4>; - clock-names = "clkin0"; + clocks = <&xtal> , <&xtal>; + clock-names = "clkin0", "clkin1" ; }; &pwm_ef { -- cgit v1.2.3 From 72fb2c852188ad75ab3badb55bf491925b9eba46 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Mon, 17 Jul 2017 18:03:01 +0200 Subject: ARM64: dts: meson-gxl-s905x-libretech-cc: fixup board definition The libretech CC derives less from the p212 than initially thought. Several voltage regulators are different and the capabilities of the sdcard and emmc also differ. Deriving from the p212 is not convient anymore so the libretech is now derived from s905x definition directly. Fixes: cd84aff1d981 ("ARM64: dts: meson-gxl: Add Libre Technology CC support") Signed-off-by: Jerome Brunet Signed-off-by: Kevin Hilman --- .../dts/amlogic/meson-gxl-s905x-libretech-cc.dts | 103 ++++++++++++++++++--- 1 file changed, 91 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts index 890821d6e52b..266fbcf3e47f 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts @@ -10,12 +10,20 @@ #include -#include "meson-gxl-s905x-p212.dtsi" +#include "meson-gxl-s905x.dtsi" / { compatible = "libretech,cc", "amlogic,s905x", "amlogic,meson-gxl"; model = "Libre Technology CC"; + aliases { + serial0 = &uart_AO; + }; + + chosen { + stdout-path = "serial0:115200n8"; + }; + cvbs-connector { compatible = "composite-video-connector"; @@ -26,6 +34,11 @@ }; }; + emmc_pwrseq: emmc-pwrseq { + compatible = "mmc-pwrseq-emmc"; + reset-gpios = <&gpio BOOT_9 GPIO_ACTIVE_LOW>; + }; + hdmi-connector { compatible = "hdmi-connector"; type = "a"; @@ -53,6 +66,39 @@ linux,default-trigger = "heartbeat"; }; }; + + memory@0 { + device_type = "memory"; + reg = <0x0 0x0 0x0 0x80000000>; + }; + + vcc_3v3: regulator-vcc_3v3 { + compatible = "regulator-fixed"; + regulator-name = "VCC_3V3"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + }; + + vcc_card: regulator-vcc-card { + compatible = "regulator-gpio"; + + regulator-name = "VCC_CARD"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <3300000>; + + gpios = <&gpio_ao GPIOAO_3 GPIO_ACTIVE_HIGH>; + gpios-states = <0>; + + states = <3300000 0>, + <1800000 1>; + }; + + vddio_boot: regulator-vddio_boot { + compatible = "regulator-fixed"; + regulator-name = "VDDIO_BOOT"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + }; }; &cvbs_vdac_port { @@ -61,6 +107,16 @@ }; }; +ðmac { + status = "okay"; +}; + +&ir { + status = "okay"; + pinctrl-0 = <&remote_input_ao_pins>; + pinctrl-names = "default"; +}; + &hdmi_tx { status = "okay"; pinctrl-0 = <&hdmi_hpd_pins>, <&hdmi_i2c_pins>; @@ -73,20 +129,43 @@ }; }; -/* - * The following devices exists but are exposed on the general - * purpose GPIO header. End user may well decide to use those pins - * for another purpose - */ +/* SD card */ +&sd_emmc_b { + status = "okay"; + pinctrl-0 = <&sdcard_pins>; + pinctrl-names = "default"; + + bus-width = <4>; + cap-sd-highspeed; + max-frequency = <100000000>; + disable-wp; + + cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; + cd-inverted; -&sd_emmc_a { - status = "disabled"; + vmmc-supply = <&vcc_3v3>; + vqmmc-supply = <&vcc_card>; }; -&uart_A { - status = "disabled"; +/* eMMC */ +&sd_emmc_c { + status = "okay"; + pinctrl-0 = <&emmc_pins>; + pinctrl-names = "default"; + + bus-width = <8>; + cap-mmc-highspeed; + max-frequency = <50000000>; + non-removable; + disable-wp; + + mmc-pwrseq = <&emmc_pwrseq>; + vmmc-supply = <&vcc_3v3>; + vqmmc-supply = <&vddio_boot>; }; -&wifi32k { - status = "disabled"; +&uart_AO { + status = "okay"; + pinctrl-0 = <&uart_ao_a_pins>; + pinctrl-names = "default"; }; -- cgit v1.2.3 From b82d6cb41eb54bf1c0e5d2ae73f49a4dff54c54b Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Fri, 21 Jul 2017 10:35:31 +0200 Subject: xtensa: remove wrapper header for asm/device.h xtensa's asm/device.h is a verbatim copy of asm-generic/device.h and does not add any arch specific extensions. Thus, use the asm-generic header directly. Acked-by: Max Filippov Signed-off-by: Tobias Klauser Signed-off-by: Max Filippov --- arch/xtensa/include/asm/Kbuild | 1 + arch/xtensa/include/asm/device.h | 15 --------------- 2 files changed, 1 insertion(+), 15 deletions(-) delete mode 100644 arch/xtensa/include/asm/device.h (limited to 'arch') diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index 2d716ebc5a5e..a2fdabfce43a 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -1,5 +1,6 @@ generic-y += bug.h generic-y += clkdev.h +generic-y += device.h generic-y += div64.h generic-y += dma-contiguous.h generic-y += emergency-restart.h diff --git a/arch/xtensa/include/asm/device.h b/arch/xtensa/include/asm/device.h deleted file mode 100644 index 1deeb8ebbb1b..000000000000 --- a/arch/xtensa/include/asm/device.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Arch specific extensions to struct device - * - * This file is released under the GPLv2 - */ -#ifndef _ASM_XTENSA_DEVICE_H -#define _ASM_XTENSA_DEVICE_H - -struct dev_archdata { -}; - -struct pdev_archdata { -}; - -#endif /* _ASM_XTENSA_DEVICE_H */ -- cgit v1.2.3 From 536dcc9c34035778892a7e3cbf45166ce73f8d34 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Fri, 21 Jul 2017 10:44:07 +0200 Subject: xtensa: remove wrapper header for asm/param.h xtensa's asm/device.h is a verbatim copy of asm-generic/device.h and does not add any arch specific extensions. Thus, use the asm-generic header directly. Signed-off-by: Tobias Klauser Signed-off-by: Max Filippov --- arch/xtensa/include/asm/Kbuild | 1 + arch/xtensa/include/asm/param.h | 18 ------------------ 2 files changed, 1 insertion(+), 18 deletions(-) delete mode 100644 arch/xtensa/include/asm/param.h (limited to 'arch') diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index a2fdabfce43a..dff7cc39437c 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -18,6 +18,7 @@ generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h +generic-y += param.h generic-y += percpu.h generic-y += preempt.h generic-y += rwsem.h diff --git a/arch/xtensa/include/asm/param.h b/arch/xtensa/include/asm/param.h deleted file mode 100644 index 0a70e780ef2a..000000000000 --- a/arch/xtensa/include/asm/param.h +++ /dev/null @@ -1,18 +0,0 @@ -/* - * include/asm-xtensa/param.h - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2001 - 2005 Tensilica Inc. - */ -#ifndef _XTENSA_PARAM_H -#define _XTENSA_PARAM_H - -#include - -# define HZ CONFIG_HZ /* internal timer frequency */ -# define USER_HZ 100 /* for user interfaces in "ticks" */ -# define CLOCKS_PER_SEC (USER_HZ) /* frequnzy at which times() counts */ -#endif /* _XTENSA_PARAM_H */ -- cgit v1.2.3 From 5bc64bd246ca00e0bb0b7137afaf586f2f66a911 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 8 Jul 2017 23:25:14 +0200 Subject: parisc: Disable further stack checks when panic occurs during stack check Before the irq handler detects a low stack and then panics the kernel, disable further stack checks to avoid recursive panics. Reported-by: John David Anglin Signed-off-by: Helge Deller --- arch/parisc/kernel/irq.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c index ba5e1c7b1f17..5404e4086cb9 100644 --- a/arch/parisc/kernel/irq.c +++ b/arch/parisc/kernel/irq.c @@ -413,6 +413,10 @@ static inline void stack_overflow_check(struct pt_regs *regs) if (regs->sr[7]) return; + /* exit if already in panic */ + if (sysctl_panic_on_stackoverflow < 0) + return; + /* calculate kernel stack usage */ stack_usage = sp - stack_start; #ifdef CONFIG_IRQSTACKS @@ -454,8 +458,10 @@ check_kernel_stack: #ifdef CONFIG_IRQSTACKS panic_check: #endif - if (sysctl_panic_on_stackoverflow) + if (sysctl_panic_on_stackoverflow) { + sysctl_panic_on_stackoverflow = -1; /* disable further checks */ panic("low stack detected by irq handler - check messages\n"); + } #endif } -- cgit v1.2.3 From 6cd819e8e978b361ae558056a9e79fd30e6acb0d Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 9 Jul 2017 22:50:40 +0200 Subject: parisc: Merge millicode routines via linker script When compiling the 4.13-rc kernel I got those linker errors: libgcc2.c:(.text+0x110): relocation truncated to fit: R_PARISC_PCREL22F against symbol `$$divU' defined in .text.div section in /usr/lib/gcc/hppa64-linux-gnu/4.9.2/libgcc.a(_divU.o) hppa64-linux-gnu-ld: /usr/lib/gcc/hppa64-linux-gnu/4.9.2/libgcc.a(_moddi3.o)(.text+0x174): cannot reach $$divU Avoid such errors by bundling the millicode routines in the linker script. Signed-off-by: Helge Deller --- arch/parisc/kernel/vmlinux.lds.S | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index 3d6ef1b29c6a..ffe2cbf52d1a 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -78,6 +78,8 @@ SECTIONS *(.text.sys_exit) *(.text.do_sigaltstack) *(.text.do_fork) + *(.text.div) + *($$*) /* millicode routines */ *(.text.*) *(.fixup) *(.lock.text) /* out-of-line lock text */ -- cgit v1.2.3 From 108ea18722df59d8977951eecd635d296fa64765 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 23 Jul 2017 22:08:42 +0200 Subject: parisc: regenerate defconfig files Signed-off-by: Helge Deller --- arch/parisc/configs/712_defconfig | 41 ++++++----------------- arch/parisc/configs/a500_defconfig | 50 +++++++---------------------- arch/parisc/configs/b180_defconfig | 17 ++-------- arch/parisc/configs/c3000_defconfig | 38 ++++------------------ arch/parisc/configs/c8000_defconfig | 17 ++-------- arch/parisc/configs/default_defconfig | 49 +++++++--------------------- arch/parisc/configs/generic-32bit_defconfig | 21 +----------- arch/parisc/configs/generic-64bit_defconfig | 48 +++++++++------------------ 8 files changed, 59 insertions(+), 222 deletions(-) (limited to 'arch') diff --git a/arch/parisc/configs/712_defconfig b/arch/parisc/configs/712_defconfig index 143d02652792..ccc109761f44 100644 --- a/arch/parisc/configs/712_defconfig +++ b/arch/parisc/configs/712_defconfig @@ -1,11 +1,9 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=16 -CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_BLK_DEV_INITRD=y CONFIG_KALLSYMS_ALL=y CONFIG_SLAB=y @@ -14,7 +12,6 @@ CONFIG_OPROFILE=m CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y -# CONFIG_BLK_DEV_BSG is not set CONFIG_PA7100LC=y CONFIG_PREEMPT_VOLUNTARY=y CONFIG_GSC_LASI=y @@ -32,11 +29,9 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_INET_AH=m CONFIG_INET_ESP=m -# CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m # CONFIG_IPV6 is not set CONFIG_NETFILTER=y -CONFIG_IP_NF_QUEUE=m CONFIG_LLC2=m CONFIG_NET_PKTGEN=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" @@ -65,21 +60,20 @@ CONFIG_MD_LINEAR=m CONFIG_MD_RAID0=m CONFIG_MD_RAID1=m CONFIG_NETDEVICES=y -CONFIG_DUMMY=m CONFIG_BONDING=m +CONFIG_DUMMY=m CONFIG_TUN=m -CONFIG_NET_ETHERNET=y -CONFIG_MII=m CONFIG_LASI_82596=y CONFIG_PPP=m -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -CONFIG_PPP_DEFLATE=m CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m CONFIG_PPP_MPPE=m CONFIG_PPPOE=m +CONFIG_PPP_ASYNC=m +CONFIG_PPP_SYNC_TTY=m # CONFIG_KEYBOARD_HIL_OLD is not set CONFIG_MOUSE_SERIAL=m +CONFIG_LEGACY_PTY_COUNT=64 CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_NR_UARTS=17 @@ -88,22 +82,17 @@ CONFIG_SERIAL_8250_MANY_PORTS=y CONFIG_SERIAL_8250_SHARE_IRQ=y # CONFIG_SERIAL_MUX is not set CONFIG_PDC_CONSOLE=y -CONFIG_LEGACY_PTY_COUNT=64 CONFIG_PRINTER=m CONFIG_PPDEV=m # CONFIG_HW_RANDOM is not set CONFIG_RAW_DRIVER=y # CONFIG_HWMON is not set -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_FB=y CONFIG_FB_MODE_HELPERS=y CONFIG_FB_TILEBLITTING=y CONFIG_DUMMY_CONSOLE_COLUMNS=128 CONFIG_DUMMY_CONSOLE_ROWS=48 CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_FONTS=y -CONFIG_FONT_8x8=y -CONFIG_FONT_8x16=y CONFIG_LOGO=y # CONFIG_LOGO_LINUX_MONO is not set # CONFIG_LOGO_LINUX_VGA16 is not set @@ -111,13 +100,9 @@ CONFIG_LOGO=y CONFIG_SOUND=y CONFIG_SND=y CONFIG_SND_SEQUENCER=y -CONFIG_SND_MIXER_OSS=y -CONFIG_SND_PCM_OSS=y -CONFIG_SND_SEQUENCER_OSS=y CONFIG_SND_HARMONY=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y -# CONFIG_EXT3_FS_XATTR is not set CONFIG_JFS_FS=m CONFIG_XFS_FS=m CONFIG_AUTOFS4_FS=y @@ -130,14 +115,10 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_UFS_FS=m CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m CONFIG_NFSD_V4=y -CONFIG_RPCSEC_GSS_SPKM3=m -CONFIG_SMB_FS=m -CONFIG_SMB_NLS_DEFAULT=y CONFIG_CIFS=m CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_CODEPAGE_737=m @@ -177,21 +158,16 @@ CONFIG_NLS_ISO8859_15=m CONFIG_NLS_KOI8_R=m CONFIG_NLS_KOI8_U=m CONFIG_NLS_UTF8=m -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y +CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_MUTEXES=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_CRYPTO_NULL=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_SHA256=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m -CONFIG_CRYPTO_AES=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAST6=m @@ -200,6 +176,7 @@ CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_DEFLATE=m -# CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set -CONFIG_LIBCRC32C=m +CONFIG_FONTS=y +CONFIG_FONT_8x8=y +CONFIG_FONT_8x16=y diff --git a/arch/parisc/configs/a500_defconfig b/arch/parisc/configs/a500_defconfig index 1a4f776b49b8..5acb93dcaabf 100644 --- a/arch/parisc/configs/a500_defconfig +++ b/arch/parisc/configs/a500_defconfig @@ -1,13 +1,10 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=16 -CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y CONFIG_SLAB=y @@ -16,7 +13,6 @@ CONFIG_OPROFILE=m CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y -# CONFIG_BLK_DEV_BSG is not set CONFIG_PA8X00=y CONFIG_64BIT=y CONFIG_SMP=y @@ -43,21 +39,17 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_INET_AH=m CONFIG_INET_ESP=m -# CONFIG_INET_LRO is not set CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m CONFIG_IPV6_TUNNEL=m CONFIG_NETFILTER=y # CONFIG_NETFILTER_XT_MATCH_DCCP is not set -CONFIG_IP_NF_QUEUE=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_ECN=m CONFIG_IP_NF_MATCH_TTL=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_TARGET_LOG=m -CONFIG_IP_NF_TARGET_ULOG=m CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_ECN=m CONFIG_IP_NF_RAW=m @@ -70,7 +62,6 @@ CONFIG_IP6_NF_MATCH_OPTS=m CONFIG_IP6_NF_MATCH_HL=m CONFIG_IP6_NF_MATCH_IPV6HEADER=m CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_TARGET_LOG=m CONFIG_IP6_NF_FILTER=m CONFIG_IP6_NF_TARGET_REJECT=m CONFIG_IP6_NF_MANGLE=m @@ -94,7 +85,6 @@ CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y CONFIG_BLK_DEV_SR=y CONFIG_CHR_DEV_SG=y -CONFIG_SCSI_MULTI_LUN=y CONFIG_SCSI_ISCSI_ATTRS=m CONFIG_SCSI_SYM53C8XX_2=y CONFIG_SCSI_QLOGIC_1280=m @@ -106,43 +96,38 @@ CONFIG_MD_RAID0=y CONFIG_MD_RAID1=y CONFIG_FUSION=y CONFIG_FUSION_SPI=m -CONFIG_FUSION_FC=m CONFIG_FUSION_CTL=m CONFIG_NETDEVICES=y -CONFIG_DUMMY=m CONFIG_BONDING=m +CONFIG_DUMMY=m CONFIG_TUN=m -CONFIG_NET_ETHERNET=y -CONFIG_NET_VENDOR_3COM=y +CONFIG_PCMCIA_3C574=m +CONFIG_PCMCIA_3C589=m CONFIG_VORTEX=m CONFIG_TYPHOON=m +CONFIG_ACENIC=m +CONFIG_ACENIC_OMIT_TIGON_I=y +CONFIG_PCNET32=m +CONFIG_TIGON3=m CONFIG_NET_TULIP=y CONFIG_DE2104X=m CONFIG_TULIP=y CONFIG_TULIP_MMIO=y CONFIG_PCMCIA_XIRCOM=m CONFIG_HP100=m -CONFIG_NET_PCI=y -CONFIG_PCNET32=m CONFIG_E100=m -CONFIG_ACENIC=m -CONFIG_ACENIC_OMIT_TIGON_I=y CONFIG_E1000=m -CONFIG_TIGON3=m -CONFIG_NET_PCMCIA=y -CONFIG_PCMCIA_3C589=m -CONFIG_PCMCIA_3C574=m CONFIG_PCMCIA_SMC91C92=m CONFIG_PCMCIA_XIRC2PS=m CONFIG_PPP=m +CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m CONFIG_PPP_ASYNC=m CONFIG_PPP_SYNC_TTY=m -CONFIG_PPP_DEFLATE=m -CONFIG_PPP_BSDCOMP=m -# CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set +# CONFIG_LEGACY_PTYS is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_CS=m @@ -151,7 +136,6 @@ CONFIG_SERIAL_8250_EXTENDED=y CONFIG_SERIAL_8250_MANY_PORTS=y CONFIG_SERIAL_8250_SHARE_IRQ=y CONFIG_PDC_CONSOLE=y -# CONFIG_LEGACY_PTYS is not set # CONFIG_HW_RANDOM is not set CONFIG_RAW_DRIVER=y # CONFIG_HWMON is not set @@ -160,7 +144,6 @@ CONFIG_AGP_PARISC=y # CONFIG_STI_CONSOLE is not set CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y -# CONFIG_EXT3_FS_XATTR is not set CONFIG_JFS_FS=m CONFIG_XFS_FS=m CONFIG_AUTOFS4_FS=y @@ -173,13 +156,9 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_UFS_FS=m CONFIG_NFS_FS=m -CONFIG_NFS_V3=y -CONFIG_NFS_V4=y +CONFIG_NFS_V4=m CONFIG_NFSD=m CONFIG_NFSD_V4=y -CONFIG_RPCSEC_GSS_SPKM3=m -CONFIG_SMB_FS=m -CONFIG_SMB_NLS_DEFAULT=y CONFIG_CIFS=m CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_CODEPAGE_850=m @@ -187,17 +166,12 @@ CONFIG_NLS_ASCII=m CONFIG_NLS_ISO8859_1=m CONFIG_NLS_ISO8859_15=m CONFIG_NLS_UTF8=m -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y CONFIG_HEADERS_CHECK=y -CONFIG_DEBUG_KERNEL=y +CONFIG_MAGIC_SYSRQ=y # CONFIG_DEBUG_BUGVERBOSE is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_CRYPTO_NULL=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_BLOWFISH=m -# CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set -CONFIG_LIBCRC32C=m diff --git a/arch/parisc/configs/b180_defconfig b/arch/parisc/configs/b180_defconfig index f1a0c25bef8d..83ffd161aec5 100644 --- a/arch/parisc/configs/b180_defconfig +++ b/arch/parisc/configs/b180_defconfig @@ -3,7 +3,6 @@ CONFIG_SYSVIPC=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=16 -CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_BLK_DEV_INITRD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -25,8 +24,6 @@ CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_PNP=y CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_LRO is not set -CONFIG_IPV6=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y @@ -53,10 +50,9 @@ CONFIG_MD_LINEAR=y CONFIG_MD_RAID0=y CONFIG_MD_RAID1=y CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y -CONFIG_LASI_82596=y CONFIG_NET_TULIP=y CONFIG_TULIP=y +CONFIG_LASI_82596=y CONFIG_PPP=y CONFIG_INPUT_EVDEV=y # CONFIG_KEYBOARD_HIL_OLD is not set @@ -71,40 +67,31 @@ CONFIG_SERIAL_8250_SHARE_IRQ=y CONFIG_PRINTER=y # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_FB=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_SOUND=y CONFIG_SND=y CONFIG_SND_SEQUENCER=y -CONFIG_SND_MIXER_OSS=y -CONFIG_SND_PCM_OSS=y -CONFIG_SND_SEQUENCER_OSS=y CONFIG_SND_HARMONY=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y -# CONFIG_EXT3_FS_XATTR is not set CONFIG_AUTOFS4_FS=y CONFIG_ISO9660_FS=y CONFIG_JOLIET=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_NFSD=y CONFIG_NFSD_V3=y -CONFIG_SMB_FS=y CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_CODEPAGE_850=m CONFIG_NLS_ASCII=m CONFIG_NLS_ISO8859_1=m CONFIG_NLS_ISO8859_15=m CONFIG_NLS_UTF8=m -CONFIG_MAGIC_SYSRQ=y CONFIG_HEADERS_CHECK=y +CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_SECURITY=y -# CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/parisc/configs/c3000_defconfig b/arch/parisc/configs/c3000_defconfig index 8e8f0e34f817..0764d3971cf6 100644 --- a/arch/parisc/configs/c3000_defconfig +++ b/arch/parisc/configs/c3000_defconfig @@ -1,12 +1,9 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=16 -CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y CONFIG_SLAB=y @@ -15,7 +12,6 @@ CONFIG_OPROFILE=m CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y -# CONFIG_BLK_DEV_BSG is not set CONFIG_PA8X00=y CONFIG_PREEMPT_VOLUNTARY=y # CONFIG_GSC is not set @@ -31,13 +27,11 @@ CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_PNP=y CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_LRO is not set # CONFIG_INET_DIAG is not set CONFIG_INET6_IPCOMP=m CONFIG_IPV6_TUNNEL=m CONFIG_NETFILTER=y CONFIG_NETFILTER_DEBUG=y -CONFIG_IP_NF_QUEUE=m CONFIG_NET_PKTGEN=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y @@ -50,13 +44,11 @@ CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_IDE=y CONFIG_BLK_DEV_IDECD=y CONFIG_BLK_DEV_NS87415=y -CONFIG_PATA_SIL680=m CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y CONFIG_BLK_DEV_SR=y CONFIG_CHR_DEV_SG=y -CONFIG_SCSI_MULTI_LUN=y CONFIG_SCSI_ISCSI_ATTRS=m CONFIG_SCSI_SYM53C8XX_2=y CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0 @@ -76,28 +68,23 @@ CONFIG_FUSION=y CONFIG_FUSION_SPI=m CONFIG_FUSION_CTL=m CONFIG_NETDEVICES=y -CONFIG_DUMMY=m CONFIG_BONDING=m +CONFIG_DUMMY=m CONFIG_TUN=m -CONFIG_NET_ETHERNET=y +CONFIG_ACENIC=m +CONFIG_TIGON3=m CONFIG_NET_TULIP=y CONFIG_DE2104X=m CONFIG_TULIP=y CONFIG_TULIP_MMIO=y -CONFIG_NET_PCI=y CONFIG_E100=m -CONFIG_ACENIC=m CONFIG_E1000=m -CONFIG_TIGON3=m CONFIG_PPP=m -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -CONFIG_PPP_DEFLATE=m CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m CONFIG_PPPOE=m -# CONFIG_INPUT_MOUSEDEV_PSAUX is not set -CONFIG_INPUT_MOUSEDEV_SCREEN_X=1600 -CONFIG_INPUT_MOUSEDEV_SCREEN_Y=1200 +CONFIG_PPP_ASYNC=m +CONFIG_PPP_SYNC_TTY=m # CONFIG_KEYBOARD_ATKBD is not set # CONFIG_MOUSE_PS2 is not set CONFIG_SERIO=m @@ -111,7 +98,6 @@ CONFIG_SERIAL_8250_SHARE_IRQ=y # CONFIG_HW_RANDOM is not set CONFIG_RAW_DRIVER=y # CONFIG_HWMON is not set -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_FB=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y @@ -121,9 +107,6 @@ CONFIG_LOGO=y CONFIG_SOUND=y CONFIG_SND=y CONFIG_SND_SEQUENCER=y -CONFIG_SND_MIXER_OSS=y -CONFIG_SND_PCM_OSS=y -CONFIG_SND_SEQUENCER_OSS=y CONFIG_SND_AD1889=y CONFIG_USB_HIDDEV=y CONFIG_USB=y @@ -139,7 +122,6 @@ CONFIG_USB_MICROTEK=m CONFIG_USB_LEGOTOWER=m CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y -# CONFIG_EXT3_FS_XATTR is not set CONFIG_XFS_FS=m CONFIG_AUTOFS4_FS=y CONFIG_ISO9660_FS=y @@ -149,7 +131,6 @@ CONFIG_VFAT_FS=m CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_NFSD=y CONFIG_NFSD_V3=y @@ -159,18 +140,13 @@ CONFIG_NLS_ASCII=m CONFIG_NLS_ISO8859_1=m CONFIG_NLS_ISO8859_15=m CONFIG_NLS_UTF8=m -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y CONFIG_HEADERS_CHECK=y -CONFIG_DEBUG_KERNEL=y +CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_MUTEXES=y # CONFIG_DEBUG_BUGVERBOSE is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_CRYPTO_NULL=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_MD5=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_DES=m -# CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set -CONFIG_LIBCRC32C=m diff --git a/arch/parisc/configs/c8000_defconfig b/arch/parisc/configs/c8000_defconfig index f6a4c016304b..088ab948a5ca 100644 --- a/arch/parisc/configs/c8000_defconfig +++ b/arch/parisc/configs/c8000_defconfig @@ -1,16 +1,13 @@ # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y -CONFIG_FHANDLE=y +# CONFIG_CROSS_MEMORY_ATTACH is not set CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y -CONFIG_RD_BZIP2=y -CONFIG_RD_LZMA=y -CONFIG_RD_LZO=y CONFIG_EXPERT=y CONFIG_SYSCTL_SYSCALL=y CONFIG_SLAB=y @@ -23,7 +20,6 @@ CONFIG_PA8X00=y CONFIG_64BIT=y CONFIG_SMP=y CONFIG_PREEMPT=y -# CONFIG_CROSS_MEMORY_ATTACH is not set CONFIG_IOMMU_CCIO=y CONFIG_PCI=y CONFIG_PCI_LBA=y @@ -146,7 +142,6 @@ CONFIG_FB_FOREIGN_ENDIAN=y CONFIG_FB_MODE_HELPERS=y CONFIG_FB_TILEBLITTING=y # CONFIG_FB_STI is not set -CONFIG_BACKLIGHT_LCD_SUPPORT=y # CONFIG_LCD_CLASS_DEVICE is not set # CONFIG_BACKLIGHT_GENERIC is not set CONFIG_FRAMEBUFFER_CONSOLE=y @@ -157,12 +152,9 @@ CONFIG_LOGO=y # CONFIG_LOGO_LINUX_CLUT224 is not set CONFIG_SOUND=m CONFIG_SND=m +CONFIG_SND_VERBOSE_PRINTK=y CONFIG_SND_SEQUENCER=m CONFIG_SND_SEQ_DUMMY=m -CONFIG_SND_MIXER_OSS=m -CONFIG_SND_PCM_OSS=m -CONFIG_SND_SEQUENCER_OSS=y -CONFIG_SND_VERBOSE_PRINTK=y CONFIG_SND_AD1889=m # CONFIG_SND_USB is not set # CONFIG_SND_GSC is not set @@ -174,8 +166,6 @@ CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT4_FS=m CONFIG_REISERFS_FS=m CONFIG_REISERFS_PROC_INFO=y CONFIG_XFS_FS=m @@ -238,11 +228,8 @@ CONFIG_DEBUG_SLAB=y CONFIG_DEBUG_SLAB_LEAK=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_DEBUG_STACKOVERFLOW=y -CONFIG_LOCKUP_DETECTOR=y -CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y CONFIG_PANIC_ON_OOPS=y CONFIG_DEBUG_RT_MUTEXES=y -CONFIG_PROVE_RCU_DELAY=y CONFIG_DEBUG_BLOCK_EXT_DEVT=y CONFIG_LATENCYTOP=y CONFIG_KEYS=y diff --git a/arch/parisc/configs/default_defconfig b/arch/parisc/configs/default_defconfig index 310b6657e4ac..52c9050a7c5c 100644 --- a/arch/parisc/configs/default_defconfig +++ b/arch/parisc/configs/default_defconfig @@ -1,11 +1,9 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=16 -CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_BLK_DEV_INITRD=y CONFIG_KALLSYMS_ALL=y CONFIG_SLAB=y @@ -41,9 +39,7 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_INET_AH=m CONFIG_INET_ESP=m -# CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m -CONFIG_IPV6=y CONFIG_INET6_AH=y CONFIG_INET6_ESP=y CONFIG_INET6_IPCOMP=y @@ -82,26 +78,23 @@ CONFIG_MD_RAID1=y CONFIG_MD_RAID10=y CONFIG_BLK_DEV_DM=y CONFIG_NETDEVICES=y -CONFIG_DUMMY=m CONFIG_BONDING=m +CONFIG_DUMMY=m CONFIG_TUN=m -CONFIG_NET_ETHERNET=y -CONFIG_MII=m -CONFIG_LASI_82596=y -CONFIG_NET_TULIP=y -CONFIG_TULIP=y -CONFIG_NET_PCI=y CONFIG_ACENIC=y CONFIG_TIGON3=y -CONFIG_NET_PCMCIA=y +CONFIG_NET_TULIP=y +CONFIG_TULIP=y +CONFIG_LASI_82596=y CONFIG_PPP=m -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -CONFIG_PPP_DEFLATE=m CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m CONFIG_PPPOE=m +CONFIG_PPP_ASYNC=m +CONFIG_PPP_SYNC_TTY=m # CONFIG_KEYBOARD_HIL_OLD is not set CONFIG_MOUSE_SERIAL=y +CONFIG_LEGACY_PTY_COUNT=64 CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_CS=y @@ -109,31 +102,24 @@ CONFIG_SERIAL_8250_NR_UARTS=17 CONFIG_SERIAL_8250_EXTENDED=y CONFIG_SERIAL_8250_MANY_PORTS=y CONFIG_SERIAL_8250_SHARE_IRQ=y -CONFIG_LEGACY_PTY_COUNT=64 CONFIG_PRINTER=m CONFIG_PPDEV=m # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_FB=y CONFIG_FB_MODE_HELPERS=y CONFIG_FB_TILEBLITTING=y CONFIG_DUMMY_CONSOLE_COLUMNS=128 CONFIG_DUMMY_CONSOLE_ROWS=48 CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_FONTS=y -CONFIG_FONT_8x16=y CONFIG_LOGO=y # CONFIG_LOGO_LINUX_MONO is not set # CONFIG_LOGO_LINUX_VGA16 is not set # CONFIG_LOGO_LINUX_CLUT224 is not set CONFIG_SOUND=y CONFIG_SND=y -CONFIG_SND_SEQUENCER=y -CONFIG_SND_MIXER_OSS=y -CONFIG_SND_PCM_OSS=y -CONFIG_SND_SEQUENCER_OSS=y CONFIG_SND_DYNAMIC_MINORS=y +CONFIG_SND_SEQUENCER=y CONFIG_SND_AD1889=y CONFIG_SND_HARMONY=y CONFIG_HID_GYRATION=y @@ -141,7 +127,6 @@ CONFIG_HID_NTRIG=y CONFIG_HID_PANTHERLORD=y CONFIG_HID_PETALYNX=y CONFIG_HID_SAMSUNG=y -CONFIG_HID_SONY=y CONFIG_HID_SUNPLUS=y CONFIG_HID_TOPSEED=y CONFIG_USB=y @@ -150,21 +135,15 @@ CONFIG_USB_OHCI_HCD=y CONFIG_USB_UHCI_HCD=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y -# CONFIG_EXT3_FS_XATTR is not set -CONFIG_AUTOFS_FS=y CONFIG_ISO9660_FS=y CONFIG_JOLIET=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_NFSD=y CONFIG_NFSD_V4=y -CONFIG_RPCSEC_GSS_SPKM3=m -CONFIG_SMB_FS=m -CONFIG_SMB_NLS_DEFAULT=y CONFIG_CIFS=m CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_737=m @@ -204,30 +183,24 @@ CONFIG_NLS_ISO8859_15=m CONFIG_NLS_KOI8_R=m CONFIG_NLS_KOI8_U=m CONFIG_NLS_UTF8=y -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y CONFIG_HEADERS_CHECK=y +CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_MUTEXES=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_KEYS=y -CONFIG_CRYPTO_NULL=m CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_SHA256=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m -CONFIG_CRYPTO_AES=m CONFIG_CRYPTO_ANUBIS=m -CONFIG_CRYPTO_ARC4=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m -# CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set CONFIG_LIBCRC32C=m +CONFIG_FONTS=y diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig index 8688ba7f5966..37ae4b57c001 100644 --- a/arch/parisc/configs/generic-32bit_defconfig +++ b/arch/parisc/configs/generic-32bit_defconfig @@ -2,15 +2,11 @@ CONFIG_LOCALVERSION="-32bit" # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y -CONFIG_FHANDLE=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=16 CONFIG_BLK_DEV_INITRD=y -CONFIG_RD_BZIP2=y -CONFIG_RD_LZMA=y -CONFIG_RD_LZO=y CONFIG_EXPERT=y CONFIG_SYSCTL_SYSCALL=y CONFIG_PERF_EVENTS=y @@ -49,7 +45,6 @@ CONFIG_INET_ESP=m # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m CONFIG_LLC2=m # CONFIG_WIRELESS is not set @@ -149,10 +144,8 @@ CONFIG_PRINTER=m CONFIG_PPDEV=m # CONFIG_HW_RANDOM is not set CONFIG_I2C=y -CONFIG_POWER_SUPPLY=y # CONFIG_HWMON is not set CONFIG_AGP=y -CONFIG_VIDEO_OUTPUT_CONTROL=y CONFIG_FB=y CONFIG_FB_FOREIGN_ENDIAN=y CONFIG_FB_MODE_HELPERS=y @@ -169,11 +162,8 @@ CONFIG_LOGO=y # CONFIG_LOGO_LINUX_CLUT224 is not set CONFIG_SOUND=m CONFIG_SND=m -CONFIG_SND_SEQUENCER=m -CONFIG_SND_MIXER_OSS=m -CONFIG_SND_PCM_OSS=m -CONFIG_SND_SEQUENCER_OSS=y CONFIG_SND_DYNAMIC_MINORS=y +CONFIG_SND_SEQUENCER=m CONFIG_SND_AD1889=m CONFIG_SND_HARMONY=m CONFIG_HIDRAW=y @@ -223,12 +213,7 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_SECURITY=y CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT3_FS_SECURITY=y -CONFIG_EXT4_FS=y -CONFIG_XFS_FS=m -CONFIG_XFS_QUOTA=y -CONFIG_XFS_RT=y CONFIG_QUOTA=y CONFIG_QUOTA_NETLINK_INTERFACE=y CONFIG_QFMT_V2=y @@ -293,15 +278,12 @@ CONFIG_DEBUG_MEMORY_INIT=y CONFIG_DEBUG_STACKOVERFLOW=y CONFIG_DEBUG_SHIRQ=y CONFIG_DETECT_HUNG_TASK=y -CONFIG_TIMER_STATS=y CONFIG_DEBUG_RT_MUTEXES=y CONFIG_DEBUG_SPINLOCK=y CONFIG_DEBUG_MUTEXES=y -CONFIG_RCU_CPU_STALL_INFO=y CONFIG_LATENCYTOP=y CONFIG_LKDTM=m CONFIG_KEYS=y -CONFIG_CRYPTO_NULL=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_MD5=y @@ -320,7 +302,6 @@ CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_DEFLATE=y -# CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRC_CCITT=m CONFIG_CRC_T10DIF=y CONFIG_FONTS=y diff --git a/arch/parisc/configs/generic-64bit_defconfig b/arch/parisc/configs/generic-64bit_defconfig index c564e6e1fa23..d39e7f821aba 100644 --- a/arch/parisc/configs/generic-64bit_defconfig +++ b/arch/parisc/configs/generic-64bit_defconfig @@ -8,10 +8,11 @@ CONFIG_TASKSTATS=y CONFIG_TASK_DELAY_ACCT=y CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y -# CONFIG_UTS_NS is not set -# CONFIG_IPC_NS is not set -# CONFIG_PID_NS is not set -# CONFIG_NET_NS is not set +CONFIG_CGROUPS=y +CONFIG_MEMCG=y +CONFIG_MEMCG_SWAP=y +CONFIG_CGROUP_PIDS=y +CONFIG_CPUSETS=y CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y @@ -52,7 +53,6 @@ CONFIG_INET_ESP=m CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m CONFIG_INET_XFRM_MODE_BEET=m -CONFIG_INET_LRO=m CONFIG_INET_DIAG=m CONFIG_NETFILTER=y # CONFIG_NETFILTER_ADVANCED is not set @@ -84,7 +84,6 @@ CONFIG_PATA_SIL680=y CONFIG_ATA_GENERIC=y CONFIG_MD=y CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m CONFIG_BLK_DEV_DM=m CONFIG_DM_RAID=m CONFIG_DM_UEVENT=y @@ -138,21 +137,21 @@ CONFIG_QLGE=m # CONFIG_NET_VENDOR_TI is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set +CONFIG_MDIO_BITBANG=m CONFIG_PHYLIB=y -CONFIG_MARVELL_PHY=m -CONFIG_DAVICOM_PHY=m -CONFIG_QSEMI_PHY=m -CONFIG_LXT_PHY=m -CONFIG_CICADA_PHY=m -CONFIG_VITESSE_PHY=m -CONFIG_SMSC_PHY=m CONFIG_BROADCOM_PHY=m +CONFIG_CICADA_PHY=m +CONFIG_DAVICOM_PHY=m CONFIG_ICPLUS_PHY=m -CONFIG_REALTEK_PHY=m +CONFIG_LSI_ET1011C_PHY=m +CONFIG_LXT_PHY=m +CONFIG_MARVELL_PHY=m CONFIG_NATIONAL_PHY=m +CONFIG_QSEMI_PHY=m +CONFIG_REALTEK_PHY=m +CONFIG_SMSC_PHY=m CONFIG_STE10XP=m -CONFIG_LSI_ET1011C_PHY=m -CONFIG_MDIO_BITBANG=m +CONFIG_VITESSE_PHY=m CONFIG_SLIP=m CONFIG_SLIP_COMPRESSED=y CONFIG_SLIP_SMART=y @@ -166,10 +165,8 @@ CONFIG_INPUT_MISC=y CONFIG_SERIO_SERPORT=m # CONFIG_HP_SDC is not set CONFIG_SERIO_RAW=m -CONFIG_DEVPTS_MULTIPLE_INSTANCES=y # CONFIG_LEGACY_PTYS is not set CONFIG_NOZOMI=m -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_8250=y # CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set CONFIG_SERIAL_8250_CONSOLE=y @@ -207,10 +204,8 @@ CONFIG_AGP=y CONFIG_AGP_PARISC=y CONFIG_DRM=y CONFIG_DRM_RADEON=y -CONFIG_DRM_RADEON_UMS=y CONFIG_FIRMWARE_EDID=y CONFIG_FB_MODE_HELPERS=y -CONFIG_BACKLIGHT_LCD_SUPPORT=y # CONFIG_BACKLIGHT_GENERIC is not set CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y CONFIG_LOGO=y @@ -246,8 +241,6 @@ CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_SECURITY=y CONFIG_EXT3_FS=y CONFIG_EXT3_FS_SECURITY=y -CONFIG_EXT4_FS=y -CONFIG_EXT4_FS_SECURITY=y CONFIG_XFS_FS=m CONFIG_BTRFS_FS=m CONFIG_QUOTA=y @@ -286,27 +279,16 @@ CONFIG_DEBUG_FS=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_STACKOVERFLOW=y -CONFIG_LOCKUP_DETECTOR=y -CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y -CONFIG_BOOTPARAM_HUNG_TASK_PANIC=y # CONFIG_SCHED_DEBUG is not set -CONFIG_TIMER_STATS=y CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_SHA256=m CONFIG_CRYPTO_ARC4=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_DEFLATE=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC_CCITT=m CONFIG_LIBCRC32C=y -CONFIG_XZ_DEC_X86=y -CONFIG_XZ_DEC_POWERPC=y -CONFIG_XZ_DEC_IA64=y -CONFIG_XZ_DEC_ARM=y -CONFIG_XZ_DEC_ARMTHUMB=y -CONFIG_XZ_DEC_SPARC=y -- cgit v1.2.3 From e47057151422a67ce08747176fa21cb3b526a2c9 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 21 Jul 2017 13:57:14 +1000 Subject: KVM: PPC: Book3S HV: Enable TM before accessing TM registers Commit 46a704f8409f ("KVM: PPC: Book3S HV: Preserve userspace HTM state properly", 2017-06-15) added code to read transactional memory (TM) registers but forgot to enable TM before doing so. The result is that if userspace does have live values in the TM registers, a KVM_RUN ioctl will cause a host kernel crash like this: [ 181.328511] Unrecoverable TM Unavailable Exception f60 at d00000001e7d9980 [ 181.328605] Oops: Unrecoverable TM Unavailable Exception, sig: 6 [#1] [ 181.328613] SMP NR_CPUS=2048 [ 181.328613] NUMA [ 181.328618] PowerNV [ 181.328646] Modules linked in: vhost_net vhost tap nfs_layout_nfsv41_files rpcsec_gss_krb5 nfsv4 dns_resolver nfs +fscache xt_CHECKSUM iptable_mangle ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_nat_ipv4 nf_nat +nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ipt_REJECT nf_reject_ipv4 tun ebtable_filter ebtables +ip6table_filter ip6_tables iptable_filter bridge stp llc kvm_hv kvm nfsd ses enclosure scsi_transport_sas ghash_generic +auth_rpcgss gf128mul xts sg ctr nfs_acl lockd vmx_crypto shpchp ipmi_powernv i2c_opal grace ipmi_devintf i2c_core +powernv_rng sunrpc ipmi_msghandler ibmpowernv uio_pdrv_genirq uio leds_powernv powernv_op_panel ip_tables xfs sd_mod +lpfc ipr bnx2x libata mdio ptp pps_core scsi_transport_fc libcrc32c dm_mirror dm_region_hash dm_log dm_mod [ 181.329278] CPU: 40 PID: 9926 Comm: CPU 0/KVM Not tainted 4.12.0+ #1 [ 181.329337] task: c000003fc6980000 task.stack: c000003fe4d80000 [ 181.329396] NIP: d00000001e7d9980 LR: d00000001e77381c CTR: d00000001e7d98f0 [ 181.329465] REGS: c000003fe4d837e0 TRAP: 0f60 Not tainted (4.12.0+) [ 181.329523] MSR: 9000000000009033 [ 181.329527] CR: 24022448 XER: 00000000 [ 181.329608] CFAR: d00000001e773818 SOFTE: 1 [ 181.329608] GPR00: d00000001e77381c c000003fe4d83a60 d00000001e7ef410 c000003fdcfe0000 [ 181.329608] GPR04: c000003fe4f00000 0000000000000000 0000000000000000 c000003fd7954800 [ 181.329608] GPR08: 0000000000000001 c000003fc6980000 0000000000000000 d00000001e7e2880 [ 181.329608] GPR12: d00000001e7d98f0 c000000007b19000 00000001295220e0 00007fffc0ce2090 [ 181.329608] GPR16: 0000010011886608 00007fff8c89f260 0000000000000001 00007fff8c080028 [ 181.329608] GPR20: 0000000000000000 00000100118500a6 0000010011850000 0000010011850000 [ 181.329608] GPR24: 00007fffc0ce1b48 0000010011850000 00000000d673b901 0000000000000000 [ 181.329608] GPR28: 0000000000000000 c000003fdcfe0000 c000003fdcfe0000 c000003fe4f00000 [ 181.330199] NIP [d00000001e7d9980] kvmppc_vcpu_run_hv+0x90/0x6b0 [kvm_hv] [ 181.330264] LR [d00000001e77381c] kvmppc_vcpu_run+0x2c/0x40 [kvm] [ 181.330322] Call Trace: [ 181.330351] [c000003fe4d83a60] [d00000001e773478] kvmppc_set_one_reg+0x48/0x340 [kvm] (unreliable) [ 181.330437] [c000003fe4d83b30] [d00000001e77381c] kvmppc_vcpu_run+0x2c/0x40 [kvm] [ 181.330513] [c000003fe4d83b50] [d00000001e7700b4] kvm_arch_vcpu_ioctl_run+0x114/0x2a0 [kvm] [ 181.330586] [c000003fe4d83bd0] [d00000001e7642f8] kvm_vcpu_ioctl+0x598/0x7a0 [kvm] [ 181.330658] [c000003fe4d83d40] [c0000000003451b8] do_vfs_ioctl+0xc8/0x8b0 [ 181.330717] [c000003fe4d83de0] [c000000000345a64] SyS_ioctl+0xc4/0x120 [ 181.330776] [c000003fe4d83e30] [c00000000000b004] system_call+0x58/0x6c [ 181.330833] Instruction dump: [ 181.330869] e92d0260 e9290b50 e9290108 792807e3 41820058 e92d0260 e9290b50 e9290108 [ 181.330941] 792ae8a4 794a1f87 408204f4 e92d0260 <7d4022a6> f9490ff0 e92d0260 7d4122a6 [ 181.331013] ---[ end trace 6f6ddeb4bfe92a92 ]--- The fix is just to turn on the TM bit in the MSR before accessing the registers. Cc: stable@vger.kernel.org # v3.14+ Fixes: 46a704f8409f ("KVM: PPC: Book3S HV: Preserve userspace HTM state properly") Reported-by: Jan Stancek Tested-by: Jan Stancek Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 0b436df746fc..359c79cdf0cc 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -3211,6 +3211,8 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) run->fail_entry.hardware_entry_failure_reason = 0; return -EINVAL; } + /* Enable TM so we can read the TM SPRs */ + mtmsr(mfmsr() | MSR_TM); current->thread.tm_tfhar = mfspr(SPRN_TFHAR); current->thread.tm_tfiar = mfspr(SPRN_TFIAR); current->thread.tm_texasr = mfspr(SPRN_TEXASR); -- cgit v1.2.3 From ef42719814db06fdfa26cd7566de0b64de173320 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 21 Jul 2017 15:41:49 +1000 Subject: KVM: PPC: Book3S HV: Fix host crash on changing HPT size Commit f98a8bf9ee20 ("KVM: PPC: Book3S HV: Allow KVM_PPC_ALLOCATE_HTAB ioctl() to change HPT size", 2016-12-20) changed the behaviour of the KVM_PPC_ALLOCATE_HTAB ioctl so that it now allocates a new HPT and new revmap array if there was a previously-allocated HPT of a different size from the size being requested. In this case, we need to reset the rmap arrays of the memslots, because the rmap arrays will contain references to HPTEs which are no longer valid. Worse, these references are also references to slots in the new revmap array (which parallels the HPT), and the new revmap array contains random contents, since it doesn't get zeroed on allocation. The effect of having these stale references to slots in the revmap array that contain random contents is that subsequent calls to functions such as kvmppc_add_revmap_chain will crash because they will interpret the non-zero contents of the revmap array as HPTE indexes and thus index outside of the revmap array. This leads to host crashes such as the following. [ 7072.862122] Unable to handle kernel paging request for data at address 0xd000000c250c00f8 [ 7072.862218] Faulting instruction address: 0xc0000000000e1c78 [ 7072.862233] Oops: Kernel access of bad area, sig: 11 [#1] [ 7072.862286] SMP NR_CPUS=1024 [ 7072.862286] NUMA [ 7072.862325] PowerNV [ 7072.862378] Modules linked in: kvm_hv vhost_net vhost tap xt_CHECKSUM ipt_MASQUERADE nf_nat_masquerade_ipv4 ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 nf_conntrack_ipv6 nf_defrag_ipv6 xt_conntrack ip_set nfnetlink ebtable_nat ebtable_broute bridge stp llc ip6table_mangle ip6table_security ip6table_raw iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw ebtable_filter ebtables ip6table_filter ip6_tables rpcrdma ib_isert iscsi_target_mod ib_iser libiscsi scsi_transport_iscsi ib_srpt target_core_mod ib_srp scsi_transport_srp ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm iw_cxgb3 mlx5_ib ib_core ses enclosure scsi_transport_sas ipmi_powernv ipmi_devintf ipmi_msghandler powernv_op_panel i2c_opal nfsd auth_rpcgss oid_registry [ 7072.863085] nfs_acl lockd grace sunrpc kvm_pr kvm xfs libcrc32c scsi_dh_alua dm_service_time radeon lpfc nvme_fc nvme_fabrics nvme_core scsi_transport_fc i2c_algo_bit tg3 drm_kms_helper ptp pps_core syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm dm_multipath i2c_core cxgb3 mlx5_core mdio [last unloaded: kvm_hv] [ 7072.863381] CPU: 72 PID: 56929 Comm: qemu-system-ppc Not tainted 4.12.0-kvm+ #59 [ 7072.863457] task: c000000fe29e7600 task.stack: c000001e3ffec000 [ 7072.863520] NIP: c0000000000e1c78 LR: c0000000000e2e3c CTR: c0000000000e25f0 [ 7072.863596] REGS: c000001e3ffef560 TRAP: 0300 Not tainted (4.12.0-kvm+) [ 7072.863658] MSR: 9000000100009033 [ 7072.863667] CR: 44082882 XER: 20000000 [ 7072.863767] CFAR: c0000000000e2e38 DAR: d000000c250c00f8 DSISR: 42000000 SOFTE: 1 GPR00: c0000000000e2e3c c000001e3ffef7e0 c000000001407d00 d000000c250c00f0 GPR04: d00000006509fb70 d00000000b3d2048 0000000003ffdfb7 0000000000000000 GPR08: 00000001007fdfb7 00000000c000000f d0000000250c0000 000000000070f7bf GPR12: 0000000000000008 c00000000fdad000 0000000010879478 00000000105a0d78 GPR16: 00007ffaf4080000 0000000000001190 0000000000000000 0000000000010000 GPR20: 4001ffffff000415 d00000006509fb70 0000000004091190 0000000ee1881190 GPR24: 0000000003ffdfb7 0000000003ffdfb7 00000000007fdfb7 c000000f5c958000 GPR28: d00000002d09fb70 0000000003ffdfb7 d00000006509fb70 d00000000b3d2048 [ 7072.864439] NIP [c0000000000e1c78] kvmppc_add_revmap_chain+0x88/0x130 [ 7072.864503] LR [c0000000000e2e3c] kvmppc_do_h_enter+0x84c/0x9e0 [ 7072.864566] Call Trace: [ 7072.864594] [c000001e3ffef7e0] [c000001e3ffef830] 0xc000001e3ffef830 (unreliable) [ 7072.864671] [c000001e3ffef830] [c0000000000e2e3c] kvmppc_do_h_enter+0x84c/0x9e0 [ 7072.864751] [c000001e3ffef920] [d00000000b38d878] kvmppc_map_vrma+0x168/0x200 [kvm_hv] [ 7072.864831] [c000001e3ffef9e0] [d00000000b38a684] kvmppc_vcpu_run_hv+0x1284/0x1300 [kvm_hv] [ 7072.864914] [c000001e3ffefb30] [d00000000f465664] kvmppc_vcpu_run+0x44/0x60 [kvm] [ 7072.865008] [c000001e3ffefb60] [d00000000f461864] kvm_arch_vcpu_ioctl_run+0x114/0x290 [kvm] [ 7072.865152] [c000001e3ffefbe0] [d00000000f453c98] kvm_vcpu_ioctl+0x598/0x7a0 [kvm] [ 7072.865292] [c000001e3ffefd40] [c000000000389328] do_vfs_ioctl+0xd8/0x8c0 [ 7072.865410] [c000001e3ffefde0] [c000000000389be4] SyS_ioctl+0xd4/0x130 [ 7072.865526] [c000001e3ffefe30] [c00000000000b760] system_call+0x58/0x6c [ 7072.865644] Instruction dump: [ 7072.865715] e95b2110 793a0020 7b4926e4 7f8a4a14 409e0098 807c000c 786326e4 7c6a1a14 [ 7072.865857] 935e0008 7bbd0020 813c000c 913e000c <93a30008> 93bc000c 48000038 60000000 [ 7072.866001] ---[ end trace 627b6e4bf8080edc ]--- Note that to trigger this, it is necessary to use a recent upstream QEMU (or other userspace that resizes the HPT at CAS time), specify a maximum memory size substantially larger than the current memory size, and boot a guest kernel that does not support HPT resizing. This fixes the problem by resetting the rmap arrays when the old HPT is freed. Fixes: f98a8bf9ee20 ("KVM: PPC: Book3S HV: Allow KVM_PPC_ALLOCATE_HTAB ioctl() to change HPT size") Cc: stable@vger.kernel.org # v4.11+ Reviewed-by: David Gibson Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_mmu_hv.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 8cb0190e2a73..b42812e014c0 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -164,8 +164,10 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order) goto out; } - if (kvm->arch.hpt.virt) + if (kvm->arch.hpt.virt) { kvmppc_free_hpt(&kvm->arch.hpt); + kvmppc_rmap_reset(kvm); + } err = kvmppc_allocate_hpt(&info, order); if (err < 0) -- cgit v1.2.3 From fa19871a166f6a940eb97dd511d3ddb1841ed95a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 14 Jul 2017 13:38:28 +0200 Subject: KVM: VMX: remove unused field Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 29fd8af5c347..d04092f821b6 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -563,7 +563,6 @@ struct vcpu_vmx { struct kvm_vcpu vcpu; unsigned long host_rsp; u8 fail; - bool nmi_known_unmasked; u32 exit_intr_info; u32 idt_vectoring_info; ulong rflags; -- cgit v1.2.3 From b3625980a65db6b6b6bbd5790a77ab95ce6397c5 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 13 Jul 2017 10:35:45 -0700 Subject: perf/x86/intel/uncore: Fix Skylake UPI PMU event masks This patch fixes the event_mask and event_ext_mask for the Intel Skylake Server UPI PMU. Bit 21 is not used as a filter. The extended umask is from bit 32 to bit 55. Correct both umasks. Signed-off-by: Stephane Eranian Signed-off-by: Kan Liang Acked-by: Peter Zijlstra Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1499967350-10385-2-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore_snbep.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index dae2fedc1601..19a00a7b4964 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -316,7 +316,7 @@ #define SKX_UPI_PCI_PMON_CTL0 0x350 #define SKX_UPI_PCI_PMON_CTR0 0x318 #define SKX_UPI_PCI_PMON_BOX_CTL 0x378 -#define SKX_PMON_CTL_UMASK_EXT 0xff +#define SKX_UPI_CTL_UMASK_EXT 0xffefff /* SKX M2M */ #define SKX_M2M_PCI_PMON_CTL0 0x228 @@ -328,7 +328,7 @@ DEFINE_UNCORE_FORMAT_ATTR(event2, event, "config:0-6"); DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21"); DEFINE_UNCORE_FORMAT_ATTR(use_occ_ctr, use_occ_ctr, "config:7"); DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); -DEFINE_UNCORE_FORMAT_ATTR(umask_ext, umask, "config:8-15,32-39"); +DEFINE_UNCORE_FORMAT_ATTR(umask_ext, umask, "config:8-15,32-43,45-55"); DEFINE_UNCORE_FORMAT_ATTR(qor, qor, "config:16"); DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19"); @@ -3603,8 +3603,8 @@ static struct intel_uncore_type skx_uncore_upi = { .perf_ctr_bits = 48, .perf_ctr = SKX_UPI_PCI_PMON_CTR0, .event_ctl = SKX_UPI_PCI_PMON_CTL0, - .event_mask = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK, - .event_mask_ext = SKX_PMON_CTL_UMASK_EXT, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .event_mask_ext = SKX_UPI_CTL_UMASK_EXT, .box_ctl = SKX_UPI_PCI_PMON_BOX_CTL, .ops = &skx_upi_uncore_pci_ops, .format_group = &skx_upi_uncore_format_group, -- cgit v1.2.3 From bab4e569e80c07ba6fe5e4f2d815adeef26cee94 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 13 Jul 2017 10:35:46 -0700 Subject: perf/x86/intel/uncore: Fix Skylake server PCU PMU event format PCU event format for SKX are different from snbep. Introduce a new format group for SKX PCU. Signed-off-by: Kan Liang Acked-by: Peter Zijlstra Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1499967350-10385-3-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore_snbep.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 19a00a7b4964..fbf8f6e462e7 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -3492,6 +3492,26 @@ static struct intel_uncore_type skx_uncore_irp = { .format_group = &skx_uncore_format_group, }; +static struct attribute *skx_uncore_pcu_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + &format_attr_occ_invert.attr, + &format_attr_occ_edge_det.attr, + &format_attr_filter_band0.attr, + &format_attr_filter_band1.attr, + &format_attr_filter_band2.attr, + &format_attr_filter_band3.attr, + NULL, +}; + +static struct attribute_group skx_uncore_pcu_format_group = { + .name = "format", + .attrs = skx_uncore_pcu_formats_attr, +}; + static struct intel_uncore_ops skx_uncore_pcu_ops = { IVBEP_UNCORE_MSR_OPS_COMMON_INIT(), .hw_config = hswep_pcu_hw_config, @@ -3510,7 +3530,7 @@ static struct intel_uncore_type skx_uncore_pcu = { .box_ctl = HSWEP_PCU_MSR_PMON_BOX_CTL, .num_shared_regs = 1, .ops = &skx_uncore_pcu_ops, - .format_group = &snbep_uncore_pcu_format_group, + .format_group = &skx_uncore_pcu_format_group, }; static struct intel_uncore_type *skx_msr_uncores[] = { -- cgit v1.2.3 From c3f02682a101b83424128915b14e60c156c03f02 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 13 Jul 2017 10:35:47 -0700 Subject: perf/x86/intel/uncore: Fix Skylake server CHA LLC_LOOKUP event umask Correct the umask for LLC_LOOKUP.LOCAL and LLC_LOOKUP.REMOTE events Signed-off-by: Kan Liang Acked-by: Peter Zijlstra Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1499967350-10385-4-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore_snbep.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index fbf8f6e462e7..a30bf973b5de 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -3333,8 +3333,8 @@ static struct extra_reg skx_uncore_cha_extra_regs[] = { SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4), SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4), SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4), - SNBEP_CBO_EVENT_EXTRA_REG(0x2134, 0xffff, 0x4), - SNBEP_CBO_EVENT_EXTRA_REG(0x8134, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x3134, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x9134, 0xffff, 0x4), }; static u64 skx_cha_filter_mask(int fields) -- cgit v1.2.3 From 9ad0fbd8fcd9e6815908c772f8d792a9d764449e Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 13 Jul 2017 10:35:48 -0700 Subject: perf/x86/intel/uncore: Remove invalid Skylake server CHA filter field There is no field c6 and link for CHA BOX FILTER. Signed-off-by: Kan Liang Acked-by: Peter Zijlstra Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1499967350-10385-5-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore_snbep.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index a30bf973b5de..2401d0600b47 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -351,7 +351,6 @@ DEFINE_UNCORE_FORMAT_ATTR(filter_cid, filter_cid, "config1:5"); DEFINE_UNCORE_FORMAT_ATTR(filter_link, filter_link, "config1:5-8"); DEFINE_UNCORE_FORMAT_ATTR(filter_link2, filter_link, "config1:6-8"); DEFINE_UNCORE_FORMAT_ATTR(filter_link3, filter_link, "config1:12"); -DEFINE_UNCORE_FORMAT_ATTR(filter_link4, filter_link, "config1:9-12"); DEFINE_UNCORE_FORMAT_ATTR(filter_nid, filter_nid, "config1:10-17"); DEFINE_UNCORE_FORMAT_ATTR(filter_nid2, filter_nid, "config1:32-47"); DEFINE_UNCORE_FORMAT_ATTR(filter_state, filter_state, "config1:18-22"); @@ -3302,7 +3301,6 @@ static struct attribute *skx_uncore_cha_formats_attr[] = { &format_attr_inv.attr, &format_attr_thresh8.attr, &format_attr_filter_tid4.attr, - &format_attr_filter_link4.attr, &format_attr_filter_state5.attr, &format_attr_filter_rem.attr, &format_attr_filter_loc.attr, @@ -3312,7 +3310,6 @@ static struct attribute *skx_uncore_cha_formats_attr[] = { &format_attr_filter_opc_0.attr, &format_attr_filter_opc_1.attr, &format_attr_filter_nc.attr, - &format_attr_filter_c6.attr, &format_attr_filter_isoc.attr, NULL, }; -- cgit v1.2.3 From 8aa7b7b4b4a601978672dce6604b9f5630b2eeb8 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 13 Jul 2017 10:35:49 -0700 Subject: perf/x86/intel/uncore: Fix SKX CHA event extra regs This patch adds two missing event extra regs for Skylake Server CHA PMU: - TOR_INSERTS - TOR_OCCUPANCY Were missing support for all the filters, including opcode matchers. Signed-off-by: Stephane Eranian Signed-off-by: Kan Liang Acked-by: Peter Zijlstra Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1499967350-10385-6-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore_snbep.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch') diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 2401d0600b47..f9f825b6a46e 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -3332,6 +3332,8 @@ static struct extra_reg skx_uncore_cha_extra_regs[] = { SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4), SNBEP_CBO_EVENT_EXTRA_REG(0x3134, 0xffff, 0x4), SNBEP_CBO_EVENT_EXTRA_REG(0x9134, 0xffff, 0x4), + SNBEP_CBO_EVENT_EXTRA_REG(0x35, 0xff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x36, 0xff, 0x8), }; static u64 skx_cha_filter_mask(int fields) @@ -3344,6 +3346,17 @@ static u64 skx_cha_filter_mask(int fields) mask |= SKX_CHA_MSR_PMON_BOX_FILTER_LINK; if (fields & 0x4) mask |= SKX_CHA_MSR_PMON_BOX_FILTER_STATE; + if (fields & 0x8) { + mask |= SKX_CHA_MSR_PMON_BOX_FILTER_REM; + mask |= SKX_CHA_MSR_PMON_BOX_FILTER_LOC; + mask |= SKX_CHA_MSR_PMON_BOX_FILTER_ALL_OPC; + mask |= SKX_CHA_MSR_PMON_BOX_FILTER_NM; + mask |= SKX_CHA_MSR_PMON_BOX_FILTER_NOT_NM; + mask |= SKX_CHA_MSR_PMON_BOX_FILTER_OPC0; + mask |= SKX_CHA_MSR_PMON_BOX_FILTER_OPC1; + mask |= SKX_CHA_MSR_PMON_BOX_FILTER_NC; + mask |= SKX_CHA_MSR_PMON_BOX_FILTER_ISOC; + } return mask; } -- cgit v1.2.3 From ba883b4abc9cd837441b01eb9cf8d9196181294d Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 13 Jul 2017 10:35:50 -0700 Subject: perf/x86/intel/uncore: Fix missing marker for skx_uncore_cha_extra_regs This skx_uncore_cha_extra_regs array was missing an end-marker. Signed-off-by: Stephane Eranian Signed-off-by: Kan Liang Acked-by: Peter Zijlstra Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1499967350-10385-7-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore_snbep.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index f9f825b6a46e..4f9127644b80 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -3334,6 +3334,7 @@ static struct extra_reg skx_uncore_cha_extra_regs[] = { SNBEP_CBO_EVENT_EXTRA_REG(0x9134, 0xffff, 0x4), SNBEP_CBO_EVENT_EXTRA_REG(0x35, 0xff, 0x8), SNBEP_CBO_EVENT_EXTRA_REG(0x36, 0xff, 0x8), + EVENT_EXTRA_END }; static u64 skx_cha_filter_mask(int fields) -- cgit v1.2.3 From 38115f2f8cec8087d558c062e779c443a01f87d6 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 21 Jul 2017 23:45:52 +0900 Subject: kprobes/x86: Release insn_slot in failure path The following commit: 003002e04ed3 ("kprobes: Fix arch_prepare_kprobe to handle copy insn failures") returns an error if the copying of the instruction, but does not release the allocated insn_slot. Clean up correctly. Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: David S . Miller Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 003002e04ed3 ("kprobes: Fix arch_prepare_kprobe to handle copy insn failures") Link: http://lkml.kernel.org/r/150064834183.6172.11694375818447664416.stgit@devbox Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes/core.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 6b877807598b..f0153714ddac 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -457,6 +457,8 @@ static int arch_copy_kprobe(struct kprobe *p) int arch_prepare_kprobe(struct kprobe *p) { + int ret; + if (alternatives_text_reserved(p->addr, p->addr)) return -EINVAL; @@ -467,7 +469,13 @@ int arch_prepare_kprobe(struct kprobe *p) if (!p->ainsn.insn) return -ENOMEM; - return arch_copy_kprobe(p); + ret = arch_copy_kprobe(p); + if (ret) { + free_insn_slot(p->ainsn.insn, 0); + p->ainsn.insn = NULL; + } + + return ret; } void arch_arm_kprobe(struct kprobe *p) -- cgit v1.2.3 From 269583559cdd8ab1203210893590ed4cc6af8171 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Fri, 30 Jun 2017 18:56:09 +0100 Subject: ARM: 8686/1: iwmmxt: Add missing __user annotations to sigframe accessors preserve_iwmmxt_context() and restore_iwmmxt_context() lack __user accessors on their arguments pointing to the user signal frame. There does not be appear to be a bug here, but this omission is inconsistent with the crunch and vfp sigframe access functions. This patch adds the annotations, for consistency. Signed-off-by: Dave Martin Signed-off-by: Russell King --- arch/arm/kernel/signal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 7b8f2141427b..8f064807d1ef 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -59,7 +59,7 @@ static int restore_crunch_context(struct crunch_sigframe __user *frame) #ifdef CONFIG_IWMMXT -static int preserve_iwmmxt_context(struct iwmmxt_sigframe *frame) +static int preserve_iwmmxt_context(struct iwmmxt_sigframe __user *frame) { char kbuf[sizeof(*frame) + 8]; struct iwmmxt_sigframe *kframe; @@ -72,7 +72,7 @@ static int preserve_iwmmxt_context(struct iwmmxt_sigframe *frame) return __copy_to_user(frame, kframe, sizeof(*frame)); } -static int restore_iwmmxt_context(struct iwmmxt_sigframe *frame) +static int restore_iwmmxt_context(struct iwmmxt_sigframe __user *frame) { char kbuf[sizeof(*frame) + 8]; struct iwmmxt_sigframe *kframe; -- cgit v1.2.3 From ce184a0dee92a0a333236a26478e304dca29a3df Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Fri, 30 Jun 2017 18:56:59 +0100 Subject: ARM: 8687/1: signal: Fix unparseable iwmmxt_sigframe in uc_regspace[] In kernels with CONFIG_IWMMXT=y running on non-iWMMXt hardware, the signal frame can be left partially uninitialised in such a way that userspace cannot parse uc_regspace[] safely. In particular, this means that the VFP registers cannot be located reliably in the signal frame when a multi_v7_defconfig kernel is run on the majority of platforms. The cause is that the uc_regspace[] is laid out statically based on the kernel config, but the decision of whether to save/restore the iWMMXt registers must be a runtime decision. To minimise breakage of software that may assume a fixed layout, this patch emits a dummy block of the same size as iwmmxt_sigframe, for non-iWMMXt threads. However, the magic and size of this block are now filled in to help parsers skip over it. A new DUMMY_MAGIC is defined for this purpose. It is probably legitimate (if non-portable) for userspace to manufacture its own sigframe for sigreturn, and there is no obvious reason why userspace should be required to insert a DUMMY_MAGIC block when running on non-iWMMXt hardware, when omitting it has worked just fine forever in other configurations. So in this case, sigreturn does not require this block to be present. Reported-by: Edmund Grimley-Evans Signed-off-by: Dave Martin Signed-off-by: Russell King --- arch/arm/include/asm/ucontext.h | 6 ++++ arch/arm/kernel/signal.c | 76 ++++++++++++++++++++++++++++++++--------- 2 files changed, 65 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/ucontext.h b/arch/arm/include/asm/ucontext.h index 14749aec94bf..921d8274855c 100644 --- a/arch/arm/include/asm/ucontext.h +++ b/arch/arm/include/asm/ucontext.h @@ -35,6 +35,12 @@ struct ucontext { * bytes, to prevent unpredictable padding in the signal frame. */ +/* + * Dummy padding block: if this magic is encountered, the block should + * be skipped using the corresponding size field. + */ +#define DUMMY_MAGIC 0xb0d9ed01 + #ifdef CONFIG_CRUNCH #define CRUNCH_MAGIC 0x5065cf03 #define CRUNCH_STORAGE_SIZE (CRUNCH_SIZE + 8) diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 8f064807d1ef..5814298ef0b7 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -40,8 +40,10 @@ static int preserve_crunch_context(struct crunch_sigframe __user *frame) return __copy_to_user(frame, kframe, sizeof(*frame)); } -static int restore_crunch_context(struct crunch_sigframe __user *frame) +static int restore_crunch_context(char __user **auxp) { + struct crunch_sigframe __user *frame = + (struct crunch_sigframe __user *)*auxp; char kbuf[sizeof(*frame) + 8]; struct crunch_sigframe *kframe; @@ -52,6 +54,7 @@ static int restore_crunch_context(struct crunch_sigframe __user *frame) if (kframe->magic != CRUNCH_MAGIC || kframe->size != CRUNCH_STORAGE_SIZE) return -1; + *auxp += CRUNCH_STORAGE_SIZE; crunch_task_restore(current_thread_info(), &kframe->storage); return 0; } @@ -63,17 +66,35 @@ static int preserve_iwmmxt_context(struct iwmmxt_sigframe __user *frame) { char kbuf[sizeof(*frame) + 8]; struct iwmmxt_sigframe *kframe; + int err = 0; /* the iWMMXt context must be 64 bit aligned */ kframe = (struct iwmmxt_sigframe *)((unsigned long)(kbuf + 8) & ~7); - kframe->magic = IWMMXT_MAGIC; - kframe->size = IWMMXT_STORAGE_SIZE; - iwmmxt_task_copy(current_thread_info(), &kframe->storage); - return __copy_to_user(frame, kframe, sizeof(*frame)); + + if (test_thread_flag(TIF_USING_IWMMXT)) { + kframe->magic = IWMMXT_MAGIC; + kframe->size = IWMMXT_STORAGE_SIZE; + iwmmxt_task_copy(current_thread_info(), &kframe->storage); + + err = __copy_to_user(frame, kframe, sizeof(*frame)); + } else { + /* + * For bug-compatibility with older kernels, some space + * has to be reserved for iWMMXt even if it's not used. + * Set the magic and size appropriately so that properly + * written userspace can skip it reliably: + */ + __put_user_error(DUMMY_MAGIC, &frame->magic, err); + __put_user_error(IWMMXT_STORAGE_SIZE, &frame->size, err); + } + + return err; } -static int restore_iwmmxt_context(struct iwmmxt_sigframe __user *frame) +static int restore_iwmmxt_context(char __user **auxp) { + struct iwmmxt_sigframe __user *frame = + (struct iwmmxt_sigframe __user *)*auxp; char kbuf[sizeof(*frame) + 8]; struct iwmmxt_sigframe *kframe; @@ -81,10 +102,28 @@ static int restore_iwmmxt_context(struct iwmmxt_sigframe __user *frame) kframe = (struct iwmmxt_sigframe *)((unsigned long)(kbuf + 8) & ~7); if (__copy_from_user(kframe, frame, sizeof(*frame))) return -1; - if (kframe->magic != IWMMXT_MAGIC || - kframe->size != IWMMXT_STORAGE_SIZE) + + /* + * For non-iWMMXt threads: a single iwmmxt_sigframe-sized dummy + * block is discarded for compatibility with setup_sigframe() if + * present, but we don't mandate its presence. If some other + * magic is here, it's not for us: + */ + if (!test_thread_flag(TIF_USING_IWMMXT) && + kframe->magic != DUMMY_MAGIC) + return 0; + + if (kframe->size != IWMMXT_STORAGE_SIZE) return -1; - iwmmxt_task_restore(current_thread_info(), &kframe->storage); + + if (test_thread_flag(TIF_USING_IWMMXT)) { + if (kframe->magic != IWMMXT_MAGIC) + return -1; + + iwmmxt_task_restore(current_thread_info(), &kframe->storage); + } + + *auxp += IWMMXT_STORAGE_SIZE; return 0; } @@ -107,8 +146,10 @@ static int preserve_vfp_context(struct vfp_sigframe __user *frame) return vfp_preserve_user_clear_hwstate(&frame->ufp, &frame->ufp_exc); } -static int restore_vfp_context(struct vfp_sigframe __user *frame) +static int restore_vfp_context(char __user **auxp) { + struct vfp_sigframe __user *frame = + (struct vfp_sigframe __user *)*auxp; unsigned long magic; unsigned long size; int err = 0; @@ -121,6 +162,7 @@ static int restore_vfp_context(struct vfp_sigframe __user *frame) if (magic != VFP_MAGIC || size != VFP_STORAGE_SIZE) return -EINVAL; + *auxp += size; return vfp_restore_user_hwstate(&frame->ufp, &frame->ufp_exc); } @@ -141,7 +183,7 @@ struct rt_sigframe { static int restore_sigframe(struct pt_regs *regs, struct sigframe __user *sf) { - struct aux_sigframe __user *aux; + char __user *aux; sigset_t set; int err; @@ -169,18 +211,18 @@ static int restore_sigframe(struct pt_regs *regs, struct sigframe __user *sf) err |= !valid_user_regs(regs); - aux = (struct aux_sigframe __user *) sf->uc.uc_regspace; + aux = (char __user *) sf->uc.uc_regspace; #ifdef CONFIG_CRUNCH if (err == 0) - err |= restore_crunch_context(&aux->crunch); + err |= restore_crunch_context(&aux); #endif #ifdef CONFIG_IWMMXT - if (err == 0 && test_thread_flag(TIF_USING_IWMMXT)) - err |= restore_iwmmxt_context(&aux->iwmmxt); + if (err == 0) + err |= restore_iwmmxt_context(&aux); #endif #ifdef CONFIG_VFP if (err == 0) - err |= restore_vfp_context(&aux->vfp); + err |= restore_vfp_context(&aux); #endif return err; @@ -286,7 +328,7 @@ setup_sigframe(struct sigframe __user *sf, struct pt_regs *regs, sigset_t *set) err |= preserve_crunch_context(&aux->crunch); #endif #ifdef CONFIG_IWMMXT - if (err == 0 && test_thread_flag(TIF_USING_IWMMXT)) + if (err == 0) err |= preserve_iwmmxt_context(&aux->iwmmxt); #endif #ifdef CONFIG_VFP -- cgit v1.2.3 From 288be97cc74e31b7871c75eb11a8dd768dcb535d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 12 Jul 2017 15:44:14 +0100 Subject: arm64/lib: copy_page: use consistent prefetch stride The optional prefetch instructions in the copy_page() routine are inconsistent: at the start of the function, two cachelines are prefetched beyond the one being loaded in the first iteration, but in the loop, the prefetch is one more line ahead. This appears to be unintentional, so let's fix it. While at it, fix the comment style and white space. Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/lib/copy_page.S | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S index c3cd65e31814..076c43715e64 100644 --- a/arch/arm64/lib/copy_page.S +++ b/arch/arm64/lib/copy_page.S @@ -30,9 +30,10 @@ */ ENTRY(copy_page) alternative_if ARM64_HAS_NO_HW_PREFETCH - # Prefetch two cache lines ahead. - prfm pldl1strm, [x1, #128] - prfm pldl1strm, [x1, #256] + // Prefetch three cache lines ahead. + prfm pldl1strm, [x1, #128] + prfm pldl1strm, [x1, #256] + prfm pldl1strm, [x1, #384] alternative_else_nop_endif ldp x2, x3, [x1] @@ -50,7 +51,7 @@ alternative_else_nop_endif subs x18, x18, #128 alternative_if ARM64_HAS_NO_HW_PREFETCH - prfm pldl1strm, [x1, #384] + prfm pldl1strm, [x1, #384] alternative_else_nop_endif stnp x2, x3, [x0] -- cgit v1.2.3 From 18d5e6c34a8eda438d5ad8b3b15f42dab01bf05d Mon Sep 17 00:00:00 2001 From: Michael Davidson Date: Mon, 24 Jul 2017 16:51:55 -0700 Subject: x86/boot: #undef memcpy() et al in string.c undef memcpy() and friends in boot/string.c so that the functions defined here will have the correct names, otherwise we end up up trying to redefine __builtin_memcpy() etc. Surprisingly, GCC allows this (and, helpfully, discards the __builtin_ prefix from the function name when compiling it), but clang does not. Adding these #undef's appears to preserve what I assume was the original intent of the code. Signed-off-by: Michael Davidson Signed-off-by: Matthias Kaehlcke Acked-by: H. Peter Anvin Cc: Arnd Bergmann Cc: Bernhard.Rosenkranzer@linaro.org Cc: Greg Hackmann Cc: Kees Cook Cc: Linus Torvalds Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170724235155.79255-1-mka@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/string.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c index 630e3664906b..16f49123d747 100644 --- a/arch/x86/boot/string.c +++ b/arch/x86/boot/string.c @@ -16,6 +16,15 @@ #include "ctype.h" #include "string.h" +/* + * Undef these macros so that the functions that we provide + * here will have the correct names regardless of how string.h + * may have chosen to #define them. + */ +#undef memcpy +#undef memset +#undef memcmp + int memcmp(const void *s1, const void *s2, size_t len) { bool diff; -- cgit v1.2.3 From 4ecf7191fdcc5686c1eb2b06d7f93ce29d636cf0 Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Mon, 24 Jul 2017 14:22:48 +0200 Subject: x86/efi: Fix reboot_mode when EFI runtime services are disabled When EFI runtime services are disabled, for example by the "noefi" kernel cmdline parameter, the reboot_type could still be set to BOOT_EFI causing reboot to fail. Fix this by checking if EFI runtime services are enabled. Signed-off-by: Stefan Assmann Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170724122248.24006-1-sassmann@kpanic.de [ Fixed 'not disabled' double negation. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 67393fc88353..a56bf6051f4e 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -471,12 +471,12 @@ static int __init reboot_init(void) /* * The DMI quirks table takes precedence. If no quirks entry - * matches and the ACPI Hardware Reduced bit is set, force EFI - * reboot. + * matches and the ACPI Hardware Reduced bit is set and EFI + * runtime services are enabled, force EFI reboot. */ rv = dmi_check_system(reboot_dmi_table); - if (!rv && efi_reboot_required()) + if (!rv && efi_reboot_required() && !efi_runtime_disabled()) reboot_type = BOOT_EFI; return 0; -- cgit v1.2.3 From 4f899147424a189b0ad1fdd6f35784ed5a642e83 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Mon, 10 Jul 2017 13:35:48 +0200 Subject: KVM: s390: take srcu lock when getting/setting storage keys The following warning was triggered by missing srcu locks around the storage key handling functions. ============================= WARNING: suspicious RCU usage 4.12.0+ #56 Not tainted ----------------------------- ./include/linux/kvm_host.h:572 suspicious rcu_dereference_check() usage! rcu_scheduler_active = 2, debug_locks = 1 1 lock held by live_migration/4936: #0: (&mm->mmap_sem){++++++}, at: [<0000000000141be0>] kvm_arch_vm_ioctl+0x6b8/0x22d0 CPU: 8 PID: 4936 Comm: live_migration Not tainted 4.12.0+ #56 Hardware name: IBM 2964 NC9 704 (LPAR) Call Trace: ([<000000000011378a>] show_stack+0xea/0xf0) [<000000000055cc4c>] dump_stack+0x94/0xd8 [<000000000012ee70>] gfn_to_memslot+0x1a0/0x1b8 [<0000000000130b76>] gfn_to_hva+0x2e/0x48 [<0000000000141c3c>] kvm_arch_vm_ioctl+0x714/0x22d0 [<000000000013306c>] kvm_vm_ioctl+0x11c/0x7b8 [<000000000037e2c0>] do_vfs_ioctl+0xa8/0x6c8 [<000000000037e984>] SyS_ioctl+0xa4/0xb8 [<00000000008b20a4>] system_call+0xc4/0x27c 1 lock held by live_migration/4936: #0: (&mm->mmap_sem){++++++}, at: [<0000000000141be0>] kvm_arch_vm_ioctl+0x6b8/0x22d0 Signed-off-by: Christian Borntraeger Reviewed-by: Pierre Morel --- arch/s390/kvm/kvm-s390.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 3f2884e99ed4..af09d3437631 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1324,7 +1324,7 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) { uint8_t *keys; uint64_t hva; - int i, r = 0; + int srcu_idx, i, r = 0; if (args->flags != 0) return -EINVAL; @@ -1342,6 +1342,7 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) return -ENOMEM; down_read(¤t->mm->mmap_sem); + srcu_idx = srcu_read_lock(&kvm->srcu); for (i = 0; i < args->count; i++) { hva = gfn_to_hva(kvm, args->start_gfn + i); if (kvm_is_error_hva(hva)) { @@ -1353,6 +1354,7 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) if (r) break; } + srcu_read_unlock(&kvm->srcu, srcu_idx); up_read(¤t->mm->mmap_sem); if (!r) { @@ -1370,7 +1372,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) { uint8_t *keys; uint64_t hva; - int i, r = 0; + int srcu_idx, i, r = 0; if (args->flags != 0) return -EINVAL; @@ -1396,6 +1398,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) goto out; down_read(¤t->mm->mmap_sem); + srcu_idx = srcu_read_lock(&kvm->srcu); for (i = 0; i < args->count; i++) { hva = gfn_to_hva(kvm, args->start_gfn + i); if (kvm_is_error_hva(hva)) { @@ -1413,6 +1416,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) if (r) break; } + srcu_read_unlock(&kvm->srcu, srcu_idx); up_read(¤t->mm->mmap_sem); out: kvfree(keys); -- cgit v1.2.3 From d9f89b4e9290e46cd9b273e9ad0bff0f93e86fae Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Sat, 1 Jul 2017 18:26:54 +0200 Subject: KVM: arm/arm64: PMU: Fix overflow interrupt injection kvm_pmu_overflow_set() is called from perf's interrupt handler, making the call of kvm_vgic_inject_irq() from it introduced with "KVM: arm/arm64: PMU: remove request-less vcpu kick" a really bad idea, as it's quite easy to try and retake a lock that the interrupted context is already holding. The fix is to use a vcpu kick, leaving the interrupt injection to kvm_pmu_sync_hwstate(), like it was doing before the refactoring. We don't just revert, though, because before the kick was request-less, leaving the vcpu exposed to the request-less vcpu kick race, and also because the kick was used unnecessarily from register access handlers. Reviewed-by: Christoffer Dall Signed-off-by: Andrew Jones Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 2 +- include/kvm/arm_pmu.h | 2 -- virt/kvm/arm/pmu.c | 43 +++++++++++++++---------------------------- 3 files changed, 16 insertions(+), 31 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 77862881ae86..2e070d3baf9f 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -764,7 +764,7 @@ static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (p->is_write) { if (r->CRm & 0x2) /* accessing PMOVSSET_EL0 */ - kvm_pmu_overflow_set(vcpu, p->regval & mask); + vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= (p->regval & mask); else /* accessing PMOVSCLR_EL0 */ vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask); diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index f6e030617467..f87fe20fcb05 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -48,7 +48,6 @@ void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu); void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu); void kvm_pmu_disable_counter(struct kvm_vcpu *vcpu, u64 val); void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val); -void kvm_pmu_overflow_set(struct kvm_vcpu *vcpu, u64 val); void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu); void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu); bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu); @@ -86,7 +85,6 @@ static inline void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) {} static inline void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) {} static inline void kvm_pmu_disable_counter(struct kvm_vcpu *vcpu, u64 val) {} static inline void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val) {} -static inline void kvm_pmu_overflow_set(struct kvm_vcpu *vcpu, u64 val) {} static inline void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu) {} static inline void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu) {} static inline bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu) diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index fc8a723ff387..8a9c42366db7 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -203,11 +203,15 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) return reg; } -static void kvm_pmu_check_overflow(struct kvm_vcpu *vcpu) +static void kvm_pmu_update_state(struct kvm_vcpu *vcpu) { struct kvm_pmu *pmu = &vcpu->arch.pmu; - bool overflow = !!kvm_pmu_overflow_status(vcpu); + bool overflow; + + if (!kvm_arm_pmu_v3_ready(vcpu)) + return; + overflow = !!kvm_pmu_overflow_status(vcpu); if (pmu->irq_level == overflow) return; @@ -215,33 +219,11 @@ static void kvm_pmu_check_overflow(struct kvm_vcpu *vcpu) if (likely(irqchip_in_kernel(vcpu->kvm))) { int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, - pmu->irq_num, overflow, - &vcpu->arch.pmu); + pmu->irq_num, overflow, pmu); WARN_ON(ret); } } -/** - * kvm_pmu_overflow_set - set PMU overflow interrupt - * @vcpu: The vcpu pointer - * @val: the value guest writes to PMOVSSET register - */ -void kvm_pmu_overflow_set(struct kvm_vcpu *vcpu, u64 val) -{ - if (val == 0) - return; - - vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= val; - kvm_pmu_check_overflow(vcpu); -} - -static void kvm_pmu_update_state(struct kvm_vcpu *vcpu) -{ - if (!kvm_arm_pmu_v3_ready(vcpu)) - return; - kvm_pmu_check_overflow(vcpu); -} - bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu) { struct kvm_pmu *pmu = &vcpu->arch.pmu; @@ -303,7 +285,7 @@ static inline struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc) } /** - * When perf event overflows, call kvm_pmu_overflow_set to set overflow status. + * When the perf event overflows, set the overflow status and inform the vcpu. */ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, struct perf_sample_data *data, @@ -313,7 +295,12 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); int idx = pmc->idx; - kvm_pmu_overflow_set(vcpu, BIT(idx)); + vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx); + + if (kvm_pmu_overflow_status(vcpu)) { + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); + kvm_vcpu_kick(vcpu); + } } /** @@ -341,7 +328,7 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) reg = lower_32_bits(reg); vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg; if (!reg) - kvm_pmu_overflow_set(vcpu, BIT(i)); + vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i); } } } -- cgit v1.2.3 From f520e55241e1cf0c10d308ccf47513f28533f60a Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 25 Jul 2017 18:20:54 +0200 Subject: parisc: Fix crash when calling PDC_PAT_MEM PDT firmware function Commit c9c2877d08d9 ("parisc: Add Page Deallocation Table (PDT) support") introduced the pdc_pat_mem_read_pd_pdt() firmware helper function, which crashed the system because it trashed the stack if the pdc_pat_mem_read_pd_retinfo struct was located on the stack (and which is in size less than the required 32 64-bit values). Fix it by using the pdc_result struct instead when calling firmware and copy the return values back into the result struct when finished sucessfully. While debugging this code I noticed that the pdc_type wasn't set correctly either, so let's fix that too. Fixes: c9c2877d08d9 ("parisc: Add Page Deallocation Table (PDT) support") Signed-off-by: Helge Deller --- arch/parisc/kernel/firmware.c | 11 +++++++++-- arch/parisc/kernel/pdt.c | 4 +++- 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c index 98190252c12f..526ed90ca56f 100644 --- a/arch/parisc/kernel/firmware.c +++ b/arch/parisc/kernel/firmware.c @@ -1481,12 +1481,19 @@ int pdc_pat_mem_read_pd_pdt(struct pdc_pat_mem_read_pd_retinfo *pret, unsigned long offset) { int retval; - unsigned long flags; + unsigned long flags, entries; spin_lock_irqsave(&pdc_lock, flags); retval = mem_pdc_call(PDC_PAT_MEM, PDC_PAT_MEM_PD_READ, - __pa(&pret), __pa(pdt_entries_ptr), + __pa(&pdc_result), __pa(pdt_entries_ptr), count, offset); + + if (retval == PDC_OK) { + entries = min(pdc_result[0], count); + pret->actual_count_bytes = entries; + pret->pdt_entries = entries / sizeof(unsigned long); + } + spin_unlock_irqrestore(&pdc_lock, flags); return retval; diff --git a/arch/parisc/kernel/pdt.c b/arch/parisc/kernel/pdt.c index f3a797e670b0..040d49b723d7 100644 --- a/arch/parisc/kernel/pdt.c +++ b/arch/parisc/kernel/pdt.c @@ -112,10 +112,12 @@ void __init pdc_pdt_init(void) #ifdef CONFIG_64BIT struct pdc_pat_mem_read_pd_retinfo pat_pret; + /* try old obsolete PAT firmware function first */ + pdt_type = PDT_PAT_OLD; ret = pdc_pat_mem_read_cell_pdt(&pat_pret, pdt_entry, MAX_PDT_ENTRIES); if (ret != PDC_OK) { - pdt_type = PDT_PAT_OLD; + pdt_type = PDT_PAT_NEW; ret = pdc_pat_mem_read_pd_pdt(&pat_pret, pdt_entry, MAX_PDT_TABLE_SIZE, 0); } -- cgit v1.2.3 From 25a9b76597fafbbf688dd4473cb910568deb2b0c Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 25 Jul 2017 19:26:23 +0200 Subject: parisc: Add function to return DIMM slot of physical address Add a firmware wrapper function, which asks PDC firmware for the DIMM slot of a physical address. This is needed to show users which DIMM module needs replacement in case a broken DIMM was encountered. Signed-off-by: Helge Deller --- arch/parisc/include/asm/pdcpat.h | 16 +++++++++++++++- arch/parisc/kernel/firmware.c | 25 +++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/parisc/include/asm/pdcpat.h b/arch/parisc/include/asm/pdcpat.h index 32e105fb8adb..e3c0586260d8 100644 --- a/arch/parisc/include/asm/pdcpat.h +++ b/arch/parisc/include/asm/pdcpat.h @@ -150,7 +150,7 @@ #define PDC_PAT_MEM_SETGM 9L /* Set Good Memory value */ #define PDC_PAT_MEM_ADD_PAGE 10L /* ADDs a page to the cell */ #define PDC_PAT_MEM_ADDRESS 11L /* Get Physical Location From */ - /* Memory Address */ + /* Memory Address */ #define PDC_PAT_MEM_GET_TXT_SIZE 12L /* Get Formatted Text Size */ #define PDC_PAT_MEM_GET_PD_TXT 13L /* Get PD Formatted Text */ #define PDC_PAT_MEM_GET_CELL_TXT 14L /* Get Cell Formatted Text */ @@ -228,6 +228,17 @@ struct pdc_pat_mem_read_pd_retinfo { /* PDC_PAT_MEM/PDC_PAT_MEM_PD_READ */ unsigned long pdt_entries; }; +struct pdc_pat_mem_phys_mem_location { /* PDC_PAT_MEM/PDC_PAT_MEM_ADDRESS */ + u64 cabinet:8; + u64 ign1:8; + u64 ign2:8; + u64 cell_slot:8; + u64 ign3:8; + u64 dimm_slot:8; /* DIMM slot, e.g. 0x1A, 0x2B, show user hex value! */ + u64 ign4:8; + u64 source:4; /* for mem: always 0x07 */ + u64 source_detail:4; /* for mem: always 0x04 (SIMM or DIMM) */ +}; struct pdc_pat_pd_addr_map_entry { unsigned char entry_type; /* 1 = Memory Descriptor Entry Type */ @@ -319,6 +330,9 @@ extern int pdc_pat_mem_read_cell_pdt(struct pdc_pat_mem_read_pd_retinfo *pret, extern int pdc_pat_mem_read_pd_pdt(struct pdc_pat_mem_read_pd_retinfo *pret, unsigned long *pdt_entries_ptr, unsigned long count, unsigned long offset); +extern int pdc_pat_mem_get_dimm_phys_location( + struct pdc_pat_mem_phys_mem_location *pret, + unsigned long phys_addr); #endif /* __ASSEMBLY__ */ diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c index 526ed90ca56f..f622a311d04a 100644 --- a/arch/parisc/kernel/firmware.c +++ b/arch/parisc/kernel/firmware.c @@ -1498,6 +1498,31 @@ int pdc_pat_mem_read_pd_pdt(struct pdc_pat_mem_read_pd_retinfo *pret, return retval; } + +/** + * pdc_pat_mem_get_dimm_phys_location - Get physical DIMM slot via PAT firmware + * @pret: ptr to hold returned information + * @phys_addr: physical address to examine + * + */ +int pdc_pat_mem_get_dimm_phys_location( + struct pdc_pat_mem_phys_mem_location *pret, + unsigned long phys_addr) +{ + int retval; + unsigned long flags; + + spin_lock_irqsave(&pdc_lock, flags); + retval = mem_pdc_call(PDC_PAT_MEM, PDC_PAT_MEM_ADDRESS, + __pa(&pdc_result), phys_addr); + + if (retval == PDC_OK) + memcpy(pret, &pdc_result, sizeof(*pret)); + + spin_unlock_irqrestore(&pdc_lock, flags); + + return retval; +} #endif /* CONFIG_64BIT */ -- cgit v1.2.3 From c46bafc4d2536a079455706cf31582b67192d5b7 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 25 Jul 2017 20:12:25 +0200 Subject: parisc: Show DIMM slot number which holds broken memory module The Page Deallocation Table (PDT) holds the physical addresses of all broken memory addresses. With the physical address we now are able to show which DIMM slot (e.g. 1a, 3c) actually holds the broken memory module so that users are able to replace it. Signed-off-by: Helge Deller --- arch/parisc/kernel/pdt.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/parisc/kernel/pdt.c b/arch/parisc/kernel/pdt.c index 040d49b723d7..d02874ecb94d 100644 --- a/arch/parisc/kernel/pdt.c +++ b/arch/parisc/kernel/pdt.c @@ -133,11 +133,20 @@ void __init pdc_pdt_init(void) } for (i = 0; i < pdt_status.pdt_entries; i++) { - if (i < 20) - pr_warn("PDT: BAD PAGE #%d at 0x%08lx (error_type = %lu)\n", - i, - pdt_entry[i] & PAGE_MASK, - pdt_entry[i] & 1); + struct pdc_pat_mem_phys_mem_location loc; + + /* get DIMM slot number */ + loc.dimm_slot = 0xff; +#ifdef CONFIG_64BIT + pdc_pat_mem_get_dimm_phys_location(&loc, pdt_entry[i]); +#endif + + pr_warn("PDT: BAD PAGE #%d at 0x%08lx, " + "DIMM slot %02x (error_type = %lu)\n", + i, + pdt_entry[i] & PAGE_MASK, + loc.dimm_slot, + pdt_entry[i] & 1); /* mark memory page bad */ memblock_reserve(pdt_entry[i] & PAGE_MASK, PAGE_SIZE); -- cgit v1.2.3 From 56188832a50f09998cb570ba3771a1d25c193c0e Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 25 Jul 2017 21:41:41 +0200 Subject: parisc: Suspend lockup detectors before system halt Some machines can't power off the machine, so disable the lockup detectors to avoid this watchdog BUG to show up every few seconds: watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [systemd-shutdow:1] Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # 4.9+ --- arch/parisc/kernel/process.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index b64d7d21646e..a45a67d526f8 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include @@ -145,6 +146,7 @@ void machine_power_off(void) /* prevent soft lockup/stalled CPU messages for endless loop. */ rcu_sysrq_start(); + lockup_detector_suspend(); for (;;); } -- cgit v1.2.3 From ae7a609c34b6fb12328c553b5f9aab26ae74a28e Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Tue, 25 Jul 2017 17:11:26 -0400 Subject: parisc: Prevent TLB speculation on flushed pages on CPUs that only support equivalent aliases Helge noticed that we flush the TLB page in flush_cache_page but not in flush_cache_range or flush_cache_mm. For a long time, we have had random segmentation faults building packages on machines with PA8800/8900 processors. These machines only support equivalent aliases. We don't see these faults on machines that don't require strict coherency. So, it appears TLB speculation sometimes leads to cache corruption on machines that require coherency. This patch adds TLB flushes to flush_cache_range and flush_cache_mm when coherency is required. We only flush the TLB in flush_cache_page when coherency is required. The patch also optimizes flush_cache_range. It turns out we always have the right context to use flush_user_dcache_range_asm and flush_user_icache_range_asm. The patch has been tested for some time on rp3440, rp3410 and A500-44. It's been boot tested on c8000. No random segmentation faults were observed during testing. Signed-off-by: John David Anglin Cc: stable@vger.kernel.org # 4.9+ Signed-off-by: Helge Deller --- arch/parisc/kernel/cache.c | 34 ++++++++++++++-------------------- 1 file changed, 14 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index c32a09095216..3a64ebb39ac7 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -539,6 +539,10 @@ void flush_cache_mm(struct mm_struct *mm) struct vm_area_struct *vma; pgd_t *pgd; + /* Flush the TLB to avoid speculation if coherency is required. */ + if (parisc_requires_coherency()) + flush_tlb_all(); + /* Flushing the whole cache on each cpu takes forever on rp3440, etc. So, avoid it if the mm isn't too big. */ if (mm_total_size(mm) >= parisc_cache_flush_threshold) { @@ -577,33 +581,22 @@ void flush_cache_mm(struct mm_struct *mm) void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - unsigned long addr; - pgd_t *pgd; - BUG_ON(!vma->vm_mm->context); + /* Flush the TLB to avoid speculation if coherency is required. */ + if (parisc_requires_coherency()) + flush_tlb_range(vma, start, end); + if ((end - start) >= parisc_cache_flush_threshold) { flush_cache_all(); return; } - if (vma->vm_mm->context == mfsp(3)) { - flush_user_dcache_range_asm(start, end); - if (vma->vm_flags & VM_EXEC) - flush_user_icache_range_asm(start, end); - return; - } + BUG_ON(vma->vm_mm->context != mfsp(3)); - pgd = vma->vm_mm->pgd; - for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) { - unsigned long pfn; - pte_t *ptep = get_ptep(pgd, addr); - if (!ptep) - continue; - pfn = pte_pfn(*ptep); - if (pfn_valid(pfn)) - __flush_cache_page(vma, addr, PFN_PHYS(pfn)); - } + flush_user_dcache_range_asm(start, end); + if (vma->vm_flags & VM_EXEC) + flush_user_icache_range_asm(start, end); } void @@ -612,7 +605,8 @@ flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long BUG_ON(!vma->vm_mm->context); if (pfn_valid(pfn)) { - flush_tlb_page(vma, vmaddr); + if (parisc_requires_coherency()) + flush_tlb_page(vma, vmaddr); __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn)); } } -- cgit v1.2.3 From 56008c04ebc099940021b714da2d7779117cf6a7 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Tue, 25 Jul 2017 17:23:35 -0400 Subject: parisc: Extend disabled preemption in copy_user_page It's always bothered me that we only disable preemption in copy_user_page around the call to flush_dcache_page_asm. This patch extends this to after the copy. Signed-off-by: John David Anglin Cc: stable@vger.kernel.org # 4.9+ Signed-off-by: Helge Deller --- arch/parisc/kernel/cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 3a64ebb39ac7..85a92db70afc 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -453,8 +453,8 @@ void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, before it can be accessed through the kernel mapping. */ preempt_disable(); flush_dcache_page_asm(__pa(vfrom), vaddr); - preempt_enable(); copy_page_asm(vto, vfrom); + preempt_enable(); } EXPORT_SYMBOL(copy_user_page); -- cgit v1.2.3 From a25bd72badfa793ab5aeafd50dbd9db39f8c9179 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 24 Jul 2017 14:26:06 +1000 Subject: powerpc/mm/radix: Workaround prefetch issue with KVM There's a somewhat architectural issue with Radix MMU and KVM. When coming out of a guest with AIL (Alternate Interrupt Location, ie, MMU enabled), we start executing hypervisor code with the PID register still containing whatever the guest has been using. The problem is that the CPU can (and will) then start prefetching or speculatively load from whatever host context has that same PID (if any), thus bringing translations for that context into the TLB, which Linux doesn't know about. This can cause stale translations and subsequent crashes. Fixing this in a way that is neither racy nor a huge performance impact is difficult. We could just make the host invalidations always use broadcast forms but that would hurt single threaded programs for example. We chose to fix it instead by partitioning the PID space between guest and host. This is possible because today Linux only use 19 out of the 20 bits of PID space, so existing guests will work if we make the host use the top half of the 20 bits space. We additionally add support for a property to indicate to Linux the size of the PID register which will be useful if we eventually have processors with a larger PID space available. There is still an issue with malicious guests purposefully setting the PID register to a value in the hosts PID range. Hopefully future HW can prevent that, but in the meantime, we handle it with a pair of kludges: - On the way out of a guest, before we clear the current VCPU in the PACA, we check the PID and if it's outside of the permitted range we flush the TLB for that PID. - When context switching, if the mm is "new" on that CPU (the corresponding bit was set for the first time in the mm cpumask), we check if any sibling thread is in KVM (has a non-NULL VCPU pointer in the PACA). If that is the case, we also flush the PID for that CPU (core). This second part is needed to handle the case where a process is migrated (or starts a new pthread) on a sibling thread of the CPU coming out of KVM, as there's a window where stale translations can exist before we detect it and flush them out. A future optimization could be added by keeping track of whether the PID has ever been used and avoid doing that for completely fresh PIDs. We could similarily mark PIDs that have been the subject of a global invalidation as "fresh". But for now this will do. Signed-off-by: Benjamin Herrenschmidt [mpe: Rework the asm to build with CONFIG_PPC_RADIX_MMU=n, drop unneeded include of kvm_book3s_asm.h] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/mmu.h | 15 ++++---- arch/powerpc/include/asm/mmu_context.h | 18 ++++++++-- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 59 +++++++++++++++++++++++++++----- arch/powerpc/mm/mmu_context_book3s64.c | 5 +-- arch/powerpc/mm/pgtable-radix.c | 34 +++++++++++++++++- arch/powerpc/mm/tlb-radix.c | 45 ++++++++++++++++++++++-- 6 files changed, 154 insertions(+), 22 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index 77529a3e3811..5b4023c616f7 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -59,13 +59,14 @@ extern struct patb_entry *partition_tb; #define PRTS_MASK 0x1f /* process table size field */ #define PRTB_MASK 0x0ffffffffffff000UL -/* - * Limit process table to PAGE_SIZE table. This - * also limit the max pid we can support. - * MAX_USER_CONTEXT * 16 bytes of space. - */ -#define PRTB_SIZE_SHIFT (CONTEXT_BITS + 4) -#define PRTB_ENTRIES (1ul << CONTEXT_BITS) +/* Number of supported PID bits */ +extern unsigned int mmu_pid_bits; + +/* Base PID to allocate from */ +extern unsigned int mmu_base_pid; + +#define PRTB_SIZE_SHIFT (mmu_pid_bits + 4) +#define PRTB_ENTRIES (1ul << mmu_pid_bits) /* * Power9 currently only support 64K partition table size. diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index da7e9432fa8f..0c76675394c5 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -45,7 +45,7 @@ extern void set_context(unsigned long id, pgd_t *pgd); #ifdef CONFIG_PPC_BOOK3S_64 extern void radix__switch_mmu_context(struct mm_struct *prev, - struct mm_struct *next); + struct mm_struct *next); static inline void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) @@ -67,6 +67,12 @@ extern void __destroy_context(unsigned long context_id); extern void mmu_context_init(void); #endif +#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU) +extern void radix_kvm_prefetch_workaround(struct mm_struct *mm); +#else +static inline void radix_kvm_prefetch_workaround(struct mm_struct *mm) { } +#endif + extern void switch_cop(struct mm_struct *next); extern int use_cop(unsigned long acop, struct mm_struct *mm); extern void drop_cop(unsigned long acop, struct mm_struct *mm); @@ -79,9 +85,13 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { + bool new_on_cpu = false; + /* Mark this context has been used on the new CPU */ - if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) + if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) { cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); + new_on_cpu = true; + } /* 32-bit keeps track of the current PGDIR in the thread struct */ #ifdef CONFIG_PPC32 @@ -109,6 +119,10 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev, if (cpu_has_feature(CPU_FTR_ALTIVEC)) asm volatile ("dssall"); #endif /* CONFIG_ALTIVEC */ + + if (new_on_cpu) + radix_kvm_prefetch_workaround(next); + /* * The actual HW switching method differs between the various * sub architectures. Out of line for now diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index cb44065e2946..c52184a8efdf 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1443,12 +1443,14 @@ mc_cont: ori r6,r6,1 mtspr SPRN_CTRLT,r6 4: - /* Read the guest SLB and save it away */ + /* Check if we are running hash or radix and store it in cr2 */ ld r5, VCPU_KVM(r9) lbz r0, KVM_RADIX(r5) - cmpwi r0, 0 + cmpwi cr2,r0,0 + + /* Read the guest SLB and save it away */ li r5, 0 - bne 3f /* for radix, save 0 entries */ + bne cr2, 3f /* for radix, save 0 entries */ lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */ mtctr r0 li r6,0 @@ -1712,11 +1714,6 @@ BEGIN_FTR_SECTION_NESTED(96) END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 22: - /* Clear out SLB */ - li r5,0 - slbmte r5,r5 - slbia - ptesync /* Restore host values of some registers */ BEGIN_FTR_SECTION @@ -1737,10 +1734,56 @@ BEGIN_FTR_SECTION mtspr SPRN_PID, r7 mtspr SPRN_IAMR, r8 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) + +#ifdef CONFIG_PPC_RADIX_MMU + /* + * Are we running hash or radix ? + */ + beq cr2,3f + + /* Radix: Handle the case where the guest used an illegal PID */ + LOAD_REG_ADDR(r4, mmu_base_pid) + lwz r3, VCPU_GUEST_PID(r9) + lwz r5, 0(r4) + cmpw cr0,r3,r5 + blt 2f + + /* + * Illegal PID, the HW might have prefetched and cached in the TLB + * some translations for the LPID 0 / guest PID combination which + * Linux doesn't know about, so we need to flush that PID out of + * the TLB. First we need to set LPIDR to 0 so tlbiel applies to + * the right context. + */ + li r0,0 + mtspr SPRN_LPID,r0 + isync + + /* Then do a congruence class local flush */ + ld r6,VCPU_KVM(r9) + lwz r0,KVM_TLB_SETS(r6) + mtctr r0 + li r7,0x400 /* IS field = 0b01 */ + ptesync + sldi r0,r3,32 /* RS has PID */ +1: PPC_TLBIEL(7,0,2,1,1) /* RIC=2, PRS=1, R=1 */ + addi r7,r7,0x1000 + bdnz 1b + ptesync + +2: /* Flush the ERAT on radix P9 DD1 guest exit */ BEGIN_FTR_SECTION PPC_INVALIDATE_ERAT END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1) + b 4f +#endif /* CONFIG_PPC_RADIX_MMU */ + /* Hash: clear out SLB */ +3: li r5,0 + slbmte r5,r5 + slbia + ptesync +4: /* * POWER7/POWER8 guest -> host partition switch code. * We don't have to lock against tlbies but we do diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index abed1fe6992f..a75f63833284 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -126,9 +126,10 @@ static int hash__init_new_context(struct mm_struct *mm) static int radix__init_new_context(struct mm_struct *mm) { unsigned long rts_field; - int index; + int index, max_id; - index = alloc_context_id(1, PRTB_ENTRIES - 1); + max_id = (1 << mmu_pid_bits) - 1; + index = alloc_context_id(mmu_base_pid, max_id); if (index < 0) return index; diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 5cc50d47ce3f..671a45d86c18 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -25,6 +25,9 @@ #include +unsigned int mmu_pid_bits; +unsigned int mmu_base_pid; + static int native_register_process_table(unsigned long base, unsigned long pg_sz, unsigned long table_size) { @@ -261,11 +264,34 @@ static void __init radix_init_pgtable(void) for_each_memblock(memory, reg) WARN_ON(create_physical_mapping(reg->base, reg->base + reg->size)); + + /* Find out how many PID bits are supported */ + if (cpu_has_feature(CPU_FTR_HVMODE)) { + if (!mmu_pid_bits) + mmu_pid_bits = 20; +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + /* + * When KVM is possible, we only use the top half of the + * PID space to avoid collisions between host and guest PIDs + * which can cause problems due to prefetch when exiting the + * guest with AIL=3 + */ + mmu_base_pid = 1 << (mmu_pid_bits - 1); +#else + mmu_base_pid = 1; +#endif + } else { + /* The guest uses the bottom half of the PID space */ + if (!mmu_pid_bits) + mmu_pid_bits = 19; + mmu_base_pid = 1; + } + /* * Allocate Partition table and process table for the * host. */ - BUILD_BUG_ON_MSG((PRTB_SIZE_SHIFT > 36), "Process table size too large."); + BUG_ON(PRTB_SIZE_SHIFT > 36); process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT); /* * Fill in the process table. @@ -339,6 +365,12 @@ static int __init radix_dt_scan_page_sizes(unsigned long node, if (type == NULL || strcmp(type, "cpu") != 0) return 0; + /* Find MMU PID size */ + prop = of_get_flat_dt_prop(node, "ibm,mmu-pid-bits", &size); + if (prop && size == 4) + mmu_pid_bits = be32_to_cpup(prop); + + /* Grab page size encodings */ prop = of_get_flat_dt_prop(node, "ibm,processor-radix-AP-encodings", &size); if (!prop) return 0; diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 744e0164ecf5..16ae1bbe13f0 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -12,12 +12,12 @@ #include #include #include -#include +#include #include #include #include - +#include #define RIC_FLUSH_TLB 0 #define RIC_FLUSH_PWC 1 @@ -454,3 +454,44 @@ void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm, else radix__flush_tlb_page_psize(mm, address, mmu_virtual_psize); } + +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +extern void radix_kvm_prefetch_workaround(struct mm_struct *mm) +{ + unsigned int pid = mm->context.id; + + if (unlikely(pid == MMU_NO_CONTEXT)) + return; + + /* + * If this context hasn't run on that CPU before and KVM is + * around, there's a slim chance that the guest on another + * CPU just brought in obsolete translation into the TLB of + * this CPU due to a bad prefetch using the guest PID on + * the way into the hypervisor. + * + * We work around this here. If KVM is possible, we check if + * any sibling thread is in KVM. If it is, the window may exist + * and thus we flush that PID from the core. + * + * A potential future improvement would be to mark which PIDs + * have never been used on the system and avoid it if the PID + * is new and the process has no other cpumask bit set. + */ + if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) { + int cpu = smp_processor_id(); + int sib = cpu_first_thread_sibling(cpu); + bool flush = false; + + for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) { + if (sib == cpu) + continue; + if (paca[sib].kvm_hstate.kvm_vcpu) + flush = true; + } + if (flush) + _tlbiel_pid(pid, RIC_FLUSH_ALL); + } +} +EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround); +#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ -- cgit v1.2.3 From 4fd1bd443e80b12f0a01a45fb9a793206b41cb72 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Fri, 21 Jul 2017 16:51:39 +0200 Subject: powerpc/pseries: Fix of_node_put() underflow during reconfig remove As for commit 68baf692c435 ("powerpc/pseries: Fix of_node_put() underflow during DLPAR remove"), the call to of_node_put() must be removed from pSeries_reconfig_remove_node(). dlpar_detach_node() and pSeries_reconfig_remove_node() both call of_detach_node(), and thus the node should not be released in both cases. Fixes: 0829f6d1f69e ("of: device_node kobject lifecycle fixes") Cc: stable@vger.kernel.org # v3.15+ Signed-off-by: Laurent Vivier Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/reconfig.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c index e5bf1e84047f..011ef2180fe6 100644 --- a/arch/powerpc/platforms/pseries/reconfig.c +++ b/arch/powerpc/platforms/pseries/reconfig.c @@ -82,7 +82,6 @@ static int pSeries_reconfig_remove_node(struct device_node *np) of_detach_node(np); of_node_put(parent); - of_node_put(np); /* Must decrement the refcount */ return 0; } -- cgit v1.2.3 From b40b2386bce982ad97f3683b2b34e589c3be5c5a Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 26 Jul 2017 15:00:42 +1000 Subject: powerpc/Makefile: Fix ld version check with 64-bit LE-only toolchain In commit efe0160cfd40 ("powerpc/64: Linker on-demand sfpr functions for modules"), we added an ld version check early in the powerpc top-level Makefile. Because the Makefile runs before the kernel config is setup, the checks for CONFIG_CPU_LITTLE_ENDIAN etc. all take the default case. So we end up configuring ld for 32-bit big endian. That would be OK, except that for historical (or perhaps no) reason, we use 'override LD' to add the endian flags to the LD variable itself, rather than the normal approach of adding them to LDFLAGS. The end result is that when we check the ld version we run it as: $(CROSS_COMPILE)ld -EB -m elf32ppc --version This often works, unless you are using a 64-bit only and/or little endian only, toolchain. In which case you see something like: $ make defconfig powerpc64le-linux-ld: unrecognised emulation mode: elf32ppc Supported emulations: elf64lppc elf32lppc elf32lppclinux elf32lppcsim /bin/sh: 1: [: -ge: unexpected operator The proper fix is to stop using 'override LD', but that will require a fair bit of testing. Instead we can fix it for now just by reordering the Makefile to do the version check earlier. Fixes: efe0160cfd40 ("powerpc/64: Linker on-demand sfpr functions for modules") Signed-off-by: Michael Ellerman --- arch/powerpc/Makefile | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 8d4ed73d5490..e2b3e7a00c9e 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -59,6 +59,19 @@ machine-$(CONFIG_PPC64) += 64 machine-$(CONFIG_CPU_LITTLE_ENDIAN) += le UTS_MACHINE := $(subst $(space),,$(machine-y)) +# XXX This needs to be before we override LD below +ifdef CONFIG_PPC32 +KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o +else +ifeq ($(call ld-ifversion, -ge, 225000000, y),y) +# Have the linker provide sfpr if possible. +# There is a corresponding test in arch/powerpc/lib/Makefile +KBUILD_LDFLAGS_MODULE += --save-restore-funcs +else +KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o +endif +endif + ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) override LD += -EL LDEMULATION := lppc @@ -190,18 +203,6 @@ else CHECKFLAGS += -D__LITTLE_ENDIAN__ endif -ifdef CONFIG_PPC32 -KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o -else -ifeq ($(call ld-ifversion, -ge, 225000000, y),y) -# Have the linker provide sfpr if possible. -# There is a corresponding test in arch/powerpc/lib/Makefile -KBUILD_LDFLAGS_MODULE += --save-restore-funcs -else -KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o -endif -endif - ifeq ($(CONFIG_476FPE_ERR46),y) KBUILD_LDFLAGS_MODULE += --ppc476-workaround \ -T $(srctree)/arch/powerpc/platforms/44x/ppc476_modules.lds -- cgit v1.2.3 From d0153c7ff9226535a51e6a81f61656c9500957f4 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Tue, 25 Jul 2017 12:52:41 +0100 Subject: arm64: sysreg: Fix unprotected macro argmuent in write_sysreg write_sysreg() may misparse the value argument because it is used without parentheses to protect it. This patch adds the ( ) in order to avoid any surprises. Acked-by: Mark Rutland Signed-off-by: Dave Martin [will: same change to write_sysreg_s] Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 16e44fa9b3b6..248339e4aaf5 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -492,7 +492,7 @@ asm( * the "%x0" template means XZR. */ #define write_sysreg(v, r) do { \ - u64 __val = (u64)v; \ + u64 __val = (u64)(v); \ asm volatile("msr " __stringify(r) ", %x0" \ : : "rZ" (__val)); \ } while (0) @@ -508,7 +508,7 @@ asm( }) #define write_sysreg_s(v, r) do { \ - u64 __val = (u64)v; \ + u64 __val = (u64)(v); \ asm volatile("msr_s " __stringify(r) ", %x0" : : "rZ" (__val)); \ } while (0) -- cgit v1.2.3 From a512177ef3bb92dbec8a96fe337b11c126bf9c91 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 24 Jul 2017 18:54:38 +0200 Subject: KVM: x86: do mask out upper bits of PAE CR3 This reverts the change of commit f85c758dbee54cc3612a6e873ef7cecdb66ebee5, as the behavior it modified was intended. The VM is running in 32-bit PAE mode, and Table 4-7 of the Intel manual says: Table 4-7. Use of CR3 with PAE Paging Bit Position(s) Contents 4:0 Ignored 31:5 Physical address of the 32-Byte aligned page-directory-pointer table used for linear-address translation 63:32 Ignored (these bits exist only on processors supporting the Intel-64 architecture) To placate the static checker, write the mask explicitly as an unsigned long constant instead of using a 32-bit unsigned constant. Cc: Dan Carpenter Fixes: f85c758dbee54cc3612a6e873ef7cecdb66ebee5 Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 82a63c59f77b..6c97c82814c4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -597,8 +597,8 @@ bool pdptrs_changed(struct kvm_vcpu *vcpu) (unsigned long *)&vcpu->arch.regs_avail)) return true; - gfn = (kvm_read_cr3(vcpu) & ~31ul) >> PAGE_SHIFT; - offset = (kvm_read_cr3(vcpu) & ~31ul) & (PAGE_SIZE - 1); + gfn = (kvm_read_cr3(vcpu) & 0xffffffe0ul) >> PAGE_SHIFT; + offset = (kvm_read_cr3(vcpu) & 0xffffffe0ul) & (PAGE_SIZE - 1); r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte), PFERR_USER_MASK | PFERR_WRITE_MASK); if (r < 0) -- cgit v1.2.3 From 210f84b0ca7743f3b2a9acfae81df668dbbb6a12 Mon Sep 17 00:00:00 2001 From: Wincy Van Date: Fri, 28 Apr 2017 13:13:58 +0800 Subject: x86: irq: Define a global vector for nested posted interrupts We are using the same vector for nested/non-nested posted interrupts delivery, this may cause interrupts latency in L1 since we can't kick the L2 vcpu out of vmx-nonroot mode. This patch introduces a new vector which is only for nested posted interrupts to solve the problems above. Signed-off-by: Wincy Van Signed-off-by: Paolo Bonzini --- arch/x86/entry/entry_64.S | 1 + arch/x86/include/asm/entry_arch.h | 2 ++ arch/x86/include/asm/hardirq.h | 1 + arch/x86/include/asm/hw_irq.h | 2 ++ arch/x86/include/asm/irq_vectors.h | 3 ++- arch/x86/kernel/irq.c | 19 +++++++++++++++++++ arch/x86/kernel/irqinit.c | 2 ++ 7 files changed, 29 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index a9a8027a6c0e..d271fb79248f 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -705,6 +705,7 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR x86_platform_ipi smp_x86_platform_ipi #ifdef CONFIG_HAVE_KVM apicinterrupt3 POSTED_INTR_VECTOR kvm_posted_intr_ipi smp_kvm_posted_intr_ipi apicinterrupt3 POSTED_INTR_WAKEUP_VECTOR kvm_posted_intr_wakeup_ipi smp_kvm_posted_intr_wakeup_ipi +apicinterrupt3 POSTED_INTR_NESTED_VECTOR kvm_posted_intr_nested_ipi smp_kvm_posted_intr_nested_ipi #endif #ifdef CONFIG_X86_MCE_THRESHOLD diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index df002992d8fd..07b06955a05d 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -25,6 +25,8 @@ BUILD_INTERRUPT3(kvm_posted_intr_ipi, POSTED_INTR_VECTOR, smp_kvm_posted_intr_ipi) BUILD_INTERRUPT3(kvm_posted_intr_wakeup_ipi, POSTED_INTR_WAKEUP_VECTOR, smp_kvm_posted_intr_wakeup_ipi) +BUILD_INTERRUPT3(kvm_posted_intr_nested_ipi, POSTED_INTR_NESTED_VECTOR, + smp_kvm_posted_intr_nested_ipi) #endif /* diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 9b76cd331990..ad1ed531febc 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -15,6 +15,7 @@ typedef struct { #ifdef CONFIG_HAVE_KVM unsigned int kvm_posted_intr_ipis; unsigned int kvm_posted_intr_wakeup_ipis; + unsigned int kvm_posted_intr_nested_ipis; #endif unsigned int x86_platform_ipis; /* arch dependent */ unsigned int apic_perf_irqs; diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index b90e1053049b..d6dbafbd4207 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -30,6 +30,7 @@ extern asmlinkage void apic_timer_interrupt(void); extern asmlinkage void x86_platform_ipi(void); extern asmlinkage void kvm_posted_intr_ipi(void); extern asmlinkage void kvm_posted_intr_wakeup_ipi(void); +extern asmlinkage void kvm_posted_intr_nested_ipi(void); extern asmlinkage void error_interrupt(void); extern asmlinkage void irq_work_interrupt(void); @@ -62,6 +63,7 @@ extern void trace_call_function_single_interrupt(void); #define trace_reboot_interrupt reboot_interrupt #define trace_kvm_posted_intr_ipi kvm_posted_intr_ipi #define trace_kvm_posted_intr_wakeup_ipi kvm_posted_intr_wakeup_ipi +#define trace_kvm_posted_intr_nested_ipi kvm_posted_intr_nested_ipi #endif /* CONFIG_TRACING */ #ifdef CONFIG_X86_LOCAL_APIC diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 6ca9fd6234e1..aaf8d28b5d00 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -83,7 +83,6 @@ */ #define X86_PLATFORM_IPI_VECTOR 0xf7 -#define POSTED_INTR_WAKEUP_VECTOR 0xf1 /* * IRQ work vector: */ @@ -98,6 +97,8 @@ /* Vector for KVM to deliver posted interrupt IPI */ #ifdef CONFIG_HAVE_KVM #define POSTED_INTR_VECTOR 0xf2 +#define POSTED_INTR_WAKEUP_VECTOR 0xf1 +#define POSTED_INTR_NESTED_VECTOR 0xf0 #endif /* diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 4aa03c5a14c9..4ed0aba8dbc8 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -155,6 +155,12 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_printf(p, "%10u ", irq_stats(j)->kvm_posted_intr_ipis); seq_puts(p, " Posted-interrupt notification event\n"); + seq_printf(p, "%*s: ", prec, "NPI"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", + irq_stats(j)->kvm_posted_intr_nested_ipis); + seq_puts(p, " Nested posted-interrupt event\n"); + seq_printf(p, "%*s: ", prec, "PIW"); for_each_online_cpu(j) seq_printf(p, "%10u ", @@ -313,6 +319,19 @@ __visible void smp_kvm_posted_intr_wakeup_ipi(struct pt_regs *regs) exiting_irq(); set_irq_regs(old_regs); } + +/* + * Handler for POSTED_INTERRUPT_NESTED_VECTOR. + */ +__visible void smp_kvm_posted_intr_nested_ipi(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + + entering_ack_irq(); + inc_irq_stat(kvm_posted_intr_nested_ipis); + exiting_irq(); + set_irq_regs(old_regs); +} #endif __visible void __irq_entry smp_trace_x86_platform_ipi(struct pt_regs *regs) diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 7468c6987547..c7fd18526c3e 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -150,6 +150,8 @@ static void __init apic_intr_init(void) alloc_intr_gate(POSTED_INTR_VECTOR, kvm_posted_intr_ipi); /* IPI for KVM to deliver interrupt to wake up tasks */ alloc_intr_gate(POSTED_INTR_WAKEUP_VECTOR, kvm_posted_intr_wakeup_ipi); + /* IPI for KVM to deliver nested posted interrupt */ + alloc_intr_gate(POSTED_INTR_NESTED_VECTOR, kvm_posted_intr_nested_ipi); #endif /* IPI vectors for APIC spurious and error interrupts */ -- cgit v1.2.3 From 06a5524f091b6b41b0007810c74cc9315923a145 Mon Sep 17 00:00:00 2001 From: Wincy Van Date: Fri, 28 Apr 2017 13:13:59 +0800 Subject: KVM: nVMX: Fix posted intr delivery when vcpu is in guest mode The PI vector for L0 and L1 must be different. If dest vcpu0 is in guest mode while vcpu1 is delivering a non-nested PI to vcpu0, there wont't be any vmexit so that the non-nested interrupt will be delayed. Signed-off-by: Wincy Van Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d04092f821b6..497cf64794fd 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4987,9 +4987,12 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) } } -static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu) +static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu, + bool nested) { #ifdef CONFIG_SMP + int pi_vec = nested ? POSTED_INTR_NESTED_VECTOR : POSTED_INTR_VECTOR; + if (vcpu->mode == IN_GUEST_MODE) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -5007,8 +5010,7 @@ static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu) */ WARN_ON_ONCE(pi_test_sn(&vmx->pi_desc)); - apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), - POSTED_INTR_VECTOR); + apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), pi_vec); return true; } #endif @@ -5023,7 +5025,7 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu, if (is_guest_mode(vcpu) && vector == vmx->nested.posted_intr_nv) { /* the PIR and ON have been set by L1. */ - kvm_vcpu_trigger_posted_interrupt(vcpu); + kvm_vcpu_trigger_posted_interrupt(vcpu, true); /* * If a posted intr is not recognized by hardware, * we will accomplish it in the next vmentry. @@ -5057,7 +5059,7 @@ static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) if (pi_test_and_set_on(&vmx->pi_desc)) return; - if (!kvm_vcpu_trigger_posted_interrupt(vcpu)) + if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false)) kvm_vcpu_kick(vcpu); } @@ -10064,13 +10066,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, /* Posted interrupts setting is only taken from vmcs12. */ if (nested_cpu_has_posted_intr(vmcs12)) { - /* - * Note that we use L0's vector here and in - * vmx_deliver_nested_posted_interrupt. - */ vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv; vmx->nested.pi_pending = false; - vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR); + vmcs_write16(POSTED_INTR_NV, POSTED_INTR_NESTED_VECTOR); } else { exec_control &= ~PIN_BASED_POSTED_INTR; } @@ -10941,7 +10939,9 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, */ vmx_flush_tlb(vcpu); } - + /* Restore posted intr vector. */ + if (nested_cpu_has_posted_intr(vmcs12)) + vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR); vmcs_write32(GUEST_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs); vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->host_ia32_sysenter_esp); -- cgit v1.2.3 From 2d6144e366fb39609aecf7a658e2e10af37627e9 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 25 Jul 2017 03:40:46 -0700 Subject: KVM: nVMX: Fix loss of L2's NMI blocking state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Run kvm-unit-tests/eventinj.flat in L1 w/ ept=0 on both L0 and L1: Before NMI IRET test Sending NMI to self NMI isr running stack 0x461000 Sending nested NMI to self After nested NMI to self Nested NMI isr running rip=40038e After iret After NMI to self FAIL: NMI Commit 4c4a6f790ee862 (KVM: nVMX: track NMI blocking state separately for each VMCS) tracks NMI blocking state separately for vmcs01 and vmcs02. However it is not enough: - The L2 (kvm-unit-tests/eventinj.flat) generates NMI that will fault on IRET, so the L2 can generate #PF which can be intercepted by L0. - L0 walks L1's guest page table and sees the mapping is invalid, it resumes the L1 guest and injects the #PF into L1. At this point the vmcs02 has nmi_known_unmasked=true. - L1 sets set bit 3 (blocking by NMI) in the interruptibility-state field of vmcs12 (and fixes the shadow page table) before resuming L2 guest. - L1 executes VMRESUME to resume L2, causing a vmexit to L0 - during VMRESUME emulation, prepare_vmcs02 sets bit 3 in the interruptibility-state field of vmcs02, but nmi_known_unmasked is still true. - L2 immediately exits to L0 with another page fault, because L0 still has not updated the NGVA->HPA page tables. However, nmi_known_unmasked is true so vmx_recover_nmi_blocking does not do anything. The fix is to update nmi_known_unmasked when preparing vmcs02 from vmcs12. Cc: Paolo Bonzini Cc: Radim Krčmář Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 497cf64794fd..39a6222bf968 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -10042,6 +10042,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmcs12->vm_entry_instruction_len); vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, vmcs12->guest_interruptibility_info); + vmx->loaded_vmcs->nmi_known_unmasked = + !(vmcs12->guest_interruptibility_info & GUEST_INTR_STATE_NMI); } else { vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); } -- cgit v1.2.3 From 1d518c6820daf4e00d29adfba980aee05f605f0f Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 25 Jul 2017 00:43:15 -0700 Subject: KVM: LAPIC: Fix reentrancy issues with preempt notifiers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Preempt can occur in the preemption timer expiration handler: CPU0 CPU1 preemption timer vmexit handle_preemption_timer(vCPU0) kvm_lapic_expired_hv_timer hv_timer_is_use == true sched_out sched_in kvm_arch_vcpu_load kvm_lapic_restart_hv_timer restart_apic_timer start_hv_timer already-expired timer or sw timer triggerd in the window start_sw_timer cancel_hv_timer /* back in kvm_lapic_expired_hv_timer */ cancel_hv_timer WARN_ON(!apic->lapic_timer.hv_timer_in_use); ==> Oops This can be reproduced if CONFIG_PREEMPT is enabled. ------------[ cut here ]------------ WARNING: CPU: 4 PID: 2972 at /home/kernel/linux/arch/x86/kvm//lapic.c:1563 kvm_lapic_expired_hv_timer+0x9e/0xb0 [kvm] CPU: 4 PID: 2972 Comm: qemu-system-x86 Tainted: G OE 4.13.0-rc2+ #16 RIP: 0010:kvm_lapic_expired_hv_timer+0x9e/0xb0 [kvm] Call Trace: handle_preemption_timer+0xe/0x20 [kvm_intel] vmx_handle_exit+0xb8/0xd70 [kvm_intel] kvm_arch_vcpu_ioctl_run+0xdd1/0x1be0 [kvm] ? kvm_arch_vcpu_load+0x47/0x230 [kvm] ? kvm_arch_vcpu_load+0x62/0x230 [kvm] kvm_vcpu_ioctl+0x340/0x700 [kvm] ? kvm_vcpu_ioctl+0x340/0x700 [kvm] ? __fget+0xfc/0x210 do_vfs_ioctl+0xa4/0x6a0 ? __fget+0x11d/0x210 SyS_ioctl+0x79/0x90 do_syscall_64+0x81/0x220 entry_SYSCALL64_slow_path+0x25/0x25 ------------[ cut here ]------------ WARNING: CPU: 4 PID: 2972 at /home/kernel/linux/arch/x86/kvm//lapic.c:1498 cancel_hv_timer.isra.40+0x4f/0x60 [kvm] CPU: 4 PID: 2972 Comm: qemu-system-x86 Tainted: G W OE 4.13.0-rc2+ #16 RIP: 0010:cancel_hv_timer.isra.40+0x4f/0x60 [kvm] Call Trace: kvm_lapic_expired_hv_timer+0x3e/0xb0 [kvm] handle_preemption_timer+0xe/0x20 [kvm_intel] vmx_handle_exit+0xb8/0xd70 [kvm_intel] kvm_arch_vcpu_ioctl_run+0xdd1/0x1be0 [kvm] ? kvm_arch_vcpu_load+0x47/0x230 [kvm] ? kvm_arch_vcpu_load+0x62/0x230 [kvm] kvm_vcpu_ioctl+0x340/0x700 [kvm] ? kvm_vcpu_ioctl+0x340/0x700 [kvm] ? __fget+0xfc/0x210 do_vfs_ioctl+0xa4/0x6a0 ? __fget+0x11d/0x210 SyS_ioctl+0x79/0x90 do_syscall_64+0x81/0x220 entry_SYSCALL64_slow_path+0x25/0x25 This patch fixes it by making the caller of cancel_hv_timer, start_hv_timer and start_sw_timer be in preemption-disabled regions, which trivially avoid any reentrancy issue with preempt notifier. Cc: Paolo Bonzini Cc: Radim Krčmář Signed-off-by: Wanpeng Li [Add more WARNs. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 2819d4c123eb..589dcc117086 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1495,11 +1495,10 @@ EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use); static void cancel_hv_timer(struct kvm_lapic *apic) { + WARN_ON(preemptible()); WARN_ON(!apic->lapic_timer.hv_timer_in_use); - preempt_disable(); kvm_x86_ops->cancel_hv_timer(apic->vcpu); apic->lapic_timer.hv_timer_in_use = false; - preempt_enable(); } static bool start_hv_timer(struct kvm_lapic *apic) @@ -1507,6 +1506,7 @@ static bool start_hv_timer(struct kvm_lapic *apic) struct kvm_timer *ktimer = &apic->lapic_timer; int r; + WARN_ON(preemptible()); if (!kvm_x86_ops->set_hv_timer) return false; @@ -1538,6 +1538,8 @@ static bool start_hv_timer(struct kvm_lapic *apic) static void start_sw_timer(struct kvm_lapic *apic) { struct kvm_timer *ktimer = &apic->lapic_timer; + + WARN_ON(preemptible()); if (apic->lapic_timer.hv_timer_in_use) cancel_hv_timer(apic); if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending)) @@ -1552,15 +1554,20 @@ static void start_sw_timer(struct kvm_lapic *apic) static void restart_apic_timer(struct kvm_lapic *apic) { + preempt_disable(); if (!start_hv_timer(apic)) start_sw_timer(apic); + preempt_enable(); } void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; - WARN_ON(!apic->lapic_timer.hv_timer_in_use); + preempt_disable(); + /* If the preempt notifier has already run, it also called apic_timer_expired */ + if (!apic->lapic_timer.hv_timer_in_use) + goto out; WARN_ON(swait_active(&vcpu->wq)); cancel_hv_timer(apic); apic_timer_expired(apic); @@ -1569,6 +1576,8 @@ void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu) advance_periodic_target_expiration(apic); restart_apic_timer(apic); } +out: + preempt_enable(); } EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer); @@ -1582,9 +1591,11 @@ void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; + preempt_disable(); /* Possibly the TSC deadline timer is not enabled yet */ if (apic->lapic_timer.hv_timer_in_use) start_sw_timer(apic); + preempt_enable(); } EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer); -- cgit v1.2.3 From 0da12a7a81f1e2255e89dc783c565e84801475a2 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sat, 17 Jun 2017 20:00:55 +0530 Subject: powerpc/mm/hash: Free the subpage_prot_table correctly Fixes: dad6f37c2602e ("powerpc: subpage_protect: Increase the array size to take care of 64TB") Signed-off-by: Aneesh Kumar K.V Tested-by: Ram Pai Signed-off-by: Michael Ellerman --- arch/powerpc/mm/subpage-prot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c index e94fbd4c8845..781532d7bc4d 100644 --- a/arch/powerpc/mm/subpage-prot.c +++ b/arch/powerpc/mm/subpage-prot.c @@ -36,7 +36,7 @@ void subpage_prot_free(struct mm_struct *mm) } } addr = 0; - for (i = 0; i < 2; ++i) { + for (i = 0; i < (TASK_SIZE_USER64 >> 43); ++i) { p = spt->protptrs[i]; if (!p) continue; -- cgit v1.2.3 From a3c0d2fb082cdd6d70455fe6fb5ed584f05950ea Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 17 Jul 2017 21:45:27 +0200 Subject: ARM: dts: exynos: Add clocks to audss block to fix silent hang on Exynos4412 Add necessary parent clocks for audss (Audio SubSystem, MAUDIO) clock controller block. This allows driver to keep EPLL enabled before accessing any MAUDIO registers thus fixing silent hang. This silent hang appeared with commit 6edfa11cb396 ("clk: samsung: Add enable/disable operation for PLL36XX clocks"), e.g. on Odroid U3 usually with last (but unrelated) messages: [ 2.382741] input: gpio_keys as /devices/platform/gpio_keys/input/input0 [ 2.405686] usb 1-3: new high-speed USB device number 3 using exynos-ehci [ 2.419843] max77686-rtc max77686-rtc: setting system clock to 2017-06-21 17:04:13 UTC (1498064653) Signed-off-by: Krzysztof Kozlowski Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/exynos4.dtsi | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/exynos4.dtsi b/arch/arm/boot/dts/exynos4.dtsi index 497a9470c888..5739389f5bb8 100644 --- a/arch/arm/boot/dts/exynos4.dtsi +++ b/arch/arm/boot/dts/exynos4.dtsi @@ -59,6 +59,9 @@ compatible = "samsung,exynos4210-audss-clock"; reg = <0x03810000 0x0C>; #clock-cells = <1>; + clocks = <&clock CLK_FIN_PLL>, <&clock CLK_FOUT_EPLL>, + <&clock CLK_SCLK_AUDIO0>, <&clock CLK_SCLK_AUDIO0>; + clock-names = "pll_ref", "pll_in", "sclk_audio", "sclk_pcm_in"; }; i2s0: i2s@03830000 { -- cgit v1.2.3 From 0b048ff2cff5e1fb6894aa9cd7b31670cfe33f22 Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Thu, 15 Jun 2017 14:57:30 -0700 Subject: ARM: dts: da850-evm: drop unused VPIF endpoints Drop the unused endpoints. They should only be used when there is an actual remote-endpoint connected. Signed-off-by: Kevin Hilman Signed-off-by: Sekhar Nori --- arch/arm/boot/dts/da850-evm.dts | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/da850-evm.dts b/arch/arm/boot/dts/da850-evm.dts index a423e8ebfb37..67e72bc72e80 100644 --- a/arch/arm/boot/dts/da850-evm.dts +++ b/arch/arm/boot/dts/da850-evm.dts @@ -301,25 +301,4 @@ pinctrl-names = "default"; pinctrl-0 = <&vpif_capture_pins>, <&vpif_display_pins>; status = "okay"; - - /* VPIF capture port */ - port@0 { - vpif_input_ch0: endpoint@0 { - reg = <0>; - bus-width = <8>; - }; - - vpif_input_ch1: endpoint@1 { - reg = <1>; - bus-width = <8>; - data-shift = <8>; - }; - }; - - /* VPIF display port */ - port@1 { - vpif_output_ch0: endpoint { - bus-width = <8>; - }; - }; }; -- cgit v1.2.3 From 6ea57ad6b9526f8106d0bf410b41a7223462ff89 Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Mon, 17 Jul 2017 10:47:18 -0700 Subject: ARM: dts: da850-lcdk: drop unused VPIF endpoints Drop the unused endpoints. They should only be used when there is an actual remote-endpoint connected. Cc: Sekhar Nori Signed-off-by: Kevin Hilman Signed-off-by: Sekhar Nori --- arch/arm/boot/dts/da850-lcdk.dts | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/da850-lcdk.dts b/arch/arm/boot/dts/da850-lcdk.dts index b837fec70eec..a0f0916156e6 100644 --- a/arch/arm/boot/dts/da850-lcdk.dts +++ b/arch/arm/boot/dts/da850-lcdk.dts @@ -318,11 +318,4 @@ pinctrl-names = "default"; pinctrl-0 = <&vpif_capture_pins>; status = "okay"; - - /* VPIF capture port */ - port { - vpif_ch0: endpoint { - bus-width = <8>; - }; - }; }; -- cgit v1.2.3 From 8b9740178f2a380339fc1ce6d780bb442580eb2c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 9 Jun 2017 10:00:17 +0200 Subject: ARM: davinci: don't mark vpif_input structures as 'const' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A change to the platform data definitions caused a warning in the board code: arch/arm/mach-davinci/board-da850-evm.c:1221:13: warning: initialization discards ‘const’ qualifier from pointer target type [-Wdiscarded-qualifiers] arch/arm/mach-davinci/board-da850-evm.c:1231:13: warning: initialization discards ‘const’ qualifier from pointer target type [-Wdiscarded-qualifiers] This is a bit unfortunate, since we generally like structure definitions to be const, but as this is legacy code, the easiest way out is still to remove the 'const' annotation here. Fixes: 4a5f8ae50b66 ("[media] davinci: vpif_capture: get subdevs from DT when available") Fixes: 231ce279e6e3 ("ARM: davinci: fix const warnings") Acked-by: Sekhar Nori Signed-off-by: Arnd Bergmann --- arch/arm/mach-davinci/board-da850-evm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-davinci/board-da850-evm.c b/arch/arm/mach-davinci/board-da850-evm.c index b5625d009288..e568c8c6f69c 100644 --- a/arch/arm/mach-davinci/board-da850-evm.c +++ b/arch/arm/mach-davinci/board-da850-evm.c @@ -1166,7 +1166,7 @@ static struct tvp514x_platform_data tvp5146_pdata = { #define TVP514X_STD_ALL (V4L2_STD_NTSC | V4L2_STD_PAL) -static const struct vpif_input da850_ch0_inputs[] = { +static struct vpif_input da850_ch0_inputs[] = { { .input = { .index = 0, @@ -1181,7 +1181,7 @@ static const struct vpif_input da850_ch0_inputs[] = { }, }; -static const struct vpif_input da850_ch1_inputs[] = { +static struct vpif_input da850_ch1_inputs[] = { { .input = { .index = 0, -- cgit v1.2.3 From d997211e1ea14a67baadf391a8106d4330244700 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 15 Mar 2014 11:21:06 +0100 Subject: ARM: sa1100/pxa: fix MTD_XIP build In commit 3169663ac5902 "ARM: sa11x0/pxa: convert OS timer registers to IOMEM", the definition of the OSCR macro was changed to be an __iomem pointer, but the same register is also used by the XIP code. This patch does the corresponding change here as well. On PXA, the IRQ register definitions were removed even earlier, in commit 5d284e353eb1 ("ARM: pxa: avoid accessing interrupt registers directly"). This patch unfortunately brings some of that back. An earlier version of my patch moved the code into an external function, which could not work for CONFIG_XIP_KERNEL+CONFIG_MTD_XIP, so this restores something close to the original code. Link: http://lists.infradead.org/pipermail/linux-arm-kernel/2014-March/241716.html Acked-by: Robert Jarzmik Signed-off-by: Arnd Bergmann --- arch/arm/mach-pxa/include/mach/mtd-xip.h | 10 +++++++--- arch/arm/mach-sa1100/include/mach/mtd-xip.h | 4 ++-- 2 files changed, 9 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-pxa/include/mach/mtd-xip.h b/arch/arm/mach-pxa/include/mach/mtd-xip.h index 990d2bf2fb45..9bf4ea6a6f74 100644 --- a/arch/arm/mach-pxa/include/mach/mtd-xip.h +++ b/arch/arm/mach-pxa/include/mach/mtd-xip.h @@ -17,11 +17,15 @@ #include -#define xip_irqpending() (ICIP & ICMR) +/* restored July 2017, this did not build since 2011! */ + +#define ICIP io_p2v(0x40d00000) +#define ICMR io_p2v(0x40d00004) +#define xip_irqpending() (readl(ICIP) & readl(ICMR)) /* we sample OSCR and convert desired delta to usec (1/4 ~= 1000000/3686400) */ -#define xip_currtime() (OSCR) -#define xip_elapsed_since(x) (signed)((OSCR - (x)) / 4) +#define xip_currtime() readl(OSCR) +#define xip_elapsed_since(x) (signed)((readl(OSCR) - (x)) / 4) /* * xip_cpu_idle() is used when waiting for a delay equal or larger than diff --git a/arch/arm/mach-sa1100/include/mach/mtd-xip.h b/arch/arm/mach-sa1100/include/mach/mtd-xip.h index b3d684098fbf..cb76096a2e36 100644 --- a/arch/arm/mach-sa1100/include/mach/mtd-xip.h +++ b/arch/arm/mach-sa1100/include/mach/mtd-xip.h @@ -20,7 +20,7 @@ #define xip_irqpending() (ICIP & ICMR) /* we sample OSCR and convert desired delta to usec (1/4 ~= 1000000/3686400) */ -#define xip_currtime() (OSCR) -#define xip_elapsed_since(x) (signed)((OSCR - (x)) / 4) +#define xip_currtime() readl_relaxed(OSCR) +#define xip_elapsed_since(x) (signed)((readl_relaxed(OSCR) - (x)) / 4) #endif /* __ARCH_SA1100_MTD_XIP_H__ */ -- cgit v1.2.3 From 31d5cf1476a09c95d8f5c2d1d77fa57f7d802b52 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 Jan 2014 21:17:41 +0100 Subject: ARM: davinci: normalize clk API davinci still has its own clk implementation, but lacks a clk_get_parent() helper, which can lead to link errors in randconfig builds. This adds the usual implementation. Acked-by: Sekhar Nori Signed-off-by: Arnd Bergmann --- arch/arm/mach-davinci/clock.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/arm/mach-davinci/clock.c b/arch/arm/mach-davinci/clock.c index f5dce9b4e617..f77a4f766050 100644 --- a/arch/arm/mach-davinci/clock.c +++ b/arch/arm/mach-davinci/clock.c @@ -218,6 +218,15 @@ int clk_set_parent(struct clk *clk, struct clk *parent) } EXPORT_SYMBOL(clk_set_parent); +struct clk *clk_get_parent(struct clk *clk) +{ + if (!clk) + return NULL; + + return clk->parent; +} +EXPORT_SYMBOL(clk_get_parent); + int clk_register(struct clk *clk) { if (clk == NULL || IS_ERR(clk)) -- cgit v1.2.3 From 77a374c29992b1a0e2f4a6e2867324c4de6e23c4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 Jan 2014 21:17:41 +0100 Subject: ARM: sa1100: normalize clk API sa1100 provides its own variant of the clk API rather than using the generic COMMON_CLK API. This generally works, but it causes some link errors with drivers using the clk_set_rate, clk_get_parent, clk_set_parent or clk_round_rate functions when a platform lacks those interfaces. This adds trivial stub implementations for each of them, based on the behavior of the COMMON_CLK implementation: - set_rate() and set_parent() report success without doing anything - round_rate() returns the clk rate - get_parent() returns NULL. This adds the minimal bloat and should do the right thing for the simple clock hardware in this SoC. Signed-off-by: Arnd Bergmann --- arch/arm/mach-sa1100/clock.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'arch') diff --git a/arch/arm/mach-sa1100/clock.c b/arch/arm/mach-sa1100/clock.c index 0db46895c82a..7d52cd97d96e 100644 --- a/arch/arm/mach-sa1100/clock.c +++ b/arch/arm/mach-sa1100/clock.c @@ -35,6 +35,31 @@ struct clk clk_##_name = { \ static DEFINE_SPINLOCK(clocks_lock); +/* Dummy clk routine to build generic kernel parts that may be using them */ +long clk_round_rate(struct clk *clk, unsigned long rate) +{ + return clk_get_rate(clk); +} +EXPORT_SYMBOL(clk_round_rate); + +int clk_set_rate(struct clk *clk, unsigned long rate) +{ + return 0; +} +EXPORT_SYMBOL(clk_set_rate); + +int clk_set_parent(struct clk *clk, struct clk *parent) +{ + return 0; +} +EXPORT_SYMBOL(clk_set_parent); + +struct clk *clk_get_parent(struct clk *clk) +{ + return NULL; +} +EXPORT_SYMBOL(clk_get_parent); + static void clk_gpio27_enable(struct clk *clk) { /* -- cgit v1.2.3 From a3287c41ff405025bc57b165a0f6cd698bbbc1be Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 25 Jul 2017 16:30:34 +0100 Subject: drivers/perf: arm_pmu: Request PMU SPIs with IRQF_PER_CPU Since the PMU register interface is banked per CPU, CPU PMU interrrupts cannot be handled by a CPU other than the one with the PMU asserting the interrupt. This means that migrating PMU SPIs, as we do during a CPU hotplug operation doesn't make any sense and can lead to the IRQ being disabled entirely if we route a spurious IRQ to the new affinity target. This has been observed in practice on AMD Seattle, where CPUs on the non-boot cluster appear to take a spurious PMU IRQ when coming online, which is routed to CPU0 where it cannot be handled. This patch passes IRQF_PERCPU for PMU SPIs and forcefully sets their affinity prior to requesting them, ensuring that they cannot be migrated during hotplug events. This interacts badly with the DB8500 erratum workaround that ping-pongs the interrupt affinity from the handler, so we avoid passing IRQF_PERCPU in that case by allowing the IRQ flags to be overridden in the platdata. Fixes: 3cf7ee98b848 ("drivers/perf: arm_pmu: move irq request/free into probe") Cc: Mark Rutland Cc: Linus Walleij Signed-off-by: Will Deacon --- arch/arm/mach-ux500/cpu-db8500.c | 1 + drivers/perf/arm_pmu.c | 41 ++++++++++++++++++++++++++-------------- include/linux/perf/arm_pmu.h | 4 ++++ 3 files changed, 32 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-ux500/cpu-db8500.c b/arch/arm/mach-ux500/cpu-db8500.c index 28083ef72819..71a34e8c345a 100644 --- a/arch/arm/mach-ux500/cpu-db8500.c +++ b/arch/arm/mach-ux500/cpu-db8500.c @@ -133,6 +133,7 @@ static irqreturn_t db8500_pmu_handler(int irq, void *dev, irq_handler_t handler) static struct arm_pmu_platdata db8500_pmu_platdata = { .handle_irq = db8500_pmu_handler, + .irq_flags = IRQF_NOBALANCING | IRQF_NO_THREAD, }; static struct of_dev_auxdata u8500_auxdata_lookup[] __initdata = { diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index dc459eb1246b..1c5e0f333779 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -569,22 +569,41 @@ int armpmu_request_irq(struct arm_pmu *armpmu, int cpu) if (irq != other_irq) { pr_warn("mismatched PPIs detected.\n"); err = -EINVAL; + goto err_out; } } else { - err = request_irq(irq, handler, - IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu", + struct arm_pmu_platdata *platdata = armpmu_get_platdata(armpmu); + unsigned long irq_flags; + + err = irq_force_affinity(irq, cpumask_of(cpu)); + + if (err && num_possible_cpus() > 1) { + pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n", + irq, cpu); + goto err_out; + } + + if (platdata && platdata->irq_flags) { + irq_flags = platdata->irq_flags; + } else { + irq_flags = IRQF_PERCPU | + IRQF_NOBALANCING | + IRQF_NO_THREAD; + } + + err = request_irq(irq, handler, irq_flags, "arm-pmu", per_cpu_ptr(&hw_events->percpu_pmu, cpu)); } - if (err) { - pr_err("unable to request IRQ%d for ARM PMU counters\n", - irq); - return err; - } + if (err) + goto err_out; cpumask_set_cpu(cpu, &armpmu->active_irqs); - return 0; + +err_out: + pr_err("unable to request IRQ%d for ARM PMU counters\n", irq); + return err; } int armpmu_request_irqs(struct arm_pmu *armpmu) @@ -628,12 +647,6 @@ static int arm_perf_starting_cpu(unsigned int cpu, struct hlist_node *node) enable_percpu_irq(irq, IRQ_TYPE_NONE); return 0; } - - if (irq_force_affinity(irq, cpumask_of(cpu)) && - num_possible_cpus() > 1) { - pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n", - irq, cpu); - } } return 0; diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 1360dd6d5e61..af0f44effd44 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -24,10 +24,14 @@ * interrupt and passed the address of the low level handler, * and can be used to implement any platform specific handling * before or after calling it. + * + * @irq_flags: if non-zero, these flags will be passed to request_irq + * when requesting interrupts for this PMU device. */ struct arm_pmu_platdata { irqreturn_t (*handle_irq)(int irq, void *dev, irq_handler_t pmu_handler); + unsigned long irq_flags; }; #ifdef CONFIG_ARM_PMU -- cgit v1.2.3 From 072b6e3692532b6281bf781ded1c7a986ac17471 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Thu, 6 Jul 2017 10:53:34 +0200 Subject: ARM: dts: sunxi: h3/h5: Correct emac register size The datasheet said that emac register size is 0x10000 not 0x104 Signed-off-by: Corentin Labbe Signed-off-by: Maxime Ripard [wens@csie.org: Fixed commit subject prefix] Signed-off-by: Chen-Yu Tsai --- arch/arm/boot/dts/sunxi-h3-h5.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/sunxi-h3-h5.dtsi b/arch/arm/boot/dts/sunxi-h3-h5.dtsi index 6f2162608006..d38282b9e5d4 100644 --- a/arch/arm/boot/dts/sunxi-h3-h5.dtsi +++ b/arch/arm/boot/dts/sunxi-h3-h5.dtsi @@ -394,7 +394,7 @@ emac: ethernet@1c30000 { compatible = "allwinner,sun8i-h3-emac"; syscon = <&syscon>; - reg = <0x01c30000 0x104>; + reg = <0x01c30000 0x10000>; interrupts = ; interrupt-names = "macirq"; resets = <&ccu RST_BUS_EMAC>; -- cgit v1.2.3 From 3a4bae5fd44aa1cf49780dd25b3a89e6a39e8560 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 10 Jul 2017 20:44:40 +0200 Subject: arm64: allwinner: sun50i-a64: Correct emac register size The datasheet said that emac register size is 0x10000 not 0x100 Signed-off-by: Corentin Labbe Signed-off-by: Maxime Ripard [wens@csie.org: Fixed commit subject prefix] Signed-off-by: Chen-Yu Tsai --- arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi index 9d00622ce845..bd0f33b77f57 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi @@ -452,7 +452,7 @@ emac: ethernet@1c30000 { compatible = "allwinner,sun50i-a64-emac"; syscon = <&syscon>; - reg = <0x01c30000 0x100>; + reg = <0x01c30000 0x10000>; interrupts = ; interrupt-names = "macirq"; resets = <&ccu RST_BUS_EMAC>; -- cgit v1.2.3 From c50f9fb6c535edf4731eecb53d91ebc297e82e5b Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Fri, 14 Jul 2017 14:42:52 +0800 Subject: ARM: dts: sun8i: a83t: Switch to CCU device tree binding macros Now that the CCU device tree binding headers have been merged, we can use the properly named macros in the device tree, instead of raw numbers. Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard --- arch/arm/boot/dts/sun8i-a83t.dtsi | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/sun8i-a83t.dtsi b/arch/arm/boot/dts/sun8i-a83t.dtsi index 8923ba625b76..19a8f4fcfab5 100644 --- a/arch/arm/boot/dts/sun8i-a83t.dtsi +++ b/arch/arm/boot/dts/sun8i-a83t.dtsi @@ -44,7 +44,9 @@ #include +#include #include +#include / { interrupt-parent = <&gic>; @@ -175,8 +177,8 @@ compatible = "allwinner,sun8i-a83t-dma"; reg = <0x01c02000 0x1000>; interrupts = ; - clocks = <&ccu 21>; - resets = <&ccu 7>; + clocks = <&ccu CLK_BUS_DMA>; + resets = <&ccu RST_BUS_DMA>; #dma-cells = <1>; }; @@ -195,7 +197,7 @@ , ; reg = <0x01c20800 0x400>; - clocks = <&ccu 45>, <&osc24M>, <&osc16Md512>; + clocks = <&ccu CLK_BUS_PIO>, <&osc24M>, <&osc16Md512>; clock-names = "apb", "hosc", "losc"; gpio-controller; interrupt-controller; @@ -247,8 +249,8 @@ "allwinner,sun8i-h3-spdif"; reg = <0x01c21000 0x400>; interrupts = ; - clocks = <&ccu 44>, <&ccu 76>; - resets = <&ccu 32>; + clocks = <&ccu CLK_BUS_SPDIF>, <&ccu CLK_SPDIF>; + resets = <&ccu RST_BUS_SPDIF>; clock-names = "apb", "spdif"; dmas = <&dma 2>; dma-names = "tx"; @@ -263,8 +265,8 @@ interrupts = ; reg-shift = <2>; reg-io-width = <4>; - clocks = <&ccu 53>; - resets = <&ccu 40>; + clocks = <&ccu CLK_BUS_UART0>; + resets = <&ccu RST_BUS_UART0>; status = "disabled"; }; -- cgit v1.2.3 From ef8aa4e0a0df2470148203725acd8b6e75acdc0b Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Sun, 22 Nov 2015 15:24:28 +0100 Subject: ARM: ep93xx: normalize clk API It's a combination of the patch from Arnd Bergmann, which added empty stubs for clk_round_rate() and clk_set_parent() and a working trivial implementation of clk_get_parent(). The later is required for ADC driver. Signed-off-by: Alexander Sverdlin Signed-off-by: Arnd Bergmann --- arch/arm/mach-ep93xx/clock.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'arch') diff --git a/arch/arm/mach-ep93xx/clock.c b/arch/arm/mach-ep93xx/clock.c index 39ef3b613912..beec5f16443a 100644 --- a/arch/arm/mach-ep93xx/clock.c +++ b/arch/arm/mach-ep93xx/clock.c @@ -475,6 +475,26 @@ int clk_set_rate(struct clk *clk, unsigned long rate) } EXPORT_SYMBOL(clk_set_rate); +long clk_round_rate(struct clk *clk, unsigned long rate) +{ + WARN_ON(clk); + return 0; +} +EXPORT_SYMBOL(clk_round_rate); + +int clk_set_parent(struct clk *clk, struct clk *parent) +{ + WARN_ON(clk); + return 0; +} +EXPORT_SYMBOL(clk_set_parent); + +struct clk *clk_get_parent(struct clk *clk) +{ + return clk->parent; +} +EXPORT_SYMBOL(clk_get_parent); + static char fclk_divisors[] = { 1, 2, 4, 8, 16, 1, 1, 1 }; static char hclk_divisors[] = { 1, 2, 4, 5, 6, 8, 16, 32 }; -- cgit v1.2.3 From bd7fefe1f06ca6cc2d1503def95bf53e2549a44b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 Jan 2014 21:17:41 +0100 Subject: ARM: w90x900: normalize clk API w90x900 still provides its own variant of the clk API rather than using the generic COMMON_CLK API. This generally works, but it causes some link errors with drivers using the clk_set_rate, clk_get_parent, clk_set_parent or clk_round_rate functions when a platform lacks those interfaces. This adds empty stub implementations for each of them, and I don't even try to do something useful here but instead just print a WARN() message to make it obvious what is going on if they ever end up being called. The drivers that call these won't be used on these platforms (otherwise we'd get a link error today), so the added code is harmless bloat and will warn about accidental use. A while ago there was a proposal to change w90x900 to use the common-clk implementation, which would be the way it should be handled properly. Signed-off-by: Arnd Bergmann --- arch/arm/mach-w90x900/clock.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'arch') diff --git a/arch/arm/mach-w90x900/clock.c b/arch/arm/mach-w90x900/clock.c index ac6fd1a2cb59..3f93fac98d97 100644 --- a/arch/arm/mach-w90x900/clock.c +++ b/arch/arm/mach-w90x900/clock.c @@ -93,3 +93,32 @@ void nuc900_subclk_enable(struct clk *clk, int enable) __raw_writel(clken, W90X900_VA_CLKPWR + SUBCLK); } + +/* dummy functions, should not be called */ +long clk_round_rate(struct clk *clk, unsigned long rate) +{ + WARN_ON(clk); + return 0; +} +EXPORT_SYMBOL(clk_round_rate); + +int clk_set_rate(struct clk *clk, unsigned long rate) +{ + WARN_ON(clk); + return 0; +} +EXPORT_SYMBOL(clk_set_rate); + +int clk_set_parent(struct clk *clk, struct clk *parent) +{ + WARN_ON(clk); + return 0; +} +EXPORT_SYMBOL(clk_set_parent); + +struct clk *clk_get_parent(struct clk *clk) +{ + WARN_ON(clk); + return NULL; +} +EXPORT_SYMBOL(clk_get_parent); -- cgit v1.2.3 From 47589c4af939bcc01da5d9a803fe0413c72a31c2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 17 Jan 2016 00:37:36 +0100 Subject: ARM: rpc: rename RAM_SIZE macro The RAM_SIZE macro in mach/hardware.h conflicts with macros of the same name in multiple drivers, leading to annoying build warnings: In file included from drivers/net/ethernet/cirrus/cs89x0.c:79:0: drivers/net/ethernet/cirrus/cs89x0.h:324:0: error: "RAM_SIZE" redefined [-Werror] #define RAM_SIZE 0x1000 /* The card has 4k bytes or RAM */ ^ In file included from /git/arm-soc/arch/arm/mach-rpc/include/mach/io.h:16:0, from /git/arm-soc/arch/arm/include/asm/io.h:194, from /git/arm-soc/include/linux/scatterlist.h:8, from /git/arm-soc/include/linux/dmaengine.h:24, from /git/arm-soc/include/linux/netdevice.h:38, from /git/arm-soc/drivers/net/ethernet/cirrus/cs89x0.c:54: arch/arm/mach-rpc/include/mach/hardware.h:28:0: note: this is the location of the previous definition #define RAM_SIZE 0x10000000 We don't use RAM_SIZE/RAM_START at all, so we could just remove them, but it might be nice to leave them for documentation purposes, so this renames them to RPC_RAM_SIZE/RPC_RAM_START in order to avoid the build warnings Signed-off-by: Arnd Bergmann --- arch/arm/mach-rpc/include/mach/hardware.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-rpc/include/mach/hardware.h b/arch/arm/mach-rpc/include/mach/hardware.h index aa79fa47373a..622d4e5df029 100644 --- a/arch/arm/mach-rpc/include/mach/hardware.h +++ b/arch/arm/mach-rpc/include/mach/hardware.h @@ -25,8 +25,8 @@ * *_SIZE is the size of the region * *_BASE is the virtual address */ -#define RAM_SIZE 0x10000000 -#define RAM_START 0x10000000 +#define RPC_RAM_SIZE 0x10000000 +#define RPC_RAM_START 0x10000000 #define EASI_SIZE 0x08000000 /* EASI I/O */ #define EASI_START 0x08000000 -- cgit v1.2.3 From 595a9f9a570f0372228f1f0cca95583043e54a4b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 15 Jan 2016 14:41:03 +0100 Subject: ARM: omap1/ams-delta: warn about failed regulator enable The modem pm handler in the ams-delta board uses regulator_enable() but does not check for a successful return code: board-ams-delta.c:521:3: error: ignoring return value of 'regulator_enable', declared with attribute warn_unused_result [-Werror=unused-result] It is not easy to propagate that return code to the callers in uart_configure_port/uart_suspend_port/uart_resume_port, unless we change all UART drivers, and it is unclear what those would do with the return code. Instead, this patch uses a runtime warning to replace the compiletime warning. I have checked that the regulator in question is hardcoded to a fixed-voltage GPIO regulator, and that should never fail to get enabled if I understand the code right. Acked-by: Tony Lindgren Acked-by: Aaro Koskinen Link: https://patchwork.kernel.org/patch/8391981/ Signed-off-by: Arnd Bergmann --- arch/arm/mach-omap1/board-ams-delta.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c index 6613a6ff5dbc..6cbc69c92913 100644 --- a/arch/arm/mach-omap1/board-ams-delta.c +++ b/arch/arm/mach-omap1/board-ams-delta.c @@ -510,6 +510,7 @@ static void __init ams_delta_init(void) static void modem_pm(struct uart_port *port, unsigned int state, unsigned old) { struct modem_private_data *priv = port->private_data; + int ret; if (IS_ERR(priv->regulator)) return; @@ -518,9 +519,16 @@ static void modem_pm(struct uart_port *port, unsigned int state, unsigned old) return; if (state == 0) - regulator_enable(priv->regulator); + ret = regulator_enable(priv->regulator); else if (old == 0) - regulator_disable(priv->regulator); + ret = regulator_disable(priv->regulator); + else + ret = 0; + + if (ret) + dev_warn(port->dev, + "ams_delta modem_pm: failed to %sable regulator: %d\n", + state ? "dis" : "en", ret); } static struct plat_serial8250_port ams_delta_modem_ports[] = { -- cgit v1.2.3 From 1f3b4d8fcc28d68bc4b01bec0b886f31c4ea3efb Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 15 Jan 2016 13:32:43 +0100 Subject: ARM: ixp4xx: use normal prototype for {read,write}s{b,w,l} ixp4xx defines the arguments to its __indirect_writesb() and other functions as pointers to fixed-size data. This is not necessarily wrong, and it works most of the time, but it causes warnings in at least one driver: drivers/net/ethernet/smsc/smc91x.c: In function 'smc_rcv': drivers/net/ethernet/smsc/smc91x.c:495:21: error: passing argument 2 of '__indirect_readsw' from incompatible pointer type [-Werror=incompatible-pointer-types] SMC_PULL_DATA(lp, data, packet_len - 4); All other definitions of the same functions pass void pointers, so doing the same here avoids the warnings. Signed-off-by: Arnd Bergmann Acked-by: Krzysztof Halasa --- arch/arm/mach-ixp4xx/include/mach/io.h | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-ixp4xx/include/mach/io.h b/arch/arm/mach-ixp4xx/include/mach/io.h index 7a0c13bf4269..d04d3ec97ac0 100644 --- a/arch/arm/mach-ixp4xx/include/mach/io.h +++ b/arch/arm/mach-ixp4xx/include/mach/io.h @@ -95,8 +95,10 @@ static inline void __indirect_writeb(u8 value, volatile void __iomem *p) } static inline void __indirect_writesb(volatile void __iomem *bus_addr, - const u8 *vaddr, int count) + const void *p, int count) { + const u8 *vaddr = p; + while (count--) writeb(*vaddr++, bus_addr); } @@ -118,8 +120,10 @@ static inline void __indirect_writew(u16 value, volatile void __iomem *p) } static inline void __indirect_writesw(volatile void __iomem *bus_addr, - const u16 *vaddr, int count) + const void *p, int count) { + const u16 *vaddr = p; + while (count--) writew(*vaddr++, bus_addr); } @@ -137,8 +141,9 @@ static inline void __indirect_writel(u32 value, volatile void __iomem *p) } static inline void __indirect_writesl(volatile void __iomem *bus_addr, - const u32 *vaddr, int count) + const void *p, int count) { + const u32 *vaddr = p; while (count--) writel(*vaddr++, bus_addr); } @@ -160,8 +165,10 @@ static inline u8 __indirect_readb(const volatile void __iomem *p) } static inline void __indirect_readsb(const volatile void __iomem *bus_addr, - u8 *vaddr, u32 count) + void *p, u32 count) { + u8 *vaddr = p; + while (count--) *vaddr++ = readb(bus_addr); } @@ -183,8 +190,10 @@ static inline u16 __indirect_readw(const volatile void __iomem *p) } static inline void __indirect_readsw(const volatile void __iomem *bus_addr, - u16 *vaddr, u32 count) + void *p, u32 count) { + u16 *vaddr = p; + while (count--) *vaddr++ = readw(bus_addr); } @@ -204,8 +213,10 @@ static inline u32 __indirect_readl(const volatile void __iomem *p) } static inline void __indirect_readsl(const volatile void __iomem *bus_addr, - u32 *vaddr, u32 count) + void *p, u32 count) { + u32 *vaddr = p; + while (count--) *vaddr++ = readl(bus_addr); } -- cgit v1.2.3 From c1ae3f7c4b7c4ae0c83014969c99e8108e268e16 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 29 Dec 2015 14:40:00 +0100 Subject: ARM: sirf: mark sirfsoc_init_late as __maybe_unused sirfsoc_init_late is called by each of the three individual SoC definitions, but in a randconfig build, we can encounter a situation where they are all disabled: arch/arm/mach-prima2/common.c:18:123: warning: 'sirfsoc_init_late' defined but not used [-Wunused-function] While that is not a useful configuration, the warning also doesn't help, so this patch marks the function as __maybe_unused to let the compiler know it is there intentionally. Signed-off-by: Arnd Bergmann --- arch/arm/mach-prima2/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-prima2/common.c b/arch/arm/mach-prima2/common.c index 8cadb302a7d2..ffe05c27087e 100644 --- a/arch/arm/mach-prima2/common.c +++ b/arch/arm/mach-prima2/common.c @@ -15,7 +15,7 @@ #include #include "common.h" -static void __init sirfsoc_init_late(void) +static void __init __maybe_unused sirfsoc_init_late(void) { sirfsoc_pm_init(); } -- cgit v1.2.3 From d1c888878cb19229653aecfbbfd928300ba2805b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 29 Dec 2015 17:09:18 +0100 Subject: ARM: omap1: avoid unused variable warning The osk_mistral_init() contains code that is only compiled when CONFIG_PM is set, but it uses a variable that is declared outside of the #ifdef: arch/arm/mach-omap1/board-osk.c: In function 'osk_mistral_init': arch/arm/mach-omap1/board-osk.c:513:7: warning: unused variable 'ret' [-Wunused-variable] This removes the #ifdef around the user of the variable, make it always used. Signed-off-by: Arnd Bergmann Suggested-by: Tony Lindgren Acked-by: Aaro Koskinen --- arch/arm/mach-omap1/board-osk.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-omap1/board-osk.c b/arch/arm/mach-omap1/board-osk.c index 4dfb99504810..95ac1929aede 100644 --- a/arch/arm/mach-omap1/board-osk.c +++ b/arch/arm/mach-omap1/board-osk.c @@ -441,13 +441,11 @@ static struct spi_board_info __initdata mistral_boardinfo[] = { { .chip_select = 0, } }; -#ifdef CONFIG_PM static irqreturn_t osk_mistral_wake_interrupt(int irq, void *ignored) { return IRQ_HANDLED; } -#endif static void __init osk_mistral_init(void) { @@ -515,7 +513,6 @@ static void __init osk_mistral_init(void) gpio_direction_input(OMAP_MPUIO(2)); irq_set_irq_type(irq, IRQ_TYPE_EDGE_RISING); -#ifdef CONFIG_PM /* share the IRQ in case someone wants to use the * button for more than wakeup from system sleep. */ @@ -529,7 +526,6 @@ static void __init osk_mistral_init(void) ret); } else enable_irq_wake(irq); -#endif } else printk(KERN_ERR "OSK+Mistral: wakeup button is awol\n"); -- cgit v1.2.3 From 293ea3d0ab9970c7f3d8163793d3407e5a86497a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 14 Jan 2016 16:57:33 +0100 Subject: ARM: omap2: mark unused functions as __maybe_unused The omap_generic_init() and omap_hwmod_init_postsetup() functions are used in the initialization for all OMAP2+ SoC types, but in the extreme case that those are all disabled, we get a warning about unused code: arch/arm/mach-omap2/io.c:412:123: error: 'omap_hwmod_init_postsetup' defined but not used [-Werror=unused-function] arch/arm/mach-omap2/board-generic.c:30:123: error: 'omap_generic_init' defined but not used [-Werror=unused-function] This annotates both as __maybe_unused to shut up that warning. Signed-off-by: Arnd Bergmann Acked-by: Tony Lindgren Reviewed-by: Sebastian Reichel --- arch/arm/mach-omap2/board-generic.c | 2 +- arch/arm/mach-omap2/io.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-omap2/board-generic.c b/arch/arm/mach-omap2/board-generic.c index dc9e34e670a2..b1e661bb5521 100644 --- a/arch/arm/mach-omap2/board-generic.c +++ b/arch/arm/mach-omap2/board-generic.c @@ -28,7 +28,7 @@ static const struct of_device_id omap_dt_match_table[] __initconst = { { } }; -static void __init omap_generic_init(void) +static void __init __maybe_unused omap_generic_init(void) { pdata_quirks_init(omap_dt_match_table); diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c index 1d739d1a0a65..1cd20e4d56b0 100644 --- a/arch/arm/mach-omap2/io.c +++ b/arch/arm/mach-omap2/io.c @@ -410,7 +410,7 @@ static int _set_hwmod_postsetup_state(struct omap_hwmod *oh, void *data) return omap_hwmod_set_postsetup_state(oh, *(u8 *)data); } -static void __init omap_hwmod_init_postsetup(void) +static void __init __maybe_unused omap_hwmod_init_postsetup(void) { u8 postsetup_state; -- cgit v1.2.3 From 1c1953f351263bdc0857326bde7ff95a43700b6c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 15 Jan 2016 13:04:01 +0100 Subject: ARM: mmp: mark usb_dma_mask as __maybe_unused This variable may be used by some devices that each have their on Kconfig symbol, or by none of them, and that causes a build warning: arch/arm/mach-mmp/devices.c:241:12: error: 'usb_dma_mask' defined but not used [-Werror=unused-variable] Marking it __maybe_unused avoids the warning. Signed-off-by: Arnd Bergmann --- arch/arm/mach-mmp/devices.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-mmp/devices.c b/arch/arm/mach-mmp/devices.c index 3330ac7cfbef..671c7a09ab3d 100644 --- a/arch/arm/mach-mmp/devices.c +++ b/arch/arm/mach-mmp/devices.c @@ -238,7 +238,7 @@ void pxa_usb_phy_deinit(void __iomem *phy_reg) #endif #if IS_ENABLED(CONFIG_USB_SUPPORT) -static u64 usb_dma_mask = ~(u32)0; +static u64 __maybe_unused usb_dma_mask = ~(u32)0; #if IS_ENABLED(CONFIG_USB_MV_UDC) struct resource pxa168_u2o_resources[] = { -- cgit v1.2.3 From cd5bad4135e8e72144a19429d359c36fd4172a88 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 26 Mar 2014 00:17:09 +0100 Subject: ARM: ep93xx: use ARM_PATCH_PHYS_VIRT correctly Just like ARCH_MULTIPLATFORM, we want to use ARM_PATCH_PHYS_VIRT when possible, but that fails for NOMMU or XIP_KERNEL configurations. Using 'imply' instead of 'select' gets this right and only uses the symbol when we don't have to hardcode the address anyway. Signed-off-by: Arnd Bergmann Acked-by: Alexander Sverdlin --- arch/arm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index a208bfe367b5..61a0cb15067e 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -380,7 +380,7 @@ config ARCH_EP93XX bool "EP93xx-based" select ARCH_HAS_HOLES_MEMORYMODEL select ARM_AMBA - select ARM_PATCH_PHYS_VIRT + imply ARM_PATCH_PHYS_VIRT select ARM_VIC select AUTO_ZRELADDR select CLKDEV_LOOKUP -- cgit v1.2.3 From c4caa8db4c90a6d0e2258e34a203961e07903c05 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 10 Jun 2016 10:51:04 +0200 Subject: ARM: ixp4xx: fix ioport_unmap definition An empty macro definition can cause unexpected behavior, in case of the ixp4xx ioport_unmap, we get two warnings: drivers/net/wireless/marvell/libertas/if_cs.c: In function 'if_cs_release': drivers/net/wireless/marvell/libertas/if_cs.c:826:3: error: suggest braces around empty body in an 'if' statement [-Werror=empty-body] ioport_unmap(card->iobase); drivers/vfio/pci/vfio_pci_rdwr.c: In function 'vfio_pci_vga_rw': drivers/vfio/pci/vfio_pci_rdwr.c:230:15: error: the omitted middle operand in ?: will always be 'true', suggest explicit middle operand [-Werror=parentheses] is_ioport ? ioport_unmap(iomem) : iounmap(iomem); This uses an inline function to define the macro in a safer way. Signed-off-by: Arnd Bergmann Acked-by: Krzysztof Halasa --- arch/arm/mach-ixp4xx/include/mach/io.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-ixp4xx/include/mach/io.h b/arch/arm/mach-ixp4xx/include/mach/io.h index d04d3ec97ac0..844e8ac593e2 100644 --- a/arch/arm/mach-ixp4xx/include/mach/io.h +++ b/arch/arm/mach-ixp4xx/include/mach/io.h @@ -534,8 +534,15 @@ static inline void iowrite32_rep(void __iomem *addr, const void *vaddr, #endif } -#define ioport_map(port, nr) ((void __iomem*)(port + PIO_OFFSET)) -#define ioport_unmap(addr) +#define ioport_map(port, nr) ioport_map(port, nr) +static inline void __iomem *ioport_map(unsigned long port, unsigned int nr) +{ + return ((void __iomem*)((port) + PIO_OFFSET)); +} +#define ioport_unmap(addr) ioport_unmap(addr) +static inline void ioport_unmap(void __iomem *addr) +{ +} #endif /* CONFIG_PCI */ #endif /* __ASM_ARM_ARCH_IO_H */ -- cgit v1.2.3 From 1d20d8a9fce8f1e2ef00a0f3d068fa18d59ddf8f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 19 Mar 2014 18:41:37 +0100 Subject: ARM: pxa: select both FB and FB_W100 for eseries We get a link error trying to access the w100fb_gpio_read/write functions from the platform when the driver is a loadable module or not built-in, so the platform already uses 'select' to hard-enable the driver. However, that fails if the framebuffer subsystem is disabled altogether. I've considered various ways to fix this properly, but they all seem like too much work or too risky, so this simply adds another 'select' to force the subsystem on as well. Fixes: 82427de2c7c3 ("ARM: pxa: PXA_ESERIES depends on FB_W100.") Signed-off-by: Arnd Bergmann --- arch/arm/mach-pxa/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/mach-pxa/Kconfig b/arch/arm/mach-pxa/Kconfig index 76fbc115ec33..ce7d97babb0f 100644 --- a/arch/arm/mach-pxa/Kconfig +++ b/arch/arm/mach-pxa/Kconfig @@ -566,6 +566,7 @@ config MACH_ICONTROL config ARCH_PXA_ESERIES bool "PXA based Toshiba e-series PDAs" select FB_W100 + select FB select PXA25x config MACH_E330 -- cgit v1.2.3 From 3ac8093cf5f7203140a3d8c28785d0d0dd89cd7c Mon Sep 17 00:00:00 2001 From: Georgi Djakov Date: Wed, 19 Jul 2017 15:55:13 +0300 Subject: arm64: defconfig: enable missing HWSPINLOCK The hardware spinlock drivers now depend on HWSPINLOCK (instead of selecting it), so we need to explicitly enable it after commit 35fc8a07d7f9 ("Make HWSPINLOCK a menuconfig to ease disabling") Without HWSPINLOCK, various drivers are left with unsatisfied dependencies and Qcom boards using shared memory based communication to request regulators are failing to boot and mount rootfs. Fix this by explicitly enabling HWSPINLOCK in defconfig. Signed-off-by: Georgi Djakov Signed-off-by: Andy Gross --- arch/arm64/configs/defconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 6c7d147eed54..b4ca115b3be1 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -476,6 +476,7 @@ CONFIG_QCOM_CLK_SMD_RPM=y CONFIG_MSM_GCC_8916=y CONFIG_MSM_GCC_8994=y CONFIG_MSM_MMCC_8996=y +CONFIG_HWSPINLOCK=y CONFIG_HWSPINLOCK_QCOM=y CONFIG_ARM_MHU=y CONFIG_PLATFORM_MHU=y -- cgit v1.2.3 From c9c98bc5cc2ab4e0d5c9ad58286fa7e1670dfded Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Fri, 28 Jul 2017 01:35:53 +1000 Subject: powerpc/mm: Fix pmd/pte_devmap() on non-leaf entries The Radix MMU translation tree as defined in ISA v3.0 contains two different types of entry, directories and leaves. Leaves are identified by _PAGE_PTE being set. The formats of the two entries are different, with the directory entries containing no spare bits for use by software. In particular the bit we use for _PAGE_DEVMAP is not reserved for software, and is part of the NLB (Next Level Base) field, essentially the address of the next level in the tree. Note that the Linux pte_t is not == _PAGE_PTE. A huge page pmd entry (or devmap!) is also a leaf and so has _PAGE_PTE set, even though we use a pmd_t for it in Linux. The fix is to ensure that the pmd/pte_devmap() confirm they are looking at a leaf entry (_PAGE_PTE) as well as checking _PAGE_DEVMAP. Fixes: ebd31197931d ("powerpc/mm: Add devmap support for ppc64") Signed-off-by: Oliver O'Halloran Tested-by: Laurent Vivier Tested-by: Jose Ricardo Ziviani Reviewed-by: Suraj Jitindar Singh Reviewed-by: Aneesh Kumar K.V [mpe: Add a comment in the code and flesh out change log] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/pgtable.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index d1da415e283c..818a58fc3f4f 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -608,9 +608,17 @@ static inline pte_t pte_mkdevmap(pte_t pte) return __pte(pte_val(pte) | _PAGE_SPECIAL|_PAGE_DEVMAP); } +/* + * This is potentially called with a pmd as the argument, in which case it's not + * safe to check _PAGE_DEVMAP unless we also confirm that _PAGE_PTE is set. + * That's because the bit we use for _PAGE_DEVMAP is not reserved for software + * use in page directory entries (ie. non-ptes). + */ static inline int pte_devmap(pte_t pte) { - return !!(pte_raw(pte) & cpu_to_be64(_PAGE_DEVMAP)); + u64 mask = cpu_to_be64(_PAGE_DEVMAP | _PAGE_PTE); + + return (pte_raw(pte) & mask) == mask; } static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) -- cgit v1.2.3 From cd63f3cf1d59b7ad8419eba1cac8f9126e79cc43 Mon Sep 17 00:00:00 2001 From: Gustavo Romero Date: Wed, 19 Jul 2017 01:44:13 -0400 Subject: powerpc/tm: Fix saving of TM SPRs in core dump Currently flush_tmregs_to_thread() does not save the TM SPRs (TFHAR, TFIAR, TEXASR) to the thread struct, unless the process is currently inside a suspended transaction. If the process is core dumping, and the TM SPRs have changed since the last time the process was context switched, then we will save stale values of the TM SPRs to the core dump. Fix it by saving the live register state to the thread struct in that case. Fixes: 08e1c01d6aed ("powerpc/ptrace: Enable support for TM SPR state") Cc: stable@vger.kernel.org # v4.8+ Signed-off-by: Gustavo Romero Reviewed-by: Cyril Bur Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/ptrace.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 925a4ef90559..660ed39e9c9a 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -127,12 +127,19 @@ static void flush_tmregs_to_thread(struct task_struct *tsk) * If task is not current, it will have been flushed already to * it's thread_struct during __switch_to(). * - * A reclaim flushes ALL the state. + * A reclaim flushes ALL the state or if not in TM save TM SPRs + * in the appropriate thread structures from live. */ - if (tsk == current && MSR_TM_SUSPENDED(mfmsr())) - tm_reclaim_current(TM_CAUSE_SIGNAL); + if (tsk != current) + return; + if (MSR_TM_SUSPENDED(mfmsr())) { + tm_reclaim_current(TM_CAUSE_SIGNAL); + } else { + tm_enable(); + tm_save_sprs(&(tsk->thread)); + } } #else static inline void flush_tmregs_to_thread(struct task_struct *tsk) { } -- cgit v1.2.3 From 20c6c189045539d29f4854d92b7ea9c329e1edfc Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Tue, 25 Jul 2017 14:50:53 -0700 Subject: x86/boot: Disable the address-of-packed-member compiler warning The clang warning 'address-of-packed-member' is disabled for the general kernel code, also disable it for the x86 boot code. This suppresses a bunch of warnings like this when building with clang: ./arch/x86/include/asm/processor.h:535:30: warning: taking address of packed member 'sp0' of class or structure 'x86_hw_tss' may result in an unaligned pointer value [-Waddress-of-packed-member] return this_cpu_read_stable(cpu_tss.x86_tss.sp0); ^~~~~~~~~~~~~~~~~~~ ./arch/x86/include/asm/percpu.h:391:59: note: expanded from macro 'this_cpu_read_stable' #define this_cpu_read_stable(var) percpu_stable_op("mov", var) ^~~ ./arch/x86/include/asm/percpu.h:228:16: note: expanded from macro 'percpu_stable_op' : "p" (&(var))); ^~~ Signed-off-by: Matthias Kaehlcke Cc: Doug Anderson Cc: Linus Torvalds Cc: Masahiro Yamada Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170725215053.135586-1-mka@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 2c860ad4fe06..8a958274b54c 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -34,6 +34,7 @@ KBUILD_CFLAGS += $(cflags-y) KBUILD_CFLAGS += -mno-mmx -mno-sse KBUILD_CFLAGS += $(call cc-option,-ffreestanding) KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector) +KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ GCOV_PROFILE := n -- cgit v1.2.3 From 92bbd16e500c85bc210ba48caecbfbdb721bb5b4 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 24 Jul 2017 11:46:09 +0100 Subject: arm64: mmu: Place guard page after mapping of kernel image The vast majority of virtual allocations in the vmalloc region are followed by a guard page, which can help to avoid overruning on vma into another, which may map a read-sensitive device. This patch adds a guard page to the end of the kernel image mapping (i.e. following the data/bss segments). Cc: Mark Rutland Reviewed-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/mm/mmu.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 23c2d89a362e..f1eb15e0e864 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -496,7 +496,7 @@ void mark_rodata_ro(void) static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end, pgprot_t prot, struct vm_struct *vma, - int flags) + int flags, unsigned long vm_flags) { phys_addr_t pa_start = __pa_symbol(va_start); unsigned long size = va_end - va_start; @@ -507,10 +507,13 @@ static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end, __create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot, early_pgtable_alloc, flags); + if (!(vm_flags & VM_NO_GUARD)) + size += PAGE_SIZE; + vma->addr = va_start; vma->phys_addr = pa_start; vma->size = size; - vma->flags = VM_MAP; + vma->flags = VM_MAP | vm_flags; vma->caller = __builtin_return_address(0); vm_area_add_early(vma); @@ -541,14 +544,15 @@ static void __init map_kernel(pgd_t *pgd) * Only rodata will be remapped with different permissions later on, * all other segments are allowed to use contiguous mappings. */ - map_kernel_segment(pgd, _text, _etext, text_prot, &vmlinux_text, 0); + map_kernel_segment(pgd, _text, _etext, text_prot, &vmlinux_text, 0, + VM_NO_GUARD); map_kernel_segment(pgd, __start_rodata, __inittext_begin, PAGE_KERNEL, - &vmlinux_rodata, NO_CONT_MAPPINGS); + &vmlinux_rodata, NO_CONT_MAPPINGS, VM_NO_GUARD); map_kernel_segment(pgd, __inittext_begin, __inittext_end, text_prot, - &vmlinux_inittext, 0); + &vmlinux_inittext, 0, VM_NO_GUARD); map_kernel_segment(pgd, __initdata_begin, __initdata_end, PAGE_KERNEL, - &vmlinux_initdata, 0); - map_kernel_segment(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data, 0); + &vmlinux_initdata, 0, VM_NO_GUARD); + map_kernel_segment(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0); if (!pgd_val(*pgd_offset_raw(pgd, FIXADDR_START))) { /* -- cgit v1.2.3 From 7b7622bb95eb587cbaa79608e47b832a82a262b1 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 27 Jul 2017 23:23:37 +1000 Subject: powerpc/smp: Call smp_ops->setup_cpu() directly on the boot CPU In smp_cpus_done() we need to call smp_ops->setup_cpu() for the boot CPU, which means it has to run *on* the boot CPU. In the past we ensured it ran on the boot CPU by changing the CPU affinity mask of current directly. That was removed in commit 6d11b87d55eb ("powerpc/smp: Replace open coded task affinity logic"), and replaced with a work queue call. Unfortunately using a work queue leads to a lockdep warning, now that the CPU hotplug lock is a regular semaphore: ====================================================== WARNING: possible circular locking dependency detected ... kworker/0:1/971 is trying to acquire lock: (cpu_hotplug_lock.rw_sem){++++++}, at: [] apply_workqueue_attrs+0x34/0xa0 but task is already holding lock: ((&wfc.work)){+.+.+.}, at: [] process_one_work+0x25c/0x800 ... CPU0 CPU1 ---- ---- lock((&wfc.work)); lock(cpu_hotplug_lock.rw_sem); lock((&wfc.work)); lock(cpu_hotplug_lock.rw_sem); Although the deadlock can't happen in practice, because smp_cpus_done() only runs in early boot before CPU hotplug is allowed, lockdep can't tell that. Luckily in commit 8fb12156b8db ("init: Pin init task to the boot CPU, initially") tglx changed the generic code to pin init to the boot CPU to begin with. The unpinning of init from the boot CPU happens in sched_init_smp(), which is called after smp_cpus_done(). So smp_cpus_done() is always called on the boot CPU, which means we don't need the work queue call at all - and the lockdep warning goes away. Signed-off-by: Michael Ellerman Reviewed-by: Thomas Gleixner --- arch/powerpc/kernel/smp.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index c6b8bace1766..b0ea6d4d4853 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -985,21 +985,13 @@ static struct sched_domain_topology_level powerpc_topology[] = { { NULL, }, }; -static __init long smp_setup_cpu_workfn(void *data __always_unused) -{ - smp_ops->setup_cpu(boot_cpuid); - return 0; -} - void __init smp_cpus_done(unsigned int max_cpus) { /* - * We want the setup_cpu() here to be called on the boot CPU, but - * init might run on any CPU, so make sure it's invoked on the boot - * CPU. + * We are running pinned to the boot CPU, see rest_init(). */ if (smp_ops && smp_ops->setup_cpu) - work_on_cpu_safe(boot_cpuid, smp_setup_cpu_workfn, NULL); + smp_ops->setup_cpu(boot_cpuid); if (smp_ops && smp_ops->bringup_done) smp_ops->bringup_done(); -- cgit v1.2.3 From 65c5ec11c25eff6ba6e9b1cbfff014875fddd1e0 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 26 Jul 2017 23:19:04 +1000 Subject: powerpc/boot: Fix 64-bit boot wrapper build with non-biarch compiler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Historically the boot wrapper was always built 32-bit big endian, even for 64-bit kernels. That was because old firmwares didn't necessarily support booting a 64-bit image. Because of that arch/powerpc/boot/Makefile uses CROSS32CC for compilation. However when we added 64-bit little endian support, we also added support for building the boot wrapper 64-bit. However we kept using CROSS32CC, because in most cases it is just CC and everything works. However if the user doesn't specify CROSS32_COMPILE (which no one ever does AFAIK), and CC is *not* biarch (32/64-bit capable), then CROSS32CC becomes just "gcc". On native systems that is probably OK, but if we're cross building it definitely isn't, leading to eg: gcc ... -m64 -mlittle-endian -mabi=elfv2 ... arch/powerpc/boot/cpm-serial.c gcc: error: unrecognized argument in option ‘-mabi=elfv2’ gcc: error: unrecognized command line option ‘-mlittle-endian’ make: *** [zImage] Error 2 To fix it, stop using CROSS32CC, because we may or may not be building 32-bit. Instead setup a BOOTCC, which defaults to CC, and only use CROSS32_COMPILE if it's set and we're building for 32-bit. Fixes: 147c05168fc8 ("powerpc/boot: Add support for 64bit little endian wrapper") Signed-off-by: Michael Ellerman Reviewed-by: Cyril Bur --- arch/powerpc/boot/Makefile | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index a7814a7b1523..6f952fe1f084 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -25,12 +25,20 @@ compress-$(CONFIG_KERNEL_XZ) := CONFIG_KERNEL_XZ BOOTCFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ -fno-strict-aliasing -Os -msoft-float -pipe \ -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \ - -isystem $(shell $(CROSS32CC) -print-file-name=include) \ -D$(compress-y) +BOOTCC := $(CC) ifdef CONFIG_PPC64_BOOT_WRAPPER BOOTCFLAGS += -m64 +else +BOOTCFLAGS += -m32 +ifdef CROSS32_COMPILE + BOOTCC := $(CROSS32_COMPILE)gcc +endif endif + +BOOTCFLAGS += -isystem $(shell $(BOOTCC) -print-file-name=include) + ifdef CONFIG_CPU_BIG_ENDIAN BOOTCFLAGS += -mbig-endian else @@ -183,10 +191,10 @@ clean-files := $(zlib-) $(zlibheader-) $(zliblinuxheader-) \ empty.c zImage.coff.lds zImage.ps3.lds zImage.lds quiet_cmd_bootcc = BOOTCC $@ - cmd_bootcc = $(CROSS32CC) -Wp,-MD,$(depfile) $(BOOTCFLAGS) -c -o $@ $< + cmd_bootcc = $(BOOTCC) -Wp,-MD,$(depfile) $(BOOTCFLAGS) -c -o $@ $< quiet_cmd_bootas = BOOTAS $@ - cmd_bootas = $(CROSS32CC) -Wp,-MD,$(depfile) $(BOOTAFLAGS) -c -o $@ $< + cmd_bootas = $(BOOTCC) -Wp,-MD,$(depfile) $(BOOTAFLAGS) -c -o $@ $< quiet_cmd_bootar = BOOTAR $@ cmd_bootar = $(CROSS32AR) -cr$(KBUILD_ARFLAGS) $@.$$$$ $(filter-out FORCE,$^); mv $@.$$$$ $@ -- cgit v1.2.3 From 253fd51e2f533552ae35a0c661705da6c4842c1b Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Wed, 26 Jul 2017 15:26:40 +1000 Subject: powerpc/powernv/pci: Return failure for some uses of dma_set_mask() Commit 8e3f1b1d8255 ("powerpc/powernv/pci: Enable 64-bit devices to access >4GB DMA space") introduced the ability for PCI device drivers to request a DMA mask between 64 and 32 bits and actually get a mask greater than 32-bits. However currently if certain machine configuration dependent conditions are not meet the code silently falls back to a 32-bit mask. This makes it hard for device drivers to detect which mask they actually got. Instead we should return an error when the request could not be fulfilled which allows drivers to either fallback or implement other workarounds as documented in DMA-API-HOWTO.txt. Signed-off-by: Alistair Popple Acked-by: Russell Currey Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 437613588df1..b900eb1d5e17 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1852,6 +1852,14 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) /* 4GB offset bypasses 32-bit space */ set_dma_offset(&pdev->dev, (1ULL << 32)); set_dma_ops(&pdev->dev, &dma_direct_ops); + } else if (dma_mask >> 32 && dma_mask != DMA_BIT_MASK(64)) { + /* + * Fail the request if a DMA mask between 32 and 64 bits + * was requested but couldn't be fulfilled. Ideally we + * would do this for 64-bits but historically we have + * always fallen back to 32-bits. + */ + return -ENOMEM; } else { dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n"); set_dma_ops(&pdev->dev, &dma_iommu_ops); -- cgit v1.2.3 From 6d0f581d1768d3eaba15776e7dd1fdfec10cfe36 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 28 Jul 2017 17:42:59 -0700 Subject: xtensa: fix cache aliasing handling code for WT cache Currently building kernel for xtensa core with aliasing WT cache fails with the following messages: mm/memory.c:2152: undefined reference to `flush_dcache_page' mm/memory.c:2332: undefined reference to `local_flush_cache_page' mm/memory.c:1919: undefined reference to `local_flush_cache_range' mm/memory.c:4179: undefined reference to `copy_to_user_page' mm/memory.c:4183: undefined reference to `copy_from_user_page' This happens because implementation of these functions is only compiled when data cache is WB, which looks wrong: even when data cache doesn't need flushing it still needs invalidation. The functions like __flush_[invalidate_]dcache_* are correctly defined for both WB and WT caches (and even if they weren't that'd still be ok, just slower). Fix this by providing the same implementation of the above functions for both WB and WT cache. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov --- arch/xtensa/mm/cache.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/xtensa/mm/cache.c b/arch/xtensa/mm/cache.c index 1a804a2f9a5b..dbb1cdef3663 100644 --- a/arch/xtensa/mm/cache.c +++ b/arch/xtensa/mm/cache.c @@ -120,10 +120,6 @@ void copy_user_highpage(struct page *dst, struct page *src, preempt_enable(); } -#endif /* DCACHE_WAY_SIZE > PAGE_SIZE */ - -#if (DCACHE_WAY_SIZE > PAGE_SIZE) && XCHAL_DCACHE_IS_WRITEBACK - /* * Any time the kernel writes to a user page cache page, or it is about to * read from a page cache page this routine is called. @@ -208,7 +204,7 @@ void local_flush_cache_page(struct vm_area_struct *vma, unsigned long address, __invalidate_icache_page_alias(virt, phys); } -#endif +#endif /* DCACHE_WAY_SIZE > PAGE_SIZE */ void update_mmu_cache(struct vm_area_struct * vma, unsigned long addr, pte_t *ptep) @@ -225,7 +221,7 @@ update_mmu_cache(struct vm_area_struct * vma, unsigned long addr, pte_t *ptep) flush_tlb_page(vma, addr); -#if (DCACHE_WAY_SIZE > PAGE_SIZE) && XCHAL_DCACHE_IS_WRITEBACK +#if (DCACHE_WAY_SIZE > PAGE_SIZE) if (!PageReserved(page) && test_bit(PG_arch_1, &page->flags)) { unsigned long phys = page_to_phys(page); @@ -256,7 +252,7 @@ update_mmu_cache(struct vm_area_struct * vma, unsigned long addr, pte_t *ptep) * flush_dcache_page() on the page. */ -#if (DCACHE_WAY_SIZE > PAGE_SIZE) && XCHAL_DCACHE_IS_WRITEBACK +#if (DCACHE_WAY_SIZE > PAGE_SIZE) void copy_to_user_page(struct vm_area_struct *vma, struct page *page, unsigned long vaddr, void *dst, const void *src, -- cgit v1.2.3 From fce8dc5e50e44d8f644e0f7854e6d7d518fedb68 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 12 Jul 2017 16:45:20 +0200 Subject: ARM: shmobile: rcar-gen2: Fix deadlock in regulator quirk Simon Horman reported that Koelsch and Lager hang during boot, and bisected this to commit 1c3c5eab171590f8 ("sched/core: Enable might_sleep() and smp_processor_id() checks early"). The da9063/da9210 regulator quirk for R-Car Gen2 boards uses a bus notifier, and unregisters the notifier when it is no longer needed. However, a notifier must not be unregistered from within the call chain. This bug went unnoticed, as blocking_notifier_chain_unregister() didn't take the semaphore during early boot. The aforementioned commit changed that behavior, leading to a deadlock. Fix this by removing the call to bus_unregister_notifier(), and keeping local completion state instead. Reported-by: Simon Horman Fixes: 663fbb52159cca6f ("ARM: shmobile: R-Car Gen2: Add da9063/da9210 regulator quirk") Signed-off-by: Geert Uytterhoeven Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c b/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c index 73e3adbc1330..44438f344dc8 100644 --- a/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c +++ b/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c @@ -67,8 +67,12 @@ static int regulator_quirk_notify(struct notifier_block *nb, { struct device *dev = data; struct i2c_client *client; + static bool done; u32 mon; + if (done) + return 0; + mon = ioread32(irqc + IRQC_MONITOR); dev_dbg(dev, "%s: %ld, IRQC_MONITOR = 0x%x\n", __func__, action, mon); if (mon & REGULATOR_IRQ_MASK) @@ -99,7 +103,7 @@ static int regulator_quirk_notify(struct notifier_block *nb, remove: dev_info(dev, "IRQ2 is not asserted, removing quirk\n"); - bus_unregister_notifier(&i2c_bus_type, nb); + done = true; iounmap(irqc); return 0; } -- cgit v1.2.3 From 4815d3c56d1e10449a44089a47544d9ba84fad0d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 28 Jul 2017 14:45:03 +0200 Subject: cpufreq: x86: Make scaling_cur_freq behave more as expected After commit f8475cef9008 "x86: use common aperfmperf_khz_on_cpu() to calculate KHz using APERF/MPERF" the scaling_cur_freq policy attribute in sysfs only behaves as expected on x86 with APERF/MPERF registers available when it is read from at least twice in a row. The value returned by the first read may not be meaningful, because the computations in there use cached values from the previous iteration of aperfmperf_snapshot_khz() which may be stale. To prevent that from happening, modify arch_freq_get_on_cpu() to call aperfmperf_snapshot_khz() twice, with a short delay between these calls, if the previous invocation of aperfmperf_snapshot_khz() was too far back in the past (specifically, more that 1s ago). Also, as pointed out by Doug Smythies, aperf_delta is limited now and the multiplication of it by cpu_khz won't overflow, so simplify the s->khz computations too. Fixes: f8475cef9008 "x86: use common aperfmperf_khz_on_cpu() to calculate KHz using APERF/MPERF" Reported-by: Doug Smythies Signed-off-by: Rafael J. Wysocki --- arch/x86/kernel/cpu/aperfmperf.c | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c index d869c8671e36..7cf7c70b6ef2 100644 --- a/arch/x86/kernel/cpu/aperfmperf.c +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -8,20 +8,25 @@ * This file is licensed under GPLv2. */ -#include +#include +#include #include #include #include struct aperfmperf_sample { unsigned int khz; - unsigned long jiffies; + ktime_t time; u64 aperf; u64 mperf; }; static DEFINE_PER_CPU(struct aperfmperf_sample, samples); +#define APERFMPERF_CACHE_THRESHOLD_MS 10 +#define APERFMPERF_REFRESH_DELAY_MS 20 +#define APERFMPERF_STALE_THRESHOLD_MS 1000 + /* * aperfmperf_snapshot_khz() * On the current CPU, snapshot APERF, MPERF, and jiffies @@ -33,9 +38,11 @@ static void aperfmperf_snapshot_khz(void *dummy) u64 aperf, aperf_delta; u64 mperf, mperf_delta; struct aperfmperf_sample *s = this_cpu_ptr(&samples); + ktime_t now = ktime_get(); + s64 time_delta = ktime_ms_delta(now, s->time); - /* Don't bother re-computing within 10 ms */ - if (time_before(jiffies, s->jiffies + HZ/100)) + /* Don't bother re-computing within the cache threshold time. */ + if (time_delta < APERFMPERF_CACHE_THRESHOLD_MS) return; rdmsrl(MSR_IA32_APERF, aperf); @@ -51,22 +58,21 @@ static void aperfmperf_snapshot_khz(void *dummy) if (mperf_delta == 0) return; - /* - * if (cpu_khz * aperf_delta) fits into ULLONG_MAX, then - * khz = (cpu_khz * aperf_delta) / mperf_delta - */ - if (div64_u64(ULLONG_MAX, cpu_khz) > aperf_delta) - s->khz = div64_u64((cpu_khz * aperf_delta), mperf_delta); - else /* khz = aperf_delta / (mperf_delta / cpu_khz) */ - s->khz = div64_u64(aperf_delta, - div64_u64(mperf_delta, cpu_khz)); - s->jiffies = jiffies; + s->time = now; s->aperf = aperf; s->mperf = mperf; + + /* If the previous iteration was too long ago, discard it. */ + if (time_delta > APERFMPERF_STALE_THRESHOLD_MS) + s->khz = 0; + else + s->khz = div64_u64((cpu_khz * aperf_delta), mperf_delta); } unsigned int arch_freq_get_on_cpu(int cpu) { + unsigned int khz; + if (!cpu_khz) return 0; @@ -74,6 +80,12 @@ unsigned int arch_freq_get_on_cpu(int cpu) return 0; smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, 1); + khz = per_cpu(samples.khz, cpu); + if (khz) + return khz; + + msleep(APERFMPERF_REFRESH_DELAY_MS); + smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, 1); return per_cpu(samples.khz, cpu); } -- cgit v1.2.3 From 13d57093c141db2036364d6be35e394fc5b64728 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Sun, 30 Jul 2017 16:20:19 -0400 Subject: parisc: Handle vma's whose context is not current in flush_cache_range In testing James' patch to drivers/parisc/pdc_stable.c, I hit the BUG statement in flush_cache_range() during a system shutdown: kernel BUG at arch/parisc/kernel/cache.c:595! CPU: 2 PID: 6532 Comm: kworker/2:0 Not tainted 4.13.0-rc2+ #1 Workqueue: events free_ioctx IAOQ[0]: flush_cache_range+0x144/0x148 IAOQ[1]: flush_cache_page+0x0/0x1a8 RP(r2): flush_cache_range+0xec/0x148 Backtrace: [<00000000402910ac>] unmap_page_range+0x84/0x880 [<00000000402918f4>] unmap_single_vma+0x4c/0x60 [<0000000040291a18>] zap_page_range_single+0x110/0x160 [<0000000040291c34>] unmap_mapping_range+0x174/0x1a8 [<000000004026ccd8>] truncate_pagecache+0x50/0xa8 [<000000004026cd84>] truncate_setsize+0x54/0x70 [<000000004033d534>] put_aio_ring_file+0x44/0xb0 [<000000004033d5d8>] aio_free_ring+0x38/0x140 [<000000004033d714>] free_ioctx+0x34/0xa8 [<00000000401b0028>] process_one_work+0x1b8/0x4d0 [<00000000401b04f4>] worker_thread+0x1b4/0x648 [<00000000401b9128>] kthread+0x1b0/0x208 [<0000000040150020>] end_fault_vector+0x20/0x28 [<0000000040639518>] nf_ip_reroute+0x50/0xa8 [<0000000040638ed0>] nf_ip_route+0x10/0x78 [<0000000040638c90>] xfrm4_mode_tunnel_input+0x180/0x1f8 CPU: 2 PID: 6532 Comm: kworker/2:0 Not tainted 4.13.0-rc2+ #1 Workqueue: events free_ioctx Backtrace: [<0000000040163bf0>] show_stack+0x20/0x38 [<0000000040688480>] dump_stack+0xa8/0x120 [<0000000040163dc4>] die_if_kernel+0x19c/0x2b0 [<0000000040164d0c>] handle_interruption+0xa24/0xa48 This patch modifies flush_cache_range() to handle non current contexts. In as much as this occurs infrequently, the simplest approach is to flush the entire cache when this happens. Signed-off-by: John David Anglin Cc: stable@vger.kernel.org # 4.9+ Signed-off-by: Helge Deller --- arch/parisc/kernel/cache.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 85a92db70afc..19c0c141bc3f 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -587,13 +587,12 @@ void flush_cache_range(struct vm_area_struct *vma, if (parisc_requires_coherency()) flush_tlb_range(vma, start, end); - if ((end - start) >= parisc_cache_flush_threshold) { + if ((end - start) >= parisc_cache_flush_threshold + || vma->vm_mm->context != mfsp(3)) { flush_cache_all(); return; } - BUG_ON(vma->vm_mm->context != mfsp(3)); - flush_user_dcache_range_asm(start, end); if (vma->vm_flags & VM_EXEC) flush_user_icache_range_asm(start, end); -- cgit v1.2.3 From 8f8201dfed91a43ac38c899c82f81eef3d36afd9 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 31 Jul 2017 08:38:27 +0200 Subject: parisc: Increase thread and stack size to 32kb Since kernel 4.11 the thread and irq stacks on parisc randomly overflow the default size of 16k. The reason why stack usage suddenly grew is yet unknown. Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # 4.11+ Signed-off-by: Helge Deller --- arch/parisc/include/asm/thread_info.h | 2 +- arch/parisc/kernel/irq.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h index 88fe0aad4390..bc208136bbb2 100644 --- a/arch/parisc/include/asm/thread_info.h +++ b/arch/parisc/include/asm/thread_info.h @@ -34,7 +34,7 @@ struct thread_info { /* thread information allocation */ -#define THREAD_SIZE_ORDER 2 /* PA-RISC requires at least 16k stack */ +#define THREAD_SIZE_ORDER 3 /* PA-RISC requires at least 32k stack */ /* Be sure to hunt all references to this down when you change the size of * the kernel stack */ #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c index 5404e4086cb9..0ca254085a66 100644 --- a/arch/parisc/kernel/irq.c +++ b/arch/parisc/kernel/irq.c @@ -380,7 +380,7 @@ static inline int eirr_to_irq(unsigned long eirr) /* * IRQ STACK - used for irq handler */ -#define IRQ_STACK_SIZE (4096 << 2) /* 16k irq stack size */ +#define IRQ_STACK_SIZE (4096 << 3) /* 32k irq stack size */ union irq_stack_union { unsigned long stack[IRQ_STACK_SIZE/sizeof(unsigned long)]; -- cgit v1.2.3 From cc491f1d3583146eaee635c86b9c9227fa835c6c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 29 Jul 2017 22:50:27 +1000 Subject: powerpc/64s: Fix stack setup in watchdog soft_nmi_common() The watchdog soft-NMI exception stack setup loads a stack pointer twice, which is an obvious error. It ends up using the system reset interrupt (true-NMI) stack, which is also a bug because the watchdog could be preempted by a system reset interrupt that overwrites the NMI stack. Change the soft-NMI to use the "emergency stack". The current kernel stack is not used, because of the longer-term goal to prevent asynchronous stack access using soft-disable. Fixes: 2104180a5369 ("powerpc/64s: implement arch-specific hardlockup watchdog") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/exceptions-64s.S | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 9029afd1fa2a..f14f3c04ec7e 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1325,10 +1325,18 @@ EXC_VIRT_NONE(0x5800, 0x100) std r10,PACA_EXGEN+EX_R13(r13); \ EXCEPTION_PROLOG_PSERIES_1(soft_nmi_common, _H) +/* + * Branch to soft_nmi_interrupt using the emergency stack. The emergency + * stack is one that is usable by maskable interrupts so long as MSR_EE + * remains off. It is used for recovery when something has corrupted the + * normal kernel stack, for example. The "soft NMI" must not use the process + * stack because we want irq disabled sections to avoid touching the stack + * at all (other than PMU interrupts), so use the emergency stack for this, + * and run it entirely with interrupts hard disabled. + */ EXC_COMMON_BEGIN(soft_nmi_common) mr r10,r1 ld r1,PACAEMERGSP(r13) - ld r1,PACA_NMI_EMERG_SP(r13) subi r1,r1,INT_FRAME_SIZE EXCEPTION_COMMON_NORET_STACK(PACA_EXGEN, 0x900, system_reset, soft_nmi_interrupt, -- cgit v1.2.3 From 74ad3d28af2104b92dd83a43add79e6a8c45d8e2 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Thu, 6 Jul 2017 09:34:19 -0700 Subject: parisc: Define CONFIG_CPU_BIG_ENDIAN While working on enabling queued rwlock on SPARC, found this following code in include/asm-generic/qrwlock.h which uses CONFIG_CPU_BIG_ENDIAN to clear a byte. static inline u8 *__qrwlock_write_byte(struct qrwlock *lock) { return (u8 *)lock + 3 * IS_BUILTIN(CONFIG_CPU_BIG_ENDIAN); } Problem is many of the fixed big endian architectures don't define CPU_BIG_ENDIAN and clears the wrong byte. Define CPU_BIG_ENDIAN for parisc architecture to fix it. Signed-off-by: Babu Moger Signed-off-by: Helge Deller --- arch/parisc/Kconfig | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 531da9eb8f43..dda1f558ef35 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -47,6 +47,9 @@ config PARISC and later HP3000 series). The PA-RISC Linux project home page is at . +config CPU_BIG_ENDIAN + def_bool y + config MMU def_bool y -- cgit v1.2.3 From bb68cfe2f5a7f43058aed299fdbb73eb281734ed Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 31 Jul 2017 22:07:09 +0200 Subject: x86/hpet: Cure interface abuse in the resume path The HPET resume path abuses irq_domain_[de]activate_irq() to restore the MSI message in the HPET chip for the boot CPU on resume and it relies on an implementation detail of the interrupt core code, which magically makes the HPET unmask call invoked via a irq_disable/enable pair. This worked as long as the irq code did unconditionally invoke the unmask() callback. With the recent changes which keep track of the masked state to avoid expensive hardware access, this does not longer work. As a consequence the HPET timer interrupts are not unmasked which breaks resume as the boot CPU waits forever that a timer interrupt arrives. Make the restore of the MSI message explicit and invoke the unmask() function directly. While at it get rid of the pointless affinity setting as nothing can change the affinity of the interrupt and the vector across suspend/resume. The restore of the MSI message reestablishes the previous affinity setting which is the correct one. Fixes: bf22ff45bed6 ("genirq: Avoid unnecessary low level irq function calls") Reported-and-tested-by: Tomi Sarvela Reported-by: Martin Peres Signed-off-by: Thomas Gleixner Acked-by: "Rafael J. Wysocki" Cc: jeffy.chen@rock-chips.com Cc: Peter Zijlstra Cc: Marc Zyngier Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1707312158590.2287@nanos --- arch/x86/kernel/hpet.c | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 16f82a3aaec7..8ce4212e2b8d 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -345,21 +345,10 @@ static int hpet_shutdown(struct clock_event_device *evt, int timer) return 0; } -static int hpet_resume(struct clock_event_device *evt, int timer) -{ - if (!timer) { - hpet_enable_legacy_int(); - } else { - struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); - - irq_domain_deactivate_irq(irq_get_irq_data(hdev->irq)); - irq_domain_activate_irq(irq_get_irq_data(hdev->irq)); - disable_hardirq(hdev->irq); - irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu)); - enable_irq(hdev->irq); - } +static int hpet_resume(struct clock_event_device *evt) +{ + hpet_enable_legacy_int(); hpet_print_config(); - return 0; } @@ -417,7 +406,7 @@ static int hpet_legacy_set_periodic(struct clock_event_device *evt) static int hpet_legacy_resume(struct clock_event_device *evt) { - return hpet_resume(evt, 0); + return hpet_resume(evt); } static int hpet_legacy_next_event(unsigned long delta, @@ -510,8 +499,14 @@ static int hpet_msi_set_periodic(struct clock_event_device *evt) static int hpet_msi_resume(struct clock_event_device *evt) { struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); + struct irq_data *data = irq_get_irq_data(hdev->irq); + struct msi_msg msg; - return hpet_resume(evt, hdev->num); + /* Restore the MSI msg and unmask the interrupt */ + irq_chip_compose_msi_msg(data, &msg); + hpet_msi_write(hdev, &msg); + hpet_msi_unmask(data); + return 0; } static int hpet_msi_next_event(unsigned long delta, -- cgit v1.2.3 From c6f97add0f2ac83b98b06dbdda58fa47638ae7b0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 21 Jul 2017 18:15:27 +0100 Subject: arm64: Use arch_timer_get_rate when trapping CNTFRQ_EL0 In an ideal world, CNTFRQ_EL0 always contains the timer frequency for the kernel to use. Sadly, we get quite a few broken systems where the firmware authors cannot be bothered to program that register on all CPUs, and rely on DT to provide that frequency. So when trapping CNTFRQ_EL0, make sure to return the actual rate (as known by the kernel), and not CNTFRQ_EL0. Acked-by: Mark Rutland Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/kernel/traps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index d48f47080213..8a62648848e5 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -523,7 +523,7 @@ static void cntfrq_read_handler(unsigned int esr, struct pt_regs *regs) { int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; - pt_regs_write_reg(regs, rt, read_sysreg(cntfrq_el0)); + pt_regs_write_reg(regs, rt, arch_timer_get_rate()); regs->pc += 4; } -- cgit v1.2.3 From 7f81e55c737a8fa82c71f290945d729a4902f8d2 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Tue, 1 Aug 2017 11:02:46 -0700 Subject: xtensa: don't limit csum_partial export by CONFIG_NET csum_partial and csum_partial_copy_generic are defined unconditionally and are available even when CONFIG_NET is disabled. They are used not only by the network drivers, but also by scsi and media. Don't limit these functions export by CONFIG_NET. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov --- arch/xtensa/kernel/xtensa_ksyms.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/xtensa/kernel/xtensa_ksyms.c b/arch/xtensa/kernel/xtensa_ksyms.c index d159e9b9c018..672391003e40 100644 --- a/arch/xtensa/kernel/xtensa_ksyms.c +++ b/arch/xtensa/kernel/xtensa_ksyms.c @@ -94,13 +94,11 @@ unsigned long __sync_fetch_and_or_4(unsigned long *p, unsigned long v) } EXPORT_SYMBOL(__sync_fetch_and_or_4); -#ifdef CONFIG_NET /* * Networking support */ EXPORT_SYMBOL(csum_partial); EXPORT_SYMBOL(csum_partial_copy_generic); -#endif /* CONFIG_NET */ /* * Architecture-specific symbols -- cgit v1.2.3 From bc652eb6a0d5cffaea7dc8e8ad488aab2a1bf1ed Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Tue, 1 Aug 2017 11:15:15 -0700 Subject: xtensa: mm/cache: add missing EXPORT_SYMBOLs Functions clear_user_highpage, copy_user_highpage, flush_dcache_page, local_flush_cache_range and local_flush_cache_page may be used from modules. Export them. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov --- arch/xtensa/mm/cache.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/xtensa/mm/cache.c b/arch/xtensa/mm/cache.c index dbb1cdef3663..3c75c4e597da 100644 --- a/arch/xtensa/mm/cache.c +++ b/arch/xtensa/mm/cache.c @@ -103,6 +103,7 @@ void clear_user_highpage(struct page *page, unsigned long vaddr) clear_page_alias(kvaddr, paddr); preempt_enable(); } +EXPORT_SYMBOL(clear_user_highpage); void copy_user_highpage(struct page *dst, struct page *src, unsigned long vaddr, struct vm_area_struct *vma) @@ -119,6 +120,7 @@ void copy_user_highpage(struct page *dst, struct page *src, copy_page_alias(dst_vaddr, src_vaddr, dst_paddr, src_paddr); preempt_enable(); } +EXPORT_SYMBOL(copy_user_highpage); /* * Any time the kernel writes to a user page cache page, or it is about to @@ -172,7 +174,7 @@ void flush_dcache_page(struct page *page) /* There shouldn't be an entry in the cache for this page anymore. */ } - +EXPORT_SYMBOL(flush_dcache_page); /* * For now, flush the whole cache. FIXME?? @@ -184,6 +186,7 @@ void local_flush_cache_range(struct vm_area_struct *vma, __flush_invalidate_dcache_all(); __invalidate_icache_all(); } +EXPORT_SYMBOL(local_flush_cache_range); /* * Remove any entry in the cache for this page. @@ -203,6 +206,7 @@ void local_flush_cache_page(struct vm_area_struct *vma, unsigned long address, __flush_invalidate_dcache_page_alias(virt, phys); __invalidate_icache_page_alias(virt, phys); } +EXPORT_SYMBOL(local_flush_cache_page); #endif /* DCACHE_WAY_SIZE > PAGE_SIZE */ -- cgit v1.2.3 From 7313c698050387a11c21afb0c6b4c61f21f7c042 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 27 Jul 2017 10:31:25 +0200 Subject: KVM: nVMX: do not fill vm_exit_intr_error_code in prepare_vmcs12 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Do this in the caller of nested_vmx_vmexit instead. nested_vmx_check_exception was doing a vmwrite to the vmcs02's VM_EXIT_INTR_ERROR_CODE field, so that prepare_vmcs12 would move the field to vmcs12->vm_exit_intr_error_code. However that isn't possible on pre-Haswell machines. Moving the vmcs12 write to the callers fixes it. Reported-by: Jim Mattson Signed-off-by: Paolo Bonzini [Changed nested_vmx_reflect_vmexit() return type to (int)1 from (bool)1, thanks to fengguang.wu@intel.com] Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 52 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 39a6222bf968..19465b73cc07 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2442,7 +2442,7 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu) return 0; if (vcpu->arch.exception.nested_apf) { - vmcs_write32(VM_EXIT_INTR_ERROR_CODE, vcpu->arch.exception.error_code); + vmcs12->vm_exit_intr_error_code = vcpu->arch.exception.error_code; nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, PF_VECTOR | INTR_TYPE_HARD_EXCEPTION | INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK, @@ -2450,6 +2450,7 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu) return 1; } + vmcs12->vm_exit_intr_error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, vmcs_read32(VM_EXIT_INTR_INFO), vmcs_readl(EXIT_QUALIFICATION)); @@ -2667,7 +2668,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) * reason is that if one of these bits is necessary, it will appear * in vmcs01 and prepare_vmcs02, when it bitwise-or's the control * fields of vmcs01 and vmcs02, will turn these bits off - and - * nested_vmx_exit_handled() will not pass related exits to L1. + * nested_vmx_exit_reflected() will not pass related exits to L1. * These rules have exceptions below. */ @@ -8019,12 +8020,11 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, * should handle it ourselves in L0 (and then continue L2). Only call this * when in is_guest_mode (L2). */ -static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) +static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) { u32 intr_info = vmcs_read32(VM_EXIT_INTR_INFO); struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - u32 exit_reason = vmx->exit_reason; trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason, vmcs_readl(EXIT_QUALIFICATION), @@ -8169,6 +8169,29 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) } } +static int nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason) +{ + u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); + + /* + * At this point, the exit interruption info in exit_intr_info + * is only valid for EXCEPTION_NMI exits. For EXTERNAL_INTERRUPT + * we need to query the in-kernel LAPIC. + */ + WARN_ON(exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT); + if ((exit_intr_info & + (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) == + (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) { + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + vmcs12->vm_exit_intr_error_code = + vmcs_read32(VM_EXIT_INTR_ERROR_CODE); + } + + nested_vmx_vmexit(vcpu, exit_reason, exit_intr_info, + vmcs_readl(EXIT_QUALIFICATION)); + return 1; +} + static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) { *info1 = vmcs_readl(EXIT_QUALIFICATION); @@ -8415,12 +8438,8 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) if (vmx->emulation_required) return handle_invalid_guest_state(vcpu); - if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) { - nested_vmx_vmexit(vcpu, exit_reason, - vmcs_read32(VM_EXIT_INTR_INFO), - vmcs_readl(EXIT_QUALIFICATION)); - return 1; - } + if (is_guest_mode(vcpu) && nested_vmx_exit_reflected(vcpu, exit_reason)) + return nested_vmx_reflect_vmexit(vcpu, exit_reason); if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { dump_vmcs(); @@ -9509,12 +9528,14 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu, WARN_ON(!is_guest_mode(vcpu)); - if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code)) + if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code)) { + vmcs12->vm_exit_intr_error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason, vmcs_read32(VM_EXIT_INTR_INFO), vmcs_readl(EXIT_QUALIFICATION)); - else + } else { kvm_inject_page_fault(vcpu, fault); + } } static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, @@ -10847,13 +10868,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmcs12->vm_exit_reason = exit_reason; vmcs12->exit_qualification = exit_qualification; - vmcs12->vm_exit_intr_info = exit_intr_info; - if ((vmcs12->vm_exit_intr_info & - (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) == - (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) - vmcs12->vm_exit_intr_error_code = - vmcs_read32(VM_EXIT_INTR_ERROR_CODE); + vmcs12->idt_vectoring_info_field = 0; vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); -- cgit v1.2.3 From b96fb439774e1bfb7d027ad324fa48606167cb52 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 27 Jul 2017 12:29:32 +0200 Subject: KVM: nVMX: fixes to nested virt interrupt injection There are three issues in nested_vmx_check_exception: 1) it is not taking PFEC_MATCH/PFEC_MASK into account, as reported by Wanpeng Li; 2) it should rebuild the interruption info and exit qualification fields from scratch, as reported by Jim Mattson, because the values from the L2->L0 vmexit may be invalid (e.g. if an emulated instruction causes a page fault, the EPT misconfig's exit qualification is incorrect). 3) CR2 and DR6 should not be written for exception intercept vmexits (CR2 only for AMD). This patch fixes the first two and adds a comment about the last, outlining the fix. Cc: Jim Mattson Cc: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 10 +++++++ arch/x86/kvm/vmx.c | 87 ++++++++++++++++++++++++++++++++++++++---------------- 2 files changed, 72 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 4d8141e533c3..1107626938cc 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2430,6 +2430,16 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr; svm->vmcb->control.exit_code_hi = 0; svm->vmcb->control.exit_info_1 = error_code; + + /* + * FIXME: we should not write CR2 when L1 intercepts an L2 #PF exception. + * The fix is to add the ancillary datum (CR2 or DR6) to structs + * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6 can be + * written only when inject_pending_event runs (DR6 would written here + * too). This should be conditional on a new capability---if the + * capability is disabled, kvm_multiple_exception would write the + * ancillary information to CR2 or DR6, for backwards ABI-compatibility. + */ if (svm->vcpu.arch.exception.nested_apf) svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token; else diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 19465b73cc07..714d4364ef87 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -927,6 +927,10 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var); static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); static int alloc_identity_pagetable(struct kvm *kvm); +static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu); +static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked); +static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, + u16 error_code); static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); @@ -2428,6 +2432,30 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) vmx_set_interrupt_shadow(vcpu, 0); } +static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu, + unsigned long exit_qual) +{ + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + unsigned int nr = vcpu->arch.exception.nr; + u32 intr_info = nr | INTR_INFO_VALID_MASK; + + if (vcpu->arch.exception.has_error_code) { + vmcs12->vm_exit_intr_error_code = vcpu->arch.exception.error_code; + intr_info |= INTR_INFO_DELIVER_CODE_MASK; + } + + if (kvm_exception_is_soft(nr)) + intr_info |= INTR_TYPE_SOFT_EXCEPTION; + else + intr_info |= INTR_TYPE_HARD_EXCEPTION; + + if (!(vmcs12->idt_vectoring_info_field & VECTORING_INFO_VALID_MASK) && + vmx_get_nmi_mask(vcpu)) + intr_info |= INTR_INFO_UNBLOCK_NMI; + + nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, intr_info, exit_qual); +} + /* * KVM wants to inject page-faults which it got to the guest. This function * checks whether in a nested guest, we need to inject them to L1 or L2. @@ -2437,24 +2465,38 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu) struct vmcs12 *vmcs12 = get_vmcs12(vcpu); unsigned int nr = vcpu->arch.exception.nr; - if (!((vmcs12->exception_bitmap & (1u << nr)) || - (nr == PF_VECTOR && vcpu->arch.exception.nested_apf))) - return 0; + if (nr == PF_VECTOR) { + if (vcpu->arch.exception.nested_apf) { + nested_vmx_inject_exception_vmexit(vcpu, + vcpu->arch.apf.nested_apf_token); + return 1; + } + /* + * FIXME: we must not write CR2 when L1 intercepts an L2 #PF exception. + * The fix is to add the ancillary datum (CR2 or DR6) to structs + * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6 + * can be written only when inject_pending_event runs. This should be + * conditional on a new capability---if the capability is disabled, + * kvm_multiple_exception would write the ancillary information to + * CR2 or DR6, for backwards ABI-compatibility. + */ + if (nested_vmx_is_page_fault_vmexit(vmcs12, + vcpu->arch.exception.error_code)) { + nested_vmx_inject_exception_vmexit(vcpu, vcpu->arch.cr2); + return 1; + } + } else { + unsigned long exit_qual = 0; + if (nr == DB_VECTOR) + exit_qual = vcpu->arch.dr6; - if (vcpu->arch.exception.nested_apf) { - vmcs12->vm_exit_intr_error_code = vcpu->arch.exception.error_code; - nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, - PF_VECTOR | INTR_TYPE_HARD_EXCEPTION | - INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK, - vcpu->arch.apf.nested_apf_token); - return 1; + if (vmcs12->exception_bitmap & (1u << nr)) { + nested_vmx_inject_exception_vmexit(vcpu, exit_qual); + return 1; + } } - vmcs12->vm_exit_intr_error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); - nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, - vmcs_read32(VM_EXIT_INTR_INFO), - vmcs_readl(EXIT_QUALIFICATION)); - return 1; + return 0; } static void vmx_queue_exception(struct kvm_vcpu *vcpu) @@ -9529,10 +9571,11 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu, WARN_ON(!is_guest_mode(vcpu)); if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code)) { - vmcs12->vm_exit_intr_error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); - nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason, - vmcs_read32(VM_EXIT_INTR_INFO), - vmcs_readl(EXIT_QUALIFICATION)); + vmcs12->vm_exit_intr_error_code = fault->error_code; + nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, + PF_VECTOR | INTR_TYPE_HARD_EXCEPTION | + INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK, + fault->address); } else { kvm_inject_page_fault(vcpu, fault); } @@ -10115,12 +10158,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, * "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept, * vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when * !enable_ept, EB.PF is 1, so the "or" will always be 1. - * - * A problem with this approach (when !enable_ept) is that L1 may be - * injected with more page faults than it asked for. This could have - * caused problems, but in practice existing hypervisors don't care. - * To fix this, we will need to emulate the PFEC checking (on the L1 - * page tables), using walk_addr(), when injecting PFs to L1. */ vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, enable_ept ? vmcs12->page_fault_error_code_mask : 0); -- cgit v1.2.3 From 337c017ccdf2653d0040099433fc1a2b1beb5926 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 1 Aug 2017 05:20:03 -0700 Subject: KVM: async_pf: make rcu irq exit if not triggered from idle task MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit WARNING: CPU: 5 PID: 1242 at kernel/rcu/tree_plugin.h:323 rcu_note_context_switch+0x207/0x6b0 CPU: 5 PID: 1242 Comm: unity-settings- Not tainted 4.13.0-rc2+ #1 RIP: 0010:rcu_note_context_switch+0x207/0x6b0 Call Trace: __schedule+0xda/0xba0 ? kvm_async_pf_task_wait+0x1b2/0x270 schedule+0x40/0x90 kvm_async_pf_task_wait+0x1cc/0x270 ? prepare_to_swait+0x22/0x70 do_async_page_fault+0x77/0xb0 ? do_async_page_fault+0x77/0xb0 async_page_fault+0x28/0x30 RIP: 0010:__d_lookup_rcu+0x90/0x1e0 I encounter this when trying to stress the async page fault in L1 guest w/ L2 guests running. Commit 9b132fbe5419 (Add rcu user eqs exception hooks for async page fault) adds rcu_irq_enter/exit() to kvm_async_pf_task_wait() to exit cpu idle eqs when needed, to protect the code that needs use rcu. However, we need to call the pair even if the function calls schedule(), as seen from the above backtrace. This patch fixes it by informing the RCU subsystem exit/enter the irq towards/away from idle for both n.halted and !n.halted. Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Paul E. McKenney Cc: stable@vger.kernel.org Signed-off-by: Wanpeng Li Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/kernel/kvm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 71c17a5be983..d04e30e3c0ff 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -151,6 +151,8 @@ void kvm_async_pf_task_wait(u32 token) if (hlist_unhashed(&n.link)) break; + rcu_irq_exit(); + if (!n.halted) { local_irq_enable(); schedule(); @@ -159,11 +161,11 @@ void kvm_async_pf_task_wait(u32 token) /* * We cannot reschedule. So halt. */ - rcu_irq_exit(); native_safe_halt(); local_irq_disable(); - rcu_irq_enter(); } + + rcu_irq_enter(); } if (!n.halted) finish_swait(&n.wq, &wait); -- cgit v1.2.3 From f29bb7861a5107cc1afbf5a565c3109aa88d5984 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sat, 29 Jul 2017 22:52:09 +0300 Subject: powerpc/83xx/mpc832x_rdb: fix of_irq_to_resource() error check of_irq_to_resource() has recently been fixed to return negative error #'s along with 0 in case of failure, however the Freescale MPC832x RDB board code still only regards 0 as a failure indication -- fix it up. Fixes: 7a4228bbff76 ("of: irq: use of_irq_get() in of_irq_to_resource()") Signed-off-by: Sergei Shtylyov Acked-by: Scott Wood Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/83xx/mpc832x_rdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c index d7c9b186954d..763ffca9628d 100644 --- a/arch/powerpc/platforms/83xx/mpc832x_rdb.c +++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c @@ -89,7 +89,7 @@ static int __init of_fsl_spi_probe(char *type, char *compatible, u32 sysclk, goto err; ret = of_irq_to_resource(np, 0, &res[1]); - if (!ret) + if (ret <= 0) goto err; pdev = platform_device_alloc("mpc83xx_spi", i); -- cgit v1.2.3 From d7a65c4905bc9c304ecf3d8aa566802f6119480f Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Tue, 1 Aug 2017 18:01:35 +0200 Subject: ARM64: dts: marvell: armada-37xx: Fix the number of GPIO on south bridge The number of pins in South Bridge is 30 and not 29. There is a fix for the driver for the pinctrl, but a fix is also need at device tree level for the GPIO. Fixes: afda007feda5 ("ARM64: dts: marvell: Add pinctrl nodes for Armada 3700") Cc: Signed-off-by: Gregory CLEMENT --- arch/arm64/boot/dts/marvell/armada-37xx.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi index dbcc3d4e2ed5..51763d674050 100644 --- a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi @@ -219,7 +219,7 @@ reg = <0x18800 0x100>, <0x18C00 0x20>; gpiosb: gpio { #gpio-cells = <2>; - gpio-ranges = <&pinctrl_sb 0 0 29>; + gpio-ranges = <&pinctrl_sb 0 0 30>; gpio-controller; interrupts = , -- cgit v1.2.3 From f4ef19108608c81769db69976999d056c070a6f0 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 1 Aug 2017 16:05:25 -0700 Subject: KVM: X86: Fix loss of pending INIT due to race MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When SMP VM start, AP may lost INIT because of receiving INIT between kvm_vcpu_ioctl_x86_get/set_vcpu_events. vcpu 0 vcpu 1 kvm_vcpu_ioctl_x86_get_vcpu_events events->smi.latched_init = 0 send INIT to vcpu1 set vcpu1's pending_events kvm_vcpu_ioctl_x86_set_vcpu_events if (events->smi.latched_init == 0) clear INIT in pending_events This patch fixes it by just update SMM related flags if we are in SMM. Thanks Peng Hao for the report and original commit message. Reported-by: Peng Hao Cc: Paolo Bonzini Cc: Radim Krčmář Signed-off-by: Wanpeng Li Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6c97c82814c4..037055a31b13 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3159,15 +3159,18 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, kvm_set_hflags(vcpu, hflags); vcpu->arch.smi_pending = events->smi.pending; - if (events->smi.smm_inside_nmi) - vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK; - else - vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK; - if (lapic_in_kernel(vcpu)) { - if (events->smi.latched_init) - set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); + + if (events->smi.smm) { + if (events->smi.smm_inside_nmi) + vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK; else - clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); + vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK; + if (lapic_in_kernel(vcpu)) { + if (events->smi.latched_init) + set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); + else + clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); + } } } -- cgit v1.2.3 From ebd28fcb55e288030abb5bca4869603b3e1f5f7c Mon Sep 17 00:00:00 2001 From: "Longpeng(Mike)" Date: Wed, 2 Aug 2017 11:20:51 +0800 Subject: KVM: X86: init irq->level in kvm_pv_kick_cpu_op MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 'lapic_irq' is a local variable and its 'level' field isn't initialized, so 'level' is random, it doesn't matter but makes UBSAN unhappy: UBSAN: Undefined behaviour in .../lapic.c:... load of value 10 is not a valid value for type '_Bool' ... Call Trace: [] dump_stack+0x1e/0x20 [] ubsan_epilogue+0x12/0x55 [] __ubsan_handle_load_invalid_value+0x118/0x162 [] kvm_apic_set_irq+0xc3/0xf0 [kvm] [] kvm_irq_delivery_to_apic_fast+0x450/0x910 [kvm] [] kvm_irq_delivery_to_apic+0xfa/0x7a0 [kvm] [] kvm_emulate_hypercall+0x62e/0x760 [kvm] [] handle_vmcall+0x1a/0x30 [kvm_intel] [] vmx_handle_exit+0x7a2/0x1fa0 [kvm_intel] ... Signed-off-by: Longpeng(Mike) Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 037055a31b13..d734aa8c5b4f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6218,6 +6218,7 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid) lapic_irq.shorthand = 0; lapic_irq.dest_mode = 0; + lapic_irq.level = 0; lapic_irq.dest_id = apicid; lapic_irq.msi_redir_hint = false; -- cgit v1.2.3 From 9f744c59746078280ef28163aa136ef3f625804e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 27 Jul 2017 15:54:46 +0200 Subject: KVM: nVMX: do not pin the VMCS12 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the current implementation of VMCS12 does a memcpy in and out of guest memory, we do not need current_vmcs12 and current_vmcs12_page anymore. current_vmptr is enough to read and write the VMCS12. And David Matlack noted: This patch also fixes dirty tracking (memslot->dirty_bitmap) of the VMCS12 page by using kvm_write_guest. nested_release_page() only marks the struct page dirty. Signed-off-by: Paolo Bonzini Reviewed-by: David Hildenbrand [Added David Matlack's note and nested_release_page_clean() fix.] Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 714d4364ef87..082cdb9011eb 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -416,9 +416,6 @@ struct nested_vmx { /* The guest-physical address of the current VMCS L1 keeps for L2 */ gpa_t current_vmptr; - /* The host-usable pointer to the above */ - struct page *current_vmcs12_page; - struct vmcs12 *current_vmcs12; /* * Cache of the guest's VMCS, existing outside of guest memory. * Loaded from guest memory during VMPTRLD. Flushed to guest @@ -7182,10 +7179,6 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx) if (vmx->nested.current_vmptr == -1ull) return; - /* current_vmptr and current_vmcs12 are always set/reset together */ - if (WARN_ON(vmx->nested.current_vmcs12 == NULL)) - return; - if (enable_shadow_vmcs) { /* copy to memory all shadowed fields in case they were modified */ @@ -7198,13 +7191,11 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx) vmx->nested.posted_intr_nv = -1; /* Flush VMCS12 to guest memory */ - memcpy(vmx->nested.current_vmcs12, vmx->nested.cached_vmcs12, - VMCS12_SIZE); + kvm_vcpu_write_guest_page(&vmx->vcpu, + vmx->nested.current_vmptr >> PAGE_SHIFT, + vmx->nested.cached_vmcs12, 0, VMCS12_SIZE); - kunmap(vmx->nested.current_vmcs12_page); - nested_release_page(vmx->nested.current_vmcs12_page); vmx->nested.current_vmptr = -1ull; - vmx->nested.current_vmcs12 = NULL; } /* @@ -7622,14 +7613,14 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) } nested_release_vmcs12(vmx); - vmx->nested.current_vmcs12 = new_vmcs12; - vmx->nested.current_vmcs12_page = page; /* * Load VMCS12 from guest memory since it is not already * cached. */ - memcpy(vmx->nested.cached_vmcs12, - vmx->nested.current_vmcs12, VMCS12_SIZE); + memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE); + kunmap(page); + nested_release_page_clean(page); + set_current_vmptr(vmx, vmptr); } @@ -9284,7 +9275,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) vmx->nested.posted_intr_nv = -1; vmx->nested.current_vmptr = -1ull; - vmx->nested.current_vmcs12 = NULL; vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED; -- cgit v1.2.3 From 8ca44e88c32f86721c6ceca8e5e9b0b40c98907d Mon Sep 17 00:00:00 2001 From: David Matlack Date: Tue, 1 Aug 2017 14:00:39 -0700 Subject: kvm: nVMX: don't flush VMCS12 during VMXOFF or VCPU teardown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the Intel SDM, software cannot rely on the current VMCS to be coherent after a VMXOFF or shutdown. So this is a valid way to handle VMCS12 flushes. 24.11.1 Software Use of Virtual-Machine Control Structures ... If a logical processor leaves VMX operation, any VMCSs active on that logical processor may be corrupted (see below). To prevent such corruption of a VMCS that may be used either after a return to VMX operation or on another logical processor, software should execute VMCLEAR for that VMCS before executing the VMXOFF instruction or removing power from the processor (e.g., as part of a transition to the S3 and S4 power states). ... This fixes a "suspicious rcu_dereference_check() usage!" warning during kvm_vm_release() because nested_release_vmcs12() calls kvm_vcpu_write_guest_page() without holding kvm->srcu. Signed-off-by: David Matlack Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 082cdb9011eb..2099b1495b57 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -419,7 +419,7 @@ struct nested_vmx { /* * Cache of the guest's VMCS, existing outside of guest memory. * Loaded from guest memory during VMPTRLD. Flushed to guest - * memory during VMXOFF, VMCLEAR, VMPTRLD. + * memory during VMCLEAR and VMPTRLD. */ struct vmcs12 *cached_vmcs12; /* @@ -7174,6 +7174,12 @@ static int nested_vmx_check_permission(struct kvm_vcpu *vcpu) return 1; } +static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx) +{ + vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS); + vmcs_write64(VMCS_LINK_POINTER, -1ull); +} + static inline void nested_release_vmcs12(struct vcpu_vmx *vmx) { if (vmx->nested.current_vmptr == -1ull) @@ -7184,9 +7190,7 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx) they were modified */ copy_shadow_to_vmcs12(vmx); vmx->nested.sync_shadow_vmcs = false; - vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, - SECONDARY_EXEC_SHADOW_VMCS); - vmcs_write64(VMCS_LINK_POINTER, -1ull); + vmx_disable_shadow_vmcs(vmx); } vmx->nested.posted_intr_nv = -1; @@ -7209,12 +7213,14 @@ static void free_nested(struct vcpu_vmx *vmx) vmx->nested.vmxon = false; free_vpid(vmx->nested.vpid02); - nested_release_vmcs12(vmx); + vmx->nested.posted_intr_nv = -1; + vmx->nested.current_vmptr = -1ull; if (vmx->nested.msr_bitmap) { free_page((unsigned long)vmx->nested.msr_bitmap); vmx->nested.msr_bitmap = NULL; } if (enable_shadow_vmcs) { + vmx_disable_shadow_vmcs(vmx); vmcs_clear(vmx->vmcs01.shadow_vmcs); free_vmcs(vmx->vmcs01.shadow_vmcs); vmx->vmcs01.shadow_vmcs = NULL; -- cgit v1.2.3 From c9f04407f2e0b3fc9ff7913c65fcfcb0a4b61570 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Tue, 1 Aug 2017 14:00:40 -0700 Subject: KVM: nVMX: mark vmcs12 pages dirty on L2 exit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The host physical addresses of L1's Virtual APIC Page and Posted Interrupt descriptor are loaded into the VMCS02. The CPU may write to these pages via their host physical address while L2 is running, bypassing address-translation-based dirty tracking (e.g. EPT write protection). Mark them dirty on every exit from L2 to prevent them from getting out of sync with dirty tracking. Also mark the virtual APIC page and the posted interrupt descriptor dirty when KVM is virtualizing posted interrupt processing. Signed-off-by: David Matlack Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 53 +++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 43 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 2099b1495b57..cbad87e7e829 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4995,6 +4995,28 @@ static bool vmx_get_enable_apicv(void) return enable_apicv; } +static void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu) +{ + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + gfn_t gfn; + + /* + * Don't need to mark the APIC access page dirty; it is never + * written to by the CPU during APIC virtualization. + */ + + if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { + gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT; + kvm_vcpu_mark_page_dirty(vcpu, gfn); + } + + if (nested_cpu_has_posted_intr(vmcs12)) { + gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT; + kvm_vcpu_mark_page_dirty(vcpu, gfn); + } +} + + static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -5002,18 +5024,15 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) void *vapic_page; u16 status; - if (vmx->nested.pi_desc && - vmx->nested.pi_pending) { - vmx->nested.pi_pending = false; - if (!pi_test_and_clear_on(vmx->nested.pi_desc)) - return; - - max_irr = find_last_bit( - (unsigned long *)vmx->nested.pi_desc->pir, 256); + if (!vmx->nested.pi_desc || !vmx->nested.pi_pending) + return; - if (max_irr == 256) - return; + vmx->nested.pi_pending = false; + if (!pi_test_and_clear_on(vmx->nested.pi_desc)) + return; + max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256); + if (max_irr != 256) { vapic_page = kmap(vmx->nested.virtual_apic_page); __kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page); kunmap(vmx->nested.virtual_apic_page); @@ -5025,6 +5044,8 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) vmcs_write16(GUEST_INTR_STATUS, status); } } + + nested_mark_vmcs12_pages_dirty(vcpu); } static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu, @@ -8072,6 +8093,18 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) vmcs_read32(VM_EXIT_INTR_ERROR_CODE), KVM_ISA_VMX); + /* + * The host physical addresses of some pages of guest memory + * are loaded into VMCS02 (e.g. L1's Virtual APIC Page). The CPU + * may write to these pages via their host physical address while + * L2 is running, bypassing any address-translation-based dirty + * tracking (e.g. EPT write protection). + * + * Mark them dirty on every exit from L2 to prevent them from + * getting out of sync with dirty tracking. + */ + nested_mark_vmcs12_pages_dirty(vcpu); + if (vmx->nested.nested_run_pending) return false; -- cgit v1.2.3 From 6550c4df7e50d09f86385ce20fd7e969a5638da3 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 31 Jul 2017 19:25:27 -0700 Subject: KVM: nVMX: Fix interrupt window request with "Acknowledge interrupt on exit" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ------------[ cut here ]------------ WARNING: CPU: 5 PID: 2288 at arch/x86/kvm/vmx.c:11124 nested_vmx_vmexit+0xd64/0xd70 [kvm_intel] CPU: 5 PID: 2288 Comm: qemu-system-x86 Not tainted 4.13.0-rc2+ #7 RIP: 0010:nested_vmx_vmexit+0xd64/0xd70 [kvm_intel] Call Trace: vmx_check_nested_events+0x131/0x1f0 [kvm_intel] ? vmx_check_nested_events+0x131/0x1f0 [kvm_intel] kvm_arch_vcpu_ioctl_run+0x5dd/0x1be0 [kvm] ? vmx_vcpu_load+0x1be/0x220 [kvm_intel] ? kvm_arch_vcpu_load+0x62/0x230 [kvm] kvm_vcpu_ioctl+0x340/0x700 [kvm] ? kvm_vcpu_ioctl+0x340/0x700 [kvm] ? __fget+0xfc/0x210 do_vfs_ioctl+0xa4/0x6a0 ? __fget+0x11d/0x210 SyS_ioctl+0x79/0x90 do_syscall_64+0x8f/0x750 ? trace_hardirqs_on_thunk+0x1a/0x1c entry_SYSCALL64_slow_path+0x25/0x25 This can be reproduced by booting L1 guest w/ 'noapic' grub parameter, which means that tells the kernel to not make use of any IOAPICs that may be present in the system. Actually external_intr variable in nested_vmx_vmexit() is the req_int_win variable passed from vcpu_enter_guest() which means that the L0's userspace requests an irq window. I observed the scenario (!kvm_cpu_has_interrupt(vcpu) && L0's userspace reqeusts an irq window) is true, so there is no interrupt which L1 requires to inject to L2, we should not attempt to emualte "Acknowledge interrupt on exit" for the irq window requirement in this scenario. This patch fixes it by not attempt to emulate "Acknowledge interrupt on exit" if there is no L1 requirement to inject an interrupt to L2. Cc: Paolo Bonzini Cc: Radim Krčmář Signed-off-by: Wanpeng Li [Added code comment to make it obvious that the behavior is not correct. We should do a userspace exit with open interrupt window instead of the nested VM exit. This patch still improves the behavior, so it was accepted as a (temporary) workaround.] Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index cbad87e7e829..9b21b1223035 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -11131,8 +11131,15 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, vmx_switch_vmcs(vcpu, &vmx->vmcs01); - if ((exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT) - && nested_exit_intr_ack_set(vcpu)) { + /* + * TODO: SDM says that with acknowledge interrupt on exit, bit 31 of + * the VM-exit interrupt information (valid interrupt) is always set to + * 1 on EXIT_REASON_EXTERNAL_INTERRUPT, so we shouldn't need + * kvm_cpu_has_interrupt(). See the commit message for details. + */ + if (nested_exit_intr_ack_set(vcpu) && + exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT && + kvm_cpu_has_interrupt(vcpu)) { int irq = kvm_cpu_get_interrupt(vcpu); WARN_ON(irq < 0); vmcs12->vm_exit_intr_info = irq | -- cgit v1.2.3 From 09539f9b123652e969894d6299ae0df2fe12cb5d Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 20 Jul 2017 11:53:22 +1000 Subject: powerpc/perf: POWER9 PMU stops after idle workaround POWER9 DD2 PMU can stop after a state-loss idle in some conditions. A solution is to set then clear MMCRA[60] after wake from state-loss idle. MMCRA[60] is a non-architected bit, see the user manual for details. Signed-off-by: Nicholas Piggin Acked-by: Madhavan Srinivasan Reviewed-by: Vaidyanathan Srinivasan Acked-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/idle_book3s.S | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 516ebef905c0..e6252c5a57a4 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -460,11 +460,17 @@ pnv_restore_hyp_resource_arch300: /* * Workaround for POWER9, if we lost resources, the ERAT * might have been mixed up and needs flushing. We also need - * to reload MMCR0 (see comment above). + * to reload MMCR0 (see comment above). We also need to set + * then clear bit 60 in MMCRA to ensure the PMU starts running. */ blt cr3,1f PPC_INVALIDATE_ERAT ld r1,PACAR1(r13) + mfspr r4,SPRN_MMCRA + ori r4,r4,(1 << (63-60)) + mtspr SPRN_MMCRA,r4 + xori r4,r4,(1 << (63-60)) + mtspr SPRN_MMCRA,r4 ld r4,_MMCR0(r1) mtspr SPRN_MMCR0,r4 1: -- cgit v1.2.3 From 3db40c312c2c1eb2187c5731102fa8ff380e6e40 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 1 Aug 2017 23:59:28 +1000 Subject: powerpc/64: Fix __check_irq_replay missing decrementer interrupt If the decrementer wraps again and de-asserts the decrementer exception while hard-disabled, __check_irq_replay() has a test to notice the wrap when interrupts are re-enabled. The decrementer check must be done when clearing the PACA_IRQ_HARD_DIS flag, not when the PACA_IRQ_DEC flag is tested. Previously this worked because the decrementer interrupt was always the first one checked after clearing the hard disable flag, but HMI check was moved ahead of that, which introduced this bug. This can cause a missed decrementer interrupt if we soft-disable interrupts then take an HMI which is recorded in irq_happened, then hard-disable interrupts for > 4s to wrap the decrementer. Fixes: e0e0d6b7390b ("powerpc/64: Replay hypervisor maintenance interrupt first") Cc: stable@vger.kernel.org # v4.9+ Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/irq.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 0bcec745a672..f291f7826abc 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -145,6 +145,19 @@ notrace unsigned int __check_irq_replay(void) /* Clear bit 0 which we wouldn't clear otherwise */ local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; + if (happened & PACA_IRQ_HARD_DIS) { + /* + * We may have missed a decrementer interrupt if hard disabled. + * Check the decrementer register in case we had a rollover + * while hard disabled. + */ + if (!(happened & PACA_IRQ_DEC)) { + if (decrementer_check_overflow()) { + local_paca->irq_happened |= PACA_IRQ_DEC; + happened |= PACA_IRQ_DEC; + } + } + } /* * Force the delivery of pending soft-disabled interrupts on PS3. @@ -170,7 +183,7 @@ notrace unsigned int __check_irq_replay(void) * in case we also had a rollover while hard disabled */ local_paca->irq_happened &= ~PACA_IRQ_DEC; - if ((happened & PACA_IRQ_DEC) || decrementer_check_overflow()) + if (happened & PACA_IRQ_DEC) return 0x900; /* Finally check if an external interrupt happened */ -- cgit v1.2.3 From 985333b0eef8603b02181c4ec0a722b82be9642d Mon Sep 17 00:00:00 2001 From: Marc Gonzalez Date: Fri, 28 Jul 2017 15:27:49 +0200 Subject: ARM: dts: tango4: Request RGMII RX and TX clock delays RX and TX clock delays are required. Request them explicitly. Fixes: cad008b8a77e6 ("ARM: dts: tango4: Initial device trees") Cc: stable@vger.kernel.org Signed-off-by: Marc Gonzalez Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/tango4-vantage-1172.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/tango4-vantage-1172.dts b/arch/arm/boot/dts/tango4-vantage-1172.dts index 86d8df98802f..13bcc460bcb2 100644 --- a/arch/arm/boot/dts/tango4-vantage-1172.dts +++ b/arch/arm/boot/dts/tango4-vantage-1172.dts @@ -22,7 +22,7 @@ }; ð0 { - phy-connection-type = "rgmii"; + phy-connection-type = "rgmii-id"; phy-handle = <ð0_phy>; #address-cells = <1>; #size-cells = <0>; -- cgit v1.2.3 From 6d332747fa5f0a6843b56b5b129168ba909336d1 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 25 Jul 2017 14:53:03 +0100 Subject: arm64: Fix potential race with hardware DBM in ptep_set_access_flags() In a system with DBM (dirty bit management) capable agents there is a possible race between a CPU executing ptep_set_access_flags() (maybe non-DBM capable) and a hardware update of the dirty state (clearing of PTE_RDONLY). The scenario: a) the pte is writable (PTE_WRITE set), clean (PTE_RDONLY set) and old (PTE_AF clear) b) ptep_set_access_flags() is called as a result of a read access and it needs to set the pte to writable, clean and young (PTE_AF set) c) a DBM-capable agent, as a result of a different write access, is marking the entry as young (setting PTE_AF) and dirty (clearing PTE_RDONLY) The current ptep_set_access_flags() implementation would set the PTE_RDONLY bit in the resulting value overriding the DBM update and losing the dirty state. This patch fixes such race by setting PTE_RDONLY to the most permissive (lowest value) of the current entry and the new one. Fixes: 66dbd6e61a52 ("arm64: Implement ptep_set_access_flags() for hardware AF/DBM") Cc: Will Deacon Acked-by: Mark Rutland Acked-by: Steve Capper Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/mm/fault.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index c7861c9864e6..2509e4fe6992 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -163,26 +163,27 @@ int ptep_set_access_flags(struct vm_area_struct *vma, /* only preserve the access flags and write permission */ pte_val(entry) &= PTE_AF | PTE_WRITE | PTE_DIRTY; - /* - * PTE_RDONLY is cleared by default in the asm below, so set it in - * back if necessary (read-only or clean PTE). - */ + /* set PTE_RDONLY if actual read-only or clean PTE */ if (!pte_write(entry) || !pte_sw_dirty(entry)) pte_val(entry) |= PTE_RDONLY; /* * Setting the flags must be done atomically to avoid racing with the - * hardware update of the access/dirty state. + * hardware update of the access/dirty state. The PTE_RDONLY bit must + * be set to the most permissive (lowest value) of *ptep and entry + * (calculated as: a & b == ~(~a | ~b)). */ + pte_val(entry) ^= PTE_RDONLY; asm volatile("// ptep_set_access_flags\n" " prfm pstl1strm, %2\n" "1: ldxr %0, %2\n" - " and %0, %0, %3 // clear PTE_RDONLY\n" + " eor %0, %0, %3 // negate PTE_RDONLY in *ptep\n" " orr %0, %0, %4 // set flags\n" + " eor %0, %0, %3 // negate final PTE_RDONLY\n" " stxr %w1, %0, %2\n" " cbnz %w1, 1b\n" : "=&r" (old_pteval), "=&r" (tmp), "+Q" (pte_val(*ptep)) - : "L" (~PTE_RDONLY), "r" (pte_val(entry))); + : "L" (PTE_RDONLY), "r" (pte_val(entry))); flush_tlb_fix_spurious_fault(vma, address); return 1; -- cgit v1.2.3 From 82cd588052815eb4146f9f7c5347ca5e32c56360 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Thu, 3 Aug 2017 11:03:58 -0700 Subject: arm64: avoid overflow in VA_START and PAGE_OFFSET The bitmask used to define these values produces overflow, as seen by this compiler warning: arch/arm64/kernel/head.S:47:8: warning: integer overflow in preprocessor expression #elif (PAGE_OFFSET & 0x1fffff) != 0 ^~~~~~~~~~~ arch/arm64/include/asm/memory.h:52:46: note: expanded from macro 'PAGE_OFFSET' #define PAGE_OFFSET (UL(0xffffffffffffffff) << (VA_BITS - 1)) ~~~~~~~~~~~~~~~~~~ ^ It would be preferrable to use GENMASK_ULL() instead, but it's not set up to be used from assembly (the UL() macro token pastes UL suffixes when not included in assembly sources). Suggested-by: Ard Biesheuvel Suggested-by: Yury Norov Suggested-by: Matthias Kaehlcke Signed-off-by: Nick Desaulniers Signed-off-by: Will Deacon --- arch/arm64/include/asm/memory.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 32f82723338a..ef39dcb9ca6a 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -64,8 +64,10 @@ * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area. */ #define VA_BITS (CONFIG_ARM64_VA_BITS) -#define VA_START (UL(0xffffffffffffffff) << VA_BITS) -#define PAGE_OFFSET (UL(0xffffffffffffffff) << (VA_BITS - 1)) +#define VA_START (UL(0xffffffffffffffff) - \ + (UL(1) << VA_BITS) + 1) +#define PAGE_OFFSET (UL(0xffffffffffffffff) - \ + (UL(1) << (VA_BITS - 1)) + 1) #define KIMAGE_VADDR (MODULES_END) #define MODULES_END (MODULES_VADDR + MODULES_VSIZE) #define MODULES_VADDR (VA_START + KASAN_SHADOW_SIZE) -- cgit v1.2.3 From 0ede1c401332173ab0693121dc6cde04a4dbf131 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 4 Aug 2017 09:47:52 -0700 Subject: sparc64: Fix exception handling in UltraSPARC-III memcpy. Mikael Pettersson reported that some test programs in the strace-4.18 testsuite cause an OOPS. After some debugging it turns out that garbage values are returned when an exception occurs, causing the fixup memset() to be run with bogus arguments. The problem is that two of the exception handler stubs write the successfully copied length into the wrong register. Fixes: ee841d0aff64 ("sparc64: Convert U3copy_{from,to}_user to accurate exception reporting.") Reported-by: Mikael Pettersson Tested-by: Mikael Pettersson Reviewed-by: Sam Ravnborg Signed-off-by: David S. Miller --- arch/sparc/lib/U3memcpy.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/sparc/lib/U3memcpy.S b/arch/sparc/lib/U3memcpy.S index 54f98706b03b..5a8cb37f0a3b 100644 --- a/arch/sparc/lib/U3memcpy.S +++ b/arch/sparc/lib/U3memcpy.S @@ -145,13 +145,13 @@ ENDPROC(U3_retl_o2_plus_GS_plus_0x08) ENTRY(U3_retl_o2_and_7_plus_GS) and %o2, 7, %o2 retl - add %o2, GLOBAL_SPARE, %o2 + add %o2, GLOBAL_SPARE, %o0 ENDPROC(U3_retl_o2_and_7_plus_GS) ENTRY(U3_retl_o2_and_7_plus_GS_plus_8) add GLOBAL_SPARE, 8, GLOBAL_SPARE and %o2, 7, %o2 retl - add %o2, GLOBAL_SPARE, %o2 + add %o2, GLOBAL_SPARE, %o0 ENDPROC(U3_retl_o2_and_7_plus_GS_plus_8) #endif -- cgit v1.2.3 From 9e48cd4a77d5170433a2e611eeda7accdf96604d Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Mon, 24 Jul 2017 11:44:17 +0530 Subject: sparc64: properly name the cpu constants Acked-by: Sam Ravnborg Acked-by: David S. Miller Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- arch/sparc/include/asm/spitfire.h | 14 ++++++++++++++ arch/sparc/kernel/head_64.S | 16 ++++++++-------- 2 files changed, 22 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/sparc/include/asm/spitfire.h b/arch/sparc/include/asm/spitfire.h index 1d8321c827a8..9cc2afe10ef0 100644 --- a/arch/sparc/include/asm/spitfire.h +++ b/arch/sparc/include/asm/spitfire.h @@ -51,6 +51,20 @@ #define SUN4V_CHIP_SPARC_SN 0x8b #define SUN4V_CHIP_UNKNOWN 0xff +/* + * The following CPU_ID_xxx constants are used + * to identify the CPU type in the setup phase + * (see head_64.S) + */ +#define CPU_ID_NIAGARA1 ('1') +#define CPU_ID_NIAGARA2 ('2') +#define CPU_ID_NIAGARA3 ('3') +#define CPU_ID_NIAGARA4 ('4') +#define CPU_ID_NIAGARA5 ('5') +#define CPU_ID_M6 ('6') +#define CPU_ID_M7 ('7') +#define CPU_ID_SONOMA1 ('N') + #ifndef __ASSEMBLY__ enum ultra_tlb_layout { diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 41a407328667..ddb5e24adf49 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -424,22 +424,22 @@ EXPORT_SYMBOL(sun4v_chip_type) nop 70: ldub [%g1 + 7], %g2 - cmp %g2, '3' + cmp %g2, CPU_ID_NIAGARA3 be,pt %xcc, 5f mov SUN4V_CHIP_NIAGARA3, %g4 - cmp %g2, '4' + cmp %g2, CPU_ID_NIAGARA4 be,pt %xcc, 5f mov SUN4V_CHIP_NIAGARA4, %g4 - cmp %g2, '5' + cmp %g2, CPU_ID_NIAGARA5 be,pt %xcc, 5f mov SUN4V_CHIP_NIAGARA5, %g4 - cmp %g2, '6' + cmp %g2, CPU_ID_M6 be,pt %xcc, 5f mov SUN4V_CHIP_SPARC_M6, %g4 - cmp %g2, '7' + cmp %g2, CPU_ID_M7 be,pt %xcc, 5f mov SUN4V_CHIP_SPARC_M7, %g4 - cmp %g2, 'N' + cmp %g2, CPU_ID_SONOMA1 be,pt %xcc, 5f mov SUN4V_CHIP_SPARC_SN, %g4 ba,pt %xcc, 49f @@ -448,10 +448,10 @@ EXPORT_SYMBOL(sun4v_chip_type) 91: sethi %hi(prom_cpu_compatible), %g1 or %g1, %lo(prom_cpu_compatible), %g1 ldub [%g1 + 17], %g2 - cmp %g2, '1' + cmp %g2, CPU_ID_NIAGARA1 be,pt %xcc, 5f mov SUN4V_CHIP_NIAGARA1, %g4 - cmp %g2, '2' + cmp %g2, CPU_ID_NIAGARA2 be,pt %xcc, 5f mov SUN4V_CHIP_NIAGARA2, %g4 -- cgit v1.2.3 From 7d484acb2f90643de7e242fd47e48c3ebb22df3a Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Mon, 24 Jul 2017 11:44:18 +0530 Subject: sparc64: recognize and support sparc M8 cpu type Recognize SPARC-M8 cpu type, hardware caps and cpu distribution map. Signed-off-by: Allen Pais Signed-off-by: David Aldridge Signed-off-by: David S. Miller --- arch/sparc/include/asm/spitfire.h | 2 ++ arch/sparc/kernel/cpu.c | 6 ++++++ arch/sparc/kernel/cpumap.c | 1 + arch/sparc/kernel/head_64.S | 6 ++++++ arch/sparc/kernel/setup_64.c | 15 +++++++++++++-- arch/sparc/mm/init_64.c | 2 ++ 6 files changed, 30 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/sparc/include/asm/spitfire.h b/arch/sparc/include/asm/spitfire.h index 9cc2afe10ef0..1b1286d05069 100644 --- a/arch/sparc/include/asm/spitfire.h +++ b/arch/sparc/include/asm/spitfire.h @@ -47,6 +47,7 @@ #define SUN4V_CHIP_NIAGARA5 0x05 #define SUN4V_CHIP_SPARC_M6 0x06 #define SUN4V_CHIP_SPARC_M7 0x07 +#define SUN4V_CHIP_SPARC_M8 0x08 #define SUN4V_CHIP_SPARC64X 0x8a #define SUN4V_CHIP_SPARC_SN 0x8b #define SUN4V_CHIP_UNKNOWN 0xff @@ -63,6 +64,7 @@ #define CPU_ID_NIAGARA5 ('5') #define CPU_ID_M6 ('6') #define CPU_ID_M7 ('7') +#define CPU_ID_M8 ('8') #define CPU_ID_SONOMA1 ('N') #ifndef __ASSEMBLY__ diff --git a/arch/sparc/kernel/cpu.c b/arch/sparc/kernel/cpu.c index 493e023a468a..ef4f18f7a674 100644 --- a/arch/sparc/kernel/cpu.c +++ b/arch/sparc/kernel/cpu.c @@ -506,6 +506,12 @@ static void __init sun4v_cpu_probe(void) sparc_pmu_type = "sparc-m7"; break; + case SUN4V_CHIP_SPARC_M8: + sparc_cpu_type = "SPARC-M8"; + sparc_fpu_type = "SPARC-M8 integrated FPU"; + sparc_pmu_type = "sparc-m8"; + break; + case SUN4V_CHIP_SPARC_SN: sparc_cpu_type = "SPARC-SN"; sparc_fpu_type = "SPARC-SN integrated FPU"; diff --git a/arch/sparc/kernel/cpumap.c b/arch/sparc/kernel/cpumap.c index 45c820e1cba5..90d550bbfeef 100644 --- a/arch/sparc/kernel/cpumap.c +++ b/arch/sparc/kernel/cpumap.c @@ -328,6 +328,7 @@ static int iterate_cpu(struct cpuinfo_tree *t, unsigned int root_index) case SUN4V_CHIP_NIAGARA5: case SUN4V_CHIP_SPARC_M6: case SUN4V_CHIP_SPARC_M7: + case SUN4V_CHIP_SPARC_M8: case SUN4V_CHIP_SPARC_SN: case SUN4V_CHIP_SPARC64X: rover_inc_table = niagara_iterate_method; diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index ddb5e24adf49..78e0211753d2 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -439,6 +439,9 @@ EXPORT_SYMBOL(sun4v_chip_type) cmp %g2, CPU_ID_M7 be,pt %xcc, 5f mov SUN4V_CHIP_SPARC_M7, %g4 + cmp %g2, CPU_ID_M8 + be,pt %xcc, 5f + mov SUN4V_CHIP_SPARC_M8, %g4 cmp %g2, CPU_ID_SONOMA1 be,pt %xcc, 5f mov SUN4V_CHIP_SPARC_SN, %g4 @@ -600,6 +603,9 @@ niagara_tlb_fixup: be,pt %xcc, niagara4_patch nop cmp %g1, SUN4V_CHIP_SPARC_M7 + be,pt %xcc, niagara4_patch + nop + cmp %g1, SUN4V_CHIP_SPARC_M8 be,pt %xcc, niagara4_patch nop cmp %g1, SUN4V_CHIP_SPARC_SN diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index 4d9c3e13c150..150ee7d4b059 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -288,10 +288,17 @@ static void __init sun4v_patch(void) sun4v_patch_2insn_range(&__sun4v_2insn_patch, &__sun4v_2insn_patch_end); - if (sun4v_chip_type == SUN4V_CHIP_SPARC_M7 || - sun4v_chip_type == SUN4V_CHIP_SPARC_SN) + + switch (sun4v_chip_type) { + case SUN4V_CHIP_SPARC_M7: + case SUN4V_CHIP_SPARC_M8: + case SUN4V_CHIP_SPARC_SN: sun_m7_patch_2insn_range(&__sun_m7_2insn_patch, &__sun_m7_2insn_patch_end); + break; + default: + break; + } sun4v_hvapi_init(); } @@ -529,6 +536,7 @@ static void __init init_sparc64_elf_hwcap(void) sun4v_chip_type == SUN4V_CHIP_NIAGARA5 || sun4v_chip_type == SUN4V_CHIP_SPARC_M6 || sun4v_chip_type == SUN4V_CHIP_SPARC_M7 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M8 || sun4v_chip_type == SUN4V_CHIP_SPARC_SN || sun4v_chip_type == SUN4V_CHIP_SPARC64X) cap |= HWCAP_SPARC_BLKINIT; @@ -538,6 +546,7 @@ static void __init init_sparc64_elf_hwcap(void) sun4v_chip_type == SUN4V_CHIP_NIAGARA5 || sun4v_chip_type == SUN4V_CHIP_SPARC_M6 || sun4v_chip_type == SUN4V_CHIP_SPARC_M7 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M8 || sun4v_chip_type == SUN4V_CHIP_SPARC_SN || sun4v_chip_type == SUN4V_CHIP_SPARC64X) cap |= HWCAP_SPARC_N2; @@ -568,6 +577,7 @@ static void __init init_sparc64_elf_hwcap(void) sun4v_chip_type == SUN4V_CHIP_NIAGARA5 || sun4v_chip_type == SUN4V_CHIP_SPARC_M6 || sun4v_chip_type == SUN4V_CHIP_SPARC_M7 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M8 || sun4v_chip_type == SUN4V_CHIP_SPARC_SN || sun4v_chip_type == SUN4V_CHIP_SPARC64X) cap |= (AV_SPARC_VIS | AV_SPARC_VIS2 | @@ -578,6 +588,7 @@ static void __init init_sparc64_elf_hwcap(void) sun4v_chip_type == SUN4V_CHIP_NIAGARA5 || sun4v_chip_type == SUN4V_CHIP_SPARC_M6 || sun4v_chip_type == SUN4V_CHIP_SPARC_M7 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M8 || sun4v_chip_type == SUN4V_CHIP_SPARC_SN || sun4v_chip_type == SUN4V_CHIP_SPARC64X) cap |= (AV_SPARC_VIS3 | AV_SPARC_HPC | diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index fed73f14aa49..c24f4bfc3b84 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2161,6 +2161,7 @@ static void __init sun4v_linear_pte_xor_finalize(void) */ switch (sun4v_chip_type) { case SUN4V_CHIP_SPARC_M7: + case SUN4V_CHIP_SPARC_M8: case SUN4V_CHIP_SPARC_SN: pagecv_flag = 0x00; break; @@ -2313,6 +2314,7 @@ void __init paging_init(void) */ switch (sun4v_chip_type) { case SUN4V_CHIP_SPARC_M7: + case SUN4V_CHIP_SPARC_M8: case SUN4V_CHIP_SPARC_SN: page_cache4v_flag = _PAGE_CP_4V; break; -- cgit v1.2.3 From fdaccf74fe83ab3438df014efd55788a1dd414b5 Mon Sep 17 00:00:00 2001 From: Vijay Kumar Date: Fri, 28 Jul 2017 19:29:32 -0600 Subject: sparc64: Increase max_phys_bits to 51 and VA bits to 53 for M8. On M8 chips, use a max_phys_bits value of 51. Also, M8 supports VA bits up to 54 bits. However, for now restrict VA bits to 53 due to 4-level pagetable limitation. Signed-off-by: Vijay Kumar Reviewed-by: Bob Picco Signed-off-by: David S. Miller --- arch/sparc/mm/init_64.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index c24f4bfc3b84..afa0099f3748 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -1944,12 +1944,22 @@ static void __init setup_page_offset(void) break; case SUN4V_CHIP_SPARC_M7: case SUN4V_CHIP_SPARC_SN: - default: /* M7 and later support 52-bit virtual addresses. */ sparc64_va_hole_top = 0xfff8000000000000UL; sparc64_va_hole_bottom = 0x0008000000000000UL; max_phys_bits = 49; break; + case SUN4V_CHIP_SPARC_M8: + default: + /* M8 and later support 54-bit virtual addresses. + * However, restricting M8 and above VA bits to 53 + * as 4-level page table cannot support more than + * 53 VA bits. + */ + sparc64_va_hole_top = 0xfff0000000000000UL; + sparc64_va_hole_bottom = 0x0010000000000000UL; + max_phys_bits = 51; + break; } } -- cgit v1.2.3 From b0a0c2566f28e71e5e32121992ac8060cec75510 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 4 Aug 2017 14:20:54 +0200 Subject: bpf, s390: fix jit branch offset related to ldimm64 While testing some other work that required JIT modifications, I run into test_bpf causing a hang when JIT enabled on s390. The problematic test case was the one from ddc665a4bb4b (bpf, arm64: fix jit branch offset related to ldimm64), and turns out that we do have a similar issue on s390 as well. In bpf_jit_prog() we update next instruction address after returning from bpf_jit_insn() with an insn_count. bpf_jit_insn() returns either -1 in case of error (e.g. unsupported insn), 1 or 2. The latter is only the case for ldimm64 due to spanning 2 insns, however, next address is only set to i + 1 not taking actual insn_count into account, thus fix is to use insn_count instead of 1. bpf_jit_enable in mode 2 provides also disasm on s390: Before fix: 000003ff800349b6: a7f40003 brc 15,3ff800349bc ; target 000003ff800349ba: 0000 unknown 000003ff800349bc: e3b0f0700024 stg %r11,112(%r15) 000003ff800349c2: e3e0f0880024 stg %r14,136(%r15) 000003ff800349c8: 0db0 basr %r11,%r0 000003ff800349ca: c0ef00000000 llilf %r14,0 000003ff800349d0: e320b0360004 lg %r2,54(%r11) 000003ff800349d6: e330b03e0004 lg %r3,62(%r11) 000003ff800349dc: ec23ffeda065 clgrj %r2,%r3,10,3ff800349b6 ; jmp 000003ff800349e2: e3e0b0460004 lg %r14,70(%r11) 000003ff800349e8: e3e0b04e0004 lg %r14,78(%r11) 000003ff800349ee: b904002e lgr %r2,%r14 000003ff800349f2: e3b0f0700004 lg %r11,112(%r15) 000003ff800349f8: e3e0f0880004 lg %r14,136(%r15) 000003ff800349fe: 07fe bcr 15,%r14 After fix: 000003ff80ef3db4: a7f40003 brc 15,3ff80ef3dba 000003ff80ef3db8: 0000 unknown 000003ff80ef3dba: e3b0f0700024 stg %r11,112(%r15) 000003ff80ef3dc0: e3e0f0880024 stg %r14,136(%r15) 000003ff80ef3dc6: 0db0 basr %r11,%r0 000003ff80ef3dc8: c0ef00000000 llilf %r14,0 000003ff80ef3dce: e320b0360004 lg %r2,54(%r11) 000003ff80ef3dd4: e330b03e0004 lg %r3,62(%r11) 000003ff80ef3dda: ec230006a065 clgrj %r2,%r3,10,3ff80ef3de6 ; jmp 000003ff80ef3de0: e3e0b0460004 lg %r14,70(%r11) 000003ff80ef3de6: e3e0b04e0004 lg %r14,78(%r11) ; target 000003ff80ef3dec: b904002e lgr %r2,%r14 000003ff80ef3df0: e3b0f0700004 lg %r11,112(%r15) 000003ff80ef3df6: e3e0f0880004 lg %r14,136(%r15) 000003ff80ef3dfc: 07fe bcr 15,%r14 test_bpf.ko suite runs fine after the fix. Fixes: 054623105728 ("s390/bpf: Add s390x eBPF JIT compiler backend") Signed-off-by: Daniel Borkmann Tested-by: Michael Holzheu Signed-off-by: David S. Miller --- arch/s390/net/bpf_jit_comp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 01c6fbc3e85b..1803797fc885 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -1253,7 +1253,8 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp) insn_count = bpf_jit_insn(jit, fp, i); if (insn_count < 0) return -1; - jit->addrs[i + 1] = jit->prg; /* Next instruction address */ + /* Next instruction address */ + jit->addrs[i + insn_count] = jit->prg; } bpf_jit_epilogue(jit); -- cgit v1.2.3 From b6bd53f9c4e825fca82fe1392157c78443c814ab Mon Sep 17 00:00:00 2001 From: David Daney Date: Thu, 3 Aug 2017 17:10:12 -0700 Subject: MIPS: Add missing file for eBPF JIT. Inexplicably, commit f381bf6d82f0 ("MIPS: Add support for eBPF JIT.") lost a file somewhere on its path to Linus' tree. Add back the missing ebpf_jit.c so that we can build with CONFIG_BPF_JIT selected. This version of ebpf_jit.c is identical to the original except for two minor change need to resolve conflicts with changes merged from the BPF branch: A) Set prog->jited_len = image_size; B) Use BPF_TAIL_CALL instead of BPF_CALL | BPF_X Fixes: f381bf6d82f0 ("MIPS: Add support for eBPF JIT.") Signed-off-by: David Daney Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- arch/mips/net/ebpf_jit.c | 1950 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1950 insertions(+) create mode 100644 arch/mips/net/ebpf_jit.c (limited to 'arch') diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c new file mode 100644 index 000000000000..3f87b96da5c4 --- /dev/null +++ b/arch/mips/net/ebpf_jit.c @@ -0,0 +1,1950 @@ +/* + * Just-In-Time compiler for eBPF filters on MIPS + * + * Copyright (c) 2017 Cavium, Inc. + * + * Based on code from: + * + * Copyright (c) 2014 Imagination Technologies Ltd. + * Author: Markos Chandras + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Registers used by JIT */ +#define MIPS_R_ZERO 0 +#define MIPS_R_AT 1 +#define MIPS_R_V0 2 /* BPF_R0 */ +#define MIPS_R_V1 3 +#define MIPS_R_A0 4 /* BPF_R1 */ +#define MIPS_R_A1 5 /* BPF_R2 */ +#define MIPS_R_A2 6 /* BPF_R3 */ +#define MIPS_R_A3 7 /* BPF_R4 */ +#define MIPS_R_A4 8 /* BPF_R5 */ +#define MIPS_R_T4 12 /* BPF_AX */ +#define MIPS_R_T5 13 +#define MIPS_R_T6 14 +#define MIPS_R_T7 15 +#define MIPS_R_S0 16 /* BPF_R6 */ +#define MIPS_R_S1 17 /* BPF_R7 */ +#define MIPS_R_S2 18 /* BPF_R8 */ +#define MIPS_R_S3 19 /* BPF_R9 */ +#define MIPS_R_S4 20 /* BPF_TCC */ +#define MIPS_R_S5 21 +#define MIPS_R_S6 22 +#define MIPS_R_S7 23 +#define MIPS_R_T8 24 +#define MIPS_R_T9 25 +#define MIPS_R_SP 29 +#define MIPS_R_RA 31 + +/* eBPF flags */ +#define EBPF_SAVE_S0 BIT(0) +#define EBPF_SAVE_S1 BIT(1) +#define EBPF_SAVE_S2 BIT(2) +#define EBPF_SAVE_S3 BIT(3) +#define EBPF_SAVE_S4 BIT(4) +#define EBPF_SAVE_RA BIT(5) +#define EBPF_SEEN_FP BIT(6) +#define EBPF_SEEN_TC BIT(7) +#define EBPF_TCC_IN_V1 BIT(8) + +/* + * For the mips64 ISA, we need to track the value range or type for + * each JIT register. The BPF machine requires zero extended 32-bit + * values, but the mips64 ISA requires sign extended 32-bit values. + * At each point in the BPF program we track the state of every + * register so that we can zero extend or sign extend as the BPF + * semantics require. + */ +enum reg_val_type { + /* uninitialized */ + REG_UNKNOWN, + /* not known to be 32-bit compatible. */ + REG_64BIT, + /* 32-bit compatible, no truncation needed for 64-bit ops. */ + REG_64BIT_32BIT, + /* 32-bit compatible, need truncation for 64-bit ops. */ + REG_32BIT, + /* 32-bit zero extended. */ + REG_32BIT_ZERO_EX, + /* 32-bit no sign/zero extension needed. */ + REG_32BIT_POS +}; + +/* + * high bit of offsets indicates if long branch conversion done at + * this insn. + */ +#define OFFSETS_B_CONV BIT(31) + +/** + * struct jit_ctx - JIT context + * @skf: The sk_filter + * @stack_size: eBPF stack size + * @tmp_offset: eBPF $sp offset to 8-byte temporary memory + * @idx: Instruction index + * @flags: JIT flags + * @offsets: Instruction offsets + * @target: Memory location for the compiled filter + * @reg_val_types Packed enum reg_val_type for each register. + */ +struct jit_ctx { + const struct bpf_prog *skf; + int stack_size; + int tmp_offset; + u32 idx; + u32 flags; + u32 *offsets; + u32 *target; + u64 *reg_val_types; + unsigned int long_b_conversion:1; + unsigned int gen_b_offsets:1; +}; + +static void set_reg_val_type(u64 *rvt, int reg, enum reg_val_type type) +{ + *rvt &= ~(7ull << (reg * 3)); + *rvt |= ((u64)type << (reg * 3)); +} + +static enum reg_val_type get_reg_val_type(const struct jit_ctx *ctx, + int index, int reg) +{ + return (ctx->reg_val_types[index] >> (reg * 3)) & 7; +} + +/* Simply emit the instruction if the JIT memory space has been allocated */ +#define emit_instr(ctx, func, ...) \ +do { \ + if ((ctx)->target != NULL) { \ + u32 *p = &(ctx)->target[ctx->idx]; \ + uasm_i_##func(&p, ##__VA_ARGS__); \ + } \ + (ctx)->idx++; \ +} while (0) + +static unsigned int j_target(struct jit_ctx *ctx, int target_idx) +{ + unsigned long target_va, base_va; + unsigned int r; + + if (!ctx->target) + return 0; + + base_va = (unsigned long)ctx->target; + target_va = base_va + (ctx->offsets[target_idx] & ~OFFSETS_B_CONV); + + if ((base_va & ~0x0ffffffful) != (target_va & ~0x0ffffffful)) + return (unsigned int)-1; + r = target_va & 0x0ffffffful; + return r; +} + +/* Compute the immediate value for PC-relative branches. */ +static u32 b_imm(unsigned int tgt, struct jit_ctx *ctx) +{ + if (!ctx->gen_b_offsets) + return 0; + + /* + * We want a pc-relative branch. tgt is the instruction offset + * we want to jump to. + + * Branch on MIPS: + * I: target_offset <- sign_extend(offset) + * I+1: PC += target_offset (delay slot) + * + * ctx->idx currently points to the branch instruction + * but the offset is added to the delay slot so we need + * to subtract 4. + */ + return (ctx->offsets[tgt] & ~OFFSETS_B_CONV) - + (ctx->idx * 4) - 4; +} + +int bpf_jit_enable __read_mostly; + +enum which_ebpf_reg { + src_reg, + src_reg_no_fp, + dst_reg, + dst_reg_fp_ok +}; + +/* + * For eBPF, the register mapping naturally falls out of the + * requirements of eBPF and the MIPS n64 ABI. We don't maintain a + * separate frame pointer, so BPF_REG_10 relative accesses are + * adjusted to be $sp relative. + */ +int ebpf_to_mips_reg(struct jit_ctx *ctx, const struct bpf_insn *insn, + enum which_ebpf_reg w) +{ + int ebpf_reg = (w == src_reg || w == src_reg_no_fp) ? + insn->src_reg : insn->dst_reg; + + switch (ebpf_reg) { + case BPF_REG_0: + return MIPS_R_V0; + case BPF_REG_1: + return MIPS_R_A0; + case BPF_REG_2: + return MIPS_R_A1; + case BPF_REG_3: + return MIPS_R_A2; + case BPF_REG_4: + return MIPS_R_A3; + case BPF_REG_5: + return MIPS_R_A4; + case BPF_REG_6: + ctx->flags |= EBPF_SAVE_S0; + return MIPS_R_S0; + case BPF_REG_7: + ctx->flags |= EBPF_SAVE_S1; + return MIPS_R_S1; + case BPF_REG_8: + ctx->flags |= EBPF_SAVE_S2; + return MIPS_R_S2; + case BPF_REG_9: + ctx->flags |= EBPF_SAVE_S3; + return MIPS_R_S3; + case BPF_REG_10: + if (w == dst_reg || w == src_reg_no_fp) + goto bad_reg; + ctx->flags |= EBPF_SEEN_FP; + /* + * Needs special handling, return something that + * cannot be clobbered just in case. + */ + return MIPS_R_ZERO; + case BPF_REG_AX: + return MIPS_R_T4; + default: +bad_reg: + WARN(1, "Illegal bpf reg: %d\n", ebpf_reg); + return -EINVAL; + } +} +/* + * eBPF stack frame will be something like: + * + * Entry $sp ------> +--------------------------------+ + * | $ra (optional) | + * +--------------------------------+ + * | $s0 (optional) | + * +--------------------------------+ + * | $s1 (optional) | + * +--------------------------------+ + * | $s2 (optional) | + * +--------------------------------+ + * | $s3 (optional) | + * +--------------------------------+ + * | $s4 (optional) | + * +--------------------------------+ + * | tmp-storage (if $ra saved) | + * $sp + tmp_offset --> +--------------------------------+ <--BPF_REG_10 + * | BPF_REG_10 relative storage | + * | MAX_BPF_STACK (optional) | + * | . | + * | . | + * | . | + * $sp --------> +--------------------------------+ + * + * If BPF_REG_10 is never referenced, then the MAX_BPF_STACK sized + * area is not allocated. + */ +static int gen_int_prologue(struct jit_ctx *ctx) +{ + int stack_adjust = 0; + int store_offset; + int locals_size; + + if (ctx->flags & EBPF_SAVE_RA) + /* + * If RA we are doing a function call and may need + * extra 8-byte tmp area. + */ + stack_adjust += 16; + if (ctx->flags & EBPF_SAVE_S0) + stack_adjust += 8; + if (ctx->flags & EBPF_SAVE_S1) + stack_adjust += 8; + if (ctx->flags & EBPF_SAVE_S2) + stack_adjust += 8; + if (ctx->flags & EBPF_SAVE_S3) + stack_adjust += 8; + if (ctx->flags & EBPF_SAVE_S4) + stack_adjust += 8; + + BUILD_BUG_ON(MAX_BPF_STACK & 7); + locals_size = (ctx->flags & EBPF_SEEN_FP) ? MAX_BPF_STACK : 0; + + stack_adjust += locals_size; + ctx->tmp_offset = locals_size; + + ctx->stack_size = stack_adjust; + + /* + * First instruction initializes the tail call count (TCC). + * On tail call we skip this instruction, and the TCC is + * passed in $v1 from the caller. + */ + emit_instr(ctx, daddiu, MIPS_R_V1, MIPS_R_ZERO, MAX_TAIL_CALL_CNT); + if (stack_adjust) + emit_instr(ctx, daddiu, MIPS_R_SP, MIPS_R_SP, -stack_adjust); + else + return 0; + + store_offset = stack_adjust - 8; + + if (ctx->flags & EBPF_SAVE_RA) { + emit_instr(ctx, sd, MIPS_R_RA, store_offset, MIPS_R_SP); + store_offset -= 8; + } + if (ctx->flags & EBPF_SAVE_S0) { + emit_instr(ctx, sd, MIPS_R_S0, store_offset, MIPS_R_SP); + store_offset -= 8; + } + if (ctx->flags & EBPF_SAVE_S1) { + emit_instr(ctx, sd, MIPS_R_S1, store_offset, MIPS_R_SP); + store_offset -= 8; + } + if (ctx->flags & EBPF_SAVE_S2) { + emit_instr(ctx, sd, MIPS_R_S2, store_offset, MIPS_R_SP); + store_offset -= 8; + } + if (ctx->flags & EBPF_SAVE_S3) { + emit_instr(ctx, sd, MIPS_R_S3, store_offset, MIPS_R_SP); + store_offset -= 8; + } + if (ctx->flags & EBPF_SAVE_S4) { + emit_instr(ctx, sd, MIPS_R_S4, store_offset, MIPS_R_SP); + store_offset -= 8; + } + + if ((ctx->flags & EBPF_SEEN_TC) && !(ctx->flags & EBPF_TCC_IN_V1)) + emit_instr(ctx, daddu, MIPS_R_S4, MIPS_R_V1, MIPS_R_ZERO); + + return 0; +} + +static int build_int_epilogue(struct jit_ctx *ctx, int dest_reg) +{ + const struct bpf_prog *prog = ctx->skf; + int stack_adjust = ctx->stack_size; + int store_offset = stack_adjust - 8; + int r0 = MIPS_R_V0; + + if (dest_reg == MIPS_R_RA && + get_reg_val_type(ctx, prog->len, BPF_REG_0) == REG_32BIT_ZERO_EX) + /* Don't let zero extended value escape. */ + emit_instr(ctx, sll, r0, r0, 0); + + if (ctx->flags & EBPF_SAVE_RA) { + emit_instr(ctx, ld, MIPS_R_RA, store_offset, MIPS_R_SP); + store_offset -= 8; + } + if (ctx->flags & EBPF_SAVE_S0) { + emit_instr(ctx, ld, MIPS_R_S0, store_offset, MIPS_R_SP); + store_offset -= 8; + } + if (ctx->flags & EBPF_SAVE_S1) { + emit_instr(ctx, ld, MIPS_R_S1, store_offset, MIPS_R_SP); + store_offset -= 8; + } + if (ctx->flags & EBPF_SAVE_S2) { + emit_instr(ctx, ld, MIPS_R_S2, store_offset, MIPS_R_SP); + store_offset -= 8; + } + if (ctx->flags & EBPF_SAVE_S3) { + emit_instr(ctx, ld, MIPS_R_S3, store_offset, MIPS_R_SP); + store_offset -= 8; + } + if (ctx->flags & EBPF_SAVE_S4) { + emit_instr(ctx, ld, MIPS_R_S4, store_offset, MIPS_R_SP); + store_offset -= 8; + } + emit_instr(ctx, jr, dest_reg); + + if (stack_adjust) + emit_instr(ctx, daddiu, MIPS_R_SP, MIPS_R_SP, stack_adjust); + else + emit_instr(ctx, nop); + + return 0; +} + +static void gen_imm_to_reg(const struct bpf_insn *insn, int reg, + struct jit_ctx *ctx) +{ + if (insn->imm >= S16_MIN && insn->imm <= S16_MAX) { + emit_instr(ctx, addiu, reg, MIPS_R_ZERO, insn->imm); + } else { + int lower = (s16)(insn->imm & 0xffff); + int upper = insn->imm - lower; + + emit_instr(ctx, lui, reg, upper >> 16); + emit_instr(ctx, addiu, reg, reg, lower); + } + +} + +static int gen_imm_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, + int idx) +{ + int upper_bound, lower_bound; + int dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + + if (dst < 0) + return dst; + + switch (BPF_OP(insn->code)) { + case BPF_MOV: + case BPF_ADD: + upper_bound = S16_MAX; + lower_bound = S16_MIN; + break; + case BPF_SUB: + upper_bound = -(int)S16_MIN; + lower_bound = -(int)S16_MAX; + break; + case BPF_AND: + case BPF_OR: + case BPF_XOR: + upper_bound = 0xffff; + lower_bound = 0; + break; + case BPF_RSH: + case BPF_LSH: + case BPF_ARSH: + /* Shift amounts are truncated, no need for bounds */ + upper_bound = S32_MAX; + lower_bound = S32_MIN; + break; + default: + return -EINVAL; + } + + /* + * Immediate move clobbers the register, so no sign/zero + * extension needed. + */ + if (BPF_CLASS(insn->code) == BPF_ALU64 && + BPF_OP(insn->code) != BPF_MOV && + get_reg_val_type(ctx, idx, insn->dst_reg) == REG_32BIT) + emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32); + /* BPF_ALU | BPF_LSH doesn't need separate sign extension */ + if (BPF_CLASS(insn->code) == BPF_ALU && + BPF_OP(insn->code) != BPF_LSH && + BPF_OP(insn->code) != BPF_MOV && + get_reg_val_type(ctx, idx, insn->dst_reg) != REG_32BIT) + emit_instr(ctx, sll, dst, dst, 0); + + if (insn->imm >= lower_bound && insn->imm <= upper_bound) { + /* single insn immediate case */ + switch (BPF_OP(insn->code) | BPF_CLASS(insn->code)) { + case BPF_ALU64 | BPF_MOV: + emit_instr(ctx, daddiu, dst, MIPS_R_ZERO, insn->imm); + break; + case BPF_ALU64 | BPF_AND: + case BPF_ALU | BPF_AND: + emit_instr(ctx, andi, dst, dst, insn->imm); + break; + case BPF_ALU64 | BPF_OR: + case BPF_ALU | BPF_OR: + emit_instr(ctx, ori, dst, dst, insn->imm); + break; + case BPF_ALU64 | BPF_XOR: + case BPF_ALU | BPF_XOR: + emit_instr(ctx, xori, dst, dst, insn->imm); + break; + case BPF_ALU64 | BPF_ADD: + emit_instr(ctx, daddiu, dst, dst, insn->imm); + break; + case BPF_ALU64 | BPF_SUB: + emit_instr(ctx, daddiu, dst, dst, -insn->imm); + break; + case BPF_ALU64 | BPF_RSH: + emit_instr(ctx, dsrl_safe, dst, dst, insn->imm & 0x3f); + break; + case BPF_ALU | BPF_RSH: + emit_instr(ctx, srl, dst, dst, insn->imm & 0x1f); + break; + case BPF_ALU64 | BPF_LSH: + emit_instr(ctx, dsll_safe, dst, dst, insn->imm & 0x3f); + break; + case BPF_ALU | BPF_LSH: + emit_instr(ctx, sll, dst, dst, insn->imm & 0x1f); + break; + case BPF_ALU64 | BPF_ARSH: + emit_instr(ctx, dsra_safe, dst, dst, insn->imm & 0x3f); + break; + case BPF_ALU | BPF_ARSH: + emit_instr(ctx, sra, dst, dst, insn->imm & 0x1f); + break; + case BPF_ALU | BPF_MOV: + emit_instr(ctx, addiu, dst, MIPS_R_ZERO, insn->imm); + break; + case BPF_ALU | BPF_ADD: + emit_instr(ctx, addiu, dst, dst, insn->imm); + break; + case BPF_ALU | BPF_SUB: + emit_instr(ctx, addiu, dst, dst, -insn->imm); + break; + default: + return -EINVAL; + } + } else { + /* multi insn immediate case */ + if (BPF_OP(insn->code) == BPF_MOV) { + gen_imm_to_reg(insn, dst, ctx); + } else { + gen_imm_to_reg(insn, MIPS_R_AT, ctx); + switch (BPF_OP(insn->code) | BPF_CLASS(insn->code)) { + case BPF_ALU64 | BPF_AND: + case BPF_ALU | BPF_AND: + emit_instr(ctx, and, dst, dst, MIPS_R_AT); + break; + case BPF_ALU64 | BPF_OR: + case BPF_ALU | BPF_OR: + emit_instr(ctx, or, dst, dst, MIPS_R_AT); + break; + case BPF_ALU64 | BPF_XOR: + case BPF_ALU | BPF_XOR: + emit_instr(ctx, xor, dst, dst, MIPS_R_AT); + break; + case BPF_ALU64 | BPF_ADD: + emit_instr(ctx, daddu, dst, dst, MIPS_R_AT); + break; + case BPF_ALU64 | BPF_SUB: + emit_instr(ctx, dsubu, dst, dst, MIPS_R_AT); + break; + case BPF_ALU | BPF_ADD: + emit_instr(ctx, addu, dst, dst, MIPS_R_AT); + break; + case BPF_ALU | BPF_SUB: + emit_instr(ctx, subu, dst, dst, MIPS_R_AT); + break; + default: + return -EINVAL; + } + } + } + + return 0; +} + +static void * __must_check +ool_skb_header_pointer(const struct sk_buff *skb, int offset, + int len, void *buffer) +{ + return skb_header_pointer(skb, offset, len, buffer); +} + +static int size_to_len(const struct bpf_insn *insn) +{ + switch (BPF_SIZE(insn->code)) { + case BPF_B: + return 1; + case BPF_H: + return 2; + case BPF_W: + return 4; + case BPF_DW: + return 8; + } + return 0; +} + +static void emit_const_to_reg(struct jit_ctx *ctx, int dst, u64 value) +{ + if (value >= 0xffffffffffff8000ull || value < 0x8000ull) { + emit_instr(ctx, daddiu, dst, MIPS_R_ZERO, (int)value); + } else if (value >= 0xffffffff80000000ull || + (value < 0x80000000 && value > 0xffff)) { + emit_instr(ctx, lui, dst, (s32)(s16)(value >> 16)); + emit_instr(ctx, ori, dst, dst, (unsigned int)(value & 0xffff)); + } else { + int i; + bool seen_part = false; + int needed_shift = 0; + + for (i = 0; i < 4; i++) { + u64 part = (value >> (16 * (3 - i))) & 0xffff; + + if (seen_part && needed_shift > 0 && (part || i == 3)) { + emit_instr(ctx, dsll_safe, dst, dst, needed_shift); + needed_shift = 0; + } + if (part) { + if (i == 0 || (!seen_part && i < 3 && part < 0x8000)) { + emit_instr(ctx, lui, dst, (s32)(s16)part); + needed_shift = -16; + } else { + emit_instr(ctx, ori, dst, + seen_part ? dst : MIPS_R_ZERO, + (unsigned int)part); + } + seen_part = true; + } + if (seen_part) + needed_shift += 16; + } + } +} + +static int emit_bpf_tail_call(struct jit_ctx *ctx, int this_idx) +{ + int off, b_off; + + ctx->flags |= EBPF_SEEN_TC; + /* + * if (index >= array->map.max_entries) + * goto out; + */ + off = offsetof(struct bpf_array, map.max_entries); + emit_instr(ctx, lwu, MIPS_R_T5, off, MIPS_R_A1); + emit_instr(ctx, sltu, MIPS_R_AT, MIPS_R_T5, MIPS_R_A2); + b_off = b_imm(this_idx + 1, ctx); + emit_instr(ctx, bne, MIPS_R_AT, MIPS_R_ZERO, b_off); + /* + * if (--TCC < 0) + * goto out; + */ + /* Delay slot */ + emit_instr(ctx, daddiu, MIPS_R_T5, + (ctx->flags & EBPF_TCC_IN_V1) ? MIPS_R_V1 : MIPS_R_S4, -1); + b_off = b_imm(this_idx + 1, ctx); + emit_instr(ctx, bltz, MIPS_R_T5, b_off); + /* + * prog = array->ptrs[index]; + * if (prog == NULL) + * goto out; + */ + /* Delay slot */ + emit_instr(ctx, dsll, MIPS_R_T8, MIPS_R_A2, 3); + emit_instr(ctx, daddu, MIPS_R_T8, MIPS_R_T8, MIPS_R_A1); + off = offsetof(struct bpf_array, ptrs); + emit_instr(ctx, ld, MIPS_R_AT, off, MIPS_R_T8); + b_off = b_imm(this_idx + 1, ctx); + emit_instr(ctx, beq, MIPS_R_AT, MIPS_R_ZERO, b_off); + /* Delay slot */ + emit_instr(ctx, nop); + + /* goto *(prog->bpf_func + 4); */ + off = offsetof(struct bpf_prog, bpf_func); + emit_instr(ctx, ld, MIPS_R_T9, off, MIPS_R_AT); + /* All systems are go... propagate TCC */ + emit_instr(ctx, daddu, MIPS_R_V1, MIPS_R_T5, MIPS_R_ZERO); + /* Skip first instruction (TCC initialization) */ + emit_instr(ctx, daddiu, MIPS_R_T9, MIPS_R_T9, 4); + return build_int_epilogue(ctx, MIPS_R_T9); +} + +static bool use_bbit_insns(void) +{ + switch (current_cpu_type()) { + case CPU_CAVIUM_OCTEON: + case CPU_CAVIUM_OCTEON_PLUS: + case CPU_CAVIUM_OCTEON2: + case CPU_CAVIUM_OCTEON3: + return true; + default: + return false; + } +} + +static bool is_bad_offset(int b_off) +{ + return b_off > 0x1ffff || b_off < -0x20000; +} + +/* Returns the number of insn slots consumed. */ +static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, + int this_idx, int exit_idx) +{ + int src, dst, r, td, ts, mem_off, b_off; + bool need_swap, did_move, cmp_eq; + unsigned int target; + u64 t64; + s64 t64s; + + switch (insn->code) { + case BPF_ALU64 | BPF_ADD | BPF_K: /* ALU64_IMM */ + case BPF_ALU64 | BPF_SUB | BPF_K: /* ALU64_IMM */ + case BPF_ALU64 | BPF_OR | BPF_K: /* ALU64_IMM */ + case BPF_ALU64 | BPF_AND | BPF_K: /* ALU64_IMM */ + case BPF_ALU64 | BPF_LSH | BPF_K: /* ALU64_IMM */ + case BPF_ALU64 | BPF_RSH | BPF_K: /* ALU64_IMM */ + case BPF_ALU64 | BPF_XOR | BPF_K: /* ALU64_IMM */ + case BPF_ALU64 | BPF_ARSH | BPF_K: /* ALU64_IMM */ + case BPF_ALU64 | BPF_MOV | BPF_K: /* ALU64_IMM */ + case BPF_ALU | BPF_MOV | BPF_K: /* ALU32_IMM */ + case BPF_ALU | BPF_ADD | BPF_K: /* ALU32_IMM */ + case BPF_ALU | BPF_SUB | BPF_K: /* ALU32_IMM */ + case BPF_ALU | BPF_OR | BPF_K: /* ALU64_IMM */ + case BPF_ALU | BPF_AND | BPF_K: /* ALU64_IMM */ + case BPF_ALU | BPF_LSH | BPF_K: /* ALU64_IMM */ + case BPF_ALU | BPF_RSH | BPF_K: /* ALU64_IMM */ + case BPF_ALU | BPF_XOR | BPF_K: /* ALU64_IMM */ + case BPF_ALU | BPF_ARSH | BPF_K: /* ALU64_IMM */ + r = gen_imm_insn(insn, ctx, this_idx); + if (r < 0) + return r; + break; + case BPF_ALU64 | BPF_MUL | BPF_K: /* ALU64_IMM */ + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (dst < 0) + return dst; + if (get_reg_val_type(ctx, this_idx, insn->dst_reg) == REG_32BIT) + emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32); + if (insn->imm == 1) /* Mult by 1 is a nop */ + break; + gen_imm_to_reg(insn, MIPS_R_AT, ctx); + emit_instr(ctx, dmultu, MIPS_R_AT, dst); + emit_instr(ctx, mflo, dst); + break; + case BPF_ALU64 | BPF_NEG | BPF_K: /* ALU64_IMM */ + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (dst < 0) + return dst; + if (get_reg_val_type(ctx, this_idx, insn->dst_reg) == REG_32BIT) + emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32); + emit_instr(ctx, dsubu, dst, MIPS_R_ZERO, dst); + break; + case BPF_ALU | BPF_MUL | BPF_K: /* ALU_IMM */ + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (dst < 0) + return dst; + td = get_reg_val_type(ctx, this_idx, insn->dst_reg); + if (td == REG_64BIT || td == REG_32BIT_ZERO_EX) { + /* sign extend */ + emit_instr(ctx, sll, dst, dst, 0); + } + if (insn->imm == 1) /* Mult by 1 is a nop */ + break; + gen_imm_to_reg(insn, MIPS_R_AT, ctx); + emit_instr(ctx, multu, dst, MIPS_R_AT); + emit_instr(ctx, mflo, dst); + break; + case BPF_ALU | BPF_NEG | BPF_K: /* ALU_IMM */ + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (dst < 0) + return dst; + td = get_reg_val_type(ctx, this_idx, insn->dst_reg); + if (td == REG_64BIT || td == REG_32BIT_ZERO_EX) { + /* sign extend */ + emit_instr(ctx, sll, dst, dst, 0); + } + emit_instr(ctx, subu, dst, MIPS_R_ZERO, dst); + break; + case BPF_ALU | BPF_DIV | BPF_K: /* ALU_IMM */ + case BPF_ALU | BPF_MOD | BPF_K: /* ALU_IMM */ + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (dst < 0) + return dst; + if (insn->imm == 0) { /* Div by zero */ + b_off = b_imm(exit_idx, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_instr(ctx, beq, MIPS_R_ZERO, MIPS_R_ZERO, b_off); + emit_instr(ctx, addu, MIPS_R_V0, MIPS_R_ZERO, MIPS_R_ZERO); + } + td = get_reg_val_type(ctx, this_idx, insn->dst_reg); + if (td == REG_64BIT || td == REG_32BIT_ZERO_EX) + /* sign extend */ + emit_instr(ctx, sll, dst, dst, 0); + if (insn->imm == 1) { + /* div by 1 is a nop, mod by 1 is zero */ + if (BPF_OP(insn->code) == BPF_MOD) + emit_instr(ctx, addu, dst, MIPS_R_ZERO, MIPS_R_ZERO); + break; + } + gen_imm_to_reg(insn, MIPS_R_AT, ctx); + emit_instr(ctx, divu, dst, MIPS_R_AT); + if (BPF_OP(insn->code) == BPF_DIV) + emit_instr(ctx, mflo, dst); + else + emit_instr(ctx, mfhi, dst); + break; + case BPF_ALU64 | BPF_DIV | BPF_K: /* ALU_IMM */ + case BPF_ALU64 | BPF_MOD | BPF_K: /* ALU_IMM */ + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (dst < 0) + return dst; + if (insn->imm == 0) { /* Div by zero */ + b_off = b_imm(exit_idx, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_instr(ctx, beq, MIPS_R_ZERO, MIPS_R_ZERO, b_off); + emit_instr(ctx, addu, MIPS_R_V0, MIPS_R_ZERO, MIPS_R_ZERO); + } + if (get_reg_val_type(ctx, this_idx, insn->dst_reg) == REG_32BIT) + emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32); + + if (insn->imm == 1) { + /* div by 1 is a nop, mod by 1 is zero */ + if (BPF_OP(insn->code) == BPF_MOD) + emit_instr(ctx, addu, dst, MIPS_R_ZERO, MIPS_R_ZERO); + break; + } + gen_imm_to_reg(insn, MIPS_R_AT, ctx); + emit_instr(ctx, ddivu, dst, MIPS_R_AT); + if (BPF_OP(insn->code) == BPF_DIV) + emit_instr(ctx, mflo, dst); + else + emit_instr(ctx, mfhi, dst); + break; + case BPF_ALU64 | BPF_MOV | BPF_X: /* ALU64_REG */ + case BPF_ALU64 | BPF_ADD | BPF_X: /* ALU64_REG */ + case BPF_ALU64 | BPF_SUB | BPF_X: /* ALU64_REG */ + case BPF_ALU64 | BPF_XOR | BPF_X: /* ALU64_REG */ + case BPF_ALU64 | BPF_OR | BPF_X: /* ALU64_REG */ + case BPF_ALU64 | BPF_AND | BPF_X: /* ALU64_REG */ + case BPF_ALU64 | BPF_MUL | BPF_X: /* ALU64_REG */ + case BPF_ALU64 | BPF_DIV | BPF_X: /* ALU64_REG */ + case BPF_ALU64 | BPF_MOD | BPF_X: /* ALU64_REG */ + case BPF_ALU64 | BPF_LSH | BPF_X: /* ALU64_REG */ + case BPF_ALU64 | BPF_RSH | BPF_X: /* ALU64_REG */ + case BPF_ALU64 | BPF_ARSH | BPF_X: /* ALU64_REG */ + src = ebpf_to_mips_reg(ctx, insn, src_reg); + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (src < 0 || dst < 0) + return -EINVAL; + if (get_reg_val_type(ctx, this_idx, insn->dst_reg) == REG_32BIT) + emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32); + did_move = false; + if (insn->src_reg == BPF_REG_10) { + if (BPF_OP(insn->code) == BPF_MOV) { + emit_instr(ctx, daddiu, dst, MIPS_R_SP, MAX_BPF_STACK); + did_move = true; + } else { + emit_instr(ctx, daddiu, MIPS_R_AT, MIPS_R_SP, MAX_BPF_STACK); + src = MIPS_R_AT; + } + } else if (get_reg_val_type(ctx, this_idx, insn->src_reg) == REG_32BIT) { + int tmp_reg = MIPS_R_AT; + + if (BPF_OP(insn->code) == BPF_MOV) { + tmp_reg = dst; + did_move = true; + } + emit_instr(ctx, daddu, tmp_reg, src, MIPS_R_ZERO); + emit_instr(ctx, dinsu, tmp_reg, MIPS_R_ZERO, 32, 32); + src = MIPS_R_AT; + } + switch (BPF_OP(insn->code)) { + case BPF_MOV: + if (!did_move) + emit_instr(ctx, daddu, dst, src, MIPS_R_ZERO); + break; + case BPF_ADD: + emit_instr(ctx, daddu, dst, dst, src); + break; + case BPF_SUB: + emit_instr(ctx, dsubu, dst, dst, src); + break; + case BPF_XOR: + emit_instr(ctx, xor, dst, dst, src); + break; + case BPF_OR: + emit_instr(ctx, or, dst, dst, src); + break; + case BPF_AND: + emit_instr(ctx, and, dst, dst, src); + break; + case BPF_MUL: + emit_instr(ctx, dmultu, dst, src); + emit_instr(ctx, mflo, dst); + break; + case BPF_DIV: + case BPF_MOD: + b_off = b_imm(exit_idx, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_instr(ctx, beq, src, MIPS_R_ZERO, b_off); + emit_instr(ctx, movz, MIPS_R_V0, MIPS_R_ZERO, src); + emit_instr(ctx, ddivu, dst, src); + if (BPF_OP(insn->code) == BPF_DIV) + emit_instr(ctx, mflo, dst); + else + emit_instr(ctx, mfhi, dst); + break; + case BPF_LSH: + emit_instr(ctx, dsllv, dst, dst, src); + break; + case BPF_RSH: + emit_instr(ctx, dsrlv, dst, dst, src); + break; + case BPF_ARSH: + emit_instr(ctx, dsrav, dst, dst, src); + break; + default: + pr_err("ALU64_REG NOT HANDLED\n"); + return -EINVAL; + } + break; + case BPF_ALU | BPF_MOV | BPF_X: /* ALU_REG */ + case BPF_ALU | BPF_ADD | BPF_X: /* ALU_REG */ + case BPF_ALU | BPF_SUB | BPF_X: /* ALU_REG */ + case BPF_ALU | BPF_XOR | BPF_X: /* ALU_REG */ + case BPF_ALU | BPF_OR | BPF_X: /* ALU_REG */ + case BPF_ALU | BPF_AND | BPF_X: /* ALU_REG */ + case BPF_ALU | BPF_MUL | BPF_X: /* ALU_REG */ + case BPF_ALU | BPF_DIV | BPF_X: /* ALU_REG */ + case BPF_ALU | BPF_MOD | BPF_X: /* ALU_REG */ + case BPF_ALU | BPF_LSH | BPF_X: /* ALU_REG */ + case BPF_ALU | BPF_RSH | BPF_X: /* ALU_REG */ + src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp); + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (src < 0 || dst < 0) + return -EINVAL; + td = get_reg_val_type(ctx, this_idx, insn->dst_reg); + if (td == REG_64BIT || td == REG_32BIT_ZERO_EX) { + /* sign extend */ + emit_instr(ctx, sll, dst, dst, 0); + } + did_move = false; + ts = get_reg_val_type(ctx, this_idx, insn->src_reg); + if (ts == REG_64BIT || ts == REG_32BIT_ZERO_EX) { + int tmp_reg = MIPS_R_AT; + + if (BPF_OP(insn->code) == BPF_MOV) { + tmp_reg = dst; + did_move = true; + } + /* sign extend */ + emit_instr(ctx, sll, tmp_reg, src, 0); + src = MIPS_R_AT; + } + switch (BPF_OP(insn->code)) { + case BPF_MOV: + if (!did_move) + emit_instr(ctx, addu, dst, src, MIPS_R_ZERO); + break; + case BPF_ADD: + emit_instr(ctx, addu, dst, dst, src); + break; + case BPF_SUB: + emit_instr(ctx, subu, dst, dst, src); + break; + case BPF_XOR: + emit_instr(ctx, xor, dst, dst, src); + break; + case BPF_OR: + emit_instr(ctx, or, dst, dst, src); + break; + case BPF_AND: + emit_instr(ctx, and, dst, dst, src); + break; + case BPF_MUL: + emit_instr(ctx, mul, dst, dst, src); + break; + case BPF_DIV: + case BPF_MOD: + b_off = b_imm(exit_idx, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_instr(ctx, beq, src, MIPS_R_ZERO, b_off); + emit_instr(ctx, movz, MIPS_R_V0, MIPS_R_ZERO, src); + emit_instr(ctx, divu, dst, src); + if (BPF_OP(insn->code) == BPF_DIV) + emit_instr(ctx, mflo, dst); + else + emit_instr(ctx, mfhi, dst); + break; + case BPF_LSH: + emit_instr(ctx, sllv, dst, dst, src); + break; + case BPF_RSH: + emit_instr(ctx, srlv, dst, dst, src); + break; + default: + pr_err("ALU_REG NOT HANDLED\n"); + return -EINVAL; + } + break; + case BPF_JMP | BPF_EXIT: + if (this_idx + 1 < exit_idx) { + b_off = b_imm(exit_idx, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_instr(ctx, beq, MIPS_R_ZERO, MIPS_R_ZERO, b_off); + emit_instr(ctx, nop); + } + break; + case BPF_JMP | BPF_JEQ | BPF_K: /* JMP_IMM */ + case BPF_JMP | BPF_JNE | BPF_K: /* JMP_IMM */ + cmp_eq = (BPF_OP(insn->code) == BPF_JEQ); + dst = ebpf_to_mips_reg(ctx, insn, dst_reg_fp_ok); + if (dst < 0) + return dst; + if (insn->imm == 0) { + src = MIPS_R_ZERO; + } else { + gen_imm_to_reg(insn, MIPS_R_AT, ctx); + src = MIPS_R_AT; + } + goto jeq_common; + case BPF_JMP | BPF_JEQ | BPF_X: /* JMP_REG */ + case BPF_JMP | BPF_JNE | BPF_X: + case BPF_JMP | BPF_JSGT | BPF_X: + case BPF_JMP | BPF_JSGE | BPF_X: + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JSET | BPF_X: + src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp); + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (src < 0 || dst < 0) + return -EINVAL; + td = get_reg_val_type(ctx, this_idx, insn->dst_reg); + ts = get_reg_val_type(ctx, this_idx, insn->src_reg); + if (td == REG_32BIT && ts != REG_32BIT) { + emit_instr(ctx, sll, MIPS_R_AT, src, 0); + src = MIPS_R_AT; + } else if (ts == REG_32BIT && td != REG_32BIT) { + emit_instr(ctx, sll, MIPS_R_AT, dst, 0); + dst = MIPS_R_AT; + } + if (BPF_OP(insn->code) == BPF_JSET) { + emit_instr(ctx, and, MIPS_R_AT, dst, src); + cmp_eq = false; + dst = MIPS_R_AT; + src = MIPS_R_ZERO; + } else if (BPF_OP(insn->code) == BPF_JSGT) { + emit_instr(ctx, dsubu, MIPS_R_AT, dst, src); + if ((insn + 1)->code == (BPF_JMP | BPF_EXIT) && insn->off == 1) { + b_off = b_imm(exit_idx, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_instr(ctx, blez, MIPS_R_AT, b_off); + emit_instr(ctx, nop); + return 2; /* We consumed the exit. */ + } + b_off = b_imm(this_idx + insn->off + 1, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_instr(ctx, bgtz, MIPS_R_AT, b_off); + emit_instr(ctx, nop); + break; + } else if (BPF_OP(insn->code) == BPF_JSGE) { + emit_instr(ctx, slt, MIPS_R_AT, dst, src); + cmp_eq = true; + dst = MIPS_R_AT; + src = MIPS_R_ZERO; + } else if (BPF_OP(insn->code) == BPF_JGT) { + /* dst or src could be AT */ + emit_instr(ctx, dsubu, MIPS_R_T8, dst, src); + emit_instr(ctx, sltu, MIPS_R_AT, dst, src); + /* SP known to be non-zero, movz becomes boolean not */ + emit_instr(ctx, movz, MIPS_R_T9, MIPS_R_SP, MIPS_R_T8); + emit_instr(ctx, movn, MIPS_R_T9, MIPS_R_ZERO, MIPS_R_T8); + emit_instr(ctx, or, MIPS_R_AT, MIPS_R_T9, MIPS_R_AT); + cmp_eq = true; + dst = MIPS_R_AT; + src = MIPS_R_ZERO; + } else if (BPF_OP(insn->code) == BPF_JGE) { + emit_instr(ctx, sltu, MIPS_R_AT, dst, src); + cmp_eq = true; + dst = MIPS_R_AT; + src = MIPS_R_ZERO; + } else { /* JNE/JEQ case */ + cmp_eq = (BPF_OP(insn->code) == BPF_JEQ); + } +jeq_common: + /* + * If the next insn is EXIT and we are jumping arround + * only it, invert the sense of the compare and + * conditionally jump to the exit. Poor man's branch + * chaining. + */ + if ((insn + 1)->code == (BPF_JMP | BPF_EXIT) && insn->off == 1) { + b_off = b_imm(exit_idx, ctx); + if (is_bad_offset(b_off)) { + target = j_target(ctx, exit_idx); + if (target == (unsigned int)-1) + return -E2BIG; + cmp_eq = !cmp_eq; + b_off = 4 * 3; + if (!(ctx->offsets[this_idx] & OFFSETS_B_CONV)) { + ctx->offsets[this_idx] |= OFFSETS_B_CONV; + ctx->long_b_conversion = 1; + } + } + + if (cmp_eq) + emit_instr(ctx, bne, dst, src, b_off); + else + emit_instr(ctx, beq, dst, src, b_off); + emit_instr(ctx, nop); + if (ctx->offsets[this_idx] & OFFSETS_B_CONV) { + emit_instr(ctx, j, target); + emit_instr(ctx, nop); + } + return 2; /* We consumed the exit. */ + } + b_off = b_imm(this_idx + insn->off + 1, ctx); + if (is_bad_offset(b_off)) { + target = j_target(ctx, this_idx + insn->off + 1); + if (target == (unsigned int)-1) + return -E2BIG; + cmp_eq = !cmp_eq; + b_off = 4 * 3; + if (!(ctx->offsets[this_idx] & OFFSETS_B_CONV)) { + ctx->offsets[this_idx] |= OFFSETS_B_CONV; + ctx->long_b_conversion = 1; + } + } + + if (cmp_eq) + emit_instr(ctx, beq, dst, src, b_off); + else + emit_instr(ctx, bne, dst, src, b_off); + emit_instr(ctx, nop); + if (ctx->offsets[this_idx] & OFFSETS_B_CONV) { + emit_instr(ctx, j, target); + emit_instr(ctx, nop); + } + break; + case BPF_JMP | BPF_JSGT | BPF_K: /* JMP_IMM */ + case BPF_JMP | BPF_JSGE | BPF_K: /* JMP_IMM */ + cmp_eq = (BPF_OP(insn->code) == BPF_JSGE); + dst = ebpf_to_mips_reg(ctx, insn, dst_reg_fp_ok); + if (dst < 0) + return dst; + + if (insn->imm == 0) { + if ((insn + 1)->code == (BPF_JMP | BPF_EXIT) && insn->off == 1) { + b_off = b_imm(exit_idx, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + if (cmp_eq) + emit_instr(ctx, bltz, dst, b_off); + else + emit_instr(ctx, blez, dst, b_off); + emit_instr(ctx, nop); + return 2; /* We consumed the exit. */ + } + b_off = b_imm(this_idx + insn->off + 1, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + if (cmp_eq) + emit_instr(ctx, bgez, dst, b_off); + else + emit_instr(ctx, bgtz, dst, b_off); + emit_instr(ctx, nop); + break; + } + /* + * only "LT" compare available, so we must use imm + 1 + * to generate "GT" + */ + t64s = insn->imm + (cmp_eq ? 0 : 1); + if (t64s >= S16_MIN && t64s <= S16_MAX) { + emit_instr(ctx, slti, MIPS_R_AT, dst, (int)t64s); + src = MIPS_R_AT; + dst = MIPS_R_ZERO; + cmp_eq = true; + goto jeq_common; + } + emit_const_to_reg(ctx, MIPS_R_AT, (u64)t64s); + emit_instr(ctx, slt, MIPS_R_AT, dst, MIPS_R_AT); + src = MIPS_R_AT; + dst = MIPS_R_ZERO; + cmp_eq = true; + goto jeq_common; + + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGE | BPF_K: + cmp_eq = (BPF_OP(insn->code) == BPF_JGE); + dst = ebpf_to_mips_reg(ctx, insn, dst_reg_fp_ok); + if (dst < 0) + return dst; + /* + * only "LT" compare available, so we must use imm + 1 + * to generate "GT" + */ + t64s = (u64)(u32)(insn->imm) + (cmp_eq ? 0 : 1); + if (t64s >= 0 && t64s <= S16_MAX) { + emit_instr(ctx, sltiu, MIPS_R_AT, dst, (int)t64s); + src = MIPS_R_AT; + dst = MIPS_R_ZERO; + cmp_eq = true; + goto jeq_common; + } + emit_const_to_reg(ctx, MIPS_R_AT, (u64)t64s); + emit_instr(ctx, sltu, MIPS_R_AT, dst, MIPS_R_AT); + src = MIPS_R_AT; + dst = MIPS_R_ZERO; + cmp_eq = true; + goto jeq_common; + + case BPF_JMP | BPF_JSET | BPF_K: /* JMP_IMM */ + dst = ebpf_to_mips_reg(ctx, insn, dst_reg_fp_ok); + if (dst < 0) + return dst; + + if (use_bbit_insns() && hweight32((u32)insn->imm) == 1) { + if ((insn + 1)->code == (BPF_JMP | BPF_EXIT) && insn->off == 1) { + b_off = b_imm(exit_idx, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_instr(ctx, bbit0, dst, ffs((u32)insn->imm) - 1, b_off); + emit_instr(ctx, nop); + return 2; /* We consumed the exit. */ + } + b_off = b_imm(this_idx + insn->off + 1, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_instr(ctx, bbit1, dst, ffs((u32)insn->imm) - 1, b_off); + emit_instr(ctx, nop); + break; + } + t64 = (u32)insn->imm; + emit_const_to_reg(ctx, MIPS_R_AT, t64); + emit_instr(ctx, and, MIPS_R_AT, dst, MIPS_R_AT); + src = MIPS_R_AT; + dst = MIPS_R_ZERO; + cmp_eq = false; + goto jeq_common; + + case BPF_JMP | BPF_JA: + /* + * Prefer relative branch for easier debugging, but + * fall back if needed. + */ + b_off = b_imm(this_idx + insn->off + 1, ctx); + if (is_bad_offset(b_off)) { + target = j_target(ctx, this_idx + insn->off + 1); + if (target == (unsigned int)-1) + return -E2BIG; + emit_instr(ctx, j, target); + } else { + emit_instr(ctx, b, b_off); + } + emit_instr(ctx, nop); + break; + case BPF_LD | BPF_DW | BPF_IMM: + if (insn->src_reg != 0) + return -EINVAL; + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (dst < 0) + return dst; + t64 = ((u64)(u32)insn->imm) | ((u64)(insn + 1)->imm << 32); + emit_const_to_reg(ctx, dst, t64); + return 2; /* Double slot insn */ + + case BPF_JMP | BPF_CALL: + ctx->flags |= EBPF_SAVE_RA; + t64s = (s64)insn->imm + (s64)__bpf_call_base; + emit_const_to_reg(ctx, MIPS_R_T9, (u64)t64s); + emit_instr(ctx, jalr, MIPS_R_RA, MIPS_R_T9); + /* delay slot */ + emit_instr(ctx, nop); + break; + + case BPF_JMP | BPF_TAIL_CALL: + if (emit_bpf_tail_call(ctx, this_idx)) + return -EINVAL; + break; + + case BPF_LD | BPF_B | BPF_ABS: + case BPF_LD | BPF_H | BPF_ABS: + case BPF_LD | BPF_W | BPF_ABS: + case BPF_LD | BPF_DW | BPF_ABS: + ctx->flags |= EBPF_SAVE_RA; + + gen_imm_to_reg(insn, MIPS_R_A1, ctx); + emit_instr(ctx, addiu, MIPS_R_A2, MIPS_R_ZERO, size_to_len(insn)); + + if (insn->imm < 0) { + emit_const_to_reg(ctx, MIPS_R_T9, (u64)bpf_internal_load_pointer_neg_helper); + } else { + emit_const_to_reg(ctx, MIPS_R_T9, (u64)ool_skb_header_pointer); + emit_instr(ctx, daddiu, MIPS_R_A3, MIPS_R_SP, ctx->tmp_offset); + } + goto ld_skb_common; + + case BPF_LD | BPF_B | BPF_IND: + case BPF_LD | BPF_H | BPF_IND: + case BPF_LD | BPF_W | BPF_IND: + case BPF_LD | BPF_DW | BPF_IND: + ctx->flags |= EBPF_SAVE_RA; + src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp); + if (src < 0) + return src; + ts = get_reg_val_type(ctx, this_idx, insn->src_reg); + if (ts == REG_32BIT_ZERO_EX) { + /* sign extend */ + emit_instr(ctx, sll, MIPS_R_A1, src, 0); + src = MIPS_R_A1; + } + if (insn->imm >= S16_MIN && insn->imm <= S16_MAX) { + emit_instr(ctx, daddiu, MIPS_R_A1, src, insn->imm); + } else { + gen_imm_to_reg(insn, MIPS_R_AT, ctx); + emit_instr(ctx, daddu, MIPS_R_A1, MIPS_R_AT, src); + } + /* truncate to 32-bit int */ + emit_instr(ctx, sll, MIPS_R_A1, MIPS_R_A1, 0); + emit_instr(ctx, daddiu, MIPS_R_A3, MIPS_R_SP, ctx->tmp_offset); + emit_instr(ctx, slt, MIPS_R_AT, MIPS_R_A1, MIPS_R_ZERO); + + emit_const_to_reg(ctx, MIPS_R_T8, (u64)bpf_internal_load_pointer_neg_helper); + emit_const_to_reg(ctx, MIPS_R_T9, (u64)ool_skb_header_pointer); + emit_instr(ctx, addiu, MIPS_R_A2, MIPS_R_ZERO, size_to_len(insn)); + emit_instr(ctx, movn, MIPS_R_T9, MIPS_R_T8, MIPS_R_AT); + +ld_skb_common: + emit_instr(ctx, jalr, MIPS_R_RA, MIPS_R_T9); + /* delay slot move */ + emit_instr(ctx, daddu, MIPS_R_A0, MIPS_R_S0, MIPS_R_ZERO); + + /* Check the error value */ + b_off = b_imm(exit_idx, ctx); + if (is_bad_offset(b_off)) { + target = j_target(ctx, exit_idx); + if (target == (unsigned int)-1) + return -E2BIG; + + if (!(ctx->offsets[this_idx] & OFFSETS_B_CONV)) { + ctx->offsets[this_idx] |= OFFSETS_B_CONV; + ctx->long_b_conversion = 1; + } + emit_instr(ctx, bne, MIPS_R_V0, MIPS_R_ZERO, 4 * 3); + emit_instr(ctx, nop); + emit_instr(ctx, j, target); + emit_instr(ctx, nop); + } else { + emit_instr(ctx, beq, MIPS_R_V0, MIPS_R_ZERO, b_off); + emit_instr(ctx, nop); + } + +#ifdef __BIG_ENDIAN + need_swap = false; +#else + need_swap = true; +#endif + dst = MIPS_R_V0; + switch (BPF_SIZE(insn->code)) { + case BPF_B: + emit_instr(ctx, lbu, dst, 0, MIPS_R_V0); + break; + case BPF_H: + emit_instr(ctx, lhu, dst, 0, MIPS_R_V0); + if (need_swap) + emit_instr(ctx, wsbh, dst, dst); + break; + case BPF_W: + emit_instr(ctx, lw, dst, 0, MIPS_R_V0); + if (need_swap) { + emit_instr(ctx, wsbh, dst, dst); + emit_instr(ctx, rotr, dst, dst, 16); + } + break; + case BPF_DW: + emit_instr(ctx, ld, dst, 0, MIPS_R_V0); + if (need_swap) { + emit_instr(ctx, dsbh, dst, dst); + emit_instr(ctx, dshd, dst, dst); + } + break; + } + + break; + case BPF_ALU | BPF_END | BPF_FROM_BE: + case BPF_ALU | BPF_END | BPF_FROM_LE: + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (dst < 0) + return dst; + td = get_reg_val_type(ctx, this_idx, insn->dst_reg); + if (insn->imm == 64 && td == REG_32BIT) + emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32); + + if (insn->imm != 64 && + (td == REG_64BIT || td == REG_32BIT_ZERO_EX)) { + /* sign extend */ + emit_instr(ctx, sll, dst, dst, 0); + } + +#ifdef __BIG_ENDIAN + need_swap = (BPF_SRC(insn->code) == BPF_FROM_LE); +#else + need_swap = (BPF_SRC(insn->code) == BPF_FROM_BE); +#endif + if (insn->imm == 16) { + if (need_swap) + emit_instr(ctx, wsbh, dst, dst); + emit_instr(ctx, andi, dst, dst, 0xffff); + } else if (insn->imm == 32) { + if (need_swap) { + emit_instr(ctx, wsbh, dst, dst); + emit_instr(ctx, rotr, dst, dst, 16); + } + } else { /* 64-bit*/ + if (need_swap) { + emit_instr(ctx, dsbh, dst, dst); + emit_instr(ctx, dshd, dst, dst); + } + } + break; + + case BPF_ST | BPF_B | BPF_MEM: + case BPF_ST | BPF_H | BPF_MEM: + case BPF_ST | BPF_W | BPF_MEM: + case BPF_ST | BPF_DW | BPF_MEM: + if (insn->dst_reg == BPF_REG_10) { + ctx->flags |= EBPF_SEEN_FP; + dst = MIPS_R_SP; + mem_off = insn->off + MAX_BPF_STACK; + } else { + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (dst < 0) + return dst; + mem_off = insn->off; + } + gen_imm_to_reg(insn, MIPS_R_AT, ctx); + switch (BPF_SIZE(insn->code)) { + case BPF_B: + emit_instr(ctx, sb, MIPS_R_AT, mem_off, dst); + break; + case BPF_H: + emit_instr(ctx, sh, MIPS_R_AT, mem_off, dst); + break; + case BPF_W: + emit_instr(ctx, sw, MIPS_R_AT, mem_off, dst); + break; + case BPF_DW: + emit_instr(ctx, sd, MIPS_R_AT, mem_off, dst); + break; + } + break; + + case BPF_LDX | BPF_B | BPF_MEM: + case BPF_LDX | BPF_H | BPF_MEM: + case BPF_LDX | BPF_W | BPF_MEM: + case BPF_LDX | BPF_DW | BPF_MEM: + if (insn->src_reg == BPF_REG_10) { + ctx->flags |= EBPF_SEEN_FP; + src = MIPS_R_SP; + mem_off = insn->off + MAX_BPF_STACK; + } else { + src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp); + if (src < 0) + return src; + mem_off = insn->off; + } + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (dst < 0) + return dst; + switch (BPF_SIZE(insn->code)) { + case BPF_B: + emit_instr(ctx, lbu, dst, mem_off, src); + break; + case BPF_H: + emit_instr(ctx, lhu, dst, mem_off, src); + break; + case BPF_W: + emit_instr(ctx, lw, dst, mem_off, src); + break; + case BPF_DW: + emit_instr(ctx, ld, dst, mem_off, src); + break; + } + break; + + case BPF_STX | BPF_B | BPF_MEM: + case BPF_STX | BPF_H | BPF_MEM: + case BPF_STX | BPF_W | BPF_MEM: + case BPF_STX | BPF_DW | BPF_MEM: + case BPF_STX | BPF_W | BPF_XADD: + case BPF_STX | BPF_DW | BPF_XADD: + if (insn->dst_reg == BPF_REG_10) { + ctx->flags |= EBPF_SEEN_FP; + dst = MIPS_R_SP; + mem_off = insn->off + MAX_BPF_STACK; + } else { + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (dst < 0) + return dst; + mem_off = insn->off; + } + src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp); + if (src < 0) + return dst; + if (BPF_MODE(insn->code) == BPF_XADD) { + switch (BPF_SIZE(insn->code)) { + case BPF_W: + if (get_reg_val_type(ctx, this_idx, insn->src_reg) == REG_32BIT) { + emit_instr(ctx, sll, MIPS_R_AT, src, 0); + src = MIPS_R_AT; + } + emit_instr(ctx, ll, MIPS_R_T8, mem_off, dst); + emit_instr(ctx, addu, MIPS_R_T8, MIPS_R_T8, src); + emit_instr(ctx, sc, MIPS_R_T8, mem_off, dst); + /* + * On failure back up to LL (-4 + * instructions of 4 bytes each + */ + emit_instr(ctx, beq, MIPS_R_T8, MIPS_R_ZERO, -4 * 4); + emit_instr(ctx, nop); + break; + case BPF_DW: + if (get_reg_val_type(ctx, this_idx, insn->src_reg) == REG_32BIT) { + emit_instr(ctx, daddu, MIPS_R_AT, src, MIPS_R_ZERO); + emit_instr(ctx, dinsu, MIPS_R_AT, MIPS_R_ZERO, 32, 32); + src = MIPS_R_AT; + } + emit_instr(ctx, lld, MIPS_R_T8, mem_off, dst); + emit_instr(ctx, daddu, MIPS_R_T8, MIPS_R_T8, src); + emit_instr(ctx, scd, MIPS_R_T8, mem_off, dst); + emit_instr(ctx, beq, MIPS_R_T8, MIPS_R_ZERO, -4 * 4); + emit_instr(ctx, nop); + break; + } + } else { /* BPF_MEM */ + switch (BPF_SIZE(insn->code)) { + case BPF_B: + emit_instr(ctx, sb, src, mem_off, dst); + break; + case BPF_H: + emit_instr(ctx, sh, src, mem_off, dst); + break; + case BPF_W: + emit_instr(ctx, sw, src, mem_off, dst); + break; + case BPF_DW: + if (get_reg_val_type(ctx, this_idx, insn->src_reg) == REG_32BIT) { + emit_instr(ctx, daddu, MIPS_R_AT, src, MIPS_R_ZERO); + emit_instr(ctx, dinsu, MIPS_R_AT, MIPS_R_ZERO, 32, 32); + src = MIPS_R_AT; + } + emit_instr(ctx, sd, src, mem_off, dst); + break; + } + } + break; + + default: + pr_err("NOT HANDLED %d - (%02x)\n", + this_idx, (unsigned int)insn->code); + return -EINVAL; + } + return 1; +} + +#define RVT_VISITED_MASK 0xc000000000000000ull +#define RVT_FALL_THROUGH 0x4000000000000000ull +#define RVT_BRANCH_TAKEN 0x8000000000000000ull +#define RVT_DONE (RVT_FALL_THROUGH | RVT_BRANCH_TAKEN) + +static int build_int_body(struct jit_ctx *ctx) +{ + const struct bpf_prog *prog = ctx->skf; + const struct bpf_insn *insn; + int i, r; + + for (i = 0; i < prog->len; ) { + insn = prog->insnsi + i; + if ((ctx->reg_val_types[i] & RVT_VISITED_MASK) == 0) { + /* dead instruction, don't emit it. */ + i++; + continue; + } + + if (ctx->target == NULL) + ctx->offsets[i] = (ctx->offsets[i] & OFFSETS_B_CONV) | (ctx->idx * 4); + + r = build_one_insn(insn, ctx, i, prog->len); + if (r < 0) + return r; + i += r; + } + /* epilogue offset */ + if (ctx->target == NULL) + ctx->offsets[i] = ctx->idx * 4; + + /* + * All exits have an offset of the epilogue, some offsets may + * not have been set due to banch-around threading, so set + * them now. + */ + if (ctx->target == NULL) + for (i = 0; i < prog->len; i++) { + insn = prog->insnsi + i; + if (insn->code == (BPF_JMP | BPF_EXIT)) + ctx->offsets[i] = ctx->idx * 4; + } + return 0; +} + +/* return the last idx processed, or negative for error */ +static int reg_val_propagate_range(struct jit_ctx *ctx, u64 initial_rvt, + int start_idx, bool follow_taken) +{ + const struct bpf_prog *prog = ctx->skf; + const struct bpf_insn *insn; + u64 exit_rvt = initial_rvt; + u64 *rvt = ctx->reg_val_types; + int idx; + int reg; + + for (idx = start_idx; idx < prog->len; idx++) { + rvt[idx] = (rvt[idx] & RVT_VISITED_MASK) | exit_rvt; + insn = prog->insnsi + idx; + switch (BPF_CLASS(insn->code)) { + case BPF_ALU: + switch (BPF_OP(insn->code)) { + case BPF_ADD: + case BPF_SUB: + case BPF_MUL: + case BPF_DIV: + case BPF_OR: + case BPF_AND: + case BPF_LSH: + case BPF_RSH: + case BPF_NEG: + case BPF_MOD: + case BPF_XOR: + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT); + break; + case BPF_MOV: + if (BPF_SRC(insn->code)) { + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT); + } else { + /* IMM to REG move*/ + if (insn->imm >= 0) + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS); + else + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT); + } + break; + case BPF_END: + if (insn->imm == 64) + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT); + else if (insn->imm == 32) + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT); + else /* insn->imm == 16 */ + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS); + break; + } + rvt[idx] |= RVT_DONE; + break; + case BPF_ALU64: + switch (BPF_OP(insn->code)) { + case BPF_MOV: + if (BPF_SRC(insn->code)) { + /* REG to REG move*/ + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT); + } else { + /* IMM to REG move*/ + if (insn->imm >= 0) + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS); + else + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT_32BIT); + } + break; + default: + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT); + } + rvt[idx] |= RVT_DONE; + break; + case BPF_LD: + switch (BPF_SIZE(insn->code)) { + case BPF_DW: + if (BPF_MODE(insn->code) == BPF_IMM) { + s64 val; + + val = (s64)((u32)insn->imm | ((u64)(insn + 1)->imm << 32)); + if (val > 0 && val <= S32_MAX) + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS); + else if (val >= S32_MIN && val <= S32_MAX) + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT_32BIT); + else + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT); + rvt[idx] |= RVT_DONE; + idx++; + } else { + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT); + } + break; + case BPF_B: + case BPF_H: + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS); + break; + case BPF_W: + if (BPF_MODE(insn->code) == BPF_IMM) + set_reg_val_type(&exit_rvt, insn->dst_reg, + insn->imm >= 0 ? REG_32BIT_POS : REG_32BIT); + else + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT); + break; + } + rvt[idx] |= RVT_DONE; + break; + case BPF_LDX: + switch (BPF_SIZE(insn->code)) { + case BPF_DW: + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT); + break; + case BPF_B: + case BPF_H: + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS); + break; + case BPF_W: + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT); + break; + } + rvt[idx] |= RVT_DONE; + break; + case BPF_JMP: + switch (BPF_OP(insn->code)) { + case BPF_EXIT: + rvt[idx] = RVT_DONE | exit_rvt; + rvt[prog->len] = exit_rvt; + return idx; + case BPF_JA: + rvt[idx] |= RVT_DONE; + idx += insn->off; + break; + case BPF_JEQ: + case BPF_JGT: + case BPF_JGE: + case BPF_JSET: + case BPF_JNE: + case BPF_JSGT: + case BPF_JSGE: + if (follow_taken) { + rvt[idx] |= RVT_BRANCH_TAKEN; + idx += insn->off; + follow_taken = false; + } else { + rvt[idx] |= RVT_FALL_THROUGH; + } + break; + case BPF_CALL: + set_reg_val_type(&exit_rvt, BPF_REG_0, REG_64BIT); + /* Upon call return, argument registers are clobbered. */ + for (reg = BPF_REG_0; reg <= BPF_REG_5; reg++) + set_reg_val_type(&exit_rvt, reg, REG_64BIT); + + rvt[idx] |= RVT_DONE; + break; + default: + WARN(1, "Unhandled BPF_JMP case.\n"); + rvt[idx] |= RVT_DONE; + break; + } + break; + default: + rvt[idx] |= RVT_DONE; + break; + } + } + return idx; +} + +/* + * Track the value range (i.e. 32-bit vs. 64-bit) of each register at + * each eBPF insn. This allows unneeded sign and zero extension + * operations to be omitted. + * + * Doesn't handle yet confluence of control paths with conflicting + * ranges, but it is good enough for most sane code. + */ +static int reg_val_propagate(struct jit_ctx *ctx) +{ + const struct bpf_prog *prog = ctx->skf; + u64 exit_rvt; + int reg; + int i; + + /* + * 11 registers * 3 bits/reg leaves top bits free for other + * uses. Bit-62..63 used to see if we have visited an insn. + */ + exit_rvt = 0; + + /* Upon entry, argument registers are 64-bit. */ + for (reg = BPF_REG_1; reg <= BPF_REG_5; reg++) + set_reg_val_type(&exit_rvt, reg, REG_64BIT); + + /* + * First follow all conditional branches on the fall-through + * edge of control flow.. + */ + reg_val_propagate_range(ctx, exit_rvt, 0, false); +restart_search: + /* + * Then repeatedly find the first conditional branch where + * both edges of control flow have not been taken, and follow + * the branch taken edge. We will end up restarting the + * search once per conditional branch insn. + */ + for (i = 0; i < prog->len; i++) { + u64 rvt = ctx->reg_val_types[i]; + + if ((rvt & RVT_VISITED_MASK) == RVT_DONE || + (rvt & RVT_VISITED_MASK) == 0) + continue; + if ((rvt & RVT_VISITED_MASK) == RVT_FALL_THROUGH) { + reg_val_propagate_range(ctx, rvt & ~RVT_VISITED_MASK, i, true); + } else { /* RVT_BRANCH_TAKEN */ + WARN(1, "Unexpected RVT_BRANCH_TAKEN case.\n"); + reg_val_propagate_range(ctx, rvt & ~RVT_VISITED_MASK, i, false); + } + goto restart_search; + } + /* + * Eventually all conditional branches have been followed on + * both branches and we are done. Any insn that has not been + * visited at this point is dead. + */ + + return 0; +} + +static void jit_fill_hole(void *area, unsigned int size) +{ + u32 *p; + + /* We are guaranteed to have aligned memory. */ + for (p = area; size >= sizeof(u32); size -= sizeof(u32)) + uasm_i_break(&p, BRK_BUG); /* Increments p */ +} + +struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) +{ + struct bpf_prog *orig_prog = prog; + bool tmp_blinded = false; + struct bpf_prog *tmp; + struct bpf_binary_header *header = NULL; + struct jit_ctx ctx; + unsigned int image_size; + u8 *image_ptr; + + if (!bpf_jit_enable || !cpu_has_mips64r2) + return prog; + + tmp = bpf_jit_blind_constants(prog); + /* If blinding was requested and we failed during blinding, + * we must fall back to the interpreter. + */ + if (IS_ERR(tmp)) + return orig_prog; + if (tmp != prog) { + tmp_blinded = true; + prog = tmp; + } + + memset(&ctx, 0, sizeof(ctx)); + + ctx.offsets = kcalloc(prog->len + 1, sizeof(*ctx.offsets), GFP_KERNEL); + if (ctx.offsets == NULL) + goto out_err; + + ctx.reg_val_types = kcalloc(prog->len + 1, sizeof(*ctx.reg_val_types), GFP_KERNEL); + if (ctx.reg_val_types == NULL) + goto out_err; + + ctx.skf = prog; + + if (reg_val_propagate(&ctx)) + goto out_err; + + /* + * First pass discovers used resources and instruction offsets + * assuming short branches are used. + */ + if (build_int_body(&ctx)) + goto out_err; + + /* + * If no calls are made (EBPF_SAVE_RA), then tail call count + * in $v1, else we must save in n$s4. + */ + if (ctx.flags & EBPF_SEEN_TC) { + if (ctx.flags & EBPF_SAVE_RA) + ctx.flags |= EBPF_SAVE_S4; + else + ctx.flags |= EBPF_TCC_IN_V1; + } + + /* + * Second pass generates offsets, if any branches are out of + * range a jump-around long sequence is generated, and we have + * to try again from the beginning to generate the new + * offsets. This is done until no additional conversions are + * necessary. + */ + do { + ctx.idx = 0; + ctx.gen_b_offsets = 1; + ctx.long_b_conversion = 0; + if (gen_int_prologue(&ctx)) + goto out_err; + if (build_int_body(&ctx)) + goto out_err; + if (build_int_epilogue(&ctx, MIPS_R_RA)) + goto out_err; + } while (ctx.long_b_conversion); + + image_size = 4 * ctx.idx; + + header = bpf_jit_binary_alloc(image_size, &image_ptr, + sizeof(u32), jit_fill_hole); + if (header == NULL) + goto out_err; + + ctx.target = (u32 *)image_ptr; + + /* Third pass generates the code */ + ctx.idx = 0; + if (gen_int_prologue(&ctx)) + goto out_err; + if (build_int_body(&ctx)) + goto out_err; + if (build_int_epilogue(&ctx, MIPS_R_RA)) + goto out_err; + + /* Update the icache */ + flush_icache_range((unsigned long)ctx.target, + (unsigned long)(ctx.target + ctx.idx * sizeof(u32))); + + if (bpf_jit_enable > 1) + /* Dump JIT code */ + bpf_jit_dump(prog->len, image_size, 2, ctx.target); + + bpf_jit_binary_lock_ro(header); + prog->bpf_func = (void *)ctx.target; + prog->jited = 1; + prog->jited_len = image_size; +out_normal: + if (tmp_blinded) + bpf_jit_prog_release_other(prog, prog == orig_prog ? + tmp : orig_prog); + kfree(ctx.offsets); + kfree(ctx.reg_val_types); + + return prog; + +out_err: + prog = orig_prog; + if (header) + bpf_jit_binary_free(header); + goto out_normal; +} -- cgit v1.2.3 From 00e06297b351d286199c6cf542ea70b8fbabafee Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Thu, 3 Aug 2017 21:16:15 +0200 Subject: MIPS: mm: remove duplicate "const" qualifier on insn_table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the following gcc 7.x build error: arch/mips/mm/uasm-mips.c:51:26: error: duplicate ‘const’ declaration specifier [-Werror=duplicate-decl-specifier] static const struct insn const insn_table[insn_invalid] = { Signed-off-by: Thomas Petazzoni Fixes: ce807d5f67ed ("MIPS: Optimize uasm insn lookup.") Cc: David Daney Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/16926/ Signed-off-by: Ralf Baechle --- arch/mips/mm/uasm-mips.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/mm/uasm-mips.c b/arch/mips/mm/uasm-mips.c index 3f74f6c1f065..9fea6c6bbf49 100644 --- a/arch/mips/mm/uasm-mips.c +++ b/arch/mips/mm/uasm-mips.c @@ -48,7 +48,7 @@ #include "uasm.c" -static const struct insn const insn_table[insn_invalid] = { +static const struct insn insn_table[insn_invalid] = { [insn_addiu] = {M(addiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, [insn_addu] = {M(spec_op, 0, 0, 0, 0, addu_op), RS | RT | RD}, [insn_and] = {M(spec_op, 0, 0, 0, 0, and_op), RS | RT | RD}, -- cgit v1.2.3 From 6f542ebeaee0ee552a902ce3892220fc22c7ec8e Mon Sep 17 00:00:00 2001 From: Matija Glavinic Pecotic Date: Thu, 3 Aug 2017 08:20:22 +0200 Subject: MIPS: Fix race on setting and getting cpu_online_mask While testing cpu hoptlug (cpu down and up in loops) on kernel 4.4, it was observed that occasionally check for cpu online will fail in kernel/cpu.c, _cpu_up: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git/tree/kernel/cpu.c?h=v4.4.79#n485 518 /* Arch-specific enabling code. */ 519 ret = __cpu_up(cpu, idle); 520 521 if (ret != 0) 522 goto out_notify; 523 BUG_ON(!cpu_online(cpu)); Reason is race between start_secondary and _cpu_up. cpu_callin_map is set before cpu_online_mask. In __cpu_up, cpu_callin_map is waited for, but cpu online mask is not, resulting in race in which secondary processor started and set cpu_callin_map, but not yet set the online mask,resulting in above BUG being hit. Upstream differs in the area. cpu_online check is in bringup_wait_for_ap, which is after cpu reached AP_ONLINE_IDLE,where secondary passed its start function. Nonetheless, fix makes start_secondary safe and not depending on other locks throughout the code. It protects as well against cpu_online checks put in between sometimes in the future. Fix this by moving completion after all flags are set. Signed-off-by: Matija Glavinic Pecotic Cc: Alexander Sverdlin Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/16925/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/smp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 770d4d1516cb..6bace7695788 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -376,9 +376,6 @@ asmlinkage void start_secondary(void) cpumask_set_cpu(cpu, &cpu_coherent_mask); notify_cpu_starting(cpu); - complete(&cpu_running); - synchronise_count_slave(cpu); - set_cpu_online(cpu, true); set_cpu_sibling_map(cpu); @@ -386,6 +383,9 @@ asmlinkage void start_secondary(void) calculate_cpu_foreign_map(); + complete(&cpu_running); + synchronise_count_slave(cpu); + /* * irq will be enabled in ->smp_finish(), enabling it too early * is dangerous. -- cgit v1.2.3 From 33a73649acc412848996ae0921d577f33eb54af5 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 2 Aug 2017 17:04:04 +0200 Subject: MIPS: gitignore: ignore generated .c files Add ashldi3.c and bswapsi.c to the list of ignored files. Signed-off-by: Bartosz Golaszewski Reviewed-by: James Hogan Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/16905/ Signed-off-by: Ralf Baechle --- arch/mips/boot/compressed/.gitignore | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 arch/mips/boot/compressed/.gitignore (limited to 'arch') diff --git a/arch/mips/boot/compressed/.gitignore b/arch/mips/boot/compressed/.gitignore new file mode 100644 index 000000000000..ebae133f1d00 --- /dev/null +++ b/arch/mips/boot/compressed/.gitignore @@ -0,0 +1,2 @@ +ashldi3.c +bswapsi.c -- cgit v1.2.3 From 81a67e52763d1db6b3200c648d1efa16daddc536 Mon Sep 17 00:00:00 2001 From: "Steven J. Hill" Date: Wed, 2 Aug 2017 12:39:28 -0500 Subject: MIPS: Octeon: Fix broken EDAC driver. Commit "MIPS: Octeon: Remove unused L2C types and macros." broke the the EDAC driver. Bring back 'cvmx-l2d-defs.h' file and the missing types for L2C. Fixes: 15f6847923a8 ("MIPS: Octeon: Remove unused L2C types and macros.") Fixes: 15f6847923a8 ("MIPS: Octeon: Remove unused L2C types and macros.") Signed-off-by: Steven J. Hill Reviewed-by: James Hogan Cc: linux-mips@linux-mips.org Cc: # 4.12+ Patchwork: https://patchwork.linux-mips.org/patch/16906/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/octeon/cvmx-l2c-defs.h | 37 ++++++++++++++++- arch/mips/include/asm/octeon/cvmx-l2d-defs.h | 60 ++++++++++++++++++++++++++++ arch/mips/include/asm/octeon/cvmx.h | 1 + 3 files changed, 97 insertions(+), 1 deletion(-) create mode 100644 arch/mips/include/asm/octeon/cvmx-l2d-defs.h (limited to 'arch') diff --git a/arch/mips/include/asm/octeon/cvmx-l2c-defs.h b/arch/mips/include/asm/octeon/cvmx-l2c-defs.h index d045973ddb33..3ea84acf1814 100644 --- a/arch/mips/include/asm/octeon/cvmx-l2c-defs.h +++ b/arch/mips/include/asm/octeon/cvmx-l2c-defs.h @@ -33,6 +33,10 @@ #define CVMX_L2C_DBG (CVMX_ADD_IO_SEG(0x0001180080000030ull)) #define CVMX_L2C_CFG (CVMX_ADD_IO_SEG(0x0001180080000000ull)) #define CVMX_L2C_CTL (CVMX_ADD_IO_SEG(0x0001180080800000ull)) +#define CVMX_L2C_ERR_TDTX(block_id) \ + (CVMX_ADD_IO_SEG(0x0001180080A007E0ull) + ((block_id) & 3) * 0x40000ull) +#define CVMX_L2C_ERR_TTGX(block_id) \ + (CVMX_ADD_IO_SEG(0x0001180080A007E8ull) + ((block_id) & 3) * 0x40000ull) #define CVMX_L2C_LCKBASE (CVMX_ADD_IO_SEG(0x0001180080000058ull)) #define CVMX_L2C_LCKOFF (CVMX_ADD_IO_SEG(0x0001180080000060ull)) #define CVMX_L2C_PFCTL (CVMX_ADD_IO_SEG(0x0001180080000090ull)) @@ -66,9 +70,40 @@ ((offset) & 1) * 8) #define CVMX_L2C_WPAR_PPX(offset) (CVMX_ADD_IO_SEG(0x0001180080840000ull) + \ ((offset) & 31) * 8) -#define CVMX_L2D_FUS3 (CVMX_ADD_IO_SEG(0x00011800800007B8ull)) +union cvmx_l2c_err_tdtx { + uint64_t u64; + struct cvmx_l2c_err_tdtx_s { + __BITFIELD_FIELD(uint64_t dbe:1, + __BITFIELD_FIELD(uint64_t sbe:1, + __BITFIELD_FIELD(uint64_t vdbe:1, + __BITFIELD_FIELD(uint64_t vsbe:1, + __BITFIELD_FIELD(uint64_t syn:10, + __BITFIELD_FIELD(uint64_t reserved_22_49:28, + __BITFIELD_FIELD(uint64_t wayidx:18, + __BITFIELD_FIELD(uint64_t reserved_2_3:2, + __BITFIELD_FIELD(uint64_t type:2, + ;))))))))) + } s; +}; + +union cvmx_l2c_err_ttgx { + uint64_t u64; + struct cvmx_l2c_err_ttgx_s { + __BITFIELD_FIELD(uint64_t dbe:1, + __BITFIELD_FIELD(uint64_t sbe:1, + __BITFIELD_FIELD(uint64_t noway:1, + __BITFIELD_FIELD(uint64_t reserved_56_60:5, + __BITFIELD_FIELD(uint64_t syn:6, + __BITFIELD_FIELD(uint64_t reserved_22_49:28, + __BITFIELD_FIELD(uint64_t wayidx:15, + __BITFIELD_FIELD(uint64_t reserved_2_6:5, + __BITFIELD_FIELD(uint64_t type:2, + ;))))))))) + } s; +}; + union cvmx_l2c_cfg { uint64_t u64; struct cvmx_l2c_cfg_s { diff --git a/arch/mips/include/asm/octeon/cvmx-l2d-defs.h b/arch/mips/include/asm/octeon/cvmx-l2d-defs.h new file mode 100644 index 000000000000..a951ad5d65ad --- /dev/null +++ b/arch/mips/include/asm/octeon/cvmx-l2d-defs.h @@ -0,0 +1,60 @@ +/***********************license start*************** + * Author: Cavium Networks + * + * Contact: support@caviumnetworks.com + * This file is part of the OCTEON SDK + * + * Copyright (c) 2003-2017 Cavium, Inc. + * + * This file is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, Version 2, as + * published by the Free Software Foundation. + * + * This file is distributed in the hope that it will be useful, but + * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or + * NONINFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this file; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * or visit http://www.gnu.org/licenses/. + * + * This file may also be available under a different license from Cavium. + * Contact Cavium Networks for more information + ***********************license end**************************************/ + +#ifndef __CVMX_L2D_DEFS_H__ +#define __CVMX_L2D_DEFS_H__ + +#define CVMX_L2D_ERR (CVMX_ADD_IO_SEG(0x0001180080000010ull)) +#define CVMX_L2D_FUS3 (CVMX_ADD_IO_SEG(0x00011800800007B8ull)) + + +union cvmx_l2d_err { + uint64_t u64; + struct cvmx_l2d_err_s { + __BITFIELD_FIELD(uint64_t reserved_6_63:58, + __BITFIELD_FIELD(uint64_t bmhclsel:1, + __BITFIELD_FIELD(uint64_t ded_err:1, + __BITFIELD_FIELD(uint64_t sec_err:1, + __BITFIELD_FIELD(uint64_t ded_intena:1, + __BITFIELD_FIELD(uint64_t sec_intena:1, + __BITFIELD_FIELD(uint64_t ecc_ena:1, + ;))))))) + } s; +}; + +union cvmx_l2d_fus3 { + uint64_t u64; + struct cvmx_l2d_fus3_s { + __BITFIELD_FIELD(uint64_t reserved_40_63:24, + __BITFIELD_FIELD(uint64_t ema_ctl:3, + __BITFIELD_FIELD(uint64_t reserved_34_36:3, + __BITFIELD_FIELD(uint64_t q3fus:34, + ;)))) + } s; +}; + +#endif diff --git a/arch/mips/include/asm/octeon/cvmx.h b/arch/mips/include/asm/octeon/cvmx.h index 9742202f2a32..e638735cc3ac 100644 --- a/arch/mips/include/asm/octeon/cvmx.h +++ b/arch/mips/include/asm/octeon/cvmx.h @@ -62,6 +62,7 @@ enum cvmx_mips_space { #include #include #include +#include #include #include #include -- cgit v1.2.3 From bed58464303fabdbf126544cbdd95a9e1c2e9949 Mon Sep 17 00:00:00 2001 From: "Steven J. Hill" Date: Wed, 2 Aug 2017 12:39:50 -0500 Subject: MIPS: OCTEON: Fix USB platform code breakage. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix build error when CONFIG_SMP is turned off: CC [M] arch/mips/cavium-octeon/octeon-usb.o arch/mips/cavium-octeon/octeon-usb.c: In function ‘dwc3_octeon_device_init’: arch/mips/cavium-octeon/octeon-usb.c:540:4: error: implicit declaration of function ‘devm_iounmap’ [-Werror=implicit-function-declaration] devm_iounmap(&pdev->dev, base); Signed-off-by: Steven J. Hill Reviewed-by: James Hogan Tested-by: Matt Redfearn Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/16907/ Signed-off-by: Ralf Baechle --- arch/mips/cavium-octeon/octeon-usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/cavium-octeon/octeon-usb.c b/arch/mips/cavium-octeon/octeon-usb.c index 542be1cd0f32..bfdfaf32d2c4 100644 --- a/arch/mips/cavium-octeon/octeon-usb.c +++ b/arch/mips/cavium-octeon/octeon-usb.c @@ -13,9 +13,9 @@ #include #include #include +#include #include -#include /* USB Control Register */ union cvm_usbdrd_uctl_ctl { -- cgit v1.2.3 From ae5b0675942ab30cde96099c68a2290bd1aafcca Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 1 Aug 2017 13:32:57 -0700 Subject: Revert "MIPS: Don't unnecessarily include kmalloc.h into ." Commit 296e46db0073 ("MIPS: Don't unnecessarily include kmalloc.h into .") claimed that the inclusion of the machine's kmalloc.h from asm/cache.h is unnecessary, but this is not true. Without including kmalloc.h we don't get a definition for ARCH_DMA_MINALIGN, which means we no longer suitably align DMA. Further to this the definition of ARCH_KMALLOC_MINALIGN provided by linux/slab.h ends up being set to the alignment of an unsigned long long value rather than to ARCH_DMA_MINALIGN, which means that buffers allocated using kmalloc may no longer be safely aligned for use with DMA. Fix this by re-adding the include of kmalloc.h in asm/cache.h. This reverts commit 296e46db0073 ("MIPS: Don't unnecessarily include kmalloc.h into .") Signed-off-by: Paul Burton Fixes: 296e46db0073 ("MIPS: Don't unnecessarily include kmalloc.h into .") Cc: linux-mips@linux-mips.org Cc: stable # v4.12+ Patchwork: https://patchwork.linux-mips.org/patch/16895/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/cache.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/mips/include/asm/cache.h b/arch/mips/include/asm/cache.h index fc67947ed658..8b14c2706aa5 100644 --- a/arch/mips/include/asm/cache.h +++ b/arch/mips/include/asm/cache.h @@ -9,6 +9,8 @@ #ifndef _ASM_CACHE_H #define _ASM_CACHE_H +#include + #define L1_CACHE_SHIFT CONFIG_MIPS_L1_CACHE_SHIFT #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) -- cgit v1.2.3 From 44a12806d010944a5727f1dc99123121e3e2c8c6 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 7 Aug 2017 21:25:01 +1000 Subject: Revert "powerpc/64: Avoid restore_math call if possible in syscall exit" This reverts commit bc4f65e4cf9d6cc43e0e9ba0b8648cf9201cd55f. As reported by Andreas, this commit is causing unrecoverable SLB misses in the system call exit path: Unrecoverable exception 4100 at c00000000000a1ec Oops: Unrecoverable exception, sig: 6 [#1] SMP NR_CPUS=2 PowerMac ... CPU: 0 PID: 18626 Comm: rm Not tainted 4.13.0-rc3 #1 task: c00000018335e080 task.stack: c000000139e50000 NIP: c00000000000a1ec LR: c00000000000a118 CTR: 0000000000000000 REGS: c000000139e53bb0 TRAP: 4100 Not tainted (4.13.0-rc3) MSR: 9000000000001030 CR: 24000044 XER: 20000000 SOFTE: 1 GPR00: 0000000000000000 c000000139e53e30 c000000000abb500 fffffffffffffffe GPR04: c0000001eb866298 0000000000000000 0000000000000000 c00000018335e080 GPR08: 900000000000d032 0000000000000000 0000000000000002 fffffffffffff001 GPR12: c000000139e50000 c00000000ffff000 00003fffa8c0dca0 00003fffa8c0dc88 GPR16: 0000000010000000 0000000000000001 00003fffa8c0eaa0 0000000000000000 GPR20: 00003fffa8c27528 00003fffa8c27b00 0000000000000000 0000000000000000 GPR24: 00003fffa8c0d918 00003ffff1b3efa0 00003fffa8c26d68 0000000000000000 GPR28: 00003fffa8c249e8 00003fffa8c263d0 00003fffa8c27550 00003ffff1b3ef10 NIP [c00000000000a1ec] system_call_exit+0xc0/0x21c LR [c00000000000a118] system_call+0x58/0x6c Call Trace: [c000000139e53e30] [c00000000000a118] system_call+0x58/0x6c (unreliable) Instruction dump: 64a51000 7c6300d0 f8a101a0 4bffff9c 3c000000 60000006 780007c6 64000000 60000000 7c004039 4082001c e8ed0170 <88070b78> 88c70b79 7c003214 2c200000 This is caused by us trying to load THREAD_LOAD_FP with MSR_RI=0, and taking an SLB miss on the thread struct. Reported-by: Andreas Schwab Diagnosed-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/entry_64.S | 60 +++++++++++++----------------------------- arch/powerpc/kernel/process.c | 4 --- 2 files changed, 18 insertions(+), 46 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 49d8422767b4..e925c1c99c71 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -223,17 +223,27 @@ system_call_exit: andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK) bne- .Lsyscall_exit_work - /* If MSR_FP and MSR_VEC are set in user msr, then no need to restore */ - li r7,MSR_FP + andi. r0,r8,MSR_FP + beq 2f #ifdef CONFIG_ALTIVEC - oris r7,r7,MSR_VEC@h + andis. r0,r8,MSR_VEC@h + bne 3f #endif - and r0,r8,r7 - cmpd r0,r7 - bne .Lsyscall_restore_math -.Lsyscall_restore_math_cont: +2: addi r3,r1,STACK_FRAME_OVERHEAD +#ifdef CONFIG_PPC_BOOK3S + li r10,MSR_RI + mtmsrd r10,1 /* Restore RI */ +#endif + bl restore_math +#ifdef CONFIG_PPC_BOOK3S + li r11,0 + mtmsrd r11,1 +#endif + ld r8,_MSR(r1) + ld r3,RESULT(r1) + li r11,-MAX_ERRNO - cmpld r3,r11 +3: cmpld r3,r11 ld r5,_CCR(r1) bge- .Lsyscall_error .Lsyscall_error_cont: @@ -267,40 +277,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) std r5,_CCR(r1) b .Lsyscall_error_cont -.Lsyscall_restore_math: - /* - * Some initial tests from restore_math to avoid the heavyweight - * C code entry and MSR manipulations. - */ - LOAD_REG_IMMEDIATE(r0, MSR_TS_MASK) - and. r0,r0,r8 - bne 1f - - ld r7,PACACURRENT(r13) - lbz r0,THREAD+THREAD_LOAD_FP(r7) -#ifdef CONFIG_ALTIVEC - lbz r6,THREAD+THREAD_LOAD_VEC(r7) - add r0,r0,r6 -#endif - cmpdi r0,0 - beq .Lsyscall_restore_math_cont - -1: addi r3,r1,STACK_FRAME_OVERHEAD -#ifdef CONFIG_PPC_BOOK3S - li r10,MSR_RI - mtmsrd r10,1 /* Restore RI */ -#endif - bl restore_math -#ifdef CONFIG_PPC_BOOK3S - li r11,0 - mtmsrd r11,1 -#endif - /* Restore volatiles, reload MSR from updated one */ - ld r8,_MSR(r1) - ld r3,RESULT(r1) - li r11,-MAX_ERRNO - b .Lsyscall_restore_math_cont - /* Traced system call support */ .Lsyscall_dotrace: bl save_nvgprs diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 9f3e2c932dcc..ec480966f9bf 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -511,10 +511,6 @@ void restore_math(struct pt_regs *regs) { unsigned long msr; - /* - * Syscall exit makes a similar initial check before branching - * to restore_math. Keep them in synch. - */ if (!msr_tm_active(regs->msr) && !current->thread.load_fp && !loadvec(current->thread)) return; -- cgit v1.2.3 From b399ee28c29c07f6a7ad87dade9148828757e6e9 Mon Sep 17 00:00:00 2001 From: Goran Ferenc Date: Thu, 27 Jul 2017 18:08:47 +0200 Subject: MIPS: VDSO: Fix clobber lists in fallback code paths Extend clobber lists to include all GP registers. Fixes: 0b523a85e134 ("MIPS: VDSO: Add implementation of gettimeofday() fallback") Signed-off-by: Miodrag Dinic Signed-off-by: Goran Ferenc Signed-off-by: Aleksandar Markovic Reviewed-by: James Hogan Cc: Bo Hu Cc: Douglas Leung Cc: James Hogan Cc: Jin Qian Cc: Paul Burton Cc: Petar Jovanovic Cc: Raghu Gandham Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/16879/ Signed-off-by: Ralf Baechle --- arch/mips/vdso/gettimeofday.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/mips/vdso/gettimeofday.c b/arch/mips/vdso/gettimeofday.c index 974276e828b2..e2690d7ca4dd 100644 --- a/arch/mips/vdso/gettimeofday.c +++ b/arch/mips/vdso/gettimeofday.c @@ -35,7 +35,8 @@ static __always_inline long gettimeofday_fallback(struct timeval *_tv, " syscall\n" : "=r" (ret), "=r" (error) : "r" (tv), "r" (tz), "r" (nr) - : "memory"); + : "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13", + "$14", "$15", "$24", "$25", "hi", "lo", "memory"); return error ? -ret : ret; } @@ -55,7 +56,8 @@ static __always_inline long clock_gettime_fallback(clockid_t _clkid, " syscall\n" : "=r" (ret), "=r" (error) : "r" (clkid), "r" (ts), "r" (nr) - : "memory"); + : "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13", + "$14", "$15", "$24", "$25", "hi", "lo", "memory"); return error ? -ret : ret; } -- cgit v1.2.3 From 68fe55680d0f3342969f49412fceabb90bdfadba Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Sun, 30 Jul 2017 21:28:15 +0100 Subject: MIPS: DEC: Fix an int-handler.S CPU_DADDI_WORKAROUNDS regression Fix a commit 3021773c7c3e ("MIPS: DEC: Avoid la pseudo-instruction in delay slots") regression and remove assembly errors: arch/mips/dec/int-handler.S: Assembler messages: arch/mips/dec/int-handler.S:162: Error: Macro used $at after ".set noat" arch/mips/dec/int-handler.S:163: Error: Macro used $at after ".set noat" arch/mips/dec/int-handler.S:229: Error: Macro used $at after ".set noat" arch/mips/dec/int-handler.S:230: Error: Macro used $at after ".set noat" triggering with with the CPU_DADDI_WORKAROUNDS option set and the DADDIU instruction. This is because with that option in place the instruction becomes a macro, which expands to an LI/DADDU (or actually ADDIU/DADDU) sequence that uses $at as a temporary register. With CPU_DADDI_WORKAROUNDS we only support `-msym32' compilation though, and this is already enforced in arch/mips/Makefile, so choose the 32-bit expansion variant for the supported configurations and then replace the 64-bit variant with #error just in case. Fixes: 3021773c7c3e ("MIPS: DEC: Avoid la pseudo-instruction in delay slots") Signed-off-by: Maciej W. Rozycki Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # 4.8+ Patchwork: https://patchwork.linux-mips.org/patch/16893/ Signed-off-by: Ralf Baechle --- arch/mips/dec/int-handler.S | 34 ++++++---------------------------- 1 file changed, 6 insertions(+), 28 deletions(-) (limited to 'arch') diff --git a/arch/mips/dec/int-handler.S b/arch/mips/dec/int-handler.S index 1910223a9c02..cea2bb1621e6 100644 --- a/arch/mips/dec/int-handler.S +++ b/arch/mips/dec/int-handler.S @@ -147,23 +147,12 @@ * Find irq with highest priority */ # open coded PTR_LA t1, cpu_mask_nr_tbl -#if (_MIPS_SZPTR == 32) +#if defined(CONFIG_32BIT) || defined(KBUILD_64BIT_SYM32) # open coded la t1, cpu_mask_nr_tbl lui t1, %hi(cpu_mask_nr_tbl) addiu t1, %lo(cpu_mask_nr_tbl) - -#endif -#if (_MIPS_SZPTR == 64) - # open coded dla t1, cpu_mask_nr_tbl - .set push - .set noat - lui t1, %highest(cpu_mask_nr_tbl) - lui AT, %hi(cpu_mask_nr_tbl) - daddiu t1, t1, %higher(cpu_mask_nr_tbl) - daddiu AT, AT, %lo(cpu_mask_nr_tbl) - dsll t1, 32 - daddu t1, t1, AT - .set pop +#else +#error GCC `-msym32' option required for 64-bit DECstation builds #endif 1: lw t2,(t1) nop @@ -214,23 +203,12 @@ * Find irq with highest priority */ # open coded PTR_LA t1,asic_mask_nr_tbl -#if (_MIPS_SZPTR == 32) +#if defined(CONFIG_32BIT) || defined(KBUILD_64BIT_SYM32) # open coded la t1, asic_mask_nr_tbl lui t1, %hi(asic_mask_nr_tbl) addiu t1, %lo(asic_mask_nr_tbl) - -#endif -#if (_MIPS_SZPTR == 64) - # open coded dla t1, asic_mask_nr_tbl - .set push - .set noat - lui t1, %highest(asic_mask_nr_tbl) - lui AT, %hi(asic_mask_nr_tbl) - daddiu t1, t1, %higher(asic_mask_nr_tbl) - daddiu AT, AT, %lo(asic_mask_nr_tbl) - dsll t1, 32 - daddu t1, t1, AT - .set pop +#else +#error GCC `-msym32' option required for 64-bit DECstation builds #endif 2: lw t2,(t1) nop -- cgit v1.2.3 From 21da5332327b6d183bd93336ecf29c70bc609b7b Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Wed, 26 Jul 2017 08:41:08 +0100 Subject: MIPS: Introduce cpu_tcache_line_size There exist macros to return the cache line size of the L1 dcache and L2 scache but there is currently no macro for the L3 tcache. Add this macro which will be used by the following patch "MIPS: PCI: Fix smp_processor_id() in preemptible" Signed-off-by: Matt Redfearn Cc: Maciej W. Rozycki Cc: James Hogan Cc: Paul Burton Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/16871/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/cpu-features.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h index 8baa9033b181..721b698bfe3c 100644 --- a/arch/mips/include/asm/cpu-features.h +++ b/arch/mips/include/asm/cpu-features.h @@ -428,6 +428,9 @@ #ifndef cpu_scache_line_size #define cpu_scache_line_size() cpu_data[0].scache.linesz #endif +#ifndef cpu_tcache_line_size +#define cpu_tcache_line_size() cpu_data[0].tcache.linesz +#endif #ifndef cpu_hwrena_impl_bits #define cpu_hwrena_impl_bits 0 -- cgit v1.2.3 From 735302665c353d6756e7fa2a2cf41b039299f732 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Wed, 26 Jul 2017 08:41:09 +0100 Subject: MIPS: PCI: Fix smp_processor_id() in preemptible Commit 1c3c5eab1715 ("sched/core: Enable might_sleep() and smp_processor_id() checks early") enables checks for might_sleep() and smp_processor_id() being used in preemptible code earlier in the boot than before. This results in a new BUG from pcibios_set_cache_line_size(). BUG: using smp_processor_id() in preemptible [00000000] code: swapper/0/1 caller is pcibios_set_cache_line_size+0x10/0x70 CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.13.0-rc1-00007-g3ce3e4ba4275 #615 Stack: 0000000000000000 ffffffff81189694 0000000000000000 ffffffff81822318 000000000000004e 0000000000000001 800000000e20bd08 20c49ba5e3540000 0000000000000000 0000000000000000 ffffffff818d0000 0000000000000000 0000000000000000 ffffffff81189328 ffffffff818ce692 0000000000000000 0000000000000000 ffffffff81189bc8 ffffffff818d0000 0000000000000000 ffffffff81828907 ffffffff81769970 800000020ec78d80 ffffffff818c7b48 0000000000000001 0000000000000001 ffffffff818652b0 ffffffff81896268 ffffffff818c0000 800000020ec7fb40 800000020ec7fc58 ffffffff81684cac 0000000000000000 ffffffff8118ab50 0000000000000030 ffffffff81769970 0000000000000001 ffffffff81122a58 0000000000000000 0000000000000000 ... Call Trace: [] show_stack+0x90/0xb0 [] dump_stack+0xac/0xf0 [] check_preemption_disabled+0x120/0x128 [] pcibios_set_cache_line_size+0x10/0x70 [] do_one_initcall+0x48/0x140 [] kernel_init_freeable+0x194/0x24c [] kernel_init+0x14/0x118 [] ret_from_kernel_thread+0x14/0x1c Fix this by using the cpu_*cache_line_size() macros instead. These macros are the "proper" way to determine the CPU cache sizes. This makes use of the newly added cpu_tcache_line_size. Fixes: 1c3c5eab1715 ("sched/core: Enable might_sleep() and smp_processor_id() checks early") Signed-off-by: Matt Redfearn Suggested-by: James Hogan Reviewed-by: James Hogan Signed-off-by: Ralf Baechle --- arch/mips/pci/pci.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/mips/pci/pci.c b/arch/mips/pci/pci.c index bd67ac74fe2d..9632436d74d7 100644 --- a/arch/mips/pci/pci.c +++ b/arch/mips/pci/pci.c @@ -28,16 +28,15 @@ EXPORT_SYMBOL(PCIBIOS_MIN_MEM); static int __init pcibios_set_cache_line_size(void) { - struct cpuinfo_mips *c = ¤t_cpu_data; unsigned int lsize; /* * Set PCI cacheline size to that of the highest level in the * cache hierarchy. */ - lsize = c->dcache.linesz; - lsize = c->scache.linesz ? : lsize; - lsize = c->tcache.linesz ? : lsize; + lsize = cpu_dcache_line_size(); + lsize = cpu_scache_line_size() ? : lsize; + lsize = cpu_tcache_line_size() ? : lsize; BUG_ON(!lsize); -- cgit v1.2.3 From 785a12afdb4a52903447fd890633c82fdda4b6f7 Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Tue, 8 Aug 2017 14:13:15 +0530 Subject: powerpc/powernv/idle: Disable LOSE_FULL_CONTEXT states when stop-api fails Currently, we use the opal call opal_slw_set_reg() to inform the Sleep-Winkle Engine (SLW) to restore the contents of some of the Hypervisor state on wakeup from deep idle states that lose full hypervisor context (characterized by the flag OPAL_PM_LOSE_FULL_CONTEXT). However, the current code has a bug in that if opal_slw_set_reg() fails, we don't disable the use of these deep states (winkle on POWER8, stop4 onwards on POWER9). This patch fixes this bug by ensuring that if programing the sleep-winkle engine to restore the hypervisor states in pnv_save_sprs_for_deep_states() fails, then we exclude such states by clearing the OPAL_PM_LOSE_FULL_CONTEXT flag from supported_cpuidle_states. As a result POWER8 will be prevented from using winkle for CPU-Hotplug, and POWER9 will put the offlined CPUs to the default stop state when available. Further, we ensure in the initialization of the cpuidle-powernv driver to only include those states whose flags are present in supported_cpuidle_states, thereby skipping OPAL_PM_LOSE_FULL_CONTEXT states when they have been disabled due to stop-api failure. Fixes: 1e1601b38e6 ("powerpc/powernv/idle: Restore SPRs for deep idle states via stop API.") Signed-off-by: Gautham R. Shenoy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/idle.c | 41 ++++++++++++++++++++++++++++++++--- drivers/cpuidle/cpuidle-powernv.c | 10 +++++++++ 2 files changed, 48 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 2abee070373f..a553aeea7af6 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -56,6 +56,7 @@ u64 pnv_first_deep_stop_state = MAX_STOP_STATE; */ static u64 pnv_deepest_stop_psscr_val; static u64 pnv_deepest_stop_psscr_mask; +static u64 pnv_deepest_stop_flag; static bool deepest_stop_found; static int pnv_save_sprs_for_deep_states(void) @@ -185,8 +186,40 @@ static void pnv_alloc_idle_core_states(void) update_subcore_sibling_mask(); - if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) - pnv_save_sprs_for_deep_states(); + if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) { + int rc = pnv_save_sprs_for_deep_states(); + + if (likely(!rc)) + return; + + /* + * The stop-api is unable to restore hypervisor + * resources on wakeup from platform idle states which + * lose full context. So disable such states. + */ + supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT; + pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n"); + pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n"); + + if (cpu_has_feature(CPU_FTR_ARCH_300) && + (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) { + /* + * Use the default stop state for CPU-Hotplug + * if available. + */ + if (default_stop_found) { + pnv_deepest_stop_psscr_val = + pnv_default_stop_val; + pnv_deepest_stop_psscr_mask = + pnv_default_stop_mask; + pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n", + pnv_deepest_stop_psscr_val); + } else { /* Fallback to snooze loop for CPU-Hotplug */ + deepest_stop_found = false; + pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n"); + } + } + } } u32 pnv_get_supported_cpuidle_states(void) @@ -375,7 +408,8 @@ unsigned long pnv_cpu_offline(unsigned int cpu) pnv_deepest_stop_psscr_val; srr1 = power9_idle_stop(psscr); - } else if (idle_states & OPAL_PM_WINKLE_ENABLED) { + } else if ((idle_states & OPAL_PM_WINKLE_ENABLED) && + (idle_states & OPAL_PM_LOSE_FULL_CONTEXT)) { srr1 = power7_idle_insn(PNV_THREAD_WINKLE); } else if ((idle_states & OPAL_PM_SLEEP_ENABLED) || (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { @@ -553,6 +587,7 @@ static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags, max_residency_ns = residency_ns[i]; pnv_deepest_stop_psscr_val = psscr_val[i]; pnv_deepest_stop_psscr_mask = psscr_mask[i]; + pnv_deepest_stop_flag = flags[i]; deepest_stop_found = true; } diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index 37b0698b7193..42896a67aeae 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -235,6 +235,7 @@ static inline int validate_dt_prop_sizes(const char *prop1, int prop1_len, return -1; } +extern u32 pnv_get_supported_cpuidle_states(void); static int powernv_add_idle_states(void) { struct device_node *power_mgt; @@ -248,6 +249,8 @@ static int powernv_add_idle_states(void) const char *names[CPUIDLE_STATE_MAX]; u32 has_stop_states = 0; int i, rc; + u32 supported_flags = pnv_get_supported_cpuidle_states(); + /* Currently we have snooze statically defined */ @@ -362,6 +365,13 @@ static int powernv_add_idle_states(void) for (i = 0; i < dt_idle_states; i++) { unsigned int exit_latency, target_residency; bool stops_timebase = false; + + /* + * Skip the platform idle state whose flag isn't in + * the supported_cpuidle_states flag mask. + */ + if ((flags[i] & supported_flags) != flags[i]) + continue; /* * If an idle state has exit latency beyond * POWERNV_THRESHOLD_LATENCY_NS then don't use it -- cgit v1.2.3 From 527f10285bc3f700407bf3aab0ea7b3b9f9338da Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 7 Aug 2017 16:18:04 -0700 Subject: MIPS: Prevent building MT support for microMIPS kernels We don't currently support the MT ASE for microMIPS kernels, and there are no CPUs currently in existence that use both. They can however both be enabled in Kconfig, resulting in build failures such as: AS arch/mips/kernel/cps-vec.o arch/mips/kernel/cps-vec.S: Assembler messages: arch/mips/kernel/cps-vec.S:242: Warning: the 32-bit microMIPS architecture does not support the `mt' extension arch/mips/kernel/cps-vec.S:276: Error: unrecognized opcode `mttc0 $13,$2,2' arch/mips/kernel/cps-vec.S:282: Error: unrecognized opcode `mttc0 $8,$1,2' arch/mips/kernel/cps-vec.S:285: Error: unrecognized opcode `mttc0 $0,$2,1' ... Fix this by preventing MT from being enabled when targeting microMIPS. Signed-off-by: Paul Burton Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/16951/ Signed-off-by: Ralf Baechle --- arch/mips/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 8dd20358464f..48d91d5be4e9 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -2260,7 +2260,7 @@ config CPU_R4K_CACHE_TLB config MIPS_MT_SMP bool "MIPS MT SMP support (1 TC on each available VPE)" - depends on SYS_SUPPORTS_MULTITHREADING && !CPU_MIPSR6 + depends on SYS_SUPPORTS_MULTITHREADING && !CPU_MIPSR6 && !CPU_MICROMIPS select CPU_MIPSR2_IRQ_VI select CPU_MIPSR2_IRQ_EI select SYNC_R4K -- cgit v1.2.3 From 5fc9484f5e41b239d1e7a123219e53f333e43ba5 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 7 Aug 2017 16:16:47 -0700 Subject: MIPS: Set ISA bit in entry-y for microMIPS kernels When building a kernel for the microMIPS ISA, ensure that the ISA bit (ie. bit 0) in the entry address is set. Otherwise we may include an entry address in images which bootloaders will jump to as MIPS32 code. I originally tried using "objdump -f" to obtain the entry address, which works for microMIPS but it always outputs a 32 bit address for a 32 bit ELF whilst nm will sign extend to 64 bit. That matters for systems where we might want to run a MIPS32 kernel on a MIPS64 CPU & load it with a MIPS64 bootloader, which would then jump to a non-canonical (non-sign-extended) address. This works in all cases as it only changes the behaviour for microMIPS kernels, but isn't the prettiest solution. A possible alternative would be to write a custom tool to just extract, sign extend & print the entry point of an ELF executable. I'm open to feedback if that would be preferred. Signed-off-by: Paul Burton Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/16950/ Signed-off-by: Ralf Baechle --- arch/mips/Makefile | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 04343625b929..bc2708c9ada4 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -243,8 +243,21 @@ include arch/mips/Kbuild.platforms ifdef CONFIG_PHYSICAL_START load-y = $(CONFIG_PHYSICAL_START) endif -entry-y = 0x$(shell $(NM) vmlinux 2>/dev/null \ + +entry-noisa-y = 0x$(shell $(NM) vmlinux 2>/dev/null \ | grep "\bkernel_entry\b" | cut -f1 -d \ ) +ifdef CONFIG_CPU_MICROMIPS + # + # Set the ISA bit, since the kernel_entry symbol in the ELF will have it + # clear which would lead to images containing addresses which bootloaders may + # jump to as MIPS32 code. + # + entry-y = $(patsubst %0,%1,$(patsubst %2,%3,$(patsubst %4,%5, \ + $(patsubst %6,%7,$(patsubst %8,%9,$(patsubst %a,%b, \ + $(patsubst %c,%d,$(patsubst %e,%f,$(entry-noisa-y))))))))) +else + entry-y = $(entry-noisa-y) +endif cflags-y += -I$(srctree)/arch/mips/include/asm/mach-generic drivers-$(CONFIG_PCI) += arch/mips/pci/ -- cgit v1.2.3 From 7310d5c8c55e8987a854507f71022f8de676bbf4 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 9 Aug 2017 20:57:55 +1000 Subject: powerpc/configs: Re-enable HARD/SOFT lockup detectors In commit 05a4a9527931 ("kernel/watchdog: split up config options"), CONFIG_LOCKUP_DETECTOR was split into two separate config options, CONFIG_HARDLOCKUP_DETECTOR and CONFIG_SOFTLOCKUP_DETECTOR. Our defconfigs still have CONFIG_LOCKUP_DETECTOR=y, but that is no longer user selectable, and we don't mention the new options, so we end up with none of them enabled. So update the defconfigs to turn on the new SOFT and HARD options, the end result being the same as what we had previously. Fixes: 05a4a9527931 ("kernel/watchdog: split up config options") Signed-off-by: Michael Ellerman --- arch/powerpc/configs/powernv_defconfig | 3 ++- arch/powerpc/configs/ppc64_defconfig | 3 ++- arch/powerpc/configs/pseries_defconfig | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig index 0695ce047d56..34fc9bbfca9e 100644 --- a/arch/powerpc/configs/powernv_defconfig +++ b/arch/powerpc/configs/powernv_defconfig @@ -293,7 +293,8 @@ CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_STACK_USAGE=y CONFIG_DEBUG_STACKOVERFLOW=y -CONFIG_LOCKUP_DETECTOR=y +CONFIG_SOFTLOCKUP_DETECTOR=y +CONFIG_HARDLOCKUP_DETECTOR=y CONFIG_LATENCYTOP=y CONFIG_SCHED_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 5175028c56ce..c5246d29f385 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -324,7 +324,8 @@ CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_STACK_USAGE=y CONFIG_DEBUG_STACKOVERFLOW=y -CONFIG_LOCKUP_DETECTOR=y +CONFIG_SOFTLOCKUP_DETECTOR=y +CONFIG_HARDLOCKUP_DETECTOR=y CONFIG_DEBUG_MUTEXES=y CONFIG_LATENCYTOP=y CONFIG_SCHED_TRACER=y diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index 1a61aa20dfba..fd5d98a0b95c 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -291,7 +291,8 @@ CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_STACK_USAGE=y CONFIG_DEBUG_STACKOVERFLOW=y -CONFIG_LOCKUP_DETECTOR=y +CONFIG_SOFTLOCKUP_DETECTOR=y +CONFIG_HARDLOCKUP_DETECTOR=y CONFIG_LATENCYTOP=y CONFIG_SCHED_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y -- cgit v1.2.3 From 0459ddfdb31e7d812b555a2530ecbacdf96961a6 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 9 Aug 2017 22:41:21 +1000 Subject: powerpc: NMI IPI improve lock primitive When the NMI IPI lock is contended, spin at low SMT priority, using loads only, and with interrupts enabled (where possible). This improves behaviour under high contention (e.g., a system crash when a number of CPUs are trying to enter the debugger). Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/smp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index cf0e1245b8cc..8d3320562c70 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -351,7 +351,7 @@ static void nmi_ipi_lock_start(unsigned long *flags) hard_irq_disable(); while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) { raw_local_irq_restore(*flags); - cpu_relax(); + spin_until_cond(atomic_read(&__nmi_ipi_lock) == 0); raw_local_irq_save(*flags); hard_irq_disable(); } @@ -360,7 +360,7 @@ static void nmi_ipi_lock_start(unsigned long *flags) static void nmi_ipi_lock(void) { while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) - cpu_relax(); + spin_until_cond(atomic_read(&__nmi_ipi_lock) == 0); } static void nmi_ipi_unlock(void) @@ -475,7 +475,7 @@ int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us) nmi_ipi_lock_start(&flags); while (nmi_ipi_busy_count) { nmi_ipi_unlock_end(&flags); - cpu_relax(); + spin_until_cond(nmi_ipi_busy_count == 0); nmi_ipi_lock_start(&flags); } -- cgit v1.2.3 From d8e2a4053574002135fbb032c2e74f1d1dbb2103 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 9 Aug 2017 22:41:22 +1000 Subject: powerpc/watchdog: Improve watchdog lock primitive - Hard-disable interrupts before taking the lock, which prevents soft-NMI re-entrancy and therefore can prevent deadlocks. - Use raw_ variants of local_irq_disable to avoid irq debugging. - When the lock is contended, spin at low SMT priority, using loads only, and with interrupts enabled (where possible). Some stalls have been noticed at high loads that go away with improved locking. There should not be so much locking contention in the first place (which is addressed in a subsequent patch), but locking should still be improved. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/watchdog.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index b67f8b03a32d..fda4d044d326 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -71,15 +71,20 @@ static inline void wd_smp_lock(unsigned long *flags) * This may be called from low level interrupt handlers at some * point in future. */ - local_irq_save(*flags); - while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock))) - cpu_relax(); + raw_local_irq_save(*flags); + hard_irq_disable(); /* Make it soft-NMI safe */ + while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock))) { + raw_local_irq_restore(*flags); + spin_until_cond(!test_bit(0, &__wd_smp_lock)); + raw_local_irq_save(*flags); + hard_irq_disable(); + } } static inline void wd_smp_unlock(unsigned long *flags) { clear_bit_unlock(0, &__wd_smp_lock); - local_irq_restore(*flags); + raw_local_irq_restore(*flags); } static void wd_lockup_ipi(struct pt_regs *regs) -- cgit v1.2.3 From 26c5c6e129ee725f103938262a034861ada467ae Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 9 Aug 2017 22:41:23 +1000 Subject: powerpc/watchdog: Moderate touch_nmi_watchdog overhead Some code can go into a tight loop calling touch_nmi_watchdog (e.g., stop_machine CPU hotplug code). This can cause contention on watchdog locks particularly if all CPUs with watchdog enabled are spinning in the loops. Avoid this storm of activity by running the watchdog timer callback from this path if we have exceeded the timer period since it was last run. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/watchdog.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index fda4d044d326..426dd34891d6 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -263,9 +263,11 @@ static void wd_timer_fn(unsigned long data) void arch_touch_nmi_watchdog(void) { + unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000; int cpu = smp_processor_id(); - watchdog_timer_interrupt(cpu); + if (get_tb() - per_cpu(wd_timer_tb, cpu) >= ticks) + watchdog_timer_interrupt(cpu); } EXPORT_SYMBOL(arch_touch_nmi_watchdog); -- cgit v1.2.3 From 8e23692175ad465628b8c86c1acc154fecad97be Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 9 Aug 2017 22:41:24 +1000 Subject: powerpc/watchdog: Fix final-check recovered case When the watchdog decides to panic, it takes the lock and double checks everything (to avoid races with the CPU being unstuck or panic()ed by something else). The exit label was misplaced and would result in all-CPUs backtrace and watchdog panic even in the case that the condition was found to be resolved. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/watchdog.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 426dd34891d6..7d16dafa1bb6 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -144,7 +144,6 @@ static void watchdog_smp_panic(int cpu, u64 tb) for_each_cpu(c, &wd_smp_cpus_pending) set_cpu_stuck(c, tb); -out: wd_smp_unlock(&flags); printk_safe_flush(); @@ -157,6 +156,11 @@ out: if (hardlockup_panic) nmi_panic(NULL, "Hard LOCKUP"); + + return; + +out: + wd_smp_unlock(&flags); } static void wd_smp_clear_cpu_pending(int cpu, u64 tb) -- cgit v1.2.3 From 87607a30be92f1ecee3af6f4a5779e179db98118 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 9 Aug 2017 22:41:25 +1000 Subject: powerpc/watchdog: Fix marking of stuck CPUs When the SMP detector finds other CPUs stuck, it iterates over them and marks them as stuck. This pulls them out of the pending mask and allows the detector to continue with remaining good CPUs (if nmi_watchdog=panic is not enabled). The code to dothat was buggy because when setting a CPU stuck, if the pending mask became empty, it resets it to keep the watchdog running. However the iterator will continue to run over the new pending mask and mark remaining good CPUs sas stuck. Fix this by doing it with cpumask bitwise operations. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/watchdog.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 7d16dafa1bb6..12e90ae712fe 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -101,10 +101,10 @@ static void wd_lockup_ipi(struct pt_regs *regs) nmi_panic(regs, "Hard LOCKUP"); } -static void set_cpu_stuck(int cpu, u64 tb) +static void set_cpumask_stuck(const struct cpumask *cpumask, u64 tb) { - cpumask_set_cpu(cpu, &wd_smp_cpus_stuck); - cpumask_clear_cpu(cpu, &wd_smp_cpus_pending); + cpumask_or(&wd_smp_cpus_stuck, &wd_smp_cpus_stuck, cpumask); + cpumask_andnot(&wd_smp_cpus_pending, &wd_smp_cpus_pending, cpumask); if (cpumask_empty(&wd_smp_cpus_pending)) { wd_smp_last_reset_tb = tb; cpumask_andnot(&wd_smp_cpus_pending, @@ -112,6 +112,10 @@ static void set_cpu_stuck(int cpu, u64 tb) &wd_smp_cpus_stuck); } } +static void set_cpu_stuck(int cpu, u64 tb) +{ + set_cpumask_stuck(cpumask_of(cpu), tb); +} static void watchdog_smp_panic(int cpu, u64 tb) { @@ -140,9 +144,8 @@ static void watchdog_smp_panic(int cpu, u64 tb) } smp_flush_nmi_ipi(1000000); - /* Take the stuck CPU out of the watch group */ - for_each_cpu(c, &wd_smp_cpus_pending) - set_cpu_stuck(c, tb); + /* Take the stuck CPUs out of the watch group */ + set_cpumask_stuck(&wd_smp_cpus_pending, tb); wd_smp_unlock(&flags); -- cgit v1.2.3 From 96ea91e7b6ee2c406598d859e7348b4829404eea Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 9 Aug 2017 22:41:26 +1000 Subject: powerpc/watchdog: add locking around init/exit functions When CPUs start and stop the watchdog, they manipulate shared data that is normally protected by the lock. Other CPUs can be running concurrently at this time, so it's a good idea to use locking here to be on the safe side. Remove the barrier which is undocumented and didn't do anything. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/watchdog.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 12e90ae712fe..34721a257a77 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -297,6 +297,8 @@ static void stop_watchdog_timer_on(unsigned int cpu) static int start_wd_on_cpu(unsigned int cpu) { + unsigned long flags; + if (cpumask_test_cpu(cpu, &wd_cpus_enabled)) { WARN_ON(1); return 0; @@ -311,12 +313,14 @@ static int start_wd_on_cpu(unsigned int cpu) if (!cpumask_test_cpu(cpu, &watchdog_cpumask)) return 0; + wd_smp_lock(&flags); cpumask_set_cpu(cpu, &wd_cpus_enabled); if (cpumask_weight(&wd_cpus_enabled) == 1) { cpumask_set_cpu(cpu, &wd_smp_cpus_pending); wd_smp_last_reset_tb = get_tb(); } - smp_wmb(); + wd_smp_unlock(&flags); + start_watchdog_timer_on(cpu); return 0; @@ -324,12 +328,17 @@ static int start_wd_on_cpu(unsigned int cpu) static int stop_wd_on_cpu(unsigned int cpu) { + unsigned long flags; + if (!cpumask_test_cpu(cpu, &wd_cpus_enabled)) return 0; /* Can happen in CPU unplug case */ stop_watchdog_timer_on(cpu); + wd_smp_lock(&flags); cpumask_clear_cpu(cpu, &wd_cpus_enabled); + wd_smp_unlock(&flags); + wd_smp_clear_cpu_pending(cpu, get_tb()); return 0; -- cgit v1.2.3 From 56236a59556cfd3bae7bffb7e5f438b5ef0af880 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 10 Aug 2017 15:24:05 -0700 Subject: mm: refactor TLB gathering API This patch is a preparatory patch for solving race problems caused by TLB batch. For that, we will increase/decrease TLB flush pending count of mm_struct whenever tlb_[gather|finish]_mmu is called. Before making it simple, this patch separates architecture specific part and rename it to arch_tlb_[gather|finish]_mmu and generic part just calls it. It shouldn't change any behavior. Link: http://lkml.kernel.org/r/20170802000818.4760-5-namit@vmware.com Signed-off-by: Minchan Kim Signed-off-by: Nadav Amit Acked-by: Mel Gorman Cc: Ingo Molnar Cc: Russell King Cc: Tony Luck Cc: Martin Schwidefsky Cc: "David S. Miller" Cc: Heiko Carstens Cc: Yoshinori Sato Cc: Jeff Dike Cc: Andrea Arcangeli Cc: Andy Lutomirski Cc: Hugh Dickins Cc: Mel Gorman Cc: Nadav Amit Cc: Rik van Riel Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/include/asm/tlb.h | 6 ++++-- arch/ia64/include/asm/tlb.h | 6 ++++-- arch/s390/include/asm/tlb.h | 12 ++++++------ arch/sh/include/asm/tlb.h | 6 ++++-- arch/um/include/asm/tlb.h | 8 +++++--- include/asm-generic/tlb.h | 7 ++++--- include/linux/mm_types.h | 6 ++++++ mm/memory.c | 28 +++++++++++++++++++++------- 8 files changed, 54 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h index 3f2eb76243e3..7f5b2a2d3861 100644 --- a/arch/arm/include/asm/tlb.h +++ b/arch/arm/include/asm/tlb.h @@ -148,7 +148,8 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb) } static inline void -tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) +arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) { tlb->mm = mm; tlb->fullmm = !(start | (end+1)); @@ -166,7 +167,8 @@ tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start } static inline void -tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) +arch_tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end) { tlb_flush_mmu(tlb); diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h index fced197b9626..93cadc04ac62 100644 --- a/arch/ia64/include/asm/tlb.h +++ b/arch/ia64/include/asm/tlb.h @@ -168,7 +168,8 @@ static inline void __tlb_alloc_page(struct mmu_gather *tlb) static inline void -tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) +arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) { tlb->mm = mm; tlb->max = ARRAY_SIZE(tlb->local); @@ -185,7 +186,8 @@ tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start * collected. */ static inline void -tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) +arch_tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end) { /* * Note: tlb->nr may be 0 at this point, so we can't rely on tlb->start_addr and diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index 7317b3108a88..d574d0820dc8 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -47,10 +47,9 @@ struct mmu_table_batch { extern void tlb_table_flush(struct mmu_gather *tlb); extern void tlb_remove_table(struct mmu_gather *tlb, void *table); -static inline void tlb_gather_mmu(struct mmu_gather *tlb, - struct mm_struct *mm, - unsigned long start, - unsigned long end) +static inline void +arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) { tlb->mm = mm; tlb->start = start; @@ -76,8 +75,9 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb) tlb_flush_mmu_free(tlb); } -static inline void tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) +static inline void +arch_tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end) { tlb_flush_mmu(tlb); } diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h index 46e0d635e36f..89786560dbd4 100644 --- a/arch/sh/include/asm/tlb.h +++ b/arch/sh/include/asm/tlb.h @@ -36,7 +36,8 @@ static inline void init_tlb_gather(struct mmu_gather *tlb) } static inline void -tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) +arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) { tlb->mm = mm; tlb->start = start; @@ -47,7 +48,8 @@ tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start } static inline void -tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) +arch_tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end) { if (tlb->fullmm) flush_tlb_mm(tlb->mm); diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h index 600a2e9bfee2..2a901eca7145 100644 --- a/arch/um/include/asm/tlb.h +++ b/arch/um/include/asm/tlb.h @@ -45,7 +45,8 @@ static inline void init_tlb_gather(struct mmu_gather *tlb) } static inline void -tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) +arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) { tlb->mm = mm; tlb->start = start; @@ -80,12 +81,13 @@ tlb_flush_mmu(struct mmu_gather *tlb) tlb_flush_mmu_free(tlb); } -/* tlb_finish_mmu +/* arch_tlb_finish_mmu * Called at the end of the shootdown operation to free up any resources * that were required. */ static inline void -tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) +arch_tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end) { tlb_flush_mmu(tlb); diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 8afa4335e5b2..8f71521e7a44 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -112,10 +112,11 @@ struct mmu_gather { #define HAVE_GENERIC_MMU_GATHER -void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end); +void arch_tlb_gather_mmu(struct mmu_gather *tlb, + struct mm_struct *mm, unsigned long start, unsigned long end); void tlb_flush_mmu(struct mmu_gather *tlb); -void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, - unsigned long end); +void arch_tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end); extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 0e478ebd2706..c605f2a3a68e 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -522,6 +522,12 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm) return mm->cpu_vm_mask_var; } +struct mmu_gather; +extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end); +extern void tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end); + #if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION) /* * Memory barriers to keep this state in sync are graciously provided by diff --git a/mm/memory.c b/mm/memory.c index f65beaad319b..34cba5113e06 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -215,12 +215,8 @@ static bool tlb_next_batch(struct mmu_gather *tlb) return true; } -/* tlb_gather_mmu - * Called to initialize an (on-stack) mmu_gather structure for page-table - * tear-down from @mm. The @fullmm argument is used when @mm is without - * users and we're going to destroy the full address space (exit/execve). - */ -void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) +void arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) { tlb->mm = mm; @@ -275,7 +271,8 @@ void tlb_flush_mmu(struct mmu_gather *tlb) * Called at the end of the shootdown operation to free up any resources * that were required. */ -void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) +void arch_tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end) { struct mmu_gather_batch *batch, *next; @@ -398,6 +395,23 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table) #endif /* CONFIG_HAVE_RCU_TABLE_FREE */ +/* tlb_gather_mmu + * Called to initialize an (on-stack) mmu_gather structure for page-table + * tear-down from @mm. The @fullmm argument is used when @mm is without + * users and we're going to destroy the full address space (exit/execve). + */ +void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + arch_tlb_gather_mmu(tlb, mm, start, end); +} + +void tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end) +{ + arch_tlb_finish_mmu(tlb, start, end); +} + /* * Note: this doesn't free the actual pages themselves. That * has been handled earlier when unmapping all the memory regions. -- cgit v1.2.3 From 99baac21e4585f4258f919502c6e23f1e5edc98c Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 10 Aug 2017 15:24:12 -0700 Subject: mm: fix MADV_[FREE|DONTNEED] TLB flush miss problem Nadav reported parallel MADV_DONTNEED on same range has a stale TLB problem and Mel fixed it[1] and found same problem on MADV_FREE[2]. Quote from Mel Gorman: "The race in question is CPU 0 running madv_free and updating some PTEs while CPU 1 is also running madv_free and looking at the same PTEs. CPU 1 may have writable TLB entries for a page but fail the pte_dirty check (because CPU 0 has updated it already) and potentially fail to flush. Hence, when madv_free on CPU 1 returns, there are still potentially writable TLB entries and the underlying PTE is still present so that a subsequent write does not necessarily propagate the dirty bit to the underlying PTE any more. Reclaim at some unknown time at the future may then see that the PTE is still clean and discard the page even though a write has happened in the meantime. I think this is possible but I could have missed some protection in madv_free that prevents it happening." This patch aims for solving both problems all at once and is ready for other problem with KSM, MADV_FREE and soft-dirty story[3]. TLB batch API(tlb_[gather|finish]_mmu] uses [inc|dec]_tlb_flush_pending and mmu_tlb_flush_pending so that when tlb_finish_mmu is called, we can catch there are parallel threads going on. In that case, forcefully, flush TLB to prevent for user to access memory via stale TLB entry although it fail to gather page table entry. I confirmed this patch works with [4] test program Nadav gave so this patch supersedes "mm: Always flush VMA ranges affected by zap_page_range v2" in current mmotm. NOTE: This patch modifies arch-specific TLB gathering interface(x86, ia64, s390, sh, um). It seems most of architecture are straightforward but s390 need to be careful because tlb_flush_mmu works only if mm->context.flush_mm is set to non-zero which happens only a pte entry really is cleared by ptep_get_and_clear and friends. However, this problem never changes the pte entries but need to flush to prevent memory access from stale tlb. [1] http://lkml.kernel.org/r/20170725101230.5v7gvnjmcnkzzql3@techsingularity.net [2] http://lkml.kernel.org/r/20170725100722.2dxnmgypmwnrfawp@suse.de [3] http://lkml.kernel.org/r/BD3A0EBE-ECF4-41D4-87FA-C755EA9AB6BD@gmail.com [4] https://patchwork.kernel.org/patch/9861621/ [minchan@kernel.org: decrease tlb flush pending count in tlb_finish_mmu] Link: http://lkml.kernel.org/r/20170808080821.GA31730@bbox Link: http://lkml.kernel.org/r/20170802000818.4760-7-namit@vmware.com Signed-off-by: Minchan Kim Signed-off-by: Nadav Amit Reported-by: Nadav Amit Reported-by: Mel Gorman Acked-by: Mel Gorman Cc: Ingo Molnar Cc: Russell King Cc: Tony Luck Cc: Martin Schwidefsky Cc: "David S. Miller" Cc: Heiko Carstens Cc: Yoshinori Sato Cc: Jeff Dike Cc: Andrea Arcangeli Cc: Andy Lutomirski Cc: Hugh Dickins Cc: Mel Gorman Cc: Nadav Amit Cc: Rik van Riel Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/include/asm/tlb.h | 7 ++++++- arch/ia64/include/asm/tlb.h | 4 +++- arch/s390/include/asm/tlb.h | 7 ++++++- arch/sh/include/asm/tlb.h | 4 ++-- arch/um/include/asm/tlb.h | 7 ++++++- include/asm-generic/tlb.h | 2 +- include/linux/mm_types.h | 8 ++++++++ mm/memory.c | 18 ++++++++++++++++-- 8 files changed, 48 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h index 7f5b2a2d3861..d5562f9ce600 100644 --- a/arch/arm/include/asm/tlb.h +++ b/arch/arm/include/asm/tlb.h @@ -168,8 +168,13 @@ arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, static inline void arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end, bool force) { + if (force) { + tlb->range_start = start; + tlb->range_end = end; + } + tlb_flush_mmu(tlb); /* keep the page table cache within bounds */ diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h index 93cadc04ac62..cbe5ac3699bf 100644 --- a/arch/ia64/include/asm/tlb.h +++ b/arch/ia64/include/asm/tlb.h @@ -187,8 +187,10 @@ arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, */ static inline void arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end, bool force) { + if (force) + tlb->need_flush = 1; /* * Note: tlb->nr may be 0 at this point, so we can't rely on tlb->start_addr and * tlb->end_addr. diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index d574d0820dc8..2eb8ff0d6fca 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -77,8 +77,13 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb) static inline void arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end, bool force) { + if (force) { + tlb->start = start; + tlb->end = end; + } + tlb_flush_mmu(tlb); } diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h index 89786560dbd4..51a8bc967e75 100644 --- a/arch/sh/include/asm/tlb.h +++ b/arch/sh/include/asm/tlb.h @@ -49,9 +49,9 @@ arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, static inline void arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end, bool force) { - if (tlb->fullmm) + if (tlb->fullmm || force) flush_tlb_mm(tlb->mm); /* keep the page table cache within bounds */ diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h index 2a901eca7145..344d95619d03 100644 --- a/arch/um/include/asm/tlb.h +++ b/arch/um/include/asm/tlb.h @@ -87,8 +87,13 @@ tlb_flush_mmu(struct mmu_gather *tlb) */ static inline void arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end, bool force) { + if (force) { + tlb->start = start; + tlb->end = end; + tlb->need_flush = 1; + } tlb_flush_mmu(tlb); /* keep the page table cache within bounds */ diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 8f71521e7a44..faddde44de8c 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -116,7 +116,7 @@ void arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end); void tlb_flush_mmu(struct mmu_gather *tlb); void arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end); + unsigned long start, unsigned long end, bool force); extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 892a7b0196fd..3cadee0a3508 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -538,6 +538,14 @@ static inline bool mm_tlb_flush_pending(struct mm_struct *mm) return atomic_read(&mm->tlb_flush_pending) > 0; } +/* + * Returns true if there are two above TLB batching threads in parallel. + */ +static inline bool mm_tlb_flush_nested(struct mm_struct *mm) +{ + return atomic_read(&mm->tlb_flush_pending) > 1; +} + static inline void init_tlb_flush_pending(struct mm_struct *mm) { atomic_set(&mm->tlb_flush_pending, 0); diff --git a/mm/memory.c b/mm/memory.c index 34cba5113e06..e158f7ac6730 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -272,10 +272,13 @@ void tlb_flush_mmu(struct mmu_gather *tlb) * that were required. */ void arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end, bool force) { struct mmu_gather_batch *batch, *next; + if (force) + __tlb_adjust_range(tlb, start, end - start); + tlb_flush_mmu(tlb); /* keep the page table cache within bounds */ @@ -404,12 +407,23 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) { arch_tlb_gather_mmu(tlb, mm, start, end); + inc_tlb_flush_pending(tlb->mm); } void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) { - arch_tlb_finish_mmu(tlb, start, end); + /* + * If there are parallel threads are doing PTE changes on same range + * under non-exclusive lock(e.g., mmap_sem read-side) but defer TLB + * flush by batching, a thread has stable TLB entry can fail to flush + * the TLB by observing pte_none|!pte_dirty, for example so flush TLB + * forcefully if we detect parallel PTE batching threads. + */ + bool force = mm_tlb_flush_nested(tlb->mm); + + arch_tlb_finish_mmu(tlb, start, end, force); + dec_tlb_flush_pending(tlb->mm); } /* -- cgit v1.2.3 From c138d81163d82db044dcaf1141395713f03bf0bf Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 28 Jul 2017 12:23:12 +0200 Subject: x86: provide an init_mem_mapping hypervisor hook Provide a hook in hypervisor_x86 called after setting up initial memory mapping. This is needed e.g. by Xen HVM guests to map the hypervisor shared info page. Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Acked-by: Ingo Molnar Signed-off-by: Juergen Gross --- arch/x86/include/asm/hypervisor.h | 10 ++++++++++ arch/x86/mm/init.c | 3 +++ 2 files changed, 13 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h index 21126155a739..0ead9dbb9130 100644 --- a/arch/x86/include/asm/hypervisor.h +++ b/arch/x86/include/asm/hypervisor.h @@ -43,6 +43,9 @@ struct hypervisor_x86 { /* pin current vcpu to specified physical cpu (run rarely) */ void (*pin_vcpu)(int); + + /* called during init_mem_mapping() to setup early mappings. */ + void (*init_mem_mapping)(void); }; extern const struct hypervisor_x86 *x86_hyper; @@ -57,8 +60,15 @@ extern const struct hypervisor_x86 x86_hyper_kvm; extern void init_hypervisor_platform(void); extern bool hypervisor_x2apic_available(void); extern void hypervisor_pin_vcpu(int cpu); + +static inline void hypervisor_init_mem_mapping(void) +{ + if (x86_hyper && x86_hyper->init_mem_mapping) + x86_hyper->init_mem_mapping(); +} #else static inline void init_hypervisor_platform(void) { } static inline bool hypervisor_x2apic_available(void) { return false; } +static inline void hypervisor_init_mem_mapping(void) { } #endif /* CONFIG_HYPERVISOR_GUEST */ #endif /* _ASM_X86_HYPERVISOR_H */ diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 673541eb3b3f..bf3f1065d6ad 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -18,6 +18,7 @@ #include /* for MAX_DMA_PFN */ #include #include +#include /* * We need to define the tracepoints somewhere, and tlb.c @@ -636,6 +637,8 @@ void __init init_mem_mapping(void) load_cr3(swapper_pg_dir); __flush_tlb_all(); + hypervisor_init_mem_mapping(); + early_memtest(0, max_pfn_mapped << PAGE_SHIFT); } -- cgit v1.2.3 From 10231f69eb039550864ff3eb395da0c63c03ed5f Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 28 Jul 2017 12:23:13 +0200 Subject: xen: split up xen_hvm_init_shared_info() Instead of calling xen_hvm_init_shared_info() on boot and resume split it up into a boot time function searching for the pfn to use and a mapping function doing the hypervisor mapping call. Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Acked-by: Ingo Molnar Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten_hvm.c | 45 +++++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index 87d791356ea9..d23531f5f17e 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -21,29 +21,9 @@ #include "mmu.h" #include "smp.h" -void __ref xen_hvm_init_shared_info(void) +void xen_hvm_init_shared_info(void) { struct xen_add_to_physmap xatp; - u64 pa; - - if (HYPERVISOR_shared_info == &xen_dummy_shared_info) { - /* - * Search for a free page starting at 4kB physical address. - * Low memory is preferred to avoid an EPT large page split up - * by the mapping. - * Starting below X86_RESERVE_LOW (usually 64kB) is fine as - * the BIOS used for HVM guests is well behaved and won't - * clobber memory other than the first 4kB. - */ - for (pa = PAGE_SIZE; - !e820__mapped_all(pa, pa + PAGE_SIZE, E820_TYPE_RAM) || - memblock_is_reserved(pa); - pa += PAGE_SIZE) - ; - - memblock_reserve(pa, PAGE_SIZE); - HYPERVISOR_shared_info = __va(pa); - } xatp.domid = DOMID_SELF; xatp.idx = 0; @@ -53,6 +33,28 @@ void __ref xen_hvm_init_shared_info(void) BUG(); } +static void __init reserve_shared_info(void) +{ + u64 pa; + + /* + * Search for a free page starting at 4kB physical address. + * Low memory is preferred to avoid an EPT large page split up + * by the mapping. + * Starting below X86_RESERVE_LOW (usually 64kB) is fine as + * the BIOS used for HVM guests is well behaved and won't + * clobber memory other than the first 4kB. + */ + for (pa = PAGE_SIZE; + !e820__mapped_all(pa, pa + PAGE_SIZE, E820_TYPE_RAM) || + memblock_is_reserved(pa); + pa += PAGE_SIZE) + ; + + memblock_reserve(pa, PAGE_SIZE); + HYPERVISOR_shared_info = __va(pa); +} + static void __init init_hvm_pv_info(void) { int major, minor; @@ -153,6 +155,7 @@ static void __init xen_hvm_guest_init(void) init_hvm_pv_info(); + reserve_shared_info(); xen_hvm_init_shared_info(); /* -- cgit v1.2.3 From 4ca83dcf4e3bc0c98836dbb97553792ca7ea5429 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 28 Jul 2017 12:23:14 +0200 Subject: xen: fix hvm guest with kaslr enabled A Xen HVM guest running with KASLR enabled will die rather soon today because the shared info page mapping is using va() too early. This was introduced by commit a5d5f328b0e2baa5ee7c119fd66324eb79eeeb66 ("xen: allocate page for shared info page from low memory"). In order to fix this use early_memremap() to get a temporary virtual address for shared info until va() can be used safely. Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Acked-by: Ingo Molnar Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten_hvm.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index d23531f5f17e..de503c225ae1 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -21,6 +22,8 @@ #include "mmu.h" #include "smp.h" +static unsigned long shared_info_pfn; + void xen_hvm_init_shared_info(void) { struct xen_add_to_physmap xatp; @@ -28,7 +31,7 @@ void xen_hvm_init_shared_info(void) xatp.domid = DOMID_SELF; xatp.idx = 0; xatp.space = XENMAPSPACE_shared_info; - xatp.gpfn = virt_to_pfn(HYPERVISOR_shared_info); + xatp.gpfn = shared_info_pfn; if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) BUG(); } @@ -51,8 +54,16 @@ static void __init reserve_shared_info(void) pa += PAGE_SIZE) ; + shared_info_pfn = PHYS_PFN(pa); + memblock_reserve(pa, PAGE_SIZE); - HYPERVISOR_shared_info = __va(pa); + HYPERVISOR_shared_info = early_memremap(pa, PAGE_SIZE); +} + +static void __init xen_hvm_init_mem_mapping(void) +{ + early_memunmap(HYPERVISOR_shared_info, PAGE_SIZE); + HYPERVISOR_shared_info = __va(PFN_PHYS(shared_info_pfn)); } static void __init init_hvm_pv_info(void) @@ -221,5 +232,6 @@ const struct hypervisor_x86 x86_hyper_xen_hvm = { .init_platform = xen_hvm_guest_init, .pin_vcpu = xen_pin_vcpu, .x2apic_available = xen_x2apic_para_available, + .init_mem_mapping = xen_hvm_init_mem_mapping, }; EXPORT_SYMBOL(x86_hyper_xen_hvm); -- cgit v1.2.3 From 07e72f9a3f01b370be59ef2cf3ae13e47f696d71 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sat, 5 Aug 2017 01:43:45 +0300 Subject: ARM: OMAP2+: Register SoC device attributes from machine .init() SoC device attributes are registered with a call to soc_device_register() from the machine .init_late() operation, which is called from the late initcall, after all drivers built-in drivers have been probed. This results in the impossibility for drivers to use SoC device matching in their probe function. The omap_soc_device_init() function is safe to call from the machine .init() operation, as all data it depends on is initialized from the .init_early() operation. Move SoC device attribute registration to machine .init() like on all other ARM platforms. Signed-off-by: Laurent Pinchart Acked-by: Tony Lindgren Signed-off-by: Tomi Valkeinen --- arch/arm/mach-omap2/board-generic.c | 1 + arch/arm/mach-omap2/io.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-omap2/board-generic.c b/arch/arm/mach-omap2/board-generic.c index b1e661bb5521..583fc39d84cd 100644 --- a/arch/arm/mach-omap2/board-generic.c +++ b/arch/arm/mach-omap2/board-generic.c @@ -33,6 +33,7 @@ static void __init __maybe_unused omap_generic_init(void) pdata_quirks_init(omap_dt_match_table); omapdss_init_of(); + omap_soc_device_init(); } #ifdef CONFIG_SOC_OMAP2420 diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c index 1cd20e4d56b0..cb5d7314cf99 100644 --- a/arch/arm/mach-omap2/io.c +++ b/arch/arm/mach-omap2/io.c @@ -428,7 +428,6 @@ static void __init __maybe_unused omap_hwmod_init_postsetup(void) static void __init __maybe_unused omap_common_late_init(void) { omap2_common_pm_late_init(); - omap_soc_device_init(); } #ifdef CONFIG_SOC_OMAP2420 -- cgit v1.2.3 From d86bd82c1ae7b4e3304be969cecd4ece63b78eea Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Fri, 11 Aug 2017 16:49:10 +0300 Subject: ARM: OMAP2+: Remove unused omapdrm platform device The omapdrm platform device is unused, as a replacement is now registered in the omapdss driver. Remove it. Signed-off-by: Laurent Pinchart Acked-by: Tony Lindgren Reviewed-by: Tomi Valkeinen Signed-off-by: Tomi Valkeinen --- arch/arm/mach-omap2/Makefile | 2 +- arch/arm/mach-omap2/display.c | 7 ------ arch/arm/mach-omap2/display.h | 1 - arch/arm/mach-omap2/drm.c | 53 ------------------------------------------- 4 files changed, 1 insertion(+), 62 deletions(-) delete mode 100644 arch/arm/mach-omap2/drm.c (limited to 'arch') diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile index 779fb1f680b3..b3b3b3a19183 100644 --- a/arch/arm/mach-omap2/Makefile +++ b/arch/arm/mach-omap2/Makefile @@ -8,7 +8,7 @@ ccflags-y := -I$(srctree)/$(src)/include \ # Common support obj-y := id.o io.o control.o devices.o fb.o timer.o pm.o \ common.o dma.o wd_timer.o display.o i2c.o hdq1w.o omap_hwmod.o \ - omap_device.o omap-headsmp.o sram.o drm.o + omap_device.o omap-headsmp.o sram.o hwmod-common = omap_hwmod.o omap_hwmod_reset.o \ omap_hwmod_common_data.o diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c index 8fa01c0ecdb2..5f5697668de8 100644 --- a/arch/arm/mach-omap2/display.c +++ b/arch/arm/mach-omap2/display.c @@ -384,13 +384,6 @@ int __init omapdss_init_of(void) return r; } - /* create DRM device */ - r = omap_init_drm(); - if (r < 0) { - pr_err("Unable to register omapdrm device\n"); - return r; - } - /* create vrfb device */ r = omap_init_vrfb(); if (r < 0) { diff --git a/arch/arm/mach-omap2/display.h b/arch/arm/mach-omap2/display.h index 9a39646d4316..42ec2e99a2f4 100644 --- a/arch/arm/mach-omap2/display.h +++ b/arch/arm/mach-omap2/display.h @@ -26,7 +26,6 @@ struct omap_dss_dispc_dev_attr { bool has_framedonetv_irq; }; -int omap_init_drm(void); int omap_init_vrfb(void); int omap_init_fb(void); int omap_init_vout(void); diff --git a/arch/arm/mach-omap2/drm.c b/arch/arm/mach-omap2/drm.c deleted file mode 100644 index 44fef961bb70..000000000000 --- a/arch/arm/mach-omap2/drm.c +++ /dev/null @@ -1,53 +0,0 @@ -/* - * DRM/KMS device registration for TI OMAP platforms - * - * Copyright (C) 2012 Texas Instruments - * Author: Rob Clark - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see . - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "soc.h" -#include "display.h" - -#if IS_ENABLED(CONFIG_DRM_OMAP) - -static struct omap_drm_platform_data platform_data; - -static struct platform_device omap_drm_device = { - .dev = { - .coherent_dma_mask = DMA_BIT_MASK(32), - .platform_data = &platform_data, - }, - .name = "omapdrm", - .id = 0, -}; - -int __init omap_init_drm(void) -{ - platform_data.omaprev = GET_OMAP_TYPE; - - return platform_device_register(&omap_drm_device); - -} -#else -int __init omap_init_drm(void) { return 0; } -#endif -- cgit v1.2.3 From 5ce783025c827340cdc966508a0d445dcd591ba9 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Fri, 11 Aug 2017 16:49:11 +0300 Subject: ARM: OMAP2+: Don't register omapdss device for omapdrm The omapdrm driver doesn't need the omapdss device anymore. Although it can't be removed completely as the fbdev driver still requires it, we can condition its registration to the usage of the omapfb driver. Signed-off-by: Laurent Pinchart Acked-by: Tony Lindgren Reviewed-by: Tomi Valkeinen Signed-off-by: Tomi Valkeinen --- arch/arm/mach-omap2/display.c | 111 +++++++++++++++++++++++------------------- 1 file changed, 60 insertions(+), 51 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c index 5f5697668de8..798fc718fffe 100644 --- a/arch/arm/mach-omap2/display.c +++ b/arch/arm/mach-omap2/display.c @@ -66,6 +66,7 @@ */ #define FRAMEDONE_IRQ_TIMEOUT 100 +#if defined(CONFIG_FB_OMAP2) static struct platform_device omap_display_device = { .name = "omapdss", .id = -1, @@ -163,6 +164,64 @@ static enum omapdss_version __init omap_display_get_version(void) return OMAPDSS_VER_UNKNOWN; } +static int __init omapdss_init_fbdev(void) +{ + static struct omap_dss_board_info board_data = { + .dsi_enable_pads = omap_dsi_enable_pads, + .dsi_disable_pads = omap_dsi_disable_pads, + .set_min_bus_tput = omap_dss_set_min_bus_tput, + }; + struct device_node *node; + + board_data.version = omap_display_get_version(); + if (board_data.version == OMAPDSS_VER_UNKNOWN) { + pr_err("DSS not supported on this SoC\n"); + return -ENODEV; + } + + omap_display_device.dev.platform_data = &board_data; + + r = platform_device_register(&omap_display_device); + if (r < 0) { + pr_err("Unable to register omapdss device\n"); + return r; + } + + /* create vrfb device */ + r = omap_init_vrfb(); + if (r < 0) { + pr_err("Unable to register omapvrfb device\n"); + return r; + } + + /* create FB device */ + r = omap_init_fb(); + if (r < 0) { + pr_err("Unable to register omapfb device\n"); + return r; + } + + /* create V4L2 display device */ + r = omap_init_vout(); + if (r < 0) { + pr_err("Unable to register omap_vout device\n"); + return r; + } + + /* add DSI info for omap4 */ + node = of_find_node_by_name(NULL, "omap4_padconf_global"); + if (node) + omap4_dsi_mux_syscon = syscon_node_to_regmap(node); + + return 0; +} +#else +static inline int omapdss_init_fbdev(void) +{ + return 0; +} +#endif /* CONFIG_FB_OMAP2 */ + static void dispc_disable_outputs(void) { u32 v, irq_mask = 0; @@ -335,16 +394,9 @@ static struct device_node * __init omapdss_find_dss_of_node(void) int __init omapdss_init_of(void) { int r; - enum omapdss_version ver; struct device_node *node; struct platform_device *pdev; - static struct omap_dss_board_info board_data = { - .dsi_enable_pads = omap_dsi_enable_pads, - .dsi_disable_pads = omap_dsi_disable_pads, - .set_min_bus_tput = omap_dss_set_min_bus_tput, - }; - /* only create dss helper devices if dss is enabled in the .dts */ node = omapdss_find_dss_of_node(); @@ -354,13 +406,6 @@ int __init omapdss_init_of(void) if (!of_device_is_available(node)) return 0; - ver = omap_display_get_version(); - - if (ver == OMAPDSS_VER_UNKNOWN) { - pr_err("DSS not supported on this SoC\n"); - return -ENODEV; - } - pdev = of_find_device_by_node(node); if (!pdev) { @@ -374,41 +419,5 @@ int __init omapdss_init_of(void) return r; } - board_data.version = ver; - - omap_display_device.dev.platform_data = &board_data; - - r = platform_device_register(&omap_display_device); - if (r < 0) { - pr_err("Unable to register omapdss device\n"); - return r; - } - - /* create vrfb device */ - r = omap_init_vrfb(); - if (r < 0) { - pr_err("Unable to register omapvrfb device\n"); - return r; - } - - /* create FB device */ - r = omap_init_fb(); - if (r < 0) { - pr_err("Unable to register omapfb device\n"); - return r; - } - - /* create V4L2 display device */ - r = omap_init_vout(); - if (r < 0) { - pr_err("Unable to register omap_vout device\n"); - return r; - } - - /* add DSI info for omap4 */ - node = of_find_node_by_name(NULL, "omap4_padconf_global"); - if (node) - omap4_dsi_mux_syscon = syscon_node_to_regmap(node); - - return 0; + return omapdss_init_fbdev(); } -- cgit v1.2.3