summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt5
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/aspeed,ast2500-scu-ic.yaml6
-rw-r--r--Documentation/devicetree/bindings/mfd/aspeed,ast2x00-scu.yaml4
-rw-r--r--Documentation/devicetree/bindings/timer/faraday,fttmr010.txt38
-rw-r--r--Documentation/devicetree/bindings/timer/faraday,fttmr010.yaml89
-rw-r--r--Documentation/devicetree/bindings/timer/fsl,ftm-timer.yaml7
-rw-r--r--Documentation/devicetree/bindings/timer/fsl,timrot.yaml48
-rw-r--r--Documentation/devicetree/bindings/timer/fsl,vf610-pit.yaml9
-rw-r--r--Documentation/devicetree/bindings/timer/mediatek,timer.yaml2
-rw-r--r--Documentation/devicetree/bindings/timer/samsung,exynos4210-mct.yaml2
-rw-r--r--MAINTAINERS1
-rw-r--r--arch/Kconfig6
-rw-r--r--arch/arm/include/asm/vdso/vsyscall.h2
-rw-r--r--arch/arm/kernel/vdso.c10
-rw-r--r--arch/arm/mm/Kconfig2
-rw-r--r--arch/arm64/Kconfig3
-rw-r--r--arch/arm64/include/asm/vdso/compat_barrier.h7
-rw-r--r--arch/arm64/include/asm/vdso/compat_gettimeofday.h6
-rw-r--r--arch/arm64/include/asm/vdso/gettimeofday.h8
-rw-r--r--arch/arm64/kernel/vdso32/Makefile1
-rw-r--r--arch/loongarch/Kconfig3
-rw-r--r--arch/loongarch/include/asm/thread_info.h76
-rw-r--r--arch/loongarch/kernel/time.c2
-rw-r--r--arch/mips/Kconfig1
-rw-r--r--arch/powerpc/Kconfig2
-rw-r--r--arch/riscv/Kconfig13
-rw-r--r--arch/riscv/boot/dts/sophgo/sg2042.dtsi2
-rw-r--r--arch/riscv/boot/dts/sophgo/sg2044.dtsi2
-rw-r--r--arch/riscv/include/asm/thread_info.h31
-rw-r--r--arch/s390/Kconfig3
-rw-r--r--arch/s390/include/asm/thread_info.h50
-rw-r--r--arch/x86/Kbuild2
-rw-r--r--arch/x86/Kconfig17
-rw-r--r--arch/x86/Makefile1
-rw-r--r--arch/x86/boot/compressed/Makefile2
-rw-r--r--arch/x86/boot/compressed/misc.c2
-rw-r--r--arch/x86/boot/compressed/sev-handle-vc.c3
-rw-r--r--arch/x86/boot/compressed/sev.c132
-rw-r--r--arch/x86/boot/cpuflags.c13
-rw-r--r--arch/x86/boot/startup/Makefile22
-rw-r--r--arch/x86/boot/startup/exports.h14
-rw-r--r--arch/x86/boot/startup/gdt_idt.c4
-rw-r--r--arch/x86/boot/startup/map_kernel.c4
-rw-r--r--arch/x86/boot/startup/sev-shared.c327
-rw-r--r--arch/x86/boot/startup/sev-startup.c210
-rw-r--r--arch/x86/boot/startup/sme.c30
-rw-r--r--arch/x86/coco/core.c3
-rw-r--r--arch/x86/coco/sev/Makefile8
-rw-r--r--arch/x86/coco/sev/core.c276
-rw-r--r--arch/x86/coco/sev/noinstr.c (renamed from arch/x86/coco/sev/sev-nmi.c)74
-rw-r--r--arch/x86/coco/sev/vc-handle.c22
-rw-r--r--arch/x86/coco/sev/vc-shared.c143
-rw-r--r--arch/x86/include/asm/apic.h11
-rw-r--r--arch/x86/include/asm/apicdef.h2
-rw-r--r--arch/x86/include/asm/boot.h2
-rw-r--r--arch/x86/include/asm/init.h6
-rw-r--r--arch/x86/include/asm/msr-index.h9
-rw-r--r--arch/x86/include/asm/setup.h1
-rw-r--r--arch/x86/include/asm/sev-common.h1
-rw-r--r--arch/x86/include/asm/sev-internal.h28
-rw-r--r--arch/x86/include/asm/sev.h41
-rw-r--r--arch/x86/include/asm/thread_info.h76
-rw-r--r--arch/x86/include/uapi/asm/svm.h4
-rw-r--r--arch/x86/kernel/apic/Makefile1
-rw-r--r--arch/x86/kernel/apic/apic.c85
-rw-r--r--arch/x86/kernel/apic/vector.c28
-rw-r--r--arch/x86/kernel/apic/x2apic_savic.c428
-rw-r--r--arch/x86/kernel/head64.c5
-rw-r--r--arch/x86/kernel/head_32.S5
-rw-r--r--arch/x86/kernel/head_64.S10
-rw-r--r--arch/x86/kernel/vmlinux.lds.S9
-rw-r--r--arch/x86/mm/mem_encrypt_amd.c6
-rw-r--r--arch/x86/mm/mem_encrypt_boot.S6
-rw-r--r--arch/x86/platform/pvh/head.S2
-rw-r--r--arch/x86/tools/relocs.c8
-rw-r--r--arch/x86/virt/svm/sev.c7
-rw-r--r--drivers/acpi/arm64/gtdt.c29
-rw-r--r--drivers/clocksource/Kconfig13
-rw-r--r--drivers/clocksource/Makefile3
-rw-r--r--drivers/clocksource/arm_arch_timer.c686
-rw-r--r--drivers/clocksource/arm_arch_timer_mmio.c440
-rw-r--r--drivers/clocksource/arm_global_timer.c44
-rw-r--r--drivers/clocksource/clps711x-timer.c23
-rw-r--r--drivers/clocksource/ingenic-sysost.c27
-rw-r--r--drivers/clocksource/scx200_hrt.c1
-rw-r--r--drivers/clocksource/sh_cmt.c84
-rw-r--r--drivers/clocksource/timer-cs5535.c1
-rw-r--r--drivers/clocksource/timer-econet-en751221.c2
-rw-r--r--drivers/clocksource/timer-nxp-pit.c382
-rw-r--r--drivers/clocksource/timer-nxp-stm.c2
-rw-r--r--drivers/clocksource/timer-rtl-otto.c42
-rw-r--r--drivers/clocksource/timer-stm32-lp.c1
-rw-r--r--drivers/clocksource/timer-sun5i.c2
-rw-r--r--drivers/clocksource/timer-tegra186.c38
-rw-r--r--drivers/clocksource/timer-ti-dm.c119
-rw-r--r--drivers/clocksource/timer-vf-pit.c194
-rw-r--r--drivers/crypto/ccp/Makefile3
-rw-r--r--drivers/crypto/ccp/psp-dev.c20
-rw-r--r--drivers/crypto/ccp/psp-dev.h8
-rw-r--r--drivers/crypto/ccp/sev-dev.c182
-rw-r--r--drivers/crypto/ccp/sev-dev.h3
-rw-r--r--drivers/crypto/ccp/sfs.c311
-rw-r--r--drivers/crypto/ccp/sfs.h47
-rw-r--r--drivers/firmware/efi/libstub/x86-stub.c4
-rw-r--r--drivers/irqchip/irq-aspeed-scu-ic.c256
-rw-r--r--drivers/irqchip/irq-gic-v2m.c13
-rw-r--r--drivers/irqchip/irq-gic-v3.c3
-rw-r--r--drivers/irqchip/irq-gic-v5-irs.c2
-rw-r--r--drivers/irqchip/irq-gic-v5-its.c26
-rw-r--r--drivers/irqchip/irq-loongson-eiointc.c105
-rw-r--r--drivers/irqchip/irq-loongson-pch-lpc.c9
-rw-r--r--drivers/irqchip/irq-msi-lib.c14
-rw-r--r--drivers/irqchip/irq-nvic.c3
-rw-r--r--drivers/irqchip/irq-renesas-rza1.c3
-rw-r--r--drivers/irqchip/irq-renesas-rzg2l.c2
-rw-r--r--drivers/irqchip/irq-sg2042-msi.c26
-rw-r--r--drivers/irqchip/irq-sifive-plic.c10
-rw-r--r--drivers/media/rc/pwm-ir-tx.c5
-rw-r--r--drivers/of/irq.c1
-rw-r--r--drivers/pci/msi/irqdomain.c53
-rw-r--r--include/asm-generic/thread_info_tif.h48
-rw-r--r--include/asm-generic/vdso/vsyscall.h4
-rw-r--r--include/clocksource/arm_arch_timer.h5
-rw-r--r--include/dt-bindings/interrupt-controller/aspeed-scu-ic.h14
-rw-r--r--include/linux/cc_platform.h8
-rw-r--r--include/linux/hrtimer.h14
-rw-r--r--include/linux/hrtimer_defs.h2
-rw-r--r--include/linux/irq.h6
-rw-r--r--include/linux/jiffies.h2
-rw-r--r--include/linux/msi.h2
-rw-r--r--include/linux/platform_data/dmtimer-omap.h4
-rw-r--r--include/linux/psp-platform-access.h2
-rw-r--r--include/linux/rseq.h11
-rw-r--r--include/uapi/linux/psp-sfs.h87
-rw-r--r--include/vdso/datapage.h9
-rw-r--r--include/vdso/gettime.h1
-rw-r--r--include/vdso/jiffies.h2
-rw-r--r--init/Kconfig2
-rw-r--r--kernel/Kconfig.preempt13
-rw-r--r--kernel/futex/syscalls.c106
-rw-r--r--kernel/irq/Kconfig6
-rw-r--r--kernel/irq/chip.c37
-rw-r--r--kernel/irq/devres.c127
-rw-r--r--kernel/irq/handle.c49
-rw-r--r--kernel/irq/irq_test.c55
-rw-r--r--kernel/irq/irqdesc.c7
-rw-r--r--kernel/rseq.c10
-rw-r--r--kernel/sched/core.c2
-rw-r--r--kernel/smp.c11
-rw-r--r--kernel/softirq.c145
-rw-r--r--kernel/time/Makefile2
-rw-r--r--kernel/time/alarmtimer.c2
-rw-r--r--kernel/time/clockevents.c2
-rw-r--r--kernel/time/clocksource.c7
-rw-r--r--kernel/time/hrtimer.c40
-rw-r--r--kernel/time/itimer.c3
-rw-r--r--kernel/time/posix-timers.c7
-rw-r--r--kernel/time/sched_clock.c4
-rw-r--r--kernel/time/tick-common.c16
-rw-r--r--kernel/time/tick-internal.h2
-rw-r--r--kernel/time/timer_list.c2
-rw-r--r--lib/test_objpool.c2
-rw-r--r--lib/vdso/Kconfig25
-rw-r--r--lib/vdso/Makefile2
-rw-r--r--lib/vdso/datastore.c6
-rw-r--r--lib/vdso/gettimeofday.c27
-rw-r--r--scripts/gdb/linux/timerlist.py2
-rw-r--r--sound/core/hrtimer.c2
-rw-r--r--tools/objtool/arch/x86/decode.c12
-rw-r--r--tools/objtool/builtin-check.c2
-rw-r--r--tools/objtool/check.c48
-rw-r--r--tools/objtool/include/objtool/arch.h1
-rw-r--r--tools/objtool/include/objtool/builtin.h1
-rw-r--r--tools/objtool/noreturns.h1
-rw-r--r--tools/testing/selftests/futex/functional/Makefile8
-rw-r--r--tools/testing/selftests/futex/functional/futex_numa.c3
-rw-r--r--tools/testing/selftests/futex/functional/futex_numa_mpol.c100
-rw-r--r--tools/testing/selftests/futex/functional/futex_priv_hash.c67
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue.c76
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue_pi.c266
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c86
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c129
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait.c103
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c83
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_timeout.c139
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c76
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_wouldblock.c76
-rw-r--r--tools/testing/selftests/futex/functional/futex_waitv.c99
-rwxr-xr-xtools/testing/selftests/futex/functional/run.sh62
-rw-r--r--tools/testing/selftests/futex/include/futextest.h11
-rw-r--r--tools/testing/selftests/futex/include/logging.h148
-rw-r--r--tools/testing/selftests/kselftest.h14
-rw-r--r--tools/testing/selftests/kselftest_harness.h13
-rw-r--r--tools/testing/selftests/pidfd/config1
-rw-r--r--tools/testing/selftests/rseq/rseq.c8
-rw-r--r--tools/testing/selftests/vDSO/.gitignore1
-rw-r--r--tools/testing/selftests/vDSO/Makefile2
-rw-r--r--tools/testing/selftests/vDSO/vdso_call.h7
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_abi.c101
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_clock_getres.c123
200 files changed, 5063 insertions, 3521 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index bf1e6ca6aeb8..e019db1633fd 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2606,6 +2606,11 @@
for it. Intended to get systems with badly broken
firmware running.
+ irqhandler.duration_warn_us= [KNL]
+ Warn if an IRQ handler exceeds the specified duration
+ threshold in microseconds. Useful for identifying
+ long-running IRQs in the system.
+
irqpoll [HW]
When an interrupt is not handled search all handlers
for it. Also check all handlers each timer
diff --git a/Documentation/devicetree/bindings/interrupt-controller/aspeed,ast2500-scu-ic.yaml b/Documentation/devicetree/bindings/interrupt-controller/aspeed,ast2500-scu-ic.yaml
index d5287a2bf866..d998a9d69b91 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/aspeed,ast2500-scu-ic.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/aspeed,ast2500-scu-ic.yaml
@@ -5,7 +5,7 @@
$id: http://devicetree.org/schemas/interrupt-controller/aspeed,ast2500-scu-ic.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
-title: Aspeed AST25XX and AST26XX SCU Interrupt Controller
+title: Aspeed AST25XX, AST26XX, AST27XX SCU Interrupt Controller
maintainers:
- Eddie James <eajames@linux.ibm.com>
@@ -16,6 +16,10 @@ properties:
- aspeed,ast2500-scu-ic
- aspeed,ast2600-scu-ic0
- aspeed,ast2600-scu-ic1
+ - aspeed,ast2700-scu-ic0
+ - aspeed,ast2700-scu-ic1
+ - aspeed,ast2700-scu-ic2
+ - aspeed,ast2700-scu-ic3
reg:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/mfd/aspeed,ast2x00-scu.yaml b/Documentation/devicetree/bindings/mfd/aspeed,ast2x00-scu.yaml
index 5eccd10d95ce..67be6d095fe4 100644
--- a/Documentation/devicetree/bindings/mfd/aspeed,ast2x00-scu.yaml
+++ b/Documentation/devicetree/bindings/mfd/aspeed,ast2x00-scu.yaml
@@ -75,6 +75,10 @@ patternProperties:
- aspeed,ast2500-scu-ic
- aspeed,ast2600-scu-ic0
- aspeed,ast2600-scu-ic1
+ - aspeed,ast2700-scu-ic0
+ - aspeed,ast2700-scu-ic1
+ - aspeed,ast2700-scu-ic2
+ - aspeed,ast2700-scu-ic3
'^silicon-id@[0-9a-f]+$':
description: Unique hardware silicon identifiers within the SoC
diff --git a/Documentation/devicetree/bindings/timer/faraday,fttmr010.txt b/Documentation/devicetree/bindings/timer/faraday,fttmr010.txt
deleted file mode 100644
index 3cb2f4c98d64..000000000000
--- a/Documentation/devicetree/bindings/timer/faraday,fttmr010.txt
+++ /dev/null
@@ -1,38 +0,0 @@
-Faraday Technology timer
-
-This timer is a generic IP block from Faraday Technology, embedded in the
-Cortina Systems Gemini SoCs and other designs.
-
-Required properties:
-
-- compatible : Must be one of
- "faraday,fttmr010"
- "cortina,gemini-timer", "faraday,fttmr010"
- "moxa,moxart-timer", "faraday,fttmr010"
- "aspeed,ast2400-timer"
- "aspeed,ast2500-timer"
- "aspeed,ast2600-timer"
-
-- reg : Should contain registers location and length
-- interrupts : Should contain the three timer interrupts usually with
- flags for falling edge
-
-Optionally required properties:
-
-- clocks : a clock to provide the tick rate for "faraday,fttmr010"
-- clock-names : should be "EXTCLK" and "PCLK" for the external tick timer
- and peripheral clock respectively, for "faraday,fttmr010"
-- syscon : a phandle to the global Gemini system controller if the compatible
- type is "cortina,gemini-timer"
-
-Example:
-
-timer@43000000 {
- compatible = "faraday,fttmr010";
- reg = <0x43000000 0x1000>;
- interrupts = <14 IRQ_TYPE_EDGE_FALLING>, /* Timer 1 */
- <15 IRQ_TYPE_EDGE_FALLING>, /* Timer 2 */
- <16 IRQ_TYPE_EDGE_FALLING>; /* Timer 3 */
- clocks = <&extclk>, <&pclk>;
- clock-names = "EXTCLK", "PCLK";
-};
diff --git a/Documentation/devicetree/bindings/timer/faraday,fttmr010.yaml b/Documentation/devicetree/bindings/timer/faraday,fttmr010.yaml
new file mode 100644
index 000000000000..39506323556c
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/faraday,fttmr010.yaml
@@ -0,0 +1,89 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/timer/faraday,fttmr010.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Faraday FTTMR010 timer
+
+maintainers:
+ - Joel Stanley <joel@jms.id.au>
+ - Linus Walleij <linus.walleij@linaro.org>
+
+description:
+ This timer is a generic IP block from Faraday Technology, embedded in the
+ Cortina Systems Gemini SoCs and other designs.
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - const: moxa,moxart-timer
+ - const: faraday,fttmr010
+ - enum:
+ - aspeed,ast2400-timer
+ - aspeed,ast2500-timer
+ - aspeed,ast2600-timer
+ - cortina,gemini-timer
+ - faraday,fttmr010
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ minItems: 1
+ maxItems: 8
+ description: One interrupt per timer
+
+ clocks:
+ minItems: 1
+ items:
+ - description: Peripheral clock
+ - description: External tick clock
+
+ clock-names:
+ minItems: 1
+ items:
+ - const: PCLK
+ - const: EXTCLK
+
+ resets:
+ maxItems: 1
+
+ syscon:
+ description: System controller phandle for Gemini systems
+ $ref: /schemas/types.yaml#/definitions/phandle
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: cortina,gemini-timer
+ then:
+ required:
+ - syscon
+ else:
+ properties:
+ syscon: false
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ timer@43000000 {
+ compatible = "faraday,fttmr010";
+ reg = <0x43000000 0x1000>;
+ interrupts = <14 IRQ_TYPE_EDGE_FALLING>, /* Timer 1 */
+ <15 IRQ_TYPE_EDGE_FALLING>, /* Timer 2 */
+ <16 IRQ_TYPE_EDGE_FALLING>; /* Timer 3 */
+ clocks = <&pclk>, <&extclk>;
+ clock-names = "PCLK", "EXTCLK";
+ };
diff --git a/Documentation/devicetree/bindings/timer/fsl,ftm-timer.yaml b/Documentation/devicetree/bindings/timer/fsl,ftm-timer.yaml
index 0e4a8ddc3de3..e3b61b62521e 100644
--- a/Documentation/devicetree/bindings/timer/fsl,ftm-timer.yaml
+++ b/Documentation/devicetree/bindings/timer/fsl,ftm-timer.yaml
@@ -14,7 +14,9 @@ properties:
const: fsl,ftm-timer
reg:
- maxItems: 1
+ items:
+ - description: clock event device
+ - description: clock source device
interrupts:
maxItems: 1
@@ -50,7 +52,8 @@ examples:
ftm@400b8000 {
compatible = "fsl,ftm-timer";
- reg = <0x400b8000 0x1000>;
+ reg = <0x400b8000 0x1000>,
+ <0x400b9000 0x1000>;
interrupts = <0 44 IRQ_TYPE_LEVEL_HIGH>;
clock-names = "ftm-evt", "ftm-src", "ftm-evt-counter-en", "ftm-src-counter-en";
clocks = <&clks VF610_CLK_FTM2>, <&clks VF610_CLK_FTM3>,
diff --git a/Documentation/devicetree/bindings/timer/fsl,timrot.yaml b/Documentation/devicetree/bindings/timer/fsl,timrot.yaml
new file mode 100644
index 000000000000..d181f274ef9f
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/fsl,timrot.yaml
@@ -0,0 +1,48 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/timer/fsl,timrot.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale MXS Timer
+
+maintainers:
+ - Frank Li <Frank.Li@nxp.com>
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - fsl,imx23-timrot
+ - fsl,imx28-timrot
+ - const: fsl,timrot
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ items:
+ - description: irq for timer0
+ - description: irq for timer1
+ - description: irq for timer2
+ - description: irq for timer3
+
+ clocks:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ timer: timer@80068000 {
+ compatible = "fsl,imx28-timrot", "fsl,timrot";
+ reg = <0x80068000 0x2000>;
+ interrupts = <48>, <49>, <50>, <51>;
+ clocks = <&clks 26>;
+ };
diff --git a/Documentation/devicetree/bindings/timer/fsl,vf610-pit.yaml b/Documentation/devicetree/bindings/timer/fsl,vf610-pit.yaml
index bee2c35bd0e2..42e130654d58 100644
--- a/Documentation/devicetree/bindings/timer/fsl,vf610-pit.yaml
+++ b/Documentation/devicetree/bindings/timer/fsl,vf610-pit.yaml
@@ -15,8 +15,13 @@ description:
properties:
compatible:
- enum:
- - fsl,vf610-pit
+ oneOf:
+ - enum:
+ - fsl,vf610-pit
+ - nxp,s32g2-pit
+ - items:
+ - const: nxp,s32g3-pit
+ - const: nxp,s32g2-pit
reg:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/timer/mediatek,timer.yaml b/Documentation/devicetree/bindings/timer/mediatek,timer.yaml
index f68fc7050c56..e3e38066c2cb 100644
--- a/Documentation/devicetree/bindings/timer/mediatek,timer.yaml
+++ b/Documentation/devicetree/bindings/timer/mediatek,timer.yaml
@@ -26,6 +26,7 @@ properties:
- items:
- enum:
- mediatek,mt2701-timer
+ - mediatek,mt6572-timer
- mediatek,mt6580-timer
- mediatek,mt6582-timer
- mediatek,mt6589-timer
@@ -44,6 +45,7 @@ properties:
- mediatek,mt8188-timer
- mediatek,mt8192-timer
- mediatek,mt8195-timer
+ - mediatek,mt8196-timer
- mediatek,mt8365-systimer
- const: mediatek,mt6765-timer
diff --git a/Documentation/devicetree/bindings/timer/samsung,exynos4210-mct.yaml b/Documentation/devicetree/bindings/timer/samsung,exynos4210-mct.yaml
index 10578f544581..a4b229e0e78a 100644
--- a/Documentation/devicetree/bindings/timer/samsung,exynos4210-mct.yaml
+++ b/Documentation/devicetree/bindings/timer/samsung,exynos4210-mct.yaml
@@ -26,6 +26,7 @@ properties:
- items:
- enum:
- axis,artpec8-mct
+ - axis,artpec9-mct
- google,gs101-mct
- samsung,exynos2200-mct-peris
- samsung,exynos3250-mct
@@ -131,6 +132,7 @@ allOf:
contains:
enum:
- axis,artpec8-mct
+ - axis,artpec9-mct
- google,gs101-mct
- samsung,exynos2200-mct-peris
- samsung,exynos5260-mct
diff --git a/MAINTAINERS b/MAINTAINERS
index 62d16c20a888..331728e93f69 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1989,6 +1989,7 @@ S: Maintained
F: arch/arm/include/asm/arch_timer.h
F: arch/arm64/include/asm/arch_timer.h
F: drivers/clocksource/arm_arch_timer.c
+F: drivers/clocksource/arm_arch_timer_mmio.c
ARM GENERIC INTERRUPT CONTROLLER DRIVERS
M: Marc Zyngier <maz@kernel.org>
diff --git a/arch/Kconfig b/arch/Kconfig
index 2162276995a0..5440616f0774 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1657,7 +1657,7 @@ config HAVE_SPARSE_SYSCALL_NR
related optimizations for a given architecture.
config ARCH_HAS_VDSO_ARCH_DATA
- depends on GENERIC_VDSO_DATA_STORE
+ depends on HAVE_GENERIC_VDSO
bool
config ARCH_HAS_VDSO_TIME_DATA
@@ -1778,6 +1778,10 @@ config ARCH_VMLINUX_NEEDS_RELOCS
relocations preserved. This is used by some architectures to
construct bespoke relocation tables for KASLR.
+# Select if architecture uses the common generic TIF bits
+config HAVE_GENERIC_TIF_BITS
+ bool
+
source "kernel/gcov/Kconfig"
source "scripts/gcc-plugins/Kconfig"
diff --git a/arch/arm/include/asm/vdso/vsyscall.h b/arch/arm/include/asm/vdso/vsyscall.h
index 4e7226ad02ec..ff1c729af05f 100644
--- a/arch/arm/include/asm/vdso/vsyscall.h
+++ b/arch/arm/include/asm/vdso/vsyscall.h
@@ -7,8 +7,6 @@
#include <vdso/datapage.h>
#include <asm/cacheflush.h>
-extern bool cntvct_ok;
-
static __always_inline
void __arch_sync_vdso_time_data(struct vdso_time_data *vdata)
{
diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c
index 325448ffbba0..e38a30477f3d 100644
--- a/arch/arm/kernel/vdso.c
+++ b/arch/arm/kernel/vdso.c
@@ -54,11 +54,9 @@ struct elfinfo {
char *dynstr; /* ptr to .dynstr section */
};
-/* Cached result of boot-time check for whether the arch timer exists,
- * and if so, whether the virtual counter is useable.
+/* Boot-time check for whether the arch timer exists, and if so,
+ * whether the virtual counter is usable.
*/
-bool cntvct_ok __ro_after_init;
-
static bool __init cntvct_functional(void)
{
struct device_node *np;
@@ -159,7 +157,7 @@ static void __init patch_vdso(void *ehdr)
* want programs to incur the slight additional overhead of
* dispatching through the VDSO only to fall back to syscalls.
*/
- if (!cntvct_ok) {
+ if (!cntvct_functional()) {
vdso_nullpatch_one(&einfo, "__vdso_gettimeofday");
vdso_nullpatch_one(&einfo, "__vdso_clock_gettime");
vdso_nullpatch_one(&einfo, "__vdso_clock_gettime64");
@@ -197,8 +195,6 @@ static int __init vdso_init(void)
vdso_total_pages = VDSO_NR_PAGES; /* for the data/vvar pages */
vdso_total_pages += text_pages;
- cntvct_ok = cntvct_functional();
-
patch_vdso(vdso_start);
return 0;
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 5c1023a6d78c..7b27ee9482b3 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -926,9 +926,7 @@ config VDSO
default y if ARM_ARCH_TIMER
select HAVE_GENERIC_VDSO
select GENERIC_TIME_VSYSCALL
- select GENERIC_VDSO_32
select GENERIC_GETTIMEOFDAY
- select GENERIC_VDSO_DATA_STORE
help
Place in the process address space an ELF shared object
providing fast implementations of gettimeofday and
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 616b702f10d4..b3e13f67d598 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -166,8 +166,6 @@ config ARM64
select GENERIC_SMP_IDLE_THREAD
select GENERIC_TIME_VSYSCALL
select GENERIC_GETTIMEOFDAY
- select GENERIC_VDSO_DATA_STORE
- select GENERIC_VDSO_TIME_NS
select HARDIRQS_SW_RESEND
select HAS_IOPORT
select HAVE_MOVE_PMD
@@ -1750,7 +1748,6 @@ config COMPAT_VDSO
bool "Enable vDSO for 32-bit applications"
depends on !CPU_BIG_ENDIAN
depends on (CC_IS_CLANG && LD_IS_LLD) || "$(CROSS_COMPILE_COMPAT)" != ""
- select GENERIC_COMPAT_VDSO
default y
help
Place in the process address space of 32-bit applications an
diff --git a/arch/arm64/include/asm/vdso/compat_barrier.h b/arch/arm64/include/asm/vdso/compat_barrier.h
index 3ac35f4a667c..6d75e03d3827 100644
--- a/arch/arm64/include/asm/vdso/compat_barrier.h
+++ b/arch/arm64/include/asm/vdso/compat_barrier.h
@@ -7,11 +7,10 @@
#ifndef __ASSEMBLY__
/*
- * Warning: This code is meant to be used with
- * ENABLE_COMPAT_VDSO only.
+ * Warning: This code is meant to be used from the compat vDSO only.
*/
-#ifndef ENABLE_COMPAT_VDSO
-#error This header is meant to be used with ENABLE_COMPAT_VDSO only
+#ifdef __arch64__
+#error This header is meant to be used with from the compat vDSO only
#endif
#ifdef dmb
diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h
index d60ea7a72a9c..7d1a116549b1 100644
--- a/arch/arm64/include/asm/vdso/compat_gettimeofday.h
+++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h
@@ -2,8 +2,8 @@
/*
* Copyright (C) 2018 ARM Limited
*/
-#ifndef __ASM_VDSO_GETTIMEOFDAY_H
-#define __ASM_VDSO_GETTIMEOFDAY_H
+#ifndef __ASM_VDSO_COMPAT_GETTIMEOFDAY_H
+#define __ASM_VDSO_COMPAT_GETTIMEOFDAY_H
#ifndef __ASSEMBLY__
@@ -163,4 +163,4 @@ static inline bool vdso_clocksource_ok(const struct vdso_clock *vc)
#endif /* !__ASSEMBLY__ */
-#endif /* __ASM_VDSO_GETTIMEOFDAY_H */
+#endif /* __ASM_VDSO_COMPAT_GETTIMEOFDAY_H */
diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h
index da1ab8759592..c59e84105b43 100644
--- a/arch/arm64/include/asm/vdso/gettimeofday.h
+++ b/arch/arm64/include/asm/vdso/gettimeofday.h
@@ -5,6 +5,8 @@
#ifndef __ASM_VDSO_GETTIMEOFDAY_H
#define __ASM_VDSO_GETTIMEOFDAY_H
+#ifdef __aarch64__
+
#ifndef __ASSEMBLY__
#include <asm/alternative.h>
@@ -96,4 +98,10 @@ static __always_inline const struct vdso_time_data *__arch_get_vdso_u_time_data(
#endif /* !__ASSEMBLY__ */
+#else /* !__aarch64__ */
+
+#include "compat_gettimeofday.h"
+
+#endif /* __aarch64__ */
+
#endif /* __ASM_VDSO_GETTIMEOFDAY_H */
diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile
index 5de4deaf4299..ffa3536581f6 100644
--- a/arch/arm64/kernel/vdso32/Makefile
+++ b/arch/arm64/kernel/vdso32/Makefile
@@ -57,7 +57,6 @@ VDSO_CAFLAGS += -DDISABLE_BRANCH_PROFILING
VDSO_CAFLAGS += -march=armv8-a
VDSO_CFLAGS := $(VDSO_CAFLAGS)
-VDSO_CFLAGS += -DENABLE_COMPAT_VDSO=1
# KBUILD_CFLAGS from top-level Makefile
VDSO_CFLAGS += -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
-fno-strict-aliasing -fno-common \
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 1f08941fdea4..e6225539579f 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -110,8 +110,6 @@ config LOONGARCH
select GENERIC_SCHED_CLOCK
select GENERIC_SMP_IDLE_THREAD
select GENERIC_TIME_VSYSCALL
- select GENERIC_VDSO_DATA_STORE
- select GENERIC_VDSO_TIME_NS
select GPIOLIB
select HAS_IOPORT
select HAVE_ARCH_AUDITSYSCALL
@@ -142,6 +140,7 @@ config LOONGARCH
select HAVE_EBPF_JIT
select HAVE_EFFICIENT_UNALIGNED_ACCESS if !ARCH_STRICT_ALIGN
select HAVE_EXIT_THREAD
+ select HAVE_GENERIC_TIF_BITS
select HAVE_GUP_FAST
select HAVE_FTRACE_GRAPH_FUNC
select HAVE_FUNCTION_ARG_ACCESS_API
diff --git a/arch/loongarch/include/asm/thread_info.h b/arch/loongarch/include/asm/thread_info.h
index 9dfa2ef00816..4d7117fcdc78 100644
--- a/arch/loongarch/include/asm/thread_info.h
+++ b/arch/loongarch/include/asm/thread_info.h
@@ -65,50 +65,42 @@ register unsigned long current_stack_pointer __asm__("$sp");
* access
* - pending work-to-be-done flags are in LSW
* - other flags in MSW
+ *
+ * Tell the generic TIF infrastructure which special bits loongarch supports
*/
-#define TIF_NEED_RESCHED 0 /* rescheduling necessary */
-#define TIF_NEED_RESCHED_LAZY 1 /* lazy rescheduling necessary */
-#define TIF_SIGPENDING 2 /* signal pending */
-#define TIF_NOTIFY_RESUME 3 /* callback before returning to user */
-#define TIF_NOTIFY_SIGNAL 4 /* signal notifications exist */
-#define TIF_RESTORE_SIGMASK 5 /* restore signal mask in do_signal() */
-#define TIF_NOHZ 6 /* in adaptive nohz mode */
-#define TIF_UPROBE 7 /* breakpointed or singlestepping */
-#define TIF_USEDFPU 8 /* FPU was used by this task this quantum (SMP) */
-#define TIF_USEDSIMD 9 /* SIMD has been used this quantum */
-#define TIF_MEMDIE 10 /* is terminating due to OOM killer */
-#define TIF_FIXADE 11 /* Fix address errors in software */
-#define TIF_LOGADE 12 /* Log address errors to syslog */
-#define TIF_32BIT_REGS 13 /* 32-bit general purpose registers */
-#define TIF_32BIT_ADDR 14 /* 32-bit address space */
-#define TIF_LOAD_WATCH 15 /* If set, load watch registers */
-#define TIF_SINGLESTEP 16 /* Single Step */
-#define TIF_LSX_CTX_LIVE 17 /* LSX context must be preserved */
-#define TIF_LASX_CTX_LIVE 18 /* LASX context must be preserved */
-#define TIF_USEDLBT 19 /* LBT was used by this task this quantum (SMP) */
-#define TIF_LBT_CTX_LIVE 20 /* LBT context must be preserved */
-#define TIF_PATCH_PENDING 21 /* pending live patching update */
+#define HAVE_TIF_NEED_RESCHED_LAZY
+#define HAVE_TIF_RESTORE_SIGMASK
+
+#include <asm-generic/thread_info_tif.h>
+
+/* Architecture specific bits */
+#define TIF_NOHZ 16 /* in adaptive nohz mode */
+#define TIF_USEDFPU 17 /* FPU was used by this task this quantum (SMP) */
+#define TIF_USEDSIMD 18 /* SIMD has been used this quantum */
+#define TIF_FIXADE 19 /* Fix address errors in software */
+#define TIF_LOGADE 20 /* Log address errors to syslog */
+#define TIF_32BIT_REGS 21 /* 32-bit general purpose registers */
+#define TIF_32BIT_ADDR 22 /* 32-bit address space */
+#define TIF_LOAD_WATCH 23 /* If set, load watch registers */
+#define TIF_SINGLESTEP 24 /* Single Step */
+#define TIF_LSX_CTX_LIVE 25 /* LSX context must be preserved */
+#define TIF_LASX_CTX_LIVE 26 /* LASX context must be preserved */
+#define TIF_USEDLBT 27 /* LBT was used by this task this quantum (SMP) */
+#define TIF_LBT_CTX_LIVE 28 /* LBT context must be preserved */
-#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
-#define _TIF_NEED_RESCHED_LAZY (1<<TIF_NEED_RESCHED_LAZY)
-#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
-#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
-#define _TIF_NOTIFY_SIGNAL (1<<TIF_NOTIFY_SIGNAL)
-#define _TIF_NOHZ (1<<TIF_NOHZ)
-#define _TIF_UPROBE (1<<TIF_UPROBE)
-#define _TIF_USEDFPU (1<<TIF_USEDFPU)
-#define _TIF_USEDSIMD (1<<TIF_USEDSIMD)
-#define _TIF_FIXADE (1<<TIF_FIXADE)
-#define _TIF_LOGADE (1<<TIF_LOGADE)
-#define _TIF_32BIT_REGS (1<<TIF_32BIT_REGS)
-#define _TIF_32BIT_ADDR (1<<TIF_32BIT_ADDR)
-#define _TIF_LOAD_WATCH (1<<TIF_LOAD_WATCH)
-#define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP)
-#define _TIF_LSX_CTX_LIVE (1<<TIF_LSX_CTX_LIVE)
-#define _TIF_LASX_CTX_LIVE (1<<TIF_LASX_CTX_LIVE)
-#define _TIF_USEDLBT (1<<TIF_USEDLBT)
-#define _TIF_LBT_CTX_LIVE (1<<TIF_LBT_CTX_LIVE)
-#define _TIF_PATCH_PENDING (1<<TIF_PATCH_PENDING)
+#define _TIF_NOHZ BIT(TIF_NOHZ)
+#define _TIF_USEDFPU BIT(TIF_USEDFPU)
+#define _TIF_USEDSIMD BIT(TIF_USEDSIMD)
+#define _TIF_FIXADE BIT(TIF_FIXADE)
+#define _TIF_LOGADE BIT(TIF_LOGADE)
+#define _TIF_32BIT_REGS BIT(TIF_32BIT_REGS)
+#define _TIF_32BIT_ADDR BIT(TIF_32BIT_ADDR)
+#define _TIF_LOAD_WATCH BIT(TIF_LOAD_WATCH)
+#define _TIF_SINGLESTEP BIT(TIF_SINGLESTEP)
+#define _TIF_LSX_CTX_LIVE BIT(TIF_LSX_CTX_LIVE)
+#define _TIF_LASX_CTX_LIVE BIT(TIF_LASX_CTX_LIVE)
+#define _TIF_USEDLBT BIT(TIF_USEDLBT)
+#define _TIF_LBT_CTX_LIVE BIT(TIF_LBT_CTX_LIVE)
#endif /* __KERNEL__ */
#endif /* _ASM_THREAD_INFO_H */
diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c
index f3092f2de8b5..6fb92cc1a4c9 100644
--- a/arch/loongarch/kernel/time.c
+++ b/arch/loongarch/kernel/time.c
@@ -112,8 +112,6 @@ static int arch_timer_starting(unsigned int cpu)
static int arch_timer_dying(unsigned int cpu)
{
- constant_set_state_shutdown(this_cpu_ptr(&constant_clockevent_device));
-
/* Clear Timer Interrupt */
write_csr_tintclear(CSR_TINTCLR_TI);
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 447b2fcaa316..608e01ed6cff 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -51,7 +51,6 @@ config MIPS
select GENERIC_SMP_IDLE_THREAD
select GENERIC_IDLE_POLL_SETUP
select GENERIC_TIME_VSYSCALL
- select GENERIC_VDSO_DATA_STORE
select GUP_GET_PXX_LOW_HIGH if CPU_MIPS32 && PHYS_ADDR_T_64BIT
select HAS_IOPORT if !NO_IOPORT_MAP || ISA
select HAVE_ARCH_COMPILER_H
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 12fb3da59700..22173a897670 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -210,8 +210,6 @@ config PPC
select GENERIC_PCI_IOMAP if PCI
select GENERIC_SMP_IDLE_THREAD
select GENERIC_TIME_VSYSCALL
- select GENERIC_VDSO_DATA_STORE
- select GENERIC_VDSO_TIME_NS
select HAS_IOPORT if PCI
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 613ab57cda74..a9ef4eeb72ab 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -53,7 +53,7 @@ config RISCV
select ARCH_HAS_SYSCALL_WRAPPER
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAS_UBSAN
- select ARCH_HAS_VDSO_ARCH_DATA if GENERIC_VDSO_DATA_STORE
+ select ARCH_HAS_VDSO_ARCH_DATA if HAVE_GENERIC_VDSO
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_KEEP_MEMBLOCK if ACPI
select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE if 64BIT && MMU
@@ -110,7 +110,7 @@ config RISCV
select GENERIC_CPU_VULNERABILITIES
select GENERIC_EARLY_IOREMAP
select GENERIC_ENTRY
- select GENERIC_GETTIMEOFDAY if HAVE_GENERIC_VDSO
+ select GENERIC_GETTIMEOFDAY if HAVE_GENERIC_VDSO && 64BIT
select GENERIC_IDLE_POLL_SETUP
select GENERIC_IOREMAP if MMU
select GENERIC_IRQ_IPI if SMP
@@ -123,9 +123,7 @@ config RISCV
select GENERIC_PCI_IOMAP
select GENERIC_SCHED_CLOCK
select GENERIC_SMP_IDLE_THREAD
- select GENERIC_TIME_VSYSCALL if MMU && 64BIT
- select GENERIC_VDSO_DATA_STORE if MMU
- select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO
+ select GENERIC_TIME_VSYSCALL if GENERIC_GETTIMEOFDAY
select HARDIRQS_SW_RESEND
select HAS_IOPORT if MMU
select HAVE_ALIGNED_STRUCT_PAGE
@@ -164,11 +162,12 @@ config RISCV
select HAVE_FUNCTION_GRAPH_FREGS
select HAVE_FUNCTION_TRACER if !XIP_KERNEL && HAVE_DYNAMIC_FTRACE
select HAVE_EBPF_JIT if MMU
+ select HAVE_GENERIC_TIF_BITS
select HAVE_GUP_FAST if MMU
select HAVE_FUNCTION_ARG_ACCESS_API
select HAVE_FUNCTION_ERROR_INJECTION
select HAVE_GCC_PLUGINS
- select HAVE_GENERIC_VDSO if MMU && 64BIT
+ select HAVE_GENERIC_VDSO if MMU
select HAVE_IRQ_TIME_ACCOUNTING
select HAVE_KERNEL_BZIP2 if !XIP_KERNEL && !EFI_ZBOOT
select HAVE_KERNEL_GZIP if !XIP_KERNEL && !EFI_ZBOOT
@@ -224,7 +223,7 @@ config RISCV
select THREAD_INFO_IN_TASK
select TRACE_IRQFLAGS_SUPPORT
select UACCESS_MEMCPY if !MMU
- select VDSO_GETRANDOM if HAVE_GENERIC_VDSO
+ select VDSO_GETRANDOM if HAVE_GENERIC_VDSO && 64BIT
select USER_STACKTRACE_SUPPORT
select ZONE_DMA32 if 64BIT
diff --git a/arch/riscv/boot/dts/sophgo/sg2042.dtsi b/arch/riscv/boot/dts/sophgo/sg2042.dtsi
index b3e4d3c18fdc..6430c6e25c00 100644
--- a/arch/riscv/boot/dts/sophgo/sg2042.dtsi
+++ b/arch/riscv/boot/dts/sophgo/sg2042.dtsi
@@ -190,7 +190,7 @@
reg-names = "clr", "doorbell";
msi-controller;
#msi-cells = <0>;
- msi-ranges = <&intc 64 IRQ_TYPE_LEVEL_HIGH 32>;
+ msi-ranges = <&intc 64 IRQ_TYPE_EDGE_RISING 32>;
};
rpgate: clock-controller@7030010368 {
diff --git a/arch/riscv/boot/dts/sophgo/sg2044.dtsi b/arch/riscv/boot/dts/sophgo/sg2044.dtsi
index 6ec955744b0c..320c4d1d08e6 100644
--- a/arch/riscv/boot/dts/sophgo/sg2044.dtsi
+++ b/arch/riscv/boot/dts/sophgo/sg2044.dtsi
@@ -214,7 +214,7 @@
reg-names = "clr", "doorbell";
#msi-cells = <0>;
msi-controller;
- msi-ranges = <&intc 352 IRQ_TYPE_LEVEL_HIGH 512>;
+ msi-ranges = <&intc 352 IRQ_TYPE_EDGE_RISING 512>;
status = "disabled";
};
diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
index c33d8b7dd488..836d80dd2921 100644
--- a/arch/riscv/include/asm/thread_info.h
+++ b/arch/riscv/include/asm/thread_info.h
@@ -107,23 +107,18 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
* - pending work-to-be-done flags are in lowest half-word
* - other flags in upper half-word(s)
*/
-#define TIF_NEED_RESCHED 0 /* rescheduling necessary */
-#define TIF_NEED_RESCHED_LAZY 1 /* Lazy rescheduling needed */
-#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
-#define TIF_SIGPENDING 3 /* signal pending */
-#define TIF_RESTORE_SIGMASK 4 /* restore signal mask in do_signal() */
-#define TIF_MEMDIE 5 /* is terminating due to OOM killer */
-#define TIF_NOTIFY_SIGNAL 9 /* signal notifications exist */
-#define TIF_UPROBE 10 /* uprobe breakpoint or singlestep */
-#define TIF_32BIT 11 /* compat-mode 32bit process */
-#define TIF_RISCV_V_DEFER_RESTORE 12 /* restore Vector before returing to user */
-
-#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
-#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
-#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
-#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
-#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
-#define _TIF_UPROBE (1 << TIF_UPROBE)
-#define _TIF_RISCV_V_DEFER_RESTORE (1 << TIF_RISCV_V_DEFER_RESTORE)
+
+/*
+ * Tell the generic TIF infrastructure which bits riscv supports
+ */
+#define HAVE_TIF_NEED_RESCHED_LAZY
+#define HAVE_TIF_RESTORE_SIGMASK
+
+#include <asm-generic/thread_info_tif.h>
+
+#define TIF_32BIT 16 /* compat-mode 32bit process */
+#define TIF_RISCV_V_DEFER_RESTORE 17 /* restore Vector before returing to user */
+
+#define _TIF_RISCV_V_DEFER_RESTORE BIT(TIF_RISCV_V_DEFER_RESTORE)
#endif /* _ASM_RISCV_THREAD_INFO_H */
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 7104534f0a0f..2414ee3ff002 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -174,8 +174,6 @@ config S390
select GENERIC_GETTIMEOFDAY
select GENERIC_SMP_IDLE_THREAD
select GENERIC_TIME_VSYSCALL
- select GENERIC_VDSO_DATA_STORE
- select GENERIC_VDSO_TIME_NS
select GENERIC_IOREMAP if PCI
select HAVE_ALIGNED_STRUCT_PAGE
select HAVE_ARCH_AUDITSYSCALL
@@ -206,6 +204,7 @@ config S390
select HAVE_DYNAMIC_FTRACE_WITH_REGS
select HAVE_EBPF_JIT if HAVE_MARCH_Z196_FEATURES
select HAVE_EFFICIENT_UNALIGNED_ACCESS
+ select HAVE_GENERIC_TIF_BITS
select HAVE_GUP_FAST
select HAVE_FENTRY
select HAVE_FTRACE_GRAPH_FUNC
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index f6ed2c8192c8..7878e9bfbf07 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -56,49 +56,31 @@ void arch_setup_new_exec(void);
/*
* thread information flags bit numbers
+ *
+ * Tell the generic TIF infrastructure which special bits s390 supports
*/
-#define TIF_NOTIFY_RESUME 0 /* callback before returning to user */
-#define TIF_SIGPENDING 1 /* signal pending */
-#define TIF_NEED_RESCHED 2 /* rescheduling necessary */
-#define TIF_NEED_RESCHED_LAZY 3 /* lazy rescheduling needed */
-#define TIF_UPROBE 4 /* breakpointed or single-stepping */
-#define TIF_PATCH_PENDING 5 /* pending live patching update */
-#define TIF_ASCE_PRIMARY 6 /* primary asce is kernel asce */
-#define TIF_NOTIFY_SIGNAL 7 /* signal notifications exist */
-#define TIF_GUARDED_STORAGE 8 /* load guarded storage control block */
-#define TIF_ISOLATE_BP_GUEST 9 /* Run KVM guests with isolated BP */
-#define TIF_PER_TRAP 10 /* Need to handle PER trap on exit to usermode */
-#define TIF_31BIT 16 /* 32bit process */
-#define TIF_MEMDIE 17 /* is terminating due to OOM killer */
-#define TIF_RESTORE_SIGMASK 18 /* restore signal mask in do_signal() */
-#define TIF_SINGLE_STEP 19 /* This task is single stepped */
-#define TIF_BLOCK_STEP 20 /* This task is block stepped */
-#define TIF_UPROBE_SINGLESTEP 21 /* This task is uprobe single stepped */
-#define TIF_SYSCALL_TRACE 24 /* syscall trace active */
-#define TIF_SYSCALL_AUDIT 25 /* syscall auditing active */
-#define TIF_SECCOMP 26 /* secure computing */
-#define TIF_SYSCALL_TRACEPOINT 27 /* syscall tracepoint instrumentation */
+#define HAVE_TIF_NEED_RESCHED_LAZY
+#define HAVE_TIF_RESTORE_SIGMASK
+
+#include <asm-generic/thread_info_tif.h>
+
+/* Architecture specific bits */
+#define TIF_ASCE_PRIMARY 16 /* primary asce is kernel asce */
+#define TIF_GUARDED_STORAGE 17 /* load guarded storage control block */
+#define TIF_ISOLATE_BP_GUEST 18 /* Run KVM guests with isolated BP */
+#define TIF_PER_TRAP 19 /* Need to handle PER trap on exit to usermode */
+#define TIF_31BIT 20 /* 32bit process */
+#define TIF_SINGLE_STEP 21 /* This task is single stepped */
+#define TIF_BLOCK_STEP 22 /* This task is block stepped */
+#define TIF_UPROBE_SINGLESTEP 23 /* This task is uprobe single stepped */
-#define _TIF_NOTIFY_RESUME BIT(TIF_NOTIFY_RESUME)
-#define _TIF_SIGPENDING BIT(TIF_SIGPENDING)
-#define _TIF_NEED_RESCHED BIT(TIF_NEED_RESCHED)
-#define _TIF_NEED_RESCHED_LAZY BIT(TIF_NEED_RESCHED_LAZY)
-#define _TIF_UPROBE BIT(TIF_UPROBE)
-#define _TIF_PATCH_PENDING BIT(TIF_PATCH_PENDING)
#define _TIF_ASCE_PRIMARY BIT(TIF_ASCE_PRIMARY)
-#define _TIF_NOTIFY_SIGNAL BIT(TIF_NOTIFY_SIGNAL)
#define _TIF_GUARDED_STORAGE BIT(TIF_GUARDED_STORAGE)
#define _TIF_ISOLATE_BP_GUEST BIT(TIF_ISOLATE_BP_GUEST)
#define _TIF_PER_TRAP BIT(TIF_PER_TRAP)
#define _TIF_31BIT BIT(TIF_31BIT)
-#define _TIF_MEMDIE BIT(TIF_MEMDIE)
-#define _TIF_RESTORE_SIGMASK BIT(TIF_RESTORE_SIGMASK)
#define _TIF_SINGLE_STEP BIT(TIF_SINGLE_STEP)
#define _TIF_BLOCK_STEP BIT(TIF_BLOCK_STEP)
#define _TIF_UPROBE_SINGLESTEP BIT(TIF_UPROBE_SINGLESTEP)
-#define _TIF_SYSCALL_TRACE BIT(TIF_SYSCALL_TRACE)
-#define _TIF_SYSCALL_AUDIT BIT(TIF_SYSCALL_AUDIT)
-#define _TIF_SECCOMP BIT(TIF_SECCOMP)
-#define _TIF_SYSCALL_TRACEPOINT BIT(TIF_SYSCALL_TRACEPOINT)
#endif /* _ASM_THREAD_INFO_H */
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
index f7fb3d88c57b..36b985d0e7bf 100644
--- a/arch/x86/Kbuild
+++ b/arch/x86/Kbuild
@@ -3,6 +3,8 @@
# Branch profiling isn't noinstr-safe. Disable it for arch/x86/*
subdir-ccflags-$(CONFIG_TRACE_BRANCH_PROFILING) += -DDISABLE_BRANCH_PROFILING
+obj-y += boot/startup/
+
obj-$(CONFIG_ARCH_HAS_CC_PLATFORM) += coco/
obj-y += entry/
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 679c7f980aa3..75f3de70df51 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -14,7 +14,6 @@ config X86_32
select ARCH_WANT_IPC_PARSE_VERSION
select CLKSRC_I8253
select CLONE_BACKWARDS
- select GENERIC_VDSO_32
select HAVE_DEBUG_STACKOVERFLOW
select KMAP_LOCAL
select MODULES_USE_ELF_REL
@@ -182,8 +181,6 @@ config X86
select GENERIC_SMP_IDLE_THREAD
select GENERIC_TIME_VSYSCALL
select GENERIC_GETTIMEOFDAY
- select GENERIC_VDSO_DATA_STORE
- select GENERIC_VDSO_TIME_NS
select GENERIC_VDSO_OVERFLOW_PROTECT
select GUP_GET_PXX_LOW_HIGH if X86_PAE
select HARDIRQS_SW_RESEND
@@ -239,6 +236,7 @@ config X86
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_EISA if X86_32
select HAVE_EXIT_THREAD
+ select HAVE_GENERIC_TIF_BITS
select HAVE_GUP_FAST
select HAVE_FENTRY if X86_64 || DYNAMIC_FTRACE
select HAVE_FTRACE_GRAPH_FUNC if HAVE_FUNCTION_GRAPH_TRACER
@@ -487,6 +485,19 @@ config X86_X2APIC
If in doubt, say Y.
+config AMD_SECURE_AVIC
+ bool "AMD Secure AVIC"
+ depends on AMD_MEM_ENCRYPT && X86_X2APIC
+ help
+ Enable this to get AMD Secure AVIC support on guests that have this feature.
+
+ AMD Secure AVIC provides hardware acceleration for performance sensitive
+ APIC accesses and support for managing guest owned APIC state for SEV-SNP
+ guests. Secure AVIC does not support xAPIC mode. It has functional
+ dependency on x2apic being enabled in the guest.
+
+ If you don't know what to do here, say N.
+
config X86_POSTED_MSI
bool "Enable MSI and MSI-x delivery by posted interrupts"
depends on X86_64 && IRQ_REMAP
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 4b4e2a3ac6df..4db7e4bf69f5 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -275,7 +275,6 @@ archprepare: $(cpufeaturemasks.hdr)
###
# Kernel objects
-core-y += arch/x86/boot/startup/
libs-y += arch/x86/lib/
# drivers-y are linked after core-y
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 3a38fdcdb9bd..74657589264d 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -73,7 +73,7 @@ LDFLAGS_vmlinux += -T
hostprogs := mkpiggy
HOST_EXTRACFLAGS += -I$(srctree)/tools/include
-sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABbCDGRSTtVW] \(_text\|__start_rodata\|__bss_start\|_end\)$$/\#define VO_\2 _AC(0x\1,UL)/p'
+sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABbCDGRSTtVW] \(_text\|__start_rodata\|_sinittext\|__inittext_end\|__bss_start\|_end\)$$/\#define VO_\2 _AC(0x\1,UL)/p'
quiet_cmd_voffset = VOFFSET $@
cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 94b5991da001..0f41ca0e52c0 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -332,6 +332,8 @@ static size_t parse_elf(void *output)
}
const unsigned long kernel_text_size = VO___start_rodata - VO__text;
+const unsigned long kernel_inittext_offset = VO__sinittext - VO__text;
+const unsigned long kernel_inittext_size = VO___inittext_end - VO__sinittext;
const unsigned long kernel_total_size = VO__end - VO__text;
static u8 boot_heap[BOOT_HEAP_SIZE] __aligned(4);
diff --git a/arch/x86/boot/compressed/sev-handle-vc.c b/arch/x86/boot/compressed/sev-handle-vc.c
index 89dd02de2a0f..7530ad8b768b 100644
--- a/arch/x86/boot/compressed/sev-handle-vc.c
+++ b/arch/x86/boot/compressed/sev-handle-vc.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "misc.h"
+#include "error.h"
#include "sev.h"
#include <linux/kernel.h>
@@ -14,6 +15,8 @@
#include <asm/fpu/xcr.h>
#define __BOOT_COMPRESSED
+#undef __init
+#define __init
/* Basic instruction decoding support needed */
#include "../../lib/inat.c"
diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
index fd1b67dfea22..6e5c32a53d03 100644
--- a/arch/x86/boot/compressed/sev.c
+++ b/arch/x86/boot/compressed/sev.c
@@ -32,102 +32,47 @@ struct ghcb *boot_ghcb;
#undef __init
#define __init
-#undef __head
-#define __head
-
#define __BOOT_COMPRESSED
-extern struct svsm_ca *boot_svsm_caa;
-extern u64 boot_svsm_caa_pa;
-
-struct svsm_ca *svsm_get_caa(void)
-{
- return boot_svsm_caa;
-}
-
-u64 svsm_get_caa_pa(void)
-{
- return boot_svsm_caa_pa;
-}
-
-int svsm_perform_call_protocol(struct svsm_call *call);
-
u8 snp_vmpl;
+u16 ghcb_version;
+
+u64 boot_svsm_caa_pa;
/* Include code for early handlers */
#include "../../boot/startup/sev-shared.c"
-int svsm_perform_call_protocol(struct svsm_call *call)
-{
- struct ghcb *ghcb;
- int ret;
-
- if (boot_ghcb)
- ghcb = boot_ghcb;
- else
- ghcb = NULL;
-
- do {
- ret = ghcb ? svsm_perform_ghcb_protocol(ghcb, call)
- : svsm_perform_msr_protocol(call);
- } while (ret == -EAGAIN);
-
- return ret;
-}
-
static bool sev_snp_enabled(void)
{
return sev_status & MSR_AMD64_SEV_SNP_ENABLED;
}
-static void __page_state_change(unsigned long paddr, enum psc_op op)
-{
- u64 val, msr;
-
- /*
- * If private -> shared then invalidate the page before requesting the
- * state change in the RMP table.
- */
- if (op == SNP_PAGE_STATE_SHARED)
- pvalidate_4k_page(paddr, paddr, false);
-
- /* Save the current GHCB MSR value */
- msr = sev_es_rd_ghcb_msr();
-
- /* Issue VMGEXIT to change the page state in RMP table. */
- sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op));
- VMGEXIT();
-
- /* Read the response of the VMGEXIT. */
- val = sev_es_rd_ghcb_msr();
- if ((GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP) || GHCB_MSR_PSC_RESP_VAL(val))
- sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
-
- /* Restore the GHCB MSR value */
- sev_es_wr_ghcb_msr(msr);
-
- /*
- * Now that page state is changed in the RMP table, validate it so that it is
- * consistent with the RMP entry.
- */
- if (op == SNP_PAGE_STATE_PRIVATE)
- pvalidate_4k_page(paddr, paddr, true);
-}
-
void snp_set_page_private(unsigned long paddr)
{
+ struct psc_desc d = {
+ SNP_PAGE_STATE_PRIVATE,
+ (struct svsm_ca *)boot_svsm_caa_pa,
+ boot_svsm_caa_pa
+ };
+
if (!sev_snp_enabled())
return;
- __page_state_change(paddr, SNP_PAGE_STATE_PRIVATE);
+ __page_state_change(paddr, paddr, &d);
}
void snp_set_page_shared(unsigned long paddr)
{
+ struct psc_desc d = {
+ SNP_PAGE_STATE_SHARED,
+ (struct svsm_ca *)boot_svsm_caa_pa,
+ boot_svsm_caa_pa
+ };
+
if (!sev_snp_enabled())
return;
- __page_state_change(paddr, SNP_PAGE_STATE_SHARED);
+ __page_state_change(paddr, paddr, &d);
}
bool early_setup_ghcb(void)
@@ -152,8 +97,14 @@ bool early_setup_ghcb(void)
void snp_accept_memory(phys_addr_t start, phys_addr_t end)
{
+ struct psc_desc d = {
+ SNP_PAGE_STATE_PRIVATE,
+ (struct svsm_ca *)boot_svsm_caa_pa,
+ boot_svsm_caa_pa
+ };
+
for (phys_addr_t pa = start; pa < end; pa += PAGE_SIZE)
- __page_state_change(pa, SNP_PAGE_STATE_PRIVATE);
+ __page_state_change(pa, pa, &d);
}
void sev_es_shutdown_ghcb(void)
@@ -235,15 +186,23 @@ bool sev_es_check_ghcb_fault(unsigned long address)
MSR_AMD64_SNP_VMSA_REG_PROT | \
MSR_AMD64_SNP_RESERVED_BIT13 | \
MSR_AMD64_SNP_RESERVED_BIT15 | \
+ MSR_AMD64_SNP_SECURE_AVIC | \
MSR_AMD64_SNP_RESERVED_MASK)
+#ifdef CONFIG_AMD_SECURE_AVIC
+#define SNP_FEATURE_SECURE_AVIC MSR_AMD64_SNP_SECURE_AVIC
+#else
+#define SNP_FEATURE_SECURE_AVIC 0
+#endif
+
/*
* SNP_FEATURES_PRESENT is the mask of SNP features that are implemented
* by the guest kernel. As and when a new feature is implemented in the
* guest kernel, a corresponding bit should be added to the mask.
*/
#define SNP_FEATURES_PRESENT (MSR_AMD64_SNP_DEBUG_SWAP | \
- MSR_AMD64_SNP_SECURE_TSC)
+ MSR_AMD64_SNP_SECURE_TSC | \
+ SNP_FEATURE_SECURE_AVIC)
u64 snp_get_unsupported_features(u64 status)
{
@@ -347,7 +306,7 @@ static bool early_snp_init(struct boot_params *bp)
* running at VMPL0. The CA will be used to communicate with the
* SVSM and request its services.
*/
- svsm_setup_ca(cc_info);
+ svsm_setup_ca(cc_info, rip_rel_ptr(&boot_ghcb_page));
/*
* Pass run-time kernel a pointer to CC info via boot_params so EFI
@@ -391,6 +350,8 @@ static int sev_check_cpu_support(void)
if (!(eax & BIT(1)))
return -ENODEV;
+ sev_snp_needs_sfw = !(ebx & BIT(31));
+
return ebx & 0x3f;
}
@@ -453,30 +414,16 @@ void sev_enable(struct boot_params *bp)
*/
if (sev_status & MSR_AMD64_SEV_SNP_ENABLED) {
u64 hv_features;
- int ret;
hv_features = get_hv_features();
if (!(hv_features & GHCB_HV_FT_SNP))
sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
/*
- * Enforce running at VMPL0 or with an SVSM.
- *
- * Use RMPADJUST (see the rmpadjust() function for a description of
- * what the instruction does) to update the VMPL1 permissions of a
- * page. If the guest is running at VMPL0, this will succeed. If the
- * guest is running at any other VMPL, this will fail. Linux SNP guests
- * only ever run at a single VMPL level so permission mask changes of a
- * lesser-privileged VMPL are a don't-care.
- */
- ret = rmpadjust((unsigned long)&boot_ghcb_page, RMP_PG_SIZE_4K, 1);
-
- /*
- * Running at VMPL0 is not required if an SVSM is present and the hypervisor
- * supports the required SVSM GHCB events.
+ * Running at VMPL0 is required unless an SVSM is present and
+ * the hypervisor supports the required SVSM GHCB events.
*/
- if (ret &&
- !(snp_vmpl && (hv_features & GHCB_HV_FT_SNP_MULTI_VMPL)))
+ if (snp_vmpl && !(hv_features & GHCB_HV_FT_SNP_MULTI_VMPL))
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_NOT_VMPL0);
}
@@ -550,7 +497,6 @@ bool early_is_sevsnp_guest(void)
/* Obtain the address of the calling area to use */
boot_rdmsr(MSR_SVSM_CAA, &m);
- boot_svsm_caa = (void *)m.q;
boot_svsm_caa_pa = m.q;
/*
diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c
index 63e037e94e4c..916bac09b464 100644
--- a/arch/x86/boot/cpuflags.c
+++ b/arch/x86/boot/cpuflags.c
@@ -106,18 +106,5 @@ void get_cpuflags(void)
cpuid(0x80000001, &ignored, &ignored, &cpu.flags[6],
&cpu.flags[1]);
}
-
- if (max_amd_level >= 0x8000001f) {
- u32 ebx;
-
- /*
- * The X86_FEATURE_COHERENCY_SFW_NO feature bit is in
- * the virtualization flags entry (word 8) and set by
- * scattered.c, so the bit needs to be explicitly set.
- */
- cpuid(0x8000001f, &ignored, &ebx, &ignored, &ignored);
- if (ebx & BIT(31))
- set_bit(X86_FEATURE_COHERENCY_SFW_NO, cpu.flags);
- }
}
}
diff --git a/arch/x86/boot/startup/Makefile b/arch/x86/boot/startup/Makefile
index b514f7e81332..e8fdf020b422 100644
--- a/arch/x86/boot/startup/Makefile
+++ b/arch/x86/boot/startup/Makefile
@@ -4,6 +4,7 @@ KBUILD_AFLAGS += -D__DISABLE_EXPORTS
KBUILD_CFLAGS += -D__DISABLE_EXPORTS -mcmodel=small -fPIC \
-Os -DDISABLE_BRANCH_PROFILING \
$(DISABLE_STACKLEAK_PLUGIN) \
+ $(DISABLE_LATENT_ENTROPY_PLUGIN) \
-fno-stack-protector -D__NO_FORTIFY \
-fno-jump-tables \
-include $(srctree)/include/linux/hidden.h
@@ -19,6 +20,7 @@ KCOV_INSTRUMENT := n
obj-$(CONFIG_X86_64) += gdt_idt.o map_kernel.o
obj-$(CONFIG_AMD_MEM_ENCRYPT) += sme.o sev-startup.o
+pi-objs := $(patsubst %.o,$(obj)/%.o,$(obj-y))
lib-$(CONFIG_X86_64) += la57toggle.o
lib-$(CONFIG_EFI_MIXED) += efi-mixed.o
@@ -28,3 +30,23 @@ lib-$(CONFIG_EFI_MIXED) += efi-mixed.o
# to be linked into the decompressor or the EFI stub but not vmlinux
#
$(patsubst %.o,$(obj)/%.o,$(lib-y)): OBJECT_FILES_NON_STANDARD := y
+
+#
+# Invoke objtool for each object individually to check for absolute
+# relocations, even if other objtool actions are being deferred.
+#
+$(pi-objs): objtool-enabled = 1
+$(pi-objs): objtool-args = $(if $(delay-objtool),,$(objtool-args-y)) --noabs
+
+#
+# Confine the startup code by prefixing all symbols with __pi_ (for position
+# independent). This ensures that startup code can only call other startup
+# code, or code that has explicitly been made accessible to it via a symbol
+# alias.
+#
+$(obj)/%.pi.o: OBJCOPYFLAGS := --prefix-symbols=__pi_
+$(obj)/%.pi.o: $(obj)/%.o FORCE
+ $(call if_changed,objcopy)
+
+targets += $(obj-y)
+obj-y := $(patsubst %.o,%.pi.o,$(obj-y))
diff --git a/arch/x86/boot/startup/exports.h b/arch/x86/boot/startup/exports.h
new file mode 100644
index 000000000000..01d2363dc445
--- /dev/null
+++ b/arch/x86/boot/startup/exports.h
@@ -0,0 +1,14 @@
+
+/*
+ * The symbols below are functions that are implemented by the startup code,
+ * but called at runtime by the SEV code residing in the core kernel.
+ */
+PROVIDE(early_set_pages_state = __pi_early_set_pages_state);
+PROVIDE(early_snp_set_memory_private = __pi_early_snp_set_memory_private);
+PROVIDE(early_snp_set_memory_shared = __pi_early_snp_set_memory_shared);
+PROVIDE(get_hv_features = __pi_get_hv_features);
+PROVIDE(sev_es_terminate = __pi_sev_es_terminate);
+PROVIDE(snp_cpuid = __pi_snp_cpuid);
+PROVIDE(snp_cpuid_get_table = __pi_snp_cpuid_get_table);
+PROVIDE(svsm_issue_call = __pi_svsm_issue_call);
+PROVIDE(svsm_process_result_codes = __pi_svsm_process_result_codes);
diff --git a/arch/x86/boot/startup/gdt_idt.c b/arch/x86/boot/startup/gdt_idt.c
index a3112a69b06a..d16102abdaec 100644
--- a/arch/x86/boot/startup/gdt_idt.c
+++ b/arch/x86/boot/startup/gdt_idt.c
@@ -24,7 +24,7 @@
static gate_desc bringup_idt_table[NUM_EXCEPTION_VECTORS] __page_aligned_data;
/* This may run while still in the direct mapping */
-void __head startup_64_load_idt(void *vc_handler)
+void startup_64_load_idt(void *vc_handler)
{
struct desc_ptr desc = {
.address = (unsigned long)rip_rel_ptr(bringup_idt_table),
@@ -46,7 +46,7 @@ void __head startup_64_load_idt(void *vc_handler)
/*
* Setup boot CPU state needed before kernel switches to virtual addresses.
*/
-void __head startup_64_setup_gdt_idt(void)
+void __init startup_64_setup_gdt_idt(void)
{
struct gdt_page *gp = rip_rel_ptr((void *)(__force unsigned long)&gdt_page);
void *handler = NULL;
diff --git a/arch/x86/boot/startup/map_kernel.c b/arch/x86/boot/startup/map_kernel.c
index 332dbe6688c4..83ba98d61572 100644
--- a/arch/x86/boot/startup/map_kernel.c
+++ b/arch/x86/boot/startup/map_kernel.c
@@ -30,7 +30,7 @@ static inline bool check_la57_support(void)
return true;
}
-static unsigned long __head sme_postprocess_startup(struct boot_params *bp,
+static unsigned long __init sme_postprocess_startup(struct boot_params *bp,
pmdval_t *pmd,
unsigned long p2v_offset)
{
@@ -84,7 +84,7 @@ static unsigned long __head sme_postprocess_startup(struct boot_params *bp,
* the 1:1 mapping of memory. Kernel virtual addresses can be determined by
* subtracting p2v_offset from the RIP-relative address.
*/
-unsigned long __head __startup_64(unsigned long p2v_offset,
+unsigned long __init __startup_64(unsigned long p2v_offset,
struct boot_params *bp)
{
pmd_t (*early_pgts)[PTRS_PER_PMD] = rip_rel_ptr(early_dynamic_pgts);
diff --git a/arch/x86/boot/startup/sev-shared.c b/arch/x86/boot/startup/sev-shared.c
index a34cd19796f9..4e22ffd73516 100644
--- a/arch/x86/boot/startup/sev-shared.c
+++ b/arch/x86/boot/startup/sev-shared.c
@@ -12,35 +12,12 @@
#include <asm/setup_data.h>
#ifndef __BOOT_COMPRESSED
-#define error(v) pr_err(v)
#define has_cpuflag(f) boot_cpu_has(f)
#else
#undef WARN
#define WARN(condition, format...) (!!(condition))
-#undef vc_forward_exception
-#define vc_forward_exception(c) panic("SNP: Hypervisor requested exception\n")
#endif
-/*
- * SVSM related information:
- * During boot, the page tables are set up as identity mapped and later
- * changed to use kernel virtual addresses. Maintain separate virtual and
- * physical addresses for the CAA to allow SVSM functions to be used during
- * early boot, both with identity mapped virtual addresses and proper kernel
- * virtual addresses.
- */
-struct svsm_ca *boot_svsm_caa __ro_after_init;
-u64 boot_svsm_caa_pa __ro_after_init;
-
-/*
- * Since feature negotiation related variables are set early in the boot
- * process they must reside in the .data section so as not to be zeroed
- * out when the .bss section is later cleared.
- *
- * GHCB protocol version negotiated with the hypervisor.
- */
-static u16 ghcb_version __ro_after_init;
-
/* Copy of the SNP firmware's CPUID page. */
static struct snp_cpuid_table cpuid_table_copy __ro_after_init;
@@ -54,17 +31,9 @@ static u32 cpuid_std_range_max __ro_after_init;
static u32 cpuid_hyp_range_max __ro_after_init;
static u32 cpuid_ext_range_max __ro_after_init;
-bool __init sev_es_check_cpu_features(void)
-{
- if (!has_cpuflag(X86_FEATURE_RDRAND)) {
- error("RDRAND instruction not supported - no trusted source of randomness available\n");
- return false;
- }
+bool sev_snp_needs_sfw;
- return true;
-}
-
-void __head __noreturn
+void __noreturn
sev_es_terminate(unsigned int set, unsigned int reason)
{
u64 val = GHCB_MSR_TERM_REQ;
@@ -83,7 +52,7 @@ sev_es_terminate(unsigned int set, unsigned int reason)
/*
* The hypervisor features are available from GHCB version 2 onward.
*/
-u64 get_hv_features(void)
+u64 __init get_hv_features(void)
{
u64 val;
@@ -100,72 +69,7 @@ u64 get_hv_features(void)
return GHCB_MSR_HV_FT_RESP_VAL(val);
}
-void snp_register_ghcb_early(unsigned long paddr)
-{
- unsigned long pfn = paddr >> PAGE_SHIFT;
- u64 val;
-
- sev_es_wr_ghcb_msr(GHCB_MSR_REG_GPA_REQ_VAL(pfn));
- VMGEXIT();
-
- val = sev_es_rd_ghcb_msr();
-
- /* If the response GPA is not ours then abort the guest */
- if ((GHCB_RESP_CODE(val) != GHCB_MSR_REG_GPA_RESP) ||
- (GHCB_MSR_REG_GPA_RESP_VAL(val) != pfn))
- sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_REGISTER);
-}
-
-bool sev_es_negotiate_protocol(void)
-{
- u64 val;
-
- /* Do the GHCB protocol version negotiation */
- sev_es_wr_ghcb_msr(GHCB_MSR_SEV_INFO_REQ);
- VMGEXIT();
- val = sev_es_rd_ghcb_msr();
-
- if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP)
- return false;
-
- if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTOCOL_MIN ||
- GHCB_MSR_PROTO_MIN(val) > GHCB_PROTOCOL_MAX)
- return false;
-
- ghcb_version = min_t(size_t, GHCB_MSR_PROTO_MAX(val), GHCB_PROTOCOL_MAX);
-
- return true;
-}
-
-static enum es_result verify_exception_info(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
-{
- u32 ret;
-
- ret = ghcb->save.sw_exit_info_1 & GENMASK_ULL(31, 0);
- if (!ret)
- return ES_OK;
-
- if (ret == 1) {
- u64 info = ghcb->save.sw_exit_info_2;
- unsigned long v = info & SVM_EVTINJ_VEC_MASK;
-
- /* Check if exception information from hypervisor is sane. */
- if ((info & SVM_EVTINJ_VALID) &&
- ((v == X86_TRAP_GP) || (v == X86_TRAP_UD)) &&
- ((info & SVM_EVTINJ_TYPE_MASK) == SVM_EVTINJ_TYPE_EXEPT)) {
- ctxt->fi.vector = v;
-
- if (info & SVM_EVTINJ_VALID_ERR)
- ctxt->fi.error_code = info >> 32;
-
- return ES_EXCEPTION;
- }
- }
-
- return ES_VMM_ERROR;
-}
-
-static inline int svsm_process_result_codes(struct svsm_call *call)
+int svsm_process_result_codes(struct svsm_call *call)
{
switch (call->rax_out) {
case SVSM_SUCCESS:
@@ -193,7 +97,7 @@ static inline int svsm_process_result_codes(struct svsm_call *call)
* - RAX specifies the SVSM protocol/callid as input and the return code
* as output.
*/
-static __always_inline void svsm_issue_call(struct svsm_call *call, u8 *pending)
+void svsm_issue_call(struct svsm_call *call, u8 *pending)
{
register unsigned long rax asm("rax") = call->rax;
register unsigned long rcx asm("rcx") = call->rcx;
@@ -216,7 +120,7 @@ static __always_inline void svsm_issue_call(struct svsm_call *call, u8 *pending)
call->r9_out = r9;
}
-static int svsm_perform_msr_protocol(struct svsm_call *call)
+int svsm_perform_msr_protocol(struct svsm_call *call)
{
u8 pending = 0;
u64 val, resp;
@@ -247,63 +151,6 @@ static int svsm_perform_msr_protocol(struct svsm_call *call)
return svsm_process_result_codes(call);
}
-static int svsm_perform_ghcb_protocol(struct ghcb *ghcb, struct svsm_call *call)
-{
- struct es_em_ctxt ctxt;
- u8 pending = 0;
-
- vc_ghcb_invalidate(ghcb);
-
- /*
- * Fill in protocol and format specifiers. This can be called very early
- * in the boot, so use rip-relative references as needed.
- */
- ghcb->protocol_version = ghcb_version;
- ghcb->ghcb_usage = GHCB_DEFAULT_USAGE;
-
- ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_SNP_RUN_VMPL);
- ghcb_set_sw_exit_info_1(ghcb, 0);
- ghcb_set_sw_exit_info_2(ghcb, 0);
-
- sev_es_wr_ghcb_msr(__pa(ghcb));
-
- svsm_issue_call(call, &pending);
-
- if (pending)
- return -EINVAL;
-
- switch (verify_exception_info(ghcb, &ctxt)) {
- case ES_OK:
- break;
- case ES_EXCEPTION:
- vc_forward_exception(&ctxt);
- fallthrough;
- default:
- return -EINVAL;
- }
-
- return svsm_process_result_codes(call);
-}
-
-enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
- struct es_em_ctxt *ctxt,
- u64 exit_code, u64 exit_info_1,
- u64 exit_info_2)
-{
- /* Fill in protocol and format specifiers */
- ghcb->protocol_version = ghcb_version;
- ghcb->ghcb_usage = GHCB_DEFAULT_USAGE;
-
- ghcb_set_sw_exit_code(ghcb, exit_code);
- ghcb_set_sw_exit_info_1(ghcb, exit_info_1);
- ghcb_set_sw_exit_info_2(ghcb, exit_info_2);
-
- sev_es_wr_ghcb_msr(__pa(ghcb));
- VMGEXIT();
-
- return verify_exception_info(ghcb, ctxt);
-}
-
static int __sev_cpuid_hv(u32 fn, int reg_idx, u32 *reg)
{
u64 val;
@@ -342,44 +189,7 @@ static int __sev_cpuid_hv_msr(struct cpuid_leaf *leaf)
return ret;
}
-static int __sev_cpuid_hv_ghcb(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
-{
- u32 cr4 = native_read_cr4();
- int ret;
-
- ghcb_set_rax(ghcb, leaf->fn);
- ghcb_set_rcx(ghcb, leaf->subfn);
-
- if (cr4 & X86_CR4_OSXSAVE)
- /* Safe to read xcr0 */
- ghcb_set_xcr0(ghcb, xgetbv(XCR_XFEATURE_ENABLED_MASK));
- else
- /* xgetbv will cause #UD - use reset value for xcr0 */
- ghcb_set_xcr0(ghcb, 1);
-
- ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0);
- if (ret != ES_OK)
- return ret;
-
- if (!(ghcb_rax_is_valid(ghcb) &&
- ghcb_rbx_is_valid(ghcb) &&
- ghcb_rcx_is_valid(ghcb) &&
- ghcb_rdx_is_valid(ghcb)))
- return ES_VMM_ERROR;
- leaf->eax = ghcb->save.rax;
- leaf->ebx = ghcb->save.rbx;
- leaf->ecx = ghcb->save.rcx;
- leaf->edx = ghcb->save.rdx;
-
- return ES_OK;
-}
-
-static int sev_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
-{
- return ghcb ? __sev_cpuid_hv_ghcb(ghcb, ctxt, leaf)
- : __sev_cpuid_hv_msr(leaf);
-}
/*
* This may be called early while still running on the initial identity
@@ -412,7 +222,7 @@ const struct snp_cpuid_table *snp_cpuid_get_table(void)
*
* Return: XSAVE area size on success, 0 otherwise.
*/
-static u32 __head snp_cpuid_calc_xsave_size(u64 xfeatures_en, bool compacted)
+static u32 snp_cpuid_calc_xsave_size(u64 xfeatures_en, bool compacted)
{
const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
u64 xfeatures_found = 0;
@@ -448,7 +258,7 @@ static u32 __head snp_cpuid_calc_xsave_size(u64 xfeatures_en, bool compacted)
return xsave_size;
}
-static bool __head
+static bool
snp_cpuid_get_validated_func(struct cpuid_leaf *leaf)
{
const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
@@ -484,21 +294,21 @@ snp_cpuid_get_validated_func(struct cpuid_leaf *leaf)
return false;
}
-static void snp_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
+static void snp_cpuid_hv_msr(void *ctx, struct cpuid_leaf *leaf)
{
- if (sev_cpuid_hv(ghcb, ctxt, leaf))
+ if (__sev_cpuid_hv_msr(leaf))
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID_HV);
}
-static int __head
-snp_cpuid_postprocess(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
- struct cpuid_leaf *leaf)
+static int
+snp_cpuid_postprocess(void (*cpuid_fn)(void *ctx, struct cpuid_leaf *leaf),
+ void *ctx, struct cpuid_leaf *leaf)
{
struct cpuid_leaf leaf_hv = *leaf;
switch (leaf->fn) {
case 0x1:
- snp_cpuid_hv(ghcb, ctxt, &leaf_hv);
+ cpuid_fn(ctx, &leaf_hv);
/* initial APIC ID */
leaf->ebx = (leaf_hv.ebx & GENMASK(31, 24)) | (leaf->ebx & GENMASK(23, 0));
@@ -517,7 +327,7 @@ snp_cpuid_postprocess(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
break;
case 0xB:
leaf_hv.subfn = 0;
- snp_cpuid_hv(ghcb, ctxt, &leaf_hv);
+ cpuid_fn(ctx, &leaf_hv);
/* extended APIC ID */
leaf->edx = leaf_hv.edx;
@@ -565,7 +375,7 @@ snp_cpuid_postprocess(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
}
break;
case 0x8000001E:
- snp_cpuid_hv(ghcb, ctxt, &leaf_hv);
+ cpuid_fn(ctx, &leaf_hv);
/* extended APIC ID */
leaf->eax = leaf_hv.eax;
@@ -586,8 +396,8 @@ snp_cpuid_postprocess(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
* Returns -EOPNOTSUPP if feature not enabled. Any other non-zero return value
* should be treated as fatal by caller.
*/
-int __head
-snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
+int snp_cpuid(void (*cpuid_fn)(void *ctx, struct cpuid_leaf *leaf),
+ void *ctx, struct cpuid_leaf *leaf)
{
const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
@@ -621,7 +431,7 @@ snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
return 0;
}
- return snp_cpuid_postprocess(ghcb, ctxt, leaf);
+ return snp_cpuid_postprocess(cpuid_fn, ctx, leaf);
}
/*
@@ -629,7 +439,7 @@ snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
* page yet, so it only supports the MSR based communication with the
* hypervisor and only the CPUID exit-code.
*/
-void __head do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
+void do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
{
unsigned int subfn = lower_bits(regs->cx, 32);
unsigned int fn = lower_bits(regs->ax, 32);
@@ -648,13 +458,24 @@ void __head do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
leaf.fn = fn;
leaf.subfn = subfn;
- ret = snp_cpuid(NULL, NULL, &leaf);
+ /*
+ * If SNP is active, then snp_cpuid() uses the CPUID table to obtain the
+ * CPUID values (with possible HV interaction during post-processing of
+ * the values). But if SNP is not active (no CPUID table present), then
+ * snp_cpuid() returns -EOPNOTSUPP so that an SEV-ES guest can call the
+ * HV to obtain the CPUID information.
+ */
+ ret = snp_cpuid(snp_cpuid_hv_msr, NULL, &leaf);
if (!ret)
goto cpuid_done;
if (ret != -EOPNOTSUPP)
goto fail;
+ /*
+ * This is reached by a SEV-ES guest and needs to invoke the HV for
+ * the CPUID data.
+ */
if (__sev_cpuid_hv_msr(&leaf))
goto fail;
@@ -705,7 +526,7 @@ struct cc_setup_data {
* Search for a Confidential Computing blob passed in as a setup_data entry
* via the Linux Boot Protocol.
*/
-static __head
+static __init
struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp)
{
struct cc_setup_data *sd = NULL;
@@ -733,7 +554,7 @@ struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp)
* mapping needs to be updated in sync with all the changes to virtual memory
* layout and related mapping facilities throughout the boot process.
*/
-static void __head setup_cpuid_table(const struct cc_blob_sev_info *cc_info)
+static void __init setup_cpuid_table(const struct cc_blob_sev_info *cc_info)
{
const struct snp_cpuid_table *cpuid_table_fw, *cpuid_table;
int i;
@@ -761,13 +582,24 @@ static void __head setup_cpuid_table(const struct cc_blob_sev_info *cc_info)
}
}
-static void __head svsm_pval_4k_page(unsigned long paddr, bool validate)
+static int svsm_call_msr_protocol(struct svsm_call *call)
+{
+ int ret;
+
+ do {
+ ret = svsm_perform_msr_protocol(call);
+ } while (ret == -EAGAIN);
+
+ return ret;
+}
+
+static void svsm_pval_4k_page(unsigned long paddr, bool validate,
+ struct svsm_ca *caa, u64 caa_pa)
{
struct svsm_pvalidate_call *pc;
struct svsm_call call = {};
unsigned long flags;
u64 pc_pa;
- int ret;
/*
* This can be called very early in the boot, use native functions in
@@ -775,10 +607,10 @@ static void __head svsm_pval_4k_page(unsigned long paddr, bool validate)
*/
flags = native_local_irq_save();
- call.caa = svsm_get_caa();
+ call.caa = caa;
pc = (struct svsm_pvalidate_call *)call.caa->svsm_buffer;
- pc_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer);
+ pc_pa = caa_pa + offsetof(struct svsm_ca, svsm_buffer);
pc->num_entries = 1;
pc->cur_index = 0;
@@ -792,20 +624,24 @@ static void __head svsm_pval_4k_page(unsigned long paddr, bool validate)
call.rax = SVSM_CORE_CALL(SVSM_CORE_PVALIDATE);
call.rcx = pc_pa;
- ret = svsm_perform_call_protocol(&call);
- if (ret)
+ /*
+ * Use the MSR protocol exclusively, so that this code is usable in
+ * startup code where VA/PA translations of the GHCB page's address may
+ * be problematic.
+ */
+ if (svsm_call_msr_protocol(&call))
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
native_local_irq_restore(flags);
}
-static void __head pvalidate_4k_page(unsigned long vaddr, unsigned long paddr,
- bool validate)
+static void pvalidate_4k_page(unsigned long vaddr, unsigned long paddr,
+ bool validate, struct svsm_ca *caa, u64 caa_pa)
{
int ret;
if (snp_vmpl) {
- svsm_pval_4k_page(paddr, validate);
+ svsm_pval_4k_page(paddr, validate, caa, caa_pa);
} else {
ret = pvalidate(vaddr, RMP_PG_SIZE_4K, validate);
if (ret)
@@ -816,15 +652,51 @@ static void __head pvalidate_4k_page(unsigned long vaddr, unsigned long paddr,
* If validating memory (making it private) and affected by the
* cache-coherency vulnerability, perform the cache eviction mitigation.
*/
- if (validate && !has_cpuflag(X86_FEATURE_COHERENCY_SFW_NO))
+ if (validate && sev_snp_needs_sfw)
sev_evict_cache((void *)vaddr, 1);
}
+static void __page_state_change(unsigned long vaddr, unsigned long paddr,
+ const struct psc_desc *desc)
+{
+ u64 val, msr;
+
+ /*
+ * If private -> shared then invalidate the page before requesting the
+ * state change in the RMP table.
+ */
+ if (desc->op == SNP_PAGE_STATE_SHARED)
+ pvalidate_4k_page(vaddr, paddr, false, desc->ca, desc->caa_pa);
+
+ /* Save the current GHCB MSR value */
+ msr = sev_es_rd_ghcb_msr();
+
+ /* Issue VMGEXIT to change the page state in RMP table. */
+ sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, desc->op));
+ VMGEXIT();
+
+ /* Read the response of the VMGEXIT. */
+ val = sev_es_rd_ghcb_msr();
+ if ((GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP) || GHCB_MSR_PSC_RESP_VAL(val))
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
+
+ /* Restore the GHCB MSR value */
+ sev_es_wr_ghcb_msr(msr);
+
+ /*
+ * Now that page state is changed in the RMP table, validate it so that it is
+ * consistent with the RMP entry.
+ */
+ if (desc->op == SNP_PAGE_STATE_PRIVATE)
+ pvalidate_4k_page(vaddr, paddr, true, desc->ca, desc->caa_pa);
+}
+
/*
* Maintain the GPA of the SVSM Calling Area (CA) in order to utilize the SVSM
* services needed when not running in VMPL0.
*/
-static bool __head svsm_setup_ca(const struct cc_blob_sev_info *cc_info)
+static bool __init svsm_setup_ca(const struct cc_blob_sev_info *cc_info,
+ void *page)
{
struct snp_secrets_page *secrets_page;
struct snp_cpuid_table *cpuid_table;
@@ -847,7 +719,7 @@ static bool __head svsm_setup_ca(const struct cc_blob_sev_info *cc_info)
* routine is running identity mapped when called, both by the decompressor
* code and the early kernel code.
*/
- if (!rmpadjust((unsigned long)rip_rel_ptr(&boot_ghcb_page), RMP_PG_SIZE_4K, 1))
+ if (!rmpadjust((unsigned long)page, RMP_PG_SIZE_4K, 1))
return false;
/*
@@ -875,11 +747,6 @@ static bool __head svsm_setup_ca(const struct cc_blob_sev_info *cc_info)
if (caa & (PAGE_SIZE - 1))
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SVSM_CAA);
- /*
- * The CA is identity mapped when this routine is called, both by the
- * decompressor code and the early kernel code.
- */
- boot_svsm_caa = (struct svsm_ca *)caa;
boot_svsm_caa_pa = caa;
/* Advertise the SVSM presence via CPUID. */
diff --git a/arch/x86/boot/startup/sev-startup.c b/arch/x86/boot/startup/sev-startup.c
index 0b7e3b950183..09725428d3e6 100644
--- a/arch/x86/boot/startup/sev-startup.c
+++ b/arch/x86/boot/startup/sev-startup.c
@@ -41,143 +41,14 @@
#include <asm/cpuid/api.h>
#include <asm/cmdline.h>
-/* For early boot hypervisor communication in SEV-ES enabled guests */
-struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);
-
-/*
- * Needs to be in the .data section because we need it NULL before bss is
- * cleared
- */
-struct ghcb *boot_ghcb __section(".data");
-
-/* Bitmap of SEV features supported by the hypervisor */
-u64 sev_hv_features __ro_after_init;
-
-/* Secrets page physical address from the CC blob */
-u64 sev_secrets_pa __ro_after_init;
-
-/* For early boot SVSM communication */
-struct svsm_ca boot_svsm_ca_page __aligned(PAGE_SIZE);
-
-DEFINE_PER_CPU(struct svsm_ca *, svsm_caa);
-DEFINE_PER_CPU(u64, svsm_caa_pa);
-
-/*
- * Nothing shall interrupt this code path while holding the per-CPU
- * GHCB. The backup GHCB is only for NMIs interrupting this path.
- *
- * Callers must disable local interrupts around it.
- */
-noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state)
-{
- struct sev_es_runtime_data *data;
- struct ghcb *ghcb;
-
- WARN_ON(!irqs_disabled());
-
- data = this_cpu_read(runtime_data);
- ghcb = &data->ghcb_page;
-
- if (unlikely(data->ghcb_active)) {
- /* GHCB is already in use - save its contents */
-
- if (unlikely(data->backup_ghcb_active)) {
- /*
- * Backup-GHCB is also already in use. There is no way
- * to continue here so just kill the machine. To make
- * panic() work, mark GHCBs inactive so that messages
- * can be printed out.
- */
- data->ghcb_active = false;
- data->backup_ghcb_active = false;
-
- instrumentation_begin();
- panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
- instrumentation_end();
- }
-
- /* Mark backup_ghcb active before writing to it */
- data->backup_ghcb_active = true;
-
- state->ghcb = &data->backup_ghcb;
-
- /* Backup GHCB content */
- *state->ghcb = *ghcb;
- } else {
- state->ghcb = NULL;
- data->ghcb_active = true;
- }
-
- return ghcb;
-}
-
/* Include code shared with pre-decompression boot stage */
#include "sev-shared.c"
-noinstr void __sev_put_ghcb(struct ghcb_state *state)
-{
- struct sev_es_runtime_data *data;
- struct ghcb *ghcb;
-
- WARN_ON(!irqs_disabled());
-
- data = this_cpu_read(runtime_data);
- ghcb = &data->ghcb_page;
-
- if (state->ghcb) {
- /* Restore GHCB from Backup */
- *ghcb = *state->ghcb;
- data->backup_ghcb_active = false;
- state->ghcb = NULL;
- } else {
- /*
- * Invalidate the GHCB so a VMGEXIT instruction issued
- * from userspace won't appear to be valid.
- */
- vc_ghcb_invalidate(ghcb);
- data->ghcb_active = false;
- }
-}
-
-int svsm_perform_call_protocol(struct svsm_call *call)
-{
- struct ghcb_state state;
- unsigned long flags;
- struct ghcb *ghcb;
- int ret;
-
- /*
- * This can be called very early in the boot, use native functions in
- * order to avoid paravirt issues.
- */
- flags = native_local_irq_save();
-
- if (sev_cfg.ghcbs_initialized)
- ghcb = __sev_get_ghcb(&state);
- else if (boot_ghcb)
- ghcb = boot_ghcb;
- else
- ghcb = NULL;
-
- do {
- ret = ghcb ? svsm_perform_ghcb_protocol(ghcb, call)
- : svsm_perform_msr_protocol(call);
- } while (ret == -EAGAIN);
-
- if (sev_cfg.ghcbs_initialized)
- __sev_put_ghcb(&state);
-
- native_local_irq_restore(flags);
-
- return ret;
-}
-
-void __head
+void
early_set_pages_state(unsigned long vaddr, unsigned long paddr,
- unsigned long npages, enum psc_op op)
+ unsigned long npages, const struct psc_desc *desc)
{
unsigned long paddr_end;
- u64 val;
vaddr = vaddr & PAGE_MASK;
@@ -185,42 +56,22 @@ early_set_pages_state(unsigned long vaddr, unsigned long paddr,
paddr_end = paddr + (npages << PAGE_SHIFT);
while (paddr < paddr_end) {
- /* Page validation must be rescinded before changing to shared */
- if (op == SNP_PAGE_STATE_SHARED)
- pvalidate_4k_page(vaddr, paddr, false);
-
- /*
- * Use the MSR protocol because this function can be called before
- * the GHCB is established.
- */
- sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op));
- VMGEXIT();
-
- val = sev_es_rd_ghcb_msr();
-
- if (GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP)
- goto e_term;
-
- if (GHCB_MSR_PSC_RESP_VAL(val))
- goto e_term;
-
- /* Page validation must be performed after changing to private */
- if (op == SNP_PAGE_STATE_PRIVATE)
- pvalidate_4k_page(vaddr, paddr, true);
+ __page_state_change(vaddr, paddr, desc);
vaddr += PAGE_SIZE;
paddr += PAGE_SIZE;
}
-
- return;
-
-e_term:
- sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
}
-void __head early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
+void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
unsigned long npages)
{
+ struct psc_desc d = {
+ SNP_PAGE_STATE_PRIVATE,
+ rip_rel_ptr(&boot_svsm_ca_page),
+ boot_svsm_caa_pa
+ };
+
/*
* This can be invoked in early boot while running identity mapped, so
* use an open coded check for SNP instead of using cc_platform_has().
@@ -234,12 +85,18 @@ void __head early_snp_set_memory_private(unsigned long vaddr, unsigned long padd
* Ask the hypervisor to mark the memory pages as private in the RMP
* table.
*/
- early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_PRIVATE);
+ early_set_pages_state(vaddr, paddr, npages, &d);
}
-void __head early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
+void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
unsigned long npages)
{
+ struct psc_desc d = {
+ SNP_PAGE_STATE_SHARED,
+ rip_rel_ptr(&boot_svsm_ca_page),
+ boot_svsm_caa_pa
+ };
+
/*
* This can be invoked in early boot while running identity mapped, so
* use an open coded check for SNP instead of using cc_platform_has().
@@ -250,7 +107,7 @@ void __head early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr
return;
/* Ask hypervisor to mark the memory pages shared in the RMP table. */
- early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_SHARED);
+ early_set_pages_state(vaddr, paddr, npages, &d);
}
/*
@@ -266,7 +123,7 @@ void __head early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr
*
* Scan for the blob in that order.
*/
-static __head struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp)
+static struct cc_blob_sev_info *__init find_cc_blob(struct boot_params *bp)
{
struct cc_blob_sev_info *cc_info;
@@ -287,15 +144,15 @@ static __head struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp)
found_cc_info:
if (cc_info->magic != CC_BLOB_SEV_HDR_MAGIC)
- snp_abort();
+ sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
return cc_info;
}
-static __head void svsm_setup(struct cc_blob_sev_info *cc_info)
+static void __init svsm_setup(struct cc_blob_sev_info *cc_info)
{
+ struct snp_secrets_page *secrets = (void *)cc_info->secrets_phys;
struct svsm_call call = {};
- int ret;
u64 pa;
/*
@@ -303,7 +160,7 @@ static __head void svsm_setup(struct cc_blob_sev_info *cc_info)
* running at VMPL0. The CA will be used to communicate with the
* SVSM to perform the SVSM services.
*/
- if (!svsm_setup_ca(cc_info))
+ if (!svsm_setup_ca(cc_info, rip_rel_ptr(&boot_svsm_ca_page)))
return;
/*
@@ -315,25 +172,25 @@ static __head void svsm_setup(struct cc_blob_sev_info *cc_info)
pa = (u64)rip_rel_ptr(&boot_svsm_ca_page);
/*
- * Switch over to the boot SVSM CA while the current CA is still
- * addressable. There is no GHCB at this point so use the MSR protocol.
+ * Switch over to the boot SVSM CA while the current CA is still 1:1
+ * mapped and thus addressable with VA == PA. There is no GHCB at this
+ * point so use the MSR protocol.
*
* SVSM_CORE_REMAP_CA call:
* RAX = 0 (Protocol=0, CallID=0)
* RCX = New CA GPA
*/
- call.caa = svsm_get_caa();
+ call.caa = (struct svsm_ca *)secrets->svsm_caa;
call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA);
call.rcx = pa;
- ret = svsm_perform_call_protocol(&call);
- if (ret)
+
+ if (svsm_call_msr_protocol(&call))
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SVSM_CA_REMAP_FAIL);
- boot_svsm_caa = (struct svsm_ca *)pa;
boot_svsm_caa_pa = pa;
}
-bool __head snp_init(struct boot_params *bp)
+bool __init snp_init(struct boot_params *bp)
{
struct cc_blob_sev_info *cc_info;
@@ -361,8 +218,3 @@ bool __head snp_init(struct boot_params *bp)
return true;
}
-
-void __head __noreturn snp_abort(void)
-{
- sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
-}
diff --git a/arch/x86/boot/startup/sme.c b/arch/x86/boot/startup/sme.c
index 70ea1748c0a7..e7ea65f3f1d6 100644
--- a/arch/x86/boot/startup/sme.c
+++ b/arch/x86/boot/startup/sme.c
@@ -91,7 +91,7 @@ struct sme_populate_pgd_data {
*/
static char sme_workarea[2 * PMD_SIZE] __section(".init.scratch");
-static void __head sme_clear_pgd(struct sme_populate_pgd_data *ppd)
+static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd)
{
unsigned long pgd_start, pgd_end, pgd_size;
pgd_t *pgd_p;
@@ -106,7 +106,7 @@ static void __head sme_clear_pgd(struct sme_populate_pgd_data *ppd)
memset(pgd_p, 0, pgd_size);
}
-static pud_t __head *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
+static pud_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
{
pgd_t *pgd;
p4d_t *p4d;
@@ -143,7 +143,7 @@ static pud_t __head *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
return pud;
}
-static void __head sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
+static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
{
pud_t *pud;
pmd_t *pmd;
@@ -159,7 +159,7 @@ static void __head sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
set_pmd(pmd, __pmd(ppd->paddr | ppd->pmd_flags));
}
-static void __head sme_populate_pgd(struct sme_populate_pgd_data *ppd)
+static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd)
{
pud_t *pud;
pmd_t *pmd;
@@ -185,7 +185,7 @@ static void __head sme_populate_pgd(struct sme_populate_pgd_data *ppd)
set_pte(pte, __pte(ppd->paddr | ppd->pte_flags));
}
-static void __head __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
+static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
{
while (ppd->vaddr < ppd->vaddr_end) {
sme_populate_pgd_large(ppd);
@@ -195,7 +195,7 @@ static void __head __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
}
}
-static void __head __sme_map_range_pte(struct sme_populate_pgd_data *ppd)
+static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd)
{
while (ppd->vaddr < ppd->vaddr_end) {
sme_populate_pgd(ppd);
@@ -205,7 +205,7 @@ static void __head __sme_map_range_pte(struct sme_populate_pgd_data *ppd)
}
}
-static void __head __sme_map_range(struct sme_populate_pgd_data *ppd,
+static void __init __sme_map_range(struct sme_populate_pgd_data *ppd,
pmdval_t pmd_flags, pteval_t pte_flags)
{
unsigned long vaddr_end;
@@ -229,22 +229,22 @@ static void __head __sme_map_range(struct sme_populate_pgd_data *ppd,
__sme_map_range_pte(ppd);
}
-static void __head sme_map_range_encrypted(struct sme_populate_pgd_data *ppd)
+static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd)
{
__sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC);
}
-static void __head sme_map_range_decrypted(struct sme_populate_pgd_data *ppd)
+static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd)
{
__sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC);
}
-static void __head sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd)
+static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd)
{
__sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP);
}
-static unsigned long __head sme_pgtable_calc(unsigned long len)
+static unsigned long __init sme_pgtable_calc(unsigned long len)
{
unsigned long entries = 0, tables = 0;
@@ -281,7 +281,7 @@ static unsigned long __head sme_pgtable_calc(unsigned long len)
return entries + tables;
}
-void __head sme_encrypt_kernel(struct boot_params *bp)
+void __init sme_encrypt_kernel(struct boot_params *bp)
{
unsigned long workarea_start, workarea_end, workarea_len;
unsigned long execute_start, execute_end, execute_len;
@@ -485,7 +485,7 @@ void __head sme_encrypt_kernel(struct boot_params *bp)
native_write_cr3(__native_read_cr3());
}
-void __head sme_enable(struct boot_params *bp)
+void __init sme_enable(struct boot_params *bp)
{
unsigned int eax, ebx, ecx, edx;
unsigned long feature_mask;
@@ -521,6 +521,7 @@ void __head sme_enable(struct boot_params *bp)
return;
me_mask = 1UL << (ebx & 0x3f);
+ sev_snp_needs_sfw = !(ebx & BIT(31));
/* Check the SEV MSR whether SEV or SME is enabled */
sev_status = msr = native_rdmsrq(MSR_AMD64_SEV);
@@ -531,7 +532,7 @@ void __head sme_enable(struct boot_params *bp)
* enablement abort the guest.
*/
if (snp_en ^ !!(msr & MSR_AMD64_SEV_SNP_ENABLED))
- snp_abort();
+ sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
/* Check if memory encryption is enabled */
if (feature_mask == AMD_SME_BIT) {
@@ -567,7 +568,6 @@ void __head sme_enable(struct boot_params *bp)
#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
/* Local version for startup code, which never operates on user page tables */
-__weak
pgd_t __pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd)
{
return pgd;
diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c
index d4610af68114..989ca9f72ba3 100644
--- a/arch/x86/coco/core.c
+++ b/arch/x86/coco/core.c
@@ -104,6 +104,9 @@ static bool noinstr amd_cc_platform_has(enum cc_attr attr)
case CC_ATTR_HOST_SEV_SNP:
return cc_flags.host_sev_snp;
+ case CC_ATTR_SNP_SECURE_AVIC:
+ return sev_status & MSR_AMD64_SNP_SECURE_AVIC;
+
default:
return false;
}
diff --git a/arch/x86/coco/sev/Makefile b/arch/x86/coco/sev/Makefile
index 342d79f0ab6a..3b8ae214a6a6 100644
--- a/arch/x86/coco/sev/Makefile
+++ b/arch/x86/coco/sev/Makefile
@@ -1,10 +1,10 @@
# SPDX-License-Identifier: GPL-2.0
-obj-y += core.o sev-nmi.o vc-handle.o
+obj-y += core.o noinstr.o vc-handle.o
# Clang 14 and older may fail to respect __no_sanitize_undefined when inlining
-UBSAN_SANITIZE_sev-nmi.o := n
+UBSAN_SANITIZE_noinstr.o := n
# GCC may fail to respect __no_sanitize_address or __no_kcsan when inlining
-KASAN_SANITIZE_sev-nmi.o := n
-KCSAN_SANITIZE_sev-nmi.o := n
+KASAN_SANITIZE_noinstr.o := n
+KCSAN_SANITIZE_noinstr.o := n
diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c
index 14ef5908fb27..9ae3b11754e6 100644
--- a/arch/x86/coco/sev/core.c
+++ b/arch/x86/coco/sev/core.c
@@ -46,6 +46,48 @@
#include <asm/cmdline.h>
#include <asm/msr.h>
+/* Bitmap of SEV features supported by the hypervisor */
+u64 sev_hv_features __ro_after_init;
+SYM_PIC_ALIAS(sev_hv_features);
+
+/* Secrets page physical address from the CC blob */
+u64 sev_secrets_pa __ro_after_init;
+SYM_PIC_ALIAS(sev_secrets_pa);
+
+/* For early boot SVSM communication */
+struct svsm_ca boot_svsm_ca_page __aligned(PAGE_SIZE);
+SYM_PIC_ALIAS(boot_svsm_ca_page);
+
+/*
+ * SVSM related information:
+ * During boot, the page tables are set up as identity mapped and later
+ * changed to use kernel virtual addresses. Maintain separate virtual and
+ * physical addresses for the CAA to allow SVSM functions to be used during
+ * early boot, both with identity mapped virtual addresses and proper kernel
+ * virtual addresses.
+ */
+u64 boot_svsm_caa_pa __ro_after_init;
+SYM_PIC_ALIAS(boot_svsm_caa_pa);
+
+DEFINE_PER_CPU(struct svsm_ca *, svsm_caa);
+DEFINE_PER_CPU(u64, svsm_caa_pa);
+
+static inline struct svsm_ca *svsm_get_caa(void)
+{
+ if (sev_cfg.use_cas)
+ return this_cpu_read(svsm_caa);
+ else
+ return rip_rel_ptr(&boot_svsm_ca_page);
+}
+
+static inline u64 svsm_get_caa_pa(void)
+{
+ if (sev_cfg.use_cas)
+ return this_cpu_read(svsm_caa_pa);
+ else
+ return boot_svsm_caa_pa;
+}
+
/* AP INIT values as documented in the APM2 section "Processor Initialization State" */
#define AP_INIT_CS_LIMIT 0xffff
#define AP_INIT_DS_LIMIT 0xffff
@@ -79,6 +121,7 @@ static const char * const sev_status_feat_names[] = {
[MSR_AMD64_SNP_IBS_VIRT_BIT] = "IBSVirt",
[MSR_AMD64_SNP_VMSA_REG_PROT_BIT] = "VMSARegProt",
[MSR_AMD64_SNP_SMT_PROT_BIT] = "SMTProt",
+ [MSR_AMD64_SNP_SECURE_AVIC_BIT] = "SecureAVIC",
};
/*
@@ -100,6 +143,26 @@ DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa);
*/
u8 snp_vmpl __ro_after_init;
EXPORT_SYMBOL_GPL(snp_vmpl);
+SYM_PIC_ALIAS(snp_vmpl);
+
+/*
+ * Since feature negotiation related variables are set early in the boot
+ * process they must reside in the .data section so as not to be zeroed
+ * out when the .bss section is later cleared.
+ *
+ * GHCB protocol version negotiated with the hypervisor.
+ */
+u16 ghcb_version __ro_after_init;
+SYM_PIC_ALIAS(ghcb_version);
+
+/* For early boot hypervisor communication in SEV-ES enabled guests */
+static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);
+
+/*
+ * Needs to be in the .data section because we need it NULL before bss is
+ * cleared
+ */
+struct ghcb *boot_ghcb __section(".data");
static u64 __init get_snp_jump_table_addr(void)
{
@@ -154,6 +217,73 @@ static u64 __init get_jump_table_addr(void)
return ret;
}
+static int svsm_perform_ghcb_protocol(struct ghcb *ghcb, struct svsm_call *call)
+{
+ struct es_em_ctxt ctxt;
+ u8 pending = 0;
+
+ vc_ghcb_invalidate(ghcb);
+
+ /*
+ * Fill in protocol and format specifiers. This can be called very early
+ * in the boot, so use rip-relative references as needed.
+ */
+ ghcb->protocol_version = ghcb_version;
+ ghcb->ghcb_usage = GHCB_DEFAULT_USAGE;
+
+ ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_SNP_RUN_VMPL);
+ ghcb_set_sw_exit_info_1(ghcb, 0);
+ ghcb_set_sw_exit_info_2(ghcb, 0);
+
+ sev_es_wr_ghcb_msr(__pa(ghcb));
+
+ svsm_issue_call(call, &pending);
+
+ if (pending)
+ return -EINVAL;
+
+ switch (verify_exception_info(ghcb, &ctxt)) {
+ case ES_OK:
+ break;
+ case ES_EXCEPTION:
+ vc_forward_exception(&ctxt);
+ fallthrough;
+ default:
+ return -EINVAL;
+ }
+
+ return svsm_process_result_codes(call);
+}
+
+static int svsm_perform_call_protocol(struct svsm_call *call)
+{
+ struct ghcb_state state;
+ unsigned long flags;
+ struct ghcb *ghcb;
+ int ret;
+
+ flags = native_local_irq_save();
+
+ if (sev_cfg.ghcbs_initialized)
+ ghcb = __sev_get_ghcb(&state);
+ else if (boot_ghcb)
+ ghcb = boot_ghcb;
+ else
+ ghcb = NULL;
+
+ do {
+ ret = ghcb ? svsm_perform_ghcb_protocol(ghcb, call)
+ : __pi_svsm_perform_msr_protocol(call);
+ } while (ret == -EAGAIN);
+
+ if (sev_cfg.ghcbs_initialized)
+ __sev_put_ghcb(&state);
+
+ native_local_irq_restore(flags);
+
+ return ret;
+}
+
static inline void __pval_terminate(u64 pfn, bool action, unsigned int page_size,
int ret, u64 svsm_ret)
{
@@ -531,8 +661,11 @@ static void set_pages_state(unsigned long vaddr, unsigned long npages, int op)
unsigned long vaddr_end;
/* Use the MSR protocol when a GHCB is not available. */
- if (!boot_ghcb)
- return early_set_pages_state(vaddr, __pa(vaddr), npages, op);
+ if (!boot_ghcb) {
+ struct psc_desc d = { op, svsm_get_caa(), svsm_get_caa_pa() };
+
+ return early_set_pages_state(vaddr, __pa(vaddr), npages, &d);
+ }
vaddr = vaddr & PAGE_MASK;
vaddr_end = vaddr + (npages << PAGE_SHIFT);
@@ -973,6 +1106,9 @@ static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip, unsigned
vmsa->x87_ftw = AP_INIT_X87_FTW_DEFAULT;
vmsa->x87_fcw = AP_INIT_X87_FCW_DEFAULT;
+ if (cc_platform_has(CC_ATTR_SNP_SECURE_AVIC))
+ vmsa->vintr_ctrl |= V_GIF_MASK | V_NMI_ENABLE_MASK;
+
/* SVME must be set. */
vmsa->efer = EFER_SVME;
@@ -1107,6 +1243,105 @@ int __init sev_es_efi_map_ghcbs_cas(pgd_t *pgd)
return 0;
}
+u64 savic_ghcb_msr_read(u32 reg)
+{
+ u64 msr = APIC_BASE_MSR + (reg >> 4);
+ struct pt_regs regs = { .cx = msr };
+ struct es_em_ctxt ctxt = { .regs = &regs };
+ struct ghcb_state state;
+ enum es_result res;
+ struct ghcb *ghcb;
+
+ guard(irqsave)();
+
+ ghcb = __sev_get_ghcb(&state);
+ vc_ghcb_invalidate(ghcb);
+
+ res = sev_es_ghcb_handle_msr(ghcb, &ctxt, false);
+ if (res != ES_OK) {
+ pr_err("Secure AVIC MSR (0x%llx) read returned error (%d)\n", msr, res);
+ /* MSR read failures are treated as fatal errors */
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL);
+ }
+
+ __sev_put_ghcb(&state);
+
+ return regs.ax | regs.dx << 32;
+}
+
+void savic_ghcb_msr_write(u32 reg, u64 value)
+{
+ u64 msr = APIC_BASE_MSR + (reg >> 4);
+ struct pt_regs regs = {
+ .cx = msr,
+ .ax = lower_32_bits(value),
+ .dx = upper_32_bits(value)
+ };
+ struct es_em_ctxt ctxt = { .regs = &regs };
+ struct ghcb_state state;
+ enum es_result res;
+ struct ghcb *ghcb;
+
+ guard(irqsave)();
+
+ ghcb = __sev_get_ghcb(&state);
+ vc_ghcb_invalidate(ghcb);
+
+ res = sev_es_ghcb_handle_msr(ghcb, &ctxt, true);
+ if (res != ES_OK) {
+ pr_err("Secure AVIC MSR (0x%llx) write returned error (%d)\n", msr, res);
+ /* MSR writes should never fail. Any failure is fatal error for SNP guest */
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL);
+ }
+
+ __sev_put_ghcb(&state);
+}
+
+enum es_result savic_register_gpa(u64 gpa)
+{
+ struct ghcb_state state;
+ struct es_em_ctxt ctxt;
+ enum es_result res;
+ struct ghcb *ghcb;
+
+ guard(irqsave)();
+
+ ghcb = __sev_get_ghcb(&state);
+ vc_ghcb_invalidate(ghcb);
+
+ ghcb_set_rax(ghcb, SVM_VMGEXIT_SAVIC_SELF_GPA);
+ ghcb_set_rbx(ghcb, gpa);
+ res = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_SAVIC,
+ SVM_VMGEXIT_SAVIC_REGISTER_GPA, 0);
+
+ __sev_put_ghcb(&state);
+
+ return res;
+}
+
+enum es_result savic_unregister_gpa(u64 *gpa)
+{
+ struct ghcb_state state;
+ struct es_em_ctxt ctxt;
+ enum es_result res;
+ struct ghcb *ghcb;
+
+ guard(irqsave)();
+
+ ghcb = __sev_get_ghcb(&state);
+ vc_ghcb_invalidate(ghcb);
+
+ ghcb_set_rax(ghcb, SVM_VMGEXIT_SAVIC_SELF_GPA);
+ res = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_SAVIC,
+ SVM_VMGEXIT_SAVIC_UNREGISTER_GPA, 0);
+ if (gpa && res == ES_OK)
+ *gpa = ghcb->save.rbx;
+
+ __sev_put_ghcb(&state);
+
+ return res;
+}
+
static void snp_register_per_cpu_ghcb(void)
{
struct sev_es_runtime_data *data;
@@ -1233,7 +1468,8 @@ static void __init alloc_runtime_data(int cpu)
struct svsm_ca *caa;
/* Allocate the SVSM CA page if an SVSM is present */
- caa = memblock_alloc_or_panic(sizeof(*caa), PAGE_SIZE);
+ caa = cpu ? memblock_alloc_or_panic(sizeof(*caa), PAGE_SIZE)
+ : &boot_svsm_ca_page;
per_cpu(svsm_caa, cpu) = caa;
per_cpu(svsm_caa_pa, cpu) = __pa(caa);
@@ -1287,32 +1523,9 @@ void __init sev_es_init_vc_handling(void)
init_ghcb(cpu);
}
- /* If running under an SVSM, switch to the per-cpu CA */
- if (snp_vmpl) {
- struct svsm_call call = {};
- unsigned long flags;
- int ret;
-
- local_irq_save(flags);
-
- /*
- * SVSM_CORE_REMAP_CA call:
- * RAX = 0 (Protocol=0, CallID=0)
- * RCX = New CA GPA
- */
- call.caa = svsm_get_caa();
- call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA);
- call.rcx = this_cpu_read(svsm_caa_pa);
- ret = svsm_perform_call_protocol(&call);
- if (ret)
- panic("Can't remap the SVSM CA, ret=%d, rax_out=0x%llx\n",
- ret, call.rax_out);
-
+ if (snp_vmpl)
sev_cfg.use_cas = true;
- local_irq_restore(flags);
- }
-
sev_es_setup_play_dead();
/* Secondary CPUs use the runtime #VC handler */
@@ -1590,15 +1803,6 @@ void sev_show_status(void)
pr_cont("\n");
}
-void __init snp_update_svsm_ca(void)
-{
- if (!snp_vmpl)
- return;
-
- /* Update the CAA to a proper kernel address */
- boot_svsm_caa = &boot_svsm_ca_page;
-}
-
#ifdef CONFIG_SYSFS
static ssize_t vmpl_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
diff --git a/arch/x86/coco/sev/sev-nmi.c b/arch/x86/coco/sev/noinstr.c
index d8dfaddfb367..b527eafb6312 100644
--- a/arch/x86/coco/sev/sev-nmi.c
+++ b/arch/x86/coco/sev/noinstr.c
@@ -106,3 +106,77 @@ void noinstr __sev_es_nmi_complete(void)
__sev_put_ghcb(&state);
}
+
+/*
+ * Nothing shall interrupt this code path while holding the per-CPU
+ * GHCB. The backup GHCB is only for NMIs interrupting this path.
+ *
+ * Callers must disable local interrupts around it.
+ */
+noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state)
+{
+ struct sev_es_runtime_data *data;
+ struct ghcb *ghcb;
+
+ WARN_ON(!irqs_disabled());
+
+ data = this_cpu_read(runtime_data);
+ ghcb = &data->ghcb_page;
+
+ if (unlikely(data->ghcb_active)) {
+ /* GHCB is already in use - save its contents */
+
+ if (unlikely(data->backup_ghcb_active)) {
+ /*
+ * Backup-GHCB is also already in use. There is no way
+ * to continue here so just kill the machine. To make
+ * panic() work, mark GHCBs inactive so that messages
+ * can be printed out.
+ */
+ data->ghcb_active = false;
+ data->backup_ghcb_active = false;
+
+ instrumentation_begin();
+ panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
+ instrumentation_end();
+ }
+
+ /* Mark backup_ghcb active before writing to it */
+ data->backup_ghcb_active = true;
+
+ state->ghcb = &data->backup_ghcb;
+
+ /* Backup GHCB content */
+ *state->ghcb = *ghcb;
+ } else {
+ state->ghcb = NULL;
+ data->ghcb_active = true;
+ }
+
+ return ghcb;
+}
+
+noinstr void __sev_put_ghcb(struct ghcb_state *state)
+{
+ struct sev_es_runtime_data *data;
+ struct ghcb *ghcb;
+
+ WARN_ON(!irqs_disabled());
+
+ data = this_cpu_read(runtime_data);
+ ghcb = &data->ghcb_page;
+
+ if (state->ghcb) {
+ /* Restore GHCB from Backup */
+ *ghcb = *state->ghcb;
+ data->backup_ghcb_active = false;
+ state->ghcb = NULL;
+ } else {
+ /*
+ * Invalidate the GHCB so a VMGEXIT instruction issued
+ * from userspace won't appear to be valid.
+ */
+ vc_ghcb_invalidate(ghcb);
+ data->ghcb_active = false;
+ }
+}
diff --git a/arch/x86/coco/sev/vc-handle.c b/arch/x86/coco/sev/vc-handle.c
index c3b4acbde0d8..7fc136a35334 100644
--- a/arch/x86/coco/sev/vc-handle.c
+++ b/arch/x86/coco/sev/vc-handle.c
@@ -351,6 +351,8 @@ fault:
}
#define sev_printk(fmt, ...) printk(fmt, ##__VA_ARGS__)
+#define error(v)
+#define has_cpuflag(f) boot_cpu_has(f)
#include "vc-shared.c"
@@ -402,14 +404,10 @@ static enum es_result __vc_handle_secure_tsc_msrs(struct es_em_ctxt *ctxt, bool
return ES_OK;
}
-static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
+enum es_result sev_es_ghcb_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt, bool write)
{
struct pt_regs *regs = ctxt->regs;
enum es_result ret;
- bool write;
-
- /* Is it a WRMSR? */
- write = ctxt->insn.opcode.bytes[1] == 0x30;
switch (regs->cx) {
case MSR_SVSM_CAA:
@@ -419,6 +417,15 @@ static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
if (sev_status & MSR_AMD64_SNP_SECURE_TSC)
return __vc_handle_secure_tsc_msrs(ctxt, write);
break;
+ case MSR_AMD64_SAVIC_CONTROL:
+ /*
+ * AMD64_SAVIC_CONTROL should not be intercepted when
+ * Secure AVIC is enabled. Terminate the Secure AVIC guest
+ * if the interception is enabled.
+ */
+ if (cc_platform_has(CC_ATTR_SNP_SECURE_AVIC))
+ return ES_VMM_ERROR;
+ break;
default:
break;
}
@@ -439,6 +446,11 @@ static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
return ret;
}
+static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
+{
+ return sev_es_ghcb_handle_msr(ghcb, ctxt, ctxt->insn.opcode.bytes[1] == 0x30);
+}
+
static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt)
{
int trapnr = ctxt->fi.vector;
diff --git a/arch/x86/coco/sev/vc-shared.c b/arch/x86/coco/sev/vc-shared.c
index 2c0ab0fdc060..9b01c9ad81be 100644
--- a/arch/x86/coco/sev/vc-shared.c
+++ b/arch/x86/coco/sev/vc-shared.c
@@ -409,15 +409,109 @@ static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
return ret;
}
+enum es_result verify_exception_info(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
+{
+ u32 ret;
+
+ ret = ghcb->save.sw_exit_info_1 & GENMASK_ULL(31, 0);
+ if (!ret)
+ return ES_OK;
+
+ if (ret == 1) {
+ u64 info = ghcb->save.sw_exit_info_2;
+ unsigned long v = info & SVM_EVTINJ_VEC_MASK;
+
+ /* Check if exception information from hypervisor is sane. */
+ if ((info & SVM_EVTINJ_VALID) &&
+ ((v == X86_TRAP_GP) || (v == X86_TRAP_UD)) &&
+ ((info & SVM_EVTINJ_TYPE_MASK) == SVM_EVTINJ_TYPE_EXEPT)) {
+ ctxt->fi.vector = v;
+
+ if (info & SVM_EVTINJ_VALID_ERR)
+ ctxt->fi.error_code = info >> 32;
+
+ return ES_EXCEPTION;
+ }
+ }
+
+ return ES_VMM_ERROR;
+}
+
+enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
+ struct es_em_ctxt *ctxt,
+ u64 exit_code, u64 exit_info_1,
+ u64 exit_info_2)
+{
+ /* Fill in protocol and format specifiers */
+ ghcb->protocol_version = ghcb_version;
+ ghcb->ghcb_usage = GHCB_DEFAULT_USAGE;
+
+ ghcb_set_sw_exit_code(ghcb, exit_code);
+ ghcb_set_sw_exit_info_1(ghcb, exit_info_1);
+ ghcb_set_sw_exit_info_2(ghcb, exit_info_2);
+
+ sev_es_wr_ghcb_msr(__pa(ghcb));
+ VMGEXIT();
+
+ return verify_exception_info(ghcb, ctxt);
+}
+
+static int __sev_cpuid_hv_ghcb(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
+{
+ u32 cr4 = native_read_cr4();
+ int ret;
+
+ ghcb_set_rax(ghcb, leaf->fn);
+ ghcb_set_rcx(ghcb, leaf->subfn);
+
+ if (cr4 & X86_CR4_OSXSAVE)
+ /* Safe to read xcr0 */
+ ghcb_set_xcr0(ghcb, xgetbv(XCR_XFEATURE_ENABLED_MASK));
+ else
+ /* xgetbv will cause #UD - use reset value for xcr0 */
+ ghcb_set_xcr0(ghcb, 1);
+
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0);
+ if (ret != ES_OK)
+ return ret;
+
+ if (!(ghcb_rax_is_valid(ghcb) &&
+ ghcb_rbx_is_valid(ghcb) &&
+ ghcb_rcx_is_valid(ghcb) &&
+ ghcb_rdx_is_valid(ghcb)))
+ return ES_VMM_ERROR;
+
+ leaf->eax = ghcb->save.rax;
+ leaf->ebx = ghcb->save.rbx;
+ leaf->ecx = ghcb->save.rcx;
+ leaf->edx = ghcb->save.rdx;
+
+ return ES_OK;
+}
+
+struct cpuid_ctx {
+ struct ghcb *ghcb;
+ struct es_em_ctxt *ctxt;
+};
+
+static void snp_cpuid_hv_ghcb(void *p, struct cpuid_leaf *leaf)
+{
+ struct cpuid_ctx *ctx = p;
+
+ if (__sev_cpuid_hv_ghcb(ctx->ghcb, ctx->ctxt, leaf))
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID_HV);
+}
+
static int vc_handle_cpuid_snp(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
+ struct cpuid_ctx ctx = { ghcb, ctxt };
struct pt_regs *regs = ctxt->regs;
struct cpuid_leaf leaf;
int ret;
leaf.fn = regs->ax;
leaf.subfn = regs->cx;
- ret = snp_cpuid(ghcb, ctxt, &leaf);
+ ret = snp_cpuid(snp_cpuid_hv_ghcb, &ctx, &leaf);
if (!ret) {
regs->ax = leaf.eax;
regs->bx = leaf.ebx;
@@ -502,3 +596,50 @@ static enum es_result vc_handle_rdtsc(struct ghcb *ghcb,
return ES_OK;
}
+
+void snp_register_ghcb_early(unsigned long paddr)
+{
+ unsigned long pfn = paddr >> PAGE_SHIFT;
+ u64 val;
+
+ sev_es_wr_ghcb_msr(GHCB_MSR_REG_GPA_REQ_VAL(pfn));
+ VMGEXIT();
+
+ val = sev_es_rd_ghcb_msr();
+
+ /* If the response GPA is not ours then abort the guest */
+ if ((GHCB_RESP_CODE(val) != GHCB_MSR_REG_GPA_RESP) ||
+ (GHCB_MSR_REG_GPA_RESP_VAL(val) != pfn))
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_REGISTER);
+}
+
+bool __init sev_es_check_cpu_features(void)
+{
+ if (!has_cpuflag(X86_FEATURE_RDRAND)) {
+ error("RDRAND instruction not supported - no trusted source of randomness available\n");
+ return false;
+ }
+
+ return true;
+}
+
+bool sev_es_negotiate_protocol(void)
+{
+ u64 val;
+
+ /* Do the GHCB protocol version negotiation */
+ sev_es_wr_ghcb_msr(GHCB_MSR_SEV_INFO_REQ);
+ VMGEXIT();
+ val = sev_es_rd_ghcb_msr();
+
+ if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP)
+ return false;
+
+ if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTOCOL_MIN ||
+ GHCB_MSR_PROTO_MIN(val) > GHCB_PROTOCOL_MAX)
+ return false;
+
+ ghcb_version = min_t(size_t, GHCB_MSR_PROTO_MAX(val), GHCB_PROTOCOL_MAX);
+
+ return true;
+}
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 07ba4935e873..a26e66d66444 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -305,6 +305,8 @@ struct apic {
/* Probe, setup and smpboot functions */
int (*probe)(void);
+ void (*setup)(void);
+ void (*teardown)(void);
int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id);
void (*init_apic_ldr)(void);
@@ -317,6 +319,8 @@ struct apic {
/* wakeup secondary CPU using 64-bit wakeup point */
int (*wakeup_secondary_cpu_64)(u32 apicid, unsigned long start_eip, unsigned int cpu);
+ void (*update_vector)(unsigned int cpu, unsigned int vector, bool set);
+
char *name;
};
@@ -470,6 +474,12 @@ static __always_inline bool apic_id_valid(u32 apic_id)
return apic_id <= apic->max_apic_id;
}
+static __always_inline void apic_update_vector(unsigned int cpu, unsigned int vector, bool set)
+{
+ if (apic->update_vector)
+ apic->update_vector(cpu, vector, set);
+}
+
#else /* CONFIG_X86_LOCAL_APIC */
static inline u32 apic_read(u32 reg) { return 0; }
@@ -481,6 +491,7 @@ static inline void apic_wait_icr_idle(void) { }
static inline u32 safe_apic_wait_icr_idle(void) { return 0; }
static inline void apic_native_eoi(void) { WARN_ON_ONCE(1); }
static inline void apic_setup_apic_calls(void) { }
+static inline void apic_update_vector(unsigned int cpu, unsigned int vector, bool set) { }
#define apic_update_callback(_callback, _fn) do { } while (0)
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index 094106b6a538..be39a543fbe5 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -135,6 +135,8 @@
#define APIC_TDR_DIV_128 0xA
#define APIC_EFEAT 0x400
#define APIC_ECTRL 0x410
+#define APIC_SEOI 0x420
+#define APIC_IER 0x480
#define APIC_EILVTn(n) (0x500 + 0x10 * n)
#define APIC_EILVT_NR_AMD_K8 1 /* # of extended interrupts */
#define APIC_EILVT_NR_AMD_10H 4
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index 02b23aa78955..f7b67cb73915 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -82,6 +82,8 @@
#ifndef __ASSEMBLER__
extern unsigned int output_len;
extern const unsigned long kernel_text_size;
+extern const unsigned long kernel_inittext_offset;
+extern const unsigned long kernel_inittext_size;
extern const unsigned long kernel_total_size;
unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr,
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index 5a68e9db6518..01ccdd168df0 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -2,12 +2,6 @@
#ifndef _ASM_X86_INIT_H
#define _ASM_X86_INIT_H
-#if defined(CONFIG_CC_IS_CLANG) && CONFIG_CLANG_VERSION < 170000
-#define __head __section(".head.text") __no_sanitize_undefined __no_stack_protector
-#else
-#define __head __section(".head.text") __no_sanitize_undefined __no_kstack_erase
-#endif
-
struct x86_mapping_info {
void *(*alloc_pgt_page)(void *); /* allocate buf for page table */
void (*free_pgt_page)(void *, void *); /* free buf for page table */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 73393a66d3ab..718a55d82fe4 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -706,8 +706,15 @@
#define MSR_AMD64_SNP_VMSA_REG_PROT BIT_ULL(MSR_AMD64_SNP_VMSA_REG_PROT_BIT)
#define MSR_AMD64_SNP_SMT_PROT_BIT 17
#define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT)
-#define MSR_AMD64_SNP_RESV_BIT 18
+#define MSR_AMD64_SNP_SECURE_AVIC_BIT 18
+#define MSR_AMD64_SNP_SECURE_AVIC BIT_ULL(MSR_AMD64_SNP_SECURE_AVIC_BIT)
+#define MSR_AMD64_SNP_RESV_BIT 19
#define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT)
+#define MSR_AMD64_SAVIC_CONTROL 0xc0010138
+#define MSR_AMD64_SAVIC_EN_BIT 0
+#define MSR_AMD64_SAVIC_EN BIT_ULL(MSR_AMD64_SAVIC_EN_BIT)
+#define MSR_AMD64_SAVIC_ALLOWEDNMI_BIT 1
+#define MSR_AMD64_SAVIC_ALLOWEDNMI BIT_ULL(MSR_AMD64_SAVIC_ALLOWEDNMI_BIT)
#define MSR_AMD64_RMP_BASE 0xc0010132
#define MSR_AMD64_RMP_END 0xc0010133
#define MSR_AMD64_RMP_CFG 0xc0010136
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 692af46603a1..914eb32581c7 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -53,6 +53,7 @@ extern void i386_reserve_resources(void);
extern unsigned long __startup_64(unsigned long p2v_offset, struct boot_params *bp);
extern void startup_64_setup_gdt_idt(void);
extern void startup_64_load_idt(void *vc_handler);
+extern void __pi_startup_64_load_idt(void *vc_handler);
extern void early_setup_idt(void);
extern void __init do_early_exception(struct pt_regs *regs, int trapnr);
diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h
index 0020d77a0800..01a6e4dbe423 100644
--- a/arch/x86/include/asm/sev-common.h
+++ b/arch/x86/include/asm/sev-common.h
@@ -208,6 +208,7 @@ struct snp_psc_desc {
#define GHCB_TERM_SVSM_CAA 9 /* SVSM is present but CAA is not page aligned */
#define GHCB_TERM_SECURE_TSC 10 /* Secure TSC initialization failed */
#define GHCB_TERM_SVSM_CA_REMAP_FAIL 11 /* SVSM is present but CA could not be remapped */
+#define GHCB_TERM_SAVIC_FAIL 12 /* Secure AVIC-specific failure */
#define GHCB_RESP_CODE(v) ((v) & GHCB_MSR_INFO_MASK)
diff --git a/arch/x86/include/asm/sev-internal.h b/arch/x86/include/asm/sev-internal.h
index 3dfd306d1c9e..c58c47c68ab6 100644
--- a/arch/x86/include/asm/sev-internal.h
+++ b/arch/x86/include/asm/sev-internal.h
@@ -2,7 +2,6 @@
#define DR7_RESET_VALUE 0x400
-extern struct ghcb boot_ghcb_page;
extern u64 sev_hv_features;
extern u64 sev_secrets_pa;
@@ -56,31 +55,15 @@ DECLARE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
DECLARE_PER_CPU(struct sev_es_save_area *, sev_vmsa);
void early_set_pages_state(unsigned long vaddr, unsigned long paddr,
- unsigned long npages, enum psc_op op);
+ unsigned long npages, const struct psc_desc *desc);
DECLARE_PER_CPU(struct svsm_ca *, svsm_caa);
DECLARE_PER_CPU(u64, svsm_caa_pa);
-extern struct svsm_ca *boot_svsm_caa;
extern u64 boot_svsm_caa_pa;
-static __always_inline struct svsm_ca *svsm_get_caa(void)
-{
- if (sev_cfg.use_cas)
- return this_cpu_read(svsm_caa);
- else
- return boot_svsm_caa;
-}
-
-static __always_inline u64 svsm_get_caa_pa(void)
-{
- if (sev_cfg.use_cas)
- return this_cpu_read(svsm_caa_pa);
- else
- return boot_svsm_caa_pa;
-}
-
-int svsm_perform_call_protocol(struct svsm_call *call);
+enum es_result verify_exception_info(struct ghcb *ghcb, struct es_em_ctxt *ctxt);
+void vc_forward_exception(struct es_em_ctxt *ctxt);
static inline u64 sev_es_rd_ghcb_msr(void)
{
@@ -97,9 +80,8 @@ static __always_inline void sev_es_wr_ghcb_msr(u64 val)
native_wrmsr(MSR_AMD64_SEV_ES_GHCB, low, high);
}
-void snp_register_ghcb_early(unsigned long paddr);
-bool sev_es_negotiate_protocol(void);
-bool sev_es_check_cpu_features(void);
+enum es_result sev_es_ghcb_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt, bool write);
+
u64 get_hv_features(void);
const struct snp_cpuid_table *snp_cpuid_get_table(void);
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 465b19fd1a2d..f9046c4b9a2b 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -503,6 +503,7 @@ static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate)
}
void setup_ghcb(void);
+void snp_register_ghcb_early(unsigned long paddr);
void early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
unsigned long npages);
void early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
@@ -511,14 +512,12 @@ void snp_set_memory_shared(unsigned long vaddr, unsigned long npages);
void snp_set_memory_private(unsigned long vaddr, unsigned long npages);
void snp_set_wakeup_secondary_cpu(void);
bool snp_init(struct boot_params *bp);
-void __noreturn snp_abort(void);
void snp_dmi_setup(void);
int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call, struct svsm_attest_call *input);
void snp_accept_memory(phys_addr_t start, phys_addr_t end);
u64 snp_get_unsupported_features(u64 status);
u64 sev_get_status(void);
void sev_show_status(void);
-void snp_update_svsm_ca(void);
int prepare_pte_enc(struct pte_enc_desc *d);
void set_pte_enc_mask(pte_t *kpte, unsigned long pfn, pgprot_t new_prot);
void snp_kexec_finish(void);
@@ -533,6 +532,10 @@ int snp_svsm_vtpm_send_command(u8 *buffer);
void __init snp_secure_tsc_prepare(void);
void __init snp_secure_tsc_init(void);
+enum es_result savic_register_gpa(u64 gpa);
+enum es_result savic_unregister_gpa(u64 *gpa);
+u64 savic_ghcb_msr_read(u32 reg);
+void savic_ghcb_msr_write(u32 reg, u64 value);
static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb)
{
@@ -540,8 +543,6 @@ static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb)
__builtin_memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
}
-void vc_forward_exception(struct es_em_ctxt *ctxt);
-
/* I/O parameters for CPUID-related helpers */
struct cpuid_leaf {
u32 fn;
@@ -552,7 +553,13 @@ struct cpuid_leaf {
u32 edx;
};
-int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf);
+int svsm_perform_msr_protocol(struct svsm_call *call);
+int __pi_svsm_perform_msr_protocol(struct svsm_call *call);
+int snp_cpuid(void (*cpuid_fn)(void *ctx, struct cpuid_leaf *leaf),
+ void *ctx, struct cpuid_leaf *leaf);
+
+void svsm_issue_call(struct svsm_call *call, u8 *pending);
+int svsm_process_result_codes(struct svsm_call *call);
void __noreturn sev_es_terminate(unsigned int set, unsigned int reason);
enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
@@ -560,7 +567,18 @@ enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
u64 exit_code, u64 exit_info_1,
u64 exit_info_2);
+bool sev_es_negotiate_protocol(void);
+bool sev_es_check_cpu_features(void);
+
+extern u16 ghcb_version;
extern struct ghcb *boot_ghcb;
+extern bool sev_snp_needs_sfw;
+
+struct psc_desc {
+ enum psc_op op;
+ struct svsm_ca *ca;
+ u64 caa_pa;
+};
static inline void sev_evict_cache(void *va, int npages)
{
@@ -600,7 +618,6 @@ static inline void snp_set_memory_shared(unsigned long vaddr, unsigned long npag
static inline void snp_set_memory_private(unsigned long vaddr, unsigned long npages) { }
static inline void snp_set_wakeup_secondary_cpu(void) { }
static inline bool snp_init(struct boot_params *bp) { return false; }
-static inline void snp_abort(void) { }
static inline void snp_dmi_setup(void) { }
static inline int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call, struct svsm_attest_call *input)
{
@@ -610,7 +627,6 @@ static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { }
static inline u64 snp_get_unsupported_features(u64 status) { return 0; }
static inline u64 sev_get_status(void) { return 0; }
static inline void sev_show_status(void) { }
-static inline void snp_update_svsm_ca(void) { }
static inline int prepare_pte_enc(struct pte_enc_desc *d) { return 0; }
static inline void set_pte_enc_mask(pte_t *kpte, unsigned long pfn, pgprot_t new_prot) { }
static inline void snp_kexec_finish(void) { }
@@ -624,6 +640,10 @@ static inline int snp_svsm_vtpm_send_command(u8 *buffer) { return -ENODEV; }
static inline void __init snp_secure_tsc_prepare(void) { }
static inline void __init snp_secure_tsc_init(void) { }
static inline void sev_evict_cache(void *va, int npages) {}
+static inline enum es_result savic_register_gpa(u64 gpa) { return ES_UNSUPPORTED; }
+static inline enum es_result savic_unregister_gpa(u64 *gpa) { return ES_UNSUPPORTED; }
+static inline void savic_ghcb_msr_write(u32 reg, u64 value) { }
+static inline u64 savic_ghcb_msr_read(u32 reg) { return 0; }
#endif /* CONFIG_AMD_MEM_ENCRYPT */
@@ -635,9 +655,13 @@ void snp_dump_hva_rmpentry(unsigned long address);
int psmash(u64 pfn);
int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid, bool immutable);
int rmp_make_shared(u64 pfn, enum pg_level level);
-void snp_leak_pages(u64 pfn, unsigned int npages);
+void __snp_leak_pages(u64 pfn, unsigned int npages, bool dump_rmp);
void kdump_sev_callback(void);
void snp_fixup_e820_tables(void);
+static inline void snp_leak_pages(u64 pfn, unsigned int pages)
+{
+ __snp_leak_pages(pfn, pages, true);
+}
#else
static inline bool snp_probe_rmptable_info(void) { return false; }
static inline int snp_rmptable_init(void) { return -ENOSYS; }
@@ -650,6 +674,7 @@ static inline int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 as
return -ENODEV;
}
static inline int rmp_make_shared(u64 pfn, enum pg_level level) { return -ENODEV; }
+static inline void __snp_leak_pages(u64 pfn, unsigned int npages, bool dump_rmp) {}
static inline void snp_leak_pages(u64 pfn, unsigned int npages) {}
static inline void kdump_sev_callback(void) { }
static inline void snp_fixup_e820_tables(void) {}
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 9282465eea21..e71e0e8362ed 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -80,56 +80,42 @@ struct thread_info {
#endif
/*
- * thread information flags
- * - these are process state flags that various assembly files
- * may need to access
+ * Tell the generic TIF infrastructure which bits x86 supports
*/
-#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */
-#define TIF_SIGPENDING 2 /* signal pending */
-#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
-#define TIF_NEED_RESCHED_LAZY 4 /* Lazy rescheduling needed */
-#define TIF_SINGLESTEP 5 /* reenable singlestep on user return*/
-#define TIF_SSBD 6 /* Speculative store bypass disable */
-#define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */
-#define TIF_SPEC_L1D_FLUSH 10 /* Flush L1D on mm switches (processes) */
-#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
-#define TIF_UPROBE 12 /* breakpointed or singlestepping */
-#define TIF_PATCH_PENDING 13 /* pending live patching update */
-#define TIF_NEED_FPU_LOAD 14 /* load FPU on return to userspace */
-#define TIF_NOCPUID 15 /* CPUID is not accessible in userland */
-#define TIF_NOTSC 16 /* TSC is not accessible in userland */
-#define TIF_NOTIFY_SIGNAL 17 /* signal notifications exist */
-#define TIF_MEMDIE 20 /* is terminating due to OOM killer */
-#define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */
+#define HAVE_TIF_NEED_RESCHED_LAZY
+#define HAVE_TIF_POLLING_NRFLAG
+#define HAVE_TIF_SINGLESTEP
+
+#include <asm-generic/thread_info_tif.h>
+
+/* Architecture specific TIF space starts at 16 */
+#define TIF_SSBD 16 /* Speculative store bypass disable */
+#define TIF_SPEC_IB 17 /* Indirect branch speculation mitigation */
+#define TIF_SPEC_L1D_FLUSH 18 /* Flush L1D on mm switches (processes) */
+#define TIF_NEED_FPU_LOAD 19 /* load FPU on return to userspace */
+#define TIF_NOCPUID 20 /* CPUID is not accessible in userland */
+#define TIF_NOTSC 21 /* TSC is not accessible in userland */
#define TIF_IO_BITMAP 22 /* uses I/O bitmap */
#define TIF_SPEC_FORCE_UPDATE 23 /* Force speculation MSR update in context switch */
#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */
-#define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */
+#define TIF_SINGLESTEP 25 /* reenable singlestep on user return*/
+#define TIF_BLOCKSTEP 26 /* set when we want DEBUGCTLMSR_BTF */
#define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */
-#define TIF_ADDR32 29 /* 32-bit address space on 64 bits */
-
-#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
-#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
-#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
-#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
-#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
-#define _TIF_SSBD (1 << TIF_SSBD)
-#define _TIF_SPEC_IB (1 << TIF_SPEC_IB)
-#define _TIF_SPEC_L1D_FLUSH (1 << TIF_SPEC_L1D_FLUSH)
-#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
-#define _TIF_UPROBE (1 << TIF_UPROBE)
-#define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING)
-#define _TIF_NEED_FPU_LOAD (1 << TIF_NEED_FPU_LOAD)
-#define _TIF_NOCPUID (1 << TIF_NOCPUID)
-#define _TIF_NOTSC (1 << TIF_NOTSC)
-#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
-#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
-#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
-#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE)
-#define _TIF_FORCED_TF (1 << TIF_FORCED_TF)
-#define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP)
-#define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES)
-#define _TIF_ADDR32 (1 << TIF_ADDR32)
+#define TIF_ADDR32 28 /* 32-bit address space on 64 bits */
+
+#define _TIF_SSBD BIT(TIF_SSBD)
+#define _TIF_SPEC_IB BIT(TIF_SPEC_IB)
+#define _TIF_SPEC_L1D_FLUSH BIT(TIF_SPEC_L1D_FLUSH)
+#define _TIF_NEED_FPU_LOAD BIT(TIF_NEED_FPU_LOAD)
+#define _TIF_NOCPUID BIT(TIF_NOCPUID)
+#define _TIF_NOTSC BIT(TIF_NOTSC)
+#define _TIF_IO_BITMAP BIT(TIF_IO_BITMAP)
+#define _TIF_SPEC_FORCE_UPDATE BIT(TIF_SPEC_FORCE_UPDATE)
+#define _TIF_FORCED_TF BIT(TIF_FORCED_TF)
+#define _TIF_BLOCKSTEP BIT(TIF_BLOCKSTEP)
+#define _TIF_SINGLESTEP BIT(TIF_SINGLESTEP)
+#define _TIF_LAZY_MMU_UPDATES BIT(TIF_LAZY_MMU_UPDATES)
+#define _TIF_ADDR32 BIT(TIF_ADDR32)
/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW_BASE \
diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index 9c640a521a67..650e3256ea7d 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -118,6 +118,10 @@
#define SVM_VMGEXIT_AP_CREATE 1
#define SVM_VMGEXIT_AP_DESTROY 2
#define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018
+#define SVM_VMGEXIT_SAVIC 0x8000001a
+#define SVM_VMGEXIT_SAVIC_REGISTER_GPA 0
+#define SVM_VMGEXIT_SAVIC_UNREGISTER_GPA 1
+#define SVM_VMGEXIT_SAVIC_SELF_GPA ~0ULL
#define SVM_VMGEXIT_HV_FEATURES 0x8000fffd
#define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe
#define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code) \
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 52d1808ee360..581db89477f9 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -18,6 +18,7 @@ ifeq ($(CONFIG_X86_64),y)
# APIC probe will depend on the listing order here
obj-$(CONFIG_X86_NUMACHIP) += apic_numachip.o
obj-$(CONFIG_X86_UV) += x2apic_uv_x.o
+obj-$(CONFIG_AMD_SECURE_AVIC) += x2apic_savic.o
obj-$(CONFIG_X86_X2APIC) += x2apic_phys.o
obj-$(CONFIG_X86_X2APIC) += x2apic_cluster.o
obj-y += apic_flat_64.o
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index d73ba5a7b623..680d305589a3 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -592,6 +592,8 @@ static void setup_APIC_timer(void)
0xF, ~0UL);
} else
clockevents_register_device(levt);
+
+ apic_update_vector(smp_processor_id(), LOCAL_TIMER_VECTOR, true);
}
/*
@@ -1168,6 +1170,9 @@ void disable_local_APIC(void)
if (!apic_accessible())
return;
+ if (apic->teardown)
+ apic->teardown();
+
apic_soft_disable();
#ifdef CONFIG_X86_32
@@ -1428,63 +1433,61 @@ union apic_ir {
u32 regs[APIC_IR_REGS];
};
-static bool apic_check_and_ack(union apic_ir *irr, union apic_ir *isr)
+static bool apic_check_and_eoi_isr(union apic_ir *isr)
{
int i, bit;
- /* Read the IRRs */
- for (i = 0; i < APIC_IR_REGS; i++)
- irr->regs[i] = apic_read(APIC_IRR + i * 0x10);
-
/* Read the ISRs */
for (i = 0; i < APIC_IR_REGS; i++)
isr->regs[i] = apic_read(APIC_ISR + i * 0x10);
+ /* If the ISR map empty, nothing to do here. */
+ if (bitmap_empty(isr->map, APIC_IR_BITS))
+ return true;
+
/*
- * If the ISR map is not empty. ACK the APIC and run another round
- * to verify whether a pending IRR has been unblocked and turned
- * into a ISR.
+ * There can be multiple ISR bits set when a high priority
+ * interrupt preempted a lower priority one. Issue an EOI for each
+ * set bit. The priority traversal order does not matter as there
+ * can't be new ISR bits raised at this point. What matters is that
+ * an EOI is issued for each ISR bit.
*/
- if (!bitmap_empty(isr->map, APIC_IR_BITS)) {
- /*
- * There can be multiple ISR bits set when a high priority
- * interrupt preempted a lower priority one. Issue an ACK
- * per set bit.
- */
- for_each_set_bit(bit, isr->map, APIC_IR_BITS)
- apic_eoi();
- return true;
- }
+ for_each_set_bit(bit, isr->map, APIC_IR_BITS)
+ apic_eoi();
- return !bitmap_empty(irr->map, APIC_IR_BITS);
+ /* Reread the ISRs, they should be empty now */
+ for (i = 0; i < APIC_IR_REGS; i++)
+ isr->regs[i] = apic_read(APIC_ISR + i * 0x10);
+
+ return bitmap_empty(isr->map, APIC_IR_BITS);
}
/*
- * After a crash, we no longer service the interrupts and a pending
- * interrupt from previous kernel might still have ISR bit set.
+ * If a CPU services an interrupt and crashes before issuing EOI to the
+ * local APIC, the corresponding ISR bit is still set when the crashing CPU
+ * jumps into a crash kernel. Read the ISR and issue an EOI for each set
+ * bit to acknowledge it as otherwise these slots would be locked forever
+ * waiting for an EOI.
*
- * Most probably by now the CPU has serviced that pending interrupt and it
- * might not have done the apic_eoi() because it thought, interrupt
- * came from i8259 as ExtInt. LAPIC did not get EOI so it does not clear
- * the ISR bit and cpu thinks it has already serviced the interrupt. Hence
- * a vector might get locked. It was noticed for timer irq (vector
- * 0x31). Issue an extra EOI to clear ISR.
+ * If there are pending bits in the IRR, then they won't be converted into
+ * ISR bits as the CPU has interrupts disabled. They will be delivered once
+ * the CPU enables interrupts and there is nothing which can prevent that.
*
- * If there are pending IRR bits they turn into ISR bits after a higher
- * priority ISR bit has been acked.
+ * In the worst case this results in spurious interrupt warnings.
*/
-static void apic_pending_intr_clear(void)
+static void apic_clear_isr(void)
{
- union apic_ir irr, isr;
+ union apic_ir ir;
unsigned int i;
- /* 512 loops are way oversized and give the APIC a chance to obey. */
- for (i = 0; i < 512; i++) {
- if (!apic_check_and_ack(&irr, &isr))
- return;
- }
- /* Dump the IRR/ISR content if that failed */
- pr_warn("APIC: Stale IRR: %256pb ISR: %256pb\n", irr.map, isr.map);
+ if (!apic_check_and_eoi_isr(&ir))
+ pr_warn("APIC: Stale ISR: %256pb\n", ir.map);
+
+ for (i = 0; i < APIC_IR_REGS; i++)
+ ir.regs[i] = apic_read(APIC_IRR + i * 0x10);
+
+ if (!bitmap_empty(ir.map, APIC_IR_BITS))
+ pr_warn("APIC: Stale IRR: %256pb\n", ir.map);
}
/**
@@ -1503,6 +1506,9 @@ static void setup_local_APIC(void)
return;
}
+ if (apic->setup)
+ apic->setup();
+
/*
* If this comes from kexec/kcrash the APIC might be enabled in
* SPIV. Soft disable it before doing further initialization.
@@ -1541,8 +1547,7 @@ static void setup_local_APIC(void)
value |= 0x10;
apic_write(APIC_TASKPRI, value);
- /* Clear eventually stale ISR/IRR bits */
- apic_pending_intr_clear();
+ apic_clear_isr();
/*
* Now that we are all set up, enable the APIC
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index a947b46a8b64..bddc54465399 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -134,13 +134,20 @@ static void apic_update_irq_cfg(struct irq_data *irqd, unsigned int vector,
apicd->hw_irq_cfg.vector = vector;
apicd->hw_irq_cfg.dest_apicid = apic->calc_dest_apicid(cpu);
+
+ apic_update_vector(cpu, vector, true);
+
irq_data_update_effective_affinity(irqd, cpumask_of(cpu));
- trace_vector_config(irqd->irq, vector, cpu,
- apicd->hw_irq_cfg.dest_apicid);
+ trace_vector_config(irqd->irq, vector, cpu, apicd->hw_irq_cfg.dest_apicid);
}
-static void apic_update_vector(struct irq_data *irqd, unsigned int newvec,
- unsigned int newcpu)
+static void apic_free_vector(unsigned int cpu, unsigned int vector, bool managed)
+{
+ apic_update_vector(cpu, vector, false);
+ irq_matrix_free(vector_matrix, cpu, vector, managed);
+}
+
+static void chip_data_update(struct irq_data *irqd, unsigned int newvec, unsigned int newcpu)
{
struct apic_chip_data *apicd = apic_chip_data(irqd);
struct irq_desc *desc = irq_data_to_desc(irqd);
@@ -174,8 +181,7 @@ static void apic_update_vector(struct irq_data *irqd, unsigned int newvec,
apicd->prev_cpu = apicd->cpu;
WARN_ON_ONCE(apicd->cpu == newcpu);
} else {
- irq_matrix_free(vector_matrix, apicd->cpu, apicd->vector,
- managed);
+ apic_free_vector(apicd->cpu, apicd->vector, managed);
}
setnew:
@@ -261,7 +267,7 @@ assign_vector_locked(struct irq_data *irqd, const struct cpumask *dest)
trace_vector_alloc(irqd->irq, vector, resvd, vector);
if (vector < 0)
return vector;
- apic_update_vector(irqd, vector, cpu);
+ chip_data_update(irqd, vector, cpu);
return 0;
}
@@ -337,7 +343,7 @@ assign_managed_vector(struct irq_data *irqd, const struct cpumask *dest)
trace_vector_alloc_managed(irqd->irq, vector, vector);
if (vector < 0)
return vector;
- apic_update_vector(irqd, vector, cpu);
+ chip_data_update(irqd, vector, cpu);
return 0;
}
@@ -357,7 +363,7 @@ static void clear_irq_vector(struct irq_data *irqd)
apicd->prev_cpu);
per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_SHUTDOWN;
- irq_matrix_free(vector_matrix, apicd->cpu, vector, managed);
+ apic_free_vector(apicd->cpu, vector, managed);
apicd->vector = 0;
/* Clean up move in progress */
@@ -366,7 +372,7 @@ static void clear_irq_vector(struct irq_data *irqd)
return;
per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_SHUTDOWN;
- irq_matrix_free(vector_matrix, apicd->prev_cpu, vector, managed);
+ apic_free_vector(apicd->prev_cpu, vector, managed);
apicd->prev_vector = 0;
apicd->move_in_progress = 0;
hlist_del_init(&apicd->clist);
@@ -905,7 +911,7 @@ static void free_moved_vector(struct apic_chip_data *apicd)
* affinity mask comes online.
*/
trace_vector_free_moved(apicd->irq, cpu, vector, managed);
- irq_matrix_free(vector_matrix, cpu, vector, managed);
+ apic_free_vector(cpu, vector, managed);
per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
hlist_del_init(&apicd->clist);
apicd->prev_vector = 0;
diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c
new file mode 100644
index 000000000000..dbc5678bc3b6
--- /dev/null
+++ b/arch/x86/kernel/apic/x2apic_savic.c
@@ -0,0 +1,428 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AMD Secure AVIC Support (SEV-SNP Guests)
+ *
+ * Copyright (C) 2024 Advanced Micro Devices, Inc.
+ *
+ * Author: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
+ */
+
+#include <linux/cc_platform.h>
+#include <linux/cpumask.h>
+#include <linux/percpu-defs.h>
+#include <linux/align.h>
+
+#include <asm/apic.h>
+#include <asm/sev.h>
+
+#include "local.h"
+
+struct secure_avic_page {
+ u8 regs[PAGE_SIZE];
+} __aligned(PAGE_SIZE);
+
+static struct secure_avic_page __percpu *savic_page __ro_after_init;
+
+static int savic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ return x2apic_enabled() && cc_platform_has(CC_ATTR_SNP_SECURE_AVIC);
+}
+
+static inline void *get_reg_bitmap(unsigned int cpu, unsigned int offset)
+{
+ return &per_cpu_ptr(savic_page, cpu)->regs[offset];
+}
+
+static inline void update_vector(unsigned int cpu, unsigned int offset,
+ unsigned int vector, bool set)
+{
+ void *bitmap = get_reg_bitmap(cpu, offset);
+
+ if (set)
+ apic_set_vector(vector, bitmap);
+ else
+ apic_clear_vector(vector, bitmap);
+}
+
+#define SAVIC_ALLOWED_IRR 0x204
+
+/*
+ * When Secure AVIC is enabled, RDMSR/WRMSR of the APIC registers
+ * result in #VC exception (for non-accelerated register accesses)
+ * with VMEXIT_AVIC_NOACCEL error code. The #VC exception handler
+ * can read/write the x2APIC register in the guest APIC backing page.
+ *
+ * Since doing this would increase the latency of accessing x2APIC
+ * registers, instead of doing RDMSR/WRMSR based accesses and
+ * handling the APIC register reads/writes in the #VC exception handler,
+ * the read() and write() callbacks directly read/write the APIC register
+ * from/to the vCPU's APIC backing page.
+ */
+static u32 savic_read(u32 reg)
+{
+ void *ap = this_cpu_ptr(savic_page);
+
+ switch (reg) {
+ case APIC_LVTT:
+ case APIC_TMICT:
+ case APIC_TMCCT:
+ case APIC_TDCR:
+ case APIC_LVTTHMR:
+ case APIC_LVTPC:
+ case APIC_LVT0:
+ case APIC_LVT1:
+ case APIC_LVTERR:
+ return savic_ghcb_msr_read(reg);
+ case APIC_ID:
+ case APIC_LVR:
+ case APIC_TASKPRI:
+ case APIC_ARBPRI:
+ case APIC_PROCPRI:
+ case APIC_LDR:
+ case APIC_SPIV:
+ case APIC_ESR:
+ case APIC_EFEAT:
+ case APIC_ECTRL:
+ case APIC_SEOI:
+ case APIC_IER:
+ case APIC_EILVTn(0) ... APIC_EILVTn(3):
+ return apic_get_reg(ap, reg);
+ case APIC_ICR:
+ return (u32)apic_get_reg64(ap, reg);
+ case APIC_ISR ... APIC_ISR + 0x70:
+ case APIC_TMR ... APIC_TMR + 0x70:
+ if (WARN_ONCE(!IS_ALIGNED(reg, 16),
+ "APIC register read offset 0x%x not aligned at 16 bytes", reg))
+ return 0;
+ return apic_get_reg(ap, reg);
+ /* IRR and ALLOWED_IRR offset range */
+ case APIC_IRR ... APIC_IRR + 0x74:
+ /*
+ * Valid APIC_IRR/SAVIC_ALLOWED_IRR registers are at 16 bytes strides from
+ * their respective base offset. APIC_IRRs are in the range
+ *
+ * (0x200, 0x210, ..., 0x270)
+ *
+ * while the SAVIC_ALLOWED_IRR range starts 4 bytes later, in the range
+ *
+ * (0x204, 0x214, ..., 0x274).
+ *
+ * Filter out everything else.
+ */
+ if (WARN_ONCE(!(IS_ALIGNED(reg, 16) ||
+ IS_ALIGNED(reg - 4, 16)),
+ "Misaligned APIC_IRR/ALLOWED_IRR APIC register read offset 0x%x", reg))
+ return 0;
+ return apic_get_reg(ap, reg);
+ default:
+ pr_err("Error reading unknown Secure AVIC reg offset 0x%x\n", reg);
+ return 0;
+ }
+}
+
+#define SAVIC_NMI_REQ 0x278
+
+/*
+ * On WRMSR to APIC_SELF_IPI register by the guest, Secure AVIC hardware
+ * updates the APIC_IRR in the APIC backing page of the vCPU. In addition,
+ * hardware evaluates the new APIC_IRR update for interrupt injection to
+ * the vCPU. So, self IPIs are hardware-accelerated.
+ */
+static inline void self_ipi_reg_write(unsigned int vector)
+{
+ native_apic_msr_write(APIC_SELF_IPI, vector);
+}
+
+static void send_ipi_dest(unsigned int cpu, unsigned int vector, bool nmi)
+{
+ if (nmi)
+ apic_set_reg(per_cpu_ptr(savic_page, cpu), SAVIC_NMI_REQ, 1);
+ else
+ update_vector(cpu, APIC_IRR, vector, true);
+}
+
+static void send_ipi_allbut(unsigned int vector, bool nmi)
+{
+ unsigned int cpu, src_cpu;
+
+ guard(irqsave)();
+
+ src_cpu = raw_smp_processor_id();
+
+ for_each_cpu(cpu, cpu_online_mask) {
+ if (cpu == src_cpu)
+ continue;
+ send_ipi_dest(cpu, vector, nmi);
+ }
+}
+
+static inline void self_ipi(unsigned int vector, bool nmi)
+{
+ u32 icr_low = APIC_SELF_IPI | vector;
+
+ if (nmi)
+ icr_low |= APIC_DM_NMI;
+
+ native_x2apic_icr_write(icr_low, 0);
+}
+
+static void savic_icr_write(u32 icr_low, u32 icr_high)
+{
+ unsigned int dsh, vector;
+ u64 icr_data;
+ bool nmi;
+
+ dsh = icr_low & APIC_DEST_ALLBUT;
+ vector = icr_low & APIC_VECTOR_MASK;
+ nmi = ((icr_low & APIC_DM_FIXED_MASK) == APIC_DM_NMI);
+
+ switch (dsh) {
+ case APIC_DEST_SELF:
+ self_ipi(vector, nmi);
+ break;
+ case APIC_DEST_ALLINC:
+ self_ipi(vector, nmi);
+ fallthrough;
+ case APIC_DEST_ALLBUT:
+ send_ipi_allbut(vector, nmi);
+ break;
+ default:
+ send_ipi_dest(icr_high, vector, nmi);
+ break;
+ }
+
+ icr_data = ((u64)icr_high) << 32 | icr_low;
+ if (dsh != APIC_DEST_SELF)
+ savic_ghcb_msr_write(APIC_ICR, icr_data);
+ apic_set_reg64(this_cpu_ptr(savic_page), APIC_ICR, icr_data);
+}
+
+static void savic_write(u32 reg, u32 data)
+{
+ void *ap = this_cpu_ptr(savic_page);
+
+ switch (reg) {
+ case APIC_LVTT:
+ case APIC_TMICT:
+ case APIC_TDCR:
+ case APIC_LVT0:
+ case APIC_LVT1:
+ case APIC_LVTTHMR:
+ case APIC_LVTPC:
+ case APIC_LVTERR:
+ savic_ghcb_msr_write(reg, data);
+ break;
+ case APIC_TASKPRI:
+ case APIC_EOI:
+ case APIC_SPIV:
+ case SAVIC_NMI_REQ:
+ case APIC_ESR:
+ case APIC_ECTRL:
+ case APIC_SEOI:
+ case APIC_IER:
+ case APIC_EILVTn(0) ... APIC_EILVTn(3):
+ apic_set_reg(ap, reg, data);
+ break;
+ case APIC_ICR:
+ savic_icr_write(data, 0);
+ break;
+ case APIC_SELF_IPI:
+ self_ipi_reg_write(data);
+ break;
+ /* ALLOWED_IRR offsets are writable */
+ case SAVIC_ALLOWED_IRR ... SAVIC_ALLOWED_IRR + 0x70:
+ if (IS_ALIGNED(reg - 4, 16)) {
+ apic_set_reg(ap, reg, data);
+ break;
+ }
+ fallthrough;
+ default:
+ pr_err("Error writing unknown Secure AVIC reg offset 0x%x\n", reg);
+ }
+}
+
+static void send_ipi(u32 dest, unsigned int vector, unsigned int dsh)
+{
+ unsigned int icr_low;
+
+ icr_low = __prepare_ICR(dsh, vector, APIC_DEST_PHYSICAL);
+ savic_icr_write(icr_low, dest);
+}
+
+static void savic_send_ipi(int cpu, int vector)
+{
+ u32 dest = per_cpu(x86_cpu_to_apicid, cpu);
+
+ send_ipi(dest, vector, 0);
+}
+
+static void send_ipi_mask(const struct cpumask *mask, unsigned int vector, bool excl_self)
+{
+ unsigned int cpu, this_cpu;
+
+ guard(irqsave)();
+
+ this_cpu = raw_smp_processor_id();
+
+ for_each_cpu(cpu, mask) {
+ if (excl_self && cpu == this_cpu)
+ continue;
+ send_ipi(per_cpu(x86_cpu_to_apicid, cpu), vector, 0);
+ }
+}
+
+static void savic_send_ipi_mask(const struct cpumask *mask, int vector)
+{
+ send_ipi_mask(mask, vector, false);
+}
+
+static void savic_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
+{
+ send_ipi_mask(mask, vector, true);
+}
+
+static void savic_send_ipi_allbutself(int vector)
+{
+ send_ipi(0, vector, APIC_DEST_ALLBUT);
+}
+
+static void savic_send_ipi_all(int vector)
+{
+ send_ipi(0, vector, APIC_DEST_ALLINC);
+}
+
+static void savic_send_ipi_self(int vector)
+{
+ self_ipi_reg_write(vector);
+}
+
+static void savic_update_vector(unsigned int cpu, unsigned int vector, bool set)
+{
+ update_vector(cpu, SAVIC_ALLOWED_IRR, vector, set);
+}
+
+static void savic_eoi(void)
+{
+ unsigned int cpu;
+ int vec;
+
+ cpu = raw_smp_processor_id();
+ vec = apic_find_highest_vector(get_reg_bitmap(cpu, APIC_ISR));
+ if (WARN_ONCE(vec == -1, "EOI write while no active interrupt in APIC_ISR"))
+ return;
+
+ /* Is level-triggered interrupt? */
+ if (apic_test_vector(vec, get_reg_bitmap(cpu, APIC_TMR))) {
+ update_vector(cpu, APIC_ISR, vec, false);
+ /*
+ * Propagate the EOI write to the hypervisor for level-triggered
+ * interrupts. Return to the guest from GHCB protocol event takes
+ * care of re-evaluating interrupt state.
+ */
+ savic_ghcb_msr_write(APIC_EOI, 0);
+ } else {
+ /*
+ * Hardware clears APIC_ISR and re-evaluates the interrupt state
+ * to determine if there is any pending interrupt which can be
+ * delivered to CPU.
+ */
+ native_apic_msr_eoi();
+ }
+}
+
+static void savic_teardown(void)
+{
+ /* Disable Secure AVIC */
+ native_wrmsrq(MSR_AMD64_SAVIC_CONTROL, 0);
+ savic_unregister_gpa(NULL);
+}
+
+static void savic_setup(void)
+{
+ void *ap = this_cpu_ptr(savic_page);
+ enum es_result res;
+ unsigned long gpa;
+
+ /*
+ * Before Secure AVIC is enabled, APIC MSR reads are intercepted.
+ * APIC_ID MSR read returns the value from the hypervisor.
+ */
+ apic_set_reg(ap, APIC_ID, native_apic_msr_read(APIC_ID));
+
+ gpa = __pa(ap);
+
+ /*
+ * The NPT entry for a vCPU's APIC backing page must always be
+ * present when the vCPU is running in order for Secure AVIC to
+ * function. A VMEXIT_BUSY is returned on VMRUN and the vCPU cannot
+ * be resumed if the NPT entry for the APIC backing page is not
+ * present. Notify GPA of the vCPU's APIC backing page to the
+ * hypervisor by calling savic_register_gpa(). Before executing
+ * VMRUN, the hypervisor makes use of this information to make sure
+ * the APIC backing page is mapped in NPT.
+ */
+ res = savic_register_gpa(gpa);
+ if (res != ES_OK)
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL);
+
+ native_wrmsrq(MSR_AMD64_SAVIC_CONTROL,
+ gpa | MSR_AMD64_SAVIC_EN | MSR_AMD64_SAVIC_ALLOWEDNMI);
+}
+
+static int savic_probe(void)
+{
+ if (!cc_platform_has(CC_ATTR_SNP_SECURE_AVIC))
+ return 0;
+
+ if (!x2apic_mode) {
+ pr_err("Secure AVIC enabled in non x2APIC mode\n");
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL);
+ /* unreachable */
+ }
+
+ savic_page = alloc_percpu(struct secure_avic_page);
+ if (!savic_page)
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL);
+
+ return 1;
+}
+
+static struct apic apic_x2apic_savic __ro_after_init = {
+
+ .name = "secure avic x2apic",
+ .probe = savic_probe,
+ .acpi_madt_oem_check = savic_acpi_madt_oem_check,
+ .setup = savic_setup,
+ .teardown = savic_teardown,
+
+ .dest_mode_logical = false,
+
+ .disable_esr = 0,
+
+ .cpu_present_to_apicid = default_cpu_present_to_apicid,
+
+ .max_apic_id = UINT_MAX,
+ .x2apic_set_max_apicid = true,
+ .get_apic_id = x2apic_get_apic_id,
+
+ .calc_dest_apicid = apic_default_calc_apicid,
+
+ .send_IPI = savic_send_ipi,
+ .send_IPI_mask = savic_send_ipi_mask,
+ .send_IPI_mask_allbutself = savic_send_ipi_mask_allbutself,
+ .send_IPI_allbutself = savic_send_ipi_allbutself,
+ .send_IPI_all = savic_send_ipi_all,
+ .send_IPI_self = savic_send_ipi_self,
+
+ .nmi_to_offline_cpu = true,
+
+ .read = savic_read,
+ .write = savic_write,
+ .eoi = savic_eoi,
+ .icr_read = native_x2apic_icr_read,
+ .icr_write = savic_icr_write,
+
+ .update_vector = savic_update_vector,
+};
+
+apic_driver(apic_x2apic_savic);
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 533fcf5636fc..fd28b53dbac5 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -52,10 +52,13 @@ SYM_PIC_ALIAS(next_early_pgt);
pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
unsigned int __pgtable_l5_enabled __ro_after_init;
+SYM_PIC_ALIAS(__pgtable_l5_enabled);
unsigned int pgdir_shift __ro_after_init = 39;
EXPORT_SYMBOL(pgdir_shift);
+SYM_PIC_ALIAS(pgdir_shift);
unsigned int ptrs_per_p4d __ro_after_init = 1;
EXPORT_SYMBOL(ptrs_per_p4d);
+SYM_PIC_ALIAS(ptrs_per_p4d);
unsigned long page_offset_base __ro_after_init = __PAGE_OFFSET_BASE_L4;
EXPORT_SYMBOL(page_offset_base);
@@ -316,5 +319,5 @@ void early_setup_idt(void)
handler = vc_boot_ghcb;
}
- startup_64_load_idt(handler);
+ __pi_startup_64_load_idt(handler);
}
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 76743dfad6ab..80ef5d386b03 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -61,7 +61,7 @@ RESERVE_BRK(pagetables, INIT_MAP_SIZE)
* any particular GDT layout, because we load our own as soon as we
* can.
*/
-__HEAD
+ __INIT
SYM_CODE_START(startup_32)
movl pa(initial_stack),%ecx
@@ -136,6 +136,9 @@ SYM_CODE_END(startup_32)
* If cpu hotplug is not supported then this code can go in init section
* which will be freed later
*/
+#ifdef CONFIG_HOTPLUG_CPU
+ .text
+#endif
SYM_FUNC_START(startup_32_smp)
cld
movl $(__BOOT_DS),%eax
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 3e9b3a3bd039..21816b48537c 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -33,7 +33,7 @@
* because we need identity-mapped pages.
*/
- __HEAD
+ __INIT
.code64
SYM_CODE_START_NOALIGN(startup_64)
UNWIND_HINT_END_OF_STACK
@@ -71,7 +71,7 @@ SYM_CODE_START_NOALIGN(startup_64)
xorl %edx, %edx
wrmsr
- call startup_64_setup_gdt_idt
+ call __pi_startup_64_setup_gdt_idt
/* Now switch to __KERNEL_CS so IRET works reliably */
pushq $__KERNEL_CS
@@ -91,7 +91,7 @@ SYM_CODE_START_NOALIGN(startup_64)
* subsequent code. Pass the boot_params pointer as the first argument.
*/
movq %r15, %rdi
- call sme_enable
+ call __pi_sme_enable
#endif
/* Sanitize CPU configuration */
@@ -111,7 +111,7 @@ SYM_CODE_START_NOALIGN(startup_64)
* programmed into CR3.
*/
movq %r15, %rsi
- call __startup_64
+ call __pi___startup_64
/* Form the CR3 value being sure to include the CR3 modifier */
leaq early_top_pgt(%rip), %rcx
@@ -562,7 +562,7 @@ SYM_CODE_START_NOALIGN(vc_no_ghcb)
/* Call C handler */
movq %rsp, %rdi
movq ORIG_RAX(%rsp), %rsi
- call do_vc_no_ghcb
+ call __pi_do_vc_no_ghcb
/* Unwind pt_regs */
POP_REGS
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 4fa0be732af1..d7af4a64c211 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -160,11 +160,6 @@ SECTIONS
} :text = 0xcccccccc
- /* bootstrapping code */
- .head.text : AT(ADDR(.head.text) - LOAD_OFFSET) {
- HEAD_TEXT
- } :text = 0xcccccccc
-
/* End of text section, which should occupy whole number of pages */
_etext = .;
. = ALIGN(PAGE_SIZE);
@@ -227,6 +222,8 @@ SECTIONS
*/
.altinstr_aux : AT(ADDR(.altinstr_aux) - LOAD_OFFSET) {
*(.altinstr_aux)
+ . = ALIGN(PAGE_SIZE);
+ __inittext_end = .;
}
INIT_DATA_SECTION(16)
@@ -535,3 +532,5 @@ xen_elfnote_entry_value =
xen_elfnote_phys32_entry_value =
ABSOLUTE(xen_elfnote_phys32_entry) + ABSOLUTE(pvh_start_xen - LOAD_OFFSET);
#endif
+
+#include "../boot/startup/exports.h"
diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c
index faf3a13fb6ba..2f8c32173972 100644
--- a/arch/x86/mm/mem_encrypt_amd.c
+++ b/arch/x86/mm/mem_encrypt_amd.c
@@ -536,12 +536,6 @@ void __init sme_early_init(void)
x86_init.resources.dmi_setup = snp_dmi_setup;
}
- /*
- * Switch the SVSM CA mapping (if active) from identity mapped to
- * kernel mapped.
- */
- snp_update_svsm_ca();
-
if (sev_status & MSR_AMD64_SNP_SECURE_TSC)
setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
}
diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S
index f8a33b25ae86..edbf9c998848 100644
--- a/arch/x86/mm/mem_encrypt_boot.S
+++ b/arch/x86/mm/mem_encrypt_boot.S
@@ -16,7 +16,7 @@
.text
.code64
-SYM_FUNC_START(sme_encrypt_execute)
+SYM_FUNC_START(__pi_sme_encrypt_execute)
/*
* Entry parameters:
@@ -69,9 +69,9 @@ SYM_FUNC_START(sme_encrypt_execute)
ANNOTATE_UNRET_SAFE
ret
int3
-SYM_FUNC_END(sme_encrypt_execute)
+SYM_FUNC_END(__pi_sme_encrypt_execute)
-SYM_FUNC_START(__enc_copy)
+SYM_FUNC_START_LOCAL(__enc_copy)
ANNOTATE_NOENDBR
/*
* Routine used to encrypt memory in place.
diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S
index 1d78e5631bb8..344030c1a81d 100644
--- a/arch/x86/platform/pvh/head.S
+++ b/arch/x86/platform/pvh/head.S
@@ -24,7 +24,7 @@
#include <asm/nospec-branch.h>
#include <xen/interface/elfnote.h>
- __HEAD
+ __INIT
/*
* Entry point for PVH guests.
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index 5778bc498415..e5a2b9a912d1 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -740,10 +740,10 @@ static void walk_relocs(int (*process)(struct section *sec, Elf_Rel *rel,
static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym,
const char *symname)
{
- int headtext = !strcmp(sec_name(sec->shdr.sh_info), ".head.text");
unsigned r_type = ELF64_R_TYPE(rel->r_info);
ElfW(Addr) offset = rel->r_offset;
int shn_abs = (sym->st_shndx == SHN_ABS) && !is_reloc(S_REL, symname);
+
if (sym->st_shndx == SHN_UNDEF)
return 0;
@@ -783,12 +783,6 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym,
break;
}
- if (headtext) {
- die("Absolute reference to symbol '%s' not permitted in .head.text\n",
- symname);
- break;
- }
-
/*
* Relocation offsets for 64 bit kernels are output
* as 32 bits and sign extended back to 64 bits when
diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index 942372e69b4d..ee643a6cd691 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -1029,7 +1029,7 @@ int rmp_make_shared(u64 pfn, enum pg_level level)
}
EXPORT_SYMBOL_GPL(rmp_make_shared);
-void snp_leak_pages(u64 pfn, unsigned int npages)
+void __snp_leak_pages(u64 pfn, unsigned int npages, bool dump_rmp)
{
struct page *page = pfn_to_page(pfn);
@@ -1052,14 +1052,15 @@ void snp_leak_pages(u64 pfn, unsigned int npages)
(PageHead(page) && compound_nr(page) <= npages))
list_add_tail(&page->buddy_list, &snp_leaked_pages_list);
- dump_rmpentry(pfn);
+ if (dump_rmp)
+ dump_rmpentry(pfn);
snp_nr_leaked_pages++;
pfn++;
page++;
}
spin_unlock(&snp_leaked_pages_list_lock);
}
-EXPORT_SYMBOL_GPL(snp_leak_pages);
+EXPORT_SYMBOL_GPL(__snp_leak_pages);
void kdump_sev_callback(void)
{
diff --git a/drivers/acpi/arm64/gtdt.c b/drivers/acpi/arm64/gtdt.c
index 70f8290b659d..fd995a1d3d24 100644
--- a/drivers/acpi/arm64/gtdt.c
+++ b/drivers/acpi/arm64/gtdt.c
@@ -388,11 +388,11 @@ static int __init gtdt_import_sbsa_gwdt(struct acpi_gtdt_watchdog *wd,
return 0;
}
-static int __init gtdt_sbsa_gwdt_init(void)
+static int __init gtdt_platform_timer_init(void)
{
void *platform_timer;
struct acpi_table_header *table;
- int ret, timer_count, gwdt_count = 0;
+ int ret, timer_count, gwdt_count = 0, mmio_timer_count = 0;
if (acpi_disabled)
return 0;
@@ -414,20 +414,41 @@ static int __init gtdt_sbsa_gwdt_init(void)
goto out_put_gtdt;
for_each_platform_timer(platform_timer) {
+ ret = 0;
+
if (is_non_secure_watchdog(platform_timer)) {
ret = gtdt_import_sbsa_gwdt(platform_timer, gwdt_count);
if (ret)
- break;
+ continue;
gwdt_count++;
+ } else if (is_timer_block(platform_timer)) {
+ struct arch_timer_mem atm = {};
+ struct platform_device *pdev;
+
+ ret = gtdt_parse_timer_block(platform_timer, &atm);
+ if (ret)
+ continue;
+
+ pdev = platform_device_register_data(NULL, "gtdt-arm-mmio-timer",
+ gwdt_count, &atm,
+ sizeof(atm));
+ if (IS_ERR(pdev)) {
+ pr_err("Can't register timer %d\n", gwdt_count);
+ continue;
+ }
+
+ mmio_timer_count++;
}
}
if (gwdt_count)
pr_info("found %d SBSA generic Watchdog(s).\n", gwdt_count);
+ if (mmio_timer_count)
+ pr_info("found %d Generic MMIO timer(s).\n", mmio_timer_count);
out_put_gtdt:
acpi_put_table(table);
return ret;
}
-device_initcall(gtdt_sbsa_gwdt_init);
+device_initcall(gtdt_platform_timer_init);
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index 645f517a1ac2..ffcd23668763 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -395,8 +395,7 @@ config ARM_GLOBAL_TIMER
config ARM_GT_INITIAL_PRESCALER_VAL
int "ARM global timer initial prescaler value"
- default 2 if ARCH_ZYNQ
- default 1
+ default 0
depends on ARM_GLOBAL_TIMER
help
When the ARM global timer initializes, its current rate is declared
@@ -406,6 +405,7 @@ config ARM_GT_INITIAL_PRESCALER_VAL
bounds about how much the parent clock is allowed to decrease or
increase wrt the initial clock value.
This affects CPU_FREQ max delta from the initial frequency.
+ Use 0 to use auto-detection in the driver.
config ARM_TIMER_SP804
bool "Support for Dual Timer SP804 module"
@@ -474,11 +474,14 @@ config FSL_FTM_TIMER
help
Support for Freescale FlexTimer Module (FTM) timer.
-config VF_PIT_TIMER
- bool
+config NXP_PIT_TIMER
+ bool "NXP Periodic Interrupt Timer" if COMPILE_TEST
select CLKSRC_MMIO
help
- Support for Periodic Interrupt Timer on Freescale Vybrid Family SoCs.
+ Support for Periodic Interrupt Timer on Freescale / NXP
+ SoCs. This periodic timer is found on the Vybrid Family and
+ the Automotive S32G2/3 platforms. It contains 4 channels
+ where two can be coupled to form a 64 bits channel.
config SYS_SUPPORTS_SH_CMT
bool
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index 205bf3b0a8f3..ec4452ee958f 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -49,7 +49,7 @@ obj-$(CONFIG_CLKSRC_LPC32XX) += timer-lpc32xx.o
obj-$(CONFIG_CLKSRC_MPS2) += mps2-timer.o
obj-$(CONFIG_CLKSRC_SAMSUNG_PWM) += samsung_pwm_timer.o
obj-$(CONFIG_FSL_FTM_TIMER) += timer-fsl-ftm.o
-obj-$(CONFIG_VF_PIT_TIMER) += timer-vf-pit.o
+obj-$(CONFIG_NXP_PIT_TIMER) += timer-nxp-pit.o
obj-$(CONFIG_CLKSRC_QCOM) += timer-qcom.o
obj-$(CONFIG_MTK_TIMER) += timer-mediatek.o
obj-$(CONFIG_MTK_CPUX_TIMER) += timer-mediatek-cpux.o
@@ -64,6 +64,7 @@ obj-$(CONFIG_REALTEK_OTTO_TIMER) += timer-rtl-otto.o
obj-$(CONFIG_ARC_TIMERS) += arc_timer.o
obj-$(CONFIG_ARM_ARCH_TIMER) += arm_arch_timer.o
+obj-$(CONFIG_ARM_ARCH_TIMER) += arm_arch_timer_mmio.o
obj-$(CONFIG_ARM_GLOBAL_TIMER) += arm_global_timer.o
obj-$(CONFIG_ARMV7M_SYSTICK) += armv7m_systick.o
obj-$(CONFIG_ARM_TIMER_SP804) += timer-sp804.o
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 80ba6a54248c..90aeff44a276 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -34,42 +34,12 @@
#include <clocksource/arm_arch_timer.h>
-#define CNTTIDR 0x08
-#define CNTTIDR_VIRT(n) (BIT(1) << ((n) * 4))
-
-#define CNTACR(n) (0x40 + ((n) * 4))
-#define CNTACR_RPCT BIT(0)
-#define CNTACR_RVCT BIT(1)
-#define CNTACR_RFRQ BIT(2)
-#define CNTACR_RVOFF BIT(3)
-#define CNTACR_RWVT BIT(4)
-#define CNTACR_RWPT BIT(5)
-
-#define CNTPCT_LO 0x00
-#define CNTVCT_LO 0x08
-#define CNTFRQ 0x10
-#define CNTP_CVAL_LO 0x20
-#define CNTP_CTL 0x2c
-#define CNTV_CVAL_LO 0x30
-#define CNTV_CTL 0x3c
-
/*
* The minimum amount of time a generic counter is guaranteed to not roll over
* (40 years)
*/
#define MIN_ROLLOVER_SECS (40ULL * 365 * 24 * 3600)
-static unsigned arch_timers_present __initdata;
-
-struct arch_timer {
- void __iomem *base;
- struct clock_event_device evt;
-};
-
-static struct arch_timer *arch_timer_mem __ro_after_init;
-
-#define to_arch_timer(e) container_of(e, struct arch_timer, evt)
-
static u32 arch_timer_rate __ro_after_init;
static int arch_timer_ppi[ARCH_TIMER_MAX_TIMER_PPI] __ro_after_init;
@@ -85,7 +55,6 @@ static struct clock_event_device __percpu *arch_timer_evt;
static enum arch_timer_ppi_nr arch_timer_uses_ppi __ro_after_init = ARCH_TIMER_VIRT_PPI;
static bool arch_timer_c3stop __ro_after_init;
-static bool arch_timer_mem_use_virtual __ro_after_init;
static bool arch_counter_suspend_stop __ro_after_init;
#ifdef CONFIG_GENERIC_GETTIMEOFDAY
static enum vdso_clock_mode vdso_default = VDSO_CLOCKMODE_ARCHTIMER;
@@ -121,76 +90,6 @@ static int arch_counter_get_width(void)
/*
* Architected system timer support.
*/
-
-static __always_inline
-void arch_timer_reg_write(int access, enum arch_timer_reg reg, u64 val,
- struct clock_event_device *clk)
-{
- if (access == ARCH_TIMER_MEM_PHYS_ACCESS) {
- struct arch_timer *timer = to_arch_timer(clk);
- switch (reg) {
- case ARCH_TIMER_REG_CTRL:
- writel_relaxed((u32)val, timer->base + CNTP_CTL);
- break;
- case ARCH_TIMER_REG_CVAL:
- /*
- * Not guaranteed to be atomic, so the timer
- * must be disabled at this point.
- */
- writeq_relaxed(val, timer->base + CNTP_CVAL_LO);
- break;
- default:
- BUILD_BUG();
- }
- } else if (access == ARCH_TIMER_MEM_VIRT_ACCESS) {
- struct arch_timer *timer = to_arch_timer(clk);
- switch (reg) {
- case ARCH_TIMER_REG_CTRL:
- writel_relaxed((u32)val, timer->base + CNTV_CTL);
- break;
- case ARCH_TIMER_REG_CVAL:
- /* Same restriction as above */
- writeq_relaxed(val, timer->base + CNTV_CVAL_LO);
- break;
- default:
- BUILD_BUG();
- }
- } else {
- arch_timer_reg_write_cp15(access, reg, val);
- }
-}
-
-static __always_inline
-u32 arch_timer_reg_read(int access, enum arch_timer_reg reg,
- struct clock_event_device *clk)
-{
- u32 val;
-
- if (access == ARCH_TIMER_MEM_PHYS_ACCESS) {
- struct arch_timer *timer = to_arch_timer(clk);
- switch (reg) {
- case ARCH_TIMER_REG_CTRL:
- val = readl_relaxed(timer->base + CNTP_CTL);
- break;
- default:
- BUILD_BUG();
- }
- } else if (access == ARCH_TIMER_MEM_VIRT_ACCESS) {
- struct arch_timer *timer = to_arch_timer(clk);
- switch (reg) {
- case ARCH_TIMER_REG_CTRL:
- val = readl_relaxed(timer->base + CNTV_CTL);
- break;
- default:
- BUILD_BUG();
- }
- } else {
- val = arch_timer_reg_read_cp15(access, reg);
- }
-
- return val;
-}
-
static noinstr u64 raw_counter_get_cntpct_stable(void)
{
return __arch_counter_get_cntpct_stable();
@@ -424,7 +323,7 @@ void erratum_set_next_event_generic(const int access, unsigned long evt,
unsigned long ctrl;
u64 cval;
- ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk);
+ ctrl = arch_timer_reg_read_cp15(access, ARCH_TIMER_REG_CTRL);
ctrl |= ARCH_TIMER_CTRL_ENABLE;
ctrl &= ~ARCH_TIMER_CTRL_IT_MASK;
@@ -436,7 +335,7 @@ void erratum_set_next_event_generic(const int access, unsigned long evt,
write_sysreg(cval, cntv_cval_el0);
}
- arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk);
+ arch_timer_reg_write_cp15(access, ARCH_TIMER_REG_CTRL, ctrl);
}
static __maybe_unused int erratum_set_next_event_virt(unsigned long evt,
@@ -667,10 +566,10 @@ static __always_inline irqreturn_t timer_handler(const int access,
{
unsigned long ctrl;
- ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, evt);
+ ctrl = arch_timer_reg_read_cp15(access, ARCH_TIMER_REG_CTRL);
if (ctrl & ARCH_TIMER_CTRL_IT_STAT) {
ctrl |= ARCH_TIMER_CTRL_IT_MASK;
- arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, evt);
+ arch_timer_reg_write_cp15(access, ARCH_TIMER_REG_CTRL, ctrl);
evt->event_handler(evt);
return IRQ_HANDLED;
}
@@ -692,28 +591,14 @@ static irqreturn_t arch_timer_handler_phys(int irq, void *dev_id)
return timer_handler(ARCH_TIMER_PHYS_ACCESS, evt);
}
-static irqreturn_t arch_timer_handler_phys_mem(int irq, void *dev_id)
-{
- struct clock_event_device *evt = dev_id;
-
- return timer_handler(ARCH_TIMER_MEM_PHYS_ACCESS, evt);
-}
-
-static irqreturn_t arch_timer_handler_virt_mem(int irq, void *dev_id)
-{
- struct clock_event_device *evt = dev_id;
-
- return timer_handler(ARCH_TIMER_MEM_VIRT_ACCESS, evt);
-}
-
static __always_inline int arch_timer_shutdown(const int access,
struct clock_event_device *clk)
{
unsigned long ctrl;
- ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk);
+ ctrl = arch_timer_reg_read_cp15(access, ARCH_TIMER_REG_CTRL);
ctrl &= ~ARCH_TIMER_CTRL_ENABLE;
- arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk);
+ arch_timer_reg_write_cp15(access, ARCH_TIMER_REG_CTRL, ctrl);
return 0;
}
@@ -728,23 +613,13 @@ static int arch_timer_shutdown_phys(struct clock_event_device *clk)
return arch_timer_shutdown(ARCH_TIMER_PHYS_ACCESS, clk);
}
-static int arch_timer_shutdown_virt_mem(struct clock_event_device *clk)
-{
- return arch_timer_shutdown(ARCH_TIMER_MEM_VIRT_ACCESS, clk);
-}
-
-static int arch_timer_shutdown_phys_mem(struct clock_event_device *clk)
-{
- return arch_timer_shutdown(ARCH_TIMER_MEM_PHYS_ACCESS, clk);
-}
-
static __always_inline void set_next_event(const int access, unsigned long evt,
struct clock_event_device *clk)
{
unsigned long ctrl;
u64 cnt;
- ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk);
+ ctrl = arch_timer_reg_read_cp15(access, ARCH_TIMER_REG_CTRL);
ctrl |= ARCH_TIMER_CTRL_ENABLE;
ctrl &= ~ARCH_TIMER_CTRL_IT_MASK;
@@ -753,8 +628,8 @@ static __always_inline void set_next_event(const int access, unsigned long evt,
else
cnt = __arch_counter_get_cntvct();
- arch_timer_reg_write(access, ARCH_TIMER_REG_CVAL, evt + cnt, clk);
- arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk);
+ arch_timer_reg_write_cp15(access, ARCH_TIMER_REG_CVAL, evt + cnt);
+ arch_timer_reg_write_cp15(access, ARCH_TIMER_REG_CTRL, ctrl);
}
static int arch_timer_set_next_event_virt(unsigned long evt,
@@ -771,60 +646,6 @@ static int arch_timer_set_next_event_phys(unsigned long evt,
return 0;
}
-static noinstr u64 arch_counter_get_cnt_mem(struct arch_timer *t, int offset_lo)
-{
- u32 cnt_lo, cnt_hi, tmp_hi;
-
- do {
- cnt_hi = __le32_to_cpu((__le32 __force)__raw_readl(t->base + offset_lo + 4));
- cnt_lo = __le32_to_cpu((__le32 __force)__raw_readl(t->base + offset_lo));
- tmp_hi = __le32_to_cpu((__le32 __force)__raw_readl(t->base + offset_lo + 4));
- } while (cnt_hi != tmp_hi);
-
- return ((u64) cnt_hi << 32) | cnt_lo;
-}
-
-static __always_inline void set_next_event_mem(const int access, unsigned long evt,
- struct clock_event_device *clk)
-{
- struct arch_timer *timer = to_arch_timer(clk);
- unsigned long ctrl;
- u64 cnt;
-
- ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk);
-
- /* Timer must be disabled before programming CVAL */
- if (ctrl & ARCH_TIMER_CTRL_ENABLE) {
- ctrl &= ~ARCH_TIMER_CTRL_ENABLE;
- arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk);
- }
-
- ctrl |= ARCH_TIMER_CTRL_ENABLE;
- ctrl &= ~ARCH_TIMER_CTRL_IT_MASK;
-
- if (access == ARCH_TIMER_MEM_VIRT_ACCESS)
- cnt = arch_counter_get_cnt_mem(timer, CNTVCT_LO);
- else
- cnt = arch_counter_get_cnt_mem(timer, CNTPCT_LO);
-
- arch_timer_reg_write(access, ARCH_TIMER_REG_CVAL, evt + cnt, clk);
- arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk);
-}
-
-static int arch_timer_set_next_event_virt_mem(unsigned long evt,
- struct clock_event_device *clk)
-{
- set_next_event_mem(ARCH_TIMER_MEM_VIRT_ACCESS, evt, clk);
- return 0;
-}
-
-static int arch_timer_set_next_event_phys_mem(unsigned long evt,
- struct clock_event_device *clk)
-{
- set_next_event_mem(ARCH_TIMER_MEM_PHYS_ACCESS, evt, clk);
- return 0;
-}
-
static u64 __arch_timer_check_delta(void)
{
#ifdef CONFIG_ARM64
@@ -850,63 +671,41 @@ static u64 __arch_timer_check_delta(void)
return CLOCKSOURCE_MASK(arch_counter_get_width());
}
-static void __arch_timer_setup(unsigned type,
- struct clock_event_device *clk)
+static void __arch_timer_setup(struct clock_event_device *clk)
{
+ typeof(clk->set_next_event) sne;
u64 max_delta;
clk->features = CLOCK_EVT_FEAT_ONESHOT;
- if (type == ARCH_TIMER_TYPE_CP15) {
- typeof(clk->set_next_event) sne;
-
- arch_timer_check_ool_workaround(ate_match_local_cap_id, NULL);
-
- if (arch_timer_c3stop)
- clk->features |= CLOCK_EVT_FEAT_C3STOP;
- clk->name = "arch_sys_timer";
- clk->rating = 450;
- clk->cpumask = cpumask_of(smp_processor_id());
- clk->irq = arch_timer_ppi[arch_timer_uses_ppi];
- switch (arch_timer_uses_ppi) {
- case ARCH_TIMER_VIRT_PPI:
- clk->set_state_shutdown = arch_timer_shutdown_virt;
- clk->set_state_oneshot_stopped = arch_timer_shutdown_virt;
- sne = erratum_handler(set_next_event_virt);
- break;
- case ARCH_TIMER_PHYS_SECURE_PPI:
- case ARCH_TIMER_PHYS_NONSECURE_PPI:
- case ARCH_TIMER_HYP_PPI:
- clk->set_state_shutdown = arch_timer_shutdown_phys;
- clk->set_state_oneshot_stopped = arch_timer_shutdown_phys;
- sne = erratum_handler(set_next_event_phys);
- break;
- default:
- BUG();
- }
+ arch_timer_check_ool_workaround(ate_match_local_cap_id, NULL);
- clk->set_next_event = sne;
- max_delta = __arch_timer_check_delta();
- } else {
- clk->features |= CLOCK_EVT_FEAT_DYNIRQ;
- clk->name = "arch_mem_timer";
- clk->rating = 400;
- clk->cpumask = cpu_possible_mask;
- if (arch_timer_mem_use_virtual) {
- clk->set_state_shutdown = arch_timer_shutdown_virt_mem;
- clk->set_state_oneshot_stopped = arch_timer_shutdown_virt_mem;
- clk->set_next_event =
- arch_timer_set_next_event_virt_mem;
- } else {
- clk->set_state_shutdown = arch_timer_shutdown_phys_mem;
- clk->set_state_oneshot_stopped = arch_timer_shutdown_phys_mem;
- clk->set_next_event =
- arch_timer_set_next_event_phys_mem;
- }
-
- max_delta = CLOCKSOURCE_MASK(56);
+ if (arch_timer_c3stop)
+ clk->features |= CLOCK_EVT_FEAT_C3STOP;
+ clk->name = "arch_sys_timer";
+ clk->rating = 450;
+ clk->cpumask = cpumask_of(smp_processor_id());
+ clk->irq = arch_timer_ppi[arch_timer_uses_ppi];
+ switch (arch_timer_uses_ppi) {
+ case ARCH_TIMER_VIRT_PPI:
+ clk->set_state_shutdown = arch_timer_shutdown_virt;
+ clk->set_state_oneshot_stopped = arch_timer_shutdown_virt;
+ sne = erratum_handler(set_next_event_virt);
+ break;
+ case ARCH_TIMER_PHYS_SECURE_PPI:
+ case ARCH_TIMER_PHYS_NONSECURE_PPI:
+ case ARCH_TIMER_HYP_PPI:
+ clk->set_state_shutdown = arch_timer_shutdown_phys;
+ clk->set_state_oneshot_stopped = arch_timer_shutdown_phys;
+ sne = erratum_handler(set_next_event_phys);
+ break;
+ default:
+ BUG();
}
+ clk->set_next_event = sne;
+ max_delta = __arch_timer_check_delta();
+
clk->set_state_shutdown(clk);
clockevents_config_and_register(clk, arch_timer_rate, 0xf, max_delta);
@@ -1029,7 +828,7 @@ static int arch_timer_starting_cpu(unsigned int cpu)
struct clock_event_device *clk = this_cpu_ptr(arch_timer_evt);
u32 flags;
- __arch_timer_setup(ARCH_TIMER_TYPE_CP15, clk);
+ __arch_timer_setup(clk);
flags = check_ppi_trigger(arch_timer_ppi[arch_timer_uses_ppi]);
enable_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi], flags);
@@ -1075,22 +874,12 @@ static void __init arch_timer_of_configure_rate(u32 rate, struct device_node *np
pr_warn("frequency not available\n");
}
-static void __init arch_timer_banner(unsigned type)
+static void __init arch_timer_banner(void)
{
- pr_info("%s%s%s timer(s) running at %lu.%02luMHz (%s%s%s).\n",
- type & ARCH_TIMER_TYPE_CP15 ? "cp15" : "",
- type == (ARCH_TIMER_TYPE_CP15 | ARCH_TIMER_TYPE_MEM) ?
- " and " : "",
- type & ARCH_TIMER_TYPE_MEM ? "mmio" : "",
+ pr_info("cp15 timer running at %lu.%02luMHz (%s).\n",
(unsigned long)arch_timer_rate / 1000000,
(unsigned long)(arch_timer_rate / 10000) % 100,
- type & ARCH_TIMER_TYPE_CP15 ?
- (arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) ? "virt" : "phys" :
- "",
- type == (ARCH_TIMER_TYPE_CP15 | ARCH_TIMER_TYPE_MEM) ? "/" : "",
- type & ARCH_TIMER_TYPE_MEM ?
- arch_timer_mem_use_virtual ? "virt" : "phys" :
- "");
+ (arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) ? "virt" : "phys");
}
u32 arch_timer_get_rate(void)
@@ -1108,11 +897,6 @@ bool arch_timer_evtstrm_available(void)
return cpumask_test_cpu(raw_smp_processor_id(), &evtstrm_available);
}
-static noinstr u64 arch_counter_get_cntvct_mem(void)
-{
- return arch_counter_get_cnt_mem(arch_timer_mem, CNTVCT_LO);
-}
-
static struct arch_timer_kvm_info arch_timer_kvm_info;
struct arch_timer_kvm_info *arch_timer_get_kvm_info(void)
@@ -1120,42 +904,35 @@ struct arch_timer_kvm_info *arch_timer_get_kvm_info(void)
return &arch_timer_kvm_info;
}
-static void __init arch_counter_register(unsigned type)
+static void __init arch_counter_register(void)
{
u64 (*scr)(void);
+ u64 (*rd)(void);
u64 start_count;
int width;
- /* Register the CP15 based counter if we have one */
- if (type & ARCH_TIMER_TYPE_CP15) {
- u64 (*rd)(void);
-
- if ((IS_ENABLED(CONFIG_ARM64) && !is_hyp_mode_available()) ||
- arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) {
- if (arch_timer_counter_has_wa()) {
- rd = arch_counter_get_cntvct_stable;
- scr = raw_counter_get_cntvct_stable;
- } else {
- rd = arch_counter_get_cntvct;
- scr = arch_counter_get_cntvct;
- }
+ if ((IS_ENABLED(CONFIG_ARM64) && !is_hyp_mode_available()) ||
+ arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) {
+ if (arch_timer_counter_has_wa()) {
+ rd = arch_counter_get_cntvct_stable;
+ scr = raw_counter_get_cntvct_stable;
} else {
- if (arch_timer_counter_has_wa()) {
- rd = arch_counter_get_cntpct_stable;
- scr = raw_counter_get_cntpct_stable;
- } else {
- rd = arch_counter_get_cntpct;
- scr = arch_counter_get_cntpct;
- }
+ rd = arch_counter_get_cntvct;
+ scr = arch_counter_get_cntvct;
}
-
- arch_timer_read_counter = rd;
- clocksource_counter.vdso_clock_mode = vdso_default;
} else {
- arch_timer_read_counter = arch_counter_get_cntvct_mem;
- scr = arch_counter_get_cntvct_mem;
+ if (arch_timer_counter_has_wa()) {
+ rd = arch_counter_get_cntpct_stable;
+ scr = raw_counter_get_cntpct_stable;
+ } else {
+ rd = arch_counter_get_cntpct;
+ scr = arch_counter_get_cntpct;
+ }
}
+ arch_timer_read_counter = rd;
+ clocksource_counter.vdso_clock_mode = vdso_default;
+
width = arch_counter_get_width();
clocksource_counter.mask = CLOCKSOURCE_MASK(width);
cyclecounter.mask = CLOCKSOURCE_MASK(width);
@@ -1303,76 +1080,10 @@ out:
return err;
}
-static int __init arch_timer_mem_register(void __iomem *base, unsigned int irq)
-{
- int ret;
- irq_handler_t func;
-
- arch_timer_mem = kzalloc(sizeof(*arch_timer_mem), GFP_KERNEL);
- if (!arch_timer_mem)
- return -ENOMEM;
-
- arch_timer_mem->base = base;
- arch_timer_mem->evt.irq = irq;
- __arch_timer_setup(ARCH_TIMER_TYPE_MEM, &arch_timer_mem->evt);
-
- if (arch_timer_mem_use_virtual)
- func = arch_timer_handler_virt_mem;
- else
- func = arch_timer_handler_phys_mem;
-
- ret = request_irq(irq, func, IRQF_TIMER, "arch_mem_timer", &arch_timer_mem->evt);
- if (ret) {
- pr_err("Failed to request mem timer irq\n");
- kfree(arch_timer_mem);
- arch_timer_mem = NULL;
- }
-
- return ret;
-}
-
-static const struct of_device_id arch_timer_of_match[] __initconst = {
- { .compatible = "arm,armv7-timer", },
- { .compatible = "arm,armv8-timer", },
- {},
-};
-
-static const struct of_device_id arch_timer_mem_of_match[] __initconst = {
- { .compatible = "arm,armv7-timer-mem", },
- {},
-};
-
-static bool __init arch_timer_needs_of_probing(void)
-{
- struct device_node *dn;
- bool needs_probing = false;
- unsigned int mask = ARCH_TIMER_TYPE_CP15 | ARCH_TIMER_TYPE_MEM;
-
- /* We have two timers, and both device-tree nodes are probed. */
- if ((arch_timers_present & mask) == mask)
- return false;
-
- /*
- * Only one type of timer is probed,
- * check if we have another type of timer node in device-tree.
- */
- if (arch_timers_present & ARCH_TIMER_TYPE_CP15)
- dn = of_find_matching_node(NULL, arch_timer_mem_of_match);
- else
- dn = of_find_matching_node(NULL, arch_timer_of_match);
-
- if (dn && of_device_is_available(dn))
- needs_probing = true;
-
- of_node_put(dn);
-
- return needs_probing;
-}
-
static int __init arch_timer_common_init(void)
{
- arch_timer_banner(arch_timers_present);
- arch_counter_register(arch_timers_present);
+ arch_timer_banner();
+ arch_counter_register();
return arch_timer_arch_init();
}
@@ -1421,13 +1132,11 @@ static int __init arch_timer_of_init(struct device_node *np)
u32 rate;
bool has_names;
- if (arch_timers_present & ARCH_TIMER_TYPE_CP15) {
+ if (arch_timer_evt) {
pr_warn("multiple nodes in dt, skipping\n");
return 0;
}
- arch_timers_present |= ARCH_TIMER_TYPE_CP15;
-
has_names = of_property_present(np, "interrupt-names");
for (i = ARCH_TIMER_PHYS_SECURE_PPI; i < ARCH_TIMER_MAX_TIMER_PPI; i++) {
@@ -1472,283 +1181,22 @@ static int __init arch_timer_of_init(struct device_node *np)
if (ret)
return ret;
- if (arch_timer_needs_of_probing())
- return 0;
-
return arch_timer_common_init();
}
TIMER_OF_DECLARE(armv7_arch_timer, "arm,armv7-timer", arch_timer_of_init);
TIMER_OF_DECLARE(armv8_arch_timer, "arm,armv8-timer", arch_timer_of_init);
-static u32 __init
-arch_timer_mem_frame_get_cntfrq(struct arch_timer_mem_frame *frame)
-{
- void __iomem *base;
- u32 rate;
-
- base = ioremap(frame->cntbase, frame->size);
- if (!base) {
- pr_err("Unable to map frame @ %pa\n", &frame->cntbase);
- return 0;
- }
-
- rate = readl_relaxed(base + CNTFRQ);
-
- iounmap(base);
-
- return rate;
-}
-
-static struct arch_timer_mem_frame * __init
-arch_timer_mem_find_best_frame(struct arch_timer_mem *timer_mem)
-{
- struct arch_timer_mem_frame *frame, *best_frame = NULL;
- void __iomem *cntctlbase;
- u32 cnttidr;
- int i;
-
- cntctlbase = ioremap(timer_mem->cntctlbase, timer_mem->size);
- if (!cntctlbase) {
- pr_err("Can't map CNTCTLBase @ %pa\n",
- &timer_mem->cntctlbase);
- return NULL;
- }
-
- cnttidr = readl_relaxed(cntctlbase + CNTTIDR);
-
- /*
- * Try to find a virtual capable frame. Otherwise fall back to a
- * physical capable frame.
- */
- for (i = 0; i < ARCH_TIMER_MEM_MAX_FRAMES; i++) {
- u32 cntacr = CNTACR_RFRQ | CNTACR_RWPT | CNTACR_RPCT |
- CNTACR_RWVT | CNTACR_RVOFF | CNTACR_RVCT;
-
- frame = &timer_mem->frame[i];
- if (!frame->valid)
- continue;
-
- /* Try enabling everything, and see what sticks */
- writel_relaxed(cntacr, cntctlbase + CNTACR(i));
- cntacr = readl_relaxed(cntctlbase + CNTACR(i));
-
- if ((cnttidr & CNTTIDR_VIRT(i)) &&
- !(~cntacr & (CNTACR_RWVT | CNTACR_RVCT))) {
- best_frame = frame;
- arch_timer_mem_use_virtual = true;
- break;
- }
-
- if (~cntacr & (CNTACR_RWPT | CNTACR_RPCT))
- continue;
-
- best_frame = frame;
- }
-
- iounmap(cntctlbase);
-
- return best_frame;
-}
-
-static int __init
-arch_timer_mem_frame_register(struct arch_timer_mem_frame *frame)
-{
- void __iomem *base;
- int ret, irq;
-
- if (arch_timer_mem_use_virtual)
- irq = frame->virt_irq;
- else
- irq = frame->phys_irq;
-
- if (!irq) {
- pr_err("Frame missing %s irq.\n",
- arch_timer_mem_use_virtual ? "virt" : "phys");
- return -EINVAL;
- }
-
- if (!request_mem_region(frame->cntbase, frame->size,
- "arch_mem_timer"))
- return -EBUSY;
-
- base = ioremap(frame->cntbase, frame->size);
- if (!base) {
- pr_err("Can't map frame's registers\n");
- return -ENXIO;
- }
-
- ret = arch_timer_mem_register(base, irq);
- if (ret) {
- iounmap(base);
- return ret;
- }
-
- arch_timers_present |= ARCH_TIMER_TYPE_MEM;
-
- return 0;
-}
-
-static int __init arch_timer_mem_of_init(struct device_node *np)
-{
- struct arch_timer_mem *timer_mem;
- struct arch_timer_mem_frame *frame;
- struct resource res;
- int ret = -EINVAL;
- u32 rate;
-
- timer_mem = kzalloc(sizeof(*timer_mem), GFP_KERNEL);
- if (!timer_mem)
- return -ENOMEM;
-
- if (of_address_to_resource(np, 0, &res))
- goto out;
- timer_mem->cntctlbase = res.start;
- timer_mem->size = resource_size(&res);
-
- for_each_available_child_of_node_scoped(np, frame_node) {
- u32 n;
- struct arch_timer_mem_frame *frame;
-
- if (of_property_read_u32(frame_node, "frame-number", &n)) {
- pr_err(FW_BUG "Missing frame-number.\n");
- goto out;
- }
- if (n >= ARCH_TIMER_MEM_MAX_FRAMES) {
- pr_err(FW_BUG "Wrong frame-number, only 0-%u are permitted.\n",
- ARCH_TIMER_MEM_MAX_FRAMES - 1);
- goto out;
- }
- frame = &timer_mem->frame[n];
-
- if (frame->valid) {
- pr_err(FW_BUG "Duplicated frame-number.\n");
- goto out;
- }
-
- if (of_address_to_resource(frame_node, 0, &res))
- goto out;
-
- frame->cntbase = res.start;
- frame->size = resource_size(&res);
-
- frame->virt_irq = irq_of_parse_and_map(frame_node,
- ARCH_TIMER_VIRT_SPI);
- frame->phys_irq = irq_of_parse_and_map(frame_node,
- ARCH_TIMER_PHYS_SPI);
-
- frame->valid = true;
- }
-
- frame = arch_timer_mem_find_best_frame(timer_mem);
- if (!frame) {
- pr_err("Unable to find a suitable frame in timer @ %pa\n",
- &timer_mem->cntctlbase);
- ret = -EINVAL;
- goto out;
- }
-
- rate = arch_timer_mem_frame_get_cntfrq(frame);
- arch_timer_of_configure_rate(rate, np);
-
- ret = arch_timer_mem_frame_register(frame);
- if (!ret && !arch_timer_needs_of_probing())
- ret = arch_timer_common_init();
-out:
- kfree(timer_mem);
- return ret;
-}
-TIMER_OF_DECLARE(armv7_arch_timer_mem, "arm,armv7-timer-mem",
- arch_timer_mem_of_init);
-
#ifdef CONFIG_ACPI_GTDT
-static int __init
-arch_timer_mem_verify_cntfrq(struct arch_timer_mem *timer_mem)
-{
- struct arch_timer_mem_frame *frame;
- u32 rate;
- int i;
-
- for (i = 0; i < ARCH_TIMER_MEM_MAX_FRAMES; i++) {
- frame = &timer_mem->frame[i];
-
- if (!frame->valid)
- continue;
-
- rate = arch_timer_mem_frame_get_cntfrq(frame);
- if (rate == arch_timer_rate)
- continue;
-
- pr_err(FW_BUG "CNTFRQ mismatch: frame @ %pa: (0x%08lx), CPU: (0x%08lx)\n",
- &frame->cntbase,
- (unsigned long)rate, (unsigned long)arch_timer_rate);
-
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int __init arch_timer_mem_acpi_init(int platform_timer_count)
-{
- struct arch_timer_mem *timers, *timer;
- struct arch_timer_mem_frame *frame, *best_frame = NULL;
- int timer_count, i, ret = 0;
-
- timers = kcalloc(platform_timer_count, sizeof(*timers),
- GFP_KERNEL);
- if (!timers)
- return -ENOMEM;
-
- ret = acpi_arch_timer_mem_init(timers, &timer_count);
- if (ret || !timer_count)
- goto out;
-
- /*
- * While unlikely, it's theoretically possible that none of the frames
- * in a timer expose the combination of feature we want.
- */
- for (i = 0; i < timer_count; i++) {
- timer = &timers[i];
-
- frame = arch_timer_mem_find_best_frame(timer);
- if (!best_frame)
- best_frame = frame;
-
- ret = arch_timer_mem_verify_cntfrq(timer);
- if (ret) {
- pr_err("Disabling MMIO timers due to CNTFRQ mismatch\n");
- goto out;
- }
-
- if (!best_frame) /* implies !frame */
- /*
- * Only complain about missing suitable frames if we
- * haven't already found one in a previous iteration.
- */
- pr_err("Unable to find a suitable frame in timer @ %pa\n",
- &timer->cntctlbase);
- }
-
- if (best_frame)
- ret = arch_timer_mem_frame_register(best_frame);
-out:
- kfree(timers);
- return ret;
-}
-
-/* Initialize per-processor generic timer and memory-mapped timer(if present) */
static int __init arch_timer_acpi_init(struct acpi_table_header *table)
{
- int ret, platform_timer_count;
+ int ret;
- if (arch_timers_present & ARCH_TIMER_TYPE_CP15) {
+ if (arch_timer_evt) {
pr_warn("already initialized, skipping\n");
return -EINVAL;
}
- arch_timers_present |= ARCH_TIMER_TYPE_CP15;
-
- ret = acpi_gtdt_init(table, &platform_timer_count);
+ ret = acpi_gtdt_init(table, NULL);
if (ret)
return ret;
@@ -1790,10 +1238,6 @@ static int __init arch_timer_acpi_init(struct acpi_table_header *table)
if (ret)
return ret;
- if (platform_timer_count &&
- arch_timer_mem_acpi_init(platform_timer_count))
- pr_err("Failed to initialize memory-mapped timer.\n");
-
return arch_timer_common_init();
}
TIMER_ACPI_DECLARE(arch_timer, ACPI_SIG_GTDT, arch_timer_acpi_init);
diff --git a/drivers/clocksource/arm_arch_timer_mmio.c b/drivers/clocksource/arm_arch_timer_mmio.c
new file mode 100644
index 000000000000..ebe1987d651e
--- /dev/null
+++ b/drivers/clocksource/arm_arch_timer_mmio.c
@@ -0,0 +1,440 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * ARM Generic Memory Mapped Timer support
+ *
+ * Split from drivers/clocksource/arm_arch_timer.c
+ *
+ * Copyright (C) 2011 ARM Ltd.
+ * All Rights Reserved
+ */
+
+#define pr_fmt(fmt) "arch_timer_mmio: " fmt
+
+#include <linux/clockchips.h>
+#include <linux/interrupt.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/of_irq.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+
+#include <clocksource/arm_arch_timer.h>
+
+#define CNTTIDR 0x08
+#define CNTTIDR_VIRT(n) (BIT(1) << ((n) * 4))
+
+#define CNTACR(n) (0x40 + ((n) * 4))
+#define CNTACR_RPCT BIT(0)
+#define CNTACR_RVCT BIT(1)
+#define CNTACR_RFRQ BIT(2)
+#define CNTACR_RVOFF BIT(3)
+#define CNTACR_RWVT BIT(4)
+#define CNTACR_RWPT BIT(5)
+
+#define CNTPCT_LO 0x00
+#define CNTVCT_LO 0x08
+#define CNTFRQ 0x10
+#define CNTP_CVAL_LO 0x20
+#define CNTP_CTL 0x2c
+#define CNTV_CVAL_LO 0x30
+#define CNTV_CTL 0x3c
+
+enum arch_timer_access {
+ PHYS_ACCESS,
+ VIRT_ACCESS,
+};
+
+struct arch_timer {
+ struct clock_event_device evt;
+ struct clocksource cs;
+ struct arch_timer_mem *gt_block;
+ void __iomem *base;
+ enum arch_timer_access access;
+ u32 rate;
+};
+
+#define evt_to_arch_timer(e) container_of(e, struct arch_timer, evt)
+#define cs_to_arch_timer(c) container_of(c, struct arch_timer, cs)
+
+static void arch_timer_mmio_write(struct arch_timer *timer,
+ enum arch_timer_reg reg, u64 val)
+{
+ switch (timer->access) {
+ case PHYS_ACCESS:
+ switch (reg) {
+ case ARCH_TIMER_REG_CTRL:
+ writel_relaxed((u32)val, timer->base + CNTP_CTL);
+ return;
+ case ARCH_TIMER_REG_CVAL:
+ /*
+ * Not guaranteed to be atomic, so the timer
+ * must be disabled at this point.
+ */
+ writeq_relaxed(val, timer->base + CNTP_CVAL_LO);
+ return;
+ }
+ break;
+ case VIRT_ACCESS:
+ switch (reg) {
+ case ARCH_TIMER_REG_CTRL:
+ writel_relaxed((u32)val, timer->base + CNTV_CTL);
+ return;
+ case ARCH_TIMER_REG_CVAL:
+ /* Same restriction as above */
+ writeq_relaxed(val, timer->base + CNTV_CVAL_LO);
+ return;
+ }
+ break;
+ }
+
+ /* Should never be here */
+ WARN_ON_ONCE(1);
+}
+
+static u32 arch_timer_mmio_read(struct arch_timer *timer, enum arch_timer_reg reg)
+{
+ switch (timer->access) {
+ case PHYS_ACCESS:
+ switch (reg) {
+ case ARCH_TIMER_REG_CTRL:
+ return readl_relaxed(timer->base + CNTP_CTL);
+ default:
+ break;
+ }
+ break;
+ case VIRT_ACCESS:
+ switch (reg) {
+ case ARCH_TIMER_REG_CTRL:
+ return readl_relaxed(timer->base + CNTV_CTL);
+ default:
+ break;
+ }
+ break;
+ }
+
+ /* Should never be here */
+ WARN_ON_ONCE(1);
+ return 0;
+}
+
+static noinstr u64 arch_counter_mmio_get_cnt(struct arch_timer *t)
+{
+ int offset_lo = t->access == VIRT_ACCESS ? CNTVCT_LO : CNTPCT_LO;
+ u32 cnt_lo, cnt_hi, tmp_hi;
+
+ do {
+ cnt_hi = __le32_to_cpu((__le32 __force)__raw_readl(t->base + offset_lo + 4));
+ cnt_lo = __le32_to_cpu((__le32 __force)__raw_readl(t->base + offset_lo));
+ tmp_hi = __le32_to_cpu((__le32 __force)__raw_readl(t->base + offset_lo + 4));
+ } while (cnt_hi != tmp_hi);
+
+ return ((u64) cnt_hi << 32) | cnt_lo;
+}
+
+static u64 arch_mmio_counter_read(struct clocksource *cs)
+{
+ struct arch_timer *at = cs_to_arch_timer(cs);
+
+ return arch_counter_mmio_get_cnt(at);
+}
+
+static int arch_timer_mmio_shutdown(struct clock_event_device *clk)
+{
+ struct arch_timer *at = evt_to_arch_timer(clk);
+ unsigned long ctrl;
+
+ ctrl = arch_timer_mmio_read(at, ARCH_TIMER_REG_CTRL);
+ ctrl &= ~ARCH_TIMER_CTRL_ENABLE;
+ arch_timer_mmio_write(at, ARCH_TIMER_REG_CTRL, ctrl);
+
+ return 0;
+}
+
+static int arch_timer_mmio_set_next_event(unsigned long evt,
+ struct clock_event_device *clk)
+{
+ struct arch_timer *timer = evt_to_arch_timer(clk);
+ unsigned long ctrl;
+ u64 cnt;
+
+ ctrl = arch_timer_mmio_read(timer, ARCH_TIMER_REG_CTRL);
+
+ /* Timer must be disabled before programming CVAL */
+ if (ctrl & ARCH_TIMER_CTRL_ENABLE) {
+ ctrl &= ~ARCH_TIMER_CTRL_ENABLE;
+ arch_timer_mmio_write(timer, ARCH_TIMER_REG_CTRL, ctrl);
+ }
+
+ ctrl |= ARCH_TIMER_CTRL_ENABLE;
+ ctrl &= ~ARCH_TIMER_CTRL_IT_MASK;
+
+ cnt = arch_counter_mmio_get_cnt(timer);
+
+ arch_timer_mmio_write(timer, ARCH_TIMER_REG_CVAL, evt + cnt);
+ arch_timer_mmio_write(timer, ARCH_TIMER_REG_CTRL, ctrl);
+ return 0;
+}
+
+static irqreturn_t arch_timer_mmio_handler(int irq, void *dev_id)
+{
+ struct clock_event_device *evt = dev_id;
+ struct arch_timer *at = evt_to_arch_timer(evt);
+ unsigned long ctrl;
+
+ ctrl = arch_timer_mmio_read(at, ARCH_TIMER_REG_CTRL);
+ if (ctrl & ARCH_TIMER_CTRL_IT_STAT) {
+ ctrl |= ARCH_TIMER_CTRL_IT_MASK;
+ arch_timer_mmio_write(at, ARCH_TIMER_REG_CTRL, ctrl);
+ evt->event_handler(evt);
+ return IRQ_HANDLED;
+ }
+
+ return IRQ_NONE;
+}
+
+static struct arch_timer_mem_frame *find_best_frame(struct platform_device *pdev)
+{
+ struct arch_timer_mem_frame *frame, *best_frame = NULL;
+ struct arch_timer *at = platform_get_drvdata(pdev);
+ void __iomem *cntctlbase;
+ u32 cnttidr;
+
+ cntctlbase = ioremap(at->gt_block->cntctlbase, at->gt_block->size);
+ if (!cntctlbase) {
+ dev_err(&pdev->dev, "Can't map CNTCTLBase @ %pa\n",
+ &at->gt_block->cntctlbase);
+ return NULL;
+ }
+
+ cnttidr = readl_relaxed(cntctlbase + CNTTIDR);
+
+ /*
+ * Try to find a virtual capable frame. Otherwise fall back to a
+ * physical capable frame.
+ */
+ for (int i = 0; i < ARCH_TIMER_MEM_MAX_FRAMES; i++) {
+ u32 cntacr = CNTACR_RFRQ | CNTACR_RWPT | CNTACR_RPCT |
+ CNTACR_RWVT | CNTACR_RVOFF | CNTACR_RVCT;
+
+ frame = &at->gt_block->frame[i];
+ if (!frame->valid)
+ continue;
+
+ /* Try enabling everything, and see what sticks */
+ writel_relaxed(cntacr, cntctlbase + CNTACR(i));
+ cntacr = readl_relaxed(cntctlbase + CNTACR(i));
+
+ /* Pick a suitable frame for which we have an IRQ */
+ if ((cnttidr & CNTTIDR_VIRT(i)) &&
+ !(~cntacr & (CNTACR_RWVT | CNTACR_RVCT)) &&
+ frame->virt_irq) {
+ best_frame = frame;
+ at->access = VIRT_ACCESS;
+ break;
+ }
+
+ if ((~cntacr & (CNTACR_RWPT | CNTACR_RPCT)) ||
+ !frame->phys_irq)
+ continue;
+
+ at->access = PHYS_ACCESS;
+ best_frame = frame;
+ }
+
+ iounmap(cntctlbase);
+
+ return best_frame;
+}
+
+static void arch_timer_mmio_setup(struct arch_timer *at, int irq)
+{
+ at->evt = (struct clock_event_device) {
+ .features = (CLOCK_EVT_FEAT_ONESHOT |
+ CLOCK_EVT_FEAT_DYNIRQ),
+ .name = "arch_mem_timer",
+ .rating = 400,
+ .cpumask = cpu_possible_mask,
+ .irq = irq,
+ .set_next_event = arch_timer_mmio_set_next_event,
+ .set_state_oneshot_stopped = arch_timer_mmio_shutdown,
+ .set_state_shutdown = arch_timer_mmio_shutdown,
+ };
+
+ at->evt.set_state_shutdown(&at->evt);
+
+ clockevents_config_and_register(&at->evt, at->rate, 0xf,
+ (unsigned long)CLOCKSOURCE_MASK(56));
+
+ enable_irq(at->evt.irq);
+
+ at->cs = (struct clocksource) {
+ .name = "arch_mmio_counter",
+ .rating = 300,
+ .read = arch_mmio_counter_read,
+ .mask = CLOCKSOURCE_MASK(56),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ };
+
+ clocksource_register_hz(&at->cs, at->rate);
+}
+
+static int arch_timer_mmio_frame_register(struct platform_device *pdev,
+ struct arch_timer_mem_frame *frame)
+{
+ struct arch_timer *at = platform_get_drvdata(pdev);
+ struct device_node *np = pdev->dev.of_node;
+ int ret, irq;
+ u32 rate;
+
+ if (!devm_request_mem_region(&pdev->dev, frame->cntbase, frame->size,
+ "arch_mem_timer"))
+ return -EBUSY;
+
+ at->base = devm_ioremap(&pdev->dev, frame->cntbase, frame->size);
+ if (!at->base) {
+ dev_err(&pdev->dev, "Can't map frame's registers\n");
+ return -ENXIO;
+ }
+
+ /*
+ * Allow "clock-frequency" to override the probed rate. If neither
+ * lead to something useful, use the CPU timer frequency as the
+ * fallback. The nice thing about that last point is that we woudn't
+ * made it here if we didn't have a valid frequency.
+ */
+ rate = readl_relaxed(at->base + CNTFRQ);
+
+ if (!np || of_property_read_u32(np, "clock-frequency", &at->rate))
+ at->rate = rate;
+
+ if (!at->rate)
+ at->rate = arch_timer_get_rate();
+
+ irq = at->access == VIRT_ACCESS ? frame->virt_irq : frame->phys_irq;
+ ret = devm_request_irq(&pdev->dev, irq, arch_timer_mmio_handler,
+ IRQF_TIMER | IRQF_NO_AUTOEN, "arch_mem_timer",
+ &at->evt);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to request mem timer irq\n");
+ return ret;
+ }
+
+ /* Afer this point, we're not allowed to fail anymore */
+ arch_timer_mmio_setup(at, irq);
+ return 0;
+}
+
+static int of_populate_gt_block(struct platform_device *pdev,
+ struct arch_timer *at)
+{
+ struct resource res;
+
+ if (of_address_to_resource(pdev->dev.of_node, 0, &res))
+ return -EINVAL;
+
+ at->gt_block->cntctlbase = res.start;
+ at->gt_block->size = resource_size(&res);
+
+ for_each_available_child_of_node_scoped(pdev->dev.of_node, frame_node) {
+ struct arch_timer_mem_frame *frame;
+ u32 n;
+
+ if (of_property_read_u32(frame_node, "frame-number", &n)) {
+ dev_err(&pdev->dev, FW_BUG "Missing frame-number\n");
+ return -EINVAL;
+ }
+ if (n >= ARCH_TIMER_MEM_MAX_FRAMES) {
+ dev_err(&pdev->dev,
+ FW_BUG "Wrong frame-number, only 0-%u are permitted\n",
+ ARCH_TIMER_MEM_MAX_FRAMES - 1);
+ return -EINVAL;
+ }
+
+ frame = &at->gt_block->frame[n];
+
+ if (frame->valid) {
+ dev_err(&pdev->dev, FW_BUG "Duplicated frame-number\n");
+ return -EINVAL;
+ }
+
+ if (of_address_to_resource(frame_node, 0, &res))
+ return -EINVAL;
+
+ frame->cntbase = res.start;
+ frame->size = resource_size(&res);
+
+ frame->phys_irq = irq_of_parse_and_map(frame_node, 0);
+ frame->virt_irq = irq_of_parse_and_map(frame_node, 1);
+
+ frame->valid = true;
+ }
+
+ return 0;
+}
+
+static int arch_timer_mmio_probe(struct platform_device *pdev)
+{
+ struct arch_timer_mem_frame *frame;
+ struct arch_timer *at;
+ struct device_node *np;
+ int ret;
+
+ np = pdev->dev.of_node;
+
+ at = devm_kmalloc(&pdev->dev, sizeof(*at), GFP_KERNEL | __GFP_ZERO);
+ if (!at)
+ return -ENOMEM;
+
+ if (np) {
+ at->gt_block = devm_kmalloc(&pdev->dev, sizeof(*at->gt_block),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!at->gt_block)
+ return -ENOMEM;
+ ret = of_populate_gt_block(pdev, at);
+ if (ret)
+ return ret;
+ } else {
+ at->gt_block = dev_get_platdata(&pdev->dev);
+ }
+
+ platform_set_drvdata(pdev, at);
+
+ frame = find_best_frame(pdev);
+ if (!frame) {
+ dev_err(&pdev->dev,
+ "Unable to find a suitable frame in timer @ %pa\n",
+ &at->gt_block->cntctlbase);
+ return -EINVAL;
+ }
+
+ ret = arch_timer_mmio_frame_register(pdev, frame);
+ if (!ret)
+ dev_info(&pdev->dev,
+ "mmio timer running at %lu.%02luMHz (%s)\n",
+ (unsigned long)at->rate / 1000000,
+ (unsigned long)(at->rate / 10000) % 100,
+ at->access == VIRT_ACCESS ? "virt" : "phys");
+
+ return ret;
+}
+
+static const struct of_device_id arch_timer_mmio_of_table[] = {
+ { .compatible = "arm,armv7-timer-mem", },
+ {}
+};
+
+static struct platform_driver arch_timer_mmio_drv = {
+ .driver = {
+ .name = "arch-timer-mmio",
+ .of_match_table = arch_timer_mmio_of_table,
+ },
+ .probe = arch_timer_mmio_probe,
+};
+builtin_platform_driver(arch_timer_mmio_drv);
+
+static struct platform_driver arch_timer_mmio_acpi_drv = {
+ .driver = {
+ .name = "gtdt-arm-mmio-timer",
+ },
+ .probe = arch_timer_mmio_probe,
+};
+builtin_platform_driver(arch_timer_mmio_acpi_drv);
diff --git a/drivers/clocksource/arm_global_timer.c b/drivers/clocksource/arm_global_timer.c
index 2d86bbc2764a..5e3d6bb7e437 100644
--- a/drivers/clocksource/arm_global_timer.c
+++ b/drivers/clocksource/arm_global_timer.c
@@ -263,14 +263,13 @@ static void __init gt_delay_timer_init(void)
register_current_timer_delay(&gt_delay_timer);
}
-static int __init gt_clocksource_init(void)
+static int __init gt_clocksource_init(unsigned int psv)
{
writel(0, gt_base + GT_CONTROL);
writel(0, gt_base + GT_COUNTER0);
writel(0, gt_base + GT_COUNTER1);
/* set prescaler and enable timer on all the cores */
- writel(FIELD_PREP(GT_CONTROL_PRESCALER_MASK,
- CONFIG_ARM_GT_INITIAL_PRESCALER_VAL - 1) |
+ writel(FIELD_PREP(GT_CONTROL_PRESCALER_MASK, psv - 1) |
GT_CONTROL_TIMER_ENABLE, gt_base + GT_CONTROL);
#ifdef CONFIG_CLKSRC_ARM_GLOBAL_TIMER_SCHED_CLOCK
@@ -338,11 +337,45 @@ static int gt_clk_rate_change_cb(struct notifier_block *nb,
return NOTIFY_DONE;
}
+struct gt_prescaler_config {
+ const char *compatible;
+ unsigned long prescaler;
+};
+
+static const struct gt_prescaler_config gt_prescaler_configs[] = {
+ /*
+ * On am43 the global timer clock is a child of the clock used for CPU
+ * OPPs, so the initial prescaler has to be compatible with all OPPs
+ * which are 300, 600, 720, 800 and 1000 with a fixed divider of 2, this
+ * gives us a GCD of 10. Initial frequency is 1000, so the prescaler is
+ * 50.
+ */
+ { .compatible = "ti,am43", .prescaler = 50 },
+ { .compatible = "xlnx,zynq-7000", .prescaler = 2 },
+ { .compatible = NULL }
+};
+
+static unsigned long gt_get_initial_prescaler_value(struct device_node *np)
+{
+ const struct gt_prescaler_config *config;
+
+ if (CONFIG_ARM_GT_INITIAL_PRESCALER_VAL != 0)
+ return CONFIG_ARM_GT_INITIAL_PRESCALER_VAL;
+
+ for (config = gt_prescaler_configs; config->compatible; config++) {
+ if (of_machine_is_compatible(config->compatible))
+ return config->prescaler;
+ }
+
+ return 1;
+}
+
static int __init global_timer_of_register(struct device_node *np)
{
struct clk *gt_clk;
static unsigned long gt_clk_rate;
int err;
+ unsigned long psv;
/*
* In A9 r2p0 the comparators for each processor with the global timer
@@ -378,8 +411,9 @@ static int __init global_timer_of_register(struct device_node *np)
goto out_unmap;
}
+ psv = gt_get_initial_prescaler_value(np);
gt_clk_rate = clk_get_rate(gt_clk);
- gt_target_rate = gt_clk_rate / CONFIG_ARM_GT_INITIAL_PRESCALER_VAL;
+ gt_target_rate = gt_clk_rate / psv;
gt_clk_rate_change_nb.notifier_call =
gt_clk_rate_change_cb;
err = clk_notifier_register(gt_clk, &gt_clk_rate_change_nb);
@@ -404,7 +438,7 @@ static int __init global_timer_of_register(struct device_node *np)
}
/* Register and immediately configure the timer on the boot CPU */
- err = gt_clocksource_init();
+ err = gt_clocksource_init(psv);
if (err)
goto out_irq;
diff --git a/drivers/clocksource/clps711x-timer.c b/drivers/clocksource/clps711x-timer.c
index e95fdc49c226..bbceb0289d45 100644
--- a/drivers/clocksource/clps711x-timer.c
+++ b/drivers/clocksource/clps711x-timer.c
@@ -78,24 +78,33 @@ static int __init clps711x_timer_init(struct device_node *np)
unsigned int irq = irq_of_parse_and_map(np, 0);
struct clk *clock = of_clk_get(np, 0);
void __iomem *base = of_iomap(np, 0);
+ int ret = 0;
if (!base)
return -ENOMEM;
- if (!irq)
- return -EINVAL;
- if (IS_ERR(clock))
- return PTR_ERR(clock);
+ if (!irq) {
+ ret = -EINVAL;
+ goto unmap_io;
+ }
+ if (IS_ERR(clock)) {
+ ret = PTR_ERR(clock);
+ goto unmap_io;
+ }
switch (of_alias_get_id(np, "timer")) {
case CLPS711X_CLKSRC_CLOCKSOURCE:
clps711x_clksrc_init(clock, base);
break;
case CLPS711X_CLKSRC_CLOCKEVENT:
- return _clps711x_clkevt_init(clock, base, irq);
+ ret = _clps711x_clkevt_init(clock, base, irq);
+ break;
default:
- return -EINVAL;
+ ret = -EINVAL;
+ break;
}
- return 0;
+unmap_io:
+ iounmap(base);
+ return ret;
}
TIMER_OF_DECLARE(clps711x, "cirrus,ep7209-timer", clps711x_timer_init);
diff --git a/drivers/clocksource/ingenic-sysost.c b/drivers/clocksource/ingenic-sysost.c
index cb6fc2f152d4..e79cfb0b8e05 100644
--- a/drivers/clocksource/ingenic-sysost.c
+++ b/drivers/clocksource/ingenic-sysost.c
@@ -127,18 +127,23 @@ static u8 ingenic_ost_get_prescale(unsigned long rate, unsigned long req_rate)
return 2; /* /16 divider */
}
-static long ingenic_ost_round_rate(struct clk_hw *hw, unsigned long req_rate,
- unsigned long *parent_rate)
+static int ingenic_ost_determine_rate(struct clk_hw *hw,
+ struct clk_rate_request *req)
{
- unsigned long rate = *parent_rate;
+ unsigned long rate = req->best_parent_rate;
u8 prescale;
- if (req_rate > rate)
- return rate;
+ if (req->rate > rate) {
+ req->rate = rate;
- prescale = ingenic_ost_get_prescale(rate, req_rate);
+ return 0;
+ }
+
+ prescale = ingenic_ost_get_prescale(rate, req->rate);
- return rate >> (prescale * 2);
+ req->rate = rate >> (prescale * 2);
+
+ return 0;
}
static int ingenic_ost_percpu_timer_set_rate(struct clk_hw *hw, unsigned long req_rate,
@@ -175,14 +180,14 @@ static int ingenic_ost_global_timer_set_rate(struct clk_hw *hw, unsigned long re
static const struct clk_ops ingenic_ost_percpu_timer_ops = {
.recalc_rate = ingenic_ost_percpu_timer_recalc_rate,
- .round_rate = ingenic_ost_round_rate,
- .set_rate = ingenic_ost_percpu_timer_set_rate,
+ .determine_rate = ingenic_ost_determine_rate,
+ .set_rate = ingenic_ost_percpu_timer_set_rate,
};
static const struct clk_ops ingenic_ost_global_timer_ops = {
.recalc_rate = ingenic_ost_global_timer_recalc_rate,
- .round_rate = ingenic_ost_round_rate,
- .set_rate = ingenic_ost_global_timer_set_rate,
+ .determine_rate = ingenic_ost_determine_rate,
+ .set_rate = ingenic_ost_global_timer_set_rate,
};
static const char * const ingenic_ost_clk_parents[] = { "ext" };
diff --git a/drivers/clocksource/scx200_hrt.c b/drivers/clocksource/scx200_hrt.c
index c3536fffbe9a..5a99801a1657 100644
--- a/drivers/clocksource/scx200_hrt.c
+++ b/drivers/clocksource/scx200_hrt.c
@@ -52,6 +52,7 @@ static struct clocksource cs_hrt = {
.mask = CLOCKSOURCE_MASK(32),
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
/* mult, shift are set based on mhz27 flag */
+ .owner = THIS_MODULE,
};
static int __init init_hrt_clocksource(void)
diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c
index b72b36e0abed..385eb94bbe7c 100644
--- a/drivers/clocksource/sh_cmt.c
+++ b/drivers/clocksource/sh_cmt.c
@@ -578,37 +578,74 @@ static irqreturn_t sh_cmt_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}
-static int sh_cmt_start(struct sh_cmt_channel *ch, unsigned long flag)
+static int sh_cmt_start_clocksource(struct sh_cmt_channel *ch)
{
int ret = 0;
unsigned long flags;
- if (flag & FLAG_CLOCKSOURCE)
- pm_runtime_get_sync(&ch->cmt->pdev->dev);
+ pm_runtime_get_sync(&ch->cmt->pdev->dev);
raw_spin_lock_irqsave(&ch->lock, flags);
- if (!(ch->flags & (FLAG_CLOCKEVENT | FLAG_CLOCKSOURCE))) {
- if (flag & FLAG_CLOCKEVENT)
- pm_runtime_get_sync(&ch->cmt->pdev->dev);
+ if (!(ch->flags & (FLAG_CLOCKEVENT | FLAG_CLOCKSOURCE)))
ret = sh_cmt_enable(ch);
- }
if (ret)
goto out;
- ch->flags |= flag;
+
+ ch->flags |= FLAG_CLOCKSOURCE;
/* setup timeout if no clockevent */
- if (ch->cmt->num_channels == 1 &&
- flag == FLAG_CLOCKSOURCE && (!(ch->flags & FLAG_CLOCKEVENT)))
+ if (ch->cmt->num_channels == 1 && !(ch->flags & FLAG_CLOCKEVENT))
__sh_cmt_set_next(ch, ch->max_match_value);
+out:
+ raw_spin_unlock_irqrestore(&ch->lock, flags);
+
+ return ret;
+}
+
+static void sh_cmt_stop_clocksource(struct sh_cmt_channel *ch)
+{
+ unsigned long flags;
+ unsigned long f;
+
+ raw_spin_lock_irqsave(&ch->lock, flags);
+
+ f = ch->flags & (FLAG_CLOCKEVENT | FLAG_CLOCKSOURCE);
+
+ ch->flags &= ~FLAG_CLOCKSOURCE;
+
+ if (f && !(ch->flags & (FLAG_CLOCKEVENT | FLAG_CLOCKSOURCE)))
+ sh_cmt_disable(ch);
+
+ raw_spin_unlock_irqrestore(&ch->lock, flags);
+
+ pm_runtime_put(&ch->cmt->pdev->dev);
+}
+
+static int sh_cmt_start_clockevent(struct sh_cmt_channel *ch)
+{
+ int ret = 0;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&ch->lock, flags);
+
+ if (!(ch->flags & (FLAG_CLOCKEVENT | FLAG_CLOCKSOURCE))) {
+ pm_runtime_get_sync(&ch->cmt->pdev->dev);
+ ret = sh_cmt_enable(ch);
+ }
+
+ if (ret)
+ goto out;
+
+ ch->flags |= FLAG_CLOCKEVENT;
out:
raw_spin_unlock_irqrestore(&ch->lock, flags);
return ret;
}
-static void sh_cmt_stop(struct sh_cmt_channel *ch, unsigned long flag)
+static void sh_cmt_stop_clockevent(struct sh_cmt_channel *ch)
{
unsigned long flags;
unsigned long f;
@@ -616,22 +653,19 @@ static void sh_cmt_stop(struct sh_cmt_channel *ch, unsigned long flag)
raw_spin_lock_irqsave(&ch->lock, flags);
f = ch->flags & (FLAG_CLOCKEVENT | FLAG_CLOCKSOURCE);
- ch->flags &= ~flag;
+
+ ch->flags &= ~FLAG_CLOCKEVENT;
if (f && !(ch->flags & (FLAG_CLOCKEVENT | FLAG_CLOCKSOURCE))) {
sh_cmt_disable(ch);
- if (flag & FLAG_CLOCKEVENT)
- pm_runtime_put(&ch->cmt->pdev->dev);
+ pm_runtime_put(&ch->cmt->pdev->dev);
}
/* adjust the timeout to maximum if only clocksource left */
- if ((flag == FLAG_CLOCKEVENT) && (ch->flags & FLAG_CLOCKSOURCE))
+ if (ch->flags & FLAG_CLOCKSOURCE)
__sh_cmt_set_next(ch, ch->max_match_value);
raw_spin_unlock_irqrestore(&ch->lock, flags);
-
- if (flag & FLAG_CLOCKSOURCE)
- pm_runtime_put(&ch->cmt->pdev->dev);
}
static struct sh_cmt_channel *cs_to_sh_cmt(struct clocksource *cs)
@@ -672,7 +706,7 @@ static int sh_cmt_clocksource_enable(struct clocksource *cs)
ch->total_cycles = 0;
- ret = sh_cmt_start(ch, FLAG_CLOCKSOURCE);
+ ret = sh_cmt_start_clocksource(ch);
if (!ret)
ch->cs_enabled = true;
@@ -685,7 +719,7 @@ static void sh_cmt_clocksource_disable(struct clocksource *cs)
WARN_ON(!ch->cs_enabled);
- sh_cmt_stop(ch, FLAG_CLOCKSOURCE);
+ sh_cmt_stop_clocksource(ch);
ch->cs_enabled = false;
}
@@ -696,7 +730,7 @@ static void sh_cmt_clocksource_suspend(struct clocksource *cs)
if (!ch->cs_enabled)
return;
- sh_cmt_stop(ch, FLAG_CLOCKSOURCE);
+ sh_cmt_stop_clocksource(ch);
dev_pm_genpd_suspend(&ch->cmt->pdev->dev);
}
@@ -708,7 +742,7 @@ static void sh_cmt_clocksource_resume(struct clocksource *cs)
return;
dev_pm_genpd_resume(&ch->cmt->pdev->dev);
- sh_cmt_start(ch, FLAG_CLOCKSOURCE);
+ sh_cmt_start_clocksource(ch);
}
static int sh_cmt_register_clocksource(struct sh_cmt_channel *ch,
@@ -740,7 +774,7 @@ static struct sh_cmt_channel *ced_to_sh_cmt(struct clock_event_device *ced)
static void sh_cmt_clock_event_start(struct sh_cmt_channel *ch, int periodic)
{
- sh_cmt_start(ch, FLAG_CLOCKEVENT);
+ sh_cmt_start_clockevent(ch);
if (periodic)
sh_cmt_set_next(ch, ((ch->cmt->rate + HZ/2) / HZ) - 1);
@@ -752,7 +786,7 @@ static int sh_cmt_clock_event_shutdown(struct clock_event_device *ced)
{
struct sh_cmt_channel *ch = ced_to_sh_cmt(ced);
- sh_cmt_stop(ch, FLAG_CLOCKEVENT);
+ sh_cmt_stop_clockevent(ch);
return 0;
}
@@ -763,7 +797,7 @@ static int sh_cmt_clock_event_set_state(struct clock_event_device *ced,
/* deal with old setting first */
if (clockevent_state_oneshot(ced) || clockevent_state_periodic(ced))
- sh_cmt_stop(ch, FLAG_CLOCKEVENT);
+ sh_cmt_stop_clockevent(ch);
dev_info(&ch->cmt->pdev->dev, "ch%u: used for %s clock events\n",
ch->index, periodic ? "periodic" : "oneshot");
diff --git a/drivers/clocksource/timer-cs5535.c b/drivers/clocksource/timer-cs5535.c
index d47acfe848ae..8af666c39890 100644
--- a/drivers/clocksource/timer-cs5535.c
+++ b/drivers/clocksource/timer-cs5535.c
@@ -101,6 +101,7 @@ static struct clock_event_device cs5535_clockevent = {
.tick_resume = mfgpt_shutdown,
.set_next_event = mfgpt_next_event,
.rating = 250,
+ .owner = THIS_MODULE,
};
static irqreturn_t mfgpt_tick(int irq, void *dev_id)
diff --git a/drivers/clocksource/timer-econet-en751221.c b/drivers/clocksource/timer-econet-en751221.c
index 3b449fdaafee..4008076b1a21 100644
--- a/drivers/clocksource/timer-econet-en751221.c
+++ b/drivers/clocksource/timer-econet-en751221.c
@@ -146,7 +146,7 @@ static int __init cevt_init(struct device_node *np)
for_each_possible_cpu(i) {
struct clock_event_device *cd = &per_cpu(econet_timer_pcpu, i);
- cd->rating = 310,
+ cd->rating = 310;
cd->features = CLOCK_EVT_FEAT_ONESHOT |
CLOCK_EVT_FEAT_C3STOP |
CLOCK_EVT_FEAT_PERCPU;
diff --git a/drivers/clocksource/timer-nxp-pit.c b/drivers/clocksource/timer-nxp-pit.c
new file mode 100644
index 000000000000..2d0a3554b6bf
--- /dev/null
+++ b/drivers/clocksource/timer-nxp-pit.c
@@ -0,0 +1,382 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2012-2013 Freescale Semiconductor, Inc.
+ * Copyright 2018,2021-2025 NXP
+ */
+#include <linux/interrupt.h>
+#include <linux/clockchips.h>
+#include <linux/cpuhotplug.h>
+#include <linux/clk.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/sched_clock.h>
+#include <linux/platform_device.h>
+
+/*
+ * Each pit takes 0x10 Bytes register space
+ */
+#define PIT0_OFFSET 0x100
+#define PIT_CH(n) (PIT0_OFFSET + 0x10 * (n))
+
+#define PITMCR(__base) (__base)
+
+#define PITMCR_FRZ BIT(0)
+#define PITMCR_MDIS BIT(1)
+
+#define PITLDVAL(__base) (__base)
+#define PITTCTRL(__base) ((__base) + 0x08)
+
+#define PITCVAL_OFFSET 0x04
+#define PITCVAL(__base) ((__base) + 0x04)
+
+#define PITTCTRL_TEN BIT(0)
+#define PITTCTRL_TIE BIT(1)
+
+#define PITTFLG(__base) ((__base) + 0x0c)
+
+#define PITTFLG_TIF BIT(0)
+
+struct pit_timer {
+ void __iomem *clksrc_base;
+ void __iomem *clkevt_base;
+ struct clock_event_device ced;
+ struct clocksource cs;
+ int rate;
+};
+
+struct pit_timer_data {
+ int max_pit_instances;
+};
+
+static DEFINE_PER_CPU(struct pit_timer *, pit_timers);
+
+/*
+ * Global structure for multiple PITs initialization
+ */
+static int pit_instances;
+static int max_pit_instances = 1;
+
+static void __iomem *sched_clock_base;
+
+static inline struct pit_timer *ced_to_pit(struct clock_event_device *ced)
+{
+ return container_of(ced, struct pit_timer, ced);
+}
+
+static inline struct pit_timer *cs_to_pit(struct clocksource *cs)
+{
+ return container_of(cs, struct pit_timer, cs);
+}
+
+static inline void pit_module_enable(void __iomem *base)
+{
+ writel(0, PITMCR(base));
+}
+
+static inline void pit_module_disable(void __iomem *base)
+{
+ writel(PITMCR_MDIS, PITMCR(base));
+}
+
+static inline void pit_timer_enable(void __iomem *base, bool tie)
+{
+ u32 val = PITTCTRL_TEN | (tie ? PITTCTRL_TIE : 0);
+
+ writel(val, PITTCTRL(base));
+}
+
+static inline void pit_timer_disable(void __iomem *base)
+{
+ writel(0, PITTCTRL(base));
+}
+
+static inline void pit_timer_set_counter(void __iomem *base, unsigned int cnt)
+{
+ writel(cnt, PITLDVAL(base));
+}
+
+static inline void pit_timer_irqack(struct pit_timer *pit)
+{
+ writel(PITTFLG_TIF, PITTFLG(pit->clkevt_base));
+}
+
+static u64 notrace pit_read_sched_clock(void)
+{
+ return ~readl(sched_clock_base);
+}
+
+static u64 pit_timer_clocksource_read(struct clocksource *cs)
+{
+ struct pit_timer *pit = cs_to_pit(cs);
+
+ return (u64)~readl(PITCVAL(pit->clksrc_base));
+}
+
+static int pit_clocksource_init(struct pit_timer *pit, const char *name,
+ void __iomem *base, unsigned long rate)
+{
+ /*
+ * The channels 0 and 1 can be chained to build a 64-bit
+ * timer. Let's use the channel 2 as a clocksource and leave
+ * the channels 0 and 1 unused for anyone else who needs them
+ */
+ pit->clksrc_base = base + PIT_CH(2);
+ pit->cs.name = name;
+ pit->cs.rating = 300;
+ pit->cs.read = pit_timer_clocksource_read;
+ pit->cs.mask = CLOCKSOURCE_MASK(32);
+ pit->cs.flags = CLOCK_SOURCE_IS_CONTINUOUS;
+
+ /* set the max load value and start the clock source counter */
+ pit_timer_disable(pit->clksrc_base);
+ pit_timer_set_counter(pit->clksrc_base, ~0);
+ pit_timer_enable(pit->clksrc_base, 0);
+
+ sched_clock_base = pit->clksrc_base + PITCVAL_OFFSET;
+ sched_clock_register(pit_read_sched_clock, 32, rate);
+
+ return clocksource_register_hz(&pit->cs, rate);
+}
+
+static int pit_set_next_event(unsigned long delta, struct clock_event_device *ced)
+{
+ struct pit_timer *pit = ced_to_pit(ced);
+
+ /*
+ * set a new value to PITLDVAL register will not restart the timer,
+ * to abort the current cycle and start a timer period with the new
+ * value, the timer must be disabled and enabled again.
+ * and the PITLAVAL should be set to delta minus one according to pit
+ * hardware requirement.
+ */
+ pit_timer_disable(pit->clkevt_base);
+ pit_timer_set_counter(pit->clkevt_base, delta - 1);
+ pit_timer_enable(pit->clkevt_base, true);
+
+ return 0;
+}
+
+static int pit_shutdown(struct clock_event_device *ced)
+{
+ struct pit_timer *pit = ced_to_pit(ced);
+
+ pit_timer_disable(pit->clkevt_base);
+
+ return 0;
+}
+
+static int pit_set_periodic(struct clock_event_device *ced)
+{
+ struct pit_timer *pit = ced_to_pit(ced);
+
+ pit_set_next_event(pit->rate / HZ, ced);
+
+ return 0;
+}
+
+static irqreturn_t pit_timer_interrupt(int irq, void *dev_id)
+{
+ struct clock_event_device *ced = dev_id;
+ struct pit_timer *pit = ced_to_pit(ced);
+
+ pit_timer_irqack(pit);
+
+ /*
+ * pit hardware doesn't support oneshot, it will generate an interrupt
+ * and reload the counter value from PITLDVAL when PITCVAL reach zero,
+ * and start the counter again. So software need to disable the timer
+ * to stop the counter loop in ONESHOT mode.
+ */
+ if (likely(clockevent_state_oneshot(ced)))
+ pit_timer_disable(pit->clkevt_base);
+
+ ced->event_handler(ced);
+
+ return IRQ_HANDLED;
+}
+
+static int pit_clockevent_per_cpu_init(struct pit_timer *pit, const char *name,
+ void __iomem *base, unsigned long rate,
+ int irq, unsigned int cpu)
+{
+ int ret;
+
+ /*
+ * The channels 0 and 1 can be chained to build a 64-bit
+ * timer. Let's use the channel 3 as a clockevent and leave
+ * the channels 0 and 1 unused for anyone else who needs them
+ */
+ pit->clkevt_base = base + PIT_CH(3);
+ pit->rate = rate;
+
+ pit_timer_disable(pit->clkevt_base);
+
+ pit_timer_irqack(pit);
+
+ ret = request_irq(irq, pit_timer_interrupt, IRQF_TIMER | IRQF_NOBALANCING,
+ name, &pit->ced);
+ if (ret)
+ return ret;
+
+ pit->ced.cpumask = cpumask_of(cpu);
+ pit->ced.irq = irq;
+
+ pit->ced.name = name;
+ pit->ced.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT;
+ pit->ced.set_state_shutdown = pit_shutdown;
+ pit->ced.set_state_periodic = pit_set_periodic;
+ pit->ced.set_next_event = pit_set_next_event;
+ pit->ced.rating = 300;
+
+ per_cpu(pit_timers, cpu) = pit;
+
+ return 0;
+}
+
+static void pit_clockevent_per_cpu_exit(struct pit_timer *pit, unsigned int cpu)
+{
+ pit_timer_disable(pit->clkevt_base);
+ free_irq(pit->ced.irq, &pit->ced);
+ per_cpu(pit_timers, cpu) = NULL;
+}
+
+static int pit_clockevent_starting_cpu(unsigned int cpu)
+{
+ struct pit_timer *pit = per_cpu(pit_timers, cpu);
+ int ret;
+
+ if (!pit)
+ return 0;
+
+ ret = irq_force_affinity(pit->ced.irq, cpumask_of(cpu));
+ if (ret) {
+ pit_clockevent_per_cpu_exit(pit, cpu);
+ return ret;
+ }
+
+ /*
+ * The value for the LDVAL register trigger is calculated as:
+ * LDVAL trigger = (period / clock period) - 1
+ * The pit is a 32-bit down count timer, when the counter value
+ * reaches 0, it will generate an interrupt, thus the minimal
+ * LDVAL trigger value is 1. And then the min_delta is
+ * minimal LDVAL trigger value + 1, and the max_delta is full 32-bit.
+ */
+ clockevents_config_and_register(&pit->ced, pit->rate, 2, 0xffffffff);
+
+ return 0;
+}
+
+static int pit_timer_init(struct device_node *np)
+{
+ struct pit_timer *pit;
+ struct clk *pit_clk;
+ void __iomem *timer_base;
+ const char *name = of_node_full_name(np);
+ unsigned long clk_rate;
+ int irq, ret;
+
+ pit = kzalloc(sizeof(*pit), GFP_KERNEL);
+ if (!pit)
+ return -ENOMEM;
+
+ ret = -ENXIO;
+ timer_base = of_iomap(np, 0);
+ if (!timer_base) {
+ pr_err("Failed to iomap\n");
+ goto out_kfree;
+ }
+
+ ret = -EINVAL;
+ irq = irq_of_parse_and_map(np, 0);
+ if (irq <= 0) {
+ pr_err("Failed to irq_of_parse_and_map\n");
+ goto out_iounmap;
+ }
+
+ pit_clk = of_clk_get(np, 0);
+ if (IS_ERR(pit_clk)) {
+ ret = PTR_ERR(pit_clk);
+ goto out_irq_dispose_mapping;
+ }
+
+ ret = clk_prepare_enable(pit_clk);
+ if (ret)
+ goto out_clk_put;
+
+ clk_rate = clk_get_rate(pit_clk);
+
+ pit_module_disable(timer_base);
+
+ ret = pit_clocksource_init(pit, name, timer_base, clk_rate);
+ if (ret) {
+ pr_err("Failed to initialize clocksource '%pOF'\n", np);
+ goto out_pit_module_disable;
+ }
+
+ ret = pit_clockevent_per_cpu_init(pit, name, timer_base, clk_rate, irq, pit_instances);
+ if (ret) {
+ pr_err("Failed to initialize clockevent '%pOF'\n", np);
+ goto out_pit_clocksource_unregister;
+ }
+
+ /* enable the pit module */
+ pit_module_enable(timer_base);
+
+ pit_instances++;
+
+ if (pit_instances == max_pit_instances) {
+ ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "PIT timer:starting",
+ pit_clockevent_starting_cpu, NULL);
+ if (ret < 0)
+ goto out_pit_clocksource_unregister;
+ }
+
+ return 0;
+
+out_pit_clocksource_unregister:
+ clocksource_unregister(&pit->cs);
+out_pit_module_disable:
+ pit_module_disable(timer_base);
+ clk_disable_unprepare(pit_clk);
+out_clk_put:
+ clk_put(pit_clk);
+out_irq_dispose_mapping:
+ irq_dispose_mapping(irq);
+out_iounmap:
+ iounmap(timer_base);
+out_kfree:
+ kfree(pit);
+
+ return ret;
+}
+
+static int pit_timer_probe(struct platform_device *pdev)
+{
+ const struct pit_timer_data *pit_timer_data;
+
+ pit_timer_data = of_device_get_match_data(&pdev->dev);
+ if (pit_timer_data)
+ max_pit_instances = pit_timer_data->max_pit_instances;
+
+ return pit_timer_init(pdev->dev.of_node);
+}
+
+static struct pit_timer_data s32g2_data = { .max_pit_instances = 2 };
+
+static const struct of_device_id pit_timer_of_match[] = {
+ { .compatible = "nxp,s32g2-pit", .data = &s32g2_data },
+ { }
+};
+MODULE_DEVICE_TABLE(of, pit_timer_of_match);
+
+static struct platform_driver nxp_pit_driver = {
+ .driver = {
+ .name = "nxp-pit",
+ .of_match_table = pit_timer_of_match,
+ },
+ .probe = pit_timer_probe,
+};
+module_platform_driver(nxp_pit_driver);
+
+TIMER_OF_DECLARE(vf610, "fsl,vf610-pit", pit_timer_init);
diff --git a/drivers/clocksource/timer-nxp-stm.c b/drivers/clocksource/timer-nxp-stm.c
index d7ccf9001729..bbc40623728f 100644
--- a/drivers/clocksource/timer-nxp-stm.c
+++ b/drivers/clocksource/timer-nxp-stm.c
@@ -201,6 +201,7 @@ static int __init nxp_stm_clocksource_init(struct device *dev, struct stm_timer
stm_timer->cs.resume = nxp_stm_clocksource_resume;
stm_timer->cs.mask = CLOCKSOURCE_MASK(32);
stm_timer->cs.flags = CLOCK_SOURCE_IS_CONTINUOUS;
+ stm_timer->cs.owner = THIS_MODULE;
ret = clocksource_register_hz(&stm_timer->cs, stm_timer->rate);
if (ret)
@@ -314,6 +315,7 @@ static int __init nxp_stm_clockevent_per_cpu_init(struct device *dev, struct stm
stm_timer->ced.cpumask = cpumask_of(cpu);
stm_timer->ced.rating = 460;
stm_timer->ced.irq = irq;
+ stm_timer->ced.owner = THIS_MODULE;
per_cpu(stm_timers, cpu) = stm_timer;
diff --git a/drivers/clocksource/timer-rtl-otto.c b/drivers/clocksource/timer-rtl-otto.c
index 8a3068b36e75..6113d2fdd4de 100644
--- a/drivers/clocksource/timer-rtl-otto.c
+++ b/drivers/clocksource/timer-rtl-otto.c
@@ -38,14 +38,13 @@
#define RTTM_BIT_COUNT 28
#define RTTM_MIN_DELTA 8
#define RTTM_MAX_DELTA CLOCKSOURCE_MASK(28)
+#define RTTM_MAX_DIVISOR GENMASK(15, 0)
/*
- * Timers are derived from the LXB clock frequency. Usually this is a fixed
- * multiple of the 25 MHz oscillator. The 930X SOC is an exception from that.
- * Its LXB clock has only dividers and uses the switch PLL of 2.45 GHz as its
- * base. The only meaningful frequencies we can achieve from that are 175.000
- * MHz and 153.125 MHz. The greatest common divisor of all explained possible
- * speeds is 3125000. Pin the timers to this 3.125 MHz reference frequency.
+ * Timers are derived from the lexra bus (LXB) clock frequency. This is 175 MHz
+ * on RTL930x and 200 MHz on the other platforms. With 3.125 MHz choose a common
+ * divisor to have enough range and detail. This provides comparability between
+ * the different platforms.
*/
#define RTTM_TICKS_PER_SEC 3125000
@@ -55,11 +54,6 @@ struct rttm_cs {
};
/* Simple internal register functions */
-static inline void rttm_set_counter(void __iomem *base, unsigned int counter)
-{
- iowrite32(counter, base + RTTM_CNT);
-}
-
static inline unsigned int rttm_get_counter(void __iomem *base)
{
return ioread32(base + RTTM_CNT);
@@ -112,6 +106,22 @@ static irqreturn_t rttm_timer_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}
+static void rttm_bounce_timer(void __iomem *base, u32 mode)
+{
+ /*
+ * When a running timer has less than ~5us left, a stop/start sequence
+ * might fail. While the details are unknown the most evident effect is
+ * that the subsequent interrupt will not be fired.
+ *
+ * As a workaround issue an intermediate restart with a very slow
+ * frequency of ~3kHz keeping the target counter (>=8). So the follow
+ * up restart will always be issued outside the critical window.
+ */
+
+ rttm_disable_timer(base);
+ rttm_enable_timer(base, mode, RTTM_MAX_DIVISOR);
+}
+
static void rttm_stop_timer(void __iomem *base)
{
rttm_disable_timer(base);
@@ -120,7 +130,6 @@ static void rttm_stop_timer(void __iomem *base)
static void rttm_start_timer(struct timer_of *to, u32 mode)
{
- rttm_set_counter(to->of_base.base, 0);
rttm_enable_timer(to->of_base.base, mode, to->of_clk.rate / RTTM_TICKS_PER_SEC);
}
@@ -129,7 +138,8 @@ static int rttm_next_event(unsigned long delta, struct clock_event_device *clkev
struct timer_of *to = to_timer_of(clkevt);
RTTM_DEBUG(to->of_base.base);
- rttm_stop_timer(to->of_base.base);
+ rttm_bounce_timer(to->of_base.base, RTTM_CTRL_COUNTER);
+ rttm_disable_timer(to->of_base.base);
rttm_set_period(to->of_base.base, delta);
rttm_start_timer(to, RTTM_CTRL_COUNTER);
@@ -141,7 +151,8 @@ static int rttm_state_oneshot(struct clock_event_device *clkevt)
struct timer_of *to = to_timer_of(clkevt);
RTTM_DEBUG(to->of_base.base);
- rttm_stop_timer(to->of_base.base);
+ rttm_bounce_timer(to->of_base.base, RTTM_CTRL_COUNTER);
+ rttm_disable_timer(to->of_base.base);
rttm_set_period(to->of_base.base, RTTM_TICKS_PER_SEC / HZ);
rttm_start_timer(to, RTTM_CTRL_COUNTER);
@@ -153,7 +164,8 @@ static int rttm_state_periodic(struct clock_event_device *clkevt)
struct timer_of *to = to_timer_of(clkevt);
RTTM_DEBUG(to->of_base.base);
- rttm_stop_timer(to->of_base.base);
+ rttm_bounce_timer(to->of_base.base, RTTM_CTRL_TIMER);
+ rttm_disable_timer(to->of_base.base);
rttm_set_period(to->of_base.base, RTTM_TICKS_PER_SEC / HZ);
rttm_start_timer(to, RTTM_CTRL_TIMER);
diff --git a/drivers/clocksource/timer-stm32-lp.c b/drivers/clocksource/timer-stm32-lp.c
index 6e7944ffd7c0..c2a699f5c1dd 100644
--- a/drivers/clocksource/timer-stm32-lp.c
+++ b/drivers/clocksource/timer-stm32-lp.c
@@ -211,6 +211,7 @@ static void stm32_clkevent_lp_init(struct stm32_lp_private *priv,
priv->clkevt.rating = STM32_LP_RATING;
priv->clkevt.suspend = stm32_clkevent_lp_suspend;
priv->clkevt.resume = stm32_clkevent_lp_resume;
+ priv->clkevt.owner = THIS_MODULE;
clockevents_config_and_register(&priv->clkevt, rate, 0x1,
STM32_LPTIM_MAX_ARR);
diff --git a/drivers/clocksource/timer-sun5i.c b/drivers/clocksource/timer-sun5i.c
index 6b48a9006444..f827d3f98f60 100644
--- a/drivers/clocksource/timer-sun5i.c
+++ b/drivers/clocksource/timer-sun5i.c
@@ -185,6 +185,7 @@ static int sun5i_setup_clocksource(struct platform_device *pdev,
cs->clksrc.read = sun5i_clksrc_read;
cs->clksrc.mask = CLOCKSOURCE_MASK(32);
cs->clksrc.flags = CLOCK_SOURCE_IS_CONTINUOUS;
+ cs->clksrc.owner = THIS_MODULE;
ret = clocksource_register_hz(&cs->clksrc, rate);
if (ret) {
@@ -214,6 +215,7 @@ static int sun5i_setup_clockevent(struct platform_device *pdev,
ce->clkevt.rating = 340;
ce->clkevt.irq = irq;
ce->clkevt.cpumask = cpu_possible_mask;
+ ce->clkevt.owner = THIS_MODULE;
/* Enable timer0 interrupt */
val = readl(base + TIMER_IRQ_EN_REG);
diff --git a/drivers/clocksource/timer-tegra186.c b/drivers/clocksource/timer-tegra186.c
index e5394f98a02e..355558893e5f 100644
--- a/drivers/clocksource/timer-tegra186.c
+++ b/drivers/clocksource/timer-tegra186.c
@@ -159,7 +159,7 @@ static void tegra186_wdt_enable(struct tegra186_wdt *wdt)
tmr_writel(wdt->tmr, TMRCSSR_SRC_USEC, TMRCSSR);
/* configure timer (system reset happens on the fifth expiration) */
- value = TMRCR_PTV(wdt->base.timeout * USEC_PER_SEC / 5) |
+ value = TMRCR_PTV(wdt->base.timeout * (USEC_PER_SEC / 5)) |
TMRCR_PERIODIC | TMRCR_ENABLE;
tmr_writel(wdt->tmr, value, TMRCR);
@@ -231,7 +231,7 @@ static unsigned int tegra186_wdt_get_timeleft(struct watchdog_device *wdd)
{
struct tegra186_wdt *wdt = to_tegra186_wdt(wdd);
u32 expiration, val;
- u64 timeleft;
+ u32 timeleft;
if (!watchdog_active(&wdt->base)) {
/* return zero if the watchdog timer is not activated. */
@@ -266,21 +266,26 @@ static unsigned int tegra186_wdt_get_timeleft(struct watchdog_device *wdd)
* Calculate the time remaining by adding the time for the
* counter value to the time of the counter expirations that
* remain.
+ * Note: Since wdt->base.timeout is bound to 255, the maximum
+ * value added to timeleft is
+ * 255 * (1,000,000 / 5) * 4
+ * = 255 * 200,000 * 4
+ * = 204,000,000
+ * TMRSR_PCV is a 29-bit field.
+ * Its maximum value is 0x1fffffff = 536,870,911.
+ * 204,000,000 + 536,870,911 = 740,870,911 = 0x2C28CAFF.
+ * timeleft can therefore not overflow, and 64-bit calculations
+ * are not necessary.
*/
- timeleft += (((u64)wdt->base.timeout * USEC_PER_SEC) / 5) * (4 - expiration);
+ timeleft += (wdt->base.timeout * (USEC_PER_SEC / 5)) * (4 - expiration);
/*
* Convert the current counter value to seconds,
- * rounding up to the nearest second. Cast u64 to
- * u32 under the assumption that no overflow happens
- * when coverting to seconds.
+ * rounding to the nearest second.
*/
- timeleft = DIV_ROUND_CLOSEST_ULL(timeleft, USEC_PER_SEC);
+ timeleft = DIV_ROUND_CLOSEST(timeleft, USEC_PER_SEC);
- if (WARN_ON_ONCE(timeleft > U32_MAX))
- return U32_MAX;
-
- return lower_32_bits(timeleft);
+ return timeleft;
}
static const struct watchdog_ops tegra186_wdt_ops = {
@@ -328,16 +333,12 @@ static struct tegra186_wdt *tegra186_wdt_create(struct tegra186_timer *tegra,
wdt->base.parent = tegra->dev;
err = watchdog_init_timeout(&wdt->base, 5, tegra->dev);
- if (err < 0) {
- dev_err(tegra->dev, "failed to initialize timeout: %d\n", err);
+ if (err < 0)
return ERR_PTR(err);
- }
err = devm_watchdog_register_device(tegra->dev, &wdt->base);
- if (err < 0) {
- dev_err(tegra->dev, "failed to register WDT: %d\n", err);
+ if (err < 0)
return ERR_PTR(err);
- }
return wdt;
}
@@ -373,6 +374,7 @@ static int tegra186_timer_tsc_init(struct tegra186_timer *tegra)
tegra->tsc.read = tegra186_timer_tsc_read;
tegra->tsc.mask = CLOCKSOURCE_MASK(56);
tegra->tsc.flags = CLOCK_SOURCE_IS_CONTINUOUS;
+ tegra->tsc.owner = THIS_MODULE;
return clocksource_register_hz(&tegra->tsc, 31250000);
}
@@ -392,6 +394,7 @@ static int tegra186_timer_osc_init(struct tegra186_timer *tegra)
tegra->osc.read = tegra186_timer_osc_read;
tegra->osc.mask = CLOCKSOURCE_MASK(32);
tegra->osc.flags = CLOCK_SOURCE_IS_CONTINUOUS;
+ tegra->osc.owner = THIS_MODULE;
return clocksource_register_hz(&tegra->osc, 38400000);
}
@@ -411,6 +414,7 @@ static int tegra186_timer_usec_init(struct tegra186_timer *tegra)
tegra->usec.read = tegra186_timer_usec_read;
tegra->usec.mask = CLOCKSOURCE_MASK(32);
tegra->usec.flags = CLOCK_SOURCE_IS_CONTINUOUS;
+ tegra->usec.owner = THIS_MODULE;
return clocksource_register_hz(&tegra->usec, USEC_PER_SEC);
}
diff --git a/drivers/clocksource/timer-ti-dm.c b/drivers/clocksource/timer-ti-dm.c
index e9e32df6b566..793e7cdcb1b1 100644
--- a/drivers/clocksource/timer-ti-dm.c
+++ b/drivers/clocksource/timer-ti-dm.c
@@ -31,6 +31,7 @@
#include <linux/platform_data/dmtimer-omap.h>
#include <clocksource/timer-ti-dm.h>
+#include <linux/delay.h>
/*
* timer errata flags
@@ -836,6 +837,48 @@ static int omap_dm_timer_set_match(struct omap_dm_timer *cookie, int enable,
return 0;
}
+static int omap_dm_timer_set_cap(struct omap_dm_timer *cookie,
+ int autoreload, bool config_period)
+{
+ struct dmtimer *timer;
+ struct device *dev;
+ int rc;
+ u32 l;
+
+ timer = to_dmtimer(cookie);
+ if (unlikely(!timer))
+ return -EINVAL;
+
+ dev = &timer->pdev->dev;
+ rc = pm_runtime_resume_and_get(dev);
+ if (rc)
+ return rc;
+ /*
+ * 1. Select autoreload mode. TIMER_TCLR[1] AR bit.
+ * 2. TIMER_TCLR[14]: Sets the functionality of the TIMER IO pin.
+ * 3. TIMER_TCLR[13] : Capture mode select bit.
+ * 3. TIMER_TCLR[9-8] : Select transition capture mode.
+ */
+
+ l = dmtimer_read(timer, OMAP_TIMER_CTRL_REG);
+
+ if (autoreload)
+ l |= OMAP_TIMER_CTRL_AR;
+
+ l |= OMAP_TIMER_CTRL_CAPTMODE | OMAP_TIMER_CTRL_GPOCFG;
+
+ if (config_period == true)
+ l |= OMAP_TIMER_CTRL_TCM_LOWTOHIGH; /* Time Period config */
+ else
+ l |= OMAP_TIMER_CTRL_TCM_BOTHEDGES; /* Duty Cycle config */
+
+ dmtimer_write(timer, OMAP_TIMER_CTRL_REG, l);
+
+ pm_runtime_put_sync(dev);
+
+ return 0;
+}
+
static int omap_dm_timer_set_pwm(struct omap_dm_timer *cookie, int def_on,
int toggle, int trigger, int autoreload)
{
@@ -1023,23 +1066,92 @@ static unsigned int omap_dm_timer_read_counter(struct omap_dm_timer *cookie)
return __omap_dm_timer_read_counter(timer);
}
+static inline unsigned int __omap_dm_timer_cap(struct dmtimer *timer, int idx)
+{
+ return idx == 0 ? dmtimer_read(timer, OMAP_TIMER_CAPTURE_REG) :
+ dmtimer_read(timer, OMAP_TIMER_CAPTURE2_REG);
+}
+
static int omap_dm_timer_write_counter(struct omap_dm_timer *cookie, unsigned int value)
{
struct dmtimer *timer;
+ struct device *dev;
timer = to_dmtimer(cookie);
- if (unlikely(!timer || !atomic_read(&timer->enabled))) {
- pr_err("%s: timer not available or enabled.\n", __func__);
+ if (unlikely(!timer)) {
+ pr_err("%s: timer not available.\n", __func__);
return -EINVAL;
}
+ dev = &timer->pdev->dev;
+
+ pm_runtime_resume_and_get(dev);
dmtimer_write(timer, OMAP_TIMER_COUNTER_REG, value);
+ pm_runtime_put_sync(dev);
/* Save the context */
timer->context.tcrr = value;
return 0;
}
+/**
+ * omap_dm_timer_cap_counter() - Calculate the high count or period count depending on the
+ * configuration.
+ * @cookie:Pointer to OMAP DM timer
+ * @is_period:Whether to configure timer in period or duty cycle mode
+ *
+ * Return high count or period count if timer is enabled else appropriate error.
+ */
+static unsigned int omap_dm_timer_cap_counter(struct omap_dm_timer *cookie, bool is_period)
+{
+ struct dmtimer *timer;
+ unsigned int cap1 = 0;
+ unsigned int cap2 = 0;
+ u32 l, ret;
+
+ timer = to_dmtimer(cookie);
+ if (unlikely(!timer || !atomic_read(&timer->enabled))) {
+ pr_err("%s:timer is not available or enabled.%p\n", __func__, (void *)timer);
+ return -EINVAL;
+ }
+
+ /* Stop the timer */
+ omap_dm_timer_stop(cookie);
+
+ /* Clear the timer counter value to 0 */
+ ret = omap_dm_timer_write_counter(cookie, 0);
+ if (ret)
+ return ret;
+
+ /* Sets the timer capture configuration for period/duty cycle calculation */
+ ret = omap_dm_timer_set_cap(cookie, true, is_period);
+ if (ret) {
+ pr_err("%s: Failed to set timer capture configuration.\n", __func__);
+ return ret;
+ }
+ /* Start the timer */
+ omap_dm_timer_start(cookie);
+
+ /*
+ * 1 sec delay is given so as to provide
+ * enough time to capture low frequency signals.
+ */
+ msleep(1000);
+
+ cap1 = __omap_dm_timer_cap(timer, 0);
+ cap2 = __omap_dm_timer_cap(timer, 1);
+
+ /*
+ * Clears the TCLR configuration.
+ * The start bit must be set to 1 as the timer is already in start mode.
+ */
+ l = dmtimer_read(timer, OMAP_TIMER_CTRL_REG);
+ l &= ~(0xffff) | 0x1;
+ dmtimer_write(timer, OMAP_TIMER_CTRL_REG, l);
+
+ return (cap2-cap1);
+}
+
static int __maybe_unused omap_dm_timer_runtime_suspend(struct device *dev)
{
struct dmtimer *timer = dev_get_drvdata(dev);
@@ -1246,6 +1358,9 @@ static const struct omap_dm_timer_ops dmtimer_ops = {
.write_counter = omap_dm_timer_write_counter,
.read_status = omap_dm_timer_read_status,
.write_status = omap_dm_timer_write_status,
+ .set_cap = omap_dm_timer_set_cap,
+ .get_cap_status = omap_dm_timer_get_pwm_status,
+ .read_cap = omap_dm_timer_cap_counter,
};
static const struct dmtimer_platform_data omap3plus_pdata = {
diff --git a/drivers/clocksource/timer-vf-pit.c b/drivers/clocksource/timer-vf-pit.c
deleted file mode 100644
index 911c92146eca..000000000000
--- a/drivers/clocksource/timer-vf-pit.c
+++ /dev/null
@@ -1,194 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright 2012-2013 Freescale Semiconductor, Inc.
- */
-
-#include <linux/interrupt.h>
-#include <linux/clockchips.h>
-#include <linux/clk.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
-#include <linux/sched_clock.h>
-
-/*
- * Each pit takes 0x10 Bytes register space
- */
-#define PITMCR 0x00
-#define PIT0_OFFSET 0x100
-#define PITn_OFFSET(n) (PIT0_OFFSET + 0x10 * (n))
-#define PITLDVAL 0x00
-#define PITCVAL 0x04
-#define PITTCTRL 0x08
-#define PITTFLG 0x0c
-
-#define PITMCR_MDIS (0x1 << 1)
-
-#define PITTCTRL_TEN (0x1 << 0)
-#define PITTCTRL_TIE (0x1 << 1)
-#define PITCTRL_CHN (0x1 << 2)
-
-#define PITTFLG_TIF 0x1
-
-static void __iomem *clksrc_base;
-static void __iomem *clkevt_base;
-static unsigned long cycle_per_jiffy;
-
-static inline void pit_timer_enable(void)
-{
- __raw_writel(PITTCTRL_TEN | PITTCTRL_TIE, clkevt_base + PITTCTRL);
-}
-
-static inline void pit_timer_disable(void)
-{
- __raw_writel(0, clkevt_base + PITTCTRL);
-}
-
-static inline void pit_irq_acknowledge(void)
-{
- __raw_writel(PITTFLG_TIF, clkevt_base + PITTFLG);
-}
-
-static u64 notrace pit_read_sched_clock(void)
-{
- return ~__raw_readl(clksrc_base + PITCVAL);
-}
-
-static int __init pit_clocksource_init(unsigned long rate)
-{
- /* set the max load value and start the clock source counter */
- __raw_writel(0, clksrc_base + PITTCTRL);
- __raw_writel(~0UL, clksrc_base + PITLDVAL);
- __raw_writel(PITTCTRL_TEN, clksrc_base + PITTCTRL);
-
- sched_clock_register(pit_read_sched_clock, 32, rate);
- return clocksource_mmio_init(clksrc_base + PITCVAL, "vf-pit", rate,
- 300, 32, clocksource_mmio_readl_down);
-}
-
-static int pit_set_next_event(unsigned long delta,
- struct clock_event_device *unused)
-{
- /*
- * set a new value to PITLDVAL register will not restart the timer,
- * to abort the current cycle and start a timer period with the new
- * value, the timer must be disabled and enabled again.
- * and the PITLAVAL should be set to delta minus one according to pit
- * hardware requirement.
- */
- pit_timer_disable();
- __raw_writel(delta - 1, clkevt_base + PITLDVAL);
- pit_timer_enable();
-
- return 0;
-}
-
-static int pit_shutdown(struct clock_event_device *evt)
-{
- pit_timer_disable();
- return 0;
-}
-
-static int pit_set_periodic(struct clock_event_device *evt)
-{
- pit_set_next_event(cycle_per_jiffy, evt);
- return 0;
-}
-
-static irqreturn_t pit_timer_interrupt(int irq, void *dev_id)
-{
- struct clock_event_device *evt = dev_id;
-
- pit_irq_acknowledge();
-
- /*
- * pit hardware doesn't support oneshot, it will generate an interrupt
- * and reload the counter value from PITLDVAL when PITCVAL reach zero,
- * and start the counter again. So software need to disable the timer
- * to stop the counter loop in ONESHOT mode.
- */
- if (likely(clockevent_state_oneshot(evt)))
- pit_timer_disable();
-
- evt->event_handler(evt);
-
- return IRQ_HANDLED;
-}
-
-static struct clock_event_device clockevent_pit = {
- .name = "VF pit timer",
- .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
- .set_state_shutdown = pit_shutdown,
- .set_state_periodic = pit_set_periodic,
- .set_next_event = pit_set_next_event,
- .rating = 300,
-};
-
-static int __init pit_clockevent_init(unsigned long rate, int irq)
-{
- __raw_writel(0, clkevt_base + PITTCTRL);
- __raw_writel(PITTFLG_TIF, clkevt_base + PITTFLG);
-
- BUG_ON(request_irq(irq, pit_timer_interrupt, IRQF_TIMER | IRQF_IRQPOLL,
- "VF pit timer", &clockevent_pit));
-
- clockevent_pit.cpumask = cpumask_of(0);
- clockevent_pit.irq = irq;
- /*
- * The value for the LDVAL register trigger is calculated as:
- * LDVAL trigger = (period / clock period) - 1
- * The pit is a 32-bit down count timer, when the counter value
- * reaches 0, it will generate an interrupt, thus the minimal
- * LDVAL trigger value is 1. And then the min_delta is
- * minimal LDVAL trigger value + 1, and the max_delta is full 32-bit.
- */
- clockevents_config_and_register(&clockevent_pit, rate, 2, 0xffffffff);
-
- return 0;
-}
-
-static int __init pit_timer_init(struct device_node *np)
-{
- struct clk *pit_clk;
- void __iomem *timer_base;
- unsigned long clk_rate;
- int irq, ret;
-
- timer_base = of_iomap(np, 0);
- if (!timer_base) {
- pr_err("Failed to iomap\n");
- return -ENXIO;
- }
-
- /*
- * PIT0 and PIT1 can be chained to build a 64-bit timer,
- * so choose PIT2 as clocksource, PIT3 as clockevent device,
- * and leave PIT0 and PIT1 unused for anyone else who needs them.
- */
- clksrc_base = timer_base + PITn_OFFSET(2);
- clkevt_base = timer_base + PITn_OFFSET(3);
-
- irq = irq_of_parse_and_map(np, 0);
- if (irq <= 0)
- return -EINVAL;
-
- pit_clk = of_clk_get(np, 0);
- if (IS_ERR(pit_clk))
- return PTR_ERR(pit_clk);
-
- ret = clk_prepare_enable(pit_clk);
- if (ret)
- return ret;
-
- clk_rate = clk_get_rate(pit_clk);
- cycle_per_jiffy = clk_rate / (HZ);
-
- /* enable the pit module */
- __raw_writel(~PITMCR_MDIS, timer_base + PITMCR);
-
- ret = pit_clocksource_init(clk_rate);
- if (ret)
- return ret;
-
- return pit_clockevent_init(clk_rate, irq);
-}
-TIMER_OF_DECLARE(vf610, "fsl,vf610-pit", pit_timer_init);
diff --git a/drivers/crypto/ccp/Makefile b/drivers/crypto/ccp/Makefile
index 394484929dae..a9626b30044a 100644
--- a/drivers/crypto/ccp/Makefile
+++ b/drivers/crypto/ccp/Makefile
@@ -13,7 +13,8 @@ ccp-$(CONFIG_CRYPTO_DEV_SP_PSP) += psp-dev.o \
tee-dev.o \
platform-access.o \
dbc.o \
- hsti.o
+ hsti.o \
+ sfs.o
obj-$(CONFIG_CRYPTO_DEV_CCP_CRYPTO) += ccp-crypto.o
ccp-crypto-objs := ccp-crypto-main.o \
diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c
index 1c5a7189631e..9e21da0e298a 100644
--- a/drivers/crypto/ccp/psp-dev.c
+++ b/drivers/crypto/ccp/psp-dev.c
@@ -17,6 +17,7 @@
#include "psp-dev.h"
#include "sev-dev.h"
#include "tee-dev.h"
+#include "sfs.h"
#include "platform-access.h"
#include "dbc.h"
#include "hsti.h"
@@ -182,6 +183,17 @@ static int psp_check_tee_support(struct psp_device *psp)
return 0;
}
+static int psp_check_sfs_support(struct psp_device *psp)
+{
+ /* Check if device supports SFS feature */
+ if (!psp->capability.sfs) {
+ dev_dbg(psp->dev, "psp does not support SFS\n");
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
static int psp_init(struct psp_device *psp)
{
int ret;
@@ -198,6 +210,12 @@ static int psp_init(struct psp_device *psp)
return ret;
}
+ if (!psp_check_sfs_support(psp)) {
+ ret = sfs_dev_init(psp);
+ if (ret)
+ return ret;
+ }
+
if (psp->vdata->platform_access) {
ret = platform_access_dev_init(psp);
if (ret)
@@ -302,6 +320,8 @@ void psp_dev_destroy(struct sp_device *sp)
tee_dev_destroy(psp);
+ sfs_dev_destroy(psp);
+
dbc_dev_destroy(psp);
platform_access_dev_destroy(psp);
diff --git a/drivers/crypto/ccp/psp-dev.h b/drivers/crypto/ccp/psp-dev.h
index e43ce87ede76..268c83f298cb 100644
--- a/drivers/crypto/ccp/psp-dev.h
+++ b/drivers/crypto/ccp/psp-dev.h
@@ -32,7 +32,8 @@ union psp_cap_register {
unsigned int sev :1,
tee :1,
dbc_thru_ext :1,
- rsvd1 :4,
+ sfs :1,
+ rsvd1 :3,
security_reporting :1,
fused_part :1,
rsvd2 :1,
@@ -68,6 +69,7 @@ struct psp_device {
void *tee_data;
void *platform_access_data;
void *dbc_data;
+ void *sfs_data;
union psp_cap_register capability;
};
@@ -118,12 +120,16 @@ struct psp_ext_request {
* @PSP_SUB_CMD_DBC_SET_UID: Set UID for DBC
* @PSP_SUB_CMD_DBC_GET_PARAMETER: Get parameter from DBC
* @PSP_SUB_CMD_DBC_SET_PARAMETER: Set parameter for DBC
+ * @PSP_SUB_CMD_SFS_GET_FW_VERS: Get firmware versions for ASP and other MP
+ * @PSP_SUB_CMD_SFS_UPDATE: Command to load, verify and execute SFS package
*/
enum psp_sub_cmd {
PSP_SUB_CMD_DBC_GET_NONCE = PSP_DYNAMIC_BOOST_GET_NONCE,
PSP_SUB_CMD_DBC_SET_UID = PSP_DYNAMIC_BOOST_SET_UID,
PSP_SUB_CMD_DBC_GET_PARAMETER = PSP_DYNAMIC_BOOST_GET_PARAMETER,
PSP_SUB_CMD_DBC_SET_PARAMETER = PSP_DYNAMIC_BOOST_SET_PARAMETER,
+ PSP_SUB_CMD_SFS_GET_FW_VERS = PSP_SFS_GET_FW_VERSIONS,
+ PSP_SUB_CMD_SFS_UPDATE = PSP_SFS_UPDATE,
};
int psp_extended_mailbox_cmd(struct psp_device *psp, unsigned int timeout_msecs,
diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c
index 9f5ccc1720cb..65d6d0af140a 100644
--- a/drivers/crypto/ccp/sev-dev.c
+++ b/drivers/crypto/ccp/sev-dev.c
@@ -82,6 +82,21 @@ MODULE_FIRMWARE("amd/amd_sev_fam19h_model1xh.sbin"); /* 4th gen EPYC */
static bool psp_dead;
static int psp_timeout;
+enum snp_hv_fixed_pages_state {
+ ALLOCATED,
+ HV_FIXED,
+};
+
+struct snp_hv_fixed_pages_entry {
+ struct list_head list;
+ struct page *page;
+ unsigned int order;
+ bool free;
+ enum snp_hv_fixed_pages_state page_state;
+};
+
+static LIST_HEAD(snp_hv_fixed_pages);
+
/* Trusted Memory Region (TMR):
* The TMR is a 1MB area that must be 1MB aligned. Use the page allocator
* to allocate the memory, which will return aligned memory for the specified
@@ -1073,6 +1088,165 @@ static void snp_set_hsave_pa(void *arg)
wrmsrq(MSR_VM_HSAVE_PA, 0);
}
+/* Hypervisor Fixed pages API interface */
+static void snp_hv_fixed_pages_state_update(struct sev_device *sev,
+ enum snp_hv_fixed_pages_state page_state)
+{
+ struct snp_hv_fixed_pages_entry *entry;
+
+ /* List is protected by sev_cmd_mutex */
+ lockdep_assert_held(&sev_cmd_mutex);
+
+ if (list_empty(&snp_hv_fixed_pages))
+ return;
+
+ list_for_each_entry(entry, &snp_hv_fixed_pages, list)
+ entry->page_state = page_state;
+}
+
+/*
+ * Allocate HV_FIXED pages in 2MB aligned sizes to ensure the whole
+ * 2MB pages are marked as HV_FIXED.
+ */
+struct page *snp_alloc_hv_fixed_pages(unsigned int num_2mb_pages)
+{
+ struct psp_device *psp_master = psp_get_master_device();
+ struct snp_hv_fixed_pages_entry *entry;
+ struct sev_device *sev;
+ unsigned int order;
+ struct page *page;
+
+ if (!psp_master || !psp_master->sev_data)
+ return NULL;
+
+ sev = psp_master->sev_data;
+
+ order = get_order(PMD_SIZE * num_2mb_pages);
+
+ /*
+ * SNP_INIT_EX is protected by sev_cmd_mutex, therefore this list
+ * also needs to be protected using the same mutex.
+ */
+ guard(mutex)(&sev_cmd_mutex);
+
+ /*
+ * This API uses SNP_INIT_EX to transition allocated pages to HV_Fixed
+ * page state, fail if SNP is already initialized.
+ */
+ if (sev->snp_initialized)
+ return NULL;
+
+ /* Re-use freed pages that match the request */
+ list_for_each_entry(entry, &snp_hv_fixed_pages, list) {
+ /* Hypervisor fixed page allocator implements exact fit policy */
+ if (entry->order == order && entry->free) {
+ entry->free = false;
+ memset(page_address(entry->page), 0,
+ (1 << entry->order) * PAGE_SIZE);
+ return entry->page;
+ }
+ }
+
+ page = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
+ if (!page)
+ return NULL;
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry) {
+ __free_pages(page, order);
+ return NULL;
+ }
+
+ entry->page = page;
+ entry->order = order;
+ list_add_tail(&entry->list, &snp_hv_fixed_pages);
+
+ return page;
+}
+
+void snp_free_hv_fixed_pages(struct page *page)
+{
+ struct psp_device *psp_master = psp_get_master_device();
+ struct snp_hv_fixed_pages_entry *entry, *nentry;
+
+ if (!psp_master || !psp_master->sev_data)
+ return;
+
+ /*
+ * SNP_INIT_EX is protected by sev_cmd_mutex, therefore this list
+ * also needs to be protected using the same mutex.
+ */
+ guard(mutex)(&sev_cmd_mutex);
+
+ list_for_each_entry_safe(entry, nentry, &snp_hv_fixed_pages, list) {
+ if (entry->page != page)
+ continue;
+
+ /*
+ * HV_FIXED page state cannot be changed until reboot
+ * and they cannot be used by an SNP guest, so they cannot
+ * be returned back to the page allocator.
+ * Mark the pages as free internally to allow possible re-use.
+ */
+ if (entry->page_state == HV_FIXED) {
+ entry->free = true;
+ } else {
+ __free_pages(page, entry->order);
+ list_del(&entry->list);
+ kfree(entry);
+ }
+ return;
+ }
+}
+
+static void snp_add_hv_fixed_pages(struct sev_device *sev, struct sev_data_range_list *range_list)
+{
+ struct snp_hv_fixed_pages_entry *entry;
+ struct sev_data_range *range;
+ int num_elements;
+
+ lockdep_assert_held(&sev_cmd_mutex);
+
+ if (list_empty(&snp_hv_fixed_pages))
+ return;
+
+ num_elements = list_count_nodes(&snp_hv_fixed_pages) +
+ range_list->num_elements;
+
+ /*
+ * Ensure the list of HV_FIXED pages that will be passed to firmware
+ * do not exceed the page-sized argument buffer.
+ */
+ if (num_elements * sizeof(*range) + sizeof(*range_list) > PAGE_SIZE) {
+ dev_warn(sev->dev, "Additional HV_Fixed pages cannot be accommodated, omitting\n");
+ return;
+ }
+
+ range = &range_list->ranges[range_list->num_elements];
+ list_for_each_entry(entry, &snp_hv_fixed_pages, list) {
+ range->base = page_to_pfn(entry->page) << PAGE_SHIFT;
+ range->page_count = 1 << entry->order;
+ range++;
+ }
+ range_list->num_elements = num_elements;
+}
+
+static void snp_leak_hv_fixed_pages(void)
+{
+ struct snp_hv_fixed_pages_entry *entry;
+
+ /* List is protected by sev_cmd_mutex */
+ lockdep_assert_held(&sev_cmd_mutex);
+
+ if (list_empty(&snp_hv_fixed_pages))
+ return;
+
+ list_for_each_entry(entry, &snp_hv_fixed_pages, list)
+ if (entry->page_state == HV_FIXED)
+ __snp_leak_pages(page_to_pfn(entry->page),
+ 1 << entry->order, false);
+}
+
static int snp_filter_reserved_mem_regions(struct resource *rs, void *arg)
{
struct sev_data_range_list *range_list = arg;
@@ -1163,6 +1337,12 @@ static int __sev_snp_init_locked(int *error)
return rc;
}
+ /*
+ * Add HV_Fixed pages from other PSP sub-devices, such as SFS to the
+ * HV_Fixed page list.
+ */
+ snp_add_hv_fixed_pages(sev, snp_range_list);
+
memset(&data, 0, sizeof(data));
data.init_rmp = 1;
data.list_paddr_en = 1;
@@ -1202,6 +1382,7 @@ static int __sev_snp_init_locked(int *error)
return rc;
}
+ snp_hv_fixed_pages_state_update(sev, HV_FIXED);
sev->snp_initialized = true;
dev_dbg(sev->dev, "SEV-SNP firmware initialized\n");
@@ -1784,6 +1965,7 @@ static int __sev_snp_shutdown_locked(int *error, bool panic)
return ret;
}
+ snp_leak_hv_fixed_pages();
sev->snp_initialized = false;
dev_dbg(sev->dev, "SEV-SNP firmware shutdown\n");
diff --git a/drivers/crypto/ccp/sev-dev.h b/drivers/crypto/ccp/sev-dev.h
index 3e4e5574e88a..28021abc85ad 100644
--- a/drivers/crypto/ccp/sev-dev.h
+++ b/drivers/crypto/ccp/sev-dev.h
@@ -65,4 +65,7 @@ void sev_dev_destroy(struct psp_device *psp);
void sev_pci_init(void);
void sev_pci_exit(void);
+struct page *snp_alloc_hv_fixed_pages(unsigned int num_2mb_pages);
+void snp_free_hv_fixed_pages(struct page *page);
+
#endif /* __SEV_DEV_H */
diff --git a/drivers/crypto/ccp/sfs.c b/drivers/crypto/ccp/sfs.c
new file mode 100644
index 000000000000..2f4beaafe7ec
--- /dev/null
+++ b/drivers/crypto/ccp/sfs.c
@@ -0,0 +1,311 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AMD Secure Processor Seamless Firmware Servicing support.
+ *
+ * Copyright (C) 2025 Advanced Micro Devices, Inc.
+ *
+ * Author: Ashish Kalra <ashish.kalra@amd.com>
+ */
+
+#include <linux/firmware.h>
+
+#include "sfs.h"
+#include "sev-dev.h"
+
+#define SFS_DEFAULT_TIMEOUT (10 * MSEC_PER_SEC)
+#define SFS_MAX_PAYLOAD_SIZE (2 * 1024 * 1024)
+#define SFS_NUM_2MB_PAGES_CMDBUF (SFS_MAX_PAYLOAD_SIZE / PMD_SIZE)
+#define SFS_NUM_PAGES_CMDBUF (SFS_MAX_PAYLOAD_SIZE / PAGE_SIZE)
+
+static DEFINE_MUTEX(sfs_ioctl_mutex);
+
+static struct sfs_misc_dev *misc_dev;
+
+static int send_sfs_cmd(struct sfs_device *sfs_dev, int msg)
+{
+ int ret;
+
+ sfs_dev->command_buf->hdr.status = 0;
+ sfs_dev->command_buf->hdr.sub_cmd_id = msg;
+
+ ret = psp_extended_mailbox_cmd(sfs_dev->psp,
+ SFS_DEFAULT_TIMEOUT,
+ (struct psp_ext_request *)sfs_dev->command_buf);
+ if (ret == -EIO) {
+ dev_dbg(sfs_dev->dev,
+ "msg 0x%x failed with PSP error: 0x%x, extended status: 0x%x\n",
+ msg, sfs_dev->command_buf->hdr.status,
+ *(u32 *)sfs_dev->command_buf->buf);
+ }
+
+ return ret;
+}
+
+static int send_sfs_get_fw_versions(struct sfs_device *sfs_dev)
+{
+ /*
+ * SFS_GET_FW_VERSIONS command needs the output buffer to be
+ * initialized to 0xC7 in every byte.
+ */
+ memset(sfs_dev->command_buf->sfs_buffer, 0xc7, PAGE_SIZE);
+ sfs_dev->command_buf->hdr.payload_size = 2 * PAGE_SIZE;
+
+ return send_sfs_cmd(sfs_dev, PSP_SFS_GET_FW_VERSIONS);
+}
+
+static int send_sfs_update_package(struct sfs_device *sfs_dev, const char *payload_name)
+{
+ char payload_path[PAYLOAD_NAME_SIZE + sizeof("amd/")];
+ const struct firmware *firmware;
+ unsigned long package_size;
+ int ret;
+
+ /* Sanitize userspace provided payload name */
+ if (!strnchr(payload_name, PAYLOAD_NAME_SIZE, '\0'))
+ return -EINVAL;
+
+ snprintf(payload_path, sizeof(payload_path), "amd/%s", payload_name);
+
+ ret = firmware_request_nowarn(&firmware, payload_path, sfs_dev->dev);
+ if (ret < 0) {
+ dev_warn_ratelimited(sfs_dev->dev, "firmware request failed for %s (%d)\n",
+ payload_path, ret);
+ return -ENOENT;
+ }
+
+ /*
+ * SFS Update Package command's input buffer contains TEE_EXT_CMD_BUFFER
+ * followed by the Update Package and it should be 64KB aligned.
+ */
+ package_size = ALIGN(firmware->size + PAGE_SIZE, 0x10000U);
+
+ /*
+ * SFS command buffer is a pre-allocated 2MB buffer, fail update package
+ * if SFS payload is larger than the pre-allocated command buffer.
+ */
+ if (package_size > SFS_MAX_PAYLOAD_SIZE) {
+ dev_warn_ratelimited(sfs_dev->dev,
+ "SFS payload size %ld larger than maximum supported payload size of %u\n",
+ package_size, SFS_MAX_PAYLOAD_SIZE);
+ release_firmware(firmware);
+ return -E2BIG;
+ }
+
+ /*
+ * Copy firmware data to a HV_Fixed memory region.
+ */
+ memcpy(sfs_dev->command_buf->sfs_buffer, firmware->data, firmware->size);
+ sfs_dev->command_buf->hdr.payload_size = package_size;
+
+ release_firmware(firmware);
+
+ return send_sfs_cmd(sfs_dev, PSP_SFS_UPDATE);
+}
+
+static long sfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+ struct sfs_user_get_fw_versions __user *sfs_get_fw_versions;
+ struct sfs_user_update_package __user *sfs_update_package;
+ struct psp_device *psp_master = psp_get_master_device();
+ char payload_name[PAYLOAD_NAME_SIZE];
+ struct sfs_device *sfs_dev;
+ int ret = 0;
+
+ if (!psp_master || !psp_master->sfs_data)
+ return -ENODEV;
+
+ sfs_dev = psp_master->sfs_data;
+
+ guard(mutex)(&sfs_ioctl_mutex);
+
+ switch (cmd) {
+ case SFSIOCFWVERS:
+ dev_dbg(sfs_dev->dev, "in SFSIOCFWVERS\n");
+
+ sfs_get_fw_versions = (struct sfs_user_get_fw_versions __user *)arg;
+
+ ret = send_sfs_get_fw_versions(sfs_dev);
+ if (ret && ret != -EIO)
+ return ret;
+
+ /*
+ * Return SFS status and extended status back to userspace
+ * if PSP status indicated success or command error.
+ */
+ if (copy_to_user(&sfs_get_fw_versions->blob, sfs_dev->command_buf->sfs_buffer,
+ PAGE_SIZE))
+ return -EFAULT;
+ if (copy_to_user(&sfs_get_fw_versions->sfs_status,
+ &sfs_dev->command_buf->hdr.status,
+ sizeof(sfs_get_fw_versions->sfs_status)))
+ return -EFAULT;
+ if (copy_to_user(&sfs_get_fw_versions->sfs_extended_status,
+ &sfs_dev->command_buf->buf,
+ sizeof(sfs_get_fw_versions->sfs_extended_status)))
+ return -EFAULT;
+ break;
+ case SFSIOCUPDATEPKG:
+ dev_dbg(sfs_dev->dev, "in SFSIOCUPDATEPKG\n");
+
+ sfs_update_package = (struct sfs_user_update_package __user *)arg;
+
+ if (copy_from_user(payload_name, sfs_update_package->payload_name,
+ PAYLOAD_NAME_SIZE))
+ return -EFAULT;
+
+ ret = send_sfs_update_package(sfs_dev, payload_name);
+ if (ret && ret != -EIO)
+ return ret;
+
+ /*
+ * Return SFS status and extended status back to userspace
+ * if PSP status indicated success or command error.
+ */
+ if (copy_to_user(&sfs_update_package->sfs_status,
+ &sfs_dev->command_buf->hdr.status,
+ sizeof(sfs_update_package->sfs_status)))
+ return -EFAULT;
+ if (copy_to_user(&sfs_update_package->sfs_extended_status,
+ &sfs_dev->command_buf->buf,
+ sizeof(sfs_update_package->sfs_extended_status)))
+ return -EFAULT;
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+static const struct file_operations sfs_fops = {
+ .owner = THIS_MODULE,
+ .unlocked_ioctl = sfs_ioctl,
+};
+
+static void sfs_exit(struct kref *ref)
+{
+ misc_deregister(&misc_dev->misc);
+ kfree(misc_dev);
+ misc_dev = NULL;
+}
+
+void sfs_dev_destroy(struct psp_device *psp)
+{
+ struct sfs_device *sfs_dev = psp->sfs_data;
+
+ if (!sfs_dev)
+ return;
+
+ /*
+ * Change SFS command buffer back to the default "Write-Back" type.
+ */
+ set_memory_wb((unsigned long)sfs_dev->command_buf, SFS_NUM_PAGES_CMDBUF);
+
+ snp_free_hv_fixed_pages(sfs_dev->page);
+
+ if (sfs_dev->misc)
+ kref_put(&misc_dev->refcount, sfs_exit);
+
+ psp->sfs_data = NULL;
+}
+
+/* Based on sev_misc_init() */
+static int sfs_misc_init(struct sfs_device *sfs)
+{
+ struct device *dev = sfs->dev;
+ int ret;
+
+ /*
+ * SFS feature support can be detected on multiple devices but the SFS
+ * FW commands must be issued on the master. During probe, we do not
+ * know the master hence we create /dev/sfs on the first device probe.
+ */
+ if (!misc_dev) {
+ struct miscdevice *misc;
+
+ misc_dev = kzalloc(sizeof(*misc_dev), GFP_KERNEL);
+ if (!misc_dev)
+ return -ENOMEM;
+
+ misc = &misc_dev->misc;
+ misc->minor = MISC_DYNAMIC_MINOR;
+ misc->name = "sfs";
+ misc->fops = &sfs_fops;
+ misc->mode = 0600;
+
+ ret = misc_register(misc);
+ if (ret)
+ return ret;
+
+ kref_init(&misc_dev->refcount);
+ } else {
+ kref_get(&misc_dev->refcount);
+ }
+
+ sfs->misc = misc_dev;
+ dev_dbg(dev, "registered SFS device\n");
+
+ return 0;
+}
+
+int sfs_dev_init(struct psp_device *psp)
+{
+ struct device *dev = psp->dev;
+ struct sfs_device *sfs_dev;
+ struct page *page;
+ int ret = -ENOMEM;
+
+ sfs_dev = devm_kzalloc(dev, sizeof(*sfs_dev), GFP_KERNEL);
+ if (!sfs_dev)
+ return -ENOMEM;
+
+ /*
+ * Pre-allocate 2MB command buffer for all SFS commands using
+ * SNP HV_Fixed page allocator which also transitions the
+ * SFS command buffer to HV_Fixed page state if SNP is enabled.
+ */
+ page = snp_alloc_hv_fixed_pages(SFS_NUM_2MB_PAGES_CMDBUF);
+ if (!page) {
+ dev_dbg(dev, "Command Buffer HV-Fixed page allocation failed\n");
+ goto cleanup_dev;
+ }
+ sfs_dev->page = page;
+ sfs_dev->command_buf = page_address(page);
+
+ dev_dbg(dev, "Command buffer 0x%px to be marked as HV_Fixed\n", sfs_dev->command_buf);
+
+ /*
+ * SFS command buffer must be mapped as non-cacheable.
+ */
+ ret = set_memory_uc((unsigned long)sfs_dev->command_buf, SFS_NUM_PAGES_CMDBUF);
+ if (ret) {
+ dev_dbg(dev, "Set memory uc failed\n");
+ goto cleanup_cmd_buf;
+ }
+
+ dev_dbg(dev, "Command buffer 0x%px marked uncacheable\n", sfs_dev->command_buf);
+
+ psp->sfs_data = sfs_dev;
+ sfs_dev->dev = dev;
+ sfs_dev->psp = psp;
+
+ ret = sfs_misc_init(sfs_dev);
+ if (ret)
+ goto cleanup_mem_attr;
+
+ dev_notice(sfs_dev->dev, "SFS support is available\n");
+
+ return 0;
+
+cleanup_mem_attr:
+ set_memory_wb((unsigned long)sfs_dev->command_buf, SFS_NUM_PAGES_CMDBUF);
+
+cleanup_cmd_buf:
+ snp_free_hv_fixed_pages(page);
+
+cleanup_dev:
+ psp->sfs_data = NULL;
+ devm_kfree(dev, sfs_dev);
+
+ return ret;
+}
diff --git a/drivers/crypto/ccp/sfs.h b/drivers/crypto/ccp/sfs.h
new file mode 100644
index 000000000000..97704c210efd
--- /dev/null
+++ b/drivers/crypto/ccp/sfs.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * AMD Platform Security Processor (PSP) Seamless Firmware (SFS) Support.
+ *
+ * Copyright (C) 2025 Advanced Micro Devices, Inc.
+ *
+ * Author: Ashish Kalra <ashish.kalra@amd.com>
+ */
+
+#ifndef __SFS_H__
+#define __SFS_H__
+
+#include <uapi/linux/psp-sfs.h>
+
+#include <linux/device.h>
+#include <linux/miscdevice.h>
+#include <linux/psp-sev.h>
+#include <linux/psp-platform-access.h>
+#include <linux/set_memory.h>
+
+#include "psp-dev.h"
+
+struct sfs_misc_dev {
+ struct kref refcount;
+ struct miscdevice misc;
+};
+
+struct sfs_command {
+ struct psp_ext_req_buffer_hdr hdr;
+ u8 buf[PAGE_SIZE - sizeof(struct psp_ext_req_buffer_hdr)];
+ u8 sfs_buffer[];
+} __packed;
+
+struct sfs_device {
+ struct device *dev;
+ struct psp_device *psp;
+
+ struct page *page;
+ struct sfs_command *command_buf;
+
+ struct sfs_misc_dev *misc;
+};
+
+void sfs_dev_destroy(struct psp_device *psp);
+int sfs_dev_init(struct psp_device *psp);
+
+#endif /* __SFS_H__ */
diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c
index cafc90d4caaf..0d05eac7c72b 100644
--- a/drivers/firmware/efi/libstub/x86-stub.c
+++ b/drivers/firmware/efi/libstub/x86-stub.c
@@ -788,7 +788,9 @@ static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry,
*kernel_entry = addr + entry;
- return efi_adjust_memory_range_protection(addr, kernel_text_size);
+ return efi_adjust_memory_range_protection(addr, kernel_text_size) ?:
+ efi_adjust_memory_range_protection(addr + kernel_inittext_offset,
+ kernel_inittext_size);
}
static void __noreturn enter_kernel(unsigned long kernel_addr,
diff --git a/drivers/irqchip/irq-aspeed-scu-ic.c b/drivers/irqchip/irq-aspeed-scu-ic.c
index 1c7045467c48..5584e0f82cce 100644
--- a/drivers/irqchip/irq-aspeed-scu-ic.c
+++ b/drivers/irqchip/irq-aspeed-scu-ic.c
@@ -1,61 +1,78 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
- * Aspeed AST24XX, AST25XX, and AST26XX SCU Interrupt Controller
+ * Aspeed AST24XX, AST25XX, AST26XX, and AST27XX SCU Interrupt Controller
* Copyright 2019 IBM Corporation
*
* Eddie James <eajames@linux.ibm.com>
*/
#include <linux/bitops.h>
+#include <linux/io.h>
#include <linux/irq.h>
#include <linux/irqchip.h>
#include <linux/irqchip/chained_irq.h>
#include <linux/irqdomain.h>
-#include <linux/mfd/syscon.h>
+#include <linux/of_address.h>
#include <linux/of_irq.h>
-#include <linux/regmap.h>
-#define ASPEED_SCU_IC_REG 0x018
-#define ASPEED_SCU_IC_SHIFT 0
-#define ASPEED_SCU_IC_ENABLE GENMASK(15, ASPEED_SCU_IC_SHIFT)
-#define ASPEED_SCU_IC_NUM_IRQS 7
#define ASPEED_SCU_IC_STATUS GENMASK(28, 16)
#define ASPEED_SCU_IC_STATUS_SHIFT 16
+#define AST2700_SCU_IC_STATUS GENMASK(15, 0)
+
+struct aspeed_scu_ic_variant {
+ const char *compatible;
+ unsigned long irq_enable;
+ unsigned long irq_shift;
+ unsigned int num_irqs;
+ unsigned long ier;
+ unsigned long isr;
+};
-#define ASPEED_AST2600_SCU_IC0_REG 0x560
-#define ASPEED_AST2600_SCU_IC0_SHIFT 0
-#define ASPEED_AST2600_SCU_IC0_ENABLE \
- GENMASK(5, ASPEED_AST2600_SCU_IC0_SHIFT)
-#define ASPEED_AST2600_SCU_IC0_NUM_IRQS 6
+#define SCU_VARIANT(_compat, _shift, _enable, _num, _ier, _isr) { \
+ .compatible = _compat, \
+ .irq_shift = _shift, \
+ .irq_enable = _enable, \
+ .num_irqs = _num, \
+ .ier = _ier, \
+ .isr = _isr, \
+}
-#define ASPEED_AST2600_SCU_IC1_REG 0x570
-#define ASPEED_AST2600_SCU_IC1_SHIFT 4
-#define ASPEED_AST2600_SCU_IC1_ENABLE \
- GENMASK(5, ASPEED_AST2600_SCU_IC1_SHIFT)
-#define ASPEED_AST2600_SCU_IC1_NUM_IRQS 2
+static const struct aspeed_scu_ic_variant scu_ic_variants[] __initconst = {
+ SCU_VARIANT("aspeed,ast2400-scu-ic", 0, GENMASK(15, 0), 7, 0x00, 0x00),
+ SCU_VARIANT("aspeed,ast2500-scu-ic", 0, GENMASK(15, 0), 7, 0x00, 0x00),
+ SCU_VARIANT("aspeed,ast2600-scu-ic0", 0, GENMASK(5, 0), 6, 0x00, 0x00),
+ SCU_VARIANT("aspeed,ast2600-scu-ic1", 4, GENMASK(5, 4), 2, 0x00, 0x00),
+ SCU_VARIANT("aspeed,ast2700-scu-ic0", 0, GENMASK(3, 0), 4, 0x00, 0x04),
+ SCU_VARIANT("aspeed,ast2700-scu-ic1", 0, GENMASK(3, 0), 4, 0x00, 0x04),
+ SCU_VARIANT("aspeed,ast2700-scu-ic2", 0, GENMASK(3, 0), 4, 0x04, 0x00),
+ SCU_VARIANT("aspeed,ast2700-scu-ic3", 0, GENMASK(1, 0), 2, 0x04, 0x00),
+};
struct aspeed_scu_ic {
- unsigned long irq_enable;
- unsigned long irq_shift;
- unsigned int num_irqs;
- unsigned int reg;
- struct regmap *scu;
- struct irq_domain *irq_domain;
+ unsigned long irq_enable;
+ unsigned long irq_shift;
+ unsigned int num_irqs;
+ void __iomem *base;
+ struct irq_domain *irq_domain;
+ unsigned long ier;
+ unsigned long isr;
};
-static void aspeed_scu_ic_irq_handler(struct irq_desc *desc)
+static inline bool scu_has_split_isr(struct aspeed_scu_ic *scu)
+{
+ return scu->ier != scu->isr;
+}
+
+static void aspeed_scu_ic_irq_handler_combined(struct irq_desc *desc)
{
- unsigned int sts;
- unsigned long bit;
- unsigned long enabled;
- unsigned long max;
- unsigned long status;
struct aspeed_scu_ic *scu_ic = irq_desc_get_handler_data(desc);
struct irq_chip *chip = irq_desc_get_chip(desc);
- unsigned int mask = scu_ic->irq_enable << ASPEED_SCU_IC_STATUS_SHIFT;
+ unsigned long bit, enabled, max, status;
+ unsigned int sts, mask;
chained_irq_enter(chip, desc);
+ mask = scu_ic->irq_enable << ASPEED_SCU_IC_STATUS_SHIFT;
/*
* The SCU IC has just one register to control its operation and read
* status. The interrupt enable bits occupy the lower 16 bits of the
@@ -66,7 +83,7 @@ static void aspeed_scu_ic_irq_handler(struct irq_desc *desc)
* shifting the status down to get the mapping and then back up to
* clear the bit.
*/
- regmap_read(scu_ic->scu, scu_ic->reg, &sts);
+ sts = readl(scu_ic->base);
enabled = sts & scu_ic->irq_enable;
status = (sts >> ASPEED_SCU_IC_STATUS_SHIFT) & enabled;
@@ -74,43 +91,83 @@ static void aspeed_scu_ic_irq_handler(struct irq_desc *desc)
max = scu_ic->num_irqs + bit;
for_each_set_bit_from(bit, &status, max) {
- generic_handle_domain_irq(scu_ic->irq_domain,
- bit - scu_ic->irq_shift);
+ generic_handle_domain_irq(scu_ic->irq_domain, bit - scu_ic->irq_shift);
+ writel((readl(scu_ic->base) & ~mask) | BIT(bit + ASPEED_SCU_IC_STATUS_SHIFT),
+ scu_ic->base);
+ }
+
+ chained_irq_exit(chip, desc);
+}
+
+static void aspeed_scu_ic_irq_handler_split(struct irq_desc *desc)
+{
+ struct aspeed_scu_ic *scu_ic = irq_desc_get_handler_data(desc);
+ struct irq_chip *chip = irq_desc_get_chip(desc);
+ unsigned long bit, enabled, max, status;
+ unsigned int sts, mask;
- regmap_write_bits(scu_ic->scu, scu_ic->reg, mask,
- BIT(bit + ASPEED_SCU_IC_STATUS_SHIFT));
+ chained_irq_enter(chip, desc);
+
+ mask = scu_ic->irq_enable;
+ sts = readl(scu_ic->base + scu_ic->isr);
+ enabled = sts & scu_ic->irq_enable;
+ sts = readl(scu_ic->base + scu_ic->isr);
+ status = sts & enabled;
+
+ bit = scu_ic->irq_shift;
+ max = scu_ic->num_irqs + bit;
+
+ for_each_set_bit_from(bit, &status, max) {
+ generic_handle_domain_irq(scu_ic->irq_domain, bit - scu_ic->irq_shift);
+ /* Clear interrupt */
+ writel(BIT(bit), scu_ic->base + scu_ic->isr);
}
chained_irq_exit(chip, desc);
}
-static void aspeed_scu_ic_irq_mask(struct irq_data *data)
+static void aspeed_scu_ic_irq_mask_combined(struct irq_data *data)
{
struct aspeed_scu_ic *scu_ic = irq_data_get_irq_chip_data(data);
- unsigned int mask = BIT(data->hwirq + scu_ic->irq_shift) |
- (scu_ic->irq_enable << ASPEED_SCU_IC_STATUS_SHIFT);
+ unsigned int bit = BIT(data->hwirq + scu_ic->irq_shift);
+ unsigned int mask = bit | (scu_ic->irq_enable << ASPEED_SCU_IC_STATUS_SHIFT);
/*
* Status bits are cleared by writing 1. In order to prevent the mask
* operation from clearing the status bits, they should be under the
* mask and written with 0.
*/
- regmap_update_bits(scu_ic->scu, scu_ic->reg, mask, 0);
+ writel(readl(scu_ic->base) & ~mask, scu_ic->base);
}
-static void aspeed_scu_ic_irq_unmask(struct irq_data *data)
+static void aspeed_scu_ic_irq_unmask_combined(struct irq_data *data)
{
struct aspeed_scu_ic *scu_ic = irq_data_get_irq_chip_data(data);
unsigned int bit = BIT(data->hwirq + scu_ic->irq_shift);
- unsigned int mask = bit |
- (scu_ic->irq_enable << ASPEED_SCU_IC_STATUS_SHIFT);
+ unsigned int mask = bit | (scu_ic->irq_enable << ASPEED_SCU_IC_STATUS_SHIFT);
/*
* Status bits are cleared by writing 1. In order to prevent the unmask
* operation from clearing the status bits, they should be under the
* mask and written with 0.
*/
- regmap_update_bits(scu_ic->scu, scu_ic->reg, mask, bit);
+ writel((readl(scu_ic->base) & ~mask) | bit, scu_ic->base);
+}
+
+static void aspeed_scu_ic_irq_mask_split(struct irq_data *data)
+{
+ struct aspeed_scu_ic *scu_ic = irq_data_get_irq_chip_data(data);
+ unsigned int mask = BIT(data->hwirq + scu_ic->irq_shift);
+
+ writel(readl(scu_ic->base) & ~mask, scu_ic->base + scu_ic->ier);
+}
+
+static void aspeed_scu_ic_irq_unmask_split(struct irq_data *data)
+{
+ struct aspeed_scu_ic *scu_ic = irq_data_get_irq_chip_data(data);
+ unsigned int bit = BIT(data->hwirq + scu_ic->irq_shift);
+
+ writel(readl(scu_ic->base) | bit, scu_ic->base + scu_ic->ier);
}
static int aspeed_scu_ic_irq_set_affinity(struct irq_data *data,
@@ -120,17 +177,29 @@ static int aspeed_scu_ic_irq_set_affinity(struct irq_data *data,
return -EINVAL;
}
-static struct irq_chip aspeed_scu_ic_chip = {
+static struct irq_chip aspeed_scu_ic_chip_combined = {
.name = "aspeed-scu-ic",
- .irq_mask = aspeed_scu_ic_irq_mask,
- .irq_unmask = aspeed_scu_ic_irq_unmask,
- .irq_set_affinity = aspeed_scu_ic_irq_set_affinity,
+ .irq_mask = aspeed_scu_ic_irq_mask_combined,
+ .irq_unmask = aspeed_scu_ic_irq_unmask_combined,
+ .irq_set_affinity = aspeed_scu_ic_irq_set_affinity,
+};
+
+static struct irq_chip aspeed_scu_ic_chip_split = {
+ .name = "ast2700-scu-ic",
+ .irq_mask = aspeed_scu_ic_irq_mask_split,
+ .irq_unmask = aspeed_scu_ic_irq_unmask_split,
+ .irq_set_affinity = aspeed_scu_ic_irq_set_affinity,
};
static int aspeed_scu_ic_map(struct irq_domain *domain, unsigned int irq,
irq_hw_number_t hwirq)
{
- irq_set_chip_and_handler(irq, &aspeed_scu_ic_chip, handle_level_irq);
+ struct aspeed_scu_ic *scu_ic = domain->host_data;
+
+ if (scu_has_split_isr(scu_ic))
+ irq_set_chip_and_handler(irq, &aspeed_scu_ic_chip_split, handle_level_irq);
+ else
+ irq_set_chip_and_handler(irq, &aspeed_scu_ic_chip_combined, handle_level_irq);
irq_set_chip_data(irq, domain->host_data);
return 0;
@@ -143,21 +212,21 @@ static const struct irq_domain_ops aspeed_scu_ic_domain_ops = {
static int aspeed_scu_ic_of_init_common(struct aspeed_scu_ic *scu_ic,
struct device_node *node)
{
- int irq;
- int rc = 0;
+ int irq, rc = 0;
- if (!node->parent) {
- rc = -ENODEV;
+ scu_ic->base = of_iomap(node, 0);
+ if (IS_ERR(scu_ic->base)) {
+ rc = PTR_ERR(scu_ic->base);
goto err;
}
- scu_ic->scu = syscon_node_to_regmap(node->parent);
- if (IS_ERR(scu_ic->scu)) {
- rc = PTR_ERR(scu_ic->scu);
- goto err;
+ if (scu_has_split_isr(scu_ic)) {
+ writel(AST2700_SCU_IC_STATUS, scu_ic->base + scu_ic->isr);
+ writel(0, scu_ic->base + scu_ic->ier);
+ } else {
+ writel(ASPEED_SCU_IC_STATUS, scu_ic->base);
+ writel(0, scu_ic->base);
}
- regmap_write_bits(scu_ic->scu, scu_ic->reg, ASPEED_SCU_IC_STATUS, ASPEED_SCU_IC_STATUS);
- regmap_write_bits(scu_ic->scu, scu_ic->reg, ASPEED_SCU_IC_ENABLE, 0);
irq = irq_of_parse_and_map(node, 0);
if (!irq) {
@@ -166,75 +235,60 @@ static int aspeed_scu_ic_of_init_common(struct aspeed_scu_ic *scu_ic,
}
scu_ic->irq_domain = irq_domain_create_linear(of_fwnode_handle(node), scu_ic->num_irqs,
- &aspeed_scu_ic_domain_ops,
- scu_ic);
+ &aspeed_scu_ic_domain_ops, scu_ic);
if (!scu_ic->irq_domain) {
rc = -ENOMEM;
goto err;
}
- irq_set_chained_handler_and_data(irq, aspeed_scu_ic_irq_handler,
+ irq_set_chained_handler_and_data(irq, scu_has_split_isr(scu_ic) ?
+ aspeed_scu_ic_irq_handler_split :
+ aspeed_scu_ic_irq_handler_combined,
scu_ic);
return 0;
err:
kfree(scu_ic);
-
return rc;
}
-static int __init aspeed_scu_ic_of_init(struct device_node *node,
- struct device_node *parent)
+static const struct aspeed_scu_ic_variant *aspeed_scu_ic_find_variant(struct device_node *np)
{
- struct aspeed_scu_ic *scu_ic = kzalloc(sizeof(*scu_ic), GFP_KERNEL);
-
- if (!scu_ic)
- return -ENOMEM;
-
- scu_ic->irq_enable = ASPEED_SCU_IC_ENABLE;
- scu_ic->irq_shift = ASPEED_SCU_IC_SHIFT;
- scu_ic->num_irqs = ASPEED_SCU_IC_NUM_IRQS;
- scu_ic->reg = ASPEED_SCU_IC_REG;
-
- return aspeed_scu_ic_of_init_common(scu_ic, node);
+ for (int i = 0; i < ARRAY_SIZE(scu_ic_variants); i++) {
+ if (of_device_is_compatible(np, scu_ic_variants[i].compatible))
+ return &scu_ic_variants[i];
+ }
+ return NULL;
}
-static int __init aspeed_ast2600_scu_ic0_of_init(struct device_node *node,
- struct device_node *parent)
+static int __init aspeed_scu_ic_of_init(struct device_node *node, struct device_node *parent)
{
- struct aspeed_scu_ic *scu_ic = kzalloc(sizeof(*scu_ic), GFP_KERNEL);
+ const struct aspeed_scu_ic_variant *variant;
+ struct aspeed_scu_ic *scu_ic;
- if (!scu_ic)
- return -ENOMEM;
-
- scu_ic->irq_enable = ASPEED_AST2600_SCU_IC0_ENABLE;
- scu_ic->irq_shift = ASPEED_AST2600_SCU_IC0_SHIFT;
- scu_ic->num_irqs = ASPEED_AST2600_SCU_IC0_NUM_IRQS;
- scu_ic->reg = ASPEED_AST2600_SCU_IC0_REG;
-
- return aspeed_scu_ic_of_init_common(scu_ic, node);
-}
-
-static int __init aspeed_ast2600_scu_ic1_of_init(struct device_node *node,
- struct device_node *parent)
-{
- struct aspeed_scu_ic *scu_ic = kzalloc(sizeof(*scu_ic), GFP_KERNEL);
+ variant = aspeed_scu_ic_find_variant(node);
+ if (!variant)
+ return -ENODEV;
+ scu_ic = kzalloc(sizeof(*scu_ic), GFP_KERNEL);
if (!scu_ic)
return -ENOMEM;
- scu_ic->irq_enable = ASPEED_AST2600_SCU_IC1_ENABLE;
- scu_ic->irq_shift = ASPEED_AST2600_SCU_IC1_SHIFT;
- scu_ic->num_irqs = ASPEED_AST2600_SCU_IC1_NUM_IRQS;
- scu_ic->reg = ASPEED_AST2600_SCU_IC1_REG;
+ scu_ic->irq_enable = variant->irq_enable;
+ scu_ic->irq_shift = variant->irq_shift;
+ scu_ic->num_irqs = variant->num_irqs;
+ scu_ic->ier = variant->ier;
+ scu_ic->isr = variant->isr;
return aspeed_scu_ic_of_init_common(scu_ic, node);
}
IRQCHIP_DECLARE(ast2400_scu_ic, "aspeed,ast2400-scu-ic", aspeed_scu_ic_of_init);
IRQCHIP_DECLARE(ast2500_scu_ic, "aspeed,ast2500-scu-ic", aspeed_scu_ic_of_init);
-IRQCHIP_DECLARE(ast2600_scu_ic0, "aspeed,ast2600-scu-ic0",
- aspeed_ast2600_scu_ic0_of_init);
-IRQCHIP_DECLARE(ast2600_scu_ic1, "aspeed,ast2600-scu-ic1",
- aspeed_ast2600_scu_ic1_of_init);
+IRQCHIP_DECLARE(ast2600_scu_ic0, "aspeed,ast2600-scu-ic0", aspeed_scu_ic_of_init);
+IRQCHIP_DECLARE(ast2600_scu_ic1, "aspeed,ast2600-scu-ic1", aspeed_scu_ic_of_init);
+IRQCHIP_DECLARE(ast2700_scu_ic0, "aspeed,ast2700-scu-ic0", aspeed_scu_ic_of_init);
+IRQCHIP_DECLARE(ast2700_scu_ic1, "aspeed,ast2700-scu-ic1", aspeed_scu_ic_of_init);
+IRQCHIP_DECLARE(ast2700_scu_ic2, "aspeed,ast2700-scu-ic2", aspeed_scu_ic_of_init);
+IRQCHIP_DECLARE(ast2700_scu_ic3, "aspeed,ast2700-scu-ic3", aspeed_scu_ic_of_init);
diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c
index 24ef5af569fe..8a3410c2b7b5 100644
--- a/drivers/irqchip/irq-gic-v2m.c
+++ b/drivers/irqchip/irq-gic-v2m.c
@@ -153,14 +153,19 @@ static int gicv2m_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
{
msi_alloc_info_t *info = args;
struct v2m_data *v2m = NULL, *tmp;
- int hwirq, offset, i, err = 0;
+ int hwirq, i, err = 0;
+ unsigned long offset;
+ unsigned long align_mask = nr_irqs - 1;
spin_lock(&v2m_lock);
list_for_each_entry(tmp, &v2m_nodes, entry) {
- offset = bitmap_find_free_region(tmp->bm, tmp->nr_spis,
- get_count_order(nr_irqs));
- if (offset >= 0) {
+ unsigned long align_off = tmp->spi_start - (tmp->spi_start & ~align_mask);
+
+ offset = bitmap_find_next_zero_area_off(tmp->bm, tmp->nr_spis, 0,
+ nr_irqs, align_mask, align_off);
+ if (offset < tmp->nr_spis) {
v2m = tmp;
+ bitmap_set(v2m->bm, offset, nr_irqs);
break;
}
}
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index dbeb85677b08..3de351e66ee8 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -1766,8 +1766,9 @@ static int gic_irq_domain_select(struct irq_domain *d,
struct irq_fwspec *fwspec,
enum irq_domain_bus_token bus_token)
{
- unsigned int type, ret, ppi_idx;
+ unsigned int type, ppi_idx;
irq_hw_number_t hwirq;
+ int ret;
/* Not for us */
if (fwspec->fwnode != d->fwnode)
diff --git a/drivers/irqchip/irq-gic-v5-irs.c b/drivers/irqchip/irq-gic-v5-irs.c
index 13c035727e32..ce2732d649a3 100644
--- a/drivers/irqchip/irq-gic-v5-irs.c
+++ b/drivers/irqchip/irq-gic-v5-irs.c
@@ -571,7 +571,7 @@ static void __init gicv5_irs_init_bases(struct gicv5_irs_chip_data *irs_data,
FIELD_PREP(GICV5_IRS_CR1_IST_RA, GICV5_NO_READ_ALLOC) |
FIELD_PREP(GICV5_IRS_CR1_IC, GICV5_NON_CACHE) |
FIELD_PREP(GICV5_IRS_CR1_OC, GICV5_NON_CACHE);
- irs_data->flags |= IRS_FLAGS_NON_COHERENT;
+ irs_data->flags |= IRS_FLAGS_NON_COHERENT;
} else {
cr1 = FIELD_PREP(GICV5_IRS_CR1_VPED_WA, GICV5_WRITE_ALLOC) |
FIELD_PREP(GICV5_IRS_CR1_VPED_RA, GICV5_READ_ALLOC) |
diff --git a/drivers/irqchip/irq-gic-v5-its.c b/drivers/irqchip/irq-gic-v5-its.c
index 9290ac741949..554485f0be1f 100644
--- a/drivers/irqchip/irq-gic-v5-its.c
+++ b/drivers/irqchip/irq-gic-v5-its.c
@@ -191,9 +191,9 @@ static int gicv5_its_create_itt_two_level(struct gicv5_its_chip_data *its,
unsigned int num_events)
{
unsigned int l1_bits, l2_bits, span, events_per_l2_table;
- unsigned int i, complete_tables, final_span, num_ents;
+ unsigned int complete_tables, final_span, num_ents;
__le64 *itt_l1, *itt_l2, **l2ptrs;
- int ret;
+ int i, ret;
u64 val;
ret = gicv5_its_l2sz_to_l2_bits(itt_l2sz);
@@ -768,8 +768,6 @@ static struct gicv5_its_dev *gicv5_its_alloc_device(struct gicv5_its_chip_data *
goto out_dev_free;
}
- gicv5_its_device_cache_inv(its, its_dev);
-
its_dev->its_node = its;
its_dev->event_map = (unsigned long *)bitmap_zalloc(its_dev->num_events, GFP_KERNEL);
@@ -949,15 +947,18 @@ static int gicv5_its_irq_domain_alloc(struct irq_domain *domain, unsigned int vi
device_id = its_dev->device_id;
for (i = 0; i < nr_irqs; i++) {
- lpi = gicv5_alloc_lpi();
+ ret = gicv5_alloc_lpi();
if (ret < 0) {
pr_debug("Failed to find free LPI!\n");
- goto out_eventid;
+ goto out_free_irqs;
}
+ lpi = ret;
ret = irq_domain_alloc_irqs_parent(domain, virq + i, 1, &lpi);
- if (ret)
- goto out_free_lpi;
+ if (ret) {
+ gicv5_free_lpi(lpi);
+ goto out_free_irqs;
+ }
/*
* Store eventid and deviceid into the hwirq for later use.
@@ -977,8 +978,13 @@ static int gicv5_its_irq_domain_alloc(struct irq_domain *domain, unsigned int vi
return 0;
-out_free_lpi:
- gicv5_free_lpi(lpi);
+out_free_irqs:
+ while (--i >= 0) {
+ irqd = irq_domain_get_irq_data(domain, virq + i);
+ gicv5_free_lpi(irqd->parent_data->hwirq);
+ irq_domain_reset_irq_data(irqd);
+ irq_domain_free_irqs_parent(domain, virq + i, 1);
+ }
out_eventid:
gicv5_its_free_eventid(its_dev, event_id_base, nr_irqs);
return ret;
diff --git a/drivers/irqchip/irq-loongson-eiointc.c b/drivers/irqchip/irq-loongson-eiointc.c
index b2860eb2d32c..39e5a72ccd3c 100644
--- a/drivers/irqchip/irq-loongson-eiointc.c
+++ b/drivers/irqchip/irq-loongson-eiointc.c
@@ -46,6 +46,7 @@
#define EIOINTC_ALL_ENABLE_VEC_MASK(vector) (EIOINTC_ALL_ENABLE & ~BIT(vector & 0x1f))
#define EIOINTC_REG_ENABLE_VEC(vector) (EIOINTC_REG_ENABLE + ((vector >> 5) << 2))
#define EIOINTC_USE_CPU_ENCODE BIT(0)
+#define EIOINTC_ROUTE_MULT_IP BIT(1)
#define MAX_EIO_NODES (NR_CPUS / CORES_PER_EIO_NODE)
@@ -59,6 +60,14 @@
#define EIOINTC_REG_ROUTE_VEC_MASK(vector) (0xff << EIOINTC_REG_ROUTE_VEC_SHIFT(vector))
static int nr_pics;
+struct eiointc_priv;
+
+struct eiointc_ip_route {
+ struct eiointc_priv *priv;
+ /* Offset Routed destination IP */
+ int start;
+ int end;
+};
struct eiointc_priv {
u32 node;
@@ -68,6 +77,8 @@ struct eiointc_priv {
struct fwnode_handle *domain_handle;
struct irq_domain *eiointc_domain;
int flags;
+ irq_hw_number_t parent_hwirq;
+ struct eiointc_ip_route route_info[VEC_REG_COUNT];
};
static struct eiointc_priv *eiointc_priv[MAX_IO_PICS];
@@ -188,6 +199,7 @@ static int eiointc_router_init(unsigned int cpu)
{
int i, bit, cores, index, node;
unsigned int data;
+ int hwirq, mask;
node = cpu_to_eio_node(cpu);
index = eiointc_index(node);
@@ -197,6 +209,13 @@ static int eiointc_router_init(unsigned int cpu)
return -EINVAL;
}
+ /* Enable cpu interrupt pin from eiointc */
+ hwirq = eiointc_priv[index]->parent_hwirq;
+ mask = BIT(hwirq);
+ if (eiointc_priv[index]->flags & EIOINTC_ROUTE_MULT_IP)
+ mask |= BIT(hwirq + 1) | BIT(hwirq + 2) | BIT(hwirq + 3);
+ set_csr_ecfg(mask);
+
if (!(eiointc_priv[index]->flags & EIOINTC_USE_CPU_ENCODE))
cores = CORES_PER_EIO_NODE;
else
@@ -211,8 +230,31 @@ static int eiointc_router_init(unsigned int cpu)
}
for (i = 0; i < eiointc_priv[0]->vec_count / 32 / 4; i++) {
- bit = BIT(1 + index); /* Route to IP[1 + index] */
- data = bit | (bit << 8) | (bit << 16) | (bit << 24);
+ /*
+ * Route to interrupt pin, relative offset used here
+ * Offset 0 means routing to IP0 and so on
+ *
+ * If flags is set with EIOINTC_ROUTE_MULT_IP,
+ * every 64 vector routes to different consecutive
+ * IPs, otherwise all vector routes to the same IP
+ */
+ if (eiointc_priv[index]->flags & EIOINTC_ROUTE_MULT_IP) {
+ /* The first 64 vectors route to hwirq */
+ bit = BIT(hwirq++ - INT_HWI0);
+ data = bit | (bit << 8);
+
+ /* The second 64 vectors route to hwirq + 1 */
+ bit = BIT(hwirq++ - INT_HWI0);
+ data |= (bit << 16) | (bit << 24);
+
+ /*
+ * Route to hwirq + 2/hwirq + 3 separately
+ * in next loop
+ */
+ } else {
+ bit = BIT(hwirq - INT_HWI0);
+ data = bit | (bit << 8) | (bit << 16) | (bit << 24);
+ }
iocsr_write32(data, EIOINTC_REG_IPMAP + i * 4);
}
@@ -241,15 +283,22 @@ static int eiointc_router_init(unsigned int cpu)
static void eiointc_irq_dispatch(struct irq_desc *desc)
{
- int i;
- u64 pending;
- bool handled = false;
+ struct eiointc_ip_route *info = irq_desc_get_handler_data(desc);
struct irq_chip *chip = irq_desc_get_chip(desc);
- struct eiointc_priv *priv = irq_desc_get_handler_data(desc);
+ bool handled = false;
+ u64 pending;
+ int i;
chained_irq_enter(chip, desc);
- for (i = 0; i < eiointc_priv[0]->vec_count / VEC_COUNT_PER_REG; i++) {
+ /*
+ * If EIOINTC_ROUTE_MULT_IP is set, every 64 interrupt vectors in
+ * eiointc interrupt controller routes to different cpu interrupt pins
+ *
+ * Every cpu interrupt pin has its own irq handler, it is ok to
+ * read ISR for these 64 interrupt vectors rather than all vectors
+ */
+ for (i = info->start; i < info->end; i++) {
pending = iocsr_read64(EIOINTC_REG_ISR + (i << 3));
/* Skip handling if pending bitmap is zero */
@@ -262,7 +311,7 @@ static void eiointc_irq_dispatch(struct irq_desc *desc)
int bit = __ffs(pending);
int irq = bit + VEC_COUNT_PER_REG * i;
- generic_handle_domain_irq(priv->eiointc_domain, irq);
+ generic_handle_domain_irq(info->priv->eiointc_domain, irq);
pending &= ~BIT(bit);
handled = true;
}
@@ -462,8 +511,33 @@ static int __init eiointc_init(struct eiointc_priv *priv, int parent_irq,
}
eiointc_priv[nr_pics++] = priv;
+ /*
+ * Only the first eiointc device on VM supports routing to
+ * different CPU interrupt pins. The later eiointc devices use
+ * generic method if there are multiple eiointc devices in future
+ */
+ if (cpu_has_hypervisor && (nr_pics == 1)) {
+ priv->flags |= EIOINTC_ROUTE_MULT_IP;
+ priv->parent_hwirq = INT_HWI0;
+ }
+
+ if (priv->flags & EIOINTC_ROUTE_MULT_IP) {
+ for (i = 0; i < priv->vec_count / VEC_COUNT_PER_REG; i++) {
+ priv->route_info[i].start = priv->parent_hwirq - INT_HWI0 + i;
+ priv->route_info[i].end = priv->route_info[i].start + 1;
+ priv->route_info[i].priv = priv;
+ parent_irq = get_percpu_irq(priv->parent_hwirq + i);
+ irq_set_chained_handler_and_data(parent_irq, eiointc_irq_dispatch,
+ &priv->route_info[i]);
+ }
+ } else {
+ priv->route_info[0].start = 0;
+ priv->route_info[0].end = priv->vec_count / VEC_COUNT_PER_REG;
+ priv->route_info[0].priv = priv;
+ irq_set_chained_handler_and_data(parent_irq, eiointc_irq_dispatch,
+ &priv->route_info[0]);
+ }
eiointc_router_init(0);
- irq_set_chained_handler_and_data(parent_irq, eiointc_irq_dispatch, priv);
if (nr_pics == 1) {
register_syscore_ops(&eiointc_syscore_ops);
@@ -495,7 +569,7 @@ int __init eiointc_acpi_init(struct irq_domain *parent,
priv->vec_count = VEC_COUNT;
priv->node = acpi_eiointc->node;
-
+ priv->parent_hwirq = acpi_eiointc->cascade;
parent_irq = irq_create_mapping(parent, acpi_eiointc->cascade);
ret = eiointc_init(priv, parent_irq, acpi_eiointc->node_map);
@@ -527,8 +601,9 @@ out_free_priv:
static int __init eiointc_of_init(struct device_node *of_node,
struct device_node *parent)
{
- int parent_irq, ret;
struct eiointc_priv *priv;
+ struct irq_data *irq_data;
+ int parent_irq, ret;
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (!priv)
@@ -544,6 +619,12 @@ static int __init eiointc_of_init(struct device_node *of_node,
if (ret < 0)
goto out_free_priv;
+ irq_data = irq_get_irq_data(parent_irq);
+ if (!irq_data) {
+ ret = -ENODEV;
+ goto out_free_priv;
+ }
+
/*
* In particular, the number of devices supported by the LS2K0500
* extended I/O interrupt vector is 128.
@@ -552,7 +633,7 @@ static int __init eiointc_of_init(struct device_node *of_node,
priv->vec_count = 128;
else
priv->vec_count = VEC_COUNT;
-
+ priv->parent_hwirq = irqd_to_hwirq(irq_data);
priv->node = 0;
priv->domain_handle = of_fwnode_handle(of_node);
diff --git a/drivers/irqchip/irq-loongson-pch-lpc.c b/drivers/irqchip/irq-loongson-pch-lpc.c
index 2d4c3ec128b8..912bf50a5c7c 100644
--- a/drivers/irqchip/irq-loongson-pch-lpc.c
+++ b/drivers/irqchip/irq-loongson-pch-lpc.c
@@ -200,8 +200,13 @@ int __init pch_lpc_acpi_init(struct irq_domain *parent,
goto iounmap_base;
}
- priv->lpc_domain = irq_domain_create_linear(irq_handle, LPC_COUNT,
- &pch_lpc_domain_ops, priv);
+ /*
+ * The LPC interrupt controller is a legacy i8259-compatible device,
+ * which requires a static 1:1 mapping for IRQs 0-15.
+ * Use irq_domain_create_legacy to establish this static mapping early.
+ */
+ priv->lpc_domain = irq_domain_create_legacy(irq_handle, LPC_COUNT, 0, 0,
+ &pch_lpc_domain_ops, priv);
if (!priv->lpc_domain) {
pr_err("Failed to create IRQ domain\n");
goto free_irq_handle;
diff --git a/drivers/irqchip/irq-msi-lib.c b/drivers/irqchip/irq-msi-lib.c
index 908944009c21..d5eefc3d7215 100644
--- a/drivers/irqchip/irq-msi-lib.c
+++ b/drivers/irqchip/irq-msi-lib.c
@@ -112,6 +112,20 @@ bool msi_lib_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
*/
if (!chip->irq_set_affinity && !(info->flags & MSI_FLAG_NO_AFFINITY))
chip->irq_set_affinity = msi_domain_set_affinity;
+
+ /*
+ * If the parent domain insists on being in charge of masking, obey
+ * blindly. The interrupt is un-masked at the PCI level on startup
+ * and masked on shutdown to prevent rogue interrupts after the
+ * driver freed the interrupt. Not masking it at the PCI level
+ * speeds up operation for disable/enable_irq() as it avoids
+ * getting all the way out to the PCI device.
+ */
+ if (info->flags & MSI_FLAG_PCI_MSI_MASK_PARENT) {
+ chip->irq_mask = irq_chip_mask_parent;
+ chip->irq_unmask = irq_chip_unmask_parent;
+ }
+
return true;
}
EXPORT_SYMBOL_GPL(msi_lib_init_dev_msi_info);
diff --git a/drivers/irqchip/irq-nvic.c b/drivers/irqchip/irq-nvic.c
index 76e11cac9631..2191a2b79578 100644
--- a/drivers/irqchip/irq-nvic.c
+++ b/drivers/irqchip/irq-nvic.c
@@ -73,8 +73,9 @@ static int __init nvic_of_init(struct device_node *node,
struct device_node *parent)
{
unsigned int clr = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN;
- unsigned int irqs, i, ret, numbanks;
+ unsigned int irqs, i, numbanks;
void __iomem *nvic_base;
+ int ret;
numbanks = (readl_relaxed(V7M_SCS_ICTR) &
V7M_SCS_ICTR_INTLINESNUM_MASK) + 1;
diff --git a/drivers/irqchip/irq-renesas-rza1.c b/drivers/irqchip/irq-renesas-rza1.c
index a697eb55ac90..6047a524ac77 100644
--- a/drivers/irqchip/irq-renesas-rza1.c
+++ b/drivers/irqchip/irq-renesas-rza1.c
@@ -142,11 +142,12 @@ static const struct irq_domain_ops rza1_irqc_domain_ops = {
static int rza1_irqc_parse_map(struct rza1_irqc_priv *priv,
struct device_node *gic_node)
{
- unsigned int imaplen, i, j, ret;
struct device *dev = priv->dev;
+ unsigned int imaplen, i, j;
struct device_node *ipar;
const __be32 *imap;
u32 intsize;
+ int ret;
imap = of_get_property(dev->of_node, "interrupt-map", &imaplen);
if (!imap)
diff --git a/drivers/irqchip/irq-renesas-rzg2l.c b/drivers/irqchip/irq-renesas-rzg2l.c
index 360d88687e4f..2a54adeb4cc7 100644
--- a/drivers/irqchip/irq-renesas-rzg2l.c
+++ b/drivers/irqchip/irq-renesas-rzg2l.c
@@ -578,7 +578,7 @@ static int rzg2l_irqc_common_init(struct device_node *node, struct device_node *
&rzg2l_irqc_domain_ops, rzg2l_irqc_data);
if (!irq_domain) {
pm_runtime_put(dev);
- return dev_err_probe(dev, -ENOMEM, "failed to add irq domain\n");
+ return -ENOMEM;
}
register_syscore_ops(&rzg2l_irqc_syscore_ops);
diff --git a/drivers/irqchip/irq-sg2042-msi.c b/drivers/irqchip/irq-sg2042-msi.c
index bcfddc51bc6a..f7cf0dc72eab 100644
--- a/drivers/irqchip/irq-sg2042-msi.c
+++ b/drivers/irqchip/irq-sg2042-msi.c
@@ -30,6 +30,7 @@ struct sg204x_msi_chip_info {
* @doorbell_addr: see TRM, 10.1.32, GP_INTR0_SET
* @irq_first: First vectors number that MSIs starts
* @num_irqs: Number of vectors for MSIs
+ * @irq_type: IRQ type for MSIs
* @msi_map: mapping for allocated MSI vectors.
* @msi_map_lock: Lock for msi_map
* @chip_info: chip specific infomations
@@ -41,6 +42,7 @@ struct sg204x_msi_chipdata {
u32 irq_first;
u32 num_irqs;
+ unsigned int irq_type;
unsigned long *msi_map;
struct mutex msi_map_lock;
@@ -85,6 +87,8 @@ static void sg2042_msi_irq_compose_msi_msg(struct irq_data *d, struct msi_msg *m
static const struct irq_chip sg2042_msi_middle_irq_chip = {
.name = "SG2042 MSI",
+ .irq_startup = irq_chip_startup_parent,
+ .irq_shutdown = irq_chip_shutdown_parent,
.irq_ack = sg2042_msi_irq_ack,
.irq_mask = irq_chip_mask_parent,
.irq_unmask = irq_chip_unmask_parent,
@@ -114,6 +118,8 @@ static void sg2044_msi_irq_compose_msi_msg(struct irq_data *d, struct msi_msg *m
static struct irq_chip sg2044_msi_middle_irq_chip = {
.name = "SG2044 MSI",
+ .irq_startup = irq_chip_startup_parent,
+ .irq_shutdown = irq_chip_shutdown_parent,
.irq_ack = sg2044_msi_irq_ack,
.irq_mask = irq_chip_mask_parent,
.irq_unmask = irq_chip_unmask_parent,
@@ -133,14 +139,14 @@ static int sg204x_msi_parent_domain_alloc(struct irq_domain *domain, unsigned in
fwspec.fwnode = domain->parent->fwnode;
fwspec.param_count = 2;
fwspec.param[0] = data->irq_first + hwirq;
- fwspec.param[1] = IRQ_TYPE_EDGE_RISING;
+ fwspec.param[1] = data->irq_type;
ret = irq_domain_alloc_irqs_parent(domain, virq, 1, &fwspec);
if (ret)
return ret;
d = irq_domain_get_irq_data(domain->parent, virq);
- return d->chip->irq_set_type(d, IRQ_TYPE_EDGE_RISING);
+ return d->chip->irq_set_type(d, data->irq_type);
}
static int sg204x_msi_middle_domain_alloc(struct irq_domain *domain, unsigned int virq,
@@ -185,8 +191,10 @@ static const struct irq_domain_ops sg204x_msi_middle_domain_ops = {
.select = msi_lib_irq_domain_select,
};
-#define SG2042_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | \
- MSI_FLAG_USE_DEF_CHIP_OPS)
+#define SG2042_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | \
+ MSI_FLAG_USE_DEF_CHIP_OPS | \
+ MSI_FLAG_PCI_MSI_MASK_PARENT | \
+ MSI_FLAG_PCI_MSI_STARTUP_PARENT)
#define SG2042_MSI_FLAGS_SUPPORTED MSI_GENERIC_FLAGS_MASK
@@ -200,10 +208,13 @@ static const struct msi_parent_ops sg2042_msi_parent_ops = {
.init_dev_msi_info = msi_lib_init_dev_msi_info,
};
-#define SG2044_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | \
- MSI_FLAG_USE_DEF_CHIP_OPS)
+#define SG2044_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | \
+ MSI_FLAG_USE_DEF_CHIP_OPS | \
+ MSI_FLAG_PCI_MSI_MASK_PARENT | \
+ MSI_FLAG_PCI_MSI_STARTUP_PARENT)
-#define SG2044_MSI_FLAGS_SUPPORTED (MSI_GENERIC_FLAGS_MASK | \
+#define SG2044_MSI_FLAGS_SUPPORTED (MSI_GENERIC_FLAGS_MASK | \
+ MSI_FLAG_MULTI_PCI_MSI | \
MSI_FLAG_PCI_MSIX)
static const struct msi_parent_ops sg2044_msi_parent_ops = {
@@ -289,6 +300,7 @@ static int sg2042_msi_probe(struct platform_device *pdev)
}
data->irq_first = (u32)args.args[0];
+ data->irq_type = (unsigned int)args.args[1];
data->num_irqs = (u32)args.args[args.nargs - 1];
mutex_init(&data->msi_map_lock);
diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
index bf69a4802b71..559fda8fb3a8 100644
--- a/drivers/irqchip/irq-sifive-plic.c
+++ b/drivers/irqchip/irq-sifive-plic.c
@@ -179,12 +179,14 @@ static int plic_set_affinity(struct irq_data *d,
if (cpu >= nr_cpu_ids)
return -EINVAL;
- plic_irq_disable(d);
+ /* Invalidate the original routing entry */
+ plic_irq_toggle(irq_data_get_effective_affinity_mask(d), d, 0);
irq_data_update_effective_affinity(d, cpumask_of(cpu));
+ /* Setting the new routing entry if irq is enabled */
if (!irqd_irq_disabled(d))
- plic_irq_enable(d);
+ plic_irq_toggle(irq_data_get_effective_affinity_mask(d), d, 1);
return IRQ_SET_MASK_OK_DONE;
}
@@ -257,7 +259,7 @@ static int plic_irq_suspend(void)
readl(priv->regs + PRIORITY_BASE + i * PRIORITY_PER_ID));
}
- for_each_cpu(cpu, cpu_present_mask) {
+ for_each_present_cpu(cpu) {
struct plic_handler *handler = per_cpu_ptr(&plic_handlers, cpu);
if (!handler->present)
@@ -289,7 +291,7 @@ static void plic_irq_resume(void)
priv->regs + PRIORITY_BASE + i * PRIORITY_PER_ID);
}
- for_each_cpu(cpu, cpu_present_mask) {
+ for_each_present_cpu(cpu) {
struct plic_handler *handler = per_cpu_ptr(&plic_handlers, cpu);
if (!handler->present)
diff --git a/drivers/media/rc/pwm-ir-tx.c b/drivers/media/rc/pwm-ir-tx.c
index 84533fdd61aa..047472dc9244 100644
--- a/drivers/media/rc/pwm-ir-tx.c
+++ b/drivers/media/rc/pwm-ir-tx.c
@@ -117,7 +117,6 @@ static int pwm_ir_tx_atomic(struct rc_dev *dev, unsigned int *txbuf,
static enum hrtimer_restart pwm_ir_timer(struct hrtimer *timer)
{
struct pwm_ir *pwm_ir = container_of(timer, struct pwm_ir, timer);
- ktime_t now;
/*
* If we happen to hit an odd latency spike, loop through the
@@ -139,9 +138,7 @@ static enum hrtimer_restart pwm_ir_timer(struct hrtimer *timer)
hrtimer_add_expires_ns(timer, ns);
pwm_ir->txbuf_index++;
-
- now = timer->base->get_time();
- } while (hrtimer_get_expires_tv64(timer) < now);
+ } while (hrtimer_expires_remaining(timer) > 0);
return HRTIMER_RESTART;
}
diff --git a/drivers/of/irq.c b/drivers/of/irq.c
index 74aaea61de13..d2b690857e58 100644
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -519,6 +519,7 @@ int of_irq_count(struct device_node *dev)
return nr;
}
+EXPORT_SYMBOL_GPL(of_irq_count);
/**
* of_irq_to_resource_table - Fill in resource table with node's IRQ info
diff --git a/drivers/pci/msi/irqdomain.c b/drivers/pci/msi/irqdomain.c
index 0938ef7ebabf..dfb61f152702 100644
--- a/drivers/pci/msi/irqdomain.c
+++ b/drivers/pci/msi/irqdomain.c
@@ -148,20 +148,43 @@ static void pci_device_domain_set_desc(msi_alloc_info_t *arg, struct msi_desc *d
arg->hwirq = desc->msi_index;
}
-static __always_inline void cond_mask_parent(struct irq_data *data)
+static void cond_shutdown_parent(struct irq_data *data)
{
struct msi_domain_info *info = data->domain->host_data;
- if (unlikely(info->flags & MSI_FLAG_PCI_MSI_MASK_PARENT))
+ if (unlikely(info->flags & MSI_FLAG_PCI_MSI_STARTUP_PARENT))
+ irq_chip_shutdown_parent(data);
+ else if (unlikely(info->flags & MSI_FLAG_PCI_MSI_MASK_PARENT))
irq_chip_mask_parent(data);
}
-static __always_inline void cond_unmask_parent(struct irq_data *data)
+static unsigned int cond_startup_parent(struct irq_data *data)
{
struct msi_domain_info *info = data->domain->host_data;
- if (unlikely(info->flags & MSI_FLAG_PCI_MSI_MASK_PARENT))
+ if (unlikely(info->flags & MSI_FLAG_PCI_MSI_STARTUP_PARENT))
+ return irq_chip_startup_parent(data);
+ else if (unlikely(info->flags & MSI_FLAG_PCI_MSI_MASK_PARENT))
irq_chip_unmask_parent(data);
+
+ return 0;
+}
+
+static void pci_irq_shutdown_msi(struct irq_data *data)
+{
+ struct msi_desc *desc = irq_data_get_msi_desc(data);
+
+ pci_msi_mask(desc, BIT(data->irq - desc->irq));
+ cond_shutdown_parent(data);
+}
+
+static unsigned int pci_irq_startup_msi(struct irq_data *data)
+{
+ struct msi_desc *desc = irq_data_get_msi_desc(data);
+ unsigned int ret = cond_startup_parent(data);
+
+ pci_msi_unmask(desc, BIT(data->irq - desc->irq));
+ return ret;
}
static void pci_irq_mask_msi(struct irq_data *data)
@@ -169,14 +192,12 @@ static void pci_irq_mask_msi(struct irq_data *data)
struct msi_desc *desc = irq_data_get_msi_desc(data);
pci_msi_mask(desc, BIT(data->irq - desc->irq));
- cond_mask_parent(data);
}
static void pci_irq_unmask_msi(struct irq_data *data)
{
struct msi_desc *desc = irq_data_get_msi_desc(data);
- cond_unmask_parent(data);
pci_msi_unmask(desc, BIT(data->irq - desc->irq));
}
@@ -194,6 +215,8 @@ static void pci_irq_unmask_msi(struct irq_data *data)
static const struct msi_domain_template pci_msi_template = {
.chip = {
.name = "PCI-MSI",
+ .irq_startup = pci_irq_startup_msi,
+ .irq_shutdown = pci_irq_shutdown_msi,
.irq_mask = pci_irq_mask_msi,
.irq_unmask = pci_irq_unmask_msi,
.irq_write_msi_msg = pci_msi_domain_write_msg,
@@ -210,15 +233,27 @@ static const struct msi_domain_template pci_msi_template = {
},
};
+static void pci_irq_shutdown_msix(struct irq_data *data)
+{
+ pci_msix_mask(irq_data_get_msi_desc(data));
+ cond_shutdown_parent(data);
+}
+
+static unsigned int pci_irq_startup_msix(struct irq_data *data)
+{
+ unsigned int ret = cond_startup_parent(data);
+
+ pci_msix_unmask(irq_data_get_msi_desc(data));
+ return ret;
+}
+
static void pci_irq_mask_msix(struct irq_data *data)
{
pci_msix_mask(irq_data_get_msi_desc(data));
- cond_mask_parent(data);
}
static void pci_irq_unmask_msix(struct irq_data *data)
{
- cond_unmask_parent(data);
pci_msix_unmask(irq_data_get_msi_desc(data));
}
@@ -234,6 +269,8 @@ EXPORT_SYMBOL_GPL(pci_msix_prepare_desc);
static const struct msi_domain_template pci_msix_template = {
.chip = {
.name = "PCI-MSIX",
+ .irq_startup = pci_irq_startup_msix,
+ .irq_shutdown = pci_irq_shutdown_msix,
.irq_mask = pci_irq_mask_msix,
.irq_unmask = pci_irq_unmask_msix,
.irq_write_msi_msg = pci_msi_domain_write_msg,
diff --git a/include/asm-generic/thread_info_tif.h b/include/asm-generic/thread_info_tif.h
new file mode 100644
index 000000000000..ee3793e9b1a4
--- /dev/null
+++ b/include/asm-generic/thread_info_tif.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_GENERIC_THREAD_INFO_TIF_H_
+#define _ASM_GENERIC_THREAD_INFO_TIF_H_
+
+#include <vdso/bits.h>
+
+/* Bits 16-31 are reserved for architecture specific purposes */
+
+#define TIF_NOTIFY_RESUME 0 // callback before returning to user
+#define _TIF_NOTIFY_RESUME BIT(TIF_NOTIFY_RESUME)
+
+#define TIF_SIGPENDING 1 // signal pending
+#define _TIF_SIGPENDING BIT(TIF_SIGPENDING)
+
+#define TIF_NOTIFY_SIGNAL 2 // signal notifications exist
+#define _TIF_NOTIFY_SIGNAL BIT(TIF_NOTIFY_SIGNAL)
+
+#define TIF_MEMDIE 3 // is terminating due to OOM killer
+#define _TIF_MEMDIE BIT(TIF_MEMDIE)
+
+#define TIF_NEED_RESCHED 4 // rescheduling necessary
+#define _TIF_NEED_RESCHED BIT(TIF_NEED_RESCHED)
+
+#ifdef HAVE_TIF_NEED_RESCHED_LAZY
+# define TIF_NEED_RESCHED_LAZY 5 // Lazy rescheduling needed
+# define _TIF_NEED_RESCHED_LAZY BIT(TIF_NEED_RESCHED_LAZY)
+#endif
+
+#ifdef HAVE_TIF_POLLING_NRFLAG
+# define TIF_POLLING_NRFLAG 6 // idle is polling for TIF_NEED_RESCHED
+# define _TIF_POLLING_NRFLAG BIT(TIF_POLLING_NRFLAG)
+#endif
+
+#define TIF_USER_RETURN_NOTIFY 7 // notify kernel of userspace return
+#define _TIF_USER_RETURN_NOTIFY BIT(TIF_USER_RETURN_NOTIFY)
+
+#define TIF_UPROBE 8 // breakpointed or singlestepping
+#define _TIF_UPROBE BIT(TIF_UPROBE)
+
+#define TIF_PATCH_PENDING 9 // pending live patching update
+#define _TIF_PATCH_PENDING BIT(TIF_PATCH_PENDING)
+
+#ifdef HAVE_TIF_RESTORE_SIGMASK
+# define TIF_RESTORE_SIGMASK 10 // Restore signal mask in do_signal() */
+# define _TIF_RESTORE_SIGMASK BIT(TIF_RESTORE_SIGMASK)
+#endif
+
+#endif /* _ASM_GENERIC_THREAD_INFO_TIF_H_ */
diff --git a/include/asm-generic/vdso/vsyscall.h b/include/asm-generic/vdso/vsyscall.h
index 7fc0b560007d..5c6d9799f4e7 100644
--- a/include/asm-generic/vdso/vsyscall.h
+++ b/include/asm-generic/vdso/vsyscall.h
@@ -4,8 +4,6 @@
#ifndef __ASSEMBLY__
-#ifdef CONFIG_GENERIC_VDSO_DATA_STORE
-
#ifndef __arch_get_vdso_u_time_data
static __always_inline const struct vdso_time_data *__arch_get_vdso_u_time_data(void)
{
@@ -20,8 +18,6 @@ static __always_inline const struct vdso_rng_data *__arch_get_vdso_u_rng_data(vo
}
#endif
-#endif /* CONFIG_GENERIC_VDSO_DATA_STORE */
-
#ifndef __arch_update_vdso_clock
static __always_inline void __arch_update_vdso_clock(struct vdso_clock *vc)
{
diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h
index ce6521ad04d1..2eda895f19f5 100644
--- a/include/clocksource/arm_arch_timer.h
+++ b/include/clocksource/arm_arch_timer.h
@@ -9,9 +9,6 @@
#include <linux/timecounter.h>
#include <linux/types.h>
-#define ARCH_TIMER_TYPE_CP15 BIT(0)
-#define ARCH_TIMER_TYPE_MEM BIT(1)
-
#define ARCH_TIMER_CTRL_ENABLE (1 << 0)
#define ARCH_TIMER_CTRL_IT_MASK (1 << 1)
#define ARCH_TIMER_CTRL_IT_STAT (1 << 2)
@@ -51,8 +48,6 @@ enum arch_timer_spi_nr {
#define ARCH_TIMER_PHYS_ACCESS 0
#define ARCH_TIMER_VIRT_ACCESS 1
-#define ARCH_TIMER_MEM_PHYS_ACCESS 2
-#define ARCH_TIMER_MEM_VIRT_ACCESS 3
#define ARCH_TIMER_MEM_MAX_FRAMES 8
diff --git a/include/dt-bindings/interrupt-controller/aspeed-scu-ic.h b/include/dt-bindings/interrupt-controller/aspeed-scu-ic.h
index f315d5a7f5ee..7dd04424afcc 100644
--- a/include/dt-bindings/interrupt-controller/aspeed-scu-ic.h
+++ b/include/dt-bindings/interrupt-controller/aspeed-scu-ic.h
@@ -20,4 +20,18 @@
#define ASPEED_AST2600_SCU_IC1_LPC_RESET_LO_TO_HI 0
#define ASPEED_AST2600_SCU_IC1_LPC_RESET_HI_TO_LO 1
+#define ASPEED_AST2700_SCU_IC0_PCIE_PERST_LO_TO_HI 3
+#define ASPEED_AST2700_SCU_IC0_PCIE_PERST_HI_TO_LO 2
+
+#define ASPEED_AST2700_SCU_IC1_PCIE_RCRST_LO_TO_HI 3
+#define ASPEED_AST2700_SCU_IC1_PCIE_RCRST_HI_TO_LO 2
+
+#define ASPEED_AST2700_SCU_IC2_PCIE_PERST_LO_TO_HI 3
+#define ASPEED_AST2700_SCU_IC2_PCIE_PERST_HI_TO_LO 2
+#define ASPEED_AST2700_SCU_IC2_LPC_RESET_LO_TO_HI 1
+#define ASPEED_AST2700_SCU_IC2_LPC_RESET_HI_TO_LO 0
+
+#define ASPEED_AST2700_SCU_IC3_LPC_RESET_LO_TO_HI 1
+#define ASPEED_AST2700_SCU_IC3_LPC_RESET_HI_TO_LO 0
+
#endif /* _DT_BINDINGS_INTERRUPT_CONTROLLER_ASPEED_SCU_IC_H_ */
diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h
index 0bf7d33a1048..7fcec025c5e0 100644
--- a/include/linux/cc_platform.h
+++ b/include/linux/cc_platform.h
@@ -96,6 +96,14 @@ enum cc_attr {
* enabled to run SEV-SNP guests.
*/
CC_ATTR_HOST_SEV_SNP,
+
+ /**
+ * @CC_ATTR_SNP_SECURE_AVIC: Secure AVIC mode is active.
+ *
+ * The host kernel is running with the necessary features enabled
+ * to run SEV-SNP guests with full Secure AVIC capabilities.
+ */
+ CC_ATTR_SNP_SECURE_AVIC,
};
#ifdef CONFIG_ARCH_HAS_CC_PLATFORM
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 1ef867bb8c44..2cf1bf65b225 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -154,14 +154,11 @@ static inline s64 hrtimer_get_expires_ns(const struct hrtimer *timer)
return ktime_to_ns(timer->node.expires);
}
-static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer)
-{
- return ktime_sub(timer->node.expires, timer->base->get_time());
-}
+ktime_t hrtimer_cb_get_time(const struct hrtimer *timer);
-static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer)
+static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer)
{
- return timer->base->get_time();
+ return ktime_sub(timer->node.expires, hrtimer_cb_get_time(timer));
}
static inline int hrtimer_is_hres_active(struct hrtimer *timer)
@@ -200,8 +197,7 @@ __hrtimer_expires_remaining_adjusted(const struct hrtimer *timer, ktime_t now)
static inline ktime_t
hrtimer_expires_remaining_adjusted(const struct hrtimer *timer)
{
- return __hrtimer_expires_remaining_adjusted(timer,
- timer->base->get_time());
+ return __hrtimer_expires_remaining_adjusted(timer, hrtimer_cb_get_time(timer));
}
#ifdef CONFIG_TIMERFD
@@ -363,7 +359,7 @@ hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval);
static inline u64 hrtimer_forward_now(struct hrtimer *timer,
ktime_t interval)
{
- return hrtimer_forward(timer, timer->base->get_time(), interval);
+ return hrtimer_forward(timer, hrtimer_cb_get_time(timer), interval);
}
/* Precise sleep: */
diff --git a/include/linux/hrtimer_defs.h b/include/linux/hrtimer_defs.h
index 84a5045f80f3..aa49ffa130e5 100644
--- a/include/linux/hrtimer_defs.h
+++ b/include/linux/hrtimer_defs.h
@@ -41,7 +41,6 @@
* @seq: seqcount around __run_hrtimer
* @running: pointer to the currently running hrtimer
* @active: red black tree root node for the active timers
- * @get_time: function to retrieve the current time of the clock
* @offset: offset of this clock to the monotonic base
*/
struct hrtimer_clock_base {
@@ -51,7 +50,6 @@ struct hrtimer_clock_base {
seqcount_raw_spinlock_t seq;
struct hrtimer *running;
struct timerqueue_head active;
- ktime_t (*get_time)(void);
ktime_t offset;
} __hrtimer_clock_base_align;
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 1d6b606a81ef..c67e76fbcc07 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -669,6 +669,8 @@ extern int irq_chip_set_parent_state(struct irq_data *data,
extern int irq_chip_get_parent_state(struct irq_data *data,
enum irqchip_irq_state which,
bool *state);
+extern void irq_chip_shutdown_parent(struct irq_data *data);
+extern unsigned int irq_chip_startup_parent(struct irq_data *data);
extern void irq_chip_enable_parent(struct irq_data *data);
extern void irq_chip_disable_parent(struct irq_data *data);
extern void irq_chip_ack_parent(struct irq_data *data);
@@ -976,10 +978,6 @@ static inline void irq_free_desc(unsigned int irq)
irq_free_descs(irq, 1);
}
-#ifdef CONFIG_GENERIC_IRQ_LEGACY
-void irq_init_desc(unsigned int irq);
-#endif
-
/**
* struct irq_chip_regs - register offsets for struct irq_gci
* @enable: Enable register offset to reg_base
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 91b20788273d..0d1927da8055 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -61,7 +61,7 @@
extern void register_refined_jiffies(long clock_tick_rate);
-/* TICK_USEC is the time between ticks in usec assuming SHIFTED_HZ */
+/* TICK_USEC is the time between ticks in usec */
#define TICK_USEC ((USEC_PER_SEC + HZ/2) / HZ)
/* USER_TICK_USEC is the time between ticks in usec assuming fake USER_HZ */
diff --git a/include/linux/msi.h b/include/linux/msi.h
index faac634ac230..d415dd15a0a9 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -564,6 +564,8 @@ enum {
MSI_FLAG_PARENT_PM_DEV = (1 << 8),
/* Support for parent mask/unmask */
MSI_FLAG_PCI_MSI_MASK_PARENT = (1 << 9),
+ /* Support for parent startup/shutdown */
+ MSI_FLAG_PCI_MSI_STARTUP_PARENT = (1 << 10),
/* Mask for the generic functionality */
MSI_GENERIC_FLAGS_MASK = GENMASK(15, 0),
diff --git a/include/linux/platform_data/dmtimer-omap.h b/include/linux/platform_data/dmtimer-omap.h
index 95d852aef130..726d89143842 100644
--- a/include/linux/platform_data/dmtimer-omap.h
+++ b/include/linux/platform_data/dmtimer-omap.h
@@ -36,9 +36,13 @@ struct omap_dm_timer_ops {
int (*set_pwm)(struct omap_dm_timer *timer, int def_on,
int toggle, int trigger, int autoreload);
int (*get_pwm_status)(struct omap_dm_timer *timer);
+ int (*set_cap)(struct omap_dm_timer *timer,
+ int autoreload, bool config_period);
+ int (*get_cap_status)(struct omap_dm_timer *timer);
int (*set_prescaler)(struct omap_dm_timer *timer, int prescaler);
unsigned int (*read_counter)(struct omap_dm_timer *timer);
+ unsigned int (*read_cap)(struct omap_dm_timer *timer, bool is_period);
int (*write_counter)(struct omap_dm_timer *timer,
unsigned int value);
unsigned int (*read_status)(struct omap_dm_timer *timer);
diff --git a/include/linux/psp-platform-access.h b/include/linux/psp-platform-access.h
index 1504fb012c05..540abf7de048 100644
--- a/include/linux/psp-platform-access.h
+++ b/include/linux/psp-platform-access.h
@@ -7,6 +7,8 @@
enum psp_platform_access_msg {
PSP_CMD_NONE = 0x0,
+ PSP_SFS_GET_FW_VERSIONS,
+ PSP_SFS_UPDATE,
PSP_CMD_HSTI_QUERY = 0x14,
PSP_I2C_REQ_BUS_CMD = 0x64,
PSP_DYNAMIC_BOOST_GET_NONCE,
diff --git a/include/linux/rseq.h b/include/linux/rseq.h
index a96fd345aa38..69553e7c14c1 100644
--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -7,6 +7,12 @@
#include <linux/preempt.h>
#include <linux/sched.h>
+#ifdef CONFIG_MEMBARRIER
+# define RSEQ_EVENT_GUARD irq
+#else
+# define RSEQ_EVENT_GUARD preempt
+#endif
+
/*
* Map the event mask on the user-space ABI enum rseq_cs_flags
* for direct mask checks.
@@ -41,9 +47,8 @@ static inline void rseq_handle_notify_resume(struct ksignal *ksig,
static inline void rseq_signal_deliver(struct ksignal *ksig,
struct pt_regs *regs)
{
- preempt_disable();
- __set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask);
- preempt_enable();
+ scoped_guard(RSEQ_EVENT_GUARD)
+ __set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask);
rseq_handle_notify_resume(ksig, regs);
}
diff --git a/include/uapi/linux/psp-sfs.h b/include/uapi/linux/psp-sfs.h
new file mode 100644
index 000000000000..94e51670383c
--- /dev/null
+++ b/include/uapi/linux/psp-sfs.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
+/*
+ * Userspace interface for AMD Seamless Firmware Servicing (SFS)
+ *
+ * Copyright (C) 2025 Advanced Micro Devices, Inc.
+ *
+ * Author: Ashish Kalra <ashish.kalra@amd.com>
+ */
+
+#ifndef __PSP_SFS_USER_H__
+#define __PSP_SFS_USER_H__
+
+#include <linux/types.h>
+
+/**
+ * SFS: AMD Seamless Firmware Support (SFS) interface
+ */
+
+#define PAYLOAD_NAME_SIZE 64
+#define TEE_EXT_CMD_BUFFER_SIZE 4096
+
+/**
+ * struct sfs_user_get_fw_versions - get current level of base firmware (output).
+ * @blob: current level of base firmware for ASP and patch levels (input/output).
+ * @sfs_status: 32-bit SFS status value (output).
+ * @sfs_extended_status: 32-bit SFS extended status value (output).
+ */
+struct sfs_user_get_fw_versions {
+ __u8 blob[TEE_EXT_CMD_BUFFER_SIZE];
+ __u32 sfs_status;
+ __u32 sfs_extended_status;
+} __packed;
+
+/**
+ * struct sfs_user_update_package - update SFS package (input).
+ * @payload_name: name of SFS package to load, verify and execute (input).
+ * @sfs_status: 32-bit SFS status value (output).
+ * @sfs_extended_status: 32-bit SFS extended status value (output).
+ */
+struct sfs_user_update_package {
+ char payload_name[PAYLOAD_NAME_SIZE];
+ __u32 sfs_status;
+ __u32 sfs_extended_status;
+} __packed;
+
+/**
+ * Seamless Firmware Support (SFS) IOC
+ *
+ * possible return codes for all SFS IOCTLs:
+ * 0: success
+ * -EINVAL: invalid input
+ * -E2BIG: excess data passed
+ * -EFAULT: failed to copy to/from userspace
+ * -EBUSY: mailbox in recovery or in use
+ * -ENODEV: driver not bound with PSP device
+ * -EACCES: request isn't authorized
+ * -EINVAL: invalid parameter
+ * -ETIMEDOUT: request timed out
+ * -EAGAIN: invalid request for state machine
+ * -ENOENT: not implemented
+ * -ENFILE: overflow
+ * -EPERM: invalid signature
+ * -EIO: PSP I/O error
+ */
+#define SFS_IOC_TYPE 'S'
+
+/**
+ * SFSIOCFWVERS - returns blob containing FW versions
+ * ASP provides the current level of Base Firmware for the ASP
+ * and the other microprocessors as well as current patch
+ * level(s).
+ */
+#define SFSIOCFWVERS _IOWR(SFS_IOC_TYPE, 0x1, struct sfs_user_get_fw_versions)
+
+/**
+ * SFSIOCUPDATEPKG - updates package/payload
+ * ASP loads, verifies and executes the SFS package.
+ * By default, the SFS package/payload is loaded from
+ * /lib/firmware/amd, but alternative firmware loading
+ * path can be specified using kernel parameter
+ * firmware_class.path or the firmware loading path
+ * can be customized using sysfs file:
+ * /sys/module/firmware_class/parameters/path.
+ */
+#define SFSIOCUPDATEPKG _IOWR(SFS_IOC_TYPE, 0x2, struct sfs_user_update_package)
+
+#endif /* __PSP_SFS_USER_H__ */
diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h
index 02533038640e..23c39b96190f 100644
--- a/include/vdso/datapage.h
+++ b/include/vdso/datapage.h
@@ -31,7 +31,7 @@ struct arch_vdso_time_data {};
#if defined(CONFIG_ARCH_HAS_VDSO_ARCH_DATA)
#include <asm/vdso/arch_data.h>
-#elif defined(CONFIG_GENERIC_VDSO_DATA_STORE)
+#else
struct vdso_arch_data {
/* Needed for the generic code, never actually used at runtime */
char __unused;
@@ -164,7 +164,6 @@ struct vdso_rng_data {
* With the hidden visibility, the compiler simply generates a PC-relative
* relocation, and this is what we need.
*/
-#ifdef CONFIG_GENERIC_VDSO_DATA_STORE
extern struct vdso_time_data vdso_u_time_data __attribute__((visibility("hidden")));
extern struct vdso_rng_data vdso_u_rng_data __attribute__((visibility("hidden")));
extern struct vdso_arch_data vdso_u_arch_data __attribute__((visibility("hidden")));
@@ -185,8 +184,6 @@ enum vdso_pages {
VDSO_NR_PAGES
};
-#endif /* CONFIG_GENERIC_VDSO_DATA_STORE */
-
/*
* The generic vDSO implementation requires that gettimeofday.h
* provides:
@@ -196,11 +193,7 @@ enum vdso_pages {
* - clock_gettime_fallback(): fallback for clock_gettime.
* - clock_getres_fallback(): fallback for clock_getres.
*/
-#ifdef ENABLE_COMPAT_VDSO
-#include <asm/vdso/compat_gettimeofday.h>
-#else
#include <asm/vdso/gettimeofday.h>
-#endif /* ENABLE_COMPAT_VDSO */
#else /* !__ASSEMBLY__ */
diff --git a/include/vdso/gettime.h b/include/vdso/gettime.h
index c50d152e7b3e..9ac161866653 100644
--- a/include/vdso/gettime.h
+++ b/include/vdso/gettime.h
@@ -5,6 +5,7 @@
#include <linux/types.h>
struct __kernel_timespec;
+struct __kernel_old_timeval;
struct timezone;
#if !defined(CONFIG_64BIT) || defined(BUILD_VDSO32_64)
diff --git a/include/vdso/jiffies.h b/include/vdso/jiffies.h
index 2f9d596c8b29..8ca04a141412 100644
--- a/include/vdso/jiffies.h
+++ b/include/vdso/jiffies.h
@@ -5,7 +5,7 @@
#include <asm/param.h> /* for HZ */
#include <vdso/time64.h>
-/* TICK_NSEC is the time between ticks in nsec assuming SHIFTED_HZ */
+/* TICK_NSEC is the time between ticks in nsec */
#define TICK_NSEC ((NSEC_PER_SEC+HZ/2)/HZ)
#endif /* __VDSO_JIFFIES_H */
diff --git a/init/Kconfig b/init/Kconfig
index c2b0c9df679a..f3b13463ec26 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1363,7 +1363,7 @@ config UTS_NS
config TIME_NS
bool "TIME namespace"
- depends on GENERIC_VDSO_TIME_NS
+ depends on GENERIC_GETTIMEOFDAY
default y
help
In this namespace boottime and monotonic clocks can be set.
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index 54ea59ff8fbe..da326800c1c9 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -103,6 +103,19 @@ config PREEMPT_RT
Select this if you are building a kernel for systems which
require real-time guarantees.
+config PREEMPT_RT_NEEDS_BH_LOCK
+ bool "Enforce softirq synchronisation on PREEMPT_RT"
+ depends on PREEMPT_RT
+ help
+ Enforce synchronisation across the softirqs context. On PREEMPT_RT
+ the softirq is preemptible. This enforces the same per-CPU BLK
+ semantic non-PREEMPT_RT builds have. This should not be needed
+ because per-CPU locks were added to avoid the per-CPU BKL.
+
+ This switch provides the old behaviour for testing reasons. Select
+ this if you suspect an error with preemptible softirq and want test
+ the old synchronized behaviour.
+
config PREEMPT_COUNT
bool
diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c
index 4b6da9116aa6..880c9bf2f315 100644
--- a/kernel/futex/syscalls.c
+++ b/kernel/futex/syscalls.c
@@ -39,6 +39,56 @@ SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
return 0;
}
+static inline void __user *futex_task_robust_list(struct task_struct *p, bool compat)
+{
+#ifdef CONFIG_COMPAT
+ if (compat)
+ return p->compat_robust_list;
+#endif
+ return p->robust_list;
+}
+
+static void __user *futex_get_robust_list_common(int pid, bool compat)
+{
+ struct task_struct *p = current;
+ void __user *head;
+ int ret;
+
+ scoped_guard(rcu) {
+ if (pid) {
+ p = find_task_by_vpid(pid);
+ if (!p)
+ return (void __user *)ERR_PTR(-ESRCH);
+ }
+ get_task_struct(p);
+ }
+
+ /*
+ * Hold exec_update_lock to serialize with concurrent exec()
+ * so ptrace_may_access() is checked against stable credentials
+ */
+ ret = down_read_killable(&p->signal->exec_update_lock);
+ if (ret)
+ goto err_put;
+
+ ret = -EPERM;
+ if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
+ goto err_unlock;
+
+ head = futex_task_robust_list(p, compat);
+
+ up_read(&p->signal->exec_update_lock);
+ put_task_struct(p);
+
+ return head;
+
+err_unlock:
+ up_read(&p->signal->exec_update_lock);
+err_put:
+ put_task_struct(p);
+ return (void __user *)ERR_PTR(ret);
+}
+
/**
* sys_get_robust_list() - Get the robust-futex list head of a task
* @pid: pid of the process [zero for current task]
@@ -49,36 +99,14 @@ SYSCALL_DEFINE3(get_robust_list, int, pid,
struct robust_list_head __user * __user *, head_ptr,
size_t __user *, len_ptr)
{
- struct robust_list_head __user *head;
- unsigned long ret;
- struct task_struct *p;
-
- rcu_read_lock();
-
- ret = -ESRCH;
- if (!pid)
- p = current;
- else {
- p = find_task_by_vpid(pid);
- if (!p)
- goto err_unlock;
- }
-
- ret = -EPERM;
- if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
- goto err_unlock;
+ struct robust_list_head __user *head = futex_get_robust_list_common(pid, false);
- head = p->robust_list;
- rcu_read_unlock();
+ if (IS_ERR(head))
+ return PTR_ERR(head);
if (put_user(sizeof(*head), len_ptr))
return -EFAULT;
return put_user(head, head_ptr);
-
-err_unlock:
- rcu_read_unlock();
-
- return ret;
}
long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
@@ -455,36 +483,14 @@ COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid,
compat_uptr_t __user *, head_ptr,
compat_size_t __user *, len_ptr)
{
- struct compat_robust_list_head __user *head;
- unsigned long ret;
- struct task_struct *p;
-
- rcu_read_lock();
-
- ret = -ESRCH;
- if (!pid)
- p = current;
- else {
- p = find_task_by_vpid(pid);
- if (!p)
- goto err_unlock;
- }
-
- ret = -EPERM;
- if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
- goto err_unlock;
+ struct compat_robust_list_head __user *head = futex_get_robust_list_common(pid, true);
- head = p->compat_robust_list;
- rcu_read_unlock();
+ if (IS_ERR(head))
+ return PTR_ERR(head);
if (put_user(sizeof(*head), len_ptr))
return -EFAULT;
return put_user(ptr_to_compat(head), head_ptr);
-
-err_unlock:
- rcu_read_unlock();
-
- return ret;
}
#endif /* CONFIG_COMPAT */
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index 1da5e9d9da71..1b4254d19a73 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -6,10 +6,6 @@ menu "IRQ subsystem"
config MAY_HAVE_SPARSE_IRQ
bool
-# Legacy support, required for itanic
-config GENERIC_IRQ_LEGACY
- bool
-
# Enable the generic irq autoprobe mechanism
config GENERIC_IRQ_PROBE
bool
@@ -147,7 +143,9 @@ config GENERIC_IRQ_KEXEC_CLEAR_VM_FORWARD
config IRQ_KUNIT_TEST
bool "KUnit tests for IRQ management APIs" if !KUNIT_ALL_TESTS
depends on KUNIT=y
+ depends on SPARSE_IRQ
default KUNIT_ALL_TESTS
+ select IRQ_DOMAIN
imply SMP
help
This option enables KUnit tests for the IRQ subsystem API. These are
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 0d0276378c70..3ffa0d80ddd1 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -1260,6 +1260,43 @@ int irq_chip_get_parent_state(struct irq_data *data,
EXPORT_SYMBOL_GPL(irq_chip_get_parent_state);
/**
+ * irq_chip_shutdown_parent - Shutdown the parent interrupt
+ * @data: Pointer to interrupt specific data
+ *
+ * Invokes the irq_shutdown() callback of the parent if available or falls
+ * back to irq_chip_disable_parent().
+ */
+void irq_chip_shutdown_parent(struct irq_data *data)
+{
+ struct irq_data *parent = data->parent_data;
+
+ if (parent->chip->irq_shutdown)
+ parent->chip->irq_shutdown(parent);
+ else
+ irq_chip_disable_parent(data);
+}
+EXPORT_SYMBOL_GPL(irq_chip_shutdown_parent);
+
+/**
+ * irq_chip_startup_parent - Startup the parent interrupt
+ * @data: Pointer to interrupt specific data
+ *
+ * Invokes the irq_startup() callback of the parent if available or falls
+ * back to irq_chip_enable_parent().
+ */
+unsigned int irq_chip_startup_parent(struct irq_data *data)
+{
+ struct irq_data *parent = data->parent_data;
+
+ if (parent->chip->irq_startup)
+ return parent->chip->irq_startup(parent);
+
+ irq_chip_enable_parent(data);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(irq_chip_startup_parent);
+
+/**
* irq_chip_enable_parent - Enable the parent interrupt (defaults to unmask if
* NULL)
* @data: Pointer to interrupt specific data
diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c
index eb16a58e0322..b41188698622 100644
--- a/kernel/irq/devres.c
+++ b/kernel/irq/devres.c
@@ -30,29 +30,22 @@ static int devm_irq_match(struct device *dev, void *res, void *data)
return this->irq == match->irq && this->dev_id == match->dev_id;
}
-/**
- * devm_request_threaded_irq - allocate an interrupt line for a managed device
- * @dev: device to request interrupt for
- * @irq: Interrupt line to allocate
- * @handler: Function to be called when the IRQ occurs
- * @thread_fn: function to be called in a threaded interrupt context. NULL
- * for devices which handle everything in @handler
- * @irqflags: Interrupt type flags
- * @devname: An ascii name for the claiming device, dev_name(dev) if NULL
- * @dev_id: A cookie passed back to the handler function
- *
- * Except for the extra @dev argument, this function takes the
- * same arguments and performs the same function as
- * request_threaded_irq(). IRQs requested with this function will be
- * automatically freed on driver detach.
- *
- * If an IRQ allocated with this function needs to be freed
- * separately, devm_free_irq() must be used.
- */
-int devm_request_threaded_irq(struct device *dev, unsigned int irq,
- irq_handler_t handler, irq_handler_t thread_fn,
- unsigned long irqflags, const char *devname,
- void *dev_id)
+static int devm_request_result(struct device *dev, int rc, unsigned int irq,
+ irq_handler_t handler, irq_handler_t thread_fn,
+ const char *devname)
+{
+ if (rc >= 0)
+ return rc;
+
+ return dev_err_probe(dev, rc, "request_irq(%u) %ps %ps %s\n",
+ irq, handler, thread_fn, devname ? : "");
+}
+
+static int __devm_request_threaded_irq(struct device *dev, unsigned int irq,
+ irq_handler_t handler,
+ irq_handler_t thread_fn,
+ unsigned long irqflags,
+ const char *devname, void *dev_id)
{
struct irq_devres *dr;
int rc;
@@ -78,28 +71,48 @@ int devm_request_threaded_irq(struct device *dev, unsigned int irq,
return 0;
}
-EXPORT_SYMBOL(devm_request_threaded_irq);
/**
- * devm_request_any_context_irq - allocate an interrupt line for a managed device
- * @dev: device to request interrupt for
- * @irq: Interrupt line to allocate
- * @handler: Function to be called when the IRQ occurs
- * @irqflags: Interrupt type flags
- * @devname: An ascii name for the claiming device, dev_name(dev) if NULL
- * @dev_id: A cookie passed back to the handler function
+ * devm_request_threaded_irq - allocate an interrupt line for a managed device with error logging
+ * @dev: Device to request interrupt for
+ * @irq: Interrupt line to allocate
+ * @handler: Function to be called when the interrupt occurs
+ * @thread_fn: Function to be called in a threaded interrupt context. NULL
+ * for devices which handle everything in @handler
+ * @irqflags: Interrupt type flags
+ * @devname: An ascii name for the claiming device, dev_name(dev) if NULL
+ * @dev_id: A cookie passed back to the handler function
*
- * Except for the extra @dev argument, this function takes the
- * same arguments and performs the same function as
- * request_any_context_irq(). IRQs requested with this function will be
- * automatically freed on driver detach.
+ * Except for the extra @dev argument, this function takes the same
+ * arguments and performs the same function as request_threaded_irq().
+ * Interrupts requested with this function will be automatically freed on
+ * driver detach.
+ *
+ * If an interrupt allocated with this function needs to be freed
+ * separately, devm_free_irq() must be used.
+ *
+ * When the request fails, an error message is printed with contextual
+ * information (device name, interrupt number, handler functions and
+ * error code). Don't add extra error messages at the call sites.
*
- * If an IRQ allocated with this function needs to be freed
- * separately, devm_free_irq() must be used.
+ * Return: 0 on success or a negative error number.
*/
-int devm_request_any_context_irq(struct device *dev, unsigned int irq,
- irq_handler_t handler, unsigned long irqflags,
- const char *devname, void *dev_id)
+int devm_request_threaded_irq(struct device *dev, unsigned int irq,
+ irq_handler_t handler, irq_handler_t thread_fn,
+ unsigned long irqflags, const char *devname,
+ void *dev_id)
+{
+ int rc = __devm_request_threaded_irq(dev, irq, handler, thread_fn,
+ irqflags, devname, dev_id);
+
+ return devm_request_result(dev, rc, irq, handler, thread_fn, devname);
+}
+EXPORT_SYMBOL(devm_request_threaded_irq);
+
+static int __devm_request_any_context_irq(struct device *dev, unsigned int irq,
+ irq_handler_t handler,
+ unsigned long irqflags,
+ const char *devname, void *dev_id)
{
struct irq_devres *dr;
int rc;
@@ -124,6 +137,40 @@ int devm_request_any_context_irq(struct device *dev, unsigned int irq,
return rc;
}
+
+/**
+ * devm_request_any_context_irq - allocate an interrupt line for a managed device with error logging
+ * @dev: Device to request interrupt for
+ * @irq: Interrupt line to allocate
+ * @handler: Function to be called when the interrupt occurs
+ * @irqflags: Interrupt type flags
+ * @devname: An ascii name for the claiming device, dev_name(dev) if NULL
+ * @dev_id: A cookie passed back to the handler function
+ *
+ * Except for the extra @dev argument, this function takes the same
+ * arguments and performs the same function as request_any_context_irq().
+ * Interrupts requested with this function will be automatically freed on
+ * driver detach.
+ *
+ * If an interrupt allocated with this function needs to be freed
+ * separately, devm_free_irq() must be used.
+ *
+ * When the request fails, an error message is printed with contextual
+ * information (device name, interrupt number, handler functions and
+ * error code). Don't add extra error messages at the call sites.
+ *
+ * Return: IRQC_IS_HARDIRQ or IRQC_IS_NESTED on success, or a negative error
+ * number.
+ */
+int devm_request_any_context_irq(struct device *dev, unsigned int irq,
+ irq_handler_t handler, unsigned long irqflags,
+ const char *devname, void *dev_id)
+{
+ int rc = __devm_request_any_context_irq(dev, irq, handler, irqflags,
+ devname, dev_id);
+
+ return devm_request_result(dev, rc, irq, handler, NULL, devname);
+}
EXPORT_SYMBOL(devm_request_any_context_irq);
/**
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 9489f93b3db3..e103451243a0 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -136,6 +136,44 @@ void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action)
wake_up_process(action->thread);
}
+static DEFINE_STATIC_KEY_FALSE(irqhandler_duration_check_enabled);
+static u64 irqhandler_duration_threshold_ns __ro_after_init;
+
+static int __init irqhandler_duration_check_setup(char *arg)
+{
+ unsigned long val;
+ int ret;
+
+ ret = kstrtoul(arg, 0, &val);
+ if (ret) {
+ pr_err("Unable to parse irqhandler.duration_warn_us setting: ret=%d\n", ret);
+ return 0;
+ }
+
+ if (!val) {
+ pr_err("Invalid irqhandler.duration_warn_us setting, must be > 0\n");
+ return 0;
+ }
+
+ irqhandler_duration_threshold_ns = val * 1000;
+ static_branch_enable(&irqhandler_duration_check_enabled);
+
+ return 1;
+}
+__setup("irqhandler.duration_warn_us=", irqhandler_duration_check_setup);
+
+static inline void irqhandler_duration_check(u64 ts_start, unsigned int irq,
+ const struct irqaction *action)
+{
+ u64 delta_ns = local_clock() - ts_start;
+
+ if (unlikely(delta_ns > irqhandler_duration_threshold_ns)) {
+ pr_warn_ratelimited("[CPU%u] long duration of IRQ[%u:%ps], took: %llu us\n",
+ smp_processor_id(), irq, action->handler,
+ div_u64(delta_ns, NSEC_PER_USEC));
+ }
+}
+
irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc)
{
irqreturn_t retval = IRQ_NONE;
@@ -155,7 +193,16 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc)
lockdep_hardirq_threaded();
trace_irq_handler_entry(irq, action);
- res = action->handler(irq, action->dev_id);
+
+ if (static_branch_unlikely(&irqhandler_duration_check_enabled)) {
+ u64 ts_start = local_clock();
+
+ res = action->handler(irq, action->dev_id);
+ irqhandler_duration_check(ts_start, irq, action);
+ } else {
+ res = action->handler(irq, action->dev_id);
+ }
+
trace_irq_handler_exit(irq, action, res);
if (WARN_ONCE(!irqs_disabled(),"irq %u handler %pS enabled interrupts\n",
diff --git a/kernel/irq/irq_test.c b/kernel/irq/irq_test.c
index a75abebed7f2..e2d31914b3c4 100644
--- a/kernel/irq/irq_test.c
+++ b/kernel/irq/irq_test.c
@@ -41,21 +41,37 @@ static struct irq_chip fake_irq_chip = {
.flags = IRQCHIP_SKIP_SET_WAKE,
};
-static void irq_disable_depth_test(struct kunit *test)
+static int irq_test_setup_fake_irq(struct kunit *test, struct irq_affinity_desc *affd)
{
struct irq_desc *desc;
- int virq, ret;
+ int virq;
- virq = irq_domain_alloc_descs(-1, 1, 0, NUMA_NO_NODE, NULL);
+ virq = irq_domain_alloc_descs(-1, 1, 0, NUMA_NO_NODE, affd);
KUNIT_ASSERT_GE(test, virq, 0);
- irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_simple_irq);
+ irq_set_chip_and_handler(virq, &fake_irq_chip, handle_simple_irq);
+
+ desc = irq_to_desc(virq);
+ KUNIT_ASSERT_PTR_NE(test, desc, NULL);
+
+ /* On some architectures, IRQs are NOREQUEST | NOPROBE by default. */
+ irq_settings_clr_norequest(desc);
+
+ return virq;
+}
+
+static void irq_disable_depth_test(struct kunit *test)
+{
+ struct irq_desc *desc;
+ int virq, ret;
+
+ virq = irq_test_setup_fake_irq(test, NULL);
desc = irq_to_desc(virq);
KUNIT_ASSERT_PTR_NE(test, desc, NULL);
ret = request_irq(virq, noop_handler, 0, "test_irq", NULL);
- KUNIT_EXPECT_EQ(test, ret, 0);
+ KUNIT_ASSERT_EQ(test, ret, 0);
KUNIT_EXPECT_EQ(test, desc->depth, 0);
@@ -73,16 +89,13 @@ static void irq_free_disabled_test(struct kunit *test)
struct irq_desc *desc;
int virq, ret;
- virq = irq_domain_alloc_descs(-1, 1, 0, NUMA_NO_NODE, NULL);
- KUNIT_ASSERT_GE(test, virq, 0);
-
- irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_simple_irq);
+ virq = irq_test_setup_fake_irq(test, NULL);
desc = irq_to_desc(virq);
KUNIT_ASSERT_PTR_NE(test, desc, NULL);
ret = request_irq(virq, noop_handler, 0, "test_irq", NULL);
- KUNIT_EXPECT_EQ(test, ret, 0);
+ KUNIT_ASSERT_EQ(test, ret, 0);
KUNIT_EXPECT_EQ(test, desc->depth, 0);
@@ -93,7 +106,7 @@ static void irq_free_disabled_test(struct kunit *test)
KUNIT_EXPECT_GE(test, desc->depth, 1);
ret = request_irq(virq, noop_handler, 0, "test_irq", NULL);
- KUNIT_EXPECT_EQ(test, ret, 0);
+ KUNIT_ASSERT_EQ(test, ret, 0);
KUNIT_EXPECT_EQ(test, desc->depth, 0);
free_irq(virq, NULL);
@@ -112,10 +125,7 @@ static void irq_shutdown_depth_test(struct kunit *test)
if (!IS_ENABLED(CONFIG_SMP))
kunit_skip(test, "requires CONFIG_SMP for managed shutdown");
- virq = irq_domain_alloc_descs(-1, 1, 0, NUMA_NO_NODE, &affinity);
- KUNIT_ASSERT_GE(test, virq, 0);
-
- irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_simple_irq);
+ virq = irq_test_setup_fake_irq(test, &affinity);
desc = irq_to_desc(virq);
KUNIT_ASSERT_PTR_NE(test, desc, NULL);
@@ -124,7 +134,7 @@ static void irq_shutdown_depth_test(struct kunit *test)
KUNIT_ASSERT_PTR_NE(test, data, NULL);
ret = request_irq(virq, noop_handler, 0, "test_irq", NULL);
- KUNIT_EXPECT_EQ(test, ret, 0);
+ KUNIT_ASSERT_EQ(test, ret, 0);
KUNIT_EXPECT_TRUE(test, irqd_is_activated(data));
KUNIT_EXPECT_TRUE(test, irqd_is_started(data));
@@ -169,13 +179,12 @@ static void irq_cpuhotplug_test(struct kunit *test)
kunit_skip(test, "requires more than 1 CPU for CPU hotplug");
if (!cpu_is_hotpluggable(1))
kunit_skip(test, "CPU 1 must be hotpluggable");
+ if (!cpu_online(1))
+ kunit_skip(test, "CPU 1 must be online");
cpumask_copy(&affinity.mask, cpumask_of(1));
- virq = irq_domain_alloc_descs(-1, 1, 0, NUMA_NO_NODE, &affinity);
- KUNIT_ASSERT_GE(test, virq, 0);
-
- irq_set_chip_and_handler(virq, &fake_irq_chip, handle_simple_irq);
+ virq = irq_test_setup_fake_irq(test, &affinity);
desc = irq_to_desc(virq);
KUNIT_ASSERT_PTR_NE(test, desc, NULL);
@@ -184,7 +193,7 @@ static void irq_cpuhotplug_test(struct kunit *test)
KUNIT_ASSERT_PTR_NE(test, data, NULL);
ret = request_irq(virq, noop_handler, 0, "test_irq", NULL);
- KUNIT_EXPECT_EQ(test, ret, 0);
+ KUNIT_ASSERT_EQ(test, ret, 0);
KUNIT_EXPECT_TRUE(test, irqd_is_activated(data));
KUNIT_EXPECT_TRUE(test, irqd_is_started(data));
@@ -196,13 +205,9 @@ static void irq_cpuhotplug_test(struct kunit *test)
KUNIT_EXPECT_EQ(test, desc->depth, 1);
KUNIT_EXPECT_EQ(test, remove_cpu(1), 0);
- KUNIT_EXPECT_FALSE(test, irqd_is_activated(data));
- KUNIT_EXPECT_FALSE(test, irqd_is_started(data));
KUNIT_EXPECT_GE(test, desc->depth, 1);
KUNIT_EXPECT_EQ(test, add_cpu(1), 0);
- KUNIT_EXPECT_FALSE(test, irqd_is_activated(data));
- KUNIT_EXPECT_FALSE(test, irqd_is_started(data));
KUNIT_EXPECT_EQ(test, desc->depth, 1);
enable_irq(virq);
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index b64c57b44c20..db714d3014b5 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -653,13 +653,6 @@ void irq_mark_irq(unsigned int irq)
irq_insert_desc(irq, irq_desc + irq);
}
-#ifdef CONFIG_GENERIC_IRQ_LEGACY
-void irq_init_desc(unsigned int irq)
-{
- free_desc(irq);
-}
-#endif
-
#endif /* !CONFIG_SPARSE_IRQ */
int handle_irq_desc(struct irq_desc *desc)
diff --git a/kernel/rseq.c b/kernel/rseq.c
index b7a1ec327e81..2452b7366b00 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -342,12 +342,12 @@ static int rseq_need_restart(struct task_struct *t, u32 cs_flags)
/*
* Load and clear event mask atomically with respect to
- * scheduler preemption.
+ * scheduler preemption and membarrier IPIs.
*/
- preempt_disable();
- event_mask = t->rseq_event_mask;
- t->rseq_event_mask = 0;
- preempt_enable();
+ scoped_guard(RSEQ_EVENT_GUARD) {
+ event_mask = t->rseq_event_mask;
+ t->rseq_event_mask = 0;
+ }
return !!event_mask;
}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ec126f85ff40..198d2dd45f59 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -919,7 +919,7 @@ void hrtick_start(struct rq *rq, u64 delay)
* doesn't make sense and can cause timer DoS.
*/
delta = max_t(s64, delay, 10000LL);
- rq->hrtick_time = ktime_add_ns(timer->base->get_time(), delta);
+ rq->hrtick_time = ktime_add_ns(hrtimer_cb_get_time(timer), delta);
if (rq == this_rq())
__hrtick_restart(rq);
diff --git a/kernel/smp.c b/kernel/smp.c
index 56f83aa58ec8..02f52291fae4 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -884,16 +884,15 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
* @mask: The set of cpus to run on (only runs on online subset).
* @func: The function to run. This must be fast and non-blocking.
* @info: An arbitrary pointer to pass to the function.
- * @wait: Bitmask that controls the operation. If %SCF_WAIT is set, wait
- * (atomically) until function has completed on other CPUs. If
- * %SCF_RUN_LOCAL is set, the function will also be run locally
- * if the local CPU is set in the @cpumask.
- *
- * If @wait is true, then returns once @func has returned.
+ * @wait: If true, wait (atomically) until function has completed
+ * on other CPUs.
*
* You must not call this function with disabled interrupts or from a
* hardware interrupt handler or from a bottom half handler. Preemption
* must be disabled when calling this function.
+ *
+ * @func is not called on the local CPU even if @mask contains it. Consider
+ * using on_each_cpu_cond_mask() instead if this is not desirable.
*/
void smp_call_function_many(const struct cpumask *mask,
smp_call_func_t func, void *info, bool wait)
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 513b1945987c..77198911b8dd 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -165,7 +165,11 @@ void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
/* First entry of a task into a BH disabled section? */
if (!current->softirq_disable_cnt) {
if (preemptible()) {
- local_lock(&softirq_ctrl.lock);
+ if (IS_ENABLED(CONFIG_PREEMPT_RT_NEEDS_BH_LOCK))
+ local_lock(&softirq_ctrl.lock);
+ else
+ migrate_disable();
+
/* Required to meet the RCU bottomhalf requirements. */
rcu_read_lock();
} else {
@@ -177,17 +181,34 @@ void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
* Track the per CPU softirq disabled state. On RT this is per CPU
* state to allow preemption of bottom half disabled sections.
*/
- newcnt = __this_cpu_add_return(softirq_ctrl.cnt, cnt);
- /*
- * Reflect the result in the task state to prevent recursion on the
- * local lock and to make softirq_count() & al work.
- */
- current->softirq_disable_cnt = newcnt;
+ if (IS_ENABLED(CONFIG_PREEMPT_RT_NEEDS_BH_LOCK)) {
+ newcnt = this_cpu_add_return(softirq_ctrl.cnt, cnt);
+ /*
+ * Reflect the result in the task state to prevent recursion on the
+ * local lock and to make softirq_count() & al work.
+ */
+ current->softirq_disable_cnt = newcnt;
- if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && newcnt == cnt) {
- raw_local_irq_save(flags);
- lockdep_softirqs_off(ip);
- raw_local_irq_restore(flags);
+ if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && newcnt == cnt) {
+ raw_local_irq_save(flags);
+ lockdep_softirqs_off(ip);
+ raw_local_irq_restore(flags);
+ }
+ } else {
+ bool sirq_dis = false;
+
+ if (!current->softirq_disable_cnt)
+ sirq_dis = true;
+
+ this_cpu_add(softirq_ctrl.cnt, cnt);
+ current->softirq_disable_cnt += cnt;
+ WARN_ON_ONCE(current->softirq_disable_cnt < 0);
+
+ if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && sirq_dis) {
+ raw_local_irq_save(flags);
+ lockdep_softirqs_off(ip);
+ raw_local_irq_restore(flags);
+ }
}
}
EXPORT_SYMBOL(__local_bh_disable_ip);
@@ -195,23 +216,42 @@ EXPORT_SYMBOL(__local_bh_disable_ip);
static void __local_bh_enable(unsigned int cnt, bool unlock)
{
unsigned long flags;
+ bool sirq_en = false;
int newcnt;
- DEBUG_LOCKS_WARN_ON(current->softirq_disable_cnt !=
- this_cpu_read(softirq_ctrl.cnt));
+ if (IS_ENABLED(CONFIG_PREEMPT_RT_NEEDS_BH_LOCK)) {
+ DEBUG_LOCKS_WARN_ON(current->softirq_disable_cnt !=
+ this_cpu_read(softirq_ctrl.cnt));
+ if (softirq_count() == cnt)
+ sirq_en = true;
+ } else {
+ if (current->softirq_disable_cnt == cnt)
+ sirq_en = true;
+ }
- if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && softirq_count() == cnt) {
+ if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && sirq_en) {
raw_local_irq_save(flags);
lockdep_softirqs_on(_RET_IP_);
raw_local_irq_restore(flags);
}
- newcnt = __this_cpu_sub_return(softirq_ctrl.cnt, cnt);
- current->softirq_disable_cnt = newcnt;
+ if (IS_ENABLED(CONFIG_PREEMPT_RT_NEEDS_BH_LOCK)) {
+ newcnt = this_cpu_sub_return(softirq_ctrl.cnt, cnt);
+ current->softirq_disable_cnt = newcnt;
- if (!newcnt && unlock) {
- rcu_read_unlock();
- local_unlock(&softirq_ctrl.lock);
+ if (!newcnt && unlock) {
+ rcu_read_unlock();
+ local_unlock(&softirq_ctrl.lock);
+ }
+ } else {
+ current->softirq_disable_cnt -= cnt;
+ this_cpu_sub(softirq_ctrl.cnt, cnt);
+ if (unlock && !current->softirq_disable_cnt) {
+ migrate_enable();
+ rcu_read_unlock();
+ } else {
+ WARN_ON_ONCE(current->softirq_disable_cnt < 0);
+ }
}
}
@@ -228,7 +268,10 @@ void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
lock_map_release(&bh_lock_map);
local_irq_save(flags);
- curcnt = __this_cpu_read(softirq_ctrl.cnt);
+ if (IS_ENABLED(CONFIG_PREEMPT_RT_NEEDS_BH_LOCK))
+ curcnt = this_cpu_read(softirq_ctrl.cnt);
+ else
+ curcnt = current->softirq_disable_cnt;
/*
* If this is not reenabling soft interrupts, no point in trying to
@@ -805,6 +848,58 @@ static bool tasklet_clear_sched(struct tasklet_struct *t)
return false;
}
+#ifdef CONFIG_PREEMPT_RT
+struct tasklet_sync_callback {
+ spinlock_t cb_lock;
+ atomic_t cb_waiters;
+};
+
+static DEFINE_PER_CPU(struct tasklet_sync_callback, tasklet_sync_callback) = {
+ .cb_lock = __SPIN_LOCK_UNLOCKED(tasklet_sync_callback.cb_lock),
+ .cb_waiters = ATOMIC_INIT(0),
+};
+
+static void tasklet_lock_callback(void)
+{
+ spin_lock(this_cpu_ptr(&tasklet_sync_callback.cb_lock));
+}
+
+static void tasklet_unlock_callback(void)
+{
+ spin_unlock(this_cpu_ptr(&tasklet_sync_callback.cb_lock));
+}
+
+static void tasklet_callback_cancel_wait_running(void)
+{
+ struct tasklet_sync_callback *sync_cb = this_cpu_ptr(&tasklet_sync_callback);
+
+ atomic_inc(&sync_cb->cb_waiters);
+ spin_lock(&sync_cb->cb_lock);
+ atomic_dec(&sync_cb->cb_waiters);
+ spin_unlock(&sync_cb->cb_lock);
+}
+
+static void tasklet_callback_sync_wait_running(void)
+{
+ struct tasklet_sync_callback *sync_cb = this_cpu_ptr(&tasklet_sync_callback);
+
+ if (atomic_read(&sync_cb->cb_waiters)) {
+ spin_unlock(&sync_cb->cb_lock);
+ spin_lock(&sync_cb->cb_lock);
+ }
+}
+
+#else /* !CONFIG_PREEMPT_RT: */
+
+static void tasklet_lock_callback(void) { }
+static void tasklet_unlock_callback(void) { }
+static void tasklet_callback_sync_wait_running(void) { }
+
+#ifdef CONFIG_SMP
+static void tasklet_callback_cancel_wait_running(void) { }
+#endif
+#endif /* !CONFIG_PREEMPT_RT */
+
static void tasklet_action_common(struct tasklet_head *tl_head,
unsigned int softirq_nr)
{
@@ -816,6 +911,7 @@ static void tasklet_action_common(struct tasklet_head *tl_head,
tl_head->tail = &tl_head->head;
local_irq_enable();
+ tasklet_lock_callback();
while (list) {
struct tasklet_struct *t = list;
@@ -835,6 +931,7 @@ static void tasklet_action_common(struct tasklet_head *tl_head,
}
}
tasklet_unlock(t);
+ tasklet_callback_sync_wait_running();
continue;
}
tasklet_unlock(t);
@@ -847,6 +944,7 @@ static void tasklet_action_common(struct tasklet_head *tl_head,
__raise_softirq_irqoff(softirq_nr);
local_irq_enable();
}
+ tasklet_unlock_callback();
}
static __latent_entropy void tasklet_action(void)
@@ -897,12 +995,9 @@ void tasklet_unlock_spin_wait(struct tasklet_struct *t)
/*
* Prevent a live lock when current preempted soft
* interrupt processing or prevents ksoftirqd from
- * running. If the tasklet runs on a different CPU
- * then this has no effect other than doing the BH
- * disable/enable dance for nothing.
+ * running.
*/
- local_bh_disable();
- local_bh_enable();
+ tasklet_callback_cancel_wait_running();
} else {
cpu_relax();
}
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index e6e9b85d4db5..f7d52d9543cc 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -26,7 +26,7 @@ obj-$(CONFIG_LEGACY_TIMER_TICK) += tick-legacy.o
ifeq ($(CONFIG_SMP),y)
obj-$(CONFIG_NO_HZ_COMMON) += timer_migration.o
endif
-obj-$(CONFIG_HAVE_GENERIC_VDSO) += vsyscall.o
+obj-$(CONFIG_GENERIC_GETTIMEOFDAY) += vsyscall.o
obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o
obj-$(CONFIG_TEST_UDELAY) += test_udelay.o
obj-$(CONFIG_TIME_NS) += namespace.o
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 577f0e6842d4..069d93bfb0c7 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -35,7 +35,7 @@
/**
* struct alarm_base - Alarm timer bases
- * @lock: Lock for syncrhonized access to the base
+ * @lock: Lock for synchronized access to the base
* @timerqueue: Timerqueue head managing the list of events
* @get_ktime: Function to read the time correlating to the base
* @get_timespec: Function to read the namespace time correlating to the base
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index f3e831f62906..a59bc75ab7c5 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -633,7 +633,7 @@ void tick_offline_cpu(unsigned int cpu)
raw_spin_lock(&clockevents_lock);
tick_broadcast_offline(cpu);
- tick_shutdown(cpu);
+ tick_shutdown();
/*
* Unregister the clock event devices which were
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 0aef0e349e49..a1890a073196 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -144,7 +144,7 @@ static u64 suspend_start;
* Default for maximum permissible skew when cs->uncertainty_margin is
* not specified, and the lower bound even when cs->uncertainty_margin
* is specified. This is also the default that is used when registering
- * clocks with unspecifed cs->uncertainty_margin, so this macro is used
+ * clocks with unspecified cs->uncertainty_margin, so this macro is used
* even in CONFIG_CLOCKSOURCE_WATCHDOG=n kernels.
*/
#define WATCHDOG_MAX_SKEW (MAX_SKEW_USEC * NSEC_PER_USEC)
@@ -407,9 +407,8 @@ void clocksource_verify_percpu(struct clocksource *cs)
if (!cpumask_empty(&cpus_behind))
pr_warn(" CPUs %*pbl behind CPU %d for clocksource %s.\n",
cpumask_pr_args(&cpus_behind), testcpu, cs->name);
- if (!cpumask_empty(&cpus_ahead) || !cpumask_empty(&cpus_behind))
- pr_warn(" CPU %d check durations %lldns - %lldns for clocksource %s.\n",
- testcpu, cs_nsec_min, cs_nsec_max, cs->name);
+ pr_info(" CPU %d check durations %lldns - %lldns for clocksource %s.\n",
+ testcpu, cs_nsec_min, cs_nsec_max, cs->name);
}
EXPORT_SYMBOL_GPL(clocksource_verify_percpu);
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index e8c479329282..88aa062b8a55 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -59,6 +59,7 @@
#define HRTIMER_ACTIVE_ALL (HRTIMER_ACTIVE_SOFT | HRTIMER_ACTIVE_HARD)
static void retrigger_next_event(void *arg);
+static ktime_t __hrtimer_cb_get_time(clockid_t clock_id);
/*
* The timer bases:
@@ -76,42 +77,34 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
{
.index = HRTIMER_BASE_MONOTONIC,
.clockid = CLOCK_MONOTONIC,
- .get_time = &ktime_get,
},
{
.index = HRTIMER_BASE_REALTIME,
.clockid = CLOCK_REALTIME,
- .get_time = &ktime_get_real,
},
{
.index = HRTIMER_BASE_BOOTTIME,
.clockid = CLOCK_BOOTTIME,
- .get_time = &ktime_get_boottime,
},
{
.index = HRTIMER_BASE_TAI,
.clockid = CLOCK_TAI,
- .get_time = &ktime_get_clocktai,
},
{
.index = HRTIMER_BASE_MONOTONIC_SOFT,
.clockid = CLOCK_MONOTONIC,
- .get_time = &ktime_get,
},
{
.index = HRTIMER_BASE_REALTIME_SOFT,
.clockid = CLOCK_REALTIME,
- .get_time = &ktime_get_real,
},
{
.index = HRTIMER_BASE_BOOTTIME_SOFT,
.clockid = CLOCK_BOOTTIME,
- .get_time = &ktime_get_boottime,
},
{
.index = HRTIMER_BASE_TAI_SOFT,
.clockid = CLOCK_TAI,
- .get_time = &ktime_get_clocktai,
},
},
.csd = CSD_INIT(retrigger_next_event, NULL)
@@ -208,7 +201,7 @@ static bool hrtimer_suitable_target(struct hrtimer *timer, struct hrtimer_clock_
/*
* The offline local CPU can't be the default target if the
* next remote target event is after this timer. Keep the
- * elected new base. An IPI will we issued to reprogram
+ * elected new base. An IPI will be issued to reprogram
* it as a last resort.
*/
if (!hrtimer_base_is_online(this_cpu_base))
@@ -1253,7 +1246,7 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
remove_hrtimer(timer, base, true, force_local);
if (mode & HRTIMER_MODE_REL)
- tim = ktime_add_safe(tim, base->get_time());
+ tim = ktime_add_safe(tim, __hrtimer_cb_get_time(base->clockid));
tim = hrtimer_update_lowres(timer, tim, mode);
@@ -1574,10 +1567,10 @@ u64 hrtimer_next_event_without(const struct hrtimer *exclude)
static inline int hrtimer_clockid_to_base(clockid_t clock_id)
{
switch (clock_id) {
- case CLOCK_REALTIME:
- return HRTIMER_BASE_REALTIME;
case CLOCK_MONOTONIC:
return HRTIMER_BASE_MONOTONIC;
+ case CLOCK_REALTIME:
+ return HRTIMER_BASE_REALTIME;
case CLOCK_BOOTTIME:
return HRTIMER_BASE_BOOTTIME;
case CLOCK_TAI:
@@ -1588,6 +1581,29 @@ static inline int hrtimer_clockid_to_base(clockid_t clock_id)
}
}
+static ktime_t __hrtimer_cb_get_time(clockid_t clock_id)
+{
+ switch (clock_id) {
+ case CLOCK_MONOTONIC:
+ return ktime_get();
+ case CLOCK_REALTIME:
+ return ktime_get_real();
+ case CLOCK_BOOTTIME:
+ return ktime_get_boottime();
+ case CLOCK_TAI:
+ return ktime_get_clocktai();
+ default:
+ WARN(1, "Invalid clockid %d. Using MONOTONIC\n", clock_id);
+ return ktime_get();
+ }
+}
+
+ktime_t hrtimer_cb_get_time(const struct hrtimer *timer)
+{
+ return __hrtimer_cb_get_time(timer->base->clockid);
+}
+EXPORT_SYMBOL_GPL(hrtimer_cb_get_time);
+
static void __hrtimer_setup(struct hrtimer *timer,
enum hrtimer_restart (*function)(struct hrtimer *),
clockid_t clock_id, enum hrtimer_mode mode)
diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c
index 876d389b2e21..7c6110e964e7 100644
--- a/kernel/time/itimer.c
+++ b/kernel/time/itimer.c
@@ -163,8 +163,7 @@ void posixtimer_rearm_itimer(struct task_struct *tsk)
struct hrtimer *tmr = &tsk->signal->real_timer;
if (!hrtimer_is_queued(tmr) && tsk->signal->it_real_incr != 0) {
- hrtimer_forward(tmr, tmr->base->get_time(),
- tsk->signal->it_real_incr);
+ hrtimer_forward_now(tmr, tsk->signal->it_real_incr);
hrtimer_restart(tmr);
}
}
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 8b582174b1f9..aa3120104a51 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -299,8 +299,7 @@ static void common_hrtimer_rearm(struct k_itimer *timr)
{
struct hrtimer *timer = &timr->it.real.timer;
- timr->it_overrun += hrtimer_forward(timer, timer->base->get_time(),
- timr->it_interval);
+ timr->it_overrun += hrtimer_forward_now(timer, timr->it_interval);
hrtimer_restart(timer);
}
@@ -535,7 +534,7 @@ static int do_timer_create(clockid_t which_clock, struct sigevent *event,
goto out;
}
/*
- * After succesful copy out, the timer ID is visible to user space
+ * After successful copy out, the timer ID is visible to user space
* now but not yet valid because new_timer::signal low order bit is 1.
*
* Complete the initialization with the clock specific create
@@ -825,7 +824,7 @@ static void common_hrtimer_arm(struct k_itimer *timr, ktime_t expires,
hrtimer_setup(&timr->it.real.timer, posix_timer_fn, timr->it_clock, mode);
if (!absolute)
- expires = ktime_add_safe(expires, timer->base->get_time());
+ expires = ktime_add_safe(expires, hrtimer_cb_get_time(timer));
hrtimer_set_expires(timer, expires);
if (!sigev_none)
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index cc15fe293719..cc1afec306b3 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -174,8 +174,7 @@ static enum hrtimer_restart sched_clock_poll(struct hrtimer *hrt)
return HRTIMER_RESTART;
}
-void __init
-sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)
+void sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)
{
u64 res, wrap, new_mask, new_epoch, cyc, ns;
u32 new_mult, new_shift;
@@ -247,6 +246,7 @@ sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)
pr_debug("Registered %pS as sched_clock source\n", read);
}
+EXPORT_SYMBOL_GPL(sched_clock_register);
void __init generic_sched_clock_init(void)
{
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 9a3859443c04..7e33d3f2e889 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -411,24 +411,18 @@ int tick_cpu_dying(unsigned int dying_cpu)
}
/*
- * Shutdown an event device on a given cpu:
+ * Shutdown an event device on the outgoing CPU:
*
- * This is called on a life CPU, when a CPU is dead. So we cannot
- * access the hardware device itself.
- * We just set the mode and remove it from the lists.
+ * Called by the dying CPU during teardown, with clockevents_lock held
+ * and interrupts disabled.
*/
-void tick_shutdown(unsigned int cpu)
+void tick_shutdown(void)
{
- struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
+ struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
struct clock_event_device *dev = td->evtdev;
td->mode = TICKDEV_MODE_PERIODIC;
if (dev) {
- /*
- * Prevent that the clock events layer tries to call
- * the set mode function!
- */
- clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED);
clockevents_exchange_device(dev, NULL);
dev->event_handler = clockevents_handle_noop;
td->evtdev = NULL;
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index faac36de35b9..4e4f7bbe2a64 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -26,7 +26,7 @@ extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast);
extern void tick_handle_periodic(struct clock_event_device *dev);
extern void tick_check_new_device(struct clock_event_device *dev);
extern void tick_offline_cpu(unsigned int cpu);
-extern void tick_shutdown(unsigned int cpu);
+extern void tick_shutdown(void);
extern void tick_suspend(void);
extern void tick_resume(void);
extern bool tick_check_replacement(struct clock_event_device *curdev,
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index b03d0ada6469..488e47e96e93 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -102,8 +102,6 @@ print_base(struct seq_file *m, struct hrtimer_clock_base *base, u64 now)
SEQ_printf(m, " .index: %d\n", base->index);
SEQ_printf(m, " .resolution: %u nsecs\n", hrtimer_resolution);
-
- SEQ_printf(m, " .get_time: %ps\n", base->get_time);
#ifdef CONFIG_HIGH_RES_TIMERS
SEQ_printf(m, " .offset: %Lu nsecs\n",
(unsigned long long) ktime_to_ns(base->offset));
diff --git a/lib/test_objpool.c b/lib/test_objpool.c
index 8f688187fa87..6a34a7582fdb 100644
--- a/lib/test_objpool.c
+++ b/lib/test_objpool.c
@@ -164,7 +164,7 @@ static enum hrtimer_restart ot_hrtimer_handler(struct hrtimer *hrt)
/* do bulk-testings for objects pop/push */
item->worker(item, 1);
- hrtimer_forward(hrt, hrt->base->get_time(), item->hrtcycle);
+ hrtimer_forward_now(hrt, item->hrtcycle);
return HRTIMER_RESTART;
}
diff --git a/lib/vdso/Kconfig b/lib/vdso/Kconfig
index 45df764b49ad..db87ba34ef19 100644
--- a/lib/vdso/Kconfig
+++ b/lib/vdso/Kconfig
@@ -12,24 +12,6 @@ config GENERIC_GETTIMEOFDAY
Each architecture that enables this feature has to
provide the fallback implementation.
-config GENERIC_VDSO_32
- bool
- depends on GENERIC_GETTIMEOFDAY && !64BIT
- help
- This config option helps to avoid possible performance issues
- in 32 bit only architectures.
-
-config GENERIC_COMPAT_VDSO
- bool
- help
- This config option enables the compat VDSO layer.
-
-config GENERIC_VDSO_TIME_NS
- bool
- help
- Selected by architectures which support time namespaces in the
- VDSO
-
config GENERIC_VDSO_OVERFLOW_PROTECT
bool
help
@@ -37,14 +19,9 @@ config GENERIC_VDSO_OVERFLOW_PROTECT
time getter functions for the price of an extra conditional
in the hotpath.
-endif
-
config VDSO_GETRANDOM
bool
help
Selected by architectures that support vDSO getrandom().
-config GENERIC_VDSO_DATA_STORE
- bool
- help
- Selected by architectures that use the generic vDSO data store.
+endif
diff --git a/lib/vdso/Makefile b/lib/vdso/Makefile
index aedd40aaa950..405f743253d7 100644
--- a/lib/vdso/Makefile
+++ b/lib/vdso/Makefile
@@ -1,3 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_GENERIC_VDSO_DATA_STORE) += datastore.o
+obj-$(CONFIG_HAVE_GENERIC_VDSO) += datastore.o
diff --git a/lib/vdso/datastore.c b/lib/vdso/datastore.c
index 3693c6caf2c4..a565c30c71a0 100644
--- a/lib/vdso/datastore.c
+++ b/lib/vdso/datastore.c
@@ -11,14 +11,14 @@
/*
* The vDSO data page.
*/
-#ifdef CONFIG_HAVE_GENERIC_VDSO
+#ifdef CONFIG_GENERIC_GETTIMEOFDAY
static union {
struct vdso_time_data data;
u8 page[PAGE_SIZE];
} vdso_time_data_store __page_aligned_data;
struct vdso_time_data *vdso_k_time_data = &vdso_time_data_store.data;
static_assert(sizeof(vdso_time_data_store) == PAGE_SIZE);
-#endif /* CONFIG_HAVE_GENERIC_VDSO */
+#endif /* CONFIG_GENERIC_GETTIMEOFDAY */
#ifdef CONFIG_VDSO_GETRANDOM
static union {
@@ -46,7 +46,7 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
switch (vmf->pgoff) {
case VDSO_TIME_PAGE_OFFSET:
- if (!IS_ENABLED(CONFIG_HAVE_GENERIC_VDSO))
+ if (!IS_ENABLED(CONFIG_GENERIC_GETTIMEOFDAY))
return VM_FAULT_SIGBUS;
pfn = __phys_to_pfn(__pa_symbol(vdso_k_time_data));
if (timens_page) {
diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c
index 02ea19f67164..95df0153f05a 100644
--- a/lib/vdso/gettimeofday.c
+++ b/lib/vdso/gettimeofday.c
@@ -108,15 +108,11 @@ bool vdso_get_timestamp(const struct vdso_time_data *vd, const struct vdso_clock
return true;
}
-#ifdef CONFIG_TIME_NS
-
-#ifdef CONFIG_GENERIC_VDSO_DATA_STORE
static __always_inline
const struct vdso_time_data *__arch_get_vdso_u_timens_data(const struct vdso_time_data *vd)
{
return (void *)vd + PAGE_SIZE;
}
-#endif /* CONFIG_GENERIC_VDSO_DATA_STORE */
static __always_inline
bool do_hres_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns,
@@ -149,20 +145,6 @@ bool do_hres_timens(const struct vdso_time_data *vdns, const struct vdso_clock *
return true;
}
-#else
-static __always_inline
-const struct vdso_time_data *__arch_get_vdso_u_timens_data(const struct vdso_time_data *vd)
-{
- return NULL;
-}
-
-static __always_inline
-bool do_hres_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns,
- clockid_t clk, struct __kernel_timespec *ts)
-{
- return false;
-}
-#endif
static __always_inline
bool do_hres(const struct vdso_time_data *vd, const struct vdso_clock *vc,
@@ -204,7 +186,6 @@ bool do_hres(const struct vdso_time_data *vd, const struct vdso_clock *vc,
return true;
}
-#ifdef CONFIG_TIME_NS
static __always_inline
bool do_coarse_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns,
clockid_t clk, struct __kernel_timespec *ts)
@@ -233,14 +214,6 @@ bool do_coarse_timens(const struct vdso_time_data *vdns, const struct vdso_clock
return true;
}
-#else
-static __always_inline
-bool do_coarse_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns,
- clockid_t clk, struct __kernel_timespec *ts)
-{
- return false;
-}
-#endif
static __always_inline
bool do_coarse(const struct vdso_time_data *vd, const struct vdso_clock *vc,
diff --git a/scripts/gdb/linux/timerlist.py b/scripts/gdb/linux/timerlist.py
index 98445671fe83..ccc24d30de80 100644
--- a/scripts/gdb/linux/timerlist.py
+++ b/scripts/gdb/linux/timerlist.py
@@ -56,8 +56,6 @@ def print_base(base):
text += " .index: {}\n".format(base['index'])
text += " .resolution: {} nsecs\n".format(constants.LX_hrtimer_resolution)
-
- text += " .get_time: {}\n".format(base['get_time'])
if constants.LX_CONFIG_HIGH_RES_TIMERS:
text += " .offset: {} nsecs\n".format(base['offset'])
text += "active timers:\n"
diff --git a/sound/core/hrtimer.c b/sound/core/hrtimer.c
index c364bd126ac8..2d5f4d47071f 100644
--- a/sound/core/hrtimer.c
+++ b/sound/core/hrtimer.c
@@ -44,7 +44,7 @@ static enum hrtimer_restart snd_hrtimer_callback(struct hrtimer *hrt)
}
/* calculate the drift */
- delta = ktime_sub(hrt->base->get_time(), hrtimer_get_expires(hrt));
+ delta = ktime_sub(hrtimer_cb_get_time(hrt), hrtimer_get_expires(hrt));
if (delta > 0)
ticks += ktime_divns(delta, ticks * resolution);
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 98c4713c1b09..0ad5cc70ecbe 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -880,3 +880,15 @@ unsigned int arch_reloc_size(struct reloc *reloc)
return 8;
}
}
+
+bool arch_absolute_reloc(struct elf *elf, struct reloc *reloc)
+{
+ switch (reloc_type(reloc)) {
+ case R_X86_64_32:
+ case R_X86_64_32S:
+ case R_X86_64_64:
+ return true;
+ default:
+ return false;
+ }
+}
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index 80239843e9f0..0f6b197cfcb0 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -87,6 +87,7 @@ static const struct option check_options[] = {
OPT_BOOLEAN('t', "static-call", &opts.static_call, "annotate static calls"),
OPT_BOOLEAN('u', "uaccess", &opts.uaccess, "validate uaccess rules for SMAP"),
OPT_BOOLEAN(0 , "cfi", &opts.cfi, "annotate kernel control flow integrity (kCFI) function preambles"),
+ OPT_BOOLEAN(0 , "noabs", &opts.noabs, "reject absolute references in allocatable sections"),
OPT_CALLBACK_OPTARG(0, "dump", NULL, NULL, "orc", "dump metadata", parse_dump),
OPT_GROUP("Options:"),
@@ -162,6 +163,7 @@ static bool opts_valid(void)
opts.hack_noinstr ||
opts.ibt ||
opts.mcount ||
+ opts.noabs ||
opts.noinstr ||
opts.orc ||
opts.retpoline ||
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index d14f20ef1db1..093fcd01dd6e 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -3564,7 +3564,9 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
if (func && insn_func(insn) && func != insn_func(insn)->pfunc) {
/* Ignore KCFI type preambles, which always fall through */
if (!strncmp(func->name, "__cfi_", 6) ||
- !strncmp(func->name, "__pfx_", 6))
+ !strncmp(func->name, "__pfx_", 6) ||
+ !strncmp(func->name, "__pi___cfi_", 11) ||
+ !strncmp(func->name, "__pi___pfx_", 11))
return 0;
if (file->ignore_unreachables)
@@ -4644,6 +4646,47 @@ static void disas_warned_funcs(struct objtool_file *file)
disas_funcs(funcs);
}
+__weak bool arch_absolute_reloc(struct elf *elf, struct reloc *reloc)
+{
+ unsigned int type = reloc_type(reloc);
+ size_t sz = elf_addr_size(elf);
+
+ return (sz == 8) ? (type == R_ABS64) : (type == R_ABS32);
+}
+
+static int check_abs_references(struct objtool_file *file)
+{
+ struct section *sec;
+ struct reloc *reloc;
+ int ret = 0;
+
+ for_each_sec(file, sec) {
+ /* absolute references in non-loadable sections are fine */
+ if (!(sec->sh.sh_flags & SHF_ALLOC))
+ continue;
+
+ /* section must have an associated .rela section */
+ if (!sec->rsec)
+ continue;
+
+ /*
+ * Special case for compiler generated metadata that is not
+ * consumed until after boot.
+ */
+ if (!strcmp(sec->name, "__patchable_function_entries"))
+ continue;
+
+ for_each_reloc(sec->rsec, reloc) {
+ if (arch_absolute_reloc(file->elf, reloc)) {
+ WARN("section %s has absolute relocation at offset 0x%lx",
+ sec->name, reloc_offset(reloc));
+ ret++;
+ }
+ }
+ }
+ return ret;
+}
+
struct insn_chunk {
void *addr;
struct insn_chunk *next;
@@ -4777,6 +4820,9 @@ int check(struct objtool_file *file)
goto out;
}
+ if (opts.noabs)
+ warnings += check_abs_references(file);
+
if (opts.orc && nr_insns) {
ret = orc_create(file);
if (ret)
diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h
index 01ef6f415adf..be33c7b43180 100644
--- a/tools/objtool/include/objtool/arch.h
+++ b/tools/objtool/include/objtool/arch.h
@@ -97,6 +97,7 @@ bool arch_is_embedded_insn(struct symbol *sym);
int arch_rewrite_retpolines(struct objtool_file *file);
bool arch_pc_relative_reloc(struct reloc *reloc);
+bool arch_absolute_reloc(struct elf *elf, struct reloc *reloc);
unsigned int arch_reloc_size(struct reloc *reloc);
unsigned long arch_jump_table_sym_offset(struct reloc *reloc, struct reloc *table);
diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h
index 6b08666fa69d..ab22673862e1 100644
--- a/tools/objtool/include/objtool/builtin.h
+++ b/tools/objtool/include/objtool/builtin.h
@@ -26,6 +26,7 @@ struct opts {
bool uaccess;
int prefix;
bool cfi;
+ bool noabs;
/* options: */
bool backtrace;
diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h
index 6a922d046b8e..802895fae3ca 100644
--- a/tools/objtool/noreturns.h
+++ b/tools/objtool/noreturns.h
@@ -45,7 +45,6 @@ NORETURN(rewind_stack_and_make_dead)
NORETURN(rust_begin_unwind)
NORETURN(rust_helper_BUG)
NORETURN(sev_es_terminate)
-NORETURN(snp_abort)
NORETURN(start_kernel)
NORETURN(stop_this_cpu)
NORETURN(usercopy_abort)
diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile
index 8cfb87f7f7c5..490ace1f017e 100644
--- a/tools/testing/selftests/futex/functional/Makefile
+++ b/tools/testing/selftests/futex/functional/Makefile
@@ -1,12 +1,14 @@
# SPDX-License-Identifier: GPL-2.0
+PKG_CONFIG ?= pkg-config
+LIBNUMA_TEST = $(shell sh -c "$(PKG_CONFIG) numa --atleast-version 2.0.16 > /dev/null 2>&1 && echo SUFFICIENT || echo NO")
+
INCLUDES := -I../include -I../../ $(KHDR_INCLUDES)
-CFLAGS := $(CFLAGS) -g -O2 -Wall -pthread $(INCLUDES) $(KHDR_INCLUDES)
+CFLAGS := $(CFLAGS) -g -O2 -Wall -pthread -D_FILE_OFFSET_BITS=64 -D_TIME_BITS=64 $(INCLUDES) $(KHDR_INCLUDES) -DLIBNUMA_VER_$(LIBNUMA_TEST)=1
LDLIBS := -lpthread -lrt -lnuma
LOCAL_HDRS := \
../include/futextest.h \
- ../include/atomic.h \
- ../include/logging.h
+ ../include/atomic.h
TEST_GEN_PROGS := \
futex_wait_timeout \
futex_wait_wouldblock \
diff --git a/tools/testing/selftests/futex/functional/futex_numa.c b/tools/testing/selftests/futex/functional/futex_numa.c
index f29e4d627e79..e0a33510ccb6 100644
--- a/tools/testing/selftests/futex/functional/futex_numa.c
+++ b/tools/testing/selftests/futex/functional/futex_numa.c
@@ -5,9 +5,10 @@
#include <sys/mman.h>
#include <fcntl.h>
#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
#include <time.h>
#include <assert.h>
-#include "logging.h"
#include "futextest.h"
#include "futex2test.h"
diff --git a/tools/testing/selftests/futex/functional/futex_numa_mpol.c b/tools/testing/selftests/futex/functional/futex_numa_mpol.c
index a9ecfb2d3932..d037a3f10ee8 100644
--- a/tools/testing/selftests/futex/functional/futex_numa_mpol.c
+++ b/tools/testing/selftests/futex/functional/futex_numa_mpol.c
@@ -16,9 +16,9 @@
#include <linux/futex.h>
#include <sys/mman.h>
-#include "logging.h"
#include "futextest.h"
#include "futex2test.h"
+#include "../../kselftest_harness.h"
#define MAX_THREADS 64
@@ -77,7 +77,7 @@ static void join_max_threads(void)
}
}
-static void __test_futex(void *futex_ptr, int must_fail, unsigned int futex_flags)
+static void __test_futex(void *futex_ptr, int err_value, unsigned int futex_flags)
{
int to_wake, ret, i, need_exit = 0;
@@ -88,11 +88,17 @@ static void __test_futex(void *futex_ptr, int must_fail, unsigned int futex_flag
do {
ret = futex2_wake(futex_ptr, to_wake, futex_flags);
- if (must_fail) {
- if (ret < 0)
- break;
- ksft_exit_fail_msg("futex2_wake(%d, 0x%x) should fail, but didn't\n",
- to_wake, futex_flags);
+
+ if (err_value) {
+ if (ret >= 0)
+ ksft_exit_fail_msg("futex2_wake(%d, 0x%x) should fail, but didn't\n",
+ to_wake, futex_flags);
+
+ if (errno != err_value)
+ ksft_exit_fail_msg("futex2_wake(%d, 0x%x) expected error was %d, but returned %d (%s)\n",
+ to_wake, futex_flags, err_value, errno, strerror(errno));
+
+ break;
}
if (ret < 0) {
ksft_exit_fail_msg("Failed futex2_wake(%d, 0x%x): %m\n",
@@ -106,12 +112,12 @@ static void __test_futex(void *futex_ptr, int must_fail, unsigned int futex_flag
join_max_threads();
for (i = 0; i < MAX_THREADS; i++) {
- if (must_fail && thread_args[i].result != -1) {
+ if (err_value && thread_args[i].result != -1) {
ksft_print_msg("Thread %d should fail but succeeded (%d)\n",
i, thread_args[i].result);
need_exit = 1;
}
- if (!must_fail && thread_args[i].result != 0) {
+ if (!err_value && thread_args[i].result != 0) {
ksft_print_msg("Thread %d failed (%d)\n", i, thread_args[i].result);
need_exit = 1;
}
@@ -120,58 +126,30 @@ static void __test_futex(void *futex_ptr, int must_fail, unsigned int futex_flag
ksft_exit_fail_msg("Aborting due to earlier errors.\n");
}
-static void test_futex(void *futex_ptr, int must_fail)
+static void test_futex(void *futex_ptr, int err_value)
{
- __test_futex(futex_ptr, must_fail, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA);
+ __test_futex(futex_ptr, err_value, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA);
}
-static void test_futex_mpol(void *futex_ptr, int must_fail)
+static void test_futex_mpol(void *futex_ptr, int err_value)
{
- __test_futex(futex_ptr, must_fail, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA | FUTEX2_MPOL);
+ __test_futex(futex_ptr, err_value, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA | FUTEX2_MPOL);
}
-static void usage(char *prog)
-{
- printf("Usage: %s\n", prog);
- printf(" -c Use color\n");
- printf(" -h Display this help message\n");
- printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
- VQUIET, VCRITICAL, VINFO);
-}
-
-int main(int argc, char *argv[])
+TEST(futex_numa_mpol)
{
struct futex32_numa *futex_numa;
- int mem_size, i;
void *futex_ptr;
- int c;
-
- while ((c = getopt(argc, argv, "chv:")) != -1) {
- switch (c) {
- case 'c':
- log_color(1);
- break;
- case 'h':
- usage(basename(argv[0]));
- exit(0);
- break;
- case 'v':
- log_verbosity(atoi(optarg));
- break;
- default:
- usage(basename(argv[0]));
- exit(1);
- }
- }
-
- ksft_print_header();
- ksft_set_plan(1);
+ int mem_size;
mem_size = sysconf(_SC_PAGE_SIZE);
- futex_ptr = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+ futex_ptr = mmap(NULL, mem_size * 2, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
if (futex_ptr == MAP_FAILED)
ksft_exit_fail_msg("mmap() for %d bytes failed\n", mem_size);
+ /* Create an invalid memory region for the "Memory out of range" test */
+ mprotect(futex_ptr + mem_size, mem_size, PROT_NONE);
+
futex_numa = futex_ptr;
ksft_print_msg("Regular test\n");
@@ -182,27 +160,31 @@ int main(int argc, char *argv[])
if (futex_numa->numa == FUTEX_NO_NODE)
ksft_exit_fail_msg("NUMA node is left uninitialized\n");
- ksft_print_msg("Memory too small\n");
- test_futex(futex_ptr + mem_size - 4, 1);
+ /* FUTEX2_NUMA futex must be 8-byte aligned */
+ ksft_print_msg("Mis-aligned futex\n");
+ test_futex(futex_ptr + mem_size - 4, EINVAL);
ksft_print_msg("Memory out of range\n");
- test_futex(futex_ptr + mem_size, 1);
+ test_futex(futex_ptr + mem_size, EFAULT);
futex_numa->numa = FUTEX_NO_NODE;
mprotect(futex_ptr, mem_size, PROT_READ);
ksft_print_msg("Memory, RO\n");
- test_futex(futex_ptr, 1);
+ test_futex(futex_ptr, EFAULT);
mprotect(futex_ptr, mem_size, PROT_NONE);
ksft_print_msg("Memory, no access\n");
- test_futex(futex_ptr, 1);
+ test_futex(futex_ptr, EFAULT);
mprotect(futex_ptr, mem_size, PROT_READ | PROT_WRITE);
ksft_print_msg("Memory back to RW\n");
test_futex(futex_ptr, 0);
+ ksft_test_result_pass("futex2 memory boundary tests passed\n");
+
/* MPOL test. Does not work as expected */
- for (i = 0; i < 4; i++) {
+#ifdef LIBNUMA_VER_SUFFICIENT
+ for (int i = 0; i < 4; i++) {
unsigned long nodemask;
int ret;
@@ -221,15 +203,17 @@ int main(int argc, char *argv[])
ret = futex2_wake(futex_ptr, 0, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA | FUTEX2_MPOL);
if (ret < 0)
ksft_test_result_fail("Failed to wake 0 with MPOL: %m\n");
- if (0)
- test_futex_mpol(futex_numa, 0);
if (futex_numa->numa != i) {
ksft_exit_fail_msg("Returned NUMA node is %d expected %d\n",
futex_numa->numa, i);
}
}
}
- ksft_test_result_pass("NUMA MPOL tests passed\n");
- ksft_finished();
- return 0;
+ ksft_test_result_pass("futex2 MPOL hints test passed\n");
+#else
+ ksft_test_result_skip("futex2 MPOL hints test requires libnuma 2.0.16+\n");
+#endif
+ munmap(futex_ptr, mem_size * 2);
}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_priv_hash.c b/tools/testing/selftests/futex/functional/futex_priv_hash.c
index aea001ac4946..3b7b5851f290 100644
--- a/tools/testing/selftests/futex/functional/futex_priv_hash.c
+++ b/tools/testing/selftests/futex/functional/futex_priv_hash.c
@@ -14,7 +14,7 @@
#include <linux/prctl.h>
#include <sys/prctl.h>
-#include "logging.h"
+#include "../../kselftest_harness.h"
#define MAX_THREADS 64
@@ -128,46 +128,14 @@ static void futex_dummy_op(void)
ksft_exit_fail_msg("pthread_mutex_timedlock() did not timeout: %d.\n", ret);
}
-static void usage(char *prog)
-{
- printf("Usage: %s\n", prog);
- printf(" -c Use color\n");
- printf(" -g Test global hash instead intead local immutable \n");
- printf(" -h Display this help message\n");
- printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
- VQUIET, VCRITICAL, VINFO);
-}
-
static const char *test_msg_auto_create = "Automatic hash bucket init on thread creation.\n";
static const char *test_msg_auto_inc = "Automatic increase with more than 16 CPUs\n";
-int main(int argc, char *argv[])
+TEST(priv_hash)
{
int futex_slots1, futex_slotsn, online_cpus;
pthread_mutexattr_t mutex_attr_pi;
int ret, retry = 20;
- int c;
-
- while ((c = getopt(argc, argv, "chv:")) != -1) {
- switch (c) {
- case 'c':
- log_color(1);
- break;
- case 'h':
- usage(basename(argv[0]));
- exit(0);
- break;
- case 'v':
- log_verbosity(atoi(optarg));
- break;
- default:
- usage(basename(argv[0]));
- exit(1);
- }
- }
-
- ksft_print_header();
- ksft_set_plan(21);
ret = pthread_mutexattr_init(&mutex_attr_pi);
ret |= pthread_mutexattr_setprotocol(&mutex_attr_pi, PTHREAD_PRIO_INHERIT);
@@ -189,14 +157,14 @@ int main(int argc, char *argv[])
if (ret != 0)
ksft_exit_fail_msg("pthread_join() failed: %d, %m\n", ret);
- /* First thread, has to initialiaze private hash */
+ /* First thread, has to initialize private hash */
futex_slots1 = futex_hash_slots_get();
if (futex_slots1 <= 0) {
ksft_print_msg("Current hash buckets: %d\n", futex_slots1);
- ksft_exit_fail_msg(test_msg_auto_create);
+ ksft_exit_fail_msg("%s", test_msg_auto_create);
}
- ksft_test_result_pass(test_msg_auto_create);
+ ksft_test_result_pass("%s", test_msg_auto_create);
online_cpus = sysconf(_SC_NPROCESSORS_ONLN);
ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS + 1);
@@ -237,11 +205,11 @@ retry_getslots:
}
ksft_print_msg("Expected increase of hash buckets but got: %d -> %d\n",
futex_slots1, futex_slotsn);
- ksft_exit_fail_msg(test_msg_auto_inc);
+ ksft_exit_fail_msg("%s", test_msg_auto_inc);
}
- ksft_test_result_pass(test_msg_auto_inc);
+ ksft_test_result_pass("%s", test_msg_auto_inc);
} else {
- ksft_test_result_skip(test_msg_auto_inc);
+ ksft_test_result_skip("%s", test_msg_auto_inc);
}
ret = pthread_mutex_unlock(&global_lock);
@@ -257,17 +225,17 @@ retry_getslots:
futex_hash_slots_set_verify(2);
join_max_threads();
- ksft_test_result(counter == MAX_THREADS, "Created of waited for %d of %d threads\n",
+ ksft_test_result(counter == MAX_THREADS, "Created and waited for %d of %d threads\n",
counter, MAX_THREADS);
counter = 0;
- /* Once the user set something, auto reisze must be disabled */
+ /* Once the user set something, auto resize must be disabled */
ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS);
create_max_threads(thread_lock_fn);
join_max_threads();
ret = futex_hash_slots_get();
- ksft_test_result(ret == 2, "No more auto-resize after manaul setting, got %d\n",
+ ksft_test_result(ret == 2, "No more auto-resize after manual setting, got %d\n",
ret);
futex_hash_slots_set_must_fail(1 << 29);
@@ -280,7 +248,7 @@ retry_getslots:
ret = futex_hash_slots_set(0);
ksft_test_result(ret == 0, "Global hash request\n");
if (ret != 0)
- goto out;
+ return;
futex_hash_slots_set_must_fail(4);
futex_hash_slots_set_must_fail(8);
@@ -289,17 +257,14 @@ retry_getslots:
futex_hash_slots_set_must_fail(6);
ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS);
- if (ret != 0) {
+ if (ret != 0)
ksft_exit_fail_msg("pthread_barrier_init failed: %m\n");
- return 1;
- }
+
create_max_threads(thread_lock_fn);
join_max_threads();
ret = futex_hash_slots_get();
ksft_test_result(ret == 0, "Continue to use global hash\n");
-
-out:
- ksft_finished();
- return 0;
}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_requeue.c b/tools/testing/selftests/futex/functional/futex_requeue.c
index 51485be6eb2f..69e2555b6039 100644
--- a/tools/testing/selftests/futex/functional/futex_requeue.c
+++ b/tools/testing/selftests/futex/functional/futex_requeue.c
@@ -7,24 +7,15 @@
#include <pthread.h>
#include <limits.h>
-#include "logging.h"
+
#include "futextest.h"
+#include "../../kselftest_harness.h"
-#define TEST_NAME "futex-requeue"
#define timeout_ns 30000000
#define WAKE_WAIT_US 10000
volatile futex_t *f1;
-void usage(char *prog)
-{
- printf("Usage: %s\n", prog);
- printf(" -c Use color\n");
- printf(" -h Display this help message\n");
- printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
- VQUIET, VCRITICAL, VINFO);
-}
-
void *waiterfn(void *arg)
{
struct timespec to;
@@ -38,67 +29,49 @@ void *waiterfn(void *arg)
return NULL;
}
-int main(int argc, char *argv[])
+TEST(requeue_single)
{
- pthread_t waiter[10];
- int res, ret = RET_PASS;
- int c, i;
volatile futex_t _f1 = 0;
volatile futex_t f2 = 0;
+ pthread_t waiter[10];
+ int res;
f1 = &_f1;
- while ((c = getopt(argc, argv, "cht:v:")) != -1) {
- switch (c) {
- case 'c':
- log_color(1);
- break;
- case 'h':
- usage(basename(argv[0]));
- exit(0);
- case 'v':
- log_verbosity(atoi(optarg));
- break;
- default:
- usage(basename(argv[0]));
- exit(1);
- }
- }
-
- ksft_print_header();
- ksft_set_plan(2);
- ksft_print_msg("%s: Test futex_requeue\n",
- basename(argv[0]));
-
/*
* Requeue a waiter from f1 to f2, and wake f2.
*/
if (pthread_create(&waiter[0], NULL, waiterfn, NULL))
- error("pthread_create failed\n", errno);
+ ksft_exit_fail_msg("pthread_create failed\n");
usleep(WAKE_WAIT_US);
- info("Requeuing 1 futex from f1 to f2\n");
+ ksft_print_dbg_msg("Requeuing 1 futex from f1 to f2\n");
res = futex_cmp_requeue(f1, 0, &f2, 0, 1, 0);
- if (res != 1) {
+ if (res != 1)
ksft_test_result_fail("futex_requeue simple returned: %d %s\n",
res ? errno : res,
res ? strerror(errno) : "");
- ret = RET_FAIL;
- }
-
- info("Waking 1 futex at f2\n");
+ ksft_print_dbg_msg("Waking 1 futex at f2\n");
res = futex_wake(&f2, 1, 0);
if (res != 1) {
ksft_test_result_fail("futex_requeue simple returned: %d %s\n",
res ? errno : res,
res ? strerror(errno) : "");
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_requeue simple succeeds\n");
}
+}
+
+TEST(requeue_multiple)
+{
+ volatile futex_t _f1 = 0;
+ volatile futex_t f2 = 0;
+ pthread_t waiter[10];
+ int res, i;
+ f1 = &_f1;
/*
* Create 10 waiters at f1. At futex_requeue, wake 3 and requeue 7.
@@ -106,31 +79,28 @@ int main(int argc, char *argv[])
*/
for (i = 0; i < 10; i++) {
if (pthread_create(&waiter[i], NULL, waiterfn, NULL))
- error("pthread_create failed\n", errno);
+ ksft_exit_fail_msg("pthread_create failed\n");
}
usleep(WAKE_WAIT_US);
- info("Waking 3 futexes at f1 and requeuing 7 futexes from f1 to f2\n");
+ ksft_print_dbg_msg("Waking 3 futexes at f1 and requeuing 7 futexes from f1 to f2\n");
res = futex_cmp_requeue(f1, 0, &f2, 3, 7, 0);
if (res != 10) {
ksft_test_result_fail("futex_requeue many returned: %d %s\n",
res ? errno : res,
res ? strerror(errno) : "");
- ret = RET_FAIL;
}
- info("Waking INT_MAX futexes at f2\n");
+ ksft_print_dbg_msg("Waking INT_MAX futexes at f2\n");
res = futex_wake(&f2, INT_MAX, 0);
if (res != 7) {
ksft_test_result_fail("futex_requeue many returned: %d %s\n",
res ? errno : res,
res ? strerror(errno) : "");
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_requeue many succeeds\n");
}
-
- ksft_print_cnts();
- return ret;
}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi.c b/tools/testing/selftests/futex/functional/futex_requeue_pi.c
index 215c6cb539b4..f299d75848cd 100644
--- a/tools/testing/selftests/futex/functional/futex_requeue_pi.c
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi.c
@@ -26,11 +26,11 @@
#include <stdlib.h>
#include <signal.h>
#include <string.h>
+
#include "atomic.h"
#include "futextest.h"
-#include "logging.h"
+#include "../../kselftest_harness.h"
-#define TEST_NAME "futex-requeue-pi"
#define MAX_WAKE_ITERS 1000
#define THREAD_MAX 10
#define SIGNAL_PERIOD_US 100
@@ -42,12 +42,6 @@ futex_t f1 = FUTEX_INITIALIZER;
futex_t f2 = FUTEX_INITIALIZER;
futex_t wake_complete = FUTEX_INITIALIZER;
-/* Test option defaults */
-static long timeout_ns;
-static int broadcast;
-static int owner;
-static int locked;
-
struct thread_arg {
long id;
struct timespec *timeout;
@@ -56,18 +50,73 @@ struct thread_arg {
};
#define THREAD_ARG_INITIALIZER { 0, NULL, 0, 0 }
-void usage(char *prog)
+FIXTURE(args)
{
- printf("Usage: %s\n", prog);
- printf(" -b Broadcast wakeup (all waiters)\n");
- printf(" -c Use color\n");
- printf(" -h Display this help message\n");
- printf(" -l Lock the pi futex across requeue\n");
- printf(" -o Use a third party pi futex owner during requeue (cancels -l)\n");
- printf(" -t N Timeout in nanoseconds (default: 0)\n");
- printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
- VQUIET, VCRITICAL, VINFO);
-}
+};
+
+FIXTURE_SETUP(args)
+{
+};
+
+FIXTURE_TEARDOWN(args)
+{
+};
+
+FIXTURE_VARIANT(args)
+{
+ long timeout_ns;
+ bool broadcast;
+ bool owner;
+ bool locked;
+};
+
+/*
+ * For a given timeout value, this macro creates a test input with all the
+ * possible combinations of valid arguments
+ */
+#define FIXTURE_VARIANT_ADD_TIMEOUT(timeout) \
+ \
+FIXTURE_VARIANT_ADD(args, t_##timeout) \
+{ \
+ .timeout_ns = timeout, \
+}; \
+ \
+FIXTURE_VARIANT_ADD(args, t_##timeout##_broadcast) \
+{ \
+ .timeout_ns = timeout, \
+ .broadcast = true, \
+}; \
+ \
+FIXTURE_VARIANT_ADD(args, t_##timeout##_broadcast_locked) \
+{ \
+ .timeout_ns = timeout, \
+ .broadcast = true, \
+ .locked = true, \
+}; \
+ \
+FIXTURE_VARIANT_ADD(args, t_##timeout##_broadcast_owner) \
+{ \
+ .timeout_ns = timeout, \
+ .broadcast = true, \
+ .owner = true, \
+}; \
+ \
+FIXTURE_VARIANT_ADD(args, t_##timeout##_locked) \
+{ \
+ .timeout_ns = timeout, \
+ .locked = true, \
+}; \
+ \
+FIXTURE_VARIANT_ADD(args, t_##timeout##_owner) \
+{ \
+ .timeout_ns = timeout, \
+ .owner = true, \
+}; \
+
+FIXTURE_VARIANT_ADD_TIMEOUT(0);
+FIXTURE_VARIANT_ADD_TIMEOUT(5000);
+FIXTURE_VARIANT_ADD_TIMEOUT(500000);
+FIXTURE_VARIANT_ADD_TIMEOUT(2000000000);
int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg,
int policy, int prio)
@@ -81,26 +130,26 @@ int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg,
ret = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
if (ret) {
- error("pthread_attr_setinheritsched\n", ret);
+ ksft_exit_fail_msg("pthread_attr_setinheritsched\n");
return -1;
}
ret = pthread_attr_setschedpolicy(&attr, policy);
if (ret) {
- error("pthread_attr_setschedpolicy\n", ret);
+ ksft_exit_fail_msg("pthread_attr_setschedpolicy\n");
return -1;
}
schedp.sched_priority = prio;
ret = pthread_attr_setschedparam(&attr, &schedp);
if (ret) {
- error("pthread_attr_setschedparam\n", ret);
+ ksft_exit_fail_msg("pthread_attr_setschedparam\n");
return -1;
}
ret = pthread_create(pth, &attr, func, arg);
if (ret) {
- error("pthread_create\n", ret);
+ ksft_exit_fail_msg("pthread_create\n");
return -1;
}
return 0;
@@ -112,7 +161,7 @@ void *waiterfn(void *arg)
struct thread_arg *args = (struct thread_arg *)arg;
futex_t old_val;
- info("Waiter %ld: running\n", args->id);
+ ksft_print_dbg_msg("Waiter %ld: running\n", args->id);
/* Each thread sleeps for a different amount of time
* This is to avoid races, because we don't lock the
* external mutex here */
@@ -120,26 +169,25 @@ void *waiterfn(void *arg)
old_val = f1;
atomic_inc(&waiters_blocked);
- info("Calling futex_wait_requeue_pi: %p (%u) -> %p\n",
+ ksft_print_dbg_msg("Calling futex_wait_requeue_pi: %p (%u) -> %p\n",
&f1, f1, &f2);
args->ret = futex_wait_requeue_pi(&f1, old_val, &f2, args->timeout,
FUTEX_PRIVATE_FLAG);
- info("waiter %ld woke with %d %s\n", args->id, args->ret,
+ ksft_print_dbg_msg("waiter %ld woke with %d %s\n", args->id, args->ret,
args->ret < 0 ? strerror(errno) : "");
atomic_inc(&waiters_woken);
if (args->ret < 0) {
if (args->timeout && errno == ETIMEDOUT)
args->ret = 0;
else {
- args->ret = RET_ERROR;
- error("futex_wait_requeue_pi\n", errno);
+ ksft_exit_fail_msg("futex_wait_requeue_pi\n");
}
futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG);
}
futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
- info("Waiter %ld: exiting with %d\n", args->id, args->ret);
+ ksft_print_dbg_msg("Waiter %ld: exiting with %d\n", args->id, args->ret);
pthread_exit((void *)&args->ret);
}
@@ -152,14 +200,14 @@ void *broadcast_wakerfn(void *arg)
int nr_wake = 1;
int i = 0;
- info("Waker: waiting for waiters to block\n");
+ ksft_print_dbg_msg("Waker: waiting for waiters to block\n");
while (waiters_blocked.val < THREAD_MAX)
usleep(1000);
usleep(1000);
- info("Waker: Calling broadcast\n");
+ ksft_print_dbg_msg("Waker: Calling broadcast\n");
if (args->lock) {
- info("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n", f2, &f2);
+ ksft_print_dbg_msg("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n", f2, &f2);
futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG);
}
continue_requeue:
@@ -167,16 +215,14 @@ void *broadcast_wakerfn(void *arg)
args->ret = futex_cmp_requeue_pi(&f1, old_val, &f2, nr_wake, nr_requeue,
FUTEX_PRIVATE_FLAG);
if (args->ret < 0) {
- args->ret = RET_ERROR;
- error("FUTEX_CMP_REQUEUE_PI failed\n", errno);
+ ksft_exit_fail_msg("FUTEX_CMP_REQUEUE_PI failed\n");
} else if (++i < MAX_WAKE_ITERS) {
task_count += args->ret;
if (task_count < THREAD_MAX - waiters_woken.val)
goto continue_requeue;
} else {
- error("max broadcast iterations (%d) reached with %d/%d tasks woken or requeued\n",
- 0, MAX_WAKE_ITERS, task_count, THREAD_MAX);
- args->ret = RET_ERROR;
+ ksft_exit_fail_msg("max broadcast iterations (%d) reached with %d/%d tasks woken or requeued\n",
+ MAX_WAKE_ITERS, task_count, THREAD_MAX);
}
futex_wake(&wake_complete, 1, FUTEX_PRIVATE_FLAG);
@@ -187,7 +233,7 @@ void *broadcast_wakerfn(void *arg)
if (args->ret > 0)
args->ret = task_count;
- info("Waker: exiting with %d\n", args->ret);
+ ksft_print_dbg_msg("Waker: exiting with %d\n", args->ret);
pthread_exit((void *)&args->ret);
}
@@ -200,20 +246,20 @@ void *signal_wakerfn(void *arg)
int nr_wake = 1;
int i = 0;
- info("Waker: waiting for waiters to block\n");
+ ksft_print_dbg_msg("Waker: waiting for waiters to block\n");
while (waiters_blocked.val < THREAD_MAX)
usleep(1000);
usleep(1000);
while (task_count < THREAD_MAX && waiters_woken.val < THREAD_MAX) {
- info("task_count: %d, waiters_woken: %d\n",
+ ksft_print_dbg_msg("task_count: %d, waiters_woken: %d\n",
task_count, waiters_woken.val);
if (args->lock) {
- info("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n",
- f2, &f2);
+ ksft_print_dbg_msg("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n",
+ f2, &f2);
futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG);
}
- info("Waker: Calling signal\n");
+ ksft_print_dbg_msg("Waker: Calling signal\n");
/* cond_signal */
old_val = f1;
args->ret = futex_cmp_requeue_pi(&f1, old_val, &f2,
@@ -221,28 +267,23 @@ void *signal_wakerfn(void *arg)
FUTEX_PRIVATE_FLAG);
if (args->ret < 0)
args->ret = -errno;
- info("futex: %x\n", f2);
+ ksft_print_dbg_msg("futex: %x\n", f2);
if (args->lock) {
- info("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n",
- f2, &f2);
+ ksft_print_dbg_msg("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n",
+ f2, &f2);
futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
}
- info("futex: %x\n", f2);
- if (args->ret < 0) {
- error("FUTEX_CMP_REQUEUE_PI failed\n", errno);
- args->ret = RET_ERROR;
- break;
- }
+ ksft_print_dbg_msg("futex: %x\n", f2);
+ if (args->ret < 0)
+ ksft_exit_fail_msg("FUTEX_CMP_REQUEUE_PI failed\n");
task_count += args->ret;
usleep(SIGNAL_PERIOD_US);
i++;
/* we have to loop at least THREAD_MAX times */
if (i > MAX_WAKE_ITERS + THREAD_MAX) {
- error("max signaling iterations (%d) reached, giving up on pending waiters.\n",
- 0, MAX_WAKE_ITERS + THREAD_MAX);
- args->ret = RET_ERROR;
- break;
+ ksft_exit_fail_msg("max signaling iterations (%d) reached, giving up on pending waiters.\n",
+ MAX_WAKE_ITERS + THREAD_MAX);
}
}
@@ -251,8 +292,8 @@ void *signal_wakerfn(void *arg)
if (args->ret >= 0)
args->ret = task_count;
- info("Waker: exiting with %d\n", args->ret);
- info("Waker: waiters_woken: %d\n", waiters_woken.val);
+ ksft_print_dbg_msg("Waker: exiting with %d\n", args->ret);
+ ksft_print_dbg_msg("Waker: waiters_woken: %d\n", waiters_woken.val);
pthread_exit((void *)&args->ret);
}
@@ -269,35 +310,40 @@ void *third_party_blocker(void *arg)
ret2 = futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
out:
- if (args->ret || ret2) {
- error("third_party_blocker() futex error", 0);
- args->ret = RET_ERROR;
- }
+ if (args->ret || ret2)
+ ksft_exit_fail_msg("third_party_blocker() futex error");
pthread_exit((void *)&args->ret);
}
-int unit_test(int broadcast, long lock, int third_party_owner, long timeout_ns)
+TEST_F(args, futex_requeue_pi)
{
- void *(*wakerfn)(void *) = signal_wakerfn;
struct thread_arg blocker_arg = THREAD_ARG_INITIALIZER;
struct thread_arg waker_arg = THREAD_ARG_INITIALIZER;
pthread_t waiter[THREAD_MAX], waker, blocker;
- struct timespec ts, *tsp = NULL;
+ void *(*wakerfn)(void *) = signal_wakerfn;
+ bool third_party_owner = variant->owner;
+ long timeout_ns = variant->timeout_ns;
+ bool broadcast = variant->broadcast;
struct thread_arg args[THREAD_MAX];
- int *waiter_ret;
- int i, ret = RET_PASS;
+ struct timespec ts, *tsp = NULL;
+ bool lock = variant->locked;
+ int *waiter_ret, i, ret = 0;
+
+ ksft_print_msg(
+ "\tArguments: broadcast=%d locked=%d owner=%d timeout=%ldns\n",
+ broadcast, lock, third_party_owner, timeout_ns);
if (timeout_ns) {
time_t secs;
- info("timeout_ns = %ld\n", timeout_ns);
+ ksft_print_dbg_msg("timeout_ns = %ld\n", timeout_ns);
ret = clock_gettime(CLOCK_MONOTONIC, &ts);
secs = (ts.tv_nsec + timeout_ns) / 1000000000;
ts.tv_nsec = ((int64_t)ts.tv_nsec + timeout_ns) % 1000000000;
ts.tv_sec += secs;
- info("ts.tv_sec = %ld\n", ts.tv_sec);
- info("ts.tv_nsec = %ld\n", ts.tv_nsec);
+ ksft_print_dbg_msg("ts.tv_sec = %ld\n", ts.tv_sec);
+ ksft_print_dbg_msg("ts.tv_nsec = %ld\n", ts.tv_nsec);
tsp = &ts;
}
@@ -307,10 +353,7 @@ int unit_test(int broadcast, long lock, int third_party_owner, long timeout_ns)
if (third_party_owner) {
if (create_rt_thread(&blocker, third_party_blocker,
(void *)&blocker_arg, SCHED_FIFO, 1)) {
- error("Creating third party blocker thread failed\n",
- errno);
- ret = RET_ERROR;
- goto out;
+ ksft_exit_fail_msg("Creating third party blocker thread failed\n");
}
}
@@ -318,20 +361,16 @@ int unit_test(int broadcast, long lock, int third_party_owner, long timeout_ns)
for (i = 0; i < THREAD_MAX; i++) {
args[i].id = i;
args[i].timeout = tsp;
- info("Starting thread %d\n", i);
+ ksft_print_dbg_msg("Starting thread %d\n", i);
if (create_rt_thread(&waiter[i], waiterfn, (void *)&args[i],
SCHED_FIFO, 1)) {
- error("Creating waiting thread failed\n", errno);
- ret = RET_ERROR;
- goto out;
+ ksft_exit_fail_msg("Creating waiting thread failed\n");
}
}
waker_arg.lock = lock;
if (create_rt_thread(&waker, wakerfn, (void *)&waker_arg,
SCHED_FIFO, 1)) {
- error("Creating waker thread failed\n", errno);
- ret = RET_ERROR;
- goto out;
+ ksft_exit_fail_msg("Creating waker thread failed\n");
}
/* Wait for threads to finish */
@@ -345,7 +384,6 @@ int unit_test(int broadcast, long lock, int third_party_owner, long timeout_ns)
pthread_join(blocker, NULL);
pthread_join(waker, NULL);
-out:
if (!ret) {
if (*waiter_ret)
ret = *waiter_ret;
@@ -355,66 +393,8 @@ out:
ret = blocker_arg.ret;
}
- return ret;
+ if (ret)
+ ksft_test_result_fail("fail");
}
-int main(int argc, char *argv[])
-{
- char *test_name;
- int c, ret;
-
- while ((c = getopt(argc, argv, "bchlot:v:")) != -1) {
- switch (c) {
- case 'b':
- broadcast = 1;
- break;
- case 'c':
- log_color(1);
- break;
- case 'h':
- usage(basename(argv[0]));
- exit(0);
- case 'l':
- locked = 1;
- break;
- case 'o':
- owner = 1;
- locked = 0;
- break;
- case 't':
- timeout_ns = atoi(optarg);
- break;
- case 'v':
- log_verbosity(atoi(optarg));
- break;
- default:
- usage(basename(argv[0]));
- exit(1);
- }
- }
-
- ksft_print_header();
- ksft_set_plan(1);
- ksft_print_msg("%s: Test requeue functionality\n", basename(argv[0]));
- ksft_print_msg(
- "\tArguments: broadcast=%d locked=%d owner=%d timeout=%ldns\n",
- broadcast, locked, owner, timeout_ns);
-
- ret = asprintf(&test_name,
- "%s broadcast=%d locked=%d owner=%d timeout=%ldns",
- TEST_NAME, broadcast, locked, owner, timeout_ns);
- if (ret < 0) {
- ksft_print_msg("Failed to generate test name\n");
- test_name = TEST_NAME;
- }
-
- /*
- * FIXME: unit_test is obsolete now that we parse options and the
- * various style of runs are done by run.sh - simplify the code and move
- * unit_test into main()
- */
- ret = unit_test(broadcast, locked, owner, timeout_ns);
-
- print_result(test_name, ret);
- return ret;
-}
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c b/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c
index d0a4d332ea44..77135a22a583 100644
--- a/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c
@@ -23,67 +23,32 @@
#include <stdlib.h>
#include <string.h>
#include <time.h>
-#include "futextest.h"
-#include "logging.h"
-#define TEST_NAME "futex-requeue-pi-mismatched-ops"
+#include "futextest.h"
+#include "../../kselftest_harness.h"
futex_t f1 = FUTEX_INITIALIZER;
futex_t f2 = FUTEX_INITIALIZER;
int child_ret = 0;
-void usage(char *prog)
-{
- printf("Usage: %s\n", prog);
- printf(" -c Use color\n");
- printf(" -h Display this help message\n");
- printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
- VQUIET, VCRITICAL, VINFO);
-}
-
void *blocking_child(void *arg)
{
child_ret = futex_wait(&f1, f1, NULL, FUTEX_PRIVATE_FLAG);
if (child_ret < 0) {
child_ret = -errno;
- error("futex_wait\n", errno);
+ ksft_exit_fail_msg("futex_wait\n");
}
return (void *)&child_ret;
}
-int main(int argc, char *argv[])
+TEST(requeue_pi_mismatched_ops)
{
- int ret = RET_PASS;
pthread_t child;
- int c;
+ int ret;
- while ((c = getopt(argc, argv, "chv:")) != -1) {
- switch (c) {
- case 'c':
- log_color(1);
- break;
- case 'h':
- usage(basename(argv[0]));
- exit(0);
- case 'v':
- log_verbosity(atoi(optarg));
- break;
- default:
- usage(basename(argv[0]));
- exit(1);
- }
- }
-
- ksft_print_header();
- ksft_set_plan(1);
- ksft_print_msg("%s: Detect mismatched requeue_pi operations\n",
- basename(argv[0]));
+ if (pthread_create(&child, NULL, blocking_child, NULL))
+ ksft_exit_fail_msg("pthread_create\n");
- if (pthread_create(&child, NULL, blocking_child, NULL)) {
- error("pthread_create\n", errno);
- ret = RET_ERROR;
- goto out;
- }
/* Allow the child to block in the kernel. */
sleep(1);
@@ -102,34 +67,27 @@ int main(int argc, char *argv[])
* FUTEX_WAKE.
*/
ret = futex_wake(&f1, 1, FUTEX_PRIVATE_FLAG);
- if (ret == 1) {
- ret = RET_PASS;
- } else if (ret < 0) {
- error("futex_wake\n", errno);
- ret = RET_ERROR;
- } else {
- error("futex_wake did not wake the child\n", 0);
- ret = RET_ERROR;
- }
+ if (ret == 1)
+ ret = 0;
+ else if (ret < 0)
+ ksft_exit_fail_msg("futex_wake\n");
+ else
+ ksft_exit_fail_msg("futex_wake did not wake the child\n");
} else {
- error("futex_cmp_requeue_pi\n", errno);
- ret = RET_ERROR;
+ ksft_exit_fail_msg("futex_cmp_requeue_pi\n");
}
} else if (ret > 0) {
- fail("futex_cmp_requeue_pi failed to detect the mismatch\n");
- ret = RET_FAIL;
+ ksft_test_result_fail("futex_cmp_requeue_pi failed to detect the mismatch\n");
} else {
- error("futex_cmp_requeue_pi found no waiters\n", 0);
- ret = RET_ERROR;
+ ksft_exit_fail_msg("futex_cmp_requeue_pi found no waiters\n");
}
pthread_join(child, NULL);
- if (!ret)
- ret = child_ret;
-
- out:
- /* If the kernel crashes, we shouldn't return at all. */
- print_result(TEST_NAME, ret);
- return ret;
+ if (!ret && !child_ret)
+ ksft_test_result_pass("futex_requeue_pi_mismatched_ops passed\n");
+ else
+ ksft_test_result_pass("futex_requeue_pi_mismatched_ops failed\n");
}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c
index c6b8f32990c8..e34ee0f9ebcc 100644
--- a/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c
@@ -24,11 +24,11 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+
#include "atomic.h"
#include "futextest.h"
-#include "logging.h"
+#include "../../kselftest_harness.h"
-#define TEST_NAME "futex-requeue-pi-signal-restart"
#define DELAY_US 100
futex_t f1 = FUTEX_INITIALIZER;
@@ -37,15 +37,6 @@ atomic_t requeued = ATOMIC_INITIALIZER;
int waiter_ret = 0;
-void usage(char *prog)
-{
- printf("Usage: %s\n", prog);
- printf(" -c Use color\n");
- printf(" -h Display this help message\n");
- printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
- VQUIET, VCRITICAL, VINFO);
-}
-
int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg,
int policy, int prio)
{
@@ -57,35 +48,28 @@ int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg,
memset(&schedp, 0, sizeof(schedp));
ret = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
- if (ret) {
- error("pthread_attr_setinheritsched\n", ret);
- return -1;
- }
+ if (ret)
+ ksft_exit_fail_msg("pthread_attr_setinheritsched\n");
ret = pthread_attr_setschedpolicy(&attr, policy);
- if (ret) {
- error("pthread_attr_setschedpolicy\n", ret);
- return -1;
- }
+ if (ret)
+ ksft_exit_fail_msg("pthread_attr_setschedpolicy\n");
schedp.sched_priority = prio;
ret = pthread_attr_setschedparam(&attr, &schedp);
- if (ret) {
- error("pthread_attr_setschedparam\n", ret);
- return -1;
- }
+ if (ret)
+ ksft_exit_fail_msg("pthread_attr_setschedparam\n");
ret = pthread_create(pth, &attr, func, arg);
- if (ret) {
- error("pthread_create\n", ret);
- return -1;
- }
+ if (ret)
+ ksft_exit_fail_msg("pthread_create\n");
+
return 0;
}
void handle_signal(int signo)
{
- info("signal received %s requeue\n",
+ ksft_print_dbg_msg("signal received %s requeue\n",
requeued.val ? "after" : "prior to");
}
@@ -94,78 +78,46 @@ void *waiterfn(void *arg)
unsigned int old_val;
int res;
- waiter_ret = RET_PASS;
-
- info("Waiter running\n");
- info("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2);
+ ksft_print_dbg_msg("Waiter running\n");
+ ksft_print_dbg_msg("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2);
old_val = f1;
res = futex_wait_requeue_pi(&f1, old_val, &(f2), NULL,
FUTEX_PRIVATE_FLAG);
if (!requeued.val || errno != EWOULDBLOCK) {
- fail("unexpected return from futex_wait_requeue_pi: %d (%s)\n",
+ ksft_test_result_fail("unexpected return from futex_wait_requeue_pi: %d (%s)\n",
res, strerror(errno));
- info("w2:futex: %x\n", f2);
+ ksft_print_dbg_msg("w2:futex: %x\n", f2);
if (!res)
futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
- waiter_ret = RET_FAIL;
}
- info("Waiter exiting with %d\n", waiter_ret);
pthread_exit(NULL);
}
-int main(int argc, char *argv[])
+TEST(futex_requeue_pi_signal_restart)
{
unsigned int old_val;
struct sigaction sa;
pthread_t waiter;
- int c, res, ret = RET_PASS;
-
- while ((c = getopt(argc, argv, "chv:")) != -1) {
- switch (c) {
- case 'c':
- log_color(1);
- break;
- case 'h':
- usage(basename(argv[0]));
- exit(0);
- case 'v':
- log_verbosity(atoi(optarg));
- break;
- default:
- usage(basename(argv[0]));
- exit(1);
- }
- }
-
- ksft_print_header();
- ksft_set_plan(1);
- ksft_print_msg("%s: Test signal handling during requeue_pi\n",
- basename(argv[0]));
- ksft_print_msg("\tArguments: <none>\n");
+ int res;
sa.sa_handler = handle_signal;
sigemptyset(&sa.sa_mask);
sa.sa_flags = 0;
- if (sigaction(SIGUSR1, &sa, NULL)) {
- error("sigaction\n", errno);
- exit(1);
- }
+ if (sigaction(SIGUSR1, &sa, NULL))
+ ksft_exit_fail_msg("sigaction\n");
- info("m1:f2: %x\n", f2);
- info("Creating waiter\n");
+ ksft_print_dbg_msg("m1:f2: %x\n", f2);
+ ksft_print_dbg_msg("Creating waiter\n");
res = create_rt_thread(&waiter, waiterfn, NULL, SCHED_FIFO, 1);
- if (res) {
- error("Creating waiting thread failed", res);
- ret = RET_ERROR;
- goto out;
- }
+ if (res)
+ ksft_exit_fail_msg("Creating waiting thread failed");
- info("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2);
- info("m2:f2: %x\n", f2);
+ ksft_print_dbg_msg("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2);
+ ksft_print_dbg_msg("m2:f2: %x\n", f2);
futex_lock_pi(&f2, 0, 0, FUTEX_PRIVATE_FLAG);
- info("m3:f2: %x\n", f2);
+ ksft_print_dbg_msg("m3:f2: %x\n", f2);
while (1) {
/*
@@ -173,11 +125,11 @@ int main(int argc, char *argv[])
* restart futex_wait_requeue_pi() in the kernel. Wait for the
* waiter to block on f1 again.
*/
- info("Issuing SIGUSR1 to waiter\n");
+ ksft_print_dbg_msg("Issuing SIGUSR1 to waiter\n");
pthread_kill(waiter, SIGUSR1);
usleep(DELAY_US);
- info("Requeueing waiter via FUTEX_CMP_REQUEUE_PI\n");
+ ksft_print_dbg_msg("Requeueing waiter via FUTEX_CMP_REQUEUE_PI\n");
old_val = f1;
res = futex_cmp_requeue_pi(&f1, old_val, &(f2), 1, 0,
FUTEX_PRIVATE_FLAG);
@@ -191,12 +143,10 @@ int main(int argc, char *argv[])
atomic_set(&requeued, 1);
break;
} else if (res < 0) {
- error("FUTEX_CMP_REQUEUE_PI failed\n", errno);
- ret = RET_ERROR;
- break;
+ ksft_exit_fail_msg("FUTEX_CMP_REQUEUE_PI failed\n");
}
}
- info("m4:f2: %x\n", f2);
+ ksft_print_dbg_msg("m4:f2: %x\n", f2);
/*
* Signal the waiter after requeue, waiter should return from
@@ -204,19 +154,14 @@ int main(int argc, char *argv[])
* futex_unlock_pi() can't happen before the signal wakeup is detected
* in the kernel.
*/
- info("Issuing SIGUSR1 to waiter\n");
+ ksft_print_dbg_msg("Issuing SIGUSR1 to waiter\n");
pthread_kill(waiter, SIGUSR1);
- info("Waiting for waiter to return\n");
+ ksft_print_dbg_msg("Waiting for waiter to return\n");
pthread_join(waiter, NULL);
- info("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n", f2, &f2);
+ ksft_print_dbg_msg("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n", f2, &f2);
futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
- info("m5:f2: %x\n", f2);
-
- out:
- if (ret == RET_PASS && waiter_ret)
- ret = waiter_ret;
-
- print_result(TEST_NAME, ret);
- return ret;
+ ksft_print_dbg_msg("m5:f2: %x\n", f2);
}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_wait.c b/tools/testing/selftests/futex/functional/futex_wait.c
index 685140d9b93d..152ca4612886 100644
--- a/tools/testing/selftests/futex/functional/futex_wait.c
+++ b/tools/testing/selftests/futex/functional/futex_wait.c
@@ -9,25 +9,16 @@
#include <sys/shm.h>
#include <sys/mman.h>
#include <fcntl.h>
-#include "logging.h"
+
#include "futextest.h"
+#include "../../kselftest_harness.h"
-#define TEST_NAME "futex-wait"
#define timeout_ns 30000000
#define WAKE_WAIT_US 10000
#define SHM_PATH "futex_shm_file"
void *futex;
-void usage(char *prog)
-{
- printf("Usage: %s\n", prog);
- printf(" -c Use color\n");
- printf(" -h Display this help message\n");
- printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
- VQUIET, VCRITICAL, VINFO);
-}
-
static void *waiterfn(void *arg)
{
struct timespec to;
@@ -45,53 +36,37 @@ static void *waiterfn(void *arg)
return NULL;
}
-int main(int argc, char *argv[])
+TEST(private_futex)
{
- int res, ret = RET_PASS, fd, c, shm_id;
- u_int32_t f_private = 0, *shared_data;
unsigned int flags = FUTEX_PRIVATE_FLAG;
+ u_int32_t f_private = 0;
pthread_t waiter;
- void *shm;
+ int res;
futex = &f_private;
- while ((c = getopt(argc, argv, "cht:v:")) != -1) {
- switch (c) {
- case 'c':
- log_color(1);
- break;
- case 'h':
- usage(basename(argv[0]));
- exit(0);
- case 'v':
- log_verbosity(atoi(optarg));
- break;
- default:
- usage(basename(argv[0]));
- exit(1);
- }
- }
-
- ksft_print_header();
- ksft_set_plan(3);
- ksft_print_msg("%s: Test futex_wait\n", basename(argv[0]));
-
/* Testing a private futex */
- info("Calling private futex_wait on futex: %p\n", futex);
+ ksft_print_dbg_msg("Calling private futex_wait on futex: %p\n", futex);
if (pthread_create(&waiter, NULL, waiterfn, (void *) &flags))
- error("pthread_create failed\n", errno);
+ ksft_exit_fail_msg("pthread_create failed\n");
usleep(WAKE_WAIT_US);
- info("Calling private futex_wake on futex: %p\n", futex);
+ ksft_print_dbg_msg("Calling private futex_wake on futex: %p\n", futex);
res = futex_wake(futex, 1, FUTEX_PRIVATE_FLAG);
if (res != 1) {
ksft_test_result_fail("futex_wake private returned: %d %s\n",
errno, strerror(errno));
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_wake private succeeds\n");
}
+}
+
+TEST(anon_page)
+{
+ u_int32_t *shared_data;
+ pthread_t waiter;
+ int res, shm_id;
/* Testing an anon page shared memory */
shm_id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0666);
@@ -105,67 +80,65 @@ int main(int argc, char *argv[])
*shared_data = 0;
futex = shared_data;
- info("Calling shared (page anon) futex_wait on futex: %p\n", futex);
+ ksft_print_dbg_msg("Calling shared (page anon) futex_wait on futex: %p\n", futex);
if (pthread_create(&waiter, NULL, waiterfn, NULL))
- error("pthread_create failed\n", errno);
+ ksft_exit_fail_msg("pthread_create failed\n");
usleep(WAKE_WAIT_US);
- info("Calling shared (page anon) futex_wake on futex: %p\n", futex);
+ ksft_print_dbg_msg("Calling shared (page anon) futex_wake on futex: %p\n", futex);
res = futex_wake(futex, 1, 0);
if (res != 1) {
ksft_test_result_fail("futex_wake shared (page anon) returned: %d %s\n",
errno, strerror(errno));
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_wake shared (page anon) succeeds\n");
}
+ shmdt(shared_data);
+}
+
+TEST(file_backed)
+{
+ u_int32_t f_private = 0;
+ pthread_t waiter;
+ int res, fd;
+ void *shm;
/* Testing a file backed shared memory */
fd = open(SHM_PATH, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
- if (fd < 0) {
- perror("open");
- exit(1);
- }
+ if (fd < 0)
+ ksft_exit_fail_msg("open");
- if (ftruncate(fd, sizeof(f_private))) {
- perror("ftruncate");
- exit(1);
- }
+ if (ftruncate(fd, sizeof(f_private)))
+ ksft_exit_fail_msg("ftruncate");
shm = mmap(NULL, sizeof(f_private), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
- if (shm == MAP_FAILED) {
- perror("mmap");
- exit(1);
- }
+ if (shm == MAP_FAILED)
+ ksft_exit_fail_msg("mmap");
memcpy(shm, &f_private, sizeof(f_private));
futex = shm;
- info("Calling shared (file backed) futex_wait on futex: %p\n", futex);
+ ksft_print_dbg_msg("Calling shared (file backed) futex_wait on futex: %p\n", futex);
if (pthread_create(&waiter, NULL, waiterfn, NULL))
- error("pthread_create failed\n", errno);
+ ksft_exit_fail_msg("pthread_create failed\n");
usleep(WAKE_WAIT_US);
- info("Calling shared (file backed) futex_wake on futex: %p\n", futex);
+ ksft_print_dbg_msg("Calling shared (file backed) futex_wake on futex: %p\n", futex);
res = futex_wake(shm, 1, 0);
if (res != 1) {
ksft_test_result_fail("futex_wake shared (file backed) returned: %d %s\n",
errno, strerror(errno));
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_wake shared (file backed) succeeds\n");
}
- /* Freeing resources */
- shmdt(shared_data);
munmap(shm, sizeof(f_private));
remove(SHM_PATH);
close(fd);
-
- ksft_print_cnts();
- return ret;
}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c b/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c
index fb4148f23fa3..8952ebda14ab 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c
@@ -27,10 +27,9 @@
#include <libgen.h>
#include <signal.h>
-#include "logging.h"
#include "futextest.h"
+#include "../../kselftest_harness.h"
-#define TEST_NAME "futex-wait-private-mapped-file"
#define PAGE_SZ 4096
char pad[PAGE_SZ] = {1};
@@ -40,86 +39,44 @@ char pad2[PAGE_SZ] = {1};
#define WAKE_WAIT_US 3000000
struct timespec wait_timeout = { .tv_sec = 5, .tv_nsec = 0};
-void usage(char *prog)
-{
- printf("Usage: %s\n", prog);
- printf(" -c Use color\n");
- printf(" -h Display this help message\n");
- printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
- VQUIET, VCRITICAL, VINFO);
-}
-
void *thr_futex_wait(void *arg)
{
int ret;
- info("futex wait\n");
+ ksft_print_dbg_msg("futex wait\n");
ret = futex_wait(&val, 1, &wait_timeout, 0);
- if (ret && errno != EWOULDBLOCK && errno != ETIMEDOUT) {
- error("futex error.\n", errno);
- print_result(TEST_NAME, RET_ERROR);
- exit(RET_ERROR);
- }
+ if (ret && errno != EWOULDBLOCK && errno != ETIMEDOUT)
+ ksft_exit_fail_msg("futex error.\n");
if (ret && errno == ETIMEDOUT)
- fail("waiter timedout\n");
+ ksft_exit_fail_msg("waiter timedout\n");
- info("futex_wait: ret = %d, errno = %d\n", ret, errno);
+ ksft_print_dbg_msg("futex_wait: ret = %d, errno = %d\n", ret, errno);
return NULL;
}
-int main(int argc, char **argv)
+TEST(wait_private_mapped_file)
{
pthread_t thr;
- int ret = RET_PASS;
int res;
- int c;
-
- while ((c = getopt(argc, argv, "chv:")) != -1) {
- switch (c) {
- case 'c':
- log_color(1);
- break;
- case 'h':
- usage(basename(argv[0]));
- exit(0);
- case 'v':
- log_verbosity(atoi(optarg));
- break;
- default:
- usage(basename(argv[0]));
- exit(1);
- }
- }
-
- ksft_print_header();
- ksft_set_plan(1);
- ksft_print_msg(
- "%s: Test the futex value of private file mappings in FUTEX_WAIT\n",
- basename(argv[0]));
-
- ret = pthread_create(&thr, NULL, thr_futex_wait, NULL);
- if (ret < 0) {
- fprintf(stderr, "pthread_create error\n");
- ret = RET_ERROR;
- goto out;
- }
-
- info("wait a while\n");
+
+ res = pthread_create(&thr, NULL, thr_futex_wait, NULL);
+ if (res < 0)
+ ksft_exit_fail_msg("pthread_create error\n");
+
+ ksft_print_dbg_msg("wait a while\n");
usleep(WAKE_WAIT_US);
val = 2;
res = futex_wake(&val, 1, 0);
- info("futex_wake %d\n", res);
- if (res != 1) {
- fail("FUTEX_WAKE didn't find the waiting thread.\n");
- ret = RET_FAIL;
- }
+ ksft_print_dbg_msg("futex_wake %d\n", res);
+ if (res != 1)
+ ksft_exit_fail_msg("FUTEX_WAKE didn't find the waiting thread.\n");
- info("join\n");
+ ksft_print_dbg_msg("join\n");
pthread_join(thr, NULL);
- out:
- print_result(TEST_NAME, ret);
- return ret;
+ ksft_test_result_pass("wait_private_mapped_file");
}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
index d183f878360b..0c8766aced2e 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_timeout.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
@@ -16,26 +16,15 @@
*****************************************************************************/
#include <pthread.h>
+
#include "futextest.h"
#include "futex2test.h"
-#include "logging.h"
-
-#define TEST_NAME "futex-wait-timeout"
+#include "../../kselftest_harness.h"
static long timeout_ns = 100000; /* 100us default timeout */
static futex_t futex_pi;
static pthread_barrier_t barrier;
-void usage(char *prog)
-{
- printf("Usage: %s\n", prog);
- printf(" -c Use color\n");
- printf(" -h Display this help message\n");
- printf(" -t N Timeout in nanoseconds (default: 100,000)\n");
- printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
- VQUIET, VCRITICAL, VINFO);
-}
-
/*
* Get a PI lock and hold it forever, so the main thread lock_pi will block
* and we can test the timeout
@@ -47,13 +36,13 @@ void *get_pi_lock(void *arg)
ret = futex_lock_pi(&futex_pi, NULL, 0, 0);
if (ret != 0)
- error("futex_lock_pi failed\n", ret);
+ ksft_exit_fail_msg("futex_lock_pi failed\n");
pthread_barrier_wait(&barrier);
/* Blocks forever */
ret = futex_wait(&lock, 0, NULL, 0);
- error("futex_wait failed\n", ret);
+ ksft_exit_fail_msg("futex_wait failed\n");
return NULL;
}
@@ -61,12 +50,11 @@ void *get_pi_lock(void *arg)
/*
* Check if the function returned the expected error
*/
-static void test_timeout(int res, int *ret, char *test_name, int err)
+static void test_timeout(int res, char *test_name, int err)
{
if (!res || errno != err) {
ksft_test_result_fail("%s returned %d\n", test_name,
res < 0 ? errno : res);
- *ret = RET_FAIL;
} else {
ksft_test_result_pass("%s succeeds\n", test_name);
}
@@ -78,10 +66,8 @@ static void test_timeout(int res, int *ret, char *test_name, int err)
static int futex_get_abs_timeout(clockid_t clockid, struct timespec *to,
long timeout_ns)
{
- if (clock_gettime(clockid, to)) {
- error("clock_gettime failed\n", errno);
- return errno;
- }
+ if (clock_gettime(clockid, to))
+ ksft_exit_fail_msg("clock_gettime failed\n");
to->tv_nsec += timeout_ns;
@@ -93,83 +79,66 @@ static int futex_get_abs_timeout(clockid_t clockid, struct timespec *to,
return 0;
}
-int main(int argc, char *argv[])
+TEST(wait_bitset)
{
futex_t f1 = FUTEX_INITIALIZER;
- int res, ret = RET_PASS;
struct timespec to;
- pthread_t thread;
- int c;
- struct futex_waitv waitv = {
- .uaddr = (uintptr_t)&f1,
- .val = f1,
- .flags = FUTEX_32,
- .__reserved = 0
- };
-
- while ((c = getopt(argc, argv, "cht:v:")) != -1) {
- switch (c) {
- case 'c':
- log_color(1);
- break;
- case 'h':
- usage(basename(argv[0]));
- exit(0);
- case 't':
- timeout_ns = atoi(optarg);
- break;
- case 'v':
- log_verbosity(atoi(optarg));
- break;
- default:
- usage(basename(argv[0]));
- exit(1);
- }
- }
-
- ksft_print_header();
- ksft_set_plan(9);
- ksft_print_msg("%s: Block on a futex and wait for timeout\n",
- basename(argv[0]));
- ksft_print_msg("\tArguments: timeout=%ldns\n", timeout_ns);
-
- pthread_barrier_init(&barrier, NULL, 2);
- pthread_create(&thread, NULL, get_pi_lock, NULL);
+ int res;
/* initialize relative timeout */
to.tv_sec = 0;
to.tv_nsec = timeout_ns;
res = futex_wait(&f1, f1, &to, 0);
- test_timeout(res, &ret, "futex_wait relative", ETIMEDOUT);
+ test_timeout(res, "futex_wait relative", ETIMEDOUT);
/* FUTEX_WAIT_BITSET with CLOCK_REALTIME */
if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns))
- return RET_FAIL;
+ ksft_test_result_error("get_time error");
res = futex_wait_bitset(&f1, f1, &to, 1, FUTEX_CLOCK_REALTIME);
- test_timeout(res, &ret, "futex_wait_bitset realtime", ETIMEDOUT);
+ test_timeout(res, "futex_wait_bitset realtime", ETIMEDOUT);
/* FUTEX_WAIT_BITSET with CLOCK_MONOTONIC */
if (futex_get_abs_timeout(CLOCK_MONOTONIC, &to, timeout_ns))
- return RET_FAIL;
+ ksft_test_result_error("get_time error");
res = futex_wait_bitset(&f1, f1, &to, 1, 0);
- test_timeout(res, &ret, "futex_wait_bitset monotonic", ETIMEDOUT);
+ test_timeout(res, "futex_wait_bitset monotonic", ETIMEDOUT);
+}
+
+TEST(requeue_pi)
+{
+ futex_t f1 = FUTEX_INITIALIZER;
+ struct timespec to;
+ int res;
/* FUTEX_WAIT_REQUEUE_PI with CLOCK_REALTIME */
if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns))
- return RET_FAIL;
+ ksft_test_result_error("get_time error");
res = futex_wait_requeue_pi(&f1, f1, &futex_pi, &to, FUTEX_CLOCK_REALTIME);
- test_timeout(res, &ret, "futex_wait_requeue_pi realtime", ETIMEDOUT);
+ test_timeout(res, "futex_wait_requeue_pi realtime", ETIMEDOUT);
/* FUTEX_WAIT_REQUEUE_PI with CLOCK_MONOTONIC */
if (futex_get_abs_timeout(CLOCK_MONOTONIC, &to, timeout_ns))
- return RET_FAIL;
+ ksft_test_result_error("get_time error");
res = futex_wait_requeue_pi(&f1, f1, &futex_pi, &to, 0);
- test_timeout(res, &ret, "futex_wait_requeue_pi monotonic", ETIMEDOUT);
+ test_timeout(res, "futex_wait_requeue_pi monotonic", ETIMEDOUT);
+
+}
+
+TEST(lock_pi)
+{
+ struct timespec to;
+ pthread_t thread;
+ int res;
+
+ /* Create a thread that will lock forever so any waiter will timeout */
+ pthread_barrier_init(&barrier, NULL, 2);
+ pthread_create(&thread, NULL, get_pi_lock, NULL);
/* Wait until the other thread calls futex_lock_pi() */
pthread_barrier_wait(&barrier);
pthread_barrier_destroy(&barrier);
+
/*
* FUTEX_LOCK_PI with CLOCK_REALTIME
* Due to historical reasons, FUTEX_LOCK_PI supports only realtime
@@ -181,26 +150,38 @@ int main(int argc, char *argv[])
* smaller than realtime and the syscall will timeout immediately.
*/
if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns))
- return RET_FAIL;
+ ksft_test_result_error("get_time error");
res = futex_lock_pi(&futex_pi, &to, 0, 0);
- test_timeout(res, &ret, "futex_lock_pi realtime", ETIMEDOUT);
+ test_timeout(res, "futex_lock_pi realtime", ETIMEDOUT);
/* Test operations that don't support FUTEX_CLOCK_REALTIME */
res = futex_lock_pi(&futex_pi, NULL, 0, FUTEX_CLOCK_REALTIME);
- test_timeout(res, &ret, "futex_lock_pi invalid timeout flag", ENOSYS);
+ test_timeout(res, "futex_lock_pi invalid timeout flag", ENOSYS);
+}
+
+TEST(waitv)
+{
+ futex_t f1 = FUTEX_INITIALIZER;
+ struct futex_waitv waitv = {
+ .uaddr = (uintptr_t)&f1,
+ .val = f1,
+ .flags = FUTEX_32,
+ .__reserved = 0,
+ };
+ struct timespec to;
+ int res;
/* futex_waitv with CLOCK_MONOTONIC */
if (futex_get_abs_timeout(CLOCK_MONOTONIC, &to, timeout_ns))
- return RET_FAIL;
+ ksft_test_result_error("get_time error");
res = futex_waitv(&waitv, 1, 0, &to, CLOCK_MONOTONIC);
- test_timeout(res, &ret, "futex_waitv monotonic", ETIMEDOUT);
+ test_timeout(res, "futex_waitv monotonic", ETIMEDOUT);
/* futex_waitv with CLOCK_REALTIME */
if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns))
- return RET_FAIL;
+ ksft_test_result_error("get_time error");
res = futex_waitv(&waitv, 1, 0, &to, CLOCK_REALTIME);
- test_timeout(res, &ret, "futex_waitv realtime", ETIMEDOUT);
-
- ksft_print_cnts();
- return ret;
+ test_timeout(res, "futex_waitv realtime", ETIMEDOUT);
}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c b/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c
index ed9cd07e31c1..ce2301500d83 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c
@@ -29,95 +29,55 @@
#include <linux/futex.h>
#include <libgen.h>
-#include "logging.h"
#include "futextest.h"
+#include "../../kselftest_harness.h"
-#define TEST_NAME "futex-wait-uninitialized-heap"
#define WAIT_US 5000000
static int child_blocked = 1;
-static int child_ret;
+static bool child_ret;
void *buf;
-void usage(char *prog)
-{
- printf("Usage: %s\n", prog);
- printf(" -c Use color\n");
- printf(" -h Display this help message\n");
- printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
- VQUIET, VCRITICAL, VINFO);
-}
-
void *wait_thread(void *arg)
{
int res;
- child_ret = RET_PASS;
+ child_ret = true;
res = futex_wait(buf, 1, NULL, 0);
child_blocked = 0;
if (res != 0 && errno != EWOULDBLOCK) {
- error("futex failure\n", errno);
- child_ret = RET_ERROR;
+ ksft_exit_fail_msg("futex failure\n");
+ child_ret = false;
}
pthread_exit(NULL);
}
-int main(int argc, char **argv)
+TEST(futex_wait_uninitialized_heap)
{
- int c, ret = RET_PASS;
long page_size;
pthread_t thr;
-
- while ((c = getopt(argc, argv, "chv:")) != -1) {
- switch (c) {
- case 'c':
- log_color(1);
- break;
- case 'h':
- usage(basename(argv[0]));
- exit(0);
- case 'v':
- log_verbosity(atoi(optarg));
- break;
- default:
- usage(basename(argv[0]));
- exit(1);
- }
- }
+ int ret;
page_size = sysconf(_SC_PAGESIZE);
buf = mmap(NULL, page_size, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
- if (buf == (void *)-1) {
- error("mmap\n", errno);
- exit(1);
- }
-
- ksft_print_header();
- ksft_set_plan(1);
- ksft_print_msg("%s: Test the uninitialized futex value in FUTEX_WAIT\n",
- basename(argv[0]));
-
+ if (buf == (void *)-1)
+ ksft_exit_fail_msg("mmap\n");
ret = pthread_create(&thr, NULL, wait_thread, NULL);
- if (ret) {
- error("pthread_create\n", errno);
- ret = RET_ERROR;
- goto out;
- }
+ if (ret)
+ ksft_exit_fail_msg("pthread_create\n");
- info("waiting %dus for child to return\n", WAIT_US);
+ ksft_print_dbg_msg("waiting %dus for child to return\n", WAIT_US);
usleep(WAIT_US);
- ret = child_ret;
- if (child_blocked) {
- fail("child blocked in kernel\n");
- ret = RET_FAIL;
- }
+ if (child_blocked)
+ ksft_test_result_fail("child blocked in kernel\n");
- out:
- print_result(TEST_NAME, ret);
- return ret;
+ if (!child_ret)
+ ksft_test_result_fail("child error\n");
}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
index 2d8230da9064..36b7a54a4085 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
@@ -21,72 +21,44 @@
#include <stdlib.h>
#include <string.h>
#include <time.h>
+
#include "futextest.h"
#include "futex2test.h"
-#include "logging.h"
+#include "../../kselftest_harness.h"
-#define TEST_NAME "futex-wait-wouldblock"
#define timeout_ns 100000
-void usage(char *prog)
-{
- printf("Usage: %s\n", prog);
- printf(" -c Use color\n");
- printf(" -h Display this help message\n");
- printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
- VQUIET, VCRITICAL, VINFO);
-}
-
-int main(int argc, char *argv[])
+TEST(futex_wait_wouldblock)
{
struct timespec to = {.tv_sec = 0, .tv_nsec = timeout_ns};
futex_t f1 = FUTEX_INITIALIZER;
- int res, ret = RET_PASS;
- int c;
- struct futex_waitv waitv = {
- .uaddr = (uintptr_t)&f1,
- .val = f1+1,
- .flags = FUTEX_32,
- .__reserved = 0
- };
+ int res;
- while ((c = getopt(argc, argv, "cht:v:")) != -1) {
- switch (c) {
- case 'c':
- log_color(1);
- break;
- case 'h':
- usage(basename(argv[0]));
- exit(0);
- case 'v':
- log_verbosity(atoi(optarg));
- break;
- default:
- usage(basename(argv[0]));
- exit(1);
- }
- }
-
- ksft_print_header();
- ksft_set_plan(2);
- ksft_print_msg("%s: Test the unexpected futex value in FUTEX_WAIT\n",
- basename(argv[0]));
-
- info("Calling futex_wait on f1: %u @ %p with val=%u\n", f1, &f1, f1+1);
+ ksft_print_dbg_msg("Calling futex_wait on f1: %u @ %p with val=%u\n", f1, &f1, f1+1);
res = futex_wait(&f1, f1+1, &to, FUTEX_PRIVATE_FLAG);
if (!res || errno != EWOULDBLOCK) {
ksft_test_result_fail("futex_wait returned: %d %s\n",
res ? errno : res,
res ? strerror(errno) : "");
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_wait\n");
}
+}
- if (clock_gettime(CLOCK_MONOTONIC, &to)) {
- error("clock_gettime failed\n", errno);
- return errno;
- }
+TEST(futex_waitv_wouldblock)
+{
+ struct timespec to = {.tv_sec = 0, .tv_nsec = timeout_ns};
+ futex_t f1 = FUTEX_INITIALIZER;
+ struct futex_waitv waitv = {
+ .uaddr = (uintptr_t)&f1,
+ .val = f1 + 1,
+ .flags = FUTEX_32,
+ .__reserved = 0,
+ };
+ int res;
+
+ if (clock_gettime(CLOCK_MONOTONIC, &to))
+ ksft_exit_fail_msg("clock_gettime failed %d\n", errno);
to.tv_nsec += timeout_ns;
@@ -95,17 +67,15 @@ int main(int argc, char *argv[])
to.tv_nsec -= 1000000000;
}
- info("Calling futex_waitv on f1: %u @ %p with val=%u\n", f1, &f1, f1+1);
+ ksft_print_dbg_msg("Calling futex_waitv on f1: %u @ %p with val=%u\n", f1, &f1, f1+1);
res = futex_waitv(&waitv, 1, 0, &to, CLOCK_MONOTONIC);
if (!res || errno != EWOULDBLOCK) {
ksft_test_result_fail("futex_waitv returned: %d %s\n",
res ? errno : res,
res ? strerror(errno) : "");
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_waitv\n");
}
-
- ksft_print_cnts();
- return ret;
}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_waitv.c b/tools/testing/selftests/futex/functional/futex_waitv.c
index a94337f677e1..c684b10eb76e 100644
--- a/tools/testing/selftests/futex/functional/futex_waitv.c
+++ b/tools/testing/selftests/futex/functional/futex_waitv.c
@@ -15,25 +15,16 @@
#include <pthread.h>
#include <stdint.h>
#include <sys/shm.h>
+
#include "futextest.h"
#include "futex2test.h"
-#include "logging.h"
+#include "../../kselftest_harness.h"
-#define TEST_NAME "futex-wait"
#define WAKE_WAIT_US 10000
#define NR_FUTEXES 30
static struct futex_waitv waitv[NR_FUTEXES];
u_int32_t futexes[NR_FUTEXES] = {0};
-void usage(char *prog)
-{
- printf("Usage: %s\n", prog);
- printf(" -c Use color\n");
- printf(" -h Display this help message\n");
- printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
- VQUIET, VCRITICAL, VINFO);
-}
-
void *waiterfn(void *arg)
{
struct timespec to;
@@ -41,7 +32,7 @@ void *waiterfn(void *arg)
/* setting absolute timeout for futex2 */
if (clock_gettime(CLOCK_MONOTONIC, &to))
- error("gettime64 failed\n", errno);
+ ksft_exit_fail_msg("gettime64 failed\n");
to.tv_sec++;
@@ -57,34 +48,10 @@ void *waiterfn(void *arg)
return NULL;
}
-int main(int argc, char *argv[])
+TEST(private_waitv)
{
pthread_t waiter;
- int res, ret = RET_PASS;
- struct timespec to;
- int c, i;
-
- while ((c = getopt(argc, argv, "cht:v:")) != -1) {
- switch (c) {
- case 'c':
- log_color(1);
- break;
- case 'h':
- usage(basename(argv[0]));
- exit(0);
- case 'v':
- log_verbosity(atoi(optarg));
- break;
- default:
- usage(basename(argv[0]));
- exit(1);
- }
- }
-
- ksft_print_header();
- ksft_set_plan(7);
- ksft_print_msg("%s: Test FUTEX_WAITV\n",
- basename(argv[0]));
+ int res, i;
for (i = 0; i < NR_FUTEXES; i++) {
waitv[i].uaddr = (uintptr_t)&futexes[i];
@@ -95,7 +62,7 @@ int main(int argc, char *argv[])
/* Private waitv */
if (pthread_create(&waiter, NULL, waiterfn, NULL))
- error("pthread_create failed\n", errno);
+ ksft_exit_fail_msg("pthread_create failed\n");
usleep(WAKE_WAIT_US);
@@ -104,10 +71,15 @@ int main(int argc, char *argv[])
ksft_test_result_fail("futex_wake private returned: %d %s\n",
res ? errno : res,
res ? strerror(errno) : "");
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_waitv private\n");
}
+}
+
+TEST(shared_waitv)
+{
+ pthread_t waiter;
+ int res, i;
/* Shared waitv */
for (i = 0; i < NR_FUTEXES; i++) {
@@ -128,7 +100,7 @@ int main(int argc, char *argv[])
}
if (pthread_create(&waiter, NULL, waiterfn, NULL))
- error("pthread_create failed\n", errno);
+ ksft_exit_fail_msg("pthread_create failed\n");
usleep(WAKE_WAIT_US);
@@ -137,19 +109,24 @@ int main(int argc, char *argv[])
ksft_test_result_fail("futex_wake shared returned: %d %s\n",
res ? errno : res,
res ? strerror(errno) : "");
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_waitv shared\n");
}
for (i = 0; i < NR_FUTEXES; i++)
shmdt(u64_to_ptr(waitv[i].uaddr));
+}
+
+TEST(invalid_flag)
+{
+ struct timespec to;
+ int res;
/* Testing a waiter without FUTEX_32 flag */
waitv[0].flags = FUTEX_PRIVATE_FLAG;
if (clock_gettime(CLOCK_MONOTONIC, &to))
- error("gettime64 failed\n", errno);
+ ksft_exit_fail_msg("gettime64 failed\n");
to.tv_sec++;
@@ -158,17 +135,22 @@ int main(int argc, char *argv[])
ksft_test_result_fail("futex_waitv private returned: %d %s\n",
res ? errno : res,
res ? strerror(errno) : "");
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_waitv without FUTEX_32\n");
}
+}
+
+TEST(unaligned_address)
+{
+ struct timespec to;
+ int res;
/* Testing a waiter with an unaligned address */
waitv[0].flags = FUTEX_PRIVATE_FLAG | FUTEX_32;
waitv[0].uaddr = 1;
if (clock_gettime(CLOCK_MONOTONIC, &to))
- error("gettime64 failed\n", errno);
+ ksft_exit_fail_msg("gettime64 failed\n");
to.tv_sec++;
@@ -177,16 +159,21 @@ int main(int argc, char *argv[])
ksft_test_result_fail("futex_wake private returned: %d %s\n",
res ? errno : res,
res ? strerror(errno) : "");
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_waitv with an unaligned address\n");
}
+}
+
+TEST(null_address)
+{
+ struct timespec to;
+ int res;
/* Testing a NULL address for waiters.uaddr */
waitv[0].uaddr = 0x00000000;
if (clock_gettime(CLOCK_MONOTONIC, &to))
- error("gettime64 failed\n", errno);
+ ksft_exit_fail_msg("gettime64 failed\n");
to.tv_sec++;
@@ -195,14 +182,13 @@ int main(int argc, char *argv[])
ksft_test_result_fail("futex_waitv private returned: %d %s\n",
res ? errno : res,
res ? strerror(errno) : "");
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_waitv NULL address in waitv.uaddr\n");
}
/* Testing a NULL address for *waiters */
if (clock_gettime(CLOCK_MONOTONIC, &to))
- error("gettime64 failed\n", errno);
+ ksft_exit_fail_msg("gettime64 failed\n");
to.tv_sec++;
@@ -211,14 +197,19 @@ int main(int argc, char *argv[])
ksft_test_result_fail("futex_waitv private returned: %d %s\n",
res ? errno : res,
res ? strerror(errno) : "");
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_waitv NULL address in *waiters\n");
}
+}
+
+TEST(invalid_clockid)
+{
+ struct timespec to;
+ int res;
/* Testing an invalid clockid */
if (clock_gettime(CLOCK_MONOTONIC, &to))
- error("gettime64 failed\n", errno);
+ ksft_exit_fail_msg("gettime64 failed\n");
to.tv_sec++;
@@ -227,11 +218,9 @@ int main(int argc, char *argv[])
ksft_test_result_fail("futex_waitv private returned: %d %s\n",
res ? errno : res,
res ? strerror(errno) : "");
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_waitv invalid clockid\n");
}
-
- ksft_print_cnts();
- return ret;
}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh
index 81739849f299..e88545c06d57 100755
--- a/tools/testing/selftests/futex/functional/run.sh
+++ b/tools/testing/selftests/futex/functional/run.sh
@@ -18,74 +18,36 @@
#
###############################################################################
-# Test for a color capable console
-if [ -z "$USE_COLOR" ]; then
- tput setf 7 || tput setaf 7
- if [ $? -eq 0 ]; then
- USE_COLOR=1
- tput sgr0
- fi
-fi
-if [ "$USE_COLOR" -eq 1 ]; then
- COLOR="-c"
-fi
-
-
echo
-# requeue pi testing
-# without timeouts
-./futex_requeue_pi $COLOR
-./futex_requeue_pi $COLOR -b
-./futex_requeue_pi $COLOR -b -l
-./futex_requeue_pi $COLOR -b -o
-./futex_requeue_pi $COLOR -l
-./futex_requeue_pi $COLOR -o
-# with timeouts
-./futex_requeue_pi $COLOR -b -l -t 5000
-./futex_requeue_pi $COLOR -l -t 5000
-./futex_requeue_pi $COLOR -b -l -t 500000
-./futex_requeue_pi $COLOR -l -t 500000
-./futex_requeue_pi $COLOR -b -t 5000
-./futex_requeue_pi $COLOR -t 5000
-./futex_requeue_pi $COLOR -b -t 500000
-./futex_requeue_pi $COLOR -t 500000
-./futex_requeue_pi $COLOR -b -o -t 5000
-./futex_requeue_pi $COLOR -l -t 5000
-./futex_requeue_pi $COLOR -b -o -t 500000
-./futex_requeue_pi $COLOR -l -t 500000
-# with long timeout
-./futex_requeue_pi $COLOR -b -l -t 2000000000
-./futex_requeue_pi $COLOR -l -t 2000000000
-
+./futex_requeue_pi
echo
-./futex_requeue_pi_mismatched_ops $COLOR
+./futex_requeue_pi_mismatched_ops
echo
-./futex_requeue_pi_signal_restart $COLOR
+./futex_requeue_pi_signal_restart
echo
-./futex_wait_timeout $COLOR
+./futex_wait_timeout
echo
-./futex_wait_wouldblock $COLOR
+./futex_wait_wouldblock
echo
-./futex_wait_uninitialized_heap $COLOR
-./futex_wait_private_mapped_file $COLOR
+./futex_wait_uninitialized_heap
+./futex_wait_private_mapped_file
echo
-./futex_wait $COLOR
+./futex_wait
echo
-./futex_requeue $COLOR
+./futex_requeue
echo
-./futex_waitv $COLOR
+./futex_waitv
echo
-./futex_priv_hash $COLOR
-./futex_priv_hash -g $COLOR
+./futex_priv_hash
echo
-./futex_numa_mpol $COLOR
+./futex_numa_mpol
diff --git a/tools/testing/selftests/futex/include/futextest.h b/tools/testing/selftests/futex/include/futextest.h
index 7a5fd1d5355e..3d48e9789d9f 100644
--- a/tools/testing/selftests/futex/include/futextest.h
+++ b/tools/testing/selftests/futex/include/futextest.h
@@ -58,6 +58,17 @@ typedef volatile u_int32_t futex_t;
#define SYS_futex SYS_futex_time64
#endif
+/*
+ * On 32bit systems if we use "-D_FILE_OFFSET_BITS=64 -D_TIME_BITS=64" or if
+ * we are using a newer compiler then the size of the timestamps will be 64bit,
+ * however, the SYS_futex will still point to the 32bit futex system call.
+ */
+#if __SIZEOF_POINTER__ == 4 && defined(SYS_futex_time64) && \
+ defined(_TIME_BITS) && _TIME_BITS == 64
+# undef SYS_futex
+# define SYS_futex SYS_futex_time64
+#endif
+
/**
* futex() - SYS_futex syscall wrapper
* @uaddr: address of first futex
diff --git a/tools/testing/selftests/futex/include/logging.h b/tools/testing/selftests/futex/include/logging.h
deleted file mode 100644
index 874c69ce5cce..000000000000
--- a/tools/testing/selftests/futex/include/logging.h
+++ /dev/null
@@ -1,148 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/******************************************************************************
- *
- * Copyright © International Business Machines Corp., 2009
- *
- * DESCRIPTION
- * Glibc independent futex library for testing kernel functionality.
- *
- * AUTHOR
- * Darren Hart <dvhart@linux.intel.com>
- *
- * HISTORY
- * 2009-Nov-6: Initial version by Darren Hart <dvhart@linux.intel.com>
- *
- *****************************************************************************/
-
-#ifndef _LOGGING_H
-#define _LOGGING_H
-
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-#include <linux/futex.h>
-#include "kselftest.h"
-
-/*
- * Define PASS, ERROR, and FAIL strings with and without color escape
- * sequences, default to no color.
- */
-#define ESC 0x1B, '['
-#define BRIGHT '1'
-#define GREEN '3', '2'
-#define YELLOW '3', '3'
-#define RED '3', '1'
-#define ESCEND 'm'
-#define BRIGHT_GREEN ESC, BRIGHT, ';', GREEN, ESCEND
-#define BRIGHT_YELLOW ESC, BRIGHT, ';', YELLOW, ESCEND
-#define BRIGHT_RED ESC, BRIGHT, ';', RED, ESCEND
-#define RESET_COLOR ESC, '0', 'm'
-static const char PASS_COLOR[] = {BRIGHT_GREEN, ' ', 'P', 'A', 'S', 'S',
- RESET_COLOR, 0};
-static const char ERROR_COLOR[] = {BRIGHT_YELLOW, 'E', 'R', 'R', 'O', 'R',
- RESET_COLOR, 0};
-static const char FAIL_COLOR[] = {BRIGHT_RED, ' ', 'F', 'A', 'I', 'L',
- RESET_COLOR, 0};
-static const char INFO_NORMAL[] = " INFO";
-static const char PASS_NORMAL[] = " PASS";
-static const char ERROR_NORMAL[] = "ERROR";
-static const char FAIL_NORMAL[] = " FAIL";
-const char *INFO = INFO_NORMAL;
-const char *PASS = PASS_NORMAL;
-const char *ERROR = ERROR_NORMAL;
-const char *FAIL = FAIL_NORMAL;
-
-/* Verbosity setting for INFO messages */
-#define VQUIET 0
-#define VCRITICAL 1
-#define VINFO 2
-#define VMAX VINFO
-int _verbose = VCRITICAL;
-
-/* Functional test return codes */
-#define RET_PASS 0
-#define RET_ERROR -1
-#define RET_FAIL -2
-
-/**
- * log_color() - Use colored output for PASS, ERROR, and FAIL strings
- * @use_color: use color (1) or not (0)
- */
-void log_color(int use_color)
-{
- if (use_color) {
- PASS = PASS_COLOR;
- ERROR = ERROR_COLOR;
- FAIL = FAIL_COLOR;
- } else {
- PASS = PASS_NORMAL;
- ERROR = ERROR_NORMAL;
- FAIL = FAIL_NORMAL;
- }
-}
-
-/**
- * log_verbosity() - Set verbosity of test output
- * @verbose: Enable (1) verbose output or not (0)
- *
- * Currently setting verbose=1 will enable INFO messages and 0 will disable
- * them. FAIL and ERROR messages are always displayed.
- */
-void log_verbosity(int level)
-{
- if (level > VMAX)
- level = VMAX;
- else if (level < 0)
- level = 0;
- _verbose = level;
-}
-
-/**
- * print_result() - Print standard PASS | ERROR | FAIL results
- * @ret: the return value to be considered: 0 | RET_ERROR | RET_FAIL
- *
- * print_result() is primarily intended for functional tests.
- */
-void print_result(const char *test_name, int ret)
-{
- switch (ret) {
- case RET_PASS:
- ksft_test_result_pass("%s\n", test_name);
- ksft_print_cnts();
- return;
- case RET_ERROR:
- ksft_test_result_error("%s\n", test_name);
- ksft_print_cnts();
- return;
- case RET_FAIL:
- ksft_test_result_fail("%s\n", test_name);
- ksft_print_cnts();
- return;
- }
-}
-
-/* log level macros */
-#define info(message, vargs...) \
-do { \
- if (_verbose >= VINFO) \
- fprintf(stderr, "\t%s: "message, INFO, ##vargs); \
-} while (0)
-
-#define error(message, err, args...) \
-do { \
- if (_verbose >= VCRITICAL) {\
- if (err) \
- fprintf(stderr, "\t%s: %s: "message, \
- ERROR, strerror(err), ##args); \
- else \
- fprintf(stderr, "\t%s: "message, ERROR, ##args); \
- } \
-} while (0)
-
-#define fail(message, args...) \
-do { \
- if (_verbose >= VCRITICAL) \
- fprintf(stderr, "\t%s: "message, FAIL, ##args); \
-} while (0)
-
-#endif
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
index c3b6d2604b1e..8deeb4b72e73 100644
--- a/tools/testing/selftests/kselftest.h
+++ b/tools/testing/selftests/kselftest.h
@@ -54,6 +54,7 @@
#include <stdlib.h>
#include <unistd.h>
#include <stdarg.h>
+#include <stdbool.h>
#include <string.h>
#include <stdio.h>
#include <sys/utsname.h>
@@ -104,6 +105,7 @@ struct ksft_count {
static struct ksft_count ksft_cnt;
static unsigned int ksft_plan;
+static bool ksft_debug_enabled;
static inline unsigned int ksft_test_num(void)
{
@@ -175,6 +177,18 @@ static inline __printf(1, 2) void ksft_print_msg(const char *msg, ...)
va_end(args);
}
+static inline void ksft_print_dbg_msg(const char *msg, ...)
+{
+ va_list args;
+
+ if (!ksft_debug_enabled)
+ return;
+
+ va_start(args, msg);
+ ksft_print_msg(msg, args);
+ va_end(args);
+}
+
static inline void ksft_perror(const char *msg)
{
ksft_print_msg("%s: %s (%d)\n", msg, strerror(errno), errno);
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index 8516e8434bc4..3f66e862e83e 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -1091,7 +1091,7 @@ static int test_harness_argv_check(int argc, char **argv)
{
int opt;
- while ((opt = getopt(argc, argv, "hlF:f:V:v:t:T:r:")) != -1) {
+ while ((opt = getopt(argc, argv, "dhlF:f:V:v:t:T:r:")) != -1) {
switch (opt) {
case 'f':
case 'F':
@@ -1104,12 +1104,16 @@ static int test_harness_argv_check(int argc, char **argv)
case 'l':
test_harness_list_tests();
return KSFT_SKIP;
+ case 'd':
+ ksft_debug_enabled = true;
+ break;
case 'h':
default:
fprintf(stderr,
- "Usage: %s [-h|-l] [-t|-T|-v|-V|-f|-F|-r name]\n"
+ "Usage: %s [-h|-l|-d] [-t|-T|-v|-V|-f|-F|-r name]\n"
"\t-h print help\n"
"\t-l list all tests\n"
+ "\t-d enable debug prints\n"
"\n"
"\t-t name include test\n"
"\t-T name exclude test\n"
@@ -1142,8 +1146,9 @@ static bool test_enabled(int argc, char **argv,
int opt;
optind = 1;
- while ((opt = getopt(argc, argv, "F:f:V:v:t:T:r:")) != -1) {
- has_positive |= islower(opt);
+ while ((opt = getopt(argc, argv, "dF:f:V:v:t:T:r:")) != -1) {
+ if (opt != 'd')
+ has_positive |= islower(opt);
switch (tolower(opt)) {
case 't':
diff --git a/tools/testing/selftests/pidfd/config b/tools/testing/selftests/pidfd/config
index 6133524710f7..cf7cc0ce0248 100644
--- a/tools/testing/selftests/pidfd/config
+++ b/tools/testing/selftests/pidfd/config
@@ -4,6 +4,5 @@ CONFIG_USER_NS=y
CONFIG_PID_NS=y
CONFIG_NET_NS=y
CONFIG_TIME_NS=y
-CONFIG_GENERIC_VDSO_TIME_NS=y
CONFIG_CGROUPS=y
CONFIG_CHECKPOINT_RESTORE=y
diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c
index 663a9cef1952..dcac5cbe7933 100644
--- a/tools/testing/selftests/rseq/rseq.c
+++ b/tools/testing/selftests/rseq/rseq.c
@@ -40,9 +40,9 @@
* Define weak versions to play nice with binaries that are statically linked
* against a libc that doesn't support registering its own rseq.
*/
-__weak ptrdiff_t __rseq_offset;
-__weak unsigned int __rseq_size;
-__weak unsigned int __rseq_flags;
+extern __weak ptrdiff_t __rseq_offset;
+extern __weak unsigned int __rseq_size;
+extern __weak unsigned int __rseq_flags;
static const ptrdiff_t *libc_rseq_offset_p = &__rseq_offset;
static const unsigned int *libc_rseq_size_p = &__rseq_size;
@@ -209,7 +209,7 @@ void rseq_init(void)
* libc not having registered a restartable sequence. Try to find the
* symbols if that's the case.
*/
- if (!*libc_rseq_size_p) {
+ if (!libc_rseq_size_p || !*libc_rseq_size_p) {
libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset");
libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size");
libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags");
diff --git a/tools/testing/selftests/vDSO/.gitignore b/tools/testing/selftests/vDSO/.gitignore
index 30d5c8f0e5c7..ba322a353aff 100644
--- a/tools/testing/selftests/vDSO/.gitignore
+++ b/tools/testing/selftests/vDSO/.gitignore
@@ -1,7 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
vdso_test
vdso_test_abi
-vdso_test_clock_getres
vdso_test_correctness
vdso_test_gettimeofday
vdso_test_getcpu
diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
index 918a2caa070e..e361aca22a74 100644
--- a/tools/testing/selftests/vDSO/Makefile
+++ b/tools/testing/selftests/vDSO/Makefile
@@ -4,7 +4,6 @@ include ../../../scripts/Makefile.arch
TEST_GEN_PROGS := vdso_test_gettimeofday
TEST_GEN_PROGS += vdso_test_getcpu
TEST_GEN_PROGS += vdso_test_abi
-TEST_GEN_PROGS += vdso_test_clock_getres
ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64))
TEST_GEN_PROGS += vdso_standalone_test_x86
endif
@@ -29,7 +28,6 @@ CFLAGS_NOLIBC := -nostdlib -nostdinc -ffreestanding -fno-asynchronous-unwind-tab
$(OUTPUT)/vdso_test_gettimeofday: parse_vdso.c vdso_test_gettimeofday.c
$(OUTPUT)/vdso_test_getcpu: parse_vdso.c vdso_test_getcpu.c
$(OUTPUT)/vdso_test_abi: parse_vdso.c vdso_test_abi.c
-$(OUTPUT)/vdso_test_clock_getres: vdso_test_clock_getres.c
$(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c | headers
$(OUTPUT)/vdso_standalone_test_x86: CFLAGS:=$(CFLAGS_NOLIBC) $(CFLAGS)
diff --git a/tools/testing/selftests/vDSO/vdso_call.h b/tools/testing/selftests/vDSO/vdso_call.h
index bb237d771051..e7205584cbdc 100644
--- a/tools/testing/selftests/vDSO/vdso_call.h
+++ b/tools/testing/selftests/vDSO/vdso_call.h
@@ -44,7 +44,6 @@
register long _r6 asm ("r6"); \
register long _r7 asm ("r7"); \
register long _r8 asm ("r8"); \
- register long _rval asm ("r3"); \
\
LOADARGS_##nr(fn, args); \
\
@@ -54,13 +53,13 @@
" bns+ 1f\n" \
" neg 3, 3\n" \
"1:" \
- : "+r" (_r0), "=r" (_r3), "+r" (_r4), "+r" (_r5), \
+ : "+r" (_r0), "+r" (_r3), "+r" (_r4), "+r" (_r5), \
"+r" (_r6), "+r" (_r7), "+r" (_r8) \
- : "r" (_rval) \
+ : \
: "r9", "r10", "r11", "r12", "cr0", "cr1", "cr5", \
"cr6", "cr7", "xer", "lr", "ctr", "memory" \
); \
- _rval; \
+ _r3; \
})
#else
diff --git a/tools/testing/selftests/vDSO/vdso_test_abi.c b/tools/testing/selftests/vDSO/vdso_test_abi.c
index a54424e2336f..238d609a457a 100644
--- a/tools/testing/selftests/vDSO/vdso_test_abi.c
+++ b/tools/testing/selftests/vDSO/vdso_test_abi.c
@@ -26,24 +26,31 @@
static const char *version;
static const char **name;
+/* The same as struct __kernel_timespec */
+struct vdso_timespec64 {
+ uint64_t tv_sec;
+ uint64_t tv_nsec;
+};
+
typedef long (*vdso_gettimeofday_t)(struct timeval *tv, struct timezone *tz);
typedef long (*vdso_clock_gettime_t)(clockid_t clk_id, struct timespec *ts);
+typedef long (*vdso_clock_gettime64_t)(clockid_t clk_id, struct vdso_timespec64 *ts);
typedef long (*vdso_clock_getres_t)(clockid_t clk_id, struct timespec *ts);
typedef time_t (*vdso_time_t)(time_t *t);
-const char *vdso_clock_name[12] = {
- "CLOCK_REALTIME",
- "CLOCK_MONOTONIC",
- "CLOCK_PROCESS_CPUTIME_ID",
- "CLOCK_THREAD_CPUTIME_ID",
- "CLOCK_MONOTONIC_RAW",
- "CLOCK_REALTIME_COARSE",
- "CLOCK_MONOTONIC_COARSE",
- "CLOCK_BOOTTIME",
- "CLOCK_REALTIME_ALARM",
- "CLOCK_BOOTTIME_ALARM",
- "CLOCK_SGI_CYCLE",
- "CLOCK_TAI",
+static const char * const vdso_clock_name[] = {
+ [CLOCK_REALTIME] = "CLOCK_REALTIME",
+ [CLOCK_MONOTONIC] = "CLOCK_MONOTONIC",
+ [CLOCK_PROCESS_CPUTIME_ID] = "CLOCK_PROCESS_CPUTIME_ID",
+ [CLOCK_THREAD_CPUTIME_ID] = "CLOCK_THREAD_CPUTIME_ID",
+ [CLOCK_MONOTONIC_RAW] = "CLOCK_MONOTONIC_RAW",
+ [CLOCK_REALTIME_COARSE] = "CLOCK_REALTIME_COARSE",
+ [CLOCK_MONOTONIC_COARSE] = "CLOCK_MONOTONIC_COARSE",
+ [CLOCK_BOOTTIME] = "CLOCK_BOOTTIME",
+ [CLOCK_REALTIME_ALARM] = "CLOCK_REALTIME_ALARM",
+ [CLOCK_BOOTTIME_ALARM] = "CLOCK_BOOTTIME_ALARM",
+ [10 /* CLOCK_SGI_CYCLE */] = "CLOCK_SGI_CYCLE",
+ [CLOCK_TAI] = "CLOCK_TAI",
};
static void vdso_test_gettimeofday(void)
@@ -70,6 +77,33 @@ static void vdso_test_gettimeofday(void)
}
}
+static void vdso_test_clock_gettime64(clockid_t clk_id)
+{
+ /* Find clock_gettime64. */
+ vdso_clock_gettime64_t vdso_clock_gettime64 =
+ (vdso_clock_gettime64_t)vdso_sym(version, name[5]);
+
+ if (!vdso_clock_gettime64) {
+ ksft_print_msg("Couldn't find %s\n", name[5]);
+ ksft_test_result_skip("%s %s\n", name[5],
+ vdso_clock_name[clk_id]);
+ return;
+ }
+
+ struct vdso_timespec64 ts;
+ long ret = VDSO_CALL(vdso_clock_gettime64, 2, clk_id, &ts);
+
+ if (ret == 0) {
+ ksft_print_msg("The time is %lld.%06lld\n",
+ (long long)ts.tv_sec, (long long)ts.tv_nsec);
+ ksft_test_result_pass("%s %s\n", name[5],
+ vdso_clock_name[clk_id]);
+ } else {
+ ksft_test_result_fail("%s %s\n", name[5],
+ vdso_clock_name[clk_id]);
+ }
+}
+
static void vdso_test_clock_gettime(clockid_t clk_id)
{
/* Find clock_gettime. */
@@ -171,23 +205,23 @@ static inline void vdso_test_clock(clockid_t clock_id)
ksft_print_msg("clock_id: %s\n", vdso_clock_name[clock_id]);
vdso_test_clock_gettime(clock_id);
+ vdso_test_clock_gettime64(clock_id);
vdso_test_clock_getres(clock_id);
}
-#define VDSO_TEST_PLAN 16
+#define VDSO_TEST_PLAN 29
int main(int argc, char **argv)
{
unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR);
ksft_print_header();
- ksft_set_plan(VDSO_TEST_PLAN);
- if (!sysinfo_ehdr) {
- ksft_print_msg("AT_SYSINFO_EHDR is not present!\n");
- return KSFT_SKIP;
- }
+ if (!sysinfo_ehdr)
+ ksft_exit_skip("AT_SYSINFO_EHDR is not present!\n");
+
+ ksft_set_plan(VDSO_TEST_PLAN);
version = versions[VDSO_VERSION];
name = (const char **)&names[VDSO_NAMES];
@@ -198,40 +232,17 @@ int main(int argc, char **argv)
vdso_test_gettimeofday();
-#if _POSIX_TIMERS > 0
-
-#ifdef CLOCK_REALTIME
vdso_test_clock(CLOCK_REALTIME);
-#endif
-
-#ifdef CLOCK_BOOTTIME
vdso_test_clock(CLOCK_BOOTTIME);
-#endif
-
-#ifdef CLOCK_TAI
vdso_test_clock(CLOCK_TAI);
-#endif
-
-#ifdef CLOCK_REALTIME_COARSE
vdso_test_clock(CLOCK_REALTIME_COARSE);
-#endif
-
-#ifdef CLOCK_MONOTONIC
vdso_test_clock(CLOCK_MONOTONIC);
-#endif
-
-#ifdef CLOCK_MONOTONIC_RAW
vdso_test_clock(CLOCK_MONOTONIC_RAW);
-#endif
-
-#ifdef CLOCK_MONOTONIC_COARSE
vdso_test_clock(CLOCK_MONOTONIC_COARSE);
-#endif
-
-#endif
+ vdso_test_clock(CLOCK_PROCESS_CPUTIME_ID);
+ vdso_test_clock(CLOCK_THREAD_CPUTIME_ID);
vdso_test_time();
- ksft_print_cnts();
- return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+ ksft_finished();
}
diff --git a/tools/testing/selftests/vDSO/vdso_test_clock_getres.c b/tools/testing/selftests/vDSO/vdso_test_clock_getres.c
deleted file mode 100644
index b5d5f59f725a..000000000000
--- a/tools/testing/selftests/vDSO/vdso_test_clock_getres.c
+++ /dev/null
@@ -1,123 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
-/*
- * vdso_clock_getres.c: Sample code to test clock_getres.
- * Copyright (c) 2019 Arm Ltd.
- *
- * Compile with:
- * gcc -std=gnu99 vdso_clock_getres.c
- *
- * Tested on ARM, ARM64, MIPS32, x86 (32-bit and 64-bit),
- * Power (32-bit and 64-bit), S390x (32-bit and 64-bit).
- * Might work on other architectures.
- */
-
-#define _GNU_SOURCE
-#include <elf.h>
-#include <fcntl.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <time.h>
-#include <sys/auxv.h>
-#include <sys/mman.h>
-#include <sys/time.h>
-#include <unistd.h>
-#include <sys/syscall.h>
-
-#include "../kselftest.h"
-
-static long syscall_clock_getres(clockid_t _clkid, struct timespec *_ts)
-{
- long ret;
-
- ret = syscall(SYS_clock_getres, _clkid, _ts);
-
- return ret;
-}
-
-const char *vdso_clock_name[12] = {
- "CLOCK_REALTIME",
- "CLOCK_MONOTONIC",
- "CLOCK_PROCESS_CPUTIME_ID",
- "CLOCK_THREAD_CPUTIME_ID",
- "CLOCK_MONOTONIC_RAW",
- "CLOCK_REALTIME_COARSE",
- "CLOCK_MONOTONIC_COARSE",
- "CLOCK_BOOTTIME",
- "CLOCK_REALTIME_ALARM",
- "CLOCK_BOOTTIME_ALARM",
- "CLOCK_SGI_CYCLE",
- "CLOCK_TAI",
-};
-
-/*
- * This function calls clock_getres in vdso and by system call
- * with different values for clock_id.
- *
- * Example of output:
- *
- * clock_id: CLOCK_REALTIME [PASS]
- * clock_id: CLOCK_BOOTTIME [PASS]
- * clock_id: CLOCK_TAI [PASS]
- * clock_id: CLOCK_REALTIME_COARSE [PASS]
- * clock_id: CLOCK_MONOTONIC [PASS]
- * clock_id: CLOCK_MONOTONIC_RAW [PASS]
- * clock_id: CLOCK_MONOTONIC_COARSE [PASS]
- */
-static inline int vdso_test_clock(unsigned int clock_id)
-{
- struct timespec x, y;
-
- printf("clock_id: %s", vdso_clock_name[clock_id]);
- clock_getres(clock_id, &x);
- syscall_clock_getres(clock_id, &y);
-
- if ((x.tv_sec != y.tv_sec) || (x.tv_nsec != y.tv_nsec)) {
- printf(" [FAIL]\n");
- return KSFT_FAIL;
- }
-
- printf(" [PASS]\n");
- return KSFT_PASS;
-}
-
-int main(int argc, char **argv)
-{
- int ret = 0;
-
-#if _POSIX_TIMERS > 0
-
-#ifdef CLOCK_REALTIME
- ret += vdso_test_clock(CLOCK_REALTIME);
-#endif
-
-#ifdef CLOCK_BOOTTIME
- ret += vdso_test_clock(CLOCK_BOOTTIME);
-#endif
-
-#ifdef CLOCK_TAI
- ret += vdso_test_clock(CLOCK_TAI);
-#endif
-
-#ifdef CLOCK_REALTIME_COARSE
- ret += vdso_test_clock(CLOCK_REALTIME_COARSE);
-#endif
-
-#ifdef CLOCK_MONOTONIC
- ret += vdso_test_clock(CLOCK_MONOTONIC);
-#endif
-
-#ifdef CLOCK_MONOTONIC_RAW
- ret += vdso_test_clock(CLOCK_MONOTONIC_RAW);
-#endif
-
-#ifdef CLOCK_MONOTONIC_COARSE
- ret += vdso_test_clock(CLOCK_MONOTONIC_COARSE);
-#endif
-
-#endif
- if (ret > 0)
- return KSFT_FAIL;
-
- return KSFT_PASS;
-}