diff options
200 files changed, 5063 insertions, 3521 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index bf1e6ca6aeb8..e019db1633fd 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2606,6 +2606,11 @@ for it. Intended to get systems with badly broken firmware running. + irqhandler.duration_warn_us= [KNL] + Warn if an IRQ handler exceeds the specified duration + threshold in microseconds. Useful for identifying + long-running IRQs in the system. + irqpoll [HW] When an interrupt is not handled search all handlers for it. Also check all handlers each timer diff --git a/Documentation/devicetree/bindings/interrupt-controller/aspeed,ast2500-scu-ic.yaml b/Documentation/devicetree/bindings/interrupt-controller/aspeed,ast2500-scu-ic.yaml index d5287a2bf866..d998a9d69b91 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/aspeed,ast2500-scu-ic.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/aspeed,ast2500-scu-ic.yaml @@ -5,7 +5,7 @@ $id: http://devicetree.org/schemas/interrupt-controller/aspeed,ast2500-scu-ic.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Aspeed AST25XX and AST26XX SCU Interrupt Controller +title: Aspeed AST25XX, AST26XX, AST27XX SCU Interrupt Controller maintainers: - Eddie James <eajames@linux.ibm.com> @@ -16,6 +16,10 @@ properties: - aspeed,ast2500-scu-ic - aspeed,ast2600-scu-ic0 - aspeed,ast2600-scu-ic1 + - aspeed,ast2700-scu-ic0 + - aspeed,ast2700-scu-ic1 + - aspeed,ast2700-scu-ic2 + - aspeed,ast2700-scu-ic3 reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/mfd/aspeed,ast2x00-scu.yaml b/Documentation/devicetree/bindings/mfd/aspeed,ast2x00-scu.yaml index 5eccd10d95ce..67be6d095fe4 100644 --- a/Documentation/devicetree/bindings/mfd/aspeed,ast2x00-scu.yaml +++ b/Documentation/devicetree/bindings/mfd/aspeed,ast2x00-scu.yaml @@ -75,6 +75,10 @@ patternProperties: - aspeed,ast2500-scu-ic - aspeed,ast2600-scu-ic0 - aspeed,ast2600-scu-ic1 + - aspeed,ast2700-scu-ic0 + - aspeed,ast2700-scu-ic1 + - aspeed,ast2700-scu-ic2 + - aspeed,ast2700-scu-ic3 '^silicon-id@[0-9a-f]+$': description: Unique hardware silicon identifiers within the SoC diff --git a/Documentation/devicetree/bindings/timer/faraday,fttmr010.txt b/Documentation/devicetree/bindings/timer/faraday,fttmr010.txt deleted file mode 100644 index 3cb2f4c98d64..000000000000 --- a/Documentation/devicetree/bindings/timer/faraday,fttmr010.txt +++ /dev/null @@ -1,38 +0,0 @@ -Faraday Technology timer - -This timer is a generic IP block from Faraday Technology, embedded in the -Cortina Systems Gemini SoCs and other designs. - -Required properties: - -- compatible : Must be one of - "faraday,fttmr010" - "cortina,gemini-timer", "faraday,fttmr010" - "moxa,moxart-timer", "faraday,fttmr010" - "aspeed,ast2400-timer" - "aspeed,ast2500-timer" - "aspeed,ast2600-timer" - -- reg : Should contain registers location and length -- interrupts : Should contain the three timer interrupts usually with - flags for falling edge - -Optionally required properties: - -- clocks : a clock to provide the tick rate for "faraday,fttmr010" -- clock-names : should be "EXTCLK" and "PCLK" for the external tick timer - and peripheral clock respectively, for "faraday,fttmr010" -- syscon : a phandle to the global Gemini system controller if the compatible - type is "cortina,gemini-timer" - -Example: - -timer@43000000 { - compatible = "faraday,fttmr010"; - reg = <0x43000000 0x1000>; - interrupts = <14 IRQ_TYPE_EDGE_FALLING>, /* Timer 1 */ - <15 IRQ_TYPE_EDGE_FALLING>, /* Timer 2 */ - <16 IRQ_TYPE_EDGE_FALLING>; /* Timer 3 */ - clocks = <&extclk>, <&pclk>; - clock-names = "EXTCLK", "PCLK"; -}; diff --git a/Documentation/devicetree/bindings/timer/faraday,fttmr010.yaml b/Documentation/devicetree/bindings/timer/faraday,fttmr010.yaml new file mode 100644 index 000000000000..39506323556c --- /dev/null +++ b/Documentation/devicetree/bindings/timer/faraday,fttmr010.yaml @@ -0,0 +1,89 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/timer/faraday,fttmr010.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Faraday FTTMR010 timer + +maintainers: + - Joel Stanley <joel@jms.id.au> + - Linus Walleij <linus.walleij@linaro.org> + +description: + This timer is a generic IP block from Faraday Technology, embedded in the + Cortina Systems Gemini SoCs and other designs. + +properties: + compatible: + oneOf: + - items: + - const: moxa,moxart-timer + - const: faraday,fttmr010 + - enum: + - aspeed,ast2400-timer + - aspeed,ast2500-timer + - aspeed,ast2600-timer + - cortina,gemini-timer + - faraday,fttmr010 + + reg: + maxItems: 1 + + interrupts: + minItems: 1 + maxItems: 8 + description: One interrupt per timer + + clocks: + minItems: 1 + items: + - description: Peripheral clock + - description: External tick clock + + clock-names: + minItems: 1 + items: + - const: PCLK + - const: EXTCLK + + resets: + maxItems: 1 + + syscon: + description: System controller phandle for Gemini systems + $ref: /schemas/types.yaml#/definitions/phandle + +required: + - compatible + - reg + - interrupts + +allOf: + - if: + properties: + compatible: + contains: + const: cortina,gemini-timer + then: + required: + - syscon + else: + properties: + syscon: false + +additionalProperties: false + +examples: + - | + #include <dt-bindings/interrupt-controller/irq.h> + + timer@43000000 { + compatible = "faraday,fttmr010"; + reg = <0x43000000 0x1000>; + interrupts = <14 IRQ_TYPE_EDGE_FALLING>, /* Timer 1 */ + <15 IRQ_TYPE_EDGE_FALLING>, /* Timer 2 */ + <16 IRQ_TYPE_EDGE_FALLING>; /* Timer 3 */ + clocks = <&pclk>, <&extclk>; + clock-names = "PCLK", "EXTCLK"; + }; diff --git a/Documentation/devicetree/bindings/timer/fsl,ftm-timer.yaml b/Documentation/devicetree/bindings/timer/fsl,ftm-timer.yaml index 0e4a8ddc3de3..e3b61b62521e 100644 --- a/Documentation/devicetree/bindings/timer/fsl,ftm-timer.yaml +++ b/Documentation/devicetree/bindings/timer/fsl,ftm-timer.yaml @@ -14,7 +14,9 @@ properties: const: fsl,ftm-timer reg: - maxItems: 1 + items: + - description: clock event device + - description: clock source device interrupts: maxItems: 1 @@ -50,7 +52,8 @@ examples: ftm@400b8000 { compatible = "fsl,ftm-timer"; - reg = <0x400b8000 0x1000>; + reg = <0x400b8000 0x1000>, + <0x400b9000 0x1000>; interrupts = <0 44 IRQ_TYPE_LEVEL_HIGH>; clock-names = "ftm-evt", "ftm-src", "ftm-evt-counter-en", "ftm-src-counter-en"; clocks = <&clks VF610_CLK_FTM2>, <&clks VF610_CLK_FTM3>, diff --git a/Documentation/devicetree/bindings/timer/fsl,timrot.yaml b/Documentation/devicetree/bindings/timer/fsl,timrot.yaml new file mode 100644 index 000000000000..d181f274ef9f --- /dev/null +++ b/Documentation/devicetree/bindings/timer/fsl,timrot.yaml @@ -0,0 +1,48 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/timer/fsl,timrot.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Freescale MXS Timer + +maintainers: + - Frank Li <Frank.Li@nxp.com> + +properties: + compatible: + items: + - enum: + - fsl,imx23-timrot + - fsl,imx28-timrot + - const: fsl,timrot + + reg: + maxItems: 1 + + interrupts: + items: + - description: irq for timer0 + - description: irq for timer1 + - description: irq for timer2 + - description: irq for timer3 + + clocks: + maxItems: 1 + +required: + - compatible + - reg + - interrupts + - clocks + +additionalProperties: false + +examples: + - | + timer: timer@80068000 { + compatible = "fsl,imx28-timrot", "fsl,timrot"; + reg = <0x80068000 0x2000>; + interrupts = <48>, <49>, <50>, <51>; + clocks = <&clks 26>; + }; diff --git a/Documentation/devicetree/bindings/timer/fsl,vf610-pit.yaml b/Documentation/devicetree/bindings/timer/fsl,vf610-pit.yaml index bee2c35bd0e2..42e130654d58 100644 --- a/Documentation/devicetree/bindings/timer/fsl,vf610-pit.yaml +++ b/Documentation/devicetree/bindings/timer/fsl,vf610-pit.yaml @@ -15,8 +15,13 @@ description: properties: compatible: - enum: - - fsl,vf610-pit + oneOf: + - enum: + - fsl,vf610-pit + - nxp,s32g2-pit + - items: + - const: nxp,s32g3-pit + - const: nxp,s32g2-pit reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/timer/mediatek,timer.yaml b/Documentation/devicetree/bindings/timer/mediatek,timer.yaml index f68fc7050c56..e3e38066c2cb 100644 --- a/Documentation/devicetree/bindings/timer/mediatek,timer.yaml +++ b/Documentation/devicetree/bindings/timer/mediatek,timer.yaml @@ -26,6 +26,7 @@ properties: - items: - enum: - mediatek,mt2701-timer + - mediatek,mt6572-timer - mediatek,mt6580-timer - mediatek,mt6582-timer - mediatek,mt6589-timer @@ -44,6 +45,7 @@ properties: - mediatek,mt8188-timer - mediatek,mt8192-timer - mediatek,mt8195-timer + - mediatek,mt8196-timer - mediatek,mt8365-systimer - const: mediatek,mt6765-timer diff --git a/Documentation/devicetree/bindings/timer/samsung,exynos4210-mct.yaml b/Documentation/devicetree/bindings/timer/samsung,exynos4210-mct.yaml index 10578f544581..a4b229e0e78a 100644 --- a/Documentation/devicetree/bindings/timer/samsung,exynos4210-mct.yaml +++ b/Documentation/devicetree/bindings/timer/samsung,exynos4210-mct.yaml @@ -26,6 +26,7 @@ properties: - items: - enum: - axis,artpec8-mct + - axis,artpec9-mct - google,gs101-mct - samsung,exynos2200-mct-peris - samsung,exynos3250-mct @@ -131,6 +132,7 @@ allOf: contains: enum: - axis,artpec8-mct + - axis,artpec9-mct - google,gs101-mct - samsung,exynos2200-mct-peris - samsung,exynos5260-mct diff --git a/MAINTAINERS b/MAINTAINERS index 62d16c20a888..331728e93f69 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1989,6 +1989,7 @@ S: Maintained F: arch/arm/include/asm/arch_timer.h F: arch/arm64/include/asm/arch_timer.h F: drivers/clocksource/arm_arch_timer.c +F: drivers/clocksource/arm_arch_timer_mmio.c ARM GENERIC INTERRUPT CONTROLLER DRIVERS M: Marc Zyngier <maz@kernel.org> diff --git a/arch/Kconfig b/arch/Kconfig index 2162276995a0..5440616f0774 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1657,7 +1657,7 @@ config HAVE_SPARSE_SYSCALL_NR related optimizations for a given architecture. config ARCH_HAS_VDSO_ARCH_DATA - depends on GENERIC_VDSO_DATA_STORE + depends on HAVE_GENERIC_VDSO bool config ARCH_HAS_VDSO_TIME_DATA @@ -1778,6 +1778,10 @@ config ARCH_VMLINUX_NEEDS_RELOCS relocations preserved. This is used by some architectures to construct bespoke relocation tables for KASLR. +# Select if architecture uses the common generic TIF bits +config HAVE_GENERIC_TIF_BITS + bool + source "kernel/gcov/Kconfig" source "scripts/gcc-plugins/Kconfig" diff --git a/arch/arm/include/asm/vdso/vsyscall.h b/arch/arm/include/asm/vdso/vsyscall.h index 4e7226ad02ec..ff1c729af05f 100644 --- a/arch/arm/include/asm/vdso/vsyscall.h +++ b/arch/arm/include/asm/vdso/vsyscall.h @@ -7,8 +7,6 @@ #include <vdso/datapage.h> #include <asm/cacheflush.h> -extern bool cntvct_ok; - static __always_inline void __arch_sync_vdso_time_data(struct vdso_time_data *vdata) { diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c index 325448ffbba0..e38a30477f3d 100644 --- a/arch/arm/kernel/vdso.c +++ b/arch/arm/kernel/vdso.c @@ -54,11 +54,9 @@ struct elfinfo { char *dynstr; /* ptr to .dynstr section */ }; -/* Cached result of boot-time check for whether the arch timer exists, - * and if so, whether the virtual counter is useable. +/* Boot-time check for whether the arch timer exists, and if so, + * whether the virtual counter is usable. */ -bool cntvct_ok __ro_after_init; - static bool __init cntvct_functional(void) { struct device_node *np; @@ -159,7 +157,7 @@ static void __init patch_vdso(void *ehdr) * want programs to incur the slight additional overhead of * dispatching through the VDSO only to fall back to syscalls. */ - if (!cntvct_ok) { + if (!cntvct_functional()) { vdso_nullpatch_one(&einfo, "__vdso_gettimeofday"); vdso_nullpatch_one(&einfo, "__vdso_clock_gettime"); vdso_nullpatch_one(&einfo, "__vdso_clock_gettime64"); @@ -197,8 +195,6 @@ static int __init vdso_init(void) vdso_total_pages = VDSO_NR_PAGES; /* for the data/vvar pages */ vdso_total_pages += text_pages; - cntvct_ok = cntvct_functional(); - patch_vdso(vdso_start); return 0; diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 5c1023a6d78c..7b27ee9482b3 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -926,9 +926,7 @@ config VDSO default y if ARM_ARCH_TIMER select HAVE_GENERIC_VDSO select GENERIC_TIME_VSYSCALL - select GENERIC_VDSO_32 select GENERIC_GETTIMEOFDAY - select GENERIC_VDSO_DATA_STORE help Place in the process address space an ELF shared object providing fast implementations of gettimeofday and diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 616b702f10d4..b3e13f67d598 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -166,8 +166,6 @@ config ARM64 select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL select GENERIC_GETTIMEOFDAY - select GENERIC_VDSO_DATA_STORE - select GENERIC_VDSO_TIME_NS select HARDIRQS_SW_RESEND select HAS_IOPORT select HAVE_MOVE_PMD @@ -1750,7 +1748,6 @@ config COMPAT_VDSO bool "Enable vDSO for 32-bit applications" depends on !CPU_BIG_ENDIAN depends on (CC_IS_CLANG && LD_IS_LLD) || "$(CROSS_COMPILE_COMPAT)" != "" - select GENERIC_COMPAT_VDSO default y help Place in the process address space of 32-bit applications an diff --git a/arch/arm64/include/asm/vdso/compat_barrier.h b/arch/arm64/include/asm/vdso/compat_barrier.h index 3ac35f4a667c..6d75e03d3827 100644 --- a/arch/arm64/include/asm/vdso/compat_barrier.h +++ b/arch/arm64/include/asm/vdso/compat_barrier.h @@ -7,11 +7,10 @@ #ifndef __ASSEMBLY__ /* - * Warning: This code is meant to be used with - * ENABLE_COMPAT_VDSO only. + * Warning: This code is meant to be used from the compat vDSO only. */ -#ifndef ENABLE_COMPAT_VDSO -#error This header is meant to be used with ENABLE_COMPAT_VDSO only +#ifdef __arch64__ +#error This header is meant to be used with from the compat vDSO only #endif #ifdef dmb diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h index d60ea7a72a9c..7d1a116549b1 100644 --- a/arch/arm64/include/asm/vdso/compat_gettimeofday.h +++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h @@ -2,8 +2,8 @@ /* * Copyright (C) 2018 ARM Limited */ -#ifndef __ASM_VDSO_GETTIMEOFDAY_H -#define __ASM_VDSO_GETTIMEOFDAY_H +#ifndef __ASM_VDSO_COMPAT_GETTIMEOFDAY_H +#define __ASM_VDSO_COMPAT_GETTIMEOFDAY_H #ifndef __ASSEMBLY__ @@ -163,4 +163,4 @@ static inline bool vdso_clocksource_ok(const struct vdso_clock *vc) #endif /* !__ASSEMBLY__ */ -#endif /* __ASM_VDSO_GETTIMEOFDAY_H */ +#endif /* __ASM_VDSO_COMPAT_GETTIMEOFDAY_H */ diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h index da1ab8759592..c59e84105b43 100644 --- a/arch/arm64/include/asm/vdso/gettimeofday.h +++ b/arch/arm64/include/asm/vdso/gettimeofday.h @@ -5,6 +5,8 @@ #ifndef __ASM_VDSO_GETTIMEOFDAY_H #define __ASM_VDSO_GETTIMEOFDAY_H +#ifdef __aarch64__ + #ifndef __ASSEMBLY__ #include <asm/alternative.h> @@ -96,4 +98,10 @@ static __always_inline const struct vdso_time_data *__arch_get_vdso_u_time_data( #endif /* !__ASSEMBLY__ */ +#else /* !__aarch64__ */ + +#include "compat_gettimeofday.h" + +#endif /* __aarch64__ */ + #endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 5de4deaf4299..ffa3536581f6 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -57,7 +57,6 @@ VDSO_CAFLAGS += -DDISABLE_BRANCH_PROFILING VDSO_CAFLAGS += -march=armv8-a VDSO_CFLAGS := $(VDSO_CAFLAGS) -VDSO_CFLAGS += -DENABLE_COMPAT_VDSO=1 # KBUILD_CFLAGS from top-level Makefile VDSO_CFLAGS += -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ -fno-strict-aliasing -fno-common \ diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 1f08941fdea4..e6225539579f 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -110,8 +110,6 @@ config LOONGARCH select GENERIC_SCHED_CLOCK select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL - select GENERIC_VDSO_DATA_STORE - select GENERIC_VDSO_TIME_NS select GPIOLIB select HAS_IOPORT select HAVE_ARCH_AUDITSYSCALL @@ -142,6 +140,7 @@ config LOONGARCH select HAVE_EBPF_JIT select HAVE_EFFICIENT_UNALIGNED_ACCESS if !ARCH_STRICT_ALIGN select HAVE_EXIT_THREAD + select HAVE_GENERIC_TIF_BITS select HAVE_GUP_FAST select HAVE_FTRACE_GRAPH_FUNC select HAVE_FUNCTION_ARG_ACCESS_API diff --git a/arch/loongarch/include/asm/thread_info.h b/arch/loongarch/include/asm/thread_info.h index 9dfa2ef00816..4d7117fcdc78 100644 --- a/arch/loongarch/include/asm/thread_info.h +++ b/arch/loongarch/include/asm/thread_info.h @@ -65,50 +65,42 @@ register unsigned long current_stack_pointer __asm__("$sp"); * access * - pending work-to-be-done flags are in LSW * - other flags in MSW + * + * Tell the generic TIF infrastructure which special bits loongarch supports */ -#define TIF_NEED_RESCHED 0 /* rescheduling necessary */ -#define TIF_NEED_RESCHED_LAZY 1 /* lazy rescheduling necessary */ -#define TIF_SIGPENDING 2 /* signal pending */ -#define TIF_NOTIFY_RESUME 3 /* callback before returning to user */ -#define TIF_NOTIFY_SIGNAL 4 /* signal notifications exist */ -#define TIF_RESTORE_SIGMASK 5 /* restore signal mask in do_signal() */ -#define TIF_NOHZ 6 /* in adaptive nohz mode */ -#define TIF_UPROBE 7 /* breakpointed or singlestepping */ -#define TIF_USEDFPU 8 /* FPU was used by this task this quantum (SMP) */ -#define TIF_USEDSIMD 9 /* SIMD has been used this quantum */ -#define TIF_MEMDIE 10 /* is terminating due to OOM killer */ -#define TIF_FIXADE 11 /* Fix address errors in software */ -#define TIF_LOGADE 12 /* Log address errors to syslog */ -#define TIF_32BIT_REGS 13 /* 32-bit general purpose registers */ -#define TIF_32BIT_ADDR 14 /* 32-bit address space */ -#define TIF_LOAD_WATCH 15 /* If set, load watch registers */ -#define TIF_SINGLESTEP 16 /* Single Step */ -#define TIF_LSX_CTX_LIVE 17 /* LSX context must be preserved */ -#define TIF_LASX_CTX_LIVE 18 /* LASX context must be preserved */ -#define TIF_USEDLBT 19 /* LBT was used by this task this quantum (SMP) */ -#define TIF_LBT_CTX_LIVE 20 /* LBT context must be preserved */ -#define TIF_PATCH_PENDING 21 /* pending live patching update */ +#define HAVE_TIF_NEED_RESCHED_LAZY +#define HAVE_TIF_RESTORE_SIGMASK + +#include <asm-generic/thread_info_tif.h> + +/* Architecture specific bits */ +#define TIF_NOHZ 16 /* in adaptive nohz mode */ +#define TIF_USEDFPU 17 /* FPU was used by this task this quantum (SMP) */ +#define TIF_USEDSIMD 18 /* SIMD has been used this quantum */ +#define TIF_FIXADE 19 /* Fix address errors in software */ +#define TIF_LOGADE 20 /* Log address errors to syslog */ +#define TIF_32BIT_REGS 21 /* 32-bit general purpose registers */ +#define TIF_32BIT_ADDR 22 /* 32-bit address space */ +#define TIF_LOAD_WATCH 23 /* If set, load watch registers */ +#define TIF_SINGLESTEP 24 /* Single Step */ +#define TIF_LSX_CTX_LIVE 25 /* LSX context must be preserved */ +#define TIF_LASX_CTX_LIVE 26 /* LASX context must be preserved */ +#define TIF_USEDLBT 27 /* LBT was used by this task this quantum (SMP) */ +#define TIF_LBT_CTX_LIVE 28 /* LBT context must be preserved */ -#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) -#define _TIF_NEED_RESCHED_LAZY (1<<TIF_NEED_RESCHED_LAZY) -#define _TIF_SIGPENDING (1<<TIF_SIGPENDING) -#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) -#define _TIF_NOTIFY_SIGNAL (1<<TIF_NOTIFY_SIGNAL) -#define _TIF_NOHZ (1<<TIF_NOHZ) -#define _TIF_UPROBE (1<<TIF_UPROBE) -#define _TIF_USEDFPU (1<<TIF_USEDFPU) -#define _TIF_USEDSIMD (1<<TIF_USEDSIMD) -#define _TIF_FIXADE (1<<TIF_FIXADE) -#define _TIF_LOGADE (1<<TIF_LOGADE) -#define _TIF_32BIT_REGS (1<<TIF_32BIT_REGS) -#define _TIF_32BIT_ADDR (1<<TIF_32BIT_ADDR) -#define _TIF_LOAD_WATCH (1<<TIF_LOAD_WATCH) -#define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP) -#define _TIF_LSX_CTX_LIVE (1<<TIF_LSX_CTX_LIVE) -#define _TIF_LASX_CTX_LIVE (1<<TIF_LASX_CTX_LIVE) -#define _TIF_USEDLBT (1<<TIF_USEDLBT) -#define _TIF_LBT_CTX_LIVE (1<<TIF_LBT_CTX_LIVE) -#define _TIF_PATCH_PENDING (1<<TIF_PATCH_PENDING) +#define _TIF_NOHZ BIT(TIF_NOHZ) +#define _TIF_USEDFPU BIT(TIF_USEDFPU) +#define _TIF_USEDSIMD BIT(TIF_USEDSIMD) +#define _TIF_FIXADE BIT(TIF_FIXADE) +#define _TIF_LOGADE BIT(TIF_LOGADE) +#define _TIF_32BIT_REGS BIT(TIF_32BIT_REGS) +#define _TIF_32BIT_ADDR BIT(TIF_32BIT_ADDR) +#define _TIF_LOAD_WATCH BIT(TIF_LOAD_WATCH) +#define _TIF_SINGLESTEP BIT(TIF_SINGLESTEP) +#define _TIF_LSX_CTX_LIVE BIT(TIF_LSX_CTX_LIVE) +#define _TIF_LASX_CTX_LIVE BIT(TIF_LASX_CTX_LIVE) +#define _TIF_USEDLBT BIT(TIF_USEDLBT) +#define _TIF_LBT_CTX_LIVE BIT(TIF_LBT_CTX_LIVE) #endif /* __KERNEL__ */ #endif /* _ASM_THREAD_INFO_H */ diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c index f3092f2de8b5..6fb92cc1a4c9 100644 --- a/arch/loongarch/kernel/time.c +++ b/arch/loongarch/kernel/time.c @@ -112,8 +112,6 @@ static int arch_timer_starting(unsigned int cpu) static int arch_timer_dying(unsigned int cpu) { - constant_set_state_shutdown(this_cpu_ptr(&constant_clockevent_device)); - /* Clear Timer Interrupt */ write_csr_tintclear(CSR_TINTCLR_TI); diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 447b2fcaa316..608e01ed6cff 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -51,7 +51,6 @@ config MIPS select GENERIC_SMP_IDLE_THREAD select GENERIC_IDLE_POLL_SETUP select GENERIC_TIME_VSYSCALL - select GENERIC_VDSO_DATA_STORE select GUP_GET_PXX_LOW_HIGH if CPU_MIPS32 && PHYS_ADDR_T_64BIT select HAS_IOPORT if !NO_IOPORT_MAP || ISA select HAVE_ARCH_COMPILER_H diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 12fb3da59700..22173a897670 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -210,8 +210,6 @@ config PPC select GENERIC_PCI_IOMAP if PCI select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL - select GENERIC_VDSO_DATA_STORE - select GENERIC_VDSO_TIME_NS select HAS_IOPORT if PCI select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 613ab57cda74..a9ef4eeb72ab 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -53,7 +53,7 @@ config RISCV select ARCH_HAS_SYSCALL_WRAPPER select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UBSAN - select ARCH_HAS_VDSO_ARCH_DATA if GENERIC_VDSO_DATA_STORE + select ARCH_HAS_VDSO_ARCH_DATA if HAVE_GENERIC_VDSO select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_KEEP_MEMBLOCK if ACPI select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE if 64BIT && MMU @@ -110,7 +110,7 @@ config RISCV select GENERIC_CPU_VULNERABILITIES select GENERIC_EARLY_IOREMAP select GENERIC_ENTRY - select GENERIC_GETTIMEOFDAY if HAVE_GENERIC_VDSO + select GENERIC_GETTIMEOFDAY if HAVE_GENERIC_VDSO && 64BIT select GENERIC_IDLE_POLL_SETUP select GENERIC_IOREMAP if MMU select GENERIC_IRQ_IPI if SMP @@ -123,9 +123,7 @@ config RISCV select GENERIC_PCI_IOMAP select GENERIC_SCHED_CLOCK select GENERIC_SMP_IDLE_THREAD - select GENERIC_TIME_VSYSCALL if MMU && 64BIT - select GENERIC_VDSO_DATA_STORE if MMU - select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO + select GENERIC_TIME_VSYSCALL if GENERIC_GETTIMEOFDAY select HARDIRQS_SW_RESEND select HAS_IOPORT if MMU select HAVE_ALIGNED_STRUCT_PAGE @@ -164,11 +162,12 @@ config RISCV select HAVE_FUNCTION_GRAPH_FREGS select HAVE_FUNCTION_TRACER if !XIP_KERNEL && HAVE_DYNAMIC_FTRACE select HAVE_EBPF_JIT if MMU + select HAVE_GENERIC_TIF_BITS select HAVE_GUP_FAST if MMU select HAVE_FUNCTION_ARG_ACCESS_API select HAVE_FUNCTION_ERROR_INJECTION select HAVE_GCC_PLUGINS - select HAVE_GENERIC_VDSO if MMU && 64BIT + select HAVE_GENERIC_VDSO if MMU select HAVE_IRQ_TIME_ACCOUNTING select HAVE_KERNEL_BZIP2 if !XIP_KERNEL && !EFI_ZBOOT select HAVE_KERNEL_GZIP if !XIP_KERNEL && !EFI_ZBOOT @@ -224,7 +223,7 @@ config RISCV select THREAD_INFO_IN_TASK select TRACE_IRQFLAGS_SUPPORT select UACCESS_MEMCPY if !MMU - select VDSO_GETRANDOM if HAVE_GENERIC_VDSO + select VDSO_GETRANDOM if HAVE_GENERIC_VDSO && 64BIT select USER_STACKTRACE_SUPPORT select ZONE_DMA32 if 64BIT diff --git a/arch/riscv/boot/dts/sophgo/sg2042.dtsi b/arch/riscv/boot/dts/sophgo/sg2042.dtsi index b3e4d3c18fdc..6430c6e25c00 100644 --- a/arch/riscv/boot/dts/sophgo/sg2042.dtsi +++ b/arch/riscv/boot/dts/sophgo/sg2042.dtsi @@ -190,7 +190,7 @@ reg-names = "clr", "doorbell"; msi-controller; #msi-cells = <0>; - msi-ranges = <&intc 64 IRQ_TYPE_LEVEL_HIGH 32>; + msi-ranges = <&intc 64 IRQ_TYPE_EDGE_RISING 32>; }; rpgate: clock-controller@7030010368 { diff --git a/arch/riscv/boot/dts/sophgo/sg2044.dtsi b/arch/riscv/boot/dts/sophgo/sg2044.dtsi index 6ec955744b0c..320c4d1d08e6 100644 --- a/arch/riscv/boot/dts/sophgo/sg2044.dtsi +++ b/arch/riscv/boot/dts/sophgo/sg2044.dtsi @@ -214,7 +214,7 @@ reg-names = "clr", "doorbell"; #msi-cells = <0>; msi-controller; - msi-ranges = <&intc 352 IRQ_TYPE_LEVEL_HIGH 512>; + msi-ranges = <&intc 352 IRQ_TYPE_EDGE_RISING 512>; status = "disabled"; }; diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h index c33d8b7dd488..836d80dd2921 100644 --- a/arch/riscv/include/asm/thread_info.h +++ b/arch/riscv/include/asm/thread_info.h @@ -107,23 +107,18 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); * - pending work-to-be-done flags are in lowest half-word * - other flags in upper half-word(s) */ -#define TIF_NEED_RESCHED 0 /* rescheduling necessary */ -#define TIF_NEED_RESCHED_LAZY 1 /* Lazy rescheduling needed */ -#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ -#define TIF_SIGPENDING 3 /* signal pending */ -#define TIF_RESTORE_SIGMASK 4 /* restore signal mask in do_signal() */ -#define TIF_MEMDIE 5 /* is terminating due to OOM killer */ -#define TIF_NOTIFY_SIGNAL 9 /* signal notifications exist */ -#define TIF_UPROBE 10 /* uprobe breakpoint or singlestep */ -#define TIF_32BIT 11 /* compat-mode 32bit process */ -#define TIF_RISCV_V_DEFER_RESTORE 12 /* restore Vector before returing to user */ - -#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) -#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) -#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) -#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) -#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) -#define _TIF_UPROBE (1 << TIF_UPROBE) -#define _TIF_RISCV_V_DEFER_RESTORE (1 << TIF_RISCV_V_DEFER_RESTORE) + +/* + * Tell the generic TIF infrastructure which bits riscv supports + */ +#define HAVE_TIF_NEED_RESCHED_LAZY +#define HAVE_TIF_RESTORE_SIGMASK + +#include <asm-generic/thread_info_tif.h> + +#define TIF_32BIT 16 /* compat-mode 32bit process */ +#define TIF_RISCV_V_DEFER_RESTORE 17 /* restore Vector before returing to user */ + +#define _TIF_RISCV_V_DEFER_RESTORE BIT(TIF_RISCV_V_DEFER_RESTORE) #endif /* _ASM_RISCV_THREAD_INFO_H */ diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 7104534f0a0f..2414ee3ff002 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -174,8 +174,6 @@ config S390 select GENERIC_GETTIMEOFDAY select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL - select GENERIC_VDSO_DATA_STORE - select GENERIC_VDSO_TIME_NS select GENERIC_IOREMAP if PCI select HAVE_ALIGNED_STRUCT_PAGE select HAVE_ARCH_AUDITSYSCALL @@ -206,6 +204,7 @@ config S390 select HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_EBPF_JIT if HAVE_MARCH_Z196_FEATURES select HAVE_EFFICIENT_UNALIGNED_ACCESS + select HAVE_GENERIC_TIF_BITS select HAVE_GUP_FAST select HAVE_FENTRY select HAVE_FTRACE_GRAPH_FUNC diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index f6ed2c8192c8..7878e9bfbf07 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -56,49 +56,31 @@ void arch_setup_new_exec(void); /* * thread information flags bit numbers + * + * Tell the generic TIF infrastructure which special bits s390 supports */ -#define TIF_NOTIFY_RESUME 0 /* callback before returning to user */ -#define TIF_SIGPENDING 1 /* signal pending */ -#define TIF_NEED_RESCHED 2 /* rescheduling necessary */ -#define TIF_NEED_RESCHED_LAZY 3 /* lazy rescheduling needed */ -#define TIF_UPROBE 4 /* breakpointed or single-stepping */ -#define TIF_PATCH_PENDING 5 /* pending live patching update */ -#define TIF_ASCE_PRIMARY 6 /* primary asce is kernel asce */ -#define TIF_NOTIFY_SIGNAL 7 /* signal notifications exist */ -#define TIF_GUARDED_STORAGE 8 /* load guarded storage control block */ -#define TIF_ISOLATE_BP_GUEST 9 /* Run KVM guests with isolated BP */ -#define TIF_PER_TRAP 10 /* Need to handle PER trap on exit to usermode */ -#define TIF_31BIT 16 /* 32bit process */ -#define TIF_MEMDIE 17 /* is terminating due to OOM killer */ -#define TIF_RESTORE_SIGMASK 18 /* restore signal mask in do_signal() */ -#define TIF_SINGLE_STEP 19 /* This task is single stepped */ -#define TIF_BLOCK_STEP 20 /* This task is block stepped */ -#define TIF_UPROBE_SINGLESTEP 21 /* This task is uprobe single stepped */ -#define TIF_SYSCALL_TRACE 24 /* syscall trace active */ -#define TIF_SYSCALL_AUDIT 25 /* syscall auditing active */ -#define TIF_SECCOMP 26 /* secure computing */ -#define TIF_SYSCALL_TRACEPOINT 27 /* syscall tracepoint instrumentation */ +#define HAVE_TIF_NEED_RESCHED_LAZY +#define HAVE_TIF_RESTORE_SIGMASK + +#include <asm-generic/thread_info_tif.h> + +/* Architecture specific bits */ +#define TIF_ASCE_PRIMARY 16 /* primary asce is kernel asce */ +#define TIF_GUARDED_STORAGE 17 /* load guarded storage control block */ +#define TIF_ISOLATE_BP_GUEST 18 /* Run KVM guests with isolated BP */ +#define TIF_PER_TRAP 19 /* Need to handle PER trap on exit to usermode */ +#define TIF_31BIT 20 /* 32bit process */ +#define TIF_SINGLE_STEP 21 /* This task is single stepped */ +#define TIF_BLOCK_STEP 22 /* This task is block stepped */ +#define TIF_UPROBE_SINGLESTEP 23 /* This task is uprobe single stepped */ -#define _TIF_NOTIFY_RESUME BIT(TIF_NOTIFY_RESUME) -#define _TIF_SIGPENDING BIT(TIF_SIGPENDING) -#define _TIF_NEED_RESCHED BIT(TIF_NEED_RESCHED) -#define _TIF_NEED_RESCHED_LAZY BIT(TIF_NEED_RESCHED_LAZY) -#define _TIF_UPROBE BIT(TIF_UPROBE) -#define _TIF_PATCH_PENDING BIT(TIF_PATCH_PENDING) #define _TIF_ASCE_PRIMARY BIT(TIF_ASCE_PRIMARY) -#define _TIF_NOTIFY_SIGNAL BIT(TIF_NOTIFY_SIGNAL) #define _TIF_GUARDED_STORAGE BIT(TIF_GUARDED_STORAGE) #define _TIF_ISOLATE_BP_GUEST BIT(TIF_ISOLATE_BP_GUEST) #define _TIF_PER_TRAP BIT(TIF_PER_TRAP) #define _TIF_31BIT BIT(TIF_31BIT) -#define _TIF_MEMDIE BIT(TIF_MEMDIE) -#define _TIF_RESTORE_SIGMASK BIT(TIF_RESTORE_SIGMASK) #define _TIF_SINGLE_STEP BIT(TIF_SINGLE_STEP) #define _TIF_BLOCK_STEP BIT(TIF_BLOCK_STEP) #define _TIF_UPROBE_SINGLESTEP BIT(TIF_UPROBE_SINGLESTEP) -#define _TIF_SYSCALL_TRACE BIT(TIF_SYSCALL_TRACE) -#define _TIF_SYSCALL_AUDIT BIT(TIF_SYSCALL_AUDIT) -#define _TIF_SECCOMP BIT(TIF_SECCOMP) -#define _TIF_SYSCALL_TRACEPOINT BIT(TIF_SYSCALL_TRACEPOINT) #endif /* _ASM_THREAD_INFO_H */ diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild index f7fb3d88c57b..36b985d0e7bf 100644 --- a/arch/x86/Kbuild +++ b/arch/x86/Kbuild @@ -3,6 +3,8 @@ # Branch profiling isn't noinstr-safe. Disable it for arch/x86/* subdir-ccflags-$(CONFIG_TRACE_BRANCH_PROFILING) += -DDISABLE_BRANCH_PROFILING +obj-y += boot/startup/ + obj-$(CONFIG_ARCH_HAS_CC_PLATFORM) += coco/ obj-y += entry/ diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 679c7f980aa3..75f3de70df51 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -14,7 +14,6 @@ config X86_32 select ARCH_WANT_IPC_PARSE_VERSION select CLKSRC_I8253 select CLONE_BACKWARDS - select GENERIC_VDSO_32 select HAVE_DEBUG_STACKOVERFLOW select KMAP_LOCAL select MODULES_USE_ELF_REL @@ -182,8 +181,6 @@ config X86 select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL select GENERIC_GETTIMEOFDAY - select GENERIC_VDSO_DATA_STORE - select GENERIC_VDSO_TIME_NS select GENERIC_VDSO_OVERFLOW_PROTECT select GUP_GET_PXX_LOW_HIGH if X86_PAE select HARDIRQS_SW_RESEND @@ -239,6 +236,7 @@ config X86 select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_EISA if X86_32 select HAVE_EXIT_THREAD + select HAVE_GENERIC_TIF_BITS select HAVE_GUP_FAST select HAVE_FENTRY if X86_64 || DYNAMIC_FTRACE select HAVE_FTRACE_GRAPH_FUNC if HAVE_FUNCTION_GRAPH_TRACER @@ -487,6 +485,19 @@ config X86_X2APIC If in doubt, say Y. +config AMD_SECURE_AVIC + bool "AMD Secure AVIC" + depends on AMD_MEM_ENCRYPT && X86_X2APIC + help + Enable this to get AMD Secure AVIC support on guests that have this feature. + + AMD Secure AVIC provides hardware acceleration for performance sensitive + APIC accesses and support for managing guest owned APIC state for SEV-SNP + guests. Secure AVIC does not support xAPIC mode. It has functional + dependency on x2apic being enabled in the guest. + + If you don't know what to do here, say N. + config X86_POSTED_MSI bool "Enable MSI and MSI-x delivery by posted interrupts" depends on X86_64 && IRQ_REMAP diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 4b4e2a3ac6df..4db7e4bf69f5 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -275,7 +275,6 @@ archprepare: $(cpufeaturemasks.hdr) ### # Kernel objects -core-y += arch/x86/boot/startup/ libs-y += arch/x86/lib/ # drivers-y are linked after core-y diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 3a38fdcdb9bd..74657589264d 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -73,7 +73,7 @@ LDFLAGS_vmlinux += -T hostprogs := mkpiggy HOST_EXTRACFLAGS += -I$(srctree)/tools/include -sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABbCDGRSTtVW] \(_text\|__start_rodata\|__bss_start\|_end\)$$/\#define VO_\2 _AC(0x\1,UL)/p' +sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABbCDGRSTtVW] \(_text\|__start_rodata\|_sinittext\|__inittext_end\|__bss_start\|_end\)$$/\#define VO_\2 _AC(0x\1,UL)/p' quiet_cmd_voffset = VOFFSET $@ cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@ diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 94b5991da001..0f41ca0e52c0 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -332,6 +332,8 @@ static size_t parse_elf(void *output) } const unsigned long kernel_text_size = VO___start_rodata - VO__text; +const unsigned long kernel_inittext_offset = VO__sinittext - VO__text; +const unsigned long kernel_inittext_size = VO___inittext_end - VO__sinittext; const unsigned long kernel_total_size = VO__end - VO__text; static u8 boot_heap[BOOT_HEAP_SIZE] __aligned(4); diff --git a/arch/x86/boot/compressed/sev-handle-vc.c b/arch/x86/boot/compressed/sev-handle-vc.c index 89dd02de2a0f..7530ad8b768b 100644 --- a/arch/x86/boot/compressed/sev-handle-vc.c +++ b/arch/x86/boot/compressed/sev-handle-vc.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "misc.h" +#include "error.h" #include "sev.h" #include <linux/kernel.h> @@ -14,6 +15,8 @@ #include <asm/fpu/xcr.h> #define __BOOT_COMPRESSED +#undef __init +#define __init /* Basic instruction decoding support needed */ #include "../../lib/inat.c" diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index fd1b67dfea22..6e5c32a53d03 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -32,102 +32,47 @@ struct ghcb *boot_ghcb; #undef __init #define __init -#undef __head -#define __head - #define __BOOT_COMPRESSED -extern struct svsm_ca *boot_svsm_caa; -extern u64 boot_svsm_caa_pa; - -struct svsm_ca *svsm_get_caa(void) -{ - return boot_svsm_caa; -} - -u64 svsm_get_caa_pa(void) -{ - return boot_svsm_caa_pa; -} - -int svsm_perform_call_protocol(struct svsm_call *call); - u8 snp_vmpl; +u16 ghcb_version; + +u64 boot_svsm_caa_pa; /* Include code for early handlers */ #include "../../boot/startup/sev-shared.c" -int svsm_perform_call_protocol(struct svsm_call *call) -{ - struct ghcb *ghcb; - int ret; - - if (boot_ghcb) - ghcb = boot_ghcb; - else - ghcb = NULL; - - do { - ret = ghcb ? svsm_perform_ghcb_protocol(ghcb, call) - : svsm_perform_msr_protocol(call); - } while (ret == -EAGAIN); - - return ret; -} - static bool sev_snp_enabled(void) { return sev_status & MSR_AMD64_SEV_SNP_ENABLED; } -static void __page_state_change(unsigned long paddr, enum psc_op op) -{ - u64 val, msr; - - /* - * If private -> shared then invalidate the page before requesting the - * state change in the RMP table. - */ - if (op == SNP_PAGE_STATE_SHARED) - pvalidate_4k_page(paddr, paddr, false); - - /* Save the current GHCB MSR value */ - msr = sev_es_rd_ghcb_msr(); - - /* Issue VMGEXIT to change the page state in RMP table. */ - sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op)); - VMGEXIT(); - - /* Read the response of the VMGEXIT. */ - val = sev_es_rd_ghcb_msr(); - if ((GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP) || GHCB_MSR_PSC_RESP_VAL(val)) - sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); - - /* Restore the GHCB MSR value */ - sev_es_wr_ghcb_msr(msr); - - /* - * Now that page state is changed in the RMP table, validate it so that it is - * consistent with the RMP entry. - */ - if (op == SNP_PAGE_STATE_PRIVATE) - pvalidate_4k_page(paddr, paddr, true); -} - void snp_set_page_private(unsigned long paddr) { + struct psc_desc d = { + SNP_PAGE_STATE_PRIVATE, + (struct svsm_ca *)boot_svsm_caa_pa, + boot_svsm_caa_pa + }; + if (!sev_snp_enabled()) return; - __page_state_change(paddr, SNP_PAGE_STATE_PRIVATE); + __page_state_change(paddr, paddr, &d); } void snp_set_page_shared(unsigned long paddr) { + struct psc_desc d = { + SNP_PAGE_STATE_SHARED, + (struct svsm_ca *)boot_svsm_caa_pa, + boot_svsm_caa_pa + }; + if (!sev_snp_enabled()) return; - __page_state_change(paddr, SNP_PAGE_STATE_SHARED); + __page_state_change(paddr, paddr, &d); } bool early_setup_ghcb(void) @@ -152,8 +97,14 @@ bool early_setup_ghcb(void) void snp_accept_memory(phys_addr_t start, phys_addr_t end) { + struct psc_desc d = { + SNP_PAGE_STATE_PRIVATE, + (struct svsm_ca *)boot_svsm_caa_pa, + boot_svsm_caa_pa + }; + for (phys_addr_t pa = start; pa < end; pa += PAGE_SIZE) - __page_state_change(pa, SNP_PAGE_STATE_PRIVATE); + __page_state_change(pa, pa, &d); } void sev_es_shutdown_ghcb(void) @@ -235,15 +186,23 @@ bool sev_es_check_ghcb_fault(unsigned long address) MSR_AMD64_SNP_VMSA_REG_PROT | \ MSR_AMD64_SNP_RESERVED_BIT13 | \ MSR_AMD64_SNP_RESERVED_BIT15 | \ + MSR_AMD64_SNP_SECURE_AVIC | \ MSR_AMD64_SNP_RESERVED_MASK) +#ifdef CONFIG_AMD_SECURE_AVIC +#define SNP_FEATURE_SECURE_AVIC MSR_AMD64_SNP_SECURE_AVIC +#else +#define SNP_FEATURE_SECURE_AVIC 0 +#endif + /* * SNP_FEATURES_PRESENT is the mask of SNP features that are implemented * by the guest kernel. As and when a new feature is implemented in the * guest kernel, a corresponding bit should be added to the mask. */ #define SNP_FEATURES_PRESENT (MSR_AMD64_SNP_DEBUG_SWAP | \ - MSR_AMD64_SNP_SECURE_TSC) + MSR_AMD64_SNP_SECURE_TSC | \ + SNP_FEATURE_SECURE_AVIC) u64 snp_get_unsupported_features(u64 status) { @@ -347,7 +306,7 @@ static bool early_snp_init(struct boot_params *bp) * running at VMPL0. The CA will be used to communicate with the * SVSM and request its services. */ - svsm_setup_ca(cc_info); + svsm_setup_ca(cc_info, rip_rel_ptr(&boot_ghcb_page)); /* * Pass run-time kernel a pointer to CC info via boot_params so EFI @@ -391,6 +350,8 @@ static int sev_check_cpu_support(void) if (!(eax & BIT(1))) return -ENODEV; + sev_snp_needs_sfw = !(ebx & BIT(31)); + return ebx & 0x3f; } @@ -453,30 +414,16 @@ void sev_enable(struct boot_params *bp) */ if (sev_status & MSR_AMD64_SEV_SNP_ENABLED) { u64 hv_features; - int ret; hv_features = get_hv_features(); if (!(hv_features & GHCB_HV_FT_SNP)) sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); /* - * Enforce running at VMPL0 or with an SVSM. - * - * Use RMPADJUST (see the rmpadjust() function for a description of - * what the instruction does) to update the VMPL1 permissions of a - * page. If the guest is running at VMPL0, this will succeed. If the - * guest is running at any other VMPL, this will fail. Linux SNP guests - * only ever run at a single VMPL level so permission mask changes of a - * lesser-privileged VMPL are a don't-care. - */ - ret = rmpadjust((unsigned long)&boot_ghcb_page, RMP_PG_SIZE_4K, 1); - - /* - * Running at VMPL0 is not required if an SVSM is present and the hypervisor - * supports the required SVSM GHCB events. + * Running at VMPL0 is required unless an SVSM is present and + * the hypervisor supports the required SVSM GHCB events. */ - if (ret && - !(snp_vmpl && (hv_features & GHCB_HV_FT_SNP_MULTI_VMPL))) + if (snp_vmpl && !(hv_features & GHCB_HV_FT_SNP_MULTI_VMPL)) sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_NOT_VMPL0); } @@ -550,7 +497,6 @@ bool early_is_sevsnp_guest(void) /* Obtain the address of the calling area to use */ boot_rdmsr(MSR_SVSM_CAA, &m); - boot_svsm_caa = (void *)m.q; boot_svsm_caa_pa = m.q; /* diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c index 63e037e94e4c..916bac09b464 100644 --- a/arch/x86/boot/cpuflags.c +++ b/arch/x86/boot/cpuflags.c @@ -106,18 +106,5 @@ void get_cpuflags(void) cpuid(0x80000001, &ignored, &ignored, &cpu.flags[6], &cpu.flags[1]); } - - if (max_amd_level >= 0x8000001f) { - u32 ebx; - - /* - * The X86_FEATURE_COHERENCY_SFW_NO feature bit is in - * the virtualization flags entry (word 8) and set by - * scattered.c, so the bit needs to be explicitly set. - */ - cpuid(0x8000001f, &ignored, &ebx, &ignored, &ignored); - if (ebx & BIT(31)) - set_bit(X86_FEATURE_COHERENCY_SFW_NO, cpu.flags); - } } } diff --git a/arch/x86/boot/startup/Makefile b/arch/x86/boot/startup/Makefile index b514f7e81332..e8fdf020b422 100644 --- a/arch/x86/boot/startup/Makefile +++ b/arch/x86/boot/startup/Makefile @@ -4,6 +4,7 @@ KBUILD_AFLAGS += -D__DISABLE_EXPORTS KBUILD_CFLAGS += -D__DISABLE_EXPORTS -mcmodel=small -fPIC \ -Os -DDISABLE_BRANCH_PROFILING \ $(DISABLE_STACKLEAK_PLUGIN) \ + $(DISABLE_LATENT_ENTROPY_PLUGIN) \ -fno-stack-protector -D__NO_FORTIFY \ -fno-jump-tables \ -include $(srctree)/include/linux/hidden.h @@ -19,6 +20,7 @@ KCOV_INSTRUMENT := n obj-$(CONFIG_X86_64) += gdt_idt.o map_kernel.o obj-$(CONFIG_AMD_MEM_ENCRYPT) += sme.o sev-startup.o +pi-objs := $(patsubst %.o,$(obj)/%.o,$(obj-y)) lib-$(CONFIG_X86_64) += la57toggle.o lib-$(CONFIG_EFI_MIXED) += efi-mixed.o @@ -28,3 +30,23 @@ lib-$(CONFIG_EFI_MIXED) += efi-mixed.o # to be linked into the decompressor or the EFI stub but not vmlinux # $(patsubst %.o,$(obj)/%.o,$(lib-y)): OBJECT_FILES_NON_STANDARD := y + +# +# Invoke objtool for each object individually to check for absolute +# relocations, even if other objtool actions are being deferred. +# +$(pi-objs): objtool-enabled = 1 +$(pi-objs): objtool-args = $(if $(delay-objtool),,$(objtool-args-y)) --noabs + +# +# Confine the startup code by prefixing all symbols with __pi_ (for position +# independent). This ensures that startup code can only call other startup +# code, or code that has explicitly been made accessible to it via a symbol +# alias. +# +$(obj)/%.pi.o: OBJCOPYFLAGS := --prefix-symbols=__pi_ +$(obj)/%.pi.o: $(obj)/%.o FORCE + $(call if_changed,objcopy) + +targets += $(obj-y) +obj-y := $(patsubst %.o,%.pi.o,$(obj-y)) diff --git a/arch/x86/boot/startup/exports.h b/arch/x86/boot/startup/exports.h new file mode 100644 index 000000000000..01d2363dc445 --- /dev/null +++ b/arch/x86/boot/startup/exports.h @@ -0,0 +1,14 @@ + +/* + * The symbols below are functions that are implemented by the startup code, + * but called at runtime by the SEV code residing in the core kernel. + */ +PROVIDE(early_set_pages_state = __pi_early_set_pages_state); +PROVIDE(early_snp_set_memory_private = __pi_early_snp_set_memory_private); +PROVIDE(early_snp_set_memory_shared = __pi_early_snp_set_memory_shared); +PROVIDE(get_hv_features = __pi_get_hv_features); +PROVIDE(sev_es_terminate = __pi_sev_es_terminate); +PROVIDE(snp_cpuid = __pi_snp_cpuid); +PROVIDE(snp_cpuid_get_table = __pi_snp_cpuid_get_table); +PROVIDE(svsm_issue_call = __pi_svsm_issue_call); +PROVIDE(svsm_process_result_codes = __pi_svsm_process_result_codes); diff --git a/arch/x86/boot/startup/gdt_idt.c b/arch/x86/boot/startup/gdt_idt.c index a3112a69b06a..d16102abdaec 100644 --- a/arch/x86/boot/startup/gdt_idt.c +++ b/arch/x86/boot/startup/gdt_idt.c @@ -24,7 +24,7 @@ static gate_desc bringup_idt_table[NUM_EXCEPTION_VECTORS] __page_aligned_data; /* This may run while still in the direct mapping */ -void __head startup_64_load_idt(void *vc_handler) +void startup_64_load_idt(void *vc_handler) { struct desc_ptr desc = { .address = (unsigned long)rip_rel_ptr(bringup_idt_table), @@ -46,7 +46,7 @@ void __head startup_64_load_idt(void *vc_handler) /* * Setup boot CPU state needed before kernel switches to virtual addresses. */ -void __head startup_64_setup_gdt_idt(void) +void __init startup_64_setup_gdt_idt(void) { struct gdt_page *gp = rip_rel_ptr((void *)(__force unsigned long)&gdt_page); void *handler = NULL; diff --git a/arch/x86/boot/startup/map_kernel.c b/arch/x86/boot/startup/map_kernel.c index 332dbe6688c4..83ba98d61572 100644 --- a/arch/x86/boot/startup/map_kernel.c +++ b/arch/x86/boot/startup/map_kernel.c @@ -30,7 +30,7 @@ static inline bool check_la57_support(void) return true; } -static unsigned long __head sme_postprocess_startup(struct boot_params *bp, +static unsigned long __init sme_postprocess_startup(struct boot_params *bp, pmdval_t *pmd, unsigned long p2v_offset) { @@ -84,7 +84,7 @@ static unsigned long __head sme_postprocess_startup(struct boot_params *bp, * the 1:1 mapping of memory. Kernel virtual addresses can be determined by * subtracting p2v_offset from the RIP-relative address. */ -unsigned long __head __startup_64(unsigned long p2v_offset, +unsigned long __init __startup_64(unsigned long p2v_offset, struct boot_params *bp) { pmd_t (*early_pgts)[PTRS_PER_PMD] = rip_rel_ptr(early_dynamic_pgts); diff --git a/arch/x86/boot/startup/sev-shared.c b/arch/x86/boot/startup/sev-shared.c index a34cd19796f9..4e22ffd73516 100644 --- a/arch/x86/boot/startup/sev-shared.c +++ b/arch/x86/boot/startup/sev-shared.c @@ -12,35 +12,12 @@ #include <asm/setup_data.h> #ifndef __BOOT_COMPRESSED -#define error(v) pr_err(v) #define has_cpuflag(f) boot_cpu_has(f) #else #undef WARN #define WARN(condition, format...) (!!(condition)) -#undef vc_forward_exception -#define vc_forward_exception(c) panic("SNP: Hypervisor requested exception\n") #endif -/* - * SVSM related information: - * During boot, the page tables are set up as identity mapped and later - * changed to use kernel virtual addresses. Maintain separate virtual and - * physical addresses for the CAA to allow SVSM functions to be used during - * early boot, both with identity mapped virtual addresses and proper kernel - * virtual addresses. - */ -struct svsm_ca *boot_svsm_caa __ro_after_init; -u64 boot_svsm_caa_pa __ro_after_init; - -/* - * Since feature negotiation related variables are set early in the boot - * process they must reside in the .data section so as not to be zeroed - * out when the .bss section is later cleared. - * - * GHCB protocol version negotiated with the hypervisor. - */ -static u16 ghcb_version __ro_after_init; - /* Copy of the SNP firmware's CPUID page. */ static struct snp_cpuid_table cpuid_table_copy __ro_after_init; @@ -54,17 +31,9 @@ static u32 cpuid_std_range_max __ro_after_init; static u32 cpuid_hyp_range_max __ro_after_init; static u32 cpuid_ext_range_max __ro_after_init; -bool __init sev_es_check_cpu_features(void) -{ - if (!has_cpuflag(X86_FEATURE_RDRAND)) { - error("RDRAND instruction not supported - no trusted source of randomness available\n"); - return false; - } +bool sev_snp_needs_sfw; - return true; -} - -void __head __noreturn +void __noreturn sev_es_terminate(unsigned int set, unsigned int reason) { u64 val = GHCB_MSR_TERM_REQ; @@ -83,7 +52,7 @@ sev_es_terminate(unsigned int set, unsigned int reason) /* * The hypervisor features are available from GHCB version 2 onward. */ -u64 get_hv_features(void) +u64 __init get_hv_features(void) { u64 val; @@ -100,72 +69,7 @@ u64 get_hv_features(void) return GHCB_MSR_HV_FT_RESP_VAL(val); } -void snp_register_ghcb_early(unsigned long paddr) -{ - unsigned long pfn = paddr >> PAGE_SHIFT; - u64 val; - - sev_es_wr_ghcb_msr(GHCB_MSR_REG_GPA_REQ_VAL(pfn)); - VMGEXIT(); - - val = sev_es_rd_ghcb_msr(); - - /* If the response GPA is not ours then abort the guest */ - if ((GHCB_RESP_CODE(val) != GHCB_MSR_REG_GPA_RESP) || - (GHCB_MSR_REG_GPA_RESP_VAL(val) != pfn)) - sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_REGISTER); -} - -bool sev_es_negotiate_protocol(void) -{ - u64 val; - - /* Do the GHCB protocol version negotiation */ - sev_es_wr_ghcb_msr(GHCB_MSR_SEV_INFO_REQ); - VMGEXIT(); - val = sev_es_rd_ghcb_msr(); - - if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP) - return false; - - if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTOCOL_MIN || - GHCB_MSR_PROTO_MIN(val) > GHCB_PROTOCOL_MAX) - return false; - - ghcb_version = min_t(size_t, GHCB_MSR_PROTO_MAX(val), GHCB_PROTOCOL_MAX); - - return true; -} - -static enum es_result verify_exception_info(struct ghcb *ghcb, struct es_em_ctxt *ctxt) -{ - u32 ret; - - ret = ghcb->save.sw_exit_info_1 & GENMASK_ULL(31, 0); - if (!ret) - return ES_OK; - - if (ret == 1) { - u64 info = ghcb->save.sw_exit_info_2; - unsigned long v = info & SVM_EVTINJ_VEC_MASK; - - /* Check if exception information from hypervisor is sane. */ - if ((info & SVM_EVTINJ_VALID) && - ((v == X86_TRAP_GP) || (v == X86_TRAP_UD)) && - ((info & SVM_EVTINJ_TYPE_MASK) == SVM_EVTINJ_TYPE_EXEPT)) { - ctxt->fi.vector = v; - - if (info & SVM_EVTINJ_VALID_ERR) - ctxt->fi.error_code = info >> 32; - - return ES_EXCEPTION; - } - } - - return ES_VMM_ERROR; -} - -static inline int svsm_process_result_codes(struct svsm_call *call) +int svsm_process_result_codes(struct svsm_call *call) { switch (call->rax_out) { case SVSM_SUCCESS: @@ -193,7 +97,7 @@ static inline int svsm_process_result_codes(struct svsm_call *call) * - RAX specifies the SVSM protocol/callid as input and the return code * as output. */ -static __always_inline void svsm_issue_call(struct svsm_call *call, u8 *pending) +void svsm_issue_call(struct svsm_call *call, u8 *pending) { register unsigned long rax asm("rax") = call->rax; register unsigned long rcx asm("rcx") = call->rcx; @@ -216,7 +120,7 @@ static __always_inline void svsm_issue_call(struct svsm_call *call, u8 *pending) call->r9_out = r9; } -static int svsm_perform_msr_protocol(struct svsm_call *call) +int svsm_perform_msr_protocol(struct svsm_call *call) { u8 pending = 0; u64 val, resp; @@ -247,63 +151,6 @@ static int svsm_perform_msr_protocol(struct svsm_call *call) return svsm_process_result_codes(call); } -static int svsm_perform_ghcb_protocol(struct ghcb *ghcb, struct svsm_call *call) -{ - struct es_em_ctxt ctxt; - u8 pending = 0; - - vc_ghcb_invalidate(ghcb); - - /* - * Fill in protocol and format specifiers. This can be called very early - * in the boot, so use rip-relative references as needed. - */ - ghcb->protocol_version = ghcb_version; - ghcb->ghcb_usage = GHCB_DEFAULT_USAGE; - - ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_SNP_RUN_VMPL); - ghcb_set_sw_exit_info_1(ghcb, 0); - ghcb_set_sw_exit_info_2(ghcb, 0); - - sev_es_wr_ghcb_msr(__pa(ghcb)); - - svsm_issue_call(call, &pending); - - if (pending) - return -EINVAL; - - switch (verify_exception_info(ghcb, &ctxt)) { - case ES_OK: - break; - case ES_EXCEPTION: - vc_forward_exception(&ctxt); - fallthrough; - default: - return -EINVAL; - } - - return svsm_process_result_codes(call); -} - -enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, - struct es_em_ctxt *ctxt, - u64 exit_code, u64 exit_info_1, - u64 exit_info_2) -{ - /* Fill in protocol and format specifiers */ - ghcb->protocol_version = ghcb_version; - ghcb->ghcb_usage = GHCB_DEFAULT_USAGE; - - ghcb_set_sw_exit_code(ghcb, exit_code); - ghcb_set_sw_exit_info_1(ghcb, exit_info_1); - ghcb_set_sw_exit_info_2(ghcb, exit_info_2); - - sev_es_wr_ghcb_msr(__pa(ghcb)); - VMGEXIT(); - - return verify_exception_info(ghcb, ctxt); -} - static int __sev_cpuid_hv(u32 fn, int reg_idx, u32 *reg) { u64 val; @@ -342,44 +189,7 @@ static int __sev_cpuid_hv_msr(struct cpuid_leaf *leaf) return ret; } -static int __sev_cpuid_hv_ghcb(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf) -{ - u32 cr4 = native_read_cr4(); - int ret; - - ghcb_set_rax(ghcb, leaf->fn); - ghcb_set_rcx(ghcb, leaf->subfn); - - if (cr4 & X86_CR4_OSXSAVE) - /* Safe to read xcr0 */ - ghcb_set_xcr0(ghcb, xgetbv(XCR_XFEATURE_ENABLED_MASK)); - else - /* xgetbv will cause #UD - use reset value for xcr0 */ - ghcb_set_xcr0(ghcb, 1); - - ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0); - if (ret != ES_OK) - return ret; - - if (!(ghcb_rax_is_valid(ghcb) && - ghcb_rbx_is_valid(ghcb) && - ghcb_rcx_is_valid(ghcb) && - ghcb_rdx_is_valid(ghcb))) - return ES_VMM_ERROR; - leaf->eax = ghcb->save.rax; - leaf->ebx = ghcb->save.rbx; - leaf->ecx = ghcb->save.rcx; - leaf->edx = ghcb->save.rdx; - - return ES_OK; -} - -static int sev_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf) -{ - return ghcb ? __sev_cpuid_hv_ghcb(ghcb, ctxt, leaf) - : __sev_cpuid_hv_msr(leaf); -} /* * This may be called early while still running on the initial identity @@ -412,7 +222,7 @@ const struct snp_cpuid_table *snp_cpuid_get_table(void) * * Return: XSAVE area size on success, 0 otherwise. */ -static u32 __head snp_cpuid_calc_xsave_size(u64 xfeatures_en, bool compacted) +static u32 snp_cpuid_calc_xsave_size(u64 xfeatures_en, bool compacted) { const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); u64 xfeatures_found = 0; @@ -448,7 +258,7 @@ static u32 __head snp_cpuid_calc_xsave_size(u64 xfeatures_en, bool compacted) return xsave_size; } -static bool __head +static bool snp_cpuid_get_validated_func(struct cpuid_leaf *leaf) { const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); @@ -484,21 +294,21 @@ snp_cpuid_get_validated_func(struct cpuid_leaf *leaf) return false; } -static void snp_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf) +static void snp_cpuid_hv_msr(void *ctx, struct cpuid_leaf *leaf) { - if (sev_cpuid_hv(ghcb, ctxt, leaf)) + if (__sev_cpuid_hv_msr(leaf)) sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID_HV); } -static int __head -snp_cpuid_postprocess(struct ghcb *ghcb, struct es_em_ctxt *ctxt, - struct cpuid_leaf *leaf) +static int +snp_cpuid_postprocess(void (*cpuid_fn)(void *ctx, struct cpuid_leaf *leaf), + void *ctx, struct cpuid_leaf *leaf) { struct cpuid_leaf leaf_hv = *leaf; switch (leaf->fn) { case 0x1: - snp_cpuid_hv(ghcb, ctxt, &leaf_hv); + cpuid_fn(ctx, &leaf_hv); /* initial APIC ID */ leaf->ebx = (leaf_hv.ebx & GENMASK(31, 24)) | (leaf->ebx & GENMASK(23, 0)); @@ -517,7 +327,7 @@ snp_cpuid_postprocess(struct ghcb *ghcb, struct es_em_ctxt *ctxt, break; case 0xB: leaf_hv.subfn = 0; - snp_cpuid_hv(ghcb, ctxt, &leaf_hv); + cpuid_fn(ctx, &leaf_hv); /* extended APIC ID */ leaf->edx = leaf_hv.edx; @@ -565,7 +375,7 @@ snp_cpuid_postprocess(struct ghcb *ghcb, struct es_em_ctxt *ctxt, } break; case 0x8000001E: - snp_cpuid_hv(ghcb, ctxt, &leaf_hv); + cpuid_fn(ctx, &leaf_hv); /* extended APIC ID */ leaf->eax = leaf_hv.eax; @@ -586,8 +396,8 @@ snp_cpuid_postprocess(struct ghcb *ghcb, struct es_em_ctxt *ctxt, * Returns -EOPNOTSUPP if feature not enabled. Any other non-zero return value * should be treated as fatal by caller. */ -int __head -snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf) +int snp_cpuid(void (*cpuid_fn)(void *ctx, struct cpuid_leaf *leaf), + void *ctx, struct cpuid_leaf *leaf) { const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); @@ -621,7 +431,7 @@ snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf) return 0; } - return snp_cpuid_postprocess(ghcb, ctxt, leaf); + return snp_cpuid_postprocess(cpuid_fn, ctx, leaf); } /* @@ -629,7 +439,7 @@ snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf) * page yet, so it only supports the MSR based communication with the * hypervisor and only the CPUID exit-code. */ -void __head do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) +void do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) { unsigned int subfn = lower_bits(regs->cx, 32); unsigned int fn = lower_bits(regs->ax, 32); @@ -648,13 +458,24 @@ void __head do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) leaf.fn = fn; leaf.subfn = subfn; - ret = snp_cpuid(NULL, NULL, &leaf); + /* + * If SNP is active, then snp_cpuid() uses the CPUID table to obtain the + * CPUID values (with possible HV interaction during post-processing of + * the values). But if SNP is not active (no CPUID table present), then + * snp_cpuid() returns -EOPNOTSUPP so that an SEV-ES guest can call the + * HV to obtain the CPUID information. + */ + ret = snp_cpuid(snp_cpuid_hv_msr, NULL, &leaf); if (!ret) goto cpuid_done; if (ret != -EOPNOTSUPP) goto fail; + /* + * This is reached by a SEV-ES guest and needs to invoke the HV for + * the CPUID data. + */ if (__sev_cpuid_hv_msr(&leaf)) goto fail; @@ -705,7 +526,7 @@ struct cc_setup_data { * Search for a Confidential Computing blob passed in as a setup_data entry * via the Linux Boot Protocol. */ -static __head +static __init struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp) { struct cc_setup_data *sd = NULL; @@ -733,7 +554,7 @@ struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp) * mapping needs to be updated in sync with all the changes to virtual memory * layout and related mapping facilities throughout the boot process. */ -static void __head setup_cpuid_table(const struct cc_blob_sev_info *cc_info) +static void __init setup_cpuid_table(const struct cc_blob_sev_info *cc_info) { const struct snp_cpuid_table *cpuid_table_fw, *cpuid_table; int i; @@ -761,13 +582,24 @@ static void __head setup_cpuid_table(const struct cc_blob_sev_info *cc_info) } } -static void __head svsm_pval_4k_page(unsigned long paddr, bool validate) +static int svsm_call_msr_protocol(struct svsm_call *call) +{ + int ret; + + do { + ret = svsm_perform_msr_protocol(call); + } while (ret == -EAGAIN); + + return ret; +} + +static void svsm_pval_4k_page(unsigned long paddr, bool validate, + struct svsm_ca *caa, u64 caa_pa) { struct svsm_pvalidate_call *pc; struct svsm_call call = {}; unsigned long flags; u64 pc_pa; - int ret; /* * This can be called very early in the boot, use native functions in @@ -775,10 +607,10 @@ static void __head svsm_pval_4k_page(unsigned long paddr, bool validate) */ flags = native_local_irq_save(); - call.caa = svsm_get_caa(); + call.caa = caa; pc = (struct svsm_pvalidate_call *)call.caa->svsm_buffer; - pc_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer); + pc_pa = caa_pa + offsetof(struct svsm_ca, svsm_buffer); pc->num_entries = 1; pc->cur_index = 0; @@ -792,20 +624,24 @@ static void __head svsm_pval_4k_page(unsigned long paddr, bool validate) call.rax = SVSM_CORE_CALL(SVSM_CORE_PVALIDATE); call.rcx = pc_pa; - ret = svsm_perform_call_protocol(&call); - if (ret) + /* + * Use the MSR protocol exclusively, so that this code is usable in + * startup code where VA/PA translations of the GHCB page's address may + * be problematic. + */ + if (svsm_call_msr_protocol(&call)) sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE); native_local_irq_restore(flags); } -static void __head pvalidate_4k_page(unsigned long vaddr, unsigned long paddr, - bool validate) +static void pvalidate_4k_page(unsigned long vaddr, unsigned long paddr, + bool validate, struct svsm_ca *caa, u64 caa_pa) { int ret; if (snp_vmpl) { - svsm_pval_4k_page(paddr, validate); + svsm_pval_4k_page(paddr, validate, caa, caa_pa); } else { ret = pvalidate(vaddr, RMP_PG_SIZE_4K, validate); if (ret) @@ -816,15 +652,51 @@ static void __head pvalidate_4k_page(unsigned long vaddr, unsigned long paddr, * If validating memory (making it private) and affected by the * cache-coherency vulnerability, perform the cache eviction mitigation. */ - if (validate && !has_cpuflag(X86_FEATURE_COHERENCY_SFW_NO)) + if (validate && sev_snp_needs_sfw) sev_evict_cache((void *)vaddr, 1); } +static void __page_state_change(unsigned long vaddr, unsigned long paddr, + const struct psc_desc *desc) +{ + u64 val, msr; + + /* + * If private -> shared then invalidate the page before requesting the + * state change in the RMP table. + */ + if (desc->op == SNP_PAGE_STATE_SHARED) + pvalidate_4k_page(vaddr, paddr, false, desc->ca, desc->caa_pa); + + /* Save the current GHCB MSR value */ + msr = sev_es_rd_ghcb_msr(); + + /* Issue VMGEXIT to change the page state in RMP table. */ + sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, desc->op)); + VMGEXIT(); + + /* Read the response of the VMGEXIT. */ + val = sev_es_rd_ghcb_msr(); + if ((GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP) || GHCB_MSR_PSC_RESP_VAL(val)) + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); + + /* Restore the GHCB MSR value */ + sev_es_wr_ghcb_msr(msr); + + /* + * Now that page state is changed in the RMP table, validate it so that it is + * consistent with the RMP entry. + */ + if (desc->op == SNP_PAGE_STATE_PRIVATE) + pvalidate_4k_page(vaddr, paddr, true, desc->ca, desc->caa_pa); +} + /* * Maintain the GPA of the SVSM Calling Area (CA) in order to utilize the SVSM * services needed when not running in VMPL0. */ -static bool __head svsm_setup_ca(const struct cc_blob_sev_info *cc_info) +static bool __init svsm_setup_ca(const struct cc_blob_sev_info *cc_info, + void *page) { struct snp_secrets_page *secrets_page; struct snp_cpuid_table *cpuid_table; @@ -847,7 +719,7 @@ static bool __head svsm_setup_ca(const struct cc_blob_sev_info *cc_info) * routine is running identity mapped when called, both by the decompressor * code and the early kernel code. */ - if (!rmpadjust((unsigned long)rip_rel_ptr(&boot_ghcb_page), RMP_PG_SIZE_4K, 1)) + if (!rmpadjust((unsigned long)page, RMP_PG_SIZE_4K, 1)) return false; /* @@ -875,11 +747,6 @@ static bool __head svsm_setup_ca(const struct cc_blob_sev_info *cc_info) if (caa & (PAGE_SIZE - 1)) sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SVSM_CAA); - /* - * The CA is identity mapped when this routine is called, both by the - * decompressor code and the early kernel code. - */ - boot_svsm_caa = (struct svsm_ca *)caa; boot_svsm_caa_pa = caa; /* Advertise the SVSM presence via CPUID. */ diff --git a/arch/x86/boot/startup/sev-startup.c b/arch/x86/boot/startup/sev-startup.c index 0b7e3b950183..09725428d3e6 100644 --- a/arch/x86/boot/startup/sev-startup.c +++ b/arch/x86/boot/startup/sev-startup.c @@ -41,143 +41,14 @@ #include <asm/cpuid/api.h> #include <asm/cmdline.h> -/* For early boot hypervisor communication in SEV-ES enabled guests */ -struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE); - -/* - * Needs to be in the .data section because we need it NULL before bss is - * cleared - */ -struct ghcb *boot_ghcb __section(".data"); - -/* Bitmap of SEV features supported by the hypervisor */ -u64 sev_hv_features __ro_after_init; - -/* Secrets page physical address from the CC blob */ -u64 sev_secrets_pa __ro_after_init; - -/* For early boot SVSM communication */ -struct svsm_ca boot_svsm_ca_page __aligned(PAGE_SIZE); - -DEFINE_PER_CPU(struct svsm_ca *, svsm_caa); -DEFINE_PER_CPU(u64, svsm_caa_pa); - -/* - * Nothing shall interrupt this code path while holding the per-CPU - * GHCB. The backup GHCB is only for NMIs interrupting this path. - * - * Callers must disable local interrupts around it. - */ -noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state) -{ - struct sev_es_runtime_data *data; - struct ghcb *ghcb; - - WARN_ON(!irqs_disabled()); - - data = this_cpu_read(runtime_data); - ghcb = &data->ghcb_page; - - if (unlikely(data->ghcb_active)) { - /* GHCB is already in use - save its contents */ - - if (unlikely(data->backup_ghcb_active)) { - /* - * Backup-GHCB is also already in use. There is no way - * to continue here so just kill the machine. To make - * panic() work, mark GHCBs inactive so that messages - * can be printed out. - */ - data->ghcb_active = false; - data->backup_ghcb_active = false; - - instrumentation_begin(); - panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use"); - instrumentation_end(); - } - - /* Mark backup_ghcb active before writing to it */ - data->backup_ghcb_active = true; - - state->ghcb = &data->backup_ghcb; - - /* Backup GHCB content */ - *state->ghcb = *ghcb; - } else { - state->ghcb = NULL; - data->ghcb_active = true; - } - - return ghcb; -} - /* Include code shared with pre-decompression boot stage */ #include "sev-shared.c" -noinstr void __sev_put_ghcb(struct ghcb_state *state) -{ - struct sev_es_runtime_data *data; - struct ghcb *ghcb; - - WARN_ON(!irqs_disabled()); - - data = this_cpu_read(runtime_data); - ghcb = &data->ghcb_page; - - if (state->ghcb) { - /* Restore GHCB from Backup */ - *ghcb = *state->ghcb; - data->backup_ghcb_active = false; - state->ghcb = NULL; - } else { - /* - * Invalidate the GHCB so a VMGEXIT instruction issued - * from userspace won't appear to be valid. - */ - vc_ghcb_invalidate(ghcb); - data->ghcb_active = false; - } -} - -int svsm_perform_call_protocol(struct svsm_call *call) -{ - struct ghcb_state state; - unsigned long flags; - struct ghcb *ghcb; - int ret; - - /* - * This can be called very early in the boot, use native functions in - * order to avoid paravirt issues. - */ - flags = native_local_irq_save(); - - if (sev_cfg.ghcbs_initialized) - ghcb = __sev_get_ghcb(&state); - else if (boot_ghcb) - ghcb = boot_ghcb; - else - ghcb = NULL; - - do { - ret = ghcb ? svsm_perform_ghcb_protocol(ghcb, call) - : svsm_perform_msr_protocol(call); - } while (ret == -EAGAIN); - - if (sev_cfg.ghcbs_initialized) - __sev_put_ghcb(&state); - - native_local_irq_restore(flags); - - return ret; -} - -void __head +void early_set_pages_state(unsigned long vaddr, unsigned long paddr, - unsigned long npages, enum psc_op op) + unsigned long npages, const struct psc_desc *desc) { unsigned long paddr_end; - u64 val; vaddr = vaddr & PAGE_MASK; @@ -185,42 +56,22 @@ early_set_pages_state(unsigned long vaddr, unsigned long paddr, paddr_end = paddr + (npages << PAGE_SHIFT); while (paddr < paddr_end) { - /* Page validation must be rescinded before changing to shared */ - if (op == SNP_PAGE_STATE_SHARED) - pvalidate_4k_page(vaddr, paddr, false); - - /* - * Use the MSR protocol because this function can be called before - * the GHCB is established. - */ - sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op)); - VMGEXIT(); - - val = sev_es_rd_ghcb_msr(); - - if (GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP) - goto e_term; - - if (GHCB_MSR_PSC_RESP_VAL(val)) - goto e_term; - - /* Page validation must be performed after changing to private */ - if (op == SNP_PAGE_STATE_PRIVATE) - pvalidate_4k_page(vaddr, paddr, true); + __page_state_change(vaddr, paddr, desc); vaddr += PAGE_SIZE; paddr += PAGE_SIZE; } - - return; - -e_term: - sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); } -void __head early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, +void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned long npages) { + struct psc_desc d = { + SNP_PAGE_STATE_PRIVATE, + rip_rel_ptr(&boot_svsm_ca_page), + boot_svsm_caa_pa + }; + /* * This can be invoked in early boot while running identity mapped, so * use an open coded check for SNP instead of using cc_platform_has(). @@ -234,12 +85,18 @@ void __head early_snp_set_memory_private(unsigned long vaddr, unsigned long padd * Ask the hypervisor to mark the memory pages as private in the RMP * table. */ - early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_PRIVATE); + early_set_pages_state(vaddr, paddr, npages, &d); } -void __head early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, +void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned long npages) { + struct psc_desc d = { + SNP_PAGE_STATE_SHARED, + rip_rel_ptr(&boot_svsm_ca_page), + boot_svsm_caa_pa + }; + /* * This can be invoked in early boot while running identity mapped, so * use an open coded check for SNP instead of using cc_platform_has(). @@ -250,7 +107,7 @@ void __head early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr return; /* Ask hypervisor to mark the memory pages shared in the RMP table. */ - early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_SHARED); + early_set_pages_state(vaddr, paddr, npages, &d); } /* @@ -266,7 +123,7 @@ void __head early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr * * Scan for the blob in that order. */ -static __head struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp) +static struct cc_blob_sev_info *__init find_cc_blob(struct boot_params *bp) { struct cc_blob_sev_info *cc_info; @@ -287,15 +144,15 @@ static __head struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp) found_cc_info: if (cc_info->magic != CC_BLOB_SEV_HDR_MAGIC) - snp_abort(); + sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); return cc_info; } -static __head void svsm_setup(struct cc_blob_sev_info *cc_info) +static void __init svsm_setup(struct cc_blob_sev_info *cc_info) { + struct snp_secrets_page *secrets = (void *)cc_info->secrets_phys; struct svsm_call call = {}; - int ret; u64 pa; /* @@ -303,7 +160,7 @@ static __head void svsm_setup(struct cc_blob_sev_info *cc_info) * running at VMPL0. The CA will be used to communicate with the * SVSM to perform the SVSM services. */ - if (!svsm_setup_ca(cc_info)) + if (!svsm_setup_ca(cc_info, rip_rel_ptr(&boot_svsm_ca_page))) return; /* @@ -315,25 +172,25 @@ static __head void svsm_setup(struct cc_blob_sev_info *cc_info) pa = (u64)rip_rel_ptr(&boot_svsm_ca_page); /* - * Switch over to the boot SVSM CA while the current CA is still - * addressable. There is no GHCB at this point so use the MSR protocol. + * Switch over to the boot SVSM CA while the current CA is still 1:1 + * mapped and thus addressable with VA == PA. There is no GHCB at this + * point so use the MSR protocol. * * SVSM_CORE_REMAP_CA call: * RAX = 0 (Protocol=0, CallID=0) * RCX = New CA GPA */ - call.caa = svsm_get_caa(); + call.caa = (struct svsm_ca *)secrets->svsm_caa; call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA); call.rcx = pa; - ret = svsm_perform_call_protocol(&call); - if (ret) + + if (svsm_call_msr_protocol(&call)) sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SVSM_CA_REMAP_FAIL); - boot_svsm_caa = (struct svsm_ca *)pa; boot_svsm_caa_pa = pa; } -bool __head snp_init(struct boot_params *bp) +bool __init snp_init(struct boot_params *bp) { struct cc_blob_sev_info *cc_info; @@ -361,8 +218,3 @@ bool __head snp_init(struct boot_params *bp) return true; } - -void __head __noreturn snp_abort(void) -{ - sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); -} diff --git a/arch/x86/boot/startup/sme.c b/arch/x86/boot/startup/sme.c index 70ea1748c0a7..e7ea65f3f1d6 100644 --- a/arch/x86/boot/startup/sme.c +++ b/arch/x86/boot/startup/sme.c @@ -91,7 +91,7 @@ struct sme_populate_pgd_data { */ static char sme_workarea[2 * PMD_SIZE] __section(".init.scratch"); -static void __head sme_clear_pgd(struct sme_populate_pgd_data *ppd) +static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd) { unsigned long pgd_start, pgd_end, pgd_size; pgd_t *pgd_p; @@ -106,7 +106,7 @@ static void __head sme_clear_pgd(struct sme_populate_pgd_data *ppd) memset(pgd_p, 0, pgd_size); } -static pud_t __head *sme_prepare_pgd(struct sme_populate_pgd_data *ppd) +static pud_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd) { pgd_t *pgd; p4d_t *p4d; @@ -143,7 +143,7 @@ static pud_t __head *sme_prepare_pgd(struct sme_populate_pgd_data *ppd) return pud; } -static void __head sme_populate_pgd_large(struct sme_populate_pgd_data *ppd) +static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd) { pud_t *pud; pmd_t *pmd; @@ -159,7 +159,7 @@ static void __head sme_populate_pgd_large(struct sme_populate_pgd_data *ppd) set_pmd(pmd, __pmd(ppd->paddr | ppd->pmd_flags)); } -static void __head sme_populate_pgd(struct sme_populate_pgd_data *ppd) +static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd) { pud_t *pud; pmd_t *pmd; @@ -185,7 +185,7 @@ static void __head sme_populate_pgd(struct sme_populate_pgd_data *ppd) set_pte(pte, __pte(ppd->paddr | ppd->pte_flags)); } -static void __head __sme_map_range_pmd(struct sme_populate_pgd_data *ppd) +static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd) { while (ppd->vaddr < ppd->vaddr_end) { sme_populate_pgd_large(ppd); @@ -195,7 +195,7 @@ static void __head __sme_map_range_pmd(struct sme_populate_pgd_data *ppd) } } -static void __head __sme_map_range_pte(struct sme_populate_pgd_data *ppd) +static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd) { while (ppd->vaddr < ppd->vaddr_end) { sme_populate_pgd(ppd); @@ -205,7 +205,7 @@ static void __head __sme_map_range_pte(struct sme_populate_pgd_data *ppd) } } -static void __head __sme_map_range(struct sme_populate_pgd_data *ppd, +static void __init __sme_map_range(struct sme_populate_pgd_data *ppd, pmdval_t pmd_flags, pteval_t pte_flags) { unsigned long vaddr_end; @@ -229,22 +229,22 @@ static void __head __sme_map_range(struct sme_populate_pgd_data *ppd, __sme_map_range_pte(ppd); } -static void __head sme_map_range_encrypted(struct sme_populate_pgd_data *ppd) +static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd) { __sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC); } -static void __head sme_map_range_decrypted(struct sme_populate_pgd_data *ppd) +static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd) { __sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC); } -static void __head sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd) +static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd) { __sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP); } -static unsigned long __head sme_pgtable_calc(unsigned long len) +static unsigned long __init sme_pgtable_calc(unsigned long len) { unsigned long entries = 0, tables = 0; @@ -281,7 +281,7 @@ static unsigned long __head sme_pgtable_calc(unsigned long len) return entries + tables; } -void __head sme_encrypt_kernel(struct boot_params *bp) +void __init sme_encrypt_kernel(struct boot_params *bp) { unsigned long workarea_start, workarea_end, workarea_len; unsigned long execute_start, execute_end, execute_len; @@ -485,7 +485,7 @@ void __head sme_encrypt_kernel(struct boot_params *bp) native_write_cr3(__native_read_cr3()); } -void __head sme_enable(struct boot_params *bp) +void __init sme_enable(struct boot_params *bp) { unsigned int eax, ebx, ecx, edx; unsigned long feature_mask; @@ -521,6 +521,7 @@ void __head sme_enable(struct boot_params *bp) return; me_mask = 1UL << (ebx & 0x3f); + sev_snp_needs_sfw = !(ebx & BIT(31)); /* Check the SEV MSR whether SEV or SME is enabled */ sev_status = msr = native_rdmsrq(MSR_AMD64_SEV); @@ -531,7 +532,7 @@ void __head sme_enable(struct boot_params *bp) * enablement abort the guest. */ if (snp_en ^ !!(msr & MSR_AMD64_SEV_SNP_ENABLED)) - snp_abort(); + sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); /* Check if memory encryption is enabled */ if (feature_mask == AMD_SME_BIT) { @@ -567,7 +568,6 @@ void __head sme_enable(struct boot_params *bp) #ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION /* Local version for startup code, which never operates on user page tables */ -__weak pgd_t __pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd) { return pgd; diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c index d4610af68114..989ca9f72ba3 100644 --- a/arch/x86/coco/core.c +++ b/arch/x86/coco/core.c @@ -104,6 +104,9 @@ static bool noinstr amd_cc_platform_has(enum cc_attr attr) case CC_ATTR_HOST_SEV_SNP: return cc_flags.host_sev_snp; + case CC_ATTR_SNP_SECURE_AVIC: + return sev_status & MSR_AMD64_SNP_SECURE_AVIC; + default: return false; } diff --git a/arch/x86/coco/sev/Makefile b/arch/x86/coco/sev/Makefile index 342d79f0ab6a..3b8ae214a6a6 100644 --- a/arch/x86/coco/sev/Makefile +++ b/arch/x86/coco/sev/Makefile @@ -1,10 +1,10 @@ # SPDX-License-Identifier: GPL-2.0 -obj-y += core.o sev-nmi.o vc-handle.o +obj-y += core.o noinstr.o vc-handle.o # Clang 14 and older may fail to respect __no_sanitize_undefined when inlining -UBSAN_SANITIZE_sev-nmi.o := n +UBSAN_SANITIZE_noinstr.o := n # GCC may fail to respect __no_sanitize_address or __no_kcsan when inlining -KASAN_SANITIZE_sev-nmi.o := n -KCSAN_SANITIZE_sev-nmi.o := n +KASAN_SANITIZE_noinstr.o := n +KCSAN_SANITIZE_noinstr.o := n diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index 14ef5908fb27..9ae3b11754e6 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -46,6 +46,48 @@ #include <asm/cmdline.h> #include <asm/msr.h> +/* Bitmap of SEV features supported by the hypervisor */ +u64 sev_hv_features __ro_after_init; +SYM_PIC_ALIAS(sev_hv_features); + +/* Secrets page physical address from the CC blob */ +u64 sev_secrets_pa __ro_after_init; +SYM_PIC_ALIAS(sev_secrets_pa); + +/* For early boot SVSM communication */ +struct svsm_ca boot_svsm_ca_page __aligned(PAGE_SIZE); +SYM_PIC_ALIAS(boot_svsm_ca_page); + +/* + * SVSM related information: + * During boot, the page tables are set up as identity mapped and later + * changed to use kernel virtual addresses. Maintain separate virtual and + * physical addresses for the CAA to allow SVSM functions to be used during + * early boot, both with identity mapped virtual addresses and proper kernel + * virtual addresses. + */ +u64 boot_svsm_caa_pa __ro_after_init; +SYM_PIC_ALIAS(boot_svsm_caa_pa); + +DEFINE_PER_CPU(struct svsm_ca *, svsm_caa); +DEFINE_PER_CPU(u64, svsm_caa_pa); + +static inline struct svsm_ca *svsm_get_caa(void) +{ + if (sev_cfg.use_cas) + return this_cpu_read(svsm_caa); + else + return rip_rel_ptr(&boot_svsm_ca_page); +} + +static inline u64 svsm_get_caa_pa(void) +{ + if (sev_cfg.use_cas) + return this_cpu_read(svsm_caa_pa); + else + return boot_svsm_caa_pa; +} + /* AP INIT values as documented in the APM2 section "Processor Initialization State" */ #define AP_INIT_CS_LIMIT 0xffff #define AP_INIT_DS_LIMIT 0xffff @@ -79,6 +121,7 @@ static const char * const sev_status_feat_names[] = { [MSR_AMD64_SNP_IBS_VIRT_BIT] = "IBSVirt", [MSR_AMD64_SNP_VMSA_REG_PROT_BIT] = "VMSARegProt", [MSR_AMD64_SNP_SMT_PROT_BIT] = "SMTProt", + [MSR_AMD64_SNP_SECURE_AVIC_BIT] = "SecureAVIC", }; /* @@ -100,6 +143,26 @@ DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa); */ u8 snp_vmpl __ro_after_init; EXPORT_SYMBOL_GPL(snp_vmpl); +SYM_PIC_ALIAS(snp_vmpl); + +/* + * Since feature negotiation related variables are set early in the boot + * process they must reside in the .data section so as not to be zeroed + * out when the .bss section is later cleared. + * + * GHCB protocol version negotiated with the hypervisor. + */ +u16 ghcb_version __ro_after_init; +SYM_PIC_ALIAS(ghcb_version); + +/* For early boot hypervisor communication in SEV-ES enabled guests */ +static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE); + +/* + * Needs to be in the .data section because we need it NULL before bss is + * cleared + */ +struct ghcb *boot_ghcb __section(".data"); static u64 __init get_snp_jump_table_addr(void) { @@ -154,6 +217,73 @@ static u64 __init get_jump_table_addr(void) return ret; } +static int svsm_perform_ghcb_protocol(struct ghcb *ghcb, struct svsm_call *call) +{ + struct es_em_ctxt ctxt; + u8 pending = 0; + + vc_ghcb_invalidate(ghcb); + + /* + * Fill in protocol and format specifiers. This can be called very early + * in the boot, so use rip-relative references as needed. + */ + ghcb->protocol_version = ghcb_version; + ghcb->ghcb_usage = GHCB_DEFAULT_USAGE; + + ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_SNP_RUN_VMPL); + ghcb_set_sw_exit_info_1(ghcb, 0); + ghcb_set_sw_exit_info_2(ghcb, 0); + + sev_es_wr_ghcb_msr(__pa(ghcb)); + + svsm_issue_call(call, &pending); + + if (pending) + return -EINVAL; + + switch (verify_exception_info(ghcb, &ctxt)) { + case ES_OK: + break; + case ES_EXCEPTION: + vc_forward_exception(&ctxt); + fallthrough; + default: + return -EINVAL; + } + + return svsm_process_result_codes(call); +} + +static int svsm_perform_call_protocol(struct svsm_call *call) +{ + struct ghcb_state state; + unsigned long flags; + struct ghcb *ghcb; + int ret; + + flags = native_local_irq_save(); + + if (sev_cfg.ghcbs_initialized) + ghcb = __sev_get_ghcb(&state); + else if (boot_ghcb) + ghcb = boot_ghcb; + else + ghcb = NULL; + + do { + ret = ghcb ? svsm_perform_ghcb_protocol(ghcb, call) + : __pi_svsm_perform_msr_protocol(call); + } while (ret == -EAGAIN); + + if (sev_cfg.ghcbs_initialized) + __sev_put_ghcb(&state); + + native_local_irq_restore(flags); + + return ret; +} + static inline void __pval_terminate(u64 pfn, bool action, unsigned int page_size, int ret, u64 svsm_ret) { @@ -531,8 +661,11 @@ static void set_pages_state(unsigned long vaddr, unsigned long npages, int op) unsigned long vaddr_end; /* Use the MSR protocol when a GHCB is not available. */ - if (!boot_ghcb) - return early_set_pages_state(vaddr, __pa(vaddr), npages, op); + if (!boot_ghcb) { + struct psc_desc d = { op, svsm_get_caa(), svsm_get_caa_pa() }; + + return early_set_pages_state(vaddr, __pa(vaddr), npages, &d); + } vaddr = vaddr & PAGE_MASK; vaddr_end = vaddr + (npages << PAGE_SHIFT); @@ -973,6 +1106,9 @@ static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip, unsigned vmsa->x87_ftw = AP_INIT_X87_FTW_DEFAULT; vmsa->x87_fcw = AP_INIT_X87_FCW_DEFAULT; + if (cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) + vmsa->vintr_ctrl |= V_GIF_MASK | V_NMI_ENABLE_MASK; + /* SVME must be set. */ vmsa->efer = EFER_SVME; @@ -1107,6 +1243,105 @@ int __init sev_es_efi_map_ghcbs_cas(pgd_t *pgd) return 0; } +u64 savic_ghcb_msr_read(u32 reg) +{ + u64 msr = APIC_BASE_MSR + (reg >> 4); + struct pt_regs regs = { .cx = msr }; + struct es_em_ctxt ctxt = { .regs = ®s }; + struct ghcb_state state; + enum es_result res; + struct ghcb *ghcb; + + guard(irqsave)(); + + ghcb = __sev_get_ghcb(&state); + vc_ghcb_invalidate(ghcb); + + res = sev_es_ghcb_handle_msr(ghcb, &ctxt, false); + if (res != ES_OK) { + pr_err("Secure AVIC MSR (0x%llx) read returned error (%d)\n", msr, res); + /* MSR read failures are treated as fatal errors */ + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL); + } + + __sev_put_ghcb(&state); + + return regs.ax | regs.dx << 32; +} + +void savic_ghcb_msr_write(u32 reg, u64 value) +{ + u64 msr = APIC_BASE_MSR + (reg >> 4); + struct pt_regs regs = { + .cx = msr, + .ax = lower_32_bits(value), + .dx = upper_32_bits(value) + }; + struct es_em_ctxt ctxt = { .regs = ®s }; + struct ghcb_state state; + enum es_result res; + struct ghcb *ghcb; + + guard(irqsave)(); + + ghcb = __sev_get_ghcb(&state); + vc_ghcb_invalidate(ghcb); + + res = sev_es_ghcb_handle_msr(ghcb, &ctxt, true); + if (res != ES_OK) { + pr_err("Secure AVIC MSR (0x%llx) write returned error (%d)\n", msr, res); + /* MSR writes should never fail. Any failure is fatal error for SNP guest */ + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL); + } + + __sev_put_ghcb(&state); +} + +enum es_result savic_register_gpa(u64 gpa) +{ + struct ghcb_state state; + struct es_em_ctxt ctxt; + enum es_result res; + struct ghcb *ghcb; + + guard(irqsave)(); + + ghcb = __sev_get_ghcb(&state); + vc_ghcb_invalidate(ghcb); + + ghcb_set_rax(ghcb, SVM_VMGEXIT_SAVIC_SELF_GPA); + ghcb_set_rbx(ghcb, gpa); + res = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_SAVIC, + SVM_VMGEXIT_SAVIC_REGISTER_GPA, 0); + + __sev_put_ghcb(&state); + + return res; +} + +enum es_result savic_unregister_gpa(u64 *gpa) +{ + struct ghcb_state state; + struct es_em_ctxt ctxt; + enum es_result res; + struct ghcb *ghcb; + + guard(irqsave)(); + + ghcb = __sev_get_ghcb(&state); + vc_ghcb_invalidate(ghcb); + + ghcb_set_rax(ghcb, SVM_VMGEXIT_SAVIC_SELF_GPA); + res = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_SAVIC, + SVM_VMGEXIT_SAVIC_UNREGISTER_GPA, 0); + if (gpa && res == ES_OK) + *gpa = ghcb->save.rbx; + + __sev_put_ghcb(&state); + + return res; +} + static void snp_register_per_cpu_ghcb(void) { struct sev_es_runtime_data *data; @@ -1233,7 +1468,8 @@ static void __init alloc_runtime_data(int cpu) struct svsm_ca *caa; /* Allocate the SVSM CA page if an SVSM is present */ - caa = memblock_alloc_or_panic(sizeof(*caa), PAGE_SIZE); + caa = cpu ? memblock_alloc_or_panic(sizeof(*caa), PAGE_SIZE) + : &boot_svsm_ca_page; per_cpu(svsm_caa, cpu) = caa; per_cpu(svsm_caa_pa, cpu) = __pa(caa); @@ -1287,32 +1523,9 @@ void __init sev_es_init_vc_handling(void) init_ghcb(cpu); } - /* If running under an SVSM, switch to the per-cpu CA */ - if (snp_vmpl) { - struct svsm_call call = {}; - unsigned long flags; - int ret; - - local_irq_save(flags); - - /* - * SVSM_CORE_REMAP_CA call: - * RAX = 0 (Protocol=0, CallID=0) - * RCX = New CA GPA - */ - call.caa = svsm_get_caa(); - call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA); - call.rcx = this_cpu_read(svsm_caa_pa); - ret = svsm_perform_call_protocol(&call); - if (ret) - panic("Can't remap the SVSM CA, ret=%d, rax_out=0x%llx\n", - ret, call.rax_out); - + if (snp_vmpl) sev_cfg.use_cas = true; - local_irq_restore(flags); - } - sev_es_setup_play_dead(); /* Secondary CPUs use the runtime #VC handler */ @@ -1590,15 +1803,6 @@ void sev_show_status(void) pr_cont("\n"); } -void __init snp_update_svsm_ca(void) -{ - if (!snp_vmpl) - return; - - /* Update the CAA to a proper kernel address */ - boot_svsm_caa = &boot_svsm_ca_page; -} - #ifdef CONFIG_SYSFS static ssize_t vmpl_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) diff --git a/arch/x86/coco/sev/sev-nmi.c b/arch/x86/coco/sev/noinstr.c index d8dfaddfb367..b527eafb6312 100644 --- a/arch/x86/coco/sev/sev-nmi.c +++ b/arch/x86/coco/sev/noinstr.c @@ -106,3 +106,77 @@ void noinstr __sev_es_nmi_complete(void) __sev_put_ghcb(&state); } + +/* + * Nothing shall interrupt this code path while holding the per-CPU + * GHCB. The backup GHCB is only for NMIs interrupting this path. + * + * Callers must disable local interrupts around it. + */ +noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state) +{ + struct sev_es_runtime_data *data; + struct ghcb *ghcb; + + WARN_ON(!irqs_disabled()); + + data = this_cpu_read(runtime_data); + ghcb = &data->ghcb_page; + + if (unlikely(data->ghcb_active)) { + /* GHCB is already in use - save its contents */ + + if (unlikely(data->backup_ghcb_active)) { + /* + * Backup-GHCB is also already in use. There is no way + * to continue here so just kill the machine. To make + * panic() work, mark GHCBs inactive so that messages + * can be printed out. + */ + data->ghcb_active = false; + data->backup_ghcb_active = false; + + instrumentation_begin(); + panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use"); + instrumentation_end(); + } + + /* Mark backup_ghcb active before writing to it */ + data->backup_ghcb_active = true; + + state->ghcb = &data->backup_ghcb; + + /* Backup GHCB content */ + *state->ghcb = *ghcb; + } else { + state->ghcb = NULL; + data->ghcb_active = true; + } + + return ghcb; +} + +noinstr void __sev_put_ghcb(struct ghcb_state *state) +{ + struct sev_es_runtime_data *data; + struct ghcb *ghcb; + + WARN_ON(!irqs_disabled()); + + data = this_cpu_read(runtime_data); + ghcb = &data->ghcb_page; + + if (state->ghcb) { + /* Restore GHCB from Backup */ + *ghcb = *state->ghcb; + data->backup_ghcb_active = false; + state->ghcb = NULL; + } else { + /* + * Invalidate the GHCB so a VMGEXIT instruction issued + * from userspace won't appear to be valid. + */ + vc_ghcb_invalidate(ghcb); + data->ghcb_active = false; + } +} diff --git a/arch/x86/coco/sev/vc-handle.c b/arch/x86/coco/sev/vc-handle.c index c3b4acbde0d8..7fc136a35334 100644 --- a/arch/x86/coco/sev/vc-handle.c +++ b/arch/x86/coco/sev/vc-handle.c @@ -351,6 +351,8 @@ fault: } #define sev_printk(fmt, ...) printk(fmt, ##__VA_ARGS__) +#define error(v) +#define has_cpuflag(f) boot_cpu_has(f) #include "vc-shared.c" @@ -402,14 +404,10 @@ static enum es_result __vc_handle_secure_tsc_msrs(struct es_em_ctxt *ctxt, bool return ES_OK; } -static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt) +enum es_result sev_es_ghcb_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt, bool write) { struct pt_regs *regs = ctxt->regs; enum es_result ret; - bool write; - - /* Is it a WRMSR? */ - write = ctxt->insn.opcode.bytes[1] == 0x30; switch (regs->cx) { case MSR_SVSM_CAA: @@ -419,6 +417,15 @@ static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt) if (sev_status & MSR_AMD64_SNP_SECURE_TSC) return __vc_handle_secure_tsc_msrs(ctxt, write); break; + case MSR_AMD64_SAVIC_CONTROL: + /* + * AMD64_SAVIC_CONTROL should not be intercepted when + * Secure AVIC is enabled. Terminate the Secure AVIC guest + * if the interception is enabled. + */ + if (cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) + return ES_VMM_ERROR; + break; default: break; } @@ -439,6 +446,11 @@ static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt) return ret; } +static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt) +{ + return sev_es_ghcb_handle_msr(ghcb, ctxt, ctxt->insn.opcode.bytes[1] == 0x30); +} + static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt) { int trapnr = ctxt->fi.vector; diff --git a/arch/x86/coco/sev/vc-shared.c b/arch/x86/coco/sev/vc-shared.c index 2c0ab0fdc060..9b01c9ad81be 100644 --- a/arch/x86/coco/sev/vc-shared.c +++ b/arch/x86/coco/sev/vc-shared.c @@ -409,15 +409,109 @@ static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt) return ret; } +enum es_result verify_exception_info(struct ghcb *ghcb, struct es_em_ctxt *ctxt) +{ + u32 ret; + + ret = ghcb->save.sw_exit_info_1 & GENMASK_ULL(31, 0); + if (!ret) + return ES_OK; + + if (ret == 1) { + u64 info = ghcb->save.sw_exit_info_2; + unsigned long v = info & SVM_EVTINJ_VEC_MASK; + + /* Check if exception information from hypervisor is sane. */ + if ((info & SVM_EVTINJ_VALID) && + ((v == X86_TRAP_GP) || (v == X86_TRAP_UD)) && + ((info & SVM_EVTINJ_TYPE_MASK) == SVM_EVTINJ_TYPE_EXEPT)) { + ctxt->fi.vector = v; + + if (info & SVM_EVTINJ_VALID_ERR) + ctxt->fi.error_code = info >> 32; + + return ES_EXCEPTION; + } + } + + return ES_VMM_ERROR; +} + +enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, + struct es_em_ctxt *ctxt, + u64 exit_code, u64 exit_info_1, + u64 exit_info_2) +{ + /* Fill in protocol and format specifiers */ + ghcb->protocol_version = ghcb_version; + ghcb->ghcb_usage = GHCB_DEFAULT_USAGE; + + ghcb_set_sw_exit_code(ghcb, exit_code); + ghcb_set_sw_exit_info_1(ghcb, exit_info_1); + ghcb_set_sw_exit_info_2(ghcb, exit_info_2); + + sev_es_wr_ghcb_msr(__pa(ghcb)); + VMGEXIT(); + + return verify_exception_info(ghcb, ctxt); +} + +static int __sev_cpuid_hv_ghcb(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf) +{ + u32 cr4 = native_read_cr4(); + int ret; + + ghcb_set_rax(ghcb, leaf->fn); + ghcb_set_rcx(ghcb, leaf->subfn); + + if (cr4 & X86_CR4_OSXSAVE) + /* Safe to read xcr0 */ + ghcb_set_xcr0(ghcb, xgetbv(XCR_XFEATURE_ENABLED_MASK)); + else + /* xgetbv will cause #UD - use reset value for xcr0 */ + ghcb_set_xcr0(ghcb, 1); + + ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0); + if (ret != ES_OK) + return ret; + + if (!(ghcb_rax_is_valid(ghcb) && + ghcb_rbx_is_valid(ghcb) && + ghcb_rcx_is_valid(ghcb) && + ghcb_rdx_is_valid(ghcb))) + return ES_VMM_ERROR; + + leaf->eax = ghcb->save.rax; + leaf->ebx = ghcb->save.rbx; + leaf->ecx = ghcb->save.rcx; + leaf->edx = ghcb->save.rdx; + + return ES_OK; +} + +struct cpuid_ctx { + struct ghcb *ghcb; + struct es_em_ctxt *ctxt; +}; + +static void snp_cpuid_hv_ghcb(void *p, struct cpuid_leaf *leaf) +{ + struct cpuid_ctx *ctx = p; + + if (__sev_cpuid_hv_ghcb(ctx->ghcb, ctx->ctxt, leaf)) + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID_HV); +} + static int vc_handle_cpuid_snp(struct ghcb *ghcb, struct es_em_ctxt *ctxt) { + struct cpuid_ctx ctx = { ghcb, ctxt }; struct pt_regs *regs = ctxt->regs; struct cpuid_leaf leaf; int ret; leaf.fn = regs->ax; leaf.subfn = regs->cx; - ret = snp_cpuid(ghcb, ctxt, &leaf); + ret = snp_cpuid(snp_cpuid_hv_ghcb, &ctx, &leaf); if (!ret) { regs->ax = leaf.eax; regs->bx = leaf.ebx; @@ -502,3 +596,50 @@ static enum es_result vc_handle_rdtsc(struct ghcb *ghcb, return ES_OK; } + +void snp_register_ghcb_early(unsigned long paddr) +{ + unsigned long pfn = paddr >> PAGE_SHIFT; + u64 val; + + sev_es_wr_ghcb_msr(GHCB_MSR_REG_GPA_REQ_VAL(pfn)); + VMGEXIT(); + + val = sev_es_rd_ghcb_msr(); + + /* If the response GPA is not ours then abort the guest */ + if ((GHCB_RESP_CODE(val) != GHCB_MSR_REG_GPA_RESP) || + (GHCB_MSR_REG_GPA_RESP_VAL(val) != pfn)) + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_REGISTER); +} + +bool __init sev_es_check_cpu_features(void) +{ + if (!has_cpuflag(X86_FEATURE_RDRAND)) { + error("RDRAND instruction not supported - no trusted source of randomness available\n"); + return false; + } + + return true; +} + +bool sev_es_negotiate_protocol(void) +{ + u64 val; + + /* Do the GHCB protocol version negotiation */ + sev_es_wr_ghcb_msr(GHCB_MSR_SEV_INFO_REQ); + VMGEXIT(); + val = sev_es_rd_ghcb_msr(); + + if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP) + return false; + + if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTOCOL_MIN || + GHCB_MSR_PROTO_MIN(val) > GHCB_PROTOCOL_MAX) + return false; + + ghcb_version = min_t(size_t, GHCB_MSR_PROTO_MAX(val), GHCB_PROTOCOL_MAX); + + return true; +} diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 07ba4935e873..a26e66d66444 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -305,6 +305,8 @@ struct apic { /* Probe, setup and smpboot functions */ int (*probe)(void); + void (*setup)(void); + void (*teardown)(void); int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); void (*init_apic_ldr)(void); @@ -317,6 +319,8 @@ struct apic { /* wakeup secondary CPU using 64-bit wakeup point */ int (*wakeup_secondary_cpu_64)(u32 apicid, unsigned long start_eip, unsigned int cpu); + void (*update_vector)(unsigned int cpu, unsigned int vector, bool set); + char *name; }; @@ -470,6 +474,12 @@ static __always_inline bool apic_id_valid(u32 apic_id) return apic_id <= apic->max_apic_id; } +static __always_inline void apic_update_vector(unsigned int cpu, unsigned int vector, bool set) +{ + if (apic->update_vector) + apic->update_vector(cpu, vector, set); +} + #else /* CONFIG_X86_LOCAL_APIC */ static inline u32 apic_read(u32 reg) { return 0; } @@ -481,6 +491,7 @@ static inline void apic_wait_icr_idle(void) { } static inline u32 safe_apic_wait_icr_idle(void) { return 0; } static inline void apic_native_eoi(void) { WARN_ON_ONCE(1); } static inline void apic_setup_apic_calls(void) { } +static inline void apic_update_vector(unsigned int cpu, unsigned int vector, bool set) { } #define apic_update_callback(_callback, _fn) do { } while (0) diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index 094106b6a538..be39a543fbe5 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h @@ -135,6 +135,8 @@ #define APIC_TDR_DIV_128 0xA #define APIC_EFEAT 0x400 #define APIC_ECTRL 0x410 +#define APIC_SEOI 0x420 +#define APIC_IER 0x480 #define APIC_EILVTn(n) (0x500 + 0x10 * n) #define APIC_EILVT_NR_AMD_K8 1 /* # of extended interrupts */ #define APIC_EILVT_NR_AMD_10H 4 diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index 02b23aa78955..f7b67cb73915 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h @@ -82,6 +82,8 @@ #ifndef __ASSEMBLER__ extern unsigned int output_len; extern const unsigned long kernel_text_size; +extern const unsigned long kernel_inittext_offset; +extern const unsigned long kernel_inittext_size; extern const unsigned long kernel_total_size; unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr, diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h index 5a68e9db6518..01ccdd168df0 100644 --- a/arch/x86/include/asm/init.h +++ b/arch/x86/include/asm/init.h @@ -2,12 +2,6 @@ #ifndef _ASM_X86_INIT_H #define _ASM_X86_INIT_H -#if defined(CONFIG_CC_IS_CLANG) && CONFIG_CLANG_VERSION < 170000 -#define __head __section(".head.text") __no_sanitize_undefined __no_stack_protector -#else -#define __head __section(".head.text") __no_sanitize_undefined __no_kstack_erase -#endif - struct x86_mapping_info { void *(*alloc_pgt_page)(void *); /* allocate buf for page table */ void (*free_pgt_page)(void *, void *); /* free buf for page table */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 73393a66d3ab..718a55d82fe4 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -706,8 +706,15 @@ #define MSR_AMD64_SNP_VMSA_REG_PROT BIT_ULL(MSR_AMD64_SNP_VMSA_REG_PROT_BIT) #define MSR_AMD64_SNP_SMT_PROT_BIT 17 #define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT) -#define MSR_AMD64_SNP_RESV_BIT 18 +#define MSR_AMD64_SNP_SECURE_AVIC_BIT 18 +#define MSR_AMD64_SNP_SECURE_AVIC BIT_ULL(MSR_AMD64_SNP_SECURE_AVIC_BIT) +#define MSR_AMD64_SNP_RESV_BIT 19 #define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT) +#define MSR_AMD64_SAVIC_CONTROL 0xc0010138 +#define MSR_AMD64_SAVIC_EN_BIT 0 +#define MSR_AMD64_SAVIC_EN BIT_ULL(MSR_AMD64_SAVIC_EN_BIT) +#define MSR_AMD64_SAVIC_ALLOWEDNMI_BIT 1 +#define MSR_AMD64_SAVIC_ALLOWEDNMI BIT_ULL(MSR_AMD64_SAVIC_ALLOWEDNMI_BIT) #define MSR_AMD64_RMP_BASE 0xc0010132 #define MSR_AMD64_RMP_END 0xc0010133 #define MSR_AMD64_RMP_CFG 0xc0010136 diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 692af46603a1..914eb32581c7 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -53,6 +53,7 @@ extern void i386_reserve_resources(void); extern unsigned long __startup_64(unsigned long p2v_offset, struct boot_params *bp); extern void startup_64_setup_gdt_idt(void); extern void startup_64_load_idt(void *vc_handler); +extern void __pi_startup_64_load_idt(void *vc_handler); extern void early_setup_idt(void); extern void __init do_early_exception(struct pt_regs *regs, int trapnr); diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h index 0020d77a0800..01a6e4dbe423 100644 --- a/arch/x86/include/asm/sev-common.h +++ b/arch/x86/include/asm/sev-common.h @@ -208,6 +208,7 @@ struct snp_psc_desc { #define GHCB_TERM_SVSM_CAA 9 /* SVSM is present but CAA is not page aligned */ #define GHCB_TERM_SECURE_TSC 10 /* Secure TSC initialization failed */ #define GHCB_TERM_SVSM_CA_REMAP_FAIL 11 /* SVSM is present but CA could not be remapped */ +#define GHCB_TERM_SAVIC_FAIL 12 /* Secure AVIC-specific failure */ #define GHCB_RESP_CODE(v) ((v) & GHCB_MSR_INFO_MASK) diff --git a/arch/x86/include/asm/sev-internal.h b/arch/x86/include/asm/sev-internal.h index 3dfd306d1c9e..c58c47c68ab6 100644 --- a/arch/x86/include/asm/sev-internal.h +++ b/arch/x86/include/asm/sev-internal.h @@ -2,7 +2,6 @@ #define DR7_RESET_VALUE 0x400 -extern struct ghcb boot_ghcb_page; extern u64 sev_hv_features; extern u64 sev_secrets_pa; @@ -56,31 +55,15 @@ DECLARE_PER_CPU(struct sev_es_runtime_data*, runtime_data); DECLARE_PER_CPU(struct sev_es_save_area *, sev_vmsa); void early_set_pages_state(unsigned long vaddr, unsigned long paddr, - unsigned long npages, enum psc_op op); + unsigned long npages, const struct psc_desc *desc); DECLARE_PER_CPU(struct svsm_ca *, svsm_caa); DECLARE_PER_CPU(u64, svsm_caa_pa); -extern struct svsm_ca *boot_svsm_caa; extern u64 boot_svsm_caa_pa; -static __always_inline struct svsm_ca *svsm_get_caa(void) -{ - if (sev_cfg.use_cas) - return this_cpu_read(svsm_caa); - else - return boot_svsm_caa; -} - -static __always_inline u64 svsm_get_caa_pa(void) -{ - if (sev_cfg.use_cas) - return this_cpu_read(svsm_caa_pa); - else - return boot_svsm_caa_pa; -} - -int svsm_perform_call_protocol(struct svsm_call *call); +enum es_result verify_exception_info(struct ghcb *ghcb, struct es_em_ctxt *ctxt); +void vc_forward_exception(struct es_em_ctxt *ctxt); static inline u64 sev_es_rd_ghcb_msr(void) { @@ -97,9 +80,8 @@ static __always_inline void sev_es_wr_ghcb_msr(u64 val) native_wrmsr(MSR_AMD64_SEV_ES_GHCB, low, high); } -void snp_register_ghcb_early(unsigned long paddr); -bool sev_es_negotiate_protocol(void); -bool sev_es_check_cpu_features(void); +enum es_result sev_es_ghcb_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt, bool write); + u64 get_hv_features(void); const struct snp_cpuid_table *snp_cpuid_get_table(void); diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 465b19fd1a2d..f9046c4b9a2b 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -503,6 +503,7 @@ static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) } void setup_ghcb(void); +void snp_register_ghcb_early(unsigned long paddr); void early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned long npages); void early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, @@ -511,14 +512,12 @@ void snp_set_memory_shared(unsigned long vaddr, unsigned long npages); void snp_set_memory_private(unsigned long vaddr, unsigned long npages); void snp_set_wakeup_secondary_cpu(void); bool snp_init(struct boot_params *bp); -void __noreturn snp_abort(void); void snp_dmi_setup(void); int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call, struct svsm_attest_call *input); void snp_accept_memory(phys_addr_t start, phys_addr_t end); u64 snp_get_unsupported_features(u64 status); u64 sev_get_status(void); void sev_show_status(void); -void snp_update_svsm_ca(void); int prepare_pte_enc(struct pte_enc_desc *d); void set_pte_enc_mask(pte_t *kpte, unsigned long pfn, pgprot_t new_prot); void snp_kexec_finish(void); @@ -533,6 +532,10 @@ int snp_svsm_vtpm_send_command(u8 *buffer); void __init snp_secure_tsc_prepare(void); void __init snp_secure_tsc_init(void); +enum es_result savic_register_gpa(u64 gpa); +enum es_result savic_unregister_gpa(u64 *gpa); +u64 savic_ghcb_msr_read(u32 reg); +void savic_ghcb_msr_write(u32 reg, u64 value); static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb) { @@ -540,8 +543,6 @@ static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb) __builtin_memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap)); } -void vc_forward_exception(struct es_em_ctxt *ctxt); - /* I/O parameters for CPUID-related helpers */ struct cpuid_leaf { u32 fn; @@ -552,7 +553,13 @@ struct cpuid_leaf { u32 edx; }; -int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf); +int svsm_perform_msr_protocol(struct svsm_call *call); +int __pi_svsm_perform_msr_protocol(struct svsm_call *call); +int snp_cpuid(void (*cpuid_fn)(void *ctx, struct cpuid_leaf *leaf), + void *ctx, struct cpuid_leaf *leaf); + +void svsm_issue_call(struct svsm_call *call, u8 *pending); +int svsm_process_result_codes(struct svsm_call *call); void __noreturn sev_es_terminate(unsigned int set, unsigned int reason); enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, @@ -560,7 +567,18 @@ enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, u64 exit_code, u64 exit_info_1, u64 exit_info_2); +bool sev_es_negotiate_protocol(void); +bool sev_es_check_cpu_features(void); + +extern u16 ghcb_version; extern struct ghcb *boot_ghcb; +extern bool sev_snp_needs_sfw; + +struct psc_desc { + enum psc_op op; + struct svsm_ca *ca; + u64 caa_pa; +}; static inline void sev_evict_cache(void *va, int npages) { @@ -600,7 +618,6 @@ static inline void snp_set_memory_shared(unsigned long vaddr, unsigned long npag static inline void snp_set_memory_private(unsigned long vaddr, unsigned long npages) { } static inline void snp_set_wakeup_secondary_cpu(void) { } static inline bool snp_init(struct boot_params *bp) { return false; } -static inline void snp_abort(void) { } static inline void snp_dmi_setup(void) { } static inline int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call, struct svsm_attest_call *input) { @@ -610,7 +627,6 @@ static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { } static inline u64 snp_get_unsupported_features(u64 status) { return 0; } static inline u64 sev_get_status(void) { return 0; } static inline void sev_show_status(void) { } -static inline void snp_update_svsm_ca(void) { } static inline int prepare_pte_enc(struct pte_enc_desc *d) { return 0; } static inline void set_pte_enc_mask(pte_t *kpte, unsigned long pfn, pgprot_t new_prot) { } static inline void snp_kexec_finish(void) { } @@ -624,6 +640,10 @@ static inline int snp_svsm_vtpm_send_command(u8 *buffer) { return -ENODEV; } static inline void __init snp_secure_tsc_prepare(void) { } static inline void __init snp_secure_tsc_init(void) { } static inline void sev_evict_cache(void *va, int npages) {} +static inline enum es_result savic_register_gpa(u64 gpa) { return ES_UNSUPPORTED; } +static inline enum es_result savic_unregister_gpa(u64 *gpa) { return ES_UNSUPPORTED; } +static inline void savic_ghcb_msr_write(u32 reg, u64 value) { } +static inline u64 savic_ghcb_msr_read(u32 reg) { return 0; } #endif /* CONFIG_AMD_MEM_ENCRYPT */ @@ -635,9 +655,13 @@ void snp_dump_hva_rmpentry(unsigned long address); int psmash(u64 pfn); int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid, bool immutable); int rmp_make_shared(u64 pfn, enum pg_level level); -void snp_leak_pages(u64 pfn, unsigned int npages); +void __snp_leak_pages(u64 pfn, unsigned int npages, bool dump_rmp); void kdump_sev_callback(void); void snp_fixup_e820_tables(void); +static inline void snp_leak_pages(u64 pfn, unsigned int pages) +{ + __snp_leak_pages(pfn, pages, true); +} #else static inline bool snp_probe_rmptable_info(void) { return false; } static inline int snp_rmptable_init(void) { return -ENOSYS; } @@ -650,6 +674,7 @@ static inline int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 as return -ENODEV; } static inline int rmp_make_shared(u64 pfn, enum pg_level level) { return -ENODEV; } +static inline void __snp_leak_pages(u64 pfn, unsigned int npages, bool dump_rmp) {} static inline void snp_leak_pages(u64 pfn, unsigned int npages) {} static inline void kdump_sev_callback(void) { } static inline void snp_fixup_e820_tables(void) {} diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 9282465eea21..e71e0e8362ed 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -80,56 +80,42 @@ struct thread_info { #endif /* - * thread information flags - * - these are process state flags that various assembly files - * may need to access + * Tell the generic TIF infrastructure which bits x86 supports */ -#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */ -#define TIF_SIGPENDING 2 /* signal pending */ -#define TIF_NEED_RESCHED 3 /* rescheduling necessary */ -#define TIF_NEED_RESCHED_LAZY 4 /* Lazy rescheduling needed */ -#define TIF_SINGLESTEP 5 /* reenable singlestep on user return*/ -#define TIF_SSBD 6 /* Speculative store bypass disable */ -#define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */ -#define TIF_SPEC_L1D_FLUSH 10 /* Flush L1D on mm switches (processes) */ -#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ -#define TIF_UPROBE 12 /* breakpointed or singlestepping */ -#define TIF_PATCH_PENDING 13 /* pending live patching update */ -#define TIF_NEED_FPU_LOAD 14 /* load FPU on return to userspace */ -#define TIF_NOCPUID 15 /* CPUID is not accessible in userland */ -#define TIF_NOTSC 16 /* TSC is not accessible in userland */ -#define TIF_NOTIFY_SIGNAL 17 /* signal notifications exist */ -#define TIF_MEMDIE 20 /* is terminating due to OOM killer */ -#define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */ +#define HAVE_TIF_NEED_RESCHED_LAZY +#define HAVE_TIF_POLLING_NRFLAG +#define HAVE_TIF_SINGLESTEP + +#include <asm-generic/thread_info_tif.h> + +/* Architecture specific TIF space starts at 16 */ +#define TIF_SSBD 16 /* Speculative store bypass disable */ +#define TIF_SPEC_IB 17 /* Indirect branch speculation mitigation */ +#define TIF_SPEC_L1D_FLUSH 18 /* Flush L1D on mm switches (processes) */ +#define TIF_NEED_FPU_LOAD 19 /* load FPU on return to userspace */ +#define TIF_NOCPUID 20 /* CPUID is not accessible in userland */ +#define TIF_NOTSC 21 /* TSC is not accessible in userland */ #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ #define TIF_SPEC_FORCE_UPDATE 23 /* Force speculation MSR update in context switch */ #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ -#define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */ +#define TIF_SINGLESTEP 25 /* reenable singlestep on user return*/ +#define TIF_BLOCKSTEP 26 /* set when we want DEBUGCTLMSR_BTF */ #define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ -#define TIF_ADDR32 29 /* 32-bit address space on 64 bits */ - -#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) -#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) -#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) -#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) -#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) -#define _TIF_SSBD (1 << TIF_SSBD) -#define _TIF_SPEC_IB (1 << TIF_SPEC_IB) -#define _TIF_SPEC_L1D_FLUSH (1 << TIF_SPEC_L1D_FLUSH) -#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) -#define _TIF_UPROBE (1 << TIF_UPROBE) -#define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING) -#define _TIF_NEED_FPU_LOAD (1 << TIF_NEED_FPU_LOAD) -#define _TIF_NOCPUID (1 << TIF_NOCPUID) -#define _TIF_NOTSC (1 << TIF_NOTSC) -#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) -#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) -#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) -#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE) -#define _TIF_FORCED_TF (1 << TIF_FORCED_TF) -#define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP) -#define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) -#define _TIF_ADDR32 (1 << TIF_ADDR32) +#define TIF_ADDR32 28 /* 32-bit address space on 64 bits */ + +#define _TIF_SSBD BIT(TIF_SSBD) +#define _TIF_SPEC_IB BIT(TIF_SPEC_IB) +#define _TIF_SPEC_L1D_FLUSH BIT(TIF_SPEC_L1D_FLUSH) +#define _TIF_NEED_FPU_LOAD BIT(TIF_NEED_FPU_LOAD) +#define _TIF_NOCPUID BIT(TIF_NOCPUID) +#define _TIF_NOTSC BIT(TIF_NOTSC) +#define _TIF_IO_BITMAP BIT(TIF_IO_BITMAP) +#define _TIF_SPEC_FORCE_UPDATE BIT(TIF_SPEC_FORCE_UPDATE) +#define _TIF_FORCED_TF BIT(TIF_FORCED_TF) +#define _TIF_BLOCKSTEP BIT(TIF_BLOCKSTEP) +#define _TIF_SINGLESTEP BIT(TIF_SINGLESTEP) +#define _TIF_LAZY_MMU_UPDATES BIT(TIF_LAZY_MMU_UPDATES) +#define _TIF_ADDR32 BIT(TIF_ADDR32) /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW_BASE \ diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index 9c640a521a67..650e3256ea7d 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -118,6 +118,10 @@ #define SVM_VMGEXIT_AP_CREATE 1 #define SVM_VMGEXIT_AP_DESTROY 2 #define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018 +#define SVM_VMGEXIT_SAVIC 0x8000001a +#define SVM_VMGEXIT_SAVIC_REGISTER_GPA 0 +#define SVM_VMGEXIT_SAVIC_UNREGISTER_GPA 1 +#define SVM_VMGEXIT_SAVIC_SELF_GPA ~0ULL #define SVM_VMGEXIT_HV_FEATURES 0x8000fffd #define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe #define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code) \ diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index 52d1808ee360..581db89477f9 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile @@ -18,6 +18,7 @@ ifeq ($(CONFIG_X86_64),y) # APIC probe will depend on the listing order here obj-$(CONFIG_X86_NUMACHIP) += apic_numachip.o obj-$(CONFIG_X86_UV) += x2apic_uv_x.o +obj-$(CONFIG_AMD_SECURE_AVIC) += x2apic_savic.o obj-$(CONFIG_X86_X2APIC) += x2apic_phys.o obj-$(CONFIG_X86_X2APIC) += x2apic_cluster.o obj-y += apic_flat_64.o diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index d73ba5a7b623..680d305589a3 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -592,6 +592,8 @@ static void setup_APIC_timer(void) 0xF, ~0UL); } else clockevents_register_device(levt); + + apic_update_vector(smp_processor_id(), LOCAL_TIMER_VECTOR, true); } /* @@ -1168,6 +1170,9 @@ void disable_local_APIC(void) if (!apic_accessible()) return; + if (apic->teardown) + apic->teardown(); + apic_soft_disable(); #ifdef CONFIG_X86_32 @@ -1428,63 +1433,61 @@ union apic_ir { u32 regs[APIC_IR_REGS]; }; -static bool apic_check_and_ack(union apic_ir *irr, union apic_ir *isr) +static bool apic_check_and_eoi_isr(union apic_ir *isr) { int i, bit; - /* Read the IRRs */ - for (i = 0; i < APIC_IR_REGS; i++) - irr->regs[i] = apic_read(APIC_IRR + i * 0x10); - /* Read the ISRs */ for (i = 0; i < APIC_IR_REGS; i++) isr->regs[i] = apic_read(APIC_ISR + i * 0x10); + /* If the ISR map empty, nothing to do here. */ + if (bitmap_empty(isr->map, APIC_IR_BITS)) + return true; + /* - * If the ISR map is not empty. ACK the APIC and run another round - * to verify whether a pending IRR has been unblocked and turned - * into a ISR. + * There can be multiple ISR bits set when a high priority + * interrupt preempted a lower priority one. Issue an EOI for each + * set bit. The priority traversal order does not matter as there + * can't be new ISR bits raised at this point. What matters is that + * an EOI is issued for each ISR bit. */ - if (!bitmap_empty(isr->map, APIC_IR_BITS)) { - /* - * There can be multiple ISR bits set when a high priority - * interrupt preempted a lower priority one. Issue an ACK - * per set bit. - */ - for_each_set_bit(bit, isr->map, APIC_IR_BITS) - apic_eoi(); - return true; - } + for_each_set_bit(bit, isr->map, APIC_IR_BITS) + apic_eoi(); - return !bitmap_empty(irr->map, APIC_IR_BITS); + /* Reread the ISRs, they should be empty now */ + for (i = 0; i < APIC_IR_REGS; i++) + isr->regs[i] = apic_read(APIC_ISR + i * 0x10); + + return bitmap_empty(isr->map, APIC_IR_BITS); } /* - * After a crash, we no longer service the interrupts and a pending - * interrupt from previous kernel might still have ISR bit set. + * If a CPU services an interrupt and crashes before issuing EOI to the + * local APIC, the corresponding ISR bit is still set when the crashing CPU + * jumps into a crash kernel. Read the ISR and issue an EOI for each set + * bit to acknowledge it as otherwise these slots would be locked forever + * waiting for an EOI. * - * Most probably by now the CPU has serviced that pending interrupt and it - * might not have done the apic_eoi() because it thought, interrupt - * came from i8259 as ExtInt. LAPIC did not get EOI so it does not clear - * the ISR bit and cpu thinks it has already serviced the interrupt. Hence - * a vector might get locked. It was noticed for timer irq (vector - * 0x31). Issue an extra EOI to clear ISR. + * If there are pending bits in the IRR, then they won't be converted into + * ISR bits as the CPU has interrupts disabled. They will be delivered once + * the CPU enables interrupts and there is nothing which can prevent that. * - * If there are pending IRR bits they turn into ISR bits after a higher - * priority ISR bit has been acked. + * In the worst case this results in spurious interrupt warnings. */ -static void apic_pending_intr_clear(void) +static void apic_clear_isr(void) { - union apic_ir irr, isr; + union apic_ir ir; unsigned int i; - /* 512 loops are way oversized and give the APIC a chance to obey. */ - for (i = 0; i < 512; i++) { - if (!apic_check_and_ack(&irr, &isr)) - return; - } - /* Dump the IRR/ISR content if that failed */ - pr_warn("APIC: Stale IRR: %256pb ISR: %256pb\n", irr.map, isr.map); + if (!apic_check_and_eoi_isr(&ir)) + pr_warn("APIC: Stale ISR: %256pb\n", ir.map); + + for (i = 0; i < APIC_IR_REGS; i++) + ir.regs[i] = apic_read(APIC_IRR + i * 0x10); + + if (!bitmap_empty(ir.map, APIC_IR_BITS)) + pr_warn("APIC: Stale IRR: %256pb\n", ir.map); } /** @@ -1503,6 +1506,9 @@ static void setup_local_APIC(void) return; } + if (apic->setup) + apic->setup(); + /* * If this comes from kexec/kcrash the APIC might be enabled in * SPIV. Soft disable it before doing further initialization. @@ -1541,8 +1547,7 @@ static void setup_local_APIC(void) value |= 0x10; apic_write(APIC_TASKPRI, value); - /* Clear eventually stale ISR/IRR bits */ - apic_pending_intr_clear(); + apic_clear_isr(); /* * Now that we are all set up, enable the APIC diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index a947b46a8b64..bddc54465399 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -134,13 +134,20 @@ static void apic_update_irq_cfg(struct irq_data *irqd, unsigned int vector, apicd->hw_irq_cfg.vector = vector; apicd->hw_irq_cfg.dest_apicid = apic->calc_dest_apicid(cpu); + + apic_update_vector(cpu, vector, true); + irq_data_update_effective_affinity(irqd, cpumask_of(cpu)); - trace_vector_config(irqd->irq, vector, cpu, - apicd->hw_irq_cfg.dest_apicid); + trace_vector_config(irqd->irq, vector, cpu, apicd->hw_irq_cfg.dest_apicid); } -static void apic_update_vector(struct irq_data *irqd, unsigned int newvec, - unsigned int newcpu) +static void apic_free_vector(unsigned int cpu, unsigned int vector, bool managed) +{ + apic_update_vector(cpu, vector, false); + irq_matrix_free(vector_matrix, cpu, vector, managed); +} + +static void chip_data_update(struct irq_data *irqd, unsigned int newvec, unsigned int newcpu) { struct apic_chip_data *apicd = apic_chip_data(irqd); struct irq_desc *desc = irq_data_to_desc(irqd); @@ -174,8 +181,7 @@ static void apic_update_vector(struct irq_data *irqd, unsigned int newvec, apicd->prev_cpu = apicd->cpu; WARN_ON_ONCE(apicd->cpu == newcpu); } else { - irq_matrix_free(vector_matrix, apicd->cpu, apicd->vector, - managed); + apic_free_vector(apicd->cpu, apicd->vector, managed); } setnew: @@ -261,7 +267,7 @@ assign_vector_locked(struct irq_data *irqd, const struct cpumask *dest) trace_vector_alloc(irqd->irq, vector, resvd, vector); if (vector < 0) return vector; - apic_update_vector(irqd, vector, cpu); + chip_data_update(irqd, vector, cpu); return 0; } @@ -337,7 +343,7 @@ assign_managed_vector(struct irq_data *irqd, const struct cpumask *dest) trace_vector_alloc_managed(irqd->irq, vector, vector); if (vector < 0) return vector; - apic_update_vector(irqd, vector, cpu); + chip_data_update(irqd, vector, cpu); return 0; } @@ -357,7 +363,7 @@ static void clear_irq_vector(struct irq_data *irqd) apicd->prev_cpu); per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_SHUTDOWN; - irq_matrix_free(vector_matrix, apicd->cpu, vector, managed); + apic_free_vector(apicd->cpu, vector, managed); apicd->vector = 0; /* Clean up move in progress */ @@ -366,7 +372,7 @@ static void clear_irq_vector(struct irq_data *irqd) return; per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_SHUTDOWN; - irq_matrix_free(vector_matrix, apicd->prev_cpu, vector, managed); + apic_free_vector(apicd->prev_cpu, vector, managed); apicd->prev_vector = 0; apicd->move_in_progress = 0; hlist_del_init(&apicd->clist); @@ -905,7 +911,7 @@ static void free_moved_vector(struct apic_chip_data *apicd) * affinity mask comes online. */ trace_vector_free_moved(apicd->irq, cpu, vector, managed); - irq_matrix_free(vector_matrix, cpu, vector, managed); + apic_free_vector(cpu, vector, managed); per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED; hlist_del_init(&apicd->clist); apicd->prev_vector = 0; diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c new file mode 100644 index 000000000000..dbc5678bc3b6 --- /dev/null +++ b/arch/x86/kernel/apic/x2apic_savic.c @@ -0,0 +1,428 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * AMD Secure AVIC Support (SEV-SNP Guests) + * + * Copyright (C) 2024 Advanced Micro Devices, Inc. + * + * Author: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com> + */ + +#include <linux/cc_platform.h> +#include <linux/cpumask.h> +#include <linux/percpu-defs.h> +#include <linux/align.h> + +#include <asm/apic.h> +#include <asm/sev.h> + +#include "local.h" + +struct secure_avic_page { + u8 regs[PAGE_SIZE]; +} __aligned(PAGE_SIZE); + +static struct secure_avic_page __percpu *savic_page __ro_after_init; + +static int savic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + return x2apic_enabled() && cc_platform_has(CC_ATTR_SNP_SECURE_AVIC); +} + +static inline void *get_reg_bitmap(unsigned int cpu, unsigned int offset) +{ + return &per_cpu_ptr(savic_page, cpu)->regs[offset]; +} + +static inline void update_vector(unsigned int cpu, unsigned int offset, + unsigned int vector, bool set) +{ + void *bitmap = get_reg_bitmap(cpu, offset); + + if (set) + apic_set_vector(vector, bitmap); + else + apic_clear_vector(vector, bitmap); +} + +#define SAVIC_ALLOWED_IRR 0x204 + +/* + * When Secure AVIC is enabled, RDMSR/WRMSR of the APIC registers + * result in #VC exception (for non-accelerated register accesses) + * with VMEXIT_AVIC_NOACCEL error code. The #VC exception handler + * can read/write the x2APIC register in the guest APIC backing page. + * + * Since doing this would increase the latency of accessing x2APIC + * registers, instead of doing RDMSR/WRMSR based accesses and + * handling the APIC register reads/writes in the #VC exception handler, + * the read() and write() callbacks directly read/write the APIC register + * from/to the vCPU's APIC backing page. + */ +static u32 savic_read(u32 reg) +{ + void *ap = this_cpu_ptr(savic_page); + + switch (reg) { + case APIC_LVTT: + case APIC_TMICT: + case APIC_TMCCT: + case APIC_TDCR: + case APIC_LVTTHMR: + case APIC_LVTPC: + case APIC_LVT0: + case APIC_LVT1: + case APIC_LVTERR: + return savic_ghcb_msr_read(reg); + case APIC_ID: + case APIC_LVR: + case APIC_TASKPRI: + case APIC_ARBPRI: + case APIC_PROCPRI: + case APIC_LDR: + case APIC_SPIV: + case APIC_ESR: + case APIC_EFEAT: + case APIC_ECTRL: + case APIC_SEOI: + case APIC_IER: + case APIC_EILVTn(0) ... APIC_EILVTn(3): + return apic_get_reg(ap, reg); + case APIC_ICR: + return (u32)apic_get_reg64(ap, reg); + case APIC_ISR ... APIC_ISR + 0x70: + case APIC_TMR ... APIC_TMR + 0x70: + if (WARN_ONCE(!IS_ALIGNED(reg, 16), + "APIC register read offset 0x%x not aligned at 16 bytes", reg)) + return 0; + return apic_get_reg(ap, reg); + /* IRR and ALLOWED_IRR offset range */ + case APIC_IRR ... APIC_IRR + 0x74: + /* + * Valid APIC_IRR/SAVIC_ALLOWED_IRR registers are at 16 bytes strides from + * their respective base offset. APIC_IRRs are in the range + * + * (0x200, 0x210, ..., 0x270) + * + * while the SAVIC_ALLOWED_IRR range starts 4 bytes later, in the range + * + * (0x204, 0x214, ..., 0x274). + * + * Filter out everything else. + */ + if (WARN_ONCE(!(IS_ALIGNED(reg, 16) || + IS_ALIGNED(reg - 4, 16)), + "Misaligned APIC_IRR/ALLOWED_IRR APIC register read offset 0x%x", reg)) + return 0; + return apic_get_reg(ap, reg); + default: + pr_err("Error reading unknown Secure AVIC reg offset 0x%x\n", reg); + return 0; + } +} + +#define SAVIC_NMI_REQ 0x278 + +/* + * On WRMSR to APIC_SELF_IPI register by the guest, Secure AVIC hardware + * updates the APIC_IRR in the APIC backing page of the vCPU. In addition, + * hardware evaluates the new APIC_IRR update for interrupt injection to + * the vCPU. So, self IPIs are hardware-accelerated. + */ +static inline void self_ipi_reg_write(unsigned int vector) +{ + native_apic_msr_write(APIC_SELF_IPI, vector); +} + +static void send_ipi_dest(unsigned int cpu, unsigned int vector, bool nmi) +{ + if (nmi) + apic_set_reg(per_cpu_ptr(savic_page, cpu), SAVIC_NMI_REQ, 1); + else + update_vector(cpu, APIC_IRR, vector, true); +} + +static void send_ipi_allbut(unsigned int vector, bool nmi) +{ + unsigned int cpu, src_cpu; + + guard(irqsave)(); + + src_cpu = raw_smp_processor_id(); + + for_each_cpu(cpu, cpu_online_mask) { + if (cpu == src_cpu) + continue; + send_ipi_dest(cpu, vector, nmi); + } +} + +static inline void self_ipi(unsigned int vector, bool nmi) +{ + u32 icr_low = APIC_SELF_IPI | vector; + + if (nmi) + icr_low |= APIC_DM_NMI; + + native_x2apic_icr_write(icr_low, 0); +} + +static void savic_icr_write(u32 icr_low, u32 icr_high) +{ + unsigned int dsh, vector; + u64 icr_data; + bool nmi; + + dsh = icr_low & APIC_DEST_ALLBUT; + vector = icr_low & APIC_VECTOR_MASK; + nmi = ((icr_low & APIC_DM_FIXED_MASK) == APIC_DM_NMI); + + switch (dsh) { + case APIC_DEST_SELF: + self_ipi(vector, nmi); + break; + case APIC_DEST_ALLINC: + self_ipi(vector, nmi); + fallthrough; + case APIC_DEST_ALLBUT: + send_ipi_allbut(vector, nmi); + break; + default: + send_ipi_dest(icr_high, vector, nmi); + break; + } + + icr_data = ((u64)icr_high) << 32 | icr_low; + if (dsh != APIC_DEST_SELF) + savic_ghcb_msr_write(APIC_ICR, icr_data); + apic_set_reg64(this_cpu_ptr(savic_page), APIC_ICR, icr_data); +} + +static void savic_write(u32 reg, u32 data) +{ + void *ap = this_cpu_ptr(savic_page); + + switch (reg) { + case APIC_LVTT: + case APIC_TMICT: + case APIC_TDCR: + case APIC_LVT0: + case APIC_LVT1: + case APIC_LVTTHMR: + case APIC_LVTPC: + case APIC_LVTERR: + savic_ghcb_msr_write(reg, data); + break; + case APIC_TASKPRI: + case APIC_EOI: + case APIC_SPIV: + case SAVIC_NMI_REQ: + case APIC_ESR: + case APIC_ECTRL: + case APIC_SEOI: + case APIC_IER: + case APIC_EILVTn(0) ... APIC_EILVTn(3): + apic_set_reg(ap, reg, data); + break; + case APIC_ICR: + savic_icr_write(data, 0); + break; + case APIC_SELF_IPI: + self_ipi_reg_write(data); + break; + /* ALLOWED_IRR offsets are writable */ + case SAVIC_ALLOWED_IRR ... SAVIC_ALLOWED_IRR + 0x70: + if (IS_ALIGNED(reg - 4, 16)) { + apic_set_reg(ap, reg, data); + break; + } + fallthrough; + default: + pr_err("Error writing unknown Secure AVIC reg offset 0x%x\n", reg); + } +} + +static void send_ipi(u32 dest, unsigned int vector, unsigned int dsh) +{ + unsigned int icr_low; + + icr_low = __prepare_ICR(dsh, vector, APIC_DEST_PHYSICAL); + savic_icr_write(icr_low, dest); +} + +static void savic_send_ipi(int cpu, int vector) +{ + u32 dest = per_cpu(x86_cpu_to_apicid, cpu); + + send_ipi(dest, vector, 0); +} + +static void send_ipi_mask(const struct cpumask *mask, unsigned int vector, bool excl_self) +{ + unsigned int cpu, this_cpu; + + guard(irqsave)(); + + this_cpu = raw_smp_processor_id(); + + for_each_cpu(cpu, mask) { + if (excl_self && cpu == this_cpu) + continue; + send_ipi(per_cpu(x86_cpu_to_apicid, cpu), vector, 0); + } +} + +static void savic_send_ipi_mask(const struct cpumask *mask, int vector) +{ + send_ipi_mask(mask, vector, false); +} + +static void savic_send_ipi_mask_allbutself(const struct cpumask *mask, int vector) +{ + send_ipi_mask(mask, vector, true); +} + +static void savic_send_ipi_allbutself(int vector) +{ + send_ipi(0, vector, APIC_DEST_ALLBUT); +} + +static void savic_send_ipi_all(int vector) +{ + send_ipi(0, vector, APIC_DEST_ALLINC); +} + +static void savic_send_ipi_self(int vector) +{ + self_ipi_reg_write(vector); +} + +static void savic_update_vector(unsigned int cpu, unsigned int vector, bool set) +{ + update_vector(cpu, SAVIC_ALLOWED_IRR, vector, set); +} + +static void savic_eoi(void) +{ + unsigned int cpu; + int vec; + + cpu = raw_smp_processor_id(); + vec = apic_find_highest_vector(get_reg_bitmap(cpu, APIC_ISR)); + if (WARN_ONCE(vec == -1, "EOI write while no active interrupt in APIC_ISR")) + return; + + /* Is level-triggered interrupt? */ + if (apic_test_vector(vec, get_reg_bitmap(cpu, APIC_TMR))) { + update_vector(cpu, APIC_ISR, vec, false); + /* + * Propagate the EOI write to the hypervisor for level-triggered + * interrupts. Return to the guest from GHCB protocol event takes + * care of re-evaluating interrupt state. + */ + savic_ghcb_msr_write(APIC_EOI, 0); + } else { + /* + * Hardware clears APIC_ISR and re-evaluates the interrupt state + * to determine if there is any pending interrupt which can be + * delivered to CPU. + */ + native_apic_msr_eoi(); + } +} + +static void savic_teardown(void) +{ + /* Disable Secure AVIC */ + native_wrmsrq(MSR_AMD64_SAVIC_CONTROL, 0); + savic_unregister_gpa(NULL); +} + +static void savic_setup(void) +{ + void *ap = this_cpu_ptr(savic_page); + enum es_result res; + unsigned long gpa; + + /* + * Before Secure AVIC is enabled, APIC MSR reads are intercepted. + * APIC_ID MSR read returns the value from the hypervisor. + */ + apic_set_reg(ap, APIC_ID, native_apic_msr_read(APIC_ID)); + + gpa = __pa(ap); + + /* + * The NPT entry for a vCPU's APIC backing page must always be + * present when the vCPU is running in order for Secure AVIC to + * function. A VMEXIT_BUSY is returned on VMRUN and the vCPU cannot + * be resumed if the NPT entry for the APIC backing page is not + * present. Notify GPA of the vCPU's APIC backing page to the + * hypervisor by calling savic_register_gpa(). Before executing + * VMRUN, the hypervisor makes use of this information to make sure + * the APIC backing page is mapped in NPT. + */ + res = savic_register_gpa(gpa); + if (res != ES_OK) + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL); + + native_wrmsrq(MSR_AMD64_SAVIC_CONTROL, + gpa | MSR_AMD64_SAVIC_EN | MSR_AMD64_SAVIC_ALLOWEDNMI); +} + +static int savic_probe(void) +{ + if (!cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) + return 0; + + if (!x2apic_mode) { + pr_err("Secure AVIC enabled in non x2APIC mode\n"); + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL); + /* unreachable */ + } + + savic_page = alloc_percpu(struct secure_avic_page); + if (!savic_page) + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL); + + return 1; +} + +static struct apic apic_x2apic_savic __ro_after_init = { + + .name = "secure avic x2apic", + .probe = savic_probe, + .acpi_madt_oem_check = savic_acpi_madt_oem_check, + .setup = savic_setup, + .teardown = savic_teardown, + + .dest_mode_logical = false, + + .disable_esr = 0, + + .cpu_present_to_apicid = default_cpu_present_to_apicid, + + .max_apic_id = UINT_MAX, + .x2apic_set_max_apicid = true, + .get_apic_id = x2apic_get_apic_id, + + .calc_dest_apicid = apic_default_calc_apicid, + + .send_IPI = savic_send_ipi, + .send_IPI_mask = savic_send_ipi_mask, + .send_IPI_mask_allbutself = savic_send_ipi_mask_allbutself, + .send_IPI_allbutself = savic_send_ipi_allbutself, + .send_IPI_all = savic_send_ipi_all, + .send_IPI_self = savic_send_ipi_self, + + .nmi_to_offline_cpu = true, + + .read = savic_read, + .write = savic_write, + .eoi = savic_eoi, + .icr_read = native_x2apic_icr_read, + .icr_write = savic_icr_write, + + .update_vector = savic_update_vector, +}; + +apic_driver(apic_x2apic_savic); diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 533fcf5636fc..fd28b53dbac5 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -52,10 +52,13 @@ SYM_PIC_ALIAS(next_early_pgt); pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX); unsigned int __pgtable_l5_enabled __ro_after_init; +SYM_PIC_ALIAS(__pgtable_l5_enabled); unsigned int pgdir_shift __ro_after_init = 39; EXPORT_SYMBOL(pgdir_shift); +SYM_PIC_ALIAS(pgdir_shift); unsigned int ptrs_per_p4d __ro_after_init = 1; EXPORT_SYMBOL(ptrs_per_p4d); +SYM_PIC_ALIAS(ptrs_per_p4d); unsigned long page_offset_base __ro_after_init = __PAGE_OFFSET_BASE_L4; EXPORT_SYMBOL(page_offset_base); @@ -316,5 +319,5 @@ void early_setup_idt(void) handler = vc_boot_ghcb; } - startup_64_load_idt(handler); + __pi_startup_64_load_idt(handler); } diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 76743dfad6ab..80ef5d386b03 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -61,7 +61,7 @@ RESERVE_BRK(pagetables, INIT_MAP_SIZE) * any particular GDT layout, because we load our own as soon as we * can. */ -__HEAD + __INIT SYM_CODE_START(startup_32) movl pa(initial_stack),%ecx @@ -136,6 +136,9 @@ SYM_CODE_END(startup_32) * If cpu hotplug is not supported then this code can go in init section * which will be freed later */ +#ifdef CONFIG_HOTPLUG_CPU + .text +#endif SYM_FUNC_START(startup_32_smp) cld movl $(__BOOT_DS),%eax diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 3e9b3a3bd039..21816b48537c 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -33,7 +33,7 @@ * because we need identity-mapped pages. */ - __HEAD + __INIT .code64 SYM_CODE_START_NOALIGN(startup_64) UNWIND_HINT_END_OF_STACK @@ -71,7 +71,7 @@ SYM_CODE_START_NOALIGN(startup_64) xorl %edx, %edx wrmsr - call startup_64_setup_gdt_idt + call __pi_startup_64_setup_gdt_idt /* Now switch to __KERNEL_CS so IRET works reliably */ pushq $__KERNEL_CS @@ -91,7 +91,7 @@ SYM_CODE_START_NOALIGN(startup_64) * subsequent code. Pass the boot_params pointer as the first argument. */ movq %r15, %rdi - call sme_enable + call __pi_sme_enable #endif /* Sanitize CPU configuration */ @@ -111,7 +111,7 @@ SYM_CODE_START_NOALIGN(startup_64) * programmed into CR3. */ movq %r15, %rsi - call __startup_64 + call __pi___startup_64 /* Form the CR3 value being sure to include the CR3 modifier */ leaq early_top_pgt(%rip), %rcx @@ -562,7 +562,7 @@ SYM_CODE_START_NOALIGN(vc_no_ghcb) /* Call C handler */ movq %rsp, %rdi movq ORIG_RAX(%rsp), %rsi - call do_vc_no_ghcb + call __pi_do_vc_no_ghcb /* Unwind pt_regs */ POP_REGS diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 4fa0be732af1..d7af4a64c211 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -160,11 +160,6 @@ SECTIONS } :text = 0xcccccccc - /* bootstrapping code */ - .head.text : AT(ADDR(.head.text) - LOAD_OFFSET) { - HEAD_TEXT - } :text = 0xcccccccc - /* End of text section, which should occupy whole number of pages */ _etext = .; . = ALIGN(PAGE_SIZE); @@ -227,6 +222,8 @@ SECTIONS */ .altinstr_aux : AT(ADDR(.altinstr_aux) - LOAD_OFFSET) { *(.altinstr_aux) + . = ALIGN(PAGE_SIZE); + __inittext_end = .; } INIT_DATA_SECTION(16) @@ -535,3 +532,5 @@ xen_elfnote_entry_value = xen_elfnote_phys32_entry_value = ABSOLUTE(xen_elfnote_phys32_entry) + ABSOLUTE(pvh_start_xen - LOAD_OFFSET); #endif + +#include "../boot/startup/exports.h" diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index faf3a13fb6ba..2f8c32173972 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -536,12 +536,6 @@ void __init sme_early_init(void) x86_init.resources.dmi_setup = snp_dmi_setup; } - /* - * Switch the SVSM CA mapping (if active) from identity mapped to - * kernel mapped. - */ - snp_update_svsm_ca(); - if (sev_status & MSR_AMD64_SNP_SECURE_TSC) setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE); } diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S index f8a33b25ae86..edbf9c998848 100644 --- a/arch/x86/mm/mem_encrypt_boot.S +++ b/arch/x86/mm/mem_encrypt_boot.S @@ -16,7 +16,7 @@ .text .code64 -SYM_FUNC_START(sme_encrypt_execute) +SYM_FUNC_START(__pi_sme_encrypt_execute) /* * Entry parameters: @@ -69,9 +69,9 @@ SYM_FUNC_START(sme_encrypt_execute) ANNOTATE_UNRET_SAFE ret int3 -SYM_FUNC_END(sme_encrypt_execute) +SYM_FUNC_END(__pi_sme_encrypt_execute) -SYM_FUNC_START(__enc_copy) +SYM_FUNC_START_LOCAL(__enc_copy) ANNOTATE_NOENDBR /* * Routine used to encrypt memory in place. diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S index 1d78e5631bb8..344030c1a81d 100644 --- a/arch/x86/platform/pvh/head.S +++ b/arch/x86/platform/pvh/head.S @@ -24,7 +24,7 @@ #include <asm/nospec-branch.h> #include <xen/interface/elfnote.h> - __HEAD + __INIT /* * Entry point for PVH guests. diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index 5778bc498415..e5a2b9a912d1 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -740,10 +740,10 @@ static void walk_relocs(int (*process)(struct section *sec, Elf_Rel *rel, static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym, const char *symname) { - int headtext = !strcmp(sec_name(sec->shdr.sh_info), ".head.text"); unsigned r_type = ELF64_R_TYPE(rel->r_info); ElfW(Addr) offset = rel->r_offset; int shn_abs = (sym->st_shndx == SHN_ABS) && !is_reloc(S_REL, symname); + if (sym->st_shndx == SHN_UNDEF) return 0; @@ -783,12 +783,6 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym, break; } - if (headtext) { - die("Absolute reference to symbol '%s' not permitted in .head.text\n", - symname); - break; - } - /* * Relocation offsets for 64 bit kernels are output * as 32 bits and sign extended back to 64 bits when diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c index 942372e69b4d..ee643a6cd691 100644 --- a/arch/x86/virt/svm/sev.c +++ b/arch/x86/virt/svm/sev.c @@ -1029,7 +1029,7 @@ int rmp_make_shared(u64 pfn, enum pg_level level) } EXPORT_SYMBOL_GPL(rmp_make_shared); -void snp_leak_pages(u64 pfn, unsigned int npages) +void __snp_leak_pages(u64 pfn, unsigned int npages, bool dump_rmp) { struct page *page = pfn_to_page(pfn); @@ -1052,14 +1052,15 @@ void snp_leak_pages(u64 pfn, unsigned int npages) (PageHead(page) && compound_nr(page) <= npages)) list_add_tail(&page->buddy_list, &snp_leaked_pages_list); - dump_rmpentry(pfn); + if (dump_rmp) + dump_rmpentry(pfn); snp_nr_leaked_pages++; pfn++; page++; } spin_unlock(&snp_leaked_pages_list_lock); } -EXPORT_SYMBOL_GPL(snp_leak_pages); +EXPORT_SYMBOL_GPL(__snp_leak_pages); void kdump_sev_callback(void) { diff --git a/drivers/acpi/arm64/gtdt.c b/drivers/acpi/arm64/gtdt.c index 70f8290b659d..fd995a1d3d24 100644 --- a/drivers/acpi/arm64/gtdt.c +++ b/drivers/acpi/arm64/gtdt.c @@ -388,11 +388,11 @@ static int __init gtdt_import_sbsa_gwdt(struct acpi_gtdt_watchdog *wd, return 0; } -static int __init gtdt_sbsa_gwdt_init(void) +static int __init gtdt_platform_timer_init(void) { void *platform_timer; struct acpi_table_header *table; - int ret, timer_count, gwdt_count = 0; + int ret, timer_count, gwdt_count = 0, mmio_timer_count = 0; if (acpi_disabled) return 0; @@ -414,20 +414,41 @@ static int __init gtdt_sbsa_gwdt_init(void) goto out_put_gtdt; for_each_platform_timer(platform_timer) { + ret = 0; + if (is_non_secure_watchdog(platform_timer)) { ret = gtdt_import_sbsa_gwdt(platform_timer, gwdt_count); if (ret) - break; + continue; gwdt_count++; + } else if (is_timer_block(platform_timer)) { + struct arch_timer_mem atm = {}; + struct platform_device *pdev; + + ret = gtdt_parse_timer_block(platform_timer, &atm); + if (ret) + continue; + + pdev = platform_device_register_data(NULL, "gtdt-arm-mmio-timer", + gwdt_count, &atm, + sizeof(atm)); + if (IS_ERR(pdev)) { + pr_err("Can't register timer %d\n", gwdt_count); + continue; + } + + mmio_timer_count++; } } if (gwdt_count) pr_info("found %d SBSA generic Watchdog(s).\n", gwdt_count); + if (mmio_timer_count) + pr_info("found %d Generic MMIO timer(s).\n", mmio_timer_count); out_put_gtdt: acpi_put_table(table); return ret; } -device_initcall(gtdt_sbsa_gwdt_init); +device_initcall(gtdt_platform_timer_init); diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 645f517a1ac2..ffcd23668763 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -395,8 +395,7 @@ config ARM_GLOBAL_TIMER config ARM_GT_INITIAL_PRESCALER_VAL int "ARM global timer initial prescaler value" - default 2 if ARCH_ZYNQ - default 1 + default 0 depends on ARM_GLOBAL_TIMER help When the ARM global timer initializes, its current rate is declared @@ -406,6 +405,7 @@ config ARM_GT_INITIAL_PRESCALER_VAL bounds about how much the parent clock is allowed to decrease or increase wrt the initial clock value. This affects CPU_FREQ max delta from the initial frequency. + Use 0 to use auto-detection in the driver. config ARM_TIMER_SP804 bool "Support for Dual Timer SP804 module" @@ -474,11 +474,14 @@ config FSL_FTM_TIMER help Support for Freescale FlexTimer Module (FTM) timer. -config VF_PIT_TIMER - bool +config NXP_PIT_TIMER + bool "NXP Periodic Interrupt Timer" if COMPILE_TEST select CLKSRC_MMIO help - Support for Periodic Interrupt Timer on Freescale Vybrid Family SoCs. + Support for Periodic Interrupt Timer on Freescale / NXP + SoCs. This periodic timer is found on the Vybrid Family and + the Automotive S32G2/3 platforms. It contains 4 channels + where two can be coupled to form a 64 bits channel. config SYS_SUPPORTS_SH_CMT bool diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index 205bf3b0a8f3..ec4452ee958f 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -49,7 +49,7 @@ obj-$(CONFIG_CLKSRC_LPC32XX) += timer-lpc32xx.o obj-$(CONFIG_CLKSRC_MPS2) += mps2-timer.o obj-$(CONFIG_CLKSRC_SAMSUNG_PWM) += samsung_pwm_timer.o obj-$(CONFIG_FSL_FTM_TIMER) += timer-fsl-ftm.o -obj-$(CONFIG_VF_PIT_TIMER) += timer-vf-pit.o +obj-$(CONFIG_NXP_PIT_TIMER) += timer-nxp-pit.o obj-$(CONFIG_CLKSRC_QCOM) += timer-qcom.o obj-$(CONFIG_MTK_TIMER) += timer-mediatek.o obj-$(CONFIG_MTK_CPUX_TIMER) += timer-mediatek-cpux.o @@ -64,6 +64,7 @@ obj-$(CONFIG_REALTEK_OTTO_TIMER) += timer-rtl-otto.o obj-$(CONFIG_ARC_TIMERS) += arc_timer.o obj-$(CONFIG_ARM_ARCH_TIMER) += arm_arch_timer.o +obj-$(CONFIG_ARM_ARCH_TIMER) += arm_arch_timer_mmio.o obj-$(CONFIG_ARM_GLOBAL_TIMER) += arm_global_timer.o obj-$(CONFIG_ARMV7M_SYSTICK) += armv7m_systick.o obj-$(CONFIG_ARM_TIMER_SP804) += timer-sp804.o diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 80ba6a54248c..90aeff44a276 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -34,42 +34,12 @@ #include <clocksource/arm_arch_timer.h> -#define CNTTIDR 0x08 -#define CNTTIDR_VIRT(n) (BIT(1) << ((n) * 4)) - -#define CNTACR(n) (0x40 + ((n) * 4)) -#define CNTACR_RPCT BIT(0) -#define CNTACR_RVCT BIT(1) -#define CNTACR_RFRQ BIT(2) -#define CNTACR_RVOFF BIT(3) -#define CNTACR_RWVT BIT(4) -#define CNTACR_RWPT BIT(5) - -#define CNTPCT_LO 0x00 -#define CNTVCT_LO 0x08 -#define CNTFRQ 0x10 -#define CNTP_CVAL_LO 0x20 -#define CNTP_CTL 0x2c -#define CNTV_CVAL_LO 0x30 -#define CNTV_CTL 0x3c - /* * The minimum amount of time a generic counter is guaranteed to not roll over * (40 years) */ #define MIN_ROLLOVER_SECS (40ULL * 365 * 24 * 3600) -static unsigned arch_timers_present __initdata; - -struct arch_timer { - void __iomem *base; - struct clock_event_device evt; -}; - -static struct arch_timer *arch_timer_mem __ro_after_init; - -#define to_arch_timer(e) container_of(e, struct arch_timer, evt) - static u32 arch_timer_rate __ro_after_init; static int arch_timer_ppi[ARCH_TIMER_MAX_TIMER_PPI] __ro_after_init; @@ -85,7 +55,6 @@ static struct clock_event_device __percpu *arch_timer_evt; static enum arch_timer_ppi_nr arch_timer_uses_ppi __ro_after_init = ARCH_TIMER_VIRT_PPI; static bool arch_timer_c3stop __ro_after_init; -static bool arch_timer_mem_use_virtual __ro_after_init; static bool arch_counter_suspend_stop __ro_after_init; #ifdef CONFIG_GENERIC_GETTIMEOFDAY static enum vdso_clock_mode vdso_default = VDSO_CLOCKMODE_ARCHTIMER; @@ -121,76 +90,6 @@ static int arch_counter_get_width(void) /* * Architected system timer support. */ - -static __always_inline -void arch_timer_reg_write(int access, enum arch_timer_reg reg, u64 val, - struct clock_event_device *clk) -{ - if (access == ARCH_TIMER_MEM_PHYS_ACCESS) { - struct arch_timer *timer = to_arch_timer(clk); - switch (reg) { - case ARCH_TIMER_REG_CTRL: - writel_relaxed((u32)val, timer->base + CNTP_CTL); - break; - case ARCH_TIMER_REG_CVAL: - /* - * Not guaranteed to be atomic, so the timer - * must be disabled at this point. - */ - writeq_relaxed(val, timer->base + CNTP_CVAL_LO); - break; - default: - BUILD_BUG(); - } - } else if (access == ARCH_TIMER_MEM_VIRT_ACCESS) { - struct arch_timer *timer = to_arch_timer(clk); - switch (reg) { - case ARCH_TIMER_REG_CTRL: - writel_relaxed((u32)val, timer->base + CNTV_CTL); - break; - case ARCH_TIMER_REG_CVAL: - /* Same restriction as above */ - writeq_relaxed(val, timer->base + CNTV_CVAL_LO); - break; - default: - BUILD_BUG(); - } - } else { - arch_timer_reg_write_cp15(access, reg, val); - } -} - -static __always_inline -u32 arch_timer_reg_read(int access, enum arch_timer_reg reg, - struct clock_event_device *clk) -{ - u32 val; - - if (access == ARCH_TIMER_MEM_PHYS_ACCESS) { - struct arch_timer *timer = to_arch_timer(clk); - switch (reg) { - case ARCH_TIMER_REG_CTRL: - val = readl_relaxed(timer->base + CNTP_CTL); - break; - default: - BUILD_BUG(); - } - } else if (access == ARCH_TIMER_MEM_VIRT_ACCESS) { - struct arch_timer *timer = to_arch_timer(clk); - switch (reg) { - case ARCH_TIMER_REG_CTRL: - val = readl_relaxed(timer->base + CNTV_CTL); - break; - default: - BUILD_BUG(); - } - } else { - val = arch_timer_reg_read_cp15(access, reg); - } - - return val; -} - static noinstr u64 raw_counter_get_cntpct_stable(void) { return __arch_counter_get_cntpct_stable(); @@ -424,7 +323,7 @@ void erratum_set_next_event_generic(const int access, unsigned long evt, unsigned long ctrl; u64 cval; - ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk); + ctrl = arch_timer_reg_read_cp15(access, ARCH_TIMER_REG_CTRL); ctrl |= ARCH_TIMER_CTRL_ENABLE; ctrl &= ~ARCH_TIMER_CTRL_IT_MASK; @@ -436,7 +335,7 @@ void erratum_set_next_event_generic(const int access, unsigned long evt, write_sysreg(cval, cntv_cval_el0); } - arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk); + arch_timer_reg_write_cp15(access, ARCH_TIMER_REG_CTRL, ctrl); } static __maybe_unused int erratum_set_next_event_virt(unsigned long evt, @@ -667,10 +566,10 @@ static __always_inline irqreturn_t timer_handler(const int access, { unsigned long ctrl; - ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, evt); + ctrl = arch_timer_reg_read_cp15(access, ARCH_TIMER_REG_CTRL); if (ctrl & ARCH_TIMER_CTRL_IT_STAT) { ctrl |= ARCH_TIMER_CTRL_IT_MASK; - arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, evt); + arch_timer_reg_write_cp15(access, ARCH_TIMER_REG_CTRL, ctrl); evt->event_handler(evt); return IRQ_HANDLED; } @@ -692,28 +591,14 @@ static irqreturn_t arch_timer_handler_phys(int irq, void *dev_id) return timer_handler(ARCH_TIMER_PHYS_ACCESS, evt); } -static irqreturn_t arch_timer_handler_phys_mem(int irq, void *dev_id) -{ - struct clock_event_device *evt = dev_id; - - return timer_handler(ARCH_TIMER_MEM_PHYS_ACCESS, evt); -} - -static irqreturn_t arch_timer_handler_virt_mem(int irq, void *dev_id) -{ - struct clock_event_device *evt = dev_id; - - return timer_handler(ARCH_TIMER_MEM_VIRT_ACCESS, evt); -} - static __always_inline int arch_timer_shutdown(const int access, struct clock_event_device *clk) { unsigned long ctrl; - ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk); + ctrl = arch_timer_reg_read_cp15(access, ARCH_TIMER_REG_CTRL); ctrl &= ~ARCH_TIMER_CTRL_ENABLE; - arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk); + arch_timer_reg_write_cp15(access, ARCH_TIMER_REG_CTRL, ctrl); return 0; } @@ -728,23 +613,13 @@ static int arch_timer_shutdown_phys(struct clock_event_device *clk) return arch_timer_shutdown(ARCH_TIMER_PHYS_ACCESS, clk); } -static int arch_timer_shutdown_virt_mem(struct clock_event_device *clk) -{ - return arch_timer_shutdown(ARCH_TIMER_MEM_VIRT_ACCESS, clk); -} - -static int arch_timer_shutdown_phys_mem(struct clock_event_device *clk) -{ - return arch_timer_shutdown(ARCH_TIMER_MEM_PHYS_ACCESS, clk); -} - static __always_inline void set_next_event(const int access, unsigned long evt, struct clock_event_device *clk) { unsigned long ctrl; u64 cnt; - ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk); + ctrl = arch_timer_reg_read_cp15(access, ARCH_TIMER_REG_CTRL); ctrl |= ARCH_TIMER_CTRL_ENABLE; ctrl &= ~ARCH_TIMER_CTRL_IT_MASK; @@ -753,8 +628,8 @@ static __always_inline void set_next_event(const int access, unsigned long evt, else cnt = __arch_counter_get_cntvct(); - arch_timer_reg_write(access, ARCH_TIMER_REG_CVAL, evt + cnt, clk); - arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk); + arch_timer_reg_write_cp15(access, ARCH_TIMER_REG_CVAL, evt + cnt); + arch_timer_reg_write_cp15(access, ARCH_TIMER_REG_CTRL, ctrl); } static int arch_timer_set_next_event_virt(unsigned long evt, @@ -771,60 +646,6 @@ static int arch_timer_set_next_event_phys(unsigned long evt, return 0; } -static noinstr u64 arch_counter_get_cnt_mem(struct arch_timer *t, int offset_lo) -{ - u32 cnt_lo, cnt_hi, tmp_hi; - - do { - cnt_hi = __le32_to_cpu((__le32 __force)__raw_readl(t->base + offset_lo + 4)); - cnt_lo = __le32_to_cpu((__le32 __force)__raw_readl(t->base + offset_lo)); - tmp_hi = __le32_to_cpu((__le32 __force)__raw_readl(t->base + offset_lo + 4)); - } while (cnt_hi != tmp_hi); - - return ((u64) cnt_hi << 32) | cnt_lo; -} - -static __always_inline void set_next_event_mem(const int access, unsigned long evt, - struct clock_event_device *clk) -{ - struct arch_timer *timer = to_arch_timer(clk); - unsigned long ctrl; - u64 cnt; - - ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk); - - /* Timer must be disabled before programming CVAL */ - if (ctrl & ARCH_TIMER_CTRL_ENABLE) { - ctrl &= ~ARCH_TIMER_CTRL_ENABLE; - arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk); - } - - ctrl |= ARCH_TIMER_CTRL_ENABLE; - ctrl &= ~ARCH_TIMER_CTRL_IT_MASK; - - if (access == ARCH_TIMER_MEM_VIRT_ACCESS) - cnt = arch_counter_get_cnt_mem(timer, CNTVCT_LO); - else - cnt = arch_counter_get_cnt_mem(timer, CNTPCT_LO); - - arch_timer_reg_write(access, ARCH_TIMER_REG_CVAL, evt + cnt, clk); - arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk); -} - -static int arch_timer_set_next_event_virt_mem(unsigned long evt, - struct clock_event_device *clk) -{ - set_next_event_mem(ARCH_TIMER_MEM_VIRT_ACCESS, evt, clk); - return 0; -} - -static int arch_timer_set_next_event_phys_mem(unsigned long evt, - struct clock_event_device *clk) -{ - set_next_event_mem(ARCH_TIMER_MEM_PHYS_ACCESS, evt, clk); - return 0; -} - static u64 __arch_timer_check_delta(void) { #ifdef CONFIG_ARM64 @@ -850,63 +671,41 @@ static u64 __arch_timer_check_delta(void) return CLOCKSOURCE_MASK(arch_counter_get_width()); } -static void __arch_timer_setup(unsigned type, - struct clock_event_device *clk) +static void __arch_timer_setup(struct clock_event_device *clk) { + typeof(clk->set_next_event) sne; u64 max_delta; clk->features = CLOCK_EVT_FEAT_ONESHOT; - if (type == ARCH_TIMER_TYPE_CP15) { - typeof(clk->set_next_event) sne; - - arch_timer_check_ool_workaround(ate_match_local_cap_id, NULL); - - if (arch_timer_c3stop) - clk->features |= CLOCK_EVT_FEAT_C3STOP; - clk->name = "arch_sys_timer"; - clk->rating = 450; - clk->cpumask = cpumask_of(smp_processor_id()); - clk->irq = arch_timer_ppi[arch_timer_uses_ppi]; - switch (arch_timer_uses_ppi) { - case ARCH_TIMER_VIRT_PPI: - clk->set_state_shutdown = arch_timer_shutdown_virt; - clk->set_state_oneshot_stopped = arch_timer_shutdown_virt; - sne = erratum_handler(set_next_event_virt); - break; - case ARCH_TIMER_PHYS_SECURE_PPI: - case ARCH_TIMER_PHYS_NONSECURE_PPI: - case ARCH_TIMER_HYP_PPI: - clk->set_state_shutdown = arch_timer_shutdown_phys; - clk->set_state_oneshot_stopped = arch_timer_shutdown_phys; - sne = erratum_handler(set_next_event_phys); - break; - default: - BUG(); - } + arch_timer_check_ool_workaround(ate_match_local_cap_id, NULL); - clk->set_next_event = sne; - max_delta = __arch_timer_check_delta(); - } else { - clk->features |= CLOCK_EVT_FEAT_DYNIRQ; - clk->name = "arch_mem_timer"; - clk->rating = 400; - clk->cpumask = cpu_possible_mask; - if (arch_timer_mem_use_virtual) { - clk->set_state_shutdown = arch_timer_shutdown_virt_mem; - clk->set_state_oneshot_stopped = arch_timer_shutdown_virt_mem; - clk->set_next_event = - arch_timer_set_next_event_virt_mem; - } else { - clk->set_state_shutdown = arch_timer_shutdown_phys_mem; - clk->set_state_oneshot_stopped = arch_timer_shutdown_phys_mem; - clk->set_next_event = - arch_timer_set_next_event_phys_mem; - } - - max_delta = CLOCKSOURCE_MASK(56); + if (arch_timer_c3stop) + clk->features |= CLOCK_EVT_FEAT_C3STOP; + clk->name = "arch_sys_timer"; + clk->rating = 450; + clk->cpumask = cpumask_of(smp_processor_id()); + clk->irq = arch_timer_ppi[arch_timer_uses_ppi]; + switch (arch_timer_uses_ppi) { + case ARCH_TIMER_VIRT_PPI: + clk->set_state_shutdown = arch_timer_shutdown_virt; + clk->set_state_oneshot_stopped = arch_timer_shutdown_virt; + sne = erratum_handler(set_next_event_virt); + break; + case ARCH_TIMER_PHYS_SECURE_PPI: + case ARCH_TIMER_PHYS_NONSECURE_PPI: + case ARCH_TIMER_HYP_PPI: + clk->set_state_shutdown = arch_timer_shutdown_phys; + clk->set_state_oneshot_stopped = arch_timer_shutdown_phys; + sne = erratum_handler(set_next_event_phys); + break; + default: + BUG(); } + clk->set_next_event = sne; + max_delta = __arch_timer_check_delta(); + clk->set_state_shutdown(clk); clockevents_config_and_register(clk, arch_timer_rate, 0xf, max_delta); @@ -1029,7 +828,7 @@ static int arch_timer_starting_cpu(unsigned int cpu) struct clock_event_device *clk = this_cpu_ptr(arch_timer_evt); u32 flags; - __arch_timer_setup(ARCH_TIMER_TYPE_CP15, clk); + __arch_timer_setup(clk); flags = check_ppi_trigger(arch_timer_ppi[arch_timer_uses_ppi]); enable_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi], flags); @@ -1075,22 +874,12 @@ static void __init arch_timer_of_configure_rate(u32 rate, struct device_node *np pr_warn("frequency not available\n"); } -static void __init arch_timer_banner(unsigned type) +static void __init arch_timer_banner(void) { - pr_info("%s%s%s timer(s) running at %lu.%02luMHz (%s%s%s).\n", - type & ARCH_TIMER_TYPE_CP15 ? "cp15" : "", - type == (ARCH_TIMER_TYPE_CP15 | ARCH_TIMER_TYPE_MEM) ? - " and " : "", - type & ARCH_TIMER_TYPE_MEM ? "mmio" : "", + pr_info("cp15 timer running at %lu.%02luMHz (%s).\n", (unsigned long)arch_timer_rate / 1000000, (unsigned long)(arch_timer_rate / 10000) % 100, - type & ARCH_TIMER_TYPE_CP15 ? - (arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) ? "virt" : "phys" : - "", - type == (ARCH_TIMER_TYPE_CP15 | ARCH_TIMER_TYPE_MEM) ? "/" : "", - type & ARCH_TIMER_TYPE_MEM ? - arch_timer_mem_use_virtual ? "virt" : "phys" : - ""); + (arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) ? "virt" : "phys"); } u32 arch_timer_get_rate(void) @@ -1108,11 +897,6 @@ bool arch_timer_evtstrm_available(void) return cpumask_test_cpu(raw_smp_processor_id(), &evtstrm_available); } -static noinstr u64 arch_counter_get_cntvct_mem(void) -{ - return arch_counter_get_cnt_mem(arch_timer_mem, CNTVCT_LO); -} - static struct arch_timer_kvm_info arch_timer_kvm_info; struct arch_timer_kvm_info *arch_timer_get_kvm_info(void) @@ -1120,42 +904,35 @@ struct arch_timer_kvm_info *arch_timer_get_kvm_info(void) return &arch_timer_kvm_info; } -static void __init arch_counter_register(unsigned type) +static void __init arch_counter_register(void) { u64 (*scr)(void); + u64 (*rd)(void); u64 start_count; int width; - /* Register the CP15 based counter if we have one */ - if (type & ARCH_TIMER_TYPE_CP15) { - u64 (*rd)(void); - - if ((IS_ENABLED(CONFIG_ARM64) && !is_hyp_mode_available()) || - arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) { - if (arch_timer_counter_has_wa()) { - rd = arch_counter_get_cntvct_stable; - scr = raw_counter_get_cntvct_stable; - } else { - rd = arch_counter_get_cntvct; - scr = arch_counter_get_cntvct; - } + if ((IS_ENABLED(CONFIG_ARM64) && !is_hyp_mode_available()) || + arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) { + if (arch_timer_counter_has_wa()) { + rd = arch_counter_get_cntvct_stable; + scr = raw_counter_get_cntvct_stable; } else { - if (arch_timer_counter_has_wa()) { - rd = arch_counter_get_cntpct_stable; - scr = raw_counter_get_cntpct_stable; - } else { - rd = arch_counter_get_cntpct; - scr = arch_counter_get_cntpct; - } + rd = arch_counter_get_cntvct; + scr = arch_counter_get_cntvct; } - - arch_timer_read_counter = rd; - clocksource_counter.vdso_clock_mode = vdso_default; } else { - arch_timer_read_counter = arch_counter_get_cntvct_mem; - scr = arch_counter_get_cntvct_mem; + if (arch_timer_counter_has_wa()) { + rd = arch_counter_get_cntpct_stable; + scr = raw_counter_get_cntpct_stable; + } else { + rd = arch_counter_get_cntpct; + scr = arch_counter_get_cntpct; + } } + arch_timer_read_counter = rd; + clocksource_counter.vdso_clock_mode = vdso_default; + width = arch_counter_get_width(); clocksource_counter.mask = CLOCKSOURCE_MASK(width); cyclecounter.mask = CLOCKSOURCE_MASK(width); @@ -1303,76 +1080,10 @@ out: return err; } -static int __init arch_timer_mem_register(void __iomem *base, unsigned int irq) -{ - int ret; - irq_handler_t func; - - arch_timer_mem = kzalloc(sizeof(*arch_timer_mem), GFP_KERNEL); - if (!arch_timer_mem) - return -ENOMEM; - - arch_timer_mem->base = base; - arch_timer_mem->evt.irq = irq; - __arch_timer_setup(ARCH_TIMER_TYPE_MEM, &arch_timer_mem->evt); - - if (arch_timer_mem_use_virtual) - func = arch_timer_handler_virt_mem; - else - func = arch_timer_handler_phys_mem; - - ret = request_irq(irq, func, IRQF_TIMER, "arch_mem_timer", &arch_timer_mem->evt); - if (ret) { - pr_err("Failed to request mem timer irq\n"); - kfree(arch_timer_mem); - arch_timer_mem = NULL; - } - - return ret; -} - -static const struct of_device_id arch_timer_of_match[] __initconst = { - { .compatible = "arm,armv7-timer", }, - { .compatible = "arm,armv8-timer", }, - {}, -}; - -static const struct of_device_id arch_timer_mem_of_match[] __initconst = { - { .compatible = "arm,armv7-timer-mem", }, - {}, -}; - -static bool __init arch_timer_needs_of_probing(void) -{ - struct device_node *dn; - bool needs_probing = false; - unsigned int mask = ARCH_TIMER_TYPE_CP15 | ARCH_TIMER_TYPE_MEM; - - /* We have two timers, and both device-tree nodes are probed. */ - if ((arch_timers_present & mask) == mask) - return false; - - /* - * Only one type of timer is probed, - * check if we have another type of timer node in device-tree. - */ - if (arch_timers_present & ARCH_TIMER_TYPE_CP15) - dn = of_find_matching_node(NULL, arch_timer_mem_of_match); - else - dn = of_find_matching_node(NULL, arch_timer_of_match); - - if (dn && of_device_is_available(dn)) - needs_probing = true; - - of_node_put(dn); - - return needs_probing; -} - static int __init arch_timer_common_init(void) { - arch_timer_banner(arch_timers_present); - arch_counter_register(arch_timers_present); + arch_timer_banner(); + arch_counter_register(); return arch_timer_arch_init(); } @@ -1421,13 +1132,11 @@ static int __init arch_timer_of_init(struct device_node *np) u32 rate; bool has_names; - if (arch_timers_present & ARCH_TIMER_TYPE_CP15) { + if (arch_timer_evt) { pr_warn("multiple nodes in dt, skipping\n"); return 0; } - arch_timers_present |= ARCH_TIMER_TYPE_CP15; - has_names = of_property_present(np, "interrupt-names"); for (i = ARCH_TIMER_PHYS_SECURE_PPI; i < ARCH_TIMER_MAX_TIMER_PPI; i++) { @@ -1472,283 +1181,22 @@ static int __init arch_timer_of_init(struct device_node *np) if (ret) return ret; - if (arch_timer_needs_of_probing()) - return 0; - return arch_timer_common_init(); } TIMER_OF_DECLARE(armv7_arch_timer, "arm,armv7-timer", arch_timer_of_init); TIMER_OF_DECLARE(armv8_arch_timer, "arm,armv8-timer", arch_timer_of_init); -static u32 __init -arch_timer_mem_frame_get_cntfrq(struct arch_timer_mem_frame *frame) -{ - void __iomem *base; - u32 rate; - - base = ioremap(frame->cntbase, frame->size); - if (!base) { - pr_err("Unable to map frame @ %pa\n", &frame->cntbase); - return 0; - } - - rate = readl_relaxed(base + CNTFRQ); - - iounmap(base); - - return rate; -} - -static struct arch_timer_mem_frame * __init -arch_timer_mem_find_best_frame(struct arch_timer_mem *timer_mem) -{ - struct arch_timer_mem_frame *frame, *best_frame = NULL; - void __iomem *cntctlbase; - u32 cnttidr; - int i; - - cntctlbase = ioremap(timer_mem->cntctlbase, timer_mem->size); - if (!cntctlbase) { - pr_err("Can't map CNTCTLBase @ %pa\n", - &timer_mem->cntctlbase); - return NULL; - } - - cnttidr = readl_relaxed(cntctlbase + CNTTIDR); - - /* - * Try to find a virtual capable frame. Otherwise fall back to a - * physical capable frame. - */ - for (i = 0; i < ARCH_TIMER_MEM_MAX_FRAMES; i++) { - u32 cntacr = CNTACR_RFRQ | CNTACR_RWPT | CNTACR_RPCT | - CNTACR_RWVT | CNTACR_RVOFF | CNTACR_RVCT; - - frame = &timer_mem->frame[i]; - if (!frame->valid) - continue; - - /* Try enabling everything, and see what sticks */ - writel_relaxed(cntacr, cntctlbase + CNTACR(i)); - cntacr = readl_relaxed(cntctlbase + CNTACR(i)); - - if ((cnttidr & CNTTIDR_VIRT(i)) && - !(~cntacr & (CNTACR_RWVT | CNTACR_RVCT))) { - best_frame = frame; - arch_timer_mem_use_virtual = true; - break; - } - - if (~cntacr & (CNTACR_RWPT | CNTACR_RPCT)) - continue; - - best_frame = frame; - } - - iounmap(cntctlbase); - - return best_frame; -} - -static int __init -arch_timer_mem_frame_register(struct arch_timer_mem_frame *frame) -{ - void __iomem *base; - int ret, irq; - - if (arch_timer_mem_use_virtual) - irq = frame->virt_irq; - else - irq = frame->phys_irq; - - if (!irq) { - pr_err("Frame missing %s irq.\n", - arch_timer_mem_use_virtual ? "virt" : "phys"); - return -EINVAL; - } - - if (!request_mem_region(frame->cntbase, frame->size, - "arch_mem_timer")) - return -EBUSY; - - base = ioremap(frame->cntbase, frame->size); - if (!base) { - pr_err("Can't map frame's registers\n"); - return -ENXIO; - } - - ret = arch_timer_mem_register(base, irq); - if (ret) { - iounmap(base); - return ret; - } - - arch_timers_present |= ARCH_TIMER_TYPE_MEM; - - return 0; -} - -static int __init arch_timer_mem_of_init(struct device_node *np) -{ - struct arch_timer_mem *timer_mem; - struct arch_timer_mem_frame *frame; - struct resource res; - int ret = -EINVAL; - u32 rate; - - timer_mem = kzalloc(sizeof(*timer_mem), GFP_KERNEL); - if (!timer_mem) - return -ENOMEM; - - if (of_address_to_resource(np, 0, &res)) - goto out; - timer_mem->cntctlbase = res.start; - timer_mem->size = resource_size(&res); - - for_each_available_child_of_node_scoped(np, frame_node) { - u32 n; - struct arch_timer_mem_frame *frame; - - if (of_property_read_u32(frame_node, "frame-number", &n)) { - pr_err(FW_BUG "Missing frame-number.\n"); - goto out; - } - if (n >= ARCH_TIMER_MEM_MAX_FRAMES) { - pr_err(FW_BUG "Wrong frame-number, only 0-%u are permitted.\n", - ARCH_TIMER_MEM_MAX_FRAMES - 1); - goto out; - } - frame = &timer_mem->frame[n]; - - if (frame->valid) { - pr_err(FW_BUG "Duplicated frame-number.\n"); - goto out; - } - - if (of_address_to_resource(frame_node, 0, &res)) - goto out; - - frame->cntbase = res.start; - frame->size = resource_size(&res); - - frame->virt_irq = irq_of_parse_and_map(frame_node, - ARCH_TIMER_VIRT_SPI); - frame->phys_irq = irq_of_parse_and_map(frame_node, - ARCH_TIMER_PHYS_SPI); - - frame->valid = true; - } - - frame = arch_timer_mem_find_best_frame(timer_mem); - if (!frame) { - pr_err("Unable to find a suitable frame in timer @ %pa\n", - &timer_mem->cntctlbase); - ret = -EINVAL; - goto out; - } - - rate = arch_timer_mem_frame_get_cntfrq(frame); - arch_timer_of_configure_rate(rate, np); - - ret = arch_timer_mem_frame_register(frame); - if (!ret && !arch_timer_needs_of_probing()) - ret = arch_timer_common_init(); -out: - kfree(timer_mem); - return ret; -} -TIMER_OF_DECLARE(armv7_arch_timer_mem, "arm,armv7-timer-mem", - arch_timer_mem_of_init); - #ifdef CONFIG_ACPI_GTDT -static int __init -arch_timer_mem_verify_cntfrq(struct arch_timer_mem *timer_mem) -{ - struct arch_timer_mem_frame *frame; - u32 rate; - int i; - - for (i = 0; i < ARCH_TIMER_MEM_MAX_FRAMES; i++) { - frame = &timer_mem->frame[i]; - - if (!frame->valid) - continue; - - rate = arch_timer_mem_frame_get_cntfrq(frame); - if (rate == arch_timer_rate) - continue; - - pr_err(FW_BUG "CNTFRQ mismatch: frame @ %pa: (0x%08lx), CPU: (0x%08lx)\n", - &frame->cntbase, - (unsigned long)rate, (unsigned long)arch_timer_rate); - - return -EINVAL; - } - - return 0; -} - -static int __init arch_timer_mem_acpi_init(int platform_timer_count) -{ - struct arch_timer_mem *timers, *timer; - struct arch_timer_mem_frame *frame, *best_frame = NULL; - int timer_count, i, ret = 0; - - timers = kcalloc(platform_timer_count, sizeof(*timers), - GFP_KERNEL); - if (!timers) - return -ENOMEM; - - ret = acpi_arch_timer_mem_init(timers, &timer_count); - if (ret || !timer_count) - goto out; - - /* - * While unlikely, it's theoretically possible that none of the frames - * in a timer expose the combination of feature we want. - */ - for (i = 0; i < timer_count; i++) { - timer = &timers[i]; - - frame = arch_timer_mem_find_best_frame(timer); - if (!best_frame) - best_frame = frame; - - ret = arch_timer_mem_verify_cntfrq(timer); - if (ret) { - pr_err("Disabling MMIO timers due to CNTFRQ mismatch\n"); - goto out; - } - - if (!best_frame) /* implies !frame */ - /* - * Only complain about missing suitable frames if we - * haven't already found one in a previous iteration. - */ - pr_err("Unable to find a suitable frame in timer @ %pa\n", - &timer->cntctlbase); - } - - if (best_frame) - ret = arch_timer_mem_frame_register(best_frame); -out: - kfree(timers); - return ret; -} - -/* Initialize per-processor generic timer and memory-mapped timer(if present) */ static int __init arch_timer_acpi_init(struct acpi_table_header *table) { - int ret, platform_timer_count; + int ret; - if (arch_timers_present & ARCH_TIMER_TYPE_CP15) { + if (arch_timer_evt) { pr_warn("already initialized, skipping\n"); return -EINVAL; } - arch_timers_present |= ARCH_TIMER_TYPE_CP15; - - ret = acpi_gtdt_init(table, &platform_timer_count); + ret = acpi_gtdt_init(table, NULL); if (ret) return ret; @@ -1790,10 +1238,6 @@ static int __init arch_timer_acpi_init(struct acpi_table_header *table) if (ret) return ret; - if (platform_timer_count && - arch_timer_mem_acpi_init(platform_timer_count)) - pr_err("Failed to initialize memory-mapped timer.\n"); - return arch_timer_common_init(); } TIMER_ACPI_DECLARE(arch_timer, ACPI_SIG_GTDT, arch_timer_acpi_init); diff --git a/drivers/clocksource/arm_arch_timer_mmio.c b/drivers/clocksource/arm_arch_timer_mmio.c new file mode 100644 index 000000000000..ebe1987d651e --- /dev/null +++ b/drivers/clocksource/arm_arch_timer_mmio.c @@ -0,0 +1,440 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * ARM Generic Memory Mapped Timer support + * + * Split from drivers/clocksource/arm_arch_timer.c + * + * Copyright (C) 2011 ARM Ltd. + * All Rights Reserved + */ + +#define pr_fmt(fmt) "arch_timer_mmio: " fmt + +#include <linux/clockchips.h> +#include <linux/interrupt.h> +#include <linux/io-64-nonatomic-lo-hi.h> +#include <linux/of_irq.h> +#include <linux/of_address.h> +#include <linux/platform_device.h> + +#include <clocksource/arm_arch_timer.h> + +#define CNTTIDR 0x08 +#define CNTTIDR_VIRT(n) (BIT(1) << ((n) * 4)) + +#define CNTACR(n) (0x40 + ((n) * 4)) +#define CNTACR_RPCT BIT(0) +#define CNTACR_RVCT BIT(1) +#define CNTACR_RFRQ BIT(2) +#define CNTACR_RVOFF BIT(3) +#define CNTACR_RWVT BIT(4) +#define CNTACR_RWPT BIT(5) + +#define CNTPCT_LO 0x00 +#define CNTVCT_LO 0x08 +#define CNTFRQ 0x10 +#define CNTP_CVAL_LO 0x20 +#define CNTP_CTL 0x2c +#define CNTV_CVAL_LO 0x30 +#define CNTV_CTL 0x3c + +enum arch_timer_access { + PHYS_ACCESS, + VIRT_ACCESS, +}; + +struct arch_timer { + struct clock_event_device evt; + struct clocksource cs; + struct arch_timer_mem *gt_block; + void __iomem *base; + enum arch_timer_access access; + u32 rate; +}; + +#define evt_to_arch_timer(e) container_of(e, struct arch_timer, evt) +#define cs_to_arch_timer(c) container_of(c, struct arch_timer, cs) + +static void arch_timer_mmio_write(struct arch_timer *timer, + enum arch_timer_reg reg, u64 val) +{ + switch (timer->access) { + case PHYS_ACCESS: + switch (reg) { + case ARCH_TIMER_REG_CTRL: + writel_relaxed((u32)val, timer->base + CNTP_CTL); + return; + case ARCH_TIMER_REG_CVAL: + /* + * Not guaranteed to be atomic, so the timer + * must be disabled at this point. + */ + writeq_relaxed(val, timer->base + CNTP_CVAL_LO); + return; + } + break; + case VIRT_ACCESS: + switch (reg) { + case ARCH_TIMER_REG_CTRL: + writel_relaxed((u32)val, timer->base + CNTV_CTL); + return; + case ARCH_TIMER_REG_CVAL: + /* Same restriction as above */ + writeq_relaxed(val, timer->base + CNTV_CVAL_LO); + return; + } + break; + } + + /* Should never be here */ + WARN_ON_ONCE(1); +} + +static u32 arch_timer_mmio_read(struct arch_timer *timer, enum arch_timer_reg reg) +{ + switch (timer->access) { + case PHYS_ACCESS: + switch (reg) { + case ARCH_TIMER_REG_CTRL: + return readl_relaxed(timer->base + CNTP_CTL); + default: + break; + } + break; + case VIRT_ACCESS: + switch (reg) { + case ARCH_TIMER_REG_CTRL: + return readl_relaxed(timer->base + CNTV_CTL); + default: + break; + } + break; + } + + /* Should never be here */ + WARN_ON_ONCE(1); + return 0; +} + +static noinstr u64 arch_counter_mmio_get_cnt(struct arch_timer *t) +{ + int offset_lo = t->access == VIRT_ACCESS ? CNTVCT_LO : CNTPCT_LO; + u32 cnt_lo, cnt_hi, tmp_hi; + + do { + cnt_hi = __le32_to_cpu((__le32 __force)__raw_readl(t->base + offset_lo + 4)); + cnt_lo = __le32_to_cpu((__le32 __force)__raw_readl(t->base + offset_lo)); + tmp_hi = __le32_to_cpu((__le32 __force)__raw_readl(t->base + offset_lo + 4)); + } while (cnt_hi != tmp_hi); + + return ((u64) cnt_hi << 32) | cnt_lo; +} + +static u64 arch_mmio_counter_read(struct clocksource *cs) +{ + struct arch_timer *at = cs_to_arch_timer(cs); + + return arch_counter_mmio_get_cnt(at); +} + +static int arch_timer_mmio_shutdown(struct clock_event_device *clk) +{ + struct arch_timer *at = evt_to_arch_timer(clk); + unsigned long ctrl; + + ctrl = arch_timer_mmio_read(at, ARCH_TIMER_REG_CTRL); + ctrl &= ~ARCH_TIMER_CTRL_ENABLE; + arch_timer_mmio_write(at, ARCH_TIMER_REG_CTRL, ctrl); + + return 0; +} + +static int arch_timer_mmio_set_next_event(unsigned long evt, + struct clock_event_device *clk) +{ + struct arch_timer *timer = evt_to_arch_timer(clk); + unsigned long ctrl; + u64 cnt; + + ctrl = arch_timer_mmio_read(timer, ARCH_TIMER_REG_CTRL); + + /* Timer must be disabled before programming CVAL */ + if (ctrl & ARCH_TIMER_CTRL_ENABLE) { + ctrl &= ~ARCH_TIMER_CTRL_ENABLE; + arch_timer_mmio_write(timer, ARCH_TIMER_REG_CTRL, ctrl); + } + + ctrl |= ARCH_TIMER_CTRL_ENABLE; + ctrl &= ~ARCH_TIMER_CTRL_IT_MASK; + + cnt = arch_counter_mmio_get_cnt(timer); + + arch_timer_mmio_write(timer, ARCH_TIMER_REG_CVAL, evt + cnt); + arch_timer_mmio_write(timer, ARCH_TIMER_REG_CTRL, ctrl); + return 0; +} + +static irqreturn_t arch_timer_mmio_handler(int irq, void *dev_id) +{ + struct clock_event_device *evt = dev_id; + struct arch_timer *at = evt_to_arch_timer(evt); + unsigned long ctrl; + + ctrl = arch_timer_mmio_read(at, ARCH_TIMER_REG_CTRL); + if (ctrl & ARCH_TIMER_CTRL_IT_STAT) { + ctrl |= ARCH_TIMER_CTRL_IT_MASK; + arch_timer_mmio_write(at, ARCH_TIMER_REG_CTRL, ctrl); + evt->event_handler(evt); + return IRQ_HANDLED; + } + + return IRQ_NONE; +} + +static struct arch_timer_mem_frame *find_best_frame(struct platform_device *pdev) +{ + struct arch_timer_mem_frame *frame, *best_frame = NULL; + struct arch_timer *at = platform_get_drvdata(pdev); + void __iomem *cntctlbase; + u32 cnttidr; + + cntctlbase = ioremap(at->gt_block->cntctlbase, at->gt_block->size); + if (!cntctlbase) { + dev_err(&pdev->dev, "Can't map CNTCTLBase @ %pa\n", + &at->gt_block->cntctlbase); + return NULL; + } + + cnttidr = readl_relaxed(cntctlbase + CNTTIDR); + + /* + * Try to find a virtual capable frame. Otherwise fall back to a + * physical capable frame. + */ + for (int i = 0; i < ARCH_TIMER_MEM_MAX_FRAMES; i++) { + u32 cntacr = CNTACR_RFRQ | CNTACR_RWPT | CNTACR_RPCT | + CNTACR_RWVT | CNTACR_RVOFF | CNTACR_RVCT; + + frame = &at->gt_block->frame[i]; + if (!frame->valid) + continue; + + /* Try enabling everything, and see what sticks */ + writel_relaxed(cntacr, cntctlbase + CNTACR(i)); + cntacr = readl_relaxed(cntctlbase + CNTACR(i)); + + /* Pick a suitable frame for which we have an IRQ */ + if ((cnttidr & CNTTIDR_VIRT(i)) && + !(~cntacr & (CNTACR_RWVT | CNTACR_RVCT)) && + frame->virt_irq) { + best_frame = frame; + at->access = VIRT_ACCESS; + break; + } + + if ((~cntacr & (CNTACR_RWPT | CNTACR_RPCT)) || + !frame->phys_irq) + continue; + + at->access = PHYS_ACCESS; + best_frame = frame; + } + + iounmap(cntctlbase); + + return best_frame; +} + +static void arch_timer_mmio_setup(struct arch_timer *at, int irq) +{ + at->evt = (struct clock_event_device) { + .features = (CLOCK_EVT_FEAT_ONESHOT | + CLOCK_EVT_FEAT_DYNIRQ), + .name = "arch_mem_timer", + .rating = 400, + .cpumask = cpu_possible_mask, + .irq = irq, + .set_next_event = arch_timer_mmio_set_next_event, + .set_state_oneshot_stopped = arch_timer_mmio_shutdown, + .set_state_shutdown = arch_timer_mmio_shutdown, + }; + + at->evt.set_state_shutdown(&at->evt); + + clockevents_config_and_register(&at->evt, at->rate, 0xf, + (unsigned long)CLOCKSOURCE_MASK(56)); + + enable_irq(at->evt.irq); + + at->cs = (struct clocksource) { + .name = "arch_mmio_counter", + .rating = 300, + .read = arch_mmio_counter_read, + .mask = CLOCKSOURCE_MASK(56), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, + }; + + clocksource_register_hz(&at->cs, at->rate); +} + +static int arch_timer_mmio_frame_register(struct platform_device *pdev, + struct arch_timer_mem_frame *frame) +{ + struct arch_timer *at = platform_get_drvdata(pdev); + struct device_node *np = pdev->dev.of_node; + int ret, irq; + u32 rate; + + if (!devm_request_mem_region(&pdev->dev, frame->cntbase, frame->size, + "arch_mem_timer")) + return -EBUSY; + + at->base = devm_ioremap(&pdev->dev, frame->cntbase, frame->size); + if (!at->base) { + dev_err(&pdev->dev, "Can't map frame's registers\n"); + return -ENXIO; + } + + /* + * Allow "clock-frequency" to override the probed rate. If neither + * lead to something useful, use the CPU timer frequency as the + * fallback. The nice thing about that last point is that we woudn't + * made it here if we didn't have a valid frequency. + */ + rate = readl_relaxed(at->base + CNTFRQ); + + if (!np || of_property_read_u32(np, "clock-frequency", &at->rate)) + at->rate = rate; + + if (!at->rate) + at->rate = arch_timer_get_rate(); + + irq = at->access == VIRT_ACCESS ? frame->virt_irq : frame->phys_irq; + ret = devm_request_irq(&pdev->dev, irq, arch_timer_mmio_handler, + IRQF_TIMER | IRQF_NO_AUTOEN, "arch_mem_timer", + &at->evt); + if (ret) { + dev_err(&pdev->dev, "Failed to request mem timer irq\n"); + return ret; + } + + /* Afer this point, we're not allowed to fail anymore */ + arch_timer_mmio_setup(at, irq); + return 0; +} + +static int of_populate_gt_block(struct platform_device *pdev, + struct arch_timer *at) +{ + struct resource res; + + if (of_address_to_resource(pdev->dev.of_node, 0, &res)) + return -EINVAL; + + at->gt_block->cntctlbase = res.start; + at->gt_block->size = resource_size(&res); + + for_each_available_child_of_node_scoped(pdev->dev.of_node, frame_node) { + struct arch_timer_mem_frame *frame; + u32 n; + + if (of_property_read_u32(frame_node, "frame-number", &n)) { + dev_err(&pdev->dev, FW_BUG "Missing frame-number\n"); + return -EINVAL; + } + if (n >= ARCH_TIMER_MEM_MAX_FRAMES) { + dev_err(&pdev->dev, + FW_BUG "Wrong frame-number, only 0-%u are permitted\n", + ARCH_TIMER_MEM_MAX_FRAMES - 1); + return -EINVAL; + } + + frame = &at->gt_block->frame[n]; + + if (frame->valid) { + dev_err(&pdev->dev, FW_BUG "Duplicated frame-number\n"); + return -EINVAL; + } + + if (of_address_to_resource(frame_node, 0, &res)) + return -EINVAL; + + frame->cntbase = res.start; + frame->size = resource_size(&res); + + frame->phys_irq = irq_of_parse_and_map(frame_node, 0); + frame->virt_irq = irq_of_parse_and_map(frame_node, 1); + + frame->valid = true; + } + + return 0; +} + +static int arch_timer_mmio_probe(struct platform_device *pdev) +{ + struct arch_timer_mem_frame *frame; + struct arch_timer *at; + struct device_node *np; + int ret; + + np = pdev->dev.of_node; + + at = devm_kmalloc(&pdev->dev, sizeof(*at), GFP_KERNEL | __GFP_ZERO); + if (!at) + return -ENOMEM; + + if (np) { + at->gt_block = devm_kmalloc(&pdev->dev, sizeof(*at->gt_block), + GFP_KERNEL | __GFP_ZERO); + if (!at->gt_block) + return -ENOMEM; + ret = of_populate_gt_block(pdev, at); + if (ret) + return ret; + } else { + at->gt_block = dev_get_platdata(&pdev->dev); + } + + platform_set_drvdata(pdev, at); + + frame = find_best_frame(pdev); + if (!frame) { + dev_err(&pdev->dev, + "Unable to find a suitable frame in timer @ %pa\n", + &at->gt_block->cntctlbase); + return -EINVAL; + } + + ret = arch_timer_mmio_frame_register(pdev, frame); + if (!ret) + dev_info(&pdev->dev, + "mmio timer running at %lu.%02luMHz (%s)\n", + (unsigned long)at->rate / 1000000, + (unsigned long)(at->rate / 10000) % 100, + at->access == VIRT_ACCESS ? "virt" : "phys"); + + return ret; +} + +static const struct of_device_id arch_timer_mmio_of_table[] = { + { .compatible = "arm,armv7-timer-mem", }, + {} +}; + +static struct platform_driver arch_timer_mmio_drv = { + .driver = { + .name = "arch-timer-mmio", + .of_match_table = arch_timer_mmio_of_table, + }, + .probe = arch_timer_mmio_probe, +}; +builtin_platform_driver(arch_timer_mmio_drv); + +static struct platform_driver arch_timer_mmio_acpi_drv = { + .driver = { + .name = "gtdt-arm-mmio-timer", + }, + .probe = arch_timer_mmio_probe, +}; +builtin_platform_driver(arch_timer_mmio_acpi_drv); diff --git a/drivers/clocksource/arm_global_timer.c b/drivers/clocksource/arm_global_timer.c index 2d86bbc2764a..5e3d6bb7e437 100644 --- a/drivers/clocksource/arm_global_timer.c +++ b/drivers/clocksource/arm_global_timer.c @@ -263,14 +263,13 @@ static void __init gt_delay_timer_init(void) register_current_timer_delay(>_delay_timer); } -static int __init gt_clocksource_init(void) +static int __init gt_clocksource_init(unsigned int psv) { writel(0, gt_base + GT_CONTROL); writel(0, gt_base + GT_COUNTER0); writel(0, gt_base + GT_COUNTER1); /* set prescaler and enable timer on all the cores */ - writel(FIELD_PREP(GT_CONTROL_PRESCALER_MASK, - CONFIG_ARM_GT_INITIAL_PRESCALER_VAL - 1) | + writel(FIELD_PREP(GT_CONTROL_PRESCALER_MASK, psv - 1) | GT_CONTROL_TIMER_ENABLE, gt_base + GT_CONTROL); #ifdef CONFIG_CLKSRC_ARM_GLOBAL_TIMER_SCHED_CLOCK @@ -338,11 +337,45 @@ static int gt_clk_rate_change_cb(struct notifier_block *nb, return NOTIFY_DONE; } +struct gt_prescaler_config { + const char *compatible; + unsigned long prescaler; +}; + +static const struct gt_prescaler_config gt_prescaler_configs[] = { + /* + * On am43 the global timer clock is a child of the clock used for CPU + * OPPs, so the initial prescaler has to be compatible with all OPPs + * which are 300, 600, 720, 800 and 1000 with a fixed divider of 2, this + * gives us a GCD of 10. Initial frequency is 1000, so the prescaler is + * 50. + */ + { .compatible = "ti,am43", .prescaler = 50 }, + { .compatible = "xlnx,zynq-7000", .prescaler = 2 }, + { .compatible = NULL } +}; + +static unsigned long gt_get_initial_prescaler_value(struct device_node *np) +{ + const struct gt_prescaler_config *config; + + if (CONFIG_ARM_GT_INITIAL_PRESCALER_VAL != 0) + return CONFIG_ARM_GT_INITIAL_PRESCALER_VAL; + + for (config = gt_prescaler_configs; config->compatible; config++) { + if (of_machine_is_compatible(config->compatible)) + return config->prescaler; + } + + return 1; +} + static int __init global_timer_of_register(struct device_node *np) { struct clk *gt_clk; static unsigned long gt_clk_rate; int err; + unsigned long psv; /* * In A9 r2p0 the comparators for each processor with the global timer @@ -378,8 +411,9 @@ static int __init global_timer_of_register(struct device_node *np) goto out_unmap; } + psv = gt_get_initial_prescaler_value(np); gt_clk_rate = clk_get_rate(gt_clk); - gt_target_rate = gt_clk_rate / CONFIG_ARM_GT_INITIAL_PRESCALER_VAL; + gt_target_rate = gt_clk_rate / psv; gt_clk_rate_change_nb.notifier_call = gt_clk_rate_change_cb; err = clk_notifier_register(gt_clk, >_clk_rate_change_nb); @@ -404,7 +438,7 @@ static int __init global_timer_of_register(struct device_node *np) } /* Register and immediately configure the timer on the boot CPU */ - err = gt_clocksource_init(); + err = gt_clocksource_init(psv); if (err) goto out_irq; diff --git a/drivers/clocksource/clps711x-timer.c b/drivers/clocksource/clps711x-timer.c index e95fdc49c226..bbceb0289d45 100644 --- a/drivers/clocksource/clps711x-timer.c +++ b/drivers/clocksource/clps711x-timer.c @@ -78,24 +78,33 @@ static int __init clps711x_timer_init(struct device_node *np) unsigned int irq = irq_of_parse_and_map(np, 0); struct clk *clock = of_clk_get(np, 0); void __iomem *base = of_iomap(np, 0); + int ret = 0; if (!base) return -ENOMEM; - if (!irq) - return -EINVAL; - if (IS_ERR(clock)) - return PTR_ERR(clock); + if (!irq) { + ret = -EINVAL; + goto unmap_io; + } + if (IS_ERR(clock)) { + ret = PTR_ERR(clock); + goto unmap_io; + } switch (of_alias_get_id(np, "timer")) { case CLPS711X_CLKSRC_CLOCKSOURCE: clps711x_clksrc_init(clock, base); break; case CLPS711X_CLKSRC_CLOCKEVENT: - return _clps711x_clkevt_init(clock, base, irq); + ret = _clps711x_clkevt_init(clock, base, irq); + break; default: - return -EINVAL; + ret = -EINVAL; + break; } - return 0; +unmap_io: + iounmap(base); + return ret; } TIMER_OF_DECLARE(clps711x, "cirrus,ep7209-timer", clps711x_timer_init); diff --git a/drivers/clocksource/ingenic-sysost.c b/drivers/clocksource/ingenic-sysost.c index cb6fc2f152d4..e79cfb0b8e05 100644 --- a/drivers/clocksource/ingenic-sysost.c +++ b/drivers/clocksource/ingenic-sysost.c @@ -127,18 +127,23 @@ static u8 ingenic_ost_get_prescale(unsigned long rate, unsigned long req_rate) return 2; /* /16 divider */ } -static long ingenic_ost_round_rate(struct clk_hw *hw, unsigned long req_rate, - unsigned long *parent_rate) +static int ingenic_ost_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { - unsigned long rate = *parent_rate; + unsigned long rate = req->best_parent_rate; u8 prescale; - if (req_rate > rate) - return rate; + if (req->rate > rate) { + req->rate = rate; - prescale = ingenic_ost_get_prescale(rate, req_rate); + return 0; + } + + prescale = ingenic_ost_get_prescale(rate, req->rate); - return rate >> (prescale * 2); + req->rate = rate >> (prescale * 2); + + return 0; } static int ingenic_ost_percpu_timer_set_rate(struct clk_hw *hw, unsigned long req_rate, @@ -175,14 +180,14 @@ static int ingenic_ost_global_timer_set_rate(struct clk_hw *hw, unsigned long re static const struct clk_ops ingenic_ost_percpu_timer_ops = { .recalc_rate = ingenic_ost_percpu_timer_recalc_rate, - .round_rate = ingenic_ost_round_rate, - .set_rate = ingenic_ost_percpu_timer_set_rate, + .determine_rate = ingenic_ost_determine_rate, + .set_rate = ingenic_ost_percpu_timer_set_rate, }; static const struct clk_ops ingenic_ost_global_timer_ops = { .recalc_rate = ingenic_ost_global_timer_recalc_rate, - .round_rate = ingenic_ost_round_rate, - .set_rate = ingenic_ost_global_timer_set_rate, + .determine_rate = ingenic_ost_determine_rate, + .set_rate = ingenic_ost_global_timer_set_rate, }; static const char * const ingenic_ost_clk_parents[] = { "ext" }; diff --git a/drivers/clocksource/scx200_hrt.c b/drivers/clocksource/scx200_hrt.c index c3536fffbe9a..5a99801a1657 100644 --- a/drivers/clocksource/scx200_hrt.c +++ b/drivers/clocksource/scx200_hrt.c @@ -52,6 +52,7 @@ static struct clocksource cs_hrt = { .mask = CLOCKSOURCE_MASK(32), .flags = CLOCK_SOURCE_IS_CONTINUOUS, /* mult, shift are set based on mhz27 flag */ + .owner = THIS_MODULE, }; static int __init init_hrt_clocksource(void) diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c index b72b36e0abed..385eb94bbe7c 100644 --- a/drivers/clocksource/sh_cmt.c +++ b/drivers/clocksource/sh_cmt.c @@ -578,37 +578,74 @@ static irqreturn_t sh_cmt_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -static int sh_cmt_start(struct sh_cmt_channel *ch, unsigned long flag) +static int sh_cmt_start_clocksource(struct sh_cmt_channel *ch) { int ret = 0; unsigned long flags; - if (flag & FLAG_CLOCKSOURCE) - pm_runtime_get_sync(&ch->cmt->pdev->dev); + pm_runtime_get_sync(&ch->cmt->pdev->dev); raw_spin_lock_irqsave(&ch->lock, flags); - if (!(ch->flags & (FLAG_CLOCKEVENT | FLAG_CLOCKSOURCE))) { - if (flag & FLAG_CLOCKEVENT) - pm_runtime_get_sync(&ch->cmt->pdev->dev); + if (!(ch->flags & (FLAG_CLOCKEVENT | FLAG_CLOCKSOURCE))) ret = sh_cmt_enable(ch); - } if (ret) goto out; - ch->flags |= flag; + + ch->flags |= FLAG_CLOCKSOURCE; /* setup timeout if no clockevent */ - if (ch->cmt->num_channels == 1 && - flag == FLAG_CLOCKSOURCE && (!(ch->flags & FLAG_CLOCKEVENT))) + if (ch->cmt->num_channels == 1 && !(ch->flags & FLAG_CLOCKEVENT)) __sh_cmt_set_next(ch, ch->max_match_value); +out: + raw_spin_unlock_irqrestore(&ch->lock, flags); + + return ret; +} + +static void sh_cmt_stop_clocksource(struct sh_cmt_channel *ch) +{ + unsigned long flags; + unsigned long f; + + raw_spin_lock_irqsave(&ch->lock, flags); + + f = ch->flags & (FLAG_CLOCKEVENT | FLAG_CLOCKSOURCE); + + ch->flags &= ~FLAG_CLOCKSOURCE; + + if (f && !(ch->flags & (FLAG_CLOCKEVENT | FLAG_CLOCKSOURCE))) + sh_cmt_disable(ch); + + raw_spin_unlock_irqrestore(&ch->lock, flags); + + pm_runtime_put(&ch->cmt->pdev->dev); +} + +static int sh_cmt_start_clockevent(struct sh_cmt_channel *ch) +{ + int ret = 0; + unsigned long flags; + + raw_spin_lock_irqsave(&ch->lock, flags); + + if (!(ch->flags & (FLAG_CLOCKEVENT | FLAG_CLOCKSOURCE))) { + pm_runtime_get_sync(&ch->cmt->pdev->dev); + ret = sh_cmt_enable(ch); + } + + if (ret) + goto out; + + ch->flags |= FLAG_CLOCKEVENT; out: raw_spin_unlock_irqrestore(&ch->lock, flags); return ret; } -static void sh_cmt_stop(struct sh_cmt_channel *ch, unsigned long flag) +static void sh_cmt_stop_clockevent(struct sh_cmt_channel *ch) { unsigned long flags; unsigned long f; @@ -616,22 +653,19 @@ static void sh_cmt_stop(struct sh_cmt_channel *ch, unsigned long flag) raw_spin_lock_irqsave(&ch->lock, flags); f = ch->flags & (FLAG_CLOCKEVENT | FLAG_CLOCKSOURCE); - ch->flags &= ~flag; + + ch->flags &= ~FLAG_CLOCKEVENT; if (f && !(ch->flags & (FLAG_CLOCKEVENT | FLAG_CLOCKSOURCE))) { sh_cmt_disable(ch); - if (flag & FLAG_CLOCKEVENT) - pm_runtime_put(&ch->cmt->pdev->dev); + pm_runtime_put(&ch->cmt->pdev->dev); } /* adjust the timeout to maximum if only clocksource left */ - if ((flag == FLAG_CLOCKEVENT) && (ch->flags & FLAG_CLOCKSOURCE)) + if (ch->flags & FLAG_CLOCKSOURCE) __sh_cmt_set_next(ch, ch->max_match_value); raw_spin_unlock_irqrestore(&ch->lock, flags); - - if (flag & FLAG_CLOCKSOURCE) - pm_runtime_put(&ch->cmt->pdev->dev); } static struct sh_cmt_channel *cs_to_sh_cmt(struct clocksource *cs) @@ -672,7 +706,7 @@ static int sh_cmt_clocksource_enable(struct clocksource *cs) ch->total_cycles = 0; - ret = sh_cmt_start(ch, FLAG_CLOCKSOURCE); + ret = sh_cmt_start_clocksource(ch); if (!ret) ch->cs_enabled = true; @@ -685,7 +719,7 @@ static void sh_cmt_clocksource_disable(struct clocksource *cs) WARN_ON(!ch->cs_enabled); - sh_cmt_stop(ch, FLAG_CLOCKSOURCE); + sh_cmt_stop_clocksource(ch); ch->cs_enabled = false; } @@ -696,7 +730,7 @@ static void sh_cmt_clocksource_suspend(struct clocksource *cs) if (!ch->cs_enabled) return; - sh_cmt_stop(ch, FLAG_CLOCKSOURCE); + sh_cmt_stop_clocksource(ch); dev_pm_genpd_suspend(&ch->cmt->pdev->dev); } @@ -708,7 +742,7 @@ static void sh_cmt_clocksource_resume(struct clocksource *cs) return; dev_pm_genpd_resume(&ch->cmt->pdev->dev); - sh_cmt_start(ch, FLAG_CLOCKSOURCE); + sh_cmt_start_clocksource(ch); } static int sh_cmt_register_clocksource(struct sh_cmt_channel *ch, @@ -740,7 +774,7 @@ static struct sh_cmt_channel *ced_to_sh_cmt(struct clock_event_device *ced) static void sh_cmt_clock_event_start(struct sh_cmt_channel *ch, int periodic) { - sh_cmt_start(ch, FLAG_CLOCKEVENT); + sh_cmt_start_clockevent(ch); if (periodic) sh_cmt_set_next(ch, ((ch->cmt->rate + HZ/2) / HZ) - 1); @@ -752,7 +786,7 @@ static int sh_cmt_clock_event_shutdown(struct clock_event_device *ced) { struct sh_cmt_channel *ch = ced_to_sh_cmt(ced); - sh_cmt_stop(ch, FLAG_CLOCKEVENT); + sh_cmt_stop_clockevent(ch); return 0; } @@ -763,7 +797,7 @@ static int sh_cmt_clock_event_set_state(struct clock_event_device *ced, /* deal with old setting first */ if (clockevent_state_oneshot(ced) || clockevent_state_periodic(ced)) - sh_cmt_stop(ch, FLAG_CLOCKEVENT); + sh_cmt_stop_clockevent(ch); dev_info(&ch->cmt->pdev->dev, "ch%u: used for %s clock events\n", ch->index, periodic ? "periodic" : "oneshot"); diff --git a/drivers/clocksource/timer-cs5535.c b/drivers/clocksource/timer-cs5535.c index d47acfe848ae..8af666c39890 100644 --- a/drivers/clocksource/timer-cs5535.c +++ b/drivers/clocksource/timer-cs5535.c @@ -101,6 +101,7 @@ static struct clock_event_device cs5535_clockevent = { .tick_resume = mfgpt_shutdown, .set_next_event = mfgpt_next_event, .rating = 250, + .owner = THIS_MODULE, }; static irqreturn_t mfgpt_tick(int irq, void *dev_id) diff --git a/drivers/clocksource/timer-econet-en751221.c b/drivers/clocksource/timer-econet-en751221.c index 3b449fdaafee..4008076b1a21 100644 --- a/drivers/clocksource/timer-econet-en751221.c +++ b/drivers/clocksource/timer-econet-en751221.c @@ -146,7 +146,7 @@ static int __init cevt_init(struct device_node *np) for_each_possible_cpu(i) { struct clock_event_device *cd = &per_cpu(econet_timer_pcpu, i); - cd->rating = 310, + cd->rating = 310; cd->features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_PERCPU; diff --git a/drivers/clocksource/timer-nxp-pit.c b/drivers/clocksource/timer-nxp-pit.c new file mode 100644 index 000000000000..2d0a3554b6bf --- /dev/null +++ b/drivers/clocksource/timer-nxp-pit.c @@ -0,0 +1,382 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2012-2013 Freescale Semiconductor, Inc. + * Copyright 2018,2021-2025 NXP + */ +#include <linux/interrupt.h> +#include <linux/clockchips.h> +#include <linux/cpuhotplug.h> +#include <linux/clk.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> +#include <linux/sched_clock.h> +#include <linux/platform_device.h> + +/* + * Each pit takes 0x10 Bytes register space + */ +#define PIT0_OFFSET 0x100 +#define PIT_CH(n) (PIT0_OFFSET + 0x10 * (n)) + +#define PITMCR(__base) (__base) + +#define PITMCR_FRZ BIT(0) +#define PITMCR_MDIS BIT(1) + +#define PITLDVAL(__base) (__base) +#define PITTCTRL(__base) ((__base) + 0x08) + +#define PITCVAL_OFFSET 0x04 +#define PITCVAL(__base) ((__base) + 0x04) + +#define PITTCTRL_TEN BIT(0) +#define PITTCTRL_TIE BIT(1) + +#define PITTFLG(__base) ((__base) + 0x0c) + +#define PITTFLG_TIF BIT(0) + +struct pit_timer { + void __iomem *clksrc_base; + void __iomem *clkevt_base; + struct clock_event_device ced; + struct clocksource cs; + int rate; +}; + +struct pit_timer_data { + int max_pit_instances; +}; + +static DEFINE_PER_CPU(struct pit_timer *, pit_timers); + +/* + * Global structure for multiple PITs initialization + */ +static int pit_instances; +static int max_pit_instances = 1; + +static void __iomem *sched_clock_base; + +static inline struct pit_timer *ced_to_pit(struct clock_event_device *ced) +{ + return container_of(ced, struct pit_timer, ced); +} + +static inline struct pit_timer *cs_to_pit(struct clocksource *cs) +{ + return container_of(cs, struct pit_timer, cs); +} + +static inline void pit_module_enable(void __iomem *base) +{ + writel(0, PITMCR(base)); +} + +static inline void pit_module_disable(void __iomem *base) +{ + writel(PITMCR_MDIS, PITMCR(base)); +} + +static inline void pit_timer_enable(void __iomem *base, bool tie) +{ + u32 val = PITTCTRL_TEN | (tie ? PITTCTRL_TIE : 0); + + writel(val, PITTCTRL(base)); +} + +static inline void pit_timer_disable(void __iomem *base) +{ + writel(0, PITTCTRL(base)); +} + +static inline void pit_timer_set_counter(void __iomem *base, unsigned int cnt) +{ + writel(cnt, PITLDVAL(base)); +} + +static inline void pit_timer_irqack(struct pit_timer *pit) +{ + writel(PITTFLG_TIF, PITTFLG(pit->clkevt_base)); +} + +static u64 notrace pit_read_sched_clock(void) +{ + return ~readl(sched_clock_base); +} + +static u64 pit_timer_clocksource_read(struct clocksource *cs) +{ + struct pit_timer *pit = cs_to_pit(cs); + + return (u64)~readl(PITCVAL(pit->clksrc_base)); +} + +static int pit_clocksource_init(struct pit_timer *pit, const char *name, + void __iomem *base, unsigned long rate) +{ + /* + * The channels 0 and 1 can be chained to build a 64-bit + * timer. Let's use the channel 2 as a clocksource and leave + * the channels 0 and 1 unused for anyone else who needs them + */ + pit->clksrc_base = base + PIT_CH(2); + pit->cs.name = name; + pit->cs.rating = 300; + pit->cs.read = pit_timer_clocksource_read; + pit->cs.mask = CLOCKSOURCE_MASK(32); + pit->cs.flags = CLOCK_SOURCE_IS_CONTINUOUS; + + /* set the max load value and start the clock source counter */ + pit_timer_disable(pit->clksrc_base); + pit_timer_set_counter(pit->clksrc_base, ~0); + pit_timer_enable(pit->clksrc_base, 0); + + sched_clock_base = pit->clksrc_base + PITCVAL_OFFSET; + sched_clock_register(pit_read_sched_clock, 32, rate); + + return clocksource_register_hz(&pit->cs, rate); +} + +static int pit_set_next_event(unsigned long delta, struct clock_event_device *ced) +{ + struct pit_timer *pit = ced_to_pit(ced); + + /* + * set a new value to PITLDVAL register will not restart the timer, + * to abort the current cycle and start a timer period with the new + * value, the timer must be disabled and enabled again. + * and the PITLAVAL should be set to delta minus one according to pit + * hardware requirement. + */ + pit_timer_disable(pit->clkevt_base); + pit_timer_set_counter(pit->clkevt_base, delta - 1); + pit_timer_enable(pit->clkevt_base, true); + + return 0; +} + +static int pit_shutdown(struct clock_event_device *ced) +{ + struct pit_timer *pit = ced_to_pit(ced); + + pit_timer_disable(pit->clkevt_base); + + return 0; +} + +static int pit_set_periodic(struct clock_event_device *ced) +{ + struct pit_timer *pit = ced_to_pit(ced); + + pit_set_next_event(pit->rate / HZ, ced); + + return 0; +} + +static irqreturn_t pit_timer_interrupt(int irq, void *dev_id) +{ + struct clock_event_device *ced = dev_id; + struct pit_timer *pit = ced_to_pit(ced); + + pit_timer_irqack(pit); + + /* + * pit hardware doesn't support oneshot, it will generate an interrupt + * and reload the counter value from PITLDVAL when PITCVAL reach zero, + * and start the counter again. So software need to disable the timer + * to stop the counter loop in ONESHOT mode. + */ + if (likely(clockevent_state_oneshot(ced))) + pit_timer_disable(pit->clkevt_base); + + ced->event_handler(ced); + + return IRQ_HANDLED; +} + +static int pit_clockevent_per_cpu_init(struct pit_timer *pit, const char *name, + void __iomem *base, unsigned long rate, + int irq, unsigned int cpu) +{ + int ret; + + /* + * The channels 0 and 1 can be chained to build a 64-bit + * timer. Let's use the channel 3 as a clockevent and leave + * the channels 0 and 1 unused for anyone else who needs them + */ + pit->clkevt_base = base + PIT_CH(3); + pit->rate = rate; + + pit_timer_disable(pit->clkevt_base); + + pit_timer_irqack(pit); + + ret = request_irq(irq, pit_timer_interrupt, IRQF_TIMER | IRQF_NOBALANCING, + name, &pit->ced); + if (ret) + return ret; + + pit->ced.cpumask = cpumask_of(cpu); + pit->ced.irq = irq; + + pit->ced.name = name; + pit->ced.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT; + pit->ced.set_state_shutdown = pit_shutdown; + pit->ced.set_state_periodic = pit_set_periodic; + pit->ced.set_next_event = pit_set_next_event; + pit->ced.rating = 300; + + per_cpu(pit_timers, cpu) = pit; + + return 0; +} + +static void pit_clockevent_per_cpu_exit(struct pit_timer *pit, unsigned int cpu) +{ + pit_timer_disable(pit->clkevt_base); + free_irq(pit->ced.irq, &pit->ced); + per_cpu(pit_timers, cpu) = NULL; +} + +static int pit_clockevent_starting_cpu(unsigned int cpu) +{ + struct pit_timer *pit = per_cpu(pit_timers, cpu); + int ret; + + if (!pit) + return 0; + + ret = irq_force_affinity(pit->ced.irq, cpumask_of(cpu)); + if (ret) { + pit_clockevent_per_cpu_exit(pit, cpu); + return ret; + } + + /* + * The value for the LDVAL register trigger is calculated as: + * LDVAL trigger = (period / clock period) - 1 + * The pit is a 32-bit down count timer, when the counter value + * reaches 0, it will generate an interrupt, thus the minimal + * LDVAL trigger value is 1. And then the min_delta is + * minimal LDVAL trigger value + 1, and the max_delta is full 32-bit. + */ + clockevents_config_and_register(&pit->ced, pit->rate, 2, 0xffffffff); + + return 0; +} + +static int pit_timer_init(struct device_node *np) +{ + struct pit_timer *pit; + struct clk *pit_clk; + void __iomem *timer_base; + const char *name = of_node_full_name(np); + unsigned long clk_rate; + int irq, ret; + + pit = kzalloc(sizeof(*pit), GFP_KERNEL); + if (!pit) + return -ENOMEM; + + ret = -ENXIO; + timer_base = of_iomap(np, 0); + if (!timer_base) { + pr_err("Failed to iomap\n"); + goto out_kfree; + } + + ret = -EINVAL; + irq = irq_of_parse_and_map(np, 0); + if (irq <= 0) { + pr_err("Failed to irq_of_parse_and_map\n"); + goto out_iounmap; + } + + pit_clk = of_clk_get(np, 0); + if (IS_ERR(pit_clk)) { + ret = PTR_ERR(pit_clk); + goto out_irq_dispose_mapping; + } + + ret = clk_prepare_enable(pit_clk); + if (ret) + goto out_clk_put; + + clk_rate = clk_get_rate(pit_clk); + + pit_module_disable(timer_base); + + ret = pit_clocksource_init(pit, name, timer_base, clk_rate); + if (ret) { + pr_err("Failed to initialize clocksource '%pOF'\n", np); + goto out_pit_module_disable; + } + + ret = pit_clockevent_per_cpu_init(pit, name, timer_base, clk_rate, irq, pit_instances); + if (ret) { + pr_err("Failed to initialize clockevent '%pOF'\n", np); + goto out_pit_clocksource_unregister; + } + + /* enable the pit module */ + pit_module_enable(timer_base); + + pit_instances++; + + if (pit_instances == max_pit_instances) { + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "PIT timer:starting", + pit_clockevent_starting_cpu, NULL); + if (ret < 0) + goto out_pit_clocksource_unregister; + } + + return 0; + +out_pit_clocksource_unregister: + clocksource_unregister(&pit->cs); +out_pit_module_disable: + pit_module_disable(timer_base); + clk_disable_unprepare(pit_clk); +out_clk_put: + clk_put(pit_clk); +out_irq_dispose_mapping: + irq_dispose_mapping(irq); +out_iounmap: + iounmap(timer_base); +out_kfree: + kfree(pit); + + return ret; +} + +static int pit_timer_probe(struct platform_device *pdev) +{ + const struct pit_timer_data *pit_timer_data; + + pit_timer_data = of_device_get_match_data(&pdev->dev); + if (pit_timer_data) + max_pit_instances = pit_timer_data->max_pit_instances; + + return pit_timer_init(pdev->dev.of_node); +} + +static struct pit_timer_data s32g2_data = { .max_pit_instances = 2 }; + +static const struct of_device_id pit_timer_of_match[] = { + { .compatible = "nxp,s32g2-pit", .data = &s32g2_data }, + { } +}; +MODULE_DEVICE_TABLE(of, pit_timer_of_match); + +static struct platform_driver nxp_pit_driver = { + .driver = { + .name = "nxp-pit", + .of_match_table = pit_timer_of_match, + }, + .probe = pit_timer_probe, +}; +module_platform_driver(nxp_pit_driver); + +TIMER_OF_DECLARE(vf610, "fsl,vf610-pit", pit_timer_init); diff --git a/drivers/clocksource/timer-nxp-stm.c b/drivers/clocksource/timer-nxp-stm.c index d7ccf9001729..bbc40623728f 100644 --- a/drivers/clocksource/timer-nxp-stm.c +++ b/drivers/clocksource/timer-nxp-stm.c @@ -201,6 +201,7 @@ static int __init nxp_stm_clocksource_init(struct device *dev, struct stm_timer stm_timer->cs.resume = nxp_stm_clocksource_resume; stm_timer->cs.mask = CLOCKSOURCE_MASK(32); stm_timer->cs.flags = CLOCK_SOURCE_IS_CONTINUOUS; + stm_timer->cs.owner = THIS_MODULE; ret = clocksource_register_hz(&stm_timer->cs, stm_timer->rate); if (ret) @@ -314,6 +315,7 @@ static int __init nxp_stm_clockevent_per_cpu_init(struct device *dev, struct stm stm_timer->ced.cpumask = cpumask_of(cpu); stm_timer->ced.rating = 460; stm_timer->ced.irq = irq; + stm_timer->ced.owner = THIS_MODULE; per_cpu(stm_timers, cpu) = stm_timer; diff --git a/drivers/clocksource/timer-rtl-otto.c b/drivers/clocksource/timer-rtl-otto.c index 8a3068b36e75..6113d2fdd4de 100644 --- a/drivers/clocksource/timer-rtl-otto.c +++ b/drivers/clocksource/timer-rtl-otto.c @@ -38,14 +38,13 @@ #define RTTM_BIT_COUNT 28 #define RTTM_MIN_DELTA 8 #define RTTM_MAX_DELTA CLOCKSOURCE_MASK(28) +#define RTTM_MAX_DIVISOR GENMASK(15, 0) /* - * Timers are derived from the LXB clock frequency. Usually this is a fixed - * multiple of the 25 MHz oscillator. The 930X SOC is an exception from that. - * Its LXB clock has only dividers and uses the switch PLL of 2.45 GHz as its - * base. The only meaningful frequencies we can achieve from that are 175.000 - * MHz and 153.125 MHz. The greatest common divisor of all explained possible - * speeds is 3125000. Pin the timers to this 3.125 MHz reference frequency. + * Timers are derived from the lexra bus (LXB) clock frequency. This is 175 MHz + * on RTL930x and 200 MHz on the other platforms. With 3.125 MHz choose a common + * divisor to have enough range and detail. This provides comparability between + * the different platforms. */ #define RTTM_TICKS_PER_SEC 3125000 @@ -55,11 +54,6 @@ struct rttm_cs { }; /* Simple internal register functions */ -static inline void rttm_set_counter(void __iomem *base, unsigned int counter) -{ - iowrite32(counter, base + RTTM_CNT); -} - static inline unsigned int rttm_get_counter(void __iomem *base) { return ioread32(base + RTTM_CNT); @@ -112,6 +106,22 @@ static irqreturn_t rttm_timer_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } +static void rttm_bounce_timer(void __iomem *base, u32 mode) +{ + /* + * When a running timer has less than ~5us left, a stop/start sequence + * might fail. While the details are unknown the most evident effect is + * that the subsequent interrupt will not be fired. + * + * As a workaround issue an intermediate restart with a very slow + * frequency of ~3kHz keeping the target counter (>=8). So the follow + * up restart will always be issued outside the critical window. + */ + + rttm_disable_timer(base); + rttm_enable_timer(base, mode, RTTM_MAX_DIVISOR); +} + static void rttm_stop_timer(void __iomem *base) { rttm_disable_timer(base); @@ -120,7 +130,6 @@ static void rttm_stop_timer(void __iomem *base) static void rttm_start_timer(struct timer_of *to, u32 mode) { - rttm_set_counter(to->of_base.base, 0); rttm_enable_timer(to->of_base.base, mode, to->of_clk.rate / RTTM_TICKS_PER_SEC); } @@ -129,7 +138,8 @@ static int rttm_next_event(unsigned long delta, struct clock_event_device *clkev struct timer_of *to = to_timer_of(clkevt); RTTM_DEBUG(to->of_base.base); - rttm_stop_timer(to->of_base.base); + rttm_bounce_timer(to->of_base.base, RTTM_CTRL_COUNTER); + rttm_disable_timer(to->of_base.base); rttm_set_period(to->of_base.base, delta); rttm_start_timer(to, RTTM_CTRL_COUNTER); @@ -141,7 +151,8 @@ static int rttm_state_oneshot(struct clock_event_device *clkevt) struct timer_of *to = to_timer_of(clkevt); RTTM_DEBUG(to->of_base.base); - rttm_stop_timer(to->of_base.base); + rttm_bounce_timer(to->of_base.base, RTTM_CTRL_COUNTER); + rttm_disable_timer(to->of_base.base); rttm_set_period(to->of_base.base, RTTM_TICKS_PER_SEC / HZ); rttm_start_timer(to, RTTM_CTRL_COUNTER); @@ -153,7 +164,8 @@ static int rttm_state_periodic(struct clock_event_device *clkevt) struct timer_of *to = to_timer_of(clkevt); RTTM_DEBUG(to->of_base.base); - rttm_stop_timer(to->of_base.base); + rttm_bounce_timer(to->of_base.base, RTTM_CTRL_TIMER); + rttm_disable_timer(to->of_base.base); rttm_set_period(to->of_base.base, RTTM_TICKS_PER_SEC / HZ); rttm_start_timer(to, RTTM_CTRL_TIMER); diff --git a/drivers/clocksource/timer-stm32-lp.c b/drivers/clocksource/timer-stm32-lp.c index 6e7944ffd7c0..c2a699f5c1dd 100644 --- a/drivers/clocksource/timer-stm32-lp.c +++ b/drivers/clocksource/timer-stm32-lp.c @@ -211,6 +211,7 @@ static void stm32_clkevent_lp_init(struct stm32_lp_private *priv, priv->clkevt.rating = STM32_LP_RATING; priv->clkevt.suspend = stm32_clkevent_lp_suspend; priv->clkevt.resume = stm32_clkevent_lp_resume; + priv->clkevt.owner = THIS_MODULE; clockevents_config_and_register(&priv->clkevt, rate, 0x1, STM32_LPTIM_MAX_ARR); diff --git a/drivers/clocksource/timer-sun5i.c b/drivers/clocksource/timer-sun5i.c index 6b48a9006444..f827d3f98f60 100644 --- a/drivers/clocksource/timer-sun5i.c +++ b/drivers/clocksource/timer-sun5i.c @@ -185,6 +185,7 @@ static int sun5i_setup_clocksource(struct platform_device *pdev, cs->clksrc.read = sun5i_clksrc_read; cs->clksrc.mask = CLOCKSOURCE_MASK(32); cs->clksrc.flags = CLOCK_SOURCE_IS_CONTINUOUS; + cs->clksrc.owner = THIS_MODULE; ret = clocksource_register_hz(&cs->clksrc, rate); if (ret) { @@ -214,6 +215,7 @@ static int sun5i_setup_clockevent(struct platform_device *pdev, ce->clkevt.rating = 340; ce->clkevt.irq = irq; ce->clkevt.cpumask = cpu_possible_mask; + ce->clkevt.owner = THIS_MODULE; /* Enable timer0 interrupt */ val = readl(base + TIMER_IRQ_EN_REG); diff --git a/drivers/clocksource/timer-tegra186.c b/drivers/clocksource/timer-tegra186.c index e5394f98a02e..355558893e5f 100644 --- a/drivers/clocksource/timer-tegra186.c +++ b/drivers/clocksource/timer-tegra186.c @@ -159,7 +159,7 @@ static void tegra186_wdt_enable(struct tegra186_wdt *wdt) tmr_writel(wdt->tmr, TMRCSSR_SRC_USEC, TMRCSSR); /* configure timer (system reset happens on the fifth expiration) */ - value = TMRCR_PTV(wdt->base.timeout * USEC_PER_SEC / 5) | + value = TMRCR_PTV(wdt->base.timeout * (USEC_PER_SEC / 5)) | TMRCR_PERIODIC | TMRCR_ENABLE; tmr_writel(wdt->tmr, value, TMRCR); @@ -231,7 +231,7 @@ static unsigned int tegra186_wdt_get_timeleft(struct watchdog_device *wdd) { struct tegra186_wdt *wdt = to_tegra186_wdt(wdd); u32 expiration, val; - u64 timeleft; + u32 timeleft; if (!watchdog_active(&wdt->base)) { /* return zero if the watchdog timer is not activated. */ @@ -266,21 +266,26 @@ static unsigned int tegra186_wdt_get_timeleft(struct watchdog_device *wdd) * Calculate the time remaining by adding the time for the * counter value to the time of the counter expirations that * remain. + * Note: Since wdt->base.timeout is bound to 255, the maximum + * value added to timeleft is + * 255 * (1,000,000 / 5) * 4 + * = 255 * 200,000 * 4 + * = 204,000,000 + * TMRSR_PCV is a 29-bit field. + * Its maximum value is 0x1fffffff = 536,870,911. + * 204,000,000 + 536,870,911 = 740,870,911 = 0x2C28CAFF. + * timeleft can therefore not overflow, and 64-bit calculations + * are not necessary. */ - timeleft += (((u64)wdt->base.timeout * USEC_PER_SEC) / 5) * (4 - expiration); + timeleft += (wdt->base.timeout * (USEC_PER_SEC / 5)) * (4 - expiration); /* * Convert the current counter value to seconds, - * rounding up to the nearest second. Cast u64 to - * u32 under the assumption that no overflow happens - * when coverting to seconds. + * rounding to the nearest second. */ - timeleft = DIV_ROUND_CLOSEST_ULL(timeleft, USEC_PER_SEC); + timeleft = DIV_ROUND_CLOSEST(timeleft, USEC_PER_SEC); - if (WARN_ON_ONCE(timeleft > U32_MAX)) - return U32_MAX; - - return lower_32_bits(timeleft); + return timeleft; } static const struct watchdog_ops tegra186_wdt_ops = { @@ -328,16 +333,12 @@ static struct tegra186_wdt *tegra186_wdt_create(struct tegra186_timer *tegra, wdt->base.parent = tegra->dev; err = watchdog_init_timeout(&wdt->base, 5, tegra->dev); - if (err < 0) { - dev_err(tegra->dev, "failed to initialize timeout: %d\n", err); + if (err < 0) return ERR_PTR(err); - } err = devm_watchdog_register_device(tegra->dev, &wdt->base); - if (err < 0) { - dev_err(tegra->dev, "failed to register WDT: %d\n", err); + if (err < 0) return ERR_PTR(err); - } return wdt; } @@ -373,6 +374,7 @@ static int tegra186_timer_tsc_init(struct tegra186_timer *tegra) tegra->tsc.read = tegra186_timer_tsc_read; tegra->tsc.mask = CLOCKSOURCE_MASK(56); tegra->tsc.flags = CLOCK_SOURCE_IS_CONTINUOUS; + tegra->tsc.owner = THIS_MODULE; return clocksource_register_hz(&tegra->tsc, 31250000); } @@ -392,6 +394,7 @@ static int tegra186_timer_osc_init(struct tegra186_timer *tegra) tegra->osc.read = tegra186_timer_osc_read; tegra->osc.mask = CLOCKSOURCE_MASK(32); tegra->osc.flags = CLOCK_SOURCE_IS_CONTINUOUS; + tegra->osc.owner = THIS_MODULE; return clocksource_register_hz(&tegra->osc, 38400000); } @@ -411,6 +414,7 @@ static int tegra186_timer_usec_init(struct tegra186_timer *tegra) tegra->usec.read = tegra186_timer_usec_read; tegra->usec.mask = CLOCKSOURCE_MASK(32); tegra->usec.flags = CLOCK_SOURCE_IS_CONTINUOUS; + tegra->usec.owner = THIS_MODULE; return clocksource_register_hz(&tegra->usec, USEC_PER_SEC); } diff --git a/drivers/clocksource/timer-ti-dm.c b/drivers/clocksource/timer-ti-dm.c index e9e32df6b566..793e7cdcb1b1 100644 --- a/drivers/clocksource/timer-ti-dm.c +++ b/drivers/clocksource/timer-ti-dm.c @@ -31,6 +31,7 @@ #include <linux/platform_data/dmtimer-omap.h> #include <clocksource/timer-ti-dm.h> +#include <linux/delay.h> /* * timer errata flags @@ -836,6 +837,48 @@ static int omap_dm_timer_set_match(struct omap_dm_timer *cookie, int enable, return 0; } +static int omap_dm_timer_set_cap(struct omap_dm_timer *cookie, + int autoreload, bool config_period) +{ + struct dmtimer *timer; + struct device *dev; + int rc; + u32 l; + + timer = to_dmtimer(cookie); + if (unlikely(!timer)) + return -EINVAL; + + dev = &timer->pdev->dev; + rc = pm_runtime_resume_and_get(dev); + if (rc) + return rc; + /* + * 1. Select autoreload mode. TIMER_TCLR[1] AR bit. + * 2. TIMER_TCLR[14]: Sets the functionality of the TIMER IO pin. + * 3. TIMER_TCLR[13] : Capture mode select bit. + * 3. TIMER_TCLR[9-8] : Select transition capture mode. + */ + + l = dmtimer_read(timer, OMAP_TIMER_CTRL_REG); + + if (autoreload) + l |= OMAP_TIMER_CTRL_AR; + + l |= OMAP_TIMER_CTRL_CAPTMODE | OMAP_TIMER_CTRL_GPOCFG; + + if (config_period == true) + l |= OMAP_TIMER_CTRL_TCM_LOWTOHIGH; /* Time Period config */ + else + l |= OMAP_TIMER_CTRL_TCM_BOTHEDGES; /* Duty Cycle config */ + + dmtimer_write(timer, OMAP_TIMER_CTRL_REG, l); + + pm_runtime_put_sync(dev); + + return 0; +} + static int omap_dm_timer_set_pwm(struct omap_dm_timer *cookie, int def_on, int toggle, int trigger, int autoreload) { @@ -1023,23 +1066,92 @@ static unsigned int omap_dm_timer_read_counter(struct omap_dm_timer *cookie) return __omap_dm_timer_read_counter(timer); } +static inline unsigned int __omap_dm_timer_cap(struct dmtimer *timer, int idx) +{ + return idx == 0 ? dmtimer_read(timer, OMAP_TIMER_CAPTURE_REG) : + dmtimer_read(timer, OMAP_TIMER_CAPTURE2_REG); +} + static int omap_dm_timer_write_counter(struct omap_dm_timer *cookie, unsigned int value) { struct dmtimer *timer; + struct device *dev; timer = to_dmtimer(cookie); - if (unlikely(!timer || !atomic_read(&timer->enabled))) { - pr_err("%s: timer not available or enabled.\n", __func__); + if (unlikely(!timer)) { + pr_err("%s: timer not available.\n", __func__); return -EINVAL; } + dev = &timer->pdev->dev; + + pm_runtime_resume_and_get(dev); dmtimer_write(timer, OMAP_TIMER_COUNTER_REG, value); + pm_runtime_put_sync(dev); /* Save the context */ timer->context.tcrr = value; return 0; } +/** + * omap_dm_timer_cap_counter() - Calculate the high count or period count depending on the + * configuration. + * @cookie:Pointer to OMAP DM timer + * @is_period:Whether to configure timer in period or duty cycle mode + * + * Return high count or period count if timer is enabled else appropriate error. + */ +static unsigned int omap_dm_timer_cap_counter(struct omap_dm_timer *cookie, bool is_period) +{ + struct dmtimer *timer; + unsigned int cap1 = 0; + unsigned int cap2 = 0; + u32 l, ret; + + timer = to_dmtimer(cookie); + if (unlikely(!timer || !atomic_read(&timer->enabled))) { + pr_err("%s:timer is not available or enabled.%p\n", __func__, (void *)timer); + return -EINVAL; + } + + /* Stop the timer */ + omap_dm_timer_stop(cookie); + + /* Clear the timer counter value to 0 */ + ret = omap_dm_timer_write_counter(cookie, 0); + if (ret) + return ret; + + /* Sets the timer capture configuration for period/duty cycle calculation */ + ret = omap_dm_timer_set_cap(cookie, true, is_period); + if (ret) { + pr_err("%s: Failed to set timer capture configuration.\n", __func__); + return ret; + } + /* Start the timer */ + omap_dm_timer_start(cookie); + + /* + * 1 sec delay is given so as to provide + * enough time to capture low frequency signals. + */ + msleep(1000); + + cap1 = __omap_dm_timer_cap(timer, 0); + cap2 = __omap_dm_timer_cap(timer, 1); + + /* + * Clears the TCLR configuration. + * The start bit must be set to 1 as the timer is already in start mode. + */ + l = dmtimer_read(timer, OMAP_TIMER_CTRL_REG); + l &= ~(0xffff) | 0x1; + dmtimer_write(timer, OMAP_TIMER_CTRL_REG, l); + + return (cap2-cap1); +} + static int __maybe_unused omap_dm_timer_runtime_suspend(struct device *dev) { struct dmtimer *timer = dev_get_drvdata(dev); @@ -1246,6 +1358,9 @@ static const struct omap_dm_timer_ops dmtimer_ops = { .write_counter = omap_dm_timer_write_counter, .read_status = omap_dm_timer_read_status, .write_status = omap_dm_timer_write_status, + .set_cap = omap_dm_timer_set_cap, + .get_cap_status = omap_dm_timer_get_pwm_status, + .read_cap = omap_dm_timer_cap_counter, }; static const struct dmtimer_platform_data omap3plus_pdata = { diff --git a/drivers/clocksource/timer-vf-pit.c b/drivers/clocksource/timer-vf-pit.c deleted file mode 100644 index 911c92146eca..000000000000 --- a/drivers/clocksource/timer-vf-pit.c +++ /dev/null @@ -1,194 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright 2012-2013 Freescale Semiconductor, Inc. - */ - -#include <linux/interrupt.h> -#include <linux/clockchips.h> -#include <linux/clk.h> -#include <linux/of_address.h> -#include <linux/of_irq.h> -#include <linux/sched_clock.h> - -/* - * Each pit takes 0x10 Bytes register space - */ -#define PITMCR 0x00 -#define PIT0_OFFSET 0x100 -#define PITn_OFFSET(n) (PIT0_OFFSET + 0x10 * (n)) -#define PITLDVAL 0x00 -#define PITCVAL 0x04 -#define PITTCTRL 0x08 -#define PITTFLG 0x0c - -#define PITMCR_MDIS (0x1 << 1) - -#define PITTCTRL_TEN (0x1 << 0) -#define PITTCTRL_TIE (0x1 << 1) -#define PITCTRL_CHN (0x1 << 2) - -#define PITTFLG_TIF 0x1 - -static void __iomem *clksrc_base; -static void __iomem *clkevt_base; -static unsigned long cycle_per_jiffy; - -static inline void pit_timer_enable(void) -{ - __raw_writel(PITTCTRL_TEN | PITTCTRL_TIE, clkevt_base + PITTCTRL); -} - -static inline void pit_timer_disable(void) -{ - __raw_writel(0, clkevt_base + PITTCTRL); -} - -static inline void pit_irq_acknowledge(void) -{ - __raw_writel(PITTFLG_TIF, clkevt_base + PITTFLG); -} - -static u64 notrace pit_read_sched_clock(void) -{ - return ~__raw_readl(clksrc_base + PITCVAL); -} - -static int __init pit_clocksource_init(unsigned long rate) -{ - /* set the max load value and start the clock source counter */ - __raw_writel(0, clksrc_base + PITTCTRL); - __raw_writel(~0UL, clksrc_base + PITLDVAL); - __raw_writel(PITTCTRL_TEN, clksrc_base + PITTCTRL); - - sched_clock_register(pit_read_sched_clock, 32, rate); - return clocksource_mmio_init(clksrc_base + PITCVAL, "vf-pit", rate, - 300, 32, clocksource_mmio_readl_down); -} - -static int pit_set_next_event(unsigned long delta, - struct clock_event_device *unused) -{ - /* - * set a new value to PITLDVAL register will not restart the timer, - * to abort the current cycle and start a timer period with the new - * value, the timer must be disabled and enabled again. - * and the PITLAVAL should be set to delta minus one according to pit - * hardware requirement. - */ - pit_timer_disable(); - __raw_writel(delta - 1, clkevt_base + PITLDVAL); - pit_timer_enable(); - - return 0; -} - -static int pit_shutdown(struct clock_event_device *evt) -{ - pit_timer_disable(); - return 0; -} - -static int pit_set_periodic(struct clock_event_device *evt) -{ - pit_set_next_event(cycle_per_jiffy, evt); - return 0; -} - -static irqreturn_t pit_timer_interrupt(int irq, void *dev_id) -{ - struct clock_event_device *evt = dev_id; - - pit_irq_acknowledge(); - - /* - * pit hardware doesn't support oneshot, it will generate an interrupt - * and reload the counter value from PITLDVAL when PITCVAL reach zero, - * and start the counter again. So software need to disable the timer - * to stop the counter loop in ONESHOT mode. - */ - if (likely(clockevent_state_oneshot(evt))) - pit_timer_disable(); - - evt->event_handler(evt); - - return IRQ_HANDLED; -} - -static struct clock_event_device clockevent_pit = { - .name = "VF pit timer", - .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, - .set_state_shutdown = pit_shutdown, - .set_state_periodic = pit_set_periodic, - .set_next_event = pit_set_next_event, - .rating = 300, -}; - -static int __init pit_clockevent_init(unsigned long rate, int irq) -{ - __raw_writel(0, clkevt_base + PITTCTRL); - __raw_writel(PITTFLG_TIF, clkevt_base + PITTFLG); - - BUG_ON(request_irq(irq, pit_timer_interrupt, IRQF_TIMER | IRQF_IRQPOLL, - "VF pit timer", &clockevent_pit)); - - clockevent_pit.cpumask = cpumask_of(0); - clockevent_pit.irq = irq; - /* - * The value for the LDVAL register trigger is calculated as: - * LDVAL trigger = (period / clock period) - 1 - * The pit is a 32-bit down count timer, when the counter value - * reaches 0, it will generate an interrupt, thus the minimal - * LDVAL trigger value is 1. And then the min_delta is - * minimal LDVAL trigger value + 1, and the max_delta is full 32-bit. - */ - clockevents_config_and_register(&clockevent_pit, rate, 2, 0xffffffff); - - return 0; -} - -static int __init pit_timer_init(struct device_node *np) -{ - struct clk *pit_clk; - void __iomem *timer_base; - unsigned long clk_rate; - int irq, ret; - - timer_base = of_iomap(np, 0); - if (!timer_base) { - pr_err("Failed to iomap\n"); - return -ENXIO; - } - - /* - * PIT0 and PIT1 can be chained to build a 64-bit timer, - * so choose PIT2 as clocksource, PIT3 as clockevent device, - * and leave PIT0 and PIT1 unused for anyone else who needs them. - */ - clksrc_base = timer_base + PITn_OFFSET(2); - clkevt_base = timer_base + PITn_OFFSET(3); - - irq = irq_of_parse_and_map(np, 0); - if (irq <= 0) - return -EINVAL; - - pit_clk = of_clk_get(np, 0); - if (IS_ERR(pit_clk)) - return PTR_ERR(pit_clk); - - ret = clk_prepare_enable(pit_clk); - if (ret) - return ret; - - clk_rate = clk_get_rate(pit_clk); - cycle_per_jiffy = clk_rate / (HZ); - - /* enable the pit module */ - __raw_writel(~PITMCR_MDIS, timer_base + PITMCR); - - ret = pit_clocksource_init(clk_rate); - if (ret) - return ret; - - return pit_clockevent_init(clk_rate, irq); -} -TIMER_OF_DECLARE(vf610, "fsl,vf610-pit", pit_timer_init); diff --git a/drivers/crypto/ccp/Makefile b/drivers/crypto/ccp/Makefile index 394484929dae..a9626b30044a 100644 --- a/drivers/crypto/ccp/Makefile +++ b/drivers/crypto/ccp/Makefile @@ -13,7 +13,8 @@ ccp-$(CONFIG_CRYPTO_DEV_SP_PSP) += psp-dev.o \ tee-dev.o \ platform-access.o \ dbc.o \ - hsti.o + hsti.o \ + sfs.o obj-$(CONFIG_CRYPTO_DEV_CCP_CRYPTO) += ccp-crypto.o ccp-crypto-objs := ccp-crypto-main.o \ diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c index 1c5a7189631e..9e21da0e298a 100644 --- a/drivers/crypto/ccp/psp-dev.c +++ b/drivers/crypto/ccp/psp-dev.c @@ -17,6 +17,7 @@ #include "psp-dev.h" #include "sev-dev.h" #include "tee-dev.h" +#include "sfs.h" #include "platform-access.h" #include "dbc.h" #include "hsti.h" @@ -182,6 +183,17 @@ static int psp_check_tee_support(struct psp_device *psp) return 0; } +static int psp_check_sfs_support(struct psp_device *psp) +{ + /* Check if device supports SFS feature */ + if (!psp->capability.sfs) { + dev_dbg(psp->dev, "psp does not support SFS\n"); + return -ENODEV; + } + + return 0; +} + static int psp_init(struct psp_device *psp) { int ret; @@ -198,6 +210,12 @@ static int psp_init(struct psp_device *psp) return ret; } + if (!psp_check_sfs_support(psp)) { + ret = sfs_dev_init(psp); + if (ret) + return ret; + } + if (psp->vdata->platform_access) { ret = platform_access_dev_init(psp); if (ret) @@ -302,6 +320,8 @@ void psp_dev_destroy(struct sp_device *sp) tee_dev_destroy(psp); + sfs_dev_destroy(psp); + dbc_dev_destroy(psp); platform_access_dev_destroy(psp); diff --git a/drivers/crypto/ccp/psp-dev.h b/drivers/crypto/ccp/psp-dev.h index e43ce87ede76..268c83f298cb 100644 --- a/drivers/crypto/ccp/psp-dev.h +++ b/drivers/crypto/ccp/psp-dev.h @@ -32,7 +32,8 @@ union psp_cap_register { unsigned int sev :1, tee :1, dbc_thru_ext :1, - rsvd1 :4, + sfs :1, + rsvd1 :3, security_reporting :1, fused_part :1, rsvd2 :1, @@ -68,6 +69,7 @@ struct psp_device { void *tee_data; void *platform_access_data; void *dbc_data; + void *sfs_data; union psp_cap_register capability; }; @@ -118,12 +120,16 @@ struct psp_ext_request { * @PSP_SUB_CMD_DBC_SET_UID: Set UID for DBC * @PSP_SUB_CMD_DBC_GET_PARAMETER: Get parameter from DBC * @PSP_SUB_CMD_DBC_SET_PARAMETER: Set parameter for DBC + * @PSP_SUB_CMD_SFS_GET_FW_VERS: Get firmware versions for ASP and other MP + * @PSP_SUB_CMD_SFS_UPDATE: Command to load, verify and execute SFS package */ enum psp_sub_cmd { PSP_SUB_CMD_DBC_GET_NONCE = PSP_DYNAMIC_BOOST_GET_NONCE, PSP_SUB_CMD_DBC_SET_UID = PSP_DYNAMIC_BOOST_SET_UID, PSP_SUB_CMD_DBC_GET_PARAMETER = PSP_DYNAMIC_BOOST_GET_PARAMETER, PSP_SUB_CMD_DBC_SET_PARAMETER = PSP_DYNAMIC_BOOST_SET_PARAMETER, + PSP_SUB_CMD_SFS_GET_FW_VERS = PSP_SFS_GET_FW_VERSIONS, + PSP_SUB_CMD_SFS_UPDATE = PSP_SFS_UPDATE, }; int psp_extended_mailbox_cmd(struct psp_device *psp, unsigned int timeout_msecs, diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index 9f5ccc1720cb..65d6d0af140a 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -82,6 +82,21 @@ MODULE_FIRMWARE("amd/amd_sev_fam19h_model1xh.sbin"); /* 4th gen EPYC */ static bool psp_dead; static int psp_timeout; +enum snp_hv_fixed_pages_state { + ALLOCATED, + HV_FIXED, +}; + +struct snp_hv_fixed_pages_entry { + struct list_head list; + struct page *page; + unsigned int order; + bool free; + enum snp_hv_fixed_pages_state page_state; +}; + +static LIST_HEAD(snp_hv_fixed_pages); + /* Trusted Memory Region (TMR): * The TMR is a 1MB area that must be 1MB aligned. Use the page allocator * to allocate the memory, which will return aligned memory for the specified @@ -1073,6 +1088,165 @@ static void snp_set_hsave_pa(void *arg) wrmsrq(MSR_VM_HSAVE_PA, 0); } +/* Hypervisor Fixed pages API interface */ +static void snp_hv_fixed_pages_state_update(struct sev_device *sev, + enum snp_hv_fixed_pages_state page_state) +{ + struct snp_hv_fixed_pages_entry *entry; + + /* List is protected by sev_cmd_mutex */ + lockdep_assert_held(&sev_cmd_mutex); + + if (list_empty(&snp_hv_fixed_pages)) + return; + + list_for_each_entry(entry, &snp_hv_fixed_pages, list) + entry->page_state = page_state; +} + +/* + * Allocate HV_FIXED pages in 2MB aligned sizes to ensure the whole + * 2MB pages are marked as HV_FIXED. + */ +struct page *snp_alloc_hv_fixed_pages(unsigned int num_2mb_pages) +{ + struct psp_device *psp_master = psp_get_master_device(); + struct snp_hv_fixed_pages_entry *entry; + struct sev_device *sev; + unsigned int order; + struct page *page; + + if (!psp_master || !psp_master->sev_data) + return NULL; + + sev = psp_master->sev_data; + + order = get_order(PMD_SIZE * num_2mb_pages); + + /* + * SNP_INIT_EX is protected by sev_cmd_mutex, therefore this list + * also needs to be protected using the same mutex. + */ + guard(mutex)(&sev_cmd_mutex); + + /* + * This API uses SNP_INIT_EX to transition allocated pages to HV_Fixed + * page state, fail if SNP is already initialized. + */ + if (sev->snp_initialized) + return NULL; + + /* Re-use freed pages that match the request */ + list_for_each_entry(entry, &snp_hv_fixed_pages, list) { + /* Hypervisor fixed page allocator implements exact fit policy */ + if (entry->order == order && entry->free) { + entry->free = false; + memset(page_address(entry->page), 0, + (1 << entry->order) * PAGE_SIZE); + return entry->page; + } + } + + page = alloc_pages(GFP_KERNEL | __GFP_ZERO, order); + if (!page) + return NULL; + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + __free_pages(page, order); + return NULL; + } + + entry->page = page; + entry->order = order; + list_add_tail(&entry->list, &snp_hv_fixed_pages); + + return page; +} + +void snp_free_hv_fixed_pages(struct page *page) +{ + struct psp_device *psp_master = psp_get_master_device(); + struct snp_hv_fixed_pages_entry *entry, *nentry; + + if (!psp_master || !psp_master->sev_data) + return; + + /* + * SNP_INIT_EX is protected by sev_cmd_mutex, therefore this list + * also needs to be protected using the same mutex. + */ + guard(mutex)(&sev_cmd_mutex); + + list_for_each_entry_safe(entry, nentry, &snp_hv_fixed_pages, list) { + if (entry->page != page) + continue; + + /* + * HV_FIXED page state cannot be changed until reboot + * and they cannot be used by an SNP guest, so they cannot + * be returned back to the page allocator. + * Mark the pages as free internally to allow possible re-use. + */ + if (entry->page_state == HV_FIXED) { + entry->free = true; + } else { + __free_pages(page, entry->order); + list_del(&entry->list); + kfree(entry); + } + return; + } +} + +static void snp_add_hv_fixed_pages(struct sev_device *sev, struct sev_data_range_list *range_list) +{ + struct snp_hv_fixed_pages_entry *entry; + struct sev_data_range *range; + int num_elements; + + lockdep_assert_held(&sev_cmd_mutex); + + if (list_empty(&snp_hv_fixed_pages)) + return; + + num_elements = list_count_nodes(&snp_hv_fixed_pages) + + range_list->num_elements; + + /* + * Ensure the list of HV_FIXED pages that will be passed to firmware + * do not exceed the page-sized argument buffer. + */ + if (num_elements * sizeof(*range) + sizeof(*range_list) > PAGE_SIZE) { + dev_warn(sev->dev, "Additional HV_Fixed pages cannot be accommodated, omitting\n"); + return; + } + + range = &range_list->ranges[range_list->num_elements]; + list_for_each_entry(entry, &snp_hv_fixed_pages, list) { + range->base = page_to_pfn(entry->page) << PAGE_SHIFT; + range->page_count = 1 << entry->order; + range++; + } + range_list->num_elements = num_elements; +} + +static void snp_leak_hv_fixed_pages(void) +{ + struct snp_hv_fixed_pages_entry *entry; + + /* List is protected by sev_cmd_mutex */ + lockdep_assert_held(&sev_cmd_mutex); + + if (list_empty(&snp_hv_fixed_pages)) + return; + + list_for_each_entry(entry, &snp_hv_fixed_pages, list) + if (entry->page_state == HV_FIXED) + __snp_leak_pages(page_to_pfn(entry->page), + 1 << entry->order, false); +} + static int snp_filter_reserved_mem_regions(struct resource *rs, void *arg) { struct sev_data_range_list *range_list = arg; @@ -1163,6 +1337,12 @@ static int __sev_snp_init_locked(int *error) return rc; } + /* + * Add HV_Fixed pages from other PSP sub-devices, such as SFS to the + * HV_Fixed page list. + */ + snp_add_hv_fixed_pages(sev, snp_range_list); + memset(&data, 0, sizeof(data)); data.init_rmp = 1; data.list_paddr_en = 1; @@ -1202,6 +1382,7 @@ static int __sev_snp_init_locked(int *error) return rc; } + snp_hv_fixed_pages_state_update(sev, HV_FIXED); sev->snp_initialized = true; dev_dbg(sev->dev, "SEV-SNP firmware initialized\n"); @@ -1784,6 +1965,7 @@ static int __sev_snp_shutdown_locked(int *error, bool panic) return ret; } + snp_leak_hv_fixed_pages(); sev->snp_initialized = false; dev_dbg(sev->dev, "SEV-SNP firmware shutdown\n"); diff --git a/drivers/crypto/ccp/sev-dev.h b/drivers/crypto/ccp/sev-dev.h index 3e4e5574e88a..28021abc85ad 100644 --- a/drivers/crypto/ccp/sev-dev.h +++ b/drivers/crypto/ccp/sev-dev.h @@ -65,4 +65,7 @@ void sev_dev_destroy(struct psp_device *psp); void sev_pci_init(void); void sev_pci_exit(void); +struct page *snp_alloc_hv_fixed_pages(unsigned int num_2mb_pages); +void snp_free_hv_fixed_pages(struct page *page); + #endif /* __SEV_DEV_H */ diff --git a/drivers/crypto/ccp/sfs.c b/drivers/crypto/ccp/sfs.c new file mode 100644 index 000000000000..2f4beaafe7ec --- /dev/null +++ b/drivers/crypto/ccp/sfs.c @@ -0,0 +1,311 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * AMD Secure Processor Seamless Firmware Servicing support. + * + * Copyright (C) 2025 Advanced Micro Devices, Inc. + * + * Author: Ashish Kalra <ashish.kalra@amd.com> + */ + +#include <linux/firmware.h> + +#include "sfs.h" +#include "sev-dev.h" + +#define SFS_DEFAULT_TIMEOUT (10 * MSEC_PER_SEC) +#define SFS_MAX_PAYLOAD_SIZE (2 * 1024 * 1024) +#define SFS_NUM_2MB_PAGES_CMDBUF (SFS_MAX_PAYLOAD_SIZE / PMD_SIZE) +#define SFS_NUM_PAGES_CMDBUF (SFS_MAX_PAYLOAD_SIZE / PAGE_SIZE) + +static DEFINE_MUTEX(sfs_ioctl_mutex); + +static struct sfs_misc_dev *misc_dev; + +static int send_sfs_cmd(struct sfs_device *sfs_dev, int msg) +{ + int ret; + + sfs_dev->command_buf->hdr.status = 0; + sfs_dev->command_buf->hdr.sub_cmd_id = msg; + + ret = psp_extended_mailbox_cmd(sfs_dev->psp, + SFS_DEFAULT_TIMEOUT, + (struct psp_ext_request *)sfs_dev->command_buf); + if (ret == -EIO) { + dev_dbg(sfs_dev->dev, + "msg 0x%x failed with PSP error: 0x%x, extended status: 0x%x\n", + msg, sfs_dev->command_buf->hdr.status, + *(u32 *)sfs_dev->command_buf->buf); + } + + return ret; +} + +static int send_sfs_get_fw_versions(struct sfs_device *sfs_dev) +{ + /* + * SFS_GET_FW_VERSIONS command needs the output buffer to be + * initialized to 0xC7 in every byte. + */ + memset(sfs_dev->command_buf->sfs_buffer, 0xc7, PAGE_SIZE); + sfs_dev->command_buf->hdr.payload_size = 2 * PAGE_SIZE; + + return send_sfs_cmd(sfs_dev, PSP_SFS_GET_FW_VERSIONS); +} + +static int send_sfs_update_package(struct sfs_device *sfs_dev, const char *payload_name) +{ + char payload_path[PAYLOAD_NAME_SIZE + sizeof("amd/")]; + const struct firmware *firmware; + unsigned long package_size; + int ret; + + /* Sanitize userspace provided payload name */ + if (!strnchr(payload_name, PAYLOAD_NAME_SIZE, '\0')) + return -EINVAL; + + snprintf(payload_path, sizeof(payload_path), "amd/%s", payload_name); + + ret = firmware_request_nowarn(&firmware, payload_path, sfs_dev->dev); + if (ret < 0) { + dev_warn_ratelimited(sfs_dev->dev, "firmware request failed for %s (%d)\n", + payload_path, ret); + return -ENOENT; + } + + /* + * SFS Update Package command's input buffer contains TEE_EXT_CMD_BUFFER + * followed by the Update Package and it should be 64KB aligned. + */ + package_size = ALIGN(firmware->size + PAGE_SIZE, 0x10000U); + + /* + * SFS command buffer is a pre-allocated 2MB buffer, fail update package + * if SFS payload is larger than the pre-allocated command buffer. + */ + if (package_size > SFS_MAX_PAYLOAD_SIZE) { + dev_warn_ratelimited(sfs_dev->dev, + "SFS payload size %ld larger than maximum supported payload size of %u\n", + package_size, SFS_MAX_PAYLOAD_SIZE); + release_firmware(firmware); + return -E2BIG; + } + + /* + * Copy firmware data to a HV_Fixed memory region. + */ + memcpy(sfs_dev->command_buf->sfs_buffer, firmware->data, firmware->size); + sfs_dev->command_buf->hdr.payload_size = package_size; + + release_firmware(firmware); + + return send_sfs_cmd(sfs_dev, PSP_SFS_UPDATE); +} + +static long sfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + struct sfs_user_get_fw_versions __user *sfs_get_fw_versions; + struct sfs_user_update_package __user *sfs_update_package; + struct psp_device *psp_master = psp_get_master_device(); + char payload_name[PAYLOAD_NAME_SIZE]; + struct sfs_device *sfs_dev; + int ret = 0; + + if (!psp_master || !psp_master->sfs_data) + return -ENODEV; + + sfs_dev = psp_master->sfs_data; + + guard(mutex)(&sfs_ioctl_mutex); + + switch (cmd) { + case SFSIOCFWVERS: + dev_dbg(sfs_dev->dev, "in SFSIOCFWVERS\n"); + + sfs_get_fw_versions = (struct sfs_user_get_fw_versions __user *)arg; + + ret = send_sfs_get_fw_versions(sfs_dev); + if (ret && ret != -EIO) + return ret; + + /* + * Return SFS status and extended status back to userspace + * if PSP status indicated success or command error. + */ + if (copy_to_user(&sfs_get_fw_versions->blob, sfs_dev->command_buf->sfs_buffer, + PAGE_SIZE)) + return -EFAULT; + if (copy_to_user(&sfs_get_fw_versions->sfs_status, + &sfs_dev->command_buf->hdr.status, + sizeof(sfs_get_fw_versions->sfs_status))) + return -EFAULT; + if (copy_to_user(&sfs_get_fw_versions->sfs_extended_status, + &sfs_dev->command_buf->buf, + sizeof(sfs_get_fw_versions->sfs_extended_status))) + return -EFAULT; + break; + case SFSIOCUPDATEPKG: + dev_dbg(sfs_dev->dev, "in SFSIOCUPDATEPKG\n"); + + sfs_update_package = (struct sfs_user_update_package __user *)arg; + + if (copy_from_user(payload_name, sfs_update_package->payload_name, + PAYLOAD_NAME_SIZE)) + return -EFAULT; + + ret = send_sfs_update_package(sfs_dev, payload_name); + if (ret && ret != -EIO) + return ret; + + /* + * Return SFS status and extended status back to userspace + * if PSP status indicated success or command error. + */ + if (copy_to_user(&sfs_update_package->sfs_status, + &sfs_dev->command_buf->hdr.status, + sizeof(sfs_update_package->sfs_status))) + return -EFAULT; + if (copy_to_user(&sfs_update_package->sfs_extended_status, + &sfs_dev->command_buf->buf, + sizeof(sfs_update_package->sfs_extended_status))) + return -EFAULT; + break; + default: + ret = -EINVAL; + } + + return ret; +} + +static const struct file_operations sfs_fops = { + .owner = THIS_MODULE, + .unlocked_ioctl = sfs_ioctl, +}; + +static void sfs_exit(struct kref *ref) +{ + misc_deregister(&misc_dev->misc); + kfree(misc_dev); + misc_dev = NULL; +} + +void sfs_dev_destroy(struct psp_device *psp) +{ + struct sfs_device *sfs_dev = psp->sfs_data; + + if (!sfs_dev) + return; + + /* + * Change SFS command buffer back to the default "Write-Back" type. + */ + set_memory_wb((unsigned long)sfs_dev->command_buf, SFS_NUM_PAGES_CMDBUF); + + snp_free_hv_fixed_pages(sfs_dev->page); + + if (sfs_dev->misc) + kref_put(&misc_dev->refcount, sfs_exit); + + psp->sfs_data = NULL; +} + +/* Based on sev_misc_init() */ +static int sfs_misc_init(struct sfs_device *sfs) +{ + struct device *dev = sfs->dev; + int ret; + + /* + * SFS feature support can be detected on multiple devices but the SFS + * FW commands must be issued on the master. During probe, we do not + * know the master hence we create /dev/sfs on the first device probe. + */ + if (!misc_dev) { + struct miscdevice *misc; + + misc_dev = kzalloc(sizeof(*misc_dev), GFP_KERNEL); + if (!misc_dev) + return -ENOMEM; + + misc = &misc_dev->misc; + misc->minor = MISC_DYNAMIC_MINOR; + misc->name = "sfs"; + misc->fops = &sfs_fops; + misc->mode = 0600; + + ret = misc_register(misc); + if (ret) + return ret; + + kref_init(&misc_dev->refcount); + } else { + kref_get(&misc_dev->refcount); + } + + sfs->misc = misc_dev; + dev_dbg(dev, "registered SFS device\n"); + + return 0; +} + +int sfs_dev_init(struct psp_device *psp) +{ + struct device *dev = psp->dev; + struct sfs_device *sfs_dev; + struct page *page; + int ret = -ENOMEM; + + sfs_dev = devm_kzalloc(dev, sizeof(*sfs_dev), GFP_KERNEL); + if (!sfs_dev) + return -ENOMEM; + + /* + * Pre-allocate 2MB command buffer for all SFS commands using + * SNP HV_Fixed page allocator which also transitions the + * SFS command buffer to HV_Fixed page state if SNP is enabled. + */ + page = snp_alloc_hv_fixed_pages(SFS_NUM_2MB_PAGES_CMDBUF); + if (!page) { + dev_dbg(dev, "Command Buffer HV-Fixed page allocation failed\n"); + goto cleanup_dev; + } + sfs_dev->page = page; + sfs_dev->command_buf = page_address(page); + + dev_dbg(dev, "Command buffer 0x%px to be marked as HV_Fixed\n", sfs_dev->command_buf); + + /* + * SFS command buffer must be mapped as non-cacheable. + */ + ret = set_memory_uc((unsigned long)sfs_dev->command_buf, SFS_NUM_PAGES_CMDBUF); + if (ret) { + dev_dbg(dev, "Set memory uc failed\n"); + goto cleanup_cmd_buf; + } + + dev_dbg(dev, "Command buffer 0x%px marked uncacheable\n", sfs_dev->command_buf); + + psp->sfs_data = sfs_dev; + sfs_dev->dev = dev; + sfs_dev->psp = psp; + + ret = sfs_misc_init(sfs_dev); + if (ret) + goto cleanup_mem_attr; + + dev_notice(sfs_dev->dev, "SFS support is available\n"); + + return 0; + +cleanup_mem_attr: + set_memory_wb((unsigned long)sfs_dev->command_buf, SFS_NUM_PAGES_CMDBUF); + +cleanup_cmd_buf: + snp_free_hv_fixed_pages(page); + +cleanup_dev: + psp->sfs_data = NULL; + devm_kfree(dev, sfs_dev); + + return ret; +} diff --git a/drivers/crypto/ccp/sfs.h b/drivers/crypto/ccp/sfs.h new file mode 100644 index 000000000000..97704c210efd --- /dev/null +++ b/drivers/crypto/ccp/sfs.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * AMD Platform Security Processor (PSP) Seamless Firmware (SFS) Support. + * + * Copyright (C) 2025 Advanced Micro Devices, Inc. + * + * Author: Ashish Kalra <ashish.kalra@amd.com> + */ + +#ifndef __SFS_H__ +#define __SFS_H__ + +#include <uapi/linux/psp-sfs.h> + +#include <linux/device.h> +#include <linux/miscdevice.h> +#include <linux/psp-sev.h> +#include <linux/psp-platform-access.h> +#include <linux/set_memory.h> + +#include "psp-dev.h" + +struct sfs_misc_dev { + struct kref refcount; + struct miscdevice misc; +}; + +struct sfs_command { + struct psp_ext_req_buffer_hdr hdr; + u8 buf[PAGE_SIZE - sizeof(struct psp_ext_req_buffer_hdr)]; + u8 sfs_buffer[]; +} __packed; + +struct sfs_device { + struct device *dev; + struct psp_device *psp; + + struct page *page; + struct sfs_command *command_buf; + + struct sfs_misc_dev *misc; +}; + +void sfs_dev_destroy(struct psp_device *psp); +int sfs_dev_init(struct psp_device *psp); + +#endif /* __SFS_H__ */ diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index cafc90d4caaf..0d05eac7c72b 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -788,7 +788,9 @@ static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry, *kernel_entry = addr + entry; - return efi_adjust_memory_range_protection(addr, kernel_text_size); + return efi_adjust_memory_range_protection(addr, kernel_text_size) ?: + efi_adjust_memory_range_protection(addr + kernel_inittext_offset, + kernel_inittext_size); } static void __noreturn enter_kernel(unsigned long kernel_addr, diff --git a/drivers/irqchip/irq-aspeed-scu-ic.c b/drivers/irqchip/irq-aspeed-scu-ic.c index 1c7045467c48..5584e0f82cce 100644 --- a/drivers/irqchip/irq-aspeed-scu-ic.c +++ b/drivers/irqchip/irq-aspeed-scu-ic.c @@ -1,61 +1,78 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* - * Aspeed AST24XX, AST25XX, and AST26XX SCU Interrupt Controller + * Aspeed AST24XX, AST25XX, AST26XX, and AST27XX SCU Interrupt Controller * Copyright 2019 IBM Corporation * * Eddie James <eajames@linux.ibm.com> */ #include <linux/bitops.h> +#include <linux/io.h> #include <linux/irq.h> #include <linux/irqchip.h> #include <linux/irqchip/chained_irq.h> #include <linux/irqdomain.h> -#include <linux/mfd/syscon.h> +#include <linux/of_address.h> #include <linux/of_irq.h> -#include <linux/regmap.h> -#define ASPEED_SCU_IC_REG 0x018 -#define ASPEED_SCU_IC_SHIFT 0 -#define ASPEED_SCU_IC_ENABLE GENMASK(15, ASPEED_SCU_IC_SHIFT) -#define ASPEED_SCU_IC_NUM_IRQS 7 #define ASPEED_SCU_IC_STATUS GENMASK(28, 16) #define ASPEED_SCU_IC_STATUS_SHIFT 16 +#define AST2700_SCU_IC_STATUS GENMASK(15, 0) + +struct aspeed_scu_ic_variant { + const char *compatible; + unsigned long irq_enable; + unsigned long irq_shift; + unsigned int num_irqs; + unsigned long ier; + unsigned long isr; +}; -#define ASPEED_AST2600_SCU_IC0_REG 0x560 -#define ASPEED_AST2600_SCU_IC0_SHIFT 0 -#define ASPEED_AST2600_SCU_IC0_ENABLE \ - GENMASK(5, ASPEED_AST2600_SCU_IC0_SHIFT) -#define ASPEED_AST2600_SCU_IC0_NUM_IRQS 6 +#define SCU_VARIANT(_compat, _shift, _enable, _num, _ier, _isr) { \ + .compatible = _compat, \ + .irq_shift = _shift, \ + .irq_enable = _enable, \ + .num_irqs = _num, \ + .ier = _ier, \ + .isr = _isr, \ +} -#define ASPEED_AST2600_SCU_IC1_REG 0x570 -#define ASPEED_AST2600_SCU_IC1_SHIFT 4 -#define ASPEED_AST2600_SCU_IC1_ENABLE \ - GENMASK(5, ASPEED_AST2600_SCU_IC1_SHIFT) -#define ASPEED_AST2600_SCU_IC1_NUM_IRQS 2 +static const struct aspeed_scu_ic_variant scu_ic_variants[] __initconst = { + SCU_VARIANT("aspeed,ast2400-scu-ic", 0, GENMASK(15, 0), 7, 0x00, 0x00), + SCU_VARIANT("aspeed,ast2500-scu-ic", 0, GENMASK(15, 0), 7, 0x00, 0x00), + SCU_VARIANT("aspeed,ast2600-scu-ic0", 0, GENMASK(5, 0), 6, 0x00, 0x00), + SCU_VARIANT("aspeed,ast2600-scu-ic1", 4, GENMASK(5, 4), 2, 0x00, 0x00), + SCU_VARIANT("aspeed,ast2700-scu-ic0", 0, GENMASK(3, 0), 4, 0x00, 0x04), + SCU_VARIANT("aspeed,ast2700-scu-ic1", 0, GENMASK(3, 0), 4, 0x00, 0x04), + SCU_VARIANT("aspeed,ast2700-scu-ic2", 0, GENMASK(3, 0), 4, 0x04, 0x00), + SCU_VARIANT("aspeed,ast2700-scu-ic3", 0, GENMASK(1, 0), 2, 0x04, 0x00), +}; struct aspeed_scu_ic { - unsigned long irq_enable; - unsigned long irq_shift; - unsigned int num_irqs; - unsigned int reg; - struct regmap *scu; - struct irq_domain *irq_domain; + unsigned long irq_enable; + unsigned long irq_shift; + unsigned int num_irqs; + void __iomem *base; + struct irq_domain *irq_domain; + unsigned long ier; + unsigned long isr; }; -static void aspeed_scu_ic_irq_handler(struct irq_desc *desc) +static inline bool scu_has_split_isr(struct aspeed_scu_ic *scu) +{ + return scu->ier != scu->isr; +} + +static void aspeed_scu_ic_irq_handler_combined(struct irq_desc *desc) { - unsigned int sts; - unsigned long bit; - unsigned long enabled; - unsigned long max; - unsigned long status; struct aspeed_scu_ic *scu_ic = irq_desc_get_handler_data(desc); struct irq_chip *chip = irq_desc_get_chip(desc); - unsigned int mask = scu_ic->irq_enable << ASPEED_SCU_IC_STATUS_SHIFT; + unsigned long bit, enabled, max, status; + unsigned int sts, mask; chained_irq_enter(chip, desc); + mask = scu_ic->irq_enable << ASPEED_SCU_IC_STATUS_SHIFT; /* * The SCU IC has just one register to control its operation and read * status. The interrupt enable bits occupy the lower 16 bits of the @@ -66,7 +83,7 @@ static void aspeed_scu_ic_irq_handler(struct irq_desc *desc) * shifting the status down to get the mapping and then back up to * clear the bit. */ - regmap_read(scu_ic->scu, scu_ic->reg, &sts); + sts = readl(scu_ic->base); enabled = sts & scu_ic->irq_enable; status = (sts >> ASPEED_SCU_IC_STATUS_SHIFT) & enabled; @@ -74,43 +91,83 @@ static void aspeed_scu_ic_irq_handler(struct irq_desc *desc) max = scu_ic->num_irqs + bit; for_each_set_bit_from(bit, &status, max) { - generic_handle_domain_irq(scu_ic->irq_domain, - bit - scu_ic->irq_shift); + generic_handle_domain_irq(scu_ic->irq_domain, bit - scu_ic->irq_shift); + writel((readl(scu_ic->base) & ~mask) | BIT(bit + ASPEED_SCU_IC_STATUS_SHIFT), + scu_ic->base); + } + + chained_irq_exit(chip, desc); +} + +static void aspeed_scu_ic_irq_handler_split(struct irq_desc *desc) +{ + struct aspeed_scu_ic *scu_ic = irq_desc_get_handler_data(desc); + struct irq_chip *chip = irq_desc_get_chip(desc); + unsigned long bit, enabled, max, status; + unsigned int sts, mask; - regmap_write_bits(scu_ic->scu, scu_ic->reg, mask, - BIT(bit + ASPEED_SCU_IC_STATUS_SHIFT)); + chained_irq_enter(chip, desc); + + mask = scu_ic->irq_enable; + sts = readl(scu_ic->base + scu_ic->isr); + enabled = sts & scu_ic->irq_enable; + sts = readl(scu_ic->base + scu_ic->isr); + status = sts & enabled; + + bit = scu_ic->irq_shift; + max = scu_ic->num_irqs + bit; + + for_each_set_bit_from(bit, &status, max) { + generic_handle_domain_irq(scu_ic->irq_domain, bit - scu_ic->irq_shift); + /* Clear interrupt */ + writel(BIT(bit), scu_ic->base + scu_ic->isr); } chained_irq_exit(chip, desc); } -static void aspeed_scu_ic_irq_mask(struct irq_data *data) +static void aspeed_scu_ic_irq_mask_combined(struct irq_data *data) { struct aspeed_scu_ic *scu_ic = irq_data_get_irq_chip_data(data); - unsigned int mask = BIT(data->hwirq + scu_ic->irq_shift) | - (scu_ic->irq_enable << ASPEED_SCU_IC_STATUS_SHIFT); + unsigned int bit = BIT(data->hwirq + scu_ic->irq_shift); + unsigned int mask = bit | (scu_ic->irq_enable << ASPEED_SCU_IC_STATUS_SHIFT); /* * Status bits are cleared by writing 1. In order to prevent the mask * operation from clearing the status bits, they should be under the * mask and written with 0. */ - regmap_update_bits(scu_ic->scu, scu_ic->reg, mask, 0); + writel(readl(scu_ic->base) & ~mask, scu_ic->base); } -static void aspeed_scu_ic_irq_unmask(struct irq_data *data) +static void aspeed_scu_ic_irq_unmask_combined(struct irq_data *data) { struct aspeed_scu_ic *scu_ic = irq_data_get_irq_chip_data(data); unsigned int bit = BIT(data->hwirq + scu_ic->irq_shift); - unsigned int mask = bit | - (scu_ic->irq_enable << ASPEED_SCU_IC_STATUS_SHIFT); + unsigned int mask = bit | (scu_ic->irq_enable << ASPEED_SCU_IC_STATUS_SHIFT); /* * Status bits are cleared by writing 1. In order to prevent the unmask * operation from clearing the status bits, they should be under the * mask and written with 0. */ - regmap_update_bits(scu_ic->scu, scu_ic->reg, mask, bit); + writel((readl(scu_ic->base) & ~mask) | bit, scu_ic->base); +} + +static void aspeed_scu_ic_irq_mask_split(struct irq_data *data) +{ + struct aspeed_scu_ic *scu_ic = irq_data_get_irq_chip_data(data); + unsigned int mask = BIT(data->hwirq + scu_ic->irq_shift); + + writel(readl(scu_ic->base) & ~mask, scu_ic->base + scu_ic->ier); +} + +static void aspeed_scu_ic_irq_unmask_split(struct irq_data *data) +{ + struct aspeed_scu_ic *scu_ic = irq_data_get_irq_chip_data(data); + unsigned int bit = BIT(data->hwirq + scu_ic->irq_shift); + + writel(readl(scu_ic->base) | bit, scu_ic->base + scu_ic->ier); } static int aspeed_scu_ic_irq_set_affinity(struct irq_data *data, @@ -120,17 +177,29 @@ static int aspeed_scu_ic_irq_set_affinity(struct irq_data *data, return -EINVAL; } -static struct irq_chip aspeed_scu_ic_chip = { +static struct irq_chip aspeed_scu_ic_chip_combined = { .name = "aspeed-scu-ic", - .irq_mask = aspeed_scu_ic_irq_mask, - .irq_unmask = aspeed_scu_ic_irq_unmask, - .irq_set_affinity = aspeed_scu_ic_irq_set_affinity, + .irq_mask = aspeed_scu_ic_irq_mask_combined, + .irq_unmask = aspeed_scu_ic_irq_unmask_combined, + .irq_set_affinity = aspeed_scu_ic_irq_set_affinity, +}; + +static struct irq_chip aspeed_scu_ic_chip_split = { + .name = "ast2700-scu-ic", + .irq_mask = aspeed_scu_ic_irq_mask_split, + .irq_unmask = aspeed_scu_ic_irq_unmask_split, + .irq_set_affinity = aspeed_scu_ic_irq_set_affinity, }; static int aspeed_scu_ic_map(struct irq_domain *domain, unsigned int irq, irq_hw_number_t hwirq) { - irq_set_chip_and_handler(irq, &aspeed_scu_ic_chip, handle_level_irq); + struct aspeed_scu_ic *scu_ic = domain->host_data; + + if (scu_has_split_isr(scu_ic)) + irq_set_chip_and_handler(irq, &aspeed_scu_ic_chip_split, handle_level_irq); + else + irq_set_chip_and_handler(irq, &aspeed_scu_ic_chip_combined, handle_level_irq); irq_set_chip_data(irq, domain->host_data); return 0; @@ -143,21 +212,21 @@ static const struct irq_domain_ops aspeed_scu_ic_domain_ops = { static int aspeed_scu_ic_of_init_common(struct aspeed_scu_ic *scu_ic, struct device_node *node) { - int irq; - int rc = 0; + int irq, rc = 0; - if (!node->parent) { - rc = -ENODEV; + scu_ic->base = of_iomap(node, 0); + if (IS_ERR(scu_ic->base)) { + rc = PTR_ERR(scu_ic->base); goto err; } - scu_ic->scu = syscon_node_to_regmap(node->parent); - if (IS_ERR(scu_ic->scu)) { - rc = PTR_ERR(scu_ic->scu); - goto err; + if (scu_has_split_isr(scu_ic)) { + writel(AST2700_SCU_IC_STATUS, scu_ic->base + scu_ic->isr); + writel(0, scu_ic->base + scu_ic->ier); + } else { + writel(ASPEED_SCU_IC_STATUS, scu_ic->base); + writel(0, scu_ic->base); } - regmap_write_bits(scu_ic->scu, scu_ic->reg, ASPEED_SCU_IC_STATUS, ASPEED_SCU_IC_STATUS); - regmap_write_bits(scu_ic->scu, scu_ic->reg, ASPEED_SCU_IC_ENABLE, 0); irq = irq_of_parse_and_map(node, 0); if (!irq) { @@ -166,75 +235,60 @@ static int aspeed_scu_ic_of_init_common(struct aspeed_scu_ic *scu_ic, } scu_ic->irq_domain = irq_domain_create_linear(of_fwnode_handle(node), scu_ic->num_irqs, - &aspeed_scu_ic_domain_ops, - scu_ic); + &aspeed_scu_ic_domain_ops, scu_ic); if (!scu_ic->irq_domain) { rc = -ENOMEM; goto err; } - irq_set_chained_handler_and_data(irq, aspeed_scu_ic_irq_handler, + irq_set_chained_handler_and_data(irq, scu_has_split_isr(scu_ic) ? + aspeed_scu_ic_irq_handler_split : + aspeed_scu_ic_irq_handler_combined, scu_ic); return 0; err: kfree(scu_ic); - return rc; } -static int __init aspeed_scu_ic_of_init(struct device_node *node, - struct device_node *parent) +static const struct aspeed_scu_ic_variant *aspeed_scu_ic_find_variant(struct device_node *np) { - struct aspeed_scu_ic *scu_ic = kzalloc(sizeof(*scu_ic), GFP_KERNEL); - - if (!scu_ic) - return -ENOMEM; - - scu_ic->irq_enable = ASPEED_SCU_IC_ENABLE; - scu_ic->irq_shift = ASPEED_SCU_IC_SHIFT; - scu_ic->num_irqs = ASPEED_SCU_IC_NUM_IRQS; - scu_ic->reg = ASPEED_SCU_IC_REG; - - return aspeed_scu_ic_of_init_common(scu_ic, node); + for (int i = 0; i < ARRAY_SIZE(scu_ic_variants); i++) { + if (of_device_is_compatible(np, scu_ic_variants[i].compatible)) + return &scu_ic_variants[i]; + } + return NULL; } -static int __init aspeed_ast2600_scu_ic0_of_init(struct device_node *node, - struct device_node *parent) +static int __init aspeed_scu_ic_of_init(struct device_node *node, struct device_node *parent) { - struct aspeed_scu_ic *scu_ic = kzalloc(sizeof(*scu_ic), GFP_KERNEL); + const struct aspeed_scu_ic_variant *variant; + struct aspeed_scu_ic *scu_ic; - if (!scu_ic) - return -ENOMEM; - - scu_ic->irq_enable = ASPEED_AST2600_SCU_IC0_ENABLE; - scu_ic->irq_shift = ASPEED_AST2600_SCU_IC0_SHIFT; - scu_ic->num_irqs = ASPEED_AST2600_SCU_IC0_NUM_IRQS; - scu_ic->reg = ASPEED_AST2600_SCU_IC0_REG; - - return aspeed_scu_ic_of_init_common(scu_ic, node); -} - -static int __init aspeed_ast2600_scu_ic1_of_init(struct device_node *node, - struct device_node *parent) -{ - struct aspeed_scu_ic *scu_ic = kzalloc(sizeof(*scu_ic), GFP_KERNEL); + variant = aspeed_scu_ic_find_variant(node); + if (!variant) + return -ENODEV; + scu_ic = kzalloc(sizeof(*scu_ic), GFP_KERNEL); if (!scu_ic) return -ENOMEM; - scu_ic->irq_enable = ASPEED_AST2600_SCU_IC1_ENABLE; - scu_ic->irq_shift = ASPEED_AST2600_SCU_IC1_SHIFT; - scu_ic->num_irqs = ASPEED_AST2600_SCU_IC1_NUM_IRQS; - scu_ic->reg = ASPEED_AST2600_SCU_IC1_REG; + scu_ic->irq_enable = variant->irq_enable; + scu_ic->irq_shift = variant->irq_shift; + scu_ic->num_irqs = variant->num_irqs; + scu_ic->ier = variant->ier; + scu_ic->isr = variant->isr; return aspeed_scu_ic_of_init_common(scu_ic, node); } IRQCHIP_DECLARE(ast2400_scu_ic, "aspeed,ast2400-scu-ic", aspeed_scu_ic_of_init); IRQCHIP_DECLARE(ast2500_scu_ic, "aspeed,ast2500-scu-ic", aspeed_scu_ic_of_init); -IRQCHIP_DECLARE(ast2600_scu_ic0, "aspeed,ast2600-scu-ic0", - aspeed_ast2600_scu_ic0_of_init); -IRQCHIP_DECLARE(ast2600_scu_ic1, "aspeed,ast2600-scu-ic1", - aspeed_ast2600_scu_ic1_of_init); +IRQCHIP_DECLARE(ast2600_scu_ic0, "aspeed,ast2600-scu-ic0", aspeed_scu_ic_of_init); +IRQCHIP_DECLARE(ast2600_scu_ic1, "aspeed,ast2600-scu-ic1", aspeed_scu_ic_of_init); +IRQCHIP_DECLARE(ast2700_scu_ic0, "aspeed,ast2700-scu-ic0", aspeed_scu_ic_of_init); +IRQCHIP_DECLARE(ast2700_scu_ic1, "aspeed,ast2700-scu-ic1", aspeed_scu_ic_of_init); +IRQCHIP_DECLARE(ast2700_scu_ic2, "aspeed,ast2700-scu-ic2", aspeed_scu_ic_of_init); +IRQCHIP_DECLARE(ast2700_scu_ic3, "aspeed,ast2700-scu-ic3", aspeed_scu_ic_of_init); diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c index 24ef5af569fe..8a3410c2b7b5 100644 --- a/drivers/irqchip/irq-gic-v2m.c +++ b/drivers/irqchip/irq-gic-v2m.c @@ -153,14 +153,19 @@ static int gicv2m_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, { msi_alloc_info_t *info = args; struct v2m_data *v2m = NULL, *tmp; - int hwirq, offset, i, err = 0; + int hwirq, i, err = 0; + unsigned long offset; + unsigned long align_mask = nr_irqs - 1; spin_lock(&v2m_lock); list_for_each_entry(tmp, &v2m_nodes, entry) { - offset = bitmap_find_free_region(tmp->bm, tmp->nr_spis, - get_count_order(nr_irqs)); - if (offset >= 0) { + unsigned long align_off = tmp->spi_start - (tmp->spi_start & ~align_mask); + + offset = bitmap_find_next_zero_area_off(tmp->bm, tmp->nr_spis, 0, + nr_irqs, align_mask, align_off); + if (offset < tmp->nr_spis) { v2m = tmp; + bitmap_set(v2m->bm, offset, nr_irqs); break; } } diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index dbeb85677b08..3de351e66ee8 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -1766,8 +1766,9 @@ static int gic_irq_domain_select(struct irq_domain *d, struct irq_fwspec *fwspec, enum irq_domain_bus_token bus_token) { - unsigned int type, ret, ppi_idx; + unsigned int type, ppi_idx; irq_hw_number_t hwirq; + int ret; /* Not for us */ if (fwspec->fwnode != d->fwnode) diff --git a/drivers/irqchip/irq-gic-v5-irs.c b/drivers/irqchip/irq-gic-v5-irs.c index 13c035727e32..ce2732d649a3 100644 --- a/drivers/irqchip/irq-gic-v5-irs.c +++ b/drivers/irqchip/irq-gic-v5-irs.c @@ -571,7 +571,7 @@ static void __init gicv5_irs_init_bases(struct gicv5_irs_chip_data *irs_data, FIELD_PREP(GICV5_IRS_CR1_IST_RA, GICV5_NO_READ_ALLOC) | FIELD_PREP(GICV5_IRS_CR1_IC, GICV5_NON_CACHE) | FIELD_PREP(GICV5_IRS_CR1_OC, GICV5_NON_CACHE); - irs_data->flags |= IRS_FLAGS_NON_COHERENT; + irs_data->flags |= IRS_FLAGS_NON_COHERENT; } else { cr1 = FIELD_PREP(GICV5_IRS_CR1_VPED_WA, GICV5_WRITE_ALLOC) | FIELD_PREP(GICV5_IRS_CR1_VPED_RA, GICV5_READ_ALLOC) | diff --git a/drivers/irqchip/irq-gic-v5-its.c b/drivers/irqchip/irq-gic-v5-its.c index 9290ac741949..554485f0be1f 100644 --- a/drivers/irqchip/irq-gic-v5-its.c +++ b/drivers/irqchip/irq-gic-v5-its.c @@ -191,9 +191,9 @@ static int gicv5_its_create_itt_two_level(struct gicv5_its_chip_data *its, unsigned int num_events) { unsigned int l1_bits, l2_bits, span, events_per_l2_table; - unsigned int i, complete_tables, final_span, num_ents; + unsigned int complete_tables, final_span, num_ents; __le64 *itt_l1, *itt_l2, **l2ptrs; - int ret; + int i, ret; u64 val; ret = gicv5_its_l2sz_to_l2_bits(itt_l2sz); @@ -768,8 +768,6 @@ static struct gicv5_its_dev *gicv5_its_alloc_device(struct gicv5_its_chip_data * goto out_dev_free; } - gicv5_its_device_cache_inv(its, its_dev); - its_dev->its_node = its; its_dev->event_map = (unsigned long *)bitmap_zalloc(its_dev->num_events, GFP_KERNEL); @@ -949,15 +947,18 @@ static int gicv5_its_irq_domain_alloc(struct irq_domain *domain, unsigned int vi device_id = its_dev->device_id; for (i = 0; i < nr_irqs; i++) { - lpi = gicv5_alloc_lpi(); + ret = gicv5_alloc_lpi(); if (ret < 0) { pr_debug("Failed to find free LPI!\n"); - goto out_eventid; + goto out_free_irqs; } + lpi = ret; ret = irq_domain_alloc_irqs_parent(domain, virq + i, 1, &lpi); - if (ret) - goto out_free_lpi; + if (ret) { + gicv5_free_lpi(lpi); + goto out_free_irqs; + } /* * Store eventid and deviceid into the hwirq for later use. @@ -977,8 +978,13 @@ static int gicv5_its_irq_domain_alloc(struct irq_domain *domain, unsigned int vi return 0; -out_free_lpi: - gicv5_free_lpi(lpi); +out_free_irqs: + while (--i >= 0) { + irqd = irq_domain_get_irq_data(domain, virq + i); + gicv5_free_lpi(irqd->parent_data->hwirq); + irq_domain_reset_irq_data(irqd); + irq_domain_free_irqs_parent(domain, virq + i, 1); + } out_eventid: gicv5_its_free_eventid(its_dev, event_id_base, nr_irqs); return ret; diff --git a/drivers/irqchip/irq-loongson-eiointc.c b/drivers/irqchip/irq-loongson-eiointc.c index b2860eb2d32c..39e5a72ccd3c 100644 --- a/drivers/irqchip/irq-loongson-eiointc.c +++ b/drivers/irqchip/irq-loongson-eiointc.c @@ -46,6 +46,7 @@ #define EIOINTC_ALL_ENABLE_VEC_MASK(vector) (EIOINTC_ALL_ENABLE & ~BIT(vector & 0x1f)) #define EIOINTC_REG_ENABLE_VEC(vector) (EIOINTC_REG_ENABLE + ((vector >> 5) << 2)) #define EIOINTC_USE_CPU_ENCODE BIT(0) +#define EIOINTC_ROUTE_MULT_IP BIT(1) #define MAX_EIO_NODES (NR_CPUS / CORES_PER_EIO_NODE) @@ -59,6 +60,14 @@ #define EIOINTC_REG_ROUTE_VEC_MASK(vector) (0xff << EIOINTC_REG_ROUTE_VEC_SHIFT(vector)) static int nr_pics; +struct eiointc_priv; + +struct eiointc_ip_route { + struct eiointc_priv *priv; + /* Offset Routed destination IP */ + int start; + int end; +}; struct eiointc_priv { u32 node; @@ -68,6 +77,8 @@ struct eiointc_priv { struct fwnode_handle *domain_handle; struct irq_domain *eiointc_domain; int flags; + irq_hw_number_t parent_hwirq; + struct eiointc_ip_route route_info[VEC_REG_COUNT]; }; static struct eiointc_priv *eiointc_priv[MAX_IO_PICS]; @@ -188,6 +199,7 @@ static int eiointc_router_init(unsigned int cpu) { int i, bit, cores, index, node; unsigned int data; + int hwirq, mask; node = cpu_to_eio_node(cpu); index = eiointc_index(node); @@ -197,6 +209,13 @@ static int eiointc_router_init(unsigned int cpu) return -EINVAL; } + /* Enable cpu interrupt pin from eiointc */ + hwirq = eiointc_priv[index]->parent_hwirq; + mask = BIT(hwirq); + if (eiointc_priv[index]->flags & EIOINTC_ROUTE_MULT_IP) + mask |= BIT(hwirq + 1) | BIT(hwirq + 2) | BIT(hwirq + 3); + set_csr_ecfg(mask); + if (!(eiointc_priv[index]->flags & EIOINTC_USE_CPU_ENCODE)) cores = CORES_PER_EIO_NODE; else @@ -211,8 +230,31 @@ static int eiointc_router_init(unsigned int cpu) } for (i = 0; i < eiointc_priv[0]->vec_count / 32 / 4; i++) { - bit = BIT(1 + index); /* Route to IP[1 + index] */ - data = bit | (bit << 8) | (bit << 16) | (bit << 24); + /* + * Route to interrupt pin, relative offset used here + * Offset 0 means routing to IP0 and so on + * + * If flags is set with EIOINTC_ROUTE_MULT_IP, + * every 64 vector routes to different consecutive + * IPs, otherwise all vector routes to the same IP + */ + if (eiointc_priv[index]->flags & EIOINTC_ROUTE_MULT_IP) { + /* The first 64 vectors route to hwirq */ + bit = BIT(hwirq++ - INT_HWI0); + data = bit | (bit << 8); + + /* The second 64 vectors route to hwirq + 1 */ + bit = BIT(hwirq++ - INT_HWI0); + data |= (bit << 16) | (bit << 24); + + /* + * Route to hwirq + 2/hwirq + 3 separately + * in next loop + */ + } else { + bit = BIT(hwirq - INT_HWI0); + data = bit | (bit << 8) | (bit << 16) | (bit << 24); + } iocsr_write32(data, EIOINTC_REG_IPMAP + i * 4); } @@ -241,15 +283,22 @@ static int eiointc_router_init(unsigned int cpu) static void eiointc_irq_dispatch(struct irq_desc *desc) { - int i; - u64 pending; - bool handled = false; + struct eiointc_ip_route *info = irq_desc_get_handler_data(desc); struct irq_chip *chip = irq_desc_get_chip(desc); - struct eiointc_priv *priv = irq_desc_get_handler_data(desc); + bool handled = false; + u64 pending; + int i; chained_irq_enter(chip, desc); - for (i = 0; i < eiointc_priv[0]->vec_count / VEC_COUNT_PER_REG; i++) { + /* + * If EIOINTC_ROUTE_MULT_IP is set, every 64 interrupt vectors in + * eiointc interrupt controller routes to different cpu interrupt pins + * + * Every cpu interrupt pin has its own irq handler, it is ok to + * read ISR for these 64 interrupt vectors rather than all vectors + */ + for (i = info->start; i < info->end; i++) { pending = iocsr_read64(EIOINTC_REG_ISR + (i << 3)); /* Skip handling if pending bitmap is zero */ @@ -262,7 +311,7 @@ static void eiointc_irq_dispatch(struct irq_desc *desc) int bit = __ffs(pending); int irq = bit + VEC_COUNT_PER_REG * i; - generic_handle_domain_irq(priv->eiointc_domain, irq); + generic_handle_domain_irq(info->priv->eiointc_domain, irq); pending &= ~BIT(bit); handled = true; } @@ -462,8 +511,33 @@ static int __init eiointc_init(struct eiointc_priv *priv, int parent_irq, } eiointc_priv[nr_pics++] = priv; + /* + * Only the first eiointc device on VM supports routing to + * different CPU interrupt pins. The later eiointc devices use + * generic method if there are multiple eiointc devices in future + */ + if (cpu_has_hypervisor && (nr_pics == 1)) { + priv->flags |= EIOINTC_ROUTE_MULT_IP; + priv->parent_hwirq = INT_HWI0; + } + + if (priv->flags & EIOINTC_ROUTE_MULT_IP) { + for (i = 0; i < priv->vec_count / VEC_COUNT_PER_REG; i++) { + priv->route_info[i].start = priv->parent_hwirq - INT_HWI0 + i; + priv->route_info[i].end = priv->route_info[i].start + 1; + priv->route_info[i].priv = priv; + parent_irq = get_percpu_irq(priv->parent_hwirq + i); + irq_set_chained_handler_and_data(parent_irq, eiointc_irq_dispatch, + &priv->route_info[i]); + } + } else { + priv->route_info[0].start = 0; + priv->route_info[0].end = priv->vec_count / VEC_COUNT_PER_REG; + priv->route_info[0].priv = priv; + irq_set_chained_handler_and_data(parent_irq, eiointc_irq_dispatch, + &priv->route_info[0]); + } eiointc_router_init(0); - irq_set_chained_handler_and_data(parent_irq, eiointc_irq_dispatch, priv); if (nr_pics == 1) { register_syscore_ops(&eiointc_syscore_ops); @@ -495,7 +569,7 @@ int __init eiointc_acpi_init(struct irq_domain *parent, priv->vec_count = VEC_COUNT; priv->node = acpi_eiointc->node; - + priv->parent_hwirq = acpi_eiointc->cascade; parent_irq = irq_create_mapping(parent, acpi_eiointc->cascade); ret = eiointc_init(priv, parent_irq, acpi_eiointc->node_map); @@ -527,8 +601,9 @@ out_free_priv: static int __init eiointc_of_init(struct device_node *of_node, struct device_node *parent) { - int parent_irq, ret; struct eiointc_priv *priv; + struct irq_data *irq_data; + int parent_irq, ret; priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) @@ -544,6 +619,12 @@ static int __init eiointc_of_init(struct device_node *of_node, if (ret < 0) goto out_free_priv; + irq_data = irq_get_irq_data(parent_irq); + if (!irq_data) { + ret = -ENODEV; + goto out_free_priv; + } + /* * In particular, the number of devices supported by the LS2K0500 * extended I/O interrupt vector is 128. @@ -552,7 +633,7 @@ static int __init eiointc_of_init(struct device_node *of_node, priv->vec_count = 128; else priv->vec_count = VEC_COUNT; - + priv->parent_hwirq = irqd_to_hwirq(irq_data); priv->node = 0; priv->domain_handle = of_fwnode_handle(of_node); diff --git a/drivers/irqchip/irq-loongson-pch-lpc.c b/drivers/irqchip/irq-loongson-pch-lpc.c index 2d4c3ec128b8..912bf50a5c7c 100644 --- a/drivers/irqchip/irq-loongson-pch-lpc.c +++ b/drivers/irqchip/irq-loongson-pch-lpc.c @@ -200,8 +200,13 @@ int __init pch_lpc_acpi_init(struct irq_domain *parent, goto iounmap_base; } - priv->lpc_domain = irq_domain_create_linear(irq_handle, LPC_COUNT, - &pch_lpc_domain_ops, priv); + /* + * The LPC interrupt controller is a legacy i8259-compatible device, + * which requires a static 1:1 mapping for IRQs 0-15. + * Use irq_domain_create_legacy to establish this static mapping early. + */ + priv->lpc_domain = irq_domain_create_legacy(irq_handle, LPC_COUNT, 0, 0, + &pch_lpc_domain_ops, priv); if (!priv->lpc_domain) { pr_err("Failed to create IRQ domain\n"); goto free_irq_handle; diff --git a/drivers/irqchip/irq-msi-lib.c b/drivers/irqchip/irq-msi-lib.c index 908944009c21..d5eefc3d7215 100644 --- a/drivers/irqchip/irq-msi-lib.c +++ b/drivers/irqchip/irq-msi-lib.c @@ -112,6 +112,20 @@ bool msi_lib_init_dev_msi_info(struct device *dev, struct irq_domain *domain, */ if (!chip->irq_set_affinity && !(info->flags & MSI_FLAG_NO_AFFINITY)) chip->irq_set_affinity = msi_domain_set_affinity; + + /* + * If the parent domain insists on being in charge of masking, obey + * blindly. The interrupt is un-masked at the PCI level on startup + * and masked on shutdown to prevent rogue interrupts after the + * driver freed the interrupt. Not masking it at the PCI level + * speeds up operation for disable/enable_irq() as it avoids + * getting all the way out to the PCI device. + */ + if (info->flags & MSI_FLAG_PCI_MSI_MASK_PARENT) { + chip->irq_mask = irq_chip_mask_parent; + chip->irq_unmask = irq_chip_unmask_parent; + } + return true; } EXPORT_SYMBOL_GPL(msi_lib_init_dev_msi_info); diff --git a/drivers/irqchip/irq-nvic.c b/drivers/irqchip/irq-nvic.c index 76e11cac9631..2191a2b79578 100644 --- a/drivers/irqchip/irq-nvic.c +++ b/drivers/irqchip/irq-nvic.c @@ -73,8 +73,9 @@ static int __init nvic_of_init(struct device_node *node, struct device_node *parent) { unsigned int clr = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN; - unsigned int irqs, i, ret, numbanks; + unsigned int irqs, i, numbanks; void __iomem *nvic_base; + int ret; numbanks = (readl_relaxed(V7M_SCS_ICTR) & V7M_SCS_ICTR_INTLINESNUM_MASK) + 1; diff --git a/drivers/irqchip/irq-renesas-rza1.c b/drivers/irqchip/irq-renesas-rza1.c index a697eb55ac90..6047a524ac77 100644 --- a/drivers/irqchip/irq-renesas-rza1.c +++ b/drivers/irqchip/irq-renesas-rza1.c @@ -142,11 +142,12 @@ static const struct irq_domain_ops rza1_irqc_domain_ops = { static int rza1_irqc_parse_map(struct rza1_irqc_priv *priv, struct device_node *gic_node) { - unsigned int imaplen, i, j, ret; struct device *dev = priv->dev; + unsigned int imaplen, i, j; struct device_node *ipar; const __be32 *imap; u32 intsize; + int ret; imap = of_get_property(dev->of_node, "interrupt-map", &imaplen); if (!imap) diff --git a/drivers/irqchip/irq-renesas-rzg2l.c b/drivers/irqchip/irq-renesas-rzg2l.c index 360d88687e4f..2a54adeb4cc7 100644 --- a/drivers/irqchip/irq-renesas-rzg2l.c +++ b/drivers/irqchip/irq-renesas-rzg2l.c @@ -578,7 +578,7 @@ static int rzg2l_irqc_common_init(struct device_node *node, struct device_node * &rzg2l_irqc_domain_ops, rzg2l_irqc_data); if (!irq_domain) { pm_runtime_put(dev); - return dev_err_probe(dev, -ENOMEM, "failed to add irq domain\n"); + return -ENOMEM; } register_syscore_ops(&rzg2l_irqc_syscore_ops); diff --git a/drivers/irqchip/irq-sg2042-msi.c b/drivers/irqchip/irq-sg2042-msi.c index bcfddc51bc6a..f7cf0dc72eab 100644 --- a/drivers/irqchip/irq-sg2042-msi.c +++ b/drivers/irqchip/irq-sg2042-msi.c @@ -30,6 +30,7 @@ struct sg204x_msi_chip_info { * @doorbell_addr: see TRM, 10.1.32, GP_INTR0_SET * @irq_first: First vectors number that MSIs starts * @num_irqs: Number of vectors for MSIs + * @irq_type: IRQ type for MSIs * @msi_map: mapping for allocated MSI vectors. * @msi_map_lock: Lock for msi_map * @chip_info: chip specific infomations @@ -41,6 +42,7 @@ struct sg204x_msi_chipdata { u32 irq_first; u32 num_irqs; + unsigned int irq_type; unsigned long *msi_map; struct mutex msi_map_lock; @@ -85,6 +87,8 @@ static void sg2042_msi_irq_compose_msi_msg(struct irq_data *d, struct msi_msg *m static const struct irq_chip sg2042_msi_middle_irq_chip = { .name = "SG2042 MSI", + .irq_startup = irq_chip_startup_parent, + .irq_shutdown = irq_chip_shutdown_parent, .irq_ack = sg2042_msi_irq_ack, .irq_mask = irq_chip_mask_parent, .irq_unmask = irq_chip_unmask_parent, @@ -114,6 +118,8 @@ static void sg2044_msi_irq_compose_msi_msg(struct irq_data *d, struct msi_msg *m static struct irq_chip sg2044_msi_middle_irq_chip = { .name = "SG2044 MSI", + .irq_startup = irq_chip_startup_parent, + .irq_shutdown = irq_chip_shutdown_parent, .irq_ack = sg2044_msi_irq_ack, .irq_mask = irq_chip_mask_parent, .irq_unmask = irq_chip_unmask_parent, @@ -133,14 +139,14 @@ static int sg204x_msi_parent_domain_alloc(struct irq_domain *domain, unsigned in fwspec.fwnode = domain->parent->fwnode; fwspec.param_count = 2; fwspec.param[0] = data->irq_first + hwirq; - fwspec.param[1] = IRQ_TYPE_EDGE_RISING; + fwspec.param[1] = data->irq_type; ret = irq_domain_alloc_irqs_parent(domain, virq, 1, &fwspec); if (ret) return ret; d = irq_domain_get_irq_data(domain->parent, virq); - return d->chip->irq_set_type(d, IRQ_TYPE_EDGE_RISING); + return d->chip->irq_set_type(d, data->irq_type); } static int sg204x_msi_middle_domain_alloc(struct irq_domain *domain, unsigned int virq, @@ -185,8 +191,10 @@ static const struct irq_domain_ops sg204x_msi_middle_domain_ops = { .select = msi_lib_irq_domain_select, }; -#define SG2042_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | \ - MSI_FLAG_USE_DEF_CHIP_OPS) +#define SG2042_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | \ + MSI_FLAG_USE_DEF_CHIP_OPS | \ + MSI_FLAG_PCI_MSI_MASK_PARENT | \ + MSI_FLAG_PCI_MSI_STARTUP_PARENT) #define SG2042_MSI_FLAGS_SUPPORTED MSI_GENERIC_FLAGS_MASK @@ -200,10 +208,13 @@ static const struct msi_parent_ops sg2042_msi_parent_ops = { .init_dev_msi_info = msi_lib_init_dev_msi_info, }; -#define SG2044_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | \ - MSI_FLAG_USE_DEF_CHIP_OPS) +#define SG2044_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | \ + MSI_FLAG_USE_DEF_CHIP_OPS | \ + MSI_FLAG_PCI_MSI_MASK_PARENT | \ + MSI_FLAG_PCI_MSI_STARTUP_PARENT) -#define SG2044_MSI_FLAGS_SUPPORTED (MSI_GENERIC_FLAGS_MASK | \ +#define SG2044_MSI_FLAGS_SUPPORTED (MSI_GENERIC_FLAGS_MASK | \ + MSI_FLAG_MULTI_PCI_MSI | \ MSI_FLAG_PCI_MSIX) static const struct msi_parent_ops sg2044_msi_parent_ops = { @@ -289,6 +300,7 @@ static int sg2042_msi_probe(struct platform_device *pdev) } data->irq_first = (u32)args.args[0]; + data->irq_type = (unsigned int)args.args[1]; data->num_irqs = (u32)args.args[args.nargs - 1]; mutex_init(&data->msi_map_lock); diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c index bf69a4802b71..559fda8fb3a8 100644 --- a/drivers/irqchip/irq-sifive-plic.c +++ b/drivers/irqchip/irq-sifive-plic.c @@ -179,12 +179,14 @@ static int plic_set_affinity(struct irq_data *d, if (cpu >= nr_cpu_ids) return -EINVAL; - plic_irq_disable(d); + /* Invalidate the original routing entry */ + plic_irq_toggle(irq_data_get_effective_affinity_mask(d), d, 0); irq_data_update_effective_affinity(d, cpumask_of(cpu)); + /* Setting the new routing entry if irq is enabled */ if (!irqd_irq_disabled(d)) - plic_irq_enable(d); + plic_irq_toggle(irq_data_get_effective_affinity_mask(d), d, 1); return IRQ_SET_MASK_OK_DONE; } @@ -257,7 +259,7 @@ static int plic_irq_suspend(void) readl(priv->regs + PRIORITY_BASE + i * PRIORITY_PER_ID)); } - for_each_cpu(cpu, cpu_present_mask) { + for_each_present_cpu(cpu) { struct plic_handler *handler = per_cpu_ptr(&plic_handlers, cpu); if (!handler->present) @@ -289,7 +291,7 @@ static void plic_irq_resume(void) priv->regs + PRIORITY_BASE + i * PRIORITY_PER_ID); } - for_each_cpu(cpu, cpu_present_mask) { + for_each_present_cpu(cpu) { struct plic_handler *handler = per_cpu_ptr(&plic_handlers, cpu); if (!handler->present) diff --git a/drivers/media/rc/pwm-ir-tx.c b/drivers/media/rc/pwm-ir-tx.c index 84533fdd61aa..047472dc9244 100644 --- a/drivers/media/rc/pwm-ir-tx.c +++ b/drivers/media/rc/pwm-ir-tx.c @@ -117,7 +117,6 @@ static int pwm_ir_tx_atomic(struct rc_dev *dev, unsigned int *txbuf, static enum hrtimer_restart pwm_ir_timer(struct hrtimer *timer) { struct pwm_ir *pwm_ir = container_of(timer, struct pwm_ir, timer); - ktime_t now; /* * If we happen to hit an odd latency spike, loop through the @@ -139,9 +138,7 @@ static enum hrtimer_restart pwm_ir_timer(struct hrtimer *timer) hrtimer_add_expires_ns(timer, ns); pwm_ir->txbuf_index++; - - now = timer->base->get_time(); - } while (hrtimer_get_expires_tv64(timer) < now); + } while (hrtimer_expires_remaining(timer) > 0); return HRTIMER_RESTART; } diff --git a/drivers/of/irq.c b/drivers/of/irq.c index 74aaea61de13..d2b690857e58 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -519,6 +519,7 @@ int of_irq_count(struct device_node *dev) return nr; } +EXPORT_SYMBOL_GPL(of_irq_count); /** * of_irq_to_resource_table - Fill in resource table with node's IRQ info diff --git a/drivers/pci/msi/irqdomain.c b/drivers/pci/msi/irqdomain.c index 0938ef7ebabf..dfb61f152702 100644 --- a/drivers/pci/msi/irqdomain.c +++ b/drivers/pci/msi/irqdomain.c @@ -148,20 +148,43 @@ static void pci_device_domain_set_desc(msi_alloc_info_t *arg, struct msi_desc *d arg->hwirq = desc->msi_index; } -static __always_inline void cond_mask_parent(struct irq_data *data) +static void cond_shutdown_parent(struct irq_data *data) { struct msi_domain_info *info = data->domain->host_data; - if (unlikely(info->flags & MSI_FLAG_PCI_MSI_MASK_PARENT)) + if (unlikely(info->flags & MSI_FLAG_PCI_MSI_STARTUP_PARENT)) + irq_chip_shutdown_parent(data); + else if (unlikely(info->flags & MSI_FLAG_PCI_MSI_MASK_PARENT)) irq_chip_mask_parent(data); } -static __always_inline void cond_unmask_parent(struct irq_data *data) +static unsigned int cond_startup_parent(struct irq_data *data) { struct msi_domain_info *info = data->domain->host_data; - if (unlikely(info->flags & MSI_FLAG_PCI_MSI_MASK_PARENT)) + if (unlikely(info->flags & MSI_FLAG_PCI_MSI_STARTUP_PARENT)) + return irq_chip_startup_parent(data); + else if (unlikely(info->flags & MSI_FLAG_PCI_MSI_MASK_PARENT)) irq_chip_unmask_parent(data); + + return 0; +} + +static void pci_irq_shutdown_msi(struct irq_data *data) +{ + struct msi_desc *desc = irq_data_get_msi_desc(data); + + pci_msi_mask(desc, BIT(data->irq - desc->irq)); + cond_shutdown_parent(data); +} + +static unsigned int pci_irq_startup_msi(struct irq_data *data) +{ + struct msi_desc *desc = irq_data_get_msi_desc(data); + unsigned int ret = cond_startup_parent(data); + + pci_msi_unmask(desc, BIT(data->irq - desc->irq)); + return ret; } static void pci_irq_mask_msi(struct irq_data *data) @@ -169,14 +192,12 @@ static void pci_irq_mask_msi(struct irq_data *data) struct msi_desc *desc = irq_data_get_msi_desc(data); pci_msi_mask(desc, BIT(data->irq - desc->irq)); - cond_mask_parent(data); } static void pci_irq_unmask_msi(struct irq_data *data) { struct msi_desc *desc = irq_data_get_msi_desc(data); - cond_unmask_parent(data); pci_msi_unmask(desc, BIT(data->irq - desc->irq)); } @@ -194,6 +215,8 @@ static void pci_irq_unmask_msi(struct irq_data *data) static const struct msi_domain_template pci_msi_template = { .chip = { .name = "PCI-MSI", + .irq_startup = pci_irq_startup_msi, + .irq_shutdown = pci_irq_shutdown_msi, .irq_mask = pci_irq_mask_msi, .irq_unmask = pci_irq_unmask_msi, .irq_write_msi_msg = pci_msi_domain_write_msg, @@ -210,15 +233,27 @@ static const struct msi_domain_template pci_msi_template = { }, }; +static void pci_irq_shutdown_msix(struct irq_data *data) +{ + pci_msix_mask(irq_data_get_msi_desc(data)); + cond_shutdown_parent(data); +} + +static unsigned int pci_irq_startup_msix(struct irq_data *data) +{ + unsigned int ret = cond_startup_parent(data); + + pci_msix_unmask(irq_data_get_msi_desc(data)); + return ret; +} + static void pci_irq_mask_msix(struct irq_data *data) { pci_msix_mask(irq_data_get_msi_desc(data)); - cond_mask_parent(data); } static void pci_irq_unmask_msix(struct irq_data *data) { - cond_unmask_parent(data); pci_msix_unmask(irq_data_get_msi_desc(data)); } @@ -234,6 +269,8 @@ EXPORT_SYMBOL_GPL(pci_msix_prepare_desc); static const struct msi_domain_template pci_msix_template = { .chip = { .name = "PCI-MSIX", + .irq_startup = pci_irq_startup_msix, + .irq_shutdown = pci_irq_shutdown_msix, .irq_mask = pci_irq_mask_msix, .irq_unmask = pci_irq_unmask_msix, .irq_write_msi_msg = pci_msi_domain_write_msg, diff --git a/include/asm-generic/thread_info_tif.h b/include/asm-generic/thread_info_tif.h new file mode 100644 index 000000000000..ee3793e9b1a4 --- /dev/null +++ b/include/asm-generic/thread_info_tif.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_GENERIC_THREAD_INFO_TIF_H_ +#define _ASM_GENERIC_THREAD_INFO_TIF_H_ + +#include <vdso/bits.h> + +/* Bits 16-31 are reserved for architecture specific purposes */ + +#define TIF_NOTIFY_RESUME 0 // callback before returning to user +#define _TIF_NOTIFY_RESUME BIT(TIF_NOTIFY_RESUME) + +#define TIF_SIGPENDING 1 // signal pending +#define _TIF_SIGPENDING BIT(TIF_SIGPENDING) + +#define TIF_NOTIFY_SIGNAL 2 // signal notifications exist +#define _TIF_NOTIFY_SIGNAL BIT(TIF_NOTIFY_SIGNAL) + +#define TIF_MEMDIE 3 // is terminating due to OOM killer +#define _TIF_MEMDIE BIT(TIF_MEMDIE) + +#define TIF_NEED_RESCHED 4 // rescheduling necessary +#define _TIF_NEED_RESCHED BIT(TIF_NEED_RESCHED) + +#ifdef HAVE_TIF_NEED_RESCHED_LAZY +# define TIF_NEED_RESCHED_LAZY 5 // Lazy rescheduling needed +# define _TIF_NEED_RESCHED_LAZY BIT(TIF_NEED_RESCHED_LAZY) +#endif + +#ifdef HAVE_TIF_POLLING_NRFLAG +# define TIF_POLLING_NRFLAG 6 // idle is polling for TIF_NEED_RESCHED +# define _TIF_POLLING_NRFLAG BIT(TIF_POLLING_NRFLAG) +#endif + +#define TIF_USER_RETURN_NOTIFY 7 // notify kernel of userspace return +#define _TIF_USER_RETURN_NOTIFY BIT(TIF_USER_RETURN_NOTIFY) + +#define TIF_UPROBE 8 // breakpointed or singlestepping +#define _TIF_UPROBE BIT(TIF_UPROBE) + +#define TIF_PATCH_PENDING 9 // pending live patching update +#define _TIF_PATCH_PENDING BIT(TIF_PATCH_PENDING) + +#ifdef HAVE_TIF_RESTORE_SIGMASK +# define TIF_RESTORE_SIGMASK 10 // Restore signal mask in do_signal() */ +# define _TIF_RESTORE_SIGMASK BIT(TIF_RESTORE_SIGMASK) +#endif + +#endif /* _ASM_GENERIC_THREAD_INFO_TIF_H_ */ diff --git a/include/asm-generic/vdso/vsyscall.h b/include/asm-generic/vdso/vsyscall.h index 7fc0b560007d..5c6d9799f4e7 100644 --- a/include/asm-generic/vdso/vsyscall.h +++ b/include/asm-generic/vdso/vsyscall.h @@ -4,8 +4,6 @@ #ifndef __ASSEMBLY__ -#ifdef CONFIG_GENERIC_VDSO_DATA_STORE - #ifndef __arch_get_vdso_u_time_data static __always_inline const struct vdso_time_data *__arch_get_vdso_u_time_data(void) { @@ -20,8 +18,6 @@ static __always_inline const struct vdso_rng_data *__arch_get_vdso_u_rng_data(vo } #endif -#endif /* CONFIG_GENERIC_VDSO_DATA_STORE */ - #ifndef __arch_update_vdso_clock static __always_inline void __arch_update_vdso_clock(struct vdso_clock *vc) { diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h index ce6521ad04d1..2eda895f19f5 100644 --- a/include/clocksource/arm_arch_timer.h +++ b/include/clocksource/arm_arch_timer.h @@ -9,9 +9,6 @@ #include <linux/timecounter.h> #include <linux/types.h> -#define ARCH_TIMER_TYPE_CP15 BIT(0) -#define ARCH_TIMER_TYPE_MEM BIT(1) - #define ARCH_TIMER_CTRL_ENABLE (1 << 0) #define ARCH_TIMER_CTRL_IT_MASK (1 << 1) #define ARCH_TIMER_CTRL_IT_STAT (1 << 2) @@ -51,8 +48,6 @@ enum arch_timer_spi_nr { #define ARCH_TIMER_PHYS_ACCESS 0 #define ARCH_TIMER_VIRT_ACCESS 1 -#define ARCH_TIMER_MEM_PHYS_ACCESS 2 -#define ARCH_TIMER_MEM_VIRT_ACCESS 3 #define ARCH_TIMER_MEM_MAX_FRAMES 8 diff --git a/include/dt-bindings/interrupt-controller/aspeed-scu-ic.h b/include/dt-bindings/interrupt-controller/aspeed-scu-ic.h index f315d5a7f5ee..7dd04424afcc 100644 --- a/include/dt-bindings/interrupt-controller/aspeed-scu-ic.h +++ b/include/dt-bindings/interrupt-controller/aspeed-scu-ic.h @@ -20,4 +20,18 @@ #define ASPEED_AST2600_SCU_IC1_LPC_RESET_LO_TO_HI 0 #define ASPEED_AST2600_SCU_IC1_LPC_RESET_HI_TO_LO 1 +#define ASPEED_AST2700_SCU_IC0_PCIE_PERST_LO_TO_HI 3 +#define ASPEED_AST2700_SCU_IC0_PCIE_PERST_HI_TO_LO 2 + +#define ASPEED_AST2700_SCU_IC1_PCIE_RCRST_LO_TO_HI 3 +#define ASPEED_AST2700_SCU_IC1_PCIE_RCRST_HI_TO_LO 2 + +#define ASPEED_AST2700_SCU_IC2_PCIE_PERST_LO_TO_HI 3 +#define ASPEED_AST2700_SCU_IC2_PCIE_PERST_HI_TO_LO 2 +#define ASPEED_AST2700_SCU_IC2_LPC_RESET_LO_TO_HI 1 +#define ASPEED_AST2700_SCU_IC2_LPC_RESET_HI_TO_LO 0 + +#define ASPEED_AST2700_SCU_IC3_LPC_RESET_LO_TO_HI 1 +#define ASPEED_AST2700_SCU_IC3_LPC_RESET_HI_TO_LO 0 + #endif /* _DT_BINDINGS_INTERRUPT_CONTROLLER_ASPEED_SCU_IC_H_ */ diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h index 0bf7d33a1048..7fcec025c5e0 100644 --- a/include/linux/cc_platform.h +++ b/include/linux/cc_platform.h @@ -96,6 +96,14 @@ enum cc_attr { * enabled to run SEV-SNP guests. */ CC_ATTR_HOST_SEV_SNP, + + /** + * @CC_ATTR_SNP_SECURE_AVIC: Secure AVIC mode is active. + * + * The host kernel is running with the necessary features enabled + * to run SEV-SNP guests with full Secure AVIC capabilities. + */ + CC_ATTR_SNP_SECURE_AVIC, }; #ifdef CONFIG_ARCH_HAS_CC_PLATFORM diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 1ef867bb8c44..2cf1bf65b225 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -154,14 +154,11 @@ static inline s64 hrtimer_get_expires_ns(const struct hrtimer *timer) return ktime_to_ns(timer->node.expires); } -static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer) -{ - return ktime_sub(timer->node.expires, timer->base->get_time()); -} +ktime_t hrtimer_cb_get_time(const struct hrtimer *timer); -static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer) +static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer) { - return timer->base->get_time(); + return ktime_sub(timer->node.expires, hrtimer_cb_get_time(timer)); } static inline int hrtimer_is_hres_active(struct hrtimer *timer) @@ -200,8 +197,7 @@ __hrtimer_expires_remaining_adjusted(const struct hrtimer *timer, ktime_t now) static inline ktime_t hrtimer_expires_remaining_adjusted(const struct hrtimer *timer) { - return __hrtimer_expires_remaining_adjusted(timer, - timer->base->get_time()); + return __hrtimer_expires_remaining_adjusted(timer, hrtimer_cb_get_time(timer)); } #ifdef CONFIG_TIMERFD @@ -363,7 +359,7 @@ hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval); static inline u64 hrtimer_forward_now(struct hrtimer *timer, ktime_t interval) { - return hrtimer_forward(timer, timer->base->get_time(), interval); + return hrtimer_forward(timer, hrtimer_cb_get_time(timer), interval); } /* Precise sleep: */ diff --git a/include/linux/hrtimer_defs.h b/include/linux/hrtimer_defs.h index 84a5045f80f3..aa49ffa130e5 100644 --- a/include/linux/hrtimer_defs.h +++ b/include/linux/hrtimer_defs.h @@ -41,7 +41,6 @@ * @seq: seqcount around __run_hrtimer * @running: pointer to the currently running hrtimer * @active: red black tree root node for the active timers - * @get_time: function to retrieve the current time of the clock * @offset: offset of this clock to the monotonic base */ struct hrtimer_clock_base { @@ -51,7 +50,6 @@ struct hrtimer_clock_base { seqcount_raw_spinlock_t seq; struct hrtimer *running; struct timerqueue_head active; - ktime_t (*get_time)(void); ktime_t offset; } __hrtimer_clock_base_align; diff --git a/include/linux/irq.h b/include/linux/irq.h index 1d6b606a81ef..c67e76fbcc07 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -669,6 +669,8 @@ extern int irq_chip_set_parent_state(struct irq_data *data, extern int irq_chip_get_parent_state(struct irq_data *data, enum irqchip_irq_state which, bool *state); +extern void irq_chip_shutdown_parent(struct irq_data *data); +extern unsigned int irq_chip_startup_parent(struct irq_data *data); extern void irq_chip_enable_parent(struct irq_data *data); extern void irq_chip_disable_parent(struct irq_data *data); extern void irq_chip_ack_parent(struct irq_data *data); @@ -976,10 +978,6 @@ static inline void irq_free_desc(unsigned int irq) irq_free_descs(irq, 1); } -#ifdef CONFIG_GENERIC_IRQ_LEGACY -void irq_init_desc(unsigned int irq); -#endif - /** * struct irq_chip_regs - register offsets for struct irq_gci * @enable: Enable register offset to reg_base diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 91b20788273d..0d1927da8055 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -61,7 +61,7 @@ extern void register_refined_jiffies(long clock_tick_rate); -/* TICK_USEC is the time between ticks in usec assuming SHIFTED_HZ */ +/* TICK_USEC is the time between ticks in usec */ #define TICK_USEC ((USEC_PER_SEC + HZ/2) / HZ) /* USER_TICK_USEC is the time between ticks in usec assuming fake USER_HZ */ diff --git a/include/linux/msi.h b/include/linux/msi.h index faac634ac230..d415dd15a0a9 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -564,6 +564,8 @@ enum { MSI_FLAG_PARENT_PM_DEV = (1 << 8), /* Support for parent mask/unmask */ MSI_FLAG_PCI_MSI_MASK_PARENT = (1 << 9), + /* Support for parent startup/shutdown */ + MSI_FLAG_PCI_MSI_STARTUP_PARENT = (1 << 10), /* Mask for the generic functionality */ MSI_GENERIC_FLAGS_MASK = GENMASK(15, 0), diff --git a/include/linux/platform_data/dmtimer-omap.h b/include/linux/platform_data/dmtimer-omap.h index 95d852aef130..726d89143842 100644 --- a/include/linux/platform_data/dmtimer-omap.h +++ b/include/linux/platform_data/dmtimer-omap.h @@ -36,9 +36,13 @@ struct omap_dm_timer_ops { int (*set_pwm)(struct omap_dm_timer *timer, int def_on, int toggle, int trigger, int autoreload); int (*get_pwm_status)(struct omap_dm_timer *timer); + int (*set_cap)(struct omap_dm_timer *timer, + int autoreload, bool config_period); + int (*get_cap_status)(struct omap_dm_timer *timer); int (*set_prescaler)(struct omap_dm_timer *timer, int prescaler); unsigned int (*read_counter)(struct omap_dm_timer *timer); + unsigned int (*read_cap)(struct omap_dm_timer *timer, bool is_period); int (*write_counter)(struct omap_dm_timer *timer, unsigned int value); unsigned int (*read_status)(struct omap_dm_timer *timer); diff --git a/include/linux/psp-platform-access.h b/include/linux/psp-platform-access.h index 1504fb012c05..540abf7de048 100644 --- a/include/linux/psp-platform-access.h +++ b/include/linux/psp-platform-access.h @@ -7,6 +7,8 @@ enum psp_platform_access_msg { PSP_CMD_NONE = 0x0, + PSP_SFS_GET_FW_VERSIONS, + PSP_SFS_UPDATE, PSP_CMD_HSTI_QUERY = 0x14, PSP_I2C_REQ_BUS_CMD = 0x64, PSP_DYNAMIC_BOOST_GET_NONCE, diff --git a/include/linux/rseq.h b/include/linux/rseq.h index a96fd345aa38..69553e7c14c1 100644 --- a/include/linux/rseq.h +++ b/include/linux/rseq.h @@ -7,6 +7,12 @@ #include <linux/preempt.h> #include <linux/sched.h> +#ifdef CONFIG_MEMBARRIER +# define RSEQ_EVENT_GUARD irq +#else +# define RSEQ_EVENT_GUARD preempt +#endif + /* * Map the event mask on the user-space ABI enum rseq_cs_flags * for direct mask checks. @@ -41,9 +47,8 @@ static inline void rseq_handle_notify_resume(struct ksignal *ksig, static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { - preempt_disable(); - __set_bit(RSEQ_EVENT_SIGNAL_BIT, ¤t->rseq_event_mask); - preempt_enable(); + scoped_guard(RSEQ_EVENT_GUARD) + __set_bit(RSEQ_EVENT_SIGNAL_BIT, ¤t->rseq_event_mask); rseq_handle_notify_resume(ksig, regs); } diff --git a/include/uapi/linux/psp-sfs.h b/include/uapi/linux/psp-sfs.h new file mode 100644 index 000000000000..94e51670383c --- /dev/null +++ b/include/uapi/linux/psp-sfs.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ +/* + * Userspace interface for AMD Seamless Firmware Servicing (SFS) + * + * Copyright (C) 2025 Advanced Micro Devices, Inc. + * + * Author: Ashish Kalra <ashish.kalra@amd.com> + */ + +#ifndef __PSP_SFS_USER_H__ +#define __PSP_SFS_USER_H__ + +#include <linux/types.h> + +/** + * SFS: AMD Seamless Firmware Support (SFS) interface + */ + +#define PAYLOAD_NAME_SIZE 64 +#define TEE_EXT_CMD_BUFFER_SIZE 4096 + +/** + * struct sfs_user_get_fw_versions - get current level of base firmware (output). + * @blob: current level of base firmware for ASP and patch levels (input/output). + * @sfs_status: 32-bit SFS status value (output). + * @sfs_extended_status: 32-bit SFS extended status value (output). + */ +struct sfs_user_get_fw_versions { + __u8 blob[TEE_EXT_CMD_BUFFER_SIZE]; + __u32 sfs_status; + __u32 sfs_extended_status; +} __packed; + +/** + * struct sfs_user_update_package - update SFS package (input). + * @payload_name: name of SFS package to load, verify and execute (input). + * @sfs_status: 32-bit SFS status value (output). + * @sfs_extended_status: 32-bit SFS extended status value (output). + */ +struct sfs_user_update_package { + char payload_name[PAYLOAD_NAME_SIZE]; + __u32 sfs_status; + __u32 sfs_extended_status; +} __packed; + +/** + * Seamless Firmware Support (SFS) IOC + * + * possible return codes for all SFS IOCTLs: + * 0: success + * -EINVAL: invalid input + * -E2BIG: excess data passed + * -EFAULT: failed to copy to/from userspace + * -EBUSY: mailbox in recovery or in use + * -ENODEV: driver not bound with PSP device + * -EACCES: request isn't authorized + * -EINVAL: invalid parameter + * -ETIMEDOUT: request timed out + * -EAGAIN: invalid request for state machine + * -ENOENT: not implemented + * -ENFILE: overflow + * -EPERM: invalid signature + * -EIO: PSP I/O error + */ +#define SFS_IOC_TYPE 'S' + +/** + * SFSIOCFWVERS - returns blob containing FW versions + * ASP provides the current level of Base Firmware for the ASP + * and the other microprocessors as well as current patch + * level(s). + */ +#define SFSIOCFWVERS _IOWR(SFS_IOC_TYPE, 0x1, struct sfs_user_get_fw_versions) + +/** + * SFSIOCUPDATEPKG - updates package/payload + * ASP loads, verifies and executes the SFS package. + * By default, the SFS package/payload is loaded from + * /lib/firmware/amd, but alternative firmware loading + * path can be specified using kernel parameter + * firmware_class.path or the firmware loading path + * can be customized using sysfs file: + * /sys/module/firmware_class/parameters/path. + */ +#define SFSIOCUPDATEPKG _IOWR(SFS_IOC_TYPE, 0x2, struct sfs_user_update_package) + +#endif /* __PSP_SFS_USER_H__ */ diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h index 02533038640e..23c39b96190f 100644 --- a/include/vdso/datapage.h +++ b/include/vdso/datapage.h @@ -31,7 +31,7 @@ struct arch_vdso_time_data {}; #if defined(CONFIG_ARCH_HAS_VDSO_ARCH_DATA) #include <asm/vdso/arch_data.h> -#elif defined(CONFIG_GENERIC_VDSO_DATA_STORE) +#else struct vdso_arch_data { /* Needed for the generic code, never actually used at runtime */ char __unused; @@ -164,7 +164,6 @@ struct vdso_rng_data { * With the hidden visibility, the compiler simply generates a PC-relative * relocation, and this is what we need. */ -#ifdef CONFIG_GENERIC_VDSO_DATA_STORE extern struct vdso_time_data vdso_u_time_data __attribute__((visibility("hidden"))); extern struct vdso_rng_data vdso_u_rng_data __attribute__((visibility("hidden"))); extern struct vdso_arch_data vdso_u_arch_data __attribute__((visibility("hidden"))); @@ -185,8 +184,6 @@ enum vdso_pages { VDSO_NR_PAGES }; -#endif /* CONFIG_GENERIC_VDSO_DATA_STORE */ - /* * The generic vDSO implementation requires that gettimeofday.h * provides: @@ -196,11 +193,7 @@ enum vdso_pages { * - clock_gettime_fallback(): fallback for clock_gettime. * - clock_getres_fallback(): fallback for clock_getres. */ -#ifdef ENABLE_COMPAT_VDSO -#include <asm/vdso/compat_gettimeofday.h> -#else #include <asm/vdso/gettimeofday.h> -#endif /* ENABLE_COMPAT_VDSO */ #else /* !__ASSEMBLY__ */ diff --git a/include/vdso/gettime.h b/include/vdso/gettime.h index c50d152e7b3e..9ac161866653 100644 --- a/include/vdso/gettime.h +++ b/include/vdso/gettime.h @@ -5,6 +5,7 @@ #include <linux/types.h> struct __kernel_timespec; +struct __kernel_old_timeval; struct timezone; #if !defined(CONFIG_64BIT) || defined(BUILD_VDSO32_64) diff --git a/include/vdso/jiffies.h b/include/vdso/jiffies.h index 2f9d596c8b29..8ca04a141412 100644 --- a/include/vdso/jiffies.h +++ b/include/vdso/jiffies.h @@ -5,7 +5,7 @@ #include <asm/param.h> /* for HZ */ #include <vdso/time64.h> -/* TICK_NSEC is the time between ticks in nsec assuming SHIFTED_HZ */ +/* TICK_NSEC is the time between ticks in nsec */ #define TICK_NSEC ((NSEC_PER_SEC+HZ/2)/HZ) #endif /* __VDSO_JIFFIES_H */ diff --git a/init/Kconfig b/init/Kconfig index c2b0c9df679a..f3b13463ec26 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1363,7 +1363,7 @@ config UTS_NS config TIME_NS bool "TIME namespace" - depends on GENERIC_VDSO_TIME_NS + depends on GENERIC_GETTIMEOFDAY default y help In this namespace boottime and monotonic clocks can be set. diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt index 54ea59ff8fbe..da326800c1c9 100644 --- a/kernel/Kconfig.preempt +++ b/kernel/Kconfig.preempt @@ -103,6 +103,19 @@ config PREEMPT_RT Select this if you are building a kernel for systems which require real-time guarantees. +config PREEMPT_RT_NEEDS_BH_LOCK + bool "Enforce softirq synchronisation on PREEMPT_RT" + depends on PREEMPT_RT + help + Enforce synchronisation across the softirqs context. On PREEMPT_RT + the softirq is preemptible. This enforces the same per-CPU BLK + semantic non-PREEMPT_RT builds have. This should not be needed + because per-CPU locks were added to avoid the per-CPU BKL. + + This switch provides the old behaviour for testing reasons. Select + this if you suspect an error with preemptible softirq and want test + the old synchronized behaviour. + config PREEMPT_COUNT bool diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c index 4b6da9116aa6..880c9bf2f315 100644 --- a/kernel/futex/syscalls.c +++ b/kernel/futex/syscalls.c @@ -39,6 +39,56 @@ SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head, return 0; } +static inline void __user *futex_task_robust_list(struct task_struct *p, bool compat) +{ +#ifdef CONFIG_COMPAT + if (compat) + return p->compat_robust_list; +#endif + return p->robust_list; +} + +static void __user *futex_get_robust_list_common(int pid, bool compat) +{ + struct task_struct *p = current; + void __user *head; + int ret; + + scoped_guard(rcu) { + if (pid) { + p = find_task_by_vpid(pid); + if (!p) + return (void __user *)ERR_PTR(-ESRCH); + } + get_task_struct(p); + } + + /* + * Hold exec_update_lock to serialize with concurrent exec() + * so ptrace_may_access() is checked against stable credentials + */ + ret = down_read_killable(&p->signal->exec_update_lock); + if (ret) + goto err_put; + + ret = -EPERM; + if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) + goto err_unlock; + + head = futex_task_robust_list(p, compat); + + up_read(&p->signal->exec_update_lock); + put_task_struct(p); + + return head; + +err_unlock: + up_read(&p->signal->exec_update_lock); +err_put: + put_task_struct(p); + return (void __user *)ERR_PTR(ret); +} + /** * sys_get_robust_list() - Get the robust-futex list head of a task * @pid: pid of the process [zero for current task] @@ -49,36 +99,14 @@ SYSCALL_DEFINE3(get_robust_list, int, pid, struct robust_list_head __user * __user *, head_ptr, size_t __user *, len_ptr) { - struct robust_list_head __user *head; - unsigned long ret; - struct task_struct *p; - - rcu_read_lock(); - - ret = -ESRCH; - if (!pid) - p = current; - else { - p = find_task_by_vpid(pid); - if (!p) - goto err_unlock; - } - - ret = -EPERM; - if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) - goto err_unlock; + struct robust_list_head __user *head = futex_get_robust_list_common(pid, false); - head = p->robust_list; - rcu_read_unlock(); + if (IS_ERR(head)) + return PTR_ERR(head); if (put_user(sizeof(*head), len_ptr)) return -EFAULT; return put_user(head, head_ptr); - -err_unlock: - rcu_read_unlock(); - - return ret; } long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, @@ -455,36 +483,14 @@ COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, compat_uptr_t __user *, head_ptr, compat_size_t __user *, len_ptr) { - struct compat_robust_list_head __user *head; - unsigned long ret; - struct task_struct *p; - - rcu_read_lock(); - - ret = -ESRCH; - if (!pid) - p = current; - else { - p = find_task_by_vpid(pid); - if (!p) - goto err_unlock; - } - - ret = -EPERM; - if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) - goto err_unlock; + struct compat_robust_list_head __user *head = futex_get_robust_list_common(pid, true); - head = p->compat_robust_list; - rcu_read_unlock(); + if (IS_ERR(head)) + return PTR_ERR(head); if (put_user(sizeof(*head), len_ptr)) return -EFAULT; return put_user(ptr_to_compat(head), head_ptr); - -err_unlock: - rcu_read_unlock(); - - return ret; } #endif /* CONFIG_COMPAT */ diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index 1da5e9d9da71..1b4254d19a73 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -6,10 +6,6 @@ menu "IRQ subsystem" config MAY_HAVE_SPARSE_IRQ bool -# Legacy support, required for itanic -config GENERIC_IRQ_LEGACY - bool - # Enable the generic irq autoprobe mechanism config GENERIC_IRQ_PROBE bool @@ -147,7 +143,9 @@ config GENERIC_IRQ_KEXEC_CLEAR_VM_FORWARD config IRQ_KUNIT_TEST bool "KUnit tests for IRQ management APIs" if !KUNIT_ALL_TESTS depends on KUNIT=y + depends on SPARSE_IRQ default KUNIT_ALL_TESTS + select IRQ_DOMAIN imply SMP help This option enables KUnit tests for the IRQ subsystem API. These are diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 0d0276378c70..3ffa0d80ddd1 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -1260,6 +1260,43 @@ int irq_chip_get_parent_state(struct irq_data *data, EXPORT_SYMBOL_GPL(irq_chip_get_parent_state); /** + * irq_chip_shutdown_parent - Shutdown the parent interrupt + * @data: Pointer to interrupt specific data + * + * Invokes the irq_shutdown() callback of the parent if available or falls + * back to irq_chip_disable_parent(). + */ +void irq_chip_shutdown_parent(struct irq_data *data) +{ + struct irq_data *parent = data->parent_data; + + if (parent->chip->irq_shutdown) + parent->chip->irq_shutdown(parent); + else + irq_chip_disable_parent(data); +} +EXPORT_SYMBOL_GPL(irq_chip_shutdown_parent); + +/** + * irq_chip_startup_parent - Startup the parent interrupt + * @data: Pointer to interrupt specific data + * + * Invokes the irq_startup() callback of the parent if available or falls + * back to irq_chip_enable_parent(). + */ +unsigned int irq_chip_startup_parent(struct irq_data *data) +{ + struct irq_data *parent = data->parent_data; + + if (parent->chip->irq_startup) + return parent->chip->irq_startup(parent); + + irq_chip_enable_parent(data); + return 0; +} +EXPORT_SYMBOL_GPL(irq_chip_startup_parent); + +/** * irq_chip_enable_parent - Enable the parent interrupt (defaults to unmask if * NULL) * @data: Pointer to interrupt specific data diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c index eb16a58e0322..b41188698622 100644 --- a/kernel/irq/devres.c +++ b/kernel/irq/devres.c @@ -30,29 +30,22 @@ static int devm_irq_match(struct device *dev, void *res, void *data) return this->irq == match->irq && this->dev_id == match->dev_id; } -/** - * devm_request_threaded_irq - allocate an interrupt line for a managed device - * @dev: device to request interrupt for - * @irq: Interrupt line to allocate - * @handler: Function to be called when the IRQ occurs - * @thread_fn: function to be called in a threaded interrupt context. NULL - * for devices which handle everything in @handler - * @irqflags: Interrupt type flags - * @devname: An ascii name for the claiming device, dev_name(dev) if NULL - * @dev_id: A cookie passed back to the handler function - * - * Except for the extra @dev argument, this function takes the - * same arguments and performs the same function as - * request_threaded_irq(). IRQs requested with this function will be - * automatically freed on driver detach. - * - * If an IRQ allocated with this function needs to be freed - * separately, devm_free_irq() must be used. - */ -int devm_request_threaded_irq(struct device *dev, unsigned int irq, - irq_handler_t handler, irq_handler_t thread_fn, - unsigned long irqflags, const char *devname, - void *dev_id) +static int devm_request_result(struct device *dev, int rc, unsigned int irq, + irq_handler_t handler, irq_handler_t thread_fn, + const char *devname) +{ + if (rc >= 0) + return rc; + + return dev_err_probe(dev, rc, "request_irq(%u) %ps %ps %s\n", + irq, handler, thread_fn, devname ? : ""); +} + +static int __devm_request_threaded_irq(struct device *dev, unsigned int irq, + irq_handler_t handler, + irq_handler_t thread_fn, + unsigned long irqflags, + const char *devname, void *dev_id) { struct irq_devres *dr; int rc; @@ -78,28 +71,48 @@ int devm_request_threaded_irq(struct device *dev, unsigned int irq, return 0; } -EXPORT_SYMBOL(devm_request_threaded_irq); /** - * devm_request_any_context_irq - allocate an interrupt line for a managed device - * @dev: device to request interrupt for - * @irq: Interrupt line to allocate - * @handler: Function to be called when the IRQ occurs - * @irqflags: Interrupt type flags - * @devname: An ascii name for the claiming device, dev_name(dev) if NULL - * @dev_id: A cookie passed back to the handler function + * devm_request_threaded_irq - allocate an interrupt line for a managed device with error logging + * @dev: Device to request interrupt for + * @irq: Interrupt line to allocate + * @handler: Function to be called when the interrupt occurs + * @thread_fn: Function to be called in a threaded interrupt context. NULL + * for devices which handle everything in @handler + * @irqflags: Interrupt type flags + * @devname: An ascii name for the claiming device, dev_name(dev) if NULL + * @dev_id: A cookie passed back to the handler function * - * Except for the extra @dev argument, this function takes the - * same arguments and performs the same function as - * request_any_context_irq(). IRQs requested with this function will be - * automatically freed on driver detach. + * Except for the extra @dev argument, this function takes the same + * arguments and performs the same function as request_threaded_irq(). + * Interrupts requested with this function will be automatically freed on + * driver detach. + * + * If an interrupt allocated with this function needs to be freed + * separately, devm_free_irq() must be used. + * + * When the request fails, an error message is printed with contextual + * information (device name, interrupt number, handler functions and + * error code). Don't add extra error messages at the call sites. * - * If an IRQ allocated with this function needs to be freed - * separately, devm_free_irq() must be used. + * Return: 0 on success or a negative error number. */ -int devm_request_any_context_irq(struct device *dev, unsigned int irq, - irq_handler_t handler, unsigned long irqflags, - const char *devname, void *dev_id) +int devm_request_threaded_irq(struct device *dev, unsigned int irq, + irq_handler_t handler, irq_handler_t thread_fn, + unsigned long irqflags, const char *devname, + void *dev_id) +{ + int rc = __devm_request_threaded_irq(dev, irq, handler, thread_fn, + irqflags, devname, dev_id); + + return devm_request_result(dev, rc, irq, handler, thread_fn, devname); +} +EXPORT_SYMBOL(devm_request_threaded_irq); + +static int __devm_request_any_context_irq(struct device *dev, unsigned int irq, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, void *dev_id) { struct irq_devres *dr; int rc; @@ -124,6 +137,40 @@ int devm_request_any_context_irq(struct device *dev, unsigned int irq, return rc; } + +/** + * devm_request_any_context_irq - allocate an interrupt line for a managed device with error logging + * @dev: Device to request interrupt for + * @irq: Interrupt line to allocate + * @handler: Function to be called when the interrupt occurs + * @irqflags: Interrupt type flags + * @devname: An ascii name for the claiming device, dev_name(dev) if NULL + * @dev_id: A cookie passed back to the handler function + * + * Except for the extra @dev argument, this function takes the same + * arguments and performs the same function as request_any_context_irq(). + * Interrupts requested with this function will be automatically freed on + * driver detach. + * + * If an interrupt allocated with this function needs to be freed + * separately, devm_free_irq() must be used. + * + * When the request fails, an error message is printed with contextual + * information (device name, interrupt number, handler functions and + * error code). Don't add extra error messages at the call sites. + * + * Return: IRQC_IS_HARDIRQ or IRQC_IS_NESTED on success, or a negative error + * number. + */ +int devm_request_any_context_irq(struct device *dev, unsigned int irq, + irq_handler_t handler, unsigned long irqflags, + const char *devname, void *dev_id) +{ + int rc = __devm_request_any_context_irq(dev, irq, handler, irqflags, + devname, dev_id); + + return devm_request_result(dev, rc, irq, handler, NULL, devname); +} EXPORT_SYMBOL(devm_request_any_context_irq); /** diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 9489f93b3db3..e103451243a0 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -136,6 +136,44 @@ void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action) wake_up_process(action->thread); } +static DEFINE_STATIC_KEY_FALSE(irqhandler_duration_check_enabled); +static u64 irqhandler_duration_threshold_ns __ro_after_init; + +static int __init irqhandler_duration_check_setup(char *arg) +{ + unsigned long val; + int ret; + + ret = kstrtoul(arg, 0, &val); + if (ret) { + pr_err("Unable to parse irqhandler.duration_warn_us setting: ret=%d\n", ret); + return 0; + } + + if (!val) { + pr_err("Invalid irqhandler.duration_warn_us setting, must be > 0\n"); + return 0; + } + + irqhandler_duration_threshold_ns = val * 1000; + static_branch_enable(&irqhandler_duration_check_enabled); + + return 1; +} +__setup("irqhandler.duration_warn_us=", irqhandler_duration_check_setup); + +static inline void irqhandler_duration_check(u64 ts_start, unsigned int irq, + const struct irqaction *action) +{ + u64 delta_ns = local_clock() - ts_start; + + if (unlikely(delta_ns > irqhandler_duration_threshold_ns)) { + pr_warn_ratelimited("[CPU%u] long duration of IRQ[%u:%ps], took: %llu us\n", + smp_processor_id(), irq, action->handler, + div_u64(delta_ns, NSEC_PER_USEC)); + } +} + irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc) { irqreturn_t retval = IRQ_NONE; @@ -155,7 +193,16 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc) lockdep_hardirq_threaded(); trace_irq_handler_entry(irq, action); - res = action->handler(irq, action->dev_id); + + if (static_branch_unlikely(&irqhandler_duration_check_enabled)) { + u64 ts_start = local_clock(); + + res = action->handler(irq, action->dev_id); + irqhandler_duration_check(ts_start, irq, action); + } else { + res = action->handler(irq, action->dev_id); + } + trace_irq_handler_exit(irq, action, res); if (WARN_ONCE(!irqs_disabled(),"irq %u handler %pS enabled interrupts\n", diff --git a/kernel/irq/irq_test.c b/kernel/irq/irq_test.c index a75abebed7f2..e2d31914b3c4 100644 --- a/kernel/irq/irq_test.c +++ b/kernel/irq/irq_test.c @@ -41,21 +41,37 @@ static struct irq_chip fake_irq_chip = { .flags = IRQCHIP_SKIP_SET_WAKE, }; -static void irq_disable_depth_test(struct kunit *test) +static int irq_test_setup_fake_irq(struct kunit *test, struct irq_affinity_desc *affd) { struct irq_desc *desc; - int virq, ret; + int virq; - virq = irq_domain_alloc_descs(-1, 1, 0, NUMA_NO_NODE, NULL); + virq = irq_domain_alloc_descs(-1, 1, 0, NUMA_NO_NODE, affd); KUNIT_ASSERT_GE(test, virq, 0); - irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_simple_irq); + irq_set_chip_and_handler(virq, &fake_irq_chip, handle_simple_irq); + + desc = irq_to_desc(virq); + KUNIT_ASSERT_PTR_NE(test, desc, NULL); + + /* On some architectures, IRQs are NOREQUEST | NOPROBE by default. */ + irq_settings_clr_norequest(desc); + + return virq; +} + +static void irq_disable_depth_test(struct kunit *test) +{ + struct irq_desc *desc; + int virq, ret; + + virq = irq_test_setup_fake_irq(test, NULL); desc = irq_to_desc(virq); KUNIT_ASSERT_PTR_NE(test, desc, NULL); ret = request_irq(virq, noop_handler, 0, "test_irq", NULL); - KUNIT_EXPECT_EQ(test, ret, 0); + KUNIT_ASSERT_EQ(test, ret, 0); KUNIT_EXPECT_EQ(test, desc->depth, 0); @@ -73,16 +89,13 @@ static void irq_free_disabled_test(struct kunit *test) struct irq_desc *desc; int virq, ret; - virq = irq_domain_alloc_descs(-1, 1, 0, NUMA_NO_NODE, NULL); - KUNIT_ASSERT_GE(test, virq, 0); - - irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_simple_irq); + virq = irq_test_setup_fake_irq(test, NULL); desc = irq_to_desc(virq); KUNIT_ASSERT_PTR_NE(test, desc, NULL); ret = request_irq(virq, noop_handler, 0, "test_irq", NULL); - KUNIT_EXPECT_EQ(test, ret, 0); + KUNIT_ASSERT_EQ(test, ret, 0); KUNIT_EXPECT_EQ(test, desc->depth, 0); @@ -93,7 +106,7 @@ static void irq_free_disabled_test(struct kunit *test) KUNIT_EXPECT_GE(test, desc->depth, 1); ret = request_irq(virq, noop_handler, 0, "test_irq", NULL); - KUNIT_EXPECT_EQ(test, ret, 0); + KUNIT_ASSERT_EQ(test, ret, 0); KUNIT_EXPECT_EQ(test, desc->depth, 0); free_irq(virq, NULL); @@ -112,10 +125,7 @@ static void irq_shutdown_depth_test(struct kunit *test) if (!IS_ENABLED(CONFIG_SMP)) kunit_skip(test, "requires CONFIG_SMP for managed shutdown"); - virq = irq_domain_alloc_descs(-1, 1, 0, NUMA_NO_NODE, &affinity); - KUNIT_ASSERT_GE(test, virq, 0); - - irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_simple_irq); + virq = irq_test_setup_fake_irq(test, &affinity); desc = irq_to_desc(virq); KUNIT_ASSERT_PTR_NE(test, desc, NULL); @@ -124,7 +134,7 @@ static void irq_shutdown_depth_test(struct kunit *test) KUNIT_ASSERT_PTR_NE(test, data, NULL); ret = request_irq(virq, noop_handler, 0, "test_irq", NULL); - KUNIT_EXPECT_EQ(test, ret, 0); + KUNIT_ASSERT_EQ(test, ret, 0); KUNIT_EXPECT_TRUE(test, irqd_is_activated(data)); KUNIT_EXPECT_TRUE(test, irqd_is_started(data)); @@ -169,13 +179,12 @@ static void irq_cpuhotplug_test(struct kunit *test) kunit_skip(test, "requires more than 1 CPU for CPU hotplug"); if (!cpu_is_hotpluggable(1)) kunit_skip(test, "CPU 1 must be hotpluggable"); + if (!cpu_online(1)) + kunit_skip(test, "CPU 1 must be online"); cpumask_copy(&affinity.mask, cpumask_of(1)); - virq = irq_domain_alloc_descs(-1, 1, 0, NUMA_NO_NODE, &affinity); - KUNIT_ASSERT_GE(test, virq, 0); - - irq_set_chip_and_handler(virq, &fake_irq_chip, handle_simple_irq); + virq = irq_test_setup_fake_irq(test, &affinity); desc = irq_to_desc(virq); KUNIT_ASSERT_PTR_NE(test, desc, NULL); @@ -184,7 +193,7 @@ static void irq_cpuhotplug_test(struct kunit *test) KUNIT_ASSERT_PTR_NE(test, data, NULL); ret = request_irq(virq, noop_handler, 0, "test_irq", NULL); - KUNIT_EXPECT_EQ(test, ret, 0); + KUNIT_ASSERT_EQ(test, ret, 0); KUNIT_EXPECT_TRUE(test, irqd_is_activated(data)); KUNIT_EXPECT_TRUE(test, irqd_is_started(data)); @@ -196,13 +205,9 @@ static void irq_cpuhotplug_test(struct kunit *test) KUNIT_EXPECT_EQ(test, desc->depth, 1); KUNIT_EXPECT_EQ(test, remove_cpu(1), 0); - KUNIT_EXPECT_FALSE(test, irqd_is_activated(data)); - KUNIT_EXPECT_FALSE(test, irqd_is_started(data)); KUNIT_EXPECT_GE(test, desc->depth, 1); KUNIT_EXPECT_EQ(test, add_cpu(1), 0); - KUNIT_EXPECT_FALSE(test, irqd_is_activated(data)); - KUNIT_EXPECT_FALSE(test, irqd_is_started(data)); KUNIT_EXPECT_EQ(test, desc->depth, 1); enable_irq(virq); diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index b64c57b44c20..db714d3014b5 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -653,13 +653,6 @@ void irq_mark_irq(unsigned int irq) irq_insert_desc(irq, irq_desc + irq); } -#ifdef CONFIG_GENERIC_IRQ_LEGACY -void irq_init_desc(unsigned int irq) -{ - free_desc(irq); -} -#endif - #endif /* !CONFIG_SPARSE_IRQ */ int handle_irq_desc(struct irq_desc *desc) diff --git a/kernel/rseq.c b/kernel/rseq.c index b7a1ec327e81..2452b7366b00 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c @@ -342,12 +342,12 @@ static int rseq_need_restart(struct task_struct *t, u32 cs_flags) /* * Load and clear event mask atomically with respect to - * scheduler preemption. + * scheduler preemption and membarrier IPIs. */ - preempt_disable(); - event_mask = t->rseq_event_mask; - t->rseq_event_mask = 0; - preempt_enable(); + scoped_guard(RSEQ_EVENT_GUARD) { + event_mask = t->rseq_event_mask; + t->rseq_event_mask = 0; + } return !!event_mask; } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ec126f85ff40..198d2dd45f59 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -919,7 +919,7 @@ void hrtick_start(struct rq *rq, u64 delay) * doesn't make sense and can cause timer DoS. */ delta = max_t(s64, delay, 10000LL); - rq->hrtick_time = ktime_add_ns(timer->base->get_time(), delta); + rq->hrtick_time = ktime_add_ns(hrtimer_cb_get_time(timer), delta); if (rq == this_rq()) __hrtick_restart(rq); diff --git a/kernel/smp.c b/kernel/smp.c index 56f83aa58ec8..02f52291fae4 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -884,16 +884,15 @@ static void smp_call_function_many_cond(const struct cpumask *mask, * @mask: The set of cpus to run on (only runs on online subset). * @func: The function to run. This must be fast and non-blocking. * @info: An arbitrary pointer to pass to the function. - * @wait: Bitmask that controls the operation. If %SCF_WAIT is set, wait - * (atomically) until function has completed on other CPUs. If - * %SCF_RUN_LOCAL is set, the function will also be run locally - * if the local CPU is set in the @cpumask. - * - * If @wait is true, then returns once @func has returned. + * @wait: If true, wait (atomically) until function has completed + * on other CPUs. * * You must not call this function with disabled interrupts or from a * hardware interrupt handler or from a bottom half handler. Preemption * must be disabled when calling this function. + * + * @func is not called on the local CPU even if @mask contains it. Consider + * using on_each_cpu_cond_mask() instead if this is not desirable. */ void smp_call_function_many(const struct cpumask *mask, smp_call_func_t func, void *info, bool wait) diff --git a/kernel/softirq.c b/kernel/softirq.c index 513b1945987c..77198911b8dd 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -165,7 +165,11 @@ void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) /* First entry of a task into a BH disabled section? */ if (!current->softirq_disable_cnt) { if (preemptible()) { - local_lock(&softirq_ctrl.lock); + if (IS_ENABLED(CONFIG_PREEMPT_RT_NEEDS_BH_LOCK)) + local_lock(&softirq_ctrl.lock); + else + migrate_disable(); + /* Required to meet the RCU bottomhalf requirements. */ rcu_read_lock(); } else { @@ -177,17 +181,34 @@ void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) * Track the per CPU softirq disabled state. On RT this is per CPU * state to allow preemption of bottom half disabled sections. */ - newcnt = __this_cpu_add_return(softirq_ctrl.cnt, cnt); - /* - * Reflect the result in the task state to prevent recursion on the - * local lock and to make softirq_count() & al work. - */ - current->softirq_disable_cnt = newcnt; + if (IS_ENABLED(CONFIG_PREEMPT_RT_NEEDS_BH_LOCK)) { + newcnt = this_cpu_add_return(softirq_ctrl.cnt, cnt); + /* + * Reflect the result in the task state to prevent recursion on the + * local lock and to make softirq_count() & al work. + */ + current->softirq_disable_cnt = newcnt; - if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && newcnt == cnt) { - raw_local_irq_save(flags); - lockdep_softirqs_off(ip); - raw_local_irq_restore(flags); + if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && newcnt == cnt) { + raw_local_irq_save(flags); + lockdep_softirqs_off(ip); + raw_local_irq_restore(flags); + } + } else { + bool sirq_dis = false; + + if (!current->softirq_disable_cnt) + sirq_dis = true; + + this_cpu_add(softirq_ctrl.cnt, cnt); + current->softirq_disable_cnt += cnt; + WARN_ON_ONCE(current->softirq_disable_cnt < 0); + + if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && sirq_dis) { + raw_local_irq_save(flags); + lockdep_softirqs_off(ip); + raw_local_irq_restore(flags); + } } } EXPORT_SYMBOL(__local_bh_disable_ip); @@ -195,23 +216,42 @@ EXPORT_SYMBOL(__local_bh_disable_ip); static void __local_bh_enable(unsigned int cnt, bool unlock) { unsigned long flags; + bool sirq_en = false; int newcnt; - DEBUG_LOCKS_WARN_ON(current->softirq_disable_cnt != - this_cpu_read(softirq_ctrl.cnt)); + if (IS_ENABLED(CONFIG_PREEMPT_RT_NEEDS_BH_LOCK)) { + DEBUG_LOCKS_WARN_ON(current->softirq_disable_cnt != + this_cpu_read(softirq_ctrl.cnt)); + if (softirq_count() == cnt) + sirq_en = true; + } else { + if (current->softirq_disable_cnt == cnt) + sirq_en = true; + } - if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && softirq_count() == cnt) { + if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && sirq_en) { raw_local_irq_save(flags); lockdep_softirqs_on(_RET_IP_); raw_local_irq_restore(flags); } - newcnt = __this_cpu_sub_return(softirq_ctrl.cnt, cnt); - current->softirq_disable_cnt = newcnt; + if (IS_ENABLED(CONFIG_PREEMPT_RT_NEEDS_BH_LOCK)) { + newcnt = this_cpu_sub_return(softirq_ctrl.cnt, cnt); + current->softirq_disable_cnt = newcnt; - if (!newcnt && unlock) { - rcu_read_unlock(); - local_unlock(&softirq_ctrl.lock); + if (!newcnt && unlock) { + rcu_read_unlock(); + local_unlock(&softirq_ctrl.lock); + } + } else { + current->softirq_disable_cnt -= cnt; + this_cpu_sub(softirq_ctrl.cnt, cnt); + if (unlock && !current->softirq_disable_cnt) { + migrate_enable(); + rcu_read_unlock(); + } else { + WARN_ON_ONCE(current->softirq_disable_cnt < 0); + } } } @@ -228,7 +268,10 @@ void __local_bh_enable_ip(unsigned long ip, unsigned int cnt) lock_map_release(&bh_lock_map); local_irq_save(flags); - curcnt = __this_cpu_read(softirq_ctrl.cnt); + if (IS_ENABLED(CONFIG_PREEMPT_RT_NEEDS_BH_LOCK)) + curcnt = this_cpu_read(softirq_ctrl.cnt); + else + curcnt = current->softirq_disable_cnt; /* * If this is not reenabling soft interrupts, no point in trying to @@ -805,6 +848,58 @@ static bool tasklet_clear_sched(struct tasklet_struct *t) return false; } +#ifdef CONFIG_PREEMPT_RT +struct tasklet_sync_callback { + spinlock_t cb_lock; + atomic_t cb_waiters; +}; + +static DEFINE_PER_CPU(struct tasklet_sync_callback, tasklet_sync_callback) = { + .cb_lock = __SPIN_LOCK_UNLOCKED(tasklet_sync_callback.cb_lock), + .cb_waiters = ATOMIC_INIT(0), +}; + +static void tasklet_lock_callback(void) +{ + spin_lock(this_cpu_ptr(&tasklet_sync_callback.cb_lock)); +} + +static void tasklet_unlock_callback(void) +{ + spin_unlock(this_cpu_ptr(&tasklet_sync_callback.cb_lock)); +} + +static void tasklet_callback_cancel_wait_running(void) +{ + struct tasklet_sync_callback *sync_cb = this_cpu_ptr(&tasklet_sync_callback); + + atomic_inc(&sync_cb->cb_waiters); + spin_lock(&sync_cb->cb_lock); + atomic_dec(&sync_cb->cb_waiters); + spin_unlock(&sync_cb->cb_lock); +} + +static void tasklet_callback_sync_wait_running(void) +{ + struct tasklet_sync_callback *sync_cb = this_cpu_ptr(&tasklet_sync_callback); + + if (atomic_read(&sync_cb->cb_waiters)) { + spin_unlock(&sync_cb->cb_lock); + spin_lock(&sync_cb->cb_lock); + } +} + +#else /* !CONFIG_PREEMPT_RT: */ + +static void tasklet_lock_callback(void) { } +static void tasklet_unlock_callback(void) { } +static void tasklet_callback_sync_wait_running(void) { } + +#ifdef CONFIG_SMP +static void tasklet_callback_cancel_wait_running(void) { } +#endif +#endif /* !CONFIG_PREEMPT_RT */ + static void tasklet_action_common(struct tasklet_head *tl_head, unsigned int softirq_nr) { @@ -816,6 +911,7 @@ static void tasklet_action_common(struct tasklet_head *tl_head, tl_head->tail = &tl_head->head; local_irq_enable(); + tasklet_lock_callback(); while (list) { struct tasklet_struct *t = list; @@ -835,6 +931,7 @@ static void tasklet_action_common(struct tasklet_head *tl_head, } } tasklet_unlock(t); + tasklet_callback_sync_wait_running(); continue; } tasklet_unlock(t); @@ -847,6 +944,7 @@ static void tasklet_action_common(struct tasklet_head *tl_head, __raise_softirq_irqoff(softirq_nr); local_irq_enable(); } + tasklet_unlock_callback(); } static __latent_entropy void tasklet_action(void) @@ -897,12 +995,9 @@ void tasklet_unlock_spin_wait(struct tasklet_struct *t) /* * Prevent a live lock when current preempted soft * interrupt processing or prevents ksoftirqd from - * running. If the tasklet runs on a different CPU - * then this has no effect other than doing the BH - * disable/enable dance for nothing. + * running. */ - local_bh_disable(); - local_bh_enable(); + tasklet_callback_cancel_wait_running(); } else { cpu_relax(); } diff --git a/kernel/time/Makefile b/kernel/time/Makefile index e6e9b85d4db5..f7d52d9543cc 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile @@ -26,7 +26,7 @@ obj-$(CONFIG_LEGACY_TIMER_TICK) += tick-legacy.o ifeq ($(CONFIG_SMP),y) obj-$(CONFIG_NO_HZ_COMMON) += timer_migration.o endif -obj-$(CONFIG_HAVE_GENERIC_VDSO) += vsyscall.o +obj-$(CONFIG_GENERIC_GETTIMEOFDAY) += vsyscall.o obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o obj-$(CONFIG_TEST_UDELAY) += test_udelay.o obj-$(CONFIG_TIME_NS) += namespace.o diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 577f0e6842d4..069d93bfb0c7 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -35,7 +35,7 @@ /** * struct alarm_base - Alarm timer bases - * @lock: Lock for syncrhonized access to the base + * @lock: Lock for synchronized access to the base * @timerqueue: Timerqueue head managing the list of events * @get_ktime: Function to read the time correlating to the base * @get_timespec: Function to read the namespace time correlating to the base diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index f3e831f62906..a59bc75ab7c5 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -633,7 +633,7 @@ void tick_offline_cpu(unsigned int cpu) raw_spin_lock(&clockevents_lock); tick_broadcast_offline(cpu); - tick_shutdown(cpu); + tick_shutdown(); /* * Unregister the clock event devices which were diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 0aef0e349e49..a1890a073196 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -144,7 +144,7 @@ static u64 suspend_start; * Default for maximum permissible skew when cs->uncertainty_margin is * not specified, and the lower bound even when cs->uncertainty_margin * is specified. This is also the default that is used when registering - * clocks with unspecifed cs->uncertainty_margin, so this macro is used + * clocks with unspecified cs->uncertainty_margin, so this macro is used * even in CONFIG_CLOCKSOURCE_WATCHDOG=n kernels. */ #define WATCHDOG_MAX_SKEW (MAX_SKEW_USEC * NSEC_PER_USEC) @@ -407,9 +407,8 @@ void clocksource_verify_percpu(struct clocksource *cs) if (!cpumask_empty(&cpus_behind)) pr_warn(" CPUs %*pbl behind CPU %d for clocksource %s.\n", cpumask_pr_args(&cpus_behind), testcpu, cs->name); - if (!cpumask_empty(&cpus_ahead) || !cpumask_empty(&cpus_behind)) - pr_warn(" CPU %d check durations %lldns - %lldns for clocksource %s.\n", - testcpu, cs_nsec_min, cs_nsec_max, cs->name); + pr_info(" CPU %d check durations %lldns - %lldns for clocksource %s.\n", + testcpu, cs_nsec_min, cs_nsec_max, cs->name); } EXPORT_SYMBOL_GPL(clocksource_verify_percpu); diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index e8c479329282..88aa062b8a55 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -59,6 +59,7 @@ #define HRTIMER_ACTIVE_ALL (HRTIMER_ACTIVE_SOFT | HRTIMER_ACTIVE_HARD) static void retrigger_next_event(void *arg); +static ktime_t __hrtimer_cb_get_time(clockid_t clock_id); /* * The timer bases: @@ -76,42 +77,34 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = { .index = HRTIMER_BASE_MONOTONIC, .clockid = CLOCK_MONOTONIC, - .get_time = &ktime_get, }, { .index = HRTIMER_BASE_REALTIME, .clockid = CLOCK_REALTIME, - .get_time = &ktime_get_real, }, { .index = HRTIMER_BASE_BOOTTIME, .clockid = CLOCK_BOOTTIME, - .get_time = &ktime_get_boottime, }, { .index = HRTIMER_BASE_TAI, .clockid = CLOCK_TAI, - .get_time = &ktime_get_clocktai, }, { .index = HRTIMER_BASE_MONOTONIC_SOFT, .clockid = CLOCK_MONOTONIC, - .get_time = &ktime_get, }, { .index = HRTIMER_BASE_REALTIME_SOFT, .clockid = CLOCK_REALTIME, - .get_time = &ktime_get_real, }, { .index = HRTIMER_BASE_BOOTTIME_SOFT, .clockid = CLOCK_BOOTTIME, - .get_time = &ktime_get_boottime, }, { .index = HRTIMER_BASE_TAI_SOFT, .clockid = CLOCK_TAI, - .get_time = &ktime_get_clocktai, }, }, .csd = CSD_INIT(retrigger_next_event, NULL) @@ -208,7 +201,7 @@ static bool hrtimer_suitable_target(struct hrtimer *timer, struct hrtimer_clock_ /* * The offline local CPU can't be the default target if the * next remote target event is after this timer. Keep the - * elected new base. An IPI will we issued to reprogram + * elected new base. An IPI will be issued to reprogram * it as a last resort. */ if (!hrtimer_base_is_online(this_cpu_base)) @@ -1253,7 +1246,7 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, remove_hrtimer(timer, base, true, force_local); if (mode & HRTIMER_MODE_REL) - tim = ktime_add_safe(tim, base->get_time()); + tim = ktime_add_safe(tim, __hrtimer_cb_get_time(base->clockid)); tim = hrtimer_update_lowres(timer, tim, mode); @@ -1574,10 +1567,10 @@ u64 hrtimer_next_event_without(const struct hrtimer *exclude) static inline int hrtimer_clockid_to_base(clockid_t clock_id) { switch (clock_id) { - case CLOCK_REALTIME: - return HRTIMER_BASE_REALTIME; case CLOCK_MONOTONIC: return HRTIMER_BASE_MONOTONIC; + case CLOCK_REALTIME: + return HRTIMER_BASE_REALTIME; case CLOCK_BOOTTIME: return HRTIMER_BASE_BOOTTIME; case CLOCK_TAI: @@ -1588,6 +1581,29 @@ static inline int hrtimer_clockid_to_base(clockid_t clock_id) } } +static ktime_t __hrtimer_cb_get_time(clockid_t clock_id) +{ + switch (clock_id) { + case CLOCK_MONOTONIC: + return ktime_get(); + case CLOCK_REALTIME: + return ktime_get_real(); + case CLOCK_BOOTTIME: + return ktime_get_boottime(); + case CLOCK_TAI: + return ktime_get_clocktai(); + default: + WARN(1, "Invalid clockid %d. Using MONOTONIC\n", clock_id); + return ktime_get(); + } +} + +ktime_t hrtimer_cb_get_time(const struct hrtimer *timer) +{ + return __hrtimer_cb_get_time(timer->base->clockid); +} +EXPORT_SYMBOL_GPL(hrtimer_cb_get_time); + static void __hrtimer_setup(struct hrtimer *timer, enum hrtimer_restart (*function)(struct hrtimer *), clockid_t clock_id, enum hrtimer_mode mode) diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c index 876d389b2e21..7c6110e964e7 100644 --- a/kernel/time/itimer.c +++ b/kernel/time/itimer.c @@ -163,8 +163,7 @@ void posixtimer_rearm_itimer(struct task_struct *tsk) struct hrtimer *tmr = &tsk->signal->real_timer; if (!hrtimer_is_queued(tmr) && tsk->signal->it_real_incr != 0) { - hrtimer_forward(tmr, tmr->base->get_time(), - tsk->signal->it_real_incr); + hrtimer_forward_now(tmr, tsk->signal->it_real_incr); hrtimer_restart(tmr); } } diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 8b582174b1f9..aa3120104a51 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -299,8 +299,7 @@ static void common_hrtimer_rearm(struct k_itimer *timr) { struct hrtimer *timer = &timr->it.real.timer; - timr->it_overrun += hrtimer_forward(timer, timer->base->get_time(), - timr->it_interval); + timr->it_overrun += hrtimer_forward_now(timer, timr->it_interval); hrtimer_restart(timer); } @@ -535,7 +534,7 @@ static int do_timer_create(clockid_t which_clock, struct sigevent *event, goto out; } /* - * After succesful copy out, the timer ID is visible to user space + * After successful copy out, the timer ID is visible to user space * now but not yet valid because new_timer::signal low order bit is 1. * * Complete the initialization with the clock specific create @@ -825,7 +824,7 @@ static void common_hrtimer_arm(struct k_itimer *timr, ktime_t expires, hrtimer_setup(&timr->it.real.timer, posix_timer_fn, timr->it_clock, mode); if (!absolute) - expires = ktime_add_safe(expires, timer->base->get_time()); + expires = ktime_add_safe(expires, hrtimer_cb_get_time(timer)); hrtimer_set_expires(timer, expires); if (!sigev_none) diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index cc15fe293719..cc1afec306b3 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -174,8 +174,7 @@ static enum hrtimer_restart sched_clock_poll(struct hrtimer *hrt) return HRTIMER_RESTART; } -void __init -sched_clock_register(u64 (*read)(void), int bits, unsigned long rate) +void sched_clock_register(u64 (*read)(void), int bits, unsigned long rate) { u64 res, wrap, new_mask, new_epoch, cyc, ns; u32 new_mult, new_shift; @@ -247,6 +246,7 @@ sched_clock_register(u64 (*read)(void), int bits, unsigned long rate) pr_debug("Registered %pS as sched_clock source\n", read); } +EXPORT_SYMBOL_GPL(sched_clock_register); void __init generic_sched_clock_init(void) { diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 9a3859443c04..7e33d3f2e889 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -411,24 +411,18 @@ int tick_cpu_dying(unsigned int dying_cpu) } /* - * Shutdown an event device on a given cpu: + * Shutdown an event device on the outgoing CPU: * - * This is called on a life CPU, when a CPU is dead. So we cannot - * access the hardware device itself. - * We just set the mode and remove it from the lists. + * Called by the dying CPU during teardown, with clockevents_lock held + * and interrupts disabled. */ -void tick_shutdown(unsigned int cpu) +void tick_shutdown(void) { - struct tick_device *td = &per_cpu(tick_cpu_device, cpu); + struct tick_device *td = this_cpu_ptr(&tick_cpu_device); struct clock_event_device *dev = td->evtdev; td->mode = TICKDEV_MODE_PERIODIC; if (dev) { - /* - * Prevent that the clock events layer tries to call - * the set mode function! - */ - clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED); clockevents_exchange_device(dev, NULL); dev->event_handler = clockevents_handle_noop; td->evtdev = NULL; diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index faac36de35b9..4e4f7bbe2a64 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -26,7 +26,7 @@ extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast); extern void tick_handle_periodic(struct clock_event_device *dev); extern void tick_check_new_device(struct clock_event_device *dev); extern void tick_offline_cpu(unsigned int cpu); -extern void tick_shutdown(unsigned int cpu); +extern void tick_shutdown(void); extern void tick_suspend(void); extern void tick_resume(void); extern bool tick_check_replacement(struct clock_event_device *curdev, diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index b03d0ada6469..488e47e96e93 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -102,8 +102,6 @@ print_base(struct seq_file *m, struct hrtimer_clock_base *base, u64 now) SEQ_printf(m, " .index: %d\n", base->index); SEQ_printf(m, " .resolution: %u nsecs\n", hrtimer_resolution); - - SEQ_printf(m, " .get_time: %ps\n", base->get_time); #ifdef CONFIG_HIGH_RES_TIMERS SEQ_printf(m, " .offset: %Lu nsecs\n", (unsigned long long) ktime_to_ns(base->offset)); diff --git a/lib/test_objpool.c b/lib/test_objpool.c index 8f688187fa87..6a34a7582fdb 100644 --- a/lib/test_objpool.c +++ b/lib/test_objpool.c @@ -164,7 +164,7 @@ static enum hrtimer_restart ot_hrtimer_handler(struct hrtimer *hrt) /* do bulk-testings for objects pop/push */ item->worker(item, 1); - hrtimer_forward(hrt, hrt->base->get_time(), item->hrtcycle); + hrtimer_forward_now(hrt, item->hrtcycle); return HRTIMER_RESTART; } diff --git a/lib/vdso/Kconfig b/lib/vdso/Kconfig index 45df764b49ad..db87ba34ef19 100644 --- a/lib/vdso/Kconfig +++ b/lib/vdso/Kconfig @@ -12,24 +12,6 @@ config GENERIC_GETTIMEOFDAY Each architecture that enables this feature has to provide the fallback implementation. -config GENERIC_VDSO_32 - bool - depends on GENERIC_GETTIMEOFDAY && !64BIT - help - This config option helps to avoid possible performance issues - in 32 bit only architectures. - -config GENERIC_COMPAT_VDSO - bool - help - This config option enables the compat VDSO layer. - -config GENERIC_VDSO_TIME_NS - bool - help - Selected by architectures which support time namespaces in the - VDSO - config GENERIC_VDSO_OVERFLOW_PROTECT bool help @@ -37,14 +19,9 @@ config GENERIC_VDSO_OVERFLOW_PROTECT time getter functions for the price of an extra conditional in the hotpath. -endif - config VDSO_GETRANDOM bool help Selected by architectures that support vDSO getrandom(). -config GENERIC_VDSO_DATA_STORE - bool - help - Selected by architectures that use the generic vDSO data store. +endif diff --git a/lib/vdso/Makefile b/lib/vdso/Makefile index aedd40aaa950..405f743253d7 100644 --- a/lib/vdso/Makefile +++ b/lib/vdso/Makefile @@ -1,3 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_GENERIC_VDSO_DATA_STORE) += datastore.o +obj-$(CONFIG_HAVE_GENERIC_VDSO) += datastore.o diff --git a/lib/vdso/datastore.c b/lib/vdso/datastore.c index 3693c6caf2c4..a565c30c71a0 100644 --- a/lib/vdso/datastore.c +++ b/lib/vdso/datastore.c @@ -11,14 +11,14 @@ /* * The vDSO data page. */ -#ifdef CONFIG_HAVE_GENERIC_VDSO +#ifdef CONFIG_GENERIC_GETTIMEOFDAY static union { struct vdso_time_data data; u8 page[PAGE_SIZE]; } vdso_time_data_store __page_aligned_data; struct vdso_time_data *vdso_k_time_data = &vdso_time_data_store.data; static_assert(sizeof(vdso_time_data_store) == PAGE_SIZE); -#endif /* CONFIG_HAVE_GENERIC_VDSO */ +#endif /* CONFIG_GENERIC_GETTIMEOFDAY */ #ifdef CONFIG_VDSO_GETRANDOM static union { @@ -46,7 +46,7 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, switch (vmf->pgoff) { case VDSO_TIME_PAGE_OFFSET: - if (!IS_ENABLED(CONFIG_HAVE_GENERIC_VDSO)) + if (!IS_ENABLED(CONFIG_GENERIC_GETTIMEOFDAY)) return VM_FAULT_SIGBUS; pfn = __phys_to_pfn(__pa_symbol(vdso_k_time_data)); if (timens_page) { diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index 02ea19f67164..95df0153f05a 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -108,15 +108,11 @@ bool vdso_get_timestamp(const struct vdso_time_data *vd, const struct vdso_clock return true; } -#ifdef CONFIG_TIME_NS - -#ifdef CONFIG_GENERIC_VDSO_DATA_STORE static __always_inline const struct vdso_time_data *__arch_get_vdso_u_timens_data(const struct vdso_time_data *vd) { return (void *)vd + PAGE_SIZE; } -#endif /* CONFIG_GENERIC_VDSO_DATA_STORE */ static __always_inline bool do_hres_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns, @@ -149,20 +145,6 @@ bool do_hres_timens(const struct vdso_time_data *vdns, const struct vdso_clock * return true; } -#else -static __always_inline -const struct vdso_time_data *__arch_get_vdso_u_timens_data(const struct vdso_time_data *vd) -{ - return NULL; -} - -static __always_inline -bool do_hres_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns, - clockid_t clk, struct __kernel_timespec *ts) -{ - return false; -} -#endif static __always_inline bool do_hres(const struct vdso_time_data *vd, const struct vdso_clock *vc, @@ -204,7 +186,6 @@ bool do_hres(const struct vdso_time_data *vd, const struct vdso_clock *vc, return true; } -#ifdef CONFIG_TIME_NS static __always_inline bool do_coarse_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns, clockid_t clk, struct __kernel_timespec *ts) @@ -233,14 +214,6 @@ bool do_coarse_timens(const struct vdso_time_data *vdns, const struct vdso_clock return true; } -#else -static __always_inline -bool do_coarse_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns, - clockid_t clk, struct __kernel_timespec *ts) -{ - return false; -} -#endif static __always_inline bool do_coarse(const struct vdso_time_data *vd, const struct vdso_clock *vc, diff --git a/scripts/gdb/linux/timerlist.py b/scripts/gdb/linux/timerlist.py index 98445671fe83..ccc24d30de80 100644 --- a/scripts/gdb/linux/timerlist.py +++ b/scripts/gdb/linux/timerlist.py @@ -56,8 +56,6 @@ def print_base(base): text += " .index: {}\n".format(base['index']) text += " .resolution: {} nsecs\n".format(constants.LX_hrtimer_resolution) - - text += " .get_time: {}\n".format(base['get_time']) if constants.LX_CONFIG_HIGH_RES_TIMERS: text += " .offset: {} nsecs\n".format(base['offset']) text += "active timers:\n" diff --git a/sound/core/hrtimer.c b/sound/core/hrtimer.c index c364bd126ac8..2d5f4d47071f 100644 --- a/sound/core/hrtimer.c +++ b/sound/core/hrtimer.c @@ -44,7 +44,7 @@ static enum hrtimer_restart snd_hrtimer_callback(struct hrtimer *hrt) } /* calculate the drift */ - delta = ktime_sub(hrt->base->get_time(), hrtimer_get_expires(hrt)); + delta = ktime_sub(hrtimer_cb_get_time(hrt), hrtimer_get_expires(hrt)); if (delta > 0) ticks += ktime_divns(delta, ticks * resolution); diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 98c4713c1b09..0ad5cc70ecbe 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -880,3 +880,15 @@ unsigned int arch_reloc_size(struct reloc *reloc) return 8; } } + +bool arch_absolute_reloc(struct elf *elf, struct reloc *reloc) +{ + switch (reloc_type(reloc)) { + case R_X86_64_32: + case R_X86_64_32S: + case R_X86_64_64: + return true; + default: + return false; + } +} diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 80239843e9f0..0f6b197cfcb0 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -87,6 +87,7 @@ static const struct option check_options[] = { OPT_BOOLEAN('t', "static-call", &opts.static_call, "annotate static calls"), OPT_BOOLEAN('u', "uaccess", &opts.uaccess, "validate uaccess rules for SMAP"), OPT_BOOLEAN(0 , "cfi", &opts.cfi, "annotate kernel control flow integrity (kCFI) function preambles"), + OPT_BOOLEAN(0 , "noabs", &opts.noabs, "reject absolute references in allocatable sections"), OPT_CALLBACK_OPTARG(0, "dump", NULL, NULL, "orc", "dump metadata", parse_dump), OPT_GROUP("Options:"), @@ -162,6 +163,7 @@ static bool opts_valid(void) opts.hack_noinstr || opts.ibt || opts.mcount || + opts.noabs || opts.noinstr || opts.orc || opts.retpoline || diff --git a/tools/objtool/check.c b/tools/objtool/check.c index d14f20ef1db1..093fcd01dd6e 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -3564,7 +3564,9 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, if (func && insn_func(insn) && func != insn_func(insn)->pfunc) { /* Ignore KCFI type preambles, which always fall through */ if (!strncmp(func->name, "__cfi_", 6) || - !strncmp(func->name, "__pfx_", 6)) + !strncmp(func->name, "__pfx_", 6) || + !strncmp(func->name, "__pi___cfi_", 11) || + !strncmp(func->name, "__pi___pfx_", 11)) return 0; if (file->ignore_unreachables) @@ -4644,6 +4646,47 @@ static void disas_warned_funcs(struct objtool_file *file) disas_funcs(funcs); } +__weak bool arch_absolute_reloc(struct elf *elf, struct reloc *reloc) +{ + unsigned int type = reloc_type(reloc); + size_t sz = elf_addr_size(elf); + + return (sz == 8) ? (type == R_ABS64) : (type == R_ABS32); +} + +static int check_abs_references(struct objtool_file *file) +{ + struct section *sec; + struct reloc *reloc; + int ret = 0; + + for_each_sec(file, sec) { + /* absolute references in non-loadable sections are fine */ + if (!(sec->sh.sh_flags & SHF_ALLOC)) + continue; + + /* section must have an associated .rela section */ + if (!sec->rsec) + continue; + + /* + * Special case for compiler generated metadata that is not + * consumed until after boot. + */ + if (!strcmp(sec->name, "__patchable_function_entries")) + continue; + + for_each_reloc(sec->rsec, reloc) { + if (arch_absolute_reloc(file->elf, reloc)) { + WARN("section %s has absolute relocation at offset 0x%lx", + sec->name, reloc_offset(reloc)); + ret++; + } + } + } + return ret; +} + struct insn_chunk { void *addr; struct insn_chunk *next; @@ -4777,6 +4820,9 @@ int check(struct objtool_file *file) goto out; } + if (opts.noabs) + warnings += check_abs_references(file); + if (opts.orc && nr_insns) { ret = orc_create(file); if (ret) diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h index 01ef6f415adf..be33c7b43180 100644 --- a/tools/objtool/include/objtool/arch.h +++ b/tools/objtool/include/objtool/arch.h @@ -97,6 +97,7 @@ bool arch_is_embedded_insn(struct symbol *sym); int arch_rewrite_retpolines(struct objtool_file *file); bool arch_pc_relative_reloc(struct reloc *reloc); +bool arch_absolute_reloc(struct elf *elf, struct reloc *reloc); unsigned int arch_reloc_size(struct reloc *reloc); unsigned long arch_jump_table_sym_offset(struct reloc *reloc, struct reloc *table); diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h index 6b08666fa69d..ab22673862e1 100644 --- a/tools/objtool/include/objtool/builtin.h +++ b/tools/objtool/include/objtool/builtin.h @@ -26,6 +26,7 @@ struct opts { bool uaccess; int prefix; bool cfi; + bool noabs; /* options: */ bool backtrace; diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h index 6a922d046b8e..802895fae3ca 100644 --- a/tools/objtool/noreturns.h +++ b/tools/objtool/noreturns.h @@ -45,7 +45,6 @@ NORETURN(rewind_stack_and_make_dead) NORETURN(rust_begin_unwind) NORETURN(rust_helper_BUG) NORETURN(sev_es_terminate) -NORETURN(snp_abort) NORETURN(start_kernel) NORETURN(stop_this_cpu) NORETURN(usercopy_abort) diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile index 8cfb87f7f7c5..490ace1f017e 100644 --- a/tools/testing/selftests/futex/functional/Makefile +++ b/tools/testing/selftests/futex/functional/Makefile @@ -1,12 +1,14 @@ # SPDX-License-Identifier: GPL-2.0 +PKG_CONFIG ?= pkg-config +LIBNUMA_TEST = $(shell sh -c "$(PKG_CONFIG) numa --atleast-version 2.0.16 > /dev/null 2>&1 && echo SUFFICIENT || echo NO") + INCLUDES := -I../include -I../../ $(KHDR_INCLUDES) -CFLAGS := $(CFLAGS) -g -O2 -Wall -pthread $(INCLUDES) $(KHDR_INCLUDES) +CFLAGS := $(CFLAGS) -g -O2 -Wall -pthread -D_FILE_OFFSET_BITS=64 -D_TIME_BITS=64 $(INCLUDES) $(KHDR_INCLUDES) -DLIBNUMA_VER_$(LIBNUMA_TEST)=1 LDLIBS := -lpthread -lrt -lnuma LOCAL_HDRS := \ ../include/futextest.h \ - ../include/atomic.h \ - ../include/logging.h + ../include/atomic.h TEST_GEN_PROGS := \ futex_wait_timeout \ futex_wait_wouldblock \ diff --git a/tools/testing/selftests/futex/functional/futex_numa.c b/tools/testing/selftests/futex/functional/futex_numa.c index f29e4d627e79..e0a33510ccb6 100644 --- a/tools/testing/selftests/futex/functional/futex_numa.c +++ b/tools/testing/selftests/futex/functional/futex_numa.c @@ -5,9 +5,10 @@ #include <sys/mman.h> #include <fcntl.h> #include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> #include <time.h> #include <assert.h> -#include "logging.h" #include "futextest.h" #include "futex2test.h" diff --git a/tools/testing/selftests/futex/functional/futex_numa_mpol.c b/tools/testing/selftests/futex/functional/futex_numa_mpol.c index a9ecfb2d3932..d037a3f10ee8 100644 --- a/tools/testing/selftests/futex/functional/futex_numa_mpol.c +++ b/tools/testing/selftests/futex/functional/futex_numa_mpol.c @@ -16,9 +16,9 @@ #include <linux/futex.h> #include <sys/mman.h> -#include "logging.h" #include "futextest.h" #include "futex2test.h" +#include "../../kselftest_harness.h" #define MAX_THREADS 64 @@ -77,7 +77,7 @@ static void join_max_threads(void) } } -static void __test_futex(void *futex_ptr, int must_fail, unsigned int futex_flags) +static void __test_futex(void *futex_ptr, int err_value, unsigned int futex_flags) { int to_wake, ret, i, need_exit = 0; @@ -88,11 +88,17 @@ static void __test_futex(void *futex_ptr, int must_fail, unsigned int futex_flag do { ret = futex2_wake(futex_ptr, to_wake, futex_flags); - if (must_fail) { - if (ret < 0) - break; - ksft_exit_fail_msg("futex2_wake(%d, 0x%x) should fail, but didn't\n", - to_wake, futex_flags); + + if (err_value) { + if (ret >= 0) + ksft_exit_fail_msg("futex2_wake(%d, 0x%x) should fail, but didn't\n", + to_wake, futex_flags); + + if (errno != err_value) + ksft_exit_fail_msg("futex2_wake(%d, 0x%x) expected error was %d, but returned %d (%s)\n", + to_wake, futex_flags, err_value, errno, strerror(errno)); + + break; } if (ret < 0) { ksft_exit_fail_msg("Failed futex2_wake(%d, 0x%x): %m\n", @@ -106,12 +112,12 @@ static void __test_futex(void *futex_ptr, int must_fail, unsigned int futex_flag join_max_threads(); for (i = 0; i < MAX_THREADS; i++) { - if (must_fail && thread_args[i].result != -1) { + if (err_value && thread_args[i].result != -1) { ksft_print_msg("Thread %d should fail but succeeded (%d)\n", i, thread_args[i].result); need_exit = 1; } - if (!must_fail && thread_args[i].result != 0) { + if (!err_value && thread_args[i].result != 0) { ksft_print_msg("Thread %d failed (%d)\n", i, thread_args[i].result); need_exit = 1; } @@ -120,58 +126,30 @@ static void __test_futex(void *futex_ptr, int must_fail, unsigned int futex_flag ksft_exit_fail_msg("Aborting due to earlier errors.\n"); } -static void test_futex(void *futex_ptr, int must_fail) +static void test_futex(void *futex_ptr, int err_value) { - __test_futex(futex_ptr, must_fail, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA); + __test_futex(futex_ptr, err_value, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA); } -static void test_futex_mpol(void *futex_ptr, int must_fail) +static void test_futex_mpol(void *futex_ptr, int err_value) { - __test_futex(futex_ptr, must_fail, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA | FUTEX2_MPOL); + __test_futex(futex_ptr, err_value, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA | FUTEX2_MPOL); } -static void usage(char *prog) -{ - printf("Usage: %s\n", prog); - printf(" -c Use color\n"); - printf(" -h Display this help message\n"); - printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", - VQUIET, VCRITICAL, VINFO); -} - -int main(int argc, char *argv[]) +TEST(futex_numa_mpol) { struct futex32_numa *futex_numa; - int mem_size, i; void *futex_ptr; - int c; - - while ((c = getopt(argc, argv, "chv:")) != -1) { - switch (c) { - case 'c': - log_color(1); - break; - case 'h': - usage(basename(argv[0])); - exit(0); - break; - case 'v': - log_verbosity(atoi(optarg)); - break; - default: - usage(basename(argv[0])); - exit(1); - } - } - - ksft_print_header(); - ksft_set_plan(1); + int mem_size; mem_size = sysconf(_SC_PAGE_SIZE); - futex_ptr = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); + futex_ptr = mmap(NULL, mem_size * 2, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); if (futex_ptr == MAP_FAILED) ksft_exit_fail_msg("mmap() for %d bytes failed\n", mem_size); + /* Create an invalid memory region for the "Memory out of range" test */ + mprotect(futex_ptr + mem_size, mem_size, PROT_NONE); + futex_numa = futex_ptr; ksft_print_msg("Regular test\n"); @@ -182,27 +160,31 @@ int main(int argc, char *argv[]) if (futex_numa->numa == FUTEX_NO_NODE) ksft_exit_fail_msg("NUMA node is left uninitialized\n"); - ksft_print_msg("Memory too small\n"); - test_futex(futex_ptr + mem_size - 4, 1); + /* FUTEX2_NUMA futex must be 8-byte aligned */ + ksft_print_msg("Mis-aligned futex\n"); + test_futex(futex_ptr + mem_size - 4, EINVAL); ksft_print_msg("Memory out of range\n"); - test_futex(futex_ptr + mem_size, 1); + test_futex(futex_ptr + mem_size, EFAULT); futex_numa->numa = FUTEX_NO_NODE; mprotect(futex_ptr, mem_size, PROT_READ); ksft_print_msg("Memory, RO\n"); - test_futex(futex_ptr, 1); + test_futex(futex_ptr, EFAULT); mprotect(futex_ptr, mem_size, PROT_NONE); ksft_print_msg("Memory, no access\n"); - test_futex(futex_ptr, 1); + test_futex(futex_ptr, EFAULT); mprotect(futex_ptr, mem_size, PROT_READ | PROT_WRITE); ksft_print_msg("Memory back to RW\n"); test_futex(futex_ptr, 0); + ksft_test_result_pass("futex2 memory boundary tests passed\n"); + /* MPOL test. Does not work as expected */ - for (i = 0; i < 4; i++) { +#ifdef LIBNUMA_VER_SUFFICIENT + for (int i = 0; i < 4; i++) { unsigned long nodemask; int ret; @@ -221,15 +203,17 @@ int main(int argc, char *argv[]) ret = futex2_wake(futex_ptr, 0, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA | FUTEX2_MPOL); if (ret < 0) ksft_test_result_fail("Failed to wake 0 with MPOL: %m\n"); - if (0) - test_futex_mpol(futex_numa, 0); if (futex_numa->numa != i) { ksft_exit_fail_msg("Returned NUMA node is %d expected %d\n", futex_numa->numa, i); } } } - ksft_test_result_pass("NUMA MPOL tests passed\n"); - ksft_finished(); - return 0; + ksft_test_result_pass("futex2 MPOL hints test passed\n"); +#else + ksft_test_result_skip("futex2 MPOL hints test requires libnuma 2.0.16+\n"); +#endif + munmap(futex_ptr, mem_size * 2); } + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/futex/functional/futex_priv_hash.c b/tools/testing/selftests/futex/functional/futex_priv_hash.c index aea001ac4946..3b7b5851f290 100644 --- a/tools/testing/selftests/futex/functional/futex_priv_hash.c +++ b/tools/testing/selftests/futex/functional/futex_priv_hash.c @@ -14,7 +14,7 @@ #include <linux/prctl.h> #include <sys/prctl.h> -#include "logging.h" +#include "../../kselftest_harness.h" #define MAX_THREADS 64 @@ -128,46 +128,14 @@ static void futex_dummy_op(void) ksft_exit_fail_msg("pthread_mutex_timedlock() did not timeout: %d.\n", ret); } -static void usage(char *prog) -{ - printf("Usage: %s\n", prog); - printf(" -c Use color\n"); - printf(" -g Test global hash instead intead local immutable \n"); - printf(" -h Display this help message\n"); - printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", - VQUIET, VCRITICAL, VINFO); -} - static const char *test_msg_auto_create = "Automatic hash bucket init on thread creation.\n"; static const char *test_msg_auto_inc = "Automatic increase with more than 16 CPUs\n"; -int main(int argc, char *argv[]) +TEST(priv_hash) { int futex_slots1, futex_slotsn, online_cpus; pthread_mutexattr_t mutex_attr_pi; int ret, retry = 20; - int c; - - while ((c = getopt(argc, argv, "chv:")) != -1) { - switch (c) { - case 'c': - log_color(1); - break; - case 'h': - usage(basename(argv[0])); - exit(0); - break; - case 'v': - log_verbosity(atoi(optarg)); - break; - default: - usage(basename(argv[0])); - exit(1); - } - } - - ksft_print_header(); - ksft_set_plan(21); ret = pthread_mutexattr_init(&mutex_attr_pi); ret |= pthread_mutexattr_setprotocol(&mutex_attr_pi, PTHREAD_PRIO_INHERIT); @@ -189,14 +157,14 @@ int main(int argc, char *argv[]) if (ret != 0) ksft_exit_fail_msg("pthread_join() failed: %d, %m\n", ret); - /* First thread, has to initialiaze private hash */ + /* First thread, has to initialize private hash */ futex_slots1 = futex_hash_slots_get(); if (futex_slots1 <= 0) { ksft_print_msg("Current hash buckets: %d\n", futex_slots1); - ksft_exit_fail_msg(test_msg_auto_create); + ksft_exit_fail_msg("%s", test_msg_auto_create); } - ksft_test_result_pass(test_msg_auto_create); + ksft_test_result_pass("%s", test_msg_auto_create); online_cpus = sysconf(_SC_NPROCESSORS_ONLN); ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS + 1); @@ -237,11 +205,11 @@ retry_getslots: } ksft_print_msg("Expected increase of hash buckets but got: %d -> %d\n", futex_slots1, futex_slotsn); - ksft_exit_fail_msg(test_msg_auto_inc); + ksft_exit_fail_msg("%s", test_msg_auto_inc); } - ksft_test_result_pass(test_msg_auto_inc); + ksft_test_result_pass("%s", test_msg_auto_inc); } else { - ksft_test_result_skip(test_msg_auto_inc); + ksft_test_result_skip("%s", test_msg_auto_inc); } ret = pthread_mutex_unlock(&global_lock); @@ -257,17 +225,17 @@ retry_getslots: futex_hash_slots_set_verify(2); join_max_threads(); - ksft_test_result(counter == MAX_THREADS, "Created of waited for %d of %d threads\n", + ksft_test_result(counter == MAX_THREADS, "Created and waited for %d of %d threads\n", counter, MAX_THREADS); counter = 0; - /* Once the user set something, auto reisze must be disabled */ + /* Once the user set something, auto resize must be disabled */ ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS); create_max_threads(thread_lock_fn); join_max_threads(); ret = futex_hash_slots_get(); - ksft_test_result(ret == 2, "No more auto-resize after manaul setting, got %d\n", + ksft_test_result(ret == 2, "No more auto-resize after manual setting, got %d\n", ret); futex_hash_slots_set_must_fail(1 << 29); @@ -280,7 +248,7 @@ retry_getslots: ret = futex_hash_slots_set(0); ksft_test_result(ret == 0, "Global hash request\n"); if (ret != 0) - goto out; + return; futex_hash_slots_set_must_fail(4); futex_hash_slots_set_must_fail(8); @@ -289,17 +257,14 @@ retry_getslots: futex_hash_slots_set_must_fail(6); ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS); - if (ret != 0) { + if (ret != 0) ksft_exit_fail_msg("pthread_barrier_init failed: %m\n"); - return 1; - } + create_max_threads(thread_lock_fn); join_max_threads(); ret = futex_hash_slots_get(); ksft_test_result(ret == 0, "Continue to use global hash\n"); - -out: - ksft_finished(); - return 0; } + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/futex/functional/futex_requeue.c b/tools/testing/selftests/futex/functional/futex_requeue.c index 51485be6eb2f..69e2555b6039 100644 --- a/tools/testing/selftests/futex/functional/futex_requeue.c +++ b/tools/testing/selftests/futex/functional/futex_requeue.c @@ -7,24 +7,15 @@ #include <pthread.h> #include <limits.h> -#include "logging.h" + #include "futextest.h" +#include "../../kselftest_harness.h" -#define TEST_NAME "futex-requeue" #define timeout_ns 30000000 #define WAKE_WAIT_US 10000 volatile futex_t *f1; -void usage(char *prog) -{ - printf("Usage: %s\n", prog); - printf(" -c Use color\n"); - printf(" -h Display this help message\n"); - printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", - VQUIET, VCRITICAL, VINFO); -} - void *waiterfn(void *arg) { struct timespec to; @@ -38,67 +29,49 @@ void *waiterfn(void *arg) return NULL; } -int main(int argc, char *argv[]) +TEST(requeue_single) { - pthread_t waiter[10]; - int res, ret = RET_PASS; - int c, i; volatile futex_t _f1 = 0; volatile futex_t f2 = 0; + pthread_t waiter[10]; + int res; f1 = &_f1; - while ((c = getopt(argc, argv, "cht:v:")) != -1) { - switch (c) { - case 'c': - log_color(1); - break; - case 'h': - usage(basename(argv[0])); - exit(0); - case 'v': - log_verbosity(atoi(optarg)); - break; - default: - usage(basename(argv[0])); - exit(1); - } - } - - ksft_print_header(); - ksft_set_plan(2); - ksft_print_msg("%s: Test futex_requeue\n", - basename(argv[0])); - /* * Requeue a waiter from f1 to f2, and wake f2. */ if (pthread_create(&waiter[0], NULL, waiterfn, NULL)) - error("pthread_create failed\n", errno); + ksft_exit_fail_msg("pthread_create failed\n"); usleep(WAKE_WAIT_US); - info("Requeuing 1 futex from f1 to f2\n"); + ksft_print_dbg_msg("Requeuing 1 futex from f1 to f2\n"); res = futex_cmp_requeue(f1, 0, &f2, 0, 1, 0); - if (res != 1) { + if (res != 1) ksft_test_result_fail("futex_requeue simple returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; - } - - info("Waking 1 futex at f2\n"); + ksft_print_dbg_msg("Waking 1 futex at f2\n"); res = futex_wake(&f2, 1, 0); if (res != 1) { ksft_test_result_fail("futex_requeue simple returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_requeue simple succeeds\n"); } +} + +TEST(requeue_multiple) +{ + volatile futex_t _f1 = 0; + volatile futex_t f2 = 0; + pthread_t waiter[10]; + int res, i; + f1 = &_f1; /* * Create 10 waiters at f1. At futex_requeue, wake 3 and requeue 7. @@ -106,31 +79,28 @@ int main(int argc, char *argv[]) */ for (i = 0; i < 10; i++) { if (pthread_create(&waiter[i], NULL, waiterfn, NULL)) - error("pthread_create failed\n", errno); + ksft_exit_fail_msg("pthread_create failed\n"); } usleep(WAKE_WAIT_US); - info("Waking 3 futexes at f1 and requeuing 7 futexes from f1 to f2\n"); + ksft_print_dbg_msg("Waking 3 futexes at f1 and requeuing 7 futexes from f1 to f2\n"); res = futex_cmp_requeue(f1, 0, &f2, 3, 7, 0); if (res != 10) { ksft_test_result_fail("futex_requeue many returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; } - info("Waking INT_MAX futexes at f2\n"); + ksft_print_dbg_msg("Waking INT_MAX futexes at f2\n"); res = futex_wake(&f2, INT_MAX, 0); if (res != 7) { ksft_test_result_fail("futex_requeue many returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_requeue many succeeds\n"); } - - ksft_print_cnts(); - return ret; } + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi.c b/tools/testing/selftests/futex/functional/futex_requeue_pi.c index 215c6cb539b4..f299d75848cd 100644 --- a/tools/testing/selftests/futex/functional/futex_requeue_pi.c +++ b/tools/testing/selftests/futex/functional/futex_requeue_pi.c @@ -26,11 +26,11 @@ #include <stdlib.h> #include <signal.h> #include <string.h> + #include "atomic.h" #include "futextest.h" -#include "logging.h" +#include "../../kselftest_harness.h" -#define TEST_NAME "futex-requeue-pi" #define MAX_WAKE_ITERS 1000 #define THREAD_MAX 10 #define SIGNAL_PERIOD_US 100 @@ -42,12 +42,6 @@ futex_t f1 = FUTEX_INITIALIZER; futex_t f2 = FUTEX_INITIALIZER; futex_t wake_complete = FUTEX_INITIALIZER; -/* Test option defaults */ -static long timeout_ns; -static int broadcast; -static int owner; -static int locked; - struct thread_arg { long id; struct timespec *timeout; @@ -56,18 +50,73 @@ struct thread_arg { }; #define THREAD_ARG_INITIALIZER { 0, NULL, 0, 0 } -void usage(char *prog) +FIXTURE(args) { - printf("Usage: %s\n", prog); - printf(" -b Broadcast wakeup (all waiters)\n"); - printf(" -c Use color\n"); - printf(" -h Display this help message\n"); - printf(" -l Lock the pi futex across requeue\n"); - printf(" -o Use a third party pi futex owner during requeue (cancels -l)\n"); - printf(" -t N Timeout in nanoseconds (default: 0)\n"); - printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", - VQUIET, VCRITICAL, VINFO); -} +}; + +FIXTURE_SETUP(args) +{ +}; + +FIXTURE_TEARDOWN(args) +{ +}; + +FIXTURE_VARIANT(args) +{ + long timeout_ns; + bool broadcast; + bool owner; + bool locked; +}; + +/* + * For a given timeout value, this macro creates a test input with all the + * possible combinations of valid arguments + */ +#define FIXTURE_VARIANT_ADD_TIMEOUT(timeout) \ + \ +FIXTURE_VARIANT_ADD(args, t_##timeout) \ +{ \ + .timeout_ns = timeout, \ +}; \ + \ +FIXTURE_VARIANT_ADD(args, t_##timeout##_broadcast) \ +{ \ + .timeout_ns = timeout, \ + .broadcast = true, \ +}; \ + \ +FIXTURE_VARIANT_ADD(args, t_##timeout##_broadcast_locked) \ +{ \ + .timeout_ns = timeout, \ + .broadcast = true, \ + .locked = true, \ +}; \ + \ +FIXTURE_VARIANT_ADD(args, t_##timeout##_broadcast_owner) \ +{ \ + .timeout_ns = timeout, \ + .broadcast = true, \ + .owner = true, \ +}; \ + \ +FIXTURE_VARIANT_ADD(args, t_##timeout##_locked) \ +{ \ + .timeout_ns = timeout, \ + .locked = true, \ +}; \ + \ +FIXTURE_VARIANT_ADD(args, t_##timeout##_owner) \ +{ \ + .timeout_ns = timeout, \ + .owner = true, \ +}; \ + +FIXTURE_VARIANT_ADD_TIMEOUT(0); +FIXTURE_VARIANT_ADD_TIMEOUT(5000); +FIXTURE_VARIANT_ADD_TIMEOUT(500000); +FIXTURE_VARIANT_ADD_TIMEOUT(2000000000); int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg, int policy, int prio) @@ -81,26 +130,26 @@ int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg, ret = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED); if (ret) { - error("pthread_attr_setinheritsched\n", ret); + ksft_exit_fail_msg("pthread_attr_setinheritsched\n"); return -1; } ret = pthread_attr_setschedpolicy(&attr, policy); if (ret) { - error("pthread_attr_setschedpolicy\n", ret); + ksft_exit_fail_msg("pthread_attr_setschedpolicy\n"); return -1; } schedp.sched_priority = prio; ret = pthread_attr_setschedparam(&attr, &schedp); if (ret) { - error("pthread_attr_setschedparam\n", ret); + ksft_exit_fail_msg("pthread_attr_setschedparam\n"); return -1; } ret = pthread_create(pth, &attr, func, arg); if (ret) { - error("pthread_create\n", ret); + ksft_exit_fail_msg("pthread_create\n"); return -1; } return 0; @@ -112,7 +161,7 @@ void *waiterfn(void *arg) struct thread_arg *args = (struct thread_arg *)arg; futex_t old_val; - info("Waiter %ld: running\n", args->id); + ksft_print_dbg_msg("Waiter %ld: running\n", args->id); /* Each thread sleeps for a different amount of time * This is to avoid races, because we don't lock the * external mutex here */ @@ -120,26 +169,25 @@ void *waiterfn(void *arg) old_val = f1; atomic_inc(&waiters_blocked); - info("Calling futex_wait_requeue_pi: %p (%u) -> %p\n", + ksft_print_dbg_msg("Calling futex_wait_requeue_pi: %p (%u) -> %p\n", &f1, f1, &f2); args->ret = futex_wait_requeue_pi(&f1, old_val, &f2, args->timeout, FUTEX_PRIVATE_FLAG); - info("waiter %ld woke with %d %s\n", args->id, args->ret, + ksft_print_dbg_msg("waiter %ld woke with %d %s\n", args->id, args->ret, args->ret < 0 ? strerror(errno) : ""); atomic_inc(&waiters_woken); if (args->ret < 0) { if (args->timeout && errno == ETIMEDOUT) args->ret = 0; else { - args->ret = RET_ERROR; - error("futex_wait_requeue_pi\n", errno); + ksft_exit_fail_msg("futex_wait_requeue_pi\n"); } futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG); } futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG); - info("Waiter %ld: exiting with %d\n", args->id, args->ret); + ksft_print_dbg_msg("Waiter %ld: exiting with %d\n", args->id, args->ret); pthread_exit((void *)&args->ret); } @@ -152,14 +200,14 @@ void *broadcast_wakerfn(void *arg) int nr_wake = 1; int i = 0; - info("Waker: waiting for waiters to block\n"); + ksft_print_dbg_msg("Waker: waiting for waiters to block\n"); while (waiters_blocked.val < THREAD_MAX) usleep(1000); usleep(1000); - info("Waker: Calling broadcast\n"); + ksft_print_dbg_msg("Waker: Calling broadcast\n"); if (args->lock) { - info("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n", f2, &f2); + ksft_print_dbg_msg("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n", f2, &f2); futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG); } continue_requeue: @@ -167,16 +215,14 @@ void *broadcast_wakerfn(void *arg) args->ret = futex_cmp_requeue_pi(&f1, old_val, &f2, nr_wake, nr_requeue, FUTEX_PRIVATE_FLAG); if (args->ret < 0) { - args->ret = RET_ERROR; - error("FUTEX_CMP_REQUEUE_PI failed\n", errno); + ksft_exit_fail_msg("FUTEX_CMP_REQUEUE_PI failed\n"); } else if (++i < MAX_WAKE_ITERS) { task_count += args->ret; if (task_count < THREAD_MAX - waiters_woken.val) goto continue_requeue; } else { - error("max broadcast iterations (%d) reached with %d/%d tasks woken or requeued\n", - 0, MAX_WAKE_ITERS, task_count, THREAD_MAX); - args->ret = RET_ERROR; + ksft_exit_fail_msg("max broadcast iterations (%d) reached with %d/%d tasks woken or requeued\n", + MAX_WAKE_ITERS, task_count, THREAD_MAX); } futex_wake(&wake_complete, 1, FUTEX_PRIVATE_FLAG); @@ -187,7 +233,7 @@ void *broadcast_wakerfn(void *arg) if (args->ret > 0) args->ret = task_count; - info("Waker: exiting with %d\n", args->ret); + ksft_print_dbg_msg("Waker: exiting with %d\n", args->ret); pthread_exit((void *)&args->ret); } @@ -200,20 +246,20 @@ void *signal_wakerfn(void *arg) int nr_wake = 1; int i = 0; - info("Waker: waiting for waiters to block\n"); + ksft_print_dbg_msg("Waker: waiting for waiters to block\n"); while (waiters_blocked.val < THREAD_MAX) usleep(1000); usleep(1000); while (task_count < THREAD_MAX && waiters_woken.val < THREAD_MAX) { - info("task_count: %d, waiters_woken: %d\n", + ksft_print_dbg_msg("task_count: %d, waiters_woken: %d\n", task_count, waiters_woken.val); if (args->lock) { - info("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n", - f2, &f2); + ksft_print_dbg_msg("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n", + f2, &f2); futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG); } - info("Waker: Calling signal\n"); + ksft_print_dbg_msg("Waker: Calling signal\n"); /* cond_signal */ old_val = f1; args->ret = futex_cmp_requeue_pi(&f1, old_val, &f2, @@ -221,28 +267,23 @@ void *signal_wakerfn(void *arg) FUTEX_PRIVATE_FLAG); if (args->ret < 0) args->ret = -errno; - info("futex: %x\n", f2); + ksft_print_dbg_msg("futex: %x\n", f2); if (args->lock) { - info("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n", - f2, &f2); + ksft_print_dbg_msg("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n", + f2, &f2); futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG); } - info("futex: %x\n", f2); - if (args->ret < 0) { - error("FUTEX_CMP_REQUEUE_PI failed\n", errno); - args->ret = RET_ERROR; - break; - } + ksft_print_dbg_msg("futex: %x\n", f2); + if (args->ret < 0) + ksft_exit_fail_msg("FUTEX_CMP_REQUEUE_PI failed\n"); task_count += args->ret; usleep(SIGNAL_PERIOD_US); i++; /* we have to loop at least THREAD_MAX times */ if (i > MAX_WAKE_ITERS + THREAD_MAX) { - error("max signaling iterations (%d) reached, giving up on pending waiters.\n", - 0, MAX_WAKE_ITERS + THREAD_MAX); - args->ret = RET_ERROR; - break; + ksft_exit_fail_msg("max signaling iterations (%d) reached, giving up on pending waiters.\n", + MAX_WAKE_ITERS + THREAD_MAX); } } @@ -251,8 +292,8 @@ void *signal_wakerfn(void *arg) if (args->ret >= 0) args->ret = task_count; - info("Waker: exiting with %d\n", args->ret); - info("Waker: waiters_woken: %d\n", waiters_woken.val); + ksft_print_dbg_msg("Waker: exiting with %d\n", args->ret); + ksft_print_dbg_msg("Waker: waiters_woken: %d\n", waiters_woken.val); pthread_exit((void *)&args->ret); } @@ -269,35 +310,40 @@ void *third_party_blocker(void *arg) ret2 = futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG); out: - if (args->ret || ret2) { - error("third_party_blocker() futex error", 0); - args->ret = RET_ERROR; - } + if (args->ret || ret2) + ksft_exit_fail_msg("third_party_blocker() futex error"); pthread_exit((void *)&args->ret); } -int unit_test(int broadcast, long lock, int third_party_owner, long timeout_ns) +TEST_F(args, futex_requeue_pi) { - void *(*wakerfn)(void *) = signal_wakerfn; struct thread_arg blocker_arg = THREAD_ARG_INITIALIZER; struct thread_arg waker_arg = THREAD_ARG_INITIALIZER; pthread_t waiter[THREAD_MAX], waker, blocker; - struct timespec ts, *tsp = NULL; + void *(*wakerfn)(void *) = signal_wakerfn; + bool third_party_owner = variant->owner; + long timeout_ns = variant->timeout_ns; + bool broadcast = variant->broadcast; struct thread_arg args[THREAD_MAX]; - int *waiter_ret; - int i, ret = RET_PASS; + struct timespec ts, *tsp = NULL; + bool lock = variant->locked; + int *waiter_ret, i, ret = 0; + + ksft_print_msg( + "\tArguments: broadcast=%d locked=%d owner=%d timeout=%ldns\n", + broadcast, lock, third_party_owner, timeout_ns); if (timeout_ns) { time_t secs; - info("timeout_ns = %ld\n", timeout_ns); + ksft_print_dbg_msg("timeout_ns = %ld\n", timeout_ns); ret = clock_gettime(CLOCK_MONOTONIC, &ts); secs = (ts.tv_nsec + timeout_ns) / 1000000000; ts.tv_nsec = ((int64_t)ts.tv_nsec + timeout_ns) % 1000000000; ts.tv_sec += secs; - info("ts.tv_sec = %ld\n", ts.tv_sec); - info("ts.tv_nsec = %ld\n", ts.tv_nsec); + ksft_print_dbg_msg("ts.tv_sec = %ld\n", ts.tv_sec); + ksft_print_dbg_msg("ts.tv_nsec = %ld\n", ts.tv_nsec); tsp = &ts; } @@ -307,10 +353,7 @@ int unit_test(int broadcast, long lock, int third_party_owner, long timeout_ns) if (third_party_owner) { if (create_rt_thread(&blocker, third_party_blocker, (void *)&blocker_arg, SCHED_FIFO, 1)) { - error("Creating third party blocker thread failed\n", - errno); - ret = RET_ERROR; - goto out; + ksft_exit_fail_msg("Creating third party blocker thread failed\n"); } } @@ -318,20 +361,16 @@ int unit_test(int broadcast, long lock, int third_party_owner, long timeout_ns) for (i = 0; i < THREAD_MAX; i++) { args[i].id = i; args[i].timeout = tsp; - info("Starting thread %d\n", i); + ksft_print_dbg_msg("Starting thread %d\n", i); if (create_rt_thread(&waiter[i], waiterfn, (void *)&args[i], SCHED_FIFO, 1)) { - error("Creating waiting thread failed\n", errno); - ret = RET_ERROR; - goto out; + ksft_exit_fail_msg("Creating waiting thread failed\n"); } } waker_arg.lock = lock; if (create_rt_thread(&waker, wakerfn, (void *)&waker_arg, SCHED_FIFO, 1)) { - error("Creating waker thread failed\n", errno); - ret = RET_ERROR; - goto out; + ksft_exit_fail_msg("Creating waker thread failed\n"); } /* Wait for threads to finish */ @@ -345,7 +384,6 @@ int unit_test(int broadcast, long lock, int third_party_owner, long timeout_ns) pthread_join(blocker, NULL); pthread_join(waker, NULL); -out: if (!ret) { if (*waiter_ret) ret = *waiter_ret; @@ -355,66 +393,8 @@ out: ret = blocker_arg.ret; } - return ret; + if (ret) + ksft_test_result_fail("fail"); } -int main(int argc, char *argv[]) -{ - char *test_name; - int c, ret; - - while ((c = getopt(argc, argv, "bchlot:v:")) != -1) { - switch (c) { - case 'b': - broadcast = 1; - break; - case 'c': - log_color(1); - break; - case 'h': - usage(basename(argv[0])); - exit(0); - case 'l': - locked = 1; - break; - case 'o': - owner = 1; - locked = 0; - break; - case 't': - timeout_ns = atoi(optarg); - break; - case 'v': - log_verbosity(atoi(optarg)); - break; - default: - usage(basename(argv[0])); - exit(1); - } - } - - ksft_print_header(); - ksft_set_plan(1); - ksft_print_msg("%s: Test requeue functionality\n", basename(argv[0])); - ksft_print_msg( - "\tArguments: broadcast=%d locked=%d owner=%d timeout=%ldns\n", - broadcast, locked, owner, timeout_ns); - - ret = asprintf(&test_name, - "%s broadcast=%d locked=%d owner=%d timeout=%ldns", - TEST_NAME, broadcast, locked, owner, timeout_ns); - if (ret < 0) { - ksft_print_msg("Failed to generate test name\n"); - test_name = TEST_NAME; - } - - /* - * FIXME: unit_test is obsolete now that we parse options and the - * various style of runs are done by run.sh - simplify the code and move - * unit_test into main() - */ - ret = unit_test(broadcast, locked, owner, timeout_ns); - - print_result(test_name, ret); - return ret; -} +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c b/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c index d0a4d332ea44..77135a22a583 100644 --- a/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c +++ b/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c @@ -23,67 +23,32 @@ #include <stdlib.h> #include <string.h> #include <time.h> -#include "futextest.h" -#include "logging.h" -#define TEST_NAME "futex-requeue-pi-mismatched-ops" +#include "futextest.h" +#include "../../kselftest_harness.h" futex_t f1 = FUTEX_INITIALIZER; futex_t f2 = FUTEX_INITIALIZER; int child_ret = 0; -void usage(char *prog) -{ - printf("Usage: %s\n", prog); - printf(" -c Use color\n"); - printf(" -h Display this help message\n"); - printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", - VQUIET, VCRITICAL, VINFO); -} - void *blocking_child(void *arg) { child_ret = futex_wait(&f1, f1, NULL, FUTEX_PRIVATE_FLAG); if (child_ret < 0) { child_ret = -errno; - error("futex_wait\n", errno); + ksft_exit_fail_msg("futex_wait\n"); } return (void *)&child_ret; } -int main(int argc, char *argv[]) +TEST(requeue_pi_mismatched_ops) { - int ret = RET_PASS; pthread_t child; - int c; + int ret; - while ((c = getopt(argc, argv, "chv:")) != -1) { - switch (c) { - case 'c': - log_color(1); - break; - case 'h': - usage(basename(argv[0])); - exit(0); - case 'v': - log_verbosity(atoi(optarg)); - break; - default: - usage(basename(argv[0])); - exit(1); - } - } - - ksft_print_header(); - ksft_set_plan(1); - ksft_print_msg("%s: Detect mismatched requeue_pi operations\n", - basename(argv[0])); + if (pthread_create(&child, NULL, blocking_child, NULL)) + ksft_exit_fail_msg("pthread_create\n"); - if (pthread_create(&child, NULL, blocking_child, NULL)) { - error("pthread_create\n", errno); - ret = RET_ERROR; - goto out; - } /* Allow the child to block in the kernel. */ sleep(1); @@ -102,34 +67,27 @@ int main(int argc, char *argv[]) * FUTEX_WAKE. */ ret = futex_wake(&f1, 1, FUTEX_PRIVATE_FLAG); - if (ret == 1) { - ret = RET_PASS; - } else if (ret < 0) { - error("futex_wake\n", errno); - ret = RET_ERROR; - } else { - error("futex_wake did not wake the child\n", 0); - ret = RET_ERROR; - } + if (ret == 1) + ret = 0; + else if (ret < 0) + ksft_exit_fail_msg("futex_wake\n"); + else + ksft_exit_fail_msg("futex_wake did not wake the child\n"); } else { - error("futex_cmp_requeue_pi\n", errno); - ret = RET_ERROR; + ksft_exit_fail_msg("futex_cmp_requeue_pi\n"); } } else if (ret > 0) { - fail("futex_cmp_requeue_pi failed to detect the mismatch\n"); - ret = RET_FAIL; + ksft_test_result_fail("futex_cmp_requeue_pi failed to detect the mismatch\n"); } else { - error("futex_cmp_requeue_pi found no waiters\n", 0); - ret = RET_ERROR; + ksft_exit_fail_msg("futex_cmp_requeue_pi found no waiters\n"); } pthread_join(child, NULL); - if (!ret) - ret = child_ret; - - out: - /* If the kernel crashes, we shouldn't return at all. */ - print_result(TEST_NAME, ret); - return ret; + if (!ret && !child_ret) + ksft_test_result_pass("futex_requeue_pi_mismatched_ops passed\n"); + else + ksft_test_result_pass("futex_requeue_pi_mismatched_ops failed\n"); } + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c index c6b8f32990c8..e34ee0f9ebcc 100644 --- a/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c +++ b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c @@ -24,11 +24,11 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> + #include "atomic.h" #include "futextest.h" -#include "logging.h" +#include "../../kselftest_harness.h" -#define TEST_NAME "futex-requeue-pi-signal-restart" #define DELAY_US 100 futex_t f1 = FUTEX_INITIALIZER; @@ -37,15 +37,6 @@ atomic_t requeued = ATOMIC_INITIALIZER; int waiter_ret = 0; -void usage(char *prog) -{ - printf("Usage: %s\n", prog); - printf(" -c Use color\n"); - printf(" -h Display this help message\n"); - printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", - VQUIET, VCRITICAL, VINFO); -} - int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg, int policy, int prio) { @@ -57,35 +48,28 @@ int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg, memset(&schedp, 0, sizeof(schedp)); ret = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED); - if (ret) { - error("pthread_attr_setinheritsched\n", ret); - return -1; - } + if (ret) + ksft_exit_fail_msg("pthread_attr_setinheritsched\n"); ret = pthread_attr_setschedpolicy(&attr, policy); - if (ret) { - error("pthread_attr_setschedpolicy\n", ret); - return -1; - } + if (ret) + ksft_exit_fail_msg("pthread_attr_setschedpolicy\n"); schedp.sched_priority = prio; ret = pthread_attr_setschedparam(&attr, &schedp); - if (ret) { - error("pthread_attr_setschedparam\n", ret); - return -1; - } + if (ret) + ksft_exit_fail_msg("pthread_attr_setschedparam\n"); ret = pthread_create(pth, &attr, func, arg); - if (ret) { - error("pthread_create\n", ret); - return -1; - } + if (ret) + ksft_exit_fail_msg("pthread_create\n"); + return 0; } void handle_signal(int signo) { - info("signal received %s requeue\n", + ksft_print_dbg_msg("signal received %s requeue\n", requeued.val ? "after" : "prior to"); } @@ -94,78 +78,46 @@ void *waiterfn(void *arg) unsigned int old_val; int res; - waiter_ret = RET_PASS; - - info("Waiter running\n"); - info("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2); + ksft_print_dbg_msg("Waiter running\n"); + ksft_print_dbg_msg("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2); old_val = f1; res = futex_wait_requeue_pi(&f1, old_val, &(f2), NULL, FUTEX_PRIVATE_FLAG); if (!requeued.val || errno != EWOULDBLOCK) { - fail("unexpected return from futex_wait_requeue_pi: %d (%s)\n", + ksft_test_result_fail("unexpected return from futex_wait_requeue_pi: %d (%s)\n", res, strerror(errno)); - info("w2:futex: %x\n", f2); + ksft_print_dbg_msg("w2:futex: %x\n", f2); if (!res) futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG); - waiter_ret = RET_FAIL; } - info("Waiter exiting with %d\n", waiter_ret); pthread_exit(NULL); } -int main(int argc, char *argv[]) +TEST(futex_requeue_pi_signal_restart) { unsigned int old_val; struct sigaction sa; pthread_t waiter; - int c, res, ret = RET_PASS; - - while ((c = getopt(argc, argv, "chv:")) != -1) { - switch (c) { - case 'c': - log_color(1); - break; - case 'h': - usage(basename(argv[0])); - exit(0); - case 'v': - log_verbosity(atoi(optarg)); - break; - default: - usage(basename(argv[0])); - exit(1); - } - } - - ksft_print_header(); - ksft_set_plan(1); - ksft_print_msg("%s: Test signal handling during requeue_pi\n", - basename(argv[0])); - ksft_print_msg("\tArguments: <none>\n"); + int res; sa.sa_handler = handle_signal; sigemptyset(&sa.sa_mask); sa.sa_flags = 0; - if (sigaction(SIGUSR1, &sa, NULL)) { - error("sigaction\n", errno); - exit(1); - } + if (sigaction(SIGUSR1, &sa, NULL)) + ksft_exit_fail_msg("sigaction\n"); - info("m1:f2: %x\n", f2); - info("Creating waiter\n"); + ksft_print_dbg_msg("m1:f2: %x\n", f2); + ksft_print_dbg_msg("Creating waiter\n"); res = create_rt_thread(&waiter, waiterfn, NULL, SCHED_FIFO, 1); - if (res) { - error("Creating waiting thread failed", res); - ret = RET_ERROR; - goto out; - } + if (res) + ksft_exit_fail_msg("Creating waiting thread failed"); - info("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2); - info("m2:f2: %x\n", f2); + ksft_print_dbg_msg("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2); + ksft_print_dbg_msg("m2:f2: %x\n", f2); futex_lock_pi(&f2, 0, 0, FUTEX_PRIVATE_FLAG); - info("m3:f2: %x\n", f2); + ksft_print_dbg_msg("m3:f2: %x\n", f2); while (1) { /* @@ -173,11 +125,11 @@ int main(int argc, char *argv[]) * restart futex_wait_requeue_pi() in the kernel. Wait for the * waiter to block on f1 again. */ - info("Issuing SIGUSR1 to waiter\n"); + ksft_print_dbg_msg("Issuing SIGUSR1 to waiter\n"); pthread_kill(waiter, SIGUSR1); usleep(DELAY_US); - info("Requeueing waiter via FUTEX_CMP_REQUEUE_PI\n"); + ksft_print_dbg_msg("Requeueing waiter via FUTEX_CMP_REQUEUE_PI\n"); old_val = f1; res = futex_cmp_requeue_pi(&f1, old_val, &(f2), 1, 0, FUTEX_PRIVATE_FLAG); @@ -191,12 +143,10 @@ int main(int argc, char *argv[]) atomic_set(&requeued, 1); break; } else if (res < 0) { - error("FUTEX_CMP_REQUEUE_PI failed\n", errno); - ret = RET_ERROR; - break; + ksft_exit_fail_msg("FUTEX_CMP_REQUEUE_PI failed\n"); } } - info("m4:f2: %x\n", f2); + ksft_print_dbg_msg("m4:f2: %x\n", f2); /* * Signal the waiter after requeue, waiter should return from @@ -204,19 +154,14 @@ int main(int argc, char *argv[]) * futex_unlock_pi() can't happen before the signal wakeup is detected * in the kernel. */ - info("Issuing SIGUSR1 to waiter\n"); + ksft_print_dbg_msg("Issuing SIGUSR1 to waiter\n"); pthread_kill(waiter, SIGUSR1); - info("Waiting for waiter to return\n"); + ksft_print_dbg_msg("Waiting for waiter to return\n"); pthread_join(waiter, NULL); - info("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n", f2, &f2); + ksft_print_dbg_msg("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n", f2, &f2); futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG); - info("m5:f2: %x\n", f2); - - out: - if (ret == RET_PASS && waiter_ret) - ret = waiter_ret; - - print_result(TEST_NAME, ret); - return ret; + ksft_print_dbg_msg("m5:f2: %x\n", f2); } + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/futex/functional/futex_wait.c b/tools/testing/selftests/futex/functional/futex_wait.c index 685140d9b93d..152ca4612886 100644 --- a/tools/testing/selftests/futex/functional/futex_wait.c +++ b/tools/testing/selftests/futex/functional/futex_wait.c @@ -9,25 +9,16 @@ #include <sys/shm.h> #include <sys/mman.h> #include <fcntl.h> -#include "logging.h" + #include "futextest.h" +#include "../../kselftest_harness.h" -#define TEST_NAME "futex-wait" #define timeout_ns 30000000 #define WAKE_WAIT_US 10000 #define SHM_PATH "futex_shm_file" void *futex; -void usage(char *prog) -{ - printf("Usage: %s\n", prog); - printf(" -c Use color\n"); - printf(" -h Display this help message\n"); - printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", - VQUIET, VCRITICAL, VINFO); -} - static void *waiterfn(void *arg) { struct timespec to; @@ -45,53 +36,37 @@ static void *waiterfn(void *arg) return NULL; } -int main(int argc, char *argv[]) +TEST(private_futex) { - int res, ret = RET_PASS, fd, c, shm_id; - u_int32_t f_private = 0, *shared_data; unsigned int flags = FUTEX_PRIVATE_FLAG; + u_int32_t f_private = 0; pthread_t waiter; - void *shm; + int res; futex = &f_private; - while ((c = getopt(argc, argv, "cht:v:")) != -1) { - switch (c) { - case 'c': - log_color(1); - break; - case 'h': - usage(basename(argv[0])); - exit(0); - case 'v': - log_verbosity(atoi(optarg)); - break; - default: - usage(basename(argv[0])); - exit(1); - } - } - - ksft_print_header(); - ksft_set_plan(3); - ksft_print_msg("%s: Test futex_wait\n", basename(argv[0])); - /* Testing a private futex */ - info("Calling private futex_wait on futex: %p\n", futex); + ksft_print_dbg_msg("Calling private futex_wait on futex: %p\n", futex); if (pthread_create(&waiter, NULL, waiterfn, (void *) &flags)) - error("pthread_create failed\n", errno); + ksft_exit_fail_msg("pthread_create failed\n"); usleep(WAKE_WAIT_US); - info("Calling private futex_wake on futex: %p\n", futex); + ksft_print_dbg_msg("Calling private futex_wake on futex: %p\n", futex); res = futex_wake(futex, 1, FUTEX_PRIVATE_FLAG); if (res != 1) { ksft_test_result_fail("futex_wake private returned: %d %s\n", errno, strerror(errno)); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_wake private succeeds\n"); } +} + +TEST(anon_page) +{ + u_int32_t *shared_data; + pthread_t waiter; + int res, shm_id; /* Testing an anon page shared memory */ shm_id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0666); @@ -105,67 +80,65 @@ int main(int argc, char *argv[]) *shared_data = 0; futex = shared_data; - info("Calling shared (page anon) futex_wait on futex: %p\n", futex); + ksft_print_dbg_msg("Calling shared (page anon) futex_wait on futex: %p\n", futex); if (pthread_create(&waiter, NULL, waiterfn, NULL)) - error("pthread_create failed\n", errno); + ksft_exit_fail_msg("pthread_create failed\n"); usleep(WAKE_WAIT_US); - info("Calling shared (page anon) futex_wake on futex: %p\n", futex); + ksft_print_dbg_msg("Calling shared (page anon) futex_wake on futex: %p\n", futex); res = futex_wake(futex, 1, 0); if (res != 1) { ksft_test_result_fail("futex_wake shared (page anon) returned: %d %s\n", errno, strerror(errno)); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_wake shared (page anon) succeeds\n"); } + shmdt(shared_data); +} + +TEST(file_backed) +{ + u_int32_t f_private = 0; + pthread_t waiter; + int res, fd; + void *shm; /* Testing a file backed shared memory */ fd = open(SHM_PATH, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); - if (fd < 0) { - perror("open"); - exit(1); - } + if (fd < 0) + ksft_exit_fail_msg("open"); - if (ftruncate(fd, sizeof(f_private))) { - perror("ftruncate"); - exit(1); - } + if (ftruncate(fd, sizeof(f_private))) + ksft_exit_fail_msg("ftruncate"); shm = mmap(NULL, sizeof(f_private), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - if (shm == MAP_FAILED) { - perror("mmap"); - exit(1); - } + if (shm == MAP_FAILED) + ksft_exit_fail_msg("mmap"); memcpy(shm, &f_private, sizeof(f_private)); futex = shm; - info("Calling shared (file backed) futex_wait on futex: %p\n", futex); + ksft_print_dbg_msg("Calling shared (file backed) futex_wait on futex: %p\n", futex); if (pthread_create(&waiter, NULL, waiterfn, NULL)) - error("pthread_create failed\n", errno); + ksft_exit_fail_msg("pthread_create failed\n"); usleep(WAKE_WAIT_US); - info("Calling shared (file backed) futex_wake on futex: %p\n", futex); + ksft_print_dbg_msg("Calling shared (file backed) futex_wake on futex: %p\n", futex); res = futex_wake(shm, 1, 0); if (res != 1) { ksft_test_result_fail("futex_wake shared (file backed) returned: %d %s\n", errno, strerror(errno)); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_wake shared (file backed) succeeds\n"); } - /* Freeing resources */ - shmdt(shared_data); munmap(shm, sizeof(f_private)); remove(SHM_PATH); close(fd); - - ksft_print_cnts(); - return ret; } + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c b/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c index fb4148f23fa3..8952ebda14ab 100644 --- a/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c +++ b/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c @@ -27,10 +27,9 @@ #include <libgen.h> #include <signal.h> -#include "logging.h" #include "futextest.h" +#include "../../kselftest_harness.h" -#define TEST_NAME "futex-wait-private-mapped-file" #define PAGE_SZ 4096 char pad[PAGE_SZ] = {1}; @@ -40,86 +39,44 @@ char pad2[PAGE_SZ] = {1}; #define WAKE_WAIT_US 3000000 struct timespec wait_timeout = { .tv_sec = 5, .tv_nsec = 0}; -void usage(char *prog) -{ - printf("Usage: %s\n", prog); - printf(" -c Use color\n"); - printf(" -h Display this help message\n"); - printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", - VQUIET, VCRITICAL, VINFO); -} - void *thr_futex_wait(void *arg) { int ret; - info("futex wait\n"); + ksft_print_dbg_msg("futex wait\n"); ret = futex_wait(&val, 1, &wait_timeout, 0); - if (ret && errno != EWOULDBLOCK && errno != ETIMEDOUT) { - error("futex error.\n", errno); - print_result(TEST_NAME, RET_ERROR); - exit(RET_ERROR); - } + if (ret && errno != EWOULDBLOCK && errno != ETIMEDOUT) + ksft_exit_fail_msg("futex error.\n"); if (ret && errno == ETIMEDOUT) - fail("waiter timedout\n"); + ksft_exit_fail_msg("waiter timedout\n"); - info("futex_wait: ret = %d, errno = %d\n", ret, errno); + ksft_print_dbg_msg("futex_wait: ret = %d, errno = %d\n", ret, errno); return NULL; } -int main(int argc, char **argv) +TEST(wait_private_mapped_file) { pthread_t thr; - int ret = RET_PASS; int res; - int c; - - while ((c = getopt(argc, argv, "chv:")) != -1) { - switch (c) { - case 'c': - log_color(1); - break; - case 'h': - usage(basename(argv[0])); - exit(0); - case 'v': - log_verbosity(atoi(optarg)); - break; - default: - usage(basename(argv[0])); - exit(1); - } - } - - ksft_print_header(); - ksft_set_plan(1); - ksft_print_msg( - "%s: Test the futex value of private file mappings in FUTEX_WAIT\n", - basename(argv[0])); - - ret = pthread_create(&thr, NULL, thr_futex_wait, NULL); - if (ret < 0) { - fprintf(stderr, "pthread_create error\n"); - ret = RET_ERROR; - goto out; - } - - info("wait a while\n"); + + res = pthread_create(&thr, NULL, thr_futex_wait, NULL); + if (res < 0) + ksft_exit_fail_msg("pthread_create error\n"); + + ksft_print_dbg_msg("wait a while\n"); usleep(WAKE_WAIT_US); val = 2; res = futex_wake(&val, 1, 0); - info("futex_wake %d\n", res); - if (res != 1) { - fail("FUTEX_WAKE didn't find the waiting thread.\n"); - ret = RET_FAIL; - } + ksft_print_dbg_msg("futex_wake %d\n", res); + if (res != 1) + ksft_exit_fail_msg("FUTEX_WAKE didn't find the waiting thread.\n"); - info("join\n"); + ksft_print_dbg_msg("join\n"); pthread_join(thr, NULL); - out: - print_result(TEST_NAME, ret); - return ret; + ksft_test_result_pass("wait_private_mapped_file"); } + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c index d183f878360b..0c8766aced2e 100644 --- a/tools/testing/selftests/futex/functional/futex_wait_timeout.c +++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c @@ -16,26 +16,15 @@ *****************************************************************************/ #include <pthread.h> + #include "futextest.h" #include "futex2test.h" -#include "logging.h" - -#define TEST_NAME "futex-wait-timeout" +#include "../../kselftest_harness.h" static long timeout_ns = 100000; /* 100us default timeout */ static futex_t futex_pi; static pthread_barrier_t barrier; -void usage(char *prog) -{ - printf("Usage: %s\n", prog); - printf(" -c Use color\n"); - printf(" -h Display this help message\n"); - printf(" -t N Timeout in nanoseconds (default: 100,000)\n"); - printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", - VQUIET, VCRITICAL, VINFO); -} - /* * Get a PI lock and hold it forever, so the main thread lock_pi will block * and we can test the timeout @@ -47,13 +36,13 @@ void *get_pi_lock(void *arg) ret = futex_lock_pi(&futex_pi, NULL, 0, 0); if (ret != 0) - error("futex_lock_pi failed\n", ret); + ksft_exit_fail_msg("futex_lock_pi failed\n"); pthread_barrier_wait(&barrier); /* Blocks forever */ ret = futex_wait(&lock, 0, NULL, 0); - error("futex_wait failed\n", ret); + ksft_exit_fail_msg("futex_wait failed\n"); return NULL; } @@ -61,12 +50,11 @@ void *get_pi_lock(void *arg) /* * Check if the function returned the expected error */ -static void test_timeout(int res, int *ret, char *test_name, int err) +static void test_timeout(int res, char *test_name, int err) { if (!res || errno != err) { ksft_test_result_fail("%s returned %d\n", test_name, res < 0 ? errno : res); - *ret = RET_FAIL; } else { ksft_test_result_pass("%s succeeds\n", test_name); } @@ -78,10 +66,8 @@ static void test_timeout(int res, int *ret, char *test_name, int err) static int futex_get_abs_timeout(clockid_t clockid, struct timespec *to, long timeout_ns) { - if (clock_gettime(clockid, to)) { - error("clock_gettime failed\n", errno); - return errno; - } + if (clock_gettime(clockid, to)) + ksft_exit_fail_msg("clock_gettime failed\n"); to->tv_nsec += timeout_ns; @@ -93,83 +79,66 @@ static int futex_get_abs_timeout(clockid_t clockid, struct timespec *to, return 0; } -int main(int argc, char *argv[]) +TEST(wait_bitset) { futex_t f1 = FUTEX_INITIALIZER; - int res, ret = RET_PASS; struct timespec to; - pthread_t thread; - int c; - struct futex_waitv waitv = { - .uaddr = (uintptr_t)&f1, - .val = f1, - .flags = FUTEX_32, - .__reserved = 0 - }; - - while ((c = getopt(argc, argv, "cht:v:")) != -1) { - switch (c) { - case 'c': - log_color(1); - break; - case 'h': - usage(basename(argv[0])); - exit(0); - case 't': - timeout_ns = atoi(optarg); - break; - case 'v': - log_verbosity(atoi(optarg)); - break; - default: - usage(basename(argv[0])); - exit(1); - } - } - - ksft_print_header(); - ksft_set_plan(9); - ksft_print_msg("%s: Block on a futex and wait for timeout\n", - basename(argv[0])); - ksft_print_msg("\tArguments: timeout=%ldns\n", timeout_ns); - - pthread_barrier_init(&barrier, NULL, 2); - pthread_create(&thread, NULL, get_pi_lock, NULL); + int res; /* initialize relative timeout */ to.tv_sec = 0; to.tv_nsec = timeout_ns; res = futex_wait(&f1, f1, &to, 0); - test_timeout(res, &ret, "futex_wait relative", ETIMEDOUT); + test_timeout(res, "futex_wait relative", ETIMEDOUT); /* FUTEX_WAIT_BITSET with CLOCK_REALTIME */ if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns)) - return RET_FAIL; + ksft_test_result_error("get_time error"); res = futex_wait_bitset(&f1, f1, &to, 1, FUTEX_CLOCK_REALTIME); - test_timeout(res, &ret, "futex_wait_bitset realtime", ETIMEDOUT); + test_timeout(res, "futex_wait_bitset realtime", ETIMEDOUT); /* FUTEX_WAIT_BITSET with CLOCK_MONOTONIC */ if (futex_get_abs_timeout(CLOCK_MONOTONIC, &to, timeout_ns)) - return RET_FAIL; + ksft_test_result_error("get_time error"); res = futex_wait_bitset(&f1, f1, &to, 1, 0); - test_timeout(res, &ret, "futex_wait_bitset monotonic", ETIMEDOUT); + test_timeout(res, "futex_wait_bitset monotonic", ETIMEDOUT); +} + +TEST(requeue_pi) +{ + futex_t f1 = FUTEX_INITIALIZER; + struct timespec to; + int res; /* FUTEX_WAIT_REQUEUE_PI with CLOCK_REALTIME */ if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns)) - return RET_FAIL; + ksft_test_result_error("get_time error"); res = futex_wait_requeue_pi(&f1, f1, &futex_pi, &to, FUTEX_CLOCK_REALTIME); - test_timeout(res, &ret, "futex_wait_requeue_pi realtime", ETIMEDOUT); + test_timeout(res, "futex_wait_requeue_pi realtime", ETIMEDOUT); /* FUTEX_WAIT_REQUEUE_PI with CLOCK_MONOTONIC */ if (futex_get_abs_timeout(CLOCK_MONOTONIC, &to, timeout_ns)) - return RET_FAIL; + ksft_test_result_error("get_time error"); res = futex_wait_requeue_pi(&f1, f1, &futex_pi, &to, 0); - test_timeout(res, &ret, "futex_wait_requeue_pi monotonic", ETIMEDOUT); + test_timeout(res, "futex_wait_requeue_pi monotonic", ETIMEDOUT); + +} + +TEST(lock_pi) +{ + struct timespec to; + pthread_t thread; + int res; + + /* Create a thread that will lock forever so any waiter will timeout */ + pthread_barrier_init(&barrier, NULL, 2); + pthread_create(&thread, NULL, get_pi_lock, NULL); /* Wait until the other thread calls futex_lock_pi() */ pthread_barrier_wait(&barrier); pthread_barrier_destroy(&barrier); + /* * FUTEX_LOCK_PI with CLOCK_REALTIME * Due to historical reasons, FUTEX_LOCK_PI supports only realtime @@ -181,26 +150,38 @@ int main(int argc, char *argv[]) * smaller than realtime and the syscall will timeout immediately. */ if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns)) - return RET_FAIL; + ksft_test_result_error("get_time error"); res = futex_lock_pi(&futex_pi, &to, 0, 0); - test_timeout(res, &ret, "futex_lock_pi realtime", ETIMEDOUT); + test_timeout(res, "futex_lock_pi realtime", ETIMEDOUT); /* Test operations that don't support FUTEX_CLOCK_REALTIME */ res = futex_lock_pi(&futex_pi, NULL, 0, FUTEX_CLOCK_REALTIME); - test_timeout(res, &ret, "futex_lock_pi invalid timeout flag", ENOSYS); + test_timeout(res, "futex_lock_pi invalid timeout flag", ENOSYS); +} + +TEST(waitv) +{ + futex_t f1 = FUTEX_INITIALIZER; + struct futex_waitv waitv = { + .uaddr = (uintptr_t)&f1, + .val = f1, + .flags = FUTEX_32, + .__reserved = 0, + }; + struct timespec to; + int res; /* futex_waitv with CLOCK_MONOTONIC */ if (futex_get_abs_timeout(CLOCK_MONOTONIC, &to, timeout_ns)) - return RET_FAIL; + ksft_test_result_error("get_time error"); res = futex_waitv(&waitv, 1, 0, &to, CLOCK_MONOTONIC); - test_timeout(res, &ret, "futex_waitv monotonic", ETIMEDOUT); + test_timeout(res, "futex_waitv monotonic", ETIMEDOUT); /* futex_waitv with CLOCK_REALTIME */ if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns)) - return RET_FAIL; + ksft_test_result_error("get_time error"); res = futex_waitv(&waitv, 1, 0, &to, CLOCK_REALTIME); - test_timeout(res, &ret, "futex_waitv realtime", ETIMEDOUT); - - ksft_print_cnts(); - return ret; + test_timeout(res, "futex_waitv realtime", ETIMEDOUT); } + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c b/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c index ed9cd07e31c1..ce2301500d83 100644 --- a/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c +++ b/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c @@ -29,95 +29,55 @@ #include <linux/futex.h> #include <libgen.h> -#include "logging.h" #include "futextest.h" +#include "../../kselftest_harness.h" -#define TEST_NAME "futex-wait-uninitialized-heap" #define WAIT_US 5000000 static int child_blocked = 1; -static int child_ret; +static bool child_ret; void *buf; -void usage(char *prog) -{ - printf("Usage: %s\n", prog); - printf(" -c Use color\n"); - printf(" -h Display this help message\n"); - printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", - VQUIET, VCRITICAL, VINFO); -} - void *wait_thread(void *arg) { int res; - child_ret = RET_PASS; + child_ret = true; res = futex_wait(buf, 1, NULL, 0); child_blocked = 0; if (res != 0 && errno != EWOULDBLOCK) { - error("futex failure\n", errno); - child_ret = RET_ERROR; + ksft_exit_fail_msg("futex failure\n"); + child_ret = false; } pthread_exit(NULL); } -int main(int argc, char **argv) +TEST(futex_wait_uninitialized_heap) { - int c, ret = RET_PASS; long page_size; pthread_t thr; - - while ((c = getopt(argc, argv, "chv:")) != -1) { - switch (c) { - case 'c': - log_color(1); - break; - case 'h': - usage(basename(argv[0])); - exit(0); - case 'v': - log_verbosity(atoi(optarg)); - break; - default: - usage(basename(argv[0])); - exit(1); - } - } + int ret; page_size = sysconf(_SC_PAGESIZE); buf = mmap(NULL, page_size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); - if (buf == (void *)-1) { - error("mmap\n", errno); - exit(1); - } - - ksft_print_header(); - ksft_set_plan(1); - ksft_print_msg("%s: Test the uninitialized futex value in FUTEX_WAIT\n", - basename(argv[0])); - + if (buf == (void *)-1) + ksft_exit_fail_msg("mmap\n"); ret = pthread_create(&thr, NULL, wait_thread, NULL); - if (ret) { - error("pthread_create\n", errno); - ret = RET_ERROR; - goto out; - } + if (ret) + ksft_exit_fail_msg("pthread_create\n"); - info("waiting %dus for child to return\n", WAIT_US); + ksft_print_dbg_msg("waiting %dus for child to return\n", WAIT_US); usleep(WAIT_US); - ret = child_ret; - if (child_blocked) { - fail("child blocked in kernel\n"); - ret = RET_FAIL; - } + if (child_blocked) + ksft_test_result_fail("child blocked in kernel\n"); - out: - print_result(TEST_NAME, ret); - return ret; + if (!child_ret) + ksft_test_result_fail("child error\n"); } + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c index 2d8230da9064..36b7a54a4085 100644 --- a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c +++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c @@ -21,72 +21,44 @@ #include <stdlib.h> #include <string.h> #include <time.h> + #include "futextest.h" #include "futex2test.h" -#include "logging.h" +#include "../../kselftest_harness.h" -#define TEST_NAME "futex-wait-wouldblock" #define timeout_ns 100000 -void usage(char *prog) -{ - printf("Usage: %s\n", prog); - printf(" -c Use color\n"); - printf(" -h Display this help message\n"); - printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", - VQUIET, VCRITICAL, VINFO); -} - -int main(int argc, char *argv[]) +TEST(futex_wait_wouldblock) { struct timespec to = {.tv_sec = 0, .tv_nsec = timeout_ns}; futex_t f1 = FUTEX_INITIALIZER; - int res, ret = RET_PASS; - int c; - struct futex_waitv waitv = { - .uaddr = (uintptr_t)&f1, - .val = f1+1, - .flags = FUTEX_32, - .__reserved = 0 - }; + int res; - while ((c = getopt(argc, argv, "cht:v:")) != -1) { - switch (c) { - case 'c': - log_color(1); - break; - case 'h': - usage(basename(argv[0])); - exit(0); - case 'v': - log_verbosity(atoi(optarg)); - break; - default: - usage(basename(argv[0])); - exit(1); - } - } - - ksft_print_header(); - ksft_set_plan(2); - ksft_print_msg("%s: Test the unexpected futex value in FUTEX_WAIT\n", - basename(argv[0])); - - info("Calling futex_wait on f1: %u @ %p with val=%u\n", f1, &f1, f1+1); + ksft_print_dbg_msg("Calling futex_wait on f1: %u @ %p with val=%u\n", f1, &f1, f1+1); res = futex_wait(&f1, f1+1, &to, FUTEX_PRIVATE_FLAG); if (!res || errno != EWOULDBLOCK) { ksft_test_result_fail("futex_wait returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_wait\n"); } +} - if (clock_gettime(CLOCK_MONOTONIC, &to)) { - error("clock_gettime failed\n", errno); - return errno; - } +TEST(futex_waitv_wouldblock) +{ + struct timespec to = {.tv_sec = 0, .tv_nsec = timeout_ns}; + futex_t f1 = FUTEX_INITIALIZER; + struct futex_waitv waitv = { + .uaddr = (uintptr_t)&f1, + .val = f1 + 1, + .flags = FUTEX_32, + .__reserved = 0, + }; + int res; + + if (clock_gettime(CLOCK_MONOTONIC, &to)) + ksft_exit_fail_msg("clock_gettime failed %d\n", errno); to.tv_nsec += timeout_ns; @@ -95,17 +67,15 @@ int main(int argc, char *argv[]) to.tv_nsec -= 1000000000; } - info("Calling futex_waitv on f1: %u @ %p with val=%u\n", f1, &f1, f1+1); + ksft_print_dbg_msg("Calling futex_waitv on f1: %u @ %p with val=%u\n", f1, &f1, f1+1); res = futex_waitv(&waitv, 1, 0, &to, CLOCK_MONOTONIC); if (!res || errno != EWOULDBLOCK) { ksft_test_result_fail("futex_waitv returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_waitv\n"); } - - ksft_print_cnts(); - return ret; } + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/futex/functional/futex_waitv.c b/tools/testing/selftests/futex/functional/futex_waitv.c index a94337f677e1..c684b10eb76e 100644 --- a/tools/testing/selftests/futex/functional/futex_waitv.c +++ b/tools/testing/selftests/futex/functional/futex_waitv.c @@ -15,25 +15,16 @@ #include <pthread.h> #include <stdint.h> #include <sys/shm.h> + #include "futextest.h" #include "futex2test.h" -#include "logging.h" +#include "../../kselftest_harness.h" -#define TEST_NAME "futex-wait" #define WAKE_WAIT_US 10000 #define NR_FUTEXES 30 static struct futex_waitv waitv[NR_FUTEXES]; u_int32_t futexes[NR_FUTEXES] = {0}; -void usage(char *prog) -{ - printf("Usage: %s\n", prog); - printf(" -c Use color\n"); - printf(" -h Display this help message\n"); - printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", - VQUIET, VCRITICAL, VINFO); -} - void *waiterfn(void *arg) { struct timespec to; @@ -41,7 +32,7 @@ void *waiterfn(void *arg) /* setting absolute timeout for futex2 */ if (clock_gettime(CLOCK_MONOTONIC, &to)) - error("gettime64 failed\n", errno); + ksft_exit_fail_msg("gettime64 failed\n"); to.tv_sec++; @@ -57,34 +48,10 @@ void *waiterfn(void *arg) return NULL; } -int main(int argc, char *argv[]) +TEST(private_waitv) { pthread_t waiter; - int res, ret = RET_PASS; - struct timespec to; - int c, i; - - while ((c = getopt(argc, argv, "cht:v:")) != -1) { - switch (c) { - case 'c': - log_color(1); - break; - case 'h': - usage(basename(argv[0])); - exit(0); - case 'v': - log_verbosity(atoi(optarg)); - break; - default: - usage(basename(argv[0])); - exit(1); - } - } - - ksft_print_header(); - ksft_set_plan(7); - ksft_print_msg("%s: Test FUTEX_WAITV\n", - basename(argv[0])); + int res, i; for (i = 0; i < NR_FUTEXES; i++) { waitv[i].uaddr = (uintptr_t)&futexes[i]; @@ -95,7 +62,7 @@ int main(int argc, char *argv[]) /* Private waitv */ if (pthread_create(&waiter, NULL, waiterfn, NULL)) - error("pthread_create failed\n", errno); + ksft_exit_fail_msg("pthread_create failed\n"); usleep(WAKE_WAIT_US); @@ -104,10 +71,15 @@ int main(int argc, char *argv[]) ksft_test_result_fail("futex_wake private returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_waitv private\n"); } +} + +TEST(shared_waitv) +{ + pthread_t waiter; + int res, i; /* Shared waitv */ for (i = 0; i < NR_FUTEXES; i++) { @@ -128,7 +100,7 @@ int main(int argc, char *argv[]) } if (pthread_create(&waiter, NULL, waiterfn, NULL)) - error("pthread_create failed\n", errno); + ksft_exit_fail_msg("pthread_create failed\n"); usleep(WAKE_WAIT_US); @@ -137,19 +109,24 @@ int main(int argc, char *argv[]) ksft_test_result_fail("futex_wake shared returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_waitv shared\n"); } for (i = 0; i < NR_FUTEXES; i++) shmdt(u64_to_ptr(waitv[i].uaddr)); +} + +TEST(invalid_flag) +{ + struct timespec to; + int res; /* Testing a waiter without FUTEX_32 flag */ waitv[0].flags = FUTEX_PRIVATE_FLAG; if (clock_gettime(CLOCK_MONOTONIC, &to)) - error("gettime64 failed\n", errno); + ksft_exit_fail_msg("gettime64 failed\n"); to.tv_sec++; @@ -158,17 +135,22 @@ int main(int argc, char *argv[]) ksft_test_result_fail("futex_waitv private returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_waitv without FUTEX_32\n"); } +} + +TEST(unaligned_address) +{ + struct timespec to; + int res; /* Testing a waiter with an unaligned address */ waitv[0].flags = FUTEX_PRIVATE_FLAG | FUTEX_32; waitv[0].uaddr = 1; if (clock_gettime(CLOCK_MONOTONIC, &to)) - error("gettime64 failed\n", errno); + ksft_exit_fail_msg("gettime64 failed\n"); to.tv_sec++; @@ -177,16 +159,21 @@ int main(int argc, char *argv[]) ksft_test_result_fail("futex_wake private returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_waitv with an unaligned address\n"); } +} + +TEST(null_address) +{ + struct timespec to; + int res; /* Testing a NULL address for waiters.uaddr */ waitv[0].uaddr = 0x00000000; if (clock_gettime(CLOCK_MONOTONIC, &to)) - error("gettime64 failed\n", errno); + ksft_exit_fail_msg("gettime64 failed\n"); to.tv_sec++; @@ -195,14 +182,13 @@ int main(int argc, char *argv[]) ksft_test_result_fail("futex_waitv private returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_waitv NULL address in waitv.uaddr\n"); } /* Testing a NULL address for *waiters */ if (clock_gettime(CLOCK_MONOTONIC, &to)) - error("gettime64 failed\n", errno); + ksft_exit_fail_msg("gettime64 failed\n"); to.tv_sec++; @@ -211,14 +197,19 @@ int main(int argc, char *argv[]) ksft_test_result_fail("futex_waitv private returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_waitv NULL address in *waiters\n"); } +} + +TEST(invalid_clockid) +{ + struct timespec to; + int res; /* Testing an invalid clockid */ if (clock_gettime(CLOCK_MONOTONIC, &to)) - error("gettime64 failed\n", errno); + ksft_exit_fail_msg("gettime64 failed\n"); to.tv_sec++; @@ -227,11 +218,9 @@ int main(int argc, char *argv[]) ksft_test_result_fail("futex_waitv private returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_waitv invalid clockid\n"); } - - ksft_print_cnts(); - return ret; } + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh index 81739849f299..e88545c06d57 100755 --- a/tools/testing/selftests/futex/functional/run.sh +++ b/tools/testing/selftests/futex/functional/run.sh @@ -18,74 +18,36 @@ # ############################################################################### -# Test for a color capable console -if [ -z "$USE_COLOR" ]; then - tput setf 7 || tput setaf 7 - if [ $? -eq 0 ]; then - USE_COLOR=1 - tput sgr0 - fi -fi -if [ "$USE_COLOR" -eq 1 ]; then - COLOR="-c" -fi - - echo -# requeue pi testing -# without timeouts -./futex_requeue_pi $COLOR -./futex_requeue_pi $COLOR -b -./futex_requeue_pi $COLOR -b -l -./futex_requeue_pi $COLOR -b -o -./futex_requeue_pi $COLOR -l -./futex_requeue_pi $COLOR -o -# with timeouts -./futex_requeue_pi $COLOR -b -l -t 5000 -./futex_requeue_pi $COLOR -l -t 5000 -./futex_requeue_pi $COLOR -b -l -t 500000 -./futex_requeue_pi $COLOR -l -t 500000 -./futex_requeue_pi $COLOR -b -t 5000 -./futex_requeue_pi $COLOR -t 5000 -./futex_requeue_pi $COLOR -b -t 500000 -./futex_requeue_pi $COLOR -t 500000 -./futex_requeue_pi $COLOR -b -o -t 5000 -./futex_requeue_pi $COLOR -l -t 5000 -./futex_requeue_pi $COLOR -b -o -t 500000 -./futex_requeue_pi $COLOR -l -t 500000 -# with long timeout -./futex_requeue_pi $COLOR -b -l -t 2000000000 -./futex_requeue_pi $COLOR -l -t 2000000000 - +./futex_requeue_pi echo -./futex_requeue_pi_mismatched_ops $COLOR +./futex_requeue_pi_mismatched_ops echo -./futex_requeue_pi_signal_restart $COLOR +./futex_requeue_pi_signal_restart echo -./futex_wait_timeout $COLOR +./futex_wait_timeout echo -./futex_wait_wouldblock $COLOR +./futex_wait_wouldblock echo -./futex_wait_uninitialized_heap $COLOR -./futex_wait_private_mapped_file $COLOR +./futex_wait_uninitialized_heap +./futex_wait_private_mapped_file echo -./futex_wait $COLOR +./futex_wait echo -./futex_requeue $COLOR +./futex_requeue echo -./futex_waitv $COLOR +./futex_waitv echo -./futex_priv_hash $COLOR -./futex_priv_hash -g $COLOR +./futex_priv_hash echo -./futex_numa_mpol $COLOR +./futex_numa_mpol diff --git a/tools/testing/selftests/futex/include/futextest.h b/tools/testing/selftests/futex/include/futextest.h index 7a5fd1d5355e..3d48e9789d9f 100644 --- a/tools/testing/selftests/futex/include/futextest.h +++ b/tools/testing/selftests/futex/include/futextest.h @@ -58,6 +58,17 @@ typedef volatile u_int32_t futex_t; #define SYS_futex SYS_futex_time64 #endif +/* + * On 32bit systems if we use "-D_FILE_OFFSET_BITS=64 -D_TIME_BITS=64" or if + * we are using a newer compiler then the size of the timestamps will be 64bit, + * however, the SYS_futex will still point to the 32bit futex system call. + */ +#if __SIZEOF_POINTER__ == 4 && defined(SYS_futex_time64) && \ + defined(_TIME_BITS) && _TIME_BITS == 64 +# undef SYS_futex +# define SYS_futex SYS_futex_time64 +#endif + /** * futex() - SYS_futex syscall wrapper * @uaddr: address of first futex diff --git a/tools/testing/selftests/futex/include/logging.h b/tools/testing/selftests/futex/include/logging.h deleted file mode 100644 index 874c69ce5cce..000000000000 --- a/tools/testing/selftests/futex/include/logging.h +++ /dev/null @@ -1,148 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/****************************************************************************** - * - * Copyright © International Business Machines Corp., 2009 - * - * DESCRIPTION - * Glibc independent futex library for testing kernel functionality. - * - * AUTHOR - * Darren Hart <dvhart@linux.intel.com> - * - * HISTORY - * 2009-Nov-6: Initial version by Darren Hart <dvhart@linux.intel.com> - * - *****************************************************************************/ - -#ifndef _LOGGING_H -#define _LOGGING_H - -#include <stdio.h> -#include <string.h> -#include <unistd.h> -#include <linux/futex.h> -#include "kselftest.h" - -/* - * Define PASS, ERROR, and FAIL strings with and without color escape - * sequences, default to no color. - */ -#define ESC 0x1B, '[' -#define BRIGHT '1' -#define GREEN '3', '2' -#define YELLOW '3', '3' -#define RED '3', '1' -#define ESCEND 'm' -#define BRIGHT_GREEN ESC, BRIGHT, ';', GREEN, ESCEND -#define BRIGHT_YELLOW ESC, BRIGHT, ';', YELLOW, ESCEND -#define BRIGHT_RED ESC, BRIGHT, ';', RED, ESCEND -#define RESET_COLOR ESC, '0', 'm' -static const char PASS_COLOR[] = {BRIGHT_GREEN, ' ', 'P', 'A', 'S', 'S', - RESET_COLOR, 0}; -static const char ERROR_COLOR[] = {BRIGHT_YELLOW, 'E', 'R', 'R', 'O', 'R', - RESET_COLOR, 0}; -static const char FAIL_COLOR[] = {BRIGHT_RED, ' ', 'F', 'A', 'I', 'L', - RESET_COLOR, 0}; -static const char INFO_NORMAL[] = " INFO"; -static const char PASS_NORMAL[] = " PASS"; -static const char ERROR_NORMAL[] = "ERROR"; -static const char FAIL_NORMAL[] = " FAIL"; -const char *INFO = INFO_NORMAL; -const char *PASS = PASS_NORMAL; -const char *ERROR = ERROR_NORMAL; -const char *FAIL = FAIL_NORMAL; - -/* Verbosity setting for INFO messages */ -#define VQUIET 0 -#define VCRITICAL 1 -#define VINFO 2 -#define VMAX VINFO -int _verbose = VCRITICAL; - -/* Functional test return codes */ -#define RET_PASS 0 -#define RET_ERROR -1 -#define RET_FAIL -2 - -/** - * log_color() - Use colored output for PASS, ERROR, and FAIL strings - * @use_color: use color (1) or not (0) - */ -void log_color(int use_color) -{ - if (use_color) { - PASS = PASS_COLOR; - ERROR = ERROR_COLOR; - FAIL = FAIL_COLOR; - } else { - PASS = PASS_NORMAL; - ERROR = ERROR_NORMAL; - FAIL = FAIL_NORMAL; - } -} - -/** - * log_verbosity() - Set verbosity of test output - * @verbose: Enable (1) verbose output or not (0) - * - * Currently setting verbose=1 will enable INFO messages and 0 will disable - * them. FAIL and ERROR messages are always displayed. - */ -void log_verbosity(int level) -{ - if (level > VMAX) - level = VMAX; - else if (level < 0) - level = 0; - _verbose = level; -} - -/** - * print_result() - Print standard PASS | ERROR | FAIL results - * @ret: the return value to be considered: 0 | RET_ERROR | RET_FAIL - * - * print_result() is primarily intended for functional tests. - */ -void print_result(const char *test_name, int ret) -{ - switch (ret) { - case RET_PASS: - ksft_test_result_pass("%s\n", test_name); - ksft_print_cnts(); - return; - case RET_ERROR: - ksft_test_result_error("%s\n", test_name); - ksft_print_cnts(); - return; - case RET_FAIL: - ksft_test_result_fail("%s\n", test_name); - ksft_print_cnts(); - return; - } -} - -/* log level macros */ -#define info(message, vargs...) \ -do { \ - if (_verbose >= VINFO) \ - fprintf(stderr, "\t%s: "message, INFO, ##vargs); \ -} while (0) - -#define error(message, err, args...) \ -do { \ - if (_verbose >= VCRITICAL) {\ - if (err) \ - fprintf(stderr, "\t%s: %s: "message, \ - ERROR, strerror(err), ##args); \ - else \ - fprintf(stderr, "\t%s: "message, ERROR, ##args); \ - } \ -} while (0) - -#define fail(message, args...) \ -do { \ - if (_verbose >= VCRITICAL) \ - fprintf(stderr, "\t%s: "message, FAIL, ##args); \ -} while (0) - -#endif diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index c3b6d2604b1e..8deeb4b72e73 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h @@ -54,6 +54,7 @@ #include <stdlib.h> #include <unistd.h> #include <stdarg.h> +#include <stdbool.h> #include <string.h> #include <stdio.h> #include <sys/utsname.h> @@ -104,6 +105,7 @@ struct ksft_count { static struct ksft_count ksft_cnt; static unsigned int ksft_plan; +static bool ksft_debug_enabled; static inline unsigned int ksft_test_num(void) { @@ -175,6 +177,18 @@ static inline __printf(1, 2) void ksft_print_msg(const char *msg, ...) va_end(args); } +static inline void ksft_print_dbg_msg(const char *msg, ...) +{ + va_list args; + + if (!ksft_debug_enabled) + return; + + va_start(args, msg); + ksft_print_msg(msg, args); + va_end(args); +} + static inline void ksft_perror(const char *msg) { ksft_print_msg("%s: %s (%d)\n", msg, strerror(errno), errno); diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 8516e8434bc4..3f66e862e83e 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -1091,7 +1091,7 @@ static int test_harness_argv_check(int argc, char **argv) { int opt; - while ((opt = getopt(argc, argv, "hlF:f:V:v:t:T:r:")) != -1) { + while ((opt = getopt(argc, argv, "dhlF:f:V:v:t:T:r:")) != -1) { switch (opt) { case 'f': case 'F': @@ -1104,12 +1104,16 @@ static int test_harness_argv_check(int argc, char **argv) case 'l': test_harness_list_tests(); return KSFT_SKIP; + case 'd': + ksft_debug_enabled = true; + break; case 'h': default: fprintf(stderr, - "Usage: %s [-h|-l] [-t|-T|-v|-V|-f|-F|-r name]\n" + "Usage: %s [-h|-l|-d] [-t|-T|-v|-V|-f|-F|-r name]\n" "\t-h print help\n" "\t-l list all tests\n" + "\t-d enable debug prints\n" "\n" "\t-t name include test\n" "\t-T name exclude test\n" @@ -1142,8 +1146,9 @@ static bool test_enabled(int argc, char **argv, int opt; optind = 1; - while ((opt = getopt(argc, argv, "F:f:V:v:t:T:r:")) != -1) { - has_positive |= islower(opt); + while ((opt = getopt(argc, argv, "dF:f:V:v:t:T:r:")) != -1) { + if (opt != 'd') + has_positive |= islower(opt); switch (tolower(opt)) { case 't': diff --git a/tools/testing/selftests/pidfd/config b/tools/testing/selftests/pidfd/config index 6133524710f7..cf7cc0ce0248 100644 --- a/tools/testing/selftests/pidfd/config +++ b/tools/testing/selftests/pidfd/config @@ -4,6 +4,5 @@ CONFIG_USER_NS=y CONFIG_PID_NS=y CONFIG_NET_NS=y CONFIG_TIME_NS=y -CONFIG_GENERIC_VDSO_TIME_NS=y CONFIG_CGROUPS=y CONFIG_CHECKPOINT_RESTORE=y diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c index 663a9cef1952..dcac5cbe7933 100644 --- a/tools/testing/selftests/rseq/rseq.c +++ b/tools/testing/selftests/rseq/rseq.c @@ -40,9 +40,9 @@ * Define weak versions to play nice with binaries that are statically linked * against a libc that doesn't support registering its own rseq. */ -__weak ptrdiff_t __rseq_offset; -__weak unsigned int __rseq_size; -__weak unsigned int __rseq_flags; +extern __weak ptrdiff_t __rseq_offset; +extern __weak unsigned int __rseq_size; +extern __weak unsigned int __rseq_flags; static const ptrdiff_t *libc_rseq_offset_p = &__rseq_offset; static const unsigned int *libc_rseq_size_p = &__rseq_size; @@ -209,7 +209,7 @@ void rseq_init(void) * libc not having registered a restartable sequence. Try to find the * symbols if that's the case. */ - if (!*libc_rseq_size_p) { + if (!libc_rseq_size_p || !*libc_rseq_size_p) { libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset"); libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size"); libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags"); diff --git a/tools/testing/selftests/vDSO/.gitignore b/tools/testing/selftests/vDSO/.gitignore index 30d5c8f0e5c7..ba322a353aff 100644 --- a/tools/testing/selftests/vDSO/.gitignore +++ b/tools/testing/selftests/vDSO/.gitignore @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only vdso_test vdso_test_abi -vdso_test_clock_getres vdso_test_correctness vdso_test_gettimeofday vdso_test_getcpu diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile index 918a2caa070e..e361aca22a74 100644 --- a/tools/testing/selftests/vDSO/Makefile +++ b/tools/testing/selftests/vDSO/Makefile @@ -4,7 +4,6 @@ include ../../../scripts/Makefile.arch TEST_GEN_PROGS := vdso_test_gettimeofday TEST_GEN_PROGS += vdso_test_getcpu TEST_GEN_PROGS += vdso_test_abi -TEST_GEN_PROGS += vdso_test_clock_getres ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64)) TEST_GEN_PROGS += vdso_standalone_test_x86 endif @@ -29,7 +28,6 @@ CFLAGS_NOLIBC := -nostdlib -nostdinc -ffreestanding -fno-asynchronous-unwind-tab $(OUTPUT)/vdso_test_gettimeofday: parse_vdso.c vdso_test_gettimeofday.c $(OUTPUT)/vdso_test_getcpu: parse_vdso.c vdso_test_getcpu.c $(OUTPUT)/vdso_test_abi: parse_vdso.c vdso_test_abi.c -$(OUTPUT)/vdso_test_clock_getres: vdso_test_clock_getres.c $(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c | headers $(OUTPUT)/vdso_standalone_test_x86: CFLAGS:=$(CFLAGS_NOLIBC) $(CFLAGS) diff --git a/tools/testing/selftests/vDSO/vdso_call.h b/tools/testing/selftests/vDSO/vdso_call.h index bb237d771051..e7205584cbdc 100644 --- a/tools/testing/selftests/vDSO/vdso_call.h +++ b/tools/testing/selftests/vDSO/vdso_call.h @@ -44,7 +44,6 @@ register long _r6 asm ("r6"); \ register long _r7 asm ("r7"); \ register long _r8 asm ("r8"); \ - register long _rval asm ("r3"); \ \ LOADARGS_##nr(fn, args); \ \ @@ -54,13 +53,13 @@ " bns+ 1f\n" \ " neg 3, 3\n" \ "1:" \ - : "+r" (_r0), "=r" (_r3), "+r" (_r4), "+r" (_r5), \ + : "+r" (_r0), "+r" (_r3), "+r" (_r4), "+r" (_r5), \ "+r" (_r6), "+r" (_r7), "+r" (_r8) \ - : "r" (_rval) \ + : \ : "r9", "r10", "r11", "r12", "cr0", "cr1", "cr5", \ "cr6", "cr7", "xer", "lr", "ctr", "memory" \ ); \ - _rval; \ + _r3; \ }) #else diff --git a/tools/testing/selftests/vDSO/vdso_test_abi.c b/tools/testing/selftests/vDSO/vdso_test_abi.c index a54424e2336f..238d609a457a 100644 --- a/tools/testing/selftests/vDSO/vdso_test_abi.c +++ b/tools/testing/selftests/vDSO/vdso_test_abi.c @@ -26,24 +26,31 @@ static const char *version; static const char **name; +/* The same as struct __kernel_timespec */ +struct vdso_timespec64 { + uint64_t tv_sec; + uint64_t tv_nsec; +}; + typedef long (*vdso_gettimeofday_t)(struct timeval *tv, struct timezone *tz); typedef long (*vdso_clock_gettime_t)(clockid_t clk_id, struct timespec *ts); +typedef long (*vdso_clock_gettime64_t)(clockid_t clk_id, struct vdso_timespec64 *ts); typedef long (*vdso_clock_getres_t)(clockid_t clk_id, struct timespec *ts); typedef time_t (*vdso_time_t)(time_t *t); -const char *vdso_clock_name[12] = { - "CLOCK_REALTIME", - "CLOCK_MONOTONIC", - "CLOCK_PROCESS_CPUTIME_ID", - "CLOCK_THREAD_CPUTIME_ID", - "CLOCK_MONOTONIC_RAW", - "CLOCK_REALTIME_COARSE", - "CLOCK_MONOTONIC_COARSE", - "CLOCK_BOOTTIME", - "CLOCK_REALTIME_ALARM", - "CLOCK_BOOTTIME_ALARM", - "CLOCK_SGI_CYCLE", - "CLOCK_TAI", +static const char * const vdso_clock_name[] = { + [CLOCK_REALTIME] = "CLOCK_REALTIME", + [CLOCK_MONOTONIC] = "CLOCK_MONOTONIC", + [CLOCK_PROCESS_CPUTIME_ID] = "CLOCK_PROCESS_CPUTIME_ID", + [CLOCK_THREAD_CPUTIME_ID] = "CLOCK_THREAD_CPUTIME_ID", + [CLOCK_MONOTONIC_RAW] = "CLOCK_MONOTONIC_RAW", + [CLOCK_REALTIME_COARSE] = "CLOCK_REALTIME_COARSE", + [CLOCK_MONOTONIC_COARSE] = "CLOCK_MONOTONIC_COARSE", + [CLOCK_BOOTTIME] = "CLOCK_BOOTTIME", + [CLOCK_REALTIME_ALARM] = "CLOCK_REALTIME_ALARM", + [CLOCK_BOOTTIME_ALARM] = "CLOCK_BOOTTIME_ALARM", + [10 /* CLOCK_SGI_CYCLE */] = "CLOCK_SGI_CYCLE", + [CLOCK_TAI] = "CLOCK_TAI", }; static void vdso_test_gettimeofday(void) @@ -70,6 +77,33 @@ static void vdso_test_gettimeofday(void) } } +static void vdso_test_clock_gettime64(clockid_t clk_id) +{ + /* Find clock_gettime64. */ + vdso_clock_gettime64_t vdso_clock_gettime64 = + (vdso_clock_gettime64_t)vdso_sym(version, name[5]); + + if (!vdso_clock_gettime64) { + ksft_print_msg("Couldn't find %s\n", name[5]); + ksft_test_result_skip("%s %s\n", name[5], + vdso_clock_name[clk_id]); + return; + } + + struct vdso_timespec64 ts; + long ret = VDSO_CALL(vdso_clock_gettime64, 2, clk_id, &ts); + + if (ret == 0) { + ksft_print_msg("The time is %lld.%06lld\n", + (long long)ts.tv_sec, (long long)ts.tv_nsec); + ksft_test_result_pass("%s %s\n", name[5], + vdso_clock_name[clk_id]); + } else { + ksft_test_result_fail("%s %s\n", name[5], + vdso_clock_name[clk_id]); + } +} + static void vdso_test_clock_gettime(clockid_t clk_id) { /* Find clock_gettime. */ @@ -171,23 +205,23 @@ static inline void vdso_test_clock(clockid_t clock_id) ksft_print_msg("clock_id: %s\n", vdso_clock_name[clock_id]); vdso_test_clock_gettime(clock_id); + vdso_test_clock_gettime64(clock_id); vdso_test_clock_getres(clock_id); } -#define VDSO_TEST_PLAN 16 +#define VDSO_TEST_PLAN 29 int main(int argc, char **argv) { unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR); ksft_print_header(); - ksft_set_plan(VDSO_TEST_PLAN); - if (!sysinfo_ehdr) { - ksft_print_msg("AT_SYSINFO_EHDR is not present!\n"); - return KSFT_SKIP; - } + if (!sysinfo_ehdr) + ksft_exit_skip("AT_SYSINFO_EHDR is not present!\n"); + + ksft_set_plan(VDSO_TEST_PLAN); version = versions[VDSO_VERSION]; name = (const char **)&names[VDSO_NAMES]; @@ -198,40 +232,17 @@ int main(int argc, char **argv) vdso_test_gettimeofday(); -#if _POSIX_TIMERS > 0 - -#ifdef CLOCK_REALTIME vdso_test_clock(CLOCK_REALTIME); -#endif - -#ifdef CLOCK_BOOTTIME vdso_test_clock(CLOCK_BOOTTIME); -#endif - -#ifdef CLOCK_TAI vdso_test_clock(CLOCK_TAI); -#endif - -#ifdef CLOCK_REALTIME_COARSE vdso_test_clock(CLOCK_REALTIME_COARSE); -#endif - -#ifdef CLOCK_MONOTONIC vdso_test_clock(CLOCK_MONOTONIC); -#endif - -#ifdef CLOCK_MONOTONIC_RAW vdso_test_clock(CLOCK_MONOTONIC_RAW); -#endif - -#ifdef CLOCK_MONOTONIC_COARSE vdso_test_clock(CLOCK_MONOTONIC_COARSE); -#endif - -#endif + vdso_test_clock(CLOCK_PROCESS_CPUTIME_ID); + vdso_test_clock(CLOCK_THREAD_CPUTIME_ID); vdso_test_time(); - ksft_print_cnts(); - return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL; + ksft_finished(); } diff --git a/tools/testing/selftests/vDSO/vdso_test_clock_getres.c b/tools/testing/selftests/vDSO/vdso_test_clock_getres.c deleted file mode 100644 index b5d5f59f725a..000000000000 --- a/tools/testing/selftests/vDSO/vdso_test_clock_getres.c +++ /dev/null @@ -1,123 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -/* - * vdso_clock_getres.c: Sample code to test clock_getres. - * Copyright (c) 2019 Arm Ltd. - * - * Compile with: - * gcc -std=gnu99 vdso_clock_getres.c - * - * Tested on ARM, ARM64, MIPS32, x86 (32-bit and 64-bit), - * Power (32-bit and 64-bit), S390x (32-bit and 64-bit). - * Might work on other architectures. - */ - -#define _GNU_SOURCE -#include <elf.h> -#include <fcntl.h> -#include <stdint.h> -#include <stdio.h> -#include <stdlib.h> -#include <time.h> -#include <sys/auxv.h> -#include <sys/mman.h> -#include <sys/time.h> -#include <unistd.h> -#include <sys/syscall.h> - -#include "../kselftest.h" - -static long syscall_clock_getres(clockid_t _clkid, struct timespec *_ts) -{ - long ret; - - ret = syscall(SYS_clock_getres, _clkid, _ts); - - return ret; -} - -const char *vdso_clock_name[12] = { - "CLOCK_REALTIME", - "CLOCK_MONOTONIC", - "CLOCK_PROCESS_CPUTIME_ID", - "CLOCK_THREAD_CPUTIME_ID", - "CLOCK_MONOTONIC_RAW", - "CLOCK_REALTIME_COARSE", - "CLOCK_MONOTONIC_COARSE", - "CLOCK_BOOTTIME", - "CLOCK_REALTIME_ALARM", - "CLOCK_BOOTTIME_ALARM", - "CLOCK_SGI_CYCLE", - "CLOCK_TAI", -}; - -/* - * This function calls clock_getres in vdso and by system call - * with different values for clock_id. - * - * Example of output: - * - * clock_id: CLOCK_REALTIME [PASS] - * clock_id: CLOCK_BOOTTIME [PASS] - * clock_id: CLOCK_TAI [PASS] - * clock_id: CLOCK_REALTIME_COARSE [PASS] - * clock_id: CLOCK_MONOTONIC [PASS] - * clock_id: CLOCK_MONOTONIC_RAW [PASS] - * clock_id: CLOCK_MONOTONIC_COARSE [PASS] - */ -static inline int vdso_test_clock(unsigned int clock_id) -{ - struct timespec x, y; - - printf("clock_id: %s", vdso_clock_name[clock_id]); - clock_getres(clock_id, &x); - syscall_clock_getres(clock_id, &y); - - if ((x.tv_sec != y.tv_sec) || (x.tv_nsec != y.tv_nsec)) { - printf(" [FAIL]\n"); - return KSFT_FAIL; - } - - printf(" [PASS]\n"); - return KSFT_PASS; -} - -int main(int argc, char **argv) -{ - int ret = 0; - -#if _POSIX_TIMERS > 0 - -#ifdef CLOCK_REALTIME - ret += vdso_test_clock(CLOCK_REALTIME); -#endif - -#ifdef CLOCK_BOOTTIME - ret += vdso_test_clock(CLOCK_BOOTTIME); -#endif - -#ifdef CLOCK_TAI - ret += vdso_test_clock(CLOCK_TAI); -#endif - -#ifdef CLOCK_REALTIME_COARSE - ret += vdso_test_clock(CLOCK_REALTIME_COARSE); -#endif - -#ifdef CLOCK_MONOTONIC - ret += vdso_test_clock(CLOCK_MONOTONIC); -#endif - -#ifdef CLOCK_MONOTONIC_RAW - ret += vdso_test_clock(CLOCK_MONOTONIC_RAW); -#endif - -#ifdef CLOCK_MONOTONIC_COARSE - ret += vdso_test_clock(CLOCK_MONOTONIC_COARSE); -#endif - -#endif - if (ret > 0) - return KSFT_FAIL; - - return KSFT_PASS; -} |