Diffstat (limited to 'arch/arm64')
-rw-r--r--  arch/arm64/Kconfig | 26
-rw-r--r--  arch/arm64/configs/defconfig | 2
-rw-r--r--  arch/arm64/crypto/Kconfig | 21
-rw-r--r--  arch/arm64/crypto/Makefile | 6
-rw-r--r--  arch/arm64/crypto/aes-ce-ccm-glue.c | 116
-rw-r--r--  arch/arm64/crypto/aes-ce-glue.c | 87
-rw-r--r--  arch/arm64/crypto/aes-glue.c | 139
-rw-r--r--  arch/arm64/crypto/aes-neonbs-glue.c | 150
-rw-r--r--  arch/arm64/crypto/ghash-ce-glue.c | 27
-rw-r--r--  arch/arm64/crypto/nhpoly1305-neon-glue.c | 5
-rw-r--r--  arch/arm64/crypto/polyval-ce-core.S | 361
-rw-r--r--  arch/arm64/crypto/polyval-ce-glue.c | 158
-rw-r--r--  arch/arm64/crypto/sha3-ce-core.S | 212
-rw-r--r--  arch/arm64/crypto/sha3-ce-glue.c | 151
-rw-r--r--  arch/arm64/crypto/sm3-ce-glue.c | 15
-rw-r--r--  arch/arm64/crypto/sm3-neon-glue.c | 16
-rw-r--r--  arch/arm64/crypto/sm4-ce-ccm-glue.c | 49
-rw-r--r--  arch/arm64/crypto/sm4-ce-cipher-glue.c | 10
-rw-r--r--  arch/arm64/crypto/sm4-ce-gcm-glue.c | 62
-rw-r--r--  arch/arm64/crypto/sm4-ce-glue.c | 214
-rw-r--r--  arch/arm64/crypto/sm4-neon-glue.c | 25
-rw-r--r--  arch/arm64/include/asm/alternative-macros.h | 8
-rw-r--r--  arch/arm64/include/asm/alternative.h | 4
-rw-r--r--  arch/arm64/include/asm/arch_gicv3.h | 4
-rw-r--r--  arch/arm64/include/asm/asm-extable.h | 6
-rw-r--r--  arch/arm64/include/asm/assembler.h | 12
-rw-r--r--  arch/arm64/include/asm/atomic_lse.h | 20
-rw-r--r--  arch/arm64/include/asm/barrier.h | 4
-rw-r--r--  arch/arm64/include/asm/cache.h | 4
-rw-r--r--  arch/arm64/include/asm/cpucaps.h | 4
-rw-r--r--  arch/arm64/include/asm/cpufeature.h | 8
-rw-r--r--  arch/arm64/include/asm/cputype.h | 6
-rw-r--r--  arch/arm64/include/asm/current.h | 4
-rw-r--r--  arch/arm64/include/asm/debug-monitors.h | 4
-rw-r--r--  arch/arm64/include/asm/efi.h | 13
-rw-r--r--  arch/arm64/include/asm/el2_setup.h | 4
-rw-r--r--  arch/arm64/include/asm/elf.h | 4
-rw-r--r--  arch/arm64/include/asm/esr.h | 4
-rw-r--r--  arch/arm64/include/asm/fixmap.h | 4
-rw-r--r--  arch/arm64/include/asm/fpsimd.h | 2
-rw-r--r--  arch/arm64/include/asm/fpu.h | 16
-rw-r--r--  arch/arm64/include/asm/ftrace.h | 6
-rw-r--r--  arch/arm64/include/asm/gpr-num.h | 6
-rw-r--r--  arch/arm64/include/asm/hwcap.h | 2
-rw-r--r--  arch/arm64/include/asm/image.h | 4
-rw-r--r--  arch/arm64/include/asm/insn.h | 4
-rw-r--r--  arch/arm64/include/asm/jump_label.h | 4
-rw-r--r--  arch/arm64/include/asm/kasan.h | 2
-rw-r--r--  arch/arm64/include/asm/kexec.h | 4
-rw-r--r--  arch/arm64/include/asm/kgdb.h | 4
-rw-r--r--  arch/arm64/include/asm/kvm_asm.h | 4
-rw-r--r--  arch/arm64/include/asm/kvm_mmu.h | 4
-rw-r--r--  arch/arm64/include/asm/kvm_mte.h | 4
-rw-r--r--  arch/arm64/include/asm/kvm_ptrauth.h | 6
-rw-r--r--  arch/arm64/include/asm/linkage.h | 2
-rw-r--r--  arch/arm64/include/asm/memory.h | 5
-rw-r--r--  arch/arm64/include/asm/mmu.h | 4
-rw-r--r--  arch/arm64/include/asm/mmu_context.h | 20
-rw-r--r--  arch/arm64/include/asm/mte-kasan.h | 4
-rw-r--r--  arch/arm64/include/asm/mte.h | 4
-rw-r--r--  arch/arm64/include/asm/neon.h | 4
-rw-r--r--  arch/arm64/include/asm/page.h | 4
-rw-r--r--  arch/arm64/include/asm/pgtable-hwdef.h | 143
-rw-r--r--  arch/arm64/include/asm/pgtable-prot.h | 6
-rw-r--r--  arch/arm64/include/asm/pgtable.h | 22
-rw-r--r--  arch/arm64/include/asm/proc-fns.h | 4
-rw-r--r--  arch/arm64/include/asm/processor.h | 11
-rw-r--r--  arch/arm64/include/asm/ptrace.h | 4
-rw-r--r--  arch/arm64/include/asm/rsi_smc.h | 4
-rw-r--r--  arch/arm64/include/asm/rwonce.h | 4
-rw-r--r--  arch/arm64/include/asm/scs.h | 4
-rw-r--r--  arch/arm64/include/asm/sdei.h | 4
-rw-r--r--  arch/arm64/include/asm/simd.h | 12
-rw-r--r--  arch/arm64/include/asm/smp.h | 4
-rw-r--r--  arch/arm64/include/asm/spectre.h | 4
-rw-r--r--  arch/arm64/include/asm/stacktrace/frame.h | 4
-rw-r--r--  arch/arm64/include/asm/suspend.h | 2
-rw-r--r--  arch/arm64/include/asm/sysreg.h | 12
-rw-r--r--  arch/arm64/include/asm/system_misc.h | 4
-rw-r--r--  arch/arm64/include/asm/thread_info.h | 2
-rw-r--r--  arch/arm64/include/asm/tlbflush.h | 85
-rw-r--r--  arch/arm64/include/asm/vdso.h | 4
-rw-r--r--  arch/arm64/include/asm/vdso/compat_barrier.h | 4
-rw-r--r--  arch/arm64/include/asm/vdso/compat_gettimeofday.h | 4
-rw-r--r--  arch/arm64/include/asm/vdso/getrandom.h | 4
-rw-r--r--  arch/arm64/include/asm/vdso/gettimeofday.h | 4
-rw-r--r--  arch/arm64/include/asm/vdso/processor.h | 4
-rw-r--r--  arch/arm64/include/asm/vdso/vsyscall.h | 4
-rw-r--r--  arch/arm64/include/asm/virt.h | 4
-rw-r--r--  arch/arm64/include/asm/vmap_stack.h | 4
-rw-r--r--  arch/arm64/include/asm/xor.h | 22
-rw-r--r--  arch/arm64/include/uapi/asm/kvm.h | 2
-rw-r--r--  arch/arm64/include/uapi/asm/ptrace.h | 4
-rw-r--r--  arch/arm64/include/uapi/asm/sigcontext.h | 4
-rw-r--r--  arch/arm64/kernel/acpi.c | 4
-rw-r--r--  arch/arm64/kernel/cpufeature.c | 6
-rw-r--r--  arch/arm64/kernel/efi.c | 46
-rw-r--r--  arch/arm64/kernel/entry-common.c | 28
-rw-r--r--  arch/arm64/kernel/entry-ftrace.S | 2
-rw-r--r--  arch/arm64/kernel/fpsimd.c | 83
-rw-r--r--  arch/arm64/kernel/ftrace.c | 2
-rw-r--r--  arch/arm64/kernel/irq.c | 2
-rw-r--r--  arch/arm64/kernel/machine_kexec.c | 2
-rw-r--r--  arch/arm64/kernel/pi/map_kernel.c | 8
-rw-r--r--  arch/arm64/kernel/probes/uprobes.c | 2
-rw-r--r--  arch/arm64/kernel/ptrace.c | 40
-rw-r--r--  arch/arm64/kernel/sdei.c | 6
-rw-r--r--  arch/arm64/kernel/smp.c | 4
-rw-r--r--  arch/arm64/kernel/syscall.c | 2
-rw-r--r--  arch/arm64/kernel/traps.c | 2
-rw-r--r--  arch/arm64/kernel/vmcore_info.c | 2
-rw-r--r--  arch/arm64/kvm/arch_timer.c | 2
-rw-r--r--  arch/arm64/kvm/arm.c | 2
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/ffa.c | 2
-rw-r--r--  arch/arm64/kvm/mmu.c | 2
-rw-r--r--  arch/arm64/kvm/nested.c | 2
-rw-r--r--  arch/arm64/mm/contpte.c | 3
-rw-r--r--  arch/arm64/mm/fault.c | 8
-rw-r--r--  arch/arm64/mm/mmu.c | 220
-rw-r--r--  arch/arm64/mm/pageattr.c | 12
-rw-r--r--  arch/arm64/mm/pgd.c | 2
-rw-r--r--  arch/arm64/mm/proc.S | 36
-rw-r--r--  arch/arm64/net/bpf_jit_comp.c | 2
-rwxr-xr-x  arch/arm64/tools/gen-sysreg.awk | 146
-rw-r--r--  arch/arm64/tools/sysreg | 21
125 files changed, 1237 insertions, 1955 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 6663ffd23f25..65db12f66b8f 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -47,7 +47,6 @@ config ARM64
select ARCH_HAS_SETUP_DMA_OPS
select ARCH_HAS_SET_DIRECT_MAP
select ARCH_HAS_SET_MEMORY
- select ARCH_HAS_MEM_ENCRYPT
select ARCH_HAS_FORCE_DMA_UNENCRYPTED
select ARCH_STACKWALK
select ARCH_HAS_STRICT_KERNEL_RWX
@@ -2023,6 +2022,31 @@ config ARM64_TLB_RANGE
ARMv8.4-TLBI provides TLBI invalidation instruction that apply to a
range of input addresses.
+config ARM64_MPAM
+ bool "Enable support for MPAM"
+ select ARM64_MPAM_DRIVER if EXPERT # does nothing yet
+ select ACPI_MPAM if ACPI
+ help
+ Memory System Resource Partitioning and Monitoring (MPAM) is an
+ optional extension to the Arm architecture that allows each
+ transaction issued to the memory system to be labelled with a
+ Partition identifier (PARTID) and Performance Monitoring Group
+ identifier (PMG).
+
+ Memory system components, such as the caches, can be configured with
+ policies to control how much of various physical resources (such as
+ memory bandwidth or cache memory) the transactions labelled with each
+ PARTID can consume. Depending on the capabilities of the hardware,
+ the PARTID and PMG can also be used as filtering criteria to measure
+ the memory system resource consumption of different parts of a
+ workload.
+
+ Use of this extension requires CPU support, support in the
+ Memory System Components (MSC), and a description from firmware
+ of where the MSCs are in the address space.
+
+ MPAM is exposed to user-space via the resctrl pseudo filesystem.
+
endmenu # "ARMv8.4 architectural features"
menu "ARMv8.5 architectural features"
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 1a48faad2473..997fa7cd9de5 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -1783,10 +1783,10 @@ CONFIG_CRYPTO_CHACHA20=m
CONFIG_CRYPTO_BENCHMARK=m
CONFIG_CRYPTO_ECHAINIV=y
CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_ANSI_CPRNG=y
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_GHASH_ARM64_CE=y
-CONFIG_CRYPTO_SHA3_ARM64=m
CONFIG_CRYPTO_SM3_ARM64_CE=m
CONFIG_CRYPTO_AES_ARM64_CE_BLK=y
CONFIG_CRYPTO_AES_ARM64_BS=m
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index 91f3093eee6a..bdd276a6e540 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -25,17 +25,6 @@ config CRYPTO_NHPOLY1305_NEON
Architecture: arm64 using:
- NEON (Advanced SIMD) extensions
-config CRYPTO_SHA3_ARM64
- tristate "Hash functions: SHA-3 (ARMv8.2 Crypto Extensions)"
- depends on KERNEL_MODE_NEON
- select CRYPTO_HASH
- select CRYPTO_SHA3
- help
- SHA-3 secure hash algorithms (FIPS 202)
-
- Architecture: arm64 using:
- - ARMv8.2 Crypto Extensions
-
config CRYPTO_SM3_NEON
tristate "Hash functions: SM3 (NEON)"
depends on KERNEL_MODE_NEON
@@ -58,16 +47,6 @@ config CRYPTO_SM3_ARM64_CE
Architecture: arm64 using:
- ARMv8.2 Crypto Extensions
-config CRYPTO_POLYVAL_ARM64_CE
- tristate "Hash functions: POLYVAL (ARMv8 Crypto Extensions)"
- depends on KERNEL_MODE_NEON
- select CRYPTO_POLYVAL
- help
- POLYVAL hash function for HCTR2
-
- Architecture: arm64 using:
- - ARMv8 Crypto Extensions
-
config CRYPTO_AES_ARM64
tristate "Ciphers: AES, modes: ECB, CBC, CTR, CTS, XCTR, XTS"
select CRYPTO_AES
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index a8b2cdbe202c..1e330aa08d3f 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -5,9 +5,6 @@
# Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
#
-obj-$(CONFIG_CRYPTO_SHA3_ARM64) += sha3-ce.o
-sha3-ce-y := sha3-ce-glue.o sha3-ce-core.o
-
obj-$(CONFIG_CRYPTO_SM3_NEON) += sm3-neon.o
sm3-neon-y := sm3-neon-glue.o sm3-neon-core.o
@@ -32,9 +29,6 @@ sm4-neon-y := sm4-neon-glue.o sm4-neon-core.o
obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o
ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o
-obj-$(CONFIG_CRYPTO_POLYVAL_ARM64_CE) += polyval-ce.o
-polyval-ce-y := polyval-ce-glue.o polyval-ce-core.o
-
obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o
aes-ce-cipher-y := aes-ce-core.o aes-ce-glue.o
diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c
index 2d791d51891b..c4fd648471f1 100644
--- a/arch/arm64/crypto/aes-ce-ccm-glue.c
+++ b/arch/arm64/crypto/aes-ce-ccm-glue.c
@@ -8,7 +8,6 @@
* Author: Ard Biesheuvel <ardb@kernel.org>
*/
-#include <asm/neon.h>
#include <linux/unaligned.h>
#include <crypto/aes.h>
#include <crypto/scatterwalk.h>
@@ -16,6 +15,8 @@
#include <crypto/internal/skcipher.h>
#include <linux/module.h>
+#include <asm/simd.h>
+
#include "aes-ce-setkey.h"
MODULE_IMPORT_NS("CRYPTO_INTERNAL");
@@ -114,11 +115,8 @@ static u32 ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
in += adv;
abytes -= adv;
- if (unlikely(rem)) {
- kernel_neon_end();
- kernel_neon_begin();
+ if (unlikely(rem))
macp = 0;
- }
} else {
u32 l = min(AES_BLOCK_SIZE - macp, abytes);
@@ -187,40 +185,38 @@ static int ccm_encrypt(struct aead_request *req)
if (unlikely(err))
return err;
- kernel_neon_begin();
+ scoped_ksimd() {
+ if (req->assoclen)
+ ccm_calculate_auth_mac(req, mac);
- if (req->assoclen)
- ccm_calculate_auth_mac(req, mac);
+ do {
+ u32 tail = walk.nbytes % AES_BLOCK_SIZE;
+ const u8 *src = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
+ u8 buf[AES_BLOCK_SIZE];
+ u8 *final_iv = NULL;
- do {
- u32 tail = walk.nbytes % AES_BLOCK_SIZE;
- const u8 *src = walk.src.virt.addr;
- u8 *dst = walk.dst.virt.addr;
- u8 buf[AES_BLOCK_SIZE];
- u8 *final_iv = NULL;
-
- if (walk.nbytes == walk.total) {
- tail = 0;
- final_iv = orig_iv;
- }
-
- if (unlikely(walk.nbytes < AES_BLOCK_SIZE))
- src = dst = memcpy(&buf[sizeof(buf) - walk.nbytes],
- src, walk.nbytes);
+ if (walk.nbytes == walk.total) {
+ tail = 0;
+ final_iv = orig_iv;
+ }
- ce_aes_ccm_encrypt(dst, src, walk.nbytes - tail,
- ctx->key_enc, num_rounds(ctx),
- mac, walk.iv, final_iv);
+ if (unlikely(walk.nbytes < AES_BLOCK_SIZE))
+ src = dst = memcpy(&buf[sizeof(buf) - walk.nbytes],
+ src, walk.nbytes);
- if (unlikely(walk.nbytes < AES_BLOCK_SIZE))
- memcpy(walk.dst.virt.addr, dst, walk.nbytes);
+ ce_aes_ccm_encrypt(dst, src, walk.nbytes - tail,
+ ctx->key_enc, num_rounds(ctx),
+ mac, walk.iv, final_iv);
- if (walk.nbytes) {
- err = skcipher_walk_done(&walk, tail);
- }
- } while (walk.nbytes);
+ if (unlikely(walk.nbytes < AES_BLOCK_SIZE))
+ memcpy(walk.dst.virt.addr, dst, walk.nbytes);
- kernel_neon_end();
+ if (walk.nbytes) {
+ err = skcipher_walk_done(&walk, tail);
+ }
+ } while (walk.nbytes);
+ }
if (unlikely(err))
return err;
@@ -254,40 +250,38 @@ static int ccm_decrypt(struct aead_request *req)
if (unlikely(err))
return err;
- kernel_neon_begin();
-
- if (req->assoclen)
- ccm_calculate_auth_mac(req, mac);
+ scoped_ksimd() {
+ if (req->assoclen)
+ ccm_calculate_auth_mac(req, mac);
- do {
- u32 tail = walk.nbytes % AES_BLOCK_SIZE;
- const u8 *src = walk.src.virt.addr;
- u8 *dst = walk.dst.virt.addr;
- u8 buf[AES_BLOCK_SIZE];
- u8 *final_iv = NULL;
-
- if (walk.nbytes == walk.total) {
- tail = 0;
- final_iv = orig_iv;
- }
+ do {
+ u32 tail = walk.nbytes % AES_BLOCK_SIZE;
+ const u8 *src = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
+ u8 buf[AES_BLOCK_SIZE];
+ u8 *final_iv = NULL;
- if (unlikely(walk.nbytes < AES_BLOCK_SIZE))
- src = dst = memcpy(&buf[sizeof(buf) - walk.nbytes],
- src, walk.nbytes);
+ if (walk.nbytes == walk.total) {
+ tail = 0;
+ final_iv = orig_iv;
+ }
- ce_aes_ccm_decrypt(dst, src, walk.nbytes - tail,
- ctx->key_enc, num_rounds(ctx),
- mac, walk.iv, final_iv);
+ if (unlikely(walk.nbytes < AES_BLOCK_SIZE))
+ src = dst = memcpy(&buf[sizeof(buf) - walk.nbytes],
+ src, walk.nbytes);
- if (unlikely(walk.nbytes < AES_BLOCK_SIZE))
- memcpy(walk.dst.virt.addr, dst, walk.nbytes);
+ ce_aes_ccm_decrypt(dst, src, walk.nbytes - tail,
+ ctx->key_enc, num_rounds(ctx),
+ mac, walk.iv, final_iv);
- if (walk.nbytes) {
- err = skcipher_walk_done(&walk, tail);
- }
- } while (walk.nbytes);
+ if (unlikely(walk.nbytes < AES_BLOCK_SIZE))
+ memcpy(walk.dst.virt.addr, dst, walk.nbytes);
- kernel_neon_end();
+ if (walk.nbytes) {
+ err = skcipher_walk_done(&walk, tail);
+ }
+ } while (walk.nbytes);
+ }
if (unlikely(err))
return err;
diff --git a/arch/arm64/crypto/aes-ce-glue.c b/arch/arm64/crypto/aes-ce-glue.c
index 00b8749013c5..a4dad370991d 100644
--- a/arch/arm64/crypto/aes-ce-glue.c
+++ b/arch/arm64/crypto/aes-ce-glue.c
@@ -52,9 +52,8 @@ static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
return;
}
- kernel_neon_begin();
- __aes_ce_encrypt(ctx->key_enc, dst, src, num_rounds(ctx));
- kernel_neon_end();
+ scoped_ksimd()
+ __aes_ce_encrypt(ctx->key_enc, dst, src, num_rounds(ctx));
}
static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
@@ -66,9 +65,8 @@ static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
return;
}
- kernel_neon_begin();
- __aes_ce_decrypt(ctx->key_dec, dst, src, num_rounds(ctx));
- kernel_neon_end();
+ scoped_ksimd()
+ __aes_ce_decrypt(ctx->key_dec, dst, src, num_rounds(ctx));
}
int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
@@ -94,47 +92,48 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
for (i = 0; i < kwords; i++)
ctx->key_enc[i] = get_unaligned_le32(in_key + i * sizeof(u32));
- kernel_neon_begin();
- for (i = 0; i < sizeof(rcon); i++) {
- u32 *rki = ctx->key_enc + (i * kwords);
- u32 *rko = rki + kwords;
-
- rko[0] = ror32(__aes_ce_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0];
- rko[1] = rko[0] ^ rki[1];
- rko[2] = rko[1] ^ rki[2];
- rko[3] = rko[2] ^ rki[3];
-
- if (key_len == AES_KEYSIZE_192) {
- if (i >= 7)
- break;
- rko[4] = rko[3] ^ rki[4];
- rko[5] = rko[4] ^ rki[5];
- } else if (key_len == AES_KEYSIZE_256) {
- if (i >= 6)
- break;
- rko[4] = __aes_ce_sub(rko[3]) ^ rki[4];
- rko[5] = rko[4] ^ rki[5];
- rko[6] = rko[5] ^ rki[6];
- rko[7] = rko[6] ^ rki[7];
+ scoped_ksimd() {
+ for (i = 0; i < sizeof(rcon); i++) {
+ u32 *rki = ctx->key_enc + (i * kwords);
+ u32 *rko = rki + kwords;
+
+ rko[0] = ror32(__aes_ce_sub(rki[kwords - 1]), 8) ^
+ rcon[i] ^ rki[0];
+ rko[1] = rko[0] ^ rki[1];
+ rko[2] = rko[1] ^ rki[2];
+ rko[3] = rko[2] ^ rki[3];
+
+ if (key_len == AES_KEYSIZE_192) {
+ if (i >= 7)
+ break;
+ rko[4] = rko[3] ^ rki[4];
+ rko[5] = rko[4] ^ rki[5];
+ } else if (key_len == AES_KEYSIZE_256) {
+ if (i >= 6)
+ break;
+ rko[4] = __aes_ce_sub(rko[3]) ^ rki[4];
+ rko[5] = rko[4] ^ rki[5];
+ rko[6] = rko[5] ^ rki[6];
+ rko[7] = rko[6] ^ rki[7];
+ }
}
- }
- /*
- * Generate the decryption keys for the Equivalent Inverse Cipher.
- * This involves reversing the order of the round keys, and applying
- * the Inverse Mix Columns transformation on all but the first and
- * the last one.
- */
- key_enc = (struct aes_block *)ctx->key_enc;
- key_dec = (struct aes_block *)ctx->key_dec;
- j = num_rounds(ctx);
-
- key_dec[0] = key_enc[j];
- for (i = 1, j--; j > 0; i++, j--)
- __aes_ce_invert(key_dec + i, key_enc + j);
- key_dec[i] = key_enc[0];
+ /*
+ * Generate the decryption keys for the Equivalent Inverse
+ * Cipher. This involves reversing the order of the round
+ * keys, and applying the Inverse Mix Columns transformation on
+ * all but the first and the last one.
+ */
+ key_enc = (struct aes_block *)ctx->key_enc;
+ key_dec = (struct aes_block *)ctx->key_dec;
+ j = num_rounds(ctx);
+
+ key_dec[0] = key_enc[j];
+ for (i = 1, j--; j > 0; i++, j--)
+ __aes_ce_invert(key_dec + i, key_enc + j);
+ key_dec[i] = key_enc[0];
+ }
- kernel_neon_end();
return 0;
}
EXPORT_SYMBOL(ce_aes_expandkey);
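The change to this file shows the pattern applied throughout the series: paired kernel_neon_begin()/kernel_neon_end() calls are replaced by a scoped_ksimd() block, so the NEON-enabled region is delimited by scope rather than by explicit begin/end calls. A minimal sketch of how such a guard could be layered on the existing begin/end API follows; it is an assumption for illustration, not the actual <asm/simd.h> definition, which may differ.

/*
 * Hedged sketch of a scoped SIMD guard: run the attached statement or
 * block exactly once between kernel_neon_begin() and kernel_neon_end().
 * The real scoped_ksimd() in <asm/simd.h> may be implemented differently.
 */
#define scoped_ksimd()							\
	for (bool __once = (kernel_neon_begin(), true); __once;	\
	     kernel_neon_end(), __once = false)

/*
 * Usage then reads like the converted call sites above (kernel context
 * assumed; __aes_ce_encrypt() and num_rounds() are the helpers already
 * used in aes-ce-glue.c):
 */
static void example_encrypt_block(struct crypto_aes_ctx *ctx,
				  u8 dst[], u8 const src[])
{
	scoped_ksimd()
		__aes_ce_encrypt(ctx->key_enc, dst, src, num_rounds(ctx));
}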
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index 5e207ff34482..b087b900d279 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -5,8 +5,6 @@
* Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
*/
-#include <asm/hwcap.h>
-#include <asm/neon.h>
#include <crypto/aes.h>
#include <crypto/ctr.h>
#include <crypto/internal/hash.h>
@@ -20,6 +18,9 @@
#include <linux/module.h>
#include <linux/string.h>
+#include <asm/hwcap.h>
+#include <asm/simd.h>
+
#include "aes-ce-setkey.h"
#ifdef USE_V8_CRYPTO_EXTENSIONS
@@ -186,10 +187,9 @@ static int __maybe_unused ecb_encrypt(struct skcipher_request *req)
err = skcipher_walk_virt(&walk, req, false);
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
- kernel_neon_begin();
- aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key_enc, rounds, blocks);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_enc, rounds, blocks);
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
return err;
@@ -206,10 +206,9 @@ static int __maybe_unused ecb_decrypt(struct skcipher_request *req)
err = skcipher_walk_virt(&walk, req, false);
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
- kernel_neon_begin();
- aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key_dec, rounds, blocks);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_dec, rounds, blocks);
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
return err;
@@ -224,10 +223,9 @@ static int cbc_encrypt_walk(struct skcipher_request *req,
unsigned int blocks;
while ((blocks = (walk->nbytes / AES_BLOCK_SIZE))) {
- kernel_neon_begin();
- aes_cbc_encrypt(walk->dst.virt.addr, walk->src.virt.addr,
- ctx->key_enc, rounds, blocks, walk->iv);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_cbc_encrypt(walk->dst.virt.addr, walk->src.virt.addr,
+ ctx->key_enc, rounds, blocks, walk->iv);
err = skcipher_walk_done(walk, walk->nbytes % AES_BLOCK_SIZE);
}
return err;
@@ -253,10 +251,9 @@ static int cbc_decrypt_walk(struct skcipher_request *req,
unsigned int blocks;
while ((blocks = (walk->nbytes / AES_BLOCK_SIZE))) {
- kernel_neon_begin();
- aes_cbc_decrypt(walk->dst.virt.addr, walk->src.virt.addr,
- ctx->key_dec, rounds, blocks, walk->iv);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_cbc_decrypt(walk->dst.virt.addr, walk->src.virt.addr,
+ ctx->key_dec, rounds, blocks, walk->iv);
err = skcipher_walk_done(walk, walk->nbytes % AES_BLOCK_SIZE);
}
return err;
@@ -322,10 +319,9 @@ static int cts_cbc_encrypt(struct skcipher_request *req)
if (err)
return err;
- kernel_neon_begin();
- aes_cbc_cts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key_enc, rounds, walk.nbytes, walk.iv);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_cbc_cts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_enc, rounds, walk.nbytes, walk.iv);
return skcipher_walk_done(&walk, 0);
}
@@ -379,10 +375,9 @@ static int cts_cbc_decrypt(struct skcipher_request *req)
if (err)
return err;
- kernel_neon_begin();
- aes_cbc_cts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key_dec, rounds, walk.nbytes, walk.iv);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_cbc_cts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_dec, rounds, walk.nbytes, walk.iv);
return skcipher_walk_done(&walk, 0);
}
@@ -399,11 +394,11 @@ static int __maybe_unused essiv_cbc_encrypt(struct skcipher_request *req)
blocks = walk.nbytes / AES_BLOCK_SIZE;
if (blocks) {
- kernel_neon_begin();
- aes_essiv_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key1.key_enc, rounds, blocks,
- req->iv, ctx->key2.key_enc);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_essiv_cbc_encrypt(walk.dst.virt.addr,
+ walk.src.virt.addr,
+ ctx->key1.key_enc, rounds, blocks,
+ req->iv, ctx->key2.key_enc);
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
return err ?: cbc_encrypt_walk(req, &walk);
@@ -421,11 +416,11 @@ static int __maybe_unused essiv_cbc_decrypt(struct skcipher_request *req)
blocks = walk.nbytes / AES_BLOCK_SIZE;
if (blocks) {
- kernel_neon_begin();
- aes_essiv_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key1.key_dec, rounds, blocks,
- req->iv, ctx->key2.key_enc);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_essiv_cbc_decrypt(walk.dst.virt.addr,
+ walk.src.virt.addr,
+ ctx->key1.key_dec, rounds, blocks,
+ req->iv, ctx->key2.key_enc);
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
return err ?: cbc_decrypt_walk(req, &walk);
@@ -461,10 +456,9 @@ static int __maybe_unused xctr_encrypt(struct skcipher_request *req)
else if (nbytes < walk.total)
nbytes &= ~(AES_BLOCK_SIZE - 1);
- kernel_neon_begin();
- aes_xctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes,
- walk.iv, byte_ctr);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_xctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes,
+ walk.iv, byte_ctr);
if (unlikely(nbytes < AES_BLOCK_SIZE))
memcpy(walk.dst.virt.addr,
@@ -506,10 +500,9 @@ static int __maybe_unused ctr_encrypt(struct skcipher_request *req)
else if (nbytes < walk.total)
nbytes &= ~(AES_BLOCK_SIZE - 1);
- kernel_neon_begin();
- aes_ctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes,
- walk.iv);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_ctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes,
+ walk.iv);
if (unlikely(nbytes < AES_BLOCK_SIZE))
memcpy(walk.dst.virt.addr,
@@ -562,11 +555,10 @@ static int __maybe_unused xts_encrypt(struct skcipher_request *req)
if (walk.nbytes < walk.total)
nbytes &= ~(AES_BLOCK_SIZE - 1);
- kernel_neon_begin();
- aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key1.key_enc, rounds, nbytes,
- ctx->key2.key_enc, walk.iv, first);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key1.key_enc, rounds, nbytes,
+ ctx->key2.key_enc, walk.iv, first);
err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
@@ -584,11 +576,10 @@ static int __maybe_unused xts_encrypt(struct skcipher_request *req)
if (err)
return err;
- kernel_neon_begin();
- aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key1.key_enc, rounds, walk.nbytes,
- ctx->key2.key_enc, walk.iv, first);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key1.key_enc, rounds, walk.nbytes,
+ ctx->key2.key_enc, walk.iv, first);
return skcipher_walk_done(&walk, 0);
}
@@ -634,11 +625,10 @@ static int __maybe_unused xts_decrypt(struct skcipher_request *req)
if (walk.nbytes < walk.total)
nbytes &= ~(AES_BLOCK_SIZE - 1);
- kernel_neon_begin();
- aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key1.key_dec, rounds, nbytes,
- ctx->key2.key_enc, walk.iv, first);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key1.key_dec, rounds, nbytes,
+ ctx->key2.key_enc, walk.iv, first);
err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
@@ -657,11 +647,10 @@ static int __maybe_unused xts_decrypt(struct skcipher_request *req)
return err;
- kernel_neon_begin();
- aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key1.key_dec, rounds, walk.nbytes,
- ctx->key2.key_enc, walk.iv, first);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key1.key_dec, rounds, walk.nbytes,
+ ctx->key2.key_enc, walk.iv, first);
return skcipher_walk_done(&walk, 0);
}
@@ -808,10 +797,9 @@ static int cmac_setkey(struct crypto_shash *tfm, const u8 *in_key,
return err;
/* encrypt the zero vector */
- kernel_neon_begin();
- aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){}, ctx->key.key_enc,
- rounds, 1);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){},
+ ctx->key.key_enc, rounds, 1);
cmac_gf128_mul_by_x(consts, consts);
cmac_gf128_mul_by_x(consts + 1, consts);
@@ -837,10 +825,10 @@ static int xcbc_setkey(struct crypto_shash *tfm, const u8 *in_key,
if (err)
return err;
- kernel_neon_begin();
- aes_ecb_encrypt(key, ks[0], ctx->key.key_enc, rounds, 1);
- aes_ecb_encrypt(ctx->consts, ks[1], ctx->key.key_enc, rounds, 2);
- kernel_neon_end();
+ scoped_ksimd() {
+ aes_ecb_encrypt(key, ks[0], ctx->key.key_enc, rounds, 1);
+ aes_ecb_encrypt(ctx->consts, ks[1], ctx->key.key_enc, rounds, 2);
+ }
return cbcmac_setkey(tfm, key, sizeof(key));
}
@@ -860,10 +848,9 @@ static void mac_do_update(struct crypto_aes_ctx *ctx, u8 const in[], int blocks,
int rem;
do {
- kernel_neon_begin();
- rem = aes_mac_update(in, ctx->key_enc, rounds, blocks,
- dg, enc_before, !enc_before);
- kernel_neon_end();
+ scoped_ksimd()
+ rem = aes_mac_update(in, ctx->key_enc, rounds, blocks,
+ dg, enc_before, !enc_before);
in += (blocks - rem) * AES_BLOCK_SIZE;
blocks = rem;
} while (blocks);
diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c
index c4a623e86593..d496effb0a5b 100644
--- a/arch/arm64/crypto/aes-neonbs-glue.c
+++ b/arch/arm64/crypto/aes-neonbs-glue.c
@@ -85,9 +85,8 @@ static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
ctx->rounds = 6 + key_len / 4;
- kernel_neon_begin();
- aesbs_convert_key(ctx->rk, rk.key_enc, ctx->rounds);
- kernel_neon_end();
+ scoped_ksimd()
+ aesbs_convert_key(ctx->rk, rk.key_enc, ctx->rounds);
return 0;
}
@@ -110,10 +109,9 @@ static int __ecb_crypt(struct skcipher_request *req,
blocks = round_down(blocks,
walk.stride / AES_BLOCK_SIZE);
- kernel_neon_begin();
- fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk,
- ctx->rounds, blocks);
- kernel_neon_end();
+ scoped_ksimd()
+ fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk,
+ ctx->rounds, blocks);
err = skcipher_walk_done(&walk,
walk.nbytes - blocks * AES_BLOCK_SIZE);
}
@@ -146,9 +144,8 @@ static int aesbs_cbc_ctr_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
memcpy(ctx->enc, rk.key_enc, sizeof(ctx->enc));
- kernel_neon_begin();
- aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds);
- kernel_neon_end();
+ scoped_ksimd()
+ aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds);
memzero_explicit(&rk, sizeof(rk));
return 0;
@@ -167,11 +164,11 @@ static int cbc_encrypt(struct skcipher_request *req)
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
/* fall back to the non-bitsliced NEON implementation */
- kernel_neon_begin();
- neon_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->enc, ctx->key.rounds, blocks,
- walk.iv);
- kernel_neon_end();
+ scoped_ksimd()
+ neon_aes_cbc_encrypt(walk.dst.virt.addr,
+ walk.src.virt.addr,
+ ctx->enc, ctx->key.rounds, blocks,
+ walk.iv);
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
return err;
@@ -193,11 +190,10 @@ static int cbc_decrypt(struct skcipher_request *req)
blocks = round_down(blocks,
walk.stride / AES_BLOCK_SIZE);
- kernel_neon_begin();
- aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key.rk, ctx->key.rounds, blocks,
- walk.iv);
- kernel_neon_end();
+ scoped_ksimd()
+ aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key.rk, ctx->key.rounds, blocks,
+ walk.iv);
err = skcipher_walk_done(&walk,
walk.nbytes - blocks * AES_BLOCK_SIZE);
}
@@ -220,30 +216,32 @@ static int ctr_encrypt(struct skcipher_request *req)
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
- kernel_neon_begin();
- if (blocks >= 8) {
- aesbs_ctr_encrypt(dst, src, ctx->key.rk, ctx->key.rounds,
- blocks, walk.iv);
- dst += blocks * AES_BLOCK_SIZE;
- src += blocks * AES_BLOCK_SIZE;
- }
- if (nbytes && walk.nbytes == walk.total) {
- u8 buf[AES_BLOCK_SIZE];
- u8 *d = dst;
-
- if (unlikely(nbytes < AES_BLOCK_SIZE))
- src = dst = memcpy(buf + sizeof(buf) - nbytes,
- src, nbytes);
-
- neon_aes_ctr_encrypt(dst, src, ctx->enc, ctx->key.rounds,
- nbytes, walk.iv);
+ scoped_ksimd() {
+ if (blocks >= 8) {
+ aesbs_ctr_encrypt(dst, src, ctx->key.rk,
+ ctx->key.rounds, blocks,
+ walk.iv);
+ dst += blocks * AES_BLOCK_SIZE;
+ src += blocks * AES_BLOCK_SIZE;
+ }
+ if (nbytes && walk.nbytes == walk.total) {
+ u8 buf[AES_BLOCK_SIZE];
+ u8 *d = dst;
+
+ if (unlikely(nbytes < AES_BLOCK_SIZE))
+ src = dst = memcpy(buf + sizeof(buf) -
+ nbytes, src, nbytes);
+
+ neon_aes_ctr_encrypt(dst, src, ctx->enc,
+ ctx->key.rounds, nbytes,
+ walk.iv);
- if (unlikely(nbytes < AES_BLOCK_SIZE))
- memcpy(d, dst, nbytes);
+ if (unlikely(nbytes < AES_BLOCK_SIZE))
+ memcpy(d, dst, nbytes);
- nbytes = 0;
+ nbytes = 0;
+ }
}
- kernel_neon_end();
err = skcipher_walk_done(&walk, nbytes);
}
return err;
@@ -320,33 +318,33 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt,
in = walk.src.virt.addr;
nbytes = walk.nbytes;
- kernel_neon_begin();
- if (blocks >= 8) {
- if (first == 1)
- neon_aes_ecb_encrypt(walk.iv, walk.iv,
- ctx->twkey,
- ctx->key.rounds, 1);
- first = 2;
-
- fn(out, in, ctx->key.rk, ctx->key.rounds, blocks,
- walk.iv);
-
- out += blocks * AES_BLOCK_SIZE;
- in += blocks * AES_BLOCK_SIZE;
- nbytes -= blocks * AES_BLOCK_SIZE;
+ scoped_ksimd() {
+ if (blocks >= 8) {
+ if (first == 1)
+ neon_aes_ecb_encrypt(walk.iv, walk.iv,
+ ctx->twkey,
+ ctx->key.rounds, 1);
+ first = 2;
+
+ fn(out, in, ctx->key.rk, ctx->key.rounds, blocks,
+ walk.iv);
+
+ out += blocks * AES_BLOCK_SIZE;
+ in += blocks * AES_BLOCK_SIZE;
+ nbytes -= blocks * AES_BLOCK_SIZE;
+ }
+ if (walk.nbytes == walk.total && nbytes > 0) {
+ if (encrypt)
+ neon_aes_xts_encrypt(out, in, ctx->cts.key_enc,
+ ctx->key.rounds, nbytes,
+ ctx->twkey, walk.iv, first);
+ else
+ neon_aes_xts_decrypt(out, in, ctx->cts.key_dec,
+ ctx->key.rounds, nbytes,
+ ctx->twkey, walk.iv, first);
+ nbytes = first = 0;
+ }
}
- if (walk.nbytes == walk.total && nbytes > 0) {
- if (encrypt)
- neon_aes_xts_encrypt(out, in, ctx->cts.key_enc,
- ctx->key.rounds, nbytes,
- ctx->twkey, walk.iv, first);
- else
- neon_aes_xts_decrypt(out, in, ctx->cts.key_dec,
- ctx->key.rounds, nbytes,
- ctx->twkey, walk.iv, first);
- nbytes = first = 0;
- }
- kernel_neon_end();
err = skcipher_walk_done(&walk, nbytes);
}
@@ -369,14 +367,16 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt,
in = walk.src.virt.addr;
nbytes = walk.nbytes;
- kernel_neon_begin();
- if (encrypt)
- neon_aes_xts_encrypt(out, in, ctx->cts.key_enc, ctx->key.rounds,
- nbytes, ctx->twkey, walk.iv, first);
- else
- neon_aes_xts_decrypt(out, in, ctx->cts.key_dec, ctx->key.rounds,
- nbytes, ctx->twkey, walk.iv, first);
- kernel_neon_end();
+ scoped_ksimd() {
+ if (encrypt)
+ neon_aes_xts_encrypt(out, in, ctx->cts.key_enc,
+ ctx->key.rounds, nbytes, ctx->twkey,
+ walk.iv, first);
+ else
+ neon_aes_xts_decrypt(out, in, ctx->cts.key_dec,
+ ctx->key.rounds, nbytes, ctx->twkey,
+ walk.iv, first);
+ }
return skcipher_walk_done(&walk, 0);
}
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index 4995b6e22335..7951557a285a 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -5,7 +5,6 @@
* Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org>
*/
-#include <asm/neon.h>
#include <crypto/aes.h>
#include <crypto/b128ops.h>
#include <crypto/gcm.h>
@@ -22,6 +21,8 @@
#include <linux/string.h>
#include <linux/unaligned.h>
+#include <asm/simd.h>
+
MODULE_DESCRIPTION("GHASH and AES-GCM using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
@@ -74,9 +75,8 @@ void ghash_do_simd_update(int blocks, u64 dg[], const char *src,
u64 const h[][2],
const char *head))
{
- kernel_neon_begin();
- simd_update(blocks, dg, src, key->h, head);
- kernel_neon_end();
+ scoped_ksimd()
+ simd_update(blocks, dg, src, key->h, head);
}
/* avoid hogging the CPU for too long */
@@ -329,11 +329,10 @@ static int gcm_encrypt(struct aead_request *req, char *iv, int assoclen)
tag = NULL;
}
- kernel_neon_begin();
- pmull_gcm_encrypt(nbytes, dst, src, ctx->ghash_key.h,
- dg, iv, ctx->aes_key.key_enc, nrounds,
- tag);
- kernel_neon_end();
+ scoped_ksimd()
+ pmull_gcm_encrypt(nbytes, dst, src, ctx->ghash_key.h,
+ dg, iv, ctx->aes_key.key_enc, nrounds,
+ tag);
if (unlikely(!nbytes))
break;
@@ -399,11 +398,11 @@ static int gcm_decrypt(struct aead_request *req, char *iv, int assoclen)
tag = NULL;
}
- kernel_neon_begin();
- ret = pmull_gcm_decrypt(nbytes, dst, src, ctx->ghash_key.h,
- dg, iv, ctx->aes_key.key_enc,
- nrounds, tag, otag, authsize);
- kernel_neon_end();
+ scoped_ksimd()
+ ret = pmull_gcm_decrypt(nbytes, dst, src,
+ ctx->ghash_key.h,
+ dg, iv, ctx->aes_key.key_enc,
+ nrounds, tag, otag, authsize);
if (unlikely(!nbytes))
break;
diff --git a/arch/arm64/crypto/nhpoly1305-neon-glue.c b/arch/arm64/crypto/nhpoly1305-neon-glue.c
index e4a0b463f080..013de6ac569a 100644
--- a/arch/arm64/crypto/nhpoly1305-neon-glue.c
+++ b/arch/arm64/crypto/nhpoly1305-neon-glue.c
@@ -25,9 +25,8 @@ static int nhpoly1305_neon_update(struct shash_desc *desc,
do {
unsigned int n = min_t(unsigned int, srclen, SZ_4K);
- kernel_neon_begin();
- crypto_nhpoly1305_update_helper(desc, src, n, nh_neon);
- kernel_neon_end();
+ scoped_ksimd()
+ crypto_nhpoly1305_update_helper(desc, src, n, nh_neon);
src += n;
srclen -= n;
} while (srclen);
diff --git a/arch/arm64/crypto/polyval-ce-core.S b/arch/arm64/crypto/polyval-ce-core.S
deleted file mode 100644
index b5326540d2e3..000000000000
--- a/arch/arm64/crypto/polyval-ce-core.S
+++ /dev/null
@@ -1,361 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Implementation of POLYVAL using ARMv8 Crypto Extensions.
- *
- * Copyright 2021 Google LLC
- */
-/*
- * This is an efficient implementation of POLYVAL using ARMv8 Crypto Extensions
- * It works on 8 blocks at a time, by precomputing the first 8 keys powers h^8,
- * ..., h^1 in the POLYVAL finite field. This precomputation allows us to split
- * finite field multiplication into two steps.
- *
- * In the first step, we consider h^i, m_i as normal polynomials of degree less
- * than 128. We then compute p(x) = h^8m_0 + ... + h^1m_7 where multiplication
- * is simply polynomial multiplication.
- *
- * In the second step, we compute the reduction of p(x) modulo the finite field
- * modulus g(x) = x^128 + x^127 + x^126 + x^121 + 1.
- *
- * This two step process is equivalent to computing h^8m_0 + ... + h^1m_7 where
- * multiplication is finite field multiplication. The advantage is that the
- * two-step process only requires 1 finite field reduction for every 8
- * polynomial multiplications. Further parallelism is gained by interleaving the
- * multiplications and polynomial reductions.
- */
-
-#include <linux/linkage.h>
-#define STRIDE_BLOCKS 8
-
-KEY_POWERS .req x0
-MSG .req x1
-BLOCKS_LEFT .req x2
-ACCUMULATOR .req x3
-KEY_START .req x10
-EXTRA_BYTES .req x11
-TMP .req x13
-
-M0 .req v0
-M1 .req v1
-M2 .req v2
-M3 .req v3
-M4 .req v4
-M5 .req v5
-M6 .req v6
-M7 .req v7
-KEY8 .req v8
-KEY7 .req v9
-KEY6 .req v10
-KEY5 .req v11
-KEY4 .req v12
-KEY3 .req v13
-KEY2 .req v14
-KEY1 .req v15
-PL .req v16
-PH .req v17
-TMP_V .req v18
-LO .req v20
-MI .req v21
-HI .req v22
-SUM .req v23
-GSTAR .req v24
-
- .text
-
- .arch armv8-a+crypto
- .align 4
-
-.Lgstar:
- .quad 0xc200000000000000, 0xc200000000000000
-
-/*
- * Computes the product of two 128-bit polynomials in X and Y and XORs the
- * components of the 256-bit product into LO, MI, HI.
- *
- * Given:
- * X = [X_1 : X_0]
- * Y = [Y_1 : Y_0]
- *
- * We compute:
- * LO += X_0 * Y_0
- * MI += (X_0 + X_1) * (Y_0 + Y_1)
- * HI += X_1 * Y_1
- *
- * Later, the 256-bit result can be extracted as:
- * [HI_1 : HI_0 + HI_1 + MI_1 + LO_1 : LO_1 + HI_0 + MI_0 + LO_0 : LO_0]
- * This step is done when computing the polynomial reduction for efficiency
- * reasons.
- *
- * Karatsuba multiplication is used instead of Schoolbook multiplication because
- * it was found to be slightly faster on ARM64 CPUs.
- *
- */
-.macro karatsuba1 X Y
- X .req \X
- Y .req \Y
- ext v25.16b, X.16b, X.16b, #8
- ext v26.16b, Y.16b, Y.16b, #8
- eor v25.16b, v25.16b, X.16b
- eor v26.16b, v26.16b, Y.16b
- pmull2 v28.1q, X.2d, Y.2d
- pmull v29.1q, X.1d, Y.1d
- pmull v27.1q, v25.1d, v26.1d
- eor HI.16b, HI.16b, v28.16b
- eor LO.16b, LO.16b, v29.16b
- eor MI.16b, MI.16b, v27.16b
- .unreq X
- .unreq Y
-.endm
-
-/*
- * Same as karatsuba1, except overwrites HI, LO, MI rather than XORing into
- * them.
- */
-.macro karatsuba1_store X Y
- X .req \X
- Y .req \Y
- ext v25.16b, X.16b, X.16b, #8
- ext v26.16b, Y.16b, Y.16b, #8
- eor v25.16b, v25.16b, X.16b
- eor v26.16b, v26.16b, Y.16b
- pmull2 HI.1q, X.2d, Y.2d
- pmull LO.1q, X.1d, Y.1d
- pmull MI.1q, v25.1d, v26.1d
- .unreq X
- .unreq Y
-.endm
-
-/*
- * Computes the 256-bit polynomial represented by LO, HI, MI. Stores
- * the result in PL, PH.
- * [PH : PL] =
- * [HI_1 : HI_1 + HI_0 + MI_1 + LO_1 : HI_0 + MI_0 + LO_1 + LO_0 : LO_0]
- */
-.macro karatsuba2
- // v4 = [HI_1 + MI_1 : HI_0 + MI_0]
- eor v4.16b, HI.16b, MI.16b
- // v4 = [HI_1 + MI_1 + LO_1 : HI_0 + MI_0 + LO_0]
- eor v4.16b, v4.16b, LO.16b
- // v5 = [HI_0 : LO_1]
- ext v5.16b, LO.16b, HI.16b, #8
- // v4 = [HI_1 + HI_0 + MI_1 + LO_1 : HI_0 + MI_0 + LO_1 + LO_0]
- eor v4.16b, v4.16b, v5.16b
- // HI = [HI_0 : HI_1]
- ext HI.16b, HI.16b, HI.16b, #8
- // LO = [LO_0 : LO_1]
- ext LO.16b, LO.16b, LO.16b, #8
- // PH = [HI_1 : HI_1 + HI_0 + MI_1 + LO_1]
- ext PH.16b, v4.16b, HI.16b, #8
- // PL = [HI_0 + MI_0 + LO_1 + LO_0 : LO_0]
- ext PL.16b, LO.16b, v4.16b, #8
-.endm
-
-/*
- * Computes the 128-bit reduction of PH : PL. Stores the result in dest.
- *
- * This macro computes p(x) mod g(x) where p(x) is in montgomery form and g(x) =
- * x^128 + x^127 + x^126 + x^121 + 1.
- *
- * We have a 256-bit polynomial PH : PL = P_3 : P_2 : P_1 : P_0 that is the
- * product of two 128-bit polynomials in Montgomery form. We need to reduce it
- * mod g(x). Also, since polynomials in Montgomery form have an "extra" factor
- * of x^128, this product has two extra factors of x^128. To get it back into
- * Montgomery form, we need to remove one of these factors by dividing by x^128.
- *
- * To accomplish both of these goals, we add multiples of g(x) that cancel out
- * the low 128 bits P_1 : P_0, leaving just the high 128 bits. Since the low
- * bits are zero, the polynomial division by x^128 can be done by right
- * shifting.
- *
- * Since the only nonzero term in the low 64 bits of g(x) is the constant term,
- * the multiple of g(x) needed to cancel out P_0 is P_0 * g(x). The CPU can
- * only do 64x64 bit multiplications, so split P_0 * g(x) into x^128 * P_0 +
- * x^64 * g*(x) * P_0 + P_0, where g*(x) is bits 64-127 of g(x). Adding this to
- * the original polynomial gives P_3 : P_2 + P_0 + T_1 : P_1 + T_0 : 0, where T
- * = T_1 : T_0 = g*(x) * P_0. Thus, bits 0-63 got "folded" into bits 64-191.
- *
- * Repeating this same process on the next 64 bits "folds" bits 64-127 into bits
- * 128-255, giving the answer in bits 128-255. This time, we need to cancel P_1
- * + T_0 in bits 64-127. The multiple of g(x) required is (P_1 + T_0) * g(x) *
- * x^64. Adding this to our previous computation gives P_3 + P_1 + T_0 + V_1 :
- * P_2 + P_0 + T_1 + V_0 : 0 : 0, where V = V_1 : V_0 = g*(x) * (P_1 + T_0).
- *
- * So our final computation is:
- * T = T_1 : T_0 = g*(x) * P_0
- * V = V_1 : V_0 = g*(x) * (P_1 + T_0)
- * p(x) / x^{128} mod g(x) = P_3 + P_1 + T_0 + V_1 : P_2 + P_0 + T_1 + V_0
- *
- * The implementation below saves a XOR instruction by computing P_1 + T_0 : P_0
- * + T_1 and XORing into dest, rather than separately XORing P_1 : P_0 and T_0 :
- * T_1 into dest. This allows us to reuse P_1 + T_0 when computing V.
- */
-.macro montgomery_reduction dest
- DEST .req \dest
- // TMP_V = T_1 : T_0 = P_0 * g*(x)
- pmull TMP_V.1q, PL.1d, GSTAR.1d
- // TMP_V = T_0 : T_1
- ext TMP_V.16b, TMP_V.16b, TMP_V.16b, #8
- // TMP_V = P_1 + T_0 : P_0 + T_1
- eor TMP_V.16b, PL.16b, TMP_V.16b
- // PH = P_3 + P_1 + T_0 : P_2 + P_0 + T_1
- eor PH.16b, PH.16b, TMP_V.16b
- // TMP_V = V_1 : V_0 = (P_1 + T_0) * g*(x)
- pmull2 TMP_V.1q, TMP_V.2d, GSTAR.2d
- eor DEST.16b, PH.16b, TMP_V.16b
- .unreq DEST
-.endm
-
-/*
- * Compute Polyval on 8 blocks.
- *
- * If reduce is set, also computes the montgomery reduction of the
- * previous full_stride call and XORs with the first message block.
- * (m_0 + REDUCE(PL, PH))h^8 + ... + m_7h^1.
- * I.e., the first multiplication uses m_0 + REDUCE(PL, PH) instead of m_0.
- *
- * Sets PL, PH.
- */
-.macro full_stride reduce
- eor LO.16b, LO.16b, LO.16b
- eor MI.16b, MI.16b, MI.16b
- eor HI.16b, HI.16b, HI.16b
-
- ld1 {M0.16b, M1.16b, M2.16b, M3.16b}, [MSG], #64
- ld1 {M4.16b, M5.16b, M6.16b, M7.16b}, [MSG], #64
-
- karatsuba1 M7 KEY1
- .if \reduce
- pmull TMP_V.1q, PL.1d, GSTAR.1d
- .endif
-
- karatsuba1 M6 KEY2
- .if \reduce
- ext TMP_V.16b, TMP_V.16b, TMP_V.16b, #8
- .endif
-
- karatsuba1 M5 KEY3
- .if \reduce
- eor TMP_V.16b, PL.16b, TMP_V.16b
- .endif
-
- karatsuba1 M4 KEY4
- .if \reduce
- eor PH.16b, PH.16b, TMP_V.16b
- .endif
-
- karatsuba1 M3 KEY5
- .if \reduce
- pmull2 TMP_V.1q, TMP_V.2d, GSTAR.2d
- .endif
-
- karatsuba1 M2 KEY6
- .if \reduce
- eor SUM.16b, PH.16b, TMP_V.16b
- .endif
-
- karatsuba1 M1 KEY7
- eor M0.16b, M0.16b, SUM.16b
-
- karatsuba1 M0 KEY8
- karatsuba2
-.endm
-
-/*
- * Handle any extra blocks after full_stride loop.
- */
-.macro partial_stride
- add KEY_POWERS, KEY_START, #(STRIDE_BLOCKS << 4)
- sub KEY_POWERS, KEY_POWERS, BLOCKS_LEFT, lsl #4
- ld1 {KEY1.16b}, [KEY_POWERS], #16
-
- ld1 {TMP_V.16b}, [MSG], #16
- eor SUM.16b, SUM.16b, TMP_V.16b
- karatsuba1_store KEY1 SUM
- sub BLOCKS_LEFT, BLOCKS_LEFT, #1
-
- tst BLOCKS_LEFT, #4
- beq .Lpartial4BlocksDone
- ld1 {M0.16b, M1.16b, M2.16b, M3.16b}, [MSG], #64
- ld1 {KEY8.16b, KEY7.16b, KEY6.16b, KEY5.16b}, [KEY_POWERS], #64
- karatsuba1 M0 KEY8
- karatsuba1 M1 KEY7
- karatsuba1 M2 KEY6
- karatsuba1 M3 KEY5
-.Lpartial4BlocksDone:
- tst BLOCKS_LEFT, #2
- beq .Lpartial2BlocksDone
- ld1 {M0.16b, M1.16b}, [MSG], #32
- ld1 {KEY8.16b, KEY7.16b}, [KEY_POWERS], #32
- karatsuba1 M0 KEY8
- karatsuba1 M1 KEY7
-.Lpartial2BlocksDone:
- tst BLOCKS_LEFT, #1
- beq .LpartialDone
- ld1 {M0.16b}, [MSG], #16
- ld1 {KEY8.16b}, [KEY_POWERS], #16
- karatsuba1 M0 KEY8
-.LpartialDone:
- karatsuba2
- montgomery_reduction SUM
-.endm
-
-/*
- * Perform montgomery multiplication in GF(2^128) and store result in op1.
- *
- * Computes op1*op2*x^{-128} mod x^128 + x^127 + x^126 + x^121 + 1
- * If op1, op2 are in montgomery form, this computes the montgomery
- * form of op1*op2.
- *
- * void pmull_polyval_mul(u8 *op1, const u8 *op2);
- */
-SYM_FUNC_START(pmull_polyval_mul)
- adr TMP, .Lgstar
- ld1 {GSTAR.2d}, [TMP]
- ld1 {v0.16b}, [x0]
- ld1 {v1.16b}, [x1]
- karatsuba1_store v0 v1
- karatsuba2
- montgomery_reduction SUM
- st1 {SUM.16b}, [x0]
- ret
-SYM_FUNC_END(pmull_polyval_mul)
-
-/*
- * Perform polynomial evaluation as specified by POLYVAL. This computes:
- * h^n * accumulator + h^n * m_0 + ... + h^1 * m_{n-1}
- * where n=nblocks, h is the hash key, and m_i are the message blocks.
- *
- * x0 - pointer to precomputed key powers h^8 ... h^1
- * x1 - pointer to message blocks
- * x2 - number of blocks to hash
- * x3 - pointer to accumulator
- *
- * void pmull_polyval_update(const struct polyval_ctx *ctx, const u8 *in,
- * size_t nblocks, u8 *accumulator);
- */
-SYM_FUNC_START(pmull_polyval_update)
- adr TMP, .Lgstar
- mov KEY_START, KEY_POWERS
- ld1 {GSTAR.2d}, [TMP]
- ld1 {SUM.16b}, [ACCUMULATOR]
- subs BLOCKS_LEFT, BLOCKS_LEFT, #STRIDE_BLOCKS
- blt .LstrideLoopExit
- ld1 {KEY8.16b, KEY7.16b, KEY6.16b, KEY5.16b}, [KEY_POWERS], #64
- ld1 {KEY4.16b, KEY3.16b, KEY2.16b, KEY1.16b}, [KEY_POWERS], #64
- full_stride 0
- subs BLOCKS_LEFT, BLOCKS_LEFT, #STRIDE_BLOCKS
- blt .LstrideLoopExitReduce
-.LstrideLoop:
- full_stride 1
- subs BLOCKS_LEFT, BLOCKS_LEFT, #STRIDE_BLOCKS
- bge .LstrideLoop
-.LstrideLoopExitReduce:
- montgomery_reduction SUM
-.LstrideLoopExit:
- adds BLOCKS_LEFT, BLOCKS_LEFT, #STRIDE_BLOCKS
- beq .LskipPartial
- partial_stride
-.LskipPartial:
- st1 {SUM.16b}, [ACCUMULATOR]
- ret
-SYM_FUNC_END(pmull_polyval_update)
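For reference, the Karatsuba split that the deleted karatsuba1/karatsuba2 macros relied on can be stated compactly. With X = X_1*x^64 + X_0 and Y = Y_1*x^64 + Y_0 over GF(2), where addition is XOR:

    X*Y = X_1*Y_1*x^128
        + ((X_0 + X_1)*(Y_0 + Y_1) + X_0*Y_0 + X_1*Y_1)*x^64
        + X_0*Y_0

so the three 64x64 carry-less products LO = X_0*Y_0, MI = (X_0 + X_1)*(Y_0 + Y_1) and HI = X_1*Y_1 recover the full 256-bit product, and montgomery_reduction then folds the result back modulo g(x) = x^128 + x^127 + x^126 + x^121 + 1 once for every eight accumulated blocks.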
diff --git a/arch/arm64/crypto/polyval-ce-glue.c b/arch/arm64/crypto/polyval-ce-glue.c
deleted file mode 100644
index c4e653688ea0..000000000000
--- a/arch/arm64/crypto/polyval-ce-glue.c
+++ /dev/null
@@ -1,158 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Glue code for POLYVAL using ARMv8 Crypto Extensions
- *
- * Copyright (c) 2007 Nokia Siemens Networks - Mikko Herranen <mh1@iki.fi>
- * Copyright (c) 2009 Intel Corp.
- * Author: Huang Ying <ying.huang@intel.com>
- * Copyright 2021 Google LLC
- */
-
-/*
- * Glue code based on ghash-clmulni-intel_glue.c.
- *
- * This implementation of POLYVAL uses montgomery multiplication accelerated by
- * ARMv8 Crypto Extensions instructions to implement the finite field operations.
- */
-
-#include <asm/neon.h>
-#include <crypto/internal/hash.h>
-#include <crypto/polyval.h>
-#include <crypto/utils.h>
-#include <linux/cpufeature.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/string.h>
-
-#define NUM_KEY_POWERS 8
-
-struct polyval_tfm_ctx {
- /*
- * These powers must be in the order h^8, ..., h^1.
- */
- u8 key_powers[NUM_KEY_POWERS][POLYVAL_BLOCK_SIZE];
-};
-
-struct polyval_desc_ctx {
- u8 buffer[POLYVAL_BLOCK_SIZE];
-};
-
-asmlinkage void pmull_polyval_update(const struct polyval_tfm_ctx *keys,
- const u8 *in, size_t nblocks, u8 *accumulator);
-asmlinkage void pmull_polyval_mul(u8 *op1, const u8 *op2);
-
-static void internal_polyval_update(const struct polyval_tfm_ctx *keys,
- const u8 *in, size_t nblocks, u8 *accumulator)
-{
- kernel_neon_begin();
- pmull_polyval_update(keys, in, nblocks, accumulator);
- kernel_neon_end();
-}
-
-static void internal_polyval_mul(u8 *op1, const u8 *op2)
-{
- kernel_neon_begin();
- pmull_polyval_mul(op1, op2);
- kernel_neon_end();
-}
-
-static int polyval_arm64_setkey(struct crypto_shash *tfm,
- const u8 *key, unsigned int keylen)
-{
- struct polyval_tfm_ctx *tctx = crypto_shash_ctx(tfm);
- int i;
-
- if (keylen != POLYVAL_BLOCK_SIZE)
- return -EINVAL;
-
- memcpy(tctx->key_powers[NUM_KEY_POWERS-1], key, POLYVAL_BLOCK_SIZE);
-
- for (i = NUM_KEY_POWERS-2; i >= 0; i--) {
- memcpy(tctx->key_powers[i], key, POLYVAL_BLOCK_SIZE);
- internal_polyval_mul(tctx->key_powers[i],
- tctx->key_powers[i+1]);
- }
-
- return 0;
-}
-
-static int polyval_arm64_init(struct shash_desc *desc)
-{
- struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
-
- memset(dctx, 0, sizeof(*dctx));
-
- return 0;
-}
-
-static int polyval_arm64_update(struct shash_desc *desc,
- const u8 *src, unsigned int srclen)
-{
- struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
- const struct polyval_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
- unsigned int nblocks;
-
- do {
- /* allow rescheduling every 4K bytes */
- nblocks = min(srclen, 4096U) / POLYVAL_BLOCK_SIZE;
- internal_polyval_update(tctx, src, nblocks, dctx->buffer);
- srclen -= nblocks * POLYVAL_BLOCK_SIZE;
- src += nblocks * POLYVAL_BLOCK_SIZE;
- } while (srclen >= POLYVAL_BLOCK_SIZE);
-
- return srclen;
-}
-
-static int polyval_arm64_finup(struct shash_desc *desc, const u8 *src,
- unsigned int len, u8 *dst)
-{
- struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
- const struct polyval_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
-
- if (len) {
- crypto_xor(dctx->buffer, src, len);
- internal_polyval_mul(dctx->buffer,
- tctx->key_powers[NUM_KEY_POWERS-1]);
- }
-
- memcpy(dst, dctx->buffer, POLYVAL_BLOCK_SIZE);
-
- return 0;
-}
-
-static struct shash_alg polyval_alg = {
- .digestsize = POLYVAL_DIGEST_SIZE,
- .init = polyval_arm64_init,
- .update = polyval_arm64_update,
- .finup = polyval_arm64_finup,
- .setkey = polyval_arm64_setkey,
- .descsize = sizeof(struct polyval_desc_ctx),
- .base = {
- .cra_name = "polyval",
- .cra_driver_name = "polyval-ce",
- .cra_priority = 200,
- .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
- .cra_blocksize = POLYVAL_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct polyval_tfm_ctx),
- .cra_module = THIS_MODULE,
- },
-};
-
-static int __init polyval_ce_mod_init(void)
-{
- return crypto_register_shash(&polyval_alg);
-}
-
-static void __exit polyval_ce_mod_exit(void)
-{
- crypto_unregister_shash(&polyval_alg);
-}
-
-module_cpu_feature_match(PMULL, polyval_ce_mod_init)
-module_exit(polyval_ce_mod_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("POLYVAL hash function accelerated by ARMv8 Crypto Extensions");
-MODULE_ALIAS_CRYPTO("polyval");
-MODULE_ALIAS_CRYPTO("polyval-ce");
diff --git a/arch/arm64/crypto/sha3-ce-core.S b/arch/arm64/crypto/sha3-ce-core.S
deleted file mode 100644
index 9c77313f5a60..000000000000
--- a/arch/arm64/crypto/sha3-ce-core.S
+++ /dev/null
@@ -1,212 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * sha3-ce-core.S - core SHA-3 transform using v8.2 Crypto Extensions
- *
- * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-
- .irp b,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
- .set .Lv\b\().2d, \b
- .set .Lv\b\().16b, \b
- .endr
-
- /*
- * ARMv8.2 Crypto Extensions instructions
- */
- .macro eor3, rd, rn, rm, ra
- .inst 0xce000000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
- .endm
-
- .macro rax1, rd, rn, rm
- .inst 0xce608c00 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
- .endm
-
- .macro bcax, rd, rn, rm, ra
- .inst 0xce200000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
- .endm
-
- .macro xar, rd, rn, rm, imm6
- .inst 0xce800000 | .L\rd | (.L\rn << 5) | ((\imm6) << 10) | (.L\rm << 16)
- .endm
-
- /*
- * int sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size)
- */
- .text
-SYM_FUNC_START(sha3_ce_transform)
- /* load state */
- add x8, x0, #32
- ld1 { v0.1d- v3.1d}, [x0]
- ld1 { v4.1d- v7.1d}, [x8], #32
- ld1 { v8.1d-v11.1d}, [x8], #32
- ld1 {v12.1d-v15.1d}, [x8], #32
- ld1 {v16.1d-v19.1d}, [x8], #32
- ld1 {v20.1d-v23.1d}, [x8], #32
- ld1 {v24.1d}, [x8]
-
-0: sub w2, w2, #1
- mov w8, #24
- adr_l x9, .Lsha3_rcon
-
- /* load input */
- ld1 {v25.8b-v28.8b}, [x1], #32
- ld1 {v29.8b-v31.8b}, [x1], #24
- eor v0.8b, v0.8b, v25.8b
- eor v1.8b, v1.8b, v26.8b
- eor v2.8b, v2.8b, v27.8b
- eor v3.8b, v3.8b, v28.8b
- eor v4.8b, v4.8b, v29.8b
- eor v5.8b, v5.8b, v30.8b
- eor v6.8b, v6.8b, v31.8b
-
- tbnz x3, #6, 2f // SHA3-512
-
- ld1 {v25.8b-v28.8b}, [x1], #32
- ld1 {v29.8b-v30.8b}, [x1], #16
- eor v7.8b, v7.8b, v25.8b
- eor v8.8b, v8.8b, v26.8b
- eor v9.8b, v9.8b, v27.8b
- eor v10.8b, v10.8b, v28.8b
- eor v11.8b, v11.8b, v29.8b
- eor v12.8b, v12.8b, v30.8b
-
- tbnz x3, #4, 1f // SHA3-384 or SHA3-224
-
- // SHA3-256
- ld1 {v25.8b-v28.8b}, [x1], #32
- eor v13.8b, v13.8b, v25.8b
- eor v14.8b, v14.8b, v26.8b
- eor v15.8b, v15.8b, v27.8b
- eor v16.8b, v16.8b, v28.8b
- b 3f
-
-1: tbz x3, #2, 3f // bit 2 cleared? SHA-384
-
- // SHA3-224
- ld1 {v25.8b-v28.8b}, [x1], #32
- ld1 {v29.8b}, [x1], #8
- eor v13.8b, v13.8b, v25.8b
- eor v14.8b, v14.8b, v26.8b
- eor v15.8b, v15.8b, v27.8b
- eor v16.8b, v16.8b, v28.8b
- eor v17.8b, v17.8b, v29.8b
- b 3f
-
- // SHA3-512
-2: ld1 {v25.8b-v26.8b}, [x1], #16
- eor v7.8b, v7.8b, v25.8b
- eor v8.8b, v8.8b, v26.8b
-
-3: sub w8, w8, #1
-
- eor3 v29.16b, v4.16b, v9.16b, v14.16b
- eor3 v26.16b, v1.16b, v6.16b, v11.16b
- eor3 v28.16b, v3.16b, v8.16b, v13.16b
- eor3 v25.16b, v0.16b, v5.16b, v10.16b
- eor3 v27.16b, v2.16b, v7.16b, v12.16b
- eor3 v29.16b, v29.16b, v19.16b, v24.16b
- eor3 v26.16b, v26.16b, v16.16b, v21.16b
- eor3 v28.16b, v28.16b, v18.16b, v23.16b
- eor3 v25.16b, v25.16b, v15.16b, v20.16b
- eor3 v27.16b, v27.16b, v17.16b, v22.16b
-
- rax1 v30.2d, v29.2d, v26.2d // bc[0]
- rax1 v26.2d, v26.2d, v28.2d // bc[2]
- rax1 v28.2d, v28.2d, v25.2d // bc[4]
- rax1 v25.2d, v25.2d, v27.2d // bc[1]
- rax1 v27.2d, v27.2d, v29.2d // bc[3]
-
- eor v0.16b, v0.16b, v30.16b
- xar v29.2d, v1.2d, v25.2d, (64 - 1)
- xar v1.2d, v6.2d, v25.2d, (64 - 44)
- xar v6.2d, v9.2d, v28.2d, (64 - 20)
- xar v9.2d, v22.2d, v26.2d, (64 - 61)
- xar v22.2d, v14.2d, v28.2d, (64 - 39)
- xar v14.2d, v20.2d, v30.2d, (64 - 18)
- xar v31.2d, v2.2d, v26.2d, (64 - 62)
- xar v2.2d, v12.2d, v26.2d, (64 - 43)
- xar v12.2d, v13.2d, v27.2d, (64 - 25)
- xar v13.2d, v19.2d, v28.2d, (64 - 8)
- xar v19.2d, v23.2d, v27.2d, (64 - 56)
- xar v23.2d, v15.2d, v30.2d, (64 - 41)
- xar v15.2d, v4.2d, v28.2d, (64 - 27)
- xar v28.2d, v24.2d, v28.2d, (64 - 14)
- xar v24.2d, v21.2d, v25.2d, (64 - 2)
- xar v8.2d, v8.2d, v27.2d, (64 - 55)
- xar v4.2d, v16.2d, v25.2d, (64 - 45)
- xar v16.2d, v5.2d, v30.2d, (64 - 36)
- xar v5.2d, v3.2d, v27.2d, (64 - 28)
- xar v27.2d, v18.2d, v27.2d, (64 - 21)
- xar v3.2d, v17.2d, v26.2d, (64 - 15)
- xar v25.2d, v11.2d, v25.2d, (64 - 10)
- xar v26.2d, v7.2d, v26.2d, (64 - 6)
- xar v30.2d, v10.2d, v30.2d, (64 - 3)
-
- bcax v20.16b, v31.16b, v22.16b, v8.16b
- bcax v21.16b, v8.16b, v23.16b, v22.16b
- bcax v22.16b, v22.16b, v24.16b, v23.16b
- bcax v23.16b, v23.16b, v31.16b, v24.16b
- bcax v24.16b, v24.16b, v8.16b, v31.16b
-
- ld1r {v31.2d}, [x9], #8
-
- bcax v17.16b, v25.16b, v19.16b, v3.16b
- bcax v18.16b, v3.16b, v15.16b, v19.16b
- bcax v19.16b, v19.16b, v16.16b, v15.16b
- bcax v15.16b, v15.16b, v25.16b, v16.16b
- bcax v16.16b, v16.16b, v3.16b, v25.16b
-
- bcax v10.16b, v29.16b, v12.16b, v26.16b
- bcax v11.16b, v26.16b, v13.16b, v12.16b
- bcax v12.16b, v12.16b, v14.16b, v13.16b
- bcax v13.16b, v13.16b, v29.16b, v14.16b
- bcax v14.16b, v14.16b, v26.16b, v29.16b
-
- bcax v7.16b, v30.16b, v9.16b, v4.16b
- bcax v8.16b, v4.16b, v5.16b, v9.16b
- bcax v9.16b, v9.16b, v6.16b, v5.16b
- bcax v5.16b, v5.16b, v30.16b, v6.16b
- bcax v6.16b, v6.16b, v4.16b, v30.16b
-
- bcax v3.16b, v27.16b, v0.16b, v28.16b
- bcax v4.16b, v28.16b, v1.16b, v0.16b
- bcax v0.16b, v0.16b, v2.16b, v1.16b
- bcax v1.16b, v1.16b, v27.16b, v2.16b
- bcax v2.16b, v2.16b, v28.16b, v27.16b
-
- eor v0.16b, v0.16b, v31.16b
-
- cbnz w8, 3b
- cond_yield 4f, x8, x9
- cbnz w2, 0b
-
- /* save state */
-4: st1 { v0.1d- v3.1d}, [x0], #32
- st1 { v4.1d- v7.1d}, [x0], #32
- st1 { v8.1d-v11.1d}, [x0], #32
- st1 {v12.1d-v15.1d}, [x0], #32
- st1 {v16.1d-v19.1d}, [x0], #32
- st1 {v20.1d-v23.1d}, [x0], #32
- st1 {v24.1d}, [x0]
- mov w0, w2
- ret
-SYM_FUNC_END(sha3_ce_transform)
-
- .section ".rodata", "a"
- .align 8
-.Lsha3_rcon:
- .quad 0x0000000000000001, 0x0000000000008082, 0x800000000000808a
- .quad 0x8000000080008000, 0x000000000000808b, 0x0000000080000001
- .quad 0x8000000080008081, 0x8000000000008009, 0x000000000000008a
- .quad 0x0000000000000088, 0x0000000080008009, 0x000000008000000a
- .quad 0x000000008000808b, 0x800000000000008b, 0x8000000000008089
- .quad 0x8000000000008003, 0x8000000000008002, 0x8000000000000080
- .quad 0x000000000000800a, 0x800000008000000a, 0x8000000080008081
- .quad 0x8000000000008080, 0x0000000080000001, 0x8000000080008008
diff --git a/arch/arm64/crypto/sha3-ce-glue.c b/arch/arm64/crypto/sha3-ce-glue.c
deleted file mode 100644
index b4f1001046c9..000000000000
--- a/arch/arm64/crypto/sha3-ce-glue.c
+++ /dev/null
@@ -1,151 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * sha3-ce-glue.c - core SHA-3 transform using v8.2 Crypto Extensions
- *
- * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-#include <crypto/internal/hash.h>
-#include <crypto/sha3.h>
-#include <linux/cpufeature.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/unaligned.h>
-
-MODULE_DESCRIPTION("SHA3 secure hash using ARMv8 Crypto Extensions");
-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS_CRYPTO("sha3-224");
-MODULE_ALIAS_CRYPTO("sha3-256");
-MODULE_ALIAS_CRYPTO("sha3-384");
-MODULE_ALIAS_CRYPTO("sha3-512");
-
-asmlinkage int sha3_ce_transform(u64 *st, const u8 *data, int blocks,
- int md_len);
-
-static int sha3_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
-{
- struct sha3_state *sctx = shash_desc_ctx(desc);
- struct crypto_shash *tfm = desc->tfm;
- unsigned int bs, ds;
- int blocks;
-
- ds = crypto_shash_digestsize(tfm);
- bs = crypto_shash_blocksize(tfm);
- blocks = len / bs;
- len -= blocks * bs;
- do {
- int rem;
-
- kernel_neon_begin();
- rem = sha3_ce_transform(sctx->st, data, blocks, ds);
- kernel_neon_end();
- data += (blocks - rem) * bs;
- blocks = rem;
- } while (blocks);
- return len;
-}
-
-static int sha3_finup(struct shash_desc *desc, const u8 *src, unsigned int len,
- u8 *out)
-{
- struct sha3_state *sctx = shash_desc_ctx(desc);
- struct crypto_shash *tfm = desc->tfm;
- __le64 *digest = (__le64 *)out;
- u8 block[SHA3_224_BLOCK_SIZE];
- unsigned int bs, ds;
- int i;
-
- ds = crypto_shash_digestsize(tfm);
- bs = crypto_shash_blocksize(tfm);
- memcpy(block, src, len);
-
- block[len++] = 0x06;
- memset(block + len, 0, bs - len);
- block[bs - 1] |= 0x80;
-
- kernel_neon_begin();
- sha3_ce_transform(sctx->st, block, 1, ds);
- kernel_neon_end();
- memzero_explicit(block , sizeof(block));
-
- for (i = 0; i < ds / 8; i++)
- put_unaligned_le64(sctx->st[i], digest++);
-
- if (ds & 4)
- put_unaligned_le32(sctx->st[i], (__le32 *)digest);
-
- return 0;
-}
-
-static struct shash_alg algs[] = { {
- .digestsize = SHA3_224_DIGEST_SIZE,
- .init = crypto_sha3_init,
- .update = sha3_update,
- .finup = sha3_finup,
- .descsize = SHA3_STATE_SIZE,
- .base.cra_name = "sha3-224",
- .base.cra_driver_name = "sha3-224-ce",
- .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
- .base.cra_blocksize = SHA3_224_BLOCK_SIZE,
- .base.cra_module = THIS_MODULE,
- .base.cra_priority = 200,
-}, {
- .digestsize = SHA3_256_DIGEST_SIZE,
- .init = crypto_sha3_init,
- .update = sha3_update,
- .finup = sha3_finup,
- .descsize = SHA3_STATE_SIZE,
- .base.cra_name = "sha3-256",
- .base.cra_driver_name = "sha3-256-ce",
- .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
- .base.cra_blocksize = SHA3_256_BLOCK_SIZE,
- .base.cra_module = THIS_MODULE,
- .base.cra_priority = 200,
-}, {
- .digestsize = SHA3_384_DIGEST_SIZE,
- .init = crypto_sha3_init,
- .update = sha3_update,
- .finup = sha3_finup,
- .descsize = SHA3_STATE_SIZE,
- .base.cra_name = "sha3-384",
- .base.cra_driver_name = "sha3-384-ce",
- .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
- .base.cra_blocksize = SHA3_384_BLOCK_SIZE,
- .base.cra_module = THIS_MODULE,
- .base.cra_priority = 200,
-}, {
- .digestsize = SHA3_512_DIGEST_SIZE,
- .init = crypto_sha3_init,
- .update = sha3_update,
- .finup = sha3_finup,
- .descsize = SHA3_STATE_SIZE,
- .base.cra_name = "sha3-512",
- .base.cra_driver_name = "sha3-512-ce",
- .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
- .base.cra_blocksize = SHA3_512_BLOCK_SIZE,
- .base.cra_module = THIS_MODULE,
- .base.cra_priority = 200,
-} };
-
-static int __init sha3_neon_mod_init(void)
-{
- return crypto_register_shashes(algs, ARRAY_SIZE(algs));
-}
-
-static void __exit sha3_neon_mod_fini(void)
-{
- crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
-}
-
-module_cpu_feature_match(SHA3, sha3_neon_mod_init);
-module_exit(sha3_neon_mod_fini);
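
The glue driver deleted above performed the SHA-3 padding in C before feeding the final block to the CE transform: a 0x06 byte carries the SHA-3 domain-separation bits plus the first pad bit, and the last byte of the block receives the closing bit of the pad10*1 rule. A minimal sketch of that step, lifted from the removed sha3_finup() (the helper name is made up for illustration; assumes <linux/string.h> for memset()):

	/* pad the final partial block in place; bs is the rate/block size */
	static void sha3_pad_block(u8 *block, unsigned int len, unsigned int bs)
	{
		block[len++] = 0x06;		/* SHA-3 domain bits + first pad bit */
		memset(block + len, 0, bs - len);
		block[bs - 1] |= 0x80;		/* closing bit of pad10*1 */
	}
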
diff --git a/arch/arm64/crypto/sm3-ce-glue.c b/arch/arm64/crypto/sm3-ce-glue.c
index eac6f5fa0abe..24c1fcfae072 100644
--- a/arch/arm64/crypto/sm3-ce-glue.c
+++ b/arch/arm64/crypto/sm3-ce-glue.c
@@ -5,7 +5,6 @@
* Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
*/
-#include <asm/neon.h>
#include <crypto/internal/hash.h>
#include <crypto/sm3.h>
#include <crypto/sm3_base.h>
@@ -13,6 +12,8 @@
#include <linux/kernel.h>
#include <linux/module.h>
+#include <asm/simd.h>
+
MODULE_DESCRIPTION("SM3 secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
@@ -25,18 +26,18 @@ static int sm3_ce_update(struct shash_desc *desc, const u8 *data,
{
int remain;
- kernel_neon_begin();
- remain = sm3_base_do_update_blocks(desc, data, len, sm3_ce_transform);
- kernel_neon_end();
+ scoped_ksimd() {
+ remain = sm3_base_do_update_blocks(desc, data, len, sm3_ce_transform);
+ }
return remain;
}
static int sm3_ce_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
- kernel_neon_begin();
- sm3_base_do_finup(desc, data, len, sm3_ce_transform);
- kernel_neon_end();
+ scoped_ksimd() {
+ sm3_base_do_finup(desc, data, len, sm3_ce_transform);
+ }
return sm3_base_finish(desc, out);
}
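
The conversions in this diff all follow the same shape: a kernel_neon_begin()/kernel_neon_end() pair around the SIMD helpers becomes a scoped_ksimd() block (presumably a scope-based guard in the style of scoped_guard(); that is an assumption, its definition is not part of this diff). A minimal before/after sketch with a placeholder helper:

	/* before: explicit begin/end bracketing */
	kernel_neon_begin();
	ret = simd_helper(state, data, len);	/* simd_helper() is hypothetical */
	kernel_neon_end();

	/* after: the SIMD-enabled region is tied to a scope */
	scoped_ksimd() {
		ret = simd_helper(state, data, len);
	}

	/* a single statement may drop the braces, as several later hunks do */
	scoped_ksimd()
		ret = simd_helper(state, data, len);
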
diff --git a/arch/arm64/crypto/sm3-neon-glue.c b/arch/arm64/crypto/sm3-neon-glue.c
index 6c4611a503a3..15f30cc24f32 100644
--- a/arch/arm64/crypto/sm3-neon-glue.c
+++ b/arch/arm64/crypto/sm3-neon-glue.c
@@ -5,7 +5,7 @@
* Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
*/
-#include <asm/neon.h>
+#include <asm/simd.h>
#include <crypto/internal/hash.h>
#include <crypto/sm3.h>
#include <crypto/sm3_base.h>
@@ -20,20 +20,16 @@ asmlinkage void sm3_neon_transform(struct sm3_state *sst, u8 const *src,
static int sm3_neon_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- int remain;
-
- kernel_neon_begin();
- remain = sm3_base_do_update_blocks(desc, data, len, sm3_neon_transform);
- kernel_neon_end();
- return remain;
+ scoped_ksimd()
+ return sm3_base_do_update_blocks(desc, data, len,
+ sm3_neon_transform);
}
static int sm3_neon_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
- kernel_neon_begin();
- sm3_base_do_finup(desc, data, len, sm3_neon_transform);
- kernel_neon_end();
+ scoped_ksimd()
+ sm3_base_do_finup(desc, data, len, sm3_neon_transform);
return sm3_base_finish(desc, out);
}
diff --git a/arch/arm64/crypto/sm4-ce-ccm-glue.c b/arch/arm64/crypto/sm4-ce-ccm-glue.c
index e9cc1c1364ec..332f02167a96 100644
--- a/arch/arm64/crypto/sm4-ce-ccm-glue.c
+++ b/arch/arm64/crypto/sm4-ce-ccm-glue.c
@@ -11,7 +11,7 @@
#include <linux/crypto.h>
#include <linux/kernel.h>
#include <linux/cpufeature.h>
-#include <asm/neon.h>
+#include <asm/simd.h>
#include <crypto/scatterwalk.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/skcipher.h>
@@ -35,10 +35,9 @@ static int ccm_setkey(struct crypto_aead *tfm, const u8 *key,
if (key_len != SM4_KEY_SIZE)
return -EINVAL;
- kernel_neon_begin();
- sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec,
- crypto_sm4_fk, crypto_sm4_ck);
- kernel_neon_end();
+ scoped_ksimd()
+ sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec,
+ crypto_sm4_fk, crypto_sm4_ck);
return 0;
}
@@ -167,39 +166,23 @@ static int ccm_crypt(struct aead_request *req, struct skcipher_walk *walk,
memcpy(ctr0, walk->iv, SM4_BLOCK_SIZE);
crypto_inc(walk->iv, SM4_BLOCK_SIZE);
- kernel_neon_begin();
+ scoped_ksimd() {
+ if (req->assoclen)
+ ccm_calculate_auth_mac(req, mac);
- if (req->assoclen)
- ccm_calculate_auth_mac(req, mac);
-
- while (walk->nbytes && walk->nbytes != walk->total) {
- unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE;
-
- sm4_ce_ccm_crypt(rkey_enc, walk->dst.virt.addr,
- walk->src.virt.addr, walk->iv,
- walk->nbytes - tail, mac);
-
- kernel_neon_end();
-
- err = skcipher_walk_done(walk, tail);
-
- kernel_neon_begin();
- }
-
- if (walk->nbytes) {
- sm4_ce_ccm_crypt(rkey_enc, walk->dst.virt.addr,
- walk->src.virt.addr, walk->iv,
- walk->nbytes, mac);
+ while (walk->nbytes) {
+ unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE;
- sm4_ce_ccm_final(rkey_enc, ctr0, mac);
+ if (walk->nbytes == walk->total)
+ tail = 0;
- kernel_neon_end();
+ sm4_ce_ccm_crypt(rkey_enc, walk->dst.virt.addr,
+ walk->src.virt.addr, walk->iv,
+ walk->nbytes - tail, mac);
- err = skcipher_walk_done(walk, 0);
- } else {
+ err = skcipher_walk_done(walk, tail);
+ }
sm4_ce_ccm_final(rkey_enc, ctr0, mac);
-
- kernel_neon_end();
}
return err;
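
The rewritten loop folds the old full-blocks/final-block split into one pass: tail holds the partial remainder and is only forced to zero on the last chunk. With SM4_BLOCK_SIZE of 16, for example, an intermediate chunk of 40 bytes keeps tail == 8, so sm4_ce_ccm_crypt() sees 32 bytes and the 8 leftover bytes are handed back through skcipher_walk_done(walk, 8); on the final chunk the same 40 bytes are processed whole (tail == 0) and sm4_ce_ccm_final() then closes the MAC, all inside a single scoped_ksimd() section.
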
diff --git a/arch/arm64/crypto/sm4-ce-cipher-glue.c b/arch/arm64/crypto/sm4-ce-cipher-glue.c
index c31d76fb5a17..bceec833ef4e 100644
--- a/arch/arm64/crypto/sm4-ce-cipher-glue.c
+++ b/arch/arm64/crypto/sm4-ce-cipher-glue.c
@@ -32,9 +32,8 @@ static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
if (!crypto_simd_usable()) {
sm4_crypt_block(ctx->rkey_enc, out, in);
} else {
- kernel_neon_begin();
- sm4_ce_do_crypt(ctx->rkey_enc, out, in);
- kernel_neon_end();
+ scoped_ksimd()
+ sm4_ce_do_crypt(ctx->rkey_enc, out, in);
}
}
@@ -45,9 +44,8 @@ static void sm4_ce_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
if (!crypto_simd_usable()) {
sm4_crypt_block(ctx->rkey_dec, out, in);
} else {
- kernel_neon_begin();
- sm4_ce_do_crypt(ctx->rkey_dec, out, in);
- kernel_neon_end();
+ scoped_ksimd()
+ sm4_ce_do_crypt(ctx->rkey_dec, out, in);
}
}
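
The single-block cipher keeps its scalar path for contexts where SIMD cannot be used; only the SIMD branch changes. A generic template of the pattern, as a sketch (all my_* names and the ctx type are hypothetical; needs <linux/crypto.h>, <crypto/internal/simd.h> and <asm/simd.h>):

	static void my_cipher_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
	{
		const struct my_ctx *ctx = crypto_tfm_ctx(tfm);	/* hypothetical ctx type */

		if (!crypto_simd_usable()) {
			my_generic_encrypt(ctx->rkey, out, in);	/* plain C fallback */
			return;
		}

		scoped_ksimd()
			my_ce_encrypt(ctx->rkey, out, in);	/* crypto-extension path */
	}
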
diff --git a/arch/arm64/crypto/sm4-ce-gcm-glue.c b/arch/arm64/crypto/sm4-ce-gcm-glue.c
index c2ea3d5f690b..ef06f4f768a1 100644
--- a/arch/arm64/crypto/sm4-ce-gcm-glue.c
+++ b/arch/arm64/crypto/sm4-ce-gcm-glue.c
@@ -11,7 +11,7 @@
#include <linux/crypto.h>
#include <linux/kernel.h>
#include <linux/cpufeature.h>
-#include <asm/neon.h>
+#include <asm/simd.h>
#include <crypto/b128ops.h>
#include <crypto/scatterwalk.h>
#include <crypto/internal/aead.h>
@@ -48,13 +48,11 @@ static int gcm_setkey(struct crypto_aead *tfm, const u8 *key,
if (key_len != SM4_KEY_SIZE)
return -EINVAL;
- kernel_neon_begin();
-
- sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
- crypto_sm4_fk, crypto_sm4_ck);
- sm4_ce_pmull_ghash_setup(ctx->key.rkey_enc, ctx->ghash_table);
-
- kernel_neon_end();
+ scoped_ksimd() {
+ sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
+ crypto_sm4_fk, crypto_sm4_ck);
+ sm4_ce_pmull_ghash_setup(ctx->key.rkey_enc, ctx->ghash_table);
+ }
return 0;
}
@@ -149,44 +147,28 @@ static int gcm_crypt(struct aead_request *req, struct skcipher_walk *walk,
memcpy(iv, req->iv, GCM_IV_SIZE);
put_unaligned_be32(2, iv + GCM_IV_SIZE);
- kernel_neon_begin();
+ scoped_ksimd() {
+ if (req->assoclen)
+ gcm_calculate_auth_mac(req, ghash);
- if (req->assoclen)
- gcm_calculate_auth_mac(req, ghash);
+ do {
+ unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE;
+ const u8 *src = walk->src.virt.addr;
+ u8 *dst = walk->dst.virt.addr;
+ const u8 *l = NULL;
- while (walk->nbytes) {
- unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE;
- const u8 *src = walk->src.virt.addr;
- u8 *dst = walk->dst.virt.addr;
+ if (walk->nbytes == walk->total) {
+ l = (const u8 *)&lengths;
+ tail = 0;
+ }
- if (walk->nbytes == walk->total) {
sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv,
- walk->nbytes, ghash,
- ctx->ghash_table,
- (const u8 *)&lengths);
-
- kernel_neon_end();
-
- return skcipher_walk_done(walk, 0);
- }
+ walk->nbytes - tail, ghash,
+ ctx->ghash_table, l);
- sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv,
- walk->nbytes - tail, ghash,
- ctx->ghash_table, NULL);
-
- kernel_neon_end();
-
- err = skcipher_walk_done(walk, tail);
-
- kernel_neon_begin();
+ err = skcipher_walk_done(walk, tail);
+ } while (walk->nbytes);
}
-
- sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, NULL, NULL, iv,
- walk->nbytes, ghash, ctx->ghash_table,
- (const u8 *)&lengths);
-
- kernel_neon_end();
-
return err;
}
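
The GCM loop now mirrors the CCM one: l stays NULL for intermediate chunks, and only when walk->nbytes == walk->total is the pointer to the encoded lengths block handed to sm4_ce_pmull_gcm_crypt() with tail forced to zero, so the final GHASH over the length fields happens on the last pass of the same scoped_ksimd() region rather than in a separate trailing call.
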
diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c
index 7a60e7b559dc..5569cece5a0b 100644
--- a/arch/arm64/crypto/sm4-ce-glue.c
+++ b/arch/arm64/crypto/sm4-ce-glue.c
@@ -8,7 +8,7 @@
* Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
*/
-#include <asm/neon.h>
+#include <asm/simd.h>
#include <crypto/b128ops.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/skcipher.h>
@@ -74,10 +74,9 @@ static int sm4_setkey(struct crypto_skcipher *tfm, const u8 *key,
if (key_len != SM4_KEY_SIZE)
return -EINVAL;
- kernel_neon_begin();
- sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec,
- crypto_sm4_fk, crypto_sm4_ck);
- kernel_neon_end();
+ scoped_ksimd()
+ sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec,
+ crypto_sm4_fk, crypto_sm4_ck);
return 0;
}
@@ -94,12 +93,12 @@ static int sm4_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
if (ret)
return ret;
- kernel_neon_begin();
- sm4_ce_expand_key(key, ctx->key1.rkey_enc,
- ctx->key1.rkey_dec, crypto_sm4_fk, crypto_sm4_ck);
- sm4_ce_expand_key(&key[SM4_KEY_SIZE], ctx->key2.rkey_enc,
- ctx->key2.rkey_dec, crypto_sm4_fk, crypto_sm4_ck);
- kernel_neon_end();
+ scoped_ksimd() {
+ sm4_ce_expand_key(key, ctx->key1.rkey_enc,
+ ctx->key1.rkey_dec, crypto_sm4_fk, crypto_sm4_ck);
+ sm4_ce_expand_key(&key[SM4_KEY_SIZE], ctx->key2.rkey_enc,
+ ctx->key2.rkey_dec, crypto_sm4_fk, crypto_sm4_ck);
+ }
return 0;
}
@@ -117,16 +116,14 @@ static int sm4_ecb_do_crypt(struct skcipher_request *req, const u32 *rkey)
u8 *dst = walk.dst.virt.addr;
unsigned int nblks;
- kernel_neon_begin();
-
- nblks = BYTES2BLKS(nbytes);
- if (nblks) {
- sm4_ce_crypt(rkey, dst, src, nblks);
- nbytes -= nblks * SM4_BLOCK_SIZE;
+ scoped_ksimd() {
+ nblks = BYTES2BLKS(nbytes);
+ if (nblks) {
+ sm4_ce_crypt(rkey, dst, src, nblks);
+ nbytes -= nblks * SM4_BLOCK_SIZE;
+ }
}
- kernel_neon_end();
-
err = skcipher_walk_done(&walk, nbytes);
}
@@ -167,16 +164,14 @@ static int sm4_cbc_crypt(struct skcipher_request *req,
nblocks = nbytes / SM4_BLOCK_SIZE;
if (nblocks) {
- kernel_neon_begin();
-
- if (encrypt)
- sm4_ce_cbc_enc(ctx->rkey_enc, dst, src,
- walk.iv, nblocks);
- else
- sm4_ce_cbc_dec(ctx->rkey_dec, dst, src,
- walk.iv, nblocks);
-
- kernel_neon_end();
+ scoped_ksimd() {
+ if (encrypt)
+ sm4_ce_cbc_enc(ctx->rkey_enc, dst, src,
+ walk.iv, nblocks);
+ else
+ sm4_ce_cbc_dec(ctx->rkey_dec, dst, src,
+ walk.iv, nblocks);
+ }
}
err = skcipher_walk_done(&walk, nbytes % SM4_BLOCK_SIZE);
@@ -249,16 +244,14 @@ static int sm4_cbc_cts_crypt(struct skcipher_request *req, bool encrypt)
if (err)
return err;
- kernel_neon_begin();
-
- if (encrypt)
- sm4_ce_cbc_cts_enc(ctx->rkey_enc, walk.dst.virt.addr,
- walk.src.virt.addr, walk.iv, walk.nbytes);
- else
- sm4_ce_cbc_cts_dec(ctx->rkey_dec, walk.dst.virt.addr,
- walk.src.virt.addr, walk.iv, walk.nbytes);
-
- kernel_neon_end();
+ scoped_ksimd() {
+ if (encrypt)
+ sm4_ce_cbc_cts_enc(ctx->rkey_enc, walk.dst.virt.addr,
+ walk.src.virt.addr, walk.iv, walk.nbytes);
+ else
+ sm4_ce_cbc_cts_dec(ctx->rkey_dec, walk.dst.virt.addr,
+ walk.src.virt.addr, walk.iv, walk.nbytes);
+ }
return skcipher_walk_done(&walk, 0);
}
@@ -288,28 +281,26 @@ static int sm4_ctr_crypt(struct skcipher_request *req)
u8 *dst = walk.dst.virt.addr;
unsigned int nblks;
- kernel_neon_begin();
-
- nblks = BYTES2BLKS(nbytes);
- if (nblks) {
- sm4_ce_ctr_enc(ctx->rkey_enc, dst, src, walk.iv, nblks);
- dst += nblks * SM4_BLOCK_SIZE;
- src += nblks * SM4_BLOCK_SIZE;
- nbytes -= nblks * SM4_BLOCK_SIZE;
- }
-
- /* tail */
- if (walk.nbytes == walk.total && nbytes > 0) {
- u8 keystream[SM4_BLOCK_SIZE];
-
- sm4_ce_crypt_block(ctx->rkey_enc, keystream, walk.iv);
- crypto_inc(walk.iv, SM4_BLOCK_SIZE);
- crypto_xor_cpy(dst, src, keystream, nbytes);
- nbytes = 0;
+ scoped_ksimd() {
+ nblks = BYTES2BLKS(nbytes);
+ if (nblks) {
+ sm4_ce_ctr_enc(ctx->rkey_enc, dst, src, walk.iv, nblks);
+ dst += nblks * SM4_BLOCK_SIZE;
+ src += nblks * SM4_BLOCK_SIZE;
+ nbytes -= nblks * SM4_BLOCK_SIZE;
+ }
+
+ /* tail */
+ if (walk.nbytes == walk.total && nbytes > 0) {
+ u8 keystream[SM4_BLOCK_SIZE];
+
+ sm4_ce_crypt_block(ctx->rkey_enc, keystream, walk.iv);
+ crypto_inc(walk.iv, SM4_BLOCK_SIZE);
+ crypto_xor_cpy(dst, src, keystream, nbytes);
+ nbytes = 0;
+ }
}
- kernel_neon_end();
-
err = skcipher_walk_done(&walk, nbytes);
}
@@ -359,18 +350,16 @@ static int sm4_xts_crypt(struct skcipher_request *req, bool encrypt)
if (nbytes < walk.total)
nbytes &= ~(SM4_BLOCK_SIZE - 1);
- kernel_neon_begin();
-
- if (encrypt)
- sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr,
- walk.src.virt.addr, walk.iv, nbytes,
- rkey2_enc);
- else
- sm4_ce_xts_dec(ctx->key1.rkey_dec, walk.dst.virt.addr,
- walk.src.virt.addr, walk.iv, nbytes,
- rkey2_enc);
-
- kernel_neon_end();
+ scoped_ksimd() {
+ if (encrypt)
+ sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr,
+ walk.src.virt.addr, walk.iv, nbytes,
+ rkey2_enc);
+ else
+ sm4_ce_xts_dec(ctx->key1.rkey_dec, walk.dst.virt.addr,
+ walk.src.virt.addr, walk.iv, nbytes,
+ rkey2_enc);
+ }
rkey2_enc = NULL;
@@ -395,18 +384,16 @@ static int sm4_xts_crypt(struct skcipher_request *req, bool encrypt)
if (err)
return err;
- kernel_neon_begin();
-
- if (encrypt)
- sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr,
- walk.src.virt.addr, walk.iv, walk.nbytes,
- rkey2_enc);
- else
- sm4_ce_xts_dec(ctx->key1.rkey_dec, walk.dst.virt.addr,
- walk.src.virt.addr, walk.iv, walk.nbytes,
- rkey2_enc);
-
- kernel_neon_end();
+ scoped_ksimd() {
+ if (encrypt)
+ sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr,
+ walk.src.virt.addr, walk.iv, walk.nbytes,
+ rkey2_enc);
+ else
+ sm4_ce_xts_dec(ctx->key1.rkey_dec, walk.dst.virt.addr,
+ walk.src.virt.addr, walk.iv, walk.nbytes,
+ rkey2_enc);
+ }
return skcipher_walk_done(&walk, 0);
}
@@ -510,11 +497,9 @@ static int sm4_cbcmac_setkey(struct crypto_shash *tfm, const u8 *key,
if (key_len != SM4_KEY_SIZE)
return -EINVAL;
- kernel_neon_begin();
- sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
- crypto_sm4_fk, crypto_sm4_ck);
- kernel_neon_end();
-
+ scoped_ksimd()
+ sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
+ crypto_sm4_fk, crypto_sm4_ck);
return 0;
}
@@ -530,15 +515,13 @@ static int sm4_cmac_setkey(struct crypto_shash *tfm, const u8 *key,
memset(consts, 0, SM4_BLOCK_SIZE);
- kernel_neon_begin();
-
- sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
- crypto_sm4_fk, crypto_sm4_ck);
+ scoped_ksimd() {
+ sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
+ crypto_sm4_fk, crypto_sm4_ck);
- /* encrypt the zero block */
- sm4_ce_crypt_block(ctx->key.rkey_enc, (u8 *)consts, (const u8 *)consts);
-
- kernel_neon_end();
+ /* encrypt the zero block */
+ sm4_ce_crypt_block(ctx->key.rkey_enc, (u8 *)consts, (const u8 *)consts);
+ }
/* gf(2^128) multiply zero-ciphertext with u and u^2 */
a = be64_to_cpu(consts[0].a);
@@ -568,18 +551,16 @@ static int sm4_xcbc_setkey(struct crypto_shash *tfm, const u8 *key,
if (key_len != SM4_KEY_SIZE)
return -EINVAL;
- kernel_neon_begin();
-
- sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
- crypto_sm4_fk, crypto_sm4_ck);
+ scoped_ksimd() {
+ sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
+ crypto_sm4_fk, crypto_sm4_ck);
- sm4_ce_crypt_block(ctx->key.rkey_enc, key2, ks[0]);
- sm4_ce_crypt(ctx->key.rkey_enc, ctx->consts, ks[1], 2);
+ sm4_ce_crypt_block(ctx->key.rkey_enc, key2, ks[0]);
+ sm4_ce_crypt(ctx->key.rkey_enc, ctx->consts, ks[1], 2);
- sm4_ce_expand_key(key2, ctx->key.rkey_enc, ctx->key.rkey_dec,
- crypto_sm4_fk, crypto_sm4_ck);
-
- kernel_neon_end();
+ sm4_ce_expand_key(key2, ctx->key.rkey_enc, ctx->key.rkey_dec,
+ crypto_sm4_fk, crypto_sm4_ck);
+ }
return 0;
}
@@ -600,10 +581,9 @@ static int sm4_mac_update(struct shash_desc *desc, const u8 *p,
unsigned int nblocks = len / SM4_BLOCK_SIZE;
len %= SM4_BLOCK_SIZE;
- kernel_neon_begin();
- sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, p,
- nblocks, false, true);
- kernel_neon_end();
+ scoped_ksimd()
+ sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, p,
+ nblocks, false, true);
return len;
}
@@ -619,10 +599,9 @@ static int sm4_cmac_finup(struct shash_desc *desc, const u8 *src,
ctx->digest[len] ^= 0x80;
consts += SM4_BLOCK_SIZE;
}
- kernel_neon_begin();
- sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, consts, 1,
- false, true);
- kernel_neon_end();
+ scoped_ksimd()
+ sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, consts, 1,
+ false, true);
memcpy(out, ctx->digest, SM4_BLOCK_SIZE);
return 0;
}
@@ -635,10 +614,9 @@ static int sm4_cbcmac_finup(struct shash_desc *desc, const u8 *src,
if (len) {
crypto_xor(ctx->digest, src, len);
- kernel_neon_begin();
- sm4_ce_crypt_block(tctx->key.rkey_enc, ctx->digest,
- ctx->digest);
- kernel_neon_end();
+ scoped_ksimd()
+ sm4_ce_crypt_block(tctx->key.rkey_enc, ctx->digest,
+ ctx->digest);
}
memcpy(out, ctx->digest, SM4_BLOCK_SIZE);
return 0;
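
The CTR tail handling is unchanged in substance, just moved inside the scoped_ksimd() block: any bytes short of a full block on the final chunk are covered by encrypting the counter once more into a keystream block and XORing it in with crypto_xor_cpy(). For a 50-byte request delivered in one chunk, for instance, BYTES2BLKS() yields three full blocks (48 bytes) for sm4_ce_ctr_enc(), then sm4_ce_crypt_block() produces one keystream block and the remaining 2 bytes are XORed by hand, leaving nbytes == 0 for skcipher_walk_done().
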
diff --git a/arch/arm64/crypto/sm4-neon-glue.c b/arch/arm64/crypto/sm4-neon-glue.c
index e3500aca2d18..e944c2a2efb0 100644
--- a/arch/arm64/crypto/sm4-neon-glue.c
+++ b/arch/arm64/crypto/sm4-neon-glue.c
@@ -48,11 +48,8 @@ static int sm4_ecb_do_crypt(struct skcipher_request *req, const u32 *rkey)
nblocks = nbytes / SM4_BLOCK_SIZE;
if (nblocks) {
- kernel_neon_begin();
-
- sm4_neon_crypt(rkey, dst, src, nblocks);
-
- kernel_neon_end();
+ scoped_ksimd()
+ sm4_neon_crypt(rkey, dst, src, nblocks);
}
err = skcipher_walk_done(&walk, nbytes % SM4_BLOCK_SIZE);
@@ -126,12 +123,9 @@ static int sm4_cbc_decrypt(struct skcipher_request *req)
nblocks = nbytes / SM4_BLOCK_SIZE;
if (nblocks) {
- kernel_neon_begin();
-
- sm4_neon_cbc_dec(ctx->rkey_dec, dst, src,
- walk.iv, nblocks);
-
- kernel_neon_end();
+ scoped_ksimd()
+ sm4_neon_cbc_dec(ctx->rkey_dec, dst, src,
+ walk.iv, nblocks);
}
err = skcipher_walk_done(&walk, nbytes % SM4_BLOCK_SIZE);
@@ -157,12 +151,9 @@ static int sm4_ctr_crypt(struct skcipher_request *req)
nblocks = nbytes / SM4_BLOCK_SIZE;
if (nblocks) {
- kernel_neon_begin();
-
- sm4_neon_ctr_crypt(ctx->rkey_enc, dst, src,
- walk.iv, nblocks);
-
- kernel_neon_end();
+ scoped_ksimd()
+ sm4_neon_ctr_crypt(ctx->rkey_enc, dst, src,
+ walk.iv, nblocks);
dst += nblocks * SM4_BLOCK_SIZE;
src += nblocks * SM4_BLOCK_SIZE;
diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h
index c8c77f9e36d6..862416624852 100644
--- a/arch/arm64/include/asm/alternative-macros.h
+++ b/arch/arm64/include/asm/alternative-macros.h
@@ -19,7 +19,7 @@
#error "cpucaps have overflown ARM64_CB_BIT"
#endif
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/stringify.h>
@@ -207,7 +207,7 @@ alternative_endif
#define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...) \
alternative_insn insn1, insn2, cap, IS_ENABLED(cfg)
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
/*
* Usage: asm(ALTERNATIVE(oldinstr, newinstr, cpucap));
@@ -219,7 +219,7 @@ alternative_endif
#define ALTERNATIVE(oldinstr, newinstr, ...) \
_ALTERNATIVE_CFG(oldinstr, newinstr, __VA_ARGS__, 1)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
@@ -263,6 +263,6 @@ l_yes:
return true;
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_ALTERNATIVE_MACROS_H */
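
The guard rename is mechanical but worth spelling out: __ASSEMBLER__ is predefined by GCC and Clang whenever they preprocess assembly, whereas __ASSEMBLY__ was historically defined by the kernel's own build flags, so the headers now key off the toolchain-provided symbol. The dual-use header shape itself is unchanged; a minimal sketch with a hypothetical header name:

	#ifndef __MY_DUAL_USE_H
	#define __MY_DUAL_USE_H

	#ifndef __ASSEMBLER__
	/* C-only: types, prototypes, inline helpers */
	void my_c_helper(void);
	#else	/* __ASSEMBLER__ */
	/* assembler-only: .macro definitions and friends */
		.macro	my_asm_helper
		nop
		.endm
	#endif	/* __ASSEMBLER__ */

	#endif	/* __MY_DUAL_USE_H */
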
diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
index 51746005239b..621aa8550174 100644
--- a/arch/arm64/include/asm/alternative.h
+++ b/arch/arm64/include/asm/alternative.h
@@ -4,7 +4,7 @@
#include <asm/alternative-macros.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/init.h>
#include <linux/types.h>
@@ -37,5 +37,5 @@ static inline int apply_alternatives_module(void *start, size_t length)
void alt_cb_patch_nops(struct alt_instr *alt, __le32 *origptr,
__le32 *updptr, int nr_inst);
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_ALTERNATIVE_H */
diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
index 9e96f024b2f1..d20b03931a8d 100644
--- a/arch/arm64/include/asm/arch_gicv3.h
+++ b/arch/arm64/include/asm/arch_gicv3.h
@@ -9,7 +9,7 @@
#include <asm/sysreg.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/irqchip/arm-gic-common.h>
#include <linux/stringify.h>
@@ -188,5 +188,5 @@ static inline bool gic_has_relaxed_pmr_sync(void)
return cpus_have_cap(ARM64_HAS_GIC_PRIO_RELAXED_SYNC);
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_ARCH_GICV3_H */
diff --git a/arch/arm64/include/asm/asm-extable.h b/arch/arm64/include/asm/asm-extable.h
index 292f2687a12e..d67e2fdd1aee 100644
--- a/arch/arm64/include/asm/asm-extable.h
+++ b/arch/arm64/include/asm/asm-extable.h
@@ -27,7 +27,7 @@
/* Data fields for EX_TYPE_UACCESS_CPY */
#define EX_DATA_UACCESS_WRITE BIT(0)
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#define __ASM_EXTABLE_RAW(insn, fixup, type, data) \
.pushsection __ex_table, "a"; \
@@ -77,7 +77,7 @@
__ASM_EXTABLE_RAW(\insn, \fixup, EX_TYPE_UACCESS_CPY, \uaccess_is_write)
.endm
-#else /* __ASSEMBLY__ */
+#else /* __ASSEMBLER__ */
#include <linux/stringify.h>
@@ -132,6 +132,6 @@
EX_DATA_REG(ADDR, addr) \
")")
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_ASM_EXTABLE_H */
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 23be85d93348..f0ca7196f6fa 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -5,7 +5,7 @@
* Copyright (C) 1996-2000 Russell King
* Copyright (C) 2012 ARM Ltd.
*/
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#error "Only include this from assembly code"
#endif
@@ -325,14 +325,14 @@ alternative_cb_end
* tcr_set_t0sz - update TCR.T0SZ so that we can load the ID map
*/
.macro tcr_set_t0sz, valreg, t0sz
- bfi \valreg, \t0sz, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH
+ bfi \valreg, \t0sz, #TCR_EL1_T0SZ_SHIFT, #TCR_EL1_T0SZ_WIDTH
.endm
/*
* tcr_set_t1sz - update TCR.T1SZ
*/
.macro tcr_set_t1sz, valreg, t1sz
- bfi \valreg, \t1sz, #TCR_T1SZ_OFFSET, #TCR_TxSZ_WIDTH
+ bfi \valreg, \t1sz, #TCR_EL1_T1SZ_SHIFT, #TCR_EL1_T1SZ_WIDTH
.endm
/*
@@ -371,7 +371,7 @@ alternative_endif
* [start, end) with dcache line size explicitly provided.
*
* op: operation passed to dc instruction
- * domain: domain used in dsb instruciton
+ * domain: domain used in dsb instruction
* start: starting virtual address of the region
* end: end virtual address of the region
* linesz: dcache line size
@@ -412,7 +412,7 @@ alternative_endif
* [start, end)
*
* op: operation passed to dc instruction
- * domain: domain used in dsb instruciton
+ * domain: domain used in dsb instruction
* start: starting virtual address of the region
* end: end virtual address of the region
* fixup: optional label to branch to on user fault
@@ -589,7 +589,7 @@ alternative_endif
.macro offset_ttbr1, ttbr, tmp
#if defined(CONFIG_ARM64_VA_BITS_52) && !defined(CONFIG_ARM64_LPA2)
mrs \tmp, tcr_el1
- and \tmp, \tmp, #TCR_T1SZ_MASK
+ and \tmp, \tmp, #TCR_EL1_T1SZ_MASK
cmp \tmp, #TCR_T1SZ(VA_BITS_MIN)
orr \tmp, \ttbr, #TTBR1_BADDR_4852_OFFSET
csel \ttbr, \tmp, \ttbr, eq
diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index 87f568a94e55..afad1849c4cf 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -103,17 +103,17 @@ static __always_inline void __lse_atomic_and(int i, atomic_t *v)
return __lse_atomic_andnot(~i, v);
}
-#define ATOMIC_FETCH_OP_AND(name, mb, cl...) \
+#define ATOMIC_FETCH_OP_AND(name) \
static __always_inline int \
__lse_atomic_fetch_and##name(int i, atomic_t *v) \
{ \
return __lse_atomic_fetch_andnot##name(~i, v); \
}
-ATOMIC_FETCH_OP_AND(_relaxed, )
-ATOMIC_FETCH_OP_AND(_acquire, a, "memory")
-ATOMIC_FETCH_OP_AND(_release, l, "memory")
-ATOMIC_FETCH_OP_AND( , al, "memory")
+ATOMIC_FETCH_OP_AND(_relaxed)
+ATOMIC_FETCH_OP_AND(_acquire)
+ATOMIC_FETCH_OP_AND(_release)
+ATOMIC_FETCH_OP_AND( )
#undef ATOMIC_FETCH_OP_AND
@@ -210,17 +210,17 @@ static __always_inline void __lse_atomic64_and(s64 i, atomic64_t *v)
return __lse_atomic64_andnot(~i, v);
}
-#define ATOMIC64_FETCH_OP_AND(name, mb, cl...) \
+#define ATOMIC64_FETCH_OP_AND(name) \
static __always_inline long \
__lse_atomic64_fetch_and##name(s64 i, atomic64_t *v) \
{ \
return __lse_atomic64_fetch_andnot##name(~i, v); \
}
-ATOMIC64_FETCH_OP_AND(_relaxed, )
-ATOMIC64_FETCH_OP_AND(_acquire, a, "memory")
-ATOMIC64_FETCH_OP_AND(_release, l, "memory")
-ATOMIC64_FETCH_OP_AND( , al, "memory")
+ATOMIC64_FETCH_OP_AND(_relaxed)
+ATOMIC64_FETCH_OP_AND(_acquire)
+ATOMIC64_FETCH_OP_AND(_release)
+ATOMIC64_FETCH_OP_AND( )
#undef ATOMIC64_FETCH_OP_AND
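
With the unused barrier and clobber arguments dropped, the macro only stamps out the name suffix; the ordering semantics come entirely from the fetch_andnot variant it forwards to. ATOMIC64_FETCH_OP_AND(_acquire), for instance, expands to roughly:

	static __always_inline long
	__lse_atomic64_fetch_and_acquire(s64 i, atomic64_t *v)
	{
		/* and-with-i is andnot-with-~i; acquire ordering comes from the callee */
		return __lse_atomic64_fetch_andnot_acquire(~i, v);
	}
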
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index f5801b0ba9e9..9495c4441a46 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -7,7 +7,7 @@
#ifndef __ASM_BARRIER_H
#define __ASM_BARRIER_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/kasan-checks.h>
@@ -221,6 +221,6 @@ do { \
#include <asm-generic/barrier.h>
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_BARRIER_H */
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index 09963004ceea..dd2c8586a725 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -35,7 +35,7 @@
#define ARCH_DMA_MINALIGN (128)
#define ARCH_KMALLOC_MINALIGN (8)
-#if !defined(__ASSEMBLY__) && !defined(BUILD_VDSO)
+#if !defined(__ASSEMBLER__) && !defined(BUILD_VDSO)
#include <linux/bitops.h>
#include <linux/kasan-enabled.h>
@@ -135,6 +135,6 @@ static inline u32 __attribute_const__ read_cpuid_effective_cachetype(void)
return ctr;
}
-#endif /* !defined(__ASSEMBLY__) && !defined(BUILD_VDSO) */
+#endif /* !defined(__ASSEMBLER__) && !defined(BUILD_VDSO) */
#endif
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 9d769291a306..2c8029472ad4 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -5,7 +5,7 @@
#include <asm/cpucap-defs.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
/*
* Check whether a cpucap is possible at compiletime.
@@ -77,6 +77,6 @@ cpucap_is_possible(const unsigned int cap)
return true;
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index e223cbf350e4..4de51f8d92cb 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -19,7 +19,7 @@
#define ARM64_SW_FEATURE_OVERRIDE_HVHE 4
#define ARM64_SW_FEATURE_OVERRIDE_RODATA_OFF 8
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/bug.h>
#include <linux/jump_label.h>
@@ -199,7 +199,7 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0;
* registers (e.g, SCTLR, TCR etc.) or patching the kernel via
* alternatives. The kernel patching is batched and performed at later
* point. The actions are always initiated only after the capability
- * is finalised. This is usally denoted by "enabling" the capability.
+ * is finalised. This is usually denoted by "enabling" the capability.
* The actions are initiated as follows :
* a) Action is triggered on all online CPUs, after the capability is
* finalised, invoked within the stop_machine() context from
@@ -251,7 +251,7 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0;
#define ARM64_CPUCAP_SCOPE_LOCAL_CPU ((u16)BIT(0))
#define ARM64_CPUCAP_SCOPE_SYSTEM ((u16)BIT(1))
/*
- * The capabilitiy is detected on the Boot CPU and is used by kernel
+ * The capability is detected on the Boot CPU and is used by kernel
* during early boot. i.e, the capability should be "detected" and
* "enabled" as early as possibly on all booting CPUs.
*/
@@ -1078,6 +1078,6 @@ static inline bool cpu_has_lpa2(void)
#endif
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 9b00b75acbf2..08860d482e60 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -247,9 +247,9 @@
/* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */
#define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX
#define MIDR_FUJITSU_ERRATUM_010001_MASK (~MIDR_CPU_VAR_REV(1, 0))
-#define TCR_CLEAR_FUJITSU_ERRATUM_010001 (TCR_NFD1 | TCR_NFD0)
+#define TCR_CLEAR_FUJITSU_ERRATUM_010001 (TCR_EL1_NFD1 | TCR_EL1_NFD0)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/sysreg.h>
@@ -328,6 +328,6 @@ static inline u32 __attribute_const__ read_cpuid_cachetype(void)
{
return read_cpuid(CTR_EL0);
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif
diff --git a/arch/arm64/include/asm/current.h b/arch/arm64/include/asm/current.h
index 54ceae0874c7..c92912eaf186 100644
--- a/arch/arm64/include/asm/current.h
+++ b/arch/arm64/include/asm/current.h
@@ -4,7 +4,7 @@
#include <linux/compiler.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
struct task_struct;
@@ -23,7 +23,7 @@ static __always_inline struct task_struct *get_current(void)
#define current get_current()
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_CURRENT_H */
diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index f5e3ed2420ce..8d5f92418838 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -48,7 +48,7 @@
#define AARCH32_BREAK_THUMB2_LO 0xf7f0
#define AARCH32_BREAK_THUMB2_HI 0xa000
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
struct task_struct;
#define DBG_ARCH_ID_RESERVED 0 /* In case of ptrace ABI updates. */
@@ -88,5 +88,5 @@ static inline bool try_step_suspended_breakpoints(struct pt_regs *regs)
bool try_handle_aarch32_break(struct pt_regs *regs);
-#endif /* __ASSEMBLY */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_DEBUG_MONITORS_H */
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index bcd5622aa096..aa91165ca140 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -126,21 +126,14 @@ static inline void efi_set_pgd(struct mm_struct *mm)
if (mm != current->active_mm) {
/*
* Update the current thread's saved ttbr0 since it is
- * restored as part of a return from exception. Enable
- * access to the valid TTBR0_EL1 and invoke the errata
- * workaround directly since there is no return from
- * exception when invoking the EFI run-time services.
+ * restored as part of a return from exception.
*/
update_saved_ttbr0(current, mm);
- uaccess_ttbr0_enable();
- post_ttbr_update_workaround();
} else {
/*
- * Defer the switch to the current thread's TTBR0_EL1
- * until uaccess_enable(). Restore the current
- * thread's saved ttbr0 corresponding to its active_mm
+ * Restore the current thread's saved ttbr0
+ * corresponding to its active_mm
*/
- uaccess_ttbr0_disable();
update_saved_ttbr0(current, current->active_mm);
}
}
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index 99a7c0235e6d..cacd20df1786 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -7,7 +7,7 @@
#ifndef __ARM_KVM_INIT_H__
#define __ARM_KVM_INIT_H__
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#error Assembly-only header
#endif
@@ -24,7 +24,7 @@
* ID_AA64MMFR4_EL1.E2H0 < 0. On such CPUs HCR_EL2.E2H is RES1, but it
* can reset into an UNKNOWN state and might not read as 1 until it has
* been initialized explicitly.
- * Initalize HCR_EL2.E2H so that later code can rely upon HCR_EL2.E2H
+ * Initialize HCR_EL2.E2H so that later code can rely upon HCR_EL2.E2H
* indicating whether the CPU is running in E2H mode.
*/
mrs_s x1, SYS_ID_AA64MMFR4_EL1
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index 3f93f4eef953..d2779d604c7b 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -133,7 +133,7 @@
#define ELF_ET_DYN_BASE (2 * DEFAULT_MAP_WINDOW_64 / 3)
#endif /* CONFIG_ARM64_FORCE_52BIT */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <uapi/linux/elf.h>
#include <linux/bug.h>
@@ -293,6 +293,6 @@ static inline int arch_check_elf(void *ehdr, bool has_interp,
return 0;
}
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index e1deed824464..4975a92cbd17 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -431,7 +431,7 @@
#define ESR_ELx_IT_GCSPOPCX 6
#define ESR_ELx_IT_GCSPOPX 7
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/types.h>
static inline unsigned long esr_brk_comment(unsigned long esr)
@@ -534,6 +534,6 @@ static inline bool esr_iss_is_eretab(unsigned long esr)
}
const char *esr_get_class_string(unsigned long esr);
-#endif /* __ASSEMBLY */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_ESR_H */
diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
index 635a43c4ec85..65555284446e 100644
--- a/arch/arm64/include/asm/fixmap.h
+++ b/arch/arm64/include/asm/fixmap.h
@@ -15,7 +15,7 @@
#ifndef _ASM_ARM64_FIXMAP_H
#define _ASM_ARM64_FIXMAP_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/kernel.h>
#include <linux/math.h>
#include <linux/sizes.h>
@@ -117,5 +117,5 @@ extern void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t pr
#include <asm-generic/fixmap.h>
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_ARM64_FIXMAP_H */
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index b8cf0ea43cc0..1d2e33559bd5 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -12,7 +12,7 @@
#include <asm/sigcontext.h>
#include <asm/sysreg.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/bitmap.h>
#include <linux/build_bug.h>
diff --git a/arch/arm64/include/asm/fpu.h b/arch/arm64/include/asm/fpu.h
index 2ae50bdce59b..751e88a96734 100644
--- a/arch/arm64/include/asm/fpu.h
+++ b/arch/arm64/include/asm/fpu.h
@@ -6,10 +6,22 @@
#ifndef __ASM_FPU_H
#define __ASM_FPU_H
+#include <linux/preempt.h>
#include <asm/neon.h>
#define kernel_fpu_available() cpu_has_neon()
-#define kernel_fpu_begin() kernel_neon_begin()
-#define kernel_fpu_end() kernel_neon_end()
+
+static inline void kernel_fpu_begin(void)
+{
+ BUG_ON(!in_task());
+ preempt_disable();
+ kernel_neon_begin(NULL);
+}
+
+static inline void kernel_fpu_end(void)
+{
+ kernel_neon_end(NULL);
+ preempt_enable();
+}
#endif /* ! __ASM_FPU_H */
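
kernel_fpu_begin()/kernel_fpu_end() are now inline wrappers that handle preemption themselves and pass a NULL state buffer to the reworked kernel_neon_begin()/kernel_neon_end(); the BUG_ON(!in_task()) makes the task-context-only contract explicit. A minimal usage sketch (the FP-using routine is hypothetical):

	#include <asm/fpu.h>

	static void my_fp_work(void)
	{
		/* must run in task context; the wrapper BUG()s otherwise */
		kernel_fpu_begin();
		my_neon_routine();	/* hypothetical code touching FP/SIMD registers */
		kernel_fpu_end();
	}
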
diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
index ba7cf7fec5e9..1621c84f44b3 100644
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@@ -37,7 +37,7 @@
*/
#define ARCH_FTRACE_SHIFT_STACK_TRACER 1
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/compat.h>
extern void _mcount(unsigned long);
@@ -217,9 +217,9 @@ static inline bool arch_syscall_match_sym_name(const char *sym,
*/
return !strcmp(sym + 8, name);
}
-#endif /* ifndef __ASSEMBLY__ */
+#endif /* ifndef __ASSEMBLER__ */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
diff --git a/arch/arm64/include/asm/gpr-num.h b/arch/arm64/include/asm/gpr-num.h
index 05da4a7c5788..a114e4f8209b 100644
--- a/arch/arm64/include/asm/gpr-num.h
+++ b/arch/arm64/include/asm/gpr-num.h
@@ -2,7 +2,7 @@
#ifndef __ASM_GPR_NUM_H
#define __ASM_GPR_NUM_H
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
.irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
.equ .L__gpr_num_x\num, \num
@@ -11,7 +11,7 @@
.equ .L__gpr_num_xzr, 31
.equ .L__gpr_num_wzr, 31
-#else /* __ASSEMBLY__ */
+#else /* __ASSEMBLER__ */
#define __DEFINE_ASM_GPR_NUMS \
" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" \
@@ -21,6 +21,6 @@
" .equ .L__gpr_num_xzr, 31\n" \
" .equ .L__gpr_num_wzr, 31\n"
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_GPR_NUM_H */
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index 6d567265467c..1f63814ae6c4 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -46,7 +46,7 @@
#define COMPAT_HWCAP2_SB (1 << 5)
#define COMPAT_HWCAP2_SSBS (1 << 6)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/log2.h>
/*
diff --git a/arch/arm64/include/asm/image.h b/arch/arm64/include/asm/image.h
index c09cf942dc92..9ba85173f857 100644
--- a/arch/arm64/include/asm/image.h
+++ b/arch/arm64/include/asm/image.h
@@ -20,7 +20,7 @@
#define ARM64_IMAGE_FLAG_PAGE_SIZE_64K 3
#define ARM64_IMAGE_FLAG_PHYS_BASE 1
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#define arm64_image_flag_field(flags, field) \
(((flags) >> field##_SHIFT) & field##_MASK)
@@ -54,6 +54,6 @@ struct arm64_image_header {
__le32 res5;
};
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_IMAGE_H */
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 18c7811774d3..e1d30ba99d01 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -12,7 +12,7 @@
#include <asm/insn-def.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
enum aarch64_insn_hint_cr_op {
AARCH64_INSN_HINT_NOP = 0x0 << 5,
@@ -730,6 +730,6 @@ u32 aarch32_insn_mcr_extract_crm(u32 insn);
typedef bool (pstate_check_t)(unsigned long);
extern pstate_check_t * const aarch32_opcode_cond_checks[16];
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_INSN_H */
diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h
index 424ed421cd97..0cb211d3607d 100644
--- a/arch/arm64/include/asm/jump_label.h
+++ b/arch/arm64/include/asm/jump_label.h
@@ -8,7 +8,7 @@
#ifndef __ASM_JUMP_LABEL_H
#define __ASM_JUMP_LABEL_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
#include <asm/insn.h>
@@ -58,5 +58,5 @@ l_yes:
return true;
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_JUMP_LABEL_H */
diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h
index e1b57c13f8a4..b167e9d3da91 100644
--- a/arch/arm64/include/asm/kasan.h
+++ b/arch/arm64/include/asm/kasan.h
@@ -2,7 +2,7 @@
#ifndef __ASM_KASAN_H
#define __ASM_KASAN_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/linkage.h>
#include <asm/memory.h>
diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
index 4d9cc7a76d9c..892e5bebda95 100644
--- a/arch/arm64/include/asm/kexec.h
+++ b/arch/arm64/include/asm/kexec.h
@@ -25,7 +25,7 @@
#define KEXEC_ARCH KEXEC_ARCH_AARCH64
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/**
* crash_setup_regs() - save registers for the panic kernel
@@ -130,6 +130,6 @@ extern int load_other_segments(struct kimage *image,
char *cmdline);
#endif
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif
diff --git a/arch/arm64/include/asm/kgdb.h b/arch/arm64/include/asm/kgdb.h
index 3184f5d1e3ae..67ef1c5532ae 100644
--- a/arch/arm64/include/asm/kgdb.h
+++ b/arch/arm64/include/asm/kgdb.h
@@ -14,7 +14,7 @@
#include <linux/ptrace.h>
#include <asm/debug-monitors.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
static inline void arch_kgdb_breakpoint(void)
{
@@ -36,7 +36,7 @@ static inline int kgdb_single_step_handler(struct pt_regs *regs,
}
#endif
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
/*
* gdb remote procotol (well most versions of it) expects the following
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 9da54d4ee49e..4b34f7b7ed2f 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -46,7 +46,7 @@
#define __KVM_HOST_SMCCC_FUNC___kvm_hyp_init 0
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/mm.h>
@@ -303,7 +303,7 @@ void kvm_compute_final_ctr_el0(struct alt_instr *alt,
void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr_virt,
u64 elr_phys, u64 par, uintptr_t vcpu, u64 far, u64 hpfar);
-#else /* __ASSEMBLY__ */
+#else /* __ASSEMBLER__ */
.macro get_host_ctxt reg, tmp
adr_this_cpu \reg, kvm_host_data, \tmp
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index e4069f2ce642..2dc5e6e742bb 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -49,7 +49,7 @@
* mappings, and none of this applies in that case.
*/
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#include <asm/alternative.h>
@@ -396,5 +396,5 @@ void kvm_s2_ptdump_create_debugfs(struct kvm *kvm);
static inline void kvm_s2_ptdump_create_debugfs(struct kvm *kvm) {}
#endif /* CONFIG_PTDUMP_STAGE2_DEBUGFS */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ARM64_KVM_MMU_H__ */
diff --git a/arch/arm64/include/asm/kvm_mte.h b/arch/arm64/include/asm/kvm_mte.h
index de002636eb1f..3171963ad25c 100644
--- a/arch/arm64/include/asm/kvm_mte.h
+++ b/arch/arm64/include/asm/kvm_mte.h
@@ -5,7 +5,7 @@
#ifndef __ASM_KVM_MTE_H
#define __ASM_KVM_MTE_H
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#include <asm/sysreg.h>
@@ -62,5 +62,5 @@ alternative_else_nop_endif
.endm
#endif /* CONFIG_ARM64_MTE */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_KVM_MTE_H */
diff --git a/arch/arm64/include/asm/kvm_ptrauth.h b/arch/arm64/include/asm/kvm_ptrauth.h
index 6199c9f7ec6e..e50987b32483 100644
--- a/arch/arm64/include/asm/kvm_ptrauth.h
+++ b/arch/arm64/include/asm/kvm_ptrauth.h
@@ -8,7 +8,7 @@
#ifndef __ASM_KVM_PTRAUTH_H
#define __ASM_KVM_PTRAUTH_H
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#include <asm/sysreg.h>
@@ -100,7 +100,7 @@ alternative_else_nop_endif
.endm
#endif /* CONFIG_ARM64_PTR_AUTH */
-#else /* !__ASSEMBLY */
+#else /* !__ASSEMBLER__ */
#define __ptrauth_save_key(ctxt, key) \
do { \
@@ -120,5 +120,5 @@ alternative_else_nop_endif
__ptrauth_save_key(ctxt, APGA); \
} while(0)
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_KVM_PTRAUTH_H */
diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h
index d3acd9c87509..40bd17add539 100644
--- a/arch/arm64/include/asm/linkage.h
+++ b/arch/arm64/include/asm/linkage.h
@@ -1,7 +1,7 @@
#ifndef __ASM_LINKAGE_H
#define __ASM_LINKAGE_H
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#include <asm/assembler.h>
#endif
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index f1505c4acb38..9d54b2ea49d6 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -207,7 +207,7 @@
*/
#define TRAMP_SWAPPER_OFFSET (2 * PAGE_SIZE)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/bitops.h>
#include <linux/compiler.h>
@@ -392,7 +392,6 @@ static inline unsigned long virt_to_pfn(const void *kaddr)
* virt_to_page(x) convert a _valid_ virtual address to struct page *
* virt_addr_valid(x) indicates whether a virtual address is valid
*/
-#define ARCH_PFN_OFFSET ((unsigned long)PHYS_PFN_OFFSET)
#if defined(CONFIG_DEBUG_VIRTUAL)
#define page_to_virt(x) ({ \
@@ -422,7 +421,7 @@ static inline unsigned long virt_to_pfn(const void *kaddr)
})
void dump_mem_limit(void);
-#endif /* !ASSEMBLY */
+#endif /* !__ASSEMBLER__ */
/*
* Given that the GIC architecture permits ITS implementations that can only be
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 78a4dbf75e60..137a173df1ff 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -12,7 +12,7 @@
#define USER_ASID_FLAG (UL(1) << USER_ASID_BIT)
#define TTBR_ASID_MASK (UL(0xffff) << 48)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/refcount.h>
#include <asm/cpufeature.h>
@@ -112,5 +112,5 @@ void kpti_install_ng_mappings(void);
static inline void kpti_install_ng_mappings(void) {}
#endif
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 0dbe3b29049b..cc80af59c69e 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -8,7 +8,7 @@
#ifndef __ASM_MMU_CONTEXT_H
#define __ASM_MMU_CONTEXT_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/compiler.h>
#include <linux/sched.h>
@@ -62,29 +62,21 @@ static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm)
}
/*
- * TCR.T0SZ value to use when the ID map is active.
- */
-#define idmap_t0sz TCR_T0SZ(IDMAP_VA_BITS)
-
-/*
* Ensure TCR.T0SZ is set to the provided value.
*/
static inline void __cpu_set_tcr_t0sz(unsigned long t0sz)
{
unsigned long tcr = read_sysreg(tcr_el1);
- if ((tcr & TCR_T0SZ_MASK) == t0sz)
+ if ((tcr & TCR_EL1_T0SZ_MASK) == t0sz)
return;
- tcr &= ~TCR_T0SZ_MASK;
+ tcr &= ~TCR_EL1_T0SZ_MASK;
tcr |= t0sz;
write_sysreg(tcr, tcr_el1);
isb();
}
-#define cpu_set_default_tcr_t0sz() __cpu_set_tcr_t0sz(TCR_T0SZ(vabits_actual))
-#define cpu_set_idmap_tcr_t0sz() __cpu_set_tcr_t0sz(idmap_t0sz)
-
/*
* Remove the idmap from TTBR0_EL1 and install the pgd of the active mm.
*
@@ -103,7 +95,7 @@ static inline void cpu_uninstall_idmap(void)
cpu_set_reserved_ttbr0();
local_flush_tlb_all();
- cpu_set_default_tcr_t0sz();
+ __cpu_set_tcr_t0sz(TCR_T0SZ(vabits_actual));
if (mm != &init_mm && !system_uses_ttbr0_pan())
cpu_switch_mm(mm->pgd, mm);
@@ -113,7 +105,7 @@ static inline void cpu_install_idmap(void)
{
cpu_set_reserved_ttbr0();
local_flush_tlb_all();
- cpu_set_idmap_tcr_t0sz();
+ __cpu_set_tcr_t0sz(TCR_T0SZ(IDMAP_VA_BITS));
cpu_switch_mm(lm_alias(idmap_pg_dir), &init_mm);
}
@@ -330,6 +322,6 @@ static inline void deactivate_mm(struct task_struct *tsk,
#include <asm-generic/mmu_context.h>
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* !__ASM_MMU_CONTEXT_H */
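
With idmap_t0sz and the cpu_set_*_tcr_t0sz() wrappers gone, callers pass the encoded field value straight to __cpu_set_tcr_t0sz(). TCR_T0SZ(x) encodes 64 minus the VA width into TCR_EL1.T0SZ, so cpu_install_idmap() with an IDMAP_VA_BITS of, say, 48 writes (64 - 48) << TCR_EL1_T0SZ_SHIFT, i.e. the value 16 in bits [5:0] of TCR_EL1.
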
diff --git a/arch/arm64/include/asm/mte-kasan.h b/arch/arm64/include/asm/mte-kasan.h
index 0f9b08e8fb8d..352139271918 100644
--- a/arch/arm64/include/asm/mte-kasan.h
+++ b/arch/arm64/include/asm/mte-kasan.h
@@ -9,7 +9,7 @@
#include <asm/cputype.h>
#include <asm/mte-def.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
@@ -259,6 +259,6 @@ static inline int mte_enable_kernel_store_only(void)
#endif /* CONFIG_ARM64_MTE */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_MTE_KASAN_H */
diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index 3b5069f4683d..6d4a78b9dc3e 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -8,7 +8,7 @@
#include <asm/compiler.h>
#include <asm/mte-def.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/bitfield.h>
#include <linux/kasan-enabled.h>
@@ -282,5 +282,5 @@ static inline void mte_check_tfsr_exit(void)
}
#endif /* CONFIG_KASAN_HW_TAGS */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_MTE_H */
diff --git a/arch/arm64/include/asm/neon.h b/arch/arm64/include/asm/neon.h
index d4b1d172a79b..acebee4605b5 100644
--- a/arch/arm64/include/asm/neon.h
+++ b/arch/arm64/include/asm/neon.h
@@ -13,7 +13,7 @@
#define cpu_has_neon() system_supports_fpsimd()
-void kernel_neon_begin(void);
-void kernel_neon_end(void);
+void kernel_neon_begin(struct user_fpsimd_state *);
+void kernel_neon_end(struct user_fpsimd_state *);
#endif /* ! __ASM_NEON_H */
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 258cca4b4873..00f117ff4f7a 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -10,7 +10,7 @@
#include <asm/page-def.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/personality.h> /* for READ_IMPLIES_EXEC */
#include <linux/types.h> /* for gfp_t */
@@ -45,7 +45,7 @@ int pfn_is_map_memory(unsigned long pfn);
#include <asm/memory.h>
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#define VM_DATA_DEFAULT_FLAGS (VM_DATA_FLAGS_TSK_EXEC | VM_MTE_ALLOWED)
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index f3b77deedfa2..d49180bb7cb3 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -228,102 +228,53 @@
/*
* TCR flags.
*/
-#define TCR_T0SZ_OFFSET 0
-#define TCR_T1SZ_OFFSET 16
-#define TCR_T0SZ(x) ((UL(64) - (x)) << TCR_T0SZ_OFFSET)
-#define TCR_T1SZ(x) ((UL(64) - (x)) << TCR_T1SZ_OFFSET)
-#define TCR_TxSZ(x) (TCR_T0SZ(x) | TCR_T1SZ(x))
-#define TCR_TxSZ_WIDTH 6
-#define TCR_T0SZ_MASK (((UL(1) << TCR_TxSZ_WIDTH) - 1) << TCR_T0SZ_OFFSET)
-#define TCR_T1SZ_MASK (((UL(1) << TCR_TxSZ_WIDTH) - 1) << TCR_T1SZ_OFFSET)
-
-#define TCR_EPD0_SHIFT 7
-#define TCR_EPD0_MASK (UL(1) << TCR_EPD0_SHIFT)
-#define TCR_IRGN0_SHIFT 8
-#define TCR_IRGN0_MASK (UL(3) << TCR_IRGN0_SHIFT)
-#define TCR_IRGN0_NC (UL(0) << TCR_IRGN0_SHIFT)
-#define TCR_IRGN0_WBWA (UL(1) << TCR_IRGN0_SHIFT)
-#define TCR_IRGN0_WT (UL(2) << TCR_IRGN0_SHIFT)
-#define TCR_IRGN0_WBnWA (UL(3) << TCR_IRGN0_SHIFT)
-
-#define TCR_EPD1_SHIFT 23
-#define TCR_EPD1_MASK (UL(1) << TCR_EPD1_SHIFT)
-#define TCR_IRGN1_SHIFT 24
-#define TCR_IRGN1_MASK (UL(3) << TCR_IRGN1_SHIFT)
-#define TCR_IRGN1_NC (UL(0) << TCR_IRGN1_SHIFT)
-#define TCR_IRGN1_WBWA (UL(1) << TCR_IRGN1_SHIFT)
-#define TCR_IRGN1_WT (UL(2) << TCR_IRGN1_SHIFT)
-#define TCR_IRGN1_WBnWA (UL(3) << TCR_IRGN1_SHIFT)
-
-#define TCR_IRGN_NC (TCR_IRGN0_NC | TCR_IRGN1_NC)
-#define TCR_IRGN_WBWA (TCR_IRGN0_WBWA | TCR_IRGN1_WBWA)
-#define TCR_IRGN_WT (TCR_IRGN0_WT | TCR_IRGN1_WT)
-#define TCR_IRGN_WBnWA (TCR_IRGN0_WBnWA | TCR_IRGN1_WBnWA)
-#define TCR_IRGN_MASK (TCR_IRGN0_MASK | TCR_IRGN1_MASK)
-
-
-#define TCR_ORGN0_SHIFT 10
-#define TCR_ORGN0_MASK (UL(3) << TCR_ORGN0_SHIFT)
-#define TCR_ORGN0_NC (UL(0) << TCR_ORGN0_SHIFT)
-#define TCR_ORGN0_WBWA (UL(1) << TCR_ORGN0_SHIFT)
-#define TCR_ORGN0_WT (UL(2) << TCR_ORGN0_SHIFT)
-#define TCR_ORGN0_WBnWA (UL(3) << TCR_ORGN0_SHIFT)
-
-#define TCR_ORGN1_SHIFT 26
-#define TCR_ORGN1_MASK (UL(3) << TCR_ORGN1_SHIFT)
-#define TCR_ORGN1_NC (UL(0) << TCR_ORGN1_SHIFT)
-#define TCR_ORGN1_WBWA (UL(1) << TCR_ORGN1_SHIFT)
-#define TCR_ORGN1_WT (UL(2) << TCR_ORGN1_SHIFT)
-#define TCR_ORGN1_WBnWA (UL(3) << TCR_ORGN1_SHIFT)
-
-#define TCR_ORGN_NC (TCR_ORGN0_NC | TCR_ORGN1_NC)
-#define TCR_ORGN_WBWA (TCR_ORGN0_WBWA | TCR_ORGN1_WBWA)
-#define TCR_ORGN_WT (TCR_ORGN0_WT | TCR_ORGN1_WT)
-#define TCR_ORGN_WBnWA (TCR_ORGN0_WBnWA | TCR_ORGN1_WBnWA)
-#define TCR_ORGN_MASK (TCR_ORGN0_MASK | TCR_ORGN1_MASK)
-
-#define TCR_SH0_SHIFT 12
-#define TCR_SH0_MASK (UL(3) << TCR_SH0_SHIFT)
-#define TCR_SH0_INNER (UL(3) << TCR_SH0_SHIFT)
-
-#define TCR_SH1_SHIFT 28
-#define TCR_SH1_MASK (UL(3) << TCR_SH1_SHIFT)
-#define TCR_SH1_INNER (UL(3) << TCR_SH1_SHIFT)
-#define TCR_SHARED (TCR_SH0_INNER | TCR_SH1_INNER)
-
-#define TCR_TG0_SHIFT 14
-#define TCR_TG0_MASK (UL(3) << TCR_TG0_SHIFT)
-#define TCR_TG0_4K (UL(0) << TCR_TG0_SHIFT)
-#define TCR_TG0_64K (UL(1) << TCR_TG0_SHIFT)
-#define TCR_TG0_16K (UL(2) << TCR_TG0_SHIFT)
-
-#define TCR_TG1_SHIFT 30
-#define TCR_TG1_MASK (UL(3) << TCR_TG1_SHIFT)
-#define TCR_TG1_16K (UL(1) << TCR_TG1_SHIFT)
-#define TCR_TG1_4K (UL(2) << TCR_TG1_SHIFT)
-#define TCR_TG1_64K (UL(3) << TCR_TG1_SHIFT)
-
-#define TCR_IPS_SHIFT 32
-#define TCR_IPS_MASK (UL(7) << TCR_IPS_SHIFT)
-#define TCR_A1 (UL(1) << 22)
-#define TCR_ASID16 (UL(1) << 36)
-#define TCR_TBI0 (UL(1) << 37)
-#define TCR_TBI1 (UL(1) << 38)
-#define TCR_HA (UL(1) << 39)
-#define TCR_HD (UL(1) << 40)
-#define TCR_HPD0_SHIFT 41
-#define TCR_HPD0 (UL(1) << TCR_HPD0_SHIFT)
-#define TCR_HPD1_SHIFT 42
-#define TCR_HPD1 (UL(1) << TCR_HPD1_SHIFT)
-#define TCR_TBID0 (UL(1) << 51)
-#define TCR_TBID1 (UL(1) << 52)
-#define TCR_NFD0 (UL(1) << 53)
-#define TCR_NFD1 (UL(1) << 54)
-#define TCR_E0PD0 (UL(1) << 55)
-#define TCR_E0PD1 (UL(1) << 56)
-#define TCR_TCMA0 (UL(1) << 57)
-#define TCR_TCMA1 (UL(1) << 58)
-#define TCR_DS (UL(1) << 59)
+#define TCR_T0SZ(x) ((UL(64) - (x)) << TCR_EL1_T0SZ_SHIFT)
+#define TCR_T1SZ(x) ((UL(64) - (x)) << TCR_EL1_T1SZ_SHIFT)
+
+#define TCR_T0SZ_MASK TCR_EL1_T0SZ_MASK
+#define TCR_T1SZ_MASK TCR_EL1_T1SZ_MASK
+
+#define TCR_EPD0_MASK TCR_EL1_EPD0_MASK
+#define TCR_EPD1_MASK TCR_EL1_EPD1_MASK
+
+#define TCR_IRGN0_MASK TCR_EL1_IRGN0_MASK
+#define TCR_IRGN0_WBWA (TCR_EL1_IRGN0_WBWA << TCR_EL1_IRGN0_SHIFT)
+
+#define TCR_ORGN0_MASK TCR_EL1_ORGN0_MASK
+#define TCR_ORGN0_WBWA (TCR_EL1_ORGN0_WBWA << TCR_EL1_ORGN0_SHIFT)
+
+#define TCR_SH0_MASK TCR_EL1_SH0_MASK
+#define TCR_SH0_INNER (TCR_EL1_SH0_INNER << TCR_EL1_SH0_SHIFT)
+
+#define TCR_SH1_MASK TCR_EL1_SH1_MASK
+
+#define TCR_TG0_SHIFT TCR_EL1_TG0_SHIFT
+#define TCR_TG0_MASK TCR_EL1_TG0_MASK
+#define TCR_TG0_4K (TCR_EL1_TG0_4K << TCR_EL1_TG0_SHIFT)
+#define TCR_TG0_64K (TCR_EL1_TG0_64K << TCR_EL1_TG0_SHIFT)
+#define TCR_TG0_16K (TCR_EL1_TG0_16K << TCR_EL1_TG0_SHIFT)
+
+#define TCR_TG1_SHIFT TCR_EL1_TG1_SHIFT
+#define TCR_TG1_MASK TCR_EL1_TG1_MASK
+#define TCR_TG1_16K (TCR_EL1_TG1_16K << TCR_EL1_TG1_SHIFT)
+#define TCR_TG1_4K (TCR_EL1_TG1_4K << TCR_EL1_TG1_SHIFT)
+#define TCR_TG1_64K (TCR_EL1_TG1_64K << TCR_EL1_TG1_SHIFT)
+
+#define TCR_IPS_SHIFT TCR_EL1_IPS_SHIFT
+#define TCR_IPS_MASK TCR_EL1_IPS_MASK
+#define TCR_A1 TCR_EL1_A1
+#define TCR_ASID16 TCR_EL1_AS
+#define TCR_TBI0 TCR_EL1_TBI0
+#define TCR_TBI1 TCR_EL1_TBI1
+#define TCR_HA TCR_EL1_HA
+#define TCR_HD TCR_EL1_HD
+#define TCR_HPD0 TCR_EL1_HPD0
+#define TCR_HPD1 TCR_EL1_HPD1
+#define TCR_TBID0 TCR_EL1_TBID0
+#define TCR_TBID1 TCR_EL1_TBID1
+#define TCR_E0PD0 TCR_EL1_E0PD0
+#define TCR_E0PD1 TCR_EL1_E0PD1
+#define TCR_DS TCR_EL1_DS
/*
* TTBR.
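
The surviving legacy TCR_* names are now thin aliases over the generated TCR_EL1_* sysreg definitions: single-bit flags alias one-to-one (TCR_HD is simply TCR_EL1_HD), while multi-bit field values are rebuilt by shifting the generated field value into position, as in TCR_TG0_16K == TCR_EL1_TG0_16K << TCR_EL1_TG0_SHIFT, so existing users of the old spellings keep working while new code can use the TCR_EL1_* names directly.
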
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 85dceb1c66f4..161e8660eddd 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -62,7 +62,7 @@
#define _PAGE_READONLY_EXEC (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN)
#define _PAGE_EXECONLY (_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/cpufeature.h>
#include <asm/pgtable-types.h>
@@ -84,7 +84,7 @@ extern unsigned long prot_ns_shared;
#else
static inline bool __pure lpa2_is_enabled(void)
{
- return read_tcr() & TCR_DS;
+ return read_tcr() & TCR_EL1_DS;
}
#define PTE_MAYBE_SHARED (lpa2_is_enabled() ? 0 : PTE_SHARED)
@@ -127,7 +127,7 @@ static inline bool __pure lpa2_is_enabled(void)
#define PAGE_READONLY_EXEC __pgprot(_PAGE_READONLY_EXEC)
#define PAGE_EXECONLY __pgprot(_PAGE_EXECONLY)
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#define pte_pi_index(pte) ( \
((pte & BIT(PTE_PI_IDX_3)) >> (PTE_PI_IDX_3 - 3)) | \
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 0944e296dd4a..64d5f1d9cce9 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -30,7 +30,7 @@
#define vmemmap ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT))
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/cmpxchg.h>
#include <asm/fixmap.h>
@@ -130,12 +130,16 @@ static inline void arch_leave_lazy_mmu_mode(void)
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
/*
- * Outside of a few very special situations (e.g. hibernation), we always
- * use broadcast TLB invalidation instructions, therefore a spurious page
- * fault on one CPU which has been handled concurrently by another CPU
- * does not need to perform additional invalidation.
+ * We use a local TLB invalidation instruction when reusing a page in the
+ * write protection fault handler, to avoid a TLBI broadcast on the hot
+ * path. This will cause spurious page faults if stale read-only TLB
+ * entries exist.
*/
-#define flush_tlb_fix_spurious_fault(vma, address, ptep) do { } while (0)
+#define flush_tlb_fix_spurious_fault(vma, address, ptep) \
+ local_flush_tlb_page_nonotify(vma, address)
+
+#define flush_tlb_fix_spurious_fault_pmd(vma, address, pmdp) \
+ local_flush_tlb_page_nonotify(vma, address)
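For illustration, a minimal sketch (not part of the patch; function name is a placeholder) of how a generic fault path is expected to consume this hook: when the PTE turns out to be up to date already, only the local stale read-only entry needs dropping, and remote CPUs recover through their own spurious fault.

static vm_fault_t example_fixup_spurious_wp_fault(struct vm_area_struct *vma,
						  unsigned long address,
						  pte_t *ptep)
{
	/*
	 * The PTE was already made writable elsewhere, so only a stale
	 * read-only TLB entry on this CPU can explain the fault: drop it
	 * locally and let the access retry.
	 */
	flush_tlb_fix_spurious_fault(vma, address, ptep);
	return 0;
}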
/*
* ZERO_PAGE is a global shared page that is always zero: used
@@ -433,7 +437,7 @@ bool pgattr_change_is_safe(pteval_t old, pteval_t new);
* 1 0 | 1 0 1
* 1 1 | 0 1 x
*
- * When hardware DBM is not present, the sofware PTE_DIRTY bit is updated via
+ * When hardware DBM is not present, the software PTE_DIRTY bit is updated via
* the page fault mechanism. Checking the dirty status of a pte becomes:
*
* PTE_DIRTY || (PTE_WRITE && !PTE_RDONLY)
@@ -599,7 +603,7 @@ static inline int pte_protnone(pte_t pte)
/*
* pte_present_invalid() tells us that the pte is invalid from HW
* perspective but present from SW perspective, so the fields are to be
- * interpretted as per the HW layout. The second 2 checks are the unique
+ * interpreted as per the HW layout. The second 2 checks are the unique
* encoding that we use for PROT_NONE. It is insufficient to only use
* the first check because we share the same encoding scheme with pmds
* which support pmd_mkinvalid(), so can be present-invalid without
@@ -1949,6 +1953,6 @@ static inline void clear_young_dirty_ptes(struct vm_area_struct *vma,
#endif /* CONFIG_ARM64_CONTPTE */
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* __ASM_PGTABLE_H */
diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h
index 0d5d1f0525eb..ab78a78821a2 100644
--- a/arch/arm64/include/asm/proc-fns.h
+++ b/arch/arm64/include/asm/proc-fns.h
@@ -9,7 +9,7 @@
#ifndef __ASM_PROCFNS_H
#define __ASM_PROCFNS_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/page.h>
@@ -21,5 +21,5 @@ extern u64 cpu_do_resume(phys_addr_t ptr, u64 idmap_ttbr);
#include <asm/memory.h>
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_PROCFNS_H */
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 61d62bfd5a7b..e30c4c8e3a7a 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -25,7 +25,7 @@
#define MTE_CTRL_STORE_ONLY (1UL << 19)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/build_bug.h>
#include <linux/cache.h>
@@ -172,7 +172,12 @@ struct thread_struct {
unsigned long fault_code; /* ESR_EL1 value */
struct debug_info debug; /* debugging */
- struct user_fpsimd_state kernel_fpsimd_state;
+ /*
+ * Set [cleared] by kernel_neon_begin() [kernel_neon_end()] to the
+ * address of a caller provided buffer that will be used to preserve a
+ * task's kernel mode FPSIMD state while it is scheduled out.
+ */
+ struct user_fpsimd_state *kernel_fpsimd_state;
unsigned int kernel_fpsimd_cpu;
#ifdef CONFIG_ARM64_PTR_AUTH
struct ptrauth_keys_user keys_user;
@@ -437,5 +442,5 @@ int set_tsc_mode(unsigned int val);
#define GET_TSC_CTL(adr) get_tsc_mode((adr))
#define SET_TSC_CTL(val) set_tsc_mode((val))
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_PROCESSOR_H */
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index 65b053a24d82..39582511ad72 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -94,7 +94,7 @@
*/
#define NO_SYSCALL (-1)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/bug.h>
#include <linux/types.h>
@@ -361,5 +361,5 @@ static inline void procedure_link_pointer_set(struct pt_regs *regs,
extern unsigned long profile_pc(struct pt_regs *regs);
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif
diff --git a/arch/arm64/include/asm/rsi_smc.h b/arch/arm64/include/asm/rsi_smc.h
index 6cb070eca9e9..e19253f96c94 100644
--- a/arch/arm64/include/asm/rsi_smc.h
+++ b/arch/arm64/include/asm/rsi_smc.h
@@ -122,7 +122,7 @@
*/
#define SMC_RSI_ATTESTATION_TOKEN_CONTINUE SMC_RSI_FID(0x195)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
struct realm_config {
union {
@@ -142,7 +142,7 @@ struct realm_config {
*/
} __aligned(0x1000);
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
/*
* Read configuration for the current Realm.
diff --git a/arch/arm64/include/asm/rwonce.h b/arch/arm64/include/asm/rwonce.h
index 97d9256d33c9..78beceec10cd 100644
--- a/arch/arm64/include/asm/rwonce.h
+++ b/arch/arm64/include/asm/rwonce.h
@@ -5,7 +5,7 @@
#ifndef __ASM_RWONCE_H
#define __ASM_RWONCE_H
-#if defined(CONFIG_LTO) && !defined(__ASSEMBLY__)
+#if defined(CONFIG_LTO) && !defined(__ASSEMBLER__)
#include <linux/compiler_types.h>
#include <asm/alternative-macros.h>
@@ -62,7 +62,7 @@
})
#endif /* !BUILD_VDSO */
-#endif /* CONFIG_LTO && !__ASSEMBLY__ */
+#endif /* CONFIG_LTO && !__ASSEMBLER__ */
#include <asm-generic/rwonce.h>
diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h
index c59f6324f2bb..0fbc2e7867d3 100644
--- a/arch/arm64/include/asm/scs.h
+++ b/arch/arm64/include/asm/scs.h
@@ -2,7 +2,7 @@
#ifndef _ASM_SCS_H
#define _ASM_SCS_H
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#include <asm/asm-offsets.h>
#include <asm/sysreg.h>
@@ -55,6 +55,6 @@ enum {
int __pi_scs_patch(const u8 eh_frame[], int size, bool skip_dry_run);
-#endif /* __ASSEMBLY __ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_SCS_H */
diff --git a/arch/arm64/include/asm/sdei.h b/arch/arm64/include/asm/sdei.h
index 484cb6972e99..b2248bd3cb58 100644
--- a/arch/arm64/include/asm/sdei.h
+++ b/arch/arm64/include/asm/sdei.h
@@ -9,7 +9,7 @@
#define SDEI_STACK_SIZE IRQ_STACK_SIZE
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/linkage.h>
#include <linux/preempt.h>
@@ -49,5 +49,5 @@ unsigned long do_sdei_event(struct pt_regs *regs,
unsigned long sdei_arch_get_entry_point(int conduit);
#define sdei_arch_get_entry_point(x) sdei_arch_get_entry_point(x)
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_SDEI_H */
diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h
index 8e86c9e70e48..0941f6f58a14 100644
--- a/arch/arm64/include/asm/simd.h
+++ b/arch/arm64/include/asm/simd.h
@@ -6,12 +6,15 @@
#ifndef __ASM_SIMD_H
#define __ASM_SIMD_H
+#include <linux/cleanup.h>
#include <linux/compiler.h>
#include <linux/irqflags.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/types.h>
+#include <asm/neon.h>
+
#ifdef CONFIG_KERNEL_MODE_NEON
/*
@@ -29,7 +32,7 @@ static __must_check inline bool may_use_simd(void)
*/
return !WARN_ON(!system_capabilities_finalized()) &&
system_supports_fpsimd() &&
- !in_hardirq() && !irqs_disabled() && !in_nmi();
+ !in_hardirq() && !in_nmi();
}
#else /* ! CONFIG_KERNEL_MODE_NEON */
@@ -40,4 +43,11 @@ static __must_check inline bool may_use_simd(void) {
#endif /* ! CONFIG_KERNEL_MODE_NEON */
+DEFINE_LOCK_GUARD_1(ksimd,
+ struct user_fpsimd_state,
+ kernel_neon_begin(_T->lock),
+ kernel_neon_end(_T->lock))
+
+#define scoped_ksimd() scoped_guard(ksimd, &(struct user_fpsimd_state){})
+
#endif
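As a usage illustration (do_neon_work() is a hypothetical helper, not from this patch), the guard places the backing user_fpsimd_state on the caller's stack and hands it to kernel_neon_begin()/kernel_neon_end(), so a kernel-mode FPSIMD section can be written as:

static void example_simd_section(u8 *dst, const u8 *src, int len)
{
	if (!may_use_simd())
		return;		/* fall back to a scalar path instead */

	scoped_ksimd()
		do_neon_work(dst, src, len);	/* hypothetical NEON helper */
}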
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index d48ef6d5abcc..10ea4f543069 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -23,7 +23,7 @@
#define CPU_STUCK_REASON_52_BIT_VA (UL(1) << CPU_STUCK_REASON_SHIFT)
#define CPU_STUCK_REASON_NO_GRAN (UL(2) << CPU_STUCK_REASON_SHIFT)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/threads.h>
#include <linux/cpumask.h>
@@ -155,6 +155,6 @@ bool cpus_are_stuck_in_kernel(void);
extern void crash_smp_send_stop(void);
extern bool smp_crash_stop_failed(void);
-#endif /* ifndef __ASSEMBLY__ */
+#endif /* ifndef __ASSEMBLER__ */
#endif /* ifndef __ASM_SMP_H */
diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h
index 900454aaa292..296ae3420bfd 100644
--- a/arch/arm64/include/asm/spectre.h
+++ b/arch/arm64/include/asm/spectre.h
@@ -12,7 +12,7 @@
#define BP_HARDEN_EL2_SLOTS 4
#define __BP_HARDEN_HYP_VECS_SZ ((BP_HARDEN_EL2_SLOTS - 1) * SZ_2K)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/smp.h>
#include <asm/percpu.h>
@@ -119,5 +119,5 @@ void spectre_bhb_patch_clearbhb(struct alt_instr *alt,
__le32 *origptr, __le32 *updptr, int nr_inst);
void spectre_print_disabled_mitigations(void);
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_SPECTRE_H */
diff --git a/arch/arm64/include/asm/stacktrace/frame.h b/arch/arm64/include/asm/stacktrace/frame.h
index 0ee0f6ba0fd8..796797b8db7e 100644
--- a/arch/arm64/include/asm/stacktrace/frame.h
+++ b/arch/arm64/include/asm/stacktrace/frame.h
@@ -25,7 +25,7 @@
#define FRAME_META_TYPE_FINAL 1
#define FRAME_META_TYPE_PT_REGS 2
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* A standard AAPCS64 frame record.
*/
@@ -43,6 +43,6 @@ struct frame_record_meta {
struct frame_record record;
u64 type;
};
-#endif /* __ASSEMBLY */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_STACKTRACE_FRAME_H */
diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h
index 0cde2f473971..e65f33edf9d6 100644
--- a/arch/arm64/include/asm/suspend.h
+++ b/arch/arm64/include/asm/suspend.h
@@ -23,7 +23,7 @@ struct cpu_suspend_ctx {
* __cpu_suspend_enter()'s caller, and populated by __cpu_suspend_enter().
* This data must survive until cpu_resume() is called.
*
- * This struct desribes the size and the layout of the saved cpu state.
+ * This struct describes the size and the layout of the saved cpu state.
* The layout of the callee_saved_regs is defined by the implementation
* of __cpu_suspend_enter(), and cpu_resume(). This struct must be passed
* in by the caller as __cpu_suspend_enter()'s stack-frame is gone once it
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index c231d2a3e515..9df51accbb02 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -52,7 +52,7 @@
#ifndef CONFIG_BROKEN_GAS_INST
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
// The space separator is omitted so that __emit_inst(x) can be parsed as
// either an assembler directive or an assembler macro argument.
#define __emit_inst(x) .inst(x)
@@ -71,11 +71,11 @@
(((x) >> 24) & 0x000000ff))
#endif /* CONFIG_CPU_BIG_ENDIAN */
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#define __emit_inst(x) .long __INSTR_BSWAP(x)
-#else /* __ASSEMBLY__ */
+#else /* __ASSEMBLER__ */
#define __emit_inst(x) ".long " __stringify(__INSTR_BSWAP(x)) "\n\t"
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* CONFIG_BROKEN_GAS_INST */
@@ -1129,9 +1129,7 @@
#define gicr_insn(insn) read_sysreg_s(GICV5_OP_GICR_##insn)
#define gic_insn(v, insn) write_sysreg_s(v, GICV5_OP_GIC_##insn)
-#define ARM64_FEATURE_FIELD_BITS 4
-
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
.macro mrs_s, rt, sreg
__emit_inst(0xd5200000|(\sreg)|(.L__gpr_num_\rt))
diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h
index 344b1c1a4bbb..d316a804eb38 100644
--- a/arch/arm64/include/asm/system_misc.h
+++ b/arch/arm64/include/asm/system_misc.h
@@ -7,7 +7,7 @@
#ifndef __ASM_SYSTEM_MISC_H
#define __ASM_SYSTEM_MISC_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/compiler.h>
#include <linux/linkage.h>
@@ -28,6 +28,6 @@ void arm64_notify_die(const char *str, struct pt_regs *regs,
struct mm_struct;
extern void __show_regs(struct pt_regs *);
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_SYSTEM_MISC_H */
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index f241b8601ebd..a803b887b0b4 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -10,7 +10,7 @@
#include <linux/compiler.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
struct task_struct;
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 18a5dc0c9a54..a2d65d7d6aae 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -8,7 +8,7 @@
#ifndef __ASM_TLBFLUSH_H
#define __ASM_TLBFLUSH_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/bitfield.h>
#include <linux/mm_types.h>
@@ -249,6 +249,19 @@ static inline unsigned long get_trans_granule(void)
* cannot be easily determined, the value TLBI_TTL_UNKNOWN will
* perform a non-hinted invalidation.
*
+ * local_flush_tlb_page(vma, addr)
+ * Local variant of flush_tlb_page(). Stale TLB entries may
+ * remain in remote CPUs.
+ *
+ * local_flush_tlb_page_nonotify(vma, addr)
+ * Same as local_flush_tlb_page() except MMU notifier will not be
+ * called.
+ *
+ * local_flush_tlb_contpte(vma, addr)
+ * Invalidate the virtual-address range
+ * '[addr, addr+CONT_PTE_SIZE)' mapped with contpte on local CPU
+ * for the user address space corresponding to 'vma->mm'. Stale
+ * TLB entries may remain in remote CPUs.
*
* Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented
* on top of these routines, since that is our interface to the mmu_gather
@@ -282,6 +295,33 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
}
+static inline void __local_flush_tlb_page_nonotify_nosync(struct mm_struct *mm,
+ unsigned long uaddr)
+{
+ unsigned long addr;
+
+ dsb(nshst);
+ addr = __TLBI_VADDR(uaddr, ASID(mm));
+ __tlbi(vale1, addr);
+ __tlbi_user(vale1, addr);
+}
+
+static inline void local_flush_tlb_page_nonotify(struct vm_area_struct *vma,
+ unsigned long uaddr)
+{
+ __local_flush_tlb_page_nonotify_nosync(vma->vm_mm, uaddr);
+ dsb(nsh);
+}
+
+static inline void local_flush_tlb_page(struct vm_area_struct *vma,
+ unsigned long uaddr)
+{
+ __local_flush_tlb_page_nonotify_nosync(vma->vm_mm, uaddr);
+ mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, uaddr & PAGE_MASK,
+ (uaddr & PAGE_MASK) + PAGE_SIZE);
+ dsb(nsh);
+}
+
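A minimal caller sketch (mirroring the mm/fault.c hunk later in this patch; __set_pte() is assumed here for brevity): after a PTE has been upgraded in place, only the local TLB needs fixing up immediately, while remote CPUs recover via flush_tlb_fix_spurious_fault(). The _nonotify variant is for callers that have already notified, or do not need to notify, secondary MMUs.

static void example_upgrade_pte(struct vm_area_struct *vma,
				unsigned long address, pte_t *ptep, pte_t pte)
{
	__set_pte(ptep, pte);			/* assumed helper */
	local_flush_tlb_page(vma, address);	/* local CPU only */
}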
static inline void __flush_tlb_page_nosync(struct mm_struct *mm,
unsigned long uaddr)
{
@@ -472,6 +512,22 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
dsb(ish);
}
+static inline void local_flush_tlb_contpte(struct vm_area_struct *vma,
+ unsigned long addr)
+{
+ unsigned long asid;
+
+ addr = round_down(addr, CONT_PTE_SIZE);
+
+ dsb(nshst);
+ asid = ASID(vma->vm_mm);
+ __flush_tlb_range_op(vale1, addr, CONT_PTES, PAGE_SIZE, asid,
+ 3, true, lpa2_is_enabled());
+ mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, addr,
+ addr + CONT_PTE_SIZE);
+ dsb(nsh);
+}
+
static inline void flush_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
@@ -524,6 +580,33 @@ static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *b
{
__flush_tlb_range_nosync(mm, start, end, PAGE_SIZE, true, 3);
}
+
+static inline bool __pte_flags_need_flush(ptdesc_t oldval, ptdesc_t newval)
+{
+ ptdesc_t diff = oldval ^ newval;
+
+ /* invalid to valid transition requires no flush */
+ if (!(oldval & PTE_VALID))
+ return false;
+
+ /* Transition in the SW bits requires no flush */
+ diff &= ~PTE_SWBITS_MASK;
+
+ return diff;
+}
+
+static inline bool pte_needs_flush(pte_t oldpte, pte_t newpte)
+{
+ return __pte_flags_need_flush(pte_val(oldpte), pte_val(newpte));
+}
+#define pte_needs_flush pte_needs_flush
+
+static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd)
+{
+ return __pte_flags_need_flush(pmd_val(oldpmd), pmd_val(newpmd));
+}
+#define huge_pmd_needs_flush huge_pmd_needs_flush
+
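A short sketch of the intended use (following the generic mprotect-style pattern; the helpers other than pte_needs_flush() are assumptions, not taken from this patch): a permission update can now skip the TLB flush entirely when the old entry was invalid or when only software-managed bits changed.

static void example_change_prot(struct vm_area_struct *vma, unsigned long addr,
				pte_t *ptep, pte_t newpte)
{
	pte_t oldpte = __ptep_get(ptep);	/* assumed helper */

	__set_pte(ptep, newpte);		/* assumed helper */
	if (pte_needs_flush(oldpte, newpte))
		flush_tlb_page(vma, addr);	/* otherwise: no TLBI at all */
}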
#endif
#endif
diff --git a/arch/arm64/include/asm/vdso.h b/arch/arm64/include/asm/vdso.h
index 61679070f595..232b46969088 100644
--- a/arch/arm64/include/asm/vdso.h
+++ b/arch/arm64/include/asm/vdso.h
@@ -7,7 +7,7 @@
#define __VDSO_PAGES 4
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <generated/vdso-offsets.h>
@@ -19,6 +19,6 @@
extern char vdso_start[], vdso_end[];
extern char vdso32_start[], vdso32_end[];
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* __ASM_VDSO_H */
diff --git a/arch/arm64/include/asm/vdso/compat_barrier.h b/arch/arm64/include/asm/vdso/compat_barrier.h
index 6d75e03d3827..d7ebe7ceefa0 100644
--- a/arch/arm64/include/asm/vdso/compat_barrier.h
+++ b/arch/arm64/include/asm/vdso/compat_barrier.h
@@ -5,7 +5,7 @@
#ifndef __COMPAT_BARRIER_H
#define __COMPAT_BARRIER_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* Warning: This code is meant to be used from the compat vDSO only.
*/
@@ -31,6 +31,6 @@
#define smp_rmb() aarch32_smp_rmb()
#define smp_wmb() aarch32_smp_wmb()
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* __COMPAT_BARRIER_H */
diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h
index 7d1a116549b1..0d513f924321 100644
--- a/arch/arm64/include/asm/vdso/compat_gettimeofday.h
+++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h
@@ -5,7 +5,7 @@
#ifndef __ASM_VDSO_COMPAT_GETTIMEOFDAY_H
#define __ASM_VDSO_COMPAT_GETTIMEOFDAY_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/barrier.h>
#include <asm/unistd_compat_32.h>
@@ -161,6 +161,6 @@ static inline bool vdso_clocksource_ok(const struct vdso_clock *vc)
}
#define vdso_clocksource_ok vdso_clocksource_ok
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* __ASM_VDSO_COMPAT_GETTIMEOFDAY_H */
diff --git a/arch/arm64/include/asm/vdso/getrandom.h b/arch/arm64/include/asm/vdso/getrandom.h
index a2197da1951b..da1d58bbfabe 100644
--- a/arch/arm64/include/asm/vdso/getrandom.h
+++ b/arch/arm64/include/asm/vdso/getrandom.h
@@ -3,7 +3,7 @@
#ifndef __ASM_VDSO_GETRANDOM_H
#define __ASM_VDSO_GETRANDOM_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/unistd.h>
#include <asm/vdso/vsyscall.h>
@@ -33,6 +33,6 @@ static __always_inline ssize_t getrandom_syscall(void *_buffer, size_t _len, uns
return ret;
}
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* __ASM_VDSO_GETRANDOM_H */
diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h
index c59e84105b43..3658a757e255 100644
--- a/arch/arm64/include/asm/vdso/gettimeofday.h
+++ b/arch/arm64/include/asm/vdso/gettimeofday.h
@@ -7,7 +7,7 @@
#ifdef __aarch64__
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/alternative.h>
#include <asm/arch_timer.h>
@@ -96,7 +96,7 @@ static __always_inline const struct vdso_time_data *__arch_get_vdso_u_time_data(
#define __arch_get_vdso_u_time_data __arch_get_vdso_u_time_data
#endif /* IS_ENABLED(CONFIG_CC_IS_GCC) && IS_ENABLED(CONFIG_PAGE_SIZE_64KB) */
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#else /* !__aarch64__ */
diff --git a/arch/arm64/include/asm/vdso/processor.h b/arch/arm64/include/asm/vdso/processor.h
index ff830b766ad2..7abb0cc81cd6 100644
--- a/arch/arm64/include/asm/vdso/processor.h
+++ b/arch/arm64/include/asm/vdso/processor.h
@@ -5,13 +5,13 @@
#ifndef __ASM_VDSO_PROCESSOR_H
#define __ASM_VDSO_PROCESSOR_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
static inline void cpu_relax(void)
{
asm volatile("yield" ::: "memory");
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_VDSO_PROCESSOR_H */
diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h
index 417aae5763a8..3f3c8eb74e2e 100644
--- a/arch/arm64/include/asm/vdso/vsyscall.h
+++ b/arch/arm64/include/asm/vdso/vsyscall.h
@@ -2,7 +2,7 @@
#ifndef __ASM_VDSO_VSYSCALL_H
#define __ASM_VDSO_VSYSCALL_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <vdso/datapage.h>
@@ -22,6 +22,6 @@ void __arch_update_vdso_clock(struct vdso_clock *vc)
/* The asm-generic header needs to be included after the definitions above */
#include <asm-generic/vdso/vsyscall.h>
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* __ASM_VDSO_VSYSCALL_H */
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index aa280f356b96..530af9620fdb 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -56,7 +56,7 @@
*/
#define BOOT_CPU_FLAG_E2H BIT_ULL(32)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/ptrace.h>
#include <asm/sections.h>
@@ -161,6 +161,6 @@ static inline bool is_hyp_nvhe(void)
return is_hyp_mode_available() && !is_kernel_in_hyp_mode();
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* ! __ASM__VIRT_H */
diff --git a/arch/arm64/include/asm/vmap_stack.h b/arch/arm64/include/asm/vmap_stack.h
index 20873099c035..75daee1a07e9 100644
--- a/arch/arm64/include/asm/vmap_stack.h
+++ b/arch/arm64/include/asm/vmap_stack.h
@@ -3,9 +3,7 @@
#ifndef __ASM_VMAP_STACK_H
#define __ASM_VMAP_STACK_H
-#include <linux/bug.h>
#include <linux/gfp.h>
-#include <linux/kconfig.h>
#include <linux/vmalloc.h>
#include <linux/pgtable.h>
#include <asm/memory.h>
@@ -19,8 +17,6 @@ static inline unsigned long *arch_alloc_vmap_stack(size_t stack_size, int node)
{
void *p;
- BUILD_BUG_ON(!IS_ENABLED(CONFIG_VMAP_STACK));
-
p = __vmalloc_node(stack_size, THREAD_ALIGN, THREADINFO_GFP, node,
__builtin_return_address(0));
return kasan_reset_tag(p);
diff --git a/arch/arm64/include/asm/xor.h b/arch/arm64/include/asm/xor.h
index befcd8a7abc9..c38e3d017a79 100644
--- a/arch/arm64/include/asm/xor.h
+++ b/arch/arm64/include/asm/xor.h
@@ -9,7 +9,7 @@
#include <linux/hardirq.h>
#include <asm-generic/xor.h>
#include <asm/hwcap.h>
-#include <asm/neon.h>
+#include <asm/simd.h>
#ifdef CONFIG_KERNEL_MODE_NEON
@@ -19,9 +19,8 @@ static void
xor_neon_2(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p2)
{
- kernel_neon_begin();
- xor_block_inner_neon.do_2(bytes, p1, p2);
- kernel_neon_end();
+ scoped_ksimd()
+ xor_block_inner_neon.do_2(bytes, p1, p2);
}
static void
@@ -29,9 +28,8 @@ xor_neon_3(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p2,
const unsigned long * __restrict p3)
{
- kernel_neon_begin();
- xor_block_inner_neon.do_3(bytes, p1, p2, p3);
- kernel_neon_end();
+ scoped_ksimd()
+ xor_block_inner_neon.do_3(bytes, p1, p2, p3);
}
static void
@@ -40,9 +38,8 @@ xor_neon_4(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p3,
const unsigned long * __restrict p4)
{
- kernel_neon_begin();
- xor_block_inner_neon.do_4(bytes, p1, p2, p3, p4);
- kernel_neon_end();
+ scoped_ksimd()
+ xor_block_inner_neon.do_4(bytes, p1, p2, p3, p4);
}
static void
@@ -52,9 +49,8 @@ xor_neon_5(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p4,
const unsigned long * __restrict p5)
{
- kernel_neon_begin();
- xor_block_inner_neon.do_5(bytes, p1, p2, p3, p4, p5);
- kernel_neon_end();
+ scoped_ksimd()
+ xor_block_inner_neon.do_5(bytes, p1, p2, p3, p4, p5);
}
static struct xor_block_template xor_block_arm64 = {
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index ed5f3892674c..a792a599b9d6 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -31,7 +31,7 @@
#define KVM_SPSR_FIQ 4
#define KVM_NR_SPSR 5
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/psci.h>
#include <linux/types.h>
#include <asm/ptrace.h>
diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
index 0f39ba4f3efd..6fed93fb2536 100644
--- a/arch/arm64/include/uapi/asm/ptrace.h
+++ b/arch/arm64/include/uapi/asm/ptrace.h
@@ -80,7 +80,7 @@
#define PTRACE_PEEKMTETAGS 33
#define PTRACE_POKEMTETAGS 34
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* User structures for general purpose, floating point and debug registers.
@@ -332,6 +332,6 @@ struct user_gcs {
__u64 gcspr_el0;
};
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _UAPI__ASM_PTRACE_H */
diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h
index d42f7a92238b..e29bf3e2d0cc 100644
--- a/arch/arm64/include/uapi/asm/sigcontext.h
+++ b/arch/arm64/include/uapi/asm/sigcontext.h
@@ -17,7 +17,7 @@
#ifndef _UAPI__ASM_SIGCONTEXT_H
#define _UAPI__ASM_SIGCONTEXT_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
@@ -192,7 +192,7 @@ struct gcs_context {
__u64 reserved;
};
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#include <asm/sve_context.h>
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c
index f1cb2447afc9..af90128cfed5 100644
--- a/arch/arm64/kernel/acpi.c
+++ b/arch/arm64/kernel/acpi.c
@@ -133,7 +133,7 @@ static int __init acpi_fadt_sanity_check(void)
/*
* FADT is required on arm64; retrieve it to check its presence
- * and carry out revision and ACPI HW reduced compliancy tests
+ * and carry out revision and ACPI HW reduced compliance tests
*/
status = acpi_get_table(ACPI_SIG_FADT, 0, &table);
if (ACPI_FAILURE(status)) {
@@ -423,7 +423,7 @@ int apei_claim_sea(struct pt_regs *regs)
irq_work_run();
__irq_exit();
} else {
- pr_warn_ratelimited("APEI work queued but not completed");
+ pr_warn_ratelimited("APEI work queued but not completed\n");
err = -EINPROGRESS;
}
}
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index e25b0f84a22d..42b182cfa404 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1003,7 +1003,7 @@ static void __init sort_ftr_regs(void)
/*
* Initialise the CPU feature register from Boot CPU values.
- * Also initiliases the strict_mask for the register.
+ * Also initialises the strict_mask for the register.
* Any bits that are not covered by an arm64_ftr_bits entry are considered
* RES0 for the system-wide value, and must strictly match.
*/
@@ -1970,7 +1970,7 @@ static struct cpumask dbm_cpus __read_mostly;
static inline void __cpu_enable_hw_dbm(void)
{
- u64 tcr = read_sysreg(tcr_el1) | TCR_HD;
+ u64 tcr = read_sysreg(tcr_el1) | TCR_EL1_HD;
write_sysreg(tcr, tcr_el1);
isb();
@@ -2256,7 +2256,7 @@ static bool has_generic_auth(const struct arm64_cpu_capabilities *entry,
static void cpu_enable_e0pd(struct arm64_cpu_capabilities const *cap)
{
if (this_cpu_has_cap(ARM64_HAS_E0PD))
- sysreg_clear_set(tcr_el1, 0, TCR_E0PD1);
+ sysreg_clear_set(tcr_el1, 0, TCR_EL1_E0PD1);
}
#endif /* CONFIG_ARM64_E0PD */
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index 6c371b158b99..a81cb4aa4738 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -10,6 +10,7 @@
#include <linux/efi.h>
#include <linux/init.h>
#include <linux/kmemleak.h>
+#include <linux/kthread.h>
#include <linux/screen_info.h>
#include <linux/vmalloc.h>
@@ -165,20 +166,53 @@ asmlinkage efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f)
return s;
}
-static DEFINE_RAW_SPINLOCK(efi_rt_lock);
-
void arch_efi_call_virt_setup(void)
{
- efi_virtmap_load();
- raw_spin_lock(&efi_rt_lock);
+ efi_runtime_assert_lock_held();
+
+ if (preemptible() && (current->flags & PF_KTHREAD)) {
+ /*
+ * Disable migration to ensure that a preempted EFI runtime
+ * service call will be resumed on the same CPU. This avoids
+ * potential issues with EFI runtime calls that are preempted
+ * while polling for an asynchronous completion of a secure
+ * firmware call, which may not permit the CPU to change.
+ */
+ migrate_disable();
+ kthread_use_mm(&efi_mm);
+ } else {
+ efi_virtmap_load();
+ }
+
+ /*
+ * Enable access to the valid TTBR0_EL1 and invoke the errata
+ * workaround directly since there is no return from exception when
+ * invoking the EFI run-time services.
+ */
+ uaccess_ttbr0_enable();
+ post_ttbr_update_workaround();
+
__efi_fpsimd_begin();
}
void arch_efi_call_virt_teardown(void)
{
__efi_fpsimd_end();
- raw_spin_unlock(&efi_rt_lock);
- efi_virtmap_unload();
+
+ /*
+ * Defer the switch to the current thread's TTBR0_EL1 until
+ * uaccess_enable(). Do so before efi_virtmap_unload() updates the
+ * saved TTBR0 value, so the userland page tables are not activated
+ * inadvertently over the back of an exception.
+ */
+ uaccess_ttbr0_disable();
+
+ if (preemptible() && (current->flags & PF_KTHREAD)) {
+ kthread_unuse_mm(&efi_mm);
+ migrate_enable();
+ } else {
+ efi_virtmap_unload();
+ }
}
asmlinkage u64 *efi_rt_stack_top __ro_after_init;
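For context, a rough sketch of how these hooks bracket a runtime-service call; the real wrappers live in drivers/firmware/efi/runtime-wrappers.c and take the efi_runtime lock first, which is what efi_runtime_assert_lock_held() expects. Here rt is assumed to point at the mapped runtime services table.

static efi_status_t example_get_time(efi_runtime_services_t *rt,
				     efi_time_t *tm, efi_time_cap_t *tc)
{
	efi_status_t status;

	arch_efi_call_virt_setup();	/* activate efi_mm, enable FPSIMD */
	status = arch_efi_call_virt(rt, get_time, tm, tc);
	arch_efi_call_virt_teardown();

	return status;
}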
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index 0a97e2621f60..3625797e9ee8 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -34,20 +34,12 @@
* Handle IRQ/context state management when entering from kernel mode.
* Before this function is called it is not safe to call regular kernel code,
* instrumentable code, or any code which may trigger an exception.
- *
- * This is intended to match the logic in irqentry_enter(), handling the kernel
- * mode transitions only.
*/
-static __always_inline irqentry_state_t __enter_from_kernel_mode(struct pt_regs *regs)
-{
- return irqentry_enter(regs);
-}
-
static noinstr irqentry_state_t enter_from_kernel_mode(struct pt_regs *regs)
{
irqentry_state_t state;
- state = __enter_from_kernel_mode(regs);
+ state = irqentry_enter(regs);
mte_check_tfsr_entry();
mte_disable_tco_entry(current);
@@ -58,21 +50,12 @@ static noinstr irqentry_state_t enter_from_kernel_mode(struct pt_regs *regs)
* Handle IRQ/context state management when exiting to kernel mode.
* After this function returns it is not safe to call regular kernel code,
* instrumentable code, or any code which may trigger an exception.
- *
- * This is intended to match the logic in irqentry_exit(), handling the kernel
- * mode transitions only, and with preemption handled elsewhere.
*/
-static __always_inline void __exit_to_kernel_mode(struct pt_regs *regs,
- irqentry_state_t state)
-{
- irqentry_exit(regs, state);
-}
-
static void noinstr exit_to_kernel_mode(struct pt_regs *regs,
irqentry_state_t state)
{
mte_check_tfsr_exit();
- __exit_to_kernel_mode(regs, state);
+ irqentry_exit(regs, state);
}
/*
@@ -80,17 +63,12 @@ static void noinstr exit_to_kernel_mode(struct pt_regs *regs,
* Before this function is called it is not safe to call regular kernel code,
* instrumentable code, or any code which may trigger an exception.
*/
-static __always_inline void __enter_from_user_mode(struct pt_regs *regs)
+static __always_inline void arm64_enter_from_user_mode(struct pt_regs *regs)
{
enter_from_user_mode(regs);
mte_disable_tco_entry(current);
}
-static __always_inline void arm64_enter_from_user_mode(struct pt_regs *regs)
-{
- __enter_from_user_mode(regs);
-}
-
/*
* Handle IRQ/context state management when exiting to user mode.
* After this function returns it is not safe to call regular kernel code,
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
index 169ccf600066..025140caafe7 100644
--- a/arch/arm64/kernel/entry-ftrace.S
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -94,7 +94,7 @@ SYM_CODE_START(ftrace_caller)
stp x29, x30, [sp, #FREGS_SIZE]
add x29, sp, #FREGS_SIZE
- /* Prepare arguments for the the tracer func */
+ /* Prepare arguments for the tracer func */
sub x0, x30, #AARCH64_INSN_SIZE // ip (callsite's BL insn)
mov x1, x9 // parent_ip (callsite's LR)
mov x3, sp // regs
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index e3f8f51748bc..c154f72634e0 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -225,10 +225,21 @@ static void fpsimd_bind_task_to_cpu(void);
*/
static void get_cpu_fpsimd_context(void)
{
- if (!IS_ENABLED(CONFIG_PREEMPT_RT))
- local_bh_disable();
- else
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
+ /*
+ * The softirq subsystem lacks a true unmask/mask API, and
+ * re-enabling softirq processing using local_bh_enable() will
+ * not only unmask softirqs, it will also result in immediate
+ * delivery of any pending softirqs.
+ * This is undesirable when running with IRQs disabled, but in
+ * that case, there is no need to mask softirqs in the first
+ * place, so only bother doing so when IRQs are enabled.
+ */
+ if (!irqs_disabled())
+ local_bh_disable();
+ } else {
preempt_disable();
+ }
}
/*
@@ -240,10 +251,12 @@ static void get_cpu_fpsimd_context(void)
*/
static void put_cpu_fpsimd_context(void)
{
- if (!IS_ENABLED(CONFIG_PREEMPT_RT))
- local_bh_enable();
- else
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
+ if (!irqs_disabled())
+ local_bh_enable();
+ } else {
preempt_enable();
+ }
}
unsigned int task_get_vl(const struct task_struct *task, enum vec_type type)
@@ -1489,21 +1502,23 @@ static void fpsimd_load_kernel_state(struct task_struct *task)
* Elide the load if this CPU holds the most recent kernel mode
* FPSIMD context of the current task.
*/
- if (last->st == &task->thread.kernel_fpsimd_state &&
+ if (last->st == task->thread.kernel_fpsimd_state &&
task->thread.kernel_fpsimd_cpu == smp_processor_id())
return;
- fpsimd_load_state(&task->thread.kernel_fpsimd_state);
+ fpsimd_load_state(task->thread.kernel_fpsimd_state);
}
static void fpsimd_save_kernel_state(struct task_struct *task)
{
struct cpu_fp_state cpu_fp_state = {
- .st = &task->thread.kernel_fpsimd_state,
+ .st = task->thread.kernel_fpsimd_state,
.to_save = FP_STATE_FPSIMD,
};
- fpsimd_save_state(&task->thread.kernel_fpsimd_state);
+ BUG_ON(!cpu_fp_state.st);
+
+ fpsimd_save_state(task->thread.kernel_fpsimd_state);
fpsimd_bind_state_to_cpu(&cpu_fp_state);
task->thread.kernel_fpsimd_cpu = smp_processor_id();
@@ -1774,6 +1789,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
void fpsimd_flush_task_state(struct task_struct *t)
{
t->thread.fpsimd_cpu = NR_CPUS;
+ t->thread.kernel_fpsimd_state = NULL;
/*
* If we don't support fpsimd, bail out after we have
* reset the fpsimd_cpu for this task and clear the
@@ -1833,12 +1849,19 @@ void fpsimd_save_and_flush_cpu_state(void)
*
* The caller may freely use the FPSIMD registers until kernel_neon_end() is
* called.
+ *
+ * Unless called from non-preemptible task context, @state must point to a
+ * caller provided buffer that will be used to preserve the task's kernel mode
+ * FPSIMD context when it is scheduled out, or if it is interrupted by kernel
+ * mode FPSIMD occurring in softirq context. May be %NULL otherwise.
*/
-void kernel_neon_begin(void)
+void kernel_neon_begin(struct user_fpsimd_state *state)
{
if (WARN_ON(!system_supports_fpsimd()))
return;
+ WARN_ON((preemptible() || in_serving_softirq()) && !state);
+
BUG_ON(!may_use_simd());
get_cpu_fpsimd_context();
@@ -1846,7 +1869,7 @@ void kernel_neon_begin(void)
/* Save unsaved fpsimd state, if any: */
if (test_thread_flag(TIF_KERNEL_FPSTATE)) {
BUG_ON(IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq());
- fpsimd_save_kernel_state(current);
+ fpsimd_save_state(state);
} else {
fpsimd_save_user_state();
@@ -1867,8 +1890,16 @@ void kernel_neon_begin(void)
* mode in task context. So in this case, setting the flag here
* is always appropriate.
*/
- if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq())
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq()) {
+ /*
+ * Record the caller provided buffer as the kernel mode
+ * FP/SIMD buffer for this task, so that the state can
+ * be preserved and restored on a context switch.
+ */
+ WARN_ON(current->thread.kernel_fpsimd_state != NULL);
+ current->thread.kernel_fpsimd_state = state;
set_thread_flag(TIF_KERNEL_FPSTATE);
+ }
}
/* Invalidate any task state remaining in the fpsimd regs: */
@@ -1886,22 +1917,30 @@ EXPORT_SYMBOL_GPL(kernel_neon_begin);
*
* The caller must not use the FPSIMD registers after this function is called,
* unless kernel_neon_begin() is called again in the meantime.
+ *
+ * The value of @state must match the value passed to the preceding call to
+ * kernel_neon_begin().
*/
-void kernel_neon_end(void)
+void kernel_neon_end(struct user_fpsimd_state *state)
{
if (!system_supports_fpsimd())
return;
+ if (!test_thread_flag(TIF_KERNEL_FPSTATE))
+ return;
+
/*
* If we are returning from a nested use of kernel mode FPSIMD, restore
* the task context kernel mode FPSIMD state. This can only happen when
* running in softirq context on non-PREEMPT_RT.
*/
- if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq() &&
- test_thread_flag(TIF_KERNEL_FPSTATE))
- fpsimd_load_kernel_state(current);
- else
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq()) {
+ fpsimd_load_state(state);
+ } else {
clear_thread_flag(TIF_KERNEL_FPSTATE);
+ WARN_ON(current->thread.kernel_fpsimd_state != state);
+ current->thread.kernel_fpsimd_state = NULL;
+ }
}
EXPORT_SYMBOL_GPL(kernel_neon_end);
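A minimal sketch of the new calling convention; this is effectively what scoped_ksimd() in asm/simd.h expands to, with the buffer living on the caller's stack until kernel_neon_end():

static void example_kernel_mode_neon(void)
{
	struct user_fpsimd_state st;

	if (!may_use_simd())
		return;

	kernel_neon_begin(&st);
	/* ... FPSIMD/NEON instructions may be used here ... */
	kernel_neon_end(&st);
}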
@@ -1934,11 +1973,11 @@ void __efi_fpsimd_begin(void)
if (!system_supports_fpsimd())
return;
- WARN_ON(preemptible());
-
if (may_use_simd()) {
- kernel_neon_begin();
+ kernel_neon_begin(&efi_fpsimd_state);
} else {
+ WARN_ON(preemptible());
+
/*
* If !efi_sve_state, SVE can't be in use yet and doesn't need
* preserving:
@@ -1986,7 +2025,7 @@ void __efi_fpsimd_end(void)
return;
if (!efi_fpsimd_state_used) {
- kernel_neon_end();
+ kernel_neon_end(&efi_fpsimd_state);
} else {
if (system_supports_sve() && efi_sve_state_used) {
bool ffr = true;
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index 5adad37ab4fa..5a1554a44162 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -492,7 +492,7 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
return ret;
/*
- * When using mcount, callsites in modules may have been initalized to
+ * When using mcount, callsites in modules may have been initialized to
* call an arbitrary module PLT (which redirects to the _mcount stub)
* rather than the ftrace PLT we'll use at runtime (which redirects to
* the ftrace trampoline). We can ignore the old PLT when initializing
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index c0065a1d77cf..15dedb385b9e 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -62,7 +62,7 @@ static void __init init_irq_stacks(void)
}
}
-#ifndef CONFIG_PREEMPT_RT
+#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK
static void ____do_softirq(struct pt_regs *regs)
{
__do_softirq();
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index 6f121a0164a4..239c16e3d02f 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -251,7 +251,7 @@ void crash_post_resume(void)
* marked as Reserved as memory was allocated via memblock_reserve().
*
* In hibernation, the pages which are Reserved and yet "nosave" are excluded
- * from the hibernation iamge. crash_is_nosave() does thich check for crash
+ * from the hibernation image. crash_is_nosave() does this check for crash

* dump kernel and will reduce the total size of hibernation image.
*/
diff --git a/arch/arm64/kernel/pi/map_kernel.c b/arch/arm64/kernel/pi/map_kernel.c
index 659297f87cfa..a852264958c3 100644
--- a/arch/arm64/kernel/pi/map_kernel.c
+++ b/arch/arm64/kernel/pi/map_kernel.c
@@ -141,13 +141,13 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
static void noinline __section(".idmap.text") set_ttbr0_for_lpa2(phys_addr_t ttbr)
{
u64 sctlr = read_sysreg(sctlr_el1);
- u64 tcr = read_sysreg(tcr_el1) | TCR_DS;
+ u64 tcr = read_sysreg(tcr_el1) | TCR_EL1_DS;
u64 mmfr0 = read_sysreg(id_aa64mmfr0_el1);
u64 parange = cpuid_feature_extract_unsigned_field(mmfr0,
ID_AA64MMFR0_EL1_PARANGE_SHIFT);
- tcr &= ~TCR_IPS_MASK;
- tcr |= parange << TCR_IPS_SHIFT;
+ tcr &= ~TCR_EL1_IPS_MASK;
+ tcr |= parange << TCR_EL1_IPS_SHIFT;
asm(" msr sctlr_el1, %0 ;"
" isb ;"
@@ -263,7 +263,7 @@ asmlinkage void __init early_map_kernel(u64 boot_status, phys_addr_t fdt)
}
if (va_bits > VA_BITS_MIN)
- sysreg_clear_set(tcr_el1, TCR_T1SZ_MASK, TCR_T1SZ(va_bits));
+ sysreg_clear_set(tcr_el1, TCR_EL1_T1SZ_MASK, TCR_T1SZ(va_bits));
/*
* The virtual KASLR displacement modulo 2MiB is decided by the
diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c
index 2799bdb2fb82..941668800aea 100644
--- a/arch/arm64/kernel/probes/uprobes.c
+++ b/arch/arm64/kernel/probes/uprobes.c
@@ -131,7 +131,7 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
struct uprobe_task *utask = current->utask;
/*
- * Task has received a fatal signal, so reset back to probbed
+ * Task has received a fatal signal, so reset back to probed
* address.
*/
instruction_pointer_set(regs, utask->vaddr);
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 4b001121c72d..b9bdd83fbbca 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -912,13 +912,39 @@ static int sve_set_common(struct task_struct *target,
return -EINVAL;
/*
- * Apart from SVE_PT_REGS_MASK, all SVE_PT_* flags are consumed by
- * vec_set_vector_length(), which will also validate them for us:
+ * On systems without SVE we accept FPSIMD format writes with
+ * a VL of 0 to allow exiting streaming mode, otherwise a VL
+ * is required.
*/
- ret = vec_set_vector_length(target, type, header.vl,
- ((unsigned long)header.flags & ~SVE_PT_REGS_MASK) << 16);
- if (ret)
- return ret;
+ if (header.vl) {
+ /*
+ * If the system does not support SVE we can't
+ * configure a SVE VL.
+ */
+ if (!system_supports_sve() && type == ARM64_VEC_SVE)
+ return -EINVAL;
+
+ /*
+ * Apart from SVE_PT_REGS_MASK, all SVE_PT_* flags are
+ * consumed by vec_set_vector_length(), which will
+ * also validate them for us:
+ */
+ ret = vec_set_vector_length(target, type, header.vl,
+ ((unsigned long)header.flags & ~SVE_PT_REGS_MASK) << 16);
+ if (ret)
+ return ret;
+ } else {
+ /* If the system supports SVE we require a VL. */
+ if (system_supports_sve())
+ return -EINVAL;
+
+ /*
+ * Only FPSIMD formatted data with no flags set is
+ * supported.
+ */
+ if (header.flags != SVE_PT_REGS_FPSIMD)
+ return -EINVAL;
+ }
/* Allocate SME storage if necessary, preserving any existing ZA/ZT state */
if (type == ARM64_VEC_SME) {
@@ -1016,7 +1042,7 @@ static int sve_set(struct task_struct *target,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- if (!system_supports_sve())
+ if (!system_supports_sve() && !system_supports_sme())
return -EINVAL;
return sve_set_common(target, regset, pos, count, kbuf, ubuf,
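From userspace, the newly accepted case looks roughly like the hedged sketch below (an illustration of the ABI, not a tested program): on an SME-only system a debugger writes an NT_ARM_SVE header in FPSIMD format with vl == 0 to take the tracee out of streaming mode.

#include <elf.h>
#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/uio.h>
#include <asm/ptrace.h>

static long exit_streaming_mode(pid_t pid)
{
	struct user_sve_header hdr = {
		.size  = sizeof(hdr),
		.vl    = 0,			/* accepted only without SVE */
		.flags = SVE_PT_REGS_FPSIMD,	/* no other flags allowed */
	};
	struct iovec iov = { .iov_base = &hdr, .iov_len = sizeof(hdr) };

	return ptrace(PTRACE_SETREGSET, pid, NT_ARM_SVE, &iov);
}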
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
index 95169f7b6531..778f2a1faac8 100644
--- a/arch/arm64/kernel/sdei.c
+++ b/arch/arm64/kernel/sdei.c
@@ -63,8 +63,6 @@ static void free_sdei_stacks(void)
{
int cpu;
- BUILD_BUG_ON(!IS_ENABLED(CONFIG_VMAP_STACK));
-
for_each_possible_cpu(cpu) {
_free_sdei_stack(&sdei_stack_normal_ptr, cpu);
_free_sdei_stack(&sdei_stack_critical_ptr, cpu);
@@ -88,8 +86,6 @@ static int init_sdei_stacks(void)
int cpu;
int err = 0;
- BUILD_BUG_ON(!IS_ENABLED(CONFIG_VMAP_STACK));
-
for_each_possible_cpu(cpu) {
err = _init_sdei_stack(&sdei_stack_normal_ptr, cpu);
if (err)
@@ -202,7 +198,7 @@ out_err:
/*
* do_sdei_event() returns one of:
* SDEI_EV_HANDLED - success, return to the interrupted context.
- * SDEI_EV_FAILED - failure, return this error code to firmare.
+ * SDEI_EV_FAILED - failure, return this error code to firmware.
* virtual-address - success, return to this address.
*/
unsigned long __kprobes do_sdei_event(struct pt_regs *regs,
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 6fb838eee2e7..1aa324104afb 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -350,7 +350,7 @@ void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
/*
* Now that the dying CPU is beyond the point of no return w.r.t.
- * in-kernel synchronisation, try to get the firwmare to help us to
+ * in-kernel synchronisation, try to get the firmware to help us to
* verify that it has really left the kernel before we consider
* clobbering anything it might still be using.
*/
@@ -523,7 +523,7 @@ int arch_register_cpu(int cpu)
/*
* Availability of the acpi handle is sufficient to establish
- * that _STA has aleady been checked. No need to recheck here.
+ * that _STA has already been checked. No need to recheck here.
*/
c->hotpluggable = arch_cpu_is_hotpluggable(cpu);
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index aba7ca6bca2d..c062badd1a56 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -96,7 +96,7 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
* (Similarly for HVC and SMC elsewhere.)
*/
- if (flags & _TIF_MTE_ASYNC_FAULT) {
+ if (unlikely(flags & _TIF_MTE_ASYNC_FAULT)) {
/*
* Process the asynchronous tag check fault before the actual
* syscall. do_notify_resume() will send a signal to userspace
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 681939ef5d16..914282016069 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -922,7 +922,7 @@ void __noreturn panic_bad_stack(struct pt_regs *regs, unsigned long esr, unsigne
__show_regs(regs);
/*
- * We use nmi_panic to limit the potential for recusive overflows, and
+ * We use nmi_panic to limit the potential for recursive overflows, and
* to get a better stack trace.
*/
nmi_panic(NULL, "kernel stack overflow");
diff --git a/arch/arm64/kernel/vmcore_info.c b/arch/arm64/kernel/vmcore_info.c
index b19d5d6cb8b3..9619ece66b79 100644
--- a/arch/arm64/kernel/vmcore_info.c
+++ b/arch/arm64/kernel/vmcore_info.c
@@ -14,7 +14,7 @@ static inline u64 get_tcr_el1_t1sz(void);
static inline u64 get_tcr_el1_t1sz(void)
{
- return (read_sysreg(tcr_el1) & TCR_T1SZ_MASK) >> TCR_T1SZ_OFFSET;
+ return (read_sysreg(tcr_el1) & TCR_EL1_T1SZ_MASK) >> TCR_EL1_T1SZ_SHIFT;
}
void arch_crash_save_vmcoreinfo(void)
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 3f675875abea..99a07972068d 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -806,7 +806,7 @@ static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
tpt = tpc = true;
/*
- * For the poor sods that could not correctly substract one value
+ * For the poor sods that could not correctly subtract one value
* from another, trap the full virtual timer and counter.
*/
if (has_broken_cntvoff() && timer_get_offset(map->direct_vtimer))
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 052bf0d4d0b0..12acc024f7a8 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -2448,7 +2448,7 @@ static void kvm_hyp_init_symbols(void)
kvm_nvhe_sym(__icache_flags) = __icache_flags;
kvm_nvhe_sym(kvm_arm_vmid_bits) = kvm_arm_vmid_bits;
- /* Propagate the FGT state to the the nVHE side */
+ /* Propagate the FGT state to the nVHE side */
kvm_nvhe_sym(hfgrtr_masks) = hfgrtr_masks;
kvm_nvhe_sym(hfgwtr_masks) = hfgwtr_masks;
kvm_nvhe_sym(hfgitr_masks) = hfgitr_masks;
diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c
index 58b7d0c477d7..f731cc4c3f28 100644
--- a/arch/arm64/kvm/hyp/nvhe/ffa.c
+++ b/arch/arm64/kvm/hyp/nvhe/ffa.c
@@ -115,7 +115,7 @@ static void ffa_set_retval(struct kvm_cpu_context *ctxt,
*
* FFA-1.3 introduces 64-bit variants of the CPU cycle management
* interfaces. Moreover, FF-A 1.3 clarifies that SMC32 direct requests
+ * complete with SMC32 direct responses which *should* allow us to use the
+ * complete with SMC32 direct responses which *should* allow us use the
* function ID sent by the caller to determine whether to return x8-x17.
*
* Note that we also cannot rely on function IDs in the response.
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 7cc964af8d30..9a6a80c3fbe5 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1755,7 +1755,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
/*
* Check if this is non-struct page memory PFN, and cannot support
- * CMOs. It could potentially be unsafe to access as cachable.
+ * CMOs. It could potentially be unsafe to access as cacheable.
*/
if (vm_flags & (VM_PFNMAP | VM_MIXEDMAP) && !pfn_is_map_memory(pfn)) {
if (is_vma_cacheable) {
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index f04cda40545b..be6bbd167770 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -85,7 +85,7 @@ int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu)
/*
* Let's treat memory allocation failures as benign: If we fail to
* allocate anything, return an error and keep the allocated array
- * alive. Userspace may try to recover by intializing the vcpu
+ * alive. Userspace may try to recover by initializing the vcpu
* again, and there is no reason to affect the whole VM for this.
*/
num_mmus = atomic_read(&kvm->online_vcpus) * S2_MMU_PER_VCPU;
diff --git a/arch/arm64/mm/contpte.c b/arch/arm64/mm/contpte.c
index c0557945939c..589bcf878938 100644
--- a/arch/arm64/mm/contpte.c
+++ b/arch/arm64/mm/contpte.c
@@ -622,8 +622,7 @@ int contpte_ptep_set_access_flags(struct vm_area_struct *vma,
__ptep_set_access_flags(vma, addr, ptep, entry, 0);
if (dirty)
- __flush_tlb_range(vma, start_addr, addr,
- PAGE_SIZE, true, 3);
+ local_flush_tlb_contpte(vma, start_addr);
} else {
__contpte_try_unfold(vma->vm_mm, addr, ptep, orig_pte);
__ptep_set_access_flags(vma, addr, ptep, entry, dirty);
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index a193b6a5d1e6..be9dab2c7d6a 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -233,9 +233,13 @@ int __ptep_set_access_flags(struct vm_area_struct *vma,
pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval);
} while (pteval != old_pteval);
- /* Invalidate a stale read-only entry */
+ /*
+ * Invalidate the local stale read-only entry. Remote stale entries
+ * may still cause page faults and be invalidated via
+ * flush_tlb_fix_spurious_fault().
+ */
if (dirty)
- flush_tlb_page(vma, address);
+ local_flush_tlb_page(vma, address);
return 1;
}
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 2ba01dc8ef82..c8d24b7cdd62 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -49,6 +49,8 @@
#define NO_CONT_MAPPINGS BIT(1)
#define NO_EXEC_MAPPINGS BIT(2) /* assumes FEAT_HPDS is not used */
+#define INVALID_PHYS_ADDR (-1ULL)
+
DEFINE_STATIC_KEY_FALSE(arm64_ptdump_lock_key);
u64 kimage_voffset __ro_after_init;
@@ -194,11 +196,11 @@ static void init_pte(pte_t *ptep, unsigned long addr, unsigned long end,
} while (ptep++, addr += PAGE_SIZE, addr != end);
}
-static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
- unsigned long end, phys_addr_t phys,
- pgprot_t prot,
- phys_addr_t (*pgtable_alloc)(enum pgtable_type),
- int flags)
+static int alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
+ unsigned long end, phys_addr_t phys,
+ pgprot_t prot,
+ phys_addr_t (*pgtable_alloc)(enum pgtable_type),
+ int flags)
{
unsigned long next;
pmd_t pmd = READ_ONCE(*pmdp);
@@ -213,6 +215,8 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
pmdval |= PMD_TABLE_PXN;
BUG_ON(!pgtable_alloc);
pte_phys = pgtable_alloc(TABLE_PTE);
+ if (pte_phys == INVALID_PHYS_ADDR)
+ return -ENOMEM;
ptep = pte_set_fixmap(pte_phys);
init_clear_pgtable(ptep);
ptep += pte_index(addr);
@@ -244,11 +248,13 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
* walker.
*/
pte_clear_fixmap();
+
+ return 0;
}
-static void init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end,
- phys_addr_t phys, pgprot_t prot,
- phys_addr_t (*pgtable_alloc)(enum pgtable_type), int flags)
+static int init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end,
+ phys_addr_t phys, pgprot_t prot,
+ phys_addr_t (*pgtable_alloc)(enum pgtable_type), int flags)
{
unsigned long next;
@@ -269,22 +275,29 @@ static void init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end,
BUG_ON(!pgattr_change_is_safe(pmd_val(old_pmd),
READ_ONCE(pmd_val(*pmdp))));
} else {
- alloc_init_cont_pte(pmdp, addr, next, phys, prot,
- pgtable_alloc, flags);
+ int ret;
+
+ ret = alloc_init_cont_pte(pmdp, addr, next, phys, prot,
+ pgtable_alloc, flags);
+ if (ret)
+ return ret;
BUG_ON(pmd_val(old_pmd) != 0 &&
pmd_val(old_pmd) != READ_ONCE(pmd_val(*pmdp)));
}
phys += next - addr;
} while (pmdp++, addr = next, addr != end);
+
+ return 0;
}
-static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
- unsigned long end, phys_addr_t phys,
- pgprot_t prot,
- phys_addr_t (*pgtable_alloc)(enum pgtable_type),
- int flags)
+static int alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
+ unsigned long end, phys_addr_t phys,
+ pgprot_t prot,
+ phys_addr_t (*pgtable_alloc)(enum pgtable_type),
+ int flags)
{
+ int ret;
unsigned long next;
pud_t pud = READ_ONCE(*pudp);
pmd_t *pmdp;
@@ -301,6 +314,8 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
pudval |= PUD_TABLE_PXN;
BUG_ON(!pgtable_alloc);
pmd_phys = pgtable_alloc(TABLE_PMD);
+ if (pmd_phys == INVALID_PHYS_ADDR)
+ return -ENOMEM;
pmdp = pmd_set_fixmap(pmd_phys);
init_clear_pgtable(pmdp);
pmdp += pmd_index(addr);
@@ -320,20 +335,26 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
(flags & NO_CONT_MAPPINGS) == 0)
__prot = __pgprot(pgprot_val(prot) | PTE_CONT);
- init_pmd(pmdp, addr, next, phys, __prot, pgtable_alloc, flags);
+ ret = init_pmd(pmdp, addr, next, phys, __prot, pgtable_alloc, flags);
+ if (ret)
+ goto out;
pmdp += pmd_index(next) - pmd_index(addr);
phys += next - addr;
} while (addr = next, addr != end);
+out:
pmd_clear_fixmap();
+
+ return ret;
}
-static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end,
- phys_addr_t phys, pgprot_t prot,
- phys_addr_t (*pgtable_alloc)(enum pgtable_type),
- int flags)
+static int alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end,
+ phys_addr_t phys, pgprot_t prot,
+ phys_addr_t (*pgtable_alloc)(enum pgtable_type),
+ int flags)
{
+ int ret = 0;
unsigned long next;
p4d_t p4d = READ_ONCE(*p4dp);
pud_t *pudp;
@@ -346,6 +367,8 @@ static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end,
p4dval |= P4D_TABLE_PXN;
BUG_ON(!pgtable_alloc);
pud_phys = pgtable_alloc(TABLE_PUD);
+ if (pud_phys == INVALID_PHYS_ADDR)
+ return -ENOMEM;
pudp = pud_set_fixmap(pud_phys);
init_clear_pgtable(pudp);
pudp += pud_index(addr);
@@ -375,8 +398,10 @@ static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end,
BUG_ON(!pgattr_change_is_safe(pud_val(old_pud),
READ_ONCE(pud_val(*pudp))));
} else {
- alloc_init_cont_pmd(pudp, addr, next, phys, prot,
- pgtable_alloc, flags);
+ ret = alloc_init_cont_pmd(pudp, addr, next, phys, prot,
+ pgtable_alloc, flags);
+ if (ret)
+ goto out;
BUG_ON(pud_val(old_pud) != 0 &&
pud_val(old_pud) != READ_ONCE(pud_val(*pudp)));
@@ -384,14 +409,18 @@ static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end,
phys += next - addr;
} while (pudp++, addr = next, addr != end);
+out:
pud_clear_fixmap();
+
+ return ret;
}
-static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end,
- phys_addr_t phys, pgprot_t prot,
- phys_addr_t (*pgtable_alloc)(enum pgtable_type),
- int flags)
+static int alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end,
+ phys_addr_t phys, pgprot_t prot,
+ phys_addr_t (*pgtable_alloc)(enum pgtable_type),
+ int flags)
{
+ int ret;
unsigned long next;
pgd_t pgd = READ_ONCE(*pgdp);
p4d_t *p4dp;
@@ -404,6 +433,8 @@ static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end,
pgdval |= PGD_TABLE_PXN;
BUG_ON(!pgtable_alloc);
p4d_phys = pgtable_alloc(TABLE_P4D);
+ if (p4d_phys == INVALID_PHYS_ADDR)
+ return -ENOMEM;
p4dp = p4d_set_fixmap(p4d_phys);
init_clear_pgtable(p4dp);
p4dp += p4d_index(addr);
@@ -418,8 +449,10 @@ static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end,
next = p4d_addr_end(addr, end);
- alloc_init_pud(p4dp, addr, next, phys, prot,
- pgtable_alloc, flags);
+ ret = alloc_init_pud(p4dp, addr, next, phys, prot,
+ pgtable_alloc, flags);
+ if (ret)
+ goto out;
BUG_ON(p4d_val(old_p4d) != 0 &&
p4d_val(old_p4d) != READ_ONCE(p4d_val(*p4dp)));
@@ -427,15 +460,19 @@ static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end,
phys += next - addr;
} while (p4dp++, addr = next, addr != end);
+out:
p4d_clear_fixmap();
+
+ return ret;
}
-static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys,
- unsigned long virt, phys_addr_t size,
- pgprot_t prot,
- phys_addr_t (*pgtable_alloc)(enum pgtable_type),
- int flags)
+static int __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys,
+ unsigned long virt, phys_addr_t size,
+ pgprot_t prot,
+ phys_addr_t (*pgtable_alloc)(enum pgtable_type),
+ int flags)
{
+ int ret;
unsigned long addr, end, next;
pgd_t *pgdp = pgd_offset_pgd(pgdir, virt);
@@ -444,7 +481,7 @@ static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys,
* within a page, we cannot map the region as the caller expects.
*/
if (WARN_ON((phys ^ virt) & ~PAGE_MASK))
- return;
+ return -EINVAL;
phys &= PAGE_MASK;
addr = virt & PAGE_MASK;
@@ -452,25 +489,45 @@ static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys,
do {
next = pgd_addr_end(addr, end);
- alloc_init_p4d(pgdp, addr, next, phys, prot, pgtable_alloc,
- flags);
+ ret = alloc_init_p4d(pgdp, addr, next, phys, prot, pgtable_alloc,
+ flags);
+ if (ret)
+ return ret;
phys += next - addr;
} while (pgdp++, addr = next, addr != end);
+
+ return 0;
}
-static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
- unsigned long virt, phys_addr_t size,
- pgprot_t prot,
- phys_addr_t (*pgtable_alloc)(enum pgtable_type),
- int flags)
+static int __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
+ unsigned long virt, phys_addr_t size,
+ pgprot_t prot,
+ phys_addr_t (*pgtable_alloc)(enum pgtable_type),
+ int flags)
{
+ int ret;
+
mutex_lock(&fixmap_lock);
- __create_pgd_mapping_locked(pgdir, phys, virt, size, prot,
- pgtable_alloc, flags);
+ ret = __create_pgd_mapping_locked(pgdir, phys, virt, size, prot,
+ pgtable_alloc, flags);
mutex_unlock(&fixmap_lock);
+
+ return ret;
}
-#define INVALID_PHYS_ADDR (-1ULL)
+static void early_create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
+ unsigned long virt, phys_addr_t size,
+ pgprot_t prot,
+ phys_addr_t (*pgtable_alloc)(enum pgtable_type),
+ int flags)
+{
+ int ret;
+
+ ret = __create_pgd_mapping(pgdir, phys, virt, size, prot, pgtable_alloc,
+ flags);
+ if (ret)
+ panic("Failed to create page tables\n");
+}
static phys_addr_t __pgd_pgtable_alloc(struct mm_struct *mm, gfp_t gfp,
enum pgtable_type pgtable_type)
@@ -503,7 +560,7 @@ static phys_addr_t __pgd_pgtable_alloc(struct mm_struct *mm, gfp_t gfp,
}
static phys_addr_t
-try_pgd_pgtable_alloc_init_mm(enum pgtable_type pgtable_type, gfp_t gfp)
+pgd_pgtable_alloc_init_mm_gfp(enum pgtable_type pgtable_type, gfp_t gfp)
{
return __pgd_pgtable_alloc(&init_mm, gfp, pgtable_type);
}
@@ -511,21 +568,13 @@ try_pgd_pgtable_alloc_init_mm(enum pgtable_type pgtable_type, gfp_t gfp)
static phys_addr_t __maybe_unused
pgd_pgtable_alloc_init_mm(enum pgtable_type pgtable_type)
{
- phys_addr_t pa;
-
- pa = __pgd_pgtable_alloc(&init_mm, GFP_PGTABLE_KERNEL, pgtable_type);
- BUG_ON(pa == INVALID_PHYS_ADDR);
- return pa;
+ return pgd_pgtable_alloc_init_mm_gfp(pgtable_type, GFP_PGTABLE_KERNEL);
}
static phys_addr_t
pgd_pgtable_alloc_special_mm(enum pgtable_type pgtable_type)
{
- phys_addr_t pa;
-
- pa = __pgd_pgtable_alloc(NULL, GFP_PGTABLE_KERNEL, pgtable_type);
- BUG_ON(pa == INVALID_PHYS_ADDR);
- return pa;
+ return __pgd_pgtable_alloc(NULL, GFP_PGTABLE_KERNEL, pgtable_type);
}
static void split_contpte(pte_t *ptep)
@@ -546,7 +595,7 @@ static int split_pmd(pmd_t *pmdp, pmd_t pmd, gfp_t gfp, bool to_cont)
pte_t *ptep;
int i;
- pte_phys = try_pgd_pgtable_alloc_init_mm(TABLE_PTE, gfp);
+ pte_phys = pgd_pgtable_alloc_init_mm_gfp(TABLE_PTE, gfp);
if (pte_phys == INVALID_PHYS_ADDR)
return -ENOMEM;
ptep = (pte_t *)phys_to_virt(pte_phys);
@@ -591,7 +640,7 @@ static int split_pud(pud_t *pudp, pud_t pud, gfp_t gfp, bool to_cont)
pmd_t *pmdp;
int i;
- pmd_phys = try_pgd_pgtable_alloc_init_mm(TABLE_PMD, gfp);
+ pmd_phys = pgd_pgtable_alloc_init_mm_gfp(TABLE_PMD, gfp);
if (pmd_phys == INVALID_PHYS_ADDR)
return -ENOMEM;
pmdp = (pmd_t *)phys_to_virt(pmd_phys);
@@ -935,8 +984,8 @@ void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt,
&phys, virt);
return;
}
- __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL,
- NO_CONT_MAPPINGS);
+ early_create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL,
+ NO_CONT_MAPPINGS);
}
void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
@@ -950,8 +999,8 @@ void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
if (page_mappings_only)
flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
- __create_pgd_mapping(mm->pgd, phys, virt, size, prot,
- pgd_pgtable_alloc_special_mm, flags);
+ early_create_pgd_mapping(mm->pgd, phys, virt, size, prot,
+ pgd_pgtable_alloc_special_mm, flags);
}
static void update_mapping_prot(phys_addr_t phys, unsigned long virt,
@@ -963,8 +1012,8 @@ static void update_mapping_prot(phys_addr_t phys, unsigned long virt,
return;
}
- __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL,
- NO_CONT_MAPPINGS);
+ early_create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL,
+ NO_CONT_MAPPINGS);
/* flush the TLBs after updating live kernel mappings */
flush_tlb_kernel_range(virt, virt + size);
@@ -973,8 +1022,8 @@ static void update_mapping_prot(phys_addr_t phys, unsigned long virt,
static void __init __map_memblock(pgd_t *pgdp, phys_addr_t start,
phys_addr_t end, pgprot_t prot, int flags)
{
- __create_pgd_mapping(pgdp, start, __phys_to_virt(start), end - start,
- prot, early_pgtable_alloc, flags);
+ early_create_pgd_mapping(pgdp, start, __phys_to_virt(start), end - start,
+ prot, early_pgtable_alloc, flags);
}
void __init mark_linear_text_alias_ro(void)
@@ -1207,6 +1256,8 @@ static int __init __kpti_install_ng_mappings(void *__unused)
remap_fn = (void *)__pa_symbol(idmap_kpti_install_ng_mappings);
if (!cpu) {
+ int ret;
+
alloc = __get_free_pages(GFP_ATOMIC | __GFP_ZERO, order);
kpti_ng_temp_pgd = (pgd_t *)(alloc + (levels - 1) * PAGE_SIZE);
kpti_ng_temp_alloc = kpti_ng_temp_pgd_pa = __pa(kpti_ng_temp_pgd);
@@ -1227,9 +1278,11 @@ static int __init __kpti_install_ng_mappings(void *__unused)
// covers the PTE[] page itself, the remaining entries are free
		// to be used as an ad-hoc fixmap.
//
- __create_pgd_mapping_locked(kpti_ng_temp_pgd, __pa(alloc),
- KPTI_NG_TEMP_VA, PAGE_SIZE, PAGE_KERNEL,
- kpti_ng_pgd_alloc, 0);
+ ret = __create_pgd_mapping_locked(kpti_ng_temp_pgd, __pa(alloc),
+ KPTI_NG_TEMP_VA, PAGE_SIZE, PAGE_KERNEL,
+ kpti_ng_pgd_alloc, 0);
+ if (ret)
+ panic("Failed to create page tables\n");
}
cpu_install_idmap();
@@ -1282,9 +1335,9 @@ static int __init map_entry_trampoline(void)
/* Map only the text into the trampoline page table */
memset(tramp_pg_dir, 0, PGD_SIZE);
- __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS,
- entry_tramp_text_size(), prot,
- pgd_pgtable_alloc_init_mm, NO_BLOCK_MAPPINGS);
+ early_create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS,
+ entry_tramp_text_size(), prot,
+ pgd_pgtable_alloc_init_mm, NO_BLOCK_MAPPINGS);
/* Map both the text and data into the kernel page table */
for (i = 0; i < DIV_ROUND_UP(entry_tramp_text_size(), PAGE_SIZE); i++)
@@ -1926,23 +1979,28 @@ int arch_add_memory(int nid, u64 start, u64 size,
if (force_pte_mapping())
flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
- __create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start),
- size, params->pgprot, pgd_pgtable_alloc_init_mm,
- flags);
+ ret = __create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start),
+ size, params->pgprot, pgd_pgtable_alloc_init_mm,
+ flags);
+ if (ret)
+ goto err;
memblock_clear_nomap(start, size);
ret = __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT,
params);
if (ret)
- __remove_pgd_mapping(swapper_pg_dir,
- __phys_to_virt(start), size);
- else {
- /* Address of hotplugged memory can be smaller */
- max_pfn = max(max_pfn, PFN_UP(start + size));
- max_low_pfn = max_pfn;
- }
+ goto err;
+
+ /* Address of hotplugged memory can be smaller */
+ max_pfn = max(max_pfn, PFN_UP(start + size));
+ max_low_pfn = max_pfn;
+
+ return 0;
+err:
+ __remove_pgd_mapping(swapper_pg_dir,
+ __phys_to_virt(start), size);
return ret;
}
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index 5135f2d66958..f0e784b963e6 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -148,7 +148,7 @@ static int change_memory_common(unsigned long addr, int numpages,
unsigned long size = PAGE_SIZE * numpages;
unsigned long end = start + size;
struct vm_struct *area;
- int i;
+ int ret;
if (!PAGE_ALIGNED(addr)) {
start &= PAGE_MASK;
@@ -184,9 +184,13 @@ static int change_memory_common(unsigned long addr, int numpages,
*/
if (rodata_full && (pgprot_val(set_mask) == PTE_RDONLY ||
pgprot_val(clear_mask) == PTE_RDONLY)) {
- for (i = 0; i < area->nr_pages; i++) {
- __change_memory_common((u64)page_address(area->pages[i]),
- PAGE_SIZE, set_mask, clear_mask);
+ unsigned long idx = (start - (unsigned long)kasan_reset_tag(area->addr))
+ >> PAGE_SHIFT;
+ for (; numpages; idx++, numpages--) {
+ ret = __change_memory_common((u64)page_address(area->pages[idx]),
+ PAGE_SIZE, set_mask, clear_mask);
+ if (ret)
+ return ret;
}
}
diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c
index 8160cff35089..bf5110b91e2f 100644
--- a/arch/arm64/mm/pgd.c
+++ b/arch/arm64/mm/pgd.c
@@ -56,7 +56,7 @@ void __init pgtable_cache_init(void)
* With 52-bit physical addresses, the architecture requires the
* top-level table to be aligned to at least 64 bytes.
*/
- BUILD_BUG_ON(PGD_SIZE < 64);
+ BUILD_BUG_ON(!IS_ALIGNED(PGD_SIZE, 64));
#endif
/*
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 86818511962b..01e868116448 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -23,15 +23,18 @@
#include <asm/sysreg.h>
#ifdef CONFIG_ARM64_64K_PAGES
-#define TCR_TG_FLAGS TCR_TG0_64K | TCR_TG1_64K
+#define TCR_TG_FLAGS ((TCR_EL1_TG0_64K << TCR_EL1_TG0_SHIFT) |\
+ (TCR_EL1_TG1_64K << TCR_EL1_TG1_SHIFT))
#elif defined(CONFIG_ARM64_16K_PAGES)
-#define TCR_TG_FLAGS TCR_TG0_16K | TCR_TG1_16K
+#define TCR_TG_FLAGS ((TCR_EL1_TG0_16K << TCR_EL1_TG0_SHIFT) |\
+ (TCR_EL1_TG1_16K << TCR_EL1_TG1_SHIFT))
#else /* CONFIG_ARM64_4K_PAGES */
-#define TCR_TG_FLAGS TCR_TG0_4K | TCR_TG1_4K
+#define TCR_TG_FLAGS ((TCR_EL1_TG0_4K << TCR_EL1_TG0_SHIFT) |\
+ (TCR_EL1_TG1_4K << TCR_EL1_TG1_SHIFT))
#endif
#ifdef CONFIG_RANDOMIZE_BASE
-#define TCR_KASLR_FLAGS TCR_NFD1
+#define TCR_KASLR_FLAGS TCR_EL1_NFD1
#else
#define TCR_KASLR_FLAGS 0
#endif
@@ -40,23 +43,30 @@
#define TCR_CACHE_FLAGS TCR_IRGN_WBWA | TCR_ORGN_WBWA
#ifdef CONFIG_KASAN_SW_TAGS
-#define TCR_KASAN_SW_FLAGS TCR_TBI1 | TCR_TBID1
+#define TCR_KASAN_SW_FLAGS TCR_EL1_TBI1 | TCR_EL1_TBID1
#else
#define TCR_KASAN_SW_FLAGS 0
#endif
#ifdef CONFIG_KASAN_HW_TAGS
-#define TCR_MTE_FLAGS TCR_TCMA1 | TCR_TBI1 | TCR_TBID1
+#define TCR_MTE_FLAGS TCR_EL1_TCMA1 | TCR_EL1_TBI1 | TCR_EL1_TBID1
#elif defined(CONFIG_ARM64_MTE)
/*
* The mte_zero_clear_page_tags() implementation uses DC GZVA, which relies on
* TBI being enabled at EL1.
*/
-#define TCR_MTE_FLAGS TCR_TBI1 | TCR_TBID1
+#define TCR_MTE_FLAGS TCR_EL1_TBI1 | TCR_EL1_TBID1
#else
#define TCR_MTE_FLAGS 0
#endif
+#define TCR_IRGN_WBWA ((TCR_EL1_IRGN0_WBWA << TCR_EL1_IRGN0_SHIFT) |\
+ (TCR_EL1_IRGN1_WBWA << TCR_EL1_IRGN1_SHIFT))
+#define TCR_ORGN_WBWA ((TCR_EL1_ORGN0_WBWA << TCR_EL1_ORGN0_SHIFT) |\
+ (TCR_EL1_ORGN1_WBWA << TCR_EL1_ORGN1_SHIFT))
+#define TCR_SHARED ((TCR_EL1_SH0_INNER << TCR_EL1_SH0_SHIFT) |\
+ (TCR_EL1_SH1_INNER << TCR_EL1_SH1_SHIFT))
+
/*
* Default MAIR_EL1. MT_NORMAL_TAGGED is initially mapped as Normal memory and
* changed during mte_cpu_setup to Normal Tagged if the system supports MTE.
@@ -129,7 +139,7 @@ SYM_FUNC_START(cpu_do_resume)
/* Don't change t0sz here, mask those bits when restoring */
mrs x7, tcr_el1
- bfi x8, x7, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH
+ bfi x8, x7, TCR_EL1_T0SZ_SHIFT, TCR_EL1_T0SZ_WIDTH
msr tcr_el1, x8
msr vbar_el1, x9
@@ -481,8 +491,8 @@ SYM_FUNC_START(__cpu_setup)
tcr2 .req x15
mov_q mair, MAIR_EL1_SET
mov_q tcr, TCR_T0SZ(IDMAP_VA_BITS) | TCR_T1SZ(VA_BITS_MIN) | TCR_CACHE_FLAGS | \
- TCR_SHARED | TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \
- TCR_TBI0 | TCR_A1 | TCR_KASAN_SW_FLAGS | TCR_MTE_FLAGS
+ TCR_SHARED | TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_EL1_AS | \
+ TCR_EL1_TBI0 | TCR_EL1_A1 | TCR_KASAN_SW_FLAGS | TCR_MTE_FLAGS
mov tcr2, xzr
tcr_clear_errata_bits tcr, x9, x5
@@ -492,7 +502,7 @@ SYM_FUNC_START(__cpu_setup)
alternative_if ARM64_HAS_VA52
tcr_set_t1sz tcr, x9
#ifdef CONFIG_ARM64_LPA2
- orr tcr, tcr, #TCR_DS
+ orr tcr, tcr, #TCR_EL1_DS
#endif
alternative_else_nop_endif
#endif
@@ -500,7 +510,7 @@ alternative_else_nop_endif
/*
* Set the IPS bits in TCR_EL1.
*/
- tcr_compute_pa_size tcr, #TCR_IPS_SHIFT, x5, x6
+ tcr_compute_pa_size tcr, #TCR_EL1_IPS_SHIFT, x5, x6
#ifdef CONFIG_ARM64_HW_AFDBM
/*
* Enable hardware update of the Access Flags bit.
@@ -510,7 +520,7 @@ alternative_else_nop_endif
mrs x9, ID_AA64MMFR1_EL1
ubfx x9, x9, ID_AA64MMFR1_EL1_HAFDBS_SHIFT, #4
cbz x9, 1f
- orr tcr, tcr, #TCR_HA // hardware Access flag update
+ orr tcr, tcr, #TCR_EL1_HA // hardware Access flag update
#ifdef CONFIG_ARM64_HAFT
cmp x9, ID_AA64MMFR1_EL1_HAFDBS_HAFT
b.lt 1f
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 0c9a50a1e73e..afd05b41ea9e 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -3054,7 +3054,7 @@ bool bpf_jit_supports_exceptions(void)
/* We unwind through both kernel frames starting from within bpf_throw
* call and BPF frames. Therefore we require FP unwinder to be enabled
* to walk kernel frames and reach BPF frames in the stack trace.
- * ARM64 kernel is aways compiled with CONFIG_FRAME_POINTER=y
+ * ARM64 kernel is always compiled with CONFIG_FRAME_POINTER=y
*/
return true;
}
diff --git a/arch/arm64/tools/gen-sysreg.awk b/arch/arm64/tools/gen-sysreg.awk
index bbbb812603e8..86860ab672dc 100755
--- a/arch/arm64/tools/gen-sysreg.awk
+++ b/arch/arm64/tools/gen-sysreg.awk
@@ -44,21 +44,38 @@ function expect_fields(nf) {
# Print a CPP macro definition, padded with spaces so that the macro bodies
# line up in a column
-function define(name, val) {
- printf "%-56s%s\n", "#define " name, val
+function define(prefix, name, val) {
+ printf "%-56s%s\n", "#define " prefix name, val
+}
+
+# Same as above, but without a prefix
+function define_reg(name, val) {
+ define(null, name, val)
}
# Print standard BITMASK/SHIFT/WIDTH CPP definitions for a field
-function define_field(reg, field, msb, lsb) {
- define(reg "_" field, "GENMASK(" msb ", " lsb ")")
- define(reg "_" field "_MASK", "GENMASK(" msb ", " lsb ")")
- define(reg "_" field "_SHIFT", lsb)
- define(reg "_" field "_WIDTH", msb - lsb + 1)
+function define_field(prefix, reg, field, msb, lsb) {
+ define(prefix, reg "_" field, "GENMASK(" msb ", " lsb ")")
+ define(prefix, reg "_" field "_MASK", "GENMASK(" msb ", " lsb ")")
+ define(prefix, reg "_" field "_SHIFT", lsb)
+ define(prefix, reg "_" field "_WIDTH", msb - lsb + 1)
}
# Print a field _SIGNED definition for a field
-function define_field_sign(reg, field, sign) {
- define(reg "_" field "_SIGNED", sign)
+function define_field_sign(prefix, reg, field, sign) {
+ define(prefix, reg "_" field "_SIGNED", sign)
+}
+
+# Print the Res0, Res1, Unkn masks
+function define_resx_unkn(prefix, reg, res0, res1, unkn) {
+ if (res0 != null)
+ define(prefix, reg "_RES0", "(" res0 ")")
+ if (res1 != null)
+ define(prefix, reg "_RES1", "(" res1 ")")
+ if (unkn != null)
+ define(prefix, reg "_UNKN", "(" unkn ")")
+ if (res0 != null || res1 != null || unkn != null)
+ print ""
}
# Parse a "<msb>[:<lsb>]" string into the global variables @msb and @lsb
@@ -128,18 +145,17 @@ $1 == "SysregFields" && block_current() == "Root" {
next_bit = 63
+ delete seen_prefixes
+
next
}
$1 == "EndSysregFields" && block_current() == "SysregFields" {
expect_fields(1)
- if (next_bit > 0)
+ if (next_bit >= 0)
fatal("Unspecified bits in " reg)
- define(reg "_RES0", "(" res0 ")")
- define(reg "_RES1", "(" res1 ")")
- define(reg "_UNKN", "(" unkn ")")
- print ""
+ define_resx_unkn(prefix, reg, res0, res1, unkn)
reg = null
res0 = null
@@ -170,35 +186,31 @@ $1 == "Sysreg" && block_current() == "Root" {
fatal("Duplicate Sysreg definition for " reg)
defined_regs[reg] = 1
- define("REG_" reg, "S" op0 "_" op1 "_C" crn "_C" crm "_" op2)
- define("SYS_" reg, "sys_reg(" op0 ", " op1 ", " crn ", " crm ", " op2 ")")
+ define_reg("REG_" reg, "S" op0 "_" op1 "_C" crn "_C" crm "_" op2)
+ define_reg("SYS_" reg, "sys_reg(" op0 ", " op1 ", " crn ", " crm ", " op2 ")")
- define("SYS_" reg "_Op0", op0)
- define("SYS_" reg "_Op1", op1)
- define("SYS_" reg "_CRn", crn)
- define("SYS_" reg "_CRm", crm)
- define("SYS_" reg "_Op2", op2)
+ define_reg("SYS_" reg "_Op0", op0)
+ define_reg("SYS_" reg "_Op1", op1)
+ define_reg("SYS_" reg "_CRn", crn)
+ define_reg("SYS_" reg "_CRm", crm)
+ define_reg("SYS_" reg "_Op2", op2)
print ""
+ prefix = null
next_bit = 63
+ delete seen_prefixes
+
next
}
$1 == "EndSysreg" && block_current() == "Sysreg" {
expect_fields(1)
- if (next_bit > 0)
+ if (next_bit >= 0)
fatal("Unspecified bits in " reg)
- if (res0 != null)
- define(reg "_RES0", "(" res0 ")")
- if (res1 != null)
- define(reg "_RES1", "(" res1 ")")
- if (unkn != null)
- define(reg "_UNKN", "(" unkn ")")
- if (res0 != null || res1 != null || unkn != null)
- print ""
+ define_resx_unkn(prefix, reg, res0, res1, unkn)
reg = null
op0 = null
@@ -209,6 +221,7 @@ $1 == "EndSysreg" && block_current() == "Sysreg" {
res0 = null
res1 = null
unkn = null
+ prefix = null
block_pop()
next
@@ -225,7 +238,7 @@ $1 == "EndSysreg" && block_current() == "Sysreg" {
print "/* For " reg " fields see " $2 " */"
print ""
- next_bit = 0
+ next_bit = -1
res0 = null
res1 = null
unkn = null
@@ -233,8 +246,7 @@ $1 == "EndSysreg" && block_current() == "Sysreg" {
next
}
-
-$1 == "Res0" && (block_current() == "Sysreg" || block_current() == "SysregFields") {
+$1 == "Res0" && (block_current() == "Sysreg" || block_current() == "SysregFields" || block_current() == "Prefix") {
expect_fields(2)
parse_bitdef(reg, "RES0", $2)
field = "RES0_" msb "_" lsb
@@ -244,7 +256,7 @@ $1 == "Res0" && (block_current() == "Sysreg" || block_current() == "SysregFields
next
}
-$1 == "Res1" && (block_current() == "Sysreg" || block_current() == "SysregFields") {
+$1 == "Res1" && (block_current() == "Sysreg" || block_current() == "SysregFields" || block_current() == "Prefix") {
expect_fields(2)
parse_bitdef(reg, "RES1", $2)
field = "RES1_" msb "_" lsb
@@ -254,7 +266,7 @@ $1 == "Res1" && (block_current() == "Sysreg" || block_current() == "SysregFields
next
}
-$1 == "Unkn" && (block_current() == "Sysreg" || block_current() == "SysregFields") {
+$1 == "Unkn" && (block_current() == "Sysreg" || block_current() == "SysregFields" || block_current() == "Prefix") {
expect_fields(2)
parse_bitdef(reg, "UNKN", $2)
field = "UNKN_" msb "_" lsb
@@ -264,62 +276,62 @@ $1 == "Unkn" && (block_current() == "Sysreg" || block_current() == "SysregFields
next
}
-$1 == "Field" && (block_current() == "Sysreg" || block_current() == "SysregFields") {
+$1 == "Field" && (block_current() == "Sysreg" || block_current() == "SysregFields" || block_current() == "Prefix") {
expect_fields(3)
field = $3
parse_bitdef(reg, field, $2)
- define_field(reg, field, msb, lsb)
+ define_field(prefix, reg, field, msb, lsb)
print ""
next
}
-$1 == "Raz" && (block_current() == "Sysreg" || block_current() == "SysregFields") {
+$1 == "Raz" && (block_current() == "Sysreg" || block_current() == "SysregFields" || block_current() == "Prefix") {
expect_fields(2)
parse_bitdef(reg, field, $2)
next
}
-$1 == "SignedEnum" && (block_current() == "Sysreg" || block_current() == "SysregFields") {
+$1 == "SignedEnum" && (block_current() == "Sysreg" || block_current() == "SysregFields" || block_current() == "Prefix") {
block_push("Enum")
expect_fields(3)
field = $3
parse_bitdef(reg, field, $2)
- define_field(reg, field, msb, lsb)
- define_field_sign(reg, field, "true")
+ define_field(prefix, reg, field, msb, lsb)
+ define_field_sign(prefix, reg, field, "true")
delete seen_enum_vals
next
}
-$1 == "UnsignedEnum" && (block_current() == "Sysreg" || block_current() == "SysregFields") {
+$1 == "UnsignedEnum" && (block_current() == "Sysreg" || block_current() == "SysregFields" || block_current() == "Prefix") {
block_push("Enum")
expect_fields(3)
field = $3
parse_bitdef(reg, field, $2)
- define_field(reg, field, msb, lsb)
- define_field_sign(reg, field, "false")
+ define_field(prefix, reg, field, msb, lsb)
+ define_field_sign(prefix, reg, field, "false")
delete seen_enum_vals
next
}
-$1 == "Enum" && (block_current() == "Sysreg" || block_current() == "SysregFields") {
+$1 == "Enum" && (block_current() == "Sysreg" || block_current() == "SysregFields" || block_current() == "Prefix") {
block_push("Enum")
expect_fields(3)
field = $3
parse_bitdef(reg, field, $2)
- define_field(reg, field, msb, lsb)
+ define_field(prefix, reg, field, msb, lsb)
delete seen_enum_vals
@@ -349,7 +361,47 @@ $1 == "EndEnum" && block_current() == "Enum" {
fatal("Duplicate Enum value " val " for " name)
seen_enum_vals[val] = 1
- define(reg "_" field "_" name, "UL(" val ")")
+ define(prefix, reg "_" field "_" name, "UL(" val ")")
+ next
+}
+
+$1 == "Prefix" && (block_current() == "Sysreg" || block_current() == "SysregFields") {
+ block_push("Prefix")
+
+ expect_fields(2)
+
+ if (next_bit < 63)
+ fatal("Prefixed fields must precede non-prefixed fields (" reg ")")
+
+ prefix = $2 "_"
+
+ if (prefix in seen_prefixes)
+ fatal("Duplicate prefix " prefix " for " reg)
+ seen_prefixes[prefix] = 1
+
+ res0 = "UL(0)"
+ res1 = "UL(0)"
+ unkn = "UL(0)"
+ next_bit = 63
+
+ next
+}
+
+$1 == "EndPrefix" && block_current() == "Prefix" {
+ expect_fields(1)
+ if (next_bit >= 0)
+ fatal("Unspecified bits in prefix " prefix " for " reg)
+
+ define_resx_unkn(prefix, reg, res0, res1, unkn)
+
+ prefix = null
+ res0 = "UL(0)"
+ res1 = "UL(0)"
+ unkn = "UL(0)"
+ next_bit = 63
+
+ block_pop()
+
next
}
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg
index 1c6cdf9d54bb..8921b51866d6 100644
--- a/arch/arm64/tools/sysreg
+++ b/arch/arm64/tools/sysreg
@@ -4669,6 +4669,27 @@ Field 1 V3
Field 0 En
EndSysreg
+Sysreg ICH_VMCR_EL2 3 4 12 11 7
+Prefix FEAT_GCIE
+Res0 63:32
+Field 31:27 VPMR
+Res0 26:1
+Field 0 EN
+EndPrefix
+Res0 63:32
+Field 31:24 VPMR
+Field 23:21 VBPR0
+Field 20:18 VBPR1
+Res0 17:10
+Field 9 VEOIM
+Res0 8:5
+Field 4 VCBPR
+Field 3 VFIQEn
+Field 2 VAckCtl
+Field 1 VENG1
+Field 0 VENG0
+EndSysreg
+
Sysreg CONTEXTIDR_EL2 3 4 13 0 1
Fields CONTEXTIDR_ELx
EndSysreg