author     Eric Biggers <ebiggers@kernel.org>  2026-01-12 11:20:09 -0800
committer  Eric Biggers <ebiggers@kernel.org>  2026-01-12 11:39:58 -0800
commit     2b1ef7aeeb184ee78523f3d24e221296574c6f2d (patch)
tree       be1ca1de79fb2af1f8870dcb97629277ab4d2f29 /lib
parent     fa2297750c2cc61788d1843f358dbfecaa42944f (diff)
lib/crypto: arm64/aes: Migrate optimized code into library
Move the ARM64 optimized AES key expansion and single-block AES
en/decryption code into lib/crypto/, wire it up to the AES library API,
and remove the superseded crypto_cipher algorithms.  The result is that
both the AES library and crypto_cipher APIs are now optimized for ARM64,
whereas previously only crypto_cipher was (and the optimizations weren't
enabled by default, which this fixes as well).

Note: to see the diff from arch/arm64/crypto/aes-ce-glue.c to
lib/crypto/arm64/aes.h, view this commit with 'git show -M10'.

Acked-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20260112192035.10427-12-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
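For context, a minimal sketch of how a kernel caller reaches this code through the AES library API. This assumes the longstanding <crypto/aes.h> entry points (aes_expandkey/aes_encrypt/aes_decrypt on struct crypto_aes_ctx); the struct aes_enckey / aes_preparekey_arch plumbing used internally by this series is visible in the lib/crypto/arm64/aes.h hunk below. Illustrative only, not part of the patch:

    #include <crypto/aes.h>
    #include <linux/string.h>

    /* Encrypt one 16-byte block via the AES library API. */
    static int example_encrypt_one_block(const u8 *key, unsigned int key_len,
                                         const u8 in[AES_BLOCK_SIZE],
                                         u8 out[AES_BLOCK_SIZE])
    {
            struct crypto_aes_ctx ctx;
            int err;

            err = aes_expandkey(&ctx, key, key_len); /* 16, 24 or 32 byte key */
            if (err)
                    return err;
            aes_encrypt(&ctx, out, in);      /* on arm64 now uses the code below */
            memzero_explicit(&ctx, sizeof(ctx));
            return 0;
    }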
Diffstat (limited to 'lib')
-rw-r--r--  lib/crypto/Kconfig                    1
-rw-r--r--  lib/crypto/Makefile                   5
-rw-r--r--  lib/crypto/arm64/aes-ce-core.S       84
-rw-r--r--  lib/crypto/arm64/aes-cipher-core.S  132
-rw-r--r--  lib/crypto/arm64/aes.h              164
5 files changed, 386 insertions, 0 deletions
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index 60420b421e04..ead47b2a7db6 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -15,6 +15,7 @@ config CRYPTO_LIB_AES_ARCH
bool
depends on CRYPTO_LIB_AES && !UML && !KMSAN
default y if ARM
+ default y if ARM64
config CRYPTO_LIB_AESCFB
tristate
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index 2f6b0f59eb1b..1b690c63fafb 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -24,6 +24,11 @@ CFLAGS_aes.o += -I$(src)/$(SRCARCH)
libaes-$(CONFIG_ARM) += arm/aes-cipher-core.o
+ifeq ($(CONFIG_ARM64),y)
+libaes-y += arm64/aes-cipher-core.o
+libaes-$(CONFIG_KERNEL_MODE_NEON) += arm64/aes-ce-core.o
+endif
+
endif # CONFIG_CRYPTO_LIB_AES_ARCH
################################################################################
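Build wiring in practice: CRYPTO_LIB_AES_ARCH has no prompt, so with the Kconfig and Makefile hunks above it simply follows from the options already set on arm64. An illustrative .config fragment (not part of the patch):

    CONFIG_ARM64=y
    CONFIG_KERNEL_MODE_NEON=y
    CONFIG_CRYPTO_LIB_AES=y
    # CRYPTO_LIB_AES_ARCH now defaults to y here, so arm64/aes-cipher-core.o
    # is always linked into libaes, and arm64/aes-ce-core.o is added when
    # CONFIG_KERNEL_MODE_NEON=y.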
diff --git a/lib/crypto/arm64/aes-ce-core.S b/lib/crypto/arm64/aes-ce-core.S
new file mode 100644
index 000000000000..e52e13eb8fdb
--- /dev/null
+++ b/lib/crypto/arm64/aes-ce-core.S
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ .arch armv8-a+crypto
+
+SYM_FUNC_START(__aes_ce_encrypt)
+ sub w3, w3, #2
+ ld1 {v0.16b}, [x2]
+ ld1 {v1.4s}, [x0], #16
+ cmp w3, #10
+ bmi 0f
+ bne 3f
+ mov v3.16b, v1.16b
+ b 2f
+0: mov v2.16b, v1.16b
+ ld1 {v3.4s}, [x0], #16
+1: aese v0.16b, v2.16b
+ aesmc v0.16b, v0.16b
+2: ld1 {v1.4s}, [x0], #16
+ aese v0.16b, v3.16b
+ aesmc v0.16b, v0.16b
+3: ld1 {v2.4s}, [x0], #16
+ subs w3, w3, #3
+ aese v0.16b, v1.16b
+ aesmc v0.16b, v0.16b
+ ld1 {v3.4s}, [x0], #16
+ bpl 1b
+ aese v0.16b, v2.16b
+ eor v0.16b, v0.16b, v3.16b
+ st1 {v0.16b}, [x1]
+ ret
+SYM_FUNC_END(__aes_ce_encrypt)
+
+SYM_FUNC_START(__aes_ce_decrypt)
+ sub w3, w3, #2
+ ld1 {v0.16b}, [x2]
+ ld1 {v1.4s}, [x0], #16
+ cmp w3, #10
+ bmi 0f
+ bne 3f
+ mov v3.16b, v1.16b
+ b 2f
+0: mov v2.16b, v1.16b
+ ld1 {v3.4s}, [x0], #16
+1: aesd v0.16b, v2.16b
+ aesimc v0.16b, v0.16b
+2: ld1 {v1.4s}, [x0], #16
+ aesd v0.16b, v3.16b
+ aesimc v0.16b, v0.16b
+3: ld1 {v2.4s}, [x0], #16
+ subs w3, w3, #3
+ aesd v0.16b, v1.16b
+ aesimc v0.16b, v0.16b
+ ld1 {v3.4s}, [x0], #16
+ bpl 1b
+ aesd v0.16b, v2.16b
+ eor v0.16b, v0.16b, v3.16b
+ st1 {v0.16b}, [x1]
+ ret
+SYM_FUNC_END(__aes_ce_decrypt)
+
+/*
+ * __aes_ce_sub() - use the aese instruction to perform the AES sbox
+ * substitution on each byte in 'input'
+ */
+SYM_FUNC_START(__aes_ce_sub)
+ dup v1.4s, w0
+ movi v0.16b, #0
+ aese v0.16b, v1.16b
+ umov w0, v0.s[0]
+ ret
+SYM_FUNC_END(__aes_ce_sub)
+
+SYM_FUNC_START(__aes_ce_invert)
+ ld1 {v0.4s}, [x1]
+ aesimc v1.16b, v0.16b
+ st1 {v1.4s}, [x0]
+ ret
+SYM_FUNC_END(__aes_ce_invert)
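The two functions above fold the 10/12/14-round cases into a single loop; conceptually each full round is an aese (AddRoundKey + SubBytes + ShiftRows) followed by aesmc (MixColumns), and the final round drops MixColumns and ends with a plain XOR of the last round key. A rough intrinsics equivalent for the AES-128 case, illustrative only and not part of the patch (assumes a compiler targeting armv8-a+crypto):

    #include <arm_neon.h>

    /* Single-block AES-128 encryption with the 11 expanded round keys. */
    static uint8x16_t aes128_ce_encrypt(const uint8x16_t rk[11], uint8x16_t st)
    {
            int i;

            for (i = 0; i < 9; i++) {
                    st = vaeseq_u8(st, rk[i]); /* AddRoundKey+SubBytes+ShiftRows */
                    st = vaesmcq_u8(st);       /* MixColumns */
            }
            st = vaeseq_u8(st, rk[9]);         /* last round: no MixColumns */
            return veorq_u8(st, rk[10]);       /* final AddRoundKey */
    }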
diff --git a/lib/crypto/arm64/aes-cipher-core.S b/lib/crypto/arm64/aes-cipher-core.S
new file mode 100644
index 000000000000..651f701c56a8
--- /dev/null
+++ b/lib/crypto/arm64/aes-cipher-core.S
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Scalar AES core transform
+ *
+ * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/cache.h>
+
+ .text
+
+ rk .req x0
+ out .req x1
+ in .req x2
+ rounds .req x3
+ tt .req x2
+
+ .macro __pair1, sz, op, reg0, reg1, in0, in1e, in1d, shift
+ .ifc \op\shift, b0
+ ubfiz \reg0, \in0, #2, #8
+ ubfiz \reg1, \in1e, #2, #8
+ .else
+ ubfx \reg0, \in0, #\shift, #8
+ ubfx \reg1, \in1e, #\shift, #8
+ .endif
+
+ /*
+ * AArch64 cannot do byte size indexed loads from a table containing
+ * 32-bit quantities, i.e., 'ldrb w12, [tt, w12, uxtw #2]' is not a
+ * valid instruction. So perform the shift explicitly first for the
+ * high bytes (the low byte is shifted implicitly by using ubfiz rather
+ * than ubfx above)
+ */
+ .ifnc \op, b
+ ldr \reg0, [tt, \reg0, uxtw #2]
+ ldr \reg1, [tt, \reg1, uxtw #2]
+ .else
+ .if \shift > 0
+ lsl \reg0, \reg0, #2
+ lsl \reg1, \reg1, #2
+ .endif
+ ldrb \reg0, [tt, \reg0, uxtw]
+ ldrb \reg1, [tt, \reg1, uxtw]
+ .endif
+ .endm
+
+ .macro __pair0, sz, op, reg0, reg1, in0, in1e, in1d, shift
+ ubfx \reg0, \in0, #\shift, #8
+ ubfx \reg1, \in1d, #\shift, #8
+ ldr\op \reg0, [tt, \reg0, uxtw #\sz]
+ ldr\op \reg1, [tt, \reg1, uxtw #\sz]
+ .endm
+
+ .macro __hround, out0, out1, in0, in1, in2, in3, t0, t1, enc, sz, op
+ ldp \out0, \out1, [rk], #8
+
+ __pair\enc \sz, \op, w12, w13, \in0, \in1, \in3, 0
+ __pair\enc \sz, \op, w14, w15, \in1, \in2, \in0, 8
+ __pair\enc \sz, \op, w16, w17, \in2, \in3, \in1, 16
+ __pair\enc \sz, \op, \t0, \t1, \in3, \in0, \in2, 24
+
+ eor \out0, \out0, w12
+ eor \out1, \out1, w13
+ eor \out0, \out0, w14, ror #24
+ eor \out1, \out1, w15, ror #24
+ eor \out0, \out0, w16, ror #16
+ eor \out1, \out1, w17, ror #16
+ eor \out0, \out0, \t0, ror #8
+ eor \out1, \out1, \t1, ror #8
+ .endm
+
+ .macro fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
+ __hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
+ __hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op
+ .endm
+
+ .macro iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
+ __hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
+ __hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op
+ .endm
+
+ .macro do_crypt, round, ttab, ltab, bsz
+ ldp w4, w5, [in]
+ ldp w6, w7, [in, #8]
+ ldp w8, w9, [rk], #16
+ ldp w10, w11, [rk, #-8]
+
+CPU_BE( rev w4, w4 )
+CPU_BE( rev w5, w5 )
+CPU_BE( rev w6, w6 )
+CPU_BE( rev w7, w7 )
+
+ eor w4, w4, w8
+ eor w5, w5, w9
+ eor w6, w6, w10
+ eor w7, w7, w11
+
+ adr_l tt, \ttab
+
+ tbnz rounds, #1, 1f
+
+0: \round w8, w9, w10, w11, w4, w5, w6, w7
+ \round w4, w5, w6, w7, w8, w9, w10, w11
+
+1: subs rounds, rounds, #4
+ \round w8, w9, w10, w11, w4, w5, w6, w7
+ b.ls 3f
+2: \round w4, w5, w6, w7, w8, w9, w10, w11
+ b 0b
+3: adr_l tt, \ltab
+ \round w4, w5, w6, w7, w8, w9, w10, w11, \bsz, b
+
+CPU_BE( rev w4, w4 )
+CPU_BE( rev w5, w5 )
+CPU_BE( rev w6, w6 )
+CPU_BE( rev w7, w7 )
+
+ stp w4, w5, [out]
+ stp w6, w7, [out, #8]
+ ret
+ .endm
+
+SYM_FUNC_START(__aes_arm64_encrypt)
+ do_crypt fround, aes_enc_tab, aes_enc_tab + 1, 2
+SYM_FUNC_END(__aes_arm64_encrypt)
+
+ .align 5
+SYM_FUNC_START(__aes_arm64_decrypt)
+ do_crypt iround, aes_dec_tab, crypto_aes_inv_sbox, 0
+SYM_FUNC_END(__aes_arm64_decrypt)
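Reading the macros back into C: for the encryption direction, each output word produced by __hround is the next round-key word XORed with one table lookup per input byte, the single 32-bit table standing in for the usual four T-tables via rotations. A schematic equivalent, derived from the macros above and illustrative only (T stands for the 256-entry aes_enc_tab, ror32() is the kernel helper from <linux/bitops.h>):

    static inline u32 enc_round_word(u32 rk, u32 in0, u32 in1, u32 in2, u32 in3,
                                     const u32 T[256])
    {
            return rk ^
                   T[(in0 >>  0) & 0xff] ^
                   ror32(T[(in1 >>  8) & 0xff], 24) ^
                   ror32(T[(in2 >> 16) & 0xff], 16) ^
                   ror32(T[(in3 >> 24) & 0xff], 8);
    }

The final round instead performs byte-sized S-box lookups (the \op = b case), which is why __pair1 needs the explicit shifts discussed in the comment above.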
diff --git a/lib/crypto/arm64/aes.h b/lib/crypto/arm64/aes.h
new file mode 100644
index 000000000000..63eea6271ef9
--- /dev/null
+++ b/lib/crypto/arm64/aes.h
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * AES block cipher, optimized for ARM64
+ *
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright 2026 Google LLC
+ */
+
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <linux/unaligned.h>
+#include <linux/cpufeature.h>
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_aes);
+
+struct aes_block {
+ u8 b[AES_BLOCK_SIZE];
+};
+
+asmlinkage void __aes_arm64_encrypt(const u32 rk[], u8 out[AES_BLOCK_SIZE],
+ const u8 in[AES_BLOCK_SIZE], int rounds);
+asmlinkage void __aes_arm64_decrypt(const u32 inv_rk[], u8 out[AES_BLOCK_SIZE],
+ const u8 in[AES_BLOCK_SIZE], int rounds);
+asmlinkage void __aes_ce_encrypt(const u32 rk[], u8 out[AES_BLOCK_SIZE],
+ const u8 in[AES_BLOCK_SIZE], int rounds);
+asmlinkage void __aes_ce_decrypt(const u32 inv_rk[], u8 out[AES_BLOCK_SIZE],
+ const u8 in[AES_BLOCK_SIZE], int rounds);
+asmlinkage u32 __aes_ce_sub(u32 l);
+asmlinkage void __aes_ce_invert(struct aes_block *out,
+ const struct aes_block *in);
+
+/*
+ * Expand an AES key using the crypto extensions if supported and usable or
+ * generic code otherwise. The expanded key format is compatible between the
+ * two cases. The outputs are @rndkeys (required) and @inv_rndkeys (optional).
+ */
+static void aes_expandkey_arm64(u32 rndkeys[], u32 *inv_rndkeys,
+ const u8 *in_key, int key_len, int nrounds)
+{
+ /*
+ * The AES key schedule round constants
+ */
+ static u8 const rcon[] = {
+ 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36,
+ };
+
+ u32 kwords = key_len / sizeof(u32);
+ struct aes_block *key_enc, *key_dec;
+ int i, j;
+
+ if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) ||
+ !static_branch_likely(&have_aes) || unlikely(!may_use_simd())) {
+ aes_expandkey_generic(rndkeys, inv_rndkeys, in_key, key_len);
+ return;
+ }
+
+ for (i = 0; i < kwords; i++)
+ rndkeys[i] = get_unaligned_le32(&in_key[i * sizeof(u32)]);
+
+ scoped_ksimd() {
+ for (i = 0; i < sizeof(rcon); i++) {
+ u32 *rki = &rndkeys[i * kwords];
+ u32 *rko = rki + kwords;
+
+ rko[0] = ror32(__aes_ce_sub(rki[kwords - 1]), 8) ^
+ rcon[i] ^ rki[0];
+ rko[1] = rko[0] ^ rki[1];
+ rko[2] = rko[1] ^ rki[2];
+ rko[3] = rko[2] ^ rki[3];
+
+ if (key_len == AES_KEYSIZE_192) {
+ if (i >= 7)
+ break;
+ rko[4] = rko[3] ^ rki[4];
+ rko[5] = rko[4] ^ rki[5];
+ } else if (key_len == AES_KEYSIZE_256) {
+ if (i >= 6)
+ break;
+ rko[4] = __aes_ce_sub(rko[3]) ^ rki[4];
+ rko[5] = rko[4] ^ rki[5];
+ rko[6] = rko[5] ^ rki[6];
+ rko[7] = rko[6] ^ rki[7];
+ }
+ }
+
+ /*
+ * Generate the decryption keys for the Equivalent Inverse
+ * Cipher. This involves reversing the order of the round
+ * keys, and applying the Inverse Mix Columns transformation on
+ * all but the first and the last one.
+ */
+ if (inv_rndkeys) {
+ key_enc = (struct aes_block *)rndkeys;
+ key_dec = (struct aes_block *)inv_rndkeys;
+ j = nrounds;
+
+ key_dec[0] = key_enc[j];
+ for (i = 1, j--; j > 0; i++, j--)
+ __aes_ce_invert(key_dec + i, key_enc + j);
+ key_dec[i] = key_enc[0];
+ }
+ }
+}
+
+static void aes_preparekey_arch(union aes_enckey_arch *k,
+ union aes_invkey_arch *inv_k,
+ const u8 *in_key, int key_len, int nrounds)
+{
+ aes_expandkey_arm64(k->rndkeys, inv_k ? inv_k->inv_rndkeys : NULL,
+ in_key, key_len, nrounds);
+}
+
+/*
+ * This is here temporarily until the remaining AES mode implementations are
+ * migrated from arch/arm64/crypto/ to lib/crypto/arm64/.
+ */
+int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
+ unsigned int key_len)
+{
+ if (aes_check_keylen(key_len) != 0)
+ return -EINVAL;
+ ctx->key_length = key_len;
+ aes_expandkey_arm64(ctx->key_enc, ctx->key_dec, in_key, key_len,
+ 6 + key_len / 4);
+ return 0;
+}
+EXPORT_SYMBOL(ce_aes_expandkey);
+
+static void aes_encrypt_arch(const struct aes_enckey *key,
+ u8 out[AES_BLOCK_SIZE],
+ const u8 in[AES_BLOCK_SIZE])
+{
+ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
+ static_branch_likely(&have_aes) && likely(may_use_simd())) {
+ scoped_ksimd()
+ __aes_ce_encrypt(key->k.rndkeys, out, in, key->nrounds);
+ } else {
+ __aes_arm64_encrypt(key->k.rndkeys, out, in, key->nrounds);
+ }
+}
+
+static void aes_decrypt_arch(const struct aes_key *key,
+ u8 out[AES_BLOCK_SIZE],
+ const u8 in[AES_BLOCK_SIZE])
+{
+ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
+ static_branch_likely(&have_aes) && likely(may_use_simd())) {
+ scoped_ksimd()
+ __aes_ce_decrypt(key->inv_k.inv_rndkeys, out, in,
+ key->nrounds);
+ } else {
+ __aes_arm64_decrypt(key->inv_k.inv_rndkeys, out, in,
+ key->nrounds);
+ }
+}
+
+#ifdef CONFIG_KERNEL_MODE_NEON
+#define aes_mod_init_arch aes_mod_init_arch
+static void aes_mod_init_arch(void)
+{
+ if (cpu_have_named_feature(AES))
+ static_branch_enable(&have_aes);
+}
+#endif /* CONFIG_KERNEL_MODE_NEON */
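One small reading aid for the '6 + key_len / 4' argument passed by ce_aes_expandkey() above: it is simply the standard AES round count for the given key size. Illustrative helper, not part of the patch:

    static inline int aes_nrounds(unsigned int key_len)
    {
            /* AES-128 -> 10 rounds, AES-192 -> 12, AES-256 -> 14 */
            return 6 + key_len / 4;
    }

The decryption setup then feeds this round count into the Equivalent Inverse Cipher generation shown earlier, which reverses the round-key order and runs __aes_ce_invert over all but the first and last round keys.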