Diffstat (limited to 'arch/arm64/crypto')
-rw-r--r--  arch/arm64/crypto/Kconfig                  |   21
-rw-r--r--  arch/arm64/crypto/Makefile                 |    6
-rw-r--r--  arch/arm64/crypto/aes-ce-ccm-glue.c        |  116
-rw-r--r--  arch/arm64/crypto/aes-ce-glue.c            |   87
-rw-r--r--  arch/arm64/crypto/aes-glue.c               |  139
-rw-r--r--  arch/arm64/crypto/aes-neonbs-glue.c        |  150
-rw-r--r--  arch/arm64/crypto/ghash-ce-glue.c          |   27
-rw-r--r--  arch/arm64/crypto/nhpoly1305-neon-glue.c   |    5
-rw-r--r--  arch/arm64/crypto/polyval-ce-core.S        |  361
-rw-r--r--  arch/arm64/crypto/polyval-ce-glue.c        |  158
-rw-r--r--  arch/arm64/crypto/sha3-ce-core.S           |  212
-rw-r--r--  arch/arm64/crypto/sha3-ce-glue.c           |  151
-rw-r--r--  arch/arm64/crypto/sm3-ce-glue.c            |   15
-rw-r--r--  arch/arm64/crypto/sm3-neon-glue.c          |   16
-rw-r--r--  arch/arm64/crypto/sm4-ce-ccm-glue.c        |   49
-rw-r--r--  arch/arm64/crypto/sm4-ce-cipher-glue.c     |   10
-rw-r--r--  arch/arm64/crypto/sm4-ce-gcm-glue.c        |   62
-rw-r--r--  arch/arm64/crypto/sm4-ce-glue.c            |  214
-rw-r--r--  arch/arm64/crypto/sm4-neon-glue.c          |   25
19 files changed, 411 insertions(+), 1413 deletions(-)
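
Most of the churn in this series is a mechanical conversion of paired
kernel_neon_begin()/kernel_neon_end() calls into scoped_ksimd() blocks,
plus the outright removal of the SHA-3 CE and POLYVAL CE drivers.  A
minimal before/after sketch of the conversion pattern, assuming
scoped_ksimd() is a scope-bound guard that closes the kernel-mode SIMD
section when the attached block is left (the helper itself is not
defined in this diff):

	/* Before: explicit begin/end bracketing the SIMD helper. */
	kernel_neon_begin();
	aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
			ctx->key_enc, rounds, blocks);
	kernel_neon_end();

	/*
	 * After: the SIMD section is tied to the block scope, so it
	 * cannot be left open on an early exit from the block.
	 */
	scoped_ksimd()
		aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
				ctx->key_enc, rounds, blocks);
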
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index 91f3093eee6a..bdd276a6e540 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -25,17 +25,6 @@ config CRYPTO_NHPOLY1305_NEON
Architecture: arm64 using:
- NEON (Advanced SIMD) extensions
-config CRYPTO_SHA3_ARM64
- tristate "Hash functions: SHA-3 (ARMv8.2 Crypto Extensions)"
- depends on KERNEL_MODE_NEON
- select CRYPTO_HASH
- select CRYPTO_SHA3
- help
- SHA-3 secure hash algorithms (FIPS 202)
-
- Architecture: arm64 using:
- - ARMv8.2 Crypto Extensions
-
config CRYPTO_SM3_NEON
tristate "Hash functions: SM3 (NEON)"
depends on KERNEL_MODE_NEON
@@ -58,16 +47,6 @@ config CRYPTO_SM3_ARM64_CE
Architecture: arm64 using:
- ARMv8.2 Crypto Extensions
-config CRYPTO_POLYVAL_ARM64_CE
- tristate "Hash functions: POLYVAL (ARMv8 Crypto Extensions)"
- depends on KERNEL_MODE_NEON
- select CRYPTO_POLYVAL
- help
- POLYVAL hash function for HCTR2
-
- Architecture: arm64 using:
- - ARMv8 Crypto Extensions
-
config CRYPTO_AES_ARM64
tristate "Ciphers: AES, modes: ECB, CBC, CTR, CTS, XCTR, XTS"
select CRYPTO_AES
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index a8b2cdbe202c..1e330aa08d3f 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -5,9 +5,6 @@
# Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
#
-obj-$(CONFIG_CRYPTO_SHA3_ARM64) += sha3-ce.o
-sha3-ce-y := sha3-ce-glue.o sha3-ce-core.o
-
obj-$(CONFIG_CRYPTO_SM3_NEON) += sm3-neon.o
sm3-neon-y := sm3-neon-glue.o sm3-neon-core.o
@@ -32,9 +29,6 @@ sm4-neon-y := sm4-neon-glue.o sm4-neon-core.o
obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o
ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o
-obj-$(CONFIG_CRYPTO_POLYVAL_ARM64_CE) += polyval-ce.o
-polyval-ce-y := polyval-ce-glue.o polyval-ce-core.o
-
obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o
aes-ce-cipher-y := aes-ce-core.o aes-ce-glue.o
diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c
index 2d791d51891b..c4fd648471f1 100644
--- a/arch/arm64/crypto/aes-ce-ccm-glue.c
+++ b/arch/arm64/crypto/aes-ce-ccm-glue.c
@@ -8,7 +8,6 @@
* Author: Ard Biesheuvel <ardb@kernel.org>
*/
-#include <asm/neon.h>
#include <linux/unaligned.h>
#include <crypto/aes.h>
#include <crypto/scatterwalk.h>
@@ -16,6 +15,8 @@
#include <crypto/internal/skcipher.h>
#include <linux/module.h>
+#include <asm/simd.h>
+
#include "aes-ce-setkey.h"
MODULE_IMPORT_NS("CRYPTO_INTERNAL");
@@ -114,11 +115,8 @@ static u32 ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
in += adv;
abytes -= adv;
- if (unlikely(rem)) {
- kernel_neon_end();
- kernel_neon_begin();
+ if (unlikely(rem))
macp = 0;
- }
} else {
u32 l = min(AES_BLOCK_SIZE - macp, abytes);
@@ -187,40 +185,38 @@ static int ccm_encrypt(struct aead_request *req)
if (unlikely(err))
return err;
- kernel_neon_begin();
+ scoped_ksimd() {
+ if (req->assoclen)
+ ccm_calculate_auth_mac(req, mac);
- if (req->assoclen)
- ccm_calculate_auth_mac(req, mac);
+ do {
+ u32 tail = walk.nbytes % AES_BLOCK_SIZE;
+ const u8 *src = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
+ u8 buf[AES_BLOCK_SIZE];
+ u8 *final_iv = NULL;
- do {
- u32 tail = walk.nbytes % AES_BLOCK_SIZE;
- const u8 *src = walk.src.virt.addr;
- u8 *dst = walk.dst.virt.addr;
- u8 buf[AES_BLOCK_SIZE];
- u8 *final_iv = NULL;
-
- if (walk.nbytes == walk.total) {
- tail = 0;
- final_iv = orig_iv;
- }
-
- if (unlikely(walk.nbytes < AES_BLOCK_SIZE))
- src = dst = memcpy(&buf[sizeof(buf) - walk.nbytes],
- src, walk.nbytes);
+ if (walk.nbytes == walk.total) {
+ tail = 0;
+ final_iv = orig_iv;
+ }
- ce_aes_ccm_encrypt(dst, src, walk.nbytes - tail,
- ctx->key_enc, num_rounds(ctx),
- mac, walk.iv, final_iv);
+ if (unlikely(walk.nbytes < AES_BLOCK_SIZE))
+ src = dst = memcpy(&buf[sizeof(buf) - walk.nbytes],
+ src, walk.nbytes);
- if (unlikely(walk.nbytes < AES_BLOCK_SIZE))
- memcpy(walk.dst.virt.addr, dst, walk.nbytes);
+ ce_aes_ccm_encrypt(dst, src, walk.nbytes - tail,
+ ctx->key_enc, num_rounds(ctx),
+ mac, walk.iv, final_iv);
- if (walk.nbytes) {
- err = skcipher_walk_done(&walk, tail);
- }
- } while (walk.nbytes);
+ if (unlikely(walk.nbytes < AES_BLOCK_SIZE))
+ memcpy(walk.dst.virt.addr, dst, walk.nbytes);
- kernel_neon_end();
+ if (walk.nbytes) {
+ err = skcipher_walk_done(&walk, tail);
+ }
+ } while (walk.nbytes);
+ }
if (unlikely(err))
return err;
@@ -254,40 +250,38 @@ static int ccm_decrypt(struct aead_request *req)
if (unlikely(err))
return err;
- kernel_neon_begin();
-
- if (req->assoclen)
- ccm_calculate_auth_mac(req, mac);
+ scoped_ksimd() {
+ if (req->assoclen)
+ ccm_calculate_auth_mac(req, mac);
- do {
- u32 tail = walk.nbytes % AES_BLOCK_SIZE;
- const u8 *src = walk.src.virt.addr;
- u8 *dst = walk.dst.virt.addr;
- u8 buf[AES_BLOCK_SIZE];
- u8 *final_iv = NULL;
-
- if (walk.nbytes == walk.total) {
- tail = 0;
- final_iv = orig_iv;
- }
+ do {
+ u32 tail = walk.nbytes % AES_BLOCK_SIZE;
+ const u8 *src = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
+ u8 buf[AES_BLOCK_SIZE];
+ u8 *final_iv = NULL;
- if (unlikely(walk.nbytes < AES_BLOCK_SIZE))
- src = dst = memcpy(&buf[sizeof(buf) - walk.nbytes],
- src, walk.nbytes);
+ if (walk.nbytes == walk.total) {
+ tail = 0;
+ final_iv = orig_iv;
+ }
- ce_aes_ccm_decrypt(dst, src, walk.nbytes - tail,
- ctx->key_enc, num_rounds(ctx),
- mac, walk.iv, final_iv);
+ if (unlikely(walk.nbytes < AES_BLOCK_SIZE))
+ src = dst = memcpy(&buf[sizeof(buf) - walk.nbytes],
+ src, walk.nbytes);
- if (unlikely(walk.nbytes < AES_BLOCK_SIZE))
- memcpy(walk.dst.virt.addr, dst, walk.nbytes);
+ ce_aes_ccm_decrypt(dst, src, walk.nbytes - tail,
+ ctx->key_enc, num_rounds(ctx),
+ mac, walk.iv, final_iv);
- if (walk.nbytes) {
- err = skcipher_walk_done(&walk, tail);
- }
- } while (walk.nbytes);
+ if (unlikely(walk.nbytes < AES_BLOCK_SIZE))
+ memcpy(walk.dst.virt.addr, dst, walk.nbytes);
- kernel_neon_end();
+ if (walk.nbytes) {
+ err = skcipher_walk_done(&walk, tail);
+ }
+ } while (walk.nbytes);
+ }
if (unlikely(err))
return err;
diff --git a/arch/arm64/crypto/aes-ce-glue.c b/arch/arm64/crypto/aes-ce-glue.c
index 00b8749013c5..a4dad370991d 100644
--- a/arch/arm64/crypto/aes-ce-glue.c
+++ b/arch/arm64/crypto/aes-ce-glue.c
@@ -52,9 +52,8 @@ static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
return;
}
- kernel_neon_begin();
- __aes_ce_encrypt(ctx->key_enc, dst, src, num_rounds(ctx));
- kernel_neon_end();
+ scoped_ksimd()
+ __aes_ce_encrypt(ctx->key_enc, dst, src, num_rounds(ctx));
}
static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
@@ -66,9 +65,8 @@ static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
return;
}
- kernel_neon_begin();
- __aes_ce_decrypt(ctx->key_dec, dst, src, num_rounds(ctx));
- kernel_neon_end();
+ scoped_ksimd()
+ __aes_ce_decrypt(ctx->key_dec, dst, src, num_rounds(ctx));
}
int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
@@ -94,47 +92,48 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
for (i = 0; i < kwords; i++)
ctx->key_enc[i] = get_unaligned_le32(in_key + i * sizeof(u32));
- kernel_neon_begin();
- for (i = 0; i < sizeof(rcon); i++) {
- u32 *rki = ctx->key_enc + (i * kwords);
- u32 *rko = rki + kwords;
-
- rko[0] = ror32(__aes_ce_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0];
- rko[1] = rko[0] ^ rki[1];
- rko[2] = rko[1] ^ rki[2];
- rko[3] = rko[2] ^ rki[3];
-
- if (key_len == AES_KEYSIZE_192) {
- if (i >= 7)
- break;
- rko[4] = rko[3] ^ rki[4];
- rko[5] = rko[4] ^ rki[5];
- } else if (key_len == AES_KEYSIZE_256) {
- if (i >= 6)
- break;
- rko[4] = __aes_ce_sub(rko[3]) ^ rki[4];
- rko[5] = rko[4] ^ rki[5];
- rko[6] = rko[5] ^ rki[6];
- rko[7] = rko[6] ^ rki[7];
+ scoped_ksimd() {
+ for (i = 0; i < sizeof(rcon); i++) {
+ u32 *rki = ctx->key_enc + (i * kwords);
+ u32 *rko = rki + kwords;
+
+ rko[0] = ror32(__aes_ce_sub(rki[kwords - 1]), 8) ^
+ rcon[i] ^ rki[0];
+ rko[1] = rko[0] ^ rki[1];
+ rko[2] = rko[1] ^ rki[2];
+ rko[3] = rko[2] ^ rki[3];
+
+ if (key_len == AES_KEYSIZE_192) {
+ if (i >= 7)
+ break;
+ rko[4] = rko[3] ^ rki[4];
+ rko[5] = rko[4] ^ rki[5];
+ } else if (key_len == AES_KEYSIZE_256) {
+ if (i >= 6)
+ break;
+ rko[4] = __aes_ce_sub(rko[3]) ^ rki[4];
+ rko[5] = rko[4] ^ rki[5];
+ rko[6] = rko[5] ^ rki[6];
+ rko[7] = rko[6] ^ rki[7];
+ }
}
- }
- /*
- * Generate the decryption keys for the Equivalent Inverse Cipher.
- * This involves reversing the order of the round keys, and applying
- * the Inverse Mix Columns transformation on all but the first and
- * the last one.
- */
- key_enc = (struct aes_block *)ctx->key_enc;
- key_dec = (struct aes_block *)ctx->key_dec;
- j = num_rounds(ctx);
-
- key_dec[0] = key_enc[j];
- for (i = 1, j--; j > 0; i++, j--)
- __aes_ce_invert(key_dec + i, key_enc + j);
- key_dec[i] = key_enc[0];
+ /*
+ * Generate the decryption keys for the Equivalent Inverse
+ * Cipher. This involves reversing the order of the round
+ * keys, and applying the Inverse Mix Columns transformation on
+ * all but the first and the last one.
+ */
+ key_enc = (struct aes_block *)ctx->key_enc;
+ key_dec = (struct aes_block *)ctx->key_dec;
+ j = num_rounds(ctx);
+
+ key_dec[0] = key_enc[j];
+ for (i = 1, j--; j > 0; i++, j--)
+ __aes_ce_invert(key_dec + i, key_enc + j);
+ key_dec[i] = key_enc[0];
+ }
- kernel_neon_end();
return 0;
}
EXPORT_SYMBOL(ce_aes_expandkey);
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index 5e207ff34482..b087b900d279 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -5,8 +5,6 @@
* Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
*/
-#include <asm/hwcap.h>
-#include <asm/neon.h>
#include <crypto/aes.h>
#include <crypto/ctr.h>
#include <crypto/internal/hash.h>
@@ -20,6 +18,9 @@
#include <linux/module.h>
#include <linux/string.h>
+#include <asm/hwcap.h>
+#include <asm/simd.h>
+
#include "aes-ce-setkey.h"
#ifdef USE_V8_CRYPTO_EXTENSIONS
@@ -186,10 +187,9 @@ static int __maybe_unused ecb_encrypt(struct skcipher_request *req)
err = skcipher_walk_virt(&walk, req, false);
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
- kernel_neon_begin();
- aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key_enc, rounds, blocks);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_enc, rounds, blocks);
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
return err;
@@ -206,10 +206,9 @@ static int __maybe_unused ecb_decrypt(struct skcipher_request *req)
err = skcipher_walk_virt(&walk, req, false);
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
- kernel_neon_begin();
- aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key_dec, rounds, blocks);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_dec, rounds, blocks);
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
return err;
@@ -224,10 +223,9 @@ static int cbc_encrypt_walk(struct skcipher_request *req,
unsigned int blocks;
while ((blocks = (walk->nbytes / AES_BLOCK_SIZE))) {
- kernel_neon_begin();
- aes_cbc_encrypt(walk->dst.virt.addr, walk->src.virt.addr,
- ctx->key_enc, rounds, blocks, walk->iv);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_cbc_encrypt(walk->dst.virt.addr, walk->src.virt.addr,
+ ctx->key_enc, rounds, blocks, walk->iv);
err = skcipher_walk_done(walk, walk->nbytes % AES_BLOCK_SIZE);
}
return err;
@@ -253,10 +251,9 @@ static int cbc_decrypt_walk(struct skcipher_request *req,
unsigned int blocks;
while ((blocks = (walk->nbytes / AES_BLOCK_SIZE))) {
- kernel_neon_begin();
- aes_cbc_decrypt(walk->dst.virt.addr, walk->src.virt.addr,
- ctx->key_dec, rounds, blocks, walk->iv);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_cbc_decrypt(walk->dst.virt.addr, walk->src.virt.addr,
+ ctx->key_dec, rounds, blocks, walk->iv);
err = skcipher_walk_done(walk, walk->nbytes % AES_BLOCK_SIZE);
}
return err;
@@ -322,10 +319,9 @@ static int cts_cbc_encrypt(struct skcipher_request *req)
if (err)
return err;
- kernel_neon_begin();
- aes_cbc_cts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key_enc, rounds, walk.nbytes, walk.iv);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_cbc_cts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_enc, rounds, walk.nbytes, walk.iv);
return skcipher_walk_done(&walk, 0);
}
@@ -379,10 +375,9 @@ static int cts_cbc_decrypt(struct skcipher_request *req)
if (err)
return err;
- kernel_neon_begin();
- aes_cbc_cts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key_dec, rounds, walk.nbytes, walk.iv);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_cbc_cts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_dec, rounds, walk.nbytes, walk.iv);
return skcipher_walk_done(&walk, 0);
}
@@ -399,11 +394,11 @@ static int __maybe_unused essiv_cbc_encrypt(struct skcipher_request *req)
blocks = walk.nbytes / AES_BLOCK_SIZE;
if (blocks) {
- kernel_neon_begin();
- aes_essiv_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key1.key_enc, rounds, blocks,
- req->iv, ctx->key2.key_enc);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_essiv_cbc_encrypt(walk.dst.virt.addr,
+ walk.src.virt.addr,
+ ctx->key1.key_enc, rounds, blocks,
+ req->iv, ctx->key2.key_enc);
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
return err ?: cbc_encrypt_walk(req, &walk);
@@ -421,11 +416,11 @@ static int __maybe_unused essiv_cbc_decrypt(struct skcipher_request *req)
blocks = walk.nbytes / AES_BLOCK_SIZE;
if (blocks) {
- kernel_neon_begin();
- aes_essiv_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key1.key_dec, rounds, blocks,
- req->iv, ctx->key2.key_enc);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_essiv_cbc_decrypt(walk.dst.virt.addr,
+ walk.src.virt.addr,
+ ctx->key1.key_dec, rounds, blocks,
+ req->iv, ctx->key2.key_enc);
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
return err ?: cbc_decrypt_walk(req, &walk);
@@ -461,10 +456,9 @@ static int __maybe_unused xctr_encrypt(struct skcipher_request *req)
else if (nbytes < walk.total)
nbytes &= ~(AES_BLOCK_SIZE - 1);
- kernel_neon_begin();
- aes_xctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes,
- walk.iv, byte_ctr);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_xctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes,
+ walk.iv, byte_ctr);
if (unlikely(nbytes < AES_BLOCK_SIZE))
memcpy(walk.dst.virt.addr,
@@ -506,10 +500,9 @@ static int __maybe_unused ctr_encrypt(struct skcipher_request *req)
else if (nbytes < walk.total)
nbytes &= ~(AES_BLOCK_SIZE - 1);
- kernel_neon_begin();
- aes_ctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes,
- walk.iv);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_ctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes,
+ walk.iv);
if (unlikely(nbytes < AES_BLOCK_SIZE))
memcpy(walk.dst.virt.addr,
@@ -562,11 +555,10 @@ static int __maybe_unused xts_encrypt(struct skcipher_request *req)
if (walk.nbytes < walk.total)
nbytes &= ~(AES_BLOCK_SIZE - 1);
- kernel_neon_begin();
- aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key1.key_enc, rounds, nbytes,
- ctx->key2.key_enc, walk.iv, first);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key1.key_enc, rounds, nbytes,
+ ctx->key2.key_enc, walk.iv, first);
err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
@@ -584,11 +576,10 @@ static int __maybe_unused xts_encrypt(struct skcipher_request *req)
if (err)
return err;
- kernel_neon_begin();
- aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key1.key_enc, rounds, walk.nbytes,
- ctx->key2.key_enc, walk.iv, first);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key1.key_enc, rounds, walk.nbytes,
+ ctx->key2.key_enc, walk.iv, first);
return skcipher_walk_done(&walk, 0);
}
@@ -634,11 +625,10 @@ static int __maybe_unused xts_decrypt(struct skcipher_request *req)
if (walk.nbytes < walk.total)
nbytes &= ~(AES_BLOCK_SIZE - 1);
- kernel_neon_begin();
- aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key1.key_dec, rounds, nbytes,
- ctx->key2.key_enc, walk.iv, first);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key1.key_dec, rounds, nbytes,
+ ctx->key2.key_enc, walk.iv, first);
err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
@@ -657,11 +647,10 @@ static int __maybe_unused xts_decrypt(struct skcipher_request *req)
return err;
- kernel_neon_begin();
- aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key1.key_dec, rounds, walk.nbytes,
- ctx->key2.key_enc, walk.iv, first);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key1.key_dec, rounds, walk.nbytes,
+ ctx->key2.key_enc, walk.iv, first);
return skcipher_walk_done(&walk, 0);
}
@@ -808,10 +797,9 @@ static int cmac_setkey(struct crypto_shash *tfm, const u8 *in_key,
return err;
/* encrypt the zero vector */
- kernel_neon_begin();
- aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){}, ctx->key.key_enc,
- rounds, 1);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){},
+ ctx->key.key_enc, rounds, 1);
cmac_gf128_mul_by_x(consts, consts);
cmac_gf128_mul_by_x(consts + 1, consts);
@@ -837,10 +825,10 @@ static int xcbc_setkey(struct crypto_shash *tfm, const u8 *in_key,
if (err)
return err;
- kernel_neon_begin();
- aes_ecb_encrypt(key, ks[0], ctx->key.key_enc, rounds, 1);
- aes_ecb_encrypt(ctx->consts, ks[1], ctx->key.key_enc, rounds, 2);
- kernel_neon_end();
+ scoped_ksimd() {
+ aes_ecb_encrypt(key, ks[0], ctx->key.key_enc, rounds, 1);
+ aes_ecb_encrypt(ctx->consts, ks[1], ctx->key.key_enc, rounds, 2);
+ }
return cbcmac_setkey(tfm, key, sizeof(key));
}
@@ -860,10 +848,9 @@ static void mac_do_update(struct crypto_aes_ctx *ctx, u8 const in[], int blocks,
int rem;
do {
- kernel_neon_begin();
- rem = aes_mac_update(in, ctx->key_enc, rounds, blocks,
- dg, enc_before, !enc_before);
- kernel_neon_end();
+ scoped_ksimd()
+ rem = aes_mac_update(in, ctx->key_enc, rounds, blocks,
+ dg, enc_before, !enc_before);
in += (blocks - rem) * AES_BLOCK_SIZE;
blocks = rem;
} while (blocks);
diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c
index c4a623e86593..d496effb0a5b 100644
--- a/arch/arm64/crypto/aes-neonbs-glue.c
+++ b/arch/arm64/crypto/aes-neonbs-glue.c
@@ -85,9 +85,8 @@ static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
ctx->rounds = 6 + key_len / 4;
- kernel_neon_begin();
- aesbs_convert_key(ctx->rk, rk.key_enc, ctx->rounds);
- kernel_neon_end();
+ scoped_ksimd()
+ aesbs_convert_key(ctx->rk, rk.key_enc, ctx->rounds);
return 0;
}
@@ -110,10 +109,9 @@ static int __ecb_crypt(struct skcipher_request *req,
blocks = round_down(blocks,
walk.stride / AES_BLOCK_SIZE);
- kernel_neon_begin();
- fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk,
- ctx->rounds, blocks);
- kernel_neon_end();
+ scoped_ksimd()
+ fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk,
+ ctx->rounds, blocks);
err = skcipher_walk_done(&walk,
walk.nbytes - blocks * AES_BLOCK_SIZE);
}
@@ -146,9 +144,8 @@ static int aesbs_cbc_ctr_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
memcpy(ctx->enc, rk.key_enc, sizeof(ctx->enc));
- kernel_neon_begin();
- aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds);
- kernel_neon_end();
+ scoped_ksimd()
+ aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds);
memzero_explicit(&rk, sizeof(rk));
return 0;
@@ -167,11 +164,11 @@ static int cbc_encrypt(struct skcipher_request *req)
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
/* fall back to the non-bitsliced NEON implementation */
- kernel_neon_begin();
- neon_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->enc, ctx->key.rounds, blocks,
- walk.iv);
- kernel_neon_end();
+ scoped_ksimd()
+ neon_aes_cbc_encrypt(walk.dst.virt.addr,
+ walk.src.virt.addr,
+ ctx->enc, ctx->key.rounds, blocks,
+ walk.iv);
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
return err;
@@ -193,11 +190,10 @@ static int cbc_decrypt(struct skcipher_request *req)
blocks = round_down(blocks,
walk.stride / AES_BLOCK_SIZE);
- kernel_neon_begin();
- aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key.rk, ctx->key.rounds, blocks,
- walk.iv);
- kernel_neon_end();
+ scoped_ksimd()
+ aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key.rk, ctx->key.rounds, blocks,
+ walk.iv);
err = skcipher_walk_done(&walk,
walk.nbytes - blocks * AES_BLOCK_SIZE);
}
@@ -220,30 +216,32 @@ static int ctr_encrypt(struct skcipher_request *req)
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
- kernel_neon_begin();
- if (blocks >= 8) {
- aesbs_ctr_encrypt(dst, src, ctx->key.rk, ctx->key.rounds,
- blocks, walk.iv);
- dst += blocks * AES_BLOCK_SIZE;
- src += blocks * AES_BLOCK_SIZE;
- }
- if (nbytes && walk.nbytes == walk.total) {
- u8 buf[AES_BLOCK_SIZE];
- u8 *d = dst;
-
- if (unlikely(nbytes < AES_BLOCK_SIZE))
- src = dst = memcpy(buf + sizeof(buf) - nbytes,
- src, nbytes);
-
- neon_aes_ctr_encrypt(dst, src, ctx->enc, ctx->key.rounds,
- nbytes, walk.iv);
+ scoped_ksimd() {
+ if (blocks >= 8) {
+ aesbs_ctr_encrypt(dst, src, ctx->key.rk,
+ ctx->key.rounds, blocks,
+ walk.iv);
+ dst += blocks * AES_BLOCK_SIZE;
+ src += blocks * AES_BLOCK_SIZE;
+ }
+ if (nbytes && walk.nbytes == walk.total) {
+ u8 buf[AES_BLOCK_SIZE];
+ u8 *d = dst;
+
+ if (unlikely(nbytes < AES_BLOCK_SIZE))
+ src = dst = memcpy(buf + sizeof(buf) -
+ nbytes, src, nbytes);
+
+ neon_aes_ctr_encrypt(dst, src, ctx->enc,
+ ctx->key.rounds, nbytes,
+ walk.iv);
- if (unlikely(nbytes < AES_BLOCK_SIZE))
- memcpy(d, dst, nbytes);
+ if (unlikely(nbytes < AES_BLOCK_SIZE))
+ memcpy(d, dst, nbytes);
- nbytes = 0;
+ nbytes = 0;
+ }
}
- kernel_neon_end();
err = skcipher_walk_done(&walk, nbytes);
}
return err;
@@ -320,33 +318,33 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt,
in = walk.src.virt.addr;
nbytes = walk.nbytes;
- kernel_neon_begin();
- if (blocks >= 8) {
- if (first == 1)
- neon_aes_ecb_encrypt(walk.iv, walk.iv,
- ctx->twkey,
- ctx->key.rounds, 1);
- first = 2;
-
- fn(out, in, ctx->key.rk, ctx->key.rounds, blocks,
- walk.iv);
-
- out += blocks * AES_BLOCK_SIZE;
- in += blocks * AES_BLOCK_SIZE;
- nbytes -= blocks * AES_BLOCK_SIZE;
+ scoped_ksimd() {
+ if (blocks >= 8) {
+ if (first == 1)
+ neon_aes_ecb_encrypt(walk.iv, walk.iv,
+ ctx->twkey,
+ ctx->key.rounds, 1);
+ first = 2;
+
+ fn(out, in, ctx->key.rk, ctx->key.rounds, blocks,
+ walk.iv);
+
+ out += blocks * AES_BLOCK_SIZE;
+ in += blocks * AES_BLOCK_SIZE;
+ nbytes -= blocks * AES_BLOCK_SIZE;
+ }
+ if (walk.nbytes == walk.total && nbytes > 0) {
+ if (encrypt)
+ neon_aes_xts_encrypt(out, in, ctx->cts.key_enc,
+ ctx->key.rounds, nbytes,
+ ctx->twkey, walk.iv, first);
+ else
+ neon_aes_xts_decrypt(out, in, ctx->cts.key_dec,
+ ctx->key.rounds, nbytes,
+ ctx->twkey, walk.iv, first);
+ nbytes = first = 0;
+ }
}
- if (walk.nbytes == walk.total && nbytes > 0) {
- if (encrypt)
- neon_aes_xts_encrypt(out, in, ctx->cts.key_enc,
- ctx->key.rounds, nbytes,
- ctx->twkey, walk.iv, first);
- else
- neon_aes_xts_decrypt(out, in, ctx->cts.key_dec,
- ctx->key.rounds, nbytes,
- ctx->twkey, walk.iv, first);
- nbytes = first = 0;
- }
- kernel_neon_end();
err = skcipher_walk_done(&walk, nbytes);
}
@@ -369,14 +367,16 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt,
in = walk.src.virt.addr;
nbytes = walk.nbytes;
- kernel_neon_begin();
- if (encrypt)
- neon_aes_xts_encrypt(out, in, ctx->cts.key_enc, ctx->key.rounds,
- nbytes, ctx->twkey, walk.iv, first);
- else
- neon_aes_xts_decrypt(out, in, ctx->cts.key_dec, ctx->key.rounds,
- nbytes, ctx->twkey, walk.iv, first);
- kernel_neon_end();
+ scoped_ksimd() {
+ if (encrypt)
+ neon_aes_xts_encrypt(out, in, ctx->cts.key_enc,
+ ctx->key.rounds, nbytes, ctx->twkey,
+ walk.iv, first);
+ else
+ neon_aes_xts_decrypt(out, in, ctx->cts.key_dec,
+ ctx->key.rounds, nbytes, ctx->twkey,
+ walk.iv, first);
+ }
return skcipher_walk_done(&walk, 0);
}
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index 4995b6e22335..7951557a285a 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -5,7 +5,6 @@
* Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org>
*/
-#include <asm/neon.h>
#include <crypto/aes.h>
#include <crypto/b128ops.h>
#include <crypto/gcm.h>
@@ -22,6 +21,8 @@
#include <linux/string.h>
#include <linux/unaligned.h>
+#include <asm/simd.h>
+
MODULE_DESCRIPTION("GHASH and AES-GCM using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
@@ -74,9 +75,8 @@ void ghash_do_simd_update(int blocks, u64 dg[], const char *src,
u64 const h[][2],
const char *head))
{
- kernel_neon_begin();
- simd_update(blocks, dg, src, key->h, head);
- kernel_neon_end();
+ scoped_ksimd()
+ simd_update(blocks, dg, src, key->h, head);
}
/* avoid hogging the CPU for too long */
@@ -329,11 +329,10 @@ static int gcm_encrypt(struct aead_request *req, char *iv, int assoclen)
tag = NULL;
}
- kernel_neon_begin();
- pmull_gcm_encrypt(nbytes, dst, src, ctx->ghash_key.h,
- dg, iv, ctx->aes_key.key_enc, nrounds,
- tag);
- kernel_neon_end();
+ scoped_ksimd()
+ pmull_gcm_encrypt(nbytes, dst, src, ctx->ghash_key.h,
+ dg, iv, ctx->aes_key.key_enc, nrounds,
+ tag);
if (unlikely(!nbytes))
break;
@@ -399,11 +398,11 @@ static int gcm_decrypt(struct aead_request *req, char *iv, int assoclen)
tag = NULL;
}
- kernel_neon_begin();
- ret = pmull_gcm_decrypt(nbytes, dst, src, ctx->ghash_key.h,
- dg, iv, ctx->aes_key.key_enc,
- nrounds, tag, otag, authsize);
- kernel_neon_end();
+ scoped_ksimd()
+ ret = pmull_gcm_decrypt(nbytes, dst, src,
+ ctx->ghash_key.h,
+ dg, iv, ctx->aes_key.key_enc,
+ nrounds, tag, otag, authsize);
if (unlikely(!nbytes))
break;
diff --git a/arch/arm64/crypto/nhpoly1305-neon-glue.c b/arch/arm64/crypto/nhpoly1305-neon-glue.c
index e4a0b463f080..013de6ac569a 100644
--- a/arch/arm64/crypto/nhpoly1305-neon-glue.c
+++ b/arch/arm64/crypto/nhpoly1305-neon-glue.c
@@ -25,9 +25,8 @@ static int nhpoly1305_neon_update(struct shash_desc *desc,
do {
unsigned int n = min_t(unsigned int, srclen, SZ_4K);
- kernel_neon_begin();
- crypto_nhpoly1305_update_helper(desc, src, n, nh_neon);
- kernel_neon_end();
+ scoped_ksimd()
+ crypto_nhpoly1305_update_helper(desc, src, n, nh_neon);
src += n;
srclen -= n;
} while (srclen);
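
The nhpoly1305 hunk above hands at most SZ_4K bytes to one
scoped_ksimd() block per loop iteration; ghash-ce-glue.c carries the
same "avoid hogging the CPU for too long" comment, and the removed
polyval glue used the identical 4 KiB split.  A generic sketch of the
idiom — simd_hash_update() is a hypothetical stand-in for whatever SIMD
helper is being wrapped:

	while (len) {
		unsigned int n = min_t(unsigned int, len, SZ_4K);

		/*
		 * Preemption is only deferred inside the scoped section,
		 * so capping n bounds the scheduling latency per chunk.
		 */
		scoped_ksimd()
			simd_hash_update(state, src, n);	/* hypothetical */

		src += n;
		len -= n;
	}
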
diff --git a/arch/arm64/crypto/polyval-ce-core.S b/arch/arm64/crypto/polyval-ce-core.S
deleted file mode 100644
index b5326540d2e3..000000000000
--- a/arch/arm64/crypto/polyval-ce-core.S
+++ /dev/null
@@ -1,361 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Implementation of POLYVAL using ARMv8 Crypto Extensions.
- *
- * Copyright 2021 Google LLC
- */
-/*
- * This is an efficient implementation of POLYVAL using ARMv8 Crypto Extensions
- * It works on 8 blocks at a time, by precomputing the first 8 keys powers h^8,
- * ..., h^1 in the POLYVAL finite field. This precomputation allows us to split
- * finite field multiplication into two steps.
- *
- * In the first step, we consider h^i, m_i as normal polynomials of degree less
- * than 128. We then compute p(x) = h^8m_0 + ... + h^1m_7 where multiplication
- * is simply polynomial multiplication.
- *
- * In the second step, we compute the reduction of p(x) modulo the finite field
- * modulus g(x) = x^128 + x^127 + x^126 + x^121 + 1.
- *
- * This two step process is equivalent to computing h^8m_0 + ... + h^1m_7 where
- * multiplication is finite field multiplication. The advantage is that the
- * two-step process only requires 1 finite field reduction for every 8
- * polynomial multiplications. Further parallelism is gained by interleaving the
- * multiplications and polynomial reductions.
- */
-
-#include <linux/linkage.h>
-#define STRIDE_BLOCKS 8
-
-KEY_POWERS .req x0
-MSG .req x1
-BLOCKS_LEFT .req x2
-ACCUMULATOR .req x3
-KEY_START .req x10
-EXTRA_BYTES .req x11
-TMP .req x13
-
-M0 .req v0
-M1 .req v1
-M2 .req v2
-M3 .req v3
-M4 .req v4
-M5 .req v5
-M6 .req v6
-M7 .req v7
-KEY8 .req v8
-KEY7 .req v9
-KEY6 .req v10
-KEY5 .req v11
-KEY4 .req v12
-KEY3 .req v13
-KEY2 .req v14
-KEY1 .req v15
-PL .req v16
-PH .req v17
-TMP_V .req v18
-LO .req v20
-MI .req v21
-HI .req v22
-SUM .req v23
-GSTAR .req v24
-
- .text
-
- .arch armv8-a+crypto
- .align 4
-
-.Lgstar:
- .quad 0xc200000000000000, 0xc200000000000000
-
-/*
- * Computes the product of two 128-bit polynomials in X and Y and XORs the
- * components of the 256-bit product into LO, MI, HI.
- *
- * Given:
- * X = [X_1 : X_0]
- * Y = [Y_1 : Y_0]
- *
- * We compute:
- * LO += X_0 * Y_0
- * MI += (X_0 + X_1) * (Y_0 + Y_1)
- * HI += X_1 * Y_1
- *
- * Later, the 256-bit result can be extracted as:
- * [HI_1 : HI_0 + HI_1 + MI_1 + LO_1 : LO_1 + HI_0 + MI_0 + LO_0 : LO_0]
- * This step is done when computing the polynomial reduction for efficiency
- * reasons.
- *
- * Karatsuba multiplication is used instead of Schoolbook multiplication because
- * it was found to be slightly faster on ARM64 CPUs.
- *
- */
-.macro karatsuba1 X Y
- X .req \X
- Y .req \Y
- ext v25.16b, X.16b, X.16b, #8
- ext v26.16b, Y.16b, Y.16b, #8
- eor v25.16b, v25.16b, X.16b
- eor v26.16b, v26.16b, Y.16b
- pmull2 v28.1q, X.2d, Y.2d
- pmull v29.1q, X.1d, Y.1d
- pmull v27.1q, v25.1d, v26.1d
- eor HI.16b, HI.16b, v28.16b
- eor LO.16b, LO.16b, v29.16b
- eor MI.16b, MI.16b, v27.16b
- .unreq X
- .unreq Y
-.endm
-
-/*
- * Same as karatsuba1, except overwrites HI, LO, MI rather than XORing into
- * them.
- */
-.macro karatsuba1_store X Y
- X .req \X
- Y .req \Y
- ext v25.16b, X.16b, X.16b, #8
- ext v26.16b, Y.16b, Y.16b, #8
- eor v25.16b, v25.16b, X.16b
- eor v26.16b, v26.16b, Y.16b
- pmull2 HI.1q, X.2d, Y.2d
- pmull LO.1q, X.1d, Y.1d
- pmull MI.1q, v25.1d, v26.1d
- .unreq X
- .unreq Y
-.endm
-
-/*
- * Computes the 256-bit polynomial represented by LO, HI, MI. Stores
- * the result in PL, PH.
- * [PH : PL] =
- * [HI_1 : HI_1 + HI_0 + MI_1 + LO_1 : HI_0 + MI_0 + LO_1 + LO_0 : LO_0]
- */
-.macro karatsuba2
- // v4 = [HI_1 + MI_1 : HI_0 + MI_0]
- eor v4.16b, HI.16b, MI.16b
- // v4 = [HI_1 + MI_1 + LO_1 : HI_0 + MI_0 + LO_0]
- eor v4.16b, v4.16b, LO.16b
- // v5 = [HI_0 : LO_1]
- ext v5.16b, LO.16b, HI.16b, #8
- // v4 = [HI_1 + HI_0 + MI_1 + LO_1 : HI_0 + MI_0 + LO_1 + LO_0]
- eor v4.16b, v4.16b, v5.16b
- // HI = [HI_0 : HI_1]
- ext HI.16b, HI.16b, HI.16b, #8
- // LO = [LO_0 : LO_1]
- ext LO.16b, LO.16b, LO.16b, #8
- // PH = [HI_1 : HI_1 + HI_0 + MI_1 + LO_1]
- ext PH.16b, v4.16b, HI.16b, #8
- // PL = [HI_0 + MI_0 + LO_1 + LO_0 : LO_0]
- ext PL.16b, LO.16b, v4.16b, #8
-.endm
-
-/*
- * Computes the 128-bit reduction of PH : PL. Stores the result in dest.
- *
- * This macro computes p(x) mod g(x) where p(x) is in montgomery form and g(x) =
- * x^128 + x^127 + x^126 + x^121 + 1.
- *
- * We have a 256-bit polynomial PH : PL = P_3 : P_2 : P_1 : P_0 that is the
- * product of two 128-bit polynomials in Montgomery form. We need to reduce it
- * mod g(x). Also, since polynomials in Montgomery form have an "extra" factor
- * of x^128, this product has two extra factors of x^128. To get it back into
- * Montgomery form, we need to remove one of these factors by dividing by x^128.
- *
- * To accomplish both of these goals, we add multiples of g(x) that cancel out
- * the low 128 bits P_1 : P_0, leaving just the high 128 bits. Since the low
- * bits are zero, the polynomial division by x^128 can be done by right
- * shifting.
- *
- * Since the only nonzero term in the low 64 bits of g(x) is the constant term,
- * the multiple of g(x) needed to cancel out P_0 is P_0 * g(x). The CPU can
- * only do 64x64 bit multiplications, so split P_0 * g(x) into x^128 * P_0 +
- * x^64 * g*(x) * P_0 + P_0, where g*(x) is bits 64-127 of g(x). Adding this to
- * the original polynomial gives P_3 : P_2 + P_0 + T_1 : P_1 + T_0 : 0, where T
- * = T_1 : T_0 = g*(x) * P_0. Thus, bits 0-63 got "folded" into bits 64-191.
- *
- * Repeating this same process on the next 64 bits "folds" bits 64-127 into bits
- * 128-255, giving the answer in bits 128-255. This time, we need to cancel P_1
- * + T_0 in bits 64-127. The multiple of g(x) required is (P_1 + T_0) * g(x) *
- * x^64. Adding this to our previous computation gives P_3 + P_1 + T_0 + V_1 :
- * P_2 + P_0 + T_1 + V_0 : 0 : 0, where V = V_1 : V_0 = g*(x) * (P_1 + T_0).
- *
- * So our final computation is:
- * T = T_1 : T_0 = g*(x) * P_0
- * V = V_1 : V_0 = g*(x) * (P_1 + T_0)
- * p(x) / x^{128} mod g(x) = P_3 + P_1 + T_0 + V_1 : P_2 + P_0 + T_1 + V_0
- *
- * The implementation below saves a XOR instruction by computing P_1 + T_0 : P_0
- * + T_1 and XORing into dest, rather than separately XORing P_1 : P_0 and T_0 :
- * T_1 into dest. This allows us to reuse P_1 + T_0 when computing V.
- */
-.macro montgomery_reduction dest
- DEST .req \dest
- // TMP_V = T_1 : T_0 = P_0 * g*(x)
- pmull TMP_V.1q, PL.1d, GSTAR.1d
- // TMP_V = T_0 : T_1
- ext TMP_V.16b, TMP_V.16b, TMP_V.16b, #8
- // TMP_V = P_1 + T_0 : P_0 + T_1
- eor TMP_V.16b, PL.16b, TMP_V.16b
- // PH = P_3 + P_1 + T_0 : P_2 + P_0 + T_1
- eor PH.16b, PH.16b, TMP_V.16b
- // TMP_V = V_1 : V_0 = (P_1 + T_0) * g*(x)
- pmull2 TMP_V.1q, TMP_V.2d, GSTAR.2d
- eor DEST.16b, PH.16b, TMP_V.16b
- .unreq DEST
-.endm
-
-/*
- * Compute Polyval on 8 blocks.
- *
- * If reduce is set, also computes the montgomery reduction of the
- * previous full_stride call and XORs with the first message block.
- * (m_0 + REDUCE(PL, PH))h^8 + ... + m_7h^1.
- * I.e., the first multiplication uses m_0 + REDUCE(PL, PH) instead of m_0.
- *
- * Sets PL, PH.
- */
-.macro full_stride reduce
- eor LO.16b, LO.16b, LO.16b
- eor MI.16b, MI.16b, MI.16b
- eor HI.16b, HI.16b, HI.16b
-
- ld1 {M0.16b, M1.16b, M2.16b, M3.16b}, [MSG], #64
- ld1 {M4.16b, M5.16b, M6.16b, M7.16b}, [MSG], #64
-
- karatsuba1 M7 KEY1
- .if \reduce
- pmull TMP_V.1q, PL.1d, GSTAR.1d
- .endif
-
- karatsuba1 M6 KEY2
- .if \reduce
- ext TMP_V.16b, TMP_V.16b, TMP_V.16b, #8
- .endif
-
- karatsuba1 M5 KEY3
- .if \reduce
- eor TMP_V.16b, PL.16b, TMP_V.16b
- .endif
-
- karatsuba1 M4 KEY4
- .if \reduce
- eor PH.16b, PH.16b, TMP_V.16b
- .endif
-
- karatsuba1 M3 KEY5
- .if \reduce
- pmull2 TMP_V.1q, TMP_V.2d, GSTAR.2d
- .endif
-
- karatsuba1 M2 KEY6
- .if \reduce
- eor SUM.16b, PH.16b, TMP_V.16b
- .endif
-
- karatsuba1 M1 KEY7
- eor M0.16b, M0.16b, SUM.16b
-
- karatsuba1 M0 KEY8
- karatsuba2
-.endm
-
-/*
- * Handle any extra blocks after full_stride loop.
- */
-.macro partial_stride
- add KEY_POWERS, KEY_START, #(STRIDE_BLOCKS << 4)
- sub KEY_POWERS, KEY_POWERS, BLOCKS_LEFT, lsl #4
- ld1 {KEY1.16b}, [KEY_POWERS], #16
-
- ld1 {TMP_V.16b}, [MSG], #16
- eor SUM.16b, SUM.16b, TMP_V.16b
- karatsuba1_store KEY1 SUM
- sub BLOCKS_LEFT, BLOCKS_LEFT, #1
-
- tst BLOCKS_LEFT, #4
- beq .Lpartial4BlocksDone
- ld1 {M0.16b, M1.16b, M2.16b, M3.16b}, [MSG], #64
- ld1 {KEY8.16b, KEY7.16b, KEY6.16b, KEY5.16b}, [KEY_POWERS], #64
- karatsuba1 M0 KEY8
- karatsuba1 M1 KEY7
- karatsuba1 M2 KEY6
- karatsuba1 M3 KEY5
-.Lpartial4BlocksDone:
- tst BLOCKS_LEFT, #2
- beq .Lpartial2BlocksDone
- ld1 {M0.16b, M1.16b}, [MSG], #32
- ld1 {KEY8.16b, KEY7.16b}, [KEY_POWERS], #32
- karatsuba1 M0 KEY8
- karatsuba1 M1 KEY7
-.Lpartial2BlocksDone:
- tst BLOCKS_LEFT, #1
- beq .LpartialDone
- ld1 {M0.16b}, [MSG], #16
- ld1 {KEY8.16b}, [KEY_POWERS], #16
- karatsuba1 M0 KEY8
-.LpartialDone:
- karatsuba2
- montgomery_reduction SUM
-.endm
-
-/*
- * Perform montgomery multiplication in GF(2^128) and store result in op1.
- *
- * Computes op1*op2*x^{-128} mod x^128 + x^127 + x^126 + x^121 + 1
- * If op1, op2 are in montgomery form, this computes the montgomery
- * form of op1*op2.
- *
- * void pmull_polyval_mul(u8 *op1, const u8 *op2);
- */
-SYM_FUNC_START(pmull_polyval_mul)
- adr TMP, .Lgstar
- ld1 {GSTAR.2d}, [TMP]
- ld1 {v0.16b}, [x0]
- ld1 {v1.16b}, [x1]
- karatsuba1_store v0 v1
- karatsuba2
- montgomery_reduction SUM
- st1 {SUM.16b}, [x0]
- ret
-SYM_FUNC_END(pmull_polyval_mul)
-
-/*
- * Perform polynomial evaluation as specified by POLYVAL. This computes:
- * h^n * accumulator + h^n * m_0 + ... + h^1 * m_{n-1}
- * where n=nblocks, h is the hash key, and m_i are the message blocks.
- *
- * x0 - pointer to precomputed key powers h^8 ... h^1
- * x1 - pointer to message blocks
- * x2 - number of blocks to hash
- * x3 - pointer to accumulator
- *
- * void pmull_polyval_update(const struct polyval_ctx *ctx, const u8 *in,
- * size_t nblocks, u8 *accumulator);
- */
-SYM_FUNC_START(pmull_polyval_update)
- adr TMP, .Lgstar
- mov KEY_START, KEY_POWERS
- ld1 {GSTAR.2d}, [TMP]
- ld1 {SUM.16b}, [ACCUMULATOR]
- subs BLOCKS_LEFT, BLOCKS_LEFT, #STRIDE_BLOCKS
- blt .LstrideLoopExit
- ld1 {KEY8.16b, KEY7.16b, KEY6.16b, KEY5.16b}, [KEY_POWERS], #64
- ld1 {KEY4.16b, KEY3.16b, KEY2.16b, KEY1.16b}, [KEY_POWERS], #64
- full_stride 0
- subs BLOCKS_LEFT, BLOCKS_LEFT, #STRIDE_BLOCKS
- blt .LstrideLoopExitReduce
-.LstrideLoop:
- full_stride 1
- subs BLOCKS_LEFT, BLOCKS_LEFT, #STRIDE_BLOCKS
- bge .LstrideLoop
-.LstrideLoopExitReduce:
- montgomery_reduction SUM
-.LstrideLoopExit:
- adds BLOCKS_LEFT, BLOCKS_LEFT, #STRIDE_BLOCKS
- beq .LskipPartial
- partial_stride
-.LskipPartial:
- st1 {SUM.16b}, [ACCUMULATOR]
- ret
-SYM_FUNC_END(pmull_polyval_update)
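
For readers skimming the removal: the deleted comments above describe a
two-step scheme — Karatsuba-accumulate eight 128-bit products into
LO/MI/HI, then a single Montgomery-style reduction modulo
g(x) = x^128 + x^127 + x^126 + x^121 + 1.  A compact C restatement of
that reduction, with clmul64() as a hypothetical 64x64 carry-less
multiply returning a 128-bit (lo, hi) product — it is not a kernel API:

	#define GSTAR	0xc200000000000000ULL	/* bits 64..127 of g(x) */

	struct poly128 { u64 lo, hi; };

	/*
	 * Reduce P_3:P_2:P_1:P_0 as described in the removed comments:
	 *   T = g*(x) * P_0
	 *   V = g*(x) * (P_1 + T_0)
	 *   result = P_3 + P_1 + T_0 + V_1 : P_2 + P_0 + T_1 + V_0
	 * clmul64() is a hypothetical carry-less multiply helper.
	 */
	static struct poly128 polyval_reduce(u64 p0, u64 p1, u64 p2, u64 p3)
	{
		struct poly128 t = clmul64(p0, GSTAR);
		u64 f0 = p1 ^ t.lo;			/* P_1 + T_0 */
		u64 f1 = p0 ^ t.hi;			/* P_0 + T_1 */
		struct poly128 v = clmul64(f0, GSTAR);

		return (struct poly128){ .lo = p2 ^ f1 ^ v.lo,
					 .hi = p3 ^ f0 ^ v.hi };
	}
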
diff --git a/arch/arm64/crypto/polyval-ce-glue.c b/arch/arm64/crypto/polyval-ce-glue.c
deleted file mode 100644
index c4e653688ea0..000000000000
--- a/arch/arm64/crypto/polyval-ce-glue.c
+++ /dev/null
@@ -1,158 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Glue code for POLYVAL using ARMv8 Crypto Extensions
- *
- * Copyright (c) 2007 Nokia Siemens Networks - Mikko Herranen <mh1@iki.fi>
- * Copyright (c) 2009 Intel Corp.
- * Author: Huang Ying <ying.huang@intel.com>
- * Copyright 2021 Google LLC
- */
-
-/*
- * Glue code based on ghash-clmulni-intel_glue.c.
- *
- * This implementation of POLYVAL uses montgomery multiplication accelerated by
- * ARMv8 Crypto Extensions instructions to implement the finite field operations.
- */
-
-#include <asm/neon.h>
-#include <crypto/internal/hash.h>
-#include <crypto/polyval.h>
-#include <crypto/utils.h>
-#include <linux/cpufeature.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/string.h>
-
-#define NUM_KEY_POWERS 8
-
-struct polyval_tfm_ctx {
- /*
- * These powers must be in the order h^8, ..., h^1.
- */
- u8 key_powers[NUM_KEY_POWERS][POLYVAL_BLOCK_SIZE];
-};
-
-struct polyval_desc_ctx {
- u8 buffer[POLYVAL_BLOCK_SIZE];
-};
-
-asmlinkage void pmull_polyval_update(const struct polyval_tfm_ctx *keys,
- const u8 *in, size_t nblocks, u8 *accumulator);
-asmlinkage void pmull_polyval_mul(u8 *op1, const u8 *op2);
-
-static void internal_polyval_update(const struct polyval_tfm_ctx *keys,
- const u8 *in, size_t nblocks, u8 *accumulator)
-{
- kernel_neon_begin();
- pmull_polyval_update(keys, in, nblocks, accumulator);
- kernel_neon_end();
-}
-
-static void internal_polyval_mul(u8 *op1, const u8 *op2)
-{
- kernel_neon_begin();
- pmull_polyval_mul(op1, op2);
- kernel_neon_end();
-}
-
-static int polyval_arm64_setkey(struct crypto_shash *tfm,
- const u8 *key, unsigned int keylen)
-{
- struct polyval_tfm_ctx *tctx = crypto_shash_ctx(tfm);
- int i;
-
- if (keylen != POLYVAL_BLOCK_SIZE)
- return -EINVAL;
-
- memcpy(tctx->key_powers[NUM_KEY_POWERS-1], key, POLYVAL_BLOCK_SIZE);
-
- for (i = NUM_KEY_POWERS-2; i >= 0; i--) {
- memcpy(tctx->key_powers[i], key, POLYVAL_BLOCK_SIZE);
- internal_polyval_mul(tctx->key_powers[i],
- tctx->key_powers[i+1]);
- }
-
- return 0;
-}
-
-static int polyval_arm64_init(struct shash_desc *desc)
-{
- struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
-
- memset(dctx, 0, sizeof(*dctx));
-
- return 0;
-}
-
-static int polyval_arm64_update(struct shash_desc *desc,
- const u8 *src, unsigned int srclen)
-{
- struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
- const struct polyval_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
- unsigned int nblocks;
-
- do {
- /* allow rescheduling every 4K bytes */
- nblocks = min(srclen, 4096U) / POLYVAL_BLOCK_SIZE;
- internal_polyval_update(tctx, src, nblocks, dctx->buffer);
- srclen -= nblocks * POLYVAL_BLOCK_SIZE;
- src += nblocks * POLYVAL_BLOCK_SIZE;
- } while (srclen >= POLYVAL_BLOCK_SIZE);
-
- return srclen;
-}
-
-static int polyval_arm64_finup(struct shash_desc *desc, const u8 *src,
- unsigned int len, u8 *dst)
-{
- struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
- const struct polyval_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
-
- if (len) {
- crypto_xor(dctx->buffer, src, len);
- internal_polyval_mul(dctx->buffer,
- tctx->key_powers[NUM_KEY_POWERS-1]);
- }
-
- memcpy(dst, dctx->buffer, POLYVAL_BLOCK_SIZE);
-
- return 0;
-}
-
-static struct shash_alg polyval_alg = {
- .digestsize = POLYVAL_DIGEST_SIZE,
- .init = polyval_arm64_init,
- .update = polyval_arm64_update,
- .finup = polyval_arm64_finup,
- .setkey = polyval_arm64_setkey,
- .descsize = sizeof(struct polyval_desc_ctx),
- .base = {
- .cra_name = "polyval",
- .cra_driver_name = "polyval-ce",
- .cra_priority = 200,
- .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
- .cra_blocksize = POLYVAL_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct polyval_tfm_ctx),
- .cra_module = THIS_MODULE,
- },
-};
-
-static int __init polyval_ce_mod_init(void)
-{
- return crypto_register_shash(&polyval_alg);
-}
-
-static void __exit polyval_ce_mod_exit(void)
-{
- crypto_unregister_shash(&polyval_alg);
-}
-
-module_cpu_feature_match(PMULL, polyval_ce_mod_init)
-module_exit(polyval_ce_mod_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("POLYVAL hash function accelerated by ARMv8 Crypto Extensions");
-MODULE_ALIAS_CRYPTO("polyval");
-MODULE_ALIAS_CRYPTO("polyval-ce");
diff --git a/arch/arm64/crypto/sha3-ce-core.S b/arch/arm64/crypto/sha3-ce-core.S
deleted file mode 100644
index 9c77313f5a60..000000000000
--- a/arch/arm64/crypto/sha3-ce-core.S
+++ /dev/null
@@ -1,212 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * sha3-ce-core.S - core SHA-3 transform using v8.2 Crypto Extensions
- *
- * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-
- .irp b,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
- .set .Lv\b\().2d, \b
- .set .Lv\b\().16b, \b
- .endr
-
- /*
- * ARMv8.2 Crypto Extensions instructions
- */
- .macro eor3, rd, rn, rm, ra
- .inst 0xce000000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
- .endm
-
- .macro rax1, rd, rn, rm
- .inst 0xce608c00 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
- .endm
-
- .macro bcax, rd, rn, rm, ra
- .inst 0xce200000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
- .endm
-
- .macro xar, rd, rn, rm, imm6
- .inst 0xce800000 | .L\rd | (.L\rn << 5) | ((\imm6) << 10) | (.L\rm << 16)
- .endm
-
- /*
- * int sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size)
- */
- .text
-SYM_FUNC_START(sha3_ce_transform)
- /* load state */
- add x8, x0, #32
- ld1 { v0.1d- v3.1d}, [x0]
- ld1 { v4.1d- v7.1d}, [x8], #32
- ld1 { v8.1d-v11.1d}, [x8], #32
- ld1 {v12.1d-v15.1d}, [x8], #32
- ld1 {v16.1d-v19.1d}, [x8], #32
- ld1 {v20.1d-v23.1d}, [x8], #32
- ld1 {v24.1d}, [x8]
-
-0: sub w2, w2, #1
- mov w8, #24
- adr_l x9, .Lsha3_rcon
-
- /* load input */
- ld1 {v25.8b-v28.8b}, [x1], #32
- ld1 {v29.8b-v31.8b}, [x1], #24
- eor v0.8b, v0.8b, v25.8b
- eor v1.8b, v1.8b, v26.8b
- eor v2.8b, v2.8b, v27.8b
- eor v3.8b, v3.8b, v28.8b
- eor v4.8b, v4.8b, v29.8b
- eor v5.8b, v5.8b, v30.8b
- eor v6.8b, v6.8b, v31.8b
-
- tbnz x3, #6, 2f // SHA3-512
-
- ld1 {v25.8b-v28.8b}, [x1], #32
- ld1 {v29.8b-v30.8b}, [x1], #16
- eor v7.8b, v7.8b, v25.8b
- eor v8.8b, v8.8b, v26.8b
- eor v9.8b, v9.8b, v27.8b
- eor v10.8b, v10.8b, v28.8b
- eor v11.8b, v11.8b, v29.8b
- eor v12.8b, v12.8b, v30.8b
-
- tbnz x3, #4, 1f // SHA3-384 or SHA3-224
-
- // SHA3-256
- ld1 {v25.8b-v28.8b}, [x1], #32
- eor v13.8b, v13.8b, v25.8b
- eor v14.8b, v14.8b, v26.8b
- eor v15.8b, v15.8b, v27.8b
- eor v16.8b, v16.8b, v28.8b
- b 3f
-
-1: tbz x3, #2, 3f // bit 2 cleared? SHA-384
-
- // SHA3-224
- ld1 {v25.8b-v28.8b}, [x1], #32
- ld1 {v29.8b}, [x1], #8
- eor v13.8b, v13.8b, v25.8b
- eor v14.8b, v14.8b, v26.8b
- eor v15.8b, v15.8b, v27.8b
- eor v16.8b, v16.8b, v28.8b
- eor v17.8b, v17.8b, v29.8b
- b 3f
-
- // SHA3-512
-2: ld1 {v25.8b-v26.8b}, [x1], #16
- eor v7.8b, v7.8b, v25.8b
- eor v8.8b, v8.8b, v26.8b
-
-3: sub w8, w8, #1
-
- eor3 v29.16b, v4.16b, v9.16b, v14.16b
- eor3 v26.16b, v1.16b, v6.16b, v11.16b
- eor3 v28.16b, v3.16b, v8.16b, v13.16b
- eor3 v25.16b, v0.16b, v5.16b, v10.16b
- eor3 v27.16b, v2.16b, v7.16b, v12.16b
- eor3 v29.16b, v29.16b, v19.16b, v24.16b
- eor3 v26.16b, v26.16b, v16.16b, v21.16b
- eor3 v28.16b, v28.16b, v18.16b, v23.16b
- eor3 v25.16b, v25.16b, v15.16b, v20.16b
- eor3 v27.16b, v27.16b, v17.16b, v22.16b
-
- rax1 v30.2d, v29.2d, v26.2d // bc[0]
- rax1 v26.2d, v26.2d, v28.2d // bc[2]
- rax1 v28.2d, v28.2d, v25.2d // bc[4]
- rax1 v25.2d, v25.2d, v27.2d // bc[1]
- rax1 v27.2d, v27.2d, v29.2d // bc[3]
-
- eor v0.16b, v0.16b, v30.16b
- xar v29.2d, v1.2d, v25.2d, (64 - 1)
- xar v1.2d, v6.2d, v25.2d, (64 - 44)
- xar v6.2d, v9.2d, v28.2d, (64 - 20)
- xar v9.2d, v22.2d, v26.2d, (64 - 61)
- xar v22.2d, v14.2d, v28.2d, (64 - 39)
- xar v14.2d, v20.2d, v30.2d, (64 - 18)
- xar v31.2d, v2.2d, v26.2d, (64 - 62)
- xar v2.2d, v12.2d, v26.2d, (64 - 43)
- xar v12.2d, v13.2d, v27.2d, (64 - 25)
- xar v13.2d, v19.2d, v28.2d, (64 - 8)
- xar v19.2d, v23.2d, v27.2d, (64 - 56)
- xar v23.2d, v15.2d, v30.2d, (64 - 41)
- xar v15.2d, v4.2d, v28.2d, (64 - 27)
- xar v28.2d, v24.2d, v28.2d, (64 - 14)
- xar v24.2d, v21.2d, v25.2d, (64 - 2)
- xar v8.2d, v8.2d, v27.2d, (64 - 55)
- xar v4.2d, v16.2d, v25.2d, (64 - 45)
- xar v16.2d, v5.2d, v30.2d, (64 - 36)
- xar v5.2d, v3.2d, v27.2d, (64 - 28)
- xar v27.2d, v18.2d, v27.2d, (64 - 21)
- xar v3.2d, v17.2d, v26.2d, (64 - 15)
- xar v25.2d, v11.2d, v25.2d, (64 - 10)
- xar v26.2d, v7.2d, v26.2d, (64 - 6)
- xar v30.2d, v10.2d, v30.2d, (64 - 3)
-
- bcax v20.16b, v31.16b, v22.16b, v8.16b
- bcax v21.16b, v8.16b, v23.16b, v22.16b
- bcax v22.16b, v22.16b, v24.16b, v23.16b
- bcax v23.16b, v23.16b, v31.16b, v24.16b
- bcax v24.16b, v24.16b, v8.16b, v31.16b
-
- ld1r {v31.2d}, [x9], #8
-
- bcax v17.16b, v25.16b, v19.16b, v3.16b
- bcax v18.16b, v3.16b, v15.16b, v19.16b
- bcax v19.16b, v19.16b, v16.16b, v15.16b
- bcax v15.16b, v15.16b, v25.16b, v16.16b
- bcax v16.16b, v16.16b, v3.16b, v25.16b
-
- bcax v10.16b, v29.16b, v12.16b, v26.16b
- bcax v11.16b, v26.16b, v13.16b, v12.16b
- bcax v12.16b, v12.16b, v14.16b, v13.16b
- bcax v13.16b, v13.16b, v29.16b, v14.16b
- bcax v14.16b, v14.16b, v26.16b, v29.16b
-
- bcax v7.16b, v30.16b, v9.16b, v4.16b
- bcax v8.16b, v4.16b, v5.16b, v9.16b
- bcax v9.16b, v9.16b, v6.16b, v5.16b
- bcax v5.16b, v5.16b, v30.16b, v6.16b
- bcax v6.16b, v6.16b, v4.16b, v30.16b
-
- bcax v3.16b, v27.16b, v0.16b, v28.16b
- bcax v4.16b, v28.16b, v1.16b, v0.16b
- bcax v0.16b, v0.16b, v2.16b, v1.16b
- bcax v1.16b, v1.16b, v27.16b, v2.16b
- bcax v2.16b, v2.16b, v28.16b, v27.16b
-
- eor v0.16b, v0.16b, v31.16b
-
- cbnz w8, 3b
- cond_yield 4f, x8, x9
- cbnz w2, 0b
-
- /* save state */
-4: st1 { v0.1d- v3.1d}, [x0], #32
- st1 { v4.1d- v7.1d}, [x0], #32
- st1 { v8.1d-v11.1d}, [x0], #32
- st1 {v12.1d-v15.1d}, [x0], #32
- st1 {v16.1d-v19.1d}, [x0], #32
- st1 {v20.1d-v23.1d}, [x0], #32
- st1 {v24.1d}, [x0]
- mov w0, w2
- ret
-SYM_FUNC_END(sha3_ce_transform)
-
- .section ".rodata", "a"
- .align 8
-.Lsha3_rcon:
- .quad 0x0000000000000001, 0x0000000000008082, 0x800000000000808a
- .quad 0x8000000080008000, 0x000000000000808b, 0x0000000080000001
- .quad 0x8000000080008081, 0x8000000000008009, 0x000000000000008a
- .quad 0x0000000000000088, 0x0000000080008009, 0x000000008000000a
- .quad 0x000000008000808b, 0x800000000000008b, 0x8000000000008089
- .quad 0x8000000000008003, 0x8000000000008002, 0x8000000000000080
- .quad 0x000000000000800a, 0x800000008000000a, 0x8000000080008081
- .quad 0x8000000000008080, 0x0000000080000001, 0x8000000080008008
diff --git a/arch/arm64/crypto/sha3-ce-glue.c b/arch/arm64/crypto/sha3-ce-glue.c
deleted file mode 100644
index b4f1001046c9..000000000000
--- a/arch/arm64/crypto/sha3-ce-glue.c
+++ /dev/null
@@ -1,151 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * sha3-ce-glue.c - core SHA-3 transform using v8.2 Crypto Extensions
- *
- * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-#include <crypto/internal/hash.h>
-#include <crypto/sha3.h>
-#include <linux/cpufeature.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/unaligned.h>
-
-MODULE_DESCRIPTION("SHA3 secure hash using ARMv8 Crypto Extensions");
-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS_CRYPTO("sha3-224");
-MODULE_ALIAS_CRYPTO("sha3-256");
-MODULE_ALIAS_CRYPTO("sha3-384");
-MODULE_ALIAS_CRYPTO("sha3-512");
-
-asmlinkage int sha3_ce_transform(u64 *st, const u8 *data, int blocks,
- int md_len);
-
-static int sha3_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
-{
- struct sha3_state *sctx = shash_desc_ctx(desc);
- struct crypto_shash *tfm = desc->tfm;
- unsigned int bs, ds;
- int blocks;
-
- ds = crypto_shash_digestsize(tfm);
- bs = crypto_shash_blocksize(tfm);
- blocks = len / bs;
- len -= blocks * bs;
- do {
- int rem;
-
- kernel_neon_begin();
- rem = sha3_ce_transform(sctx->st, data, blocks, ds);
- kernel_neon_end();
- data += (blocks - rem) * bs;
- blocks = rem;
- } while (blocks);
- return len;
-}
-
-static int sha3_finup(struct shash_desc *desc, const u8 *src, unsigned int len,
- u8 *out)
-{
- struct sha3_state *sctx = shash_desc_ctx(desc);
- struct crypto_shash *tfm = desc->tfm;
- __le64 *digest = (__le64 *)out;
- u8 block[SHA3_224_BLOCK_SIZE];
- unsigned int bs, ds;
- int i;
-
- ds = crypto_shash_digestsize(tfm);
- bs = crypto_shash_blocksize(tfm);
- memcpy(block, src, len);
-
- block[len++] = 0x06;
- memset(block + len, 0, bs - len);
- block[bs - 1] |= 0x80;
-
- kernel_neon_begin();
- sha3_ce_transform(sctx->st, block, 1, ds);
- kernel_neon_end();
- memzero_explicit(block , sizeof(block));
-
- for (i = 0; i < ds / 8; i++)
- put_unaligned_le64(sctx->st[i], digest++);
-
- if (ds & 4)
- put_unaligned_le32(sctx->st[i], (__le32 *)digest);
-
- return 0;
-}
-
-static struct shash_alg algs[] = { {
- .digestsize = SHA3_224_DIGEST_SIZE,
- .init = crypto_sha3_init,
- .update = sha3_update,
- .finup = sha3_finup,
- .descsize = SHA3_STATE_SIZE,
- .base.cra_name = "sha3-224",
- .base.cra_driver_name = "sha3-224-ce",
- .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
- .base.cra_blocksize = SHA3_224_BLOCK_SIZE,
- .base.cra_module = THIS_MODULE,
- .base.cra_priority = 200,
-}, {
- .digestsize = SHA3_256_DIGEST_SIZE,
- .init = crypto_sha3_init,
- .update = sha3_update,
- .finup = sha3_finup,
- .descsize = SHA3_STATE_SIZE,
- .base.cra_name = "sha3-256",
- .base.cra_driver_name = "sha3-256-ce",
- .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
- .base.cra_blocksize = SHA3_256_BLOCK_SIZE,
- .base.cra_module = THIS_MODULE,
- .base.cra_priority = 200,
-}, {
- .digestsize = SHA3_384_DIGEST_SIZE,
- .init = crypto_sha3_init,
- .update = sha3_update,
- .finup = sha3_finup,
- .descsize = SHA3_STATE_SIZE,
- .base.cra_name = "sha3-384",
- .base.cra_driver_name = "sha3-384-ce",
- .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
- .base.cra_blocksize = SHA3_384_BLOCK_SIZE,
- .base.cra_module = THIS_MODULE,
- .base.cra_priority = 200,
-}, {
- .digestsize = SHA3_512_DIGEST_SIZE,
- .init = crypto_sha3_init,
- .update = sha3_update,
- .finup = sha3_finup,
- .descsize = SHA3_STATE_SIZE,
- .base.cra_name = "sha3-512",
- .base.cra_driver_name = "sha3-512-ce",
- .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
- .base.cra_blocksize = SHA3_512_BLOCK_SIZE,
- .base.cra_module = THIS_MODULE,
- .base.cra_priority = 200,
-} };
-
-static int __init sha3_neon_mod_init(void)
-{
- return crypto_register_shashes(algs, ARRAY_SIZE(algs));
-}
-
-static void __exit sha3_neon_mod_fini(void)
-{
- crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
-}
-
-module_cpu_feature_match(SHA3, sha3_neon_mod_init);
-module_exit(sha3_neon_mod_fini);
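
One detail of the removed glue worth noting: sha3_finup() built the
final block with a 0x06 suffix byte and a 0x80 terminator.  Per
FIPS 202 those are the SHA-3 domain-separation bits (01) merged with
the first bit of the pad10*1 rule, and the final padding bit,
respectively.  An illustrative sketch of just that step (the function
name is invented for the example, not taken from the removed file):

	static void sha3_pad_final_block(u8 *block, unsigned int len,
					 unsigned int bs)
	{
		block[len++] = 0x06;		/* domain suffix 01 + first pad bit */
		memset(block + len, 0, bs - len);
		block[bs - 1] |= 0x80;		/* last bit of pad10*1 */
	}
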
diff --git a/arch/arm64/crypto/sm3-ce-glue.c b/arch/arm64/crypto/sm3-ce-glue.c
index eac6f5fa0abe..24c1fcfae072 100644
--- a/arch/arm64/crypto/sm3-ce-glue.c
+++ b/arch/arm64/crypto/sm3-ce-glue.c
@@ -5,7 +5,6 @@
* Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
*/
-#include <asm/neon.h>
#include <crypto/internal/hash.h>
#include <crypto/sm3.h>
#include <crypto/sm3_base.h>
@@ -13,6 +12,8 @@
#include <linux/kernel.h>
#include <linux/module.h>
+#include <asm/simd.h>
+
MODULE_DESCRIPTION("SM3 secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
@@ -25,18 +26,18 @@ static int sm3_ce_update(struct shash_desc *desc, const u8 *data,
{
int remain;
- kernel_neon_begin();
- remain = sm3_base_do_update_blocks(desc, data, len, sm3_ce_transform);
- kernel_neon_end();
+ scoped_ksimd() {
+ remain = sm3_base_do_update_blocks(desc, data, len, sm3_ce_transform);
+ }
return remain;
}
static int sm3_ce_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
- kernel_neon_begin();
- sm3_base_do_finup(desc, data, len, sm3_ce_transform);
- kernel_neon_end();
+ scoped_ksimd() {
+ sm3_base_do_finup(desc, data, len, sm3_ce_transform);
+ }
return sm3_base_finish(desc, out);
}
diff --git a/arch/arm64/crypto/sm3-neon-glue.c b/arch/arm64/crypto/sm3-neon-glue.c
index 6c4611a503a3..15f30cc24f32 100644
--- a/arch/arm64/crypto/sm3-neon-glue.c
+++ b/arch/arm64/crypto/sm3-neon-glue.c
@@ -5,7 +5,7 @@
* Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
*/
-#include <asm/neon.h>
+#include <asm/simd.h>
#include <crypto/internal/hash.h>
#include <crypto/sm3.h>
#include <crypto/sm3_base.h>
@@ -20,20 +20,16 @@ asmlinkage void sm3_neon_transform(struct sm3_state *sst, u8 const *src,
static int sm3_neon_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- int remain;
-
- kernel_neon_begin();
- remain = sm3_base_do_update_blocks(desc, data, len, sm3_neon_transform);
- kernel_neon_end();
- return remain;
+ scoped_ksimd()
+ return sm3_base_do_update_blocks(desc, data, len,
+ sm3_neon_transform);
}
static int sm3_neon_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
- kernel_neon_begin();
- sm3_base_do_finup(desc, data, len, sm3_neon_transform);
- kernel_neon_end();
+ scoped_ksimd()
+ sm3_base_do_finup(desc, data, len, sm3_neon_transform);
return sm3_base_finish(desc, out);
}
diff --git a/arch/arm64/crypto/sm4-ce-ccm-glue.c b/arch/arm64/crypto/sm4-ce-ccm-glue.c
index e9cc1c1364ec..332f02167a96 100644
--- a/arch/arm64/crypto/sm4-ce-ccm-glue.c
+++ b/arch/arm64/crypto/sm4-ce-ccm-glue.c
@@ -11,7 +11,7 @@
#include <linux/crypto.h>
#include <linux/kernel.h>
#include <linux/cpufeature.h>
-#include <asm/neon.h>
+#include <asm/simd.h>
#include <crypto/scatterwalk.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/skcipher.h>
@@ -35,10 +35,9 @@ static int ccm_setkey(struct crypto_aead *tfm, const u8 *key,
if (key_len != SM4_KEY_SIZE)
return -EINVAL;
- kernel_neon_begin();
- sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec,
- crypto_sm4_fk, crypto_sm4_ck);
- kernel_neon_end();
+ scoped_ksimd()
+ sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec,
+ crypto_sm4_fk, crypto_sm4_ck);
return 0;
}
@@ -167,39 +166,23 @@ static int ccm_crypt(struct aead_request *req, struct skcipher_walk *walk,
memcpy(ctr0, walk->iv, SM4_BLOCK_SIZE);
crypto_inc(walk->iv, SM4_BLOCK_SIZE);
- kernel_neon_begin();
- if (req->assoclen)
- ccm_calculate_auth_mac(req, mac);
-
- while (walk->nbytes && walk->nbytes != walk->total) {
- unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE;
-
- sm4_ce_ccm_crypt(rkey_enc, walk->dst.virt.addr,
- walk->src.virt.addr, walk->iv,
- walk->nbytes - tail, mac);
-
- kernel_neon_end();
-
- err = skcipher_walk_done(walk, tail);
-
- kernel_neon_begin();
- }
-
- if (walk->nbytes) {
- sm4_ce_ccm_crypt(rkey_enc, walk->dst.virt.addr,
- walk->src.virt.addr, walk->iv,
- walk->nbytes, mac);
- sm4_ce_ccm_final(rkey_enc, ctr0, mac);
- kernel_neon_end();
- err = skcipher_walk_done(walk, 0);
- } else {
+ scoped_ksimd() {
+ if (req->assoclen)
+ ccm_calculate_auth_mac(req, mac);
+ while (walk->nbytes) {
+ unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE;
+ if (walk->nbytes == walk->total)
+ tail = 0;
+ sm4_ce_ccm_crypt(rkey_enc, walk->dst.virt.addr,
+ walk->src.virt.addr, walk->iv,
+ walk->nbytes - tail, mac);
+ err = skcipher_walk_done(walk, tail);
+ }
sm4_ce_ccm_final(rkey_enc, ctr0, mac);
-
- kernel_neon_end();
}
return err;
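/*
 * Shape of the reworked ccm_crypt() walk loop above, as a simplified sketch:
 * "crypt_chunk" stands in for sm4_ce_ccm_crypt() and error checking is
 * elided.  The two old paths (bulk chunks vs. final chunk) are folded into
 * one loop -- on the last chunk walk->nbytes == walk->total, so tail is
 * forced to 0 and the partial final block is processed together with the
 * full blocks -- and skcipher_walk_done() is now called from inside a single
 * scoped_ksimd() section instead of toggling the NEON context per chunk.
 */
scoped_ksimd() {
	while (walk->nbytes) {
		unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE;

		if (walk->nbytes == walk->total)
			tail = 0;	/* last chunk: consume everything */

		crypt_chunk(walk->dst.virt.addr, walk->src.virt.addr,
			    walk->iv, walk->nbytes - tail, mac);

		err = skcipher_walk_done(walk, tail);
	}
	sm4_ce_ccm_final(rkey_enc, ctr0, mac);
}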
diff --git a/arch/arm64/crypto/sm4-ce-cipher-glue.c b/arch/arm64/crypto/sm4-ce-cipher-glue.c
index c31d76fb5a17..bceec833ef4e 100644
--- a/arch/arm64/crypto/sm4-ce-cipher-glue.c
+++ b/arch/arm64/crypto/sm4-ce-cipher-glue.c
@@ -32,9 +32,8 @@ static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
if (!crypto_simd_usable()) {
sm4_crypt_block(ctx->rkey_enc, out, in);
} else {
- kernel_neon_begin();
- sm4_ce_do_crypt(ctx->rkey_enc, out, in);
- kernel_neon_end();
+ scoped_ksimd()
+ sm4_ce_do_crypt(ctx->rkey_enc, out, in);
}
}
@@ -45,9 +44,8 @@ static void sm4_ce_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
if (!crypto_simd_usable()) {
sm4_crypt_block(ctx->rkey_dec, out, in);
} else {
- kernel_neon_begin();
- sm4_ce_do_crypt(ctx->rkey_dec, out, in);
- kernel_neon_end();
+ scoped_ksimd()
+ sm4_ce_do_crypt(ctx->rkey_dec, out, in);
}
}
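/*
 * The single-block cipher above keeps its non-SIMD fallback: when
 * crypto_simd_usable() reports that the vector unit may not be used in the
 * current context, the generic C implementation runs instead; otherwise the
 * CE routine runs under scoped_ksimd().  Minimal sketch with an illustrative
 * wrapper name; sm4_crypt_block(), sm4_ce_do_crypt() and struct sm4_ctx are
 * the driver's own.
 */
#include <asm/simd.h>
#include <crypto/internal/simd.h>
#include <crypto/sm4.h>

static void example_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
	const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);

	if (!crypto_simd_usable()) {
		/* pure C fallback, safe in any context */
		sm4_crypt_block(ctx->rkey_enc, out, in);
	} else {
		scoped_ksimd()
			sm4_ce_do_crypt(ctx->rkey_enc, out, in);
	}
}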
diff --git a/arch/arm64/crypto/sm4-ce-gcm-glue.c b/arch/arm64/crypto/sm4-ce-gcm-glue.c
index c2ea3d5f690b..ef06f4f768a1 100644
--- a/arch/arm64/crypto/sm4-ce-gcm-glue.c
+++ b/arch/arm64/crypto/sm4-ce-gcm-glue.c
@@ -11,7 +11,7 @@
#include <linux/crypto.h>
#include <linux/kernel.h>
#include <linux/cpufeature.h>
-#include <asm/neon.h>
+#include <asm/simd.h>
#include <crypto/b128ops.h>
#include <crypto/scatterwalk.h>
#include <crypto/internal/aead.h>
@@ -48,13 +48,11 @@ static int gcm_setkey(struct crypto_aead *tfm, const u8 *key,
if (key_len != SM4_KEY_SIZE)
return -EINVAL;
- kernel_neon_begin();
-
- sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
- crypto_sm4_fk, crypto_sm4_ck);
- sm4_ce_pmull_ghash_setup(ctx->key.rkey_enc, ctx->ghash_table);
-
- kernel_neon_end();
+ scoped_ksimd() {
+ sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
+ crypto_sm4_fk, crypto_sm4_ck);
+ sm4_ce_pmull_ghash_setup(ctx->key.rkey_enc, ctx->ghash_table);
+ }
return 0;
}
@@ -149,44 +147,28 @@ static int gcm_crypt(struct aead_request *req, struct skcipher_walk *walk,
memcpy(iv, req->iv, GCM_IV_SIZE);
put_unaligned_be32(2, iv + GCM_IV_SIZE);
- kernel_neon_begin();
- if (req->assoclen)
- gcm_calculate_auth_mac(req, ghash);
- while (walk->nbytes) {
- unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE;
- const u8 *src = walk->src.virt.addr;
- u8 *dst = walk->dst.virt.addr;
- if (walk->nbytes == walk->total) {
+ scoped_ksimd() {
+ if (req->assoclen)
+ gcm_calculate_auth_mac(req, ghash);
+ do {
+ unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE;
+ const u8 *src = walk->src.virt.addr;
+ u8 *dst = walk->dst.virt.addr;
+ const u8 *l = NULL;
+ if (walk->nbytes == walk->total) {
+ l = (const u8 *)&lengths;
+ tail = 0;
+ }
sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv,
- walk->nbytes, ghash,
- ctx->ghash_table,
- (const u8 *)&lengths);
-
- kernel_neon_end();
-
- return skcipher_walk_done(walk, 0);
- }
- sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv,
- walk->nbytes - tail, ghash,
- ctx->ghash_table, NULL);
-
- kernel_neon_end();
-
- err = skcipher_walk_done(walk, tail);
-
- kernel_neon_begin();
+ walk->nbytes - tail, ghash,
+ ctx->ghash_table, l);
+ err = skcipher_walk_done(walk, tail);
+ } while (walk->nbytes);
}
-
- sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, NULL, NULL, iv,
- walk->nbytes, ghash, ctx->ghash_table,
- (const u8 *)&lengths);
-
- kernel_neon_end();
-
return err;
}
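/*
 * In the reworked gcm_crypt() loop above, the old separate final call (which
 * passed the encoded lengths block so the CE routine could fold it into the
 * GHASH and finish the tag) is merged into the main loop: the lengths are
 * only handed to sm4_ce_pmull_gcm_crypt() on the last chunk, via the l
 * pointer.  Sketch of just that selection, surrounding loop elided.
 */
const u8 *l = NULL;			/* bulk chunks: no final lengths */
unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE;

if (walk->nbytes == walk->total) {	/* last chunk of the walk */
	l = (const u8 *)&lengths;	/* lets the asm finalise the GHASH */
	tail = 0;			/* consume the partial final block too */
}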
diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c
index 7a60e7b559dc..5569cece5a0b 100644
--- a/arch/arm64/crypto/sm4-ce-glue.c
+++ b/arch/arm64/crypto/sm4-ce-glue.c
@@ -8,7 +8,7 @@
* Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
*/
-#include <asm/neon.h>
+#include <asm/simd.h>
#include <crypto/b128ops.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/skcipher.h>
@@ -74,10 +74,9 @@ static int sm4_setkey(struct crypto_skcipher *tfm, const u8 *key,
if (key_len != SM4_KEY_SIZE)
return -EINVAL;
- kernel_neon_begin();
- sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec,
- crypto_sm4_fk, crypto_sm4_ck);
- kernel_neon_end();
+ scoped_ksimd()
+ sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec,
+ crypto_sm4_fk, crypto_sm4_ck);
return 0;
}
@@ -94,12 +93,12 @@ static int sm4_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
if (ret)
return ret;
- kernel_neon_begin();
- sm4_ce_expand_key(key, ctx->key1.rkey_enc,
- ctx->key1.rkey_dec, crypto_sm4_fk, crypto_sm4_ck);
- sm4_ce_expand_key(&key[SM4_KEY_SIZE], ctx->key2.rkey_enc,
- ctx->key2.rkey_dec, crypto_sm4_fk, crypto_sm4_ck);
- kernel_neon_end();
+ scoped_ksimd() {
+ sm4_ce_expand_key(key, ctx->key1.rkey_enc,
+ ctx->key1.rkey_dec, crypto_sm4_fk, crypto_sm4_ck);
+ sm4_ce_expand_key(&key[SM4_KEY_SIZE], ctx->key2.rkey_enc,
+ ctx->key2.rkey_dec, crypto_sm4_fk, crypto_sm4_ck);
+ }
return 0;
}
@@ -117,16 +116,14 @@ static int sm4_ecb_do_crypt(struct skcipher_request *req, const u32 *rkey)
u8 *dst = walk.dst.virt.addr;
unsigned int nblks;
- kernel_neon_begin();
-
- nblks = BYTES2BLKS(nbytes);
- if (nblks) {
- sm4_ce_crypt(rkey, dst, src, nblks);
- nbytes -= nblks * SM4_BLOCK_SIZE;
+ scoped_ksimd() {
+ nblks = BYTES2BLKS(nbytes);
+ if (nblks) {
+ sm4_ce_crypt(rkey, dst, src, nblks);
+ nbytes -= nblks * SM4_BLOCK_SIZE;
+ }
}
- kernel_neon_end();
-
err = skcipher_walk_done(&walk, nbytes);
}
@@ -167,16 +164,14 @@ static int sm4_cbc_crypt(struct skcipher_request *req,
nblocks = nbytes / SM4_BLOCK_SIZE;
if (nblocks) {
- kernel_neon_begin();
-
- if (encrypt)
- sm4_ce_cbc_enc(ctx->rkey_enc, dst, src,
- walk.iv, nblocks);
- else
- sm4_ce_cbc_dec(ctx->rkey_dec, dst, src,
- walk.iv, nblocks);
-
- kernel_neon_end();
+ scoped_ksimd() {
+ if (encrypt)
+ sm4_ce_cbc_enc(ctx->rkey_enc, dst, src,
+ walk.iv, nblocks);
+ else
+ sm4_ce_cbc_dec(ctx->rkey_dec, dst, src,
+ walk.iv, nblocks);
+ }
}
err = skcipher_walk_done(&walk, nbytes % SM4_BLOCK_SIZE);
@@ -249,16 +244,14 @@ static int sm4_cbc_cts_crypt(struct skcipher_request *req, bool encrypt)
if (err)
return err;
- kernel_neon_begin();
-
- if (encrypt)
- sm4_ce_cbc_cts_enc(ctx->rkey_enc, walk.dst.virt.addr,
- walk.src.virt.addr, walk.iv, walk.nbytes);
- else
- sm4_ce_cbc_cts_dec(ctx->rkey_dec, walk.dst.virt.addr,
- walk.src.virt.addr, walk.iv, walk.nbytes);
-
- kernel_neon_end();
+ scoped_ksimd() {
+ if (encrypt)
+ sm4_ce_cbc_cts_enc(ctx->rkey_enc, walk.dst.virt.addr,
+ walk.src.virt.addr, walk.iv, walk.nbytes);
+ else
+ sm4_ce_cbc_cts_dec(ctx->rkey_dec, walk.dst.virt.addr,
+ walk.src.virt.addr, walk.iv, walk.nbytes);
+ }
return skcipher_walk_done(&walk, 0);
}
@@ -288,28 +281,26 @@ static int sm4_ctr_crypt(struct skcipher_request *req)
u8 *dst = walk.dst.virt.addr;
unsigned int nblks;
- kernel_neon_begin();
-
- nblks = BYTES2BLKS(nbytes);
- if (nblks) {
- sm4_ce_ctr_enc(ctx->rkey_enc, dst, src, walk.iv, nblks);
- dst += nblks * SM4_BLOCK_SIZE;
- src += nblks * SM4_BLOCK_SIZE;
- nbytes -= nblks * SM4_BLOCK_SIZE;
- }
-
- /* tail */
- if (walk.nbytes == walk.total && nbytes > 0) {
- u8 keystream[SM4_BLOCK_SIZE];
-
- sm4_ce_crypt_block(ctx->rkey_enc, keystream, walk.iv);
- crypto_inc(walk.iv, SM4_BLOCK_SIZE);
- crypto_xor_cpy(dst, src, keystream, nbytes);
- nbytes = 0;
+ scoped_ksimd() {
+ nblks = BYTES2BLKS(nbytes);
+ if (nblks) {
+ sm4_ce_ctr_enc(ctx->rkey_enc, dst, src, walk.iv, nblks);
+ dst += nblks * SM4_BLOCK_SIZE;
+ src += nblks * SM4_BLOCK_SIZE;
+ nbytes -= nblks * SM4_BLOCK_SIZE;
+ }
+
+ /* tail */
+ if (walk.nbytes == walk.total && nbytes > 0) {
+ u8 keystream[SM4_BLOCK_SIZE];
+
+ sm4_ce_crypt_block(ctx->rkey_enc, keystream, walk.iv);
+ crypto_inc(walk.iv, SM4_BLOCK_SIZE);
+ crypto_xor_cpy(dst, src, keystream, nbytes);
+ nbytes = 0;
+ }
}
- kernel_neon_end();
-
err = skcipher_walk_done(&walk, nbytes);
}
@@ -359,18 +350,16 @@ static int sm4_xts_crypt(struct skcipher_request *req, bool encrypt)
if (nbytes < walk.total)
nbytes &= ~(SM4_BLOCK_SIZE - 1);
- kernel_neon_begin();
-
- if (encrypt)
- sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr,
- walk.src.virt.addr, walk.iv, nbytes,
- rkey2_enc);
- else
- sm4_ce_xts_dec(ctx->key1.rkey_dec, walk.dst.virt.addr,
- walk.src.virt.addr, walk.iv, nbytes,
- rkey2_enc);
-
- kernel_neon_end();
+ scoped_ksimd() {
+ if (encrypt)
+ sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr,
+ walk.src.virt.addr, walk.iv, nbytes,
+ rkey2_enc);
+ else
+ sm4_ce_xts_dec(ctx->key1.rkey_dec, walk.dst.virt.addr,
+ walk.src.virt.addr, walk.iv, nbytes,
+ rkey2_enc);
+ }
rkey2_enc = NULL;
@@ -395,18 +384,16 @@ static int sm4_xts_crypt(struct skcipher_request *req, bool encrypt)
if (err)
return err;
- kernel_neon_begin();
-
- if (encrypt)
- sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr,
- walk.src.virt.addr, walk.iv, walk.nbytes,
- rkey2_enc);
- else
- sm4_ce_xts_dec(ctx->key1.rkey_dec, walk.dst.virt.addr,
- walk.src.virt.addr, walk.iv, walk.nbytes,
- rkey2_enc);
-
- kernel_neon_end();
+ scoped_ksimd() {
+ if (encrypt)
+ sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr,
+ walk.src.virt.addr, walk.iv, walk.nbytes,
+ rkey2_enc);
+ else
+ sm4_ce_xts_dec(ctx->key1.rkey_dec, walk.dst.virt.addr,
+ walk.src.virt.addr, walk.iv, walk.nbytes,
+ rkey2_enc);
+ }
return skcipher_walk_done(&walk, 0);
}
@@ -510,11 +497,9 @@ static int sm4_cbcmac_setkey(struct crypto_shash *tfm, const u8 *key,
if (key_len != SM4_KEY_SIZE)
return -EINVAL;
- kernel_neon_begin();
- sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
- crypto_sm4_fk, crypto_sm4_ck);
- kernel_neon_end();
-
+ scoped_ksimd()
+ sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
+ crypto_sm4_fk, crypto_sm4_ck);
return 0;
}
@@ -530,15 +515,13 @@ static int sm4_cmac_setkey(struct crypto_shash *tfm, const u8 *key,
memset(consts, 0, SM4_BLOCK_SIZE);
- kernel_neon_begin();
-
- sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
- crypto_sm4_fk, crypto_sm4_ck);
- /* encrypt the zero block */
- sm4_ce_crypt_block(ctx->key.rkey_enc, (u8 *)consts, (const u8 *)consts);
-
- kernel_neon_end();
+ scoped_ksimd() {
+ sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
+ crypto_sm4_fk, crypto_sm4_ck);
+ /* encrypt the zero block */
+ sm4_ce_crypt_block(ctx->key.rkey_enc, (u8 *)consts, (const u8 *)consts);
+ }
/* gf(2^128) multiply zero-ciphertext with u and u^2 */
a = be64_to_cpu(consts[0].a);
@@ -568,18 +551,16 @@ static int sm4_xcbc_setkey(struct crypto_shash *tfm, const u8 *key,
if (key_len != SM4_KEY_SIZE)
return -EINVAL;
- kernel_neon_begin();
-
- sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
- crypto_sm4_fk, crypto_sm4_ck);
- sm4_ce_crypt_block(ctx->key.rkey_enc, key2, ks[0]);
- sm4_ce_crypt(ctx->key.rkey_enc, ctx->consts, ks[1], 2);
- sm4_ce_expand_key(key2, ctx->key.rkey_enc, ctx->key.rkey_dec,
- crypto_sm4_fk, crypto_sm4_ck);
-
- kernel_neon_end();
+ scoped_ksimd() {
+ sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
+ crypto_sm4_fk, crypto_sm4_ck);
+ sm4_ce_crypt_block(ctx->key.rkey_enc, key2, ks[0]);
+ sm4_ce_crypt(ctx->key.rkey_enc, ctx->consts, ks[1], 2);
+ sm4_ce_expand_key(key2, ctx->key.rkey_enc, ctx->key.rkey_dec,
+ crypto_sm4_fk, crypto_sm4_ck);
+ }
return 0;
}
@@ -600,10 +581,9 @@ static int sm4_mac_update(struct shash_desc *desc, const u8 *p,
unsigned int nblocks = len / SM4_BLOCK_SIZE;
len %= SM4_BLOCK_SIZE;
- kernel_neon_begin();
- sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, p,
- nblocks, false, true);
- kernel_neon_end();
+ scoped_ksimd()
+ sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, p,
+ nblocks, false, true);
return len;
}
@@ -619,10 +599,9 @@ static int sm4_cmac_finup(struct shash_desc *desc, const u8 *src,
ctx->digest[len] ^= 0x80;
consts += SM4_BLOCK_SIZE;
}
- kernel_neon_begin();
- sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, consts, 1,
- false, true);
- kernel_neon_end();
+ scoped_ksimd()
+ sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, consts, 1,
+ false, true);
memcpy(out, ctx->digest, SM4_BLOCK_SIZE);
return 0;
}
@@ -635,10 +614,9 @@ static int sm4_cbcmac_finup(struct shash_desc *desc, const u8 *src,
if (len) {
crypto_xor(ctx->digest, src, len);
- kernel_neon_begin();
- sm4_ce_crypt_block(tctx->key.rkey_enc, ctx->digest,
- ctx->digest);
- kernel_neon_end();
+ scoped_ksimd()
+ sm4_ce_crypt_block(tctx->key.rkey_enc, ctx->digest,
+ ctx->digest);
}
memcpy(out, ctx->digest, SM4_BLOCK_SIZE);
return 0;
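/*
 * sm4_ctr_crypt() above also shows how a partial final block is handled in
 * CTR mode from inside the same scoped_ksimd() section: one extra keystream
 * block is generated, the counter is bumped, and only the remaining nbytes
 * of it are XORed into the output.  Simplified sketch of that tail path,
 * using the driver's own sm4_ce_crypt_block(), crypto_inc() and
 * crypto_xor_cpy() helpers.
 */
if (walk.nbytes == walk.total && nbytes > 0) {
	u8 keystream[SM4_BLOCK_SIZE];

	/* encrypt the counter to get one block of keystream */
	sm4_ce_crypt_block(ctx->rkey_enc, keystream, walk.iv);
	crypto_inc(walk.iv, SM4_BLOCK_SIZE);
	/* dst = src XOR keystream for the trailing nbytes < SM4_BLOCK_SIZE */
	crypto_xor_cpy(dst, src, keystream, nbytes);
	nbytes = 0;
}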
diff --git a/arch/arm64/crypto/sm4-neon-glue.c b/arch/arm64/crypto/sm4-neon-glue.c
index e3500aca2d18..e944c2a2efb0 100644
--- a/arch/arm64/crypto/sm4-neon-glue.c
+++ b/arch/arm64/crypto/sm4-neon-glue.c
@@ -48,11 +48,8 @@ static int sm4_ecb_do_crypt(struct skcipher_request *req, const u32 *rkey)
nblocks = nbytes / SM4_BLOCK_SIZE;
if (nblocks) {
- kernel_neon_begin();
-
- sm4_neon_crypt(rkey, dst, src, nblocks);
-
- kernel_neon_end();
+ scoped_ksimd()
+ sm4_neon_crypt(rkey, dst, src, nblocks);
}
err = skcipher_walk_done(&walk, nbytes % SM4_BLOCK_SIZE);
@@ -126,12 +123,9 @@ static int sm4_cbc_decrypt(struct skcipher_request *req)
nblocks = nbytes / SM4_BLOCK_SIZE;
if (nblocks) {
- kernel_neon_begin();
-
- sm4_neon_cbc_dec(ctx->rkey_dec, dst, src,
- walk.iv, nblocks);
-
- kernel_neon_end();
+ scoped_ksimd()
+ sm4_neon_cbc_dec(ctx->rkey_dec, dst, src,
+ walk.iv, nblocks);
}
err = skcipher_walk_done(&walk, nbytes % SM4_BLOCK_SIZE);
@@ -157,12 +151,9 @@ static int sm4_ctr_crypt(struct skcipher_request *req)
nblocks = nbytes / SM4_BLOCK_SIZE;
if (nblocks) {
- kernel_neon_begin();
-
- sm4_neon_ctr_crypt(ctx->rkey_enc, dst, src,
- walk.iv, nblocks);
-
- kernel_neon_end();
+ scoped_ksimd()
+ sm4_neon_ctr_crypt(ctx->rkey_enc, dst, src,
+ walk.iv, nblocks);
dst += nblocks * SM4_BLOCK_SIZE;
src += nblocks * SM4_BLOCK_SIZE;