Diffstat (limited to 'arch/arm64/crypto')
 arch/arm64/crypto/Kconfig                |  21
 arch/arm64/crypto/Makefile               |   6
 arch/arm64/crypto/aes-ce-ccm-glue.c      | 116
 arch/arm64/crypto/aes-ce-glue.c          |  87
 arch/arm64/crypto/aes-glue.c             | 139
 arch/arm64/crypto/aes-neonbs-glue.c      | 150
 arch/arm64/crypto/ghash-ce-glue.c        |  27
 arch/arm64/crypto/nhpoly1305-neon-glue.c |   5
 arch/arm64/crypto/polyval-ce-core.S      | 361
 arch/arm64/crypto/polyval-ce-glue.c      | 158
 arch/arm64/crypto/sha3-ce-core.S         | 212
 arch/arm64/crypto/sha3-ce-glue.c         | 151
 arch/arm64/crypto/sm3-ce-glue.c          |  15
 arch/arm64/crypto/sm3-neon-glue.c        |  16
 arch/arm64/crypto/sm4-ce-ccm-glue.c      |  49
 arch/arm64/crypto/sm4-ce-cipher-glue.c   |  10
 arch/arm64/crypto/sm4-ce-gcm-glue.c      |  62
 arch/arm64/crypto/sm4-ce-glue.c          | 214
 arch/arm64/crypto/sm4-neon-glue.c        |  25
19 files changed, 411 insertions(+), 1413 deletions(-)
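
The change repeated throughout the hunks below is mechanical: each open-coded kernel_neon_begin()/kernel_neon_end() pair is replaced with the scoped_ksimd() guard from <asm/simd.h>, which ends the NEON section automatically when control leaves the scope. A minimal sketch of the pattern, reusing the aes_ecb_encrypt() calls from the aes-glue.c hunks (the surrounding variables are assumed to be in scope exactly as they are there):

	/* Before: the NEON section is opened and closed by hand. */
	kernel_neon_begin();
	aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
			ctx->key_enc, rounds, blocks);
	kernel_neon_end();

	/* After: a single statement takes the bare guard ... */
	scoped_ksimd()
		aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
				ctx->key_enc, rounds, blocks);

	/* ... and a multi-statement body takes braces. */
	scoped_ksimd() {
		aes_ecb_encrypt(key, ks[0], ctx->key.key_enc, rounds, 1);
		aes_ecb_encrypt(ctx->consts, ks[1], ctx->key.key_enc, rounds, 2);
	}

Besides being shorter, the guard cannot be left open on an early exit, which is what lets the CCM and GCM walk loops below fold their per-iteration kernel_neon_end()/kernel_neon_begin() pairs into a single enclosing scope.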
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 91f3093eee6a..bdd276a6e540 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -25,17 +25,6 @@ config CRYPTO_NHPOLY1305_NEON Architecture: arm64 using: - NEON (Advanced SIMD) extensions -config CRYPTO_SHA3_ARM64 - tristate "Hash functions: SHA-3 (ARMv8.2 Crypto Extensions)" - depends on KERNEL_MODE_NEON - select CRYPTO_HASH - select CRYPTO_SHA3 - help - SHA-3 secure hash algorithms (FIPS 202) - - Architecture: arm64 using: - - ARMv8.2 Crypto Extensions - config CRYPTO_SM3_NEON tristate "Hash functions: SM3 (NEON)" depends on KERNEL_MODE_NEON @@ -58,16 +47,6 @@ config CRYPTO_SM3_ARM64_CE Architecture: arm64 using: - ARMv8.2 Crypto Extensions -config CRYPTO_POLYVAL_ARM64_CE - tristate "Hash functions: POLYVAL (ARMv8 Crypto Extensions)" - depends on KERNEL_MODE_NEON - select CRYPTO_POLYVAL - help - POLYVAL hash function for HCTR2 - - Architecture: arm64 using: - - ARMv8 Crypto Extensions - config CRYPTO_AES_ARM64 tristate "Ciphers: AES, modes: ECB, CBC, CTR, CTS, XCTR, XTS" select CRYPTO_AES diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index a8b2cdbe202c..1e330aa08d3f 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -5,9 +5,6 @@ # Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org> # -obj-$(CONFIG_CRYPTO_SHA3_ARM64) += sha3-ce.o -sha3-ce-y := sha3-ce-glue.o sha3-ce-core.o - obj-$(CONFIG_CRYPTO_SM3_NEON) += sm3-neon.o sm3-neon-y := sm3-neon-glue.o sm3-neon-core.o @@ -32,9 +29,6 @@ sm4-neon-y := sm4-neon-glue.o sm4-neon-core.o obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o -obj-$(CONFIG_CRYPTO_POLYVAL_ARM64_CE) += polyval-ce.o -polyval-ce-y := polyval-ce-glue.o polyval-ce-core.o - obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o aes-ce-cipher-y := aes-ce-core.o aes-ce-glue.o diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c index 2d791d51891b..c4fd648471f1 100644 --- a/arch/arm64/crypto/aes-ce-ccm-glue.c +++ b/arch/arm64/crypto/aes-ce-ccm-glue.c @@ -8,7 +8,6 @@ * Author: Ard Biesheuvel <ardb@kernel.org> */ -#include <asm/neon.h> #include <linux/unaligned.h> #include <crypto/aes.h> #include <crypto/scatterwalk.h> @@ -16,6 +15,8 @@ #include <crypto/internal/skcipher.h> #include <linux/module.h> +#include <asm/simd.h> + #include "aes-ce-setkey.h" MODULE_IMPORT_NS("CRYPTO_INTERNAL"); @@ -114,11 +115,8 @@ static u32 ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes, in += adv; abytes -= adv; - if (unlikely(rem)) { - kernel_neon_end(); - kernel_neon_begin(); + if (unlikely(rem)) macp = 0; - } } else { u32 l = min(AES_BLOCK_SIZE - macp, abytes); @@ -187,40 +185,38 @@ static int ccm_encrypt(struct aead_request *req) if (unlikely(err)) return err; - kernel_neon_begin(); + scoped_ksimd() { + if (req->assoclen) + ccm_calculate_auth_mac(req, mac); - if (req->assoclen) - ccm_calculate_auth_mac(req, mac); + do { + u32 tail = walk.nbytes % AES_BLOCK_SIZE; + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + u8 buf[AES_BLOCK_SIZE]; + u8 *final_iv = NULL; - do { - u32 tail = walk.nbytes % AES_BLOCK_SIZE; - const u8 *src = walk.src.virt.addr; - u8 *dst = walk.dst.virt.addr; - u8 buf[AES_BLOCK_SIZE]; - u8 *final_iv = NULL; - - if (walk.nbytes == walk.total) { - tail = 0; - final_iv = orig_iv; - } - - if (unlikely(walk.nbytes < AES_BLOCK_SIZE)) - src = dst = memcpy(&buf[sizeof(buf) - walk.nbytes], - src, walk.nbytes); + if (walk.nbytes == walk.total) 
{ + tail = 0; + final_iv = orig_iv; + } - ce_aes_ccm_encrypt(dst, src, walk.nbytes - tail, - ctx->key_enc, num_rounds(ctx), - mac, walk.iv, final_iv); + if (unlikely(walk.nbytes < AES_BLOCK_SIZE)) + src = dst = memcpy(&buf[sizeof(buf) - walk.nbytes], + src, walk.nbytes); - if (unlikely(walk.nbytes < AES_BLOCK_SIZE)) - memcpy(walk.dst.virt.addr, dst, walk.nbytes); + ce_aes_ccm_encrypt(dst, src, walk.nbytes - tail, + ctx->key_enc, num_rounds(ctx), + mac, walk.iv, final_iv); - if (walk.nbytes) { - err = skcipher_walk_done(&walk, tail); - } - } while (walk.nbytes); + if (unlikely(walk.nbytes < AES_BLOCK_SIZE)) + memcpy(walk.dst.virt.addr, dst, walk.nbytes); - kernel_neon_end(); + if (walk.nbytes) { + err = skcipher_walk_done(&walk, tail); + } + } while (walk.nbytes); + } if (unlikely(err)) return err; @@ -254,40 +250,38 @@ static int ccm_decrypt(struct aead_request *req) if (unlikely(err)) return err; - kernel_neon_begin(); - - if (req->assoclen) - ccm_calculate_auth_mac(req, mac); + scoped_ksimd() { + if (req->assoclen) + ccm_calculate_auth_mac(req, mac); - do { - u32 tail = walk.nbytes % AES_BLOCK_SIZE; - const u8 *src = walk.src.virt.addr; - u8 *dst = walk.dst.virt.addr; - u8 buf[AES_BLOCK_SIZE]; - u8 *final_iv = NULL; - - if (walk.nbytes == walk.total) { - tail = 0; - final_iv = orig_iv; - } + do { + u32 tail = walk.nbytes % AES_BLOCK_SIZE; + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + u8 buf[AES_BLOCK_SIZE]; + u8 *final_iv = NULL; - if (unlikely(walk.nbytes < AES_BLOCK_SIZE)) - src = dst = memcpy(&buf[sizeof(buf) - walk.nbytes], - src, walk.nbytes); + if (walk.nbytes == walk.total) { + tail = 0; + final_iv = orig_iv; + } - ce_aes_ccm_decrypt(dst, src, walk.nbytes - tail, - ctx->key_enc, num_rounds(ctx), - mac, walk.iv, final_iv); + if (unlikely(walk.nbytes < AES_BLOCK_SIZE)) + src = dst = memcpy(&buf[sizeof(buf) - walk.nbytes], + src, walk.nbytes); - if (unlikely(walk.nbytes < AES_BLOCK_SIZE)) - memcpy(walk.dst.virt.addr, dst, walk.nbytes); + ce_aes_ccm_decrypt(dst, src, walk.nbytes - tail, + ctx->key_enc, num_rounds(ctx), + mac, walk.iv, final_iv); - if (walk.nbytes) { - err = skcipher_walk_done(&walk, tail); - } - } while (walk.nbytes); + if (unlikely(walk.nbytes < AES_BLOCK_SIZE)) + memcpy(walk.dst.virt.addr, dst, walk.nbytes); - kernel_neon_end(); + if (walk.nbytes) { + err = skcipher_walk_done(&walk, tail); + } + } while (walk.nbytes); + } if (unlikely(err)) return err; diff --git a/arch/arm64/crypto/aes-ce-glue.c b/arch/arm64/crypto/aes-ce-glue.c index 00b8749013c5..a4dad370991d 100644 --- a/arch/arm64/crypto/aes-ce-glue.c +++ b/arch/arm64/crypto/aes-ce-glue.c @@ -52,9 +52,8 @@ static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[]) return; } - kernel_neon_begin(); - __aes_ce_encrypt(ctx->key_enc, dst, src, num_rounds(ctx)); - kernel_neon_end(); + scoped_ksimd() + __aes_ce_encrypt(ctx->key_enc, dst, src, num_rounds(ctx)); } static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[]) @@ -66,9 +65,8 @@ static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[]) return; } - kernel_neon_begin(); - __aes_ce_decrypt(ctx->key_dec, dst, src, num_rounds(ctx)); - kernel_neon_end(); + scoped_ksimd() + __aes_ce_decrypt(ctx->key_dec, dst, src, num_rounds(ctx)); } int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key, @@ -94,47 +92,48 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key, for (i = 0; i < kwords; i++) ctx->key_enc[i] = get_unaligned_le32(in_key + i * 
sizeof(u32)); - kernel_neon_begin(); - for (i = 0; i < sizeof(rcon); i++) { - u32 *rki = ctx->key_enc + (i * kwords); - u32 *rko = rki + kwords; - - rko[0] = ror32(__aes_ce_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0]; - rko[1] = rko[0] ^ rki[1]; - rko[2] = rko[1] ^ rki[2]; - rko[3] = rko[2] ^ rki[3]; - - if (key_len == AES_KEYSIZE_192) { - if (i >= 7) - break; - rko[4] = rko[3] ^ rki[4]; - rko[5] = rko[4] ^ rki[5]; - } else if (key_len == AES_KEYSIZE_256) { - if (i >= 6) - break; - rko[4] = __aes_ce_sub(rko[3]) ^ rki[4]; - rko[5] = rko[4] ^ rki[5]; - rko[6] = rko[5] ^ rki[6]; - rko[7] = rko[6] ^ rki[7]; + scoped_ksimd() { + for (i = 0; i < sizeof(rcon); i++) { + u32 *rki = ctx->key_enc + (i * kwords); + u32 *rko = rki + kwords; + + rko[0] = ror32(__aes_ce_sub(rki[kwords - 1]), 8) ^ + rcon[i] ^ rki[0]; + rko[1] = rko[0] ^ rki[1]; + rko[2] = rko[1] ^ rki[2]; + rko[3] = rko[2] ^ rki[3]; + + if (key_len == AES_KEYSIZE_192) { + if (i >= 7) + break; + rko[4] = rko[3] ^ rki[4]; + rko[5] = rko[4] ^ rki[5]; + } else if (key_len == AES_KEYSIZE_256) { + if (i >= 6) + break; + rko[4] = __aes_ce_sub(rko[3]) ^ rki[4]; + rko[5] = rko[4] ^ rki[5]; + rko[6] = rko[5] ^ rki[6]; + rko[7] = rko[6] ^ rki[7]; + } } - } - /* - * Generate the decryption keys for the Equivalent Inverse Cipher. - * This involves reversing the order of the round keys, and applying - * the Inverse Mix Columns transformation on all but the first and - * the last one. - */ - key_enc = (struct aes_block *)ctx->key_enc; - key_dec = (struct aes_block *)ctx->key_dec; - j = num_rounds(ctx); - - key_dec[0] = key_enc[j]; - for (i = 1, j--; j > 0; i++, j--) - __aes_ce_invert(key_dec + i, key_enc + j); - key_dec[i] = key_enc[0]; + /* + * Generate the decryption keys for the Equivalent Inverse + * Cipher. This involves reversing the order of the round + * keys, and applying the Inverse Mix Columns transformation on + * all but the first and the last one. 
+ */ + key_enc = (struct aes_block *)ctx->key_enc; + key_dec = (struct aes_block *)ctx->key_dec; + j = num_rounds(ctx); + + key_dec[0] = key_enc[j]; + for (i = 1, j--; j > 0; i++, j--) + __aes_ce_invert(key_dec + i, key_enc + j); + key_dec[i] = key_enc[0]; + } - kernel_neon_end(); return 0; } EXPORT_SYMBOL(ce_aes_expandkey); diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 5e207ff34482..b087b900d279 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -5,8 +5,6 @@ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org> */ -#include <asm/hwcap.h> -#include <asm/neon.h> #include <crypto/aes.h> #include <crypto/ctr.h> #include <crypto/internal/hash.h> @@ -20,6 +18,9 @@ #include <linux/module.h> #include <linux/string.h> +#include <asm/hwcap.h> +#include <asm/simd.h> + #include "aes-ce-setkey.h" #ifdef USE_V8_CRYPTO_EXTENSIONS @@ -186,10 +187,9 @@ static int __maybe_unused ecb_encrypt(struct skcipher_request *req) err = skcipher_walk_virt(&walk, req, false); while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { - kernel_neon_begin(); - aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key_enc, rounds, blocks); - kernel_neon_end(); + scoped_ksimd() + aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key_enc, rounds, blocks); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } return err; @@ -206,10 +206,9 @@ static int __maybe_unused ecb_decrypt(struct skcipher_request *req) err = skcipher_walk_virt(&walk, req, false); while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { - kernel_neon_begin(); - aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key_dec, rounds, blocks); - kernel_neon_end(); + scoped_ksimd() + aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key_dec, rounds, blocks); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } return err; @@ -224,10 +223,9 @@ static int cbc_encrypt_walk(struct skcipher_request *req, unsigned int blocks; while ((blocks = (walk->nbytes / AES_BLOCK_SIZE))) { - kernel_neon_begin(); - aes_cbc_encrypt(walk->dst.virt.addr, walk->src.virt.addr, - ctx->key_enc, rounds, blocks, walk->iv); - kernel_neon_end(); + scoped_ksimd() + aes_cbc_encrypt(walk->dst.virt.addr, walk->src.virt.addr, + ctx->key_enc, rounds, blocks, walk->iv); err = skcipher_walk_done(walk, walk->nbytes % AES_BLOCK_SIZE); } return err; @@ -253,10 +251,9 @@ static int cbc_decrypt_walk(struct skcipher_request *req, unsigned int blocks; while ((blocks = (walk->nbytes / AES_BLOCK_SIZE))) { - kernel_neon_begin(); - aes_cbc_decrypt(walk->dst.virt.addr, walk->src.virt.addr, - ctx->key_dec, rounds, blocks, walk->iv); - kernel_neon_end(); + scoped_ksimd() + aes_cbc_decrypt(walk->dst.virt.addr, walk->src.virt.addr, + ctx->key_dec, rounds, blocks, walk->iv); err = skcipher_walk_done(walk, walk->nbytes % AES_BLOCK_SIZE); } return err; @@ -322,10 +319,9 @@ static int cts_cbc_encrypt(struct skcipher_request *req) if (err) return err; - kernel_neon_begin(); - aes_cbc_cts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key_enc, rounds, walk.nbytes, walk.iv); - kernel_neon_end(); + scoped_ksimd() + aes_cbc_cts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key_enc, rounds, walk.nbytes, walk.iv); return skcipher_walk_done(&walk, 0); } @@ -379,10 +375,9 @@ static int cts_cbc_decrypt(struct skcipher_request *req) if (err) return err; - kernel_neon_begin(); - aes_cbc_cts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key_dec, rounds, walk.nbytes, 
walk.iv); - kernel_neon_end(); + scoped_ksimd() + aes_cbc_cts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key_dec, rounds, walk.nbytes, walk.iv); return skcipher_walk_done(&walk, 0); } @@ -399,11 +394,11 @@ static int __maybe_unused essiv_cbc_encrypt(struct skcipher_request *req) blocks = walk.nbytes / AES_BLOCK_SIZE; if (blocks) { - kernel_neon_begin(); - aes_essiv_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key1.key_enc, rounds, blocks, - req->iv, ctx->key2.key_enc); - kernel_neon_end(); + scoped_ksimd() + aes_essiv_cbc_encrypt(walk.dst.virt.addr, + walk.src.virt.addr, + ctx->key1.key_enc, rounds, blocks, + req->iv, ctx->key2.key_enc); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } return err ?: cbc_encrypt_walk(req, &walk); @@ -421,11 +416,11 @@ static int __maybe_unused essiv_cbc_decrypt(struct skcipher_request *req) blocks = walk.nbytes / AES_BLOCK_SIZE; if (blocks) { - kernel_neon_begin(); - aes_essiv_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key1.key_dec, rounds, blocks, - req->iv, ctx->key2.key_enc); - kernel_neon_end(); + scoped_ksimd() + aes_essiv_cbc_decrypt(walk.dst.virt.addr, + walk.src.virt.addr, + ctx->key1.key_dec, rounds, blocks, + req->iv, ctx->key2.key_enc); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } return err ?: cbc_decrypt_walk(req, &walk); @@ -461,10 +456,9 @@ static int __maybe_unused xctr_encrypt(struct skcipher_request *req) else if (nbytes < walk.total) nbytes &= ~(AES_BLOCK_SIZE - 1); - kernel_neon_begin(); - aes_xctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes, - walk.iv, byte_ctr); - kernel_neon_end(); + scoped_ksimd() + aes_xctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes, + walk.iv, byte_ctr); if (unlikely(nbytes < AES_BLOCK_SIZE)) memcpy(walk.dst.virt.addr, @@ -506,10 +500,9 @@ static int __maybe_unused ctr_encrypt(struct skcipher_request *req) else if (nbytes < walk.total) nbytes &= ~(AES_BLOCK_SIZE - 1); - kernel_neon_begin(); - aes_ctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes, - walk.iv); - kernel_neon_end(); + scoped_ksimd() + aes_ctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes, + walk.iv); if (unlikely(nbytes < AES_BLOCK_SIZE)) memcpy(walk.dst.virt.addr, @@ -562,11 +555,10 @@ static int __maybe_unused xts_encrypt(struct skcipher_request *req) if (walk.nbytes < walk.total) nbytes &= ~(AES_BLOCK_SIZE - 1); - kernel_neon_begin(); - aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key1.key_enc, rounds, nbytes, - ctx->key2.key_enc, walk.iv, first); - kernel_neon_end(); + scoped_ksimd() + aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key1.key_enc, rounds, nbytes, + ctx->key2.key_enc, walk.iv, first); err = skcipher_walk_done(&walk, walk.nbytes - nbytes); } @@ -584,11 +576,10 @@ static int __maybe_unused xts_encrypt(struct skcipher_request *req) if (err) return err; - kernel_neon_begin(); - aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key1.key_enc, rounds, walk.nbytes, - ctx->key2.key_enc, walk.iv, first); - kernel_neon_end(); + scoped_ksimd() + aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key1.key_enc, rounds, walk.nbytes, + ctx->key2.key_enc, walk.iv, first); return skcipher_walk_done(&walk, 0); } @@ -634,11 +625,10 @@ static int __maybe_unused xts_decrypt(struct skcipher_request *req) if (walk.nbytes < walk.total) nbytes &= ~(AES_BLOCK_SIZE - 1); - kernel_neon_begin(); - aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key1.key_dec, rounds, nbytes, - ctx->key2.key_enc, walk.iv, 
first); - kernel_neon_end(); + scoped_ksimd() + aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key1.key_dec, rounds, nbytes, + ctx->key2.key_enc, walk.iv, first); err = skcipher_walk_done(&walk, walk.nbytes - nbytes); } @@ -657,11 +647,10 @@ static int __maybe_unused xts_decrypt(struct skcipher_request *req) return err; - kernel_neon_begin(); - aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key1.key_dec, rounds, walk.nbytes, - ctx->key2.key_enc, walk.iv, first); - kernel_neon_end(); + scoped_ksimd() + aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key1.key_dec, rounds, walk.nbytes, + ctx->key2.key_enc, walk.iv, first); return skcipher_walk_done(&walk, 0); } @@ -808,10 +797,9 @@ static int cmac_setkey(struct crypto_shash *tfm, const u8 *in_key, return err; /* encrypt the zero vector */ - kernel_neon_begin(); - aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){}, ctx->key.key_enc, - rounds, 1); - kernel_neon_end(); + scoped_ksimd() + aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){}, + ctx->key.key_enc, rounds, 1); cmac_gf128_mul_by_x(consts, consts); cmac_gf128_mul_by_x(consts + 1, consts); @@ -837,10 +825,10 @@ static int xcbc_setkey(struct crypto_shash *tfm, const u8 *in_key, if (err) return err; - kernel_neon_begin(); - aes_ecb_encrypt(key, ks[0], ctx->key.key_enc, rounds, 1); - aes_ecb_encrypt(ctx->consts, ks[1], ctx->key.key_enc, rounds, 2); - kernel_neon_end(); + scoped_ksimd() { + aes_ecb_encrypt(key, ks[0], ctx->key.key_enc, rounds, 1); + aes_ecb_encrypt(ctx->consts, ks[1], ctx->key.key_enc, rounds, 2); + } return cbcmac_setkey(tfm, key, sizeof(key)); } @@ -860,10 +848,9 @@ static void mac_do_update(struct crypto_aes_ctx *ctx, u8 const in[], int blocks, int rem; do { - kernel_neon_begin(); - rem = aes_mac_update(in, ctx->key_enc, rounds, blocks, - dg, enc_before, !enc_before); - kernel_neon_end(); + scoped_ksimd() + rem = aes_mac_update(in, ctx->key_enc, rounds, blocks, + dg, enc_before, !enc_before); in += (blocks - rem) * AES_BLOCK_SIZE; blocks = rem; } while (blocks); diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c index c4a623e86593..d496effb0a5b 100644 --- a/arch/arm64/crypto/aes-neonbs-glue.c +++ b/arch/arm64/crypto/aes-neonbs-glue.c @@ -85,9 +85,8 @@ static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key, ctx->rounds = 6 + key_len / 4; - kernel_neon_begin(); - aesbs_convert_key(ctx->rk, rk.key_enc, ctx->rounds); - kernel_neon_end(); + scoped_ksimd() + aesbs_convert_key(ctx->rk, rk.key_enc, ctx->rounds); return 0; } @@ -110,10 +109,9 @@ static int __ecb_crypt(struct skcipher_request *req, blocks = round_down(blocks, walk.stride / AES_BLOCK_SIZE); - kernel_neon_begin(); - fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk, - ctx->rounds, blocks); - kernel_neon_end(); + scoped_ksimd() + fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk, + ctx->rounds, blocks); err = skcipher_walk_done(&walk, walk.nbytes - blocks * AES_BLOCK_SIZE); } @@ -146,9 +144,8 @@ static int aesbs_cbc_ctr_setkey(struct crypto_skcipher *tfm, const u8 *in_key, memcpy(ctx->enc, rk.key_enc, sizeof(ctx->enc)); - kernel_neon_begin(); - aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds); - kernel_neon_end(); + scoped_ksimd() + aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds); memzero_explicit(&rk, sizeof(rk)); return 0; @@ -167,11 +164,11 @@ static int cbc_encrypt(struct skcipher_request *req) unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; /* fall back to the non-bitsliced NEON 
implementation */ - kernel_neon_begin(); - neon_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->enc, ctx->key.rounds, blocks, - walk.iv); - kernel_neon_end(); + scoped_ksimd() + neon_aes_cbc_encrypt(walk.dst.virt.addr, + walk.src.virt.addr, + ctx->enc, ctx->key.rounds, blocks, + walk.iv); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } return err; @@ -193,11 +190,10 @@ static int cbc_decrypt(struct skcipher_request *req) blocks = round_down(blocks, walk.stride / AES_BLOCK_SIZE); - kernel_neon_begin(); - aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key.rk, ctx->key.rounds, blocks, - walk.iv); - kernel_neon_end(); + scoped_ksimd() + aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key.rk, ctx->key.rounds, blocks, + walk.iv); err = skcipher_walk_done(&walk, walk.nbytes - blocks * AES_BLOCK_SIZE); } @@ -220,30 +216,32 @@ static int ctr_encrypt(struct skcipher_request *req) const u8 *src = walk.src.virt.addr; u8 *dst = walk.dst.virt.addr; - kernel_neon_begin(); - if (blocks >= 8) { - aesbs_ctr_encrypt(dst, src, ctx->key.rk, ctx->key.rounds, - blocks, walk.iv); - dst += blocks * AES_BLOCK_SIZE; - src += blocks * AES_BLOCK_SIZE; - } - if (nbytes && walk.nbytes == walk.total) { - u8 buf[AES_BLOCK_SIZE]; - u8 *d = dst; - - if (unlikely(nbytes < AES_BLOCK_SIZE)) - src = dst = memcpy(buf + sizeof(buf) - nbytes, - src, nbytes); - - neon_aes_ctr_encrypt(dst, src, ctx->enc, ctx->key.rounds, - nbytes, walk.iv); + scoped_ksimd() { + if (blocks >= 8) { + aesbs_ctr_encrypt(dst, src, ctx->key.rk, + ctx->key.rounds, blocks, + walk.iv); + dst += blocks * AES_BLOCK_SIZE; + src += blocks * AES_BLOCK_SIZE; + } + if (nbytes && walk.nbytes == walk.total) { + u8 buf[AES_BLOCK_SIZE]; + u8 *d = dst; + + if (unlikely(nbytes < AES_BLOCK_SIZE)) + src = dst = memcpy(buf + sizeof(buf) - + nbytes, src, nbytes); + + neon_aes_ctr_encrypt(dst, src, ctx->enc, + ctx->key.rounds, nbytes, + walk.iv); - if (unlikely(nbytes < AES_BLOCK_SIZE)) - memcpy(d, dst, nbytes); + if (unlikely(nbytes < AES_BLOCK_SIZE)) + memcpy(d, dst, nbytes); - nbytes = 0; + nbytes = 0; + } } - kernel_neon_end(); err = skcipher_walk_done(&walk, nbytes); } return err; @@ -320,33 +318,33 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt, in = walk.src.virt.addr; nbytes = walk.nbytes; - kernel_neon_begin(); - if (blocks >= 8) { - if (first == 1) - neon_aes_ecb_encrypt(walk.iv, walk.iv, - ctx->twkey, - ctx->key.rounds, 1); - first = 2; - - fn(out, in, ctx->key.rk, ctx->key.rounds, blocks, - walk.iv); - - out += blocks * AES_BLOCK_SIZE; - in += blocks * AES_BLOCK_SIZE; - nbytes -= blocks * AES_BLOCK_SIZE; + scoped_ksimd() { + if (blocks >= 8) { + if (first == 1) + neon_aes_ecb_encrypt(walk.iv, walk.iv, + ctx->twkey, + ctx->key.rounds, 1); + first = 2; + + fn(out, in, ctx->key.rk, ctx->key.rounds, blocks, + walk.iv); + + out += blocks * AES_BLOCK_SIZE; + in += blocks * AES_BLOCK_SIZE; + nbytes -= blocks * AES_BLOCK_SIZE; + } + if (walk.nbytes == walk.total && nbytes > 0) { + if (encrypt) + neon_aes_xts_encrypt(out, in, ctx->cts.key_enc, + ctx->key.rounds, nbytes, + ctx->twkey, walk.iv, first); + else + neon_aes_xts_decrypt(out, in, ctx->cts.key_dec, + ctx->key.rounds, nbytes, + ctx->twkey, walk.iv, first); + nbytes = first = 0; + } } - if (walk.nbytes == walk.total && nbytes > 0) { - if (encrypt) - neon_aes_xts_encrypt(out, in, ctx->cts.key_enc, - ctx->key.rounds, nbytes, - ctx->twkey, walk.iv, first); - else - neon_aes_xts_decrypt(out, in, ctx->cts.key_dec, - ctx->key.rounds, 
nbytes, - ctx->twkey, walk.iv, first); - nbytes = first = 0; - } - kernel_neon_end(); err = skcipher_walk_done(&walk, nbytes); } @@ -369,14 +367,16 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt, in = walk.src.virt.addr; nbytes = walk.nbytes; - kernel_neon_begin(); - if (encrypt) - neon_aes_xts_encrypt(out, in, ctx->cts.key_enc, ctx->key.rounds, - nbytes, ctx->twkey, walk.iv, first); - else - neon_aes_xts_decrypt(out, in, ctx->cts.key_dec, ctx->key.rounds, - nbytes, ctx->twkey, walk.iv, first); - kernel_neon_end(); + scoped_ksimd() { + if (encrypt) + neon_aes_xts_encrypt(out, in, ctx->cts.key_enc, + ctx->key.rounds, nbytes, ctx->twkey, + walk.iv, first); + else + neon_aes_xts_decrypt(out, in, ctx->cts.key_dec, + ctx->key.rounds, nbytes, ctx->twkey, + walk.iv, first); + } return skcipher_walk_done(&walk, 0); } diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c index 4995b6e22335..7951557a285a 100644 --- a/arch/arm64/crypto/ghash-ce-glue.c +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -5,7 +5,6 @@ * Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org> */ -#include <asm/neon.h> #include <crypto/aes.h> #include <crypto/b128ops.h> #include <crypto/gcm.h> @@ -22,6 +21,8 @@ #include <linux/string.h> #include <linux/unaligned.h> +#include <asm/simd.h> + MODULE_DESCRIPTION("GHASH and AES-GCM using ARMv8 Crypto Extensions"); MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); MODULE_LICENSE("GPL v2"); @@ -74,9 +75,8 @@ void ghash_do_simd_update(int blocks, u64 dg[], const char *src, u64 const h[][2], const char *head)) { - kernel_neon_begin(); - simd_update(blocks, dg, src, key->h, head); - kernel_neon_end(); + scoped_ksimd() + simd_update(blocks, dg, src, key->h, head); } /* avoid hogging the CPU for too long */ @@ -329,11 +329,10 @@ static int gcm_encrypt(struct aead_request *req, char *iv, int assoclen) tag = NULL; } - kernel_neon_begin(); - pmull_gcm_encrypt(nbytes, dst, src, ctx->ghash_key.h, - dg, iv, ctx->aes_key.key_enc, nrounds, - tag); - kernel_neon_end(); + scoped_ksimd() + pmull_gcm_encrypt(nbytes, dst, src, ctx->ghash_key.h, + dg, iv, ctx->aes_key.key_enc, nrounds, + tag); if (unlikely(!nbytes)) break; @@ -399,11 +398,11 @@ static int gcm_decrypt(struct aead_request *req, char *iv, int assoclen) tag = NULL; } - kernel_neon_begin(); - ret = pmull_gcm_decrypt(nbytes, dst, src, ctx->ghash_key.h, - dg, iv, ctx->aes_key.key_enc, - nrounds, tag, otag, authsize); - kernel_neon_end(); + scoped_ksimd() + ret = pmull_gcm_decrypt(nbytes, dst, src, + ctx->ghash_key.h, + dg, iv, ctx->aes_key.key_enc, + nrounds, tag, otag, authsize); if (unlikely(!nbytes)) break; diff --git a/arch/arm64/crypto/nhpoly1305-neon-glue.c b/arch/arm64/crypto/nhpoly1305-neon-glue.c index e4a0b463f080..013de6ac569a 100644 --- a/arch/arm64/crypto/nhpoly1305-neon-glue.c +++ b/arch/arm64/crypto/nhpoly1305-neon-glue.c @@ -25,9 +25,8 @@ static int nhpoly1305_neon_update(struct shash_desc *desc, do { unsigned int n = min_t(unsigned int, srclen, SZ_4K); - kernel_neon_begin(); - crypto_nhpoly1305_update_helper(desc, src, n, nh_neon); - kernel_neon_end(); + scoped_ksimd() + crypto_nhpoly1305_update_helper(desc, src, n, nh_neon); src += n; srclen -= n; } while (srclen); diff --git a/arch/arm64/crypto/polyval-ce-core.S b/arch/arm64/crypto/polyval-ce-core.S deleted file mode 100644 index b5326540d2e3..000000000000 --- a/arch/arm64/crypto/polyval-ce-core.S +++ /dev/null @@ -1,361 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Implementation of POLYVAL using 
ARMv8 Crypto Extensions. - * - * Copyright 2021 Google LLC - */ -/* - * This is an efficient implementation of POLYVAL using ARMv8 Crypto Extensions - * It works on 8 blocks at a time, by precomputing the first 8 keys powers h^8, - * ..., h^1 in the POLYVAL finite field. This precomputation allows us to split - * finite field multiplication into two steps. - * - * In the first step, we consider h^i, m_i as normal polynomials of degree less - * than 128. We then compute p(x) = h^8m_0 + ... + h^1m_7 where multiplication - * is simply polynomial multiplication. - * - * In the second step, we compute the reduction of p(x) modulo the finite field - * modulus g(x) = x^128 + x^127 + x^126 + x^121 + 1. - * - * This two step process is equivalent to computing h^8m_0 + ... + h^1m_7 where - * multiplication is finite field multiplication. The advantage is that the - * two-step process only requires 1 finite field reduction for every 8 - * polynomial multiplications. Further parallelism is gained by interleaving the - * multiplications and polynomial reductions. - */ - -#include <linux/linkage.h> -#define STRIDE_BLOCKS 8 - -KEY_POWERS .req x0 -MSG .req x1 -BLOCKS_LEFT .req x2 -ACCUMULATOR .req x3 -KEY_START .req x10 -EXTRA_BYTES .req x11 -TMP .req x13 - -M0 .req v0 -M1 .req v1 -M2 .req v2 -M3 .req v3 -M4 .req v4 -M5 .req v5 -M6 .req v6 -M7 .req v7 -KEY8 .req v8 -KEY7 .req v9 -KEY6 .req v10 -KEY5 .req v11 -KEY4 .req v12 -KEY3 .req v13 -KEY2 .req v14 -KEY1 .req v15 -PL .req v16 -PH .req v17 -TMP_V .req v18 -LO .req v20 -MI .req v21 -HI .req v22 -SUM .req v23 -GSTAR .req v24 - - .text - - .arch armv8-a+crypto - .align 4 - -.Lgstar: - .quad 0xc200000000000000, 0xc200000000000000 - -/* - * Computes the product of two 128-bit polynomials in X and Y and XORs the - * components of the 256-bit product into LO, MI, HI. - * - * Given: - * X = [X_1 : X_0] - * Y = [Y_1 : Y_0] - * - * We compute: - * LO += X_0 * Y_0 - * MI += (X_0 + X_1) * (Y_0 + Y_1) - * HI += X_1 * Y_1 - * - * Later, the 256-bit result can be extracted as: - * [HI_1 : HI_0 + HI_1 + MI_1 + LO_1 : LO_1 + HI_0 + MI_0 + LO_0 : LO_0] - * This step is done when computing the polynomial reduction for efficiency - * reasons. - * - * Karatsuba multiplication is used instead of Schoolbook multiplication because - * it was found to be slightly faster on ARM64 CPUs. - * - */ -.macro karatsuba1 X Y - X .req \X - Y .req \Y - ext v25.16b, X.16b, X.16b, #8 - ext v26.16b, Y.16b, Y.16b, #8 - eor v25.16b, v25.16b, X.16b - eor v26.16b, v26.16b, Y.16b - pmull2 v28.1q, X.2d, Y.2d - pmull v29.1q, X.1d, Y.1d - pmull v27.1q, v25.1d, v26.1d - eor HI.16b, HI.16b, v28.16b - eor LO.16b, LO.16b, v29.16b - eor MI.16b, MI.16b, v27.16b - .unreq X - .unreq Y -.endm - -/* - * Same as karatsuba1, except overwrites HI, LO, MI rather than XORing into - * them. - */ -.macro karatsuba1_store X Y - X .req \X - Y .req \Y - ext v25.16b, X.16b, X.16b, #8 - ext v26.16b, Y.16b, Y.16b, #8 - eor v25.16b, v25.16b, X.16b - eor v26.16b, v26.16b, Y.16b - pmull2 HI.1q, X.2d, Y.2d - pmull LO.1q, X.1d, Y.1d - pmull MI.1q, v25.1d, v26.1d - .unreq X - .unreq Y -.endm - -/* - * Computes the 256-bit polynomial represented by LO, HI, MI. Stores - * the result in PL, PH. 
- * [PH : PL] = - * [HI_1 : HI_1 + HI_0 + MI_1 + LO_1 : HI_0 + MI_0 + LO_1 + LO_0 : LO_0] - */ -.macro karatsuba2 - // v4 = [HI_1 + MI_1 : HI_0 + MI_0] - eor v4.16b, HI.16b, MI.16b - // v4 = [HI_1 + MI_1 + LO_1 : HI_0 + MI_0 + LO_0] - eor v4.16b, v4.16b, LO.16b - // v5 = [HI_0 : LO_1] - ext v5.16b, LO.16b, HI.16b, #8 - // v4 = [HI_1 + HI_0 + MI_1 + LO_1 : HI_0 + MI_0 + LO_1 + LO_0] - eor v4.16b, v4.16b, v5.16b - // HI = [HI_0 : HI_1] - ext HI.16b, HI.16b, HI.16b, #8 - // LO = [LO_0 : LO_1] - ext LO.16b, LO.16b, LO.16b, #8 - // PH = [HI_1 : HI_1 + HI_0 + MI_1 + LO_1] - ext PH.16b, v4.16b, HI.16b, #8 - // PL = [HI_0 + MI_0 + LO_1 + LO_0 : LO_0] - ext PL.16b, LO.16b, v4.16b, #8 -.endm - -/* - * Computes the 128-bit reduction of PH : PL. Stores the result in dest. - * - * This macro computes p(x) mod g(x) where p(x) is in montgomery form and g(x) = - * x^128 + x^127 + x^126 + x^121 + 1. - * - * We have a 256-bit polynomial PH : PL = P_3 : P_2 : P_1 : P_0 that is the - * product of two 128-bit polynomials in Montgomery form. We need to reduce it - * mod g(x). Also, since polynomials in Montgomery form have an "extra" factor - * of x^128, this product has two extra factors of x^128. To get it back into - * Montgomery form, we need to remove one of these factors by dividing by x^128. - * - * To accomplish both of these goals, we add multiples of g(x) that cancel out - * the low 128 bits P_1 : P_0, leaving just the high 128 bits. Since the low - * bits are zero, the polynomial division by x^128 can be done by right - * shifting. - * - * Since the only nonzero term in the low 64 bits of g(x) is the constant term, - * the multiple of g(x) needed to cancel out P_0 is P_0 * g(x). The CPU can - * only do 64x64 bit multiplications, so split P_0 * g(x) into x^128 * P_0 + - * x^64 * g*(x) * P_0 + P_0, where g*(x) is bits 64-127 of g(x). Adding this to - * the original polynomial gives P_3 : P_2 + P_0 + T_1 : P_1 + T_0 : 0, where T - * = T_1 : T_0 = g*(x) * P_0. Thus, bits 0-63 got "folded" into bits 64-191. - * - * Repeating this same process on the next 64 bits "folds" bits 64-127 into bits - * 128-255, giving the answer in bits 128-255. This time, we need to cancel P_1 - * + T_0 in bits 64-127. The multiple of g(x) required is (P_1 + T_0) * g(x) * - * x^64. Adding this to our previous computation gives P_3 + P_1 + T_0 + V_1 : - * P_2 + P_0 + T_1 + V_0 : 0 : 0, where V = V_1 : V_0 = g*(x) * (P_1 + T_0). - * - * So our final computation is: - * T = T_1 : T_0 = g*(x) * P_0 - * V = V_1 : V_0 = g*(x) * (P_1 + T_0) - * p(x) / x^{128} mod g(x) = P_3 + P_1 + T_0 + V_1 : P_2 + P_0 + T_1 + V_0 - * - * The implementation below saves a XOR instruction by computing P_1 + T_0 : P_0 - * + T_1 and XORing into dest, rather than separately XORing P_1 : P_0 and T_0 : - * T_1 into dest. This allows us to reuse P_1 + T_0 when computing V. - */ -.macro montgomery_reduction dest - DEST .req \dest - // TMP_V = T_1 : T_0 = P_0 * g*(x) - pmull TMP_V.1q, PL.1d, GSTAR.1d - // TMP_V = T_0 : T_1 - ext TMP_V.16b, TMP_V.16b, TMP_V.16b, #8 - // TMP_V = P_1 + T_0 : P_0 + T_1 - eor TMP_V.16b, PL.16b, TMP_V.16b - // PH = P_3 + P_1 + T_0 : P_2 + P_0 + T_1 - eor PH.16b, PH.16b, TMP_V.16b - // TMP_V = V_1 : V_0 = (P_1 + T_0) * g*(x) - pmull2 TMP_V.1q, TMP_V.2d, GSTAR.2d - eor DEST.16b, PH.16b, TMP_V.16b - .unreq DEST -.endm - -/* - * Compute Polyval on 8 blocks. - * - * If reduce is set, also computes the montgomery reduction of the - * previous full_stride call and XORs with the first message block. - * (m_0 + REDUCE(PL, PH))h^8 + ... 
+ m_7h^1. - * I.e., the first multiplication uses m_0 + REDUCE(PL, PH) instead of m_0. - * - * Sets PL, PH. - */ -.macro full_stride reduce - eor LO.16b, LO.16b, LO.16b - eor MI.16b, MI.16b, MI.16b - eor HI.16b, HI.16b, HI.16b - - ld1 {M0.16b, M1.16b, M2.16b, M3.16b}, [MSG], #64 - ld1 {M4.16b, M5.16b, M6.16b, M7.16b}, [MSG], #64 - - karatsuba1 M7 KEY1 - .if \reduce - pmull TMP_V.1q, PL.1d, GSTAR.1d - .endif - - karatsuba1 M6 KEY2 - .if \reduce - ext TMP_V.16b, TMP_V.16b, TMP_V.16b, #8 - .endif - - karatsuba1 M5 KEY3 - .if \reduce - eor TMP_V.16b, PL.16b, TMP_V.16b - .endif - - karatsuba1 M4 KEY4 - .if \reduce - eor PH.16b, PH.16b, TMP_V.16b - .endif - - karatsuba1 M3 KEY5 - .if \reduce - pmull2 TMP_V.1q, TMP_V.2d, GSTAR.2d - .endif - - karatsuba1 M2 KEY6 - .if \reduce - eor SUM.16b, PH.16b, TMP_V.16b - .endif - - karatsuba1 M1 KEY7 - eor M0.16b, M0.16b, SUM.16b - - karatsuba1 M0 KEY8 - karatsuba2 -.endm - -/* - * Handle any extra blocks after full_stride loop. - */ -.macro partial_stride - add KEY_POWERS, KEY_START, #(STRIDE_BLOCKS << 4) - sub KEY_POWERS, KEY_POWERS, BLOCKS_LEFT, lsl #4 - ld1 {KEY1.16b}, [KEY_POWERS], #16 - - ld1 {TMP_V.16b}, [MSG], #16 - eor SUM.16b, SUM.16b, TMP_V.16b - karatsuba1_store KEY1 SUM - sub BLOCKS_LEFT, BLOCKS_LEFT, #1 - - tst BLOCKS_LEFT, #4 - beq .Lpartial4BlocksDone - ld1 {M0.16b, M1.16b, M2.16b, M3.16b}, [MSG], #64 - ld1 {KEY8.16b, KEY7.16b, KEY6.16b, KEY5.16b}, [KEY_POWERS], #64 - karatsuba1 M0 KEY8 - karatsuba1 M1 KEY7 - karatsuba1 M2 KEY6 - karatsuba1 M3 KEY5 -.Lpartial4BlocksDone: - tst BLOCKS_LEFT, #2 - beq .Lpartial2BlocksDone - ld1 {M0.16b, M1.16b}, [MSG], #32 - ld1 {KEY8.16b, KEY7.16b}, [KEY_POWERS], #32 - karatsuba1 M0 KEY8 - karatsuba1 M1 KEY7 -.Lpartial2BlocksDone: - tst BLOCKS_LEFT, #1 - beq .LpartialDone - ld1 {M0.16b}, [MSG], #16 - ld1 {KEY8.16b}, [KEY_POWERS], #16 - karatsuba1 M0 KEY8 -.LpartialDone: - karatsuba2 - montgomery_reduction SUM -.endm - -/* - * Perform montgomery multiplication in GF(2^128) and store result in op1. - * - * Computes op1*op2*x^{-128} mod x^128 + x^127 + x^126 + x^121 + 1 - * If op1, op2 are in montgomery form, this computes the montgomery - * form of op1*op2. - * - * void pmull_polyval_mul(u8 *op1, const u8 *op2); - */ -SYM_FUNC_START(pmull_polyval_mul) - adr TMP, .Lgstar - ld1 {GSTAR.2d}, [TMP] - ld1 {v0.16b}, [x0] - ld1 {v1.16b}, [x1] - karatsuba1_store v0 v1 - karatsuba2 - montgomery_reduction SUM - st1 {SUM.16b}, [x0] - ret -SYM_FUNC_END(pmull_polyval_mul) - -/* - * Perform polynomial evaluation as specified by POLYVAL. This computes: - * h^n * accumulator + h^n * m_0 + ... + h^1 * m_{n-1} - * where n=nblocks, h is the hash key, and m_i are the message blocks. - * - * x0 - pointer to precomputed key powers h^8 ... 
h^1 - * x1 - pointer to message blocks - * x2 - number of blocks to hash - * x3 - pointer to accumulator - * - * void pmull_polyval_update(const struct polyval_ctx *ctx, const u8 *in, - * size_t nblocks, u8 *accumulator); - */ -SYM_FUNC_START(pmull_polyval_update) - adr TMP, .Lgstar - mov KEY_START, KEY_POWERS - ld1 {GSTAR.2d}, [TMP] - ld1 {SUM.16b}, [ACCUMULATOR] - subs BLOCKS_LEFT, BLOCKS_LEFT, #STRIDE_BLOCKS - blt .LstrideLoopExit - ld1 {KEY8.16b, KEY7.16b, KEY6.16b, KEY5.16b}, [KEY_POWERS], #64 - ld1 {KEY4.16b, KEY3.16b, KEY2.16b, KEY1.16b}, [KEY_POWERS], #64 - full_stride 0 - subs BLOCKS_LEFT, BLOCKS_LEFT, #STRIDE_BLOCKS - blt .LstrideLoopExitReduce -.LstrideLoop: - full_stride 1 - subs BLOCKS_LEFT, BLOCKS_LEFT, #STRIDE_BLOCKS - bge .LstrideLoop -.LstrideLoopExitReduce: - montgomery_reduction SUM -.LstrideLoopExit: - adds BLOCKS_LEFT, BLOCKS_LEFT, #STRIDE_BLOCKS - beq .LskipPartial - partial_stride -.LskipPartial: - st1 {SUM.16b}, [ACCUMULATOR] - ret -SYM_FUNC_END(pmull_polyval_update) diff --git a/arch/arm64/crypto/polyval-ce-glue.c b/arch/arm64/crypto/polyval-ce-glue.c deleted file mode 100644 index c4e653688ea0..000000000000 --- a/arch/arm64/crypto/polyval-ce-glue.c +++ /dev/null @@ -1,158 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Glue code for POLYVAL using ARMv8 Crypto Extensions - * - * Copyright (c) 2007 Nokia Siemens Networks - Mikko Herranen <mh1@iki.fi> - * Copyright (c) 2009 Intel Corp. - * Author: Huang Ying <ying.huang@intel.com> - * Copyright 2021 Google LLC - */ - -/* - * Glue code based on ghash-clmulni-intel_glue.c. - * - * This implementation of POLYVAL uses montgomery multiplication accelerated by - * ARMv8 Crypto Extensions instructions to implement the finite field operations. - */ - -#include <asm/neon.h> -#include <crypto/internal/hash.h> -#include <crypto/polyval.h> -#include <crypto/utils.h> -#include <linux/cpufeature.h> -#include <linux/errno.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/string.h> - -#define NUM_KEY_POWERS 8 - -struct polyval_tfm_ctx { - /* - * These powers must be in the order h^8, ..., h^1. 
- */ - u8 key_powers[NUM_KEY_POWERS][POLYVAL_BLOCK_SIZE]; -}; - -struct polyval_desc_ctx { - u8 buffer[POLYVAL_BLOCK_SIZE]; -}; - -asmlinkage void pmull_polyval_update(const struct polyval_tfm_ctx *keys, - const u8 *in, size_t nblocks, u8 *accumulator); -asmlinkage void pmull_polyval_mul(u8 *op1, const u8 *op2); - -static void internal_polyval_update(const struct polyval_tfm_ctx *keys, - const u8 *in, size_t nblocks, u8 *accumulator) -{ - kernel_neon_begin(); - pmull_polyval_update(keys, in, nblocks, accumulator); - kernel_neon_end(); -} - -static void internal_polyval_mul(u8 *op1, const u8 *op2) -{ - kernel_neon_begin(); - pmull_polyval_mul(op1, op2); - kernel_neon_end(); -} - -static int polyval_arm64_setkey(struct crypto_shash *tfm, - const u8 *key, unsigned int keylen) -{ - struct polyval_tfm_ctx *tctx = crypto_shash_ctx(tfm); - int i; - - if (keylen != POLYVAL_BLOCK_SIZE) - return -EINVAL; - - memcpy(tctx->key_powers[NUM_KEY_POWERS-1], key, POLYVAL_BLOCK_SIZE); - - for (i = NUM_KEY_POWERS-2; i >= 0; i--) { - memcpy(tctx->key_powers[i], key, POLYVAL_BLOCK_SIZE); - internal_polyval_mul(tctx->key_powers[i], - tctx->key_powers[i+1]); - } - - return 0; -} - -static int polyval_arm64_init(struct shash_desc *desc) -{ - struct polyval_desc_ctx *dctx = shash_desc_ctx(desc); - - memset(dctx, 0, sizeof(*dctx)); - - return 0; -} - -static int polyval_arm64_update(struct shash_desc *desc, - const u8 *src, unsigned int srclen) -{ - struct polyval_desc_ctx *dctx = shash_desc_ctx(desc); - const struct polyval_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); - unsigned int nblocks; - - do { - /* allow rescheduling every 4K bytes */ - nblocks = min(srclen, 4096U) / POLYVAL_BLOCK_SIZE; - internal_polyval_update(tctx, src, nblocks, dctx->buffer); - srclen -= nblocks * POLYVAL_BLOCK_SIZE; - src += nblocks * POLYVAL_BLOCK_SIZE; - } while (srclen >= POLYVAL_BLOCK_SIZE); - - return srclen; -} - -static int polyval_arm64_finup(struct shash_desc *desc, const u8 *src, - unsigned int len, u8 *dst) -{ - struct polyval_desc_ctx *dctx = shash_desc_ctx(desc); - const struct polyval_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); - - if (len) { - crypto_xor(dctx->buffer, src, len); - internal_polyval_mul(dctx->buffer, - tctx->key_powers[NUM_KEY_POWERS-1]); - } - - memcpy(dst, dctx->buffer, POLYVAL_BLOCK_SIZE); - - return 0; -} - -static struct shash_alg polyval_alg = { - .digestsize = POLYVAL_DIGEST_SIZE, - .init = polyval_arm64_init, - .update = polyval_arm64_update, - .finup = polyval_arm64_finup, - .setkey = polyval_arm64_setkey, - .descsize = sizeof(struct polyval_desc_ctx), - .base = { - .cra_name = "polyval", - .cra_driver_name = "polyval-ce", - .cra_priority = 200, - .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, - .cra_blocksize = POLYVAL_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct polyval_tfm_ctx), - .cra_module = THIS_MODULE, - }, -}; - -static int __init polyval_ce_mod_init(void) -{ - return crypto_register_shash(&polyval_alg); -} - -static void __exit polyval_ce_mod_exit(void) -{ - crypto_unregister_shash(&polyval_alg); -} - -module_cpu_feature_match(PMULL, polyval_ce_mod_init) -module_exit(polyval_ce_mod_exit); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("POLYVAL hash function accelerated by ARMv8 Crypto Extensions"); -MODULE_ALIAS_CRYPTO("polyval"); -MODULE_ALIAS_CRYPTO("polyval-ce"); diff --git a/arch/arm64/crypto/sha3-ce-core.S b/arch/arm64/crypto/sha3-ce-core.S deleted file mode 100644 index 9c77313f5a60..000000000000 --- a/arch/arm64/crypto/sha3-ce-core.S +++ /dev/null @@ -1,212 +0,0 @@ -/* 
SPDX-License-Identifier: GPL-2.0 */ -/* - * sha3-ce-core.S - core SHA-3 transform using v8.2 Crypto Extensions - * - * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/linkage.h> -#include <asm/assembler.h> - - .irp b,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 - .set .Lv\b\().2d, \b - .set .Lv\b\().16b, \b - .endr - - /* - * ARMv8.2 Crypto Extensions instructions - */ - .macro eor3, rd, rn, rm, ra - .inst 0xce000000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16) - .endm - - .macro rax1, rd, rn, rm - .inst 0xce608c00 | .L\rd | (.L\rn << 5) | (.L\rm << 16) - .endm - - .macro bcax, rd, rn, rm, ra - .inst 0xce200000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16) - .endm - - .macro xar, rd, rn, rm, imm6 - .inst 0xce800000 | .L\rd | (.L\rn << 5) | ((\imm6) << 10) | (.L\rm << 16) - .endm - - /* - * int sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size) - */ - .text -SYM_FUNC_START(sha3_ce_transform) - /* load state */ - add x8, x0, #32 - ld1 { v0.1d- v3.1d}, [x0] - ld1 { v4.1d- v7.1d}, [x8], #32 - ld1 { v8.1d-v11.1d}, [x8], #32 - ld1 {v12.1d-v15.1d}, [x8], #32 - ld1 {v16.1d-v19.1d}, [x8], #32 - ld1 {v20.1d-v23.1d}, [x8], #32 - ld1 {v24.1d}, [x8] - -0: sub w2, w2, #1 - mov w8, #24 - adr_l x9, .Lsha3_rcon - - /* load input */ - ld1 {v25.8b-v28.8b}, [x1], #32 - ld1 {v29.8b-v31.8b}, [x1], #24 - eor v0.8b, v0.8b, v25.8b - eor v1.8b, v1.8b, v26.8b - eor v2.8b, v2.8b, v27.8b - eor v3.8b, v3.8b, v28.8b - eor v4.8b, v4.8b, v29.8b - eor v5.8b, v5.8b, v30.8b - eor v6.8b, v6.8b, v31.8b - - tbnz x3, #6, 2f // SHA3-512 - - ld1 {v25.8b-v28.8b}, [x1], #32 - ld1 {v29.8b-v30.8b}, [x1], #16 - eor v7.8b, v7.8b, v25.8b - eor v8.8b, v8.8b, v26.8b - eor v9.8b, v9.8b, v27.8b - eor v10.8b, v10.8b, v28.8b - eor v11.8b, v11.8b, v29.8b - eor v12.8b, v12.8b, v30.8b - - tbnz x3, #4, 1f // SHA3-384 or SHA3-224 - - // SHA3-256 - ld1 {v25.8b-v28.8b}, [x1], #32 - eor v13.8b, v13.8b, v25.8b - eor v14.8b, v14.8b, v26.8b - eor v15.8b, v15.8b, v27.8b - eor v16.8b, v16.8b, v28.8b - b 3f - -1: tbz x3, #2, 3f // bit 2 cleared? 
SHA-384 - - // SHA3-224 - ld1 {v25.8b-v28.8b}, [x1], #32 - ld1 {v29.8b}, [x1], #8 - eor v13.8b, v13.8b, v25.8b - eor v14.8b, v14.8b, v26.8b - eor v15.8b, v15.8b, v27.8b - eor v16.8b, v16.8b, v28.8b - eor v17.8b, v17.8b, v29.8b - b 3f - - // SHA3-512 -2: ld1 {v25.8b-v26.8b}, [x1], #16 - eor v7.8b, v7.8b, v25.8b - eor v8.8b, v8.8b, v26.8b - -3: sub w8, w8, #1 - - eor3 v29.16b, v4.16b, v9.16b, v14.16b - eor3 v26.16b, v1.16b, v6.16b, v11.16b - eor3 v28.16b, v3.16b, v8.16b, v13.16b - eor3 v25.16b, v0.16b, v5.16b, v10.16b - eor3 v27.16b, v2.16b, v7.16b, v12.16b - eor3 v29.16b, v29.16b, v19.16b, v24.16b - eor3 v26.16b, v26.16b, v16.16b, v21.16b - eor3 v28.16b, v28.16b, v18.16b, v23.16b - eor3 v25.16b, v25.16b, v15.16b, v20.16b - eor3 v27.16b, v27.16b, v17.16b, v22.16b - - rax1 v30.2d, v29.2d, v26.2d // bc[0] - rax1 v26.2d, v26.2d, v28.2d // bc[2] - rax1 v28.2d, v28.2d, v25.2d // bc[4] - rax1 v25.2d, v25.2d, v27.2d // bc[1] - rax1 v27.2d, v27.2d, v29.2d // bc[3] - - eor v0.16b, v0.16b, v30.16b - xar v29.2d, v1.2d, v25.2d, (64 - 1) - xar v1.2d, v6.2d, v25.2d, (64 - 44) - xar v6.2d, v9.2d, v28.2d, (64 - 20) - xar v9.2d, v22.2d, v26.2d, (64 - 61) - xar v22.2d, v14.2d, v28.2d, (64 - 39) - xar v14.2d, v20.2d, v30.2d, (64 - 18) - xar v31.2d, v2.2d, v26.2d, (64 - 62) - xar v2.2d, v12.2d, v26.2d, (64 - 43) - xar v12.2d, v13.2d, v27.2d, (64 - 25) - xar v13.2d, v19.2d, v28.2d, (64 - 8) - xar v19.2d, v23.2d, v27.2d, (64 - 56) - xar v23.2d, v15.2d, v30.2d, (64 - 41) - xar v15.2d, v4.2d, v28.2d, (64 - 27) - xar v28.2d, v24.2d, v28.2d, (64 - 14) - xar v24.2d, v21.2d, v25.2d, (64 - 2) - xar v8.2d, v8.2d, v27.2d, (64 - 55) - xar v4.2d, v16.2d, v25.2d, (64 - 45) - xar v16.2d, v5.2d, v30.2d, (64 - 36) - xar v5.2d, v3.2d, v27.2d, (64 - 28) - xar v27.2d, v18.2d, v27.2d, (64 - 21) - xar v3.2d, v17.2d, v26.2d, (64 - 15) - xar v25.2d, v11.2d, v25.2d, (64 - 10) - xar v26.2d, v7.2d, v26.2d, (64 - 6) - xar v30.2d, v10.2d, v30.2d, (64 - 3) - - bcax v20.16b, v31.16b, v22.16b, v8.16b - bcax v21.16b, v8.16b, v23.16b, v22.16b - bcax v22.16b, v22.16b, v24.16b, v23.16b - bcax v23.16b, v23.16b, v31.16b, v24.16b - bcax v24.16b, v24.16b, v8.16b, v31.16b - - ld1r {v31.2d}, [x9], #8 - - bcax v17.16b, v25.16b, v19.16b, v3.16b - bcax v18.16b, v3.16b, v15.16b, v19.16b - bcax v19.16b, v19.16b, v16.16b, v15.16b - bcax v15.16b, v15.16b, v25.16b, v16.16b - bcax v16.16b, v16.16b, v3.16b, v25.16b - - bcax v10.16b, v29.16b, v12.16b, v26.16b - bcax v11.16b, v26.16b, v13.16b, v12.16b - bcax v12.16b, v12.16b, v14.16b, v13.16b - bcax v13.16b, v13.16b, v29.16b, v14.16b - bcax v14.16b, v14.16b, v26.16b, v29.16b - - bcax v7.16b, v30.16b, v9.16b, v4.16b - bcax v8.16b, v4.16b, v5.16b, v9.16b - bcax v9.16b, v9.16b, v6.16b, v5.16b - bcax v5.16b, v5.16b, v30.16b, v6.16b - bcax v6.16b, v6.16b, v4.16b, v30.16b - - bcax v3.16b, v27.16b, v0.16b, v28.16b - bcax v4.16b, v28.16b, v1.16b, v0.16b - bcax v0.16b, v0.16b, v2.16b, v1.16b - bcax v1.16b, v1.16b, v27.16b, v2.16b - bcax v2.16b, v2.16b, v28.16b, v27.16b - - eor v0.16b, v0.16b, v31.16b - - cbnz w8, 3b - cond_yield 4f, x8, x9 - cbnz w2, 0b - - /* save state */ -4: st1 { v0.1d- v3.1d}, [x0], #32 - st1 { v4.1d- v7.1d}, [x0], #32 - st1 { v8.1d-v11.1d}, [x0], #32 - st1 {v12.1d-v15.1d}, [x0], #32 - st1 {v16.1d-v19.1d}, [x0], #32 - st1 {v20.1d-v23.1d}, [x0], #32 - st1 {v24.1d}, [x0] - mov w0, w2 - ret -SYM_FUNC_END(sha3_ce_transform) - - .section ".rodata", "a" - .align 8 -.Lsha3_rcon: - .quad 0x0000000000000001, 0x0000000000008082, 0x800000000000808a - .quad 0x8000000080008000, 0x000000000000808b, 
0x0000000080000001 - .quad 0x8000000080008081, 0x8000000000008009, 0x000000000000008a - .quad 0x0000000000000088, 0x0000000080008009, 0x000000008000000a - .quad 0x000000008000808b, 0x800000000000008b, 0x8000000000008089 - .quad 0x8000000000008003, 0x8000000000008002, 0x8000000000000080 - .quad 0x000000000000800a, 0x800000008000000a, 0x8000000080008081 - .quad 0x8000000000008080, 0x0000000080000001, 0x8000000080008008 diff --git a/arch/arm64/crypto/sha3-ce-glue.c b/arch/arm64/crypto/sha3-ce-glue.c deleted file mode 100644 index b4f1001046c9..000000000000 --- a/arch/arm64/crypto/sha3-ce-glue.c +++ /dev/null @@ -1,151 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * sha3-ce-glue.c - core SHA-3 transform using v8.2 Crypto Extensions - * - * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <asm/hwcap.h> -#include <asm/neon.h> -#include <asm/simd.h> -#include <crypto/internal/hash.h> -#include <crypto/sha3.h> -#include <linux/cpufeature.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/string.h> -#include <linux/unaligned.h> - -MODULE_DESCRIPTION("SHA3 secure hash using ARMv8 Crypto Extensions"); -MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); -MODULE_LICENSE("GPL v2"); -MODULE_ALIAS_CRYPTO("sha3-224"); -MODULE_ALIAS_CRYPTO("sha3-256"); -MODULE_ALIAS_CRYPTO("sha3-384"); -MODULE_ALIAS_CRYPTO("sha3-512"); - -asmlinkage int sha3_ce_transform(u64 *st, const u8 *data, int blocks, - int md_len); - -static int sha3_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - struct sha3_state *sctx = shash_desc_ctx(desc); - struct crypto_shash *tfm = desc->tfm; - unsigned int bs, ds; - int blocks; - - ds = crypto_shash_digestsize(tfm); - bs = crypto_shash_blocksize(tfm); - blocks = len / bs; - len -= blocks * bs; - do { - int rem; - - kernel_neon_begin(); - rem = sha3_ce_transform(sctx->st, data, blocks, ds); - kernel_neon_end(); - data += (blocks - rem) * bs; - blocks = rem; - } while (blocks); - return len; -} - -static int sha3_finup(struct shash_desc *desc, const u8 *src, unsigned int len, - u8 *out) -{ - struct sha3_state *sctx = shash_desc_ctx(desc); - struct crypto_shash *tfm = desc->tfm; - __le64 *digest = (__le64 *)out; - u8 block[SHA3_224_BLOCK_SIZE]; - unsigned int bs, ds; - int i; - - ds = crypto_shash_digestsize(tfm); - bs = crypto_shash_blocksize(tfm); - memcpy(block, src, len); - - block[len++] = 0x06; - memset(block + len, 0, bs - len); - block[bs - 1] |= 0x80; - - kernel_neon_begin(); - sha3_ce_transform(sctx->st, block, 1, ds); - kernel_neon_end(); - memzero_explicit(block , sizeof(block)); - - for (i = 0; i < ds / 8; i++) - put_unaligned_le64(sctx->st[i], digest++); - - if (ds & 4) - put_unaligned_le32(sctx->st[i], (__le32 *)digest); - - return 0; -} - -static struct shash_alg algs[] = { { - .digestsize = SHA3_224_DIGEST_SIZE, - .init = crypto_sha3_init, - .update = sha3_update, - .finup = sha3_finup, - .descsize = SHA3_STATE_SIZE, - .base.cra_name = "sha3-224", - .base.cra_driver_name = "sha3-224-ce", - .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, - .base.cra_blocksize = SHA3_224_BLOCK_SIZE, - .base.cra_module = THIS_MODULE, - .base.cra_priority = 200, -}, { - .digestsize = SHA3_256_DIGEST_SIZE, - .init = crypto_sha3_init, - .update = sha3_update, - .finup = sha3_finup, - .descsize = SHA3_STATE_SIZE, - 
.base.cra_name = "sha3-256", - .base.cra_driver_name = "sha3-256-ce", - .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, - .base.cra_blocksize = SHA3_256_BLOCK_SIZE, - .base.cra_module = THIS_MODULE, - .base.cra_priority = 200, -}, { - .digestsize = SHA3_384_DIGEST_SIZE, - .init = crypto_sha3_init, - .update = sha3_update, - .finup = sha3_finup, - .descsize = SHA3_STATE_SIZE, - .base.cra_name = "sha3-384", - .base.cra_driver_name = "sha3-384-ce", - .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, - .base.cra_blocksize = SHA3_384_BLOCK_SIZE, - .base.cra_module = THIS_MODULE, - .base.cra_priority = 200, -}, { - .digestsize = SHA3_512_DIGEST_SIZE, - .init = crypto_sha3_init, - .update = sha3_update, - .finup = sha3_finup, - .descsize = SHA3_STATE_SIZE, - .base.cra_name = "sha3-512", - .base.cra_driver_name = "sha3-512-ce", - .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, - .base.cra_blocksize = SHA3_512_BLOCK_SIZE, - .base.cra_module = THIS_MODULE, - .base.cra_priority = 200, -} }; - -static int __init sha3_neon_mod_init(void) -{ - return crypto_register_shashes(algs, ARRAY_SIZE(algs)); -} - -static void __exit sha3_neon_mod_fini(void) -{ - crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); -} - -module_cpu_feature_match(SHA3, sha3_neon_mod_init); -module_exit(sha3_neon_mod_fini); diff --git a/arch/arm64/crypto/sm3-ce-glue.c b/arch/arm64/crypto/sm3-ce-glue.c index eac6f5fa0abe..24c1fcfae072 100644 --- a/arch/arm64/crypto/sm3-ce-glue.c +++ b/arch/arm64/crypto/sm3-ce-glue.c @@ -5,7 +5,6 @@ * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org> */ -#include <asm/neon.h> #include <crypto/internal/hash.h> #include <crypto/sm3.h> #include <crypto/sm3_base.h> @@ -13,6 +12,8 @@ #include <linux/kernel.h> #include <linux/module.h> +#include <asm/simd.h> + MODULE_DESCRIPTION("SM3 secure hash using ARMv8 Crypto Extensions"); MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); MODULE_LICENSE("GPL v2"); @@ -25,18 +26,18 @@ static int sm3_ce_update(struct shash_desc *desc, const u8 *data, { int remain; - kernel_neon_begin(); - remain = sm3_base_do_update_blocks(desc, data, len, sm3_ce_transform); - kernel_neon_end(); + scoped_ksimd() { + remain = sm3_base_do_update_blocks(desc, data, len, sm3_ce_transform); + } return remain; } static int sm3_ce_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { - kernel_neon_begin(); - sm3_base_do_finup(desc, data, len, sm3_ce_transform); - kernel_neon_end(); + scoped_ksimd() { + sm3_base_do_finup(desc, data, len, sm3_ce_transform); + } return sm3_base_finish(desc, out); } diff --git a/arch/arm64/crypto/sm3-neon-glue.c b/arch/arm64/crypto/sm3-neon-glue.c index 6c4611a503a3..15f30cc24f32 100644 --- a/arch/arm64/crypto/sm3-neon-glue.c +++ b/arch/arm64/crypto/sm3-neon-glue.c @@ -5,7 +5,7 @@ * Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com> */ -#include <asm/neon.h> +#include <asm/simd.h> #include <crypto/internal/hash.h> #include <crypto/sm3.h> #include <crypto/sm3_base.h> @@ -20,20 +20,16 @@ asmlinkage void sm3_neon_transform(struct sm3_state *sst, u8 const *src, static int sm3_neon_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - int remain; - - kernel_neon_begin(); - remain = sm3_base_do_update_blocks(desc, data, len, sm3_neon_transform); - kernel_neon_end(); - return remain; + scoped_ksimd() + return sm3_base_do_update_blocks(desc, data, len, + sm3_neon_transform); } static int sm3_neon_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { - kernel_neon_begin(); - 
sm3_base_do_finup(desc, data, len, sm3_neon_transform); - kernel_neon_end(); + scoped_ksimd() + sm3_base_do_finup(desc, data, len, sm3_neon_transform); return sm3_base_finish(desc, out); } diff --git a/arch/arm64/crypto/sm4-ce-ccm-glue.c b/arch/arm64/crypto/sm4-ce-ccm-glue.c index e9cc1c1364ec..332f02167a96 100644 --- a/arch/arm64/crypto/sm4-ce-ccm-glue.c +++ b/arch/arm64/crypto/sm4-ce-ccm-glue.c @@ -11,7 +11,7 @@ #include <linux/crypto.h> #include <linux/kernel.h> #include <linux/cpufeature.h> -#include <asm/neon.h> +#include <asm/simd.h> #include <crypto/scatterwalk.h> #include <crypto/internal/aead.h> #include <crypto/internal/skcipher.h> @@ -35,10 +35,9 @@ static int ccm_setkey(struct crypto_aead *tfm, const u8 *key, if (key_len != SM4_KEY_SIZE) return -EINVAL; - kernel_neon_begin(); - sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec, - crypto_sm4_fk, crypto_sm4_ck); - kernel_neon_end(); + scoped_ksimd() + sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec, + crypto_sm4_fk, crypto_sm4_ck); return 0; } @@ -167,39 +166,23 @@ static int ccm_crypt(struct aead_request *req, struct skcipher_walk *walk, memcpy(ctr0, walk->iv, SM4_BLOCK_SIZE); crypto_inc(walk->iv, SM4_BLOCK_SIZE); - kernel_neon_begin(); + scoped_ksimd() { + if (req->assoclen) + ccm_calculate_auth_mac(req, mac); - if (req->assoclen) - ccm_calculate_auth_mac(req, mac); - - while (walk->nbytes && walk->nbytes != walk->total) { - unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE; - - sm4_ce_ccm_crypt(rkey_enc, walk->dst.virt.addr, - walk->src.virt.addr, walk->iv, - walk->nbytes - tail, mac); - - kernel_neon_end(); - - err = skcipher_walk_done(walk, tail); - - kernel_neon_begin(); - } - - if (walk->nbytes) { - sm4_ce_ccm_crypt(rkey_enc, walk->dst.virt.addr, - walk->src.virt.addr, walk->iv, - walk->nbytes, mac); + while (walk->nbytes) { + unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE; - sm4_ce_ccm_final(rkey_enc, ctr0, mac); + if (walk->nbytes == walk->total) + tail = 0; - kernel_neon_end(); + sm4_ce_ccm_crypt(rkey_enc, walk->dst.virt.addr, + walk->src.virt.addr, walk->iv, + walk->nbytes - tail, mac); - err = skcipher_walk_done(walk, 0); - } else { + err = skcipher_walk_done(walk, tail); + } sm4_ce_ccm_final(rkey_enc, ctr0, mac); - - kernel_neon_end(); } return err; diff --git a/arch/arm64/crypto/sm4-ce-cipher-glue.c b/arch/arm64/crypto/sm4-ce-cipher-glue.c index c31d76fb5a17..bceec833ef4e 100644 --- a/arch/arm64/crypto/sm4-ce-cipher-glue.c +++ b/arch/arm64/crypto/sm4-ce-cipher-glue.c @@ -32,9 +32,8 @@ static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) if (!crypto_simd_usable()) { sm4_crypt_block(ctx->rkey_enc, out, in); } else { - kernel_neon_begin(); - sm4_ce_do_crypt(ctx->rkey_enc, out, in); - kernel_neon_end(); + scoped_ksimd() + sm4_ce_do_crypt(ctx->rkey_enc, out, in); } } @@ -45,9 +44,8 @@ static void sm4_ce_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) if (!crypto_simd_usable()) { sm4_crypt_block(ctx->rkey_dec, out, in); } else { - kernel_neon_begin(); - sm4_ce_do_crypt(ctx->rkey_dec, out, in); - kernel_neon_end(); + scoped_ksimd() + sm4_ce_do_crypt(ctx->rkey_dec, out, in); } } diff --git a/arch/arm64/crypto/sm4-ce-gcm-glue.c b/arch/arm64/crypto/sm4-ce-gcm-glue.c index c2ea3d5f690b..ef06f4f768a1 100644 --- a/arch/arm64/crypto/sm4-ce-gcm-glue.c +++ b/arch/arm64/crypto/sm4-ce-gcm-glue.c @@ -11,7 +11,7 @@ #include <linux/crypto.h> #include <linux/kernel.h> #include <linux/cpufeature.h> -#include <asm/neon.h> +#include <asm/simd.h> #include <crypto/b128ops.h> #include <crypto/scatterwalk.h> 
 #include <crypto/internal/aead.h>
@@ -48,13 +48,11 @@ static int gcm_setkey(struct crypto_aead *tfm, const u8 *key,
 	if (key_len != SM4_KEY_SIZE)
 		return -EINVAL;
 
-	kernel_neon_begin();
-
-	sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
-			  crypto_sm4_fk, crypto_sm4_ck);
-	sm4_ce_pmull_ghash_setup(ctx->key.rkey_enc, ctx->ghash_table);
-
-	kernel_neon_end();
+	scoped_ksimd() {
+		sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
+				  crypto_sm4_fk, crypto_sm4_ck);
+		sm4_ce_pmull_ghash_setup(ctx->key.rkey_enc, ctx->ghash_table);
+	}
 
 	return 0;
 }
@@ -149,44 +147,28 @@ static int gcm_crypt(struct aead_request *req, struct skcipher_walk *walk,
 	memcpy(iv, req->iv, GCM_IV_SIZE);
 	put_unaligned_be32(2, iv + GCM_IV_SIZE);
 
-	kernel_neon_begin();
+	scoped_ksimd() {
+		if (req->assoclen)
+			gcm_calculate_auth_mac(req, ghash);
 
-	if (req->assoclen)
-		gcm_calculate_auth_mac(req, ghash);
+		do {
+			unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE;
+			const u8 *src = walk->src.virt.addr;
+			u8 *dst = walk->dst.virt.addr;
+			const u8 *l = NULL;
 
-	while (walk->nbytes) {
-		unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE;
-		const u8 *src = walk->src.virt.addr;
-		u8 *dst = walk->dst.virt.addr;
+			if (walk->nbytes == walk->total) {
+				l = (const u8 *)&lengths;
+				tail = 0;
+			}
 
-		if (walk->nbytes == walk->total) {
 			sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv,
-					       walk->nbytes, ghash,
-					       ctx->ghash_table,
-					       (const u8 *)&lengths);
-
-			kernel_neon_end();
-
-			return skcipher_walk_done(walk, 0);
-		}
+					       walk->nbytes - tail, ghash,
+					       ctx->ghash_table, l);
 
-		sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv,
-				       walk->nbytes - tail, ghash,
-				       ctx->ghash_table, NULL);
-
-		kernel_neon_end();
-
-		err = skcipher_walk_done(walk, tail);
-
-		kernel_neon_begin();
+			err = skcipher_walk_done(walk, tail);
+		} while (walk->nbytes);
 	}
-
-	sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, NULL, NULL, iv,
-			       walk->nbytes, ghash, ctx->ghash_table,
-			       (const u8 *)&lengths);
-
-	kernel_neon_end();
-
 	return err;
 }
 
diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c
index 7a60e7b559dc..5569cece5a0b 100644
--- a/arch/arm64/crypto/sm4-ce-glue.c
+++ b/arch/arm64/crypto/sm4-ce-glue.c
@@ -8,7 +8,7 @@
  * Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
  */
 
-#include <asm/neon.h>
+#include <asm/simd.h>
 #include <crypto/b128ops.h>
 #include <crypto/internal/hash.h>
 #include <crypto/internal/skcipher.h>
@@ -74,10 +74,9 @@ static int sm4_setkey(struct crypto_skcipher *tfm, const u8 *key,
 	if (key_len != SM4_KEY_SIZE)
 		return -EINVAL;
 
-	kernel_neon_begin();
-	sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec,
-			  crypto_sm4_fk, crypto_sm4_ck);
-	kernel_neon_end();
+	scoped_ksimd()
+		sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec,
+				  crypto_sm4_fk, crypto_sm4_ck);
 
 	return 0;
 }
@@ -94,12 +93,12 @@ static int sm4_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
 	if (ret)
 		return ret;
 
-	kernel_neon_begin();
-	sm4_ce_expand_key(key, ctx->key1.rkey_enc,
-			  ctx->key1.rkey_dec, crypto_sm4_fk, crypto_sm4_ck);
-	sm4_ce_expand_key(&key[SM4_KEY_SIZE], ctx->key2.rkey_enc,
-			  ctx->key2.rkey_dec, crypto_sm4_fk, crypto_sm4_ck);
-	kernel_neon_end();
+	scoped_ksimd() {
+		sm4_ce_expand_key(key, ctx->key1.rkey_enc,
+				  ctx->key1.rkey_dec, crypto_sm4_fk, crypto_sm4_ck);
+		sm4_ce_expand_key(&key[SM4_KEY_SIZE], ctx->key2.rkey_enc,
+				  ctx->key2.rkey_dec, crypto_sm4_fk, crypto_sm4_ck);
+	}
 
 	return 0;
 }
@@ -117,16 +116,14 @@ static int sm4_ecb_do_crypt(struct skcipher_request *req, const u32 *rkey)
 		u8 *dst = walk.dst.virt.addr;
 		unsigned int nblks;
 
-		kernel_neon_begin();
-
-		nblks = BYTES2BLKS(nbytes);
-		if (nblks) {
-			sm4_ce_crypt(rkey, dst, src, nblks);
-			nbytes -= nblks * SM4_BLOCK_SIZE;
+		scoped_ksimd() {
+			nblks = BYTES2BLKS(nbytes);
+			if (nblks) {
+				sm4_ce_crypt(rkey, dst, src, nblks);
+				nbytes -= nblks * SM4_BLOCK_SIZE;
+			}
 		}
 
-		kernel_neon_end();
-
 		err = skcipher_walk_done(&walk, nbytes);
 	}
 
@@ -167,16 +164,14 @@ static int sm4_cbc_crypt(struct skcipher_request *req,
 		nblocks = nbytes / SM4_BLOCK_SIZE;
 		if (nblocks) {
-			kernel_neon_begin();
-
-			if (encrypt)
-				sm4_ce_cbc_enc(ctx->rkey_enc, dst, src,
-					       walk.iv, nblocks);
-			else
-				sm4_ce_cbc_dec(ctx->rkey_dec, dst, src,
-					       walk.iv, nblocks);
-
-			kernel_neon_end();
+			scoped_ksimd() {
+				if (encrypt)
+					sm4_ce_cbc_enc(ctx->rkey_enc, dst, src,
+						       walk.iv, nblocks);
+				else
+					sm4_ce_cbc_dec(ctx->rkey_dec, dst, src,
+						       walk.iv, nblocks);
+			}
 		}
 
 		err = skcipher_walk_done(&walk, nbytes % SM4_BLOCK_SIZE);
 	}
@@ -249,16 +244,14 @@ static int sm4_cbc_cts_crypt(struct skcipher_request *req, bool encrypt)
 	if (err)
 		return err;
 
-	kernel_neon_begin();
-
-	if (encrypt)
-		sm4_ce_cbc_cts_enc(ctx->rkey_enc, walk.dst.virt.addr,
-				   walk.src.virt.addr, walk.iv, walk.nbytes);
-	else
-		sm4_ce_cbc_cts_dec(ctx->rkey_dec, walk.dst.virt.addr,
-				   walk.src.virt.addr, walk.iv, walk.nbytes);
-
-	kernel_neon_end();
+	scoped_ksimd() {
+		if (encrypt)
+			sm4_ce_cbc_cts_enc(ctx->rkey_enc, walk.dst.virt.addr,
+					   walk.src.virt.addr, walk.iv, walk.nbytes);
+		else
+			sm4_ce_cbc_cts_dec(ctx->rkey_dec, walk.dst.virt.addr,
+					   walk.src.virt.addr, walk.iv, walk.nbytes);
+	}
 
 	return skcipher_walk_done(&walk, 0);
 }
@@ -288,28 +281,26 @@ static int sm4_ctr_crypt(struct skcipher_request *req)
 		u8 *dst = walk.dst.virt.addr;
 		unsigned int nblks;
 
-		kernel_neon_begin();
-
-		nblks = BYTES2BLKS(nbytes);
-		if (nblks) {
-			sm4_ce_ctr_enc(ctx->rkey_enc, dst, src, walk.iv, nblks);
-			dst += nblks * SM4_BLOCK_SIZE;
-			src += nblks * SM4_BLOCK_SIZE;
-			nbytes -= nblks * SM4_BLOCK_SIZE;
-		}
-
-		/* tail */
-		if (walk.nbytes == walk.total && nbytes > 0) {
-			u8 keystream[SM4_BLOCK_SIZE];
-
-			sm4_ce_crypt_block(ctx->rkey_enc, keystream, walk.iv);
-			crypto_inc(walk.iv, SM4_BLOCK_SIZE);
-			crypto_xor_cpy(dst, src, keystream, nbytes);
-			nbytes = 0;
+		scoped_ksimd() {
+			nblks = BYTES2BLKS(nbytes);
+			if (nblks) {
+				sm4_ce_ctr_enc(ctx->rkey_enc, dst, src, walk.iv, nblks);
+				dst += nblks * SM4_BLOCK_SIZE;
+				src += nblks * SM4_BLOCK_SIZE;
+				nbytes -= nblks * SM4_BLOCK_SIZE;
+			}
+
+			/* tail */
+			if (walk.nbytes == walk.total && nbytes > 0) {
+				u8 keystream[SM4_BLOCK_SIZE];
+
+				sm4_ce_crypt_block(ctx->rkey_enc, keystream, walk.iv);
+				crypto_inc(walk.iv, SM4_BLOCK_SIZE);
+				crypto_xor_cpy(dst, src, keystream, nbytes);
+				nbytes = 0;
+			}
 		}
 
-		kernel_neon_end();
-
 		err = skcipher_walk_done(&walk, nbytes);
 	}
 
@@ -359,18 +350,16 @@ static int sm4_xts_crypt(struct skcipher_request *req, bool encrypt)
 		if (nbytes < walk.total)
 			nbytes &= ~(SM4_BLOCK_SIZE - 1);
 
-		kernel_neon_begin();
-
-		if (encrypt)
-			sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr,
-				       walk.src.virt.addr, walk.iv, nbytes,
-				       rkey2_enc);
-		else
-			sm4_ce_xts_dec(ctx->key1.rkey_dec, walk.dst.virt.addr,
-				       walk.src.virt.addr, walk.iv, nbytes,
-				       rkey2_enc);
-
-		kernel_neon_end();
+		scoped_ksimd() {
+			if (encrypt)
+				sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr,
+					       walk.src.virt.addr, walk.iv, nbytes,
+					       rkey2_enc);
+			else
+				sm4_ce_xts_dec(ctx->key1.rkey_dec, walk.dst.virt.addr,
+					       walk.src.virt.addr, walk.iv, nbytes,
+					       rkey2_enc);
+		}
 
 		rkey2_enc = NULL;
 
@@ -395,18 +384,16 @@ static int sm4_xts_crypt(struct skcipher_request *req, bool encrypt)
 	if (err)
 		return err;
 
-	kernel_neon_begin();
-
-	if (encrypt)
-		sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr,
-			       walk.src.virt.addr, walk.iv, walk.nbytes,
-			       rkey2_enc);
-	else
-		sm4_ce_xts_dec(ctx->key1.rkey_dec, walk.dst.virt.addr,
-			       walk.src.virt.addr, walk.iv, walk.nbytes,
-			       rkey2_enc);
-
-	kernel_neon_end();
+	scoped_ksimd() {
+		if (encrypt)
+			sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr,
+				       walk.src.virt.addr, walk.iv, walk.nbytes,
+				       rkey2_enc);
+		else
+			sm4_ce_xts_dec(ctx->key1.rkey_dec, walk.dst.virt.addr,
+				       walk.src.virt.addr, walk.iv, walk.nbytes,
+				       rkey2_enc);
+	}
 
 	return skcipher_walk_done(&walk, 0);
 }
@@ -510,11 +497,9 @@ static int sm4_cbcmac_setkey(struct crypto_shash *tfm, const u8 *key,
 	if (key_len != SM4_KEY_SIZE)
 		return -EINVAL;
 
-	kernel_neon_begin();
-	sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
-			  crypto_sm4_fk, crypto_sm4_ck);
-	kernel_neon_end();
-
+	scoped_ksimd()
+		sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
+				  crypto_sm4_fk, crypto_sm4_ck);
 	return 0;
 }
 
@@ -530,15 +515,13 @@ static int sm4_cmac_setkey(struct crypto_shash *tfm, const u8 *key,
 
 	memset(consts, 0, SM4_BLOCK_SIZE);
 
-	kernel_neon_begin();
-
-	sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
-			  crypto_sm4_fk, crypto_sm4_ck);
+	scoped_ksimd() {
+		sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
+				  crypto_sm4_fk, crypto_sm4_ck);
 
-	/* encrypt the zero block */
-	sm4_ce_crypt_block(ctx->key.rkey_enc, (u8 *)consts, (const u8 *)consts);
-
-	kernel_neon_end();
+		/* encrypt the zero block */
+		sm4_ce_crypt_block(ctx->key.rkey_enc, (u8 *)consts, (const u8 *)consts);
+	}
 
 	/* gf(2^128) multiply zero-ciphertext with u and u^2 */
 	a = be64_to_cpu(consts[0].a);
@@ -568,18 +551,16 @@ static int sm4_xcbc_setkey(struct crypto_shash *tfm, const u8 *key,
 	if (key_len != SM4_KEY_SIZE)
 		return -EINVAL;
 
-	kernel_neon_begin();
-
-	sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
-			  crypto_sm4_fk, crypto_sm4_ck);
+	scoped_ksimd() {
+		sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
+				  crypto_sm4_fk, crypto_sm4_ck);
 
-	sm4_ce_crypt_block(ctx->key.rkey_enc, key2, ks[0]);
-	sm4_ce_crypt(ctx->key.rkey_enc, ctx->consts, ks[1], 2);
+		sm4_ce_crypt_block(ctx->key.rkey_enc, key2, ks[0]);
+		sm4_ce_crypt(ctx->key.rkey_enc, ctx->consts, ks[1], 2);
 
-	sm4_ce_expand_key(key2, ctx->key.rkey_enc, ctx->key.rkey_dec,
-			  crypto_sm4_fk, crypto_sm4_ck);
-
-	kernel_neon_end();
+		sm4_ce_expand_key(key2, ctx->key.rkey_enc, ctx->key.rkey_dec,
+				  crypto_sm4_fk, crypto_sm4_ck);
+	}
 
 	return 0;
 }
@@ -600,10 +581,9 @@ static int sm4_mac_update(struct shash_desc *desc, const u8 *p,
 	unsigned int nblocks = len / SM4_BLOCK_SIZE;
 	len %= SM4_BLOCK_SIZE;
 
-	kernel_neon_begin();
-	sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, p,
-			  nblocks, false, true);
-	kernel_neon_end();
+	scoped_ksimd()
+		sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, p,
+				  nblocks, false, true);
 
 	return len;
 }
@@ -619,10 +599,9 @@ static int sm4_cmac_finup(struct shash_desc *desc, const u8 *src,
 		ctx->digest[len] ^= 0x80;
 		consts += SM4_BLOCK_SIZE;
 	}
-	kernel_neon_begin();
-	sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, consts, 1,
-			  false, true);
-	kernel_neon_end();
+	scoped_ksimd()
+		sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, consts, 1,
+				  false, true);
 	memcpy(out, ctx->digest, SM4_BLOCK_SIZE);
 	return 0;
 }
@@ -635,10 +614,9 @@ static int sm4_cbcmac_finup(struct shash_desc *desc, const u8 *src,
 	if (len) {
 		crypto_xor(ctx->digest, src, len);
-		kernel_neon_begin();
-		sm4_ce_crypt_block(tctx->key.rkey_enc, ctx->digest,
-				   ctx->digest);
-		kernel_neon_end();
+		scoped_ksimd()
+			sm4_ce_crypt_block(tctx->key.rkey_enc, ctx->digest,
+					   ctx->digest);
 	}
 	memcpy(out, ctx->digest, SM4_BLOCK_SIZE);
 	return 0;
 }
diff --git a/arch/arm64/crypto/sm4-neon-glue.c b/arch/arm64/crypto/sm4-neon-glue.c
index e3500aca2d18..e944c2a2efb0 100644
--- a/arch/arm64/crypto/sm4-neon-glue.c
+++ b/arch/arm64/crypto/sm4-neon-glue.c
@@ -48,11 +48,8 @@ static int sm4_ecb_do_crypt(struct skcipher_request *req, const u32 *rkey)
 		nblocks = nbytes / SM4_BLOCK_SIZE;
 		if (nblocks) {
-			kernel_neon_begin();
-
-			sm4_neon_crypt(rkey, dst, src, nblocks);
-
-			kernel_neon_end();
+			scoped_ksimd()
+				sm4_neon_crypt(rkey, dst, src, nblocks);
 		}
 
 		err = skcipher_walk_done(&walk, nbytes % SM4_BLOCK_SIZE);
 	}
@@ -126,12 +123,9 @@ static int sm4_cbc_decrypt(struct skcipher_request *req)
 		nblocks = nbytes / SM4_BLOCK_SIZE;
 		if (nblocks) {
-			kernel_neon_begin();
-
-			sm4_neon_cbc_dec(ctx->rkey_dec, dst, src,
-					 walk.iv, nblocks);
-
-			kernel_neon_end();
+			scoped_ksimd()
+				sm4_neon_cbc_dec(ctx->rkey_dec, dst, src,
+						 walk.iv, nblocks);
 		}
 
 		err = skcipher_walk_done(&walk, nbytes % SM4_BLOCK_SIZE);
 	}
@@ -157,12 +151,9 @@ static int sm4_ctr_crypt(struct skcipher_request *req)
 		nblocks = nbytes / SM4_BLOCK_SIZE;
 		if (nblocks) {
-			kernel_neon_begin();
-
-			sm4_neon_ctr_crypt(ctx->rkey_enc, dst, src,
-					   walk.iv, nblocks);
-
-			kernel_neon_end();
+			scoped_ksimd()
+				sm4_neon_ctr_crypt(ctx->rkey_enc, dst, src,
+						   walk.iv, nblocks);
 
 			dst += nblks * SM4_BLOCK_SIZE;
 			src += nblks * SM4_BLOCK_SIZE;
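Every hunk above applies the same transformation: an open-coded kernel_neon_begin()/kernel_neon_end() pair (pulled in via <asm/neon.h>) becomes a scoped_ksimd() statement or block (pulled in via <asm/simd.h>), so the SIMD-enabled region is delimited by C scope rather than by a pair of calls that must be kept balanced by hand. The sketch below is illustrative only: it models the shape of the macro in standalone C with stub begin/end functions; the kernel's real scoped_ksimd() is built on its guard/cleanup infrastructure and also covers early exits from the scope.

	#include <stdio.h>

	/* Stand-ins for the kernel's kernel_neon_begin()/kernel_neon_end(). */
	static void kernel_neon_begin(void) { puts("SIMD state saved, section entered"); }
	static void kernel_neon_end(void)   { puts("section left, SIMD state restored"); }

	/*
	 * Minimal model of scoped_ksimd(): a one-iteration for-loop that runs
	 * kernel_neon_begin() before the attached statement (or braced block)
	 * and kernel_neon_end() after it.  Unlike the kernel's macro, this toy
	 * version does not protect against leaving the scope via break/goto.
	 */
	#define scoped_ksimd()						\
		for (int once = (kernel_neon_begin(), 1); once;		\
		     kernel_neon_end(), once = 0)

	int main(void)
	{
		scoped_ksimd() {
			puts("  SIMD work goes here");
		}
		return 0;
	}

The two usage forms in the diff fall out of this shape directly: a single call such as sm4_ce_expand_key() hangs straight off scoped_ksimd(), while multi-statement regions, like the reworked CCM/GCM walk loops that now keep skcipher_walk_done() inside the SIMD section, take a braced block.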
