diff options
Diffstat (limited to 'lib/crypto/powerpc')
-rw-r--r-- | lib/crypto/powerpc/Kconfig | 16 | ||||
-rw-r--r-- | lib/crypto/powerpc/Makefile | 7 | ||||
-rw-r--r-- | lib/crypto/powerpc/chacha.h (renamed from lib/crypto/powerpc/chacha-p10-glue.c) | 36 | ||||
-rw-r--r-- | lib/crypto/powerpc/curve25519-ppc64le_asm.S | 671 | ||||
-rw-r--r-- | lib/crypto/powerpc/curve25519.h | 186 | ||||
-rw-r--r-- | lib/crypto/powerpc/md5-asm.S | 235 | ||||
-rw-r--r-- | lib/crypto/powerpc/md5.h | 12 | ||||
-rw-r--r-- | lib/crypto/powerpc/poly1305.h (renamed from lib/crypto/powerpc/poly1305-p10-glue.c) | 40 |
8 files changed, 1119 insertions, 84 deletions
diff --git a/lib/crypto/powerpc/Kconfig b/lib/crypto/powerpc/Kconfig deleted file mode 100644 index 2eaeb7665a6a..000000000000 --- a/lib/crypto/powerpc/Kconfig +++ /dev/null @@ -1,16 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -config CRYPTO_CHACHA20_P10 - tristate - depends on PPC64 && CPU_LITTLE_ENDIAN && VSX - default CRYPTO_LIB_CHACHA - select CRYPTO_LIB_CHACHA_GENERIC - select CRYPTO_ARCH_HAVE_LIB_CHACHA - -config CRYPTO_POLY1305_P10 - tristate - depends on PPC64 && CPU_LITTLE_ENDIAN && VSX - depends on BROKEN # Needs to be fixed to work in softirq context - default CRYPTO_LIB_POLY1305 - select CRYPTO_ARCH_HAVE_LIB_POLY1305 - select CRYPTO_LIB_POLY1305_GENERIC diff --git a/lib/crypto/powerpc/Makefile b/lib/crypto/powerpc/Makefile deleted file mode 100644 index 5709ae14258a..000000000000 --- a/lib/crypto/powerpc/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -obj-$(CONFIG_CRYPTO_CHACHA20_P10) += chacha-p10-crypto.o -chacha-p10-crypto-y := chacha-p10-glue.o chacha-p10le-8x.o - -obj-$(CONFIG_CRYPTO_POLY1305_P10) += poly1305-p10-crypto.o -poly1305-p10-crypto-y := poly1305-p10-glue.o poly1305-p10le_64.o diff --git a/lib/crypto/powerpc/chacha-p10-glue.c b/lib/crypto/powerpc/chacha.h index fcd23c6f1590..1df6e1ce31c4 100644 --- a/lib/crypto/powerpc/chacha-p10-glue.c +++ b/lib/crypto/powerpc/chacha.h @@ -1,14 +1,12 @@ -// SPDX-License-Identifier: GPL-2.0-or-later +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * ChaCha stream cipher (P10 accelerated) * * Copyright 2023- IBM Corp. All rights reserved. */ -#include <crypto/chacha.h> #include <crypto/internal/simd.h> #include <linux/kernel.h> -#include <linux/module.h> #include <linux/cpufeature.h> #include <linux/sizes.h> #include <asm/simd.h> @@ -48,15 +46,10 @@ static void chacha_p10_do_8x(struct chacha_state *state, u8 *dst, const u8 *src, chacha_crypt_generic(state, dst, src, bytes, nrounds); } -void hchacha_block_arch(const struct chacha_state *state, - u32 out[HCHACHA_OUT_WORDS], int nrounds) -{ - hchacha_block_generic(state, out, nrounds); -} -EXPORT_SYMBOL(hchacha_block_arch); +#define hchacha_block_arch hchacha_block_generic /* not implemented yet */ -void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src, - unsigned int bytes, int nrounds) +static void chacha_crypt_arch(struct chacha_state *state, u8 *dst, + const u8 *src, unsigned int bytes, int nrounds) { if (!static_branch_likely(&have_p10) || bytes <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) @@ -74,27 +67,10 @@ void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src, dst += todo; } while (bytes); } -EXPORT_SYMBOL(chacha_crypt_arch); - -bool chacha_is_arch_optimized(void) -{ - return static_key_enabled(&have_p10); -} -EXPORT_SYMBOL(chacha_is_arch_optimized); -static int __init chacha_p10_init(void) +#define chacha_mod_init_arch chacha_mod_init_arch +static void chacha_mod_init_arch(void) { if (cpu_has_feature(CPU_FTR_ARCH_31)) static_branch_enable(&have_p10); - return 0; } -subsys_initcall(chacha_p10_init); - -static void __exit chacha_p10_exit(void) -{ -} -module_exit(chacha_p10_exit); - -MODULE_DESCRIPTION("ChaCha stream cipher (P10 accelerated)"); -MODULE_AUTHOR("Danny Tsen <dtsen@linux.ibm.com>"); -MODULE_LICENSE("GPL v2"); diff --git a/lib/crypto/powerpc/curve25519-ppc64le_asm.S b/lib/crypto/powerpc/curve25519-ppc64le_asm.S new file mode 100644 index 000000000000..06c1febe24b9 --- /dev/null +++ b/lib/crypto/powerpc/curve25519-ppc64le_asm.S @@ -0,0 +1,671 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +# +# This code is taken from CRYPTOGAMs[1] and is included here using the option +# in the license to distribute the code under the GPL. Therefore this program +# is free software; you can redistribute it and/or modify it under the terms of +# the GNU General Public License version 2 as published by the Free Software +# Foundation. +# +# [1] https://github.com/dot-asm/cryptogams/ + +# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org> +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain copyright notices, +# this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials +# provided with the distribution. +# +# * Neither the name of the CRYPTOGAMS nor the names of its +# copyright holder and contributors may be used to endorse or +# promote products derived from this software without specific +# prior written permission. +# +# ALTERNATIVELY, provided that this notice is retained in full, this +# product may be distributed under the terms of the GNU General Public +# License (GPL), in which case the provisions of the GPL apply INSTEAD OF +# those given above. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# ==================================================================== +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see https://www.openssl.org/~appro/cryptogams/. +# ==================================================================== + +# +# ==================================================================== +# Written and Modified by Danny Tsen <dtsen@us.ibm.com> +# - Added x25519_fe51_sqr_times, x25519_fe51_frombytes, x25519_fe51_tobytes +# and x25519_cswap +# +# Copyright 2024- IBM Corp. +# +# X25519 lower-level primitives for PPC64. +# + +#include <linux/linkage.h> + +.text + +.align 5 +SYM_FUNC_START(x25519_fe51_mul) + + stdu 1,-144(1) + std 21,56(1) + std 22,64(1) + std 23,72(1) + std 24,80(1) + std 25,88(1) + std 26,96(1) + std 27,104(1) + std 28,112(1) + std 29,120(1) + std 30,128(1) + std 31,136(1) + + ld 6,0(5) + ld 7,0(4) + ld 8,8(4) + ld 9,16(4) + ld 10,24(4) + ld 11,32(4) + + mulld 22,7,6 + mulhdu 23,7,6 + + mulld 24,8,6 + mulhdu 25,8,6 + + mulld 30,11,6 + mulhdu 31,11,6 + ld 4,8(5) + mulli 11,11,19 + + mulld 26,9,6 + mulhdu 27,9,6 + + mulld 28,10,6 + mulhdu 29,10,6 + mulld 12,11,4 + mulhdu 21,11,4 + addc 22,22,12 + adde 23,23,21 + + mulld 12,7,4 + mulhdu 21,7,4 + addc 24,24,12 + adde 25,25,21 + + mulld 12,10,4 + mulhdu 21,10,4 + ld 6,16(5) + mulli 10,10,19 + addc 30,30,12 + adde 31,31,21 + + mulld 12,8,4 + mulhdu 21,8,4 + addc 26,26,12 + adde 27,27,21 + + mulld 12,9,4 + mulhdu 21,9,4 + addc 28,28,12 + adde 29,29,21 + mulld 12,10,6 + mulhdu 21,10,6 + addc 22,22,12 + adde 23,23,21 + + mulld 12,11,6 + mulhdu 21,11,6 + addc 24,24,12 + adde 25,25,21 + + mulld 12,9,6 + mulhdu 21,9,6 + ld 4,24(5) + mulli 9,9,19 + addc 30,30,12 + adde 31,31,21 + + mulld 12,7,6 + mulhdu 21,7,6 + addc 26,26,12 + adde 27,27,21 + + mulld 12,8,6 + mulhdu 21,8,6 + addc 28,28,12 + adde 29,29,21 + mulld 12,9,4 + mulhdu 21,9,4 + addc 22,22,12 + adde 23,23,21 + + mulld 12,10,4 + mulhdu 21,10,4 + addc 24,24,12 + adde 25,25,21 + + mulld 12,8,4 + mulhdu 21,8,4 + ld 6,32(5) + mulli 8,8,19 + addc 30,30,12 + adde 31,31,21 + + mulld 12,11,4 + mulhdu 21,11,4 + addc 26,26,12 + adde 27,27,21 + + mulld 12,7,4 + mulhdu 21,7,4 + addc 28,28,12 + adde 29,29,21 + mulld 12,8,6 + mulhdu 21,8,6 + addc 22,22,12 + adde 23,23,21 + + mulld 12,9,6 + mulhdu 21,9,6 + addc 24,24,12 + adde 25,25,21 + + mulld 12,10,6 + mulhdu 21,10,6 + addc 26,26,12 + adde 27,27,21 + + mulld 12,11,6 + mulhdu 21,11,6 + addc 28,28,12 + adde 29,29,21 + + mulld 12,7,6 + mulhdu 21,7,6 + addc 30,30,12 + adde 31,31,21 + +.Lfe51_reduce: + li 0,-1 + srdi 0,0,13 + + srdi 12,26,51 + and 9,26,0 + insrdi 12,27,51,0 + srdi 21,22,51 + and 7,22,0 + insrdi 21,23,51,0 + addc 28,28,12 + addze 29,29 + addc 24,24,21 + addze 25,25 + + srdi 12,28,51 + and 10,28,0 + insrdi 12,29,51,0 + srdi 21,24,51 + and 8,24,0 + insrdi 21,25,51,0 + addc 30,30,12 + addze 31,31 + add 9,9,21 + + srdi 12,30,51 + and 11,30,0 + insrdi 12,31,51,0 + mulli 12,12,19 + + add 7,7,12 + + srdi 21,9,51 + and 9,9,0 + add 10,10,21 + + srdi 12,7,51 + and 7,7,0 + add 8,8,12 + + std 9,16(3) + std 10,24(3) + std 11,32(3) + std 7,0(3) + std 8,8(3) + + ld 21,56(1) + ld 22,64(1) + ld 23,72(1) + ld 24,80(1) + ld 25,88(1) + ld 26,96(1) + ld 27,104(1) + ld 28,112(1) + ld 29,120(1) + ld 30,128(1) + ld 31,136(1) + addi 1,1,144 + blr +SYM_FUNC_END(x25519_fe51_mul) + +.align 5 +SYM_FUNC_START(x25519_fe51_sqr) + + stdu 1,-144(1) + std 21,56(1) + std 22,64(1) + std 23,72(1) + std 24,80(1) + std 25,88(1) + std 26,96(1) + std 27,104(1) + std 28,112(1) + std 29,120(1) + std 30,128(1) + std 31,136(1) + + ld 7,0(4) + ld 8,8(4) + ld 9,16(4) + ld 10,24(4) + ld 11,32(4) + + add 6,7,7 + mulli 21,11,19 + + mulld 22,7,7 + mulhdu 23,7,7 + mulld 24,8,6 + mulhdu 25,8,6 + mulld 26,9,6 + mulhdu 27,9,6 + mulld 28,10,6 + mulhdu 29,10,6 + mulld 30,11,6 + mulhdu 31,11,6 + add 6,8,8 + mulld 12,11,21 + mulhdu 11,11,21 + addc 28,28,12 + adde 29,29,11 + + mulli 5,10,19 + + mulld 12,8,8 + mulhdu 11,8,8 + addc 26,26,12 + adde 27,27,11 + mulld 12,9,6 + mulhdu 11,9,6 + addc 28,28,12 + adde 29,29,11 + mulld 12,10,6 + mulhdu 11,10,6 + addc 30,30,12 + adde 31,31,11 + mulld 12,21,6 + mulhdu 11,21,6 + add 6,10,10 + addc 22,22,12 + adde 23,23,11 + mulld 12,10,5 + mulhdu 10,10,5 + addc 24,24,12 + adde 25,25,10 + mulld 12,6,21 + mulhdu 10,6,21 + add 6,9,9 + addc 26,26,12 + adde 27,27,10 + + mulld 12,9,9 + mulhdu 10,9,9 + addc 30,30,12 + adde 31,31,10 + mulld 12,5,6 + mulhdu 10,5,6 + addc 22,22,12 + adde 23,23,10 + mulld 12,21,6 + mulhdu 10,21,6 + addc 24,24,12 + adde 25,25,10 + + b .Lfe51_reduce +SYM_FUNC_END(x25519_fe51_sqr) + +.align 5 +SYM_FUNC_START(x25519_fe51_mul121666) + + stdu 1,-144(1) + std 21,56(1) + std 22,64(1) + std 23,72(1) + std 24,80(1) + std 25,88(1) + std 26,96(1) + std 27,104(1) + std 28,112(1) + std 29,120(1) + std 30,128(1) + std 31,136(1) + + lis 6,1 + ori 6,6,56130 + ld 7,0(4) + ld 8,8(4) + ld 9,16(4) + ld 10,24(4) + ld 11,32(4) + + mulld 22,7,6 + mulhdu 23,7,6 + mulld 24,8,6 + mulhdu 25,8,6 + mulld 26,9,6 + mulhdu 27,9,6 + mulld 28,10,6 + mulhdu 29,10,6 + mulld 30,11,6 + mulhdu 31,11,6 + + b .Lfe51_reduce +SYM_FUNC_END(x25519_fe51_mul121666) + +.align 5 +SYM_FUNC_START(x25519_fe51_sqr_times) + + stdu 1,-144(1) + std 21,56(1) + std 22,64(1) + std 23,72(1) + std 24,80(1) + std 25,88(1) + std 26,96(1) + std 27,104(1) + std 28,112(1) + std 29,120(1) + std 30,128(1) + std 31,136(1) + + ld 7,0(4) + ld 8,8(4) + ld 9,16(4) + ld 10,24(4) + ld 11,32(4) + + mtctr 5 + +.Lsqr_times_loop: + add 6,7,7 + mulli 21,11,19 + + mulld 22,7,7 + mulhdu 23,7,7 + mulld 24,8,6 + mulhdu 25,8,6 + mulld 26,9,6 + mulhdu 27,9,6 + mulld 28,10,6 + mulhdu 29,10,6 + mulld 30,11,6 + mulhdu 31,11,6 + add 6,8,8 + mulld 12,11,21 + mulhdu 11,11,21 + addc 28,28,12 + adde 29,29,11 + + mulli 5,10,19 + + mulld 12,8,8 + mulhdu 11,8,8 + addc 26,26,12 + adde 27,27,11 + mulld 12,9,6 + mulhdu 11,9,6 + addc 28,28,12 + adde 29,29,11 + mulld 12,10,6 + mulhdu 11,10,6 + addc 30,30,12 + adde 31,31,11 + mulld 12,21,6 + mulhdu 11,21,6 + add 6,10,10 + addc 22,22,12 + adde 23,23,11 + mulld 12,10,5 + mulhdu 10,10,5 + addc 24,24,12 + adde 25,25,10 + mulld 12,6,21 + mulhdu 10,6,21 + add 6,9,9 + addc 26,26,12 + adde 27,27,10 + + mulld 12,9,9 + mulhdu 10,9,9 + addc 30,30,12 + adde 31,31,10 + mulld 12,5,6 + mulhdu 10,5,6 + addc 22,22,12 + adde 23,23,10 + mulld 12,21,6 + mulhdu 10,21,6 + addc 24,24,12 + adde 25,25,10 + + # fe51_reduce + li 0,-1 + srdi 0,0,13 + + srdi 12,26,51 + and 9,26,0 + insrdi 12,27,51,0 + srdi 21,22,51 + and 7,22,0 + insrdi 21,23,51,0 + addc 28,28,12 + addze 29,29 + addc 24,24,21 + addze 25,25 + + srdi 12,28,51 + and 10,28,0 + insrdi 12,29,51,0 + srdi 21,24,51 + and 8,24,0 + insrdi 21,25,51,0 + addc 30,30,12 + addze 31,31 + add 9,9,21 + + srdi 12,30,51 + and 11,30,0 + insrdi 12,31,51,0 + mulli 12,12,19 + + add 7,7,12 + + srdi 21,9,51 + and 9,9,0 + add 10,10,21 + + srdi 12,7,51 + and 7,7,0 + add 8,8,12 + + bdnz .Lsqr_times_loop + + std 9,16(3) + std 10,24(3) + std 11,32(3) + std 7,0(3) + std 8,8(3) + + ld 21,56(1) + ld 22,64(1) + ld 23,72(1) + ld 24,80(1) + ld 25,88(1) + ld 26,96(1) + ld 27,104(1) + ld 28,112(1) + ld 29,120(1) + ld 30,128(1) + ld 31,136(1) + addi 1,1,144 + blr +SYM_FUNC_END(x25519_fe51_sqr_times) + +.align 5 +SYM_FUNC_START(x25519_fe51_frombytes) + + li 12, -1 + srdi 12, 12, 13 # 0x7ffffffffffff + + ld 5, 0(4) + ld 6, 8(4) + ld 7, 16(4) + ld 8, 24(4) + + srdi 10, 5, 51 + and 5, 5, 12 # h0 + + sldi 11, 6, 13 + or 11, 10, 11 # h1t + srdi 10, 6, 38 + and 6, 11, 12 # h1 + + sldi 11, 7, 26 + or 10, 10, 11 # h2t + + srdi 11, 7, 25 + and 7, 10, 12 # h2 + sldi 10, 8, 39 + or 11, 11, 10 # h3t + + srdi 9, 8, 12 + and 8, 11, 12 # h3 + and 9, 9, 12 # h4 + + std 5, 0(3) + std 6, 8(3) + std 7, 16(3) + std 8, 24(3) + std 9, 32(3) + + blr +SYM_FUNC_END(x25519_fe51_frombytes) + +.align 5 +SYM_FUNC_START(x25519_fe51_tobytes) + + ld 5, 0(4) + ld 6, 8(4) + ld 7, 16(4) + ld 8, 24(4) + ld 9, 32(4) + + li 12, -1 + srdi 12, 12, 13 # 0x7ffffffffffff + + # Full reducuction + addi 10, 5, 19 + srdi 10, 10, 51 + add 10, 10, 6 + srdi 10, 10, 51 + add 10, 10, 7 + srdi 10, 10, 51 + add 10, 10, 8 + srdi 10, 10, 51 + add 10, 10, 9 + srdi 10, 10, 51 + + mulli 10, 10, 19 + add 5, 5, 10 + srdi 11, 5, 51 + add 6, 6, 11 + srdi 11, 6, 51 + add 7, 7, 11 + srdi 11, 7, 51 + add 8, 8, 11 + srdi 11, 8, 51 + add 9, 9, 11 + + and 5, 5, 12 + and 6, 6, 12 + and 7, 7, 12 + and 8, 8, 12 + and 9, 9, 12 + + sldi 10, 6, 51 + or 5, 5, 10 # s0 + + srdi 11, 6, 13 + sldi 10, 7, 38 + or 6, 11, 10 # s1 + + srdi 11, 7, 26 + sldi 10, 8, 25 + or 7, 11, 10 # s2 + + srdi 11, 8, 39 + sldi 10, 9, 12 + or 8, 11, 10 # s4 + + std 5, 0(3) + std 6, 8(3) + std 7, 16(3) + std 8, 24(3) + + blr +SYM_FUNC_END(x25519_fe51_tobytes) + +.align 5 +SYM_FUNC_START(x25519_cswap) + + li 7, 5 + neg 6, 5 + mtctr 7 + +.Lswap_loop: + ld 8, 0(3) + ld 9, 0(4) + xor 10, 8, 9 + and 10, 10, 6 + xor 11, 8, 10 + xor 12, 9, 10 + std 11, 0(3) + addi 3, 3, 8 + std 12, 0(4) + addi 4, 4, 8 + bdnz .Lswap_loop + + blr +SYM_FUNC_END(x25519_cswap) diff --git a/lib/crypto/powerpc/curve25519.h b/lib/crypto/powerpc/curve25519.h new file mode 100644 index 000000000000..dee6234c48e9 --- /dev/null +++ b/lib/crypto/powerpc/curve25519.h @@ -0,0 +1,186 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2024- IBM Corp. + * + * X25519 scalar multiplication with 51 bits limbs for PPC64le. + * Based on RFC7748 and AArch64 optimized implementation for X25519 + * - Algorithm 1 Scalar multiplication of a variable point + */ + +#include <linux/types.h> +#include <linux/jump_label.h> +#include <linux/kernel.h> + +#include <linux/cpufeature.h> +#include <linux/processor.h> + +typedef uint64_t fe51[5]; + +asmlinkage void x25519_fe51_mul(fe51 h, const fe51 f, const fe51 g); +asmlinkage void x25519_fe51_sqr(fe51 h, const fe51 f); +asmlinkage void x25519_fe51_mul121666(fe51 h, fe51 f); +asmlinkage void x25519_fe51_sqr_times(fe51 h, const fe51 f, int n); +asmlinkage void x25519_fe51_frombytes(fe51 h, const uint8_t *s); +asmlinkage void x25519_fe51_tobytes(uint8_t *s, const fe51 h); +asmlinkage void x25519_cswap(fe51 p, fe51 q, unsigned int bit); + +#define fmul x25519_fe51_mul +#define fsqr x25519_fe51_sqr +#define fmul121666 x25519_fe51_mul121666 +#define fe51_tobytes x25519_fe51_tobytes + +static void fadd(fe51 h, const fe51 f, const fe51 g) +{ + h[0] = f[0] + g[0]; + h[1] = f[1] + g[1]; + h[2] = f[2] + g[2]; + h[3] = f[3] + g[3]; + h[4] = f[4] + g[4]; +} + +/* + * Prime = 2 ** 255 - 19, 255 bits + * (0x7fffffff ffffffff ffffffff ffffffff ffffffff ffffffff ffffffff ffffffed) + * + * Prime in 5 51-bit limbs + */ +static fe51 prime51 = { 0x7ffffffffffed, 0x7ffffffffffff, 0x7ffffffffffff, 0x7ffffffffffff, 0x7ffffffffffff}; + +static void fsub(fe51 h, const fe51 f, const fe51 g) +{ + h[0] = (f[0] + ((prime51[0] * 2))) - g[0]; + h[1] = (f[1] + ((prime51[1] * 2))) - g[1]; + h[2] = (f[2] + ((prime51[2] * 2))) - g[2]; + h[3] = (f[3] + ((prime51[3] * 2))) - g[3]; + h[4] = (f[4] + ((prime51[4] * 2))) - g[4]; +} + +static void fe51_frombytes(fe51 h, const uint8_t *s) +{ + /* + * Make sure 64-bit aligned. + */ + unsigned char sbuf[32+8]; + unsigned char *sb = PTR_ALIGN((void *)sbuf, 8); + + memcpy(sb, s, 32); + x25519_fe51_frombytes(h, sb); +} + +static void finv(fe51 o, const fe51 i) +{ + fe51 a0, b, c, t00; + + fsqr(a0, i); + x25519_fe51_sqr_times(t00, a0, 2); + + fmul(b, t00, i); + fmul(a0, b, a0); + + fsqr(t00, a0); + + fmul(b, t00, b); + x25519_fe51_sqr_times(t00, b, 5); + + fmul(b, t00, b); + x25519_fe51_sqr_times(t00, b, 10); + + fmul(c, t00, b); + x25519_fe51_sqr_times(t00, c, 20); + + fmul(t00, t00, c); + x25519_fe51_sqr_times(t00, t00, 10); + + fmul(b, t00, b); + x25519_fe51_sqr_times(t00, b, 50); + + fmul(c, t00, b); + x25519_fe51_sqr_times(t00, c, 100); + + fmul(t00, t00, c); + x25519_fe51_sqr_times(t00, t00, 50); + + fmul(t00, t00, b); + x25519_fe51_sqr_times(t00, t00, 5); + + fmul(o, t00, a0); +} + +static void curve25519_fe51(uint8_t out[32], const uint8_t scalar[32], + const uint8_t point[32]) +{ + fe51 x1, x2, z2, x3, z3; + uint8_t s[32]; + unsigned int swap = 0; + int i; + + memcpy(s, scalar, 32); + s[0] &= 0xf8; + s[31] &= 0x7f; + s[31] |= 0x40; + fe51_frombytes(x1, point); + + z2[0] = z2[1] = z2[2] = z2[3] = z2[4] = 0; + x3[0] = x1[0]; + x3[1] = x1[1]; + x3[2] = x1[2]; + x3[3] = x1[3]; + x3[4] = x1[4]; + + x2[0] = z3[0] = 1; + x2[1] = z3[1] = 0; + x2[2] = z3[2] = 0; + x2[3] = z3[3] = 0; + x2[4] = z3[4] = 0; + + for (i = 254; i >= 0; --i) { + unsigned int k_t = 1 & (s[i / 8] >> (i & 7)); + fe51 a, b, c, d, e; + fe51 da, cb, aa, bb; + fe51 dacb_p, dacb_m; + + swap ^= k_t; + x25519_cswap(x2, x3, swap); + x25519_cswap(z2, z3, swap); + swap = k_t; + + fsub(b, x2, z2); // B = x_2 - z_2 + fadd(a, x2, z2); // A = x_2 + z_2 + fsub(d, x3, z3); // D = x_3 - z_3 + fadd(c, x3, z3); // C = x_3 + z_3 + + fsqr(bb, b); // BB = B^2 + fsqr(aa, a); // AA = A^2 + fmul(da, d, a); // DA = D * A + fmul(cb, c, b); // CB = C * B + + fsub(e, aa, bb); // E = AA - BB + fmul(x2, aa, bb); // x2 = AA * BB + fadd(dacb_p, da, cb); // DA + CB + fsub(dacb_m, da, cb); // DA - CB + + fmul121666(z3, e); // 121666 * E + fsqr(z2, dacb_m); // (DA - CB)^2 + fsqr(x3, dacb_p); // x3 = (DA + CB)^2 + fadd(b, bb, z3); // BB + 121666 * E + fmul(z3, x1, z2); // z3 = x1 * (DA - CB)^2 + fmul(z2, e, b); // z2 = e * (BB + (DA + CB)^2) + } + + finv(z2, z2); + fmul(x2, x2, z2); + fe51_tobytes(out, x2); +} + +static void curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE], + const u8 basepoint[CURVE25519_KEY_SIZE]) +{ + curve25519_fe51(mypublic, secret, basepoint); +} + +static void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE]) +{ + curve25519_fe51(pub, secret, curve25519_base_point); +} diff --git a/lib/crypto/powerpc/md5-asm.S b/lib/crypto/powerpc/md5-asm.S new file mode 100644 index 000000000000..fa6bc440cf4a --- /dev/null +++ b/lib/crypto/powerpc/md5-asm.S @@ -0,0 +1,235 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Fast MD5 implementation for PPC + * + * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de> + */ +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/asm-compat.h> + +#define rHP r3 +#define rWP r4 + +#define rH0 r0 +#define rH1 r6 +#define rH2 r7 +#define rH3 r5 + +#define rW00 r8 +#define rW01 r9 +#define rW02 r10 +#define rW03 r11 +#define rW04 r12 +#define rW05 r14 +#define rW06 r15 +#define rW07 r16 +#define rW08 r17 +#define rW09 r18 +#define rW10 r19 +#define rW11 r20 +#define rW12 r21 +#define rW13 r22 +#define rW14 r23 +#define rW15 r24 + +#define rT0 r25 +#define rT1 r26 + +#define INITIALIZE \ + PPC_STLU r1,-INT_FRAME_SIZE(r1); \ + SAVE_GPRS(14, 26, r1) /* push registers onto stack */ + +#define FINALIZE \ + REST_GPRS(14, 26, r1); /* pop registers from stack */ \ + addi r1,r1,INT_FRAME_SIZE + +#ifdef __BIG_ENDIAN__ +#define LOAD_DATA(reg, off) \ + lwbrx reg,0,rWP; /* load data */ +#define INC_PTR \ + addi rWP,rWP,4; /* increment per word */ +#define NEXT_BLOCK /* nothing to do */ +#else +#define LOAD_DATA(reg, off) \ + lwz reg,off(rWP); /* load data */ +#define INC_PTR /* nothing to do */ +#define NEXT_BLOCK \ + addi rWP,rWP,64; /* increment per block */ +#endif + +#define R_00_15(a, b, c, d, w0, w1, p, q, off, k0h, k0l, k1h, k1l) \ + LOAD_DATA(w0, off) /* W */ \ + and rT0,b,c; /* 1: f = b and c */ \ + INC_PTR /* ptr++ */ \ + andc rT1,d,b; /* 1: f' = ~b and d */ \ + LOAD_DATA(w1, off+4) /* W */ \ + or rT0,rT0,rT1; /* 1: f = f or f' */ \ + addi w0,w0,k0l; /* 1: wk = w + k */ \ + add a,a,rT0; /* 1: a = a + f */ \ + addis w0,w0,k0h; /* 1: wk = w + k' */ \ + addis w1,w1,k1h; /* 2: wk = w + k */ \ + add a,a,w0; /* 1: a = a + wk */ \ + addi w1,w1,k1l; /* 2: wk = w + k' */ \ + rotrwi a,a,p; /* 1: a = a rotl x */ \ + add d,d,w1; /* 2: a = a + wk */ \ + add a,a,b; /* 1: a = a + b */ \ + and rT0,a,b; /* 2: f = b and c */ \ + andc rT1,c,a; /* 2: f' = ~b and d */ \ + or rT0,rT0,rT1; /* 2: f = f or f' */ \ + add d,d,rT0; /* 2: a = a + f */ \ + INC_PTR /* ptr++ */ \ + rotrwi d,d,q; /* 2: a = a rotl x */ \ + add d,d,a; /* 2: a = a + b */ + +#define R_16_31(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \ + andc rT0,c,d; /* 1: f = c and ~d */ \ + and rT1,b,d; /* 1: f' = b and d */ \ + addi w0,w0,k0l; /* 1: wk = w + k */ \ + or rT0,rT0,rT1; /* 1: f = f or f' */ \ + addis w0,w0,k0h; /* 1: wk = w + k' */ \ + add a,a,rT0; /* 1: a = a + f */ \ + addi w1,w1,k1l; /* 2: wk = w + k */ \ + add a,a,w0; /* 1: a = a + wk */ \ + addis w1,w1,k1h; /* 2: wk = w + k' */ \ + andc rT0,b,c; /* 2: f = c and ~d */ \ + rotrwi a,a,p; /* 1: a = a rotl x */ \ + add a,a,b; /* 1: a = a + b */ \ + add d,d,w1; /* 2: a = a + wk */ \ + and rT1,a,c; /* 2: f' = b and d */ \ + or rT0,rT0,rT1; /* 2: f = f or f' */ \ + add d,d,rT0; /* 2: a = a + f */ \ + rotrwi d,d,q; /* 2: a = a rotl x */ \ + add d,d,a; /* 2: a = a +b */ + +#define R_32_47(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \ + xor rT0,b,c; /* 1: f' = b xor c */ \ + addi w0,w0,k0l; /* 1: wk = w + k */ \ + xor rT1,rT0,d; /* 1: f = f xor f' */ \ + addis w0,w0,k0h; /* 1: wk = w + k' */ \ + add a,a,rT1; /* 1: a = a + f */ \ + addi w1,w1,k1l; /* 2: wk = w + k */ \ + add a,a,w0; /* 1: a = a + wk */ \ + addis w1,w1,k1h; /* 2: wk = w + k' */ \ + rotrwi a,a,p; /* 1: a = a rotl x */ \ + add d,d,w1; /* 2: a = a + wk */ \ + add a,a,b; /* 1: a = a + b */ \ + xor rT1,rT0,a; /* 2: f = b xor f' */ \ + add d,d,rT1; /* 2: a = a + f */ \ + rotrwi d,d,q; /* 2: a = a rotl x */ \ + add d,d,a; /* 2: a = a + b */ + +#define R_48_63(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \ + addi w0,w0,k0l; /* 1: w = w + k */ \ + orc rT0,b,d; /* 1: f = b or ~d */ \ + addis w0,w0,k0h; /* 1: w = w + k' */ \ + xor rT0,rT0,c; /* 1: f = f xor c */ \ + add a,a,w0; /* 1: a = a + wk */ \ + addi w1,w1,k1l; /* 2: w = w + k */ \ + add a,a,rT0; /* 1: a = a + f */ \ + addis w1,w1,k1h; /* 2: w = w + k' */ \ + rotrwi a,a,p; /* 1: a = a rotl x */ \ + add a,a,b; /* 1: a = a + b */ \ + orc rT0,a,c; /* 2: f = b or ~d */ \ + add d,d,w1; /* 2: a = a + wk */ \ + xor rT0,rT0,b; /* 2: f = f xor c */ \ + add d,d,rT0; /* 2: a = a + f */ \ + rotrwi d,d,q; /* 2: a = a rotl x */ \ + add d,d,a; /* 2: a = a + b */ + +_GLOBAL(ppc_md5_transform) + INITIALIZE + + mtctr r5 + lwz rH0,0(rHP) + lwz rH1,4(rHP) + lwz rH2,8(rHP) + lwz rH3,12(rHP) + +ppc_md5_main: + R_00_15(rH0, rH1, rH2, rH3, rW00, rW01, 25, 20, 0, + 0xd76b, -23432, 0xe8c8, -18602) + R_00_15(rH2, rH3, rH0, rH1, rW02, rW03, 15, 10, 8, + 0x2420, 0x70db, 0xc1be, -12562) + R_00_15(rH0, rH1, rH2, rH3, rW04, rW05, 25, 20, 16, + 0xf57c, 0x0faf, 0x4788, -14806) + R_00_15(rH2, rH3, rH0, rH1, rW06, rW07, 15, 10, 24, + 0xa830, 0x4613, 0xfd47, -27391) + R_00_15(rH0, rH1, rH2, rH3, rW08, rW09, 25, 20, 32, + 0x6981, -26408, 0x8b45, -2129) + R_00_15(rH2, rH3, rH0, rH1, rW10, rW11, 15, 10, 40, + 0xffff, 0x5bb1, 0x895d, -10306) + R_00_15(rH0, rH1, rH2, rH3, rW12, rW13, 25, 20, 48, + 0x6b90, 0x1122, 0xfd98, 0x7193) + R_00_15(rH2, rH3, rH0, rH1, rW14, rW15, 15, 10, 56, + 0xa679, 0x438e, 0x49b4, 0x0821) + + R_16_31(rH0, rH1, rH2, rH3, rW01, rW06, 27, 23, + 0x0d56, 0x6e0c, 0x1810, 0x6d2d) + R_16_31(rH2, rH3, rH0, rH1, rW11, rW00, 18, 12, + 0x9d02, -32109, 0x124c, 0x2332) + R_16_31(rH0, rH1, rH2, rH3, rW05, rW10, 27, 23, + 0x8ea7, 0x4a33, 0x0245, -18270) + R_16_31(rH2, rH3, rH0, rH1, rW15, rW04, 18, 12, + 0x8eee, -8608, 0xf258, -5095) + R_16_31(rH0, rH1, rH2, rH3, rW09, rW14, 27, 23, + 0x969d, -10697, 0x1cbe, -15288) + R_16_31(rH2, rH3, rH0, rH1, rW03, rW08, 18, 12, + 0x3317, 0x3e99, 0xdbd9, 0x7c15) + R_16_31(rH0, rH1, rH2, rH3, rW13, rW02, 27, 23, + 0xac4b, 0x7772, 0xd8cf, 0x331d) + R_16_31(rH2, rH3, rH0, rH1, rW07, rW12, 18, 12, + 0x6a28, 0x6dd8, 0x219a, 0x3b68) + + R_32_47(rH0, rH1, rH2, rH3, rW05, rW08, 28, 21, + 0x29cb, 0x28e5, 0x4218, -7788) + R_32_47(rH2, rH3, rH0, rH1, rW11, rW14, 16, 9, + 0x473f, 0x06d1, 0x3aae, 0x3036) + R_32_47(rH0, rH1, rH2, rH3, rW01, rW04, 28, 21, + 0xaea1, -15134, 0x640b, -11295) + R_32_47(rH2, rH3, rH0, rH1, rW07, rW10, 16, 9, + 0x8f4c, 0x4887, 0xbc7c, -22499) + R_32_47(rH0, rH1, rH2, rH3, rW13, rW00, 28, 21, + 0x7eb8, -27199, 0x00ea, 0x6050) + R_32_47(rH2, rH3, rH0, rH1, rW03, rW06, 16, 9, + 0xe01a, 0x22fe, 0x4447, 0x69c5) + R_32_47(rH0, rH1, rH2, rH3, rW09, rW12, 28, 21, + 0xb7f3, 0x0253, 0x59b1, 0x4d5b) + R_32_47(rH2, rH3, rH0, rH1, rW15, rW02, 16, 9, + 0x4701, -27017, 0xc7bd, -19859) + + R_48_63(rH0, rH1, rH2, rH3, rW00, rW07, 26, 22, + 0x0988, -1462, 0x4c70, -19401) + R_48_63(rH2, rH3, rH0, rH1, rW14, rW05, 17, 11, + 0xadaf, -5221, 0xfc99, 0x66f7) + R_48_63(rH0, rH1, rH2, rH3, rW12, rW03, 26, 22, + 0x7e80, -16418, 0xba1e, -25587) + R_48_63(rH2, rH3, rH0, rH1, rW10, rW01, 17, 11, + 0x4130, 0x380d, 0xe0c5, 0x738d) + lwz rW00,0(rHP) + R_48_63(rH0, rH1, rH2, rH3, rW08, rW15, 26, 22, + 0xe837, -30770, 0xde8a, 0x69e8) + lwz rW14,4(rHP) + R_48_63(rH2, rH3, rH0, rH1, rW06, rW13, 17, 11, + 0x9e79, 0x260f, 0x256d, -27941) + lwz rW12,8(rHP) + R_48_63(rH0, rH1, rH2, rH3, rW04, rW11, 26, 22, + 0xab75, -20775, 0x4f9e, -28397) + lwz rW10,12(rHP) + R_48_63(rH2, rH3, rH0, rH1, rW02, rW09, 17, 11, + 0x662b, 0x7c56, 0x11b2, 0x0358) + + add rH0,rH0,rW00 + stw rH0,0(rHP) + add rH1,rH1,rW14 + stw rH1,4(rHP) + add rH2,rH2,rW12 + stw rH2,8(rHP) + add rH3,rH3,rW10 + stw rH3,12(rHP) + NEXT_BLOCK + + bdnz ppc_md5_main + + FINALIZE + blr diff --git a/lib/crypto/powerpc/md5.h b/lib/crypto/powerpc/md5.h new file mode 100644 index 000000000000..540b08e34d1d --- /dev/null +++ b/lib/crypto/powerpc/md5.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * MD5 optimized for PowerPC + */ + +void ppc_md5_transform(u32 *state, const u8 *data, size_t nblocks); + +static void md5_blocks(struct md5_block_state *state, + const u8 *data, size_t nblocks) +{ + ppc_md5_transform(state->h, data, nblocks); +} diff --git a/lib/crypto/powerpc/poly1305-p10-glue.c b/lib/crypto/powerpc/poly1305.h index 3f1664a724b6..b8ed098a0e95 100644 --- a/lib/crypto/powerpc/poly1305-p10-glue.c +++ b/lib/crypto/powerpc/poly1305.h @@ -1,15 +1,13 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * Poly1305 authenticator algorithm, RFC7539. * * Copyright 2023- IBM Corp. All rights reserved. */ #include <asm/switch_to.h> -#include <crypto/internal/poly1305.h> #include <linux/cpufeature.h> #include <linux/jump_label.h> #include <linux/kernel.h> -#include <linux/module.h> #include <linux/unaligned.h> asmlinkage void poly1305_p10le_4blocks(struct poly1305_block_state *state, const u8 *m, u32 mlen); @@ -30,8 +28,8 @@ static void vsx_end(void) preempt_enable(); } -void poly1305_block_init_arch(struct poly1305_block_state *dctx, - const u8 raw_key[POLY1305_BLOCK_SIZE]) +static void poly1305_block_init(struct poly1305_block_state *dctx, + const u8 raw_key[POLY1305_BLOCK_SIZE]) { if (!static_key_enabled(&have_p10)) return poly1305_block_init_generic(dctx, raw_key); @@ -40,10 +38,9 @@ void poly1305_block_init_arch(struct poly1305_block_state *dctx, dctx->core_r.key.r64[0] = get_unaligned_le64(raw_key + 0); dctx->core_r.key.r64[1] = get_unaligned_le64(raw_key + 8); } -EXPORT_SYMBOL_GPL(poly1305_block_init_arch); -void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src, - unsigned int len, u32 padbit) +static void poly1305_blocks(struct poly1305_block_state *state, const u8 *src, + unsigned int len, u32 padbit) { if (!static_key_enabled(&have_p10)) return poly1305_blocks_generic(state, src, len, padbit); @@ -60,37 +57,18 @@ void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src, } vsx_end(); } -EXPORT_SYMBOL_GPL(poly1305_blocks_arch); -void poly1305_emit_arch(const struct poly1305_state *state, - u8 digest[POLY1305_DIGEST_SIZE], - const u32 nonce[4]) +static void poly1305_emit(const struct poly1305_state *state, + u8 digest[POLY1305_DIGEST_SIZE], const u32 nonce[4]) { if (!static_key_enabled(&have_p10)) return poly1305_emit_generic(state, digest, nonce); poly1305_emit_64(state, nonce, digest); } -EXPORT_SYMBOL_GPL(poly1305_emit_arch); -bool poly1305_is_arch_optimized(void) -{ - return static_key_enabled(&have_p10); -} -EXPORT_SYMBOL(poly1305_is_arch_optimized); - -static int __init poly1305_p10_init(void) +#define poly1305_mod_init_arch poly1305_mod_init_arch +static void poly1305_mod_init_arch(void) { if (cpu_has_feature(CPU_FTR_ARCH_31)) static_branch_enable(&have_p10); - return 0; } -subsys_initcall(poly1305_p10_init); - -static void __exit poly1305_p10_exit(void) -{ -} -module_exit(poly1305_p10_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Danny Tsen <dtsen@linux.ibm.com>"); -MODULE_DESCRIPTION("Optimized Poly1305 for P10"); |