From fab0c9c07d8af97bc2b3b854f9b6c28f77f077cf Mon Sep 17 00:00:00 2001 From: torben-hansen <50673096+torben-hansen@users.noreply.github.com> Date: Wed, 6 Dec 2023 08:22:22 -0800 Subject: [PATCH] Factor out the machine-optimised backend for 25519 algorithms (#1340) Finalise the 25519 refactoring by moving the machine-optimised implementations (from s2n-bignum) to their own compilation unit. This also aligns the s2n-bignum wrappers with the nohw wrappers, with common signatures and common function descriptions. --- crypto/CMakeLists.txt | 1 + crypto/curve25519/curve25519.c | 263 +----------------- crypto/curve25519/curve25519_nohw.c | 36 +-- crypto/curve25519/curve25519_s2n_bignum_asm.c | 232 +++++++++++++++ crypto/curve25519/internal.h | 81 ++++-- include/openssl/curve25519.h | 16 +- 6 files changed, 340 insertions(+), 289 deletions(-) create mode 100644 crypto/curve25519/curve25519_s2n_bignum_asm.c diff --git a/crypto/CMakeLists.txt b/crypto/CMakeLists.txt index 3cea99a988..020f1b7d5d 100644 --- a/crypto/CMakeLists.txt +++ b/crypto/CMakeLists.txt @@ -347,6 +347,7 @@ add_library( crypto.c curve25519/curve25519.c curve25519/curve25519_nohw.c + curve25519/curve25519_s2n_bignum_asm.c curve25519/spake25519.c des/des.c dh_extra/params.c diff --git a/crypto/curve25519/curve25519.c b/crypto/curve25519/curve25519.c index feca0fe780..1343615515 100644 --- a/crypto/curve25519/curve25519.c +++ b/crypto/curve25519/curve25519.c @@ -29,7 +29,6 @@ #include "internal.h" #include "../internal.h" -#include "../fipsmodule/cpucap/internal.h" // X25519 [1] and Ed25519 [2] is an ECDHE protocol and signature scheme, // respectively. This file contains an implementation of both using two @@ -53,46 +52,6 @@ // For Ed25519, dom2(F,C) is the empty string and PH the identify function, // cf. rfc8032 5.1. -// If (1) x86_64 or aarch64, (2) linux or apple, and (3) OPENSSL_NO_ASM is not -// set, s2n-bignum path is capable. 
-#if ((defined(OPENSSL_X86_64) && \ - !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX)) || \ - defined(OPENSSL_AARCH64)) && \ - (defined(OPENSSL_LINUX) || defined(OPENSSL_APPLE)) && \ - !defined(OPENSSL_NO_ASM) -#include "../../third_party/s2n-bignum/include/s2n-bignum_aws-lc.h" -#define CURVE25519_S2N_BIGNUM_CAPABLE -#endif - -// Stub functions if implementations are not compiled. -// These functions have to abort, otherwise we risk applications assuming they -// did work without actually doing anything. -#if !defined(CURVE25519_S2N_BIGNUM_CAPABLE) || defined(BORINGSSL_FIPS) - -#define S2N_BIGNUM_STUB_FUNC(return_type, symbol, ...) \ - return_type symbol(__VA_ARGS__); \ - return_type symbol(__VA_ARGS__) { abort(); } \ - -S2N_BIGNUM_STUB_FUNC(void, bignum_mod_n25519, uint64_t z[4], uint64_t k, uint64_t *x) -S2N_BIGNUM_STUB_FUNC(void, bignum_neg_p25519, uint64_t z[4], uint64_t x[4]) -S2N_BIGNUM_STUB_FUNC(void, bignum_madd_n25519, uint64_t z[4], uint64_t x[4], uint64_t y[4], uint64_t c[4]) -S2N_BIGNUM_STUB_FUNC(void, bignum_madd_n25519_alt, uint64_t z[4], uint64_t x[4], uint64_t y[4], uint64_t c[4]) -S2N_BIGNUM_STUB_FUNC(void, edwards25519_encode, uint8_t z[32], uint64_t p[8]) -S2N_BIGNUM_STUB_FUNC(uint64_t, edwards25519_decode, uint64_t z[8], const uint8_t c[32]) -S2N_BIGNUM_STUB_FUNC(uint64_t, edwards25519_decode_alt, uint64_t z[8], const uint8_t c[32]) -S2N_BIGNUM_STUB_FUNC(void, edwards25519_scalarmulbase, uint64_t res[8],uint64_t scalar[4]) -S2N_BIGNUM_STUB_FUNC(void, edwards25519_scalarmulbase_alt, uint64_t res[8],uint64_t scalar[4]) -S2N_BIGNUM_STUB_FUNC(void, edwards25519_scalarmuldouble, uint64_t res[8], uint64_t scalar[4], uint64_t point[8], uint64_t bscalar[4]) -S2N_BIGNUM_STUB_FUNC(void, edwards25519_scalarmuldouble_alt, uint64_t res[8], uint64_t scalar[4], uint64_t point[8], uint64_t bscalar[4]) - -#if !defined(CURVE25519_S2N_BIGNUM_CAPABLE) -S2N_BIGNUM_STUB_FUNC(void, curve25519_x25519_byte, uint8_t res[32], const uint8_t scalar[32], const uint8_t point[32]) 
-S2N_BIGNUM_STUB_FUNC(void, curve25519_x25519_byte_alt, uint8_t res[32], const uint8_t scalar[32], const uint8_t point[32]) -S2N_BIGNUM_STUB_FUNC(void, curve25519_x25519base_byte, uint8_t res[32], const uint8_t scalar[32]) -S2N_BIGNUM_STUB_FUNC(void, curve25519_x25519base_byte_alt, uint8_t res[32], const uint8_t scalar[32]) -#endif // !defined(CURVE25519_S2N_BIGNUM_CAPABLE) -#endif // !defined(CURVE25519_S2N_BIGNUM_CAPABLE) || defined(BORINGSSL_FIPS) - OPENSSL_INLINE int curve25519_s2n_bignum_capable(void) { #if defined(CURVE25519_S2N_BIGNUM_CAPABLE) return 1; @@ -110,203 +69,6 @@ OPENSSL_INLINE int ed25519_s2n_bignum_capable(void) { #endif } -// curve25519_s2n_bignum_use_no_alt_implementation returns 1 if the no_alt -// s2n-bignum implementation should be used and 0 otherwise. -// -// Below is the decision logic for which assembly backend implementation -// of x25519 s2n-bignum we should use if x25519 s2n-bignum capable. Currently, -// we support the following implementations. -// -// x86_64: -// - s2n-bignum-no-alt: hardware implementation using bmi2+adx instruction sets -// - s2n-bignum-alt: hardware implementation using standard instructions -// -// aarch64: -// - s2n-bignum-no-alt: hardware implementation for "low" multiplier throughput -// - s2n-bignum-alt: hardware implementation for "high" multiplier throughput -// -// Through experiments we have found that: -// -// For x86_64: bmi+adc will almost always give a performance boost. So, here we -// prefer s2n-bignum-no-alt over s2n-bignum-alt if the former is supported. -// For aarch64: if a wide multiplier is supported, we prefer s2n-bignum-alt over -// s2n-bignum-no-alt if the former is supported. -// |curve25519_s2n_bignum_alt_capable| specifically looks to match CPUs that -// have wide multipliers. this ensures that s2n-bignum-alt will only be used -// on such CPUs. 
-OPENSSL_INLINE int curve25519_s2n_bignum_use_no_alt_implementation(void); -OPENSSL_INLINE int curve25519_s2n_bignum_use_no_alt_implementation(void) { -#if defined(OPENSSL_X86_64) - // For x86_64 the no_alt implementation is bmi2+adx. Prefer if available. - if (CRYPTO_is_BMI2_capable() == 1 && CRYPTO_is_ADX_capable() == 1) { - return 1; - } else { - return 0; - } -#elif defined(OPENSSL_AARCH64) - // For aarch64 the alt implementation is for wide multipliers. Prefer if - // available. - if (CRYPTO_is_ARMv8_wide_multiplier_capable() == 1) { - return 0; - } else { - return 1; - } -#endif - // Have to return some default value. - return 0; -} - - -// s2n-bignum wrappers - -static void x25519_s2n_bignum(uint8_t out_shared_key[32], - const uint8_t private_key[32], const uint8_t peer_public_value[32]) { - - uint8_t private_key_internal_demask[32]; - OPENSSL_memcpy(private_key_internal_demask, private_key, 32); - private_key_internal_demask[0] &= 248; - private_key_internal_demask[31] &= 127; - private_key_internal_demask[31] |= 64; - - if (curve25519_s2n_bignum_use_no_alt_implementation() == 1) { - curve25519_x25519_byte(out_shared_key, private_key_internal_demask, - peer_public_value); - } else { - curve25519_x25519_byte_alt(out_shared_key, private_key_internal_demask, - peer_public_value); - } -} - -static void x25519_s2n_bignum_public_from_private( - uint8_t out_public_value[32], const uint8_t private_key[32]) { - - uint8_t private_key_internal_demask[32]; - OPENSSL_memcpy(private_key_internal_demask, private_key, 32); - private_key_internal_demask[0] &= 248; - private_key_internal_demask[31] &= 127; - private_key_internal_demask[31] |= 64; - - if (curve25519_s2n_bignum_use_no_alt_implementation() == 1) { - curve25519_x25519base_byte(out_public_value, private_key_internal_demask); - } else { - curve25519_x25519base_byte_alt(out_public_value, private_key_internal_demask); - } -} - -static void ed25519_public_key_from_hashed_seed_s2n_bignum( - uint8_t 
out_public_key[ED25519_PUBLIC_KEY_LEN], - uint8_t az[SHA512_DIGEST_LENGTH]) { - - uint64_t uint64_point[8] = {0}; - uint64_t uint64_hashed_seed[4] = {0}; - OPENSSL_memcpy(uint64_hashed_seed, az, 32); - - if (curve25519_s2n_bignum_use_no_alt_implementation() == 1) { - edwards25519_scalarmulbase(uint64_point, uint64_hashed_seed); - } else { - edwards25519_scalarmulbase_alt(uint64_point, uint64_hashed_seed); - } - - edwards25519_encode(out_public_key, uint64_point); -} - -// |s| is of length |ED25519_PRIVATE_KEY_SEED_LEN| -// |A| is of length |ED25519_PUBLIC_KEY_LEN|. -static void ed25519_sign_s2n_bignum( - uint8_t out_sig[ED25519_SIGNATURE_LEN], - uint8_t r[SHA512_DIGEST_LENGTH], const uint8_t *s, const uint8_t *A, - const void *message, size_t message_len) { - - void (*scalarmulbase)(uint64_t res[8],uint64_t scalar[4]); - void (*madd)(uint64_t z[4], uint64_t x[4], uint64_t y[4], uint64_t c[4]); - - if (curve25519_s2n_bignum_use_no_alt_implementation() == 1) { - scalarmulbase = edwards25519_scalarmulbase; - madd = bignum_madd_n25519; - } else { - scalarmulbase = edwards25519_scalarmulbase_alt; - madd = bignum_madd_n25519_alt; - } - - uint8_t k[SHA512_DIGEST_LENGTH] = {0}; - uint64_t R[8] = {0}; - uint64_t z[4] = {0}; - uint64_t uint64_r[8] = {0}; - uint64_t uint64_k[8] = {0}; - uint64_t uint64_s[4] = {0}; - OPENSSL_memcpy(uint64_r, r, 64); - OPENSSL_memcpy(uint64_s, s, 32); - - // Reduce r modulo the order of the base-point B. - bignum_mod_n25519(uint64_r, 8, uint64_r); - - // Compute [r]B. - scalarmulbase(R, uint64_r); - edwards25519_encode(out_sig, R); - - // Compute k = SHA512(R || A || message) - // R is of length 32 octets - ed25519_sha512(k, out_sig, 32, A, ED25519_PUBLIC_KEY_LEN, message, - message_len); - OPENSSL_memcpy(uint64_k, k, SHA512_DIGEST_LENGTH); - bignum_mod_n25519(uint64_k, 8, uint64_k); - - // Compute S = r + k * s modulo the order of the base-point B. 
- // out_sig = R || S - madd(z, uint64_k, uint64_s, uint64_r); - OPENSSL_memcpy(out_sig + 32, z, 32); -} - -static int ed25519_verify_s2n_bignum(uint8_t R_computed_encoded[32], - const uint8_t public_key[32], uint8_t R_expected[32], - uint8_t S[32], const uint8_t *message, size_t message_len) { - - void (*scalarmuldouble)(uint64_t res[8], uint64_t scalar[4], - uint64_t point[8], uint64_t bscalar[4]); - uint64_t (*decode)(uint64_t z[8], const uint8_t c[32]); - - if (curve25519_s2n_bignum_use_no_alt_implementation() == 1) { - scalarmuldouble = edwards25519_scalarmuldouble; - decode = edwards25519_decode; - } else { - scalarmuldouble = edwards25519_scalarmuldouble_alt; - decode = edwards25519_decode_alt; - } - - uint8_t k[SHA512_DIGEST_LENGTH] = {0}; - uint64_t uint64_k[8] = {0}; - uint64_t uint64_R[8] = {0}; - uint64_t uint64_S[4] = {0}; - uint64_t A[8] = {0}; - - // Decode public key as A'. - if (decode(A, public_key) != 0) { - return 0; - } - - // Step: rfc8032 5.1.7.2 - // Compute k = SHA512(R_expected || public_key || message). - ed25519_sha512(k, R_expected, 32, public_key, ED25519_PUBLIC_KEY_LEN, message, - message_len); - OPENSSL_memcpy(uint64_k, k, SHA512_DIGEST_LENGTH); - bignum_mod_n25519(uint64_k, 8, uint64_k); - - // Step: rfc8032 5.1.7.3 - // Recall, we must compute [S]B - [k]A'. - // First negate A'. Point negation for the twisted edwards curve when points - // are represented in the extended coordinate system is simply: - // -(X,Y,Z,T) = (-X,Y,Z,-T). - // See "Twisted Edwards curves revisited" https://ia.cr/2008/522. - bignum_neg_p25519(A, A); - - // Compute R_have <- [S]B - [k]A'. 
- OPENSSL_memcpy(uint64_S, S, 32); - scalarmuldouble(uint64_R, uint64_k, A, uint64_S); - edwards25519_encode(R_computed_encoded, uint64_R); - - return 1; -} - void ed25519_sha512(uint8_t out[SHA512_DIGEST_LENGTH], const void *input1, size_t len1, const void *input2, size_t len2, const void *input3, size_t len3) { @@ -321,7 +83,6 @@ void ed25519_sha512(uint8_t out[SHA512_DIGEST_LENGTH], SHA512_Final(out, &hash_ctx); } - // Public interface functions void ED25519_keypair_from_seed(uint8_t out_public_key[ED25519_PUBLIC_KEY_LEN], @@ -468,18 +229,21 @@ int ED25519_verify(const uint8_t *message, size_t message_len, } -void X25519_public_from_private(uint8_t out_public_value[32], - const uint8_t private_key[32]) { +void X25519_public_from_private( + uint8_t out_public_value[X25519_PUBLIC_VALUE_LEN], + const uint8_t private_key[X25519_PRIVATE_KEY_LEN]) { if (curve25519_s2n_bignum_capable() == 1) { - x25519_s2n_bignum_public_from_private(out_public_value, private_key); + x25519_public_from_private_s2n_bignum(out_public_value, private_key); } else { x25519_public_from_private_nohw(out_public_value, private_key); } } -void X25519_keypair(uint8_t out_public_value[32], uint8_t out_private_key[32]) { - RAND_bytes(out_private_key, 32); +void X25519_keypair(uint8_t out_public_value[X25519_PUBLIC_VALUE_LEN], + uint8_t out_private_key[X25519_PRIVATE_KEY_LEN]) { + + RAND_bytes(out_private_key, X25519_PRIVATE_KEY_LEN); // All X25519 implementations should decode scalars correctly (see // https://tools.ietf.org/html/rfc7748#section-5). 
However, if an @@ -501,18 +265,19 @@ void X25519_keypair(uint8_t out_public_value[32], uint8_t out_private_key[32]) { X25519_public_from_private(out_public_value, out_private_key); } -int X25519(uint8_t out_shared_key[32], const uint8_t private_key[32], - const uint8_t peer_public_value[32]) { +int X25519(uint8_t out_shared_key[X25519_SHARED_KEY_LEN], + const uint8_t private_key[X25519_PRIVATE_KEY_LEN], + const uint8_t peer_public_value[X25519_PUBLIC_VALUE_LEN]) { - static const uint8_t kZeros[32] = {0}; + static const uint8_t kZeros[X25519_SHARED_KEY_LEN] = {0}; if (curve25519_s2n_bignum_capable() == 1) { - x25519_s2n_bignum(out_shared_key, private_key, peer_public_value); + x25519_scalar_mult_generic_s2n_bignum(out_shared_key, private_key, peer_public_value); } else { x25519_scalar_mult_generic_nohw(out_shared_key, private_key, peer_public_value); } // The all-zero output results when the input is a point of small order. return constant_time_declassify_int( - CRYPTO_memcmp(kZeros, out_shared_key, 32)) != 0; + CRYPTO_memcmp(kZeros, out_shared_key, X25519_SHARED_KEY_LEN)) != 0; } diff --git a/crypto/curve25519/curve25519_nohw.c b/crypto/curve25519/curve25519_nohw.c index 3c8afbe974..1839a9988d 100644 --- a/crypto/curve25519/curve25519_nohw.c +++ b/crypto/curve25519/curve25519_nohw.c @@ -1863,14 +1863,16 @@ static void sc_muladd(uint8_t *s, const uint8_t *a, const uint8_t *b, s[31] = s11 >> 17; } -void x25519_scalar_mult_generic_nohw(uint8_t out[32], - const uint8_t scalar[32], - const uint8_t point[32]) { +void x25519_scalar_mult_generic_nohw( + uint8_t out_shared_key[X25519_SHARED_KEY_LEN], + const uint8_t private_key[X25519_PRIVATE_KEY_LEN], + const uint8_t peer_public_value[X25519_PUBLIC_VALUE_LEN]) { + fe x1, x2, z2, x3, z3, tmp0, tmp1; fe_loose x2l, z2l, x3l, tmp0l, tmp1l; - uint8_t e[32]; - OPENSSL_memcpy(e, scalar, 32); + uint8_t e[X25519_PRIVATE_KEY_LEN]; + OPENSSL_memcpy(e, private_key, X25519_PRIVATE_KEY_LEN); e[0] &= 248; e[31] &= 127; e[31] |= 64; @@ 
-1892,7 +1894,7 @@ void x25519_scalar_mult_generic_nohw(uint8_t out[32], // // // preconditions: 0 <= e < 2^255 (not necessarily e < order), fe_invert(0) = 0 - fe_frombytes(&x1, point); + fe_frombytes(&x1, peer_public_value); fe_1(&x2); fe_0(&z2); fe_copy(&x3, &x1); @@ -1942,14 +1944,15 @@ void x25519_scalar_mult_generic_nohw(uint8_t out[32], fe_invert(&z2, &z2); fe_mul_ttt(&x2, &x2, &z2); - fe_tobytes(out, &x2); + fe_tobytes(out_shared_key, &x2); } -void x25519_public_from_private_nohw(uint8_t out_public_value[32], - const uint8_t private_key[32]) { +void x25519_public_from_private_nohw( + uint8_t out_public_value[X25519_PUBLIC_VALUE_LEN], + const uint8_t private_key[X25519_PRIVATE_KEY_LEN]) { - uint8_t e[32]; - OPENSSL_memcpy(e, private_key, 32); + uint8_t e[X25519_PRIVATE_KEY_LEN]; + OPENSSL_memcpy(e, private_key, X25519_PRIVATE_KEY_LEN); e[0] &= 248; e[31] &= 127; e[31] |= 64; @@ -1966,18 +1969,19 @@ void x25519_public_from_private_nohw(uint8_t out_public_value[32], fe_loose_invert(&zminusy_inv, &zminusy); fe_mul_tlt(&zminusy_inv, &zplusy, &zminusy_inv); fe_tobytes(out_public_value, &zminusy_inv); - CONSTTIME_DECLASSIFY(out_public_value, 32); + CONSTTIME_DECLASSIFY(out_public_value, X25519_PUBLIC_VALUE_LEN); } -void ed25519_public_key_from_hashed_seed_nohw(uint8_t out_public_key[32], - uint8_t az[SHA512_DIGEST_LENGTH]) { +void ed25519_public_key_from_hashed_seed_nohw( + uint8_t out_public_key[ED25519_PUBLIC_KEY_LEN], + uint8_t az[SHA512_DIGEST_LENGTH]) { + ge_p3 A; x25519_ge_scalarmult_base(&A, az); ge_p3_tobytes(out_public_key, &A); } -void ed25519_sign_nohw( - uint8_t out_sig[ED25519_SIGNATURE_LEN], +void ed25519_sign_nohw(uint8_t out_sig[ED25519_SIGNATURE_LEN], uint8_t r[SHA512_DIGEST_LENGTH], const uint8_t *s, const uint8_t *A, const void *message, size_t message_len) { diff --git a/crypto/curve25519/curve25519_s2n_bignum_asm.c b/crypto/curve25519/curve25519_s2n_bignum_asm.c new file mode 100644 index 0000000000..9256fff59d --- /dev/null +++ 
b/crypto/curve25519/curve25519_s2n_bignum_asm.c @@ -0,0 +1,232 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC + +#include "internal.h" +#include "../fipsmodule/cpucap/internal.h" + +#if defined(CURVE25519_S2N_BIGNUM_CAPABLE) +#include "../../third_party/s2n-bignum/include/s2n-bignum_aws-lc.h" +#endif + +// Stub functions if s2n-bignum implementations are not compiled. +// These functions have to abort, otherwise we risk applications assuming they +// did work without actually doing anything. +#if !defined(CURVE25519_S2N_BIGNUM_CAPABLE) || defined(BORINGSSL_FIPS) + +#define S2N_BIGNUM_STUB_FUNC(return_type, symbol, ...) \ + return_type symbol(__VA_ARGS__); \ + return_type symbol(__VA_ARGS__) { abort(); } \ + +S2N_BIGNUM_STUB_FUNC(void, bignum_mod_n25519, uint64_t z[4], uint64_t k, uint64_t *x) +S2N_BIGNUM_STUB_FUNC(void, bignum_neg_p25519, uint64_t z[4], uint64_t x[4]) +S2N_BIGNUM_STUB_FUNC(void, bignum_madd_n25519, uint64_t z[4], uint64_t x[4], uint64_t y[4], uint64_t c[4]) +S2N_BIGNUM_STUB_FUNC(void, bignum_madd_n25519_alt, uint64_t z[4], uint64_t x[4], uint64_t y[4], uint64_t c[4]) +S2N_BIGNUM_STUB_FUNC(void, edwards25519_encode, uint8_t z[32], uint64_t p[8]) +S2N_BIGNUM_STUB_FUNC(uint64_t, edwards25519_decode, uint64_t z[8], const uint8_t c[32]) +S2N_BIGNUM_STUB_FUNC(uint64_t, edwards25519_decode_alt, uint64_t z[8], const uint8_t c[32]) +S2N_BIGNUM_STUB_FUNC(void, edwards25519_scalarmulbase, uint64_t res[8],uint64_t scalar[4]) +S2N_BIGNUM_STUB_FUNC(void, edwards25519_scalarmulbase_alt, uint64_t res[8],uint64_t scalar[4]) +S2N_BIGNUM_STUB_FUNC(void, edwards25519_scalarmuldouble, uint64_t res[8], uint64_t scalar[4], uint64_t point[8], uint64_t bscalar[4]) +S2N_BIGNUM_STUB_FUNC(void, edwards25519_scalarmuldouble_alt, uint64_t res[8], uint64_t scalar[4], uint64_t point[8], uint64_t bscalar[4]) + +#if !defined(CURVE25519_S2N_BIGNUM_CAPABLE) +S2N_BIGNUM_STUB_FUNC(void, curve25519_x25519_byte, 
uint8_t res[32], const uint8_t scalar[32], const uint8_t point[32]) +S2N_BIGNUM_STUB_FUNC(void, curve25519_x25519_byte_alt, uint8_t res[32], const uint8_t scalar[32], const uint8_t point[32]) +S2N_BIGNUM_STUB_FUNC(void, curve25519_x25519base_byte, uint8_t res[32], const uint8_t scalar[32]) +S2N_BIGNUM_STUB_FUNC(void, curve25519_x25519base_byte_alt, uint8_t res[32], const uint8_t scalar[32]) +#endif // !defined(CURVE25519_S2N_BIGNUM_CAPABLE) +#endif // !defined(CURVE25519_S2N_BIGNUM_CAPABLE) || defined(BORINGSSL_FIPS) + +// curve25519_s2n_bignum_use_no_alt_implementation returns 1 if the no_alt +// s2n-bignum implementation should be used and 0 otherwise. +// +// Below is the decision logic for which assembly backend implementation +// of x25519 s2n-bignum we should use if x25519 s2n-bignum capable. Currently, +// we support the following implementations. +// +// x86_64: +// - s2n-bignum-no-alt: hardware implementation using bmi2+adx instruction sets +// - s2n-bignum-alt: hardware implementation using standard instructions +// +// aarch64: +// - s2n-bignum-no-alt: hardware implementation for "low" multiplier throughput +// - s2n-bignum-alt: hardware implementation for "high" multiplier throughput +// +// Through experiments we have found that: +// +// For x86_64: bmi2+adx will almost always give a performance boost. So, here we +// prefer s2n-bignum-no-alt over s2n-bignum-alt if the former is supported. +// For aarch64: if a wide multiplier is supported, we prefer s2n-bignum-alt over +// s2n-bignum-no-alt if the former is supported. +// |CRYPTO_is_ARMv8_wide_multiplier_capable| specifically looks to match CPUs +// that have wide multipliers. This ensures that s2n-bignum-alt will only be +// used on such CPUs. +OPENSSL_INLINE int curve25519_s2n_bignum_use_no_alt_implementation(void); +OPENSSL_INLINE int curve25519_s2n_bignum_use_no_alt_implementation(void) { +#if defined(OPENSSL_X86_64) + // For x86_64 the no_alt implementation is bmi2+adx. Prefer if available. 
+ if (CRYPTO_is_BMI2_capable() == 1 && CRYPTO_is_ADX_capable() == 1) { + return 1; + } else { + return 0; + } +#elif defined(OPENSSL_AARCH64) + // For aarch64 the alt implementation is for wide multipliers. Prefer if + // available. + if (CRYPTO_is_ARMv8_wide_multiplier_capable() == 1) { + return 0; + } else { + return 1; + } +#endif + // Have to return some default value. + return 0; +} + +void x25519_scalar_mult_generic_s2n_bignum( + uint8_t out_shared_key[X25519_SHARED_KEY_LEN], + const uint8_t private_key[X25519_PRIVATE_KEY_LEN], + const uint8_t peer_public_value[X25519_PUBLIC_VALUE_LEN]) { + + uint8_t private_key_internal_demask[X25519_PRIVATE_KEY_LEN]; + OPENSSL_memcpy(private_key_internal_demask, private_key, X25519_PRIVATE_KEY_LEN); + private_key_internal_demask[0] &= 248; + private_key_internal_demask[31] &= 127; + private_key_internal_demask[31] |= 64; + + if (curve25519_s2n_bignum_use_no_alt_implementation() == 1) { + curve25519_x25519_byte(out_shared_key, private_key_internal_demask, + peer_public_value); + } else { + curve25519_x25519_byte_alt(out_shared_key, private_key_internal_demask, + peer_public_value); + } +} + +void x25519_public_from_private_s2n_bignum( + uint8_t out_public_value[X25519_PUBLIC_VALUE_LEN], + const uint8_t private_key[X25519_PRIVATE_KEY_LEN]) { + + uint8_t private_key_internal_demask[X25519_PRIVATE_KEY_LEN]; + OPENSSL_memcpy(private_key_internal_demask, private_key, X25519_PRIVATE_KEY_LEN); + private_key_internal_demask[0] &= 248; + private_key_internal_demask[31] &= 127; + private_key_internal_demask[31] |= 64; + + if (curve25519_s2n_bignum_use_no_alt_implementation() == 1) { + curve25519_x25519base_byte(out_public_value, private_key_internal_demask); + } else { + curve25519_x25519base_byte_alt(out_public_value, private_key_internal_demask); + } +} + +void ed25519_public_key_from_hashed_seed_s2n_bignum( + uint8_t out_public_key[ED25519_PUBLIC_KEY_LEN], + uint8_t az[SHA512_DIGEST_LENGTH]) { + + uint64_t uint64_point[8] = {0}; + 
uint64_t uint64_hashed_seed[4] = {0}; + OPENSSL_memcpy(uint64_hashed_seed, az, 32); + + if (curve25519_s2n_bignum_use_no_alt_implementation() == 1) { + edwards25519_scalarmulbase(uint64_point, uint64_hashed_seed); + } else { + edwards25519_scalarmulbase_alt(uint64_point, uint64_hashed_seed); + } + + edwards25519_encode(out_public_key, uint64_point); +} + +void ed25519_sign_s2n_bignum(uint8_t out_sig[ED25519_SIGNATURE_LEN], + uint8_t r[SHA512_DIGEST_LENGTH], const uint8_t *s, const uint8_t *A, + const void *message, size_t message_len) { + + void (*scalarmulbase)(uint64_t res[8],uint64_t scalar[4]); + void (*madd)(uint64_t z[4], uint64_t x[4], uint64_t y[4], uint64_t c[4]); + + if (curve25519_s2n_bignum_use_no_alt_implementation() == 1) { + scalarmulbase = edwards25519_scalarmulbase; + madd = bignum_madd_n25519; + } else { + scalarmulbase = edwards25519_scalarmulbase_alt; + madd = bignum_madd_n25519_alt; + } + + uint8_t k[SHA512_DIGEST_LENGTH] = {0}; + uint64_t R[8] = {0}; + uint64_t S[4] = {0}; + uint64_t uint64_r[8] = {0}; + uint64_t uint64_k[8] = {0}; + uint64_t uint64_s[4] = {0}; + OPENSSL_memcpy(uint64_r, r, 64); + OPENSSL_memcpy(uint64_s, s, 32); + + // Reduce r modulo the order of the base-point B. + bignum_mod_n25519(uint64_r, 8, uint64_r); + + // Compute [r]B. + scalarmulbase(R, uint64_r); + edwards25519_encode(out_sig, R); + + // Compute k = SHA512(R || A || message) + // R is of length 32 octets + ed25519_sha512(k, out_sig, 32, A, ED25519_PUBLIC_KEY_LEN, message, + message_len); + OPENSSL_memcpy(uint64_k, k, SHA512_DIGEST_LENGTH); + bignum_mod_n25519(uint64_k, 8, uint64_k); + + // Compute S = r + k * s modulo the order of the base-point B. 
+ // out_sig = R || S + madd(S, uint64_k, uint64_s, uint64_r); + OPENSSL_memcpy(out_sig + 32, S, 32); +} + +int ed25519_verify_s2n_bignum(uint8_t R_computed_encoded[32], + const uint8_t public_key[ED25519_PUBLIC_KEY_LEN], uint8_t R_expected[32], + uint8_t S[32], const uint8_t *message, size_t message_len) { + + void (*scalarmuldouble)(uint64_t res[8], uint64_t scalar[4], + uint64_t point[8], uint64_t bscalar[4]); + uint64_t (*decode)(uint64_t z[8], const uint8_t c[32]); + + if (curve25519_s2n_bignum_use_no_alt_implementation() == 1) { + scalarmuldouble = edwards25519_scalarmuldouble; + decode = edwards25519_decode; + } else { + scalarmuldouble = edwards25519_scalarmuldouble_alt; + decode = edwards25519_decode_alt; + } + + uint8_t k[SHA512_DIGEST_LENGTH] = {0}; + uint64_t uint64_k[8] = {0}; + uint64_t uint64_R[8] = {0}; + uint64_t uint64_S[4] = {0}; + uint64_t A[8] = {0}; + + // Decode public key as A'. + if (decode(A, public_key) != 0) { + return 0; + } + + // Step: rfc8032 5.1.7.2 + // Compute k = SHA512(R_expected || public_key || message). + ed25519_sha512(k, R_expected, 32, public_key, ED25519_PUBLIC_KEY_LEN, message, + message_len); + OPENSSL_memcpy(uint64_k, k, SHA512_DIGEST_LENGTH); + bignum_mod_n25519(uint64_k, 8, uint64_k); + + // Step: rfc8032 5.1.7.3 + // Recall, we must compute [S]B - [k]A'. + // First negate A'. Point negation for the twisted edwards curve when points + // are represented in the extended coordinate system is simply: + // -(X,Y,Z,T) = (-X,Y,Z,-T). + // See "Twisted Edwards curves revisited" https://ia.cr/2008/522. + bignum_neg_p25519(A, A); + + // Compute R_have <- [S]B - [k]A'. 
+ OPENSSL_memcpy(uint64_S, S, 32); + scalarmuldouble(uint64_R, uint64_k, A, uint64_S); + edwards25519_encode(R_computed_encoded, uint64_R); + + return 1; +} diff --git a/crypto/curve25519/internal.h b/crypto/curve25519/internal.h index 8a9c9dcb54..5bb1c05cbe 100644 --- a/crypto/curve25519/internal.h +++ b/crypto/curve25519/internal.h @@ -24,6 +24,16 @@ extern "C" { #include "../internal.h" +// If (1) x86_64 or aarch64, (2) linux or apple, and (3) OPENSSL_NO_ASM is not +// set, the s2n-bignum path is available. +#if ((defined(OPENSSL_X86_64) && \ + !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX)) || \ + defined(OPENSSL_AARCH64)) && \ + (defined(OPENSSL_LINUX) || defined(OPENSSL_APPLE)) && \ + !defined(OPENSSL_NO_ASM) +#define CURVE25519_S2N_BIGNUM_CAPABLE +#endif + #if defined(BORINGSSL_HAS_UINT128) #define BORINGSSL_CURVE25519_64BIT #endif @@ -110,16 +120,64 @@ void x25519_ge_scalarmult_base(ge_p3 *h, const uint8_t a[32]); void x25519_ge_scalarmult(ge_p2 *r, const uint8_t *scalar, const ge_p3 *A); void x25519_sc_reduce(uint8_t s[64]); -void x25519_scalar_mult_generic_nohw(uint8_t out[32], - const uint8_t scalar[32], - const uint8_t point[32]); -void x25519_public_from_private_nohw(uint8_t out_public_value[32], - const uint8_t private_key[32]); +// x25519_scalar_mult_generic_[s2n_bignum,nohw] computes the x25519 function +// from rfc7748 6.1 using the peer coordinate (either K_A or K_B) encoded in +// |peer_public_value| and the scalar |private_key|. The resulting shared key +// is returned in |out_shared_key|. 
+void x25519_scalar_mult_generic_s2n_bignum( + uint8_t out_shared_key[X25519_SHARED_KEY_LEN], + const uint8_t private_key[X25519_PRIVATE_KEY_LEN], + const uint8_t peer_public_value[X25519_PUBLIC_VALUE_LEN]); +void x25519_scalar_mult_generic_nohw( + uint8_t out_shared_key[X25519_SHARED_KEY_LEN], + const uint8_t private_key[X25519_PRIVATE_KEY_LEN], + const uint8_t peer_public_value[X25519_PUBLIC_VALUE_LEN]); + +// x25519_public_from_private_[s2n_bignum,nohw] computes the x25519 function +// from rfc7748 6.1 using the base-coordinate 9 and scalar |private_key|. The +// resulting (encoded) public key coordinate (either K_A or K_B) is returned in +// |out_public_value|. +void x25519_public_from_private_s2n_bignum( + uint8_t out_public_value[X25519_PUBLIC_VALUE_LEN], + const uint8_t private_key[X25519_PRIVATE_KEY_LEN]); +void x25519_public_from_private_nohw( + uint8_t out_public_value[X25519_PUBLIC_VALUE_LEN], + const uint8_t private_key[X25519_PRIVATE_KEY_LEN]); + +// ed25519_public_key_from_hashed_seed_[s2n_bignum,nohw] handles steps +// rfc8032 5.1.5.[3,4]. Computes [az]B and encodes the public key to a 32-byte +// octet string returning it in |out_public_key|. +void ed25519_public_key_from_hashed_seed_s2n_bignum( + uint8_t out_public_key[ED25519_PUBLIC_KEY_LEN], + uint8_t az[SHA512_DIGEST_LENGTH]); void ed25519_public_key_from_hashed_seed_nohw( uint8_t out_public_key[ED25519_PUBLIC_KEY_LEN], uint8_t az[SHA512_DIGEST_LENGTH]); -// Computes the SHA512 of three input pairs: (|input1|, |len1|), +// ed25519_sign_[s2n_bignum,nohw] handles steps rfc8032 5.1.6.[3,5,6,7]. +// Computes the signature S = r + k * s modulo the order of the base-point B. +// Returns R || S in |out_sig|. |s| must have length +// |ED25519_PRIVATE_KEY_SEED_LEN| and |A| must have length +// |ED25519_PUBLIC_KEY_LEN|. 
+void ed25519_sign_s2n_bignum(uint8_t out_sig[ED25519_SIGNATURE_LEN], + uint8_t r[SHA512_DIGEST_LENGTH], const uint8_t *s, const uint8_t *A, + const void *message, size_t message_len); +void ed25519_sign_nohw(uint8_t out_sig[ED25519_SIGNATURE_LEN], + uint8_t r[SHA512_DIGEST_LENGTH], const uint8_t *s, const uint8_t *A, + const void *message, size_t message_len); + +// ed25519_verify_[s2n_bignum,nohw] handles steps rfc8032 5.1.7.[1,2,3]. +// Computes [S]B - [k]A' and returns the result in |R_computed_encoded|. Returns +// 1 on success and 0 otherwise. The failure case occurs if decoding of the +// public key |public_key| fails. +int ed25519_verify_s2n_bignum(uint8_t R_computed_encoded[32], + const uint8_t public_key[ED25519_PUBLIC_KEY_LEN], uint8_t R_expected[32], + uint8_t S[32], const uint8_t *message, size_t message_len); +int ed25519_verify_nohw(uint8_t R_computed_encoded[32], + const uint8_t public_key[ED25519_PUBLIC_KEY_LEN], uint8_t R_expected[32], + uint8_t S[32], const uint8_t *message, size_t message_len); + +// Computes the SHA512 function of three input pairs: (|input1|, |len1|), // (|input2|, |len2|), (|input3|, |len3|). Specifically, the hash is computed // over the concatenation: |input1| || |input2| || |input3|. // The final pair might have |len3| == 0, meaning this input will be ignored. @@ -128,17 +186,6 @@ void ed25519_sha512(uint8_t out[SHA512_DIGEST_LENGTH], const void *input1, size_t len1, const void *input2, size_t len2, const void *input3, size_t len3); -// |s| is of length |ED25519_PRIVATE_KEY_SEED_LEN| -// |A| is of length |ED25519_PUBLIC_KEY_LEN|. 
-void ed25519_sign_nohw( - uint8_t out_sig[ED25519_SIGNATURE_LEN], - uint8_t r[SHA512_DIGEST_LENGTH], const uint8_t *s, const uint8_t *A, - const void *message, size_t message_len); - -int ed25519_verify_nohw(uint8_t R_computed_encoded[32], - const uint8_t public_key[ED25519_PUBLIC_KEY_LEN], uint8_t R_expected[32], - uint8_t S[32], const uint8_t *message, size_t message_len); - enum spake2_state_t { spake2_state_init = 0, spake2_state_msg_generated, diff --git a/include/openssl/curve25519.h b/include/openssl/curve25519.h index 3a3a17c365..e7c88fa9c9 100644 --- a/include/openssl/curve25519.h +++ b/include/openssl/curve25519.h @@ -39,8 +39,9 @@ extern "C" { // X25519_keypair sets |out_public_value| and |out_private_key| to a freshly // generated, public–private key pair. -OPENSSL_EXPORT void X25519_keypair(uint8_t out_public_value[32], - uint8_t out_private_key[32]); +OPENSSL_EXPORT void X25519_keypair( + uint8_t out_public_value[X25519_PUBLIC_VALUE_LEN], + uint8_t out_private_key[X25519_PRIVATE_KEY_LEN]); // X25519 writes a shared key to |out_shared_key| that is calculated from the // given private key and the peer's public value. It returns one on success and @@ -48,14 +49,15 @@ OPENSSL_EXPORT void X25519_keypair(uint8_t out_public_value[32], // // Don't use the shared key directly, rather use a KDF and also include the two // public values as inputs. -OPENSSL_EXPORT int X25519(uint8_t out_shared_key[32], - const uint8_t private_key[32], - const uint8_t peer_public_value[32]); +OPENSSL_EXPORT int X25519(uint8_t out_shared_key[X25519_SHARED_KEY_LEN], + const uint8_t private_key[X25519_PRIVATE_KEY_LEN], + const uint8_t peer_public_value[X25519_PUBLIC_VALUE_LEN]); // X25519_public_from_private calculates a Diffie-Hellman public value from the // given private key and writes it to |out_public_value|. 
-OPENSSL_EXPORT void X25519_public_from_private(uint8_t out_public_value[32], - const uint8_t private_key[32]); +OPENSSL_EXPORT void X25519_public_from_private( + uint8_t out_public_value[X25519_PUBLIC_VALUE_LEN], + const uint8_t private_key[X25519_PRIVATE_KEY_LEN]); // Ed25519.