Skip to content

Commit

Permalink
Factor out the machine-optimised backend for 25519 algorithms (aws#1340)
Browse files Browse the repository at this point in the history
Finalise the 25519 refactoring by moving the machine-optimised implementations (from s2n-bignum) to its own compilation unit. This also aligns the s2n-bignum wrappers with the nohw wrappers, with common signatures and common function descriptions.
  • Loading branch information
torben-hansen authored Dec 6, 2023
1 parent 16caf44 commit fab0c9c
Show file tree
Hide file tree
Showing 6 changed files with 340 additions and 289 deletions.
1 change: 1 addition & 0 deletions crypto/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,7 @@ add_library(
crypto.c
curve25519/curve25519.c
curve25519/curve25519_nohw.c
curve25519/curve25519_s2n_bignum_asm.c
curve25519/spake25519.c
des/des.c
dh_extra/params.c
Expand Down
263 changes: 14 additions & 249 deletions crypto/curve25519/curve25519.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@

#include "internal.h"
#include "../internal.h"
#include "../fipsmodule/cpucap/internal.h"

// X25519 [1] and Ed25519 [2] are an ECDHE protocol and a signature scheme,
// respectively. This file contains an implementation of both using two
Expand All @@ -53,46 +52,6 @@
// For Ed25519, dom2(F,C) is the empty string and PH the identity function,
// cf. rfc8032 5.1.

// If (1) x86_64 or aarch64, (2) linux or apple, and (3) OPENSSL_NO_ASM is not
// set, s2n-bignum path is capable.
#if ((defined(OPENSSL_X86_64) && \
!defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX)) || \
defined(OPENSSL_AARCH64)) && \
(defined(OPENSSL_LINUX) || defined(OPENSSL_APPLE)) && \
!defined(OPENSSL_NO_ASM)
#include "../../third_party/s2n-bignum/include/s2n-bignum_aws-lc.h"
#define CURVE25519_S2N_BIGNUM_CAPABLE
#endif

// Stub functions if implementations are not compiled.
// These functions have to abort, otherwise we risk applications assuming they
// did work without actually doing anything.
#if !defined(CURVE25519_S2N_BIGNUM_CAPABLE) || defined(BORINGSSL_FIPS)

// S2N_BIGNUM_STUB_FUNC(return_type, symbol, params...) expands to a prototype
// for |symbol| immediately followed by a definition of |symbol| whose body
// only calls abort(). The variadic arguments are pasted verbatim as the
// parameter list of both the prototype and the definition.
//
// The last line of the macro ends in a line continuation, so the line that
// follows the macro definition is spliced into it and must remain blank.
//
// NOTE(review): the separate prototype is presumably emitted to keep
// missing-prototype warnings quiet -- confirm against the build flags.
#define S2N_BIGNUM_STUB_FUNC(return_type, symbol, ...) \
return_type symbol(__VA_ARGS__); \
return_type symbol(__VA_ARGS__) { abort(); } \

S2N_BIGNUM_STUB_FUNC(void, bignum_mod_n25519, uint64_t z[4], uint64_t k, uint64_t *x)
S2N_BIGNUM_STUB_FUNC(void, bignum_neg_p25519, uint64_t z[4], uint64_t x[4])
S2N_BIGNUM_STUB_FUNC(void, bignum_madd_n25519, uint64_t z[4], uint64_t x[4], uint64_t y[4], uint64_t c[4])
S2N_BIGNUM_STUB_FUNC(void, bignum_madd_n25519_alt, uint64_t z[4], uint64_t x[4], uint64_t y[4], uint64_t c[4])
S2N_BIGNUM_STUB_FUNC(void, edwards25519_encode, uint8_t z[32], uint64_t p[8])
S2N_BIGNUM_STUB_FUNC(uint64_t, edwards25519_decode, uint64_t z[8], const uint8_t c[32])
S2N_BIGNUM_STUB_FUNC(uint64_t, edwards25519_decode_alt, uint64_t z[8], const uint8_t c[32])
S2N_BIGNUM_STUB_FUNC(void, edwards25519_scalarmulbase, uint64_t res[8],uint64_t scalar[4])
S2N_BIGNUM_STUB_FUNC(void, edwards25519_scalarmulbase_alt, uint64_t res[8],uint64_t scalar[4])
S2N_BIGNUM_STUB_FUNC(void, edwards25519_scalarmuldouble, uint64_t res[8], uint64_t scalar[4], uint64_t point[8], uint64_t bscalar[4])
S2N_BIGNUM_STUB_FUNC(void, edwards25519_scalarmuldouble_alt, uint64_t res[8], uint64_t scalar[4], uint64_t point[8], uint64_t bscalar[4])

#if !defined(CURVE25519_S2N_BIGNUM_CAPABLE)
S2N_BIGNUM_STUB_FUNC(void, curve25519_x25519_byte, uint8_t res[32], const uint8_t scalar[32], const uint8_t point[32])
S2N_BIGNUM_STUB_FUNC(void, curve25519_x25519_byte_alt, uint8_t res[32], const uint8_t scalar[32], const uint8_t point[32])
S2N_BIGNUM_STUB_FUNC(void, curve25519_x25519base_byte, uint8_t res[32], const uint8_t scalar[32])
S2N_BIGNUM_STUB_FUNC(void, curve25519_x25519base_byte_alt, uint8_t res[32], const uint8_t scalar[32])
#endif // !defined(CURVE25519_S2N_BIGNUM_CAPABLE)
#endif // !defined(CURVE25519_S2N_BIGNUM_CAPABLE) || defined(BORINGSSL_FIPS)

OPENSSL_INLINE int curve25519_s2n_bignum_capable(void) {
#if defined(CURVE25519_S2N_BIGNUM_CAPABLE)
return 1;
Expand All @@ -110,203 +69,6 @@ OPENSSL_INLINE int ed25519_s2n_bignum_capable(void) {
#endif
}

// curve25519_s2n_bignum_use_no_alt_implementation returns 1 if the no_alt
// s2n-bignum implementation should be used and 0 otherwise.
//
// Below is the decision logic for which assembly backend implementation
// of x25519 s2n-bignum we should use if x25519 s2n-bignum capable. Currently,
// we support the following implementations.
//
// x86_64:
// - s2n-bignum-no-alt: hardware implementation using bmi2+adx instruction sets
// - s2n-bignum-alt: hardware implementation using standard instructions
//
// aarch64:
// - s2n-bignum-no-alt: hardware implementation for "low" multiplier throughput
// - s2n-bignum-alt: hardware implementation for "high" multiplier throughput
//
// Through experiments we have found that:
//
// For x86_64: bmi2+adx will almost always give a performance boost. So, here we
// prefer s2n-bignum-no-alt over s2n-bignum-alt if the former is supported.
// For aarch64: if a wide multiplier is supported, we prefer s2n-bignum-alt over
// s2n-bignum-no-alt if the former is supported.
// |CRYPTO_is_ARMv8_wide_multiplier_capable| specifically looks to match CPUs
// that have wide multipliers. This ensures that s2n-bignum-alt will only be
// used on such CPUs.
OPENSSL_INLINE int curve25519_s2n_bignum_use_no_alt_implementation(void);
OPENSSL_INLINE int curve25519_s2n_bignum_use_no_alt_implementation(void) {
#if defined(OPENSSL_X86_64)
  // x86_64: the no_alt variant is the bmi2+adx one, so it is selected exactly
  // when both capability checks report 1.
  return CRYPTO_is_BMI2_capable() == 1 && CRYPTO_is_ADX_capable() == 1;
#elif defined(OPENSSL_AARCH64)
  // aarch64: the alt variant targets wide multipliers, so no_alt is selected
  // exactly when the wide-multiplier check does not report 1.
  return CRYPTO_is_ARMv8_wide_multiplier_capable() != 1;
#else
  // Neither architecture: some value has to be returned; report "alt".
  return 0;
#endif
}


// s2n-bignum wrappers

// x25519_s2n_bignum writes to |out_shared_key| the output of the s2n-bignum
// X25519 byte-level routine applied to |private_key| and |peer_public_value|.
//
// |private_key| itself is never modified: a local 32-byte copy is taken, the
// three low bits of its first byte and the top bit of its last byte are
// cleared, and bit 6 of its last byte is set, before it is used as the scalar.
static void x25519_s2n_bignum(uint8_t out_shared_key[32],
    const uint8_t private_key[32], const uint8_t peer_public_value[32]) {

  uint8_t scalar[32];
  OPENSSL_memcpy(scalar, private_key, sizeof(scalar));
  scalar[0] &= 0xf8;
  scalar[31] = (uint8_t)((scalar[31] & 0x7f) | 0x40);

  // Pick the backend variant; anything other than "no_alt" uses the alt one.
  if (curve25519_s2n_bignum_use_no_alt_implementation() != 1) {
    curve25519_x25519_byte_alt(out_shared_key, scalar, peer_public_value);
    return;
  }
  curve25519_x25519_byte(out_shared_key, scalar, peer_public_value);
}

// x25519_s2n_bignum_public_from_private writes to |out_public_value| the
// output of the s2n-bignum X25519 base-point routine for |private_key|.
//
// |private_key| itself is never modified: a local 32-byte copy is taken, the
// three low bits of its first byte and the top bit of its last byte are
// cleared, and bit 6 of its last byte is set, before it is used as the scalar.
static void x25519_s2n_bignum_public_from_private(
    uint8_t out_public_value[32], const uint8_t private_key[32]) {

  uint8_t scalar[32];
  OPENSSL_memcpy(scalar, private_key, sizeof(scalar));
  scalar[0] &= 0xf8;
  scalar[31] = (uint8_t)((scalar[31] & 0x7f) | 0x40);

  // Pick the backend variant; anything other than "no_alt" uses the alt one.
  if (curve25519_s2n_bignum_use_no_alt_implementation() != 1) {
    curve25519_x25519base_byte_alt(out_public_value, scalar);
    return;
  }
  curve25519_x25519base_byte(out_public_value, scalar);
}

static void ed25519_public_key_from_hashed_seed_s2n_bignum(
uint8_t out_public_key[ED25519_PUBLIC_KEY_LEN],
uint8_t az[SHA512_DIGEST_LENGTH]) {

uint64_t uint64_point[8] = {0};
uint64_t uint64_hashed_seed[4] = {0};
OPENSSL_memcpy(uint64_hashed_seed, az, 32);

if (curve25519_s2n_bignum_use_no_alt_implementation() == 1) {
edwards25519_scalarmulbase(uint64_point, uint64_hashed_seed);
} else {
edwards25519_scalarmulbase_alt(uint64_point, uint64_hashed_seed);
}

edwards25519_encode(out_public_key, uint64_point);
}

// ed25519_sign_s2n_bignum writes the signature R || S to |out_sig| using the
// s2n-bignum backend.
//
// |r| holds 64 bytes that are reduced modulo the order of the base-point B.
// |s| is of length |ED25519_PRIVATE_KEY_SEED_LEN|; 32 bytes are read from it.
// |A| is of length |ED25519_PUBLIC_KEY_LEN|.
// |message| of length |message_len| is the data being signed.
static void ed25519_sign_s2n_bignum(
    uint8_t out_sig[ED25519_SIGNATURE_LEN],
    uint8_t r[SHA512_DIGEST_LENGTH], const uint8_t *s, const uint8_t *A,
    const void *message, size_t message_len) {

  // Decide once which backend variant serves every call below.
  const int no_alt = (curve25519_s2n_bignum_use_no_alt_implementation() == 1);

  uint8_t digest[SHA512_DIGEST_LENGTH] = {0};
  uint64_t point_R[8] = {0};
  uint64_t S_words[4] = {0};
  uint64_t r_words[8] = {0};
  uint64_t k_words[8] = {0};
  uint64_t s_words[4] = {0};
  OPENSSL_memcpy(r_words, r, sizeof(r_words));
  OPENSSL_memcpy(s_words, s, sizeof(s_words));

  // r <- r mod (order of B), reduced in place from 8 words.
  bignum_mod_n25519(r_words, 8, r_words);

  // R <- [r]B, encoded straight into the first 32 bytes of the signature.
  if (no_alt) {
    edwards25519_scalarmulbase(point_R, r_words);
  } else {
    edwards25519_scalarmulbase_alt(point_R, r_words);
  }
  edwards25519_encode(out_sig, point_R);

  // k <- SHA512(R || A || message) mod (order of B); R occupies 32 octets.
  ed25519_sha512(digest, out_sig, 32, A, ED25519_PUBLIC_KEY_LEN, message,
                 message_len);
  OPENSSL_memcpy(k_words, digest, SHA512_DIGEST_LENGTH);
  bignum_mod_n25519(k_words, 8, k_words);

  // S <- r + k * s mod (order of B), stored as the second half of |out_sig|.
  if (no_alt) {
    bignum_madd_n25519(S_words, k_words, s_words, r_words);
  } else {
    bignum_madd_n25519_alt(S_words, k_words, s_words, r_words);
  }
  OPENSSL_memcpy(out_sig + 32, S_words, sizeof(S_words));
}

// ed25519_verify_s2n_bignum computes [S]B - [k]A' with the s2n-bignum backend,
// where A' is the point decoded from |public_key| and
// k = SHA512(R_expected || public_key || message) reduced modulo the order of
// the base-point, and writes the encoded result to |R_computed_encoded|.
//
// Returns 0 if |public_key| fails to decode, in which case
// |R_computed_encoded| is not written, and 1 otherwise. This function does
// not itself compare |R_computed_encoded| with |R_expected|.
static int ed25519_verify_s2n_bignum(uint8_t R_computed_encoded[32],
    const uint8_t public_key[32], uint8_t R_expected[32],
    uint8_t S[32], const uint8_t *message, size_t message_len) {

  // Decide once which backend variant serves every call below.
  const int no_alt = (curve25519_s2n_bignum_use_no_alt_implementation() == 1);

  uint8_t digest[SHA512_DIGEST_LENGTH] = {0};
  uint64_t k_words[8] = {0};
  uint64_t R_words[8] = {0};
  uint64_t S_words[4] = {0};
  uint64_t point_A[8] = {0};

  // Decode the public key as A'; a non-zero status means decoding failed.
  const uint64_t decode_status = no_alt
                                     ? edwards25519_decode(point_A, public_key)
                                     : edwards25519_decode_alt(point_A, public_key);
  if (decode_status != 0) {
    return 0;
  }

  // rfc8032 5.1.7.2: k = SHA512(R_expected || public_key || message), then
  // reduced modulo the order of the base-point.
  ed25519_sha512(digest, R_expected, 32, public_key, ED25519_PUBLIC_KEY_LEN,
                 message, message_len);
  OPENSSL_memcpy(k_words, digest, SHA512_DIGEST_LENGTH);
  bignum_mod_n25519(k_words, 8, k_words);

  // rfc8032 5.1.7.3: [S]B - [k]A' is needed, so A' is negated first. In
  // extended coordinates on the twisted Edwards curve,
  // -(X,Y,Z,T) = (-X,Y,Z,-T); see "Twisted Edwards curves revisited",
  // https://ia.cr/2008/522. Only one field element at the start of |point_A|
  // is negated here, in place.
  // NOTE(review): presumably that element is the x-coordinate of an affine
  // (x, y) pair produced by the decode step -- confirm against s2n-bignum.
  bignum_neg_p25519(point_A, point_A);

  // R_have <- [S]B - [k]A'.
  OPENSSL_memcpy(S_words, S, sizeof(S_words));
  if (no_alt) {
    edwards25519_scalarmuldouble(R_words, k_words, point_A, S_words);
  } else {
    edwards25519_scalarmuldouble_alt(R_words, k_words, point_A, S_words);
  }
  edwards25519_encode(R_computed_encoded, R_words);

  return 1;
}

void ed25519_sha512(uint8_t out[SHA512_DIGEST_LENGTH],
const void *input1, size_t len1, const void *input2, size_t len2,
const void *input3, size_t len3) {
Expand All @@ -321,7 +83,6 @@ void ed25519_sha512(uint8_t out[SHA512_DIGEST_LENGTH],
SHA512_Final(out, &hash_ctx);
}


// Public interface functions

void ED25519_keypair_from_seed(uint8_t out_public_key[ED25519_PUBLIC_KEY_LEN],
Expand Down Expand Up @@ -468,18 +229,21 @@ int ED25519_verify(const uint8_t *message, size_t message_len,
}


void X25519_public_from_private(uint8_t out_public_value[32],
const uint8_t private_key[32]) {
void X25519_public_from_private(
uint8_t out_public_value[X25519_PUBLIC_VALUE_LEN],
const uint8_t private_key[X25519_PRIVATE_KEY_LEN]) {

if (curve25519_s2n_bignum_capable() == 1) {
x25519_s2n_bignum_public_from_private(out_public_value, private_key);
x25519_public_from_private_s2n_bignum(out_public_value, private_key);
} else {
x25519_public_from_private_nohw(out_public_value, private_key);
}
}

void X25519_keypair(uint8_t out_public_value[32], uint8_t out_private_key[32]) {
RAND_bytes(out_private_key, 32);
void X25519_keypair(uint8_t out_public_value[X25519_PUBLIC_VALUE_LEN],
uint8_t out_private_key[X25519_PRIVATE_KEY_LEN]) {

RAND_bytes(out_private_key, X25519_PRIVATE_KEY_LEN);

// All X25519 implementations should decode scalars correctly (see
// https://tools.ietf.org/html/rfc7748#section-5). However, if an
Expand All @@ -501,18 +265,19 @@ void X25519_keypair(uint8_t out_public_value[32], uint8_t out_private_key[32]) {
X25519_public_from_private(out_public_value, out_private_key);
}

int X25519(uint8_t out_shared_key[32], const uint8_t private_key[32],
const uint8_t peer_public_value[32]) {
int X25519(uint8_t out_shared_key[X25519_SHARED_KEY_LEN],
const uint8_t private_key[X25519_PRIVATE_KEY_LEN],
const uint8_t peer_public_value[X25519_PUBLIC_VALUE_LEN]) {

static const uint8_t kZeros[32] = {0};
static const uint8_t kZeros[X25519_SHARED_KEY_LEN] = {0};

if (curve25519_s2n_bignum_capable() == 1) {
x25519_s2n_bignum(out_shared_key, private_key, peer_public_value);
x25519_scalar_mult_generic_s2n_bignum(out_shared_key, private_key, peer_public_value);
} else {
x25519_scalar_mult_generic_nohw(out_shared_key, private_key, peer_public_value);
}

// The all-zero output results when the input is a point of small order.
return constant_time_declassify_int(
CRYPTO_memcmp(kZeros, out_shared_key, 32)) != 0;
CRYPTO_memcmp(kZeros, out_shared_key, X25519_SHARED_KEY_LEN)) != 0;
}
Loading

0 comments on commit fab0c9c

Please sign in to comment.