Skip to content

Commit

Permalink
Add P256_ prefix to remaining global asm functions
Browse files Browse the repository at this point in the history
  • Loading branch information
Emill committed Feb 3, 2021
1 parent 2e764a1 commit 6637dc4
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 39 deletions.
32 changes: 16 additions & 16 deletions p256-cortex-m4-asm-gcc.S
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@
// Selects one of many values
// *r0 = output, *r1 = table, r2 = num coordinates, r3 = index to choose [0..7]
// 547 cycles for affine coordinates
.type ecc_select_point, %function
ecc_select_point:
.global ecc_select_point
.type P256_select_point, %function
P256_select_point:
.global P256_select_point
push {r0,r2,r3,r4-r11,lr}
//frame push {r4-r11,lr}
//frame address sp,48
Expand Down Expand Up @@ -104,7 +104,7 @@ ecc_select_point:
add sp,#12
//frame address sp,36
pop {r4-r11,pc}
.size ecc_select_point, .-ecc_select_point
.size P256_select_point, .-P256_select_point
#endif

#if include_p256_verify || include_p256_sign
Expand Down Expand Up @@ -1838,9 +1838,9 @@ P256_mul_mod_n:
// r1: f
// r2: g
// r3: dest
.type divsteps2_31, %function
divsteps2_31:
.global divsteps2_31
.type P256_divsteps2_31, %function
P256_divsteps2_31:
.global P256_divsteps2_31
push {r3,r4-r8,lr}
//frame push {r4-r8,lr}
//frame address sp,28
Expand Down Expand Up @@ -1901,15 +1901,15 @@ divsteps2_31:
stm r3!,{r4-r7}

pop {r4-r8,pc}
.size divsteps2_31, .-divsteps2_31
.size P256_divsteps2_31, .-P256_divsteps2_31

// r0: a, r1: b
// *r2: f,g
// *r3: out
// cycles: 132
.type matrix_mul_fg_9, %function
matrix_mul_fg_9:
.global matrix_mul_fg_9
.type P256_matrix_mul_fg_9, %function
P256_matrix_mul_fg_9:
.global P256_matrix_mul_fg_9
push {r4-r11,lr}
//frame push {r4-r11,lr}

Expand Down Expand Up @@ -2011,16 +2011,16 @@ matrix_mul_fg_9:
stm r1!,{r3,r5,r6,r7,r12,lr}

pop {r4-r11,pc}
.size matrix_mul_fg_9, .-matrix_mul_fg_9
.size P256_matrix_mul_fg_9, .-P256_matrix_mul_fg_9

// r0: a, r1: b
// *r2: x,y
// *r3: out
// cycles: 184
.align 2
.type matrix_mul_p256_order, %function
matrix_mul_p256_order:
.global matrix_mul_p256_order
.type P256_matrix_mul_mod_n, %function
P256_matrix_mul_mod_n:
.global P256_matrix_mul_mod_n
push {r4-r11,lr}
//frame push {r4-r11,lr}

Expand Down Expand Up @@ -2168,7 +2168,7 @@ matrix_mul_p256_order:
pop {r4-r11,pc}

.ltorg
.size matrix_mul_p256_order, .-matrix_mul_p256_order
.size P256_matrix_mul_mod_n, .-P256_matrix_mul_mod_n
#else
// *r0=u
// *r1=x1
Expand Down
16 changes: 8 additions & 8 deletions p256-cortex-m4-asm-keil.s
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@
; Selects one of many values
; *r0 = output, *r1 = table, r2 = num coordinates, r3 = index to choose [0..7]
; 547 cycles for affine coordinates
ecc_select_point proc
export ecc_select_point
P256_select_point proc
export P256_select_point
push {r0,r2,r3,r4-r11,lr}
frame push {r4-r11,lr}
frame address sp,48
Expand Down Expand Up @@ -1814,8 +1814,8 @@ P256_mul_mod_n proc
; r1: f
; r2: g
; r3: dest
divsteps2_31 proc
export divsteps2_31
P256_divsteps2_31 proc
export P256_divsteps2_31
push {r3,r4-r8,lr}
frame push {r4-r8,lr}
frame address sp,28
Expand Down Expand Up @@ -1882,8 +1882,8 @@ divsteps2_31 proc
; *r2: f,g
; *r3: out
; cycles: 132
matrix_mul_fg_9 proc
export matrix_mul_fg_9
P256_matrix_mul_fg_9 proc
export P256_matrix_mul_fg_9
push {r4-r11,lr}
frame push {r4-r11,lr}

Expand Down Expand Up @@ -1992,8 +1992,8 @@ matrix_mul_fg_9 proc
; *r3: out
; cycles: 184
align 4
matrix_mul_p256_order proc
export matrix_mul_p256_order
P256_matrix_mul_mod_n proc
export P256_matrix_mul_mod_n
push {r4-r11,lr}
frame push {r4-r11,lr}

Expand Down
30 changes: 15 additions & 15 deletions p256-cortex-m4.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,9 @@ struct XYInteger {
uint32_t value[8]; // unsigned value, 0 <= value < P256_order
};

int divsteps2_31(int delta, uint32_t f, uint32_t g, uint32_t res_matrix[4]);
void matrix_mul_fg_9(uint32_t a, uint32_t b, const struct FGInteger fg[2], struct FGInteger *res);
void matrix_mul_p256_order(uint32_t a, uint32_t b, const struct XYInteger xy[2], struct XYInteger *res);
int P256_divsteps2_31(int delta, uint32_t f, uint32_t g, uint32_t res_matrix[4]);
void P256_matrix_mul_fg_9(uint32_t a, uint32_t b, const struct FGInteger fg[2], struct FGInteger *res);
void P256_matrix_mul_mod_n(uint32_t a, uint32_t b, const struct XYInteger xy[2], struct XYInteger *res);

void P256_to_montgomery(uint32_t aR[8], const uint32_t a[8]);
void P256_from_montgomery(uint32_t a[8], const uint32_t aR[8]);
Expand All @@ -60,7 +60,7 @@ void P256_add_mod_n(uint32_t res[8], const uint32_t a[8], const uint32_t b[8]);
void P256_mod_n_inv_vartime(uint32_t res[8], const uint32_t a[8]);
void P256_reduce_mod_n_32bytes(uint32_t res[8], const uint32_t a[8]);

void ecc_select_point(uint32_t (*output)[8], uint32_t* table, uint32_t num_coordinates, uint32_t index);
void P256_select_point(uint32_t (*output)[8], uint32_t* table, uint32_t num_coordinates, uint32_t index);

void P256_jacobian_to_affine(uint32_t affine_mont_x[8], uint32_t affine_mont_y[8], const uint32_t jacobian_mont[3][8]);
bool P256_point_is_on_curve(const uint32_t x_mont[8], const uint32_t y_mont[8]);
Expand Down Expand Up @@ -238,19 +238,19 @@ void P256_mod_n_inv(uint32_t out[8], const uint32_t in[8]) {
// Scaled transition matrix Ti
uint32_t matrix[4]; // element range: [-2^30, 2^31] (negative numbers are stored in two's complement form)

// Decode f and g into two's complement representation and use the lowest 32 bits in the divsteps2_31 calculation
// Decode f and g into two's complement representation and use the lowest 32 bits in the P256_divsteps2_31 calculation
uint32_t negate_f = state[i % 2].fg[0].flip_sign;
uint32_t negate_g = state[i % 2].fg[1].flip_sign;
delta = divsteps2_31(delta, (state[i % 2].fg[0].signed_value[0] ^ negate_f) - negate_f, (state[i % 2].fg[1].signed_value[0] ^ negate_g) - negate_g, matrix);
delta = P256_divsteps2_31(delta, (state[i % 2].fg[0].signed_value[0] ^ negate_f) - negate_f, (state[i % 2].fg[1].signed_value[0] ^ negate_g) - negate_g, matrix);

// "Jump step", calculates the new f and g values that apply after 31 divstep2 iterations
matrix_mul_fg_9(matrix[0], matrix[1], state[i % 2].fg, &state[(i + 1) % 2].fg[0]);
matrix_mul_fg_9(matrix[2], matrix[3], state[i % 2].fg, &state[(i + 1) % 2].fg[1]);
P256_matrix_mul_fg_9(matrix[0], matrix[1], state[i % 2].fg, &state[(i + 1) % 2].fg[0]);
P256_matrix_mul_fg_9(matrix[2], matrix[3], state[i % 2].fg, &state[(i + 1) % 2].fg[1]);

// Iterate the result vector
// Due to Montgomery multiplication inside this function, each step also adds a 2^-32 factor
matrix_mul_p256_order(matrix[0], matrix[1], state[i % 2].xy, &state[(i + 1) % 2].xy[0]);
matrix_mul_p256_order(matrix[2], matrix[3], state[i % 2].xy, &state[(i + 1) % 2].xy[1]);
P256_matrix_mul_mod_n(matrix[0], matrix[1], state[i % 2].xy, &state[(i + 1) % 2].xy[0]);
P256_matrix_mul_mod_n(matrix[2], matrix[3], state[i % 2].xy, &state[(i + 1) % 2].xy[1]);
}
// Calculates val^-1 = sgn(f) * v * 2^-744, where v is the "top-right corner" of the resulting T24*T23*...*T1 matrix.
// In this implementation, at this point x contains v * 2^-744.
Expand Down Expand Up @@ -308,7 +308,7 @@ static void scalarmult_variable_base(uint32_t output_mont_x[8], uint32_t output_

// e[63] is never negative
#if has_d_cache
ecc_select_point(current_point, (uint32_t*)table, 3, e[63] >> 1);
P256_select_point(current_point, (uint32_t*)table, 3, e[63] >> 1);
#else
memcpy(current_point, table[e[63] >> 1], 96);
#endif
Expand All @@ -319,7 +319,7 @@ static void scalarmult_variable_base(uint32_t output_mont_x[8], uint32_t output_
}
uint32_t selected_point[3][8];
#if has_d_cache
ecc_select_point(selected_point, (uint32_t*)table, 3, abs_int(e[i]) >> 1);
P256_select_point(selected_point, (uint32_t*)table, 3, abs_int(e[i]) >> 1);
#else
memcpy(selected_point, table[abs_int(e[i]) >> 1], 96);
#endif
Expand Down Expand Up @@ -375,7 +375,7 @@ static void scalarmult_fixed_base(uint32_t output_mont_x[8], uint32_t output_mon
uint32_t mask = get_bit(scalar2, i + 32 + 1) | (get_bit(scalar2, i + 64 + 32 + 1) << 1) | (get_bit(scalar2, i + 2 * 64 + 32 + 1) << 2);
if (i == 31) {
#if has_d_cache
ecc_select_point(current_point, (uint32_t*)p256_basepoint_precomp2[1], 2, mask);
P256_select_point(current_point, (uint32_t*)p256_basepoint_precomp2[1], 2, mask);
#else
memcpy(current_point, precomp[1][mask], 64);
#endif
Expand All @@ -386,7 +386,7 @@ static void scalarmult_fixed_base(uint32_t output_mont_x[8], uint32_t output_mon
uint32_t sign = get_bit(scalar2, i + 3 * 64 + 32 + 1) - 1; // positive: 0, negative: -1
mask = (mask ^ sign) & 7;
#if has_d_cache
ecc_select_point(selected_point, (uint32_t*)p256_basepoint_precomp2[1], 2, mask);
P256_select_point(selected_point, (uint32_t*)p256_basepoint_precomp2[1], 2, mask);
#else
memcpy(selected_point, precomp[1][mask], 64);
#endif
Expand All @@ -399,7 +399,7 @@ static void scalarmult_fixed_base(uint32_t output_mont_x[8], uint32_t output_mon
uint32_t sign = get_bit(scalar2, i + 3 * 64 + 1) - 1; // positive: 0, negative: -1
mask = (mask ^ sign) & 7;
#if has_d_cache
ecc_select_point(selected_point, (uint32_t*)p256_basepoint_precomp2[0], 2, mask);
P256_select_point(selected_point, (uint32_t*)p256_basepoint_precomp2[0], 2, mask);
#else
memcpy(selected_point, precomp[0][mask], 64);
#endif
Expand Down

0 comments on commit 6637dc4

Please sign in to comment.