Skip to content

Commit

Permalink
[wip] precompute twisted twiddles
Browse files Browse the repository at this point in the history
Signed-off-by: Hanno Becker <[email protected]>
  • Loading branch information
hanno-becker committed Jan 26, 2025
1 parent aa84319 commit 42adb1c
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 19 deletions.
4 changes: 1 addition & 3 deletions mlkem/native/x86_64/src/basemul.S
Original file line number Diff line number Diff line change
Expand Up @@ -123,11 +123,10 @@ ret
vmovdqa (64*\i+16)*2(%rsi), %ymm2
vmovdqa (64*\i+48)*2(%rsi), %ymm3
vmovdqa (16*\i)*2(%rdx), %ymm4
vmovdqa (64 + 16*\i)*2(%rdx), %ymm1

vpmullw %ymm2, %ymm1, %ymm5
vpmullw %ymm3, %ymm1, %ymm6
vpmullw %ymm5, %ymm4, %ymm5
vpmullw %ymm6, %ymm4, %ymm6

vpmulhw %ymm2, %ymm4, %ymm7
vpmulhw %ymm3, %ymm4, %ymm8
Expand All @@ -146,7 +145,6 @@ vmovdqa %ymm8, (32*\i+16)*2(%rdi)
MLKEM_ASM_NAMESPACE(mulcache_compute_avx2):

vmovdqa AVX2_BACKEND_DATA_OFFSET_16XQ*2(%rcx),%ymm0
vmovdqa AVX2_BACKEND_DATA_OFFSET_16XQINV*2(%rcx),%ymm1

mulcache_compute_iter 0
mulcache_compute_iter 1
Expand Down
21 changes: 15 additions & 6 deletions mlkem/native/x86_64/src/x86_64_zetas_mulcache.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,21 @@
*/

ALIGN const int16_t zetas_mulcache_avx2[128] = {
-1103, 555, -1251, 1550, 422, 177, -291, 1574, -246, 1159, -777,
-602, -1590, -872, 418, -156, 430, 843, 871, 105, 587, -235,
-460, 1653, 778, -147, 1483, 1119, 644, 349, 329, -75, 817,
603, 1322, -1465, -1215, 1218, -874, -1187, -1185, -1278, -1510, -870,
-108, 996, 958, 1522, 1097, 610, -1285, 384, -136, -1335, 220,
-1659, -1530, 794, -854, 478, -308, 991, -1460, 1628,
-1103, 555, -1251, 1550, 422, 177, -291, 1574, -246,
1159, -777, -602, -1590, -872, 418, -156, 430, 843,
871, 105, 587, -235, -460, 1653, 778, -147, 1483,
1119, 644, 349, 329, -75, 817, 603, 1322, -1465,
-1215, 1218, -874, -1187, -1185, -1278, -1510, -870, -108,
996, 958, 1522, 1097, 610, -1285, 384, -136, -1335,
220, -1659, -1530, 794, -854, 478, -308, 991, -1460,
1628, -335, -11477, -32227, 20494, -27738, 945, -14883, 6182,
32010, 10631, 29175, -28762, -18486, 17560, -14430, -5276, 11182,
13387, -14233, -21655, 13131, -4587, 23092, 5493, -32502, 30317,
-18741, 12639, 20100, 18525, 19529, -12619, -31183, 25435, -7382,
24391, -20927, 10946, 24214, 16989, 10335, -7934, -22502, 10906,
31636, 28644, 23998, -17422, 20297, 2146, 15355, -32384, -6280,
-14903, -11044, 14469, -21498, -20198, 23210, -17442, -23860, -20257,
7756, 23132,
};

#else /* MLKEM_NATIVE_ARITH_BACKEND_X86_64_DEFAULT */
Expand Down
26 changes: 16 additions & 10 deletions scripts/autogen
Original file line number Diff line number Diff line change
Expand Up @@ -303,19 +303,25 @@ def gen_aarch64_inv_ntt_zetas_layer56():
def gen_avx2_mulcache_twiddles():
for i in range(2):
for idx in range(16):
root = signed_reduce(
pow(root_of_unity, bitreverse(64 + 32 * i + 2 * idx, 7), modulus)
* montgomery_factor
)
yield prepare_root_for_barrett(root)[0]
root, root_twisted = prepare_root_for_montmul(
pow(root_of_unity, bitreverse(64 + 32 * i + 2 * idx, 7), modulus))
yield root

for idx in range(16):
root = signed_reduce(
pow(root_of_unity, bitreverse(64 + 32 * i + 2 * idx + 1, 7), modulus)
* montgomery_factor
)
yield prepare_root_for_barrett(root)[0]
root, root_twisted = prepare_root_for_montmul(
pow(root_of_unity, bitreverse(64 + 32 * i + 2 * idx + 1, 7), modulus))
yield root

for i in range(2):
for idx in range(16):
root, root_twisted = prepare_root_for_montmul(
pow(root_of_unity, bitreverse(64 + 32 * i + 2 * idx, 7), modulus))
yield root_twisted

for idx in range(16):
root, root_twisted = prepare_root_for_montmul(
pow(root_of_unity, bitreverse(64 + 32 * i + 2 * idx + 1, 7), modulus))
yield root_twisted

def gen_aarch64_mulcache_twiddles():
for idx in range(64):
Expand Down

0 comments on commit 42adb1c

Please sign in to comment.