From e61427e6e5c980d79a65c13100efab230d11d424 Mon Sep 17 00:00:00 2001
From: Hanno Becker
Date: Sun, 26 Jan 2025 05:48:29 +0000
Subject: [PATCH] AVX2: Fix documentation of sign in basemul

Signed-off-by: Hanno Becker
---
 mlkem/native/x86_64/src/basemul.S | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/mlkem/native/x86_64/src/basemul.S b/mlkem/native/x86_64/src/basemul.S
index f755e04e2..751635b64 100644
--- a/mlkem/native/x86_64/src/basemul.S
+++ b/mlkem/native/x86_64/src/basemul.S
@@ -60,9 +60,9 @@ vpmullw %ymm7,%ymm12,%ymm7 # b1c1.lo
 vmovdqa (32*\off+ 0)*2(%r8),%ymm8 # d0z
 vpmulhw %ymm8,%ymm2,%ymm2 # b0d0z.hi
 vpmullw %ymm8,%ymm10,%ymm10 # b0d0z.lo
-vmovdqa (32*\off+16)*2(%r8),%ymm8 # d1z
-vpmulhw %ymm8,%ymm4,%ymm4 # b1d1z.hi
-vpmullw %ymm8,%ymm12,%ymm12 # b1d1z.lo
+vmovdqa (32*\off+16)*2(%r8),%ymm8 # -d1z
+vpmulhw %ymm8,%ymm4,%ymm4 # -b1d1z.hi
+vpmullw %ymm8,%ymm12,%ymm12 # -b1d1z.lo
 
 /* Compute 2nd high multiplication in Montgomery multiplication */
 vmovdqa AVX2_BACKEND_DATA_OFFSET_16XQ*2(%rcx),%ymm8
@@ -84,10 +84,11 @@ vpsubw %ymm10,%ymm2,%ymm10 # b0d0
 vpsubw %ymm6,%ymm15,%ymm6 # a1c1
 vpsubw %ymm11,%ymm3,%ymm11 # a1d1
 vpsubw %ymm7,%ymm0,%ymm7 # b1c1
-vpsubw %ymm12,%ymm4,%ymm12 # b1d1
+vpsubw %ymm12,%ymm4,%ymm12 # -b1d1z
 
 vpaddw %ymm5,%ymm9,%ymm9
 vpaddw %ymm7,%ymm11,%ymm11
+/* Note the different sign here, cancelling the sign in -b1d1z */
 vpsubw %ymm13,%ymm10,%ymm13
 vpsubw %ymm12,%ymm6,%ymm6
 