Skip to content

Commit

Permalink
Merge pull request #649 from pq-code-package/underscore_define
Browse files (browse the repository at this point in the history)
Remove (almost) all macros with leading underscore
  • Loading branch information
hanno-becker authored Jan 13, 2025
2 parents d9bd7db + da5c19c commit e626211
Show file tree
Hide file tree
Showing 14 changed files with 942 additions and 938 deletions.
1,684 changes: 837 additions & 847 deletions examples/monolithic_build/mlkem_native_monobuild.c

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions mlkem/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@
#define MLKEM_ASM_NAMESPACE(sym) MLKEM_NAMESPACE(sym)
#define FIPS202_ASM_NAMESPACE(sym) FIPS202_NAMESPACE(sym)
#else
#define _PREFIX_UNDERSCORE(sym) _##sym
#define PREFIX_UNDERSCORE(sym) _PREFIX_UNDERSCORE(sym)
#define PREFIX_UNDERSCORE_(sym) _##sym
#define PREFIX_UNDERSCORE(sym) PREFIX_UNDERSCORE_(sym)
#define MLKEM_ASM_NAMESPACE(sym) PREFIX_UNDERSCORE(MLKEM_NAMESPACE(sym))
#define FIPS202_ASM_NAMESPACE(sym) PREFIX_UNDERSCORE(FIPS202_NAMESPACE(sym))
#endif
Expand Down
6 changes: 3 additions & 3 deletions mlkem/fips202/native/x86_64/src/KeccakP-1600-times4-SnP.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/

#ifndef _KeccakP_1600_times4_SnP_h_
#define _KeccakP_1600_times4_SnP_h_
#ifndef KECCAKP_1600_TIMES4_SNP_H
#define KECCAKP_1600_TIMES4_SNP_H

/** For the documentation, see PlSnP-documentation.h.
*/
Expand All @@ -34,4 +34,4 @@ and related or neighboring rights to the source code in this file.
FIPS202_NAMESPACE(KeccakP1600times4_PermuteAll_24rounds)
void KeccakP1600times4_PermuteAll_24rounds(void *states);

#endif
#endif /* KECCAKP_1600_TIMES4_SNP_H */
6 changes: 3 additions & 3 deletions mlkem/fips202/native/x86_64/src/KeccakP-align.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/

#ifndef _keccakp_align_h_
#define _keccakp_align_h_
#ifndef KECCAKP_ALIGN_H
#define KECCAKP_ALIGN_H

/* on Mac OS-X and possibly others, ALIGN(x) is defined in param.h, and -Werror
* chokes on the redef. */
Expand All @@ -37,4 +37,4 @@ and related or neighboring rights to the source code in this file.
#define ALIGN(x)
#endif

#endif
#endif /* KECCAKP_ALIGN_H */
6 changes: 3 additions & 3 deletions mlkem/fips202/native/x86_64/src/KeccakP-brg_endian.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@
Changes for ARM 9/9/2010
*/

#ifndef _KECCAKP_BRG_ENDIAN_H
#define _KECCAKP_BRG_ENDIAN_H
#ifndef KECCAKP_BRG_ENDIAN_H
#define KECCAKP_BRG_ENDIAN_H

#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */
#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */
Expand Down Expand Up @@ -147,4 +147,4 @@

#endif

#endif
#endif /* KECCAKP_BRG_ENDIAN_H */
6 changes: 3 additions & 3 deletions mlkem/native/x86_64/src/basemul.S
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

/* Polynomials to be multiplied are denoted a+bX (rsi arg) and c+dX (rdx arg) */
.macro schoolbook off
vmovdqa _16XQINV*2(%rcx),%ymm0
vmovdqa AVX2_BACKEND_DATA_OFFSET_16XQINV*2(%rcx),%ymm0
vmovdqa (64*\off+ 0)*2(%rsi),%ymm1 # a0
vmovdqa (64*\off+16)*2(%rsi),%ymm2 # b0
vmovdqa (64*\off+32)*2(%rsi),%ymm3 # a1
Expand Down Expand Up @@ -61,7 +61,7 @@ vpmullw %ymm7,%ymm12,%ymm7 # b1c1.lo
vpmullw %ymm8,%ymm12,%ymm12 # b1d1.lo

/* Compute 2nd high multiplication in Montgomery multiplication */
vmovdqa _16XQ*2(%rcx),%ymm8
vmovdqa AVX2_BACKEND_DATA_OFFSET_16XQ*2(%rcx),%ymm8
vpmulhw %ymm8,%ymm13,%ymm13
vpmulhw %ymm8,%ymm9,%ymm9
vpmulhw %ymm8,%ymm5,%ymm5
Expand Down Expand Up @@ -118,7 +118,7 @@ mov %rsp,%r8
and $-32,%rsp
sub $32,%rsp

lea (_ZETAS_EXP+176)*2(%rcx),%r9
lea (AVX2_BACKEND_DATA_OFFSET_ZETAS_EXP+176)*2(%rcx),%r9
schoolbook 0

add $32*2,%r9
Expand Down
24 changes: 12 additions & 12 deletions mlkem/native/x86_64/src/consts.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,60 +27,60 @@
#define SHIFT 32

const qdata_t qdata = {{
#define _16XQ 0
#define AVX2_BACKEND_DATA_OFFSET_16XQ 0
Q, Q, Q, Q, Q, Q,
Q, Q, Q, Q, Q, Q,
Q, Q, Q, Q,

#define _16XQINV 16
#define AVX2_BACKEND_DATA_OFFSET_16XQINV 16
QINV, QINV, QINV, QINV, QINV, QINV,
QINV, QINV, QINV, QINV, QINV, QINV,
QINV, QINV, QINV, QINV,

#define _16XV 32
#define AVX2_BACKEND_DATA_OFFSET_16XV 32
V, V, V, V, V, V,
V, V, V, V, V, V,
V, V, V, V,

#define _16XFLO 48
#define AVX2_BACKEND_DATA_OFFSET_16XFLO 48
FLO, FLO, FLO, FLO, FLO, FLO,
FLO, FLO, FLO, FLO, FLO, FLO,
FLO, FLO, FLO, FLO,

#define _16XFHI 64
#define AVX2_BACKEND_DATA_OFFSET_16XFHI 64
FHI, FHI, FHI, FHI, FHI, FHI,
FHI, FHI, FHI, FHI, FHI, FHI,
FHI, FHI, FHI, FHI,

#define _16XMONTSQLO 80
#define AVX2_BACKEND_DATA_OFFSET_16XMONTSQLO 80
MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO,
MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO,
MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO,

#define _16XMONTSQHI 96
#define AVX2_BACKEND_DATA_OFFSET_16XMONTSQHI 96
MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI,
MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI,
MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI,

#define _16XMASK 112
#define AVX2_BACKEND_DATA_OFFSET_16XMASK 112
MASK, MASK, MASK, MASK, MASK, MASK,
MASK, MASK, MASK, MASK, MASK, MASK,
MASK, MASK, MASK, MASK,

#define _REVIDXB 128
#define AVX2_BACKEND_DATA_OFFSET_REVIDXB 128
3854, 3340, 2826, 2312, 1798, 1284,
770, 256, 3854, 3340, 2826, 2312,
1798, 1284, 770, 256,

#define _REVIDXD 144
#define AVX2_BACKEND_DATA_OFFSET_REVIDXD 144
7, 0, 6, 0, 5, 0,
4, 0, 3, 0, 2, 0,
1, 0, 0, 0,

#define _ZETAS_EXP 160
#define AVX2_BACKEND_DATA_OFFSET_ZETAS_EXP 160
#include "x86_64_zetas.i"

#define _16XSHIFT 624
#define AVX2_BACKEND_DATA_OFFSET_16XSHIFT 624
SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT,
SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT,
SHIFT, SHIFT, SHIFT, SHIFT}};
Expand Down
24 changes: 12 additions & 12 deletions mlkem/native/x86_64/src/consts.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,18 @@

#include "../../../common.h"

#define _16XQ 0
#define _16XQINV 16
#define _16XV 32
#define _16XFLO 48
#define _16XFHI 64
#define _16XMONTSQLO 80
#define _16XMONTSQHI 96
#define _16XMASK 112
#define _REVIDXB 128
#define _REVIDXD 144
#define _ZETAS_EXP 160
#define _16XSHIFT 624
#define AVX2_BACKEND_DATA_OFFSET_16XQ 0
#define AVX2_BACKEND_DATA_OFFSET_16XQINV 16
#define AVX2_BACKEND_DATA_OFFSET_16XV 32
#define AVX2_BACKEND_DATA_OFFSET_16XFLO 48
#define AVX2_BACKEND_DATA_OFFSET_16XFHI 64
#define AVX2_BACKEND_DATA_OFFSET_16XMONTSQLO 80
#define AVX2_BACKEND_DATA_OFFSET_16XMONTSQHI 96
#define AVX2_BACKEND_DATA_OFFSET_16XMASK 112
#define AVX2_BACKEND_DATA_OFFSET_REVIDXB 128
#define AVX2_BACKEND_DATA_OFFSET_REVIDXD 144
#define AVX2_BACKEND_DATA_OFFSET_ZETAS_EXP 160
#define AVX2_BACKEND_DATA_OFFSET_16XSHIFT 624

/* The C ABI on MacOS exports all symbols with a leading
* underscore. This means that any symbols we refer to from
Expand Down
10 changes: 5 additions & 5 deletions mlkem/native/x86_64/src/fq.S
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ ret
.global MLKEM_ASM_NAMESPACE(reduce_avx2)
MLKEM_ASM_NAMESPACE(reduce_avx2):
#consts
vmovdqa _16XQ*2(%rsi),%ymm0
vmovdqa _16XV*2(%rsi),%ymm1
vmovdqa AVX2_BACKEND_DATA_OFFSET_16XQ*2(%rsi),%ymm0
vmovdqa AVX2_BACKEND_DATA_OFFSET_16XV*2(%rsi),%ymm1
call reduce128_avx2
add $256,%rdi
call reduce128_avx2
Expand Down Expand Up @@ -106,9 +106,9 @@ ret
.global MLKEM_ASM_NAMESPACE(tomont_avx2)
MLKEM_ASM_NAMESPACE(tomont_avx2):
#consts
vmovdqa _16XQ*2(%rsi),%ymm0
vmovdqa _16XMONTSQLO*2(%rsi),%ymm1
vmovdqa _16XMONTSQHI*2(%rsi),%ymm2
vmovdqa AVX2_BACKEND_DATA_OFFSET_16XQ*2(%rsi),%ymm0
vmovdqa AVX2_BACKEND_DATA_OFFSET_16XMONTSQLO*2(%rsi),%ymm1
vmovdqa AVX2_BACKEND_DATA_OFFSET_16XMONTSQHI*2(%rsi),%ymm2
call tomont128_avx2
add $256,%rdi
call tomont128_avx2
Expand Down
46 changes: 23 additions & 23 deletions mlkem/native/x86_64/src/intt.S
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ vpsubw %ymm\rh3,%ymm15,%ymm\rh3 /* rh3 = montmul(rh3-rl3, root0) */
.macro intt_levels0t5 off
/* level 0 */
/* no bounds assumptions */
vmovdqa _16XFLO*2(%rsi),%ymm2
vmovdqa _16XFHI*2(%rsi),%ymm3
vmovdqa AVX2_BACKEND_DATA_OFFSET_16XFLO*2(%rsi),%ymm2
vmovdqa AVX2_BACKEND_DATA_OFFSET_16XFHI*2(%rsi),%ymm3

vmovdqa (128*\off+ 0)*2(%rdi),%ymm4
vmovdqa (128*\off+ 32)*2(%rdi),%ymm6
Expand All @@ -83,11 +83,11 @@ fqmulprecomp 2,3,11

/* bounds: coefficients < q */

vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+208)*2(%rsi),%ymm15
vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+176)*2(%rsi),%ymm1
vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+224)*2(%rsi),%ymm2
vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+192)*2(%rsi),%ymm3
vmovdqa _REVIDXB*2(%rsi),%ymm12
vpermq $0x4E,(AVX2_BACKEND_DATA_OFFSET_ZETAS_EXP+(1-\off)*224+208)*2(%rsi),%ymm15
vpermq $0x4E,(AVX2_BACKEND_DATA_OFFSET_ZETAS_EXP+(1-\off)*224+176)*2(%rsi),%ymm1
vpermq $0x4E,(AVX2_BACKEND_DATA_OFFSET_ZETAS_EXP+(1-\off)*224+224)*2(%rsi),%ymm2
vpermq $0x4E,(AVX2_BACKEND_DATA_OFFSET_ZETAS_EXP+(1-\off)*224+192)*2(%rsi),%ymm3
vmovdqa AVX2_BACKEND_DATA_OFFSET_REVIDXB*2(%rsi),%ymm12
vpshufb %ymm12,%ymm15,%ymm15
vpshufb %ymm12,%ymm1,%ymm1
vpshufb %ymm12,%ymm2,%ymm2
Expand All @@ -103,9 +103,9 @@ butterfly 4,5,8,9,6,7,10,11,15,1,2,3
* 4,5,8,9 abs bound < 2q; 6,7,10,11 abs bound < q */

/* level 1 */
vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+144)*2(%rsi),%ymm2
vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+160)*2(%rsi),%ymm3
vmovdqa _REVIDXB*2(%rsi),%ymm1
vpermq $0x4E,(AVX2_BACKEND_DATA_OFFSET_ZETAS_EXP+(1-\off)*224+144)*2(%rsi),%ymm2
vpermq $0x4E,(AVX2_BACKEND_DATA_OFFSET_ZETAS_EXP+(1-\off)*224+160)*2(%rsi),%ymm3
vmovdqa AVX2_BACKEND_DATA_OFFSET_REVIDXB*2(%rsi),%ymm1
vpshufb %ymm1,%ymm2,%ymm2
vpshufb %ymm1,%ymm3,%ymm3

Expand All @@ -122,14 +122,14 @@ shuffle1 8,9,6,9 // 6,9 abs bound < q
shuffle1 10,11,8,11 // 8,11 abs bound < q

/* level 2 */
vmovdqa _REVIDXD*2(%rsi),%ymm12
vpermd (_ZETAS_EXP+(1-\off)*224+112)*2(%rsi),%ymm12,%ymm2
vpermd (_ZETAS_EXP+(1-\off)*224+128)*2(%rsi),%ymm12,%ymm10
vmovdqa AVX2_BACKEND_DATA_OFFSET_REVIDXD*2(%rsi),%ymm12
vpermd (AVX2_BACKEND_DATA_OFFSET_ZETAS_EXP+(1-\off)*224+112)*2(%rsi),%ymm12,%ymm2
vpermd (AVX2_BACKEND_DATA_OFFSET_ZETAS_EXP+(1-\off)*224+128)*2(%rsi),%ymm12,%ymm10

butterfly 3,4,6,8,5,7,9,11,2,2,10,10
/* 3 abs bound < 8q, 4 abs bound < 4q, 6,8 abs bound < 2q, 5,7,9,11 abs bound < q */

vmovdqa _16XV*2(%rsi),%ymm1
vmovdqa AVX2_BACKEND_DATA_OFFSET_16XV*2(%rsi),%ymm1
red16 3
/* 4 abs bound < 4q, 6,8 abs bound < 2q, 3,5,7,9,11 abs bound < q */

Expand All @@ -139,8 +139,8 @@ shuffle2 5,7,6,7 // 6,7 abs bound < q
shuffle2 9,11,5,11 // 5,11 abs bound < q

/* level 3 */
vpermq $0x1B,(_ZETAS_EXP+(1-\off)*224+80)*2(%rsi),%ymm2
vpermq $0x1B,(_ZETAS_EXP+(1-\off)*224+96)*2(%rsi),%ymm9
vpermq $0x1B,(AVX2_BACKEND_DATA_OFFSET_ZETAS_EXP+(1-\off)*224+80)*2(%rsi),%ymm2
vpermq $0x1B,(AVX2_BACKEND_DATA_OFFSET_ZETAS_EXP+(1-\off)*224+96)*2(%rsi),%ymm9

butterfly 10,3,6,5,4,8,7,11,2,2,9,9
/* 10 abs bound < 8q
Expand All @@ -165,8 +165,8 @@ shuffle4 4,8,6,8 /* 6,8 abs bound < q */
shuffle4 7,11,4,11 /* 4,11 abs bound < q */

/* level 4 */
vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+48)*2(%rsi),%ymm2
vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+64)*2(%rsi),%ymm7
vpermq $0x4E,(AVX2_BACKEND_DATA_OFFSET_ZETAS_EXP+(1-\off)*224+48)*2(%rsi),%ymm2
vpermq $0x4E,(AVX2_BACKEND_DATA_OFFSET_ZETAS_EXP+(1-\off)*224+64)*2(%rsi),%ymm7

butterfly 9,10,6,4,3,5,8,11,2,2,7,7
/* 9 abs bound < 8q
Expand All @@ -184,8 +184,8 @@ shuffle8 3,5,6,5 /* 5,6 abs bound < q */
shuffle8 8,11,3,11 /* 3,11 abs bound < q */

/* level 5 */
vmovdqa (_ZETAS_EXP+(1-\off)*224+16)*2(%rsi),%ymm2
vmovdqa (_ZETAS_EXP+(1-\off)*224+32)*2(%rsi),%ymm8
vmovdqa (AVX2_BACKEND_DATA_OFFSET_ZETAS_EXP+(1-\off)*224+16)*2(%rsi),%ymm2
vmovdqa (AVX2_BACKEND_DATA_OFFSET_ZETAS_EXP+(1-\off)*224+32)*2(%rsi),%ymm8

butterfly 7,9,6,3,10,4,5,11,2,2,8,8
/* 7 abs bound <8q
Expand Down Expand Up @@ -216,13 +216,13 @@ vmovdqa (64*\off+ 0)*2(%rdi),%ymm4
vmovdqa (64*\off+128)*2(%rdi),%ymm8
vmovdqa (64*\off+ 16)*2(%rdi),%ymm5
vmovdqa (64*\off+144)*2(%rdi),%ymm9
vpbroadcastq (_ZETAS_EXP+0)*2(%rsi),%ymm2
vpbroadcastq (AVX2_BACKEND_DATA_OFFSET_ZETAS_EXP+0)*2(%rsi),%ymm2

vmovdqa (64*\off+ 32)*2(%rdi),%ymm6
vmovdqa (64*\off+160)*2(%rdi),%ymm10
vmovdqa (64*\off+ 48)*2(%rdi),%ymm7
vmovdqa (64*\off+176)*2(%rdi),%ymm11
vpbroadcastq (_ZETAS_EXP+4)*2(%rsi),%ymm3
vpbroadcastq (AVX2_BACKEND_DATA_OFFSET_ZETAS_EXP+4)*2(%rsi),%ymm3

butterfly 4,5,6,7,8,9,10,11
/* global abs bound < 8q */
Expand All @@ -243,7 +243,7 @@ vmovdqa %ymm11,(64*\off+176)*2(%rdi)
.text
.global MLKEM_ASM_NAMESPACE(invntt_avx2)
MLKEM_ASM_NAMESPACE(invntt_avx2):
vmovdqa _16XQ*2(%rsi),%ymm0
vmovdqa AVX2_BACKEND_DATA_OFFSET_16XQ*2(%rsi),%ymm0

intt_levels0t5 0
intt_levels0t5 1
Expand Down
Loading

18 comments on commit e626211

@oqs-bot
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Arm Cortex-A76 (Raspberry Pi 5) benchmarks

Benchmark suite Current: e626211 Previous: d9bd7db Ratio
ML-KEM-512 keypair 28989 cycles 28989 cycles 1
ML-KEM-512 encaps 35400 cycles 35401 cycles 1.00
ML-KEM-512 decaps 45899 cycles 45897 cycles 1.00
ML-KEM-768 keypair 49365 cycles 49368 cycles 1.00
ML-KEM-768 encaps 55559 cycles 55564 cycles 1.00
ML-KEM-768 decaps 70311 cycles 70316 cycles 1.00
ML-KEM-1024 keypair 71983 cycles 71986 cycles 1.00
ML-KEM-1024 encaps 80734 cycles 80738 cycles 1.00
ML-KEM-1024 decaps 100605 cycles 100607 cycles 1.00

This comment was automatically generated by workflow using github-action-benchmark.

@oqs-bot
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Intel Xeon 4th gen (c7i)

Benchmark suite Current: e626211 Previous: d9bd7db Ratio
ML-KEM-512 keypair 13536 cycles 13516 cycles 1.00
ML-KEM-512 encaps 17350 cycles 17265 cycles 1.00
ML-KEM-512 decaps 22914 cycles 22991 cycles 1.00
ML-KEM-768 keypair 22587 cycles 22496 cycles 1.00
ML-KEM-768 encaps 24593 cycles 24483 cycles 1.00
ML-KEM-768 decaps 32704 cycles 32379 cycles 1.01
ML-KEM-1024 keypair 31319 cycles 31412 cycles 1.00
ML-KEM-1024 encaps 34804 cycles 34977 cycles 1.00
ML-KEM-1024 decaps 45603 cycles 45925 cycles 0.99

This comment was automatically generated by workflow using github-action-benchmark.

@oqs-bot
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Intel Xeon 3rd gen (c6i)

Benchmark suite Current: e626211 Previous: d9bd7db Ratio
ML-KEM-512 keypair 20359 cycles 20364 cycles 1.00
ML-KEM-512 encaps 26943 cycles 26954 cycles 1.00
ML-KEM-512 decaps 35740 cycles 36027 cycles 0.99
ML-KEM-768 keypair 34866 cycles 34937 cycles 1.00
ML-KEM-768 encaps 38182 cycles 38228 cycles 1.00
ML-KEM-768 decaps 50957 cycles 51005 cycles 1.00
ML-KEM-1024 keypair 47931 cycles 47942 cycles 1.00
ML-KEM-1024 encaps 54104 cycles 54107 cycles 1.00
ML-KEM-1024 decaps 71634 cycles 71587 cycles 1.00

This comment was automatically generated by workflow using github-action-benchmark.

@oqs-bot
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Intel Xeon 4th gen (c7i) (no-opt)

Benchmark suite Current: e626211 Previous: d9bd7db Ratio
ML-KEM-512 keypair 35009 cycles 35019 cycles 1.00
ML-KEM-512 encaps 46100 cycles 46103 cycles 1.00
ML-KEM-512 decaps 58759 cycles 58765 cycles 1.00
ML-KEM-768 keypair 58967 cycles 58894 cycles 1.00
ML-KEM-768 encaps 70893 cycles 70998 cycles 1.00
ML-KEM-768 decaps 88489 cycles 88632 cycles 1.00
ML-KEM-1024 keypair 87102 cycles 86960 cycles 1.00
ML-KEM-1024 encaps 104283 cycles 104217 cycles 1.00
ML-KEM-1024 decaps 127044 cycles 126963 cycles 1.00

This comment was automatically generated by workflow using github-action-benchmark.

@oqs-bot
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Graviton4

Benchmark suite Current: e626211 Previous: d9bd7db Ratio
ML-KEM-512 keypair 18114 cycles 18115 cycles 1.00
ML-KEM-512 encaps 22178 cycles 22176 cycles 1.00
ML-KEM-512 decaps 28840 cycles 28838 cycles 1.00
ML-KEM-768 keypair 30568 cycles 30559 cycles 1.00
ML-KEM-768 encaps 33632 cycles 33636 cycles 1.00
ML-KEM-768 decaps 43156 cycles 43156 cycles 1
ML-KEM-1024 keypair 44163 cycles 44166 cycles 1.00
ML-KEM-1024 encaps 49647 cycles 49650 cycles 1.00
ML-KEM-1024 decaps 62645 cycles 62646 cycles 1.00

This comment was automatically generated by workflow using github-action-benchmark.

@oqs-bot
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AMD EPYC 4th gen (c7a)

Benchmark suite Current: e626211 Previous: d9bd7db Ratio
ML-KEM-512 keypair 14915 cycles 14905 cycles 1.00
ML-KEM-512 encaps 19661 cycles 19648 cycles 1.00
ML-KEM-512 decaps 26307 cycles 26290 cycles 1.00
ML-KEM-768 keypair 25621 cycles 25592 cycles 1.00
ML-KEM-768 encaps 28168 cycles 28066 cycles 1.00
ML-KEM-768 decaps 37849 cycles 37834 cycles 1.00
ML-KEM-1024 keypair 35337 cycles 35661 cycles 0.99
ML-KEM-1024 encaps 39987 cycles 41015 cycles 0.97
ML-KEM-1024 decaps 53509 cycles 54439 cycles 0.98

This comment was automatically generated by workflow using github-action-benchmark.

@oqs-bot
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Intel Xeon 3rd gen (c6i) (no-opt)

Benchmark suite Current: e626211 Previous: d9bd7db Ratio
ML-KEM-512 keypair 56537 cycles 56618 cycles 1.00
ML-KEM-512 encaps 69488 cycles 69552 cycles 1.00
ML-KEM-512 decaps 91292 cycles 91395 cycles 1.00
ML-KEM-768 keypair 91859 cycles 91968 cycles 1.00
ML-KEM-768 encaps 107740 cycles 107912 cycles 1.00
ML-KEM-768 decaps 136291 cycles 136510 cycles 1.00
ML-KEM-1024 keypair 134829 cycles 134672 cycles 1.00
ML-KEM-1024 encaps 155130 cycles 155266 cycles 1.00
ML-KEM-1024 decaps 191483 cycles 191625 cycles 1.00

This comment was automatically generated by workflow using github-action-benchmark.

@oqs-bot
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Graviton3

Benchmark suite Current: e626211 Previous: d9bd7db Ratio
ML-KEM-512 keypair 18961 cycles 18960 cycles 1.00
ML-KEM-512 encaps 23575 cycles 23575 cycles 1
ML-KEM-512 decaps 30662 cycles 30660 cycles 1.00
ML-KEM-768 keypair 32311 cycles 32311 cycles 1
ML-KEM-768 encaps 35892 cycles 35895 cycles 1.00
ML-KEM-768 decaps 46032 cycles 46029 cycles 1.00
ML-KEM-1024 keypair 46635 cycles 46638 cycles 1.00
ML-KEM-1024 encaps 52462 cycles 52462 cycles 1
ML-KEM-1024 decaps 66271 cycles 66270 cycles 1.00

This comment was automatically generated by workflow using github-action-benchmark.

@oqs-bot
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Graviton2

Benchmark suite Current: e626211 Previous: d9bd7db Ratio
ML-KEM-512 keypair 28986 cycles 28988 cycles 1.00
ML-KEM-512 encaps 35424 cycles 35426 cycles 1.00
ML-KEM-512 decaps 45886 cycles 45886 cycles 1
ML-KEM-768 keypair 49379 cycles 49376 cycles 1.00
ML-KEM-768 encaps 55562 cycles 55564 cycles 1.00
ML-KEM-768 decaps 70312 cycles 70311 cycles 1.00
ML-KEM-1024 keypair 71971 cycles 71971 cycles 1
ML-KEM-1024 encaps 80771 cycles 80772 cycles 1.00
ML-KEM-1024 decaps 100637 cycles 100634 cycles 1.00

This comment was automatically generated by workflow using github-action-benchmark.

@oqs-bot
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Graviton4 (no-opt)

Benchmark suite Current: e626211 Previous: d9bd7db Ratio
ML-KEM-512 keypair 42065 cycles 42047 cycles 1.00
ML-KEM-512 encaps 50179 cycles 50180 cycles 1.00
ML-KEM-512 decaps 66087 cycles 66087 cycles 1
ML-KEM-768 keypair 69061 cycles 69056 cycles 1.00
ML-KEM-768 encaps 79781 cycles 79779 cycles 1.00
ML-KEM-768 decaps 101036 cycles 101038 cycles 1.00
ML-KEM-1024 keypair 102185 cycles 102180 cycles 1.00
ML-KEM-1024 encaps 117165 cycles 117168 cycles 1.00
ML-KEM-1024 decaps 143740 cycles 143729 cycles 1.00

This comment was automatically generated by workflow using github-action-benchmark.

@oqs-bot
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Arm Cortex-A55 (Snapdragon 888) benchmarks

Benchmark suite Current: e626211 Previous: d9bd7db Ratio
ML-KEM-512 keypair 58342 cycles 58331 cycles 1.00
ML-KEM-512 encaps 65807 cycles 65787 cycles 1.00
ML-KEM-512 decaps 84564 cycles 84594 cycles 1.00
ML-KEM-768 keypair 99001 cycles 98943 cycles 1.00
ML-KEM-768 encaps 110422 cycles 110472 cycles 1.00
ML-KEM-768 decaps 137103 cycles 136982 cycles 1.00
ML-KEM-1024 keypair 150105 cycles 150114 cycles 1.00
ML-KEM-1024 encaps 166785 cycles 166793 cycles 1.00
ML-KEM-1024 decaps 202940 cycles 203018 cycles 1.00

This comment was automatically generated by workflow using github-action-benchmark.

@oqs-bot
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AMD EPYC 4th gen (c7a) (no-opt)

Benchmark suite Current: e626211 Previous: d9bd7db Ratio
ML-KEM-512 keypair 45754 cycles 45740 cycles 1.00
ML-KEM-512 encaps 56879 cycles 56866 cycles 1.00
ML-KEM-512 decaps 76277 cycles 76256 cycles 1.00
ML-KEM-768 keypair 74526 cycles 74471 cycles 1.00
ML-KEM-768 encaps 88629 cycles 88553 cycles 1.00
ML-KEM-768 decaps 114447 cycles 114371 cycles 1.00
ML-KEM-1024 keypair 109612 cycles 109371 cycles 1.00
ML-KEM-1024 encaps 127275 cycles 127260 cycles 1.00
ML-KEM-1024 decaps 159939 cycles 160053 cycles 1.00

This comment was automatically generated by workflow using github-action-benchmark.

@oqs-bot
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Graviton3 (no-opt)

Benchmark suite Current: e626211 Previous: d9bd7db Ratio
ML-KEM-512 keypair 45404 cycles 45399 cycles 1.00
ML-KEM-512 encaps 54334 cycles 54333 cycles 1.00
ML-KEM-512 decaps 71386 cycles 71378 cycles 1.00
ML-KEM-768 keypair 74870 cycles 74874 cycles 1.00
ML-KEM-768 encaps 86172 cycles 86175 cycles 1.00
ML-KEM-768 decaps 108650 cycles 108660 cycles 1.00
ML-KEM-1024 keypair 111029 cycles 111038 cycles 1.00
ML-KEM-1024 encaps 125884 cycles 125889 cycles 1.00
ML-KEM-1024 decaps 154631 cycles 154645 cycles 1.00

This comment was automatically generated by workflow using github-action-benchmark.

@oqs-bot
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AMD EPYC 3rd gen (c6a)

Benchmark suite Current: e626211 Previous: d9bd7db Ratio
ML-KEM-512 keypair 18115 cycles 18091 cycles 1.00
ML-KEM-512 encaps 23033 cycles 23009 cycles 1.00
ML-KEM-512 decaps 30227 cycles 30204 cycles 1.00
ML-KEM-768 keypair 31129 cycles 31086 cycles 1.00
ML-KEM-768 encaps 33967 cycles 33867 cycles 1.00
ML-KEM-768 decaps 44535 cycles 44598 cycles 1.00
ML-KEM-1024 keypair 44700 cycles 44686 cycles 1.00
ML-KEM-1024 encaps 49959 cycles 50000 cycles 1.00
ML-KEM-1024 decaps 64485 cycles 64386 cycles 1.00

This comment was automatically generated by workflow using github-action-benchmark.

@oqs-bot
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Graviton2 (no-opt)

Benchmark suite Current: e626211 Previous: d9bd7db Ratio
ML-KEM-512 keypair 71292 cycles 71249 cycles 1.00
ML-KEM-512 encaps 85129 cycles 85132 cycles 1.00
ML-KEM-512 decaps 112737 cycles 112739 cycles 1.00
ML-KEM-768 keypair 117333 cycles 117635 cycles 1.00
ML-KEM-768 encaps 135159 cycles 135324 cycles 1.00
ML-KEM-768 decaps 171918 cycles 172043 cycles 1.00
ML-KEM-1024 keypair 175319 cycles 175209 cycles 1.00
ML-KEM-1024 encaps 197354 cycles 197292 cycles 1.00
ML-KEM-1024 decaps 243560 cycles 243480 cycles 1.00

This comment was automatically generated by workflow using github-action-benchmark.

@oqs-bot
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AMD EPYC 3rd gen (c6a) (no-opt)

Benchmark suite Current: e626211 Previous: d9bd7db Ratio
ML-KEM-512 keypair 52393 cycles 52306 cycles 1.00
ML-KEM-512 encaps 65380 cycles 65271 cycles 1.00
ML-KEM-512 decaps 88204 cycles 88087 cycles 1.00
ML-KEM-768 keypair 85108 cycles 85013 cycles 1.00
ML-KEM-768 encaps 101659 cycles 101563 cycles 1.00
ML-KEM-768 decaps 132278 cycles 132149 cycles 1.00
ML-KEM-1024 keypair 124171 cycles 123926 cycles 1.00
ML-KEM-1024 encaps 146035 cycles 145719 cycles 1.00
ML-KEM-1024 decaps 183883 cycles 183590 cycles 1.00

This comment was automatically generated by workflow using github-action-benchmark.

@oqs-bot
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bananapi bpi-f3 benchmarks

Benchmark suite Current: e626211 Previous: d9bd7db Ratio
ML-KEM-512 keypair 334563 cycles 334613 cycles 1.00
ML-KEM-512 encaps 445827 cycles 446029 cycles 1.00
ML-KEM-512 decaps 594143 cycles 594113 cycles 1.00
ML-KEM-768 keypair 554947 cycles 555012 cycles 1.00
ML-KEM-768 encaps 697023 cycles 697082 cycles 1.00
ML-KEM-768 decaps 889449 cycles 888816 cycles 1.00
ML-KEM-1024 keypair 819543 cycles 819644 cycles 1.00
ML-KEM-1024 encaps 997493 cycles 996775 cycles 1.00
ML-KEM-1024 decaps 1228668 cycles 1229877 cycles 1.00

This comment was automatically generated by workflow using github-action-benchmark.

@oqs-bot
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Arm Cortex-A72 (Raspberry Pi 4) benchmarks

Benchmark suite Current: e626211 Previous: d9bd7db Ratio
ML-KEM-512 keypair 51538 cycles 51998 cycles 0.99
ML-KEM-512 encaps 57868 cycles 58446 cycles 0.99
ML-KEM-512 decaps 74079 cycles 75080 cycles 0.99
ML-KEM-768 keypair 88992 cycles 88019 cycles 1.01
ML-KEM-768 encaps 97040 cycles 96283 cycles 1.01
ML-KEM-768 decaps 120517 cycles 119311 cycles 1.01
ML-KEM-1024 keypair 131278 cycles 131192 cycles 1.00
ML-KEM-1024 encaps 144147 cycles 144404 cycles 1.00
ML-KEM-1024 decaps 175473 cycles 175644 cycles 1.00

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.