Skip to content

Commit

Permalink
enable poly_compress_avx for Linux kernel compilation
Browse files Browse the repository at this point in the history
Also enable the AVX compiler flags.

Signed-off-by: Stephan Mueller <[email protected]>
  • Loading branch information
smuellerDD committed Feb 8, 2024
1 parent c11e5a6 commit c022fda
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 15 deletions.
2 changes: 2 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ Changes 0.9.0-prerelease

* enhancement: use accelerated XOR for KMAC/cSHAKE AEAD

* fix: enable poly_compress_avx for Linux kernel compilation

Changes 0.8.0:

* enhancement: add applications
Expand Down
2 changes: 1 addition & 1 deletion TODO
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,6 @@
compiled for target host ARMv7 - at the moment compiling on ARMv6 fails
because the Neon code is compiled (which should not be compiled)

- poly_compress_avx does not work when compiled for Linux kernel
- poly_compress_avx does not work when compiled for Linux kernel - it works with current compilers (see change 0b0bf07b5f7c0d7d623087212883c427476eefef, but older compilers still fail this code

- check RISCV-64 Keccak implementation
15 changes: 4 additions & 11 deletions kem/src/avx2/kyber_poly_avx2.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,6 @@
#error "AVX2 support for Kyber mode 4 only"
#endif

//TODO remove from kernel code
void poly_compress(uint8_t r[LC_KYBER_POLYCOMPRESSEDBYTES],
const poly *restrict a);
/**
* @brief poly_compress
*
Expand All @@ -51,17 +48,13 @@ void poly_compress(uint8_t r[LC_KYBER_POLYCOMPRESSEDBYTES],
void poly_compress_avx(uint8_t r[LC_KYBER_POLYCOMPRESSEDBYTES],
const poly *restrict a)
{
#ifdef LINUX_KERNEL
poly_compress(r, a);
#else /* LINUX_KERNEL */
unsigned int i;
__m256i f0, f1;
__m128i t0, t1;

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeclaration-after-statement"
/* Due to const, the variables cannot be defined before */
LC_FPU_ENABLE;
const __m256i v = _mm256_load_si256(&kyber_qdata.vec[_16XV / 16]);
const __m256i shift1 = _mm256_set1_epi16(1 << 10);
const __m256i mask = _mm256_set1_epi16(31);
Expand All @@ -74,6 +67,7 @@ void poly_compress_avx(uint8_t r[LC_KYBER_POLYCOMPRESSEDBYTES],
-1, 4, 3, 2, 1, 0);
#pragma GCC diagnostic pop

LC_FPU_ENABLE;
for (i = 0; i < LC_KYBER_N / 32; i++) {
f0 = _mm256_load_si256(&a->vec[2 * i + 0]);
f1 = _mm256_load_si256(&a->vec[2 * i + 1]);
Expand All @@ -98,7 +92,6 @@ void poly_compress_avx(uint8_t r[LC_KYBER_POLYCOMPRESSEDBYTES],
memcpy(&r[20 * i + 16], &t1, 4);
}
LC_FPU_DISABLE;
#endif /* LINUX_KERNEL */
}

/**
Expand All @@ -122,7 +115,6 @@ void poly_decompress_avx(poly *restrict r,
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeclaration-after-statement"
/* Due to const, the variables cannot be defined before */
LC_FPU_ENABLE;
const __m256i q = _mm256_load_si256(&kyber_qdata.vec[_16XQ / 16]);
const __m256i shufbidx =
_mm256_set_epi8(9, 9, 9, 8, 8, 8, 8, 7, 7, 6, 6, 6, 6, 5, 5, 5,
Expand All @@ -135,6 +127,7 @@ void poly_decompress_avx(poly *restrict r,
512, 64, 8, 256, 32, 1024);
#pragma GCC diagnostic pop

LC_FPU_ENABLE;
for (i = 0; i < LC_KYBER_N / 16; i++) {
t = _mm_loadl_epi64((__m128i_u *)&a[10 * i + 0]);
memcpy(&ti, &a[10 * i + 8], 2);
Expand Down Expand Up @@ -168,7 +161,6 @@ void poly_frommsg_avx(poly *restrict r,
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeclaration-after-statement"
/* Due to const, the variables cannot be defined before */
LC_FPU_ENABLE;
const __m256i shift =
_mm256_broadcastsi128_si256(_mm_set_epi32(0, 1, 2, 3));
const __m256i idx = _mm256_broadcastsi128_si256(_mm_set_epi8(
Expand Down Expand Up @@ -205,6 +197,7 @@ void poly_frommsg_avx(poly *restrict r,
_mm256_store_si256(&r->vec[8 + 2 * i + 1], g3)

f = _mm256_loadu_si256((__m256i_u *)msg);
LC_FPU_ENABLE;
FROMMSG64(0);
FROMMSG64(1);
FROMMSG64(2);
Expand Down Expand Up @@ -232,11 +225,11 @@ void poly_tomsg_avx(uint8_t msg[LC_KYBER_INDCPA_MSGBYTES],
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeclaration-after-statement"
/* Due to const, the variables cannot be defined before */
LC_FPU_ENABLE;
const __m256i hq = _mm256_set1_epi16((LC_KYBER_Q - 1) / 2);
const __m256i hhq = _mm256_set1_epi16((LC_KYBER_Q - 1) / 4);
#pragma GCC diagnostic pop

LC_FPU_ENABLE;
for (i = 0; i < LC_KYBER_N / 32; i++) {
f0 = _mm256_load_si256(&a->vec[2 * i + 0]);
f1 = _mm256_load_si256(&a->vec[2 * i + 1]);
Expand Down
9 changes: 6 additions & 3 deletions linux_kernel/Kbuild
Original file line number Diff line number Diff line change
Expand Up @@ -238,9 +238,12 @@ ccflags-y := -I$(PWD) -DLINUX_KERNEL -DLC_MEM_ON_HEAP \
-DMAJVERSION=0 -DMINVERSION=9 -DPATCHLEVEL=0

ifdef CONFIG_X86_64
ccflags-y += -DLC_HOST_X86_64
ccflags-y += -DLC_HOST_X86_64 \
-mavx2 -mbmi2 -mpopcnt \
-Wno-unused-command-line-argument
else ifdef CONFIG_ARM64
ccflags-y += -DLC_HOST_AARCH64
ccflags-y += -DLC_HOST_AARCH64 \
-march=armv8-a+simd -Wno-unused-result
else ifneq ($(and $(CONFIG_RISCV),$(CONFIG_64BIT)),)
ccflags-y += -DLC_HOST_RISCV64
endif
Expand Down Expand Up @@ -896,7 +899,7 @@ ifdef CONFIG_ARM64
asflags-$(CONFIG_LEANCRYPTO_AES) \
+= -march=armv8-a+crypto
leancrypto-$(CONFIG_LEANCRYPTO_AES) \
+= ../sym/src/asm/ARMv8/aes_armv8_ce.o\
+= ../sym/src/asm/ARMv8/aes_armv8_ce.o \
../sym/src/aes_block_armce_v8.o
else
leancrypto-$(CONFIG_LEANCRYPTO_AES) \
Expand Down

0 comments on commit c022fda

Please sign in to comment.