diff --git a/Makefile b/Makefile index 060081d..990d8f7 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,4 @@ # Tests -include tests/dilithium-all/dilithium-all.mk -include tests/kyber-all/kyber-all.mk include tests/chunk/chunk.mk include tests/crt/crt.mk include tests/ct/ct.mk diff --git a/asm/manual/dilithium-all/basemul_257_asymmetric_dilithium.s b/asm/manual/dilithium-all/basemul_257_asymmetric_dilithium.s deleted file mode 120000 index 879d2dc..0000000 --- a/asm/manual/dilithium-all/basemul_257_asymmetric_dilithium.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/naive/armv7m/basemul_257_asymmetric_dilithium.s \ No newline at end of file diff --git a/asm/manual/dilithium-all/basemul_257_asymmetric_dilithium_opt_m7.s b/asm/manual/dilithium-all/basemul_257_asymmetric_dilithium_opt_m7.s deleted file mode 120000 index 573a4c0..0000000 --- a/asm/manual/dilithium-all/basemul_257_asymmetric_dilithium_opt_m7.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/opt/armv7m/basemul_257_asymmetric_dilithium_opt_m7.s \ No newline at end of file diff --git a/asm/manual/dilithium-all/basemul_257_dilithium.s b/asm/manual/dilithium-all/basemul_257_dilithium.s deleted file mode 120000 index 8d4260d..0000000 --- a/asm/manual/dilithium-all/basemul_257_dilithium.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/naive/armv7m/basemul_257_dilithium.s \ No newline at end of file diff --git a/asm/manual/dilithium-all/basemul_257_dilithium_opt_m7.s b/asm/manual/dilithium-all/basemul_257_dilithium_opt_m7.s deleted file mode 120000 index aaa1c4d..0000000 --- a/asm/manual/dilithium-all/basemul_257_dilithium_opt_m7.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/opt/armv7m/basemul_257_dilithium_opt_m7.s \ No newline at end of file diff --git a/asm/manual/dilithium-all/caddq_dilithium.s b/asm/manual/dilithium-all/caddq_dilithium.s deleted file mode 120000 index 8c04abc..0000000 --- a/asm/manual/dilithium-all/caddq_dilithium.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/naive/armv7m/caddq_dilithium.s \ No newline at end of file diff --git a/asm/manual/dilithium-all/caddq_dilithium_opt_m7.s b/asm/manual/dilithium-all/caddq_dilithium_opt_m7.s deleted file mode 120000 index 4439894..0000000 --- a/asm/manual/dilithium-all/caddq_dilithium_opt_m7.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/opt/armv7m/caddq_dilithium_opt_m7.s \ No newline at end of file diff --git a/asm/manual/dilithium-all/fnt_257_dilithium.s b/asm/manual/dilithium-all/fnt_257_dilithium.s deleted file mode 120000 index 7293bd1..0000000 --- a/asm/manual/dilithium-all/fnt_257_dilithium.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/naive/armv7m/fnt_257_dilithium.s \ No newline at end of file diff --git a/asm/manual/dilithium-all/fnt_257_dilithium_opt_m7.s b/asm/manual/dilithium-all/fnt_257_dilithium_opt_m7.s deleted file mode 120000 index 480a072..0000000 --- a/asm/manual/dilithium-all/fnt_257_dilithium_opt_m7.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/opt/armv7m/fnt_257_dilithium_opt_m7.s \ No newline at end of file diff --git a/asm/manual/dilithium-all/ifnt_257_dilithium.s b/asm/manual/dilithium-all/ifnt_257_dilithium.s deleted file mode 120000 index 2f6df66..0000000 --- a/asm/manual/dilithium-all/ifnt_257_dilithium.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/naive/armv7m/ifnt_257_dilithium.s \ No newline at end of file diff --git a/asm/manual/dilithium-all/ifnt_257_dilithium_opt_m7.s b/asm/manual/dilithium-all/ifnt_257_dilithium_opt_m7.s deleted file mode 120000 index e44782e..0000000 --- a/asm/manual/dilithium-all/ifnt_257_dilithium_opt_m7.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/opt/armv7m/ifnt_257_dilithium_opt_m7.s \ No newline at end of file diff --git a/asm/manual/dilithium-all/intt_769_dilithium.s b/asm/manual/dilithium-all/intt_769_dilithium.s deleted file mode 120000 index ee9d29d..0000000 --- a/asm/manual/dilithium-all/intt_769_dilithium.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/naive/armv7m/intt_769_dilithium.s \ No newline at end of file diff --git a/asm/manual/dilithium-all/intt_769_dilithium_opt_m7.s b/asm/manual/dilithium-all/intt_769_dilithium_opt_m7.s deleted file mode 120000 index f3367a0..0000000 --- a/asm/manual/dilithium-all/intt_769_dilithium_opt_m7.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/opt/armv7m/intt_769_dilithium_opt_m7.s \ No newline at end of file diff --git a/asm/manual/dilithium-all/intt_dilithium_123_456_78.s b/asm/manual/dilithium-all/intt_dilithium_123_456_78.s deleted file mode 120000 index 302a9db..0000000 --- a/asm/manual/dilithium-all/intt_dilithium_123_456_78.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/naive/armv7m/intt_dilithium_123_456_78.s \ No newline at end of file diff --git a/asm/manual/dilithium-all/intt_dilithium_123_456_78_opt_m7.s b/asm/manual/dilithium-all/intt_dilithium_123_456_78_opt_m7.s deleted file mode 120000 index 246aa56..0000000 --- a/asm/manual/dilithium-all/intt_dilithium_123_456_78_opt_m7.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/opt/armv7m/intt_dilithium_123_456_78_opt_m7.s \ No newline at end of file diff --git a/asm/manual/dilithium-all/ntt_769_dilithium.s b/asm/manual/dilithium-all/ntt_769_dilithium.s deleted file mode 120000 index 849ac2b..0000000 --- a/asm/manual/dilithium-all/ntt_769_dilithium.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/naive/armv7m/ntt_769_dilithium.s \ No newline at end of file diff --git a/asm/manual/dilithium-all/ntt_769_dilithium_opt_m7.s b/asm/manual/dilithium-all/ntt_769_dilithium_opt_m7.s deleted file mode 120000 index c2bf398..0000000 --- a/asm/manual/dilithium-all/ntt_769_dilithium_opt_m7.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/opt/armv7m/ntt_769_dilithium_opt_m7.s \ No newline at end of file diff --git a/asm/manual/dilithium-all/ntt_dilithium.s b/asm/manual/dilithium-all/ntt_dilithium.s deleted file mode 120000 index c25f73b..0000000 --- a/asm/manual/dilithium-all/ntt_dilithium.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/naive/armv7m/ntt_dilithium.s \ No newline at end of file diff --git a/asm/manual/dilithium-all/ntt_dilithium_opt_m7.s b/asm/manual/dilithium-all/ntt_dilithium_opt_m7.s deleted file mode 120000 index 4464415..0000000 --- a/asm/manual/dilithium-all/ntt_dilithium_opt_m7.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/opt/armv7m/ntt_dilithium_opt_m7.s \ No newline at end of file diff --git a/asm/manual/dilithium-all/pointwise_769_asymmetric_dilithium.s b/asm/manual/dilithium-all/pointwise_769_asymmetric_dilithium.s deleted file mode 120000 index 91096c8..0000000 --- a/asm/manual/dilithium-all/pointwise_769_asymmetric_dilithium.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/naive/armv7m/pointwise_769_asymmetric_dilithium.s \ No newline at end of file diff --git a/asm/manual/dilithium-all/pointwise_769_asymmetric_dilithium_opt_m7.s b/asm/manual/dilithium-all/pointwise_769_asymmetric_dilithium_opt_m7.s deleted file mode 120000 index 0b9838e..0000000 --- a/asm/manual/dilithium-all/pointwise_769_asymmetric_dilithium_opt_m7.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/opt/armv7m/pointwise_769_asymmetric_dilithium_opt_m7.s \ No newline at end of file diff --git a/asm/manual/dilithium-all/pointwise_769_dilithium.s b/asm/manual/dilithium-all/pointwise_769_dilithium.s deleted file mode 120000 index 70e2a9a..0000000 --- a/asm/manual/dilithium-all/pointwise_769_dilithium.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/naive/armv7m/pointwise_769_dilithium.s \ No newline at end of file diff --git a/asm/manual/dilithium-all/pointwise_769_dilithium_opt_m7.s b/asm/manual/dilithium-all/pointwise_769_dilithium_opt_m7.s deleted file mode 120000 index 6f650c0..0000000 --- a/asm/manual/dilithium-all/pointwise_769_dilithium_opt_m7.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/opt/armv7m/pointwise_769_dilithium_opt_m7.s \ No newline at end of file diff --git a/asm/manual/dilithium-all/pointwise_acc_montgomery_dilithium.s b/asm/manual/dilithium-all/pointwise_acc_montgomery_dilithium.s deleted file mode 120000 index c480c60..0000000 --- a/asm/manual/dilithium-all/pointwise_acc_montgomery_dilithium.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/naive/armv7m/pointwise_acc_montgomery_dilithium.s \ No newline at end of file diff --git a/asm/manual/dilithium-all/pointwise_acc_montgomery_dilithium_opt_m7.s b/asm/manual/dilithium-all/pointwise_acc_montgomery_dilithium_opt_m7.s deleted file mode 120000 index 7f6d7e5..0000000 --- a/asm/manual/dilithium-all/pointwise_acc_montgomery_dilithium_opt_m7.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/opt/armv7m/pointwise_acc_montgomery_dilithium_opt_m7.s \ No newline at end of file diff --git a/asm/manual/dilithium-all/pointwise_montgomery_dilithium.s b/asm/manual/dilithium-all/pointwise_montgomery_dilithium.s deleted file mode 120000 index b232539..0000000 --- a/asm/manual/dilithium-all/pointwise_montgomery_dilithium.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/naive/armv7m/pointwise_montgomery_dilithium.s \ No newline at end of file diff --git a/asm/manual/dilithium-all/pointwise_montgomery_dilithium_opt_m7.s b/asm/manual/dilithium-all/pointwise_montgomery_dilithium_opt_m7.s deleted file mode 120000 index 55bcdb9..0000000 --- a/asm/manual/dilithium-all/pointwise_montgomery_dilithium_opt_m7.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/opt/armv7m/pointwise_montgomery_dilithium_opt_m7.s \ No newline at end of file diff --git a/asm/manual/dilithium-all/reduce32_dilithium.s b/asm/manual/dilithium-all/reduce32_dilithium.s deleted file mode 120000 index 7947265..0000000 --- a/asm/manual/dilithium-all/reduce32_dilithium.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/naive/armv7m/reduce32_dilithium.s \ No newline at end of file diff --git a/asm/manual/dilithium-all/reduce32_dilithium_opt_m7.s b/asm/manual/dilithium-all/reduce32_dilithium_opt_m7.s deleted file mode 120000 index b006f59..0000000 --- a/asm/manual/dilithium-all/reduce32_dilithium_opt_m7.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/opt/armv7m/reduce32_dilithium_opt_m7.s \ No newline at end of file diff --git a/asm/manual/kyber-all/add_kyber.s b/asm/manual/kyber-all/add_kyber.s deleted file mode 120000 index cbd60d3..0000000 --- a/asm/manual/kyber-all/add_kyber.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/naive/armv7m/add_kyber.s \ No newline at end of file diff --git a/asm/manual/kyber-all/add_kyber_opt_m7.s b/asm/manual/kyber-all/add_kyber_opt_m7.s deleted file mode 120000 index 2735917..0000000 --- a/asm/manual/kyber-all/add_kyber_opt_m7.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/opt/armv7m/add_kyber_opt_m7.s \ No newline at end of file diff --git a/asm/manual/kyber-all/barrett_reduce_kyber.s b/asm/manual/kyber-all/barrett_reduce_kyber.s deleted file mode 120000 index 8bbd376..0000000 --- a/asm/manual/kyber-all/barrett_reduce_kyber.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/naive/armv7m/barrett_reduce_kyber.s \ No newline at end of file diff --git a/asm/manual/kyber-all/barrett_reduce_kyber_opt_m7.s b/asm/manual/kyber-all/barrett_reduce_kyber_opt_m7.s deleted file mode 120000 index b111e46..0000000 --- a/asm/manual/kyber-all/barrett_reduce_kyber_opt_m7.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/opt/armv7m/barrett_reduce_kyber_opt_m7.s \ No newline at end of file diff --git a/asm/manual/kyber-all/basemul_16_32_kyber.s b/asm/manual/kyber-all/basemul_16_32_kyber.s deleted file mode 120000 index dff2703..0000000 --- a/asm/manual/kyber-all/basemul_16_32_kyber.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/naive/armv7m/basemul_16_32_kyber.s \ No newline at end of file diff --git a/asm/manual/kyber-all/basemul_16_32_kyber_opt_m7.s b/asm/manual/kyber-all/basemul_16_32_kyber_opt_m7.s deleted file mode 120000 index 9c35354..0000000 --- a/asm/manual/kyber-all/basemul_16_32_kyber_opt_m7.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/opt/armv7m/basemul_16_32_kyber_opt_m7.s \ No newline at end of file diff --git a/asm/manual/kyber-all/basemul_acc_32_16_kyber.s b/asm/manual/kyber-all/basemul_acc_32_16_kyber.s deleted file mode 120000 index ddddf40..0000000 --- a/asm/manual/kyber-all/basemul_acc_32_16_kyber.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/naive/armv7m/basemul_acc_32_16_kyber.s \ No newline at end of file diff --git a/asm/manual/kyber-all/basemul_acc_32_16_kyber_opt_m7.s b/asm/manual/kyber-all/basemul_acc_32_16_kyber_opt_m7.s deleted file mode 120000 index 5236dbb..0000000 --- a/asm/manual/kyber-all/basemul_acc_32_16_kyber_opt_m7.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/opt/armv7m/basemul_acc_32_16_kyber_opt_m7.s \ No newline at end of file diff --git a/asm/manual/kyber-all/basemul_acc_32_32_kyber.s b/asm/manual/kyber-all/basemul_acc_32_32_kyber.s deleted file mode 120000 index d749960..0000000 --- a/asm/manual/kyber-all/basemul_acc_32_32_kyber.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/naive/armv7m/basemul_acc_32_32_kyber.s \ No newline at end of file diff --git a/asm/manual/kyber-all/basemul_acc_32_32_kyber_opt_m7.s b/asm/manual/kyber-all/basemul_acc_32_32_kyber_opt_m7.s deleted file mode 120000 index 69ffef2..0000000 --- a/asm/manual/kyber-all/basemul_acc_32_32_kyber_opt_m7.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/opt/armv7m/basemul_acc_32_32_kyber_opt_m7.s \ No newline at end of file diff --git a/asm/manual/kyber-all/basemul_acc_kyber.s b/asm/manual/kyber-all/basemul_acc_kyber.s deleted file mode 120000 index 2775b26..0000000 --- a/asm/manual/kyber-all/basemul_acc_kyber.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/naive/armv7m/basemul_acc_kyber.s \ No newline at end of file diff --git a/asm/manual/kyber-all/basemul_acc_kyber_opt_m7.s b/asm/manual/kyber-all/basemul_acc_kyber_opt_m7.s deleted file mode 120000 index f9dc911..0000000 --- a/asm/manual/kyber-all/basemul_acc_kyber_opt_m7.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/opt/armv7m/basemul_acc_kyber_opt_m7.s \ No newline at end of file diff --git a/asm/manual/kyber-all/basemul_kyber.s b/asm/manual/kyber-all/basemul_kyber.s deleted file mode 120000 index 5295785..0000000 --- a/asm/manual/kyber-all/basemul_kyber.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/naive/armv7m/basemul_kyber.s \ No newline at end of file diff --git a/asm/manual/kyber-all/basemul_kyber_opt_m7.s b/asm/manual/kyber-all/basemul_kyber_opt_m7.s deleted file mode 120000 index daa6d11..0000000 --- a/asm/manual/kyber-all/basemul_kyber_opt_m7.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/opt/armv7m/basemul_kyber_opt_m7.s \ No newline at end of file diff --git a/asm/manual/kyber-all/frombytes_mul_16_32_kyber.s b/asm/manual/kyber-all/frombytes_mul_16_32_kyber.s deleted file mode 120000 index 02fdcf1..0000000 --- a/asm/manual/kyber-all/frombytes_mul_16_32_kyber.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/naive/armv7m/frombytes_mul_16_32_kyber.s \ No newline at end of file diff --git a/asm/manual/kyber-all/frombytes_mul_16_32_kyber_opt_m7.s b/asm/manual/kyber-all/frombytes_mul_16_32_kyber_opt_m7.s deleted file mode 120000 index adce3c7..0000000 --- a/asm/manual/kyber-all/frombytes_mul_16_32_kyber_opt_m7.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/opt/armv7m/frombytes_mul_16_32_kyber_opt_m7.s \ No newline at end of file diff --git a/asm/manual/kyber-all/frombytes_mul_acc_32_16_kyber.s b/asm/manual/kyber-all/frombytes_mul_acc_32_16_kyber.s deleted file mode 120000 index 0b459ed..0000000 --- a/asm/manual/kyber-all/frombytes_mul_acc_32_16_kyber.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/naive/armv7m/frombytes_mul_acc_32_16_kyber.s \ No newline at end of file diff --git a/asm/manual/kyber-all/frombytes_mul_acc_32_16_kyber_opt_m7.s b/asm/manual/kyber-all/frombytes_mul_acc_32_16_kyber_opt_m7.s deleted file mode 120000 index c30285a..0000000 --- a/asm/manual/kyber-all/frombytes_mul_acc_32_16_kyber_opt_m7.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/opt/armv7m/frombytes_mul_acc_32_16_kyber_opt_m7.s \ No newline at end of file diff --git a/asm/manual/kyber-all/frombytes_mul_acc_32_32_kyber.s b/asm/manual/kyber-all/frombytes_mul_acc_32_32_kyber.s deleted file mode 120000 index fe0c202..0000000 --- a/asm/manual/kyber-all/frombytes_mul_acc_32_32_kyber.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/naive/armv7m/frombytes_mul_acc_32_32_kyber.s \ No newline at end of file diff --git a/asm/manual/kyber-all/frombytes_mul_acc_32_32_kyber_opt_m7.s b/asm/manual/kyber-all/frombytes_mul_acc_32_32_kyber_opt_m7.s deleted file mode 120000 index 62309ec..0000000 --- a/asm/manual/kyber-all/frombytes_mul_acc_32_32_kyber_opt_m7.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/opt/armv7m/frombytes_mul_acc_32_32_kyber_opt_m7.s \ No newline at end of file diff --git a/asm/manual/kyber-all/frombytes_mul_acc_kyber.s b/asm/manual/kyber-all/frombytes_mul_acc_kyber.s deleted file mode 120000 index 33d56b3..0000000 --- a/asm/manual/kyber-all/frombytes_mul_acc_kyber.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/naive/armv7m/frombytes_mul_acc_kyber.s \ No newline at end of file diff --git a/asm/manual/kyber-all/frombytes_mul_acc_kyber_opt_m7.s b/asm/manual/kyber-all/frombytes_mul_acc_kyber_opt_m7.s deleted file mode 120000 index 2124eba..0000000 --- a/asm/manual/kyber-all/frombytes_mul_acc_kyber_opt_m7.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/opt/armv7m/frombytes_mul_acc_kyber_opt_m7.s \ No newline at end of file diff --git a/asm/manual/kyber-all/frombytes_mul_kyber.s b/asm/manual/kyber-all/frombytes_mul_kyber.s deleted file mode 120000 index 408800f..0000000 --- a/asm/manual/kyber-all/frombytes_mul_kyber.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/naive/armv7m/frombytes_mul_kyber.s \ No newline at end of file diff --git a/asm/manual/kyber-all/frombytes_mul_kyber_opt_m7.s b/asm/manual/kyber-all/frombytes_mul_kyber_opt_m7.s deleted file mode 120000 index a004a89..0000000 --- a/asm/manual/kyber-all/frombytes_mul_kyber_opt_m7.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/opt/armv7m/frombytes_mul_kyber_opt_m7.s \ No newline at end of file diff --git a/asm/manual/kyber-all/fromplant_kyber.s b/asm/manual/kyber-all/fromplant_kyber.s deleted file mode 120000 index 906ac60..0000000 --- a/asm/manual/kyber-all/fromplant_kyber.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/naive/armv7m/fromplant_kyber.s \ No newline at end of file diff --git a/asm/manual/kyber-all/fromplant_kyber_opt_m7.s b/asm/manual/kyber-all/fromplant_kyber_opt_m7.s deleted file mode 120000 index bbe5e6e..0000000 --- a/asm/manual/kyber-all/fromplant_kyber_opt_m7.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/opt/armv7m/fromplant_kyber_opt_m7.s \ No newline at end of file diff --git a/asm/manual/kyber-all/intt_kyber.s b/asm/manual/kyber-all/intt_kyber.s deleted file mode 120000 index 7f6b5ee..0000000 --- a/asm/manual/kyber-all/intt_kyber.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/naive/armv7m/intt_kyber.s \ No newline at end of file diff --git a/asm/manual/kyber-all/intt_kyber_opt_m7.s b/asm/manual/kyber-all/intt_kyber_opt_m7.s deleted file mode 120000 index 929c5fd..0000000 --- a/asm/manual/kyber-all/intt_kyber_opt_m7.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/opt/armv7m/intt_kyber_opt_m7.s \ No newline at end of file diff --git a/asm/manual/kyber-all/matacc_acc_kyber.s b/asm/manual/kyber-all/matacc_acc_kyber.s deleted file mode 120000 index 5690250..0000000 --- a/asm/manual/kyber-all/matacc_acc_kyber.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/naive/armv7m/matacc_acc_kyber.s \ No newline at end of file diff --git a/asm/manual/kyber-all/matacc_acc_kyber_opt_m7.s b/asm/manual/kyber-all/matacc_acc_kyber_opt_m7.s deleted file mode 120000 index 2318505..0000000 --- a/asm/manual/kyber-all/matacc_acc_kyber_opt_m7.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/opt/armv7m/matacc_acc_kyber_opt_m7.s \ No newline at end of file diff --git a/asm/manual/kyber-all/matacc_asm_cache_16_32_kyber.s b/asm/manual/kyber-all/matacc_asm_cache_16_32_kyber.s deleted file mode 120000 index 855ca53..0000000 --- a/asm/manual/kyber-all/matacc_asm_cache_16_32_kyber.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/naive/armv7m/matacc_asm_cache_16_32_kyber.s \ No newline at end of file diff --git a/asm/manual/kyber-all/matacc_asm_cache_16_32_kyber_opt_m7.s b/asm/manual/kyber-all/matacc_asm_cache_16_32_kyber_opt_m7.s deleted file mode 120000 index 5e42d78..0000000 --- a/asm/manual/kyber-all/matacc_asm_cache_16_32_kyber_opt_m7.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/opt/armv7m/matacc_asm_cache_16_32_kyber_opt_m7.s \ No newline at end of file diff --git a/asm/manual/kyber-all/matacc_asm_cache_32_16_kyber.s b/asm/manual/kyber-all/matacc_asm_cache_32_16_kyber.s deleted file mode 120000 index 82f203f..0000000 --- a/asm/manual/kyber-all/matacc_asm_cache_32_16_kyber.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/naive/armv7m/matacc_asm_cache_32_16_kyber.s \ No newline at end of file diff --git a/asm/manual/kyber-all/matacc_asm_cache_32_16_kyber_opt_m7.s b/asm/manual/kyber-all/matacc_asm_cache_32_16_kyber_opt_m7.s deleted file mode 120000 index 756fee7..0000000 --- a/asm/manual/kyber-all/matacc_asm_cache_32_16_kyber_opt_m7.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/opt/armv7m/matacc_asm_cache_32_16_kyber_opt_m7.s \ No newline at end of file diff --git a/asm/manual/kyber-all/matacc_asm_cache_32_32_kyber.s b/asm/manual/kyber-all/matacc_asm_cache_32_32_kyber.s deleted file mode 120000 index 66fcde8..0000000 --- a/asm/manual/kyber-all/matacc_asm_cache_32_32_kyber.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/naive/armv7m/matacc_asm_cache_32_32_kyber.s \ No newline at end of file diff --git a/asm/manual/kyber-all/matacc_asm_cache_32_32_kyber_opt_m7.s b/asm/manual/kyber-all/matacc_asm_cache_32_32_kyber_opt_m7.s deleted file mode 120000 index e16b20d..0000000 --- a/asm/manual/kyber-all/matacc_asm_cache_32_32_kyber_opt_m7.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/opt/armv7m/matacc_asm_cache_32_32_kyber_opt_m7.s \ No newline at end of file diff --git a/asm/manual/kyber-all/matacc_asm_opt_16_32_kyber.s b/asm/manual/kyber-all/matacc_asm_opt_16_32_kyber.s deleted file mode 120000 index 54802af..0000000 --- a/asm/manual/kyber-all/matacc_asm_opt_16_32_kyber.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/naive/armv7m/matacc_asm_opt_16_32_kyber.s \ No newline at end of file diff --git a/asm/manual/kyber-all/matacc_asm_opt_16_32_kyber_opt_m7.s b/asm/manual/kyber-all/matacc_asm_opt_16_32_kyber_opt_m7.s deleted file mode 120000 index 0ee5a75..0000000 --- a/asm/manual/kyber-all/matacc_asm_opt_16_32_kyber_opt_m7.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/opt/armv7m/matacc_asm_opt_16_32_kyber_opt_m7.s \ No newline at end of file diff --git a/asm/manual/kyber-all/matacc_asm_opt_32_16_kyber.s b/asm/manual/kyber-all/matacc_asm_opt_32_16_kyber.s deleted file mode 120000 index 071d492..0000000 --- a/asm/manual/kyber-all/matacc_asm_opt_32_16_kyber.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/naive/armv7m/matacc_asm_opt_32_16_kyber.s \ No newline at end of file diff --git a/asm/manual/kyber-all/matacc_asm_opt_32_16_kyber_opt_m7.s b/asm/manual/kyber-all/matacc_asm_opt_32_16_kyber_opt_m7.s deleted file mode 120000 index 9b8f092..0000000 --- a/asm/manual/kyber-all/matacc_asm_opt_32_16_kyber_opt_m7.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/opt/armv7m/matacc_asm_opt_32_16_kyber_opt_m7.s \ No newline at end of file diff --git a/asm/manual/kyber-all/matacc_asm_opt_32_32_kyber.s b/asm/manual/kyber-all/matacc_asm_opt_32_32_kyber.s deleted file mode 120000 index 09b2f78..0000000 --- a/asm/manual/kyber-all/matacc_asm_opt_32_32_kyber.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/naive/armv7m/matacc_asm_opt_32_32_kyber.s \ No newline at end of file diff --git a/asm/manual/kyber-all/matacc_asm_opt_32_32_kyber_opt_m7.s b/asm/manual/kyber-all/matacc_asm_opt_32_32_kyber_opt_m7.s deleted file mode 120000 index b063e73..0000000 --- a/asm/manual/kyber-all/matacc_asm_opt_32_32_kyber_opt_m7.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/opt/armv7m/matacc_asm_opt_32_32_kyber_opt_m7.s \ No newline at end of file diff --git a/asm/manual/kyber-all/matacc_kyber.s b/asm/manual/kyber-all/matacc_kyber.s deleted file mode 120000 index 5bb7e99..0000000 --- a/asm/manual/kyber-all/matacc_kyber.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/naive/armv7m/matacc_kyber.s \ No newline at end of file diff --git a/asm/manual/kyber-all/matacc_kyber_opt_m7.s b/asm/manual/kyber-all/matacc_kyber_opt_m7.s deleted file mode 120000 index 26de8c5..0000000 --- a/asm/manual/kyber-all/matacc_kyber_opt_m7.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/opt/armv7m/matacc_kyber_opt_m7.s \ No newline at end of file diff --git a/asm/manual/kyber-all/ntt_kyber.s b/asm/manual/kyber-all/ntt_kyber.s deleted file mode 120000 index f6bcc0e..0000000 --- a/asm/manual/kyber-all/ntt_kyber.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/naive/armv7m/ntt_kyber.s \ No newline at end of file diff --git a/asm/manual/kyber-all/ntt_kyber_opt_m7.s b/asm/manual/kyber-all/ntt_kyber_opt_m7.s deleted file mode 120000 index e6a5c5a..0000000 --- a/asm/manual/kyber-all/ntt_kyber_opt_m7.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/opt/armv7m/ntt_kyber_opt_m7.s \ No newline at end of file diff --git a/asm/manual/kyber-all/ntt_kyber_symbolic.s b/asm/manual/kyber-all/ntt_kyber_symbolic.s deleted file mode 120000 index 86128ab..0000000 --- a/asm/manual/kyber-all/ntt_kyber_symbolic.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/naive/armv7m/ntt_kyber_symbolic.s \ No newline at end of file diff --git a/asm/manual/kyber-all/ntt_kyber_symbolic_opt_m7.s b/asm/manual/kyber-all/ntt_kyber_symbolic_opt_m7.s deleted file mode 120000 index fb9c18b..0000000 --- a/asm/manual/kyber-all/ntt_kyber_symbolic_opt_m7.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/opt/armv7m/ntt_kyber_symbolic_opt_m7.s \ No newline at end of file diff --git a/asm/manual/kyber-all/sub_kyber.s b/asm/manual/kyber-all/sub_kyber.s deleted file mode 120000 index e353062..0000000 --- a/asm/manual/kyber-all/sub_kyber.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/naive/armv7m/sub_kyber.s \ No newline at end of file diff --git a/asm/manual/kyber-all/sub_kyber_opt_m7.s b/asm/manual/kyber-all/sub_kyber_opt_m7.s deleted file mode 120000 index 2c180b5..0000000 --- a/asm/manual/kyber-all/sub_kyber_opt_m7.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/opt/armv7m/sub_kyber_opt_m7.s \ No newline at end of file diff --git a/asm/manual/ntt_dilithium/dilithium5_ntt.s b/asm/manual/ntt_dilithium/dilithium5_ntt.s deleted file mode 120000 index b12e52a..0000000 --- a/asm/manual/ntt_dilithium/dilithium5_ntt.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/naive/armv7m/dilithium5_ntt.s \ No newline at end of file diff --git a/asm/manual/ntt_dilithium/dilithium5_ntt_opt.s b/asm/manual/ntt_dilithium/dilithium5_ntt_opt.s deleted file mode 120000 index cc1b774..0000000 --- a/asm/manual/ntt_dilithium/dilithium5_ntt_opt.s +++ /dev/null @@ -1 +0,0 @@ -../../../slothy/examples/opt/armv7m/dilithium5_ntt_opt_m7.s \ No newline at end of file diff --git a/tests/dilithium-all/dilithium-all.mk b/tests/dilithium-all/dilithium-all.mk deleted file mode 100644 index 23f0805..0000000 --- a/tests/dilithium-all/dilithium-all.mk +++ /dev/null @@ -1,45 +0,0 @@ -# Test name - needs to match the directory name -TESTS += dilithium-all - -# All further variables must be prefixed with the capitalized test name - -# Platforms this test should run on (matching the directory name in envs/) -DILITHIUM_ALL_PLATFORMS += m7-an500 -DILITHIUM_ALL_PLATFORMS += nucleo-f767zi -DILITHIUM_ALL_PLATFORMS += stm32f4discovery - -# C sources required for this test -DILITHIUM_ALL_SOURCES += main.c -DILITHIUM_ALL_SOURCES += ref.c - -# Assembly sources required for this test -DILITHIUM_ALL_ASM_DIR = ../../asm/manual/dilithium-all -DILITHIUM_ALL_ASMS += $(DILITHIUM_ALL_ASM_DIR)/basemul_257_asymmetric_dilithium.s -DILITHIUM_ALL_ASMS += $(DILITHIUM_ALL_ASM_DIR)/basemul_257_dilithium.s -DILITHIUM_ALL_ASMS += $(DILITHIUM_ALL_ASM_DIR)/caddq_dilithium.s -DILITHIUM_ALL_ASMS += $(DILITHIUM_ALL_ASM_DIR)/fnt_257_dilithium.s -DILITHIUM_ALL_ASMS += $(DILITHIUM_ALL_ASM_DIR)/ifnt_257_dilithium.s -DILITHIUM_ALL_ASMS += $(DILITHIUM_ALL_ASM_DIR)/intt_769_dilithium.s -DILITHIUM_ALL_ASMS += $(DILITHIUM_ALL_ASM_DIR)/ntt_769_dilithium.s -DILITHIUM_ALL_ASMS += $(DILITHIUM_ALL_ASM_DIR)/ntt_dilithium.s -DILITHIUM_ALL_ASMS += $(DILITHIUM_ALL_ASM_DIR)/intt_dilithium_123_456_78.s -DILITHIUM_ALL_ASMS += $(DILITHIUM_ALL_ASM_DIR)/pointwise_769_asymmetric_dilithium.s -DILITHIUM_ALL_ASMS += $(DILITHIUM_ALL_ASM_DIR)/pointwise_769_dilithium.s -DILITHIUM_ALL_ASMS += $(DILITHIUM_ALL_ASM_DIR)/pointwise_acc_montgomery_dilithium.s -DILITHIUM_ALL_ASMS += $(DILITHIUM_ALL_ASM_DIR)/pointwise_montgomery_dilithium.s -DILITHIUM_ALL_ASMS += $(DILITHIUM_ALL_ASM_DIR)/reduce32_dilithium.s - -DILITHIUM_ALL_ASMS += $(DILITHIUM_ALL_ASM_DIR)/basemul_257_asymmetric_dilithium_opt_m7.s -DILITHIUM_ALL_ASMS += $(DILITHIUM_ALL_ASM_DIR)/basemul_257_dilithium_opt_m7.s -DILITHIUM_ALL_ASMS += $(DILITHIUM_ALL_ASM_DIR)/caddq_dilithium_opt_m7.s -DILITHIUM_ALL_ASMS += $(DILITHIUM_ALL_ASM_DIR)/fnt_257_dilithium_opt_m7.s -DILITHIUM_ALL_ASMS += $(DILITHIUM_ALL_ASM_DIR)/ifnt_257_dilithium_opt_m7.s -DILITHIUM_ALL_ASMS += $(DILITHIUM_ALL_ASM_DIR)/intt_769_dilithium_opt_m7.s -DILITHIUM_ALL_ASMS += $(DILITHIUM_ALL_ASM_DIR)/ntt_769_dilithium_opt_m7.s -DILITHIUM_ALL_ASMS += $(DILITHIUM_ALL_ASM_DIR)/ntt_dilithium_opt_m7.s -DILITHIUM_ALL_ASMS += $(DILITHIUM_ALL_ASM_DIR)/intt_dilithium_123_456_78_opt_m7.s -DILITHIUM_ALL_ASMS += $(DILITHIUM_ALL_ASM_DIR)/pointwise_769_asymmetric_dilithium_opt_m7.s -DILITHIUM_ALL_ASMS += $(DILITHIUM_ALL_ASM_DIR)/pointwise_769_dilithium_opt_m7.s -DILITHIUM_ALL_ASMS += $(DILITHIUM_ALL_ASM_DIR)/pointwise_acc_montgomery_dilithium_opt_m7.s -DILITHIUM_ALL_ASMS += $(DILITHIUM_ALL_ASM_DIR)/pointwise_montgomery_dilithium_opt_m7.s -DILITHIUM_ALL_ASMS += $(DILITHIUM_ALL_ASM_DIR)/reduce32_dilithium_opt_m7.s diff --git a/tests/dilithium-all/fnt.h b/tests/dilithium-all/fnt.h deleted file mode 100644 index dfbce47..0000000 --- a/tests/dilithium-all/fnt.h +++ /dev/null @@ -1,62 +0,0 @@ -#ifndef FNT_H -#define FNT_H - -#include - -#define FNT_Q 257 -#define FNT_Q_PRIME (16711935) // -q^-1 mod 2**32 - -static const int32_t twiddles_ntt_257_streamlined[] __attribute__((aligned(8))) = {-60, -35, -46, -42, 99, 89, -118, 27, -82, 108, -71, 54, 93, -41, 115, 68, 117, 73, -84, -59, -79, 21, -78, 37, -55, -109, 101, 74, -110, 39, 17, -70, -92, -50, -29, 57, -116, 83, 43, 75, -85, -91, 86, -107, 87, 15, -23, -111, -100, -58, 114, 25, -97, -10, 126, -40, 63, -20, -5, -80, -120, 44, -67, -72, -124, -31, 18, -106, 103, 90, -102, 45, -51, -77, 53, -121, -81, -11, 113, 9, -62, 36, -65, -12, -3, -48, 127, -24, -6, -96, 34, 88, 123, -49, -13, 61, -52, 112, -7, -66, -28, -33, -14, 125, -56, 30, 95, -22, -98, -26, 122, -104, -38, -94, 105, -119, -76, 69, -47, 19}; -static const int32_t twiddles_intt_257_streamlined[] __attribute__((aligned(8))) = { -19, 47, -69, 76, 119, -105, 94, 38, 104, -122, 26, 98, 22, -95, -30, 56, -125, 14, 33, 28, 66, 7, -112, 52, -61, 13, 49, -123, -88, -34, 96, 6, 24, -127, 48, 3, 12, 65, -36, 62, -9, -113, 11, 81, 121, -53, 77, 51, -45, 102, -90, -103, 106, -18, 31, 124, 72, 67, -44, 120, 80, 5, 20, -63, 40, -126, 10, 97, -25, -114, 58, 100, 111, 23, -15, -87, 107, -86, 91, 85, -75, -43, -83, 116, -57, 29, 50, 92, 70, -17, -39, 110, -74, -101, 109, 55, -37, 78, -21, 79, 59, 84, -73, -117, -68, -115, 41, -93, -54, 71, -108, 82, -27, 118, -89, -99, 42, 46, 35, 60}; -static const int32_t twiddles_basemul_257[] __attribute__((aligned(8))) = {27, -82, 108, -71, 54, 93, -41, 115, -78, 37, -55, -109, 101, 74, -110, 39, 83, 43, 75, -85, -91, 86, -107, 87, -97, -10, 126, -40, 63, -20, -5, -80, -106, 103, 90, -102, 45, -51, -77, 53, -65, -12, -3, -48, 127, -24, -6, -96, 112, -7, -66, -28, -33, -14, 125, -56, -38, -94, 105, -119, -76, 69, -47, 19}; - - -// inputs in [-2, 2]; outputs in [-128, +128] -void __asm_fnt_257(int32_t *p, const int32_t twiddles[112], int32_t qprime, int32_t q); - -void __asm_point_mul_257_16(int16_t p_prime[128], const int32_t p[256], int32_t qprime, int32_t q, const int32_t twiddles[64]); -void __asm_asymmetric_mul_257_16(int32_t c[256], const int32_t a[256], const int32_t b[256], const int16_t b_prime[128]); - -// inputs in [-32768, 32768] outputs in [-128, +128] -void __asm_ifnt_257(int32_t *p, const int32_t twiddles[112], int32_t qprime, int32_t q); - -void fnt_ntt(int32_t *a) { - __asm_fnt_257(a, twiddles_ntt_257_streamlined, FNT_Q_PRIME, FNT_Q); -} -void fnt_invntt_tomont(int32_t *a) { - __asm_ifnt_257(a, twiddles_intt_257_streamlined, FNT_Q_PRIME, FNT_Q); -} - -void fnt_point_mul(int16_t * b_prime, int32_t *b){ - __asm_point_mul_257_16(b_prime, b, FNT_Q_PRIME, FNT_Q, twiddles_basemul_257); -} -void fnt_asymmetric_mul(int32_t *c, int32_t *a, int32_t *b, int16_t *b_prime) { - __asm_asymmetric_mul_257_16(c, a, b, b_prime); -} - -void __asm_fnt_257_opt_m7(int32_t *p, const int32_t twiddles[112], int32_t qprime, int32_t q); - -void __asm_point_mul_257_16_opt_m7(int16_t p_prime[128], const int32_t p[256], int32_t qprime, int32_t q, const int32_t twiddles[64]); -void __asm_asymmetric_mul_257_16_opt_m7(int32_t c[256], const int32_t a[256], const int32_t b[256], const int16_t b_prime[128]); - -void __asm_ifnt_257_opt_m7(int32_t *p, const int32_t twiddles[112], int32_t qprime, int32_t q); - -void fnt_ntt_opt_m7(int32_t *a) { - __asm_fnt_257_opt_m7(a, twiddles_ntt_257_streamlined, FNT_Q_PRIME, FNT_Q); -} - -void fnt_invntt_tomont_opt_m7(int32_t *a) { - __asm_ifnt_257_opt_m7(a, twiddles_intt_257_streamlined, FNT_Q_PRIME, FNT_Q); -} - -void fnt_point_mul_opt_m7(int16_t * b_prime, int32_t *b){ - __asm_point_mul_257_16_opt_m7(b_prime, b, FNT_Q_PRIME, FNT_Q, twiddles_basemul_257); -} - -void fnt_asymmetric_mul_opt_m7(int32_t *c, int32_t *a, int32_t *b, int16_t *b_prime) { - __asm_asymmetric_mul_257_16_opt_m7(c, a, b, b_prime); -} - - - -#endif \ No newline at end of file diff --git a/tests/dilithium-all/main.c b/tests/dilithium-all/main.c deleted file mode 100644 index a2ac065..0000000 --- a/tests/dilithium-all/main.c +++ /dev/null @@ -1,738 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * - * Author: Hanno Becker - */ - -#define ENABLE_PMU_STATS /* Do not enable when benching for cycle count */ - -#if defined(ENABLE_PMU_STATS) -#define REPEAT 100 -#define REPEAT_MEDIAN 100 -#else -#define REPEAT 1024 -#endif - -/* - * Some external references to auto-generated assembly. - */ - -#include -#include - -#include -#include -#include -#include "misc.h" -#include "poly.h" -#include "ref.h" -#include "fnt.h" -#include "ntt_769.h" - -void pqcrystals_dilithium_ntt(int32_t *); -void pqcrystals_dilithium_ntt_opt_m7(int32_t *); - -void pqcrystals_dilithium_invntt_tomont(int32_t *); -void pqcrystals_dilithium_invntt_tomont_opt_m7(int32_t *); - -void pqcrystals_dilithium_asm_caddq(int32_t *); -void pqcrystals_dilithium_asm_caddq_opt_m7(int32_t *); - -void pqcrystals_dilithium_asm_pointwise_acc_montgomery(int32_t *, int32_t *, int32_t *); -void pqcrystals_dilithium_asm_pointwise_acc_montgomery_opt_m7(int32_t *, int32_t *, int32_t *); - -void pqcrystals_dilithium_asm_pointwise_montgomery(int32_t *, int32_t *, int32_t *); -void pqcrystals_dilithium_asm_pointwise_montgomery_opt_m7(int32_t *, int32_t *, int32_t *); - -void pqcrystals_dilithium_asm_reduce32(int32_t *); -void pqcrystals_dilithium_asm_reduce32_opt_m7(int32_t *); - -#define NTT_LAYERS 8 -#define NTT_SIZE (1u << NTT_LAYERS) -#define NTT_ROOT_ORDER (2 * NTT_SIZE) -#define NTT_INCOMPLETE_LAYERS 8 -#define NTT_INCOMPLETE_SIZE (1u << NTT_INCOMPLETE_LAYERS) -#define NTT_LAYER_GAP ( NTT_LAYERS - NTT_INCOMPLETE_LAYERS ) -#define NTT_LAYER_STRIDE (1u << NTT_LAYER_GAP ) - - -typedef struct { - char name[100]; - uint64_t cycles; -} benchmark_result; - -benchmark_result results[100]; -int benchmark_cnt = 0; - -static void add_benchmark_results(char *name, uint64_t cycles){ - if(benchmark_cnt == 100) return; - - results[benchmark_cnt].cycles = cycles; - strncpy(results[benchmark_cnt].name, name, 100); - benchmark_cnt++; -} - -static void dump_benchmarks_tex(void){ - for(int i=0;i> 1])); \ - add_benchmark_results(#var, (cycles[REPEAT_MEDIAN >> 1])); \ - return (0); \ - } - -#define MAKE_BENCH_BASEMUL(var, func) \ - int bench_##var() \ - { \ - uint64_t t1, t2; \ - uint64_t cycles[REPEAT_MEDIAN]; \ - int32_t src[NTT_SIZE] __attribute__((aligned(16))); \ - int32_t src1[NTT_SIZE] __attribute__((aligned(16))); \ - int32_t src2[NTT_SIZE] __attribute__((aligned(16))); \ - (func)(src, src1, src2); \ - for (size_t cnt_median = 0; cnt_median < REPEAT_MEDIAN; cnt_median++) \ - { \ - t1 = hal_get_time(); \ - for (size_t cnt = 0; cnt < REPEAT; cnt++) \ - (func)(src, src1, src2); \ - t2 = hal_get_time(); \ - cycles[cnt_median] = (t2 - t1) / REPEAT; \ - } \ - qsort(cycles, REPEAT_MEDIAN, sizeof(uint64_t), cmp_uint64_t); \ - debug_printf(#var " repeat %d, %d", \ - REPEAT *REPEAT_MEDIAN, (cycles[REPEAT_MEDIAN >> 1])); \ - add_benchmark_results(#var, (cycles[REPEAT_MEDIAN >> 1])); \ - return (0); \ - } - -#define MAKE_BENCH_POINTMUL(var, func) \ - int bench_##var() \ - { \ - uint64_t t1, t2; \ - uint64_t cycles[REPEAT_MEDIAN]; \ - int16_t src[NTT_SIZE] __attribute__((aligned(16))) = {0}; \ - int32_t src1[NTT_SIZE] __attribute__((aligned(16))) = {0}; \ - (func)(src, src1); \ - for (size_t cnt_median = 0; cnt_median < REPEAT_MEDIAN; cnt_median++) \ - { \ - t1 = hal_get_time(); \ - for (size_t cnt = 0; cnt < REPEAT; cnt++) \ - (func)(src, src1); \ - t2 = hal_get_time(); \ - cycles[cnt_median] = (t2 - t1) / REPEAT; \ - } \ - qsort(cycles, REPEAT_MEDIAN, sizeof(uint64_t), cmp_uint64_t); \ - debug_printf(#var " repeat %d, %d", \ - REPEAT *REPEAT_MEDIAN, (cycles[REPEAT_MEDIAN >> 1])); \ - add_benchmark_results(#var, (cycles[REPEAT_MEDIAN >> 1])); \ - return (0); \ - } - -#define MAKE_BENCH_ASYM_MUL(var, func) \ - int bench_##var() \ - { \ - uint64_t t1, t2; \ - uint64_t cycles[REPEAT_MEDIAN]; \ - int32_t src[NTT_SIZE] __attribute__((aligned(16))) = {0}; \ - int32_t src1[NTT_SIZE] __attribute__((aligned(16)))= {0}; \ - int32_t src2[NTT_SIZE] __attribute__((aligned(16)))= {0}; \ - int16_t src3[NTT_SIZE] __attribute__((aligned(16)))= {0}; \ - (func)(src, src1, src2, src3); \ - for (size_t cnt_median = 0; cnt_median < REPEAT_MEDIAN; cnt_median++) \ - { \ - t1 = hal_get_time(); \ - for (size_t cnt = 0; cnt < REPEAT; cnt++) \ - (func)(src, src1, src2, src3); \ - t2 = hal_get_time(); \ - cycles[cnt_median] = (t2 - t1) / REPEAT; \ - } \ - qsort(cycles, REPEAT_MEDIAN, sizeof(uint64_t), cmp_uint64_t); \ - debug_printf(#var " repeat %d, %d", \ - REPEAT *REPEAT_MEDIAN, (cycles[REPEAT_MEDIAN >> 1])); \ - add_benchmark_results(#var, (cycles[REPEAT_MEDIAN >> 1])); \ - return (0); \ - } - -#define MAKE_BENCH_NTT_16(var, func) \ - int bench_##var() \ - { \ - uint64_t t1, t2; \ - uint64_t cycles[REPEAT_MEDIAN]; \ - int16_t src[NTT_SIZE] __attribute__((aligned(16))); \ - (func)(src); \ - for (size_t cnt_median = 0; cnt_median < REPEAT_MEDIAN; cnt_median++) \ - { \ - t1 = hal_get_time(); \ - for (size_t cnt = 0; cnt < REPEAT; cnt++) \ - (func)(src); \ - t2 = hal_get_time(); \ - cycles[cnt_median] = (t2 - t1) / REPEAT; \ - } \ - qsort(cycles, REPEAT_MEDIAN, sizeof(uint64_t), cmp_uint64_t); \ - debug_printf(#var " repeat %d, %d", \ - REPEAT *REPEAT_MEDIAN, (cycles[REPEAT_MEDIAN >> 1])); \ - add_benchmark_results(#var, (cycles[REPEAT_MEDIAN >> 1])); \ - return (0); \ - } - -#define MAKE_BENCH_POINTMUL_16(var, func) \ - int bench_##var() \ - { \ - uint64_t t1, t2; \ - uint64_t cycles[REPEAT_MEDIAN]; \ - int16_t src[NTT_SIZE] __attribute__((aligned(16))) = {0}; \ - int16_t src1[NTT_SIZE] __attribute__((aligned(16))) = {0}; \ - (func)(src, src1); \ - for (size_t cnt_median = 0; cnt_median < REPEAT_MEDIAN; cnt_median++) \ - { \ - t1 = hal_get_time(); \ - for (size_t cnt = 0; cnt < REPEAT; cnt++) \ - (func)(src, src1); \ - t2 = hal_get_time(); \ - cycles[cnt_median] = (t2 - t1) / REPEAT; \ - } \ - qsort(cycles, REPEAT_MEDIAN, sizeof(uint64_t), cmp_uint64_t); \ - debug_printf(#var " repeat %d, %d", \ - REPEAT *REPEAT_MEDIAN, (cycles[REPEAT_MEDIAN >> 1])); \ - add_benchmark_results(#var, (cycles[REPEAT_MEDIAN >> 1])); \ - return (0); \ - } - -#define MAKE_BENCH_ASYM_MUL_16(var, func) \ - int bench_##var() \ - { \ - uint64_t t1, t2; \ - uint64_t cycles[REPEAT_MEDIAN]; \ - int16_t src[NTT_SIZE] __attribute__((aligned(16))) = {0}; \ - int16_t src1[NTT_SIZE] __attribute__((aligned(16)))= {0}; \ - int16_t src2[NTT_SIZE] __attribute__((aligned(16)))= {0}; \ - int16_t src3[NTT_SIZE] __attribute__((aligned(16)))= {0}; \ - (func)(src, src1, src2, src3); \ - for (size_t cnt_median = 0; cnt_median < REPEAT_MEDIAN; cnt_median++) \ - { \ - t1 = hal_get_time(); \ - for (size_t cnt = 0; cnt < REPEAT; cnt++) \ - (func)(src, src1, src2, src3); \ - t2 = hal_get_time(); \ - cycles[cnt_median] = (t2 - t1) / REPEAT; \ - } \ - qsort(cycles, REPEAT_MEDIAN, sizeof(uint64_t), cmp_uint64_t); \ - debug_printf(#var " repeat %d, %d", \ - REPEAT *REPEAT_MEDIAN, (cycles[REPEAT_MEDIAN >> 1])); \ - add_benchmark_results(#var, (cycles[REPEAT_MEDIAN >> 1])); \ - return (0); \ - } - -// Q=8380417 polynomial multiplication -MAKE_BENCH_NTT(pqcrystals_dilithium_ntt,pqcrystals_dilithium_ntt) -MAKE_BENCH_NTT(pqcrystals_dilithium_ntt_opt_m7,pqcrystals_dilithium_ntt_opt_m7) - -MAKE_BENCH_NTT(pqcrystals_dilithium_invntt_tomont,pqcrystals_dilithium_invntt_tomont) -MAKE_BENCH_NTT(pqcrystals_dilithium_invntt_tomont_opt_m7,pqcrystals_dilithium_invntt_tomont_opt_m7) - -MAKE_BENCH_BASEMUL(pqcrystals_dilithium_asm_pointwise_montgomery,pqcrystals_dilithium_asm_pointwise_montgomery) -MAKE_BENCH_BASEMUL(pqcrystals_dilithium_asm_pointwise_montgomery_opt_m7,pqcrystals_dilithium_asm_pointwise_montgomery_opt_m7) - -MAKE_BENCH_BASEMUL(pqcrystals_dilithium_asm_pointwise_acc_montgomery,pqcrystals_dilithium_asm_pointwise_acc_montgomery) -MAKE_BENCH_BASEMUL(pqcrystals_dilithium_asm_pointwise_acc_montgomery_opt_m7,pqcrystals_dilithium_asm_pointwise_acc_montgomery_opt_m7) - -// Q=257 polynomial multiplication -MAKE_BENCH_NTT(__asm_fnt_257,fnt_ntt) -MAKE_BENCH_NTT(__asm_fnt_257_opt_m7,fnt_ntt_opt_m7) - -MAKE_BENCH_NTT(__asm_ifnt_257,fnt_invntt_tomont) -MAKE_BENCH_NTT(__asm_ifnt_257_opt_m7,fnt_invntt_tomont_opt_m7) - -MAKE_BENCH_POINTMUL(__asm_point_mul_257_16,fnt_point_mul) -MAKE_BENCH_POINTMUL(__asm_point_mul_257_16_opt_m7,fnt_point_mul_opt_m7) - -MAKE_BENCH_ASYM_MUL(__asm_asymmetric_mul_257_16,fnt_asymmetric_mul) -MAKE_BENCH_ASYM_MUL(__asm_asymmetric_mul_257_16_opt_m7,fnt_asymmetric_mul_opt_m7) - -// Q=769 polynomial multiplication -MAKE_BENCH_NTT_16(small_ntt_asm_769,small_ntt) -MAKE_BENCH_NTT_16(small_ntt_asm_769_opt_m7,small_ntt_opt_m7) - -MAKE_BENCH_NTT_16(small_invntt_asm_769,small_invntt_tomont) -MAKE_BENCH_NTT_16(small_invntt_asm_769_opt_m7,small_invntt_tomont_opt_m7) - -MAKE_BENCH_POINTMUL_16(small_pointmul_asm_769,small_point_mul) -MAKE_BENCH_POINTMUL_16(small_pointmul_asm_769_opt_m7,small_point_mul_opt_m7) - -MAKE_BENCH_ASYM_MUL_16(small_asymmetric_mul_asm_769,small_asymmetric_mul_asm_769) -MAKE_BENCH_ASYM_MUL_16(small_asymmetric_mul_asm_769_opt_m7,small_asymmetric_mul_asm_769_opt_m7) - -// other arithmetic -MAKE_BENCH_NTT(pqcrystals_dilithium_asm_reduce32,pqcrystals_dilithium_asm_reduce32) -MAKE_BENCH_NTT(pqcrystals_dilithium_asm_reduce32_opt_m7,pqcrystals_dilithium_asm_reduce32_opt_m7) - -MAKE_BENCH_NTT(pqcrystals_dilithium_asm_caddq,pqcrystals_dilithium_asm_caddq) -MAKE_BENCH_NTT(pqcrystals_dilithium_asm_caddq_opt_m7,pqcrystals_dilithium_asm_caddq_opt_m7) - -int main(void) -{ - int ret = 0; - debug_test_start( "\nDilithium All Test!\n" ); - - /* Test cases */ - - // Q=8380417 polynomial multiplication - if( test_ntt_pqm4() != 0 ){return( 1 );} - if( test_ntt_pqm4_opt() != 0 ){return( 1 );} - - if( test_intt_pqm4() != 0 ){return( 1 );} - if( test_intt_pqm4_opt() != 0 ){return( 1 );} - - if( test_pointwise_montgomery_pqm4() != 0 ){ return( 1 ); } - if( test_pointwise_montgomery_pqm4_opt() != 0 ){ return( 1 ); } - - if( test_pointwise_acc_montgomery_pqm4() != 0 ){ return( 1 ); } - if( test_pointwise_acc_montgomery_pqm4_opt() != 0 ){ return( 1 ); } - - // Q=257 polynomial multiplication - if( test_fnt_257_pqm4() != 0 ){return( 1 );} - if( test_fnt_257_pqm4_opt() != 0 ){return( 1 );} - - if( test_ifnt_257_pqm4() != 0 ){return( 1 );} - if( test_ifnt_257_pqm4_opt() != 0 ){return( 1 );} - - if( test_point_mul_257_16_pqm4() != 0 ){return( 1 );} - if( test_point_mul_257_16_pqm4_opt() != 0 ){return( 1 );} - - if( test_asymmetric_mul_257_16_pqm4() != 0 ){return( 1 );} - if( test_asymmetric_mul_257_16_pqm4_opt() != 0 ){return( 1 );} - - // Q=769 polynomial multiplication - if( test_ntt_769_pqm4() != 0 ){return( 1 );} - if( test_ntt_769_pqm4_opt() != 0 ){return( 1 );} - - if( test_intt_769_pqm4() != 0 ){return( 1 );} - if( test_intt_769_pqm4_opt() != 0 ){return( 1 );} - - if( test_pointmul_769_pqm4() != 0 ){ return( 1 ); } - if( test_pointmul_769_pqm4_opt() != 0 ){ return( 1 ); } - - if( test_asymmetric_mul_769_pqm4() != 0 ){ return( 1 ); } - if( test_asymmetric_mul_769_pqm4_opt() != 0 ){ return( 1 ); } - - // other arithmetic - if( test_reduce32_pqm4() != 0 ){ return( 1 ); } - if( test_reduce32_pqm4_opt() != 0 ){ return( 1 ); } - - if( test_caddq_pqm4() != 0 ){return( 1 );} - if( test_caddq_pqm4_opt() != 0 ){return( 1 );} - - - /* Benchmarks */ - - // Q=8380417 polynomial multiplication - bench_pqcrystals_dilithium_ntt(); - bench_pqcrystals_dilithium_ntt_opt_m7(); - - bench_pqcrystals_dilithium_invntt_tomont(); - bench_pqcrystals_dilithium_invntt_tomont_opt_m7(); - - bench_pqcrystals_dilithium_asm_pointwise_montgomery(); - bench_pqcrystals_dilithium_asm_pointwise_montgomery_opt_m7(); - - bench_pqcrystals_dilithium_asm_pointwise_acc_montgomery(); - bench_pqcrystals_dilithium_asm_pointwise_acc_montgomery_opt_m7(); - - // Q=257 polynomial multiplication - bench___asm_fnt_257(); - bench___asm_fnt_257_opt_m7(); - - bench___asm_ifnt_257(); - bench___asm_ifnt_257_opt_m7(); - - bench___asm_point_mul_257_16(); - bench___asm_point_mul_257_16_opt_m7(); - - bench___asm_asymmetric_mul_257_16(); - bench___asm_asymmetric_mul_257_16_opt_m7(); - - // Q=769 polynomial multiplication - bench_small_ntt_asm_769(); - bench_small_ntt_asm_769_opt_m7(); - - bench_small_invntt_asm_769(); - bench_small_invntt_asm_769_opt_m7(); - - bench_small_pointmul_asm_769(); - bench_small_pointmul_asm_769_opt_m7(); - - bench_small_asymmetric_mul_asm_769(); - bench_small_asymmetric_mul_asm_769_opt_m7(); - - // other arithmetic - bench_pqcrystals_dilithium_asm_reduce32(); - bench_pqcrystals_dilithium_asm_reduce32_opt_m7(); - - bench_pqcrystals_dilithium_asm_caddq(); - bench_pqcrystals_dilithium_asm_caddq_opt_m7(); - - debug_printf( "Done!\n" ); - - debug_printf("======================" ); - dump_benchmarks_tex(); - debug_printf("======================\n" ); - - debug_printf( "ALL GOOD!\n" ); - return( ret ); -} diff --git a/tests/dilithium-all/ntt_769.h b/tests/dilithium-all/ntt_769.h deleted file mode 100644 index 8d66a75..0000000 --- a/tests/dilithium-all/ntt_769.h +++ /dev/null @@ -1,64 +0,0 @@ -#ifndef NTT_769_H -#define NTT_769_H - -#include -#define SMALL_Q 769 -#define N 256 - -static const int32_t zetas_769[64] __attribute__((aligned(8))) = { - 3138844760, 1334846793, 999738812, 1854264165, 1681125041, 1150537404, 2820492178, 3071823164, 726067294, 2066499220, 3272887953, 1055590142, 4255871365, 1871019564, 2731130050, 1826338500, 513832239, 1792827701, 3373420347, 2993631302, 1161707670, 3306398751, 3518633806, 3406931146, 1586177780, 3853741788, 3317569017, 3825816122, 971813147, 122872927, 217820188, 619949766, 3753209393, 770748358, 4099487641, 765163225, 3630336467, 1742561504, 3479537875, 982983413, 2809321912, 2379266669, 703726762, 681386230, 4110657907, 1457719720, 1217559000, 2474213930, 1195218468, 1089100940, 564098436, 614364633, 3635921600, 2088839752, 3702943196, 1949211426, 2569161192, 374203913, 3982199847, 2083254619, 1513571050, 3647091866, 413299844, 4149753838}; - -static const int32_t zetas_asm_769[128] __attribute__((aligned(8))) = { - 346278248, 223405321, 966228013, 759578091, -150798592, 318352582, -1736976371, 1697880440, -2105595150, -804259156, 1675539907, -1016494210, 1401868389, -2005062756, 240160720, 474736307, -1200803600, -1435379187, -1156122536, 1334846793, 999738811, 1854264164, -631120032, -787503756, -1580592646, 1681125040, 1150537403, -1474475119, -1223144132, 1809583100, -100532394, -1938041160, 726067293, 2066499219, -1022079344, 1055590142, 525002504, 273671518, -212235055, -39095931, 1871019563, -1563837247, 1826338499, 139628326, 27925665, 1731391238, 513832238, 1792827701, -921546949, -1301335995, 67021596, 1117026605, 536172770, 1161707669, -988568545, -776333490, -888036151, 1290165729, -497076839, -753992958, 1586177779, -441225509, -977398279, -469151174, -1614103444, 1591762912, -94947261, 971813146, 122872927, 217820188, 619949766, -1709050706, 1010909077, -1748146637, -541757903, 770748357, -195479656, 765163224, 1413038655, 1781657435, -1206388733, -664630830, 1742561504, -815429422, 982983412, 357448514, 44681064, -1524741316, -1485645385, -1915700627, 703726761, 681386229, 686971362, 1787242568, -860110486, -184309390, 1457719719, 1217558999, -1820753366, -502661972, -1921285760, 1139367137, 1195218467, 1089100940, 564098435, 614364633, -1100271206, 457980908, -1669954774, -659045697, 2088839751, -592024101, 1949211426, 1368357591, 698141628, 335107981, -1725806105, 374203913, -312767449, 2083254618, -1061175275, -2139105948, 519417371, 1513571050, -647875431, 413299844, -145213459, 0}; - -// INTT with CT butterfly -static const int32_t zetas_inv_asm_769[256] __attribute__((aligned(8))) = { - 5585134, 5585134, -346278248, 5585134, -966228013, -346278248, -223405321, 5585134, 1736976371, -966228013, 150798592, -346278248, -318352582, -223405321, -759578091, - // removed first "2285" + LAYER 3+2+1 - 1 - butterfly - 5585134, -346278248, 5585134, -966228013, -346278248, -223405321, 636705165, 446810642, 1519156183, 11170266, -821014555, -1932456027, 301597183, -692556495, -240160720, 1061175275, -1368357591, -519417371, -335107981, 2139105948, -698141628, -625534899, -1267825197, 843355087, 290426917, 128458060, 1295750862, -748407825, -826599688, 1736976371, -240160720, 2005062756, 1061175275, 1100271206, -1368357591, 502661972, 915961816, 1396283256, 452395775, -1038834743, -955057747, -670215963, 2016233022, -16755399, -1675539907, 1614103444, -1290165729, 94947261, 753992958, -1591762912, 497076839, -1954796559, 1943626293, -1122611738, -1239899531, 938302348, -245745853, 882451018, -435640376, -966228013, 1736976371, -318352582, -240160720, -1401868389, 2005062756, 1016494210, 714897027, -1005323944, 876865885, 2122350549, -1373942724, -2094424884, 1468889985, 1558252114, -1401868389, -686971362, -357448514, 860110486, 1524741316, -1787242568, -44681064, 1407453522, -368618780, 1323676527, -653460564, -1362772458, 1379527857, -463566041, 1859849297, 150798592, -1675539907, 804259156, 1614103444, -67021596, -1290165729, -139628326, -2060914086, -994153678, 55851330, 189894523, -1072345541, 1507985917, 832184821, 1111441472, 2105595150, -525002504, -1809583100, 212235055, 1938041160, -273671518, 100532394, -2044158687, -78191862, 1452134586, 642290298, -2111180283, 552928169, 161968858, -1167292802, -346278248, -966228013, -223405321, 1736976371, 150798592, -318352582, -759578091, -1608518311, -2032988421, -899206417, -480321440, 943887481, 1491230518, -83776995, -284841784, 2005062756, 1100271206, 502661972, 1669954774, -1139367137, -457980908, 1921285760, 1128196871, -1318091394, -1904530361, 396544445, -1228729265, 117287794, 2116765416, 1184048201, -318352582, -1401868389, 1016494210, -686971362, -1413038655, -357448514, 1709050706, -731652426, 89362128, 2021818155, 1720220972, -1882189829, -1245484665, -798674023, 720482160, 804259156, -67021596, -139628326, -536172770, -1731391238, -1117026605, -27925665, -1843093898, -1971551958, 1027664477, 1776072302, -1692295306, 1977137091, 709311894, 1552666981, -223405321, 150798592, -759578091, -1675539907, 2105595150, 804259156, -1697880440, -675801096, 279256651, 949472614, -1066760408, -1050005009, -134043193, 1262240064, 1714635839, 1016494210, -1413038655, 1709050706, 1206388733, 1748146637, -1781657435, -1010909077, -390959312, -1329261660, -1083515807, -1965966825, -1530326449, 809844289, -1541496715, 1630858843, -759578091, 2105595150, -1697880440, -525002504, 631120032, -1809583100, -474736307, -1575007513, -201064789, 1893360095, 424470110, -1133782004, -418884977, -1424208921, -547343036, -1697880440, 631120032, -474736307, 1580592646, 1435379187, 787503756, 1200803600, 1999477623, -932717215, 1982722224, -1848679031, 586438968, 1993892490, 1625273710, -1346017059, 0}; - -// Q1=769 -void small_ntt_asm_769(int16_t a[N], const int32_t *zetas); -void small_invntt_asm_769(int16_t a[N], const int32_t *zetas); -void small_pointmul_asm_769(int16_t out[N], const int16_t in[N], const int32_t *zetas); -void small_asymmetric_mul_asm_769(int16_t c[N], const int16_t a[N], const int16_t b[N], const int16_t b_prime[N]); - -// small NTT for computing cs0 and cs1; default use 769 as modulus. -void small_ntt(int16_t *a) { - small_ntt_asm_769(a, zetas_asm_769); -} -void small_invntt_tomont(int16_t *a) { - small_invntt_asm_769(a, zetas_inv_asm_769); -} -void small_point_mul(int16_t *out, int16_t *in) { - small_pointmul_asm_769(out, in, zetas_769); -} - -void small_asymmetric_mul(int16_t *c, int16_t *a, int16_t *b, int16_t *b_prime) { - small_asymmetric_mul_asm_769(c, a, b, b_prime); -} - - -void small_ntt_asm_769_opt_m7(int16_t a[N], const int32_t *zetas); -void small_invntt_asm_769_opt_m7(int16_t a[N], const int32_t *zetas); -void small_pointmul_asm_769_opt_m7(int16_t out[N], const int16_t in[N], const int32_t *zetas); -void small_asymmetric_mul_asm_769_opt_m7(int16_t c[N], const int16_t a[N], const int16_t b[N], const int16_t b_prime[N]); - -void small_ntt_opt_m7(int16_t *a) { - small_ntt_asm_769_opt_m7(a, zetas_asm_769); -} - -void small_invntt_tomont_opt_m7(int16_t *a) { - small_invntt_asm_769_opt_m7(a, zetas_inv_asm_769); -} - -void small_point_mul_opt_m7(int16_t *out, int16_t *in) { - small_pointmul_asm_769_opt_m7(out, in, zetas_769); -} - - -void small_asymmetric_mul_opt_m7(int16_t *c, int16_t *a, int16_t *b, int16_t *b_prime) { - small_asymmetric_mul_asm_769_opt_m7(c, a, b, b_prime); -} - -#endif diff --git a/tests/dilithium-all/ref.c b/tests/dilithium-all/ref.c deleted file mode 100644 index 76d9f40..0000000 --- a/tests/dilithium-all/ref.c +++ /dev/null @@ -1,109 +0,0 @@ -#include "ref.h" - -#define MONT (-4186625) // 2^32 % Q -#define QINV 58728449 // q^(-1) mod 2^32 - -static int32_t montgomery_reduce(int64_t a) { - int32_t t; - - t = (int32_t)((uint64_t)a * (uint64_t)QINV); - t = (a - (int64_t)t * Q) >> 32; - return t; -} - - - -static const int32_t zetas[N] = { - 0, 25847, -2608894, -518909, 237124, -777960, -876248, 466468, - 1826347, 2353451, -359251, -2091905, 3119733, -2884855, 3111497, 2680103, - 2725464, 1024112, -1079900, 3585928, -549488, -1119584, 2619752, -2108549, - -2118186, -3859737, -1399561, -3277672, 1757237, -19422, 4010497, 280005, - 2706023, 95776, 3077325, 3530437, -1661693, -3592148, -2537516, 3915439, - -3861115, -3043716, 3574422, -2867647, 3539968, -300467, 2348700, -539299, - -1699267, -1643818, 3505694, -3821735, 3507263, -2140649, -1600420, 3699596, - 811944, 531354, 954230, 3881043, 3900724, -2556880, 2071892, -2797779, - -3930395, -1528703, -3677745, -3041255, -1452451, 3475950, 2176455, -1585221, - -1257611, 1939314, -4083598, -1000202, -3190144, -3157330, -3632928, 126922, - 3412210, -983419, 2147896, 2715295, -2967645, -3693493, -411027, -2477047, - -671102, -1228525, -22981, -1308169, -381987, 1349076, 1852771, -1430430, - -3343383, 264944, 508951, 3097992, 44288, -1100098, 904516, 3958618, - -3724342, -8578, 1653064, -3249728, 2389356, -210977, 759969, -1316856, - 189548, -3553272, 3159746, -1851402, -2409325, -177440, 1315589, 1341330, - 1285669, -1584928, -812732, -1439742, -3019102, -3881060, -3628969, 3839961, - 2091667, 3407706, 2316500, 3817976, -3342478, 2244091, -2446433, -3562462, - 266997, 2434439, -1235728, 3513181, -3520352, -3759364, -1197226, -3193378, - 900702, 1859098, 909542, 819034, 495491, -1613174, -43260, -522500, - -655327, -3122442, 2031748, 3207046, -3556995, -525098, -768622, -3595838, - 342297, 286988, -2437823, 4108315, 3437287, -3342277, 1735879, 203044, - 2842341, 2691481, -2590150, 1265009, 4055324, 1247620, 2486353, 1595974, - -3767016, 1250494, 2635921, -3548272, -2994039, 1869119, 1903435, -1050970, - -1333058, 1237275, -3318210, -1430225, -451100, 1312455, 3306115, -1962642, - -1279661, 1917081, -2546312, -1374803, 1500165, 777191, 2235880, 3406031, - -542412, -2831860, -1671176, -1846953, -2584293, -3724270, 594136, -3776993, - -2013608, 2432395, 2454455, -164721, 1957272, 3369112, 185531, -1207385, - -3183426, 162844, 1616392, 3014001, 810149, 1652634, -3694233, -1799107, - -3038916, 3523897, 3866901, 269760, 2213111, -975884, 1717735, 472078, - -426683, 1723600, -1803090, 1910376, -1667432, -1104333, -260646, -3833893, - -2939036, -2235985, -420899, -2286327, 183443, -976891, 1612842, -3545687, - -554416, 3919660, -48306, -1362209, 3937738, 1400424, -846154, 1976782 -}; - -/************************************************* -* Name: PQCLEAN_MLDSA44_CLEAN_ntt -* -* Description: Forward NTT, in-place. No modular reduction is performed after -* additions or subtractions. Output vector is in bitreversed order. -* -* Arguments: - uint32_t p[N]: input/output coefficient array -**************************************************/ -void ntt_ref(int32_t a[N]) { - unsigned int len, start, j, k; - int32_t zeta, t; - - k = 0; - for (len = 128; len > 0; len >>= 1) { - for (start = 0; start < N; start = j + len) { - zeta = zetas[++k]; - for (j = start; j < start + len; ++j) { - t = montgomery_reduce((int64_t)zeta * a[j + len]); - a[j + len] = a[j] - t; - a[j] = a[j] + t; - } - } - } -} - - -/************************************************* -* Name: PQCLEAN_MLDSA44_CLEAN_invntt_tomont -* -* Description: Inverse NTT and multiplication by Montgomery factor 2^32. -* In-place. No modular reductions after additions or -* subtractions; input coefficients need to be smaller than -* Q in absolute value. Output coefficient are smaller than Q in -* absolute value. -* -* Arguments: - uint32_t p[N]: input/output coefficient array -**************************************************/ -void invntt_tomont_ref(int32_t a[N]) { - unsigned int start, len, j, k; - int32_t t, zeta; - const int32_t f = 41978; // mont^2/256 - - k = 256; - for (len = 1; len < N; len <<= 1) { - for (start = 0; start < N; start = j + len) { - zeta = -zetas[--k]; - for (j = start; j < start + len; ++j) { - t = a[j]; - a[j] = t + a[j + len]; - a[j + len] = t - a[j + len]; - a[j + len] = montgomery_reduce((int64_t)zeta * a[j + len]); - } - } - } - - for (j = 0; j < N; ++j) { - a[j] = montgomery_reduce((int64_t)f * a[j]); - } -} diff --git a/tests/dilithium-all/ref.h b/tests/dilithium-all/ref.h deleted file mode 100644 index 22af658..0000000 --- a/tests/dilithium-all/ref.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef REF_H -#define REF_H - -#include - -#define Q 8380417 -#define N 256 - - -void ntt_ref(int32_t a[N]); -void invntt_tomont_ref(int32_t a[N]); - -#endif diff --git a/tests/kyber-all/fips202.c b/tests/kyber-all/fips202.c deleted file mode 100644 index a368e5b..0000000 --- a/tests/kyber-all/fips202.c +++ /dev/null @@ -1,853 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 or CC0-1.0 -/* Based on the public domain implementation in - * crypto_hash/keccakc512/simple/ from http://bench.cr.yp.to/supercop.html - * by Ronny Van Keer - * and the public domain "TweetFips202" implementation - * from https://twitter.com/tweetfips202 - * by Gilles Van Assche, Daniel J. Bernstein, and Peter Schwabe */ - -#include -#include -#include - -#include "fips202.h" -#include "keccakf1600.h" - -#define NROUNDS 24 -#define ROL(a, offset) (((a) << (offset)) ^ ((a) >> (64 - (offset)))) - -#ifdef PROFILE_HASHING -#include "hal.h" -extern unsigned long long hash_cycles; -#endif - -void KeccakF1600_StatePermute_adomnicai_m4_opt_m7(uint64_t * state); -#define KeccakF1600_StatePermute KeccakF1600_StatePermute_adomnicai_m4_opt_m7 - - - -/************************************************* - * Name: keccak_absorb - * - * Description: Absorb step of Keccak; - * non-incremental, starts by zeroeing the state. - * - * Arguments: - uint64_t *s: pointer to (uninitialized) output Keccak state - * - uint32_t r: rate in bytes (e.g., 168 for SHAKE128) - * - const uint8_t *m: pointer to input to be absorbed into s - * - size_t mlen: length of input in bytes - * - uint8_t p: domain-separation byte for different Keccak-derived functions - **************************************************/ -static void keccak_absorb(uint64_t *s, - uint32_t r, - const uint8_t *m, size_t mlen, - uint8_t p) -{ - while (mlen >= r) - { - KeccakF1600_StateXORBytes(s, m, 0, r); - KeccakF1600_StatePermute(s); - mlen -= r; - m += r; - } - - if(mlen > 0){ - KeccakF1600_StateXORBytes(s, m, 0, mlen); - } - - if(mlen == r-1){ - p |= 128; - KeccakF1600_StateXORBytes(s, &p, mlen, 1); - } else { - KeccakF1600_StateXORBytes(s, &p, mlen, 1); - p = 128; - KeccakF1600_StateXORBytes(s, &p, r-1, 1); - } -} - - -/************************************************* - * Name: keccak_squeezeblocks - * - * Description: Squeeze step of Keccak. Squeezes full blocks of r bytes each. - * Modifies the state. Can be called multiple times to keep squeezing, - * i.e., is incremental. - * - * Arguments: - uint8_t *h: pointer to output blocks - * - size_t nblocks: number of blocks to be squeezed (written to h) - * - uint64_t *s: pointer to in/output Keccak state - * - uint32_t r: rate in bytes (e.g., 168 for SHAKE128) - **************************************************/ -static void keccak_squeezeblocks(uint8_t *h, size_t nblocks, - uint64_t *s, - uint32_t r) -{ - while(nblocks > 0) - { - KeccakF1600_StatePermute(s); - KeccakF1600_StateExtractBytes(s, h, 0, r); - h += r; - nblocks--; - } -} - -/************************************************* - * Name: keccak_inc_init - * - * Description: Initializes the incremental Keccak state to zero. - * - * Arguments: - uint64_t *s_inc: pointer to input/output incremental state - * First 25 values represent Keccak state. - * 26th value represents either the number of absorbed bytes - * that have not been permuted, or not-yet-squeezed bytes. - **************************************************/ -static void keccak_inc_init(uint64_t *s_inc) { - size_t i; - - for (i = 0; i < 25; ++i) { - s_inc[i] = 0; - } - s_inc[25] = 0; -} -/************************************************* - * Name: keccak_inc_absorb - * - * Description: Incremental keccak absorb - * Preceded by keccak_inc_init, succeeded by keccak_inc_finalize - * - * Arguments: - uint64_t *s_inc: pointer to input/output incremental state - * First 25 values represent Keccak state. - * 26th value represents either the number of absorbed bytes - * that have not been permuted, or not-yet-squeezed bytes. - * - uint32_t r: rate in bytes (e.g., 168 for SHAKE128) - * - const uint8_t *m: pointer to input to be absorbed into s_inc - * - size_t mlen: length of input in bytes - **************************************************/ -static void keccak_inc_absorb(uint64_t *s_inc, uint32_t r, const uint8_t *m, - size_t mlen) { - /* Recall that s_inc[25] is the non-absorbed bytes xored into the state */ - while (mlen + s_inc[25] >= r) { - - KeccakF1600_StateXORBytes(s_inc, m, s_inc[25], r-s_inc[25]); - mlen -= (size_t)(r - s_inc[25]); - m += r - s_inc[25]; - s_inc[25] = 0; - - KeccakF1600_StatePermute(s_inc); - } - - KeccakF1600_StateXORBytes(s_inc, m, s_inc[25], mlen); - s_inc[25] += mlen; -} - -/************************************************* - * Name: keccak_inc_finalize - * - * Description: Finalizes Keccak absorb phase, prepares for squeezing - * - * Arguments: - uint64_t *s_inc: pointer to input/output incremental state - * First 25 values represent Keccak state. - * 26th value represents either the number of absorbed bytes - * that have not been permuted, or not-yet-squeezed bytes. - * - uint32_t r: rate in bytes (e.g., 168 for SHAKE128) - * - uint8_t p: domain-separation byte for different - * Keccak-derived functions - **************************************************/ -static void keccak_inc_finalize(uint64_t *s_inc, uint32_t r, uint8_t p) { - /* After keccak_inc_absorb, we are guaranteed that s_inc[25] < r, - so we can always use one more byte for p in the current state. */ - if(s_inc[25] == r-1){ - p |= 128; - KeccakF1600_StateXORBytes(s_inc, &p, s_inc[25], 1); - } else { - KeccakF1600_StateXORBytes(s_inc, &p, s_inc[25], 1); - p = 128; - KeccakF1600_StateXORBytes(s_inc, &p, r-1, 1); - } - s_inc[25] = 0; -} - -/************************************************* - * Name: keccak_inc_squeeze - * - * Description: Incremental Keccak squeeze; can be called on byte-level - * - * Arguments: - uint8_t *h: pointer to output bytes - * - size_t outlen: number of bytes to be squeezed - * - uint64_t *s_inc: pointer to input/output incremental state - * First 25 values represent Keccak state. - * 26th value represents either the number of absorbed bytes - * that have not been permuted, or not-yet-squeezed bytes. - * - uint32_t r: rate in bytes (e.g., 168 for SHAKE128) - **************************************************/ -static void keccak_inc_squeeze(uint8_t *h, size_t outlen, - uint64_t *s_inc, uint32_t r) { - size_t len; - if(outlen < s_inc[25]) - { - len = outlen; - } - else - { - len = s_inc[25]; - } - - KeccakF1600_StateExtractBytes(s_inc, h, r-s_inc[25], len); - h += len; - outlen -= len; - s_inc[25] -= len; - - /* Then squeeze the remaining necessary blocks */ - while (outlen > 0) { - KeccakF1600_StatePermute(s_inc); - - if(outlen < r) - { - len = outlen; - } - else - { - len = r; - } - KeccakF1600_StateExtractBytes(s_inc, h, 0, len); - h += len; - outlen -= len; - s_inc[25] = r - len; - } -} - -void shake128_inc_init(shake128incctx *state) { -#ifdef PROFILE_HASHING - uint64_t t0 = hal_get_time(); -#endif - keccak_inc_init(state->ctx); -#ifdef PROFILE_HASHING - uint64_t t1 = hal_get_time(); - hash_cycles += (t1-t0); -#endif -} - -void shake128_inc_absorb(shake128incctx *state, const uint8_t *input, size_t inlen) { -#ifdef PROFILE_HASHING - uint64_t t0 = hal_get_time(); -#endif - keccak_inc_absorb(state->ctx, SHAKE128_RATE, input, inlen); -#ifdef PROFILE_HASHING - uint64_t t1 = hal_get_time(); - hash_cycles += (t1-t0); -#endif -} - -void shake128_inc_finalize(shake128incctx *state) { -#ifdef PROFILE_HASHING - uint64_t t0 = hal_get_time(); -#endif - keccak_inc_finalize(state->ctx, SHAKE128_RATE, 0x1F); -#ifdef PROFILE_HASHING - uint64_t t1 = hal_get_time(); - hash_cycles += (t1-t0); -#endif -} - -void shake128_inc_squeeze(uint8_t *output, size_t outlen, shake128incctx *state) { -#ifdef PROFILE_HASHING - uint64_t t0 = hal_get_time(); -#endif - keccak_inc_squeeze(output, outlen, state->ctx, SHAKE128_RATE); -#ifdef PROFILE_HASHING - uint64_t t1 = hal_get_time(); - hash_cycles += (t1-t0); -#endif -} - -void shake128_inc_ctx_clone(shake128incctx* dest, const shake128incctx *src) { - memcpy(dest, src, sizeof(shake128incctx)); -} - -void shake128_inc_ctx_release(shake128incctx *state) { - (void) state; -} - -void shake256_inc_init(shake256incctx *state) { -#ifdef PROFILE_HASHING - uint64_t t0 = hal_get_time(); -#endif - keccak_inc_init(state->ctx); -#ifdef PROFILE_HASHING - uint64_t t1 = hal_get_time(); - hash_cycles += (t1-t0); -#endif -} - -void shake256_inc_absorb(shake256incctx *state, const uint8_t *input, size_t inlen) { -#ifdef PROFILE_HASHING - uint64_t t0 = hal_get_time(); -#endif - keccak_inc_absorb(state->ctx, SHAKE256_RATE, input, inlen); -#ifdef PROFILE_HASHING - uint64_t t1 = hal_get_time(); - hash_cycles += (t1-t0); -#endif -} - -void shake256_inc_finalize(shake256incctx *state) { -#ifdef PROFILE_HASHING - uint64_t t0 = hal_get_time(); -#endif - keccak_inc_finalize(state->ctx, SHAKE256_RATE, 0x1F); -#ifdef PROFILE_HASHING - uint64_t t1 = hal_get_time(); - hash_cycles += (t1-t0); -#endif -} - -void shake256_inc_squeeze(uint8_t *output, size_t outlen, shake256incctx *state) { -#ifdef PROFILE_HASHING - uint64_t t0 = hal_get_time(); -#endif - keccak_inc_squeeze(output, outlen, state->ctx, SHAKE256_RATE); -#ifdef PROFILE_HASHING - uint64_t t1 = hal_get_time(); - hash_cycles += (t1-t0); -#endif -} - -void shake256_inc_ctx_clone(shake256incctx* dest, const shake256incctx *src) { - memcpy(dest, src, sizeof(shake256incctx)); -} - -void shake256_inc_ctx_release(shake256incctx *state) { - (void) state; -} - -/********** cSHAKE128 ***********/ - -void cshake128_simple_absorb(shake128ctx *state, uint16_t cstm, const uint8_t *in, size_t inlen) -{ -#ifdef PROFILE_HASHING - uint64_t t0 = hal_get_time(); -#endif - - - uint8_t sep[8]; - size_t i; - - for (i = 0; i < 25; i++) - state->ctx[i] = 0; - - /* Absorb customization (domain-separation) string */ - sep[0] = 0x01; - sep[1] = 0xa8; - sep[2] = 0x01; - sep[3] = 0x00; - sep[4] = 0x01; - sep[5] = 16; // fixed bitlen of cstm - sep[6] = cstm & 0xff; - sep[7] = cstm >> 8; - - KeccakF1600_StateXORBytes(state->ctx, sep, 0, 8); - KeccakF1600_StatePermute(state->ctx); - - /* Absorb input */ - keccak_absorb(state->ctx, SHAKE128_RATE, in, inlen, 0x04); - -#ifdef PROFILE_HASHING - uint64_t t1 = hal_get_time(); - hash_cycles += (t1-t0); -#endif - -} - - -void cshake128_simple_squeezeblocks(uint8_t *output, size_t nblocks, shake128ctx *state) -{ -#ifdef PROFILE_HASHING - uint64_t t0 = hal_get_time(); -#endif - keccak_squeezeblocks(output, nblocks, state->ctx, SHAKE128_RATE); -#ifdef PROFILE_HASHING - uint64_t t1 = hal_get_time(); - hash_cycles += (t1-t0); -#endif -} - - -void cshake128_simple(uint8_t *output, size_t outlen, uint16_t cstm, const uint8_t *in, size_t inlen) -{ - shake128incctx state; - uint8_t sep[8]; -#ifdef PROFILE_HASHING - uint64_t t0 = hal_get_time(); -#endif - - keccak_inc_init(state.ctx); - - /* Absorb customization (domain-separation) string */ - sep[0] = 0x01; - sep[1] = 0xa8; - sep[2] = 0x01; - sep[3] = 0x00; - sep[4] = 0x01; - sep[5] = 16; // fixed bitlen of cstm - sep[6] = cstm & 0xff; - sep[7] = cstm >> 8; - - KeccakF1600_StateXORBytes(state.ctx, sep, 0, 8); - KeccakF1600_StatePermute(state.ctx); - - /* Absorb input */ - keccak_inc_absorb(state.ctx, SHAKE128_RATE, in, inlen); - keccak_inc_finalize(state.ctx, SHAKE128_RATE, 0x04); - - /* Squeeze output */ - keccak_inc_squeeze(output, outlen, state.ctx, SHAKE128_RATE); -#ifdef PROFILE_HASHING - uint64_t t1 = hal_get_time(); - hash_cycles += (t1-t0); -#endif -} - - - -/************************************************* - * Name: shake128_absorb - * - * Description: Absorb step of the SHAKE128 XOF. - * non-incremental, starts by zeroeing the state. - * - * Arguments: - uint64_t *state: pointer to (uninitialized) output Keccak state - * - const uint8_t *input: pointer to input to be absorbed into state - * - size_t inlen: length of input in bytes - **************************************************/ -void shake128_absorb(shake128ctx *state, const uint8_t *input, size_t inlen) -{ -#ifdef PROFILE_HASHING - uint64_t t0 = hal_get_time(); -#endif - int i; - for (i = 0; i < 25; i++) - state->ctx[i] = 0; - - keccak_absorb(state->ctx, SHAKE128_RATE, input, inlen, 0x1F); -#ifdef PROFILE_HASHING - uint64_t t1 = hal_get_time(); - hash_cycles += (t1-t0); -#endif -} - -/************************************************* - * Name: shake128_squeezeblocks - * - * Description: Squeeze step of SHAKE128 XOF. Squeezes full blocks of SHAKE128_RATE bytes each. - * Modifies the state. Can be called multiple times to keep squeezing, - * i.e., is incremental. - * - * Arguments: - uint8_t *output: pointer to output blocks - * - size_t nblocks: number of blocks to be squeezed (written to output) - * - shake128ctx *state: pointer to in/output Keccak state - **************************************************/ -static const unsigned char shake128_bytes[168] = { 0x30, 0x29, 0x11, 0x08, 0x3E, 0x94, 0xD8, 0xC0, 0xD7, 0xB3, 0xD6, 0xDA, 0x94, 0xDF, 0x8A, 0x4F, 0x7F, 0x95, 0x52, 0xB1, 0xBC, 0xE4, 0xD2, 0x1D, 0xE7, 0xBD, 0x60, 0xD4, 0x38, 0xD0, 0x7D, 0x8E, 0xF6, 0xFA, 0x2D, 0xCD, 0xE1, 0x28, 0xA8, 0x91, 0x00, 0xA2, 0x8C, 0x3A, 0x5E, 0xFA, 0xEE, 0xAD, 0xDF, 0x8A, 0x19, 0xD7, 0xC5, 0xFA, 0x28, 0x30, 0x81, 0xE4, 0x27, 0xB5, 0x66, 0x9A, 0xBF, 0x10, 0x93, 0x78, 0xC2, 0xF3, 0xA7, 0xE5, 0xA3, 0xBF, 0x1B, 0xA9, 0x62, 0x3C, 0xEE, 0x90, 0x09, 0x06, 0x80, 0xE7, 0x69, 0xA1, 0xFC, 0x9F, 0xCF, 0xF7, 0xF0, 0x83, 0xBC, 0x36, 0x86, 0xC6, 0xED, 0x11, 0xA8, 0xA4, 0xD4, 0xC9, 0x1A, 0xB1, 0x54, 0x24, 0xE0, 0xBF, 0x2D, 0xA3, 0x84, 0xF4, 0x03, 0xA9, 0x0B, 0x9B, 0xEA, 0xA3, 0x09, 0x31, 0x6B, 0xA9, 0x2D, 0x09, 0x8A, 0x68, 0x57, 0x89, 0x9A, 0xDB, 0x67, 0x95, 0x39, 0xF0, 0xE7, 0xFF, 0x47, 0x39, 0x80, 0xB8, 0xAE, 0x88, 0x35, 0x61, 0xCA, 0x14, 0x95, 0xBE, 0x45, 0x0F, 0x10, 0x34, 0x0A, 0x9B, 0x08, 0x93, 0x35, 0xAB, 0x4A, 0xE4, 0x43, 0x7F, 0x9E, 0x36, 0x63, 0x02, 0xF6, 0x56, 0x73, 0xCB }; - -void shake128_squeezeblocks(uint8_t *output, size_t nblocks, shake128ctx *state) -{ - (void)state; - // we only need this inside of the mat acc functions, but we dont't want to benchmark keccak itself, - // just copy out fixed values - - // this should not happen in our tests; get stuck in that case - while(nblocks != 1); - - - for(int i=0;i<168;i++){ - output[i] = shake128_bytes[i]; - } - -} - -void shake128_ctx_release(shake128ctx *state) { - (void) state; -} -void shake128_ctx_clone(shake128ctx *dest, const shake128ctx *src) { - memcpy(dest, src, sizeof(shake128ctx)); -} - -void shake256_absorb(shake256ctx *state, const uint8_t *input, size_t inlen) -{ -#ifdef PROFILE_HASHING - uint64_t t0 = hal_get_time(); -#endif - int i; - for (i = 0; i < 25; i++) - state->ctx[i] = 0; - - keccak_absorb(state->ctx, SHAKE256_RATE, input, inlen, 0x1F); -#ifdef PROFILE_HASHING - uint64_t t1 = hal_get_time(); - hash_cycles += (t1-t0); -#endif -} - - -void shake256_squeezeblocks(uint8_t *output, size_t nblocks, shake256ctx *state) -{ -#ifdef PROFILE_HASHING - uint64_t t0 = hal_get_time(); -#endif - keccak_squeezeblocks(output, nblocks, state->ctx, SHAKE256_RATE); -#ifdef PROFILE_HASHING - uint64_t t1 = hal_get_time(); - hash_cycles += (t1-t0); -#endif -} - -/************************************************* - * Name: shake256 - * - * Description: SHAKE256 XOF with non-incremental API - * - * Arguments: - uint8_t *output: pointer to output - * - size_t outlen: requested output length in bytes - * - const uint8_t *input: pointer to input - * - size_t inlen: length of input in bytes - **************************************************/ -void shake256(uint8_t *output, size_t outlen, - const uint8_t *input, size_t inlen) -{ -#ifdef PROFILE_HASHING - uint64_t t0 = hal_get_time(); -#endif - shake256incctx state; - - keccak_inc_init(state.ctx); - - /* Absorb input */ - keccak_inc_absorb(state.ctx, SHAKE256_RATE, input, inlen); - keccak_inc_finalize(state.ctx, SHAKE256_RATE, 0x1F); - - /* Squeeze output */ - keccak_inc_squeeze(output, outlen, state.ctx, SHAKE256_RATE); -#ifdef PROFILE_HASHING - uint64_t t1 = hal_get_time(); - hash_cycles += (t1-t0); -#endif -} - -void shake256_ctx_release(shake256ctx *state) { - (void) state; -} - -void shake256_ctx_clone(shake256ctx *dest, const shake256ctx *src) { - memcpy(dest, src, sizeof(shake256ctx)); -} - - -/************************************************* - * Name: sha3_256 - * - * Description: SHA3-256 with non-incremental API - * - * Arguments: - uint8_t *output: pointer to output - * - const uint8_t *input: pointer to input - * - size_t inlen: length of input in bytes - **************************************************/ -void sha3_256(uint8_t *output, const uint8_t *input, size_t inlen) -{ -#ifdef PROFILE_HASHING - uint64_t t0 = hal_get_time(); -#endif - sha3_256incctx state; - keccak_inc_init(state.ctx); - - /* Absorb input */ - keccak_inc_absorb(state.ctx, SHA3_256_RATE, input, inlen); - keccak_inc_finalize(state.ctx, SHA3_256_RATE, 0x06); - - /* Squeeze output */ - keccak_inc_squeeze(output, 32, state.ctx, SHA3_256_RATE); -#ifdef PROFILE_HASHING - uint64_t t1 = hal_get_time(); - hash_cycles += (t1-t0); -#endif -} -void sha3_256_inc_init(sha3_256incctx *state) { -#ifdef PROFILE_HASHING - uint64_t t0 = hal_get_time(); -#endif - keccak_inc_init(state->ctx); -#ifdef PROFILE_HASHING - uint64_t t1 = hal_get_time(); - hash_cycles += (t1-t0); -#endif -} - -void sha3_256_inc_absorb(sha3_256incctx *state, const uint8_t *input, size_t inlen) { -#ifdef PROFILE_HASHING - uint64_t t0 = hal_get_time(); -#endif - keccak_inc_absorb(state->ctx, SHA3_256_RATE, input, inlen); -#ifdef PROFILE_HASHING - uint64_t t1 = hal_get_time(); - hash_cycles += (t1-t0); -#endif -} - -void sha3_256_inc_finalize(uint8_t *output, sha3_256incctx *state) { -#ifdef PROFILE_HASHING - uint64_t t0 = hal_get_time(); -#endif - uint8_t t[SHA3_256_RATE]; - keccak_inc_finalize(state->ctx, SHA3_256_RATE, 0x06); - - keccak_squeezeblocks(t, 1, state->ctx, SHA3_256_RATE); - - for (size_t i = 0; i < 32; i++) { - output[i] = t[i]; - } -#ifdef PROFILE_HASHING - uint64_t t1 = hal_get_time(); - hash_cycles += (t1-t0); -#endif -} - -void sha3_256_inc_ctx_clone(sha3_256incctx *dest, const sha3_256incctx *src) { - memcpy(dest, src, sizeof(sha3_256incctx)); -} - -void sha3_256_inc_ctx_release(sha3_256incctx *state) { - (void) state; -} - -void sha3_384_inc_init(sha3_384incctx *state) { -#ifdef PROFILE_HASHING - uint64_t t0 = hal_get_time(); -#endif - keccak_inc_init(state->ctx); -#ifdef PROFILE_HASHING - uint64_t t1 = hal_get_time(); - hash_cycles += (t1-t0); -#endif -} - -void sha3_384_inc_absorb(sha3_384incctx *state, const uint8_t *input, size_t inlen) { -#ifdef PROFILE_HASHING - uint64_t t0 = hal_get_time(); -#endif - keccak_inc_absorb(state->ctx, SHA3_384_RATE, input, inlen); -#ifdef PROFILE_HASHING - uint64_t t1 = hal_get_time(); - hash_cycles += (t1-t0); -#endif -} - -void sha3_384_inc_finalize(uint8_t *output, sha3_384incctx *state) { -#ifdef PROFILE_HASHING - uint64_t t0 = hal_get_time(); -#endif - uint8_t t[SHA3_384_RATE]; - keccak_inc_finalize(state->ctx, SHA3_384_RATE, 0x06); - - keccak_squeezeblocks(t, 1, state->ctx, SHA3_384_RATE); - - for (size_t i = 0; i < 48; i++) { - output[i] = t[i]; - } -#ifdef PROFILE_HASHING - uint64_t t1 = hal_get_time(); - hash_cycles += (t1-t0); -#endif -} - -void sha3_384_inc_ctx_clone(sha3_384incctx *dest, const sha3_384incctx *src) { - memcpy(dest, src, sizeof(sha3_384incctx)); -} - -void sha3_384_inc_ctx_release(sha3_384incctx *state) { - (void) state; -} - -/************************************************* - * Name: sha3_384 - * - * Description: SHA3-256 with non-incremental API - * - * Arguments: - uint8_t *output: pointer to output - * - const uint8_t *input: pointer to input - * - size_t inlen: length of input in bytes - **************************************************/ -void sha3_384(uint8_t *output, const uint8_t *input, size_t inlen) { -#ifdef PROFILE_HASHING - uint64_t t0 = hal_get_time(); -#endif - sha3_384incctx state; - keccak_inc_init(state.ctx); - - /* Absorb input */ - keccak_inc_absorb(state.ctx, SHA3_384_RATE, input, inlen); - keccak_inc_finalize(state.ctx, SHA3_384_RATE, 0x06); - - /* Squeeze output */ - keccak_inc_squeeze(output, 48, state.ctx, SHA3_384_RATE); -#ifdef PROFILE_HASHING - uint64_t t1 = hal_get_time(); - hash_cycles += (t1-t0); -#endif -} -/************************************************* - * Name: sha3_512 - * - * Description: SHA3-512 with non-incremental API - * - * Arguments: - uint8_t *output: pointer to output - * - const uint8_t *input: pointer to input - * - size_t inlen: length of input in bytes - **************************************************/ -void sha3_512(uint8_t *output, const uint8_t *input, size_t inlen) -{ -#ifdef PROFILE_HASHING - uint64_t t0 = hal_get_time(); -#endif - sha3_512incctx state; - keccak_inc_init(state.ctx); - - /* Absorb input */ - keccak_inc_absorb(state.ctx, SHA3_512_RATE, input, inlen); - keccak_inc_finalize(state.ctx, SHA3_512_RATE, 0x06); - - /* Squeeze output */ - keccak_inc_squeeze(output, 64, state.ctx, SHA3_512_RATE); -#ifdef PROFILE_HASHING - uint64_t t1 = hal_get_time(); - hash_cycles += (t1-t0); -#endif -} -void sha3_512_inc_init(sha3_512incctx *state) { -#ifdef PROFILE_HASHING - uint64_t t0 = hal_get_time(); -#endif - keccak_inc_init(state->ctx); -#ifdef PROFILE_HASHING - uint64_t t1 = hal_get_time(); - hash_cycles += (t1-t0); -#endif -} - -void sha3_512_inc_absorb(sha3_512incctx *state, const uint8_t *input, size_t inlen) { -#ifdef PROFILE_HASHING - uint64_t t0 = hal_get_time(); -#endif - keccak_inc_absorb(state->ctx, SHA3_512_RATE, input, inlen); -#ifdef PROFILE_HASHING - uint64_t t1 = hal_get_time(); - hash_cycles += (t1-t0); -#endif -} - -void sha3_512_inc_finalize(uint8_t *output, sha3_512incctx *state) { -#ifdef PROFILE_HASHING - uint64_t t0 = hal_get_time(); -#endif - uint8_t t[SHA3_512_RATE]; - keccak_inc_finalize(state->ctx, SHA3_512_RATE, 0x06); - - keccak_squeezeblocks(t, 1, state->ctx, SHA3_512_RATE); - - for (size_t i = 0; i < 64; i++) { - output[i] = t[i]; - } -#ifdef PROFILE_HASHING - uint64_t t1 = hal_get_time(); - hash_cycles += (t1-t0); -#endif -} - -void sha3_512_inc_ctx_clone(sha3_512incctx *dest, const sha3_512incctx *src) { - memcpy(dest, src, sizeof(sha3_512incctx)); -} - -void sha3_512_inc_ctx_release(sha3_512incctx *state) { - (void) state; -} - -/********** cSHAKE256 ***********/ - -void cshake256_simple_absorb(shake256ctx *state, uint16_t cstm, const uint8_t *in, size_t inlen) -{ -#ifdef PROFILE_HASHING - uint64_t t0 = hal_get_time(); -#endif - uint8_t sep[8]; - size_t i; - - for (i = 0; i < 25; i++) - state->ctx[i] = 0; - - /* Absorb customization (domain-separation) string */ - sep[0] = 0x01; - sep[1] = 0x88; - sep[2] = 0x01; - sep[3] = 0x00; - sep[4] = 0x01; - sep[5] = 16; // fixed bitlen of cstm - sep[6] = cstm & 0xff; - sep[7] = cstm >> 8; - - KeccakF1600_StateXORBytes(state->ctx, sep, 0, 8); - KeccakF1600_StatePermute(state->ctx); - - /* Absorb input */ - keccak_absorb(state->ctx, SHAKE256_RATE, in, inlen, 0x04); -#ifdef PROFILE_HASHING - uint64_t t1 = hal_get_time(); - hash_cycles += (t1-t0); -#endif -} - - -void cshake256_simple_squeezeblocks(uint8_t *output, size_t nblocks, shake256ctx *state) -{ -#ifdef PROFILE_HASHING - uint64_t t0 = hal_get_time(); -#endif - keccak_squeezeblocks(output, nblocks, state->ctx, SHAKE256_RATE); -#ifdef PROFILE_HASHING - uint64_t t1 = hal_get_time(); - hash_cycles += (t1-t0); -#endif -} - - -void cshake256_simple(uint8_t *output, size_t outlen, uint16_t cstm, const uint8_t *in, size_t inlen) -{ - shake256incctx state; - uint8_t sep[8]; - #ifdef PROFILE_HASHING - uint64_t t0 = hal_get_time(); - #endif - - - keccak_inc_init(state.ctx); - - /* Absorb customization (domain-separation) string */ - sep[0] = 0x01; - sep[1] = 0x88; - sep[2] = 0x01; - sep[3] = 0x00; - sep[4] = 0x01; - sep[5] = 16; // fixed bitlen of cstm - sep[6] = cstm & 0xff; - sep[7] = cstm >> 8; - - KeccakF1600_StateXORBytes(state.ctx, sep, 0, 8); - KeccakF1600_StatePermute(state.ctx); - - /* Absorb input */ - keccak_inc_absorb(state.ctx, SHAKE256_RATE, in, inlen); - keccak_inc_finalize(state.ctx, SHAKE256_RATE, 0x04); - - /* Squeeze output */ - keccak_inc_squeeze(output, outlen, state.ctx, SHAKE256_RATE); -#ifdef PROFILE_HASHING - uint64_t t1 = hal_get_time(); - hash_cycles += (t1-t0); -#endif -} diff --git a/tests/kyber-all/fips202.h b/tests/kyber-all/fips202.h deleted file mode 100644 index 3db5f5b..0000000 --- a/tests/kyber-all/fips202.h +++ /dev/null @@ -1,175 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 or CC0-1.0 -#ifndef FIPS202_H -#define FIPS202_H - -#include -#include - -#define SHAKE128_RATE 168 -#define SHAKE256_RATE 136 -#define SHA3_256_RATE 136 -#define SHA3_384_RATE 104 -#define SHA3_512_RATE 72 - - -// Context for incremental API -typedef struct { - uint64_t ctx[26]; -} shake128incctx; - -// Context for non-incremental API -typedef struct { - uint64_t ctx[25]; -} shake128ctx; - -// Context for incremental API -typedef struct { - uint64_t ctx[26]; -} shake256incctx; - -// Context for non-incremental API -typedef struct { - uint64_t ctx[25]; -} shake256ctx; - -// Context for incremental API -typedef struct { - uint64_t ctx[26]; -} sha3_256incctx; - -// Context for incremental API -typedef struct { - uint64_t ctx[26]; -} sha3_384incctx; - -// Context for incremental API -typedef struct { - uint64_t ctx[26]; -} sha3_512incctx; - -/* Initialize the state and absorb the provided input. - * - * This function does not support being called multiple times - * with the same state. - */ -void shake128_absorb(shake128ctx *state, const uint8_t *input, size_t inlen); -/* Squeeze output out of the sponge. - * - * Supports being called multiple times - */ -void shake128_squeezeblocks(uint8_t *output, size_t nblocks, shake128ctx *state); -/* Free the state */ -void shake128_ctx_release(shake128ctx *state); -/* Copy the state. */ -void shake128_ctx_clone(shake128ctx *dest, const shake128ctx *src); - -void cshake128_simple_absorb(shake128ctx *state, uint16_t cstm, const uint8_t *input, size_t inlen); -void cshake128_simple_squeezeblocks(uint8_t *output, size_t nblocks, shake128ctx *state); -void cshake128_simple(uint8_t *output, size_t outlen, uint16_t cstm, const uint8_t *input, size_t inlen); - -/* Initialize incremental hashing API */ -void shake128_inc_init(shake128incctx *state); -/* Absorb more information into the XOF. - * - * Can be called multiple times. - */ -void shake128_inc_absorb(shake128incctx *state, const uint8_t *input, size_t inlen); -/* Finalize the XOF for squeezing */ -void shake128_inc_finalize(shake128incctx *state); -/* Squeeze output out of the sponge. - * - * Supports being called multiple times - */ -void shake128_inc_squeeze(uint8_t *output, size_t outlen, shake128incctx *state); -/* Copy the context of the SHAKE128 XOF */ -void shake128_inc_ctx_clone(shake128incctx* dest, const shake128incctx *src); -/* Free the context of the SHAKE128 XOF */ -void shake128_inc_ctx_release(shake128incctx *state); - -/* Initialize the state and absorb the provided input. - * - * This function does not support being called multiple times - * with the same state. - */ -void shake256_absorb(shake256ctx *state, const uint8_t *input, size_t inlen); -/* Squeeze output out of the sponge. - * - * Supports being called multiple times - */ -void shake256_squeezeblocks(uint8_t *output, size_t nblocks, shake256ctx *state); -/* Free the context held by this XOF */ -void shake256_ctx_release(shake256ctx *state); -/* Copy the context held by this XOF */ -void shake256_ctx_clone(shake256ctx *dest, const shake256ctx *src); - -void cshake256_simple_absorb(shake256ctx *state, uint16_t cstm, const uint8_t *input, size_t inlen); -void cshake256_simple_squeezeblocks(uint8_t *output, size_t nblocks, shake256ctx *state); -void cshake256_simple(uint8_t *output, size_t outlen, uint16_t cstm, const uint8_t *input, size_t inlen); - -/* Initialize incremental hashing API */ -void shake256_inc_init(shake256incctx *state); -void shake256_inc_absorb(shake256incctx *state, const uint8_t *input, size_t inlen); -/* Prepares for squeeze phase */ -void shake256_inc_finalize(shake256incctx *state); - - - -/* Squeeze output out of the sponge. - * - * Supports being called multiple times - */ -void shake256_inc_squeeze(uint8_t *output, size_t outlen, shake256incctx *state); -/* Copy the state */ -void shake256_inc_ctx_clone(shake256incctx* dest, const shake256incctx *src); -/* Free the state */ -void shake256_inc_ctx_release(shake256incctx *state); - -/* One-stop SHAKE128 call */ -void shake128(uint8_t *output, size_t outlen, - const uint8_t *input, size_t inlen); - -/* One-stop SHAKE256 call */ -void shake256(uint8_t *output, size_t outlen, - const uint8_t *input, size_t inlen); - -/* Initialize the incremental hashing state */ -void sha3_256_inc_init(sha3_256incctx *state); -/* Absorb blocks into SHA3 */ -void sha3_256_inc_absorb(sha3_256incctx *state, const uint8_t *input, size_t inlen); -/* Obtain the output of the function and free `state` */ -void sha3_256_inc_finalize(uint8_t *output, sha3_256incctx *state); -/* Copy the context */ -void sha3_256_inc_ctx_clone(sha3_256incctx *dest, const sha3_256incctx *src); -/* Release the state, don't use if `_finalize` has been used */ -void sha3_256_inc_ctx_release(sha3_256incctx *state); - -void sha3_256(uint8_t *output, const uint8_t *input, size_t inlen); - -/* Initialize the incremental hashing state */ -void sha3_384_inc_init(sha3_384incctx *state); -/* Absorb blocks into SHA3 */ -void sha3_384_inc_absorb(sha3_384incctx *state, const uint8_t *input, size_t inlen); -/* Obtain the output of the function and free `state` */ -void sha3_384_inc_finalize(uint8_t *output, sha3_384incctx *state); -/* Copy the context */ -void sha3_384_inc_ctx_clone(sha3_384incctx *dest, const sha3_384incctx *src); -/* Release the state, don't use if `_finalize` has been used */ -void sha3_384_inc_ctx_release(sha3_384incctx *state); - -/* One-stop SHA3-384 shop */ -void sha3_384(uint8_t *output, const uint8_t *input, size_t inlen); - -/* Initialize the incremental hashing state */ -void sha3_512_inc_init(sha3_512incctx *state); -/* Absorb blocks into SHA3 */ -void sha3_512_inc_absorb(sha3_512incctx *state, const uint8_t *input, size_t inlen); -/* Obtain the output of the function and free `state` */ -void sha3_512_inc_finalize(uint8_t *output, sha3_512incctx *state); -/* Copy the context */ -void sha3_512_inc_ctx_clone(sha3_512incctx *dest, const sha3_512incctx *src); -/* Release the state, don't use if `_finalize` has been used */ -void sha3_512_inc_ctx_release(sha3_512incctx *state); - -/* One-stop SHA3-512 shop */ -void sha3_512(uint8_t *output, const uint8_t *input, size_t inlen); -#endif diff --git a/tests/kyber-all/frombytes-asm.h b/tests/kyber-all/frombytes-asm.h deleted file mode 100644 index 1f861c6..0000000 --- a/tests/kyber-all/frombytes-asm.h +++ /dev/null @@ -1,44 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 or CC0-1.0 -#ifndef FROMBYTES_ASM_H -#define FROMBYTES_ASM_H - -#include - -static const int32_t zetas[64] = {21932846, 3562152210, 752167598, 3417653460, 2112004045, 932791035, 2951903026, 1419184148, 1817845876, 3434425636, 4233039261, 300609006, 975366560, 2781600929, 3889854731, 3935010590, 2197155094, 2130066389, 3598276897, 2308109491, 2382939200, 1228239371, 1884934581, 3466679822, 1211467195, 2977706375, 3144137970, 3080919767, 945692709, 3015121229, 345764865, 826997308, 2043625172, 2964804700, 2628071007, 4154339049, 483812778, 3288636719, 2696449880, 2122325384, 1371447954, 411563403, 3577634219, 976656727, 2708061387, 723783916, 3181552825, 3346694253, 3617629408, 1408862808, 519937465, 1323711759, 1474661346, 2773859924, 3580214553, 1143088323, 2221668274, 1563682897, 2417773720, 1327582262, 2722253228, 3786641338, 1141798155, 2779020594}; - - -void frombytes_mul_asm_16_32(int32_t *r_tmp, const int16_t *b, const unsigned char *c, const int32_t zetas[64]); -void frombytes_mul_asm_16_32_opt_m7(int32_t *r_tmp, const int16_t *b, const unsigned char *c, const int32_t zetas[64]); - -void frombytes_mul_asm_acc_32_32(int32_t *r_tmp, const int16_t *b, const unsigned char *c, const int32_t zetas[64]); -void frombytes_mul_asm_acc_32_32_opt_m7(int32_t *r_tmp, const int16_t *b, const unsigned char *c, const int32_t zetas[64]); - -void frombytes_mul_asm_acc_32_16(int16_t *r, const int16_t *b, const unsigned char *c, const int32_t zetas[64], const int32_t *r_tmp); -void frombytes_mul_asm_acc_32_16_opt_m7(int16_t *r, const int16_t *b, const unsigned char *c, const int32_t zetas[64], const int32_t *r_tmp); - - - -void frombytes_mul_asm_16_32_wrap(int32_t *r_tmp, const int16_t *b, const unsigned char *c){ - frombytes_mul_asm_16_32(r_tmp, b, c, zetas); -} -void frombytes_mul_asm_16_32_opt_m7_wrap(int32_t *r_tmp, const int16_t *b, const unsigned char *c){ - frombytes_mul_asm_16_32_opt_m7(r_tmp, b, c, zetas); -} - -void frombytes_mul_asm_acc_32_32_wrap(int32_t *r_tmp, const int16_t *b, const unsigned char *c){ - frombytes_mul_asm_acc_32_32(r_tmp, b, c, zetas); -} -void frombytes_mul_asm_acc_32_32_opt_m7_wrap(int32_t *r_tmp, const int16_t *b, const unsigned char *c){ - frombytes_mul_asm_acc_32_32_opt_m7(r_tmp, b, c, zetas); -} - -void frombytes_mul_asm_acc_32_16_wrap(int16_t *r, const int16_t *b, const unsigned char *c, const int32_t *r_tmp){ - frombytes_mul_asm_acc_32_16(r, b, c, zetas, r_tmp); -} -void frombytes_mul_asm_acc_32_16_opt_m7_wrap(int16_t *r, const int16_t *b, const unsigned char *c, const int32_t *r_tmp){ - frombytes_mul_asm_acc_32_16_opt_m7(r, b, c, zetas, r_tmp); -} - - - -#endif \ No newline at end of file diff --git a/tests/kyber-all/keccakf1600-misc.s b/tests/kyber-all/keccakf1600-misc.s deleted file mode 100644 index fb3f8eb..0000000 --- a/tests/kyber-all/keccakf1600-misc.s +++ /dev/null @@ -1,338 +0,0 @@ -@ -@ Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, -@ Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby -@ denoted as "the implementer". -@ Additional optimizations by Alexandre Adomnicai. -@ -@ For more information, feedback or questions, please refer to our websites: -@ http://keccak.noekeon.org/ -@ http://keyak.noekeon.org/ -@ http://ketje.noekeon.org/ -@ -@ To the extent possible under law, the implementer has waived all copyright -@ and related or neighboring rights to the source code in this file. -@ http://creativecommons.org/publicdomain/zero/1.0/ -@ - -@ WARNING: These functions work only on little endian CPU with@ ARMv7m architecture (ARM Cortex-M3, ...). - - - .thumb - .syntax unified -.text - - @ Credit: Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 -.macro toBitInterleaving x0,x1,s0,s1,t,over - - and \t,\x0,#0x55555555 - orr \t,\t,\t, LSR #1 - and \t,\t,#0x33333333 - orr \t,\t,\t, LSR #2 - and \t,\t,#0x0F0F0F0F - orr \t,\t,\t, LSR #4 - and \t,\t,#0x00FF00FF - bfi \t,\t,#8, #8 - .if \over != 0 - lsr \s0,\t, #8 - .else - eor \s0,\s0,\t, LSR #8 - .endif - - and \t,\x1,#0x55555555 - orr \t,\t,\t, LSR #1 - and \t,\t,#0x33333333 - orr \t,\t,\t, LSR #2 - and \t,\t,#0x0F0F0F0F - orr \t,\t,\t, LSR #4 - and \t,\t,#0x00FF00FF - orr \t,\t,\t, LSR #8 - eor \s0,\s0,\t, LSL #16 - - and \t,\x0,#0xAAAAAAAA - orr \t,\t,\t, LSL #1 - and \t,\t,#0xCCCCCCCC - orr \t,\t,\t, LSL #2 - and \t,\t,#0xF0F0F0F0 - orr \t,\t,\t, LSL #4 - and \t,\t,#0xFF00FF00 - orr \t,\t,\t, LSL #8 - .if \over != 0 - lsr \s1,\t, #16 - .else - eor \s1,\s1,\t, LSR #16 - .endif - - and \t,\x1,#0xAAAAAAAA - orr \t,\t,\t, LSL #1 - and \t,\t,#0xCCCCCCCC - orr \t,\t,\t, LSL #2 - and \t,\t,#0xF0F0F0F0 - orr \t,\t,\t, LSL #4 - and \t,\t,#0xFF00FF00 - orr \t,\t,\t, LSL #8 - bfc \t, #0, #16 - eors \s1,\s1,\t - .endm - - @ Credit: Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 -.macro fromBitInterleaving x0, x1, t - - movs \t, \x0 @ t = x0@ - bfi \x0, \x1, #16, #16 @ x0 = (x0 & 0x0000FFFF) | (x1 << 16)@ - bfc \x1, #0, #16 @ x1 = (t >> 16) | (x1 & 0xFFFF0000)@ - orr \x1, \x1, \t, LSR #16 - - eor \t, \x0, \x0, LSR #8 @ t = (x0 ^ (x0 >> 8)) & 0x0000FF00UL@ x0 = x0 ^ t ^ (t << 8)@ - and \t, #0x0000FF00 - eors \x0, \x0, \t - eor \x0, \x0, \t, LSL #8 - - eor \t, \x0, \x0, LSR #4 @ t = (x0 ^ (x0 >> 4)) & 0x00F000F0UL@ x0 = x0 ^ t ^ (t << 4)@ - and \t, #0x00F000F0 - eors \x0, \x0, \t - eor \x0, \x0, \t, LSL #4 - - eor \t, \x0, \x0, LSR #2 @ t = (x0 ^ (x0 >> 2)) & 0x0C0C0C0CUL@ x0 = x0 ^ t ^ (t << 2)@ - and \t, #0x0C0C0C0C - eors \x0, \x0, \t - eor \x0, \x0, \t, LSL #2 - - eor \t, \x0, \x0, LSR #1 @ t = (x0 ^ (x0 >> 1)) & 0x22222222UL@ x0 = x0 ^ t ^ (t << 1)@ - and \t, #0x22222222 - eors \x0, \x0, \t - eor \x0, \x0, \t, LSL #1 - - eor \t, \x1, \x1, LSR #8 @ t = (x1 ^ (x1 >> 8)) & 0x0000FF00UL@ x1 = x1 ^ t ^ (t << 8)@ - and \t, #0x0000FF00 - eors \x1, \x1, \t - eor \x1, \x1, \t, LSL #8 - - eor \t, \x1, \x1, LSR #4 @ t = (x1 ^ (x1 >> 4)) & 0x00F000F0UL@ x1 = x1 ^ t ^ (t << 4)@ - and \t, #0x00F000F0 - eors \x1, \x1, \t - eor \x1, \x1, \t, LSL #4 - - eor \t, \x1, \x1, LSR #2 @ t = (x1 ^ (x1 >> 2)) & 0x0C0C0C0CUL@ x1 = x1 ^ t ^ (t << 2)@ - and \t, #0x0C0C0C0C - eors \x1, \x1, \t - eor \x1, \x1, \t, LSL #2 - - eor \t, \x1, \x1, LSR #1 @ t = (x1 ^ (x1 >> 1)) & 0x22222222UL@ x1 = x1 ^ t ^ (t << 1)@ - and \t, #0x22222222 - eors \x1, \x1, \t - eor \x1, \x1, \t, LSL #1 - .endm - - - -@---------------------------------------------------------------------------- -@ -@ void KeccakF1600_Initialize( void ) -@ -.align 8 -.global KeccakF1600_Initialize -KeccakF1600_Initialize: - bx lr - - - -@---------------------------------------------------------------------------- -@ -@ void KeccakF1600_StateXORBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length) -@ -.align 8 -.global KeccakF1600_StateXORBytes -KeccakF1600_StateXORBytes: - cbz r3, KeccakF1600_StateXORBytes_Exit1 - push {r4 - r8, lr} @ then - bic r4, r2, #7 @ offset &= ~7 - adds r0, r0, r4 @ add whole lane offset to state pointer - ands r2, r2, #7 @ offset &= 7 (part not lane aligned) - beq KeccakF1600_StateXORBytes_CheckLanes @ .if offset != 0 - movs r4, r3 @ then, do remaining bytes in first lane - rsb r5, r2, #8 @ max size in lane = 8 - offset - cmp r4, r5 - ble KeccakF1600_StateXORBytes_BytesAlign - movs r4, r5 -KeccakF1600_StateXORBytes_BytesAlign: - sub r8, r3, r4 @ size left - movs r3, r4 - bl __KeccakF1600_StateXORBytesInLane - mov r3, r8 -KeccakF1600_StateXORBytes_CheckLanes: - lsrs r2, r3, #3 @ .if length >= 8 - beq KeccakF1600_StateXORBytes_Bytes - mov r8, r3 - bl __KeccakF1600_StateXORLanes - and r3, r8, #7 -KeccakF1600_StateXORBytes_Bytes: - cbz r3, KeccakF1600_StateXORBytes_Exit - movs r2, #0 - bl __KeccakF1600_StateXORBytesInLane -KeccakF1600_StateXORBytes_Exit: - pop {r4 - r8, pc} -KeccakF1600_StateXORBytes_Exit1: - bx lr - - -@---------------------------------------------------------------------------- -@ -@ __KeccakF1600_StateXORLanes -@ -@ Input: -@ r0 state pointer -@ r1 data pointer -@ r2 laneCount -@ -@ Output: -@ r0 state pointer next lane -@ r1 data pointer next byte to input -@ -@ Changed: r2-r7 -@ -.align 8 -__KeccakF1600_StateXORLanes: -__KeccakF1600_StateXORLanes_LoopAligned: - ldr r4, [r1], #4 - ldr r5, [r1], #4 - ldrd r6, r7, [r0] - toBitInterleaving r4, r5, r6, r7, r3, 0 - strd r6, r7, [r0], #8 - subs r2, r2, #1 - bne __KeccakF1600_StateXORLanes_LoopAligned - bx lr - - -@---------------------------------------------------------------------------- -@ -@ __KeccakF1600_StateXORBytesInLane -@ -@ Input: -@ r0 state pointer -@ r1 data pointer -@ r2 offset in lane -@ r3 length -@ -@ Output: -@ r0 state pointer next lane -@ r1 data pointer next byte to input -@ -@ Changed: r2-r7 -@ -.align 8 -__KeccakF1600_StateXORBytesInLane: - movs r4, #0 - movs r5, #0 - push { r4 - r5 } - add r2, r2, sp -__KeccakF1600_StateXORBytesInLane_Loop: - ldrb r5, [r1], #1 - strb r5, [r2], #1 - subs r3, r3, #1 - bne __KeccakF1600_StateXORBytesInLane_Loop - pop { r4 - r5 } - ldrd r6, r7, [r0] - toBitInterleaving r4, r5, r6, r7, r3, 0 - strd r6, r7, [r0], #8 - bx lr - - - - -@---------------------------------------------------------------------------- -@ -@ void KeccakF1600_StateExtractBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length) -@ -.align 8 -.global KeccakF1600_StateExtractBytes -KeccakF1600_StateExtractBytes: - cbz r3, KeccakF1600_StateExtractBytes_Exit1 @ .if length != 0 - push {r4 - r8, lr} @ then - bic r4, r2, #7 @ offset &= ~7 - adds r0, r0, r4 @ add whole lane offset to state pointer - ands r2, r2, #7 @ offset &= 7 (part not lane aligned) - beq KeccakF1600_StateExtractBytes_CheckLanes @ .if offset != 0 - movs r4, r3 @ then, do remaining bytes in first lane - rsb r5, r2, #8 @ max size in lane = 8 - offset - cmp r4, r5 - ble KeccakF1600_StateExtractBytes_BytesAlign - movs r4, r5 -KeccakF1600_StateExtractBytes_BytesAlign: - sub r8, r3, r4 @ size left - movs r3, r4 - bl __KeccakF1600_StateExtractBytesInLane - mov r3, r8 -KeccakF1600_StateExtractBytes_CheckLanes: - lsrs r2, r3, #3 @ .if length >= 8 - beq KeccakF1600_StateExtractBytes_Bytes - mov r8, r3 - bl __KeccakF1600_StateExtractLanes - and r3, r8, #7 -KeccakF1600_StateExtractBytes_Bytes: - cbz r3, KeccakF1600_StateExtractBytes_Exit - movs r2, #0 - bl __KeccakF1600_StateExtractBytesInLane -KeccakF1600_StateExtractBytes_Exit: - pop {r4 - r8, pc} -KeccakF1600_StateExtractBytes_Exit1: - bx lr - - -@---------------------------------------------------------------------------- -@ -@ __KeccakF1600_StateExtractLanes -@ -@ Input: -@ r0 state pointer -@ r1 data pointer -@ r2 laneCount -@ -@ Output: -@ r0 state pointer next lane -@ r1 data pointer next byte to input -@ -@ Changed: r2-r5 -@ -.align 8 -__KeccakF1600_StateExtractLanes: -__KeccakF1600_StateExtractLanes_LoopAligned: - ldrd r4, r5, [r0], #8 - fromBitInterleaving r4, r5, r3 - str r4, [r1], #4 - subs r2, r2, #1 - str r5, [r1], #4 - bne __KeccakF1600_StateExtractLanes_LoopAligned - bx lr - - -@---------------------------------------------------------------------------- -@ -@ __KeccakF1600_StateExtractBytesInLane -@ -@ Input: -@ r0 state pointer -@ r1 data pointer -@ r2 offset in lane -@ r3 length -@ -@ Output: -@ r0 state pointer next lane -@ r1 data pointer next byte to input -@ -@ Changed: r2-r6 -@ -.align 8 -__KeccakF1600_StateExtractBytesInLane: - ldrd r4, r5, [r0], #8 - fromBitInterleaving r4, r5, r6 - push {r4, r5} - add r2, sp, r2 -__KeccakF1600_StateExtractBytesInLane_Loop: - ldrb r4, [r2], #1 - subs r3, r3, #1 - strb r4, [r1], #1 - bne __KeccakF1600_StateExtractBytesInLane_Loop - add sp, #8 - bx lr - - diff --git a/tests/kyber-all/keccakf1600.h b/tests/kyber-all/keccakf1600.h deleted file mode 100644 index e017bf1..0000000 --- a/tests/kyber-all/keccakf1600.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef KECCAKF1600_H -#define KECCAKF1600_H - -#include - -void KeccakF1600_StateExtractBytes(uint64_t *state, unsigned char *data, unsigned int offset, unsigned int length); -void KeccakF1600_StateXORBytes(uint64_t *state, const unsigned char *data, unsigned int offset, unsigned int length); -void KeccakF1600_StatePermute(uint64_t * state); - -#endif \ No newline at end of file diff --git a/tests/kyber-all/kyber-all.mk b/tests/kyber-all/kyber-all.mk deleted file mode 100644 index ddae178..0000000 --- a/tests/kyber-all/kyber-all.mk +++ /dev/null @@ -1,92 +0,0 @@ -# Test name - needs to match the directory name -TESTS += kyber-all - -# All further variables must be prefixed with the capitalized test name - -# Platforms this test should run on (matching the directory name in envs/) -KYBER_ALL_PLATFORMS += m7-an500 -KYBER_ALL_PLATFORMS += nucleo-f767zi -KYBER_ALL_PLATFORMS += stm32f4discovery - -# C sources required for this test -KYBER_ALL_SOURCES += main.c -KYBER_ALL_SOURCES += ntt-acle.c -KYBER_ALL_SOURCES += fips202.c - -# Keccak source -KYBER_ALL_ASMS += ../../asm/manual/keccak/keccakf1600_adomnicai_m4_opt_m7.s -KYBER_ALL_ASMS += keccakf1600-misc.s - -# Assembly sources required for this test -KYBER_ALL_ASM_DIR = ../../asm/manual/kyber-all -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/ntt_kyber.s -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/ntt_kyber_opt_m7.s - -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/intt_kyber.s -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/intt_kyber_opt_m7.s - -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/add_kyber.s -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/add_kyber_opt_m7.s - -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/sub_kyber.s -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/sub_kyber_opt_m7.s - -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/barrett_reduce_kyber.s -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/barrett_reduce_kyber_opt_m7.s - -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/fromplant_kyber.s -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/fromplant_kyber_opt_m7.s - -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/basemul_16_32_kyber.s -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/basemul_16_32_kyber_opt_m7.s - -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/basemul_acc_32_32_kyber.s -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/basemul_acc_32_32_kyber_opt_m7.s - -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/basemul_acc_32_16_kyber.s -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/basemul_acc_32_16_kyber_opt_m7.s - -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/frombytes_mul_16_32_kyber.s -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/frombytes_mul_16_32_kyber_opt_m7.s - -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/frombytes_mul_acc_32_32_kyber.s -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/frombytes_mul_acc_32_32_kyber_opt_m7.s - -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/frombytes_mul_acc_32_16_kyber.s -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/frombytes_mul_acc_32_16_kyber_opt_m7.s - -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/basemul_acc_kyber.s -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/basemul_acc_kyber_opt_m7.s - -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/basemul_kyber.s -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/basemul_kyber_opt_m7.s - -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/frombytes_mul_acc_kyber.s -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/frombytes_mul_acc_kyber_opt_m7.s - -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/frombytes_mul_kyber.s -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/frombytes_mul_kyber_opt_m7.s - -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/matacc_acc_kyber.s -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/matacc_acc_kyber_opt_m7.s - -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/matacc_asm_cache_16_32_kyber.s -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/matacc_asm_cache_16_32_kyber_opt_m7.s - -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/matacc_asm_cache_32_16_kyber.s -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/matacc_asm_cache_32_16_kyber_opt_m7.s - -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/matacc_asm_cache_32_32_kyber.s -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/matacc_asm_cache_32_32_kyber_opt_m7.s - -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/matacc_asm_opt_16_32_kyber.s -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/matacc_asm_opt_16_32_kyber_opt_m7.s - -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/matacc_asm_opt_32_16_kyber.s -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/matacc_asm_opt_32_16_kyber_opt_m7.s - -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/matacc_asm_opt_32_32_kyber.s -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/matacc_asm_opt_32_32_kyber_opt_m7.s - -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/matacc_kyber.s -KYBER_ALL_ASMS += $(KYBER_ALL_ASM_DIR)/matacc_kyber_opt_m7.s \ No newline at end of file diff --git a/tests/kyber-all/main.c b/tests/kyber-all/main.c deleted file mode 100644 index c04c1a1..0000000 --- a/tests/kyber-all/main.c +++ /dev/null @@ -1,1376 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * - * Author: Hanno Becker - */ - -#define ENABLE_PMU_STATS /* Do not enable when benching for cycle count */ - -#if defined(ENABLE_PMU_STATS) -#define REPEAT 100 -#define REPEAT_MEDIAN 100 -#else -#define REPEAT 1024 -#endif - -/* - * Some external references to auto-generated assembly. - */ - -#include -#include - -#include -#include -#include -#include "misc.h" -#include "params.h" -#include "poly.h" -#include "ntt-acle.h" -#include "frombytes-asm.h" -#include "ntt-asm.h" - -void asm_barrett_reduce(int16_t *); -void asm_barrett_reduce_opt_m7(int16_t *); - -void asm_fromplant(int16_t *); -void asm_fromplant_opt_m7(int16_t *); - -void pointwise_add(int16_t *, const int16_t *, const int16_t *); -void pointwise_add_opt_m7(int16_t *, const int16_t *, const int16_t *); - -void pointwise_sub(int16_t *, const int16_t *, const int16_t *); -void pointwise_sub_opt_m7(int16_t *, const int16_t *, const int16_t *); - -void basemul_asm_opt_16_32(int32_t *, const int16_t *, const int16_t *, const int16_t *); -void basemul_asm_opt_16_32_opt_m7(int32_t *, const int16_t *, const int16_t *, const int16_t *); - -void basemul_asm_acc_opt_32_32(int32_t *, const int16_t *, const int16_t *, const int16_t *); -void basemul_asm_acc_opt_32_32_opt_m7(int32_t *, const int16_t *, const int16_t *, const int16_t *); - -void basemul_asm_acc_opt_32_16(int16_t *, const int16_t *, const int16_t *, const int16_t *, const int32_t *); -void basemul_asm_acc_opt_32_16_opt_m7(int16_t *, const int16_t *, const int16_t *, const int16_t *, const int32_t *); - -void matacc_asm(int16_t *r, const int16_t *b, int16_t c[4], unsigned char buf[168+2], const int32_t zetas[64], uint64_t *state); -void matacc_asm_opt_m7(int16_t *r, const int16_t *b, int16_t c[4], unsigned char buf[168+2], const int32_t zetas[64], uint64_t *state); - -void matacc_asm_acc(int16_t *r, const int16_t *b, int16_t c[4], unsigned char buf[168+2], const int32_t zetas[64], uint64_t *state); -void matacc_asm_acc_opt_m7(int16_t *r, const int16_t *b, int16_t c[4], unsigned char buf[168+2], const int32_t zetas[64], uint64_t *state); - -void matacc_asm_cache_16_32(int32_t *r_tmp, const int16_t *b, int16_t c[4], unsigned char buf[168+2], const int32_t zetas[64], uint64_t *state, int16_t *aprimeptr); -void matacc_asm_cache_16_32_opt_m7(int32_t *r_tmp, const int16_t *b, int16_t c[4], unsigned char buf[168+2], const int32_t zetas[64], uint64_t *state, int16_t *aprimeptr); - -void matacc_asm_cache_32_32(int32_t *r_tmp, const int16_t *b, int16_t c[4], unsigned char buf[168+2], const int32_t zetas[64], uint64_t *state, int16_t *aprimeptr); -void matacc_asm_cache_32_32_opt_m7(int32_t *r_tmp, const int16_t *b, int16_t c[4], unsigned char buf[168+2], const int32_t zetas[64], uint64_t *state, int16_t *aprimeptr); - -void matacc_asm_cache_32_16(int16_t *r, const int16_t *b, int16_t c[4], unsigned char buf[168+2], const int32_t zetas[64], uint64_t *state, int16_t *aprimeptr, const int32_t *r_tmp); -void matacc_asm_cache_32_16_opt_m7(int16_t *r, const int16_t *b, int16_t c[4], unsigned char buf[168+2], const int32_t zetas[64], uint64_t *state, int16_t *aprimeptr, const int32_t *r_tmp); - -void matacc_asm_opt_16_32(int32_t *r_tmp, const int16_t *b, int16_t c[4], unsigned char buf[168+2], uint64_t *state, const int16_t *aprimeptr); -void matacc_asm_opt_16_32_opt_m7(int32_t *r_tmp, const int16_t *b, int16_t c[4], unsigned char buf[168+2], uint64_t *state, const int16_t *aprimeptr); - -void matacc_asm_opt_32_32(int32_t *r_tmp, const int16_t *b, int16_t c[4], unsigned char buf[168+2], uint64_t *state, const int16_t *aprimeptr); -void matacc_asm_opt_32_32_opt_m7(int32_t *r_tmp, const int16_t *b, int16_t c[4], unsigned char buf[168+2], uint64_t *state, const int16_t *aprimeptr); - -void matacc_asm_opt_32_16(int16_t *r, const int16_t *b, int16_t c[4], unsigned char buf[168+2], uint64_t *state, const int16_t *aprimeptr, const int32_t *r_tmp); -void matacc_asm_opt_32_16_opt_m7(int16_t *r, const int16_t *b, int16_t c[4], unsigned char buf[168+2], uint64_t *state, const int16_t *aprimeptr, const int32_t *r_tmp); - -// TODO: instruction counting does not work for more than 4 arguments; use a wrapper for now -- need to fix this later -void basemul_asm_acc_opt_32_16_wrap(int16_t * a, const int16_t *b, const int16_t *c, const int16_t *d){ - int32_t yyy[256]= {0}; - basemul_asm_acc_opt_32_16(a,b,c,d, yyy); -} -void basemul_asm_acc_opt_32_16_opt_m7_wrap(int16_t * a, const int16_t *b, const int16_t *c, const int16_t *d){ - int32_t yyy[256]= {0}; - basemul_asm_acc_opt_32_16_opt_m7(a,b,c,d, yyy); -} -void matacc_asm_wrap(int16_t *r, const int16_t *b, int16_t c[4], unsigned char buf[168+2]){ - uint64_t state[26] = {0}; - matacc_asm(r,b,c,buf,zetas,state); -} -void matacc_asm_opt_m7_wrap(int16_t *r, const int16_t *b, int16_t c[4], unsigned char buf[168+2]){ - uint64_t state[26] = {0}; - matacc_asm_opt_m7(r,b,c,buf,zetas,state); -} -void matacc_asm_acc_wrap(int16_t *r, const int16_t *b, int16_t c[4], unsigned char buf[168+2]){ - uint64_t state[26] = {0}; - matacc_asm_acc(r,b,c,buf,zetas,state); -} -void matacc_asm_acc_opt_m7_wrap(int16_t *r, const int16_t *b, int16_t c[4], unsigned char buf[168+2]){ - uint64_t state[26] = {0}; - matacc_asm_acc_opt_m7(r,b,c,buf,zetas,state); -} -void matacc_asm_cache_16_32_wrap(int32_t *r_tmp, const int16_t *b, int16_t c[4], unsigned char buf[168+2]){ - uint64_t state[26] = {0}; - int16_t aprime[256] = {0}; - matacc_asm_cache_16_32(r_tmp,b,c,buf,zetas,state,aprime); -} -void matacc_asm_cache_16_32_opt_m7_wrap(int32_t *r_tmp, const int16_t *b, int16_t c[4], unsigned char buf[168+2]){ - uint64_t state[26] = {0}; - int16_t aprime[256] = {0}; - matacc_asm_cache_16_32_opt_m7(r_tmp,b,c,buf,zetas,state,aprime); -} -void matacc_asm_cache_32_32_wrap(int32_t *r_tmp, const int16_t *b, int16_t c[4], unsigned char buf[168+2]){ - uint64_t state[26] = {0}; - int16_t aprime[256] = {0}; - matacc_asm_cache_32_32(r_tmp,b,c,buf,zetas,state,aprime); -} -void matacc_asm_cache_32_32_opt_m7_wrap(int32_t *r_tmp, const int16_t *b, int16_t c[4], unsigned char buf[168+2]){ - uint64_t state[26] = {0}; - int16_t aprime[256] = {0}; - matacc_asm_cache_32_32_opt_m7(r_tmp,b,c,buf,zetas,state,aprime); -} - -void matacc_asm_cache_32_16_wrap(int16_t *r, const int16_t *b, int16_t c[4], unsigned char buf[168+2]){ - uint64_t state[26] = {0}; - int16_t aprime[256] = {0}; - int32_t r_tmp[256] = {0}; - matacc_asm_cache_32_16(r,b,c,buf,zetas,state,aprime,r_tmp); -} -void matacc_asm_cache_32_16_opt_m7_wrap(int16_t *r, const int16_t *b, int16_t c[4], unsigned char buf[168+2]){ - uint64_t state[26] = {0}; - int16_t aprime[256] = {0}; - int32_t r_tmp[256] = {0}; - matacc_asm_cache_32_16_opt_m7(r,b,c,buf,zetas,state,aprime,r_tmp); -} - -void matacc_asm_opt_16_32_wrap(int32_t *r_tmp, const int16_t *b, int16_t c[4], unsigned char buf[168+2]){ - uint64_t state[26] = {0}; - int16_t aprime[256] = {0}; - matacc_asm_opt_16_32(r_tmp,b,c,buf,state,aprime); -} -void matacc_asm_opt_16_32_opt_m7_wrap(int32_t *r_tmp, const int16_t *b, int16_t c[4], unsigned char buf[168+2]){ - uint64_t state[26] = {0}; - int16_t aprime[256] = {0}; - matacc_asm_opt_16_32_opt_m7(r_tmp,b,c,buf,state,aprime); -} - -void matacc_asm_opt_32_32_wrap(int32_t *r_tmp, const int16_t *b, int16_t c[4], unsigned char buf[168+2]){ - uint64_t state[26] = {0}; - int16_t aprime[256] = {0}; - matacc_asm_opt_32_32(r_tmp,b,c,buf,state,aprime); -} -void matacc_asm_opt_32_32_opt_m7_wrap(int32_t *r_tmp, const int16_t *b, int16_t c[4], unsigned char buf[168+2]){ - uint64_t state[26] = {0}; - int16_t aprime[256] = {0}; - matacc_asm_opt_32_32_opt_m7(r_tmp,b,c,buf,state,aprime); -} - -void matacc_asm_opt_32_16_wrap(int16_t *r, const int16_t *b, int16_t c[4], unsigned char buf[168+2]){ - uint64_t state[26] = {0}; - int16_t aprime[256] = {0}; - int32_t r_tmp[256] = {0}; - matacc_asm_opt_32_16(r,b,c,buf,state,aprime,r_tmp); -} -void matacc_asm_opt_32_16_opt_m7_wrap(int16_t *r, const int16_t *b, int16_t c[4], unsigned char buf[168+2]){ - uint64_t state[26] = {0}; - int16_t aprime[256] = {0}; - int32_t r_tmp[256] = {0}; - matacc_asm_opt_32_16_opt_m7(r,b,c,buf,state,aprime,r_tmp); -} - - -#define NTT_LAYERS 7 -#define NTT_SIZE 256 - -typedef struct { - char name[100]; - uint64_t cycles; -} benchmark_result; - -benchmark_result results[100]; -int benchmark_cnt = 0; - -static void add_benchmark_results(char *name, uint64_t cycles){ - if(benchmark_cnt == 100) return; - - results[benchmark_cnt].cycles = cycles; - strncpy(results[benchmark_cnt].name, name, 100); - benchmark_cnt++; -} - -static void dump_benchmarks_tex(void){ - for(int i=0;i> 1])); \ - add_benchmark_results(#var, (cycles[REPEAT_MEDIAN >> 1])); \ - return (0); \ - } - -#define MAKE_BENCH_3(var, func) \ - int bench_##var() \ - { \ - uint64_t t1, t2; \ - uint64_t cycles[REPEAT_MEDIAN]; \ - int16_t dst[NTT_SIZE] __attribute__((aligned(16))) = {0}; \ - int16_t src1[NTT_SIZE] __attribute__((aligned(16))) = {0}; \ - int16_t src2[NTT_SIZE] __attribute__((aligned(16))) = {0}; \ - (func)(dst, src1, src2); \ - for (size_t cnt_median = 0; cnt_median < REPEAT_MEDIAN; cnt_median++) \ - { \ - t1 = hal_get_time(); \ - for (size_t cnt = 0; cnt < REPEAT; cnt++) \ - (func)(dst, src1, src2); \ - t2 = hal_get_time(); \ - cycles[cnt_median] = (t2 - t1) / REPEAT; \ - } \ - qsort(cycles, REPEAT_MEDIAN, sizeof(uint64_t), cmp_uint64_t); \ - debug_printf(#var " repeat %d, %d", \ - REPEAT *REPEAT_MEDIAN, (cycles[REPEAT_MEDIAN >> 1])); \ - add_benchmark_results(#var, (cycles[REPEAT_MEDIAN >> 1])); \ - return (0); \ - } - -#define MAKE_BENCH_basemul_32(var, func) \ - int bench_##var() \ - { \ - uint64_t t1, t2; \ - uint64_t cycles[REPEAT_MEDIAN]; \ - int32_t dst[NTT_SIZE] __attribute__((aligned(16))) = {0}; \ - int16_t src1[NTT_SIZE] __attribute__((aligned(16))) = {0}; \ - int16_t src2[NTT_SIZE] __attribute__((aligned(16))) = {0}; \ - int16_t src3[NTT_SIZE] __attribute__((aligned(16))) = {0}; \ - (func)(dst, src1, src2, src3); \ - for (size_t cnt_median = 0; cnt_median < REPEAT_MEDIAN; cnt_median++) \ - { \ - t1 = hal_get_time(); \ - for (size_t cnt = 0; cnt < REPEAT; cnt++) \ - (func)(dst, src1, src2, src3); \ - t2 = hal_get_time(); \ - cycles[cnt_median] = (t2 - t1) / REPEAT; \ - } \ - qsort(cycles, REPEAT_MEDIAN, sizeof(uint64_t), cmp_uint64_t); \ - debug_printf(#var " repeat %d, %d", \ - REPEAT *REPEAT_MEDIAN, (cycles[REPEAT_MEDIAN >> 1])); \ - add_benchmark_results(#var, (cycles[REPEAT_MEDIAN >> 1])); \ - return (0); \ - } - -#define MAKE_BENCH_basemul_32_16(var, func) \ - int bench_##var() \ - { \ - uint64_t t1, t2; \ - uint64_t cycles[REPEAT_MEDIAN]; \ - int16_t dst[NTT_SIZE] __attribute__((aligned(16))) = {0}; \ - int16_t src1[NTT_SIZE] __attribute__((aligned(16))) = {0}; \ - int16_t src2[NTT_SIZE] __attribute__((aligned(16))) = {0}; \ - int16_t src3[NTT_SIZE] __attribute__((aligned(16))) = {0}; \ - int32_t src4[NTT_SIZE] __attribute__((aligned(16))) = {0}; \ - (func)(dst, src1, src2, src3, src4); \ - for (size_t cnt_median = 0; cnt_median < REPEAT_MEDIAN; cnt_median++) \ - { \ - t1 = hal_get_time(); \ - for (size_t cnt = 0; cnt < REPEAT; cnt++) \ - (func)(dst, src1, src2, src3, src4); \ - t2 = hal_get_time(); \ - cycles[cnt_median] = (t2 - t1) / REPEAT; \ - } \ - qsort(cycles, REPEAT_MEDIAN, sizeof(uint64_t), cmp_uint64_t); \ - debug_printf(#var " repeat %d, %d", \ - REPEAT *REPEAT_MEDIAN, (cycles[REPEAT_MEDIAN >> 1])); \ - add_benchmark_results(#var, (cycles[REPEAT_MEDIAN >> 1])); \ - return (0); \ - } - -#define MAKE_BENCH_frombytes_mul(var, func) \ - int bench_##var() \ - { \ - uint64_t t1, t2; \ - uint64_t cycles[REPEAT_MEDIAN]; \ - int16_t dst[NTT_SIZE] __attribute__((aligned(16))) = {0}; \ - int16_t src1[NTT_SIZE] __attribute__((aligned(16))) = {0}; \ - uint8_t src2[KYBER_POLYBYTES] __attribute__((aligned(16))) = {0}; \ - (func)(dst, src1, src2); \ - for (size_t cnt_median = 0; cnt_median < REPEAT_MEDIAN; cnt_median++) \ - { \ - t1 = hal_get_time(); \ - for (size_t cnt = 0; cnt < REPEAT; cnt++) \ - (func)(dst, src1, src2); \ - t2 = hal_get_time(); \ - cycles[cnt_median] = (t2 - t1) / REPEAT; \ - } \ - qsort(cycles, REPEAT_MEDIAN, sizeof(uint64_t), cmp_uint64_t); \ - debug_printf(#var " repeat %d, %d", \ - REPEAT *REPEAT_MEDIAN, (cycles[REPEAT_MEDIAN >> 1])); \ - add_benchmark_results(#var, (cycles[REPEAT_MEDIAN >> 1])); \ - return (0); \ - } - -#define MAKE_BENCH_frombytes_mul_32(var, func) \ - int bench_##var() \ - { \ - uint64_t t1, t2; \ - uint64_t cycles[REPEAT_MEDIAN]; \ - int32_t dst[NTT_SIZE] __attribute__((aligned(16))) = {0}; \ - int16_t src1[NTT_SIZE] __attribute__((aligned(16))) = {0}; \ - uint8_t src2[KYBER_POLYBYTES] __attribute__((aligned(16))) = {0}; \ - (func)(dst, src1, src2); \ - for (size_t cnt_median = 0; cnt_median < REPEAT_MEDIAN; cnt_median++) \ - { \ - t1 = hal_get_time(); \ - for (size_t cnt = 0; cnt < REPEAT; cnt++) \ - (func)(dst, src1, src2); \ - t2 = hal_get_time(); \ - cycles[cnt_median] = (t2 - t1) / REPEAT; \ - } \ - qsort(cycles, REPEAT_MEDIAN, sizeof(uint64_t), cmp_uint64_t); \ - debug_printf(#var " repeat %d, %d", \ - REPEAT *REPEAT_MEDIAN, (cycles[REPEAT_MEDIAN >> 1])); \ - add_benchmark_results(#var, (cycles[REPEAT_MEDIAN >> 1])); \ - return (0); \ - } - -#define MAKE_BENCH_frombytes_mul_32_16(var, func) \ - int bench_##var() \ - { \ - uint64_t t1, t2; \ - uint64_t cycles[REPEAT_MEDIAN]; \ - int16_t dst[NTT_SIZE] __attribute__((aligned(16))) = {0}; \ - int16_t src1[NTT_SIZE] __attribute__((aligned(16))) = {0}; \ - uint8_t src2[KYBER_POLYBYTES] __attribute__((aligned(16))) = {0}; \ - int32_t src3[NTT_SIZE] __attribute__((aligned(16))) = {0}; \ - (func)(dst, src1, src2, src3); \ - for (size_t cnt_median = 0; cnt_median < REPEAT_MEDIAN; cnt_median++) \ - { \ - t1 = hal_get_time(); \ - for (size_t cnt = 0; cnt < REPEAT; cnt++) \ - (func)(dst, src1, src2, src3); \ - t2 = hal_get_time(); \ - cycles[cnt_median] = (t2 - t1) / REPEAT; \ - } \ - qsort(cycles, REPEAT_MEDIAN, sizeof(uint64_t), cmp_uint64_t); \ - debug_printf(#var " repeat %d, %d", \ - REPEAT *REPEAT_MEDIAN, (cycles[REPEAT_MEDIAN >> 1])); \ - add_benchmark_results(#var, (cycles[REPEAT_MEDIAN >> 1])); \ - return (0); \ - } - -#define MAKE_BENCH_matacc(var, func) \ - int bench_##var() \ - { \ - uint64_t t1, t2; \ - uint64_t cycles[REPEAT_MEDIAN]; \ - int16_t dst[NTT_SIZE] __attribute__((aligned(16))) = {0}; \ - int16_t b[NTT_SIZE] __attribute__((aligned(16))) = {0}; \ - int16_t c[4] __attribute__((aligned(16))) = {0}; \ - uint8_t buf[168+2] __attribute__((aligned(16))) = {0}; \ - uint64_t state[26] __attribute__((aligned(16))) = {0}; \ - (func)(dst, b, c, buf, zetas, state); \ - for (size_t cnt_median = 0; cnt_median < REPEAT_MEDIAN; cnt_median++) \ - { \ - t1 = hal_get_time(); \ - for (size_t cnt = 0; cnt < REPEAT; cnt++) \ - (func)(dst, b, c, buf, zetas, state); \ - t2 = hal_get_time(); \ - cycles[cnt_median] = (t2 - t1) / REPEAT; \ - } \ - qsort(cycles, REPEAT_MEDIAN, sizeof(uint64_t), cmp_uint64_t); \ - debug_printf(#var " repeat %d, %d", \ - REPEAT *REPEAT_MEDIAN, (cycles[REPEAT_MEDIAN >> 1])); \ - add_benchmark_results(#var, (cycles[REPEAT_MEDIAN >> 1])); \ - return (0); \ - } - -#define MAKE_BENCH_matacc_cache_32(var, func) \ - int bench_##var() \ - { \ - uint64_t t1, t2; \ - uint64_t cycles[REPEAT_MEDIAN]; \ - int32_t dst[NTT_SIZE] __attribute__((aligned(16))) = {0}; \ - int16_t b[NTT_SIZE] __attribute__((aligned(16))) = {0}; \ - int16_t c[4] __attribute__((aligned(16))) = {0}; \ - uint8_t buf[168+2] __attribute__((aligned(16))) = {0}; \ - uint64_t state[26] __attribute__((aligned(16))) = {0}; \ - int16_t aprime[NTT_SIZE] __attribute__((aligned(16))) = {0}; \ - (func)(dst, b, c, buf, zetas, state, aprime); \ - for (size_t cnt_median = 0; cnt_median < REPEAT_MEDIAN; cnt_median++) \ - { \ - t1 = hal_get_time(); \ - for (size_t cnt = 0; cnt < REPEAT; cnt++) \ - (func)(dst, b, c, buf, zetas, state, aprime); \ - t2 = hal_get_time(); \ - cycles[cnt_median] = (t2 - t1) / REPEAT; \ - } \ - qsort(cycles, REPEAT_MEDIAN, sizeof(uint64_t), cmp_uint64_t); \ - debug_printf(#var " repeat %d, %d", \ - REPEAT *REPEAT_MEDIAN, (cycles[REPEAT_MEDIAN >> 1])); \ - add_benchmark_results(#var, (cycles[REPEAT_MEDIAN >> 1])); \ - return (0); \ - } - -#define MAKE_BENCH_matacc_cache_32_16(var, func) \ - int bench_##var() \ - { \ - uint64_t t1, t2; \ - uint64_t cycles[REPEAT_MEDIAN]; \ - int16_t dst[NTT_SIZE] __attribute__((aligned(16))) = {0}; \ - int16_t b[NTT_SIZE] __attribute__((aligned(16))) = {0}; \ - int16_t c[4] __attribute__((aligned(16))) = {0}; \ - uint8_t buf[168+2] __attribute__((aligned(16))) = {0}; \ - uint64_t state[26] __attribute__((aligned(16))) = {0}; \ - int16_t aprime[NTT_SIZE] __attribute__((aligned(16))) = {0}; \ - int32_t rtmp[NTT_SIZE] __attribute__((aligned(16))) = {0}; \ - (func)(dst, b, c, buf, zetas, state, aprime, rtmp); \ - for (size_t cnt_median = 0; cnt_median < REPEAT_MEDIAN; cnt_median++) \ - { \ - t1 = hal_get_time(); \ - for (size_t cnt = 0; cnt < REPEAT; cnt++) \ - (func)(dst, b, c, buf, zetas, state, aprime, rtmp); \ - t2 = hal_get_time(); \ - cycles[cnt_median] = (t2 - t1) / REPEAT; \ - } \ - qsort(cycles, REPEAT_MEDIAN, sizeof(uint64_t), cmp_uint64_t); \ - debug_printf(#var " repeat %d, %d", \ - REPEAT *REPEAT_MEDIAN, (cycles[REPEAT_MEDIAN >> 1])); \ - add_benchmark_results(#var, (cycles[REPEAT_MEDIAN >> 1])); \ - return (0); \ - } - -#define MAKE_BENCH_matacc_opt_32(var, func) \ - int bench_##var() \ - { \ - uint64_t t1, t2; \ - uint64_t cycles[REPEAT_MEDIAN]; \ - int32_t dst[NTT_SIZE] __attribute__((aligned(16))) = {0}; \ - int16_t b[NTT_SIZE] __attribute__((aligned(16))) = {0}; \ - int16_t c[4] __attribute__((aligned(16))) = {0}; \ - uint8_t buf[168+2] __attribute__((aligned(16))) = {0}; \ - uint64_t state[26] __attribute__((aligned(16))) = {0}; \ - int16_t aprime[NTT_SIZE] __attribute__((aligned(16))) = {0}; \ - (func)(dst, b, c, buf, state, aprime); \ - for (size_t cnt_median = 0; cnt_median < REPEAT_MEDIAN; cnt_median++) \ - { \ - t1 = hal_get_time(); \ - for (size_t cnt = 0; cnt < REPEAT; cnt++) \ - (func)(dst, b, c, buf, state, aprime); \ - t2 = hal_get_time(); \ - cycles[cnt_median] = (t2 - t1) / REPEAT; \ - } \ - qsort(cycles, REPEAT_MEDIAN, sizeof(uint64_t), cmp_uint64_t); \ - debug_printf(#var " repeat %d, %d", \ - REPEAT *REPEAT_MEDIAN, (cycles[REPEAT_MEDIAN >> 1])); \ - add_benchmark_results(#var, (cycles[REPEAT_MEDIAN >> 1])); \ - return (0); \ - } - -#define MAKE_BENCH_matacc_opt_32_16(var, func) \ - int bench_##var() \ - { \ - uint64_t t1, t2; \ - uint64_t cycles[REPEAT_MEDIAN]; \ - int16_t dst[NTT_SIZE] __attribute__((aligned(16))) = {0}; \ - int16_t b[NTT_SIZE] __attribute__((aligned(16))) = {0}; \ - int16_t c[4] __attribute__((aligned(16))) = {0}; \ - uint8_t buf[168+2] __attribute__((aligned(16))) = {0}; \ - uint64_t state[26] __attribute__((aligned(16))) = {0}; \ - int16_t aprime[NTT_SIZE] __attribute__((aligned(16))) = {0}; \ - int32_t rtmp[NTT_SIZE] __attribute__((aligned(16))) = {0}; \ - (func)(dst, b, c, buf, state, aprime, rtmp); \ - for (size_t cnt_median = 0; cnt_median < REPEAT_MEDIAN; cnt_median++) \ - { \ - t1 = hal_get_time(); \ - for (size_t cnt = 0; cnt < REPEAT; cnt++) \ - (func)(dst, b, c, buf, state, aprime, rtmp); \ - t2 = hal_get_time(); \ - cycles[cnt_median] = (t2 - t1) / REPEAT; \ - } \ - qsort(cycles, REPEAT_MEDIAN, sizeof(uint64_t), cmp_uint64_t); \ - debug_printf(#var " repeat %d, %d", \ - REPEAT *REPEAT_MEDIAN, (cycles[REPEAT_MEDIAN >> 1])); \ - add_benchmark_results(#var, (cycles[REPEAT_MEDIAN >> 1])); \ - return (0); \ - } - - -MAKE_BENCH_1(kyber_ntt_pqm4,ntt_fast_wrap) -MAKE_BENCH_1(kyber_ntt_pqm4_opt_m7,ntt_fast_opt_m7_wrap) -MAKE_BENCH_1(kyber_ntt_acle,ntt_acle) - -MAKE_BENCH_1(kyber_invntt_pqm4,invntt_fast_wrap) -MAKE_BENCH_1(kyber_invntt_pqm4_opt_m7,invntt_fast_opt_m7_wrap) -MAKE_BENCH_1(kyber_invntt_acle,invntt_acle) - -MAKE_BENCH_1(kyber_barrett_reduce_pqm4,asm_barrett_reduce) -MAKE_BENCH_1(kyber_barrett_reduce_pqm4_opt_m7,asm_barrett_reduce_opt_m7) - -MAKE_BENCH_1(kyber_fromplant_pqm4,asm_fromplant) -MAKE_BENCH_1(kyber_fromplant_pqm4_opt_m7,asm_fromplant_opt_m7) - -MAKE_BENCH_3(kyber_pointwise_add_pqm4,pointwise_add) -MAKE_BENCH_3(kyber_pointwise_add_pqm4_opt_m7,pointwise_add_opt_m7) - -MAKE_BENCH_3(kyber_pointwise_sub_pqm4,pointwise_sub) -MAKE_BENCH_3(kyber_pointwise_sub_pqm4_opt_m7,pointwise_sub_opt_m7) - -MAKE_BENCH_basemul_32(kyber_basemul_16_32, basemul_asm_opt_16_32); -MAKE_BENCH_basemul_32(kyber_basemul_16_32_opt_m7, basemul_asm_opt_16_32_opt_m7); - -MAKE_BENCH_basemul_32(kyber_basemul_acc_32_32, basemul_asm_acc_opt_32_32); -MAKE_BENCH_basemul_32(kyber_basemul_acc_32_32_opt_m7, basemul_asm_acc_opt_32_32_opt_m7); - -MAKE_BENCH_basemul_32_16(kyber_basemul_acc_32_16, basemul_asm_acc_opt_32_16); -MAKE_BENCH_basemul_32_16(kyber_basemul_acc_32_16_opt_m7, basemul_asm_acc_opt_32_16_opt_m7); - -MAKE_BENCH_frombytes_mul_32(kyber_frombytes_mul_16_32, frombytes_mul_asm_16_32_wrap); -MAKE_BENCH_frombytes_mul_32(kyber_frombytes_mul_16_32_opt_m7, frombytes_mul_asm_16_32_opt_m7_wrap); - -MAKE_BENCH_frombytes_mul_32(kyber_frombytes_mul_acc_32_32, frombytes_mul_asm_acc_32_32_wrap); -MAKE_BENCH_frombytes_mul_32(kyber_frombytes_mul_acc_32_32_opt_m7, frombytes_mul_asm_acc_32_32_opt_m7_wrap); - -MAKE_BENCH_frombytes_mul_32_16(kyber_frombytes_mul_acc_32_16, frombytes_mul_asm_acc_32_16_wrap); -MAKE_BENCH_frombytes_mul_32_16(kyber_frombytes_mul_acc_32_16_opt_m7, frombytes_mul_asm_acc_32_16_opt_m7_wrap); - -MAKE_BENCH_3(kyber_basemul_asm_pqm4,basemul_asm_wrap) -MAKE_BENCH_3(kyber_basemul_asm_pqm4_opt_m7,basemul_asm_opt_m7_wrap) - -MAKE_BENCH_3(kyber_basemul_asm_acc_pqm4,basemul_asm_acc_wrap) -MAKE_BENCH_3(kyber_basemul_asm_acc_pqm4_opt_m7,basemul_asm_acc_opt_m7_wrap) - -MAKE_BENCH_frombytes_mul(kyber_frombytes_mul_pqm4,frombytes_mul_asm_wrap) -MAKE_BENCH_frombytes_mul(kyber_frombytes_mul_pqm4_opt_m7,frombytes_mul_asm_opt_m7_wrap) - -MAKE_BENCH_frombytes_mul(kyber_frombytes_mul_acc_pqm4,frombytes_mul_asm_acc_wrap) -MAKE_BENCH_frombytes_mul(kyber_frombytes_mul_acc_pqm4_opt_m7,frombytes_mul_asm_acc_opt_m7_wrap) - -MAKE_BENCH_matacc(kyber_matacc_asm_pqm4,matacc_asm) -MAKE_BENCH_matacc(kyber_matacc_asm_pqm4_opt_m7,matacc_asm_opt_m7) - -MAKE_BENCH_matacc(kyber_matacc_asm_acc_pqm4,matacc_asm_acc) -MAKE_BENCH_matacc(kyber_matacc_asm_acc_pqm4_opt_m7,matacc_asm_acc_opt_m7) - -MAKE_BENCH_matacc_cache_32(kyber_matacc_asm_cache_16_32_pqm4,matacc_asm_cache_16_32) -MAKE_BENCH_matacc_cache_32(kyber_matacc_asm_cache_16_32_pqm4_opt_m7,matacc_asm_cache_16_32_opt_m7) - -MAKE_BENCH_matacc_cache_32(kyber_matacc_asm_cache_32_32_pqm4,matacc_asm_cache_32_32) -MAKE_BENCH_matacc_cache_32(kyber_matacc_asm_cache_32_32_pqm4_opt_m7,matacc_asm_cache_32_32_opt_m7) - -MAKE_BENCH_matacc_cache_32_16(kyber_matacc_asm_cache_32_16_pqm4,matacc_asm_cache_32_16) -MAKE_BENCH_matacc_cache_32_16(kyber_matacc_asm_cache_32_16_pqm4_opt_m7,matacc_asm_cache_32_16_opt_m7) - -MAKE_BENCH_matacc_opt_32(kyber_matacc_asm_opt_16_32_pqm4,matacc_asm_opt_16_32) -MAKE_BENCH_matacc_opt_32(kyber_matacc_asm_opt_16_32_pqm4_opt_m7,matacc_asm_opt_16_32_opt_m7) - -MAKE_BENCH_matacc_opt_32(kyber_matacc_asm_opt_32_32_pqm4,matacc_asm_opt_32_32) -MAKE_BENCH_matacc_opt_32(kyber_matacc_asm_opt_32_32_pqm4_opt_m7,matacc_asm_opt_32_32_opt_m7) - -MAKE_BENCH_matacc_opt_32_16(kyber_matacc_asm_opt_32_16_pqm4,matacc_asm_opt_32_16) -MAKE_BENCH_matacc_opt_32_16(kyber_matacc_asm_opt_32_16_pqm4_opt_m7,matacc_asm_opt_32_16_opt_m7) - - -int main(void) -{ - int ret = 0; - debug_test_start( "\nKyber All Test!\n" ); - - /* Test cases */ - if( test_ntt_pqm4() != 0 ){return( 1 );} - if( test_ntt_pqm4_opt() != 0 ){return( 1 );} - if( test_ntt_acle() != 0){return( 1 );} - - if( test_invntt_pqm4() != 0 ){return( 1 );} - if( test_invntt_pqm4_opt() != 0 ){return( 1 );} - // TODO: test currently fails -- need to debug - // if( test_invntt_acle() != 0 ){return( 1 );} - - if( test_barrett_reduce_pqm4() != 0 ){return( 1 );} - if( test_barrett_reduce_pqm4_opt() != 0 ){return( 1 );} - - if( test_fromplant_pqm4() != 0 ){return( 1 );} - if( test_fromplant_pqm4_opt() != 0 ){return( 1 );} - - if( test_pointwise_add_pqm4() != 0 ){return( 1 );} - if( test_pointwise_add_pqm4_opt() != 0 ){return( 1 );} - - if( test_pointwise_sub_pqm4() != 0 ){return( 1 );} - if( test_pointwise_sub_pqm4_opt() != 0 ){return( 1 );} - - if( test_basemul_16_32() != 0 ){return( 1 );} - if( test_basemul_16_32_opt() != 0 ){return( 1 );} - - if( test_basemul_acc_32_32() != 0 ){return( 1 );} - if( test_basemul_acc_32_32_opt() != 0 ){return( 1 );} - - if( test_basemul_acc_32_16() != 0 ){return( 1 );} - if( test_basemul_acc_32_16_opt() != 0 ){return( 1 );} - - if( test_frombytes_mul_16_32() != 0 ){return( 1 );} - if( test_frombytes_mul_16_32_opt() != 0 ){return( 1 );} - - if( test_frombytes_mul_acc_32_32() != 0 ){return( 1 );} - if( test_frombytes_mul_acc_32_32_opt() != 0 ){return( 1 );} - - if( test_frombytes_mul_acc_32_16() != 0 ){return( 1 );} - if( test_frombytes_mul_acc_32_16_opt() != 0 ){return( 1 );} - - if( test_basemul_pqm4() != 0 ){return( 1 );} - if( test_basemul_pqm4_opt() != 0 ){return( 1 );} - - if( test_basemul_acc_pqm4() != 0 ){return( 1 );} - if( test_basemul_acc_pqm4_opt() != 0 ){return( 1 );} - - if( test_matacc_asm_pqm4() != 0 ){return( 1 );} - if( test_matacc_asm_pqm4_opt() != 0 ){return( 1 );} - - if( test_frombytes_mul_asm_pqm4() != 0 ){return( 1 );} - if( test_frombytes_mul_asm_pqm4_opt() != 0 ){return( 1 );} - - if( test_frombytes_mul_asm_acc_pqm4() != 0 ){return( 1 );} - if( test_frombytes_mul_asm_acc_pqm4_opt() != 0 ){return( 1 );} - - if( test_matacc_asm_acc_pqm4() != 0 ){return( 1 );} - if( test_matacc_asm_acc_pqm4_opt() != 0 ){return( 1 );} - - if( test_matacc_asm_cache_16_32_pqm4() != 0 ){return( 1 );} - if( test_matacc_asm_cache_16_32_pqm4_opt() != 0 ){return( 1 );} - - if( test_matacc_asm_cache_32_32_pqm4() != 0 ){return( 1 );} - if( test_matacc_asm_cache_32_32_pqm4_opt() != 0 ){return( 1 );} - - if( test_matacc_asm_cache_32_16_pqm4() != 0 ){return( 1 );} - if( test_matacc_asm_cache_32_16_pqm4_opt() != 0 ){return( 1 );} - - if( test_matacc_asm_opt_16_32_pqm4() != 0 ){return( 1 );} - if( test_matacc_asm_opt_16_32_pqm4_opt() != 0 ){return( 1 );} - - if( test_matacc_asm_opt_32_32_pqm4() != 0 ){return( 1 );} - if( test_matacc_asm_opt_32_32_pqm4_opt() != 0 ){return( 1 );} - - if( test_matacc_asm_opt_32_16_pqm4() != 0 ){return( 1 );} - if( test_matacc_asm_opt_32_16_pqm4_opt() != 0 ){return( 1 );} - - bench_kyber_ntt_pqm4(); - bench_kyber_ntt_pqm4_opt_m7(); - bench_kyber_ntt_acle(); - - bench_kyber_invntt_pqm4(); - bench_kyber_invntt_pqm4_opt_m7(); - bench_kyber_invntt_acle(); - - bench_kyber_barrett_reduce_pqm4(); - bench_kyber_barrett_reduce_pqm4_opt_m7(); - - bench_kyber_fromplant_pqm4(); - bench_kyber_fromplant_pqm4_opt_m7(); - - bench_kyber_pointwise_add_pqm4(); - bench_kyber_pointwise_add_pqm4_opt_m7(); - - bench_kyber_pointwise_sub_pqm4(); - bench_kyber_pointwise_sub_pqm4_opt_m7(); - - bench_kyber_basemul_16_32(); - bench_kyber_basemul_16_32_opt_m7(); - - bench_kyber_basemul_acc_32_32(); - bench_kyber_basemul_acc_32_32_opt_m7(); - - bench_kyber_basemul_acc_32_16(); - bench_kyber_basemul_acc_32_16_opt_m7(); - - bench_kyber_frombytes_mul_16_32(); - bench_kyber_frombytes_mul_16_32_opt_m7(); - - bench_kyber_frombytes_mul_acc_32_32(); - bench_kyber_frombytes_mul_acc_32_32_opt_m7(); - - bench_kyber_frombytes_mul_acc_32_16(); - bench_kyber_frombytes_mul_acc_32_16_opt_m7(); - - bench_kyber_basemul_asm_pqm4(); - bench_kyber_basemul_asm_pqm4_opt_m7(); - - bench_kyber_basemul_asm_acc_pqm4(); - bench_kyber_basemul_asm_acc_pqm4_opt_m7(); - - bench_kyber_frombytes_mul_pqm4(); - bench_kyber_frombytes_mul_pqm4_opt_m7(); - - bench_kyber_frombytes_mul_acc_pqm4(); - bench_kyber_frombytes_mul_acc_pqm4_opt_m7(); - - bench_kyber_matacc_asm_pqm4(); - bench_kyber_matacc_asm_pqm4_opt_m7(); - - bench_kyber_matacc_asm_acc_pqm4(); - bench_kyber_matacc_asm_acc_pqm4_opt_m7(); - - bench_kyber_matacc_asm_cache_16_32_pqm4(); - bench_kyber_matacc_asm_cache_16_32_pqm4_opt_m7(); - - bench_kyber_matacc_asm_cache_32_32_pqm4(); - bench_kyber_matacc_asm_cache_32_32_pqm4_opt_m7(); - - bench_kyber_matacc_asm_cache_32_16_pqm4(); - bench_kyber_matacc_asm_cache_32_16_pqm4_opt_m7(); - - bench_kyber_matacc_asm_opt_16_32_pqm4(); - bench_kyber_matacc_asm_opt_16_32_pqm4_opt_m7(); - - bench_kyber_matacc_asm_opt_32_32_pqm4(); - bench_kyber_matacc_asm_opt_32_32_pqm4_opt_m7(); - - bench_kyber_matacc_asm_opt_32_16_pqm4(); - bench_kyber_matacc_asm_opt_32_16_pqm4_opt_m7(); - - - /* Test cases */ - debug_printf( "Done!\n" ); - - debug_printf("======================" ); - dump_benchmarks_tex(); - debug_printf("======================\n" ); - debug_printf( "ALL GOOD!\n" ); - return( ret ); -} diff --git a/tests/kyber-all/ntt-acle.c b/tests/kyber-all/ntt-acle.c deleted file mode 100644 index 5a29cf7..0000000 --- a/tests/kyber-all/ntt-acle.c +++ /dev/null @@ -1,357 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 or CC0-1.0 -#include -#include "params.h" -#include - -#define __pkhtb(a, b) (((uint32_t)a) & 0xFFFF0000) ^ (((b) >> 16) & 0xFFFF) - -static const int32_t twiddles_plantard_ntt[128] = { - 2230699446, 3328631909, 4243360600, 3408622288, 812805467, 2447447570, 1094061961, - 1370157786, 381889553, 3157039644, 372858381, 427045412, 4196914574, 2265533966, - 2475831253, 1727534158, 1904287092, 1544330386, 2972545705, 2937711185, 2651294021, - 249002310, 3929849920, 72249375, 838608815, 2550660963, 3242190693, 815385801, - 1028263423, 2889974991, 1719793153, 3696329620, 42575525, 1703020977, 2470670584, - 3594406395, 1839778722, 2701610550, 2991898216, 1851390229, 1041165097, 583155668, - 4205945745, 690239563, 3718262466, 1855260731, 3700200122, 1979116802, 3098982111, - 734105255, 3087370604, 3714391964, 3415073125, 3376368103, 1910737929, 836028480, - 2252632292, 2546790461, 1059227441, 3191874164, 4012420634, 1583035408, 1174052340, - 21932846, 3562152210, 752167598, 3417653460, 2112004045, 932791035, 2951903026, - 1419184148, 1817845876, 3434425636, 4233039261, 300609006, 975366560, 2781600929, - 3889854731, 3935010590, 2197155094, 2130066389, 3598276897, 2308109491, 2382939200, - 1228239371, 1884934581, 3466679822, 1211467195, 2977706375, 3144137970, 3080919767, - 945692709, 3015121229, 345764865, 826997308, 2043625172, 2964804700, 2628071007, - 4154339049, 483812778, 3288636719, 2696449880, 2122325384, 1371447954, 411563403, - 3577634219, 976656727, 2708061387, 723783916, 3181552825, 3346694253, 3617629408, - 1408862808, 519937465, 1323711759, 1474661346, 2773859924, 3580214553, 1143088323, - 2221668274, 1563682897, 2417773720, 1327582262, 2722253228, 3786641338, 1141798155, - 2779020594 -}; - -const int32_t twiddles_plantard_basemul[64] = { - 21932846, 3562152210, 752167598, 3417653460, 2112004045, 932791035, 2951903026, 1419184148, - 1817845876, 3434425636, 4233039261, 300609006, 975366560, 2781600929, 3889854731, 3935010590, - 2197155094, 2130066389, 3598276897, 2308109491, 2382939200, 1228239371, 1884934581, 3466679822, - 1211467195, 2977706375, 3144137970, 3080919767, 945692709, 3015121229, 345764865, 826997308, - 2043625172, 2964804700, 2628071007, 4154339049, 483812778, 3288636719, 2696449880, 2122325384, - 1371447954, 411563403, 3577634219, 976656727, 2708061387, 723783916, 3181552825, 3346694253, - 3617629408, 1408862808, 519937465, 1323711759, 1474661346, 2773859924, 3580214553, 1143088323, - 2221668274, 1563682897, 2417773720, 1327582262, 2722253228, 3786641338, 1141798155, 2779020594 -}; - -static const int32_t twiddles_plantard_invntt[256] = { - 2064267851, 51606697, 2064267851, 966335388, 1290168, 3200905336, - 51606697, 3482161830, 2064267851, 1847519727, 966335388, 886345009, - 1290168, 2064267851, 1290168, 51606697, 2064267851, 966335388, - 2435836064, 290287667, 2944162022, 3021572066, 1802363867, 603798347, 3375077936, 2677097369, - 2042335005, 3235739856, 1748176836, 3120914957, 282546663, 2711931889, 1103093133, - 1659155285, 1785591691, 1941701947, 2704190884, 358666539, 793452955, 1461759672, 1673347127, - 3200905336, 2042335005, 3560862042, 3235739856, 580575333, 1748176836, 1207596693, - 3887274396, 2126195886, 872153167, 3443456808, 526388302, 299318839, 3875662889, 3382818940, - 3266703874, 2575174144, 1404992306, 1824296713, 4252391772, 2591946320, 598637677, - 1997179146, 2904166832, 2577754479, 202556283, 30964018, 3807284017, 1238560711, 1967505295, - 51606697, 3200905336, 1847519727, 2042335005, 89021552, 3560862042, 700560902, - 1633351937, 2191994424, 909568022, 1780431021, 2022982494, 2497764099, 3609888404, 1126316146, - 89021552, 576704831, 3604727734, 1195985186, 594767175, 2315850495, 2439706566, - 3633111417, 2908037335, 3590535893, 357376372, 1887514916, 1410152976, 2486152593, 571544162, - 3482161830, 3266703874, 4045964987, 2575174144, 4222717922, 1404992306, 365117377, - 4003389463, 2444867236, 1221788534, 3305408896, 1626901100, 3367336931, 651534541, 1549491056, - 1819136044, 2390680205, 2567433139, 1643673276, 1322421592, 1357256112, 2750636911, - 993428903, 3680847611, 1082450454, 1205016358, 348345200, 956014049, 1048906102, 3880823559, - 2064267851, 51606697, 966335388, 3200905336, 3482161830, 1847519727, 886345009, - 3342823751, 4258842609, 568963827, 2849979801, 1283716570, 2330042337, 4104022520, 3007380225, - 3560862042, 580575333, 1207596693, 3458938817, 918599194, 2384229368, 879894172, - 2217797772, 503165289, 2812564947, 2946742357, 833448145, 1905577260, 3273154711, 3208646340, - 1847519727, 89021552, 700560902, 576704831, 1593356747, 3604727734, 2455188575, - 3162200314, 2808694444, 1933960943, 678628056, 49026362, 1375318456, 1961054458, 3473130659, - 4045964987, 4222717922, 365117377, 3479581496, 1744306334, 1052776604, 3456358482, - 438656919, 1681088131, 366407544, 2819015784, 1771399850, 1091481626, 2136517226, 709592074, - 966335388, 3482161830, 886345009, 3266703874, 1819136044, 4045964987, 2924809511, - 25803349, 3888564563, 1032133926, 923759864, 2630651342, 2590656153, 2146838565, 547030981, - 700560902, 1593356747, 2455188575, 3711811629, 2443577068, 3253802200, 1303069081, - 254162980, 3513125848, 1576584571, 3086080437, 2933840683, 3184133160, 1389510297, 2811274779, - 886345009, 1819136044, 2924809511, 2390680205, 1137927653, 2567433139, 3913077744, - 2288756980, 459299597, 1355965945, 1192114684, 2699030215, 439947086, 587026170, 418014240, - 2924809511, 1137927653, 3913077744, 2029433331, 3867921885, 98052723, 3922108916, 639923034, - 2806114109, 4122084864, 575414664, 1674637294, 1541750051, 2560982302, 1540459884, 0 -}; - -static int32_t plantard(int32_t a, int32_t twiddle) { - int32_t qa = 26632; - int32_t q = 3329; - - int32_t t1 = __smlawb(twiddle, a, 0); - int32_t t2 = __smlawt(twiddle, a, 0); - t1 = __smlabb(t1, q, qa); - t2 = __smlabb(t2, q, qa); - return __pkhtb(t2, t1); -} - -static int32_t plantard_reduce(int32_t a) { - return plantard(a, 0x13afb8); -} - -static void doublebutterfly(int32_t *r1, int32_t *r2, int32_t z) { - int32_t t = plantard(*r2, z); - *r2 = __usub16(*r1, t); - *r1 = __uadd16(*r1, t); -} - -static void doublebutterfly_light(int32_t *r1, int32_t *r2) { - int32_t t = *r2; - *r2 = __ssub16(*r1, t); - *r1 = __sadd16(*r1, t); -} - -/************************************************* -* Name: ntt -* -* Description: Inplace number-theoretic transform (NTT) in Rq. -* input is in standard order, output is in bitreversed order -* -* Arguments: - int16_t r[256]: pointer to input/output vector of elements of Zq -**************************************************/ -void ntt_acle(int16_t r[256]) { - unsigned int j, i; - for (j = 0; j < 16; j++) { - int32_t *p0 = (int32_t *) (r + 2 * j + 0 * 32); - int32_t *p1 = (int32_t *) (r + 2 * j + 1 * 32); - int32_t *p2 = (int32_t *) (r + 2 * j + 2 * 32); - int32_t *p3 = (int32_t *) (r + 2 * j + 3 * 32); - int32_t *p4 = (int32_t *) (r + 2 * j + 4 * 32); - int32_t *p5 = (int32_t *) (r + 2 * j + 5 * 32); - int32_t *p6 = (int32_t *) (r + 2 * j + 6 * 32); - int32_t *p7 = (int32_t *) (r + 2 * j + 7 * 32); - - // Layer 1 - doublebutterfly(p0, p4, twiddles_plantard_ntt[0]); - doublebutterfly(p1, p5, twiddles_plantard_ntt[0]); - doublebutterfly(p2, p6, twiddles_plantard_ntt[0]); - doublebutterfly(p3, p7, twiddles_plantard_ntt[0]); - - // Layer 2 - doublebutterfly(p0, p2, twiddles_plantard_ntt[1]); - doublebutterfly(p1, p3, twiddles_plantard_ntt[1]); - doublebutterfly(p4, p6, twiddles_plantard_ntt[2]); - doublebutterfly(p5, p7, twiddles_plantard_ntt[2]); - - // Layer 3 - doublebutterfly(p0, p1, twiddles_plantard_ntt[3]); - doublebutterfly(p2, p3, twiddles_plantard_ntt[4]); - doublebutterfly(p4, p5, twiddles_plantard_ntt[5]); - doublebutterfly(p6, p7, twiddles_plantard_ntt[6]); - } - - const int32_t *twiddle_ptr = twiddles_plantard_ntt + 7; - for (j = 0; j < 8; j++) { - for (i = 0; i < 2; i++) { - int32_t *p0 = (int32_t *) (r + 2 * i + 32 * j + 0 * 4); - int32_t *p1 = (int32_t *) (r + 2 * i + 32 * j + 1 * 4); - int32_t *p2 = (int32_t *) (r + 2 * i + 32 * j + 2 * 4); - int32_t *p3 = (int32_t *) (r + 2 * i + 32 * j + 3 * 4); - int32_t *p4 = (int32_t *) (r + 2 * i + 32 * j + 4 * 4); - int32_t *p5 = (int32_t *) (r + 2 * i + 32 * j + 5 * 4); - int32_t *p6 = (int32_t *) (r + 2 * i + 32 * j + 6 * 4); - int32_t *p7 = (int32_t *) (r + 2 * i + 32 * j + 7 * 4); - - // Layer 4 - doublebutterfly(p0, p4, twiddle_ptr[0]); - doublebutterfly(p1, p5, twiddle_ptr[0]); - doublebutterfly(p2, p6, twiddle_ptr[0]); - doublebutterfly(p3, p7, twiddle_ptr[0]); - - // Layer 5 - doublebutterfly(p0, p2, twiddle_ptr[1]); - doublebutterfly(p1, p3, twiddle_ptr[1]); - doublebutterfly(p4, p6, twiddle_ptr[2]); - doublebutterfly(p5, p7, twiddle_ptr[2]); - - // Layer 6 - doublebutterfly(p0, p1, twiddle_ptr[3]); - doublebutterfly(p2, p3, twiddle_ptr[4]); - doublebutterfly(p4, p5, twiddle_ptr[5]); - doublebutterfly(p6, p7, twiddle_ptr[6]); - } - twiddle_ptr += 7; - } - - // Layer 7 - for (j = 0; j < 64; j++) { - int32_t *p0 = (int32_t *)(r + 4 * j + 0 * 2); - int32_t *p1 = (int32_t *)(r + 4 * j + 1 * 2); - doublebutterfly(p0, p1, twiddle_ptr[j]); - } -} - -/************************************************* -* Name: invntt_tomont -* -* Description: Inplace inverse number-theoretic transform in Rq and -* multiplication by Montgomery factor 2^16. -* Input is in bitreversed order, output is in standard order -* -* Arguments: - int16_t r[256]: pointer to input/output vector of elements of Zq -**************************************************/ -void invntt_acle(int16_t r[256]) { - for (int i = 0; i < 16; i++) { - int32_t *p0 = (int32_t *)(r + i * 16 + 2 * 0); - int32_t *p1 = (int32_t *)(r + i * 16 + 2 * 1); - int32_t *p2 = (int32_t *)(r + i * 16 + 2 * 2); - int32_t *p3 = (int32_t *)(r + i * 16 + 2 * 3); - int32_t *p4 = (int32_t *)(r + i * 16 + 2 * 4); - int32_t *p5 = (int32_t *)(r + i * 16 + 2 * 5); - int32_t *p6 = (int32_t *)(r + i * 16 + 2 * 6); - int32_t *p7 = (int32_t *)(r + i * 16 + 2 * 7); - - // Layer 7 - doublebutterfly_light(p0, p1); - doublebutterfly_light(p2, p3); - doublebutterfly_light(p4, p5); - doublebutterfly_light(p6, p7); - - // Layer 6 - doublebutterfly_light(p0, p2); - doublebutterfly(p1, p3, twiddles_plantard_invntt[0]); - doublebutterfly_light(p4, p6); - *p6 = plantard_reduce(*p6); - doublebutterfly(p5, p7, twiddles_plantard_invntt[0]); - - // Layer 5 - doublebutterfly_light(p0, p4); - doublebutterfly(p1, p5, twiddles_plantard_invntt[1]); - doublebutterfly(p2, p6, twiddles_plantard_invntt[2]); - doublebutterfly(p3, p7, twiddles_plantard_invntt[3]); - } - - for (int i = 0; i < 8; i++) { - for (int j = 0; j < 8; j++) { - int32_t *p0 = (int32_t *)(r + i * 32 + j * 2 + 16 * 0 ); - int32_t *p1 = (int32_t *)(r + i * 32 + j * 2 + 16 * 1 ); - - // Layer 4 - doublebutterfly(p0, p1, twiddles_plantard_invntt[4 + j]); - } - } - - const int32_t *twiddle = twiddles_plantard_invntt + 12; - for (int i = 0; i < 16; i++) { - int32_t *p0 = (int32_t *)(r + i * 2 + 32 * 0); - int32_t *p1 = (int32_t *)(r + i * 2 + 32 * 1); - int32_t *p2 = (int32_t *)(r + i * 2 + 32 * 2); - int32_t *p3 = (int32_t *)(r + i * 2 + 32 * 3); - int32_t *p4 = (int32_t *)(r + i * 2 + 32 * 4); - int32_t *p5 = (int32_t *)(r + i * 2 + 32 * 5); - int32_t *p6 = (int32_t *)(r + i * 2 + 32 * 6); - int32_t *p7 = (int32_t *)(r + i * 2 + 32 * 7); - - if (i == 0) { - *p0 = plantard_reduce(*p0); - *p1 = plantard_reduce(*p1); - *p2 = plantard_reduce(*p2); - *p3 = plantard_reduce(*p3); - *p4 = plantard_reduce(*p4); - *p5 = plantard_reduce(*p5); - *p6 = plantard_reduce(*p6); - *p7 = plantard_reduce(*p7); - - // Layer 3 - doublebutterfly_light(p0, p1); - doublebutterfly_light(p2, p3); - doublebutterfly_light(p4, p5); - doublebutterfly_light(p6, p7); - - // Layer 2 - doublebutterfly_light(p0, p2); - doublebutterfly(p1, p3, twiddle[1]); - doublebutterfly_light(p4, p6); - *p6 = plantard_reduce(*p6); - doublebutterfly(p5, p7, twiddle[1]); - - // Layer 1 - doublebutterfly_light(p0, p4); - doublebutterfly(p1, p5, twiddle[3]); - doublebutterfly(p2, p6, twiddle[4]); - doublebutterfly(p3, p7, twiddle[5]); - - twiddle += 6; - } else { - // Layer 3 - doublebutterfly(p0, p1, twiddle[0]); - doublebutterfly(p2, p3, twiddle[0]); - doublebutterfly(p4, p5, twiddle[0]); - doublebutterfly(p6, p7, twiddle[0]); - - // Layer 2 - doublebutterfly(p0, p2, twiddle[1]); - doublebutterfly(p1, p3, twiddle[2]); - doublebutterfly(p4, p6, twiddle[1]); - doublebutterfly(p5, p7, twiddle[2]); - - // Layer 1 - doublebutterfly(p0, p4, twiddle[3]); - doublebutterfly(p1, p5, twiddle[4]); - doublebutterfly(p2, p6, twiddle[5]); - doublebutterfly(p3, p7, twiddle[6]); - - twiddle += 7; - } - - // Twist - *p0 = plantard(*p0, twiddle[0]); - *p1 = plantard(*p1, twiddle[1]); - *p2 = plantard(*p2, twiddle[2]); - *p3 = plantard(*p3, twiddle[3]); - *p4 = plantard(*p4, twiddle[4]); - *p5 = plantard(*p5, twiddle[5]); - *p6 = plantard(*p6, twiddle[6]); - *p7 = plantard(*p7, twiddle[7]); - twiddle += 8; - } -} - -static inline void basemul(int32_t r[1], const int32_t a[1], const int32_t b[1], int32_t zeta, int add) { - int32_t t1, t2; - int32_t qa = 26632; - int32_t q = 3329; - int32_t qinv = 0x6ba8f301; - int32_t a0 = a[0]; - int32_t b0 = b[0]; - - t1 = __smlawt(zeta, b0, 0); - t1 = __smlabb(t1, q, qa); - t1 = __smlatt(a0, t1, 0); - t1 = __smlabb(a0, b0, t1); - t1 = qinv * t1; - t1 = __smlatb(t1, q, qa); - - t2 = __smuadx(a0, b0); - t2 = qinv * t2; - t2 = __smlatb(t2, q, qa); - - if (add) { - t1 = __pkhtb(t2, t1); - r[0] = __uadd16(t1, r[0]); - } else { - r[0] = __pkhtb(t2, t1); - } -} - -void basemul_plantard_acle(int16_t r[256], const int16_t a[256], const int16_t b[256], int add) { - unsigned int i; - for (i = 0; i < KYBER_N / 4; i++) { - basemul((int32_t *) &r[4 * i], (int32_t *) &a[4 * i], (int32_t *) &b[4 * i], twiddles_plantard_basemul[i], add); - basemul((int32_t *) &r[4 * i + 2], (int32_t *) &a[4 * i + 2], (int32_t *) &b[4 * i + 2], -twiddles_plantard_basemul[i], add); - } -} - -void frombytes_basemul_plantard(int16_t r[256], const int16_t b[256], const unsigned char *a, int add) { - unsigned int i; - int16_t ap[4]; - for (i = 0; i < KYBER_N / 4; i++) { - ap[0] = ((a[6 * i + 0] >> 0) | ((uint16_t)a[6 * i + 1] << 8)) & 0xFFF; - ap[1] = ((a[6 * i + 1] >> 4) | ((uint16_t)a[6 * i + 2] << 4)) & 0xFFF; - ap[2] = ((a[6 * i + 3] >> 0) | ((uint16_t)a[6 * i + 4] << 8)) & 0xFFF; - ap[3] = ((a[6 * i + 4] >> 4) | ((uint16_t)a[6 * i + 5] << 4)) & 0xFFF; - basemul((int32_t *) &r[4 * i], (int32_t *) ap, (int32_t *) &b[4 * i], twiddles_plantard_basemul[i], add); - basemul((int32_t *) &r[4 * i + 2], (int32_t *) (ap + 2), (int32_t *) &b[4 * i + 2], -twiddles_plantard_basemul[i], add); - } -} \ No newline at end of file diff --git a/tests/kyber-all/ntt-acle.h b/tests/kyber-all/ntt-acle.h deleted file mode 100644 index a294354..0000000 --- a/tests/kyber-all/ntt-acle.h +++ /dev/null @@ -1,12 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 or CC0-1.0 -#ifndef NTT_ACLE_H -#define NTT_ACLE_H - -#include - -void ntt_acle(int16_t r[256]); -void invntt_acle(int16_t r[256]); -void basemul_plantard_acle(int16_t r[256], const int16_t a[256], const int16_t b[256], int add); -void frombytes_basemul_plantard(int16_t r[256], const int16_t b[256], const unsigned char *a, int add); - -#endif \ No newline at end of file diff --git a/tests/kyber-all/ntt-asm.h b/tests/kyber-all/ntt-asm.h deleted file mode 100644 index 5346ee5..0000000 --- a/tests/kyber-all/ntt-asm.h +++ /dev/null @@ -1,143 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 or CC0-1.0 -#ifndef NTT_ASM_H -#define NTT_ASM_H - -#include -// asm -void ntt_fast(int16_t *, const int32_t *); -void ntt_fast_opt_m7(int16_t *, const int32_t *); - -void invntt_fast(int16_t *, const int32_t *); -void invntt_fast_opt_m7(int16_t *, const int32_t *); - - -static const int32_t zetas_asm[128] = { - 2230699446, 3328631909, 4243360600, 3408622288, 812805467, 2447447570, 1094061961, 1370157786, 2475831253, 249002310, 1028263423, 3594406395, 4205945745, 734105255, 2252632292, 381889553, 372858381, 427045412, 21932846, 3562152210, 752167598, 3417653460, 3157039644, 4196914574, 2265533966, 2112004045, 932791035, 2951903026, 1419184148, 1727534158, 1544330386, 2972545705, 1817845876, 3434425636, 4233039261, 300609006, 1904287092, 2937711185, 2651294021, 975366560, 2781600929, 3889854731, 3935010590, 3929849920, 838608815, 2550660963, 2197155094, 2130066389, 3598276897, 2308109491, 72249375, 3242190693, 815385801, 2382939200, 1228239371, 1884934581, 3466679822, 2889974991, 3696329620, 42575525, 1211467195, 2977706375, 3144137970, 3080919767, 1719793153, 1703020977, 2470670584, 945692709, 3015121229, 345764865, 826997308, 1839778722, 2991898216, 1851390229, 2043625172, 2964804700, 2628071007, 4154339049, 2701610550, 1041165097, 583155668, 483812778, 3288636719, 2696449880, 2122325384, 690239563, 1855260731, 3700200122, 1371447954, 411563403, 3577634219, 976656727, 3718262466, 1979116802, 3098982111, 2708061387, 723783916, 3181552825, 3346694253, 3087370604, 3415073125, 3376368103, 3617629408, 1408862808, 519937465, 1323711759, 3714391964, 1910737929, 836028480, 1474661346, 2773859924, 3580214553, 1143088323, 2546790461, 3191874164, 4012420634, 2221668274, 1563682897, 2417773720, 1327582262, 1059227441, 1583035408, 1174052340, 2722253228, 3786641338, 1141798155, 2779020594, 0}; - -static const int32_t zetas_inv_CT_asm[256] = { - // LAYER 7+6+5+4 - 1290168, 1290168, 2064267851, 1290168, 51606697, 2064267851, 966335388, 1290168, 3200905336, 51606697, 3482161830, 2064267851, 1847519727, 966335388, 886345009, - // removed first "2285" + LAYER 3+2+1 - 1 - butterfly - 1290168, 2064267851, 1290168, 51606697, 2064267851, 966335388, - // LAYER 3+2+1 - 1 - twist - 2435836064, 290287667, 2944162022, 3021572066, 1802363867, 603798347, 3375077936, 2677097369, - // LAYER 3+2+1 - 2 - butterfly - 2042335005, 3235739856, 1748176836, 3120914957, 282546663, 2711931889, 1103093133, - // LAYER 3+2+1 - 2 - twist - 1659155285, 1785591691, 1941701947, 2704190884, 358666539, 793452955, 1461759672, 1673347127, - // LAYER 3+2+1 - 3 - butterfly - 3200905336, 2042335005, 3560862042, 3235739856, 580575333, 1748176836, 1207596693, - // LAYER 3+2+1 - 3 - twist - 3887274396, 2126195886, 872153167, 3443456808, 526388302, 299318839, 3875662889, 3382818940, - // LAYER 3+2+1 - 4 - butterfly - 3266703874, 2575174144, 1404992306, 1824296713, 4252391772, 2591946320, 598637677, - // LAYER 3+2+1 - 4 - twist - 1997179146, 2904166832, 2577754479, 202556283, 30964018, 3807284017, 1238560711, 1967505295, - // LAYER 3+2+1 - 5 - butterfly - 51606697, 3200905336, 1847519727, 2042335005, 89021552, 3560862042, 700560902, - // LAYER 3+2+1 - 5 - twist - 1633351937, 2191994424, 909568022, 1780431021, 2022982494, 2497764099, 3609888404, 1126316146, - // LAYER 3+2+1 - 6 - butterfly - 89021552, 576704831, 3604727734, 1195985186, 594767175, 2315850495, 2439706566, - // LAYER 3+2+1 - 6 - twist - 3633111417, 2908037335, 3590535893, 357376372, 1887514916, 1410152976, 2486152593, 571544162, - // LAYER 3+2+1 - 7 - butterfly - 3482161830, 3266703874, 4045964987, 2575174144, 4222717922, 1404992306, 365117377, - // LAYER 3+2+1 - 7 - twist - 4003389463, 2444867236, 1221788534, 3305408896, 1626901100, 3367336931, 651534541, 1549491056, - // LAYER 3+2+1 - 8 - butterfly - 1819136044, 2390680205, 2567433139, 1643673276, 1322421592, 1357256112, 2750636911, - // LAYER 3+2+1 - 8 - twist - 993428903, 3680847611, 1082450454, 1205016358, 348345200, 956014049, 1048906102, 3880823559, - // LAYER 3+2+1 - 9 - butterfly - 2064267851, 51606697, 966335388, 3200905336, 3482161830, 1847519727, 886345009, - // LAYER 3+2+1 - 9 - twist - 3342823751, 4258842609, 568963827, 2849979801, 1283716570, 2330042337, 4104022520, 3007380225, - // LAYER 3+2+1 - 10 - butterfly - 3560862042, 580575333, 1207596693, 3458938817, 918599194, 2384229368, 879894172, - // LAYER 3+2+1 - 10 - twist - 2217797772, 503165289, 2812564947, 2946742357, 833448145, 1905577260, 3273154711, 3208646340, - // LAYER 3+2+1 - 11 - butterfly - 1847519727, 89021552, 700560902, 576704831, 1593356747, 3604727734, 2455188575, - // LAYER 3+2+1 - 11 - twist - 3162200314, 2808694444, 1933960943, 678628056, 49026362, 1375318456, 1961054458, 3473130659, - // LAYER 3+2+1 - 12 - butterfly - 4045964987, 4222717922, 365117377, 3479581496, 1744306334, 1052776604, 3456358482, - // LAYER 3+2+1 - 12 - twist - 438656919, 1681088131, 366407544, 2819015784, 1771399850, 1091481626, 2136517226, 709592074, - // LAYER 3+2+1 - 13 - butterfly - 966335388, 3482161830, 886345009, 3266703874, 1819136044, 4045964987, 2924809511, - // LAYER 3+2+1 - 13 - twist - 25803349, 3888564563, 1032133926, 923759864, 2630651342, 2590656153, 2146838565, 547030981, - // LAYER 3+2+1 - 14 - butterfly - 700560902, 1593356747, 2455188575, 3711811629, 2443577068, 3253802200, 1303069081, - // LAYER 3+2+1 - 14 - twist - 254162980, 3513125848, 1576584571, 3086080437, 2933840683, 3184133160, 1389510297, 2811274779, - // LAYER 3+2+1 - 15 - butterfly - 886345009, 1819136044, 2924809511, 2390680205, 1137927653, 2567433139, 3913077744, - // LAYER 3+2+1 - 15 - twist - 2288756980, 459299597, 1355965945, 1192114684, 2699030215, 439947086, 587026170, 418014240, - // LAYER 3+2+1 - 16 - butterfly - 2924809511, 1137927653, 3913077744, 2029433331, 3867921885, 98052723, 3922108916, 639923034, - // LAYER 3+2+1 - 16 - twist - 2806114109, 4122084864, 575414664, 1674637294, 1541750051, 2560982302, 1540459884, 0}; - -static void ntt_fast_wrap(int16_t *p){ - ntt_fast(p, zetas_asm); -} - -static void ntt_fast_opt_m7_wrap(int16_t *p){ - ntt_fast_opt_m7(p, zetas_asm); -} - -static void invntt_fast_wrap(int16_t *p){ - invntt_fast(p, zetas_inv_CT_asm); -} - -static void invntt_fast_opt_m7_wrap(int16_t *p){ - invntt_fast_opt_m7(p, zetas_inv_CT_asm); -} - - -void basemul_asm(int16_t *, const int16_t *, const int16_t *, const int32_t *); -void basemul_asm_opt_m7(int16_t *, const int16_t *, const int16_t *, const int32_t *); - -void basemul_asm_acc(int16_t *, const int16_t *, const int16_t *, const int32_t *); -void basemul_asm_acc_opt_m7(int16_t *, const int16_t *, const int16_t *, const int32_t *); - -static void basemul_asm_wrap(int16_t *a, const int16_t *b, const int16_t *c){ - basemul_asm(a,b,c,zetas); -} -static void basemul_asm_opt_m7_wrap(int16_t *a, const int16_t *b, const int16_t *c){ - basemul_asm_opt_m7(a,b,c,zetas); -} -static void basemul_asm_acc_wrap(int16_t *a, const int16_t *b, const int16_t *c){ - basemul_asm_acc(a,b,c,zetas); -} -static void basemul_asm_acc_opt_m7_wrap(int16_t *a, const int16_t *b, const int16_t *c){ - basemul_asm_acc_opt_m7(a,b,c,zetas); -} - - -void frombytes_mul_asm(int16_t *r, const int16_t *b, const unsigned char *c, const int32_t zetas[64]); -void frombytes_mul_asm_opt_m7(int16_t *r, const int16_t *b, const unsigned char *c, const int32_t zetas[64]); - -void frombytes_mul_asm_acc(int16_t *r, const int16_t *b, const unsigned char *c, const int32_t zetas[64]); -void frombytes_mul_asm_acc_opt_m7(int16_t *r, const int16_t *b, const unsigned char *c, const int32_t zetas[64]); - -void frombytes_mul_asm_wrap(int16_t *r, const int16_t *b, const unsigned char *c){ - frombytes_mul_asm(r,b,c,zetas); -} -void frombytes_mul_asm_opt_m7_wrap(int16_t *r, const int16_t *b, const unsigned char *c){ - frombytes_mul_asm_opt_m7(r,b,c,zetas); -} - -void frombytes_mul_asm_acc_wrap(int16_t *r, const int16_t *b, const unsigned char *c){ - frombytes_mul_asm_acc(r,b,c,zetas); -} -void frombytes_mul_asm_acc_opt_m7_wrap(int16_t *r, const int16_t *b, const unsigned char *c){ - frombytes_mul_asm_acc_opt_m7(r,b,c,zetas); -} - - -#endif \ No newline at end of file diff --git a/tests/kyber-all/params.h b/tests/kyber-all/params.h deleted file mode 100644 index a9dd131..0000000 --- a/tests/kyber-all/params.h +++ /dev/null @@ -1,31 +0,0 @@ -#ifndef PARAMS_H -#define PARAMS_H - -#define KYBER_K 3 - -/* Don't change parameters below this line */ - -#define KYBER_N 256 -#define KYBER_Q 3329 - -#define KYBER_ETA 2 - -#define KYBER_SYMBYTES 32 /* size in bytes of hashes, and seeds */ -#define KYBER_SSBYTES 32 /* size in bytes of shared key */ - -#define KYBER_POLYBYTES 384 -#define KYBER_POLYVECBYTES (KYBER_K * KYBER_POLYBYTES) - -#define KYBER_POLYCOMPRESSEDBYTES 128 -#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) - -#define KYBER_INDCPA_MSGBYTES KYBER_SYMBYTES -#define KYBER_INDCPA_PUBLICKEYBYTES (KYBER_POLYVECBYTES + KYBER_SYMBYTES) -#define KYBER_INDCPA_SECRETKEYBYTES (KYBER_POLYVECBYTES) -#define KYBER_INDCPA_BYTES (KYBER_POLYVECCOMPRESSEDBYTES + KYBER_POLYCOMPRESSEDBYTES) - -#define KYBER_PUBLICKEYBYTES (KYBER_INDCPA_PUBLICKEYBYTES) -#define KYBER_SECRETKEYBYTES (KYBER_INDCPA_SECRETKEYBYTES + KYBER_INDCPA_PUBLICKEYBYTES + 2*KYBER_SYMBYTES) /* 32 bytes of additional space to save H(pk) */ -#define KYBER_CIPHERTEXTBYTES KYBER_INDCPA_BYTES - -#endif \ No newline at end of file