diff --git a/.github/workflows/test_ci.yml b/.github/workflows/test_ci.yml index f3aa498..4107bda 100644 --- a/.github/workflows/test_ci.yml +++ b/.github/workflows/test_ci.yml @@ -35,3 +35,7 @@ jobs: popd - name: Execute Tests on ${{matrix.os}} run: make -j + - name: Execute Tests with AddressSanitizer on ${{matrix.os}} + run: make asan_test -j + - name: Execute Tests with UndefinedBehaviourSanitizer on ${{matrix.os}} + run: make ubsan_test -j diff --git a/Makefile b/Makefile index 2700e43..88ac8b1 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,10 @@ -CXX = g++ +CXX ?= clang++ CXX_FLAGS = -std=c++20 WARN_FLAGS = -Wall -Wextra -pedantic OPT_FLAGS = -O3 -march=native LINK_FLAGS = -flto +ASAN_FLAGS = -g -O1 -fno-omit-frame-pointer -fno-optimize-sibling-calls -fsanitize=address # From https://clang.llvm.org/docs/AddressSanitizer.html +UBSAN_FLAGS = -g -O1 -fno-omit-frame-pointer -fno-optimize-sibling-calls -fsanitize=undefined # From https://clang.llvm.org/docs/UndefinedBehaviorSanitizer.html SHA3_INC_DIR = ./sha3/include I_FLAGS = -I ./include @@ -11,15 +13,22 @@ DEP_IFLAGS = -I $(SHA3_INC_DIR) SRC_DIR = include DILITHIUM_SOURCES := $(wildcard $(SRC_DIR)/*.hpp) BUILD_DIR = build +ASAN_BUILD_DIR = $(BUILD_DIR)/asan +UBSAN_BUILD_DIR = $(BUILD_DIR)/ubsan TEST_DIR = tests TEST_SOURCES := $(wildcard $(TEST_DIR)/*.cpp) TEST_OBJECTS := $(addprefix $(BUILD_DIR)/, $(notdir $(patsubst %.cpp,%.o,$(TEST_SOURCES)))) +ASAN_TEST_OBJECTS := $(addprefix $(ASAN_BUILD_DIR)/, $(notdir $(patsubst %.cpp,%.o,$(TEST_SOURCES)))) +UBSAN_TEST_OBJECTS := $(addprefix $(UBSAN_BUILD_DIR)/, $(notdir $(patsubst %.cpp,%.o,$(TEST_SOURCES)))) TEST_LINK_FLAGS = -lgtest -lgtest_main TEST_BINARY = $(BUILD_DIR)/test.out +ASAN_TEST_BINARY = $(ASAN_BUILD_DIR)/test.out +UBSAN_TEST_BINARY = $(UBSAN_BUILD_DIR)/test.out BENCHMARK_DIR = benchmarks BENCHMARK_SOURCES := $(wildcard $(BENCHMARK_DIR)/*.cpp) +BENCHMARK_HEADERS := $(wildcard $(BENCHMARK_DIR)/*.hpp) BENCHMARK_OBJECTS := $(addprefix $(BUILD_DIR)/, $(notdir $(patsubst %.cpp,%.o,$(BENCHMARK_SOURCES)))) BENCHMARK_LINK_FLAGS = -lbenchmark -lbenchmark_main -lpthread BENCHMARK_BINARY = $(BUILD_DIR)/bench.out @@ -28,6 +37,12 @@ PERF_BINARY = $(BUILD_DIR)/perf.out all: test +$(ASAN_BUILD_DIR): + mkdir -p $@ + +$(UBSAN_BUILD_DIR): + mkdir -p $@ + $(BUILD_DIR): mkdir -p $@ @@ -37,12 +52,30 @@ $(SHA3_INC_DIR): $(BUILD_DIR)/%.o: $(TEST_DIR)/%.cpp $(BUILD_DIR) $(SHA3_INC_DIR) $(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(OPT_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) -c $< -o $@ +$(ASAN_BUILD_DIR)/%.o: $(TEST_DIR)/%.cpp $(ASAN_BUILD_DIR) $(SHA3_INC_DIR) $(SUBTLE_INC_DIR) + $(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(ASAN_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) -c $< -o $@ + +$(UBSAN_BUILD_DIR)/%.o: $(TEST_DIR)/%.cpp $(UBSAN_BUILD_DIR) $(SHA3_INC_DIR) $(SUBTLE_INC_DIR) + $(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(UBSAN_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) -c $< -o $@ + $(TEST_BINARY): $(TEST_OBJECTS) $(CXX) $(OPT_FLAGS) $(LINK_FLAGS) $^ $(TEST_LINK_FLAGS) -o $@ +$(ASAN_TEST_BINARY): $(ASAN_TEST_OBJECTS) + $(CXX) $(ASAN_FLAGS) $^ $(TEST_LINK_FLAGS) -o $@ + +$(UBSAN_TEST_BINARY): $(UBSAN_TEST_OBJECTS) + $(CXX) $(UBSAN_FLAGS) $^ $(TEST_LINK_FLAGS) -o $@ + test: $(TEST_BINARY) ./$< --gtest_shuffle --gtest_random_seed=0 +asan_test: $(ASAN_TEST_BINARY) + ./$< --gtest_shuffle --gtest_random_seed=0 + +ubsan_test: $(UBSAN_TEST_BINARY) + ./$< --gtest_shuffle --gtest_random_seed=0 + $(BUILD_DIR)/%.o: $(BENCHMARK_DIR)/%.cpp $(BUILD_DIR) $(SHA3_INC_DIR) $(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(OPT_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) -c $< -o $@ @@ -65,5 +98,5 @@ perf: $(PERF_BINARY) clean: rm -rf $(BUILD_DIR) -format: $(DILITHIUM_SOURCES) $(TEST_SOURCES) $(BENCHMARK_SOURCES) +format: $(DILITHIUM_SOURCES) $(TEST_SOURCES) $(BENCHMARK_SOURCES) $(BENCHMARK_HEADERS) clang-format -i $^ diff --git a/README.md b/README.md index b2ad782..f78b427 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,9 @@ For ensuring functional correctness ( & conformance with the Dilithium specifica > Dilithium Known Answer Tests are generated following the procedure, described in https://gist.github.com/itzmeanjan/d14afc3866b82119221682f0f3c9822d. ```bash -make -j +make -j # Run tests without any sort of sanitizers +make asan_test -j # Run tests with AddressSanitizer enabled +make ubsan_test -j # Run tests with UndefinedBehaviourSanitizer enabled ``` ```bash @@ -112,8 +114,8 @@ Note: Randomizing tests' orders with a seed of 75840 . Benchmarking key generation, signing and verification algorithms for various instantiations of Dilithium digital signature scheme can be done, by issuing ```bash -make benchmark # If you haven't built google-benchmark library with libPFM support. -make perf # If you have built google-benchmark library with libPFM support. +make benchmark -j # If you haven't built google-benchmark library with libPFM support. +make perf -j # If you have built google-benchmark library with libPFM support. ``` > [!NOTE] @@ -134,143 +136,142 @@ make perf # If you have built google-benchmark library with libPFM support ### On 12th Gen Intel(R) Core(TM) i7-1260P **[ Compiled with GCC-13.2.0 ]** ```bash -2023-12-03T20:02:44+04:00 +2023-12-24T13:05:17+04:00 Running ./build/perf.out -Run on (16 X 2599.08 MHz CPU s) +Run on (16 X 4129.81 MHz CPU s) CPU Caches: L1 Data 48 KiB (x8) L1 Instruction 32 KiB (x8) L2 Unified 1280 KiB (x8) L3 Unified 18432 KiB (x1) -Load Average: 0.54, 0.41, 0.42 +Load Average: 0.33, 0.29, 0.27 -------------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations CYCLES items_per_second -------------------------------------------------------------------------------------------------- -dilithium5_keygen_mean 186 us 186 us 32 821.932k 5.38908k/s -dilithium5_keygen_median 185 us 185 us 32 820.155k 5.41793k/s -dilithium5_keygen_stddev 4.65 us 4.65 us 32 9.13685k 133.925/s -dilithium5_keygen_cv 2.51 % 2.51 % 32 1.11% 2.49% -dilithium5_keygen_min 175 us 175 us 32 809.883k 5.0667k/s -dilithium5_keygen_max 197 us 197 us 32 862.716k 5.70414k/s -dilithium2_keygen_mean 67.2 us 67.2 us 32 300.107k 14.8893k/s -dilithium2_keygen_median 67.4 us 67.4 us 32 299.896k 14.8291k/s -dilithium2_keygen_stddev 1.82 us 1.82 us 32 2.51795k 410.616/s -dilithium2_keygen_cv 2.71 % 2.71 % 32 0.84% 2.76% -dilithium2_keygen_min 62.6 us 62.6 us 32 293.409k 14.3046k/s -dilithium2_keygen_max 69.9 us 69.9 us 32 307.17k 15.9763k/s -dilithium5_verify/32_mean 185 us 185 us 32 821.849k 5.41345k/s -dilithium5_verify/32_median 185 us 185 us 32 821.058k 5.41518k/s -dilithium5_verify/32_stddev 5.01 us 5.01 us 32 3.99281k 145.612/s -dilithium5_verify/32_cv 2.71 % 2.71 % 32 0.49% 2.69% -dilithium5_verify/32_min 176 us 176 us 32 819.494k 5.05159k/s -dilithium5_verify/32_max 198 us 198 us 32 843.439k 5.69743k/s -dilithium2_verify/32_mean 71.1 us 71.1 us 32 315.524k 14.0757k/s -dilithium2_verify/32_median 70.9 us 70.9 us 32 315.176k 14.1072k/s -dilithium2_verify/32_stddev 1.81 us 1.81 us 32 1.09955k 361.894/s -dilithium2_verify/32_cv 2.55 % 2.54 % 32 0.35% 2.57% -dilithium2_verify/32_min 67.4 us 67.3 us 32 314.061k 13.5205k/s -dilithium2_verify/32_max 74.0 us 74.0 us 32 318.203k 14.8479k/s -dilithium3_sign/32_mean 562 us 562 us 32 2.60878M 2.67652k/s -dilithium3_sign/32_median 441 us 441 us 32 2.01511M 2.286k/s -dilithium3_sign/32_stddev 447 us 447 us 32 2.09094M 1.53899k/s -dilithium3_sign/32_cv 79.47 % 79.47 % 32 80.15% 57.50% -dilithium3_sign/32_min 185 us 185 us 32 864.577k 503.861/s -dilithium3_sign/32_max 1985 us 1985 us 32 9.20776M 5.40967k/s -dilithium3_verify/32_mean 113 us 113 us 32 502.694k 8.82864k/s -dilithium3_verify/32_median 113 us 113 us 32 502.774k 8.81962k/s -dilithium3_verify/32_stddev 2.48 us 2.48 us 32 1.151k 194.77/s -dilithium3_verify/32_cv 2.19 % 2.19 % 32 0.23% 2.21% -dilithium3_verify/32_min 107 us 107 us 32 497.022k 8.54526k/s -dilithium3_verify/32_max 117 us 117 us 32 504.311k 9.31292k/s -dilithium3_keygen_mean 108 us 108 us 32 468.889k 9.25888k/s -dilithium3_keygen_median 108 us 108 us 32 468.925k 9.2344k/s -dilithium3_keygen_stddev 1.72 us 1.72 us 32 593.411 150.434/s -dilithium3_keygen_cv 1.59 % 1.59 % 32 0.13% 1.62% -dilithium3_keygen_min 102 us 102 us 32 467.707k 9.0229k/s -dilithium3_keygen_max 111 us 111 us 32 470.131k 9.78539k/s -dilithium2_sign/32_mean 449 us 449 us 32 2.08289M 3.52966k/s -dilithium2_sign/32_median 380 us 379 us 32 1.72744M 2.6549k/s -dilithium2_sign/32_stddev 317 us 317 us 32 1.47858M 2.43421k/s -dilithium2_sign/32_cv 70.61 % 70.61 % 32 70.99% 68.96% -dilithium2_sign/32_min 126 us 126 us 32 586.667k 726.071/s -dilithium2_sign/32_max 1377 us 1377 us 32 6.44594M 7.96399k/s -dilithium5_sign/32_mean 792 us 792 us 32 3.69128M 1.86398k/s -dilithium5_sign/32_median 630 us 630 us 32 2.91369M 1.60256k/s -dilithium5_sign/32_stddev 484 us 484 us 32 2.27588M 1.11593k/s -dilithium5_sign/32_cv 61.14 % 61.15 % 32 61.66% 59.87% -dilithium5_sign/32_min 282 us 282 us 32 1.31765M 607.987/s -dilithium5_sign/32_max 1645 us 1645 us 32 7.70207M 3.54424k/s +dilithium5_sign/32_mean 573 us 573 us 32 2.62244M 2.15221k/s +dilithium5_sign/32_median 570 us 570 us 32 2.56475M 1.77223k/s +dilithium5_sign/32_stddev 289 us 289 us 32 1.33162M 983.885/s +dilithium5_sign/32_cv 50.50 % 50.50 % 32 50.78% 45.72% +dilithium5_sign/32_min 266 us 266 us 32 1.22116M 584.246/s +dilithium5_sign/32_max 1712 us 1712 us 32 7.85984M 3.75566k/s +dilithium2_sign/32_mean 413 us 413 us 32 1.92469M 4.31431k/s +dilithium2_sign/32_median 239 us 239 us 32 1.10512M 4.18457k/s +dilithium2_sign/32_stddev 393 us 393 us 32 1.83463M 2.77288k/s +dilithium2_sign/32_cv 95.04 % 95.04 % 32 95.32% 64.27% +dilithium2_sign/32_min 114 us 114 us 32 531.5k 548.719/s +dilithium2_sign/32_max 1822 us 1822 us 32 8.53117M 8.78612k/s +dilithium3_keygen_mean 100 us 100 us 32 450.02k 10.0107k/s +dilithium3_keygen_median 98.4 us 98.4 us 32 450.223k 10.158k/s +dilithium3_keygen_stddev 3.72 us 3.72 us 32 1.44564k 370.119/s +dilithium3_keygen_cv 3.72 % 3.72 % 32 0.32% 3.70% +dilithium3_keygen_min 95.9 us 95.9 us 32 443.754k 9.43455k/s +dilithium3_keygen_max 106 us 106 us 32 452.294k 10.4222k/s +dilithium3_sign/32_mean 511 us 511 us 32 2.38786M 2.9922k/s +dilithium3_sign/32_median 346 us 346 us 32 1.60917M 2.89361k/s +dilithium3_sign/32_stddev 370 us 370 us 32 1.73198M 1.82606k/s +dilithium3_sign/32_cv 72.38 % 72.38 % 32 72.53% 61.03% +dilithium3_sign/32_min 171 us 171 us 32 798.767k 611.443/s +dilithium3_sign/32_max 1636 us 1635 us 32 7.65597M 5.86292k/s +dilithium3_verify/32_mean 104 us 104 us 32 472.594k 9.60074k/s +dilithium3_verify/32_median 102 us 102 us 32 472.428k 9.82254k/s +dilithium3_verify/32_stddev 3.54 us 3.55 us 32 870.197 322.901/s +dilithium3_verify/32_cv 3.40 % 3.40 % 32 0.18% 3.36% +dilithium3_verify/32_min 101 us 101 us 32 470.808k 9.0583k/s +dilithium3_verify/32_max 110 us 110 us 32 474.024k 9.92375k/s +dilithium5_keygen_mean 168 us 168 us 32 749.512k 5.96366k/s +dilithium5_keygen_median 167 us 167 us 32 748.591k 6.00453k/s +dilithium5_keygen_stddev 4.98 us 4.97 us 32 3.57104k 175.466/s +dilithium5_keygen_cv 2.96 % 2.96 % 32 0.48% 2.94% +dilithium5_keygen_min 162 us 162 us 32 743.167k 5.63682k/s +dilithium5_keygen_max 177 us 177 us 32 757.093k 6.18287k/s +dilithium2_verify/32_mean 64.0 us 64.0 us 32 292.642k 15.631k/s +dilithium2_verify/32_median 62.9 us 62.9 us 32 292.392k 15.9072k/s +dilithium2_verify/32_stddev 2.00 us 2.00 us 32 891.422 476.406/s +dilithium2_verify/32_cv 3.12 % 3.12 % 32 0.30% 3.05% +dilithium2_verify/32_min 62.4 us 62.4 us 32 291.515k 14.6913k/s +dilithium2_verify/32_max 68.1 us 68.1 us 32 294.848k 16.0265k/s +dilithium2_keygen_mean 59.0 us 59.0 us 32 269.516k 16.9619k/s +dilithium2_keygen_median 58.4 us 58.4 us 32 269.005k 17.1373k/s +dilithium2_keygen_stddev 1.96 us 1.96 us 32 2.73875k 546.274/s +dilithium2_keygen_cv 3.31 % 3.31 % 32 1.02% 3.22% +dilithium2_keygen_min 56.7 us 56.7 us 32 265.559k 15.6044k/s +dilithium2_keygen_max 64.1 us 64.1 us 32 275.942k 17.6457k/s +dilithium5_verify/32_mean 171 us 171 us 32 780.57k 5.8392k/s +dilithium5_verify/32_median 168 us 168 us 32 780.972k 5.95881k/s +dilithium5_verify/32_stddev 5.63 us 5.63 us 32 1.43262k 187.536/s +dilithium5_verify/32_cv 3.28 % 3.28 % 32 0.18% 3.21% +dilithium5_verify/32_min 167 us 167 us 32 778.381k 5.49484k/s +dilithium5_verify/32_max 182 us 182 us 32 782.668k 6.00273k/s ``` -### On 12th Gen Intel(R) Core(TM) i7-1260P **[ Compiled with Clang-17.0.2 ]** +### On ARM Cortex-A72 i.e. Raspberry Pi 4B ( compiled with GCC-13.2.0 ) ```bash -2023-12-03T20:04:56+04:00 +2023-12-24T15:13:38+05:30 Running ./build/perf.out -Run on (16 X 769.632 MHz CPU s) +Run on (4 X 1800 MHz CPU s) CPU Caches: - L1 Data 48 KiB (x8) - L1 Instruction 32 KiB (x8) - L2 Unified 1280 KiB (x8) - L3 Unified 18432 KiB (x1) -Load Average: 0.70, 0.53, 0.46 + L1 Data 32 KiB (x4) + L1 Instruction 48 KiB (x4) + L2 Unified 1024 KiB (x1) +Load Average: 1.18, 1.24, 0.78 -------------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations CYCLES items_per_second -------------------------------------------------------------------------------------------------- -dilithium3_keygen_mean 83.5 us 83.5 us 32 357.922k 11.9822k/s -dilithium3_keygen_median 82.9 us 82.9 us 32 357.476k 12.0596k/s -dilithium3_keygen_stddev 1.66 us 1.66 us 32 1.37386k 237.754/s -dilithium3_keygen_cv 1.99 % 1.99 % 32 0.38% 1.98% -dilithium3_keygen_min 79.2 us 79.1 us 32 355.796k 11.5482k/s -dilithium3_keygen_max 86.6 us 86.6 us 32 361.6k 12.635k/s -dilithium3_sign/32_mean 454 us 454 us 32 2.03248M 3.24843k/s -dilithium3_sign/32_median 352 us 352 us 32 1.59002M 2.84438k/s -dilithium3_sign/32_stddev 380 us 380 us 32 1.70717M 1.85244k/s -dilithium3_sign/32_cv 83.64 % 83.64 % 32 83.99% 57.03% -dilithium3_sign/32_min 138 us 138 us 32 637.75k 464.313/s -dilithium3_sign/32_max 2154 us 2154 us 32 9.76395M 7.2601k/s -dilithium5_verify/32_mean 141 us 141 us 32 608.394k 7.09008k/s -dilithium5_verify/32_median 141 us 141 us 32 609.789k 7.10327k/s -dilithium5_verify/32_stddev 3.10 us 3.10 us 32 6.25188k 156.944/s -dilithium5_verify/32_cv 2.20 % 2.20 % 32 1.03% 2.21% -dilithium5_verify/32_min 131 us 131 us 32 578.326k 6.77614k/s -dilithium5_verify/32_max 148 us 148 us 32 612.502k 7.61134k/s -dilithium5_keygen_mean 135 us 135 us 32 574.267k 7.40962k/s -dilithium5_keygen_median 134 us 134 us 32 574.71k 7.4711k/s -dilithium5_keygen_stddev 2.88 us 2.88 us 32 2.26057k 156.775/s -dilithium5_keygen_cv 2.13 % 2.13 % 32 0.39% 2.12% -dilithium5_keygen_min 129 us 129 us 32 569.574k 7.11534k/s -dilithium5_keygen_max 141 us 141 us 32 578.219k 7.756k/s -dilithium5_sign/32_mean 490 us 490 us 32 2.18245M 2.74663k/s -dilithium5_sign/32_median 418 us 417 us 32 1.82931M 2.39665k/s -dilithium5_sign/32_stddev 302 us 302 us 32 1.35018M 1.35967k/s -dilithium5_sign/32_cv 61.57 % 61.57 % 32 61.87% 49.50% -dilithium5_sign/32_min 215 us 215 us 32 955.368k 699.95/s -dilithium5_sign/32_max 1429 us 1429 us 32 6.17288M 4.64585k/s -dilithium2_verify/32_mean 54.1 us 54.1 us 32 233.523k 18.4952k/s -dilithium2_verify/32_median 53.8 us 53.8 us 32 234.186k 18.5735k/s -dilithium2_verify/32_stddev 1.67 us 1.67 us 32 5.12152k 557.174/s -dilithium2_verify/32_cv 3.08 % 3.08 % 32 2.19% 3.01% -dilithium2_verify/32_min 50.9 us 50.9 us 32 206.153k 16.8808k/s -dilithium2_verify/32_max 59.2 us 59.2 us 32 237.767k 19.6343k/s -dilithium2_sign/32_mean 274 us 274 us 32 1.22063M 5.6924k/s -dilithium2_sign/32_median 197 us 197 us 32 882.499k 5.08766k/s -dilithium2_sign/32_stddev 218 us 218 us 32 0.9994M 3.22799k/s -dilithium2_sign/32_cv 79.75 % 79.75 % 32 81.88% 56.71% -dilithium2_sign/32_min 94.5 us 94.5 us 32 421.809k 973.148/s -dilithium2_sign/32_max 1028 us 1028 us 32 4.80341M 10.5799k/s -dilithium3_verify/32_mean 86.6 us 86.6 us 32 377.733k 11.5572k/s -dilithium3_verify/32_median 86.4 us 86.4 us 32 377.276k 11.5776k/s -dilithium3_verify/32_stddev 1.79 us 1.79 us 32 1.81191k 238.962/s -dilithium3_verify/32_cv 2.07 % 2.07 % 32 0.48% 2.07% -dilithium3_verify/32_min 82.0 us 82.0 us 32 375.822k 11.1021k/s -dilithium3_verify/32_max 90.1 us 90.1 us 32 383.959k 12.1901k/s -dilithium2_keygen_mean 50.6 us 50.6 us 32 217.128k 19.7773k/s -dilithium2_keygen_median 50.4 us 50.4 us 32 216.995k 19.8543k/s -dilithium2_keygen_stddev 1.18 us 1.18 us 32 1.59103k 454.985/s -dilithium2_keygen_cv 2.33 % 2.34 % 32 0.73% 2.30% -dilithium2_keygen_min 47.7 us 47.7 us 32 214.661k 18.5924k/s -dilithium2_keygen_max 53.8 us 53.8 us 32 221.737k 20.9436k/s +dilithium5_verify/32_mean 982 us 982 us 32 1.76352M 1.01874k/s +dilithium5_verify/32_median 982 us 982 us 32 1.76347M 1.01877k/s +dilithium5_verify/32_stddev 0.618 us 0.417 us 32 447.3 0.432759/s +dilithium5_verify/32_cv 0.06 % 0.04 % 32 0.03% 0.04% +dilithium5_verify/32_min 981 us 981 us 32 1.76263M 1.01761k/s +dilithium5_verify/32_max 984 us 983 us 32 1.76433M 1.0195k/s +dilithium3_verify/32_mean 627 us 627 us 32 1.12594M 1.59563k/s +dilithium3_verify/32_median 627 us 627 us 32 1.12584M 1.59581k/s +dilithium3_verify/32_stddev 0.369 us 0.267 us 32 385.4 0.678979/s +dilithium3_verify/32_cv 0.06 % 0.04 % 32 0.03% 0.04% +dilithium3_verify/32_min 626 us 626 us 32 1.12543M 1.59386k/s +dilithium3_verify/32_max 628 us 627 us 32 1.12704M 1.59664k/s +dilithium2_verify/32_mean 397 us 397 us 32 712.872k 2.51939k/s +dilithium2_verify/32_median 397 us 397 us 32 712.921k 2.51999k/s +dilithium2_verify/32_stddev 1.01 us 0.901 us 32 514.135 5.66412/s +dilithium2_verify/32_cv 0.25 % 0.23 % 32 0.07% 0.22% +dilithium2_verify/32_min 396 us 396 us 32 711.708k 2.49017k/s +dilithium2_verify/32_max 402 us 402 us 32 713.909k 2.52477k/s +dilithium2_keygen_mean 332 us 332 us 32 596.912k 3.00994k/s +dilithium2_keygen_median 333 us 333 us 32 597.624k 3.00643k/s +dilithium2_keygen_stddev 1.30 us 1.31 us 32 2.40391k 11.8347/s +dilithium2_keygen_cv 0.39 % 0.39 % 32 0.40% 0.39% +dilithium2_keygen_min 329 us 329 us 32 591.684k 2.98561k/s +dilithium2_keygen_max 335 us 335 us 32 601.775k 3.03661k/s +dilithium3_sign/32_mean 4059 us 4058 us 32 7.29006M 365.449/s +dilithium3_sign/32_median 3304 us 3303 us 32 5.93442M 302.715/s +dilithium3_sign/32_stddev 2724 us 2723 us 32 4.89362M 233.459/s +dilithium3_sign/32_cv 67.10 % 67.11 % 32 67.13% 63.88% +dilithium3_sign/32_min 1171 us 1171 us 32 2.10391M 71.6582/s +dilithium3_sign/32_max 13957 us 13955 us 32 25.0762M 854.011/s +dilithium5_sign/32_mean 6044 us 6043 us 32 10.8539M 270.538/s +dilithium5_sign/32_median 4732 us 4732 us 32 8.49736M 211.344/s +dilithium5_sign/32_stddev 4933 us 4932 us 32 8.86216M 184.414/s +dilithium5_sign/32_cv 81.63 % 81.63 % 32 81.65% 68.17% +dilithium5_sign/32_min 1722 us 1722 us 32 3.09382M 38.8392/s +dilithium5_sign/32_max 25754 us 25747 us 32 46.2578M 580.602/s +dilithium5_keygen_mean 869 us 869 us 32 1.56125M 1.15065k/s +dilithium5_keygen_median 869 us 869 us 32 1.5609M 1.15112k/s +dilithium5_keygen_stddev 2.55 us 2.30 us 32 3.61955k 3.03513/s +dilithium5_keygen_cv 0.29 % 0.26 % 32 0.23% 0.26% +dilithium5_keygen_min 865 us 865 us 32 1.55393M 1.14095k/s +dilithium5_keygen_max 879 us 876 us 32 1.57077M 1.15633k/s +dilithium3_keygen_mean 548 us 547 us 32 983.306k 1.82668k/s +dilithium3_keygen_median 547 us 547 us 32 983.369k 1.82715k/s +dilithium3_keygen_stddev 0.969 us 0.740 us 32 698.653 2.45958/s +dilithium3_keygen_cv 0.18 % 0.14 % 32 0.07% 0.13% +dilithium3_keygen_min 547 us 547 us 32 981.931k 1.81604k/s +dilithium3_keygen_max 552 us 551 us 32 984.944k 1.82955k/s +dilithium2_sign/32_mean 2253 us 2253 us 32 4.04616M 666.05/s +dilithium2_sign/32_median 1763 us 1762 us 32 3.16635M 567.381/s +dilithium2_sign/32_stddev 1738 us 1738 us 32 3.12125M 385.348/s +dilithium2_sign/32_cv 77.14 % 77.13 % 32 77.14% 57.86% +dilithium2_sign/32_min 782 us 782 us 32 1.40548M 109.753/s +dilithium2_sign/32_max 9114 us 9111 us 32 16.3662M 1.27835k/s ``` ## Usage @@ -402,7 +403,7 @@ int main() { I suggest you look at example [program](./examples/dilithium2.cpp), which demonstrates how to use Dilithium2 API, similarly you can use Dilithium{3,5} API. ```bash -$ g++ -std=c++20 -Wall -Wextra -pedantic -march=native -O3 -I ./include -I ./sha3/include examples/dilithium2.cpp && ./a.out +$ g++ -std=c++20 -Wall -Wextra -pedantic -O3 -march=native -I ./include -I ./sha3/include examples/dilithium2.cpp && ./a.out Dilithium @ NIST security level 2 seed : e9ae2e74d3bc6e60b4966eb0da2f6d03c0d864ef7b001947d294d64cb90fd8c7 pubkey : 1587ba74cc89529d73cc656feed4787e7c3b81399f211eaa262fe7d2e6b4c1e36ab472cf5f3f86a34f24504168dd3d5fa5464b7baeef99cfcc0cd5327a58c7a14dd26b231c70587011e23536d6710bdc265d58dbc036f12c4d20c867b912929b2b4113319927d262b19faaa5a700464bc31fe5082c57d533075115f80adf74d6dbed2387391006c4a30d3fdb56bd536e6f28c0f0030f55cc5fa2fa8ad4ade48bc07b23e8be49bdd4b1af6965ce6621d9a4a0f6dc634161702b3436d18c52f45ef3df50bd56d8572e4c10db101ae8b035f25f4f94169306de474f5b63ef50765c55b8b1088ed524c95441a31a7e886166dc1bbe61ca5c43ec81e60087b749c1a1143439c7df852e49087cc9bf442ffa01e02e9c21ba734195561e764a0f8700313a6c7002728220e27a7cffbc123c77e41b8a34ebcc9b2c7137da55aedcc7e37f9ebb0d146c60ada1089a57ee3b8e9a2456daf8900b65f3c74fe0150331f0c6b13ec18a12e3033b74ca934dadcc8fc03d22277fec5baa6565340de50cd04656cf8482c13f696316ef5bdd049d81a8b9197ebb817f4da289efb839ddef0101e13b3c5a76411fa48a1899c6a3b3c9a01143f47dc1b215073298fae398edbda469bf9fa314886ed23984335647ff6dbde83844b26b997fbf65069b5e6b304b1934863ce9395f247501c7071d0519949adb6708b4bcfef36842073f435a22e83355a87662531e5fdf5ce985bc96b23f993046fb32819a14b3f9c8b68cf71b5d1841cdfc0197ed5c2751846504afc198652f41f611ab1e617606a8557f672e11bad50475fac511c83e2b8970b86e2e4e06d055f46adf218cfd8e7c711ba5a92acbd015cfb21ce41f751111a58115f78742c7f48ded07149e6d4e9f65e11a616443475a0d1f3a100ba3532d79b13393e3671dc97fc6e4befd4497b6d0c8c7be4a7f9d4a5eff849c6240c985c4fd6bd8b548ed9c1c382a4f60b046beec03a9f7f4e26056ccc90357e726b768bc7b6ce75f9ab447a07b60b97bc8a591736dcd023fb8473e8de0898f34393e5be897802f017d7be7fb4766f866520fe2a5032bdc630c453c7f00a48307465185b25e1061452399513f569bee79db2ad1dab0d6e513a5a9542a5b438cbc140e56ea7ce9f951d39b0a5de98df072197632683892b2741bac4d0059fd5dfad37a3e3e2a4e3726933c81a5b1dd9c7f1bb6104784457dda8888c30e95d3b6786038c3c9b970c6a24d4df85b966315855e4b00ef7a5a2e8e0ba64cfd76de2302077425a1f6deaebed670c3e7d144c74bc2068fbaa3cdb44a1ae98f912272c1d980dba976ca93206cce50a4128066411e7322c1a3e4ecdba2011a0f64284bb5551c840f9305233700cbd1cb4d387fdb6066c0cff6b8aece0af44a78f6c5054228664c29092081fb341761696a25ac8aceb360e7e6764230cf83bae14746f7f8b6569d88e01b604b3294286e922e2314dd180a82eae1d31af6c7eec71917acc18cb86c6f590db24d6653d39dc995b576088784a6f95bdb48dbd4edd6e1d8e936b27f7e9d8d5c59c0d54dc690c8c8e0cb2c06561b2a8b89fad0550a7c07a5c6165bb7dc3b72e7cc559bdf9259a9290d03dda1f57779b89bd46f801d24ebd6cb6d9924225d817b625afb42b405163494d005e5a5e49a87ba55482adb81dc0e07717d7d3278880233720aabadd9cef8638474f333a92dca085b22935add88ebc3f33a5db1f17db92f4b5c76cad1b51477dbd729bdbb86d40f762668969fdc92692451e816113bc44b6398d6535ffa81e4685457a9c8eb0058445e8dbc80e36fe62c7b4267d6a47ca466f50c1921bd3343e1f5d6e0dd86fb67e32a0bdee8 diff --git a/benchmarks/bench_helper.hpp b/benchmarks/bench_helper.hpp index 6922991..58e62ec 100644 --- a/benchmarks/bench_helper.hpp +++ b/benchmarks/bench_helper.hpp @@ -2,10 +2,5 @@ #include #include -const auto compute_min = [](const std::vector& v) -> double { - return *std::min_element(v.begin(), v.end()); -}; - -const auto compute_max = [](const std::vector& v) -> double { - return *std::max_element(v.begin(), v.end()); -}; +const auto compute_min = [](const std::vector& v) -> double { return *std::min_element(v.begin(), v.end()); }; +const auto compute_max = [](const std::vector& v) -> double { return *std::max_element(v.begin(), v.end()); }; diff --git a/include/bit_packing.hpp b/include/bit_packing.hpp index d69cf0e..9f61a0c 100644 --- a/include/bit_packing.hpp +++ b/include/bit_packing.hpp @@ -346,7 +346,7 @@ encode_hint_bits(std::span h, std::span(j) }; - arr[idx] = br[flg]; + arr[idx] = br[static_cast(flg)]; idx += 1ul * flg; } @@ -365,11 +365,7 @@ template static inline constexpr bool decode_hint_bits(std::span arr, std::span h) { - // Instead of using std::memset, prefer following, to avoid compiler warnings. - // Compiler should ideally be able to use std::memset for executing following. - for (size_t i = 0; i < h.size(); i++) { - h[i] = field::zq_t::zero(); - } + std::fill(h.begin(), h.end(), field::zq_t::zero()); size_t idx = 0; bool failed = false; @@ -384,7 +380,7 @@ decode_hint_bits(std::span arr, std::span idx; const bool flg1 = flg0 & (arr[j] <= arr[j - flg0 * 1]); diff --git a/include/field.hpp b/include/field.hpp index db28524..411df6a 100644 --- a/include/field.hpp +++ b/include/field.hpp @@ -46,11 +46,8 @@ struct zq_t // Modulo addition of two Zq elements. inline constexpr zq_t operator+(const zq_t rhs) const { - const uint32_t t0 = this->v + rhs.v; - const uint32_t mask = (-static_cast(t0 >= Q)); - const uint32_t t1 = t0 - (mask & Q); - - return zq_t(t1); + const uint32_t t = reduce_once(this->v + rhs.v); + return zq_t(t); } // Compound modulo addition of two Zq elements. @@ -132,9 +129,7 @@ struct zq_t const uint64_t t5 = res * static_cast(Q); const uint32_t t6 = static_cast(t2 - t5); - const uint32_t mask = (-static_cast(t6 >= Q)); - const uint32_t t7 = t6 - (mask & Q); - + const uint32_t t7 = reduce_once(t6); return zq_t(t7); } @@ -237,11 +232,20 @@ struct zq_t const uint32_t t3 = t1 + t2; const uint32_t t4 = t3 & mask23; - const bool flg1 = t4 >= Q; - const uint32_t t5 = (-static_cast(flg1)) & Q; - const uint32_t t6 = t4 - t5; + const uint32_t t5 = reduce_once(t4); + return t5; + } + + // Given a 32 -bit unsigned integer `v` such that `v` ∈ [0, 2*Q), this routine can be invoked for reducing `v` modulo + // prime Q. + static inline constexpr uint32_t reduce_once(const uint32_t val) + { + const uint32_t t0 = val - Q; + const uint32_t t1 = -(t0 >> 31); + const uint32_t t2 = t1 & Q; + const uint32_t t3 = t0 + t2; - return t6; + return t3; } }; diff --git a/tests/test_bit_packing.cpp b/tests/test_bit_packing.cpp index 13f1a31..55d9a23 100644 --- a/tests/test_bit_packing.cpp +++ b/tests/test_bit_packing.cpp @@ -60,12 +60,10 @@ template void generate_random_hint_bits(std::span poly) { - for (size_t i = 0; i < k * ntt::N; i++) { - poly[i] = field::zq_t::zero(); - } + std::fill(poly.begin(), poly.end(), field::zq_t::zero()); constexpr size_t frm = 0; - constexpr size_t to = k * ntt::N - 1; + constexpr size_t to = poly.size() - 1; std::random_device rd; std::mt19937_64 gen(rd()); @@ -73,7 +71,7 @@ generate_random_hint_bits(std::span poly) for (size_t i = 0; i < ω; i++) { const size_t idx = dis(gen); - poly[idx] = field::zq_t{ 1u }; + poly[idx] = field::zq_t::one(); } }