From 68c846c6d045230fc311b802f9bd8c31ef99c6e0 Mon Sep 17 00:00:00 2001 From: Paul Date: Mon, 5 Aug 2024 20:39:51 -0400 Subject: [PATCH] Loaded samples get MD5 sum and it is in stream (#1076) Not used for anything yet, but it will be soon enough! Closes #1071 --- libs/CMakeLists.txt | 3 +- libs/md5sum/.gitignore | 2 + libs/md5sum/CMakeLists.txt | 22 ++ libs/md5sum/LICENSE | 21 ++ libs/md5sum/README.md | 298 ++++++++++++++++++ libs/md5sum/README.shortcircuit | 4 + libs/md5sum/README_zh-CN.md | 296 +++++++++++++++++ libs/md5sum/benchmark.cc | 43 +++ libs/md5sum/demo.cc | 80 +++++ libs/md5sum/src/impl/constexpr.inl | 100 ++++++ libs/md5sum/src/impl/core.cc | 98 ++++++ libs/md5sum/src/impl/inline.inl | 42 +++ libs/md5sum/src/impl/sine.inl | 62 ++++ libs/md5sum/src/impl/value.inl | 67 ++++ libs/md5sum/src/impl/wrapper.cc | 39 +++ libs/md5sum/src/md5.h | 83 +++++ libs/md5sum/test/assert.cc | 200 ++++++++++++ libs/md5sum/test/hash.cc | 280 ++++++++++++++++ libs/md5sum/test/helper.h | 19 ++ libs/md5sum/test/simple.cc | 30 ++ libs/md5sum/test/stream.cc | 28 ++ libs/sst/sst-basic-blocks | 2 +- src/CMakeLists.txt | 1 + src/engine/engine.cpp | 3 + src/infrastructure/md5support.h | 25 ++ src/json/sample_traits.h | 8 +- .../multisample_import.cpp | 9 +- src/sample/sample.cpp | 9 +- src/sample/sample.h | 16 +- src/sample/sample_manager.cpp | 11 +- src/sample/sample_manager.h | 6 +- 31 files changed, 1888 insertions(+), 19 deletions(-) create mode 100644 libs/md5sum/.gitignore create mode 100644 libs/md5sum/CMakeLists.txt create mode 100644 libs/md5sum/LICENSE create mode 100644 libs/md5sum/README.md create mode 100644 libs/md5sum/README.shortcircuit create mode 100644 libs/md5sum/README_zh-CN.md create mode 100644 libs/md5sum/benchmark.cc create mode 100644 libs/md5sum/demo.cc create mode 100644 libs/md5sum/src/impl/constexpr.inl create mode 100644 libs/md5sum/src/impl/core.cc create mode 100644 libs/md5sum/src/impl/inline.inl create mode 100644 libs/md5sum/src/impl/sine.inl 
create mode 100644 libs/md5sum/src/impl/value.inl create mode 100644 libs/md5sum/src/impl/wrapper.cc create mode 100644 libs/md5sum/src/md5.h create mode 100644 libs/md5sum/test/assert.cc create mode 100644 libs/md5sum/test/hash.cc create mode 100644 libs/md5sum/test/helper.h create mode 100644 libs/md5sum/test/simple.cc create mode 100644 libs/md5sum/test/stream.cc create mode 100644 src/infrastructure/md5support.h diff --git a/libs/CMakeLists.txt b/libs/CMakeLists.txt index c39025fa..94b33c00 100644 --- a/libs/CMakeLists.txt +++ b/libs/CMakeLists.txt @@ -72,4 +72,5 @@ if (${SCXT_USE_MP3}) target_compile_definitions(minimp3 INTERFACE SCXT_USE_MP3=1) endif() -add_subdirectory (melatonin_inspector) \ No newline at end of file +add_subdirectory (melatonin_inspector) +add_subdirectory(md5sum) \ No newline at end of file diff --git a/libs/md5sum/.gitignore b/libs/md5sum/.gitignore new file mode 100644 index 00000000..ad9082d8 --- /dev/null +++ b/libs/md5sum/.gitignore @@ -0,0 +1,2 @@ +/.idea/ +/cmake-build*/ diff --git a/libs/md5sum/CMakeLists.txt b/libs/md5sum/CMakeLists.txt new file mode 100644 index 00000000..63d0ea89 --- /dev/null +++ b/libs/md5sum/CMakeLists.txt @@ -0,0 +1,22 @@ +cmake_minimum_required(VERSION 3.10) +project(md5sum LANGUAGES CXX) + +if ("${CMAKE_VERSION}" VERSION_GREATER_EQUAL "3.13") + cmake_policy(SET CMP0077 NEW) +endif() + +file(GLOB MD5_SRC src/impl/*.cc) +add_library(md5sum STATIC ${MD5_SRC}) + +if (MSVC) + set(MD5_COMPILE_OPTIONS /GR-) +else() + set(MD5_COMPILE_OPTIONS + -fno-rtti -fno-exceptions + -fno-unwind-tables -fno-asynchronous-unwind-tables) +endif() + +target_compile_options(md5sum PRIVATE ${MD5_COMPILE_OPTIONS}) +target_include_directories(md5sum PUBLIC src/) + +add_library(md5sum::md5 ALIAS md5sum) diff --git a/libs/md5sum/LICENSE b/libs/md5sum/LICENSE new file mode 100644 index 00000000..18e591ae --- /dev/null +++ b/libs/md5sum/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Dnomd343 + +Permission is hereby granted, 
free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/libs/md5sum/README.md b/libs/md5sum/README.md new file mode 100644 index 00000000..5b3ed72b --- /dev/null +++ b/libs/md5sum/README.md @@ -0,0 +1,298 @@ +# MD5 + ++ ✔︎ Pure C++ interface and implementation. + ++ ✔︎ Computational performance is higher than coreutils. + ++ ✔︎ Implement complete algorithms based on constant expressions. + ++ ✔︎ Complete unit testing and performance benchmark suite. + ++ ✔︎ Supports compile-time MD5 hash calculation. 
+ +[简体中文](./README_zh-CN.md) + +## Quick Start + +First, you need to introduce this repository into your project: + +```bash +> mkdir my_project && cd ./my_project/ +> git clone https://github.com/dnomd343/md5sum.git +``` + +Create a source code for testing, such as the `main.cc` file: + +```c++ +#include "md5.h" +#include + +using md5::MD5; + +int main() { + std::cout << MD5::Hash("hello world") << std::endl; +} +``` + +Next, you need a CMake configuration, create the `CMakeLists.txt` file: + +```cmake +cmake_minimum_required(VERSION 3.10) +project(my_project LANGUAGES CXX) + +add_subdirectory(md5sum) + +add_executable(my_demo main.cc) +target_link_libraries(my_demo PRIVATE md5sum::md5) +``` + +Finally, we leave the remaining work to the compiler and execute the following command: + +```bash +> cmake -Bcmake-build && cmake --build cmake-build +> ./cmake-build/my_demo +5eb63bbbe01eeed093cb22bb8f5acdc3 +``` + +## Hash Functions + +All hash calculation interfaces are concentrated in the `MD5` class, which is divided into two types: direct calculation and streaming update. The former is a unary call, passing in data and getting the hash result, while the latter allows you to pass in the data multiple times and constantly update the hash value, and finally get the result, which is especially useful when calculating hash of large file. + +The following interfaces are used for direct calculation and returns the hash result in string form: + +```c++ +// Calculate the md5 hash value of the specified data. +static std::string Hash(const std::string_view &data); +static std::string Hash(const void *data, uint64_t len); +``` + +The following interfaces allow streaming calculation of hash values. Use the `Update` interface to pass in data, call the `Final` interface to complete the calculation, and get the hash result in string form through the `Digest` interface. 
Finally, you may need to call `Reset` for the next round of calculation: + +> The return value `MD5&` is a reference to the class itself, which makes chain calling scenarios more convenient. + +```c++ +// Update md5 hash with specified data. +MD5& Update(const std::string_view &data); +MD5& Update(const void *data, uint64_t len); + +// Stop streaming updates and calculate result. +MD5& Final(); + +// Get the string result of md5. +std::string Digest() const; + +// Reset for next round of hashing. +MD5& Reset(); +``` + +Please note that the `Update` interface should no longer be used after calling `Final`. Before the next round of calculation, be sure to call the `Reset` interface, otherwise you will get incorrect results. Here's a simple example: + +```c++ +#include "md5.h" +#include + +using md5::MD5; + +int main() { + MD5 hash; + + hash.Update("hello") + .Update(" ") + .Update("world") + .Final(); + std::cout << hash.Digest() << std::endl; // 5eb63bbbe01eeed093cb22bb8f5acdc3 + + hash.Reset(); + hash.Update("hello world").Final(); + std::cout << hash.Digest() << std::endl; // 5eb63bbbe01eeed093cb22bb8f5acdc3 + + std::cout << MD5::Hash("hello world") << std::endl; // 5eb63bbbe01eeed093cb22bb8f5acdc3 +} +``` + +## Compile-time Hash + +This is a very interesting feature. C++ allows us to perform some constant expression calculations during compilation. You can directly pass in constant binary data and get its MD5 constant value. + +However, due to compiler limitations, constructing `std::string` as a constant expression is currently not supported. Instead, it returns a result of type `std::array` . The function prototypes are as follows: + +```c++ +// Calculate the md5 hash value of the specified data with constexpr. 
+static constexpr std::array HashCE(const std::string_view &data); +static constexpr std::array HashCE(const char *data, uint64_t len); +``` + +Using constant expressions means that the hashing process will be performed at compile-time and the MD5 result will be recorded as a constant in the compilation product. Below is an example: + +```c++ +#include "md5.h" +#include + +using md5::MD5; + +int main() { + constexpr auto my_hash = MD5::HashCE("hello world"); + std::cout << std::string { my_hash.data(), 32 } << std::endl; // 5eb63bbbe01eeed093cb22bb8f5acdc3 +} +``` + +## Unit-Test and Benchmark + +For a robust project, unit tests and performance benchmarks are necessary, and `md5sum` also provides these. Before we begin, we need to clone these third-party library codes: + +```bash +> cd ./md5sum/ +> git submodule update --init +Submodule 'third_party/benchmark' (https://github.com/google/benchmark.git) registered for path 'third_party/benchmark' +Submodule 'third_party/googletest' (https://github.com/google/googletest.git) registered for path 'third_party/googletest' +··· +``` + +Then, execute the following commands to compile: + +```bash +> cmake -DMD5_ENABLE_TESTING=ON -DMD5_ENABLE_BENCHMARK=ON -Bcmake-build +> cmake --build cmake-build +``` + +Let's execute the unit tests: + +```bash +> ./cmake-build/md5_test +Running main() from ··· +[==========] Running 5 tests from 1 test suite. +[----------] Global test environment set-up. +[----------] 5 tests from md5sum +[ RUN ] md5sum.hash +[ OK ] md5sum.hash (0 ms) +[ RUN ] md5sum.hash_ce +[ OK ] md5sum.hash_ce (0 ms) +[ RUN ] md5sum.empty +[ OK ] md5sum.empty (0 ms) +[ RUN ] md5sum.simple +[ OK ] md5sum.simple (0 ms) +[ RUN ] md5sum.stream +[ OK ] md5sum.stream (30 ms) +[----------] 5 tests from md5sum (31 ms total) + +[----------] Global test environment tear-down +[==========] 5 tests from 1 test suite ran. (31 ms total) +[ PASSED ] 5 tests. 
+``` + +There are also performance benchmark tests: + +```bash +> ./cmake-build/md5_benchmark +Running ./cmake-build/md5sum/md5_benchmark +Run on (4 X 4100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x2) + L1 Instruction 32 KiB (x2) + L2 Unified 1280 KiB (x2) + L3 Unified 6144 KiB (x1) +Load Average: 1.07, 0.62, 0.85 +---------------------------------------------------------- +Benchmark Time CPU Iterations +---------------------------------------------------------- +MD5_Digest 9.98 ns 9.97 ns 69955773 +MD5_Update/64 78.7 ns 78.6 ns 8892747 +MD5_Update/256 315 ns 315 ns 2224685 +MD5_Update/1024 1259 ns 1258 ns 556361 +MD5_Update/4096 5034 ns 5034 ns 139048 +MD5_Hash/0 101 ns 101 ns 6883388 +MD5_Hash/64 190 ns 190 ns 3686157 +MD5_Hash/256 414 ns 414 ns 1690953 +MD5_Hash/1024 1358 ns 1358 ns 515488 +MD5_Hash/4096 5137 ns 5133 ns 136354 +``` + +These figures mean that on this CPU, it takes about 10 nanoseconds to export an MD5 string, 78.6 nanoseconds to complete a 64-byte update, and 5.133 microseconds to complete a 4 KiB hash calculation. + +Hash speed is directly related to the single-core performance of the CPU. In most scenarios, the performance bottleneck lies in the CPU rather than the IO part. If you need to verify a large amount of data, xxHash or BLAKE3 will be a more suitable choice. + +## Binary Demo + +This project also provides a demonstration sample, which can calculate the MD5 value of a file. 
You need to use the following command to compile: + +```bash +> cmake -DMD5_BUILD_DEMO=ON -Bcmake-build +> cmake --build cmake-build +``` + +Generate an 8GiB empty file for testing: + +```bash +> dd if=/dev/zero of=test.dat bs=1GiB count=8 +8+0 records in +8+0 records out +8589934592 bytes (8.6 GB, 8.0 GiB) copied, 6.7279 s, 1.3 GB/s + +> du -b test.dat +8589934592 test.dat +``` + +Count their respective times: + +```bash +> time ./cmake-build/md5_demo test.dat +b770351fadae5a96bbaf9702ed97d28d + +real 0m10.849s +user 0m10.588s +sys 0m0.260s + +> time md5sum test.dat +b770351fadae5a96bbaf9702ed97d28d test.dat + +real 0m11.854s +user 0m10.721s +sys 0m1.132s + +> time openssl md5 test.dat +MD5(test.dat)= b770351fadae5a96bbaf9702ed97d28d + +real 0m11.497s +user 0m10.243s +sys 0m1.252s +``` + +## Advanced + +The following options are reserved in the project's CMake configuration, and you can switch them as needed. + ++ `MD5_BUILD_DEMO` : Whether to build demo binary, disabled by default. + ++ `MD5_SHARED_LIB` :Whether to build as a dynamic library, disabled by default. + ++ `MD5_ENABLE_LTO` :Whether to enable LTO optimization, enabled by default. + ++ `MD5_ENABLE_TESTING` :Whether to build project unit tests, disabled by default. + ++ `MD5_ENABLE_BENCHMARK` :Whether to build performance benchmark suites, disabled by default. + +> Note: If you use the Clang compiler and ld linker, since the GNU tools do not understand LLVM bytecode, you need to turn off LTO to link normally, or you can add the `-fuse-ld=lld` option to switch to the lld linker. Generally speaking, Linux users are not recommended to use Clang to compile this project. Under the current performance benchmark, if the `-march=native` optimization is not enabled, in comparisons of Clang18 versus g++12, it typically lags behind by around 20%. 
+ +In addition, when building as a dynamic library, symbols inside the project will be hidden, which means that after stripping, only the following symbols are exposed: + +> Since part of the hash interfaces are implemented inline in the header file, the `FinalImpl` symbol will be exposed. + +```bash +> cmake -DMD5_SHARED_LIB=ON -Bcmake-build +> cmake --build cmake-build +> nm -CD ./cmake-build/libmd5sum.so + w __cxa_finalize@GLIBC_2.2.5 + w __gmon_start__ + w _ITM_deregisterTMCloneTable + w _ITM_registerTMCloneTable + U memcpy@GLIBC_2.14 + U __stack_chk_fail@GLIBC_2.4 +0000000000001cc0 T md5::MD5::Update(void const*, unsigned long) +0000000000001840 T md5::MD5::FinalImpl(void const*, unsigned long) +0000000000001a80 T md5::MD5::Digest[abi:cxx11]() const + U operator new(unsigned long)@GLIBCXX_3.4 +``` + +## License + +MIT ©2024 [@dnomd343](https://github.com/dnomd343) diff --git a/libs/md5sum/README.shortcircuit b/libs/md5sum/README.shortcircuit new file mode 100644 index 00000000..01d64a55 --- /dev/null +++ b/libs/md5sum/README.shortcircuit @@ -0,0 +1,4 @@ +This is a direct import of the MIT-licensed code https://github.com/dnomd343/md5sum at hash + 951c952. We didn't make it a submodule since it has all of googletest as a submodule itself, which +would expand our recursive updates, so we just copied it here with the third_party directory removed and +the CMakeLists adjusted to just build the static lib, but otherwise unchanged.
diff --git a/libs/md5sum/README_zh-CN.md b/libs/md5sum/README_zh-CN.md new file mode 100644 index 00000000..921c60a6 --- /dev/null +++ b/libs/md5sum/README_zh-CN.md @@ -0,0 +1,296 @@ +# MD5 + ++ ✔︎ 纯 C++ 接口及实现。 + ++ ✔︎ 计算性能比 coreutils 更高。 + ++ ✔︎ 基于常量表达式实现完整的算法。 + ++ ✔︎ 完善的单元测试及性能基准套件。 + ++ ✔︎ 支持编译期 MD5 哈希值计算。 + +## 快速开始 + +首先,您需要在项目中引入本仓库: + +```bash +> mkdir my_project && cd ./my_project/ +> git clone https://github.com/dnomd343/md5sum.git +``` + +编写一份源代码用于测试,例如 `main.cc` 文件: + +```c++ +#include "md5.h" +#include + +using md5::MD5; + +int main() { + std::cout << MD5::Hash("hello world") << std::endl; +} +``` + +同时,您需要一份 CMake 配置来驱动它,创建 `CMakeLists.txt` 文件: + +```cmake +cmake_minimum_required(VERSION 3.10) +project(my_project LANGUAGES CXX) + +add_subdirectory(md5sum) + +add_executable(my_demo main.cc) +target_link_libraries(my_demo PRIVATE md5sum::md5) +``` + +最后,我们将剩下的工作交给编译器,执行以下命令: + +```bash +> cmake -Bcmake-build && cmake --build cmake-build +> ./cmake-build/my_demo +5eb63bbbe01eeed093cb22bb8f5acdc3 +``` + +## 哈希接口 + +所有的哈希计算接口都集中在 `MD5` 这个类中,它分为两种:直接计算和流式更新。前者是一元调用,传入数据并得到哈希结果,后者允许您多次将数据传入并不断更新哈希值,并最终得到结果,这在计算大文件哈希时特别有用。 + +以下接口用于直接计算,返回字符串形式的哈希结果: + +```c++ +// Calculate the md5 hash value of the specified data. +static std::string Hash(const std::string_view &data); +static std::string Hash(const void *data, uint64_t len); +``` + +以下接口允许流式计算哈希值,使用 `Update` 接口传入数据,调用 `Final` 接口完成计算,并通过 `Digest` 接口得到字符串形式的哈希结果,最后,您可能需要调用 `Reset` 为下一轮计算初始化: + +> 返回值 `MD5&` 是类自身的引用,它使得链式调用场景更为方便。 + +```c++ +// Update md5 hash with specified data. +MD5& Update(const std::string_view &data); +MD5& Update(const void *data, uint64_t len); + +// Stop streaming updates and calculate result. +MD5& Final(); + +// Get the string result of md5. +std::string Digest() const; + +// Reset for next round of hashing. 
+MD5& Reset(); +``` + +请注意,在调用 `Final` 后不再应该使用 `Update` 接口,在进行下一轮计算前,请务必调用 `Reset` 接口,否则将得到错误的结果。以下是一个简单的示例: + +```c++ +#include "md5.h" +#include + +using md5::MD5; + +int main() { + MD5 hash; + + hash.Update("hello") + .Update(" ") + .Update("world") + .Final(); + std::cout << hash.Digest() << std::endl; // 5eb63bbbe01eeed093cb22bb8f5acdc3 + + hash.Reset(); + hash.Update("hello world").Final(); + std::cout << hash.Digest() << std::endl; // 5eb63bbbe01eeed093cb22bb8f5acdc3 + + std::cout << MD5::Hash("hello world") << std::endl; // 5eb63bbbe01eeed093cb22bb8f5acdc3 +} +``` + +## 编译期哈希 + +这是一个很有趣的特性,C++ 允许我们在编译的时候进行一些常量表达式计算,您可以直接将常量二进制数据传入,并得到它的 MD5 常量值。 + +不过由于编译器限制,在当前并不支持构造 `std::string` 作为常量表达式,作为替代,它返回 `std::array` 类型的结果,函数原型如下: + +```c++ +// Calculate the md5 hash value of the specified data with constexpr. +static constexpr std::array HashCE(const std::string_view &data); +static constexpr std::array HashCE(const char *data, uint64_t len); +``` + +使用常量表达式意味着,哈希过程将在编译期进行,MD5 结果将作为常量记录到编译产物中。下面是一个例子: + +```c++ +#include "md5.h" +#include + +using md5::MD5; + +int main() { + constexpr auto my_hash = MD5::HashCE("hello world"); + std::cout << std::string { my_hash.data(), 32 } << std::endl; // 5eb63bbbe01eeed093cb22bb8f5acdc3 +} +``` + +## 测试与基准 + +对于一个鲁棒的项目,单元测试和性能基准是很有必要的,`md5sum` 同样提供了这些。在开始之前,我们需要先克隆这些第三方库代码: + +```bash +> cd ./md5sum/ +> git submodule update --init +Submodule 'third_party/benchmark' (https://github.com/google/benchmark.git) registered for path 'third_party/benchmark' +Submodule 'third_party/googletest' (https://github.com/google/googletest.git) registered for path 'third_party/googletest' +··· +``` + +然后,执行以下命令进行编译: + +```bash +> cmake -DMD5_ENABLE_TESTING=ON -DMD5_ENABLE_BENCHMARK=ON -Bcmake-build +> cmake --build cmake-build +``` + +让我们执行单元测试: + +```bash +> ./cmake-build/md5_test +Running main() from ··· +[==========] Running 5 tests from 1 test suite. +[----------] Global test environment set-up. 
+[----------] 5 tests from md5sum +[ RUN ] md5sum.hash +[ OK ] md5sum.hash (0 ms) +[ RUN ] md5sum.hash_ce +[ OK ] md5sum.hash_ce (0 ms) +[ RUN ] md5sum.empty +[ OK ] md5sum.empty (0 ms) +[ RUN ] md5sum.simple +[ OK ] md5sum.simple (0 ms) +[ RUN ] md5sum.stream +[ OK ] md5sum.stream (30 ms) +[----------] 5 tests from md5sum (31 ms total) + +[----------] Global test environment tear-down +[==========] 5 tests from 1 test suite ran. (31 ms total) +[ PASSED ] 5 tests. +``` + +还有性能基准的测试: + +```bash +> ./cmake-build/md5_benchmark +Running ./cmake-build/md5sum/md5_benchmark +Run on (4 X 4100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x2) + L1 Instruction 32 KiB (x2) + L2 Unified 1280 KiB (x2) + L3 Unified 6144 KiB (x1) +Load Average: 1.07, 0.62, 0.85 +---------------------------------------------------------- +Benchmark Time CPU Iterations +---------------------------------------------------------- +MD5_Digest 9.98 ns 9.97 ns 69955773 +MD5_Update/64 78.7 ns 78.6 ns 8892747 +MD5_Update/256 315 ns 315 ns 2224685 +MD5_Update/1024 1259 ns 1258 ns 556361 +MD5_Update/4096 5034 ns 5034 ns 139048 +MD5_Hash/0 101 ns 101 ns 6883388 +MD5_Hash/64 190 ns 190 ns 3686157 +MD5_Hash/256 414 ns 414 ns 1690953 +MD5_Hash/1024 1358 ns 1358 ns 515488 +MD5_Hash/4096 5137 ns 5133 ns 136354 +``` + +这些数据意味着,在这颗 CPU 上,导出一次 MD5 字符串需要约 10 纳秒,完成 64 字节的更新需要 78.6 纳秒,完成 4 KiB 的哈希计算需要 5.133 微秒。 + +哈希速度与 CPU 的单核性能有直接关系,绝大多数场景下,性能瓶颈在于 CPU 而非 IO 部分。如果您需要校验大量数据,xxHash 或者 BLAKE3 将会是更合适的选择。 + +## 二进制示例 + +本项目同时提供了一个演示样例,它可以实现对文件的 MD5 值计算,您需要使用以下命令编译: + +```bash +> cmake -DMD5_BUILD_DEMO=ON -Bcmake-build +> cmake --build cmake-build +``` + +生成一个 8GiB 的空文件用于测试: + +```bash +> dd if=/dev/zero of=test.dat bs=1GiB count=8 +8+0 records in +8+0 records out +8589934592 bytes (8.6 GB, 8.0 GiB) copied, 6.7279 s, 1.3 GB/s + +> du -b test.dat +8589934592 test.dat +``` + +统计它们各自的用时: + +```bash +> time ./cmake-build/md5_demo test.dat +b770351fadae5a96bbaf9702ed97d28d + +real 0m10.849s +user 0m10.588s +sys 0m0.260s + +> time 
md5sum test.dat +b770351fadae5a96bbaf9702ed97d28d test.dat + +real 0m11.854s +user 0m10.721s +sys 0m1.132s + +> time openssl md5 test.dat +MD5(test.dat)= b770351fadae5a96bbaf9702ed97d28d + +real 0m11.497s +user 0m10.243s +sys 0m1.252s +``` + +## 高级选项 + +在项目的 CMake 配置中预留了以下选项,您可以按需进行开关。 + ++ `MD5_BUILD_DEMO` :是否构建演示二进制,默认关闭 + ++ `MD5_SHARED_LIB` :是否构建为动态库,默认关闭 + ++ `MD5_ENABLE_LTO` :是否开启 LTO 优化,默认打开 + ++ `MD5_ENABLE_TESTING` :是否构建项目单元测试,默认关闭 + ++ `MD5_ENABLE_BENCHMARK` :是否构建性能基准套件,默认关闭 + +> 注意:如果您使用 Clang 编译器和 ld 链接器,由于 GNU 工具并不认识 LLVM 字节码,需要关闭 LTO 才能正常链接,或者您可以增加 `-fuse-ld=lld` 选项切换到 lld 链接器。普遍情况下,Linux 用户并不建议使用 Clang 编译本项目,在当前的性能基准下,如果未开启 `-march=native` 优化,在 Clang18 与 g++12 的对比中,它通常会落后 20% 左右。 + +此外,在构建为动态库时,项目内部的符号将被隐藏,这意味着 strip 以后,仅有以下符号暴露: + +> 由于部分哈希接口在头文件内联实现,因此 `FinalImpl` 符号将对外暴露。 + +```bash +> cmake -DMD5_SHARED_LIB=ON -Bcmake-build +> cmake --build cmake-build +> nm -CD ./cmake-build/libmd5sum.so + w __cxa_finalize@GLIBC_2.2.5 + w __gmon_start__ + w _ITM_deregisterTMCloneTable + w _ITM_registerTMCloneTable + U memcpy@GLIBC_2.14 + U __stack_chk_fail@GLIBC_2.4 +0000000000001cc0 T md5::MD5::Update(void const*, unsigned long) +0000000000001840 T md5::MD5::FinalImpl(void const*, unsigned long) +0000000000001a80 T md5::MD5::Digest[abi:cxx11]() const + U operator new(unsigned long)@GLIBCXX_3.4 +``` + +## 许可证 + +MIT ©2024 [@dnomd343](https://github.com/dnomd343) diff --git a/libs/md5sum/benchmark.cc b/libs/md5sum/benchmark.cc new file mode 100644 index 00000000..6fb9903c --- /dev/null +++ b/libs/md5sum/benchmark.cc @@ -0,0 +1,43 @@ +#include "md5.h" +#include "benchmark/benchmark.h" + +using md5::MD5; + +std::string build_test_data() { + std::string data(65536, 0x00); + for (uint32_t i = 0; i < data.size(); ++i) { + data[i] = static_cast(i & 0xff); + } + return data; +} + +static void MD5_Digest(benchmark::State &state) { + constexpr MD5 md5; + for (auto _ : state) { + auto volatile holder = md5.Digest(); + } +} + +static void MD5_Update(benchmark::State &state) 
{ + MD5 md5; + const auto data = build_test_data(); + for (auto _ : state) { + md5.Update(data.c_str(), state.range(0)); + } +} + +static void MD5_Hash(benchmark::State &state) { + const auto data = build_test_data(); + for (auto _ : state) { + MD5::Hash(data.c_str(), state.range(0)); + } +} + +BENCHMARK(MD5_Digest); + +BENCHMARK(MD5_Update)->RangeMultiplier(4)->Range(64, 4096); + +BENCHMARK(MD5_Hash)->Arg(0); +BENCHMARK(MD5_Hash)->RangeMultiplier(4)->Range(64, 4096); + +BENCHMARK_MAIN(); diff --git a/libs/md5sum/demo.cc b/libs/md5sum/demo.cc new file mode 100644 index 00000000..8d04059a --- /dev/null +++ b/libs/md5sum/demo.cc @@ -0,0 +1,80 @@ +#include "md5.h" +#include +#include + +using md5::MD5; + +#if not __linux__ +std::optional hash_file(const std::string_view &file_name) { + auto *fp = std::fopen(file_name.data(), "rb"); + if (!fp) { + std::perror("File open failed"); + return std::nullopt; + } + + MD5 md5; + size_t len; + char buffer[BUFSIZ]; + while ((len = std::fread(buffer, sizeof(char), BUFSIZ, fp)) > 0) { + md5.Update(buffer, len); + } + if (std::ferror(fp)) { + std::perror("File read failed"); + std::fclose(fp); + return std::nullopt; + } + std::fclose(fp); + return md5.Final().Digest(); +} + +#else + +#include +#include +#include +#include + +std::optional hash_file(const std::string_view &file_name) { + auto fd = open(file_name.data(), O_RDONLY); + if (fd < 0) { + std::perror("File open failed"); + return std::nullopt; + } + + struct stat st {}; + fstat(fd, &st); + auto file_size = st.st_size; + + auto ptr = mmap(nullptr, file_size, PROT_READ, MAP_PRIVATE, fd, 0); + close(fd); + if (ptr == MAP_FAILED) { + std::perror("File mapping failed"); + return std::nullopt; + } + auto result = MD5::Hash(ptr, file_size); + if (munmap(ptr, file_size)) { + std::perror("File unmapping failed"); + } + return result; +} +#endif + +constexpr auto NO_ERROR = 0; +constexpr auto ARG_ERROR = 1; +constexpr auto FILE_ERROR = 2; + +int main(int argc, char *argv[]) { + if 
(argc == 1) { + std::cout << "Usage: " << argv[0] << " [FILE]" << std::endl; + return NO_ERROR; + } else if (argc != 2) { + std::cout << "Invalid MD5 arguments" << std::endl; + return ARG_ERROR; + } + + if (auto result = hash_file(argv[1]); result.has_value()) { + std::cout << result.value() << std::endl; + return NO_ERROR; + } + return FILE_ERROR; +} diff --git a/libs/md5sum/src/impl/constexpr.inl b/libs/md5sum/src/impl/constexpr.inl new file mode 100644 index 00000000..e6d1d8ad --- /dev/null +++ b/libs/md5sum/src/impl/constexpr.inl @@ -0,0 +1,100 @@ +#pragma once + +namespace md5::ce { + +struct md5_ctx { + uint32_t A = value::kA; + uint32_t B = value::kB; + uint32_t C = value::kC; + uint32_t D = value::kD; +}; + +struct md5_data { + const char *ptr; + uint64_t len, padded_len; + + constexpr md5_data(const char *data, const uint64_t len) + : ptr(data), len(len), padded_len((len + 64 + 8) & ~0b111111ULL) {} +}; + +using Block = std::array; // single md5 block with 64 bytes + +/// Get the data and padding byte of the specified index. +constexpr uint8_t GetByte(const md5_data &data, const uint64_t index) { + if (index < data.len) // message data + return data.ptr[index]; + if (index == data.len) // padding flag + return 0x80; + if (index < data.padded_len - 8) // padding content + return 0x00; + const auto offset = (index + 8 - data.padded_len) * 8; + return static_cast(0xff & (data.len * 8) >> offset); +} + +/// Get the MD5 block content at the specified index. +constexpr Block GetBlock(const md5_data &data, const uint64_t index) { + Block block {}; + for (int i = 0; i < 16; ++i) { + const auto offset = index + i * 4; + block[i] |= GetByte(data, offset + 3); + block[i] <<= 8; + block[i] |= GetByte(data, offset + 2); + block[i] <<= 8; + block[i] |= GetByte(data, offset + 1); + block[i] <<= 8; + block[i] |= GetByte(data, offset + 0); + } + return block; +} + +/// Apply MD5 round process with 64 times calculate. 
+constexpr md5_ctx Round(const Block &block, md5_ctx ctx) { + constexpr auto calc = [](const md5_ctx &c, const int i) { + if (i < 0x10) + return c.D ^ (c.B & (c.C ^ c.D)); + if (i < 0x20) + return c.C ^ (c.D & (c.B ^ c.C)); + if (i < 0x30) + return c.B ^ c.C ^ c.D; + return c.C ^ (c.B | ~c.D); + }; + + for (int i = 0; i < 64; ++i) { + const auto a = ctx.A + calc(ctx, i) + block[value::K(i)] + value::T(i); + ctx.A = ctx.D; + ctx.D = ctx.C; + ctx.C = ctx.B; + ctx.B += a << value::S(i) | a >> (32 - value::S(i)); + } + return ctx; +} + +/// Convert origin MD5 integers to hexadecimal character array. +constexpr std::array DigestCE(const std::array &ctx) { + std::array result {}; + for (uint32_t i = 0, val = 0; i < 32; val >>= 8) { + if (!(i & 0b111)) + val = ctx[i >> 3]; + result[i++] = value::HexTable[(val >> 4) & 0b1111]; + result[i++] = value::HexTable[val & 0b1111]; + } + return result; +} + +/// MD5 hash implement based on constexpr. +constexpr std::array Hash(const char *data, const uint64_t len) { + md5_ctx ctx; + const md5_data md5(data, len); + for (uint32_t index = 0; index < md5.padded_len; index += 64) { + const auto [A, B, C, D] = Round(GetBlock(md5, index), ctx); + ctx.A += A; + ctx.B += B; + ctx.C += C; + ctx.D += D; + } + return DigestCE({ctx.A, ctx.B, ctx.C, ctx.D}); +} + +static_assert(Hash("", 0)[0] == 'd'); + +} // namespace md5::ce diff --git a/libs/md5sum/src/impl/core.cc b/libs/md5sum/src/impl/core.cc new file mode 100644 index 00000000..251d2d5e --- /dev/null +++ b/libs/md5sum/src/impl/core.cc @@ -0,0 +1,98 @@ +#include "md5.h" +#include + +using md5::MD5; +using md5::value::K; +using md5::value::S; +using md5::value::T; + +#define R1 A, B, C, D +#define R2 D, A, B, C +#define R3 C, D, A, B +#define R4 B, C, D, A + +#define F(x, y, z) (z ^ (x & (y ^ z))) +#define G(x, y, z) (y ^ (z & (x ^ y))) +#define H(x, y, z) (x ^ y ^ z) +#define I(x, y, z) (y ^ (x | ~z)) + +#define MD5_ROUND(i, f, a, b, c, d) \ + do { \ + a += f(b, c, d) + block[K(i)] + 
T(i); \ + a = a << S(i) | a >> (32 - S(i)); \ + a += b; \ + } while (0) + +#ifdef _MSC_VER +#define EXPAND(...) __VA_ARGS__ +#define ROUND(...) EXPAND(MD5_ROUND(__VA_ARGS__)) +#else +#define ROUND MD5_ROUND +#endif + +#define FF(i, ...) ROUND(i | 0x00, F, __VA_ARGS__) +#define GG(i, ...) ROUND(i | 0x10, G, __VA_ARGS__) +#define HH(i, ...) ROUND(i | 0x20, H, __VA_ARGS__) +#define II(i, ...) ROUND(i | 0x30, I, __VA_ARGS__) + +#define MD5_UPDATE(OP) \ + OP(0x0, R1); OP(0x1, R2); OP(0x2, R3); OP(0x3, R4); \ + OP(0x4, R1); OP(0x5, R2); OP(0x6, R3); OP(0x7, R4); \ + OP(0x8, R1); OP(0x9, R2); OP(0xa, R3); OP(0xb, R4); \ + OP(0xc, R1); OP(0xd, R2); OP(0xe, R3); OP(0xf, R4); + +static constexpr unsigned char Padding[64] { 0x80, /* 0x00, ... */ }; + +const void* MD5::UpdateImpl(const void *data, uint64_t len) { + auto *block = static_cast(data); + auto *limit = block + ((len &= ~0b111111ULL) >> 2); + + auto A = ctx_.A; + auto B = ctx_.B; + auto C = ctx_.C; + auto D = ctx_.D; + + while (block < limit) { + const auto A_ = A; + const auto B_ = B; + const auto C_ = C; + const auto D_ = D; + MD5_UPDATE(FF) + MD5_UPDATE(GG) + MD5_UPDATE(HH) + MD5_UPDATE(II) + A += A_; + B += B_; + C += C_; + D += D_; + block += 16; // move to next block + } + + ctx_.A = A; + ctx_.B = B; + ctx_.C = C; + ctx_.D = D; + ctx_.size += len; + return limit; +} + +void MD5::FinalImpl(const void *data, uint64_t len) { + if (len >= 120) { // len -> [64 + 56, INF) + data = UpdateImpl(data, len); + len &= 0b111111; // len -> [0, 64) + } + + unsigned char buffer[128]; // 2 blocks + std::memcpy(buffer, data, len); + const uint64_t total = (ctx_.size + len) << 3; // total number in bit + + if (len < 56) { // len -> [0, 56) + std::memcpy(buffer + len, Padding, 56 - len); + std::memcpy(buffer + 56, &total, 8); + UpdateImpl(buffer, 64); // update 1 block + } else { // len -> [56, 64 + 56) + std::memcpy(buffer + len, Padding, 120 - len); + std::memcpy(buffer + 120, &total, 8); + UpdateImpl(buffer, 128); // update 2 
blocks + } +} diff --git a/libs/md5sum/src/impl/inline.inl b/libs/md5sum/src/impl/inline.inl new file mode 100644 index 00000000..73234b0d --- /dev/null +++ b/libs/md5sum/src/impl/inline.inl @@ -0,0 +1,42 @@ +#pragma once + +namespace md5 { + +inline MD5& MD5::Reset() { + ctx_.A = value::kA; + ctx_.B = value::kB; + ctx_.C = value::kC; + ctx_.D = value::kD; + ctx_.size = 0; + buffer_size_ = 0; + return *this; +} + +inline MD5& MD5::Final() { + FinalImpl(buffer_, buffer_size_); + return *this; +} + +inline MD5& MD5::Update(const std::string_view &data) { + return Update(data.data(), data.size()); +} + +inline std::string MD5::Hash(const std::string_view &data) { + return Hash(data.data(), data.size()); +} + +inline std::string MD5::Hash(const void *data, const uint64_t len) { + MD5 md5; + md5.FinalImpl(data, len); + return md5.Digest(); +} + +constexpr std::array MD5::HashCE(const std::string_view &data) { + return HashCE(data.data(), data.size()); +} + +constexpr std::array MD5::HashCE(const char *data, const uint64_t len) { + return ce::Hash(data, len); +} + +} // namespace md5 diff --git a/libs/md5sum/src/impl/sine.inl b/libs/md5sum/src/impl/sine.inl new file mode 100644 index 00000000..d98dbb12 --- /dev/null +++ b/libs/md5sum/src/impl/sine.inl @@ -0,0 +1,62 @@ +/// This is the mathematical calculation implementation of MD5. It is not an +/// efficient implementation and is only used for compile-time expansion +/// calculations. In addition, in terms of accuracy, it is only used to ensure +/// the MD5 T-table constants. + +#pragma once + +namespace md5::math { + +constexpr double PI = 3.14159265358979323846264338327950; + +constexpr double abs(const double x) { + return x < 0 ? 
-x : x;
+}
+
+constexpr double fmod(const double x, const double y) { // remainder of x / y, keeping the sign of x
+    const auto tmp = static_cast<int64_t>(x / y); // quotient truncated toward zero; NOTE(review): integer type reconstructed - confirm against upstream
+    return x - y * static_cast<double>(tmp);
+}
+
+constexpr double pow(const double x, const int n) { // x^n by repeated multiplication; n <= 0 yields 1
+    double res = 1;
+    for (int i = 0; i < n; ++i) {
+        res *= x;
+    }
+    return res;
+}
+
+constexpr double factorial(const int n) { // n! as a double; n < 2 yields 1
+    double res = 1;
+    for (int i = 2 ; i <= n ; ++i) {
+        res *= i;
+    }
+    return res;
+}
+
+/// Calculate sin(x) value with Maclaurin series.
+constexpr double sin_core(const double x) {
+    double res = x; // first term of the series
+    for (int i = 1; i < 80; ++i) {
+        const int n = i * 2 + 1; // odd exponents 3, 5, ..., 159
+        const int sign = i & 1 ? -1 : 1; // terms alternate, starting with -x^3 / 3!
+        res += sign * pow(x, n) / factorial(n);
+    }
+    return res;
+}
+
+/// Calculate the sin(x) value in radians.
+constexpr double sin(double x) {
+    x = fmod(x, 2 * PI); // -2PI < x < 2PI
+
+    if (abs(x) > PI) {
+        x -= (x > 0 ? 2 : -2) * PI; // -PI < x < PI
+    }
+
+    if (abs(x) > PI / 2) {
+        x = (x > 0 ? 1 : -1) * PI - x; // -PI / 2 < x < PI / 2
+    }
+    return sin_core(x); // closer to 0 for better accuracy
+}
+
+} // namespace md5::math
diff --git a/libs/md5sum/src/impl/value.inl b/libs/md5sum/src/impl/value.inl
new file mode 100644
index 00000000..4ebc6617
--- /dev/null
+++ b/libs/md5sum/src/impl/value.inl
@@ -0,0 +1,67 @@
+#pragma once
+
+#include "sine.inl"
+
+namespace md5::value {
+
+/// Hexadecimal character mapping table.
+constexpr char HexTable[] = {
+    '0','1','2','3','4','5','6','7',
+    '8','9','a','b','c','d','e','f',
+};
+
+/// MD5 fixed constants in little endian.
+constexpr uint32_t kA = 0x67452301;
+constexpr uint32_t kB = 0xefcdab89;
+constexpr uint32_t kC = 0x98badcfe;
+constexpr uint32_t kD = 0x10325476;
+
+/// MD5 data block index, input between 0 and 63.
+constexpr int K(const int i) {
+    constexpr int step[4] = {1, 5, 3, 7}; // per-round stride, selected by i >> 4
+    constexpr int begin[4] = {0, 1, 5, 0}; // per-round offset, selected by i >> 4
+    return (begin[i >> 4] + step[i >> 4] * i) & 0b1111; // wrapped into the 16 words of a block
+}
+static_assert(K(0) != K(63));
+
+/// MD5 circular shift times, input between 0 and 63.
+constexpr int S(const int i) {
+    constexpr int shift[4][4] = {
+        {7, 12, 17, 22},
+        {5, 9, 14, 20},
+        {4, 11, 16, 23},
+        {6, 10, 15, 21},
+    };
+    return shift[i >> 4][i & 0b11]; // row = i / 16, column = i % 4
+}
+static_assert(S(0) != S(63));
+
+/// `consteval` is unavailable because C++17 must be supported, so each T-table entry
+/// (|sin(i + 1)| * 2^32, truncated to 32 bits) is produced by expanding MD5_T over the list below.
+#define MD5_TT \
+    MD5_T(00) MD5_T(01) MD5_T(02) MD5_T(03) MD5_T(04) MD5_T(05) MD5_T(06) MD5_T(07) \
+    MD5_T(08) MD5_T(09) MD5_T(0a) MD5_T(0b) MD5_T(0c) MD5_T(0d) MD5_T(0e) MD5_T(0f) \
+    MD5_T(10) MD5_T(11) MD5_T(12) MD5_T(13) MD5_T(14) MD5_T(15) MD5_T(16) MD5_T(17) \
+    MD5_T(18) MD5_T(19) MD5_T(1a) MD5_T(1b) MD5_T(1c) MD5_T(1d) MD5_T(1e) MD5_T(1f) \
+    MD5_T(20) MD5_T(21) MD5_T(22) MD5_T(23) MD5_T(24) MD5_T(25) MD5_T(26) MD5_T(27) \
+    MD5_T(28) MD5_T(29) MD5_T(2a) MD5_T(2b) MD5_T(2c) MD5_T(2d) MD5_T(2e) MD5_T(2f) \
+    MD5_T(30) MD5_T(31) MD5_T(32) MD5_T(33) MD5_T(34) MD5_T(35) MD5_T(36) MD5_T(37) \
+    MD5_T(38) MD5_T(39) MD5_T(3a) MD5_T(3b) MD5_T(3c) MD5_T(3d) MD5_T(3e) MD5_T(3f)
+
+#define MD5_T(x) constexpr auto kT_##x = static_cast<uint32_t>(math::abs(math::sin(0x##x + 1)) * 0x100000000);
+MD5_TT
+#undef MD5_T
+
+#define MD5_T(x) kT_##x,
+constexpr std::array kT = {MD5_TT}; // element type and size are deduced from the 64 kT_xx constants
+#undef MD5_T
+
+#undef MD5_TT
+
+/// MD5 T-table constant, input between 0 and 63.
+constexpr uint32_t T(const int i) {
+    return kT[i];
+}
+static_assert(T(0) != T(63));
+
+} // namespace md5::value
diff --git a/libs/md5sum/src/impl/wrapper.cc b/libs/md5sum/src/impl/wrapper.cc
new file mode 100644
index 00000000..dd7b6573
--- /dev/null
+++ b/libs/md5sum/src/impl/wrapper.cc
@@ -0,0 +1,39 @@
+#include "md5.h"
+#include <cstring>
+
+using md5::MD5;
+
+std::string MD5::Digest() const { // lowercase hex of the 16 state bytes A, B, C, D
+    std::string result(32, 0x00);
+    auto *ptr = reinterpret_cast<const unsigned char *>(&ctx_); // unsigned so that >> 4 always yields 0..15
+    for (int i = 0; i < 32; ++ptr) { // two hex digits per byte; i advances inside the body
+        result[i++] = value::HexTable[*ptr >> 4];
+        result[i++] = value::HexTable[*ptr & 0b1111];
+    }
+    return result;
+}
+
+MD5& MD5::Update(const void *data, uint64_t len) { // streaming entry point; keeps a tail of < 64 bytes in buffer_
+    if (buffer_size_ != 0 && len != 0) { // nothing to merge for an empty chunk (data may then be null)
+        if (buffer_size_ + len < 64) { // buffer not filled
+            std::memcpy(buffer_ + buffer_size_, data, len);
+            buffer_size_ += len;
+            return *this; // save into buffer and return
+        }
+
+        const auto size = 64 - buffer_size_;
+        std::memcpy(buffer_ + buffer_size_, data, size);
+        UpdateImpl(buffer_, 64); // fill and update with buffer
+        data = static_cast<const char *>(data) + size;
+        buffer_size_ = 0;
+        len -= size;
+    } // buffer is empty for now
+
+    data = UpdateImpl(data, len);
+    len &= 0b111111; // len -> [0, 64)
+    if (len != 0) {
+        std::memcpy(buffer_, data, len); // save remain data into buffer
+        buffer_size_ = len;
+    }
+    return *this;
+}
diff --git a/libs/md5sum/src/md5.h b/libs/md5sum/src/md5.h
new file mode 100644
index 00000000..21939e71
--- /dev/null
+++ b/libs/md5sum/src/md5.h
@@ -0,0 +1,83 @@
+#pragma once
+
+#include <array> // NOTE(review): the three header names are reconstructed from usage - confirm against upstream
+#include <string>
+#include <cstdint>
+
+static_assert(sizeof(uintptr_t) == 8,
+              "Project only works on 64-bits architecture.");
+
+#ifdef _MSC_VER
+static_assert('\x01\x02\x03\x04' == 0x04030201,
+#else
+static_assert(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__,
+#endif
+              "Project only works on little-endian architecture.");
+
+#if defined(__clang__) || defined(__GNUC__)
+#define MD5_EXPORT __attribute__ ((visibility ("default")))
+#elif defined(_MSC_VER)
+#define MD5_EXPORT __declspec(dllexport)
+#else
+#define MD5_EXPORT
+#endif
+
+#include "impl/value.inl"
+#include "impl/constexpr.inl"
+
+namespace md5 {
+
+class MD5 {
+public:
+    MD5() = default;
+
+    /// Restore the initial state so the object can hash another message.
+    MD5& Reset();
+
+    /// Feed more data into the running hash.
+    MD5& Update(const std::string_view &data);
+
+    /// Feed `len` bytes starting at `data` into the running hash.
+    MD5_EXPORT MD5& Update(const void *data, uint64_t len);
+
+    /// Stop streaming updates and calculate the result; call Reset() before reusing the object.
+    MD5& Final();
+
+    /// Get the result as a 32-character lowercase hex string.
+    [[nodiscard]] MD5_EXPORT std::string Digest() const;
+
+    /// Calculate the md5 hash value of the specified data.
+    static std::string Hash(const std::string_view &data);
+
+    /// Calculate the md5 hash value of `len` bytes starting at `data`.
+    static std::string Hash(const void *data, uint64_t len);
+
+    /// Calculate the md5 hash value of the specified data with constexpr.
+    static constexpr std::array<char, 32> HashCE(const std::string_view &data);
+
+    /// Calculate the md5 hash value of the specified data with constexpr.
+    static constexpr std::array<char, 32> HashCE(const char *data, uint64_t len);
+
+private:
+    struct md5_ctx {
+        uint32_t A = value::kA;
+        uint32_t B = value::kB;
+        uint32_t C = value::kC;
+        uint32_t D = value::kD;
+        uint64_t size = 0; // processed size in byte
+    };
+
+    md5_ctx ctx_;
+    char buffer_[64] {}; // bytes that did not yet fill a whole 64-byte block
+    uint64_t buffer_size_ = 0; // size < 64
+
+    /// Update md5 ctx with specified data, and return the pointer of unprocessed data (< 64 bytes).
+    const void* UpdateImpl(const void *data, uint64_t len);
+
+    /// Update and finalize the md5 hash with the specified data.
+ MD5_EXPORT void FinalImpl(const void *data, uint64_t len); +}; + +} // namespace md5 + +#include "impl/inline.inl" diff --git a/libs/md5sum/test/assert.cc b/libs/md5sum/test/assert.cc new file mode 100644 index 00000000..8c5ccb6c --- /dev/null +++ b/libs/md5sum/test/assert.cc @@ -0,0 +1,200 @@ +#include "md5.h" + +using md5::value::K; +using md5::value::S; +using md5::value::T; + +static_assert(K(0x00) == 0); +static_assert(K(0x01) == 1); +static_assert(K(0x02) == 2); +static_assert(K(0x03) == 3); +static_assert(K(0x04) == 4); +static_assert(K(0x05) == 5); +static_assert(K(0x06) == 6); +static_assert(K(0x07) == 7); +static_assert(K(0x08) == 8); +static_assert(K(0x09) == 9); +static_assert(K(0x0a) == 10); +static_assert(K(0x0b) == 11); +static_assert(K(0x0c) == 12); +static_assert(K(0x0d) == 13); +static_assert(K(0x0e) == 14); +static_assert(K(0x0f) == 15); +static_assert(K(0x10) == 1); +static_assert(K(0x11) == 6); +static_assert(K(0x12) == 11); +static_assert(K(0x13) == 0); +static_assert(K(0x14) == 5); +static_assert(K(0x15) == 10); +static_assert(K(0x16) == 15); +static_assert(K(0x17) == 4); +static_assert(K(0x18) == 9); +static_assert(K(0x19) == 14); +static_assert(K(0x1a) == 3); +static_assert(K(0x1b) == 8); +static_assert(K(0x1c) == 13); +static_assert(K(0x1d) == 2); +static_assert(K(0x1e) == 7); +static_assert(K(0x1f) == 12); +static_assert(K(0x20) == 5); +static_assert(K(0x21) == 8); +static_assert(K(0x22) == 11); +static_assert(K(0x23) == 14); +static_assert(K(0x24) == 1); +static_assert(K(0x25) == 4); +static_assert(K(0x26) == 7); +static_assert(K(0x27) == 10); +static_assert(K(0x28) == 13); +static_assert(K(0x29) == 0); +static_assert(K(0x2a) == 3); +static_assert(K(0x2b) == 6); +static_assert(K(0x2c) == 9); +static_assert(K(0x2d) == 12); +static_assert(K(0x2e) == 15); +static_assert(K(0x2f) == 2); +static_assert(K(0x30) == 0); +static_assert(K(0x31) == 7); +static_assert(K(0x32) == 14); +static_assert(K(0x33) == 5); +static_assert(K(0x34) == 12); 
+static_assert(K(0x35) == 3); +static_assert(K(0x36) == 10); +static_assert(K(0x37) == 1); +static_assert(K(0x38) == 8); +static_assert(K(0x39) == 15); +static_assert(K(0x3a) == 6); +static_assert(K(0x3b) == 13); +static_assert(K(0x3c) == 4); +static_assert(K(0x3d) == 11); +static_assert(K(0x3e) == 2); +static_assert(K(0x3f) == 9); + +static_assert(S(0x00) == 7); +static_assert(S(0x01) == 12); +static_assert(S(0x02) == 17); +static_assert(S(0x03) == 22); +static_assert(S(0x04) == 7); +static_assert(S(0x05) == 12); +static_assert(S(0x06) == 17); +static_assert(S(0x07) == 22); +static_assert(S(0x08) == 7); +static_assert(S(0x09) == 12); +static_assert(S(0x0a) == 17); +static_assert(S(0x0b) == 22); +static_assert(S(0x0c) == 7); +static_assert(S(0x0d) == 12); +static_assert(S(0x0e) == 17); +static_assert(S(0x0f) == 22); +static_assert(S(0x10) == 5); +static_assert(S(0x11) == 9); +static_assert(S(0x12) == 14); +static_assert(S(0x13) == 20); +static_assert(S(0x14) == 5); +static_assert(S(0x15) == 9); +static_assert(S(0x16) == 14); +static_assert(S(0x17) == 20); +static_assert(S(0x18) == 5); +static_assert(S(0x19) == 9); +static_assert(S(0x1a) == 14); +static_assert(S(0x1b) == 20); +static_assert(S(0x1c) == 5); +static_assert(S(0x1d) == 9); +static_assert(S(0x1e) == 14); +static_assert(S(0x1f) == 20); +static_assert(S(0x20) == 4); +static_assert(S(0x21) == 11); +static_assert(S(0x22) == 16); +static_assert(S(0x23) == 23); +static_assert(S(0x24) == 4); +static_assert(S(0x25) == 11); +static_assert(S(0x26) == 16); +static_assert(S(0x27) == 23); +static_assert(S(0x28) == 4); +static_assert(S(0x29) == 11); +static_assert(S(0x2a) == 16); +static_assert(S(0x2b) == 23); +static_assert(S(0x2c) == 4); +static_assert(S(0x2d) == 11); +static_assert(S(0x2e) == 16); +static_assert(S(0x2f) == 23); +static_assert(S(0x30) == 6); +static_assert(S(0x31) == 10); +static_assert(S(0x32) == 15); +static_assert(S(0x33) == 21); +static_assert(S(0x34) == 6); +static_assert(S(0x35) == 10); 
+static_assert(S(0x36) == 15); +static_assert(S(0x37) == 21); +static_assert(S(0x38) == 6); +static_assert(S(0x39) == 10); +static_assert(S(0x3a) == 15); +static_assert(S(0x3b) == 21); +static_assert(S(0x3c) == 6); +static_assert(S(0x3d) == 10); +static_assert(S(0x3e) == 15); +static_assert(S(0x3f) == 21); + +static_assert(T(0x00) == 0xd76aa478); +static_assert(T(0x01) == 0xe8c7b756); +static_assert(T(0x02) == 0x242070db); +static_assert(T(0x03) == 0xc1bdceee); +static_assert(T(0x04) == 0xf57c0faf); +static_assert(T(0x05) == 0x4787c62a); +static_assert(T(0x06) == 0xa8304613); +static_assert(T(0x07) == 0xfd469501); +static_assert(T(0x08) == 0x698098d8); +static_assert(T(0x09) == 0x8b44f7af); +static_assert(T(0x0a) == 0xffff5bb1); +static_assert(T(0x0b) == 0x895cd7be); +static_assert(T(0x0c) == 0x6b901122); +static_assert(T(0x0d) == 0xfd987193); +static_assert(T(0x0e) == 0xa679438e); +static_assert(T(0x0f) == 0x49b40821); +static_assert(T(0x10) == 0xf61e2562); +static_assert(T(0x11) == 0xc040b340); +static_assert(T(0x12) == 0x265e5a51); +static_assert(T(0x13) == 0xe9b6c7aa); +static_assert(T(0x14) == 0xd62f105d); +static_assert(T(0x15) == 0x02441453); +static_assert(T(0x16) == 0xd8a1e681); +static_assert(T(0x17) == 0xe7d3fbc8); +static_assert(T(0x18) == 0x21e1cde6); +static_assert(T(0x19) == 0xc33707d6); +static_assert(T(0x1a) == 0xf4d50d87); +static_assert(T(0x1b) == 0x455a14ed); +static_assert(T(0x1c) == 0xa9e3e905); +static_assert(T(0x1d) == 0xfcefa3f8); +static_assert(T(0x1e) == 0x676f02d9); +static_assert(T(0x1f) == 0x8d2a4c8a); +static_assert(T(0x20) == 0xfffa3942); +static_assert(T(0x21) == 0x8771f681); +static_assert(T(0x22) == 0x6d9d6122); +static_assert(T(0x23) == 0xfde5380c); +static_assert(T(0x24) == 0xa4beea44); +static_assert(T(0x25) == 0x4bdecfa9); +static_assert(T(0x26) == 0xf6bb4b60); +static_assert(T(0x27) == 0xbebfbc70); +static_assert(T(0x28) == 0x289b7ec6); +static_assert(T(0x29) == 0xeaa127fa); +static_assert(T(0x2a) == 0xd4ef3085); 
+static_assert(T(0x2b) == 0x04881d05); +static_assert(T(0x2c) == 0xd9d4d039); +static_assert(T(0x2d) == 0xe6db99e5); +static_assert(T(0x2e) == 0x1fa27cf8); +static_assert(T(0x2f) == 0xc4ac5665); +static_assert(T(0x30) == 0xf4292244); +static_assert(T(0x31) == 0x432aff97); +static_assert(T(0x32) == 0xab9423a7); +static_assert(T(0x33) == 0xfc93a039); +static_assert(T(0x34) == 0x655b59c3); +static_assert(T(0x35) == 0x8f0ccc92); +static_assert(T(0x36) == 0xffeff47d); +static_assert(T(0x37) == 0x85845dd1); +static_assert(T(0x38) == 0x6fa87e4f); +static_assert(T(0x39) == 0xfe2ce6e0); +static_assert(T(0x3a) == 0xa3014314); +static_assert(T(0x3b) == 0x4e0811a1); +static_assert(T(0x3c) == 0xf7537e82); +static_assert(T(0x3d) == 0xbd3af235); +static_assert(T(0x3e) == 0x2ad7d2bb); +static_assert(T(0x3f) == 0xeb86d391); diff --git a/libs/md5sum/test/hash.cc b/libs/md5sum/test/hash.cc new file mode 100644 index 00000000..2996e717 --- /dev/null +++ b/libs/md5sum/test/hash.cc @@ -0,0 +1,280 @@ +#include "md5.h" +#include "helper.h" +#include "gtest/gtest.h" + +using md5::MD5; + +constexpr std::array, 256> test_items {{ + {0x00, "d41d8cd98f00b204e9800998ecf8427e"}, + {0x01, "93b885adfe0da089cdf634904fd59f71"}, + {0x02, "441077cc9e57554dd476bdfb8b8b8102"}, + {0x03, "b95f67f61ebb03619622d798f45fc2d3"}, + {0x04, "37b59afd592725f9305e484a5d7f5168"}, + {0x05, "d05374dc381d9b52806446a71c8e79b1"}, + {0x06, "d15ae53931880fd7b724dd7888b4b4ed"}, + {0x07, "9aa461e1eca4086f9230aa49c90b0c61"}, + {0x08, "3677509751ccf61539174d2b9635a7bf"}, + {0x09, "a6e7d3b46fdfaf0bde2a1f832a00d2de"}, + {0x0a, "c56bd5480f6e5413cb62a0ad9666613a"}, + {0x0b, "5b86fa8ad8f4357ea417214182177be8"}, + {0x0c, "50a73d7013e9803e3b20888f8fcafb15"}, + {0x0d, "b20d4797e23eea3ea5778970d2e226f3"}, + {0x0e, "aa541e601b7b9ddd0504d19866350d4e"}, + {0x0f, "58b7ce493ac99c66058538dacb1e3c94"}, + {0x10, "1ac1ef01e96caf1be0d329331a4fc2a8"}, + {0x11, "1bdd36b0a024c90db383512607293692"}, + {0x12, "633ab81aea5942052b794524e1a28477"}, + 
{0x13, "2d325313eb5df436c078435fa0f5eff1"}, + {0x14, "1549d1aae20214e065ab4b76aaac89a8"}, + {0x15, "7e437c81824d3982e70c88b5da8ea94b"}, + {0x16, "2f5f7e7216832ae19c353023618a35a8"}, + {0x17, "6535e52506c27eaa1033891ff4f3a74e"}, + {0x18, "8bd9c8efbbac58748951ca5a45cfd386"}, + {0x19, "d983c63bf41853056787fe1bb764dbff"}, + {0x1a, "b4f24c1219fb00d081c4020c56263451"}, + {0x1b, "b0ae6708c5e1be10668f57d3916cf423"}, + {0x1c, "ba7bb5ad4dba5bde028703007969cb25"}, + {0x1d, "ea880e16eac1b1488aff8a25d11d6271"}, + {0x1e, "c7172f0903c4919eb232f18ab7a30c42"}, + {0x1f, "e9e77893ba926e732f483282f416ffac"}, + {0x20, "b4ffcb23737cec315a4a4d1aa2a620ce"}, + {0x21, "5506a276a0a9acc3093f9169c73cf8c5"}, + {0x22, "e5a849897d9cc0b25b286c1f0bfb50e3"}, + {0x23, "f54fa30ea7b26d3e11c54d3c8451bcf0"}, + {0x24, "07602fe0229e486957081a49e3f06f83"}, + {0x25, "7c4bba98253ca834bf9ed43fd8b2f959"}, + {0x26, "cf8df427548bbfdb1e11143fdf008b85"}, + {0x27, "1431a6895a8f435755395f9ba83e76bf"}, + {0x28, "30dd5e4cae35ba892cc66d7736723980"}, + {0x29, "8ee247a1063931bedaf4c2fa3e4e261a"}, + {0x2a, "c32ceee2d2245df8589f94fcda0c9f2c"}, + {0x2b, "f25fa0e071d1f1cdc6632c6b673bccd5"}, + {0x2c, "370491b643e97577f4f74bd88576d1ec"}, + {0x2d, "b292bf16e3aafaf41f19c921068214f8"}, + {0x2e, "52921aae5ccc9b6e8e45853419d0c80f"}, + {0x2f, "f1375be31969155ef76f04741cd861d7"}, + {0x30, "04605ca542b2d82b9886a4b4b9acfb1c"}, + {0x31, "fa887ba0fa491faaacbb82bc5fefcd5b"}, + {0x32, "06470e932ad7c7cedf548b5ccb9d4806"}, + {0x33, "ad130b245e2dd894267cb0ddc532d169"}, + {0x34, "a9eeb95053682248608e97d79e89ca82"}, + {0x35, "cc26a3dc608268b98ecd1f3946c4b718"}, + {0x36, "33dd62a2df6538daf1cf821d9cde61f9"}, + {0x37, "6912ee65fff2d9f9ce2508cddf8bcda0"}, + {0x38, "51fdd1acda72405dfdfa03fcb85896d7"}, + {0x39, "5320ef4c17ef34a0cf2db763338d25eb"}, + {0x3a, "9f4f41b5cde885f94cfc0e06e78f929d"}, + {0x3b, "e39965bc00ecacd90fd875f77eff499a"}, + {0x3c, "63ed72093ae09e2c8553ee069e63d702"}, + {0x3d, "0d08fc14ac5baa37792377355dbad0ae"}, + {0x3e, 
"f3cdffe2e160a061754a06dafcfd688b"}, + {0x3f, "48a6295221902e8e0938f773a7185e72"}, + {0x40, "b2d3f56bc197fd985d5965079b5e7148"}, + {0x41, "8bd7053801c768420faf816fadba971c"}, + {0x42, "e58b3261a467f02ba51b215c013df4c3"}, + {0x43, "73062234b55754c3383480d5ef70dce5"}, + {0x44, "f752ebd79a813ef27c35bed69e2ee69f"}, + {0x45, "10907846eb89ef5dc5d4935a09dad0e7"}, + {0x46, "5f1f5f64b84400fb9ad6d8ecd9c142a0"}, + {0x47, "3157d7bb98a202b50cf0c437aa216c39"}, + {0x48, "70e7ade70281b0afcb1d4ed13efc2e25"}, + {0x49, "0bb96a503b1626c9ab16c1291c663e75"}, + {0x4a, "5bed4126b3c973f685fcf92a738d4dab"}, + {0x4b, "7523c240f2a44e86dd22504ca49f098d"}, + {0x4c, "6710949ed8ae17c44fb77496bedcb2ab"}, + {0x4d, "4a4c43373b9e40035e6e40cba227ce0b"}, + {0x4e, "91977cbcc32cdeaec7a0fa24bb948d6a"}, + {0x4f, "a6a0f1373cf3dbee116df2738d6f544d"}, + {0x50, "761f6d007f6e5c64c8d161a5ced4e0aa"}, + {0x51, "d44ea4d5a7074b88883a82f2b4cfbe67"}, + {0x52, "3097eda5666e2b2723e8949fcff2f244"}, + {0x53, "ab247a3d9bc600f594d5a6c50b80583f"}, + {0x54, "b229430e3db2dfdd13aa1da1bac14d5c"}, + {0x55, "befef62987c6dcdf24febd0bb7cd3678"}, + {0x56, "bfc3e5c7c461500ff085a66548378e0e"}, + {0x57, "a5712194537c75f0dd5a5ab3e9ebaf03"}, + {0x58, "8daac097e9044b85b75999d6c3bccd24"}, + {0x59, "b8124df21129685597c53a3f606ffd28"}, + {0x5a, "8fbc4d795c22d958248582a8df7332ed"}, + {0x5b, "36d217135db136b2bdf1617d7e9c79ce"}, + {0x5c, "1b3e6271a3a4b663c509a1255027ca99"}, + {0x5d, "a25f596574031ff9c34314c1b1f6bf34"}, + {0x5e, "aca7017e5bb62bfdd5bbfded78c8987a"}, + {0x5f, "8129e53a694add0560b1534b32fe5912"}, + {0x60, "da0e48224106c7535a4cd8db2ac7b8e3"}, + {0x61, "cbd4ace3d766d8e44f63e0de8f110f04"}, + {0x62, "bdc17a0ef2777512cb402c90e9d13e31"}, + {0x63, "47695ad6af968d6f1cdd2d8c5c87a466"}, + {0x64, "7acedd1a84a4cfcb6e7a16003242945e"}, + {0x65, "225489d3d073ac705f7b3ad358eabab2"}, + {0x66, "301da87a7b2ec27514c3a2789d5dbe49"}, + {0x67, "16222c503718f1420958133c330fe3f8"}, + {0x68, "d778ce7f642aa23355948477da4cc11c"}, + {0x69, 
"e873c37f8977e200a594b815e1a87ef3"}, + {0x6a, "e8f8f41528d4f855d8fdf4055bbabe2f"}, + {0x6b, "cacf3d3d1e7d21c97d265f64d9864b75"}, + {0x6c, "6bf48f161eff9f7005bd6667f30a5c27"}, + {0x6d, "42e7bb8e780b3b26616ecbcace81fa1a"}, + {0x6e, "225afd8ec21f86f66211adf54afc2e86"}, + {0x6f, "4fad3ab7d8546851ec1bb63ea7e6f5a8"}, + {0x70, "d1fec2ac3715e791ca5f489f300381b3"}, + {0x71, "f62807c995735b44699bb8179100ce87"}, + {0x72, "54050b090344e3284f390806ff716371"}, + {0x73, "50482241280543b88f7af3fc13d65c65"}, + {0x74, "4c36f27d4786fe2fb8caac690b6d62f7"}, + {0x75, "5a0edf0b97977ee5afb3d185b64fb610"}, + {0x76, "4541055c6675b614d27c537c3bb15675"}, + {0x77, "1c772251899a7ff007400b888d6b2042"}, + {0x78, "b7ba1efc6022e9ed272f00b8831e26e6"}, + {0x79, "b0b2d719a838db877b6d6571a39a1cdc"}, + {0x7a, "800aa956ec16f603ecdba66c2dc6e4cf"}, + {0x7b, "8827d2778287c58a242acd4c549beb31"}, + {0x7c, "cfbc5aa0b61103c1a982d8927b26f575"}, + {0x7d, "a1f5b691f74f566a2be1765731084f8a"}, + {0x7e, "80749be03f5724fa4ca0aef8909379b7"}, + {0x7f, "8402b21e7bc7906493bae0dac017f1f9"}, + {0x80, "37eff01866ba3f538421b30b7cbefcac"}, + {0x81, "46f986692847558fc38b0cece591c20f"}, + {0x82, "7c05c285d0263c40a0437421b387a2a1"}, + {0x83, "cc188799001d39bf0854be3426d93d51"}, + {0x84, "5633ceac96819c2778e4ea5baa12b1cd"}, + {0x85, "8b6831066bd6fa5d47714f2ea8bd137e"}, + {0x86, "1783ae63d2db2973b3aedc5d66b33400"}, + {0x87, "577b18536be8880747324fe72f73b4cb"}, + {0x88, "e0d70f824895dedd2a6eff96b2496a08"}, + {0x89, "d622abf62660ef4976d2c268257e38b8"}, + {0x8a, "9a3909b356dd42783dff7b4092a8e25f"}, + {0x8b, "05599be323d9f92e7a58f9bb42118737"}, + {0x8c, "780c43f8f8caf48638dc4f2313158f76"}, + {0x8d, "d5f6a198221af8fa64cb830c0311eed7"}, + {0x8e, "94cde25ecffd3f73240f3d83ffd6b5e4"}, + {0x8f, "ac4339e956f1a594b11b4be60ae35691"}, + {0x90, "82254c4ffa7ad6a977d1cb52667cd772"}, + {0x91, "58e2ca9acf732f5c4be9fb893a040b3c"}, + {0x92, "b38cc9c297d3dce48f19e9722572372d"}, + {0x93, "5f70fedb617e951ff5844d9812bd9b5c"}, + {0x94, 
"8e723b5c1f9c524f3df345c6dfefcd34"}, + {0x95, "745f520f26df966ab08f8629f464d9a4"}, + {0x96, "b2ac0c745422d02bcd86d2ef3793fbb3"}, + {0x97, "d44a13e4f5bc6067cb479cbe71621897"}, + {0x98, "818e7209b35dcab2cf09bc348e1d40b4"}, + {0x99, "7ab6b401d4020a282029f19275ae2da4"}, + {0x9a, "54b51be0eade3f37428d2cdaaf41855f"}, + {0x9b, "36c30f57eaf6a59fc16295c9441275f7"}, + {0x9c, "e346e60198f7ad9e102340d59403cfbd"}, + {0x9d, "affc7614e74ad844a0cb7357f5e63dcf"}, + {0x9e, "aaac54364c782c27bcf85f4baa8a01d9"}, + {0x9f, "08ddf1a1fe169ebadf020bf7608d09a8"}, + {0xa0, "d548e64706a1ef6f712c4691224ae0c2"}, + {0xa1, "cd5763f277ea4489a16f252f3a6e31ff"}, + {0xa2, "56306ccd80b5bb5f6ad26575c3fd8b20"}, + {0xa3, "59d431c923fb3986f6c88b72186ce7b4"}, + {0xa4, "2287174326668bce993b2601e454cac1"}, + {0xa5, "62228dcadd45303137ba761e625bf545"}, + {0xa6, "2edf32cc58cf34ba4c0355480aa120a3"}, + {0xa7, "ebf5e551cc4698fad12e23cd0a896cbe"}, + {0xa8, "0ad9386009402849e45cadf2b3d81f76"}, + {0xa9, "812b6bd7293cd7959985351476905c8b"}, + {0xaa, "6f1890f1c51247e60bb8aeec48dc04d9"}, + {0xab, "2f46ceaa9219e90e4367c1203a8279d3"}, + {0xac, "2e1a8952e80678f0a971acf34d9323b2"}, + {0xad, "6269ed8346f69f582f537e5ad54fc7e2"}, + {0xae, "67000dc8749677f8b0a490e7830f90ce"}, + {0xaf, "cc91c083f81039ea163e60143d75a017"}, + {0xb0, "7b91f9e232fc5728e5753935b927a7f9"}, + {0xb1, "fd64a907efaa9981aa90c402aeb9d5f7"}, + {0xb2, "95677269f66b41c5e7d4bebfd6b76c21"}, + {0xb3, "e3aca0de59a31fad3773dc1b56945885"}, + {0xb4, "42b847ced4f9b84e8030add16b4589cd"}, + {0xb5, "23ba18de73ec4c6575ed1be0d965e1ed"}, + {0xb6, "9c470e7a268d605d73e7a4f418fa9852"}, + {0xb7, "51999d289f5bbe579ce4a2224c7478d9"}, + {0xb8, "76e4931d881a3c50d61b61b2f28f3152"}, + {0xb9, "b5eb5787c150f5171912368a69a34281"}, + {0xba, "e68a7e782091a126a3fd5129775bbdd3"}, + {0xbb, "311da29ce1da0d6906209a55e92fb254"}, + {0xbc, "636adfb00a8702ef427a2671d6c38281"}, + {0xbd, "8e26d96042556f93b3ce25de6f084f1c"}, + {0xbe, "e0b30eea7deb658061b82e7855690201"}, + {0xbf, 
"4810b77b79c8a77c2f237265d565384e"}, + {0xc0, "48599090c2176432f4fa671af1ccb6c2"}, + {0xc1, "b4e66155a376e2cdb5e0892d3bee915d"}, + {0xc2, "89b3411fba959e07c034acde928abc46"}, + {0xc3, "59b867e2b86937e87c791987b3e408c7"}, + {0xc4, "2f3d37c11076f00f027d96d8cd9cf943"}, + {0xc5, "12e16b98ef3fd1e523911e9a020df66b"}, + {0xc6, "7602fbac9420bd58c72a923d3dfc7687"}, + {0xc7, "7c01c1c1599e517793f447ad02e83386"}, + {0xc8, "fb7001d34b8e82c9b579be5005d5b0a5"}, + {0xc9, "25f321363432ae94887c2af5e3854279"}, + {0xca, "f08617537c59005b22009f1b24bb2389"}, + {0xcb, "60ef17634ecbf55e24088b209c0f5bc0"}, + {0xcc, "6b2ebd7a16a966ffd033e787a5b3e6b0"}, + {0xcd, "16452ef6b8db3cf54bb8e9ad172a9e69"}, + {0xce, "cebe2b709fb9019b68d36112081fecc5"}, + {0xcf, "97043a2d56f045b0ac0f0e3be773b2a5"}, + {0xd0, "adc63e4b0a7d9d544cfb60b71095835f"}, + {0xd1, "4a4473504a27431eaebdef876dad3f4f"}, + {0xd2, "d2375723a4fe55ed98972c7498ffbb6a"}, + {0xd3, "5560730debc821c216afd556e7abac94"}, + {0xd4, "a9b2636860f86567ea198831dcf18b85"}, + {0xd5, "d3dc7a94f8587a60335043ec9a5cd68f"}, + {0xd6, "e94835131e81cd5b336c80e8751c491a"}, + {0xd7, "b23462fe71bcfa35f6f2d68719d0cf79"}, + {0xd8, "e104421e96b2fa79b52e5b94ff684b83"}, + {0xd9, "5c7b8bbc4e206e5add272475289edefd"}, + {0xda, "19d38667addff9b35bc88532c5a7c89d"}, + {0xdb, "163eb5680a41a0f978a14691a9a55b57"}, + {0xdc, "609b36f7ca099027146f627d7c1b87c4"}, + {0xdd, "c7ef5b63e448762b0389b4ea452734c7"}, + {0xde, "00b0d05c1887db7935e510e15131b37d"}, + {0xdf, "9b2cd2976df72cc25884f63de8651f65"}, + {0xe0, "331e1699744701b65d8bdf6ea08bb5fb"}, + {0xe1, "2da4f83d8cf7ad7f030130db966e70b4"}, + {0xe2, "645d4483688c92376e38b9675f804710"}, + {0xe3, "8a4f9211861999bb73278d003c58c01f"}, + {0xe4, "dcbea204a5301705e3cb3bba49fd1c53"}, + {0xe5, "d969cda70d04b412696f4ef0a5adff41"}, + {0xe6, "11429183461be35415052fe04bfc8106"}, + {0xe7, "eb9d2c88aae6c7b33aacba0336b58b17"}, + {0xe8, "9264eab1fdefba47e3f89854be11069d"}, + {0xe9, "bb916a43ce5883fcf0104a4d35f30253"}, + {0xea, 
"b730e8b9f04e3ed3b073c99655293ec6"}, + {0xeb, "9e45f084ce74240d33fa6c7fa48440d2"}, + {0xec, "5a56a05fa7b73b6403530e89b024c3f5"}, + {0xed, "09d54dc70fde38ec58858d5676801848"}, + {0xee, "e316edfcf19ebec41d2b883b3ccc1117"}, + {0xef, "4006f7d5ec5e2e49cb20bc0d8296439c"}, + {0xf0, "ddabc96224d832fde27d53c83270c3f1"}, + {0xf1, "267a256d457a3856bbfce6554c1566df"}, + {0xf2, "8b00f7e89794bc5b2c4383cbb8f9bae5"}, + {0xf3, "c6ed8882362bcbd5e25413ab6e85a325"}, + {0xf4, "6f86c742ac261c3cea66286c1e2dfcee"}, + {0xf5, "e375ff6bad4a9ad36baaccf22a7095bb"}, + {0xf6, "f911bfe01c9aca4c144b31387c78aa92"}, + {0xf7, "f220ef03645a47db8126f321de3c6012"}, + {0xf8, "96c762e75475f86fac474622e4943839"}, + {0xf9, "9b91849bde0bc07dae5b7c572cce9206"}, + {0xfa, "d04120d0e8d4c6e61d6bb33cb6f14df5"}, + {0xfb, "0a897617ec0dcb6efe8774fbcb4a9ac3"}, + {0xfc, "15a155fa20962a0f21ffddb1e6695c43"}, + {0xfd, "5089797486c967716d69b2ed0f9ba876"}, + {0xfe, "7bdac450b9343317aa89895d4dda181e"}, + {0xff, "11b7aaa64c413d2f0fccf893881c46a2"}, +}}; + +TEST(md5sum, hash) { + for (const auto &[index, expect] : test_items) { + auto data = build_test_data(index); + EXPECT_EQ(MD5::Hash(data), expect); + EXPECT_EQ(MD5::Hash(data.c_str(), index), expect); + } +} + +TEST(md5sum, hash_ce) { + for (const auto &[index, expect] : test_items) { + auto data = build_test_data(index); + EXPECT_EQ(MD5::HashCE(data), expect); + EXPECT_EQ(MD5::HashCE(data.c_str(), index), expect); + } +} diff --git a/libs/md5sum/test/helper.h b/libs/md5sum/test/helper.h new file mode 100644 index 00000000..2776c27b --- /dev/null +++ b/libs/md5sum/test/helper.h @@ -0,0 +1,19 @@ +#pragma once + +#include + +inline std::string build_test_data(const uint32_t size) { + std::string data(size, 0x00); + for (uint32_t i = 0; i < size; ++i) { + data[i] = static_cast(i); + } + return data; +} + +namespace testing::internal { + +inline bool operator==(const std::array &s1, const std::string_view &s2) { + return std::string {s1.data(), 32} == s2; +} + +} // namespace 
testing::internal diff --git a/libs/md5sum/test/simple.cc b/libs/md5sum/test/simple.cc new file mode 100644 index 00000000..37f23775 --- /dev/null +++ b/libs/md5sum/test/simple.cc @@ -0,0 +1,30 @@ +#include "md5.h" +#include "helper.h" +#include "gtest/gtest.h" + +using md5::MD5; + +TEST(md5sum, empty) { + constexpr auto expect = "d41d8cd98f00b204e9800998ecf8427e"; + + EXPECT_EQ(MD5::Hash(""), expect); + EXPECT_EQ(MD5::HashCE(""), expect); + EXPECT_EQ(MD5().Final().Digest(), expect); + + MD5 md5; + EXPECT_EQ(md5.Reset().Final().Digest(), expect); + EXPECT_EQ(md5.Reset().Final().Digest(), expect); + EXPECT_EQ(md5.Reset().Update("").Final().Digest(), expect); +} + +TEST(md5sum, simple) { + constexpr auto expect = "5227827849ea5e9d942ff40dbbfaffd6"; + + EXPECT_EQ(MD5::Hash("dnomd343"), expect); + EXPECT_EQ(MD5::HashCE("dnomd343"), expect); + + MD5 md5; + EXPECT_EQ(md5.Reset().Update("").Update("dnomd343").Final().Digest(), expect); + EXPECT_EQ(md5.Reset().Update("dnomd").Update("343").Final().Digest(), expect); + EXPECT_EQ(md5.Reset().Update("dnomd343").Final().Digest(), expect); +} diff --git a/libs/md5sum/test/stream.cc b/libs/md5sum/test/stream.cc new file mode 100644 index 00000000..587a6c29 --- /dev/null +++ b/libs/md5sum/test/stream.cc @@ -0,0 +1,28 @@ +#include "md5.h" +#include "helper.h" +#include "gtest/gtest.h" + +using md5::MD5; + +TEST(md5sum, stream) { + const auto test_data = build_test_data(256 * 256); + + MD5 md5; + for (uint64_t size = 1; size <= 256; ++size) { + auto expect = MD5::Hash(test_data.data(), size * 256); + + for (int times = 0; times < 256; ++times) { + const auto offset = test_data.data() + times * size; + md5.Update(offset, size); // update multiple times + } + EXPECT_EQ(md5.Final().Digest(), expect); + md5.Reset(); // reset for next round + + for (int times = 0; times < 256; ++times) { + const auto offset = test_data.data() + times * size; + md5.Update(std::string_view {offset, size}); // update multiple times + } + 
EXPECT_EQ(md5.Final().Digest(), expect); + md5.Reset(); // reset for next round + } +} diff --git a/libs/sst/sst-basic-blocks b/libs/sst/sst-basic-blocks index 0fa6e1dd..ece173ae 160000 --- a/libs/sst/sst-basic-blocks +++ b/libs/sst/sst-basic-blocks @@ -1 +1 @@ -Subproject commit 0fa6e1dd7577f2799d06ad91a9cba7d066aad3b3 +Subproject commit ece173ae064856c870c239b42e83a8be7996ed71 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8e314d86..f4c62e93 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -86,6 +86,7 @@ target_link_libraries(${PROJECT_NAME} PUBLIC FLAC++ minimp3 + md5sum::md5 sc-compiler-options ) diff --git a/src/engine/engine.cpp b/src/engine/engine.cpp index 6d948a32..02d823c2 100644 --- a/src/engine/engine.cpp +++ b/src/engine/engine.cpp @@ -39,6 +39,7 @@ #include "sample/sfz_support/sfz_import.h" #include "sample/multisample_support/multisample_import.h" #include "infrastructure/user_defaults.h" +#include "infrastructure/md5support.h" #include "browser/browser.h" #include "browser/browser_db.h" @@ -655,6 +656,7 @@ void Engine::loadSf2MultiSampleIntoSelectedPart(const fs::path &p) { auto riff = std::make_unique(p.u8string()); auto sf = std::make_unique(riff.get()); + auto md5 = infrastructure::createMD5SumFromFile(p); auto sz = getSelectionManager()->currentLeadZone(*this); auto pt = 0; @@ -694,6 +696,7 @@ void Engine::loadSf2MultiSampleIntoSelectedPart(const fs::path &p) auto sid = sampleManager->loadSampleFromSF2(p, sf.get(), pc, i, j); if (!sid.has_value()) continue; + sampleManager->getSample(*sid)->md5Sum = md5; if (firstGroup < 0) firstGroup = grpnum; diff --git a/src/infrastructure/md5support.h b/src/infrastructure/md5support.h new file mode 100644 index 00000000..284dd7bb --- /dev/null +++ b/src/infrastructure/md5support.h @@ -0,0 +1,25 @@ +// +// Created by Paul Walker on 8/5/24. 
+//
+
+#ifndef SHORTCIRCUITXT_MD5SUPPORT_H
+#define SHORTCIRCUITXT_MD5SUPPORT_H
+
+#include <string>
+#include "filesystem_import.h"
+#include "file_map_view.h"
+#include "md5.h"
+
+namespace scxt::infrastructure
+{
+inline std::string createMD5SumFromFile(const fs::path &path) // MD5 hex digest of the file contents, or "" if the file cannot be mapped
+{
+    auto fmp = infrastructure::FileMapView(path);
+    if (!fmp.isMapped())
+        return {}; // empty string signals failure; a real digest is never empty
+
+    return md5::MD5::Hash(fmp.data(), fmp.dataSize()); // hashes the whole mapped view in one shot
+}
+
+} // namespace scxt::infrastructure
+#endif // SHORTCIRCUITXT_MD5SUPPORT_H
diff --git a/src/json/sample_traits.h b/src/json/sample_traits.h index 2c816644..5a4a6e54 100644 --- a/src/json/sample_traits.h +++ b/src/json/sample_traits.h @@ -98,11 +98,8 @@ template <> struct scxt_traits static void assign(tao::json::basic_value &v, const scxt::sample::Sample::SampleFileAddress &f) { - v = {{"type", f.type}, - {"path", f.path.u8string()}, - {"preset", f.preset}, - {"instrument", f.instrument}, - {"region", f.region}}; + v = {{"type", f.type}, {"path", f.path.u8string()}, {"md5sum", f.md5sum}, + {"preset", f.preset}, {"instrument", f.instrument}, {"region", f.region}}; } template