From f31f019ab95c6d09c27707a91d89e99325a7c146 Mon Sep 17 00:00:00 2001 From: Robert Remen Date: Sun, 4 Aug 2024 12:40:49 +0200 Subject: [PATCH 1/7] build refactor --- Cargo.lock | 388 ++++++++---------------- Cargo.toml | 8 +- build/gates.rs | 6 +- build/main.rs | 46 +-- build/poseidon_constants.rs | 2 +- build/template.rs | 8 +- native/.gitignore | 2 - native/CMakeLists.txt | 16 +- native/gate_kernels_template.cuh | 3 + native/gates.cu | 4 + native/gates_template.cu | 6 - native/poseidon_constants_template.cuh | 2 - src/.gitignore | 1 - src/context.rs | 20 +- src/gates.rs | 2 +- src/ops_complex.rs | 2 +- src/ops_cub/device_radix_sort.rs | 26 +- src/ops_cub/device_reduce.rs | 18 +- src/ops_cub/device_run_length_encode.rs | 6 +- src/ops_cub/device_scan.rs | 51 +++- 20 files changed, 274 insertions(+), 343 deletions(-) create mode 100644 native/gate_kernels_template.cuh create mode 100644 native/gates.cu delete mode 100644 native/gates_template.cu delete mode 100644 src/.gitignore diff --git a/Cargo.lock b/Cargo.lock index ac84fc9..e6f944e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -19,9 +19,9 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstyle" -version = "1.0.7" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b" +checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" [[package]] name = "arrayvec" @@ -44,34 +44,11 @@ dependencies = [ "serde", ] -[[package]] -name = "bindgen" -version = "0.69.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0" -dependencies = [ - "bitflags", - "cexpr", - "clang-sys", - "itertools 0.12.1", - "lazy_static", - "lazycell", - "log", - "prettyplease", - "proc-macro2", - "quote", - "regex", - "rustc-hash", - "shlex", - "syn 2.0.66", - "which", -] - [[package]] name = "bitflags" -version = "2.5.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" [[package]] name = "bitvec" @@ -142,8 +119,8 @@ dependencies = [ "boojum", "cmake", "criterion", - "criterion-cuda", "criterion-macro", + "era_criterion_cuda", "era_cudart", "era_cudart_sys", "itertools 0.13.0", @@ -173,9 +150,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.6.0" +version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" +checksum = "8318a53db07bb3f8dca91a600466bdb3f2eaadeedfdbcf02e1accbad9271ba50" [[package]] name = "cast" @@ -185,18 +162,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.0.99" +version = "1.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96c51067fd44124faa7f870b4b1c969379ad32b2ba805aa959430ceaa384f695" - -[[package]] -name = "cexpr" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" -dependencies = [ - "nom", -] +checksum = "26a5c3fd7bfa1ce3897a3a3501d362b2d87b7f2583ebcb4a949ec25911025cbc" [[package]] name = "cfg-if" @@ -231,31 +199,20 @@ dependencies = [ "half", ] -[[package]] -name = "clang-sys" -version = "1.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" -dependencies = [ - "glob", - "libc", - "libloading", -] - [[package]] name = "clap" -version = "4.5.7" +version = "4.5.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5db83dced34638ad474f39f250d7fea9598bdd239eaced1bdf45d597da0f433f" +checksum = "0fbb260a053428790f3de475e304ff84cdbc4face759ea7a3e64c1edd938a7fc" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.5.7" +version = "4.5.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7e204572485eb3fbf28f871612191521df159bc3e15a9f5064c66dba3a8c05f" +checksum = "64b17d7ea74e9f833c7dbf2cbe4fb12ff26783eda4782a8975b72f895c9b4d99" dependencies = [ "anstyle", "clap_lex", @@ -263,9 +220,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.7.1" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b82cf0babdbd58558212896d1a4272303a57bdb245c2bf1147185fb45640e70" +checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" [[package]] name = "cmake" @@ -340,15 +297,6 @@ dependencies = [ "walkdir", ] -[[package]] -name = "criterion-cuda" -version = "0.1.0" -source = "git+https://github.com/matter-labs/era-cuda.git?branch=main#0ac0eeb7fdef62e3ef05c97de0c8accbc963f3bc" -dependencies = [ - "criterion", - "cudart", -] - [[package]] name = "criterion-macro" version = "0.4.0" @@ -463,25 +411,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "cudart" -version = "0.1.0" -source = "git+https://github.com/matter-labs/era-cuda.git?branch=main#0ac0eeb7fdef62e3ef05c97de0c8accbc963f3bc" -dependencies = [ - "bitflags", - "cudart-sys", - "paste", -] - -[[package]] -name = "cudart-sys" -version = "0.1.0" -source = "git+https://github.com/matter-labs/era-cuda.git?branch=main#0ac0eeb7fdef62e3ef05c97de0c8accbc963f3bc" -dependencies = [ - "bindgen", - "serde_json", -] - [[package]] name = "derivative" version = "2.2.0" @@ -506,9 +435,9 @@ dependencies = [ [[package]] name = "either" -version = "1.12.0" +version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" [[package]] name = "equivalent" @@ -516,11 +445,19 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +[[package]] +name = "era_criterion_cuda" +version = "0.1.0" +source = "git+https://github.com/matter-labs/era-cuda.git?branch=rr-build-refactor#160d7fa3038cf56e59bc90bf5399afa69841780f" +dependencies = [ + "criterion", + "era_cudart", +] + [[package]] name = "era_cudart" version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1725b17e5e41b89f566ace3900f119fdc87f04e2daa8e253b668573ad67a454f" +source = "git+https://github.com/matter-labs/era-cuda.git?branch=rr-build-refactor#160d7fa3038cf56e59bc90bf5399afa69841780f" dependencies = [ "bitflags", "era_cudart_sys", @@ -530,23 +467,11 @@ dependencies = [ [[package]] name = "era_cudart_sys" version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60d46683f8a9a5364874f95b00073f6dc93d33e9a019f150b0d6ce09ffc13251" +source = "git+https://github.com/matter-labs/era-cuda.git?branch=rr-build-refactor#160d7fa3038cf56e59bc90bf5399afa69841780f" dependencies = [ - "bindgen", "serde_json", ] -[[package]] -name = "errno" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" -dependencies = [ - "libc", - "windows-sys", -] - [[package]] name = "ethbloom" version = "0.13.0" @@ -730,12 +655,6 @@ dependencies = [ "wasi", ] -[[package]] -name = "glob" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" - [[package]] name = "half" version = "2.4.1" @@ -764,15 +683,6 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" -[[package]] -name = "home" -version = "0.5.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" -dependencies = [ - "windows-sys", -] - [[package]] name = "impl-codec" version = "0.6.0" @@ -813,9 +723,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.2.6" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" +checksum = "de3fc2e30ba82dd1b3911c8de1ffc143c74a914a14e99514d7637e3099df5ea0" dependencies = [ "equivalent", "hashbrown", @@ -829,7 +739,7 @@ checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" dependencies = [ "hermit-abi", "libc", - "windows-sys", + "windows-sys 0.52.0", ] [[package]] @@ -841,15 +751,6 @@ dependencies = [ "either", ] -[[package]] -name = "itertools" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" -dependencies = [ - "either", -] - [[package]] name = "itertools" version = "0.13.0" @@ -889,34 +790,12 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" -[[package]] -name = "lazycell" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" - [[package]] name = "libc" version = "0.2.155" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" -[[package]] -name = "libloading" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19" -dependencies = [ - "cfg-if", - "windows-targets", -] - -[[package]] -name = "linux-raw-sys" -version = "0.4.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" - [[package]] name = "lock_api" version = "0.4.12" @@ -929,9 +808,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.21" +version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" [[package]] name = "memchr" @@ -939,27 +818,11 @@ version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" -[[package]] -name = "minimal-lexical" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" - -[[package]] -name = "nom" -version = "7.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" -dependencies = [ - "memchr", - "minimal-lexical", -] - [[package]] name = "num-bigint" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c165a9ab64cf766f73521c0dd2cfdff64f488b8f0b3e621face3462d3db536d7" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" dependencies = [ "num-integer", "num-traits", @@ -1011,9 +874,9 @@ checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "oorandom" -version = "11.1.3" +version = "11.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" +checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" [[package]] name = "pairing_ce" @@ -1125,18 +988,11 @@ dependencies = [ [[package]] name = "ppv-lite86" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" - -[[package]] -name = "prettyplease" version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f12335488a2f3b0a83b14edad48dca9879ce89b2edd10e80237e4e852dd645e" +checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" dependencies = [ - "proc-macro2", - "syn 2.0.66", + "zerocopy", ] [[package]] @@ -1187,9 +1043,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.85" +version = "1.0.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22244ce15aa966053a896d1accb3a6e68469b97c7f33f284b99f0d576879fc23" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" dependencies = [ "unicode-ident", ] @@ -1298,18 +1154,18 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.5.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c82cf8cff14456045f55ec4241383baeff27af886adb72ffb2162f99911de0fd" +checksum = "2a908a6e00f1fdd0dfd9c0eb08ce85126f6d8bbda50017e74bc4a4b7d4a926a4" dependencies = [ "bitflags", ] [[package]] name = "regex" -version = "1.10.5" +version = "1.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" +checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619" dependencies = [ "aho-corasick", "memchr", @@ -1344,31 +1200,12 @@ dependencies = [ "rustc-hex", ] -[[package]] -name = "rustc-hash" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" - [[package]] name = "rustc-hex" version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3e75f6a532d0fd9f7f13144f392b6ad56a32696bfcd9c78f797f16bbb6f072d6" -[[package]] -name = "rustix" -version = "0.38.34" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" -dependencies = [ - "bitflags", - "errno", - "libc", - "linux-raw-sys", - "windows-sys", -] - [[package]] name = "ryu" version = "1.0.18" @@ -1386,9 +1223,9 @@ dependencies = [ [[package]] name = "scc" -version = "2.1.1" +version = "2.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76ad2bbb0ae5100a07b7a6f2ed7ab5fd0045551a4c507989b7a620046ea3efdc" +checksum = "a870e34715d5d59c8536040d4d4e7a41af44d527dc50237036ba4090db7996fc" dependencies = [ "sdd", ] @@ -1401,37 +1238,38 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "sdd" -version = "0.2.0" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b84345e4c9bd703274a082fb80caaa99b7612be48dfaa1dd9266577ec412309d" +checksum = "177258b64c0faaa9ffd3c65cd3262c2bc7e2588dbbd9c1641d0346145c1bbda8" [[package]] name = "serde" -version = "1.0.203" +version = "1.0.204" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" +checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.203" +version = "1.0.204" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" +checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.72", ] [[package]] name = "serde_json" -version = "1.0.117" +version = "1.0.122" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" +checksum = "784b6203951c57ff748476b126ccb5e8e2959a5c19e5c617ab1956be3dbc68da" dependencies = [ "itoa", + "memchr", "ryu", "serde", ] @@ -1458,7 +1296,7 @@ checksum = "82fe9db325bcef1fbcde82e078a5cc4efdf787e96b3b9cf45b50b529f2083d67" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.72", ] [[package]] @@ -1482,12 +1320,6 @@ dependencies = [ "keccak", ] -[[package]] -name = "shlex" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" - [[package]] name = "slab" version = "0.4.9" @@ -1514,9 +1346,9 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "subtle" -version = "2.5.0" +version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" @@ -1531,9 +1363,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.66" +version = "2.0.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5" +checksum = "dc4b9b9bf2add8093d3f2c0204471e951b2285580335de42f9d2534f3ae7a8af" dependencies = [ "proc-macro2", "quote", @@ -1567,9 +1399,9 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "0.6.6" +version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4badfd56924ae69bcc9039335b2e017639ce3f9b001c393c1b2d1ef846ce2cbf" +checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" [[package]] name = "toml_edit" @@ -1630,9 +1462,9 @@ dependencies = [ [[package]] name = "version_check" -version = "0.9.4" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" [[package]] name = "walkdir" @@ -1671,7 +1503,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.72", "wasm-bindgen-shared", ] @@ -1693,7 +1525,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.72", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -1714,18 +1546,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "which" -version = "4.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" -dependencies = [ - "either", - "home", - "once_cell", - "rustix", -] - [[package]] name = "winapi" version = "0.3.9" @@ -1744,11 +1564,11 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" -version = "0.1.8" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys", + "windows-sys 0.59.0", ] [[package]] @@ -1766,11 +1586,20 @@ dependencies = [ "windows-targets", ] +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + [[package]] name = "windows-targets" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ "windows_aarch64_gnullvm", "windows_aarch64_msvc", @@ -1784,51 +1613,51 @@ dependencies = [ [[package]] name = "windows_aarch64_gnullvm" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] name = "windows_aarch64_msvc" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_i686_gnu" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" [[package]] name = "windows_i686_gnullvm" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_msvc" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_x86_64_gnu" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnullvm" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_msvc" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winnow" @@ -1847,3 +1676,24 @@ checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" dependencies = [ "tap", ] + +[[package]] +name = "zerocopy" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "byteorder", + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.72", +] diff --git a/Cargo.toml b/Cargo.toml index f5556f1..db9f04e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,21 +13,21 @@ description = "Boojum-CUDA is a library implementing GPU-accelerated cryptograph [build-dependencies] boojum = "=0.2.1" -cudart-sys = { version = "=0.1.0", package = "era_cudart_sys" } +cudart-sys = { git = "https://github.com/matter-labs/era-cuda.git", branch = "rr-build-refactor", package = "era_cudart_sys" } cmake = "0.1" itertools = "0.13" [dependencies] boojum = "=0.2.1" -cudart = { version = "=0.1.0", package = "era_cudart" } -cudart-sys = { version = "=0.1.0", package = "era_cudart_sys" } +cudart = { git = "https://github.com/matter-labs/era-cuda.git", branch = "rr-build-refactor", package = "era_cudart" } +cudart-sys = { git = "https://github.com/matter-labs/era-cuda.git", branch = "rr-build-refactor" , package = "era_cudart_sys" } itertools = "0.13" lazy_static = "1.4" [dev-dependencies] blake2 = "0.10" criterion = "0.5" -criterion-cuda = { git = "https://github.com/matter-labs/era-cuda.git", branch = "main", package = "criterion-cuda" } +criterion-cuda = { git = "https://github.com/matter-labs/era-cuda.git", branch = "rr-build-refactor", package = "era_criterion_cuda" } criterion-macro = "0.4" itertools = "0.13" rand = "0.8" diff --git a/build/gates.rs b/build/gates.rs index 8d00716..eee7d27 100644 --- a/build/gates.rs +++ b/build/gates.rs @@ -55,8 +55,8 @@ pub(super) fn generate() { } fn generate_cuda(descriptions: &[Description]) { - const TEMPLATE_PATH: &str = "native/gates_template.cu"; - const RESULT_PATH: &str = "native/gates.cu"; + const TEMPLATE_PATH: &str = "native/gate_kernels_template.cuh"; + const RESULT_PATH: &str = "gate_kernels.cuh"; let mut code = String::new(); let s = &mut code; new_line(s); @@ -180,7 +180,7 @@ fn generate_cuda(descriptions: &[Description]) { fn generate_rust(descriptions: &[Description]) { const TEMPLATE_PATH: &str = "src/gates_data_template.rs"; - const RESULT_PATH: &str = "src/gates_data.rs"; + const RESULT_PATH: &str = "gates_data.rs"; let mut hash_map = String::new(); let mut bindings = String::new(); let mut mappings = String::new(); diff --git a/build/main.rs b/build/main.rs index 33ef1a4..9a67d72 100644 --- a/build/main.rs +++ b/build/main.rs @@ -1,8 +1,7 @@ #![allow(incomplete_features)] +#![allow(unexpected_cfgs)] #![feature(generic_const_exprs)] -use cudart_sys::{cuda_lib_path, cuda_path}; - mod gates; mod poseidon_constants; mod template; @@ -10,19 +9,32 @@ mod template; fn main() { gates::generate(); poseidon_constants::generate(); - #[cfg(target_os = "macos")] - std::process::exit(0); - let dst = cmake::Config::new("native") - .profile("Release") - .define( - "CMAKE_CUDA_ARCHITECTURES", - std::env::var("CUDAARCHS").unwrap_or("native".to_string()), - ) - .build(); - println!("cargo:rustc-link-search=native={}", dst.display()); - println!("cargo:rustc-link-lib=static=boojum-cuda-native"); - println!("cargo:rustc-link-search=native={}", cuda_lib_path!()); - println!("cargo:rustc-link-lib=cudart"); - #[cfg(target_os = "linux")] - println!("cargo:rustc-link-lib=stdc++"); + println!("cargo::rustc-check-cfg=cfg(no_cuda)"); + #[cfg(no_cuda)] + { + println!("cargo::warning={}", cudart_sys::no_cuda_message!()); + } + #[cfg(not(no_cuda))] + { + use cudart_sys::{get_cuda_lib_path, get_cuda_version}; + use std::env::var; + let cuda_version = get_cuda_version().expect("Failed to determine CUDA version"); + if !cuda_version.starts_with("12.") { + println!("cargo::warning=CUDA version {cuda_version} detected. This crate is only tested with CUDA 12.*."); + } + let cudaarchs = var("CUDAARCHS").unwrap_or("native".to_string()); + let dst = cmake::Config::new("native") + .profile("Release") + .define("CMAKE_CUDA_ARCHITECTURES", cudaarchs) + .build(); + let boojum_lib_path = dst.to_str().unwrap(); + println!("cargo:rustc-link-search=native={boojum_lib_path}"); + println!("cargo:rustc-link-lib=static=boojum-cuda-native"); + let cuda_lib_path = get_cuda_lib_path().unwrap(); + let cuda_lib_path_str = cuda_lib_path.to_str().unwrap(); + println!("cargo:rustc-link-search=native={cuda_lib_path_str}"); + println!("cargo:rustc-link-lib=cudart"); + #[cfg(target_os = "linux")] + println!("cargo:rustc-link-lib=stdc++"); + } } diff --git a/build/poseidon_constants.rs b/build/poseidon_constants.rs index a1b69bc..27d6f14 100644 --- a/build/poseidon_constants.rs +++ b/build/poseidon_constants.rs @@ -5,7 +5,7 @@ use boojum::implementations::poseidon_goldilocks_params::*; // use itertools::Itertools; const TEMPLATE_PATH: &str = "native/poseidon_constants_template.cuh"; -const RESULT_PATH: &str = "native/poseidon_constants.cuh"; +const RESULT_PATH: &str = "poseidon_constants.cuh"; fn split_u64(value: u64) -> (u32, u32) { let lo = value as u32; diff --git a/build/template.rs b/build/template.rs index 72ed55c..6ae8ab3 100644 --- a/build/template.rs +++ b/build/template.rs @@ -1,4 +1,6 @@ +use std::env::var; use std::fs; +use std::path::Path; const PREFIX: char = '%'; const SUFFIX: char = '%'; @@ -11,8 +13,10 @@ pub(crate) fn generate(replacements: &[(&str, String)], template_path: &str, res from.push(SUFFIX); text = text.replace(&from, value); } - let current = fs::read_to_string(result_path).unwrap_or_default(); + let out_dir = var("OUT_DIR").unwrap(); + let result_path = Path::new(&out_dir).join(result_path); + let current = fs::read_to_string(&result_path).unwrap_or_default(); if !text.eq(¤t) { - fs::write(result_path, text).unwrap(); + fs::write(&result_path, text).unwrap(); } } diff --git a/native/.gitignore b/native/.gitignore index 9c1ff3a..9f63b71 100644 --- a/native/.gitignore +++ b/native/.gitignore @@ -1,3 +1 @@ /cmake-build-*/ -/gates.cu -/poseidon_constants.cuh diff --git a/native/CMakeLists.txt b/native/CMakeLists.txt index f587620..6ffb004 100644 --- a/native/CMakeLists.txt +++ b/native/CMakeLists.txt @@ -5,41 +5,43 @@ if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES) set(CMAKE_CUDA_ARCHITECTURES native) endif () add_library(boojum-cuda-native STATIC + ops_cub/common.cuh + ops_cub/device_radix_sort.cu + ops_cub/device_reduce.cu + ops_cub/device_run_length_encode.cu + ops_cub/device_scan.cu barycentric.cu blake2s.cu carry_chain.cuh common.cuh context.cu context.cuh + ${CMAKE_INSTALL_PREFIX}/gate_kernels.cuh gates.cu gates.cuh gates_poseidon.cuh goldilocks.cuh - goldilocks_extension.cuh goldilocks_extension.cu + goldilocks_extension.cuh memory.cuh ntt.cu ntt_b2n.cuh ntt_n2b.cuh ops_complex.cu ops_complex.cuh - ops_cub/common.cuh - ops_cub/device_radix_sort.cu - ops_cub/device_reduce.cu - ops_cub/device_run_length_encode.cu - ops_cub/device_scan.cu ops_simple.cu poseidon2_cooperative.cu poseidon2_single_thread.cu poseidon2_single_thread.cuh poseidon_common.cu - poseidon_constants.cuh + ${CMAKE_INSTALL_PREFIX}/poseidon_constants.cuh poseidon_cooperative.cu poseidon_single_thread.cu poseidon_single_thread.cuh poseidon_utils.cuh ptx.cuh ) +target_include_directories(boojum-cuda-native PRIVATE ${CMAKE_INSTALL_PREFIX}) set_target_properties(boojum-cuda-native PROPERTIES CUDA_STANDARD 17) set_target_properties(boojum-cuda-native PROPERTIES CUDA_SEPARABLE_COMPILATION ON) set_target_properties(boojum-cuda-native PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON) diff --git a/native/gate_kernels_template.cuh b/native/gate_kernels_template.cuh new file mode 100644 index 0000000..4457665 --- /dev/null +++ b/native/gate_kernels_template.cuh @@ -0,0 +1,3 @@ +namespace gates { +%CODE% +} // namespace gates diff --git a/native/gates.cu b/native/gates.cu new file mode 100644 index 0000000..dfad4da --- /dev/null +++ b/native/gates.cu @@ -0,0 +1,4 @@ +#include "gates.cuh" +#include "gates_poseidon.cuh" +// do not reorder includes +#include "gate_kernels.cuh" diff --git a/native/gates_template.cu b/native/gates_template.cu deleted file mode 100644 index 8d5699e..0000000 --- a/native/gates_template.cu +++ /dev/null @@ -1,6 +0,0 @@ -#include "gates.cuh" -#include "gates_poseidon.cuh" - -namespace gates { -%CODE% -} // namespace gates diff --git a/native/poseidon_constants_template.cuh b/native/poseidon_constants_template.cuh index 62724fc..91a498d 100644 --- a/native/poseidon_constants_template.cuh +++ b/native/poseidon_constants_template.cuh @@ -1,8 +1,6 @@ // clang-format off #pragma once -#include "goldilocks.cuh" - namespace poseidon_common { using namespace goldilocks; diff --git a/src/.gitignore b/src/.gitignore deleted file mode 100644 index 041239e..0000000 --- a/src/.gitignore +++ /dev/null @@ -1 +0,0 @@ -/gates_data.rs diff --git a/src/context.rs b/src/context.rs index bd56935..d79028b 100644 --- a/src/context.rs +++ b/src/context.rs @@ -5,7 +5,7 @@ use boojum::field::{Field, PrimeField}; use cudart::memory::{memory_copy, DeviceAllocation}; use cudart::result::{CudaResult, CudaResultWrap}; use cudart::slice::DeviceSlice; -use cudart_sys::{cudaMemcpyToSymbol, CudaMemoryCopyKind}; +use cudart_sys::{cudaMemcpyToSymbol, cuda_struct_and_stub, CudaMemoryCopyKind}; use std::mem::size_of; use std::os::raw::c_void; @@ -29,6 +29,8 @@ impl PowersLayerData { } } +unsafe impl Sync for PowersLayerData {} + #[repr(C)] struct PowersData { fine: PowersLayerData, @@ -48,14 +50,14 @@ impl PowersData { } } -extern "C" { - static powers_data_w: PowersData; - static powers_data_w_bitrev_for_ntt: PowersData; - static powers_data_w_inv_bitrev_for_ntt: PowersData; - static powers_data_g_f: PowersData; - static powers_data_g_i: PowersData; - static inv_sizes: [GoldilocksField; OMEGA_LOG_ORDER as usize + 1]; -} +unsafe impl Sync for PowersData {} + +cuda_struct_and_stub! { static powers_data_w: PowersData; } +cuda_struct_and_stub! { static powers_data_w_bitrev_for_ntt: PowersData; } +cuda_struct_and_stub! { static powers_data_w_inv_bitrev_for_ntt: PowersData; } +cuda_struct_and_stub! { static powers_data_g_f: PowersData; } +cuda_struct_and_stub! { static powers_data_g_i: PowersData; } +cuda_struct_and_stub! { static inv_sizes: [GoldilocksField; OMEGA_LOG_ORDER as usize + 1]; } unsafe fn copy_to_symbol(symbol: &T, src: &T) -> CudaResult<()> { cudaMemcpyToSymbol( diff --git a/src/gates.rs b/src/gates.rs index 488bc8c..ebcdb92 100644 --- a/src/gates.rs +++ b/src/gates.rs @@ -57,7 +57,7 @@ struct GateData { kernel: GateEvalSignature, } -include!("gates_data.rs"); +include!(concat!(env!("OUT_DIR"), "/gates_data.rs")); pub fn find_gate_id_by_name(name: &str) -> Option { HASH_MAP.get(name).copied() diff --git a/src/ops_complex.rs b/src/ops_complex.rs index 2ac8b86..059e19b 100644 --- a/src/ops_complex.rs +++ b/src/ops_complex.rs @@ -1485,7 +1485,7 @@ mod tests { let stream = CudaStream::default(); let base_ef = ExtensionField::from_coeff_in_base([GoldilocksField(42), GoldilocksField(42)]); - let base_vf = unsafe { mem::transmute(base_ef) }; + let base_vf = unsafe { mem::transmute::(base_ef) }; let mut d_base = DeviceAllocation::alloc(1).unwrap(); memory_copy_async(&mut d_base, &[base_vf], &stream).unwrap(); let b = &d_base[0]; diff --git a/src/ops_cub/device_radix_sort.rs b/src/ops_cub/device_radix_sort.rs index 6ecaf84..df06845 100644 --- a/src/ops_cub/device_radix_sort.rs +++ b/src/ops_cub/device_radix_sort.rs @@ -5,9 +5,9 @@ use boojum::field::goldilocks::GoldilocksField; use cudart::result::{CudaResult, CudaResultWrap}; use cudart::slice::DeviceSlice; use cudart::stream::CudaStream; -use cudart_sys::{cudaError_t, cudaStream_t}; +use cudart_sys::{cudaError_t, cudaStream_t, cuda_fn_and_stub}; -extern "C" { +cuda_fn_and_stub! { fn sort_keys_u32( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -18,7 +18,9 @@ extern "C" { end_bit: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn sort_keys_descending_u32( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -29,7 +31,9 @@ extern "C" { end_bit: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn sort_keys_u64( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -40,7 +44,9 @@ extern "C" { end_bit: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn sort_keys_descending_u64( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -200,7 +206,7 @@ pub fn sort_keys( ) } -extern "C" { +cuda_fn_and_stub! { fn sort_pairs_u32_by_u32( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -213,7 +219,9 @@ extern "C" { end_bit: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn sort_pairs_descending_u32_by_u32( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -226,7 +234,9 @@ extern "C" { end_bit: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn sort_pairs_u32_by_u64( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -239,7 +249,9 @@ extern "C" { end_bit: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn sort_pairs_descending_u32_by_u64( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -252,7 +264,9 @@ extern "C" { end_bit: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn sort_pairs_u64_by_u32( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -265,7 +279,9 @@ extern "C" { end_bit: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn sort_pairs_descending_u64_by_u32( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -278,7 +294,9 @@ extern "C" { end_bit: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn sort_pairs_u64_by_u64( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -291,7 +309,9 @@ extern "C" { end_bit: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn sort_pairs_descending_u64_by_u64( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, diff --git a/src/ops_cub/device_reduce.rs b/src/ops_cub/device_reduce.rs index 0c6eb81..e510419 100644 --- a/src/ops_cub/device_reduce.rs +++ b/src/ops_cub/device_reduce.rs @@ -5,14 +5,14 @@ use boojum::field::goldilocks::GoldilocksField; use cudart::result::{CudaResult, CudaResultWrap}; use cudart::slice::{DeviceSlice, DeviceVariable}; use cudart::stream::CudaStream; -use cudart_sys::{cudaError_t, cudaStream_t}; +use cudart_sys::{cudaError_t, cudaStream_t, cuda_fn_and_stub}; use crate::device_structures::{ DeviceMatrixChunkImpl, DeviceRepr, DeviceVectorChunkImpl, PtrAndStride, }; use crate::extension_field::ExtensionField; -extern "C" { +cuda_fn_and_stub! { fn reduce_add_bf( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -21,7 +21,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn reduce_add_ef( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -30,7 +32,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn segmented_reduce_add_bf( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -40,7 +44,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn segmented_reduce_add_ef( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -50,7 +56,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn reduce_mul_bf( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -59,7 +67,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn reduce_mul_ef( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -68,7 +78,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn segmented_reduce_mul_bf( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -78,7 +90,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn segmented_reduce_mul_ef( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, diff --git a/src/ops_cub/device_run_length_encode.rs b/src/ops_cub/device_run_length_encode.rs index dc632ff..df8136d 100644 --- a/src/ops_cub/device_run_length_encode.rs +++ b/src/ops_cub/device_run_length_encode.rs @@ -5,9 +5,9 @@ use boojum::field::goldilocks::GoldilocksField; use cudart::result::{CudaResult, CudaResultWrap}; use cudart::slice::{DeviceSlice, DeviceVariable}; use cudart::stream::CudaStream; -use cudart_sys::{cudaError_t, cudaStream_t}; +use cudart_sys::{cudaError_t, cudaStream_t, cuda_fn_and_stub}; -extern "C" { +cuda_fn_and_stub! { fn encode_u32( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -18,7 +18,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn encode_u64( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, diff --git a/src/ops_cub/device_scan.rs b/src/ops_cub/device_scan.rs index f668cf7..8a220f3 100644 --- a/src/ops_cub/device_scan.rs +++ b/src/ops_cub/device_scan.rs @@ -6,11 +6,11 @@ use cudart::event::{CudaEvent, CudaEventCreateFlags}; use cudart::result::{CudaResult, CudaResultWrap}; use cudart::slice::DeviceSlice; use cudart::stream::{CudaStream, CudaStreamCreateFlags, CudaStreamWaitEventFlags}; -use cudart_sys::{cudaError_t, cudaStream_t}; +use cudart_sys::{cudaError_t, cudaStream_t, cuda_fn_and_stub}; use crate::extension_field::ExtensionField; -extern "C" { +cuda_fn_and_stub! { fn exclusive_sum_u32( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -19,7 +19,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn exclusive_sum_reverse_u32( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -28,7 +30,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn inclusive_sum_u32( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -37,7 +41,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn inclusive_sum_reverse_u32( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -46,7 +52,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn exclusive_scan_add_bf( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -55,7 +63,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn exclusive_scan_reverse_add_bf( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -64,7 +74,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn inclusive_scan_add_bf( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -73,7 +85,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn inclusive_scan_reverse_add_bf( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -82,8 +96,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} - #[allow(improper_ctypes)] +cuda_fn_and_stub! { fn exclusive_scan_add_ef( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -92,8 +107,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} - #[allow(improper_ctypes)] +cuda_fn_and_stub! { fn exclusive_scan_reverse_add_ef( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -102,8 +118,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} - #[allow(improper_ctypes)] +cuda_fn_and_stub! { fn inclusive_scan_add_ef( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -112,8 +129,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} - #[allow(improper_ctypes)] +cuda_fn_and_stub! { fn inclusive_scan_reverse_add_ef( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -122,7 +140,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn exclusive_scan_mul_bf( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -131,7 +151,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn exclusive_scan_reverse_mul_bf( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -140,7 +162,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn inclusive_scan_mul_bf( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -149,7 +173,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn inclusive_scan_reverse_mul_bf( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -158,8 +184,8 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; - - #[allow(improper_ctypes)] +} +cuda_fn_and_stub! { fn exclusive_scan_mul_ef( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -168,8 +194,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} - #[allow(improper_ctypes)] +cuda_fn_and_stub! { fn exclusive_scan_reverse_mul_ef( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -178,8 +205,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} - #[allow(improper_ctypes)] +cuda_fn_and_stub! { fn inclusive_scan_mul_ef( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -188,8 +216,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} - #[allow(improper_ctypes)] +cuda_fn_and_stub! { fn inclusive_scan_reverse_mul_ef( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, From 59b4007ace168073bda084e759d7687ef89158a1 Mon Sep 17 00:00:00 2001 From: Robert Remen Date: Mon, 5 Aug 2024 08:38:12 +0000 Subject: [PATCH 2/7] formating nit --- src/ops_cub/device_scan.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ops_cub/device_scan.rs b/src/ops_cub/device_scan.rs index 8a220f3..7d12dc8 100644 --- a/src/ops_cub/device_scan.rs +++ b/src/ops_cub/device_scan.rs @@ -185,6 +185,7 @@ cuda_fn_and_stub! { stream: cudaStream_t, ) -> cudaError_t; } + cuda_fn_and_stub! { fn exclusive_scan_mul_ef( d_temp_storage: *mut u8, From abab58e7246c1edd605c1867f96e57ecd79f6db7 Mon Sep 17 00:00:00 2001 From: Robert Remen Date: Mon, 5 Aug 2024 09:14:44 +0000 Subject: [PATCH 3/7] refactor package names and dependencies, use era_ style everywhere --- Cargo.lock | 50 ++++++++++++------------- Cargo.toml | 12 +++--- benches/blake2s.rs | 8 ++-- benches/gates.rs | 12 +++--- benches/goldilocks.rs | 8 ++-- benches/ntt.rs | 10 ++--- benches/ops_complex.rs | 10 ++--- benches/poseidon.rs | 12 +++--- build/main.rs | 6 +-- native/CMakeLists.txt | 22 +++++------ src/barycentric.rs | 16 ++++---- src/blake2s.rs | 22 +++++------ src/context.rs | 8 ++-- src/device_structures.rs | 4 +- src/extension_field.rs | 16 ++++---- src/gates.rs | 12 +++--- src/ntt.rs | 16 ++++---- src/ops_complex.rs | 22 ++++++----- src/ops_cub/device_radix_sort.rs | 12 +++--- src/ops_cub/device_reduce.rs | 12 +++--- src/ops_cub/device_run_length_encode.rs | 12 +++--- src/ops_cub/device_scan.rs | 14 +++---- src/ops_simple.rs | 22 +++++------ src/poseidon.rs | 14 +++---- src/utils.rs | 2 +- 25 files changed, 179 insertions(+), 175 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e6f944e..9e1b463 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -111,25 +111,6 @@ dependencies = [ "unroll", ] -[[package]] -name = "boojum-cuda" -version = "0.1.1" -dependencies = [ - "blake2", - "boojum", - "cmake", - "criterion", - "criterion-macro", - "era_criterion_cuda", - "era_cudart", - "era_cudart_sys", - "itertools 0.13.0", - "lazy_static", - "rand 0.8.5", - "rayon", - "serial_test", -] - [[package]] name = "bumpalo" version = "3.16.0" @@ -445,10 +426,29 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +[[package]] +name = "era_boojum_cuda" +version = "0.2.0" +dependencies = [ + "blake2", + "boojum", + "cmake", + "criterion", + "criterion-macro", + "era_criterion_cuda", + "era_cudart", + "era_cudart_sys", + "itertools 0.13.0", + "lazy_static", + "rand 0.8.5", + "rayon", + "serial_test", +] + [[package]] name = "era_criterion_cuda" -version = "0.1.0" -source = "git+https://github.com/matter-labs/era-cuda.git?branch=rr-build-refactor#160d7fa3038cf56e59bc90bf5399afa69841780f" +version = "0.2.0" +source = "git+https://github.com/matter-labs/era-cuda.git?branch=rr-build-refactor#70cb1fe932a82e0b69196fd446c245b48617485e" dependencies = [ "criterion", "era_cudart", @@ -456,8 +456,8 @@ dependencies = [ [[package]] name = "era_cudart" -version = "0.1.0" -source = "git+https://github.com/matter-labs/era-cuda.git?branch=rr-build-refactor#160d7fa3038cf56e59bc90bf5399afa69841780f" +version = "0.2.0" +source = "git+https://github.com/matter-labs/era-cuda.git?branch=rr-build-refactor#70cb1fe932a82e0b69196fd446c245b48617485e" dependencies = [ "bitflags", "era_cudart_sys", @@ -466,8 +466,8 @@ dependencies = [ [[package]] name = "era_cudart_sys" -version = "0.1.0" -source = "git+https://github.com/matter-labs/era-cuda.git?branch=rr-build-refactor#160d7fa3038cf56e59bc90bf5399afa69841780f" +version = "0.2.0" +source = "git+https://github.com/matter-labs/era-cuda.git?branch=rr-build-refactor#70cb1fe932a82e0b69196fd446c245b48617485e" dependencies = [ "serde_json", ] diff --git a/Cargo.toml b/Cargo.toml index db9f04e..7d300b4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] -name = "boojum-cuda" -version = "0.1.1" +name = "era_boojum_cuda" +version = "0.2.0" edition = "2021" build = "build/main.rs" authors = ["The Matter Labs Team "] @@ -13,21 +13,21 @@ description = "Boojum-CUDA is a library implementing GPU-accelerated cryptograph [build-dependencies] boojum = "=0.2.1" -cudart-sys = { git = "https://github.com/matter-labs/era-cuda.git", branch = "rr-build-refactor", package = "era_cudart_sys" } +era_cudart_sys = { git = "https://github.com/matter-labs/era-cuda.git", branch = "rr-build-refactor", version = "=0.2.0", package = "era_cudart_sys" } cmake = "0.1" itertools = "0.13" [dependencies] boojum = "=0.2.1" -cudart = { git = "https://github.com/matter-labs/era-cuda.git", branch = "rr-build-refactor", package = "era_cudart" } -cudart-sys = { git = "https://github.com/matter-labs/era-cuda.git", branch = "rr-build-refactor" , package = "era_cudart_sys" } +era_cudart = { git = "https://github.com/matter-labs/era-cuda.git", branch = "rr-build-refactor", version = "=0.2.0", package = "era_cudart" } +era_cudart_sys = { git = "https://github.com/matter-labs/era-cuda.git", branch = "rr-build-refactor", version = "=0.2.0" , package = "era_cudart_sys" } itertools = "0.13" lazy_static = "1.4" [dev-dependencies] blake2 = "0.10" criterion = "0.5" -criterion-cuda = { git = "https://github.com/matter-labs/era-cuda.git", branch = "rr-build-refactor", package = "era_criterion_cuda" } +era_criterion_cuda = { git = "https://github.com/matter-labs/era-cuda.git", branch = "rr-build-refactor", version = "=0.2.0", package = "era_criterion_cuda" } criterion-macro = "0.4" itertools = "0.13" rand = "0.8" diff --git a/benches/blake2s.rs b/benches/blake2s.rs index 261fde6..6f70ee8 100644 --- a/benches/blake2s.rs +++ b/benches/blake2s.rs @@ -3,10 +3,10 @@ use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; -use boojum_cuda::blake2s::blake2s_pow; -use criterion_cuda::CudaMeasurement; -use cudart::memory::{memory_set_async, DeviceAllocation}; -use cudart::stream::CudaStream; +use era_boojum_cuda::blake2s::blake2s_pow; +use era_criterion_cuda::CudaMeasurement; +use era_cudart::memory::{memory_set_async, DeviceAllocation}; +use era_cudart::stream::CudaStream; fn blake2s(c: &mut Criterion) { const MIN_BITS_COUNT: u32 = 17; diff --git a/benches/gates.rs b/benches/gates.rs index 6099fc8..e41bbf1 100644 --- a/benches/gates.rs +++ b/benches/gates.rs @@ -8,12 +8,12 @@ use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Samplin use rand::prelude::*; use rayon::prelude::*; -use boojum_cuda::device_structures::{DeviceMatrixChunk, DeviceMatrixChunkMut}; -use boojum_cuda::gates::*; -use criterion_cuda::CudaMeasurement; -use cudart::memory::{memory_copy, DeviceAllocation}; -use cudart::slice::DeviceSlice; -use cudart::stream::CudaStream; +use era_boojum_cuda::device_structures::{DeviceMatrixChunk, DeviceMatrixChunkMut}; +use era_boojum_cuda::gates::*; +use era_criterion_cuda::CudaMeasurement; +use era_cudart::memory::{memory_copy, DeviceAllocation}; +use era_cudart::slice::DeviceSlice; +use era_cudart::stream::CudaStream; fn poseidon_group(c: &mut Criterion, group_name: &str, gate_name: &str) { const VARIABLES_COUNT: usize = 140; diff --git a/benches/goldilocks.rs b/benches/goldilocks.rs index 7239a48..892a53a 100644 --- a/benches/goldilocks.rs +++ b/benches/goldilocks.rs @@ -11,10 +11,10 @@ use criterion::{ use rand::rngs::StdRng; use rand::{Rng, SeedableRng}; -use boojum_cuda::ops_simple; -use criterion_cuda::CudaMeasurement; -use cudart::memory::{memory_copy, DeviceAllocation}; -use cudart::stream::CudaStream; +use era_boojum_cuda::ops_simple; +use era_criterion_cuda::CudaMeasurement; +use era_cudart::memory::{memory_copy, DeviceAllocation}; +use era_cudart::stream::CudaStream; fn goldilocks_inv(c: &mut Criterion) { const MIN_LOG_N: usize = 17; diff --git a/benches/ntt.rs b/benches/ntt.rs index 5576ce5..43e8f3f 100644 --- a/benches/ntt.rs +++ b/benches/ntt.rs @@ -9,11 +9,11 @@ use criterion::{criterion_group, criterion_main, Criterion, SamplingMode, Throug use rand::{thread_rng, Rng}; use rayon::prelude::*; -use boojum_cuda::context::Context; -use boojum_cuda::ntt::*; -use criterion_cuda::CudaMeasurement; -use cudart::memory::{memory_copy, DeviceAllocation}; -use cudart::stream::CudaStream; +use era_boojum_cuda::context::Context; +use era_boojum_cuda::ntt::*; +use era_criterion_cuda::CudaMeasurement; +use era_cudart::memory::{memory_copy, DeviceAllocation}; +use era_cudart::stream::CudaStream; type CudaMeasurementInvElems = CudaMeasurement; diff --git a/benches/ops_complex.rs b/benches/ops_complex.rs index 979e8b6..da0d399 100644 --- a/benches/ops_complex.rs +++ b/benches/ops_complex.rs @@ -3,11 +3,11 @@ use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; -use boojum_cuda::device_structures::DeviceMatrixMut; -use boojum_cuda::ops_complex::bit_reverse_in_place; -use criterion_cuda::CudaMeasurement; -use cudart::memory::DeviceAllocation; -use cudart::stream::CudaStream; +use era_boojum_cuda::device_structures::DeviceMatrixMut; +use era_boojum_cuda::ops_complex::bit_reverse_in_place; +use era_criterion_cuda::CudaMeasurement; +use era_cudart::memory::DeviceAllocation; +use era_cudart::stream::CudaStream; fn bit_reverse(c: &mut Criterion) { const LOG_MIN_BATCH_SIZE: usize = 0; diff --git a/benches/poseidon.rs b/benches/poseidon.rs index 54b77a7..cb1bc3c 100644 --- a/benches/poseidon.rs +++ b/benches/poseidon.rs @@ -12,12 +12,12 @@ use criterion::{ use rand::{thread_rng, Rng}; use rayon::prelude::*; -use boojum_cuda::poseidon::*; -use criterion_cuda::CudaMeasurement; -use cudart::memory::{memory_copy, DeviceAllocation}; -use cudart::result::CudaResult; -use cudart::slice::DeviceSlice; -use cudart::stream::CudaStream; +use era_boojum_cuda::poseidon::*; +use era_criterion_cuda::CudaMeasurement; +use era_cudart::memory::{memory_copy, DeviceAllocation}; +use era_cudart::result::CudaResult; +use era_cudart::slice::DeviceSlice; +use era_cudart::stream::CudaStream; #[allow(clippy::type_complexity)] fn leaves_group( diff --git a/build/main.rs b/build/main.rs index 9a67d72..a32a4c6 100644 --- a/build/main.rs +++ b/build/main.rs @@ -12,11 +12,11 @@ fn main() { println!("cargo::rustc-check-cfg=cfg(no_cuda)"); #[cfg(no_cuda)] { - println!("cargo::warning={}", cudart_sys::no_cuda_message!()); + println!("cargo::warning={}", era_cudart_sys::no_cuda_message!()); } #[cfg(not(no_cuda))] { - use cudart_sys::{get_cuda_lib_path, get_cuda_version}; + use era_cudart_sys::{get_cuda_lib_path, get_cuda_version}; use std::env::var; let cuda_version = get_cuda_version().expect("Failed to determine CUDA version"); if !cuda_version.starts_with("12.") { @@ -29,7 +29,7 @@ fn main() { .build(); let boojum_lib_path = dst.to_str().unwrap(); println!("cargo:rustc-link-search=native={boojum_lib_path}"); - println!("cargo:rustc-link-lib=static=boojum-cuda-native"); + println!("cargo:rustc-link-lib=static=era_boojum_cuda_native"); let cuda_lib_path = get_cuda_lib_path().unwrap(); let cuda_lib_path_str = cuda_lib_path.to_str().unwrap(); println!("cargo:rustc-link-search=native={cuda_lib_path_str}"); diff --git a/native/CMakeLists.txt b/native/CMakeLists.txt index 6ffb004..8964f65 100644 --- a/native/CMakeLists.txt +++ b/native/CMakeLists.txt @@ -1,10 +1,10 @@ cmake_minimum_required(VERSION 3.24) -project(boojum-cuda-native) +project(era_boojum_cuda_native) enable_language(CUDA) if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES) set(CMAKE_CUDA_ARCHITECTURES native) endif () -add_library(boojum-cuda-native STATIC +add_library(era_boojum_cuda_native STATIC ops_cub/common.cuh ops_cub/device_radix_sort.cu ops_cub/device_reduce.cu @@ -41,12 +41,12 @@ add_library(boojum-cuda-native STATIC poseidon_utils.cuh ptx.cuh ) -target_include_directories(boojum-cuda-native PRIVATE ${CMAKE_INSTALL_PREFIX}) -set_target_properties(boojum-cuda-native PROPERTIES CUDA_STANDARD 17) -set_target_properties(boojum-cuda-native PROPERTIES CUDA_SEPARABLE_COMPILATION ON) -set_target_properties(boojum-cuda-native PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON) -target_compile_options(boojum-cuda-native PRIVATE --expt-relaxed-constexpr) -target_compile_options(boojum-cuda-native PRIVATE --ptxas-options=-v) -#target_compile_options(boojum-cuda-native PRIVATE -lineinfo) -#target_compile_options(boojum-cuda-native PRIVATE --keep) -install(TARGETS boojum-cuda-native DESTINATION .) +target_include_directories(era_boojum_cuda_native PRIVATE ${CMAKE_INSTALL_PREFIX}) +set_target_properties(era_boojum_cuda_native PROPERTIES CUDA_STANDARD 17) +set_target_properties(era_boojum_cuda_native PROPERTIES CUDA_SEPARABLE_COMPILATION ON) +set_target_properties(era_boojum_cuda_native PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON) +target_compile_options(era_boojum_cuda_native PRIVATE --expt-relaxed-constexpr) +target_compile_options(era_boojum_cuda_native PRIVATE --ptxas-options=-v) +#target_compile_options(era_boojum_cuda_native PRIVATE -lineinfo) +#target_compile_options(era_boojum_cuda_native PRIVATE --keep) +install(TARGETS era_boojum_cuda_native DESTINATION .) diff --git a/src/barycentric.rs b/src/barycentric.rs index 26183d2..3b92211 100644 --- a/src/barycentric.rs +++ b/src/barycentric.rs @@ -11,12 +11,12 @@ use crate::utils::WARP_SIZE; use crate::BaseField; use boojum::cs::implementations::utils::domain_generator_for_size; use boojum::field::{Field, PrimeField}; -use cudart::execution::{CudaLaunchConfig, Dim3, KernelFunction}; -use cudart::paste::paste; -use cudart::result::CudaResult; -use cudart::slice::{DeviceSlice, DeviceVariable}; -use cudart::stream::CudaStream; -use cudart::{cuda_kernel_declaration, cuda_kernel_signature_arguments_and_function}; +use era_cudart::execution::{CudaLaunchConfig, Dim3, KernelFunction}; +use era_cudart::paste::paste; +use era_cudart::result::CudaResult; +use era_cudart::slice::{DeviceSlice, DeviceVariable}; +use era_cudart::stream::CudaStream; +use era_cudart::{cuda_kernel_declaration, cuda_kernel_signature_arguments_and_function}; use std::cmp; type BF = BaseField; @@ -294,8 +294,8 @@ mod tests { use boojum::field::goldilocks::GoldilocksExt2; use boojum::field::{rand_from_rng, Field, PrimeField, U64Representable}; use boojum::worker::Worker; - use cudart::memory::{memory_copy_async, DeviceAllocation}; - use cudart::stream::CudaStream; + use era_cudart::memory::{memory_copy_async, DeviceAllocation}; + use era_cudart::stream::CudaStream; use rand::{thread_rng, Rng}; use serial_test::serial; use std::alloc::Global; diff --git a/src/blake2s.rs b/src/blake2s.rs index d4d24b5..b4ccae5 100644 --- a/src/blake2s.rs +++ b/src/blake2s.rs @@ -1,12 +1,12 @@ -use cudart::cuda_kernel; -use cudart::device::{device_get_attribute, get_device}; -use cudart::execution::{CudaLaunchConfig, KernelFunction}; -use cudart::memory::memory_set_async; -use cudart::occupancy::max_active_blocks_per_multiprocessor; -use cudart::result::CudaResult; -use cudart::slice::{DeviceSlice, DeviceVariable}; -use cudart::stream::CudaStream; -use cudart_sys::CudaDeviceAttr; +use era_cudart::cuda_kernel; +use era_cudart::device::{device_get_attribute, get_device}; +use era_cudart::execution::{CudaLaunchConfig, KernelFunction}; +use era_cudart::memory::memory_set_async; +use era_cudart::occupancy::max_active_blocks_per_multiprocessor; +use era_cudart::result::CudaResult; +use era_cudart::slice::{DeviceSlice, DeviceVariable}; +use era_cudart::stream::CudaStream; +use era_cudart_sys::CudaDeviceAttr; use crate::utils::WARP_SIZE; @@ -44,8 +44,8 @@ pub fn blake2s_pow( #[cfg(test)] mod tests { use blake2::{Blake2s256, Digest}; - use cudart::memory::{memory_copy_async, DeviceAllocation}; - use cudart::stream::CudaStream; + use era_cudart::memory::{memory_copy_async, DeviceAllocation}; + use era_cudart::stream::CudaStream; #[test] fn blake2s_pow() { diff --git a/src/context.rs b/src/context.rs index d79028b..8ae05d7 100644 --- a/src/context.rs +++ b/src/context.rs @@ -2,10 +2,10 @@ use boojum::cs::implementations::utils::domain_generator_for_size; use boojum::fft::{bitreverse_enumeration_inplace, distribute_powers}; use boojum::field::goldilocks::GoldilocksField; use boojum::field::{Field, PrimeField}; -use cudart::memory::{memory_copy, DeviceAllocation}; -use cudart::result::{CudaResult, CudaResultWrap}; -use cudart::slice::DeviceSlice; -use cudart_sys::{cudaMemcpyToSymbol, cuda_struct_and_stub, CudaMemoryCopyKind}; +use era_cudart::memory::{memory_copy, DeviceAllocation}; +use era_cudart::result::{CudaResult, CudaResultWrap}; +use era_cudart::slice::DeviceSlice; +use era_cudart_sys::{cudaMemcpyToSymbol, cuda_struct_and_stub, CudaMemoryCopyKind}; use std::mem::size_of; use std::os::raw::c_void; diff --git a/src/device_structures.rs b/src/device_structures.rs index 6a17626..bf243a1 100644 --- a/src/device_structures.rs +++ b/src/device_structures.rs @@ -1,7 +1,7 @@ use crate::extension_field::{ExtensionField, VectorizedExtensionField}; use crate::BaseField; -use cudart::memory::DeviceAllocation; -use cudart::slice::{DeviceSlice, DeviceVariable}; +use era_cudart::memory::DeviceAllocation; +use era_cudart::slice::{DeviceSlice, DeviceVariable}; pub trait DeviceRepr: Sized { type Type: Sized; diff --git a/src/extension_field.rs b/src/extension_field.rs index b85eaee..4522def 100644 --- a/src/extension_field.rs +++ b/src/extension_field.rs @@ -1,10 +1,10 @@ use boojum::field::goldilocks::{GoldilocksExt2, GoldilocksField}; -use cudart::cuda_kernel_declaration; -use cudart::cuda_kernel_signature_arguments_and_function; -use cudart::execution::{CudaLaunchConfig, KernelFunction}; -use cudart::paste::paste; -use cudart::result::CudaResult; -use cudart::stream::CudaStream; +use era_cudart::cuda_kernel_declaration; +use era_cudart::cuda_kernel_signature_arguments_and_function; +use era_cudart::execution::{CudaLaunchConfig, KernelFunction}; +use era_cudart::paste::paste; +use era_cudart::result::CudaResult; +use era_cudart::stream::CudaStream; use crate::device_structures::{ DeviceMatrixChunkImpl, DeviceMatrixChunkMutImpl, DeviceRepr, MutPtrAndStride, PtrAndStride, @@ -169,8 +169,8 @@ mod tests { use crate::extension_field::{ExtensionField, VectorizedExtensionField}; use boojum::field::goldilocks::GoldilocksField; use boojum::field::Field; - use cudart::memory::{memory_copy_async, DeviceAllocation}; - use cudart::stream::CudaStream; + use era_cudart::memory::{memory_copy_async, DeviceAllocation}; + use era_cudart::stream::CudaStream; use itertools::Itertools; use rand::distributions::Uniform; use rand::prelude::*; diff --git a/src/gates.rs b/src/gates.rs index ebcdb92..a113f75 100644 --- a/src/gates.rs +++ b/src/gates.rs @@ -8,10 +8,10 @@ use crate::BaseField; use boojum::cs::traits::evaluator::GateConstraintEvaluator; use boojum::cs::traits::gate::Gate; use boojum::gpu_synthesizer::get_evaluator_name; -use cudart::cuda_kernel; -use cudart::execution::{CudaLaunchConfig, KernelFunction}; -use cudart::result::CudaResult; -use cudart::stream::CudaStream; +use era_cudart::cuda_kernel; +use era_cudart::execution::{CudaLaunchConfig, KernelFunction}; +use era_cudart::result::CudaResult; +use era_cudart::stream::CudaStream; use std::mem::size_of; type BF = BaseField; @@ -243,8 +243,8 @@ mod tests { use boojum::field::Field; use boojum::gpu_synthesizer::{TestDestination, TestSource}; use boojum::implementations::poseidon2::Poseidon2Goldilocks; - use cudart::memory::{memory_copy_async, DeviceAllocation}; - use cudart::slice::DeviceSlice; + use era_cudart::memory::{memory_copy_async, DeviceAllocation}; + use era_cudart::slice::DeviceSlice; use rand::prelude::*; type EF = ExtensionField; diff --git a/src/ntt.rs b/src/ntt.rs index 3aa9d7d..d1aec1c 100644 --- a/src/ntt.rs +++ b/src/ntt.rs @@ -1,11 +1,11 @@ use crate::context::OMEGA_LOG_ORDER; use boojum::field::goldilocks::GoldilocksField; -use cudart::cuda_kernel; -use cudart::error::get_last_error; -use cudart::execution::{CudaLaunchConfig, KernelFunction}; -use cudart::result::{CudaResult, CudaResultWrap}; -use cudart::slice::DeviceSlice; -use cudart::stream::CudaStream; +use era_cudart::cuda_kernel; +use era_cudart::error::get_last_error; +use era_cudart::execution::{CudaLaunchConfig, KernelFunction}; +use era_cudart::result::{CudaResult, CudaResultWrap}; +use era_cudart::slice::DeviceSlice; +use era_cudart::stream::CudaStream; cuda_kernel!( SingleStage, @@ -380,7 +380,9 @@ mod tests { }; use boojum::field::{Field, PrimeField}; use boojum::worker::Worker; - use cudart::memory::{memory_copy_async, CudaHostAllocFlags, DeviceAllocation, HostAllocation}; + use era_cudart::memory::{ + memory_copy_async, CudaHostAllocFlags, DeviceAllocation, HostAllocation, + }; use rand::Rng; use serial_test::serial; use std::alloc::Global; diff --git a/src/ops_complex.rs b/src/ops_complex.rs index 059e19b..4645d7a 100644 --- a/src/ops_complex.rs +++ b/src/ops_complex.rs @@ -9,12 +9,14 @@ use crate::ops_cub::device_scan::{get_scan_temp_storage_bytes, scan_in_place, Sc use crate::ops_simple::{set_by_val, set_to_zero}; use crate::utils::{get_grid_block_dims_for_threads_count, WARP_SIZE}; use crate::BaseField; -use cudart::execution::{CudaLaunchConfig, Dim3, KernelFunction}; -use cudart::paste::paste; -use cudart::result::CudaResult; -use cudart::slice::{DeviceSlice, DeviceVariable}; -use cudart::stream::CudaStream; -use cudart::{cuda_kernel, cuda_kernel_declaration, cuda_kernel_signature_arguments_and_function}; +use era_cudart::execution::{CudaLaunchConfig, Dim3, KernelFunction}; +use era_cudart::paste::paste; +use era_cudart::result::CudaResult; +use era_cudart::slice::{DeviceSlice, DeviceVariable}; +use era_cudart::stream::CudaStream; +use era_cudart::{ + cuda_kernel, cuda_kernel_declaration, cuda_kernel_signature_arguments_and_function, +}; use std::mem; type BF = BaseField; @@ -1341,10 +1343,10 @@ mod tests { use boojum::field::goldilocks::GoldilocksField; use boojum::field::{Field, PrimeField}; use boojum::worker::Worker; - use cudart::memory::{memory_copy_async, DeviceAllocation}; - use cudart::result::CudaResult; - use cudart::slice::DeviceSlice; - use cudart::stream::CudaStream; + use era_cudart::memory::{memory_copy_async, DeviceAllocation}; + use era_cudart::result::CudaResult; + use era_cudart::slice::DeviceSlice; + use era_cudart::stream::CudaStream; use itertools::Itertools; use rand::distributions::{Distribution, Uniform}; use rand::{thread_rng, Rng}; diff --git a/src/ops_cub/device_radix_sort.rs b/src/ops_cub/device_radix_sort.rs index df06845..20ff25e 100644 --- a/src/ops_cub/device_radix_sort.rs +++ b/src/ops_cub/device_radix_sort.rs @@ -2,10 +2,10 @@ use std::ptr::null_mut; use boojum::field::goldilocks::GoldilocksField; -use cudart::result::{CudaResult, CudaResultWrap}; -use cudart::slice::DeviceSlice; -use cudart::stream::CudaStream; -use cudart_sys::{cudaError_t, cudaStream_t, cuda_fn_and_stub}; +use era_cudart::result::{CudaResult, CudaResultWrap}; +use era_cudart::slice::DeviceSlice; +use era_cudart::stream::CudaStream; +use era_cudart_sys::{cudaError_t, cudaStream_t, cuda_fn_and_stub}; cuda_fn_and_stub! { fn sort_keys_u32( @@ -498,8 +498,8 @@ mod tests { use rand::distributions::{Distribution, Standard}; use rand::{thread_rng, Rng}; - use cudart::memory::{memory_copy_async, DeviceAllocation}; - use cudart::stream::CudaStream; + use era_cudart::memory::{memory_copy_async, DeviceAllocation}; + use era_cudart::stream::CudaStream; use super::*; diff --git a/src/ops_cub/device_reduce.rs b/src/ops_cub/device_reduce.rs index e510419..afd3ce6 100644 --- a/src/ops_cub/device_reduce.rs +++ b/src/ops_cub/device_reduce.rs @@ -2,10 +2,10 @@ use std::ptr::{null, null_mut}; use boojum::field::goldilocks::GoldilocksField; -use cudart::result::{CudaResult, CudaResultWrap}; -use cudart::slice::{DeviceSlice, DeviceVariable}; -use cudart::stream::CudaStream; -use cudart_sys::{cudaError_t, cudaStream_t, cuda_fn_and_stub}; +use era_cudart::result::{CudaResult, CudaResultWrap}; +use era_cudart::slice::{DeviceSlice, DeviceVariable}; +use era_cudart::stream::CudaStream; +use era_cudart_sys::{cudaError_t, cudaStream_t, cuda_fn_and_stub}; use crate::device_structures::{ DeviceMatrixChunkImpl, DeviceRepr, DeviceVectorChunkImpl, PtrAndStride, @@ -319,8 +319,8 @@ mod tests { use itertools::Itertools; use rand::{thread_rng, Rng}; - use cudart::memory::{memory_copy_async, DeviceAllocation}; - use cudart::stream::CudaStream; + use era_cudart::memory::{memory_copy_async, DeviceAllocation}; + use era_cudart::stream::CudaStream; use crate::device_structures::DeviceMatrix; use crate::extension_field::ExtensionField; diff --git a/src/ops_cub/device_run_length_encode.rs b/src/ops_cub/device_run_length_encode.rs index df8136d..ecac1d5 100644 --- a/src/ops_cub/device_run_length_encode.rs +++ b/src/ops_cub/device_run_length_encode.rs @@ -2,10 +2,10 @@ use std::ptr::null_mut; use boojum::field::goldilocks::GoldilocksField; -use cudart::result::{CudaResult, CudaResultWrap}; -use cudart::slice::{DeviceSlice, DeviceVariable}; -use cudart::stream::CudaStream; -use cudart_sys::{cudaError_t, cudaStream_t, cuda_fn_and_stub}; +use era_cudart::result::{CudaResult, CudaResultWrap}; +use era_cudart::slice::{DeviceSlice, DeviceVariable}; +use era_cudart::stream::CudaStream; +use era_cudart_sys::{cudaError_t, cudaStream_t, cuda_fn_and_stub}; cuda_fn_and_stub! { fn encode_u32( @@ -168,8 +168,8 @@ mod tests { use rand::distributions::{Distribution, Standard, Uniform}; use rand::{thread_rng, Rng}; - use cudart::memory::{memory_copy_async, DeviceAllocation}; - use cudart::stream::CudaStream; + use era_cudart::memory::{memory_copy_async, DeviceAllocation}; + use era_cudart::stream::CudaStream; fn encode() where diff --git a/src/ops_cub/device_scan.rs b/src/ops_cub/device_scan.rs index 7d12dc8..95db4fb 100644 --- a/src/ops_cub/device_scan.rs +++ b/src/ops_cub/device_scan.rs @@ -2,11 +2,11 @@ use std::ptr::null_mut; use boojum::field::goldilocks::GoldilocksField; -use cudart::event::{CudaEvent, CudaEventCreateFlags}; -use cudart::result::{CudaResult, CudaResultWrap}; -use cudart::slice::DeviceSlice; -use cudart::stream::{CudaStream, CudaStreamCreateFlags, CudaStreamWaitEventFlags}; -use cudart_sys::{cudaError_t, cudaStream_t, cuda_fn_and_stub}; +use era_cudart::event::{CudaEvent, CudaEventCreateFlags}; +use era_cudart::result::{CudaResult, CudaResultWrap}; +use era_cudart::slice::DeviceSlice; +use era_cudart::stream::{CudaStream, CudaStreamCreateFlags, CudaStreamWaitEventFlags}; +use era_cudart_sys::{cudaError_t, cudaStream_t, cuda_fn_and_stub}; use crate::extension_field::ExtensionField; @@ -694,8 +694,8 @@ mod tests { use rand::distributions::Uniform; use rand::{thread_rng, Rng}; - use cudart::memory::{memory_copy_async, DeviceAllocation}; - use cudart::stream::CudaStream; + use era_cudart::memory::{memory_copy_async, DeviceAllocation}; + use era_cudart::stream::CudaStream; use crate::extension_field::ExtensionField; use crate::ops_cub::device_scan::{get_scan_temp_storage_bytes, Scan}; diff --git a/src/ops_simple.rs b/src/ops_simple.rs index d67c7a9..e7e4811 100644 --- a/src/ops_simple.rs +++ b/src/ops_simple.rs @@ -5,13 +5,13 @@ use crate::device_structures::{ use crate::extension_field::VectorizedExtensionField; use crate::utils::{get_grid_block_dims_for_threads_count, WARP_SIZE}; use crate::BaseField; -use cudart::execution::{CudaLaunchConfig, Dim3, KernelFunction}; -use cudart::memory::memory_set_async; -use cudart::paste::paste; -use cudart::result::CudaResult; -use cudart::slice::DeviceSlice; -use cudart::stream::CudaStream; -use cudart::{cuda_kernel_declaration, cuda_kernel_signature_arguments_and_function}; +use era_cudart::execution::{CudaLaunchConfig, Dim3, KernelFunction}; +use era_cudart::memory::memory_set_async; +use era_cudart::paste::paste; +use era_cudart::result::CudaResult; +use era_cudart::slice::DeviceSlice; +use era_cudart::stream::CudaStream; +use era_cudart::{cuda_kernel_declaration, cuda_kernel_signature_arguments_and_function}; type BF = BaseField; type EF = VectorizedExtensionField; @@ -697,10 +697,10 @@ ternary_ops_impl!(EF, EF, EF, EF); mod tests { use boojum::field::goldilocks::GoldilocksField; use boojum::field::{Field, PrimeField, U64Representable}; - use cudart::memory::{memory_copy_async, DeviceAllocation}; - use cudart::result::CudaResult; - use cudart::slice::DeviceSlice; - use cudart::stream::CudaStream; + use era_cudart::memory::{memory_copy_async, DeviceAllocation}; + use era_cudart::result::CudaResult; + use era_cudart::slice::DeviceSlice; + use era_cudart::stream::CudaStream; use itertools::Itertools; use std::ops::{Add, Mul, Sub}; diff --git a/src/poseidon.rs b/src/poseidon.rs index 7111b38..27a8f86 100644 --- a/src/poseidon.rs +++ b/src/poseidon.rs @@ -5,11 +5,11 @@ use crate::utils::{get_grid_block_dims_for_threads_count, WARP_SIZE}; use crate::BaseField; use boojum::field::goldilocks::GoldilocksField; use boojum::implementations::poseidon_goldilocks_params::*; -use cudart::cuda_kernel; -use cudart::execution::{CudaLaunchConfig, Dim3, KernelFunction}; -use cudart::result::CudaResult; -use cudart::slice::DeviceSlice; -use cudart::stream::CudaStream; +use era_cudart::cuda_kernel; +use era_cudart::execution::{CudaLaunchConfig, Dim3, KernelFunction}; +use era_cudart::result::CudaResult; +use era_cudart::slice::DeviceSlice; +use era_cudart::stream::CudaStream; type BF = BaseField; @@ -397,8 +397,8 @@ mod tests { use itertools::Itertools; use rand::Rng; - use cudart::memory::{memory_copy_async, DeviceAllocation}; - use cudart::slice::CudaSlice; + use era_cudart::memory::{memory_copy_async, DeviceAllocation}; + use era_cudart::slice::CudaSlice; // use boojum::implementations::poseidon_goldilocks::poseidon_permutation_optimized; use crate::device_structures::{DeviceMatrix, DeviceMatrixMut}; diff --git a/src/utils.rs b/src/utils.rs index cca6d3e..6673309 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,4 +1,4 @@ -use cudart::execution::Dim3; +use era_cudart::execution::Dim3; use std::cmp::min; pub const WARP_SIZE: u32 = 32; From ac672249dcd0a561026fbea6f922162b336e9907 Mon Sep 17 00:00:00 2001 From: Robert Remen Date: Mon, 5 Aug 2024 16:17:54 +0000 Subject: [PATCH 4/7] fix CUDA version error message --- build/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build/main.rs b/build/main.rs index a32a4c6..0aeb588 100644 --- a/build/main.rs +++ b/build/main.rs @@ -18,7 +18,7 @@ fn main() { { use era_cudart_sys::{get_cuda_lib_path, get_cuda_version}; use std::env::var; - let cuda_version = get_cuda_version().expect("Failed to determine CUDA version"); + let cuda_version = get_cuda_version().expect("Failed to determine the CUDA version."); if !cuda_version.starts_with("12.") { println!("cargo::warning=CUDA version {cuda_version} detected. This crate is only tested with CUDA 12.*."); } From 2bc2178b46700815d71251afe3171fc385a4b6f3 Mon Sep 17 00:00:00 2001 From: Robert Remen Date: Tue, 6 Aug 2024 10:36:20 +0200 Subject: [PATCH 5/7] Fix CUDA Toolkit naming, bump boojum dependency and toolchain --- Cargo.lock | 21 ++++++++++++--------- Cargo.toml | 12 ++++++------ README.md | 11 ++++++----- build/main.rs | 5 +++-- rust-toolchain.toml | 2 +- 5 files changed, 28 insertions(+), 23 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9e1b463..28d7cce 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -82,9 +82,9 @@ dependencies = [ [[package]] name = "boojum" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cf10f4b3980dc82dc31709dfa8193b7d6106a3a7ce9f9a9f8872bfb8719aa2d" +checksum = "df88daa33db46d683967ca09a4f04817c38950483f2501a771d497669a8a4bb1" dependencies = [ "arrayvec", "bincode", @@ -382,9 +382,9 @@ dependencies = [ [[package]] name = "cs_derive" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab1f510bfddd1fc643a1d1bf8a405e279ffc818ee7ac86ed658e667a44958178" +checksum = "24cf603ca4299c6e20e644da88897f7b81d688510f4887e818b0bfe0b792081b" dependencies = [ "proc-macro-error", "proc-macro2", @@ -448,7 +448,8 @@ dependencies = [ [[package]] name = "era_criterion_cuda" version = "0.2.0" -source = "git+https://github.com/matter-labs/era-cuda.git?branch=rr-build-refactor#70cb1fe932a82e0b69196fd446c245b48617485e" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0b05cf99fdc96bcb2fca3b48bc02ff0774e84e58d48b94e1e5c92786b83dc01" dependencies = [ "criterion", "era_cudart", @@ -457,7 +458,8 @@ dependencies = [ [[package]] name = "era_cudart" version = "0.2.0" -source = "git+https://github.com/matter-labs/era-cuda.git?branch=rr-build-refactor#70cb1fe932a82e0b69196fd446c245b48617485e" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6592e1277ac1ab0f3925151784a3809f4f973b1a63a0244b6d44e3872b413199" dependencies = [ "bitflags", "era_cudart_sys", @@ -467,7 +469,8 @@ dependencies = [ [[package]] name = "era_cudart_sys" version = "0.2.0" -source = "git+https://github.com/matter-labs/era-cuda.git?branch=rr-build-refactor#70cb1fe932a82e0b69196fd446c245b48617485e" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21767c452b418a7fb2bb9ffb07c744e4616da8d14176db4dcab76649c3206ece" dependencies = [ "serde_json", ] @@ -1223,9 +1226,9 @@ dependencies = [ [[package]] name = "scc" -version = "2.1.7" +version = "2.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a870e34715d5d59c8536040d4d4e7a41af44d527dc50237036ba4090db7996fc" +checksum = "8d777f59627453628a9a5be1ee8d948745b94b1dfc2d0c3099cbd9e08ab89e7c" dependencies = [ "sdd", ] diff --git a/Cargo.toml b/Cargo.toml index 7d300b4..103785d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,22 +12,22 @@ categories = ["cryptography"] description = "Boojum-CUDA is a library implementing GPU-accelerated cryptographic functionality for the zkSync prover" [build-dependencies] -boojum = "=0.2.1" -era_cudart_sys = { git = "https://github.com/matter-labs/era-cuda.git", branch = "rr-build-refactor", version = "=0.2.0", package = "era_cudart_sys" } +boojum = "=0.2.2" +era_cudart_sys = "=0.2.0" cmake = "0.1" itertools = "0.13" [dependencies] -boojum = "=0.2.1" -era_cudart = { git = "https://github.com/matter-labs/era-cuda.git", branch = "rr-build-refactor", version = "=0.2.0", package = "era_cudart" } -era_cudart_sys = { git = "https://github.com/matter-labs/era-cuda.git", branch = "rr-build-refactor", version = "=0.2.0" , package = "era_cudart_sys" } +boojum = "=0.2.2" +era_cudart = "=0.2.0" +era_cudart_sys = "=0.2.0" itertools = "0.13" lazy_static = "1.4" [dev-dependencies] blake2 = "0.10" criterion = "0.5" -era_criterion_cuda = { git = "https://github.com/matter-labs/era-cuda.git", branch = "rr-build-refactor", version = "=0.2.0", package = "era_criterion_cuda" } +era_criterion_cuda = "=0.2.0" criterion-macro = "0.4" itertools = "0.13" rand = "0.8" diff --git a/README.md b/README.md index 832cf0e..6ad4aa6 100644 --- a/README.md +++ b/README.md @@ -7,18 +7,19 @@ decentralization. Since it's EVM compatible (Solidity/Vyper), 99% of Ethereum pr or re-auditing a single line of code. zkSync Era also uses an LLVM-based compiler that will eventually let developers write smart contracts in C++, Rust and other popular languages. - ## Boojum-CUDA + Boojum-CUDA is a library implementing GPU-accelerated cryptographic functionality for the zkSync prover. -Prerequisites: -- CUDA 12.x +Prerequisites: + +- CUDA Toolkit 12.x - CMake 3.24 and up - clang - rust nightly toolchain -By default, the CUDA code is compiled for the GPU that is present in the system. If there is no GPU in the system or -another architecture is desired, the environment variable `CUDAARCHS` can be set to the desired architecture. +By default, the CUDA code is compiled for the GPU that is present in the system. If there is no GPU in the system or +another architecture is desired, the environment variable `CUDAARCHS` can be set to the desired architecture. See https://cmake.org/cmake/help/latest/variable/CMAKE_CUDA_ARCHITECTURES.html. ## Policies diff --git a/build/main.rs b/build/main.rs index 0aeb588..553045d 100644 --- a/build/main.rs +++ b/build/main.rs @@ -18,9 +18,10 @@ fn main() { { use era_cudart_sys::{get_cuda_lib_path, get_cuda_version}; use std::env::var; - let cuda_version = get_cuda_version().expect("Failed to determine the CUDA version."); + let cuda_version = + get_cuda_version().expect("Failed to determine the CUDA Toolkit version."); if !cuda_version.starts_with("12.") { - println!("cargo::warning=CUDA version {cuda_version} detected. This crate is only tested with CUDA 12.*."); + println!("cargo::warning=CUDA Toolkit version {cuda_version} detected. This crate is only tested with CUDA Toolkit 12.*."); } let cudaarchs = var("CUDAARCHS").unwrap_or("native".to_string()); let dst = cmake::Config::new("native") diff --git a/rust-toolchain.toml b/rust-toolchain.toml index a671fa6..bc5d1d6 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,2 +1,2 @@ [toolchain] -channel = "nightly-2024-05-07" +channel = "nightly-2024-08-01" From 37e1413caa773bbd0e50587469ff865ef2373b63 Mon Sep 17 00:00:00 2001 From: Robert Remen Date: Tue, 6 Aug 2024 10:43:05 +0200 Subject: [PATCH 6/7] fix toolchain version in CI --- .github/workflows/build.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 7ac959f..06a9b6a 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -49,15 +49,15 @@ jobs: - name: Setup rust run: | rustup set profile minimal - rustup toolchain install nightly-2024-05-07 - rustup default nightly-2024-05-07 + rustup toolchain install nightly-2024-08-01 + rustup default nightly-2024-08-01 - name: Build tests and copy binaries to a separate dir shell: bash run: | mkdir artifacts CUDAARCHS=80 CARGO_TARGET_DIR=./build \ - cargo +nightly-2024-05-07 test --no-run --release --message-format=json -q \ + cargo +nightly-2024-08-01 test --no-run --release --message-format=json -q \ | jq -r 'select(.executable != null) | .executable' \ | while read binary; do cp "$binary" artifacts/ From 2ba7fb87a4b27c6d2171a67fb9de8fe25da38927 Mon Sep 17 00:00:00 2001 From: Robert Remen Date: Tue, 6 Aug 2024 11:17:17 +0200 Subject: [PATCH 7/7] rename crate back to `boojum-cuda` --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 103785d..f3a732b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "era_boojum_cuda" +name = "boojum-cuda" version = "0.2.0" edition = "2021" build = "build/main.rs"