From 5f301b720fd831f6cfba8f7ba17ba01899732e1a Mon Sep 17 00:00:00 2001 From: Robert Remen Date: Sun, 4 Aug 2024 12:40:49 +0200 Subject: [PATCH] build refactor --- Cargo.lock | 388 ++++++++---------------- Cargo.toml | 8 +- build/gates.rs | 6 +- build/main.rs | 46 +-- build/poseidon_constants.rs | 2 +- build/template.rs | 8 +- native/.gitignore | 2 - native/CMakeLists.txt | 16 +- native/gate_kernels_template.cuh | 3 + native/gates.cu | 4 + native/gates_template.cu | 6 - native/poseidon_constants_template.cuh | 2 - src/.gitignore | 1 - src/context.rs | 20 +- src/gates.rs | 2 +- src/ops_complex.rs | 2 +- src/ops_cub/device_radix_sort.rs | 26 +- src/ops_cub/device_reduce.rs | 18 +- src/ops_cub/device_run_length_encode.rs | 6 +- src/ops_cub/device_scan.rs | 51 +++- 20 files changed, 274 insertions(+), 343 deletions(-) create mode 100644 native/gate_kernels_template.cuh create mode 100644 native/gates.cu delete mode 100644 native/gates_template.cu delete mode 100644 src/.gitignore diff --git a/Cargo.lock b/Cargo.lock index b652e50..15b0397 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -19,9 +19,9 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstyle" -version = "1.0.7" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b" +checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" [[package]] name = "arrayvec" @@ -44,34 +44,11 @@ dependencies = [ "serde", ] -[[package]] -name = "bindgen" -version = "0.69.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0" -dependencies = [ - "bitflags", - "cexpr", - "clang-sys", - "itertools 0.12.1", - "lazy_static", - "lazycell", - "log", - "prettyplease", - "proc-macro2", - "quote", - "regex", - "rustc-hash", - "shlex", - "syn 2.0.66", - "which", -] - [[package]] name = "bitflags" -version = "2.5.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" [[package]] name = "bitvec" @@ -142,8 +119,8 @@ dependencies = [ "boojum", "cmake", "criterion", - "criterion-cuda", "criterion-macro", + "era_criterion_cuda", "era_cudart", "era_cudart_sys", "itertools 0.13.0", @@ -173,9 +150,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.6.0" +version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" +checksum = "8318a53db07bb3f8dca91a600466bdb3f2eaadeedfdbcf02e1accbad9271ba50" [[package]] name = "cast" @@ -185,18 +162,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.0.99" +version = "1.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96c51067fd44124faa7f870b4b1c969379ad32b2ba805aa959430ceaa384f695" - -[[package]] -name = "cexpr" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" -dependencies = [ - "nom", -] +checksum = "26a5c3fd7bfa1ce3897a3a3501d362b2d87b7f2583ebcb4a949ec25911025cbc" [[package]] name = "cfg-if" @@ -231,31 +199,20 @@ dependencies = [ "half", ] -[[package]] -name = "clang-sys" -version = "1.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" -dependencies = [ - "glob", - "libc", - "libloading", -] - [[package]] name = "clap" -version = "4.5.7" +version = "4.5.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5db83dced34638ad474f39f250d7fea9598bdd239eaced1bdf45d597da0f433f" +checksum = "0fbb260a053428790f3de475e304ff84cdbc4face759ea7a3e64c1edd938a7fc" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.5.7" +version = "4.5.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7e204572485eb3fbf28f871612191521df159bc3e15a9f5064c66dba3a8c05f" +checksum = "64b17d7ea74e9f833c7dbf2cbe4fb12ff26783eda4782a8975b72f895c9b4d99" dependencies = [ "anstyle", "clap_lex", @@ -263,9 +220,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.7.1" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b82cf0babdbd58558212896d1a4272303a57bdb245c2bf1147185fb45640e70" +checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" [[package]] name = "cmake" @@ -340,15 +297,6 @@ dependencies = [ "walkdir", ] -[[package]] -name = "criterion-cuda" -version = "0.1.0" -source = "git+https://github.com/matter-labs/era-cuda.git?branch=main#0ac0eeb7fdef62e3ef05c97de0c8accbc963f3bc" -dependencies = [ - "criterion", - "cudart", -] - [[package]] name = "criterion-macro" version = "0.4.0" @@ -463,25 +411,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "cudart" -version = "0.1.0" -source = "git+https://github.com/matter-labs/era-cuda.git?branch=main#0ac0eeb7fdef62e3ef05c97de0c8accbc963f3bc" -dependencies = [ - "bitflags", - "cudart-sys", - "paste", -] - -[[package]] -name = "cudart-sys" -version = "0.1.0" -source = "git+https://github.com/matter-labs/era-cuda.git?branch=main#0ac0eeb7fdef62e3ef05c97de0c8accbc963f3bc" -dependencies = [ - "bindgen", - "serde_json", -] - [[package]] name = "derivative" version = "2.2.0" @@ -506,9 +435,9 @@ dependencies = [ [[package]] name = "either" -version = "1.12.0" +version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" [[package]] name = "equivalent" @@ -516,11 +445,19 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +[[package]] +name = "era_criterion_cuda" +version = "0.1.0" +source = "git+https://github.com/matter-labs/era-cuda.git?branch=rr-build-refactor#160d7fa3038cf56e59bc90bf5399afa69841780f" +dependencies = [ + "criterion", + "era_cudart", +] + [[package]] name = "era_cudart" version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1725b17e5e41b89f566ace3900f119fdc87f04e2daa8e253b668573ad67a454f" +source = "git+https://github.com/matter-labs/era-cuda.git?branch=rr-build-refactor#160d7fa3038cf56e59bc90bf5399afa69841780f" dependencies = [ "bitflags", "era_cudart_sys", @@ -530,23 +467,11 @@ dependencies = [ [[package]] name = "era_cudart_sys" version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60d46683f8a9a5364874f95b00073f6dc93d33e9a019f150b0d6ce09ffc13251" +source = "git+https://github.com/matter-labs/era-cuda.git?branch=rr-build-refactor#160d7fa3038cf56e59bc90bf5399afa69841780f" dependencies = [ - "bindgen", "serde_json", ] -[[package]] -name = "errno" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" -dependencies = [ - "libc", - "windows-sys", -] - [[package]] name = "ethbloom" version = "0.13.0" @@ -730,12 +655,6 @@ dependencies = [ "wasi", ] -[[package]] -name = "glob" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" - [[package]] name = "half" version = "2.4.1" @@ -764,15 +683,6 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" -[[package]] -name = "home" -version = "0.5.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" -dependencies = [ - "windows-sys", -] - [[package]] name = "impl-codec" version = "0.6.0" @@ -813,9 +723,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.2.6" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" +checksum = "de3fc2e30ba82dd1b3911c8de1ffc143c74a914a14e99514d7637e3099df5ea0" dependencies = [ "equivalent", "hashbrown", @@ -829,7 +739,7 @@ checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" dependencies = [ "hermit-abi", "libc", - "windows-sys", + "windows-sys 0.52.0", ] [[package]] @@ -841,15 +751,6 @@ dependencies = [ "either", ] -[[package]] -name = "itertools" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" -dependencies = [ - "either", -] - [[package]] name = "itertools" version = "0.13.0" @@ -889,34 +790,12 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" -[[package]] -name = "lazycell" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" - [[package]] name = "libc" version = "0.2.155" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" -[[package]] -name = "libloading" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19" -dependencies = [ - "cfg-if", - "windows-targets", -] - -[[package]] -name = "linux-raw-sys" -version = "0.4.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" - [[package]] name = "lock_api" version = "0.4.12" @@ -929,9 +808,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.21" +version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" [[package]] name = "memchr" @@ -939,27 +818,11 @@ version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" -[[package]] -name = "minimal-lexical" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" - -[[package]] -name = "nom" -version = "7.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" -dependencies = [ - "memchr", - "minimal-lexical", -] - [[package]] name = "num-bigint" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c165a9ab64cf766f73521c0dd2cfdff64f488b8f0b3e621face3462d3db536d7" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" dependencies = [ "num-integer", "num-traits", @@ -1011,9 +874,9 @@ checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "oorandom" -version = "11.1.3" +version = "11.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" +checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" [[package]] name = "pairing_ce" @@ -1125,18 +988,11 @@ dependencies = [ [[package]] name = "ppv-lite86" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" - -[[package]] -name = "prettyplease" version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f12335488a2f3b0a83b14edad48dca9879ce89b2edd10e80237e4e852dd645e" +checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" dependencies = [ - "proc-macro2", - "syn 2.0.66", + "zerocopy", ] [[package]] @@ -1187,9 +1043,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.85" +version = "1.0.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22244ce15aa966053a896d1accb3a6e68469b97c7f33f284b99f0d576879fc23" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" dependencies = [ "unicode-ident", ] @@ -1298,18 +1154,18 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.5.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c82cf8cff14456045f55ec4241383baeff27af886adb72ffb2162f99911de0fd" +checksum = "2a908a6e00f1fdd0dfd9c0eb08ce85126f6d8bbda50017e74bc4a4b7d4a926a4" dependencies = [ "bitflags", ] [[package]] name = "regex" -version = "1.10.5" +version = "1.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" +checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619" dependencies = [ "aho-corasick", "memchr", @@ -1344,31 +1200,12 @@ dependencies = [ "rustc-hex", ] -[[package]] -name = "rustc-hash" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" - [[package]] name = "rustc-hex" version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3e75f6a532d0fd9f7f13144f392b6ad56a32696bfcd9c78f797f16bbb6f072d6" -[[package]] -name = "rustix" -version = "0.38.34" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" -dependencies = [ - "bitflags", - "errno", - "libc", - "linux-raw-sys", - "windows-sys", -] - [[package]] name = "ryu" version = "1.0.18" @@ -1386,9 +1223,9 @@ dependencies = [ [[package]] name = "scc" -version = "2.1.1" +version = "2.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76ad2bbb0ae5100a07b7a6f2ed7ab5fd0045551a4c507989b7a620046ea3efdc" +checksum = "a870e34715d5d59c8536040d4d4e7a41af44d527dc50237036ba4090db7996fc" dependencies = [ "sdd", ] @@ -1401,37 +1238,38 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "sdd" -version = "0.2.0" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b84345e4c9bd703274a082fb80caaa99b7612be48dfaa1dd9266577ec412309d" +checksum = "177258b64c0faaa9ffd3c65cd3262c2bc7e2588dbbd9c1641d0346145c1bbda8" [[package]] name = "serde" -version = "1.0.203" +version = "1.0.204" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" +checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.203" +version = "1.0.204" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" +checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.72", ] [[package]] name = "serde_json" -version = "1.0.117" +version = "1.0.122" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" +checksum = "784b6203951c57ff748476b126ccb5e8e2959a5c19e5c617ab1956be3dbc68da" dependencies = [ "itoa", + "memchr", "ryu", "serde", ] @@ -1458,7 +1296,7 @@ checksum = "82fe9db325bcef1fbcde82e078a5cc4efdf787e96b3b9cf45b50b529f2083d67" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.72", ] [[package]] @@ -1482,12 +1320,6 @@ dependencies = [ "keccak", ] -[[package]] -name = "shlex" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" - [[package]] name = "slab" version = "0.4.9" @@ -1514,9 +1346,9 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "subtle" -version = "2.5.0" +version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" @@ -1531,9 +1363,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.66" +version = "2.0.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5" +checksum = "dc4b9b9bf2add8093d3f2c0204471e951b2285580335de42f9d2534f3ae7a8af" dependencies = [ "proc-macro2", "quote", @@ -1567,9 +1399,9 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "0.6.6" +version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4badfd56924ae69bcc9039335b2e017639ce3f9b001c393c1b2d1ef846ce2cbf" +checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" [[package]] name = "toml_edit" @@ -1630,9 +1462,9 @@ dependencies = [ [[package]] name = "version_check" -version = "0.9.4" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" [[package]] name = "walkdir" @@ -1671,7 +1503,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.72", "wasm-bindgen-shared", ] @@ -1693,7 +1525,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.72", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -1714,18 +1546,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "which" -version = "4.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" -dependencies = [ - "either", - "home", - "once_cell", - "rustix", -] - [[package]] name = "winapi" version = "0.3.9" @@ -1744,11 +1564,11 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" -version = "0.1.8" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys", + "windows-sys 0.59.0", ] [[package]] @@ -1766,11 +1586,20 @@ dependencies = [ "windows-targets", ] +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + [[package]] name = "windows-targets" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ "windows_aarch64_gnullvm", "windows_aarch64_msvc", @@ -1784,51 +1613,51 @@ dependencies = [ [[package]] name = "windows_aarch64_gnullvm" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] name = "windows_aarch64_msvc" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_i686_gnu" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" [[package]] name = "windows_i686_gnullvm" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_msvc" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_x86_64_gnu" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnullvm" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_msvc" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winnow" @@ -1847,3 +1676,24 @@ checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" dependencies = [ "tap", ] + +[[package]] +name = "zerocopy" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "byteorder", + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.72", +] diff --git a/Cargo.toml b/Cargo.toml index 2e8d7db..97c6d8a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,21 +13,21 @@ description = "Boojum-CUDA is a library implementing GPU-accelerated cryptograph [build-dependencies] boojum = "=0.2.0" -cudart-sys = { version = "=0.1.0", package = "era_cudart_sys" } +cudart-sys = { git = "https://github.com/matter-labs/era-cuda.git", branch = "rr-build-refactor", package = "era_cudart_sys" } cmake = "0.1" itertools = "0.13" [dependencies] boojum = "=0.2.0" -cudart = { version = "=0.1.0", package = "era_cudart" } -cudart-sys = { version = "=0.1.0", package = "era_cudart_sys" } +cudart = { git = "https://github.com/matter-labs/era-cuda.git", branch = "rr-build-refactor", package = "era_cudart" } +cudart-sys = { git = "https://github.com/matter-labs/era-cuda.git", branch = "rr-build-refactor" , package = "era_cudart_sys" } itertools = "0.13" lazy_static = "1.4" [dev-dependencies] blake2 = "0.10" criterion = "0.5" -criterion-cuda = { git = "https://github.com/matter-labs/era-cuda.git", branch = "main", package = "criterion-cuda" } +criterion-cuda = { git = "https://github.com/matter-labs/era-cuda.git", branch = "rr-build-refactor", package = "era_criterion_cuda" } criterion-macro = "0.4" itertools = "0.13" rand = "0.8" diff --git a/build/gates.rs b/build/gates.rs index 8d00716..eee7d27 100644 --- a/build/gates.rs +++ b/build/gates.rs @@ -55,8 +55,8 @@ pub(super) fn generate() { } fn generate_cuda(descriptions: &[Description]) { - const TEMPLATE_PATH: &str = "native/gates_template.cu"; - const RESULT_PATH: &str = "native/gates.cu"; + const TEMPLATE_PATH: &str = "native/gate_kernels_template.cuh"; + const RESULT_PATH: &str = "gate_kernels.cuh"; let mut code = String::new(); let s = &mut code; new_line(s); @@ -180,7 +180,7 @@ fn generate_cuda(descriptions: &[Description]) { fn generate_rust(descriptions: &[Description]) { const TEMPLATE_PATH: &str = "src/gates_data_template.rs"; - const RESULT_PATH: &str = "src/gates_data.rs"; + const RESULT_PATH: &str = "gates_data.rs"; let mut hash_map = String::new(); let mut bindings = String::new(); let mut mappings = String::new(); diff --git a/build/main.rs b/build/main.rs index 33ef1a4..9a67d72 100644 --- a/build/main.rs +++ b/build/main.rs @@ -1,8 +1,7 @@ #![allow(incomplete_features)] +#![allow(unexpected_cfgs)] #![feature(generic_const_exprs)] -use cudart_sys::{cuda_lib_path, cuda_path}; - mod gates; mod poseidon_constants; mod template; @@ -10,19 +9,32 @@ mod template; fn main() { gates::generate(); poseidon_constants::generate(); - #[cfg(target_os = "macos")] - std::process::exit(0); - let dst = cmake::Config::new("native") - .profile("Release") - .define( - "CMAKE_CUDA_ARCHITECTURES", - std::env::var("CUDAARCHS").unwrap_or("native".to_string()), - ) - .build(); - println!("cargo:rustc-link-search=native={}", dst.display()); - println!("cargo:rustc-link-lib=static=boojum-cuda-native"); - println!("cargo:rustc-link-search=native={}", cuda_lib_path!()); - println!("cargo:rustc-link-lib=cudart"); - #[cfg(target_os = "linux")] - println!("cargo:rustc-link-lib=stdc++"); + println!("cargo::rustc-check-cfg=cfg(no_cuda)"); + #[cfg(no_cuda)] + { + println!("cargo::warning={}", cudart_sys::no_cuda_message!()); + } + #[cfg(not(no_cuda))] + { + use cudart_sys::{get_cuda_lib_path, get_cuda_version}; + use std::env::var; + let cuda_version = get_cuda_version().expect("Failed to determine CUDA version"); + if !cuda_version.starts_with("12.") { + println!("cargo::warning=CUDA version {cuda_version} detected. This crate is only tested with CUDA 12.*."); + } + let cudaarchs = var("CUDAARCHS").unwrap_or("native".to_string()); + let dst = cmake::Config::new("native") + .profile("Release") + .define("CMAKE_CUDA_ARCHITECTURES", cudaarchs) + .build(); + let boojum_lib_path = dst.to_str().unwrap(); + println!("cargo:rustc-link-search=native={boojum_lib_path}"); + println!("cargo:rustc-link-lib=static=boojum-cuda-native"); + let cuda_lib_path = get_cuda_lib_path().unwrap(); + let cuda_lib_path_str = cuda_lib_path.to_str().unwrap(); + println!("cargo:rustc-link-search=native={cuda_lib_path_str}"); + println!("cargo:rustc-link-lib=cudart"); + #[cfg(target_os = "linux")] + println!("cargo:rustc-link-lib=stdc++"); + } } diff --git a/build/poseidon_constants.rs b/build/poseidon_constants.rs index a1b69bc..27d6f14 100644 --- a/build/poseidon_constants.rs +++ b/build/poseidon_constants.rs @@ -5,7 +5,7 @@ use boojum::implementations::poseidon_goldilocks_params::*; // use itertools::Itertools; const TEMPLATE_PATH: &str = "native/poseidon_constants_template.cuh"; -const RESULT_PATH: &str = "native/poseidon_constants.cuh"; +const RESULT_PATH: &str = "poseidon_constants.cuh"; fn split_u64(value: u64) -> (u32, u32) { let lo = value as u32; diff --git a/build/template.rs b/build/template.rs index 72ed55c..6ae8ab3 100644 --- a/build/template.rs +++ b/build/template.rs @@ -1,4 +1,6 @@ +use std::env::var; use std::fs; +use std::path::Path; const PREFIX: char = '%'; const SUFFIX: char = '%'; @@ -11,8 +13,10 @@ pub(crate) fn generate(replacements: &[(&str, String)], template_path: &str, res from.push(SUFFIX); text = text.replace(&from, value); } - let current = fs::read_to_string(result_path).unwrap_or_default(); + let out_dir = var("OUT_DIR").unwrap(); + let result_path = Path::new(&out_dir).join(result_path); + let current = fs::read_to_string(&result_path).unwrap_or_default(); if !text.eq(¤t) { - fs::write(result_path, text).unwrap(); + fs::write(&result_path, text).unwrap(); } } diff --git a/native/.gitignore b/native/.gitignore index 9c1ff3a..9f63b71 100644 --- a/native/.gitignore +++ b/native/.gitignore @@ -1,3 +1 @@ /cmake-build-*/ -/gates.cu -/poseidon_constants.cuh diff --git a/native/CMakeLists.txt b/native/CMakeLists.txt index f587620..6ffb004 100644 --- a/native/CMakeLists.txt +++ b/native/CMakeLists.txt @@ -5,41 +5,43 @@ if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES) set(CMAKE_CUDA_ARCHITECTURES native) endif () add_library(boojum-cuda-native STATIC + ops_cub/common.cuh + ops_cub/device_radix_sort.cu + ops_cub/device_reduce.cu + ops_cub/device_run_length_encode.cu + ops_cub/device_scan.cu barycentric.cu blake2s.cu carry_chain.cuh common.cuh context.cu context.cuh + ${CMAKE_INSTALL_PREFIX}/gate_kernels.cuh gates.cu gates.cuh gates_poseidon.cuh goldilocks.cuh - goldilocks_extension.cuh goldilocks_extension.cu + goldilocks_extension.cuh memory.cuh ntt.cu ntt_b2n.cuh ntt_n2b.cuh ops_complex.cu ops_complex.cuh - ops_cub/common.cuh - ops_cub/device_radix_sort.cu - ops_cub/device_reduce.cu - ops_cub/device_run_length_encode.cu - ops_cub/device_scan.cu ops_simple.cu poseidon2_cooperative.cu poseidon2_single_thread.cu poseidon2_single_thread.cuh poseidon_common.cu - poseidon_constants.cuh + ${CMAKE_INSTALL_PREFIX}/poseidon_constants.cuh poseidon_cooperative.cu poseidon_single_thread.cu poseidon_single_thread.cuh poseidon_utils.cuh ptx.cuh ) +target_include_directories(boojum-cuda-native PRIVATE ${CMAKE_INSTALL_PREFIX}) set_target_properties(boojum-cuda-native PROPERTIES CUDA_STANDARD 17) set_target_properties(boojum-cuda-native PROPERTIES CUDA_SEPARABLE_COMPILATION ON) set_target_properties(boojum-cuda-native PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON) diff --git a/native/gate_kernels_template.cuh b/native/gate_kernels_template.cuh new file mode 100644 index 0000000..4457665 --- /dev/null +++ b/native/gate_kernels_template.cuh @@ -0,0 +1,3 @@ +namespace gates { +%CODE% +} // namespace gates diff --git a/native/gates.cu b/native/gates.cu new file mode 100644 index 0000000..dfad4da --- /dev/null +++ b/native/gates.cu @@ -0,0 +1,4 @@ +#include "gates.cuh" +#include "gates_poseidon.cuh" +// do not reorder includes +#include "gate_kernels.cuh" diff --git a/native/gates_template.cu b/native/gates_template.cu deleted file mode 100644 index 8d5699e..0000000 --- a/native/gates_template.cu +++ /dev/null @@ -1,6 +0,0 @@ -#include "gates.cuh" -#include "gates_poseidon.cuh" - -namespace gates { -%CODE% -} // namespace gates diff --git a/native/poseidon_constants_template.cuh b/native/poseidon_constants_template.cuh index 62724fc..91a498d 100644 --- a/native/poseidon_constants_template.cuh +++ b/native/poseidon_constants_template.cuh @@ -1,8 +1,6 @@ // clang-format off #pragma once -#include "goldilocks.cuh" - namespace poseidon_common { using namespace goldilocks; diff --git a/src/.gitignore b/src/.gitignore deleted file mode 100644 index 041239e..0000000 --- a/src/.gitignore +++ /dev/null @@ -1 +0,0 @@ -/gates_data.rs diff --git a/src/context.rs b/src/context.rs index bd56935..d79028b 100644 --- a/src/context.rs +++ b/src/context.rs @@ -5,7 +5,7 @@ use boojum::field::{Field, PrimeField}; use cudart::memory::{memory_copy, DeviceAllocation}; use cudart::result::{CudaResult, CudaResultWrap}; use cudart::slice::DeviceSlice; -use cudart_sys::{cudaMemcpyToSymbol, CudaMemoryCopyKind}; +use cudart_sys::{cudaMemcpyToSymbol, cuda_struct_and_stub, CudaMemoryCopyKind}; use std::mem::size_of; use std::os::raw::c_void; @@ -29,6 +29,8 @@ impl PowersLayerData { } } +unsafe impl Sync for PowersLayerData {} + #[repr(C)] struct PowersData { fine: PowersLayerData, @@ -48,14 +50,14 @@ impl PowersData { } } -extern "C" { - static powers_data_w: PowersData; - static powers_data_w_bitrev_for_ntt: PowersData; - static powers_data_w_inv_bitrev_for_ntt: PowersData; - static powers_data_g_f: PowersData; - static powers_data_g_i: PowersData; - static inv_sizes: [GoldilocksField; OMEGA_LOG_ORDER as usize + 1]; -} +unsafe impl Sync for PowersData {} + +cuda_struct_and_stub! { static powers_data_w: PowersData; } +cuda_struct_and_stub! { static powers_data_w_bitrev_for_ntt: PowersData; } +cuda_struct_and_stub! { static powers_data_w_inv_bitrev_for_ntt: PowersData; } +cuda_struct_and_stub! { static powers_data_g_f: PowersData; } +cuda_struct_and_stub! { static powers_data_g_i: PowersData; } +cuda_struct_and_stub! { static inv_sizes: [GoldilocksField; OMEGA_LOG_ORDER as usize + 1]; } unsafe fn copy_to_symbol(symbol: &T, src: &T) -> CudaResult<()> { cudaMemcpyToSymbol( diff --git a/src/gates.rs b/src/gates.rs index 488bc8c..ebcdb92 100644 --- a/src/gates.rs +++ b/src/gates.rs @@ -57,7 +57,7 @@ struct GateData { kernel: GateEvalSignature, } -include!("gates_data.rs"); +include!(concat!(env!("OUT_DIR"), "/gates_data.rs")); pub fn find_gate_id_by_name(name: &str) -> Option { HASH_MAP.get(name).copied() diff --git a/src/ops_complex.rs b/src/ops_complex.rs index 2ac8b86..059e19b 100644 --- a/src/ops_complex.rs +++ b/src/ops_complex.rs @@ -1485,7 +1485,7 @@ mod tests { let stream = CudaStream::default(); let base_ef = ExtensionField::from_coeff_in_base([GoldilocksField(42), GoldilocksField(42)]); - let base_vf = unsafe { mem::transmute(base_ef) }; + let base_vf = unsafe { mem::transmute::(base_ef) }; let mut d_base = DeviceAllocation::alloc(1).unwrap(); memory_copy_async(&mut d_base, &[base_vf], &stream).unwrap(); let b = &d_base[0]; diff --git a/src/ops_cub/device_radix_sort.rs b/src/ops_cub/device_radix_sort.rs index 6ecaf84..df06845 100644 --- a/src/ops_cub/device_radix_sort.rs +++ b/src/ops_cub/device_radix_sort.rs @@ -5,9 +5,9 @@ use boojum::field::goldilocks::GoldilocksField; use cudart::result::{CudaResult, CudaResultWrap}; use cudart::slice::DeviceSlice; use cudart::stream::CudaStream; -use cudart_sys::{cudaError_t, cudaStream_t}; +use cudart_sys::{cudaError_t, cudaStream_t, cuda_fn_and_stub}; -extern "C" { +cuda_fn_and_stub! { fn sort_keys_u32( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -18,7 +18,9 @@ extern "C" { end_bit: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn sort_keys_descending_u32( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -29,7 +31,9 @@ extern "C" { end_bit: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn sort_keys_u64( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -40,7 +44,9 @@ extern "C" { end_bit: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn sort_keys_descending_u64( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -200,7 +206,7 @@ pub fn sort_keys( ) } -extern "C" { +cuda_fn_and_stub! { fn sort_pairs_u32_by_u32( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -213,7 +219,9 @@ extern "C" { end_bit: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn sort_pairs_descending_u32_by_u32( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -226,7 +234,9 @@ extern "C" { end_bit: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn sort_pairs_u32_by_u64( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -239,7 +249,9 @@ extern "C" { end_bit: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn sort_pairs_descending_u32_by_u64( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -252,7 +264,9 @@ extern "C" { end_bit: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn sort_pairs_u64_by_u32( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -265,7 +279,9 @@ extern "C" { end_bit: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn sort_pairs_descending_u64_by_u32( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -278,7 +294,9 @@ extern "C" { end_bit: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn sort_pairs_u64_by_u64( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -291,7 +309,9 @@ extern "C" { end_bit: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn sort_pairs_descending_u64_by_u64( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, diff --git a/src/ops_cub/device_reduce.rs b/src/ops_cub/device_reduce.rs index 0c6eb81..e510419 100644 --- a/src/ops_cub/device_reduce.rs +++ b/src/ops_cub/device_reduce.rs @@ -5,14 +5,14 @@ use boojum::field::goldilocks::GoldilocksField; use cudart::result::{CudaResult, CudaResultWrap}; use cudart::slice::{DeviceSlice, DeviceVariable}; use cudart::stream::CudaStream; -use cudart_sys::{cudaError_t, cudaStream_t}; +use cudart_sys::{cudaError_t, cudaStream_t, cuda_fn_and_stub}; use crate::device_structures::{ DeviceMatrixChunkImpl, DeviceRepr, DeviceVectorChunkImpl, PtrAndStride, }; use crate::extension_field::ExtensionField; -extern "C" { +cuda_fn_and_stub! { fn reduce_add_bf( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -21,7 +21,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn reduce_add_ef( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -30,7 +32,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn segmented_reduce_add_bf( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -40,7 +44,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn segmented_reduce_add_ef( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -50,7 +56,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn reduce_mul_bf( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -59,7 +67,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn reduce_mul_ef( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -68,7 +78,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn segmented_reduce_mul_bf( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -78,7 +90,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn segmented_reduce_mul_ef( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, diff --git a/src/ops_cub/device_run_length_encode.rs b/src/ops_cub/device_run_length_encode.rs index dc632ff..df8136d 100644 --- a/src/ops_cub/device_run_length_encode.rs +++ b/src/ops_cub/device_run_length_encode.rs @@ -5,9 +5,9 @@ use boojum::field::goldilocks::GoldilocksField; use cudart::result::{CudaResult, CudaResultWrap}; use cudart::slice::{DeviceSlice, DeviceVariable}; use cudart::stream::CudaStream; -use cudart_sys::{cudaError_t, cudaStream_t}; +use cudart_sys::{cudaError_t, cudaStream_t, cuda_fn_and_stub}; -extern "C" { +cuda_fn_and_stub! { fn encode_u32( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -18,7 +18,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn encode_u64( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, diff --git a/src/ops_cub/device_scan.rs b/src/ops_cub/device_scan.rs index f668cf7..8a220f3 100644 --- a/src/ops_cub/device_scan.rs +++ b/src/ops_cub/device_scan.rs @@ -6,11 +6,11 @@ use cudart::event::{CudaEvent, CudaEventCreateFlags}; use cudart::result::{CudaResult, CudaResultWrap}; use cudart::slice::DeviceSlice; use cudart::stream::{CudaStream, CudaStreamCreateFlags, CudaStreamWaitEventFlags}; -use cudart_sys::{cudaError_t, cudaStream_t}; +use cudart_sys::{cudaError_t, cudaStream_t, cuda_fn_and_stub}; use crate::extension_field::ExtensionField; -extern "C" { +cuda_fn_and_stub! { fn exclusive_sum_u32( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -19,7 +19,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn exclusive_sum_reverse_u32( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -28,7 +30,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn inclusive_sum_u32( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -37,7 +41,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn inclusive_sum_reverse_u32( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -46,7 +52,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn exclusive_scan_add_bf( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -55,7 +63,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn exclusive_scan_reverse_add_bf( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -64,7 +74,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn inclusive_scan_add_bf( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -73,7 +85,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn inclusive_scan_reverse_add_bf( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -82,8 +96,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} - #[allow(improper_ctypes)] +cuda_fn_and_stub! { fn exclusive_scan_add_ef( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -92,8 +107,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} - #[allow(improper_ctypes)] +cuda_fn_and_stub! { fn exclusive_scan_reverse_add_ef( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -102,8 +118,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} - #[allow(improper_ctypes)] +cuda_fn_and_stub! { fn inclusive_scan_add_ef( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -112,8 +129,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} - #[allow(improper_ctypes)] +cuda_fn_and_stub! { fn inclusive_scan_reverse_add_ef( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -122,7 +140,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn exclusive_scan_mul_bf( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -131,7 +151,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn exclusive_scan_reverse_mul_bf( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -140,7 +162,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn inclusive_scan_mul_bf( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -149,7 +173,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} +cuda_fn_and_stub! { fn inclusive_scan_reverse_mul_bf( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -158,8 +184,8 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; - - #[allow(improper_ctypes)] +} +cuda_fn_and_stub! { fn exclusive_scan_mul_ef( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -168,8 +194,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} - #[allow(improper_ctypes)] +cuda_fn_and_stub! { fn exclusive_scan_reverse_mul_ef( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -178,8 +205,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} - #[allow(improper_ctypes)] +cuda_fn_and_stub! { fn inclusive_scan_mul_ef( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize, @@ -188,8 +216,9 @@ extern "C" { num_items: i32, stream: cudaStream_t, ) -> cudaError_t; +} - #[allow(improper_ctypes)] +cuda_fn_and_stub! { fn inclusive_scan_reverse_mul_ef( d_temp_storage: *mut u8, temp_storage_bytes: &mut usize,