diff --git a/.gitignore b/.gitignore index e0a698ac2e..ab373f59e2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ -/target/ +**/target/ # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html diff --git a/Cargo.lock b/Cargo.lock index 7e6dd97363..8a2f475fb7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -460,6 +460,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "anyhow" +version = "1.0.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86fdf8605db99b54d3cd748a44c6d04df638eb5dafb219b135d0149bd0db01f6" + [[package]] name = "ark-ff" version = "0.3.0" @@ -673,7 +679,7 @@ dependencies = [ "arrow-schema", "arrow-select", "atoi", - "base64", + "base64 0.22.1", "chrono", "comfy-table", "half", @@ -1013,6 +1019,32 @@ dependencies = [ "tracing", ] +[[package]] +name = "axvm" +version = "0.1.0" +dependencies = [ + "anyhow", + "axvm-platform", + "borsh", + "bytemuck", + "chrono", + "getrandom", + "hex", + "serde", +] + +[[package]] +name = "axvm-platform" +version = "0.1.0" +dependencies = [ + "bytemuck", + "critical-section", + "embedded-alloc", + "getrandom", + "libm", + "stability", +] + [[package]] name = "backtrace" version = "0.3.71" @@ -1035,6 +1067,12 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf" +[[package]] +name = "base64" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" + [[package]] name = "base64" version = "0.22.1" @@ -1240,6 +1278,30 @@ dependencies = [ "zeroize", ] +[[package]] +name = "borsh" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6362ed55def622cddc70a4746a68554d7b687713770de539e59a739b249f8ed" 
+dependencies = [ + "borsh-derive", + "cfg_aliases", +] + +[[package]] +name = "borsh-derive" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3ef8005764f53cd4dca619f5bf64cafd4664dada50ece25e4d81de54c80cc0b" +dependencies = [ + "once_cell", + "proc-macro-crate", + "proc-macro2", + "quote", + "syn 2.0.77", + "syn_derive", +] + [[package]] name = "brotli" version = "6.0.0" @@ -1359,6 +1421,12 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + [[package]] name = "chrono" version = "0.4.38" @@ -1369,6 +1437,7 @@ dependencies = [ "iana-time-zone", "js-sys", "num-traits", + "serde", "wasm-bindgen", "windows-targets 0.52.6", ] @@ -1554,6 +1623,12 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "const-default" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b396d1f76d455557e1218ec8066ae14bba60b4b36ecd55577ba979f5db7ecaa" + [[package]] name = "const-hex" version = "1.12.0" @@ -1674,6 +1749,12 @@ dependencies = [ "itertools 0.10.5", ] +[[package]] +name = "critical-section" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f64009896348fc5af4222e9cf7d7d82a95a256c634ebcf61c53e4ea461422242" + [[package]] name = "crossbeam" version = "0.8.4" @@ -1938,7 +2019,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a369332afd0ef5bd565f6db2139fb9f1dfdd0afa75a7f70f000b74208d76994f" dependencies = [ "arrow", - "base64", + "base64 0.22.1", "blake2", "blake3", "chrono", @@ -2029,7 +2110,7 @@ dependencies = [ "arrow-ord", "arrow-schema", "arrow-string", - "base64", + "base64 0.22.1", "chrono", 
"datafusion-common", "datafusion-execution", @@ -2242,6 +2323,18 @@ dependencies = [ "zeroize", ] +[[package]] +name = "embedded-alloc" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f2de9133f68db0d4627ad69db767726c99ff8585272716708227008d3f1bddd" +dependencies = [ + "const-default", + "critical-section", + "linked_list_allocator", + "rlsf", +] + [[package]] name = "endian-type" version = "0.1.2" @@ -3203,6 +3296,12 @@ dependencies = [ "libc", ] +[[package]] +name = "linked_list_allocator" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afa463f5405ee81cdb9cc2baf37e08ec7e4c8209442b5d72c04cfb2cd6e6286" + [[package]] name = "linux-raw-sys" version = "0.4.14" @@ -4005,7 +4104,7 @@ dependencies = [ "arrow-ipc", "arrow-schema", "arrow-select", - "base64", + "base64 0.22.1", "brotli", "bytes", "chrono", @@ -4649,6 +4748,18 @@ dependencies = [ "rustc-hex", ] +[[package]] +name = "rlsf" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "222fb240c3286247ecdee6fa5341e7cdad0ffdf8e7e401d9937f2d58482a20bf" +dependencies = [ + "cfg-if", + "const-default", + "libc", + "svgbobdoc", +] + [[package]] name = "ruint" version = "1.12.3" @@ -5082,6 +5193,16 @@ dependencies = [ "syn 2.0.77", ] +[[package]] +name = "stability" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d904e7009df136af5297832a3ace3370cd14ff1546a232f4f185036c2736fcac" +dependencies = [ + "quote", + "syn 2.0.77", +] + [[package]] name = "stable_deref_trait" version = "1.2.0" @@ -5197,6 +5318,19 @@ version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" +[[package]] +name = "svgbobdoc" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f2c04b93fc15d79b39c63218f15e3fdffaa4c227830686e3b7c5f41244eb3e50" +dependencies = [ + "base64 0.13.1", + "proc-macro2", + "quote", + "syn 1.0.109", + "unicode-width", +] + [[package]] name = "symbolic-common" version = "12.11.0" @@ -5242,6 +5376,18 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "syn_derive" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1329189c02ff984e9736652b1631330da25eaa6bc639089ed4915d25446cbe7b" +dependencies = [ + "proc-macro-error", + "proc-macro2", + "quote", + "syn 2.0.77", +] + [[package]] name = "tap" version = "1.0.1" diff --git a/Cargo.toml b/Cargo.toml index 9e73ce7500..10ae413b44 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,6 +19,8 @@ members = [ "sdk", "vm", "vm/bin", + "riscv/zkvm/lib", + "riscv/zkvm/platform", ] resolver = "2" @@ -26,6 +28,8 @@ resolver = "2" version = "0.1.0" authors = ["Intrinsic Technologies"] edition = "2021" +homepage = "https://axiom.xyz" +repository = "https://github.com/axiom-crypto/" # Fastest runtime configuration [profile.release] @@ -61,6 +65,9 @@ incremental = true lto = "thin" [workspace.dependencies] +axvm-platform = { path = "riscv/zkvm/platform" } +axvm = { path = "riscv/zkvm/lib" } + p3-air = { git = "https://github.com/Plonky3/Plonky3.git", rev = "95e56fa" } p3-field = { git = "https://github.com/Plonky3/Plonky3.git", rev = "95e56fa" } p3-commit = { git = "https://github.com/Plonky3/Plonky3.git", rev = "95e56fa" } diff --git a/riscv/README.md b/riscv/README.md new file mode 100644 index 0000000000..19ff4060fd --- /dev/null +++ b/riscv/README.md @@ -0,0 +1,3 @@ +Contains crates for compiling Rust to RISC-V ELF which targets the Modular VM architecture. + +Currently vendored from RISC0. 
diff --git a/riscv/examples/README.md b/riscv/examples/README.md new file mode 100644 index 0000000000..be2a99917f --- /dev/null +++ b/riscv/examples/README.md @@ -0,0 +1,37 @@ +To see the list of all available built-in targets: + +```bash +rustc --print target-list +``` + +We currently use the risc0 target until we fork Rust to provide our own RISC-V target. + +WARNING: to avoid building for your host machine, make sure you do not have `rustflags = ["-Ctarget-cpu=native"]` in your `~/.cargo/config.toml`. + +Build the example with the full command: + +```bash +cd fibonacci/program +cargo +nightly build -Z build-std=alloc,core,proc_macro,panic_abort --target riscv32im-risc0-zkvm-elf +``` + +This also works with just `cargo +nightly build` because we have a `.cargo/config.toml` that specifies the target and unstable build features. + +After this, the ELF will be found via + +```bash +file target/riscv32im-risc0-zkvm-elf/debug/axvm-fibonacci-program +target/riscv32im-risc0-zkvm-elf/debug/axvm-fibonacci-program: ELF 32-bit LSB executable, UCB RISC-V, soft-float ABI, version 1 (SYSV), statically linked, with debug_info, not stripped +``` + +To disassemble the ELF to read the instructions, [install cargo-binutils](https://github.com/rust-embedded/cargo-binutils) and run + +```bash +rust-objdump -d target/riscv32im-risc0-zkvm-elf/debug/axvm-fibonacci-program +``` + +where `-d` is short for `--disassemble`. + +A version of the ELF compiled with `--release` is provided in `fibonacci/program/elf` for reference.
+ +Additional reference for learning: https://github.com/axiom-crypto/riscv-playground diff --git a/riscv/examples/fibonacci/program/.cargo/config.toml b/riscv/examples/fibonacci/program/.cargo/config.toml new file mode 100644 index 0000000000..bb18f45366 --- /dev/null +++ b/riscv/examples/fibonacci/program/.cargo/config.toml @@ -0,0 +1,5 @@ +[build] +target = "riscv32im-risc0-zkvm-elf" + +[unstable] +build-std = ["core", "alloc", "proc_macro", "panic_abort"] diff --git a/riscv/examples/fibonacci/program/Cargo.toml b/riscv/examples/fibonacci/program/Cargo.toml new file mode 100644 index 0000000000..aad466c3ca --- /dev/null +++ b/riscv/examples/fibonacci/program/Cargo.toml @@ -0,0 +1,8 @@ +[workspace] +[package] +version = "0.1.0" +name = "axvm-fibonacci-program" +edition = "2021" + +[dependencies] +axvm = { path = "../../../zkvm/lib" } diff --git a/riscv/examples/fibonacci/program/elf/axvm-fibonacci-program b/riscv/examples/fibonacci/program/elf/axvm-fibonacci-program new file mode 100755 index 0000000000..84925a5725 Binary files /dev/null and b/riscv/examples/fibonacci/program/elf/axvm-fibonacci-program differ diff --git a/riscv/examples/fibonacci/program/src/main.rs b/riscv/examples/fibonacci/program/src/main.rs new file mode 100644 index 0000000000..4fdd974cdb --- /dev/null +++ b/riscv/examples/fibonacci/program/src/main.rs @@ -0,0 +1,15 @@ +#![no_main] +#![no_std] + +axvm::entry!(main); + +pub fn main() { + let n = 16; + let mut a: u32 = 0; + let mut b: u32 = 1; + for _ in 1..n { + let sum = a + b; + a = b; + b = sum; + } +} diff --git a/riscv/zkvm/lib/Cargo.toml b/riscv/zkvm/lib/Cargo.toml new file mode 100644 index 0000000000..93126ccf3f --- /dev/null +++ b/riscv/zkvm/lib/Cargo.toml @@ -0,0 +1,36 @@ +[package] +name = "axvm" +description = "Axiom zkVM" +version = { workspace = true } +edition = { workspace = true } +# license = { workspace = true } +homepage = { workspace = true } +repository = { workspace = true } + +[dependencies] +anyhow = { version = 
"1.0", default-features = false } +borsh = { version = "1.5", default-features = false, features = ["derive"] } +bytemuck = { version = "1.13", features = ["extern_crate_alloc"] } +getrandom = { version = "0.2", features = ["custom"] } +hex = { version = "0.4.3", default-features = false, features = ["alloc"] } +axvm-platform = { workspace = true, features = [ + "rust-runtime", + "export-getrandom", +] } +serde = { version = "1.0", default-features = false, features = [ + "alloc", + "derive", +] } + +[dev-dependencies] +chrono = { version = "0.4", default-features = false, features = ["serde"] } + +[features] +default = [] +# The zkVM exposes a getrandom implementation that panics by default. This will +# expose a getrandom implementation that uses the `sys_random` ecall. +getrandom = ["axvm-platform/getrandom"] +# The zkVM uses a bump-pointer heap allocator by default which does not free +# memory. This will use a slower linked-list heap allocator to reclaim memory. +heap-embedded-alloc = ["axvm-platform/heap-embedded-alloc"] +std = ["anyhow/std", "hex/std", "serde/std"] diff --git a/riscv/zkvm/lib/src/env/mod.rs b/riscv/zkvm/lib/src/env/mod.rs new file mode 100644 index 0000000000..5d99b27eb1 --- /dev/null +++ b/riscv/zkvm/lib/src/env/mod.rs @@ -0,0 +1,378 @@ +// Copyright 2024 RISC Zero, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Functions for interacting with the host environment. +//! +//! 
The zkVM provides a set of functions to perform operations that manage +//! execution, I/O, and proof composition. The set of functions related to each +//! of these operations are described below. +//! +//! ## System State +//! +//! The guest has some control over the execution of the zkVM by pausing or +//! exiting the program explicitly. This can be achieved using the [pause] and +//! [exit] functions. +//! +//! ## Proof Verification +//! +//! The zkVM supports verification of RISC Zero [receipts] in a guest program, +//! enabling [proof composition]. This can be achieved using the [verify()] and +//! [verify_integrity] functions. +//! +//! ## Input and Output +//! +//! The zkVM provides a set of functions for handling input, public output, and +//! private output. This is useful when interacting with the host and committing +//! to some data publicly. +//! +//! The zkVM provides functions that automatically perform (de)serialization on +//! types and, for performance reasons, there is also a `_slice` variant that +//! works with raw slices of plain old data. Performing operations on slices is +//! more efficient, saving cycles during execution and consequently producing +//! smaller proofs that are faster to produce. However, the `_slice` variants +//! can be less ergonomic, so consider trade-offs when choosing between the two. +//! For more information about guest optimization, see RISC Zero's [instruction +//! on guest optimization][guest-optimization] +//! +//! Convenience functions to read and write to default file descriptors are +//! provided. See [read()], [write()], [self::commit] (and their `_slice` +//! variants) for more information. +//! +//! In order to access default file descriptors directly, see [stdin], [stdout], +//! [stderr] and [journal]. These file descriptors are either [FdReader] or +//! [FdWriter] instances, which can be used to read from or write to the host. +//! To read from or write into them, use the [Read] and [Write] traits. +//! 
+//! WARNING: Specifying a file descriptor with the same value of a default file +//! descriptor is not recommended and may lead to unexpected behavior. A list of +//! default file descriptors can be found in the [fileno] module. +//! +//! ## Utility +//! +//! The zkVM provides utility functions to log messages to the debug console and +//! to measure the number of processor cycles that have occurred since the guest +//! began. These can be achieved using the [log] and [cycle_count] functions. +//! +//! [receipts]: crate::Receipt +//! [proof composition]:https://www.risczero.com/blog/proof-composition +//! [guest-optimization]: +//! https://dev.risczero.com/api/zkvm/optimization#when-reading-data-as-raw-bytes-use-envread_slice + +extern crate alloc; + +mod read; +mod write; + +use alloc::alloc::{alloc, Layout}; + +use axvm_platform::{ + align_up, fileno, + syscall::{self, sys_cycle_count, sys_halt, sys_log, sys_pause, syscall_2, SyscallName}, + WORD_SIZE, +}; +use bytemuck::Pod; +use serde::{de::DeserializeOwned, Serialize}; + +pub use self::{ + read::{FdReader, Read}, + write::{FdWriter, Write}, +}; + +/// A random 16 byte value initialized to random data, provided by the host, on +/// guest start and upon resuming from a pause. Setting this value ensures that +/// the total memory image has at least 128 bits of entropy, preventing +/// information leakage through the post-state digest. +static mut MEMORY_IMAGE_ENTROPY: [u32; 4] = [0u32; 4]; + +/// Initialize globals before program main +pub(crate) fn init() { + unsafe { + syscall::sys_rand( + MEMORY_IMAGE_ENTROPY.as_mut_ptr(), + MEMORY_IMAGE_ENTROPY.len(), + ) + } +} + +/// Finalize execution +pub(crate) fn finalize(halt: bool, user_exit: u8) { + unsafe { + // Artifact of risc0 vendoring + let output_words = [0u32; 8]; + + if halt { + sys_halt(user_exit, &output_words) + } else { + sys_pause(user_exit, &output_words) + } + } +} + +/// Terminate execution of the zkVM. 
+///
+/// Use an exit code of 0 to indicate success, and non-zero to indicate an error.
+pub fn exit(exit_code: u8) -> ! {
+    finalize(true, exit_code);
+    unreachable!();
+}
+
+/// Pause the execution of the zkVM.
+///
+/// Execution may be continued at a later time.
+/// Use an exit code of 0 to indicate success, and non-zero to indicate an error.
+pub fn pause(exit_code: u8) {
+    finalize(false, exit_code);
+    // Re-run the global initialisation once execution resumes after the pause.
+    init();
+}
+
+/// Exchange data with the host.
+///
+/// `to_host` is handed to the host as a pointer/length pair and `from_host` is the word buffer
+/// passed for the host's reply; the value returned by the underlying `syscall_2` is passed
+/// through unchanged.
+pub fn syscall(syscall: SyscallName, to_host: &[u8], from_host: &mut [u32]) -> syscall::Return {
+    // NOTE(review): the `to_host` pointer and length are narrowed to `u32`, which presumably
+    // relies on the 32-bit zkVM target -- confirm before building this for a wider target.
+    unsafe {
+        syscall_2(
+            syscall,
+            from_host.as_mut_ptr(),
+            from_host.len(),
+            to_host.as_ptr() as u32,
+            to_host.len() as u32,
+        )
+    }
+}
+
+/// Exchanges slices of plain old data with the host.
+///
+/// This makes two calls to the given syscall; the first gets the length of the
+/// buffer to allocate for the return data, and the second actually
+/// receives the return data.
+///
+/// On the host side, implement SliceIo to provide a handler for this call.
+///
+/// NOTE: This method never frees up the buffer memory storing the host's response.
+///
+/// # Panics
+///
+/// Panics if the layout for the response buffer cannot be constructed, and (via `bytemuck`) if
+/// the word buffer cannot be reinterpreted as a slice of `U`. An allocation failure is reported
+/// through `handle_alloc_error`.
+pub fn send_recv_slice<T: Pod, U: Pod>(syscall_name: SyscallName, to_host: &[T]) -> &'static [U] {
+    // First call: send the payload with an empty receive buffer to learn the response length.
+    let syscall::Return(nbytes, _) = syscall(syscall_name, bytemuck::cast_slice(to_host), &mut []);
+    let nbytes = nbytes as usize;
+    // The receive buffer is made of whole words, so round the byte length up.
+    let nwords = align_up(nbytes, WORD_SIZE) / WORD_SIZE;
+    let from_host_buf: &'static mut [u32] = if nwords == 0 {
+        // An empty response needs no storage. The allocator must not be called with a zero-sized
+        // layout (that is undefined behaviour), so use a promoted empty slice instead.
+        &mut []
+    } else {
+        let layout = Layout::from_size_align(nwords * WORD_SIZE, WORD_SIZE).unwrap();
+        // SAFETY: `layout` has a non-zero size because `nwords > 0`; the pointer is checked for
+        // null before use; it is word-aligned and spans exactly `nwords` words. The allocation is
+        // never freed, so handing out a `'static` slice is sound.
+        unsafe {
+            let ptr = alloc(layout) as *mut u32;
+            if ptr.is_null() {
+                alloc::alloc::handle_alloc_error(layout);
+            }
+            core::slice::from_raw_parts_mut(ptr, nwords)
+        }
+    };
+    // Second call: no payload, only the buffer the host fills with the response. It is issued
+    // even for an empty response so the two-call protocol is unchanged.
+    syscall(syscall_name, &[], from_host_buf);
+    // Trim the word padding: expose only as many `U` elements as the host reported in bytes.
+    &bytemuck::cast_slice(from_host_buf)[..nbytes / core::mem::size_of::<U>()]
+}
+
+/// Read private data from the STDIN of the zkVM and deserializes it.
+///
+/// This function operates on every [`DeserializeOwned`] type, so you can
+/// specify complex types as data to be read and it'll be deserialized
+/// automatically.
+/// +/// # Example +/// +/// ```no_run +/// use risc0_zkvm::guest::env; +/// use std::collections::BTreeMap; +/// +/// let input: Option> = env::read(); +/// ``` +/// +/// More examples can be found in RISC Zero's [example page]. +/// +/// Additional explanation on I/O in the zkVM can be found in RISC Zero's [I/O documentation]. +/// +/// [example page]: https://dev.risczero.com/api/zkvm/examples +/// [I/O documentation]: https://dev.risczero.com/api/zkvm/tutorials/io +pub fn read() -> T { + stdin().read() +} + +/// Read a slice from the STDIN of the zkVM. +/// +/// This function reads a slice of [plain old data][bytemuck::Pod], not +/// incurring in deserialization overhead. Recommended for performance +/// optimizations. For more context on this, see RISC Zero's [instructions on +/// guest optimization]. +/// +/// # Example +/// +/// ```no_run +/// use risc0_zkvm::guest::env; +/// +/// let len: usize = env::read(); +/// let mut slice = vec![0u8; len]; +/// env::read_slice(&mut slice); +/// +/// assert_eq!(slice.len(), len); +/// ``` +/// +/// More examples can be found in RISC Zero's [example page]. +/// +/// Additional explanation on I/O in the zkVM can be found in RISC Zero's [I/O documentation]. +/// +/// [example page]: https://dev.risczero.com/api/zkvm/examples +/// [I/O documentation]: https://dev.risczero.com/api/zkvm/tutorials/io +/// [instructions on guest optimization]: https://dev.risczero.com/api/zkvm/optimization#when-reading-data-as-raw-bytes-use-envread_slice +pub fn read_slice(slice: &mut [T]) { + stdin().read_slice(slice) +} + +/// Serialize the given data and write it to the STDOUT of the zkVM. +/// +/// This is available to the host as the private output on the prover. +/// Some implementations, such as [risc0-r0vm] will also write the data to +/// the host's stdout file descriptor. It is not included in the receipt. 
+/// +/// # Example +/// +/// ```no_run +/// use risc0_zkvm::guest::env; +/// use std::collections::BTreeMap; +/// +/// let output: BTreeMap = BTreeMap::from([ +/// (1, true), +/// (2, false), +/// ]); +/// +/// env::write(&output); +/// ``` +/// +/// More examples can be found in RISC Zero's [example page]. +/// +/// Additional explanation on I/O in the zkVM can be found in RISC Zero's [I/O documentation]. +/// +/// [example page]: https://dev.risczero.com/api/zkvm/examples +/// [I/O documentation]: https://dev.risczero.com/api/zkvm/tutorials/io +pub fn write(data: &T) { + stdout().write(data) +} + +/// Write the given slice to the STDOUT of the zkVM. +/// +/// This is available to the host as the private output on the prover. +/// Some implementations, such as [risc0-r0vm] will also write the data to +/// the host's stdout file descriptor. It is not included in the receipt. +/// +/// This function reads a slice of [plain old data][bytemuck::Pod], not +/// incurring in deserialization overhead. Recommended for performance +/// optimizations. For more context on this, see RISC Zero's [instructions on +/// guest optimization]. +/// +/// # Example +/// +/// ```no_run +/// use risc0_zkvm::guest::env; +/// +/// let slice = [1u8, 2, 3, 4]; +/// env::write_slice(&slice); +/// ``` +/// +/// More examples can be found in RISC Zero's [example page]. +/// +/// Additional explanation on I/O in the zkVM can be found in RISC Zero's [I/O documentation]. +/// +/// [example page]: https://dev.risczero.com/api/zkvm/examples +/// [I/O documentation]: https://dev.risczero.com/api/zkvm/tutorials/io +/// [instructions on guest optimization]: https://dev.risczero.com/api/zkvm/optimization#when-reading-data-as-raw-bytes-use-envread_slice +pub fn write_slice(slice: &[T]) { + stdout().write_slice(slice); +} + +/* +/// Serialize the given data and commit it to the journal. +/// +/// Data in the journal is included in the receipt and is available to the +/// verifier. 
It is considered "public" data. +/// +/// # Example +/// +/// ```no_run +/// use risc0_zkvm::guest::env; +/// use std::collections::BTreeMap; +/// +/// let data: BTreeMap = BTreeMap::from([ +/// (1, true), +/// (2, false), +/// ]); +/// +/// env::commit(&data); +/// ``` +/// +/// More examples can be found in RISC Zero's [example page]. +/// +/// Additional explanation on I/O in the zkVM can be found in RISC Zero's [I/O documentation]. +/// +/// [example page]: https://dev.risczero.com/api/zkvm/examples +/// [I/O documentation]: https://dev.risczero.com/api/zkvm/tutorials/io +pub fn commit(data: &T) { + journal().write(data) +} + +/// Commit the given slice to the journal. +/// +/// Data in the journal is included in the receipt and is available to the +/// verifier. It is considered "public" data. +/// +/// This function reads a slice of [plain old data][bytemuck::Pod], not +/// incurring in deserialization overhead. Recommended for performance +/// optimizations. For more context on this, see RISC Zero's [instructions on +/// guest optimization]. +/// +/// # Example +/// +/// ```no_run +/// use risc0_zkvm::guest::env; +/// +/// let slice = [1u8, 2, 3, 4]; +/// env::commit_slice(&slice); +/// ``` +/// +/// More examples can be found in RISC Zero's [example page]. +/// +/// Additional explanation on I/O in the zkVM can be found in RISC Zero's [I/O documentation]. +/// +/// [example page]: https://dev.risczero.com/api/zkvm/examples +/// [I/O documentation]: https://dev.risczero.com/api/zkvm/tutorials/io +/// [instructions on guest optimization]: https://dev.risczero.com/api/zkvm/optimization#when-reading-data-as-raw-bytes-use-envread_slice +pub fn commit_slice(slice: &[T]) { + journal().write_slice(slice); +} +*/ + +/// Return the number of processor cycles that have occurred since the guest +/// began. +/// +/// WARNING: The cycle count is provided by the host and is not checked by the zkVM circuit. 
+pub fn cycle_count() -> u64 { + sys_cycle_count() +} + +/// Print a message to the debug console. +pub fn log(msg: &str) { + let msg = msg.as_bytes(); + unsafe { + sys_log(msg.as_ptr(), msg.len()); + } +} + +/// Return a writer for STDOUT. +pub fn stdout() -> FdWriter Fn(&'a [u8])> { + FdWriter::new(fileno::STDOUT, |_| {}) +} + +/// Return a writer for STDERR. +pub fn stderr() -> FdWriter Fn(&'a [u8])> { + FdWriter::new(fileno::STDERR, |_| {}) +} + +/// Return a reader for the standard input +pub fn stdin() -> FdReader { + FdReader::new(fileno::STDIN) +} diff --git a/riscv/zkvm/lib/src/env/read.rs b/riscv/zkvm/lib/src/env/read.rs new file mode 100644 index 0000000000..029e388091 --- /dev/null +++ b/riscv/zkvm/lib/src/env/read.rs @@ -0,0 +1,157 @@ +// Copyright 2024 RISC Zero, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use axvm_platform::{ + syscall::{sys_read, sys_read_words}, + WORD_SIZE, +}; +use bytemuck::Pod; +use serde::de::DeserializeOwned; + +use crate::serde::{Deserializer, WordRead}; + +/// Reads and deserializes objects +pub trait Read { + /// Read data from the host. + fn read(&mut self) -> T; + + /// Read raw data from the host. 
+ fn read_slice(&mut self, buf: &mut [T]); +} + +impl Read for &mut R { + fn read(&mut self) -> T { + (**self).read() + } + + fn read_slice(&mut self, buf: &mut [T]) { + (**self).read_slice(buf) + } +} + +/// Provides a FdReader which can read from any file descriptor +pub struct FdReader { + fd: u32, +} + +impl FdReader { + /// Creates a new FdReader reading from the given file descriptor. + pub fn new(fd: u32) -> FdReader { + FdReader { fd } + } + + #[must_use = "read_bytes can potentially do a short read; this case should be handled."] + fn read_bytes(&mut self, buf: &mut [u8]) -> usize { + unsafe { sys_read(self.fd, buf.as_mut_ptr(), buf.len()) } + } + + // Like read_bytes, but fills the buffer completely or until EOF occurs. + #[must_use = "read_bytes_all can potentially return EOF; this case should be handled."] + fn read_bytes_all(&mut self, mut buf: &mut [u8]) -> usize { + let mut tot_read = 0; + while !buf.is_empty() { + let nread = self.read_bytes(buf); + if nread == 0 { + break; + } + tot_read += nread; + (_, buf) = buf.split_at_mut(nread); + } + + tot_read + } +} + +impl Read for FdReader { + fn read(&mut self) -> T { + T::deserialize(&mut Deserializer::new(self)).unwrap() + } + + fn read_slice(&mut self, buf: &mut [T]) { + if let Ok(words) = bytemuck::try_cast_slice_mut(buf) { + // Reading words performs significantly better if we're word aligned. 
+            self.read_words(words).unwrap();
+        } else {
+            let bytes = bytemuck::cast_slice_mut(buf);
+            if self.read_bytes_all(bytes) != bytes.len() {
+                panic!("{:?}", crate::serde::Error::DeserializeUnexpectedEnd);
+            }
+        }
+    }
+}
+
+impl WordRead for FdReader {
+    /// Fills `words` from the file descriptor, failing if fewer bytes than requested arrive.
+    fn read_words(&mut self, words: &mut [u32]) -> crate::serde::Result<()> {
+        let nread_bytes = unsafe { sys_read_words(self.fd, words.as_mut_ptr(), words.len()) };
+        if nread_bytes == words.len() * WORD_SIZE {
+            Ok(())
+        } else {
+            Err(crate::serde::Error::DeserializeUnexpectedEnd)
+        }
+    }
+
+    /// Fills `bytes`, then consumes and discards the padding up to the next word boundary.
+    fn read_padded_bytes(&mut self, bytes: &mut [u8]) -> crate::serde::Result<()> {
+        if self.read_bytes_all(bytes) != bytes.len() {
+            return Err(crate::serde::Error::DeserializeUnexpectedEnd);
+        }
+
+        let unaligned = bytes.len() % WORD_SIZE;
+        if unaligned != 0 {
+            let pad_bytes = WORD_SIZE - unaligned;
+            let mut padding = [0u8; WORD_SIZE];
+            if self.read_bytes_all(&mut padding[..pad_bytes]) != pad_bytes {
+                return Err(crate::serde::Error::DeserializeUnexpectedEnd);
+            }
+        }
+        Ok(())
+    }
+}
+
+// NOTE(review): the generic parameter list below was lost in this copy of the patch and has been
+// restored as `R: std::io::Read + ?Sized` -- confirm against the vendored upstream source.
+#[cfg(feature = "std")]
+impl<R: std::io::Read + ?Sized> WordRead for std::io::BufReader<R> {
+    /// Fills `words` completely from the reader.
+    ///
+    /// Any I/O error, including reaching EOF before the buffer is full, is reported as
+    /// `DeserializeUnexpectedEnd`.
+    fn read_words(&mut self, words: &mut [u32]) -> crate::serde::Result<()> {
+        use std::io::Read;
+        // A single `Read::read` call may return fewer bytes than requested without being at EOF
+        // (`BufReader` hands back at most what its internal buffer holds), which used to be
+        // misreported as an unexpected end. `read_exact` keeps reading until the slice is full,
+        // consistent with `read_padded_bytes` below.
+        self.read_exact(bytemuck::cast_slice_mut(words))
+            .map_err(|_| crate::serde::Error::DeserializeUnexpectedEnd)
+    }
+
+    /// Fills `bytes`, then consumes and discards the padding up to the next word boundary.
+    fn read_padded_bytes(&mut self, bytes: &mut [u8]) -> crate::serde::Result<()> {
+        use std::io::Read;
+        self.read_exact(bytes)
+            .map_err(|_| crate::serde::Error::DeserializeUnexpectedEnd)?;
+
+        let unaligned = bytes.len() % WORD_SIZE;
+        if unaligned != 0 {
+            let pad_bytes = WORD_SIZE - unaligned;
+            let mut padding = [0u8; WORD_SIZE];
+            self.read_exact(&mut padding[..pad_bytes])
+                .map_err(|_| crate::serde::Error::DeserializeUnexpectedEnd)?;
+        }
+
+        Ok(())
+    }
+}
+
+#[cfg(feature = "std")]
+impl std::io::Read for FdReader {
+    fn
read(&mut self, buf: &mut [u8]) -> std::io::Result { + Ok(self.read_bytes(buf)) + } +} diff --git a/riscv/zkvm/lib/src/env/write.rs b/riscv/zkvm/lib/src/env/write.rs new file mode 100644 index 0000000000..ffc8ead0f2 --- /dev/null +++ b/riscv/zkvm/lib/src/env/write.rs @@ -0,0 +1,95 @@ +// Copyright 2024 RISC Zero, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use axvm_platform::{syscall::sys_write, WORD_SIZE}; +use bytemuck::Pod; +use serde::Serialize; + +use crate::serde::{Serializer, WordWrite}; + +/// Serializes and writes objects. +pub trait Write { + /// Write a serialized object. + fn write(&mut self, val: T); + + /// Write raw data. + fn write_slice(&mut self, buf: &[T]); +} + +impl Write for &mut W { + fn write(&mut self, val: T) { + (**self).write(val) + } + + fn write_slice(&mut self, buf: &[T]) { + (**self).write_slice(buf) + } +} + +/// Provides a FdWriter which can write to any file descriptor. +pub struct FdWriter { + fd: u32, + hook: F, +} + +impl FdWriter { + /// Creates a new FdWriter writing to the given file descriptor. 
+ pub fn new(fd: u32, hook: F) -> Self { + FdWriter { fd, hook } + } + + fn write_bytes(&mut self, bytes: &[u8]) { + unsafe { sys_write(self.fd, bytes.as_ptr(), bytes.len()) } + (self.hook)(bytes); + } +} + +impl Write for FdWriter { + fn write(&mut self, val: T) { + val.serialize(&mut Serializer::new(self)).unwrap(); + } + + fn write_slice(&mut self, buf: &[T]) { + self.write_bytes(bytemuck::cast_slice(buf)); + } +} + +impl WordWrite for FdWriter { + fn write_words(&mut self, words: &[u32]) -> crate::serde::Result<()> { + self.write_bytes(bytemuck::cast_slice(words)); + Ok(()) + } + + fn write_padded_bytes(&mut self, bytes: &[u8]) -> crate::serde::Result<()> { + self.write_bytes(bytes); + let unaligned = bytes.len() % WORD_SIZE; + if unaligned != 0 { + let pad_bytes = WORD_SIZE - unaligned; + self.write_bytes(&[0u8; WORD_SIZE][..pad_bytes]); + } + Ok(()) + } +} + +#[cfg(feature = "std")] +impl std::io::Write for FdWriter { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + self.write_bytes(buf); + Ok(buf.len()) + } + + fn flush(&mut self) -> std::io::Result<()> { + Ok(()) + } +} diff --git a/riscv/zkvm/lib/src/lib.rs b/riscv/zkvm/lib/src/lib.rs new file mode 100644 index 0000000000..9b80ef03f4 --- /dev/null +++ b/riscv/zkvm/lib/src/lib.rs @@ -0,0 +1,213 @@ +// Copyright 2024 RISC Zero, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! The RISC Zero zkVM's guest-side RISC-V API. +//! +//! 
Code that is validated by the [RISC Zero zkVM](crate) is run inside the guest. In almost all +//! practical cases, the guest will want to read private input data from the host and write public +//! data to the journal. This can be done with [env::read] and [env::commit], respectively; +//! additional I/O functionality is also available in [mod@env]. +//! +//! ## Installation +//! +//! To build and run RISC Zero zkVM code, you will need to install the RISC Zero +//! toolchain, which can be done using the rzup utility: +//! +//! ```sh +//! curl -L https://risczero.com/install | bash +//! rzup install +//! ``` +//! +//! ## Example +//! +//! The following guest code[^starter-ex] proves a number is +//! composite by multiplying two unsigned integers, and panicking if either is +//! `1` or if the multiplication overflows: +//! +//! ```ignore +//! #![no_main] +//! #![no_std] +//! +//! use risc0_zkvm::guest::env; +//! +//! risc0_zkvm::guest::entry!(main); +//! +//! fn main() { +//! // Load the first number from the host +//! let a: u64 = env::read(); +//! // Load the second number from the host +//! let b: u64 = env::read(); +//! // Verify that neither of them are 1 (i.e. nontrivial factors) +//! if a == 1 || b == 1 { +//! panic!("Trivial factors") +//! } +//! // Compute the product while being careful with integer overflow +//! let product = a.checked_mul(b).expect("Integer overflow"); +//! env::commit(&product); +//! } +//! ``` +//! +//! Notice how [env::read] is used to load the two factors, and [env::commit] is used to make their +//! composite product publicly available. All input and output of your guest is private except for +//! what is written to the journal with [env::commit]. +//! +//! By default, the guest only has the Rust `core` libraries and not `std`. A partial +//! implementation of the Rust standard libraries can be enabled with the `std` feature on this [crate]. +//!
When this feature is not enabled, the lines including `#![no_std]` and `#![no_main]` are +//! required, as well as the use of the [crate::guest::entry] macro. When `std` is enabled, these +//! three lines can be omitted and many features of `std` can be used. +//! +//! If you encounter problems building zkVM guest code, you can see if we have a +//! known workaround for your issue by looking in our +//! [rust guest workarounds](https://github.com/risc0/risc0/issues?q=is%3Aissue+is%3Aopen+label%3A%22rust+guest+workarounds%22) +//! tag on GitHub. +//! +//! [^starter-ex]: The example is based on the [Factors example](https://github.com/risc0/risc0/tree/main/examples/factors). + +#![cfg_attr(not(feature = "std"), no_std)] +#![deny(rustdoc::broken_intra_doc_links)] +#![deny(missing_docs)] +#![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))] + +extern crate alloc; + +// FIXME: re-enable for std +// pub mod env; +// pub mod serde; + +#[cfg(target_os = "zkvm")] +use core::arch::asm; + +#[cfg(target_os = "zkvm")] +core::arch::global_asm!(include_str!("memset.s")); +#[cfg(target_os = "zkvm")] +core::arch::global_asm!(include_str!("memcpy.s")); + +fn _fault() -> ! { + #[cfg(target_os = "zkvm")] + unsafe { + asm!("sw x0, 1(x0)") + }; + unreachable!(); +} + +// /// Aborts the guest with the given message. +// pub fn abort(msg: &str) -> ! { +// // SAFETY: A compliant host should fault when it receives this syscall. +// // sys_panic will issue an invalid instruction for non-compliant hosts. +// unsafe { +// sys_panic(msg.as_ptr(), msg.len()); +// } +// } + +/// Used for defining the guest's entrypoint and main function. +/// +/// When `#![no_main]` is used, the program's entrypoint and main function are left undefined. The +/// `entry` macro is required to indicate the main function and link it to an entrypoint provided +/// by the RISC Zero SDK. +/// +/// When `std` is enabled, the entrypoint will be linked automatically and this macro is not +/// required.
+/// +/// # Example +/// +/// ```ignore +/// #![no_main] +/// #![no_std] +/// +/// risc0_zkvm::entry!(main); +/// +/// fn main() { } +/// ``` +#[macro_export] +macro_rules! entry { + ($path:path) => { + // Type check the given path + const ZKVM_ENTRY: fn() = $path; + + // Include generated main in a module so we don't conflict + // with any other definitions of "main" in this file. + mod zkvm_generated_main { + #[no_mangle] + fn main() { + super::ZKVM_ENTRY() + } + } + }; +} + +#[cfg(target_os = "zkvm")] +#[no_mangle] +unsafe extern "C" fn __start() -> ! { + #[cfg(feature = "heap-embedded-alloc")] + axvm_platform::heap::embedded::init(); + + // env::init(); + + { + extern "C" { + fn main(); + } + main() + } + + // env::finalize(true, 0); + unreachable!(); +} + +#[cfg(target_os = "zkvm")] +static STACK_TOP: u32 = axvm_platform::memory::STACK_TOP; + +// Entry point; sets up global pointer and stack pointer and passes +// control to __start. TODO: when asm_const is stabilized, use that here +// instead of defining a symbol and dereferencing it. +#[cfg(target_os = "zkvm")] +core::arch::global_asm!( + r#" +.section .text._start; +.globl _start; +_start: + .option push; + .option norelax; + la gp, __global_pointer$; + .option pop; + la sp, {0} + lw sp, 0(sp) + call __start; +"#, + sym STACK_TOP +); + +/// Require that accesses to the memory behind the given pointer before the memory +/// barrier don't get optimized away or reordered to after the memory +/// barrier. +#[allow(unused_variables)] +pub fn memory_barrier(ptr: *const T) { + // SAFETY: This passes a pointer in, but does nothing with it. + #[cfg(target_os = "zkvm")] + unsafe { + asm!("/* {0} */", in(reg) (ptr)) + } + #[cfg(not(target_os = "zkvm"))] + core::sync::atomic::fence(core::sync::atomic::Ordering::SeqCst) +} + +// When std is not linked, register a panic handler here so the user does not +// have to. If std is linked, it will define the panic handler instead, so this +// panic handler must not be included.
+#[cfg(all(target_os = "zkvm", not(feature = "std")))] +#[panic_handler] +fn panic_impl(panic_info: &core::panic::PanicInfo) -> ! { + axvm_platform::rust_rt::panic_fault(panic_info); +} diff --git a/riscv/zkvm/lib/src/memcpy.s b/riscv/zkvm/lib/src/memcpy.s new file mode 100644 index 0000000000..e0043ec220 --- /dev/null +++ b/riscv/zkvm/lib/src/memcpy.s @@ -0,0 +1,498 @@ +// This is musl-libc commit 37e18b7bf307fa4a8c745feebfcba54a0ba74f30: +// +// src/string/memcpy.c +// +// This was compiled into assembly with: +// +// clang-14 -target riscv32 -march=rv32im -O3 -S memcpy.c -nostdlib -fno-builtin -funroll-loops +// +// and labels manually updated to not conflict. +// +// musl as a whole is licensed under the following standard MIT license: +// +// ---------------------------------------------------------------------- +// Copyright © 2005-2020 Rich Felker, et al. +// +// Permission is hereby granted, free of charge, to any person obtaining +// a copy of this software and associated documentation files (the +// "Software"), to deal in the Software without restriction, including +// without limitation the rights to use, copy, modify, merge, publish, +// distribute, sublicense, and/or sell copies of the Software, and to +// permit persons to whom the Software is furnished to do so, subject to +// the following conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +// SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+// ---------------------------------------------------------------------- +// +// Authors/contributors include: +// +// A. Wilcox +// Ada Worcester +// Alex Dowad +// Alex Suykov +// Alexander Monakov +// Andre McCurdy +// Andrew Kelley +// Anthony G. Basile +// Aric Belsito +// Arvid Picciani +// Bartosz Brachaczek +// Benjamin Peterson +// Bobby Bingham +// Boris Brezillon +// Brent Cook +// Chris Spiegel +// Clément Vasseur +// Daniel Micay +// Daniel Sabogal +// Daurnimator +// David Carlier +// David Edelsohn +// Denys Vlasenko +// Dmitry Ivanov +// Dmitry V. Levin +// Drew DeVault +// Emil Renner Berthing +// Fangrui Song +// Felix Fietkau +// Felix Janda +// Gianluca Anzolin +// Hauke Mehrtens +// He X +// Hiltjo Posthuma +// Isaac Dunham +// Jaydeep Patil +// Jens Gustedt +// Jeremy Huntwork +// Jo-Philipp Wich +// Joakim Sindholt +// John Spencer +// Julien Ramseier +// Justin Cormack +// Kaarle Ritvanen +// Khem Raj +// Kylie McClain +// Leah Neukirchen +// Luca Barbato +// Luka Perkov +// M Farkas-Dyck (Strake) +// Mahesh Bodapati +// Markus Wichmann +// Masanori Ogino +// Michael Clark +// Michael Forney +// Mikhail Kremnyov +// Natanael Copa +// Nicholas J. Kain +// orc +// Pascal Cuoq +// Patrick Oppenlander +// Petr Hosek +// Petr Skocik +// Pierre Carrier +// Reini Urban +// Rich Felker +// Richard Pennington +// Ryan Fairfax +// Samuel Holland +// Segev Finer +// Shiz +// sin +// Solar Designer +// Stefan Kristiansson +// Stefan O'Rear +// Szabolcs Nagy +// Timo Teräs +// Trutz Behn +// Valentin Ochs +// Will Dietz +// William Haddon +// William Pitcock +// +// Portions of this software are derived from third-party works licensed +// under terms compatible with the above MIT license: +// +// The TRE regular expression implementation (src/regex/reg* and +// src/regex/tre*) is Copyright © 2001-2008 Ville Laurikari and licensed +// under a 2-clause BSD license (license text in the source files). 
The +// included version has been heavily modified by Rich Felker in 2012, in +// the interests of size, simplicity, and namespace cleanliness. +// +// Much of the math library code (src/math/* and src/complex/*) is +// Copyright © 1993,2004 Sun Microsystems or +// Copyright © 2003-2011 David Schultz or +// Copyright © 2003-2009 Steven G. Kargl or +// Copyright © 2003-2009 Bruce D. Evans or +// Copyright © 2008 Stephen L. Moshier or +// Copyright © 2017-2018 Arm Limited +// and labelled as such in comments in the individual source files. All +// have been licensed under extremely permissive terms. +// +// The ARM memcpy code (src/string/arm/memcpy.S) is Copyright © 2008 +// The Android Open Source Project and is licensed under a two-clause BSD +// license. It was taken from Bionic libc, used on Android. +// +// The AArch64 memcpy and memset code (src/string/aarch64/*) are +// Copyright © 1999-2019, Arm Limited. +// +// The implementation of DES for crypt (src/crypt/crypt_des.c) is +// Copyright © 1994 David Burren. It is licensed under a BSD license. +// +// The implementation of blowfish crypt (src/crypt/crypt_blowfish.c) was +// originally written by Solar Designer and placed into the public +// domain. The code also comes with a fallback permissive license for use +// in jurisdictions that may not recognize the public domain. +// +// The smoothsort implementation (src/stdlib/qsort.c) is Copyright © 2011 +// Valentin Ochs and is licensed under an MIT-style license. +// +// The x86_64 port was written by Nicholas J. Kain and is licensed under +// the standard MIT terms. +// +// The mips and microblaze ports were originally written by Richard +// Pennington for use in the ellcc project. The original code was adapted +// by Rich Felker for build system and code conventions during upstream +// integration. It is licensed under the standard MIT terms. +// +// The mips64 port was contributed by Imagination Technologies and is +// licensed under the standard MIT terms. 
+// +// The powerpc port was also originally written by Richard Pennington, +// and later supplemented and integrated by John Spencer. It is licensed +// under the standard MIT terms. +// +// All other files which have no copyright comments are original works +// produced specifically for use as part of this library, written either +// by Rich Felker, the main author of the library, or by one or more +// contributors listed above. Details on authorship of individual files +// can be found in the git version control history of the project. The +// omission of copyright and license comments in each file is in the +// interest of source tree size. +// +// In addition, permission is hereby granted for all public header files +// (include/* and arch/* /bits/* ) and crt files intended to be linked into +// applications (crt/*, ldso/dlstart.c, and arch/* /crt_arch.h) to omit +// the copyright notice and permission notice otherwise required by the +// license, and to use these files without any requirement of +// attribution. These files include substantial contributions from: +// +// Bobby Bingham +// John Spencer +// Nicholas J. Kain +// Rich Felker +// Richard Pennington +// Stefan Kristiansson +// Szabolcs Nagy +// +// all of whom have explicitly granted such permission. +// +// This file previously contained text expressing a belief that most of +// the files covered by the above exception were sufficiently trivial not +// to be subject to copyright, resulting in confusion over whether it +// negated the permissions granted in the license. In the spirit of +// permissive licensing, and of not having licensing issues being an +// obstacle to adoption, that text has been removed. 
+ .text + .attribute 4, 16 + .attribute 5, "rv32im" + .file "musl_memcpy.c" + .globl memcpy + .p2align 2 + .type memcpy,@function +memcpy: + andi a3, a1, 3 + seqz a3, a3 + seqz a4, a2 + or a3, a3, a4 + bnez a3, .LBBmemcpy0_11 + addi a5, a1, 1 + mv a6, a0 +.LBBmemcpy0_2: + lb a7, 0(a1) + addi a4, a1, 1 + addi a3, a6, 1 + sb a7, 0(a6) + addi a2, a2, -1 + andi a1, a5, 3 + snez a1, a1 + snez a6, a2 + and a7, a1, a6 + addi a5, a5, 1 + mv a1, a4 + mv a6, a3 + bnez a7, .LBBmemcpy0_2 + andi a1, a3, 3 + beqz a1, .LBBmemcpy0_12 +.LBBmemcpy0_4: + li a5, 32 + bltu a2, a5, .LBBmemcpy0_26 + li a5, 3 + beq a1, a5, .LBBmemcpy0_19 + li a5, 2 + beq a1, a5, .LBBmemcpy0_22 + li a5, 1 + bne a1, a5, .LBBmemcpy0_26 + lw a5, 0(a4) + sb a5, 0(a3) + srli a1, a5, 8 + sb a1, 1(a3) + srli a6, a5, 16 + addi a1, a3, 3 + sb a6, 2(a3) + addi a2, a2, -3 + addi a3, a4, 16 + li a4, 16 +.LBBmemcpy0_9: + lw a6, -12(a3) + srli a5, a5, 24 + slli a7, a6, 8 + lw t0, -8(a3) + or a5, a7, a5 + sw a5, 0(a1) + srli a5, a6, 24 + slli a6, t0, 8 + lw a7, -4(a3) + or a5, a6, a5 + sw a5, 4(a1) + srli a6, t0, 24 + slli t0, a7, 8 + lw a5, 0(a3) + or a6, t0, a6 + sw a6, 8(a1) + srli a6, a7, 24 + slli a7, a5, 8 + or a6, a7, a6 + sw a6, 12(a1) + addi a1, a1, 16 + addi a2, a2, -16 + addi a3, a3, 16 + bltu a4, a2, .LBBmemcpy0_9 + addi a4, a3, -13 + j .LBBmemcpy0_25 +.LBBmemcpy0_11: + mv a3, a0 + mv a4, a1 + andi a1, a3, 3 + bnez a1, .LBBmemcpy0_4 +.LBBmemcpy0_12: + li a1, 16 + bltu a2, a1, .LBBmemcpy0_15 + li a1, 15 +.LBBmemcpy0_14: + lw a5, 0(a4) + lw a6, 4(a4) + lw a7, 8(a4) + lw t0, 12(a4) + sw a5, 0(a3) + sw a6, 4(a3) + sw a7, 8(a3) + sw t0, 12(a3) + addi a4, a4, 16 + addi a2, a2, -16 + addi a3, a3, 16 + bltu a1, a2, .LBBmemcpy0_14 +.LBBmemcpy0_15: + andi a1, a2, 8 + beqz a1, .LBBmemcpy0_17 + lw a1, 0(a4) + lw a5, 4(a4) + sw a1, 0(a3) + sw a5, 4(a3) + addi a3, a3, 8 + addi a4, a4, 8 +.LBBmemcpy0_17: + andi a1, a2, 4 + beqz a1, .LBBmemcpy0_30 + lw a1, 0(a4) + sw a1, 0(a3) + addi a3, a3, 4 + addi a4, a4, 4 + j 
.LBBmemcpy0_30 +.LBBmemcpy0_19: + lw a5, 0(a4) + addi a1, a3, 1 + sb a5, 0(a3) + addi a2, a2, -1 + addi a3, a4, 16 + li a4, 18 +.LBBmemcpy0_20: + lw a6, -12(a3) + srli a5, a5, 8 + slli a7, a6, 24 + lw t0, -8(a3) + or a5, a7, a5 + sw a5, 0(a1) + srli a5, a6, 8 + slli a6, t0, 24 + lw a7, -4(a3) + or a5, a6, a5 + sw a5, 4(a1) + srli a6, t0, 8 + slli t0, a7, 24 + lw a5, 0(a3) + or a6, t0, a6 + sw a6, 8(a1) + srli a6, a7, 8 + slli a7, a5, 24 + or a6, a7, a6 + sw a6, 12(a1) + addi a1, a1, 16 + addi a2, a2, -16 + addi a3, a3, 16 + bltu a4, a2, .LBBmemcpy0_20 + addi a4, a3, -15 + j .LBBmemcpy0_25 +.LBBmemcpy0_22: + lw a5, 0(a4) + sb a5, 0(a3) + srli a6, a5, 8 + addi a1, a3, 2 + sb a6, 1(a3) + addi a2, a2, -2 + addi a3, a4, 16 + li a4, 17 +.LBBmemcpy0_23: + lw a6, -12(a3) + srli a5, a5, 16 + slli a7, a6, 16 + lw t0, -8(a3) + or a5, a7, a5 + sw a5, 0(a1) + srli a5, a6, 16 + slli a6, t0, 16 + lw a7, -4(a3) + or a5, a6, a5 + sw a5, 4(a1) + srli a6, t0, 16 + slli t0, a7, 16 + lw a5, 0(a3) + or a6, t0, a6 + sw a6, 8(a1) + srli a6, a7, 16 + slli a7, a5, 16 + or a6, a7, a6 + sw a6, 12(a1) + addi a1, a1, 16 + addi a2, a2, -16 + addi a3, a3, 16 + bltu a4, a2, .LBBmemcpy0_23 + addi a4, a3, -14 +.LBBmemcpy0_25: + mv a3, a1 +.LBBmemcpy0_26: + andi a1, a2, 16 + bnez a1, .LBBmemcpy0_35 + andi a1, a2, 8 + bnez a1, .LBBmemcpy0_36 +.LBBmemcpy0_28: + andi a1, a2, 4 + beqz a1, .LBBmemcpy0_30 +.LBBmemcpy0_29: + lb a1, 0(a4) + lb a5, 1(a4) + lb a6, 2(a4) + sb a1, 0(a3) + sb a5, 1(a3) + lb a1, 3(a4) + sb a6, 2(a3) + addi a4, a4, 4 + addi a5, a3, 4 + sb a1, 3(a3) + mv a3, a5 +.LBBmemcpy0_30: + andi a1, a2, 2 + bnez a1, .LBBmemcpy0_33 + andi a1, a2, 1 + bnez a1, .LBBmemcpy0_34 +.LBBmemcpy0_32: + ret +.LBBmemcpy0_33: + lb a1, 0(a4) + lb a5, 1(a4) + sb a1, 0(a3) + addi a4, a4, 2 + addi a1, a3, 2 + sb a5, 1(a3) + mv a3, a1 + andi a1, a2, 1 + beqz a1, .LBBmemcpy0_32 +.LBBmemcpy0_34: + lb a1, 0(a4) + sb a1, 0(a3) + ret +.LBBmemcpy0_35: + lb a1, 0(a4) + lb a5, 1(a4) + lb a6, 2(a4) + sb a1, 0(a3) + sb 
a5, 1(a3) + lb a1, 3(a4) + sb a6, 2(a3) + lb a5, 4(a4) + lb a6, 5(a4) + sb a1, 3(a3) + lb a1, 6(a4) + sb a5, 4(a3) + sb a6, 5(a3) + lb a5, 7(a4) + sb a1, 6(a3) + lb a1, 8(a4) + lb a6, 9(a4) + sb a5, 7(a3) + lb a5, 10(a4) + sb a1, 8(a3) + sb a6, 9(a3) + lb a1, 11(a4) + sb a5, 10(a3) + lb a5, 12(a4) + lb a6, 13(a4) + sb a1, 11(a3) + lb a1, 14(a4) + sb a5, 12(a3) + sb a6, 13(a3) + lb a5, 15(a4) + sb a1, 14(a3) + addi a4, a4, 16 + addi a1, a3, 16 + sb a5, 15(a3) + mv a3, a1 + andi a1, a2, 8 + beqz a1, .LBBmemcpy0_28 +.LBBmemcpy0_36: + lb a1, 0(a4) + lb a5, 1(a4) + lb a6, 2(a4) + sb a1, 0(a3) + sb a5, 1(a3) + lb a1, 3(a4) + sb a6, 2(a3) + lb a5, 4(a4) + lb a6, 5(a4) + sb a1, 3(a3) + lb a1, 6(a4) + sb a5, 4(a3) + sb a6, 5(a3) + lb a5, 7(a4) + sb a1, 6(a3) + addi a4, a4, 8 + addi a1, a3, 8 + sb a5, 7(a3) + mv a3, a1 + andi a1, a2, 4 + bnez a1, .LBBmemcpy0_29 + j .LBBmemcpy0_30 +.Lfuncmemcpy_end0: + .size memcpy, .Lfuncmemcpy_end0-memcpy + + .ident "Ubuntu clang version 14.0.6-++20220622053131+f28c006a5895-1~exp1~20220622173215.157" + .section ".note.GNU-stack","",@progbits + .addrsig diff --git a/riscv/zkvm/lib/src/memset.s b/riscv/zkvm/lib/src/memset.s new file mode 100644 index 0000000000..64c0d9dbfd --- /dev/null +++ b/riscv/zkvm/lib/src/memset.s @@ -0,0 +1,281 @@ +// This is musl-libc memset commit 37e18b7bf307fa4a8c745feebfcba54a0ba74f30: +// +// src/string/memset.c +// +// This was compiled into assembly with: +// +// clang-14 -target riscv32 -march=rv32im -O3 -S memset.c -nostdlib -fno-builtin -funroll-loops +// +// and labels manually updated to not conflict. +// +// musl as a whole is licensed under the following standard MIT license: +// +// ---------------------------------------------------------------------- +// Copyright © 2005-2020 Rich Felker, et al. 
+// +// Permission is hereby granted, free of charge, to any person obtaining +// a copy of this software and associated documentation files (the +// "Software"), to deal in the Software without restriction, including +// without limitation the rights to use, copy, modify, merge, publish, +// distribute, sublicense, and/or sell copies of the Software, and to +// permit persons to whom the Software is furnished to do so, subject to +// the following conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +// SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +// ---------------------------------------------------------------------- +// +// Authors/contributors include: +// +// A. Wilcox +// Ada Worcester +// Alex Dowad +// Alex Suykov +// Alexander Monakov +// Andre McCurdy +// Andrew Kelley +// Anthony G. Basile +// Aric Belsito +// Arvid Picciani +// Bartosz Brachaczek +// Benjamin Peterson +// Bobby Bingham +// Boris Brezillon +// Brent Cook +// Chris Spiegel +// Clément Vasseur +// Daniel Micay +// Daniel Sabogal +// Daurnimator +// David Carlier +// David Edelsohn +// Denys Vlasenko +// Dmitry Ivanov +// Dmitry V. 
Levin +// Drew DeVault +// Emil Renner Berthing +// Fangrui Song +// Felix Fietkau +// Felix Janda +// Gianluca Anzolin +// Hauke Mehrtens +// He X +// Hiltjo Posthuma +// Isaac Dunham +// Jaydeep Patil +// Jens Gustedt +// Jeremy Huntwork +// Jo-Philipp Wich +// Joakim Sindholt +// John Spencer +// Julien Ramseier +// Justin Cormack +// Kaarle Ritvanen +// Khem Raj +// Kylie McClain +// Leah Neukirchen +// Luca Barbato +// Luka Perkov +// M Farkas-Dyck (Strake) +// Mahesh Bodapati +// Markus Wichmann +// Masanori Ogino +// Michael Clark +// Michael Forney +// Mikhail Kremnyov +// Natanael Copa +// Nicholas J. Kain +// orc +// Pascal Cuoq +// Patrick Oppenlander +// Petr Hosek +// Petr Skocik +// Pierre Carrier +// Reini Urban +// Rich Felker +// Richard Pennington +// Ryan Fairfax +// Samuel Holland +// Segev Finer +// Shiz +// sin +// Solar Designer +// Stefan Kristiansson +// Stefan O'Rear +// Szabolcs Nagy +// Timo Teräs +// Trutz Behn +// Valentin Ochs +// Will Dietz +// William Haddon +// William Pitcock +// +// Portions of this software are derived from third-party works licensed +// under terms compatible with the above MIT license: +// +// The TRE regular expression implementation (src/regex/reg* and +// src/regex/tre*) is Copyright © 2001-2008 Ville Laurikari and licensed +// under a 2-clause BSD license (license text in the source files). The +// included version has been heavily modified by Rich Felker in 2012, in +// the interests of size, simplicity, and namespace cleanliness. +// +// Much of the math library code (src/math/* and src/complex/*) is +// Copyright © 1993,2004 Sun Microsystems or +// Copyright © 2003-2011 David Schultz or +// Copyright © 2003-2009 Steven G. Kargl or +// Copyright © 2003-2009 Bruce D. Evans or +// Copyright © 2008 Stephen L. Moshier or +// Copyright © 2017-2018 Arm Limited +// and labelled as such in comments in the individual source files. All +// have been licensed under extremely permissive terms. 
+// +// The ARM memcpy code (src/string/arm/memcpy.S) is Copyright © 2008 +// The Android Open Source Project and is licensed under a two-clause BSD +// license. It was taken from Bionic libc, used on Android. +// +// The AArch64 memcpy and memset code (src/string/aarch64/*) are +// Copyright © 1999-2019, Arm Limited. +// +// The implementation of DES for crypt (src/crypt/crypt_des.c) is +// Copyright © 1994 David Burren. It is licensed under a BSD license. +// +// The implementation of blowfish crypt (src/crypt/crypt_blowfish.c) was +// originally written by Solar Designer and placed into the public +// domain. The code also comes with a fallback permissive license for use +// in jurisdictions that may not recognize the public domain. +// +// The smoothsort implementation (src/stdlib/qsort.c) is Copyright © 2011 +// Valentin Ochs and is licensed under an MIT-style license. +// +// The x86_64 port was written by Nicholas J. Kain and is licensed under +// the standard MIT terms. +// +// The mips and microblaze ports were originally written by Richard +// Pennington for use in the ellcc project. The original code was adapted +// by Rich Felker for build system and code conventions during upstream +// integration. It is licensed under the standard MIT terms. +// +// The mips64 port was contributed by Imagination Technologies and is +// licensed under the standard MIT terms. +// +// The powerpc port was also originally written by Richard Pennington, +// and later supplemented and integrated by John Spencer. It is licensed +// under the standard MIT terms. +// +// All other files which have no copyright comments are original works +// produced specifically for use as part of this library, written either +// by Rich Felker, the main author of the library, or by one or more +// contributors listed above. Details on authorship of individual files +// can be found in the git version control history of the project. 
The +// omission of copyright and license comments in each file is in the +// interest of source tree size. +// +// In addition, permission is hereby granted for all public header files +// (include/* and arch/* /bits/* ) and crt files intended to be linked into +// applications (crt/*, ldso/dlstart.c, and arch/* /crt_arch.h) to omit +// the copyright notice and permission notice otherwise required by the +// license, and to use these files without any requirement of +// attribution. These files include substantial contributions from: +// +// Bobby Bingham +// John Spencer +// Nicholas J. Kain +// Rich Felker +// Richard Pennington +// Stefan Kristiansson +// Szabolcs Nagy +// +// all of whom have explicitly granted such permission. +// +// This file previously contained text expressing a belief that most of +// the files covered by the above exception were sufficiently trivial not +// to be subject to copyright, resulting in confusion over whether it +// negated the permissions granted in the license. In the spirit of +// permissive licensing, and of not having licensing issues being an +// obstacle to adoption, that text has been removed. 
+ .text + .attribute 4, 16 + .attribute 5, "rv32im" + .file "musl_memset.c" + .globl memset + .p2align 2 + .type memset,@function +memset: + beqz a2, .LBB0_9memset + sb a1, 0(a0) + add a3, a2, a0 + li a4, 3 + sb a1, -1(a3) + bltu a2, a4, .LBB0_9memset + sb a1, 1(a0) + sb a1, 2(a0) + sb a1, -2(a3) + li a4, 7 + sb a1, -3(a3) + bltu a2, a4, .LBB0_9memset + sb a1, 3(a0) + li a5, 9 + sb a1, -4(a3) + bltu a2, a5, .LBB0_9memset + neg a3, a0 + andi a4, a3, 3 + add a3, a0, a4 + sub a2, a2, a4 + andi a2, a2, -4 + andi a1, a1, 255 + lui a4, 4112 + addi a4, a4, 257 + mul a1, a1, a4 + sw a1, 0(a3) + add a4, a3, a2 + sw a1, -4(a4) + bltu a2, a5, .LBB0_9memset + sw a1, 4(a3) + sw a1, 8(a3) + sw a1, -12(a4) + li a5, 25 + sw a1, -8(a4) + bltu a2, a5, .LBB0_9memset + sw a1, 12(a3) + sw a1, 16(a3) + sw a1, 20(a3) + sw a1, 24(a3) + sw a1, -28(a4) + sw a1, -24(a4) + sw a1, -20(a4) + andi a5, a3, 4 + ori a5, a5, 24 + sub a2, a2, a5 + li a6, 32 + sw a1, -16(a4) + bltu a2, a6, .LBB0_9memset + add a3, a3, a5 + li a4, 31 +.LBB0_8memset: + sw a1, 0(a3) + sw a1, 4(a3) + sw a1, 8(a3) + sw a1, 12(a3) + sw a1, 16(a3) + sw a1, 20(a3) + sw a1, 24(a3) + sw a1, 28(a3) + addi a2, a2, -32 + addi a3, a3, 32 + bltu a4, a2, .LBB0_8memset +.LBB0_9memset: + ret +.Lfunc_end0memset: + .size memset, .Lfunc_end0memset-memset + + .ident "Ubuntu clang version 14.0.6-++20220622053131+f28c006a5895-1~exp1~20220622173215.157" + .section ".note.GNU-stack","",@progbits + .addrsig diff --git a/riscv/zkvm/lib/src/serde/deserializer.rs b/riscv/zkvm/lib/src/serde/deserializer.rs new file mode 100644 index 0000000000..d52636b07a --- /dev/null +++ b/riscv/zkvm/lib/src/serde/deserializer.rs @@ -0,0 +1,591 @@ +// Copyright 2024 RISC Zero, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use alloc::{string::String, vec}; + +use axvm_platform::{align_up, WORD_SIZE}; +use bytemuck::Pod; +use serde::de::{DeserializeOwned, DeserializeSeed, IntoDeserializer, Visitor}; + +use super::err::{Error, Result}; + +/// A reader for reading streams with serialized word-based data +pub trait WordRead { + /// Fill the given buffer with words from input. Returns an error if EOF + /// was encountered. + fn read_words(&mut self, words: &mut [u32]) -> Result<()>; + + /// Fill the given buffer with bytes from input, and discard the + /// padding up to the next word boundary. Returns an error if EOF was + /// encountered. 
+ fn read_padded_bytes(&mut self, bytes: &mut [u8]) -> Result<()>; +} + +// Allow borrowed WordReads to work transparently +impl WordRead for &mut R { + fn read_words(&mut self, words: &mut [u32]) -> Result<()> { + (**self).read_words(words) + } + + fn read_padded_bytes(&mut self, bytes: &mut [u8]) -> Result<()> { + (**self).read_padded_bytes(bytes) + } +} + +impl WordRead for &[u32] { + fn read_words(&mut self, out: &mut [u32]) -> Result<()> { + if out.len() > self.len() { + Err(Error::DeserializeUnexpectedEnd) + } else { + out.clone_from_slice(&self[..out.len()]); + (_, *self) = self.split_at(out.len()); + Ok(()) + } + } + + fn read_padded_bytes(&mut self, out: &mut [u8]) -> Result<()> { + let bytes: &[u8] = bytemuck::cast_slice(self); + if out.len() > bytes.len() { + Err(Error::DeserializeUnexpectedEnd) + } else { + out.clone_from_slice(&bytes[..out.len()]); + (_, *self) = self.split_at(align_up(out.len(), WORD_SIZE) / WORD_SIZE); + Ok(()) + } + } +} + +/// Deserialize a slice into the specified type. +/// +/// Deserialize `slice` into type `T`. Returns an `Err` if deserialization isn't +/// possible, such as if `slice` is not the serialized form of an object of type +/// `T`. +pub fn from_slice(slice: &[P]) -> Result { + match bytemuck::try_cast_slice(slice) { + Ok(slice) => { + let mut deserializer = Deserializer::new(slice); + T::deserialize(&mut deserializer) + } + // P is u8 or another value without word-alignment. Data must be copied. 
+ Err(bytemuck::PodCastError::TargetAlignmentGreaterAndInputNotAligned) => { + let vec = bytemuck::allocation::pod_collect_to_vec::(slice); + let mut deserializer = Deserializer::new(vec.as_slice()); + T::deserialize(&mut deserializer) + } + Err(ref e) => panic!("failed to cast or read slice as [u32]: {}", e), + } +} + +/// Enables deserializing from a WordRead +pub struct Deserializer<'de, R: WordRead + 'de> { + reader: R, + phantom: core::marker::PhantomData<&'de ()>, +} + +struct SeqAccess<'a, 'de, R: WordRead + 'de> { + deserializer: &'a mut Deserializer<'de, R>, + len: usize, +} + +impl<'de, 'a, R: WordRead + 'de> serde::de::SeqAccess<'de> for SeqAccess<'a, 'de, R> { + type Error = Error; + + fn next_element_seed(&mut self, seed: T) -> Result> + where + T: DeserializeSeed<'de>, + { + if self.len > 0 { + self.len -= 1; + Ok(Some(DeserializeSeed::deserialize( + seed, + &mut *self.deserializer, + )?)) + } else { + Ok(None) + } + } + + fn size_hint(&self) -> Option { + Some(self.len) + } +} + +impl<'de, 'a, R: WordRead + 'de> serde::de::VariantAccess<'de> for &'a mut Deserializer<'de, R> { + type Error = Error; + + fn unit_variant(self) -> Result<()> { + Ok(()) + } + + fn newtype_variant_seed>(self, seed: V) -> Result { + DeserializeSeed::deserialize(seed, self) + } + + fn tuple_variant>(self, len: usize, visitor: V) -> Result { + serde::de::Deserializer::deserialize_tuple(self, len, visitor) + } + + fn struct_variant>( + self, + fields: &'static [&'static str], + visitor: V, + ) -> Result { + serde::de::Deserializer::deserialize_tuple(self, fields.len(), visitor) + } +} + +impl<'de, 'a, R: WordRead + 'de> serde::de::EnumAccess<'de> for &'a mut Deserializer<'de, R> { + type Error = Error; + type Variant = Self; + + fn variant_seed>(self, seed: V) -> Result<(V::Value, Self)> { + let tag = self.try_take_word()?; + let val = DeserializeSeed::deserialize(seed, tag.into_deserializer())?; + Ok((val, self)) + } +} + +struct MapAccess<'a, 'de, R: WordRead + 'de> { + 
deserializer: &'a mut Deserializer<'de, R>, + len: usize, +} + +impl<'a, 'de: 'a, R: WordRead + 'de> serde::de::MapAccess<'de> for MapAccess<'a, 'de, R> { + type Error = Error; + + fn next_key_seed>(&mut self, seed: K) -> Result> { + if self.len > 0 { + self.len -= 1; + Ok(Some(DeserializeSeed::deserialize( + seed, + &mut *self.deserializer, + )?)) + } else { + Ok(None) + } + } + + fn next_value_seed>(&mut self, seed: V) -> Result { + DeserializeSeed::deserialize(seed, &mut *self.deserializer) + } + + fn size_hint(&self) -> Option { + Some(self.len) + } +} + +impl<'de, R: WordRead + 'de> Deserializer<'de, R> { + /// Construct a Deserializer + /// + /// Creates a deserializer for deserializing from the given WordRead + pub fn new(reader: R) -> Self { + Deserializer { + reader, + phantom: core::marker::PhantomData, + } + } + + fn try_take_word(&mut self) -> Result { + let mut val = 0u32; + self.reader.read_words(core::slice::from_mut(&mut val))?; + Ok(val) + } + + fn try_take_dword(&mut self) -> Result { + let low = self.try_take_word()? as u64; + let high = self.try_take_word()? as u64; + Ok(low | high << 32) + } +} + +impl<'de, 'a, R: WordRead + 'de> serde::Deserializer<'de> for &'a mut Deserializer<'de, R> { + type Error = Error; + + fn is_human_readable(&self) -> bool { + false + } + + fn deserialize_any(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::NotSupported) + } + + fn deserialize_bool(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let val = match self.try_take_word()? { + 0 => false, + 1 => true, + _ => return Err(Error::DeserializeBadBool), + }; + visitor.visit_bool(val) + } + + fn deserialize_i8(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_i32(self.try_take_word()? as i32) + } + + fn deserialize_i16(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_i32(self.try_take_word()? 
as i32) + } + + fn deserialize_i32(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_i32(self.try_take_word()? as i32) + } + + fn deserialize_i64(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_i64(self.try_take_dword()? as i64) + } + + fn deserialize_i128(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let mut bytes = [0u8; 16]; + self.reader.read_padded_bytes(&mut bytes)?; + visitor.visit_i128(i128::from_le_bytes(bytes)) + } + + fn deserialize_u8(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_u32(self.try_take_word()?) + } + + fn deserialize_u16(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_u32(self.try_take_word()?) + } + + fn deserialize_u32(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_u32(self.try_take_word()?) + } + + fn deserialize_u64(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_u64(self.try_take_dword()?) + } + + fn deserialize_u128(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let mut bytes = [0u8; 16]; + self.reader.read_padded_bytes(&mut bytes)?; + visitor.visit_u128(u128::from_le_bytes(bytes)) + } + + fn deserialize_f32(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_f32(f32::from_bits(self.try_take_word()?)) + } + + fn deserialize_f64(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_f64(f64::from_bits(self.try_take_dword()?)) + } + + fn deserialize_char(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let c = char::from_u32(self.try_take_word()?).ok_or(Error::DeserializeBadChar)?; + visitor.visit_char(c) + } + + fn deserialize_str(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let len_bytes = self.try_take_word()? as usize; + // TODO: Can we use MaybeUninit here instead of zeroing out? 
+ // The documentation for sys::io::Read implies that it's not + // safe; is there another way to not do double writes here? + let mut bytes = vec![0u8; len_bytes]; + self.reader.read_padded_bytes(&mut bytes)?; + visitor.visit_string(String::from_utf8(bytes).map_err(|_| Error::DeserializeBadChar)?) + } + + fn deserialize_string(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_str(visitor) + } + + fn deserialize_bytes(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let len_bytes = self.try_take_word()? as usize; + // TODO: Can we use MaybeUninit here instead of zeroing out? + // The documentation for sys::io::Read implies that it's not + // safe; is there another way to not do double writes here? + let mut bytes = vec![0u8; len_bytes]; + self.reader.read_padded_bytes(&mut bytes)?; + visitor.visit_byte_buf(bytes) + } + + fn deserialize_byte_buf(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_bytes(visitor) + } + + fn deserialize_option(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + match self.try_take_word()? { + 0 => visitor.visit_none(), + 1 => visitor.visit_some(self), + _ => Err(Error::DeserializeBadOption), + } + } + + fn deserialize_unit(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_unit() + } + + fn deserialize_unit_struct(self, _name: &'static str, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_unit(visitor) + } + + fn deserialize_newtype_struct(self, _name: &'static str, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_newtype_struct(self) + } + + fn deserialize_seq(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let len = self.try_take_word()? 
as usize; + visitor.visit_seq(SeqAccess { + deserializer: self, + len, + }) + } + + fn deserialize_tuple(self, len: usize, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_seq(SeqAccess { + deserializer: self, + len, + }) + } + + fn deserialize_tuple_struct( + self, + _name: &'static str, + len: usize, + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + self.deserialize_tuple(len, visitor) + } + + fn deserialize_map(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let len = self.try_take_word()? as usize; + visitor.visit_map(MapAccess { + deserializer: self, + len, + }) + } + + fn deserialize_struct( + self, + _name: &'static str, + fields: &'static [&'static str], + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + self.deserialize_tuple(fields.len(), visitor) + } + + fn deserialize_enum( + self, + _name: &'static str, + _variants: &'static [&'static str], + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + visitor.visit_enum(self) + } + + fn deserialize_identifier(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::NotSupported) + } + + fn deserialize_ignored_any(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::NotSupported) + } +} + +#[cfg(test)] +mod tests { + use alloc::{string::String, vec::Vec}; + use core::f32; + + use serde::{Deserialize, Serialize}; + + use super::*; + + #[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] + pub struct SomeStruct {} + + #[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] + pub enum MyEnum { + MyUnaryConstructor(Vec), + MyBinaryConstructor(Vec, SomeStruct), + } + + #[test] + fn test_enum_unary() { + let a = MyEnum::MyUnaryConstructor(vec![1, 2, 3, 4, 5]); + let encoded = crate::serde::to_vec(&a).unwrap(); + let decoded: MyEnum = from_slice(&encoded).unwrap(); + assert_eq!(a, decoded); + } + + #[test] + fn test_enum_binary() { + let a = MyEnum::MyBinaryConstructor(vec![1, 2, 3, 4, 5], SomeStruct {}); 
+ let encoded = crate::serde::to_vec(&a).unwrap(); + let decoded: MyEnum = from_slice(&encoded).unwrap(); + assert_eq!(a, decoded); + } + + #[test] + fn test_struct() { + use serde::Deserialize; + + #[derive(Deserialize, PartialEq, Debug)] + struct Test { + bool: bool, + i8: i8, + u8: u8, + i16: i16, + u16: u16, + i32: i32, + u32: u32, + f32: f32, + i64: i64, + u64: u64, + f64: f64, + } + + let words = [ + 1, + -4_i32 as u32, + 4, + -5_i32 as u32, + 5, + -6_i32 as u32, + 6, + f32::to_bits(f32::consts::PI), + -7_i32 as u32, + 0xffffffff, + 7, + 0x00000000, + f64::to_bits(2.71).checked_rem(0x100000000).unwrap() as u32, + f64::to_bits(2.71).checked_shr(32).unwrap() as u32, + ]; + let expected = Test { + bool: true, + i8: -4, + u8: 4, + i16: -5, + u16: 5, + i32: -6, + u32: 6, + f32: f32::consts::PI, + i64: -7, + u64: 7, + f64: 2.71, + }; + assert_eq!(expected, from_slice(&words).unwrap()); + } + + #[test] + fn test_str() { + use serde::Deserialize; + + #[derive(Deserialize, PartialEq, Debug)] + struct Test { + first: String, + second: String, + } + + let words = [1, 0x00000061, 3, 0x00636261]; + let expected = Test { + first: "a".into(), + second: "abc".into(), + }; + assert_eq!(expected, from_slice(&words).unwrap()); + } +} diff --git a/riscv/zkvm/lib/src/serde/err.rs b/riscv/zkvm/lib/src/serde/err.rs new file mode 100644 index 0000000000..b5602f446d --- /dev/null +++ b/riscv/zkvm/lib/src/serde/err.rs @@ -0,0 +1,70 @@ +// Copyright 2024 RISC Zero, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +use alloc::string::{String, ToString}; +use core::fmt::{Display, Formatter}; + +/// Errors used by Serde +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum Error { + /// A custom error + Custom(String), + /// Found a bool that wasn't 0 or 1 + DeserializeBadBool, + /// Found an invalid unicode char + DeserializeBadChar, + /// Found an Option discriminant that wasn't 0 or 1 + DeserializeBadOption, + /// Tried to parse invalid utf-8 + DeserializeBadUtf8, + /// Unexpected end during deserialization + DeserializeUnexpectedEnd, + /// Not supported + NotSupported, + /// The serialize buffer is full + SerializeBufferFull, +} + +/// A Result type for `risc0_zkvm::serde` operations that can fail +pub type Result = core::result::Result; + +impl Display for Error { + fn fmt(&self, formatter: &mut Formatter) -> core::fmt::Result { + formatter.write_str(match self { + Self::Custom(msg) => msg, + Self::DeserializeBadBool => "Found a bool that wasn't 0 or 1", + Self::DeserializeBadChar => "Found an invalid unicode char", + Self::DeserializeBadOption => "Found an Option discriminant that wasn't 0 or 1", + Self::DeserializeBadUtf8 => "Tried to parse invalid utf-8", + Self::DeserializeUnexpectedEnd => "Unexpected end during deserialization", + Self::NotSupported => "Not supported", + Self::SerializeBufferFull => "The serialize buffer is full", + }) + } +} + +impl serde::ser::Error for Error { + fn custom(msg: T) -> Self { + Error::Custom(msg.to_string()) + } +} + +impl serde::de::Error for Error { + fn custom(msg: T) -> Self { + Error::Custom(msg.to_string()) + } +} + +// This is an alias for either std::Error, or serde's no_std error replacement. 
+impl serde::ser::StdError for Error {} diff --git a/riscv/zkvm/lib/src/serde/mod.rs b/riscv/zkvm/lib/src/serde/mod.rs new file mode 100644 index 0000000000..9456c0e295 --- /dev/null +++ b/riscv/zkvm/lib/src/serde/mod.rs @@ -0,0 +1,89 @@ +// Copyright 2024 RISC Zero, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Serialization and deserialization tools for the RISC Zero zkVM +//! +//! Data needs to be serialized for transmission between the zkVM host and +//! guest. This module contains tools for this serialization and the +//! corresponding deserialization. +//! +//! On the host side, a serialization function such as [to_vec] should be used +//! when transmitting data to the guest. Similarly, the deserialization function +//! [from_slice] should be used when reading data from the guest. For example: +//! ```rust +//! use risc0_zkvm::serde::{from_slice, to_vec}; +//! let input = 42_u32; +//! let encoded = to_vec(&[input]).unwrap(); +//! let output: u32 = from_slice(&encoded).unwrap(); +//! assert_eq!(input, output); +//! ``` +//! +//! On the guest side, the necessary (de)serialization functionality is +//! included in [`env`] module functions such as [`env::read`] and +//! [`env::commit`], so this crate rarely needs to be directly used in the +//! guest. +//! +//! [`env`]: ../guest/env/index.html +//! [`env::commit`]: ../guest/env/fn.commit.html +//! 
[`env::read`]: ../guest/env/fn.read.html + +mod deserializer; +mod err; +mod serializer; + +pub use deserializer::{from_slice, Deserializer, WordRead}; +pub use err::{Error, Result}; +pub use serializer::{to_vec, to_vec_with_capacity, Serializer, WordWrite}; + +#[cfg(test)] +mod tests { + use alloc::{collections::BTreeMap, string::String, vec, vec::Vec}; + + use chrono::NaiveDate; + + use crate::serde::{from_slice, to_vec}; + + #[test] + fn test_vec_round_trip() { + let input: Vec = vec![1, 2, 3]; + let data = to_vec(&input).unwrap(); + let output: Vec = from_slice(data.as_slice()).unwrap(); + assert_eq!(input, output); + } + + #[test] + fn test_map_round_trip() { + let input: BTreeMap = + BTreeMap::from([("foo".into(), 1), ("bar".into(), 2), ("baz".into(), 3)]); + let data = to_vec(&input).unwrap(); + let output: BTreeMap = from_slice(data.as_slice()).unwrap(); + assert_eq!(input, output); + } + + #[test] + fn test_tuple_round_trip() { + let input: (u32, u64) = (1, 2); + let data = to_vec(&input).unwrap(); + let output: (u32, u64) = from_slice(data.as_slice()).unwrap(); + assert_eq!(input, output); + } + + #[test] + fn naive_date_round_trip() { + let input: NaiveDate = NaiveDate::parse_from_str("2015-09-05", "%Y-%m-%d").unwrap(); + let date_vec = to_vec(&input).unwrap(); + let output: NaiveDate = from_slice(date_vec.as_slice()).unwrap(); + assert_eq!(input, output); + } +} diff --git a/riscv/zkvm/lib/src/serde/serializer.rs b/riscv/zkvm/lib/src/serde/serializer.rs new file mode 100644 index 0000000000..0e2cc9183a --- /dev/null +++ b/riscv/zkvm/lib/src/serde/serializer.rs @@ -0,0 +1,510 @@ +// Copyright 2024 RISC Zero, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use alloc::vec::Vec; + +use axvm_platform::WORD_SIZE; + +use super::err::{Error, Result}; +use crate::alloc::string::ToString; + +/// A writer for writing streams preferring word-based data. +pub trait WordWrite { + /// Write the given words to the stream. + fn write_words(&mut self, words: &[u32]) -> Result<()>; + + /// Write the given bytes to the stream, padding up to the next word + /// boundary. + // TODO: Do we still want to pad the bytes now that we have + // posix-style I/O that can read things into buffers right where + // we want them to be? If we don't, we could change the + // serialization buffers to use Vec instead of Vec, + fn write_padded_bytes(&mut self, bytes: &[u8]) -> Result<()>; +} + +impl WordWrite for Vec { + fn write_words(&mut self, words: &[u32]) -> Result<()> { + self.extend_from_slice(words); + Ok(()) + } + + fn write_padded_bytes(&mut self, bytes: &[u8]) -> Result<()> { + let chunks = bytes.chunks_exact(WORD_SIZE); + let last_word = chunks.remainder(); + self.extend(chunks.map(|word_bytes| u32::from_le_bytes(word_bytes.try_into().unwrap()))); + if !last_word.is_empty() { + let mut last_word_bytes = [0u8; WORD_SIZE]; + last_word_bytes[..last_word.len()].clone_from_slice(last_word); + self.push(u32::from_le_bytes(last_word_bytes)); + } + Ok(()) + } +} + +// Allow borrowed WordWrites to work transparently. 
+impl WordWrite for &mut W { + #[inline] + fn write_words(&mut self, words: &[u32]) -> Result<()> { + (**self).write_words(words) + } + + #[inline] + fn write_padded_bytes(&mut self, bytes: &[u8]) -> Result<()> { + (**self).write_padded_bytes(bytes) + } +} + +/// Serialize to a vector of u32 words +pub fn to_vec(value: &T) -> Result> +where + T: serde::Serialize + ?Sized, +{ + // Use the in-memory size of the value as a guess for the length + // of the serialized value. + let mut vec: Vec = Vec::with_capacity(core::mem::size_of_val(value)); + let mut serializer = Serializer::new(&mut vec); + value.serialize(&mut serializer)?; + Ok(vec) +} + +/// Serialize to a vector of u32 words with size hinting +/// +/// Includes a caller-provided hint `cap` giving the capacity of u32 words +/// necessary to serialize `value`. +pub fn to_vec_with_capacity(value: &T, cap: usize) -> Result> +where + T: serde::Serialize + ?Sized, +{ + let mut vec: Vec = Vec::with_capacity(cap); + let mut serializer = Serializer::new(&mut vec); + value.serialize(&mut serializer)?; + Ok(vec) +} + +/// Enables serializing to a stream +pub struct Serializer { + stream: W, +} + +impl Serializer { + /// Construct a Serializer + /// + /// Creates a serializer that writes to `stream`. 
+ pub fn new(stream: W) -> Self { + Serializer { stream } + } +} + +impl<'a, W: WordWrite> serde::ser::Serializer for &'a mut Serializer { + type Ok = (); + type Error = Error; + type SerializeSeq = Self; + type SerializeTuple = Self; + type SerializeTupleStruct = Self; + type SerializeTupleVariant = Self; + type SerializeMap = Self; + type SerializeStruct = Self; + type SerializeStructVariant = Self; + + fn is_human_readable(&self) -> bool { + false + } + + fn collect_str(self, value: &T) -> Result<()> + where + T: ?Sized + core::fmt::Display, + { + self.serialize_str(&value.to_string()) + } + + fn serialize_bool(self, v: bool) -> Result<()> { + self.serialize_u8(if v { 1 } else { 0 }) + } + + fn serialize_i8(self, v: i8) -> Result<()> { + self.serialize_i32(v as i32) + } + + fn serialize_i16(self, v: i16) -> Result<()> { + self.serialize_i32(v as i32) + } + + fn serialize_i32(self, v: i32) -> Result<()> { + self.serialize_u32(v as u32) + } + + fn serialize_i64(self, v: i64) -> Result<()> { + self.serialize_u64(v as u64) + } + + fn serialize_i128(self, v: i128) -> Result<()> { + self.serialize_u128(v as u128) + } + + fn serialize_u8(self, v: u8) -> Result<()> { + self.serialize_u32(v as u32) + } + + fn serialize_u16(self, v: u16) -> Result<()> { + self.serialize_u32(v as u32) + } + + fn serialize_u32(self, v: u32) -> Result<()> { + self.stream.write_words(&[v]) + } + + fn serialize_u64(self, v: u64) -> Result<()> { + self.serialize_u32((v & 0xFFFFFFFF) as u32)?; + self.serialize_u32(((v >> 32) & 0xFFFFFFFF) as u32) + } + + fn serialize_u128(self, v: u128) -> Result<()> { + self.stream.write_padded_bytes(&v.to_le_bytes()) + } + + fn serialize_f32(self, v: f32) -> Result<()> { + self.serialize_u32(v.to_bits()) + } + + fn serialize_f64(self, v: f64) -> Result<()> { + self.serialize_u64(f64::to_bits(v)) + } + + fn serialize_char(self, v: char) -> Result<()> { + self.serialize_u32(v as u32) + } + + fn serialize_str(self, v: &str) -> Result<()> { + let bytes = 
v.as_bytes(); + self.serialize_u32(bytes.len() as u32)?; + self.stream.write_padded_bytes(bytes) + } + + // NOTE: Serializing byte slices _does not_ currently call serialize_bytes. This + // is because the default collect_seq implementation handles all [T] with + // `collect_seq` which does not differentiate. Two options for enabling more + // efficient serialization (or commit) of bytes values and + // bytes-interpretable slices (e.g. [u32]) are: + // A) Implement collect_seq and check at runtime whether a type could be + // serialized as bytes. + // B) Use the experimental Rust specialization + // features. + fn serialize_bytes(self, v: &[u8]) -> Result<()> { + self.serialize_u32(v.len() as u32)?; + self.stream.write_padded_bytes(v) + } + + fn serialize_none(self) -> Result<()> { + self.serialize_u32(0) + } + + fn serialize_some(self, value: &T) -> Result<()> + where + T: serde::Serialize + ?Sized, + { + self.serialize_u32(1)?; + value.serialize(self) + } + + fn serialize_unit(self) -> Result<()> { + Ok(()) + } + + fn serialize_unit_struct(self, _name: &'static str) -> Result<()> { + Ok(()) + } + + fn serialize_unit_variant( + self, + _name: &'static str, + variant_index: u32, + _variant: &'static str, + ) -> Result<()> { + self.serialize_u32(variant_index) + } + + fn serialize_newtype_struct(self, _name: &'static str, value: &T) -> Result<()> + where + T: serde::Serialize + ?Sized, + { + value.serialize(self) + } + + fn serialize_newtype_variant( + self, + _name: &'static str, + variant_index: u32, + _variant: &'static str, + value: &T, + ) -> Result<()> + where + T: serde::Serialize + ?Sized, + { + self.serialize_u32(variant_index)?; + value.serialize(self) + } + + fn serialize_seq(self, len: Option) -> Result { + match len { + Some(val) => { + self.serialize_u32(val.try_into().unwrap())?; + Ok(self) + } + None => Err(Error::NotSupported), + } + } + + fn serialize_tuple(self, _len: usize) -> Result { + Ok(self) + } + + fn serialize_tuple_struct( + self, + 
_name: &'static str, + _len: usize, + ) -> Result { + Ok(self) + } + + fn serialize_tuple_variant( + self, + _name: &'static str, + variant_index: u32, + _variant: &'static str, + _len: usize, + ) -> Result { + self.serialize_u32(variant_index)?; + Ok(self) + } + + fn serialize_map(self, len: Option) -> Result { + match len { + Some(val) => { + self.serialize_u32(val.try_into().unwrap())?; + Ok(self) + } + None => Err(Error::NotSupported), + } + } + + fn serialize_struct(self, _name: &'static str, _len: usize) -> Result { + Ok(self) + } + + fn serialize_struct_variant( + self, + _name: &'static str, + variant_index: u32, + _variant: &'static str, + _len: usize, + ) -> Result { + self.serialize_u32(variant_index)?; + Ok(self) + } +} + +impl<'a, W: WordWrite> serde::ser::SerializeSeq for &'a mut Serializer { + type Ok = (); + type Error = Error; + + fn serialize_element(&mut self, value: &T) -> Result<()> + where + T: serde::Serialize + ?Sized, + { + value.serialize(&mut **self) + } + + fn end(self) -> Result<()> { + Ok(()) + } +} + +impl<'a, W: WordWrite> serde::ser::SerializeTuple for &'a mut Serializer { + type Ok = (); + type Error = Error; + + fn serialize_element(&mut self, value: &T) -> Result<()> + where + T: serde::Serialize + ?Sized, + { + value.serialize(&mut **self) + } + + fn end(self) -> Result<()> { + Ok(()) + } +} + +impl<'a, W: WordWrite> serde::ser::SerializeTupleStruct for &'a mut Serializer { + type Ok = (); + type Error = Error; + + fn serialize_field(&mut self, value: &T) -> Result<()> + where + T: serde::Serialize + ?Sized, + { + value.serialize(&mut **self) + } + + fn end(self) -> Result<()> { + Ok(()) + } +} + +impl<'a, W: WordWrite> serde::ser::SerializeTupleVariant for &'a mut Serializer { + type Ok = (); + type Error = Error; + + fn serialize_field(&mut self, value: &T) -> Result<()> + where + T: serde::Serialize + ?Sized, + { + value.serialize(&mut **self) + } + + fn end(self) -> Result<()> { + Ok(()) + } +} + +impl<'a, W: WordWrite> 
serde::ser::SerializeMap for &'a mut Serializer { + type Ok = (); + type Error = Error; + + fn serialize_key(&mut self, key: &T) -> Result<()> + where + T: serde::Serialize + ?Sized, + { + key.serialize(&mut **self) + } + + fn serialize_value(&mut self, value: &T) -> Result<()> + where + T: serde::Serialize + ?Sized, + { + value.serialize(&mut **self) + } + + fn end(self) -> Result<()> { + Ok(()) + } +} + +impl<'a, W: WordWrite> serde::ser::SerializeStruct for &'a mut Serializer { + type Ok = (); + type Error = Error; + + fn serialize_field(&mut self, _key: &'static str, value: &T) -> Result<()> + where + T: serde::Serialize + ?Sized, + { + value.serialize(&mut **self) + } + + fn end(self) -> Result<()> { + Ok(()) + } +} + +impl<'a, W: WordWrite> serde::ser::SerializeStructVariant for &'a mut Serializer { + type Ok = (); + type Error = Error; + + fn serialize_field(&mut self, _key: &'static str, value: &T) -> Result<()> + where + T: serde::Serialize + ?Sized, + { + value.serialize(&mut **self) + } + + fn end(self) -> Result<()> { + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use alloc::string::String; + + use serde::Serialize; + + use super::*; + + #[test] + #[allow(clippy::approx_constant)] + fn test_struct() { + #[derive(Serialize, PartialEq, Debug)] + struct Test { + bool: bool, + i8: i8, + u8: u8, + i16: i16, + u16: u16, + i32: i32, + u32: u32, + f32: f32, + i64: i64, + u64: u64, + f64: f64, + } + + let expected = [ + 1, + -4_i32 as u32, + 4, + -5_i32 as u32, + 5, + -6_i32 as u32, + 6, + f32::to_bits(3.14_f32), + -7_i32 as u32, + 0xffffffff, + 7, + 0x00000000, + f64::to_bits(2.71).checked_rem(0x100000000).unwrap() as u32, + f64::to_bits(2.71).checked_shr(32).unwrap() as u32, + ]; + + let input = Test { + bool: true, + i8: -4, + u8: 4, + i16: -5, + u16: 5, + i32: -6, + u32: 6, + f32: 3.14, + i64: -7, + u64: 7, + f64: 2.71, + }; + assert_eq!(expected, to_vec(&input).unwrap().as_slice()); + } + + #[test] + fn test_str() { + #[derive(Serialize, PartialEq, Debug)] 
+ struct Test { + first: String, + second: String, + } + + let expected = [1, 0x00000061, 3, 0x00636261]; + let input = Test { + first: "a".into(), + second: "abc".into(), + }; + assert_eq!(expected, to_vec(&input).unwrap().as_slice()); + } +} diff --git a/riscv/zkvm/platform/Cargo.toml b/riscv/zkvm/platform/Cargo.toml new file mode 100644 index 0000000000..5d9d14fec5 --- /dev/null +++ b/riscv/zkvm/platform/Cargo.toml @@ -0,0 +1,54 @@ +[package] +name = "axvm-platform" +description = "Axiom VM" +version = { workspace = true } +edition = { workspace = true } +# license = { workspace = true } +homepage = { workspace = true } +repository = { workspace = true } + +[dependencies] +stability = "0.2" + +# This crate should have as few dependencies as possible so it can be +# used as many places as possible to share the platform definitions. +[target.'cfg(target_os = "zkvm")'.dependencies] +bytemuck = { version = "1.13", optional = true } +critical-section = { version = "1.1.2", optional = true } +embedded-alloc = { version = "0.6.0", features = [ + "allocator_api", +], optional = true } +getrandom = { version = "0.2", features = ["custom"], optional = true } +libm = { version = "0.2", optional = true } + +[package.metadata.docs.rs] +rustdoc-args = ["--cfg", "docsrs"] +features = [ + "rust-runtime", + "panic-handler", + "entrypoint", + "export-syscalls", + "export-libm", + "export-getrandom", + "getrandom", + "unstable", +] + +[features] +default = [] +entrypoint = [] +# exports a `getrandom` implementation that panics +export-getrandom = ["dep:bytemuck", "dep:getrandom"] +export-libm = ["dep:libm"] +export-syscalls = [] +# exports a `getrandom` implementation that uses sys_random +getrandom = ["export-getrandom"] +heap-embedded-alloc = [ + "dep:critical-section", + "dep:embedded-alloc", + "rust-runtime", +] +panic-handler = [] +# Build a rust runtime +rust-runtime = ["export-libm", "export-syscalls"] +unstable = [] diff --git a/riscv/zkvm/platform/README.md 
b/riscv/zkvm/platform/README.md new file mode 100644 index 0000000000..145bdb65d8 --- /dev/null +++ b/riscv/zkvm/platform/README.md @@ -0,0 +1,2 @@ +Platform definitions for the RISC Zero zkVM, including IO port addresses, +memory regions, and low-level runtime functions. diff --git a/riscv/zkvm/platform/src/getrandom.rs b/riscv/zkvm/platform/src/getrandom.rs new file mode 100644 index 0000000000..f19eef9c80 --- /dev/null +++ b/riscv/zkvm/platform/src/getrandom.rs @@ -0,0 +1,86 @@ +// Copyright 2024 RISC Zero, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use getrandom::{register_custom_getrandom, Error}; + +/// This is a getrandom handler for the zkvm. It's intended to hook into a +/// getrandom crate or a dependent of the getrandom crate used by the guest code. +#[cfg(feature = "getrandom")] +pub fn zkvm_getrandom(dest: &mut [u8]) -> Result<(), Error> { + use crate::{syscall::sys_rand, WORD_SIZE}; + + if dest.is_empty() { + return Ok(()); + } + + let (head, aligned, tail) = bytemuck::pod_align_to_mut::<_, u32>(dest); + + // Fill the aligned portion of the dest buffer with random words. + // sys_rand uses copy-in to fill the buffer at 4-words per cycle. + if aligned.len() > 0 { + unsafe { + sys_rand(aligned.as_mut_ptr(), aligned.len()); + } + } + + // Up to 4 bytes may be split between the head and tail. + // Sample an additional word and do an unaligned write to fill the last parts. 
+ if head.len() > 0 || tail.len() > 0 { + assert!(head.len() < WORD_SIZE); + assert!(tail.len() < WORD_SIZE); + + let mut words = [0u32; 2]; + unsafe { + sys_rand(words.as_mut_ptr(), 2); + } + + head.copy_from_slice(&words[0].to_ne_bytes()[..head.len()]); + tail.copy_from_slice(&words[1].to_ne_bytes()[..tail.len()]); + } + Ok(()) +} + +#[cfg(not(feature = "getrandom"))] +pub fn zkvm_getrandom(dest: &mut [u8]) -> Result<(), Error> { + panic!( + r#" + +WARNING: `getrandom()` called from guest. +========================================= + +This panic was generated from the guest program calling `getrandom()`. +Using randomness in the zkVM requires careful attention to your +application’s security requirements. Therefore, the default behavior is +for the zkVM to generate this panic message when the guest calls +`getrandom()`. If you wrote this guest program, it is crucial that you +understand the implications of using randomness in your guest code and +make changes to fit your needs. + + +The zkVM supports providing random data from the host in response to +`getrandom()`. When the randomness is being used to protect private data, this +is a good option. However, it is not appropriate for all use cases. Consider a +situation when random data is needed to ensure the honesty of the prover (e.g. +to flip a coin for a game between the prover and verifier). In this scenario, +host provided randomness is not suitable because the prover may alter the source +of randomness. For such use cases, great care must be taken to provide a source +of randomness that the prover cannot manipulate or predict. Host provided +randomness can be enabled with the `getrandom` feature flag on the `risc0-zkvm` +crate used for the guest. 
+ +"# + ); +} + +register_custom_getrandom!(zkvm_getrandom); diff --git a/riscv/zkvm/platform/src/heap/bump.rs b/riscv/zkvm/platform/src/heap/bump.rs new file mode 100644 index 0000000000..f9843833ff --- /dev/null +++ b/riscv/zkvm/platform/src/heap/bump.rs @@ -0,0 +1,38 @@ +// Copyright 2024 RISC Zero, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use core::alloc::{GlobalAlloc, Layout}; + +use crate::syscall::sys_alloc_aligned; + +#[global_allocator] +pub static HEAP: BumpPointerAlloc = BumpPointerAlloc; + +pub struct BumpPointerAlloc; + +unsafe impl GlobalAlloc for BumpPointerAlloc { + unsafe fn alloc(&self, layout: Layout) -> *mut u8 { + sys_alloc_aligned(layout.size(), layout.align()) + } + + unsafe fn dealloc(&self, _: *mut u8, _: Layout) { + // this allocator never deallocates memory + } + + unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 { + // NOTE: This is safe to avoid zeroing allocated bytes, as the bump allocator does not + // re-use memory and the zkVM memory is zero-initialized. + self.alloc(layout) + } +} diff --git a/riscv/zkvm/platform/src/heap/embedded.rs b/riscv/zkvm/platform/src/heap/embedded.rs new file mode 100644 index 0000000000..f2d4de24a4 --- /dev/null +++ b/riscv/zkvm/platform/src/heap/embedded.rs @@ -0,0 +1,41 @@ +// Copyright 2024 RISC Zero, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use critical_section::RawRestoreState; +use embedded_alloc::LlffHeap as Heap; + +#[global_allocator] +pub static HEAP: Heap = Heap::empty(); + +struct CriticalSection; +critical_section::set_impl!(CriticalSection); + +unsafe impl critical_section::Impl for CriticalSection { + unsafe fn acquire() -> RawRestoreState { + // this is a no-op. we're in a single-threaded, non-preemptive context + } + + unsafe fn release(_token: RawRestoreState) { + // this is a no-op. we're in a single-threaded, non-preemptive context + } +} + +pub fn init() { + extern "C" { + static _end: u8; + } + let heap_pos: usize = unsafe { (&_end) as *const u8 as usize }; + let heap_size: usize = crate::memory::GUEST_MAX_MEM - heap_pos; + unsafe { HEAP.init(heap_pos, heap_size) } +} diff --git a/riscv/zkvm/platform/src/heap/mod.rs b/riscv/zkvm/platform/src/heap/mod.rs new file mode 100644 index 0000000000..1206fe0939 --- /dev/null +++ b/riscv/zkvm/platform/src/heap/mod.rs @@ -0,0 +1,19 @@ +// Copyright 2024 RISC Zero, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#[cfg(not(feature = "heap-embedded-alloc"))] +mod bump; + +#[cfg(feature = "heap-embedded-alloc")] +pub mod embedded; diff --git a/riscv/zkvm/platform/src/lib.rs b/riscv/zkvm/platform/src/lib.rs new file mode 100644 index 0000000000..3b59f13cc7 --- /dev/null +++ b/riscv/zkvm/platform/src/lib.rs @@ -0,0 +1,56 @@ +// Copyright 2024 RISC Zero, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#![doc = include_str!("../README.md")] +#![no_std] +#![allow(unused_variables)] +#![deny(rustdoc::broken_intra_doc_links)] +#![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))] + +pub mod memory; +#[macro_use] +pub mod syscall; +#[cfg(all(feature = "export-getrandom", target_os = "zkvm"))] +mod getrandom; +#[cfg(all(feature = "rust-runtime", target_os = "zkvm"))] +pub mod heap; +#[cfg(all(feature = "export-libm", target_os = "zkvm"))] +mod libm_extern; +#[cfg(all(feature = "rust-runtime", target_os = "zkvm"))] +pub mod rust_rt; + +/// Size of a zkVM machine word in bytes. +/// 4 bytes (i.e. 32 bits) as the zkVM is an implementation of the rv32im ISA. +pub const WORD_SIZE: usize = core::mem::size_of::<u32>(); + +/// Size of a zkVM memory page. +pub const PAGE_SIZE: usize = 1024; + +/// Standard IO file descriptors for use with sys_read and sys_write. +pub mod fileno { + pub const STDIN: u32 = 0; + pub const STDOUT: u32 = 1; + pub const STDERR: u32 = 2; + pub const JOURNAL: u32 = 3; +} + +/// Align address upwards. 
+/// +/// Returns the smallest `x` with alignment `align` so that `x >= addr`. +/// +/// `align` must be a power of 2. +pub const fn align_up(addr: usize, align: usize) -> usize { + let mask = align - 1; + (addr + mask) & !mask +} diff --git a/riscv/zkvm/platform/src/libm_extern.rs b/riscv/zkvm/platform/src/libm_extern.rs new file mode 100644 index 0000000000..079b6ea62e --- /dev/null +++ b/riscv/zkvm/platform/src/libm_extern.rs @@ -0,0 +1,577 @@ +// Copyright 2024 RISC Zero, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Export symbols for math functions from libm to the global namespace. 
+ +#![allow(dead_code)] + +#[no_mangle] +pub extern "C" fn acosf(x: f32) -> f32 { + libm::acosf(x) +} + +#[no_mangle] +pub extern "C" fn acoshf(x: f32) -> f32 { + libm::acoshf(x) +} + +#[no_mangle] +pub extern "C" fn acosh(x: f64) -> f64 { + libm::acosh(x) +} + +#[no_mangle] +pub extern "C" fn acos(x: f64) -> f64 { + libm::acos(x) +} + +#[no_mangle] +pub extern "C" fn asinf(x: f32) -> f32 { + libm::asinf(x) +} + +#[no_mangle] +pub extern "C" fn asinhf(x: f32) -> f32 { + libm::asinhf(x) +} + +#[no_mangle] +pub extern "C" fn asinh(x: f64) -> f64 { + libm::asinh(x) +} + +#[no_mangle] +pub extern "C" fn asin(x: f64) -> f64 { + libm::asin(x) +} + +#[no_mangle] +pub extern "C" fn atan2f(y: f32, x: f32) -> f32 { + libm::atan2f(y, x) +} + +#[no_mangle] +pub extern "C" fn atan2(y: f64, x: f64) -> f64 { + libm::atan2(y, x) +} + +#[no_mangle] +pub extern "C" fn atanf(x: f32) -> f32 { + libm::atanf(x) +} + +#[no_mangle] +pub extern "C" fn atanhf(x: f32) -> f32 { + libm::atanhf(x) +} + +#[no_mangle] +pub extern "C" fn atanh(x: f64) -> f64 { + libm::atanh(x) +} + +#[no_mangle] +pub extern "C" fn atan(x: f64) -> f64 { + libm::atan(x) +} + +#[no_mangle] +pub extern "C" fn cbrtf(x: f32) -> f32 { + libm::cbrtf(x) +} + +#[no_mangle] +pub extern "C" fn cbrt(x: f64) -> f64 { + libm::cbrt(x) +} + +#[no_mangle] +pub extern "C" fn ceilf(x: f32) -> f32 { + libm::ceilf(x) +} + +#[no_mangle] +pub extern "C" fn ceil(x: f64) -> f64 { + libm::ceil(x) +} + +#[no_mangle] +pub extern "C" fn copysignf(x: f32, y: f32) -> f32 { + libm::copysignf(x, y) +} + +#[no_mangle] +pub extern "C" fn copysign(x: f64, y: f64) -> f64 { + libm::copysign(x, y) +} + +#[no_mangle] +pub extern "C" fn cosf(x: f32) -> f32 { + libm::cosf(x) +} + +#[no_mangle] +pub extern "C" fn coshf(x: f32) -> f32 { + libm::coshf(x) +} + +#[no_mangle] +pub extern "C" fn cosh(x: f64) -> f64 { + libm::cosh(x) +} + +#[no_mangle] +pub extern "C" fn cos(x: f64) -> f64 { + libm::cos(x) +} + +#[no_mangle] +pub extern "C" fn erfcf(x: f32) -> f32 
{ + libm::erfcf(x) +} + +#[no_mangle] +pub extern "C" fn erfc(x: f64) -> f64 { + libm::erfc(x) +} + +#[no_mangle] +pub extern "C" fn erff(x: f32) -> f32 { + libm::erff(x) +} + +#[no_mangle] +pub extern "C" fn erf(x: f64) -> f64 { + libm::erf(x) +} + +#[no_mangle] +pub extern "C" fn exp10f(x: f32) -> f32 { + libm::exp10f(x) +} + +#[no_mangle] +pub extern "C" fn exp10(x: f64) -> f64 { + libm::exp10(x) +} + +#[no_mangle] +pub extern "C" fn exp2f(x: f32) -> f32 { + libm::exp2f(x) +} + +#[no_mangle] +pub extern "C" fn exp2(x: f64) -> f64 { + libm::exp2(x) +} + +#[no_mangle] +pub extern "C" fn expf(x: f32) -> f32 { + libm::expf(x) +} + +#[no_mangle] +pub extern "C" fn expm1f(x: f32) -> f32 { + libm::expm1f(x) +} + +#[no_mangle] +pub extern "C" fn expm1(x: f64) -> f64 { + libm::expm1(x) +} + +#[no_mangle] +pub extern "C" fn exp(x: f64) -> f64 { + libm::exp(x) +} + +#[no_mangle] +pub extern "C" fn fabsf(x: f32) -> f32 { + libm::fabsf(x) +} + +#[no_mangle] +pub extern "C" fn fabs(x: f64) -> f64 { + libm::fabs(x) +} + +#[no_mangle] +pub extern "C" fn fdimf(x: f32, y: f32) -> f32 { + libm::fdimf(x, y) +} + +#[no_mangle] +pub extern "C" fn fdim(x: f64, y: f64) -> f64 { + libm::fdim(x, y) +} + +#[no_mangle] +pub extern "C" fn floorf(x: f32) -> f32 { + libm::floorf(x) +} + +#[no_mangle] +pub extern "C" fn floor(x: f64) -> f64 { + libm::floor(x) +} + +#[no_mangle] +pub extern "C" fn fmaf(x: f32, y: f32, z: f32) -> f32 { + libm::fmaf(x, y, z) +} + +#[no_mangle] +pub extern "C" fn fma(x: f64, y: f64, z: f64) -> f64 { + libm::fma(x, y, z) +} + +#[no_mangle] +pub extern "C" fn fmaxf(x: f32, y: f32) -> f32 { + libm::fmaxf(x, y) +} + +#[no_mangle] +pub extern "C" fn fmax(x: f64, y: f64) -> f64 { + libm::fmax(x, y) +} + +#[no_mangle] +pub extern "C" fn fminf(x: f32, y: f32) -> f32 { + libm::fminf(x, y) +} + +#[no_mangle] +pub extern "C" fn fmin(x: f64, y: f64) -> f64 { + libm::fmin(x, y) +} + +#[no_mangle] +pub extern "C" fn fmodf(x: f32, y: f32) -> f32 { + libm::fmodf(x, y) +} + 
+#[no_mangle] +pub extern "C" fn fmod(x: f64, y: f64) -> f64 { + libm::fmod(x, y) +} + +#[no_mangle] +pub fn frexpf(x: f32) -> (f32, i32) { + libm::frexpf(x) +} + +#[no_mangle] +pub fn frexp(x: f64) -> (f64, i32) { + libm::frexp(x) +} + +#[no_mangle] +pub extern "C" fn hypotf(x: f32, y: f32) -> f32 { + libm::hypotf(x, y) +} + +#[no_mangle] +pub extern "C" fn hypot(x: f64, y: f64) -> f64 { + libm::hypot(x, y) +} + +#[no_mangle] +pub extern "C" fn ilogbf(x: f32) -> i32 { + libm::ilogbf(x) +} + +#[no_mangle] +pub extern "C" fn ilogb(x: f64) -> i32 { + libm::ilogb(x) +} + +#[no_mangle] +pub extern "C" fn j0f(x: f32) -> f32 { + libm::j0f(x) +} + +#[no_mangle] +pub extern "C" fn j0(x: f64) -> f64 { + libm::j0(x) +} + +#[no_mangle] +pub extern "C" fn j1f(x: f32) -> f32 { + libm::j1f(x) +} + +#[no_mangle] +pub extern "C" fn j1(x: f64) -> f64 { + libm::j1(x) +} + +#[no_mangle] +pub extern "C" fn jnf(n: i32, x: f32) -> f32 { + libm::jnf(n, x) +} + +#[no_mangle] +pub extern "C" fn jn(n: i32, x: f64) -> f64 { + libm::jn(n, x) +} + +#[no_mangle] +pub extern "C" fn ldexpf(x: f32, n: i32) -> f32 { + libm::ldexpf(x, n) +} + +#[no_mangle] +pub extern "C" fn ldexp(x: f64, n: i32) -> f64 { + libm::ldexp(x, n) +} + +#[no_mangle] +pub fn lgammaf_r(x: f32) -> (f32, i32) { + libm::lgammaf_r(x) +} + +#[no_mangle] +pub fn lgammf(x: f32) -> f32 { + libm::lgammaf(x) +} + +#[no_mangle] +pub fn lgamma_r(x: f64) -> (f64, i32) { + libm::lgamma_r(x) +} + +#[no_mangle] +pub extern "C" fn lgamma(x: f64) -> f64 { + libm::lgamma(x) +} + +#[no_mangle] +pub extern "C" fn log10f(x: f32) -> f32 { + libm::log10f(x) +} + +#[no_mangle] +pub extern "C" fn log10(x: f64) -> f64 { + libm::log10(x) +} + +#[no_mangle] +pub extern "C" fn log1pf(x: f32) -> f32 { + libm::log1pf(x) +} + +#[no_mangle] +pub extern "C" fn log1p(x: f64) -> f64 { + libm::log1p(x) +} + +#[no_mangle] +pub extern "C" fn log2f(x: f32) -> f32 { + libm::log2f(x) +} + +#[no_mangle] +pub extern "C" fn log2(x: f64) -> f64 { + libm::log2(x) +} + 
+#[no_mangle] +pub extern "C" fn logf(x: f32) -> f32 { + libm::logf(x) +} + +#[no_mangle] +pub extern "C" fn log(x: f64) -> f64 { + libm::log(x) +} + +#[no_mangle] +pub fn modff(x: f32) -> (f32, f32) { + libm::modff(x) +} + +#[no_mangle] +pub fn modf(x: f64) -> (f64, f64) { + libm::modf(x) +} + +#[no_mangle] +pub extern "C" fn nextafterf(x: f32, y: f32) -> f32 { + libm::nextafterf(x, y) +} + +#[no_mangle] +pub extern "C" fn nextafter(x: f64, y: f64) -> f64 { + libm::nextafter(x, y) +} + +#[no_mangle] +pub extern "C" fn powf(x: f32, y: f32) -> f32 { + libm::powf(x, y) +} + +#[no_mangle] +pub extern "C" fn pow(x: f64, y: f64) -> f64 { + libm::pow(x, y) +} + +#[no_mangle] +pub extern "C" fn remainderf(x: f32, y: f32) -> f32 { + libm::remainderf(x, y) +} + +#[no_mangle] +pub extern "C" fn remainder(x: f64, y: f64) -> f64 { + libm::remainder(x, y) +} + +#[no_mangle] +pub fn remquof(x: f32, y: f32) -> (f32, i32) { + libm::remquof(x, y) +} + +#[no_mangle] +pub fn remquo(x: f64, y: f64) -> (f64, i32) { + libm::remquo(x, y) +} + +#[no_mangle] +pub extern "C" fn roundf(x: f32) -> f32 { + libm::roundf(x) +} + +#[no_mangle] +pub extern "C" fn round(x: f64) -> f64 { + libm::round(x) +} + +#[no_mangle] +pub extern "C" fn scalbnf(x: f32, n: i32) -> f32 { + libm::scalbnf(x, n) +} + +#[no_mangle] +pub extern "C" fn scalbn(x: f64, n: i32) -> f64 { + libm::scalbn(x, n) +} + +#[no_mangle] +pub fn sincosf(x: f32) -> (f32, f32) { + libm::sincosf(x) +} + +#[no_mangle] +pub fn sincos(x: f64) -> (f64, f64) { + libm::sincos(x) +} + +#[no_mangle] +pub extern "C" fn sinf(x: f32) -> f32 { + libm::sinf(x) +} + +#[no_mangle] +pub extern "C" fn sinhf(x: f32) -> f32 { + libm::sinhf(x) +} + +#[no_mangle] +pub extern "C" fn sinh(x: f64) -> f64 { + libm::sinh(x) +} + +#[no_mangle] +pub extern "C" fn sin(x: f64) -> f64 { + libm::sin(x) +} + +#[no_mangle] +pub extern "C" fn sqrtf(x: f32) -> f32 { + libm::sqrtf(x) +} + +#[no_mangle] +pub extern "C" fn sqrt(x: f64) -> f64 { + libm::sqrt(x) +} + 
+#[no_mangle] +pub extern "C" fn tanf(x: f32) -> f32 { + libm::tanf(x) +} + +#[no_mangle] +pub extern "C" fn tanhf(x: f32) -> f32 { + libm::tanhf(x) +} + +#[no_mangle] +pub extern "C" fn tanh(x: f64) -> f64 { + libm::tanh(x) +} + +#[no_mangle] +pub extern "C" fn tan(x: f64) -> f64 { + libm::tan(x) +} + +#[no_mangle] +pub extern "C" fn tgammaf(x: f32) -> f32 { + libm::tgammaf(x) +} + +#[no_mangle] +pub extern "C" fn tgamma(x: f64) -> f64 { + libm::tgamma(x) +} + +#[no_mangle] +pub extern "C" fn truncf(x: f32) -> f32 { + libm::truncf(x) +} + +#[no_mangle] +pub extern "C" fn trunc(x: f64) -> f64 { + libm::trunc(x) +} + +#[no_mangle] +pub extern "C" fn y0f(x: f32) -> f32 { + libm::y0f(x) +} + +#[no_mangle] +pub extern "C" fn y0(x: f64) -> f64 { + libm::y0(x) +} + +#[no_mangle] +pub extern "C" fn y1f(x: f32) -> f32 { + libm::y1f(x) +} + +#[no_mangle] +pub extern "C" fn y1(x: f64) -> f64 { + libm::y1(x) +} + +#[no_mangle] +pub extern "C" fn ynf(n: i32, x: f32) -> f32 { + libm::ynf(n, x) +} + +#[no_mangle] +pub extern "C" fn yn(n: i32, x: f64) -> f64 { + libm::yn(n, x) +} diff --git a/riscv/zkvm/platform/src/memory.rs b/riscv/zkvm/platform/src/memory.rs new file mode 100644 index 0000000000..d5b36d5283 --- /dev/null +++ b/riscv/zkvm/platform/src/memory.rs @@ -0,0 +1,70 @@ +// Copyright 2024 RISC Zero, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use super::WORD_SIZE; + +pub const MEM_BITS: usize = 28; +pub const MEM_SIZE: usize = 1 << MEM_BITS; +pub const GUEST_MIN_MEM: usize = 0x0000_0400; +pub const GUEST_MAX_MEM: usize = SYSTEM.start; + +/// Top of stack; stack grows down from this location. +pub const STACK_TOP: u32 = 0x0020_0400; +/// Program (text followed by data and then bss) gets loaded in +/// starting at this location. HEAP begins right afterwards. +pub const TEXT_START: u32 = 0x0020_0800; +pub const SYSTEM: Region = Region::new(0x0C00_0000, mb(16)); +pub const PAGE_TABLE: Region = Region::new(0x0D00_0000, mb(16)); +pub const PRE_LOAD: Region = Region::new(0x0D70_0000, mb(9)); + +pub struct Region { + start: usize, + len_bytes: usize, +} + +impl Region { + pub const fn new(start: usize, len_bytes: usize) -> Self { + Self { start, len_bytes } + } + + pub const fn start(&self) -> usize { + self.start + } + + pub const fn len_bytes(&self) -> usize { + self.len_bytes + } + + pub const fn len_words(&self) -> usize { + assert!((self.len_bytes % WORD_SIZE) == 0); + self.len_bytes / WORD_SIZE + } + + pub const fn end(&self) -> usize { + self.start + self.len_bytes + } +} + +const fn kb(kb: usize) -> usize { + kb * 1024 +} + +const fn mb(mb: usize) -> usize { + kb(mb * 1024) +} + +/// Returns whether `addr` is within guest memory bounds. +pub fn is_guest_memory(addr: u32) -> bool { + GUEST_MIN_MEM <= (addr as usize) && (addr as usize) < GUEST_MAX_MEM +} diff --git a/riscv/zkvm/platform/src/rust_rt.rs b/riscv/zkvm/platform/src/rust_rt.rs new file mode 100644 index 0000000000..5b3bb7dd2b --- /dev/null +++ b/riscv/zkvm/platform/src/rust_rt.rs @@ -0,0 +1,78 @@ +// Copyright 2024 RISC Zero, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! This module contains the components required to link a Rust binary. +//! +//! In particular: +//! * It defines an entrypoint ensuring initialization and finalization are done +//! properly. +//! * It includes a panic handler. +//! * It includes an allocator. + +use core::panic::PanicInfo; + +use crate::syscall::sys_panic; + +extern crate alloc; + +/// panic! implementation for use in no_std guest programs. +#[cfg_attr(feature = "panic-handler", panic_handler)] +pub fn panic_fault(panic_info: &PanicInfo) -> ! { + let msg = alloc::format!("{}", panic_info); + let msg_bytes = msg.as_bytes(); + unsafe { sys_panic(msg.as_ptr(), msg.len()) } +} + +#[cfg(feature = "entrypoint")] +mod entrypoint { + use crate::syscall::sys_halt; + + #[no_mangle] + unsafe extern "C" fn __start() -> ! { + // This definition of __start differs from risc0_zkvm::guest in that it does not initialize the + // journal and will halt with empty output. It also assumes main follows the standard C + // convention, and uses the returned i32 value as the user exit code for halt. + let exit_code = { + extern "C" { + fn main(argc: i32, argv: *const *const u8) -> i32; + } + + main(0, core::ptr::null()) + }; + + const EMPTY_OUTPUT: [u32; 8] = [0; 8]; + sys_halt(exit_code as u8, &EMPTY_OUTPUT); + } + + static STACK_TOP: u32 = crate::memory::STACK_TOP; + + // Entry point; sets up global pointer and stack pointer and passes + // to __start. TODO: when asm_const is stabilized, use that here + // instead of defining a symbol and dereferencing it. 
+ core::arch::global_asm!( + r#" + .section .text._start + .globl _start + _start: + .option push; + .option norelax + la gp, __global_pointer$ + .option pop + la sp, {0} + lw sp, 0(sp) + call __start; + "#, + sym STACK_TOP + ); +} diff --git a/riscv/zkvm/platform/src/syscall.rs b/riscv/zkvm/platform/src/syscall.rs new file mode 100644 index 0000000000..b56b3163c5 --- /dev/null +++ b/riscv/zkvm/platform/src/syscall.rs @@ -0,0 +1,896 @@ +// Copyright 2024 RISC Zero, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#[cfg(target_os = "zkvm")] +use core::arch::asm; +use core::{cmp::min, ffi::CStr, ptr::null_mut, str::Utf8Error}; + +use crate::WORD_SIZE; + +pub mod ecall { + pub const HALT: u32 = 0; + pub const INPUT: u32 = 1; + pub const SOFTWARE: u32 = 2; + pub const SHA: u32 = 3; + pub const BIGINT: u32 = 4; + pub const USER: u32 = 5; + pub const MACHINE: u32 = 5; +} + +pub mod halt { + pub const TERMINATE: u32 = 0; + pub const PAUSE: u32 = 1; + pub const SPLIT: u32 = 2; +} + +pub mod reg_abi { + pub const REG_ZERO: usize = 0; // zero constant + pub const REG_RA: usize = 1; // return address + pub const REG_SP: usize = 2; // stack pointer + pub const REG_GP: usize = 3; // global pointer + pub const REG_TP: usize = 4; // thread pointer + pub const REG_T0: usize = 5; // temporary + pub const REG_T1: usize = 6; // temporary + pub const REG_T2: usize = 7; // temporary + pub const REG_S0: usize = 8; // saved register + pub const REG_FP: usize = 8; // frame pointer + pub const REG_S1: usize = 9; // saved register + pub const REG_A0: usize = 10; // fn arg / return value + pub const REG_A1: usize = 11; // fn arg / return value + pub const REG_A2: usize = 12; // fn arg + pub const REG_A3: usize = 13; // fn arg + pub const REG_A4: usize = 14; // fn arg + pub const REG_A5: usize = 15; // fn arg + pub const REG_A6: usize = 16; // fn arg + pub const REG_A7: usize = 17; // fn arg + pub const REG_S2: usize = 18; // saved register + pub const REG_S3: usize = 19; // saved register + pub const REG_S4: usize = 20; // saved register + pub const REG_S5: usize = 21; // saved register + pub const REG_S6: usize = 22; // saved register + pub const REG_S7: usize = 23; // saved register + pub const REG_S8: usize = 24; // saved register + pub const REG_S9: usize = 25; // saved register + pub const REG_S10: usize = 26; // saved register + pub const REG_S11: usize = 27; // saved register + pub const REG_T3: usize = 28; // temporary + pub const REG_T4: usize = 29; // temporary + pub const REG_T5: usize 
= 30; // temporary + pub const REG_T6: usize = 31; // temporary + pub const REG_MAX: usize = 32; // maximum number of registers +} + +pub const DIGEST_WORDS: usize = 8; +pub const DIGEST_BYTES: usize = WORD_SIZE * DIGEST_WORDS; + +/// Number of words in each cycle received using the SOFTWARE ecall +pub const IO_CHUNK_WORDS: usize = 4; + +// Limit syscall buffers so that the Executor doesn't get into an infinite +// split situation. +pub const MAX_BUF_BYTES: usize = 4 * 1024; +pub const MAX_BUF_WORDS: usize = MAX_BUF_BYTES / WORD_SIZE; +pub const MAX_SHA_COMPRESS_BLOCKS: usize = 1000; + +pub mod bigint { + pub const OP_MULTIPLY: u32 = 0; + + /// BigInt width, in bits, handled by the BigInt accelerator circuit. + pub const WIDTH_BITS: usize = 256; + + /// BigInt width, in bytes, handled by the BigInt accelerator circuit. + pub const WIDTH_BYTES: usize = WIDTH_BITS / 8; + + /// BigInt width, in words, handled by the BigInt accelerator circuit. + pub const WIDTH_WORDS: usize = WIDTH_BYTES / crate::WORD_SIZE; +} + +/// A UTF-8 NUL-terminated name of a syscall with static lifetime. +#[derive(Clone, Copy, Debug)] +#[repr(transparent)] +pub struct SyscallName(*const u8); + +/// Construct a SyscallName declaration at compile time. +/// +/// ```rust +/// use axvm_platform::declare_syscall; +/// +/// declare_syscall!(SYS_MY_SYSTEM_CALL); +/// ``` +#[macro_export] +macro_rules! declare_syscall { + ( + $(#[$meta:meta])* + $vis:vis $name:ident + ) => { + // Go through `CStr` to avoid `unsafe` in the caller. 
+ $(#[$meta])* + $vis const $name: $crate::syscall::SyscallName = match ::core::ffi::CStr::from_bytes_until_nul( + concat!(module_path!(), "::", stringify!($name), "\0").as_bytes(), + ) { + Ok(c_str) => match $crate::syscall::SyscallName::from_c_str(c_str) { + Ok(name) => name, + Err(_) => unreachable!(), + }, + Err(_) => unreachable!(), + }; + }; +} + +pub mod nr { + declare_syscall!(pub SYS_ARGC); + declare_syscall!(pub SYS_ARGV); + declare_syscall!(pub SYS_CYCLE_COUNT); + declare_syscall!(pub SYS_EXECUTE_ZKR); + declare_syscall!(pub SYS_EXIT); + declare_syscall!(pub SYS_FORK); + declare_syscall!(pub SYS_GETENV); + declare_syscall!(pub SYS_LOG); + declare_syscall!(pub SYS_PANIC); + declare_syscall!(pub SYS_PIPE); + declare_syscall!(pub SYS_PROVE_ZKR); + declare_syscall!(pub SYS_RANDOM); + declare_syscall!(pub SYS_READ); + declare_syscall!(pub SYS_VERIFY_INTEGRITY); + declare_syscall!(pub SYS_WRITE); +} + +impl SyscallName { + /// Converts a static C string to a system call name, if it is UTF-8. + #[inline] + pub const fn from_c_str(c_str: &'static CStr) -> Result<Self, Utf8Error> { + match c_str.to_str() { + Ok(_) => Ok(unsafe { Self::from_bytes_with_nul(c_str.as_ptr().cast()) }), + Err(error) => Err(error), + } + } + + /// Converts a raw UTF-8 C string pointer to a system call name. + /// + /// # Safety + /// + /// The pointer must reference a static null-terminated UTF-8 string. + pub const unsafe fn from_bytes_with_nul(ptr: *const u8) -> Self { + Self(ptr) + } + + pub fn as_ptr(&self) -> *const u8 { + self.0 + } + + pub fn as_str(&self) -> &str { + core::str::from_utf8(unsafe { core::ffi::CStr::from_ptr(self.as_ptr().cast()).to_bytes() }) + .unwrap() + } +} + +impl AsRef<str> for SyscallName { + fn as_ref(&self) -> &str { + self.as_str() + } +} + +/// Returned registers (a0, a1) from a syscall invocation. +#[repr(C)] +pub struct Return(pub u32, pub u32); + +macro_rules! 
impl_syscall { + ($func_name:ident + // Ugh, unfortunately we can't make this a regular macro list since the asm macro + // doesn't expand register names so in($register) doesn't work. + $(, $a0:ident + $(, $a1:ident + $(, $a2: ident + $(, $a3: ident + $(, $a4: ident + )? + )? + )? + )? + )?) => { + /// Invoke a raw system call + /// + /// # Safety + /// + /// `from_host` must be aligned and dereferenceable. + #[cfg_attr(feature = "export-syscalls", no_mangle)] + pub unsafe extern "C" fn $func_name(syscall: SyscallName, + from_host: *mut u32, + from_host_words: usize + $(,$a0: u32 + $(,$a1: u32 + $(,$a2: u32 + $(,$a3: u32 + $(,$a4: u32 + )? + )? + )? + )? + )? + ) -> Return { + #[cfg(target_os = "zkvm")] { + let a0: u32; + let a1: u32; + ::core::arch::asm!( + "ecall", + in("t0") $crate::syscall::ecall::SOFTWARE, + inlateout("a0") from_host => a0, + inlateout("a1") from_host_words => a1, + in("a2") syscall.as_ptr() + $(,in("a3") $a0 + $(,in("a4") $a1 + $(,in("a5") $a2 + $(,in("a6") $a3 + $(,in("a7") $a4 + )? + )? + )? + )? 
+ )?); + Return(a0, a1) + } + #[cfg(not(target_os = "zkvm"))] + unimplemented!() + } + } +} + +impl_syscall!(syscall_0); +impl_syscall!(syscall_1, a3); +impl_syscall!(syscall_2, a3, a4); +impl_syscall!(syscall_3, a3, a4, a5); +impl_syscall!(syscall_4, a3, a4, a5, a6); +impl_syscall!(syscall_5, a3, a4, a5, a6, a7); + +fn ecall_1(t0: u32, a0: u32, a1: u32) { + #[cfg(target_os = "zkvm")] + unsafe { + asm!( + "ecall", + in("t0") t0, + in("a0") a0, + in("a1") a1, + ) + }; + #[cfg(not(target_os = "zkvm"))] + { + core::hint::black_box((t0, a0, a1)); + unimplemented!() + } +} + +fn ecall_4(t0: u32, a0: u32, a1: u32, a2: u32, a3: u32, a4: u32) { + #[cfg(target_os = "zkvm")] + unsafe { + asm!( + "ecall", + in("t0") t0, + in("a0") a0, + in("a1") a1, + in("a2") a2, + in("a3") a3, + in("a4") a4, + ) + }; + #[cfg(not(target_os = "zkvm"))] + { + core::hint::black_box((t0, a0, a1, a2, a3, a4)); + unimplemented!() + } +} + +/// # Safety +/// +/// `out_state` must be aligned and dereferenceable. +// [inline(never)] is added to mitigate potentially leaking information about program execution +// through the final value of the program counter (pc) on halt where there is more than one +// location in the program where `sys_halt` is called. As long as the halt instruction only exists +// in one place within the program, the pc will always be the same invariant with input. +#[inline(never)] +#[cfg_attr(feature = "export-syscalls", no_mangle)] +pub extern "C" fn sys_halt(user_exit: u8, out_state: *const [u32; DIGEST_WORDS]) -> ! { + ecall_1( + ecall::HALT, + halt::TERMINATE | ((user_exit as u32) << 8), + out_state as u32, + ); + unreachable!(); +} + +/// # Safety +/// +/// `out_state` must be aligned and dereferenceable. +// [inline(never)] is added to mitigate potentially leaking information about program execution +// through the final value of the program counter (pc) on pause where there is more than one +// location in the program where `sys_pause` is called. 
As long as the pause instruction only exists +// in one place within the program, the pc will always be the same invariant with input. +#[inline(never)] +#[cfg_attr(feature = "export-syscalls", no_mangle)] +pub unsafe extern "C" fn sys_pause(user_exit: u8, out_state: *const [u32; DIGEST_WORDS]) { + ecall_1( + ecall::HALT, + halt::PAUSE | ((user_exit as u32) << 8), + out_state as u32, + ); +} + +#[cfg_attr(feature = "export-syscalls", no_mangle)] +pub extern "C" fn sys_input(index: u32) -> u32 { + let t0 = ecall::INPUT; + let index = index & 0x07; + #[cfg(target_os = "zkvm")] + unsafe { + let a0: u32; + asm!( + "ecall", + in("t0") t0, + inlateout("a0") index => a0, + ); + a0 + } + #[cfg(not(target_os = "zkvm"))] + { + core::hint::black_box((t0, index)); + unimplemented!() + } +} + +/// # Safety +/// +/// `out_state`, `in_state`, `block1_ptr`, and `block2_ptr` must be aligned and +/// dereferenceable. +#[inline(always)] +#[cfg_attr(feature = "export-syscalls", no_mangle)] +pub unsafe extern "C" fn sys_sha_compress( + out_state: *mut [u32; DIGEST_WORDS], + in_state: *const [u32; DIGEST_WORDS], + block1_ptr: *const [u32; DIGEST_WORDS], + block2_ptr: *const [u32; DIGEST_WORDS], +) { + ecall_4( + ecall::SHA, + out_state as u32, + in_state as u32, + block1_ptr as u32, + block2_ptr as u32, + 1, + ); +} + +/// # Safety +/// +/// `out_state`, `in_state`, and `buf` must be aligned and dereferenceable. 
+#[inline(always)] +#[cfg_attr(feature = "export-syscalls", no_mangle)] +pub unsafe extern "C" fn sys_sha_buffer( + out_state: *mut [u32; DIGEST_WORDS], + in_state: *const [u32; DIGEST_WORDS], + buf: *const u8, + count: u32, +) { + let mut ptr = buf; + let mut count_remain = count; + let mut in_state = in_state; + while count_remain > 0 { + let count = min(count_remain, MAX_SHA_COMPRESS_BLOCKS as u32); + ecall_4( + ecall::SHA, + out_state as u32, + in_state as u32, + ptr as u32, + ptr.add(DIGEST_BYTES) as u32, + count, + ); + count_remain -= count; + ptr = ptr.add(2 * DIGEST_BYTES * count as usize); + in_state = out_state; + } +} + +/// # Safety +/// +/// `result`, `x`, `y`, and `modulus` must be aligned and dereferenceable. +#[inline(always)] +#[cfg_attr(feature = "export-syscalls", no_mangle)] +pub unsafe extern "C" fn sys_bigint( + result: *mut [u32; bigint::WIDTH_WORDS], + op: u32, + x: *const [u32; bigint::WIDTH_WORDS], + y: *const [u32; bigint::WIDTH_WORDS], + modulus: *const [u32; bigint::WIDTH_WORDS], +) { + ecall_4( + ecall::BIGINT, + result as u32, + op, + x as u32, + y as u32, + modulus as u32, + ); +} + +/// # Safety +/// +/// `recv_buf` must be aligned and dereferenceable. +#[cfg_attr(feature = "export-syscalls", no_mangle)] +pub unsafe extern "C" fn sys_rand(recv_buf: *mut u32, words: usize) { + syscall_0(nr::SYS_RANDOM, recv_buf, words); +} + +/// # Safety +/// +/// `msg_ptr` must be aligned and dereferenceable. +#[cfg_attr(feature = "export-syscalls", no_mangle)] +pub unsafe extern "C" fn sys_panic(msg_ptr: *const u8, len: usize) -> ! { + syscall_2(nr::SYS_PANIC, null_mut(), 0, msg_ptr as u32, len as u32); + + // As a fallback for non-compliant hosts, issue an illegal instruction. + #[cfg(target_os = "zkvm")] + asm!("sw x0, 1(x0)"); + unreachable!() +} + +/// # Safety +/// +/// `msg_ptr` must be aligned and dereferenceable. 
+#[cfg_attr(feature = "export-syscalls", no_mangle)] +pub unsafe extern "C" fn sys_log(msg_ptr: *const u8, len: usize) { + syscall_2(nr::SYS_LOG, null_mut(), 0, msg_ptr as u32, len as u32); +} + +#[cfg_attr(feature = "export-syscalls", no_mangle)] +pub extern "C" fn sys_cycle_count() -> u64 { + let Return(hi, lo) = unsafe { syscall_0(nr::SYS_CYCLE_COUNT, null_mut(), 0) }; + ((hi as u64) << 32) + lo as u64 +} + +/// Reads the given number of bytes into the given buffer, posix-style. Returns +/// the number of bytes actually read. On end of file, returns 0. +/// +/// Like POSIX read, this is not guaranteed to read all bytes +/// requested. If we haven't reached EOF, it is however guaranteed to +/// read at least one byte. +/// +/// Users should prefer a higher-level abstraction. +/// +/// # Safety +/// +/// `recv_ptr` must be aligned and dereferenceable. +#[cfg_attr(feature = "export-syscalls", no_mangle)] +pub unsafe extern "C" fn sys_read(fd: u32, recv_ptr: *mut u8, nread: usize) -> usize { + // The SYS_READ system call can do a given number of word-aligned reads + // efficiently. The semantics of the system call are: + // + // (nread, word) = syscall_2(nr::SYS_READ, outbuf, + // num_words_in_outbuf, fd, nbytes); + // + // This reads exactly nbytes from the file descriptor, and fills the words + // in outbuf, followed by up to 4 bytes returned in "word", and fills + // the rest with NULs. It returns the number of bytes read. + // + // sys_read exposes this as a byte-aligned read by: + // * Copies any unaligned bytes at the start or end of the region. + + // Fills 0-3 bytes from a u32 into memory, returning the pointer afterwards. 
+ unsafe fn fill_from_word(mut ptr: *mut u8, mut word: u32, nfill: usize) -> *mut u8 { + debug_assert!(nfill < 4, "nfill={nfill}"); + for _ in 0..nfill { + *ptr = (word & 0xFF) as u8; + word >>= 8; + ptr = ptr.add(1); + } + ptr + } + + // Determine how many bytes at the beginning of the buffer we have + // to read in order to become word-aligned. + let ptr_offset = (recv_ptr as usize) & (WORD_SIZE - 1); + let (main_ptr, main_requested, nread_first) = if ptr_offset == 0 { + (recv_ptr, nread, 0) + } else { + let unaligned_at_start = min(nread, WORD_SIZE - ptr_offset); + // Read unaligned bytes into "firstword". + let Return(nread_first, firstword) = + syscall_2(nr::SYS_READ, null_mut(), 0, fd, unaligned_at_start as u32); + debug_assert_eq!(nread_first as usize, unaligned_at_start); + + // Align up to a word boundary to do the main copy. + let main_ptr = fill_from_word(recv_ptr, firstword, unaligned_at_start); + if nread == unaligned_at_start { + // We only read part of a word, and don't have to read any full words. + return nread; + } + (main_ptr, nread - unaligned_at_start, nread_first as usize) + }; + + // Copy in all of the word-aligned data + let main_words = main_requested / WORD_SIZE; + let (nread_main, lastword) = + sys_read_internal(fd, main_ptr as *mut u32, main_words, main_requested); + debug_assert!(nread_main <= main_requested); + let read_words = nread_main / WORD_SIZE; + + // Copy in individual bytes after the word-aligned section. + let unaligned_at_end = main_requested % WORD_SIZE; + + // The last 0-3 bytes are returned in lastword. Write those to complete the _requested_ read amount. + fill_from_word( + main_ptr.add(main_words * WORD_SIZE), + lastword, + unaligned_at_end, + ); + + nread_first + nread_main +} + +/// Reads up to the given number of words into the buffer [recv_buf, +/// recv_buf + nwords). Returns the number of bytes actually read. +/// sys_read_words is a more efficient interface than sys_read, but +/// varies from POSIX semantics. 
Notably: +/// +/// * The read length is specified in words, not bytes. (The output +/// length is still returned in bytes) +/// +/// * If not all data is available, `sys_read_words` will return a short read. +/// +/// * recv_buf must be word-aligned. +/// +/// * Return a short read in the case of EOF mid-way through. +/// +/// # Safety +/// +/// `recv_ptr' must be a word-aligned pointer and point to a region of +/// `nwords' size. +#[cfg_attr(feature = "export-syscalls", no_mangle)] +pub unsafe extern "C" fn sys_read_words(fd: u32, recv_ptr: *mut u32, nwords: usize) -> usize { + sys_read_internal(fd, recv_ptr, nwords, nwords * WORD_SIZE).0 +} + +fn sys_read_internal(fd: u32, recv_ptr: *mut u32, nwords: usize, nbytes: usize) -> (usize, u32) { + let mut nwords_remain = nwords; + let mut nbytes_remain = nbytes; + let mut nread_total_bytes = 0; + let mut recv_ptr = recv_ptr; + let mut final_word = 0; + while nbytes_remain > 0 { + debug_assert!( + final_word == 0, + "host returned non-zero final word on a fully aligned read" + ); + let chunk_len = min(nbytes_remain, MAX_BUF_BYTES) as u32; + let Return(nread_bytes, last_word) = unsafe { + syscall_2( + nr::SYS_READ, + recv_ptr, + min(nwords_remain, MAX_BUF_WORDS), + fd, + chunk_len, + ) + }; + let nread_bytes = nread_bytes as usize; + let nread_words = nread_bytes / WORD_SIZE; + recv_ptr = unsafe { recv_ptr.add(nread_words) }; + final_word = last_word; + nwords_remain -= nread_words; + nread_total_bytes += nread_bytes; + nbytes_remain -= nread_bytes; + if nread_bytes < chunk_len as usize { + // We've reached EOF, and the host has returned a partial word. + break; + } + } + (nread_total_bytes, final_word) +} + +/// # Safety +/// +/// `write_ptr` must be aligned and dereferenceable. 
+#[cfg_attr(feature = "export-syscalls", no_mangle)] +pub unsafe extern "C" fn sys_write(fd: u32, write_ptr: *const u8, nbytes: usize) { + let mut nbytes_remain = nbytes; + let mut write_ptr = write_ptr; + while nbytes_remain > 0 { + let nbytes = min(nbytes_remain, MAX_BUF_BYTES); + syscall_3( + nr::SYS_WRITE, + null_mut(), + 0, + fd, + write_ptr as u32, + nbytes as u32, + ); + write_ptr = write_ptr.add(nbytes); + nbytes_remain -= nbytes; + } +} + +/// Retrieves the value of an environment variable, and stores as much +/// of it as it can it in the memory at [out_words, out_words + +/// out_nwords). +/// +/// Returns the length of the value, in bytes, or usize::MAX if the variable is +/// not set. +/// +/// This is normally called twice to read an environment variable: +/// Once to get the length of the value, and once to fill in allocated +/// memory. +/// +/// NOTE: Repeated calls to sys_getenv are not guaranteed to result in the same +/// data being returned. Returned data is entirely in the control of the host. +/// +/// # Safety +/// +/// `out_words` and `varname` must be aligned and dereferenceable. +#[cfg_attr(feature = "export-syscalls", no_mangle)] +pub unsafe extern "C" fn sys_getenv( + out_words: *mut u32, + out_nwords: usize, + varname: *const u8, + varname_len: usize, +) -> usize { + let Return(a0, _) = syscall_2( + nr::SYS_GETENV, + out_words, + out_nwords, + varname as u32, + varname_len as u32, + ); + if a0 == u32::MAX { + usize::MAX + } else { + a0 as usize + } +} + +/// Retrieves the count of arguments provided to program execution. +/// +/// NOTE: Repeated calls to sys_argc are not guaranteed to result in the same +/// data being returned. Returned data is entirely in the control of the host. 
+#[cfg_attr(feature = "export-syscalls", no_mangle)] +pub extern "C" fn sys_argc() -> usize { + let Return(a0, _) = unsafe { syscall_0(nr::SYS_ARGC, null_mut(), 0) }; + a0 as usize +} + +/// Retrieves the argument with arg_index, and stores as much +/// of it as it can it in the memory at [out_words, out_words + +/// out_nwords). +/// +/// Returns the length, in bytes, of the argument string. If the requested +/// argument index does not exist (i.e. `arg_index` >= argc) then this syscall +/// will not return. +/// +/// This is normally called twice to read an argument: Once to get the length of +/// the value, and once to fill in allocated memory. +/// +/// NOTE: Repeated calls to sys_argv are not guaranteed to result in the same +/// data being returned. Returned data is entirely in the control of the host. +/// +/// # Safety +/// +/// `out_words` must be aligned and dereferenceable. +#[cfg_attr(feature = "export-syscalls", no_mangle)] +pub unsafe extern "C" fn sys_argv( + out_words: *mut u32, + out_nwords: usize, + arg_index: usize, +) -> usize { + let Return(a0, _) = syscall_1(nr::SYS_ARGV, out_words, out_nwords, arg_index as u32); + a0 as usize +} + +#[cfg_attr(feature = "export-syscalls", no_mangle)] +#[deprecated] +pub extern "C" fn sys_alloc_words(nwords: usize) -> *mut u32 { + unsafe { sys_alloc_aligned(WORD_SIZE * nwords, WORD_SIZE) as *mut u32 } +} + +/// # Safety +/// +/// This function should be safe to call, but clippy complains if it is not marked as `unsafe`. +#[cfg(feature = "export-syscalls")] +#[no_mangle] +pub unsafe extern "C" fn sys_alloc_aligned(bytes: usize, align: usize) -> *mut u8 { + #[cfg(target_os = "zkvm")] + extern "C" { + // This symbol is defined by the loader and marks the end + // of all elf sections, so this is where we start our + // heap. 
+ // + // This is generated automatically by the linker; see + // https://lld.llvm.org/ELF/linker_script.html#sections-command + static _end: u8; + } + + // Pointer to next heap address to use, or 0 if the heap has not yet been + // initialized. + static mut HEAP_POS: usize = 0; + + // SAFETY: Single threaded, so nothing else can touch this while we're working. + let mut heap_pos = unsafe { HEAP_POS }; + + #[cfg(target_os = "zkvm")] + if heap_pos == 0 { + heap_pos = unsafe { (&_end) as *const u8 as usize }; + } + + // Honor requested alignment if larger than word size. + // Note: align is typically a power of two. + let align = usize::max(align, WORD_SIZE); + + let offset = heap_pos & (align - 1); + if offset != 0 { + heap_pos += align - offset; + } + + let ptr = heap_pos as *mut u8; + heap_pos += bytes; + + // Check to make sure heap doesn't collide with SYSTEM memory. + if crate::memory::SYSTEM.start() < heap_pos { + const MSG: &[u8] = "Out of memory!".as_bytes(); + unsafe { sys_panic(MSG.as_ptr(), MSG.len()) }; + } + + unsafe { HEAP_POS = heap_pos }; + ptr +} + +/// Send a ReceiptClaim digest to the host to request verification. +/// +/// A cooperative prover will only return if there is a verifying proof +/// associated with that claim digest, and will always return a result code +/// of 0 to register a0. The caller must encode the claim_digest into a +/// public assumptions list for inclusion in the guest output. +/// +/// # Safety +/// +/// `claim_digest` must be aligned and dereferenceable. +/// `control_root` must be aligned and dereferenceable. 
#[cfg(feature = "export-syscalls")]
#[no_mangle]
pub unsafe extern "C" fn sys_verify_integrity(
    claim_digest: *const [u32; DIGEST_WORDS],
    control_root: *const [u32; DIGEST_WORDS],
) {
    // Pack both digests back to back so they reach the host as one buffer.
    let mut to_host = [0u32; DIGEST_WORDS * 2];
    to_host[..DIGEST_WORDS].copy_from_slice(claim_digest.as_ref().unwrap_unchecked());
    to_host[DIGEST_WORDS..].copy_from_slice(control_root.as_ref().unwrap_unchecked());

    let Return(a0, _) = unsafe {
        // Send the claim_digest to the host via software ecall.
        syscall_2(
            nr::SYS_VERIFY_INTEGRITY,
            null_mut(),
            0,
            to_host.as_ptr() as u32,
            (DIGEST_BYTES * 2) as u32,
        )
    };

    // Check to ensure the host indicated success by returning 0.
    // This should always be the case. This check is included for
    // forwards-compatibility.
    if a0 != 0 {
        const MSG: &[u8] = "sys_verify_integrity returned error result".as_bytes();
        unsafe { sys_panic(MSG.as_ptr(), MSG.len()) };
    }
}

// Make sure we only get one of these since it's stateful.
#[cfg(not(feature = "export-syscalls"))]
extern "C" {
    // The first argument is a size in bytes, matching the definition that is
    // compiled when the `export-syscalls` feature is enabled.
    pub fn sys_alloc_aligned(bytes: usize, align: usize) -> *mut u8;
}

/// `sys_fork()` creates a new process by duplicating the calling process. The
/// new process is referred to as the child process. The calling process is
/// referred to as the parent process.
///
/// The child process and the parent process run in separate memory spaces. At
/// the time of `sys_fork()` both memory spaces have the same content.
///
/// # Return Value
///
/// On success, the PID of the child process (1) is returned in the parent, and
/// 0 is returned in the child. On failure, -1 is returned in the parent, no
/// child process is created.
#[cfg(feature = "export-syscalls")]
#[no_mangle]
pub extern "C" fn sys_fork() -> i32 {
    let Return(a0, _) = unsafe { syscall_0(nr::SYS_FORK, null_mut(), 0) };
    a0 as i32
}

/// `sys_pipe()` creates a pipe, a unidirectional data channel that can be used
/// for interprocess communication. The pointer `pipefd` is used to return two
/// file descriptors referring to the ends of the pipe. `pipefd[0]` refers to
/// the read end of the pipe. `pipefd[1]` refers to the write end of the pipe.
/// Data written to the write end of the pipe is buffered by the host until it
/// is read from the read end of the pipe.
///
/// # Return Value
///
/// On success, zero is returned. On error, -1 is returned, and `pipefd` is
/// left unchanged.
///
/// # Safety
///
/// `pipefd` must be aligned, dereferenceable, and have capacity for 2 u32
/// values.
#[cfg(feature = "export-syscalls")]
#[no_mangle]
pub unsafe extern "C" fn sys_pipe(pipefd: *mut u32) -> i32 {
    let Return(a0, _) = syscall_0(nr::SYS_PIPE, pipefd, 2);
    a0 as i32
}

/// `sys_exit()` causes normal process termination.
///
/// Currently the `status` is unused and ignored.
#[cfg(feature = "export-syscalls")]
#[no_mangle]
pub extern "C" fn sys_exit(status: i32) -> ! {
    // `status` is accepted for interface compatibility only; it is not
    // forwarded to the host.
    let _ = status;
    // Nothing is done with the values returned by the syscall, so discard
    // them explicitly.
    let _ = unsafe { syscall_0(nr::SYS_EXIT, null_mut(), 0) };
    #[allow(clippy::empty_loop)]
    loop {
        // prevent dishonest provers from relying on the ability to prove the
        // child process rather than the intended parent process.
    }
}

/// Executes a `ZKR` in the recursion circuit, specified by control
/// ID. The control ID must be registered in the host's index of ZKRs.
///
/// This only triggers the execution of the ZKR; it does not add any
/// assumptions. In order to prove that the ZKR executed correctly,
/// users must calculate the claim digest and add it to the list of
/// assumptions.
///
/// # Safety
///
/// `control_id` must be aligned and dereferenceable.
///
/// `input` must be aligned and have `input_len` u32s dereferenceable.
///
/// `claim_digest` and `control_root` are likewise handed to the host as raw
/// addresses and should satisfy the same requirements as `control_id`.
#[cfg(feature = "export-syscalls")]
#[no_mangle]
#[stability::unstable]
pub unsafe extern "C" fn sys_prove_zkr(
    claim_digest: *const [u32; DIGEST_WORDS],
    control_id: *const [u32; DIGEST_WORDS],
    control_root: *const [u32; DIGEST_WORDS],
    input: *const u32,
    input_len: usize,
) {
    let Return(a0, _) = unsafe {
        syscall_5(
            nr::SYS_PROVE_ZKR,
            null_mut(),
            0,
            claim_digest as u32,
            control_id as u32,
            control_root as u32,
            input as u32,
            input_len as u32,
        )
    };

    // Check to ensure the host indicated success by returning 0.
    // Currently, this should always be the case. This check is
    // included for forwards-compatibility.
    if a0 != 0 {
        const MSG: &[u8] = "sys_prove_zkr returned error result".as_bytes();
        unsafe { sys_panic(MSG.as_ptr(), MSG.len()) };
    }
}