Skip to content

Commit

Permalink
core trace area estimation
Browse files Browse the repository at this point in the history
  • Loading branch information
tqn committed Jan 23, 2025
1 parent 277cf77 commit d7d5b64
Show file tree
Hide file tree
Showing 7 changed files with 183 additions and 34 deletions.
2 changes: 2 additions & 0 deletions crates/core/executor/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,12 @@ sp1-zkvm = { workspace = true, features = ["lib"] }
test-artifacts = { workspace = true }

[features]
default = ["gas"] # REMOVE ME BEFORE MERGING
bigint-rug = ["sp1-curves/bigint-rug"]
profiling = [
"dep:goblin",
"dep:rustc-demangle",
"dep:gecko_profile",
"dep:indicatif",
]
gas = []
36 changes: 36 additions & 0 deletions crates/core/executor/src/air.rs
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,42 @@ impl RiscvAirId {
RiscvAirId::Global,
]
}

/// Reports whether trace generation for this AIR is deferred.
///
/// The deferred set is made up of the global memory init/finalize AIRs and the precompile
/// AIRs listed below; every other AIR yields `false`.
pub(crate) fn is_deferred(self) -> bool {
    match self {
        // Global memory.
        RiscvAirId::MemoryGlobalInit | RiscvAirId::MemoryGlobalFinalize => true,

        // Precompiles: hashing.
        RiscvAirId::ShaExtend | RiscvAirId::ShaCompress | RiscvAirId::KeccakPermute => true,

        // Precompiles: Edwards curve.
        RiscvAirId::EdAddAssign | RiscvAirId::EdDecompress => true,

        // Precompiles: secp256k1 and secp256r1.
        RiscvAirId::Secp256k1Decompress
        | RiscvAirId::Secp256k1AddAssign
        | RiscvAirId::Secp256k1DoubleAssign
        | RiscvAirId::Secp256r1Decompress
        | RiscvAirId::Secp256r1AddAssign
        | RiscvAirId::Secp256r1DoubleAssign => true,

        // Precompiles: BN254.
        RiscvAirId::Bn254AddAssign
        | RiscvAirId::Bn254DoubleAssign
        | RiscvAirId::Bn254FpOpAssign
        | RiscvAirId::Bn254Fp2AddSubAssign
        | RiscvAirId::Bn254Fp2MulAssign => true,

        // Precompiles: BLS12-381.
        RiscvAirId::Bls12381AddAssign
        | RiscvAirId::Bls12381DoubleAssign
        | RiscvAirId::Bls12381FpOpAssign
        | RiscvAirId::Bls12381Fp2AddSubAssign
        | RiscvAirId::Bls12381Fp2MulAssign
        | RiscvAirId::Bls12381Decompress => true,

        // Precompiles: big-integer arithmetic.
        RiscvAirId::Uint256MulMod | RiscvAirId::U256XU2048Mul => true,

        // Everything else has its trace generated per shard.
        _ => false,
    }
}

/// Returns the string representation of the AIR.
#[must_use]
pub fn as_str(&self) -> &str {
Expand Down
99 changes: 66 additions & 33 deletions crates/core/executor/src/executor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
use std::{fs::File, io::BufWriter};
use std::{str::FromStr, sync::Arc};

#[cfg(feature = "gas")]
use crate::gas::TraceAreaEstimator;
#[cfg(feature = "profiling")]
use crate::profiler::Profiler;
use clap::ValueEnum;
Expand Down Expand Up @@ -104,6 +106,10 @@ pub struct Executor<'a> {
/// Whether we should write to the report.
pub print_report: bool,

/// Data used to estimate total trace area.
#[cfg(feature = "gas")]
pub trace_area_estimator: Option<Box<TraceAreaEstimator>>,

/// Whether we should emit global memory init and finalize events. This can be enabled in
/// Checkpoint mode and disabled in Trace mode.
pub emit_global_memory_events: bool,
Expand Down Expand Up @@ -177,8 +183,8 @@ pub struct Executor<'a> {
/// The maximum LDE size to allow.
pub lde_size_threshold: u64,

/// event counts for the current shard.
pub event_counts: EnumMap<RiscvAirId, u64>,
/// Temporary event counts for the current shard. This is a field to reuse memory.
event_counts: EnumMap<RiscvAirId, u64>,
}

/// The different modes the executor can run in.
Expand Down Expand Up @@ -336,6 +342,10 @@ impl<'a> Executor<'a> {
report: ExecutionReport::default(),
local_counts: LocalCounts::default(),
print_report: false,
// >>>>>>>>>> FIX BEFORE MERGING <<<<<<<<<<
// figure out when this should be None or Some
#[cfg(feature = "gas")]
trace_area_estimator: Some(Box::default()),
subproof_verifier: context.subproof_verifier,
hook_registry,
opts,
Expand Down Expand Up @@ -373,6 +383,16 @@ impl<'a> Executor<'a> {
HookEnv { runtime: self }
}

/// Estimates the total trace area required for the core proving stage, which serves as a
/// prover gas metric.
///
/// Returns `None` when this executor has no trace area estimator attached.
#[cfg(feature = "gas")]
#[must_use]
pub fn total_trace_area(&self) -> Option<u64> {
    let estimator = self.trace_area_estimator.as_ref()?;
    let program_len = self.program.instructions.len();
    Some(estimator.total_trace_area(program_len, &self.costs, &self.opts))
}

/// Recover runtime state from a program and existing execution state.
#[must_use]
pub fn recover(program: Program, state: ExecutionState, opts: SP1CoreOpts) -> Self {
Expand Down Expand Up @@ -1548,6 +1568,13 @@ impl<'a> Executor<'a> {
self.report.syscall_counts[syscall] += 1;
}

#[cfg(feature = "gas")]
if let Some(estimator) = &mut self.trace_area_estimator {
if let Some(syscall_id) = syscall.as_air_id() {
estimator.deferred_events[syscall_id] += 1;
}
}

// `hint_slice` is allowed in unconstrained mode since it is used to write the hint.
// Other syscalls are not allowed because they can lead to non-deterministic
// behavior, especially since many syscalls modify memory in place,
Expand Down Expand Up @@ -1652,11 +1679,10 @@ impl<'a> Executor<'a> {
let mut shape_match_found = true;
if self.state.global_clk % self.shape_check_frequency == 0 {
// Estimate the number of events in the trace.
self.estimate_riscv_event_counts(
Self::estimate_riscv_event_counts(
&mut self.event_counts,
(self.state.clk >> 2) as u64,
self.local_counts.local_mem as u64,
self.local_counts.syscalls_sent as u64,
*self.local_counts.event_counts,
&self.local_counts,
);

// Check if the LDE size is too large.
Expand Down Expand Up @@ -1754,6 +1780,14 @@ impl<'a> Executor<'a> {

/// Bump the record.
pub fn bump_record(&mut self) {
if let Some(estimator) = &mut self.trace_area_estimator {
Self::estimate_riscv_event_counts(
&mut self.event_counts,
(self.state.clk >> 2) as u64,
&self.local_counts,
);
estimator.flush_shard(&self.event_counts, &self.costs);
}
self.local_counts = LocalCounts::default();
// Copy all of the existing local memory accesses to the record's local_memory_access vec.
if self.executor_mode == ExecutorMode::Trace {
Expand Down Expand Up @@ -2101,70 +2135,69 @@ impl<'a> Executor<'a> {
}

/// Maps the opcode counts to the number of events in each air.
pub fn estimate_riscv_event_counts(
&mut self,
fn estimate_riscv_event_counts(
event_counts: &mut EnumMap<RiscvAirId, u64>,
cpu_cycles: u64,
touched_addresses: u64,
syscalls_sent: u64,
opcode_counts: EnumMap<Opcode, u64>,
local_counts: &LocalCounts,
) {
let touched_addresses: u64 = local_counts.local_mem as u64;
let syscalls_sent: u64 = local_counts.syscalls_sent as u64;
let opcode_counts: &EnumMap<Opcode, u64> = &local_counts.event_counts;

// Compute the number of events in the cpu chip.
self.event_counts[RiscvAirId::Cpu] = cpu_cycles;
event_counts[RiscvAirId::Cpu] = cpu_cycles;

// Compute the number of events in the add sub chip.
self.event_counts[RiscvAirId::AddSub] =
opcode_counts[Opcode::ADD] + opcode_counts[Opcode::SUB];
event_counts[RiscvAirId::AddSub] = opcode_counts[Opcode::ADD] + opcode_counts[Opcode::SUB];

// Compute the number of events in the mul chip.
self.event_counts[RiscvAirId::Mul] = opcode_counts[Opcode::MUL]
event_counts[RiscvAirId::Mul] = opcode_counts[Opcode::MUL]
+ opcode_counts[Opcode::MULH]
+ opcode_counts[Opcode::MULHU]
+ opcode_counts[Opcode::MULHSU];

// Compute the number of events in the bitwise chip.
self.event_counts[RiscvAirId::Bitwise] =
event_counts[RiscvAirId::Bitwise] =
opcode_counts[Opcode::XOR] + opcode_counts[Opcode::OR] + opcode_counts[Opcode::AND];

// Compute the number of events in the shift left chip.
self.event_counts[RiscvAirId::ShiftLeft] = opcode_counts[Opcode::SLL];
event_counts[RiscvAirId::ShiftLeft] = opcode_counts[Opcode::SLL];

// Compute the number of events in the shift right chip.
self.event_counts[RiscvAirId::ShiftRight] =
event_counts[RiscvAirId::ShiftRight] =
opcode_counts[Opcode::SRL] + opcode_counts[Opcode::SRA];

// Compute the number of events in the divrem chip.
self.event_counts[RiscvAirId::DivRem] = opcode_counts[Opcode::DIV]
event_counts[RiscvAirId::DivRem] = opcode_counts[Opcode::DIV]
+ opcode_counts[Opcode::DIVU]
+ opcode_counts[Opcode::REM]
+ opcode_counts[Opcode::REMU];

// Compute the number of events in the lt chip.
self.event_counts[RiscvAirId::Lt] =
opcode_counts[Opcode::SLT] + opcode_counts[Opcode::SLTU];
event_counts[RiscvAirId::Lt] = opcode_counts[Opcode::SLT] + opcode_counts[Opcode::SLTU];

// Compute the number of events in the memory local chip.
self.event_counts[RiscvAirId::MemoryLocal] =
event_counts[RiscvAirId::MemoryLocal] =
touched_addresses.div_ceil(NUM_LOCAL_MEMORY_ENTRIES_PER_ROW_EXEC as u64);

// Compute the number of events in the branch chip.
self.event_counts[RiscvAirId::Branch] = opcode_counts[Opcode::BEQ]
event_counts[RiscvAirId::Branch] = opcode_counts[Opcode::BEQ]
+ opcode_counts[Opcode::BNE]
+ opcode_counts[Opcode::BLT]
+ opcode_counts[Opcode::BGE]
+ opcode_counts[Opcode::BLTU]
+ opcode_counts[Opcode::BGEU];

// Compute the number of events in the jump chip.
self.event_counts[RiscvAirId::Jump] =
opcode_counts[Opcode::JAL] + opcode_counts[Opcode::JALR];
event_counts[RiscvAirId::Jump] = opcode_counts[Opcode::JAL] + opcode_counts[Opcode::JALR];

// Compute the number of events in the auipc chip.
self.event_counts[RiscvAirId::Auipc] = opcode_counts[Opcode::AUIPC]
event_counts[RiscvAirId::Auipc] = opcode_counts[Opcode::AUIPC]
+ opcode_counts[Opcode::UNIMP]
+ opcode_counts[Opcode::EBREAK];

// Compute the number of events in the memory instruction chip.
self.event_counts[RiscvAirId::MemoryInstrs] = opcode_counts[Opcode::LB]
event_counts[RiscvAirId::MemoryInstrs] = opcode_counts[Opcode::LB]
+ opcode_counts[Opcode::LH]
+ opcode_counts[Opcode::LW]
+ opcode_counts[Opcode::LBU]
Expand All @@ -2174,18 +2207,18 @@ impl<'a> Executor<'a> {
+ opcode_counts[Opcode::SW];

// Compute the number of events in the syscall instruction chip.
self.event_counts[RiscvAirId::SyscallInstrs] = opcode_counts[Opcode::ECALL];
event_counts[RiscvAirId::SyscallInstrs] = opcode_counts[Opcode::ECALL];

// Compute the number of events in the syscall core chip.
self.event_counts[RiscvAirId::SyscallCore] = syscalls_sent;
event_counts[RiscvAirId::SyscallCore] = syscalls_sent;

// Compute the number of events in the global chip.
self.event_counts[RiscvAirId::Global] =
2 * touched_addresses + self.event_counts[RiscvAirId::SyscallInstrs];
event_counts[RiscvAirId::Global] =
2 * touched_addresses + event_counts[RiscvAirId::SyscallInstrs];

// Adjust for divrem dependencies.
self.event_counts[RiscvAirId::Mul] += self.event_counts[RiscvAirId::DivRem];
self.event_counts[RiscvAirId::Lt] += self.event_counts[RiscvAirId::DivRem];
event_counts[RiscvAirId::Mul] += event_counts[RiscvAirId::DivRem];
event_counts[RiscvAirId::Lt] += event_counts[RiscvAirId::DivRem];

// Note: we ignore the additional dependencies for addsub, since they are accounted for in
// the maximal shapes.
Expand Down
71 changes: 71 additions & 0 deletions crates/core/executor/src/gas.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
use enum_map::EnumMap;
use hashbrown::HashMap;
use sp1_stark::SP1CoreOpts;

use crate::RiscvAirId;

const BYTE_NUM_ROWS: u64 = 1 << 16;

/// Accumulates the data used to estimate the total trace area of an execution.
#[derive(Default, Clone)]
pub struct TraceAreaEstimator {
    /// Running sum of the estimated area of the non-deferred AIRs; `flush_shard` adds to it.
    pub core_area: u64,
    /// Per-AIR event counts whose area is only computed later, in `total_trace_area`.
    pub deferred_events: EnumMap<RiscvAirId, u64>,
}

impl TraceAreaEstimator {
    /// Rounds a row count up to the padded height of its trace.
    ///
    /// A table with no rows occupies no area. This is handled explicitly because
    /// `0u64.next_power_of_two()` evaluates to `1`, which would charge one full row of cost
    /// for every AIR that has no events at all.
    fn padded_rows(rows: u64) -> u64 {
        if rows == 0 {
            0
        } else {
            rows.next_power_of_two()
        }
    }

    /// An estimate of the total trace area required for the core proving stage.
    /// This provides a prover gas metric.
    ///
    /// The result is the sum of the core area accumulated by `flush_shard`, the area of the
    /// deferred events, the byte table (`BYTE_NUM_ROWS` rows) and the program table
    /// (`program_len` rows).
    ///
    /// * `program_len` - number of rows charged for the program table.
    /// * `costs` - per-AIR cost, indexed by AIR id.
    /// * `opts` - supplies the split thresholds used to break deferred events into tables.
    ///
    /// # Panics
    ///
    /// Panics if `costs` has no entry for an AIR id that is looked up, or if one of the split
    /// thresholds in `opts` is zero.
    pub fn total_trace_area(
        &self,
        program_len: usize,
        costs: &HashMap<RiscvAirId, u64>,
        opts: &SP1CoreOpts,
    ) -> u64 {
        let deferred_area = self
            .deferred_events
            .iter()
            .map(|(id, &count)| {
                let (rows_per_event, threshold) = match id {
                    RiscvAirId::ShaExtend => (48, opts.split_opts.sha_extend),
                    RiscvAirId::ShaCompress => (80, opts.split_opts.sha_compress),
                    RiscvAirId::KeccakPermute => (24, opts.split_opts.keccak),
                    RiscvAirId::MemoryGlobalInit | RiscvAirId::MemoryGlobalFinalize => {
                        (1, opts.split_opts.memory)
                    }
                    _ => (1, opts.split_opts.deferred),
                };
                let threshold = threshold as u64;
                let rows = count * rows_per_event;
                // Rows are split into as many threshold-sized tables as fit, plus one table
                // holding the remainder. Each table is padded to a power of two on its own.
                let num_full_airs = rows / threshold;
                let num_remainder_air_rows = rows % threshold;
                // An empty remainder (no events, or an exact multiple of the threshold) must
                // not be charged a padded row.
                let num_padded_rows = num_full_airs * threshold.next_power_of_two()
                    + Self::padded_rows(num_remainder_air_rows);
                // The costs already seem to include the `rows_per_event` factor.
                let cost_per_row = costs[&id] / rows_per_event;
                cost_per_row * num_padded_rows
            })
            .sum::<u64>();

        // Compute the byte chip contribution.
        let byte_area = BYTE_NUM_ROWS * costs[&RiscvAirId::Byte];

        // Compute the program chip contribution.
        let program_area = program_len as u64 * costs[&RiscvAirId::Program];

        self.core_area + deferred_area + byte_area + program_area
    }

    /// Mark the end of a shard. Estimates the area of core AIRs and defers appropriate counts.
    ///
    /// Counts of deferred AIRs are added to `deferred_events`. Every other AIR adds its cost
    /// times its padded row count to `core_area`; an AIR with no events adds nothing.
    ///
    /// # Panics
    ///
    /// Panics if `costs` has no entry for a non-deferred AIR id.
    pub(crate) fn flush_shard(
        &mut self,
        event_counts: &EnumMap<RiscvAirId, u64>,
        costs: &HashMap<RiscvAirId, u64>,
    ) {
        for (id, &count) in event_counts {
            if id.is_deferred() {
                self.deferred_events[id] += count;
            } else {
                self.core_area += costs[&id] * Self::padded_rows(count);
            }
        }
    }
}
2 changes: 2 additions & 0 deletions crates/core/executor/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ mod dependencies;
mod disassembler;
pub mod events;
mod executor;
#[cfg(feature = "gas")]
mod gas;
mod hook;
mod instruction;
mod io;
Expand Down
2 changes: 1 addition & 1 deletion crates/core/executor/src/syscalls/code.rs
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ impl SyscallCode {

/// Get the AIR id used in the syscall implementation.
#[must_use]
pub(crate) fn as_air_id(&self) -> Option<RiscvAirId> {
pub(crate) fn as_air_id(self) -> Option<RiscvAirId> {
Some(match self {
SyscallCode::SHA_EXTEND => RiscvAirId::ShaExtend,
SyscallCode::SHA_COMPRESS => RiscvAirId::ShaCompress,
Expand Down
5 changes: 5 additions & 0 deletions crates/prover/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,11 @@ impl<C: SP1ProverComponents> SP1Prover<C> {
runtime.write_proof(proof.clone(), vkey.clone());
}
runtime.run_fast()?;
// >>>>>>>>>> FIX BEFORE MERGING <<<<<<<<<<
// figure out where this should be printed
if let Some(area) = runtime.total_trace_area() {
tracing::info!("prover gas: {}", area);
}
Ok((SP1PublicValues::from(&runtime.state.public_values_stream), runtime.report))
}

Expand Down

0 comments on commit d7d5b64

Please sign in to comment.