Skip to content
This repository has been archived by the owner on Aug 16, 2024. It is now read-only.

merge v1.4.2 #39

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
156 changes: 72 additions & 84 deletions Cargo.lock

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "shivini"
version = "0.1.0"
version = "0.2.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
Expand All @@ -10,7 +10,7 @@ boojum = { git = "https://github.com/matter-labs/era-boojum", branch = "main" }
boojum-cuda = { git = "https://github.com/matter-labs/era-boojum-cuda", branch = "main" }
cudart = { git = "https://github.com/matter-labs/era-cuda", branch = "main", package = "cudart" }
cudart-sys = { git = "https://github.com/matter-labs/era-cuda", branch = "main", package = "cudart-sys" }
circuit_definitions = { git = "https://github.com/matter-labs/era-zkevm_test_harness", branch = "v1.4.1", package = "circuit_definitions", optional = true }
circuit_definitions = { git = "https://github.com/matter-labs/era-zkevm_test_harness", branch = "v1.4.2", package = "circuit_definitions", optional = true }

rand = "0.8"
smallvec = { version = "1.13", features = [
Expand Down
57 changes: 16 additions & 41 deletions src/constraint_evaluation.rs
Original file line number Diff line number Diff line change
@@ -1,31 +1,20 @@
use boojum::{
config::CSConfig,
cs::{
gates::lookup_marker::LookupFormalGate,
implementations::{reference_cs::CSReferenceAssembly, setup::TreeNode},
traits::{evaluator::PerChunkOffset, gate::GatePlacementStrategy},
},
use crate::gpu_proof_config::GpuProofConfig;
use boojum::cs::{
gates::lookup_marker::LookupFormalGate,
implementations::setup::TreeNode,
traits::{evaluator::PerChunkOffset, gate::GatePlacementStrategy},
};

use super::*;

pub fn get_evaluators_of_general_purpose_cols<
P: boojum::field::traits::field_like::PrimeFieldLikeVectorized<Base = F>,
CFG: CSConfig,
>(
cs: &CSReferenceAssembly<F, P, CFG>,
pub fn get_evaluators_of_general_purpose_cols(
config: &GpuProofConfig,
selectors_placement: &TreeNode,
) -> Vec<GateEvaluationParams> {
let mut gates = vec![];
for (evaluator_idx, (evaluator, _gate_type_id)) in cs
.evaluation_data_over_general_purpose_columns
for (evaluator_idx, evaluator) in config
.evaluators_over_general_purpose_columns
.iter()
.zip(
cs.evaluation_data_over_general_purpose_columns
.gate_type_ids_for_general_purpose_columns
.iter(),
)
.enumerate()
{
if evaluator.debug_name
Expand Down Expand Up @@ -72,34 +61,21 @@ pub fn get_evaluators_of_general_purpose_cols<
gates
}

pub fn get_specialized_evaluators_from_assembly<
P: boojum::field::traits::field_like::PrimeFieldLikeVectorized<Base = F>,
CFG: CSConfig,
>(
cs: &CSReferenceAssembly<F, P, CFG>,
pub fn get_specialized_evaluators_from_assembly(
config: &GpuProofConfig,
selectors_placement: &TreeNode,
) -> Vec<GateEvaluationParams> {
if cs
.evaluation_data_over_specialized_columns
.evaluators_over_specialized_columns
.len()
< 1
{
if config.evaluators_over_specialized_columns.len() < 1 {
return vec![];
}

let (_deg, _constants_for_gates_over_general_purpose_columns) =
selectors_placement.compute_stats();
let mut gates = vec![];
for (idx, (evaluator, gate_type_id)) in cs
.evaluation_data_over_specialized_columns
for (idx, (evaluator, gate_type_id)) in config
.evaluators_over_specialized_columns
.iter()
.zip(
cs.evaluation_data_over_specialized_columns
.gate_type_ids_for_specialized_columns
.iter(),
)
.zip(config.gate_type_ids_for_specialized_columns.iter())
.enumerate()
{
if evaluator.debug_name
Expand All @@ -120,7 +96,7 @@ pub fn get_specialized_evaluators_from_assembly<
);

let num_terms = evaluator.num_quotient_terms;
let placement_strategy = cs
let placement_strategy = config
.placement_strategies
.get(&gate_type_id)
.copied()
Expand All @@ -136,9 +112,8 @@ pub fn get_specialized_evaluators_from_assembly<

let total_terms = num_terms * num_repetitions;

let (initial_offset, per_repetition_offset, total_constants_available) = cs
.evaluation_data_over_specialized_columns
.offsets_for_specialized_evaluators[idx];
let (initial_offset, per_repetition_offset, total_constants_available) =
config.offsets_for_specialized_evaluators[idx];

let _placement_data = (
num_repetitions,
Expand Down
14 changes: 8 additions & 6 deletions src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,13 @@ impl ProverContext {
aux_events,
aux_h2d_buffer,
});
// 10 sets of powers * 2X safety margin
set_l2_persistence_carveout(2 * 10 * 8 * (1 << 12))?;
set_l2_persistence_for_twiddles(get_stream())?;
for stream in _aux_streams() {
set_l2_persistence_for_twiddles(stream)?;
if l2_persist_max != 0 {
// 10 sets of powers * 2X safety margin
set_l2_persistence_carveout(2 * 10 * 8 * (1 << 12))?;
set_l2_persistence_for_twiddles(get_stream())?;
for stream in _aux_streams() {
set_l2_persistence_for_twiddles(stream)?;
}
}
};
Ok(Self {})
Expand Down Expand Up @@ -119,7 +121,7 @@ impl ProverContext {
let cuda_ctx = CudaContext::create(12, 12)?;
// grab small slice then consume everything
let small_device_alloc = SmallStaticDeviceAllocator::init()?;
let device_alloc = StaticDeviceAllocator::init(num_blocks, block_size)?;
let device_alloc = StaticDeviceAllocator::init(num_blocks, num_blocks, block_size)?;
let small_host_alloc = SmallStaticHostAllocator::init()?;
let host_alloc = StaticHostAllocator::init(1 << 8, block_size)?;
Self::create_internal(
Expand Down
35 changes: 18 additions & 17 deletions src/data_structures/cache.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
use boojum::config::ProvingCSConfig;
use boojum::cs::implementations::pow::PoWRunner;
use boojum::cs::implementations::prover::ProofConfig;
use boojum::cs::implementations::reference_cs::CSReferenceAssembly;
use boojum::cs::implementations::transcript::Transcript;
use boojum::cs::implementations::verifier::VerificationKey;
use boojum::cs::implementations::verifier::{VerificationKey, VerificationKeyCircuitGeometry};
use boojum::cs::implementations::witness::WitnessVec;
use boojum::cs::oracle::TreeHasher;
use boojum::field::traits::field_like::PrimeFieldLikeVectorized;
use boojum::worker::Worker;
use cudart_sys::CudaError::ErrorMemoryAllocation;
use std::collections::BTreeMap;
Expand Down Expand Up @@ -48,6 +45,7 @@ impl StorageCacheStrategy {
}

use crate::cs::GpuSetup;
use crate::gpu_proof_config::GpuProofConfig;
use crate::prover::{
compute_quotient_degree, gpu_prove_from_external_witness_data_with_cache_strategy,
};
Expand Down Expand Up @@ -496,13 +494,12 @@ pub(crate) struct CacheStrategy {

impl CacheStrategy {
pub(crate) fn get<
P: PrimeFieldLikeVectorized<Base = F>,
TR: Transcript<F, CompatibleCap = [F; 4]>,
H: TreeHasher<F, Output = TR::CompatibleCap>,
POW: PoWRunner,
A: GoodAllocator,
>(
cs: &CSReferenceAssembly<F, P, ProvingCSConfig>,
config: &GpuProofConfig,
external_witness_data: &WitnessVec<F>,
proof_config: ProofConfig,
setup: &GpuSetup<A>,
Expand All @@ -515,13 +512,14 @@ impl CacheStrategy {
println!("reusing cache strategy");
Ok(*strategy)
} else {
let strategies = Self::get_strategy_candidates(cs, &proof_config, setup);
let strategies =
Self::get_strategy_candidates(config, &proof_config, setup, &vk.fixed_parameters);
for (_, strategy) in strategies.iter().copied() {
_setup_cache_reset();
dry_run_start();
let result =
gpu_prove_from_external_witness_data_with_cache_strategy::<P, TR, H, POW, A>(
cs,
gpu_prove_from_external_witness_data_with_cache_strategy::<TR, H, POW, A>(
config,
external_witness_data,
proof_config.clone(),
setup,
Expand All @@ -548,27 +546,30 @@ impl CacheStrategy {
}
}

pub(crate) fn get_strategy_candidates<
P: PrimeFieldLikeVectorized<Base = F>,
A: GoodAllocator,
>(
cs: &CSReferenceAssembly<F, P, ProvingCSConfig>,
pub(crate) fn get_strategy_candidates<A: GoodAllocator>(
config: &GpuProofConfig,
proof_config: &ProofConfig,
setup: &GpuSetup<A>,
geometry: &VerificationKeyCircuitGeometry,
) -> Vec<((usize, usize), CacheStrategy)> {
let fri_lde_degree = proof_config.fri_lde_factor;
let quotient_degree = compute_quotient_degree(&cs, &setup.selectors_placement);
let quotient_degree = compute_quotient_degree(&config, &setup.selectors_placement);
let used_lde_degree = usize::max(quotient_degree, fri_lde_degree);
let setup_layout = setup.layout;
let domain_size = geometry.domain_size as usize;
let lookup_parameters = geometry.lookup_parameters;
let total_tables_len = geometry.total_tables_len as usize;
let num_multiplicity_cols =
lookup_parameters.num_multipicities_polys(total_tables_len, domain_size);
let trace_layout = TraceLayout {
num_variable_cols: setup.variables_hint.len(),
num_witness_cols: setup.witnesses_hint.len(),
num_multiplicity_cols: cs.num_multipicities_polys(),
num_multiplicity_cols,
};
let arguments_layout = ArgumentsLayout::from_trace_layout_and_lookup_params(
trace_layout,
quotient_degree,
cs.lookup_parameters.clone(),
geometry.lookup_parameters,
);
let setup_num_polys = setup_layout.num_polys();
let trace_num_polys = trace_layout.num_polys();
Expand Down
Loading
Loading