diff --git a/aggregator/src/aggregation/circuit.rs b/aggregator/src/aggregation/circuit.rs index c4d30bc61a..cf809b1b6d 100644 --- a/aggregator/src/aggregation/circuit.rs +++ b/aggregator/src/aggregation/circuit.rs @@ -457,8 +457,53 @@ impl Circuit for AggregationCircuit { barycentric_assignments, )?; - // TODO: uncomment this line - // let decoder_exports = config.decoder_config.assign(&mut layouter)?; + let batch_bytes = batch_data.get_batch_data_bytes(); + let encoded_batch_bytes = batch_data.get_encoded_batch_data_bytes(); + let ( + witness_rows, + decoded_literals, + aux_data, + fse_aux_tables, + block_info_arr, + sequence_info_arr, + address_table_arr, + sequence_exec_result, + ) = crate::aggregation::decoder::witgen::process( + &encoded_batch_bytes, + challenges.keccak_input(), + ); + + // sanity check: + let (recovered_bytes, sequence_exec_info_arr) = sequence_exec_result.into_iter().fold( + (Vec::new(), Vec::new()), + |(mut out_byte, mut out_exec), res| { + out_byte.extend(res.recovered_bytes); + out_exec.push(res.exec_trace); + (out_byte, out_exec) + }, + ); + assert_eq!( + batch_bytes, recovered_bytes, + "original and recovered bytes mismatch" + ); + + // TODO: add copy constraints between decoder_exports and batchdataconfig + + // blobdataconfig + let _decoder_exports = config.decoder_config.assign( + &mut layouter, + &batch_bytes, + &encoded_batch_bytes, + witness_rows, + decoded_literals, + aux_data, + fse_aux_tables, + block_info_arr, + sequence_info_arr, + address_table_arr, + sequence_exec_info_arr, + &challenges, + 20, // TODO: configure k for aggregation circuit instead of hard-coded here. 
+ )?; layouter.assign_region( || "batch checks", diff --git a/aggregator/src/aggregation/decoder.rs b/aggregator/src/aggregation/decoder.rs index ee930ae804..ce24c70aca 100644 --- a/aggregator/src/aggregation/decoder.rs +++ b/aggregator/src/aggregation/decoder.rs @@ -16,8 +16,7 @@ use halo2_proofs::{ circuit::{AssignedCell, Layouter, Value}, halo2curves::bn256::Fr, plonk::{ - Advice, Assigned, Column, ConstraintSystem, Error, Expression, Fixed, SecondPhase, - VirtualCells, + Advice, Column, ConstraintSystem, Error, Expression, Fixed, SecondPhase, VirtualCells, }, poly::Rotation, }; @@ -90,8 +89,6 @@ pub struct DecoderConfig { bitstring_table: BitstringTable, /// Helper table for decoding FSE tables. fse_table: FseTable, - - // witgen_debug /// Helper table for sequences as instructions. sequence_instruction_table: SequenceInstructionTable, // /// Helper table in the "output" region for accumulating the result of executing sequences. @@ -126,8 +123,6 @@ struct TagConfig { tag_rlc: Column, /// Represents keccak randomness exponentiated by the tag len. rpow_tag_len: Column, - /// Whether this tag outputs decoded bytes or not. - is_output: Column, /// Whether this tag is processed from back-to-front or not. is_reverse: Column, /// Whether this row represents the first byte in a new tag. Effectively this also means that @@ -171,7 +166,6 @@ impl TagConfig { tag_rlc_acc: meta.advice_column_in(SecondPhase), tag_rlc: meta.advice_column_in(SecondPhase), rpow_tag_len: meta.advice_column_in(SecondPhase), - is_output: meta.advice_column(), is_reverse: meta.advice_column(), is_change: meta.advice_column(), // degree reduction. 
@@ -1339,9 +1333,9 @@ impl DecoderConfig { meta.query_advice(config.tag_config.tag, Rotation::cur()), meta.query_advice(config.tag_config.tag_next, Rotation::cur()), meta.query_advice(config.tag_config.max_len, Rotation::cur()), - meta.query_advice(config.tag_config.is_output, Rotation::cur()), meta.query_advice(config.tag_config.is_reverse, Rotation::cur()), meta.query_advice(config.block_config.is_block, Rotation::cur()), + 0.expr(), // unused ] .into_iter() .zip_eq(config.fixed_table.table_exprs(meta)) @@ -1442,7 +1436,6 @@ impl DecoderConfig { config.tag_config.tag_rlc, config.tag_config.max_len, config.tag_config.rpow_tag_len, - config.tag_config.is_output, config.tag_config.is_reverse, config.block_config.is_block, config.encoded_rlc, @@ -4115,7 +4108,6 @@ impl DecoderConfig { sequence_exec_info_arr: Vec>, challenges: &Challenges>, k: u32, - // witgen_debug ) -> Result { let mut pow_of_rand: Vec> = vec![Value::known(Fr::ONE)]; @@ -4192,7 +4184,7 @@ impl DecoderConfig { )); } self.literals_header_table - .assign(layouter, literal_headers)?; + .assign(k, self.unusable_rows(), layouter, literal_headers)?; ///////////////////////////////////////// //// Assign Sequence-related Configs //// @@ -4478,12 +4470,6 @@ impl DecoderConfig { i, || row.state.tag_rlc, )?; - region.assign_advice( - || "tag_config.is_output", - self.tag_config.is_output, - i, - || Value::known(Fr::from(row.state.tag.is_output() as u64)), - )?; let tag_len = row.state.tag_len as usize; if tag_len >= pow_of_rand.len() { diff --git a/aggregator/src/aggregation/decoder/tables/bitstring.rs b/aggregator/src/aggregation/decoder/tables/bitstring.rs index 86d67bbf4d..0f5b38927a 100644 --- a/aggregator/src/aggregation/decoder/tables/bitstring.rs +++ b/aggregator/src/aggregation/decoder/tables/bitstring.rs @@ -422,7 +422,6 @@ impl BitstringTable { cb.gate(condition) }); - // witgen_debug // For every bitstring accumulation, the byte indices must be in the order in which // they appear in the rows 
assigned to the DecoderConfig. Which means: // - byte_idx_2 at the most increments by 1 compared to byte_idx_1. diff --git a/aggregator/src/aggregation/decoder/tables/fixed/tag_transition.rs b/aggregator/src/aggregation/decoder/tables/fixed/tag_transition.rs index 3692ca1d5a..ac1272281c 100644 --- a/aggregator/src/aggregation/decoder/tables/fixed/tag_transition.rs +++ b/aggregator/src/aggregation/decoder/tables/fixed/tag_transition.rs @@ -1,9 +1,6 @@ use halo2_proofs::{circuit::Value, halo2curves::bn256::Fr}; -use crate::aggregation::decoder::{ - tables::fixed::FixedLookupTag, - witgen::{lookup_max_tag_len, ZstdTag}, -}; +use crate::aggregation::decoder::{tables::fixed::FixedLookupTag, witgen::ZstdTag}; use super::FixedLookupValues; @@ -14,8 +11,6 @@ pub struct RomTagTransition { pub tag_next: ZstdTag, /// The maximum number of bytes that are needed to represent the current tag. pub max_len: u64, - /// Whether this tag outputs a decoded byte or not. - pub is_output: bool, /// Whether this tag is processed from back-to-front or not. pub is_reverse: bool, /// Whether this tag belongs to a ``block`` in zstd or not. @@ -48,10 +43,10 @@ impl FixedLookupValues for RomTagTransition { Value::known(Fr::from(FixedLookupTag::TagTransition as u64)), Value::known(Fr::from(tag as u64)), Value::known(Fr::from(tag_next as u64)), - Value::known(Fr::from(lookup_max_tag_len(tag))), - Value::known(Fr::from(tag.is_output())), + Value::known(Fr::from(tag.max_len())), Value::known(Fr::from(tag.is_reverse())), Value::known(Fr::from(tag.is_block())), + Value::known(Fr::zero()), // unused ] }) .to_vec() diff --git a/aggregator/src/aggregation/decoder/tables/fse.rs b/aggregator/src/aggregation/decoder/tables/fse.rs index bfe2afde15..f15db31dfc 100644 --- a/aggregator/src/aggregation/decoder/tables/fse.rs +++ b/aggregator/src/aggregation/decoder/tables/fse.rs @@ -126,6 +126,7 @@ pub struct FseTable { impl FseTable { /// Configure the FSE table. 
+ #[allow(clippy::too_many_arguments)] pub fn configure( meta: &mut ConstraintSystem, q_enable: Column, @@ -959,7 +960,6 @@ impl FseTable { } } - // witgen_debug assert!( state_idx as u64 == table.table_size, "Last state should correspond to end of table" diff --git a/aggregator/src/aggregation/decoder/tables/literals_header.rs b/aggregator/src/aggregation/decoder/tables/literals_header.rs index 512187966e..a9df84670f 100644 --- a/aggregator/src/aggregation/decoder/tables/literals_header.rs +++ b/aggregator/src/aggregation/decoder/tables/literals_header.rs @@ -125,7 +125,6 @@ impl LiteralsHeaderTable { cb.gate(condition) }); - // witgen_debug meta.create_gate( "LiteralsHeaderTable: subsequent rows after q_first=true", |meta| { @@ -144,18 +143,17 @@ impl LiteralsHeaderTable { cb.require_boolean("is_padding is boolean", is_padding_cur.expr()); cb.require_boolean("is_padding delta is boolean", is_padding_delta); - // witgen_debug // block_idx increments. // // This also ensures that we are not populating conflicting literal headers for the // same block_idx in this layout. - // cb.condition(not::expr(is_padding_cur), |cb| { - // cb.require_equal( - // "block_idx increments", - // meta.query_advice(config.block_idx, Rotation::cur()), - // meta.query_advice(config.block_idx, Rotation::prev()) + 1.expr(), - // ); - // }); + cb.condition(not::expr(is_padding_cur), |cb| { + cb.require_equal( + "block_idx increments", + meta.query_advice(config.block_idx, Rotation::cur()), + meta.query_advice(config.block_idx, Rotation::prev()) + 1.expr(), + ); + }); cb.gate(condition) }, @@ -193,6 +191,8 @@ impl LiteralsHeaderTable { /// Assign witness to the literals header table. 
pub fn assign( &self, + k: u32, + unusable_rows: usize, layouter: &mut impl Layouter, literals_headers: Vec<(u64, u64, (u64, u64, u64))>, ) -> Result<(), Error> { @@ -235,7 +235,6 @@ impl LiteralsHeaderTable { (self.byte1, byte1, "byte1"), (self.byte2, byte2, "byte2"), (self.regen_size, regen_size, "regen_size"), - // witgen_debug: check bit order ( self.size_format_bit0, (size_format & 1) as u64, @@ -258,8 +257,14 @@ impl LiteralsHeaderTable { } } - // TODO(ray): assign is_padding=true for other rows so that the block_idx - // increments gate is not checked. + for offset in literals_headers.len()..((1 << k) - unusable_rows) { + region.assign_advice( + || "is_padding", + self.is_padding, + offset, + || Value::known(F::one()), + )?; + } Ok(()) }, diff --git a/aggregator/src/aggregation/decoder/witgen.rs b/aggregator/src/aggregation/decoder/witgen.rs index 9719e626cd..1a8f368cb3 100644 --- a/aggregator/src/aggregation/decoder/witgen.rs +++ b/aggregator/src/aggregation/decoder/witgen.rs @@ -1,14 +1,7 @@ -#![allow(dead_code)] -#![allow(clippy::too_many_arguments)] - use eth_types::Field; use halo2_proofs::circuit::Value; use revm_precompile::HashMap; -use std::io; -// witgen_debug -use std::io::Write; - mod params; pub use params::*; @@ -18,22 +11,6 @@ pub use types::{ZstdTag::*, *}; pub mod util; use util::{be_bits_to_value, increment_idx, le_bits_to_value, value_bits_le}; -const TAG_MAX_LEN: [(ZstdTag, u64); 9] = [ - (FrameHeaderDescriptor, 1), - (FrameContentSize, 8), - (BlockHeader, 3), - (ZstdBlockLiteralsHeader, 5), - (ZstdBlockLiteralsRawBytes, 1048575), // (1 << 20) - 1 - (ZstdBlockSequenceHeader, 4), - (ZstdBlockSequenceFseCode, 128), - (ZstdBlockSequenceData, 1048575), // (1 << 20) - 1 - (Null, 0), -]; - -pub fn lookup_max_tag_len(tag: ZstdTag) -> u64 { - TAG_MAX_LEN.iter().find(|record| record.0 == tag).unwrap().1 -} - const CMOT_N: u64 = 31; /// FrameHeaderDescriptor and FrameContentSize @@ -83,23 +60,6 @@ fn process_frame_header( _ => fcs, } }; - let 
fcs_tag_value_iter = fcs_bytes - .iter() - .scan(Value::known(F::zero()), |acc, &byte| { - *acc = *acc * Value::known(F::from(256u64)) + Value::known(F::from(byte as u64)); - Some(*acc) - }); - let fcs_tag_value = fcs_tag_value_iter - .clone() - .last() - .expect("FrameContentSize expected"); - let fcs_value_rlcs = fcs_bytes - .iter() - .scan(Value::known(F::zero()), |acc, &byte| { - *acc = *acc * randomness + Value::known(F::from(byte as u64)); - Some(*acc) - }) - .collect::>>(); let tag_rlc_iter = fcs_bytes .iter() @@ -110,23 +70,16 @@ fn process_frame_header( .collect::>>(); let tag_rlc = *(tag_rlc_iter.clone().last().expect("Tag RLC expected")); - let aux_1 = fcs_value_rlcs - .last() - .expect("FrameContentSize bytes expected"); - let aux_2 = fhd_value_rlc; - ( byte_offset + 1 + fcs_tag_len, std::iter::once(ZstdWitnessRow { state: ZstdState { tag: ZstdTag::FrameHeaderDescriptor, tag_next: ZstdTag::FrameContentSize, - max_tag_len: lookup_max_tag_len(ZstdTag::FrameHeaderDescriptor), + max_tag_len: ZstdTag::FrameHeaderDescriptor.max_len(), block_idx: 0, tag_len: 1, tag_idx: 1, - tag_value: Value::known(F::from(*fhd_byte as u64)), - tag_value_acc: Value::known(F::from(*fhd_byte as u64)), is_tag_change: true, tag_rlc: Value::known(F::from(*fhd_byte as u64)), tag_rlc_acc: Value::known(F::from(*fhd_byte as u64)), @@ -138,63 +91,37 @@ fn process_frame_header( value_rlc: Value::known(F::zero()), ..Default::default() }, - decoded_data: DecodedData { - decoded_len: fcs, - decoded_len_acc: 0, - total_decoded_len: last_row.decoded_data.total_decoded_len + fcs, - decoded_byte: 0, - decoded_value_rlc: Value::known(F::zero()), - }, + decoded_data: DecodedData { decoded_len: fcs }, bitstream_read_data: BitstreamReadRow::default(), fse_data: FseDecodingRow::default(), }) - .chain( - fcs_bytes - .iter() - .zip(fcs_tag_value_iter) - .zip(fcs_value_rlcs.iter()) - .zip(tag_rlc_iter.iter()) - .enumerate() - .map( - |(i, (((&value_byte, tag_value_acc), _value_rlc), &tag_rlc_acc))| { - 
ZstdWitnessRow { - state: ZstdState { - tag: ZstdTag::FrameContentSize, - tag_next: ZstdTag::BlockHeader, - block_idx: 0, - max_tag_len: lookup_max_tag_len(ZstdTag::FrameContentSize), - tag_len: fcs_tag_len as u64, - tag_idx: (i + 1) as u64, - tag_value: fcs_tag_value, - tag_value_acc, - is_tag_change: i == 0, - tag_rlc, - tag_rlc_acc, - }, - encoded_data: EncodedData { - byte_idx: (byte_offset + 2 + i) as u64, - encoded_len: last_row.encoded_data.encoded_len, - value_byte, - reverse: false, - reverse_idx: (fcs_tag_len - i) as u64, - reverse_len: fcs_tag_len as u64, - aux_1: *aux_1, - aux_2, - value_rlc: fhd_value_rlc, - }, - decoded_data: DecodedData { - decoded_len: fcs, - decoded_len_acc: 0, - total_decoded_len: last_row.decoded_data.total_decoded_len + fcs, - decoded_byte: 0, - decoded_value_rlc: Value::known(F::zero()), - }, - bitstream_read_data: BitstreamReadRow::default(), - fse_data: FseDecodingRow::default(), - } - }, - ), - ) + .chain(fcs_bytes.iter().zip(tag_rlc_iter.iter()).enumerate().map( + |(i, (&value_byte, &tag_rlc_acc))| ZstdWitnessRow { + state: ZstdState { + tag: ZstdTag::FrameContentSize, + tag_next: ZstdTag::BlockHeader, + block_idx: 0, + max_tag_len: ZstdTag::FrameContentSize.max_len(), + tag_len: fcs_tag_len as u64, + tag_idx: (i + 1) as u64, + is_tag_change: i == 0, + tag_rlc, + tag_rlc_acc, + }, + encoded_data: EncodedData { + byte_idx: (byte_offset + 2 + i) as u64, + encoded_len: last_row.encoded_data.encoded_len, + value_byte, + reverse: false, + reverse_idx: (fcs_tag_len - i) as u64, + reverse_len: fcs_tag_len as u64, + value_rlc: fhd_value_rlc, + }, + decoded_data: DecodedData { decoded_len: fcs }, + bitstream_read_data: BitstreamReadRow::default(), + fse_data: FseDecodingRow::default(), + }, + )) .collect::>(), ) } @@ -226,7 +153,7 @@ fn process_block( let last_row = rows.last().expect("last row expected to exist"); let ( - _byte_offset, + end_offset, rows, literals, lstream_len, @@ -250,7 +177,7 @@ fn process_block( 
witness_rows.extend_from_slice(&rows); ( - byte_offset, + end_offset, witness_rows, block_info, sequence_info, @@ -290,12 +217,6 @@ fn process_block_header( _ => unreachable!("BlockType::ZstdCompressedBlock expected"), }; - let tag_value_iter = bh_bytes.iter().scan(Value::known(F::zero()), |acc, &byte| { - *acc = *acc * Value::known(F::from(256u64)) + Value::known(F::from(byte as u64)); - Some(*acc) - }); - let tag_value = tag_value_iter.clone().last().expect("BlockHeader expected"); - let tag_rlc_iter = bh_bytes .iter() .scan(Value::known(F::zero()), |acc, &byte| { @@ -309,58 +230,36 @@ fn process_block_header( (0..last_row.state.tag_len).fold(Value::known(F::one()), |acc, _| acc * randomness); let value_rlc = last_row.encoded_data.value_rlc * multiplier + last_row.state.tag_rlc; - // BlockHeader follows FrameContentSize which is processed in reverse order. - // Hence value_rlc at the first BlockHeader byte will be calculated as: - // - // value_rlc::cur == aux_1::prev * (rand ^ reverse_len) * rand - // + aux_2::prev * rand - // + value_byte::cur - let acc_start = last_row.encoded_data.aux_1 - * randomness.map(|r| r.pow([last_row.encoded_data.reverse_len, 0, 0, 0])) - + last_row.encoded_data.aux_2; - let _value_rlcs = bh_bytes - .iter() - .scan(acc_start, |acc, &byte| { - *acc = *acc * randomness + Value::known(F::from(byte as u64)); - Some(*acc) - }) - .collect::>>(); - ( byte_offset + N_BLOCK_HEADER_BYTES, bh_bytes .iter() - .zip(tag_value_iter) .zip(tag_rlc_iter.iter()) .enumerate() - .map( - |(i, ((&value_byte, tag_value_acc), tag_rlc_acc))| ZstdWitnessRow { - state: ZstdState { - tag: ZstdTag::BlockHeader, - tag_next, - block_idx, - max_tag_len: lookup_max_tag_len(ZstdTag::BlockHeader), - tag_len: N_BLOCK_HEADER_BYTES as u64, - tag_idx: (i + 1) as u64, - tag_value, - tag_value_acc, - is_tag_change: i == 0, - tag_rlc, - tag_rlc_acc: *tag_rlc_acc, - }, - encoded_data: EncodedData { - byte_idx: (byte_offset + i + 1) as u64, - encoded_len: 
last_row.encoded_data.encoded_len, - value_byte, - reverse: false, - value_rlc, - ..Default::default() - }, - bitstream_read_data: BitstreamReadRow::default(), - decoded_data: last_row.decoded_data.clone(), - fse_data: FseDecodingRow::default(), + .map(|(i, (&value_byte, tag_rlc_acc))| ZstdWitnessRow { + state: ZstdState { + tag: ZstdTag::BlockHeader, + tag_next, + block_idx, + max_tag_len: ZstdTag::BlockHeader.max_len(), + tag_len: N_BLOCK_HEADER_BYTES as u64, + tag_idx: (i + 1) as u64, + is_tag_change: i == 0, + tag_rlc, + tag_rlc_acc: *tag_rlc_acc, + }, + encoded_data: EncodedData { + byte_idx: (byte_offset + i + 1) as u64, + encoded_len: last_row.encoded_data.encoded_len, + value_byte, + reverse: false, + value_rlc, + ..Default::default() }, - ) + bitstream_read_data: BitstreamReadRow::default(), + decoded_data: last_row.decoded_data.clone(), + fse_data: FseDecodingRow::default(), + }) .collect::>(), block_info, ) @@ -396,7 +295,7 @@ fn process_block_zstd( block_size: usize, last_block: bool, ) -> BlockProcessingResult { - let end_offset = byte_offset + block_size; + let expected_end_offset = byte_offset + block_size; let mut witness_rows = vec![]; // 1-5 bytes LiteralSectionHeader @@ -428,13 +327,6 @@ fn process_block_zstd( *acc = *acc * randomness + Value::known(F::from(byte as u64)); Some(*acc) }); - let decoded_value_rlc_iter = - literals - .iter() - .scan(last_row.decoded_data.decoded_value_rlc, |acc, &byte| { - *acc = *acc * randomness + Value::known(F::from(byte as u64)); - Some(*acc) - }); let tag_value_iter = literals.iter().scan(Value::known(F::zero()), |acc, &byte| { *acc = *acc * randomness + Value::known(F::from(byte as u64)); Some(*acc) @@ -451,43 +343,34 @@ fn process_block_zstd( literals .iter() .zip(tag_value_iter) - .zip(decoded_value_rlc_iter) .zip(tag_rlc_iter) .enumerate() .map( - |(i, (((&value_byte, tag_value_acc), decoded_value_rlc), tag_rlc_acc))| { - ZstdWitnessRow { - state: ZstdState { - tag, - tag_next, - block_idx, - max_tag_len: 
lookup_max_tag_len(tag), - tag_len: regen_size as u64, - tag_idx: (i + 1) as u64, - tag_value, - tag_value_acc, - is_tag_change: i == 0, - tag_rlc, - tag_rlc_acc, - }, - encoded_data: EncodedData { - byte_idx: (byte_offset + i + 1) as u64, - encoded_len: last_row.encoded_data.encoded_len, - value_byte, - value_rlc, - reverse: false, - ..Default::default() - }, - decoded_data: DecodedData { - decoded_len: last_row.decoded_data.decoded_len, - decoded_len_acc: last_row.decoded_data.decoded_len + (i as u64) + 1, - total_decoded_len: last_row.decoded_data.total_decoded_len, - decoded_byte: value_byte, - decoded_value_rlc, - }, - bitstream_read_data: BitstreamReadRow::default(), - fse_data: FseDecodingRow::default(), - } + |(i, ((&value_byte, tag_value_acc), tag_rlc_acc))| ZstdWitnessRow { + state: ZstdState { + tag, + tag_next, + block_idx, + max_tag_len: tag.max_len(), + tag_len: regen_size as u64, + tag_idx: (i + 1) as u64, + is_tag_change: i == 0, + tag_rlc, + tag_rlc_acc, + }, + encoded_data: EncodedData { + byte_idx: (byte_offset + i + 1) as u64, + encoded_len: last_row.encoded_data.encoded_len, + value_byte, + value_rlc, + reverse: false, + ..Default::default() + }, + decoded_data: DecodedData { + decoded_len: last_row.decoded_data.decoded_len, + }, + bitstream_read_data: BitstreamReadRow::default(), + fse_data: FseDecodingRow::default(), }, ) .collect::>(), @@ -502,7 +385,7 @@ fn process_block_zstd( let last_row = witness_rows.last().expect("last row expected to exist"); let ( - bytes_offset, + end_offset, rows, fse_aux_tables, address_table_rows, @@ -513,16 +396,21 @@ fn process_block_zstd( src, block_idx, byte_offset, - end_offset, + expected_end_offset, literals.clone(), last_row, last_block, randomness, ); + // sanity check: + assert_eq!( + end_offset, expected_end_offset, + "end offset after tag=SequencesData mismatch" + ); witness_rows.extend_from_slice(&rows); ( - bytes_offset, + end_offset, witness_rows, literals, lstream_len, @@ -556,6 +444,7 @@ type 
SequencesProcessingResult = ( Vec, ); +#[allow(clippy::too_many_arguments)] fn process_sequences( src: &[u8], block_idx: u64, @@ -566,14 +455,13 @@ fn process_sequences( last_block: bool, randomness: Value, ) -> SequencesProcessingResult { - // Initialize witness rows + // Initialize witness values let mut witness_rows: Vec> = vec![]; - - // Other consistent values let encoded_len = last_row.encoded_data.encoded_len; - let _decoded_data = last_row.decoded_data.clone(); - // First, process the sequence header + ////////////////////////////////////////////////////// + ///// Sequence Section Part 1: Sequence Header ////// + ////////////////////////////////////////////////////// let mut sequence_info = SequenceInfo { block_idx: block_idx as usize, ..Default::default() @@ -613,18 +501,20 @@ fn process_sequences( assert!(reserved == 0, "Reserved bits must be 0"); - // TODO: Treatment of other encoding modes + // Note: Only 2 modes of FSE encoding are accepted (instead of 4): + // 0 - Predefined. + // 2 - Variable bit packing. 
assert!( literal_lengths_mode == 2 || literal_lengths_mode == 0, - "Only FSE_Compressed_Mode is allowed" + "Only FSE_Compressed_Mode or Predefined are allowed" ); assert!( offsets_mode == 2 || offsets_mode == 0, - "Only FSE_Compressed_Mode is allowed" + "Only FSE_Compressed_Mode or Predefined are allowed" ); assert!( match_lengths_mode == 2 || match_lengths_mode == 0, - "Only FSE_Compressed_Mode is allowed" + "Only FSE_Compressed_Mode or Predefined are allowed" ); sequence_info.compression_mode = [ literal_lengths_mode > 0, @@ -640,13 +530,6 @@ fn process_sequences( // Add witness rows for the sequence header let sequence_header_start_offset = byte_offset; let sequence_header_end_offset = byte_offset + num_sequence_header_bytes; - let tag_value_iter = src[sequence_header_start_offset..sequence_header_end_offset] - .iter() - .scan(Value::known(F::zero()), |acc, &byte| { - *acc = *acc * randomness + Value::known(F::from(byte as u64)); - Some(*acc) - }); - let tag_value = tag_value_iter.clone().last().expect("Tag value must exist"); let tag_rlc_iter = src[sequence_header_start_offset..sequence_header_end_offset] .iter() @@ -658,52 +541,45 @@ fn process_sequences( let header_rows = src[sequence_header_start_offset..sequence_header_end_offset] .iter() - .zip(tag_value_iter) .zip(tag_rlc_iter) .enumerate() - .map( - |(i, ((&value_byte, tag_value_acc), tag_rlc_acc))| ZstdWitnessRow { - state: ZstdState { - tag: ZstdTag::ZstdBlockSequenceHeader, - tag_next: if is_all_predefined_fse { - ZstdTag::ZstdBlockSequenceData - } else { - ZstdTag::ZstdBlockSequenceFseCode - }, - block_idx, - max_tag_len: lookup_max_tag_len(ZstdTag::ZstdBlockSequenceHeader), - tag_len: num_sequence_header_bytes as u64, - tag_idx: (i + 1) as u64, - tag_value, - tag_value_acc, - is_tag_change: i == 0, - tag_rlc, - tag_rlc_acc, - }, - encoded_data: EncodedData { - byte_idx: (sequence_header_start_offset + i + 1) as u64, - encoded_len: last_row.encoded_data.encoded_len, - value_byte, - value_rlc, - 
reverse: false, - ..Default::default() - }, - decoded_data: DecodedData { - decoded_len: last_row.decoded_data.decoded_len, - decoded_len_acc: last_row.decoded_data.decoded_len + (i as u64) + 1, - total_decoded_len: last_row.decoded_data.total_decoded_len, - decoded_byte: value_byte, - decoded_value_rlc: last_row.decoded_data.decoded_value_rlc, + .map(|(i, (&value_byte, tag_rlc_acc))| ZstdWitnessRow { + state: ZstdState { + tag: ZstdTag::ZstdBlockSequenceHeader, + tag_next: if is_all_predefined_fse { + ZstdTag::ZstdBlockSequenceData + } else { + ZstdTag::ZstdBlockSequenceFseCode }, - bitstream_read_data: BitstreamReadRow::default(), - fse_data: FseDecodingRow::default(), + block_idx, + max_tag_len: ZstdTag::ZstdBlockSequenceHeader.max_len(), + tag_len: num_sequence_header_bytes as u64, + tag_idx: (i + 1) as u64, + is_tag_change: i == 0, + tag_rlc, + tag_rlc_acc, }, - ) + encoded_data: EncodedData { + byte_idx: (sequence_header_start_offset + i + 1) as u64, + encoded_len: last_row.encoded_data.encoded_len, + value_byte, + value_rlc, + reverse: false, + ..Default::default() + }, + decoded_data: DecodedData { + decoded_len: last_row.decoded_data.decoded_len, + }, + bitstream_read_data: BitstreamReadRow::default(), + fse_data: FseDecodingRow::default(), + }) .collect::>(); witness_rows.extend_from_slice(&header_rows); - // Second, process the sequence tables (encoded using FSE) + ///////////////////////////////////////////////// + ///// Sequence Section Part 2: FSE Tables ////// + ///////////////////////////////////////////////// let byte_offset = sequence_header_end_offset; let fse_starting_byte_offset = byte_offset; @@ -717,6 +593,7 @@ fn process_sequences( ) .expect("Reconstructing FSE-packed Literl Length (LL) table should not fail."); let llt = table_llt.parse_state_table(); + // Determine the accuracy log of LLT let al_llt = if literal_lengths_mode > 0 { bit_boundaries_llt .first() @@ -727,12 +604,6 @@ fn process_sequences( 6 }; - // witgen_debug - let stdout = 
io::stdout(); - let mut handle = stdout.lock(); - // write!(handle, "bit_boundaries_llt: {:?}", bit_boundaries_llt).unwrap(); - // writeln!(handle).unwrap(); - // Cooked Match Offset Table (CMOT) let byte_offset = byte_offset + n_fse_bytes_llt; let (n_fse_bytes_cmot, bit_boundaries_cmot, table_cmot) = FseAuxiliaryTableData::reconstruct( @@ -744,6 +615,7 @@ fn process_sequences( ) .expect("Reconstructing FSE-packed Cooked Match Offset (CMO) table should not fail."); let cmot = table_cmot.parse_state_table(); + // Determine the accuracy log of CMOT let al_cmot = if offsets_mode > 0 { bit_boundaries_cmot .first() @@ -765,6 +637,7 @@ fn process_sequences( ) .expect("Reconstructing FSE-packed Match Length (ML) table should not fail."); let mlt = table_mlt.parse_state_table(); + // Determine the accuracy log of MLT let al_mlt = if match_lengths_mode > 0 { bit_boundaries_mlt .first() @@ -775,11 +648,10 @@ fn process_sequences( 6 }; - // Add witness rows for the FSE tables + // Add witness rows for the above three FSE tables let mut last_row = header_rows.last().cloned().unwrap(); - for (idx, start_offset, end_offset, bit_boundaries, tag_len, table, is_fse_section_end) in [ + for (start_offset, end_offset, bit_boundaries, tag_len, table, is_fse_section_end) in [ ( - 0usize, fse_starting_byte_offset, fse_starting_byte_offset + n_fse_bytes_llt, bit_boundaries_llt, @@ -788,7 +660,6 @@ fn process_sequences( offsets_mode + match_lengths_mode < 1, ), ( - 1usize, fse_starting_byte_offset + n_fse_bytes_llt, fse_starting_byte_offset + n_fse_bytes_llt + n_fse_bytes_cmot, bit_boundaries_cmot, @@ -797,7 +668,6 @@ fn process_sequences( match_lengths_mode < 1, ), ( - 2usize, fse_starting_byte_offset + n_fse_bytes_llt + n_fse_bytes_cmot, fse_starting_byte_offset + n_fse_bytes_llt + n_fse_bytes_cmot + n_fse_bytes_mlt, bit_boundaries_mlt, @@ -807,15 +677,6 @@ fn process_sequences( ), ] { if end_offset > start_offset { - let mut tag_value_iter = - src[start_offset..end_offset] - .iter() - 
.scan(Value::known(F::zero()), |acc, &byte| { - *acc = *acc * randomness + Value::known(F::from(byte as u64)); - Some(*acc) - }); - let tag_value = tag_value_iter.clone().last().expect("Tag value must exist"); - let mut tag_rlc_iter = src[start_offset..end_offset] .iter() @@ -828,7 +689,6 @@ fn process_sequences( let mut decoded: u64 = 0; let mut n_acc: usize = 0; let mut n_emitted: usize = 0; - let mut current_tag_value_acc = Value::known(F::zero()); let mut current_tag_rlc_acc = Value::known(F::zero()); let mut last_byte_idx: i64 = 0; let mut from_pos: (i64, i64) = (1, 0); @@ -842,38 +702,59 @@ fn process_sequences( let value_rlc = last_row.encoded_data.value_rlc * multiplier + last_row.state.tag_rlc; let mut last_symbol: i32 = 0; + // Convert multi-bit read operations boundaries from the stream into a convenient format + // so they can be easily converted into witness rows later. + + // Format: + + // symbol, The symbol being decoded now + // n_emitted, The total number of unique symbols decoded + // from_byte_position, Which byte the read operation starts at + // from_bit_position, Which bit position the read operation + // starts at, with range ∈ [0, 8) + // to_byte_position, Which byte the read operation ends at + // to_bit_position, Which bit position the read operation ends at, + // with range ∈ [0, 16) + // value_read, Bit value + // value_decoded, The decoded value is processed from the raw bitstring value + // current_tag_value_acc, Depending on the current byte position, + // the accumulator increments accordingly + // current_tag_rlc_acc, Depending on the current byte position, + // the accumulator increments accordingly + // n_acc, How many states are already assigned to the current symbol + // table_kind, What FSE table is being decoded + // table_size, The size of current FSE table + // is_repeating_bits, Whether current bitstring represents repeat bits. + // Repeat bits immediately follows a bitstring=1 read operation. 
+ // Repeat bits indicate how many 0-state symbols to skip. + // is_trailing_bits, FSE bitstreams may have trailing bits + let bitstream_rows = bit_boundaries .iter() .enumerate() .map(|(bit_boundary_idx, (bit_idx, value_read, value_decoded))| { - // Calculate byte and bit positions. Increment allocators. + // First calculate the start and end position of the current read operation from_pos = if next_symbol == -1 { (1, -1) } else { to_pos }; - from_pos.1 += 1; if from_pos.1 == 8 || from_pos.1 == 16 { from_pos = (from_pos.0 + 1, 0); } - from_pos.1 = (from_pos.1 as u64).rem_euclid(8) as i64; - while from_pos.0 > last_byte_idx { - current_tag_value_acc = tag_value_iter.next().unwrap(); current_tag_rlc_acc = tag_rlc_iter.next().unwrap(); last_byte_idx += 1; } + // Derive the end position based on how many bits are read let to_byte_idx = (bit_idx - 1) / 8; let mut to_bit_idx = bit_idx - to_byte_idx * (N_BITS_PER_BYTE as u32) - 1; - if from_pos.0 < (to_byte_idx + 1) as i64 { to_bit_idx += 8; } - to_pos = ((to_byte_idx + 1) as i64, to_bit_idx as i64); - // Decide Fse decoding results if bit_boundary_idx < 1 { - // Accuracy log bits + // Read Scenarios 1: Accuracy log bits (Always the First Read) next_symbol += 1; assert_eq!(value_read, value_decoded, "no varbit packing for AL bits"); ( @@ -885,10 +766,8 @@ fn process_sequences( to_pos.1 as usize, *value_read, *value_decoded, - current_tag_value_acc, current_tag_rlc_acc, n_acc, - // FseDecoder-specific witness values kind as u64, table.table_size, false, @@ -896,7 +775,7 @@ fn process_sequences( ) } else if !is_repeating_bit_boundary.contains_key(&bit_boundary_idx) { if n_acc >= (table.table_size as usize) { - // Trailing bits + // Read Scenarios 2: Trailing Bits assert_eq!( value_read, value_decoded, "no varbit packing for trailing bits" @@ -910,17 +789,15 @@ fn process_sequences( to_pos.1 as usize, *value_read, *value_decoded, - current_tag_value_acc, current_tag_rlc_acc, n_acc, - // FseDecoder-specific witness values 
kind as u64, table.table_size, false, true, ) } else { - // Regular decoding state + // Read Scenarios 3: Regular Decoding State assert!(next_symbol >= 0); decoded = next_symbol as u64; n_emitted += 1; @@ -966,18 +843,16 @@ to_pos.1 as usize, *value_read, *value_decoded, - current_tag_value_acc, current_tag_rlc_acc, n_acc, - // FseDecoder-specific witness values kind as u64, table.table_size, - false, // repeating bits - false, // trailing bits + false, + false, ) } } else { - // Repeating bits + // Read Scenarios 4: Repeating Bits let symbol = last_symbol as u64 + value_decoded; last_symbol = symbol as i32; assert_eq!( @@ -993,7 +868,6 @@ to_pos.1 as usize, *value_read, *value_decoded, - current_tag_value_acc, current_tag_rlc_acc, n_acc, // FseDecoder-specific witness values @@ -1014,7 +888,6 @@ u64, u64, Value, - Value, usize, u64, u64, @@ -1033,14 +906,12 @@ ZstdTag::ZstdBlockSequenceFseCode }, block_idx, - max_tag_len: lookup_max_tag_len(ZstdTag::ZstdBlockSequenceFseCode), + max_tag_len: ZstdTag::ZstdBlockSequenceFseCode.max_len(), tag_len, tag_idx: row.2 as u64, - tag_value, - tag_value_acc: row.8, is_tag_change: j == 0, tag_rlc, - tag_rlc_acc: row.9, + tag_rlc_acc: row.8, }, encoded_data: EncodedData { byte_idx: (start_offset + row.2) as u64, @@ -1059,20 +930,16 @@ }, decoded_data: DecodedData { decoded_len: last_row.decoded_data.decoded_len, - decoded_len_acc: last_row.decoded_data.decoded_len_acc, - total_decoded_len: last_row.decoded_data.total_decoded_len, - decoded_byte: 0u8, - decoded_value_rlc: last_row.decoded_data.decoded_value_rlc, }, fse_data: FseDecodingRow { - table_kind: row.11, - table_size: row.12, + table_kind: row.10, + table_size: row.11, symbol: row.0, num_emitted: row.1 as u64, value_decoded: row.7, - probability_acc: row.10 as u64, - is_repeat_bits_loop: row.13, - is_trailing_bits: row.14, + probability_acc: row.9 as u64, + 
is_repeat_bits_loop: row.12, + is_trailing_bits: row.13, }, }); @@ -1094,15 +961,12 @@ fn process_sequences( ZstdTag::ZstdBlockSequenceFseCode }, block_idx, - max_tag_len: lookup_max_tag_len(ZstdTag::ZstdBlockSequenceFseCode), + max_tag_len: ZstdTag::ZstdBlockSequenceFseCode.max_len(), tag_len, tag_idx: (row.2 + 1) as u64, - tag_value, - tag_value_acc: row.8 * randomness - + Value::known(F::from(byte_value as u64)), is_tag_change: false, tag_rlc, - tag_rlc_acc: row.9 * randomness + tag_rlc_acc: row.8 * randomness + Value::known(F::from(byte_value as u64)), }, encoded_data: EncodedData { @@ -1128,20 +992,16 @@ fn process_sequences( }, decoded_data: DecodedData { decoded_len: last_row.decoded_data.decoded_len, - decoded_len_acc: last_row.decoded_data.decoded_len_acc, - total_decoded_len: last_row.decoded_data.total_decoded_len, - decoded_byte: 0u8, - decoded_value_rlc: last_row.decoded_data.decoded_value_rlc, }, fse_data: FseDecodingRow { - table_kind: row.11, - table_size: row.12, + table_kind: row.10, + table_size: row.11, symbol: row.0, num_emitted: row.1 as u64, value_decoded: row.7, - probability_acc: row.10 as u64, + probability_acc: row.9 as u64, is_repeat_bits_loop: false, - is_trailing_bits: row.14, + is_trailing_bits: row.13, }, }) } @@ -1151,6 +1011,10 @@ fn process_sequences( } } + //////////////////////////////////////////////////////////////////////////// + ///// Sequence Section Part 3: Sequence Data (Instruction Bitstream) ////// + //////////////////////////////////////////////////////////////////////////// + // Reconstruct LLTV, CMOTV, and MLTV which specifies bit actions for a specific state let lltv = SequenceFixedStateActionTable::reconstruct_lltv(); let cmotv = SequenceFixedStateActionTable::reconstruct_cmotv(CMOT_N); @@ -1181,33 +1045,6 @@ fn process_sequences( (0..last_row.state.tag_len).fold(Value::known(F::one()), |acc, _| acc * randomness); let value_rlc = last_row.encoded_data.value_rlc * multiplier + last_row.state.tag_rlc; - let 
value_rlc_iter = - &src[byte_offset..end_offset] - .iter() - .scan(Value::known(F::zero()), |acc, &byte| { - *acc = *acc * randomness + Value::known(F::from(byte as u64)); - Some(*acc) - }); - let mut value_rlc_iter = value_rlc_iter - .clone() - .collect::>>() - .into_iter() - .rev(); - - let tag_value_iter = - &src[byte_offset..end_offset] - .iter() - .scan(Value::known(F::zero()), |acc, &byte| { - *acc = *acc * randomness + Value::known(F::from(byte as u64)); - Some(*acc) - }); - let tag_value = tag_value_iter.clone().last().expect("Tag value must exist"); - let mut tag_value_iter = tag_value_iter - .clone() - .collect::>>() - .into_iter() - .rev(); - let tag_rlc_iter = &src[byte_offset..end_offset] .iter() @@ -1222,12 +1059,8 @@ fn process_sequences( .into_iter() .rev(); - let mut next_tag_value_acc = tag_value_iter.next().unwrap(); - let next_value_rlc_acc = value_rlc_iter.next().unwrap(); let mut next_tag_rlc_acc = tag_rlc_iter.next().unwrap(); - let aux_1 = next_value_rlc_acc; - let mut padding_end_idx = 0; while sequence_bitstream[padding_end_idx] == 0 { padding_end_idx += 1; @@ -1243,11 +1076,9 @@ fn process_sequences( ZstdTag::BlockHeader }, block_idx, - max_tag_len: lookup_max_tag_len(ZstdTag::ZstdBlockSequenceData), + max_tag_len: ZstdTag::ZstdBlockSequenceData.max_len(), tag_len: n_sequence_data_bytes as u64, tag_idx: 1_u64, - tag_value, - tag_value_acc: next_tag_value_acc, is_tag_change: true, tag_rlc, tag_rlc_acc: next_tag_rlc_acc, @@ -1260,8 +1091,6 @@ fn process_sequences( reverse: true, reverse_len: n_sequence_data_bytes as u64, reverse_idx: (n_sequence_data_bytes - (current_byte_idx - 1)) as u64, - aux_1, - aux_2: Value::known(F::zero()), }, bitstream_read_data: BitstreamReadRow { bit_start_idx: 0usize, @@ -1283,7 +1112,6 @@ fn process_sequences( // Update accumulators if current_byte_idx > last_byte_idx { - next_tag_value_acc = tag_value_iter.next().unwrap(); next_tag_rlc_acc = tag_rlc_iter.next().unwrap(); last_byte_idx = current_byte_idx; } @@ 
-1321,26 +1149,18 @@ fn process_sequences( let mut is_init = true; let mut nb = nb_switch[mode][order_idx]; let bitstream_end_bit_idx = n_sequence_data_bytes * N_BITS_PER_BYTE; - let mut table_kind = 0u64; - let mut table_size = 0u64; + let mut table_kind; + let mut table_size; let mut last_states: [u64; 3] = [0, 0, 0]; let mut last_symbols: [u64; 3] = [0, 0, 0]; - let mut current_decoding_state = 0u64; + let mut current_decoding_state; let mut tail_holding_bit = false; - // witgen_debug - let stdout = io::stdout(); - let mut handle = stdout.lock(); - while current_bit_idx + nb <= bitstream_end_bit_idx { - // witgen_debug - // write!(handle, "current_byte_idx: {:?}, current_bit_idx: {:?}, nb: {:?}", - // current_byte_idx, current_bit_idx, nb).unwrap(); writeln!(handle).unwrap(); - let bitstring_value = be_bits_to_value(&sequence_bitstream[current_bit_idx..(current_bit_idx + nb)]); + let curr_baseline; - let mut curr_baseline = 0; if mode > 0 { // For the initial baseline determination, ML and CMO positions are flipped. 
if is_init { @@ -1366,7 +1186,6 @@ fn process_sequences( SequenceDataTag::LiteralLengthFse | SequenceDataTag::LiteralLengthValue => { table_llt.table_kind as u64 } - _ => unreachable!(), }; table_size = match new_decoded.0 { SequenceDataTag::CookedMatchOffsetFse | SequenceDataTag::CookedMatchOffsetValue => { @@ -1378,7 +1197,6 @@ fn process_sequences( SequenceDataTag::LiteralLengthFse | SequenceDataTag::LiteralLengthValue => { table_llt.table_size } - _ => unreachable!(), }; // FSE state update step @@ -1426,7 +1244,6 @@ fn process_sequences( SequenceDataTag::LiteralLengthFse | SequenceDataTag::LiteralLengthValue => { table_llt.table_kind as u64 } - _ => unreachable!(), }; table_size = match new_decoded.0 { SequenceDataTag::CookedMatchOffsetFse | SequenceDataTag::CookedMatchOffsetValue => { @@ -1438,7 +1255,6 @@ fn process_sequences( SequenceDataTag::LiteralLengthFse | SequenceDataTag::LiteralLengthValue => { table_llt.table_size } - _ => unreachable!(), }; // Value decoding step @@ -1455,11 +1271,6 @@ fn process_sequences( from_bit_idx }; - // witgen_debug - // write!(handle, "current_byte_idx: {:?}, from_bit_idx: {:?}, to_bit_idx: {:?}, nb: {:?}, - // is_nil: {:?}, is_zero_read: {:?}", byte_offset + current_byte_idx, from_bit_idx, - // to_bit_idx, nb, false, (nb == 0)).unwrap(); writeln!(handle).unwrap(); - // Add a witness row witness_rows.push(ZstdWitnessRow { state: ZstdState { @@ -1470,11 +1281,9 @@ fn process_sequences( ZstdTag::BlockHeader }, block_idx, - max_tag_len: lookup_max_tag_len(ZstdTag::ZstdBlockSequenceData), + max_tag_len: ZstdTag::ZstdBlockSequenceData.max_len(), tag_len: n_sequence_data_bytes as u64, tag_idx: current_byte_idx as u64, - tag_value, - tag_value_acc: next_tag_value_acc, is_tag_change: false, tag_rlc, tag_rlc_acc: next_tag_rlc_acc, @@ -1482,10 +1291,6 @@ fn process_sequences( encoded_data: EncodedData { byte_idx: (byte_offset + current_byte_idx) as u64, encoded_len, - // witgen_debug, idx overflow - // TODO(ray): This is a special 
case of the sequences data being a part of the - // "last block", hence the overflow. I have just re-used the "last" byte from the - // source data in such a case. value_byte: if end_offset - current_byte_idx < src.len() { src[end_offset - current_byte_idx] } else { @@ -1495,8 +1300,6 @@ fn process_sequences( reverse: true, reverse_len: n_sequence_data_bytes as u64, reverse_idx: (n_sequence_data_bytes - (current_byte_idx - 1)) as u64, - aux_1, - aux_2: Value::known(F::zero()), }, bitstream_read_data: BitstreamReadRow { bit_start_idx: from_bit_idx, @@ -1524,26 +1327,27 @@ fn process_sequences( }, }); + // When the range of a multi-byte read operation from the bitstream covers an entire byte, + // a separate row needs to be added for each of such byte to ensure continuity of the value + // accumulators. These compensating rows have is_nil=true. At most, two bytes can be + // entirely covered by a bitstream read operation. let multi_byte_boundaries: [usize; 2] = [15, 23]; let mut skipped_bits = 0usize; for boundary in multi_byte_boundaries { if to_bit_idx >= boundary { + // Skip over covered bytes for byte and bit index for _ in 0..N_BITS_PER_BYTE { (current_byte_idx, current_bit_idx) = increment_idx(current_byte_idx, current_bit_idx); } + // Increment accumulators for nil row if current_byte_idx > last_byte_idx && current_byte_idx <= n_sequence_data_bytes { - next_tag_value_acc = tag_value_iter.next().unwrap(); next_tag_rlc_acc = tag_rlc_iter.next().unwrap(); last_byte_idx = current_byte_idx; } skipped_bits += N_BITS_PER_BYTE; - // witgen_debug - // write!(handle, "current_byte_idx: {:?}, from_bit_idx: {:?}, to_bit_idx: {:?}, nb: {:?}, is_nil: {:?}, is_zero_read: {:?}", byte_offset + current_byte_idx, 0, 0, 7, true, false).unwrap(); - // writeln!(handle).unwrap(); - let wrap_by = match to_bit_idx { 15 => 8, 16..=23 => 16, @@ -1558,11 +1362,9 @@ fn process_sequences( ZstdTag::BlockHeader }, block_idx, - max_tag_len: lookup_max_tag_len(ZstdTag::ZstdBlockSequenceData), 
+ max_tag_len: ZstdTag::ZstdBlockSequenceData.max_len(), tag_len: n_sequence_data_bytes as u64, tag_idx: current_byte_idx as u64, - tag_value, - tag_value_acc: next_tag_value_acc, is_tag_change: false, tag_rlc, tag_rlc_acc: next_tag_rlc_acc, @@ -1570,11 +1372,6 @@ fn process_sequences( encoded_data: EncodedData { byte_idx: (byte_offset + current_byte_idx) as u64, encoded_len, - // witgen_debug, idx overflow - // TODO(ray): This is a special case of the sequences data being a part of - // the "last block", hence the overflow. I have just - // re-used the "last" byte from the source data in - // such a case. value_byte: if end_offset - current_byte_idx < src.len() { src[end_offset - current_byte_idx] } else { @@ -1584,8 +1381,6 @@ fn process_sequences( reverse: true, reverse_len: n_sequence_data_bytes as u64, reverse_idx: (n_sequence_data_bytes - (current_byte_idx - 1)) as u64, - aux_1, - aux_2: Value::known(F::zero()), }, bitstream_read_data: BitstreamReadRow { bit_start_idx: to_bit_idx - wrap_by, @@ -1615,6 +1410,7 @@ fn process_sequences( } } + // Update all variables that indicate current decoding states order_idx += 1; if mode > 0 { if order_idx > 2 { @@ -1626,18 +1422,13 @@ fn process_sequences( mode = 1; // switch to FSE mode order_idx = 0; - // Add the instruction + // Three elements (MO, ML and LL) are all decoded. Add the instruction. 
let new_instruction = ( curr_instruction[0], curr_instruction[1], curr_instruction[2], ); - // witgen_debug - // write!(handle, "NewInstruction - idx: {:?}, Offset: {:?}, ML: {:?}, LLT: {:?}", - // raw_sequence_instructions.len(), new_instruction.0, new_instruction.1, - // new_instruction.2).unwrap(); writeln!(handle); - raw_sequence_instructions.push(new_instruction); } @@ -1649,6 +1440,10 @@ fn process_sequences( nb_switch[mode][order_idx] }; + // Adjust the end position of the current read operation: + // If the next operation reads 0 bits, the ending bit position should stay on + // the last bit, instead of incrementing to the next position. When the nb=0 streak breaks, + // the held off position is released. if nb > 0 && next_nb > 0 { for _ in 0..(nb - skipped_bits) { (current_byte_idx, current_bit_idx) = @@ -1666,11 +1461,11 @@ fn process_sequences( } if current_byte_idx > last_byte_idx && current_byte_idx <= n_sequence_data_bytes { - next_tag_value_acc = tag_value_iter.next().unwrap(); next_tag_rlc_acc = tag_rlc_iter.next().unwrap(); last_byte_idx = current_byte_idx; } + // Update the next nb for the next read operation nb = next_nb; } @@ -1679,23 +1474,6 @@ fn process_sequences( let mut literal_len_acc: usize = 0; let mut repeated_offset: [usize; 3] = [1, 4, 8]; - // witgen_debug - // for idx in 0..witness_rows.len() { - // if witness_rows[idx].state.tag == ZstdTag::ZstdBlockSequenceData - // && !witness_rows[idx].bitstream_read_data.is_seq_init - // { - // let seq_idx = witness_rows[idx].bitstream_read_data.seq_idx; - // if seq_idx > 0 { - // witness_rows[idx].bitstream_read_data.values = [ - // // literal length, match length and match offset. 
- // raw_sequence_instructions[seq_idx - 1].2 as u64, - // raw_sequence_instructions[seq_idx - 1].1 as u64, - // raw_sequence_instructions[seq_idx - 1].0 as u64, - // ]; - // } - // } - // } - for (idx, inst) in raw_sequence_instructions.iter().enumerate() { let actual_offset = if inst.0 > 3 { inst.0 - 3 @@ -1817,14 +1595,6 @@ fn process_sequences( ); } - // witgen_debug - let stdout = io::stdout(); - let mut handle = stdout.lock(); - - // witgen_debug - // write!(handle, "=> decoded: {:?}", recovered_inputs).unwrap(); - // writeln!(handle).unwrap(); - ( end_offset, witness_rows, @@ -1889,19 +1659,6 @@ fn process_block_zstd_literals_header( _ => unreachable!("BlockType::* unexpected. Must be raw bytes for literals."), }; - let tag_value_iter = - lh_bytes - .iter() - .take(n_bytes_header) - .scan(Value::known(F::zero()), |acc, &byte| { - *acc = *acc * Value::known(F::from(256u64)) + Value::known(F::from(byte as u64)); - Some(*acc) - }); - let tag_value = tag_value_iter - .clone() - .last() - .expect("LiteralsHeader expected"); - let tag_rlc_iter = lh_bytes .iter() @@ -1912,15 +1669,6 @@ fn process_block_zstd_literals_header( }); let tag_rlc = tag_rlc_iter.clone().last().expect("Tag RLC expected"); - let value_rlc_iter = - lh_bytes - .iter() - .take(n_bytes_header) - .scan(last_row.encoded_data.value_rlc, |acc, &byte| { - *acc = *acc * randomness + Value::known(F::from(byte as u64)); - Some(*acc) - }); - let multiplier = (0..last_row.state.tag_len).fold(Value::known(F::one()), |acc, _| acc * randomness); let value_rlc = last_row.encoded_data.value_rlc * multiplier + last_row.state.tag_rlc; @@ -1930,38 +1678,32 @@ fn process_block_zstd_literals_header( lh_bytes .iter() .take(n_bytes_header) - .zip(tag_value_iter) - .zip(value_rlc_iter) .zip(tag_rlc_iter) .enumerate() - .map( - |(i, (((&value_byte, tag_value_acc), _v_rlc), tag_rlc_acc))| ZstdWitnessRow { - state: ZstdState { - tag: ZstdTag::ZstdBlockLiteralsHeader, - tag_next, - block_idx, - max_tag_len: 
lookup_max_tag_len(ZstdTag::ZstdBlockLiteralsHeader), - tag_len: n_bytes_header as u64, - tag_idx: (i + 1) as u64, - tag_value, - tag_value_acc, - is_tag_change: i == 0, - tag_rlc, - tag_rlc_acc, - }, - encoded_data: EncodedData { - byte_idx: (byte_offset + i + 1) as u64, - encoded_len: last_row.encoded_data.encoded_len, - value_byte, - reverse: false, - value_rlc, - ..Default::default() - }, - bitstream_read_data: BitstreamReadRow::default(), - decoded_data: last_row.decoded_data.clone(), - fse_data: FseDecodingRow::default(), + .map(|(i, (&value_byte, tag_rlc_acc))| ZstdWitnessRow { + state: ZstdState { + tag: ZstdTag::ZstdBlockLiteralsHeader, + tag_next, + block_idx, + max_tag_len: ZstdTag::ZstdBlockLiteralsHeader.max_len(), + tag_len: n_bytes_header as u64, + tag_idx: (i + 1) as u64, + is_tag_change: i == 0, + tag_rlc, + tag_rlc_acc, + }, + encoded_data: EncodedData { + byte_idx: (byte_offset + i + 1) as u64, + encoded_len: last_row.encoded_data.encoded_len, + value_byte, + reverse: false, + value_rlc, + ..Default::default() }, - ) + bitstream_read_data: BitstreamReadRow::default(), + decoded_data: last_row.decoded_data.clone(), + fse_data: FseDecodingRow::default(), + }) .collect::>(), literals_block_type, n_streams, @@ -1997,16 +1739,11 @@ pub fn process(src: &[u8], randomness: Value) -> MultiBlockProcessR let mut address_table_arr: Vec> = vec![]; // TODO: handle multi-block let mut sequence_exec_info_arr: Vec = vec![]; - let byte_offset = 0; - - // witgen_debug - let stdout = io::stdout(); - let mut handle = stdout.lock(); // FrameHeaderDescriptor and FrameContentSize - let (byte_offset, rows) = process_frame_header::( + let (mut byte_offset, rows) = process_frame_header::( src, - byte_offset, + 0, // frame header starts at offset=0 &ZstdWitnessRow::init(src.len()), randomness, ); @@ -2015,7 +1752,7 @@ pub fn process(src: &[u8], randomness: Value) -> MultiBlockProcessR let mut block_idx: u64 = 1; loop { let ( - _byte_offset, + end_offset, rows, block_info, 
sequence_info, @@ -2047,52 +1784,14 @@ pub fn process(src: &[u8], randomness: Value) -> MultiBlockProcessR sequence_exec_info_arr.push(sequence_exec_info); if block_info.is_last_block { - // TODO: Recover this assertion after the sequence section decoding is completed. - // assert!(byte_offset >= src.len()); + assert!(end_offset >= src.len()); break; } else { block_idx += 1; + byte_offset = end_offset; } } - // witgen_debug - // for (idx, row) in witness_rows.iter().enumerate() { - // if row.encoded_data.byte_idx >= 33860 && row.encoded_data.byte_idx <= 33870 { - // write!( - // handle, - // - // "{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{: - // ?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?}; - // {:?};{:?};{:?};{:?};{:?};{:?};{:?};{:?};", idx, row.state.tag, - // row.state.tag_next, row.state.block_idx, row.state.max_tag_len, - // row.state.tag_len, row.state.tag_idx, row.state.tag_value, row.state.tag_value_acc, - // row.state.is_tag_change, row.state.tag_rlc_acc, - // row.encoded_data.byte_idx, row.encoded_data.encoded_len, - // row.encoded_data.value_byte, row.encoded_data.reverse, - // row.encoded_data.reverse_idx, row.encoded_data.reverse_len, row.encoded_data.aux_1, - // row.encoded_data.aux_2, row.encoded_data.value_rlc, - // row.decoded_data.decoded_len, row.decoded_data.decoded_len_acc, - // row.decoded_data.total_decoded_len, row.decoded_data.decoded_byte, - // row.decoded_data.decoded_value_rlc, row.fse_data.table_kind, - // row.fse_data.table_size, row.fse_data.symbol, row.fse_data.num_emitted, - // row.fse_data.value_decoded, row.fse_data.probability_acc, row.fse_data. - // is_repeat_bits_loop, row.fse_data.is_trailing_bits, - // row.bitstream_read_data. 
bit_start_idx, - // row.bitstream_read_data.bit_end_idx, row.bitstream_read_data.bit_value, - // row.bitstream_read_data.is_nil, row.bitstream_read_data.is_zero_bit_read, - // row.bitstream_read_data.is_seq_init, - // row.bitstream_read_data.seq_idx, - // row.bitstream_read_data.states, - // row.bitstream_read_data.symbols, - // row.bitstream_read_data.values, - // row.bitstream_read_data.baseline, - // row.bitstream_read_data.is_update_state, - // ).unwrap(); - - // writeln!(handle).unwrap(); - // } - // } - ( witness_rows, literals, @@ -2108,101 +1807,89 @@ pub fn process(src: &[u8], randomness: Value) -> MultiBlockProcessR #[cfg(test)] mod tests { - // witgen_debug - // use super::*; - // use bitstream_io::write; - // use halo2_proofs::halo2curves::bn256::Fr; - // use serde_json::from_str; - use std::fs; - - // witgen_debug - use std::io::Write; - - // witgen_debug - // #[test] - // #[ignore] - // fn compression_ratio() -> Result<(), std::io::Error> { - // use csv::WriterBuilder; - // use super::*; - - // let get_compression_ratio = |data: &[u8]| -> Result<(u64, u64, H256), std::io::Error> { - // let raw_len = data.len(); - // let compressed = { - // // compression level = 0 defaults to using level=3, which is zstd's default. - // let mut encoder = zstd::stream::write::Encoder::new(Vec::new(), 0)?; - - // // disable compression of literals, i.e. literals will be raw bytes. - // encoder.set_parameter(zstd::stream::raw::CParameter::LiteralCompressionMode( - // zstd::zstd_safe::ParamSwitch::Disable, - // ))?; - // // set target block size to fit within a single block. - // encoder - // .set_parameter(zstd::stream::raw::CParameter::TargetCBlockSize(124 * 1024))?; - // // do not include the checksum at the end of the encoded data. - // encoder.include_checksum(false)?; - // // do not include magic bytes at the start of the frame since we will have a - // single // frame. 
- // encoder.include_magicbytes(false)?; - // // set source length, which will be reflected in the frame header. - // encoder.set_pledged_src_size(Some(raw_len as u64))?; - // // include the content size to know at decode time the expected size of decoded - // // data. - // encoder.include_contentsize(true)?; - - // encoder.write_all(data)?; - // encoder.finish()? - // }; - // let hash = keccak256(&compressed); - // let compressed_len = compressed.len(); - // Ok((raw_len as u64, compressed_len as u64, hash.into())) - // }; - - // let mut batch_files = fs::read_dir("./data")? - // .map(|entry| entry.map(|e| e.path())) - // .collect::, std::io::Error>>()?; - // batch_files.sort(); - - // let batches = batch_files - // .iter() - // .map(fs::read_to_string) - // .filter_map(|data| data.ok()) - // .map(|data| hex::decode(data.trim_end()).expect("Failed to decode hex data")) - // .collect::>>(); - - // let file = File::create("modified-ratio.csv")?; - // let mut writer = WriterBuilder::new().from_writer(file); - - // // Write headers to CSV - // writer.write_record(["ID", "Len(input)", "Compression Ratio"])?; - - // // Test and store results in CSV - // for (i, batch) in batches.iter().enumerate() { - // let (raw_len, compr_len, keccak_hash) = get_compression_ratio(batch)?; - // println!( - // "batch{:0>3}, raw_size={:6}, compr_size={:6}, compr_keccak_hash={:64x}", - // i, raw_len, compr_len, keccak_hash - // ); - - // // Write input and result to CSV - // let compr_ratio = raw_len as f64 / compr_len as f64; - // writer.write_record(&[i.to_string(), raw_len.to_string(), compr_ratio.to_string()])?; - // } - - // // Flush the CSV writer - // writer.flush()?; - - // Ok(()) - // } + use eth_types::H256; + use ethers_core::utils::keccak256; + use std::{fs, fs::File, io::Write}; + + #[test] + #[ignore] + fn compression_ratio() -> Result<(), std::io::Error> { + use csv::WriterBuilder; + + let get_compression_ratio = |data: &[u8]| -> Result<(u64, u64, H256), std::io::Error> { + 
let raw_len = data.len(); + let compressed = { + // compression level = 0 defaults to using level=3, which is zstd's default. + let mut encoder = zstd::stream::write::Encoder::new(Vec::new(), 0)?; + + // disable compression of literals, i.e. literals will be raw bytes. + encoder.set_parameter(zstd::stream::raw::CParameter::LiteralCompressionMode( + zstd::zstd_safe::ParamSwitch::Disable, + ))?; + // set target block size to fit within a single block. + encoder + .set_parameter(zstd::stream::raw::CParameter::TargetCBlockSize(124 * 1024))?; + // do not include the checksum at the end of the encoded data. + encoder.include_checksum(false)?; + // do not include magic bytes at the start of the frame since we will have a + // single frame. + encoder.include_magicbytes(false)?; + // set source length, which will be reflected in the frame header. + encoder.set_pledged_src_size(Some(raw_len as u64))?; + // include the content size to know at decode time the expected size of decoded + // data. + encoder.include_contentsize(true)?; + + encoder.write_all(data)?; + encoder.finish()? + }; + let hash = keccak256(&compressed); + let compressed_len = compressed.len(); + Ok((raw_len as u64, compressed_len as u64, hash.into())) + }; + + let mut batch_files = fs::read_dir("./data")? 
+ .map(|entry| entry.map(|e| e.path())) + .collect::, std::io::Error>>()?; + batch_files.sort(); + + let batches = batch_files + .iter() + .map(fs::read_to_string) + .filter_map(|data| data.ok()) + .map(|data| hex::decode(data.trim_end()).expect("Failed to decode hex data")) + .collect::>>(); + + let file = File::create("modified-ratio.csv")?; + let mut writer = WriterBuilder::new().from_writer(file); + + // Write headers to CSV + writer.write_record(["ID", "Len(input)", "Compression Ratio"])?; + + // Test and store results in CSV + for (i, batch) in batches.iter().enumerate() { + let (raw_len, compr_len, keccak_hash) = get_compression_ratio(batch)?; + println!( + "batch{:0>3}, raw_size={:6}, compr_size={:6}, compr_keccak_hash={:64x}", + i, raw_len, compr_len, keccak_hash + ); + + // Write input and result to CSV + let compr_ratio = raw_len as f64 / compr_len as f64; + writer.write_record(&[i.to_string(), raw_len.to_string(), compr_ratio.to_string()])?; + } + + // Flush the CSV writer + writer.flush()?; + + Ok(()) + } #[test] fn test_zstd_witness_processing_batch_data() -> Result<(), std::io::Error> { use super::*; use halo2_proofs::halo2curves::bn256::Fr; - // witgen_debug - let stdout = io::stdout(); - let mut handle = stdout.lock(); - let mut batch_files = fs::read_dir("./data/test_batches")? .map(|entry| entry.map(|e| e.path())) .collect::, std::io::Error>>()?; @@ -2214,12 +1901,7 @@ mod tests { .map(|data| hex::decode(data.trim_end()).expect("Failed to decode hex data")) .collect::>>(); - for (batch_idx, raw_input_bytes) in batches.into_iter().enumerate() { - // witgen_debug - // if batch_idx == 127 { - // continue; - // } - + for raw_input_bytes in batches.into_iter() { let compressed = { // compression level = 0 defaults to using level=3, which is zstd's default. let mut encoder = zstd::stream::write::Encoder::new(Vec::new(), 0)?; @@ -2246,10 +1928,6 @@ mod tests { encoder.finish()? 
}; - // witgen_debug - // write!(handle, "=> compressed: {:?}", compressed).unwrap(); - // writeln!(handle).unwrap(); - let ( _witness_rows, _decoded_literals, @@ -2266,14 +1944,6 @@ mod tests { .flat_map(|r| r.recovered_bytes) .collect::>(); - // witgen_debug - write!(handle, "=> batch_idx: {:?}", batch_idx).unwrap(); - writeln!(handle).unwrap(); - - // witgen_debug - // write!(handle, "=> decoded: {:?}", decoded_bytes).unwrap(); - // writeln!(handle).unwrap(); - assert!(raw_input_bytes == decoded_bytes); } diff --git a/aggregator/src/aggregation/decoder/witgen/params.rs b/aggregator/src/aggregation/decoder/witgen/params.rs index f2f39677d7..ba1390727b 100644 --- a/aggregator/src/aggregation/decoder/witgen/params.rs +++ b/aggregator/src/aggregation/decoder/witgen/params.rs @@ -13,5 +13,7 @@ pub const N_BITS_ZSTD_TAG: usize = 4; /// Number of bits in the repeat bits that follow value=1 in reconstructing FSE table. pub const N_BITS_REPEAT_FLAG: usize = 2; -// we use offset window no more than = 22 -pub const CL_WINDOW_LIMIT: usize = 22; +// we use offset window no more than = 17 +// TODO: use for multi-block zstd. +#[allow(dead_code)] +pub const CL_WINDOW_LIMIT: usize = 17; diff --git a/aggregator/src/aggregation/decoder/witgen/types.rs b/aggregator/src/aggregation/decoder/witgen/types.rs index b43855000e..185a2f0691 100644 --- a/aggregator/src/aggregation/decoder/witgen/types.rs +++ b/aggregator/src/aggregation/decoder/witgen/types.rs @@ -13,61 +13,6 @@ use super::{ util::{read_variable_bit_packing, smaller_powers_of_two, value_bits_le}, }; -/// A read-only memory table (fixed table) for decompression circuit to verify that the next tag -/// fields are assigned correctly. -#[derive(Clone, Debug)] -pub struct RomTagTableRow { - /// The current tag. - tag: ZstdTag, - /// The tag that will be processed after the current tag is finished processing. - tag_next: ZstdTag, - /// The maximum number of bytes that are needed to represent the current tag. 
- max_len: u64, - /// Whether this tag outputs a decoded byte or not. - is_output: bool, - /// Whether this tag is processed from back-to-front or not. - is_reverse: bool, - /// Whether this tag belongs to a ``block`` in zstd or not. - is_block: bool, -} - -impl RomTagTableRow { - pub(crate) fn rows() -> Vec { - use ZstdTag::{ - BlockHeader, FrameContentSize, FrameHeaderDescriptor, ZstdBlockLiteralsHeader, - ZstdBlockLiteralsRawBytes, ZstdBlockSequenceHeader, - }; - - [ - (FrameHeaderDescriptor, FrameContentSize, 1), - (FrameContentSize, BlockHeader, 8), - (BlockHeader, ZstdBlockLiteralsHeader, 3), - (ZstdBlockLiteralsHeader, ZstdBlockLiteralsRawBytes, 5), - (ZstdBlockLiteralsRawBytes, ZstdBlockSequenceHeader, 1048575), // (1 << 20) - 1 - ] - .map(|(tag, tag_next, max_len)| Self { - tag, - tag_next, - max_len, - is_output: tag.is_output(), - is_reverse: tag.is_reverse(), - is_block: tag.is_block(), - }) - .to_vec() - } - - pub(crate) fn values(&self) -> Vec> { - vec![ - Value::known(F::from(usize::from(self.tag) as u64)), - Value::known(F::from(usize::from(self.tag_next) as u64)), - Value::known(F::from(self.max_len)), - Value::known(F::from(self.is_output as u64)), - Value::known(F::from(self.is_reverse as u64)), - Value::known(F::from(self.is_block as u64)), - ] - } -} - #[derive(Debug, Default, Clone, Copy)] pub enum BlockType { #[default] @@ -150,7 +95,7 @@ impl_expr!(LstreamNum); /// Various tags that we can decode from a zstd encoded data. #[derive(Clone, Copy, Debug, EnumIter, PartialEq, Eq, Hash)] pub enum ZstdTag { - /// Null should not occur. + /// Null is reserved for padding rows. Null = 0, /// The frame header's descriptor. FrameHeaderDescriptor, @@ -171,21 +116,6 @@ pub enum ZstdTag { } impl ZstdTag { - /// Whether this tag produces an output or not. 
- pub fn is_output(&self) -> bool { - match self { - Self::Null => false, - Self::FrameHeaderDescriptor => false, - Self::FrameContentSize => false, - Self::BlockHeader => false, - Self::ZstdBlockLiteralsHeader => false, - Self::ZstdBlockLiteralsRawBytes => false, - Self::ZstdBlockSequenceHeader => false, - Self::ZstdBlockSequenceFseCode => false, - Self::ZstdBlockSequenceData => true, - } - } - /// Whether this tag is a part of block or not. pub fn is_block(&self) -> bool { match self { @@ -215,6 +145,23 @@ impl ZstdTag { Self::ZstdBlockSequenceData => true, } } + + /// The maximum number of bytes that can be taken by this tag. + pub fn max_len(&self) -> u64 { + match self { + Self::Null => 0, + Self::FrameHeaderDescriptor => 1, + Self::FrameContentSize => 8, + Self::BlockHeader => 3, + // as per spec, should be 5. But given that our encoder does not compress literals, it + // is 3. + Self::ZstdBlockLiteralsHeader => 3, + Self::ZstdBlockLiteralsRawBytes => (1 << 17) - 1, + Self::ZstdBlockSequenceHeader => 4, + Self::ZstdBlockSequenceFseCode => 128, + Self::ZstdBlockSequenceData => (1 << 17) - 1, + } + } } impl_expr!(ZstdTag); @@ -263,10 +210,7 @@ pub struct ZstdState { pub max_tag_len: u64, pub tag_len: u64, pub tag_idx: u64, - pub tag_value: Value, - pub tag_value_acc: Value, pub is_tag_change: bool, - // Unlike tag_value, tag_rlc only uses challenge as multiplier pub tag_rlc: Value, pub tag_rlc_acc: Value, } @@ -280,8 +224,6 @@ impl Default for ZstdState { max_tag_len: 0, tag_len: 0, tag_idx: 0, - tag_value: Value::known(F::zero()), - tag_value_acc: Value::known(F::zero()), is_tag_change: false, tag_rlc: Value::known(F::zero()), tag_rlc_acc: Value::known(F::zero()), @@ -297,8 +239,6 @@ pub struct EncodedData { pub reverse: bool, pub reverse_idx: u64, pub reverse_len: u64, - pub aux_1: Value, - pub aux_2: Value, pub value_rlc: Value, } @@ -317,20 +257,14 @@ impl Default for EncodedData { reverse: false, reverse_idx: 0, reverse_len: 0, - aux_1: 
Value::known(F::zero()), - aux_2: Value::known(F::zero()), value_rlc: Value::known(F::zero()), } } } #[derive(Clone, Debug, Default)] -pub struct DecodedData { +pub struct DecodedData { pub decoded_len: u64, - pub decoded_len_acc: u64, - pub total_decoded_len: u64, - pub decoded_byte: u8, - pub decoded_value_rlc: Value, } /// FSE decoding data from witness generation @@ -414,8 +348,7 @@ pub struct BitstreamReadRow { /// Sequence data is interleaved with 6 bitstreams. Each producing a different type of value. #[derive(Clone, Copy, Debug)] pub enum SequenceDataTag { - Null = 0, - LiteralLengthFse, + LiteralLengthFse = 1, MatchLengthFse, CookedMatchOffsetFse, LiteralLengthValue, @@ -959,7 +892,7 @@ pub struct ZstdWitnessRow { /// Data on compressed data pub encoded_data: EncodedData, /// Data on decompressed data - pub decoded_data: DecodedData, + pub decoded_data: DecodedData, /// Fse decoding state transition data pub fse_data: FseDecodingRow, /// Bitstream reader @@ -1035,21 +968,6 @@ mod tests { // Here we test whether we can actually reconstruct the FSE table for distributions that // include prob=-1 cases, one such example is the Predefined FSE table as per // specifications. 
- // - // short literalsLength_defaultDistribution[36] = - // { 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, - // 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1, - // -1,-1,-1,-1 }; - // - // short matchLengths_defaultDistribution[53] = - // { 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, - // 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - // 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1, - // -1,-1,-1,-1,-1 }; - // - // short offsetCodes_defaultDistribution[29] = - // { 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, - // 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1 }; let default_distribution_llt = vec![ 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1, -1, -1, -1, -1, @@ -1118,10 +1036,82 @@ mod tests { let (_n_bytes, _bit_boundaries, table) = FseAuxiliaryTableData::reconstruct(&src, 0, FseTableKind::LLT, 0, false)?; - let _parsed_state_map = table.parse_state_table(); + let parsed_state_map = table.parse_state_table(); + + let mut expected_state_table = BTreeMap::new(); + + let expected_state_table_states: [[u64; 4]; 64] = [ + [0, 0, 4, 2], + [1, 0, 8, 2], + [2, 0, 12, 2], + [3, 0, 16, 2], + [4, 0, 20, 2], + [5, 0, 24, 2], + [6, 1, 32, 4], + [7, 1, 48, 4], + [8, 2, 0, 5], + [9, 3, 0, 4], + [10, 4, 16, 4], + [11, 4, 32, 4], + [12, 6, 0, 5], + [13, 8, 32, 5], + [14, 9, 32, 5], + [15, 10, 32, 5], + [16, 12, 0, 6], + [17, 14, 0, 6], + [18, 15, 0, 4], + [19, 17, 0, 6], + [20, 20, 0, 6], + [21, 24, 32, 5], + [22, 0, 28, 2], + [23, 0, 32, 2], + [24, 0, 36, 2], + [25, 0, 40, 2], + [26, 0, 44, 2], + [27, 1, 0, 3], + [28, 1, 8, 3], + [29, 2, 32, 5], + [30, 3, 16, 4], + [31, 4, 48, 4], + [32, 4, 0, 3], + [33, 5, 0, 5], + [34, 7, 0, 6], + [35, 8, 0, 4], + [36, 9, 0, 4], + [37, 10, 0, 4], + [38, 13, 0, 5], + [39, 15, 16, 4], + [40, 16, 0, 6], + [41, 18, 0, 5], + [42, 24, 0, 4], + [43, 0, 48, 2], + [44, 0, 52, 2], + [45, 0, 56, 2], + [46, 0, 60, 2], + [47, 0, 0, 1], + [48, 0, 2, 1], + [49, 1, 16, 3], + [50, 1, 24, 3], + [51, 3, 
32, 4], + [52, 3, 48, 4], + [53, 4, 8, 3], + [54, 5, 32, 5], + [55, 6, 32, 5], + [56, 8, 16, 4], + [57, 9, 16, 4], + [58, 10, 16, 4], + [59, 13, 32, 5], + [60, 15, 32, 4], + [61, 15, 48, 4], + [62, 18, 32, 5], + [63, 24, 16, 4], + ]; + + for state in expected_state_table_states { + expected_state_table.insert(state[0], (state[1], state[2], state[3])); + } - // witgen_debug - // TODO: assertions + assert!(parsed_state_map == expected_state_table); Ok(()) } diff --git a/aggregator/src/blob.rs b/aggregator/src/blob.rs index a4e4e5588b..c62d308b23 100644 --- a/aggregator/src/blob.rs +++ b/aggregator/src/blob.rs @@ -285,20 +285,23 @@ impl BatchData { .collect() } + /// Get the zstd encoded batch data bytes. + pub(crate) fn get_encoded_batch_data_bytes(&self) -> Vec { + let batch_data_bytes = self.get_batch_data_bytes(); + let mut encoder = init_zstd_encoder(); + encoder + .set_pledged_src_size(Some(batch_data_bytes.len() as u64)) + .expect("infallible"); + encoder.write_all(&batch_data_bytes).expect("infallible"); + encoder.finish().expect("infallible") + } + /// Get the BLOB_WIDTH number of scalar field elements, as 32-bytes unsigned integers. pub(crate) fn get_coefficients(&self) -> [U256; BLOB_WIDTH] { let mut coefficients = [[0u8; N_BYTES_U256]; BLOB_WIDTH]; // We only consider the data from `valid` chunks and ignore the padded chunks. - let batch_bytes = self.get_batch_data_bytes(); - let blob_bytes = { - let mut encoder = init_zstd_encoder(); - encoder - .set_pledged_src_size(Some(batch_bytes.len() as u64)) - .expect("infallible"); - encoder.write_all(&batch_bytes).expect("infallible"); - encoder.finish().expect("infallible") - }; + let blob_bytes = self.get_encoded_batch_data_bytes(); assert!( blob_bytes.len() < N_BLOB_BYTES, "too many bytes in batch data"