From 789574307f24c6478d73c173745fcfa25847177a Mon Sep 17 00:00:00 2001 From: Ho Date: Tue, 14 May 2024 19:26:23 +0900 Subject: [PATCH] `SequenceInstructionTable` and `SequenceExecutionConfig` (#1259) * rebase to upstream Signed-off-by: noelwei * wip of addrtable Signed-off-by: noelwei * all gates Signed-off-by: noelwei * change the lookup purpose Signed-off-by: noelwei * fix according to reviews * purge unused seqvaluetable Signed-off-by: noelwei * change zero testing to corresponding gadget complete assignment Signed-off-by: noelwei * trivial fixing Signed-off-by: noelwei * purge the duplicated works Signed-off-by: noelwei * unit test (WIP) Signed-off-by: noelwei * seq exec circuit (WIP) Signed-off-by: noelwei * seq exec circuit (WIP) Signed-off-by: noelwei * seq exec circuit (WIP) Signed-off-by: noelwei * output region: gates and lookups (WIP) Signed-off-by: noelwei * pass unittest for seqinst table Signed-off-by: noelwei * seq exec: complete the seq num lookup Signed-off-by: noelwei * add seq exec info in witgen Signed-off-by: noelwei * assign and unit tests (WIP) Signed-off-by: noelwei * refactor for better assignment Signed-off-by: noelwei * assignments and unit tests (WIP) Signed-off-by: noelwei * induce debug utilities in AddressRow Signed-off-by: noelwei * pass first unit test (WIP) Signed-off-by: noelwei * chore: integrate seq-inst-table and seq-exec-config into decoder-config * refactor to low degree Signed-off-by: noelwei * more unittest for seq exec (WIP) Signed-off-by: noelwei * pass unit tests Signed-off-by: noelwei * add assign entry Signed-off-by: noelwei * update some witgens, pass decoder's unit test Signed-off-by: noelwei * integrate seq exec into decoder (WIP) Signed-off-by: noelwei * temporary disable 3 lookups and unit test pass for the rest Signed-off-by: noelwei * all of the unit test passed Signed-off-by: noelwei * trivial updates: head condition in seq exec and exported cells Signed-off-by: noelwei * clear the warnings Signed-off-by: noelwei * chore: fmt --------- Signed-off-by: noelwei Co-authored-by: Rohit Narurkar --- aggregator/src/aggregation/decoder.rs | 247 ++-- .../src/aggregation/decoder/seq_exec.rs | 1271 +++++++++++++++++ aggregator/src/aggregation/decoder/tables.rs | 3 + .../decoder/tables/seqinst_table.rs | 1046 ++++++++++++++ aggregator/src/aggregation/decoder/witgen.rs | 145 +- .../src/aggregation/decoder/witgen/params.rs | 3 + .../src/aggregation/decoder/witgen/types.rs | 62 + .../src/aggregation/decoder/witgen/util.rs | 2 +- 8 files changed, 2649 insertions(+), 130 deletions(-) create mode 100644 aggregator/src/aggregation/decoder/seq_exec.rs create mode 100644 aggregator/src/aggregation/decoder/tables/seqinst_table.rs diff --git a/aggregator/src/aggregation/decoder.rs b/aggregator/src/aggregation/decoder.rs index 2c4956fd9b..d3d825a144 100644 --- a/aggregator/src/aggregation/decoder.rs +++ b/aggregator/src/aggregation/decoder.rs @@ -1,3 +1,4 @@ +mod seq_exec; mod tables; pub mod witgen; use witgen::*; @@ -28,7 +29,10 @@ use zkevm_circuits::{ }; use self::{ - tables::{BitstringTable, FixedTable, FseTable, LiteralsHeaderTable}, + tables::{ + BitstringTable, FixedTable, FseTable, LiteralsHeaderTable, + SeqInstTable as SequenceInstructionTable, + }, util::value_bits_le, witgen::{ FseTableKind, ZstdTag, N_BITS_PER_BYTE, N_BITS_REPEAT_FLAG, N_BITS_ZSTD_TAG, @@ -36,8 +40,12 @@ use self::{ }, }; +use seq_exec::{LiteralTable, SeqExecConfig as SequenceExecutionConfig, SequenceConfig}; + #[derive(Clone, Debug)] pub struct DecoderConfig { + /// TODO(check): const col. + const_col: Column, /// Fixed column to mark all the usable rows. q_enable: Column, /// Fixed column to mark the first row in the layout. @@ -82,10 +90,11 @@ pub struct DecoderConfig { fse_table: FseTable, // witgen_debug - // /// Helper table for sequences as instructions. - // /// TODO(enable): sequence_instruction_table: SequenceInstructionTable, + /// Helper table for sequences as instructions. + sequence_instruction_table: SequenceInstructionTable, // /// Helper table in the "output" region for accumulating the result of executing sequences. - // /// TODO(enable): sequence_execution_table: SequenceExecutionTable, + sequence_execution_config: SequenceExecutionConfig, + /// Fixed lookups table. fixed_table: FixedTable, } @@ -972,7 +981,9 @@ impl DecoderConfig { pow2_table, bitwise_op_table, ); - // TODO(enable): let sequence_instruction_table = SequenceInstructionTable::configure(meta); + let sequence_instruction_table = SequenceInstructionTable::configure(meta); + + debug_assert!(meta.degree() <= 9); // Peripheral configs let tag_config = TagConfig::configure(meta, q_enable); @@ -982,24 +993,34 @@ impl DecoderConfig { let bitstream_decoder = BitstreamDecoder::configure(meta, q_enable, q_first, u8_table); let fse_decoder = FseDecoder::configure(meta, q_enable); let sequences_data_decoder = SequencesDataDecoder::configure(meta); + let sequence_execution_config = SequenceExecutionConfig::configure( + meta, + challenges, + &LiteralTable::construct([ + q_enable.into(), + tag_config.tag.into(), + block_config.block_idx.into(), + tag_config.tag_idx.into(), + byte.into(), + tag_config.is_change.into(), + is_padding.into(), + ]), + &sequence_instruction_table, + &SequenceConfig::construct([ + q_enable.into(), + block_config.is_block.into(), + block_config.block_idx.into(), + block_config.num_sequences.into(), + ]), + ); - // TODO(enable): - // let literals_table = [ - // tag_config.tag, - // block_config.block_idx, - // byte_idx, - // byte, - // is_padding, - // ]; - // let sequence_execution_table = SequenceExecutionTable::configure( - // meta, - // challenges, - // &literals_table, - // &sequence_instruction_table, - // ); + debug_assert!(meta.degree() <= 9); // Main config + let const_col = meta.fixed_column(); + meta.enable_constant(const_col); let config = Self { + const_col, q_enable, q_first, byte_idx, @@ -1025,8 +1046,8 @@ impl DecoderConfig { bitstring_table, fse_table, - // TODO(enable): sequence_instruction_table, - // TODO(enable): sequence_execution_table, + sequence_instruction_table, + sequence_execution_config, fixed_table, }; @@ -2009,30 +2030,30 @@ impl DecoderConfig { cb.gate(condition) }); - // TODO: lookup(SeqInstTable) for seq_count_lookup - // meta.lookup_any( - // "DecoderConfig: tag ZstdBlockSequenceHeader (sequence count)", - // |meta| { - // let condition = and::expr([ - // is_zb_sequence_header(meta), - // meta.query_advice(config.tag_config.is_change, Rotation::cur()), - // ]); - // let (block_idx, num_sequences) = ( - // meta.query_advice(config.block_config.block_idx, Rotation::cur()), - // meta.query_advice(config.block_config.num_sequences, Rotation::cur()), - // ); - // [ - // 1.expr(), // q_enabled - // block_idx, - // 1.expr(), // s_beginning - // num_sequences, - // ] - // .into_iter() - // .zip_eq(config.sequence_instruction_table.seq_count_exprs(meta)) - // .map(|(arg, table)| (condition.expr() * arg, table)) - // .collect() - // }, - // ); + meta.lookup_any( + "DecoderConfig: tag ZstdBlockSequenceHeader (sequence count)", + |meta| { + let condition = and::expr([ + meta.query_fixed(config.q_enable, Rotation::cur()), + is_zb_sequence_header(meta), + meta.query_advice(config.tag_config.is_change, Rotation::cur()), + ]); + let (block_idx, num_sequences) = ( + meta.query_advice(config.block_config.block_idx, Rotation::cur()), + meta.query_advice(config.block_config.num_sequences, Rotation::cur()), + ); + [ + 1.expr(), // q_enabled + block_idx, + 1.expr(), // s_beginning + num_sequences, + ] + .into_iter() + .zip_eq(config.sequence_instruction_table.seq_count_exprs(meta)) + .map(|(arg, table)| (condition.expr() * arg, table)) + .collect() + }, + ); debug_assert!(meta.degree() <= 9); @@ -3178,44 +3199,45 @@ impl DecoderConfig { }, ); - // TODO(enable): lookup(SeqInstTable) at code-to-value for seq_values_lookup - // meta.lookup_any( - // "DecoderConfig: tag ZstdBlockSequenceData (sequence instructions table)", - // |meta| { - // // At the row where we compute the code-to-value of LLT, we have the values for - // // all of match offset, match length and literal length. - // let condition = and::expr([ - // meta.query_advice(config.tag_config.is_sequence_data, Rotation::cur()), - // config.bitstream_decoder.is_not_nil(meta, Rotation::cur()), - // config.fse_decoder.is_llt(meta, Rotation::cur()), - // config - // .sequences_data_decoder - // .is_code_to_value(meta, Rotation::cur()), - // ]); - // let (block_idx, sequence_idx) = ( - // meta.query_advice(config.block_config.block_idx, Rotation::cur()), - // meta.query_advice(config.sequences_data_decoder.idx, Rotation::cur()), - // ); - // let (literal_length_value, match_offset_value, match_length_value) = ( - // meta.query_advice(config.sequences_data_decoder.values[0], Rotation::cur()), - // meta.query_advice(config.sequences_data_decoder.values[2], Rotation::cur()), - // meta.query_advice(config.sequences_data_decoder.values[1], Rotation::cur()), - // ); - // [ - // 1.expr(), // q_enabled - // block_idx, - // 0.expr(), // s_beginning - // sequence_idx, - // literal_length_value, - // match_offset_value, - // match_length_value, - // ] - // .into_iter() - // .zip_eq(config.sequence_instruction_table.seq_values_exprs(meta)) - // .map(|(arg, table)| (condition.expr() * arg, table)) - // .collect() - // }, - // ); + meta.lookup_any( + "DecoderConfig: tag ZstdBlockSequenceData (sequence instructions table)", + |meta| { + // At the row where we compute the code-to-value of LLT, we have the values for + // all of match offset, match length and literal length. + let condition = and::expr([ + meta.query_fixed(config.q_enable, Rotation::cur()), + meta.query_advice(config.tag_config.is_sequence_data, Rotation::cur()), + not::expr(meta.query_advice(config.tag_config.is_change, Rotation::cur())), + config.bitstream_decoder.is_not_nil(meta, Rotation::cur()), + config.fse_decoder.is_llt(meta, Rotation::cur()), + config + .sequences_data_decoder + .is_code_to_value(meta, Rotation::cur()), + ]); + let (block_idx, sequence_idx) = ( + meta.query_advice(config.block_config.block_idx, Rotation::cur()), + meta.query_advice(config.sequences_data_decoder.idx, Rotation::cur()), + ); + let (literal_length_value, match_offset_value, match_length_value) = ( + meta.query_advice(config.sequences_data_decoder.values[0], Rotation::cur()), + meta.query_advice(config.sequences_data_decoder.values[2], Rotation::cur()), + meta.query_advice(config.sequences_data_decoder.values[1], Rotation::cur()), + ); + [ + 1.expr(), // q_enabled + block_idx, + 0.expr(), // s_beginning + sequence_idx, + literal_length_value, + match_offset_value, + match_length_value, + ] + .into_iter() + .zip_eq(config.sequence_instruction_table.seq_values_exprs(meta)) + .map(|(arg, table)| (condition.expr() * arg, table)) + .collect() + }, + ); meta.lookup_any( "DecoderConfig: tag ZstdBlockSequenceData (FseTable)", @@ -4062,11 +4084,16 @@ impl DecoderConfig { pub fn assign( &self, layouter: &mut impl Layouter, + raw_bytes: &[u8], + compressed_bytes: &[u8], witness_rows: Vec>, + literal_datas: Vec>, _aux_data: Vec, fse_aux_tables: Vec, block_info_arr: Vec, sequence_info_arr: Vec, + address_table_arr: Vec>, + sequence_exec_info_arr: Vec>, challenges: &Challenges>, k: u32, // witgen_debug @@ -4077,6 +4104,11 @@ impl DecoderConfig { assert!(block_info_arr.len() > 0, "Must have at least 1 block"); assert!(sequence_info_arr.len() > 0, "Must have at least 1 block"); + assert!(address_table_arr.len() == 1, "TODO: multi-block"); + assert!(sequence_exec_info_arr.len() == 1, "TODO: multi-block"); + //let address_table_rows = &address_table_arr[0]; + let sequence_exec_info = &sequence_exec_info_arr[0]; + let mut curr_block_info = block_info_arr[0]; let mut curr_sequence_info = sequence_info_arr[0]; @@ -4142,10 +4174,30 @@ impl DecoderConfig { ), )); } - self.literals_header_table .assign(layouter, literal_headers)?; + ///////////////////////////////////////// + //// Assign Sequence-related Configs //// + ///////////////////////////////////////// + self.sequence_instruction_table.assign( + layouter, + address_table_arr.iter().map(|rows| rows.iter()), + (1 << k) - self.unusable_rows(), + )?; + // TODO: use equality constraint for the exported_len and exported_rlc cell + let (_exported_len, _exported_rlc) = self.sequence_execution_config.assign( + layouter, + challenges, + literal_datas + .iter() + .zip(&sequence_info_arr) + .zip(&sequence_exec_info_arr) + .map(|((lit, seq_info), exec)| (lit.as_slice(), seq_info, exec.as_slice())), + raw_bytes, + (1 << k) - self.unusable_rows(), + )?; + ///////////////////////////////////////// ///// Assign Decompression Region ////// ///////////////////////////////////////// @@ -4334,7 +4386,6 @@ impl DecoderConfig { i, || Value::known(Fr::from(row.state.tag_idx as u64)), )?; - let is_sequence_data = row.state.tag == ZstdTag::ZstdBlockSequenceData; region.assign_advice( || "tag_config.is_sequence_data", @@ -4764,6 +4815,7 @@ mod tests { #[derive(Clone, Debug, Default)] struct DecoderConfigTester { + raw: Vec, compressed: Vec, k: u32, } @@ -4826,23 +4878,44 @@ mod tests { let ( witness_rows, - _decoded_literals, + decoded_literals, aux_data, fse_aux_tables, block_info_arr, sequence_info_arr, + address_table_arr, + sequence_exec_result, ) = process(&self.compressed, challenges.keccak_input()); + let (recovered_bytes, sequence_exec_info_arr) = sequence_exec_result.into_iter().fold( + (Vec::new(), Vec::new()), + |(mut out_byte, mut out_exec), res| { + out_byte.extend(res.recovered_bytes); + out_exec.push(res.exec_trace); + (out_byte, out_exec) + }, + ); + + assert_eq!( + std::str::from_utf8(&recovered_bytes), + std::str::from_utf8(&self.raw), + ); + u8_table.load(&mut layouter)?; bitwise_op_table.load(&mut layouter)?; pow_rand_table.assign(&mut layouter, &challenges, 1 << (self.k - 1))?; config.assign::( &mut layouter, + &self.raw, + &self.compressed, witness_rows, + decoded_literals, aux_data, fse_aux_tables, block_info_arr, sequence_info_arr, + address_table_arr, + sequence_exec_info_arr, &challenges, self.k, )?; @@ -4893,7 +4966,7 @@ mod tests { }; let k = 18; - let decoder_config_tester = DecoderConfigTester { compressed, k }; + let decoder_config_tester = DecoderConfigTester { raw, compressed, k }; let mock_prover = MockProver::::run(k, &decoder_config_tester, vec![]).unwrap(); mock_prover.assert_satisfied_par(); } diff --git a/aggregator/src/aggregation/decoder/seq_exec.rs b/aggregator/src/aggregation/decoder/seq_exec.rs new file mode 100644 index 0000000000..66edc56c4e --- /dev/null +++ b/aggregator/src/aggregation/decoder/seq_exec.rs @@ -0,0 +1,1271 @@ +use super::tables; +use crate::aggregation::decoder::witgen; +use eth_types::Field; +use gadgets::util::{and, not, select, Expr}; +use halo2_proofs::{ + circuit::{AssignedCell, Layouter, Region, Value}, + plonk::{ + Advice, Any, Column, ConstraintSystem, Error, Expression, Fixed, SecondPhase, VirtualCells, + }, + poly::Rotation, +}; +use itertools::Itertools; +use tables::SeqInstTable; +use witgen::{SequenceExec, SequenceExecInfo, SequenceInfo, ZstdTag}; +use zkevm_circuits::{ + evm_circuit::{BaseConstraintBuilder, ConstrainBuilderCommon}, + util::Challenges, +}; + +/// TODO: This is in fact part of the `BlockConfig` in +/// Decoder, we can use BlockConfig if it is decoupled +/// from Decoder module later + +#[derive(Clone)] +pub struct SequenceConfig { + // the enabled flag + q_enabled: Column, + // the `is_block` flag in `BlockConfig` + flag: Column, + // the index of block which the literal section is in + block_index: Column, + // Number of sequences decoded from the sequences section header in the block. + num_sequences: Column, +} + +impl SequenceConfig { + #[cfg(test)] + pub fn mock_assign( + &self, + layouter: &mut impl Layouter, + seq_cfg: &SequenceInfo, + ) -> Result<(), Error> { + layouter.assign_region( + || "seq cfg mock", + |mut region| { + let mut offset = 0usize; + + for col in [self.flag, self.block_index, self.num_sequences] { + region.assign_advice( + || "flush for non lookup", + col, + offset, + || Value::known(F::zero()), + )?; + } + + offset += 1; + for (col, val) in [ + (self.flag, F::one()), + (self.block_index, F::from(seq_cfg.block_idx as u64)), + (self.num_sequences, F::from(seq_cfg.num_sequences as u64)), + ] { + region.assign_advice( + || "flush mock table", + col, + offset, + || Value::known(val), + )?; + } + region.assign_fixed( + || "enable mock table", + self.q_enabled, + offset, + || Value::known(F::one()), + )?; + Ok(()) + }, + ) + } + + /// construct table for rows: [enabled, blk_index, num_seq] + pub fn construct(cols: [Column; 4]) -> Self { + Self { + q_enabled: cols[0].try_into().unwrap(), + flag: cols[1].try_into().unwrap(), + block_index: cols[2].try_into().unwrap(), + num_sequences: cols[3].try_into().unwrap(), + } + } + + /// export the exps for literal copying lookup: [tag, blk_ind, byte_ind, char, padding] + pub fn lookup_tbl(&self, meta: &mut VirtualCells<'_, F>) -> [Expression; 4] { + [ + meta.query_fixed(self.q_enabled, Rotation::cur()), + meta.query_advice(self.flag, Rotation::cur()), + meta.query_advice(self.block_index, Rotation::cur()), + meta.query_advice(self.num_sequences, Rotation::cur()), + ] + } +} + +/// The literal table which execution circuit expect to lookup from +#[derive(Clone)] +pub struct LiteralTable { + // the enabled flag + q_enabled: Column, + // the tag for current row in literal section + tag: Column, + // the index of block which the literal section is in + block_index: Column, + // the 1-indexed byte of byte of literal section's raw bytes + byte_index: Column, + // the corresponding char of current index + char: Column, + // the flag IN NEXT ROW is set to 1 indicate it is + // the last byte in current section + last_flag: Column, + // the flag should be 0 for a valid lookup row + padding_flag: Column, +} + +impl LiteralTable { + #[cfg(test)] + pub fn mock_assign( + &self, + layouter: &mut impl Layouter, + literals: &[u64], + ) -> Result<(), Error> { + layouter.assign_region( + || "literal tbl mock", + |mut region| { + let mut offset = 0usize; + + for col in [ + self.tag, + self.block_index, + self.byte_index, + self.char, + self.last_flag, + self.padding_flag, + ] { + region.assign_advice( + || "flush for non lookup", + col, + offset, + || Value::known(F::zero()), + )?; + } + offset += 1; + // TODO: ensure the index in literal table is 0 or 1 indexed + for (i, char) in literals.iter().copied().enumerate() { + region.assign_fixed( + || "enable mock table", + self.q_enabled, + offset, + || Value::known(F::one()), + )?; + for (col, val) in [ + (self.tag, F::from(ZstdTag::ZstdBlockLiteralsRawBytes as u64)), + (self.block_index, F::one()), + (self.byte_index, F::from(i as u64 + 1)), + (self.char, F::from(char)), + (self.last_flag, F::zero()), + (self.padding_flag, F::zero()), + ] { + region.assign_advice( + || "flush mock table", + col, + offset, + || Value::known(val), + )?; + } + offset += 1; + } + + for col in [self.byte_index, self.char, self.padding_flag] { + region.assign_advice( + || "flush dummy row for border", + col, + offset, + || Value::known(F::zero()), + )?; + } + region.assign_advice( + || "set dummy border", + self.tag, + offset, + || Value::known(F::from(ZstdTag::ZstdBlockLiteralsRawBytes as u64)), + )?; + region.assign_advice( + || "set dummy border", + self.block_index, + offset, + || Value::known(F::from(2 as u64)), + )?; + region.assign_advice( + || "set dummy border", + self.last_flag, + offset, + || Value::known(F::one()), + )?; + + Ok(()) + }, + ) + } + + /// construct table for rows: [q_enable, tag, blk_index, byte_index, char, last, padding] + pub fn construct(cols: [Column; 7]) -> Self { + Self { + q_enabled: cols[0].try_into().unwrap(), + tag: cols[1].try_into().unwrap(), + block_index: cols[2].try_into().unwrap(), + byte_index: cols[3].try_into().unwrap(), + char: cols[4].try_into().unwrap(), + last_flag: cols[5].try_into().unwrap(), + padding_flag: cols[6].try_into().unwrap(), + } + } + + /// export the exps for literal copying lookup: [tag, blk_ind, byte_ind, char, padding] + pub fn lookup_tbl_for_lit_cp( + &self, + meta: &mut VirtualCells<'_, F>, + ) -> [Expression; 6] { + [ + meta.query_fixed(self.q_enabled, Rotation::cur()), + meta.query_advice(self.tag, Rotation::cur()), + meta.query_advice(self.block_index, Rotation::cur()), + meta.query_advice(self.byte_index, Rotation::cur()), + meta.query_advice(self.char, Rotation::cur()), + meta.query_advice(self.padding_flag, Rotation::cur()), + ] + } + + /// export the exps for literal size lookup: [tag, blk_ind, byte_ind, flag, padding] + pub fn lookup_tbl_for_lit_size( + &self, + meta: &mut VirtualCells<'_, F>, + ) -> [Expression; 6] { + [ + meta.query_fixed(self.q_enabled, Rotation::cur()), + meta.query_advice(self.tag, Rotation::cur()), + meta.query_advice(self.block_index, Rotation::cur()), + meta.query_advice(self.byte_index, Rotation::cur()), + meta.query_advice(self.last_flag, Rotation::next()), + meta.query_advice(self.padding_flag, Rotation::cur()), + ] + } +} + +/// SeqExecConfig handling the sequences in each block and output the +/// decompressed bytes +#[derive(Clone, Debug)] +pub struct SeqExecConfig { + // active flag, one active row parse + q_enabled: Column, + // 1-index for each block, keep the same for each row + // until all sequenced has been handled + block_index: Column, + // the 1-indexed seq number (1..=n_seq) for each + // sequence. + seq_index: Column, + // the decoded length of output byte so it is start + // from 1 for the first output char + decoded_len: Column, + // the decoded byte under current index + decoded_byte: Column, + // the rlc of decoded output byte + decoded_rlc: Column, + /// An incremental accumulator of the number of bytes decoded so far. + // decoded_len_acc: Column, + + // the flag indicate current seq is the special one + // (copying the rest bytes in literal section) + s_last_lit_cp_phase: Column, + // the flag indicate the execution is under + // "literal copying" phase + s_lit_cp_phase: Column, + // the flag indicate the execution is under + // back reference phase + s_back_ref_phase: Column, + // the copied index in literal section + literal_pos: Column, + // the back-ref pos + backref_offset: Column, + // counting the progress of back ref bytes + backref_progress: Column, + _marker: std::marker::PhantomData, +} + +type ExportedCell = AssignedCell; + +impl SeqExecConfig { + /// Construct the sequence instruction table + /// the maxium rotation is prev(2), next(1) + pub fn configure( + meta: &mut ConstraintSystem, + challenges: &Challenges>, + literal_table: &LiteralTable, + inst_table: &SeqInstTable, + seq_config: &SequenceConfig, + ) -> Self { + let q_enabled = meta.fixed_column(); + let block_index = meta.advice_column(); + let seq_index = meta.advice_column(); + let decoded_len = meta.advice_column(); + let decoded_byte = meta.advice_column(); + let decoded_rlc = meta.advice_column_in(SecondPhase); + //let decoded_len_acc = meta.advice_column(); + let s_last_lit_cp_phase = meta.advice_column(); + let s_lit_cp_phase = meta.advice_column(); + let s_back_ref_phase = meta.advice_column(); + let backref_offset = meta.advice_column(); + let backref_progress = meta.advice_column(); + let literal_pos = meta.advice_column(); + + // need to constraint the final block index so + // we ensure all blocks has been handled + meta.enable_equality(block_index); + // need to export the final rlc and len + meta.enable_equality(decoded_rlc); + // the flag indicate current row is the beginning of + // a new block + meta.enable_equality(decoded_len); + + // the flag indicate the execution has ended and rows + // are filled by padding data + let mut is_inst_begin = 0.expr(); + // the flag exp indicate current row is the beginning + // of a new instruction, it is also the beginning of + // a literal copying + let mut is_block_begin = 0.expr(); + + let mut is_padding = 0.expr(); + + meta.create_gate("borders", |meta| { + let mut cb = BaseConstraintBuilder::default(); + + // boolean constraint that index is increment + cb.require_boolean("instruction border is boolean", is_inst_begin.expr()); + + is_block_begin = meta.query_advice(block_index, Rotation::cur()) + - meta.query_advice(block_index, Rotation::prev()); + + cb.require_boolean("block border is boolean", is_block_begin.expr()); + + is_inst_begin = select::expr( + is_block_begin.expr(), + 1.expr(), + meta.query_advice(seq_index, Rotation::cur()) + - meta.query_advice(seq_index, Rotation::prev()), + ); + + cb.require_boolean("inst border is boolean", is_inst_begin.expr()); + + cb.gate(meta.query_fixed(q_enabled, Rotation::cur())) + }); + + debug_assert!(meta.degree() <= 9); + + meta.create_gate("phases", |meta| { + let mut cb = BaseConstraintBuilder::default(); + + let s_lit_cp_phase_next = meta.query_advice(s_lit_cp_phase, Rotation::next()); + let s_back_ref_phase_next = meta.query_advice(s_back_ref_phase, Rotation::next()); + let s_lit_cp_phase_prev = meta.query_advice(s_lit_cp_phase, Rotation::prev()); + let s_back_ref_phase_prev = meta.query_advice(s_back_ref_phase, Rotation::prev()); + let s_lit_cp_phase = meta.query_advice(s_lit_cp_phase, Rotation::cur()); + let s_back_ref_phase = meta.query_advice(s_back_ref_phase, Rotation::cur()); + + cb.require_boolean("phase is boolean", s_lit_cp_phase.expr()); + cb.require_boolean("phase is boolean", s_back_ref_phase.expr()); + + is_padding = 1.expr() - s_lit_cp_phase.expr() - s_back_ref_phase.expr(); + // constraint padding is boolean, so cp/back_ref phase is excluded + // i.e. two phases can not be enabled at the same time + cb.require_boolean("padding is boolean", is_padding.expr()); + + cb.condition( + and::expr([ + not::expr(is_inst_begin.expr()), + not::expr(s_lit_cp_phase_prev.expr()), + ]), + |cb| { + cb.require_equal( + "inside a inst, cp phase keep 0 once it changed to 0", + s_lit_cp_phase.expr(), + 0.expr(), + ); + }, + ); + + cb.condition( + and::expr([ + not::expr(is_inst_begin.expr()), + s_back_ref_phase_prev.expr(), + ]), + |cb| { + cb.require_equal( + "inside a inst, backref phase keep 1 once it changed to 1", + s_back_ref_phase_prev.expr(), + 1.expr(), + ); + }, + ); + + let is_padding_next = + 1.expr() - s_lit_cp_phase_next.expr() - s_back_ref_phase_next.expr(); + cb.condition(is_padding.expr(), |cb| { + cb.require_equal( + "padding never change once actived", + is_padding_next.expr(), + is_padding.expr(), + ); + }); + + cb.gate(meta.query_fixed(q_enabled, Rotation::cur())) + }); + + debug_assert!(meta.degree() <= 9); + meta.create_gate("last literal cp phase", |meta| { + let mut cb = BaseConstraintBuilder::default(); + + let s_last_lit_cp_phase_prev = meta.query_advice(s_last_lit_cp_phase, Rotation::prev()); + let s_last_lit_cp_phase = meta.query_advice(s_last_lit_cp_phase, Rotation::cur()); + cb.require_boolean("last lit_cp phase is boolean", s_last_lit_cp_phase.expr()); + + cb.condition( + and::expr([ + s_last_lit_cp_phase.expr(), + not::expr(s_last_lit_cp_phase_prev.expr()), + ]), + |cb| { + cb.require_equal( + "phase can only be actived in inst border", + is_inst_begin.expr(), + 1.expr(), + ); + }, + ); + + cb.condition( + and::expr([ + s_last_lit_cp_phase_prev.expr(), + not::expr(is_block_begin.expr()), + ]), + |cb| { + cb.require_equal( + "phase must keep actived until block end", + s_last_lit_cp_phase_prev.expr(), + s_last_lit_cp_phase.expr(), + ); + }, + ); + + cb.condition(s_last_lit_cp_phase.expr(), |cb| { + cb.require_equal( + "lit cp must actived if last lit cp is actived", + meta.query_advice(s_lit_cp_phase, Rotation::cur()), + 1.expr(), + ); + }); + + cb.gate(meta.query_fixed(q_enabled, Rotation::cur())) + }); + + debug_assert!(meta.degree() <= 9); + meta.create_gate("phase pos", |meta| { + let mut cb = BaseConstraintBuilder::default(); + + let literal_pos_prev = meta.query_advice(literal_pos, Rotation::prev()); + let literal_pos = meta.query_advice(literal_pos, Rotation::cur()); + let s_lit_cp_phase = meta.query_advice(s_lit_cp_phase, Rotation::cur()); + + cb.require_equal( + "lit cp is increment in one block", + select::expr( + is_block_begin.expr(), + // so we start at 1 if first row is lit cp + // or 0 if not + s_lit_cp_phase.expr(), + literal_pos_prev.expr() + s_lit_cp_phase.expr(), + ), + literal_pos.expr(), + ); + + let backref_progress_prev = meta.query_advice(backref_progress, Rotation::prev()); + let backref_progress = meta.query_advice(backref_progress, Rotation::cur()); + + let s_back_ref_phase = meta.query_advice(s_back_ref_phase, Rotation::cur()); + + cb.require_equal( + "backref progress is increment in one inst", + select::expr( + is_inst_begin.expr(), + // so we start at 1 if first row is lit cp + // or 0 if not + s_back_ref_phase.expr(), + backref_progress_prev.expr() + s_back_ref_phase.expr(), + ), + backref_progress.expr(), + ); + + let backref_offset_prev = meta.query_advice(backref_offset, Rotation::prev()); + let backref_offset = meta.query_advice(backref_offset, Rotation::cur()); + + cb.condition(not::expr(is_inst_begin.expr()), |cb| { + cb.require_equal( + "backref offset kee the same in one inst", + backref_offset.expr(), + backref_offset_prev.expr(), + ) + }); + + cb.gate(meta.query_fixed(q_enabled, Rotation::cur())) + }); + + debug_assert!(meta.degree() <= 9); + + meta.create_gate("output and paddings", |meta| { + let mut cb = BaseConstraintBuilder::default(); + + let decoded_len_prev = meta.query_advice(decoded_len, Rotation::prev()); + let decoded_rlc_prev = meta.query_advice(decoded_rlc, Rotation::prev()); + let decoded_len = meta.query_advice(decoded_len, Rotation::cur()); + let decoded_rlc = meta.query_advice(decoded_rlc, Rotation::cur()); + let decoded_byte = meta.query_advice(decoded_byte, Rotation::cur()); + + cb.require_equal( + "decoded len increase 1 in next row until paddings", + select::expr( + is_padding.expr(), + decoded_len_prev.expr(), + decoded_len_prev.expr() + 1.expr(), + ), + decoded_len.expr(), + ); + cb.condition(is_padding.expr(), |cb| { + cb.require_zero("while padding, byte is always zero", decoded_byte.expr()) + }); + + cb.require_equal( + "rlc accumulate", + decoded_rlc_prev.expr() + * select::expr( + decoded_len.expr() - decoded_len_prev.expr(), + challenges.evm_word(), + 1.expr(), + ) + + decoded_byte.expr(), + decoded_rlc.expr(), + ); + + cb.gate(meta.query_fixed(q_enabled, Rotation::cur())) + }); + + meta.lookup_any("the instruction from inst table", |meta| { + let q_enabled = meta.query_fixed(q_enabled, Rotation::prev()); + + let block_index = meta.query_advice(block_index, Rotation::prev()); + let seq_index = meta.query_advice(seq_index, Rotation::prev()); + let not_last_lit_cp = + not::expr(meta.query_advice(s_last_lit_cp_phase, Rotation::prev())); + let literal_pos_at_inst_end = meta.query_advice(literal_pos, Rotation::prev()); + let backref_offset_at_inst_end = meta.query_advice(backref_offset, Rotation::prev()); + let backref_len_at_inst_end = meta.query_advice(backref_progress, Rotation::prev()); + + inst_table + .instructions() + .into_iter() + .zip([ + block_index, + seq_index, + backref_offset_at_inst_end, + literal_pos_at_inst_end, + backref_len_at_inst_end, + ]) + .map(|(lookup_col, src_expr)| { + let lookup_expr = meta.query_advice(lookup_col, Rotation::cur()); + let src_expr = + src_expr * is_inst_begin.expr() * not_last_lit_cp.expr() * q_enabled.expr(); + assert!(src_expr.degree() <= 5); + (src_expr, lookup_expr) + }) + .collect() + }); + + debug_assert!(meta.degree() <= 9); + meta.lookup_any("lit cp char", |meta| { + let enabled = meta.query_fixed(q_enabled, Rotation::cur()) + * meta.query_advice(s_lit_cp_phase, Rotation::cur()); + + let block_index = meta.query_advice(block_index, Rotation::cur()); + let literal_pos = meta.query_advice(literal_pos, Rotation::cur()); + let cp_byte = meta.query_advice(decoded_byte, Rotation::cur()); + + let tbl_exprs = literal_table.lookup_tbl_for_lit_cp(meta); + tbl_exprs + .into_iter() + .zip_eq([ + 1.expr(), + ZstdTag::ZstdBlockLiteralsRawBytes.expr(), + block_index, + literal_pos, + cp_byte, + 0.expr(), + ]) + .map(|(lookup_expr, src_expr)| (src_expr * enabled.expr(), lookup_expr)) + .collect() + }); + + debug_assert!(meta.degree() <= 9); + meta.lookup_any("back ref char", |meta| { + let enabled = meta.query_fixed(q_enabled, Rotation::cur()); + + let backref_pos = meta.query_advice(backref_offset, Rotation::cur()); + let cp_byte = meta.query_advice(decoded_byte, Rotation::cur()); + let decode_pos = meta.query_advice(decoded_len, Rotation::cur()); + let ref_pos = decode_pos.expr() - backref_pos.expr(); + + let tbl_exprs = [enabled.expr(), decode_pos.expr(), cp_byte.expr()]; + tbl_exprs + .into_iter() + .zip([1.expr(), ref_pos, cp_byte]) + .map(|(lookup_expr, src_expr)| { + ( + src_expr + * enabled.expr() + * meta.query_advice(s_back_ref_phase, Rotation::cur()), + lookup_expr, + ) + }) + .collect() + }); + + debug_assert!(meta.degree() <= 9); + meta.lookup_any("actual literal byte", |meta| { + let q_enabled = meta.query_fixed(q_enabled, Rotation::prev()); + let block_index = meta.query_advice(block_index, Rotation::prev()); + let literal_pos_at_block_end = meta.query_advice(literal_pos, Rotation::prev()); + + let tbl_exprs = literal_table.lookup_tbl_for_lit_size(meta); + tbl_exprs + .into_iter() + .zip_eq([ + 1.expr(), + ZstdTag::ZstdBlockLiteralsRawBytes.expr(), + block_index, + literal_pos_at_block_end, + 1.expr(), + 0.expr(), + ]) + .map(|(lookup_expr, src_expr)| { + ( + src_expr * is_block_begin.expr() * q_enabled.expr(), + lookup_expr, + ) + }) + .collect() + }); + + debug_assert!(meta.degree() <= 9); + meta.lookup_any("instruction counts", |meta| { + let q_enabled = meta.query_fixed(q_enabled, Rotation::prev()); + let block_index = meta.query_advice(block_index, Rotation::prev()); + let seq_index_at_block_end = meta.query_advice(seq_index, Rotation::prev()) + // if we have a additional literal copying phase, we + // in fact has one extra instruction + - meta.query_advice(s_last_lit_cp_phase, Rotation::prev()); + + seq_config + .lookup_tbl(meta) + .into_iter() + .zip_eq([1.expr(), 1.expr(), block_index, seq_index_at_block_end]) + .map(|(lookup_expr, src_expr)| { + ( + src_expr * is_block_begin.expr() * q_enabled.expr(), + lookup_expr, + ) + }) + .collect() + }); + + debug_assert!(meta.degree() <= 9); + Self { + q_enabled, + block_index, + seq_index, + decoded_len, + decoded_byte, + decoded_rlc, + s_last_lit_cp_phase, + s_lit_cp_phase, + s_back_ref_phase, + backref_progress, + literal_pos, + backref_offset, + _marker: Default::default(), + } + } + + /// fill the rest region with padding rows + pub fn paddings<'a>( + &self, + region: &mut Region, + offset: usize, + till_offset: usize, + decoded_len: usize, + decoded_rlc: Value, + padded_block_ind: u64, + ) -> Result<(ExportedCell, ExportedCell), Error> { + for offset in offset..=till_offset { + // flush one more row for rotation next() + if offset != till_offset { + region.assign_fixed( + || "enable padding row", + self.q_enabled, + offset, + || Value::known(F::one()), + )?; + } + + for (col, val) in [ + (self.block_index, Value::known(F::from(padded_block_ind))), + (self.decoded_len, Value::known(F::from(decoded_len as u64))), + (self.decoded_rlc, decoded_rlc), + ] { + region.assign_advice(|| "set padding rows", col, offset, || val)?; + } + + for col in [ + self.decoded_byte, + self.s_last_lit_cp_phase, + self.s_lit_cp_phase, + self.s_back_ref_phase, + self.backref_offset, + self.backref_progress, + self.literal_pos, + self.seq_index, + ] { + region.assign_advice( + || "flush padding rows", + col, + offset, + || Value::known(F::zero()), + )?; + } + } + + let len_export = region.assign_advice( + || "export len", + self.decoded_len, + till_offset, + || Value::known(F::from(decoded_len as u64)), + )?; + + let rlc_export = region.assign_advice( + || "export rlc", + self.decoded_rlc, + till_offset, + || decoded_rlc, + )?; + + Ok((len_export, rlc_export)) + } + + /// assign a single block from current offset / byte decompression + /// progress and return the offset / progress below the last used row + pub fn assign_block<'a>( + &self, + region: &mut Region, + chng: Value, + mut offset: usize, + mut decoded_len: usize, + mut decoded_rlc: Value, + seq_info: &SequenceInfo, + seq_exec_infos: impl Iterator, + literals: &[u64], + // all of the decompressed bytes, not only current block + decompressed_bytes: &[u8], + ) -> Result<(usize, usize, Value), Error> { + let block_ind = seq_info.block_idx; + let mut cur_literal_cp = 0usize; + let mut inst_begin_offset = offset; + let mut cur_inst: Option = None; + + for SequenceExec(inst_ind, exec_info) in seq_exec_infos { + let inst_ind = *inst_ind + 1; + if let Some(old_ind) = cur_inst.replace(inst_ind) { + if old_ind != inst_ind { + inst_begin_offset = offset; + } + } + + let base_rows = [ + (self.block_index, F::from(block_ind as u64)), + (self.seq_index, F::from(inst_ind as u64)), + ( + self.s_last_lit_cp_phase, + if inst_ind > seq_info.num_sequences { + F::one() + } else { + F::zero() + }, + ), + ]; + + let (is_literal, r) = match exec_info { + SequenceExecInfo::LiteralCopy(r) => { + assert_eq!(cur_literal_cp, r.start); + cur_literal_cp = r.end; + (true, r.clone()) + } + SequenceExecInfo::BackRef(r) => (false, r.clone()), + }; + + for (i, pos) in r.clone().enumerate() { + decoded_len += 1; + let out_byte = F::from(if is_literal { + literals[pos as usize] + } else { + decompressed_bytes[pos as usize] as u64 + }); + decoded_rlc = decoded_rlc * chng + Value::known(out_byte); + + //println!("set row at {}, output {}:{:x}", offset, decoded_len, + // out_byte.get_lower_32()); + + region.assign_advice( + || "set output region", + self.decoded_rlc, + offset, + || decoded_rlc, + )?; + + // all of the "pos" is 1-index for lookup since the + // bytes_output is 1-indexed + let pos = pos + 1; + let ref_offset = if is_literal { + None + } else { + Some(decoded_len - pos) + }; + // for back-ref part, we refill the backref_pos in the whole + // instruction + if !is_literal && i == 0 { + //println!("fill-back match offset {} in {}..{}", ref_offset.unwrap(), + // inst_begin_offset, offset); + for back_offset in inst_begin_offset..offset { + region.assign_advice( + || "set output region", + self.backref_offset, + back_offset, + || Value::known(F::from(ref_offset.expect("backref set") as u64)), + )?; + } + } + + let decodes = [ + (self.decoded_len, F::from(decoded_len as u64)), + (self.decoded_byte, out_byte), + ( + self.backref_offset, + F::from(ref_offset.unwrap_or_default() as u64), + ), + ]; + + for (col, val) in + base_rows + .clone() + .into_iter() + .chain(decodes) + .chain(if is_literal { + //println!("inst {}: literal cp {}-{}", inst_ind, pos, 0); + [ + (self.s_lit_cp_phase, F::one()), + (self.s_back_ref_phase, F::zero()), + (self.literal_pos, F::from(pos as u64)), + (self.backref_progress, F::zero()), + ] + } else { + //println!("inst {}: backref cp {}-{}", inst_ind, cur_literal_cp, i+1); + [ + (self.s_lit_cp_phase, F::zero()), + (self.s_back_ref_phase, F::one()), + (self.literal_pos, F::from(cur_literal_cp as u64)), + (self.backref_progress, F::from(i as u64 + 1)), + ] + }) + { + region.assign_advice( + || "set output region", + col, + offset, + || Value::known(val), + )?; + } + + region.assign_fixed( + || "enable row", + self.q_enabled, + offset, + || Value::known(F::one()), + )?; + offset += 1; + } + } + + debug_assert_eq!(cur_literal_cp, literals.len()); + + Ok((offset, decoded_len, decoded_rlc)) + } + + /// assign the top row + pub fn init_top_row( + &self, + region: &mut Region, + from_offset: Option, + ) -> Result { + let offset = from_offset.unwrap_or_default(); + + for col in [ + self.decoded_byte, + self.decoded_len, + self.decoded_rlc, + self.block_index, + self.seq_index, + self.s_back_ref_phase, + self.s_lit_cp_phase, + self.s_back_ref_phase, + self.backref_offset, + self.literal_pos, + self.backref_progress, + ] { + region.assign_advice(|| "top row fluash", col, offset, || Value::known(F::zero()))?; + } + + for (col, val) in [ + (self.decoded_len, F::zero()), + (self.decoded_rlc, F::zero()), + (self.block_index, F::zero()), + ] { + region.assign_advice_from_constant(|| "top row constraint", col, offset, val)?; + } + + region.assign_advice_from_constant( + || "blk index begin constraint", + self.block_index, + offset + 1, + F::one(), + )?; + + Ok(offset + 1) + } + + /// assign with multiple blocks and export the cell at + /// final row (specified by `eanbled_rows`) for + /// (decoded_len, decoded_rlc) + pub fn assign<'a>( + &self, + layouter: &mut impl Layouter, + chng: &Challenges>, + // per-block inputs: (literal, seq_info, seq_exec_trace) + per_blk_inputs: impl IntoIterator + + Clone, + // all of the decompressed bytes, not only current block + decompressed_bytes: &[u8], + enabled_rows: usize, + ) -> Result<(ExportedCell, ExportedCell), Error> { + layouter.assign_region( + || "output region", + |mut region| { + let mut offset = self.init_top_row(&mut region, None)?; + let mut decoded_len = 0usize; + let mut decoded_rlc = Value::known(F::zero()); + let mut blk_ind = 0; + for (literals, seq_info, exec_trace) in per_blk_inputs.clone() { + blk_ind = seq_info.block_idx; + (offset, decoded_len, decoded_rlc) = self.assign_block( + &mut region, + chng.evm_word(), + offset, + decoded_len, + decoded_rlc, + seq_info, + exec_trace.iter(), + literals, + decompressed_bytes, + )?; + } + + self.paddings( + &mut region, + offset, + enabled_rows, + decoded_len, + decoded_rlc, + blk_ind as u64 + 1, + ) + }, + ) + } + + #[cfg(test)] + pub fn mock_assign( + &self, + layouter: &mut impl Layouter, + chng: &Challenges>, + n_seq: usize, + seq_exec_infos: &[SequenceExec], + literals: &[u8], + // all of the decompressed bytes, not only current block + decompressed_bytes: &[u8], + enabled_rows: usize, + ) -> Result<(), Error> { + let literals = literals + .iter() + .copied() + .map(|b| b as u64) + .collect::>(); + + layouter.assign_region( + || "output region", + |mut region| { + let offset = self.init_top_row(&mut region, None)?; + let (offset, decoded_len, decoded_rlc) = self.assign_block( + &mut region, + chng.evm_word(), + offset, + 0, + Value::known(F::zero()), + &SequenceInfo { + block_idx: 1, + num_sequences: n_seq, + ..Default::default() + }, + seq_exec_infos.iter(), + &literals, + decompressed_bytes, + )?; + self.paddings( + &mut region, + offset, + enabled_rows, + decoded_len, + decoded_rlc, + 2, + )?; + + Ok(()) + }, + ) + } +} + +#[cfg(test)] +mod tests { + + use super::*; + use halo2_proofs::{ + circuit::SimpleFloorPlanner, dev::MockProver, halo2curves::bn256::Fr, plonk::Circuit, + }; + use witgen::AddressTableRow; + use zkevm_circuits::util::MockChallenges; + + #[derive(Clone, Debug)] + struct SeqExecMock { + outputs: Vec, + literals: Vec, + seq_conf: SequenceInfo, + insts: Vec, + exec_trace: Vec, + } + + impl SeqExecMock { + // use the code in witgen to generate exec trace + pub fn mock_generate(literals: Vec, insts: Vec) -> Self { + let seq_conf = SequenceInfo { + block_idx: 1, + num_sequences: insts.len(), + ..Default::default() + }; + + let mut exec_trace = Vec::new(); + let mut outputs = Vec::new(); + + let mut current_literal_pos: usize = 0; + for inst in &insts { + let new_literal_pos = current_literal_pos + (inst.literal_length as usize); + if new_literal_pos > current_literal_pos { + let r = current_literal_pos..new_literal_pos; + exec_trace.push(SequenceExec( + inst.instruction_idx as usize, + SequenceExecInfo::LiteralCopy(r.clone()), + )); + outputs.extend_from_slice(&literals[r]); + } + + let match_pos = outputs.len() - (inst.actual_offset as usize); + if inst.match_length > 0 { + let r = match_pos..(inst.match_length as usize + match_pos); + exec_trace.push(SequenceExec( + inst.instruction_idx as usize, + SequenceExecInfo::BackRef(r.clone()), + )); + let matched_bytes = Vec::from(&outputs[r]); + outputs.extend(matched_bytes); + } + current_literal_pos = new_literal_pos; + } + + // Add remaining literal bytes + if current_literal_pos < literals.len() { + let r = current_literal_pos..literals.len(); + exec_trace.push(SequenceExec( + seq_conf.num_sequences, + SequenceExecInfo::LiteralCopy(r.clone()), + )); + outputs.extend_from_slice(&literals[r]); + } + + Self { + outputs, + literals, + seq_conf, + insts, + exec_trace, + } + } + } + + #[derive(Clone)] + struct SeqExecMockConfig { + config: SeqExecConfig, + inst_tbl: SeqInstTable, + literal_tbl: LiteralTable, + seq_cfg: SequenceConfig, + chng_mock: MockChallenges, + } + + impl Circuit for SeqExecMock { + type Config = SeqExecMockConfig; + type FloorPlanner = SimpleFloorPlanner; + fn without_witnesses(&self) -> Self { + unimplemented!() + } + + fn configure(meta: &mut ConstraintSystem) -> Self::Config { + let const_col = meta.fixed_column(); + meta.enable_constant(const_col); + + let literal_tbl = LiteralTable::construct([ + meta.fixed_column().into(), + meta.advice_column().into(), + meta.advice_column().into(), + meta.advice_column().into(), + meta.advice_column().into(), + meta.advice_column().into(), + meta.advice_column().into(), + ]); + + let seq_cfg = SequenceConfig::construct([ + meta.fixed_column().into(), + meta.advice_column().into(), + meta.advice_column().into(), + meta.advice_column().into(), + ]); + + let inst_tbl = SeqInstTable::configure(meta); + + let chng_mock = MockChallenges::construct(meta); + let chng = chng_mock.exprs(meta); + + let config = SeqExecConfig::configure(meta, &chng, &literal_tbl, &inst_tbl, &seq_cfg); + + Self::Config { + config, + literal_tbl, + inst_tbl, + seq_cfg, + chng_mock, + } + } + + fn synthesize( + &self, + config: Self::Config, + mut layouter: impl Layouter, + ) -> Result<(), Error> { + config.literal_tbl.mock_assign( + &mut layouter, + self.literals + .iter() + .copied() + .map(|b| b as u64) + .collect::>() + .as_slice(), + )?; + + config.seq_cfg.mock_assign(&mut layouter, &self.seq_conf)?; + + config + .inst_tbl + .mock_assign(&mut layouter, &self.insts, 15)?; + + let chng_val = config.chng_mock.values(&mut layouter); + + config.config.mock_assign( + &mut layouter, + &chng_val, + self.insts.len(), + &self.exec_trace, + &self.literals, + &self.outputs, + 50, + )?; + + Ok(()) + } + } + + #[test] + fn seq_exec_literal_only() { + // no instructions, we only copy literals to output + let circuit = SeqExecMock::mock_generate(Vec::from("abcd".as_bytes()), Vec::new()); + + assert_eq!(circuit.outputs, Vec::from("abcd".as_bytes())); + + let k = 12; + let mock_prover = + MockProver::::run(k, &circuit, vec![]).expect("failed to run mock prover"); + mock_prover.verify().unwrap(); + } + + #[test] + fn seq_exec_simple() { + // no instructions, we only copy literals to output + let circuit = SeqExecMock::mock_generate( + Vec::from("abcdef".as_bytes()), + AddressTableRow::mock_samples_full([ + [1, 4, 1, 1, 4, 8], + [9, 1, 3, 6, 1, 4], + [3, 0, 4, 5, 6, 1], + ]), + ); + + assert_eq!(circuit.outputs, Vec::from("abcddeabcdeabf".as_bytes())); + + let k = 12; + let mock_prover = + MockProver::::run(k, &circuit, vec![]).expect("failed to run mock prover"); + mock_prover.verify().unwrap(); + } + + #[test] + fn seq_exec_no_tail_cp() { + // no instructions, we only copy literals to output + let circuit = SeqExecMock::mock_generate( + Vec::from("abcde".as_bytes()), + AddressTableRow::mock_samples_full([[1, 4, 1, 1, 4, 8], [9, 1, 3, 6, 1, 4]]), + ); + + assert_eq!(circuit.outputs, Vec::from("abcddeabc".as_bytes())); + + let k = 12; + let mock_prover = + MockProver::::run(k, &circuit, vec![]).expect("failed to run mock prover"); + mock_prover.verify().unwrap(); + } +} diff --git a/aggregator/src/aggregation/decoder/tables.rs b/aggregator/src/aggregation/decoder/tables.rs index dbc08b22f4..0957cf238f 100644 --- a/aggregator/src/aggregation/decoder/tables.rs +++ b/aggregator/src/aggregation/decoder/tables.rs @@ -12,6 +12,9 @@ pub use fse::FseTable; mod literals_header; pub use literals_header::LiteralsHeaderTable; +mod seqinst_table; +/// Input for validating the sequence instruction comes from the parsed value +pub use seqinst_table::SeqInstTable; /// Fixed lookup table and its variants. mod fixed; pub use fixed::{predefined_fse, FixedLookupTag, FixedTable, PredefinedFse}; diff --git a/aggregator/src/aggregation/decoder/tables/seqinst_table.rs b/aggregator/src/aggregation/decoder/tables/seqinst_table.rs new file mode 100644 index 0000000000..3bc01a78ee --- /dev/null +++ b/aggregator/src/aggregation/decoder/tables/seqinst_table.rs @@ -0,0 +1,1046 @@ +use crate::aggregation::decoder::witgen; +use eth_types::Field; +use gadgets::{ + is_equal::*, + is_zero::*, + util::{and, not, select, Expr}, +}; +use halo2_proofs::{ + circuit::{Layouter, Region, Value}, + plonk::{Advice, Any, Column, ConstraintSystem, Error, Expression, Fixed, VirtualCells}, + poly::Rotation, +}; +use witgen::AddressTableRow; +use zkevm_circuits::{ + evm_circuit::{BaseConstraintBuilder, ConstrainBuilderCommon}, + table::LookupTable, +}; + +/// Table used carry the raw sequence instructions parsed from sequence section +/// and would be later transformed as the back-reference instructions +/// +/// For every block, one row in the table represent a single sequence instruction +/// in the sequence section, and handle all data parsed from the same sequence. +/// The 'block_index' is a 1-index for each block with n sequences in its +/// sequence section, the parsed value from bitstream for current sequence is put +/// in the 'input cols' section (`literal_len`, `match_offset` and `match_len`) +/// The transformed sequence instructions is put in 'output cols' section ( +/// `acc_literal_len`, `offset` and `match_len`), +/// notice we can use `match_len` without transformation. +/// +/// | enabled |block_index| n_seq |seq_index|s_beginning||| +/// |---------|-----------|-------|---------|-----------|------------|-------------| +/// | 1 | 1 | 30 | 0 | 1 | | | +/// | 1 | 1 | 30 | 1 | 0 | (4,2,4) | (4,4,4) | +/// | 1 | 1 | 30 | 2 | 0 | (1,5,2) | (5,5,2) | +/// | 1 | 1 | 30 | 3 | 0 | (0,2,1) | (5,1,1) | +/// | 1 | ... | 30 | ... | 0 | ... | | +/// | 1 | 1 | 30 | 30 | 0 | (1,50,11) | | +/// | 1 | 2 | 20 | 0 | 1 | | | +/// | 1 | 2 | 20 | 1 | 0 | (3,52,13) | | +/// | 1 | ... | 20 | ... | 0 | | | +/// | 1 | 2 | 20 | 20 | 0 | | | +/// | 1 | 3 | 4 | 0 | 1 | | | +/// | ... | ... | ... | ... | ... | | | +/// | 1 | 998 | 0 | 0 | 1 | | | +/// | 1 | 999 | 0 | 0 | 1 | | | +/// +/// When all sequences from compressed data has been handled, the rest rows being enabled +/// (q_enabled is true) has to be padded with increased block index, with `n_seq` is 0 +/// and `s_beginning` is true +/// +/// The transform from 'input cols' to 'output cols' according to zstd's spec +/// include following steps: +/// 1. accumulate the copied literal bytes in one section +/// 2. for match offset > 3, set the actual offset val is -=3, else we refer it +/// from the reference tables represented by 'repeated_offset_1/2/3' cols +/// 3. After each sequence, the reference tables is updated according to the +/// value of cooked offset and whether `literal_len` is zero +/// +/// |literal_len|match_offset|acc_lit_len| offset |match_len|rep_offset_1|rep_offset_2|rep_offset_3|s_beginning| +/// |-----------|------------|-----------|--------|---------|------------|------------|------------|-----------| +/// | | | | | | 1 | 4 | 8 | 1 | +/// | 4 | 2 | 4 | 4 | 4 | 4 | 1 | 8 | 0 | +/// | 1 | 5 | 5 | 5 | 2 | 5 | 4 | 1 | 0 | +/// | 0 | 2 | 5 | 1 | 1 | 1 | 5 | 4 | 0 | +/// | | | | | | | | | 0 | + +#[derive(Clone, Debug)] +pub struct SeqInstTable { + // active flag, one active row parse + q_enabled: Column, + + // 1-index for each block, keep the same for each row + // until all sequenced has been handled + block_index: Column, + // the count of sequences in one block, keey the same + // for each row when block index is not changed + n_seq: Column, + // the 1-indexed seq number (1..=n_seq) for each + // sequence. We have extra row at the beginning of + // each block with seq_index is 0 + seq_index: Column, + // the flag for the first row in each block (i.e. seq_index is 0) + s_beginning: Column, + + // the value directly decoded from bitstream, one row + // for one sequence + literal_len: Column, + match_offset: Column, + match_len: Column, + + // exported instructions for one sequence, + // note the match_len would be exported as-is + // updated offset + offset: Column, + // updated (acc) literal len + acc_literal_len: Column, + + // the reference table for repeated offset + rep_offset_1: Column, + rep_offset_2: Column, + rep_offset_3: Column, + + // 3 mode on update ref table, corresponding to + // 1: offset = 1 (if lt_len != 0) + ref_update_mode_1: Column, + // 2: offset = 2 or offset = 1 (if lt_len == 0) + ref_update_mode_2: Column, + // 3: offset = 3 or offset = 2 (if lt_len == 0) + ref_update_mode_3: Column, + // 4: special case of offset = 3 (if lt_len == 0) + ref_update_mode_4: Column, + + // detect if literal_len is zero + literal_is_zero: IsZeroConfig, + // detect if seq_index in current row equal + // to n_seq (i.e. n_seq - seq_index is zero) + seq_index_is_n_seq: IsEqualConfig, + // detect if current match_offset is 1, 2 or 3 + offset_is_1: IsEqualConfig, + offset_is_2: IsEqualConfig, + offset_is_3: IsEqualConfig, + + // detect if rep_offset_1 is 0 (indicate the data + // is corrupt) + ref_offset_1_is_zero: IsZeroConfig, +} + +impl LookupTable for SeqInstTable { + fn columns(&self) -> Vec> { + vec![ + self.q_enabled.into(), + self.block_index.into(), + self.n_seq.into(), + self.s_beginning.into(), + self.seq_index.into(), + self.literal_len.into(), + self.match_offset.into(), + self.match_len.into(), + ] + } + + fn annotations(&self) -> Vec { + vec![ + String::from("q_enabled"), + String::from("n_seq"), + String::from("block_index"), + String::from("s_beginning"), + String::from("seq_index"), + String::from("literal_len"), + String::from("match_offset"), + String::from("match_len"), + ] + } +} + +#[derive(Clone, Debug)] +struct ChipContext { + literal_is_zero_chip: IsZeroChip, + ref_offset_1_is_zero_chip: IsZeroChip, + seq_index_chip: IsEqualChip, + offset_is_1_chip: IsEqualChip, + offset_is_2_chip: IsEqualChip, + offset_is_3_chip: IsEqualChip, +} + +impl ChipContext { + fn construct(config: &SeqInstTable) -> Self { + let literal_is_zero_chip = IsZeroChip::construct(config.literal_is_zero.clone()); + let ref_offset_1_is_zero_chip = IsZeroChip::construct(config.ref_offset_1_is_zero.clone()); + let seq_index_chip = IsEqualChip::construct(config.seq_index_is_n_seq.clone()); + let offset_is_1_chip = IsEqualChip::construct(config.offset_is_1.clone()); + let offset_is_2_chip = IsEqualChip::construct(config.offset_is_2.clone()); + let offset_is_3_chip = IsEqualChip::construct(config.offset_is_3.clone()); + + Self { + literal_is_zero_chip, + ref_offset_1_is_zero_chip, + seq_index_chip, + offset_is_1_chip, + offset_is_2_chip, + offset_is_3_chip, + } + } +} + +impl SeqInstTable { + /// The sequence count should be lookuped by parsed bitstream, + /// used the block index and value for sequnce count tag to + /// lookup (`true`, `block_index`, 1, `value`) + /// The table would be padded by increased block index to + /// fill all rows being enabled + /// + /// | enabled |block_index| flag | n_seq | + /// |---------|-----------|-------|-------| + /// | 1 | 1 | 1 | 30 | + /// | 1 | ... | ... | 30 | + /// | 1 | 2 | 1 | 20 | + /// | 1 | ... | ... | 20 | + /// | 1 | 3 | 1 | 4 | + /// | ... | ... | ... | ... | + /// | 1 | 999 | 1 | 0 | + pub fn seq_count_exprs(&self, meta: &mut VirtualCells) -> Vec> { + vec![ + meta.query_fixed(self.q_enabled, Rotation::cur()), + meta.query_advice(self.block_index, Rotation::cur()), + meta.query_advice(self.s_beginning, Rotation::cur()), + meta.query_advice(self.n_seq, Rotation::cur()), + ] + } + + /// The sequence values should be lookuped by parsed bitstream, + /// used the block index and value with each sequence tag for + /// multiple lookup (`true`, `block_index`, 0, `seq_index`, `value`) on + /// corresponding value column (literal len, offset, match len) + /// , or a lookup with suitable rotations + /// | enabled |block_index|s_beginning|seq_index| literal | offset | match | + /// |---------|-----------|-----------|---------|---------|--------|-------| + /// | 1 | 1 | 0 | 1 | 4 | 2 | 4 | + /// | 1 | 1 | 0 | 2 | 1 | 5 | 2 | + /// | 1 | 1 | 0 | 3 | 0 | 2 | 3 | + /// | 1 | ... | 0 | ... | ... | ... | ... | + /// | 1 | 1 | 0 | 30 | 1 | 50 | 11 | + /// | 1 | 2 | 0 | 1 | 3 | 52 | 13 | + /// | 1 | ... | 0 | ... | ... | ... | ... | + pub fn seq_values_exprs(&self, meta: &mut VirtualCells) -> Vec> { + vec![ + meta.query_fixed(self.q_enabled, Rotation::cur()), + meta.query_advice(self.block_index, Rotation::cur()), + meta.query_advice(self.s_beginning, Rotation::cur()), + meta.query_advice(self.seq_index, Rotation::cur()), + meta.query_advice(self.literal_len, Rotation::cur()), + meta.query_advice(self.match_offset, Rotation::cur()), + meta.query_advice(self.match_len, Rotation::cur()), + ] + } + + /// Obtian the instruction table cols + pub fn instructions(&self) -> [Column; 5] { + [ + self.block_index, + self.seq_index, + self.offset, + self.acc_literal_len, + self.match_len, + ] + } + + /// Construct the sequence instruction table + /// the maxium rotation is prev(1), next(1) + pub fn configure(meta: &mut ConstraintSystem) -> Self { + let q_enabled = meta.fixed_column(); + let block_index = meta.advice_column(); + let n_seq = meta.advice_column(); + let literal_len = meta.advice_column(); + let match_offset = meta.advice_column(); + let match_len = meta.advice_column(); + let offset = meta.advice_column(); + let acc_literal_len = meta.advice_column(); + let s_beginning = meta.advice_column(); + let seq_index = meta.advice_column(); + let rep_offset_1 = meta.advice_column(); + let rep_offset_2 = meta.advice_column(); + let rep_offset_3 = meta.advice_column(); + let ref_update_mode_1 = meta.advice_column(); + let ref_update_mode_2 = meta.advice_column(); + let ref_update_mode_3 = meta.advice_column(); + let ref_update_mode_4 = meta.advice_column(); + + let [literal_is_zero, ref_offset_1_is_zero] = [literal_len, rep_offset_1].map(|col| { + let inv_col = meta.advice_column(); + IsZeroChip::configure( + meta, + |meta| meta.query_fixed(q_enabled, Rotation::cur()), + |meta| meta.query_advice(col, Rotation::cur()), + inv_col, + ) + }); + let [offset_is_1, offset_is_2, offset_is_3] = [1, 2, 3].map(|val| { + IsEqualChip::configure( + meta, + |meta| meta.query_fixed(q_enabled, Rotation::cur()), + |meta| meta.query_advice(match_offset, Rotation::cur()), + |_| val.expr(), + ) + }); + let seq_index_is_n_seq = IsEqualChip::configure( + meta, + |meta| meta.query_fixed(q_enabled, Rotation::cur()), + |meta| meta.query_advice(seq_index, Rotation::cur()), + |meta| meta.query_advice(n_seq, Rotation::cur()), + ); + + // seq_index must increment and compare with n_seq for seq border + meta.create_gate("seq index and section borders", |meta| { + let mut cb = BaseConstraintBuilder::default(); + + let seq_index_next = meta.query_advice(seq_index, Rotation::next()); + let seq_index = meta.query_advice(seq_index, Rotation::cur()); + let is_seq_border = &seq_index_is_n_seq; + + cb.require_equal( + "seq index must increment or 0 in s_beginning", + select::expr(is_seq_border.expr(), 0.expr(), seq_index.expr() + 1.expr()), + seq_index_next.expr(), + ); + + let s_beginning = meta.query_advice(s_beginning, Rotation::next()); + cb.require_boolean("s_beginning is boolean", s_beginning.expr()); + + cb.condition(not::expr(is_seq_border.expr()), |cb| { + cb.require_zero( + "s_beginning on enabled after seq border", + s_beginning.expr(), + ) + }); + + cb.gate(meta.query_fixed(q_enabled, Rotation::next())) + }); + + debug_assert!(meta.degree() <= 9); + + // block index must be increment at seq border, so section for each + // block index can occur once + // and the lookup from seq_table enforce valid block / seq / s_beginning + // must be put + meta.create_gate("block index", |meta| { + let mut cb = BaseConstraintBuilder::default(); + + let block_index_next = meta.query_advice(block_index, Rotation::next()); + let block_index = meta.query_advice(block_index, Rotation::cur()); + + let is_seq_border = &seq_index_is_n_seq; + + cb.require_equal( + "block can only increase in seq border", + select::expr( + is_seq_border.expr(), + block_index.expr() + 1.expr(), + block_index.expr(), + ), + block_index_next, + ); + cb.gate(meta.query_fixed(q_enabled, Rotation::next())) + }); + + // so, we enforce s_beginning enabled for valid block index + meta.create_gate("border constaints", |meta| { + let mut cb = BaseConstraintBuilder::default(); + let s_beginning = meta.query_advice(s_beginning, Rotation::cur()); + + let repeated_offset_pairs = [rep_offset_1, rep_offset_2, rep_offset_3].map(|col| { + ( + meta.query_advice(col, Rotation::cur()), + meta.query_advice(col, Rotation::prev()), + ) + }); + + for (repeated_offset, repeated_offset_prev) in repeated_offset_pairs { + cb.condition(s_beginning.expr(), |cb| { + cb.require_equal( + "offset must be inherited in border", + repeated_offset, + repeated_offset_prev, + ) + }); + } + + let literal_len = meta.query_advice(literal_len, Rotation::cur()); + cb.require_equal( + "literal len accumulation", + select::expr( + s_beginning.expr(), + literal_len.expr(), + literal_len.expr() + meta.query_advice(acc_literal_len, Rotation::prev()), + ), + meta.query_advice(acc_literal_len, Rotation::cur()), + ); + + cb.gate(meta.query_fixed(q_enabled, Rotation::cur())) + }); + + meta.create_gate("offset update mode", |meta| { + let mut cb = BaseConstraintBuilder::default(); + + cb.require_equal( + "ref update mode 1", + and::expr([not::expr(literal_is_zero.expr()), offset_is_1.expr()]), + meta.query_advice(ref_update_mode_1, Rotation::cur()), + ); + + cb.require_equal( + "ref update mode 2", + select::expr( + literal_is_zero.expr(), + offset_is_1.expr(), + offset_is_2.expr(), + ), + meta.query_advice(ref_update_mode_2, Rotation::cur()), + ); + + cb.require_equal( + "ref update mode 3", + select::expr( + literal_is_zero.expr(), + offset_is_2.expr(), + offset_is_3.expr(), + ), + meta.query_advice(ref_update_mode_3, Rotation::cur()), + ); + + cb.require_equal( + "ref update mode 4", + and::expr([literal_is_zero.expr(), offset_is_3.expr()]), + meta.query_advice(ref_update_mode_4, Rotation::cur()), + ); + + cb.gate(meta.query_fixed(q_enabled, Rotation::cur())) + }); + + // offset is in-section (not s_beginning) + meta.create_gate("offset reference", |meta| { + let mut cb = BaseConstraintBuilder::default(); + + let offset_val = meta.query_advice(offset, Rotation::cur()); + let offset = meta.query_advice(match_offset, Rotation::cur()); + + let [rep_offset_1_prev, rep_offset_2_prev, rep_offset_3_prev] = + [rep_offset_1, rep_offset_2, rep_offset_3] + .map(|col| meta.query_advice(col, Rotation::prev())); + + let [rep_offset_1, rep_offset_2, rep_offset_3] = + [rep_offset_1, rep_offset_2, rep_offset_3] + .map(|col| meta.query_advice(col, Rotation::cur())); + + let ref_update_mode_1 = meta.query_advice(ref_update_mode_1, Rotation::cur()); + let ref_update_mode_2 = meta.query_advice(ref_update_mode_2, Rotation::cur()); + let ref_update_mode_3 = meta.query_advice(ref_update_mode_3, Rotation::cur()); + let ref_update_mode_4 = meta.query_advice(ref_update_mode_4, Rotation::cur()); + let s_is_offset_ref = ref_update_mode_1.expr() + + ref_update_mode_2.expr() + + ref_update_mode_3.expr() + + ref_update_mode_4.expr(); + + // should not need this since all ref update modes are exclusive + // cb.require_boolean("is offset is boolean", s_is_offset_ref.expr()); + + // and ref in offset_1 is updated by current value + cb.require_equal( + "set offset 0 to offset val", + offset_val.expr(), + rep_offset_1.expr(), + ); + + // following we ref updated table + + // for no-ref ref offset table 2/3 is + // updated with a "shift" nature, and 1 is cooked_offset - 3 + cb.condition(not::expr(s_is_offset_ref.expr()), |cb| { + cb.require_equal( + "offset is cooked_val - 3", + offset.expr() - 3.expr(), + rep_offset_1.expr(), + ); + + cb.require_equal( + "shift 1 -> 2", + rep_offset_1_prev.expr(), + rep_offset_2.expr(), + ); + cb.require_equal( + "shift 2 -> 3", + rep_offset_2_prev.expr(), + rep_offset_3.expr(), + ); + }); + + // update mode 1 (offset == 1 and lit_len != 0) + cb.condition(ref_update_mode_1.expr(), |cb| { + cb.require_equal( + "copy offset 1 for ref 1", + rep_offset_1_prev.expr(), + rep_offset_1.expr(), + ); + cb.require_equal( + "copy offset 2 for ref 1", + rep_offset_2_prev.expr(), + rep_offset_2.expr(), + ); + cb.require_equal( + "copy offset 3 for ref 1", + rep_offset_3_prev.expr(), + rep_offset_3.expr(), + ); + }); + // update mode 2 (offset == 2 / offet == 1 while lit_len != 0) + cb.condition(ref_update_mode_2.expr(), |cb| { + cb.require_equal( + "swap 1&2 for ref 2", + rep_offset_2_prev.expr(), + rep_offset_1.expr(), + ); + cb.require_equal( + "swap 1&2 for ref 2", + rep_offset_1_prev.expr(), + rep_offset_2.expr(), + ); + cb.require_equal( + "copy offset 3 for ref 2", + rep_offset_3_prev.expr(), + rep_offset_3.expr(), + ); + }); + // update mode 3 (offset == 3 / offet == 2 while lit_len != 0) + cb.condition(ref_update_mode_3.expr(), |cb| { + cb.require_equal( + "rotate 3-1 for ref 3", + rep_offset_3_prev.expr(), + rep_offset_1.expr(), + ); + cb.require_equal( + "rotate 3-1 for ref 3", + rep_offset_1_prev.expr(), + rep_offset_2.expr(), + ); + cb.require_equal( + "rotate 3-1 for ref 3", + rep_offset_2_prev.expr(), + rep_offset_3.expr(), + ); + }); + // update mode 4 (offset == 3 while lit_len == 0) + cb.condition(ref_update_mode_4.expr(), |cb| { + cb.require_zero("data must not corrupt", ref_offset_1_is_zero.expr()); + cb.require_equal( + "take ref 1 and minus 1 for ref 4", + rep_offset_1_prev.expr() - 1.expr(), + rep_offset_1.expr(), + ); + cb.require_equal( + "rotate 3-1 for ref 4", + rep_offset_1_prev.expr(), + rep_offset_2.expr(), + ); + cb.require_equal( + "rotate 3-1 for ref 4", + rep_offset_2_prev.expr(), + rep_offset_3.expr(), + ); + }); + + cb.gate( + meta.query_fixed(q_enabled, Rotation::cur()) + * not::expr(meta.query_advice(s_beginning, Rotation::cur())), + ) + }); + + // the beginning of following rows must be constrainted + meta.enable_equality(block_index); + meta.enable_equality(seq_index); + meta.enable_equality(rep_offset_1); + meta.enable_equality(rep_offset_2); + meta.enable_equality(rep_offset_3); + + Self { + q_enabled, + block_index, + n_seq, + literal_len, + match_offset, + match_len, + offset, + acc_literal_len, + s_beginning, + seq_index, + rep_offset_1, + rep_offset_2, + rep_offset_3, + offset_is_1, + offset_is_2, + offset_is_3, + literal_is_zero, + seq_index_is_n_seq, + ref_offset_1_is_zero, + ref_update_mode_1, + ref_update_mode_2, + ref_update_mode_3, + ref_update_mode_4, + } + } + + // assign a heading / padding row before a each block + fn assign_heading_row<'a>( + &self, + region: &mut Region, + offset: usize, + block_ind: u64, + n_seq: usize, + chip_ctx: &ChipContext, + offset_table: &[u64; 3], + ) -> Result { + region.assign_fixed( + || "enable row", + self.q_enabled, + offset, + || Value::known(F::one()), + )?; + + for col in [ + self.rep_offset_1, + self.rep_offset_2, + self.rep_offset_3, + self.match_len, + self.match_offset, + self.literal_len, + self.acc_literal_len, + self.offset, + self.seq_index, + self.ref_update_mode_1, + self.ref_update_mode_2, + self.ref_update_mode_3, + self.ref_update_mode_4, + ] { + region.assign_advice(|| "padding values", col, offset, || Value::known(F::zero()))?; + } + + for (col, val) in [ + (self.rep_offset_1, offset_table[0]), + (self.rep_offset_2, offset_table[1]), + (self.rep_offset_3, offset_table[2]), + (self.block_index, block_ind), + (self.n_seq, n_seq as u64), + ] { + region.assign_advice( + || "header block fill", + col, + offset, + || Value::known(F::from(val)), + )?; + } + + chip_ctx + .literal_is_zero_chip + .assign(region, offset, Value::known(F::zero()))?; + chip_ctx.ref_offset_1_is_zero_chip.assign( + region, + offset, + Value::known(F::from(offset_table[0])), + )?; + + for (chip, val) in [ + (&chip_ctx.offset_is_1_chip, F::from(1u64)), + (&chip_ctx.offset_is_2_chip, F::from(2u64)), + (&chip_ctx.offset_is_3_chip, F::from(3u64)), + (&chip_ctx.seq_index_chip, F::from(n_seq as u64)), + ] { + chip.assign(region, offset, Value::known(F::zero()), Value::known(val))?; + } + + region.assign_advice( + || "set beginning flag", + self.s_beginning, + offset, + || Value::known(F::one()), + )?; + + Ok(offset + 1) + } + + // padding for the rest row + fn padding_rows<'a>( + &self, + region: &mut Region, + mut offset: usize, + till_offset: usize, + mut blk_index: u64, + chip_ctx: &ChipContext, + offset_table: &[u64; 3], + ) -> Result<(), Error> { + // pad the rest rows until final row + while offset < till_offset { + offset = + self.assign_heading_row(region, offset, blk_index, 0, chip_ctx, offset_table)?; + + blk_index += 1; + } + + Ok(()) + } + + // assign a single block from current offset + // and return the offset below the last used row + fn assign_block<'a>( + &self, + region: &mut Region, + mut offset: usize, + block_ind: u64, + n_seq: usize, + table_rows: impl Iterator, + chip_ctx: &ChipContext, + offset_table: &mut [u64; 3], + ) -> Result { + let mut seq_index = 0u64; + let mut acc_literal_len = 0u64; + + for table_row in table_rows { + seq_index += 1; + + region.assign_fixed( + || "enable row", + self.q_enabled, + offset, + || Value::known(F::one()), + )?; + + let ref_update_mode = match table_row.cooked_match_offset { + 0 => panic!("invalid cooked offset"), + 1 => { + if table_row.literal_length == 0 { + 2 + } else { + 1 + } + } + 2 => { + if table_row.literal_length == 0 { + 3 + } else { + 2 + } + } + 3 => { + if table_row.literal_length == 0 { + 4 + } else { + 3 + } + } + _ => 0, + }; + + acc_literal_len += table_row.literal_length; + // sanity check + assert_eq!(acc_literal_len, table_row.literal_length_acc); + + offset_table[0] = table_row.repeated_offset1; + offset_table[1] = table_row.repeated_offset2; + offset_table[2] = table_row.repeated_offset3; + + for (name, col, val) in [ + ("beginning flag", self.s_beginning, F::zero()), + ( + "offset table 1", + self.rep_offset_1, + F::from(offset_table[0]), + ), + ( + "offset table 2", + self.rep_offset_2, + F::from(offset_table[1]), + ), + ( + "offset table 3", + self.rep_offset_3, + F::from(offset_table[2]), + ), + ("mlen", self.match_len, F::from(table_row.match_length)), + ( + "moff", + self.match_offset, + F::from(table_row.cooked_match_offset), + ), + ("llen", self.literal_len, F::from(table_row.literal_length)), + ("llen_acc", self.acc_literal_len, F::from(acc_literal_len)), + ("offset", self.offset, F::from(table_row.actual_offset)), + ("seq ind", self.seq_index, F::from(seq_index)), + ("block ind", self.block_index, F::from(block_ind)), + ("n_seq", self.n_seq, F::from(n_seq as u64)), + ( + "ref update mode", + self.ref_update_mode_1, + if ref_update_mode == 1 { + F::one() + } else { + F::zero() + }, + ), + ( + "ref update mode", + self.ref_update_mode_2, + if ref_update_mode == 2 { + F::one() + } else { + F::zero() + }, + ), + ( + "ref update mode", + self.ref_update_mode_3, + if ref_update_mode == 3 { + F::one() + } else { + F::zero() + }, + ), + ( + "ref update mode", + self.ref_update_mode_4, + if ref_update_mode == 4 { + F::one() + } else { + F::zero() + }, + ), + ] { + region.assign_advice(|| name, col, offset, || Value::known(val))?; + } + + for (chip, val) in [ + ( + &chip_ctx.literal_is_zero_chip, + F::from(table_row.literal_length), + ), + ( + &chip_ctx.ref_offset_1_is_zero_chip, + F::from(offset_table[0]), + ), + ] { + chip.assign(region, offset, Value::known(val))?; + } + + for (chip, val_l, val_r) in [ + ( + &chip_ctx.offset_is_1_chip, + F::from(table_row.cooked_match_offset), + F::from(1u64), + ), + ( + &chip_ctx.offset_is_2_chip, + F::from(table_row.cooked_match_offset), + F::from(2u64), + ), + ( + &chip_ctx.offset_is_3_chip, + F::from(table_row.cooked_match_offset), + F::from(3u64), + ), + ( + &chip_ctx.seq_index_chip, + F::from(seq_index), + F::from(n_seq as u64), + ), + ] { + chip.assign(region, offset, Value::known(val_l), Value::known(val_r))?; + } + offset += 1; + } + + assert_eq!(n_seq as u64, seq_index); + + Ok(offset) + } + + // assign the top row + fn init_top_row( + &self, + region: &mut Region, + from_offset: Option, + ) -> Result { + let offset = from_offset.unwrap_or_default(); + // top row constraint + for (col, val) in [ + (self.rep_offset_1, F::from(1u64)), + (self.rep_offset_2, F::from(4u64)), + (self.rep_offset_3, F::from(8u64)), + ] { + region.assign_advice_from_constant(|| "top row", col, offset, val)?; + } + + for col in [self.block_index, self.seq_index, self.acc_literal_len] { + region.assign_advice(|| "top row flush", col, offset, || Value::known(F::zero()))?; + } + + for (col, val) in [(self.block_index, F::one()), (self.seq_index, F::zero())] { + region.assign_advice_from_constant(|| "begin row constraint", col, offset + 1, val)?; + } + + Ok(offset + 1) + } + + /// assign with multiple blocks, known the number of + /// sequences in advance + pub fn assign<'a, R: ExactSizeIterator>( + &self, + layouter: &mut impl Layouter, + table_rows: impl IntoIterator + Clone, + enabled_rows: usize, + ) -> Result<(), Error> { + let chip_ctx = ChipContext::construct(self); + layouter.assign_region( + || "addr table", + |mut region| { + let mut offset_table: [u64; 3] = [1, 4, 8]; + let mut blk_id = 0u64; + let mut offset = self.init_top_row(&mut region, None)?; + for (i, rows_in_blk) in table_rows.clone().into_iter().enumerate() { + blk_id = (i + 1) as u64; + let n_seqs = rows_in_blk.len(); + offset = self.assign_heading_row( + &mut region, + offset, + blk_id, + n_seqs, + &chip_ctx, + &mut offset_table, + )?; + offset = self.assign_block( + &mut region, + offset, + blk_id, + n_seqs, + rows_in_blk, + &chip_ctx, + &mut offset_table, + )?; + assert!(offset < enabled_rows); + } + + self.padding_rows( + &mut region, + offset, + enabled_rows, + blk_id + 1, + &chip_ctx, + &offset_table, + )?; + + Ok(()) + }, + ) + } + + #[cfg(test)] + pub fn mock_assign( + &self, + layouter: &mut impl Layouter, + table_rows: &[AddressTableRow], + enabled_rows: usize, + ) -> Result<(), Error> { + let chip_ctx = ChipContext::construct(self); + layouter.assign_region( + || "addr table", + |mut region| { + let mut offset_table: [u64; 3] = [1, 4, 8]; + let offset = self.init_top_row(&mut region, None)?; + let offset = self.assign_heading_row( + &mut region, + offset, + 1, + table_rows.len(), + &chip_ctx, + &mut offset_table, + )?; + let offset = self.assign_block( + &mut region, + offset, + 1, + table_rows.len(), + table_rows.iter(), + &chip_ctx, + &mut offset_table, + )?; + assert!(offset < enabled_rows); + + self.padding_rows( + &mut region, + offset, + enabled_rows, + 2, + &chip_ctx, + &offset_table, + )?; + + Ok(()) + }, + ) + } +} + +#[cfg(test)] +mod tests { + + use super::*; + use halo2_proofs::{ + circuit::SimpleFloorPlanner, dev::MockProver, halo2curves::bn256::Fr, plonk::Circuit, + }; + + #[derive(Clone, Debug)] + struct SeqTable(Vec); + + impl Circuit for SeqTable { + type Config = SeqInstTable; + type FloorPlanner = SimpleFloorPlanner; + fn without_witnesses(&self) -> Self { + unimplemented!() + } + + fn configure(meta: &mut ConstraintSystem) -> Self::Config { + let const_col = meta.fixed_column(); + meta.enable_constant(const_col); + + Self::Config::configure(meta) + } + + fn synthesize( + &self, + config: Self::Config, + mut layouter: impl Layouter, + ) -> Result<(), Error> { + config.mock_assign(&mut layouter, &self.0, 15)?; + + Ok(()) + } + } + + #[test] + fn seqinst_table_gates() { + // example comes from zstd's spec + let circuit = SeqTable(AddressTableRow::mock_samples(&[ + [1114, 11, 1111, 1, 4], + [1, 22, 1111, 1, 4], + [2225, 22, 2222, 1111, 1], + [1114, 111, 1111, 2222, 1111], + [3336, 33, 3333, 1111, 2222], + [2, 22, 1111, 3333, 2222], + [3, 33, 2222, 1111, 3333], + [3, 0, 2221, 2222, 1111], + [1, 0, 2222, 2221, 1111], + ])); + + let k = 12; + let mock_prover = + MockProver::::run(k, &circuit, vec![]).expect("failed to run mock prover"); + mock_prover.verify().unwrap(); + } +} diff --git a/aggregator/src/aggregation/decoder/witgen.rs b/aggregator/src/aggregation/decoder/witgen.rs index e91215abba..f888474ba2 100644 --- a/aggregator/src/aggregation/decoder/witgen.rs +++ b/aggregator/src/aggregation/decoder/witgen.rs @@ -209,6 +209,8 @@ type AggregateBlockResult = ( Vec, Vec, [FseAuxiliaryTableData; 3], // 3 sequence section FSE tables + Vec, + SequenceExecResult, ); fn process_block( src: &[u8], @@ -224,7 +226,7 @@ fn process_block( witness_rows.extend_from_slice(&rows); let last_row = rows.last().expect("last row expected to exist"); - let (_byte_offset, rows, literals, lstream_len, aux_data, sequence_info, fse_aux_tables) = + let (_byte_offset, rows, literals, lstream_len, aux_data, sequence_info, fse_aux_tables, address_table_rows, sequence_exec_info) = match block_info.block_type { BlockType::ZstdCompressedBlock => process_block_zstd( src, @@ -248,6 +250,8 @@ fn process_block( lstream_len, aux_data, fse_aux_tables, + address_table_rows, + sequence_exec_info, ) } @@ -354,6 +358,12 @@ fn process_block_header( ) } +#[derive(Debug, Default, Clone)] +pub struct SequenceExecResult { + pub exec_trace: Vec, + pub recovered_bytes: Vec, +} + type BlockProcessingResult = ( usize, Vec>, @@ -362,6 +372,8 @@ type BlockProcessingResult = ( Vec, SequenceInfo, [FseAuxiliaryTableData; 3], // 3 sequence section FSE tables + Vec, + SequenceExecResult, ); type LiteralsBlockResult = (usize, Vec>, Vec, Vec, Vec); @@ -486,7 +498,7 @@ fn process_block_zstd( witness_rows.extend_from_slice(&rows); let last_row = witness_rows.last().expect("last row expected to exist"); - let (bytes_offset, rows, fse_aux_tables, address_table_rows, original_inputs, sequence_info) = + let (bytes_offset, rows, fse_aux_tables, address_table_rows, original_inputs, sequence_info, sequence_exec_info) = process_sequences::( src, block_idx, @@ -516,6 +528,11 @@ fn process_block_zstd( ], sequence_info, fse_aux_tables, + address_table_rows, + SequenceExecResult { + exec_trace: sequence_exec_info, + recovered_bytes: original_inputs, + }, ) } @@ -526,6 +543,7 @@ type SequencesProcessingResult = ( Vec, // Parsed sequence instructions Vec, // Recovered original input SequenceInfo, + Vec, ); fn process_sequences( @@ -1543,32 +1561,20 @@ fn process_sequences( let actual_offset = if inst.0 > 3 { inst.0 - 3 } else { - let mut repeat_idx = inst.0; + let repeat_idx = inst.0; if inst.2 == 0 { - repeat_idx += 1; if repeat_idx > 3 { - repeat_idx = 1; + repeated_offset[1] - 1 + } else { + repeated_offset[repeat_idx] } + } else { + repeated_offset[repeat_idx-1] } - - repeated_offset[repeat_idx] } as u64; literal_len_acc += inst.2; - address_table_rows.push(AddressTableRow { - s_padding: 0, - instruction_idx: idx as u64, - literal_length: inst.2 as u64, - cooked_match_offset: inst.0 as u64, - match_length: inst.1 as u64, - literal_length_acc: literal_len_acc as u64, - repeated_offset1: repeated_offset[0] as u64, - repeated_offset2: repeated_offset[1] as u64, - repeated_offset3: repeated_offset[2] as u64, - actual_offset, - }); - // Update repeated offset if inst.0 > 3 { repeated_offset[2] = repeated_offset[1]; @@ -1578,9 +1584,6 @@ fn process_sequences( let mut repeat_idx = inst.0; if inst.2 == 0 { repeat_idx += 1; - if repeat_idx > 3 { - repeat_idx = 1; - } } if repeat_idx == 2 { @@ -1592,44 +1595,84 @@ fn process_sequences( repeated_offset[2] = repeated_offset[1]; repeated_offset[1] = repeated_offset[0]; repeated_offset[0] = result; + } else if repeat_idx == 4 { + let result = repeated_offset[0]-1; + assert!(result > 0, "corruptied data"); + repeated_offset[2] = repeated_offset[1]; + repeated_offset[1] = repeated_offset[0]; + repeated_offset[0] = result; } else { // repeat 1 } }; + + address_table_rows.push(AddressTableRow { + s_padding: 0, + instruction_idx: idx as u64, + literal_length: inst.2 as u64, + cooked_match_offset: inst.0 as u64, + match_length: inst.1 as u64, + literal_length_acc: literal_len_acc as u64, + repeated_offset1: repeated_offset[0] as u64, + repeated_offset2: repeated_offset[1] as u64, + repeated_offset3: repeated_offset[2] as u64, + actual_offset, + }); + } // Executing sequence instructions to acquire the original input. // At this point, the address table rows are not padded. Paddings will be added as sequence // instructions progress. let mut recovered_inputs: Vec = vec![]; + let mut seq_exec_info: Vec = vec![]; let mut current_literal_pos: usize = 0; for inst in address_table_rows.clone() { let new_literal_pos = current_literal_pos + (inst.literal_length as usize); - recovered_inputs.extend_from_slice( - literals[current_literal_pos..new_literal_pos] - .iter() - .map(|&v| v as u8) - .collect::>() - .as_slice(), - ); + if new_literal_pos > current_literal_pos { + let r = current_literal_pos..new_literal_pos; + seq_exec_info.push( + SequenceExec( + inst.instruction_idx as usize, + SequenceExecInfo::LiteralCopy(r.clone()), + ) + ); + recovered_inputs.extend_from_slice( + literals[r] + .iter() + .map(|&v| v as u8) + .collect::>() + .as_slice(), + ); + } let match_pos = recovered_inputs.len() - (inst.actual_offset as usize); - let matched_bytes = recovered_inputs - .clone() - .into_iter() - .skip(match_pos) - .take(inst.match_length as usize) - .collect::>(); - recovered_inputs.extend_from_slice(&matched_bytes.as_slice()); - + if inst.match_length > 0 { + let r = match_pos..(inst.match_length as usize + match_pos); + seq_exec_info.push( + SequenceExec( + inst.instruction_idx as usize, + SequenceExecInfo::BackRef(r.clone()), + ) + ); + let matched_bytes = Vec::from(&recovered_inputs[r]); + recovered_inputs.extend_from_slice(&matched_bytes.as_slice()); + } current_literal_pos = new_literal_pos; } // Add remaining literal bytes if current_literal_pos < literals.len() { + let r = current_literal_pos..literals.len(); + seq_exec_info.push( + SequenceExec( + sequence_info.num_sequences, + SequenceExecInfo::LiteralCopy(r.clone()), + ) + ); recovered_inputs.extend_from_slice( - literals[current_literal_pos..literals.len()] + literals[r] .iter() .map(|&v| v as u8) .collect::>() @@ -1644,6 +1687,7 @@ fn process_sequences( address_table_rows, recovered_inputs, sequence_info, + seq_exec_info, ) } @@ -1786,21 +1830,29 @@ fn process_block_zstd_literals_header( /// Result for processing multiple blocks from compressed data pub type MultiBlockProcessResult = ( Vec>, - Vec, + Vec>, // literals Vec, Vec, Vec, Vec, + // TODO: handle multi-block. + Vec>, + // TODO: handle multi-block. + Vec, ); /// Process a slice of bytes into decompression circuit witness rows pub fn process(src: &[u8], randomness: Value) -> MultiBlockProcessResult { let mut witness_rows = vec![]; - let mut literals: Vec = vec![]; + let mut literals: Vec> = vec![]; let mut aux_data: Vec = vec![]; let mut fse_aux_tables: Vec = vec![]; let mut block_info_arr: Vec = vec![]; let mut sequence_info_arr: Vec = vec![]; + // TODO: handle multi-block + let mut address_table_arr: Vec> = vec![]; + // TODO: handle multi-block + let mut sequence_exec_info_arr: Vec = vec![]; let byte_offset = 0; // witgen_debug @@ -1827,6 +1879,8 @@ pub fn process(src: &[u8], randomness: Value) -> MultiBlockProcessR lstream_lens, pipeline_data, new_fse_aux_tables, + address_table_rows, + sequence_exec_info, ) = process_block::( src, block_idx, @@ -1836,7 +1890,7 @@ pub fn process(src: &[u8], randomness: Value) -> MultiBlockProcessR ); witness_rows.extend_from_slice(&rows); - literals.extend_from_slice(&new_literals); + literals.push(new_literals); aux_data.extend_from_slice(&lstream_lens); aux_data.extend_from_slice(&pipeline_data); for fse_aux_table in new_fse_aux_tables { @@ -1845,6 +1899,8 @@ pub fn process(src: &[u8], randomness: Value) -> MultiBlockProcessR block_info_arr.push(block_info); sequence_info_arr.push(sequence_info); + address_table_arr.push(address_table_rows); + sequence_exec_info_arr.push(sequence_exec_info); if block_info.is_last_block { // TODO: Recover this assertion after the sequence section decoding is completed. @@ -1895,6 +1951,9 @@ pub fn process(src: &[u8], randomness: Value) -> MultiBlockProcessR fse_aux_tables, block_info_arr, sequence_info_arr, + // TODO: handle multi-block. + address_table_arr, + sequence_exec_info_arr, ) } @@ -2032,6 +2091,8 @@ mod tests { _fse_aux_tables, block_info_arr, sequence_info_arr, + _, + _, ) = process::(&compressed, Value::known(Fr::from(123456789))); Ok(()) diff --git a/aggregator/src/aggregation/decoder/witgen/params.rs b/aggregator/src/aggregation/decoder/witgen/params.rs index 17b64342c9..1fa6fb9222 100644 --- a/aggregator/src/aggregation/decoder/witgen/params.rs +++ b/aggregator/src/aggregation/decoder/witgen/params.rs @@ -12,3 +12,6 @@ pub const N_BITS_ZSTD_TAG: usize = 4; /// Number of bits in the repeat bits that follow value=1 in reconstructing FSE table. pub const N_BITS_REPEAT_FLAG: usize = 2; + +// we use offset window no more than = 22 +pub const CL_WINDOW_LIMIT : usize = 22; \ No newline at end of file diff --git a/aggregator/src/aggregation/decoder/witgen/types.rs b/aggregator/src/aggregation/decoder/witgen/types.rs index 8d3c534a9a..d367b91e3a 100644 --- a/aggregator/src/aggregation/decoder/witgen/types.rs +++ b/aggregator/src/aggregation/decoder/witgen/types.rs @@ -106,6 +106,17 @@ pub struct SequenceInfo { pub compression_mode: [bool; 3], } +/// The type for indicate each range in output bytes by sequence execution +#[derive(Debug, Clone)] +pub enum SequenceExecInfo { + LiteralCopy(std::ops::Range), + BackRef(std::ops::Range), +} + +/// The type to describe an execution: (instruction_id, exec_info) +#[derive(Debug, Clone)] +pub struct SequenceExec (pub usize, pub SequenceExecInfo); + /// The type of Lstream. #[derive(Clone, Copy, Debug, EnumIter)] pub enum LstreamNum { @@ -439,6 +450,57 @@ pub struct AddressTableRow { pub actual_offset: u64, } +impl AddressTableRow { + + /// a debug helper, input datas in the form of example in + /// zstd spec: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#repeat-offsets + /// i.e. [offset, literal, rep_1, rep_2, rep_3] + #[cfg(test)] + pub fn mock_samples(samples: &[[u64;5]]) -> Vec { + Self::mock_samples_full( + samples.into_iter().map(|sample|[ + sample[0], + sample[1], + 0, + sample[2], + sample[3], + sample[4], + ]) + ) + } + + /// build row with args [offset, literal, match_len, rep_1, rep_2, rep_3] + #[cfg(test)] + pub fn mock_samples_full(samples: impl IntoIterator) -> Vec { + let mut ret = Vec::::new(); + + for sample in samples { + let mut new_item = Self { + cooked_match_offset: sample[0], + literal_length: sample[1], + match_length: sample[2], + repeated_offset1: sample[3], + repeated_offset2: sample[4], + repeated_offset3: sample[5], + actual_offset: sample[3], + ..Default::default() + }; + + if let Some(old_item) = ret.last() { + new_item.instruction_idx = old_item.instruction_idx + 1; + new_item.literal_length_acc = old_item.literal_length_acc + sample[1]; + } else { + new_item.literal_length_acc = sample[1]; + } + + ret.push(new_item); + } + + ret + } + +} + /// Data for BL and Number of Bits for a state in LLT, CMOT and MLT #[derive(Clone, Debug)] pub struct SequenceFixedStateActionTable { diff --git a/aggregator/src/aggregation/decoder/witgen/util.rs b/aggregator/src/aggregation/decoder/witgen/util.rs index dd250dac14..892d9e407d 100644 --- a/aggregator/src/aggregation/decoder/witgen/util.rs +++ b/aggregator/src/aggregation/decoder/witgen/util.rs @@ -2,7 +2,7 @@ use bitstream_io::{ read::{BitRead, BitReader}, LittleEndian, }; -use std::io::{Cursor, Result}; +use std::io::{Cursor, Result, Read}; use super::N_BITS_PER_BYTE;