From c0bc74d502716e19350c6aeefb6f1c5b1aae694c Mon Sep 17 00:00:00 2001 From: Rohit Narurkar Date: Sun, 21 Apr 2024 15:21:29 +0100 Subject: [PATCH 1/4] feat: add ROM tables for fse code to value (sequence section) --- aggregator/src/aggregation/decoder.rs | 22 ++- aggregator/src/aggregation/decoder/tables.rs | 11 ++ .../decoder/tables/bitstring_accumulation.rs | 1 + .../decoder/tables/rom_sequence_codes.rs | 175 ++++++++++++++++++ 4 files changed, 206 insertions(+), 3 deletions(-) create mode 100644 aggregator/src/aggregation/decoder/tables/bitstring_accumulation.rs create mode 100644 aggregator/src/aggregation/decoder/tables/rom_sequence_codes.rs diff --git a/aggregator/src/aggregation/decoder.rs b/aggregator/src/aggregation/decoder.rs index 89cc8583ca..0297d6dc77 100644 --- a/aggregator/src/aggregation/decoder.rs +++ b/aggregator/src/aggregation/decoder.rs @@ -23,9 +23,13 @@ use zkevm_circuits::{ util::Challenges, }; -use crate::aggregation::decoder::witgen::N_BLOCK_HEADER_BYTES; - -use self::tables::{LiteralsHeaderTable, RomTagTable}; +use self::{ + tables::{ + LiteralLengthCodes, LiteralsHeaderTable, MatchLengthCodes, MatchOffsetCodes, + RomSequenceCodes, RomTagTable, + }, + witgen::N_BLOCK_HEADER_BYTES, +}; #[derive(Clone, Debug)] pub struct DecoderConfig { @@ -65,6 +69,12 @@ pub struct DecoderConfig { literals_header_table: LiteralsHeaderTable, /// ROM table for validating tag transition. rom_tag_table: RomTagTable, + /// ROM table for Literal Length Codes. + rom_llc_table: RomSequenceCodes, + /// ROM table for Match Length Codes. + rom_mlc_table: RomSequenceCodes, + /// ROM table for Match Offset Codes. + rom_moc_table: RomSequenceCodes, } #[derive(Clone, Debug)] @@ -339,6 +349,9 @@ impl DecoderConfig { ) -> Self { // Fixed tables let rom_tag_table = RomTagTable::construct(meta); + let rom_llc_table = RomSequenceCodes::::construct(meta); + let rom_mlc_table = RomSequenceCodes::::construct(meta); + let rom_moc_table = RomSequenceCodes::::construct(meta); // Helper tables let literals_header_table = LiteralsHeaderTable::configure(meta, range8, range16); @@ -373,6 +386,9 @@ impl DecoderConfig { range16, literals_header_table, rom_tag_table, + rom_llc_table, + rom_mlc_table, + rom_moc_table, }; macro_rules! is_tag { diff --git a/aggregator/src/aggregation/decoder/tables.rs b/aggregator/src/aggregation/decoder/tables.rs index d838929e04..e427fe63da 100644 --- a/aggregator/src/aggregation/decoder/tables.rs +++ b/aggregator/src/aggregation/decoder/tables.rs @@ -1,7 +1,18 @@ +/// Since bitstrings to decode can be spanned over more than one byte from the encoded bytes, we +/// construct a table to accumulate the binary values of the byte-unaligned bitstrings for decoding +/// convenience. +mod bitstring_accumulation; + /// Decode the regenerated size from the literals header. mod literals_header; pub use literals_header::LiteralsHeaderTable; +/// The fixed code to Baseline/NumBits for Literal Length. +mod rom_sequence_codes; +pub use rom_sequence_codes::{ + LiteralLengthCodes, MatchLengthCodes, MatchOffsetCodes, RomSequenceCodes, +}; + /// Validate the following tag given the tag currently being processed. mod rom_tag; pub use rom_tag::RomTagTable; diff --git a/aggregator/src/aggregation/decoder/tables/bitstring_accumulation.rs b/aggregator/src/aggregation/decoder/tables/bitstring_accumulation.rs new file mode 100644 index 0000000000..8b13789179 --- /dev/null +++ b/aggregator/src/aggregation/decoder/tables/bitstring_accumulation.rs @@ -0,0 +1 @@ + diff --git a/aggregator/src/aggregation/decoder/tables/rom_sequence_codes.rs b/aggregator/src/aggregation/decoder/tables/rom_sequence_codes.rs new file mode 100644 index 0000000000..086b6dd7d6 --- /dev/null +++ b/aggregator/src/aggregation/decoder/tables/rom_sequence_codes.rs @@ -0,0 +1,175 @@ +use halo2_proofs::{ + circuit::{Layouter, Value}, + halo2curves::bn256::Fr, + plonk::{Any, Column, ConstraintSystem, Error, Fixed}, +}; +use std::marker::PhantomData; +use zkevm_circuits::table::LookupTable; + +pub struct CodeFseRow { + pub code: u64, + pub baseline: u64, + pub nb: u64, +} + +impl From<(u64, u64, u64)> for CodeFseRow { + fn from(v: (u64, u64, u64)) -> Self { + Self { + code: v.0, + baseline: v.1, + nb: v.2, + } + } +} + +pub trait SequenceCodeTable { + const NAME: &'static str; + + fn code_table() -> Vec; +} + +#[derive(Clone, Debug)] +pub struct RomSequenceCodes { + code: Column, + baseline: Column, + nb: Column, + _marker: PhantomData, +} + +impl RomSequenceCodes { + pub fn construct(meta: &mut ConstraintSystem) -> Self { + Self { + code: meta.fixed_column(), + baseline: meta.fixed_column(), + nb: meta.fixed_column(), + _marker: PhantomData, + } + } + + pub fn load(&self, layouter: &mut impl Layouter) -> Result<(), Error> { + layouter.assign_region( + || format!("(ROM): Sequence Code: {}", T::NAME), + |mut region| { + for (offset, row) in T::code_table().iter().enumerate() { + region.assign_fixed( + || "code", + self.code, + offset, + || Value::known(Fr::from(row.code)), + )?; + region.assign_fixed( + || "baseline", + self.baseline, + offset, + || Value::known(Fr::from(row.baseline)), + )?; + region.assign_fixed( + || "nb", + self.nb, + offset, + || Value::known(Fr::from(row.nb)), + )?; + } + + Ok(()) + }, + ) + } +} + +impl LookupTable for RomSequenceCodes { + fn columns(&self) -> Vec> { + vec![self.code.into(), self.baseline.into(), self.nb.into()] + } + + fn annotations(&self) -> Vec { + vec![ + String::from("code"), + String::from("baseline"), + String::from("nb"), + ] + } +} + +#[derive(Clone, Debug)] +pub struct LiteralLengthCodes; +#[derive(Clone, Debug)] +pub struct MatchLengthCodes; +#[derive(Clone, Debug)] +pub struct MatchOffsetCodes; + +impl SequenceCodeTable for LiteralLengthCodes { + const NAME: &'static str = "Literal Length Codes"; + + fn code_table() -> Vec { + (0..16) + .map(|i| (i, i, 0)) + .chain([ + (16, 16, 1), + (17, 18, 1), + (18, 20, 1), + (19, 22, 1), + (20, 24, 2), + (21, 28, 2), + (22, 32, 3), + (23, 40, 3), + (24, 48, 4), + (25, 64, 6), + (26, 128, 7), + (27, 256, 8), + (28, 512, 9), + (29, 1024, 10), + (30, 2048, 11), + (31, 4096, 12), + (32, 8192, 13), + (33, 16384, 14), + (34, 32768, 15), + (35, 65536, 16), + ]) + .map(|tuple| tuple.into()) + .collect() + } +} + +impl SequenceCodeTable for MatchLengthCodes { + const NAME: &'static str = "Match Length Codes"; + + fn code_table() -> Vec { + (0..32) + .map(|i| (i, i + 3, 0)) + .chain([ + (32, 35, 1), + (33, 37, 1), + (34, 39, 1), + (35, 41, 1), + (36, 43, 2), + (37, 47, 2), + (38, 51, 3), + (39, 59, 3), + (40, 67, 4), + (41, 83, 4), + (42, 99, 5), + (43, 131, 7), + (44, 259, 8), + (45, 515, 9), + (46, 1027, 10), + (47, 2051, 11), + (48, 4099, 12), + (49, 8195, 13), + (50, 16387, 14), + (51, 32771, 15), + (52, 65539, 16), + ]) + .map(|tuple| tuple.into()) + .collect() + } +} + +impl SequenceCodeTable for MatchOffsetCodes { + const NAME: &'static str = "Match Offset Codes"; + + // N <- 31 for Match Offset Codes. + fn code_table() -> Vec { + (0..32).map(|i| (i, 1 << i, i).into()).collect() + } +} From d6325d586a33edeb48507c0c25c5204116e0f3eb Mon Sep 17 00:00:00 2001 From: Rohit Narurkar Date: Mon, 22 Apr 2024 14:22:49 +0100 Subject: [PATCH 2/4] fix: handle probability=0 case (fse) --- aggregator/src/aggregation/decoder/witgen.rs | 2 - .../src/aggregation/decoder/witgen/types.rs | 155 +++++++++++------- 2 files changed, 95 insertions(+), 62 deletions(-) diff --git a/aggregator/src/aggregation/decoder/witgen.rs b/aggregator/src/aggregation/decoder/witgen.rs index f12c2fbc74..c76a410bd9 100644 --- a/aggregator/src/aggregation/decoder/witgen.rs +++ b/aggregator/src/aggregation/decoder/witgen.rs @@ -1174,7 +1174,6 @@ fn process_block_zstd_huffman_code( }, huffman_data: HuffmanData::default(), fse_data: FseTableRow { - idx: 0, state: 0, symbol: 0, baseline: 0, @@ -1382,7 +1381,6 @@ fn process_block_zstd_huffman_code( is_zero_bit_read: (nb == 0), }, fse_data: FseTableRow { - idx: fse_table_idx, state: next_state, symbol: fse_row.0, baseline: fse_row.1, diff --git a/aggregator/src/aggregation/decoder/witgen/types.rs b/aggregator/src/aggregation/decoder/witgen/types.rs index e0415a52e7..d71f9c2673 100644 --- a/aggregator/src/aggregation/decoder/witgen/types.rs +++ b/aggregator/src/aggregation/decoder/witgen/types.rs @@ -512,8 +512,6 @@ impl HuffmanCodesData { /// A single row in the FSE table. #[derive(Clone, Debug, Default, PartialEq)] pub struct FseTableRow { - /// Incremental index, starting at 1. - pub idx: u64, /// The FSE state at this row in the FSE table. pub state: u64, /// The baseline associated with this state. @@ -563,7 +561,7 @@ pub struct FseAuxiliaryTableData { /// instance, the baseline and the number of bits to read from the FSE bitstream. /// /// For each symbol, the states are in strictly increasing order. - pub sym_to_states: BTreeMap>, + pub sym_to_states: BTreeMap>, } /// Another form of Fse table that has state as key instead of the FseSymbol. @@ -601,68 +599,92 @@ impl FseAuxiliaryTableData { let mut sym_to_states = BTreeMap::new(); let mut R = table_size; let mut state = 0x00; - let mut symbol = FseSymbol::S0; - let mut idx = 1; + let mut symbol = 0; while R > 0 { // number of bits and value read from the variable bit-packed data. + // And update the total number of bits read so far. let (n_bits_read, value) = read_variable_bit_packing(&data, offset, R + 1)?; - - let N = value - 1; - let states = std::iter::once(state) - .chain((1..N).map(|_| { - state += (table_size >> 1) + (table_size >> 3) + 3; - state &= table_size - 1; - state - })) - .sorted() - .collect::>(); - let (smallest_spot_idx, nbs) = smaller_powers_of_two(table_size, N); - let baselines = if N == 1 { - vec![0x00] - } else { - let mut rotated_nbs = nbs.clone(); - rotated_nbs.rotate_left(smallest_spot_idx); - - let mut baselines = std::iter::once(0x00) - .chain(rotated_nbs.iter().scan(0x00, |baseline, nb| { - *baseline += 1 << nb; - Some(*baseline) - })) - .take(N as usize) - .collect::>(); - - baselines.rotate_right(smallest_spot_idx); - baselines - }; - sym_to_states.insert( - symbol, - states - .iter() - .zip(nbs.iter()) - .zip(baselines.iter()) - .map(|((&state, &nb), &baseline)| FseTableRow { - idx, - state, - num_bits: nb, - baseline, - symbol: symbol.into(), - num_emitted: 0, - n_acc: 0, - }) - .collect(), - ); - idx += 1; - - // update the total number of bits read so far. + reader.skip(n_bits_read)?; offset += n_bits_read; bit_boundaries.push((offset, value)); - // increment symbol. - symbol = ((symbol as usize) + 1).into(); + let N = value - 1; - // update state. - state += (table_size >> 1) + (table_size >> 3) + 3; - state &= table_size - 1; + // When a symbol has a probability of zero, it is followed by a 2-bits repeat flag. This + // repeat flag tells how many probabilities of zeroes follow the current one. It + // provides a number ranging from 0 to 3. If it is a 3, another 2-bits repeat flag + // follows, and so on. + if N == 0 { + sym_to_states.insert(symbol, vec![]); + symbol += 1; + + loop { + let repeat_bits = reader.read::(2)?; + offset += 2; + bit_boundaries.push((offset, repeat_bits as u64)); + + for k in 0..repeat_bits { + sym_to_states.insert(symbol + (k as u64), vec![]); + } + symbol += repeat_bits as u64; + + if repeat_bits < 3 { + break; + } + } + } + + if N >= 1 { + let states = std::iter::once(state) + .chain((1..N).map(|_| { + state += (table_size >> 1) + (table_size >> 3) + 3; + state &= table_size - 1; + state + })) + .sorted() + .collect::>(); + let (smallest_spot_idx, nbs) = smaller_powers_of_two(table_size, N); + let baselines = if N == 1 { + vec![0x00] + } else { + let mut rotated_nbs = nbs.clone(); + rotated_nbs.rotate_left(smallest_spot_idx); + + let mut baselines = std::iter::once(0x00) + .chain(rotated_nbs.iter().scan(0x00, |baseline, nb| { + *baseline += 1 << nb; + Some(*baseline) + })) + .take(N as usize) + .collect::>(); + + baselines.rotate_right(smallest_spot_idx); + baselines + }; + sym_to_states.insert( + symbol, + states + .iter() + .zip(nbs.iter()) + .zip(baselines.iter()) + .map(|((&state, &nb), &baseline)| FseTableRow { + state, + num_bits: nb, + baseline, + symbol, + num_emitted: 0, + n_acc: 0, + }) + .collect(), + ); + + // increment symbol. + symbol += 1; + + // update state. + state += (table_size >> 1) + (table_size >> 3) + 3; + state &= table_size - 1; + } // remove N slots from a total of R. R -= N; @@ -763,7 +785,7 @@ mod tests { assert_eq!(n_bytes, 4); assert_eq!( - table.sym_to_states.get(&FseSymbol::S1).cloned().unwrap(), + table.sym_to_states.get(&1).cloned().unwrap(), [ (0x03, 0x10, 3), (0x0c, 0x18, 3), @@ -775,7 +797,6 @@ mod tests { .iter() .enumerate() .map(|(i, &(state, baseline, num_bits))| FseTableRow { - idx: (i + 1) as u64, state, symbol: 1, baseline, @@ -789,6 +810,20 @@ mod tests { Ok(()) } + #[test] + fn test_sequences_fse_reconstruction() -> std::io::Result<()> { + let src = vec![ + 0x21, 0x9d, 0x51, 0xcc, 0x18, 0x42, 0x44, 0x81, 0x8c, 0x94, 0xb4, 0x50, 0x1e, + ]; + + let (n_bytes, _bit_boundaries, table) = FseAuxiliaryTableData::reconstruct(&src, 0)?; + let parsed_state_map = table.parse_state_table(); + + // TODO: assertions + + Ok(()) + } + #[test] fn test_huffman_bitstring_reconstruction() -> std::io::Result<()> { let weights = vec![ From 01f0ac5873a0a373e4fe8c89e63cab9fb0d0413d Mon Sep 17 00:00:00 2001 From: Rohit Narurkar Date: Mon, 22 Apr 2024 14:35:33 +0100 Subject: [PATCH 3/4] todo: prob=-1 case not sure how to handle --- aggregator/src/aggregation/decoder/witgen/types.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/aggregator/src/aggregation/decoder/witgen/types.rs b/aggregator/src/aggregation/decoder/witgen/types.rs index d71f9c2673..6e403e905d 100644 --- a/aggregator/src/aggregation/decoder/witgen/types.rs +++ b/aggregator/src/aggregation/decoder/witgen/types.rs @@ -608,6 +608,10 @@ impl FseAuxiliaryTableData { offset += n_bits_read; bit_boundaries.push((offset, value)); + if value == 0 { + unimplemented!("value=0 => prob=-1: scenario unimplemented"); + } + let N = value - 1; // When a symbol has a probability of zero, it is followed by a 2-bits repeat flag. This From 553d05eb40017cf5ab9d6929bb1c7a236d47b591 Mon Sep 17 00:00:00 2001 From: Rohit Narurkar Date: Mon, 22 Apr 2024 16:20:04 +0100 Subject: [PATCH 4/4] feat: bitstring accumulation table --- aggregator/src/aggregation/decoder.rs | 8 +- aggregator/src/aggregation/decoder/tables.rs | 1 + .../decoder/tables/bitstring_accumulation.rs | 435 ++++++++++++++++++ 3 files changed, 442 insertions(+), 2 deletions(-) diff --git a/aggregator/src/aggregation/decoder.rs b/aggregator/src/aggregation/decoder.rs index 0297d6dc77..4060e5a57b 100644 --- a/aggregator/src/aggregation/decoder.rs +++ b/aggregator/src/aggregation/decoder.rs @@ -25,8 +25,8 @@ use zkevm_circuits::{ use self::{ tables::{ - LiteralLengthCodes, LiteralsHeaderTable, MatchLengthCodes, MatchOffsetCodes, - RomSequenceCodes, RomTagTable, + BitstringAccumulationTable, LiteralLengthCodes, LiteralsHeaderTable, MatchLengthCodes, + MatchOffsetCodes, RomSequenceCodes, RomTagTable, }, witgen::N_BLOCK_HEADER_BYTES, }; @@ -67,6 +67,8 @@ pub struct DecoderConfig { range16: RangeTable<16>, /// Helper table for decoding the regenerated size from LiteralsHeader. literals_header_table: LiteralsHeaderTable, + /// Helper table for decoding bitstreams. + bitstring_accumulation_table: BitstringAccumulationTable, /// ROM table for validating tag transition. rom_tag_table: RomTagTable, /// ROM table for Literal Length Codes. @@ -355,6 +357,7 @@ impl DecoderConfig { // Helper tables let literals_header_table = LiteralsHeaderTable::configure(meta, range8, range16); + let bitstring_accumulation_table = BitstringAccumulationTable::configure(meta); // Peripheral configs let tag_config = TagConfig::configure(meta); @@ -385,6 +388,7 @@ impl DecoderConfig { range8, range16, literals_header_table, + bitstring_accumulation_table, rom_tag_table, rom_llc_table, rom_mlc_table, diff --git a/aggregator/src/aggregation/decoder/tables.rs b/aggregator/src/aggregation/decoder/tables.rs index e427fe63da..39922c6185 100644 --- a/aggregator/src/aggregation/decoder/tables.rs +++ b/aggregator/src/aggregation/decoder/tables.rs @@ -2,6 +2,7 @@ /// construct a table to accumulate the binary values of the byte-unaligned bitstrings for decoding /// convenience. mod bitstring_accumulation; +pub use bitstring_accumulation::BitstringAccumulationTable; /// Decode the regenerated size from the literals header. mod literals_header; diff --git a/aggregator/src/aggregation/decoder/tables/bitstring_accumulation.rs b/aggregator/src/aggregation/decoder/tables/bitstring_accumulation.rs index 8b13789179..71fbccb15c 100644 --- a/aggregator/src/aggregation/decoder/tables/bitstring_accumulation.rs +++ b/aggregator/src/aggregation/decoder/tables/bitstring_accumulation.rs @@ -1 +1,436 @@ +use gadgets::util::{and, not, select, Expr}; +use halo2_proofs::{ + circuit::Layouter, + halo2curves::bn256::Fr, + plonk::{Advice, Any, Column, ConstraintSystem, Error, Expression, Fixed}, + poly::Rotation, +}; +use zkevm_circuits::{ + evm_circuit::{BaseConstraintBuilder, ConstrainBuilderCommon}, + table::LookupTable, +}; +use crate::aggregation::decoder::witgen::ZstdWitnessRow; + +/// In the process of decoding zstd encoded data, there are several scenarios in which we process +/// bits instead of bytes, for instance: +/// - decoding FSE table +/// - applying the FSE table to decode sequences +/// +/// For the above scenarios we wish to know the binary value of the "bits" that we are reading, as +/// well as the start/end indices of those "bitstrings" of interest. +/// +/// The below table performs the very task and exposes a lookup table for the bitstream decoder +/// config, which is a part of the decoder config. For illustration purposes: +/// +/// Consider a bit chunk from bit_index == 4 to bit_index == 9. We will have: +/// +/// | bit index | from start | until end | bitstring len | bit | bit value acc | +/// |-----------|------------|-----------|---------------|-----|---------------| +/// | 0 | 1 | 0 | 0 | 0 | 0 | +/// | 1 | 1 | 0 | 0 | 0 | 0 | +/// | 2 | 1 | 0 | 0 | 1 | 0 | +/// | 3 | 1 | 0 | 0 | 0 | 0 | +/// | 4 -> | 1 | 1 | 1 | 1 | 1 | +/// | 5 -> | 1 | 1 | 2 | 0 | 1 | +/// | 6 -> | 1 | 1 | 3 | 1 | 5 | +/// | 7 -> | 1 | 1 | 4 | 1 | 13 | +/// | 8 -> | 1 | 1 | 5 | 0 | 13 | +/// | 9 -> | 1 | 1 | 6 | 1 | 45 | +/// | 10 | 0 | 1 | 6 | 0 | 45 | +/// | 11 | 0 | 1 | 6 | 0 | 45 | +/// | 12 | 0 | 1 | 6 | 0 | 45 | +/// | 13 | 0 | 1 | 6 | 1 | 45 | +/// | 14 | 0 | 1 | 6 | 1 | 45 | +/// | 15 | 0 | 1 | 6 | 0 | 45 | +/// | 16 | 0 | 1 | 6 | 0 | 45 | +/// | 17 | 0 | 1 | 6 | 0 | 45 | +/// | 18 | 0 | 1 | 6 | 0 | 45 | +/// | 19 | 0 | 1 | 6 | 0 | 45 | +/// | 20 | 0 | 1 | 6 | 0 | 45 | +/// | 21 | 0 | 1 | 6 | 0 | 45 | +/// | 22 | 0 | 1 | 6 | 0 | 45 | +/// | 23 | 0 | 1 | 6 | 0 | 45 | +/// +/// The above table illustrates 3 contiguous bytes b0, b1 and b2 where the bit index increments +/// from 0 to 23. We are interested in reading a bitstring of length 6 that starts at bit index 4 +/// and ends at bit index 9. The supporting columns "from start" and "until end" help us to mark +/// the bits of interest where "from_start == until_end == 1". Over these rows, we accumulate the +/// binary value and the bitstring's length. +#[derive(Clone, Debug)] +pub struct BitstringAccumulationTable { + /// Fixed column that is enabled only for the first row. + pub q_first: Column, + /// The byte offset of byte_1. + pub byte_idx_1: Column, + /// The byte offset of byte_2. + pub byte_idx_2: Column, + /// The byte offset of byte_3. + pub byte_idx_3: Column, + /// The byte value at byte_idx_1, i.e. the first byte in the contiguous chunk of 3 bytes. + pub byte_1: Column, + /// The byte value at byte_idx_2, i.e. the second byte in the contiguous chunk of 3 bytes. + pub byte_2: Column, + /// The byte value at byte_idx_3, i.e. the third byte in the contiguous chunk of 3 bytes. + pub byte_3: Column, + /// The index within these 2 bytes, i.e. 0 <= bit_index <= 23. bit_index increments until its + /// 23 and then is reset to 0. + pub bit_index: Column, + /// Helper column to know the start of a new chunk of 3 contiguous bytes, this is a fixed + /// column as well as it is set only on bit_index == 0. + pub q_start: Column, + /// The bit at bit_index. + /// - Accumulation of bits from 0 <= bit_index <= 7 denotes byte_1. + /// - Accumulation of bits from 8 <= bit_index <= 15 denotes byte_2. + /// - Accumulation of bits from 16 <= bit_index <= 23 denotes byte_3. + pub bit: Column, + /// The binary value of the bits in the current bitstring. + pub bitstring_value: Column, + /// The accumulator over bits from is_start to is_end, i.e. while is_set == 1. + pub bitstring_value_acc: Column, + /// The length of the bitstring, i.e. the number of bits in the bitstring. + pub bitstring_len: Column, + /// Boolean that is set from start of bit chunk to bit_index == 15. + pub from_start: Column, + /// Boolean that is set from bit_index == 0 to end of bit chunk. + pub until_end: Column, + /// Boolean to mark if the bitstring is a part of bytes that are read from front-to-back or + /// back-to-front. For the back-to-front case, the is_reverse boolean is set. + pub is_reverse: Column, + /// After all rows of meaningful bytes are done, we mark the remaining rows by a padding + /// boolean where our constraints are skipped. + pub is_padding: Column, +} + +impl BitstringAccumulationTable { + /// Construct the bitstring accumulation table. + pub fn configure(meta: &mut ConstraintSystem) -> Self { + let config = Self { + q_first: meta.fixed_column(), + byte_idx_1: meta.advice_column(), + byte_idx_2: meta.advice_column(), + byte_idx_3: meta.advice_column(), + byte_1: meta.advice_column(), + byte_2: meta.advice_column(), + byte_3: meta.advice_column(), + bit_index: meta.fixed_column(), + q_start: meta.fixed_column(), + bit: meta.advice_column(), + bitstring_value: meta.advice_column(), + bitstring_value_acc: meta.advice_column(), + bitstring_len: meta.advice_column(), + from_start: meta.advice_column(), + until_end: meta.advice_column(), + is_reverse: meta.advice_column(), + is_padding: meta.advice_column(), + }; + + meta.create_gate("BitstringAccumulationTable: bit_index == 0", |meta| { + let condition = and::expr([ + meta.query_fixed(config.q_start, Rotation::cur()), + not::expr(meta.query_advice(config.is_padding, Rotation::cur())), + ]); + + let mut cb = BaseConstraintBuilder::default(); + + let bits = (0..24) + .map(|i| meta.query_advice(config.bit, Rotation(i))) + .collect::>>(); + + cb.require_equal( + "byte1 is the binary accumulation of 0 <= bit_index <= 7", + meta.query_advice(config.byte_1, Rotation::cur()), + select::expr( + meta.query_advice(config.is_reverse, Rotation::cur()), + bits[7].expr() + + bits[6].expr() * 2.expr() + + bits[5].expr() * 4.expr() + + bits[4].expr() * 8.expr() + + bits[3].expr() * 16.expr() + + bits[2].expr() * 32.expr() + + bits[1].expr() * 64.expr() + + bits[0].expr() * 128.expr(), + bits[0].expr() + + bits[1].expr() * 2.expr() + + bits[2].expr() * 4.expr() + + bits[3].expr() * 8.expr() + + bits[4].expr() * 16.expr() + + bits[5].expr() * 32.expr() + + bits[6].expr() * 64.expr() + + bits[7].expr() * 128.expr(), + ), + ); + + cb.require_equal( + "byte2 is the binary accumulation of 8 <= bit_index <= 15", + meta.query_advice(config.byte_2, Rotation::cur()), + select::expr( + meta.query_advice(config.is_reverse, Rotation::cur()), + bits[15].expr() + + bits[14].expr() * 2.expr() + + bits[13].expr() * 4.expr() + + bits[12].expr() * 8.expr() + + bits[11].expr() * 16.expr() + + bits[10].expr() * 32.expr() + + bits[9].expr() * 64.expr() + + bits[8].expr() * 128.expr(), + bits[8].expr() + + bits[9].expr() * 2.expr() + + bits[10].expr() * 4.expr() + + bits[11].expr() * 8.expr() + + bits[12].expr() * 16.expr() + + bits[13].expr() * 32.expr() + + bits[14].expr() * 64.expr() + + bits[15].expr() * 128.expr(), + ), + ); + + cb.require_equal( + "byte3 is the binary accumulation of 16 <= bit_index <= 23", + meta.query_advice(config.byte_3, Rotation::cur()), + select::expr( + meta.query_advice(config.is_reverse, Rotation::cur()), + bits[23].expr() + + bits[22].expr() * 2.expr() + + bits[21].expr() * 4.expr() + + bits[20].expr() * 8.expr() + + bits[19].expr() * 16.expr() + + bits[18].expr() * 32.expr() + + bits[17].expr() * 64.expr() + + bits[16].expr() * 128.expr(), + bits[16].expr() + + bits[17].expr() * 2.expr() + + bits[18].expr() * 4.expr() + + bits[19].expr() * 8.expr() + + bits[20].expr() * 16.expr() + + bits[21].expr() * 32.expr() + + bits[22].expr() * 64.expr() + + bits[23].expr() * 128.expr(), + ), + ); + + cb.require_boolean( + "is_reverse is boolean", + meta.query_advice(config.is_reverse, Rotation::cur()), + ); + + // from_start initialises at 1 + cb.require_equal( + "if bit_index == 0: from_start == 1", + meta.query_advice(config.from_start, Rotation::cur()), + 1.expr(), + ); + + cb.gate(condition) + }); + + meta.create_gate("BitstringAccumulationTable: bit_index > 0", |meta| { + let condition = and::expr([ + not::expr(meta.query_fixed(config.q_start, Rotation::cur())), + not::expr(meta.query_advice(config.is_padding, Rotation::cur())), + ]); + + let mut cb = BaseConstraintBuilder::default(); + + // Columns that do not change in the chunk of 3 contigious bytes. + for col in [ + config.byte_idx_1, + config.byte_idx_2, + config.byte_idx_3, + config.byte_1, + config.byte_2, + config.byte_3, + config.bitstring_value, + config.is_reverse, + config.is_padding, + ] { + cb.require_equal( + "unchanged columns from 0 < bit_idx <= 23", + meta.query_advice(col, Rotation::cur()), + meta.query_advice(col, Rotation::prev()), + ); + } + + // from_start transitions from 1 to 0 only once, i.e. delta is boolean + let delta = meta.query_advice(config.from_start, Rotation::prev()) + - meta.query_advice(config.from_start, Rotation::cur()); + cb.require_boolean("from_start delta is boolean", delta); + + cb.gate(condition) + }); + + meta.create_gate( + "BitstringAccumulationTable: bitstring_value accumulation", + |meta| { + let condition = not::expr(meta.query_advice(config.is_padding, Rotation::cur())); + + let mut cb = BaseConstraintBuilder::default(); + + let is_start = meta.query_fixed(config.q_start, Rotation::cur()); + let is_end = meta.query_fixed(config.q_start, Rotation::next()); + + // bit value is boolean. + cb.require_boolean( + "bit is boolean", + meta.query_advice(config.bit, Rotation::cur()), + ); + + // Columns from_start and until_end are boolean. + cb.require_boolean( + "from_start is boolean", + meta.query_advice(config.from_start, Rotation::cur()), + ); + cb.require_boolean( + "until_end is boolean", + meta.query_advice(config.until_end, Rotation::cur()), + ); + + // until_end transitions from 0 to 1 only once, i.e. delta is boolean + let delta = meta.query_advice(config.until_end, Rotation::next()) + - meta.query_advice(config.until_end, Rotation::cur()); + + cb.condition(is_end.expr(), |cb| { + cb.require_equal( + "if bit_index == 23: until_end == 1", + meta.query_advice(config.until_end, Rotation::cur()), + 1.expr(), + ); + }); + cb.condition(not::expr(is_end.expr()), |cb| { + cb.require_boolean("until_end delta is boolean", delta); + }); + + // Constraints at meaningful bits. + let is_set = and::expr([ + meta.query_advice(config.from_start, Rotation::cur()), + meta.query_advice(config.until_end, Rotation::cur()), + ]); + cb.condition(is_start.expr() * is_set.expr(), |cb| { + cb.require_equal( + "if is_start && is_set: bit == bitstring_value_acc", + meta.query_advice(config.bit, Rotation::cur()), + meta.query_advice(config.bitstring_value_acc, Rotation::cur()), + ); + cb.require_equal( + "if is_start && is_set: bitstring_len == 1", + meta.query_advice(config.bitstring_len, Rotation::cur()), + 1.expr(), + ); + }); + cb.condition(not::expr(is_start) * is_set, |cb| { + cb.require_equal( + "is_set: bitstring_value_acc == bitstring_value_acc::prev * 2 + bit", + meta.query_advice(config.bitstring_value_acc, Rotation::cur()), + meta.query_advice(config.bitstring_value_acc, Rotation::prev()) * 2.expr() + + meta.query_advice(config.bit, Rotation::cur()), + ); + cb.require_equal( + "is_set: bitstring_len == bitstring_len::prev + 1", + meta.query_advice(config.bitstring_len, Rotation::cur()), + meta.query_advice(config.bitstring_len, Rotation::prev()) + 1.expr(), + ); + }); + + // Constraints at bits to be ignored (at the start). + let is_ignored_start = + not::expr(meta.query_advice(config.until_end, Rotation::cur())); + cb.condition(is_ignored_start, |cb| { + cb.require_zero( + "while until_end == 0: bitstring_len == 0", + meta.query_advice(config.bitstring_len, Rotation::cur()), + ); + cb.require_zero( + "while until_end == 0: bitstring_value_acc == 0", + meta.query_advice(config.bitstring_value_acc, Rotation::cur()), + ); + }); + + // Constraints at bits to be ignored (towards the end). + let is_ignored_end = + not::expr(meta.query_advice(config.from_start, Rotation::cur())); + cb.condition(is_ignored_end, |cb| { + cb.require_equal( + "bitstring_len unchanged at the last ignored bits", + meta.query_advice(config.bitstring_len, Rotation::cur()), + meta.query_advice(config.bitstring_len, Rotation::prev()), + ); + cb.require_equal( + "bitstring_value_acc unchanged at the last ignored bits", + meta.query_advice(config.bitstring_value_acc, Rotation::cur()), + meta.query_advice(config.bitstring_value_acc, Rotation::prev()), + ); + }); + + cb.gate(condition) + }, + ); + + meta.create_gate("BitstringAccumulationTable: padding", |meta| { + let condition = not::expr(meta.query_fixed(config.q_first, Rotation::cur())); + + let mut cb = BaseConstraintBuilder::default(); + + // padding is boolean + cb.require_boolean( + "is_padding is boolean", + meta.query_advice(config.is_padding, Rotation::cur()), + ); + + // padding transitions from 0 to 1 only once. + let delta = meta.query_advice(config.is_padding, Rotation::cur()) + - meta.query_advice(config.is_padding, Rotation::prev()); + cb.require_boolean("is_padding delta is boolean", delta); + + cb.gate(condition) + }); + + config + } + + /// Load witness to the table: dev mode. + pub fn assign( + &self, + layouter: &mut impl Layouter, + witness_rows: &[ZstdWitnessRow], + ) -> Result<(), Error> { + unimplemented!(); + + Ok(()) + } +} + +impl LookupTable for BitstringAccumulationTable { + fn columns(&self) -> Vec> { + vec![ + self.byte_idx_1.into(), + self.byte_idx_2.into(), + self.byte_idx_3.into(), + self.byte_1.into(), + self.byte_2.into(), + self.byte_3.into(), + self.bitstring_value.into(), + self.bitstring_len.into(), + self.bit_index.into(), + self.from_start.into(), + self.until_end.into(), + self.is_reverse.into(), + ] + } + + fn annotations(&self) -> Vec { + vec![ + String::from("byte_idx_1"), + String::from("byte_idx_2"), + String::from("byte_idx_3"), + String::from("byte_1"), + String::from("byte_2"), + String::from("byte_3"), + String::from("bitstring_value"), + String::from("bitstring_len"), + String::from("bit_index"), + String::from("from_start"), + String::from("until_end"), + String::from("is_reverse"), + ] + } +}