feat!: compute min and max of indices for multi-table queries && remo…

…ve `get_offset` and `get_length` from `ProofPlan` (#361) Please be sure to look over the pull request guidelines here: https://github.com/spaceandtimelabs/sxt-proof-of-sql/blob/main/CONTRIBUTING.md#submit-pr. # Please go through the following checklist - [x] The PR title and commit messages adhere to guidelines here: https://github.com/spaceandtimelabs/sxt-proof-of-sql/blob/main/CONTRIBUTING.md. In particular `!` is used if and only if at least one breaking change has been introduced. - [x] I have run the ci check script with `source scripts/run_ci_checks.sh`. # Rationale for this change We need to allow composable `ProofPlan`s and multiple table queries. Hence it no longer makes sense to have every `ProofPlan` have its own `get_offset` and `get_length` for input length. There can be multiple input lengths for a `ProofPlan` and offsets only make sense for source tables (that is, `TableExec`) but not intermediate `ProofPlan`s. Hence we have to refactor `query_proof.rs` to remove any reference to a single input length and a single offset. For proofs purposes what we actually care about are actually the smallest row index (that is, the smallest offset) among all input tables, the largest row index among all input tables and the length of the longest intermediate table (that is, the largest row index among all intermediate tables if we start from 0). The last value will remain less than or equal to the second one until we add unions, joins and other `ProofPlan`s that actually make output tables longer than input ones.  # What changes are included in this PR? - add `indexset` macro (thanks @JayWhite2357 for putting it in a review comment in #346) - add `get_index_range` to compute the index range - refactor `QueryProof` to use the index range as opposed to a single `input_length` / `offset_generators` - remove `get_offset` and `get_length` from `ProofPlan` which are no longer used  # Are these changes tested?  Existing tests do pass
spaceandtimelabs · Nov 11, 2024 · 87c1a41 · 87c1a41
2 parents 107fa10 + e09d93a
commit 87c1a41
Show file tree

Hide file tree

Showing 11 changed files with 136 additions and 122 deletions.
diff --git a/crates/proof-of-sql/src/base/database/mod.rs b/crates/proof-of-sql/src/base/database/mod.rs
@@ -66,6 +66,7 @@ pub use expression_evaluation_error::{ExpressionEvaluationError, ExpressionEvalu
 mod test_accessor;
 pub use test_accessor::TestAccessor;
 #[cfg(test)]
+#[allow(unused_imports)]
 pub(crate) use test_accessor::UnimplementedTestAccessor;
 
 #[cfg(test)]

diff --git a/crates/proof-of-sql/src/base/map.rs b/crates/proof-of-sql/src/base/map.rs
@@ -21,4 +21,22 @@ macro_rules! indexmap_macro {
     };
 }
 
+/// Create an [`IndexSet`][self::IndexSet] from a list of values
+#[cfg(test)]
+macro_rules! indexset_macro {
+    ($($value:expr,)+) => { $crate::base::map::indexset!($($value),+) };
+    ($($value:expr),*) => {
+        {
+            const CAP: usize = <[()]>::len(&[$({ stringify!($value); }),*]);
+            let mut set = $crate::base::map::IndexSet::with_capacity_and_hasher(CAP, <_>::default());
+            $(
+                set.insert($value);
+            )*
+            set
+        }
+    };
+}
+
 pub(crate) use indexmap_macro as indexmap;
+#[cfg(test)]
+pub(crate) use indexset_macro as indexset;
diff --git a/crates/proof-of-sql/src/sql/proof/proof_plan.rs b/crates/proof-of-sql/src/sql/proof/proof_plan.rs
@@ -19,15 +19,12 @@ pub trait ProofPlan: Debug + Send + Sync + ProverEvaluate {
     /// Count terms used within the Query's proof
     fn count(&self, builder: &mut CountBuilder) -> Result<(), ProofError>;
 
-    /// The length of the input table
-    fn get_length(&self, accessor: &dyn MetadataAccessor) -> usize;
-
-    /// The offset of the query, that is, how many rows to skip before starting to read the input table
-    fn get_offset(&self, accessor: &dyn MetadataAccessor) -> usize;
-
     /// Check if the input table is empty
     fn is_empty(&self, accessor: &dyn MetadataAccessor) -> bool {
-        self.get_length(accessor) == 0
+        let table_refs = self.get_table_references();
+        table_refs
+            .iter()
+            .all(|table_ref| accessor.get_length(*table_ref) == 0)
     }
 
     /// Form components needed to verify and proof store into `VerificationBuilder`

diff --git a/crates/proof-of-sql/src/sql/proof/query_proof.rs b/crates/proof-of-sql/src/sql/proof/query_proof.rs
@@ -6,7 +6,7 @@ use crate::{
     base::{
         bit::BitDistribution,
         commitment::CommitmentEvaluationProof,
-        database::{Column, CommitmentAccessor, DataAccessor},
+        database::{Column, CommitmentAccessor, DataAccessor, MetadataAccessor, TableRef},
         math::log2_up,
         polynomial::{compute_evaluation_vector, CompositePolynomialInfo},
         proof::{Keccak256Transcript, ProofError, Transcript},
@@ -20,6 +20,26 @@ use core::cmp;
 use num_traits::Zero;
 use serde::{Deserialize, Serialize};
 
+/// Return the row number range of tables referenced in the Query
+///
+/// Basically we are looking for the smallest offset and the largest offset + length
+/// so that we have an index range of the table rows that the query is referencing.
+fn get_index_range(
+    accessor: &dyn MetadataAccessor,
+    table_refs: impl IntoIterator<Item = TableRef>,
+) -> (usize, usize) {
+    table_refs
+        .into_iter()
+        .map(|table_ref| {
+            let length = accessor.get_length(table_ref);
+            let offset = accessor.get_offset(table_ref);
+            (offset, offset + length)
+        })
+        .reduce(|(min_start, max_end), (start, end)| (min_start.min(start), max_end.max(end)))
+        // Only applies to `EmptyExec` where there are no tables
+        .unwrap_or((0, 1))
+}
+
 /// The proof for a query.
 ///
 /// Note: Because the class is deserialized from untrusted data, it
@@ -47,15 +67,15 @@ impl<CP: CommitmentEvaluationProof> QueryProof<CP> {
         accessor: &impl DataAccessor<CP::Scalar>,
         setup: &CP::ProverPublicSetup<'_>,
     ) -> (Self, ProvableQueryResult) {
-        let table_length = expr.get_length(accessor);
-        let num_sumcheck_variables = cmp::max(log2_up(table_length), 1);
-        let generator_offset = expr.get_offset(accessor);
+        let (min_row_num, max_row_num) = get_index_range(accessor, expr.get_table_references());
+        let range_length = max_row_num - min_row_num;
+        let num_sumcheck_variables = cmp::max(log2_up(range_length), 1);
         assert!(num_sumcheck_variables > 0);
 
         let alloc = Bump::new();
 
         // Evaluate query result
-        let result_cols = expr.result_evaluate(table_length, &alloc, accessor);
+        let result_cols = expr.result_evaluate(range_length, &alloc, accessor);
         let output_length = result_cols.first().map_or(0, Column::len);
         let provable_result = ProvableQueryResult::new(output_length as u64, &result_cols);
 
@@ -65,7 +85,7 @@ impl<CP: CommitmentEvaluationProof> QueryProof<CP> {
 
         // construct a transcript for the proof
         let mut transcript: Keccak256Transcript =
-            make_transcript(expr, &provable_result, table_length, generator_offset);
+            make_transcript(expr, &provable_result, range_length, min_row_num);
 
         // These are the challenges that will be consumed by the proof
         // Specifically, these are the challenges that the verifier sends to
@@ -78,14 +98,13 @@ impl<CP: CommitmentEvaluationProof> QueryProof<CP> {
                 .collect();
 
         let mut builder =
-            FinalRoundBuilder::new(table_length, num_sumcheck_variables, post_result_challenges);
+            FinalRoundBuilder::new(range_length, num_sumcheck_variables, post_result_challenges);
         expr.final_round_evaluate(&mut builder, &alloc, accessor);
 
         let num_sumcheck_variables = builder.num_sumcheck_variables();
-        let table_length = builder.table_length();
 
         // commit to any intermediate MLEs
-        let commitments = builder.commit_intermediate_mles(generator_offset, setup);
+        let commitments = builder.commit_intermediate_mles(min_row_num, setup);
 
         // add the commitments and bit distributions to the proof
         extend_transcript(&mut transcript, &commitments, builder.bit_distributions());
@@ -98,7 +117,7 @@ impl<CP: CommitmentEvaluationProof> QueryProof<CP> {
                 .collect();
         let poly = builder.make_sumcheck_polynomial(&SumcheckRandomScalars::new(
             &random_scalars,
-            table_length,
+            range_length,
             num_sumcheck_variables,
         ));
 
@@ -107,7 +126,7 @@ impl<CP: CommitmentEvaluationProof> QueryProof<CP> {
         let sumcheck_proof = SumcheckProof::create(&mut transcript, &mut evaluation_point, &poly);
 
         // evaluate the MLEs used in sumcheck except for the result columns
-        let mut evaluation_vec = vec![Zero::zero(); table_length];
+        let mut evaluation_vec = vec![Zero::zero(); range_length];
         compute_evaluation_vector(&mut evaluation_vec, &evaluation_point);
         let pcs_proof_evaluations = builder.evaluate_pcs_proof_mles(&evaluation_vec);
 
@@ -127,7 +146,7 @@ impl<CP: CommitmentEvaluationProof> QueryProof<CP> {
             &mut transcript,
             &folded_mle,
             &evaluation_point,
-            generator_offset as u64,
+            min_row_num as u64,
             setup,
         );
 
@@ -150,12 +169,13 @@ impl<CP: CommitmentEvaluationProof> QueryProof<CP> {
         result: &ProvableQueryResult,
         setup: &CP::VerifierPublicSetup<'_>,
     ) -> QueryResult<CP::Scalar> {
-        let input_length = expr.get_length(accessor);
-        let output_length = result.table_length();
-        let generator_offset = expr.get_offset(accessor);
-        let num_sumcheck_variables = cmp::max(log2_up(input_length), 1);
+        let (min_row_num, max_row_num) = get_index_range(accessor, expr.get_table_references());
+        let range_length = max_row_num - min_row_num;
+        let num_sumcheck_variables = cmp::max(log2_up(range_length), 1);
         assert!(num_sumcheck_variables > 0);
 
+        let output_length = result.table_length();
+
         // validate bit decompositions
         for dist in &self.bit_distributions {
             if !dist.is_valid() {
@@ -181,7 +201,7 @@ impl<CP: CommitmentEvaluationProof> QueryProof<CP> {
 
         // construct a transcript for the proof
         let mut transcript: Keccak256Transcript =
-            make_transcript(expr, result, input_length, generator_offset);
+            make_transcript(expr, result, range_length, min_row_num);
 
         // These are the challenges that will be consumed by the proof
         // Specifically, these are the challenges that the verifier sends to
@@ -203,7 +223,7 @@ impl<CP: CommitmentEvaluationProof> QueryProof<CP> {
                 .take(num_random_scalars)
                 .collect();
         let sumcheck_random_scalars =
-            SumcheckRandomScalars::new(&random_scalars, input_length, num_sumcheck_variables);
+            SumcheckRandomScalars::new(&random_scalars, range_length, num_sumcheck_variables);
 
         // verify sumcheck up to the evaluation check
         let poly_info = CompositePolynomialInfo {
@@ -232,14 +252,14 @@ impl<CP: CommitmentEvaluationProof> QueryProof<CP> {
 
         // pass over the provable AST to fill in the verification builder
         let sumcheck_evaluations = SumcheckMleEvaluations::new(
-            input_length,
+            range_length,
             output_length,
             &subclaim.evaluation_point,
             &sumcheck_random_scalars,
             &self.pcs_proof_evaluations,
         );
         let mut builder = VerificationBuilder::new(
-            generator_offset,
+            min_row_num,
             sumcheck_evaluations,
             &self.bit_distributions,
             &self.commitments,
@@ -279,8 +299,8 @@ impl<CP: CommitmentEvaluationProof> QueryProof<CP> {
                 builder.inner_product_multipliers(),
                 &product,
                 &subclaim.evaluation_point,
-                generator_offset as u64,
-                input_length,
+                min_row_num as u64,
+                range_length,
                 setup,
             )
             .map_err(|_e| ProofError::VerificationError {
@@ -315,9 +335,9 @@ impl<CP: CommitmentEvaluationProof> QueryProof<CP> {
 /// * `result` - A reference to a `ProvableQueryResult`, which is the result
 ///   of a query that needs to be proven.
 ///
-/// * `table_length` - The length of the table used in the proof, as a `usize`.
+/// * `range_length` - The length of the range of the generator used in the proof, as a `usize`.
 ///
-/// * `generator_offset` - The offset of the generator used in the proof, as a `usize`.
+/// * `min_row_num` - The smallest offset of the generator used in the proof, as a `usize`.
 ///
 /// # Returns
 /// This function returns a `merlin::Transcript`. The transcript is a record
@@ -326,14 +346,14 @@ impl<CP: CommitmentEvaluationProof> QueryProof<CP> {
 fn make_transcript<T: Transcript>(
     expr: &(impl ProofPlan + Serialize),
     result: &ProvableQueryResult,
-    table_length: usize,
-    generator_offset: usize,
+    range_length: usize,
+    min_row_num: usize,
 ) -> T {
     let mut transcript = T::new();
     transcript.extend_serialize_as_le(result);
     transcript.extend_serialize_as_le(expr);
-    transcript.extend_serialize_as_le(&table_length);
-    transcript.extend_serialize_as_le(&generator_offset);
+    transcript.extend_serialize_as_le(&range_length);
+    transcript.extend_serialize_as_le(&min_row_num);
     transcript
 }