-
Notifications
You must be signed in to change notification settings - Fork 86
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
refactor!: replace `DataAccessor` with `Table` in `ProofExpr` && remove `input_length` from `ProofPlan::result_evaluate`
#366
Changes from 3 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,10 @@ | ||
use super::Column; | ||
use crate::base::{map::IndexMap, scalar::Scalar}; | ||
use super::{Column, ColumnRef, DataAccessor, TableRef}; | ||
use crate::base::{ | ||
map::{IndexMap, IndexSet}, | ||
scalar::Scalar, | ||
}; | ||
use alloc::vec; | ||
use bumpalo::Bump; | ||
use proof_of_sql_parser::Identifier; | ||
use snafu::Snafu; | ||
|
||
|
@@ -18,18 +23,20 @@ pub enum TableError { | |
#[derive(Debug, Clone, Eq)] | ||
pub struct Table<'a, S: Scalar> { | ||
table: IndexMap<Identifier, Column<'a, S>>, | ||
num_rows: usize, | ||
} | ||
impl<'a, S: Scalar> Table<'a, S> { | ||
/// Creates a new [`Table`]. | ||
pub fn try_new(table: IndexMap<Identifier, Column<'a, S>>) -> Result<Self, TableError> { | ||
if table.is_empty() { | ||
return Ok(Self { table }); | ||
// `EmptyExec` should have one row for queries such as `SELECT 1`. | ||
return Ok(Self { table, num_rows: 1 }); | ||
} | ||
let num_rows = table[0].len(); | ||
if table.values().any(|column| column.len() != num_rows) { | ||
Err(TableError::ColumnLengthMismatch) | ||
} else { | ||
Ok(Self { table }) | ||
Ok(Self { table, num_rows }) | ||
} | ||
} | ||
/// Creates a new [`Table`]. | ||
|
@@ -38,15 +45,43 @@ impl<'a, S: Scalar> Table<'a, S> { | |
) -> Result<Self, TableError> { | ||
Self::try_new(IndexMap::from_iter(iter)) | ||
} | ||
/// Creates a new [`Table`] from a [`DataAccessor`], [`TableRef`] and [`ColumnRef`]s. | ||
/// | ||
/// Columns are retrieved from the [`DataAccessor`] using the provided [`ColumnRef`]s. | ||
/// # Panics | ||
/// Missing columns or column length mismatches can occur if the accessor doesn't | ||
/// contain the necessary columns. In practice, this should not happen. | ||
pub(crate) fn from_columns( | ||
column_refs: &IndexSet<ColumnRef>, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'd prefer for this to be a provided method on |
||
table_ref: TableRef, | ||
accessor: &'a dyn DataAccessor<S>, | ||
alloc: &'a Bump, | ||
) -> Self { | ||
if column_refs.is_empty() { | ||
// TODO: Currently we have to have non-empty column references to have a non-empty table | ||
// to evaluate `ProofExpr`s on. Once we restrict [`DataAccessor`] to [`TableExec`] | ||
// and use input `DynProofPlan`s we should no longer need this. | ||
let input_length = accessor.get_length(table_ref); | ||
let bogus_vec = vec![true; input_length]; | ||
let bogus_col = Column::Boolean(alloc.alloc_slice_copy(&bogus_vec)); | ||
Table::<'a, S>::try_from_iter(core::iter::once(("bogus".parse().unwrap(), bogus_col))) | ||
} else { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Isn't the proper behavior to create a |
||
Table::<'a, S>::try_from_iter(column_refs.into_iter().map(|column_ref| { | ||
let column = accessor.get_column(*column_ref); | ||
(column_ref.column_id(), column) | ||
})) | ||
} | ||
.expect("Failed to create table from column references") | ||
} | ||
/// Number of columns in the table. | ||
#[must_use] | ||
pub fn num_columns(&self) -> usize { | ||
self.table.len() | ||
} | ||
/// Number of rows in the table. For an empty table, this will return `None`. | ||
/// Number of rows in the table. | ||
#[must_use] | ||
pub fn num_rows(&self) -> Option<usize> { | ||
(!self.table.is_empty()).then(|| self.table[0].len()) | ||
pub fn num_rows(&self) -> usize { | ||
self.num_rows | ||
} | ||
/// Whether the table has no columns. | ||
#[must_use] | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This default behavior doesn't really make sense.
This is what arrow does:
https://github.com/apache/arrow-rs/blob/3ee5048c8ea3aa531d111afe33d0a3551eabcd84/arrow-array/src/record_batch.rs#L86
https://github.com/apache/arrow-rs/blob/3ee5048c8ea3aa531d111afe33d0a3551eabcd84/arrow-array/src/record_batch.rs#L501