Skip to content

Commit

Permalink
fix: Resolve multiple SQL JOIN issues (pola-rs#16507)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-beedie authored and Wouittone committed Jun 22, 2024
1 parent 6297800 commit 497c4b3
Show file tree
Hide file tree
Showing 18 changed files with 492 additions and 192 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 7 additions & 4 deletions crates/polars-lazy/src/frame/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1085,8 +1085,13 @@ impl LazyFrame {

/// Creates the Cartesian product from both frames, preserving the order of the left keys.
#[cfg(feature = "cross_join")]
pub fn cross_join(self, other: LazyFrame) -> LazyFrame {
self.join(other, vec![], vec![], JoinArgs::new(JoinType::Cross))
pub fn cross_join(self, other: LazyFrame, suffix: Option<String>) -> LazyFrame {
self.join(
other,
vec![],
vec![],
JoinArgs::new(JoinType::Cross).with_suffix(suffix),
)
}

/// Left outer join this query with another lazy query.
Expand Down Expand Up @@ -1237,9 +1242,7 @@ impl LazyFrame {
if let Some(suffix) = args.suffix {
builder = builder.suffix(suffix);
}

// Note: args.slice is set by the optimizer

builder.finish()
}

Expand Down
2 changes: 1 addition & 1 deletion crates/polars-lazy/src/tests/cse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ fn test_cse_columns_projections() -> PolarsResult<()> {
]?
.lazy();

let left = left.cross_join(right.clone().select([col("A")]));
let left = left.cross_join(right.clone().select([col("A")]), None);
let q = left.join(
right.rename(["B"], ["C"]),
[col("A"), col("C")],
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-lazy/src/tests/projection_queries.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ fn test_cross_join_pd() -> PolarsResult<()> {
"price" => [5, 4]
]?;

let q = food.lazy().cross_join(drink.lazy()).select([
let q = food.lazy().cross_join(drink.lazy(), None).select([
col("name").alias("food"),
col("name_right").alias("beverage"),
(col("price") + col("price_right")).alias("total"),
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-lazy/src/tests/queries.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1171,7 +1171,7 @@ fn test_cross_join() -> PolarsResult<()> {
"b" => [None, Some(12)]
]?;

let out = df1.lazy().cross_join(df2.lazy()).collect()?;
let out = df1.lazy().cross_join(df2.lazy(), None).collect()?;
assert_eq!(out.shape(), (6, 4));
Ok(())
}
Expand Down
14 changes: 9 additions & 5 deletions crates/polars-lazy/src/tests/streaming.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ fn test_streaming_union_order() -> PolarsResult<()> {
fn test_streaming_union_join() -> PolarsResult<()> {
let q = get_csv_glob();
let q = q.select([col("sugars_g"), col("calories")]);
let q = q.clone().cross_join(q);
let q = q.clone().cross_join(q, None);

assert_streaming_with_default(q, true, true);
Ok(())
Expand Down Expand Up @@ -166,18 +166,22 @@ fn test_streaming_cross_join() -> PolarsResult<()> {
"a" => [1 ,2, 3]
]?;
let q = df.lazy();
let out = q.clone().cross_join(q).with_streaming(true).collect()?;
let out = q
.clone()
.cross_join(q, None)
.with_streaming(true)
.collect()?;
assert_eq!(out.shape(), (9, 2));

let q = get_parquet_file().with_projection_pushdown(false);
let q1 = q
.clone()
.select([col("calories")])
.cross_join(q.clone())
.cross_join(q.clone(), None)
.filter(col("calories").gt(col("calories_right")));
let q2 = q1
.select([all().name().suffix("_second")])
.cross_join(q)
.cross_join(q, None)
.filter(col("calories_right_second").lt(col("calories")))
.select([
col("calories"),
Expand Down Expand Up @@ -266,7 +270,7 @@ fn test_streaming_slice() -> PolarsResult<()> {
]?
.lazy();

let q = lf_a.clone().cross_join(lf_a).slice(10, 20);
let q = lf_a.clone().cross_join(lf_a, None).slice(10, 20);
let a = q.with_streaming(true).collect().unwrap();
assert_eq!(a.shape(), (20, 2));

Expand Down
5 changes: 5 additions & 0 deletions crates/polars-ops/src/frame/join/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,11 @@ impl JoinArgs {
self
}

pub fn with_suffix(mut self, suffix: Option<String>) -> Self {
self.suffix = suffix;
self
}

pub fn suffix(&self) -> &str {
self.suffix.as_deref().unwrap_or("_right")
}
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-plan/src/dsl/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ pub enum Excluded {
impl Expr {
/// Get Field result of the expression. The schema is the input data.
pub fn to_field(&self, schema: &Schema, ctxt: Context) -> PolarsResult<Field> {
// this is not called much and th expression depth is typically shallow
// this is not called much and the expression depth is typically shallow
let mut arena = Arena::with_capacity(5);
self.to_field_amortized(schema, ctxt, &mut arena)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,7 @@ fn resolve_join_suffixes(
.iter()
.map(|proj| {
let name = column_node_to_name(*proj, expr_arena);
if name.contains(suffix) && schema_after_join.get(&name).is_none() {
if name.ends_with(suffix) && schema_after_join.get(&name).is_none() {
let downstream_name = &name.as_ref()[..name.len() - suffix.len()];
let col = AExpr::Column(ColumnName::from(downstream_name));
let node = expr_arena.add(col);
Expand Down
1 change: 1 addition & 0 deletions crates/polars-sql/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ arrow = { workspace = true }
polars-core = { workspace = true }
polars-error = { workspace = true }
polars-lazy = { workspace = true, features = ["abs", "binary_encoding", "concat_str", "cross_join", "cum_agg", "dtype-date", "dtype-decimal", "is_in", "list_eval", "log", "meta", "regex", "round_series", "sign", "string_reverse", "strings", "timezones", "trigonometry"] }
polars-ops = { workspace = true }
polars-plan = { workspace = true }

hex = { workspace = true }
Expand Down
Loading

0 comments on commit 497c4b3

Please sign in to comment.