Skip to content

Commit

Permalink
fix(rust, python): don't run file-caching in streaming mode
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jul 27, 2023
1 parent 808bc59 commit 715be2c
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ pub fn optimize(
let comm_subexpr_elim = opt_state.comm_subexpr_elim;

#[allow(unused_variables)]
let agg_scan_projection = opt_state.file_caching;
let agg_scan_projection = opt_state.file_caching && !streaming;

// gradually fill the rules passed to the optimizer
let opt = StackOptimizer {};
Expand Down
25 changes: 25 additions & 0 deletions py-polars/tests/unit/streaming/test_streaming.py
Original file line number Diff line number Diff line change
Expand Up @@ -693,3 +693,28 @@ def test_streaming_groupby_list_9758() -> None:
.to_dict(False)
== payload
)


@pytest.mark.write_disk()
def test_streaming_10115(tmp_path: Path) -> None:
in_path = tmp_path / "in.parquet"
out_path = tmp_path / "out.parquet"

# this fails if the schema will be incorrectly due to the projection
# pushdown
(pl.DataFrame([{"x": 1, "y": "foo"}]).write_parquet(in_path))

joiner = pl.LazyFrame([{"y": "foo", "z": "_"}])

(
pl.scan_parquet(in_path)
.join(joiner, how="left", on="y")
.select("x", "y", "z")
.sink_parquet(out_path) #
)

assert pl.read_parquet(out_path).to_dict(False) == {
"x": [1],
"y": ["foo"],
"z": ["_"],
}

0 comments on commit 715be2c

Please sign in to comment.