Skip to content

Commit

Permalink
fix: scanning an empty csv misses row_count (#12316)
Browse files Browse the repository at this point in the history
  • Loading branch information
reswqa authored Nov 8, 2023
1 parent 81b02f3 commit 723d674
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 1 deletion.
6 changes: 5 additions & 1 deletion crates/polars-io/src/csv/read_impl/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -552,7 +552,11 @@ impl<'a> CoreReader<'a> {

// An empty file with a schema should return an empty DataFrame with that schema
if bytes.is_empty() {
return Ok(DataFrame::from(self.schema.as_ref()));
let mut df = DataFrame::from(self.schema.as_ref());
if let Some(ref row_count) = self.row_count {
df.insert_at_idx(0, Series::new_empty(&row_count.name, &IDX_DTYPE))?;
}
return Ok(df);
}

// all the buffers returned from the threads
Expand Down
12 changes: 12 additions & 0 deletions py-polars/tests/unit/io/test_lazy_csv.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

from collections import OrderedDict
from typing import TYPE_CHECKING

import numpy as np
Expand Down Expand Up @@ -273,3 +274,14 @@ def test_scan_csv_slice_offset_zero(io_files_path: Path) -> None:
lf = pl.scan_csv(io_files_path / "small.csv")
result = lf.slice(0)
assert result.collect().height == 4


@pytest.mark.write_disk()
def test_scan_empty_csv_with_row_count(tmp_path: Path) -> None:
    """Check that scanning a zero-row CSV still yields the row-count column.

    A DataFrame with a single empty column ``a`` is written to disk as CSV,
    scanned lazily with ``with_row_count("idx")`` and collected. The collected
    schema must list ``idx`` first, followed by ``a``.
    """
    tmp_path.mkdir(exist_ok=True)
    # The fixture is a CSV file without data rows, so name it accordingly
    # (it was previously written to a misleading "small.parquet" path).
    file_path = tmp_path / "empty.csv"
    df = pl.DataFrame({"a": []})
    df.write_csv(file_path)

    read = pl.scan_csv(file_path).with_row_count("idx")
    assert read.collect().schema == OrderedDict([("idx", pl.UInt32), ("a", pl.Utf8)])

0 comments on commit 723d674

Please sign in to comment.