Skip to content

Commit

Permalink
chore(rust): replace lexical with atoi to parse integers
Browse files Browse the repository at this point in the history
  • Loading branch information
reswqa committed Aug 21, 2023
1 parent bc166ce commit d6c3296
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 7 deletions.
6 changes: 3 additions & 3 deletions crates/polars-io/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,14 @@ polars-utils = { version = "0.32.0", path = "../polars-utils" }

ahash = { workspace = true }
arrow = { workspace = true }
atoi = { workspace = true, optional = true}
async-trait = { version = "0.1.59", optional = true }
bytes = { version = "1.3" }
chrono = { workspace = true, optional = true }
chrono-tz = { workspace = true, optional = true }
fast-float = { version = "0.2", optional = true }
flate2 = { version = "1", features = ["zlib-ng"], optional = true, default-features = false }
futures = { workspace = true, optional = true }
lexical = { version = "6", optional = true, default-features = false, features = ["std", "parse-integers"] }
lexical-core = { version = "0.8", optional = true }
memchr = { workspace = true }
memmap = { package = "memmap2", version = "0.7", optional = true }
Expand All @@ -51,10 +51,10 @@ tempdir = "0.3.7"
# support for arrows json parsing
json = [
"arrow/io_json_write",
"atoi",
"polars-json",
"simd-json",
"memmap",
"lexical",
"lexical-core",
"serde_json",
"dtype-struct",
Expand All @@ -65,7 +65,7 @@ ipc = ["arrow/io_ipc", "arrow/io_ipc_compression", "memmap"]
ipc_streaming = ["arrow/io_ipc", "arrow/io_ipc_compression"]
# support for arrow avro parsing
avro = ["arrow/io_avro", "arrow/io_avro_compression"]
csv = ["memmap", "lexical", "polars-core/rows", "lexical-core", "fast-float", "simdutf8"]
csv = ["atoi", "memmap", "polars-core/rows", "lexical-core", "fast-float", "simdutf8"]
decompress = ["flate2/miniz_oxide"]
decompress-fast = ["flate2/zlib-ng"]
dtype-categorical = ["polars-core/dtype-categorical"]
Expand Down
8 changes: 4 additions & 4 deletions crates/polars-io/src/csv/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,25 +34,25 @@ impl PrimitiveParser for Float64Type {
// Diff hunk with +/- markers stripped: the `lexical::parse` line is the removed
// body and the `atoi::atoi` line is its replacement.
impl PrimitiveParser for UInt32Type {
/// Parses the raw field `bytes` as a `u32`; yields `None` when parsing fails.
#[inline]
fn parse(bytes: &[u8]) -> Option<u32> {
lexical::parse(bytes).ok()
// NOTE(review): `atoi::atoi` may accept a numeric prefix followed by trailing
// non-digit bytes, unlike a whole-slice parse — confirm this is acceptable here.
atoi::atoi::<u32>(bytes)
}
}
// Diff hunk with +/- markers stripped: the `lexical::parse` line is the removed
// body and the `atoi::atoi` line is its replacement.
impl PrimitiveParser for UInt64Type {
/// Parses the raw field `bytes` as a `u64`; yields `None` when parsing fails.
#[inline]
fn parse(bytes: &[u8]) -> Option<u64> {
lexical::parse(bytes).ok()
// NOTE(review): `atoi::atoi` may accept a numeric prefix followed by trailing
// non-digit bytes, unlike a whole-slice parse — confirm this is acceptable here.
atoi::atoi::<u64>(bytes)
}
}
// Diff hunk with +/- markers stripped: the `lexical::parse` line is the removed
// body and the `atoi::atoi` line is its replacement.
impl PrimitiveParser for Int32Type {
/// Parses the raw field `bytes` as an `i32`; yields `None` when parsing fails.
#[inline]
fn parse(bytes: &[u8]) -> Option<i32> {
lexical::parse(bytes).ok()
// NOTE(review): `atoi::atoi` may accept a numeric prefix followed by trailing
// non-digit bytes, unlike a whole-slice parse — confirm this is acceptable here.
atoi::atoi::<i32>(bytes)
}
}
// Diff hunk with +/- markers stripped: the `lexical::parse` line is the removed
// body and the `atoi::atoi` line is its replacement.
impl PrimitiveParser for Int64Type {
/// Parses the raw field `bytes` as an `i64`; yields `None` when parsing fails.
#[inline]
fn parse(bytes: &[u8]) -> Option<i64> {
lexical::parse(bytes).ok()
// NOTE(review): `atoi::atoi` may accept a numeric prefix followed by trailing
// non-digit bytes, unlike a whole-slice parse — confirm this is acceptable here.
atoi::atoi::<i64>(bytes)
}
}

Expand Down
18 changes: 18 additions & 0 deletions py-polars/tests/unit/io/test_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -1312,6 +1312,24 @@ def test_read_csv_chunked() -> None:
assert df.filter(pl.col("count") < pl.col("count").shift(1)).is_empty()


def test_read_csv_parse_integer() -> None:
# Reads a single-column CSV as Int32 with ignore_errors=True and checks that
# values which do not fit in Int32 come back as null.
# 2147483647 is the max value of i32; every row after it exceeds that limit.
csv = """value
2147483647
2147483648
9589934591
9589934592
9999999999
10000000000
"""

# Only the first row is representable; the remaining five must be None.
assert pl.read_csv(
source=io.StringIO(csv),
dtypes={"value": pl.Int32},
ignore_errors=True,
).to_dict(False) == {"value": [2147483647, None, None, None, None, None]}


def test_read_empty_csv(io_files_path: Path) -> None:
with pytest.raises(NoDataError) as err:
pl.read_csv(io_files_path / "empty.csv")
Expand Down

0 comments on commit d6c3296

Please sign in to comment.