From 2579f890716abd5f56e6183ec549711cc385f101 Mon Sep 17 00:00:00 2001 From: Weijie Guo Date: Wed, 15 Nov 2023 11:48:09 +0800 Subject: [PATCH 1/3] depr(python): Deprecate `parse_int` in favor of `to_integer` --- .../src/chunked_array/strings/namespace.rs | 11 ++-- .../src/dsl/function_expr/strings.rs | 2 +- py-polars/polars/expr/string.py | 54 ++++++++++++------- py-polars/polars/series/string.py | 38 +++++++++---- py-polars/src/expr/string.rs | 6 +-- .../unit/namespaces/string/test_string.py | 22 ++++---- 6 files changed, 82 insertions(+), 51 deletions(-) diff --git a/crates/polars-ops/src/chunked_array/strings/namespace.rs b/crates/polars-ops/src/chunked_array/strings/namespace.rs index 61bfc508d986..9e4e9fea675d 100644 --- a/crates/polars-ops/src/chunked_array/strings/namespace.rs +++ b/crates/polars-ops/src/chunked_array/strings/namespace.rs @@ -62,13 +62,12 @@ pub trait Utf8NameSpaceImpl: AsUtf8 { #[cfg(feature = "string_from_radix")] // Parse a string number with base _radix_ into a decimal (i32) - fn parse_int(&self, radix: u32, strict: bool) -> PolarsResult { - use arrow::legacy::utils::CustomIterTools; + fn to_integer(&self, radix: u32, strict: bool) -> PolarsResult { let ca = self.as_utf8(); - let f = |opt_s: Option<&str>| -> Option { - opt_s.and_then(|s| ::from_str_radix(s, radix).ok()) + let f = |opt_s: Option<&str>| -> Option { + opt_s.and_then(|s| ::from_str_radix(s, radix).ok()) }; - let out: Int32Chunked = ca.into_iter().map(f).collect_trusted(); + let out: Int64Chunked = ca.apply_generic(f); if strict && ca.null_count() != out.null_count() { let failure_mask = !ca.is_null() & out.is_null(); @@ -77,7 +76,7 @@ pub trait Utf8NameSpaceImpl: AsUtf8 { let some_failures = all_failures.unique()?.slice(0, 10).sort(false); let some_error_msg = some_failures .get(0) - .and_then(|s| ::from_str_radix(s, radix).err()) + .and_then(|s| ::from_str_radix(s, radix).err()) .map_or_else( || unreachable!("failed to extract ParseIntError"), |e| format!("{}", e), diff --git a/crates/polars-plan/src/dsl/function_expr/strings.rs b/crates/polars-plan/src/dsl/function_expr/strings.rs index d73d061943bc..ffdfa0da4482 100644 --- a/crates/polars-plan/src/dsl/function_expr/strings.rs +++ b/crates/polars-plan/src/dsl/function_expr/strings.rs @@ -805,7 +805,7 @@ pub(super) fn replace(s: &[Series], literal: bool, n: i64) -> PolarsResult PolarsResult { let ca = s.utf8()?; - ca.parse_int(radix, strict).map(|ok| ok.into_series()) + ca.to_integer(radix, strict).map(|ok| ok.into_series()) } pub(super) fn str_slice(s: &Series, start: i64, length: Option) -> PolarsResult { let ca = s.utf8()?; diff --git a/py-polars/polars/expr/string.py b/py-polars/polars/expr/string.py index a6f847e2bfcf..dd51b5baac5f 100644 --- a/py-polars/polars/expr/string.py +++ b/py-polars/polars/expr/string.py @@ -5,14 +5,13 @@ import polars._reexport as pl from polars import functions as F -from polars.datatypes import Date, Datetime, Time, py_type_to_dtype +from polars.datatypes import Date, Datetime, Int32, Time, py_type_to_dtype from polars.exceptions import ChronoFormatWarning from polars.utils._parse_expr_input import parse_as_expression from polars.utils._wrap import wrap_expr from polars.utils.deprecation import ( deprecate_renamed_function, deprecate_renamed_parameter, - issue_deprecation_warning, rename_use_earliest_to_ambiguous, ) from polars.utils.various import find_stacklevel @@ -2030,16 +2029,15 @@ def explode(self) -> Expr: """ return wrap_expr(self._pyexpr.str_explode()) - def parse_int(self, radix: int | None = None, *, strict: bool = True) -> Expr: + def to_integer(self, base: int = 10, *, strict: bool = True) -> Expr: """ - Parse integers with base radix from strings. - - ParseError/Overflows become Nulls. + Convert an Utf8 column into an Int64 column with base radix. Parameters ---------- - radix + base Positive integer which is the base of the string we are parsing. + Default: 10. strict Bool, Default=True will raise any ParseError or overflow as ComputeError. False silently convert to Null. @@ -2047,17 +2045,17 @@ def parse_int(self, radix: int | None = None, *, strict: bool = True) -> Expr: Returns ------- Expr - Expression of data type :class:`Int32`. + Expression of data type :class:`Int64`. Examples -------- >>> df = pl.DataFrame({"bin": ["110", "101", "010", "invalid"]}) - >>> df.with_columns(parsed=pl.col("bin").str.parse_int(2, strict=False)) + >>> df.with_columns(parsed=pl.col("bin").str.to_integer(2, strict=False)) shape: (4, 2) ┌─────────┬────────┐ │ bin ┆ parsed │ │ --- ┆ --- │ - │ str ┆ i32 │ + │ str ┆ i64 │ ╞═════════╪════════╡ │ 110 ┆ 6 │ │ 101 ┆ 5 │ @@ -2066,12 +2064,12 @@ def parse_int(self, radix: int | None = None, *, strict: bool = True) -> Expr: └─────────┴────────┘ >>> df = pl.DataFrame({"hex": ["fa1e", "ff00", "cafe", None]}) - >>> df.with_columns(parsed=pl.col("hex").str.parse_int(16, strict=True)) + >>> df.with_columns(parsed=pl.col("hex").str.to_integer(16, strict=True)) shape: (4, 2) ┌──────┬────────┐ │ hex ┆ parsed │ │ --- ┆ --- │ - │ str ┆ i32 │ + │ str ┆ i64 │ ╞══════╪════════╡ │ fa1e ┆ 64030 │ │ ff00 ┆ 65280 │ @@ -2080,15 +2078,31 @@ def parse_int(self, radix: int | None = None, *, strict: bool = True) -> Expr: └──────┴────────┘ """ - if radix is None: - issue_deprecation_warning( - "The default value for the `radix` parameter of `parse_int` will be removed in a future version." - " Call `parse_int(radix=2)` to keep current behavior and silence this warning.", - version="0.19.8", - ) - radix = 2 + return wrap_expr(self._pyexpr.str_to_integer(base, strict)) + + @deprecate_renamed_function("to_integer", version="0.19.14") + @deprecate_renamed_parameter("radix", "base", version="0.19.14") + def parse_int(self, base: int | None = None, *, strict: bool = True) -> Expr: + """ + Parse integers with base radix from strings. + + ParseError/Overflows become Nulls. - return wrap_expr(self._pyexpr.str_parse_int(radix, strict)) + .. deprecated:: 0.19.14 + This method has been renamed to :func:`to_integer`. + + Parameters + ---------- + base + Positive integer which is the base of the string we are parsing. + strict + Bool, Default=True will raise any ParseError or overflow as ComputeError. + False silently convert to Null. + + """ + if base is None: + base = 2 + return self.to_integer(base, strict=strict).cast(Int32, strict=strict) @deprecate_renamed_function("strip_chars", version="0.19.3") def strip(self, characters: str | None = None) -> Expr: diff --git a/py-polars/polars/series/string.py b/py-polars/polars/series/string.py index e45513737820..08993bb08bb7 100644 --- a/py-polars/polars/series/string.py +++ b/py-polars/polars/series/string.py @@ -1511,16 +1511,15 @@ def explode(self) -> Series: """ - def parse_int(self, radix: int | None = None, *, strict: bool = True) -> Series: + def to_integer(self, base: int = 10, *, strict: bool = True) -> Series: """ - Parse integers with base radix from strings. - - ParseError/Overflows become Nulls. + Convert an Utf8 column into an Int64 column with base radix. Parameters ---------- - radix + base Positive integer which is the base of the string we are parsing. + Default: 10. strict Bool, Default=True will raise any ParseError or overflow as ComputeError. False silently convert to Null. @@ -1528,14 +1527,14 @@ def parse_int(self, radix: int | None = None, *, strict: bool = True) -> Series: Returns ------- Series - Series of data type :class:`Int32`. + Series of data type :class:`Int64`. Examples -------- >>> s = pl.Series("bin", ["110", "101", "010", "invalid"]) - >>> s.str.parse_int(2, strict=False) + >>> s.str.to_integer(base=2, strict=False) shape: (4,) - Series: 'bin' [i32] + Series: 'bin' [i64] [ 6 5 @@ -1544,9 +1543,9 @@ def parse_int(self, radix: int | None = None, *, strict: bool = True) -> Series: ] >>> s = pl.Series("hex", ["fa1e", "ff00", "cafe", None]) - >>> s.str.parse_int(16) + >>> s.str.to_integer(base=16) shape: (4,) - Series: 'hex' [i32] + Series: 'hex' [i64] [ 64030 65280 @@ -1556,6 +1555,25 @@ def parse_int(self, radix: int | None = None, *, strict: bool = True) -> Series: """ + @deprecate_renamed_function("to_integer", version="0.19.14") + @deprecate_renamed_parameter("radix", "base", version="0.19.14") + def parse_int(self, base: int | None = None, *, strict: bool = True) -> Series: + """ + Parse integers with base radix from strings. + + .. deprecated:: 0.19.14 + This method has been renamed to :func:`to_integer`. + + Parameters + ---------- + base + Positive integer which is the base of the string we are parsing. + strict + Bool, Default=True will raise any ParseError or overflow as ComputeError. + False silently convert to Null. + + """ + @deprecate_renamed_function("strip_chars", version="0.19.3") def strip(self, characters: str | None = None) -> Series: """ diff --git a/py-polars/src/expr/string.rs b/py-polars/src/expr/string.rs index 8a64c916bf10..f6dfc266a76b 100644 --- a/py-polars/src/expr/string.rs +++ b/py-polars/src/expr/string.rs @@ -188,12 +188,12 @@ impl PyExpr { self.inner.clone().str().base64_decode(strict).into() } - fn str_parse_int(&self, radix: u32, strict: bool) -> Self { + fn str_to_integer(&self, base: u32, strict: bool) -> Self { self.inner .clone() .str() - .from_radix(radix, strict) - .with_fmt("str.parse_int") + .from_radix(base, strict) + .with_fmt("str.to_integer") .into() } diff --git a/py-polars/tests/unit/namespaces/string/test_string.py b/py-polars/tests/unit/namespaces/string/test_string.py index 53797e85ebe3..5a0a827e8ca2 100644 --- a/py-polars/tests/unit/namespaces/string/test_string.py +++ b/py-polars/tests/unit/namespaces/string/test_string.py @@ -222,22 +222,22 @@ def test_str_case_cyrillic() -> None: assert s.str.to_uppercase().to_list() == [a.upper() for a in vals] -def test_str_parse_int() -> None: +def test_str_to_integer() -> None: bin = pl.Series(["110", "101", "010"]) - assert_series_equal(bin.str.parse_int(2), pl.Series([6, 5, 2]).cast(pl.Int32)) + assert_series_equal(bin.str.to_integer(base=2), pl.Series([6, 5, 2]).cast(pl.Int64)) hex = pl.Series(["fa1e", "ff00", "cafe", "invalid", None]) assert_series_equal( - hex.str.parse_int(16, strict=False), - pl.Series([64030, 65280, 51966, None, None]).cast(pl.Int32), + hex.str.to_integer(base=16, strict=False), + pl.Series([64030, 65280, 51966, None, None]).cast(pl.Int64), check_exact=True, ) with pytest.raises(pl.ComputeError): - hex.str.parse_int(16) + hex.str.to_integer(base=16) -def test_str_parse_int_df() -> None: +def test_str_to_integer_df() -> None: df = pl.DataFrame( { "bin": ["110", "101", "-010", "invalid", None], @@ -246,8 +246,8 @@ def test_str_parse_int_df() -> None: ) out = df.with_columns( [ - pl.col("bin").str.parse_int(2, strict=False), - pl.col("hex").str.parse_int(16, strict=False), + pl.col("bin").str.to_integer(2, strict=False), + pl.col("hex").str.to_integer(16, strict=False), ] ) @@ -261,13 +261,13 @@ def test_str_parse_int_df() -> None: with pytest.raises(pl.ComputeError): df.with_columns( - [pl.col("bin").str.parse_int(2), pl.col("hex").str.parse_int(16)] + [pl.col("bin").str.to_integer(2), pl.col("hex").str.to_integer(16)] ) -def test_str_parse_int_deprecated_default() -> None: +def test_str_parse_int_deprecated() -> None: s = pl.Series(["110", "101", "010"]) - with pytest.deprecated_call(match="default value"): + with pytest.deprecated_call(match="It has been renamed to `to_integer`"): result = s.str.parse_int() expected = pl.Series([6, 5, 2], dtype=pl.Int32) assert_series_equal(result, expected) From c65d602c49fdf0627e6476bd10d05c35d30fdb27 Mon Sep 17 00:00:00 2001 From: Weijie Guo Date: Wed, 15 Nov 2023 16:53:13 +0800 Subject: [PATCH 2/3] rename feature gate and methods --- crates/polars-lazy/Cargo.toml | 4 ++-- crates/polars-ops/Cargo.toml | 2 +- .../src/chunked_array/strings/namespace.rs | 12 +++++----- crates/polars-plan/Cargo.toml | 2 +- .../src/dsl/function_expr/strings.rs | 22 +++++++++---------- crates/polars-plan/src/dsl/string.rs | 8 +++---- crates/polars/Cargo.toml | 4 ++-- crates/polars/src/lib.rs | 2 +- docs/user-guide/installation.md | 2 +- py-polars/Cargo.toml | 2 +- py-polars/polars/expr/string.py | 4 ++-- py-polars/polars/series/string.py | 2 +- py-polars/src/expr/string.rs | 2 +- .../unit/namespaces/string/test_string.py | 9 +++++--- 14 files changed, 40 insertions(+), 37 deletions(-) diff --git a/crates/polars-lazy/Cargo.toml b/crates/polars-lazy/Cargo.toml index db61ebfbfc3f..3ff4b3ff233e 100644 --- a/crates/polars-lazy/Cargo.toml +++ b/crates/polars-lazy/Cargo.toml @@ -115,7 +115,7 @@ list_to_struct = ["polars-plan/list_to_struct"] python = ["pyo3", "polars-plan/python", "polars-core/python", "polars-io/python"] row_hash = ["polars-plan/row_hash"] string_pad = ["polars-plan/string_pad"] -string_from_radix = ["polars-plan/string_from_radix"] +string_to_integer = ["polars-plan/string_to_integer"] arg_where = ["polars-plan/arg_where"] search_sorted = ["polars-plan/search_sorted"] merge_sorted = ["polars-plan/merge_sorted"] @@ -178,7 +178,7 @@ test_all = [ "ipc", "row_hash", "string_pad", - "string_from_radix", + "string_to_integer", "search_sorted", "top_k", "pivot", diff --git a/crates/polars-ops/Cargo.toml b/crates/polars-ops/Cargo.toml index 41aed991aa44..7e4954ec61c9 100644 --- a/crates/polars-ops/Cargo.toml +++ b/crates/polars-ops/Cargo.toml @@ -87,7 +87,7 @@ diff = [] pct_change = ["diff"] strings = ["polars-core/strings"] string_pad = ["polars-core/strings"] -string_from_radix = ["polars-core/strings"] +string_to_integer = ["polars-core/strings"] extract_jsonpath = ["serde_json", "jsonpath_lib", "polars-json"] log = [] hash = [] diff --git a/crates/polars-ops/src/chunked_array/strings/namespace.rs b/crates/polars-ops/src/chunked_array/strings/namespace.rs index 9e4e9fea675d..d7d7f2cfb0ed 100644 --- a/crates/polars-ops/src/chunked_array/strings/namespace.rs +++ b/crates/polars-ops/src/chunked_array/strings/namespace.rs @@ -3,7 +3,7 @@ use arrow::legacy::kernels::string::*; use base64::engine::general_purpose; #[cfg(feature = "string_encoding")] use base64::Engine as _; -#[cfg(feature = "string_from_radix")] +#[cfg(feature = "string_to_integer")] use polars_core::export::num::Num; use polars_core::export::regex::Regex; use polars_core::prelude::arity::*; @@ -60,12 +60,12 @@ pub trait Utf8NameSpaceImpl: AsUtf8 { ca.apply_values(|s| general_purpose::STANDARD.encode(s).into()) } - #[cfg(feature = "string_from_radix")] - // Parse a string number with base _radix_ into a decimal (i32) - fn to_integer(&self, radix: u32, strict: bool) -> PolarsResult { + #[cfg(feature = "string_to_integer")] + // Parse a string number with base _radix_ into a decimal (i64) + fn to_integer(&self, base: u32, strict: bool) -> PolarsResult { let ca = self.as_utf8(); let f = |opt_s: Option<&str>| -> Option { - opt_s.and_then(|s| ::from_str_radix(s, radix).ok()) + opt_s.and_then(|s| ::from_str_radix(s, base).ok()) }; let out: Int64Chunked = ca.apply_generic(f); @@ -76,7 +76,7 @@ pub trait Utf8NameSpaceImpl: AsUtf8 { let some_failures = all_failures.unique()?.slice(0, 10).sort(false); let some_error_msg = some_failures .get(0) - .and_then(|s| ::from_str_radix(s, radix).err()) + .and_then(|s| ::from_str_radix(s, base).err()) .map_or_else( || unreachable!("failed to extract ParseIntError"), |e| format!("{}", e), diff --git a/crates/polars-plan/Cargo.toml b/crates/polars-plan/Cargo.toml index ec8f900a36dd..be688960603c 100644 --- a/crates/polars-plan/Cargo.toml +++ b/crates/polars-plan/Cargo.toml @@ -124,7 +124,7 @@ chunked_ids = ["polars-core/chunked_ids"] list_to_struct = ["polars-ops/list_to_struct"] row_hash = ["polars-core/row_hash", "polars-ops/hash"] string_pad = ["polars-ops/string_pad"] -string_from_radix = ["polars-ops/string_from_radix"] +string_to_integer = ["polars-ops/string_to_integer"] arg_where = [] search_sorted = ["polars-ops/search_sorted"] merge_sorted = ["polars-ops/merge_sorted"] diff --git a/crates/polars-plan/src/dsl/function_expr/strings.rs b/crates/polars-plan/src/dsl/function_expr/strings.rs index ffdfa0da4482..30fae4e85bba 100644 --- a/crates/polars-plan/src/dsl/function_expr/strings.rs +++ b/crates/polars-plan/src/dsl/function_expr/strings.rs @@ -47,8 +47,8 @@ pub enum StringFunction { dtype: DataType, pat: String, }, - #[cfg(feature = "string_from_radix")] - FromRadix(u32, bool), + #[cfg(feature = "string_to_integer")] + ToInteger(u32, bool), LenBytes, LenChars, Lowercase, @@ -123,8 +123,8 @@ impl StringFunction { ExtractAll => mapper.with_dtype(DataType::List(Box::new(DataType::Utf8))), #[cfg(feature = "extract_groups")] ExtractGroups { dtype, .. } => mapper.with_dtype(dtype.clone()), - #[cfg(feature = "string_from_radix")] - FromRadix { .. } => mapper.with_dtype(DataType::Int32), + #[cfg(feature = "string_to_integer")] + ToInteger { .. } => mapper.with_dtype(DataType::Int64), #[cfg(feature = "extract_jsonpath")] JsonExtract { dtype, .. } => mapper.with_opt_dtype(dtype.clone()), LenBytes => mapper.with_dtype(DataType::UInt32), @@ -189,8 +189,8 @@ impl Display for StringFunction { ExtractAll => "extract_all", #[cfg(feature = "extract_groups")] ExtractGroups { .. } => "extract_groups", - #[cfg(feature = "string_from_radix")] - FromRadix { .. } => "from_radix", + #[cfg(feature = "string_to_integer")] + ToInteger { .. } => "to_integer", #[cfg(feature = "extract_jsonpath")] JsonExtract { .. } => "json_extract", LenBytes => "len_bytes", @@ -312,8 +312,8 @@ impl From for SpecialEq> { StripCharsEnd => map_as_slice!(strings::strip_chars_end), StripPrefix => map_as_slice!(strings::strip_prefix), StripSuffix => map_as_slice!(strings::strip_suffix), - #[cfg(feature = "string_from_radix")] - FromRadix(radix, strict) => map!(strings::from_radix, radix, strict), + #[cfg(feature = "string_to_integer")] + ToInteger(base, strict) => map!(strings::to_integer, base, strict), Slice(start, length) => map!(strings::str_slice, start, length), #[cfg(feature = "string_encoding")] HexEncode => map!(strings::hex_encode), @@ -802,10 +802,10 @@ pub(super) fn replace(s: &[Series], literal: bool, n: i64) -> PolarsResult PolarsResult { +#[cfg(feature = "string_to_integer")] +pub(super) fn to_integer(s: &Series, base: u32, strict: bool) -> PolarsResult { let ca = s.utf8()?; - ca.to_integer(radix, strict).map(|ok| ok.into_series()) + ca.to_integer(base, strict).map(|ok| ok.into_series()) } pub(super) fn str_slice(s: &Series, start: i64, length: Option) -> PolarsResult { let ca = s.utf8()?; diff --git a/crates/polars-plan/src/dsl/string.rs b/crates/polars-plan/src/dsl/string.rs index 8cfedd8be6d6..c69610a244c6 100644 --- a/crates/polars-plan/src/dsl/string.rs +++ b/crates/polars-plan/src/dsl/string.rs @@ -399,12 +399,12 @@ impl StringNameSpace { .map_private(FunctionExpr::StringExpr(StringFunction::Titlecase)) } - #[cfg(feature = "string_from_radix")] + #[cfg(feature = "string_to_integer")] /// Parse string in base radix into decimal. - pub fn from_radix(self, radix: u32, strict: bool) -> Expr { + pub fn to_integer(self, base: u32, strict: bool) -> Expr { self.0 - .map_private(FunctionExpr::StringExpr(StringFunction::FromRadix( - radix, strict, + .map_private(FunctionExpr::StringExpr(StringFunction::ToInteger( + base, strict, ))) } diff --git a/crates/polars/Cargo.toml b/crates/polars/Cargo.toml index 0fab8fbb699c..01aaf38f12a0 100644 --- a/crates/polars/Cargo.toml +++ b/crates/polars/Cargo.toml @@ -171,7 +171,7 @@ list_take = ["polars-ops/list_take", "polars-lazy?/list_take"] describe = ["polars-core/describe"] timezones = ["polars-core/timezones", "polars-lazy?/timezones", "polars-io/timezones"] string_pad = ["polars-lazy?/string_pad", "polars-ops/string_pad"] -string_from_radix = ["polars-lazy?/string_from_radix", "polars-ops/string_from_radix"] +string_to_integer = ["polars-lazy?/string_to_integer", "polars-ops/string_to_integer"] arg_where = ["polars-lazy?/arg_where"] search_sorted = ["polars-lazy?/search_sorted"] merge_sorted = ["polars-lazy?/merge_sorted"] @@ -314,7 +314,7 @@ docs-selection = [ "asof_join", "cross_join", "concat_str", - "string_from_radix", + "string_to_integer", "decompress", "mode", "take_opt_iter", diff --git a/crates/polars/src/lib.rs b/crates/polars/src/lib.rs index 530980a531ff..081638028b64 100644 --- a/crates/polars/src/lib.rs +++ b/crates/polars/src/lib.rs @@ -189,7 +189,7 @@ //! * `temporal` - Conversions between [Chrono](https://docs.rs/chrono/) and Polars for temporal data types //! * `timezones` - Activate timezone support. //! * `strings` - Extra string utilities for [`Utf8Chunked`] //! - `string_pad` - `zfill`, `ljust`, `rjust` -//! - `string_from_radix` - `parse_int` +//! - `string_to_integer` - `parse_int` //! * `object` - Support for generic ChunkedArrays called [`ObjectChunked`] (generic over `T`). //! These are downcastable from Series through the [Any](https://doc.rust-lang.org/std/any/index.html) trait. //! * Performance related: diff --git a/docs/user-guide/installation.md b/docs/user-guide/installation.md index 5e4c73cb5361..5b1678326162 100644 --- a/docs/user-guide/installation.md +++ b/docs/user-guide/installation.md @@ -91,7 +91,7 @@ The opt-in features are: - `timezones` - Activate timezone support. - `strings` - Extra string utilities for `Utf8Chunked` - `string_pad` - `pad_start`, `pad_end`, `zfill` - - `string_from_radix` - `parse_int` + - `string_to_integer` - `parse_int` - `object` - Support for generic ChunkedArrays called `ObjectChunked` (generic over `T`). These are downcastable from Series through the [Any](https://doc.rust-lang.org/std/any/index.html) trait. - Performance related: diff --git a/py-polars/Cargo.toml b/py-polars/Cargo.toml index c59ed93e052d..b31d26438d2b 100644 --- a/py-polars/Cargo.toml +++ b/py-polars/Cargo.toml @@ -76,7 +76,7 @@ features = [ "semi_anti_join", "serde-lazy", "string_encoding", - "string_from_radix", + "string_to_integer", "string_pad", "strings", "temporal", diff --git a/py-polars/polars/expr/string.py b/py-polars/polars/expr/string.py index dd51b5baac5f..2a20800ed74e 100644 --- a/py-polars/polars/expr/string.py +++ b/py-polars/polars/expr/string.py @@ -2029,7 +2029,7 @@ def explode(self) -> Expr: """ return wrap_expr(self._pyexpr.str_explode()) - def to_integer(self, base: int = 10, *, strict: bool = True) -> Expr: + def to_integer(self, *, base: int = 10, strict: bool = True) -> Expr: """ Convert an Utf8 column into an Int64 column with base radix. @@ -2102,7 +2102,7 @@ def parse_int(self, base: int | None = None, *, strict: bool = True) -> Expr: """ if base is None: base = 2 - return self.to_integer(base, strict=strict).cast(Int32, strict=strict) + return self.to_integer(base=base, strict=strict).cast(Int32, strict=strict) @deprecate_renamed_function("strip_chars", version="0.19.3") def strip(self, characters: str | None = None) -> Expr: diff --git a/py-polars/polars/series/string.py b/py-polars/polars/series/string.py index 08993bb08bb7..68ee6eb31e7d 100644 --- a/py-polars/polars/series/string.py +++ b/py-polars/polars/series/string.py @@ -1511,7 +1511,7 @@ def explode(self) -> Series: """ - def to_integer(self, base: int = 10, *, strict: bool = True) -> Series: + def to_integer(self, *, base: int = 10, strict: bool = True) -> Series: """ Convert an Utf8 column into an Int64 column with base radix. diff --git a/py-polars/src/expr/string.rs b/py-polars/src/expr/string.rs index f6dfc266a76b..54663191a0b6 100644 --- a/py-polars/src/expr/string.rs +++ b/py-polars/src/expr/string.rs @@ -192,7 +192,7 @@ impl PyExpr { self.inner .clone() .str() - .from_radix(base, strict) + .to_integer(base, strict) .with_fmt("str.to_integer") .into() } diff --git a/py-polars/tests/unit/namespaces/string/test_string.py b/py-polars/tests/unit/namespaces/string/test_string.py index 5a0a827e8ca2..baad4370f16f 100644 --- a/py-polars/tests/unit/namespaces/string/test_string.py +++ b/py-polars/tests/unit/namespaces/string/test_string.py @@ -246,8 +246,8 @@ def test_str_to_integer_df() -> None: ) out = df.with_columns( [ - pl.col("bin").str.to_integer(2, strict=False), - pl.col("hex").str.to_integer(16, strict=False), + pl.col("bin").str.to_integer(base=2, strict=False), + pl.col("hex").str.to_integer(base=16, strict=False), ] ) @@ -261,7 +261,10 @@ def test_str_to_integer_df() -> None: with pytest.raises(pl.ComputeError): df.with_columns( - [pl.col("bin").str.to_integer(2), pl.col("hex").str.to_integer(16)] + [ + pl.col("bin").str.to_integer(base=2), + pl.col("hex").str.to_integer(base=16), + ] ) From 63cf7892e761ab9d3aff1a5b246b62bd447b3434 Mon Sep 17 00:00:00 2001 From: Weijie Guo Date: Wed, 15 Nov 2023 17:05:58 +0800 Subject: [PATCH 3/3] doc test --- py-polars/polars/expr/string.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/py-polars/polars/expr/string.py b/py-polars/polars/expr/string.py index 2a20800ed74e..5b5bdabd1640 100644 --- a/py-polars/polars/expr/string.py +++ b/py-polars/polars/expr/string.py @@ -2050,7 +2050,7 @@ def to_integer(self, *, base: int = 10, strict: bool = True) -> Expr: Examples -------- >>> df = pl.DataFrame({"bin": ["110", "101", "010", "invalid"]}) - >>> df.with_columns(parsed=pl.col("bin").str.to_integer(2, strict=False)) + >>> df.with_columns(parsed=pl.col("bin").str.to_integer(base=2, strict=False)) shape: (4, 2) ┌─────────┬────────┐ │ bin ┆ parsed │ @@ -2064,7 +2064,7 @@ def to_integer(self, *, base: int = 10, strict: bool = True) -> Expr: └─────────┴────────┘ >>> df = pl.DataFrame({"hex": ["fa1e", "ff00", "cafe", None]}) - >>> df.with_columns(parsed=pl.col("hex").str.to_integer(16, strict=True)) + >>> df.with_columns(parsed=pl.col("hex").str.to_integer(base=16, strict=True)) shape: (4, 2) ┌──────┬────────┐ │ hex ┆ parsed │