Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

depr(python,rust!): Deprecate parse_int in favor of to_integer #12464

Merged
merged 3 commits into from
Nov 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions crates/polars-lazy/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ list_to_struct = ["polars-plan/list_to_struct"]
python = ["pyo3", "polars-plan/python", "polars-core/python", "polars-io/python"]
row_hash = ["polars-plan/row_hash"]
string_pad = ["polars-plan/string_pad"]
string_from_radix = ["polars-plan/string_from_radix"]
string_to_integer = ["polars-plan/string_to_integer"]
arg_where = ["polars-plan/arg_where"]
search_sorted = ["polars-plan/search_sorted"]
merge_sorted = ["polars-plan/merge_sorted"]
Expand Down Expand Up @@ -178,7 +178,7 @@ test_all = [
"ipc",
"row_hash",
"string_pad",
"string_from_radix",
"string_to_integer",
"search_sorted",
"top_k",
"pivot",
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-ops/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ diff = []
pct_change = ["diff"]
strings = ["polars-core/strings"]
string_pad = ["polars-core/strings"]
string_from_radix = ["polars-core/strings"]
string_to_integer = ["polars-core/strings"]
extract_jsonpath = ["serde_json", "jsonpath_lib", "polars-json"]
log = []
hash = []
Expand Down
17 changes: 8 additions & 9 deletions crates/polars-ops/src/chunked_array/strings/namespace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use arrow::legacy::kernels::string::*;
use base64::engine::general_purpose;
#[cfg(feature = "string_encoding")]
use base64::Engine as _;
#[cfg(feature = "string_from_radix")]
#[cfg(feature = "string_to_integer")]
use polars_core::export::num::Num;
use polars_core::export::regex::Regex;
use polars_core::prelude::arity::*;
Expand Down Expand Up @@ -60,15 +60,14 @@ pub trait Utf8NameSpaceImpl: AsUtf8 {
ca.apply_values(|s| general_purpose::STANDARD.encode(s).into())
}

#[cfg(feature = "string_from_radix")]
// Parse a string number with base _radix_ into a decimal (i32)
fn parse_int(&self, radix: u32, strict: bool) -> PolarsResult<Int32Chunked> {
use arrow::legacy::utils::CustomIterTools;
#[cfg(feature = "string_to_integer")]
// Parse a string number written in the given _base_ into an integer (i64)
fn to_integer(&self, base: u32, strict: bool) -> PolarsResult<Int64Chunked> {
let ca = self.as_utf8();
let f = |opt_s: Option<&str>| -> Option<i32> {
opt_s.and_then(|s| <i32 as Num>::from_str_radix(s, radix).ok())
let f = |opt_s: Option<&str>| -> Option<i64> {
opt_s.and_then(|s| <i64 as Num>::from_str_radix(s, base).ok())
};
let out: Int32Chunked = ca.into_iter().map(f).collect_trusted();
let out: Int64Chunked = ca.apply_generic(f);

if strict && ca.null_count() != out.null_count() {
let failure_mask = !ca.is_null() & out.is_null();
Expand All @@ -77,7 +76,7 @@ pub trait Utf8NameSpaceImpl: AsUtf8 {
let some_failures = all_failures.unique()?.slice(0, 10).sort(false);
let some_error_msg = some_failures
.get(0)
.and_then(|s| <i32 as Num>::from_str_radix(s, radix).err())
.and_then(|s| <i64 as Num>::from_str_radix(s, base).err())
.map_or_else(
|| unreachable!("failed to extract ParseIntError"),
|e| format!("{}", e),
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-plan/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ chunked_ids = ["polars-core/chunked_ids"]
list_to_struct = ["polars-ops/list_to_struct"]
row_hash = ["polars-core/row_hash", "polars-ops/hash"]
string_pad = ["polars-ops/string_pad"]
string_from_radix = ["polars-ops/string_from_radix"]
string_to_integer = ["polars-ops/string_to_integer"]
arg_where = []
search_sorted = ["polars-ops/search_sorted"]
merge_sorted = ["polars-ops/merge_sorted"]
Expand Down
22 changes: 11 additions & 11 deletions crates/polars-plan/src/dsl/function_expr/strings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ pub enum StringFunction {
dtype: DataType,
pat: String,
},
#[cfg(feature = "string_from_radix")]
FromRadix(u32, bool),
#[cfg(feature = "string_to_integer")]
ToInteger(u32, bool),
LenBytes,
LenChars,
Lowercase,
Expand Down Expand Up @@ -123,8 +123,8 @@ impl StringFunction {
ExtractAll => mapper.with_dtype(DataType::List(Box::new(DataType::Utf8))),
#[cfg(feature = "extract_groups")]
ExtractGroups { dtype, .. } => mapper.with_dtype(dtype.clone()),
#[cfg(feature = "string_from_radix")]
FromRadix { .. } => mapper.with_dtype(DataType::Int32),
#[cfg(feature = "string_to_integer")]
ToInteger { .. } => mapper.with_dtype(DataType::Int64),
#[cfg(feature = "extract_jsonpath")]
JsonExtract { dtype, .. } => mapper.with_opt_dtype(dtype.clone()),
LenBytes => mapper.with_dtype(DataType::UInt32),
Expand Down Expand Up @@ -189,8 +189,8 @@ impl Display for StringFunction {
ExtractAll => "extract_all",
#[cfg(feature = "extract_groups")]
ExtractGroups { .. } => "extract_groups",
#[cfg(feature = "string_from_radix")]
FromRadix { .. } => "from_radix",
#[cfg(feature = "string_to_integer")]
ToInteger { .. } => "to_integer",
#[cfg(feature = "extract_jsonpath")]
JsonExtract { .. } => "json_extract",
LenBytes => "len_bytes",
Expand Down Expand Up @@ -312,8 +312,8 @@ impl From<StringFunction> for SpecialEq<Arc<dyn SeriesUdf>> {
StripCharsEnd => map_as_slice!(strings::strip_chars_end),
StripPrefix => map_as_slice!(strings::strip_prefix),
StripSuffix => map_as_slice!(strings::strip_suffix),
#[cfg(feature = "string_from_radix")]
FromRadix(radix, strict) => map!(strings::from_radix, radix, strict),
#[cfg(feature = "string_to_integer")]
ToInteger(base, strict) => map!(strings::to_integer, base, strict),
Slice(start, length) => map!(strings::str_slice, start, length),
#[cfg(feature = "string_encoding")]
HexEncode => map!(strings::hex_encode),
Expand Down Expand Up @@ -802,10 +802,10 @@ pub(super) fn replace(s: &[Series], literal: bool, n: i64) -> PolarsResult<Serie
.map(|ca| ca.into_series())
}

#[cfg(feature = "string_from_radix")]
pub(super) fn from_radix(s: &Series, radix: u32, strict: bool) -> PolarsResult<Series> {
#[cfg(feature = "string_to_integer")]
pub(super) fn to_integer(s: &Series, base: u32, strict: bool) -> PolarsResult<Series> {
let ca = s.utf8()?;
ca.parse_int(radix, strict).map(|ok| ok.into_series())
ca.to_integer(base, strict).map(|ok| ok.into_series())
}
pub(super) fn str_slice(s: &Series, start: i64, length: Option<u64>) -> PolarsResult<Series> {
let ca = s.utf8()?;
Expand Down
8 changes: 4 additions & 4 deletions crates/polars-plan/src/dsl/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -399,12 +399,12 @@ impl StringNameSpace {
.map_private(FunctionExpr::StringExpr(StringFunction::Titlecase))
}

#[cfg(feature = "string_from_radix")]
#[cfg(feature = "string_to_integer")]
/// Parse a string written in the given base into an integer.
pub fn from_radix(self, radix: u32, strict: bool) -> Expr {
pub fn to_integer(self, base: u32, strict: bool) -> Expr {
self.0
.map_private(FunctionExpr::StringExpr(StringFunction::FromRadix(
radix, strict,
.map_private(FunctionExpr::StringExpr(StringFunction::ToInteger(
base, strict,
)))
}

Expand Down
4 changes: 2 additions & 2 deletions crates/polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ list_take = ["polars-ops/list_take", "polars-lazy?/list_take"]
describe = ["polars-core/describe"]
timezones = ["polars-core/timezones", "polars-lazy?/timezones", "polars-io/timezones"]
string_pad = ["polars-lazy?/string_pad", "polars-ops/string_pad"]
string_from_radix = ["polars-lazy?/string_from_radix", "polars-ops/string_from_radix"]
string_to_integer = ["polars-lazy?/string_to_integer", "polars-ops/string_to_integer"]
arg_where = ["polars-lazy?/arg_where"]
search_sorted = ["polars-lazy?/search_sorted"]
merge_sorted = ["polars-lazy?/merge_sorted"]
Expand Down Expand Up @@ -314,7 +314,7 @@ docs-selection = [
"asof_join",
"cross_join",
"concat_str",
"string_from_radix",
"string_to_integer",
"decompress",
"mode",
"take_opt_iter",
Expand Down
2 changes: 1 addition & 1 deletion crates/polars/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@
//! * `temporal` - Conversions between [Chrono](https://docs.rs/chrono/) and Polars for temporal data types
//! * `timezones` - Activate timezone support.
//! * `strings` - Extra string utilities for [`Utf8Chunked`]
//! - `string_pad` - `zfill`, `ljust`, `rjust`
//! - `string_from_radix` - `parse_int`
//! - `string_to_integer` - `to_integer`
//! * `object` - Support for generic ChunkedArrays called [`ObjectChunked<T>`] (generic over `T`).
//! These are downcastable from Series through the [Any](https://doc.rust-lang.org/std/any/index.html) trait.
//! * Performance related:
Expand Down
2 changes: 1 addition & 1 deletion docs/user-guide/installation.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ The opt-in features are:
- `timezones` - Activate timezone support.
- `strings` - Extra string utilities for `Utf8Chunked`
- `string_pad` - `pad_start`, `pad_end`, `zfill`
- `string_from_radix` - `parse_int`
- `string_to_integer` - `to_integer`
- `object` - Support for generic ChunkedArrays called `ObjectChunked<T>` (generic over `T`).
These are downcastable from Series through the [Any](https://doc.rust-lang.org/std/any/index.html) trait.
- Performance related:
Expand Down
2 changes: 1 addition & 1 deletion py-polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ features = [
"semi_anti_join",
"serde-lazy",
"string_encoding",
"string_from_radix",
"string_to_integer",
"string_pad",
"strings",
"temporal",
Expand Down
54 changes: 34 additions & 20 deletions py-polars/polars/expr/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,13 @@

import polars._reexport as pl
from polars import functions as F
from polars.datatypes import Date, Datetime, Time, py_type_to_dtype
from polars.datatypes import Date, Datetime, Int32, Time, py_type_to_dtype
from polars.exceptions import ChronoFormatWarning
from polars.utils._parse_expr_input import parse_as_expression
from polars.utils._wrap import wrap_expr
from polars.utils.deprecation import (
deprecate_renamed_function,
deprecate_renamed_parameter,
issue_deprecation_warning,
rename_use_earliest_to_ambiguous,
)
from polars.utils.various import find_stacklevel
Expand Down Expand Up @@ -2030,34 +2029,33 @@ def explode(self) -> Expr:
"""
return wrap_expr(self._pyexpr.str_explode())

def parse_int(self, radix: int | None = None, *, strict: bool = True) -> Expr:
def to_integer(self, *, base: int = 10, strict: bool = True) -> Expr:
"""
Parse integers with base radix from strings.

ParseError/Overflows become Nulls.
Convert a Utf8 column into an Int64 column, parsing each string in the given base.

Parameters
----------
radix
base
Positive integer which is the base of the string we are parsing.
Default: 10.
strict
If True (the default), raise a ComputeError on any parse error or overflow.
If False, silently convert such values to null.

Returns
-------
Expr
Expression of data type :class:`Int32`.
Expression of data type :class:`Int64`.

Examples
--------
>>> df = pl.DataFrame({"bin": ["110", "101", "010", "invalid"]})
>>> df.with_columns(parsed=pl.col("bin").str.parse_int(2, strict=False))
>>> df.with_columns(parsed=pl.col("bin").str.to_integer(base=2, strict=False))
shape: (4, 2)
┌─────────┬────────┐
│ bin ┆ parsed │
│ --- ┆ --- │
│ str ┆ i32
│ str ┆ i64
╞═════════╪════════╡
│ 110 ┆ 6 │
│ 101 ┆ 5 │
Expand All @@ -2066,12 +2064,12 @@ def parse_int(self, radix: int | None = None, *, strict: bool = True) -> Expr:
└─────────┴────────┘

>>> df = pl.DataFrame({"hex": ["fa1e", "ff00", "cafe", None]})
>>> df.with_columns(parsed=pl.col("hex").str.parse_int(16, strict=True))
>>> df.with_columns(parsed=pl.col("hex").str.to_integer(base=16, strict=True))
shape: (4, 2)
┌──────┬────────┐
│ hex ┆ parsed │
│ --- ┆ --- │
│ str ┆ i32
│ str ┆ i64
╞══════╪════════╡
│ fa1e ┆ 64030 │
│ ff00 ┆ 65280 │
Expand All @@ -2080,15 +2078,31 @@ def parse_int(self, radix: int | None = None, *, strict: bool = True) -> Expr:
└──────┴────────┘

"""
if radix is None:
issue_deprecation_warning(
"The default value for the `radix` parameter of `parse_int` will be removed in a future version."
" Call `parse_int(radix=2)` to keep current behavior and silence this warning.",
version="0.19.8",
)
radix = 2
return wrap_expr(self._pyexpr.str_to_integer(base, strict))

@deprecate_renamed_function("to_integer", version="0.19.14")
@deprecate_renamed_parameter("radix", "base", version="0.19.14")
def parse_int(self, base: int | None = None, *, strict: bool = True) -> Expr:
"""
Parse integers with base radix from strings.

ParseError/Overflows become Nulls.

return wrap_expr(self._pyexpr.str_parse_int(radix, strict))
.. deprecated:: 0.19.14
This method has been renamed to :func:`to_integer`.

Parameters
----------
base
Positive integer which is the base of the string we are parsing.
strict
If True (the default), raise a ComputeError on any parse error or overflow.
If False, silently convert such values to null.

"""
if base is None:
base = 2
return self.to_integer(base=base, strict=strict).cast(Int32, strict=strict)

@deprecate_renamed_function("strip_chars", version="0.19.3")
def strip(self, characters: str | None = None) -> Expr:
Expand Down
38 changes: 28 additions & 10 deletions py-polars/polars/series/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -1511,31 +1511,30 @@ def explode(self) -> Series:

"""

def parse_int(self, radix: int | None = None, *, strict: bool = True) -> Series:
def to_integer(self, *, base: int = 10, strict: bool = True) -> Series:
"""
Parse integers with base radix from strings.

ParseError/Overflows become Nulls.
Convert a Utf8 column into an Int64 column, parsing each string in the given base.

Parameters
----------
radix
base
Positive integer which is the base of the string we are parsing.
Default: 10.
strict
If True (the default), raise a ComputeError on any parse error or overflow.
If False, silently convert such values to null.

Returns
-------
Series
Series of data type :class:`Int32`.
Series of data type :class:`Int64`.

Examples
--------
>>> s = pl.Series("bin", ["110", "101", "010", "invalid"])
>>> s.str.parse_int(2, strict=False)
>>> s.str.to_integer(base=2, strict=False)
shape: (4,)
Series: 'bin' [i32]
Series: 'bin' [i64]
[
6
5
Expand All @@ -1544,9 +1543,9 @@ def parse_int(self, radix: int | None = None, *, strict: bool = True) -> Series:
]

>>> s = pl.Series("hex", ["fa1e", "ff00", "cafe", None])
>>> s.str.parse_int(16)
>>> s.str.to_integer(base=16)
shape: (4,)
Series: 'hex' [i32]
Series: 'hex' [i64]
[
64030
65280
Expand All @@ -1556,6 +1555,25 @@ def parse_int(self, radix: int | None = None, *, strict: bool = True) -> Series:

"""

@deprecate_renamed_function("to_integer", version="0.19.14")
@deprecate_renamed_parameter("radix", "base", version="0.19.14")
def parse_int(self, base: int | None = None, *, strict: bool = True) -> Series:
"""
Parse integers with base radix from strings.

.. deprecated:: 0.19.14
This method has been renamed to :func:`to_integer`.

Parameters
----------
base
Positive integer which is the base of the string we are parsing.
strict
If True (the default), raise a ComputeError on any parse error or overflow.
If False, silently convert such values to null.

"""

@deprecate_renamed_function("strip_chars", version="0.19.3")
def strip(self, characters: str | None = None) -> Series:
"""
Expand Down
Loading