Skip to content

Commit

Permalink
depr(python): Rename map_dict to replace and change default behavior (pola-rs#12599)
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego authored Nov 22, 2023
1 parent 08d009f commit 74c6ba2
Show file tree
Hide file tree
Showing 10 changed files with 630 additions and 585 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ Manipulation/selection
Expr.rechunk
Expr.reinterpret
Expr.repeat_by
Expr.replace
Expr.reshape
Expr.reverse
Expr.rle
Expand Down
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/series/computation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ Computation
Series.peak_max
Series.peak_min
Series.rank
Series.replace
Series.rolling_apply
Series.rolling_map
Series.rolling_max
Expand Down
308 changes: 149 additions & 159 deletions py-polars/polars/expr/expr.py

Large diffs are not rendered by default.

131 changes: 82 additions & 49 deletions py-polars/polars/series/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@
from polars.utils.meta import get_index_type
from polars.utils.various import (
_is_generator,
no_default,
parse_percentiles,
parse_version,
range_to_series,
Expand Down Expand Up @@ -6228,83 +6229,81 @@ def upper_bound(self) -> Self:
"""

def map_dict(
def replace(
self,
remapping: dict[Any, Any],
mapping: dict[Any, Any],
*,
default: Any = None,
default: Any = no_default,
return_dtype: PolarsDataType | None = None,
) -> Self:
"""
Replace values in the Series using a remapping dictionary.
Replace values according to the given mapping.
Needs a global string cache for lazily evaluated queries on columns of
type `Categorical`.
Parameters
----------
remapping
Dictionary containing the before/after values to map.
mapping
Mapping of values to their replacement.
default
Value to use when the remapping dict does not contain the lookup value.
Use `pl.first()`, to keep the original value.
Value to use when the mapping does not contain the lookup value.
Defaults to keeping the original value.
return_dtype
Set return dtype to override automatic return dtype determination.
Examples
See Also
--------
>>> s = pl.Series("iso3166", ["TUR", "???", "JPN", "NLD"])
>>> country_lookup = {
... "JPN": "Japan",
... "TUR": "Türkiye",
... "NLD": "Netherlands",
... }
str.replace
Remap, setting a default for unrecognised values...
Examples
--------
Replace a single value by another value. Values not in the mapping remain
unchanged.
>>> s.map_dict(country_lookup, default="Unspecified").alias("country_name")
>>> s = pl.Series("a", [1, 2, 2, 3])
>>> s.replace({2: 100})
shape: (4,)
Series: 'country_name' [str]
Series: 'a' [i64]
[
"Türkiye"
"Unspecified"
"Japan"
"Netherlands"
1
100
100
3
]
...or keep the original value, by making use of `pl.first()`:
Replace multiple values. Specify a default to set values not in the given map
to the default value.
>>> s.map_dict(country_lookup, default=pl.first()).alias("country_name")
>>> s = pl.Series("country_code", ["FR", "ES", "DE", None])
>>> country_code_map = {
... "CA": "Canada",
... "DE": "Germany",
... "FR": "France",
... None: "unspecified",
... }
>>> s.replace(country_code_map, default=None)
shape: (4,)
Series: 'country_name' [str]
Series: 'country_code' [str]
[
"Türkiye"
"???"
"Japan"
"Netherlands"
"France"
null
"Germany"
"unspecified"
]
...or keep the original value, by assigning the input series:
The return type can be overridden with the `return_dtype` argument.
>>> s.map_dict(country_lookup, default=s).alias("country_name")
>>> s = pl.Series("a", [0, 1, 2, 3])
>>> s.replace({1: 10, 2: 20}, default=0, return_dtype=pl.UInt8)
shape: (4,)
Series: 'country_name' [str]
Series: 'a' [u8]
[
"Türkiye"
"???"
"Japan"
"Netherlands"
]
Override return dtype:
>>> s = pl.Series("int8", [5, 2, 3], dtype=pl.Int8)
>>> s.map_dict({2: 7}, default=pl.first(), return_dtype=pl.Int16)
shape: (3,)
Series: 'int8' [i16]
[
5
7
3
0
10
20
0
]
"""

def reshape(self, dimensions: tuple[int, ...]) -> Series:
Expand Down Expand Up @@ -7136,6 +7135,40 @@ def view(self, *, ignore_nulls: bool = False) -> SeriesView:
"""
return self._view(ignore_nulls=ignore_nulls)

@deprecate_function(
    "It has been renamed to `replace`."
    " The default behavior has changed to keep any values not present in the mapping unchanged."
    " Pass `default=None` to keep existing behavior.",
    version="0.19.16",
)
@deprecate_renamed_parameter("remapping", "mapping", version="0.19.16")
def map_dict(
    self,
    mapping: dict[Any, Any],
    *,
    default: Any = None,
    return_dtype: PolarsDataType | None = None,
) -> Self:
    """
    Replace values in the Series using a mapping dictionary.

    .. deprecated:: 0.19.16
        This method has been renamed to :meth:`replace`. The default behavior
        has changed to keep any values not present in the mapping unchanged.
        Pass `default=None` to keep existing behavior.

    Parameters
    ----------
    mapping
        Dictionary containing the before/after values to map.
    default
        Value to use when the mapping does not contain the lookup value.
        Defaults to `None` for this deprecated method.
        Use `pl.first()` to keep the original value.
    return_dtype
        Set return dtype to override automatic return dtype determination.

    """
    # Always forward `default` explicitly: this wrapper's own default is `None`,
    # so callers that omit the argument still pass `None` on to `replace`.
    return self.replace(mapping, default=default, return_dtype=return_dtype)

# Keep the `list` and `str` properties below at the end of the definition of Series,
# so as not to confuse mypy with the type annotations `str` and `list`

Expand Down
4 changes: 2 additions & 2 deletions py-polars/polars/utils/udfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -483,7 +483,7 @@ def op(inst: Instruction) -> str:
elif inst.opname in OpNames.UNARY:
return OpNames.UNARY[inst.opname]
elif inst.opname == "BINARY_SUBSCR":
return "map_dict"
return "replace"
else:
raise AssertionError(
"unrecognized opname"
Expand Down Expand Up @@ -520,7 +520,7 @@ def _expr(self, value: StackEntry, col: str, param_name: str, depth: int) -> str
if " " in e1
else f"{not_}{e1}.is_in({e2})"
)
elif op == "map_dict":
elif op == "replace":
if not self._caller_variables:
self._caller_variables.update(_get_all_caller_variables())
if not isinstance(self._caller_variables.get(e1, None), dict):
Expand Down
5 changes: 3 additions & 2 deletions py-polars/polars/utils/various.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,8 +341,9 @@ def str_duration_(td: str | None) -> int | None:
.cast(tp)
)
elif tp == Boolean:
cast_cols[c] = F.col(c).map_dict(
remapping={"true": True, "false": False},
cast_cols[c] = F.col(c).replace(
mapping={"true": True, "false": False},
default=None,
return_dtype=Boolean,
)
elif tp in INTEGER_DTYPES:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,13 +116,13 @@
# ---------------------------------------------
("c", "lambda x: json.loads(x)", 'pl.col("c").str.json_decode()'),
# ---------------------------------------------
# map_dict
# replace
# ---------------------------------------------
("a", "lambda x: MY_DICT[x]", 'pl.col("a").map_dict(MY_DICT)'),
("a", "lambda x: MY_DICT[x]", 'pl.col("a").replace(MY_DICT)'),
(
"a",
"lambda x: MY_DICT[x - 1] + MY_DICT[1 + x]",
'(pl.col("a") - 1).map_dict(MY_DICT) + (1 + pl.col("a")).map_dict(MY_DICT)',
'(pl.col("a") - 1).replace(MY_DICT) + (1 + pl.col("a")).replace(MY_DICT)',
),
# ---------------------------------------------
# standard library datetime parsing
Expand Down
Loading

0 comments on commit 74c6ba2

Please sign in to comment.