Skip to content

Commit

Permalink
depr(python,rust!): Rename str.json_extract to str.json_decode (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego authored Nov 20, 2023
1 parent 07207a5 commit be6b565
Show file tree
Hide file tree
Showing 13 changed files with 95 additions and 43 deletions.
2 changes: 1 addition & 1 deletion .github/scripts/test_bytecode_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def test_local_imports() -> None:

bytecode_parser = BytecodeParser(lambda x: json.loads(x), map_target="expr")
result = bytecode_parser.to_expression("x")
expected = 'pl.col("x").str.json_extract()'
expected = 'pl.col("x").str.json_decode()'
assert result == expected

bytecode_parser = BytecodeParser(
Expand Down
10 changes: 5 additions & 5 deletions crates/polars-ops/src/chunked_array/strings/json_path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ pub trait Utf8JsonPathImpl: AsUtf8 {
}

/// Extracts a typed-JSON value for each row in the Utf8Chunked
fn json_extract(
fn json_decode(
&self,
dtype: Option<DataType>,
infer_schema_len: Option<usize>,
Expand Down Expand Up @@ -103,7 +103,7 @@ pub trait Utf8JsonPathImpl: AsUtf8 {
infer_schema_len: Option<usize>,
) -> PolarsResult<Series> {
let selected_json = self.as_utf8().json_path_select(json_path)?;
selected_json.json_extract(dtype, infer_schema_len)
selected_json.json_decode(dtype, infer_schema_len)
}
}

Expand Down Expand Up @@ -163,7 +163,7 @@ mod tests {
}

#[test]
fn test_json_extract() {
fn test_json_decode() {
let s = Series::new(
"json",
[
Expand All @@ -187,11 +187,11 @@ mod tests {
let expected_dtype = expected_series.dtype().clone();

assert!(ca
.json_extract(None, None)
.json_decode(None, None)
.unwrap()
.series_equal_missing(&expected_series));
assert!(ca
.json_extract(Some(expected_dtype), None)
.json_decode(Some(expected_dtype), None)
.unwrap()
.series_equal_missing(&expected_series));
}
Expand Down
14 changes: 7 additions & 7 deletions crates/polars-plan/src/dsl/function_expr/strings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ pub enum StringFunction {
LenChars,
Lowercase,
#[cfg(feature = "extract_jsonpath")]
JsonExtract {
JsonDecode {
dtype: Option<DataType>,
infer_schema_len: Option<usize>,
},
Expand Down Expand Up @@ -126,7 +126,7 @@ impl StringFunction {
#[cfg(feature = "string_to_integer")]
ToInteger { .. } => mapper.with_dtype(DataType::Int64),
#[cfg(feature = "extract_jsonpath")]
JsonExtract { dtype, .. } => mapper.with_opt_dtype(dtype.clone()),
JsonDecode { dtype, .. } => mapper.with_opt_dtype(dtype.clone()),
LenBytes => mapper.with_dtype(DataType::UInt32),
LenChars => mapper.with_dtype(DataType::UInt32),
#[cfg(feature = "regex")]
Expand Down Expand Up @@ -192,7 +192,7 @@ impl Display for StringFunction {
#[cfg(feature = "string_to_integer")]
ToInteger { .. } => "to_integer",
#[cfg(feature = "extract_jsonpath")]
JsonExtract { .. } => "json_extract",
JsonDecode { .. } => "json_decode",
LenBytes => "len_bytes",
Lowercase => "lowercase",
LenChars => "len_chars",
Expand Down Expand Up @@ -327,10 +327,10 @@ impl From<StringFunction> for SpecialEq<Arc<dyn SeriesUdf>> {
#[cfg(feature = "dtype-decimal")]
ToDecimal(infer_len) => map!(strings::to_decimal, infer_len),
#[cfg(feature = "extract_jsonpath")]
JsonExtract {
JsonDecode {
dtype,
infer_schema_len,
} => map!(strings::json_extract, dtype.clone(), infer_schema_len),
} => map!(strings::json_decode, dtype.clone(), infer_schema_len),
}
}
}
Expand Down Expand Up @@ -844,11 +844,11 @@ pub(super) fn to_decimal(s: &Series, infer_len: usize) -> PolarsResult<Series> {
}

#[cfg(feature = "extract_jsonpath")]
pub(super) fn json_extract(
pub(super) fn json_decode(
s: &Series,
dtype: Option<DataType>,
infer_schema_len: Option<usize>,
) -> PolarsResult<Series> {
let ca = s.utf8()?;
ca.json_extract(dtype, infer_schema_len)
ca.json_decode(dtype, infer_schema_len)
}
4 changes: 2 additions & 2 deletions crates/polars-plan/src/dsl/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -447,9 +447,9 @@ impl StringNameSpace {
}

#[cfg(feature = "extract_jsonpath")]
pub fn json_extract(self, dtype: Option<DataType>, infer_schema_len: Option<usize>) -> Expr {
pub fn json_decode(self, dtype: Option<DataType>, infer_schema_len: Option<usize>) -> Expr {
self.0
.map_private(FunctionExpr::StringExpr(StringFunction::JsonExtract {
.map_private(FunctionExpr::StringExpr(StringFunction::JsonDecode {
dtype,
infer_schema_len,
}))
Expand Down
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/expressions/string.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ The following methods are available under the `expr.str` attribute.
Expr.str.extract
Expr.str.extract_all
Expr.str.extract_groups
Expr.str.json_decode
Expr.str.json_extract
Expr.str.json_path_match
Expr.str.len_bytes
Expand Down
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/series/string.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ The following methods are available under the `Series.str` attribute.
Series.str.extract
Series.str.extract_all
Series.str.extract_groups
Series.str.json_decode
Series.str.json_extract
Series.str.json_path_match
Series.str.len_bytes
Expand Down
31 changes: 26 additions & 5 deletions py-polars/polars/expr/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -1165,13 +1165,13 @@ def starts_with(self, prefix: str | Expr) -> Expr:
prefix = parse_as_expression(prefix, str_as_lit=True)
return wrap_expr(self._pyexpr.str_starts_with(prefix))

def json_extract(
def json_decode(
self, dtype: PolarsDataType | None = None, infer_schema_length: int | None = 100
) -> Expr:
"""
Parse string values as JSON.
Throw errors if encounter invalid JSON strings.
Throws an error if invalid JSON strings are encountered.
Parameters
----------
Expand All @@ -1193,10 +1193,10 @@ def json_extract(
... {"json": ['{"a":1, "b": true}', None, '{"a":2, "b": false}']}
... )
>>> dtype = pl.Struct([pl.Field("a", pl.Int64), pl.Field("b", pl.Boolean)])
>>> df.with_columns(extracted=pl.col("json").str.json_extract(dtype))
>>> df.with_columns(decoded=pl.col("json").str.json_decode(dtype))
shape: (3, 2)
┌─────────────────────┬─────────────┐
│ json ┆ extracted
│ json ┆ decoded
│ --- ┆ --- │
│ str ┆ struct[2] │
╞═════════════════════╪═════════════╡
Expand All @@ -1208,7 +1208,7 @@ def json_extract(
"""
if dtype is not None:
dtype = py_type_to_dtype(dtype)
return wrap_expr(self._pyexpr.str_json_extract(dtype, infer_schema_length))
return wrap_expr(self._pyexpr.str_json_decode(dtype, infer_schema_length))

def json_path_match(self, json_path: str) -> Expr:
"""
Expand Down Expand Up @@ -2241,6 +2241,27 @@ def rjust(self, length: int, fill_char: str = " ") -> Expr:
"""
return self.pad_start(length, fill_char)

@deprecate_renamed_function("json_decode", version="0.19.15")
def json_extract(
    self, dtype: PolarsDataType | None = None, infer_schema_length: int | None = 100
) -> Expr:
    """
    Parse string values as JSON.

    .. deprecated:: 0.19.15
        This method has been renamed to :meth:`json_decode`.

    Parameters
    ----------
    dtype
        The dtype to cast the extracted value to. If None, the dtype will be
        inferred from the JSON value.
    infer_schema_length
        How many rows to parse to determine the schema.
        If `None` all rows are used.

    """
    # Thin alias kept for backward compatibility: all work is delegated to the
    # renamed method. The version given to the decorator must agree with the
    # `deprecated` directive above (and with the Series counterpart).
    return self.json_decode(dtype, infer_schema_length)


def _validate_format_argument(format: str | None) -> None:
if format is not None and ".%f" in format:
Expand Down
27 changes: 24 additions & 3 deletions py-polars/polars/series/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -591,13 +591,13 @@ def encode(self, encoding: TransferEncoding) -> Series:
"""

def json_extract(
def json_decode(
self, dtype: PolarsDataType | None = None, infer_schema_length: int | None = 100
) -> Series:
"""
Parse string values as JSON.
Throw errors if encounter invalid JSON strings.
Throws an error if invalid JSON strings are encountered.
Parameters
----------
Expand All @@ -616,7 +616,7 @@ def json_extract(
Examples
--------
>>> s = pl.Series("json", ['{"a":1, "b": true}', None, '{"a":2, "b": false}'])
>>> s.str.json_extract()
>>> s.str.json_decode()
shape: (3,)
Series: 'json' [struct[2]]
[
Expand Down Expand Up @@ -1703,3 +1703,24 @@ def rjust(self, length: int, fill_char: str = " ") -> Series:
Fill with this ASCII character.
"""

@deprecate_renamed_function("json_decode", version="0.19.15")
def json_extract(
    self,
    dtype: PolarsDataType | None = None,
    infer_schema_length: int | None = 100,
) -> Series:
    """
    Parse string values as JSON (deprecated alias of :meth:`json_decode`).

    .. deprecated:: 0.19.15
        This method has been renamed to :meth:`json_decode`.

    Parameters
    ----------
    dtype
        Target dtype for the extracted values. When None, the dtype is
        inferred from the JSON value.
    infer_schema_length
        Number of rows parsed to determine the schema.
        If `None` all rows are used.

    """
    # Forward both arguments untouched to the renamed method.
    decoded = self.json_decode(dtype, infer_schema_length)
    return decoded
4 changes: 2 additions & 2 deletions py-polars/polars/utils/udfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -729,7 +729,7 @@ def _rewrite_functions(
attribute_count : 3 + attribute_count
]
if inst1.argval == "json":
expr_name = "str.json_extract"
expr_name = "str.json_decode"
elif inst1.argval == "datetime":
fmt = matching_instructions[attribute_count + 3].argval
expr_name = f'str.to_datetime(format="{fmt}")'
Expand Down Expand Up @@ -815,7 +815,7 @@ def _is_raw_function(function: Callable[[Any], Any]) -> tuple[str, str]:
import json # double-check since it is referenced via 'builtins'

if function is json.loads:
return "json", "str.json_extract()"
return "json", "str.json_decode()"

except AttributeError:
pass
Expand Down
4 changes: 2 additions & 2 deletions py-polars/src/expr/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ impl PyExpr {
}

#[cfg(feature = "extract_jsonpath")]
fn str_json_extract(
fn str_json_decode(
&self,
dtype: Option<Wrap<DataType>>,
infer_schema_len: Option<usize>,
Expand All @@ -207,7 +207,7 @@ impl PyExpr {
self.inner
.clone()
.str()
.json_extract(dtype, infer_schema_len)
.json_decode(dtype, infer_schema_len)
.into()
}

Expand Down
30 changes: 19 additions & 11 deletions py-polars/tests/unit/namespaces/string/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,35 +436,35 @@ def test_str_split() -> None:
assert out[2].to_list() == ["ab,", "c,", "de"]


def test_json_extract_series() -> None:
def test_json_decode_series() -> None:
s = pl.Series(["[1, 2, 3]", None, "[4, 5, 6]"])
expected = pl.Series([[1, 2, 3], None, [4, 5, 6]])
dtype = pl.List(pl.Int64)
assert_series_equal(s.str.json_extract(None), expected)
assert_series_equal(s.str.json_extract(dtype), expected)
assert_series_equal(s.str.json_decode(None), expected)
assert_series_equal(s.str.json_decode(dtype), expected)

s = pl.Series(['{"a": 1, "b": true}', None, '{"a": 2, "b": false}'])
expected = pl.Series([{"a": 1, "b": True}, None, {"a": 2, "b": False}])
dtype2 = pl.Struct([pl.Field("a", pl.Int64), pl.Field("b", pl.Boolean)])
assert_series_equal(s.str.json_extract(None), expected)
assert_series_equal(s.str.json_extract(dtype2), expected)
assert_series_equal(s.str.json_decode(None), expected)
assert_series_equal(s.str.json_decode(dtype2), expected)

expected = pl.Series([{"a": 1}, None, {"a": 2}])
dtype2 = pl.Struct([pl.Field("a", pl.Int64)])
assert_series_equal(s.str.json_extract(dtype2), expected)
assert_series_equal(s.str.json_decode(dtype2), expected)

s = pl.Series([], dtype=pl.Utf8)
expected = pl.Series([], dtype=pl.List(pl.Int64))
dtype = pl.List(pl.Int64)
assert_series_equal(s.str.json_extract(dtype), expected)
assert_series_equal(s.str.json_decode(dtype), expected)


def test_json_extract_lazy_expr() -> None:
def test_json_decode_lazy_expr() -> None:
dtype = pl.Struct([pl.Field("a", pl.Int64), pl.Field("b", pl.Boolean)])
ldf = (
pl.DataFrame({"json": ['{"a": 1, "b": true}', None, '{"a": 2, "b": false}']})
.lazy()
.select(pl.col("json").str.json_extract(dtype))
.select(pl.col("json").str.json_decode(dtype))
)
expected = pl.DataFrame(
{"json": [{"a": 1, "b": True}, None, {"a": 2, "b": False}]}
Expand All @@ -473,7 +473,15 @@ def test_json_extract_lazy_expr() -> None:
assert_frame_equal(ldf, expected)


def test_json_extract_primitive_to_list_11053() -> None:
def test_json_extract_deprecated() -> None:
    # The old method name must raise a deprecation warning while still
    # producing the decoded struct values.
    raw = pl.Series(['{"a": 1, "b": true}', None, '{"a": 2, "b": false}'])
    with pytest.deprecated_call():
        decoded = raw.str.json_extract()
    assert_series_equal(
        decoded,
        pl.Series([{"a": 1, "b": True}, None, {"a": 2, "b": False}]),
    )


def test_json_decode_primitive_to_list_11053() -> None:
df = pl.DataFrame(
{
"json": [
Expand All @@ -490,7 +498,7 @@ def test_json_extract_primitive_to_list_11053() -> None:
)

output = df.select(
pl.col("json").str.json_extract(schema).alias("casted_json")
pl.col("json").str.json_decode(schema).alias("casted_json")
).unnest("casted_json")
expected = pl.DataFrame({"col1": [["123"], ["xyz"]], "col2": [["123"], None]})
assert_frame_equal(output, expected)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@
# ---------------------------------------------
# json expr: load/extract
# ---------------------------------------------
("c", "lambda x: json.loads(x)", 'pl.col("c").str.json_extract()'),
("c", "lambda x: json.loads(x)", 'pl.col("c").str.json_decode()'),
# ---------------------------------------------
# map_dict
# ---------------------------------------------
Expand Down Expand Up @@ -248,10 +248,10 @@ def test_parse_apply_raw_functions() -> None:
result_frames = []
with pytest.warns(
PolarsInefficientMapWarning,
match=r"(?s)Expr\.map_elements.*In this case, you can replace.*\.str\.json_extract",
match=r"(?s)Expr\.map_elements.*In this case, you can replace.*\.str\.json_decode",
):
for expr in (
pl.col("value").str.json_extract(),
pl.col("value").str.json_decode(),
pl.col("value").map_elements(json.loads),
):
result_frames.append(
Expand Down
4 changes: 2 additions & 2 deletions py-polars/tests/unit/operations/map/test_map_elements.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def test_datelike_identity() -> None:
def test_map_elements_list_anyvalue_fallback() -> None:
with pytest.warns(
PolarsInefficientMapWarning,
match=r'(?s)replace your `map_elements` with.*pl.col\("text"\).str.json_extract()',
match=r'(?s)replace your `map_elements` with.*pl.col\("text"\).str.json_decode()',
):
df = pl.DataFrame({"text": ['[{"x": 1, "y": 2}, {"x": 3, "y": 4}]']})
assert df.select(pl.col("text").map_elements(json.loads)).to_dict(
Expand Down Expand Up @@ -211,7 +211,7 @@ def test_map_elements_explicit_list_output_type() -> None:
def test_map_elements_dict() -> None:
with pytest.warns(
PolarsInefficientMapWarning,
match=r'(?s)replace your `map_elements` with.*pl.col\("abc"\).str.json_extract()',
match=r'(?s)replace your `map_elements` with.*pl.col\("abc"\).str.json_decode()',
):
df = pl.DataFrame({"abc": ['{"A":"Value1"}', '{"B":"Value2"}']})
assert df.select(pl.col("abc").map_elements(json.loads)).to_dict(
Expand Down

0 comments on commit be6b565

Please sign in to comment.