Skip to content

Commit

Permalink
depr(python): Rename LazyFrame.read/write_json to de/serialize (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego authored Aug 6, 2023
1 parent c3c1f85 commit 8e6da21
Show file tree
Hide file tree
Showing 4 changed files with 116 additions and 37 deletions.
3 changes: 2 additions & 1 deletion py-polars/docs/source/reference/lazyframe/miscellaneous.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ Read/write logical plan
.. autosummary::
:toctree: api/

LazyFrame.deserialize
LazyFrame.from_json
LazyFrame.read_json
LazyFrame.write_json
LazyFrame.serialize
120 changes: 91 additions & 29 deletions py-polars/polars/lazyframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@
from polars.utils._wrap import wrap_df, wrap_expr
from polars.utils.convert import _timedelta_to_pl_duration
from polars.utils.deprecation import (
deprecate_function,
deprecate_renamed_function,
deprecate_renamed_methods,
deprecate_renamed_parameter,
issue_deprecation_warning,
Expand Down Expand Up @@ -112,8 +114,14 @@


@deprecate_renamed_methods(
mapping={"approx_unique": "approx_n_unique"},
versions={"approx_unique": "0.18.12"},
mapping={
"approx_unique": "approx_n_unique",
"write_json": "serialize",
},
versions={
"approx_unique": "0.18.12",
"write_json": "0.18.12",
},
)
class LazyFrame:
"""
Expand Down Expand Up @@ -523,45 +531,88 @@ def _scan_python_function(
return self

@classmethod
@deprecate_function(
"Convert the JSON string to `StringIO` and then use `LazyFrame.deserialize`.",
version="0.18.12",
)
def from_json(cls, json: str) -> Self:
"""
Read a logical plan from a JSON string to construct a LazyFrame.
.. deprecated:: 0.18.12
This method is deprecated. Convert the JSON string to ``StringIO``
and then use ``LazyFrame.deserialize``.
Parameters
----------
json
String in JSON format.
See Also
--------
read_json
deserialize
"""
bytes = StringIO(json).getvalue().encode()
file = BytesIO(bytes)
return cls._from_pyldf(PyLazyFrame.read_json(file))
return cls.deserialize(StringIO(json))

@classmethod
def read_json(cls, file: str | Path | IOBase) -> Self:
@deprecate_renamed_function("deserialize", version="0.18.12")
@deprecate_renamed_parameter("file", "source", version="0.18.12")
def read_json(cls, source: str | Path | IOBase) -> Self:
"""
Read a logical plan from a JSON file to construct a LazyFrame.
.. deprecated:: 0.18.12
This class method has been renamed to ``deserialize``.
Parameters
----------
file
source
Path to a file or a file-like object.
See Also
--------
LazyFrame.from_json, LazyFrame.write_json
deserialize
"""
if isinstance(file, StringIO):
file = BytesIO(file.getvalue().encode())
elif isinstance(file, (str, Path)):
file = normalise_filepath(file)
return cls.deserialize(source)

@classmethod
def deserialize(cls, source: str | Path | IOBase) -> Self:
"""
Read a logical plan from a JSON file to construct a LazyFrame.
Parameters
----------
source
Path to a file or a file-like object.
return cls._from_pyldf(PyLazyFrame.read_json(file))
See Also
--------
LazyFrame.serialize
Examples
--------
>>> import io
>>> lf = pl.LazyFrame({"a": [1, 2, 3]}).sum()
>>> json = lf.serialize()
>>> pl.LazyFrame.deserialize(io.StringIO(json)).collect()
shape: (1, 1)
┌─────┐
│ a │
│ --- │
│ i64 │
╞═════╡
│ 6 │
└─────┘
"""
if isinstance(source, StringIO):
source = BytesIO(source.getvalue().encode())
elif isinstance(source, (str, Path)):
source = normalise_filepath(source)

return cls._from_pyldf(PyLazyFrame.deserialize(source))

@property
def columns(self) -> list[str]:
Expand Down Expand Up @@ -742,16 +793,16 @@ def _repr_html_(self) -> str:
"""

@overload
def write_json(self, file: None = ...) -> str:
def serialize(self, file: None = ...) -> str:
...

@overload
def write_json(self, file: IOBase | str | Path) -> None:
def serialize(self, file: IOBase | str | Path) -> None:
...

def write_json(self, file: IOBase | str | Path | None = None) -> str | None:
def serialize(self, file: IOBase | str | Path | None = None) -> str | None:
"""
Write the logical plan of this LazyFrame to a file or string in JSON format.
Serialize the logical plan of this LazyFrame to a file or string in JSON format.
Parameters
----------
Expand All @@ -761,26 +812,37 @@ def write_json(self, file: IOBase | str | Path | None = None) -> str | None:
See Also
--------
LazyFrame.read_json
LazyFrame.deserialize
Examples
--------
>>> lf = pl.LazyFrame(
... {
... "foo": [1, 2, 3],
... "bar": [6, 7, 8],
... }
... )
>>> lf.write_json()
'{"DataFrameScan":{"df":{"columns":[{"name":"foo","datatype":"Int64","values":[1,2,3]},{"name":"bar","datatype":"Int64","values":[6,7,8]}]},"schema":{"inner":{"foo":"Int64","bar":"Int64"}},"output_schema":null,"projection":null,"selection":null}}'
Serialize the logical plan into a JSON string.
>>> lf = pl.LazyFrame({"a": [1, 2, 3]}).sum()
>>> json = lf.serialize()
>>> json
'{"LocalProjection":{"expr":[{"Agg":{"Sum":{"Column":"a"}}}],"input":{"DataFrameScan":{"df":{"columns":[{"name":"a","datatype":"Int64","values":[1,2,3]}]},"schema":{"inner":{"a":"Int64"}},"output_schema":null,"projection":null,"selection":null}},"schema":{"inner":{"a":"Int64"}}}}'
The logical plan can later be deserialized back into a LazyFrame.
>>> import io
>>> pl.LazyFrame.deserialize(io.StringIO(json)).collect()
shape: (1, 1)
┌─────┐
│ a │
│ --- │
│ i64 │
╞═════╡
│ 6 │
└─────┘
"""
if isinstance(file, (str, Path)):
file = normalise_filepath(file)
to_string_io = (file is not None) and isinstance(file, StringIO)
if file is None or to_string_io:
with BytesIO() as buf:
self._ldf.write_json(buf)
self._ldf.serialize(buf)
json_bytes = buf.getvalue()

json_str = json_bytes.decode("utf8")
Expand All @@ -789,7 +851,7 @@ def write_json(self, file: IOBase | str | Path | None = None) -> str | None:
else:
return json_str
else:
self._ldf.write_json(file)
self._ldf.serialize(file)
return None

def pipe(
Expand Down
6 changes: 3 additions & 3 deletions py-polars/src/lazyframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ impl PyLazyFrame {
}

#[cfg(all(feature = "json", feature = "serde_json"))]
fn write_json(&self, py_f: PyObject) -> PyResult<()> {
fn serialize(&self, py_f: PyObject) -> PyResult<()> {
let file = BufWriter::new(get_file_like(py_f, true)?);
serde_json::to_writer(file, &self.ldf.logical_plan)
.map_err(|err| PyValueError::new_err(format!("{err:?}")))?;
Expand All @@ -93,7 +93,7 @@ impl PyLazyFrame {

#[staticmethod]
#[cfg(feature = "json")]
fn read_json(py_f: PyObject) -> PyResult<Self> {
fn deserialize(py_f: PyObject) -> PyResult<Self> {
// it is faster to first read to memory and then parse: https://github.com/serde-rs/json/issues/160
// so don't bother with files.
let mut json = String::new();
Expand All @@ -105,7 +105,7 @@ impl PyLazyFrame {
// we skipped the serializing/deserializing of the static lifetime in `DataType`
// so we actually don't have a lifetime at all when serializing.

// &str still has a lifetime. Bit its ok, because we drop it immediately
// &str still has a lifetime. But it's ok, because we drop it immediately
// in this scope
let json = unsafe { std::mem::transmute::<&'_ str, &'static str>(json.as_str()) };

Expand Down
24 changes: 20 additions & 4 deletions py-polars/tests/unit/test_serde.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import io
import pickle
from datetime import datetime, timedelta

Expand All @@ -15,12 +16,27 @@ def test_pickling_simple_expression() -> None:
assert str(pickle.loads(buf)) == str(e)


def test_serde_lazy_frame_lp() -> None:
def test_lazyframe_serde() -> None:
lf = pl.DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]}).lazy().select(pl.col("a"))
json = lf.write_json()

result = pl.LazyFrame.from_json(json).collect().to_series()
assert_series_equal(result, pl.Series("a", [1, 2, 3]))
json = lf.serialize()
result = pl.LazyFrame.deserialize(io.StringIO(json))

assert_series_equal(result.collect().to_series(), pl.Series("a", [1, 2, 3]))


def test_lazyframe_deprecated_serde() -> None:
lf = pl.DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]}).lazy().select(pl.col("a"))

with pytest.deprecated_call():
json = lf.write_json() # type: ignore[attr-defined]
with pytest.deprecated_call():
result_from = pl.LazyFrame.from_json(json)
with pytest.deprecated_call():
result_read = pl.LazyFrame.read_json(io.StringIO(json))

assert_series_equal(result_from.collect().to_series(), pl.Series("a", [1, 2, 3]))
assert_series_equal(result_read.collect().to_series(), pl.Series("a", [1, 2, 3]))


def test_serde_time_unit() -> None:
Expand Down

0 comments on commit 8e6da21

Please sign in to comment.