From 34f55f40a0dedf02f228a2aa8a775c5badce543b Mon Sep 17 00:00:00 2001 From: William Lanchantin Date: Sat, 1 Jun 2024 18:07:39 -0400 Subject: [PATCH 1/5] add to/from json for exprs --- lib/explorer/polars_backend/expression.ex | 12 ++++++++++++ lib/explorer/polars_backend/native.ex | 2 ++ native/explorer/Cargo.lock | 12 ++++++++++++ native/explorer/Cargo.toml | 3 +++ native/explorer/src/expressions.rs | 11 +++++++++++ native/explorer/src/lib.rs | 2 ++ 6 files changed, 42 insertions(+) diff --git a/lib/explorer/polars_backend/expression.ex b/lib/explorer/polars_backend/expression.ex index 8b43445d4..69b1cb8fa 100644 --- a/lib/explorer/polars_backend/expression.ex +++ b/lib/explorer/polars_backend/expression.ex @@ -356,6 +356,18 @@ defmodule Explorer.PolarsBackend.Expression do Native.expr_describe_filter_plan(polars_df, expression) end + def to_json(%__MODULE__{} = expression) do + expression + |> Native.expr_to_json() + |> Jason.decode!() + end + + def from_json(%{} = json_map) do + json_map + |> Jason.encode!() + |> Native.expr_from_json() + end + defp dtype(%LazySeries{dtype: dtype}), do: dtype defp dtype(%PolarsSeries{} = polars_series) do diff --git a/lib/explorer/polars_backend/native.ex b/lib/explorer/polars_backend/native.ex index a09920a70..80b7aa53f 100644 --- a/lib/explorer/polars_backend/native.ex +++ b/lib/explorer/polars_backend/native.ex @@ -196,6 +196,8 @@ defmodule Explorer.PolarsBackend.Native do def expr_datetime(_datetime), do: err() def expr_duration(_duration), do: err() def expr_describe_filter_plan(_df, _expr), do: err() + def expr_to_json(_expr), do: err() + def expr_from_json(_expr), do: err() def expr_float(_number), do: err() def expr_integer(_number), do: err() def expr_int_range(_start, _end, _step, _dtype), do: err() diff --git a/native/explorer/Cargo.lock b/native/explorer/Cargo.lock index 67d03a394..af767bf37 100644 --- a/native/explorer/Cargo.lock +++ b/native/explorer/Cargo.lock @@ -182,6 +182,9 @@ name = "bitflags" version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" +dependencies = [ + "serde", +] [[package]] name = "block-buffer" @@ -483,6 +486,7 @@ dependencies = [ "rand", "rand_pcg", "rustler", + "serde_json", "smartstring", "thiserror", "tokio", @@ -858,6 +862,7 @@ checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" dependencies = [ "equivalent", "hashbrown", + "serde", ] [[package]] @@ -1387,6 +1392,7 @@ dependencies = [ "polars-error", "polars-utils", "ryu", + "serde", "simdutf8", "streaming-iterator", "strength_reduce", @@ -1445,6 +1451,7 @@ dependencies = [ "rand_distr", "rayon", "regex", + "serde", "smartstring", "thiserror", "version_check", @@ -1498,6 +1505,7 @@ dependencies = [ "regex", "reqwest", "ryu", + "serde", "serde_json", "simd-json", "simdutf8", @@ -1584,6 +1592,7 @@ dependencies = [ "rand_distr", "rayon", "regex", + "serde", "serde_json", "smartstring", "unicode-reverse", @@ -1665,6 +1674,7 @@ dependencies = [ "polars-utils", "rayon", "regex", + "serde", "smartstring", "strum_macros", "version_check", @@ -1717,6 +1727,7 @@ dependencies = [ "polars-ops", "polars-utils", "regex", + "serde", "smartstring", ] @@ -2221,6 +2232,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fb72c633efbaa2dd666986505016c32c3044395ceaf881518399d2f4127ee29" dependencies = [ "autocfg", + "serde", "static_assertions", "version_check", ] diff --git a/native/explorer/Cargo.toml b/native/explorer/Cargo.toml index 2fed20cf6..d06127075 100644 --- a/native/explorer/Cargo.toml +++ b/native/explorer/Cargo.toml @@ -19,6 +19,7 @@ rand = { version = "0.8", features = ["alloc"] } rand_pcg = "0.3" rustler = { version = "0.32", default-features = false, features = ["derive"] } thiserror = "1" +serde_json = "1" smartstring = "1" either = "1" @@ -76,6 +77,8 @@ features = [ "rolling_window", "round_series", "rows", + "serde", + "serde-lazy", "simd", "streaming", "strings", diff --git a/native/explorer/src/expressions.rs b/native/explorer/src/expressions.rs index af1301d34..585758b3f 100644 --- a/native/explorer/src/expressions.rs +++ b/native/explorer/src/expressions.rs @@ -830,6 +830,17 @@ pub fn expr_describe_filter_plan(data: ExDataFrame, expr: ExExpr) -> String { df.lazy().filter(expressions).describe_plan() } +#[rustler::nif] +pub fn expr_to_json(expr: ExExpr) -> String { + serde_json::to_string(&expr.clone_inner()).unwrap() +} + +#[rustler::nif] +pub fn expr_from_json(expr_json: String) -> ExExpr { + let expr: Expr = serde_json::from_str(&expr_json).unwrap(); + ExExpr::new(expr) +} + #[rustler::nif] pub fn expr_contains(expr: ExExpr, pattern: &str) -> ExExpr { let expr = expr.clone_inner(); diff --git a/native/explorer/src/lib.rs b/native/explorer/src/lib.rs index db726001d..c6512d60d 100644 --- a/native/explorer/src/lib.rs +++ b/native/explorer/src/lib.rs @@ -258,6 +258,8 @@ rustler::init!( expr_ewm_variance, // inspect expressions expr_describe_filter_plan, + expr_to_json, + expr_from_json, // string expressions expr_contains, expr_re_contains, From cb040ae32c176b63cc498a6487a00fab7eb143be Mon Sep 17 00:00:00 2001 From: William Lanchantin Date: Sat, 1 Jun 2024 18:08:21 -0400 Subject: [PATCH 2/5] add tests including proof of concept --- .../polars_backend/expression_test.exs | 65 +++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/test/explorer/polars_backend/expression_test.exs b/test/explorer/polars_backend/expression_test.exs index 397e5d693..1c0ed2ca1 100644 --- a/test/explorer/polars_backend/expression_test.exs +++ b/test/explorer/polars_backend/expression_test.exs @@ -89,4 +89,69 @@ defmodule Explorer.PolarsBackend.ExpressionTest do """) end end + + describe "json" do + test "can convert exprs to/from json" do + lazy = %LazySeries{op: :column, args: ["a"]} + expr1 = Expression.to_expr(lazy) + json = Expression.to_json(expr1) + expr2 = Expression.from_json(json) + + assert json == %{"Column" => "a"} + assert %Expression{} = expr2 + end + + test "can perform an unsupported operation via json-derived exprs" do + # Built in Python from: + # `pl.col("list_col_unsorted").list.sort().meta.serialize()` + list_col_sorted_expr_json = %{ + "Function" => %{ + "input" => [%{"Column" => "list_col_unsorted"}], + "function" => %{ + "ListExpr" => %{ + "Sort" => %{ + "descending" => false, + "nulls_last" => false, + "multithreaded" => true, + "maintain_order" => false + } + } + }, + "options" => %{ + "collect_groups" => "ElementWise", + "fmt_str" => "", + "input_wildcard_expansion" => false, + "returns_scalar" => false, + "cast_to_supertypes" => false, + "allow_rename" => false, + "pass_name_to_apply" => false, + "changes_length" => false, + "check_lengths" => true, + "allow_group_aware" => true + } + } + } + + df = + Explorer.DataFrame.new(%{ + list_col_unsorted: [[1, 5, 3], [1, 1, 2, 0]] + }) + + new_name = "list_col_sorted" + + expr = + list_col_sorted_expr_json + |> Expression.from_json() + |> Expression.alias_expr(new_name) + + ldf = Explorer.DataFrame.lazy(df) + {:ok, lpdf_new} = Explorer.PolarsBackend.Native.lf_mutate_with(ldf.data, [expr]) + {:ok, pdf_new} = Explorer.PolarsBackend.Native.lf_collect(lpdf_new) + df_new = Explorer.PolarsBackend.Shared.create_dataframe(pdf_new) + + series = Explorer.DataFrame.to_series(df_new) + assert Explorer.Series.to_list(series["list_col_unsorted"]) == [[1, 5, 3], [1, 1, 2, 0]] + assert Explorer.Series.to_list(series["list_col_sorted"]) == [[1, 3, 5], [0, 1, 1, 2]] + end + end end From d88184ed44904ccb4a0e47092ad76a096ab82aab Mon Sep 17 00:00:00 2001 From: William Lanchantin Date: Sat, 1 Jun 2024 18:31:19 -0400 Subject: [PATCH 3/5] refactor test --- .../polars_backend/expression_test.exs | 30 +++++++++---------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/test/explorer/polars_backend/expression_test.exs b/test/explorer/polars_backend/expression_test.exs index 1c0ed2ca1..853820317 100644 --- a/test/explorer/polars_backend/expression_test.exs +++ b/test/explorer/polars_backend/expression_test.exs @@ -132,22 +132,20 @@ defmodule Explorer.PolarsBackend.ExpressionTest do } } - df = - Explorer.DataFrame.new(%{ - list_col_unsorted: [[1, 5, 3], [1, 1, 2, 0]] - }) - - new_name = "list_col_sorted" - - expr = - list_col_sorted_expr_json - |> Expression.from_json() - |> Expression.alias_expr(new_name) - - ldf = Explorer.DataFrame.lazy(df) - {:ok, lpdf_new} = Explorer.PolarsBackend.Native.lf_mutate_with(ldf.data, [expr]) - {:ok, pdf_new} = Explorer.PolarsBackend.Native.lf_collect(lpdf_new) - df_new = Explorer.PolarsBackend.Shared.create_dataframe(pdf_new) + mutate_with_json = fn df, name, json -> + expr = + json + |> Expression.from_json() + |> Expression.alias_expr(name) + + ldf = Explorer.DataFrame.lazy(df) + {:ok, lpdf_new} = Explorer.PolarsBackend.Native.lf_mutate_with(ldf.data, [expr]) + {:ok, pdf_new} = Explorer.PolarsBackend.Native.lf_collect(lpdf_new) + Explorer.PolarsBackend.Shared.create_dataframe(pdf_new) + end + + df = Explorer.DataFrame.new(%{list_col_unsorted: [[1, 5, 3], [1, 1, 2, 0]]}) + df_new = mutate_with_json.(df, "list_col_sorted", list_col_sorted_expr_json) series = Explorer.DataFrame.to_series(df_new) assert Explorer.Series.to_list(series["list_col_unsorted"]) == [[1, 5, 3], [1, 1, 2, 0]] From 572398afdf945e022f4847cc2d2672c3c872eddf Mon Sep 17 00:00:00 2001 From: William Lanchantin Date: Sat, 1 Jun 2024 18:46:01 -0400 Subject: [PATCH 4/5] make it so i can copy/paste into the repl --- test/explorer/polars_backend/expression_test.exs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/explorer/polars_backend/expression_test.exs b/test/explorer/polars_backend/expression_test.exs index 853820317..2fff431e3 100644 --- a/test/explorer/polars_backend/expression_test.exs +++ b/test/explorer/polars_backend/expression_test.exs @@ -135,8 +135,8 @@ defmodule Explorer.PolarsBackend.ExpressionTest do mutate_with_json = fn df, name, json -> expr = json - |> Expression.from_json() - |> Expression.alias_expr(name) + |> Explorer.PolarsBackend.Expression.from_json() + |> Explorer.PolarsBackend.Expression.alias_expr(name) ldf = Explorer.DataFrame.lazy(df) {:ok, lpdf_new} = Explorer.PolarsBackend.Native.lf_mutate_with(ldf.data, [expr]) From 7d92810b209bac2ca2a9969621324f0d3abb88ea Mon Sep 17 00:00:00 2001 From: William Lanchantin Date: Sat, 1 Jun 2024 20:56:01 -0400 Subject: [PATCH 5/5] turns out jason isn't an actual dep --- mix.exs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mix.exs b/mix.exs index eee72db7c..da61d379a 100644 --- a/mix.exs +++ b/mix.exs @@ -43,6 +43,8 @@ defmodule Explorer.MixProject do {:table_rex, "~> 3.1.1 or ~> 4.0.0"}, {:castore, "~> 1.0", optional: true}, {:adbc, "~> 0.1", optional: true}, + # DELETEME! + {:jason, "~> 1.4"}, ## Optional {:rustler, "~> 0.32.0", optional: not (@dev? or @force_build?)},