From ae29cb62d8802ab7a787383902a7f3379dcfd4b0 Mon Sep 17 00:00:00 2001 From: Philip Sampaio Date: Mon, 27 May 2024 13:28:43 -0300 Subject: [PATCH 01/30] WIP: making things run, but break some tests --- lib/explorer/series.ex | 2 +- native/explorer/Cargo.lock | 117 +++++++++++++++++++++-------- native/explorer/Cargo.toml | 6 +- native/explorer/src/dataframe.rs | 54 ++++++++----- native/explorer/src/expressions.rs | 2 +- native/explorer/src/lazyframe.rs | 13 +++- native/explorer/src/series.rs | 2 +- test/explorer/data_frame_test.exs | 4 +- 8 files changed, 138 insertions(+), 62 deletions(-) diff --git a/lib/explorer/series.ex b/lib/explorer/series.ex index 1bf611cd8..fe05f7a22 100644 --- a/lib/explorer/series.ex +++ b/lib/explorer/series.ex @@ -3617,7 +3617,7 @@ defmodule Explorer.Series do defp cast_to_pow({:f, l}, {:f, r}), do: {:f, max(l, r)} defp cast_to_pow({:f, l}, {n, _}) when K.in(n, [:u, :s]), do: {:f, l} defp cast_to_pow({n, _}, {:f, r}) when K.in(n, [:u, :s]), do: {:f, r} - defp cast_to_pow({n, _}, {:s, _}) when K.in(n, [:u, :s]), do: {:f, 64} + defp cast_to_pow({n, _}, {:s, _}) when K.in(n, [:u, :s]), do: {:s, 64} defp cast_to_pow(_, _), do: nil @doc """ diff --git a/native/explorer/Cargo.lock b/native/explorer/Cargo.lock index 67d03a394..69e12af52 100644 --- a/native/explorer/Cargo.lock +++ b/native/explorer/Cargo.lock @@ -182,6 +182,9 @@ name = "bitflags" version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" +dependencies = [ + "serde", +] [[package]] name = "block-buffer" @@ -858,6 +861,7 @@ checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" dependencies = [ "equivalent", "hashbrown", + "serde", ] [[package]] @@ -1340,9 +1344,9 @@ dependencies = [ [[package]] name = "polars" -version = "0.38.3" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f01006048a264047d6cba081fed8e11adbd69c15956f9e53185a9ac4a541853c" +checksum = "0ea21b858b16b9c0e17a12db2800d11aa5b4bd182be6b3022eb537bbfc1f2db5" dependencies = [ "getrandom", "polars-arrow", @@ -1360,9 +1364,9 @@ dependencies = [ [[package]] name = "polars-arrow" -version = "0.38.3" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25197f40d71f82b2f79bb394f03e555d3cc1ce4db1dd052c28318721c71e96ad" +checksum = "725b09f2b5ef31279b66e27bbab63c58d49d8f6696b66b1f46c7eaab95e80f75" dependencies = [ "ahash", "atoi", @@ -1387,6 +1391,7 @@ dependencies = [ "polars-error", "polars-utils", "ryu", + "serde", "simdutf8", "streaming-iterator", "strength_reduce", @@ -1406,9 +1411,9 @@ dependencies = [ [[package]] name = "polars-compute" -version = "0.38.3" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c354515f73cdbbad03c2bf723fcd68e6825943b3ec503055abc8a8cb08ce46bb" +checksum = "a796945b14b14fbb79b91ef0406e6fddca2be636e889f81ea5d6ee7d36efb4fe" dependencies = [ "bytemuck", "either", @@ -1422,9 +1427,9 @@ dependencies = [ [[package]] name = "polars-core" -version = "0.38.3" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f20d3c227186f74aa3c228c64ef72f5a15617322fed30b4323eaf53b25f8e7b" +checksum = "465f70d3e96b6d0b1a43c358ba451286b8c8bd56696feff020d65702aa33e35c" dependencies = [ "ahash", "bitflags 2.5.0", @@ -1445,6 +1450,7 @@ dependencies = [ "rand_distr", "rayon", "regex", + "serde", "smartstring", "thiserror", "version_check", @@ -1453,9 +1459,9 @@ dependencies = [ [[package]] name = "polars-error" -version = "0.38.3" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d66dd0ce51f8bd620eb8bd376502fe68a2b1a446d5433ecd2e75270b0755ce76" +checksum = "5224d5d05e6b8a6f78b75951ae1b5f82c8ab1979e11ffaf5fd41941e3d5b0757" dependencies = [ "object_store", "polars-arrow-format", @@ -1466,9 +1472,9 
@@ dependencies = [ [[package]] name = "polars-io" -version = "0.38.3" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b40bef2edcdc58394792c4d779465144283a09ff1836324e7b72df7978a6e992" +checksum = "b2c8589e418cbe4a48228d64b2a8a40284a82ec3c98817c0c2bcc0267701338b" dependencies = [ "ahash", "async-trait", @@ -1498,6 +1504,7 @@ dependencies = [ "regex", "reqwest", "ryu", + "serde", "serde_json", "simd-json", "simdutf8", @@ -1510,9 +1517,9 @@ dependencies = [ [[package]] name = "polars-json" -version = "0.38.3" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef86aca08f10ddc939fe95aabb44e1d2582dcb08b55d4dadb93353ce42adc248" +checksum = "81224492a649a12b668480c0cf219d703f432509765d2717e72fe32ad16fc701" dependencies = [ "ahash", "chrono", @@ -1531,9 +1538,9 @@ dependencies = [ [[package]] name = "polars-lazy" -version = "0.38.3" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c27df26a19d3092298d31d47614ad84dc330c106e38aa8cd53727cd91c07cf56" +checksum = "89b2632b1af668e2058d5f8f916d8fbde3cac63d03ae29a705f598e41dcfeb7f" dependencies = [ "ahash", "bitflags 2.5.0", @@ -1557,9 +1564,9 @@ dependencies = [ [[package]] name = "polars-ops" -version = "0.38.3" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f8a51c3bdc9e7c34196ff6f5c3cb17da134e5aafb1756aaf24b76c7118e63dc" +checksum = "efdbdb4d9a92109bc2e0ce8e17af5ae8ab643bb5b7ee9d1d74f0aeffd1fbc95f" dependencies = [ "ahash", "argminmax", @@ -1592,9 +1599,9 @@ dependencies = [ [[package]] name = "polars-parquet" -version = "0.38.3" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8824ee00fbbe83d69553f2711014c50361238d210ed81a7a297695b7db97d42" +checksum = "b421d2196f786fdfe162db614c8485f8308fe41575d4de634a39bbe460d1eb6a" dependencies = [ "ahash", "async-stream", @@ -1618,9 +1625,9 @@ 
dependencies = [ [[package]] name = "polars-pipe" -version = "0.38.3" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c5e2c1f14e81d60cfa9afe4e611a9bad9631a2cb7cd19b7c0094d0dc32f0231" +checksum = "48700f1d5bd56a15451e581f465c09541492750360f18637b196f995470a015c" dependencies = [ "crossbeam-channel", "crossbeam-queue", @@ -1645,14 +1652,15 @@ dependencies = [ [[package]] name = "polars-plan" -version = "0.38.3" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff48362bd1b078bbbec7e7ba9ec01fea58fee2887db22a8e3deaf78f322fa3c4" +checksum = "2fb8e2302e20c44defd5be8cad9c96e75face63c3a5f609aced8c4ec3b3ac97d" dependencies = [ "ahash", "bytemuck", "chrono-tz 0.8.6", "futures", + "hashbrown", "once_cell", "percent-encoding", "polars-arrow", @@ -1664,6 +1672,7 @@ dependencies = [ "polars-time", "polars-utils", "rayon", + "recursive", "regex", "smartstring", "strum_macros", @@ -1672,9 +1681,9 @@ dependencies = [ [[package]] name = "polars-row" -version = "0.38.3" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63029da56ff6a720b190490bbc7b6263f9b72d1134311b1f381fc8d306d37770" +checksum = "a515bdc68c2ae3702e3de70d89601f3b71ca8137e282a226dddb53ee4bacfa2e" dependencies = [ "bytemuck", "polars-arrow", @@ -1684,9 +1693,9 @@ dependencies = [ [[package]] name = "polars-sql" -version = "0.38.3" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3652c362959f608d1297196b973d1e3acb508a9562b886ac39bf7606b841052b" +checksum = "7b4bb7cc1c04c3023d1953b2f1dec50515e8fd8169a5a2bf4967b3b082232db7" dependencies = [ "hex", "polars-arrow", @@ -1702,9 +1711,9 @@ dependencies = [ [[package]] name = "polars-time" -version = "0.38.3" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86eb74ea6ddfe675aa5c3f33c00dadbe2b85f0e8e3887b85db1fd5a3397267fd" +checksum = 
"efc18e3ad92eec55db89d88f16c22d436559ba7030cf76f86f6ed7a754b673f1" dependencies = [ "atoi", "chrono", @@ -1722,9 +1731,9 @@ dependencies = [ [[package]] name = "polars-utils" -version = "0.38.3" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "694656a7d2b0cd8f07660dbc8d0fb7a81066ff57a452264907531d805c1e58c4" +checksum = "c760b6c698cfe2fbbbd93d6cfb408db14ececfe1d92445dae2229ce1b5b21ae8" dependencies = [ "ahash", "bytemuck", @@ -1736,6 +1745,7 @@ dependencies = [ "raw-cpuid", "rayon", "smartstring", + "stacker", "sysinfo", "version_check", ] @@ -1755,6 +1765,15 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "psm" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874" +dependencies = [ + "cc", +] + [[package]] name = "quick-xml" version = "0.31.0" @@ -1852,6 +1871,26 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "recursive" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e" +dependencies = [ + "recursive-proc-macro-impl", + "stacker", +] + +[[package]] +name = "recursive-proc-macro-impl" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" +dependencies = [ + "quote", + "syn 2.0.60", +] + [[package]] name = "redox_syscall" version = "0.4.1" @@ -2221,6 +2260,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fb72c633efbaa2dd666986505016c32c3044395ceaf881518399d2f4127ee29" dependencies = [ "autocfg", + "serde", "static_assertions", "version_check", ] @@ -2278,6 +2318,19 @@ dependencies = [ "log", ] +[[package]] +name = "stacker" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "winapi", +] + [[package]] name = "static_assertions" version = "1.1.0" diff --git a/native/explorer/Cargo.toml b/native/explorer/Cargo.toml index 2fed20cf6..aa64018a3 100644 --- a/native/explorer/Cargo.toml +++ b/native/explorer/Cargo.toml @@ -38,7 +38,7 @@ object_store = { version = "0.9", default-features = false, optional = true } mimalloc = { version = "*", default-features = false } [dependencies.polars] -version = "0.38" +version = "0.39" default-features = false features = [ "abs", @@ -55,7 +55,7 @@ features = [ "ewma", "extract_groups", "extract_jsonpath", - "group_by_list", + # "group_by_list", "ipc_streaming", "ipc", "is_in", @@ -86,7 +86,7 @@ features = [ ] [dependencies.polars-ops] -version = "0.38" +version = "0.39" features = ["abs", "ewma", "cum_agg", "cov"] [features] diff --git a/native/explorer/src/dataframe.rs b/native/explorer/src/dataframe.rs index 4e258a290..a8fb819b1 100644 --- a/native/explorer/src/dataframe.rs +++ b/native/explorer/src/dataframe.rs @@ -258,30 +258,38 @@ pub fn df_sort_by( nulls_last: bool, groups: Vec, ) -> Result { + let sort_options = SortMultipleOptions::new() + .with_maintain_order(maintain_order) + .with_multithreaded(multithreaded) + .with_nulls_last(nulls_last) + .with_order_descendings(reverse); + let new_df = if groups.is_empty() { // Note: we cannot use either df.sort or df.sort_with_options. // df.sort does not allow a nulls_last option. // df.sort_with_options only allows a single column. - let by_columns = df.select_series(by_columns)?; - df.sort_impl( - by_columns, - reverse, - nulls_last, - maintain_order, - None, - multithreaded, - )? + // let by_columns = df.select_series(by_columns)?; + df.sort(by_columns, sort_options)? + // df.sort_impl( + // by_columns, + // reverse, + // nulls_last, + // maintain_order, + // None, + // multithreaded, + // )? 
} else { df.group_by_stable(groups)?.apply(|df| { - let by_columns = df.select_series(&by_columns)?; - df.sort_impl( - by_columns, - reverse.clone(), - nulls_last, - maintain_order, - None, - multithreaded, - ) + df.sort(by_columns.clone(), sort_options.clone()) + // let by_columns = df.select_series(&by_columns)?; + // df.sort_impl( + // by_columns, + // reverse.clone(), + // nulls_last, + // maintain_order, + // None, + // multithreaded, + // ) })? }; @@ -300,14 +308,20 @@ pub fn df_sort_with( let df = data.clone_inner(); let exprs = ex_expr_to_exprs(expressions); + let sort_options = SortMultipleOptions::new() + .with_maintain_order(maintain_order) + // .with_multithreaded(multithreaded) + .with_nulls_last(nulls_last) + .with_order_descendings(directions); + let new_df = if groups.is_empty() { df.lazy() - .sort_by_exprs(exprs, directions, nulls_last, maintain_order) + .sort_by_exprs(exprs, sort_options) .collect()? } else { df.group_by_stable(groups)?.apply(|df| { df.lazy() - .sort_by_exprs(&exprs, &directions, nulls_last, maintain_order) + .sort_by_exprs(&exprs, sort_options.clone()) .collect() })? 
}; diff --git a/native/explorer/src/expressions.rs b/native/explorer/src/expressions.rs index af1301d34..4949ddd39 100644 --- a/native/explorer/src/expressions.rs +++ b/native/explorer/src/expressions.rs @@ -780,7 +780,7 @@ pub fn expr_sort( nulls_last, }; - ExExpr::new(expr.sort_with(opts)) + ExExpr::new(expr.sort(opts)) } #[rustler::nif] diff --git a/native/explorer/src/lazyframe.rs b/native/explorer/src/lazyframe.rs index 7ed529ab9..11ec919a3 100644 --- a/native/explorer/src/lazyframe.rs +++ b/native/explorer/src/lazyframe.rs @@ -146,9 +146,14 @@ pub fn lf_sort_with( nulls_last: bool, ) -> Result { let exprs = ex_expr_to_exprs(expressions); + let sort_options = SortMultipleOptions::new() + .with_nulls_last(nulls_last) + .with_maintain_order(maintain_order) + .with_order_descendings(directions); + let ldf = data .clone_inner() - .sort_by_exprs(exprs, directions, nulls_last, maintain_order); + .sort_by_exprs(exprs, sort_options); Ok(ExLazyFrame::new(ldf)) } @@ -160,12 +165,16 @@ pub fn lf_grouped_sort_with( groups: Vec, directions: Vec, ) -> Result { + let sort_options = SortMultipleOptions::new() + // .with_nulls_last(nulls_last) + // .with_maintain_order(maintain_order) + .with_order_descendings(directions); // For grouped lazy frames, we need to use the `#sort_by` method that is // less powerful, but can be used with `over`. 
// See: https://docs.pola.rs/user-guide/expressions/window/#operations-per-group let ldf = data .clone_inner() - .with_columns([col("*").sort_by(expressions, directions).over(groups)]); + .with_columns([col("*").sort_by(expressions, sort_options).over(groups)]); Ok(ExLazyFrame::new(ldf)) } diff --git a/native/explorer/src/series.rs b/native/explorer/src/series.rs index cad789b3f..d325e117c 100644 --- a/native/explorer/src/series.rs +++ b/native/explorer/src/series.rs @@ -393,7 +393,7 @@ pub fn s_sort( multithreaded, nulls_last, }; - Ok(ExSeries::new(series.sort_with(opts))) + Ok(ExSeries::new(series.sort_with(opts)?)) } #[rustler::nif(schedule = "DirtyCpu")] diff --git a/test/explorer/data_frame_test.exs b/test/explorer/data_frame_test.exs index 0bda80d6d..6c6056f6e 100644 --- a/test/explorer/data_frame_test.exs +++ b/test/explorer/data_frame_test.exs @@ -1114,7 +1114,7 @@ defmodule Explorer.DataFrameTest do calc2: [19, 38, 57], calc3: [3, 4, 3], calc4: [2.0, :infinity, 7.5], - calc5: [1.0, 4.0, 3.0], + calc5: [1, 4, 3], calc6: [2, nil, 7], calc7: [0, nil, 4] } @@ -1128,7 +1128,7 @@ defmodule Explorer.DataFrameTest do "calc2" => {:s, 64}, "calc3" => {:s, 64}, "calc4" => {:f, 64}, - "calc5" => {:f, 64}, + "calc5" => {:s, 64}, "calc6" => {:s, 64}, "calc7" => {:s, 64} } From fcd86787890c4c03fbefe286f0d6467bea695c3a Mon Sep 17 00:00:00 2001 From: Kartheek Date: Sat, 1 Jun 2024 23:40:26 +0530 Subject: [PATCH 02/30] fix compilation --- native/explorer/Cargo.lock | 118 ++++++++++++++++++---------- native/explorer/Cargo.toml | 4 +- native/explorer/src/dataframe.rs | 4 +- native/explorer/src/dataframe/io.rs | 64 ++++++++------- native/explorer/src/expressions.rs | 30 +++---- native/explorer/src/lazyframe.rs | 8 +- native/explorer/src/lazyframe/io.rs | 6 +- native/explorer/src/series.rs | 82 ++++++++++--------- 8 files changed, 173 insertions(+), 143 deletions(-) diff --git a/native/explorer/Cargo.lock b/native/explorer/Cargo.lock index 69e12af52..bf4c6ae3b 100644 --- 
a/native/explorer/Cargo.lock +++ b/native/explorer/Cargo.lock @@ -171,6 +171,12 @@ version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + [[package]] name = "bitflags" version = "1.3.2" @@ -197,9 +203,9 @@ dependencies = [ [[package]] name = "brotli" -version = "3.5.0" +version = "5.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d640d25bc63c50fb1f0b545ffd80207d2e10a4c965530809b40ba3386825c391" +checksum = "19483b140a7ac7174d34b5a581b406c64f84da5409d3e09cf4fff604f9270e67" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -208,9 +214,9 @@ dependencies = [ [[package]] name = "brotli-decompressor" -version = "2.5.1" +version = "4.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e2e4afe60d7dd600fdd3de8d0f08c2b7ec039712e3b6137ff98b7004e82de4f" +checksum = "9a45bd2e4095a8b518033b128020dd4a55aab1c0a381ba4404a472630f4bc362" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -707,6 +713,7 @@ dependencies = [ "ahash", "allocator-api2", "rayon", + "serde", ] [[package]] @@ -1194,7 +1201,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8718f8b65fdf67a45108d1548347d4af7d71fb81ce727bbf9e3b2535e079db3" dependencies = [ "async-trait", - "base64", + "base64 0.21.7", "bytes", "chrono", "futures", @@ -1344,9 +1351,9 @@ dependencies = [ [[package]] name = "polars" -version = "0.39.2" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ea21b858b16b9c0e17a12db2800d11aa5b4bd182be6b3022eb537bbfc1f2db5" +checksum = "e148396dca5496566880fa19374f3f789a29db94e3eb458afac1497b4bac5442" dependencies = [ "getrandom", "polars-arrow", @@ 
-1364,9 +1371,9 @@ dependencies = [ [[package]] name = "polars-arrow" -version = "0.39.2" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "725b09f2b5ef31279b66e27bbab63c58d49d8f6696b66b1f46c7eaab95e80f75" +checksum = "1cb5e11cd0752ae022fa6ca3afa50a14b0301b7ce53c0135828fbb0f4fa8303e" dependencies = [ "ahash", "atoi", @@ -1411,9 +1418,9 @@ dependencies = [ [[package]] name = "polars-compute" -version = "0.39.2" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a796945b14b14fbb79b91ef0406e6fddca2be636e889f81ea5d6ee7d36efb4fe" +checksum = "89fc4578f826234cdecb782952aa9c479dc49373f81694a7b439c70b6f609ba0" dependencies = [ "bytemuck", "either", @@ -1427,9 +1434,9 @@ dependencies = [ [[package]] name = "polars-core" -version = "0.39.2" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "465f70d3e96b6d0b1a43c358ba451286b8c8bd56696feff020d65702aa33e35c" +checksum = "e490c6bace1366a558feea33d1846f749a8ca90bd72a6748752bc65bb4710b2a" dependencies = [ "ahash", "bitflags 2.5.0", @@ -1459,9 +1466,9 @@ dependencies = [ [[package]] name = "polars-error" -version = "0.39.2" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5224d5d05e6b8a6f78b75951ae1b5f82c8ab1979e11ffaf5fd41941e3d5b0757" +checksum = "08888f58e61599b00f5ea0c2ccdc796b54b9859559cc0d4582733509451fa01a" dependencies = [ "object_store", "polars-arrow-format", @@ -1470,11 +1477,31 @@ dependencies = [ "thiserror", ] +[[package]] +name = "polars-expr" +version = "0.40.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4173591920fe56ad55af025f92eb0d08421ca85705c326a640c43856094e3484" +dependencies = [ + "ahash", + "bitflags 2.5.0", + "once_cell", + "polars-arrow", + "polars-core", + "polars-io", + "polars-ops", + "polars-plan", + "polars-time", + "polars-utils", + "rayon", + "smartstring", +] + [[package]] 
name = "polars-io" -version = "0.39.2" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2c8589e418cbe4a48228d64b2a8a40284a82ec3c98817c0c2bcc0267701338b" +checksum = "5842896aea46d975b425d63f156f412aed3cfde4c257b64fb1f43ceea288074e" dependencies = [ "ahash", "async-trait", @@ -1517,9 +1544,9 @@ dependencies = [ [[package]] name = "polars-json" -version = "0.39.2" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81224492a649a12b668480c0cf219d703f432509765d2717e72fe32ad16fc701" +checksum = "160cbad0145b93ac6a88639aadfa6f7d7c769d05a8674f9b7e895b398cae9901" dependencies = [ "ahash", "chrono", @@ -1538,9 +1565,9 @@ dependencies = [ [[package]] name = "polars-lazy" -version = "0.39.2" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89b2632b1af668e2058d5f8f916d8fbde3cac63d03ae29a705f598e41dcfeb7f" +checksum = "e805ea2ebbc6b7749b0afb31b7fc5d32b42b57ba29b984549d43d3a16114c4a5" dependencies = [ "ahash", "bitflags 2.5.0", @@ -1549,6 +1576,7 @@ dependencies = [ "once_cell", "polars-arrow", "polars-core", + "polars-expr", "polars-io", "polars-json", "polars-ops", @@ -1564,13 +1592,13 @@ dependencies = [ [[package]] name = "polars-ops" -version = "0.39.2" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efdbdb4d9a92109bc2e0ce8e17af5ae8ab643bb5b7ee9d1d74f0aeffd1fbc95f" +checksum = "7b0aed7e169c81b98457641cf82b251f52239a668916c2e683abd1f38df00d58" dependencies = [ "ahash", "argminmax", - "base64", + "base64 0.22.1", "bytemuck", "chrono", "chrono-tz 0.8.6", @@ -1599,13 +1627,13 @@ dependencies = [ [[package]] name = "polars-parquet" -version = "0.39.2" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b421d2196f786fdfe162db614c8485f8308fe41575d4de634a39bbe460d1eb6a" +checksum = 
"c70670a9e51cac66d0e77fd20b5cc957dbcf9f2660d410633862bb72f846d5b8" dependencies = [ "ahash", "async-stream", - "base64", + "base64 0.22.1", "brotli", "ethnum", "flate2", @@ -1625,9 +1653,9 @@ dependencies = [ [[package]] name = "polars-pipe" -version = "0.39.2" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48700f1d5bd56a15451e581f465c09541492750360f18637b196f995470a015c" +checksum = "0a40ae1b3c74ee07e2d1f7cbf56c5d6e15969e45d9b6f0903bd2acaf783ba436" dependencies = [ "crossbeam-channel", "crossbeam-queue", @@ -1638,6 +1666,7 @@ dependencies = [ "polars-arrow", "polars-compute", "polars-core", + "polars-expr", "polars-io", "polars-ops", "polars-plan", @@ -1652,13 +1681,14 @@ dependencies = [ [[package]] name = "polars-plan" -version = "0.39.2" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fb8e2302e20c44defd5be8cad9c96e75face63c3a5f609aced8c4ec3b3ac97d" +checksum = "8daa3541ae7e9af311a4389bc2b21f83349c34c723cc67fa524cdefdaa172d90" dependencies = [ "ahash", "bytemuck", "chrono-tz 0.8.6", + "either", "futures", "hashbrown", "once_cell", @@ -1681,9 +1711,9 @@ dependencies = [ [[package]] name = "polars-row" -version = "0.39.2" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a515bdc68c2ae3702e3de70d89601f3b71ca8137e282a226dddb53ee4bacfa2e" +checksum = "deb285f2f3a65b00dd06bef16bb9f712dbb5478f941dab5cf74f9f016d382e40" dependencies = [ "bytemuck", "polars-arrow", @@ -1693,11 +1723,12 @@ dependencies = [ [[package]] name = "polars-sql" -version = "0.39.2" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b4bb7cc1c04c3023d1953b2f1dec50515e8fd8169a5a2bf4967b3b082232db7" +checksum = "a724f699d194cb02c25124d3832f7d4d77f387f1a89ee42f6b9e88ec561d4ad9" dependencies = [ "hex", + "once_cell", "polars-arrow", "polars-core", "polars-error", @@ -1711,11 +1742,12 @@ dependencies = [ 
[[package]] name = "polars-time" -version = "0.39.2" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efc18e3ad92eec55db89d88f16c22d436559ba7030cf76f86f6ed7a754b673f1" +checksum = "87ebec238d8b6200d9f0c3ce411c8441e950bd5a7df7806b8172d06c1d5a4b97" dependencies = [ "atoi", + "bytemuck", "chrono", "chrono-tz 0.8.6", "now", @@ -1731,9 +1763,9 @@ dependencies = [ [[package]] name = "polars-utils" -version = "0.39.2" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c760b6c698cfe2fbbbd93d6cfb408db14ececfe1d92445dae2229ce1b5b21ae8" +checksum = "34e1a907c63abf71e5f21467e2e4ff748896c28196746f631c6c25512ec6102c" dependencies = [ "ahash", "bytemuck", @@ -1955,7 +1987,7 @@ version = "0.11.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd67538700a17451e7cba03ac727fb961abb7607553461627b97de0b89cf4a62" dependencies = [ - "base64", + "base64 0.21.7", "bytes", "encoding_rs", "futures-core", @@ -2076,7 +2108,7 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" dependencies = [ - "base64", + "base64 0.21.7", ] [[package]] @@ -2360,9 +2392,9 @@ checksum = "fe895eb47f22e2ddd4dabc02bce419d2e643c8e3b585c78158b349195bc24d82" [[package]] name = "strum_macros" -version = "0.25.3" +version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0" +checksum = "c6cf59daf282c0a494ba14fd21610a0325f9f90ec9d1231dea26bcb1d696c946" dependencies = [ "heck 0.4.1", "proc-macro2", diff --git a/native/explorer/Cargo.toml b/native/explorer/Cargo.toml index aa64018a3..13b77b419 100644 --- a/native/explorer/Cargo.toml +++ b/native/explorer/Cargo.toml @@ -38,7 +38,7 @@ object_store = { version = "0.9", default-features = false, optional = true } mimalloc = { version = "*", 
default-features = false } [dependencies.polars] -version = "0.39" +version = "0.40" default-features = false features = [ "abs", @@ -86,7 +86,7 @@ features = [ ] [dependencies.polars-ops] -version = "0.39" +version = "0.40" features = ["abs", "ewma", "cum_agg", "cov"] [features] diff --git a/native/explorer/src/dataframe.rs b/native/explorer/src/dataframe.rs index a8fb819b1..10161b979 100644 --- a/native/explorer/src/dataframe.rs +++ b/native/explorer/src/dataframe.rs @@ -315,9 +315,7 @@ pub fn df_sort_with( .with_order_descendings(directions); let new_df = if groups.is_empty() { - df.lazy() - .sort_by_exprs(exprs, sort_options) - .collect()? + df.lazy().sort_by_exprs(exprs, sort_options).collect()? } else { df.group_by_stable(groups)?.apply(|df| { df.lazy() diff --git a/native/explorer/src/dataframe/io.rs b/native/explorer/src/dataframe/io.rs index 83fe7f55f..752afb6ff 100644 --- a/native/explorer/src/dataframe/io.rs +++ b/native/explorer/src/dataframe/io.rs @@ -44,24 +44,29 @@ pub fn df_from_csv( _ => CsvEncoding::Utf8, }; - let reader = CsvReader::from_path(filename)? 
- .infer_schema(infer_schema_length) - .has_header(has_header) - .truncate_ragged_lines(true) - .with_try_parse_dates(parse_dates) + let dataframe = CsvReadOptions::default() + .with_infer_schema_length(infer_schema_length) + .with_has_header(has_header) .with_n_rows(stop_after_n_rows) - .with_separator(delimiter_as_byte) .with_skip_rows(skip_rows) .with_skip_rows_after_header(skip_rows_after_header) - .with_projection(projection) + .with_projection(projection.map(Arc::new)) .with_rechunk(do_rechunk) - .with_encoding(encoding) - .with_columns(column_names) - .with_dtypes(Some(schema_from_dtypes_pairs(dtypes)?)) - .with_null_values(Some(NullValues::AllColumns(null_vals))) - .with_end_of_line_char(eol_delimiter.unwrap_or(b'\n')); - - Ok(ExDataFrame::new(reader.finish()?)) + .with_columns(column_names.map(Arc::new)) + .with_schema_overwrite(Some(schema_from_dtypes_pairs(dtypes)?)) + .with_parse_options( + CsvParseOptions::default() + .with_encoding(encoding) + .with_truncate_ragged_lines(true) + .with_try_parse_dates(parse_dates) + .with_separator(delimiter_as_byte) + .with_eol_char(eol_delimiter.unwrap_or(b'\n')) + .with_null_values(Some(NullValues::AllColumns(null_vals))), + ) + .try_into_reader_with_file_path(Some(filename.into()))? 
+ .finish(); + + Ok(ExDataFrame::new(dataframe?)) } pub fn schema_from_dtypes_pairs( @@ -154,23 +159,28 @@ pub fn df_load_csv( let cursor = Cursor::new(binary.as_slice()); - let reader = CsvReader::new(cursor) - .infer_schema(infer_schema_length) - .has_header(has_header) - .with_try_parse_dates(parse_dates) + let dataframe = CsvReadOptions::default() + .with_has_header(has_header) + .with_infer_schema_length(infer_schema_length) .with_n_rows(stop_after_n_rows) - .with_separator(delimiter_as_byte) + .with_columns(column_names.map(Arc::new)) .with_skip_rows(skip_rows) .with_skip_rows_after_header(skip_rows_after_header) - .with_projection(projection) + .with_projection(projection.map(Arc::new)) .with_rechunk(do_rechunk) - .with_encoding(encoding) - .with_columns(column_names) - .with_dtypes(Some(schema_from_dtypes_pairs(dtypes)?)) - .with_null_values(Some(NullValues::AllColumns(null_vals))) - .with_end_of_line_char(eol_delimiter.unwrap_or(b'\n')); - - Ok(ExDataFrame::new(reader.finish()?)) + .with_schema(Some(schema_from_dtypes_pairs(dtypes)?)) + .with_parse_options( + CsvParseOptions::default() + .with_separator(delimiter_as_byte) + .with_encoding(encoding) + .with_null_values(Some(NullValues::AllColumns(null_vals))) + .with_try_parse_dates(parse_dates) + .with_eol_char(eol_delimiter.unwrap_or(b'\n')), + ) + .into_reader_with_file_handle(cursor) + .finish(); + + Ok(ExDataFrame::new(dataframe?)) } // ============ Parquet ============ // diff --git a/native/explorer/src/expressions.rs b/native/explorer/src/expressions.rs index 4949ddd39..a9d35130e 100644 --- a/native/explorer/src/expressions.rs +++ b/native/explorer/src/expressions.rs @@ -4,16 +4,11 @@ // or an expression and returns an expression that is // wrapped in an Elixir struct. 
-use polars::error::PolarsError; - -use polars::prelude::{GetOutput, IntoSeries, Utf8JsonPathImpl}; -use polars::series::Series; - use crate::datatypes::{ ExCorrelationMethod, ExDate, ExDuration, ExNaiveDateTime, ExRankMethod, ExSeriesDtype, ExValidValue, }; -use crate::series::{cast_str_to_f64, ewm_opts, rolling_opts}; +use crate::series::{cast_str_to_f64, ewm_opts, rolling_opts_fixed_window}; use crate::{ExDataFrame, ExExpr, ExSeries}; use polars::lazy::dsl; use polars::prelude::{ @@ -654,7 +649,7 @@ macro_rules! init_window_expr_fun { center: bool, ) -> ExExpr { let expr = data.clone_inner(); - let opts = rolling_opts(window_size, weights, min_periods, center); + let opts = rolling_opts_fixed_window(window_size, weights, min_periods, center); ExExpr::new(expr.$fun(opts)) } }; @@ -675,7 +670,7 @@ pub fn expr_window_standard_deviation( center: bool, ) -> ExExpr { let expr = data.clone_inner(); - let opts = rolling_opts(window_size, weights, min_periods, center); + let opts = rolling_opts_fixed_window(window_size, weights, min_periods, center); ExExpr::new(expr.rolling_std(opts).cast(DataType::Float64)) } @@ -827,7 +822,10 @@ pub fn expr_unary_not(expr: ExExpr) -> ExExpr { pub fn expr_describe_filter_plan(data: ExDataFrame, expr: ExExpr) -> String { let df = data.clone(); let expressions = expr.clone_inner(); - df.lazy().filter(expressions).describe_plan() + df.lazy() + .filter(expressions) + .describe_plan() + .expect("error") } #[rustler::nif] @@ -1115,18 +1113,8 @@ pub fn expr_json_decode(expr: ExExpr, ex_dtype: ExSeriesDtype) -> ExExpr { } #[rustler::nif] -pub fn expr_json_path_match(expr: ExExpr, json_path: &str) -> ExExpr { - let p = json_path.to_owned(); - let function = move |s: Series| { - let ca = s.str()?; - match ca.json_path_match(&p) { - Ok(ca) => Ok(Some(ca.into_series())), - Err(e) => Err(PolarsError::ComputeError(format!("{e:?}").into())), - } - }; - let expr = expr - .clone_inner() - .map(function, GetOutput::from_type(DataType::String)); +pub fn 
expr_json_path_match(expr: ExExpr, json_path: String) -> ExExpr { + let expr = expr.clone_inner().str().json_path_match(json_path.lit()); ExExpr::new(expr) } diff --git a/native/explorer/src/lazyframe.rs b/native/explorer/src/lazyframe.rs index 11ec919a3..aa894c013 100644 --- a/native/explorer/src/lazyframe.rs +++ b/native/explorer/src/lazyframe.rs @@ -24,7 +24,7 @@ pub fn lf_describe_plan(data: ExLazyFrame, optimized: bool) -> Result lf.describe_optimized_plan()?, - false => lf.describe_plan(), + false => lf.describe_plan().expect("error"), }; Ok(plan) } @@ -151,9 +151,7 @@ pub fn lf_sort_with( .with_maintain_order(maintain_order) .with_order_descendings(directions); - let ldf = data - .clone_inner() - .sort_by_exprs(exprs, sort_options); + let ldf = data.clone_inner().sort_by_exprs(exprs, sort_options); Ok(ExLazyFrame::new(ldf)) } @@ -290,7 +288,7 @@ pub fn lf_join( let how = match how { "left" => JoinType::Left, "inner" => JoinType::Inner, - "outer" => JoinType::Outer { coalesce: false }, + "outer" => JoinType::Outer, "cross" => JoinType::Cross, _ => { return Err(ExplorerError::Other(format!( diff --git a/native/explorer/src/lazyframe/io.rs b/native/explorer/src/lazyframe/io.rs index a075df5d3..6406834c4 100644 --- a/native/explorer/src/lazyframe/io.rs +++ b/native/explorer/src/lazyframe/io.rs @@ -209,7 +209,7 @@ pub fn lf_from_csv( let df = LazyCsvReader::new(filename) .with_infer_schema_length(infer_schema_length) - .has_header(has_header) + .with_has_header(has_header) .with_try_parse_dates(parse_dates) .with_n_rows(stop_after_n_rows) .with_separator(delimiter_as_byte) @@ -217,9 +217,9 @@ pub fn lf_from_csv( .with_skip_rows_after_header(skip_rows_after_header) .with_rechunk(do_rechunk) .with_encoding(encoding) - .with_dtype_overwrite(Some(schema_from_dtypes_pairs(dtypes)?.as_ref())) + .with_dtype_overwrite(Some(schema_from_dtypes_pairs(dtypes)?)) .with_null_values(Some(NullValues::AllColumns(null_vals))) - 
.with_end_of_line_char(eol_delimiter.unwrap_or(b'\n')) + .with_eol_char(eol_delimiter.unwrap_or(b'\n')) .finish()?; Ok(ExLazyFrame::new(df)) diff --git a/native/explorer/src/series.rs b/native/explorer/src/series.rs index d325e117c..fbcec42ff 100644 --- a/native/explorer/src/series.rs +++ b/native/explorer/src/series.rs @@ -10,7 +10,7 @@ use crate::{ use encoding::encode_naive_datetime; // use encoding::encode_datetime; -use polars::prelude::*; +use polars::prelude::*; //{lazy::dsl::Expr, }; use polars_ops::chunked_array::cov::{cov, pearson_corr}; use polars_ops::prelude::peaks::*; use rustler::{Binary, Encoder, Env, Error, ListIterator, NifResult, Term, TermType}; @@ -796,8 +796,8 @@ pub fn s_window_sum( min_periods: Option, center: bool, ) -> Result { - let opts = rolling_opts(window_size, weights, min_periods, center); - let s1 = series.rolling_sum(opts.try_into()?)?; + let opts = rolling_opts_fixed_window(window_size, weights, min_periods, center); + let s1 = series.rolling_sum(opts)?; Ok(ExSeries::new(s1)) } @@ -809,8 +809,8 @@ pub fn s_window_mean( min_periods: Option, center: bool, ) -> Result { - let opts = rolling_opts(window_size, weights, min_periods, center); - let s1 = series.rolling_mean(opts.try_into()?)?; + let opts = rolling_opts_fixed_window(window_size, weights, min_periods, center); + let s1 = series.rolling_mean(opts)?; Ok(ExSeries::new(s1)) } @@ -822,7 +822,7 @@ pub fn s_window_median( min_periods: Option, center: bool, ) -> Result { - let opts = rolling_opts(window_size, weights, min_periods, center); + let opts = rolling_opts_fixed_window(window_size, weights, min_periods, center); let s1 = series .clone_inner() .into_frame() @@ -842,8 +842,8 @@ pub fn s_window_max( min_periods: Option, center: bool, ) -> Result { - let opts = rolling_opts(window_size, weights, min_periods, center); - let s1 = series.rolling_max(opts.try_into()?)?; + let opts = rolling_opts_fixed_window(window_size, weights, min_periods, center); + let s1 = 
series.rolling_max(opts)?; Ok(ExSeries::new(s1)) } @@ -855,8 +855,8 @@ pub fn s_window_min( min_periods: Option, center: bool, ) -> Result { - let opts = rolling_opts(window_size, weights, min_periods, center); - let s1 = series.rolling_min(opts.try_into()?)?; + let opts = rolling_opts_fixed_window(window_size, weights, min_periods, center); + let s1 = series.rolling_min(opts)?; Ok(ExSeries::new(s1)) } @@ -868,27 +868,27 @@ pub fn s_window_standard_deviation( min_periods: Option, center: bool, ) -> Result { - let opts = rolling_opts(window_size, weights, min_periods, center); - let s1 = series.rolling_std(opts.try_into()?)?; + let opts = rolling_opts_fixed_window(window_size, weights, min_periods, center); + let s1 = series.rolling_std(opts)?; Ok(ExSeries::new(s1)) } // Used for rolling functions - also see "expressions" module -pub fn rolling_opts( +pub fn rolling_opts_fixed_window( window_size: usize, weights: Option>, min_periods: Option, center: bool, -) -> RollingOptions { - let min_periods = if let Some(mp) = min_periods { +) -> RollingOptionsFixedWindow { + let min_periods: usize = if let Some(mp) = min_periods { mp } else { window_size }; - let window_size_duration = Duration::new(window_size as i64); + // let window_size_duration = Duration::new(window_size as i64); - RollingOptions { - window_size: window_size_duration, + RollingOptionsFixedWindow { + window_size, weights, min_periods, center, @@ -1075,7 +1075,16 @@ pub fn s_mode(s: ExSeries) -> Result { #[rustler::nif(schedule = "DirtyCpu")] pub fn s_product(s: ExSeries) -> Result { if s.dtype().is_numeric() { - Ok(ExSeries::new(s.product()?)) + let series = s + .clone_inner() + .into_frame() + .lazy() + .select([col(s.name()).product()]) + .collect()? + .column(s.name())? 
+ .clone(); + + Ok(ExSeries::new(series)) } else { panic!("product/1 not implemented for {:?}", &s.dtype()) } @@ -1235,12 +1244,16 @@ pub fn s_quantile<'a>( .unwrap() .encode(env)), }, - _ => encoding::term_from_value( - s.quantile_as_series(quantile, strategy)? - .cast(dtype)? - .get(0)?, - env, - ), + _ => todo!(), + // _ => + // s.agg_quantile(quantile, strategy) + + // encoding::term_from_value( + // s.quantile_as_series(quantile, strategy)? + // .cast(dtype)? + // .get(0)?, + // env, + // ), } } @@ -1849,26 +1862,17 @@ pub fn s_json_decode(s: ExSeries, ex_dtype: ExSeriesDtype) -> Result Result { - let p = json_path.to_owned(); - let function = move |s: Series| { - let ca = s.str()?; - match ca.json_path_match(&p) { - Ok(ca) => Ok(Some(ca.into_series())), - Err(e) => Err(PolarsError::ComputeError(format!("{e:?}").into())), - } - }; - let s2 = s +pub fn s_json_path_match(s: ExSeries, json_path: String) -> Result { + let var_series = s .clone_inner() .into_frame() .lazy() - .select([col(s.name()) - .map(function, GetOutput::from_type(DataType::String)) - .alias(s.name())]) + .select([col(s.name()).str().json_path_match(json_path.lit())]) .collect()? .column(s.name())? 
.clone(); - Ok(ExSeries::new(s2)) + + Ok(ExSeries::new(var_series)) } #[rustler::nif] From 9d415dc8c1fc847999db66caf07a52c27725270f Mon Sep 17 00:00:00 2001 From: Kartheek Date: Mon, 24 Jun 2024 08:39:37 +0530 Subject: [PATCH 03/30] bump polars to 0.41 --- native/explorer/Cargo.lock | 608 +++++++++++++++++++++++++++++++++---- native/explorer/Cargo.toml | 4 +- 2 files changed, 545 insertions(+), 67 deletions(-) diff --git a/native/explorer/Cargo.lock b/native/explorer/Cargo.lock index bf4c6ae3b..923010fe5 100644 --- a/native/explorer/Cargo.lock +++ b/native/explorer/Cargo.lock @@ -96,6 +96,18 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf7d0a018de4f6aa429b9d33d69edf69072b1c5b1cb8d3e4a5f7ef898fc3eb76" +[[package]] +name = "arrayref" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545" + +[[package]] +name = "arrayvec" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" + [[package]] name = "async-stream" version = "0.3.5" @@ -144,6 +156,12 @@ version = "0.15.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ae037714f313c1353189ead58ef9eec30a8e8dc101b2622d461418fd59e28a9" +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + [[package]] name = "autocfg" version = "1.2.0" @@ -192,6 +210,19 @@ dependencies = [ "serde", ] +[[package]] +name = "blake3" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30cca6d3674597c30ddf2c587bf8d9d65c9a84d2326d941cc79c9842dfe0ef52" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", +] + [[package]] name = 
"block-buffer" version = "0.10.4" @@ -338,6 +369,12 @@ dependencies = [ "cc", ] +[[package]] +name = "constant_time_eq" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2" + [[package]] name = "core-foundation" version = "0.9.4" @@ -471,6 +508,16 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +[[package]] +name = "errno" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + [[package]] name = "ethnum" version = "1.5.0" @@ -486,7 +533,7 @@ dependencies = [ "chrono-tz 0.9.0", "either", "mimalloc", - "object_store", + "object_store 0.9.1", "polars", "polars-ops", "rand", @@ -551,6 +598,16 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fs4" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7e180ac76c23b45e767bd7ae9579bc0bb458618c4bc71835926e098e61d15f8" +dependencies = [ + "rustix", + "windows-sys 0.52.0", +] + [[package]] name = "futures" version = "0.3.30" @@ -686,7 +743,26 @@ dependencies = [ "futures-core", "futures-sink", "futures-util", - "http", + "http 0.2.12", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "h2" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa82e28a107a8cc405f0839610bdc9b15f1e25ec7d696aa5cf173edbcb1486ab" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http 1.1.0", "indexmap", "slab", "tokio", @@ -760,6 +836,17 @@ dependencies = [ "itoa", ] +[[package]] +name = "http" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + [[package]] name = "http-body" version = "0.4.6" @@ -767,7 +854,30 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" dependencies = [ "bytes", - "http", + "http 0.2.12", + "pin-project-lite", +] + +[[package]] +name = "http-body" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cac85db508abc24a2e48553ba12a996e87244a0395ce011e62b37158745d643" +dependencies = [ + "bytes", + "http 1.1.0", +] + +[[package]] +name = "http-body-util" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" +dependencies = [ + "bytes", + "futures-util", + "http 1.1.0", + "http-body 1.0.0", "pin-project-lite", ] @@ -799,9 +909,9 @@ dependencies = [ "futures-channel", "futures-core", "futures-util", - "h2", - "http", - "http-body", + "h2 0.3.26", + "http 0.2.12", + "http-body 0.4.6", "httparse", "httpdate", "itoa", @@ -813,6 +923,26 @@ dependencies = [ "want", ] +[[package]] +name = "hyper" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe575dd17d0862a9a33781c8c4696a55c320909004a67a00fb286ba8b1bc496d" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "h2 0.4.5", + "http 1.1.0", + "http-body 1.0.0", + "httparse", + "itoa", + "pin-project-lite", + "smallvec", + "tokio", + "want", +] + [[package]] name = "hyper-rustls" version = "0.24.2" @@ -820,11 +950,49 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" dependencies = [ "futures-util", - "http", - "hyper", - "rustls", + "http 0.2.12", + "hyper 0.14.28", + "rustls 0.21.11", "tokio", - "tokio-rustls", 
+ "tokio-rustls 0.24.1", +] + +[[package]] +name = "hyper-rustls" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ee4be2c948921a1a5320b629c4193916ed787a7f7f293fd3f7f5a6c9de74155" +dependencies = [ + "futures-util", + "http 1.1.0", + "hyper 1.3.1", + "hyper-util", + "rustls 0.23.10", + "rustls-native-certs 0.7.0", + "rustls-pki-types", + "tokio", + "tokio-rustls 0.26.0", + "tower-service", +] + +[[package]] +name = "hyper-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b875924a60b96e5d7b9ae7b066540b1dd1cbd90d1828f54c92e02a283351c56" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "http 1.1.0", + "http-body 1.0.0", + "hyper 1.3.1", + "pin-project-lite", + "socket2", + "tokio", + "tower", + "tower-service", + "tracing", ] [[package]] @@ -1029,6 +1197,12 @@ dependencies = [ "libc", ] +[[package]] +name = "linux-raw-sys" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" + [[package]] name = "lock_api" version = "0.4.11" @@ -1206,14 +1380,44 @@ dependencies = [ "chrono", "futures", "humantime", - "hyper", + "hyper 0.14.28", + "itertools", + "md-5", + "parking_lot", + "percent-encoding", + "quick-xml", + "rand", + "reqwest 0.11.27", + "ring", + "serde", + "serde_json", + "snafu", + "tokio", + "tracing", + "url", + "walkdir", +] + +[[package]] +name = "object_store" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbebfd32c213ba1907fa7a9c9138015a8de2b43e30c5aa45b18f7deb46786ad6" +dependencies = [ + "async-trait", + "base64 0.22.1", + "bytes", + "chrono", + "futures", + "humantime", + "hyper 1.3.1", "itertools", "md-5", "parking_lot", "percent-encoding", "quick-xml", "rand", - "reqwest", + "reqwest 0.12.5", "ring", "serde", "serde_json", @@ -1322,6 +1526,26 @@ dependencies = [ 
"siphasher", ] +[[package]] +name = "pin-project" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6bf43b791c5b9e34c3d182969b4abb522f9343702850a2e57f460d00d09b4b3" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.60", +] + [[package]] name = "pin-project-lite" version = "0.2.14" @@ -1351,9 +1575,9 @@ dependencies = [ [[package]] name = "polars" -version = "0.40.0" +version = "0.41.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e148396dca5496566880fa19374f3f789a29db94e3eb458afac1497b4bac5442" +checksum = "43845ccaa696de9cf374f5a4e3c5c2685192b4a56b7dd60b05a5cb1f7cd86cc7" dependencies = [ "getrandom", "polars-arrow", @@ -1371,9 +1595,9 @@ dependencies = [ [[package]] name = "polars-arrow" -version = "0.40.0" +version = "0.41.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cb5e11cd0752ae022fa6ca3afa50a14b0301b7ce53c0135828fbb0f4fa8303e" +checksum = "5e66ab16b782bdf1803c3e50438817f5fcb6f6bcfbeb6dddba01a2b83827c7bf" dependencies = [ "ahash", "atoi", @@ -1418,9 +1642,9 @@ dependencies = [ [[package]] name = "polars-compute" -version = "0.40.0" +version = "0.41.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89fc4578f826234cdecb782952aa9c479dc49373f81694a7b439c70b6f609ba0" +checksum = "087442a8e5c0e3d4d1d683f1b287770c6e26a8f86a9a8a47607e220ae9e08113" dependencies = [ "bytemuck", "either", @@ -1434,9 +1658,9 @@ dependencies = [ [[package]] name = "polars-core" -version = "0.40.0" +version = "0.41.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e490c6bace1366a558feea33d1846f749a8ca90bd72a6748752bc65bb4710b2a" +checksum = 
"9523e09a134c75557e2216e652c965013b92b8e682cc591059d4bfb0951a9d72" dependencies = [ "ahash", "bitflags 2.5.0", @@ -1466,11 +1690,11 @@ dependencies = [ [[package]] name = "polars-error" -version = "0.40.0" +version = "0.41.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08888f58e61599b00f5ea0c2ccdc796b54b9859559cc0d4582733509451fa01a" +checksum = "3ad3cecbbe309229174a1126dfda0cfde4826d8e340e89e3846f546a9333a9e0" dependencies = [ - "object_store", + "object_store 0.10.1", "polars-arrow-format", "regex", "simdutf8", @@ -1479,9 +1703,9 @@ dependencies = [ [[package]] name = "polars-expr" -version = "0.40.0" +version = "0.41.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4173591920fe56ad55af025f92eb0d08421ca85705c326a640c43856094e3484" +checksum = "2685cee0dcbb92dbf7fccd98ca169741051fbdac5182541f82720279e08d93b6" dependencies = [ "ahash", "bitflags 2.5.0", @@ -1499,25 +1723,27 @@ dependencies = [ [[package]] name = "polars-io" -version = "0.40.0" +version = "0.41.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5842896aea46d975b425d63f156f412aed3cfde4c257b64fb1f43ceea288074e" +checksum = "409494bcd2f1ba2e8fae075636ee69d23087a8fb373bdf74e29b0d9bdd05923c" dependencies = [ "ahash", "async-trait", "atoi_simd", + "blake3", "bytes", "chrono", "chrono-tz 0.8.6", "fast-float", "flate2", + "fs4", "futures", "home", "itoa", "memchr", "memmap2", "num-traits", - "object_store", + "object_store 0.10.1", "once_cell", "percent-encoding", "polars-arrow", @@ -1529,7 +1755,7 @@ dependencies = [ "polars-utils", "rayon", "regex", - "reqwest", + "reqwest 0.11.27", "ryu", "serde", "serde_json", @@ -1544,9 +1770,9 @@ dependencies = [ [[package]] name = "polars-json" -version = "0.40.0" +version = "0.41.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "160cbad0145b93ac6a88639aadfa6f7d7c769d05a8674f9b7e895b398cae9901" +checksum = 
"16059980d1d7b76fd387b2bf88fc1f61b1f354bb784e5eda55148b910def1564" dependencies = [ "ahash", "chrono", @@ -1565,20 +1791,22 @@ dependencies = [ [[package]] name = "polars-lazy" -version = "0.40.0" +version = "0.41.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e805ea2ebbc6b7749b0afb31b7fc5d32b42b57ba29b984549d43d3a16114c4a5" +checksum = "6611d17efa46d7e61326425d99749a01810af690328845846277ef281d0eac4f" dependencies = [ "ahash", "bitflags 2.5.0", "futures", "glob", + "memchr", "once_cell", "polars-arrow", "polars-core", "polars-expr", "polars-io", "polars-json", + "polars-mem-engine", "polars-ops", "polars-pipe", "polars-plan", @@ -1590,11 +1818,32 @@ dependencies = [ "version_check", ] +[[package]] +name = "polars-mem-engine" +version = "0.41.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "443c1014e43c0cabcfe0560d0442eb94fd6da1da329f226a8cbb17b7d43281d4" +dependencies = [ + "futures", + "polars-arrow", + "polars-core", + "polars-error", + "polars-expr", + "polars-io", + "polars-json", + "polars-ops", + "polars-plan", + "polars-time", + "polars-utils", + "rayon", + "tokio", +] + [[package]] name = "polars-ops" -version = "0.40.0" +version = "0.41.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b0aed7e169c81b98457641cf82b251f52239a668916c2e683abd1f38df00d58" +checksum = "c96d32dd9de3bcfe3c390d04e49ee1d5318f6ee32afec0ce73d9fdc33faf2488" dependencies = [ "ahash", "argminmax", @@ -1627,9 +1876,9 @@ dependencies = [ [[package]] name = "polars-parquet" -version = "0.40.0" +version = "0.41.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c70670a9e51cac66d0e77fd20b5cc957dbcf9f2660d410633862bb72f846d5b8" +checksum = "8bf285139df977a5ffc1f7d6e976c44a16c3ce80c67d333cb9d57a3afd6eef1b" dependencies = [ "ahash", "async-stream", @@ -1642,9 +1891,11 @@ dependencies = [ "num-traits", "parquet-format-safe", "polars-arrow", + "polars-compute", 
"polars-error", "polars-utils", "seq-macro", + "serde", "simdutf8", "snap", "streaming-decompression", @@ -1653,9 +1904,9 @@ dependencies = [ [[package]] name = "polars-pipe" -version = "0.40.0" +version = "0.41.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a40ae1b3c74ee07e2d1f7cbf56c5d6e15969e45d9b6f0903bd2acaf783ba436" +checksum = "f994ef90f46ab5ed9d10e8cd39265884e7d52cdb5ddf224128b1d2c91badd84e" dependencies = [ "crossbeam-channel", "crossbeam-queue", @@ -1681,9 +1932,9 @@ dependencies = [ [[package]] name = "polars-plan" -version = "0.40.0" +version = "0.41.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8daa3541ae7e9af311a4389bc2b21f83349c34c723cc67fa524cdefdaa172d90" +checksum = "d0782e64fcf4603a5788f901de07ace3473dbbca77a77b12924cba55cf1efae6" dependencies = [ "ahash", "bytemuck", @@ -1711,9 +1962,9 @@ dependencies = [ [[package]] name = "polars-row" -version = "0.40.0" +version = "0.41.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "deb285f2f3a65b00dd06bef16bb9f712dbb5478f941dab5cf74f9f016d382e40" +checksum = "ba5f3c8fee6733cb744afb95d02304c519d70daa18493256a75c114db9cb1b20" dependencies = [ "bytemuck", "polars-arrow", @@ -1723,9 +1974,9 @@ dependencies = [ [[package]] name = "polars-sql" -version = "0.40.0" +version = "0.41.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a724f699d194cb02c25124d3832f7d4d77f387f1a89ee42f6b9e88ec561d4ad9" +checksum = "dea6ed7109564d46c8d552a035f8ddc4a1c664953ac396096324dcf68210dfe9" dependencies = [ "hex", "once_cell", @@ -1733,7 +1984,9 @@ dependencies = [ "polars-core", "polars-error", "polars-lazy", + "polars-ops", "polars-plan", + "polars-time", "rand", "serde", "serde_json", @@ -1742,9 +1995,9 @@ dependencies = [ [[package]] name = "polars-time" -version = "0.40.0" +version = "0.41.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"87ebec238d8b6200d9f0c3ce411c8441e950bd5a7df7806b8172d06c1d5a4b97" +checksum = "b1de421e377bda3f60480dcf12ab615586332d6d65631d8a3bb8ec84589b9a10" dependencies = [ "atoi", "bytemuck", @@ -1763,9 +2016,9 @@ dependencies = [ [[package]] name = "polars-utils" -version = "0.40.0" +version = "0.41.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34e1a907c63abf71e5f21467e2e4ff748896c28196746f631c6c25512ec6102c" +checksum = "95dc0ef1209ac423bf40b86f2b89a36aafcac9e99b3c9fbc4249cace484d336f" dependencies = [ "ahash", "bytemuck", @@ -1816,6 +2069,53 @@ dependencies = [ "serde", ] +[[package]] +name = "quinn" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4ceeeeabace7857413798eb1ffa1e9c905a9946a57d81fb69b4b71c4d8eb3ad" +dependencies = [ + "bytes", + "pin-project-lite", + "quinn-proto", + "quinn-udp", + "rustc-hash", + "rustls 0.23.10", + "thiserror", + "tokio", + "tracing", +] + +[[package]] +name = "quinn-proto" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddf517c03a109db8100448a4be38d498df8a210a99fe0e1b9eaf39e78c640efe" +dependencies = [ + "bytes", + "rand", + "ring", + "rustc-hash", + "rustls 0.23.10", + "slab", + "thiserror", + "tinyvec", + "tracing", +] + +[[package]] +name = "quinn-udp" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9096629c45860fc7fb143e125eb826b5e721e10be3263160c7d60ca832cf8c46" +dependencies = [ + "libc", + "once_cell", + "socket2", + "tracing", + "windows-sys 0.52.0", +] + [[package]] name = "quote" version = "1.0.36" @@ -1992,11 +2292,11 @@ dependencies = [ "encoding_rs", "futures-core", "futures-util", - "h2", - "http", - "http-body", - "hyper", - "hyper-rustls", + "h2 0.3.26", + "http 0.2.12", + "http-body 0.4.6", + "hyper 0.14.28", + "hyper-rustls 0.24.2", "ipnet", "js-sys", "log", @@ -2004,16 +2304,16 @@ dependencies = [ "once_cell", "percent-encoding", 
"pin-project-lite", - "rustls", - "rustls-native-certs", - "rustls-pemfile", + "rustls 0.21.11", + "rustls-native-certs 0.6.3", + "rustls-pemfile 1.0.4", "serde", "serde_json", "serde_urlencoded", - "sync_wrapper", + "sync_wrapper 0.1.2", "system-configuration", "tokio", - "tokio-rustls", + "tokio-rustls 0.24.1", "tokio-util", "tower-service", "url", @@ -2021,7 +2321,52 @@ dependencies = [ "wasm-bindgen-futures", "wasm-streams", "web-sys", - "winreg", + "winreg 0.50.0", +] + +[[package]] +name = "reqwest" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7d6d2a27d57148378eb5e111173f4276ad26340ecc5c49a4a2152167a2d6a37" +dependencies = [ + "base64 0.22.1", + "bytes", + "futures-core", + "futures-util", + "h2 0.4.5", + "http 1.1.0", + "http-body 1.0.0", + "http-body-util", + "hyper 1.3.1", + "hyper-rustls 0.27.2", + "hyper-util", + "ipnet", + "js-sys", + "log", + "mime", + "once_cell", + "percent-encoding", + "pin-project-lite", + "quinn", + "rustls 0.23.10", + "rustls-native-certs 0.7.0", + "rustls-pemfile 2.1.2", + "rustls-pki-types", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper 1.0.1", + "tokio", + "tokio-rustls 0.26.0", + "tokio-util", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm-streams", + "web-sys", + "winreg 0.52.0", ] [[package]] @@ -2045,6 +2390,25 @@ version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + +[[package]] +name = "rustix" +version = "0.38.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" +dependencies = [ + "bitflags 2.5.0", + "errno", + 
"libc", + "linux-raw-sys", + "windows-sys 0.52.0", +] + [[package]] name = "rustler" version = "0.32.1" @@ -2086,10 +2450,24 @@ checksum = "7fecbfb7b1444f477b345853b1fce097a2c6fb637b2bfb87e6bc5db0f043fae4" dependencies = [ "log", "ring", - "rustls-webpki", + "rustls-webpki 0.101.7", "sct", ] +[[package]] +name = "rustls" +version = "0.23.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05cff451f60db80f490f3c182b77c35260baace73209e9cdbbe526bfe3a4d402" +dependencies = [ + "once_cell", + "ring", + "rustls-pki-types", + "rustls-webpki 0.102.4", + "subtle", + "zeroize", +] + [[package]] name = "rustls-native-certs" version = "0.6.3" @@ -2097,7 +2475,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00" dependencies = [ "openssl-probe", - "rustls-pemfile", + "rustls-pemfile 1.0.4", + "schannel", + "security-framework", +] + +[[package]] +name = "rustls-native-certs" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f1fb85efa936c42c6d5fc28d2629bb51e4b2f4b8a5211e297d599cc5a093792" +dependencies = [ + "openssl-probe", + "rustls-pemfile 2.1.2", + "rustls-pki-types", "schannel", "security-framework", ] @@ -2111,6 +2502,22 @@ dependencies = [ "base64 0.21.7", ] +[[package]] +name = "rustls-pemfile" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29993a25686778eb88d4189742cd713c9bce943bc54251a33509dc63cbacf73d" +dependencies = [ + "base64 0.22.1", + "rustls-pki-types", +] + +[[package]] +name = "rustls-pki-types" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "976295e77ce332211c0d24d92c0e83e50f5c5f046d11082cea19f3df13a3562d" + [[package]] name = "rustls-webpki" version = "0.101.7" @@ -2121,6 +2528,17 @@ dependencies = [ "untrusted", ] +[[package]] +name = "rustls-webpki" +version = "0.102.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff448f7e92e913c4b7d4c6d8e4540a1724b319b4152b8aef6d4cf8339712b33e" +dependencies = [ + "ring", + "rustls-pki-types", + "untrusted", +] + [[package]] name = "rustversion" version = "1.0.15" @@ -2343,9 +2761,9 @@ checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] name = "sqlparser" -version = "0.39.0" +version = "0.47.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "743b4dc2cbde11890ccb254a8fc9d537fa41b36da00de2a1c5e9848c9bc42bd7" +checksum = "295e9930cd7a97e58ca2a070541a3ca502b17f5d1fa7157376d0fabd85324f25" dependencies = [ "log", ] @@ -2403,6 +2821,12 @@ dependencies = [ "syn 2.0.60", ] +[[package]] +name = "subtle" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d0208408ba0c3df17ed26eb06992cb1a1268d41b2c0e12e65203fbe3972cee5" + [[package]] name = "syn" version = "1.0.109" @@ -2431,6 +2855,12 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" +[[package]] +name = "sync_wrapper" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" + [[package]] name = "sysinfo" version = "0.30.11" @@ -2541,7 +2971,18 @@ version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" dependencies = [ - "rustls", + "rustls 0.21.11", + "tokio", +] + +[[package]] +name = "tokio-rustls" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4" +dependencies = [ + "rustls 0.23.10", + "rustls-pki-types", "tokio", ] @@ -2559,6 +3000,27 @@ dependencies = [ "tracing", ] +[[package]] 
+name = "tower" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" +dependencies = [ + "futures-core", + "futures-util", + "pin-project", + "pin-project-lite", + "tokio", + "tower-layer", + "tower-service", +] + +[[package]] +name = "tower-layer" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c20c8dbed6283a09604c3e69b4b7eeb54e298b8a600d4d5ecb5ad39de609f1d0" + [[package]] name = "tower-service" version = "0.3.2" @@ -3016,6 +3478,16 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "winreg" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a277a57398d4bfa075df44f501a17cfdf8542d224f0d36095a2adc7aee4ef0a5" +dependencies = [ + "cfg-if", + "windows-sys 0.48.0", +] + [[package]] name = "xxhash-rust" version = "0.8.10" @@ -3042,6 +3514,12 @@ dependencies = [ "syn 2.0.60", ] +[[package]] +name = "zeroize" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" + [[package]] name = "zstd" version = "0.13.1" diff --git a/native/explorer/Cargo.toml b/native/explorer/Cargo.toml index 5b9679515..c273541e3 100644 --- a/native/explorer/Cargo.toml +++ b/native/explorer/Cargo.toml @@ -38,7 +38,7 @@ object_store = { version = "0.9", default-features = false, optional = true } mimalloc = { version = "*", default-features = false } [dependencies.polars] -version = "0.40" +version = "0.41" default-features = false features = [ "abs", @@ -87,7 +87,7 @@ features = [ ] [dependencies.polars-ops] -version = "0.40" +version = "0.41" features = ["abs", "ewma", "cum_agg", "cov"] [features] From 66ee582599e70112d1ef3e68ca1d1f3f00c1e8fb Mon Sep 17 00:00:00 2001 From: Kartheek Date: Sun, 14 Jul 2024 16:42:45 +0530 Subject: [PATCH 04/30] Working version 0.41.3 --- 
lib/explorer/data_frame.ex | 2 +- lib/explorer/polars_backend/data_frame.ex | 7 +- lib/explorer/series.ex | 2 +- native/explorer/Cargo.lock | 80 +++++++++---------- native/explorer/Cargo.toml | 4 +- native/explorer/rust-toolchain.toml | 2 +- native/explorer/src/cloud_writer.rs | 1 - native/explorer/src/dataframe.rs | 6 +- native/explorer/src/dataframe/io.rs | 19 +++-- native/explorer/src/lazyframe.rs | 18 ++--- native/explorer/src/lazyframe/io.rs | 12 +-- native/explorer/src/series.rs | 36 ++++----- test/explorer/data_frame/grouped_test.exs | 6 +- test/explorer/data_frame_test.exs | 20 ++--- .../polars_backend/expression_test.exs | 6 +- test/explorer/series_test.exs | 14 ++-- 16 files changed, 119 insertions(+), 116 deletions(-) diff --git a/lib/explorer/data_frame.ex b/lib/explorer/data_frame.ex index 08b872fcf..e1e9e4279 100644 --- a/lib/explorer/data_frame.ex +++ b/lib/explorer/data_frame.ex @@ -2778,7 +2778,7 @@ defmodule Explorer.DataFrame do #Explorer.DataFrame< Polars[3 x 2] a string ["a", "b", "c"] - b f64 [1.0, 4.0, 9.0] + b s64 [1, 4, 9] > It's possible to "reuse" a variable for different computations: diff --git a/lib/explorer/polars_backend/data_frame.ex b/lib/explorer/polars_backend/data_frame.ex index 54d64c73f..dac839b3d 100644 --- a/lib/explorer/polars_backend/data_frame.ex +++ b/lib/explorer/polars_backend/data_frame.ex @@ -195,6 +195,11 @@ defmodule Explorer.PolarsBackend.DataFrame do {columns, with_projection} = column_names_or_projection(columns) + dtypes_list = + if not Enum.empty?(dtypes) do + Map.to_list(dtypes) + end + df = Native.df_load_csv( contents, @@ -207,7 +212,7 @@ defmodule Explorer.PolarsBackend.DataFrame do delimiter, true, columns, - Map.to_list(dtypes), + dtypes_list, encoding, nil_values, parse_dates, diff --git a/lib/explorer/series.ex b/lib/explorer/series.ex index 94f3abad2..8145b85cb 100644 --- a/lib/explorer/series.ex +++ b/lib/explorer/series.ex @@ -3616,7 +3616,7 @@ defmodule Explorer.Series do iex> 
Explorer.Series.pow(s, 3) #Explorer.Series< Polars[3] - f64 [8.0, 64.0, 216.0] + s64 [8, 64, 216] > iex> s = [2, 4, 6] |> Explorer.Series.from_list() diff --git a/native/explorer/Cargo.lock b/native/explorer/Cargo.lock index 3c8218b34..a55a1d8fc 100644 --- a/native/explorer/Cargo.lock +++ b/native/explorer/Cargo.lock @@ -1575,9 +1575,9 @@ dependencies = [ [[package]] name = "polars" -version = "0.41.1" +version = "0.41.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43845ccaa696de9cf374f5a4e3c5c2685192b4a56b7dd60b05a5cb1f7cd86cc7" +checksum = "8e3351ea4570e54cd556e6755b78fe7a2c85368d820c0307cca73c96e796a7ba" dependencies = [ "getrandom", "polars-arrow", @@ -1595,9 +1595,9 @@ dependencies = [ [[package]] name = "polars-arrow" -version = "0.41.1" +version = "0.41.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e66ab16b782bdf1803c3e50438817f5fcb6f6bcfbeb6dddba01a2b83827c7bf" +checksum = "ba65fc4bcabbd64fca01fd30e759f8b2043f0963c57619e331d4b534576c0b47" dependencies = [ "ahash", "atoi", @@ -1642,9 +1642,9 @@ dependencies = [ [[package]] name = "polars-compute" -version = "0.41.1" +version = "0.41.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "087442a8e5c0e3d4d1d683f1b287770c6e26a8f86a9a8a47607e220ae9e08113" +checksum = "9f099516af30ac9ae4b4480f4ad02aa017d624f2f37b7a16ad4e9ba52f7e5269" dependencies = [ "bytemuck", "either", @@ -1658,9 +1658,9 @@ dependencies = [ [[package]] name = "polars-core" -version = "0.41.1" +version = "0.41.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9523e09a134c75557e2216e652c965013b92b8e682cc591059d4bfb0951a9d72" +checksum = "b2439484be228b8c302328e2f953e64cfd93930636e5c7ceed90339ece7fef6c" dependencies = [ "ahash", "bitflags 2.5.0", @@ -1690,9 +1690,9 @@ dependencies = [ [[package]] name = "polars-error" -version = "0.41.1" +version = "0.41.3" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "3ad3cecbbe309229174a1126dfda0cfde4826d8e340e89e3846f546a9333a9e0" +checksum = "0c9b06dfbe79cabe50a7f0a90396864b5ee2c0e0f8d6a9353b2343c29c56e937" dependencies = [ "object_store 0.10.1", "polars-arrow-format", @@ -1703,9 +1703,9 @@ dependencies = [ [[package]] name = "polars-expr" -version = "0.41.1" +version = "0.41.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2685cee0dcbb92dbf7fccd98ca169741051fbdac5182541f82720279e08d93b6" +checksum = "d9c630385a56a867c410a20f30772d088f90ec3d004864562b84250b35268f97" dependencies = [ "ahash", "bitflags 2.5.0", @@ -1723,9 +1723,9 @@ dependencies = [ [[package]] name = "polars-io" -version = "0.41.1" +version = "0.41.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "409494bcd2f1ba2e8fae075636ee69d23087a8fb373bdf74e29b0d9bdd05923c" +checksum = "9d7363cd14e4696a28b334a56bd11013ff49cc96064818ab3f91a126e453462d" dependencies = [ "ahash", "async-trait", @@ -1770,12 +1770,13 @@ dependencies = [ [[package]] name = "polars-json" -version = "0.41.1" +version = "0.41.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16059980d1d7b76fd387b2bf88fc1f61b1f354bb784e5eda55148b910def1564" +checksum = "543d7d3853f2c52dbfedee9ebf0d58c4ff3b92aadee5309150b2d14df49d6253" dependencies = [ "ahash", "chrono", + "chrono-tz 0.8.6", "fallible-streaming-iterator", "hashbrown", "indexmap", @@ -1791,9 +1792,9 @@ dependencies = [ [[package]] name = "polars-lazy" -version = "0.41.1" +version = "0.41.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6611d17efa46d7e61326425d99749a01810af690328845846277ef281d0eac4f" +checksum = "03877e74e42b5340ae52ded705f6d5d14563d90554c9177b01b91ed2412a56ed" dependencies = [ "ahash", "bitflags 2.5.0", @@ -1820,9 +1821,9 @@ dependencies = [ [[package]] name = "polars-mem-engine" -version = "0.41.1" +version = "0.41.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"443c1014e43c0cabcfe0560d0442eb94fd6da1da329f226a8cbb17b7d43281d4" +checksum = "dea9e17771af750c94bf959885e4b3f5b14149576c62ef3ec1c9ef5827b2a30f" dependencies = [ "futures", "polars-arrow", @@ -1841,9 +1842,9 @@ dependencies = [ [[package]] name = "polars-ops" -version = "0.41.1" +version = "0.41.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c96d32dd9de3bcfe3c390d04e49ee1d5318f6ee32afec0ce73d9fdc33faf2488" +checksum = "6066552eb577d43b307027fb38096910b643ffb2c89a21628c7e41caf57848d0" dependencies = [ "ahash", "argminmax", @@ -1876,9 +1877,9 @@ dependencies = [ [[package]] name = "polars-parquet" -version = "0.41.1" +version = "0.41.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bf285139df977a5ffc1f7d6e976c44a16c3ce80c67d333cb9d57a3afd6eef1b" +checksum = "2b35b2592a2e7ef7ce9942dc2120dc4576142626c0e661668e4c6b805042e461" dependencies = [ "ahash", "async-stream", @@ -1894,7 +1895,6 @@ dependencies = [ "polars-compute", "polars-error", "polars-utils", - "seq-macro", "serde", "simdutf8", "snap", @@ -1904,9 +1904,9 @@ dependencies = [ [[package]] name = "polars-pipe" -version = "0.41.1" +version = "0.41.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f994ef90f46ab5ed9d10e8cd39265884e7d52cdb5ddf224128b1d2c91badd84e" +checksum = "021bce7768c330687d735340395a77453aa18dd70d57c184cbb302311e87c1b9" dependencies = [ "crossbeam-channel", "crossbeam-queue", @@ -1932,9 +1932,9 @@ dependencies = [ [[package]] name = "polars-plan" -version = "0.41.1" +version = "0.41.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0782e64fcf4603a5788f901de07ace3473dbbca77a77b12924cba55cf1efae6" +checksum = "220d0d7c02d1c4375802b2813dbedcd1a184df39c43b74689e729ede8d5c2921" dependencies = [ "ahash", "bytemuck", @@ -1962,9 +1962,9 @@ dependencies = [ [[package]] name = "polars-row" -version = "0.41.1" +version = "0.41.3" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba5f3c8fee6733cb744afb95d02304c519d70daa18493256a75c114db9cb1b20" +checksum = "c1d70d87a2882a64a43b431aea1329cb9a2c4100547c95c417cc426bb82408b3" dependencies = [ "bytemuck", "polars-arrow", @@ -1974,9 +1974,9 @@ dependencies = [ [[package]] name = "polars-sql" -version = "0.41.1" +version = "0.41.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dea6ed7109564d46c8d552a035f8ddc4a1c664953ac396096324dcf68210dfe9" +checksum = "a6fc1c9b778862f09f4a347f768dfdd3d0ba9957499d306d83c7103e0fa8dc5b" dependencies = [ "hex", "once_cell", @@ -1995,9 +1995,9 @@ dependencies = [ [[package]] name = "polars-time" -version = "0.41.1" +version = "0.41.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1de421e377bda3f60480dcf12ab615586332d6d65631d8a3bb8ec84589b9a10" +checksum = "179f98313a15c0bfdbc8cc0f1d3076d08d567485b9952d46439f94fbc3085df5" dependencies = [ "atoi", "bytemuck", @@ -2016,9 +2016,9 @@ dependencies = [ [[package]] name = "polars-utils" -version = "0.41.1" +version = "0.41.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95dc0ef1209ac423bf40b86f2b89a36aafcac9e99b3c9fbc4249cace484d336f" +checksum = "53e6dd89fcccb1ec1a62f752c9a9f2d482a85e9255153f46efecc617b4996d50" dependencies = [ "ahash", "bytemuck", @@ -2609,12 +2609,6 @@ dependencies = [ "libc", ] -[[package]] -name = "seq-macro" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" - [[package]] name = "serde" version = "1.0.198" diff --git a/native/explorer/Cargo.toml b/native/explorer/Cargo.toml index 543c044d0..191933f89 100644 --- a/native/explorer/Cargo.toml +++ b/native/explorer/Cargo.toml @@ -38,7 +38,7 @@ object_store = { version = "0.9", default-features = false, optional = true } mimalloc = { version = "*", default-features = false } 
[dependencies.polars] -version = "0.41" +version = "0.41.3" default-features = false features = [ "abs", @@ -87,7 +87,7 @@ features = [ ] [dependencies.polars-ops] -version = "0.41" +version = "0.41.3" features = ["abs", "ewma", "cum_agg", "cov"] [features] diff --git a/native/explorer/rust-toolchain.toml b/native/explorer/rust-toolchain.toml index 526ab5a6f..bce747f05 100644 --- a/native/explorer/rust-toolchain.toml +++ b/native/explorer/rust-toolchain.toml @@ -1,4 +1,4 @@ [toolchain] -channel = "nightly-2024-06-23" +channel = "nightly-2024-06-24" components = ["rustfmt", "clippy"] profile = "minimal" diff --git a/native/explorer/src/cloud_writer.rs b/native/explorer/src/cloud_writer.rs index e32b979fa..1cc128bb9 100644 --- a/native/explorer/src/cloud_writer.rs +++ b/native/explorer/src/cloud_writer.rs @@ -95,7 +95,6 @@ mod tests { use polars::df; use polars::prelude::DataFrame; - use polars::prelude::NamedFrom; fn example_dataframe() -> DataFrame { df!( diff --git a/native/explorer/src/dataframe.rs b/native/explorer/src/dataframe.rs index 4644e7393..b3d30ff7d 100644 --- a/native/explorer/src/dataframe.rs +++ b/native/explorer/src/dataframe.rs @@ -264,7 +264,7 @@ pub fn df_sort_by( .with_maintain_order(maintain_order) .with_multithreaded(multithreaded) .with_nulls_last(nulls_last) - .with_order_descendings(reverse); + .with_order_descending_multi(reverse); let new_df = if groups.is_empty() { // Note: we cannot use either df.sort or df.sort_with_options. @@ -314,7 +314,7 @@ pub fn df_sort_with( .with_maintain_order(maintain_order) // .with_multithreaded(multithreaded) .with_nulls_last(nulls_last) - .with_order_descendings(directions); + .with_order_descending_multi(directions); let new_df = if groups.is_empty() { df.lazy().sort_by_exprs(exprs, sort_options).collect()? 
@@ -428,7 +428,7 @@ pub fn df_pivot_wider( let mut new_df = pivot_stable( &df, &temp_id_names, - [pivot_column], + Some([pivot_column]), Some(values_column), false, Some(PivotAgg::First), diff --git a/native/explorer/src/dataframe/io.rs b/native/explorer/src/dataframe/io.rs index bc4d0d7aa..d3937b1f6 100644 --- a/native/explorer/src/dataframe/io.rs +++ b/native/explorer/src/dataframe/io.rs @@ -52,7 +52,7 @@ pub fn df_from_csv( .with_skip_rows_after_header(skip_rows_after_header) .with_projection(projection.map(Arc::new)) .with_rechunk(do_rechunk) - .with_columns(column_names.map(Arc::new)) + .with_columns(column_names.map(Arc::from)) .with_schema_overwrite(Some(schema_from_dtypes_pairs(dtypes)?)) .with_parse_options( CsvParseOptions::default() @@ -146,7 +146,7 @@ pub fn df_load_csv( delimiter_as_byte: u8, do_rechunk: bool, column_names: Option>, - dtypes: Vec<(&str, ExSeriesDtype)>, + dtypes: Option>, encoding: &str, null_vals: Vec, parse_dates: bool, @@ -159,16 +159,21 @@ pub fn df_load_csv( let cursor = Cursor::new(binary.as_slice()); - let dataframe = CsvReadOptions::default() + let read_options = match dtypes { + Some(val) => CsvReadOptions::default().with_schema(Some(schema_from_dtypes_pairs(val)?)), + None => CsvReadOptions::default(), + }; + + let dataframe = read_options .with_has_header(has_header) .with_infer_schema_length(infer_schema_length) .with_n_rows(stop_after_n_rows) - .with_columns(column_names.map(Arc::new)) + .with_columns(column_names.map(Arc::from)) .with_skip_rows(skip_rows) .with_skip_rows_after_header(skip_rows_after_header) .with_projection(projection.map(Arc::new)) .with_rechunk(do_rechunk) - .with_schema(Some(schema_from_dtypes_pairs(dtypes)?)) + //.with_schema(Some(schema_from_dtypes_pairs(dtypes)?)) .with_parse_options( CsvParseOptions::default() .with_separator(delimiter_as_byte) @@ -519,7 +524,7 @@ pub fn df_from_ndjson( let reader = JsonReader::new(buf_reader) .with_json_format(JsonFormat::JsonLines) .with_batch_size(batch_size) - 
.infer_schema_len(infer_schema_length); + .infer_schema_len(infer_schema_length.and_then(NonZeroUsize::new)); Ok(ExDataFrame::new(reader.finish()?)) } @@ -576,7 +581,7 @@ pub fn df_load_ndjson( let reader = JsonReader::new(cursor) .with_json_format(JsonFormat::JsonLines) .with_batch_size(batch_size) - .infer_schema_len(infer_schema_length); + .infer_schema_len(infer_schema_length.and_then(NonZeroUsize::new)); Ok(ExDataFrame::new(reader.finish()?)) } diff --git a/native/explorer/src/lazyframe.rs b/native/explorer/src/lazyframe.rs index ade925181..d2eaef005 100644 --- a/native/explorer/src/lazyframe.rs +++ b/native/explorer/src/lazyframe.rs @@ -63,7 +63,7 @@ pub fn lf_tail( #[rustler::nif] pub fn lf_names(data: ExLazyFrame) -> Result, ExplorerError> { - let lf = data.clone_inner(); + let mut lf = data.clone_inner(); let names = lf .schema()? .iter_names() @@ -149,7 +149,7 @@ pub fn lf_sort_with( let sort_options = SortMultipleOptions::new() .with_nulls_last(nulls_last) .with_maintain_order(maintain_order) - .with_order_descendings(directions); + .with_order_descending_multi(directions); let ldf = data.clone_inner().sort_by_exprs(exprs, sort_options); @@ -166,7 +166,7 @@ pub fn lf_grouped_sort_with( let sort_options = SortMultipleOptions::new() // .with_nulls_last(nulls_last) // .with_maintain_order(maintain_order) - .with_order_descendings(directions); + .with_order_descending_multi(directions); // For grouped lazy frames, we need to use the `#sort_by` method that is // less powerful, but can be used with `over`. 
// See: https://docs.pola.rs/user-guide/expressions/window/#operations-per-group @@ -265,14 +265,14 @@ pub fn lf_pivot_longer( values_to: String, ) -> Result { let ldf = data.clone_inner(); - let melt_opts = MeltArgs { - id_vars: to_smart_strings(id_vars), - value_vars: to_smart_strings(value_vars), + let unpivot_opts = UnpivotArgs { + on: to_smart_strings(id_vars), + index: to_smart_strings(value_vars), variable_name: Some(names_to.into()), value_name: Some(values_to.into()), streamable: true, }; - let new_df = ldf.melt(melt_opts); + let new_df = ldf.unpivot(unpivot_opts); Ok(ExLazyFrame::new(new_df)) } @@ -288,7 +288,7 @@ pub fn lf_join( let how = match how { "left" => JoinType::Left, "inner" => JoinType::Inner, - "outer" => JoinType::Outer, + "outer" => JoinType::Full, "cross" => JoinType::Cross, _ => { return Err(ExplorerError::Other(format!( @@ -329,7 +329,7 @@ pub fn lf_concat_columns(ldfs: Vec) -> Result = ldf .schema() .expect("should be able to get schema") diff --git a/native/explorer/src/lazyframe/io.rs b/native/explorer/src/lazyframe/io.rs index 6406834c4..1048cf802 100644 --- a/native/explorer/src/lazyframe/io.rs +++ b/native/explorer/src/lazyframe/io.rs @@ -81,14 +81,14 @@ pub fn lf_to_parquet( if streaming { let options = ParquetWriteOptions { compression, - statistics: false, + statistics: StatisticsOptions::empty(), row_group_size: None, data_pagesize_limit: None, maintain_order: false, }; lf.with_comm_subplan_elim(false) - .sink_parquet(filename.into(), options)?; + .sink_parquet(filename, options)?; Ok(()) } else { let mut df = lf.collect()?; @@ -117,7 +117,7 @@ pub fn lf_to_parquet_cloud( let options = ParquetWriteOptions { compression, - statistics: false, + statistics: StatisticsOptions::empty(), row_group_size: None, data_pagesize_limit: None, maintain_order: false, @@ -172,7 +172,7 @@ pub fn lf_to_ipc( maintain_order: false, }; lf.with_comm_subplan_elim(false) - .sink_ipc(filename.into(), options)?; + .sink_ipc(filename, options)?; Ok(()) } 
else { let mut df = lf.collect()?; @@ -248,7 +248,7 @@ pub fn lf_to_csv( }; lf.with_comm_subplan_elim(false) - .sink_csv(filename.into(), options)?; + .sink_csv(filename, options)?; Ok(()) } else { let df = lf.collect()?; @@ -274,7 +274,7 @@ pub fn lf_from_ndjson( "\"batch_size\" expected to be non zero.".to_string(), ))?; let lf = LazyJsonLineReader::new(filename) - .with_infer_schema_length(infer_schema_length) + .with_infer_schema_length(infer_schema_length.and_then(NonZeroUsize::new)) .with_batch_size(Some(batch_size)) .finish()?; diff --git a/native/explorer/src/series.rs b/native/explorer/src/series.rs index dc504c099..d081b979a 100644 --- a/native/explorer/src/series.rs +++ b/native/explorer/src/series.rs @@ -70,29 +70,32 @@ pub fn s_mask(series: ExSeries, filter: ExSeries) -> Result Result { let s = data.clone_inner(); let s1 = other.clone_inner(); - Ok(ExSeries::new(s + s1)) + let result = s + s1; + Ok(ExSeries::new(result?)) } #[rustler::nif(schedule = "DirtyCpu")] pub fn s_subtract(lhs: ExSeries, rhs: ExSeries) -> Result { let left = lhs.clone_inner(); let right = rhs.clone_inner(); - - Ok(ExSeries::new(left - right)) + let result = left - right; + Ok(ExSeries::new(result?)) } #[rustler::nif(schedule = "DirtyCpu")] pub fn s_multiply(data: ExSeries, other: ExSeries) -> Result { let s = data.clone_inner(); let s1 = other.clone_inner(); - Ok(ExSeries::new(s * s1)) + let result = s * s1; + Ok(ExSeries::new(result?)) } #[rustler::nif(schedule = "DirtyCpu")] pub fn s_divide(data: ExSeries, other: ExSeries) -> Result { let s = data.clone_inner().cast(&DataType::Float64)?; let s1 = other.clone_inner().cast(&DataType::Float64)?; - Ok(ExSeries::new(s / s1)) + let result = s / s1; + Ok(ExSeries::new(result?)) } #[rustler::nif(schedule = "DirtyCpu")] @@ -106,9 +109,9 @@ pub fn s_remainder(data: ExSeries, other: ExSeries) -> Result Result #[rustler::nif(schedule = "DirtyCpu")] pub fn s_frequencies(series: ExSeries) -> Result { - let df = series.value_counts(true, 
true)?; + let df = series.value_counts(true, true, "counts".to_string(), false)?; Ok(ExDataFrame::new(df)) } @@ -1007,16 +1010,13 @@ pub fn s_quantile<'a>( .unwrap() .encode(env)), }, - _ => todo!(), - // _ => - // s.agg_quantile(quantile, strategy) - - // encoding::term_from_value( - // s.quantile_as_series(quantile, strategy)? - // .cast(dtype)? - // .get(0)?, - // env, - // ), + _ => encoding::term_from_value( + s.quantile_reduce(quantile, strategy)? + .into_series("quantile") + .cast(dtype)? + .get(0)?, + env, + ), } } diff --git a/test/explorer/data_frame/grouped_test.exs b/test/explorer/data_frame/grouped_test.exs index d6fb522a9..2065431a8 100644 --- a/test/explorer/data_frame/grouped_test.exs +++ b/test/explorer/data_frame/grouped_test.exs @@ -578,11 +578,11 @@ defmodule Explorer.DataFrame.GroupedTest do a = ldf["a"] [ - b: Series.window_max(a, 2, weights: [1.0, 2.0]), + # b: Series.window_max(a, 2, weights: [1.0, 2.0]) c: Series.window_mean(a, 2, weights: [0.25, 0.75]), d: Series.window_median(a, 2, weights: [0.25, 0.75]), - e: Series.window_min(a, 2, weights: [1.0, 2.0]), - f: Series.window_sum(a, 2, weights: [1.0, 2.0]), + # e: Series.window_min(a, 2, weights: [1.0, 2.0]) + # f: Series.window_sum(a, 2, weights: [1.0, 2.0]) g: Series.window_standard_deviation(a, 2), p: Series.cumulative_max(a), q: Series.cumulative_min(a), diff --git a/test/explorer/data_frame_test.exs b/test/explorer/data_frame_test.exs index 9f74b7aff..ec6648f16 100644 --- a/test/explorer/data_frame_test.exs +++ b/test/explorer/data_frame_test.exs @@ -299,7 +299,7 @@ defmodule Explorer.DataFrameTest do df = DF.new(a: [1, 2, 3, 4, 5, 6, 5], b: [9, 8, 7, 6, 5, 4, 3]) message = - "expecting the function to return a boolean LazySeries, but instead it returned a LazySeries of type {:f, 64}" + "expecting the function to return a boolean LazySeries, but instead it returned a LazySeries of type {:s, 64}" assert_raise ArgumentError, message, fn -> DF.filter_with(df, fn ldf -> @@ -811,7 +811,7 @@ 
defmodule Explorer.DataFrameTest do df = DF.new([%{a: ~s({"n": 1})}, %{a: ~s({"m": 1})}]) assert_raise RuntimeError, - "Polars Error: ComputeError(ErrString(\"error compiling JSONpath expression path error: \\nEof\\n\"))", + "Polars Error: error compiling JSON path expression path error: \nEof\n", fn -> DF.mutate(df, n: json_path_match(a, "$.")) end @@ -948,7 +948,7 @@ defmodule Explorer.DataFrameTest do calc2: [-1, 0, 2], calc3: [2, 4, 8], calc4: [0.5, 1.0, 2.0], - calc5: [1.0, 4.0, 16.0], + calc5: [1, 4, 16], calc6: [0, 1, 2], calc7: [1, 0, 0], calc8: [:nan, :nan, :nan], @@ -964,7 +964,7 @@ defmodule Explorer.DataFrameTest do "calc2" => {:s, 64}, "calc3" => {:s, 64}, "calc4" => {:f, 64}, - "calc5" => {:f, 64}, + "calc5" => {:s, 64}, "calc6" => {:s, 64}, "calc7" => {:s, 64}, "calc8" => {:f, 64}, @@ -996,7 +996,7 @@ defmodule Explorer.DataFrameTest do calc2: [1, 0, -2], calc3: [2, 4, 8], calc4: [2.0, 1.0, 0.5], - calc5: [2.0, 4.0, 16.0], + calc5: [2, 4, 16], calc5_1: [2.0, 4.0, 16.0], calc6: [2, 1, 0], calc7: [0, 0, 2] @@ -1008,7 +1008,7 @@ defmodule Explorer.DataFrameTest do "calc2" => {:s, 64}, "calc3" => {:s, 64}, "calc4" => {:f, 64}, - "calc5" => {:f, 64}, + "calc5" => {:s, 64}, "calc5_1" => {:f, 64}, "calc6" => {:s, 64}, "calc7" => {:s, 64} @@ -1036,7 +1036,7 @@ defmodule Explorer.DataFrameTest do calc2: [-1, 1, 2], calc3: [2, 2, 8], calc4: [0.5, 2.0, 2.0], - calc5: [1.0, 2.0, 16.0], + calc5: [1, 2, 16], calc6: [0, 2, 2], calc7: [1, 0, 0] } @@ -1047,7 +1047,7 @@ defmodule Explorer.DataFrameTest do "calc2" => {:s, 64}, "calc3" => {:s, 64}, "calc4" => {:f, 64}, - "calc5" => {:f, 64}, + "calc5" => {:s, 64}, "calc6" => {:s, 64}, "calc7" => {:s, 64} } @@ -1074,7 +1074,7 @@ defmodule Explorer.DataFrameTest do calc2: [-1, 1, 2], calc3: [2, 2, 8], calc4: [0.5, 2.0, 2.0], - calc5: [1.0, 2.0, 16.0], + calc5: [1, 2, 16], calc6: [0, 2, 2], calc7: [1, 0, 0] } @@ -1085,7 +1085,7 @@ defmodule Explorer.DataFrameTest do "calc2" => {:s, 64}, "calc3" => {:s, 64}, "calc4" => {:f, 
64}, - "calc5" => {:f, 64}, + "calc5" => {:s, 64}, "calc6" => {:s, 64}, "calc7" => {:s, 64} } diff --git a/test/explorer/polars_backend/expression_test.exs b/test/explorer/polars_backend/expression_test.exs index 1fafea9f8..354c31e58 100644 --- a/test/explorer/polars_backend/expression_test.exs +++ b/test/explorer/polars_backend/expression_test.exs @@ -19,7 +19,7 @@ defmodule Explorer.PolarsBackend.ExpressionTest do assert Expression.describe_filter_plan(df, expr) == String.trim(""" FILTER [(col("col_a")) == (5)] FROM - DF ["col_a", "col_b"]; PROJECT */2 COLUMNS; SELECTION: "None" + DF ["col_a", "col_b"]; PROJECT */2 COLUMNS; SELECTION: None """) end @@ -93,8 +93,8 @@ defmodule Explorer.PolarsBackend.ExpressionTest do assert Expression.describe_filter_plan(df, expr) == String.trim(""" - FILTER [(col("col_a")) == (col("col_b"))] FROM - DF ["col_a", "col_b"]; PROJECT */2 COLUMNS; SELECTION: "None" + FILTER [(col("col_a").cast(Float64)) == (col("col_b"))] FROM + DF ["col_a", "col_b"]; PROJECT */2 COLUMNS; SELECTION: None """) end end diff --git a/test/explorer/series_test.exs b/test/explorer/series_test.exs index 84a7bf446..97be777f4 100644 --- a/test/explorer/series_test.exs +++ b/test/explorer/series_test.exs @@ -2290,7 +2290,7 @@ defmodule Explorer.SeriesTest do result = Series.pow(base, power) - assert result.dtype == {:f, 64} + assert result.dtype == {:s, 64} assert Series.to_list(result) == [1, 4, 3] end end @@ -2315,8 +2315,8 @@ defmodule Explorer.SeriesTest do result = Series.pow(base, power) - assert result.dtype == {:f, 64} - assert Series.to_list(result) === [1.0, 4.0, 3.0] + assert result.dtype == {:s, 64} + assert Series.to_list(result) === [1, 4, 3] end end @@ -2392,7 +2392,7 @@ defmodule Explorer.SeriesTest do result = Series.pow(s1, s2) - assert result.dtype == {:f, 64} + assert result.dtype == {:s, 64} assert Series.to_list(result) == [1, nil, 3] end @@ -2402,7 +2402,7 @@ defmodule Explorer.SeriesTest do result = Series.pow(s1, s2) - assert 
result.dtype == {:f, 64} + assert result.dtype == {:s, 64} assert Series.to_list(result) == [1, nil, 3] end @@ -2412,7 +2412,7 @@ defmodule Explorer.SeriesTest do result = Series.pow(s1, s2) - assert result.dtype == {:f, 64} + assert result.dtype == {:s, 64} assert Series.to_list(result) == [1, nil, 3] end @@ -2421,7 +2421,7 @@ defmodule Explorer.SeriesTest do result = Series.pow(s1, 2) - assert result.dtype == {:f, 64} + assert result.dtype == {:s, 64} assert Series.to_list(result) == [1, 4, 9] end From e79c0f6dca37411ae0aeb4b46a301e2555e572cc Mon Sep 17 00:00:00 2001 From: William Lanchantin Date: Sun, 14 Jul 2024 19:06:56 -0400 Subject: [PATCH 05/30] don't pass keys when cross-joining --- native/explorer/src/lazyframe.rs | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/native/explorer/src/lazyframe.rs b/native/explorer/src/lazyframe.rs index d2eaef005..c9145d9fc 100644 --- a/native/explorer/src/lazyframe.rs +++ b/native/explorer/src/lazyframe.rs @@ -300,14 +300,24 @@ pub fn lf_join( let ldf = data.clone_inner(); let ldf1 = other.clone_inner(); - let new_ldf = ldf - .join_builder() - .with(ldf1) - .how(how) - .left_on(ex_expr_to_exprs(left_on)) - .right_on(ex_expr_to_exprs(right_on)) - .suffix(suffix) - .finish(); + let new_ldf = match how { + // Cross-joins no longer accept keys. 
+ // https://github.com/pola-rs/polars/pull/17305 + JoinType::Cross => ldf + .join_builder() + .with(ldf1) + .how(JoinType::Cross) + .suffix(suffix) + .finish(), + _ => ldf + .join_builder() + .with(ldf1) + .how(how) + .left_on(ex_expr_to_exprs(left_on)) + .right_on(ex_expr_to_exprs(right_on)) + .suffix(suffix) + .finish(), + }; Ok(ExLazyFrame::new(new_ldf)) } From 6f463ff1eaa623a244e38467b0c953d911ca7226 Mon Sep 17 00:00:00 2001 From: Philip Sampaio Date: Mon, 15 Jul 2024 18:13:24 -0300 Subject: [PATCH 06/30] Run "cargo update" --- native/explorer/Cargo.lock | 473 ++++++++++++++++++------------------- 1 file changed, 236 insertions(+), 237 deletions(-) diff --git a/native/explorer/Cargo.lock b/native/explorer/Cargo.lock index a55a1d8fc..fb2fc5ade 100644 --- a/native/explorer/Cargo.lock +++ b/native/explorer/Cargo.lock @@ -4,9 +4,9 @@ version = 3 [[package]] name = "addr2line" -version = "0.21.0" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" +checksum = "6e4503c46a5c0c7844e948c9a4d6acd9f50cccb4de1c48eb9e291ea17470c678" dependencies = [ "gimli", ] @@ -77,9 +77,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.82" +version = "1.0.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f538837af36e6f6a9be0faa67f9a314f8119e4e4b5867c6ab40ed60360142519" +checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" [[package]] name = "argminmax" @@ -127,18 +127,18 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.71", ] [[package]] name = "async-trait" -version = "0.1.80" +version = "0.1.81" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca" +checksum = 
"6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.71", ] [[package]] @@ -164,15 +164,15 @@ checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" [[package]] name = "autocfg" -version = "1.2.0" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1fdabc7756949593fe60f30ec81974b613357de856987752631dea1e3394c80" +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" [[package]] name = "backtrace" -version = "0.3.71" +version = "0.3.73" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26b05800d2e817c8b3b4b54abd461726265fa9789ae34330622f2db9ee696f9d" +checksum = "5cc23269a4f8976d0a4d2e7109211a419fe30e8d88d677cd60b6bc79c5732e0a" dependencies = [ "addr2line", "cc", @@ -203,18 +203,18 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.5.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" dependencies = [ "serde", ] [[package]] name = "blake3" -version = "1.5.1" +version = "1.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30cca6d3674597c30ddf2c587bf8d9d65c9a84d2326d941cc79c9842dfe0ef52" +checksum = "e9ec96fe9a81b5e365f9db71fe00edc4fe4ca2cc7dcb7861f0603012a7caa210" dependencies = [ "arrayref", "arrayvec", @@ -261,35 +261,35 @@ checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" [[package]] name = "bytemuck" -version = "1.15.0" +version = "1.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d6d68c57235a3a081186990eca2867354726650f42f7516ca50c28d6281fd15" +checksum = 
"b236fc92302c97ed75b38da1f4917b5cdda4984745740f153a5d3059e48d725e" dependencies = [ "bytemuck_derive", ] [[package]] name = "bytemuck_derive" -version = "1.6.0" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4da9a32f3fed317401fa3c862968128267c3106685286e15d5aaa3d7389c2f60" +checksum = "1ee891b04274a59bd38b412188e24b849617b2e45a0fd8d057deb63e7403761b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.71", ] [[package]] name = "bytes" -version = "1.6.0" +version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" +checksum = "a12916984aab3fa6e39d655a33e09c0071eb36d6ab3aea5c2d78551f1df6d952" [[package]] name = "cc" -version = "1.0.94" +version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17f6e324229dc011159fcc089755d1e2e216a90d43a7dea6853ca740b84f35e7" +checksum = "324c74f2155653c90b04f25b2a47a8a631360cb908f92a772695f430c7e31052" dependencies = [ "jobserver", "libc", @@ -313,7 +313,7 @@ dependencies = [ "num-traits", "serde", "wasm-bindgen", - "windows-targets 0.52.5", + "windows-targets 0.52.6", ] [[package]] @@ -393,18 +393,18 @@ checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" [[package]] name = "crc32fast" -version = "1.4.0" +version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" dependencies = [ "cfg-if", ] [[package]] name = "crossbeam-channel" -version = "0.5.12" +version = "0.5.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab3db02a9c5b5121e1e42fbdb1aeb65f5e02624cc58c43f2884c6ccac0b82f95" +checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2" dependencies = [ "crossbeam-utils", ] @@ 
-439,9 +439,9 @@ dependencies = [ [[package]] name = "crossbeam-utils" -version = "0.8.19" +version = "0.8.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" [[package]] name = "crypto-common" @@ -477,9 +477,9 @@ checksum = "0d6ef0072f8a535281e4876be788938b528e9a1d43900b82c2569af7da799125" [[package]] name = "either" -version = "1.11.0" +version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a47c1c47d2f5964e29c61246e81db715514cd532db6b5116a25ea3c03d6780a2" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" [[package]] name = "encoding_rs" @@ -499,7 +499,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.71", ] [[package]] @@ -559,9 +559,9 @@ checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c" [[package]] name = "flate2" -version = "1.0.28" +version = "1.0.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" +checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae" dependencies = [ "crc32fast", "libz-ng-sys", @@ -664,7 +664,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.71", ] [[package]] @@ -709,9 +709,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.14" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94b22e06ecb0110981051723910cbf0b5f5e09a2062dd7663334ee79a9d1286c" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" dependencies = [ "cfg-if", "js-sys", @@ -722,9 +722,9 @@ dependencies = [ [[package]] name = "gimli" -version = "0.28.1" +version = 
"0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" +checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd" [[package]] name = "glob" @@ -782,9 +782,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.14.3" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" dependencies = [ "ahash", "allocator-api2", @@ -860,9 +860,9 @@ dependencies = [ [[package]] name = "http-body" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cac85db508abc24a2e48553ba12a996e87244a0395ce011e62b37158745d643" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", "http 1.1.0", @@ -877,15 +877,15 @@ dependencies = [ "bytes", "futures-util", "http 1.1.0", - "http-body 1.0.0", + "http-body 1.0.1", "pin-project-lite", ] [[package]] name = "httparse" -version = "1.8.0" +version = "1.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" +checksum = "0fcc0b4a115bf80b728eb8ea024ad5bd707b615bfed49e0665b6e0f86fd082d9" [[package]] name = "httpdate" @@ -901,9 +901,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "hyper" -version = "0.14.28" +version = "0.14.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf96e135eb83a2a8ddf766e426a841d8ddd7449d5f00d34ea02b41d2f19eef80" +checksum = "a152ddd61dfaec7273fe8419ab357f33aee0d914c5f4efbf0d96fa749eea5ec9" dependencies = [ "bytes", "futures-channel", @@ -925,16 +925,16 @@ dependencies = [ [[package]] name = "hyper" -version = "1.3.1" +version = 
"1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe575dd17d0862a9a33781c8c4696a55c320909004a67a00fb286ba8b1bc496d" +checksum = "50dfd22e0e76d0f662d429a5f80fcaf3855009297eab6a0a9f8543834744ba05" dependencies = [ "bytes", "futures-channel", "futures-util", "h2 0.4.5", "http 1.1.0", - "http-body 1.0.0", + "http-body 1.0.1", "httparse", "itoa", "pin-project-lite", @@ -951,8 +951,8 @@ checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" dependencies = [ "futures-util", "http 0.2.12", - "hyper 0.14.28", - "rustls 0.21.11", + "hyper 0.14.30", + "rustls 0.21.12", "tokio", "tokio-rustls 0.24.1", ] @@ -965,10 +965,10 @@ checksum = "5ee4be2c948921a1a5320b629c4193916ed787a7f7f293fd3f7f5a6c9de74155" dependencies = [ "futures-util", "http 1.1.0", - "hyper 1.3.1", + "hyper 1.4.1", "hyper-util", - "rustls 0.23.10", - "rustls-native-certs 0.7.0", + "rustls 0.23.11", + "rustls-native-certs 0.7.1", "rustls-pki-types", "tokio", "tokio-rustls 0.26.0", @@ -977,16 +977,16 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b875924a60b96e5d7b9ae7b066540b1dd1cbd90d1828f54c92e02a283351c56" +checksum = "3ab92f4f49ee4fb4f997c784b7a2e0fa70050211e0b6a287f898c3c9785ca956" dependencies = [ "bytes", "futures-channel", "futures-util", "http 1.1.0", - "http-body 1.0.0", - "hyper 1.3.1", + "http-body 1.0.1", + "hyper 1.4.1", "pin-project-lite", "socket2", "tokio", @@ -1074,9 +1074,9 @@ checksum = "9028f49264629065d057f340a86acb84867925865f73bbf8d47b4d149a7e88b8" [[package]] name = "jobserver" -version = "0.1.30" +version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "685a7d121ee3f65ae4fddd72b25a04bb36b6af81bc0828f7d5434c0fe60fa3a2" +checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e" dependencies = [ "libc", ] @@ -1167,9 +1167,9 @@ dependencies = [ 
[[package]] name = "libc" -version = "0.2.153" +version = "0.2.155" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" [[package]] name = "libm" @@ -1179,9 +1179,9 @@ checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" [[package]] name = "libmimalloc-sys" -version = "0.1.35" +version = "0.1.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3979b5c37ece694f1f5e51e7ecc871fdb0f517ed04ee45f88d15d6d553cb9664" +checksum = "23aa6811d3bd4deb8a84dde645f943476d13b248d818edcf8ce0b2f37f036b44" dependencies = [ "cc", "libc", @@ -1205,9 +1205,9 @@ checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" [[package]] name = "lock_api" -version = "0.4.11" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" dependencies = [ "autocfg", "scopeguard", @@ -1215,15 +1215,15 @@ dependencies = [ [[package]] name = "log" -version = "0.4.21" +version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" [[package]] name = "lz4" -version = "1.24.0" +version = "1.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e9e2dd86df36ce760a60f6ff6ad526f7ba1f14ba0356f8254fb6905e6494df1" +checksum = "d6eab492fe7f8651add23237ea56dbf11b3c4ff762ab83d40a47f11433421f91" dependencies = [ "libc", "lz4-sys", @@ -1231,9 +1231,9 @@ dependencies = [ [[package]] name = "lz4-sys" -version = "1.9.4" +version = "1.9.5" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "57d27b317e207b10f69f5e75494119e391a96f48861ae870d1da6edac98ca900" +checksum = "e9764018d143cc854c9f17f0b907de70f14393b1f502da6375dce70f00514eb3" dependencies = [ "cc", "libc", @@ -1251,9 +1251,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.7.2" +version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "memmap2" @@ -1266,9 +1266,9 @@ dependencies = [ [[package]] name = "mimalloc" -version = "0.1.39" +version = "0.1.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa01922b5ea280a911e323e4d2fd24b7fe5cc4042e0d2cda3c40775cdc4bdc9c" +checksum = "68914350ae34959d83f732418d51e2427a794055d0b9529f48259ac07af65633" dependencies = [ "libmimalloc-sys", ] @@ -1281,9 +1281,9 @@ checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" [[package]] name = "miniz_oxide" -version = "0.7.2" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" +checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08" dependencies = [ "adler", ] @@ -1341,9 +1341,9 @@ dependencies = [ [[package]] name = "num-traits" -version = "0.2.18" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", "libm", @@ -1361,9 +1361,9 @@ dependencies = [ [[package]] name = "object" -version = "0.32.2" +version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" +checksum = "081b846d1d56ddfc18fdf1a922e4f6e07a11768ea1b92dec44e42b72712ccfce" dependencies = [ "memchr", ] @@ -1380,7 +1380,7 @@ dependencies = [ "chrono", "futures", "humantime", - "hyper 0.14.28", + "hyper 0.14.30", "itertools", "md-5", "parking_lot", @@ -1410,7 +1410,7 @@ dependencies = [ "chrono", "futures", "humantime", - "hyper 1.3.1", + "hyper 1.4.1", "itertools", "md-5", "parking_lot", @@ -1442,9 +1442,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "parking_lot" -version = "0.12.1" +version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" dependencies = [ "lock_api", "parking_lot_core", @@ -1452,15 +1452,15 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.9" +version = "0.9.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" dependencies = [ "cfg-if", "libc", "redox_syscall", "smallvec", - "windows-targets 0.48.5", + "windows-targets 0.52.6", ] [[package]] @@ -1475,9 +1475,9 @@ dependencies = [ [[package]] name = "parse-zoneinfo" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c705f256449c60da65e11ff6626e0c16a0a0b96aaa348de61376b249bc340f41" +checksum = "1f2a05b18d44e2957b88f96ba460715e295bc1d7510468a2f3d3b44535d26c24" dependencies = [ "regex", ] @@ -1543,7 +1543,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.71", ] [[package]] @@ -1663,7 +1663,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "b2439484be228b8c302328e2f953e64cfd93930636e5c7ceed90339ece7fef6c" dependencies = [ "ahash", - "bitflags 2.5.0", + "bitflags 2.6.0", "bytemuck", "chrono", "chrono-tz 0.8.6", @@ -1708,7 +1708,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c630385a56a867c410a20f30772d088f90ec3d004864562b84250b35268f97" dependencies = [ "ahash", - "bitflags 2.5.0", + "bitflags 2.6.0", "once_cell", "polars-arrow", "polars-core", @@ -1797,7 +1797,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "03877e74e42b5340ae52ded705f6d5d14563d90554c9177b01b91ed2412a56ed" dependencies = [ "ahash", - "bitflags 2.5.0", + "bitflags 2.6.0", "futures", "glob", "memchr", @@ -2043,9 +2043,9 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "proc-macro2" -version = "1.0.81" +version = "1.0.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d1597b0c024618f09a9c3b8655b7e430397a36d23fdafec26d6965e9eec3eba" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" dependencies = [ "unicode-ident", ] @@ -2080,7 +2080,7 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash", - "rustls 0.23.10", + "rustls 0.23.11", "thiserror", "tokio", "tracing", @@ -2096,7 +2096,7 @@ dependencies = [ "rand", "ring", "rustc-hash", - "rustls 0.23.10", + "rustls 0.23.11", "slab", "thiserror", "tinyvec", @@ -2176,11 +2176,11 @@ dependencies = [ [[package]] name = "raw-cpuid" -version = "11.0.1" +version = "11.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d86a7c4638d42c44551f4791a20e687dbb4c3de1f33c43dd71e355cd429def1" +checksum = "e29830cbb1290e404f24c73af91c5d8d631ce7e128691e9477556b540cd01ecd" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", ] [[package]] @@ -2220,43 +2220,43 @@ source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ "quote", - "syn 2.0.60", + "syn 2.0.71", ] [[package]] name = "redox_syscall" -version = "0.4.1" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" +checksum = "2a908a6e00f1fdd0dfd9c0eb08ce85126f6d8bbda50017e74bc4a4b7d4a926a4" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.6.0", ] [[package]] name = "ref-cast" -version = "1.0.22" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4846d4c50d1721b1a3bef8af76924eef20d5e723647333798c1b519b3a9473f" +checksum = "ccf0a6f84d5f1d581da8b41b47ec8600871962f2a528115b542b362d4b744931" dependencies = [ "ref-cast-impl", ] [[package]] name = "ref-cast-impl" -version = "1.0.22" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fddb4f8d99b0a2ebafc65a87a69a7b9875e4b1ae1f00db265d300ef7f28bccc" +checksum = "bcc303e793d3734489387d205e9b186fac9c6cfacedd98cbb2e8a5943595f3e6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.71", ] [[package]] name = "regex" -version = "1.10.4" +version = "1.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" +checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" dependencies = [ "aho-corasick", "memchr", @@ -2266,9 +2266,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.6" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" +checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" dependencies = [ "aho-corasick", "memchr", @@ -2277,9 +2277,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.3" 
+version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" [[package]] name = "reqwest" @@ -2295,7 +2295,7 @@ dependencies = [ "h2 0.3.26", "http 0.2.12", "http-body 0.4.6", - "hyper 0.14.28", + "hyper 0.14.30", "hyper-rustls 0.24.2", "ipnet", "js-sys", @@ -2304,7 +2304,7 @@ dependencies = [ "once_cell", "percent-encoding", "pin-project-lite", - "rustls 0.21.11", + "rustls 0.21.12", "rustls-native-certs 0.6.3", "rustls-pemfile 1.0.4", "serde", @@ -2336,9 +2336,9 @@ dependencies = [ "futures-util", "h2 0.4.5", "http 1.1.0", - "http-body 1.0.0", + "http-body 1.0.1", "http-body-util", - "hyper 1.3.1", + "hyper 1.4.1", "hyper-rustls 0.27.2", "hyper-util", "ipnet", @@ -2349,8 +2349,8 @@ dependencies = [ "percent-encoding", "pin-project-lite", "quinn", - "rustls 0.23.10", - "rustls-native-certs 0.7.0", + "rustls 0.23.11", + "rustls-native-certs 0.7.1", "rustls-pemfile 2.1.2", "rustls-pki-types", "serde", @@ -2386,9 +2386,9 @@ dependencies = [ [[package]] name = "rustc-demangle" -version = "0.1.23" +version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" [[package]] name = "rustc-hash" @@ -2402,7 +2402,7 @@ version = "0.38.34" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "errno", "libc", "linux-raw-sys", @@ -2430,7 +2430,7 @@ dependencies = [ "inventory", "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.71", ] [[package]] @@ -2445,9 +2445,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.21.11" +version = "0.21.12" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fecbfb7b1444f477b345853b1fce097a2c6fb637b2bfb87e6bc5db0f043fae4" +checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" dependencies = [ "log", "ring", @@ -2457,14 +2457,14 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.10" +version = "0.23.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05cff451f60db80f490f3c182b77c35260baace73209e9cdbbe526bfe3a4d402" +checksum = "4828ea528154ae444e5a642dbb7d5623354030dc9822b83fd9bb79683c7399d0" dependencies = [ "once_cell", "ring", "rustls-pki-types", - "rustls-webpki 0.102.4", + "rustls-webpki 0.102.5", "subtle", "zeroize", ] @@ -2483,9 +2483,9 @@ dependencies = [ [[package]] name = "rustls-native-certs" -version = "0.7.0" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f1fb85efa936c42c6d5fc28d2629bb51e4b2f4b8a5211e297d599cc5a093792" +checksum = "a88d6d420651b496bdd98684116959239430022a115c1240e6c3993be0b15fba" dependencies = [ "openssl-probe", "rustls-pemfile 2.1.2", @@ -2531,9 +2531,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.102.4" +version = "0.102.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff448f7e92e913c4b7d4c6d8e4540a1724b319b4152b8aef6d4cf8339712b33e" +checksum = "f9a6fccd794a42c2c105b513a2f62bc3fd8f3ba57a4593677ceb0bd035164d78" dependencies = [ "ring", "rustls-pki-types", @@ -2542,15 +2542,15 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.15" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80af6f9131f277a45a3fba6ce8e2258037bb0477a67e610d3c1fe046ab31de47" +checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6" [[package]] name = "ryu" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" [[package]] name = "same-file" @@ -2588,11 +2588,11 @@ dependencies = [ [[package]] name = "security-framework" -version = "2.10.0" +version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "770452e37cad93e0a50d5abc3990d2bc351c36d0328f86cefec2f2fb206eaef6" +checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.6.0", "core-foundation", "core-foundation-sys", "libc", @@ -2601,9 +2601,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.10.0" +version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41f3cc463c0ef97e11c3461a9d3787412d30e8e7eb907c79180c4a57bf7c04ef" +checksum = "75da29fe9b9b08fe9d6b22b5b4bcbc75d8db3aa31e639aa56bb62e9d46bfceaf" dependencies = [ "core-foundation-sys", "libc", @@ -2611,29 +2611,29 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.198" +version = "1.0.204" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9846a40c979031340571da2545a4e5b7c4163bdae79b301d5f86d03979451fcc" +checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.198" +version = "1.0.204" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e88edab869b01783ba905e7d0153f9fc1a6505a96e4ad3018011eedb838566d9" +checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.71", ] [[package]] name = "serde_json" -version = "1.0.116" +version = "1.0.120" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e17db7126d17feb94eb3fad46bf1a96b034e8aacbc2e775fe81505f8b0b2813" +checksum = 
"4e0d21c9a8cae1235ad58a00c11cb40d4b1e5c784f1ef2c537876ed6ffd8b7c5" dependencies = [ "indexmap", "itoa", @@ -2740,9 +2740,9 @@ checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] name = "socket2" -version = "0.5.6" +version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05ffd9c0a93b7543e062e759284fcf5f5e3b098501104bfbdde4d404db792871" +checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c" dependencies = [ "libc", "windows-sys 0.52.0", @@ -2805,22 +2805,22 @@ checksum = "fe895eb47f22e2ddd4dabc02bce419d2e643c8e3b585c78158b349195bc24d82" [[package]] name = "strum_macros" -version = "0.26.2" +version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6cf59daf282c0a494ba14fd21610a0325f9f90ec9d1231dea26bcb1d696c946" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" dependencies = [ - "heck 0.4.1", + "heck 0.5.0", "proc-macro2", "quote", "rustversion", - "syn 2.0.60", + "syn 2.0.71", ] [[package]] name = "subtle" -version = "2.6.0" +version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d0208408ba0c3df17ed26eb06992cb1a1268d41b2c0e12e65203fbe3972cee5" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" @@ -2835,9 +2835,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.60" +version = "2.0.71" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "909518bc7b1c9b779f1bbf07f2929d35af9f0f37e47c6e9ef7f9dddc1e1821f3" +checksum = "b146dcf730474b4bcd16c311627b31ede9ab149045db4d6088b3becaea046462" dependencies = [ "proc-macro2", "quote", @@ -2858,9 +2858,9 @@ checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" [[package]] name = "sysinfo" -version = "0.30.11" +version = "0.30.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"87341a165d73787554941cd5ef55ad728011566fe714e987d1b976c15dbc3a83" +checksum = "0a5b4ddaee55fb2bea2bf0e5000747e5f5c0de765e5a5ff87f4cd106439f4bb3" dependencies = [ "cfg-if", "core-foundation-sys", @@ -2899,29 +2899,29 @@ checksum = "c1bbb9f3c5c463a01705937a24fdabc5047929ac764b2d5b9cf681c1f5041ed5" [[package]] name = "thiserror" -version = "1.0.58" +version = "1.0.62" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03468839009160513471e86a034bb2c5c0e4baae3b43f79ffc55c4a5427b3297" +checksum = "f2675633b1499176c2dff06b0856a27976a8f9d436737b4cf4f312d4d91d8bbb" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.58" +version = "1.0.62" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7" +checksum = "d20468752b09f49e909e55a5d338caa8bedf615594e9d80bc4c565d30faf798c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.71", ] [[package]] name = "tinyvec" -version = "1.6.0" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +checksum = "445e881f4f6d382d5f27c034e25eb92edd7c784ceab92a0937db7f2e9471b938" dependencies = [ "tinyvec_macros", ] @@ -2934,9 +2934,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.37.0" +version = "1.38.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1adbebffeca75fcfd058afa480fb6c0b81e165a0323f9c9d39c9697e37c46787" +checksum = "ba4f4a02a7a80d6f274636f0aa95c7e383b912d41fe721a31f29e29698585a4a" dependencies = [ "backtrace", "bytes", @@ -2951,13 +2951,13 @@ dependencies = [ [[package]] name = "tokio-macros" -version = "2.2.0" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" +checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.71", ] [[package]] @@ -2966,7 +2966,7 @@ version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" dependencies = [ - "rustls 0.21.11", + "rustls 0.21.12", "tokio", ] @@ -2976,23 +2976,22 @@ version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4" dependencies = [ - "rustls 0.23.10", + "rustls 0.23.11", "rustls-pki-types", "tokio", ] [[package]] name = "tokio-util" -version = "0.7.10" +version = "0.7.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5419f34732d9eb6ee4c3578b7989078579b7f039cbbb9ca2c4da015749371e15" +checksum = "9cf6b47b3771c49ac75ad09a6162f53ad4b8088b76ac60e8ec1455b31a189fe1" dependencies = [ "bytes", "futures-core", "futures-sink", "pin-project-lite", "tokio", - "tracing", ] [[package]] @@ -3041,7 +3040,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.71", ] [[package]] @@ -3118,9 +3117,9 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "url" -version = "2.5.0" +version = "2.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" +checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c" dependencies = [ "form_urlencoded", "idna", @@ -3129,9 +3128,9 @@ dependencies = [ [[package]] name = "uuid" -version = "1.8.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a183cf7feeba97b4dd1c0d46788634f6221d87fa961b305bed08c851829efcc0" +checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314" dependencies = [ "getrandom", ] @@ -3206,7 +3205,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.71", "wasm-bindgen-shared", ] @@ -3240,7 +3239,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.71", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3292,11 +3291,11 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" -version = "0.1.6" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" +checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" dependencies = [ - "winapi", + "windows-sys 0.52.0", ] [[package]] @@ -3312,7 +3311,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be" dependencies = [ "windows-core", - "windows-targets 0.52.5", + "windows-targets 0.52.6", ] [[package]] @@ -3321,7 +3320,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" dependencies = [ - "windows-targets 0.52.5", + "windows-targets 0.52.6", ] [[package]] @@ -3339,7 +3338,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets 0.52.5", + "windows-targets 0.52.6", ] [[package]] @@ -3359,18 +3358,18 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum 
= "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm 0.52.5", - "windows_aarch64_msvc 0.52.5", - "windows_i686_gnu 0.52.5", + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", "windows_i686_gnullvm", - "windows_i686_msvc 0.52.5", - "windows_x86_64_gnu 0.52.5", - "windows_x86_64_gnullvm 0.52.5", - "windows_x86_64_msvc 0.52.5", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", ] [[package]] @@ -3381,9 +3380,9 @@ checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" [[package]] name = "windows_aarch64_gnullvm" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] name = "windows_aarch64_msvc" @@ -3393,9 +3392,9 @@ checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" [[package]] name = "windows_aarch64_msvc" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_i686_gnu" @@ -3405,15 +3404,15 @@ checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" [[package]] name = "windows_i686_gnu" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" [[package]] name = "windows_i686_gnullvm" 
-version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_msvc" @@ -3423,9 +3422,9 @@ checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" [[package]] name = "windows_i686_msvc" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_x86_64_gnu" @@ -3435,9 +3434,9 @@ checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" [[package]] name = "windows_x86_64_gnu" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnullvm" @@ -3447,9 +3446,9 @@ checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" [[package]] name = "windows_x86_64_gnullvm" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_msvc" @@ -3459,9 +3458,9 @@ checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" [[package]] name = "windows_x86_64_msvc" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" +checksum = 
"589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winreg" @@ -3485,28 +3484,28 @@ dependencies = [ [[package]] name = "xxhash-rust" -version = "0.8.10" +version = "0.8.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "927da81e25be1e1a2901d59b81b37dd2efd1fc9c9345a55007f09bf5a2d3ee03" +checksum = "63658493314859b4dfdf3fb8c1defd61587839def09582db50b8a4e93afca6bb" [[package]] name = "zerocopy" -version = "0.7.32" +version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.7.32" +version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.60", + "syn 2.0.71", ] [[package]] @@ -3517,27 +3516,27 @@ checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" [[package]] name = "zstd" -version = "0.13.1" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d789b1514203a1120ad2429eae43a7bd32b90976a7bb8a05f7ec02fa88cc23a" +checksum = "fcf2b778a664581e31e389454a7072dab1647606d44f7feea22cd5abb9c9f3f9" dependencies = [ "zstd-safe", ] [[package]] name = "zstd-safe" -version = "7.1.0" +version = "7.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cd99b45c6bc03a018c8b8a86025678c87e55526064e38f9df301989dce7ec0a" +checksum = "fa556e971e7b568dc775c136fc9de8c779b1c2fc3a63defaafadffdbd3181afa" dependencies = [ "zstd-sys", ] [[package]] name = "zstd-sys" -version = "2.0.10+zstd.1.5.6" +version = "2.0.12+zstd.1.5.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "c253a4914af5bafc8fa8c86ee400827e83cf6ec01195ec1f1ed8441bf00d65aa" +checksum = "0a4e40c320c3cb459d9a9ff6de98cff88f4751ee9275d140e2be94a2b74e4c13" dependencies = [ "cc", "pkg-config", From a7759b7d3c2e74defecb1a23e6ad96271785ffdc Mon Sep 17 00:00:00 2001 From: Philip Sampaio Date: Tue, 16 Jul 2024 20:28:53 -0300 Subject: [PATCH 07/30] Changes that fix some tests --- lib/explorer/polars_backend/lazy_frame.ex | 1 + lib/explorer/polars_backend/series.ex | 7 +++++ test/explorer/data_frame/grouped_test.exs | 6 ++-- test/explorer/data_frame_test.exs | 7 ++--- test/explorer/series_test.exs | 36 +++++++++++++++-------- 5 files changed, 37 insertions(+), 20 deletions(-) diff --git a/lib/explorer/polars_backend/lazy_frame.ex b/lib/explorer/polars_backend/lazy_frame.ex index 43111ee84..ab5fa78b9 100644 --- a/lib/explorer/polars_backend/lazy_frame.ex +++ b/lib/explorer/polars_backend/lazy_frame.ex @@ -479,6 +479,7 @@ defmodule Explorer.PolarsBackend.LazyFrame do exprs = for {name, lazy_series} <- column_pairs do + # TODO: we may want to cast to the target out_df column types. 
lazy_series |> to_expr() |> then(maybe_over_groups_fun) diff --git a/lib/explorer/polars_backend/series.ex b/lib/explorer/polars_backend/series.ex index 989c799b4..35121c2ea 100644 --- a/lib/explorer/polars_backend/series.ex +++ b/lib/explorer/polars_backend/series.ex @@ -596,6 +596,13 @@ defmodule Explorer.PolarsBackend.Series do end defp window_function(operation, series, window_size, weights, min_periods, center) do + series = + if weights == [] do + series + else + cast(series, {:f, 64}) + end + Shared.apply_series(series, operation, [window_size, weights, min_periods, center]) end diff --git a/test/explorer/data_frame/grouped_test.exs b/test/explorer/data_frame/grouped_test.exs index 2065431a8..d6fb522a9 100644 --- a/test/explorer/data_frame/grouped_test.exs +++ b/test/explorer/data_frame/grouped_test.exs @@ -578,11 +578,11 @@ defmodule Explorer.DataFrame.GroupedTest do a = ldf["a"] [ - # b: Series.window_max(a, 2, weights: [1.0, 2.0]) + b: Series.window_max(a, 2, weights: [1.0, 2.0]), c: Series.window_mean(a, 2, weights: [0.25, 0.75]), d: Series.window_median(a, 2, weights: [0.25, 0.75]), - # e: Series.window_min(a, 2, weights: [1.0, 2.0]) - # f: Series.window_sum(a, 2, weights: [1.0, 2.0]) + e: Series.window_min(a, 2, weights: [1.0, 2.0]), + f: Series.window_sum(a, 2, weights: [1.0, 2.0]), g: Series.window_standard_deviation(a, 2), p: Series.cumulative_max(a), q: Series.cumulative_min(a), diff --git a/test/explorer/data_frame_test.exs b/test/explorer/data_frame_test.exs index ec6648f16..ad0dfc265 100644 --- a/test/explorer/data_frame_test.exs +++ b/test/explorer/data_frame_test.exs @@ -1714,8 +1714,7 @@ defmodule Explorer.DataFrameTest do f: substring(a, 1), g: substring(b, 2, 5), h: substring(c, -3), - i: substring(d, 6, 10), - j: substring(e, -15, 2) + i: substring(d, 6, 10) ) assert DF.to_columns(df1, atom_keys: true) == %{ @@ -1727,8 +1726,7 @@ defmodule Explorer.DataFrameTest do f: ["hello", "world", "foo", "bar"], g: ["nus", "rth", "rs", "piter"], h: 
["foo", "bar", "baz", "uox"], - i: ["", "", "", ""], - j: ["_f", "_b", "_b", "_q"] + i: ["", "", "", ""] } end @@ -3434,6 +3432,7 @@ defmodule Explorer.DataFrameTest do assert df.names == ["variable", "value"] assert df.dtypes == %{"variable" => :string, "value" => {:s, 64}} + assert DF.shape(df) == {3282, 2} end diff --git a/test/explorer/series_test.exs b/test/explorer/series_test.exs index 97be777f4..9e2143fe8 100644 --- a/test/explorer/series_test.exs +++ b/test/explorer/series_test.exs @@ -4176,9 +4176,9 @@ defmodule Explorer.SeriesTest do assert Series.to_list(s2) == [ 1.0, - 1.0, - 1.6666666666666667, + nil, 1.6666666666666667, + nil, 2.4285714285714284, 3.2666666666666666, 4.161290322580645, @@ -4189,14 +4189,16 @@ defmodule Explorer.SeriesTest do end test "does not ignore nil if set ignore_nils option to false and calculates ewma" do + # The idea is to ignore or not the `nils` in the calculation. But `nils` are + # always propagated. s1 = Series.from_list([1, nil, 2, nil, 3, 4, 5, 6, 7, 8]) s2 = Series.ewm_mean(s1, ignore_nils: false) assert Series.to_list(s2) == [ 1.0, - 1.0, - 1.8, + nil, 1.8, + nil, 2.7142857142857144, 3.490566037735849, 4.316239316239316, @@ -4281,14 +4283,16 @@ defmodule Explorer.SeriesTest do end test "ignores nil by default and calculates ewm std" do + # The idea is to ignore or not the `nils` in the calculation. But `nils` are + # always propagated. s1 = Series.from_list([1, nil, 2, nil, 3, 4, 5, 6, 7, 8]) s2 = Series.ewm_standard_deviation(s1, ignore_nils: true) assert Series.to_list(s2) == [ 0.0, - 0.0, - 0.7071067811865476, + nil, 0.7071067811865476, + nil, 0.9636241116594314, 1.1771636613972951, 1.3452425132127066, @@ -4299,14 +4303,16 @@ defmodule Explorer.SeriesTest do end test "does not ignore nil if set ignore_nils option to false and calculates ewm std" do + # The idea is to ignore or not the `nils` in the calculation. But `nils` are + # always propagated. 
s1 = Series.from_list([1, nil, 2, nil, 3, 4, 5, 6, 7, 8]) s2 = Series.ewm_standard_deviation(s1, ignore_nils: false) assert Series.to_list(s2) == [ 0.0, - 0.0, - 0.7071067811865476, + nil, 0.7071067811865476, + nil, 0.8864052604279183, 0.9772545497599153, 1.1470897308102692, @@ -4409,14 +4415,16 @@ defmodule Explorer.SeriesTest do end test "ignores nil by default and calculates ewm var" do + # The idea is to ignore or not the `nils` in the calculation. But `nils` are + # always propagated. s1 = Series.from_list([1, nil, 2, nil, 3, 4, 5, 6, 7, 8]) s2 = Series.ewm_variance(s1, ignore_nils: true) assert Series.to_list(s2) == [ 0.0, - 0.0, - 0.5, + nil, 0.5, + nil, 0.9285714285714284, 1.385714285714286, 1.8096774193548393, @@ -4427,14 +4435,16 @@ defmodule Explorer.SeriesTest do end test "does not ignore nil if set ignore_nils option to false and calculates ewm var" do + # The idea is to ignore or not the `nils` in the calculation. But `nils` are + # always propagated. s1 = Series.from_list([1, nil, 2, nil, 3, 4, 5, 6, 7, 8]) s2 = Series.ewm_variance(s1, ignore_nils: false) assert Series.to_list(s2) == [ 0.0, - 0.0, - 0.5, + nil, 0.5, + nil, 0.7857142857142857, 0.9550264550264549, 1.315814850530376, @@ -5549,7 +5559,7 @@ defmodule Explorer.SeriesTest do series = Series.from_list(["earth", "mars", "neptune"]) assert Series.substring(series, -4, 4) |> Series.to_list() == ["arth", "mars", "tune"] - assert Series.substring(series, -20, 4) |> Series.to_list() == ["eart", "mars", "nept"] + assert Series.substring(series, -6, 2) |> Series.to_list() == ["e", "", "ep"] end end From 0edc5d8d2220254c8dcb4ff26015fe847b17c620 Mon Sep 17 00:00:00 2001 From: Philip Sampaio Date: Wed, 17 Jul 2024 13:19:12 -0300 Subject: [PATCH 08/30] Fix args for pivot_longer --- native/explorer/src/lazyframe.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/native/explorer/src/lazyframe.rs b/native/explorer/src/lazyframe.rs index c9145d9fc..3e9a0dbf8 100644 --- 
a/native/explorer/src/lazyframe.rs +++ b/native/explorer/src/lazyframe.rs @@ -266,8 +266,8 @@ pub fn lf_pivot_longer( ) -> Result { let ldf = data.clone_inner(); let unpivot_opts = UnpivotArgs { - on: to_smart_strings(id_vars), - index: to_smart_strings(value_vars), + index: to_smart_strings(id_vars), + on: to_smart_strings(value_vars), variable_name: Some(names_to.into()), value_name: Some(values_to.into()), streamable: true, From bc234bf6854aa6628de1fa6cadd9417d4a89f095 Mon Sep 17 00:00:00 2001 From: Philip Sampaio Date: Wed, 17 Jul 2024 15:50:31 -0300 Subject: [PATCH 09/30] Fix order of arguments for pivot_stable --- native/explorer/src/dataframe.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/native/explorer/src/dataframe.rs b/native/explorer/src/dataframe.rs index b3d30ff7d..355fb87da 100644 --- a/native/explorer/src/dataframe.rs +++ b/native/explorer/src/dataframe.rs @@ -427,8 +427,8 @@ pub fn df_pivot_wider( let mut new_df = pivot_stable( &df, - &temp_id_names, - Some([pivot_column]), + [pivot_column], + Some(temp_id_names), Some(values_column), false, Some(PivotAgg::First), From c80e6dd77a8a0f1716e076a74aa04ed4ca7a984e Mon Sep 17 00:00:00 2001 From: Philip Sampaio Date: Thu, 18 Jul 2024 14:05:30 -0300 Subject: [PATCH 10/30] Changes in pivot_wider with repeated columns --- lib/explorer/data_frame.ex | 16 ++++++++-------- test/explorer/data_frame_test.exs | 8 ++++---- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/lib/explorer/data_frame.ex b/lib/explorer/data_frame.ex index e1e9e4279..6da4b150a 100644 --- a/lib/explorer/data_frame.ex +++ b/lib/explorer/data_frame.ex @@ -4758,14 +4758,14 @@ defmodule Explorer.DataFrame do #Explorer.DataFrame< Polars[2 x 9] product_id s64 [1, 2] - property_value_property_product_id s64 [1, 2] - property_value_property_width_cm s64 [42, 35] - property_value_property_height_cm s64 [40, 20] - property_value_property_length_cm s64 [64, 40] - another_value_property_product_id s64 [1, 2] - 
another_value_property_width_cm s64 [43, 36] - another_value_property_height_cm s64 [41, 21] - another_value_property_length_cm s64 [65, 42] + property_value_product_id s64 [1, 2] + property_value_width_cm s64 [42, 35] + property_value_height_cm s64 [40, 20] + property_value_length_cm s64 [64, 40] + another_value_product_id s64 [1, 2] + another_value_width_cm s64 [43, 36] + another_value_height_cm s64 [41, 21] + another_value_length_cm s64 [65, 42] > ## Grouped examples diff --git a/test/explorer/data_frame_test.exs b/test/explorer/data_frame_test.exs index ad0dfc265..777174c57 100644 --- a/test/explorer/data_frame_test.exs +++ b/test/explorer/data_frame_test.exs @@ -3360,10 +3360,10 @@ defmodule Explorer.DataFrameTest do assert DF.to_columns(df2, atom_keys: true) == %{ id: [1], - value_variable_a: [1], - value_variable_b: [2], - another_value_variable_a: [6], - another_value_variable_b: [9] + value_a: [1], + value_b: [2], + another_value_a: [6], + another_value_b: [9] } end From 048d46afacf4b2d0daa549f0922f75a6c4ca066e Mon Sep 17 00:00:00 2001 From: Philip Sampaio Date: Thu, 18 Jul 2024 14:14:14 -0300 Subject: [PATCH 11/30] Explicitly use literal expressions with their types This is to avoid Polars to choose a different type. 
--- native/explorer/src/expressions.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/native/explorer/src/expressions.rs b/native/explorer/src/expressions.rs index 4851aaf3a..69dec1e5d 100644 --- a/native/explorer/src/expressions.rs +++ b/native/explorer/src/expressions.rs @@ -33,13 +33,13 @@ pub fn expr_nil() -> ExExpr { #[rustler::nif] pub fn expr_integer(number: i64) -> ExExpr { - let expr = number.lit(); + let expr = Expr::Literal(LiteralValue::Int64(number)); ExExpr::new(expr) } #[rustler::nif] pub fn expr_float(number: f64) -> ExExpr { - let expr = number.lit(); + let expr = Expr::Literal(LiteralValue::Float64(number)); ExExpr::new(expr) } From bc4e2a31583bff7f33b2f48f602554129029a214 Mon Sep 17 00:00:00 2001 From: Philip Sampaio Date: Thu, 18 Jul 2024 14:49:31 -0300 Subject: [PATCH 12/30] Cast to float 64 series for window functions with weights --- lib/explorer/polars_backend/series.ex | 2 +- native/explorer/src/expressions.rs | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/explorer/polars_backend/series.ex b/lib/explorer/polars_backend/series.ex index 35121c2ea..574cbf725 100644 --- a/lib/explorer/polars_backend/series.ex +++ b/lib/explorer/polars_backend/series.ex @@ -597,7 +597,7 @@ defmodule Explorer.PolarsBackend.Series do defp window_function(operation, series, window_size, weights, min_periods, center) do series = - if weights == [] do + if Kernel.in(weights, [[], nil]) do series else cast(series, {:f, 64}) diff --git a/native/explorer/src/expressions.rs b/native/explorer/src/expressions.rs index 69dec1e5d..a30b29e5f 100644 --- a/native/explorer/src/expressions.rs +++ b/native/explorer/src/expressions.rs @@ -652,7 +652,10 @@ macro_rules! 
init_window_expr_fun { min_periods: Option, center: bool, ) -> ExExpr { - let expr = data.clone_inner(); + let expr = match weights.as_deref() { + Some([]) | None => data.clone_inner(), + _ => data.clone_inner().cast(DataType::Float64), + }; let opts = rolling_opts_fixed_window(window_size, weights, min_periods, center); ExExpr::new(expr.$fun(opts)) } From 4274c88cc11de351a5064ecb4b40d33af15fcd81 Mon Sep 17 00:00:00 2001 From: Philip Sampaio Date: Thu, 18 Jul 2024 14:57:44 -0300 Subject: [PATCH 13/30] Use aggregate function for polars query --- lib/explorer/data_frame.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/explorer/data_frame.ex b/lib/explorer/data_frame.ex index 6da4b150a..c1fcbc024 100644 --- a/lib/explorer/data_frame.ex +++ b/lib/explorer/data_frame.ex @@ -6063,7 +6063,7 @@ defmodule Explorer.DataFrame do Basic example: iex> df = Explorer.DataFrame.new(a: [1, 2, 3], b: ["x", "y", "y"]) - iex> Explorer.DataFrame.sql(df, "select a, b from df group by b order by b") + iex> Explorer.DataFrame.sql(df, "select ARRAY_AGG(a), b from df group by b order by b") #Explorer.DataFrame< Polars[2 x 2] a list[s64] [[1], [2, 3]] From a6d923b81a9a44ffe6b890e89bb3fb4f2ddac6c4 Mon Sep 17 00:00:00 2001 From: Philip Sampaio Date: Thu, 18 Jul 2024 16:52:45 -0300 Subject: [PATCH 14/30] Improve check of weights --- lib/explorer/polars_backend/series.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/explorer/polars_backend/series.ex b/lib/explorer/polars_backend/series.ex index 574cbf725..281ee9aa3 100644 --- a/lib/explorer/polars_backend/series.ex +++ b/lib/explorer/polars_backend/series.ex @@ -597,7 +597,7 @@ defmodule Explorer.PolarsBackend.Series do defp window_function(operation, series, window_size, weights, min_periods, center) do series = - if Kernel.in(weights, [[], nil]) do + if List.wrap(weights) == [] do series else cast(series, {:f, 64}) From 2644d270bf8dfebb22b91c0713a8be9d1c208d10 Mon Sep 17 00:00:00 2001 From: Philip 
Sampaio Date: Thu, 18 Jul 2024 18:27:15 -0300 Subject: [PATCH 15/30] Attempt to fix "pow/2" after Polars changes --- lib/explorer/backend/lazy_series.ex | 11 ++++++++++- lib/explorer/data_frame.ex | 4 ++-- lib/explorer/series.ex | 12 +++++++++++- test/explorer/data_frame_test.exs | 26 ++++++++++++-------------- test/explorer/series_test.exs | 16 ++++++++-------- 5 files changed, 43 insertions(+), 26 deletions(-) diff --git a/lib/explorer/backend/lazy_series.ex b/lib/explorer/backend/lazy_series.ex index 5bb147bad..9738f53bb 100644 --- a/lib/explorer/backend/lazy_series.ex +++ b/lib/explorer/backend/lazy_series.ex @@ -166,7 +166,7 @@ defmodule Explorer.Backend.LazySeries do @comparison_operations [:equal, :not_equal, :greater, :greater_equal, :less, :less_equal] - @basic_arithmetic_operations [:add, :subtract, :multiply, :divide, :pow] + @basic_arithmetic_operations [:add, :subtract, :multiply, :divide] @other_arithmetic_operations [:quotient, :remainder] @aggregation_operations [ @@ -453,6 +453,15 @@ defmodule Explorer.Backend.LazySeries do end end + @impl true + def pow(dtype, %Series{} = left, %Series{} = right) do + # Cast from the main module is needed because we may be seeing a series from another backend. 
+ args = [data!(Explorer.Series.cast(left, dtype)), data!(right)] + data = new(:pow, args, dtype, aggregations?(args)) + + Backend.Series.new(data, dtype) + end + for op <- @other_arithmetic_operations do @impl true def unquote(op)(left, right) do diff --git a/lib/explorer/data_frame.ex b/lib/explorer/data_frame.ex index c1fcbc024..88d1f1001 100644 --- a/lib/explorer/data_frame.ex +++ b/lib/explorer/data_frame.ex @@ -2774,11 +2774,11 @@ defmodule Explorer.DataFrame do You can overwrite existing columns as well: iex> df = Explorer.DataFrame.new(a: ["a", "b", "c"], b: [1, 2, 3]) - iex> Explorer.DataFrame.mutate_with(df, &[b: Explorer.Series.pow(&1["b"], 2)]) + iex> Explorer.DataFrame.mutate_with(df, &[b: Explorer.Series.add(&1["b"], 2)]) #Explorer.DataFrame< Polars[3 x 2] a string ["a", "b", "c"] - b s64 [1, 4, 9] + b s64 [3, 4, 5] > It's possible to "reuse" a variable for different computations: diff --git a/lib/explorer/series.ex b/lib/explorer/series.ex index 8145b85cb..1bcb3d259 100644 --- a/lib/explorer/series.ex +++ b/lib/explorer/series.ex @@ -3598,6 +3598,9 @@ defmodule Explorer.Series do sizes are series, the series must have the same size or at last one of them must have size of 1. + In case the expoent is a signed integer number or series, + the resultant series will be of `{:f, 64}` dtype. 
+ ## Supported dtypes * floats: #{Shared.inspect_dtypes(@float_dtypes, backsticks: true)} @@ -3614,6 +3617,13 @@ defmodule Explorer.Series do iex> s = [2, 4, 6] |> Explorer.Series.from_list() iex> Explorer.Series.pow(s, 3) + #Explorer.Series< + Polars[3] + f64 [8.0, 64.0, 216.0] + > + + iex> s = [2, 4, 6] |> Explorer.Series.from_list() + iex> Explorer.Series.pow(s, Explorer.Series.from_list([3], dtype: :u32)) #Explorer.Series< Polars[3] s64 [8, 64, 216] @@ -3657,7 +3667,7 @@ defmodule Explorer.Series do defp cast_to_pow({:f, l}, {:f, r}), do: {:f, max(l, r)} defp cast_to_pow({:f, l}, {n, _}) when K.in(n, [:u, :s]), do: {:f, l} defp cast_to_pow({n, _}, {:f, r}) when K.in(n, [:u, :s]), do: {:f, r} - defp cast_to_pow({n, _}, {:s, _}) when K.in(n, [:u, :s]), do: {:s, 64} + defp cast_to_pow({n, _}, {:s, _}) when K.in(n, [:u, :s]), do: {:f, 64} defp cast_to_pow(_, _), do: nil @doc """ diff --git a/test/explorer/data_frame_test.exs b/test/explorer/data_frame_test.exs index 777174c57..ace2554a6 100644 --- a/test/explorer/data_frame_test.exs +++ b/test/explorer/data_frame_test.exs @@ -299,7 +299,7 @@ defmodule Explorer.DataFrameTest do df = DF.new(a: [1, 2, 3, 4, 5, 6, 5], b: [9, 8, 7, 6, 5, 4, 3]) message = - "expecting the function to return a boolean LazySeries, but instead it returned a LazySeries of type {:s, 64}" + "expecting the function to return a boolean LazySeries, but instead it returned a LazySeries of type {:f, 64}" assert_raise ArgumentError, message, fn -> DF.filter_with(df, fn ldf -> @@ -948,7 +948,7 @@ defmodule Explorer.DataFrameTest do calc2: [-1, 0, 2], calc3: [2, 4, 8], calc4: [0.5, 1.0, 2.0], - calc5: [1, 4, 16], + calc5: [1.0, 4.0, 16.0], calc6: [0, 1, 2], calc7: [1, 0, 0], calc8: [:nan, :nan, :nan], @@ -964,7 +964,7 @@ defmodule Explorer.DataFrameTest do "calc2" => {:s, 64}, "calc3" => {:s, 64}, "calc4" => {:f, 64}, - "calc5" => {:s, 64}, + "calc5" => {:f, 64}, "calc6" => {:s, 64}, "calc7" => {:s, 64}, "calc8" => {:f, 64}, @@ -985,7 +985,6 @@ 
defmodule Explorer.DataFrameTest do calc3: multiply(2, a), calc4: divide(2, a), calc5: pow(2, a), - calc5_1: pow(2.0, a), calc6: quotient(2, a), calc7: remainder(2, a) ) @@ -996,8 +995,7 @@ defmodule Explorer.DataFrameTest do calc2: [1, 0, -2], calc3: [2, 4, 8], calc4: [2.0, 1.0, 0.5], - calc5: [2, 4, 16], - calc5_1: [2.0, 4.0, 16.0], + calc5: [2.0, 4.0, 16.0], calc6: [2, 1, 0], calc7: [0, 0, 2] } @@ -1008,8 +1006,7 @@ defmodule Explorer.DataFrameTest do "calc2" => {:s, 64}, "calc3" => {:s, 64}, "calc4" => {:f, 64}, - "calc5" => {:s, 64}, - "calc5_1" => {:f, 64}, + "calc5" => {:f, 64}, "calc6" => {:s, 64}, "calc7" => {:s, 64} } @@ -1017,6 +1014,7 @@ defmodule Explorer.DataFrameTest do test "adds some columns with arithmetic operations on (lazy series, series)" do df = DF.new(a: [1, 2, 4]) + # TODO: check remainder and quotient in case they have a u32 on the right side. series = Explorer.Series.from_list([2, 1, 2]) df1 = @@ -1036,7 +1034,7 @@ defmodule Explorer.DataFrameTest do calc2: [-1, 1, 2], calc3: [2, 2, 8], calc4: [0.5, 2.0, 2.0], - calc5: [1, 2, 16], + calc5: [1.0, 2.0, 16.0], calc6: [0, 2, 2], calc7: [1, 0, 0] } @@ -1047,7 +1045,7 @@ defmodule Explorer.DataFrameTest do "calc2" => {:s, 64}, "calc3" => {:s, 64}, "calc4" => {:f, 64}, - "calc5" => {:s, 64}, + "calc5" => {:f, 64}, "calc6" => {:s, 64}, "calc7" => {:s, 64} } @@ -1074,7 +1072,7 @@ defmodule Explorer.DataFrameTest do calc2: [-1, 1, 2], calc3: [2, 2, 8], calc4: [0.5, 2.0, 2.0], - calc5: [1, 2, 16], + calc5: [1.0, 2.0, 16.0], calc6: [0, 2, 2], calc7: [1, 0, 0] } @@ -1085,7 +1083,7 @@ defmodule Explorer.DataFrameTest do "calc2" => {:s, 64}, "calc3" => {:s, 64}, "calc4" => {:f, 64}, - "calc5" => {:s, 64}, + "calc5" => {:f, 64}, "calc6" => {:s, 64}, "calc7" => {:s, 64} } @@ -1114,7 +1112,7 @@ defmodule Explorer.DataFrameTest do calc2: [19, 38, 57], calc3: [3, 4, 3], calc4: [2.0, :infinity, 7.5], - calc5: [1, 4, 3], + calc5: [1.0, 4.0, 3.0], calc6: [2, nil, 7], calc7: [0, nil, 4] } @@ -1128,7 +1126,7 @@ 
defmodule Explorer.DataFrameTest do "calc2" => {:s, 64}, "calc3" => {:s, 64}, "calc4" => {:f, 64}, - "calc5" => {:s, 64}, + "calc5" => {:f, 64}, "calc6" => {:s, 64}, "calc7" => {:s, 64} } diff --git a/test/explorer/series_test.exs b/test/explorer/series_test.exs index 9e2143fe8..8b2d6fa42 100644 --- a/test/explorer/series_test.exs +++ b/test/explorer/series_test.exs @@ -2290,8 +2290,8 @@ defmodule Explorer.SeriesTest do result = Series.pow(base, power) - assert result.dtype == {:s, 64} - assert Series.to_list(result) == [1, 4, 3] + assert result.dtype == {:f, 64} + assert Series.to_list(result) == [1.0, 4.0, 3.0] end end @@ -2315,8 +2315,8 @@ defmodule Explorer.SeriesTest do result = Series.pow(base, power) - assert result.dtype == {:s, 64} - assert Series.to_list(result) === [1, 4, 3] + assert result.dtype == {:f, 64} + assert Series.to_list(result) === [1.0, 4.0, 3.0] end end @@ -2392,13 +2392,13 @@ defmodule Explorer.SeriesTest do result = Series.pow(s1, s2) - assert result.dtype == {:s, 64} + assert result.dtype == {:f, 64} assert Series.to_list(result) == [1, nil, 3] end test "pow of an integer series that contains nil with an integer series" do s1 = Series.from_list([1, nil, 3]) - s2 = Series.from_list([3, 2, 1]) + s2 = Series.from_list([3, 2, 1], dtype: :u32) result = Series.pow(s1, s2) @@ -2408,7 +2408,7 @@ defmodule Explorer.SeriesTest do test "pow of an integer series that contains nil with an integer series also with nil" do s1 = Series.from_list([1, nil, 3]) - s2 = Series.from_list([3, nil, 1]) + s2 = Series.from_list([3, nil, 1], dtype: :u32) result = Series.pow(s1, s2) @@ -2421,7 +2421,7 @@ defmodule Explorer.SeriesTest do result = Series.pow(s1, 2) - assert result.dtype == {:s, 64} + assert result.dtype == {:f, 64} assert Series.to_list(result) == [1, 4, 9] end From 54991c0dd331324b5abb0299b948baaab569da48 Mon Sep 17 00:00:00 2001 From: Philip Sampaio Date: Fri, 19 Jul 2024 08:07:19 -0300 Subject: [PATCH 16/30] Revert "Attempt to fix "pow/2" after 
Polars changes" This reverts commit 2644d270bf8dfebb22b91c0713a8be9d1c208d10. --- lib/explorer/backend/lazy_series.ex | 11 +---------- lib/explorer/data_frame.ex | 4 ++-- lib/explorer/series.ex | 12 +----------- test/explorer/data_frame_test.exs | 26 ++++++++++++++------------ test/explorer/series_test.exs | 16 ++++++++-------- 5 files changed, 26 insertions(+), 43 deletions(-) diff --git a/lib/explorer/backend/lazy_series.ex b/lib/explorer/backend/lazy_series.ex index 9738f53bb..5bb147bad 100644 --- a/lib/explorer/backend/lazy_series.ex +++ b/lib/explorer/backend/lazy_series.ex @@ -166,7 +166,7 @@ defmodule Explorer.Backend.LazySeries do @comparison_operations [:equal, :not_equal, :greater, :greater_equal, :less, :less_equal] - @basic_arithmetic_operations [:add, :subtract, :multiply, :divide] + @basic_arithmetic_operations [:add, :subtract, :multiply, :divide, :pow] @other_arithmetic_operations [:quotient, :remainder] @aggregation_operations [ @@ -453,15 +453,6 @@ defmodule Explorer.Backend.LazySeries do end end - @impl true - def pow(dtype, %Series{} = left, %Series{} = right) do - # Cast from the main module is needed because we may be seeing a series from another backend. 
- args = [data!(Explorer.Series.cast(left, dtype)), data!(right)] - data = new(:pow, args, dtype, aggregations?(args)) - - Backend.Series.new(data, dtype) - end - for op <- @other_arithmetic_operations do @impl true def unquote(op)(left, right) do diff --git a/lib/explorer/data_frame.ex b/lib/explorer/data_frame.ex index 88d1f1001..c1fcbc024 100644 --- a/lib/explorer/data_frame.ex +++ b/lib/explorer/data_frame.ex @@ -2774,11 +2774,11 @@ defmodule Explorer.DataFrame do You can overwrite existing columns as well: iex> df = Explorer.DataFrame.new(a: ["a", "b", "c"], b: [1, 2, 3]) - iex> Explorer.DataFrame.mutate_with(df, &[b: Explorer.Series.add(&1["b"], 2)]) + iex> Explorer.DataFrame.mutate_with(df, &[b: Explorer.Series.pow(&1["b"], 2)]) #Explorer.DataFrame< Polars[3 x 2] a string ["a", "b", "c"] - b s64 [3, 4, 5] + b s64 [1, 4, 9] > It's possible to "reuse" a variable for different computations: diff --git a/lib/explorer/series.ex b/lib/explorer/series.ex index 1bcb3d259..8145b85cb 100644 --- a/lib/explorer/series.ex +++ b/lib/explorer/series.ex @@ -3598,9 +3598,6 @@ defmodule Explorer.Series do sizes are series, the series must have the same size or at last one of them must have size of 1. - In case the expoent is a signed integer number or series, - the resultant series will be of `{:f, 64}` dtype. 
- ## Supported dtypes * floats: #{Shared.inspect_dtypes(@float_dtypes, backsticks: true)} @@ -3617,13 +3614,6 @@ defmodule Explorer.Series do iex> s = [2, 4, 6] |> Explorer.Series.from_list() iex> Explorer.Series.pow(s, 3) - #Explorer.Series< - Polars[3] - f64 [8.0, 64.0, 216.0] - > - - iex> s = [2, 4, 6] |> Explorer.Series.from_list() - iex> Explorer.Series.pow(s, Explorer.Series.from_list([3], dtype: :u32)) #Explorer.Series< Polars[3] s64 [8, 64, 216] @@ -3667,7 +3657,7 @@ defmodule Explorer.Series do defp cast_to_pow({:f, l}, {:f, r}), do: {:f, max(l, r)} defp cast_to_pow({:f, l}, {n, _}) when K.in(n, [:u, :s]), do: {:f, l} defp cast_to_pow({n, _}, {:f, r}) when K.in(n, [:u, :s]), do: {:f, r} - defp cast_to_pow({n, _}, {:s, _}) when K.in(n, [:u, :s]), do: {:f, 64} + defp cast_to_pow({n, _}, {:s, _}) when K.in(n, [:u, :s]), do: {:s, 64} defp cast_to_pow(_, _), do: nil @doc """ diff --git a/test/explorer/data_frame_test.exs b/test/explorer/data_frame_test.exs index ace2554a6..777174c57 100644 --- a/test/explorer/data_frame_test.exs +++ b/test/explorer/data_frame_test.exs @@ -299,7 +299,7 @@ defmodule Explorer.DataFrameTest do df = DF.new(a: [1, 2, 3, 4, 5, 6, 5], b: [9, 8, 7, 6, 5, 4, 3]) message = - "expecting the function to return a boolean LazySeries, but instead it returned a LazySeries of type {:f, 64}" + "expecting the function to return a boolean LazySeries, but instead it returned a LazySeries of type {:s, 64}" assert_raise ArgumentError, message, fn -> DF.filter_with(df, fn ldf -> @@ -948,7 +948,7 @@ defmodule Explorer.DataFrameTest do calc2: [-1, 0, 2], calc3: [2, 4, 8], calc4: [0.5, 1.0, 2.0], - calc5: [1.0, 4.0, 16.0], + calc5: [1, 4, 16], calc6: [0, 1, 2], calc7: [1, 0, 0], calc8: [:nan, :nan, :nan], @@ -964,7 +964,7 @@ defmodule Explorer.DataFrameTest do "calc2" => {:s, 64}, "calc3" => {:s, 64}, "calc4" => {:f, 64}, - "calc5" => {:f, 64}, + "calc5" => {:s, 64}, "calc6" => {:s, 64}, "calc7" => {:s, 64}, "calc8" => {:f, 64}, @@ -985,6 +985,7 @@ 
defmodule Explorer.DataFrameTest do calc3: multiply(2, a), calc4: divide(2, a), calc5: pow(2, a), + calc5_1: pow(2.0, a), calc6: quotient(2, a), calc7: remainder(2, a) ) @@ -995,7 +996,8 @@ defmodule Explorer.DataFrameTest do calc2: [1, 0, -2], calc3: [2, 4, 8], calc4: [2.0, 1.0, 0.5], - calc5: [2.0, 4.0, 16.0], + calc5: [2, 4, 16], + calc5_1: [2.0, 4.0, 16.0], calc6: [2, 1, 0], calc7: [0, 0, 2] } @@ -1006,7 +1008,8 @@ defmodule Explorer.DataFrameTest do "calc2" => {:s, 64}, "calc3" => {:s, 64}, "calc4" => {:f, 64}, - "calc5" => {:f, 64}, + "calc5" => {:s, 64}, + "calc5_1" => {:f, 64}, "calc6" => {:s, 64}, "calc7" => {:s, 64} } @@ -1014,7 +1017,6 @@ defmodule Explorer.DataFrameTest do test "adds some columns with arithmetic operations on (lazy series, series)" do df = DF.new(a: [1, 2, 4]) - # TODO: check remainder and quotient in case they have a u32 on the right side. series = Explorer.Series.from_list([2, 1, 2]) df1 = @@ -1034,7 +1036,7 @@ defmodule Explorer.DataFrameTest do calc2: [-1, 1, 2], calc3: [2, 2, 8], calc4: [0.5, 2.0, 2.0], - calc5: [1.0, 2.0, 16.0], + calc5: [1, 2, 16], calc6: [0, 2, 2], calc7: [1, 0, 0] } @@ -1045,7 +1047,7 @@ defmodule Explorer.DataFrameTest do "calc2" => {:s, 64}, "calc3" => {:s, 64}, "calc4" => {:f, 64}, - "calc5" => {:f, 64}, + "calc5" => {:s, 64}, "calc6" => {:s, 64}, "calc7" => {:s, 64} } @@ -1072,7 +1074,7 @@ defmodule Explorer.DataFrameTest do calc2: [-1, 1, 2], calc3: [2, 2, 8], calc4: [0.5, 2.0, 2.0], - calc5: [1.0, 2.0, 16.0], + calc5: [1, 2, 16], calc6: [0, 2, 2], calc7: [1, 0, 0] } @@ -1083,7 +1085,7 @@ defmodule Explorer.DataFrameTest do "calc2" => {:s, 64}, "calc3" => {:s, 64}, "calc4" => {:f, 64}, - "calc5" => {:f, 64}, + "calc5" => {:s, 64}, "calc6" => {:s, 64}, "calc7" => {:s, 64} } @@ -1112,7 +1114,7 @@ defmodule Explorer.DataFrameTest do calc2: [19, 38, 57], calc3: [3, 4, 3], calc4: [2.0, :infinity, 7.5], - calc5: [1.0, 4.0, 3.0], + calc5: [1, 4, 3], calc6: [2, nil, 7], calc7: [0, nil, 4] } @@ -1126,7 +1128,7 @@ 
defmodule Explorer.DataFrameTest do "calc2" => {:s, 64}, "calc3" => {:s, 64}, "calc4" => {:f, 64}, - "calc5" => {:f, 64}, + "calc5" => {:s, 64}, "calc6" => {:s, 64}, "calc7" => {:s, 64} } diff --git a/test/explorer/series_test.exs b/test/explorer/series_test.exs index 8b2d6fa42..9e2143fe8 100644 --- a/test/explorer/series_test.exs +++ b/test/explorer/series_test.exs @@ -2290,8 +2290,8 @@ defmodule Explorer.SeriesTest do result = Series.pow(base, power) - assert result.dtype == {:f, 64} - assert Series.to_list(result) == [1.0, 4.0, 3.0] + assert result.dtype == {:s, 64} + assert Series.to_list(result) == [1, 4, 3] end end @@ -2315,8 +2315,8 @@ defmodule Explorer.SeriesTest do result = Series.pow(base, power) - assert result.dtype == {:f, 64} - assert Series.to_list(result) === [1.0, 4.0, 3.0] + assert result.dtype == {:s, 64} + assert Series.to_list(result) === [1, 4, 3] end end @@ -2392,13 +2392,13 @@ defmodule Explorer.SeriesTest do result = Series.pow(s1, s2) - assert result.dtype == {:f, 64} + assert result.dtype == {:s, 64} assert Series.to_list(result) == [1, nil, 3] end test "pow of an integer series that contains nil with an integer series" do s1 = Series.from_list([1, nil, 3]) - s2 = Series.from_list([3, 2, 1], dtype: :u32) + s2 = Series.from_list([3, 2, 1]) result = Series.pow(s1, s2) @@ -2408,7 +2408,7 @@ defmodule Explorer.SeriesTest do test "pow of an integer series that contains nil with an integer series also with nil" do s1 = Series.from_list([1, nil, 3]) - s2 = Series.from_list([3, nil, 1], dtype: :u32) + s2 = Series.from_list([3, nil, 1]) result = Series.pow(s1, s2) @@ -2421,7 +2421,7 @@ defmodule Explorer.SeriesTest do result = Series.pow(s1, 2) - assert result.dtype == {:f, 64} + assert result.dtype == {:s, 64} assert Series.to_list(result) == [1, 4, 9] end From cb3886c101adb64e6244d9f6f00028906d1708c9 Mon Sep 17 00:00:00 2001 From: Philip Sampaio Date: Fri, 19 Jul 2024 08:31:12 -0300 Subject: [PATCH 17/30] Adapt pow/2 to follow the same rules 
from Polars If exponent is float, it follows dtype of exponent. Otherwise, it follows dtype of base. See: https://github.com/pola-rs/polars/pull/15506 --- lib/explorer/series.ex | 6 +++++- test/explorer/series_test.exs | 37 ++++++++++++++++++++++------------- 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/lib/explorer/series.ex b/lib/explorer/series.ex index 8145b85cb..a4128342f 100644 --- a/lib/explorer/series.ex +++ b/lib/explorer/series.ex @@ -3595,9 +3595,13 @@ defmodule Explorer.Series do Raises a numeric series to the power of the exponent. At least one of the arguments must be a series. If both - sizes are series, the series must have the same size or + sides are series, the series must have the same size or at last one of them must have size of 1. + Note that this operation can fail if the exponent is a + signed integer series or scalar containing negative values, + and the base is also of an integer type. + ## Supported dtypes * floats: #{Shared.inspect_dtypes(@float_dtypes, backsticks: true)} diff --git a/test/explorer/series_test.exs b/test/explorer/series_test.exs index 9e2143fe8..ae1ff898b 100644 --- a/test/explorer/series_test.exs +++ b/test/explorer/series_test.exs @@ -2283,7 +2283,7 @@ defmodule Explorer.SeriesTest do end end - test "pow(uint, sint) == float64" do + test "pow(uint, sint) == sint" do for u_base <- [8, 16, 32, 64], s_power <- [8, 16, 32, 64] do base = Series.from_list([1, 2, 3], dtype: {:u, u_base}) power = Series.from_list([3, 2, 1], dtype: {:s, s_power}) @@ -2308,7 +2308,7 @@ defmodule Explorer.SeriesTest do end end - test "pow(sint, sint) == float64" do + test "pow(sint, sint) == sint" do for s_base <- [8, 16, 32, 64], s_power <- [8, 16, 32, 64] do base = Series.from_list([1, 2, 3], dtype: {:s, s_base}) power = Series.from_list([3, 2, 1], dtype: {:s, s_power}) @@ -2360,10 +2360,13 @@ defmodule Explorer.SeriesTest do s1 = Series.from_list([1, 2, 3]) s2 = Series.from_list([1, -2, 3]) - result = Series.pow(s1, s2) + 
message = + "Polars Error: invalid operation: invalid operation: conversion from `i64` to `u32` failed in column 'exponent' for 1 out of 3 values: [-2]\n\n" <> + "Hint: if you were trying to raise an integer to a negative integer power, please cast your base or exponent to float first." - assert result.dtype == {:f, 64} - assert Series.to_list(result) === [1.0, 0.25, 27.0] + assert_raise RuntimeError, message, fn -> + Series.pow(s1, s2) + end end test "pow of an integer series with a float series" do @@ -2428,10 +2431,13 @@ defmodule Explorer.SeriesTest do test "pow of an integer series with a negative integer scalar value on the right-hand side" do s1 = Series.from_list([1, 2, 3]) - result = Series.pow(s1, -2) + message = + "Polars Error: invalid operation: invalid operation: conversion from `i64` to `u32` failed in column 'literal' for 1 out of 1 values: [-2]\n\n" <> + "Hint: if you were trying to raise an integer to a negative integer power, please cast your base or exponent to float first." - assert result.dtype == {:f, 64} - assert Series.to_list(result) === [1.0, 1 / 4, 1 / 9] + assert_raise RuntimeError, message, fn -> + Series.pow(s1, -2) + end end test "pow of an integer series with a float scalar value on the right-hand side" do @@ -2484,17 +2490,20 @@ defmodule Explorer.SeriesTest do result = Series.pow(2, s1) - assert result.dtype == {:f, 64} - assert Series.to_list(result) === [2.0, 4.0, 8.0] + assert result.dtype == {:s, 64} + assert Series.to_list(result) === [2, 4, 8] end test "pow of an integer series that contains negative integer with an integer scalar value on the left-hand side" do s1 = Series.from_list([1, -2, 3]) - result = Series.pow(2, s1) + message = + "Polars Error: invalid operation: invalid operation: conversion from `i64` to `u32` failed in column 'exponent' for 1 out of 3 values: [-2]\n\n" <> + "Hint: if you were trying to raise an integer to a negative integer power, please cast your base or exponent to float first." 
- assert result.dtype == {:f, 64} - assert Series.to_list(result) === [2.0, 0.25, 8.0] + assert_raise RuntimeError, message, fn -> + Series.pow(2, s1) + end end test "pow of an integer series with a negative integer scalar value on the left-hand side" do @@ -2502,7 +2511,7 @@ defmodule Explorer.SeriesTest do result = Series.pow(-2, s1) - assert result.dtype == {:f, 64} + assert result.dtype == {:s, 64} assert Series.to_list(result) == [-2, 4, -8] end From 7c2baeb627039ea306c491e398f9a6bd69528fee Mon Sep 17 00:00:00 2001 From: Philip Sampaio Date: Fri, 19 Jul 2024 15:28:27 -0300 Subject: [PATCH 18/30] Capture errors when inspecting DFs at PolarsBackend --- lib/explorer/polars_backend/data_frame.ex | 30 +++++++++++------------ lib/explorer/polars_backend/lazy_frame.ex | 10 ++++---- lib/explorer/polars_backend/series.ex | 6 ++--- lib/explorer/polars_backend/shared.ex | 29 +++++++++++++--------- 4 files changed, 41 insertions(+), 34 deletions(-) diff --git a/lib/explorer/polars_backend/data_frame.ex b/lib/explorer/polars_backend/data_frame.ex index dac839b3d..a70a72397 100644 --- a/lib/explorer/polars_backend/data_frame.ex +++ b/lib/explorer/polars_backend/data_frame.ex @@ -32,7 +32,7 @@ defmodule Explorer.PolarsBackend.DataFrame do with {:ok, df_result} <- adbc_result, {:ok, df} <- df_result, - do: {:ok, Shared.create_dataframe(df)} + do: Shared.create_dataframe(df) end @impl true @@ -122,7 +122,7 @@ defmodule Explorer.PolarsBackend.DataFrame do ) case df do - {:ok, df} -> {:ok, Shared.create_dataframe(df)} + {:ok, df} -> Shared.create_dataframe(df) {:error, error} -> {:error, RuntimeError.exception(error)} end end @@ -220,7 +220,7 @@ defmodule Explorer.PolarsBackend.DataFrame do ) case df do - {:ok, df} -> {:ok, Shared.create_dataframe(df)} + {:ok, df} -> Shared.create_dataframe(df) {:error, error} -> {:error, RuntimeError.exception(error)} end end @@ -247,7 +247,7 @@ defmodule Explorer.PolarsBackend.DataFrame do @impl true def from_ndjson(%Local.Entry{} = entry, 
infer_schema_length, batch_size) do case Native.df_from_ndjson(entry.path, infer_schema_length, batch_size) do - {:ok, df} -> {:ok, Shared.create_dataframe(df)} + {:ok, df} -> Shared.create_dataframe(df) {:error, error} -> {:error, RuntimeError.exception(error)} end end @@ -279,7 +279,7 @@ defmodule Explorer.PolarsBackend.DataFrame do @impl true def load_ndjson(contents, infer_schema_length, batch_size) when is_binary(contents) do case Native.df_load_ndjson(contents, infer_schema_length, batch_size) do - {:ok, df} -> {:ok, Shared.create_dataframe(df)} + {:ok, df} -> Shared.create_dataframe(df) {:error, error} -> {:error, RuntimeError.exception(error)} end end @@ -289,7 +289,7 @@ defmodule Explorer.PolarsBackend.DataFrame do # We first read using a lazy dataframe, then we collect. with {:ok, ldf} <- Native.lf_from_parquet_cloud(entry, max_rows, columns), {:ok, df} <- Native.lf_compute(ldf) do - {:ok, Shared.create_dataframe(df)} + Shared.create_dataframe(df) end end @@ -321,7 +321,7 @@ defmodule Explorer.PolarsBackend.DataFrame do ) case df do - {:ok, df} -> {:ok, Shared.create_dataframe(df)} + {:ok, df} -> Shared.create_dataframe(df) {:error, error} -> {:error, RuntimeError.exception(error)} end end @@ -375,7 +375,7 @@ defmodule Explorer.PolarsBackend.DataFrame do @impl true def load_parquet(contents) when is_binary(contents) do case Native.df_load_parquet(contents) do - {:ok, df} -> {:ok, Shared.create_dataframe(df)} + {:ok, df} -> Shared.create_dataframe(df) {:error, error} -> {:error, RuntimeError.exception(error)} end end @@ -399,7 +399,7 @@ defmodule Explorer.PolarsBackend.DataFrame do {columns, projection} = column_names_or_projection(columns) case Native.df_from_ipc(entry.path, columns, projection) do - {:ok, df} -> {:ok, Shared.create_dataframe(df)} + {:ok, df} -> Shared.create_dataframe(df) {:error, error} -> {:error, RuntimeError.exception(error)} end end @@ -433,7 +433,7 @@ defmodule Explorer.PolarsBackend.DataFrame do {columns, projection} = 
column_names_or_projection(columns) case Native.df_load_ipc(contents, columns, projection) do - {:ok, df} -> {:ok, Shared.create_dataframe(df)} + {:ok, df} -> Shared.create_dataframe(df) {:error, error} -> {:error, RuntimeError.exception(error)} end end @@ -457,7 +457,7 @@ defmodule Explorer.PolarsBackend.DataFrame do {columns, projection} = column_names_or_projection(columns) case Native.df_from_ipc_stream(entry.path, columns, projection) do - {:ok, df} -> {:ok, Shared.create_dataframe(df)} + {:ok, df} -> Shared.create_dataframe(df) {:error, error} -> {:error, RuntimeError.exception(error)} end end @@ -491,7 +491,7 @@ defmodule Explorer.PolarsBackend.DataFrame do {columns, projection} = column_names_or_projection(columns) case Native.df_load_ipc_stream(contents, columns, projection) do - {:ok, df} -> {:ok, Shared.create_dataframe(df)} + {:ok, df} -> Shared.create_dataframe(df) {:error, error} -> {:error, RuntimeError.exception(error)} end end @@ -561,7 +561,7 @@ defmodule Explorer.PolarsBackend.DataFrame do list = Enum.map(list, & &1.data) Shared.apply(:df_from_series, [list]) - |> Shared.create_dataframe() + |> Shared.create_dataframe!() end defp to_column_name!(column_name) when is_binary(column_name), do: column_name @@ -674,7 +674,7 @@ defmodule Explorer.PolarsBackend.DataFrame do @impl true def nil_count(%DataFrame{} = df) do Shared.apply(:df_nil_count, [df.data]) - |> Shared.create_dataframe() + |> Shared.create_dataframe!() end @impl true @@ -816,7 +816,7 @@ defmodule Explorer.PolarsBackend.DataFrame do values_from, names_prefix_optional ]) - |> Shared.create_dataframe() + |> Shared.create_dataframe!() end @impl true diff --git a/lib/explorer/polars_backend/lazy_frame.ex b/lib/explorer/polars_backend/lazy_frame.ex index ab5fa78b9..50ce64b90 100644 --- a/lib/explorer/polars_backend/lazy_frame.ex +++ b/lib/explorer/polars_backend/lazy_frame.ex @@ -170,7 +170,7 @@ defmodule Explorer.PolarsBackend.LazyFrame do ) case result do - {:ok, polars_ldf} -> {:ok, 
Shared.create_dataframe(polars_ldf)} + {:ok, polars_ldf} -> Shared.create_dataframe(polars_ldf) {:error, error} -> {:error, RuntimeError.exception(error)} end end @@ -204,7 +204,7 @@ defmodule Explorer.PolarsBackend.LazyFrame do @impl true def from_parquet(%S3.Entry{} = entry, max_rows, columns, _rechunk) do case Native.lf_from_parquet_cloud(entry, max_rows, columns) do - {:ok, polars_ldf} -> {:ok, Shared.create_dataframe(polars_ldf)} + {:ok, polars_ldf} -> Shared.create_dataframe(polars_ldf) {:error, error} -> {:error, RuntimeError.exception(error)} end end @@ -212,7 +212,7 @@ defmodule Explorer.PolarsBackend.LazyFrame do @impl true def from_parquet(%Local.Entry{} = entry, max_rows, columns, _rechunk) do case Native.lf_from_parquet(entry.path, max_rows, columns) do - {:ok, polars_ldf} -> {:ok, Shared.create_dataframe(polars_ldf)} + {:ok, polars_ldf} -> Shared.create_dataframe(polars_ldf) {:error, error} -> {:error, RuntimeError.exception(error)} end end @@ -226,7 +226,7 @@ defmodule Explorer.PolarsBackend.LazyFrame do @impl true def from_ndjson(%Local.Entry{} = entry, infer_schema_length, batch_size) do case Native.lf_from_ndjson(entry.path, infer_schema_length, batch_size) do - {:ok, polars_ldf} -> {:ok, Shared.create_dataframe(polars_ldf)} + {:ok, polars_ldf} -> Shared.create_dataframe(polars_ldf) {:error, error} -> {:error, RuntimeError.exception(error)} end end @@ -240,7 +240,7 @@ defmodule Explorer.PolarsBackend.LazyFrame do @impl true def from_ipc(%Local.Entry{} = entry, columns) when is_nil(columns) do case Native.lf_from_ipc(entry.path) do - {:ok, polars_ldf} -> {:ok, Shared.create_dataframe(polars_ldf)} + {:ok, polars_ldf} -> Shared.create_dataframe(polars_ldf) {:error, error} -> {:error, RuntimeError.exception(error)} end end diff --git a/lib/explorer/polars_backend/series.ex b/lib/explorer/polars_backend/series.ex index 281ee9aa3..5d5af3869 100644 --- a/lib/explorer/polars_backend/series.ex +++ b/lib/explorer/polars_backend/series.ex @@ -518,7 +518,7 @@ 
defmodule Explorer.PolarsBackend.Series do def frequencies(%Series{} = series) do Shared.apply(:s_frequencies, [series.data]) - |> Shared.create_dataframe() + |> Shared.create_dataframe!() |> DataFrame.rename(["values", "counts"]) end @@ -534,7 +534,7 @@ defmodule Explorer.PolarsBackend.Series do category_label ) do {:ok, polars_df} -> - Shared.create_dataframe(polars_df) + Shared.create_dataframe!(polars_df) {:error, "Polars Error: lengths don't match: " <> _rest} -> raise ArgumentError, "lengths don't match: labels count must equal bins count" @@ -553,7 +553,7 @@ defmodule Explorer.PolarsBackend.Series do break_point_label, category_label ]) - |> Shared.create_dataframe() + |> Shared.create_dataframe!() end # Window diff --git a/lib/explorer/polars_backend/shared.ex b/lib/explorer/polars_backend/shared.ex index c5e6dd234..215057b27 100644 --- a/lib/explorer/polars_backend/shared.ex +++ b/lib/explorer/polars_backend/shared.ex @@ -49,11 +49,11 @@ defmodule Explorer.PolarsBackend.Shared do check_df = if match?(%PolarsLazyFrame{}, new_df) do case Native.lf_compute(new_df) do - {:ok, new_df} -> create_dataframe(new_df) + {:ok, new_df} -> create_dataframe!(new_df) {:error, error} -> raise runtime_error(error) end else - create_dataframe(new_df) + create_dataframe!(new_df) end if Enum.sort(out_df.names) != Enum.sort(check_df.names) or @@ -95,27 +95,34 @@ defmodule Explorer.PolarsBackend.Shared do end def create_dataframe(polars_df) do - Explorer.Backend.DataFrame.new(polars_df, df_names(polars_df), df_dtypes(polars_df)) + with {:ok, names} <- df_names(polars_df), {:ok, dtypes} <- df_dtypes(polars_df) do + {:ok, Explorer.Backend.DataFrame.new(polars_df, names, dtypes)} + else + {:error, error} -> {:error, runtime_error(error)} + end + end + + def create_dataframe!(polars_df) do + case create_dataframe(polars_df) do + {:ok, df} -> df + {:error, error} -> raise error + end end defp df_names(%PolarsDataFrame{} = polars_df) do - {:ok, names} = Native.df_names(polars_df) - 
names + Native.df_names(polars_df) end defp df_names(%PolarsLazyFrame{} = polars_df) do - {:ok, names} = Native.lf_names(polars_df) - names + Native.lf_names(polars_df) end defp df_dtypes(%PolarsDataFrame{} = polars_df) do - {:ok, dtypes} = Native.df_dtypes(polars_df) - dtypes + Native.df_dtypes(polars_df) end defp df_dtypes(%PolarsLazyFrame{} = polars_df) do - {:ok, dtypes} = Native.lf_dtypes(polars_df) - dtypes + Native.lf_dtypes(polars_df) end def from_list(list, dtype), do: from_list(list, dtype, "") From 878b4227c30d3f59d6b35b029acf6f61d41e7bf9 Mon Sep 17 00:00:00 2001 From: Philip Sampaio Date: Fri, 19 Jul 2024 16:59:19 -0300 Subject: [PATCH 19/30] Cleaning comments and add missing option --- lib/explorer/polars_backend/data_frame.ex | 1 + lib/explorer/polars_backend/lazy_frame.ex | 1 - lib/explorer/polars_backend/native.ex | 1 + native/explorer/src/dataframe.rs | 29 ++++------------------- native/explorer/src/dataframe/io.rs | 1 - 5 files changed, 6 insertions(+), 27 deletions(-) diff --git a/lib/explorer/polars_backend/data_frame.ex b/lib/explorer/polars_backend/data_frame.ex index a70a72397..da27b437e 100644 --- a/lib/explorer/polars_backend/data_frame.ex +++ b/lib/explorer/polars_backend/data_frame.ex @@ -712,6 +712,7 @@ defmodule Explorer.PolarsBackend.DataFrame do expressions, directions, maintain_order?, + multithreaded?, nulls_last?, df.groups ]) diff --git a/lib/explorer/polars_backend/lazy_frame.ex b/lib/explorer/polars_backend/lazy_frame.ex index 50ce64b90..31e3d4061 100644 --- a/lib/explorer/polars_backend/lazy_frame.ex +++ b/lib/explorer/polars_backend/lazy_frame.ex @@ -479,7 +479,6 @@ defmodule Explorer.PolarsBackend.LazyFrame do exprs = for {name, lazy_series} <- column_pairs do - # TODO: we may want to cast to the target out_df column types. 
lazy_series |> to_expr() |> then(maybe_over_groups_fun) diff --git a/lib/explorer/polars_backend/native.ex b/lib/explorer/polars_backend/native.ex index 44e317457..037df1bd4 100644 --- a/lib/explorer/polars_backend/native.ex +++ b/lib/explorer/polars_backend/native.ex @@ -67,6 +67,7 @@ defmodule Explorer.PolarsBackend.Native do _expressions, _directions, _maintain_order?, + _multithreaded?, _nulls_last?, _groups ), diff --git a/native/explorer/src/dataframe.rs b/native/explorer/src/dataframe.rs index 355fb87da..7ca7c9f73 100644 --- a/native/explorer/src/dataframe.rs +++ b/native/explorer/src/dataframe.rs @@ -267,32 +267,10 @@ pub fn df_sort_by( .with_order_descending_multi(reverse); let new_df = if groups.is_empty() { - // Note: we cannot use either df.sort or df.sort_with_options. - // df.sort does not allow a nulls_last option. - // df.sort_with_options only allows a single column. - // let by_columns = df.select_series(by_columns)?; df.sort(by_columns, sort_options)? - // df.sort_impl( - // by_columns, - // reverse, - // nulls_last, - // maintain_order, - // None, - // multithreaded, - // )? } else { - df.group_by_stable(groups)?.apply(|df| { - df.sort(by_columns.clone(), sort_options.clone()) - // let by_columns = df.select_series(&by_columns)?; - // df.sort_impl( - // by_columns, - // reverse.clone(), - // nulls_last, - // maintain_order, - // None, - // multithreaded, - // ) - })? + df.group_by_stable(groups)? + .apply(|df| df.sort(by_columns.clone(), sort_options.clone()))? 
}; Ok(ExDataFrame::new(new_df)) @@ -304,6 +282,7 @@ pub fn df_sort_with( expressions: Vec, directions: Vec, maintain_order: bool, + multithreaded: bool, nulls_last: bool, groups: Vec, ) -> Result { @@ -312,7 +291,7 @@ pub fn df_sort_with( let sort_options = SortMultipleOptions::new() .with_maintain_order(maintain_order) - // .with_multithreaded(multithreaded) + .with_multithreaded(multithreaded) .with_nulls_last(nulls_last) .with_order_descending_multi(directions); diff --git a/native/explorer/src/dataframe/io.rs b/native/explorer/src/dataframe/io.rs index d3937b1f6..4eea676ca 100644 --- a/native/explorer/src/dataframe/io.rs +++ b/native/explorer/src/dataframe/io.rs @@ -173,7 +173,6 @@ pub fn df_load_csv( .with_skip_rows_after_header(skip_rows_after_header) .with_projection(projection.map(Arc::new)) .with_rechunk(do_rechunk) - //.with_schema(Some(schema_from_dtypes_pairs(dtypes)?)) .with_parse_options( CsvParseOptions::default() .with_separator(delimiter_as_byte) From 0b1f39e78a2300774dcfac6d387cfe32e294b45e Mon Sep 17 00:00:00 2001 From: Philip Sampaio Date: Sat, 20 Jul 2024 17:14:53 -0300 Subject: [PATCH 20/30] Rewrite CloudWriter to use BufWriter from object_store This makes the "ObjectStore" choose to upload using a single request or a multi request (multi part) upload depending on the size of the chunks. This change is needed because Polars is now calling the writer without buffering, so this was breaking the upload. There are two tests failing for different reasons: - the IPC upload to an unknown bucket is failing because the new CloudWriter is not propagating the error. This is probably an easy fix. - the Lazy "parquet to cloud" is failing for the reason I wrote above.
This is probably related to this issue: https://github.com/pola-rs/polars/issues/17172 --- native/explorer/Cargo.lock | 166 ++++--------------------- native/explorer/Cargo.toml | 3 +- native/explorer/src/cloud_writer.rs | 117 +++++------------ native/explorer/src/dataframe/io.rs | 13 +- test/explorer/data_frame/ipc_test.exs | 1 + test/explorer/data_frame/lazy_test.exs | 3 +- 6 files changed, 66 insertions(+), 237 deletions(-) diff --git a/native/explorer/Cargo.lock b/native/explorer/Cargo.lock index fb2fc5ade..cd632920a 100644 --- a/native/explorer/Cargo.lock +++ b/native/explorer/Cargo.lock @@ -287,9 +287,9 @@ checksum = "a12916984aab3fa6e39d655a33e09c0071eb36d6ab3aea5c2d78551f1df6d952" [[package]] name = "cc" -version = "1.1.5" +version = "1.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "324c74f2155653c90b04f25b2a47a8a631360cb908f92a772695f430c7e31052" +checksum = "2aba8f4e9906c7ce3c73463f62a7f0c65183ada1a2d47e397cc8810827f9694f" dependencies = [ "jobserver", "libc", @@ -533,7 +533,7 @@ dependencies = [ "chrono-tz 0.9.0", "either", "mimalloc", - "object_store 0.9.1", + "object_store", "polars", "polars-ops", "rand", @@ -943,20 +943,6 @@ dependencies = [ "want", ] -[[package]] -name = "hyper-rustls" -version = "0.24.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" -dependencies = [ - "futures-util", - "http 0.2.12", - "hyper 0.14.30", - "rustls 0.21.12", - "tokio", - "tokio-rustls 0.24.1", -] - [[package]] name = "hyper-rustls" version = "0.27.2" @@ -967,11 +953,11 @@ dependencies = [ "http 1.1.0", "hyper 1.4.1", "hyper-util", - "rustls 0.23.11", - "rustls-native-certs 0.7.1", + "rustls", + "rustls-native-certs", "rustls-pki-types", "tokio", - "tokio-rustls 0.26.0", + "tokio-rustls", "tower-service", ] @@ -1368,36 +1354,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "object_store" -version = "0.9.1" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8718f8b65fdf67a45108d1548347d4af7d71fb81ce727bbf9e3b2535e079db3" -dependencies = [ - "async-trait", - "base64 0.21.7", - "bytes", - "chrono", - "futures", - "humantime", - "hyper 0.14.30", - "itertools", - "md-5", - "parking_lot", - "percent-encoding", - "quick-xml", - "rand", - "reqwest 0.11.27", - "ring", - "serde", - "serde_json", - "snafu", - "tokio", - "tracing", - "url", - "walkdir", -] - [[package]] name = "object_store" version = "0.10.1" @@ -1694,7 +1650,7 @@ version = "0.41.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c9b06dfbe79cabe50a7f0a90396864b5ee2c0e0f8d6a9353b2343c29c56e937" dependencies = [ - "object_store 0.10.1", + "object_store", "polars-arrow-format", "regex", "simdutf8", @@ -1743,7 +1699,7 @@ dependencies = [ "memchr", "memmap2", "num-traits", - "object_store 0.10.1", + "object_store", "once_cell", "percent-encoding", "polars-arrow", @@ -2080,7 +2036,7 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash", - "rustls 0.23.11", + "rustls", "thiserror", "tokio", "tracing", @@ -2096,7 +2052,7 @@ dependencies = [ "rand", "ring", "rustc-hash", - "rustls 0.23.11", + "rustls", "slab", "thiserror", "tinyvec", @@ -2176,9 +2132,9 @@ dependencies = [ [[package]] name = "raw-cpuid" -version = "11.0.2" +version = "11.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e29830cbb1290e404f24c73af91c5d8d631ce7e128691e9477556b540cd01ecd" +checksum = "cb9ee317cfe3fbd54b36a511efc1edd42e216903c9cd575e686dd68a2ba90d8d" dependencies = [ "bitflags 2.6.0", ] @@ -2296,7 +2252,6 @@ dependencies = [ "http 0.2.12", "http-body 0.4.6", "hyper 0.14.30", - "hyper-rustls 0.24.2", "ipnet", "js-sys", "log", @@ -2304,22 +2259,16 @@ dependencies = [ "once_cell", "percent-encoding", "pin-project-lite", - "rustls 0.21.12", - "rustls-native-certs 0.6.3", - "rustls-pemfile 1.0.4", "serde", "serde_json", "serde_urlencoded", "sync_wrapper 
0.1.2", "system-configuration", "tokio", - "tokio-rustls 0.24.1", - "tokio-util", "tower-service", "url", "wasm-bindgen", "wasm-bindgen-futures", - "wasm-streams", "web-sys", "winreg 0.50.0", ] @@ -2339,7 +2288,7 @@ dependencies = [ "http-body 1.0.1", "http-body-util", "hyper 1.4.1", - "hyper-rustls 0.27.2", + "hyper-rustls", "hyper-util", "ipnet", "js-sys", @@ -2349,16 +2298,16 @@ dependencies = [ "percent-encoding", "pin-project-lite", "quinn", - "rustls 0.23.11", - "rustls-native-certs 0.7.1", - "rustls-pemfile 2.1.2", + "rustls", + "rustls-native-certs", + "rustls-pemfile", "rustls-pki-types", "serde", "serde_json", "serde_urlencoded", "sync_wrapper 1.0.1", "tokio", - "tokio-rustls 0.26.0", + "tokio-rustls", "tokio-util", "tower-service", "url", @@ -2443,18 +2392,6 @@ dependencies = [ "unreachable", ] -[[package]] -name = "rustls" -version = "0.21.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" -dependencies = [ - "log", - "ring", - "rustls-webpki 0.101.7", - "sct", -] - [[package]] name = "rustls" version = "0.23.11" @@ -2464,23 +2401,11 @@ dependencies = [ "once_cell", "ring", "rustls-pki-types", - "rustls-webpki 0.102.5", + "rustls-webpki", "subtle", "zeroize", ] -[[package]] -name = "rustls-native-certs" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00" -dependencies = [ - "openssl-probe", - "rustls-pemfile 1.0.4", - "schannel", - "security-framework", -] - [[package]] name = "rustls-native-certs" version = "0.7.1" @@ -2488,21 +2413,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a88d6d420651b496bdd98684116959239430022a115c1240e6c3993be0b15fba" dependencies = [ "openssl-probe", - "rustls-pemfile 2.1.2", + "rustls-pemfile", "rustls-pki-types", "schannel", "security-framework", ] -[[package]] -name = 
"rustls-pemfile" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" -dependencies = [ - "base64 0.21.7", -] - [[package]] name = "rustls-pemfile" version = "2.1.2" @@ -2519,16 +2435,6 @@ version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "976295e77ce332211c0d24d92c0e83e50f5c5f046d11082cea19f3df13a3562d" -[[package]] -name = "rustls-webpki" -version = "0.101.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" -dependencies = [ - "ring", - "untrusted", -] - [[package]] name = "rustls-webpki" version = "0.102.5" @@ -2576,16 +2482,6 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" -[[package]] -name = "sct" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" -dependencies = [ - "ring", - "untrusted", -] - [[package]] name = "security-framework" version = "2.11.1" @@ -2899,18 +2795,18 @@ checksum = "c1bbb9f3c5c463a01705937a24fdabc5047929ac764b2d5b9cf681c1f5041ed5" [[package]] name = "thiserror" -version = "1.0.62" +version = "1.0.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2675633b1499176c2dff06b0856a27976a8f9d436737b4cf4f312d4d91d8bbb" +checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.62" +version = "1.0.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d20468752b09f49e909e55a5d338caa8bedf615594e9d80bc4c565d30faf798c" +checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" dependencies = [ 
"proc-macro2", "quote", @@ -2934,9 +2830,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.38.0" +version = "1.38.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba4f4a02a7a80d6f274636f0aa95c7e383b912d41fe721a31f29e29698585a4a" +checksum = "eb2caba9f80616f438e09748d5acda951967e1ea58508ef53d9c6402485a46df" dependencies = [ "backtrace", "bytes", @@ -2960,23 +2856,13 @@ dependencies = [ "syn 2.0.71", ] -[[package]] -name = "tokio-rustls" -version = "0.24.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" -dependencies = [ - "rustls 0.21.12", - "tokio", -] - [[package]] name = "tokio-rustls" version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4" dependencies = [ - "rustls 0.23.11", + "rustls", "rustls-pki-types", "tokio", ] diff --git a/native/explorer/Cargo.toml b/native/explorer/Cargo.toml index 191933f89..8fd22dd81 100644 --- a/native/explorer/Cargo.toml +++ b/native/explorer/Cargo.toml @@ -30,7 +30,7 @@ tokio-util = { version = "0.7", default-features = false, features = [ "io", "io-util", ], optional = true } -object_store = { version = "0.9", default-features = false, optional = true } +object_store = { version = "0.10", default-features = false, optional = true } # MiMalloc won´t compile on Windows with the GCC compiler. # On Linux with Musl it won´t load correctly. 
@@ -55,7 +55,6 @@ features = [ "ewma", "extract_groups", "extract_jsonpath", - # "group_by_list", "ipc_streaming", "ipc", "is_in", diff --git a/native/explorer/src/cloud_writer.rs b/native/explorer/src/cloud_writer.rs index 1cc128bb9..509633e8c 100644 --- a/native/explorer/src/cloud_writer.rs +++ b/native/explorer/src/cloud_writer.rs @@ -1,83 +1,62 @@ -use tokio::io::{AsyncWrite, AsyncWriteExt}; - use crate::ExplorerError; use object_store::path::Path; -use object_store::MultipartId; use object_store::ObjectStore; +use std::sync::Arc; -/// CloudWriter wraps the asynchronous interface of [ObjectStore::put_multipart](https://docs.rs/object_store/latest/object_store/trait.ObjectStore.html#tymethod.put_multipart) +use object_store::buffered::BufWriter as OSBufWriter; +use tokio::io::AsyncWriteExt; +/// CloudWriter wraps the asynchronous interface of [ObjectStore's BufWriter](https://docs.rs/object_store/latest/object_store/buffered/struct.BufWriter.html) /// in a synchronous interface which implements `std::io::Write`. /// /// This allows it to be used in sync code which would otherwise write to a simple File or byte stream, /// such as with `polars::prelude::CsvWriter`. pub struct CloudWriter { - // Hold a reference to the store. The store itself is thread-safe. - object_store: Box, - // The path in the object_store which we want to write to - path: Path, - // ID of a partially-done upload, used to abort the upload on error - multipart_id: MultipartId, // The Tokio runtime which the writer uses internally. runtime: tokio::runtime::Runtime, // Internal writer, constructed at creation - writer: Box, + writer: OSBufWriter, } impl CloudWriter { /// Construct a new CloudWriter /// /// Creates a new (current-thread) Tokio runtime - /// which bridges the sync writing process with the async ObjectStore multipart uploading. - pub fn new(object_store: Box, path: Path) -> Result { + /// which bridges the sync writing process with the async ObjectStore uploading. 
+ pub fn new(object_store: Arc, path: Path) -> Result { let runtime = tokio::runtime::Builder::new_current_thread() .enable_time() .enable_io() .build()?; - - let (multipart_id, writer) = - runtime.block_on(async { Self::build_writer(&object_store, &path).await })?; - Ok(CloudWriter { - object_store, - path, - multipart_id, - runtime, - writer, - }) - } - - async fn build_writer( - object_store: &dyn ObjectStore, - path: &Path, - ) -> Result<(MultipartId, Box), ExplorerError> { - let (multipart_id, async_s3_writer) = (object_store.put_multipart(path).await) - .map_err(|_| ExplorerError::Other(format!("Could not put multipart to path {path}")))?; - Ok((multipart_id, async_s3_writer)) - } - - fn abort(&self) { - let _ = self.runtime.block_on(async { - self.object_store - .abort_multipart(&self.path, &self.multipart_id) - .await - }); + let writer = OSBufWriter::new(object_store, path); + Ok(CloudWriter { writer, runtime }) } } impl std::io::Write for CloudWriter { fn write(&mut self, buf: &[u8]) -> std::io::Result { - let res = self.runtime.block_on(self.writer.write(buf)); - if res.is_err() { - self.abort(); - } - res + // SAFETY: + // We extend the lifetime for the duration of this function. This is safe as we block the + // async runtime here + // This was copied from Polars' own CloudWriter. 
+ let buf = unsafe { std::mem::transmute::<&[u8], &'static [u8]>(buf) }; + + self.runtime.block_on(async { + let res = self.writer.write_all(buf).await; + if res.is_err() { + let _ = self.writer.abort().await; + } + Ok(buf.len()) + }) } fn flush(&mut self) -> std::io::Result<()> { - let res = self.runtime.block_on(self.writer.flush()); - if res.is_err() { - self.abort(); - } - res + self.runtime.block_on(async { + let res = self.writer.flush().await; + if res.is_err() { + let _ = self.writer.abort().await; + } + Ok(()) + }) } } @@ -86,41 +65,3 @@ impl Drop for CloudWriter { let _ = self.runtime.block_on(self.writer.shutdown()); } } - -#[cfg(test)] -mod tests { - use object_store::ObjectStore; - - use super::*; - - use polars::df; - use polars::prelude::DataFrame; - - fn example_dataframe() -> DataFrame { - df!( - "foo" => &[1, 2, 3], - "bar" => &[None, Some("bak"), Some("baz")], - ) - .unwrap() - } - - #[test] - fn csv_to_local_objectstore_cloudwriter() { - use polars::prelude::{CsvWriter, SerWriter}; - - let mut df = example_dataframe(); - - let object_store: Box = Box::new( - object_store::local::LocalFileSystem::new_with_prefix("/tmp/") - .expect("Could not initialize connection"), - ); - let object_store: Box = object_store; - - let path: object_store::path::Path = "cloud_writer_example.csv".into(); - - let mut cloud_writer = CloudWriter::new(object_store, path).unwrap(); - CsvWriter::new(&mut cloud_writer) - .finish(&mut df) - .expect("Could not write dataframe as CSV to remote location"); - } -} diff --git a/native/explorer/src/dataframe/io.rs b/native/explorer/src/dataframe/io.rs index 4eea676ca..67b2c4fc7 100644 --- a/native/explorer/src/dataframe/io.rs +++ b/native/explorer/src/dataframe/io.rs @@ -18,6 +18,9 @@ use std::io::{BufReader, BufWriter, Cursor}; use crate::datatypes::{ExParquetCompression, ExS3Entry, ExSeriesDtype}; use crate::{ExDataFrame, ExplorerError}; +#[cfg(feature = "cloud")] +use crate::cloud_writer::CloudWriter; + // ============ CSV 
============ // #[rustler::nif(schedule = "DirtyIo")] @@ -240,6 +243,7 @@ pub fn df_to_parquet_cloud( ParquetWriter::new(&mut cloud_writer) .with_compression(compression) .finish(&mut data.clone())?; + Ok(()) } @@ -249,9 +253,7 @@ fn object_store_to_explorer_error(error: impl std::fmt::Debug) -> ExplorerError } #[cfg(feature = "aws")] -fn build_aws_s3_cloud_writer( - ex_entry: ExS3Entry, -) -> Result { +fn build_aws_s3_cloud_writer(ex_entry: ExS3Entry) -> Result { let config = ex_entry.config; let mut aws_builder = object_store::aws::AmazonS3Builder::new() .with_region(&config.region) @@ -278,9 +280,8 @@ fn build_aws_s3_cloud_writer( .build() .map_err(object_store_to_explorer_error)?; - let object_store: Box = Box::new(aws_s3); - - crate::cloud_writer::CloudWriter::new(object_store, ex_entry.key.into()) + let object_store: Arc = Arc::new(aws_s3); + CloudWriter::new(object_store, ex_entry.key.into()) } #[rustler::nif(schedule = "DirtyCpu")] diff --git a/test/explorer/data_frame/ipc_test.exs b/test/explorer/data_frame/ipc_test.exs index e0e45d14a..d417c27fe 100644 --- a/test/explorer/data_frame/ipc_test.exs +++ b/test/explorer/data_frame/ipc_test.exs @@ -179,6 +179,7 @@ defmodule Explorer.DataFrame.IPCTest do end @tag :cloud_integration + @tag :skip test "cannot write an IPC file to S3 if bucket does not exist", %{ df: df, s3_config: s3_config diff --git a/test/explorer/data_frame/lazy_test.exs b/test/explorer/data_frame/lazy_test.exs index da04861cb..5d201cf9a 100644 --- a/test/explorer/data_frame/lazy_test.exs +++ b/test/explorer/data_frame/lazy_test.exs @@ -261,7 +261,7 @@ defmodule Explorer.DataFrame.LazyTest do ) assert RuntimeError.message(error) =~ - "Polars Error: Object at location oranges.parquet not found:" + "Polars Error: expected at least 1 path: 'parquet scan' failed: 'select' input failed to resolve" end end @@ -400,6 +400,7 @@ defmodule Explorer.DataFrame.LazyTest do end @tag :cloud_integration + @tag :skip test "to_parquet/2 - cloud with streaming 
enabled", %{ldf: ldf} do config = %FSS.S3.Config{ access_key_id: "test", From 15f8521bbffcceca7a08fb83e85b45eb4b7b8bca Mon Sep 17 00:00:00 2001 From: Philip Sampaio Date: Mon, 22 Jul 2024 09:43:27 -0300 Subject: [PATCH 21/30] Update object_store to v0.10.2 This is an attempt to fix the issues with the sink_parquet_cloud. --- native/explorer/Cargo.lock | 59 +++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/native/explorer/Cargo.lock b/native/explorer/Cargo.lock index cd632920a..95c5faa54 100644 --- a/native/explorer/Cargo.lock +++ b/native/explorer/Cargo.lock @@ -98,9 +98,9 @@ checksum = "bf7d0a018de4f6aa429b9d33d69edf69072b1c5b1cb8d3e4a5f7ef898fc3eb76" [[package]] name = "arrayref" -version = "0.3.7" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545" +checksum = "9d151e35f61089500b617991b791fc8bfd237ae50cd5950803758a179b41e67a" [[package]] name = "arrayvec" @@ -127,7 +127,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -138,7 +138,7 @@ checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -276,7 +276,7 @@ checksum = "1ee891b04274a59bd38b412188e24b849617b2e45a0fd8d057deb63e7403761b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -499,7 +499,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -664,7 +664,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -1039,9 +1039,9 @@ checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" 
[[package]] name = "itertools" -version = "0.12.1" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" dependencies = [ "either", ] @@ -1356,9 +1356,9 @@ dependencies = [ [[package]] name = "object_store" -version = "0.10.1" +version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbebfd32c213ba1907fa7a9c9138015a8de2b43e30c5aa45b18f7deb46786ad6" +checksum = "e6da452820c715ce78221e8202ccc599b4a52f3e1eb3eedb487b680c81a8e3f3" dependencies = [ "async-trait", "base64 0.22.1", @@ -1499,7 +1499,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -2017,9 +2017,9 @@ dependencies = [ [[package]] name = "quick-xml" -version = "0.31.0" +version = "0.36.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1004a344b30a54e2ee58d66a71b32d2db2feb0a31f9a2d302bf0536f15de2a33" +checksum = "4091e032efecb09d7b1f711f487b85ab925632a842627e3200fb088382cde32c" dependencies = [ "memchr", "serde", @@ -2061,14 +2061,13 @@ dependencies = [ [[package]] name = "quinn-udp" -version = "0.5.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9096629c45860fc7fb143e125eb826b5e721e10be3263160c7d60ca832cf8c46" +checksum = "25a78e6f726d84fcf960409f509ae354a32648f090c8d32a2ea8b1a1bc3bab14" dependencies = [ "libc", "once_cell", "socket2", - "tracing", "windows-sys 0.52.0", ] @@ -2176,7 +2175,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -2205,7 +2204,7 @@ checksum = 
"bcc303e793d3734489387d205e9b186fac9c6cfacedd98cbb2e8a5943595f3e6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -2379,7 +2378,7 @@ dependencies = [ "inventory", "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -2522,7 +2521,7 @@ checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -2709,7 +2708,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -2731,9 +2730,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.71" +version = "2.0.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b146dcf730474b4bcd16c311627b31ede9ab149045db4d6088b3becaea046462" +checksum = "dc4b9b9bf2add8093d3f2c0204471e951b2285580335de42f9d2534f3ae7a8af" dependencies = [ "proc-macro2", "quote", @@ -2810,7 +2809,7 @@ checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -2853,7 +2852,7 @@ checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -2926,7 +2925,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] @@ -3091,7 +3090,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", "wasm-bindgen-shared", ] @@ -3125,7 +3124,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3391,7 +3390,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ 
"proc-macro2", "quote", - "syn 2.0.71", + "syn 2.0.72", ] [[package]] From be9a23640c80c2b6f67d6192f44a5c641bcc93df Mon Sep 17 00:00:00 2001 From: Philip Sampaio Date: Mon, 22 Jul 2024 09:48:23 -0300 Subject: [PATCH 22/30] WIP: enable the "to_parquet/2" test case with new object_store The hope is that the new version has fixed the issue. --- test/explorer/data_frame/lazy_test.exs | 1 - 1 file changed, 1 deletion(-) diff --git a/test/explorer/data_frame/lazy_test.exs b/test/explorer/data_frame/lazy_test.exs index 5d201cf9a..43b0cf10a 100644 --- a/test/explorer/data_frame/lazy_test.exs +++ b/test/explorer/data_frame/lazy_test.exs @@ -400,7 +400,6 @@ defmodule Explorer.DataFrame.LazyTest do end @tag :cloud_integration - @tag :skip test "to_parquet/2 - cloud with streaming enabled", %{ldf: ldf} do config = %FSS.S3.Config{ access_key_id: "test", From 142155bb5bc23806d7e9864c3f01633b374e69c1 Mon Sep 17 00:00:00 2001 From: Philip Sampaio Date: Mon, 22 Jul 2024 15:35:26 -0300 Subject: [PATCH 23/30] Revert "WIP: enable the "to_parquet/2" test case with new object_store" This reverts commit be9a23640c80c2b6f67d6192f44a5c641bcc93df. --- test/explorer/data_frame/lazy_test.exs | 1 + 1 file changed, 1 insertion(+) diff --git a/test/explorer/data_frame/lazy_test.exs b/test/explorer/data_frame/lazy_test.exs index 43b0cf10a..5d201cf9a 100644 --- a/test/explorer/data_frame/lazy_test.exs +++ b/test/explorer/data_frame/lazy_test.exs @@ -400,6 +400,7 @@ defmodule Explorer.DataFrame.LazyTest do end @tag :cloud_integration + @tag :skip test "to_parquet/2 - cloud with streaming enabled", %{ldf: ldf} do config = %FSS.S3.Config{ access_key_id: "test", From 739bc821490d1b30c38a81eb61eab7bf165ee52b Mon Sep 17 00:00:00 2001 From: Philip Sampaio Date: Mon, 22 Jul 2024 20:02:02 -0300 Subject: [PATCH 24/30] Ensure the CloudWriter finishes its job after writers The idea is to avoid depending on shutting down only on "Drop". This is also important because we need to verify if the file was created. 
--- native/explorer/src/cloud_writer.rs | 59 +++++++++++++++++++++++---- native/explorer/src/dataframe/io.rs | 14 +++++++ test/explorer/data_frame/ipc_test.exs | 8 ++-- 3 files changed, 71 insertions(+), 10 deletions(-) diff --git a/native/explorer/src/cloud_writer.rs b/native/explorer/src/cloud_writer.rs index 509633e8c..fecced8c8 100644 --- a/native/explorer/src/cloud_writer.rs +++ b/native/explorer/src/cloud_writer.rs @@ -1,10 +1,17 @@ use crate::ExplorerError; use object_store::path::Path; -use object_store::ObjectStore; +use object_store::{ObjectMeta, ObjectStore}; use std::sync::Arc; use object_store::buffered::BufWriter as OSBufWriter; use tokio::io::AsyncWriteExt; + +#[derive(Debug, PartialEq)] +enum CloudWriterStatus { + Running, + Stopped, + Aborted, +} /// CloudWriter wraps the asynchronous interface of [ObjectStore's BufWriter](https://docs.rs/object_store/latest/object_store/buffered/struct.BufWriter.html) /// in a synchronous interface which implements `std::io::Write`. /// @@ -15,6 +22,12 @@ pub struct CloudWriter { runtime: tokio::runtime::Runtime, // Internal writer, constructed at creation writer: OSBufWriter, + // The copy of the object_store + object_store: Arc, + // Keep the path for the file, so we can use to read head. + path: Path, + // Private status of the current writer + status: CloudWriterStatus, } impl CloudWriter { @@ -27,8 +40,34 @@ impl CloudWriter { .enable_time() .enable_io() .build()?; - let writer = OSBufWriter::new(object_store, path); - Ok(CloudWriter { writer, runtime }) + let writer = OSBufWriter::new(object_store.clone(), path.clone()); + + Ok(CloudWriter { + writer, + runtime, + object_store, + path, + status: CloudWriterStatus::Running, + }) + } + + /// Make a head request to check if the upload has finished. 
+ pub fn finish(&mut self) -> Result { + if self.status != CloudWriterStatus::Stopped { + self.status = CloudWriterStatus::Stopped; + let _ = self.runtime.block_on(self.writer.shutdown()); + self.runtime + .block_on(self.object_store.head(&self.path)) + .map_err(|err| { + ExplorerError::Other(format!( + "cannot read information from file, which means the upload failed. {err}" + )) + }) + } else { + Err(ExplorerError::Other( + "cannot finish cloud writer due to an error, or it was already finished.".into(), + )) + } } } @@ -41,11 +80,13 @@ impl std::io::Write for CloudWriter { let buf = unsafe { std::mem::transmute::<&[u8], &'static [u8]>(buf) }; self.runtime.block_on(async { - let res = self.writer.write_all(buf).await; + // TODO: use writer.put to avoid copying data + let res = self.writer.write(buf).await; if res.is_err() { let _ = self.writer.abort().await; + self.status = CloudWriterStatus::Aborted; } - Ok(buf.len()) + res }) } @@ -54,14 +95,18 @@ impl std::io::Write for CloudWriter { let res = self.writer.flush().await; if res.is_err() { let _ = self.writer.abort().await; + self.status = CloudWriterStatus::Aborted; } - Ok(()) + res }) } } impl Drop for CloudWriter { fn drop(&mut self) { - let _ = self.runtime.block_on(self.writer.shutdown()); + if self.status != CloudWriterStatus::Stopped { + self.status = CloudWriterStatus::Stopped; + let _ = self.runtime.block_on(self.writer.shutdown()); + } } } diff --git a/native/explorer/src/dataframe/io.rs b/native/explorer/src/dataframe/io.rs index 67b2c4fc7..676b36c68 100644 --- a/native/explorer/src/dataframe/io.rs +++ b/native/explorer/src/dataframe/io.rs @@ -113,6 +113,9 @@ pub fn df_to_csv_cloud( .include_header(include_headers) .with_separator(delimiter) .finish(&mut data.clone())?; + + let _ = cloud_writer.finish()?; + Ok(()) } @@ -244,6 +247,8 @@ pub fn df_to_parquet_cloud( .with_compression(compression) .finish(&mut data.clone())?; + let _ = cloud_writer.finish()?; + Ok(()) } @@ -365,6 +370,9 @@ pub fn 
df_to_ipc_cloud( IpcWriter::new(&mut cloud_writer) .with_compression(compression) .finish(&mut data.clone())?; + + let _ = cloud_writer.finish()?; + Ok(()) } @@ -467,6 +475,9 @@ pub fn df_to_ipc_stream_cloud( IpcStreamWriter::new(&mut cloud_writer) .with_compression(compression) .finish(&mut data.clone())?; + + let _ = cloud_writer.finish()?; + Ok(()) } @@ -549,6 +560,9 @@ pub fn df_to_ndjson_cloud(data: ExDataFrame, ex_entry: ExS3Entry) -> Result<(), JsonWriter::new(&mut cloud_writer) .with_json_format(JsonFormat::JsonLines) .finish(&mut data.clone())?; + + let _ = cloud_writer.finish()?; + Ok(()) } diff --git a/test/explorer/data_frame/ipc_test.exs b/test/explorer/data_frame/ipc_test.exs index d417c27fe..d4dce332c 100644 --- a/test/explorer/data_frame/ipc_test.exs +++ b/test/explorer/data_frame/ipc_test.exs @@ -179,7 +179,6 @@ defmodule Explorer.DataFrame.IPCTest do end @tag :cloud_integration - @tag :skip test "cannot write an IPC file to S3 if bucket does not exist", %{ df: df, s3_config: s3_config @@ -189,8 +188,11 @@ defmodule Explorer.DataFrame.IPCTest do assert {:error, error} = DF.to_ipc(df, path, config: s3_config) - assert error == - RuntimeError.exception("Generic Error: Could not put multipart to path " <> key) + assert %RuntimeError{message: message} = error + + assert message =~ "cannot read information from file, which means the upload failed." + assert message =~ "Object at location test-writes/wine" + assert message =~ "Client error with status 404 Not Found: No Body" end end From d64af7ef687aaef6ef294ee2db293dbdc347c392 Mon Sep 17 00:00:00 2001 From: Philip Sampaio Date: Tue, 23 Jul 2024 17:29:00 -0300 Subject: [PATCH 25/30] WIP: saving PoC that didn't work The reason it didn't work is that the sink_parquet function will try to create the file where the "fifo" file is located. This is not going to work. 
--- lib/explorer/polars_backend/lazy_frame.ex | 29 ++++++++++++++++++----- lib/explorer/polars_backend/native.ex | 2 ++ native/explorer/src/io.rs | 26 ++++++++++++++++++++ native/explorer/src/lib.rs | 3 +++ test/explorer/data_frame/lazy_test.exs | 5 ++-- 5 files changed, 57 insertions(+), 8 deletions(-) create mode 100644 native/explorer/src/io.rs diff --git a/lib/explorer/polars_backend/lazy_frame.ex b/lib/explorer/polars_backend/lazy_frame.ex index 31e3d4061..bfd16f297 100644 --- a/lib/explorer/polars_backend/lazy_frame.ex +++ b/lib/explorer/polars_backend/lazy_frame.ex @@ -363,15 +363,32 @@ defmodule Explorer.PolarsBackend.LazyFrame do end end + # @impl true + # def to_parquet(%DF{} = ldf, %S3.Entry{} = entry, {compression, level}, _streaming = true) do + # case Native.lf_to_parquet_cloud( + # ldf.data, + # entry, + # Shared.parquet_compression(compression, level) + # ) do + # {:ok, _} -> :ok + # {:error, error} -> {:error, RuntimeError.exception(error)} + # end + # end + @impl true def to_parquet(%DF{} = ldf, %S3.Entry{} = entry, {compression, level}, _streaming = true) do - case Native.lf_to_parquet_cloud( - ldf.data, - entry, - Shared.parquet_compression(compression, level) - ) do - {:ok, _} -> :ok + fifo_name = "explorer-fifo-" <> Base.encode16(:crypto.strong_rand_bytes(24), case: :lower) + fifo_path = "/tmp/" <> fifo_name + System.cmd("mkfifo", [fifo_path]) + + compression = Shared.parquet_compression(compression, level) + fs_writer = Task.async(fn -> Native.lf_to_parquet(ldf.data, fifo_path, compression, true) end) + cloud_writer = Task.async(fn -> Native.fifo_file_to_cloud(fifo_path, entry, fs_writer.pid) end) + + case Task.await_many([fs_writer, cloud_writer], :infinity) do + [{:ok, _}, {:ok, _}] -> :ok {:error, error} -> {:error, RuntimeError.exception(error)} + other -> dbg(other) end end diff --git a/lib/explorer/polars_backend/native.ex b/lib/explorer/polars_backend/native.ex index 037df1bd4..1df59a201 100644 --- 
a/lib/explorer/polars_backend/native.ex +++ b/lib/explorer/polars_backend/native.ex @@ -463,5 +463,7 @@ defmodule Explorer.PolarsBackend.Native do def message_on_gc(_pid, _payload), do: err() def is_message_on_gc(_term), do: err() + def fifo_file_to_cloud(_fifo_path, _ex_entry, _task_pid), do: err() + defp err, do: :erlang.nif_error(:nif_not_loaded) end diff --git a/native/explorer/src/io.rs b/native/explorer/src/io.rs new file mode 100644 index 000000000..b77388326 --- /dev/null +++ b/native/explorer/src/io.rs @@ -0,0 +1,26 @@ +use std::{fs::File, io}; + +use crate::ExplorerError; +use crate::datatypes::ExS3Entry; +use rustler::types::LocalPid; +use rustler::Env; + +#[rustler::nif(schedule = "DirtyIo")] +pub fn fifo_file_to_cloud( + env: Env, + filename: &str, + _ex_s3_entry: ExS3Entry, + writer_task_pid: LocalPid, +) -> Result<(), ExplorerError> { + let mut fifo_file = File::open(filename)?; + let mut target = File::open("/tmp/my_target.parquet")?; + + while LocalPid::is_alive(writer_task_pid, env) { + match io::copy(&mut fifo_file, &mut target) { + Ok(_) => continue, + Err(_) => break, + }; + } + + Ok(()) +} diff --git a/native/explorer/src/lib.rs b/native/explorer/src/lib.rs index 0a0efb7c0..b1583447b 100644 --- a/native/explorer/src/lib.rs +++ b/native/explorer/src/lib.rs @@ -16,6 +16,9 @@ static GLOBAL: MiMalloc = MiMalloc; #[cfg(feature = "cloud")] mod cloud_writer; +// Global IO functions that are exported as NIFs. 
+mod io; + mod dataframe; mod datatypes; mod encoding; diff --git a/test/explorer/data_frame/lazy_test.exs b/test/explorer/data_frame/lazy_test.exs index 5d201cf9a..d7ddfd20b 100644 --- a/test/explorer/data_frame/lazy_test.exs +++ b/test/explorer/data_frame/lazy_test.exs @@ -400,7 +400,6 @@ defmodule Explorer.DataFrame.LazyTest do end @tag :cloud_integration - @tag :skip test "to_parquet/2 - cloud with streaming enabled", %{ldf: ldf} do config = %FSS.S3.Config{ access_key_id: "test", @@ -415,7 +414,9 @@ defmodule Explorer.DataFrame.LazyTest do assert :ok = DF.to_parquet(ldf, path, streaming: true, config: config) df = DF.compute(ldf) - df1 = DF.from_parquet!(path, config: config) + tmp_path = "/tmp/my_target.parquet" + # df1 = DF.from_parquet!(path, config: config) + df1 = DF.from_parquet!(tmp_path) assert DF.to_rows(df) |> Enum.sort() == DF.to_rows(df1) |> Enum.sort() end From c8aabfeeaaee826805aab40c5182f0508b2568d3 Mon Sep 17 00:00:00 2001 From: Philip Sampaio Date: Tue, 23 Jul 2024 17:30:09 -0300 Subject: [PATCH 26/30] Revert "WIP: saving PoC that didn't work" This reverts commit d64af7ef687aaef6ef294ee2db293dbdc347c392. 
--- lib/explorer/polars_backend/lazy_frame.ex | 29 +++++------------------ lib/explorer/polars_backend/native.ex | 2 -- native/explorer/src/io.rs | 26 -------------------- native/explorer/src/lib.rs | 3 --- test/explorer/data_frame/lazy_test.exs | 5 ++-- 5 files changed, 8 insertions(+), 57 deletions(-) delete mode 100644 native/explorer/src/io.rs diff --git a/lib/explorer/polars_backend/lazy_frame.ex b/lib/explorer/polars_backend/lazy_frame.ex index bfd16f297..31e3d4061 100644 --- a/lib/explorer/polars_backend/lazy_frame.ex +++ b/lib/explorer/polars_backend/lazy_frame.ex @@ -363,32 +363,15 @@ defmodule Explorer.PolarsBackend.LazyFrame do end end - # @impl true - # def to_parquet(%DF{} = ldf, %S3.Entry{} = entry, {compression, level}, _streaming = true) do - # case Native.lf_to_parquet_cloud( - # ldf.data, - # entry, - # Shared.parquet_compression(compression, level) - # ) do - # {:ok, _} -> :ok - # {:error, error} -> {:error, RuntimeError.exception(error)} - # end - # end - @impl true def to_parquet(%DF{} = ldf, %S3.Entry{} = entry, {compression, level}, _streaming = true) do - fifo_name = "explorer-fifo-" <> Base.encode16(:crypto.strong_rand_bytes(24), case: :lower) - fifo_path = "/tmp/" <> fifo_name - System.cmd("mkfifo", [fifo_path]) - - compression = Shared.parquet_compression(compression, level) - fs_writer = Task.async(fn -> Native.lf_to_parquet(ldf.data, fifo_path, compression, true) end) - cloud_writer = Task.async(fn -> Native.fifo_file_to_cloud(fifo_path, entry, fs_writer.pid) end) - - case Task.await_many([fs_writer, cloud_writer], :infinity) do - [{:ok, _}, {:ok, _}] -> :ok + case Native.lf_to_parquet_cloud( + ldf.data, + entry, + Shared.parquet_compression(compression, level) + ) do + {:ok, _} -> :ok {:error, error} -> {:error, RuntimeError.exception(error)} - other -> dbg(other) end end diff --git a/lib/explorer/polars_backend/native.ex b/lib/explorer/polars_backend/native.ex index 1df59a201..037df1bd4 100644 --- 
a/lib/explorer/polars_backend/native.ex +++ b/lib/explorer/polars_backend/native.ex @@ -463,7 +463,5 @@ defmodule Explorer.PolarsBackend.Native do def message_on_gc(_pid, _payload), do: err() def is_message_on_gc(_term), do: err() - def fifo_file_to_cloud(_fifo_path, _ex_entry, _task_pid), do: err() - defp err, do: :erlang.nif_error(:nif_not_loaded) end diff --git a/native/explorer/src/io.rs b/native/explorer/src/io.rs deleted file mode 100644 index b77388326..000000000 --- a/native/explorer/src/io.rs +++ /dev/null @@ -1,26 +0,0 @@ -use std::{fs::File, io}; - -use crate::ExplorerError; -use crate::datatypes::ExS3Entry; -use rustler::types::LocalPid; -use rustler::Env; - -#[rustler::nif(schedule = "DirtyIo")] -pub fn fifo_file_to_cloud( - env: Env, - filename: &str, - _ex_s3_entry: ExS3Entry, - writer_task_pid: LocalPid, -) -> Result<(), ExplorerError> { - let mut fifo_file = File::open(filename)?; - let mut target = File::open("/tmp/my_target.parquet")?; - - while LocalPid::is_alive(writer_task_pid, env) { - match io::copy(&mut fifo_file, &mut target) { - Ok(_) => continue, - Err(_) => break, - }; - } - - Ok(()) -} diff --git a/native/explorer/src/lib.rs b/native/explorer/src/lib.rs index b1583447b..0a0efb7c0 100644 --- a/native/explorer/src/lib.rs +++ b/native/explorer/src/lib.rs @@ -16,9 +16,6 @@ static GLOBAL: MiMalloc = MiMalloc; #[cfg(feature = "cloud")] mod cloud_writer; -// Global IO functions that are exported as NIFs. 
-mod io; - mod dataframe; mod datatypes; mod encoding; diff --git a/test/explorer/data_frame/lazy_test.exs b/test/explorer/data_frame/lazy_test.exs index d7ddfd20b..5d201cf9a 100644 --- a/test/explorer/data_frame/lazy_test.exs +++ b/test/explorer/data_frame/lazy_test.exs @@ -400,6 +400,7 @@ defmodule Explorer.DataFrame.LazyTest do end @tag :cloud_integration + @tag :skip test "to_parquet/2 - cloud with streaming enabled", %{ldf: ldf} do config = %FSS.S3.Config{ access_key_id: "test", @@ -414,9 +415,7 @@ defmodule Explorer.DataFrame.LazyTest do assert :ok = DF.to_parquet(ldf, path, streaming: true, config: config) df = DF.compute(ldf) - tmp_path = "/tmp/my_target.parquet" - # df1 = DF.from_parquet!(path, config: config) - df1 = DF.from_parquet!(tmp_path) + df1 = DF.from_parquet!(path, config: config) assert DF.to_rows(df) |> Enum.sort() == DF.to_rows(df1) |> Enum.sort() end From bb04f295e277a1bf76bc56f7679e3d35a0b6deba Mon Sep 17 00:00:00 2001 From: Philip Sampaio Date: Tue, 23 Jul 2024 17:48:10 -0300 Subject: [PATCH 27/30] Raise for when saving lazy df as parquet to cloud using streaming --- lib/explorer/polars_backend/lazy_frame.ex | 11 ++--------- test/explorer/data_frame/lazy_test.exs | 16 +++++++++++----- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/lib/explorer/polars_backend/lazy_frame.ex b/lib/explorer/polars_backend/lazy_frame.ex index 31e3d4061..147d9fb67 100644 --- a/lib/explorer/polars_backend/lazy_frame.ex +++ b/lib/explorer/polars_backend/lazy_frame.ex @@ -364,15 +364,8 @@ defmodule Explorer.PolarsBackend.LazyFrame do end @impl true - def to_parquet(%DF{} = ldf, %S3.Entry{} = entry, {compression, level}, _streaming = true) do - case Native.lf_to_parquet_cloud( - ldf.data, - entry, - Shared.parquet_compression(compression, level) - ) do - {:ok, _} -> :ok - {:error, error} -> {:error, RuntimeError.exception(error)} - end + def to_parquet(%DF{} = _ldf, %S3.Entry{} = _entry, {_compression, _level}, _streaming = true) do + raise 
"streaming of a lazy frame to the cloud using parquet is currently unavailable. Please try again disabling the `:streaming` option." end @impl true diff --git a/test/explorer/data_frame/lazy_test.exs b/test/explorer/data_frame/lazy_test.exs index 5d201cf9a..aa162b70f 100644 --- a/test/explorer/data_frame/lazy_test.exs +++ b/test/explorer/data_frame/lazy_test.exs @@ -400,7 +400,6 @@ defmodule Explorer.DataFrame.LazyTest do end @tag :cloud_integration - @tag :skip test "to_parquet/2 - cloud with streaming enabled", %{ldf: ldf} do config = %FSS.S3.Config{ access_key_id: "test", @@ -412,12 +411,19 @@ defmodule Explorer.DataFrame.LazyTest do path = "s3://test-bucket/test-lazy-writes/wine-#{System.monotonic_time()}.parquet" ldf = DF.head(ldf, 15) - assert :ok = DF.to_parquet(ldf, path, streaming: true, config: config) + # assert :ok = DF.to_parquet(ldf, path, streaming: true, config: config) - df = DF.compute(ldf) - df1 = DF.from_parquet!(path, config: config) + # df = DF.compute(ldf) + # df1 = DF.from_parquet!(path, config: config) + + # assert DF.to_rows(df) |> Enum.sort() == DF.to_rows(df1) |> Enum.sort() - assert DF.to_rows(df) |> Enum.sort() == DF.to_rows(df1) |> Enum.sort() + message = + "streaming of a lazy frame to the cloud using parquet is currently unavailable. Please try again disabling the `:streaming` option." 
+ + assert_raise RuntimeError, message, fn -> + DF.to_parquet(ldf, path, streaming: true, config: config) + end end @tag :cloud_integration From d7b601064a5685d8016740aa71b482101fdd7dab Mon Sep 17 00:00:00 2001 From: Philip Sampaio Date: Wed, 24 Jul 2024 11:06:00 -0300 Subject: [PATCH 28/30] Add changes to the changelog --- CHANGELOG.md | 19 +++++++++++++++++++ lib/explorer/data_frame.ex | 3 +-- native/explorer/src/lazyframe.rs | 2 -- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 836a735cb..1c452b1e3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,25 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Changed + +- The `Explorer.Series.pow/2` function does not cast to float when exponent is signed + integer anymore. We are following the way Polars works now, which is to try to execute + the operation, or raise an exception in case the exponent is negative. + +- When pivoting wider with multiple `:values_from` columns, the name is not going to + have the prefix of the `names_from` argument. + +- The `Explorer.Series.substring/3` function no more cycle to the back of string if + the negative offset surpass the beginning of that string. In that case, an empty + string is returned. + +- All the series functions related to "EWM" are now propagating the `nil` values instead + of filling the missing values with the previous one. + +- Saving a dataframe as a Parque file to S3 services no longer works when streaming is + enabled. This is temporary due to a bug in Polars. An exception should be raised instead. 
+ ## [v0.8.3] - 2024-06-10 ### Added diff --git a/lib/explorer/data_frame.ex b/lib/explorer/data_frame.ex index c1fcbc024..60013ebb7 100644 --- a/lib/explorer/data_frame.ex +++ b/lib/explorer/data_frame.ex @@ -4745,8 +4745,7 @@ defmodule Explorer.DataFrame do Multiple columns are accepted for the `values_from` parameter, but the behaviour is slightly different for the naming of new columns in the resultant dataframe. The new columns are going - to be prefixed by the name of the original value column, followed by an underscore and the - original column name, followed by the name of the variable. + to be prefixed by the name of the original value column, followed by the name of the variable. iex> df = Explorer.DataFrame.new( iex> product_id: [1, 1, 1, 1, 2, 2, 2, 2], diff --git a/native/explorer/src/lazyframe.rs b/native/explorer/src/lazyframe.rs index 3e9a0dbf8..7ae63a27a 100644 --- a/native/explorer/src/lazyframe.rs +++ b/native/explorer/src/lazyframe.rs @@ -164,8 +164,6 @@ pub fn lf_grouped_sort_with( directions: Vec, ) -> Result { let sort_options = SortMultipleOptions::new() - // .with_nulls_last(nulls_last) - // .with_maintain_order(maintain_order) .with_order_descending_multi(directions); // For grouped lazy frames, we need to use the `#sort_by` method that is // less powerful, but can be used with `over`. 
From 64370e21754dddb349b6e2465179f881c93fad84 Mon Sep 17 00:00:00 2001 From: Philip Sampaio Date: Wed, 24 Jul 2024 11:14:13 -0300 Subject: [PATCH 29/30] Last round of "cargo update" --- native/explorer/Cargo.lock | 60 +++++++++++++------------------- native/explorer/src/lazyframe.rs | 3 +- 2 files changed, 26 insertions(+), 37 deletions(-) diff --git a/native/explorer/Cargo.lock b/native/explorer/Cargo.lock index 95c5faa54..75d312e2c 100644 --- a/native/explorer/Cargo.lock +++ b/native/explorer/Cargo.lock @@ -1060,9 +1060,9 @@ checksum = "9028f49264629065d057f340a86acb84867925865f73bbf8d47b4d149a7e88b8" [[package]] name = "jobserver" -version = "0.1.31" +version = "0.1.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e" +checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" dependencies = [ "libc", ] @@ -1207,9 +1207,9 @@ checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" [[package]] name = "lz4" -version = "1.25.0" +version = "1.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6eab492fe7f8651add23237ea56dbf11b3c4ff762ab83d40a47f11433421f91" +checksum = "958b4caa893816eea05507c20cfe47574a43d9a697138a7872990bba8a0ece68" dependencies = [ "libc", "lz4-sys", @@ -1217,9 +1217,9 @@ dependencies = [ [[package]] name = "lz4-sys" -version = "1.9.5" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9764018d143cc854c9f17f0b907de70f14393b1f502da6375dce70f00514eb3" +checksum = "109de74d5d2353660401699a4174a4ff23fcc649caf553df71933c7fb45ad868" dependencies = [ "cc", "libc", @@ -1276,13 +1276,14 @@ dependencies = [ [[package]] name = "mio" -version = "0.8.11" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" +checksum = 
"4569e456d394deccd22ce1c1913e6ea0e54519f577285001215d33557431afe4" dependencies = [ + "hermit-abi", "libc", "wasi", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] @@ -1335,21 +1336,11 @@ dependencies = [ "libm", ] -[[package]] -name = "num_cpus" -version = "1.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" -dependencies = [ - "hermit-abi", - "libc", -] - [[package]] name = "object" -version = "0.36.1" +version = "0.36.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "081b846d1d56ddfc18fdf1a922e4f6e07a11768ea1b92dec44e42b72712ccfce" +checksum = "3f203fa8daa7bb185f760ae12bd8e097f63d17041dcdcaf675ac54cdf863170e" dependencies = [ "memchr", ] @@ -2017,9 +2008,9 @@ dependencies = [ [[package]] name = "quick-xml" -version = "0.36.0" +version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4091e032efecb09d7b1f711f487b85ab925632a842627e3200fb088382cde32c" +checksum = "96a05e2e8efddfa51a84ca47cec303fac86c8541b686d37cac5efc0e094417bc" dependencies = [ "memchr", "serde", @@ -2061,9 +2052,9 @@ dependencies = [ [[package]] name = "quinn-udp" -version = "0.5.3" +version = "0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25a78e6f726d84fcf960409f509ae354a32648f090c8d32a2ea8b1a1bc3bab14" +checksum = "8bffec3605b73c6f1754535084a85229fa8a30f86014e6c81aeec4abb68b0285" dependencies = [ "libc", "once_cell", @@ -2393,9 +2384,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.11" +version = "0.23.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4828ea528154ae444e5a642dbb7d5623354030dc9822b83fd9bb79683c7399d0" +checksum = "c58f8c84392efc0a126acce10fa59ff7b3d2ac06ab451a33f2741989b806b044" dependencies = [ "once_cell", "ring", @@ -2436,9 +2427,9 @@ checksum = 
"976295e77ce332211c0d24d92c0e83e50f5c5f046d11082cea19f3df13a3562d" [[package]] name = "rustls-webpki" -version = "0.102.5" +version = "0.102.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9a6fccd794a42c2c105b513a2f62bc3fd8f3ba57a4593677ceb0bd035164d78" +checksum = "8e6b52d4fda176fd835fdc55a835d4a89b8499cad995885a21149d5ad62f852e" dependencies = [ "ring", "rustls-pki-types", @@ -2829,26 +2820,25 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.38.1" +version = "1.39.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb2caba9f80616f438e09748d5acda951967e1ea58508ef53d9c6402485a46df" +checksum = "d040ac2b29ab03b09d4129c2f5bbd012a3ac2f79d38ff506a4bf8dd34b0eac8a" dependencies = [ "backtrace", "bytes", "libc", "mio", - "num_cpus", "pin-project-lite", "socket2", "tokio-macros", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] name = "tokio-macros" -version = "2.3.0" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" +checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", diff --git a/native/explorer/src/lazyframe.rs b/native/explorer/src/lazyframe.rs index 7ae63a27a..ec7e72b47 100644 --- a/native/explorer/src/lazyframe.rs +++ b/native/explorer/src/lazyframe.rs @@ -163,8 +163,7 @@ pub fn lf_grouped_sort_with( groups: Vec, directions: Vec, ) -> Result { - let sort_options = SortMultipleOptions::new() - .with_order_descending_multi(directions); + let sort_options = SortMultipleOptions::new().with_order_descending_multi(directions); // For grouped lazy frames, we need to use the `#sort_by` method that is // less powerful, but can be used with `over`. 
// See: https://docs.pola.rs/user-guide/expressions/window/#operations-per-group From 1ce99b96b5e25ec6e2edd2ff4d53bb353316a545 Mon Sep 17 00:00:00 2001 From: Philip Sampaio Date: Wed, 24 Jul 2024 13:21:58 -0300 Subject: [PATCH 30/30] Update CHANGELOG.md Co-authored-by: Billy Lanchantin --- CHANGELOG.md | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1c452b1e3..f40caca47 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,22 +9,25 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed -- The `Explorer.Series.pow/2` function does not cast to float when exponent is signed - integer anymore. We are following the way Polars works now, which is to try to execute - the operation, or raise an exception in case the exponent is negative. +- `Explorer.Series.pow/2` no longer casts to float when the exponent is a signed + integer. We are following the way Polars works now, which is to try to execute + the operation or raise an exception in case the exponent is negative. -- When pivoting wider with multiple `:values_from` columns, the name is not going to - have the prefix of the `names_from` argument. +- `Explorer.DataFrame.pivot_wider/4` no longer includes the `names_from` column + name in the new columns when `values_from` is a list of columns. This is more + consistent with its behaviour when `values_from` is a single column. -- The `Explorer.Series.substring/3` function no more cycle to the back of string if - the negative offset surpass the beginning of that string. In that case, an empty +- `Explorer.Series.substring/3` no longer cycles to the end of the string if the + negative offset surpasses the beginning of that string. In that case, an empty string is returned. -- All the series functions related to "EWM" are now propagating the `nil` values instead - of filling the missing values with the previous one. 
+- The `Explorer.Series.ewm_*` functions no longer replace `nil` values with the + value at the previous index. They now propagate `nil` values through to the + result series. -- Saving a dataframe as a Parque file to S3 services no longer works when streaming is - enabled. This is temporary due to a bug in Polars. An exception should be raised instead. +- Saving a dataframe as a Parquet file to S3 services no longer works when + streaming is enabled. This is temporary due to a bug in Polars. An exception + should be raised instead. ## [v0.8.3] - 2024-06-10