From 723f4884285a4b35fa6e2a0b173d7c742107a5fd Mon Sep 17 00:00:00 2001 From: sorhawell Date: Fri, 4 Aug 2023 14:41:05 +0200 Subject: [PATCH] derive wrap_e from robj_to!, instead of opposite --- R/expr__expr.R | 19 +++++++++++++++++-- R/extendr-wrappers.R | 4 ++++ R/functions__lazy.R | 12 +++++++++++- R/zzz.R | 2 +- man/wrap_e.Rd | 5 +++-- man/wrap_e_legacy.Rd | 25 +++++++++++++++++++++++++ src/rust/src/rlib.rs | 28 ++++++++++++++++++++++++---- src/rust/src/utils/mod.rs | 33 +++++++++++++++++++++++++++------ tests/testthat/test-wrap_e.R | 32 ++++++++++++++++++++++++++++++++ 9 files changed, 144 insertions(+), 16 deletions(-) create mode 100644 man/wrap_e_legacy.Rd create mode 100644 tests/testthat/test-wrap_e.R diff --git a/R/expr__expr.R b/R/expr__expr.R index 4182c4784..2ffc6b94f 100644 --- a/R/expr__expr.R +++ b/R/expr__expr.R @@ -71,14 +71,16 @@ as.list.Expr = function(x, ...) { list(x) } -#' wrap as literal +#' DEPRECATED wrap as literal +#' @description use robj_to!(Expr) on rust side or rarely wrap_e on R-side +#' This function is only kept for reference #' @param e an Expr(polars) or any R expression #' @details #' used internally to ensure an object is an expression #' @keywords internal #' @return Expr #' @examples pl$col("foo") < 5 -wrap_e = function(e, str_to_lit = TRUE) { +wrap_e_legacy = function(e, str_to_lit = TRUE) { if (inherits(e, "Expr")) { return(e) } @@ -96,6 +98,19 @@ wrap_e = function(e, str_to_lit = TRUE) { } } +#' DEPRECATED wrap as literal +#' @description use robj_to!(Expr) on rust side or rarely wrap_e on R-side +#' This function is only kept for reference +#' @param e an Expr(polars) or any R expression +#' @details +#' used internally to ensure an object is an expression +#' @keywords internal +#' @return Expr +#' @examples pl$col("foo") < 5 +wrap_e = function(e, str_to_lit = TRUE) { + internal_wrap_e(e, str_to_lit) |> unwrap() +} + ## TODO refactor to \(e, str_to_lit = TRUE, argname = NULL) wrap_e(e) |> result() diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index 4d3f23af9..be0a3f8b2 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -51,6 +51,8 @@ arrow_stream_to_rust <- function(rbr) invisible(.Call(wrap__arrow_stream_to_rust dtype_str_repr <- function(dtype) .Call(wrap__dtype_str_repr, dtype) +internal_wrap_e <- function(robj, str_to_lit) .Call(wrap__internal_wrap_e, robj, str_to_lit) + test_robj_to_usize <- function(robj) .Call(wrap__test_robj_to_usize, robj) test_robj_to_i64 <- function(robj) .Call(wrap__test_robj_to_i64, robj) @@ -59,6 +61,8 @@ test_robj_to_u32 <- function(robj) .Call(wrap__test_robj_to_u32, robj) test_print_string <- function(s) invisible(.Call(wrap__test_print_string, s)) +test_robj_to_expr <- function(robj) .Call(wrap__test_robj_to_expr, robj) + RPolarsErr <- new.env(parent = emptyenv()) RPolarsErr$new <- function() .Call(wrap__RPolarsErr__new) diff --git a/R/functions__lazy.R b/R/functions__lazy.R index c8014078b..3947ebde8 100644 --- a/R/functions__lazy.R +++ b/R/functions__lazy.R @@ -23,6 +23,9 @@ pl$all = function(name = NULL) { } + + + #' Start Expression with a column #' @name pl_col #' @description @@ -69,7 +72,14 @@ pl$all = function(name = NULL) { #' # from Series of names #' df$select(pl$col(pl$Series(c("bar", "foobar")))) pl$col = function(name = "", ...) { - # preconvert Series into char name(s) + pl_col_internal(name, ...) |> + result() |> + unwrap("in pl$col()") +} + +# This function throws error freely prefer pl$col +pl_col_internal = function(name="", ...) { +# preconvert Series into char name(s) if (inherits(name, "Series")) name <- name$to_vector() name_add = list(...) diff --git a/R/zzz.R b/R/zzz.R index 9fc99dcb0..67f2c9722 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -98,7 +98,7 @@ replace_private_with_pub_methods(Series, "^Series_") -# expression constructors +# expression constructors, why not just pl$lit = Expr_lit? move_env_elements(Expr, pl, c("lit"), remove = FALSE) diff --git a/man/wrap_e.Rd b/man/wrap_e.Rd index 2bacb3f7c..66e2c4346 100644 --- a/man/wrap_e.Rd +++ b/man/wrap_e.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/expr__expr.R \name{wrap_e} \alias{wrap_e} -\title{wrap as literal} +\title{DEPRECATED wrap as literal} \usage{ wrap_e(e, str_to_lit = TRUE) } @@ -13,7 +13,8 @@ wrap_e(e, str_to_lit = TRUE) Expr } \description{ -wrap as literal +use robj_to!(Expr) on rust side or rarely wrap_e on R-side +This function is only kept for reference } \details{ used internally to ensure an object is an expression diff --git a/man/wrap_e_legacy.Rd b/man/wrap_e_legacy.Rd new file mode 100644 index 000000000..e118a4581 --- /dev/null +++ b/man/wrap_e_legacy.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/expr__expr.R +\name{wrap_e_legacy} +\alias{wrap_e_legacy} +\title{DEPRECATED wrap as literal} +\usage{ +wrap_e_legacy(e, str_to_lit = TRUE) +} +\arguments{ +\item{e}{an Expr(polars) or any R expression} +} +\value{ +Expr +} +\description{ +use robj_to!(Expr) on rust side or rarely wrap_e on R-side +This function is only kept for reference +} +\details{ +used internally to ensure an object is an expression +} +\examples{ +pl$col("foo") < 5 +} +\keyword{internal} diff --git a/src/rust/src/rlib.rs b/src/rust/src/rlib.rs index 40e81102c..92ef16f4b 100644 --- a/src/rust/src/rlib.rs +++ b/src/rust/src/rlib.rs @@ -1,16 +1,16 @@ use crate::lazy::dsl::Expr; -use crate::rdataframe::DataFrame; -use crate::rpolarserr::{rdbg, RResult}; -use crate::{rdataframe::VecDataFrame, utils::r_result_list}; - use crate::lazy::dsl::ProtoExprArray; +use crate::rdataframe::DataFrame; use crate::rdatatype::robj_to_timeunit; use crate::robj_to; +use crate::rpolarserr::{rdbg, RResult}; use crate::series::Series; +use crate::{rdataframe::VecDataFrame, utils::r_result_list}; use extendr_api::prelude::*; use polars::prelude as pl; use polars_core::functions as pl_functions; use std::result::Result; + #[extendr] fn concat_df(vdf: &VecDataFrame) -> List { //-> PyResult { @@ -225,6 +225,18 @@ pub fn dtype_str_repr(dtype: Robj) -> RResult { Ok(dtype.to_string()) } +// replaces wrap_e_legacy, derived from robj_to! +#[extendr] +fn internal_wrap_e(robj: Robj, str_to_lit: Robj) -> RResult { + if robj_to!(bool, str_to_lit)? { + robj_to!(Expr, robj) + } else { + robj_to!(ExprCol, robj) + } +} + +// replaces wrap_e_legacy, derived from robj_to! + // pub fn series_from_arrow(name: &str, array: Robj) -> Result { // use polars::prelude::IntoSeries; // let arr = crate::arrow_interop::to_rust::arrow_array_to_rust(array)?; @@ -276,6 +288,11 @@ fn test_print_string(s: String) { rprintln!("{}", s); } +#[extendr] +fn test_robj_to_expr(robj: Robj) -> RResult { + robj_to!(Expr, robj) +} + extendr_module! { mod rlib; fn concat_df; @@ -298,8 +315,11 @@ extendr_module! { fn arrow_stream_to_rust; fn dtype_str_repr; + fn internal_wrap_e; + fn test_robj_to_usize; fn test_robj_to_i64; fn test_robj_to_u32; fn test_print_string; + fn test_robj_to_expr; } diff --git a/src/rust/src/utils/mod.rs b/src/rust/src/utils/mod.rs index 4709ce7af..185c2e13f 100644 --- a/src/rust/src/utils/mod.rs +++ b/src/rust/src/utils/mod.rs @@ -612,12 +612,7 @@ pub fn robj_to_rexpr(robj: extendr_api::Robj, str_to_lit: bool) -> RResult //use R side wrap_e to convert any R value into Expr or use extendr_api::*; - let robj_result_expr = R!("polars:::result(polars:::wrap_e({{robj}},{{str_to_lit}}))") - .map_err(crate::rpolarserr::extendr_to_rpolars_err) - .plain("internal error: polars:::result failed to catch this error")?; - - // handle any error from wrap_e - let robj_expr = unpack_r_result_list(robj_result_expr) + let robj_expr = internal_rust_wrap_e(robj, str_to_lit) .bad_robj(&robj_clone) .plain("cannot be converted into an Expr")?; @@ -631,6 +626,32 @@ pub fn robj_to_rexpr(robj: extendr_api::Robj, str_to_lit: bool) -> RResult Ok(Expr(ext_expr.0.clone())) } +fn internal_rust_wrap_e(robj: Robj, str_to_lit: bool) -> RResult { + use extendr_api::Result as EResult; + use extendr_api::Rtype::*; + use extendr_api::*; + let unpack = |res: EResult| -> RResult { + unpack_r_result_list(res.map_err(|err| { + extendr_api::Error::Other(format!("internal_error calling R from rust: {:?}", err)) + })?) + }; + match robj.rtype() { + ExternalPtr if robj.inherits("Expr") => Ok(robj), + ExternalPtr if robj.inherits("WhenThen") | robj.inherits("WhenThenThen") => { + unpack(R!("polars:::({{robj}}$otherwise(pl$lit(NULL)))")) + } + ExternalPtr if robj.inherits("When") => { + rerr().plain("Cannot use a When-statement as Expr without a $then()") + } + _h @ Logicals | _h @ List | _h @ Doubles | _h @ Integers => { + unpack(R!("polars:::result(pl$lit({{robj}}))")) + } + _ if str_to_lit => unpack(R!("polars:::result(pl$lit({{robj}}))")), + + _ => unpack(R!("polars:::result(pl$col({{robj}}))")), + } +} + pub fn robj_to_lazyframe(robj: extendr_api::Robj) -> RResult { let robj = unpack_r_result_list(robj)?; let rv = rdbg(&robj); diff --git a/tests/testthat/test-wrap_e.R b/tests/testthat/test-wrap_e.R new file mode 100644 index 000000000..3499b72fc --- /dev/null +++ b/tests/testthat/test-wrap_e.R @@ -0,0 +1,32 @@ +#eventually these legacy behavior may drift away, then remove legacy and delete this test + +test_that("wrap_e_legacy and robj_to!(Expr) produce same Expr", { + #same expression + l = list( + 1, NA, pl$lit(2), 1:3, "23", + list(1:3,4:5,6L), pl$Series(3:1,"alice"), pl$lit("hej"), + pl$when(pl$col("a")=="b")$then("c"), + pl$when(pl$col("a")=="b")$then("c")$otherwise("d") + #some how NaN is not the same Expr + ) + l_wrap_e = lapply(l, wrap_e_legacy) + l_robj_e = lapply(l, wrap_e) + for (i in seq_along(l)) { + e1 = l_wrap_e[[i]] + e2 = l_robj_e[[i]] + expect_true(e1$meta$eq(e2),info=capture_output_lines(print(list(i,e1,e2)))) + } +}) + +test_that("wrap_e_legacy and robj_to!(Expr) produce same literal conversions", { + + l = list(NaN) # these do produce the same conversion + l_wrap_e = lapply(l, wrap_e_legacy) + l_robj_e = lapply(l, wrap_e) + for (i in seq_along(l)) { + e1 = l_wrap_e[[i]] + e2 = l_robj_e[[i]] + expect_identical(e1$to_r(), e2$to_r()) + } + +})